def main(args):
    # Load the arguments.
    model_dir = os.path.dirname(args.model_path)
    params = Dict2Obj(
        json.load(open(os.path.join(model_dir, "args.json"), "r")))

    # Configure logging.
    log_format = '%(levelname)-8s %(message)s'
    logfile = os.path.join(model_dir, 'eval.log')
    logging.basicConfig(filename=logfile, level=logging.INFO, format=log_format)
    logging.getLogger().addHandler(logging.StreamHandler())
    logging.info(json.dumps(args.__dict__))

    # Load the vocabulary wrapper.
    vocab = load_vocab(params.vocab_path)

    # Load the GloVe embedding.
    if params.use_glove:
        embedding = get_glove_embedding(params.embedding_name, 300, vocab)
    else:
        embedding = None

    # Process the input text.
    logging.info("Processing input text...")
    text, length = process_text(args.text, vocab, max_length=20)
    d_text = text
    logging.info("Done")

    # Build the model.
    logging.info('Creating IQ model...')
    model = Classifier(len(vocab),
                       embedding_dim=params.embedding_dim,
                       embedding=embedding,
                       hidden_dim=params.num_hidden_nodes,
                       output_dim=params.num_output_nodes,
                       num_layers=params.num_layers,
                       bidirectional=params.bidirectional,
                       dropout=params.dropout,
                       rnn_cell=params.rnn_cell)
    logging.info("Done")

    logging.info("Loading model.")
    model.load_state_dict(
        torch.load(args.model_path + "model-tf-" + args.state + ".pkl"))

    # Set up GPUs.
    if torch.cuda.is_available():
        logging.info("Using available GPU...")
        model.cuda()

    predict(model, d_text)
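
# Dict2Obj is not defined in this snippet. Judging by how it is used above
# (attribute access on a JSON-loaded dict, e.g. params.vocab_path), a minimal
# sketch could look like the following -- an assumption, not the original class:
class Dict2Obj(object):
    """Wrap a dict so its keys can be read as attributes."""

    def __init__(self, d):
        self.__dict__.update(d)
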
def pretrain(source_data_loader, test_data_loader, no_classes, embeddings,
             epochs=20, batch_size=128, cuda=False):
    classifier = Classifier()
    encoder = Encoder(embeddings)

    if cuda:
        classifier.cuda()
        encoder.cuda()

    # Jointly optimize both encoder and classifier.
    encoder_params = filter(lambda p: p.requires_grad, encoder.parameters())
    optimizer = optim.Adam(
        list(encoder_params) + list(classifier.parameters()))

    # Use per-class weights (1 / class count) to compensate for class imbalance.
    c = [1] * len(no_classes)
    weights = torch.FloatTensor(len(no_classes))
    for i, (a, b) in enumerate(zip(c, no_classes)):
        weights[i] = 0 if b == 0 else a / b
    loss_fn = nn.CrossEntropyLoss(weight=Variable(weights))

    print('Training encoder and classifier')
    for e in range(epochs):
        # Pretrain on the whole source data -- groups are used later with DCD.
        for sample in source_data_loader:
            x, y = Variable(sample[0]), Variable(sample[1])
            optimizer.zero_grad()

            if cuda:
                x, y = x.cuda(), y.cuda()

            output = model_fn(encoder, classifier)(x)
            loss = loss_fn(output, y)
            loss.backward()
            optimizer.step()

        print("Epoch", e, "Loss", loss.data[0], "Accuracy",
              eval_on_test(test_data_loader, model_fn(encoder, classifier)))

    return encoder, classifier
def main(args):
    vecs_builder = VecsBuilder(vecs_path='./glove/glove.6B.300d.txt')
    vecs = vecs_builder.get_data()

    train_dataset = Loader(args.max_length, vecs, 'train')
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=5)
    val_dataset = Loader(args.max_length, vecs, 'val')
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size)

    model = Classifier(args.embed_dim, args.hidden_dim,
                       args.num_classes, args.num_hidden_layers)
    if torch.cuda.is_available():
        print('CUDA functioning...')
        model.cuda()

    best_acc = 0
    automated_log = open('models/automated_log.txt', 'w+')
    automated_log.write('Epochs\tTrain-Loss\tTrain-Accuracy\t'
                        'Validation Loss\tValidation Accuracy\n')

    for epoch in tqdm(range(args.num_epochs)):
        train_loss, train_acc = train(model, train_loader)
        val_loss, val_acc = eval(model, val_loader)
        # Normalize running totals into per-sample accuracies.
        train_acc = train_acc / train_dataset.num_samples
        val_acc = val_acc / val_dataset.num_samples
        automated_log.write(str(epoch) + '\t' + str(train_loss) + '\t' + str(train_acc)
                            + '\t' + str(val_loss) + '\t' + str(val_acc) + '\n')

        # Checkpoint every 10 epochs.
        if epoch % 10 == 0:
            model_name = 'models/model_' + str(epoch) + '.pkl'
            torch.save(model.state_dict(), model_name)

        # Keep the best model by validation accuracy.
        if val_acc > best_acc:
            best_acc = val_acc
            best_model = 'best.pkl'
            torch.save(model.state_dict(), best_model)
            with open('models/best.txt', 'w+') as f:
                report = ('Epoch : ' + str(epoch)
                          + '\t Validation Accuracy : ' + str(best_acc))
                f.write(report)
            print('Best model saved with validation accuracy:', val_acc)

    automated_log.close()
def pretrain(data, epochs=5, batch_size=128, cuda=False):
    X_s, y_s, _, _ = data
    test_dataloader = mnist_dataloader(train=False, cuda=cuda)

    classifier = Classifier()
    encoder = Encoder()

    if cuda:
        classifier.cuda()
        encoder.cuda()

    # Jointly optimize both encoder and classifier.
    optimizer = optim.Adam(list(encoder.parameters()) + list(classifier.parameters()))
    loss_fn = nn.CrossEntropyLoss()

    for e in range(epochs):
        for _ in range(len(X_s) // batch_size):
            # Sample a random mini-batch from the source set.
            inds = torch.randperm(len(X_s))[:batch_size]
            x, y = Variable(X_s[inds]), Variable(y_s[inds])
            optimizer.zero_grad()

            if cuda:
                x, y = x.cuda(), y.cuda()

            y_pred = model_fn(encoder, classifier)(x)
            loss = loss_fn(y_pred, y)
            loss.backward()
            optimizer.step()

        print("Epoch", e, "Loss", loss.data[0], "Accuracy",
              eval_on_test(test_dataloader, model_fn(encoder, classifier)))

    return encoder, classifier
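
# model_fn is not defined in either pretrain variant above. Given how it is
# called -- model_fn(encoder, classifier)(x) -- it most plausibly composes the
# two modules into a single forward function. A minimal sketch under that
# assumption:
def model_fn(encoder, classifier):
    # Encode the input first, then classify the resulting features.
    return lambda x: classifier(encoder(x))
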
if data_list[stage] is not None:
    data_loader.set_data_list(data_list[stage])
data_gen[stage] = DataGenerator(data_loader, generator_config[stage])

# - GPUs
os.environ['CUDA_VISIBLE_DEVICES'] = str(config['gpus'])
torch.backends.cudnn.enabled = True

# - model
model = Classifier(out_channels=2)
if args.checkpoint is not None:
    model.load_state_dict(torch.load(args.checkpoint))
    print('Load checkpoint:', args.checkpoint)
if torch.cuda.device_count() > 0:
    model = model.cuda()
model.zero_grad()

# - optimizer
optim = Optimizer(config['optimizer'])(model)
optim.zero_grad()

# Per-class weights for the imbalanced two-class problem.
weight = torch.tensor([0.1, 0.99])
if torch.cuda.device_count() > 0:
    weight = weight.cuda()
criterion = torch.nn.CrossEntropyLoss(weight)


def F1_score(predis, labels):
    return 2 * torch.sum(predis * labels) / torch.sum(predis + labels)
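
# Note on F1_score: with binary {0, 1} tensors the expression above is the Dice
# coefficient, which coincides with the F1 score. A hypothetical sanity check:
# predis = [1, 1, 0] and labels = [1, 0, 0] give 2 * 1 / 3 ~= 0.667
# (one true positive, one false positive, no false negatives).
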
def main(args):
    # Load the arguments.
    model_dir = os.path.dirname(args.model_path)
    params = Dict2Obj(
        json.load(open(os.path.join(model_dir, "args.json"), "r")))

    # Configure logging.
    log_format = '%(levelname)-8s %(message)s'
    logfile = os.path.join(model_dir, 'eval.log')
    logging.basicConfig(filename=logfile, level=logging.INFO, format=log_format)
    logging.getLogger().addHandler(logging.StreamHandler())
    logging.info(json.dumps(args.__dict__))

    # Load the vocabulary wrapper.
    vocab = load_vocab(params.vocab_path)

    # Load the GloVe embedding.
    if params.use_glove:
        embedding = get_glove_embedding(params.embedding_name, 300, vocab)
    else:
        embedding = None

    # Build the data loader.
    logging.info("Building data loader...")
    data_loader = get_loader(args.dataset, args.batch_size,
                             shuffle=False, num_workers=args.num_workers,
                             max_examples=args.max_examples)
    logging.info("Done")

    # Build the model.
    logging.info('Creating a multi-class classification model...')
    model = Classifier(len(vocab),
                       embedding_dim=params.embedding_dim,
                       embedding=embedding,
                       hidden_dim=params.num_hidden_nodes,
                       output_dim=params.num_output_nodes,
                       num_layers=params.num_layers,
                       bidirectional=params.bidirectional,
                       dropout=params.dropout,
                       rnn_cell=params.rnn_cell)
    logging.info("Done")

    logging.info("Loading model.")
    model.load_state_dict(
        torch.load(args.model_path + "model-tf-" + args.state + ".pkl"))

    # Set up GPUs.
    if torch.cuda.is_available():
        logging.info("Using available GPU...")
        model.cuda()

    scores, gts, preds = evaluate(model, data_loader, vocab, args, params)

    # Print and save the scores.
    print(scores)
    with open(os.path.join(model_dir, args.results_path), 'w') as results_file:
        json.dump(scores, results_file)
    with open(os.path.join(model_dir, args.preds_path), 'w') as preds_file:
        json.dump(preds, preds_file)
    with open(os.path.join(model_dir, args.gts_path), 'w') as gts_file:
        json.dump(gts, gts_file)
# `cla` is the classifier module built earlier (its construction is not shown
# in this snippet).
classifier_pt = torch.load('classifier.pt')
cla.load_state_dict(classifier_pt)
cla.eval()

for method in methods:
    print(method)
    model = MADVAE(args)
    model_pt = torch.load(f'../pretrained_model/{method}/params.pt')
    model.load_state_dict(model_pt)
    model.eval()

    if torch.cuda.is_available():
        print("Using CUDA")
        model = model.cuda()
        cla = cla.cuda()

    # Evaluate per-norm and aggregate adversarial accuracies.
    results = {}
    for norm in norms:
        total, total_inbds, adv, adv_inb = accuracy(
            cla, model, norms=[norm], suffix=f"_{method}")
        _, adv_old, _ = accuracy_paper(cla, model, norms=[norm], suffix=f"_{method}")
        results[f'{norm}'] = [adv.item(), adv_inb.item(), adv_old.item()]

    total, total_inbds, adv, adv_inb = accuracy(cla, model, suffix=f"_{method}")
    _, adv_old, _ = accuracy_paper(cla, model, suffix=f"_{method}")
    results['all'] = [adv.item(), adv_inb.item(), adv_old.item()]

    with open(f'./results/accuracy_{method}.txt', 'w') as f:
        json.dump(results, f)
# Calculate the output size of the image discriminator (PatchGAN):
# four stride-2 downsamplings divide the spatial resolution by 2**4.
patch = int(opt.img_size / (2 ** 4))
patch = (1, patch, patch)

generator = Generator(opt.latent_dim, opt.channels, opt.img_size, opt.n_residual_blocks)
discriminator = Discriminator(opt.channels)
classifier = Classifier(opt.channels, opt.img_size, opt.n_classes)

generator = nn.DataParallel(generator)
generator.cuda()
discriminator = nn.DataParallel(discriminator)
discriminator.cuda()
classifier = nn.DataParallel(classifier)
classifier.cuda()

adversarial_loss = torch.nn.MSELoss().cuda()
task_loss = torch.nn.CrossEntropyLoss().cuda()

generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)
classifier.apply(weights_init_normal)

os.makedirs("data", exist_ok=True)
train_source = get_cifar10(train=True)
train_target = get_stl10(split='train')

optimizer_G = torch.optim.Adam(itertools.chain(generator.parameters(),
                                               classifier.parameters()),
def main():
    args = parser.parse_args()

    # Model.
    model = Classifier(args.channels)
    optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9,
                          weight_decay=0.0001, nesterov=True)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epoch)
    if args.gpu is not None:
        model.cuda(args.gpu)

    # Dataset: raw (clean) and noised images, half a batch of each.
    raw_loader = torch.utils.data.DataLoader(
        Dataset(os.path.join(DATA_DIR, 'raw')),
        args.batch // 2, shuffle=True, drop_last=True)
    noised_loader = torch.utils.data.DataLoader(
        Dataset(os.path.join(DATA_DIR, 'noised_tgt')),
        args.batch // 2, shuffle=True, drop_last=True)

    # Train.
    for epoch in range(args.epoch):
        loss = 0
        accuracy = 0
        count = 0
        for x0, x1 in zip(noised_loader, raw_loader):
            if args.gpu is not None:
                x0 = x0.cuda(args.gpu)
                x1 = x1.cuda(args.gpu)

            # Train: label noised samples as class 0, raw samples as class 1.
            model.train()
            x = torch.cat((x0, x1), dim=0)
            t = torch.zeros((x.shape[0], 2), device=x.device).float()
            t[:x0.shape[0], 0] = 1
            t[x0.shape[0]:, 1] = 1
            x, t = mixup(x, t)
            y = model(x)
            e = (-1 * nn.functional.log_softmax(y, dim=1) * t).sum(dim=1).mean()
            optimizer.zero_grad()
            e.backward()
            optimizer.step()

            # Validate on the unmixed batch.
            model.eval()
            with torch.no_grad():
                y0 = (model(x0).max(dim=1)[1] == 0).float()
                y1 = (model(x1).max(dim=1)[1] == 1).float()
                a = torch.cat((y0, y1), dim=0).mean()

            loss += float(e) * len(x)
            accuracy += float(a) * len(x)
            count += len(x)

        print('[{}] lr={:.7f}, loss={:.4f}, accuracy={:.4f}'.format(
            epoch, float(optimizer.param_groups[0]['lr']),
            loss / count, accuracy / count), flush=True)
        scheduler.step()

    # Save a snapshot atomically (write to a temp file, then rename).
    snapshot = {'channels': args.channels, 'model': model.state_dict()}
    torch.save(snapshot, '{}.tmp'.format(args.file))
    os.rename('{}.tmp'.format(args.file), args.file)
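
# mixup is not defined in this snippet. A minimal sketch of standard mixup
# (Zhang et al., 2018) that matches the call x, t = mixup(x, t); the Beta
# parameter alpha=0.2 is an assumption, not taken from the original:
import numpy as np
import torch


def mixup(x, t, alpha=0.2):
    # Draw one mixing coefficient and a random pairing of the batch.
    lam = float(np.random.beta(alpha, alpha))
    perm = torch.randperm(x.shape[0], device=x.device)
    # Apply the same convex combination to inputs and soft targets.
    return lam * x + (1 - lam) * x[perm], lam * t + (1 - lam) * t[perm]
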
source_loader = torch.utils.data.DataLoader(source_dataset_train,
                                            batch_size=batch_size, shuffle=True)
target_loader = torch.utils.data.DataLoader(target_dataset_train,
                                            batch_size=batch_size, shuffle=True)
s_test_loader = torch.utils.data.DataLoader(source_dataset_test,
                                            batch_size=batch_size, shuffle=True)
t_test_loader = torch.utils.data.DataLoader(target_dataset_test,
                                            batch_size=batch_size, shuffle=True)

total_steps = total_epochs * len(source_loader)

# Define the network architecture.
feature_extractor = Extractor()
class_classifier = Classifier()
class_criterion = nn.NLLLoss()

optimizer = optim.SGD([{'params': feature_extractor.parameters()},
                       {'params': class_classifier.parameters()}],
                      lr=lr, momentum=momentum)

if torch.cuda.is_available():
    feature_extractor = feature_extractor.cuda()
    class_classifier = class_classifier.cuda()
    class_criterion = class_criterion.cuda()


def train(f, c, source, target, optimizer, step):
    result = []
    source_data, source_label = source
    target_data, target_label = target

    # Truncate both batches to the smaller size so they can be paired.
    size = min(source_data.shape[0], target_data.shape[0])
    source_data, source_label = source_data[0:size, :, :, :], source_label[0:size]
    target_data, target_label = target_data[0:size, :, :, :], target_label[0:size]

    # DANN-style schedule: gamma ramps smoothly from 0 toward 1 over training.
    p = float(step) / total_steps
    gamma = 2 / (1 + np.exp(-10 * p)) - 1

    if torch.cuda.is_available():
def main():
    global args
    # Parse commands from the ArgumentParser.
    args = parser.parse_args()

    # Our text field for IMDB data.
    TEXT = torchtext.data.Field(lower=True)
    # Our label field for IMDB data.
    LABEL = torchtext.data.Field(sequential=False)

    # Load GloVe embeddings.
    orig_embeddings = torch.load(args.data_folder + 'all_orig_emb.pt')
    total_words = len(orig_embeddings)

    # Load shared words and all GloVe words.
    with open(args.data_folder + "shared_words.txt", "r") as file:
        shared_words = file.read().split('\n')
    with open(args.data_folder + "glove_words.txt", "r") as file:
        glove_words = file.read().split('\n')

    # Recreate the GloVe dictionary.
    glove_dict = {}
    for i, word in enumerate(glove_words):
        glove_dict[word] = orig_embeddings[i]

    # Load the IMDB dataset with standard splits and the same restrictions as the paper.
    train, test = torchtext.datasets.IMDB.splits(
        TEXT, LABEL,
        filter_pred=lambda ex: ex.label != 'neutral' and len(ex.text) <= 400)

    # Both loops walk the train/test reviews and replace words
    # without GloVe vectors by <unk>.
    for i in range(len(train)):
        review = train.examples[i].text
        for j, word in enumerate(review):
            if word not in glove_dict:
                review[j] = '<unk>'
    for i in range(len(test)):
        review = test.examples[i].text
        for j, word in enumerate(review):
            if word not in glove_dict:
                review[j] = '<unk>'

    # Build the modified vocabulary.
    TEXT.build_vocab(train)
    LABEL.build_vocab(train)

    # Create iterators over the train and test sets.
    train_iter, test_iter = torchtext.data.BucketIterator.splits(
        (train, test), batch_size=args.batch_size, repeat=False, device=-1)

    # If we want to use baseline GloVe embeddings:
    if args.embedding_type == 'baseline':
        # Initialize the embedding matrix.
        comp_embedding = np.random.uniform(
            -0.25, 0.25, (len(TEXT.vocab), args.embedding_size))
        # For each shared vocab word, replace its row with the GloVe vector.
        for word in shared_words:
            comp_embedding[TEXT.vocab.stoi[word]] = glove_dict[word]
        # Initialize the classifier with our GloVe embedding.
        base_c = Classifier(torch.FloatTensor(comp_embedding), args.batch_size)
        # Put the model into CUDA memory if using a GPU.
        if use_gpu:
            base_c = base_c.cuda()
        # Initialize the optimizer.
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      base_c.parameters()), lr=args.lr)
        # Define the loss function.
        loss_func = nn.NLLLoss()
    else:
        '''
        Note: the model in the paper is smaller because it stores only the
        source dictionaries (codebooks), which is a major point of the paper;
        as formulated here, the model has the same size as a normal classifier.
        The two are fundamentally equivalent, except that the authors preprocess
        the data (convert words into codes), whereas here an embedding layer of
        vocabulary size is built, just as with GloVe vectors. Either way the
        accuracy should match, which is the point of this sentiment-classification
        task: to check whether the coding embeddings still give the same level
        of accuracy.
        '''
        # Initialize the embedding matrix.
        code_embedding = torch.FloatTensor(
            np.random.uniform(-0.25, 0.25, (len(TEXT.vocab), args.embedding_size)))
        # Load the best model for code-embedding generation.
        model = Code_Learner(args.embedding_size, args.M, args.K)
        model = torch.load(args.model_file)
        # Put the model into CUDA memory if using a GPU.
        if use_gpu:
            code_embedding = code_embedding.cuda()
            model = model.cuda()
        # For every word in the vocabulary:
        for i in range(len(TEXT.vocab)):
            # See whether it has a corresponding GloVe vector.
            try:
                glove_vec = glove_dict[TEXT.vocab.itos[i]]
                if use_gpu:
                    glove_vec = glove_vec.cuda()
                # If so, generate our own embedding for the word with the model.
                code_embedding[i] = model(glove_vec, training=False)
            except KeyError:
                # The word has no GloVe vector; keep it randomly initialized.
                pass
        base_c = Classifier(torch.FloatTensor(code_embedding.cpu()), args.batch_size)
        # Put the model into CUDA memory if using a GPU.
        if use_gpu:
            base_c = base_c.cuda()
        # Initialize the optimizer.
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      base_c.parameters()), lr=args.lr)
        # Define the loss function.
        loss_func = nn.NLLLoss()

    classifier_train(args.epochs, base_c, optimizer, loss_func,
                     train_iter, test_iter, args.embedding_type)
PATH = values["classifier"]

transform = transforms.Compose([
    transforms.RandomAffine(degrees=15, scale=(0.9, 1.0), fillcolor=256),
    transforms.Grayscale(),
    transforms.Resize(227),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))])

path = values["datasetFolder"] + "/SKETCHES_TRAINING"
trainset = torchvision.datasets.ImageFolder(root=path, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=4)
classes = set(values["classes"])

net = Classifier()
net.to(device)
if use_cuda:
    net.cuda()

# Set up the loss function and optimizer.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.0001, momentum=0.9)

epoch = 0
running_loss = 1.0

# Simple training loop for 500 epochs.
for epoch in range(500):
    running_loss = 0.0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--word-dim', type=int, default=300,
                        help='size of word embeddings')
    parser.add_argument('--hidden-dim', type=int, default=300,
                        help='number of hidden units per layer')
    parser.add_argument('--num-layers', type=int, default=1,
                        help='number of layers in the BiLSTM')
    parser.add_argument('--att-dim', type=int, default=350,
                        help='number of attention units')
    parser.add_argument('--att-hops', type=int, default=4,
                        help='number of attention hops, for the multi-hop attention model')
    parser.add_argument('--clf-hidden-dim', type=int, default=512,
                        help='hidden (fully connected) layer size for the classifier MLP')
    parser.add_argument('--clip', type=float, default=0.5,
                        help='gradient clipping threshold to prevent overly large LSTM gradients')
    parser.add_argument('--lr', type=float, default=.001,
                        help='initial learning rate')
    parser.add_argument('--weight-decay', type=float, default=1e-5,
                        help='weight decay rate per batch')
    parser.add_argument('--dropout', type=float, default=0.3)
    parser.add_argument('--max-epoch', type=int, default=8)
    parser.add_argument('--seed', type=int, default=666)
    parser.add_argument('--cuda', action='store_true', default=True)
    parser.add_argument('--optimizer', default='adam', choices=['adam', 'sgd'])
    parser.add_argument('--batch-size', type=int, default=32,
                        help='batch size for training')
    parser.add_argument('--penalization-coeff', type=float, default=0.1,
                        help='the penalization coefficient')
    parser.add_argument('--fix-word-embedding', action='store_true')
    parser.add_argument('--model-type', required=True, choices=['sa', 'avgblock', 'hard'])
    parser.add_argument('--data-type', required=True, choices=['age2', 'dbpedia', 'yahoo'])
    parser.add_argument('--data', required=True,
                        help='pickle file obtained by dataset dump')
    parser.add_argument('--save-dir', type=str, required=True,
                        help='path to save the final model')
    parser.add_argument('--block-size', type=int, default=-1,
                        help='block size, used only when model-type is avgblock')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, so you should probably run with --cuda")
        else:
            torch.cuda.manual_seed(args.seed)

    #######################################
    # A simple log file with the same content as stdout.
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)-8s %(message)s')
    logFormatter = logging.Formatter('%(asctime)s %(levelname)-8s %(message)s')
    rootLogger = logging.getLogger()
    fileHandler = logging.FileHandler(os.path.join(args.save_dir, 'stdout.log'))
    fileHandler.setFormatter(logFormatter)
    rootLogger.addHandler(fileHandler)
    ########################################
    for k, v in vars(args).items():
        logging.info(k + ': ' + str(v))

    #####################################################################
    if args.data_type == 'age2':
        data = AGE2(datapath=args.data, batch_size=args.batch_size)
        num_classes = 5
    elif args.data_type == 'dbpedia':
        data = DBpedia(datapath=args.data, batch_size=args.batch_size)
        num_classes = 14
    elif args.data_type == 'yahoo':
        data = Yahoo(datapath=args.data, batch_size=args.batch_size)
        num_classes = 10
    else:
        raise Exception('Invalid argument data-type')
    #####################################################################
    if args.model_type == 'avgblock':
        assert args.block_size > 0
    #####################################################################
    tic = time.time()
    model = Classifier(
        dictionary=data,
        dropout=args.dropout,
        num_words=data.num_words,
        num_layers=args.num_layers,
        hidden_dim=args.hidden_dim,
        word_dim=args.word_dim,
        att_dim=args.att_dim,
        att_hops=args.att_hops,
        clf_hidden_dim=args.clf_hidden_dim,
        num_classes=num_classes,
        model_type=args.model_type,
        block_size=args.block_size,
    )
    print('It takes %.2f sec to build the model.' % (time.time() - tic))
    logging.info(model)

    model.word_embedding.weight.data.set_(data.weight)
    if args.fix_word_embedding:
        model.word_embedding.weight.requires_grad = False
    if args.cuda:
        model = model.cuda()

    '''
    # Count parameters.
    num_params = sum(np.prod(p.size()) for p in model.parameters())
    num_embedding_params = np.prod(model.word_embedding.weight.size())
    print('# of parameters: %d' % num_params)
    print('# of word embedding parameters: %d' % num_embedding_params)
    print('# of parameters (excluding word embeddings): %d'
          % (num_params - num_embedding_params))
    '''

    if args.optimizer == 'adam':
        optimizer_class = optim.Adam
    elif args.optimizer == 'sgd':
        optimizer_class = optim.SGD
    else:
        raise Exception('Unsupported optimizer; supported ones are SGD and Adam. '
                        'For others, please add them yourself.')
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optimizer_class(params=params, lr=args.lr,
                                weight_decay=args.weight_decay)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='max',
                                               factor=0.5, patience=10, verbose=True)
    criterion = nn.CrossEntropyLoss()
    # Identity matrix for each batch (used by the attention penalization term).
    I = Variable(torch.eye(args.att_hops).unsqueeze(0).expand(args.batch_size, -1, -1))
    if args.cuda:
        I = I.cuda()
    trpack = {
        'model': model,
        'params': params,
        'criterion': criterion,
        'optimizer': optimizer,
        'I': I,
    }

    train_summary_writer = tensorboard.FileWriter(
        logdir=os.path.join(args.save_dir, 'log', 'train'), flush_secs=10)
    valid_summary_writer = tensorboard.FileWriter(
        logdir=os.path.join(args.save_dir, 'log', 'valid'), flush_secs=10)
    tsw, vsw = train_summary_writer, valid_summary_writer

    logging.info('number of train batches: %d' % data.train_num_batch)
    validate_every = data.train_num_batch // 10
    best_valid_accuracy = 0
    iter_count = 0
    tic = time.time()

    for epoch_num in range(args.max_epoch):
        for batch_iter, train_batch in enumerate(data.train_minibatch_generator()):
            progress = epoch_num + batch_iter / data.train_num_batch
            iter_count += 1

            train_loss, train_accuracy = train_iter(args, train_batch, **trpack)
            add_scalar_summary(tsw, 'loss', train_loss, iter_count)
            add_scalar_summary(tsw, 'acc', train_accuracy, iter_count)

            if (batch_iter + 1) % (data.train_num_batch // 100) == 0:
                tac = (time.time() - tic) / 60
                print('%.2f minutes\tprogress: %.2f' % (tac, progress))
            if (batch_iter + 1) % validate_every == 0:
                correct_sum = 0
                for valid_batch in data.dev_minibatch_generator():
                    correct, supplements = eval_iter(args, model, valid_batch)
                    correct_sum += unwrap_scalar_variable(correct)
                valid_accuracy = correct_sum / data.dev_size
                scheduler.step(valid_accuracy)
                add_scalar_summary(vsw, 'acc', valid_accuracy, iter_count)
                logging.info('Epoch %.2f: valid accuracy = %.4f' % (progress, valid_accuracy))
                # A new best validation accuracy triggers a test run and a checkpoint.
                if valid_accuracy > best_valid_accuracy:
                    correct_sum = 0
                    for test_batch in data.test_minibatch_generator():
                        correct, supplements = eval_iter(args, model, test_batch)
                        correct_sum += unwrap_scalar_variable(correct)
                    test_accuracy = correct_sum / data.test_size
                    best_valid_accuracy = valid_accuracy
                    model_filename = ('model-%.2f-%.4f-%.4f.pkl'
                                      % (progress, valid_accuracy, test_accuracy))
                    model_path = os.path.join(args.save_dir, model_filename)
                    torch.save(model.state_dict(), model_path)
                    print('Saved the new best model to %s' % model_path)
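
# The identity matrix I built above is normally used for the self-attention
# penalization term of Lin et al. (2017), weighted by --penalization-coeff.
# train_iter is defined elsewhere; a hedged sketch of how it might apply the
# term (att being the batch of attention matrices -- an assumption):
#
#     penalty = torch.norm(torch.bmm(att, att.transpose(1, 2)) - I, 2)
#     loss = criterion(logits, labels) + args.penalization_coeff * penalty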