def main(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    train, val, test = get_clf(args.train_data, args.val_data, args.test_data,
                               args.max_train_sents, args.max_val_sents,
                               args.max_test_sents)

    net = Classifier_Net()
    if args.load_saved:
        print('Loaded from saved model ..... ')
        net = torch.load(os.path.join(args.outputdir, args.outputmodelname))

    # loss
    # weight = torch.FloatTensor(args.n_classes).fill_(1)
    loss_fn = nn.CrossEntropyLoss()  # weight=weight
    loss_fn.size_average = False

    # optimizer
    optim_fn, optim_params = get_optimizer(args.optimizer)
    optimizer = optim_fn(net.parameters(), **optim_params)

    if args.cuda:
        net.cuda()
        loss_fn.cuda()

    global val_acc_best, lr, stop_training, adam_stop
    val_acc_best = -1e10
    adam_stop = False
    stop_training = False
    lr = optim_params['lr'] if 'sgd' in args.optimizer else 0.005

    epoch = 1
    while not stop_training and epoch <= args.n_epochs:
        train_acc, net = trainepoch(epoch, train, optimizer, args, net, loss_fn)
        eval_acc = evaluate(epoch, val, optimizer, args, net, 'valid')
        epoch += 1

    # net = torch.load(os.path.join(args.outputdir, args.outputmodelname))
    print("The test accuracy is ", evaluate("NO", test, optimizer, args, net, "test"))
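# All of the training entry points in this section call a get_optimizer() helper whose
# definition is not shown here. The sketch below is only an assumption about its behavior,
# inferred from the spec strings used throughout (e.g. "sgd,lr=0.1", "adam,lr=0.1");
# the actual helper in the repository may differ.
import torch.optim as optim


def get_optimizer(spec):
    """Parse a spec like 'sgd,lr=0.1' into (optimizer class, kwargs dict)."""
    fields = spec.split(',')
    name, kwargs = fields[0], {}
    for field in fields[1:]:
        key, value = field.split('=')
        kwargs[key] = float(value)
    known = {'sgd': optim.SGD, 'adam': optim.Adam,
             'adagrad': optim.Adagrad, 'rmsprop': optim.RMSprop}
    assert name in known, "unknown optimizer: " + name
    return known[name], kwargs

# usage, as in the snippets above:
#   optim_fn, optim_params = get_optimizer("sgd,lr=0.1")
#   optimizer = optim_fn(net.parameters(), **optim_params)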
def main(args):
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpu_id > -1:
        torch.cuda.manual_seed(args.seed)

    train, val, test = get_nli_hypoth(args.train_data, args.val_data, args.test_data,
                                      args.max_train_sents, args.max_val_sents,
                                      args.max_test_sents)

    nli_net = NLI_HYPOTHS_Net()

    # losses
    loss_fn = nn.CrossEntropyLoss()
    loss_mse = nn.MSELoss()

    # optimizer
    optim_fn, optim_params = get_optimizer(args.optimizer)
    optimizer = optim_fn(nli_net.parameters(), **optim_params)

    if args.gpu_id > -1:
        nli_net.cuda()
        loss_fn.cuda()
        loss_mse.cuda()

    global val_acc_best, lr, stop_training, adam_stop
    val_acc_best = -1e10
    adam_stop = False
    stop_training = False
    lr = optim_params['lr'] if 'sgd' in args.optimizer else None

    epoch = 1
    while not stop_training and epoch <= args.n_epochs:
        train_acc, nli_net = trainepoch(epoch, train, optimizer, args, nli_net,
                                        loss_fn, loss_mse)
        eval_acc = evaluate(epoch, val, optimizer, args, nli_net, 'valid')
        epoch += 1

    # reload the best checkpoint saved during validation before testing
    nli_net = torch.load(os.path.join(args.outputdir, args.outputmodelname))
    print("The test accuracy is ", evaluate("NO", test, optimizer, args, nli_net, "test"))
# model
encoder_types = ['BLSTMEncoder', 'BLSTMprojEncoder', 'BGRUlastEncoder',
                 'InnerAttentionMILAEncoder', 'InnerAttentionYANGEncoder',
                 'InnerAttentionNAACLEncoder', 'ConvNetEncoder', 'LSTMEncoder']
assert params.encoder_type in encoder_types, \
    "encoder_type must be in " + str(encoder_types)

nli_net = NLINet(config_nli_model)
print(nli_net)

# loss
weight = torch.FloatTensor(params.n_classes).fill_(1)
loss_fn = nn.CrossEntropyLoss(weight=weight)
loss_fn.size_average = False

# optimizer
optim_fn, optim_params = get_optimizer(params.optimizer)
optimizer = optim_fn(nli_net.parameters(), **optim_params)

# cuda by default
nli_net.cuda()
loss_fn.cuda()
# src_embeddings.cuda()

"""
TRAIN
"""
# src_embeddings.volatile = True
val_acc_best = -1e10
adam_stop = False
stop_training = False
lr = optim_params['lr'] if 'sgd' in params.optimizer else None
# index_pad = word2id['<p>']
for name, x in nli_net.named_parameters():
    print(name)
for name, x in actorModel.named_parameters():
    print(name)
# print(nli_net.target_pred.enc_lstm.weight_ih_l0)
# print(nli_net.target_classifier[4].bias)

# loss
weight = torch.FloatTensor(params.n_classes).fill_(1)
loss_fn = nn.CrossEntropyLoss(weight=weight)
loss_fn.size_average = False

# optimizers: separate critic (target/active) and actor (target/active) optimizers
optim_fn, optim_params = get_optimizer(params.optimizer)
critic_target_optimizer = optim_fn(
    list(nli_net.target_pred.parameters()) +
    list(nli_net.target_classifier.parameters()), **optim_params)

optim_fn2, optim_params2 = get_optimizer(params.optimizer)
critic_active_optimizer = optim_fn2(
    list(nli_net.active_pred.parameters()) +
    list(nli_net.active_classifier.parameters()), **optim_params2)

optim_fn3, optim_params3 = get_optimizer("adam,lr=0.1")
actor_target_optimizer = optim_fn3(actorModel.target_policy.parameters(),
                                   **optim_params3)

optim_fn4, optim_params4 = get_optimizer("adam,lr=0.1")
actor_active_optimizer = optim_fn4(actorModel.active_policy.parameters(),
                                   **optim_params4)
def run_experiment(params):
    # print parameters passed, and all parameters
    print('\ntogrep : {0}\n'.format(sys.argv[1:]))
    print(params)

    os.makedirs(params.outputdir, exist_ok=True)

    """
    SEED
    """
    np.random.seed(params.seed)
    torch.manual_seed(params.seed)
    torch.cuda.manual_seed(params.seed)

    """
    DATA
    """
    dataset_path = params.dataset_path

    # build training and test corpus
    filename_list = recursive_file_list(dataset_path)
    print('Use the following files for training: ', filename_list)
    corpus = CBOWDataset(dataset_path, params.num_docs, params.context_size,
                         params.num_samples_per_item, params.mode,
                         params.precomputed_word_vocab, params.max_words, None,
                         1000, params.precomputed_chunks_dir, params.temp_path)
    corpus_len = len(corpus)

    # split train and test
    inds = list(range(corpus_len))
    shuffle(inds)
    num_val_samples = int(corpus_len * params.validation_fraction)
    train_indices = inds[:-num_val_samples] if num_val_samples > 0 else inds
    test_indices = inds[-num_val_samples:] if num_val_samples > 0 else []
    cbow_train_loader = DataLoader(corpus,
                                   sampler=SubsetRandomSampler(train_indices),
                                   batch_size=params.batch_size,
                                   shuffle=False,
                                   num_workers=params.num_workers,
                                   pin_memory=True,
                                   collate_fn=corpus.collate_fn)
    cbow_test_loader = DataLoader(corpus,
                                  sampler=SubsetRandomSampler(test_indices),
                                  batch_size=params.batch_size,
                                  shuffle=False,
                                  num_workers=params.num_workers,
                                  pin_memory=True,
                                  collate_fn=corpus.collate_fn)

    # extract some variables needed for training
    num_training_samples = corpus.num_training_samples
    word_vec = corpus.word_vec
    unigram_dist = corpus.unigram_dist
    word_vec_copy = corpus._word_vec_count_tuple
    print("Number of sentences used for training:", str(num_training_samples))

    """
    MODEL
    """
    # build path where to store the encoder
    outputmodelname = construct_model_name(params.outputmodelname, params)

    # build encoder
    n_words = len(word_vec)
    if params.w2m_type == "cmow":
        encoder = get_cmow_encoder(n_words, padding_idx=0,
                                   word_emb_dim=params.word_emb_dim,
                                   initialization_strategy=params.initialization)
        output_embedding_size = params.word_emb_dim
    elif params.w2m_type == "cbow":
        encoder = get_cbow_encoder(n_words, padding_idx=0,
                                   word_emb_dim=params.word_emb_dim)
        output_embedding_size = params.word_emb_dim
    elif params.w2m_type == "hybrid":
        encoder = get_cbow_cmow_hybrid_encoder(n_words, padding_idx=0,
                                               word_emb_dim=params.word_emb_dim,
                                               initialization_strategy=params.initialization,
                                               w2m_type=params.hybrid_cmow,
                                               _lambda=params._lambda,
                                               cnmow_version=params.cnmow_version)
        output_embedding_size = 2 * params.word_emb_dim
    elif params.w2m_type == "cnmow":
        encoder = get_cnmow_encoder(n_words, padding_idx=0,
                                    word_emb_dim=params.word_emb_dim,
                                    initialization_strategy=params.initialization,
                                    _lambda=params._lambda,
                                    cnmow_version=params.cnmow_version)
        output_embedding_size = params.word_emb_dim

    # build cbow model
    cbow_net = CBOWNet(encoder, output_embedding_size, n_words,
                       weights=unigram_dist, n_negs=params.n_negs, padding_idx=0)

    if torch.cuda.device_count() > 1:
        print("Using", torch.cuda.device_count(), "GPUs for training!")
        # dim = 0: a batch of [30, xxx] is split into [10, ...], [10, ...], [10, ...] on 3 GPUs
        cbow_net = nn.DataParallel(cbow_net)
        use_multiple_gpus = True
    else:
        use_multiple_gpus = False

    # optimizer
    print([x.size() for x in cbow_net.parameters()])
    optim_fn, optim_params = get_optimizer(params.optimizer)
    optimizer = optim_fn(cbow_net.parameters(), **optim_params)

    # cuda by default
    cbow_net.to(device)  # .cuda()

    """
    TRAIN
    """
    val_acc_best = -1e10
    adam_stop = False
    stop_training = False
    lr = optim_params['lr'] if 'sgd' in params.optimizer else None

    # compute learning rate schedule
    if params.linear_decay:
        lr_shrinkage = (lr - params.minlr) / (
            (float(num_training_samples) / params.batch_size) * params.n_epochs)

    def forward_pass(X_batch, tgt_batch, params, check_size=False):
        X_batch = Variable(X_batch).to(device)  # .cuda()
        tgt_batch = Variable(torch.LongTensor(tgt_batch)).to(device)  # .cuda()
        k = X_batch.size(0)  # actual batch size
        loss = cbow_net(X_batch, tgt_batch).mean()
        return loss, k

    def validate(data_loader):
        cbow_net.eval()
        with torch.no_grad():
            all_costs = []
            for X_batch, tgt_batch in data_loader:
                loss, k = forward_pass(X_batch, tgt_batch, params)
                all_costs.append(loss.item())
        cbow_net.train()
        return np.mean(all_costs)

    def trainepoch(epoch):
        print('\nTRAINING : Epoch ' + str(epoch))
        cbow_net.train()
        all_costs = []
        logs = []
        words_count = 0
        last_time = time.time()
        correct = 0.

        if not params.linear_decay:
            optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] * params.decay \
                if epoch > 1 and 'sgd' in params.optimizer else optimizer.param_groups[0]['lr']
        print('Learning rate : {0}'.format(optimizer.param_groups[0]['lr']))

        processed_training_samples = 0
        start_time = time.time()
        total_time = 0
        total_batch_generation_time = 0
        total_forward_time = 0
        total_backward_time = 0
        total_step_time = 0
        last_processed_training_samples = 0
        nonlocal processed_batches, stop_training, no_improvement, \
            min_val_loss, losses, min_loss_criterion

        for i, (X_batch, tgt_batch) in enumerate(cbow_train_loader):
            # for the hybrid model, train the CMOW lookup table only on every
            # explore_par-th batch
            if params.w2m_type == "hybrid":
                enabled = (i % params.explore_par) == 0
                cbow_net.encoder.cmow_encoder.lookup_table.weight.requires_grad = enabled

            batch_generation_time = (time.time() - start_time) * 1000000

            # forward pass
            forward_start = time.time()
            loss, k = forward_pass(X_batch, tgt_batch, params)
            all_costs.append(loss.item())
            forward_total = (time.time() - forward_start) * 1000000

            # backward
            backward_start = time.time()
            optimizer.zero_grad()
            loss.backward()
            backward_total = (time.time() - backward_start) * 1000000

            # linear learning rate decay
            if params.linear_decay:
                optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] - lr_shrinkage \
                    if 'sgd' in params.optimizer else optimizer.param_groups[0]['lr']

            # optimizer step
            step_time = time.time()
            optimizer.step()
            total_step_time += (time.time() - step_time) * 1000000

            # log progress
            processed_training_samples += params.batch_size
            percentage_done = float(processed_training_samples) / num_training_samples
            processed_batches += 1
            if processed_batches == params.validation_frequency:
                # compute validation loss and train loss
                val_loss = round(validate(cbow_test_loader), 5) \
                    if num_val_samples > 0 else float('inf')
                train_loss = round(np.mean(all_costs), 5)

                # print current loss and processing speed
                logs.append('Epoch {3} - {4:.4} ; lr {2:.4} ; train-loss {0} ; '
                            'val-loss {5} ; sentence/s {1}'.format(
                                train_loss,
                                int((processed_training_samples - last_processed_training_samples)
                                    / (time.time() - last_time)),
                                optimizer.param_groups[0]['lr'], epoch,
                                percentage_done, val_loss))
                if params.VERBOSE:
                    print('\n\n\n')
                print(logs[-1])
                last_time = time.time()
                words_count = 0
                all_costs = []
                last_processed_training_samples = processed_training_samples

                if params.VERBOSE:
                    print("100 Batches took {} microseconds".format(total_time))
                    print("get_batch: {} \nforward: {} \nbackward: {} \nstep: {}".format(
                        total_batch_generation_time / total_time,
                        total_forward_time / total_time,
                        total_backward_time / total_time,
                        total_step_time / total_time))
                total_time = 0
                total_batch_generation_time = 0
                total_forward_time = 0
                total_backward_time = 0
                total_step_time = 0
                processed_batches = 0

                # save losses for logging later
                losses.append((train_loss, val_loss))

                # early stopping?
                if val_loss < min_val_loss:
                    min_val_loss = val_loss
                    # save best model
                    torch.save(cbow_net,
                               os.path.join(params.outputdir, outputmodelname + '.cbow_net'))

                if params.stop_criterion is not None:
                    stop_crit_loss = eval(params.stop_criterion)
                    if stop_crit_loss < min_loss_criterion:
                        no_improvement = 0
                        min_loss_criterion = stop_crit_loss
                    else:
                        no_improvement += 1
                        if no_improvement > params.patience:
                            stop_training = True
                            print("No improvement in loss criterion",
                                  str(params.stop_criterion), "for",
                                  str(no_improvement), "steps. Terminate training.")
                            break

            now = time.time()
            batch_time_micro = (now - start_time) * 1000000
            total_time = total_time + batch_time_micro
            total_batch_generation_time += batch_generation_time
            total_forward_time += forward_total
            total_backward_time += backward_total
            start_time = now

    """
    Train model on CBOW objective
    """
    epoch = 1
    processed_batches = 0
    min_val_loss = float('inf')
    min_loss_criterion = float('inf')
    no_improvement = 0
    losses = []
    while not stop_training and epoch <= params.n_epochs:
        trainepoch(epoch)
        epoch += 1

    # load the best model
    if min_val_loss < float('inf'):
        cbow_net = torch.load(os.path.join(params.outputdir, outputmodelname + '.cbow_net'))
        print("Loading model with best validation loss.")
    else:
        # we use the current model
        print("No model with better validation loss has been saved.")

    # save word vocabulary and counts
    pickle.dump(word_vec_copy,
                open(os.path.join(params.outputdir, outputmodelname + '.vocab'), "wb"))

    if use_multiple_gpus:
        cbow_net = cbow_net.module

    return cbow_net.encoder, losses
def main(args):
    GLOVE_PATH = "dataset/GloVe/glove.840B.300d.txt"

    parser = argparse.ArgumentParser(description='NLI training')
    # paths
    parser.add_argument("--nlipath", type=str, default='dataset/SNLI/',
                        help="NLI data path (SNLI or MultiNLI)")
    parser.add_argument("--outputdir", type=str, default='savedir/',
                        help="Output directory")
    parser.add_argument("--outputmodelname", type=str, default='model.pickle')
    # dataset, dimensions, transfer learning
    parser.add_argument("--dataset", type=str, required=True,
                        help="Semantic similarity dataset")
    parser.add_argument('--dimension', nargs='+', required=True,
                        help='Dimension(s) on the dataset')
    parser.add_argument('--transfer', default='DNT',
                        help='Transfer learning approach')
    parser.add_argument('--save', default='no', help='Save trained model')
    parser.add_argument('--load_model', default='no',
                        help='If a model is loaded, skip training and just evaluate')
    # training
    parser.add_argument("--n_epochs", type=int, default=10)
    parser.add_argument("--batch_size", type=int, default=16)
    parser.add_argument("--dpout_model", type=float, default=0., help="encoder dropout")
    parser.add_argument("--dpout_fc", type=float, default=0., help="classifier dropout")
    parser.add_argument("--nonlinear_fc", type=float, default=0, help="use nonlinearity in fc")
    parser.add_argument("--optimizer", type=str, default="sgd,lr=5", help="adam or sgd,lr=0.1")
    parser.add_argument("--lrshrink", type=float, default=5, help="shrink factor for sgd")
    parser.add_argument("--decay", type=float, default=1., help="lr decay")
    parser.add_argument("--minlr", type=float, default=1e-5, help="minimum lr")
    parser.add_argument("--max_norm", type=float, default=5., help="max norm (grad clipping)")
    # model
    parser.add_argument("--encoder_type", type=str, default='BLSTMEncoder',
                        help="see list of encoders")
    parser.add_argument("--enc_lstm_dim", type=int, default=2048, help="encoder nhid dimension")
    parser.add_argument("--n_enc_layers", type=int, default=1, help="encoder num layers")
    parser.add_argument("--fc_dim", type=int, default=512, help="nhid of fc layers")
    parser.add_argument("--n_classes", type=int, default=3,
                        help="entailment/neutral/contradiction")
    parser.add_argument("--pool_type", type=str, default='max', help="max or mean")
    # gpu
    parser.add_argument("--gpu_id", type=int, default=0, help="GPU ID")
    parser.add_argument("--seed", type=int, default=1236, help="seed")

    params, _ = parser.parse_known_args(args)

    # set gpu device
    torch.cuda.set_device(params.gpu_id)

    # print parameters passed, and all parameters
    # print('\ntogrep : {0}\n'.format(sys.argv[1:]))
    # print(params)

    def trainepoch(epoch):
        print('TRAINING : Epoch ' + str(epoch))
        nli_net.train()
        logs = []
        last_time = time.time()

        # shuffle the data
        permutation = np.random.permutation(len(train['s1']))
        s1 = train['s1'][permutation]
        s2 = train['s2'][permutation]
        targets = [x[permutation] for x in train['labels']]

        optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] * params.decay \
            if epoch > 1 and 'sgd' in params.optimizer else optimizer.param_groups[0]['lr']
        # print('Learning rate : {0}'.format(optimizer.param_groups[0]['lr']))

        for stidx in range(0, len(s1), params.batch_size):
            tgt_batches = []
            # prepare batch
            s1_batch, s1_len = get_batch(s1[stidx:stidx + params.batch_size], word_vec)
            s2_batch, s2_len = get_batch(s2[stidx:stidx + params.batch_size], word_vec)
            s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
            for i, _ in enumerate(MTL_index):
                tgt_batches.append(Variable(torch.FloatTensor(
                    targets[i][stidx:stidx + params.batch_size])).cuda())

            # model forward
            outputs = nli_net((s1_batch, s1_len), (s2_batch, s2_len))

            # loss: one term per dimension in the multi-task index
            if params.transfer == 'DNT':
                # rescale gold scores from [1, n_classes] to [0, 1]
                losses = [nli_net.loss_fn(outputs[i],
                                          (tgt_batches[i] - 1) / (params.n_classes - 1))
                          for i, _ in enumerate(MTL_index)]
            elif params.transfer == 'NT':
                losses = [nli_net.loss_fn(outputs[i], tgt_batches[i])
                          for i, _ in enumerate(MTL_index)]
            loss = sum(losses)

            # backward
            optimizer.zero_grad()
            loss.backward()

            # optimizer step
            optimizer.step()

    def evaluate(epoch, eval_type='valid', flag='', correlation=spearmanr, transfer='NT'):
        nli_net.eval()
        preds = []
        r = np.arange(1, 1 + nli_net.n_classes)

        global val_acc_best, lr, stop_training, adam_stop

        if eval_type == 'valid':
            print('VALIDATION : Epoch {0}'.format(epoch))
            s1 = valid['s1']
            s2 = valid['s2']
            targets = valid['scores']
        elif eval_type == 'test':
            print('TEST : Epoch {0}'.format(epoch))
            s1 = test['s1']
            s2 = test['s2']
            targets = test['scores']
        elif eval_type == 'train':
            print('EVAL ON TRAIN : Epoch {0}'.format(epoch))
            s1 = train['s1']
            s2 = train['s2']
            targets = train['scores']
        else:
            raise ValueError('Wrong eval_type.')

        probas = [[] for _ in MTL_index]
        correct = 0.

        for stidx in range(0, len(s1), params.batch_size):
            # prepare batch
            s1_batch, s1_len = get_batch(s1[stidx:stidx + params.batch_size], word_vec)
            s2_batch, s2_len = get_batch(s2[stidx:stidx + params.batch_size], word_vec)
            s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())

            # model forward
            outputs = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
            for i, _ in enumerate(MTL_index):
                if len(probas[i]) == 0:
                    probas[i] = outputs[i].data.cpu().numpy()
                else:
                    probas[i] = np.concatenate((probas[i], outputs[i].data.cpu().numpy()),
                                               axis=0)

        if transfer == 'NT':
            # expected score under the predicted class distribution vs. gold scores
            ret = [correlation(np.dot(x, r), y)[0] for x, y in zip(probas, targets)]
        elif transfer == 'DNT':
            ret = [correlation(x, y)[0] for x, y in zip(probas, targets)]
        else:
            raise ValueError('Wrong transfer.')

        return ret

    """
    SEED
    """
    np.random.seed(params.seed)
    torch.manual_seed(params.seed)
    torch.cuda.manual_seed(params.seed)

    """
    DATA
    """
    dataset_path = {
        'stsbenchmark': '../stsbenchmark/',
        'sts12': '../SemEval12/',
        'sick': '../SICK/',
        'activities': '../human_activity_phrase_data/',
        'sag': '../ShortAnswerGrading_v2.0/data/processed/',
        'typed': '../SemEval13/typed/'
    }

    # MTL_index = [1,2,3,4, 'mse']  # 'e'
    MTL_index = [int(x) for x in params.dimension]
    train, valid, test = get_sts(dataset_path[params.dataset], MTL_index,
                                 params.transfer, params.n_classes)
    word_vec = build_vocab(train['s1'] + train['s2'] + valid['s1'] + valid['s2'] +
                           test['s1'] + test['s2'], GLOVE_PATH)

    for split in ['s1', 's2']:
        for data_type in ['train', 'valid', 'test']:
            eval(data_type)[split] = np.array(
                [[word for word in sent.split() if word in word_vec]
                 for sent in eval(data_type)[split]])
            # eval(data_type)[split] = np.array([['<s>'] +
            #     [word for word in sent.split() if word in word_vec or word[:2] == 'dc'] +
            #     ['</s>'] for sent in eval(data_type)[split]])

    params.word_emb_dim = 300

    """
    MODEL
    """
    # model config
    config_nli_model = {
        'n_words': len(word_vec),
        'word_emb_dim': params.word_emb_dim,
        'enc_lstm_dim': params.enc_lstm_dim,
        'n_enc_layers': params.n_enc_layers,
        'dpout_model': params.dpout_model,
        'dpout_fc': params.dpout_fc,
        'fc_dim': params.fc_dim,
        'bsize': params.batch_size,
        'n_classes': params.n_classes,
        'pool_type': params.pool_type,
        'nonlinear_fc': params.nonlinear_fc,
        'encoder_type': params.encoder_type,
        'use_cuda': True,
        'MTL_index': MTL_index,
        'transfer': params.transfer
    }

    # model
    encoder_types = ['BLSTMEncoder', 'BLSTMprojEncoder', 'BGRUlastEncoder',
                     'InnerAttentionMILAEncoder', 'InnerAttentionYANGEncoder',
                     'InnerAttentionNAACLEncoder', 'ConvNetEncoder', 'LSTMEncoder']
    assert params.encoder_type in encoder_types, \
        "encoder_type must be in " + str(encoder_types)

    perfs_all = []
    for rd in range(1):
        print("Round", rd)
        if params.load_model == 'no':
            nli_net = NLINet(config_nli_model)
            nli_net.encoder = torch.load('encoder/infersent.allnli.pickle',
                                         map_location={'cuda:1': 'cuda:0',
                                                       'cuda:2': 'cuda:0'})
        else:
            nli_net = torch.load(params.load_model)
        print(nli_net)

        # optimizer
        optim_fn, optim_params = get_optimizer(params.optimizer)
        optimizer = optim_fn(nli_net.parameters(), **optim_params)

        # cuda by default
        nli_net.cuda()

        """
        TRAIN
        """
        val_acc_best = -1e10
        adam_stop = False
        stop_training = False
        lr = optim_params['lr'] if 'sgd' in params.optimizer else None
        last_result = 0
        last_test_result = 0
        drop_count = 0

        """
        Train model on the semantic similarity task
        """
        correlation = spearmanr if params.dataset == 'activities' else pearsonr

        epoch = 0
        perfs_valid = evaluate(epoch, 'valid', 'begin', correlation, params.transfer)
        perfs_test = evaluate(epoch, 'test', 'begin', correlation, params.transfer)
        print(perfs_valid, perfs_test)
        epoch += 1

        if params.load_model == 'no':
            while not stop_training and epoch <= params.n_epochs:
                trainepoch(epoch)
                perfs_valid = evaluate(epoch, 'valid', '', correlation, params.transfer)
                perfs_test = evaluate(epoch, 'test', '', correlation, params.transfer)
                print(perfs_valid, perfs_test)
                epoch += 1
        # perfs_all.append(perfs)

    if params.save != 'no':
        torch.save(nli_net, params.save)
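# The --lrshrink, --minlr, val_acc_best and adam_stop variables set up above are consumed
# by model-selection logic that is not shown in this snippet. The sketch below is an
# assumption about how that logic typically looks in InferSent-style training scripts
# (shrink the SGD learning rate when validation accuracy stops improving, stop once it
# falls below minlr, or stop Adam after two non-improving evaluations); the helper name
# update_on_validation is illustrative and the actual code may differ.
def update_on_validation(eval_acc, nli_net, optimizer, params):
    global val_acc_best, stop_training, adam_stop
    if eval_acc > val_acc_best:
        # keep the best checkpoint seen so far
        torch.save(nli_net, os.path.join(params.outputdir, params.outputmodelname))
        val_acc_best = eval_acc
    else:
        if 'sgd' in params.optimizer:
            optimizer.param_groups[0]['lr'] /= params.lrshrink
            print('Shrinking lr by : {0}. New lr = {1}'.format(
                params.lrshrink, optimizer.param_groups[0]['lr']))
            if optimizer.param_groups[0]['lr'] < params.minlr:
                stop_training = True
        if 'adam' in params.optimizer:
            # stop on the second consecutive evaluation without improvement
            stop_training = adam_stop
            adam_stop = True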
def main(args): """ SEED """ np.random.seed(args.seed) torch.manual_seed(args.seed) if args.gpu_id > -1: torch.cuda.manual_seed(args.seed) """ DATA """ train, valid, test = get_nli(args.nlipath, args.n_classes) word_vecs = build_vocab(train['s1'] + train['s2'] + valid['s1'] + valid['s2'] + test['s1'] + test['s2'], args.embdfile) for split in ['s1', 's2']: for data_type in ['train', 'valid', 'test']: eval(data_type)[split] = np.array([['<s>'] + [word for word in sent.split() if word in word_vecs] + ['</s>'] for sent in eval(data_type)[split]]) args.word_emb_dim = len(word_vecs[list(word_vecs.keys())[0]]) nli_model_configs = get_model_configs(args, len(word_vecs)) nli_model_configs["n_classes"] = args.n_classes # define premise and hypoth encoders premise_encoder = eval(nli_model_configs['encoder_type'])(nli_model_configs) hypoth_encoder = eval(nli_model_configs['encoder_type'])(nli_model_configs) shared_nli_net = SharedNLINet(nli_model_configs, premise_encoder, hypoth_encoder) shared_hypoth_net = SharedHypothNet(nli_model_configs, hypoth_encoder) print(shared_nli_net) print(shared_hypoth_net) if args.pre_trained_model: print( "Pre_trained_model: " + args.pre_trained_model) pre_trained_model = torch.load(args.pre_trained_model) shared_nli_net_params = shared_nli_net.state_dict() pre_trained_params = pre_trained_model.state_dict() assert shared_nli_net_params.keys() == pre_trained_params.keys(), "load model has different parameter state names that NLI_HYPOTHS_NET" for key, parameters in shared_nli_net_params.items(): if parameters.size() == pre_trained_params[key].size(): shared_nli_net_params[key] = pre_trained_params[key] shared_nli_net.load_state_dict(shared_nli_net_params) print(shared_nli_net) if args.pre_trained_adv_model: print( "Pre_trained_adv_model: " + args.pre_trained_adv_model) pre_trained_model = torch.load(args.pre_trained_adv_model) shared_hypoth_net_params = shared_hypoth_net.state_dict() pre_trained_params = pre_trained_model.state_dict() assert shared_hypoth_net_params.keys() == pre_trained_params.keys(), "load model has different parameter state names that NLI_HYPOTHS_NET" for key, parameters in nli_hypoth_params.items(): if parameters.size() == pre_trained_params[key].size(): shared_hypoth_net_params[key] = pre_trained_params[key] shared_hypoth_net.load_state_dict(shared_hypoth_net_params) print(shared_hypoth_net) # nli loss weight = torch.FloatTensor(args.n_classes).fill_(1) loss_fn_nli = nn.CrossEntropyLoss(weight=weight) loss_fn_nli.size_average = False # hypoth (adversarial) loss weight = torch.FloatTensor(args.n_classes).fill_(1) loss_fn_hypoth = nn.CrossEntropyLoss(weight=weight) loss_fn_hypoth.size_average = False # optimizer optim_fn, optim_params = get_optimizer(args.optimizer) optimizer_nli = optim_fn(shared_nli_net.parameters(), **optim_params) #optimizer_hypoth = optim_fn(shared_hypoth_net.parameters(), **optim_params) # only pass hypoth classifier params to avoid updating shared encoder params twice optimizer_hypoth = optim_fn(shared_hypoth_net.classifier.parameters(), **optim_params) if args.gpu_id > -1: shared_nli_net.cuda() shared_hypoth_net.cuda() loss_fn_nli.cuda() loss_fn_hypoth.cuda() """ TRAIN """ global val_acc_best, lr, stop_training, adam_stop val_acc_best = -1e10 adam_stop = False stop_training = False lr = optim_params['lr'] if 'sgd' in args.optimizer else None """ Train model on Natural Language Inference task """ epoch = 1 while not stop_training and epoch <= args.n_epochs: train_acc_nli, train_acc_hypoth, shared_nli_net, shared_hypoth_net = 
trainepoch(epoch, train, optimizer_nli, optimizer_hypoth, args, word_vecs, shared_nli_net, shared_hypoth_net, loss_fn_nli, loss_fn_hypoth, args.adv_lambda, args.adv_hyp_encoder_lambda) eval_acc_nli, eval_acc_hypoth = evaluate(epoch, valid, optimizer_nli, optimizer_hypoth, args, word_vecs, shared_nli_net, shared_hypoth_net, 'valid', adv_lambda=args.adv_lambda) epoch += 1
def main(args): print "main" """ SEED """ np.random.seed(args.seed) torch.manual_seed(args.seed) if args.gpu_id > -1: torch.cuda.manual_seed(args.seed) """ DATA """ train, val, test = get_nli_hypoth(args.train_lbls_file, args.train_src_file, args.val_lbls_file, \ args.val_src_file, args.test_lbls_file, args.test_src_file, \ args.max_train_sents, args.max_val_sents, args.max_test_sents, args.remove_dup) word_vecs = build_vocab( train['hypoths'] + val['hypoths'] + test['hypoths'], args.embdfile, args.lorelei_embds) args.word_emb_dim = len(word_vecs[word_vecs.keys()[0]]) nli_model_configs = get_model_configs(args, len(word_vecs)) lbls_file = args.train_lbls_file if "mpe" in lbls_file or "snli" in lbls_file or "multinli" in lbls_file or "sick" in lbls_file or "joci" in lbls_file: nli_model_configs["n_classes"] = 3 elif "spr" in lbls_file or "dpr" in lbls_file or "fnplus" in lbls_file or "add_one" in lbls_file or "scitail" in lbls_file: nli_model_configs["n_classes"] = 2 nli_net = NLI_HYPOTHS_Net(nli_model_configs) print(nli_net) # loss weight = torch.FloatTensor(args.n_classes).fill_(1) loss_fn = nn.CrossEntropyLoss(weight=weight) loss_fn.size_average = False # optimizer optim_fn, optim_params = get_optimizer(args.optimizer) optimizer = optim_fn(nli_net.parameters(), **optim_params) if args.gpu_id > -1: nli_net.cuda() loss_fn.cuda() """ TRAIN """ global val_acc_best, lr, stop_training, adam_stop val_acc_best = -1e10 adam_stop = False stop_training = False lr = optim_params['lr'] if 'sgd' in args.optimizer else None """ Train model on Natural Language Inference task """ epoch = 1 while not stop_training and epoch <= args.n_epochs: train_acc, nli_net = trainepoch(epoch, train, optimizer, args, word_vecs, nli_net, loss_fn) eval_acc = evaluate(epoch, val, optimizer, args, word_vecs, nli_net, 'valid') epoch += 1
# model
encoder_types = ['InferSent', 'BLSTMprojEncoder', 'BGRUlastEncoder',
                 'InnerAttentionMILAEncoder', 'InnerAttentionYANGEncoder',
                 'InnerAttentionNAACLEncoder', 'ConvNetEncoder', 'LSTMEncoder']
assert params.encoder_type in encoder_types, \
    "encoder_type must be in " + str(encoder_types)

nli_net = NLINet(config_nli_model)
print(nli_net)

# loss
weight = torch.FloatTensor(params.n_classes).fill_(1)
loss_fn = nn.CrossEntropyLoss(weight=weight)
loss_fn.size_average = False

# optimizer
optim_fn, optim_params = get_optimizer(params.optimizer)
optimizer = optim_fn(nli_net.parameters(), **optim_params)

# cuda by default
nli_net.cuda()
loss_fn.cuda()

"""
TRAIN
"""
val_acc_best = -1e10
adam_stop = False
stop_training = False
lr = optim_params['lr'] if 'sgd' in params.optimizer else None
def main(args): print "main" """ SEED """ np.random.seed(args.seed) torch.manual_seed(args.seed) if args.gpu_id > -1: torch.cuda.manual_seed(args.seed) """ DATA """ train, val, test = get_nli_text(args.train_lbls_file, args.train_src_file, args.val_lbls_file, \ args.val_src_file, args.test_lbls_file, args.test_src_file, \ args.max_train_sents, args.max_val_sents, args.max_test_sents, args.remove_dup) word_vecs = build_vocab( train['hypoths'] + val['hypoths'] + test['hypoths'] + train['premises'] + val['premises'] + test['premises'], args.embdfile, args.lorelei_embds) args.word_emb_dim = len(word_vecs[word_vecs.keys()[0]]) nli_model_configs = get_model_configs(args, len(word_vecs)) lbls_file = args.train_lbls_file if "mpe" in lbls_file or "snli" in lbls_file or "multinli" in lbls_file or "sick" in lbls_file or "joci" in lbls_file or "glue" in lbls_file: nli_model_configs["n_classes"] = 3 elif "spr" in lbls_file or "dpr" in lbls_file or "fnplus" in lbls_file or "add_one" in lbls_file or "scitail" in lbls_file: nli_model_configs["n_classes"] = 2 # define premise and hypoth encoders premise_encoder = eval( nli_model_configs['encoder_type'])(nli_model_configs) hypoth_encoder = eval(nli_model_configs['encoder_type'])(nli_model_configs) shared_nli_net = SharedNLINet(nli_model_configs, premise_encoder, hypoth_encoder) shared_hypoth_net = SharedHypothNet(nli_model_configs, hypoth_encoder) print(shared_nli_net) print(shared_hypoth_net) if args.pre_trained_model: print "Pre_trained_model: " + args.pre_trained_model pre_trained_model = torch.load(args.pre_trained_model) shared_nli_net_params = shared_nli_net.state_dict() pre_trained_params = pre_trained_model.state_dict() assert shared_nli_net_params.keys() == pre_trained_params.keys( ), "load model has different parameter state names that NLI_HYPOTHS_NET" for key, parameters in shared_nli_net_params.items(): if parameters.size() == pre_trained_params[key].size(): shared_nli_net_params[key] = pre_trained_params[key] shared_nli_net.load_state_dict(shared_nli_net_params) print(shared_nli_net) if args.pre_trained_adv_model: print "Pre_trained_adv_model: " + args.pre_trained_adv_model pre_trained_model = torch.load(args.pre_trained_adv_model) shared_hypoth_net_params = shared_hypoth_net.state_dict() pre_trained_params = pre_trained_model.state_dict() assert shared_hypoth_net_params.keys() == pre_trained_params.keys( ), "load model has different parameter state names that NLI_HYPOTHS_NET" for key, parameters in nli_hypoth_params.items(): if parameters.size() == pre_trained_params[key].size(): shared_hypoth_net_params[key] = pre_trained_params[key] shared_hypoth_net.load_state_dict(shared_hypoth_net_params) print(shared_hypoth_net) # nli loss weight = torch.FloatTensor(args.n_classes).fill_(1) loss_fn_nli = nn.CrossEntropyLoss(weight=weight) loss_fn_nli.size_average = False # hypoth (adversarial) loss weight = torch.FloatTensor(args.n_classes).fill_(1) loss_fn_hypoth = nn.CrossEntropyLoss(weight=weight) loss_fn_hypoth.size_average = False # optimizer optim_fn, optim_params = get_optimizer(args.optimizer) optimizer_nli = optim_fn(shared_nli_net.parameters(), **optim_params) #optimizer_hypoth = optim_fn(shared_hypoth_net.parameters(), **optim_params) # only pass hypoth classifier params to avoid updating shared encoder params twice optimizer_hypoth = optim_fn(shared_hypoth_net.classifier.parameters(), **optim_params) if args.gpu_id > -1: shared_nli_net.cuda() shared_hypoth_net.cuda() loss_fn_nli.cuda() loss_fn_hypoth.cuda() """ TRAIN """ global val_acc_best, 
lr, stop_training, adam_stop val_acc_best = -1e10 adam_stop = False stop_training = False lr = optim_params['lr'] if 'sgd' in args.optimizer else None """ Train model on Natural Language Inference task """ epoch = 1 while not stop_training and epoch <= args.n_epochs: train_acc_nli, train_acc_hypoth, shared_nli_net, shared_hypoth_net = trainepoch( epoch, train, optimizer_nli, optimizer_hypoth, args, word_vecs, shared_nli_net, shared_hypoth_net, loss_fn_nli, loss_fn_hypoth, args.adv_lambda, args.adv_hyp_encoder_lambda) eval_acc_nli, eval_acc_hypoth = evaluate(epoch, val, optimizer_nli, optimizer_hypoth, args, word_vecs, shared_nli_net, shared_hypoth_net, 'valid', adv_lambda=args.adv_lambda) epoch += 1
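# trainepoch() and evaluate() for this adversarial setup are defined elsewhere in the
# repository and are not part of this snippet. The step below is only a sketch of one
# way adv_lambda could combine the NLI loss with the hypothesis-only adversary, given
# the two optimizers built above (optimizer_hypoth only updates the hypothesis-only
# classifier); the function name and the sign convention are assumptions, not the
# repository's actual implementation.
def adversarial_train_step(s1_batch, s1_len, s2_batch, s2_len, tgt_batch,
                           shared_nli_net, shared_hypoth_net,
                           optimizer_nli, optimizer_hypoth,
                           loss_fn_nli, loss_fn_hypoth, adv_lambda):
    # 1) update the NLI model; subtracting the scaled hypothesis-only loss pushes the
    #    shared hypothesis encoder away from label-revealing hypothesis artifacts
    out_nli = shared_nli_net((s1_batch, s1_len), (s2_batch, s2_len))
    out_hyp = shared_hypoth_net((s2_batch, s2_len))
    loss = loss_fn_nli(out_nli, tgt_batch) - adv_lambda * loss_fn_hypoth(out_hyp, tgt_batch)
    optimizer_nli.zero_grad()
    loss.backward()
    optimizer_nli.step()

    # 2) update the hypothesis-only classifier so it keeps trying to predict the label;
    #    its optimizer holds only classifier parameters, so the shared encoder is untouched
    out_hyp = shared_hypoth_net((s2_batch, s2_len))
    adv_loss = loss_fn_hypoth(out_hyp, tgt_batch)
    optimizer_hypoth.zero_grad()
    adv_loss.backward()
    optimizer_hypoth.step()
    return loss.item(), adv_loss.item()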
for name, x in nli_net.named_parameters():
    print(name)
for name, x in actorModel.named_parameters():
    print(name)
# print(nli_net.target_pred.enc_lstm.weight_ih_l0)
# print(nli_net.target_classifier[4].bias)

# loss
weight = torch.FloatTensor(params.n_classes).fill_(1)
loss_fn = nn.CrossEntropyLoss(weight=weight)
loss_fn.size_average = False

# optimizers: separate critic (target/active) and actor (target/active) optimizers
optim_fn, optim_params = get_optimizer(params.optimizer)
critic_target_optimizer = optim_fn(
    list(nli_net.target_pred.parameters()) +
    list(nli_net.target_classifier.parameters()), **optim_params)

optim_fn2, optim_params2 = get_optimizer(params.optimizer)
critic_active_optimizer = optim_fn2(
    list(nli_net.active_pred.parameters()) +
    list(nli_net.active_classifier.parameters()), **optim_params2)

optim_fn3, optim_params3 = get_optimizer(params.actor_optimizer)
actor_target_optimizer = optim_fn3(actorModel.target_policy.parameters(),
                                   **optim_params3)

optim_fn4, optim_params4 = get_optimizer(params.actor_optimizer)
actor_active_optimizer = optim_fn4(actorModel.active_policy.parameters(),
                                   **optim_params4)
def main(args): print "main" """ SEED """ np.random.seed(args.seed) torch.manual_seed(args.seed) if args.gpu_id > -1: torch.cuda.manual_seed(args.seed) """ DATA """ train, val, test = get_nli_hypoth(args.train_lbls_file, args.train_src_file, args.val_lbls_file, \ args.val_src_file, args.test_lbls_file, args.test_src_file, \ args.max_train_sents, args.max_val_sents, args.max_test_sents, args.remove_dup) word_vecs = build_vocab( train['hypoths'] + val['hypoths'] + test['hypoths'], args.embdfile, args.lorelei_embds) args.word_emb_dim = len(word_vecs[word_vecs.keys()[0]]) nli_model_configs = get_model_configs(args, len(word_vecs)) lbls_file = args.train_lbls_file if "mpe" in lbls_file or "snli" in lbls_file or "multinli" in lbls_file or "sick" in lbls_file or "joci" in lbls_file: nli_model_configs["n_classes"] = 3 elif "spr" in lbls_file or "dpr" in lbls_file or "fnplus" in lbls_file or "add_one" in lbls_file or "scitail" in lbls_file: nli_model_configs["n_classes"] = 2 nli_net = NLI_HYPOTHS_Net(nli_model_configs) print(nli_net) if args.pre_trained_nli_model: print("Pre_trained_model: " + args.pre_trained_nli_model) from models import SharedNLINet pre_trained_model = torch.load(args.pre_trained_nli_model) nli_net_params = nli_net.state_dict() pre_trained_params = pre_trained_model.state_dict() # this assert will fail becasue pre-trained model has both premise and hypothesis encoders #assert nli_net_params.keys() == pre_trained_params.keys(), "load model has different parameter state names that NLI_HYPOTHS_NET" # instead, we will only copy the hypothesis encoder for key, parameters in nli_net_params.items(): if key.startswith('encoder'): pre_trained_key = key.replace('encoder', 'encoder_hypoth') if parameters.size( ) == pre_trained_params[pre_trained_key].size(): nli_net_params[key] = pre_trained_params[pre_trained_key] nli_net.load_state_dict(nli_net_params) print(nli_net) # loss weight = torch.FloatTensor(args.n_classes).fill_(1) loss_fn = nn.CrossEntropyLoss(weight=weight) loss_fn.size_average = False # optimizer optim_fn, optim_params = get_optimizer(args.optimizer) optimizer = optim_fn(nli_net.parameters(), **optim_params) if args.freeze_encoder: print("Freezing encoder parameters") for p in nli_net.encoder.parameters(): p.requires_grad = False if args.gpu_id > -1: nli_net.cuda() loss_fn.cuda() """ TRAIN """ global val_acc_best, lr, stop_training, adam_stop val_acc_best = -1e10 adam_stop = False stop_training = False lr = optim_params['lr'] if 'sgd' in args.optimizer else None """ Train model on Natural Language Inference task """ epoch = 1 while not stop_training and epoch <= args.n_epochs: train_acc, nli_net = trainepoch(epoch, train, optimizer, args, word_vecs, nli_net, loss_fn) eval_acc = evaluate(epoch, val, optimizer, args, word_vecs, nli_net, 'valid') epoch += 1