def main(unused_argv):
  if not FLAGS.data_dir:
    raise ValueError("--data_dir is required.")
  if not FLAGS.model_config:
    raise ValueError("--model_config is required.")

  encoder = encoder_manager.EncoderManager()

  with open(FLAGS.model_config) as json_config_file:
    model_config = json.load(json_config_file)
  if type(model_config) is dict:
    model_config = [model_config]

  for mdl_cfg in model_config:
    cfg = configuration.model_config(mdl_cfg, mode="encode")
    encoder.load_model(cfg)

  if FLAGS.eval_task in ["MR", "CR", "SUBJ", "MPQA"]:
    results = eval_classification.eval_nested_kfold(
        encoder, FLAGS.eval_task, FLAGS.data_dir, use_nb=False)
    scores = results[0]
    print('Mean score', np.mean(scores))
  elif FLAGS.eval_task == "SICK":
    results = eval_sick.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir)
  elif FLAGS.eval_task == "MSRP":
    results = eval_msrp.evaluate(
        encoder, evalcv=True, evaltest=True, use_feats=False,
        loc=FLAGS.data_dir)
  elif FLAGS.eval_task == "TREC":
    eval_trec.evaluate(encoder, evalcv=True, evaltest=True, loc=FLAGS.data_dir)
  else:
    raise ValueError("Unrecognized eval_task: %s" % FLAGS.eval_task)

  encoder.close()
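# Illustrative sketch (assumption, not part of the evaluation script above):
# --model_config may contain either a single JSON object or a list of objects,
# one per encoder loaded into the EncoderManager. The real schema is whatever
# configuration.model_config accepts; the "encoder_dim" key below is only a
# placeholder used to show the single-vs-list normalization.
import json

single_cfg = '{"encoder_dim": 1024}'                        # placeholder field
multi_cfg = '[{"encoder_dim": 1024}, {"encoder_dim": 2048}]'

for raw in (single_cfg, multi_cfg):
    cfgs = json.loads(raw)
    if isinstance(cfgs, dict):   # same normalization the script performs
        cfgs = [cfgs]
    print(len(cfgs), 'encoder config(s)')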
import tools
import eval_sick
import warnings

warnings.filterwarnings('ignore', category=DeprecationWarning)

if __name__ == '__main__':
    # Load the GoogleNews word2vec embeddings used for vocabulary expansion,
    # then load the trained skip-thoughts model and run the SICK evaluation.
    embed_map = tools.load_googlenews_vectors()
    model = tools.load_model(embed_map)
    eval_sick.evaluate(model, evaltest=True)
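# Variant sketch (assumption): if the trained model is not at the default path
# hard-coded in tools.py, tools.load_model can be given an explicit checkpoint,
# as the training loop further below does via path_to_model=... The .npz path
# here is illustrative only.
import tools
import eval_sick

embed_map = tools.load_googlenews_vectors()
model = tools.load_model(path_to_model='./models/my_skipthoughts_model.npz',
                         embed_map=embed_map)
eval_sick.evaluate(model, evaltest=True)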
def main(unused_argv):
  if not FLAGS.data_dir:
    raise ValueError("--data_dir is required.")
  if not FLAGS.model_config:
    raise ValueError("--model_config is required.")

  encoder = encoder_manager.EncoderManager()

  with open(FLAGS.model_config) as json_config_file:
    model_config = json.load(json_config_file)
  if type(model_config) is dict:
    model_config = [model_config]

  sp = None
  if FLAGS.sentencepiece_model_path:
    print('Loading sentencepiece model', FLAGS.sentencepiece_model_path)
    sp = spm.SentencePieceProcessor()
    sp.Load(FLAGS.sentencepiece_model_path)

  for mdl_cfg in model_config:
    cfg = configuration.model_config(mdl_cfg, mode="encode")
    encoder.load_model(cfg)

  if FLAGS.eval_task in ["MR", "CR", "SUBJ", "MPQA"]:
    results = eval_classification.eval_nested_kfold(
        encoder, FLAGS.eval_task, FLAGS.data_dir, use_nb=False)
    scores = results[0]
    print('Mean score', np.mean(scores))
  elif FLAGS.eval_task == "SICK":
    results = eval_sick.evaluate(
        encoder, evaltest=True, loc=FLAGS.data_dir, sp=sp)
  elif FLAGS.eval_task == "MSRP":
    results = eval_msrp.evaluate(
        encoder, evalcv=True, evaltest=True, use_feats=False,
        loc=FLAGS.data_dir)
  elif FLAGS.eval_task == "TREC":
    eval_trec.evaluate(encoder, evalcv=True, evaltest=True, loc=FLAGS.data_dir)
  elif FLAGS.eval_task == 'SNLI-MT-TR':
    file_meta_data = {
        'file_names': {
            'train': 'snli_train_translation.jsonl',
            'dev': 'snli_dev_translation.jsonl',
            'test': 'snli_test_translation.jsonl'
        },
        'sentence_keys': {
            'sentence1': 'translate-sentence1',
            'sentence2': 'translate-sentence2'
        },
        'label_classes': ['contradiction', 'entailment', 'neutral']
    }
    eval_nli.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir,
                      file_meta_data=file_meta_data, sp=sp)
  elif FLAGS.eval_task == 'SNLI':
    file_meta_data = {
        'file_names': {
            'train': 'snli_1.0_train.jsonl',
            'dev': 'snli_1.0_dev.jsonl',
            'test': 'snli_1.0_test.jsonl'
        },
        'sentence_keys': {
            'sentence1': 'sentence1',
            'sentence2': 'sentence2'
        },
        'label_classes': ['contradiction', 'entailment', 'neutral']
    }
    eval_nli.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir,
                      file_meta_data=file_meta_data, sp=sp)
  elif FLAGS.eval_task == 'MULTINLI-MT-TR-MATCHED':
    file_meta_data = {
        'file_names': {
            'train': 'multinli_train_translation.jsonl',
            'dev': 'multinli_dev_matched_translation.jsonl',
            'test': 'multinli_0.9_test_matched_translation_unlabeled.jsonl',
            'test_output':
                'multinli_0.9_test_matched_translation_unlabeled_output.csv'
        },
        'sentence_keys': {
            'sentence1': 'translate-sentence1',
            'sentence2': 'translate-sentence2'
        },
        'label_classes': ['contradiction', 'entailment', 'neutral']
    }
    eval_multinli.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir,
                           file_meta_data=file_meta_data, sp=sp)
  elif FLAGS.eval_task == 'MULTINLI-MATCHED':
    file_meta_data = {
        'file_names': {
            'train': 'multinli_1.0_train.jsonl',
            'dev': 'multinli_1.0_dev_matched.jsonl',
            'test': 'multinli_0.9_test_matched_unlabeled.jsonl',
            'test_output': 'multinli_0.9_test_matched_unlabeled_output.csv'
        },
        'sentence_keys': {
            'sentence1': 'sentence1',
            'sentence2': 'sentence2'
        },
        'label_classes': ['contradiction', 'entailment', 'neutral']
    }
    eval_multinli.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir,
                           file_meta_data=file_meta_data, sp=sp)
  elif FLAGS.eval_task == 'MULTINLI-MT-TR-MISMATCHED':
    file_meta_data = {
        'file_names': {
            'train': 'multinli_train_translation.jsonl',
            'dev': 'multinli_dev_mismatched_translation.jsonl',
            'test': 'multinli_0.9_test_mismatched_translation_unlabeled.jsonl',
            'test_output':
                'multinli_0.9_test_mismatched_translation_unlabeled_output.csv',
        },
        'sentence_keys': {
            'sentence1': 'translate-sentence1',
            'sentence2': 'translate-sentence2'
        },
        'label_classes': ['contradiction', 'entailment', 'neutral']
    }
    eval_multinli.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir,
                           file_meta_data=file_meta_data, sp=sp)
  elif FLAGS.eval_task == 'MULTINLI-MISMATCHED':
    file_meta_data = {
        'file_names': {
            'train': 'multinli_1.0_train.jsonl',
            'dev': 'multinli_1.0_dev_mismatched.jsonl',
            'test': 'multinli_0.9_test_mismatched_unlabeled.jsonl',
            'test_output': 'multinli_0.9_test_mismatched_unlabeled_output.csv'
        },
        'sentence_keys': {
            'sentence1': 'sentence1',
            'sentence2': 'sentence2'
        },
        'label_classes': ['contradiction', 'entailment', 'neutral']
    }
    eval_multinli.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir,
                           file_meta_data=file_meta_data, sp=sp)
  elif FLAGS.eval_task == 'XNLI-MT-TR':
    file_meta_data = {
        'file_names': {
            'train': 'multinli_train_translation.jsonl',
            'dev': 'xnli_dev_translation.jsonl',
            'test': 'xnli_test_translation.jsonl'
        },
        'sentence_keys': {
            'sentence1': 'translate-sentence1',
            'sentence2': 'translate-sentence2'
        },
        'label_classes': ['contradiction', 'entailment', 'neutral'],
        'language': 'any'
    }
    eval_xnli.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir,
                       file_meta_data=file_meta_data, sp=sp)
  elif FLAGS.eval_task == 'XNLI':
    file_meta_data = {
        'file_names': {
            'train': 'multinli_1.0_train.jsonl',
            'dev': 'xnli.dev.jsonl',
            'test': 'xnli.test.jsonl'
        },
        'sentence_keys': {
            'sentence1': 'sentence1',
            'sentence2': 'sentence2'
        },
        'label_classes': ['contradiction', 'entailment', 'neutral'],
        'language': 'en'
    }
    eval_xnli.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir,
                       file_meta_data=file_meta_data, sp=sp)
  else:
    raise ValueError("Unrecognized eval_task: %s" % FLAGS.eval_task)

  encoder.close()
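# Reference sketch (assumption about downstream use): the `sp` object handed to
# the eval_* modules above is a plain SentencePiece processor. A minimal,
# self-contained example of the API being relied on, with an illustrative model
# path, looks like this:
import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.Load('/path/to/sentencepiece.model')   # illustrative path

sentence = 'A man is playing a guitar.'
pieces = sp.EncodeAsPieces(sentence)      # subword tokens
ids = sp.EncodeAsIds(sentence)            # integer ids
print(pieces)
print(ids)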
def trainer(X,
            dim_word=620,  # word vector dimensionality
            dim=2400,  # the number of GRU units
            encoder='gru',
            decoder='gru',
            max_epochs=5,
            dispFreq=1,
            decay_c=0.,
            grad_clip=5.,
            n_words=20000,
            maxlen_w=30,
            optimizer='adam',
            batch_size=512,
            saveto='/u/rkiros/research/semhash/models/toy.npz',
            dictionary='/ais/gobi3/u/rkiros/bookgen/book_dictionary_large.pkl',
            saveFreq=5000,
            reload_=False,
            reload_path='output_books_full/model_ae_full_bsz_64_iter_313000.npz',
            SICK_eval=False):

    # Model options
    model_options = {}
    model_options['dim_word'] = dim_word
    model_options['dim'] = dim
    model_options['encoder'] = encoder
    model_options['decoder'] = decoder
    model_options['max_epochs'] = max_epochs
    model_options['dispFreq'] = dispFreq
    model_options['decay_c'] = decay_c
    model_options['grad_clip'] = grad_clip
    model_options['n_words'] = n_words
    model_options['maxlen_w'] = maxlen_w
    model_options['optimizer'] = optimizer
    model_options['batch_size'] = batch_size
    model_options['saveto'] = saveto
    model_options['dictionary'] = dictionary
    model_options['saveFreq'] = saveFreq
    model_options['reload_'] = reload_
    model_options['reload_path'] = reload_path
    print model_options

    # reload options saved with the checkpoint
    if reload_ and os.path.exists(reload_path):
        print 'reloading...' + reload_path
        with open('%s.pkl' % reload_path, 'rb') as f:
            model_options = pkl.load(f)
        reload_idx = int(reload_path.split('_')[-1].split('.')[0])

    # load dictionary
    print 'Loading dictionary...'
    worddict = load_dictionary(dictionary)

    # Inverse dictionary
    word_idict = dict()
    for kk, vv in worddict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(reload_path):
        params = load_params(reload_path, params)

    tparams = init_tparams(params)

    trng, x, x_mask, y, y_mask, z, z_mask, opt_ret, cost = build_model(
        tparams, model_options)
    inps = [x, x_mask, y, y_mask, z, z_mask]

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=False)
    print 'Done'

    # weight decay, if applicable
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv**2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=False)
    print 'Done'

    print 'Building f_grad...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    f_grad_norm = theano.function(inps, [(g**2).sum() for g in grads],
                                  profile=False)
    f_weight_norm = theano.function([], [(t**2).sum() for k, t in tparams.iteritems()],
                                    profile=False)

    # global-norm gradient clipping
    if grad_clip > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g**2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(tensor.switch(g2 > (grad_clip**2),
                                           g / tensor.sqrt(g2) * grad_clip,
                                           g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    # (compute gradients), (updates parameters)
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)

    print 'Optimization'

    # Each sentence in the minibatch has the same length (for the encoder)
    trainX = homogeneous_data.grouper(X)
    train_iter = homogeneous_data.HomogeneousData(trainX,
                                                  batch_size=batch_size,
                                                  maxlen=maxlen_w)

    if not reload_:
        uidx = 0
    else:
        uidx = reload_idx
    lrate = 0.01
    for eidx in xrange(max_epochs):
        n_samples = 0

        print 'Epoch ', eidx

        for x, y, z in train_iter:
            n_samples += len(x)
            uidx += 1

            x, x_mask, y, y_mask, z, z_mask = homogeneous_data.prepare_data(
                x, y, z, worddict, maxlen=maxlen_w, n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen_w
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask, z, z_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',

                saveto_iternum = saveto.format(uidx)
                params = unzip(tparams)
                numpy.savez(saveto_iternum, history_errs=[], **params)
                pkl.dump(model_options, open('%s.pkl' % saveto_iternum, 'wb'))
                print 'Done'

                if SICK_eval:
                    print "Evaluating SICK Test performance"
                    embed_map = tools.load_googlenews_vectors()
                    model = tools.load_model(path_to_model=saveto_iternum,
                                             embed_map=embed_map)
                    yhat, pr, sr, mse = eval_sick.evaluate(model, evaltest=True)
                    del model
                    del embed_map
                    print pr, sr, mse

                    res_save_file = saveto.format('ALL').split('.')[0] + '_SICK_EVAL.txt'
                    with open(res_save_file, 'a') as rsf:
                        cur_time = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
                        rsf.write('\n \n {}'.format(cur_time))
                        rsf.write('\n{}, {}, {}, {}'.format(uidx, pr, sr, mse))
                    print "Done"

        print 'Seen %d samples' % n_samples
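# The grad_clip logic in the trainer above is global-norm clipping: when the
# joint L2 norm of all gradients exceeds grad_clip, every gradient is rescaled
# by grad_clip / norm. A small numpy sketch of the same rule (values made up
# for illustration):
import numpy

def clip_by_global_norm(grads, clip):
    # Mirrors the tensor.switch(g2 > clip**2, g / sqrt(g2) * clip, g) branch.
    g2 = sum((g ** 2).sum() for g in grads)
    if g2 > clip ** 2:
        return [g / numpy.sqrt(g2) * clip for g in grads]
    return grads

toy_grads = [numpy.array([3.0, 0.0]), numpy.array([0.0, 4.0])]  # joint norm 5
print(clip_by_global_norm(toy_grads, clip=1.0))  # each scaled down by 5x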
import sys
import random
from argparse import ArgumentParser

import numpy as np
import tensorflow as tf

import eval_sick
import eval_msrp
import datagen

# Set random seeds for reproducibility
random.seed(333)
np.random.seed(333)
tf.set_random_seed(333)

parser = ArgumentParser()
parser.add_argument('model', help='Model to evaluate')
parser.add_argument('tokenizer', help='Tokenizer object')
parser.add_argument('-d', '--data', default='data/', help='Path to test data')
parser.add_argument('-e', '--embeddings', help='Embedding file')
parser.add_argument('-v', type=int, default=0, help='Verbose level')
args = parser.parse_args(sys.argv[1:])
print(args)

tokenizer = datagen.load_tokenizer(args.tokenizer)
# Encoder is assumed to be provided elsewhere in this repo (not shown in this excerpt).
encoder = Encoder(args.model, args.embeddings, tokenizer)
print('Encoder created')

eval_sick.evaluate(encoder, evaltest=True, loc=args.data, verbose=args.v)
eval_msrp.evaluate(encoder, evalcv=True, evaltest=True, use_feats=True,
                   loc=args.data)