def main(unused_argv):
  """Loads the configured encoder(s) and runs the requested eval task."""
  if not FLAGS.data_dir:
    raise ValueError("--data_dir is required.")

  encoder = encoder_manager.EncoderManager()

  # Optionally load the unidirectional encoder.
  if FLAGS.uni_checkpoint_path:
    print("Loading unidirectional model...")
    uni_config = configuration.model_config()
    encoder.load_model(uni_config, FLAGS.uni_vocab_file,
                       FLAGS.uni_embeddings_file, FLAGS.uni_checkpoint_path)

  # Optionally load the bidirectional encoder.
  if FLAGS.bi_checkpoint_path:
    print("Loading bidirectional model...")
    bi_config = configuration.model_config(bidirectional_encoder=True)
    encoder.load_model(bi_config, FLAGS.bi_vocab_file,
                       FLAGS.bi_embeddings_file, FLAGS.bi_checkpoint_path)

  # Dispatch to the evaluation harness for the selected task.
  task = FLAGS.eval_task
  if task in ("MR", "CR", "SUBJ", "MPQA"):
    eval_classification.eval_nested_kfold(
        encoder, task, FLAGS.data_dir, use_nb=False)
  elif task == "SICK":
    eval_sick.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir)
  elif task == "MSRP":
    eval_msrp.evaluate(
        encoder, evalcv=True, evaltest=True, use_feats=True,
        loc=FLAGS.data_dir)
  elif task == "TREC":
    eval_trec.evaluate(encoder, evalcv=True, evaltest=True, loc=FLAGS.data_dir)
  else:
    raise ValueError("Unrecognized eval_task: %s" % task)

  encoder.close()
def main(unused_argv):
  """Restores the requested skip-thought encoder(s) and evaluates them."""
  if not FLAGS.data_dir:
    raise ValueError("--data_dir is required.")

  encoder = encoder_manager.EncoderManager()

  # A unidirectional checkpoint is optional; load it when provided.
  if FLAGS.uni_checkpoint_path:
    print("Loading unidirectional model...")
    encoder.load_model(
        configuration.model_config(),
        FLAGS.uni_vocab_file,
        FLAGS.uni_embeddings_file,
        FLAGS.uni_checkpoint_path)

  # Likewise for the bidirectional checkpoint.
  if FLAGS.bi_checkpoint_path:
    print("Loading bidirectional model...")
    encoder.load_model(
        configuration.model_config(bidirectional_encoder=True),
        FLAGS.bi_vocab_file,
        FLAGS.bi_embeddings_file,
        FLAGS.bi_checkpoint_path)

  eval_task = FLAGS.eval_task
  if eval_task in ["MR", "CR", "SUBJ", "MPQA"]:
    eval_classification.eval_nested_kfold(
        encoder, eval_task, FLAGS.data_dir, use_nb=False)
  elif eval_task == "SICK":
    eval_sick.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir)
  elif eval_task == "MSRP":
    eval_msrp.evaluate(
        encoder,
        evalcv=True,
        evaltest=True,
        use_feats=True,
        loc=FLAGS.data_dir)
  elif eval_task == "TREC":
    eval_trec.evaluate(encoder, evalcv=True, evaltest=True, loc=FLAGS.data_dir)
  else:
    raise ValueError("Unrecognized eval_task: %s" % eval_task)

  encoder.close()
def run_task(config, task, checkpoint=None):
  """Evaluates a model checkpoint on the given task.

  Loads skip-thought embeddings from the checkpoint, expands their
  vocabulary with a Word2Vec model, builds the encoder graph, and runs
  the named downstream evaluation.

  Args:
    config: Object containing model configuration parameters.
    task: Name of the eval task.
    checkpoint: TF checkpoint to evaluate. If None, the latest checkpoint
      in FLAGS.checkpoint_dir is fetched and evaluated.

  Raises:
    ValueError: If an unrecognized task is passed in --eval_tasks.
  """
  if checkpoint is None:
    skip_thought_checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
    if not skip_thought_checkpoint:
      tf.logging.info("Skipping evaluation. No checkpoint found in: %s",
                      FLAGS.checkpoint_dir)
      return
  else:
    skip_thought_checkpoint = checkpoint

  # Load the skip thought embeddings and vocabulary.
  skip_thought_emb = tools.load_skip_thought_embeddings(
      skip_thought_checkpoint, config.vocab_size, config.word_embedding_dim)
  _, skip_thought_vocab = tools.load_vocabulary(FLAGS.skip_thought_vocab_file)

  # Load the Word2Vec model.
  word2vec_emb = tools.load_embedding_matrix(FLAGS.word2vec_embedding_file)
  _, word2vec_vocab = tools.load_vocabulary(FLAGS.word2vec_vocab_file)

  # Run vocabulary expansion so the encoder covers words seen only by
  # the Word2Vec model.
  combined_emb = tools.expand_vocabulary(skip_thought_emb, skip_thought_vocab,
                                         word2vec_emb, word2vec_vocab)

  # Load the encoder: build its graph and get a restore callable for the
  # checkpoint weights.
  g = tf.Graph()
  with g.as_default():
    encoder = word_context_encoder.WordContextEncoder(combined_emb)
    restore_model = encoder.build_graph_from_config(config,
                                                    skip_thought_checkpoint)

  with tf.Session(graph=g) as sess:
    restore_model(sess)
    global_step = tf.train.global_step(sess, "global_step:0")

    class EncoderWrapper(object):
      """Wrapper class for the encode function."""

      @staticmethod
      def encode(data, verbose=False):
        # `verbose` is accepted for interface compatibility with the eval
        # harnesses but is not used by this encoder.
        encoded = encoder.encode(
            sess,
            data,
            use_norm=FLAGS.use_norm,
            batch_size=FLAGS.batch_size,
            use_eos=FLAGS.use_eos)
        return np.array(encoded)

    encoder_wrapper = EncoderWrapper()

    tf.logging.info("Running %s evaluation task.", task)
    if task in ["MR", "CR", "SUBJ", "MPQA"]:
      eval_classification.eval_nested_kfold(
          encoder_wrapper, task, FLAGS.data_dir, use_nb=False)
    elif task == "SICK":
      eval_sick.evaluate(encoder_wrapper, evaltest=True, loc=FLAGS.data_dir)
    elif task == "MSRP":
      eval_msrp.evaluate(
          encoder_wrapper,
          evalcv=True,
          evaltest=True,
          use_feats=True,
          loc=FLAGS.data_dir)
    elif task == "TREC":
      eval_trec.evaluate(
          encoder_wrapper, evalcv=True, evaltest=True, loc=FLAGS.data_dir)
    elif task == "MultiNLI":
      results = eval_nli.evaluate(
          encoder_wrapper,
          FLAGS.multinli_dir,
          os.path.join(FLAGS.nli_eval_dir, "multinli", str(global_step)),
          method=FLAGS.nli_eval_method)
      best_hyperparams = results["best_hyperparameters"]
      print(results[best_hyperparams]["dev"]["overall"])
      print(results[best_hyperparams]["test"]["overall"])
    elif task == "SNLI":
      results = eval_nli.evaluate(
          encoder_wrapper,
          FLAGS.snli_dir,
          os.path.join(FLAGS.nli_eval_dir, "snli", str(global_step)),
          method=FLAGS.nli_eval_method)
      best_hyperparams = results["best_hyperparameters"]
      print(results[best_hyperparams]["dev"]["overall"])
      print(results[best_hyperparams]["test"]["overall"])
    else:
      # Bug fix: report the `task` argument being dispatched, not the
      # global FLAGS.eval_task — when --eval_tasks holds several tasks,
      # the flag value would not identify the offending one.
      raise ValueError("Unrecognized eval_task: %s" % task)

  tf.logging.info("Finished processing evaluation at global step %d.",
                  global_step)