def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    """Run `num_batches` evaluation steps on the feedable-iterator dataset.

    Accumulates predicted answer spans and per-batch losses, scores them
    with `evaluate`, and packages loss / F1 / exact-match into tf.Summary
    protos tagged with `data_type` (e.g. "dev/loss").

    Returns:
        (metrics, summaries): the metrics dict (with mean "loss" added)
        and a list of three tf.Summary protos [loss, f1, em].
    """
    answers = {}
    batch_losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, batch_loss, yp1, yp2 = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        predicted, _ = convert_tokens(
            eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
        answers.update(predicted)
        batch_losses.append(batch_loss)
    metrics = evaluate(eval_file, answers)
    metrics["loss"] = np.mean(batch_losses)

    summaries = [
        tf.Summary(value=[tf.Summary.Value(
            tag="{}/loss".format(data_type), simple_value=metrics["loss"])]),
        tf.Summary(value=[tf.Summary.Value(
            tag="{}/f1".format(data_type), simple_value=metrics["f1"])]),
        tf.Summary(value=[tf.Summary.Value(
            tag="{}/em".format(data_type), simple_value=metrics["exact_match"])]),
    ]
    return metrics, summaries
def test(config):
    """Restore the latest checkpoint and evaluate the model on the test set.

    Loads word/char embedding matrices and the test eval file from the
    paths in `config`, builds the model graph in inference mode, runs the
    whole one-shot test dataset through the session, dumps the remapped
    answers to `config.answer_file` as JSON, and prints exact-match / F1.

    Args:
        config: namespace-like object supplying file paths and
            hyperparameters (batch_size, decay, save_dir, ...).
    """
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    # Total number of test examples, recorded at preprocessing time.
    total = meta["total"]

    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(config.test_record_file,
                                 get_record_parser(config, is_test=True),
                                 config).make_one_shot_iterator()
        model = Model(config, test_batch, word_mat, char_mat,
                      trainable=False, graph=g)

        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True

        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                # Presumably swaps in exponential-moving-average weights
                # for evaluation — confirm against Model.assign_vars.
                sess.run(model.assign_vars)

            losses = []
            answer_dict = {}
            remapped_dict = {}
            # BUG FIX: `total // batch_size + 1` over-counts by one batch
            # whenever `total` is an exact multiple of `batch_size`; the
            # one-shot iterator then raises OutOfRangeError and crashed the
            # run.  Treat iterator exhaustion as normal termination instead.
            for step in tqdm(range(total // config.batch_size + 1)):
                try:
                    qa_id, loss, yp1, yp2 = sess.run(
                        [model.qa_id, model.loss, model.yp1, model.yp2])
                except tf.errors.OutOfRangeError:
                    break
                answer_dict_, remapped_dict_ = convert_tokens(
                    eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
                answer_dict.update(answer_dict_)
                remapped_dict.update(remapped_dict_)
                losses.append(loss)

            loss = np.mean(losses)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(
                metrics['exact_match'], metrics['f1']))