def train_and_evaluate(self, train_generator, eval_generator, evaluator,
                       epochs=1, eposides=1, save_dir=None, summary_dir=None,
                       save_summary_steps=10):
    """Train the model and periodically evaluate it.

    Lazily runs the TF global-variable initializer on first use, then
    delegates the actual train/eval loop to ``Trainer._train_and_evaluate``.

    NOTE(review): ``eposides`` looks like a typo for ``episodes`` but is
    preserved because callers may pass it by keyword.
    """
    # One-time lazy variable initialization.
    # NOTE(review): ``self.initialized`` is not set to True here —
    # presumably ``Trainer._train_and_evaluate`` flips it; confirm.
    if not self.initialized:
        self.session.run(tf.global_variables_initializer())
    Trainer._train_and_evaluate(
        self,
        train_generator,
        eval_generator,
        evaluator,
        epochs=epochs,
        eposides=eposides,
        save_dir=save_dir,
        summary_dir=summary_dir,
        save_summary_steps=save_summary_steps,
    )
# --- Evaluate a trained BiDAF model on the held-out data. ---
test_batch_generator = BatchGenerator(vocab, eval_data, batch_size=60)

# Restore the best checkpoint and prepare local (metric) variables.
model_dir = base_folder + 'models/bidaf/best_weights'
model = BiDAF(vocab)
model.load(model_dir)
model.session.run(tf.local_variables_initializer())

model.inference(test_batch_generator)  # inference on test data
model.evaluate(test_batch_generator, evaluator)

# Manual evaluation pass: step through the eval session and score the
# extracted answers. NOTE(review): this largely duplicates
# model.evaluate() above — presumably kept for the extra prints; confirm.
eval_batch_generator = test_batch_generator
eval_batch_generator.init()
eval_instances = eval_batch_generator.get_instances()
model.session.run(model.eval_metric_init_op)

# Ceiling division: number of batches needed to cover every instance.
eval_num_steps = (
    eval_batch_generator.get_instance_size()
    + eval_batch_generator.get_batch_size() - 1
) // eval_batch_generator.get_batch_size()

output = Trainer._eval_sess(model, eval_batch_generator, eval_num_steps, None)

# Fix: compute the best answers once and reuse the result for both the
# debug print and the scoring — the original called get_best_answer twice.
pred_answer = model.get_best_answer(output, eval_instances)
print('pred_answer={}'.format(pred_answer))

score = evaluator.get_score(pred_answer)
metrics_string = " ; ".join("{}: {:05.3f}".format(k, v) for k, v in score.items())
print("- Eval metrics: " + metrics_string)
def inference(self, batch_generator):
    """Run inference over *batch_generator* by delegating to ``Trainer._inference``."""
    Trainer._inference(self, batch_generator)
def evaluate(self, batch_generator, evaluator):
    """Evaluate the model on *batch_generator* by delegating to ``Trainer._evaluate``."""
    Trainer._evaluate(self, batch_generator, evaluator)
def evaluate(self, batch_generator, evaluator):
    """Evaluate the model, preparing the BERT embedding layer first.

    Calls ``self.bert_embedding.init_bert()`` before delegating the
    actual evaluation to ``Trainer._evaluate``.
    """
    # Presumably loads/initializes BERT weights needed at eval time —
    # TODO(review): confirm against BertEmbedding.init_bert.
    self.bert_embedding.init_bert()
    Trainer._evaluate(self, batch_generator, evaluator)