def to_json(self) -> collections.OrderedDict:
    """Convert the hyper-parameters to a dict and save them as a JSON file.

    :return: the hyper-parameters as a dict.
    """
    self['hparams_json_file'] = os.path.join(self.get_workspace_dir(),
                                             'hparams.json')
    hparam_dict = self.to_dict()
    save_json(self.hparams_json_file, hparam_dict)
    return hparam_dict
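
# Usage sketch (illustrative only; assumes `hparams` is a configured Hparams
# instance): this writes <workspace_dir>/hparams.json and records its path
# under the `hparams_json_file` key.
#
#   hparam_dict = hparams.to_json()
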
def on_train_end(self, logs=None):
    logger.info("Start evaluation.")
    if not os.path.exists(self.report_dir):
        os.makedirs(self.report_dir)
    new_logs = self.eval_process(self.test_dataset, self.test_steps)
    save_json(os.path.join(self.report_dir, 'performance.json'), new_logs)
    print_boxed("Question Answer Evaluation")
    pprint(new_logs)
    logger.info(f"Saved question answering reports in {self.report_dir}")
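
# Usage sketch (assumptions: this method lives on a tf.keras.callbacks.Callback
# subclass; the class name `EvaluatorCallback` and its constructor arguments are
# hypothetical, only `on_train_end` above is from this module):
#
#   evaluator = EvaluatorCallback(...)  # hypothetical constructor
#   model.fit(train_dataset, epochs=3, callbacks=[evaluator])
#   # after training finishes, performance.json is written under evaluator.report_dir
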
def evaluation(hparams: Hparams, checkpoints=None, model=None, test_dataset=None):
    """Evaluate the model and build reports according to the task.

    :param hparams: experiment hyper-parameters.
    :param checkpoints: optional list of checkpoint prefixes (e.g. from k-fold
        cross validation) whose softmax outputs are averaged; if None, the
        default saved model is used.
    :param model: optional pre-built model; built from hparams if None.
    :param test_dataset: optional test dataset; loaded from hparams if None.
    :return:
    """
    logger.info("Start evaluation.")
    output_hparams = deepcopy(hparams.dataset.outputs)
    if test_dataset is None:
        test_dataset = next(
            load_dataset(hparams, ret_train=False, ret_dev=False, ret_info=False))[0]
    if model is None:
        # build model
        (model, ) = build_model(hparams,
                                return_losses=False,
                                return_metrics=False,
                                return_optimizer=False)
    # predict using the default saved model
    if checkpoints is None:
        # load weights
        if not os.path.exists(hparams.get_model_filename() + ".index"):
            logger.warning(
                f"Model weights {hparams.get_model_filename()} do not exist, nothing loaded!")
        else:
            logger.info(f"Load model weights from {hparams.get_model_filename()}")
            model.load_weights(hparams.get_model_filename())
        # prediction
        for inputs, outputs in tqdm(test_dataset):
            model_outputs = model.predict(inputs)
            if not isinstance(model_outputs, (tuple, list)):
                model_outputs = (model_outputs, )
            for idx, one_output_hparam in enumerate(output_hparams):
                if "ground_truth" not in one_output_hparam:
                    one_output_hparam["ground_truth"] = []
                if "predictions" not in one_output_hparam:
                    one_output_hparam['predictions'] = []
                prediction_output = tf.nn.softmax(model_outputs[idx], -1)
                tmp_name = one_output_hparam.name
                tmp_type = one_output_hparam.type
                tmp_ground_truth = outputs[tmp_name]
                if tmp_type in [CLASSLABEL, LIST_OF_CLASSLABEL, LIST_OF_INT]:
                    if tmp_type in [LIST_OF_INT]:
                        tmp_tg = tf.argmax(tmp_ground_truth, -1)
                    else:
                        tmp_tg = tmp_ground_truth
                    if one_output_hparam.task == NER:
                        # token-level labels: [[sent1], [sent2], ...]
                        one_output_hparam.ground_truth.extend(tmp_tg.numpy().tolist())
                        tmp_predictions = tf.argmax(prediction_output, -1).numpy().tolist()
                        one_output_hparam.predictions.extend(tmp_predictions)
                    else:
                        # flat labels: [1, 0, 1, ...]
                        one_output_hparam.ground_truth.extend(
                            tmp_tg.numpy().reshape(-1).tolist())
                        tmp_predictions = tf.argmax(
                            prediction_output, -1).numpy().reshape(-1).tolist()
                        one_output_hparam.predictions.extend(tmp_predictions)
    elif isinstance(checkpoints, (tuple, list)):
        # predict using multiple checkpoints from k-fold cross validation
        for i, ckpt in enumerate(checkpoints):
            if not os.path.exists(ckpt + ".index"):
                logger.warning(f"Model weights {ckpt} do not exist, nothing loaded!")
                continue
            else:
                logger.info(f"Load model weights from {ckpt}")
                model.load_weights(ckpt)
            for j, (inputs, outputs) in tqdm(enumerate(test_dataset)):
                model_outputs = model.predict(inputs)
                if not isinstance(model_outputs, (tuple, list)):
                    model_outputs = (model_outputs, )
                for idx, one_output_hparam in enumerate(output_hparams):
                    prediction_output = tf.nn.softmax(model_outputs[idx], -1)
                    if i == 0:
                        # first checkpoint: collect ground truth and start the
                        # per-batch sum of softmax outputs
                        if "ground_truth" not in one_output_hparam:
                            one_output_hparam["ground_truth"] = []
                        if "predictions" not in one_output_hparam:
                            one_output_hparam['predictions'] = []
                            one_output_hparam['tmp_preds'] = []
                        one_output_hparam['tmp_preds'].append(prediction_output)
                        tmp_name = one_output_hparam.name
                        tmp_type = one_output_hparam.type
                        tmp_ground_truth = outputs[tmp_name]
                        if tmp_type in [CLASSLABEL, LIST_OF_CLASSLABEL, LIST_OF_INT]:
                            if tmp_type in [LIST_OF_INT]:
                                tmp_tg = tf.argmax(tmp_ground_truth, -1)
                            else:
                                tmp_tg = tmp_ground_truth
                            if one_output_hparam.task == NER:
                                # token-level labels: [[sent1], [sent2], ...]
                                one_output_hparam.ground_truth.extend(
                                    tmp_tg.numpy().tolist())
                            else:
                                # flat labels: [1, 0, 1, ...]
                                one_output_hparam.ground_truth.extend(
                                    tmp_tg.numpy().reshape(-1).tolist())
                    else:
                        # later checkpoints: accumulate softmax outputs batch-wise
                        one_output_hparam['tmp_preds'][j] += prediction_output
        # turn the accumulated softmax outputs of every batch into label predictions
        for one_output_hparam in output_hparams:
            tmp_type = one_output_hparam.type
            if tmp_type in [CLASSLABEL, LIST_OF_CLASSLABEL, LIST_OF_INT]:
                for prediction_output in one_output_hparam['tmp_preds']:
                    if one_output_hparam.task == NER:
                        # token-level labels: [[sent1], [sent2], ...]
                        tmp_predictions = tf.argmax(prediction_output, -1).numpy().tolist()
                    else:
                        # flat labels: [1, 0, 1, ...]
                        tmp_predictions = tf.argmax(
                            prediction_output, -1).numpy().reshape(-1).tolist()
                    one_output_hparam.predictions.extend(tmp_predictions)

    # save reports
    report_folder = hparams.get_report_dir()
    # evaluation, TODO: more reports
    for one_output_hparam in output_hparams:
        ground_truth = one_output_hparam.ground_truth
        predictions = one_output_hparam.predictions
        if one_output_hparam.type in [CLASSLABEL, LIST_OF_CLASSLABEL, LIST_OF_INT]:
            # per-output report folder
            cur_report_folder = os.path.join(
                report_folder,
                f'{one_output_hparam.name}_{one_output_hparam.type.lower()}')
            if not os.path.exists(cur_report_folder):
                os.makedirs(cur_report_folder)
            if one_output_hparam.task == NER:
                labels = one_output_hparam.labels
                # confusion matrix
                cm = ConfusionMatrix(_2d_to_1d_list(ground_truth),
                                     _2d_to_1d_list(predictions), labels)
                # NER evaluation over entity types (strip the B-/I- prefixes)
                labels = list(
                    set([
                        itm[2:] for itm in labels
                        if itm.startswith("B-") or itm.startswith("I-")
                    ]))
                ner_eval = NEREvaluator(
                    _id_to_label(ground_truth, one_output_hparam.labels),
                    _id_to_label(predictions, one_output_hparam.labels), labels)
                ner_results, ner_results_agg = ner_eval.evaluate()
                save_json(os.path.join(cur_report_folder, "ner_results.json"),
                          ner_results)
                save_json(os.path.join(cur_report_folder, "ner_results_agg.json"),
                          ner_results_agg)
            else:
                cm = ConfusionMatrix(ground_truth, predictions,
                                     one_output_hparam.labels)
            # print some reports
            print_boxed(f"{one_output_hparam.name} Evaluation")
            cms = cm.confusion_matrix_visual()
            if len(cm.label2idx) < 10:
                print(cms)
            # save reports to files
            with open(os.path.join(cur_report_folder, "confusion_matrix.txt"),
                      'w') as f:
                f.write(cms)
            print()
            print(json.dumps(cm.stats(), indent=4))
            save_json(os.path.join(cur_report_folder, "stats.json"), cm.stats())
            save_json(os.path.join(cur_report_folder, 'per_class_stats.json'),
                      cm.per_class_stats())
            # save reports to hparams
            hparams['performance'] = Hparams()
            hparams.performance["stats"] = cm.stats()
            hparams.performance["per_class_stats"] = cm.per_class_stats()
            logger.info(
                f"Saved {one_output_hparam.name} reports in {cur_report_folder}")
        else:
            logger.warning(
                f"{one_output_hparam.name}'s evaluation has not been implemented.")
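
# Usage sketch (illustrative only; `hparams` is assumed to be a fully configured
# Hparams object built elsewhere in the project, and the checkpoint names are
# hypothetical):
#
#   evaluation(hparams)                                    # evaluate the default saved model
#   evaluation(hparams, checkpoints=["fold_0", "fold_1"])  # average softmax over k-fold checkpoints
#
# Reports (confusion matrix, stats.json, per_class_stats.json, and NER results
# for NER outputs) are written under hparams.get_report_dir().
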