def evaluate_model(config: Config, model: SoftSequenceNaive, batch_insts_ids, name: str, insts: List[Instance]):
    """Evaluate ``model`` on pre-batched data and print span P/R/F1.

    Args:
        config: experiment configuration; supplies ``batch_size``,
            ``idx2labels`` and ``use_crf_layer``.
        model: model exposing ``decode(*inputs)`` -> (max scores, predicted ids).
        batch_insts_ids: iterable of batch tuples; elements 0-4 are the decoder
            inputs, element 1 the sequence lengths, element 6 the gold label
            ids (assumed from the call pattern -- TODO confirm against the
            batching code).
        name: dataset name shown in the printed report (e.g. "dev", "test").
        insts: flat list of Instance objects, ordered to align with the batches.

    Returns:
        ``[precision, recall, fscore]`` as percentages.
    """
    # metrics accumulates [num correct spans, num predicted spans, num gold spans].
    metrics = np.asarray([0, 0, 0], dtype=int)
    batch_size = config.batch_size
    # enumerate() replaces the original hand-maintained batch_id counter.
    for batch_id, batch in enumerate(batch_insts_ids):
        # Slice out the raw instances corresponding to this batch.
        one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
        batch_max_scores, batch_max_ids = model.decode(*batch[0:5], None)
        metrics += evaluate_batch_insts(one_batch_insts, batch_max_ids, batch[6],
                                        batch[1], config.idx2labels, config.use_crf_layer)
    p, total_predict, total_entity = metrics[0], metrics[1], metrics[2]
    # Guard every ratio against a zero denominator (nothing predicted / no gold
    # entities / both precision and recall zero).
    precision = p * 1.0 / total_predict * 100 if total_predict != 0 else 0
    recall = p * 1.0 / total_entity * 100 if total_entity != 0 else 0
    fscore = 2.0 * precision * recall / (precision + recall) if precision != 0 or recall != 0 else 0
    print("[%s set] Precision: %.2f, Recall: %.2f, F1: %.2f" % (name, precision, recall, fscore), flush=True)
    return [precision, recall, fscore]
def xzk_eval_model(self, dataloader, name=None):
    """Evaluate ``self.model`` over ``dataloader`` and print two metric sets.

    Prints (1) span-level P/R/F1 accumulated via ``evaluate_batch_insts`` and
    (2) label-sequence metrics computed from ``inst.prediction`` /
    ``inst.output`` with ``precision_score`` etc. (presumably seqeval --
    verify against the module imports).

    Args:
        dataloader: yields tuples whose last element is the raw instances and
            whose remaining elements are tensors
            (token ids, lengths, char ids, char lengths, masks, gold labels).
        name: dataset name used in the printed span-level report.

    Returns:
        (precision, recall, fscore) -- the span-level percentages.
    """
    with torch.no_grad():
        # metrics accumulates [num correct spans, num predicted spans, num gold spans].
        metrics = np.asarray([0, 0, 0], dtype=int)
        all_true_y_label = list()
        all_pred_y_label = list()
        print("testing")
        # The original iterated tqdm(enumerate(...)) but never used the index;
        # iterating the dataloader directly also lets tqdm show a total.
        for data in tqdm(dataloader):
            insts = data[-1]
            data = [x.to(self.device) for x in data[0:-1]]
            token_id_seq, data_length, char_seq_tensor, char_seq_len, masks, label_seq = data
            # The loss returned by the forward pass is not needed at eval time.
            _, logits = self.model(token_id_seq, data_length, char_seq_tensor,
                                   char_seq_len, masks, label_seq)
            batch_max_scores, pred_ids = self.model.decode(logits, data_length)
            metrics += evaluate_batch_insts(insts, pred_ids, label_seq, data_length,
                                            self.config.idx2labels, self.config.use_crf_layer)
            # evaluate_batch_insts is assumed to populate inst.prediction -- it is
            # read here for the sequence-level metrics below.
            for inst in insts:
                all_pred_y_label.append(inst.prediction)
                all_true_y_label.append(inst.output)
        num_correct, total_predict, total_entity = metrics[0], metrics[1], metrics[2]
        # Guard every ratio against a zero denominator.
        precision = num_correct * 1.0 / total_predict * 100 if total_predict != 0 else 0
        recall = num_correct * 1.0 / total_entity * 100 if total_entity != 0 else 0
        fscore = 2.0 * precision * recall / (precision + recall) if precision != 0 or recall != 0 else 0
        print("[%s set] Precision: %.2f, Recall: %.2f, F1: %.2f" % (name, precision, recall, fscore), flush=True)
        # Sequence-labelling metrics over the collected label sequences; kept in
        # distinct names (seq_p/...) instead of reusing/shadowing the span-count
        # variable `p` as the original did.
        seq_p = precision_score(all_true_y_label, all_pred_y_label)
        seq_r = recall_score(all_true_y_label, all_pred_y_label)
        seq_f1 = f1_score(all_true_y_label, all_pred_y_label)
        print("Precision: %.2f, Recall: %.2f, F1: %.2f" % (seq_p, seq_r, seq_f1), flush=True)
        print('acc', accuracy_score(all_true_y_label, all_pred_y_label), flush=True)
        print(classification_report(all_true_y_label, all_pred_y_label))
        return precision, recall, fscore