from collections import Counter
from typing import List

import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from termcolor import colored

# Project-internal names (Config, NNCRF, TransformersCRF, Instance,
# evaluate_batch_insts, get_metric) are assumed to be importable from this codebase.


def evaluate_model(config: Config, model: NNCRF, loader: DataLoader, name: str,
                   insts: List[Instance], print_each_type_metric: bool = False):
    """Evaluate the NNCRF model on a dataset and report precision/recall/F1."""
    # Per-label counts: true positives, predicted entities, gold entities.
    p_dict, total_predict_dict, total_entity_dict = Counter(), Counter(), Counter()
    batch_size = loader.batch_size
    dev = config.device
    with torch.no_grad():
        # Use enumerate for the batch index instead of the original manual
        # counter (which coexisted with an unused `iter` variable that also
        # shadowed the builtin).
        for batch_id, batch in tqdm(enumerate(loader), desc="--evaluating batch", total=len(loader)):
            # Slice the original instances corresponding to this batch.
            one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
            batch_max_scores, batch_max_ids = model.decode(
                words=batch.words.to(dev),
                word_seq_lens=batch.word_seq_len.to(dev),
                context_emb=batch.context_emb.to(dev) if batch.context_emb is not None else None,
                chars=batch.chars.to(dev),
                char_seq_lens=batch.char_seq_lens.to(dev))
            batch_p, batch_predict, batch_total = evaluate_batch_insts(
                one_batch_insts, batch_max_ids, batch.labels, batch.word_seq_len, config.idx2labels)
            p_dict += batch_p
            total_predict_dict += batch_predict
            total_entity_dict += batch_total
    if print_each_type_metric:
        for key in total_entity_dict:
            precision_key, recall_key, fscore_key = get_metric(
                p_dict[key], total_entity_dict[key], total_predict_dict[key])
            print(f"[{key}] Prec.: {precision_key:.2f}, Rec.: {recall_key:.2f}, F1: {fscore_key:.2f}")
    total_p = sum(p_dict.values())
    total_predict = sum(total_predict_dict.values())
    total_entity = sum(total_entity_dict.values())
    precision, recall, fscore = get_metric(total_p, total_entity, total_predict)
    print(colored(f"[{name} set Total] Prec.: {precision:.2f}, Rec.: {recall:.2f}, F1: {fscore:.2f}", 'blue'),
          flush=True)
    return [precision, recall, fscore]
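# Hypothetical usage sketch — conf, nncrf_model, dev_loader, dev_insts, and
# best_dev_f1 are placeholder names, not part of this codebase:
#
#     nncrf_model.eval()  # switch off dropout before decoding
#     precision, recall, f1 = evaluate_model(
#         conf, nncrf_model, dev_loader, "dev", dev_insts, print_each_type_metric=True)
#     if f1 > best_dev_f1:
#         best_dev_f1 = f1  # keep the best dev score for model selection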
def evaluate_model(config: Config, model: TransformersCRF, data_loader: DataLoader, name: str,
                   insts: List, print_each_type_metric: bool = False):
    """Evaluate the TransformersCRF model; reports micro F1 and, optionally, per-type and macro F1."""
    p_dict, total_predict_dict, total_entity_dict = Counter(), Counter(), Counter()
    batch_size = data_loader.batch_size
    with torch.no_grad():
        for batch_id, batch in tqdm(enumerate(data_loader), desc="--evaluating batch", total=len(data_loader)):
            one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
            batch_max_scores, batch_max_ids = model.decode(
                words=batch.input_ids.to(config.device),
                word_seq_lens=batch.word_seq_len.to(config.device),
                orig_to_tok_index=batch.orig_to_tok_index.to(config.device),
                input_mask=batch.attention_mask.to(config.device))
            batch_p, batch_predict, batch_total = evaluate_batch_insts(
                one_batch_insts, batch_max_ids, batch.label_ids, batch.word_seq_len, config.idx2labels)
            p_dict += batch_p
            total_predict_dict += batch_predict
            total_entity_dict += batch_total
            # (The original also incremented batch_id manually here; that was
            # dead code once enumerate supplies the index, so it is removed.)
    f1_scores = []
    if print_each_type_metric or config.print_detail_f1 or (config.earlystop_atr == "macro"):
        for key in total_entity_dict:
            precision_key, recall_key, fscore_key = get_metric(
                p_dict[key], total_entity_dict[key], total_predict_dict[key])
            print(f"[{key}] Prec.: {precision_key:.2f}, Rec.: {recall_key:.2f}, F1: {fscore_key:.2f}")
            f1_scores.append(fscore_key)
        if f1_scores:
            print(f"[{name} set Total] Macro F1: {sum(f1_scores) / len(f1_scores):.2f}")
    total_p = sum(p_dict.values())
    total_predict = sum(total_predict_dict.values())
    total_entity = sum(total_entity_dict.values())
    precision, recall, fscore = get_metric(total_p, total_entity, total_predict)
    print(colored(f"[{name} set Total] Prec.: {precision:.2f}, Rec.: {recall:.2f}, Micro F1: {fscore:.2f}", 'blue'),
          flush=True)
    # For macro early stopping, return the macro F1 in place of the micro F1.
    if config.earlystop_atr == "macro" and f1_scores:
        fscore = sum(f1_scores) / len(f1_scores)
    return [precision, recall, fscore]
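# Both evaluate_model variants above delegate the arithmetic to a module-level
# get_metric helper whose body is not shown here. A minimal sketch under the
# standard definitions, matching the argument order (p, total_entity,
# total_predict) used above — an assumption, not the verified implementation:
def get_metric(p_num: int, total_num: int, total_predicted_num: int):
    """Percent-scaled precision, recall, and F1 from entity counts:
    p_num = true positives, total_num = gold entities, total_predicted_num = predicted entities."""
    precision = p_num * 100.0 / total_predicted_num if total_predicted_num != 0 else 0.0
    recall = p_num * 100.0 / total_num if total_num != 0 else 0.0
    fscore = 2.0 * precision * recall / (precision + recall) if precision + recall != 0 else 0.0
    return precision, recall, fscore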
def get_metric(self, print_each_type_metric):
    """Aggregate the stored counts into overall (and optionally per-type) P/R/F1 dicts."""
    per_type_metrics = None
    if print_each_type_metric:
        per_type_metrics = {}
        for key in self._total_entity_dict:
            # Note: this calls the module-level get_metric helper, not this
            # method — class names are not in scope inside a method body.
            precision_key, recall_key, fscore_key = get_metric(
                self._p_dict[key], self._total_entity_dict[key], self._total_predict_dict[key])
            per_type_metrics[key] = {"Prec": precision_key, "Recl": recall_key, "F1": fscore_key}
    total_p = sum(self._p_dict.values())
    total_predict = sum(self._total_predict_dict.values())
    total_entity = sum(self._total_entity_dict.values())
    precision, recall, fscore = get_metric(total_p, total_entity, total_predict)
    total_metrics = {"Prec": precision, "Recl": recall, "F1": fscore}
    return total_metrics, per_type_metrics
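# Hypothetical usage of the class-based variant above — `tracker` stands for an
# instance of whatever class holds _p_dict, _total_predict_dict, and
# _total_entity_dict (that class is not shown in this fragment):
#
#     total_metrics, per_type_metrics = tracker.get_metric(print_each_type_metric=True)
#     print(f"Overall F1: {total_metrics['F1']:.2f}")
#     if per_type_metrics is not None:
#         for label, m in per_type_metrics.items():
#             print(f"[{label}] Prec.: {m['Prec']:.2f}, Rec.: {m['Recl']:.2f}, F1: {m['F1']:.2f}")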
# Fragment: script-style evaluation loop (mirrors evaluate_model above). The
# loop header and counter initialization are reconstructed; a dangling
# expression from the source is kept for reference — a reverse word lookup by
# index: list(word2idx.keys())[list(word2idx.values()).index(idx)]
batch_id = 0
for batch in loader:
    one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
    batch_id += 1
    batch_max_scores, batch_max_ids = model.decode(
        words=batch.words.to(dev),
        word_seq_lens=batch.word_seq_len.to(dev),
        context_emb=batch.context_emb.to(dev) if batch.context_emb is not None else None,
        chars=batch.chars.to(dev),
        char_seq_lens=batch.char_seq_lens.to(dev))
    # Unlike the functions above, this variant passes a file handle `f` so that
    # evaluate_batch_insts can write predictions as it scores them.
    batch_p, batch_predict, batch_total = evaluate_batch_insts(
        one_batch_insts, f, batch_max_ids, batch.labels, batch.word_seq_len, idx2labels)
    p_dict += batch_p
    total_predict_dict += batch_predict
    total_entity_dict += batch_total
    # write_contextual_embeddings(f2, words_batch, context_rep, written_words)

total_p = sum(p_dict.values())
total_predict = sum(total_predict_dict.values())
total_entity = sum(total_entity_dict.values())
precision, recall, fscore = get_metric(total_p, total_entity, total_predict)
print(precision, recall, fscore)
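# The reverse lookup preserved in the comment above scans word2idx twice per
# call (O(n)). A sketch of the usual constant-time alternative; the idx2word
# name is hypothetical:
idx2word = {index: word for word, index in word2idx.items()}  # build the inverse map once
# idx2word[idx] then replaces list(word2idx.keys())[list(word2idx.values()).index(idx)]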