Example #1
from collections import Counter
from typing import List

import torch
from termcolor import colored
from torch.utils.data import DataLoader
from tqdm import tqdm

# Config, NNCRF, Instance, evaluate_batch_insts and get_metric come from the
# surrounding project and are assumed to be importable.

def evaluate_model(config: Config, model: NNCRF, loader: DataLoader, name: str, insts: List[Instance], print_each_type_metric: bool = False):
    ## evaluation
    p_dict, total_predict_dict, total_entity_dict = Counter(), Counter(), Counter()
    batch_size = loader.batch_size
    dev = config.device
    with torch.no_grad():
        for batch_id, batch in tqdm(enumerate(loader), desc="--evaluating batch", total=len(loader)):
            # slice out the raw instances that correspond to this batch
            one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
            batch_max_scores, batch_max_ids = model.decode(words=batch.words.to(dev), word_seq_lens=batch.word_seq_len.to(dev),
                         context_emb=batch.context_emb.to(dev) if batch.context_emb is not None else None,
                         chars=batch.chars.to(dev), char_seq_lens=batch.char_seq_lens.to(dev))
            # per-entity-type counts: correct predictions, total predictions, gold entities
            batch_p, batch_predict, batch_total = evaluate_batch_insts(one_batch_insts, batch_max_ids, batch.labels, batch.word_seq_len, config.idx2labels)
            p_dict += batch_p
            total_predict_dict += batch_predict
            total_entity_dict += batch_total
    if print_each_type_metric:
        for key in total_entity_dict:
            precision_key, recall_key, fscore_key = get_metric(p_dict[key], total_entity_dict[key], total_predict_dict[key])
            print(f"[{key}] Prec.: {precision_key:.2f}, Rec.: {recall_key:.2f}, F1: {fscore_key:.2f}")

    total_p = sum(p_dict.values())
    total_predict = sum(total_predict_dict.values())
    total_entity = sum(total_entity_dict.values())
    precision, recall, fscore = get_metric(total_p, total_entity, total_predict)
    print(colored(f"[{name} set Total] Prec.: {precision:.2f}, Rec.: {recall:.2f}, F1: {fscore:.2f}", 'blue'), flush=True)

    return [precision, recall, fscore]
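
Example #1 (like the examples that follow) pools its per-type Counters and hands the totals to a project-level helper, get_metric(p_num, total_num, total_predicted_num). Its body is not part of this listing; a minimal sketch consistent with the call sites (correct, gold, and predicted entity counts in; percentage-scaled precision, recall, and F1 out) might look like this:

def get_metric(p_num: int, total_num: int, total_predicted_num: int):
    # Sketch only: the real helper lives in the surrounding project.
    # p_num: correctly predicted entities, total_num: gold entities,
    # total_predicted_num: predicted entities; all scores in percent.
    precision = p_num * 100.0 / total_predicted_num if total_predicted_num != 0 else 0.0
    recall = p_num * 100.0 / total_num if total_num != 0 else 0.0
    fscore = 2.0 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0
    return precision, recall, fscore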
Example #2
# Imports are the same as in Example #1, with TransformersCRF in place of NNCRF.

def evaluate_model(config: Config, model: TransformersCRF, data_loader: DataLoader, name: str, insts: List, print_each_type_metric: bool = False):
    ## evaluation
    p_dict, total_predict_dict, total_entity_dict = Counter(), Counter(), Counter()
    batch_size = data_loader.batch_size
    with torch.no_grad():
        for batch_id, batch in tqdm(enumerate(data_loader), desc="--evaluating batch", total=len(data_loader)):
            one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) * batch_size]
            batch_max_scores, batch_max_ids = model.decode(words=batch.input_ids.to(config.device),
                    word_seq_lens=batch.word_seq_len.to(config.device),
                    orig_to_tok_index=batch.orig_to_tok_index.to(config.device),
                    input_mask=batch.attention_mask.to(config.device))
            batch_p, batch_predict, batch_total = evaluate_batch_insts(one_batch_insts, batch_max_ids, batch.label_ids, batch.word_seq_len, config.idx2labels)
            p_dict += batch_p
            total_predict_dict += batch_predict
            total_entity_dict += batch_total
    f1Scores = []
    if print_each_type_metric or config.print_detail_f1 or (config.earlystop_atr == "macro"):
        for key in total_entity_dict:
            precision_key, recall_key, fscore_key = get_metric(p_dict[key], total_entity_dict[key], total_predict_dict[key])
            print(f"[{key}] Prec.: {precision_key:.2f}, Rec.: {recall_key:.2f}, F1: {fscore_key:.2f}")
            f1Scores.append(fscore_key)
        if len(f1Scores) > 0:
            print(f"[{name} set Total] Macro F1: {sum(f1Scores) / len(f1Scores):.2f}")

    total_p = sum(p_dict.values())
    total_predict = sum(total_predict_dict.values())
    total_entity = sum(total_entity_dict.values())
    precision, recall, fscore = get_metric(total_p, total_entity, total_predict)
    print(colored(f"[{name} set Total] Prec.: {precision:.2f}, Rec.: {recall:.2f}, Micro F1: {fscore:.2f}", 'blue'), flush=True)

    if config.earlystop_atr == "macro" and len(f1Scores) > 0:
        fscore = sum(f1Scores) / len(f1Scores)

    return [precision, recall, fscore]
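
Example #2 reports micro F1 from the pooled counts and, when config.earlystop_atr == "macro", overrides the returned fscore with the unweighted mean of the per-type F1 scores. The two can diverge sharply on imbalanced label sets; a toy illustration with made-up counts, reusing the get_metric sketch above:

# hypothetical per-type counts: (correct, gold, predicted)
counts = {"PER": (90, 100, 100), "MISC": (1, 10, 10)}

per_type_f1 = [get_metric(p, g, pred)[2] for p, g, pred in counts.values()]
macro_f1 = sum(per_type_f1) / len(per_type_f1)              # (90.0 + 10.0) / 2 = 50.0

total_p = sum(p for p, _, _ in counts.values())             # 91
total_gold = sum(g for _, g, _ in counts.values())          # 110
total_pred = sum(pr for _, _, pr in counts.values())        # 110
micro_f1 = get_metric(total_p, total_gold, total_pred)[2]   # ~82.73, dominated by PER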
Example #3
    def get_metric(self, print_each_type_metric):
        if print_each_type_metric:
            per_type_metrics = {}
            for key in self._total_entity_dict:
                precision_key, recall_key, fscore_key = get_metric(
                    self._p_dict[key], self._total_entity_dict[key],
                    self._total_predict_dict[key])
                per_type_metrics[key] = {
                    "Prec": precision_key,
                    "Recl": recall_key,
                    "F1": fscore_key
                }
        else:
            per_type_metrics = None

        total_p = sum(self._p_dict.values())
        total_predict = sum(self._total_predict_dict.values())
        total_entity = sum(self._total_entity_dict.values())
        precision, recall, fscore = get_metric(total_p, total_entity,
                                               total_predict)
        total_metrics = {"Prec": precision, "Recl": recall, "F1": fscore}
        return total_metrics, per_type_metrics
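
Example #3 moves the same accumulation into a stateful class; only the reporting method is shown. A minimal sketch of the surrounding accumulator (class and method names here are hypothetical; the three Counter fields match what the method reads) could be:

from collections import Counter

class NERMetricAccumulator:
    # Sketch only: accumulates per-type entity counts across batches.

    def __init__(self):
        self._p_dict = Counter()              # correct predictions per type
        self._total_predict_dict = Counter()  # predictions per type
        self._total_entity_dict = Counter()   # gold entities per type

    def update(self, batch_p, batch_predict, batch_total):
        # fed from evaluate_batch_insts(...) inside the evaluation loop
        self._p_dict += batch_p
        self._total_predict_dict += batch_predict
        self._total_entity_dict += batch_total

    # get_metric(self, print_each_type_metric) as defined in Example #3 goes here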
Example #4
        # (excerpt: the enclosing loop header is truncated in the source; the
        # dangling line below is the tail of a statement that recovers the word
        # string for index `idx` by reverse lookup in word2idx)
        list(word2idx.keys())[list(word2idx.values()).index(idx)])

        one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) *
                                batch_size]

        batch_id += 1

        batch_max_scores, batch_max_ids = model.decode(
            words=batch.words.to(dev),
            word_seq_lens=batch.word_seq_len.to(dev),
            context_emb=batch.context_emb.to(dev)
            if batch.context_emb is not None else None,
            chars=batch.chars.to(dev),
            char_seq_lens=batch.char_seq_lens.to(dev))

        # unlike Examples #1 and #2, this variant also passes a file handle `f`
        # (presumably for writing predictions) to evaluate_batch_insts
        batch_p, batch_predict, batch_total = evaluate_batch_insts(
            one_batch_insts, f, batch_max_ids, batch.labels,
            batch.word_seq_len, idx2labels)
        p_dict += batch_p
        total_predict_dict += batch_predict
        total_entity_dict += batch_total

        # write_contextual_embeddings(f2,words_batch,context_rep,written_words)

total_p = sum(p_dict.values())
total_predict = sum(total_predict_dict.values())
total_entity = sum(total_entity_dict.values())

precision, recall, fscore = get_metric(total_p, total_entity, total_predict)

print(precision, recall, fscore)
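
All four examples rely on evaluate_batch_insts to turn decoded label ids into three per-type Counters (correct, predicted, and gold entity counts). Its implementation is not part of this listing; a rough sketch of the contract, assuming BIO-style tags and exact span matching (the function and helper names below are stand-ins, and the real function works on id tensors plus idx2labels rather than tag strings), is:

from collections import Counter
from typing import List

def extract_spans(tags: List[str]) -> set:
    # collect (type, start, end) spans from a BIO tag sequence
    spans, start, ent_type = set(), None, None
    for i, tag in enumerate(tags + ["O"]):  # the "O" sentinel flushes a trailing span
        if tag == "O" or tag.startswith("B-") or (tag.startswith("I-") and ent_type != tag[2:]):
            if ent_type is not None:
                spans.add((ent_type, start, i - 1))
                ent_type = None
            if tag.startswith("B-"):
                start, ent_type = i, tag[2:]
    return spans

def evaluate_batch_insts_sketch(gold_batch: List[List[str]], pred_batch: List[List[str]]):
    # returns the three Counters the evaluation loops above accumulate
    p_dict, predict_dict, total_dict = Counter(), Counter(), Counter()
    for gold, pred in zip(gold_batch, pred_batch):
        gold_spans, pred_spans = extract_spans(gold), extract_spans(pred)
        for ent_type, _, _ in gold_spans:
            total_dict[ent_type] += 1
        for ent_type, _, _ in pred_spans:
            predict_dict[ent_type] += 1
        for span in gold_spans & pred_spans:  # exact boundary and type match
            p_dict[span[0]] += 1
    return p_dict, predict_dict, total_dict

Counting whole spans rather than individual tokens is what makes the pooled totals above entity-level precision and recall.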