Esempio n. 1
0
def evaluate_model(config: Config, model: SoftSequenceNaive, batch_insts_ids, name: str, insts: List[Instance]):
    """Decode every batch, accumulate the evaluation counts and report P/R/F1.

    Args:
        config: Provides ``batch_size``, ``idx2labels`` and ``use_crf_layer``.
        model: ``model.decode`` is called with the first five elements of each
            batch followed by ``None``.
        batch_insts_ids: Iterable of indexable batches. Elements 6 and 1 of
            each batch are forwarded to ``evaluate_batch_insts`` (presumably
            the gold label ids and the sequence lengths -- confirm against
            the batching code).
        name: Label interpolated into the printed report line.
        insts: Instances, sliced positionally ``batch_size`` at a time, so
            they are assumed to be ordered like ``batch_insts_ids``.

    Returns:
        ``[precision, recall, fscore]`` expressed as percentages. A score is
        ``0`` whenever its denominator would be zero.
    """
    counts = np.asarray([0, 0, 0], dtype=int)
    step = config.batch_size

    for batch_idx, batch in enumerate(batch_insts_ids):
        # Instances belonging to this batch, located purely by position.
        begin = batch_idx * step
        chunk = insts[begin:begin + step]
        # Only the decoded ids are needed; the scores are discarded.
        _, pred_ids = model.decode(*batch[0:5], None)
        counts += evaluate_batch_insts(
            chunk,
            pred_ids,
            batch[6],
            batch[1],
            config.idx2labels,
            config.use_crf_layer,
        )

    # counts[0] is the shared numerator; counts[1] / counts[2] are the
    # denominators of precision / recall respectively.
    correct, total_predict, total_entity = counts

    precision = recall = fscore = 0
    if total_predict != 0:
        precision = correct * 1.0 / total_predict * 100
    if total_entity != 0:
        recall = correct * 1.0 / total_entity * 100
    if not (precision == 0 and recall == 0):
        fscore = 2.0 * precision * recall / (precision + recall)

    report = "[%s set] Precision: %.2f, Recall: %.2f, F1: %.2f" % (name, precision, recall, fscore)
    print(report, flush=True)
    return [precision, recall, fscore]
Esempio n. 2
0
    def xzk_eval_model(self, dataloader, name=None):
        """Evaluate ``self.model`` on ``dataloader`` and print two metric reports.

        The first report is built from the counts accumulated by
        ``evaluate_batch_insts`` and is expressed in percent. The second is
        computed by ``precision_score`` / ``recall_score`` / ``f1_score`` /
        ``accuracy_score`` / ``classification_report`` over the per-instance
        ``output`` and ``prediction`` attributes, and is printed as returned.

        Args:
            dataloader: Iterable of batches. Every element but the last is
                moved to ``self.device`` and unpacked into six tensors; the
                last element is the list of instances for that batch.
            name: Label interpolated into the first report line.

        Returns:
            Tuple ``(precision, recall, fscore)`` from the count-based report,
            as percentages (``0`` when the corresponding denominator is zero).
        """
        # NOTE(review): gradients are disabled here, but the model is not
        # switched to eval mode in this method -- presumably the caller does
        # ``self.model.eval()`` beforehand; confirm.
        with torch.no_grad():
            metrics = np.asarray([0, 0, 0], dtype=int)
            all_true_y_label = []
            all_pred_y_label = []
            print("testing")
            # Wrap the dataloader itself (not an enumerate object) so tqdm can
            # read its length and display a total / ETA when one is available.
            for data in tqdm(dataloader):
                insts = data[-1]
                tensors = [x.to(self.device) for x in data[0:-1]]
                (token_id_seq, data_length, char_seq_tensor, char_seq_len,
                 masks, label_seq) = tensors
                # The forward pass also returns a loss, which is not needed
                # for evaluation.
                _, logits = self.model(token_id_seq, data_length,
                                       char_seq_tensor,
                                       char_seq_len, masks,
                                       label_seq)
                # Only the decoded label ids are used; scores are discarded.
                _, pred_ids = self.model.decode(logits, data_length)
                metrics += evaluate_batch_insts(insts, pred_ids, label_seq,
                                                data_length,
                                                self.config.idx2labels,
                                                self.config.use_crf_layer)

                # ``prediction`` is read right after evaluate_batch_insts, so
                # it is presumably populated by that call -- confirm.
                all_pred_y_label.extend(inst.prediction for inst in insts)
                all_true_y_label.extend(inst.output for inst in insts)

            # metrics[0] is the shared numerator; metrics[1] / metrics[2] are
            # the denominators of precision / recall respectively.
            correct, total_predict, total_entity = metrics[0], metrics[1], metrics[2]
            precision = correct * 1.0 / total_predict * 100 if total_predict != 0 else 0
            recall = correct * 1.0 / total_entity * 100 if total_entity != 0 else 0
            fscore = 2.0 * precision * recall / (
                precision + recall) if precision != 0 or recall != 0 else 0
            print("[%s set] Precision: %.2f, Recall: %.2f, F1: %.2f" %
                  (name, precision, recall, fscore),
                  flush=True)

            # Second opinion from the metrics library. These values are
            # printed exactly as returned (no scaling by 100), and use their
            # own names so they cannot be confused with the counts above.
            lib_precision = precision_score(all_true_y_label, all_pred_y_label)
            lib_recall = recall_score(all_true_y_label, all_pred_y_label)
            lib_f1 = f1_score(all_true_y_label, all_pred_y_label)
            print("Precision: %.2f, Recall: %.2f, F1: %.2f" %
                  (lib_precision, lib_recall, lib_f1),
                  flush=True)
            print('acc',
                  accuracy_score(all_true_y_label, all_pred_y_label),
                  flush=True)
            print(classification_report(all_true_y_label, all_pred_y_label))
        return precision, recall, fscore