def evaluate_test(model, test_loader, dev_f):
    model.eval()
    pred = []
    gold = []
    print('test')
    with torch.no_grad():
        for i, dev_batch in enumerate(test_loader):
            sentence, masks, tags, lengths = dev_batch
            sentence, masks, tags, lengths = Variable(sentence), Variable(masks), Variable(tags), Variable(lengths)
            if use_cuda:
                sentence = sentence.cuda()
                masks = masks.cuda()
                tags = tags.cuda()
            predict_tags = model(sentence, masks)

            pred.extend(predict_tags.tolist())
            gold.extend(tags.tolist())

        pred_label, gold_label = recover_label(pred, gold, l2i_dic, i2l_dic)
        # drop the first label of every sequence (sentence-start position) before scoring
        pred_label_2 = [t[1:] for t in pred_label]
        gold_label_2 = [t[1:] for t in gold_label]
        print(pred_label_2[0], len(pred_label))
        print(gold_label_2[0])
        # write predicted label sequences, one space-separated sentence per line
        with open('data/predict_result' + str(dev_f) + 'bert.txt', 'w') as fw:
            for labels in pred_label_2:
                fw.write(' '.join(labels) + '\n')
        acc, p, r, f = get_ner_fmeasure(gold_label_2, pred_label_2)
        print('p: {}, r: {}, f: {}'.format(p, r, f))
        # model.train()
        return acc, p, r, f
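# Usage sketch (an assumption, not part of the original code): `evaluate_test`
# expects `test_loader` to yield (sentence, masks, tags, lengths) batches and
# uses `dev_f` only to name the prediction file, e.g.:
#     acc, p, r, f = evaluate_test(model, test_loader, dev_f=best_dev_f)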
def evaluate(model, dev_loader):
    model.eval()
    pred = []
    gold = []
    print('evaluate')
    for i, dev_batch in enumerate(dev_loader):
        sentence, masks, tags = dev_batch
        sentence, masks, tags = Variable(sentence), Variable(masks), Variable(tags)
        if use_cuda:
            sentence = sentence.cuda()
            masks = masks.cuda()
            tags = tags.cuda()
        predict_tags = model(sentence, masks)
        pred.extend(predict_tags.tolist())
        gold.extend(tags.tolist())
    pred_label, gold_label = recover_label(pred, gold, l2i_dic, i2l_dic)
    acc, p, r, f = get_ner_fmeasure(gold_label, pred_label)
    print('p: {}, r: {}, f: {}'.format(p, r, f))
    model.train()
    return acc, p, r, f
def evaluate(model, dev_loader):
    model.eval()
    pred = []
    gold = []
    pred_test = []

    print('evaluate')
    with torch.no_grad():
        for i, dev_batch in enumerate(dev_loader):
            sentence, masks, tags, lengths = dev_batch
            sentence, masks, tags, lengths = Variable(sentence), Variable(masks), Variable(tags), Variable(lengths)
            if use_cuda:
                sentence = sentence.cuda()
                masks = masks.cuda()
                tags = tags.cuda()

            predict_tags = model(sentence, masks)
            loss = model.neg_log_likelihood_loss(sentence, masks, tags)

            pred.extend(predict_tags.tolist())
            gold.extend(tags.tolist())

            # batch_tagids = medel.test(
            #     scores, lengths, l2i_dic)
            # pred_test = [t for t in batch_tagids.tolist()]

        pred_label, gold_label = recover_label(pred, gold, l2i_dic, i2l_dic)

        # pred_label_test,gold_label = recover_label(pred_test, gold, l2i_dic,i2l_dic)
        #
        # print('xin test fang fa', pred_label_test[0])

        # note: `loss` here is the value from the last dev batch only
        print('dev loss {}'.format(loss.item()))
        # drop the first label of every sequence (sentence-start position) before scoring
        pred_label_1 = [t[1:] for t in pred_label]
        gold_label_1 = [t[1:] for t in gold_label]
        print(pred_label_1[0], len(pred_label_1))
        print(gold_label_1[0])
        acc, p, r, f = get_ner_fmeasure(gold_label_1, pred_label_1)
        print('p: {}, r: {}, f: {}'.format(p, r, f))
        # model.train()
        return acc, p, r, f
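# Usage sketch (an assumption, not part of the original code): this variant of
# `evaluate` also reports the dev loss, so it can double as a model-selection
# signal, e.g.:
#     acc, p, r, f = evaluate(model, dev_loader)
#     if f > best_f:
#         best_f = f
#         torch.save(model.state_dict(), 'best_model.pt')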
Example #4
def dev(model, dev_loader, epoch, config, index2label, dev_lens):
    model.eval()
    eval_loss = 0
    trues = []
    preds = []
    length = 0
    for i, batch in enumerate(dev_loader):
        inputs, masks, tags = batch
        length += inputs.size(0)
        inputs, masks, tags = Variable(inputs), Variable(masks), Variable(tags)
        if config.use_cuda:
            inputs, masks, tags = inputs.cuda(), masks.cuda(), tags.cuda()
        feats = model(inputs, masks)
        path_score, best_path = model.crf(feats, masks.byte())
        loss = model.loss(feats, masks, tags)
        eval_loss += loss.item()
        nums = len(best_path)
        for j in range(nums):
            pred_result = []
            true_result = []
            for index in list(best_path[j].cpu().numpy()):
                pred_result.append(index2label[index])
            for index in list(tags[j].cpu().numpy()):
                true_result.append(index2label[index])
            preds.append(pred_result)
            trues.append(true_result)
    # print(len(dev_lens))
    # strip the start token and padding using the true sequence lengths
    pred_tag_lists = [preds[i][1:dev_lens[i]+1] for i in range(len(dev_lens))]
    tag_lists = [trues[i][1:dev_lens[i]+1] for i in range(len(dev_lens))]
    accuracy, precision, recall, f_measure = get_ner_fmeasure(tag_lists, pred_tag_lists)
    def calculate_category_f1():
        # per-category precision/recall/F1 over the raw sequences (helper; not invoked below)
        print(pred_tag_lists[:25])
        print(tag_lists[:25])
        labels = list(index2label.values())
        truth_label_count = {}
        predict_label_count = {}
        label_count = {}
        count = 0
        for pred, true in zip(preds, trues):
            for i, t in enumerate(true):
                if t == '<eos>' and pred[i] == '<eos>':
                    count += 1
                    break
                else:
                    if t not in ['<pad>', 'o', '<start>']:
                        if t == pred[i]:
                            label_count[t] = label_count.get(t, 0) + 1
                        truth_label_count[t] = truth_label_count.get(t, 0) + 1
                        predict_label_count[pred[i]] = predict_label_count.get(pred[i], 0) + 1
        precision = {}
        recall = {}
        f1 = {}
        # print(label_count.keys())
        # print(predict_label_count.keys())
        # print(truth_label_count.keys())
        for label in labels:
            if label in label_count:
                precision[label] = label_count[label] / predict_label_count[label]
                recall[label] = label_count[label] / truth_label_count[label]
                f1[label] = 2 * precision[label] * recall[label] / (precision[label] + recall[label])

        # print(sum(precision.values())/len(truth_label_count))
        # print(sum(recall.values())/len(truth_label_count))

        print(precision)
        print(recall)
        print(f1)
    # print(truth_label_count)
    print('eval epoch: {} | loss: {}'.format(epoch, eval_loss / length))
    model.train()
    return f_measure
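# Usage sketch (an assumption, not part of the original code): `dev` would
# typically be called once per epoch to track the best dev F1.
# `train_one_epoch`, `train_loader`, `optimizer`, `config.epochs` and
# `config.save_path` are hypothetical names, e.g.:
#     best_f = 0.0
#     for epoch in range(config.epochs):
#         train_one_epoch(model, train_loader, optimizer, config)
#         f = dev(model, dev_loader, epoch, config, index2label, dev_lens)
#         if f > best_f:
#             best_f = f
#             torch.save(model.state_dict(), config.save_path)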