Ejemplo n.º 1
0
            e1_index = pair[0]  #实体1在句子中的索引
            e2_index = pair[1]  #实体2在句子中的索引
            label = pair[2]  #关系
            e1_dir = sentence['entity'][e1_index][:2]  #实体1的左右边界(忽略空格)
            e2_dir = sentence['entity'][e2_index][:2]  #实体2的左右边界(忽略空格)
            labelAbbr = RelationAbbr[label]  #该关系的缩写
            relation_s.append(
                [e1_dir[0], e1_dir[1], e2_dir[0], e2_dir[1], labelAbbr])
        gold_entity.append(entity_s)
        gold_relation.append(relation_s)
    return gold_entity, gold_relation


# Rewrite entity positions as offsets into the text with spaces removed
# (per the original author's note).
ChangeIndex(Senslist)
# Collect the annotated (gold) entities and relations, then persist both.
gold_entity, gold_relation = GetGoldAnwer(Senslist)
SaveGoldEntity(goldedir, gold_entity)
SaveGoldRelation(goldrdir, gold_relation)
# Build the per-sentence token/label sequences using the 'BIOES' scheme.
tokenandlabel = GenarateBIO(Senslist, 'BIOES')
fp = cs.open(outdir, 'w', 'utf-8')
# Counters initialised here; nothing in this part of the script updates them.
num_pass = tokenlen_max = 0
tokenlen_ave = numtoken = 0.0
if if_repeat == 'N':  # a token may not take part in more than one relation
    tokenandlabel = AddRelation2(Senslist, tokenandlabel)
    for sentence in tokenandlabel:
        for token in sentence:
            # One "<token>\t<label>" line per token: a two-field token emits
            # its field 1, any other shape emits its field 2.
            fp.write(
                token[0] + '\t' + token[1 if len(token) == 2 else 2] + '\n')
Ejemplo n.º 2
0
    y_predict = model.predict([xvaild, xcvaild], batch_size=128)
    pre_label = ypre2label(y_predict)
    pre_e, e_token = label2answer(pre_label, vaildtokens)
    print(u'第%s次训练开发集的结果为' % (str(i)))
    pe, re, fe = computeFe(gold_e_vaild, pre_e)
    fpout.write(u'第%s次开发集F值分别为:%f\n' % (str(i + 1), fe))
    if fe > maxf_e:
        maxf_e = fe
        save_model(mpath, model)
    #每轮迭代都额外考察在测试集上的数据,但测试集不参与模型的选择
    y_predict = model.predict([xtest, xctest], batch_size=128)
    pre_label = ypre2label(y_predict)
    pre_e, e_token = label2answer(pre_label, testtokens)
    print(u'第%s次训练测试集的结果为' % (str(i)))
    pe, re, fe = computeFe(gold_e, pre_e)
    fpout.write(u'第%s次测试集F值分别为:%f\n\n' % (str(i + 1), fe))
fpout.close()

# Reload the checkpoint stored at mpath (the training loop saves it whenever
# the dev-set entity F-score improves), run it on the test set, and save the
# predicted entities together with the entity -> token mapping.
load_model(mpath, model)
y_predict = model.predict([xtest, xctest], batch_size=128)
pre_label = ypre2label(y_predict)
# Predicted entity positions and the entity -> token mapping.
pre_e, e_token = label2answer(pre_label, testtokens)
SaveGoldEntity(prepath1, pre_e)
# The context manager closes the pickle file even if dump() raises.
with open(prepath2, 'wb') as f1:
    pickle.dump(e_token, f1)
# Entity-level precision / recall / F-score on the test set.
# NOTE(review): `re` holds recall here and would shadow the stdlib `re`
# module if this file imports it -- confirm nothing later relies on that.
pe, re, fe = computeFe(gold_e, pre_e)
print('测试实体的准确率 召回率 F值是%f %f %f' % (pe, re, fe))
Ejemplo n.º 3
0
    print(u'第%s次训练开发集的结果为' % (str(i)))
    pe, re, fe = computeFe(gold_e_vaild, pre_e)
    fpout.write(u'第%s次开发集F值分别为:%f\n' % (str(i + 1), fe))
    if fe > maxf_e:
        maxf_e = fe
        save_model(mpath, model)
    #每轮迭代都额外考察在测试集上的数据,但测试集不参与模型的选择
    y_predict = model.predict(xtest, batch_size=128)
    pre_label = ypre2label(y_predict)
    pre_e, e_token = label2answer(pre_label, testtokens)
    print(u'第%s次训练测试集的结果为' % (str(i)))
    pe, re, fe = computeFe(gold_e, pre_e)
    fpout.write(u'第%s次测试集F值分别为:%f\n\n' % (str(i + 1), fe))
fpout.close()

# Reload the checkpoint stored at mpath (the training loop saves it whenever
# the dev-set entity F-score improves), run it on the test set, and save the
# predicted entities together with the entity -> token mapping.
load_model(mpath, model)
y_predict = model.predict(xtest, batch_size=128)
pre_label = ypre2label(y_predict)
# Predicted entity positions and the entity -> token mapping.
pre_e, e_token = label2answer(pre_label, testtokens)
SaveGoldEntity(
    '../data/predict/pre_e_bils%s-ls%s-%s.txt' %
    (str(bils), str(ls), label_mode), pre_e)
# NOTE(review): unlike the entity path above, this name is never formatted,
# so the file on disk literally contains "%s". It looks like
# `% (str(bils), str(ls))` is missing; left unchanged because whatever loads
# this pickle may expect the literal name -- confirm before fixing.
# The context manager closes the pickle file even if dump() raises.
with open('../data/predict/en2t_bils%s-ls%s.pkl', 'wb') as f1:
    pickle.dump(e_token, f1)
# Entity-level precision / recall / F-score on the test set.
pe, re, fe = computeFe(gold_e, pre_e)
print('测试实体的准确率 召回率 F值是%f %f %f' % (pe, re, fe))
Ejemplo n.º 4
0
    y_predict = model.predict([xvaild, xcvaild], batch_size=64)
    pre_label = ypre2label(y_predict)
    pre_e, temp1, temp2 = label2answer(pre_label, vaildtokens)
    pe, re, fe, fe_label = computeFe(gold_e_vaild, pre_e)
    fpout.write(u'第%s次开发集F值分别为:%f\n' % (str(i + 1), fe))
    if fe > maxf_e:
        maxf_e = fe
        save_model(mpath, model)
    #每轮迭代都额外考察在测试集上的数据,但测试集不参与模型的选择
    y_predict = model.predict([xtest, xctest], batch_size=128)
    pre_label = ypre2label(y_predict)
    pre_e, temp1, temp2 = label2answer(pre_label, testtokens)
    pe, re, fe, fe_label = computeFe(gold_e, pre_e)
    fpout.write(u'第%s次测试集F值分别为:%f\n' % (str(i + 1), fe))
fpout.close()

# Reload the checkpoint stored at mpath and run it on the test set.
# NOTE(review): the original comment said this is the model with the best
# *relation* F-score, but the training loop saves on the dev-set *entity*
# F-score (`fe > maxf_e`) -- confirm which was intended.
load_model(mpath, model)
y_predict = model.predict([xtest, xctest], batch_size=32)
pre_label = ypre2label(y_predict)
pre_e, e2t_c, e2t_g = label2answer(pre_label, testtokens)
pe, re, fe, fe_label = computeFe(gold_e, pre_e)
# Persist the predictions. Each pickle is written inside a context manager
# so the file is flushed and closed deterministically, even if dump() raises.
SaveGoldEntity('../data/predictE_test.txt', pre_e)
with open('../data/e2t_c.pkl', 'wb') as pkl_out:
    pickle.dump(e2t_c, pkl_out)
with open('../data/e2t_g.pkl', 'wb') as pkl_out:
    pickle.dump(e2t_g, pkl_out)
# Report the final scores: overall entity P/R/F, then the first three
# entries of fe_label under the names used in the message below.
print('\nThe final performance on test corpus is :')
print('entity:p ,r ,f %f %f %f' % (pe, re, fe))
print('chem:%f geneY:%f geneN:%f' % (fe_label[0], fe_label[1], fe_label[2]))
print('tis is : crf+bils-bils%s-ls%s' % (str(bils), str(ls)))