Example #1
0
            fp.write(token[0])
            if len(token) == 2:
                fp.write('\t' + token[1])
            else:
                fp.write('\t' + token[-1])
            fp.write('\n')
        fp.write('\n')
fp.close()

#查看由于标签产生的关系错误的位置
from GenarateXY import GetXY
from utils import label2answer, loadtokens, computeFr
# Load the DDI test split for the active labelling scheme (label_mode):
# the raw tokens via loadtokens and the model inputs / labels via GetXY.
_ddi_test_path = u'../data/DDI_test_%s_Y.txt' % (label_mode)
testtokens = loadtokens(_ddi_test_path)
xtest, y_test, xctest = GetXY(_ddi_test_path, mask=True)
# Pass the labels read from the file (not model output) through label2answer;
# the result is compared against the gold relations further down.
predict_e, predict_r = label2answer(y_test, testtokens)

# Print every sentence whose relations, as recovered from the labels, differ
# from the gold relations: source file, tokens, gold relations, recovered ones.
# Plain `!=` replaces the Python-2-only cmp() builtin, and print is used in its
# single-argument call form so the code parses on both Python 2 and Python 3.
for i, gold_rel in enumerate(gold_relation):
    recovered_rel = predict_r[i]
    if gold_rel != recovered_rel:
        print(senindex2file[i])
        print(' '.join(testtokens[i]))
        print(gold_rel)
        print(recovered_rel)
        print('\n')
# Overall precision / recall / F of the relations recovered from the labels.
pr, rr, fr, frlabel = computeFr(gold_relation, predict_r)
print(u'由goldy得到的关系的PRF为%f %f %f' % (pr, rr, fr))

##计算重叠关系的数量
#num = 0
#for relas in gold_relation:
#    for i in range(len(relas)):
# Record the F-score of every epoch for this experiment configuration.
maxf_e = 0.0  # best development-set entity F-score seen so far
fpath = '../data/fscore/crf+bils-bils%s-ls%s-wv%s.txt' % (str(bils), str(ls),
                                                          str(wv))
mpath = '../data/model/crf+bils-bils%s-ls%s-wv%s.h5' % (str(bils), str(ls),
                                                        str(wv))
fpout = cs.open(fpath, 'w', 'utf-8')
# try/finally guarantees the score log is closed (and therefore flushed) even
# when an epoch raises or the run is interrupted part-way through.
try:
    for i in range(40):
        print(u'第%d轮次:' % (i + 1))
        # Train exactly one epoch per iteration so the model can be scored
        # after every pass over the training data.
        model.fit([xtrain, xctrain], ytrain, batch_size=128, epochs=1)
        # Model selection uses the development set only. Only the entity
        # F-score is tracked here: the checkpoint at mpath is overwritten
        # whenever that score improves.
        y_predict = model.predict([xvaild, xcvaild], batch_size=64)
        pre_label = ypre2label(y_predict)
        pre_e, temp1, temp2 = label2answer(pre_label, vaildtokens)
        pe, re, fe, fe_label = computeFe(gold_e_vaild, pre_e)
        fpout.write(u'第%s次开发集F值分别为:%f\n' % (str(i + 1), fe))
        if fe > maxf_e:
            maxf_e = fe
            save_model(mpath, model)
        # The test set is also scored every epoch for monitoring, but it never
        # takes part in choosing which model to keep.
        y_predict = model.predict([xtest, xctest], batch_size=128)
        pre_label = ypre2label(y_predict)
        pre_e, temp1, temp2 = label2answer(pre_label, testtokens)
        pe, re, fe, fe_label = computeFe(gold_e, pre_e)
        fpout.write(u'第%s次测试集F值分别为:%f\n' % (str(i + 1), fe))
finally:
    fpout.close()

# Reload the checkpoint stored at mpath into `model`; per the original comment
# it is then used to predict on the test set (that code is not shown here).
# NOTE(review): the original comment called this the model with the highest
# relation F-score, but the training loop above writes to mpath whenever the
# development-set entity F-score (fe) improves -- confirm which is intended.
load_model(mpath, model)
Example #3
0
# Alternative optimizer, currently disabled:
# opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
# Configure training with categorical cross-entropy loss, an accuracy metric
# and whatever optimizer is bound to `opt` (defined outside this excerpt).
model.compile(optimizer=opt,
              metrics=['accuracy'],
              loss='categorical_crossentropy')

# Record the F-score of every epoch for this experiment configuration.
fpath = u'../data/fscore/Fscore-bils%s-ls%s-%s.txt' % (str(bils), str(ls),
                                                       label_mode)
mpath = u'../data/model/bils%s-ls%s-%s.h5' % (str(bils), str(ls), label_mode)
fpout = cs.open(fpath, 'w', 'utf-8')
# NOTE(review): maxf_e is read below but not initialised in this excerpt;
# presumably it is set earlier in the file -- confirm.
# try/finally guarantees the score log is closed (and therefore flushed) even
# when an epoch raises or the run is interrupted part-way through.
try:
    for i in range(100):
        # Train exactly one epoch per iteration so the model can be scored
        # after every pass over the training data.
        model.fit(xtrain, ytrain, batch_size=128, epochs=1)
        # Model selection uses the development set only. Only the entity
        # F-score is tracked here: the checkpoint at mpath is overwritten
        # whenever that score improves.
        y_predict = model.predict(xvaild, batch_size=128)
        pre_label = ypre2label(y_predict)
        pre_e, e_token = label2answer(pre_label, vaildtokens)
        # Console messages use the same 1-based epoch number as the log file
        # (they previously printed the 0-based loop index).
        print(u'第%s次训练开发集的结果为' % (str(i + 1)))
        pe, re, fe = computeFe(gold_e_vaild, pre_e)
        fpout.write(u'第%s次开发集F值分别为:%f\n' % (str(i + 1), fe))
        if fe > maxf_e:
            maxf_e = fe
            save_model(mpath, model)
        # The test set is also scored every epoch for monitoring, but it never
        # takes part in choosing which model to keep.
        y_predict = model.predict(xtest, batch_size=128)
        pre_label = ypre2label(y_predict)
        pre_e, e_token = label2answer(pre_label, testtokens)
        print(u'第%s次训练测试集的结果为' % (str(i + 1)))
        pe, re, fe = computeFe(gold_e, pre_e)
        fpout.write(u'第%s次测试集F值分别为:%f\n\n' % (str(i + 1), fe))
finally:
    fpout.close()
Example #4
0
# Score bookkeeping, all starting at zero. Judging by the names these track
# the best test-set results and the epoch they occurred in -- their updates
# are not visible in this excerpt, so confirm against the rest of the loop.
maxtest_ir, besttest_fr_i = 0, 0
besttest_fr = besttest_fe = 0.0
# Fixed output locations for this run: the per-epoch score log and the model
# checkpoint path.
mpath = '../data/model/crf+bils400-tanh200-wv100_luo.h5'
fpath = '../data/fscore/crf+bils400-tanh200-wv100_luo.txt'
fpout = cs.open(fpath, 'w', 'utf-8')

# Train for up to 50 epochs, scoring entities and relations on both the
# development set and the test set after every epoch.
for i in range(50):
    print u'第%d轮次:' % (i + 1)
    model.fit([xtrain, xctrain], ytrain, batch_size=32,
              epochs=1)  # epochs: number of passes over the training data
    # Score the development set. The original note says the dev F-score decides
    # whether to save the model (entity-best and relation-best separately), but
    # the checkpointing code below is commented out in this excerpt.
    y_predict = model.predict([xvaild, xcvaild], batch_size=32)
    pre_label = ypre2label(y_predict)
    pre_e, pre_r = label2answer(pre_label, vaildtokens)
    dpe, dre, dfe, dfe_label = computeFe(gold_e_vaild, pre_e)
    dpr, drr, dfr, dfr_label = computeFr(gold_r_vaild, pre_r)
    print u'第%s次开发集F值分别为:%f  %f\n' % (str(i + 1), dfe, dfr)
    fpout.write(u'第%s次开发集F值分别为:%f  %f\n' % (str(i + 1), dfe, dfr))
    #if fr > maxf_r:
    #    maxf_r = fr
    #    save_model(mpath, model)
    # The test set is also scored every epoch for monitoring, but it does not
    # take part in model selection.
    y_predict = model.predict([xtest, xctest], batch_size=32)
    pre_label = ypre2label(y_predict)
    pre_e, pre_r = label2answer(pre_label, testtokens)
    #print (u'第%s次训练测试集的结果为'%(str(i)))
    tpe, tre, tfe, tfe_label = computeFe(gold_e, pre_e)
    tpr, trr, tfr, tfr_label = computeFr(gold_r, pre_r)
    print u'第%s次测试集F值分别为:%f  %f \n' % (str(i + 1), tfe, tfr)