# NOTE(review): the statements down to fp.close() are the tail of a
# token-writing section whose beginning (including any enclosing loop header)
# lies above this chunk. They are laid out flat here; re-indent them to match
# the enclosing loop(s) when merging -- TODO confirm.
fp.write(token[0])
if len(token) == 2:
    fp.write('\t' + token[1])
else:
    # Any other token length: emit the last field as the second column.
    fp.write('\t' + token[-1])
fp.write('\n')
fp.write('\n')
fp.close()

# Inspect where relation errors are introduced by the labelling itself:
# decode the labels that GetXY reads from the test file back into relations
# and compare them with gold_relation, sentence by sentence.
from GenarateXY import GetXY
from utils import label2answer, loadtokens, computeFr

# Built once so that loadtokens and GetXY are guaranteed to read the same file.
test_path = u'../data/DDI_test_%s_Y.txt' % (label_mode)
testtokens = loadtokens(test_path)
xtest, y_test, xctest = GetXY(test_path, mask=True)
predict_e, predict_r = label2answer(y_test, testtokens)

for i, gold in enumerate(gold_relation):
    # Plain inequality replaces cmp(...) != 0; cmp() no longer exists in
    # Python 3 and the result is the same for built-in sequences.
    if gold != predict_r[i]:
        print(senindex2file[i])
        print(' '.join(testtokens[i]))
        print(gold)
        print(predict_r[i])
        print('\n')

pr, rr, fr, frlabel = computeFr(gold_relation, predict_r)
print(u'由goldy得到的关系的PRF为%f %f %f' % (pr, rr, fr))

## Count the number of overlapping relations (disabled)
#num = 0
#for relas in gold_relation:
#    for i in range(len(relas)):
# Record the F-score of every training round for this experiment setting.
maxf_e = 0.0
fpath = '../data/fscore/crf+bils-bils%s-ls%s-wv%s.txt' % (str(bils), str(ls), str(wv))
mpath = '../data/model/crf+bils-bils%s-ls%s-wv%s.h5' % (str(bils), str(ls), str(wv))
fpout = cs.open(fpath, 'w', 'utf-8')
try:
    for i in range(40):
        print(u'第%d轮次:' % (i + 1))
        # epochs=1: one pass over the training data per round, so that the
        # model can be evaluated after every pass.
        model.fit([xtrain, xctrain], ytrain, batch_size=128, epochs=1)

        # Model selection uses the development set only: the model is saved
        # whenever the dev-set entity F improves on the best value so far.
        y_predict = model.predict([xvaild, xcvaild], batch_size=64)
        pre_label = ypre2label(y_predict)
        pre_e, temp1, temp2 = label2answer(pre_label, vaildtokens)
        pe, re, fe, fe_label = computeFe(gold_e_vaild, pre_e)
        fpout.write(u'第%s次开发集F值分别为:%f\n' % (str(i + 1), fe))
        if fe > maxf_e:
            maxf_e = fe
            save_model(mpath, model)

        # Every round is additionally scored on the test set for the log,
        # but the test set takes no part in choosing the model.
        y_predict = model.predict([xtest, xctest], batch_size=128)
        pre_label = ypre2label(y_predict)
        pre_e, temp1, temp2 = label2answer(pre_label, testtokens)
        pe, re, fe, fe_label = computeFe(gold_e, pre_e)
        fpout.write(u'第%s次测试集F值分别为:%f\n' % (str(i + 1), fe))
finally:
    # Close the score log even if training or prediction raises, so the
    # rounds already written are flushed and the handle is not leaked.
    fpout.close()

# Load back what was saved at mpath, i.e. the model with the best dev-set
# entity F (the original note said "relation F", but only computeFe drives the
# save above), for prediction on the test set.
load_model(mpath, model)
#opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# Record the F-score of every training round for this experiment setting.
fpath = u'../data/fscore/Fscore-bils%s-ls%s-%s.txt' % (str(bils), str(ls), label_mode)
mpath = u'../data/model/bils%s-ls%s-%s.h5' % (str(bils), str(ls), label_mode)
fpout = cs.open(fpath, 'w', 'utf-8')
# NOTE(review): maxf_e is read below before any assignment in this chunk; it
# has to be initialised earlier in the file -- confirm.
try:
    for i in range(100):
        # epochs=1: one pass over the training data per round, so that the
        # model can be evaluated after every pass.
        model.fit(xtrain, ytrain, batch_size=128, epochs=1)

        # Model selection uses the development set only: the model is saved
        # whenever the dev-set entity F improves on the best value so far.
        y_predict = model.predict(xvaild, batch_size=128)
        pre_label = ypre2label(y_predict)
        pre_e, e_token = label2answer(pre_label, vaildtokens)
        # NOTE(review): the console messages use the 0-based i while the log
        # file uses i + 1; left unchanged here -- confirm which is intended.
        print(u'第%s次训练开发集的结果为' % (str(i)))
        pe, re, fe = computeFe(gold_e_vaild, pre_e)
        fpout.write(u'第%s次开发集F值分别为:%f\n' % (str(i + 1), fe))
        if fe > maxf_e:
            maxf_e = fe
            save_model(mpath, model)

        # Every round is additionally scored on the test set for the log,
        # but the test set takes no part in choosing the model.
        y_predict = model.predict(xtest, batch_size=128)
        pre_label = ypre2label(y_predict)
        pre_e, e_token = label2answer(pre_label, testtokens)
        print(u'第%s次训练测试集的结果为' % (str(i)))
        pe, re, fe = computeFe(gold_e, pre_e)
        fpout.write(u'第%s次测试集F值分别为:%f\n\n' % (str(i + 1), fe))
finally:
    # Close the score log even if training or prediction raises, so the
    # rounds already written are flushed and the handle is not leaked.
    fpout.close()
# Output locations for the per-round score log and the model file.
# mpath is only referenced by the disabled save logic further down.
fpath = '../data/fscore/crf+bils400-tanh200-wv100_luo.txt'
mpath = '../data/model/crf+bils400-tanh200-wv100_luo.h5'

# Presumably best-score trackers; they are only initialised here and nothing
# in this chunk updates them -- confirm against the code that follows.
maxtest_ir = 0
besttest_fr = 0.0
besttest_fe = 0.0
besttest_fr_i = 0

fpout = cs.open(fpath, 'w', 'utf-8')

for i in range(50):
    print(u'第%d轮次:' % (i + 1))
    # epochs=1: one pass over the training data per round, so that the model
    # can be scored after every pass.
    model.fit([xtrain, xctrain], ytrain, batch_size=32, epochs=1)

    # Development set: entity F and relation F are computed and reported both
    # on the console and in the score log.
    y_predict = model.predict([xvaild, xcvaild], batch_size=32)
    pre_label = ypre2label(y_predict)
    pre_e, pre_r = label2answer(pre_label, vaildtokens)
    dpe, dre, dfe, dfe_label = computeFe(gold_e_vaild, pre_e)
    dpr, drr, dfr, dfr_label = computeFr(gold_r_vaild, pre_r)
    dev_report = u'第%s次开发集F值分别为:%f %f\n' % (str(i + 1), dfe, dfr)
    print(dev_report)
    fpout.write(dev_report)
    # Saving on the best relation F is currently disabled:
    #if fr > maxf_r:
    #    maxf_r = fr
    #    save_model(mpath, model)

    # Test set: scored every round for reference only; it takes no part in
    # choosing the model.
    y_predict = model.predict([xtest, xctest], batch_size=32)
    pre_label = ypre2label(y_predict)
    pre_e, pre_r = label2answer(pre_label, testtokens)
    #print (u'第%s次训练测试集的结果为'%(str(i)))
    tpe, tre, tfe, tfe_label = computeFe(gold_e, pre_e)
    tpr, trr, tfr, tfr_label = computeFr(gold_r, pre_r)
    print(u'第%s次测试集F值分别为:%f %f \n' % (str(i + 1), tfe, tfr))