def test_model_global(nn_model, testdata, chardata, pos_data, index2word,
                      resultfile='', batch_size=50):
    """Evaluate a joint (global) NER model that predicts BIOES segment tags
    and entity types in two output heads, merging them into full tags.

    Args:
        nn_model: trained Keras model; ``predict`` must return a pair of
            per-token softmax arrays (BIOES head, Type head).
        testdata: indexable dataset; slot 0 is word indices, slot 1 the
            one-hot gold full-tag labels (slots 3/4 are unused here).
        chardata: character-level input features.
        pos_data: POS labels (accepted for interface compatibility; unused).
        index2word: index -> full tag string mapping; entry 0 is blanked
            so padding decodes to ''.
        resultfile: optional path forwarded to evaluation_NER.
        batch_size: unused; kept for caller compatibility.

    Returns:
        (P, R, F, PR_count, P_count, TR_count) from evaluation_NER.
    """
    index2word[0] = ''
    index2word_BIOES = {0: '', 1: 'B', 2: 'I', 3: 'O', 4: 'E', 5: 'S'}
    index2word_Type = {0: '', 1: 'O', 2: 'LOC', 3: 'ORG', 4: 'PER', 5: 'MISC'}

    testx = np.asarray(testdata[0], dtype="int32")
    testy = np.asarray(testdata[1], dtype="int32")
    testchar = np.asarray(chardata, dtype="int32")

    # predictions[0]: BIOES head, predictions[1]: Type head -- TODO confirm
    # against the model definition.
    predictions = nn_model.predict([testx, testchar])

    testresult3 = []
    for si in range(len(predictions[0])):
        ptag = []
        for iw, word in enumerate(predictions[0][si]):
            next_token = index2word_BIOES[np.argmax(word)]
            next_token2 = index2word_Type[np.argmax(predictions[1][si][iw])]
            # 'O' and padding carry no entity type; everything else is
            # combined into the conventional "BIOES-Type" tag.
            if next_token == 'O' or next_token == '':
                ptag.append(next_token)
            else:
                ptag.append(next_token + '-' + next_token2)
        # Gold tags come from the one-hot full-tag labels.
        ttag = [index2word[np.argmax(word)] for word in testy[si]]
        testresult3.append([ptag, ttag])

    P, R, F, PR_count, P_count, TR_count = evaluation_NER(
        testresult3, resultfile=resultfile)
    print('global---Type>>>>>>>>>>', P, R, F)
    return P, R, F, PR_count, P_count, TR_count
def test_model(nn_model, inputs_test_x, test_y, index2word,
               resultfile='', batch_size=10):
    """Decode per-token predictions and gold labels to tag strings and
    score them with evaluation_NER.

    Args:
        nn_model: trained Keras model; ``predict`` returns per-token
            softmax distributions for each sentence.
        inputs_test_x: model inputs, passed through to ``predict``.
        test_y: gold labels; ``test_y[0]`` holds the one-hot tag arrays
            aligned with the predictions.
        index2word: index -> tag string mapping; entry 0 is blanked so
            padding decodes to ''.
        resultfile: unused; kept for caller compatibility.
        batch_size: unused; kept for caller compatibility.

    Returns:
        (P, R, F, PR_count, P_count, TR_count) from evaluation_NER.
    """
    index2word[0] = ''
    predictions = nn_model.predict(inputs_test_x)

    testresult = []
    for si in range(len(predictions)):
        # Predicted tags: argmax over each token's distribution.
        ptag = [index2word[np.argmax(word)] for word in predictions[si]]
        # Gold tags: argmax over each token's one-hot vector.
        ttag = [index2word[np.argmax(word)] for word in test_y[0][si]]
        testresult.append([ptag, ttag])

    P, R, F, PR_count, P_count, TR_count = evaluation_NER(testresult)
    return P, R, F, PR_count, P_count, TR_count
def test_model(nn_model, testdata, chardata, pos_data, index2word,
               resultfile='', batch_size=50):
    """Evaluate a word+char NER model: decode predictions and gold labels
    to tag strings and score them with evaluation_NER.

    Args:
        nn_model: trained Keras model taking [word indices, char features].
        testdata: indexable dataset; slot 0 is word indices, slot 1 the
            one-hot gold labels.
        chardata: character-level input features.
        pos_data: POS labels (accepted for interface compatibility; unused).
        index2word: index -> tag string mapping; entry 0 is blanked so
            padding decodes to ''.
        resultfile: path forwarded to evaluation_NER.
        batch_size: unused; kept for caller compatibility.

    Returns:
        (P, R, F, PR_count, P_count, TR_count) from evaluation_NER.
    """
    index2word[0] = ''
    testx = np.asarray(testdata[0], dtype="int32")
    testy = np.asarray(testdata[1], dtype="int32")
    testchar = np.asarray(chardata, dtype="int32")

    predictions = nn_model.predict([testx, testchar])

    testresult = []
    for si in range(len(predictions)):
        ptag = [index2word[np.argmax(word)] for word in predictions[si]]
        ttag = [index2word[np.argmax(word)] for word in testy[si]]
        testresult.append([ptag, ttag])

    P, R, F, PR_count, P_count, TR_count = evaluation_NER(
        testresult, resultfile)
    return P, R, F, PR_count, P_count, TR_count
def test_model(nn_model, inputs_test_x, test_y, index2word,
               resultfile='', batch_size=10, testfile=''):
    """Score model predictions against gold labels with evaluation_NER.

    Predicted tags are decoded for every token; gold tag decoding stops at
    the first index-0 (padding) token, so gold sequences are truncated at
    padding while predicted sequences are not.

    Returns:
        (P, R, F, PR_count, P_count, TR_count) from evaluation_NER.
    """
    index2word[0] = ''
    outputs = nn_model.predict(inputs_test_x)

    paired_tags = []
    for sent_idx, sent_scores in enumerate(outputs):
        # Predicted tags: decode every token.
        predicted = [index2word[np.argmax(tok)] for tok in sent_scores]

        # Gold tags: stop at the first padding index.
        gold = []
        for tok in test_y[0][sent_idx]:
            best = np.argmax(tok)
            if best == 0:
                break
            gold.append(index2word[best])

        paired_tags.append([predicted, gold])

    P, R, F, PR_count, P_count, TR_count = evaluation_NER(paired_tags)
    return P, R, F, PR_count, P_count, TR_count
def test_model(nn_model, index2word, batch_size=10):
    """Run the model over the module-level test set via a batch generator
    and score the decoded tags with evaluation_NER.

    Relies on module globals: ``test``, ``test_posi``, ``test_label``,
    ``max_s`` and ``data_generator_4test``.

    Returns:
        (P, R, F, PR_count, P_count, TR_count) from evaluation_NER.
    """
    # index2word[0] = ''
    test_D = data_generator_4test(test, test_posi,
                                  batch_size=batch_size, maxlen=max_s)
    predictions = nn_model.predict_generator(generator=test_D.__iter__(),
                                             steps=len(test_D), verbose=1)
    print(len(test), len(predictions))

    paired_tags = []
    for sent_idx, sent_scores in enumerate(predictions):
        gold_tags = []
        pred_tags = []
        for tok_idx, gold_tok in enumerate(test_label[sent_idx]):
            gold_tags.append(index2word[np.argmax(gold_tok)])
            pred_tags.append(index2word[np.argmax(sent_scores[tok_idx])])
        paired_tags.append([pred_tags, gold_tags])

    P, R, F, PR_count, P_count, TR_count = evaluation_NER(paired_tags)
    return P, R, F, PR_count, P_count, TR_count
def test_model_segment(nn_model, testdata, chartest, index2tag):
    """Evaluate the BIOES segmentation head of a word+char model.

    Prints NER scores from evaluation_NER and returns, per sentence, the
    predicted BIOES tags with padding ('' entries) stripped.

    Args:
        nn_model: trained Keras model taking [word indices, char features].
        testdata: slot 0 is word indices, slot 1 the one-hot gold BIOES labels.
        chartest: character-level input features.
        index2tag: index -> BIOES tag mapping; entry 0 is blanked so padding
            decodes to ''.

    Returns:
        list of predicted, padding-free BIOES tag sequences.
    """
    index2tag[0] = ''
    word_input = np.asarray(testdata[0], dtype="int32")
    gold_bioes = np.asarray(testdata[1], dtype="int32")
    char_input = np.asarray(chartest, dtype="int32")

    outputs = nn_model.predict([word_input, char_input])

    stripped_predictions = []
    scored_pairs = []
    for sent_idx, sent_scores in enumerate(outputs):
        pred_tags = [index2tag[np.argmax(tok)] for tok in sent_scores]
        # Drop the padding entries for the per-sentence result.
        pred_nonpad = [t for t in pred_tags if t != '']
        true_tags = [index2tag[np.argmax(tok)] for tok in gold_bioes[sent_idx]]

        scored_pairs.append([pred_tags, true_tags])
        stripped_predictions.append(pred_nonpad)

    P, R, F, PR_count, P_count, TR_count = evaluation_NER(scored_pairs,
                                                          resultfile='')
    print('NER test results >>>>>>>>>>', P, R, F, PR_count, P_count, TR_count)
    return stripped_predictions
def test_model_divide(nn_model, testdata, chardata, pos_data, index2word,
                      resultfile='', batch_size=50):
    """Evaluate a divided (two-head) NER model: the BIOES segmentation head
    and the entity-type head are decoded and scored separately.

    If the type head's per-token distribution has 6 classes it is decoded
    with the fixed Type vocabulary; any other width switches decoding (and
    gold-label decoding) to the full ``index2word`` vocabulary and routes
    scoring through evaluation_NER instead of evaluation_NER_Type.

    Args:
        nn_model: trained Keras model; ``predict`` returns
            [BIOES head scores, Type head scores].
        testdata: slot 0 word indices, slot 1 one-hot full-tag labels,
            slot 3 one-hot BIOES labels, slot 4 one-hot Type labels.
        chardata: character-level input features.
        pos_data: POS labels (accepted for interface compatibility; unused).
        index2word: index -> full tag mapping; entry 0 is blanked so
            padding decodes to ''.
        resultfile: base path; '.BIORS.txt' / '.Type.txt' suffixes are
            appended for the per-head result files.
        batch_size: unused; kept for caller compatibility.

    Returns:
        (P, R, F, PR_count, P_count, TR_count) for the type evaluation
        (the BIOES scores are only printed).
    """
    index2word[0] = ''
    index2word_BIOES = {0: '', 1: 'B', 2: 'I', 3: 'O', 4: 'E', 5: 'S'}
    index2word_Type = {0: '', 1: 'O', 2: 'LOC', 3: 'ORG', 4: 'PER', 5: 'MISC'}

    testx = np.asarray(testdata[0], dtype="int32")
    testy = np.asarray(testdata[1], dtype="int32")
    testy_BIOES = np.asarray(testdata[3], dtype="int32")
    testy_Type = np.asarray(testdata[4], dtype="int32")
    testchar = np.asarray(chardata, dtype="int32")

    predictions = nn_model.predict([testx, testchar])

    testresult2 = []  # (predicted, gold) BIOES tag pairs
    testresult3 = []  # (predicted, gold) type tag pairs
    isfinall = False  # True once a type vector is not 6-wide (full vocab)
    for si in range(len(predictions[0])):
        ptag_BIOES = [index2word_BIOES[np.argmax(word)]
                      for word in predictions[0][si]]

        ptag_Type = []
        for word in predictions[1][si]:
            next_index = np.argmax(word)
            if len(word) == 6:
                ptag_Type.append(index2word_Type[next_index])
            else:
                # Wider output layer: decode with the full tag vocabulary
                # and remember to score with evaluation_NER below.
                isfinall = True
                ptag_Type.append(index2word[next_index])

        ttag_BIOES = [index2word_BIOES[np.argmax(word)]
                      for word in testy_BIOES[si]]

        if isfinall:
            ttag_Type = [index2word[np.argmax(word)] for word in testy[si]]
        else:
            ttag_Type = [index2word_Type[np.argmax(word)]
                         for word in testy_Type[si]]

        testresult2.append([ptag_BIOES, ttag_BIOES])
        testresult3.append([ptag_Type, ttag_Type])

    P, R, F, PR_count, P_count, TR_count = evaluation_NER_BIOES(
        testresult2, resultfile=resultfile + '.BIORS.txt')
    print('divide---BIOES>>>>>>>>>>', P, R, F)

    if isfinall:
        P, R, F, PR_count, P_count, TR_count = evaluation_NER(
            testresult3, resultfile=resultfile)
    else:
        P, R, F, PR_count, P_count, TR_count = evaluation_NER_Type(
            testresult3, resultfile=resultfile + '.Type.txt')
    print('divide---Type>>>>>>>>>>', P, R, F)
    return P, R, F, PR_count, P_count, TR_count