'O', 'B-ORG', 'B-MISC', 'B-PER', 'I-PER', 'B-LOC', 'I-ORG', 'I-MISC', 'I-LOC' } for i in range(len(return_list)): if not return_list[i] in check_set: return_list[i] = 'O' print("There is a error check: ", tag, " in index of ", i) return return_list bert_vote_list = clean_prediction(bert_vote_list) elmo_vote_list = clean_prediction(elmo_vote_list) xlnet_vote_list = clean_prediction(xlnet_vote_list) print("--------------BERT EVALUATION---------------") print(evaluate(real, bert_vote_list)) print("-------------------END----------------------") print("--------------ELMO EVALUATION---------------") print(evaluate(real, elmo_vote_list)) print("-------------------END----------------------") print("--------------XLNET EVALUATION---------------") print(evaluate(real, xlnet_vote_list)) print("-------------------END----------------------") def get_position_tag(tag_str): dash_index = tag_str.find("-") if dash_index == -1: return tag_str
# (tail of a prediction helper — its `def` line lies above this chunk)
    # Collapse flat per-token scores to a (tokens, labels) matrix, take the
    # argmax label id per token, and map ids back to tag strings.
    # NOTE(review): `scores`, `labels`, `tag_dictionary` come from outside this
    # chunk — presumably a flair tag dictionary; confirm against the caller.
    result = np.reshape(scores, (-1, len(labels)))
    id_result = np.argmax(result, axis=1)
    la_result = [tag_dictionary.get_item_for_index(i) for i in id_result]
    return la_result


#################BERT+ELMO MODEL#########################
print(
    "============================TWO MODEL:BERT+ELMO================================="
)
# Combine the two models' per-token probabilities in two ways and score both.
print("****** avg prediction ******")
avg_pred = get_mix_preds_by_result([bert_poss, elmo_poss], "avg")
print(evaluate(real, avg_pred))
print("****** confidence prediction ******")
confidence_pred = get_mix_preds_by_result([bert_poss, elmo_poss], "confidence")
print(evaluate(real, confidence_pred))
print(
    "============================TWO MODEL:BERT+ELMO================================="
)
#################BERT+ELMO MODEL#########################

#################BERT+XLNET MODEL#########################
print(
    "============================TWO MODEL:BERT+XLNET================================="
)
print("****** avg prediction ******")
avg_pred = get_mix_preds_by_result([bert_poss, xlnet_poss], "avg")
print(evaluate(real, avg_pred))
test_file='test.txt', dev_file='valid.txt') print(corpus) tag_type = 'ner' tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type) print(tag_dictionary) model = SequenceTagger.load(ARGS.path + '/best-model.pt') pred = [] real = [] for sentence in corpus.test: for token in sentence.tokens: real.append(token.get_tag("ner").value) def model_prediction(model): model_pred = [] for sentence in corpus.test: model.predict(sentence) for token in sentence.tokens: model_pred.append(token.get_tag("ner").value) return model_pred print("****** prediction ******") mdoel_pred = model_prediction(model) print(evaluate(real, mdoel_pred))
def _load_tag_column(csv_path):
    """Read a saved voting-result CSV and return its '0' column as a list."""
    return pd.read_csv(csv_path)['0'].values.tolist()


# Load the multi-model voting results that earlier steps dumped to CSV.
ebx_vote_list = _load_tag_column('result/tag/e_b_x.csv')
xfe_vote_list = _load_tag_column('result/tag/x_f_e.csv')
bfx_vote_list = _load_tag_column('result/tag/b_f_x.csv')
bef_vote_list = _load_tag_column('result/tag/b_e_f.csv')
xfeb_vote_list = _load_tag_column('result/tag/x_f_e_b.csv')
print("Already read all required file!")

# Score every single model against the gold tags.
print("--------------BERT EVALUATION---------------")
print(evaluate(real, bert_vote_list))
print("-------------------END----------------------")
print("--------------ELMO EVALUATION---------------")
print(evaluate(real, elmo_vote_list))
print("-------------------END----------------------")
print("--------------XLNET EVALUATION---------------")
print(evaluate(real, xlnet_vote_list))
print("-------------------END----------------------")
print("--------------POOL-FLAIR EVALUATION---------------")
# print(evaluate(real, flair_f_vote_list))
print(evaluate(real, flair_vote_list))
print("-------------------END----------------------")
# Build one ensemble over all four base taggers.
models = [bert_tagger, elmo_tagger, flair_tagger, xlnet_tagger]
ensemble_tagger = EnsembleTagger(models=models, tag_type=tag_type, mode='loss')

# Optionally (re)train the ensemble before evaluating it.
if ARGS.train:
    trainer: ModelTrainer = ModelTrainer(ensemble_tagger, corpus)
    trainer.train(
        model_path,
        learning_rate=ARGS.lr,
        mini_batch_size=ARGS.batch_size,
        max_epochs=ARGS.epoch,
    )

# Gold NER tags, one entry per token of the test split.
real = [
    token.get_tag("ner").value
    for sentence in corpus.test
    for token in sentence.tokens
]


def test(model, data):
    """Predict over `data` with `model` and return the flat list of per-token
    NER tags, in the same order as `real`."""
    predicted = []
    for sent in data:
        model.predict(sent, all_tag_prob=True)
        predicted.extend(tok.get_tag("ner").value for tok in sent.tokens)
    return predicted


ensemble_pred = test(ensemble_tagger, corpus.test)
print(evaluate(real, ensemble_pred))
xlnet512_poss_list = np.reshape(xlnet512_poss, (-1, len(labels)))

#################BERT+ELMO+XLNET MODEL#########################
print(
    "============================THREE MODEL:BERT+ELMO+XLNET================================="
)
# Per-token triples of probability vectors, ordered [bert, xlnet, elmo].
confidence_vote_lists = [
    [bert_poss_list[i], xlnet_poss_list[i], elmo_poss_list[i]]
    for i in range(len(bert_poss_list))
]
# One confidence-voted tag per token.
confidence_predict = [confidence_vote(triple) for triple in confidence_vote_lists]
print(evaluate(real, confidence_predict))
print(
    "============================THREE MODEL:BERT+ELMO+XLNET================================="
)
#################BERT+ELMO+XLNET MODEL#########################

#################BERT+ELMO+FLAIR MODEL#########################
print(
    "============================THREE MODEL:BERT+ELMO+FLAIR================================="
)
# Same scheme with flair in place of xlnet.
confidence_vote_lists = [
    [bert_poss_list[i], flair_poss_list[i], elmo_poss_list[i]]
    for i in range(len(bert_poss_list))
]
# (the voting loop for this combination continues past this chunk)
confidence_predict = []
# (tail of a SequenceTagger(...) constructor call whose opening lies above)
                        use_crf=True)
# model = SequenceTagger.load('./log/elmo/best-model.pt')
#
# from conlleval import evaluate
#
# Collect the gold NER tag of every token in the test split, in corpus order.
pred = []
real = []
for sentence in corpus.test:
    for token in sentence.tokens:
        real.append(token.get_tag("ner").value)


def model_prediction(model):
    """Run `model` over corpus.test and return the flat list of predicted
    per-token NER tags, in the same order as `real` above."""
    model_pred = []
    for sentence in corpus.test:
        model.predict(sentence)
        for token in sentence.tokens:
            model_pred.append(token.get_tag("ner").value)
    return model_pred


# NOTE(review): late import — presumably the CoNLL-2003 scoring script; confirm.
from eval.conlleval import evaluate

pool_flair_model = SequenceTagger.load(
    './log/pool_flair_f_20200330002549/best-model.pt')
print("****** pool_flair prediction ******")
pool_flair_pred = model_prediction(pool_flair_model)
print(evaluate(real, pool_flair_pred))