Esempio n. 1
0
        'O', 'B-ORG', 'B-MISC', 'B-PER', 'I-PER', 'B-LOC', 'I-ORG', 'I-MISC',
        'I-LOC'
    }
    for i in range(len(return_list)):
        if not return_list[i] in check_set:
            return_list[i] = 'O'
            print("There is a error check: ", tag, " in index of ", i)
    return return_list


# Pass each model's vote list through clean_prediction and rebind the name to
# the returned list, so the evaluations below work on the cleaned version.
bert_vote_list = clean_prediction(bert_vote_list)
elmo_vote_list = clean_prediction(elmo_vote_list)
xlnet_vote_list = clean_prediction(xlnet_vote_list)

# For every model: print a banner, print whatever evaluate() returns when the
# cleaned list is compared against `real`, then print a closing banner.
print("--------------BERT EVALUATION---------------")
print(evaluate(real, bert_vote_list))
print("-------------------END----------------------")

print("--------------ELMO EVALUATION---------------")
print(evaluate(real, elmo_vote_list))
print("-------------------END----------------------")

print("--------------XLNET EVALUATION---------------")
print(evaluate(real, xlnet_vote_list))
print("-------------------END----------------------")


def get_position_tag(tag_str):
    """Return *tag_str* unchanged when it contains no "-".

    NOTE(review): everything after the early return reads ``scores``,
    ``labels`` and ``tag_dictionary``, none of which are parameters or locals
    of this function, and never uses ``dash_index`` again. This looks like the
    tail of a different function spliced onto this one -- confirm against the
    original file before relying on it.
    """
    dash_index = tag_str.find("-")
    if dash_index == -1:
        # No dash in the tag: hand the string back as-is.
        return tag_str
    # View `scores` as rows of len(labels) entries each.
    result = np.reshape(scores, (-1, len(labels)))

    # Position of the largest entry in every row.
    id_result = np.argmax(result, axis=1)
    # Look each position up in tag_dictionary to get the corresponding item.
    la_result = [tag_dictionary.get_item_for_index(i) for i in id_result]

    return la_result


#################BERT+ELMO MODEL#########################
# Combine bert_poss and elmo_poss with get_mix_preds_by_result, once in "avg"
# mode and once in "confidence" mode, and print evaluate() against `real` for
# each combination.
print(
    "============================TWO MODEL:BERT+ELMO================================="
)
print("****** avg prediction ******")
avg_pred = get_mix_preds_by_result([bert_poss, elmo_poss], "avg")
print(evaluate(real, avg_pred))
print("****** confidence prediction ******")
confidence_pred = get_mix_preds_by_result([bert_poss, elmo_poss], "confidence")
print(evaluate(real, confidence_pred))
print(
    "============================TWO MODEL:BERT+ELMO================================="
)
#################BERT+ELMO MODEL#########################

#################BERT+XLNET MODEL#########################
# Same "avg" combination, this time pairing bert_poss with xlnet_poss.
# `avg_pred` is rebound here, replacing the BERT+ELMO result above.
print(
    "============================TWO MODEL:BERT+XLNET================================="
)
print("****** avg prediction ******")
avg_pred = get_mix_preds_by_result([bert_poss, xlnet_poss], "avg")
print(evaluate(real, avg_pred))
Esempio n. 3
0
                              test_file='test.txt',
                              dev_file='valid.txt')
print(corpus)

# Tag layer name handed to make_tag_dictionary; the dictionary is printed for
# inspection.
tag_type = 'ner'
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary)

# Load the tagger stored as best-model.pt under the directory in ARGS.path.
model = SequenceTagger.load(ARGS.path + '/best-model.pt')

pred = []
# "ner" tag value of every token in corpus.test, flattened sentence by
# sentence in iteration order. This is read before any prediction call appears
# in this part of the script.
real = [
    token.get_tag("ner").value
    for sentence in corpus.test
    for token in sentence.tokens
]


def model_prediction(model):
    """Run *model* over ``corpus.test`` and return the flat list of "ner" tag values.

    Every sentence of the module-level ``corpus.test`` is handed to
    ``model.predict`` first; only afterwards are its tokens read, so the
    collected values are whatever the tokens carry once ``predict`` has run.
    """
    collected = []
    for sent in corpus.test:
        model.predict(sent)
        collected.extend(tok.get_tag("ner").value for tok in sent.tokens)
    return collected


# Predict with the loaded model and print evaluate() of the result against
# `real`.
# NOTE(review): `mdoel_pred` looks like a misspelling of `model_pred`; the name
# is left untouched because other parts of the file may refer to it.
print("****** prediction ******")
mdoel_pred = model_prediction(model)
print(evaluate(real, mdoel_pred))
Esempio n. 4
0
# Each CSV under result/tag/ is read with pandas and its column named '0' is
# converted to a plain Python list.
e_b_x_read = pd.read_csv('result/tag/e_b_x.csv')
ebx_vote_list = e_b_x_read['0'].values.tolist()
x_f_e_read = pd.read_csv('result/tag/x_f_e.csv')
xfe_vote_list = x_f_e_read['0'].values.tolist()
b_f_x_read = pd.read_csv('result/tag/b_f_x.csv')
bfx_vote_list = b_f_x_read['0'].values.tolist()
b_e_f_read = pd.read_csv('result/tag/b_e_f.csv')
bef_vote_list = b_e_f_read['0'].values.tolist()

x_f_e_b_read = pd.read_csv('result/tag/x_f_e_b.csv')
xfeb_vote_list = x_f_e_b_read['0'].values.tolist()

print("Already read all required file!")

# Per-model evaluation against `real`. The *_vote_list names used below are not
# assigned in the lines above, so they must already exist at this point.
print("--------------BERT EVALUATION---------------")
print(evaluate(real, bert_vote_list))
print("-------------------END----------------------")

print("--------------ELMO EVALUATION---------------")
print(evaluate(real, elmo_vote_list))
print("-------------------END----------------------")

print("--------------XLNET EVALUATION---------------")
print(evaluate(real, xlnet_vote_list))
print("-------------------END----------------------")

# NOTE(review): the banner says POOL-FLAIR but the list evaluated is
# flair_vote_list; the flair_f_vote_list variant is commented out. Confirm
# which one is intended.
print("--------------POOL-FLAIR EVALUATION---------------")
# print(evaluate(real, flair_f_vote_list))
print(evaluate(real, flair_vote_list))
print("-------------------END----------------------")
Esempio n. 5
0
        models = [bert_tagger, elmo_tagger, flair_tagger, xlnet_tagger]

    ensemble_tagger = EnsembleTagger(models=models,
                                     tag_type=tag_type,
                                     mode='loss')
# Training only happens when ARGS.train is truthy; otherwise the script goes
# straight on with ensemble_tagger as it stands.
if ARGS.train:
    trainer: ModelTrainer = ModelTrainer(ensemble_tagger, corpus)

    # Learning rate, mini-batch size and epoch count are taken from ARGS;
    # model_path is passed as the first positional argument.
    trainer.train(model_path,
                  learning_rate=ARGS.lr,
                  mini_batch_size=ARGS.batch_size,
                  max_epochs=ARGS.epoch)

# "ner" tag value of every token in corpus.test, gathered one sentence at a
# time into a single flat list.
real = []
for sentence in corpus.test:
    real.extend(token.get_tag("ner").value for token in sentence.tokens)


def test(model, data):
    """Predict every sentence in *data* and return the flat list of "ner" tag values.

    Each sentence is passed to ``model.predict`` with ``all_tag_prob=True``
    before its tokens are read, so the collected values are whatever the
    tokens carry after that call.
    """
    results = []
    for sentence in data:
        model.predict(sentence, all_tag_prob=True)
        results += [token.get_tag("ner").value for token in sentence.tokens]
    return results


# Tag corpus.test with the ensemble and print evaluate() of those tags against
# `real`.
ensemble_pred = test(ensemble_tagger, corpus.test)
print(evaluate(real, ensemble_pred))
Esempio n. 6
0
# View xlnet512_poss as rows of len(labels) entries each.
xlnet512_poss_list = np.reshape(xlnet512_poss, (-1, len(labels)))

#################BERT+ELMO+XLNET MODEL#########################
print(
    "============================THREE MODEL:BERT+ELMO+XLNET================================="
)
# One [bert, xlnet, elmo] triple for every index of bert_poss_list.
confidence_vote_lists = [
    [bert_poss_list[idx], xlnet_poss_list[idx], elmo_poss_list[idx]]
    for idx in range(len(bert_poss_list))
]

# confidence_vote reduces each triple to a single prediction.
confidence_predict = [
    confidence_vote(votes) for votes in confidence_vote_lists
]

print(evaluate(real, confidence_predict))
print(
    "============================THREE MODEL:BERT+ELMO+XLNET================================="
)
#################BERT+ELMO+XLNET MODEL#########################

#################BERT+ELMO+FLAIR MODEL#########################
print(
    "============================THREE MODEL:BERT+ELMO+FLAIR================================="
)
# Same construction with flair_poss_list in the middle slot.
confidence_vote_lists = [
    [bert_poss_list[idx], flair_poss_list[idx], elmo_poss_list[idx]]
    for idx in range(len(bert_poss_list))
]

confidence_predict = []
Esempio n. 7
0
                                        use_crf=True)

# model = SequenceTagger.load('./log/elmo/best-model.pt')
#
# from conlleval import evaluate
#
pred = []
# Flat list holding the "ner" tag value of each token across corpus.test.
real = [
    tok.get_tag("ner").value
    for sent in corpus.test
    for tok in sent.tokens
]


def model_prediction(model):
    """Return the "ner" tag value of every token in ``corpus.test`` after prediction.

    Sentences of the module-level ``corpus.test`` are handled one at a time:
    ``model.predict`` is called on the sentence, then the tag values of its
    tokens are read. The per-sentence results are concatenated in order.
    """

    def _tags_after_predict(sentence):
        # Predict first so the tokens are read only after the model has run.
        model.predict(sentence)
        return [token.get_tag("ner").value for token in sentence.tokens]

    return [
        tag
        for sentence in corpus.test
        for tag in _tags_after_predict(sentence)
    ]


from eval.conlleval import evaluate

# Load the pooled-flair tagger from its log directory, predict with it through
# model_prediction, and print evaluate() of the result against `real`.
pool_flair_model = SequenceTagger.load(
    './log/pool_flair_f_20200330002549/best-model.pt')
print("****** pool_flair prediction ******")
pool_flair_pred = model_prediction(pool_flair_model)
print(evaluate(real, pool_flair_pred))