import kashgari
from kashgari.callbacks import EvalCallBack
from kashgari.embeddings import BERTEmbedding
from kashgari.tasks.labeling import BiLSTM_CRF_Model


def train_ner(x_train, y_train, x_valid, y_valid, x_test, y_test,
              sequence_length, epoch, batch_size, bert_model_path, model_save_path):
    """Train the BERT-BiLSTM-CRF model to extract internal features of symptom mentions."""
    bert_embedding = BERTEmbedding(bert_model_path,
                                   task=kashgari.LABELING,
                                   sequence_length=sequence_length)
    model = BiLSTM_CRF_Model(bert_embedding)

    # report validation and test metrics after every epoch
    eval_callback_val = EvalCallBack(kash_model=model, valid_x=x_valid, valid_y=y_valid, step=1)
    eval_callback_test = EvalCallBack(kash_model=model, valid_x=x_test, valid_y=y_test, step=1)

    model.fit(x_train, y_train,
              x_validate=x_valid, y_validate=y_valid,
              epochs=epoch, batch_size=batch_size,
              callbacks=[eval_callback_val, eval_callback_test])

    model.save(model_save_path)
    model.evaluate(x_test, y_test)
    return model
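# A minimal usage sketch for train_ner. The data paths, hyperparameters and BERT directory
# below are hypothetical placeholders, assuming the corpus is stored in CoNLL format readable
# by kashgari's DataReader; adjust them to the actual project layout.
from kashgari.corpus import DataReader

x_train, y_train = DataReader().read_conll_format_file('data/symptom.train')
x_valid, y_valid = DataReader().read_conll_format_file('data/symptom.dev')
x_test, y_test = DataReader().read_conll_format_file('data/symptom.test')

ner_model = train_ner(x_train, y_train, x_valid, y_valid, x_test, y_test,
                      sequence_length=128, epoch=10, batch_size=32,
                      bert_model_path='chinese_L-12_H-768_A-12',
                      model_save_path='models/symptom_ner.h5')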
import kashgari
from kashgari.corpus import ChineseDailyNerCorpus
from kashgari.embeddings import BERTEmbedding
from kashgari.tasks.labeling import BiLSTM_CRF_Model


def main():
    # train_x, train_y = ChineseDailyNerCorpus.load_data("train")
    # valid_x, valid_y = ChineseDailyNerCorpus.load_data("validate")
    test_x, test_y = ChineseDailyNerCorpus.load_data("test")
    # print(f"train data count: {len(train_x)}")
    # print(f"validate data count: {len(valid_x)}")
    print(f"test data count: {len(test_x)}")

    bert_embed = BERTEmbedding("models/chinese_L-12_H-768_A-12",
                               task=kashgari.LABELING,
                               sequence_length=100)
    model = BiLSTM_CRF_Model(bert_embed)
    # model.fit(
    #     train_x,
    #     train_y,
    #     x_validate=valid_x,
    #     y_validate=valid_y,
    #     epochs=1,
    #     batch_size=512,
    # )
    model.save("models/ner.h5")
    model.evaluate(test_x, test_y)

    # predict label sequences for the test set
    predictions = model.predict(test_x)
    print(predictions)
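# Entry point so the script can be run directly:
if __name__ == "__main__":
    main()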
import kashgari
from kashgari.embeddings import BERTEmbedding
from kashgari.tasks.labeling import BiLSTM_CRF_Model


def train_BERT_BiLSTM_CRF(train_test_divide=0.9, epoch=20,
                          path='/home/peitian_zhang/data/corpus/labeled_train.txt'):
    # getTrain loads parallel token/label sequences; a hypothetical sketch follows the next function
    train_x, train_y = getTrain(path)
    split = int(len(train_x) * train_test_divide) + 1
    x = train_x[:split]
    y = train_y[:split]

    bert = BERTEmbedding(
        model_folder='/home/peitian_zhang/data/chinese_L-12_H-768_A-12',
        sequence_length=400,
        task=kashgari.LABELING)
    model = BiLSTM_CRF_Model(bert)
    model.fit(x, y, x, y, epochs=epoch, batch_size=64)

    print('---------evaluate on train---------\n{}'.format(
        model.evaluate(train_x, train_y)))
    print('---------evaluate on test----------\n{}'.format(
        model.evaluate(train_x[split:], train_y[split:])))
    try:
        model.save('/home/peitian_zhang/models/bert_epoch_{}'.format(epoch))
        print('Success in saving!')
    except Exception:
        pass
    return model
def train_BiLSTM_CRF(train_test_divide=0.9, epoch=100,
                     path='/home/peitian_zhang/data/corpus/labeled_train.txt'):
    train_x, train_y = getTrain(path)
    model = BiLSTM_CRF_Model()  # no BERT embedding: Kashgari falls back to its default embedding
    split = int(len(train_x) * train_test_divide) + 1
    x = train_x[:split]
    y = train_y[:split]
    model.fit(x, y, x, y, epochs=epoch, batch_size=64)

    print('---------evaluate on train---------\n{}'.format(
        model.evaluate(train_x, train_y)))
    print('---------evaluate on test----------\n{}'.format(
        model.evaluate(train_x[split:], train_y[split:])))
    try:
        model.save('/home/peitian_zhang/models/bert_epoch_{}'.format(epoch))
        print('Success in saving!')
    except Exception:
        pass
    return model
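# `getTrain` is not defined in these snippets. A possible implementation, assuming the labeled
# corpus stores one token and its tag per line, separated by whitespace, with blank lines
# between sentences (a CoNLL-style layout). This is a hypothetical sketch, not the original helper.
def getTrain(path):
    """Read a CoNLL-style file into parallel lists of token and label sequences."""
    xs, ys = [], []
    tokens, tags = [], []
    with open(path, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:  # blank line marks a sentence boundary
                if tokens:
                    xs.append(tokens)
                    ys.append(tags)
                    tokens, tags = [], []
                continue
            token, tag = line.split()[:2]
            tokens.append(token)
            tags.append(tag)
    if tokens:  # flush the last sentence if the file does not end with a blank line
        xs.append(tokens)
        ys.append(tags)
    return xs, ys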
# -*- coding: utf-8 -*-
# time: 2019-08-09 16:47
# place: Zhichunlu Beijing

import kashgari
from kashgari.corpus import DataReader
from kashgari.embeddings import BERTEmbedding
from kashgari.tasks.labeling import BiLSTM_CRF_Model

train_x, train_y = DataReader().read_conll_format_file('./data/time.train')
valid_x, valid_y = DataReader().read_conll_format_file('./data/time.dev')
test_x, test_y = DataReader().read_conll_format_file('./data/time.test')

bert_embedding = BERTEmbedding('chinese_wwm_ext_L-12_H-768_A-12',
                               task=kashgari.LABELING,
                               sequence_length=128)

model = BiLSTM_CRF_Model(bert_embedding)
model.fit(train_x, train_y, valid_x, valid_y, batch_size=16, epochs=10)

model.save('time_ner.h5')
model.evaluate(test_x, test_y)
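# Once saved, the model can be reloaded for inference. A small sketch using
# kashgari.utils.load_model; the sample sentence is illustrative only and is split into
# characters to match the character-level training data.
from kashgari.utils import load_model

loaded_model = load_model('time_ner.h5')
sample = [list('周日晚上八点在北京开会')]  # "Meeting in Beijing at eight on Sunday evening"
print(loaded_model.predict(sample))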
import kashgari
from kashgari.embeddings import BERTEmbedding
from kashgari.tasks.labeling import BiLSTM_CRF_Model
from sklearn.model_selection import train_test_split

# `datafile` and `labelfile` are assumed to be parallel iterables of lines: one
# space-separated token sequence per line and the matching label sequence.
words, labels = [], []
count = 0
for data, label in zip(datafile, labelfile):
    count += 1
    s1 = data.strip().split(' ')
    s2 = label.strip().split(' ')
    words.append(s1)
    labels.append(s2)

train_x, test_x, train_y, test_y = train_test_split(words, labels,
                                                    test_size=0.5,
                                                    random_state=50)

bert_embed = BERTEmbedding('uncased_L-12_H-768_A-12',
                           trainable=False,
                           task=kashgari.LABELING,
                           sequence_length=20)

model = BiLSTM_CRF_Model(bert_embed)
model.fit(train_x, train_y,
          x_validate=test_x, y_validate=test_y,
          epochs=35, batch_size=256)

model.save('model_bilstm_crf_35_256_64')
model.evaluate(x_data=test_x, y_data=test_y, batch_size=64, debug_info=True)
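# Note: `datafile` and `labelfile` above must be opened before the loop. A hypothetical way to
# do so, assuming two parallel text files (file names are placeholders):
#
# datafile = open('data/sentences.txt', encoding='utf-8')
# labelfile = open('data/labels.txt', encoding='utf-8')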
import pickle

import kashgari
from kashgari.embeddings import BertEmbedding
from kashgari.tasks.labeling import BiLSTM_CRF_Model
import tensorflow as tf

with open('data.pickle', 'rb') as f:
    data_dic = pickle.load(f)

x_train = data_dic[0]
x_validation = data_dic[1]
y_train = data_dic[2]
y_validation = data_dic[3]

embedding = BertEmbedding('bert-base-chinese', sequence_length=128)

model = BiLSTM_CRF_Model(embedding)
model.fit(
    x_train=x_train,
    x_validate=x_validation,
    y_train=y_train,
    y_validate=y_validation,
    epochs=5,
    batch_size=32,
)

model.save('Model')
model.evaluate(x_data=x_validation, y_data=y_validation)
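# For reference, data.pickle is expected to hold the four splits in the order
# [x_train, x_validation, y_train, y_validation]. A hypothetical way such a file could be
# produced from already prepared splits:
#
# with open('data.pickle', 'wb') as f:
#     pickle.dump([x_train, x_validation, y_train, y_validation], f)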