예제 #1
0
def train_ner(x_train, y_train, x_valid, y_valid, x_test, y_test,
              sequence_length, epoch, batch_size, bert_model_path,
              model_save_path):
    """Train a BERT-BiLSTM-CRF model to extract internal features of symptoms.

    A BERT embedding is loaded from ``bert_model_path`` and wrapped in a
    ``BiLSTM_CRF_Model``. The model is fitted on the training split with one
    ``EvalCallBack`` attached for the validation split and one for the test
    split, saved to ``model_save_path``, and finally evaluated on the test
    split.

    Args:
        x_train, y_train: training samples and their label sequences.
        x_valid, y_valid: validation samples and labels; passed to ``fit``
            and to the first ``EvalCallBack``.
        x_test, y_test: test samples and labels; passed to the second
            ``EvalCallBack`` and to the final ``evaluate`` call.
        sequence_length: sequence length handed to the BERT embedding.
        epoch: number of epochs passed to ``fit``.
        batch_size: batch size passed to ``fit``.
        bert_model_path: location of the pre-trained BERT model.
        model_save_path: where the fitted model is saved.

    Returns:
        The fitted ``BiLSTM_CRF_Model`` instance.
    """
    model = BiLSTM_CRF_Model(
        BERTEmbedding(bert_model_path,
                      task=kashgari.LABELING,
                      sequence_length=sequence_length))

    # One callback per held-out split: validation first, then test, so the
    # callback order matches the order in which they are handed to fit().
    held_out_splits = ((x_valid, y_valid), (x_test, y_test))
    monitors = [
        EvalCallBack(kash_model=model,
                     valid_x=split_x,
                     valid_y=split_y,
                     step=1)
        for split_x, split_y in held_out_splits
    ]

    model.fit(x_train,
              y_train,
              x_validate=x_valid,
              y_validate=y_valid,
              epochs=epoch,
              batch_size=batch_size,
              callbacks=monitors)

    # Persist first, then run the final evaluation on the test split; the
    # evaluation result itself is not returned.
    model.save(model_save_path)
    model.evaluate(x_test, y_test)

    return model
예제 #2
0
def main():
    """Evaluate a BERT-BiLSTM-CRF tagger on the ChineseDailyNerCorpus test split.

    Loads the "test" split, prints its size, builds a ``BiLSTM_CRF_Model``
    on top of the BERT embedding in ``models/chinese_L-12_H-768_A-12``,
    saves the model to ``models/ner.h5``, evaluates it on the test split and
    prints the predicted classes.
    """
    # Only the test split is loaded because the training step is disabled.
    test_x, test_y = ChineseDailyNerCorpus.load_data("test")
    print(f"test data count: {len(test_x)}")

    bert_embed = BERTEmbedding("models/chinese_L-12_H-768_A-12",
                               task=kashgari.LABELING,
                               sequence_length=100)
    model = BiLSTM_CRF_Model(bert_embed)

    # NOTE(review): no fit() call happens here, so the model is saved and
    # evaluated without having been trained in this function - confirm that
    # this is intended. To train, load the "train" and "validate" splits via
    # ChineseDailyNerCorpus.load_data and call
    # model.fit(train_x, train_y, x_validate=valid_x, y_validate=valid_y,
    #           epochs=1, batch_size=512) before saving.
    model.save("models/ner.h5")
    model.evaluate(test_x, test_y)

    predictions = model.predict_classes(test_x)
    print(predictions)
예제 #3
0
def train_BERT_BiLSTM_CRF(
        train_test_devide=0.9,
        epoch=20,
        path='/home/peitian_zhang/data/corpus/labeled_train.txt'):
    """Train and evaluate a BERT-BiLSTM-CRF labeling model.

    The labeled data returned by ``getTrain(path)`` is split positionally:
    the first ``int(len(data) * train_test_devide) + 1`` samples are used
    for training and the remainder is held out for testing. After fitting,
    the model is evaluated on both parts and a best-effort save is attempted.

    Args:
        train_test_devide: fraction of the data used for training.
        epoch: number of training epochs; also embedded in the save path.
        path: labeled corpus file passed to ``getTrain``.

    Returns:
        The fitted ``BiLSTM_CRF_Model``. It is returned even when saving
        fails.
    """
    train_x, train_y = getTrain(path)

    # Compute the boundary once so the training and held-out slices can
    # never drift apart.
    split = int(len(train_x) * train_test_devide) + 1
    x, y = train_x[:split], train_y[:split]
    held_out_x, held_out_y = train_x[split:], train_y[split:]

    bert = BERTEmbedding(
        model_folder='/home/peitian_zhang/data/chinese_L-12_H-768_A-12',
        sequence_length=400,
        task=kashgari.LABELING)
    model = BiLSTM_CRF_Model(bert)

    # NOTE(review): the training slice is also passed as the validation
    # data, so validation metrics during fit are not held-out metrics.
    model.fit(x, y, x, y, epochs=epoch, batch_size=64)

    # Report on the training slice only; evaluating the full dataset here
    # would mix held-out samples into the "train" figure.
    print('---------evaluate on train---------\n{}'.format(
        model.evaluate(x, y)))
    print('---------evaluate on test----------\n{}'.format(
        model.evaluate(held_out_x, held_out_y)))

    save_path = '/home/peitian_zhang/models/bert_epoch_{}'.format(epoch)
    try:
        model.save(save_path)
    except Exception as exc:
        # Saving stays best-effort (the trained model is still returned),
        # but the failure is reported instead of being silently dropped.
        print('Failed to save model to {}: {!r}'.format(save_path, exc))
    else:
        print('Success in saving!')
    return model
예제 #4
0
def train_BiLSTM_CRF(train_test_devide=0.9,
                     epoch=100,
                     path='/home/peitian_zhang/data/corpus/labeled_train.txt'):
    """Train and evaluate a BiLSTM-CRF labeling model with its default embedding.

    The labeled data returned by ``getTrain(path)`` is split positionally:
    the first ``int(len(data) * train_test_devide) + 1`` samples are used
    for training and the remainder is held out for testing. After fitting,
    the model is evaluated on both parts and a best-effort save is attempted.

    Args:
        train_test_devide: fraction of the data used for training.
        epoch: number of training epochs; also embedded in the save path.
        path: labeled corpus file passed to ``getTrain``.

    Returns:
        The fitted ``BiLSTM_CRF_Model``. It is returned even when saving
        fails.
    """
    train_x, train_y = getTrain(path)
    model = BiLSTM_CRF_Model()

    # Compute the boundary once so the training and held-out slices can
    # never drift apart.
    split = int(len(train_x) * train_test_devide) + 1
    x, y = train_x[:split], train_y[:split]
    held_out_x, held_out_y = train_x[split:], train_y[split:]

    # NOTE(review): the training slice is also passed as the validation
    # data, so validation metrics during fit are not held-out metrics.
    model.fit(x, y, x, y, epochs=epoch, batch_size=64)

    # Report on the training slice only; evaluating the full dataset here
    # would mix held-out samples into the "train" figure.
    print('---------evaluate on train---------\n{}'.format(
        model.evaluate(x, y)))
    print('---------evaluate on test----------\n{}'.format(
        model.evaluate(held_out_x, held_out_y)))

    # NOTE(review): the path says "bert" although this model uses no BERT
    # embedding; it is kept unchanged because other code may load from it.
    save_path = '/home/peitian_zhang/models/bert_epoch_{}'.format(epoch)
    try:
        model.save(save_path)
    except Exception as exc:
        # Saving stays best-effort (the trained model is still returned),
        # but the failure is reported instead of being silently dropped.
        print('Failed to save model to {}: {!r}'.format(save_path, exc))
    else:
        print('Success in saving!')
    return model
# -*- coding: utf-8 -*-
# time: 2019-08-09 16:47
# place: Zhichunlu Beijing

import kashgari
from kashgari.corpus import DataReader
from kashgari.embeddings import BERTEmbedding
from kashgari.tasks.labeling import BiLSTM_CRF_Model

# Read the CoNLL-formatted train / dev / test files, in that order, each
# through its own DataReader instance.
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = (
    DataReader().read_conll_format_file(split_path)
    for split_path in ('./data/time.train',
                       './data/time.dev',
                       './data/time.test')
)

# BERT embedding configured for the labeling task.
bert_embedding = BERTEmbedding(
    'chinese_wwm_ext_L-12_H-768_A-12',
    task=kashgari.LABELING,
    sequence_length=128,
)

# Fit the BiLSTM-CRF tagger with the dev split as validation data.
model = BiLSTM_CRF_Model(bert_embedding)
model.fit(
    train_x,
    train_y,
    x_validate=valid_x,
    y_validate=valid_y,
    epochs=10,
    batch_size=16,
)

# Persist the trained model, then evaluate it on the test split.
model.save('time_ner.h5')
model.evaluate(test_x, test_y)
예제 #6
0
# Token sequences and their label sequences, one entry per line pair.
words, labels = [], []

# `count` ends up as the number of line pairs read (0 when there are none).
# NOTE(review): zip() stops at the shorter of the two inputs, so surplus
# lines in either file are ignored - confirm both files are aligned.
count = 0
for count, (data, label) in enumerate(zip(datafile, labelfile), start=1):
    words.append(data.strip().split(' '))
    labels.append(label.strip().split(' '))

# Fixed random_state makes the 50/50 split reproducible.
train_x, test_x, train_y, test_y = train_test_split(
    words,
    labels,
    test_size=0.5,
    random_state=50,
)

# BERT embedding for the labeling task, created with trainable=False.
bert_embed = BERTEmbedding(
    'uncased_L-12_H-768_A-12',
    trainable=False,
    task=kashgari.LABELING,
    sequence_length=20,
)

# The test split doubles as the validation data during fitting.
model = BiLSTM_CRF_Model(bert_embed)
model.fit(
    train_x,
    train_y,
    x_validate=test_x,
    y_validate=test_y,
    epochs=35,
    batch_size=256,
)

model.save('model_bilstm_crf_35_256_64')

model.evaluate(
    x_data=test_x,
    y_data=test_y,
    batch_size=64,
    debug_info=True,
)
예제 #7
0
import pickle
import kashgari
from kashgari.embeddings import BertEmbedding
from kashgari.tasks.labeling import BiLSTM_CRF_Model
import tensorflow as tf

# Load the pre-split dataset.
# NOTE: pickle can execute arbitrary code while loading; only use files
# from a trusted source here.
with open('data.pickle', 'rb') as pickle_file:
    data_dic = pickle.load(pickle_file)

# Entries 0..3 hold, in this order: training inputs, validation inputs,
# training labels, validation labels.
x_train, x_validation, y_train, y_validation = (
    data_dic[position] for position in range(4)
)

# BERT embedding feeding the BiLSTM-CRF tagger.
embedding = BertEmbedding('bert-base-chinese', sequence_length=128)
model = BiLSTM_CRF_Model(embedding)

model.fit(
    x_train=x_train,
    y_train=y_train,
    x_validate=x_validation,
    y_validate=y_validation,
    batch_size=32,
    epochs=5,
)

# Save the fitted model, then evaluate it on the validation data.
model.save('Model')
model.evaluate(x_data=x_validation, y_data=y_validation)