コード例 #1
0
def get_dic(path, t2i_path, l2i_path):
    """Build token and label vocabularies from a JSON-lines file and save them.

    Every record returned by ``read_jsonline(path)`` must provide a ``'word'``
    sequence and a ``'tag'`` sequence. Distinct words receive ids starting at
    1 (id 0 is reserved for the ``"unk"`` placeholder); distinct tags receive
    ids starting at 0.

    Ids are assigned in sorted order so that repeated runs over the same data
    produce identical mappings.

    Args:
        path: Input location, handed to ``read_jsonline``.
        t2i_path: Destination handed to ``write_json`` for the token -> id map.
        l2i_path: Destination handed to ``write_json`` for the label -> id map.
    """
    sentences = read_jsonline(path)

    # Iterating a set of strings yields a different order on every interpreter
    # run (hash randomization), which would make the saved ids irreproducible;
    # sorting pins the assignment.
    words = {str(word) for sent in sentences for word in sent['word']}
    # "unk" always owns id 0. Drop a literal "unk" token from the corpus so it
    # does not consume an id that is then overwritten, leaving a gap.
    words.discard("unk")
    token_dic = {"unk": 0}
    token_dic.update(
        (word, idx) for idx, word in enumerate(sorted(words), start=1)
    )

    labels = sorted({str(tag) for sent in sentences for tag in sent['tag']})
    label2id = {tag: idx for idx, tag in enumerate(labels)}

    write_json(t2i_path, token_dic)
    write_json(l2i_path, label2id)
コード例 #2
0
def sentences_len(path):
    """Print and plot the distribution of sentence lengths.

    The length of a sentence is the number of items in the ``'word'`` field of
    each record returned by ``read_jsonline(path)``. The list of lengths is
    printed, then shown as a 40-bin histogram.

    Args:
        path: Input location, handed to ``read_jsonline``.
    """
    lengths = []
    for record in read_jsonline(path):
        lengths.append(len(record['word']))
    print(lengths)

    plt.hist(
        lengths,
        bins=40,
        facecolor="blue",
        edgecolor="black",
        alpha=0.7,
    )
    # Label for the horizontal axis.
    plt.xlabel("length")
    # Label for the vertical axis.
    plt.ylabel("nums")
    # Title of the figure.
    plt.title("statistic")
    plt.show()
コード例 #3
0
ファイル: bert_train.py プロジェクト: xiaoyulikebai/keras_nlp
        self.model.fit_generator(generator.__iter__(),
                                 steps_per_epoch=10000,
                                 epochs=3,
                                 callbacks=callbacks_list,
                                 validation_data=self.v_generator.__iter__(),
                                 nb_val_samples=200)

    def predict(self):
        """Placeholder for inference: currently does nothing and returns ``None``."""
        return None


if __name__ == '__main__':
    # Load the raw records from the data.jsonl file under ROOT_PATH.
    ROOT_PATH = '/Users/ouhon/PycharmProjects/keras_nlp_tutorial/NER/'
    path = ROOT_PATH + 'CCKS_2017/data/raw_data/data.jsonl'
    data = read_jsonline(path)

    # Tag name -> integer id.
    tag2i_dict = {
        'O': 0,
        'B-TREATMENT': 1,
        'I-TREATMENT': 2,
        'B-BODY': 3,
        'I-BODY': 4,
        'B-SIGNS': 5,
        'I-SIGNS': 6,
        'B-CHECK': 7,
        'I-CHECK': 8,
        'B-DISEASE': 9,
        'I-DISEASE': 10,
    }

    # Reverse lookup: id (as a string) -> tag name.
    i2tag_dict = {
        str(tag_id): str(tag_name) for tag_name, tag_id in tag2i_dict.items()
    }
    # Longest 'content' field across all records.
    max_len = max(len(record['content']) for record in data)
コード例 #4
0
# -*- coding: utf-8 -*-
# @Time    : 2020/6/5 2:24 PM
# @Author  : Benqi
from tools import read_jsonline, write_jsonline

# Filter the raw dataset: keep only the records whose 'tag' sequence contains
# more than one distinct tag, and write them to data2.jsonl.
f = read_jsonline(
    '/Users/ouhon/PycharmProjects/keras_nlp_tutorial/NER/CCKS_2017/data/raw_data/data.jsonl'
)
new = []
for record in f:
    # Records with at most one distinct tag are dropped.
    if len(set(record['tag'])) <= 1:
        continue
    new.append(record)
    print(record)

write_jsonline(
    "/Users/ouhon/PycharmProjects/keras_nlp_tutorial/NER/CCKS_2017/data/raw_data/data2.jsonl",
    new)
# Report the record counts before and after filtering.
print(len(f))
print(len(new))