Code example #1
import json

# The remaining imports below are reconstructed for this excerpt: tf_ad is assumed
# to be tensorflow_addons, and NerModel / read_vocab / tokenize / format_result are
# assumed to live in the project's model and utils modules. `args` comes from an
# argparse parser defined earlier in the script (see code example #3).
import tensorflow as tf
import tensorflow_addons as tf_ad
from model import NerModel
from utils import read_vocab, tokenize, format_result


vocab2id, id2vocab = read_vocab(args.vocab_file)
tag2id, id2tag = read_vocab(args.tag_file)
text_sequences, label_sequences = tokenize(args.test_path, vocab2id, tag2id)



optimizer = tf.keras.optimizers.Adam(args.lr)
model = NerModel(hidden_num=args.hidden_num, vocab_size=len(vocab2id), label_size=len(tag2id), embedding_size=args.embedding_size)
# restore model
ckpt = tf.train.Checkpoint(optimizer=optimizer, model=model)
ckpt.restore(tf.train.latest_checkpoint(args.output_dir))
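
# format_result (imported from utils above) is not shown in this excerpt. The
# function below is only a hypothetical sketch of such a helper, assuming
# BIO-style tags; the project's real implementation may differ.
def format_result_sketch(chars, tags):
    entities, current = [], None
    for i, (ch, tag) in enumerate(zip(chars, tags)):
        if tag.startswith('B-'):
            if current:
                entities.append(current)
            current = {'begin': i, 'end': i + 1, 'words': ch, 'type': tag[2:]}
        elif tag.startswith('I-') and current and tag[2:] == current['type']:
            current['end'] = i + 1
            current['words'] += ch
        else:
            if current:
                entities.append(current)
            current = None
    if current:
        entities.append(current)
    return {'entities': entities}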


while True:
    text = input("input:")
    dataset = tf.keras.preprocessing.sequence.pad_sequences([[vocab2id.get(char,0) for char in text]], padding='post')
    print(dataset)
    logits, text_lens = model.predict(dataset)
    paths = []
    for logit, text_len in zip(logits, text_lens):
        viterbi_path, _ = tf_ad.text.viterbi_decode(logit[:text_len], model.transition_params)
        paths.append(viterbi_path)
    print(paths[0])
    tags = [id2tag[t] for t in paths[0]]  # map predicted tag ids back to tag strings
    print(tags)

    entities_result = format_result(list(text), tags)
    print(json.dumps(entities_result, indent=4, ensure_ascii=False))
Code example #2
        inter = list(set(pre) & set(true))
        correct += len(inter)
    print('Correctly predicted entities: ' + str(correct))
    print('Predicted entities: ' + str(pre_all))
    print('Gold entities: ' + str(true_all))
    # the +1 in the denominators guards against division by zero when no entities exist
    precision = correct / (pre_all + 1)
    recall = correct / (true_all + 1)
    f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0
    return precision, recall, f1
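

# The first half of evaluationMetrics (which builds the per-sentence entity lists
# `pre` and `true` intersected above) is not part of this excerpt. The helper below
# is only an illustrative sketch of how BIO tag sequences are commonly turned into
# comparable entity descriptors such as "2-4-PER" (start-end-type); the project's
# actual implementation may differ.
def extract_entities_sketch(tags):
    entities, start, etype = [], None, None
    for i, tag in enumerate(list(tags) + ['O']):  # trailing 'O' flushes the last entity
        if start is not None and not tag.startswith('I-'):
            entities.append('{}-{}-{}'.format(start, i, etype))
            start, etype = None, None
        if tag.startswith('B-'):
            start, etype = i, tag[2:]
    return entities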


"""
1. Batch testing of the test texts
"""

logits, text_lens = model.predict(text_sequences)
paths = []
for logit, text_len in zip(logits, text_lens):
    viterbi_path, _ = tf_ad.text.viterbi_decode(logit[:text_len],
                                                model.transition_params)
    paths.append(viterbi_path)  # the returned viterbi_path is the sequence of predicted tag ids
Precision, Recall, F1 = evaluationMetrics(id2tag, paths, label_origin)
print('Precision: ' + str(Precision))
print('Recall: ' + str(Recall))
print('F1: ' + str(F1))
"""
2. Test a single input text (comment this block out when not in use)
"""
"""
while True:
    text = input("input:")
Code example #3
# the argparse parser that defines `args`, and the same imports assumed in
# code example #1, are omitted from this excerpt
args = parser.parse_args()

gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
# note: gpus[2] assumes at least three GPUs are visible on the machine
tf.config.experimental.set_visible_devices(devices=gpus[2], device_type='GPU')
vocab2id, id2vocab = read_vocab(args.vocab_file)
tag2id, id2tag = read_vocab(args.tag_file)
text_sequences, text_lens, label_sequences = tokenize_pred(args.test_file, vocab2id, tag2id)
# build the evaluation dataset; drop_remainder=True silently discards the last partial batch
test_dataset = tf.data.Dataset.from_tensor_slices((text_sequences, text_lens, label_sequences))
test_dataset = test_dataset.shuffle(len(text_sequences)).batch(args.batch_size, drop_remainder=True)

optimizer = tf.keras.optimizers.Adam(args.lr)
model = NerModel(hidden_num=args.hidden_num, vocab_size=len(vocab2id), label_size=len(tag2id), embedding_size=args.embedding_size)
# restore model
ckpt = tf.train.Checkpoint(optimizer=optimizer, model=model)
ckpt.restore(tf.train.latest_checkpoint(args.output_dir))

for text_batch, text_lens, labels_batch in test_dataset:
    logits, _ = model.predict(text_batch)
    paths = []
    for logit, text_len in zip(logits, text_lens):
        viterbi_path, _ = tf_ad.text.viterbi_decode(logit[:text_len], model.transition_params)
        paths.append(viterbi_path)

    for i in range(len(text_batch)):
        res = {'text':[],'pred':[],'label':[]}
        for j, t in enumerate(paths[i]):
            res['text'].append(id2vocab.get(text_batch[i][j].numpy(), '<UNK>'))
            res['label'].append(id2tag[(labels_batch[i][j]).numpy()])
            res['pred'].append(id2tag[t])
        print(json.dumps(res, ensure_ascii=False))
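
# Each iteration above prints one sentence as a JSON object with parallel 'text',
# 'pred' and 'label' lists. The helper below is an illustrative addition (not part
# of the original script) showing how a token-level accuracy could be computed from
# the same decoded paths and the padded gold label tensor of a batch.
def batch_token_accuracy(paths, labels_batch):
    correct = total = 0
    for path, labels in zip(paths, labels_batch):
        # each path is already cut to the real sentence length, so zip also
        # truncates the padded gold labels
        for pred_id, gold_id in zip(path, labels.numpy()):
            total += 1
            correct += int(pred_id == gold_id)
    return correct / max(total, 1)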
Code example #4
File: keras_run.py  Project: wangbenglight/Keras-NER
#!/usr/bin/env python
# encoding: utf-8
'''
@author: Ben
@license: (C) Copyright 2013-2017, Node Supply Chain Manager Corporation Limited.
@contact: [email protected]
@file: keras_run.py
@time: 2019/8/15 09:42
@desc:
'''

from model import NerModel
from utils import *

if __name__ == '__main__':
    log.i('Start main function.')

    model = NerModel()
    if is_train():
        model.train()
    else:
        model.predict()

    log.i('Process finished')