import json

import tensorflow as tf
import tensorflow_addons as tf_ad

# project-local modules this script relies on
from model import NerModel
from utils import read_vocab, tokenize, format_result

# `args` (vocab_file, tag_file, test_path, lr, hidden_num, embedding_size, output_dir)
# is the argparse namespace parsed earlier in the full script (not shown in this excerpt)
vocab2id, id2vocab = read_vocab(args.vocab_file)
tag2id, id2tag = read_vocab(args.tag_file)
text_sequences, label_sequences = tokenize(args.test_path, vocab2id, tag2id)

optimizer = tf.keras.optimizers.Adam(args.lr)
model = NerModel(hidden_num=args.hidden_num, vocab_size=len(vocab2id),
                 label_size=len(tag2id), embedding_size=args.embedding_size)
# restore model
ckpt = tf.train.Checkpoint(optimizer=optimizer, model=model)
ckpt.restore(tf.train.latest_checkpoint(args.output_dir))

while True:
    text = input("input:")
    # map each character to its vocabulary id (0 for unknown) and pad to a batch of one
    dataset = tf.keras.preprocessing.sequence.pad_sequences(
        [[vocab2id.get(char, 0) for char in text]], padding='post')
    print(dataset)
    logits, text_lens = model.predict(dataset)
    paths = []
    for logit, text_len in zip(logits, text_lens):
        # decode the best tag sequence with the learned CRF transition matrix
        viterbi_path, _ = tf_ad.text.viterbi_decode(logit[:text_len], model.transition_params)
        paths.append(viterbi_path)
    print(paths[0])
    print([id2tag[id] for id in paths[0]])
    entities_result = format_result(list(text), [id2tag[id] for id in paths[0]])
    print(json.dumps(entities_result, indent=4, ensure_ascii=False))
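The helper format_result is not defined in this excerpt. Below is a minimal sketch, assuming BIO-style tags (e.g. B-LOC / I-LOC) and a list-of-dicts return value, of how such a helper might group characters and their predicted tags into entity spans. The field names (begin, end, words, type) and the exact output shape are illustrative assumptions, not the project's confirmed format.

def format_result(chars, tags):
    """Group characters with BIO tags into entity spans (illustrative sketch)."""
    entities = []
    entity = None
    for i, (char, tag) in enumerate(zip(chars, tags)):
        if tag.startswith('B-'):
            # a new entity starts; close any entity that was still open
            if entity:
                entities.append(entity)
            entity = {'begin': i, 'end': i + 1, 'words': char, 'type': tag[2:]}
        elif tag.startswith('I-') and entity and tag[2:] == entity['type']:
            # continuation of the current entity
            entity['words'] += char
            entity['end'] = i + 1
        else:
            # 'O' tag or an inconsistent 'I-' tag ends the current entity
            if entity:
                entities.append(entity)
            entity = None
    if entity:
        entities.append(entity)
    return entities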
        # (excerpt begins midway through evaluationMetrics's per-sentence loop)
        # intersection of predicted and gold entities for this sentence
        inter = list(set(pre) & set(true))
        correct += len(inter)

    print('Number of correctly predicted entities: ' + str(correct))
    print('Number of entities predicted: ' + str(pre_all))
    print('Number of gold entities: ' + str(true_all))
    # the +1 in the denominators avoids division by zero
    precision = correct / (pre_all + 1)
    recall = correct / (true_all + 1)
    f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1


"""
1. Batch testing on the test file
"""
logits, text_lens = model.predict(text_sequences)
paths = []
for logit, text_len in zip(logits, text_lens):
    viterbi_path, _ = tf_ad.text.viterbi_decode(logit[:text_len], model.transition_params)
    paths.append(viterbi_path)  # the returned viterbi_path holds the predicted label ids
Precision, Recall, F1 = evaluationMetrics(id2tag, paths, label_origin)
print('Precision: ' + str(Precision))
print('Recall: ' + str(Recall))
print('F1: ' + str(F1))

"""
2. Test a single input text (comment out when not in use)
"""
"""
while True:
    text = input("input:")
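The excerpt above starts midway through evaluationMetrics. Below is a possible sketch of the part that precedes it, assuming pre and true collect per-sentence entity spans as (start, end, type) tuples derived from the BIO tags, and that correct, pre_all and true_all accumulate across sentences. The helper extract_entities is hypothetical, and whether label_origin stores tag strings or tag ids is not shown; this sketch assumes tag strings.

def extract_entities(tags):
    """Turn a BIO tag sequence into (start, end, type) spans (illustrative sketch)."""
    spans, start, etype = [], None, None
    for i, tag in enumerate(tags + ['O']):  # sentinel 'O' closes a trailing entity
        if tag.startswith('B-') or (start is not None and not tag.startswith('I-')):
            if start is not None:
                spans.append((start, i, etype))
                start, etype = None, None
        if tag.startswith('B-'):
            start, etype = i, tag[2:]
    return spans

def evaluationMetrics(id2tag, paths, label_origin):
    correct, pre_all, true_all = 0, 0, 0
    for pred_ids, gold_tags in zip(paths, label_origin):
        pre = extract_entities([id2tag[t] for t in pred_ids])  # predicted entity spans
        true = extract_entities(gold_tags)                      # gold entity spans
        pre_all += len(set(pre))
        true_all += len(set(true))
        inter = list(set(pre) & set(true))
        correct += len(inter)
    ...  # continues with the totals printout and precision/recall/F1 computation shown above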
# `parser` is the argparse.ArgumentParser built earlier in the script (not shown)
args = parser.parse_args()

# make only the third physical GPU visible (assumes the machine has at least 3 GPUs)
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
tf.config.experimental.set_visible_devices(devices=gpus[2], device_type='GPU')

vocab2id, id2vocab = read_vocab(args.vocab_file)
tag2id, id2tag = read_vocab(args.tag_file)
text_sequences, text_lens, label_sequences = tokenize_pred(args.test_file, vocab2id, tag2id)

# note: despite the name, this dataset holds the test file's sequences
train_dataset = tf.data.Dataset.from_tensor_slices((text_sequences, text_lens, label_sequences))
train_dataset = train_dataset.shuffle(len(text_sequences)).batch(args.batch_size, drop_remainder=True)

optimizer = tf.keras.optimizers.Adam(args.lr)
model = NerModel(hidden_num=args.hidden_num, vocab_size=len(vocab2id),
                 label_size=len(tag2id), embedding_size=args.embedding_size)
# restore model
ckpt = tf.train.Checkpoint(optimizer=optimizer, model=model)
ckpt.restore(tf.train.latest_checkpoint(args.output_dir))

for text_batch, text_lens, labels_batch in train_dataset:  # text_lens shadows the outer variable
    logits, _ = model.predict(text_batch)
    paths = []
    for logit, text_len, labels in zip(logits, text_lens, labels_batch):
        # Viterbi-decode the best tag path for the unpadded part of each sequence
        viterbi_path, _ = tf_ad.text.viterbi_decode(logit[:text_len], model.transition_params)
        paths.append(viterbi_path)
    for i in range(len(text_batch)):
        res = {'text': [], 'pred': [], 'label': []}
        for j, t in enumerate(paths[i]):
            res['text'].append(id2vocab.get(text_batch[i][j].numpy(), '<UKN>'))
            res['label'].append(id2tag[(labels_batch[i][j]).numpy()])
            res['pred'].append(id2tag[t])
        print(json.dumps(res, ensure_ascii=False))
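NerModel itself is not included in these excerpts. Below is a minimal sketch, assuming the usual embedding + BiLSTM + dense + CRF layout, of a model that would expose the logits, text_lens and transition_params used by the prediction code above. The layer layout, the call signature and the padding-id convention (0) are assumptions, not the project's confirmed implementation; only the tensorflow_addons calls (crf_log_likelihood, viterbi_decode) are standard library API.

import tensorflow as tf
import tensorflow_addons as tf_ad

class NerModel(tf.keras.Model):
    def __init__(self, hidden_num, vocab_size, label_size, embedding_size):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_size)
        self.bilstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hidden_num, return_sequences=True))
        self.dense = tf.keras.layers.Dense(label_size)
        # CRF transition matrix, learned jointly with the network
        self.transition_params = tf.Variable(
            tf.random.uniform(shape=(label_size, label_size)), trainable=True)

    def call(self, text, labels=None, training=None):
        # actual (unpadded) sequence lengths, assuming 0 is the padding id
        text_lens = tf.math.reduce_sum(
            tf.cast(tf.math.not_equal(text, 0), dtype=tf.int32), axis=-1)
        inputs = self.embedding(text)
        inputs = self.bilstm(inputs)
        logits = self.dense(inputs)
        if labels is not None:
            # CRF log-likelihood used as the training objective
            log_likelihood, self.transition_params = tf_ad.text.crf_log_likelihood(
                logits, labels, text_lens, transition_params=self.transition_params)
            return logits, text_lens, log_likelihood
        return logits, text_lens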
#!/usr/bin/env python
# encoding: utf-8
'''
@author: Ben
@license: (C) Copyright 2013-2017, Node Supply Chain Manager Corporation Limited.
@contact: [email protected]
@file: keras_run.py
@time: 2019/8/15 09:42
@desc:
'''
from model import NerModel
from utils import *

if __name__ == '__main__':
    log.i('Start main function.')
    model = NerModel()
    model.train() if is_train() else model.predict()
    log.i('Process finish')
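keras_run.py pulls a log object and an is_train() switch in via `from utils import *`, neither of which appears in these excerpts. A minimal sketch of what they could look like, purely as assumptions to make the stub self-contained:

# utils.py (illustrative): the helpers keras_run.py is assumed to import
import sys

class log:
    """Minimal logger with the log.i(...) interface used above (assumption)."""
    @staticmethod
    def i(msg):
        print('[INFO] ' + msg)

def is_train():
    """Crude mode switch: treat `python keras_run.py train` as training (assumption)."""
    return len(sys.argv) > 1 and sys.argv[1] == 'train'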