import numpy as np
import pandas as pd
import urllib.request
import urllib.parse
import tensorflow as tf
# connect to the database
from Data.load_dbdata import upload_data
from global_config import Logger
from run_similarity import BertSim
# module-import background: https://blog.csdn.net/xiongchengluo1129/article/details/80453599

loginfo = Logger("recommend_articles.log", "info")
# Gold test file: tab-separated (question, entity, attribute, answer, ner) tuples.
file = "./Data/NER_Data/q_t_a_testing_predict.txt"
# BERT similarity model, put into inference-only mode.
bs = BertSim()
bs.set_mode(tf.estimator.ModeKeys.PREDICT)


def dataset_test():
    '''
    Use the entity + attribute pairs from the training QA data to query the
    knowledge base, to measure the upper bound on QA accuracy.
    :return:
    '''
    with open(file) as f:
        total = 0
        recall = 0
        correct = 0
        for line in f:
            # Each line: question \t entity \t attribute \t answer \t ner
            question, entity, attribute, answer, ner = line.split("\t")
            # NOTE(review): function body is truncated in this excerpt —
            # the counting/accuracy logic continues beyond this view.
@Author  : Bruce
@Software: PyCharm
@File    : test.py
@Time    : 2020-6-28
@Desc    : The user types a question; the model outputs the candidate question
           most similar to the input, and the user decides whether that
           question is correct. If correct, the answer is returned; if not,
           the user is prompted for a new question. If the user cannot
           confirm any of 5 consecutive suggested questions, hand over to a
           human agent.
'''
from run_similarity import BertSim
import tensorflow as tf

# Similarity model in inference-only mode.
sim = BertSim()
if True:
    sim.set_mode(tf.estimator.ModeKeys.PREDICT)

# Candidate FAQ questions to match the user's input against.
sentence2_set = [
    '认证时需要上传什么东西?',
    '如何进行认证?',
    '在哪里可以找到认证的页面?',
    '哪里可以认证?',
    '认证中要经历哪些过程?'
]

print('\n' + '请输入您的问题')
question = input('输入问题:')

# Score every candidate against the user's question.
# predict[0][1] is taken as the "similar" score — assumed to be the positive-class
# probability from BertSim.predict; TODO confirm against its implementation.
Q_sim = []
for sen in sentence2_set:
    predict = sim.predict(question, sen)
    Q_sim.append((sen, predict[0][1]))

# Rank candidates by similarity, highest first.
Q_sim = sorted(Q_sim, key=lambda x: x[1], reverse=True)
# print('----------------------------------------------------------------------')
print('输入句子为【' + question + '】')
# NOTE(review): script is truncated in this excerpt — the confirm/answer loop
# described in the header continues beyond this view.
@Desc    : Import the question-answer data.
@Version : 7/14
After the script starts, the model is loaded first and then waits for a user
question. When one dialogue round finishes, it waits for the next question.
'''
import os
import pathlib
from run_similarity import BertSim
import tensorflow as tf
import time

# Run on a fixed GPU; 50 matching tasks complete within 1 second.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # pin to GPU 0 (original comment mentioned 0, 1)

# Similarity model in inference-only mode.
sim = BertSim()
sim.set_mode(tf.estimator.ModeKeys.PREDICT)

file_path = os.path.abspath(__file__)
# Expected path: /home/lixh/works/text/chatbot_project/Chatbot_Retrieval/Chatbot_Retrieval_model/Bert_sim
basepath = str(pathlib.Path(file_path).parent)
# print(basepath)


class similarity():
    def __init__(self, path):
        # path: location of the dialogue (QA) data file
        self.path = path

    def mainProcess(self):
        """
        Find the candidate most similar to the input question.
        (Docstring and method body are truncated in this excerpt.)
        :return:
from terminal_predict import predict_service
from Data.load_dbdata import upload_data
from datetime import time, timedelta, datetime
from kbqa_test import estimate_answer
import pandas as pd
from run_similarity import BertSim
import tensorflow as tf
from global_config import Logger

loginfo = Logger("recommend_articles.log", "info")
# Similarity model in inference-only mode.
bs = BertSim()
bs.set_mode(tf.estimator.ModeKeys.PREDICT)

# Interactive KBQA loop: run NER on the user's question, then look the entity
# up in the nlpccQA table — exact match first, LIKE fallback.
while True:
    choice = {}
    question = input("question:")
    start1 = datetime.now()
    # Named-entity recognition on the raw question text.
    ner = predict_service(question)
    print("识别出的实体:{}".format(ner))
    # NOTE(review): SQL is built by string concatenation from model output —
    # SQL-injection risk. Switch to parameterized queries (PEP 249 paramstyle)
    # if upload_data supports them; verify its signature.
    sql_e1 = "select * from nlpccQA where entity ='" + ner + "' order by length(entity) asc "
    result_e1 = list(upload_data(sql_e1))
    print("从数据库中精确找到实体{}个".format(len(result_e1)))
    result = result_e1
    if len(result_e1) == 0:
        # Exact lookup found nothing — fall back to fuzzy LIKE matching.
        print("精确查找没有查找到实体,采用模糊查找")
        sql_e0 = "select * from nlpccQA where entity like '%" + ner + "%' order by length(entity) asc "
        result_e0 = list(upload_data(sql_e0))
        print(result_e0)
        if len(result_e0) == 0:
            # Nothing found even with fuzzy match — give up on this question.
            print("这个问题我也不知道呀~~")
            continue
        # NOTE(review): loop body is truncated in this excerpt — the handling
        # of non-empty fuzzy results continues beyond this view.
def predict_online():
    """
    do online prediction. each time make prediction for one instance.
    you can change to a batch if you want.

    Loop: read a sentence from stdin, run BERT NER on it, pick the first
    recognized entity, fetch all of its relations from the knowledge graph,
    rank them with BertSim against the input, and answer with the top one.

    Relies on module-level globals defined elsewhere in this file:
    graph, sess, pred_ids, input_ids_p/input_mask_p/segment_ids_p/label_ids_p,
    tokenizer, label_list, FLAGS, batch_size, id2label, driver, Klg, BertSim.

    :param line: a list. element is: [dummy_label,text_a,text_b]
    :return:
    """
    #driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "Nic180319"))

    def convert(line):
        # Turn one tokenized sentence into fixed-length feature arrays shaped
        # (batch_size, FLAGS.max_seq_length) for the session feed.
        feature = convert_single_example(0, line, label_list, FLAGS.max_seq_length, tokenizer, 'p')
        input_ids = np.reshape([feature.input_ids], (batch_size, FLAGS.max_seq_length))
        input_mask = np.reshape([feature.input_mask], (batch_size, FLAGS.max_seq_length))
        segment_ids = np.reshape([feature.segment_ids], (batch_size, FLAGS.max_seq_length))
        label_ids = np.reshape([feature.label_ids], (batch_size, FLAGS.max_seq_length))
        return input_ids, input_mask, segment_ids, label_ids

    global graph
    with graph.as_default():
        print(id2label)
        while True:
            print('input the test sentence:')
            sentence_l = input()
            sentence = str(sentence_l)
            start = datetime.now()
            # Skip degenerate inputs (shorter than 2 characters).
            if len(sentence) < 2:
                print(sentence)
                continue
            sentence = tokenizer.tokenize(sentence)
            # print('your input is:{}'.format(sentence))
            input_ids, input_mask, segment_ids, label_ids = convert(sentence)
            feed_dict = {
                input_ids_p: input_ids,
                input_mask_p: input_mask,
                segment_ids_p: segment_ids,
                label_ids_p: label_ids
            }
            # run session get current feed_dict result
            pred_ids_result = sess.run([pred_ids], feed_dict)
            pred_label_result = convert_id_to_label(pred_ids_result, id2label)
            print(pred_label_result)
            # TODO: entity-combination strategy
            result = strage_combined_link_org_loc(sentence, pred_label_result[0], True)
            print('识别的实体有:{}'.format(' '.join(result)))
            #print('Time used: {} sec'.format((datetime.now() - start).seconds))
            # yueuu
            #driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "Nic180319"))
            # NOTE(review): `driver` is used below but its creation is commented
            # out above — presumably created at module level; verify.
            # NOTE(review): result[0] raises IndexError when NER finds nothing;
            # consider guarding for empty `result`.
            hd_graph = Klg()
            # All relations attached to the first recognized entity in the KG.
            all_rel = hd_graph.old_allrel(driver, name=result[0])
            print('知识图谱中跟实体关联的所有关系为:', all_rel)
            time.sleep(5)
            sim = BertSim()
            sim.set_mode(tf.estimator.ModeKeys.PREDICT)
            # Score each relation against the original (untokenized) input;
            # [0][1] is taken as the similarity score — TODO confirm against
            # BertSim.predict.
            sim_score = []
            for j in range(len(all_rel)):
                sim_score.append(sim.predict(sentence_l, all_rel[j])[0][1])
            for j in range(len(sim_score)):
                print(all_rel[j], f'similarity:{sim_score[j]}')
            # Answer with the value stored under the best-scoring relation.
            max_idx = sim_score.index(max(sim_score))
            print('相似度最高的关系为:', all_rel[max_idx])
            answer = hd_graph.find(driver, result[0], all_rel[max_idx])
            if answer:
                print("answer:", answer)
            else:
                print("不知道")