def get_process_data(self, file):
    """Load a UI-test JSON data file and return its parsed content.

    Reads ``<rootPath>\\testdata\\uidata\\<file>`` via
    ChangeDataType.json_to_dict and hands the resulting dict back to the
    caller unchanged.

    :param file: name of the JSON test-data file under testdata/uidata
    :return: dict parsed from the JSON file
    """
    data_path = rootPath + "\\testdata\\uidata\\" + file
    return ChangeDataType.json_to_dict(data_path)
def get_eye_intent(self, api_url, target_file, test_data_file, result_file):
    """Run the CSV test set against the eye-intent API and report metrics.

    For every (sentence, intention) row, call the intent API, record the
    returned intent / probability, mark whether it matches the expected
    intent, write a timestamped Excel result file, and finally delegate to
    GetIntent.get_intent_result for per-target precision / recall / F1.

    :param api_url: URL template with one ``{}`` placeholder for the sentence
    :param target_file: file storing the target labels
    :param test_data_file: CSV file with test sentences and expected intents
    :param result_file: suffix of the timestamped Excel result file
    """
    # Load the test set (a pandas DataFrame, judging by .iterrows()/.to_excel).
    test_data = ChangeDataType.csv_to_dict(rootPath + "\\testdata\\apidata\\" + test_data_file)
    score_list = []
    re_intent_list = []
    bz_intent_list = []
    tf_list = []
    for idx, temp in test_data.iterrows():
        intent = temp["intention"]
        sentence = temp["sentence"]
        # BUGFIX: reset per-row results. They used to be initialised once
        # before the loop, so a failed request silently recorded the
        # PREVIOUS row's intent and tf for this row.
        re_intent = ""
        tf = ""
        url = api_url.format(sentence)
        try:
            r = requests.get(url, timeout=50)
            result = r.json()
            re_intent = result["data"]["intent"]  # intent returned by the API
            print(re_intent)
            score = result["data"]["intent_probability"]  # confidence score
            tf = CommonFunction.get_tf(intent, re_intent)
        except Exception as e:
            # Best-effort: mark the row as failed and keep going.
            score = "bad request"
            print(e)
        score_list.append(score)
        bz_intent_list.append(intent)
        re_intent_list.append(re_intent)
        tf_list.append(tf)
    test_data["re_intent"] = re_intent_list
    test_data["score"] = score_list
    # Append the aggregated match statistics to the frame.
    test_data = CommonFunction.get_collections(test_data, tf_list)
    now = time.strftime('%y_%m_%d-%H_%M_%S')
    # NOTE(review): ``encoding`` was removed from DataFrame.to_excel in
    # pandas >= 1.2 — confirm the pinned pandas version before upgrading.
    test_data.to_excel(rootPath + '\\testresults\\resultfile\\' + now + result_file,
                       index=False, encoding="utf-8")
    GetIntent.get_intent_result(self, target_file, bz_intent_list, re_intent_list)
def get_pro_intent(self, api_url, target_file, test_data_file, result_file):
    """Run the CSV test set against the pro intent API and report metrics.

    For every (sentence, intent) row, POST the sentence to the API, record
    the returned intent, mark whether it matches the expected intent, write
    a timestamped Excel result file, and delegate to
    GetIntent.get_intent_result for per-target precision / recall / F1.

    :param api_url: URL template with one ``{}`` placeholder for the sentence
    :param target_file: file storing the target labels
    :param test_data_file: CSV file with test sentences and expected intents
    :param result_file: suffix of the timestamped Excel result file
    """
    test_data = ChangeDataType.csv_to_dict(
        rootPath + "\\testdata\\apidata\\intent\\" + test_data_file)
    re_intent_list = []
    exp_intent_list = []
    tf_list = []
    # Hoisted out of the loop: the headers never change per row.
    # SECURITY NOTE(review): hard-coded JWT with a fixed expiry — move this
    # to configuration; it will silently start failing once expired.
    headers = {
        'Authorization': "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjb21wYW55X2lkIjoxLCJyb2JvdF9pZCI6MSwiZXhwIjoxNTg5MzMyNTIzfQ.Durc3V9XA99BejXc2ZOzspPU-JJCY1nUUjceICwBWNE",
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    for idx, temp in test_data.iterrows():
        intent = temp["intent"]
        sentence = temp["sentence"]
        data = {
            'dialog': sentence,
            'client_id': "zltest",
        }
        url = api_url.format(sentence)
        # BUGFIX: initialise per-row results. They were previously unbound,
        # so a failed request on the first row raised NameError at the
        # append below instead of being recorded as a failure.
        re_intent = ""
        tf = ""
        try:
            r = requests.post(url, data=data, headers=headers, timeout=50)
            result = r.json()
            # Intent sits deep inside the NER result payload.
            re_intent = result["ner_result"]["ner"]["intent"]["Value"][0][
                "Value"]
            print(re_intent)
            tf = CommonFunction.get_tf(intent, re_intent)
        except Exception as e:
            # Best-effort: log and record the row as a miss.
            print(e)
        exp_intent_list.append(intent)
        re_intent_list.append(re_intent)
        tf_list.append(tf)
    test_data["re_intent"] = re_intent_list
    # Append the aggregated match statistics to the frame.
    test_data = CommonFunction.get_collections(test_data, tf_list)
    now = time.strftime('%y_%m_%d-%H_%M_%S')
    test_data.to_excel(rootPath + '\\testresults\\resultfile\\' + now + result_file,
                       index=False, encoding="utf-8")
    GetIntent.get_intent_result(self, target_file, exp_intent_list, re_intent_list)
def get_pro_symptom(self, api_url, test_data_file, result_file):
    """Check colloquial-symptom → standard-symptom normalisation.

    For every (colloquial, [expected_standard, ...]) pair in the JSON test
    set, call the normalisation API, compare against the first expected
    standard symptom, and write per-row results plus summary counts/rates
    to an xlwt workbook.

    :param api_url: URL template with one ``{}`` placeholder for the symptom
    :param test_data_file: JSON file mapping colloquial symptom -> expected list
    :param result_file: suffix of the timestamped Excel result file
    """
    test_data = ChangeDataType.json_to_dict(rootPath + "\\testdata\\apidata\\" + test_data_file)
    key_list = list(test_data.keys())
    value_list = list(test_data.values())
    result_value_list = []
    tf_list = []
    for key, value in test_data.items():
        url = api_url.format(key)
        # BUGFIX: reset per-row results. result_value was unbound on a
        # first-row failure (NameError) and otherwise stale from the
        # previous row; tf was stale too.
        result_value = ""
        tf = ""
        try:
            r = requests.get(url, timeout=50)
            result = r.json()
            result_value = result["norm_symptoms"]
            tf = CommonFunction.get_tf(result_value, value[0])
        except Exception as e:
            print(e)
        result_value_list.append(result_value)
        tf_list.append(tf)
    now = time.strftime('%y_%m_%d-%H_%M_%S')
    total = len(key_list)
    true_count = tf_list.count("TRUE")
    false_count = tf_list.count("FALSE")
    workbook = xlwt.Workbook()
    sheet1 = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
    sheet1.write(0, 0, "口语症状")
    sheet1.write(0, 1, "预期标准症状")
    sheet1.write(0, 2, "实际标准症状")
    sheet1.write(0, 3, "是否一致")
    sheet1.write(0, 4, "总数")
    sheet1.write(1, 4, total)
    sheet1.write(0, 5, "一致数")
    sheet1.write(1, 5, true_count)
    sheet1.write(0, 6, "不一致数")
    sheet1.write(1, 6, false_count)
    sheet1.write(0, 7, "一致率")
    sheet1.write(0, 8, "不一致率")
    # Guard against an empty test set (ZeroDivisionError).
    if total:
        # BUGFIX: the old code wrote "{:.2f}%".format(ratio) * 100, which
        # repeats the STRING 100 times and also never scaled the ratio to a
        # percentage. The multiplication belongs inside format().
        sheet1.write(1, 7, "{:.2f}%".format(true_count / total * 100))
        sheet1.write(1, 8, "{:.2f}%".format(false_count / total * 100))
        print("总数:", total,
              " 一致数:", true_count,
              " 不一致数:", false_count,
              " 一致率:", "{:.2f}%".format(true_count / total * 100),
              " 不一致率:", "{:.2f}%".format(false_count / total * 100))
    for i in range(0, total):
        sheet1.write(i + 1, 0, key_list[i])
        # NOTE(review): value_list[i] is a list (value[0] is indexed above) —
        # confirm xlwt accepts it; write() normally expects scalars.
        sheet1.write(i + 1, 1, value_list[i])
        sheet1.write(i + 1, 2, result_value_list[i])
        sheet1.write(i + 1, 3, tf_list[i])
    workbook.save(rootPath + '\\testresults\\resultfile\\' + now + result_file)
def get_sentence_similarity(self, test_data_file, result_file, compare_score_file):
    """Evaluate the BERT sentence-similarity API against a labelled test set.

    For every (症状a, 症状b, label) row, query the similarity endpoint,
    threshold the returned score against the values in compare_score_file,
    mark agreement with the label, plot a binary curve, and write a
    timestamped Excel result file.

    :param test_data_file: CSV with columns 症状a, 症状b, label
    :param result_file: suffix of the timestamped Excel result file
    :param compare_score_file: file with the candidate score thresholds
    """
    test_data = ChangeDataType.csv_to_dict(rootPath + "\\testdata\\apidata\\" + test_data_file)
    score_list = []
    re_score_list = []
    tf_list = []
    lb_list = []
    for idx, temp in test_data.iterrows():
        label = int(temp["label"])
        str1 = temp["症状a"]
        str2 = temp["症状b"]
        url = "http://192.168.1.74:8233/bert_similarity/v2?str1={}&str2={}&model".format(
            str1, str2)
        # BUGFIX: reset per-row results; a failed request used to carry the
        # previous row's re_score/tf into this row.
        re_score = ""
        tf = ""
        try:
            r = requests.get(url, timeout=50)
            result = r.json()
            score = result["data"]["score"]
            # BUGFIX: was get_target((self, compare_score_file)) — a single
            # tuple argument instead of the (self, file) pair every other
            # helper in this file uses.
            compare_score_list = CommonFunction.get_target(self, compare_score_file)
            # NOTE(review): only the LAST threshold's re_score/tf survive
            # this loop — confirm that is the intended behaviour.
            for i in range(0, len(compare_score_list)):
                re_score = CommonFunction.get_re_score(
                    score, float(compare_score_list[i]))
                tf = CommonFunction.get_tf(re_score, label)
        except Exception as e:
            score = "bad request"
            # Log the actual exception, not just the marker string.
            print(e)
        score_list.append(score)
        re_score_list.append(re_score)
        tf_list.append(tf)
        lb_list.append(label)
    test_data["score"] = score_list
    test_data["re_score"] = re_score_list
    # Append the aggregated match statistics to the frame.
    test_data = CommonFunction.get_collections(test_data, tf_list)
    Binary.binary_plot_curve(lb_list, re_score_list)
    now = time.strftime('%y_%m_%d-%H_%M_%S')
    test_data.to_excel(rootPath + '\\testresults\\resultfile\\' + now + result_file,
                       index=False, encoding="utf-8")
def get_answer(self, file):
    """Collect the usable fields (sentence, entity titles, entity contents)
    from the answer CSV, dump them to an Excel sheet, and return them for
    later use.

    :param file: answer CSV file name under testdata/pregnant
    :return: (sentence_list, each_ner_c_list, each_ner_t_list)
    """
    sentence_list = []
    each_ner_content = []   # per-row accumulator: entity content strings
    each_ner_title = []     # per-row accumulator: entity title strings
    each_ner_t_list = []    # one title-list per sentence
    each_ner_c_list = []    # one content-list per sentence
    test_data = ChangeDataType.csv_to_dict(rootPath + "\\testdata\\pregnant\\" + file)
    for idx, temp in test_data.iterrows():
        # Pull sentence, entity string and entity category for this row.
        sentence = temp["句子"]
        ner = temp["实体"]
        ner_type = temp["实体类别"]
        # Entity categories are "$"-separated when a sentence has several.
        ner_type_l = ner_type.split("$")
        # Multiple entity categories for this sentence?
        if len(ner_type_l) >= 2:
            # Split the entity string on "$" as well; the string appears to
            # start with "$" (index 0 is skipped below) — TODO confirm.
            ner_l = ner.split("$")
            for i in range(1, len(ner_l)):
                # Each piece is "title:content".
                each_ner = ner_l[i].split(":")
                # First entity is plain; later ones get a "," prefix so the
                # joined cell reads as a comma-separated list.
                if i == 1:
                    each_ner_title.append(str(each_ner[0]))
                    each_ner_content.append(str(each_ner[1]))
                if i >= 2:
                    each_ner_title.append("," + str(each_ner[0]))
                    each_ner_content.append("," + str(each_ner[1]))
            # Record sentence plus its accumulated titles/contents.
            sentence_list.append(sentence)
            each_ner_c_list.append(each_ner_content)
            each_ner_t_list.append(each_ner_title)
        # Single entity.
        else:
            # "$title:content" -> [title, content].
            each_ner = ner.split("$")[1].split(":")
            each_ner_title.append(str(each_ner[0]))
            each_ner_content.append(str(each_ner[1]))
            # Record sentence plus its title/content.
            sentence_list.append(sentence)
            each_ner_c_list.append(each_ner_content)
            each_ner_t_list.append(each_ner_title)
        # Start fresh accumulators for the next row; the lists appended
        # above keep their references, so they must not be mutated again.
        each_ner_content = []
        each_ner_title = []
    print(sentence_list)
    print(each_ner_c_list)
    print(each_ner_t_list)
    # Record the three lists in an Excel workbook.
    workbook = xlwt.Workbook()
    # Sheet name.
    sheet1 = workbook.add_sheet('answer统计结果', cell_overwrite_ok=True)
    # Three column headers.
    # NOTE(review): the "ner" column below is filled from each_ner_c_list
    # (contents) and "ner_context" from each_ner_t_list (titles) — the
    # headers look swapped; confirm against downstream consumers.
    sheet1.write(0, 0, "sentence")
    sheet1.write(0, 1, "ner")
    sheet1.write(0, 2, "ner_context")
    # Fill the three columns row by row.
    for i in range(0, len(sentence_list)):
        sheet1.write(i + 1, 0, sentence_list[i])
        # NOTE(review): these cells receive Python lists — xlwt's write()
        # normally expects scalars; confirm this does not raise.
        sheet1.write(i + 1, 1, each_ner_c_list[i])
        sheet1.write(i + 1, 2, each_ner_t_list[i])
    # Save the workbook.
    workbook.save(rootPath + '\\testresults\\resultfile\\' +
                  "answer_collection_result.xls")
    return sentence_list, each_ner_c_list, each_ner_t_list
def get_question(self, file):
    """Collect the usable fields (sentence, entity titles, entity contents,
    intent) from the question CSV, dump them to an Excel sheet, and return
    them for later use.

    Only rows whose intent appears in intent_target.txt AND whose entity
    category is not "无" (none) are kept.

    :param file: question CSV file name under testdata/pregnant
    :return: (sentence_list, each_ner_c_list, each_ner_t_list, intent_list)
    """
    sentence_list = []
    intent_list = []        # one intent-list per kept sentence
    each_intent_list = []   # per-row accumulator: this sentence's intent(s)
    each_ner_content = []   # per-row accumulator: entity content strings
    each_ner_title = []     # per-row accumulator: entity title strings
    each_ner_t_list = []    # one title-list per kept sentence
    each_ner_c_list = []    # one content-list per kept sentence
    test_data = ChangeDataType.csv_to_dict(rootPath + "\\testdata\\pregnant\\" + file)
    for idx, temp in test_data.iterrows():
        # Pull sentence, entity string, entity category and intent.
        sentence = temp["句子"]
        ner = temp["实体"]
        ner_type = temp["实体类别"]
        intent = temp["意图"]
        # Entity categories are "$"-separated when a sentence has several.
        ner_type_l = ner_type.split("$")
        # Keep only rows whose intent is in the target file and whose
        # entity category is not "无" (none).
        # NOTE(review): get_intent_target re-reads the target file on every
        # row — hoisting it out of the loop would avoid repeated file I/O.
        if intent in GetRelation.get_intent_target(
                self, "intent_target.txt") and ner_type != "无":
            # Multiple entity categories for this sentence?
            if len(ner_type_l) >= 2:
                sentence_list.append(sentence)
                # Record this sentence's intent.
                each_intent_list.append(intent)
                # Split the entity string on "$"; index 0 is skipped below
                # (the string appears to start with "$") — TODO confirm.
                ner_l = ner.split("$")
                for i in range(1, len(ner_l)):
                    # Each piece is "title:content".
                    each_ner = ner_l[i].split(":")
                    # First entity plain; later ones get a "," prefix so the
                    # joined cell reads as a comma-separated list.
                    if i == 1:
                        each_ner_title.append(str(each_ner[0]))
                        each_ner_content.append(str(each_ner[1]))
                    if i >= 2:
                        each_ner_title.append("," + str(each_ner[0]))
                        each_ner_content.append("," + str(each_ner[1]))
                intent_list.append(each_intent_list)
                each_ner_c_list.append(each_ner_content)
                each_ner_t_list.append(each_ner_title)
            # Single entity.
            else:
                # ner_l = ner.split("$")
                sentence_list.append(sentence)
                # Record this sentence's intent.
                each_intent_list.append(intent)
                # "$title:content" -> [title, content].
                each_ner = ner.split("$")[1].split(":")
                each_ner_title.append(str(each_ner[0]))
                each_ner_content.append(str(each_ner[1]))
                intent_list.append(each_intent_list)
                each_ner_c_list.append(each_ner_content)
                each_ner_t_list.append(each_ner_title)
        # Start fresh accumulators for the next row; the lists appended
        # above keep their references, so they must not be mutated again.
        each_ner_content = []
        each_ner_title = []
        each_intent_list = []
    print(sentence_list)
    print(each_ner_c_list)
    print(each_ner_t_list)
    print(intent_list)
    # Record the four lists in an Excel workbook.
    workbook = xlwt.Workbook()
    # Sheet name.
    sheet1 = workbook.add_sheet('question统计结果', cell_overwrite_ok=True)
    # Four column headers.
    # NOTE(review): the "ner" column below is filled from each_ner_c_list
    # (contents) and "ner_context" from each_ner_t_list (titles) — the
    # headers look swapped; confirm against downstream consumers.
    sheet1.write(0, 0, "sentence")
    sheet1.write(0, 1, "ner")
    sheet1.write(0, 2, "ner_context")
    sheet1.write(0, 3, "intent")
    # Fill the four columns row by row.
    for i in range(0, len(sentence_list)):
        sheet1.write(i + 1, 0, sentence_list[i])
        # NOTE(review): these cells receive Python lists (intent_list[i] is
        # a one-element list) — xlwt's write() normally expects scalars;
        # confirm this does not raise.
        sheet1.write(i + 1, 1, each_ner_c_list[i])
        sheet1.write(i + 1, 2, each_ner_t_list[i])
        sheet1.write(i + 1, 3, intent_list[i])
    # Save the workbook.
    workbook.save(rootPath + '\\testresults\\resultfile\\' +
                  "question_collection_result.xls")
    return sentence_list, each_ner_c_list, each_ner_t_list, intent_list