def download_data_and_compare_sentence(
        hostname=["http://192.168.50.139:8081/api/"],
        dirpath="/opt/lavector/sentence/",
        jsonfile=["192.168.50.139_200_8085_0517.json"],
        usecache=True):
    """Collect label-studio JSON exports and run the sentence comparison.

    The JSON files are either taken from ``dirpath`` as they already exist
    on disk (``usecache=True``) or exported first through
    ``sentence_api.export_data``. Every file is parsed, the parsed items are
    concatenated into one list, and that list is handed to
    ``predict_comare_excel_sentence`` (defined elsewhere in this file).

    The list defaults are only read, never mutated, so sharing them between
    calls is harmless here.

    :param hostname: API base URLs to export from; only used when
        ``usecache`` is false, paired positionally with ``jsonfile``.
    :param dirpath: directory that holds (or receives) the JSON files.
    :param jsonfile: JSON file names, one per entry in ``hostname``.
    :param usecache: when true, skip the export and read
        ``dirpath/jsonfile[i]`` directly.
    :return: whatever ``predict_comare_excel_sentence`` returns for the
        combined data.
    """
    from sentence_api import export_data

    if usecache:
        # Reuse files already on disk; ``hostname`` is not consulted here.
        json_files = [os.path.join(dirpath, name) for name in jsonfile]
    else:
        # zip() stops at the shorter of the two lists, so unpaired entries
        # are silently ignored.
        json_files = [
            export_data(hostname=hname, dirpath=dirpath, jsonfile=jfile,
                        proxy=False)
            for hname, jfile in zip(hostname, jsonfile)
        ]

    original_data = []
    for json_file in json_files:
        # Decode explicitly as UTF-8 instead of relying on the locale's
        # default encoding, which breaks on non-ASCII text on some systems.
        with open(json_file, 'r', encoding='utf-8') as f:
            records = json.load(f)
        print(f"共收集主机{json_file}的数据{len(records)} 条")
        original_data.extend(records)

    return predict_comare_excel_sentence(original_data)
def download_data_and_compare(
        hostname=["http://192.168.50.139:8081/api/"],
        dirpath="/opt/lavector/absa/",
        jsonfile=["192.168.50.139_200_8081_0517.json"],
        service="absa",
        usecache=True):
    """Collect label-studio JSON exports and run the bad-case comparison.

    The JSON files are either taken from ``dirpath`` as they already exist
    on disk (``usecache=True``) or exported first through
    ``absa_api.export_data``. Every file is parsed, the parsed items are
    concatenated into one list, and that list is handed to
    ``predict_comare_excel_bad_case`` (defined elsewhere in this file).

    NOTE(review): another function named ``download_data_and_compare`` is
    defined later in this file with a different signature. If both live at
    module level, the later definition replaces this one at import time.

    The list defaults are only read, never mutated, so sharing them between
    calls is harmless here.

    :param hostname: API base URLs to export from; only used when
        ``usecache`` is false, paired positionally with ``jsonfile``.
    :param dirpath: directory that holds (or receives) the JSON files.
    :param jsonfile: JSON file names, one per entry in ``hostname``.
    :param service: which prediction service to use, e.g. "absa" or "dem8";
        forwarded unchanged to ``predict_comare_excel_bad_case``.
    :param usecache: when true, skip the export and read
        ``dirpath/jsonfile[i]`` directly.
    :return: whatever ``predict_comare_excel_bad_case`` returns for the
        combined data.
    """
    from absa_api import export_data

    if usecache:
        # Reuse files already on disk; ``hostname`` is not consulted here.
        json_files = [os.path.join(dirpath, name) for name in jsonfile]
    else:
        # zip() stops at the shorter of the two lists, so unpaired entries
        # are silently ignored.
        json_files = [
            export_data(hostname=hname, dirpath=dirpath, jsonfile=jfile,
                        proxy=False)
            for hname, jfile in zip(hostname, jsonfile)
        ]

    original_data = []
    for json_file in json_files:
        # Decode explicitly as UTF-8 instead of relying on the locale's
        # default encoding, which breaks on non-ASCII text on some systems.
        with open(json_file, 'r', encoding='utf-8') as f:
            records = json.load(f)
        print(f"共收集主机{json_file}的数据{len(records)} 条")
        original_data.extend(records)

    return predict_comare_excel_bad_case(original_data, service=service)
def download_data_and_compare(
        hostname=["http://192.168.50.139:8081/api/"],
        dirpath="/opt/lavector/absa/",
        jsonfile=["192.168.50.139_500_8081_0129.json"],
        isabsa=True,
        result_excel="result.xlsx",
        export_wrong_examples_excel="wrong.xlsx",
        correct_examples_excel="correct.xlsx",
        type=None):
    """Export label-studio data, load it and run the Excel comparison.

    For every ``(hostname, jsonfile)`` pair the data is exported through
    ``absa_api.export_data``, the resulting JSON file is parsed, and all
    parsed items are concatenated. The combined list is then handed to
    ``predict_comare_excel`` (defined elsewhere in this file) together with
    the Excel file names and flags given here.

    NOTE(review): an earlier function in this file has the same name. If
    both live at module level, this later definition is the one that stays
    bound after import.

    The list defaults are only read, never mutated, so sharing them between
    calls is harmless here.

    :param hostname: API base URLs to export from, paired positionally with
        ``jsonfile``.
    :param dirpath: directory passed to ``export_data`` for the JSON files.
    :param jsonfile: JSON file names, one per entry in ``hostname``.
    :param isabsa: forwarded unchanged to ``predict_comare_excel``.
    :param result_excel: forwarded unchanged to ``predict_comare_excel``.
    :param export_wrong_examples_excel: forwarded unchanged to
        ``predict_comare_excel``.
    :param correct_examples_excel: forwarded unchanged to
        ``predict_comare_excel``.
    :param type: forwarded unchanged to ``predict_comare_excel``. The name
        shadows the builtin but is kept so keyword callers keep working.
    :return: whatever ``predict_comare_excel`` returns for the combined
        data.
    """
    from absa_api import export_data

    original_data = []
    # zip() stops at the shorter of the two lists, so unpaired entries are
    # silently ignored.
    for hname, jfile in zip(hostname, jsonfile):
        json_file = export_data(hostname=hname, dirpath=dirpath,
                                jsonfile=jfile, proxy=False)
        # Decode explicitly as UTF-8 instead of relying on the locale's
        # default encoding, which breaks on non-ASCII text on some systems.
        with open(json_file, 'r', encoding='utf-8') as f:
            records = json.load(f)
        print(f"共收集主机{hname}的数据{len(records)} 条")
        original_data.extend(records)

    return predict_comare_excel(
        original_data,
        result_excel=result_excel,
        export_wrong_examples_excel=export_wrong_examples_excel,
        correct_examples_excel=correct_examples_excel,
        isabsa=isabsa,
        type=type)
def download_data_and_compare_same(
        hostname=[
            "http://192.168.50.139:8081/api/",
            "http://192.168.50.139:8085/api/"
        ],
        dirpath="/opt/lavector/absa/",
        jsonfile=[
            "192.168.50.139_500_8081_0129.json",
            "192.168.50.139_500_8085_0129.json"
        ],
        isabsa=True):
    """Compare the labels that two hosts hold for the same evaluation data.

    Each ``(hostname, jsonfile)`` pair is exported through
    ``absa_api.export_data``, parsed, and run through
    ``predict_comare_excel`` (defined elsewhere in this file). The first
    element of each returned pair is then walked in lockstep for the two
    hosts. Rows whose element 0 matches but whose element 4 differs are
    printed and collected, and the collected rows are written to
    ``diffrent.xlsx`` in the current working directory.

    The list defaults are only read, never mutated, so sharing them between
    calls is harmless here.

    :param hostname: exactly two API base URLs, paired positionally with
        ``jsonfile``.
    :param dirpath: directory passed to ``export_data`` for the JSON files.
    :param jsonfile: JSON file names, one per entry in ``hostname``.
    :param isabsa: forwarded unchanged to ``predict_comare_excel``.
    :return: the parsed JSON content of the last exported file.
    :raises Exception: if ``hostname`` does not have exactly two entries, or
        if the two hosts yield a different number of rows.
    """
    from absa_api import export_data

    if len(hostname) != 2:
        raise Exception("必须准2个hostname,里面包含相同的评估数据")

    result = []
    for hname, jfile in zip(hostname, jsonfile):
        json_file = export_data(hostname=hname, dirpath=dirpath,
                                jsonfile=jfile, proxy=False)
        # Decode explicitly as UTF-8 instead of relying on the locale's
        # default encoding, which breaks on non-ASCII text on some systems.
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        print(f"共收集主机{hname}的数据{len(data)} 条")
        # Hand a fresh list to the predictor so ``data`` itself is not
        # shared with it.
        original_data = list(data)
        predict_data, excel_data = predict_comare_excel(original_data,
                                                        isabsa=isabsa)
        result.append([hname, predict_data, excel_data])

    # Compare the data labelled by the two annotators.
    hname1, data1, _ = result[0]
    hname2, data2, _ = result[1]
    print(f"对比host为 {hname1, hname2}")
    if len(data1) != len(data2):
        raise Exception("两个人标注的数据总数不一致")

    diffrent_data = []
    for d1, d2 in zip(data1, data2):
        if d1[0] != d2[0]:
            # Element 0 differs, so the two rows are not the same sample.
            print("这条数据不一致")
            continue
        if d1[4] == d2[4]:
            continue
        print("2个人标注的标签不一致")
        print(d1[0])
        print(d1[1])
        print(d1[4])
        print(d2[4])
        diffrent_data.append({
            "text": d1[0],
            "keyword": d1[1],
            "P1_label": d1[4],
            "P2_label": d2[4],
            "location": d1[2:4]
        })

    print(f"不一致的数据总量是{len(diffrent_data)}")
    df = pd.DataFrame(diffrent_data)
    # ExcelWriter.save() was removed in pandas 2.0. The context manager
    # closes and saves the workbook on every pandas version.
    with pd.ExcelWriter("diffrent.xlsx", engine='xlsxwriter') as writer:
        df.to_excel(writer)
    print("保存到diffrent.xlsx excel成功")
    # NOTE(review): this returns the raw JSON of the last host, not
    # ``diffrent_data``. Kept for caller compatibility - confirm intent.
    return data