Exemple #1
0
def download_data_and_compare_sentence(
        hostname=["http://192.168.50.139:8081/api/"],
        dirpath="/opt/lavector/sentence/",
        jsonfile=["192.168.50.139_200_8085_0517.json"],
        usecache=True):
    """
    Collect label-studio exports, merge them and run the sentence comparison.

    :param hostname: label-studio API base URLs, paired positionally with
        ``jsonfile``; only consulted when ``usecache`` is false
    :param dirpath: directory joined with each ``jsonfile`` name when reading
        from cache, and forwarded to ``export_data`` otherwise
    :param jsonfile: export file names, one per host
    :param usecache: when true, skip the download and read the files that are
        already under ``dirpath``
    :return: the result of ``predict_comare_excel_sentence`` on the merged
        records
    """
    from sentence_api import export_data

    if not usecache:
        # Fetch a fresh export from every host before reading anything.
        export_paths = [
            export_data(hostname=host,
                        dirpath=dirpath,
                        jsonfile=name,
                        proxy=False)
            for host, name in zip(hostname, jsonfile)
        ]
    else:
        # Reuse the files left behind by an earlier export.
        export_paths = [os.path.join(dirpath, name) for name in jsonfile]

    merged_records = []
    for export_path in export_paths:
        # Load one label-studio JSON export and append its records.
        with open(export_path, 'r') as handle:
            records = json.load(handle)
        print(f"共收集主机{export_path}的数据{len(records)} 条")
        merged_records += records

    return predict_comare_excel_sentence(merged_records)
Exemple #2
0
def download_data_and_compare(hostname=["http://192.168.50.139:8081/api/"],
                              dirpath="/opt/lavector/absa/",
                              jsonfile=["192.168.50.139_200_8081_0517.json"],
                              service="absa",
                              usecache=True):
    """
    Collect label-studio exports, merge them and run the bad-case comparison.

    :param hostname: label-studio API base URLs, paired positionally with
        ``jsonfile``; only consulted when ``usecache`` is false
    :param dirpath: directory joined with each ``jsonfile`` name when reading
        from cache, and forwarded to ``export_data`` otherwise
    :param jsonfile: export file names, one per host
    :param service: "absa" or "dem8" - which prediction service to use;
        forwarded to ``predict_comare_excel_bad_case``
    :param usecache: when true, skip the download and read the files that are
        already under ``dirpath``
    :return: the result of ``predict_comare_excel_bad_case`` on the merged
        records
    """
    from absa_api import export_data

    if not usecache:
        # Fetch a fresh export from every host before reading anything.
        export_paths = [
            export_data(hostname=host,
                        dirpath=dirpath,
                        jsonfile=name,
                        proxy=False)
            for host, name in zip(hostname, jsonfile)
        ]
    else:
        # Reuse the files left behind by an earlier export.
        export_paths = [os.path.join(dirpath, name) for name in jsonfile]

    merged_records = []
    for export_path in export_paths:
        # Load one label-studio JSON export and append its records.
        with open(export_path, 'r') as handle:
            records = json.load(handle)
        print(f"共收集主机{export_path}的数据{len(records)} 条")
        merged_records += records

    return predict_comare_excel_bad_case(merged_records, service=service)
Exemple #3
0
def download_data_and_compare(hostname=["http://192.168.50.139:8081/api/"],
                              dirpath="/opt/lavector/absa/",
                              jsonfile=["192.168.50.139_500_8081_0129.json"],
                              isabsa=True,
                              result_excel="result.xlsx",
                              export_wrong_examples_excel="wrong.xlsx",
                              correct_examples_excel="correct.xlsx",
                              type=None):
    """
    Download label-studio exports, merge them and run the comparison.

    Every host is exported and loaded in turn; the merged records are then
    handed to ``predict_comare_excel`` together with the remaining options.

    :param hostname: label-studio API base URLs, paired positionally with
        ``jsonfile``
    :param dirpath: forwarded to ``export_data``
    :param jsonfile: export file names, one per host
    :param isabsa: forwarded to ``predict_comare_excel``
    :param result_excel: forwarded to ``predict_comare_excel``
    :param export_wrong_examples_excel: forwarded to ``predict_comare_excel``
    :param correct_examples_excel: forwarded to ``predict_comare_excel``
    :param type: forwarded to ``predict_comare_excel``
    :return: the result of ``predict_comare_excel``
    """
    from absa_api import export_data

    merged_records = []
    for host, name in zip(hostname, jsonfile):
        # Export this host's annotations, then read the resulting JSON file.
        export_path = export_data(hostname=host,
                                  dirpath=dirpath,
                                  jsonfile=name,
                                  proxy=False)
        with open(export_path, 'r') as handle:
            records = json.load(handle)
        print(f"共收集主机{host}的数据{len(records)} 条")
        merged_records += records

    compare_options = {
        "result_excel": result_excel,
        "export_wrong_examples_excel": export_wrong_examples_excel,
        "correct_examples_excel": correct_examples_excel,
        "isabsa": isabsa,
        "type": type,
    }
    return predict_comare_excel(merged_records, **compare_options)
Exemple #4
0
def download_data_and_compare_same(hostname=[
    "http://192.168.50.139:8081/api/", "http://192.168.50.139:8085/api/"
],
                                   dirpath="/opt/lavector/absa/",
                                   jsonfile=[
                                       "192.168.50.139_500_8081_0129.json",
                                       "192.168.50.139_500_8085_0129.json"
                                   ],
                                   isabsa=True):
    """
    Compare the labels two annotators gave to the same evaluation data.

    Each of the two hosts is exported, loaded and run through
    ``predict_comare_excel``. The two row lists are then walked in lockstep;
    rows with the same text but different labels are printed and written to
    ``diffrent.xlsx`` in the current working directory.

    :param hostname: exactly two label-studio API base URLs that hold the
        same evaluation data
    :param dirpath: forwarded to ``export_data``
    :param jsonfile: export file names, paired positionally with ``hostname``
    :param isabsa: forwarded to ``predict_comare_excel``
    :return: the raw JSON loaded from the last host's export
    :raises Exception: if ``hostname`` does not have exactly two entries,
        ``jsonfile`` has fewer than two, or the two hosts yield a different
        number of rows
    """
    from absa_api import export_data

    if len(hostname) != 2:
        raise Exception("必须准2个hostname,里面包含相同的评估数据")
    # zip() below would silently drop a host if jsonfile were shorter, and the
    # comparison would then crash with an IndexError after all the expensive
    # work; reject that input up front instead.
    if len(jsonfile) < 2:
        raise Exception("必须提供2个jsonfile,与hostname一一对应")

    result = []
    for hname, jfile in zip(hostname, jsonfile):
        json_file = export_data(hostname=hname,
                                dirpath=dirpath,
                                jsonfile=jfile,
                                proxy=False)
        # Load the JSON file exported from label-studio.
        with open(json_file, 'r') as f:
            data = json.load(f)
        print(f"共收集主机{hname}的数据{len(data)} 条")
        # Hand over a copy so `data` itself stays as loaded.
        predict_data, excel_data = predict_comare_excel(list(data),
                                                        isabsa=isabsa)
        result.append((hname, predict_data, excel_data))

    # Compare what the two annotators labelled.
    (hname1, data1, _), (hname2, data2, _) = result
    print(f"对比host为 {hname1, hname2}")
    if len(data1) != len(data2):
        raise Exception("两个人标注的数据总数不一致")

    diffrent_data = []
    for d1, d2 in zip(data1, data2):
        # Row layout as used below: [0] text, [1] keyword, [2:4] location,
        # [4] label.
        if d1[0] != d2[0]:
            print("这条数据不一致")
            continue
        if d1[4] == d2[4]:
            continue
        print("2个人标注的标签不一致")
        print(d1[0])
        print(d1[1])
        print(d1[4])
        print(d2[4])
        diffrent_data.append({
            "text": d1[0],
            "keyword": d1[1],
            "P1_label": d1[4],
            "P2_label": d2[4],
            "location": d1[2:4]
        })
    print(f"不一致的数据总量是{len(diffrent_data)}")

    df = pd.DataFrame(diffrent_data)
    # ExcelWriter.save() was removed in pandas 2.0; the context manager
    # writes and closes the workbook on every pandas version.
    with pd.ExcelWriter("diffrent.xlsx", engine='xlsxwriter') as writer:
        df.to_excel(writer)
    print("保存到diffrent.xlsx excel成功")
    # NOTE(review): this returns the last host's raw export, exactly as
    # before; callers may actually want `diffrent_data` - confirm before
    # changing the return value.
    return data