Esempio n. 1
0
def fromTheInternetTransfer(numGraphs):
    """Run the exp7 transfer experiment: a control arm, then an intervention arm.

    Control: prepares ``exp7_filtered_ctrl``, re-saves the wild "train" and
    "validation" graphs into it and runs ``graph_classification`` on it.
    Intervention: prepares ``exp7_filtered_test``, generates ``numGraphs``
    training graphs with ``create_multiData`` (series count drawn from
    ``[1, 2, 3]`` for each graph), re-saves the wild "validation" graphs and
    runs ``graph_classification`` again.

    Args:
        numGraphs: Number of graphs to generate for the intervention arm.
    """
    cwd = os.getcwd()
    #clean(cwd)

    ### CONTROL ###
    # create necessary dirs
    outputDir = "exp7_filtered_ctrl"
    create_filtered_dirs(outputDir)
    # All wild graphs
    resave_wild_graphs_for_classification("train",
                                          outputDir,
                                          traintest='train')
    resave_wild_graphs_for_classification("validation",
                                          outputDir,
                                          traintest='val')
    # Classifier
    # (this label used to be a bare `Classifier` expression, which raises
    # NameError at runtime unless such a global happens to exist)
    graph_classification(cwd, 0, outputDir)

    ### INTERVENTION ###
    # create necessary dirs
    outputDir = "exp7_filtered_test"
    create_filtered_dirs(outputDir)
    # generated graphs
    poss_sNs = [1, 2, 3]
    for i in range(numGraphs):
        sN = choice(poss_sNs)
        print(sN)
        create_multiData(i,
                         sN,
                         'train',
                         'random',
                         'multi',
                         'multi',
                         'g',
                         outputDir=outputDir)
    # wild graphs
    # NOTE(review): the control arm passes traintest='val' for the validation
    # split, while this call relies on the helper's default - confirm that
    # the default is the intended value here.
    resave_wild_graphs_for_classification("validation", outputDir)
    # Classifier
    graph_classification(cwd, 0, outputDir)
Esempio n. 2
0
def eval_ve(numGraphs, seriesNumber, graphType, folder='chartOcr_images'):
    """Score extracted legend correlations against generated ground truth.

    For each of ``numGraphs`` graphs, ``create_multiData`` generates an image
    into ``folder`` and ``process_img`` is run on it. The multiset of
    correlation values from the ground-truth label map is compared with the
    multiset parsed from the extracted legend entries (the text after
    ``": "`` in each entry).

    Two scores are accumulated: the share of exact multiset matches, and the
    mean multiset overlap (intersection size divided by union size). The
    ground truth is written to ``<folder>_info/ground_truth.txt`` as JSON and
    both scores to ``<folder>_info/results.txt``; that directory must already
    exist.

    Args:
        numGraphs: Number of graphs to generate and evaluate.
        seriesNumber: Series count for every graph, or the string
            ``"random"`` to draw one per graph.
        graphType: Passed through to ``create_multiData``.
        folder: Image directory; also the prefix of the ``_info`` directory.

    Returns:
        The exact-match rate, or ``0.0`` when no graph was evaluated.
    """
    exact_hits = 0
    overlap_total = 0
    evaluated = 0
    gt_image_to_corr_set = {}
    # Collected for parity with the ground truth, but not written out below.
    ve_image_to_corr_set = {}
    for i in range(numGraphs):
        if seriesNumber == "random":
            # NOTE(review): posSNs is not defined inside this function; it
            # has to be available at module level - confirm.
            seriesNum = choice(posSNs)
        else:
            seriesNum = seriesNumber
        evaluated += 1
        name, _x1s, _x2s, _tstr, label_to_corr_map, _col_to_corr_map = create_multiData(
            i,
            seriesNum,
            "train",
            graphType,
            "multi",
            "solid",
            'pt',
            outputDir2=folder,
            noHard=True)
        iname = name + ".png"
        print(iname)
        gt_c = Counter(label_to_corr_map.values())
        gt_image_to_corr_set[iname] = sorted(gt_c.elements())
        _test_string, test_leg_set = process_img(folder + "/" + iname,
                                                 use_text_not_color=False)
        ve_c = Counter([elem.split(": ")[1] for elem in test_leg_set])
        ve_image_to_corr_set[iname] = sorted(ve_c.elements())
        print(gt_c)
        print(ve_c)
        union_size = sum((gt_c | ve_c).values())
        if union_size:
            overlap_total += sum((gt_c & ve_c).values()) / union_size
        else:
            # Both multisets are empty: they are identical (and counted as an
            # exact hit below), so score the overlap as perfect instead of
            # dividing by zero.
            overlap_total += 1.0
        if ve_c == gt_c:
            exact_hits += 1

    # Guard the averages so a run with zero graphs reports 0.0 rather than
    # raising ZeroDivisionError.
    exact_rate = exact_hits / evaluated if evaluated else 0.0
    overlap_rate = overlap_total / evaluated if evaluated else 0.0

    with open(folder + '_info/ground_truth.txt', 'w') as gt_file:
        gt_file.write(json.dumps(gt_image_to_corr_set))
    with open(folder + '_info/results.txt', 'w') as result_file:
        result_file.write(str(exact_rate) + "\n" + str(overlap_rate))
    return exact_rate
Esempio n. 3
0
def exp3(numGraphs, wild=False):
    """Compare k-selection variants against the true series count (exp3).

    For each of ``numGraphs`` graphs a series count is drawn from
    ``[1, 2, 3]``, the graph is created with ``create_multiData`` and loaded
    with ``cv2.imread``. ``elbowM(img) - 1`` and
    ``elbowM(img, kneedleBasic=True) - 1`` are recorded and compared with the
    drawn series count. The per-method accuracies are printed and shown as a
    bar chart.

    The CNN estimator call is commented out, so ``CNNks`` stays empty. Its
    accuracy is therefore reported as 0; indexing the empty list by position
    would raise ``IndexError``.

    Args:
        numGraphs: Number of graphs to generate and evaluate.
        wild: Not used by this function; kept so existing callers still work.
    """
    # generate test graphs
    poss_sNs = [1, 2, 3]
    CURRks = []
    CNNks = []
    KNEEDLEks = []
    sNs = []
    for i in range(numGraphs):
        sN = choice(poss_sNs)
        print(sN)
        sNs.append(sN)
        name = create_multiData(i, sN, 'train', 'random', 'multi', 'multi',
                                'exp1')
        img = cv2.imread(name)
        #img = sat_thresh_filter(img,40)
        print('finished preprocessing')
        CURRks.append(elbowM(img) - 1)
        # Saved as the input file for the (currently disabled) CNN estimator.
        Image.fromarray(img).save('./exp3_placeholder.png')
        #CNNks.append(find_k('./exp3_placeholder.png')-1)
        KNEEDLEks.append(elbowM(img, kneedleBasic=True) - 1)
    print(sNs)
    print(CURRks)
    print(CNNks)
    print(KNEEDLEks)

    # zip() stops at the shorter list, so a method with no predictions
    # (CNNks today) scores zero hits instead of raising IndexError.
    CURRscore = sum(1 for k, sN in zip(CURRks, sNs) if k == sN)
    CNNscore = sum(1 for k, sN in zip(CNNks, sNs) if k == sN)
    KNEEDLEscore = sum(1 for k, sN in zip(KNEEDLEks, sNs) if k == sN)

    # Avoid ZeroDivisionError when no graphs were generated.
    total = len(sNs)
    CURR = CURRscore / total if total else 0.0
    CNN = CNNscore / total if total else 0.0
    KNEEDLE = KNEEDLEscore / total if total else 0.0
    print(CURR, CNN, KNEEDLE)

    # plot results
    plt.style.use('default')
    X1 = ['Current Method', 'CNN', 'Basic Kneedle Algorithm']
    X2 = [CURR, CNN, KNEEDLE]
    plt.bar(X1, X2)
    plt.ylabel("K Choice Accuracy")
    plt.show()
Esempio n. 4
0
def exp1(numGraphs):
    """Measure k-choice accuracy with and without the saturation filter.

    Each generated graph gets a series count drawn from ``[1, 2, 3]``.
    ``elbowM(img) - 1`` is recorded once on the image as loaded and once
    after ``sat_thresh_filter(img, 40)``. Both sets of estimates are compared
    with the drawn series counts; the two accuracies are printed and plotted
    as a bar chart.

    Args:
        numGraphs: Number of graphs to generate and evaluate.
    """
    # generate test graphs
    series_options = [1, 2, 3]
    true_counts, raw_estimates, filtered_estimates = [], [], []
    for graph_idx in range(numGraphs):
        series_count = choice(series_options)
        print(series_count)
        true_counts.append(series_count)
        image_path = create_multiData(graph_idx, series_count, 'train',
                                      'random', 'multi', 'multi', 'exp1')
        image = cv2.imread(image_path)
        raw_estimates.append(elbowM(image) - 1)
        image = sat_thresh_filter(image, 40)
        print('finished preprocessing')
        filtered_estimates.append(elbowM(image) - 1)

    for series in (true_counts, raw_estimates, filtered_estimates):
        print(series)

    # Count positions where the estimate equals the drawn series count.
    raw_correct = sum(
        1 for k, n in zip(raw_estimates, true_counts) if k == n)
    filtered_correct = sum(
        1 for k, n in zip(filtered_estimates, true_counts) if k == n)

    total = len(true_counts)
    nPP = raw_correct / total
    PP = filtered_correct / total
    print(nPP)
    print(PP)

    # plot results
    plt.style.use('default')
    bar_labels = ['No Preprocessing', 'Preprocessing']
    bar_heights = [nPP, PP]
    plt.bar(bar_labels, bar_heights)
    plt.ylabel("K Choice Accuracy")
    plt.show()
Esempio n. 5
0
def _exp10_add_series_hits(running_total, leg_set, candidates):
    """Add 1/len(leg_set) to running_total per ground-truth entry that has a candidate within edit distance 3."""
    for truth in leg_set:
        # any() stops at the first close-enough candidate, like a `break`.
        if any(editfast(truth, cand) < 3 for cand in candidates):
            running_total = running_total + (1 / len(leg_set))
    return running_total


def exp10(numGraphs):
    """Compare two legend-extraction outputs of ``process_img`` (exp10).

    For each generated graph, the ground-truth legend entries
    (``"<label>: <correlation>"``) are matched by edit distance against the
    two legend sets returned by ``process_img(..., algo='old')``. The first
    set is scored as the control and the second as the intervention. The
    axis/title string is also compared by edit distance, but only failures
    are printed. The ground-truth rows are written to ``./exp10.csv``.

    Args:
        numGraphs: Number of graphs to generate and evaluate.
    """
    ground_truth = [("filename", "x axis label", "y axis label",
                     "title string", "legend and correlation")]

    posSNs = [1, 2, 3]
    # Tallied for the axis score, whose report line is currently disabled.
    axis_hits = 0
    series_hits = 0
    series_hits_i = 0
    for i in range(0, numGraphs):
        name, x1s, x2s, tstr, label_to_corr_map = create_multiData(
            i, choice(posSNs), "train", "random", "multi", "solid", 'pt')
        iname = name + ".png"
        display_string = "images/" + iname + ", x axis: " + x1s + ", y axis: " + x2s + ", title: " + tstr
        legend_entries = [
            key + ": " + label_to_corr_map[key] for key in label_to_corr_map
        ]
        leg_set = set(legend_entries)
        leg_display_str = "".join(", " + entry for entry in legend_entries)
        ground_truth.append((iname, x1s, x2s, tstr, leg_display_str))
        test_string, test_leg_set, test_leg_set_i = process_img("images/" +
                                                                iname,
                                                                algo='old')
        print(display_string)
        print(leg_set)
        print(test_string)
        print(test_leg_set)
        print(test_leg_set_i)
        if editfast(display_string, test_string) < 6:
            axis_hits = axis_hits + 1
        else:
            print('axis fail --------------------------------')

        # control
        before = series_hits
        series_hits = _exp10_add_series_hits(series_hits, leg_set,
                                             test_leg_set)
        if before == series_hits:
            print('series fail --------------------------------')
        if not test_leg_set:
            print('empty set for legend')

        # intervention
        before = series_hits_i
        series_hits_i = _exp10_add_series_hits(series_hits_i, leg_set,
                                               test_leg_set_i)
        if before == series_hits_i:
            print('series fail i --------------------------------')
        if not test_leg_set_i:
            print('empty set for legend i')

    # With zero graphs both tallies are 0; divide by 1 so the report prints
    # 0.0 instead of raising ZeroDivisionError.
    denominator = numGraphs or 1

    # control
    print("series score: " + str(series_hits / denominator))

    # intervention
    print("series score: " + str(series_hits_i / denominator))

    # write csv files
    # newline='' lets the csv writer control line endings itself; without it
    # platforms that translate '\n' emit '\r\r\n' and rows look double-spaced.
    # The with-block closes the file, so no explicit close() is needed.
    with open('./exp10.csv', 'w', newline='') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerows(ground_truth)

    # outputDir = "exp10"
    # create_filtered_dirs(outputDir)
    # poss_sNs = [1,2,3]
    # for i in range(numGraphs):
    #     sN = choice(poss_sNs)
    #     print(sN)
    #     create_multiData(i, sN, 'train', 'random', 'multi', 'multi', 'exp10', pstyle='multi', outputDir2=outputDir)


#exp7(100)
Esempio n. 6
0
def syntheticVariedTransfer(numGraphs):
    """Run the exp8 synthetic transfer experiment (control, then intervention).

    Both arms generate ``numGraphs`` "train" graphs and ``numGraphs``
    "validation" graphs with ``create_multiData`` and then call
    ``graph_classification`` on the arm's directory. The control arm
    (``exp8_filtered_ctrl``) uses ``pstyle='multi'`` for both splits. The
    intervention arm (``exp8_filtered_test``) uses ``pstyle='default'`` for
    the train split and ``pstyle='multi'`` for validation.

    Args:
        numGraphs: Number of graphs generated per split in each arm.
    """
    working_dir = os.getcwd()
    series_options = [1, 2, 3]

    def generate_split(split, point_style, target_dir):
        # One graph per index, each with a freshly drawn series count.
        for graph_idx in range(numGraphs):
            series_count = choice(series_options)
            print(series_count)
            create_multiData(graph_idx,
                             series_count,
                             split,
                             'random',
                             'multi',
                             'multi',
                             'g',
                             pstyle=point_style,
                             outputDir=target_dir)

    # Control
    #clean(os.getcwd())
    control_dir = "exp8_filtered_ctrl"
    create_filtered_dirs(control_dir)
    generate_split('train', 'multi', control_dir)
    generate_split('validation', 'multi', control_dir)
    # Classifier
    graph_classification(working_dir, 0, control_dir)

    # Intervention
    #clean(os.getcwd())
    test_dir = "exp8_filtered_test"
    create_filtered_dirs(test_dir)
    generate_split('train', 'default', test_dir)
    print("finished part 1")
    generate_split('validation', 'multi', test_dir)
    # Classifier
    graph_classification(working_dir, 0, test_dir)
Esempio n. 7
0
def exp9(numGraphs):

    # create control directory
    path = "./exp9_ctrl"
    try:
        os.mkdir(path)
    except OSError:
        print(
            "Warning: Creation of the directory %s failed, might already exist"
            % path)

    # generate dataset to test OCR
    posSNs = [1, 2, 3]
    #display_test_dic = {}
    #leg_test_dic = {}
    axis_hits_OD = 0
    axis_hits_algo = 0
    for i in range(numGraphs):
        #leg_display_str = ""
        name, x1s, x2s, tstr, label_to_corr_map = create_multiData(
            i,
            choice(posSNs),
            "train",
            "random",
            "multi",
            "solid",
            'pt',
            outputDir2='exp9_ctrl')
        iname = name + ".png"
        display_string = name + ", x axis: " + x1s + ", y axis: " + x2s + ", title: " + tstr  #subsitute: "path + iname" for "name" to match original structure from pipeline_testing.py
        imgPath = path + "/" + iname
        jpgimg = Image.open(imgPath).convert('RGB')
        newimgp = imgPath[:len(imgPath) - 3] + 'jpg'  # convert png to jpg
        jpgimg.save(newimgp)
        ocr = OCR(imgPath,
                  assign_labels(show_inference(detection_model, newimgp)))
        text_dict_OD = ocr.crop()
        display_string_OD = name
        text_dict_algo = {
            'x axis': ocr.xAxisLab,
            'y axis': ocr.yAxisLab,
            'title': ocr.title
        }
        display_string_algo = name
        #print(text_dict_algo)
        for elem in text_dict_OD:
            if elem != 'legend' and text_dict_OD[elem] is not None:
                display_string_OD = display_string_OD + ", " + elem + ": " + ' '.join(
                    text_dict_OD[elem])
        for elem in text_dict_algo:
            if elem != 'legend' and text_dict_algo[elem] is not None:
                display_string_algo = display_string_algo + ", " + elem + ": " + text_dict_algo[
                    elem]

        print(display_string)
        print(display_string_OD)
        print(display_string_algo)

        edds_OD = editfast(display_string, display_string_OD)
        if edds_OD < 6:
            axis_hits_OD = axis_hits_OD + 1
        else:
            print('axis fail OD --------------------------------')
        edds_algo = editfast(display_string, display_string_algo)
        if edds_algo < 6:
            axis_hits_algo = axis_hits_algo + 1
        else:
            print('axis fail algo --------------------------------')

    print("OD score: " + str(axis_hits_OD / numGraphs))
    print("algo score: " + str(axis_hits_algo / numGraphs))
    '''