def fromTheInternetTransfer(numGraphs):
    """Run the wild-graph transfer experiment (control arm, then intervention arm).

    Control: the classifier data in ``exp7_filtered_ctrl`` is built from wild
    graphs only ("train" and "validation" sets), then the classifier is run.
    Intervention: ``exp7_filtered_test`` is filled with ``numGraphs`` generated
    training graphs plus the wild "validation" graphs, then the classifier is
    run again.

    Args:
        numGraphs: number of generated training graphs for the intervention arm.
    """
    cwd = os.getcwd()
    #clean(cwd)

    ### CONTROL ###
    # create necessary dirs
    outputDir = "exp7_filtered_ctrl"
    create_filtered_dirs(outputDir)

    # All wild graphs
    resave_wild_graphs_for_classification("train", outputDir, traintest='train')
    resave_wild_graphs_for_classification("validation", outputDir, traintest='val')

    # Classifier
    graph_classification(cwd, 0, outputDir)

    ### INTERVENTION ###
    # create necessary dirs
    outputDir = "exp7_filtered_test"
    create_filtered_dirs(outputDir)

    # generated graphs
    poss_sNs = [1, 2, 3]
    for i in range(numGraphs):
        sN = choice(poss_sNs)
        print(sN)
        create_multiData(i, sN, 'train', 'random', 'multi', 'multi', 'g',
                         outputDir=outputDir)

    # wild graphs
    # NOTE(review): unlike the control arm, no `traintest` is passed here, so
    # the helper's default applies -- confirm that default is the validation split.
    resave_wild_graphs_for_classification("validation", outputDir)

    # Classifier
    graph_classification(cwd, 0, outputDir)
def eval_ve(numGraphs, seriesNumber, graphType, folder='chartOcr_images'):
    """Score legend/correlation extraction on freshly generated graphs.

    For each generated graph, the multiset of ground-truth correlation labels
    is compared with the multiset parsed from ``process_img``'s legend output
    (the text after ``": "`` in each legend entry).

    Two scores are written to ``<folder>_info/results.txt``: the exact-match
    rate, and the mean multiset overlap (intersection size / union size).
    The ground-truth labels per image go to ``<folder>_info/ground_truth.txt``
    as JSON.

    Args:
        numGraphs: number of graphs to generate and evaluate.
        seriesNumber: number of series per graph, or the string ``"random"``
            to draw one per graph from ``posSNs``.
        graphType: forwarded to ``create_multiData``.
        folder: image output directory; ``<folder>_info`` must already exist.

    Returns:
        The exact-match rate, or ``0.0`` when ``numGraphs`` is 0.
    """
    hits = 0
    aHits = 0
    ctr = 0
    gt_image_to_corr_set = {}
    for i in range(numGraphs):
        if seriesNumber == "random":
            # NOTE(review): posSNs is not defined in this function; it must be
            # provided at module level -- confirm.
            seriesNum = choice(posSNs)
        else:
            seriesNum = seriesNumber
        ctr += 1
        name, _x1s, _x2s, _tstr, label_to_corr_map, _col_to_corr_map = create_multiData(
            i, seriesNum, "train", graphType, "multi", "solid", 'pt',
            outputDir2=folder, noHard=True)
        iname = name + ".png"
        print(iname)

        gt_c = Counter(label_to_corr_map.values())
        gt_image_to_corr_set[iname] = sorted(gt_c.elements())

        _test_string, test_leg_set = process_img(folder + "/" + iname,
                                                 use_text_not_color=False)
        ve_c = Counter(elem.split(": ")[1] for elem in test_leg_set)
        print(gt_c)
        print(ve_c)

        matched = sum((gt_c & ve_c).values())
        total = sum((gt_c | ve_c).values())
        # Both multisets empty means nothing to find and nothing found: count
        # it as full overlap (consistent with the exact-match test below)
        # instead of dividing by zero.
        aHits += (matched / total) if total else 1.0
        if ve_c == gt_c:
            hits += 1

    # With no graphs evaluated there is nothing to average; report 0.0 rather
    # than raising ZeroDivisionError.
    exact_rate = hits / ctr if ctr else 0.0
    overlap_rate = aHits / ctr if ctr else 0.0

    with open(folder + '_info/ground_truth.txt', 'w') as gt_file:
        gt_file.write(json.dumps(gt_image_to_corr_set))
    with open(folder + '_info/results.txt', 'w') as result_file:
        result_file.write(str(exact_rate) + "\n" + str(overlap_rate))
    return exact_rate
def exp3(numGraphs, wild=False):
    """Compare k-selection methods on generated graphs and plot their accuracy.

    For each graph the predicted series count is ``elbowM(img) - 1``
    ("Current Method") and ``elbowM(img, kneedleBasic=True) - 1``
    ("Basic Kneedle Algorithm"). The CNN predictor is currently disabled, so
    its prediction list stays empty and its accuracy is reported as 0.

    Args:
        numGraphs: number of graphs to generate and evaluate.
        wild: unused; kept for interface compatibility.
    """
    # generate test graphs
    poss_sNs = [1, 2, 3]
    CURRks = []
    CNNks = []
    KNEEDLEks = []
    sNs = []
    for i in range(numGraphs):
        sN = choice(poss_sNs)
        print(sN)
        sNs.append(sN)
        name = create_multiData(i, sN, 'train', 'random', 'multi', 'multi', 'exp1')
        img = cv2.imread(name)
        #img = sat_thresh_filter(img,40)
        print('finished preprocessing')
        CURRks.append(elbowM(img) - 1)
        Image.fromarray(img).save('./exp3_placeholder.png')
        #CNNks.append(find_k('./exp3_placeholder.png')-1)
        KNEEDLEks.append(elbowM(img, kneedleBasic=True) - 1)
    print(sNs)
    print(CURRks)
    print(CNNks)
    print(KNEEDLEks)

    # zip() stops at the shorter list, so a method with no predictions (the
    # disabled CNN) scores 0 instead of raising IndexError. The denominator is
    # guarded so numGraphs == 0 does not divide by zero.
    total = len(sNs)
    accuracies = []
    for predicted in (CURRks, CNNks, KNEEDLEks):
        correct = sum(k == truth for k, truth in zip(predicted, sNs))
        accuracies.append(correct / total if total else 0.0)
    CURR, CNN, KNEEDLE = accuracies
    print(CURR, CNN, KNEEDLE)

    # plot results
    plt.style.use('default')
    X1 = ['Current Method', 'CNN', 'Basic Kneedle Algorithm']
    X2 = [CURR, CNN, KNEEDLE]
    plt.bar(X1, X2)
    plt.ylabel("K Choice Accuracy")
    plt.show()
def exp1(numGraphs):
    """Measure how saturation-threshold preprocessing affects k-selection accuracy.

    For each generated graph, ``elbowM(img) - 1`` is computed on the raw image
    and again after ``sat_thresh_filter(img, 40)``. Both are compared with the
    true series count, and the two accuracies are printed and shown as a bar
    chart.

    Args:
        numGraphs: number of graphs to generate and evaluate.
    """
    # generate test graphs
    poss_sNs = [1, 2, 3]
    ks = []
    pks = []
    sNs = []
    for i in range(numGraphs):
        sN = choice(poss_sNs)
        print(sN)
        sNs.append(sN)
        name = create_multiData(i, sN, 'train', 'random', 'multi', 'multi', 'exp1')
        img = cv2.imread(name)
        ks.append(elbowM(img) - 1)
        img = sat_thresh_filter(img, 40)
        print('finished preprocessing')
        pks.append(elbowM(img) - 1)
    print(sNs)
    print(ks)
    print(pks)

    total = len(sNs)
    score = sum(k == truth for k, truth in zip(ks, sNs))
    pscore = sum(k == truth for k, truth in zip(pks, sNs))
    # Guard the denominator so numGraphs == 0 reports 0.0 instead of raising
    # ZeroDivisionError.
    nPP = score / total if total else 0.0
    PP = pscore / total if total else 0.0
    print(nPP)
    print(PP)

    # plot results
    plt.style.use('default')
    X1 = ['No Preprocessing', 'Preprocessing']
    X2 = [nPP, PP]
    plt.bar(X1, X2)
    plt.ylabel("K Choice Accuracy")
    plt.show()
def _count_legend_matches(expected, candidates):
    """Count entries of *expected* within edit distance 3 of some candidate."""
    return sum(
        1 for want in expected
        if any(editfast(want, got) < 3 for got in candidates))


def exp10(numGraphs):
    """Compare two legend-extraction outputs of ``process_img`` on generated graphs.

    For each generated graph, ``process_img(..., algo='old')`` returns a text
    string and two legend sets (control and intervention). Each graph adds to
    a series score the fraction of its ground-truth legend entries that are
    matched (edit distance < 3) by the corresponding set. Both average series
    scores are printed and the ground truth is written to ``./exp10.csv``.

    Args:
        numGraphs: number of graphs to generate and evaluate.
    """
    ground_truth = [("filename", "x axis label", "y axis label", "title string",
                     "legend and correlation")]
    posSNs = [1, 2, 3]
    series_hits = 0
    series_hits_i = 0
    for i in range(numGraphs):
        name, x1s, x2s, tstr, label_to_corr_map = create_multiData(
            i, choice(posSNs), "train", "random", "multi", "solid", 'pt')
        iname = name + ".png"
        display_string = ("images/" + iname + ", x axis: " + x1s +
                          ", y axis: " + x2s + ", title: " + tstr)
        legend_entries = [key + ": " + label_to_corr_map[key]
                          for key in label_to_corr_map]
        leg_set = set(legend_entries)
        leg_display_str = "".join(", " + entry for entry in legend_entries)
        ground_truth.append((iname, x1s, x2s, tstr, leg_display_str))

        test_string, test_leg_set, test_leg_set_i = process_img(
            "images/" + iname, algo='old')
        print(display_string)
        print(leg_set)
        print(test_string)
        print(test_leg_set)
        print(test_leg_set_i)

        # Axis/title text check: only the failure is reported; the axis and
        # total score prints are disabled in this experiment.
        if editfast(display_string, test_string) >= 6:
            print('axis fail --------------------------------')

        # control
        matched = _count_legend_matches(leg_set, test_leg_set)
        if matched:
            series_hits += matched / len(leg_set)
        else:
            print('series fail --------------------------------')
            if not test_leg_set:
                print('empty set for legend')

        # intervention
        matched_i = _count_legend_matches(leg_set, test_leg_set_i)
        if matched_i:
            series_hits_i += matched_i / len(leg_set)
        else:
            print('series fail i --------------------------------')
            if not test_leg_set_i:
                print('empty set for legend i')

    # Guard the denominator so numGraphs == 0 reports 0.0 instead of raising
    # ZeroDivisionError before the CSV is written.
    # control
    print("series score: " + str(series_hits / numGraphs if numGraphs else 0.0))
    # intervention
    print("series score: " + str(series_hits_i / numGraphs if numGraphs else 0.0))

    # write csv files
    # newline='' lets the csv writer control line endings itself (otherwise
    # extra blank rows can appear on platforms that translate "\n").
    with open('./exp10.csv', 'w', newline='') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerows(ground_truth)

# outputDir = "exp10"
# create_filtered_dirs(outputDir)
# poss_sNs = [1,2,3]
# for i in range(numGraphs):
#     sN = choice(poss_sNs)
#     print(sN)
#     create_multiData(i, sN, 'train', 'random', 'multi', 'multi', 'exp10', pstyle='multi', outputDir2=outputDir)
#exp7(100)
def syntheticVariedTransfer(numGraphs):
    """Run the synthetic-style transfer experiment (control arm, then intervention arm).

    Control (``exp8_filtered_ctrl``): train and validation graphs are both
    generated with ``pstyle='multi'``.
    Intervention (``exp8_filtered_test``): train graphs use
    ``pstyle='default'``; validation graphs use ``pstyle='multi'``.
    The classifier is run after each arm.

    Args:
        numGraphs: number of graphs generated per split, per arm.
    """
    series_counts = [1, 2, 3]
    working_dir = os.getcwd()

    def generate(split, plot_style, target_dir):
        # One batch of numGraphs graphs, each with a randomly drawn series count.
        for idx in range(numGraphs):
            n_series = choice(series_counts)
            print(n_series)
            create_multiData(idx, n_series, split, 'random', 'multi', 'multi', 'g',
                             pstyle=plot_style, outputDir=target_dir)

    # Control
    # create necessary dirs
    #clean(os.getcwd())
    control_dir = "exp8_filtered_ctrl"
    create_filtered_dirs(control_dir)
    generate('train', 'multi', control_dir)
    generate('validation', 'multi', control_dir)
    # Classifier
    graph_classification(working_dir, 0, control_dir)

    # intervention
    # create necessary dirs
    #clean(os.getcwd())
    intervention_dir = "exp8_filtered_test"
    create_filtered_dirs(intervention_dir)
    generate('train', 'default', intervention_dir)
    print("finished part 1")
    generate('validation', 'multi', intervention_dir)
    # Classifier
    graph_classification(working_dir, 0, intervention_dir)
def exp9(numGraphs):
    """Compare two sources of axis/title text on generated graphs.

    For each graph generated into ``./exp9_ctrl``, a JPEG copy is passed
    through ``show_inference`` / ``assign_labels`` and an ``OCR`` object is
    built. Two strings are assembled: one from ``ocr.crop()`` ("OD") and one
    from ``ocr.xAxisLab`` / ``ocr.yAxisLab`` / ``ocr.title`` ("algo"). Each
    counts as a hit when its edit distance to the ground-truth string is
    below 6. The two hit rates are printed.

    Args:
        numGraphs: number of graphs to generate and evaluate.
    """
    # create control directory
    path = "./exp9_ctrl"
    try:
        os.mkdir(path)
    except OSError:
        # Best effort: the directory usually already exists from a prior run.
        print(
            "Warning: Creation of the directory %s failed, might already exist"
            % path)

    # generate dataset to test OCR
    posSNs = [1, 2, 3]
    axis_hits_OD = 0
    axis_hits_algo = 0
    for i in range(numGraphs):
        name, x1s, x2s, tstr, _label_to_corr_map = create_multiData(
            i, choice(posSNs), "train", "random", "multi", "solid", 'pt',
            outputDir2='exp9_ctrl')
        iname = name + ".png"
        display_string = (name + ", x axis: " + x1s + ", y axis: " + x2s +
                          ", title: " + tstr)
        #subsitute: "path + iname" for "name" to match original structure from pipeline_testing.py
        imgPath = path + "/" + iname

        # convert png to jpg (swap the 3-character extension)
        jpgimg = Image.open(imgPath).convert('RGB')
        newimgp = imgPath[:-3] + 'jpg'
        jpgimg.save(newimgp)

        ocr = OCR(imgPath,
                  assign_labels(show_inference(detection_model, newimgp)))
        text_dict_OD = ocr.crop()
        text_dict_algo = {
            'x axis': ocr.xAxisLab,
            'y axis': ocr.yAxisLab,
            'title': ocr.title
        }

        display_string_OD = name
        for elem in text_dict_OD:
            if elem != 'legend' and text_dict_OD[elem] is not None:
                # OD values are joined with spaces, i.e. treated as word lists.
                display_string_OD += ", " + elem + ": " + ' '.join(
                    text_dict_OD[elem])

        display_string_algo = name
        for elem in text_dict_algo:
            if elem != 'legend' and text_dict_algo[elem] is not None:
                display_string_algo += ", " + elem + ": " + text_dict_algo[elem]

        print(display_string)
        print(display_string_OD)
        print(display_string_algo)

        if editfast(display_string, display_string_OD) < 6:
            axis_hits_OD += 1
        else:
            print('axis fail OD --------------------------------')
        if editfast(display_string, display_string_algo) < 6:
            axis_hits_algo += 1
        else:
            print('axis fail algo --------------------------------')

    # Guard the denominator so numGraphs == 0 reports 0.0 instead of raising
    # ZeroDivisionError.
    print("OD score: " + str(axis_hits_OD / numGraphs if numGraphs else 0.0))
    print("algo score: " +
          str(axis_hits_algo / numGraphs if numGraphs else 0.0))
'''