#print penguin_clusters #print noise__ noise_points[s] = [x for x,u in noise__] print str(s) + " - " + str(len(user_identified_penguins)) if len(user_identified_penguins) > 20: break except AssertionError: continue #if len(user_identified_penguins) == 0: # continue if len(user_identified_penguins) <= 20: #print noise__ not_found = cluster_compare(penguins[0],penguins[-1]) #if not_found == []: # continue image_file = cbook.get_sample_data(base_directory + "/Databases/penguins/images/"+object_id+".JPG") image = plt.imread(image_file) fig, ax = plt.subplots() im = ax.imshow(image) try: X,Y = zip(*penguins_center[5]) plt.plot(X,Y,'.',color="red") except ValueError: pass
# Cluster the 20-user markings; the resulting clusters serve as the gold
# standard for the comparisons below.
# (A jpeg_file=base_directory + "/Databases/penguins/images/" + object_id + ".JPG"
# keyword argument used to be passed to fit() here and is currently disabled.)
gold_centers, gold_clusters, noise__ = DivisiveDBSCAN(num_markings).fit(
    user_markings[20], user_ips[20], debug=True)
print("gold standard number " + str(len(gold_clusters)))

# fake_penguins[s] collects, for each nn in order, how many clusters
# cluster_compare reported as "fake" for the s-user markings.
fake_penguins = {}
for s in (5, 10, 15):
    print("== " + str(s))
    fake_penguins[s] = []
    for nn in range(1, 6):
        user_identified_penguins, penguin_clusters, noise__ = DivisiveDBSCAN(nn).fit(
            user_markings[s], user_ips[s], debug=True)

        # missed penguins - in the gold standard but not found
        not_found = cluster_compare(penguin_clusters, gold_clusters)
        # fake penguins - found but not actually real
        fake_found = cluster_compare(gold_clusters, penguin_clusters)
        # Same output as the Python 2 statement "print a, b".
        print(str(len(not_found)) + " " + str(len(fake_found)))

        fake_penguins[s].append(len(fake_found))

        # Fraction of gold standard clusters that were recovered.
        overall_found[s][nn - 1].append(
            (len(gold_clusters) - len(not_found)) / float(len(gold_clusters)))

# false positives - fake penguins - can only be calculated after everything
# else is done
# NOTE(review): the test below always reads fake_penguins[15][0], whatever
# s and nn are - presumably a shared reference count; confirm against the
# branch that follows this fragment.
for s in (5, 10, 15):
    for nn in range(1, 6):
        if fake_penguins[15][0] == 0:
            overall_fake[s][nn - 1].append(0)
print "done that" total = 0 true_positives = [] false_positives = [] with open(base_directory + "/Databases/penguins_ibcc.out", 'rb') as f: for l in f.readlines(): penguin_index, neg_prob, pos_prob = l.split(" ") penguin = penguins[max_users][image_index][1][int( float(penguin_index))][0] #is this penguin "real" ie. is in the gold standard? if cluster_compare(gold_standard, [ penguin, ]) == []: #yes - penguin is real true_positives.append(float(pos_prob)) else: #penguin is fake false_positives.append(float(pos_prob)) print min(sorted(true_positives)[2:]) print max(false_positives) print len(false_positives) X = [] Y = [] for p in np.arange(0, 1.01, 0.001): X.append(
# Run IBCC with the penguin configuration file, then read
# penguins_ibcc.out (presumably written by that run - TODO confirm).
ibcc.runIbcc(base_directory + "/Databases/penguins_ibcc_config.py")
print("done that")

total = 0
true_positives = []
false_positives = []
with open(base_directory + "/Databases/penguins_ibcc.out", 'rb') as f:
    # Each line holds three space-separated fields:
    # cluster index, negative probability, positive probability.
    for l in f:
        penguin_index, neg_prob, pos_prob = l.split(" ")
        penguin = penguins[max_users][image_index][1][int(float(penguin_index))][0]

        # is this penguin "real", i.e. is it in the gold standard?
        # An empty result from cluster_compare is treated as "real".
        is_real = cluster_compare(gold_standard, [penguin]) == []
        (true_positives if is_real else false_positives).append(float(pos_prob))

print(min(sorted(true_positives)[2:]))
print(max(false_positives))
print(len(false_positives))

# Sweep a probability threshold p and record, for each p, the fraction of
# fake penguins (X) and of real penguins (Y) scoring at least p.
X = []
Y = []
for p in np.arange(0, 1.01, 0.001):
    fakes_kept = [f for f in false_positives if f >= p]
    reals_kept = [t for t in true_positives if t >= p]
    X.append(len(fakes_kept) / float(len(false_positives)))
    # NOTE(review): 37 is hard-coded - presumably the number of gold
    # standard penguins for this image; confirm.
    Y.append(len(reals_kept) / 37.)
print("gold standard number " + str(len(gold_clusters)))

# fake_penguins[s] collects, for each nn in order, how many clusters
# cluster_compare reported as "fake" for the s-user markings.
fake_penguins = {}
for s in (5, 10, 15):
    print("== " + str(s))
    fake_penguins[s] = []
    for nn in range(1, 6):
        user_identified_penguins, penguin_clusters, noise__ = DivisiveDBSCAN(nn).fit(
            user_markings[s], user_ips[s], debug=True)

        # missed penguins - in the gold standard but not found
        not_found = cluster_compare(penguin_clusters, gold_clusters)
        # fake penguins - found but not actually real
        fake_found = cluster_compare(gold_clusters, penguin_clusters)
        # Same output as the Python 2 statement "print a, b".
        print(str(len(not_found)) + " " + str(len(fake_found)))

        fake_penguins[s].append(len(fake_found))

        # Fraction of gold standard clusters that were recovered.
        overall_found[s][nn - 1].append(
            (len(gold_clusters) - len(not_found)) / float(len(gold_clusters)))

# false positives - fake penguins - can only be calculated after everything
# else is done
# NOTE(review): the test below always reads fake_penguins[15][0], whatever
# s and nn are - presumably a shared reference count; confirm against the
# branch that follows this fragment.
for s in (5, 10, 15):
    for nn in range(1, 6):
        if fake_penguins[15][0] == 0:
            overall_fake[s][nn - 1].append(0)
#print penguin_clusters #print noise__ noise_points[s] = [x for x, u in noise__] print str(s) + " - " + str(len(user_identified_penguins)) if len(user_identified_penguins) > 20: break except AssertionError: continue #if len(user_identified_penguins) == 0: # continue if len(user_identified_penguins) <= 20: #print noise__ not_found = cluster_compare(penguins[0], penguins[-1]) #if not_found == []: # continue image_file = cbook.get_sample_data(base_directory + "/Databases/penguins/images/" + object_id + ".JPG") image = plt.imread(image_file) fig, ax = plt.subplots() im = ax.imshow(image) try: X, Y = zip(*penguins_center[5]) plt.plot(X, Y, '.', color="red") except ValueError: pass
sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from clusterCompare import cluster_compare if os.path.exists("/home/ggdhines"): base_directory = "/home/ggdhines" else: base_directory = "/home/greg" penguins,temp = pickle.load(open(base_directory+"/Databases/penguins_vote.pickle","rb")) #does this cluster have a corresponding cluster in the gold standard data? #ie. does this cluster represent an actual penguin? for image_index in range(len(penguins[5])): print len(penguins[5]) user_penguins = penguins[5][0] gold_penguins = penguins[5][1] print len(user_penguins) print len(user_penguins[0]) print "===" print len(user_penguins) for upen in user_penguins: print upen print cluster_compare(gold_penguins,[upen,]) break
else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from clusterCompare import cluster_compare if os.path.exists("/home/ggdhines"): base_directory = "/home/ggdhines" else: base_directory = "/home/greg" penguins, temp = pickle.load(open(base_directory + "/Databases/penguins_vote.pickle", "rb")) # does this cluster have a corresponding cluster in the gold standard data? # ie. does this cluster represent an actual penguin? for image_index in range(len(penguins[5])): print len(penguins[5]) user_penguins = penguins[5][0] gold_penguins = penguins[5][1] print len(user_penguins) print len(user_penguins[0]) print "===" print len(user_penguins) for upen in user_penguins: print upen print cluster_compare(gold_penguins, [upen]) break