#!/usr/bin/env python __author__ = 'ggdhines' from penguinAggregation import PenguinAggregation import random import os import sys # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from divisiveKmeans import DivisiveKmeans clusterAlg = DivisiveKmeans().__fit__ penguin = PenguinAggregation() subject_ids = penguin.__get_subjects_per_site__("APZ00035mv",complete=True,remove_blanks=True) for i,subject in enumerate(random.sample(subject_ids,50)): print i penguin.__readin_subject__(subject) blankImage = penguin.__cluster_subject__(subject, clusterAlg) if not blankImage: penguin.__save_raw_markings__(subject) break
clusterAlg = DivisiveKmeans().__fit__ penguin = PenguinAggregation() gold_subjects = penguin.__get_gold_subjects__() gold_sample = gold_subjects[:50] penguin.__readin_users__() for count,zooniverse_id in enumerate(gold_sample): if count == 50: break print count, zooniverse_id penguin.__readin_subject__(zooniverse_id,read_in_gold=True) blankImage = penguin.__cluster_subject__(zooniverse_id, clusterAlg,fix_distinct_clusters=True,correction_alg=correctionAlg) penguin.__soy_it__(zooniverse_id) penguin.__signal_ibcc__() penguin.__roc__() # one_overlap = penguin.__off_by_one__(display=True) # last_id = None # # for t in one_overlap: # if t[0] != last_id: # print "*****" # print "=====" # last_id = t[0] # penguin.__relative_confusion__(t)
# NOTE(review): interior chunk of a larger benchmarking script -- X1, Y1,
# penguin, clusterAlg and subject_ids must be defined earlier in the full file.
Y2 = []
Z1 = []
Z2 = []

nonEmpty = 0
index = -1
random.shuffle(subject_ids)

# walk the shuffled subject list until 50 non-empty (clusterable) subjects
# have been processed, recording cluster count, timing and accuracy
while True:
    index += 1
    #for i,subject in enumerate(random.sample(subject_ids,50)):
    #subject = "APZ000173v"
    subject = subject_ids[index]
    #print nonEmpty,index
    penguin.__readin_subject__( subject, read_in_gold=True) #,users_to_skip=["caitlin.black"])
    numClusters, time_to_cluster = penguin.__cluster_subject__( subject, clusterAlg)
    # skip images in which no clusters were found
    if numClusters == 0:
        continue
    print nonEmpty
    nonEmpty += 1
    if nonEmpty == 50:
        # NOTE(review): breaking here means the 50th non-empty subject is
        # clustered but its stats below are never recorded -- possible
        # off-by-one; confirm intent before changing
        break
    accuracy1 = penguin.__accuracy__(subject)
    X1.append(numClusters)
    Y1.append(time_to_cluster)
    Z1.append(accuracy1)
# NOTE(review): interior chunk of a larger benchmarking script -- X1, Y1,
# agglomerative, dkmeans and subject_ids must be defined earlier in the file.
Y2 = []
Z1 = []
Z2 = []

nonEmpty = 0
index = -1
random.shuffle(subject_ids)

# walk the shuffled subject list until 30 non-empty (clusterable) subjects
# have been processed, recording cluster count, timing and accuracy
while True:
    index += 1
    #for i,subject in enumerate(random.sample(subject_ids,50)):
    #subject = "APZ000173v"
    subject = subject_ids[index]
    #print nonEmpty,index
    agglomerative.__readin_subject__(subject,read_in_gold=True)#,users_to_skip=["caitlin.black"])
    # NOTE(review): dkmeans is read in here but never clustered in this chunk
    # -- presumably used further down in the full file; verify
    dkmeans.__readin_subject__(subject,read_in_gold=True)
    numClusters,time_to_cluster = agglomerative.__cluster_subject__(subject)
    # skip images in which no clusters were found
    if numClusters == 0:
        continue
    print nonEmpty
    nonEmpty += 1
    if nonEmpty == 30:
        # NOTE(review): breaking here skips recording stats for the 30th
        # non-empty subject -- possible off-by-one; confirm intent
        break
    accuracy1 = agglomerative.__accuracy__(subject)
    X1.append(numClusters)
    Y1.append(time_to_cluster)
    Z1.append(accuracy1)
from penguinAggregation import PenguinAggregation import random import os import sys # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from divisiveKmeans import DivisiveKmeans clusterAlg = DivisiveKmeans().__fit__ penguin = PenguinAggregation() zooniverse_id_list = random.sample(penguin.__get_subjects_per_site__("APZ0001x3p"),40) for i,zooniverse_id in enumerate(zooniverse_id_list): print i penguin.__readin_subject__(zooniverse_id) blankImage = penguin.__cluster_subject__(zooniverse_id, clusterAlg) if not blankImage: print "+--" penguin.__find_closest_neighbour__(zooniverse_id) #penguin.__plot_cluster_size__(zooniverse_id_list) penguin.__find_one__(zooniverse_id_list)
# NOTE(review): interior chunk of a larger benchmarking script -- X1, Y1,
# agglomerative, dkmeans and subject_ids must be defined earlier in the file.
Z1 = []
Z2 = []

nonEmpty = 0
index = -1
random.shuffle(subject_ids)

# walk the shuffled subject list until 20 non-empty (clusterable) subjects
# have been processed, recording cluster count, timing and accuracy
while True:
    index += 1
    #for i,subject in enumerate(random.sample(subject_ids,50)):
    #subject = "APZ000173v"
    subject = subject_ids[index]
    #print nonEmpty,index
    agglomerative.__readin_subject__( subject, read_in_gold=True) #,users_to_skip=["caitlin.black"])
    # NOTE(review): dkmeans is read in here but never clustered in this chunk
    # -- presumably used further down in the full file; verify
    dkmeans.__readin_subject__(subject, read_in_gold=True)
    numClusters, time_to_cluster = agglomerative.__cluster_subject__(subject)
    # skip images in which no clusters were found
    if numClusters == 0:
        continue
    print nonEmpty
    nonEmpty += 1
    if nonEmpty == 20:
        # NOTE(review): breaking here skips recording stats for the 20th
        # non-empty subject -- possible off-by-one; confirm intent
        break
    accuracy1 = agglomerative.__accuracy__(subject)
    X1.append(numClusters)
    Y1.append(time_to_cluster)
    Z1.append(accuracy1)
# NOTE(review): chunk of a larger file -- PenguinAggregation is instantiated
# below but not imported here; the import presumably appears earlier in the
# full file.
import random
import os
import sys

# add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer
if os.path.exists("/home/ggdhines"):
    sys.path.append( "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg")
else:
    sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg")

from divisiveKmeans import DivisiveKmeans

# clustering callable: divisive k-means fit
clusterAlg = DivisiveKmeans().__fit__
penguin = PenguinAggregation()

# draw 40 random subjects from this site
zooniverse_id_list = random.sample( penguin.__get_subjects_per_site__("APZ0001x3p"), 40)

for i, zooniverse_id in enumerate(zooniverse_id_list):
    print i
    penguin.__readin_subject__(zooniverse_id)
    blankImage = penguin.__cluster_subject__(zooniverse_id, clusterAlg)
    # only non-blank images get a closest-neighbour pass
    if not blankImage:
        print "+--"
        penguin.__find_closest_neighbour__(zooniverse_id)

#penguin.__plot_cluster_size__(zooniverse_id_list)
penguin.__find_one__(zooniverse_id_list)
# NOTE(review): interior chunk of a larger benchmarking script -- X1, Y1,
# penguin, clusterAlg and subject_ids must be defined earlier in the full file.
X2 = []
Y2 = []
Z1 = []
Z2 = []

nonEmpty = 0
index = -1
random.shuffle(subject_ids)

# walk the shuffled subject list until 50 non-empty (clusterable) subjects
# have been processed, recording cluster count, timing and accuracy
while True:
    index += 1
    # for i,subject in enumerate(random.sample(subject_ids,50)):
    # subject = "APZ000173v"
    subject = subject_ids[index]
    # print nonEmpty,index
    penguin.__readin_subject__(subject, read_in_gold=True)  # ,users_to_skip=["caitlin.black"])
    numClusters, time_to_cluster = penguin.__cluster_subject__(subject, clusterAlg)
    # skip images in which no clusters were found
    if numClusters == 0:
        continue
    print nonEmpty
    nonEmpty += 1
    if nonEmpty == 50:
        # NOTE(review): breaking here skips recording stats for the 50th
        # non-empty subject -- possible off-by-one; confirm intent
        break
    accuracy1 = penguin.__accuracy__(subject)
    X1.append(numClusters)
    Y1.append(time_to_cluster)
    Z1.append(accuracy1)
#!/usr/bin/env python __author__ = 'ggdhines' from penguinAggregation import PenguinAggregation import random import os import sys # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append( "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from divisiveKmeans import DivisiveKmeans clusterAlg = DivisiveKmeans().__fit__ penguin = PenguinAggregation() subject_ids = penguin.__get_subjects_per_site__("APZ00035mv", complete=True, remove_blanks=True) for i, subject in enumerate(random.sample(subject_ids, 50)): print i penguin.__readin_subject__(subject) blankImage = penguin.__cluster_subject__(subject, clusterAlg) if not blankImage: penguin.__save_raw_markings__(subject) break
# add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append( "/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from agglomerativeClustering import Ward, TooBig clusterAlg = Ward().__fit__ penguin = PenguinAggregation() subject_ids = pickle.load( open(aggregation.base_directory + "/Databases/penguin_gold.pickle", "rb")) for i, subject in enumerate(random.sample(subject_ids, 50)): #subject = "APZ000173v" print i, subject penguin.__readin_subject__(subject, users_to_skip=["caitlin.black"]) try: numClusters, time = penguin.__cluster_subject__(subject, clusterAlg) except TooBig: print "too big" continue if not blankImage: penguin.__display_raw_markings__(subject) penguin.__display__markings__(subject)
# NOTE(review): extraction artifact -- the first statement uses `zooniverse_id`
# and `pts` before they are (re)assigned, `s` is never defined here, and the
# bare `continue` only makes sense inside a loop: this chunk is the interior of
# a `for s in <subjects>:` loop from a larger file, with the tail of one
# iteration (saving the gold points) spilled in front of the next.
gold_dict[zooniverse_id] = pts[:]
zooniverse_id = s["zooniverse_id"]
width = s["metadata"]["original_size"]["width"]
height = s["metadata"]["original_size"]["height"]
# rescale raw gold points from the original image size into a 1000x563 frame
pts = [(int(x) / (width / 1000.), int(y) / (height / 563.)) for (x, y) in pts]
# only process subjects whose status is "complete"
if penguin.__get_status__(zooniverse_id) != "complete":
    continue
penguin.__readin_subject__(zooniverse_id)
blankImage = penguin.__cluster_subject__(zooniverse_id, clusterAlg, fix_distinct_clusters=True)
penguin.__roc__()
#__ibcc__2(penguin.clusterResults,penguin.users_per_subject)
# plt.plot(Xt,Yt,'.')
# plt.xlabel("Large cluster size")
# plt.ylabel("Small cluster size")
# plt.xlim((min(Xt)-0.05,max(Xt)+0.05))
# plt.ylim((min(Yt)-0.05,max(Yt)+0.05))
# plt.show()
# for i in range(1,10):
#     print sum([1 for j in Yt if i == j])
# Y = []
# yErr = []
# NOTE(review): extraction artifact (near-duplicate of an earlier chunk) --
# `zooniverse_id`/`pts` are used before assignment, `s` is undefined here, and
# the bare `continue` implies this is the interior of a `for s in <subjects>:`
# loop from a larger file.
gold_dict[zooniverse_id] = pts[:]
zooniverse_id = s["zooniverse_id"]
width = s["metadata"]["original_size"]["width"]
height = s["metadata"]["original_size"]["height"]
# rescale raw gold points from the original image size into a 1000x563 frame
pts = [(int(x)/(width/1000.),int(y)/(height/563.)) for (x,y) in pts]
# only process subjects whose status is "complete"
if penguin.__get_status__(zooniverse_id) != "complete":
    continue
penguin.__readin_subject__(zooniverse_id)
blankImage = penguin.__cluster_subject__(zooniverse_id, clusterAlg,fix_distinct_clusters=True)
penguin.__roc__()
#__ibcc__2(penguin.clusterResults,penguin.users_per_subject)
# plt.plot(Xt,Yt,'.')
# plt.xlabel("Large cluster size")
# plt.ylabel("Small cluster size")
# plt.xlim((min(Xt)-0.05,max(Xt)+0.05))
# plt.ylim((min(Yt)-0.05,max(Yt)+0.05))
# plt.show()
# for i in range(1,10):
#     print sum([1 for j in Yt if i == j])
# Y = []
# yErr = []
# X = []
import cPickle as pickle import aggregation # add the paths necessary for clustering algorithm and ibcc - currently only works on Greg's computer if os.path.exists("/home/ggdhines"): sys.path.append("/home/ggdhines/PycharmProjects/reduction/experimental/clusteringAlg") else: sys.path.append("/home/greg/github/reduction/experimental/clusteringAlg") from agglomerativeClustering import Ward, TooBig clusterAlg = Ward().__fit__ penguin = PenguinAggregation() subject_ids = pickle.load(open(aggregation.base_directory + "/Databases/penguin_gold.pickle", "rb")) for i, subject in enumerate(random.sample(subject_ids, 50)): # subject = "APZ000173v" print i, subject penguin.__readin_subject__(subject, users_to_skip=["caitlin.black"]) try: numClusters, time = penguin.__cluster_subject__(subject, clusterAlg) except TooBig: print "too big" continue if not blankImage: penguin.__display_raw_markings__(subject) penguin.__display__markings__(subject)