def update_skill(self):
    """
    Refresh and return this object's skill.

    The skill is swap.expectedInformationGain evaluated with a first
    argument of 0.5 and the current self.PL and self.PD.  The value is
    stored on self.skill and that attribute is returned.
    """
    # presumably the subject prior assumed when scoring -- TODO confirm
    # against swap.expectedInformationGain
    p0 = 0.5
    self.skill = swap.expectedInformationGain(p0, self.PL, self.PD)
    return self.skill
# Information maps for both stages share one figure, which is written to
# disk and then cleared before the next plot is drawn.
plotplpd(final_PL1, final_PD1, information1, 1, "Stage 1 Information")
plotplpd(final_PL2, final_PD2, information2, 2, "Stage 2 Information")
savefig("information_plpd.png")
clf()

###########################
## Skill PL PD plot
###########################
# Tabulate the expected information gain on a regular 0.00 .. 1.00 grid.
# The first index steps the second argument of expectedInformationGain
# (M_ll), the second index steps the third (M_nn).
bins = 101
skill = np.zeros((bins, bins))
for ii in range(bins):
    M_ll = 0.01 * ii
    for jj in range(bins):
        M_nn = 0.01 * jj
        skill[ii, jj] = swap.expectedInformationGain(0.5, M_ll, M_nn)

ax = subplot(1, 1, 1)
im = ax.imshow(skill, origin='lower', extent=(0, 1, 0, 1))
cbar = colorbar(im)
cbar.solids.set_edgecolor("face")

ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.set_xlabel("P$_L$")
ax.set_ylabel("P$_D$")
ax.set_title("Skill")

# Guide lines: both axes at 0.5 (dashed) and the anti-diagonal y = 1 - x.
xx = np.arange(-0.1, 2, 0.1)
ax.axhline(0.5, color="k", linestyle='dashed')
ax.axvline(0.5, color="k", linestyle='dashed')
ax.plot(xx, 1 - xx, color="k")
def make_lens_catalog(args):
    """
    NAME
        make_lens_catalog

    PURPOSE
        Given the location of a collection pickle, cluster the markers
        placed on every subject and write a catalog with one row per
        cluster, plus one row per subject summarising the classifications
        that placed no marker at all.

    COMMENTS
        `args` is a dictionary.  Keys that match an entry of the defaults
        below override that default; 'collection_path' is required; any
        other key is reported as unrecognized and ignored.

        The joblib cache rooted at the output directory is cleared both
        before and after the run.

    FLAGS (keys of args)
        collection_path     Path of the collection pickle (required)
        skill               Pass per-marker skill to outlier_clusters
                            (default False, in which case None is passed)
        output_directory    Prefix of the output file and joblib cache
                            location (default './'); it is concatenated
                            with output_name as-is, so include the
                            trailing separator
        output_name         Catalog file name (default 'catalog.dat');
                            an empty string disables writing
        image_y_size        Cluster y is written as image_y_size - y
                            (default 440)
        catalog_path        Optional CSV whose unique 'id' column limits
                            which subjects are processed
        update_collection   Optional path; if non-empty a collection whose
                            subjects carry a 'labels' entry in their
                            annotationhistory is pickled there

    INPUTS
        collection.pickle

    OUTPUTS
        catalog.dat         CSV with header
                            id,kind,x,y,prob,n0,skill,dist
            id    = subject ID
            kind  = subject.kind
            x,y   = cluster centre (y flipped with image_y_size);
                    -1,-1 for the "no marker" row
            prob  = subject.mean_probability
            n0    = number of markers in the cluster (for the "no
                    marker" row: number of classifications without
                    markers)
            skill = summed skill over those markers/classifications
            dist  = dist_within value reported by outlier_clusters for
                    the cluster; 0 for label -1 and for the "no marker" row

        Returns a dictionary keyed by subject ID.  Each value holds
        'agents_reject' (indices of classifications without markers),
        'x' and 'y' (flattened marker coordinates) and 'agents_labels'
        (cluster label per marker, empty when there are no markers).

    EXAMPLE

    BUGS

    AUTHORS
        This file is part of the Space Warps project, and is distributed
        under the GPL v2 by the Space Warps Science Team.
        http://spacewarps.org/

    HISTORY
        2013-07-16  started Davis (KIPAC)
    """
    from collections import deque

    # ------------------------------------------------------------------
    # Some defaults:
    flags = {
        'skill': False,
        'output_directory': './',
        'output_name': 'catalog.dat',
        'image_y_size': 440,
        'catalog_path': '',
        'update_collection': '',
    }

    def say(*parts):
        # Python 2/3 neutral stand-in for the print statement: items are
        # joined by single spaces, exactly as `print a, b` emits them.
        print(" ".join("%s" % (part,) for part in parts))

    # ------------------------------------------------------------------
    # Read in options:
    collection_path = None
    for arg in args:
        if arg in flags:
            flags[arg] = args[arg]
        elif arg == 'collection_path':
            collection_path = args[arg]
        else:
            say("make_lens_catalog: unrecognized flag ", arg)

    if collection_path is None:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("make_lens_catalog: 'collection_path' is required")

    say("make_lens_catalog: illustrating behaviour captured in collection file: ")
    say("make_lens_catalog: ", collection_path)

    memory = joblib.Memory(cachedir=flags['output_directory'])
    memory.clear()

    row_format = '{0},{1},{2},{3},{4},{5},{6},{7}\n'
    F = None
    if len(flags['output_name']) > 0:
        F = open(flags['output_directory'] + flags['output_name'], 'w')

    catalog = {}
    try:
        if F is not None:
            F.write('id,kind,x,y,prob,n0,skill,dist\n')

        # --------------------------------------------------------------
        # Read in files:
        collection = swap.read_pickle(collection_path, 'collection')
        ID_list = collection.list()
        say("make_lens_catalog: collection numbers ", len(ID_list))

        if flags['catalog_path'] != '':
            say("make_lens_catalog: filtering from catalog ",
                flags['catalog_path'])
            catalog_in = csv2rec(flags['catalog_path'])
            ID_list = np.unique(catalog_in['id'])

        # --------------------------------------------------------------
        # Run through data:
        for ID in ID_list:
            subject = collection.member[ID]
            history = subject.annotationhistory
            kind = subject.kind
            P = subject.mean_probability

            x_all = history['At_X']
            y_all = history['At_Y']
            x_markers = np.array([xi for xj in x_all for xi in xj])
            y_markers = np.array([yi for yj in y_all for yi in yj])

            rejects = []
            catalog[ID] = {'agents_reject': rejects,
                           'x': x_markers,
                           'y': y_markers}

            # Expand PL/PD to one value per marker, and collect separately
            # the classifications that placed no marker (the "nots").
            PL_all = history['PL']
            PD_all = history['PD']
            PL, PD, PL_nots, PD_nots = [], [], [], []
            for i, xj in enumerate(x_all):
                n_clicks = len(xj)
                if n_clicks == 0:
                    PL_nots.append(PL_all[i])
                    PD_nots.append(PD_all[i])
                    rejects.append(i)
                else:
                    PL.extend([PL_all[i]] * n_clicks)
                    PD.extend([PD_all[i]] * n_clicks)

            skill = swap.expectedInformationGain(
                0.5, np.array(PL), np.array(PD))
            skill_nots = swap.expectedInformationGain(
                0.5, np.array(PL_nots), np.array(PD_nots))

            if len(catalog) % 500 == 0:
                say(len(catalog))

            # it is only fair to write out the NOTs, too
            if F is not None:
                F.write(row_format.format(
                    ID, kind, -1, -1, P,
                    len(skill_nots), np.sum(skill_nots), 0))

            if len(x_markers) == 0:
                # apparently everyone was a not...
                catalog[ID]['agents_labels'] = np.array([])
                continue

            # ----------------------------------------------------------
            # cluster
            say('make_lens_catalog: subject ID = ', ID)
            weights = skill if flags['skill'] else None
            (cluster_centers, cluster_center_labels, cluster_labels,
             _n_clusters, dist_within) = outlier_clusters(
                 x_markers, y_markers, weights, memory=memory)

            catalog[ID]['agents_labels'] = cluster_labels
            for cluster_center_label in cluster_center_labels:
                members = (cluster_labels == cluster_center_label)
                x, y = cluster_centers[cluster_center_label]
                # convert y to catalog convention
                y = flags['image_y_size'] - y
                N0 = np.sum(members)
                S = np.sum(skill[members])
                D = dist_within[cluster_center_label]
                if cluster_center_label == -1:
                    # outlier cluster
                    # so really every point is its own cluster...
                    D = 0
                # TODO: make some requirement to be included
                # (exclude outliers)
                if F is not None:
                    F.write(row_format.format(
                        ID, kind, x, y, P, N0, S, D))

        say('make_lens_catalog: Clearing memory')
        memory.clear()
    finally:
        # Close the catalog even when a subject fails part-way through.
        if F is not None:
            say('make_lens_catalog: closing file!')
            F.close()

    if len(flags['update_collection']) > 0:
        say('make_lens_catalog: writing updated collection to',
            flags['update_collection'])

        # TODO: get the other params correct!!!!
        collection_fat = swap.collection.Collection()
        for ID in catalog:
            subject = collection.member[ID]
            # Deal the flat per-marker labels back out into the nested
            # per-classification shape of At_X.  A deque keeps each
            # left pop O(1) and still raises IndexError if labels run out.
            pending = deque(catalog[ID]['agents_labels'])
            labels_fat = [[pending.popleft() for _ in atx_i]
                          for atx_i in subject.annotationhistory['At_X']]
            subject.annotationhistory.update({'labels': labels_fat})
            collection_fat.member.update({ID: subject})
        swap.write_pickle(collection_fat, flags['update_collection'])

    say('make_lens_catalog: All done!')

    return catalog
def _atlas_flatten(nested):
    """Flatten a list of per-classification lists into one 1-D array."""
    return np.array([item for group in nested for item in group])


def _atlas_marker_skill(annotationhistory, x_all, agents, agents_points):
    """Return (skill of `agents`, scatter sizes for `agents_points`).

    PL and PD are expanded to one value per marker (classifications
    without markers contribute nothing) and turned into a per-marker
    skill with swap.expectedInformationGain(0.5, PL, PD).  Sizes are the
    skills rescaled linearly between the min and max skill of `agents`;
    when all those skills are equal the scalar 50 is returned instead.
    """
    PL_all = annotationhistory['PL']
    PD_all = annotationhistory['PD']
    PL = np.array([PL_all[i] for i, xj in enumerate(x_all) for _ in xj])
    PD = np.array([PD_all[i] for i, xj in enumerate(x_all) for _ in xj])
    skill_all = swap.expectedInformationGain(0.5, PL, PD)
    skill = skill_all[agents]

    smax = 100
    smin = 5
    lowest = np.min(skill)
    highest = np.max(skill)
    if highest == lowest:
        return skill, 50
    # smin is not added back as an offset, so sizes span 0 .. smax - smin.
    sizes_all = (skill_all - lowest) * (smax - smin) / (highest - lowest)
    return skill, sizes_all[agents_points]


def _atlas_hide_ticks(axis):
    """Switch off every tick and tick label on a matplotlib axis."""
    # Booleans rather than 'off' strings: the strings are truthy and are
    # no longer interpreted as "off" by current matplotlib.
    axis.tick_params(axis='both', which='both',
                     bottom=False, top=False, left=False, right=False,
                     labelleft=False, labelbottom=False)


def make_lens_atlas(args):
    """
    NAME
        make_lens_atlas

    PURPOSE
        Given the location of a collection pickle and a cluster catalog,
        produce annotated images of the catalogued objects: cutout
        "stamps" around each cluster and/or full "field" images, overlaid
        with cluster centres, click markers, click contours and heatmaps.

    COMMENTS
        `args` is a dictionary.  Keys that match an entry of the defaults
        below override that default; 'collection' and 'catalog' are
        required; any other key is reported as unrecognized and ignored.

        Each subject image is obtained with get_online_png(url, outname),
        where outname is <output_directory><ID>_field.png.

    FLAGS (keys of args)
        collection      Path of the collection pickle (required)
        catalog         Path of the catalog CSV (required)
        heatmap         Do heatmaps (stamps only)
        contour         Do contours
        field           Do full image
        stamp           Do cutouts
        alpha           Contour the image alpha channel, when present
        points N        Plot at most N randomly chosen markers.  N < 0
                        (or N not smaller than the number available)
                        plots all of them, 0 plots none (default 30)
        skill           Weight agent markers by skill
        diagnostics     Field only: draw a circle of radius dist_max and
                        a box of half-width stamp_size at every centre
        output_directory, output_format, stamp_size, dist_max, stamp_min,
        smooth_click, figsize_stamp, figsize_field, image_y_size
                        Tunables; see the defaults below

    INPUTS
        collection      collection.pickle
        catalog         catalog.dat
            Columns used: id, x, y, n0, skill (field mode) and,
            if present, dist.  Rows with x < 0, with dist == 0 or with
            n0 < stamp_min are skipped in stamp mode.

    OUTPUTS
        <ID>_cluster_<row>_heatmap.<fmt>    stamp heatmap (heatmap flag)
        <ID>_cluster_<row>_contour.<fmt>    stamp image (always written
                                            for every stamp drawn)
        <ID>_field_output.<fmt>             field image

    EXAMPLE

    BUGS
        TODO: incorporate some of these defaults into the flags dictionary

    AUTHORS
        This file is part of the Space Warps project, and is distributed
        under the GPL v2 by the Space Warps Science Team.
        http://spacewarps.org/

    HISTORY
        2013-07-16  started Davis (KIPAC)
    """

    # ------------------------------------------------------------------
    # Some defaults:
    flags = {
        'points': 30,
        'heatmap': False,
        'contour': False,
        'field': False,
        'stamp': False,
        'alpha': False,
        'skill': False,
        'output_directory': './',
        'output_format': 'png',
        'stamp_size': 50,
        'dist_max': 30,
        'stamp_min': 1,
        'smooth_click': 3,
        'figsize_stamp': 5,
        'figsize_field': 10,
        'image_y_size': 440,
        'diagnostics': False,
    }

    def say(*parts):
        # Python 2/3 neutral stand-in for the print statement: items are
        # joined by single spaces, exactly as `print a, b` emits them.
        print(" ".join("%s" % (part,) for part in parts))

    def subsample(agents):
        # Random subset of at most flags['points'] marker indices.
        if 0 < flags['points'] < len(agents):
            return np.random.choice(
                agents, size=flags['points'], replace=False)
        return agents

    def save(figure, outfile, *problem):
        # Best effort: a failed save is reported and the run carries on,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            figure.savefig(outfile, bbox_inches='tight', pad_inches=0)
        except Exception as err:
            say(*(problem + (err,)))

    # ------------------------------------------------------------------
    # Read in options:
    collection_path = None
    catalog_path = None
    for arg in args:
        if arg in flags:
            flags[arg] = args[arg]
        elif arg == 'collection':
            collection_path = args[arg]
        elif arg == 'catalog':
            catalog_path = args[arg]
        else:
            say("make_lens_atlas: unrecognized flag ", arg)

    if collection_path is None or catalog_path is None:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError(
            "make_lens_atlas: both 'collection' and 'catalog' are required")

    xbins = np.arange(flags['stamp_size'] * 2)
    ybins = np.arange(flags['stamp_size'] * 2)
    figsize_stamp = (flags['figsize_stamp'], flags['figsize_stamp'])
    figsize_field = (flags['figsize_field'], flags['figsize_field'])
    image_y_size = flags['image_y_size']
    green = (0, 1.0, 0)

    say("make_lens_atlas: illustrating behaviour captured in collection, and lens files: ")
    say("make_lens_atlas: ", collection_path)
    say("make_lens_atlas: ", catalog_path)

    # ------------------------------------------------------------------
    # Read in files:
    collection = swap.read_pickle(collection_path, 'collection')
    catalog = csv2rec(catalog_path)

    say("make_lens_atlas: collection numbers ", len(collection.list()))
    say("make_lens_atlas: catalog numbers ", len(catalog))

    # ------------------------------------------------------------------
    # Stamps:
    if flags['stamp']:
        say("make_lens_atlas: running stamps")
        has_dist = 'dist' in catalog.dtype.names
        half = flags['stamp_size']
        reach = flags['dist_max']

        for lens_i, entry in enumerate(catalog):
            ID = entry['id']
            x = entry['x']
            # flip y axis
            y = image_y_size - entry['y']
            N0 = entry['n0']

            if has_dist and entry['dist'] == 0:
                continue
            if x < 0:
                # this is one of the 'non points'; skip
                continue
            if N0 < flags['stamp_min']:
                # not enough points!
                continue

            subject = collection.member[ID]
            annotationhistory = subject.annotationhistory

            # ----------------------------------------------------------
            # download png
            url = subject.location
            outname = flags['output_directory'] + '{0}_field.png'.format(ID)
            im = get_online_png(url, outname)

            # NOTE(review): x is bounded by im.shape[0] and y by
            # im.shape[1] although the array is sliced [y, x]; harmless
            # for square images -- confirm for others.
            # int() replaces np.int, which NumPy has removed.
            min_x = int(np.max((x - half, 0)))
            max_x = int(np.min((x + half, im.shape[0])))
            min_y = int(np.max((y - half, 0)))
            max_y = int(np.min((y + half, im.shape[1])))

            min_member_x = int(np.max((x - reach, 0)))
            max_member_x = int(np.min((x + reach, im.shape[0])))
            min_member_y = int(np.max((y - reach, 0)))
            max_member_y = int(np.min((y + reach, im.shape[1])))

            if min_x >= max_x or min_y >= max_y:
                say("make_lens_atlas: misshapen lens for ID ", ID)
                continue

            # if it is a training image, claim the alpha parameter
            if im.shape[2] == 4:
                alpha_channel = im[:, :, 3][min_y: max_y, min_x: max_x]
                im = im[:, :, :3][min_y: max_y, min_x: max_x]
            else:
                alpha_channel = None
                im = im[min_y: max_y, min_x: max_x]
            # `is not None`: comparing an array with != None is
            # elementwise and cannot be used as a condition.
            draw_alpha = flags['alpha'] and alpha_channel is not None

            fig = plt.figure(figsize=figsize_stamp)
            ax = fig.add_subplot(111)
            # `origin` is not defined in this function; it must be
            # supplied by the enclosing module.
            ax.imshow(im, origin=origin)

            ax.scatter(x - min_x, y - min_y,
                       marker='d', c=green, s=100, alpha=0.75)

            if flags['contour'] or flags['heatmap'] or flags['points'] != 0:
                x_all = annotationhistory['At_X']
                y_all = annotationhistory['At_Y']
                x_markers_all = _atlas_flatten(x_all)
                y_markers_all = _atlas_flatten(y_all)
                agents_numbers = np.arange(x_markers_all.size)

                if 'labels' in annotationhistory:
                    # keep the markers of the cluster whose mean position
                    # is closest to the catalog (x, y)
                    labels = _atlas_flatten(annotationhistory['labels'])
                    cluster_labels = list(set(labels))
                    data = np.vstack((x_markers_all, y_markers_all)).T
                    cluster_centers = np.array(
                        [np.mean(data[labels == i], axis=0)
                         for i in cluster_labels])
                    offsets = cluster_centers - np.vstack((x, y)).T
                    label_center = cluster_labels[
                        np.argmin(np.sum(np.square(offsets), axis=1))]
                    conds = (labels == label_center)
                else:
                    # no recorded membership: keep the markers within
                    # dist_max of the centre
                    conds = ((x_markers_all >= min_member_x) *
                             (x_markers_all <= max_member_x) *
                             (y_markers_all >= min_member_y) *
                             (y_markers_all <= max_member_y))

                agents = agents_numbers[conds]
                x_markers = x_markers_all[agents]
                y_markers = y_markers_all[agents]
                have_agents = len(agents) > 0

                agents_points = subsample(agents)
                x_markers_filtered = x_markers_all[agents_points]
                y_markers_filtered = y_markers_all[agents_points]

                if flags['skill'] and have_agents:
                    skill, sizes_filtered = _atlas_marker_skill(
                        annotationhistory, x_all, agents, agents_points)
                else:
                    skill = None
                    sizes_filtered = 50

                # ------------------------------------------------------
                # heatmaps (drawn on a figure of their own)
                if flags['heatmap'] and have_agents:
                    fig_heatmap = plt.figure(figsize=figsize_stamp)
                    ax_heatmap = fig_heatmap.add_subplot(111)

                    # markers outside the stamp are dropped by the bins
                    pdf2d(x_markers - min_x, y_markers - min_y,
                          xbins=xbins, ybins=ybins,
                          weights=skill, smooth=flags['smooth_click'],
                          color=green, style='hist', axis=ax_heatmap)

                    if draw_alpha:
                        contour_hist(
                            alpha_channel.T,
                            extent=(xbins[0], xbins[-1],
                                    ybins[0], ybins[-1]),
                            color='w', style='contour', axis=ax_heatmap)

                    _atlas_hide_ticks(ax_heatmap)
                    # CPD 04.08.14: Flip axis to old conventions
                    ax_heatmap.invert_yaxis()

                    outfile = flags['output_directory'] + \
                        '{0}_cluster_{1}_heatmap.{2}'.format(
                            ID, lens_i, flags['output_format'])
                    save(fig_heatmap, outfile,
                         'make_lens_atlas: heatmap problem with ',
                         ID, lens_i)

                # ------------------------------------------------------
                # back to our other plots
                # contours
                if flags['contour'] and have_agents:
                    pdf2d(x_markers - min_x, y_markers - min_y,
                          xbins=xbins, ybins=ybins,
                          weights=skill, smooth=flags['smooth_click'],
                          color=green, style='contour', axis=ax)

                # plot points
                if flags['points'] != 0 and have_agents:
                    ax.scatter(x_markers_filtered - min_x,
                               y_markers_filtered - min_y,
                               c=green, s=sizes_filtered, alpha=0.25)

                # plot alpha
                # NOTE(review): kept inside the marker block; the
                # original nesting of this step was ambiguous -- confirm.
                if draw_alpha:
                    contour_hist(
                        alpha_channel.T,
                        extent=(xbins[0], xbins[-1], ybins[0], ybins[-1]),
                        color='w', style='contour', axis=ax)

            # ----------------------------------------------------------
            _atlas_hide_ticks(ax)
            ax.invert_yaxis()

            outfile = flags['output_directory'] + \
                '{0}_cluster_{1}_contour.{2}'.format(
                    ID, lens_i, flags['output_format'])
            save(fig, outfile,
                 'make_lens_atlas: contour problem with ', ID, lens_i)

            plt.close('all')

    # ------------------------------------------------------------------
    # Fields
    if flags['field']:
        say("make_lens_atlas: running fields")
        # find the unique IDs. mark centers and also centrals if
        # clustering is done
        for ID in np.unique(catalog['id']):
            mini_catalog = catalog[catalog['id'] == ID]
            subject = collection.member[ID]
            annotationhistory = subject.annotationhistory

            # cluster centers, with y flipped from the catalog convention
            x_centers = mini_catalog['x']
            y_centers = image_y_size - mini_catalog['y']
            skill_centers = mini_catalog['skill']
            # filter out the -1 entry
            center_cond = (x_centers > 0) * (y_centers > 0)
            # filter outliers if possible
            if 'dist' in mini_catalog.dtype.names:
                center_cond *= mini_catalog['dist'] > 0
            skill_centers = skill_centers[center_cond]
            x_centers = x_centers[center_cond]
            y_centers = y_centers[center_cond]
            colors_centers = [green for _ in x_centers]

            if len(colors_centers) == 0:
                # welp, nothing here
                continue

            # ----------------------------------------------------------
            # download png
            url = subject.location
            outname = flags['output_directory'] + '{0}_field.png'.format(ID)
            im = get_online_png(url, outname)

            # if it is a training image, claim the alpha parameter
            if im.shape[2] == 4:
                alpha_channel = im[:, :, 3]
                im = im[:, :, :3]
            else:
                alpha_channel = None

            fig = plt.figure(figsize=figsize_field)
            ax = fig.add_subplot(111)
            # `origin` is not defined in this function; it must be
            # supplied by the enclosing module.
            ax.imshow(im, origin=origin)

            xbins = np.arange(im.shape[0])
            ybins = np.arange(im.shape[1])
            min_x = 0
            min_y = 0
            max_x = im.shape[0]
            max_y = im.shape[1]

            if flags['skill'] and \
                    np.max(skill_centers) != np.min(skill_centers):
                sizes_centers = (
                    (skill_centers - np.min(skill_centers)) * (200 - 10) /
                    (np.max(skill_centers) - np.min(skill_centers)))
            else:
                sizes_centers = [100 for _ in x_centers]
            # NOTE(review): this unconditional assignment overrides the
            # skill-scaled sizes computed above (kept as found; its
            # original nesting was ambiguous) -- confirm intent.
            sizes_centers = [100 for _ in x_centers]
            ax.scatter(x_centers, y_centers,
                       marker='d', c=colors_centers,
                       s=sizes_centers, alpha=0.75)

            if flags['diagnostics']:
                r = flags['dist_max']
                b = flags['stamp_size']
                b_ones = np.ones(100) * b
                b_arr = np.linspace(-b, b, 100)
                phis = np.arange(0, 6.28, 0.01)
                for x_center, y_center in zip(x_centers, y_centers):
                    # circle of radius dist_max
                    ax.plot(x_center + r * np.cos(phis),
                            y_center + r * np.sin(phis),
                            c='w', ls='-', linewidth=4)
                    # plot box
                    ax.plot(x_center + b_ones, y_center + b_arr,
                            c='r', ls='--', linewidth=4)
                    ax.plot(x_center - b_ones, y_center + b_arr,
                            c='r', ls='--', linewidth=4)
                    ax.plot(x_center + b_arr, y_center + b_ones,
                            c='r', ls='--', linewidth=4)
                    ax.plot(x_center + b_arr, y_center - b_ones,
                            c='r', ls='--', linewidth=4)

            x_all = annotationhistory['At_X']
            y_all = annotationhistory['At_Y']
            x_markers_all = _atlas_flatten(x_all)
            y_markers_all = _atlas_flatten(y_all)

            # keep the markers that fall on the image
            agents_numbers = np.arange(x_markers_all.size)
            conds = ((x_markers_all >= min_x) * (x_markers_all <= max_x) *
                     (y_markers_all >= min_y) * (y_markers_all <= max_y))
            agents = agents_numbers[conds]

            x_markers = x_markers_all[agents]
            y_markers = y_markers_all[agents]

            agents_points = subsample(agents)
            x_markers_filtered = x_markers_all[agents_points]
            y_markers_filtered = y_markers_all[agents_points]

            # Guard against subjects without markers, as the stamp branch
            # does: min/max of an empty skill array would raise.
            if flags['skill'] and len(agents) > 0:
                skill, sizes_filtered = _atlas_marker_skill(
                    annotationhistory, x_all, agents, agents_points)
            else:
                skill = None
                sizes_filtered = 50

            # Marker opacity lives in its own name so that it cannot
            # clobber the image alpha channel needed further down.
            if 'labels' in annotationhistory:
                labels = _atlas_flatten(annotationhistory['labels'])
                # label -1 (outliers) in red, clustered markers in green
                colors = [(1.0, 0.0, 0) if label == -1 else green
                          for label in labels[agents_points]]
                marker_alpha = 0.75
            else:
                colors = green
                marker_alpha = 0.25

            # ----------------------------------------------------------
            # contours
            if flags['contour'] and len(x_markers) >= flags['stamp_min']:
                pdf2d(x_markers - min_x, y_markers - min_y,
                      xbins=xbins, ybins=ybins,
                      weights=skill, smooth=flags['smooth_click'],
                      color=green, style='contour', axis=ax)

            # ----------------------------------------------------------
            # plot points
            if flags['points'] != 0:
                ax.scatter(x_markers_filtered - min_x,
                           y_markers_filtered - min_y,
                           c=colors, s=sizes_filtered, alpha=marker_alpha)

            # ----------------------------------------------------------
            # do alpha
            if flags['alpha'] and alpha_channel is not None:
                contour_hist(
                    alpha_channel.T,
                    extent=(xbins[0], xbins[-1], ybins[0], ybins[-1]),
                    color='w', style='contour', axis=ax)

            _atlas_hide_ticks(ax)
            ax.invert_yaxis()

            outfile = flags['output_directory'] + \
                '{0}_field_output.{1}'.format(ID, flags['output_format'])
            save(fig, outfile,
                 'make_lens_atlas: field problem with field ', ID)

            plt.close('all')

    say('make_lens_atlas: All done!')
def make_lens_catalog(args):
    """
    NAME
        make_lens_catalog

    PURPOSE
        Given the location of a collection pickle, cluster the markers
        placed on every subject and write a catalog with one row per
        cluster, plus one row per subject summarising the classifications
        that placed no marker at all.

    COMMENTS
        `args` is a dictionary.  Keys that match an entry of the defaults
        below override that default; 'collection_path' is required; any
        other key is reported as unrecognized and ignored.

        The joblib cache rooted at the output directory is cleared both
        before and after the run.

    FLAGS (keys of args)
        collection_path     Path of the collection pickle (required)
        skill               Pass per-marker skill to outlier_clusters
                            (default False, in which case None is passed)
        output_directory    Prefix of the output file and joblib cache
                            location (default './'); it is concatenated
                            with output_name as-is, so include the
                            trailing separator
        output_name         Catalog file name (default 'catalog.dat');
                            an empty string disables writing
        image_y_size        Cluster y is written as image_y_size - y
                            (default 440)
        catalog_path        Optional CSV whose unique 'id' column limits
                            which subjects are processed
        update_collection   Optional path; if non-empty a collection whose
                            subjects carry a 'labels' entry in their
                            annotationhistory is pickled there

    INPUTS
        collection.pickle

    OUTPUTS
        catalog.dat         CSV with header
                            id,kind,x,y,prob,n0,skill,dist
            id    = subject ID
            kind  = subject.kind
            x,y   = cluster centre (y flipped with image_y_size);
                    -1,-1 for the "no marker" row
            prob  = subject.mean_probability
            n0    = number of markers in the cluster (for the "no
                    marker" row: number of classifications without
                    markers)
            skill = summed skill over those markers/classifications
            dist  = dist_within value reported by outlier_clusters for
                    the cluster; 0 for label -1 and for the "no marker" row

        Returns a dictionary keyed by subject ID.  Each value holds
        'agents_reject' (indices of classifications without markers),
        'x' and 'y' (flattened marker coordinates) and 'agents_labels'
        (cluster label per marker, empty when there are no markers).

    EXAMPLE

    BUGS

    AUTHORS
        This file is part of the Space Warps project, and is distributed
        under the GPL v2 by the Space Warps Science Team.
        http://spacewarps.org/

    HISTORY
        2013-07-16  started Davis (KIPAC)
    """
    from collections import deque

    # ------------------------------------------------------------------
    # Some defaults:
    flags = {
        'skill': False,
        'output_directory': './',
        'output_name': 'catalog.dat',
        'image_y_size': 440,
        'catalog_path': '',
        'update_collection': '',
    }

    def say(*parts):
        # Python 2/3 neutral stand-in for the print statement: items are
        # joined by single spaces, exactly as `print a, b` emits them.
        print(" ".join("%s" % (part,) for part in parts))

    # ------------------------------------------------------------------
    # Read in options:
    collection_path = None
    for arg in args:
        if arg in flags:
            flags[arg] = args[arg]
        elif arg == 'collection_path':
            collection_path = args[arg]
        else:
            say("make_lens_catalog: unrecognized flag ", arg)

    if collection_path is None:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("make_lens_catalog: 'collection_path' is required")

    say("make_lens_catalog: illustrating behaviour captured in collection file: ")
    say("make_lens_catalog: ", collection_path)

    memory = joblib.Memory(cachedir=flags['output_directory'])
    memory.clear()

    row_format = '{0},{1},{2},{3},{4},{5},{6},{7}\n'
    F = None
    if len(flags['output_name']) > 0:
        F = open(flags['output_directory'] + flags['output_name'], 'w')

    catalog = {}
    try:
        if F is not None:
            F.write('id,kind,x,y,prob,n0,skill,dist\n')

        # --------------------------------------------------------------
        # Read in files:
        collection = swap.read_pickle(collection_path, 'collection')
        ID_list = collection.list()
        say("make_lens_catalog: collection numbers ", len(ID_list))

        if flags['catalog_path'] != '':
            say("make_lens_catalog: filtering from catalog ",
                flags['catalog_path'])
            catalog_in = csv2rec(flags['catalog_path'])
            ID_list = np.unique(catalog_in['id'])

        # --------------------------------------------------------------
        # Run through data:
        for ID in ID_list:
            subject = collection.member[ID]
            history = subject.annotationhistory
            kind = subject.kind
            P = subject.mean_probability

            x_all = history['At_X']
            y_all = history['At_Y']
            x_markers = np.array([xi for xj in x_all for xi in xj])
            y_markers = np.array([yi for yj in y_all for yi in yj])

            rejects = []
            catalog[ID] = {'agents_reject': rejects,
                           'x': x_markers,
                           'y': y_markers}

            # Expand PL/PD to one value per marker, and collect separately
            # the classifications that placed no marker (the "nots").
            PL_all = history['PL']
            PD_all = history['PD']
            PL, PD, PL_nots, PD_nots = [], [], [], []
            for i, xj in enumerate(x_all):
                n_clicks = len(xj)
                if n_clicks == 0:
                    PL_nots.append(PL_all[i])
                    PD_nots.append(PD_all[i])
                    rejects.append(i)
                else:
                    PL.extend([PL_all[i]] * n_clicks)
                    PD.extend([PD_all[i]] * n_clicks)

            skill = swap.expectedInformationGain(
                0.5, np.array(PL), np.array(PD))
            skill_nots = swap.expectedInformationGain(
                0.5, np.array(PL_nots), np.array(PD_nots))

            if len(catalog) % 500 == 0:
                say(len(catalog))

            # it is only fair to write out the NOTs, too
            if F is not None:
                F.write(row_format.format(
                    ID, kind, -1, -1, P,
                    len(skill_nots), np.sum(skill_nots), 0))

            if len(x_markers) == 0:
                # apparently everyone was a not...
                catalog[ID]['agents_labels'] = np.array([])
                continue

            # ----------------------------------------------------------
            # cluster
            say('make_lens_catalog: subject ID = ', ID)
            weights = skill if flags['skill'] else None
            (cluster_centers, cluster_center_labels, cluster_labels,
             _n_clusters, dist_within) = outlier_clusters(
                 x_markers, y_markers, weights, memory=memory)

            catalog[ID]['agents_labels'] = cluster_labels
            for cluster_center_label in cluster_center_labels:
                members = (cluster_labels == cluster_center_label)
                x, y = cluster_centers[cluster_center_label]
                # convert y to catalog convention
                y = flags['image_y_size'] - y
                N0 = np.sum(members)
                S = np.sum(skill[members])
                D = dist_within[cluster_center_label]
                if cluster_center_label == -1:
                    # outlier cluster
                    # so really every point is its own cluster...
                    D = 0
                # TODO: make some requirement to be included
                # (exclude outliers)
                if F is not None:
                    F.write(row_format.format(
                        ID, kind, x, y, P, N0, S, D))

        say('make_lens_catalog: Clearing memory')
        memory.clear()
    finally:
        # Close the catalog even when a subject fails part-way through.
        if F is not None:
            say('make_lens_catalog: closing file!')
            F.close()

    if len(flags['update_collection']) > 0:
        say('make_lens_catalog: writing updated collection to',
            flags['update_collection'])

        # TODO: get the other params correct!!!!
        collection_fat = swap.collection.Collection()
        for ID in catalog:
            subject = collection.member[ID]
            # Deal the flat per-marker labels back out into the nested
            # per-classification shape of At_X.  A deque keeps each
            # left pop O(1) and still raises IndexError if labels run out.
            pending = deque(catalog[ID]['agents_labels'])
            labels_fat = [[pending.popleft() for _ in atx_i]
                          for atx_i in subject.annotationhistory['At_X']]
            subject.annotationhistory.update({'labels': labels_fat})
            collection_fat.member.update({ID: subject})
        swap.write_pickle(collection_fat, flags['update_collection'])

    say('make_lens_catalog: All done!')

    return catalog
def _atlas_flatten(nested):
    """Flatten a per-classification list of marker values into a 1-D array."""
    return np.array([value for group in nested for value in group])


def _atlas_marker_skill(annotationhistory, x_all):
    """Return one skill value per marker.

    The PL / PD of each classification is repeated once for every marker
    that classification placed (classifications without markers therefore
    drop out), and the pairs are passed to swap.expectedInformationGain.
    """
    PL_all = annotationhistory['PL']
    PD_all = annotationhistory['PD']
    PL = np.array([PL_all[i] for i, xj in enumerate(x_all) for _ in xj])
    PD = np.array([PD_all[i] for i, xj in enumerate(x_all) for _ in xj])
    return swap.expectedInformationGain(0.5, PL, PD)


def _atlas_marker_sizes(skill_all, skill, agents_points, smin=5, smax=100):
    """Map skill linearly onto scatter sizes in [smin, smax].

    The range is taken from `skill` (the selected markers); the sizes of the
    markers listed in `agents_points` are returned.  When every selected
    marker has the same skill a constant size of 50 is returned instead.
    """
    lowest = np.min(skill)
    highest = np.max(skill)
    if highest == lowest:
        return 50
    # The smin offset was missing originally, which made the lowest-skill
    # marker come out with size 0 (i.e. invisible).
    sizes_all = smin + (skill_all - lowest) * (smax - smin) / (highest - lowest)
    return sizes_all[agents_points]


def _atlas_hide_ticks(ax):
    """Switch off every tick and tick label on ax."""
    # Booleans rather than 'off': recent matplotlib treats the string as
    # truthy and would leave the ticks on.
    ax.tick_params(axis='both', which='both',
                   bottom=False, top=False, left=False, right=False,
                   labelleft=False, labelbottom=False)


def _atlas_save(fig, outfile, description):
    """Save fig tightly cropped; report, but do not raise, on failure."""
    try:
        fig.savefig(outfile, bbox_inches='tight', pad_inches=0)
    except Exception as err:
        # best effort: one bad figure should not stop the whole atlas
        print('make_lens_atlas: {0} ({1})'.format(description, err))


def make_lens_atlas(args):
    """
    NAME
        make_lens_atlas

    PURPOSE
        Given location of bureau and collection pickles as well as a list of
        subjects, this script produces a set of annotated images of lenses
        (heatmaps for lens locations, markers for where clicks were, etc).

    COMMENTS
        You have to download the file so it chooses wherever your output
        directory is to also download the raw images.
        This should be pretty customizable.

        `args` is a dictionary. Keys that match an entry of the `flags`
        defaults below override that default; 'collection' and 'catalog'
        give the input paths and are both required. Any other key is
        reported as unrecognized and ignored. Nothing is returned.

    FLAGS
        -h              Print this message
        --heatmap       Do heatmaps
        --contour       Do contours
        --field         Do full image
        --stamp         Do cutouts
        --alpha         Do alpha
        --points N      Take N agents and plot them. Any number < 0 = do all
        --skill         Weight agent markers by skill

    INPUTS
        collection      collection.pickle
        catalog         catalog.dat
            Assumed format:
            ID   kind   x   y    Prob     N0   Skill   Dist

            Here:
            ID = Space Warps subject ID
            kind = Space Warps subject type (sim, dud, test)
            x,y = object (cluster) centroid, in pixels
            P = Space Warps subject probability
            N0 = number of markers in the cluster
            S = total skill per cluster, summed over markers
            D = biggest distance within cluster

    OUTPUTS
        With --stamp: <ID>_cluster_<i>_contour.<fmt> per catalog entry, plus
        <ID>_cluster_<i>_heatmap.<fmt> when --heatmap is set.
        With --field: <ID>_field_output.<fmt> per subject.
        All are written under output_directory.

    EXAMPLE

    BUGS
        TODO: incorporate some of these defaults into the flags dictionary

    AUTHORS
        This file is part of the Space Warps project, and is distributed
        under the GPL v2 by the Space Warps Science Team.
        http://spacewarps.org/

    HISTORY
        2013-07-16  started Davis (KIPAC)
    """

    # ------------------------------------------------------------------
    # Some defaults:

    flags = {
        'points': 30,
        'heatmap': False,
        'contour': False,
        'field': False,
        'stamp': False,
        'alpha': False,
        'skill': False,
        'output_directory': './',
        'output_format': 'png',
        'stamp_size': 50,
        'dist_max': 30,
        'stamp_min': 1,
        'smooth_click': 3,
        'figsize_stamp': 5,
        'figsize_field': 10,
        'image_y_size': 440,
        'diagnostics': False,
    }

    # ------------------------------------------------------------------
    # Read in options:

    collection_path = None
    catalog_path = None
    for arg in args:
        if arg in flags:
            flags[arg] = args[arg]
        elif arg == 'collection':
            collection_path = args[arg]
        elif arg == 'catalog':
            catalog_path = args[arg]
        else:
            print("make_lens_atlas: unrecognized flag  {0}".format(arg))

    if collection_path is None or catalog_path is None:
        # fail with a clear message instead of an unbound-name error below
        raise ValueError("make_lens_atlas: both 'collection' and 'catalog' "
                         "must be given in args")

    print(flags)

    xbins = np.arange(flags['stamp_size'] * 2)
    ybins = np.arange(flags['stamp_size'] * 2)
    figsize_stamp = (flags['figsize_stamp'], flags['figsize_stamp'])
    figsize_field = (flags['figsize_field'], flags['figsize_field'])
    image_y_size = flags['image_y_size']

    print("make_lens_atlas: illustrating behaviour captured in collection, "
          "and lens files: ")
    print("make_lens_atlas:  {0}".format(collection_path))
    print("make_lens_atlas:  {0}".format(catalog_path))

    # ------------------------------------------------------------------
    # Read in files:

    collection = swap.read_pickle(collection_path, 'collection')
    catalog = csv2rec(catalog_path)

    print("make_lens_atlas: collection numbers  {0}".format(
        len(collection.list())))
    print("make_lens_atlas: catalog numbers  {0}".format(len(catalog)))

    # NOTE(review): `origin` (passed to imshow below) is not defined in this
    # function; presumably a module-level setting -- confirm.

    # ------------------------------------------------------------------
    # Stamps: one cutout per catalog entry

    if flags['stamp']:
        print("make_lens_atlas: running stamps")
        stamp_size = flags['stamp_size']
        dist_max = flags['dist_max']
        has_dist = 'dist' in catalog.dtype.names

        for lens_i, lens in enumerate(catalog):
            ID = lens['id']
            x = lens['x']
            # flip y axis
            y = image_y_size - lens['y']
            N0 = lens['n0']

            if has_dist and lens['dist'] == 0:
                continue
            if x < 0:
                # this is one of the 'non points'; skip
                print(lens_i, 'x < 0!')
                continue
            if N0 < flags['stamp_min']:
                # not enough points!
                print(lens_i, '{0} < {1}'.format(N0, flags['stamp_min']))
                continue

            subject = collection.member[ID]
            annotationhistory = subject.annotationhistory

            # ----------------------------------------------------------
            # download png
            outname = flags['output_directory'] + '{0}_field.png'.format(ID)
            im = get_online_png(subject.location, outname)

            # NOTE(review): x is clipped against im.shape[0] and y against
            # im.shape[1], while the slicing below indexes y along axis 0.
            # That only matches for square images -- confirm before
            # feeding non-square fields.
            min_x = int(np.max((x - stamp_size, 0)))
            max_x = int(np.min((x + stamp_size, im.shape[0])))
            min_y = int(np.max((y - stamp_size, 0)))
            max_y = int(np.min((y + stamp_size, im.shape[1])))

            min_member_x = int(np.max((x - dist_max, 0)))
            max_member_x = int(np.min((x + dist_max, im.shape[0])))
            min_member_y = int(np.max((y - dist_max, 0)))
            max_member_y = int(np.min((y + dist_max, im.shape[1])))

            if min_x >= max_x or min_y >= max_y:
                print("make_lens_atlas: misshapen lens for ID  {0}".format(ID))
                continue

            # if it is a training image, claim the alpha parameter
            if im.shape[2] == 4:
                alpha = im[:, :, 3][min_y:max_y, min_x:max_x]
                im = im[:, :, :3][min_y:max_y, min_x:max_x]
            else:
                alpha = None
                im = im[min_y:max_y, min_x:max_x]

            fig = plt.figure(figsize=figsize_stamp)
            ax = fig.add_subplot(111)
            ax.imshow(im, origin=origin)
            # cluster centre
            ax.scatter(x - min_x, y - min_y,
                       marker='d', c=(0, 1.0, 0), s=100, alpha=0.75)

            if flags['contour'] or flags['heatmap'] or flags['points'] != 0:
                x_all = annotationhistory['At_X']
                y_all = annotationhistory['At_Y']
                x_markers_all = _atlas_flatten(x_all)
                y_markers_all = _atlas_flatten(y_all)
                agents_numbers = np.arange(x_markers_all.size)

                if 'labels' in annotationhistory:
                    # pick the cluster whose mean position is closest to
                    # this catalog entry and keep its members
                    labels = _atlas_flatten(annotationhistory['labels'])
                    cluster_labels = list(set(labels))
                    data = np.vstack((x_markers_all, y_markers_all)).T
                    cluster_centers = np.array([
                        np.mean(data[labels == label], axis=0)
                        for label in cluster_labels])
                    distances2 = np.sum(
                        np.square(cluster_centers - np.array([x, y])),
                        axis=1)
                    label_center = cluster_labels[np.argmin(distances2)]
                    conds = (labels == label_center)
                else:
                    # no recorded cluster members: keep the markers that
                    # fall within dist_max of the centre
                    conds = ((x_markers_all >= min_member_x) &
                             (x_markers_all <= max_member_x) &
                             (y_markers_all >= min_member_y) &
                             (y_markers_all <= max_member_y))

                agents = agents_numbers[conds]
                x_markers = x_markers_all[agents]
                y_markers = y_markers_all[agents]

                # thin the markers that get drawn as points
                n_catalog = len(agents)
                if n_catalog < 1:
                    print(lens_i, n_catalog)
                if 0 < flags['points'] < n_catalog:
                    agents_points = np.random.choice(
                        agents, size=flags['points'], replace=False)
                else:
                    agents_points = agents
                x_markers_filtered = x_markers_all[agents_points]
                y_markers_filtered = y_markers_all[agents_points]

                if flags['skill'] and len(agents) > 0:
                    skill_all = _atlas_marker_skill(annotationhistory, x_all)
                    skill = skill_all[agents]
                    sizes_filtered = _atlas_marker_sizes(
                        skill_all, skill, agents_points)
                else:
                    skill = None
                    sizes_filtered = 50
                colors = (0, 1.0, 0)

                # ------------------------------------------------------
                # heatmap goes into its own figure
                if flags['heatmap'] and len(agents) > 0:
                    fig_heatmap = plt.figure(figsize=figsize_stamp)
                    ax_heatmap = fig_heatmap.add_subplot(111)

                    # no need to filter the markers further: xbins and
                    # ybins already restrict them to the stamp
                    pdf2d(x_markers - min_x, y_markers - min_y,
                          xbins=xbins, ybins=ybins,
                          weights=skill, smooth=flags['smooth_click'],
                          color=(0, 1.0, 0),
                          style='hist',
                          axis=ax_heatmap)

                    if flags['alpha'] and alpha is not None:
                        contour_hist(
                            alpha.T,
                            extent=(xbins[0], xbins[-1],
                                    ybins[0], ybins[-1]),
                            color='w', style='contour', axis=ax_heatmap)

                    _atlas_hide_ticks(ax_heatmap)
                    # CPD 04.08.14: Flip axis to old conventions
                    ax_heatmap.invert_yaxis()

                    outfile = flags['output_directory'] + \
                        '{0}_cluster_{1}_heatmap.{2}'.format(
                            ID, lens_i, flags['output_format'])
                    _atlas_save(
                        fig_heatmap, outfile,
                        'heatmap problem with {0} {1}'.format(ID, lens_i))

                # ------------------------------------------------------
                # back to the main stamp figure
                # contours
                if flags['contour'] and len(agents) > 0:
                    pdf2d(x_markers - min_x, y_markers - min_y,
                          xbins=xbins, ybins=ybins,
                          weights=skill, smooth=flags['smooth_click'],
                          color=(0, 1.0, 0),
                          style='contour',
                          axis=ax)

                # plot points
                if flags['points'] != 0 and len(agents) > 0:
                    ax.scatter(x_markers_filtered - min_x,
                               y_markers_filtered - min_y,
                               c=colors, s=sizes_filtered, alpha=0.25)

                # plot alpha
                if flags['alpha'] and alpha is not None:
                    contour_hist(
                        alpha.T,
                        extent=(xbins[0], xbins[-1], ybins[0], ybins[-1]),
                        color='w', style='contour', axis=ax)

            # ----------------------------------------------------------
            _atlas_hide_ticks(ax)
            ax.invert_yaxis()

            outfile = flags['output_directory'] + \
                '{0}_cluster_{1}_contour.{2}'.format(
                    ID, lens_i, flags['output_format'])
            _atlas_save(fig, outfile,
                        'contour problem with {0} {1}'.format(ID, lens_i))

            plt.close('all')

    # ------------------------------------------------------------------
    # Fields: one full image per subject

    if flags['field']:
        print("make_lens_atlas: running fields")
        # find the unique IDs. mark centers and also centrals if clustering
        # is done
        unique_IDs = np.unique(catalog['id'])
        for ID in unique_IDs:
            mini_catalog = catalog[catalog['id'] == ID]
            subject = collection.member[ID]
            annotationhistory = subject.annotationhistory

            # cluster centers
            x_centers = mini_catalog['x']
            # flip y from catalog
            y_centers = image_y_size - mini_catalog['y']

            # filter out the -1 entry
            center_cond = (x_centers > 0) & (y_centers > 0)
            # filter outliers if possible
            if 'dist' in mini_catalog.dtype.names:
                center_cond &= mini_catalog['dist'] > 0

            x_centers = x_centers[center_cond]
            y_centers = y_centers[center_cond]
            if len(x_centers) == 0:
                # welp, nothing here
                continue
            colors_centers = [(0, 1.0, 0) for _ in x_centers]

            # ----------------------------------------------------------
            # download png
            outname = flags['output_directory'] + '{0}_field.png'.format(ID)
            im = get_online_png(subject.location, outname)

            # if it is a training image, claim the alpha parameter
            if im.shape[2] == 4:
                alpha = im[:, :, 3]
                im = im[:, :, :3]
            else:
                alpha = None

            fig = plt.figure(figsize=figsize_field)
            ax = fig.add_subplot(111)
            ax.imshow(im, origin=origin)

            field_xbins = np.arange(im.shape[0])
            field_ybins = np.arange(im.shape[1])
            min_x = 0
            min_y = 0
            max_x = im.shape[0]
            max_y = im.shape[1]

            # NOTE(review): a skill-scaled size for the centres used to be
            # computed here but was then unconditionally overwritten with
            # this fixed size; the fixed size is kept so output is unchanged.
            sizes_centers = [100 for _ in x_centers]
            ax.scatter(x_centers, y_centers,
                       marker='d', c=colors_centers,
                       s=sizes_centers, alpha=0.75)

            if flags['diagnostics']:
                # circle of radius dist_max and box of half-width
                # stamp_size around every centre
                r = flags['dist_max']
                b = flags['stamp_size']
                b_ones = np.ones(100) * b
                b_arr = np.linspace(-b, b, 100)
                phis = np.arange(0, 6.28, 0.01)
                for x_center, y_center in zip(x_centers, y_centers):
                    ax.plot(x_center + r * np.cos(phis),
                            y_center + r * np.sin(phis),
                            c='w', ls='-', linewidth=4)
                    # plot box
                    ax.plot(x_center + b_ones, y_center + b_arr,
                            c='r', ls='--', linewidth=4)
                    ax.plot(x_center - b_ones, y_center + b_arr,
                            c='r', ls='--', linewidth=4)
                    ax.plot(x_center + b_arr, y_center + b_ones,
                            c='r', ls='--', linewidth=4)
                    ax.plot(x_center + b_arr, y_center - b_ones,
                            c='r', ls='--', linewidth=4)

            x_all = annotationhistory['At_X']
            y_all = annotationhistory['At_Y']
            x_markers_all = _atlas_flatten(x_all)
            y_markers_all = _atlas_flatten(y_all)

            # keep the markers that lie on the image
            agents_numbers = np.arange(x_markers_all.size)
            conds = ((x_markers_all >= min_x) &
                     (x_markers_all <= max_x) &
                     (y_markers_all >= min_y) &
                     (y_markers_all <= max_y))
            agents = agents_numbers[conds]
            x_markers = x_markers_all[agents]
            y_markers = y_markers_all[agents]

            # thin the markers that get drawn as points
            n_catalog = len(agents)
            if 0 < flags['points'] < n_catalog:
                agents_points = np.random.choice(
                    agents, size=flags['points'], replace=False)
            else:
                agents_points = agents
            x_markers_filtered = x_markers_all[agents_points]
            y_markers_filtered = y_markers_all[agents_points]

            # same guard as the stamps: without markers there is no skill
            # range to scale by
            if flags['skill'] and len(agents) > 0:
                skill_all = _atlas_marker_skill(annotationhistory, x_all)
                skill = skill_all[agents]
                sizes_filtered = _atlas_marker_sizes(
                    skill_all, skill, agents_points)
            else:
                skill = None
                sizes_filtered = 50

            # Transparency of the markers. Kept apart from `alpha`, which
            # holds the image's alpha channel and is still needed below.
            if 'labels' in annotationhistory:
                labels = _atlas_flatten(annotationhistory['labels'])
                marker_alpha = 0.75
                # label -1 (outliers) in red, cluster members in green
                colors = [(1.0, 0.0, 0) if label == -1 else (0, 1.0, 0)
                          for label in labels[agents_points]]
            else:
                colors = (0, 1.0, 0)
                marker_alpha = 0.25

            # ----------------------------------------------------------
            # contours
            if flags['contour'] and len(x_markers) >= flags['stamp_min']:
                pdf2d(x_markers - min_x, y_markers - min_y,
                      xbins=field_xbins, ybins=field_ybins,
                      weights=skill, smooth=flags['smooth_click'],
                      color=(0, 1.0, 0),
                      style='contour',
                      axis=ax)

            # ----------------------------------------------------------
            # plot points
            if flags['points'] != 0:
                ax.scatter(x_markers_filtered - min_x,
                           y_markers_filtered - min_y,
                           c=colors, s=sizes_filtered, alpha=marker_alpha)

            # ----------------------------------------------------------
            # do alpha
            if flags['alpha'] and alpha is not None:
                contour_hist(
                    alpha.T,
                    extent=(field_xbins[0], field_xbins[-1],
                            field_ybins[0], field_ybins[-1]),
                    color='w', style='contour', axis=ax)

            _atlas_hide_ticks(ax)
            ax.invert_yaxis()

            outfile = flags['output_directory'] + \
                '{0}_field_output.{1}'.format(ID, flags['output_format'])
            _atlas_save(fig, outfile,
                        'field problem with field {0}'.format(ID))

            plt.close('all')

    print('make_lens_atlas: All done!')