Beispiel #1
0
    def update_skill(self):
        """
        Recompute this object's skill, store it on self.skill and return it.

        The skill is the value returned by swap.expectedInformationGain
        when called with the fixed first argument 0.5 followed by self.PL
        and self.PD.
        """
        # A commented-out p*log2(p) calculation that filled
        # self.contribution used to sit here; only the library call remains.
        information_gain = swap.expectedInformationGain(0.5, self.PL, self.PD)
        self.skill = information_gain

        return self.skill
Beispiel #2
0
    def update_skill(self):
        """
        Recompute this object's skill, store it on self.skill and return it.

        The skill is the value returned by swap.expectedInformationGain
        when called with the fixed first argument 0.5 followed by self.PL
        and self.PD.
        """
        # A commented-out p*log2(p) calculation that filled
        # self.contribution used to sit here; only the library call remains.
        information_gain = swap.expectedInformationGain(0.5, self.PL, self.PD)
        self.skill = information_gain

        return self.skill
    # Two calls to plotplpd (defined elsewhere), one per stage; the fourth
    # argument is 1 / 2 -- presumably the panel index, TODO confirm.
    # The current figure is then saved and cleared before the next plot.
    plotplpd(final_PL1,final_PD1,information1,1,"Stage 1 Information")
    plotplpd(final_PL2,final_PD2,information2,2,"Stage 2 Information")
    savefig("information_plpd.png")
    clf();    

    ###########################
    ##Skill PL PD plot
    ###########################
    # Tabulate swap.expectedInformationGain(0.5, a, b) on a 101 x 101 grid
    # where a and b each step through 0.00, 0.01, ..., 1.00.
    bins=101
    skill=np.zeros(bins*bins);
    skill=np.reshape(skill,(bins,bins));
    for ii in range(bins):
        # ii -> value passed as the second argument
        M_ll=0.01*ii;
        for jj in range(bins):
            # jj -> value passed as the third argument
            M_nn=0.01*jj;
            skill[ii][jj]=swap.expectedInformationGain(0.5, M_ll, M_nn);
 
    # Show the grid as an image covering the unit square, with a colorbar.
    # NOTE(review): imshow draws the first array index (ii, i.e. M_ll)
    # along the vertical axis, while the horizontal axis is labelled P_L
    # below -- confirm the orientation is intended (it is harmless if
    # expectedInformationGain is symmetric in its last two arguments).
    ax=subplot(1,1,1);
    im=ax.imshow(skill,origin='lower',extent=(0,1,0,1));
    cbar=colorbar(im);
    # Draw the colorbar patch edges in the face colour.
    cbar.solids.set_edgecolor("face");

    ax.set_xlim(0,1);
    ax.set_ylim(0,1);
    ax.set_xlabel("P$_L$");
    ax.set_ylabel("P$_D$");
    ax.set_title("Skill");
    # Guide lines: dashed lines at 0.5 on each axis and the solid
    # anti-diagonal y = 1 - x (xx deliberately overshoots the axis limits).
    xx=np.arange(-0.1,2,0.1); 
    ax.axhline(0.5,color="k",linestyle='dashed');
    ax.axvline(0.5,color="k",linestyle='dashed');
    ax.plot(xx,1-xx,color="k");
def make_lens_catalog(args):
    """
    NAME
        make_lens_catalog

    PURPOSE
        Given location of collection pickle, this script produces a set of
        annotated images of lenses (heatmaps for lens locations, markers for
        where clicks were, etc).

    COMMENTS
        You have to download the file so it chooses whever your output
        directory is to also download the raw images.
        This should be pretty customizable.

    FLAGS
        -h              Print this message

        --skill         Weight by skill


    INPUTS
        collection.pickle

    OUTPUTS
        lens.dat
            Assumed format:
            ID   kind   x   y    Prob     N0   Skill   Dist

            Here:
            ID = Space Warps subject ID
            kind = Space Warps subject type (sim, dud, test)
            x,y = object (cluster) centroid, in pixels
            P = Space Warps subject probability
            N0 = number of markers in the cluster
            S = total skill per cluster, summed over markers
            D = biggest distance within cluster

    EXAMPLE

    BUGS

    AUTHORS
        This file is part of the Space Warps project, and is distributed
        under the GPL v2 by the Space Warps Science Team.
        http://spacewarps.org/

    HISTORY
        2013-07-16  started Davis (KIPAC)
    """

    # ------------------------------------------------------------------
    # Some defaults:

    flags = {'skill': False,
             'output_directory': './',
             'output_name': 'catalog.dat',
             'image_y_size': 440,
             'catalog_path': '',
             'update_collection': '',}

    # ------------------------------------------------------------------
    # Read in options:

    # this has to be easier to do...
    for arg in args:
        if arg in flags:
            flags[arg] = args[arg]
        elif arg == 'collection_path':
            collection_path = args[arg]
        else:
            print "make_lens_atlas: unrecognized flag ",arg

    print "make_lens_catalog: illustrating behaviour captured in collection file: "
    print "make_lens_catalog: ",collection_path

    memory = joblib.Memory(cachedir=flags['output_directory'])
    memory.clear()

    catalog_path = flags['output_directory'] + flags['output_name']
    if len(flags['output_name']) > 0:
        F = open(catalog_path, 'w')
        F.write('id,kind,x,y,prob,n0,skill,dist\n')

    # ------------------------------------------------------------------
    # Read in files:

    collection = swap.read_pickle(collection_path, 'collection')
    ID_list = collection.list()
    print "make_lens_catalog: collection numbers ", len(ID_list)

    if flags['catalog_path'] != '':
        print "make_lens_catalog: filtering from catalog ",flags['catalog_path']
        catalog_in = csv2rec(flags['catalog_path'])
        ID_list = np.unique(catalog_in['id'])

    # ------------------------------------------------------------------
    # Run through data:

    catalog = {}
    for ID in ID_list:

        subject = collection.member[ID]
        kind = subject.kind
        P = subject.mean_probability


        itwas = subject.annotationhistory['ItWas']
        x_all = subject.annotationhistory['At_X']
        y_all = subject.annotationhistory['At_Y']

        x_markers = np.array([xi for xj in x_all for xi in xj])
        y_markers = np.array([yi for yj in y_all for yi in yj])

        catalog.update({ID: {'agents_reject': [],
                             'x': x_markers,
                             'y': y_markers,}})
        PL_all = subject.annotationhistory['PL']
        PD_all = subject.annotationhistory['PD']

        # filter out the empty clicks
        PL_list = []
        PL_nots = []
        for i, xj in enumerate(x_all):
            # len(xj) of empty = 0
            PL_list.append([PL_all[i]] * len(xj))
            if len(xj) == 0:
                PL_nots.append(PL_all[i])
        PL = np.array([PLi for PLj in PL_list for PLi in PLj])
        PL_nots = np.array(PL_nots)

        # filter out the empty clicks
        PD_list = []
        PD_nots = []
        for i, xj in enumerate(x_all):
            PD_list.append([PD_all[i]] * len(xj))
            if len(xj) == 0:
                PD_nots.append(PD_all[i])
                catalog[ID]['agents_reject'].append(i)
        PD = np.array([PDi for PDj in PD_list for PDi in PDj])
        PD_nots = np.array(PD_nots)

        skill = swap.expectedInformationGain(0.5, PL, PD)  # skill

        # it is only fair to write out the NOTs, too
        # do the empty guys
        skill_nots = swap.expectedInformationGain(0.5, PL_nots, PD_nots)  # skill

        x, y = -1, -1
        N0 = len(skill_nots)
        S = np.sum(skill_nots)
        D = 0

        ## catalog.append((ID, kind, x, y, P, N0, S, D))
        if len(catalog)%500 == 0:
            print len(catalog)
        if len(flags['output_name']) > 0:
            F.write('{0},{1},{2},{3},{4},{5},{6},{7}\n'.format(
                ID, kind, x, y, P, N0, S, D))

        if len(x_markers) == 0:
            # apparently everyone was a not...
            catalog[ID]['agents_labels'] = np.array([])
            continue

        # ------------------------------------------------------------------
        # cluster
        print 'make_lens_catalog: subject ID = ', ID
        if flags['skill']:
            cluster_centers, cluster_center_labels, cluster_labels, \
                    n_clusters, dist_within = outlier_clusters(x_markers, y_markers, skill, memory=memory)
        else:
            cluster_centers, cluster_center_labels, cluster_labels, \
                    n_clusters, dist_within = outlier_clusters(x_markers, y_markers, None, memory=memory)
        # need to get: x, y, N0, S

        catalog[ID]['agents_labels'] = cluster_labels

        for cluster_center_label in cluster_center_labels:
            cluster_center = cluster_centers[cluster_center_label]
            members = (cluster_labels == cluster_center_label)

            x, y = cluster_center
            # convert y to catalog convention
            y = flags['image_y_size'] - y
            N0 = np.sum(members)
            S = np.sum(skill[members])
            D = dist_within[cluster_center_label]

            if cluster_center_label == -1:
                # outlier cluster
                # so really every point is its own cluster...
                D = 0
            ## catalog.append((ID, kind, x, y, P, N0, S, D))
            ## if len(catalog)%500 == 0:
            ##     print len(catalog)
            # TODO: make some requirement to be included (exclude outliers)
            if len(flags['output_name']) > 0:
                F.write('{0},{1},{2},{3},{4},{5},{6},{7}\n'.format(
                    ID, kind, x, y, P, N0, S, D))


    print 'make_lens_catalog: Clearing memory'
    # clear memory
    memory.clear()

    if len(flags['output_name']) > 0:
        print 'make_lens_catalog: closing file!'
        F.close()

    if len(flags['update_collection']) > 0:
        print 'make_lens_catalog: writing updated collection to', flags['update_collection']

        # TODO: get the other params correct!!!!
        collection_fat = swap.collection.Collection()
        for ID in catalog:
            subject = collection.member[ID]
            atx = subject.annotationhistory['At_X']
            labels_in = list(catalog[ID]['agents_labels'])
            labels_fat = []
            for atx_i in atx:
                labels_fat.append([])
                for atx_ij in atx_i:
                    labels_fat[-1].append(labels_in.pop(0))
            subject.annotationhistory.update({'labels': labels_fat})
            collection_fat.member.update({ID: subject})
        swap.write_pickle(collection_fat, flags['update_collection'])

    print 'make_lens_catalog: All done!'

    return catalog
Beispiel #5
0
    # Two calls to plotplpd (defined elsewhere), one per stage; the fourth
    # argument is 1 / 2 -- presumably the panel index, TODO confirm.
    # The current figure is then saved and cleared before the next plot.
    plotplpd(final_PL1,final_PD1,information1,1,"Stage 1 Information")
    plotplpd(final_PL2,final_PD2,information2,2,"Stage 2 Information")
    savefig("information_plpd.png")
    clf();    

    ###########################
    ##Skill PL PD plot
    ###########################
    # Tabulate swap.expectedInformationGain(0.5, a, b) on a 101 x 101 grid
    # where a and b each step through 0.00, 0.01, ..., 1.00.
    bins=101
    skill=np.zeros(bins*bins);
    skill=np.reshape(skill,(bins,bins));
    for ii in range(bins):
        # ii -> value passed as the second argument
        M_ll=0.01*ii;
        for jj in range(bins):
            # jj -> value passed as the third argument
            M_nn=0.01*jj;
            skill[ii][jj]=swap.expectedInformationGain(0.5, M_ll, M_nn);
 
    # Show the grid as an image covering the unit square, with a colorbar.
    # NOTE(review): imshow draws the first array index (ii, i.e. M_ll)
    # along the vertical axis, while the horizontal axis is labelled P_L
    # below -- confirm the orientation is intended (it is harmless if
    # expectedInformationGain is symmetric in its last two arguments).
    ax=subplot(1,1,1);
    im=ax.imshow(skill,origin='lower',extent=(0,1,0,1));
    cbar=colorbar(im);
    # Draw the colorbar patch edges in the face colour.
    cbar.solids.set_edgecolor("face");

    ax.set_xlim(0,1);
    ax.set_ylim(0,1);
    ax.set_xlabel("P$_L$");
    ax.set_ylabel("P$_D$");
    ax.set_title("Skill");
    # Guide lines: dashed lines at 0.5 on each axis and the solid
    # anti-diagonal y = 1 - x (xx deliberately overshoots the axis limits).
    xx=np.arange(-0.1,2,0.1); 
    ax.axhline(0.5,color="k",linestyle='dashed');
    ax.axvline(0.5,color="k",linestyle='dashed');
    ax.plot(xx,1-xx,color="k");
Beispiel #6
0
def make_lens_atlas(args):
    """
    NAME
        make_lens_atlas

    PURPOSE
        Given location of bureau and collection pickles as well as a list of
        subjects, this script produces a set of annotated images of lenses
        (heatmaps for lens locations, markers for where clicks were, etc).

    COMMENTS
        You have to download the file so it chooses whever your output
        directory is to also download the raw images.
        This should be pretty customizable.

    FLAGS
        -h              Print this message
        --heatmap       Do heatmaps
        --contour       Do contours
        --field         Do full image
        --stamp         Do cutouts
        --alpha         Do alpha

        --points N      Take N agents and plot them. Any number < 0 = do all
        --skill         Weight agent markers by skill

    INPUTS
        collection collection.pickle
        catalog catalog.dat
            Assumed format:
            ID   kind   x   y    Prob     N0   Skill   Dist

            Here:
            ID = Space Warps subject ID
            kind = Space Warps subject type (sim, dud, test)
            x,y = object (cluster) centroid, in pixels
            P = Space Warps subject probability
            N0 = number of markers in the cluster
            S = total skill per cluster, summed over markers
            D = biggest distance within cluster

    OUTPUTS

    EXAMPLE

    BUGS
        TODO: incorporate some of these defaults into the flags dictionary

    AUTHORS
        This file is part of the Space Warps project, and is distributed
        under the GPL v2 by the Space Warps Science Team.
        http://spacewarps.org/

    HISTORY
        2013-07-16  started Davis (KIPAC)
    """

    # ------------------------------------------------------------------
    # Some defaults:

    flags = {'points': 30,
             'heatmap': False,
             'contour': False,
             'field': False,
             'stamp': False,
             'alpha': False,
             'skill': False,
             'output_directory': './',
             'output_format': 'png',
             'stamp_size': 50,
             'dist_max': 30,
             'stamp_min': 1,
             'smooth_click': 3,
             'figsize_stamp': 5,
             'figsize_field': 10,
             'image_y_size': 440,
             'diagnostics': False,
            }

    # ------------------------------------------------------------------
    # Read in options:

    # this has to be easier to do...
    for arg in args:
        if arg in flags:
            flags[arg] = args[arg]
        elif arg == 'collection':
            collection_path = args[arg]
        elif arg == 'catalog':
            catalog_path = args[arg]
        else:
            print "make_lens_atlas: unrecognized flag ",arg
    xbins = np.arange(flags['stamp_size'] * 2)
    ybins = np.arange(flags['stamp_size'] * 2)
    figsize_stamp = (flags['figsize_stamp'], flags['figsize_stamp'])
    figsize_field = (flags['figsize_field'], flags['figsize_field'])
    image_y_size = flags['image_y_size']

    print "make_lens_atlas: illustrating behaviour captured in collection, and lens files: "
    print "make_lens_atlas: ", collection_path
    print "make_lens_atlas: ", catalog_path


    # ------------------------------------------------------------------
    # Read in files:

    #bureau = swap.read_pickle(bureau_path, 'bureau')  # TODO: needed?
    collection = swap.read_pickle(collection_path, 'collection')
    catalog = csv2rec(catalog_path)

    #print "make_lens_atlas: bureau numbers ", len(bureau.list())
    print "make_lens_atlas: collection numbers ", len(collection.list())
    print "make_lens_atlas: catalog numbers ", len(catalog)

    # ------------------------------------------------------------------
    # Run through data:

    # ------------------------------------------------------------------
    # Stamps:
    if flags['stamp']:
        print "make_lens_atlas: running stamps"
        for lens_i in range(len(catalog)):
            ID = catalog[lens_i]['id']
            kind = catalog[lens_i]['kind']
            x = catalog[lens_i]['x']
            # flip y axis
            y = image_y_size - catalog[lens_i]['y']
            N0 = catalog[lens_i]['n0']
            if 'dist' in catalog.dtype.names:
                if catalog[lens_i]['dist'] == 0:
                    continue

            if ((x < 0)):
                # this is one of the 'non points'; skip
                continue
            if (N0 < flags['stamp_min']):
                # not enough points!
                continue
            subject = collection.member[ID]
            annotationhistory = subject.annotationhistory

            # ------------------------------------------------------------------
            # download png
            url = subject.location
            outname = flags['output_directory'] + '{0}_field.png'.format(ID)
            im = get_online_png(url, outname)
            min_x = np.int(np.max((x - flags['stamp_size'], 0)))
            max_x = np.int(np.min((x + flags['stamp_size'], im.shape[0])))
            min_y = np.int(np.max((y - flags['stamp_size'], 0)))
            max_y = np.int(np.min((y + flags['stamp_size'], im.shape[1])))

            min_member_x = np.int(np.max((x - flags['dist_max'], 0)))
            max_member_x = np.int(np.min((x + flags['dist_max'], im.shape[0])))
            min_member_y = np.int(np.max((y - flags['dist_max'], 0)))
            max_member_y = np.int(np.min((y + flags['dist_max'], im.shape[1])))
            if (min_x >= max_x) + (min_y >= max_y):
                print "make_lens_atlas: misshapen lens for ID ", ID
                continue

            # if it is a training image, claim the alpha parameter
            if im.shape[2] == 4:
                alpha = im[:, :, 3][min_y: max_y, min_x: max_x]
                im = im[:, :, :3][min_y: max_y, min_x: max_x]
            else:
                alpha = None
                im = im[min_y: max_y, min_x: max_x]

            fig = plt.figure(figsize=figsize_stamp)
            ax = fig.add_subplot(111)
            ax.imshow(im, origin=origin)

            ax.scatter(x - min_x,
                       y - min_y,
                       marker='d',
                       c=(0, 1.0, 0), s=100,
                       alpha=0.75)

            if ((flags['contour'])
                + (flags['heatmap'])
                + (flags['points'] != 0)):

                itwas = annotationhistory['ItWas']
                x_all = annotationhistory['At_X']
                y_all = annotationhistory['At_Y']

                x_markers_all = np.array([xi for xj in x_all for xi in xj])
                y_markers_all = np.array([yi for yj in y_all for yi in yj])

                agents_numbers = np.arange(
                        x_markers_all.size)
                if 'labels' in annotationhistory:
                    # find which label is closest to your folks
                    labels_all = annotationhistory['labels']
                    labels = np.array([xi for xj in labels_all for xi in xj])
                    cluster_labels = list(set(labels))
                    data = np.vstack((x_markers_all, y_markers_all)).T
                    cluster_centers = np.array([np.mean(data[labels == i], axis=0)
                                                for i in cluster_labels])
                    # find which label is closest to the (x,y)
                    label_center = cluster_labels[np.argmin(np.sum(np.square(cluster_centers - np.vstack((x, y)).T), axis=1))]
                    conds = (labels == label_center)
                else:
                    # now filter markers by those that are within
                    # dist_max of the center (since I don't record cluster
                    # members...)
                    conds = ((x_markers_all >= min_member_x) *
                             (x_markers_all <= max_member_x) *
                             (y_markers_all >= min_member_y) *
                             (y_markers_all <= max_member_y))
                agents = agents_numbers[conds]
                x_markers = x_markers_all[agents]
                y_markers = y_markers_all[agents]

                # filter markers
                n_catalog = len(agents)
                if (flags['points'] > 0) * \
                        (flags['points'] < n_catalog):
                    agents_points = np.random.choice(
                            agents,
                            size=flags['points'], replace=False)
                else:
                    agents_points = agents
                x_markers_filtered = x_markers_all[agents_points]
                y_markers_filtered = y_markers_all[agents_points]

                if (flags['skill']) * (len(agents) > 0):
                    PL_all = annotationhistory['PL']
                    PD_all = annotationhistory['PD']

                    # filter out the empty clicks
                    PL_list = []
                    for i, xj in enumerate(x_all):
                        PL_list.append([PL_all[i]] * len(xj))
                    PL = np.array([PLi for PLj in PL_list for PLi in PLj])

                    # filter out the empty clicks
                    PD_list = []
                    for i, xj in enumerate(x_all):
                        PD_list.append([PD_all[i]] * len(xj))
                    PD = np.array([PDi for PDj in PD_list for PDi in PDj])

                    skill_all = swap.expectedInformationGain(0.5, PL, PD)
                    skill = skill_all[agents]

                    smax = 100
                    smin = 5
                    if np.max(skill) != np.min(skill):
                        sizes_all = (skill_all - np.min(skill)) * (smax - smin) / \
                                (np.max(skill) - np.min(skill))
                        sizes_filtered = sizes_all[agents_points]
                    else:
                        sizes_filtered = 50
                else:
                    skill = None
                    sizes_filtered = 50
                colors = (0, 1.0, 0)

                # ----------------------------------------------------------
                # heatmaps
                if (flags['heatmap']) * (len(agents) > 0):
                    fig_heatmap = plt.figure(figsize=figsize_stamp)
                    ax_heatmap = fig_heatmap.add_subplot(111)

                    # now do the lens locations
                    # don't need to filter the x's since that is filtered by
                    # xbins and ybins anyways
                    pdf2d(x_markers - min_x, y_markers - min_y,
                          xbins=xbins, ybins=ybins,
                          weights=skill, smooth=flags['smooth_click'],
                          color=(0, 1.0, 0),
                          style='hist',
                          axis=ax_heatmap)

                    if flags['alpha'] * (alpha != None):
                        contour_hist(alpha.T,
                            extent=(xbins[0], xbins[-1], ybins[0], ybins[-1]),
                            color='w', style='contour', axis=ax_heatmap)

                    ax_heatmap.tick_params(\
                        axis='both',          # changes apply to the x-axis
                        which='both',      # both major and minor ticks are affected
                        bottom='off',      # ticks along the bottom edge are off
                        top='off',         # ticks along the top edge are off
                        left='off',
                        right='off',
                        labelleft='off',
                        labelbottom='off') # labels along the bottom edge are off

                    # CPD 04.08.14: Flip axis to old conventions
                    ax_heatmap.invert_yaxis()
                    try:
                        outfile = flags['output_directory'] + \
                                    '{0}_cluster_{1}_heatmap.{2}'.format(
                                        ID, lens_i, flags['output_format'])
                        # fig_heatmap.savefig(outfile)
                        #fig_heatmap.canvas.print_png(outfile)
                        fig_heatmap.savefig(outfile, bbox_inches='tight', pad_inches=0)
                    except:
                        print 'make_lens_catalog: heatmap problem with ', ID, lens_i
                        # import ipdb; ipdb.set_trace()

                # ---------------------------------------------------------
                # back to our other plots
                # contours
                if (flags['contour']) * (len(agents) > 0):

                    # now do the lens locations
                    # don't need to filter the x's since that is filtered by
                    # xbins and ybins anyways
                    pdf2d(x_markers - min_x, y_markers - min_y,
                          xbins=xbins, ybins=ybins,
                          weights=skill, smooth=flags['smooth_click'],
                          color=(0, 1.0, 0),
                          style='contour',
                          axis=ax)

                # plot points
                if (flags['points'] != 0) * (len(agents) > 0):
                    ax.scatter(x_markers_filtered - min_x,
                               y_markers_filtered - min_y,
                               c=colors, s=sizes_filtered,
                               alpha=0.25)

            # plot alpha
            if flags['alpha'] * (alpha != None):
                contour_hist(alpha.T,
                    extent=(xbins[0], xbins[-1], ybins[0], ybins[-1]),
                    color='w', style='contour', axis=ax)

            # ----------------------------------------------------------
            ax.tick_params(\
                axis='both',          # changes apply to the x-axis
                which='both',      # both major and minor ticks are affected
                bottom='off',      # ticks along the bottom edge are off
                top='off',         # ticks along the top edge are off
                left='off',
                right='off',
                labelleft='off',
                labelbottom='off') # labels along the bottom edge are off

            ax.invert_yaxis()
            try:
                outfile = flags['output_directory'] + \
                            '{0}_cluster_{1}_contour.{2}'.format(
                                ID, lens_i, flags['output_format']
                                )
                # fig.savefig(outfile)
                fig.savefig(outfile, bbox_inches='tight', pad_inches=0)
                # fig.canvas.print_png(outfile)
            except:
                print 'make_lens_catalog: contour problem with ', ID, lens_i
                # import ipdb; ipdb.set_trace()
            plt.close('all')

    # ------------------------------------------------------------------
    # Fields
    if flags['field']:
        print "make_lens_atlas: running fields"
        # find the unique IDs. mark centers and also centrals if clustering is
        # done
        #import ipdb; ipdb.set_trace()
        unique_IDs = np.unique(catalog['id'])
        for ID in unique_IDs:
            mini_catalog = catalog[catalog['id'] == ID]
            subject = collection.member[ID]
            annotationhistory = subject.annotationhistory

            # plot cluster centers
            kind = mini_catalog['kind']
            x_centers = mini_catalog['x']
            # flip y from catalog
            y_centers = image_y_size - mini_catalog['y']
            skill_centers = mini_catalog['skill']
            # filter out the -1 entry
            center_cond = (x_centers > 0) * (y_centers > 0)
            # filter outliers if possible
            if 'dist' in mini_catalog.dtype.names:
                center_cond *= mini_catalog['dist'] > 0
            skill_centers = skill_centers[center_cond]
            x_centers = x_centers[center_cond]
            y_centers = y_centers[center_cond]
            colors_centers = [(0, 1.0, 0) for i in x_centers]

            if len(colors_centers) == 0:
                #welp, nothing here
                continue

            # ------------------------------------------------------------------
            # download png
            url = subject.location
            outname = flags['output_directory'] + '{0}_field.png'.format(ID)
            im = get_online_png(url, outname)

            # if it is a training image, claim the alpha parameter
            if im.shape[2] == 4:
                alpha = im[:, :, 3]
                im = im[:, :, :3]
            else:
                alpha = None

            fig = plt.figure(figsize=figsize_field)
            ax = fig.add_subplot(111)
            ax.imshow(im, origin=origin)
            xbins = np.arange(im.shape[0])
            ybins = np.arange(im.shape[1])
            min_x = 0
            min_y = 0
            max_x = im.shape[0]
            max_y = im.shape[1]

            if (flags['skill']) * (np.max(skill_centers) != np.min(skill_centers)):
                sizes_centers = (
                        (skill_centers - np.min(skill_centers)) *
                        (200 - 10) /
                        (np.max(skill_centers) - np.min(skill_centers)))
            else:
                sizes_centers = [100 for i in x_centers]
            sizes_centers = [100 for i in x_centers]
            ax.scatter(x_centers, y_centers,
                       marker='d', c=colors_centers,
                       s=sizes_centers, alpha=0.75)

            if flags['diagnostics']:
                r = flags['dist_max']
                b = flags['stamp_size']
                b_ones = np.ones(100) * b
                b_arr = np.linspace(-b, b, 100)
                def xy(x0, y0, r, phi):
                    return x0 + r * np.cos(phi), y0 + r * np.sin(phi)
                phis = np.arange(0, 6.28, 0.01)
                for i in xrange(len(x_centers)):
                    x_center = x_centers[i]
                    y_center = y_centers[i]

                    ax.plot( *xy(x_center, y_center, r, phis), c='w', ls='-', linewidth=4)

                    # plot box
                    ax.plot(x_center + b_ones, y_center + b_arr, c='r', ls='--', linewidth=4)
                    ax.plot(x_center - b_ones, y_center + b_arr, c='r', ls='--', linewidth=4)
                    ax.plot(x_center + b_arr, y_center + b_ones, c='r', ls='--', linewidth=4)
                    ax.plot(x_center + b_arr, y_center - b_ones, c='r', ls='--', linewidth=4)

            itwas = annotationhistory['ItWas']
            x_all = annotationhistory['At_X']
            y_all = annotationhistory['At_Y']

            x_markers_all = np.array([xi for xj in x_all for xi in xj])
            y_markers_all = np.array([yi for yj in y_all for yi in yj])

            # now filter markers by those that are within
            # stamp_size of the stamp
            # I'm pretty sure this step is redundant when going over the full
            # image?
            agents_numbers = np.arange(
                    x_markers_all.size)
            conds = ((x_markers_all >= min_x) * (x_markers_all <= max_x) *
                     (y_markers_all >= min_y) * (y_markers_all <= max_y))
            agents = agents_numbers[conds]

            x_markers = x_markers_all[agents]
            y_markers = y_markers_all[agents]

            # filter markers
            n_catalog = len(agents)
            if (flags['points'] > 0) * \
                    (flags['points'] < n_catalog):
                agents_points = np.random.choice(
                        agents,
                        size=flags['points'], replace=False)
            else:
                agents_points = agents
            x_markers_filtered = x_markers_all[agents_points]
            y_markers_filtered = y_markers_all[agents_points]

            if flags['skill']:
                PL_all = annotationhistory['PL']
                PD_all = annotationhistory['PD']

                # filter out the empty clicks
                PL_list = []
                for i, xj in enumerate(x_all):
                    PL_list.append([PL_all[i]] * len(xj))
                PL = np.array([PLi for PLj in PL_list for PLi in PLj])

                # filter out the empty clicks
                PD_list = []
                for i, xj in enumerate(x_all):
                    PD_list.append([PD_all[i]] * len(xj))
                PD = np.array([PDi for PDj in PD_list for PDi in PDj])

                skill_all = swap.expectedInformationGain(0.5, PL, PD)
                skill = skill_all[agents]

                smax = 100
                smin = 5
                if np.max(skill) != np.min(skill):
                    sizes_all = (skill_all - np.min(skill)) * (smax - smin) / \
                            (np.max(skill) - np.min(skill))
                    sizes_filtered = sizes_all[agents_points]
                else:
                    sizes_filtered = 50
            else:
                skill = None
                sizes_filtered = 50

            if 'labels' in annotationhistory:
                # find which label is closest to your folks
                labels_all = annotationhistory['labels']
                labels = np.array([xi for xj in labels_all for xi in xj])
                labels_filtered = labels[agents_points]
                colors = []
                alpha = 0.75
                for label in labels_filtered:
                    if label == -1:
                        colors.append((1.0, 0.0, 0))
                    else:
                        colors.append((0, 1.0, 0))
            else:
                colors = (0, 1.0, 0)
                alpha = 0.25

            # ----------------------------------------------------------
            # contours
            if flags['contour'] * (len(x_markers) >= flags['stamp_min']):

                # now do the lens locations
                # don't need to filter the x's since that is filtered by
                # xbins and ybins anyways
                pdf2d(x_markers - min_x, y_markers - min_y,
                      xbins=xbins, ybins=ybins,
                      weights=skill, smooth=flags['smooth_click'],
                      color=(0, 1.0, 0),
                      style='contour',
                      axis=ax)

            # ----------------------------------------------------------
            # plot points
            if flags['points'] != 0:
                ax.scatter(x_markers_filtered - min_x,
                           y_markers_filtered - min_y,
                           c=colors, s=sizes_filtered,
                           alpha=alpha)

            # ----------------------------------------------------------
            # do alpha
            if flags['alpha'] * (alpha != None):
                contour_hist(alpha.T,
                    extent=(xbins[0], xbins[-1], ybins[0], ybins[-1]),
                    color='w', style='contour', axis=ax)


            ax.tick_params(\
                axis='both',          # changes apply to the x-axis
                which='both',      # both major and minor ticks are affected
                bottom='off',      # ticks along the bottom edge are off
                top='off',         # ticks along the top edge are off
                left='off',
                right='off',
                labelleft='off',
                labelbottom='off') # labels along the bottom edge are off

            ax.invert_yaxis()
            try:
                outfile = flags['output_directory'] + '{0}_field_output.{1}'.format(ID, flags['output_format'])
                # fig.savefig(outfile)
                fig.savefig(outfile, bbox_inches='tight', pad_inches=0)
                #fig.canvas.print_png(outfile)
            except:
                print 'make_lens_catalog: field problem with field ', ID
            plt.close('all')

    print 'make_lens_catalog: All done!'
Beispiel #7
0
def make_lens_catalog(args):
    """
    NAME
        make_lens_catalog

    PURPOSE
        Given location of collection pickle, this script clusters the
        markers placed on each subject and writes a catalog of the cluster
        centers.

    COMMENTS
        For every subject one summary row is written for the
        classifications that placed no marker (x = y = -1, N0 = number of
        such classifications, S = their summed skill, D = 0), followed by
        one row per cluster returned by outlier_clusters.

    FLAGS  (entries of the args dictionary)
        collection_path     Path of the collection pickle (required)
        skill               Weight markers by skill when clustering
        output_directory    Directory for the catalog and the joblib cache
        output_name         Catalog file name; '' writes no catalog
        image_y_size        Image height used to flip y to the catalog
                            convention
        catalog_path        Optional existing catalog; only its ids are
                            processed
        update_collection   Optional path; a collection carrying the
                            per-marker cluster labels is pickled there

    INPUTS
        collection.pickle

    OUTPUTS
        output_directory + output_name (catalog.dat by default), comma
        separated, with header
            id,kind,x,y,prob,n0,skill,dist

            Here:
            ID = Space Warps subject ID
            kind = Space Warps subject type (sim, dud, test)
            x,y = object (cluster) centroid, in pixels
            P = Space Warps subject probability
            N0 = number of markers in the cluster
            S = total skill per cluster, summed over markers
            D = biggest distance within cluster

    RETURNS
        Dictionary keyed by subject ID. Each value holds
            'x', 'y'          flattened marker coordinates
            'agents_reject'   indices of classifications without markers
            'agents_labels'   cluster label of every marker

    RAISES
        ValueError if no 'collection_path' entry is supplied.

    EXAMPLE

    BUGS

    AUTHORS
        This file is part of the Space Warps project, and is distributed
        under the GPL v2 by the Space Warps Science Team.
        http://spacewarps.org/

    HISTORY
        2013-07-16  started Davis (KIPAC)
    """

    # ------------------------------------------------------------------
    # Some defaults:

    flags = {
        'skill': False,
        'output_directory': './',
        'output_name': 'catalog.dat',
        'image_y_size': 440,
        'catalog_path': '',
        'update_collection': '',
    }

    # ------------------------------------------------------------------
    # Read in options:

    collection_path = None
    for arg in args:
        if arg in flags:
            flags[arg] = args[arg]
        elif arg == 'collection_path':
            collection_path = args[arg]
        else:
            print("make_lens_catalog: unrecognized flag  {0}".format(arg))

    # Fail early and clearly instead of hitting an unbound local below.
    if collection_path is None:
        raise ValueError(
            "make_lens_catalog: no 'collection_path' entry was supplied")

    print("make_lens_catalog: illustrating behaviour captured in collection file: ")
    print("make_lens_catalog:  {0}".format(collection_path))

    # NOTE(review): 'cachedir' is the keyword this file was written
    # against; newer joblib releases may expect 'location' -- confirm
    # against the installed version.
    memory = joblib.Memory(cachedir=flags['output_directory'])
    memory.clear()

    row_format = '{0},{1},{2},{3},{4},{5},{6},{7}\n'
    catalog_file = None
    if len(flags['output_name']) > 0:
        catalog_file = open(
            flags['output_directory'] + flags['output_name'], 'w')

    catalog = {}
    try:
        if catalog_file is not None:
            catalog_file.write('id,kind,x,y,prob,n0,skill,dist\n')

        # --------------------------------------------------------------
        # Read in files:

        collection = swap.read_pickle(collection_path, 'collection')
        ID_list = collection.list()
        print("make_lens_catalog: collection numbers  {0}".format(
            len(ID_list)))

        if flags['catalog_path'] != '':
            print("make_lens_catalog: filtering from catalog  {0}".format(
                flags['catalog_path']))
            catalog_in = csv2rec(flags['catalog_path'])
            ID_list = np.unique(catalog_in['id'])

        # --------------------------------------------------------------
        # Run through data:

        for ID in ID_list:

            subject = collection.member[ID]
            kind = subject.kind
            P = subject.mean_probability

            x_all = subject.annotationhistory['At_X']
            y_all = subject.annotationhistory['At_Y']
            PL_all = subject.annotationhistory['PL']
            PD_all = subject.annotationhistory['PD']

            x_markers = np.array([xi for xj in x_all for xi in xj])
            y_markers = np.array([yi for yj in y_all for yi in yj])

            # One pass over the classifications: repeat PL/PD once per
            # marker, and set aside the classifications with no marker.
            PL_marked, PD_marked = [], []
            PL_nots, PD_nots = [], []
            agents_reject = []
            for i, xj in enumerate(x_all):
                if len(xj) == 0:
                    PL_nots.append(PL_all[i])
                    PD_nots.append(PD_all[i])
                    agents_reject.append(i)
                else:
                    PL_marked.extend([PL_all[i]] * len(xj))
                    PD_marked.extend([PD_all[i]] * len(xj))
            PL = np.array(PL_marked)
            PD = np.array(PD_marked)
            PL_nots = np.array(PL_nots)
            PD_nots = np.array(PD_nots)

            catalog[ID] = {
                'agents_reject': agents_reject,
                'x': x_markers,
                'y': y_markers,
            }

            skill = swap.expectedInformationGain(0.5, PL, PD)

            # it is only fair to write out the NOTs, too
            skill_nots = swap.expectedInformationGain(0.5, PL_nots, PD_nots)

            if len(catalog) % 500 == 0:
                print(len(catalog))
            if catalog_file is not None:
                catalog_file.write(row_format.format(
                    ID, kind, -1, -1, P,
                    len(skill_nots), np.sum(skill_nots), 0))

            if len(x_markers) == 0:
                # apparently everyone was a not...
                catalog[ID]['agents_labels'] = np.array([])
                continue

            # ----------------------------------------------------------
            # cluster
            print("make_lens_catalog: subject ID =  {0}".format(ID))
            weights = skill if flags['skill'] else None
            cluster_centers, cluster_center_labels, cluster_labels, \
                n_clusters, dist_within = outlier_clusters(
                    x_markers, y_markers, weights, memory=memory)

            catalog[ID]['agents_labels'] = cluster_labels

            for cluster_center_label in cluster_center_labels:
                members = (cluster_labels == cluster_center_label)

                x, y = cluster_centers[cluster_center_label]
                # convert y to catalog convention
                y = flags['image_y_size'] - y
                N0 = np.sum(members)
                S = np.sum(skill[members])
                D = dist_within[cluster_center_label]

                if cluster_center_label == -1:
                    # outlier cluster
                    # so really every point is its own cluster...
                    D = 0
                # TODO: make some requirement to be included (exclude
                # outliers)
                if catalog_file is not None:
                    catalog_file.write(row_format.format(
                        ID, kind, x, y, P, N0, S, D))

        print('make_lens_catalog: Clearing memory')
        memory.clear()

        if catalog_file is not None:
            print('make_lens_catalog: closing file!')
    finally:
        # Close the catalog even when a subject raised part-way through.
        if catalog_file is not None:
            catalog_file.close()

    if len(flags['update_collection']) > 0:
        print("make_lens_catalog: writing updated collection to {0}".format(
            flags['update_collection']))

        # TODO: get the other params correct!!!!
        collection_fat = swap.collection.Collection()
        for ID in catalog:
            subject = collection.member[ID]
            atx = subject.annotationhistory['At_X']
            labels_flat = list(catalog[ID]['agents_labels'])

            # Re-nest the flat per-marker labels so they mirror At_X.
            # Indexing (rather than slicing) keeps the IndexError if there
            # are fewer labels than markers.
            labels_fat = []
            start = 0
            for atx_i in atx:
                stop = start + len(atx_i)
                labels_fat.append(
                    [labels_flat[k] for k in range(start, stop)])
                start = stop

            subject.annotationhistory.update({'labels': labels_fat})
            collection_fat.member.update({ID: subject})
        swap.write_pickle(collection_fat, flags['update_collection'])

    print('make_lens_catalog: All done!')

    return catalog
Beispiel #8
0
def make_lens_atlas(args):
    """
    NAME
        make_lens_atlas

    PURPOSE
        Given location of the collection pickle and a lens catalog, this
        script produces a set of annotated images of lenses (heatmaps for
        lens locations, markers for where clicks were, etc).

    COMMENTS
        The raw image of every plotted subject is obtained through
        get_online_png, using <output_directory><ID>_field.png as its
        output name.
        This should be pretty customizable.

    FLAGS  (entries of the args dictionary)
        heatmap         Do heatmaps
        contour         Do contours
        field           Do full image
        stamp           Do cutouts
        alpha           Do alpha

        points N        Take N agents and plot them. Any number < 0 = do all
        skill           Weight agent markers by skill

        output_directory, output_format, stamp_size, dist_max, stamp_min,
        smooth_click, figsize_stamp, figsize_field, image_y_size,
        diagnostics
                        Further tunables; see the defaults below.

    INPUTS
        collection collection.pickle
        catalog catalog.dat
            Assumed format:
            ID   kind   x   y    Prob     N0   Skill   Dist

            Here:
            ID = Space Warps subject ID
            kind = Space Warps subject type (sim, dud, test)
            x,y = object (cluster) centroid, in pixels
            P = Space Warps subject probability
            N0 = number of markers in the cluster
            S = total skill per cluster, summed over markers
            D = biggest distance within cluster

    OUTPUTS
        In output_directory:
            <ID>_cluster_<row>_contour.<format>   one per stamp
            <ID>_cluster_<row>_heatmap.<format>   one per stamp (heatmap)
            <ID>_field_output.<format>            one per field

    RAISES
        ValueError if 'collection' or 'catalog' is not supplied.

    EXAMPLE

    BUGS
        TODO: incorporate some of these defaults into the flags dictionary

    AUTHORS
        This file is part of the Space Warps project, and is distributed
        under the GPL v2 by the Space Warps Science Team.
        http://spacewarps.org/

    HISTORY
        2013-07-16  started Davis (KIPAC)
    """

    # ------------------------------------------------------------------
    # Some defaults:

    flags = {
        'points': 30,
        'heatmap': False,
        'contour': False,
        'field': False,
        'stamp': False,
        'alpha': False,
        'skill': False,
        'output_directory': './',
        'output_format': 'png',
        'stamp_size': 50,
        'dist_max': 30,
        'stamp_min': 1,
        'smooth_click': 3,
        'figsize_stamp': 5,
        'figsize_field': 10,
        'image_y_size': 440,
        'diagnostics': False,
    }

    # ------------------------------------------------------------------
    # Read in options:

    collection_path = None
    catalog_path = None
    for arg in args:
        if arg in flags:
            flags[arg] = args[arg]
        elif arg == 'collection':
            collection_path = args[arg]
        elif arg == 'catalog':
            catalog_path = args[arg]
        else:
            print("make_lens_atlas: unrecognized flag  {0}".format(arg))

    # Fail early and clearly instead of hitting an unbound local below.
    if collection_path is None or catalog_path is None:
        raise ValueError(
            "make_lens_atlas: both 'collection' and 'catalog' entries "
            "must be supplied")

    print(flags)
    xbins = np.arange(flags['stamp_size'] * 2)
    ybins = np.arange(flags['stamp_size'] * 2)
    figsize_stamp = (flags['figsize_stamp'], flags['figsize_stamp'])
    figsize_field = (flags['figsize_field'], flags['figsize_field'])
    image_y_size = flags['image_y_size']

    print("make_lens_atlas: illustrating behaviour captured in collection, and lens files: ")
    print("make_lens_atlas:  {0}".format(collection_path))
    print("make_lens_atlas:  {0}".format(catalog_path))

    # ------------------------------------------------------------------
    # Read in files:

    collection = swap.read_pickle(collection_path, 'collection')
    catalog = csv2rec(catalog_path)

    print("make_lens_atlas: collection numbers  {0}".format(
        len(collection.list())))
    print("make_lens_atlas: catalog numbers  {0}".format(len(catalog)))

    # NOTE: `origin` (passed to imshow below) is not defined in this
    # function; it is expected to be available at module level.

    # ------------------------------------------------------------------
    # Run through data:

    # ------------------------------------------------------------------
    # Stamps:
    if flags['stamp']:
        print("make_lens_atlas: running stamps")
        for lens_i in range(len(catalog)):
            row = catalog[lens_i]
            ID = row['id']
            x = row['x']
            # flip y axis
            y = image_y_size - row['y']
            N0 = row['n0']
            if 'dist' in catalog.dtype.names:
                if row['dist'] == 0:
                    continue

            if x < 0:
                # this is one of the 'non points'; skip
                print(lens_i, 'x < 0!')
                continue
            if N0 < flags['stamp_min']:
                # not enough points!
                print(lens_i, '{0} < {1}'.format(N0, flags['stamp_min']))
                continue
            subject = collection.member[ID]
            annotationhistory = subject.annotationhistory

            # ----------------------------------------------------------
            # download png
            url = subject.location
            outname = flags['output_directory'] + '{0}_field.png'.format(ID)
            im = get_online_png(url, outname)

            # NOTE(review): x limits use im.shape[0] and y limits use
            # im.shape[1]; this only matches the row/column layout used
            # for slicing below when the image is square -- confirm.
            # Plain int() replaces np.int, which recent NumPy removed.
            min_x = int(np.max((x - flags['stamp_size'], 0)))
            max_x = int(np.min((x + flags['stamp_size'], im.shape[0])))
            min_y = int(np.max((y - flags['stamp_size'], 0)))
            max_y = int(np.min((y + flags['stamp_size'], im.shape[1])))

            min_member_x = int(np.max((x - flags['dist_max'], 0)))
            max_member_x = int(np.min((x + flags['dist_max'], im.shape[0])))
            min_member_y = int(np.max((y - flags['dist_max'], 0)))
            max_member_y = int(np.min((y + flags['dist_max'], im.shape[1])))
            if min_x >= max_x or min_y >= max_y:
                print("make_lens_atlas: misshapen lens for ID  {0}".format(
                    ID))
                continue

            # if it is a training image, claim the alpha parameter
            if im.shape[2] == 4:
                alpha = im[:, :, 3][min_y:max_y, min_x:max_x]
                im = im[:, :, :3][min_y:max_y, min_x:max_x]
            else:
                alpha = None
                im = im[min_y:max_y, min_x:max_x]

            fig = plt.figure(figsize=figsize_stamp)
            ax = fig.add_subplot(111)
            ax.imshow(im, origin=origin)

            # catalog position of this cluster
            ax.scatter(x - min_x,
                       y - min_y,
                       marker='d',
                       c=(0, 1.0, 0),
                       s=100,
                       alpha=0.75)

            if (flags['contour'] or flags['heatmap'] or
                    flags['points'] != 0):

                x_all = annotationhistory['At_X']
                y_all = annotationhistory['At_Y']

                x_markers_all = np.array([xi for xj in x_all for xi in xj])
                y_markers_all = np.array([yi for yj in y_all for yi in yj])

                agents_numbers = np.arange(x_markers_all.size)
                if 'labels' in annotationhistory:
                    # find which label is closest to your folks
                    labels_all = annotationhistory['labels']
                    labels = np.array([xi for xj in labels_all for xi in xj])
                    cluster_labels = list(set(labels))
                    data = np.vstack((x_markers_all, y_markers_all)).T
                    cluster_centers = np.array([
                        np.mean(data[labels == i], axis=0)
                        for i in cluster_labels
                    ])
                    # find which label is closest to the (x,y)
                    label_center = cluster_labels[np.argmin(
                        np.sum(np.square(cluster_centers -
                                         np.vstack((x, y)).T),
                               axis=1))]
                    conds = (labels == label_center)
                else:
                    # now filter markers by those that are within
                    # dist_max of the center (since I don't record cluster
                    # members...)
                    conds = ((x_markers_all >= min_member_x) *
                             (x_markers_all <= max_member_x) *
                             (y_markers_all >= min_member_y) *
                             (y_markers_all <= max_member_y))
                agents = agents_numbers[conds]
                x_markers = x_markers_all[agents]
                y_markers = y_markers_all[agents]

                # filter markers
                n_catalog = len(agents)
                if n_catalog < 1:
                    print(lens_i, n_catalog)
                if 0 < flags['points'] < n_catalog:
                    agents_points = np.random.choice(agents,
                                                     size=flags['points'],
                                                     replace=False)
                else:
                    agents_points = agents
                x_markers_filtered = x_markers_all[agents_points]
                y_markers_filtered = y_markers_all[agents_points]

                if flags['skill'] and len(agents) > 0:
                    PL_all = annotationhistory['PL']
                    PD_all = annotationhistory['PD']

                    # repeat each agent's PL / PD once per marker, which
                    # also drops the empty clicks
                    PL = np.array([PL_all[i]
                                   for i, xj in enumerate(x_all)
                                   for _ in xj])
                    PD = np.array([PD_all[i]
                                   for i, xj in enumerate(x_all)
                                   for _ in xj])

                    skill_all = swap.expectedInformationGain(0.5, PL, PD)
                    skill = skill_all[agents]

                    smax = 100
                    smin = 5
                    if np.max(skill) != np.min(skill):
                        sizes_all = (skill_all - np.min(skill)) * (smax - smin) / \
                                (np.max(skill) - np.min(skill))
                        sizes_filtered = sizes_all[agents_points]
                    else:
                        sizes_filtered = 50
                else:
                    skill = None
                    sizes_filtered = 50
                colors = (0, 1.0, 0)

                # ------------------------------------------------------
                # heatmaps
                if flags['heatmap'] and len(agents) > 0:
                    fig_heatmap = plt.figure(figsize=figsize_stamp)
                    ax_heatmap = fig_heatmap.add_subplot(111)

                    # now do the lens locations
                    # don't need to filter the x's since that is filtered by
                    # xbins and ybins anyways
                    pdf2d(x_markers - min_x,
                          y_markers - min_y,
                          xbins=xbins,
                          ybins=ybins,
                          weights=skill,
                          smooth=flags['smooth_click'],
                          color=(0, 1.0, 0),
                          style='hist',
                          axis=ax_heatmap)

                    # `is not None`: comparing an array with != None is
                    # element-wise and cannot be used as a condition.
                    if flags['alpha'] and alpha is not None:
                        contour_hist(alpha.T,
                                     extent=(xbins[0], xbins[-1], ybins[0],
                                             ybins[-1]),
                                     color='w',
                                     style='contour',
                                     axis=ax_heatmap)

                    ax_heatmap.tick_params(\
                        axis='both',          # changes apply to the x-axis
                        which='both',      # both major and minor ticks are affected
                        bottom='off',      # ticks along the bottom edge are off
                        top='off',         # ticks along the top edge are off
                        left='off',
                        right='off',
                        labelleft='off',
                        labelbottom='off') # labels along the bottom edge are off

                    # CPD 04.08.14: Flip axis to old conventions
                    ax_heatmap.invert_yaxis()
                    try:
                        outfile = flags['output_directory'] + \
                                    '{0}_cluster_{1}_heatmap.{2}'.format(
                                        ID, lens_i, flags['output_format'])
                        fig_heatmap.savefig(outfile,
                                            bbox_inches='tight',
                                            pad_inches=0)
                    except Exception:
                        # best effort: report and move on to the next plot
                        print("make_lens_atlas: heatmap problem with  {0} {1}".format(
                            ID, lens_i))

                # ------------------------------------------------------
                # back to our other plots
                # contours
                if flags['contour'] and len(agents) > 0:

                    # now do the lens locations
                    # don't need to filter the x's since that is filtered by
                    # xbins and ybins anyways
                    pdf2d(x_markers - min_x,
                          y_markers - min_y,
                          xbins=xbins,
                          ybins=ybins,
                          weights=skill,
                          smooth=flags['smooth_click'],
                          color=(0, 1.0, 0),
                          style='contour',
                          axis=ax)

                # plot points
                if flags['points'] != 0 and len(agents) > 0:
                    ax.scatter(x_markers_filtered - min_x,
                               y_markers_filtered - min_y,
                               c=colors,
                               s=sizes_filtered,
                               alpha=0.25)

            # plot alpha
            if flags['alpha'] and alpha is not None:
                contour_hist(alpha.T,
                             extent=(xbins[0], xbins[-1], ybins[0], ybins[-1]),
                             color='w',
                             style='contour',
                             axis=ax)

            # ----------------------------------------------------------
            ax.tick_params(\
                axis='both',          # changes apply to the x-axis
                which='both',      # both major and minor ticks are affected
                bottom='off',      # ticks along the bottom edge are off
                top='off',         # ticks along the top edge are off
                left='off',
                right='off',
                labelleft='off',
                labelbottom='off') # labels along the bottom edge are off

            ax.invert_yaxis()
            try:
                outfile = flags['output_directory'] + \
                            '{0}_cluster_{1}_contour.{2}'.format(
                                ID, lens_i, flags['output_format']
                                )
                fig.savefig(outfile, bbox_inches='tight', pad_inches=0)
            except Exception:
                # best effort: report and move on to the next stamp
                print("make_lens_atlas: contour problem with  {0} {1}".format(
                    ID, lens_i))
            plt.close('all')

    # ------------------------------------------------------------------
    # Fields
    if flags['field']:
        print("make_lens_atlas: running fields")
        # find the unique IDs. mark centers and also centrals if clustering is
        # done
        unique_IDs = np.unique(catalog['id'])
        for ID in unique_IDs:
            mini_catalog = catalog[catalog['id'] == ID]
            subject = collection.member[ID]
            annotationhistory = subject.annotationhistory

            # plot cluster centers
            x_centers = mini_catalog['x']
            # flip y from catalog
            y_centers = image_y_size - mini_catalog['y']
            skill_centers = mini_catalog['skill']
            # filter out the -1 entry
            center_cond = (x_centers > 0) * (y_centers > 0)
            # filter outliers if possible
            if 'dist' in mini_catalog.dtype.names:
                center_cond *= mini_catalog['dist'] > 0
            skill_centers = skill_centers[center_cond]
            x_centers = x_centers[center_cond]
            y_centers = y_centers[center_cond]
            colors_centers = [(0, 1.0, 0) for i in x_centers]

            if len(colors_centers) == 0:
                #welp, nothing here
                continue

            # ----------------------------------------------------------
            # download png
            url = subject.location
            outname = flags['output_directory'] + '{0}_field.png'.format(ID)
            im = get_online_png(url, outname)

            # if it is a training image, claim the alpha parameter
            if im.shape[2] == 4:
                alpha = im[:, :, 3]
                im = im[:, :, :3]
            else:
                alpha = None

            fig = plt.figure(figsize=figsize_field)
            ax = fig.add_subplot(111)
            ax.imshow(im, origin=origin)
            # NOTE(review): x extent taken from im.shape[0] and y extent
            # from im.shape[1]; equivalent only for square images --
            # confirm.
            xbins = np.arange(im.shape[0])
            ybins = np.arange(im.shape[1])
            min_x = 0
            min_y = 0
            max_x = im.shape[0]
            max_y = im.shape[1]

            if flags['skill'] and (np.max(skill_centers) !=
                                   np.min(skill_centers)):
                sizes_centers = (
                    (skill_centers - np.min(skill_centers)) * (200 - 10) /
                    (np.max(skill_centers) - np.min(skill_centers)))
            else:
                sizes_centers = [100 for i in x_centers]
            # NOTE(review): the next line unconditionally replaces the
            # skill-scaled sizes computed above with a constant. Kept as
            # found; confirm whether that is intentional.
            sizes_centers = [100 for i in x_centers]
            ax.scatter(x_centers,
                       y_centers,
                       marker='d',
                       c=colors_centers,
                       s=sizes_centers,
                       alpha=0.75)

            if flags['diagnostics']:
                # draw a circle of radius dist_max and a box of half-width
                # stamp_size around every center
                r = flags['dist_max']
                b = flags['stamp_size']
                b_ones = np.ones(100) * b
                b_arr = np.linspace(-b, b, 100)

                def xy(x0, y0, r, phi):
                    return x0 + r * np.cos(phi), y0 + r * np.sin(phi)

                phis = np.arange(0, 6.28, 0.01)
                for x_center, y_center in zip(x_centers, y_centers):
                    ax.plot(*xy(x_center, y_center, r, phis),
                            c='w',
                            ls='-',
                            linewidth=4)

                    # plot box
                    ax.plot(x_center + b_ones,
                            y_center + b_arr,
                            c='r',
                            ls='--',
                            linewidth=4)
                    ax.plot(x_center - b_ones,
                            y_center + b_arr,
                            c='r',
                            ls='--',
                            linewidth=4)
                    ax.plot(x_center + b_arr,
                            y_center + b_ones,
                            c='r',
                            ls='--',
                            linewidth=4)
                    ax.plot(x_center + b_arr,
                            y_center - b_ones,
                            c='r',
                            ls='--',
                            linewidth=4)

            x_all = annotationhistory['At_X']
            y_all = annotationhistory['At_Y']

            x_markers_all = np.array([xi for xj in x_all for xi in xj])
            y_markers_all = np.array([yi for yj in y_all for yi in yj])

            # now filter markers by those that are within
            # stamp_size of the stamp
            # I'm pretty sure this step is redundant when going over the full
            # image?
            agents_numbers = np.arange(x_markers_all.size)
            conds = ((x_markers_all >= min_x) * (x_markers_all <= max_x) *
                     (y_markers_all >= min_y) * (y_markers_all <= max_y))
            agents = agents_numbers[conds]

            x_markers = x_markers_all[agents]
            y_markers = y_markers_all[agents]

            # filter markers
            n_catalog = len(agents)
            if 0 < flags['points'] < n_catalog:
                agents_points = np.random.choice(agents,
                                                 size=flags['points'],
                                                 replace=False)
            else:
                agents_points = agents
            x_markers_filtered = x_markers_all[agents_points]
            y_markers_filtered = y_markers_all[agents_points]

            # Same guard as the stamp branch: np.max / np.min below fail
            # on an empty selection.
            if flags['skill'] and len(agents) > 0:
                PL_all = annotationhistory['PL']
                PD_all = annotationhistory['PD']

                # repeat each agent's PL / PD once per marker, which also
                # drops the empty clicks
                PL = np.array([PL_all[i]
                               for i, xj in enumerate(x_all)
                               for _ in xj])
                PD = np.array([PD_all[i]
                               for i, xj in enumerate(x_all)
                               for _ in xj])

                skill_all = swap.expectedInformationGain(0.5, PL, PD)
                skill = skill_all[agents]

                smax = 100
                smin = 5
                if np.max(skill) != np.min(skill):
                    sizes_all = (skill_all - np.min(skill)) * (smax - smin) / \
                            (np.max(skill) - np.min(skill))
                    sizes_filtered = sizes_all[agents_points]
                else:
                    sizes_filtered = 50
            else:
                skill = None
                sizes_filtered = 50

            # The marker transparency gets its own name so it cannot
            # clobber `alpha`, the image alpha channel used further down.
            if 'labels' in annotationhistory:
                # find which label is closest to your folks
                labels_all = annotationhistory['labels']
                labels = np.array([xi for xj in labels_all for xi in xj])
                labels_filtered = labels[agents_points]
                marker_alpha = 0.75
                # outliers (label -1) in red, cluster members in green
                colors = [(1.0, 0.0, 0) if label == -1 else (0, 1.0, 0)
                          for label in labels_filtered]
            else:
                colors = (0, 1.0, 0)
                marker_alpha = 0.25

            # ----------------------------------------------------------
            # contours
            if flags['contour'] and len(x_markers) >= flags['stamp_min']:

                # now do the lens locations
                # don't need to filter the x's since that is filtered by
                # xbins and ybins anyways
                pdf2d(x_markers - min_x,
                      y_markers - min_y,
                      xbins=xbins,
                      ybins=ybins,
                      weights=skill,
                      smooth=flags['smooth_click'],
                      color=(0, 1.0, 0),
                      style='contour',
                      axis=ax)

            # ----------------------------------------------------------
            # plot points
            if flags['points'] != 0:
                ax.scatter(x_markers_filtered - min_x,
                           y_markers_filtered - min_y,
                           c=colors,
                           s=sizes_filtered,
                           alpha=marker_alpha)

            # ----------------------------------------------------------
            # do alpha
            if flags['alpha'] and alpha is not None:
                contour_hist(alpha.T,
                             extent=(xbins[0], xbins[-1], ybins[0], ybins[-1]),
                             color='w',
                             style='contour',
                             axis=ax)


            ax.tick_params(\
                axis='both',          # changes apply to the x-axis
                which='both',      # both major and minor ticks are affected
                bottom='off',      # ticks along the bottom edge are off
                top='off',         # ticks along the top edge are off
                left='off',
                right='off',
                labelleft='off',
                labelbottom='off') # labels along the bottom edge are off

            ax.invert_yaxis()
            try:
                outfile = flags[
                    'output_directory'] + '{0}_field_output.{1}'.format(
                        ID, flags['output_format'])
                fig.savefig(outfile, bbox_inches='tight', pad_inches=0)
            except Exception:
                # best effort: report and move on to the next field
                print("make_lens_atlas: field problem with field  {0}".format(
                    ID))
            plt.close('all')

    print('make_lens_atlas: All done!')