def morph_table_gz2():
    """Print a LaTeX-style table of plurality votes and save pie charts.

    The survey ('decals') and the overlap flag (True) are fixed inside the
    function. For every task, one row per plurality answer is printed as
    ``label & count & count / catalogue size & count / task total`` and a
    pie chart of the counts is drawn. The figure is written to
    ``<gzpath>/decals/plots/pie_<survey><suffix>.eps``.

    Returns:
        None, also when an input file cannot be read (a message is printed
        in that case).

    Raises:
        ValueError: if no collation file is defined for the survey/overlap
            combination.
    """
    overlap = True
    survey = 'decals'

    # Collation CSVs keyed by survey, for the two sample selections.
    overlap_files = {
        'gz2': "{0}/decals/csv/decals_gz2_main.csv",
        'stripe82': "{0}/decals/csv/decals_gz2_stripe82c1.csv",
        'decals': "{0}/decals/csv/decals_gz2_union.csv",
    }
    full_files = {
        'gz2': "{0}/dr10/dr10_gz2_main_specz.csv",
        'stripe82': "{0}/dr10/dr10_gz2_stripe82_coadd1.csv",
    }
    file_templates = overlap_files if overlap else full_files
    if survey not in file_templates:
        raise ValueError(
            "No collation file defined for survey '{0}' (overlap={1})".format(
                survey, overlap))
    collation_file = file_templates[survey].format(gzpath)

    try:
        if overlap:
            collated = pd.read_csv(collation_file)
        else:
            # These CSVs are read with the column names of the GZ2 FITS
            # table, so pandas treats every CSV row as data. The header is
            # only needed (and only read) on this path.
            fitsfile = "{0}/dr10/dr10_gz2_main_specz.fits".format(gzpath)
            hdr = fits.getheader(fitsfile, 1)
            fits_names = [hdr['TTYPE{0}'.format(i + 1)]
                          for i in range(hdr['TFIELDS'])]
            collated = pd.read_csv(collation_file, names=fits_names)
    except IOError:
        print("Collation file for {0:} does not exist. Aborting.".format(
            survey))
        return None

    # Pick out the vote-fraction columns and the distinct task ids ("tNN").
    fraccols, task_ids = [], set()
    if survey == 'decals':
        for col in collated.columns:
            if len(col) > 10:
                if col.endswith('frac') and col.startswith('decals'):
                    fraccols.append(col)
                if col[7] == 't' and is_number(col[8:10]):
                    task_ids.add(col[7:10])
    else:
        for col in collated.columns:
            if col.endswith('weighted_fraction'):
                fraccols.append(col)
            if col.startswith('t') and is_number(col[1:3]):
                task_ids.add(col[:3])
    collist = sorted(task_ids)

    # Subplot grid: four columns for more than nine tasks, otherwise
    # roughly square; rows are the ceiling of ntasks / ncols.
    ntasks = len(collist)
    ncols = 4 if ntasks > 9 else int(np.sqrt(ntasks))
    nrows = -(-ntasks // ncols)

    survey_name = survey_dict()[survey]['name']

    def _unique_in_order(seq):
        # Drop repeats while keeping first-seen order.
        seen = set()
        return [x for x in seq if not (x in seen or seen.add(x))]

    # Column names are split at the answer token ("a0", "x1", ...): the
    # text before it names the task, the text after it names the answer.
    if survey == 'decals':
        tasklabels = _unique_in_order(
            [re.split("[ax][0-9]", f)[0][11:-1] for f in fraccols])
        labels = [re.split("[ax][0-9]", f)[-1][1:-5] for f in fraccols]
    else:
        tasklabels = _unique_in_order(
            [re.split("[ax][0-9]", f)[0][4:-1] for f in fraccols])
        labels = [re.split("[ax][0-9]", f[4:-18])[-1][2:] for f in fraccols]

    # Plurality result for every row of the catalogue.
    task_arr, task_ans = [], []
    for votes in np.array(collated[fraccols]):
        e, a = plurality(votes, survey)
        task_arr.append(e)
        task_ans.append(a)
    task_arr = np.array(task_arr)
    task_ans = np.array(task_ans)

    # squeeze=False keeps axarr a 2-D array even for a 1x1 grid, so the
    # flat indexing below always works.
    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()

    colors = [
        u'#377EB8', u'#E41A1C', u'#4DAF4A', u'#984EA3', u'#FF7F00', u'#A6761D',
        u'#1B9E77'
    ]

    for i in range(task_arr.shape[1]):
        ax = axes[i]
        # Count the plurality answers of the rows flagged for this task.
        c = Counter(task_ans[:, i][task_arr[:, i] == True])  # noqa: E712
        task_total = sum(c.values())
        pv, pl = [], []
        for k in c:
            pv.append(c[k])
            pl.append(labels[k])

            # Print to screen in LaTeX format
            print("{0:20} & {1:6} & {3:.2f} & {2:.2f}".format(
                labels[k], c[k], c[k] * 1. / task_total,
                c[k] * 1. / len(collated)))
        print("")
        # autopct receives a percentage; convert it back to a count.
        ax.pie(pv,
               labels=pl,
               colors=colors,
               autopct=lambda p: '{:.0f}'.format(p * sum(pv) / 100))
        if i == 0:
            title = '{0:} - t{1:02} {2:}'.format(survey_name, i,
                                                 tasklabels[i])
        else:
            title = 't{0:02} {1:}'.format(i, tasklabels[i])
        ax.set_title(title)
        ax.set_aspect('equal')

    # Remove empty axes from subplots
    for ax in axes[ntasks:]:
        ax.set_axis_off()

    fig.set_tight_layout(True)
    suffix = '_overlap' if overlap else ''
    fig.savefig('{1}/decals/plots/pie_{0}{2}.eps'.format(
        survey, gzpath, suffix))
    plt.close(fig)

    return None
Beispiel #2
0
def morphology_distribution(survey, absolute_counts=False):
    """Plot vote-fraction histograms and plurality pie charts for a survey.

    Reads
    ``<gzdir>/gz_reduction_sandbox/data/<survey>_unweighted_classifications_00.csv``
    and writes two figures under ``<gzdir>/progress/<survey>/``:
    ``votefractions_<survey>.png`` (one histogram panel per task) and
    ``pie_<survey>.png`` (one pie chart of plurality answers per task).

    Args:
        survey: survey key; used in the file names, to look up the display
            name in ``survey_dict()`` and passed on to ``plurality()``.
        absolute_counts: if True the pie wedges are annotated with counts,
            otherwise with percentages.

    Returns:
        None, also when the collation file cannot be read (a message is
        printed in that case).
    """
    # TODO (notes carried over from the original author):
    #   - Get the updated version of Brooke's aggregation and weighting code
    #   - Try running it on the DECaLS and Illustris data
    #   - Assuming it works, match positionally against GZ2 in TOPCAT (both
    #     samples). This might have to be manual unless I find a faster
    #     implementation of positional matching in Python.
    #   - Adapt the GZ2 code for plurality classifications and run on GZ4 data
    #   - Summarize overall results and split by luminosity
    #   - List galaxies with large changes?

    try:
        collation_file = "{0:}/gz_reduction_sandbox/data/{1:}_unweighted_classifications_00.csv".format(gzdir, survey)
        collated = pd.read_csv(collation_file)
    except IOError:
        print("Collation file for {0:} does not exist. Aborting.".format(survey))
        return None

    # Vote-fraction columns and the distinct task ids ("tNN").
    fraccols, task_ids = [], set()
    for col in collated.columns:
        if col.endswith('frac'):
            fraccols.append(col)
        if col.startswith('t') and is_number(col[1:3]):
            task_ids.add(col[:3])
    collist = sorted(task_ids)

    # Subplot grid: four columns for more than nine tasks, otherwise
    # roughly square; rows are the ceiling of ntasks / ncols.
    ntasks = len(collist)
    ncols = 4 if ntasks > 9 else int(np.sqrt(ntasks))
    nrows = -(-ntasks // ncols)

    survey_name = survey_dict()[survey]['name']

    def _unique_in_order(seq):
        # Drop repeats while keeping first-seen order.
        seen = set()
        return [x for x in seq if not (x in seen or seen.add(x))]

    # Column names are split at the answer token ("a0", "x1", ...): the
    # text before it names the task, the text after it names the answer.
    tasklabels = _unique_in_order(
        [re.split("[ax][0-9]", f)[0][4:-1] for f in fraccols])
    labels = [re.split("[ax][0-9]", f[4:-5])[-1][1:] for f in fraccols]

    def _panel_title(i):
        # Only the first panel carries the survey name.
        if i == 0:
            return '{0:} - t{1:02} {2:}'.format(survey_name, i, tasklabels[i])
        return 't{0:02} {1:}'.format(i, tasklabels[i])

    def _hide_unused(axes):
        # Remove empty axes from subplots
        for ax in axes[ntasks:]:
            ax.set_axis_off()

    # --- Distribution of vote fractions for each task -------------------

    # squeeze=False keeps axarr a 2-D array even for a 1x1 grid.
    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()

    for i, task in enumerate(collist):
        ax = axes[i]
        ax.set_title(_panel_title(i))
        for f, label in zip(fraccols, labels):
            # One histogram per answer of this task, skipping "discuss".
            if f[:3] == task and f[-15:-5] != 'a0_discuss':
                ax.hist(collated[f], alpha=0.7, label=label)
        ax.set_xlim(0, 1)
        ax.legend(loc='upper left', fontsize=6)

    _hide_unused(axes)
    fig.set_tight_layout(True)
    fig.savefig('{0:}/progress/{1:}/votefractions_{1:}.png'.format(gzdir, survey))
    # Release the histogram figure before the next one is created.
    plt.close(fig)

    # --- Pie charts of the plurality votes -------------------------------

    task_arr, task_ans = [], []
    for votes in np.array(collated[fraccols]):
        e, a = plurality(votes, survey)
        task_arr.append(e)
        task_ans.append(a)
    task_arr = np.array(task_arr)
    task_ans = np.array(task_ans)

    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()

    colors = [u'#377EB8', u'#E41A1C', u'#4DAF4A', u'#984EA3', u'#FF7F00',
              u'#A6761D', u'#1B9E77']

    for i in range(task_arr.shape[1]):
        ax = axes[i]
        # Count the plurality answers of the rows flagged for this task.
        c = Counter(task_ans[:, i][task_arr[:, i] == True])  # noqa: E712
        pv = [c[k] for k in c]
        pl = [labels[k] for k in c]
        if absolute_counts:
            # autopct receives a percentage; convert it back to a count.
            ax.pie(pv, labels=pl, colors=colors,
                   autopct=lambda p: '{:.0f}'.format(p * sum(pv) / 100))
        else:
            ax.pie(pv, labels=pl, colors=colors, autopct='%1.0f%%')
        ax.set_title(_panel_title(i))
        ax.set_aspect('equal')

    _hide_unused(axes)
    fig.set_tight_layout(True)
    fig.savefig('{0:}/progress/{1:}/pie_{1:}.png'.format(gzdir, survey))
    plt.close(fig)

    # TODO: How does the distribution compare to GZ1 and GZ2?
    # TODO: Have they discovered anything interesting? Look at long
    #       discussions? Maybe tag the ones that science team hasn't
    #       participated in yet?

    return None
def feature_comparison(savefig=False):
    """Compare GZ2 and DECaLS vote fractions for the matched galaxies.

    Reads extension 1 of ``<decals_path>/fits/decals_gz2_union.fits`` and
    draws one log-scaled 2-D histogram per matched answer (GZ2 fraction on
    x, DECaLS fraction on y). Each panel only uses the rows that
    ``plurality()`` flags for the corresponding task in both surveys.

    Args:
        savefig: if True, write the figure to
            ``<plot_path>/decals_gz2_feature_comparison.pdf`` and close it;
            otherwise display it with ``plt.show()``.

    Returns:
        None.
    """
    filename = '{0}/fits/decals_gz2_union.fits'.format(decals_path)
    data = fits.getdata(filename, 1)

    # Map the columns: (panel title, GZ2 column, DECaLS column).
    column_pairs = [
        ('smooth',
         "gz2_t01_smooth_or_features_a01_smooth_fraction",
         "decals_t00_smooth_or_features_a0_smooth_frac"),
        ('features/disk',
         "gz2_t01_smooth_or_features_a02_features_or_disk_fraction",
         "decals_t00_smooth_or_features_a1_features_frac"),
        ('star',
         "gz2_t01_smooth_or_features_a03_star_or_artifact_fraction",
         "decals_t00_smooth_or_features_a2_artifact_frac"),
        ('edge-on',
         "gz2_t02_edgeon_a04_yes_fraction",
         "decals_t01_disk_edge_on_a0_yes_frac"),
        ('not edge-on',
         "gz2_t02_edgeon_a05_no_fraction",
         "decals_t01_disk_edge_on_a1_no_frac"),
        ('bar',
         "gz2_t03_bar_a06_bar_fraction",
         "decals_t02_bar_a0_bar_frac"),
        ('no bar',
         "gz2_t03_bar_a07_no_bar_fraction",
         "decals_t02_bar_a1_no_bar_frac"),
        ('spiral',
         "gz2_t04_spiral_a08_spiral_fraction",
         "decals_t03_spiral_a0_spiral_frac"),
        ('no spiral',
         "gz2_t04_spiral_a09_no_spiral_fraction",
         "decals_t03_spiral_a1_no_spiral_frac"),
        ('no bulge',
         "gz2_t05_bulge_prominence_a10_no_bulge_fraction",
         "decals_t04_bulge_prominence_a0_no_bulge_frac"),
        ('medium bulge',
         "gz2_t05_bulge_prominence_a11_just_noticeable_fraction",
         "decals_t04_bulge_prominence_a1_obvious_frac"),
        ('obvious bulge',
         "gz2_t05_bulge_prominence_a12_obvious_fraction",
         "decals_t04_bulge_prominence_a2_dominant_frac"),
        ('completely round',
         "gz2_t07_rounded_a16_completely_round_fraction",
         "decals_t08_rounded_a0_completely_round_frac"),
        ('in between',
         "gz2_t07_rounded_a17_in_between_fraction",
         "decals_t08_rounded_a1_in_between_frac"),
        ('cigar shaped',
         "gz2_t07_rounded_a18_cigar_shaped_fraction",
         "decals_t08_rounded_a2_cigar_shaped_frac"),
        ('ring',
         "gz2_t08_odd_feature_a19_ring_fraction",
         "decals_t10_odd_feature_x1_ring_frac"),
        ('lens/arc',
         "gz2_t08_odd_feature_a20_lens_or_arc_fraction",
         "decals_t10_odd_feature_x2_lens_frac"),
        ('irregular',
         "gz2_t08_odd_feature_a22_irregular_fraction",
         "decals_t10_odd_feature_x4_irregular_frac"),
        ('other',
         "gz2_t08_odd_feature_a23_other_fraction",
         "decals_t10_odd_feature_x5_other_frac"),
        ('dust lane',
         "gz2_t08_odd_feature_a38_dust_lane_fraction",
         "decals_t10_odd_feature_x3_dustlane_frac"),
        ('rounded bulge',
         "gz2_t09_bulge_shape_a25_rounded_fraction",
         "decals_t07_bulge_shape_a0_rounded_frac"),
        ('boxy bulge',
         "gz2_t09_bulge_shape_a26_boxy_fraction",
         "decals_t07_bulge_shape_a1_boxy_frac"),
        ('no bulge',
         "gz2_t09_bulge_shape_a27_no_bulge_fraction",
         "decals_t07_bulge_shape_a2_no_bulge_frac"),
        ('tight arms',
         "gz2_t10_arms_winding_a28_tight_fraction",
         "decals_t05_arms_winding_a0_tight_frac"),
        ('medium arms',
         "gz2_t10_arms_winding_a29_medium_fraction",
         "decals_t05_arms_winding_a1_medium_frac"),
        ('loose arms',
         "gz2_t10_arms_winding_a30_loose_fraction",
         "decals_t05_arms_winding_a2_loose_frac"),
        ('1 arm',
         "gz2_t11_arms_number_a31_1_fraction",
         "decals_t06_arms_number_a0_1_frac"),
        ('2 arms',
         "gz2_t11_arms_number_a32_2_fraction",
         "decals_t06_arms_number_a1_2_frac"),
        ('3 arms',
         "gz2_t11_arms_number_a33_3_fraction",
         "decals_t06_arms_number_a2_3_frac"),
        ('4 arms',
         "gz2_t11_arms_number_a34_4_fraction",
         "decals_t06_arms_number_a3_4_frac"),
        ('5+ arms',
         "gz2_t11_arms_number_a36_more_than_4_fraction",
         "decals_t06_arms_number_a4_more_than_4_frac"),
    ]
    matched_cols = [{'title': title, 'gz2': gz2_col, 'decals': decals_col}
                    for title, gz2_col, decals_col in column_pairs]

    # NOTE: Working, but still needs to sort for questions that are
    # ACTUALLY ANSWERED. Lots of pileup at 0,0.

    # Collect the vote-fraction columns of each survey (GZ2: the plain
    # "..._fraction" columns, not the "..._weighted_fraction" ones).
    decals_fraccols, gz2_fraccols = [], []
    for col in data.columns:
        colname = col.name
        if (len(colname) > 6 and colname.endswith('frac')
                and colname.startswith('decals')):
            decals_fraccols.append(col)
        if (len(colname) > 17 and colname.endswith('fraction')
                and not colname.endswith('weighted_fraction')
                and colname.startswith('gz2')):
            gz2_fraccols.append(col)

    decals_votearr = data.from_columns(decals_fraccols)
    gz2_votearr = data.from_columns(gz2_fraccols)

    # First element of plurality()'s result for every row, stacked once
    # into a 2-D array (rows x tasks) instead of on every loop iteration.
    decals_tasks = np.array(
        [plurality(np.array(list(v)), 'decals')[0] for v in decals_votearr])
    gz2_tasks = np.array(
        [plurality(np.array(list(v)), 'gz2')[0] for v in gz2_votearr])

    fig, axarr = plt.subplots(num=1, nrows=4, ncols=8, figsize=(16, 10))
    axes = axarr.ravel()

    def plot_features(ax, taskno, indices):
        # 2-D histogram of GZ2 vs DECaLS fractions for one matched answer,
        # restricted to the rows selected by the boolean mask.
        plotind = indices.flatten()
        ax.hist2d(data[matched_cols[taskno]['gz2']][plotind],
                  data[matched_cols[taskno]['decals']][plotind],
                  bins=(20, 20),
                  range=[[0, 1], [0, 1]],
                  norm=LogNorm(),
                  cmap=cm.viridis)
        # One-to-one line for reference.
        ax.plot([0, 1], [0, 1], linestyle='--', color='red', lw=2)
        ax.set_title(matched_cols[taskno]['title'], fontsize=8)
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
        ax.set_xlabel(r'$f_{GZ2}$', fontsize=10)
        ax.set_ylabel(r'$f_{DECaLS}$', fontsize=10)
        ax.set_aspect('equal')

    # Number of matched answers per task, in matched_cols order, and the
    # [GZ2 task index, DECaLS task index] pair each group belongs to.
    answers_per_task = [3, 2, 2, 2, 3, 3, 5, 3, 3, 5]
    match_tasks = [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [6, 8], [7, 10],
                   [8, 7], [9, 5], [10, 6]]

    n = 0
    for n_answers, (gz2_task, decals_task) in zip(answers_per_task,
                                                  match_tasks):
        # Rows flagged for this task in both surveys.
        inds = ((decals_tasks[:, decals_task] == True)  # noqa: E712
                & (gz2_tasks[:, gz2_task] == True))  # noqa: E712
        for _ in range(n_answers):
            plot_features(axes[n], n, inds)
            n += 1

    # Drop the panels that were not used.
    for ax in axes[n:]:
        fig.delaxes(ax)

    fig.tight_layout()
    if savefig:
        fig.savefig('{0}/decals_gz2_feature_comparison.pdf'.format(plot_path))
        # Figure number 1 is reused by every call; close it so a later call
        # starts from an empty figure instead of drawing over this one.
        plt.close(fig)
    else:
        plt.show()

    return None
def morphology_distribution(survey='decals'):
    """Save pie charts of the plurality answers for each task.

    Always reads
    ``<gzpath>/gz_reduction_sandbox/data/decals_unweighted_classifications_00.csv``
    regardless of ``survey``, and writes the figure to
    ``<gzpath>/decals/plots/pie_<survey>.eps``. Wedges are annotated with
    absolute counts.

    Args:
        survey: survey key; used to look up the display name in
            ``survey_dict()``, passed on to ``plurality()`` and used in the
            output file name.

    Returns:
        None, also when the collation file cannot be read (a message is
        printed in that case).
    """
    try:
        collation_file = "{0}/gz_reduction_sandbox/data/decals_unweighted_classifications_00.csv".format(
            gzpath)
        collated = pd.read_csv(collation_file)
    except IOError:
        print("Collation file for {0:} does not exist. Aborting.".format(
            survey))
        return None

    # Vote-fraction columns and the distinct task ids ("tNN").
    fraccols, task_ids = [], set()
    for col in collated.columns:
        if col.endswith('frac'):
            fraccols.append(col)
        if col.startswith('t') and is_number(col[1:3]):
            task_ids.add(col[:3])
    collist = sorted(task_ids)

    # Subplot grid: four columns for more than nine tasks, otherwise
    # roughly square; rows are the ceiling of ntasks / ncols.
    ntasks = len(collist)
    ncols = 4 if ntasks > 9 else int(np.sqrt(ntasks))
    nrows = -(-ntasks // ncols)

    survey_name = survey_dict()[survey]['name']

    def _unique_in_order(seq):
        # Drop repeats while keeping first-seen order.
        seen = set()
        return [x for x in seq if not (x in seen or seen.add(x))]

    def _task_label(col):
        # Text before the first answer token ("a0", "x1", ...), without the
        # trailing underscore and without the leading "tNN_" task id (plus
        # anything in front of it, such as a "decals_" prefix). A fixed
        # [11:-1] slice only fits "decals_tNN_..." names; the columns
        # scanned above start with "tNN", where it truncates or empties the
        # label and can leave fewer labels than tasks.
        head = re.split("[ax][0-9]", col)[0][:-1]
        return re.sub(r"^.*?t[0-9]{2}_", "", head)

    tasklabels = _unique_in_order([_task_label(f) for f in fraccols])
    # Answer name: text after the last answer token, minus "_frac".
    labels = [re.split("[ax][0-9]", f)[-1][1:-5] for f in fraccols]

    # Plurality result for every row of the catalogue.
    task_arr, task_ans = [], []
    for votes in np.array(collated[fraccols]):
        e, a = plurality(votes, survey)
        task_arr.append(e)
        task_ans.append(a)
    task_arr = np.array(task_arr)
    task_ans = np.array(task_ans)

    # squeeze=False keeps axarr a 2-D array even for a 1x1 grid.
    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()

    colors = [
        u'#377EB8', u'#E41A1C', u'#4DAF4A', u'#984EA3', u'#FF7F00', u'#A6761D',
        u'#1B9E77'
    ]

    for i in range(task_arr.shape[1]):
        ax = axes[i]
        # Count the plurality answers of the rows flagged for this task.
        c = Counter(task_ans[:, i][task_arr[:, i] == True])  # noqa: E712
        pv = [c[k] for k in c]
        pl = [labels[k] for k in c]
        # autopct receives a percentage; convert it back to a count.
        ax.pie(pv,
               labels=pl,
               colors=colors,
               autopct=lambda p: '{:.0f}'.format(p * sum(pv) / 100))
        if i == 0:
            title = '{0:} - t{1:02} {2:}'.format(survey_name, i,
                                                 tasklabels[i])
        else:
            title = 't{0:02} {1:}'.format(i, tasklabels[i])
        ax.set_title(title)
        ax.set_aspect('equal')

    # Remove empty axes from subplots
    for ax in axes[ntasks:]:
        ax.set_axis_off()

    fig.set_tight_layout(True)
    fig.savefig('{1}/decals/plots/pie_{0:}.eps'.format(survey, gzpath))
    plt.close(fig)

    return None
Beispiel #5
0
def morphology_distribution(survey, absolute_counts=False, dr=None):
    """Plot vote-fraction histograms and plurality pie charts for a survey.

    ``survey_dr(survey, dr)`` gives the string used in all file names.
    Reads
    ``<gzdir>/gz_reduction_sandbox/data/<survey_str>_unweighted_classifications_00.csv``
    and writes two figures under ``<gzdir>/progress/<survey_str>/``:
    ``votefractions_<survey_str>.png`` (one histogram panel per task) and
    ``pie_<survey_str>.png`` (one pie chart of plurality answers per task).

    Args:
        survey: survey key; used to look up the display name in
            ``survey_dict()`` and passed on to ``plurality()``.
        absolute_counts: if True the pie wedges are annotated with counts,
            otherwise with percentages.
        dr: passed to ``survey_dr()`` together with ``survey``.

    Returns:
        None, also when the collation file cannot be read (a message is
        printed in that case).
    """
    # TODO (notes carried over from the original author):
    #   - Get the updated version of Brooke's aggregation and weighting code
    #   - Try running it on the DECaLS and Illustris data
    #   - Assuming it works, match positionally against GZ2 in TOPCAT (both
    #     samples). This might have to be manual unless I find a faster
    #     implementation of positional matching in Python.
    #   - Adapt the GZ2 code for plurality classifications and run on GZ4 data
    #   - Summarize overall results and split by luminosity
    #   - List galaxies with large changes?

    survey_str = survey_dr(survey, dr)

    try:
        collation_file = "{0:}/gz_reduction_sandbox/data/{1:}_unweighted_classifications_00.csv".format(gzdir, survey_str)
        collated = pd.read_csv(collation_file)
    except IOError:
        print("Collation file for {0:} does not exist. Aborting.".format(survey_str))
        return None

    # Vote-fraction columns and the distinct task ids ("tNN").
    fraccols, task_ids = [], set()
    for col in collated.columns:
        if col.endswith('frac'):
            fraccols.append(col)
        if col.startswith('t') and is_number(col[1:3]):
            task_ids.add(col[:3])
    collist = sorted(task_ids)

    # Subplot grid: four columns for more than nine tasks, otherwise
    # roughly square; rows are the ceiling of ntasks / ncols.
    ntasks = len(collist)
    ncols = 4 if ntasks > 9 else int(np.sqrt(ntasks))
    nrows = -(-ntasks // ncols)

    survey_name = survey_dict()[survey]['name']

    def _unique_in_order(seq):
        # Drop repeats while keeping first-seen order.
        seen = set()
        return [x for x in seq if not (x in seen or seen.add(x))]

    # Column names are split at the answer token ("a0", "x1", ...): the
    # text before it names the task, the text after it names the answer.
    tasklabels = _unique_in_order(
        [re.split("[ax][0-9]", f)[0][4:-1] for f in fraccols])
    labels = [re.split("[ax][0-9]", f[4:-5])[-1][1:] for f in fraccols]

    def _panel_title(i):
        # Only the first panel carries the survey name.
        if i == 0:
            return '{0:} - t{1:02} {2:}'.format(survey_name, i, tasklabels[i])
        return 't{0:02} {1:}'.format(i, tasklabels[i])

    def _hide_unused(axes):
        # Remove empty axes from subplots
        for ax in axes[ntasks:]:
            ax.set_axis_off()

    # --- Distribution of vote fractions for each task -------------------

    # squeeze=False keeps axarr a 2-D array even for a 1x1 grid.
    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()

    for i, task in enumerate(collist):
        ax = axes[i]
        ax.set_title(_panel_title(i))
        for f, label in zip(fraccols, labels):
            # One histogram per answer of this task, skipping "discuss".
            if f[:3] == task and f[-15:-5] != 'a0_discuss':
                ax.hist(collated[f], alpha=0.7, label=label)
        ax.set_xlim(0, 1)
        ax.legend(loc='upper left', fontsize=6)

    _hide_unused(axes)
    fig.set_tight_layout(True)
    fig.savefig('{0:}/progress/{1:}/votefractions_{1:}.png'.format(gzdir, survey_str))
    # Release the histogram figure before the next one is created.
    plt.close(fig)

    # --- Pie charts of the plurality votes -------------------------------

    task_arr, task_ans = [], []
    for votes in np.array(collated[fraccols]):
        e, a = plurality(votes, survey)
        task_arr.append(e)
        task_ans.append(a)
    task_arr = np.array(task_arr)
    task_ans = np.array(task_ans)

    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()

    colors = [u'#377EB8', u'#E41A1C', u'#4DAF4A', u'#984EA3', u'#FF7F00',
              u'#A6761D', u'#1B9E77']

    for i in range(task_arr.shape[1]):
        ax = axes[i]
        # Count the plurality answers of the rows flagged for this task.
        c = Counter(task_ans[:, i][task_arr[:, i] == True])  # noqa: E712
        pv = [c[k] for k in c]
        pl = [labels[k] for k in c]
        if absolute_counts:
            # autopct receives a percentage; convert it back to a count.
            ax.pie(pv, labels=pl, colors=colors,
                   autopct=lambda p: '{:.0f}'.format(p * sum(pv) / 100))
        else:
            ax.pie(pv, labels=pl, colors=colors, autopct='%1.0f%%')
        ax.set_title(_panel_title(i))
        ax.set_aspect('equal')

    _hide_unused(axes)
    fig.set_tight_layout(True)
    fig.savefig('{0:}/progress/{1:}/pie_{1:}.png'.format(gzdir, survey_str))
    plt.close(fig)

    # TODO: How does the distribution compare to GZ1 and GZ2?
    # TODO: Have they discovered anything interesting? Look at long
    #       discussions? Maybe tag the ones that science team hasn't
    #       participated in yet?

    return None
from matplotlib import pyplot as plt

# Keep the rows whose 'background' value is 0 and group them by subhalo.
b0 = df[df['background'] == 0]
grouped = b0.groupby(['subhalo_id'])

# Vote-fraction columns.
fraccols = [c for c in b0.columns if c[-4:] == 'frac']

# d[k] = number of subhalos whose rows give k distinct plurality
# classifications. Keys 1-4 are pre-seeded so they appear in the plot even
# with a zero count; any other value is added on demand instead of raising
# KeyError.
d = {1: 0, 2: 0, 3: 0, 4: 0}
for name, group in grouped:

    votearr = np.array(group[fraccols])
    answers = []
    for v in votearr:
        e, a = plurality(v, 'illustris')
        # Keep only the answers of the entries flagged 1 by plurality().
        answers.append(np.array(a)[np.array(e) == 1])
    # Arrays are not hashable; compare the classifications as tuples.
    answers_hashable = [tuple(ans) for ans in answers]
    n_distinct = len(set(answers_hashable))
    d[n_distinct] = d.get(n_distinct, 0) + 1

# Explicit lists: dict views are not accepted as array data on Python 3.
n_classifications = sorted(d)
n_subhalos = [d[k] for k in n_classifications]

fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(111)
ax.scatter(n_classifications, n_subhalos, s=40)
ax.set_xlim(0, 5)
ax.set_xlabel('Number of different GZ classifications', fontsize=16)
ax.set_ylabel('Count', fontsize=20)
ax.set_title("Illustris - fixed_mass")

plt.savefig('initial_exploration_diffs.png')
Beispiel #7
0
def morph_table_gz2():
    """Print a LaTeX-style table of plurality votes and save pie charts.

    The survey ('decals') and the overlap flag (True) are fixed inside the
    function. For every task, one row per plurality answer is printed as
    ``label & count & count / catalogue size & count / task total`` and a
    pie chart of the counts is drawn. The figure is written to
    ``<gzpath>/decals/plots/pie_<survey><suffix>.eps``.

    Returns:
        None, also when an input file cannot be read (a message is printed
        in that case).

    Raises:
        ValueError: if no collation file is defined for the survey/overlap
            combination.
    """
    overlap = True
    survey = 'decals'

    # Collation CSVs keyed by survey, for the two sample selections.
    overlap_files = {
        'gz2': "{0}/decals/csv/decals_gz2_main.csv",
        'stripe82': "{0}/decals/csv/decals_gz2_stripe82c1.csv",
        'decals': "{0}/decals/csv/decals_gz2_union.csv",
    }
    full_files = {
        'gz2': "{0}/dr10/dr10_gz2_main_specz.csv",
        'stripe82': "{0}/dr10/dr10_gz2_stripe82_coadd1.csv",
    }
    file_templates = overlap_files if overlap else full_files
    if survey not in file_templates:
        raise ValueError(
            "No collation file defined for survey '{0}' (overlap={1})".format(
                survey, overlap))
    collation_file = file_templates[survey].format(gzpath)

    try:
        if overlap:
            collated = pd.read_csv(collation_file)
        else:
            # These CSVs are read with the column names of the GZ2 FITS
            # table, so pandas treats every CSV row as data. The header is
            # only needed (and only read) on this path.
            fitsfile = "{0}/dr10/dr10_gz2_main_specz.fits".format(gzpath)
            hdr = fits.getheader(fitsfile, 1)
            fits_names = [hdr['TTYPE{0}'.format(i + 1)]
                          for i in range(hdr['TFIELDS'])]
            collated = pd.read_csv(collation_file, names=fits_names)
    except IOError:
        print("Collation file for {0:} does not exist. Aborting.".format(survey))
        return None

    # Pick out the vote-fraction columns and the distinct task ids ("tNN").
    fraccols, task_ids = [], set()
    if survey == 'decals':
        for col in collated.columns:
            if len(col) > 10:
                if col.endswith('frac') and col.startswith('decals'):
                    fraccols.append(col)
                if col[7] == 't' and is_number(col[8:10]):
                    task_ids.add(col[7:10])
    else:
        for col in collated.columns:
            if col.endswith('weighted_fraction'):
                fraccols.append(col)
            if col.startswith('t') and is_number(col[1:3]):
                task_ids.add(col[:3])
    collist = sorted(task_ids)

    # Subplot grid: four columns for more than nine tasks, otherwise
    # roughly square; rows are the ceiling of ntasks / ncols.
    ntasks = len(collist)
    ncols = 4 if ntasks > 9 else int(np.sqrt(ntasks))
    nrows = -(-ntasks // ncols)

    survey_name = survey_dict()[survey]['name']

    def _unique_in_order(seq):
        # Drop repeats while keeping first-seen order.
        seen = set()
        return [x for x in seq if not (x in seen or seen.add(x))]

    # Column names are split at the answer token ("a0", "x1", ...): the
    # text before it names the task, the text after it names the answer.
    if survey == 'decals':
        tasklabels = _unique_in_order(
            [re.split("[ax][0-9]", f)[0][11:-1] for f in fraccols])
        labels = [re.split("[ax][0-9]", f)[-1][1:-5] for f in fraccols]
    else:
        tasklabels = _unique_in_order(
            [re.split("[ax][0-9]", f)[0][4:-1] for f in fraccols])
        labels = [re.split("[ax][0-9]", f[4:-18])[-1][2:] for f in fraccols]

    # Plurality result for every row of the catalogue.
    task_arr, task_ans = [], []
    for votes in np.array(collated[fraccols]):
        e, a = plurality(votes, survey)
        task_arr.append(e)
        task_ans.append(a)
    task_arr = np.array(task_arr)
    task_ans = np.array(task_ans)

    # squeeze=False keeps axarr a 2-D array even for a 1x1 grid, so the
    # flat indexing below always works.
    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()

    colors = [u'#377EB8', u'#E41A1C', u'#4DAF4A', u'#984EA3', u'#FF7F00',
              u'#A6761D', u'#1B9E77']

    for i in range(task_arr.shape[1]):
        ax = axes[i]
        # Count the plurality answers of the rows flagged for this task.
        c = Counter(task_ans[:, i][task_arr[:, i] == True])  # noqa: E712
        task_total = sum(c.values())
        pv, pl = [], []
        for k in c:
            pv.append(c[k])
            pl.append(labels[k])

            # Print to screen in LaTeX format
            print("{0:20} & {1:6} & {3:.2f} & {2:.2f}".format(labels[k], c[k], c[k] * 1. / task_total, c[k] * 1. / len(collated)))
        print("")
        # autopct receives a percentage; convert it back to a count.
        ax.pie(pv, labels=pl, colors=colors,
               autopct=lambda p: '{:.0f}'.format(p * sum(pv) / 100))
        if i == 0:
            title = '{0:} - t{1:02} {2:}'.format(survey_name, i, tasklabels[i])
        else:
            title = 't{0:02} {1:}'.format(i, tasklabels[i])
        ax.set_title(title)
        ax.set_aspect('equal')

    # Remove empty axes from subplots
    for ax in axes[ntasks:]:
        ax.set_axis_off()

    fig.set_tight_layout(True)
    suffix = '_overlap' if overlap else ''
    fig.savefig('{1}/decals/plots/pie_{0}{2}.eps'.format(survey, gzpath, suffix))
    plt.close(fig)

    return None
Beispiel #8
0
def morphology_distribution(survey='decals'):
    """Save pie charts of the plurality answers for each task.

    Always reads
    ``<gzpath>/gz_reduction_sandbox/data/decals_unweighted_classifications_00.csv``
    regardless of ``survey``, and writes the figure to
    ``<gzpath>/decals/plots/pie_<survey>.eps``. Wedges are annotated with
    absolute counts.

    Args:
        survey: survey key; used to look up the display name in
            ``survey_dict()``, passed on to ``plurality()`` and used in the
            output file name.

    Returns:
        None, also when the collation file cannot be read (a message is
        printed in that case).
    """
    try:
        collation_file = "{0}/gz_reduction_sandbox/data/decals_unweighted_classifications_00.csv".format(gzpath)
        collated = pd.read_csv(collation_file)
    except IOError:
        print("Collation file for {0:} does not exist. Aborting.".format(survey))
        return None

    # Vote-fraction columns and the distinct task ids ("tNN").
    fraccols, task_ids = [], set()
    for col in collated.columns:
        if col.endswith('frac'):
            fraccols.append(col)
        if col.startswith('t') and is_number(col[1:3]):
            task_ids.add(col[:3])
    collist = sorted(task_ids)

    # Subplot grid: four columns for more than nine tasks, otherwise
    # roughly square; rows are the ceiling of ntasks / ncols.
    ntasks = len(collist)
    ncols = 4 if ntasks > 9 else int(np.sqrt(ntasks))
    nrows = -(-ntasks // ncols)

    survey_name = survey_dict()[survey]['name']

    def _unique_in_order(seq):
        # Drop repeats while keeping first-seen order.
        seen = set()
        return [x for x in seq if not (x in seen or seen.add(x))]

    def _task_label(col):
        # Text before the first answer token ("a0", "x1", ...), without the
        # trailing underscore and without the leading "tNN_" task id (plus
        # anything in front of it, such as a "decals_" prefix). A fixed
        # [11:-1] slice only fits "decals_tNN_..." names; the columns
        # scanned above start with "tNN", where it truncates or empties the
        # label and can leave fewer labels than tasks.
        head = re.split("[ax][0-9]", col)[0][:-1]
        return re.sub(r"^.*?t[0-9]{2}_", "", head)

    tasklabels = _unique_in_order([_task_label(f) for f in fraccols])
    # Answer name: text after the last answer token, minus "_frac".
    labels = [re.split("[ax][0-9]", f)[-1][1:-5] for f in fraccols]

    # Plurality result for every row of the catalogue.
    task_arr, task_ans = [], []
    for votes in np.array(collated[fraccols]):
        e, a = plurality(votes, survey)
        task_arr.append(e)
        task_ans.append(a)
    task_arr = np.array(task_arr)
    task_ans = np.array(task_ans)

    # squeeze=False keeps axarr a 2-D array even for a 1x1 grid.
    fig, axarr = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 12),
                              squeeze=False)
    axes = axarr.ravel()

    colors = [u'#377EB8', u'#E41A1C', u'#4DAF4A', u'#984EA3', u'#FF7F00',
              u'#A6761D', u'#1B9E77']

    for i in range(task_arr.shape[1]):
        ax = axes[i]
        # Count the plurality answers of the rows flagged for this task.
        c = Counter(task_ans[:, i][task_arr[:, i] == True])  # noqa: E712
        pv = [c[k] for k in c]
        pl = [labels[k] for k in c]
        # autopct receives a percentage; convert it back to a count.
        ax.pie(pv, labels=pl, colors=colors,
               autopct=lambda p: '{:.0f}'.format(p * sum(pv) / 100))
        if i == 0:
            title = '{0:} - t{1:02} {2:}'.format(survey_name, i, tasklabels[i])
        else:
            title = 't{0:02} {1:}'.format(i, tasklabels[i])
        ax.set_title(title)
        ax.set_aspect('equal')

    # Remove empty axes from subplots
    for ax in axes[ntasks:]:
        ax.set_axis_off()

    fig.set_tight_layout(True)
    fig.savefig('{1}/decals/plots/pie_{0:}.eps'.format(survey, gzpath))
    plt.close(fig)

    return None
Beispiel #9
0
def feature_comparison(savefig=False):
    """Plot GZ2 vs. DECaLS vote fractions for the matched galaxies.

    Reads HDU 1 of ``{decals_path}/fits/decals_gz2_union.fits`` and draws one
    log-normalised 2D histogram per matched answer (31 panels on a 4x8 grid;
    the unused axis is removed). The GZ2 fraction is on the x-axis and the
    DECaLS fraction on the y-axis, with a dashed red 1:1 line for reference.
    A row contributes to a task's panels only when the first value returned
    by ``plurality`` is True for that task in BOTH surveys.

    Parameters
    ----------
    savefig : bool, optional
        If True, write the figure to
        ``{plot_path}/decals_gz2_feature_comparison.pdf`` and close it.
        Otherwise show it interactively with ``plt.show()``.

    Returns
    -------
    None
    """

    filename = '{0}/fits/decals_gz2_union.fits'.format(decals_path)
    data = fits.getdata(filename, 1)

    # One entry per matched task:
    #   (gz2 flag index, decals flag index, [(title, gz2 column, decals column), ...])
    # The two indices select a column of the per-row flag array returned by
    # plurality() for each survey. Keeping the indices next to the answers
    # they belong to avoids maintaining separate parallel lists.
    # NOTE(review): for bulge prominence, GZ2 'just_noticeable' is paired with
    # DECaLS 'obvious' and GZ2 'obvious' with DECaLS 'dominant' -- confirm
    # this offset is intended.
    matched_tasks = [
        (0, 0, [
            ('smooth',           "gz2_t01_smooth_or_features_a01_smooth_fraction",           "decals_t00_smooth_or_features_a0_smooth_frac"),
            ('features/disk',    "gz2_t01_smooth_or_features_a02_features_or_disk_fraction", "decals_t00_smooth_or_features_a1_features_frac"),
            ('star',             "gz2_t01_smooth_or_features_a03_star_or_artifact_fraction", "decals_t00_smooth_or_features_a2_artifact_frac"),
        ]),
        (1, 1, [
            ('edge-on',          "gz2_t02_edgeon_a04_yes_fraction",                          "decals_t01_disk_edge_on_a0_yes_frac"),
            ('not edge-on',      "gz2_t02_edgeon_a05_no_fraction",                           "decals_t01_disk_edge_on_a1_no_frac"),
        ]),
        (2, 2, [
            ('bar',              "gz2_t03_bar_a06_bar_fraction",                             "decals_t02_bar_a0_bar_frac"),
            ('no bar',           "gz2_t03_bar_a07_no_bar_fraction",                          "decals_t02_bar_a1_no_bar_frac"),
        ]),
        (3, 3, [
            ('spiral',           "gz2_t04_spiral_a08_spiral_fraction",                       "decals_t03_spiral_a0_spiral_frac"),
            ('no spiral',        "gz2_t04_spiral_a09_no_spiral_fraction",                    "decals_t03_spiral_a1_no_spiral_frac"),
        ]),
        (4, 4, [
            ('no bulge',         "gz2_t05_bulge_prominence_a10_no_bulge_fraction",           "decals_t04_bulge_prominence_a0_no_bulge_frac"),
            ('medium bulge',     "gz2_t05_bulge_prominence_a11_just_noticeable_fraction",    "decals_t04_bulge_prominence_a1_obvious_frac"),
            ('obvious bulge',    "gz2_t05_bulge_prominence_a12_obvious_fraction",            "decals_t04_bulge_prominence_a2_dominant_frac"),
        ]),
        (6, 8, [
            ('completely round', "gz2_t07_rounded_a16_completely_round_fraction",            "decals_t08_rounded_a0_completely_round_frac"),
            ('in between',       "gz2_t07_rounded_a17_in_between_fraction",                  "decals_t08_rounded_a1_in_between_frac"),
            ('cigar shaped',     "gz2_t07_rounded_a18_cigar_shaped_fraction",                "decals_t08_rounded_a2_cigar_shaped_frac"),
        ]),
        (7, 10, [
            ('ring',             "gz2_t08_odd_feature_a19_ring_fraction",                    "decals_t10_odd_feature_x1_ring_frac"),
            ('lens/arc',         "gz2_t08_odd_feature_a20_lens_or_arc_fraction",             "decals_t10_odd_feature_x2_lens_frac"),
            ('irregular',        "gz2_t08_odd_feature_a22_irregular_fraction",               "decals_t10_odd_feature_x4_irregular_frac"),
            ('other',            "gz2_t08_odd_feature_a23_other_fraction",                   "decals_t10_odd_feature_x5_other_frac"),
            ('dust lane',        "gz2_t08_odd_feature_a38_dust_lane_fraction",               "decals_t10_odd_feature_x3_dustlane_frac"),
        ]),
        (8, 7, [
            ('rounded bulge',    "gz2_t09_bulge_shape_a25_rounded_fraction",                 "decals_t07_bulge_shape_a0_rounded_frac"),
            ('boxy bulge',       "gz2_t09_bulge_shape_a26_boxy_fraction",                    "decals_t07_bulge_shape_a1_boxy_frac"),
            ('no bulge',         "gz2_t09_bulge_shape_a27_no_bulge_fraction",                "decals_t07_bulge_shape_a2_no_bulge_frac"),
        ]),
        (9, 5, [
            ('tight arms',       "gz2_t10_arms_winding_a28_tight_fraction",                  "decals_t05_arms_winding_a0_tight_frac"),
            ('medium arms',      "gz2_t10_arms_winding_a29_medium_fraction",                 "decals_t05_arms_winding_a1_medium_frac"),
            ('loose arms',       "gz2_t10_arms_winding_a30_loose_fraction",                  "decals_t05_arms_winding_a2_loose_frac"),
        ]),
        (10, 6, [
            ('1 arm',            "gz2_t11_arms_number_a31_1_fraction",                       "decals_t06_arms_number_a0_1_frac"),
            ('2 arms',           "gz2_t11_arms_number_a32_2_fraction",                       "decals_t06_arms_number_a1_2_frac"),
            ('3 arms',           "gz2_t11_arms_number_a33_3_fraction",                       "decals_t06_arms_number_a2_3_frac"),
            ('4 arms',           "gz2_t11_arms_number_a34_4_fraction",                       "decals_t06_arms_number_a3_4_frac"),
            ('5+ arms',          "gz2_t11_arms_number_a36_more_than_4_fraction",             "decals_t06_arms_number_a4_more_than_4_frac"),
        ]),
    ]

    # TODO: still needs to sort for questions that are ACTUALLY ANSWERED.
    # Lots of pileup at 0,0.

    # Collect the raw vote-fraction columns of each survey, in table order
    # (plurality() receives the values of each row in this same order).
    decals_fraccols, gz2_fraccols = [], []
    for col in data.columns:
        colname = col.name
        if colname.startswith('decals') and colname.endswith('frac'):
            decals_fraccols.append(col)
        # The length guard is kept from the original selection rule; weighted
        # fractions are excluded so only the raw GZ2 fractions remain.
        if (len(colname) > 17
                and colname.startswith('gz2')
                and colname.endswith('fraction')
                and not colname.endswith('weighted_fraction')):
            gz2_fraccols.append(col)

    decals_votearr = data.from_columns(decals_fraccols)
    gz2_votearr = data.from_columns(gz2_fraccols)

    # Per-row flags from plurality(); only its first return value is needed.
    # Converted to arrays once here rather than on every loop iteration.
    decals_flags = np.array([plurality(np.array(list(row)), 'decals')[0]
                             for row in decals_votearr])
    gz2_flags = np.array([plurality(np.array(list(row)), 'gz2')[0]
                          for row in gz2_votearr])

    nrows, ncols = 4, 8
    fig, axarr = plt.subplots(num=1, nrows=nrows, ncols=ncols, figsize=(16, 10))
    axes = axarr.ravel()

    def plot_features(ax, title, gz2_col, decals_col, mask):
        # 2D histogram of GZ2 (x) against DECaLS (y) fractions for the rows
        # selected by the boolean mask, plus the 1:1 reference line.
        ax.hist2d(data[gz2_col][mask], data[decals_col][mask],
                  bins=(20, 20), range=[[0, 1], [0, 1]],
                  norm=LogNorm(), cmap=cm.viridis)
        ax.plot([0, 1], [0, 1], linestyle='--', color='red', lw=2)
        ax.set_title(title, fontsize=8)
        ax.get_xaxis().set_ticks([])
        ax.get_yaxis().set_ticks([])
        ax.set_xlabel(r'$f_{GZ2}$', fontsize=10)
        ax.set_ylabel(r'$f_{DECaLS}$', fontsize=10)
        ax.set_aspect('equal')

    panel = 0
    for gz2_idx, decals_idx, answers in matched_tasks:
        # Explicit comparison with True yields a boolean mask whatever dtype
        # plurality() uses for its flags.
        mask = (decals_flags[:, decals_idx] == True) & (gz2_flags[:, gz2_idx] == True)
        for title, gz2_col, decals_col in answers:
            plot_features(axes[panel], title, gz2_col, decals_col, mask)
            panel += 1

    # Remove the axes that did not receive a panel.
    for unused_ax in axes[panel:]:
        fig.delaxes(unused_ax)

    fig.tight_layout()
    if savefig:
        plt.savefig('{0}/decals_gz2_feature_comparison.pdf'.format(plot_path))
        # Figure number 1 is reused on every call; close it so a later call
        # starts from a clean figure instead of drawing over this one.
        plt.close(fig)
    else:
        plt.show()

    return None