예제 #1
0
def plot_results_many_gen(infile, outfiles):
    """Plot class-count recovery and clustering accuracy for generated data.

    Reads a pickled results table from ``infile``, restricts it to the
    ``'class_compare_gen'`` dataset fitted with the ``'ld'`` model, keeps the
    highest-``score`` row of every experimental condition and writes two
    figures:

    * ``outfiles[0]``: scatter of the estimated number of classes against
      the true number, one colour per value of ``truth``.
    * ``outfiles[1]``: bar chart of the mean adjusted Rand index per true
      class count with standard-deviation error bars, one bar per value of
      ``truth``.

    Args:
        infile: path of the pickled table.  It is used like a pandas
            DataFrame and must provide the columns ``true_assign``,
            ``assign``, ``dataset_name``, ``model``, ``score``, ``jitter``,
            ``nonzero_frac``, ``class_n``, ``side_n``, ``seed`` and
            ``truth``.
        outfiles: sequence holding at least two output figure paths.

    Raises:
        KeyError: if ``truth`` takes a value other than ``'distblock'``,
            ``'mixedblock'`` or ``'bumpblock'``.
    """
    # NOTE(review): unpickling can execute arbitrary code, so ``infile`` must
    # come from a trusted source.
    # Pickles are binary data; the context manager closes the handle instead
    # of leaving it to the garbage collector.
    with open(infile, 'rb') as fid:
        df = pickle.load(fid)

    df['ari'] = df.apply(
        lambda row: rand.compute_adj_rand_index(
            row['true_assign'],
            irm.util.canonicalize_assignment(row['assign'])),
        axis=1)
    df['empirical_class_n'] = df.apply(
        lambda row: len(np.unique(row['assign'])), axis=1)

    def best_scoring(group):
        """Return the single highest-``score`` row of ``group``."""
        # sort_values() replaces the older sort_index(by=...) spelling; fall
        # back to the latter on pandas releases that predate sort_values().
        if hasattr(group, 'sort_values'):
            ranked = group.sort_values('score', ascending=False)
        else:
            ranked = group.sort_index(by='score', ascending=False)
        return ranked.head(1)

    selected = df[(df['dataset_name'] == 'class_compare_gen')
                  & (df['model'] == 'ld')]
    a = selected.groupby(['dataset_name', 'jitter', 'nonzero_frac',
                          'class_n', 'side_n', 'seed',
                          'truth']).apply(best_scoring)

    # Shared by both figures (the original defined this dict twice).
    colors = {'distblock': 'b',
              'mixedblock': 'r',
              'bumpblock': 'g'}

    # --- Figure 1: estimated vs. true number of classes -------------------
    f = pylab.figure(figsize=(4, 3))
    ax = f.add_subplot(1, 1, 1)
    for g_i, (g_idx, g) in enumerate(a.groupby(['truth'])):
        # Each group is shifted by 0.3 along x so the groups do not overlap.
        ax.scatter(g.index.get_level_values('class_n') + 0.3 * g_i,
                   g['empirical_class_n'],
                   c=colors[g_idx],
                   edgecolor='none')
    # Identity line: estimated count equal to the true count.
    ax.plot([1, 16], [1, 16], c='k')
    ax.set_xlabel("true class number")
    ax.set_ylabel("estimated class number")
    ax.set_xticks([1, 2, 4, 8, 16])
    f.tight_layout()
    f.savefig(outfiles[0])
    # Drop the figure from the pyplot registry so it does not accumulate.
    pylab.close(f)

    # --- Figure 2: adjusted Rand index per true class count ---------------
    offsets = {'distblock': 0.0,
               'mixedblock': 1.0,
               'bumpblock': 2.0}
    CLASS_SPACE = 3.5  # x distance between consecutive class_n groups
    WIDTH = 0.8        # width of one bar

    f = pylab.figure(figsize=(4, 3))
    ax = f.add_subplot(1, 1, 1)

    N = 0
    for g_idx, g in a.groupby(['truth']):
        h = g.groupby(['class_n']).mean()
        herr = g.groupby(['class_n']).std()
        N = len(h)
        print("g_idx %s %s" % (g_idx, h['ari']))
        lefts = np.arange(N) * CLASS_SPACE + offsets[g_idx]
        # align='edge' pins the bar's left edge to ``lefts`` on every
        # matplotlib version, which is what the error-bar position
        # (left edge + half a width) assumes.
        ax.bar(lefts, h['ari'], width=WIDTH, color=colors[g_idx],
               align='edge')
        ax.errorbar(lefts + WIDTH / 2, h['ari'], yerr=herr['ari'],
                    capsize=0, elinewidth=4, ecolor='k', linewidth=0)

    ax.set_xlabel("true class number")
    ax.set_ylabel("adjusted rand index")
    ax.set_ylim(0, 1.0)
    ax.set_xticks(np.arange(N) * CLASS_SPACE + 1)
    # Labels are hard-coded: assumes the class counts present are exactly
    # 1, 2, 4, 8 and 16 -- TODO confirm against the generated datasets.
    ax.set_xticklabels([1, 2, 4, 8, 16])
    f.tight_layout()
    f.savefig(outfiles[1])
    pylab.close(f)
예제 #2
0
def plot_results_many_gen(infile, outfiles):
    """Plot class-count recovery and clustering accuracy for generated data.

    Reads a pickled results table from ``infile``, restricts it to the
    ``'class_compare_gen'`` dataset fitted with the ``'ld'`` model, keeps the
    highest-``score`` row of every experimental condition and writes two
    figures:

    * ``outfiles[0]``: scatter of the estimated number of classes against
      the true number, one colour per value of ``truth``.
    * ``outfiles[1]``: bar chart of the mean adjusted Rand index per true
      class count with standard-deviation error bars, one bar per value of
      ``truth``.

    Args:
        infile: path of the pickled table.  It is used like a pandas
            DataFrame and must provide the columns ``true_assign``,
            ``assign``, ``dataset_name``, ``model``, ``score``, ``jitter``,
            ``nonzero_frac``, ``class_n``, ``side_n``, ``seed`` and
            ``truth``.
        outfiles: sequence holding at least two output figure paths.

    Raises:
        KeyError: if ``truth`` takes a value other than ``'distblock'``,
            ``'mixedblock'`` or ``'bumpblock'``.
    """
    # NOTE(review): unpickling can execute arbitrary code, so ``infile`` must
    # come from a trusted source.
    # Pickles are binary data; the context manager closes the handle instead
    # of leaving it to the garbage collector.
    with open(infile, 'rb') as fid:
        df = pickle.load(fid)

    df['ari'] = df.apply(
        lambda row: rand.compute_adj_rand_index(
            row['true_assign'],
            irm.util.canonicalize_assignment(row['assign'])),
        axis=1)
    df['empirical_class_n'] = df.apply(
        lambda row: len(np.unique(row['assign'])), axis=1)

    def best_scoring(group):
        """Return the single highest-``score`` row of ``group``."""
        # sort_values() replaces the older sort_index(by=...) spelling; fall
        # back to the latter on pandas releases that predate sort_values().
        if hasattr(group, 'sort_values'):
            ranked = group.sort_values('score', ascending=False)
        else:
            ranked = group.sort_index(by='score', ascending=False)
        return ranked.head(1)

    selected = df[(df['dataset_name'] == 'class_compare_gen')
                  & (df['model'] == 'ld')]
    a = selected.groupby([
        'dataset_name', 'jitter', 'nonzero_frac', 'class_n', 'side_n',
        'seed', 'truth'
    ]).apply(best_scoring)

    # Shared by both figures (the original defined this dict twice).
    colors = {'distblock': 'b', 'mixedblock': 'r', 'bumpblock': 'g'}

    # --- Figure 1: estimated vs. true number of classes -------------------
    f = pylab.figure(figsize=(4, 3))
    ax = f.add_subplot(1, 1, 1)
    for g_i, (g_idx, g) in enumerate(a.groupby(['truth'])):
        # Each group is shifted by 0.3 along x so the groups do not overlap.
        ax.scatter(g.index.get_level_values('class_n') + 0.3 * g_i,
                   g['empirical_class_n'],
                   c=colors[g_idx],
                   edgecolor='none')
    # Identity line: estimated count equal to the true count.
    ax.plot([1, 16], [1, 16], c='k')
    ax.set_xlabel("true class number")
    ax.set_ylabel("estimated class number")
    ax.set_xticks([1, 2, 4, 8, 16])
    f.tight_layout()
    f.savefig(outfiles[0])
    # Drop the figure from the pyplot registry so it does not accumulate.
    pylab.close(f)

    # --- Figure 2: adjusted Rand index per true class count ---------------
    offsets = {'distblock': 0.0, 'mixedblock': 1.0, 'bumpblock': 2.0}
    CLASS_SPACE = 3.5  # x distance between consecutive class_n groups
    WIDTH = 0.8  # width of one bar

    f = pylab.figure(figsize=(4, 3))
    ax = f.add_subplot(1, 1, 1)

    N = 0
    for g_idx, g in a.groupby(['truth']):
        h = g.groupby(['class_n']).mean()
        herr = g.groupby(['class_n']).std()
        N = len(h)
        print("g_idx %s %s" % (g_idx, h['ari']))
        lefts = np.arange(N) * CLASS_SPACE + offsets[g_idx]
        # align='edge' pins the bar's left edge to ``lefts`` on every
        # matplotlib version, which is what the error-bar position
        # (left edge + half a width) assumes.
        ax.bar(lefts,
               h['ari'],
               width=WIDTH,
               color=colors[g_idx],
               align='edge')
        ax.errorbar(lefts + WIDTH / 2,
                    h['ari'],
                    yerr=herr['ari'],
                    capsize=0,
                    elinewidth=4,
                    ecolor='k',
                    linewidth=0)

    ax.set_xlabel("true class number")
    ax.set_ylabel("adjusted rand index")
    ax.set_ylim(0, 1.0)
    ax.set_xticks(np.arange(N) * CLASS_SPACE + 1)
    # Labels are hard-coded: assumes the class counts present are exactly
    # 1, 2, 4, 8 and 16 -- TODO confirm against the generated datasets.
    ax.set_xticklabels([1, 2, 4, 8, 16])
    f.tight_layout()
    f.savefig(outfiles[1])
    pylab.close(f)
예제 #3
0
def plot_results(infile, outfiles):
    """Write three summary figures for every dataset in ``PLOT_DATASETS``.

    Reads a pickled results table from ``infile`` and, for each
    ``(plot_files, dataset_name)`` pair obtained by zipping ``outfiles`` with
    the module-level ``PLOT_DATASETS``, keeps the highest-``score`` row of
    every experimental condition and saves:

    * ``plot_files[0]``: scatter of the estimated number of types against
      the true number, one colour per model (``'bb'`` / ``'ld'``).
    * ``plot_files[1]``: bar chart of the mean adjusted Rand index per true
      type count with standard-deviation error bars, one bar per model.
    * ``plot_files[2]``: three stacked histograms (``class_n`` 4, 8 and 16)
      of the per-cluster spread ``sqrt(var(x) + var(y))`` for the clusters
      found by each model and for the ground-truth clusters.

    Args:
        infile: path of the pickled table.  It is used like a pandas
            DataFrame and must provide the columns ``true_assign``,
            ``assign``, ``node_pos``, ``dataset_name``, ``model``,
            ``score``, ``jitter``, ``nonzero_frac``, ``class_n``,
            ``side_n``, ``seed`` and ``truth``.
        outfiles: sequence with one entry per plotted dataset; every entry
            is itself a sequence of at least three output figure paths.

    Raises:
        KeyError: if ``model`` takes a value other than ``'bb'`` or
            ``'ld'``.
    """
    # NOTE(review): unpickling can execute arbitrary code, so ``infile`` must
    # come from a trusted source.
    # Pickles are binary data; the context manager closes the handle instead
    # of leaving it to the garbage collector.
    with open(infile, 'rb') as fid:
        df = pickle.load(fid)

    df['ari'] = df.apply(
        lambda row: rand.compute_adj_rand_index(
            row['true_assign'],
            irm.util.canonicalize_assignment(row['assign'])),
        axis=1)
    df['empirical_class_n'] = df.apply(
        lambda row: len(np.unique(row['assign'])), axis=1)

    def best_scoring(group):
        """Return the single highest-``score`` row of ``group``."""
        # sort_values() replaces the older sort_index(by=...) spelling; fall
        # back to the latter on pandas releases that predate sort_values().
        if hasattr(group, 'sort_values'):
            ranked = group.sort_values('score', ascending=False)
        else:
            ranked = group.sort_index(by='score', ascending=False)
        return ranked.head(1)

    def hide_tick_marks(axis):
        """Switch off the tick marks on both sides of ``axis``."""
        for tic in axis.get_major_ticks():
            tic.tick1On = tic.tick2On = False

    def drop_spines(axes):
        """Hide the top and right border lines of ``axes``."""
        for spine in ('top', 'right'):
            axes.spines[spine].set_visible(False)

    def cluster_var(row):
        """Return per-cluster x/y/z variances for one result row.

        The result is indexed by ``(truth, cluster)``: ``truth`` is False
        for clusters taken from ``row['assign']`` and True for clusters
        taken from ``row['true_assign']``.
        """
        assign = row['assign']
        true_assign = row['true_assign']
        print(type(row['node_pos']))
        node_pos = row['node_pos']

        def node_to_df(labels, nodes):
            # Columns 0, 1 and 2 of ``nodes`` are used as x, y and z.
            return pandas.DataFrame({'cluster': labels,
                                     'x': nodes[:, 0],
                                     'y': nodes[:, 1],
                                     'z': nodes[:, 2]})

        rdf1 = node_to_df(assign, node_pos)
        rdf1['truth'] = False

        rdf2 = node_to_df(true_assign, node_pos)
        rdf2['truth'] = True

        rdf = pandas.concat([rdf1, rdf2])
        rdf[['x', 'y', 'z']] = rdf[['x', 'y', 'z']].astype(float)

        return rdf.groupby(['truth', 'cluster']).var()

    # Per-model presentation, identical for every dataset.
    colors = {'bb': 'b',
              'ld': 'r'}
    model_labels = {'bb': "conn only",
                    'ld': "conn + dist"}
    offsets = {'bb': 0.0,
               'ld': 1.0}
    hist_models = [('bb', 'b'),
                   ('ld', 'r')]
    # The ground-truth histogram is computed from the rows of the last
    # model only; the original code got this through the loop variable
    # left over from iterating ``hist_models``.
    truth_model = hist_models[-1][0]

    for plot_files, dataset_name in zip(outfiles, PLOT_DATASETS):

        df_cc = df[df['dataset_name'] == dataset_name]

        a = df_cc.groupby(['dataset_name', 'jitter', 'model',
                           'nonzero_frac', 'class_n', 'side_n', 'seed',
                           'truth']).apply(best_scoring)

        # --- Figure 1: estimated vs. true number of types ----------------
        f = pylab.figure(figsize=(4, 3))
        ax = f.add_subplot(1, 1, 1)
        for g_idx, g in a.groupby(['model']):
            ax.scatter(g.index.get_level_values('class_n'),
                       g['empirical_class_n'], c=colors[g_idx],
                       edgecolor='none', label=model_labels[g_idx])
        # Identity line: estimated count equal to the true count.
        ax.plot([1, 16], [1, 16], c='k', label="ground truth")
        ax.set_xlabel("true type number")
        ax.set_ylabel("estimated type number")
        ax.set_xticks([1, 2, 4, 8, 16])
        ax.legend(loc="upper left", fontsize=10)
        ax.set_yticks([0, 70])
        ax.set_ylim([-2, 70])
        hide_tick_marks(ax.yaxis)

        f.tight_layout()
        hide_tick_marks(ax.xaxis)
        drop_spines(ax)

        f.savefig(plot_files[0])
        # Drop the figure from the pyplot registry; three figures are
        # created per dataset and would otherwise accumulate.
        pylab.close(f)

        # --- Figure 2: adjusted Rand index per true type count -----------
        f = pylab.figure(figsize=(4, 3))
        ax = f.add_subplot(1, 1, 1)
        CLASS_SPACE = 2.5  # x distance between consecutive class_n groups
        WIDTH = 0.8        # width of one bar

        N = 0
        for g_idx, g in a.groupby(['model']):
            h = g.groupby(['class_n']).mean()
            herr = g.groupby(['class_n']).std()
            N = len(h)
            lefts = np.arange(N) * CLASS_SPACE + offsets[g_idx]
            # align='edge' pins the bar's left edge to ``lefts`` on every
            # matplotlib version, which is what the error-bar position
            # (left edge + half a width) assumes.
            ax.bar(lefts, h['ari'], width=WIDTH, color=colors[g_idx],
                   align='edge')
            ax.errorbar(lefts + WIDTH / 2, h['ari'], yerr=herr['ari'],
                        capsize=0, elinewidth=2, linewidth=0,
                        ecolor='black')
        ax.set_xlabel("true type number")
        ax.set_ylabel("Cluster accuracy (ARI)")
        ax.set_ylim(0, 1.0)
        ax.set_yticks([0.0, 1.0])
        ax.set_xticks(np.arange(N) * CLASS_SPACE + 1)
        # Labels are hard-coded: assumes the type counts present are exactly
        # 1, 2, 4, 8 and 16 -- TODO confirm against the datasets.
        ax.set_xticklabels([1, 2, 4, 8, 16])
        hide_tick_marks(ax.xaxis)
        drop_spines(ax)

        f.tight_layout()
        f.savefig(plot_files[1])
        pylab.close(f)

        # --- Figure 3: spatial spread of the clusters ---------------------
        per_run_vars = []
        for _, r in a.iterrows():
            cv = cluster_var(r.to_dict())
            cv['model'] = r['model']
            cv['seed'] = r['seed']
            cv['class_n'] = r['class_n']
            per_run_vars.append(cv)
        df_vars = pandas.concat(per_run_vars)
        df_vars['truth'] = df_vars.index.get_level_values('truth')
        # Square root of the summed x and y variances; z is not included.
        df_vars['std'] = np.sqrt(df_vars['x'] + df_vars['y'])

        f = pylab.figure(figsize=(4.0, 6.5))
        bins = np.linspace(0, 3.5, 20)

        bin_width = (bins[1] - bins[0])
        bar_width = bin_width / 4.0
        bar_space = bin_width / 3.

        for i, class_n in enumerate([4, 8, 16]):
            ax = f.add_subplot(3, 1, i + 1)

            for model_i, (model, color) in enumerate(hist_models):
                # Element-wise comparison on a pandas column, hence the
                # explicit ``== False``.
                df2 = df_vars[(df_vars['model'] == model)
                              & (df_vars['class_n'] == class_n)
                              & (df_vars['truth'] == False)]
                hist, _ = np.histogram(df2.dropna()['std'], bins=bins,
                                       density=True)
                # density * bin width = fraction of clusters in the bin.
                ax.bar(bins[:-1] + model_i * bar_space, hist * bin_width,
                       width=bar_width, color=color, label=model,
                       linewidth=0.0, align='edge')

            df2 = df_vars[(df_vars['model'] == truth_model)
                          & (df_vars['class_n'] == class_n)
                          & (df_vars['truth'] == True)]
            hist, _ = np.histogram(df2.dropna()['std'], bins=bins,
                                   density=True)
            print("Histogram= %s" % (hist,))
            ax.bar(bins[:-1] + 2 * bar_space, hist * bin_width,
                   width=bar_width, color='k', label='truth',
                   linewidth=0.0, align='edge')
            ax.set_yticks([0.0, 1.0])
            ax.set_ylim(0.0, 1.05)
            ax.set_ylabel("frac (class=%d)" % class_n)
            ax.set_xticks([0.0, 3.5])
            if i == 0:
                # Only the handles are reused; the legend text is replaced.
                handles, _ = ax.get_legend_handles_labels()
                ax.legend(handles,
                          ['conn only', 'conn + dist', 'Ground Truth'],
                          loc='upper left',
                          fontsize=12)
            if i < 2:
                ax.set_xticklabels([])

            hide_tick_marks(ax.xaxis)
            hide_tick_marks(ax.yaxis)
            drop_spines(ax)

        # ``ax`` is the last (bottom) subplot here.
        ax.set_xlabel("size of clusters (2D std dev)")
        f.tight_layout()
        f.savefig(plot_files[2])
        pylab.close(f)
예제 #4
0
def plot_results(infile, outfiles):
    """Write three summary figures for every dataset in ``PLOT_DATASETS``.

    Reads a pickled results table from ``infile`` and, for each
    ``(plot_files, dataset_name)`` pair obtained by zipping ``outfiles`` with
    the module-level ``PLOT_DATASETS``, keeps the highest-``score`` row of
    every experimental condition and saves:

    * ``plot_files[0]``: scatter of the estimated number of types against
      the true number, one colour per model (``'bb'`` / ``'ld'``).
    * ``plot_files[1]``: bar chart of the mean adjusted Rand index per true
      type count with standard-deviation error bars, one bar per model.
    * ``plot_files[2]``: three stacked histograms (``class_n`` 4, 8 and 16)
      of the per-cluster spread ``sqrt(var(x) + var(y))`` for the clusters
      found by each model and for the ground-truth clusters.

    Args:
        infile: path of the pickled table.  It is used like a pandas
            DataFrame and must provide the columns ``true_assign``,
            ``assign``, ``node_pos``, ``dataset_name``, ``model``,
            ``score``, ``jitter``, ``nonzero_frac``, ``class_n``,
            ``side_n``, ``seed`` and ``truth``.
        outfiles: sequence with one entry per plotted dataset; every entry
            is itself a sequence of at least three output figure paths.

    Raises:
        KeyError: if ``model`` takes a value other than ``'bb'`` or
            ``'ld'``.
    """
    # NOTE(review): unpickling can execute arbitrary code, so ``infile`` must
    # come from a trusted source.
    # Pickles are binary data; the context manager closes the handle instead
    # of leaving it to the garbage collector.
    with open(infile, 'rb') as fid:
        df = pickle.load(fid)

    df['ari'] = df.apply(
        lambda row: rand.compute_adj_rand_index(
            row['true_assign'],
            irm.util.canonicalize_assignment(row['assign'])),
        axis=1)
    df['empirical_class_n'] = df.apply(
        lambda row: len(np.unique(row['assign'])), axis=1)

    def best_scoring(group):
        """Return the single highest-``score`` row of ``group``."""
        # sort_values() replaces the older sort_index(by=...) spelling; fall
        # back to the latter on pandas releases that predate sort_values().
        if hasattr(group, 'sort_values'):
            ranked = group.sort_values('score', ascending=False)
        else:
            ranked = group.sort_index(by='score', ascending=False)
        return ranked.head(1)

    def hide_tick_marks(axis):
        """Switch off the tick marks on both sides of ``axis``."""
        for tic in axis.get_major_ticks():
            tic.tick1On = tic.tick2On = False

    def drop_spines(axes):
        """Hide the top and right border lines of ``axes``."""
        for spine in ('top', 'right'):
            axes.spines[spine].set_visible(False)

    def cluster_var(row):
        """Return per-cluster x/y/z variances for one result row.

        The result is indexed by ``(truth, cluster)``: ``truth`` is False
        for clusters taken from ``row['assign']`` and True for clusters
        taken from ``row['true_assign']``.
        """
        assign = row['assign']
        true_assign = row['true_assign']
        print(type(row['node_pos']))
        node_pos = row['node_pos']

        def node_to_df(labels, nodes):
            # Columns 0, 1 and 2 of ``nodes`` are used as x, y and z.
            return pandas.DataFrame({
                'cluster': labels,
                'x': nodes[:, 0],
                'y': nodes[:, 1],
                'z': nodes[:, 2]
            })

        rdf1 = node_to_df(assign, node_pos)
        rdf1['truth'] = False

        rdf2 = node_to_df(true_assign, node_pos)
        rdf2['truth'] = True

        rdf = pandas.concat([rdf1, rdf2])
        rdf[['x', 'y', 'z']] = rdf[['x', 'y', 'z']].astype(float)

        return rdf.groupby(['truth', 'cluster']).var()

    # Per-model presentation, identical for every dataset.
    colors = {'bb': 'b', 'ld': 'r'}
    model_labels = {'bb': "conn only", 'ld': "conn + dist"}
    offsets = {'bb': 0.0, 'ld': 1.0}
    hist_models = [
        ('bb', 'b'),
        ('ld', 'r'),
    ]
    # The ground-truth histogram is computed from the rows of the last
    # model only; the original code got this through the loop variable
    # left over from iterating ``hist_models``.
    truth_model = hist_models[-1][0]

    for plot_files, dataset_name in zip(outfiles, PLOT_DATASETS):

        df_cc = df[df['dataset_name'] == dataset_name]

        a = df_cc.groupby([
            'dataset_name', 'jitter', 'model', 'nonzero_frac', 'class_n',
            'side_n', 'seed', 'truth'
        ]).apply(best_scoring)

        # --- Figure 1: estimated vs. true number of types ----------------
        f = pylab.figure(figsize=(4, 3))
        ax = f.add_subplot(1, 1, 1)
        for g_idx, g in a.groupby(['model']):
            ax.scatter(g.index.get_level_values('class_n'),
                       g['empirical_class_n'],
                       c=colors[g_idx],
                       edgecolor='none',
                       label=model_labels[g_idx])
        # Identity line: estimated count equal to the true count.
        ax.plot([1, 16], [1, 16], c='k', label="ground truth")
        ax.set_xlabel("true type number")
        ax.set_ylabel("estimated type number")
        ax.set_xticks([1, 2, 4, 8, 16])
        ax.legend(loc="upper left", fontsize=10)
        ax.set_yticks([0, 70])
        ax.set_ylim([-2, 70])
        hide_tick_marks(ax.yaxis)

        f.tight_layout()
        hide_tick_marks(ax.xaxis)
        drop_spines(ax)

        f.savefig(plot_files[0])
        # Drop the figure from the pyplot registry; three figures are
        # created per dataset and would otherwise accumulate.
        pylab.close(f)

        # --- Figure 2: adjusted Rand index per true type count -----------
        f = pylab.figure(figsize=(4, 3))
        ax = f.add_subplot(1, 1, 1)
        CLASS_SPACE = 2.5  # x distance between consecutive class_n groups
        WIDTH = 0.8  # width of one bar

        N = 0
        for g_idx, g in a.groupby(['model']):
            h = g.groupby(['class_n']).mean()
            herr = g.groupby(['class_n']).std()
            N = len(h)
            lefts = np.arange(N) * CLASS_SPACE + offsets[g_idx]
            # align='edge' pins the bar's left edge to ``lefts`` on every
            # matplotlib version, which is what the error-bar position
            # (left edge + half a width) assumes.
            ax.bar(lefts,
                   h['ari'],
                   width=WIDTH,
                   color=colors[g_idx],
                   align='edge')
            ax.errorbar(lefts + WIDTH / 2,
                        h['ari'],
                        yerr=herr['ari'],
                        capsize=0,
                        elinewidth=2,
                        linewidth=0,
                        ecolor='black')
        ax.set_xlabel("true type number")
        ax.set_ylabel("Cluster accuracy (ARI)")
        ax.set_ylim(0, 1.0)
        ax.set_yticks([0.0, 1.0])
        ax.set_xticks(np.arange(N) * CLASS_SPACE + 1)
        # Labels are hard-coded: assumes the type counts present are exactly
        # 1, 2, 4, 8 and 16 -- TODO confirm against the datasets.
        ax.set_xticklabels([1, 2, 4, 8, 16])
        hide_tick_marks(ax.xaxis)
        drop_spines(ax)

        f.tight_layout()
        f.savefig(plot_files[1])
        pylab.close(f)

        # --- Figure 3: spatial spread of the clusters ---------------------
        per_run_vars = []
        for _, r in a.iterrows():
            cv = cluster_var(r.to_dict())
            cv['model'] = r['model']
            cv['seed'] = r['seed']
            cv['class_n'] = r['class_n']
            per_run_vars.append(cv)
        df_vars = pandas.concat(per_run_vars)
        df_vars['truth'] = df_vars.index.get_level_values('truth')
        # Square root of the summed x and y variances; z is not included.
        df_vars['std'] = np.sqrt(df_vars['x'] + df_vars['y'])

        f = pylab.figure(figsize=(4.0, 6.5))
        bins = np.linspace(0, 3.5, 20)

        bin_width = (bins[1] - bins[0])
        bar_width = bin_width / 4.0
        bar_space = bin_width / 3.

        for i, class_n in enumerate([4, 8, 16]):
            ax = f.add_subplot(3, 1, i + 1)

            for model_i, (model, color) in enumerate(hist_models):
                # Element-wise comparison on a pandas column, hence the
                # explicit ``== False``.
                df2 = df_vars[(df_vars['model'] == model)
                              & (df_vars['class_n'] == class_n)
                              & (df_vars['truth'] == False)]
                hist, _ = np.histogram(df2.dropna()['std'],
                                       bins=bins,
                                       density=True)
                # density * bin width = fraction of clusters in the bin.
                ax.bar(bins[:-1] + model_i * bar_space,
                       hist * bin_width,
                       width=bar_width,
                       color=color,
                       label=model,
                       linewidth=0.0,
                       align='edge')

            df2 = df_vars[(df_vars['model'] == truth_model)
                          & (df_vars['class_n'] == class_n)
                          & (df_vars['truth'] == True)]
            hist, _ = np.histogram(df2.dropna()['std'],
                                   bins=bins,
                                   density=True)
            print("Histogram= %s" % (hist,))
            ax.bar(bins[:-1] + 2 * bar_space,
                   hist * bin_width,
                   width=bar_width,
                   color='k',
                   label='truth',
                   linewidth=0.0,
                   align='edge')
            ax.set_yticks([0.0, 1.0])
            ax.set_ylim(0.0, 1.05)
            ax.set_ylabel("frac (class=%d)" % class_n)
            ax.set_xticks([0.0, 3.5])
            if i == 0:
                # Only the handles are reused; the legend text is replaced.
                handles, _ = ax.get_legend_handles_labels()
                ax.legend(handles, [
                    'conn only',
                    'conn + dist',
                    'Ground Truth',
                ],
                          loc='upper left',
                          fontsize=12)
            if i < 2:
                ax.set_xticklabels([])

            hide_tick_marks(ax.xaxis)
            hide_tick_marks(ax.yaxis)
            drop_spines(ax)

        # ``ax`` is the last (bottom) subplot here.
        ax.set_xlabel("size of clusters (2D std dev)")
        f.tight_layout()
        f.savefig(plot_files[2])
        pylab.close(f)