def plot_results_many_gen(infile, outfiles):
    """Plot class-count recovery and ARI for the 'class_compare_gen' runs.

    Loads a pickled pandas DataFrame of results, keeps the rows whose
    ``dataset_name`` is ``'class_compare_gen'`` and whose ``model`` is
    ``'ld'``, selects the highest-``score`` row of every configuration and
    writes two figures.

    Parameters
    ----------
    infile : str
        Path of the pickled results DataFrame.  The frame must provide the
        columns read below ('assign', 'true_assign', 'dataset_name',
        'model', 'score', 'truth', 'class_n', ...).
    outfiles : sequence of str
        ``outfiles[0]`` receives the scatter plot of estimated versus true
        class count; ``outfiles[1]`` receives the bar chart of mean ARI per
        true class count.
    """
    # Close the handle as soon as the frame is loaded, and read the pickle
    # as binary data.  pickle can execute arbitrary code while loading, so
    # `infile` must come from a trusted source.
    with open(infile, 'rb') as fh:
        df = pickle.load(fh)

    # Per-row quality measures: adjusted Rand index against the ground
    # truth, and the number of distinct labels in the inferred assignment.
    df['ari'] = df.apply(
        lambda row: rand.compute_adj_rand_index(
            row['true_assign'],
            irm.util.canonicalize_assignment(row['assign'])),
        axis=1)
    df['empirical_class_n'] = df.apply(
        lambda row: len(np.unique(row['assign'])), axis=1)

    df_cc = df[(df['dataset_name'] == 'class_compare_gen')
               & (df['model'] == 'ld')]

    # Keep only the best-scoring row of each configuration.
    # NOTE: sort_index(by=...) is the legacy pandas spelling of sort_values.
    a = df_cc.groupby(['dataset_name', 'jitter', 'nonzero_frac', 'class_n',
                       'side_n', 'seed', 'truth']).apply(
        lambda group: group.sort_index(by='score', ascending=False).head(1))

    colors = {'distblock': 'b', 'mixedblock': 'r', 'bumpblock': 'g'}
    offsets = {'distblock': 0.0, 'mixedblock': 1.0, 'bumpblock': 2.0}

    # ---- Figure 1: estimated versus true number of classes ----
    f = pylab.figure(figsize=(4, 3))
    ax = f.add_subplot(1, 1, 1)
    for g_i, (g_idx, g) in enumerate(a.groupby(['truth'])):
        # Shift each truth type a little along x so the groups do not
        # sit exactly on top of each other.
        ax.scatter(g.index.get_level_values('class_n') + 0.3 * g_i,
                   g['empirical_class_n'],
                   c=colors[g_idx], edgecolor='none')
    # Identity line: perfect recovery of the class count.
    ax.plot([1, 16], [1, 16], c='k')
    ax.set_xlabel("true class number")
    ax.set_ylabel("estimated class number")
    ax.set_xticks([1, 2, 4, 8, 16])
    f.tight_layout()
    f.savefig(outfiles[0])

    # ---- Figure 2: mean ARI per true class count, one bar per truth ----
    f = pylab.figure(figsize=(4, 3))
    ax = f.add_subplot(1, 1, 1)
    CLASS_SPACE = 3.5  # x distance between consecutive class-count groups
    WIDTH = 0.8        # width of a single bar
    N = 0              # number of class-count groups (from the last truth)
    for g_idx, g in a.groupby(['truth']):
        h = g.groupby(['class_n']).mean()
        herr = g.groupby(['class_n']).std()
        N = len(h)
        # Single-argument print() emits the same text on Python 2 and 3.
        print("g_idx %s %s" % (g_idx, h['ari']))
        left = np.arange(N) * CLASS_SPACE + offsets[g_idx]
        ax.bar(left, h['ari'], width=WIDTH, color=colors[g_idx])
        # linewidth=0 suppresses the connecting line; only the error bars
        # (centred on each bar) are drawn.
        ax.errorbar(left + WIDTH / 2, h['ari'], yerr=herr['ari'],
                    capsize=0, elinewidth=4, ecolor='k', linewidth=0)
    ax.set_xlabel("true class number")
    ax.set_ylabel("adjusted rand index")
    ax.set_ylim(0, 1.0)
    ax.set_xticks(np.arange(N) * CLASS_SPACE + 1)
    ax.set_xticklabels([1, 2, 4, 8, 16])
    f.tight_layout()
    f.savefig(outfiles[1])
def plot_results_many_gen(infile, outfiles):
    """Plot class-count recovery and ARI for the 'class_compare_gen' runs.

    Loads a pickled pandas DataFrame of results, keeps the rows whose
    ``dataset_name`` is ``'class_compare_gen'`` and whose ``model`` is
    ``'ld'``, selects the highest-``score`` row of every configuration and
    writes two figures.

    Parameters
    ----------
    infile : str
        Path of the pickled results DataFrame.  The frame must provide the
        columns read below ('assign', 'true_assign', 'dataset_name',
        'model', 'score', 'truth', 'class_n', ...).
    outfiles : sequence of str
        ``outfiles[0]`` receives the scatter plot of estimated versus true
        class count; ``outfiles[1]`` receives the bar chart of mean ARI per
        true class count.
    """
    # Close the handle as soon as the frame is loaded, and read the pickle
    # as binary data.  pickle can execute arbitrary code while loading, so
    # `infile` must come from a trusted source.
    with open(infile, 'rb') as fh:
        df = pickle.load(fh)

    # Per-row quality measures: adjusted Rand index against the ground
    # truth, and the number of distinct labels in the inferred assignment.
    df['ari'] = df.apply(
        lambda row: rand.compute_adj_rand_index(
            row['true_assign'],
            irm.util.canonicalize_assignment(row['assign'])),
        axis=1)
    df['empirical_class_n'] = df.apply(
        lambda row: len(np.unique(row['assign'])), axis=1)

    df_cc = df[(df['dataset_name'] == 'class_compare_gen')
               & (df['model'] == 'ld')]

    # Keep only the best-scoring row of each configuration.
    # NOTE: sort_index(by=...) is the legacy pandas spelling of sort_values.
    a = df_cc.groupby(['dataset_name', 'jitter', 'nonzero_frac', 'class_n',
                       'side_n', 'seed', 'truth']).apply(
        lambda group: group.sort_index(by='score', ascending=False).head(1))

    colors = {'distblock': 'b', 'mixedblock': 'r', 'bumpblock': 'g'}
    offsets = {'distblock': 0.0, 'mixedblock': 1.0, 'bumpblock': 2.0}

    # ---- Figure 1: estimated versus true number of classes ----
    f = pylab.figure(figsize=(4, 3))
    ax = f.add_subplot(1, 1, 1)
    for g_i, (g_idx, g) in enumerate(a.groupby(['truth'])):
        # Shift each truth type a little along x so the groups do not
        # sit exactly on top of each other.
        ax.scatter(g.index.get_level_values('class_n') + 0.3 * g_i,
                   g['empirical_class_n'],
                   c=colors[g_idx], edgecolor='none')
    # Identity line: perfect recovery of the class count.
    ax.plot([1, 16], [1, 16], c='k')
    ax.set_xlabel("true class number")
    ax.set_ylabel("estimated class number")
    ax.set_xticks([1, 2, 4, 8, 16])
    f.tight_layout()
    f.savefig(outfiles[0])

    # ---- Figure 2: mean ARI per true class count, one bar per truth ----
    f = pylab.figure(figsize=(4, 3))
    ax = f.add_subplot(1, 1, 1)
    CLASS_SPACE = 3.5  # x distance between consecutive class-count groups
    WIDTH = 0.8        # width of a single bar
    N = 0              # number of class-count groups (from the last truth)
    for g_idx, g in a.groupby(['truth']):
        h = g.groupby(['class_n']).mean()
        herr = g.groupby(['class_n']).std()
        N = len(h)
        # Single-argument print() emits the same text on Python 2 and 3.
        print("g_idx %s %s" % (g_idx, h['ari']))
        left = np.arange(N) * CLASS_SPACE + offsets[g_idx]
        ax.bar(left, h['ari'], width=WIDTH, color=colors[g_idx])
        # linewidth=0 suppresses the connecting line; only the error bars
        # (centred on each bar) are drawn.
        ax.errorbar(left + WIDTH / 2, h['ari'], yerr=herr['ari'],
                    capsize=0, elinewidth=4, ecolor='k', linewidth=0)
    ax.set_xlabel("true class number")
    ax.set_ylabel("adjusted rand index")
    ax.set_ylim(0, 1.0)
    ax.set_xticks(np.arange(N) * CLASS_SPACE + 1)
    ax.set_xticklabels([1, 2, 4, 8, 16])
    f.tight_layout()
    f.savefig(outfiles[1])
def plot_results(infile, outfiles):
    """Write three summary figures for every dataset in PLOT_DATASETS.

    Loads a pickled pandas DataFrame of results and, for each
    ``(plot_files, dataset_name)`` pair obtained by zipping ``outfiles``
    with the module-level ``PLOT_DATASETS``, selects the highest-``score``
    row of every configuration and saves:

    * ``plot_files[0]`` -- scatter of estimated versus true type count,
      one colour per model ('bb' and 'ld').
    * ``plot_files[1]`` -- bar chart of mean ARI per true type count with
      standard-deviation error bars.
    * ``plot_files[2]`` -- histograms of per-cluster spatial spread for
      both models and for the ground truth, for 4, 8 and 16 classes.

    Parameters
    ----------
    infile : str
        Path of the pickled results DataFrame.
    outfiles : iterable
        One entry per dataset; each entry must be indexable with 0, 1
        and 2 to give the three output paths.
    """

    def cluster_var(row):
        """Return per-(truth, cluster) variances of the node coordinates.

        `row` is a mapping with 'assign', 'true_assign' and 'node_pos';
        'node_pos' is indexed as a 2-D array whose first three columns
        are used as x, y and z.
        """
        assign = row['assign']
        true_assign = row['true_assign']
        print(type(row['node_pos']))
        node_pos = row['node_pos']

        def node_to_df(labels_, nodes):
            # One record per node: its cluster label plus coordinates.
            return pandas.DataFrame({'cluster': labels_,
                                     'x': nodes[:, 0],
                                     'y': nodes[:, 1],
                                     'z': nodes[:, 2]})

        rdf1 = node_to_df(assign, node_pos)
        rdf1['truth'] = False
        rdf2 = node_to_df(true_assign, node_pos)
        rdf2['truth'] = True
        rdf = pandas.concat([rdf1, rdf2])
        rdf[['x', 'y', 'z']] = rdf[['x', 'y', 'z']].astype(float)
        return rdf.groupby(['truth', 'cluster']).var()

    def hide_tick_marks(axis):
        # tick1On/tick2On are the tick marks on the two sides of the axis.
        for tic in axis.get_major_ticks():
            tic.tick1On = tic.tick2On = False

    def drop_spines(axes):
        for spine in ['top', 'right']:
            axes.spines[spine].set_visible(False)

    # Close the handle as soon as the frame is loaded, and read the pickle
    # as binary data.  pickle can execute arbitrary code while loading, so
    # `infile` must come from a trusted source.
    with open(infile, 'rb') as fh:
        df = pickle.load(fh)

    # Per-row quality measures: adjusted Rand index against the ground
    # truth, and the number of distinct labels in the inferred assignment.
    df['ari'] = df.apply(
        lambda row: rand.compute_adj_rand_index(
            row['true_assign'],
            irm.util.canonicalize_assignment(row['assign'])),
        axis=1)
    df['empirical_class_n'] = df.apply(
        lambda row: len(np.unique(row['assign'])), axis=1)

    colors = {'bb': 'b', 'ld': 'r'}
    model_labels = {'bb': "conn only", 'ld': "conn + dist"}
    offsets = {'bb': 0.0, 'ld': 1.0}
    models = [('bb', 'b'), ('ld', 'r')]

    for plot_files, dataset_name in zip(outfiles, PLOT_DATASETS):
        df_cc = df[df['dataset_name'] == dataset_name]
        # Keep only the best-scoring row of each configuration.
        # NOTE: sort_index(by=...) is the legacy pandas spelling of
        # sort_values.
        a = df_cc.groupby(['dataset_name', 'jitter', 'model',
                           'nonzero_frac', 'class_n', 'side_n', 'seed',
                           'truth']).apply(
            lambda group: group.sort_index(by='score',
                                           ascending=False).head(1))

        # ---- Figure 1: estimated versus true number of types ----
        f = pylab.figure(figsize=(4, 3))
        ax = f.add_subplot(1, 1, 1)
        for g_idx, g in a.groupby(['model']):
            ax.scatter(g.index.get_level_values('class_n'),
                       g['empirical_class_n'],
                       c=colors[g_idx], edgecolor='none',
                       label=model_labels[g_idx])
        # Identity line: perfect recovery of the type count.
        ax.plot([1, 16], [1, 16], c='k', label="ground truth")
        ax.set_xlabel("true type number")
        ax.set_ylabel("estimated type number")
        ax.set_xticks([1, 2, 4, 8, 16])
        ax.legend(loc="upper left", fontsize=10)
        ax.set_yticks([0, 70])
        ax.set_ylim([-2, 70])
        hide_tick_marks(ax.yaxis)
        f.tight_layout()
        hide_tick_marks(ax.xaxis)
        drop_spines(ax)
        f.savefig(plot_files[0])

        # ---- Figure 2: mean ARI per true type count, one bar per model --
        f = pylab.figure(figsize=(4, 3))
        ax = f.add_subplot(1, 1, 1)
        CLASS_SPACE = 2.5  # x distance between consecutive groups
        WIDTH = 0.8        # width of a single bar
        N = 0              # number of groups (from the last model)
        for g_idx, g in a.groupby(['model']):
            h = g.groupby(['class_n']).mean()
            herr = g.groupby(['class_n']).std()
            N = len(h)
            left = np.arange(N) * CLASS_SPACE + offsets[g_idx]
            ax.bar(left, h['ari'], width=WIDTH, color=colors[g_idx])
            # linewidth=0 suppresses the connecting line; only the error
            # bars (centred on each bar) are drawn.
            ax.errorbar(left + WIDTH / 2, h['ari'], yerr=herr['ari'],
                        capsize=0, elinewidth=2, linewidth=0,
                        ecolor='black')
        ax.set_xlabel("true type number")
        ax.set_ylabel("Cluster accuracy (ARI)")
        ax.set_ylim(0, 1.0)
        ax.set_yticks([0.0, 1.0])
        ax.set_xticks(np.arange(N) * CLASS_SPACE + 1)
        ax.set_xticklabels([1, 2, 4, 8, 16])
        hide_tick_marks(ax.xaxis)
        drop_spines(ax)
        f.tight_layout()
        f.savefig(plot_files[1])

        # ---- Figure 3: distribution of spatial cluster sizes ----
        df_vars = []
        for _, r in a.iterrows():
            cv = cluster_var(r.to_dict())
            cv['model'] = r['model']
            cv['seed'] = r['seed']
            cv['class_n'] = r['class_n']
            df_vars.append(cv)
        df_vars = pandas.concat(df_vars)
        df_vars['truth'] = df_vars.index.get_level_values('truth')
        # Square root of the summed x and y variances; z is not used.
        df_vars['std'] = np.sqrt(df_vars['x'] + df_vars['y'])

        f = pylab.figure(figsize=(4.0, 6.5))
        bins = np.linspace(0, 3.5, 20)
        bin_width = (bins[1] - bins[0])
        bar_width = bin_width / 4.0
        bar_space = bin_width / 3.
        # The ground-truth histogram is taken from the rows of the last
        # model listed ('ld').
        truth_model = models[-1][0]

        for i, class_n in enumerate([4, 8, 16]):
            ax = f.add_subplot(3, 1, i + 1)
            for model_i, (model, color) in enumerate(models):
                df2 = df_vars[(df_vars['model'] == model)
                              & (df_vars['class_n'] == class_n)
                              & (df_vars['truth'] == False)]
                hist, _ = np.histogram(df2.dropna()['std'], bins=bins,
                                       density=True)
                # density * bin width = fraction of samples in each bin.
                ax.bar(bins[:-1] + model_i * bar_space, hist * bin_width,
                       width=bar_width, color=color, label=model,
                       linewidth=0.0)

            df2 = df_vars[(df_vars['model'] == truth_model)
                          & (df_vars['class_n'] == class_n)
                          & (df_vars['truth'] == True)]
            hist, _ = np.histogram(df2.dropna()['std'], bins=bins,
                                   density=True)
            # Single-argument print() emits the same text on Python 2/3.
            print("Histogram= %s" % (hist,))
            ax.bar(bins[:-1] + 2 * bar_space, hist * bin_width,
                   width=bar_width, color='k', label='truth',
                   linewidth=0.0)

            ax.set_yticks([0.0, 1.0])
            ax.set_ylim(0.0, 1.05)
            ax.set_ylabel("frac (class=%d)" % class_n)
            ax.set_xticks([0.0, 3.5])
            if i == 0:
                # Only the top panel carries the legend.
                handles, _ = ax.get_legend_handles_labels()
                ax.legend(handles,
                          ['conn only', 'conn + dist', 'Ground Truth'],
                          loc='upper left', fontsize=12)
            if i < 2:
                # Only the bottom panel keeps its x tick labels.
                ax.set_xticklabels([])
            hide_tick_marks(ax.xaxis)
            hide_tick_marks(ax.yaxis)
            drop_spines(ax)
        ax.set_xlabel("size of clusters (2D std dev)")
        f.tight_layout()
        f.savefig(plot_files[2])
def plot_results(infile, outfiles):
    """Write three summary figures for every dataset in PLOT_DATASETS.

    Loads a pickled pandas DataFrame of results and, for each
    ``(plot_files, dataset_name)`` pair obtained by zipping ``outfiles``
    with the module-level ``PLOT_DATASETS``, selects the highest-``score``
    row of every configuration and saves:

    * ``plot_files[0]`` -- scatter of estimated versus true type count,
      one colour per model ('bb' and 'ld').
    * ``plot_files[1]`` -- bar chart of mean ARI per true type count with
      standard-deviation error bars.
    * ``plot_files[2]`` -- histograms of per-cluster spatial spread for
      both models and for the ground truth, for 4, 8 and 16 classes.

    Parameters
    ----------
    infile : str
        Path of the pickled results DataFrame.
    outfiles : iterable
        One entry per dataset; each entry must be indexable with 0, 1
        and 2 to give the three output paths.
    """

    def cluster_var(row):
        """Return per-(truth, cluster) variances of the node coordinates.

        `row` is a mapping with 'assign', 'true_assign' and 'node_pos';
        'node_pos' is indexed as a 2-D array whose first three columns
        are used as x, y and z.
        """
        assign = row['assign']
        true_assign = row['true_assign']
        print(type(row['node_pos']))
        node_pos = row['node_pos']

        def node_to_df(labels_, nodes):
            # One record per node: its cluster label plus coordinates.
            return pandas.DataFrame({'cluster': labels_,
                                     'x': nodes[:, 0],
                                     'y': nodes[:, 1],
                                     'z': nodes[:, 2]})

        rdf1 = node_to_df(assign, node_pos)
        rdf1['truth'] = False
        rdf2 = node_to_df(true_assign, node_pos)
        rdf2['truth'] = True
        rdf = pandas.concat([rdf1, rdf2])
        rdf[['x', 'y', 'z']] = rdf[['x', 'y', 'z']].astype(float)
        return rdf.groupby(['truth', 'cluster']).var()

    def hide_tick_marks(axis):
        # tick1On/tick2On are the tick marks on the two sides of the axis.
        for tic in axis.get_major_ticks():
            tic.tick1On = tic.tick2On = False

    def drop_spines(axes):
        for spine in ['top', 'right']:
            axes.spines[spine].set_visible(False)

    # Close the handle as soon as the frame is loaded, and read the pickle
    # as binary data.  pickle can execute arbitrary code while loading, so
    # `infile` must come from a trusted source.
    with open(infile, 'rb') as fh:
        df = pickle.load(fh)

    # Per-row quality measures: adjusted Rand index against the ground
    # truth, and the number of distinct labels in the inferred assignment.
    df['ari'] = df.apply(
        lambda row: rand.compute_adj_rand_index(
            row['true_assign'],
            irm.util.canonicalize_assignment(row['assign'])),
        axis=1)
    df['empirical_class_n'] = df.apply(
        lambda row: len(np.unique(row['assign'])), axis=1)

    colors = {'bb': 'b', 'ld': 'r'}
    model_labels = {'bb': "conn only", 'ld': "conn + dist"}
    offsets = {'bb': 0.0, 'ld': 1.0}
    models = [('bb', 'b'), ('ld', 'r')]

    for plot_files, dataset_name in zip(outfiles, PLOT_DATASETS):
        df_cc = df[df['dataset_name'] == dataset_name]
        # Keep only the best-scoring row of each configuration.
        # NOTE: sort_index(by=...) is the legacy pandas spelling of
        # sort_values.
        a = df_cc.groupby(['dataset_name', 'jitter', 'model',
                           'nonzero_frac', 'class_n', 'side_n', 'seed',
                           'truth']).apply(
            lambda group: group.sort_index(by='score',
                                           ascending=False).head(1))

        # ---- Figure 1: estimated versus true number of types ----
        f = pylab.figure(figsize=(4, 3))
        ax = f.add_subplot(1, 1, 1)
        for g_idx, g in a.groupby(['model']):
            ax.scatter(g.index.get_level_values('class_n'),
                       g['empirical_class_n'],
                       c=colors[g_idx], edgecolor='none',
                       label=model_labels[g_idx])
        # Identity line: perfect recovery of the type count.
        ax.plot([1, 16], [1, 16], c='k', label="ground truth")
        ax.set_xlabel("true type number")
        ax.set_ylabel("estimated type number")
        ax.set_xticks([1, 2, 4, 8, 16])
        ax.legend(loc="upper left", fontsize=10)
        ax.set_yticks([0, 70])
        ax.set_ylim([-2, 70])
        hide_tick_marks(ax.yaxis)
        f.tight_layout()
        hide_tick_marks(ax.xaxis)
        drop_spines(ax)
        f.savefig(plot_files[0])

        # ---- Figure 2: mean ARI per true type count, one bar per model --
        f = pylab.figure(figsize=(4, 3))
        ax = f.add_subplot(1, 1, 1)
        CLASS_SPACE = 2.5  # x distance between consecutive groups
        WIDTH = 0.8        # width of a single bar
        N = 0              # number of groups (from the last model)
        for g_idx, g in a.groupby(['model']):
            h = g.groupby(['class_n']).mean()
            herr = g.groupby(['class_n']).std()
            N = len(h)
            left = np.arange(N) * CLASS_SPACE + offsets[g_idx]
            ax.bar(left, h['ari'], width=WIDTH, color=colors[g_idx])
            # linewidth=0 suppresses the connecting line; only the error
            # bars (centred on each bar) are drawn.
            ax.errorbar(left + WIDTH / 2, h['ari'], yerr=herr['ari'],
                        capsize=0, elinewidth=2, linewidth=0,
                        ecolor='black')
        ax.set_xlabel("true type number")
        ax.set_ylabel("Cluster accuracy (ARI)")
        ax.set_ylim(0, 1.0)
        ax.set_yticks([0.0, 1.0])
        ax.set_xticks(np.arange(N) * CLASS_SPACE + 1)
        ax.set_xticklabels([1, 2, 4, 8, 16])
        hide_tick_marks(ax.xaxis)
        drop_spines(ax)
        f.tight_layout()
        f.savefig(plot_files[1])

        # ---- Figure 3: distribution of spatial cluster sizes ----
        df_vars = []
        for _, r in a.iterrows():
            cv = cluster_var(r.to_dict())
            cv['model'] = r['model']
            cv['seed'] = r['seed']
            cv['class_n'] = r['class_n']
            df_vars.append(cv)
        df_vars = pandas.concat(df_vars)
        df_vars['truth'] = df_vars.index.get_level_values('truth')
        # Square root of the summed x and y variances; z is not used.
        df_vars['std'] = np.sqrt(df_vars['x'] + df_vars['y'])

        f = pylab.figure(figsize=(4.0, 6.5))
        bins = np.linspace(0, 3.5, 20)
        bin_width = (bins[1] - bins[0])
        bar_width = bin_width / 4.0
        bar_space = bin_width / 3.
        # The ground-truth histogram is taken from the rows of the last
        # model listed ('ld').
        truth_model = models[-1][0]

        for i, class_n in enumerate([4, 8, 16]):
            ax = f.add_subplot(3, 1, i + 1)
            for model_i, (model, color) in enumerate(models):
                df2 = df_vars[(df_vars['model'] == model)
                              & (df_vars['class_n'] == class_n)
                              & (df_vars['truth'] == False)]
                hist, _ = np.histogram(df2.dropna()['std'], bins=bins,
                                       density=True)
                # density * bin width = fraction of samples in each bin.
                ax.bar(bins[:-1] + model_i * bar_space, hist * bin_width,
                       width=bar_width, color=color, label=model,
                       linewidth=0.0)

            df2 = df_vars[(df_vars['model'] == truth_model)
                          & (df_vars['class_n'] == class_n)
                          & (df_vars['truth'] == True)]
            hist, _ = np.histogram(df2.dropna()['std'], bins=bins,
                                   density=True)
            # Single-argument print() emits the same text on Python 2/3.
            print("Histogram= %s" % (hist,))
            ax.bar(bins[:-1] + 2 * bar_space, hist * bin_width,
                   width=bar_width, color='k', label='truth',
                   linewidth=0.0)

            ax.set_yticks([0.0, 1.0])
            ax.set_ylim(0.0, 1.05)
            ax.set_ylabel("frac (class=%d)" % class_n)
            ax.set_xticks([0.0, 3.5])
            if i == 0:
                # Only the top panel carries the legend.
                handles, _ = ax.get_legend_handles_labels()
                ax.legend(handles,
                          ['conn only', 'conn + dist', 'Ground Truth'],
                          loc='upper left', fontsize=12)
            if i < 2:
                # Only the bottom panel keeps its x tick labels.
                ax.set_xticklabels([])
            hide_tick_marks(ax.xaxis)
            hide_tick_marks(ax.yaxis)
            drop_spines(ax)
        ax.set_xlabel("size of clusters (2D std dev)")
        f.tight_layout()
        f.savefig(plot_files[2])