def box_plot_pandas(bin_vec, real_vec, ax=None):
    """
    Box plot of real_vec grouped by the labels in bin_vec.

    Wrapper around matplotlib's boxplot function.  Fliers, caps and
    whiskers are hidden, and the box of any group with fewer than three
    observations is made fully transparent.

    Inputs
        bin_vec: Series of labels
        real_vec: Series of measurements to be grouped according to bin_vec
        ax: axes handed to init_ax before drawing
    """
    _, ax = init_ax(ax)
    bin_vec, real_vec = match_series(bin_vec, real_vec)

    # One box per label, in the order value_counts() returns them.
    labels = bin_vec.value_counts().index
    data = [real_vec[bin_vec == label] for label in labels]
    artists = ax.boxplot(data, positions=range(len(labels)), widths=.3,
                         patch_artist=True)

    if real_vec.name:
        ax.set_ylabel(real_vec.name)
    if bin_vec.name:
        ax.set_xlabel(bin_vec.name)

    # Only the boxes and the median lines stay visible.
    for key in ('fliers', 'caps', 'whiskers'):
        for artist in artists[key]:
            artist.set_visible(False)

    for median in artists['medians']:
        median.set_color(colors[0])
        median.set_lw(3)
        median.set_alpha(.8)

    for group, box in zip(data, artists['boxes']):
        box.set_color('grey')
        box.set_lw(3)
        # Too few points for a meaningful box: keep it but make it invisible.
        box.set_alpha(.7 if len(group) >= 3 else 0)
def fancy_raster(df, cluster=False, cmap=plt.cm.get_cmap('Spectral'),
                 norm=None, ax=None):
    """
    Draw df as an image with white grid lines separating the cells.

    Inputs
        df: DataFrame to draw; its index labels the y ticks and its columns
            the x ticks.  With cluster=True the same leaf order is applied
            to rows and columns, so df is presumably square (e.g. a
            similarity matrix) -- confirm with callers.
        cluster: if True, hierarchically cluster the rows of df, reorder df
            by the dendrogram leaves and overlay the dendrogram on the axes
        cmap: colormap passed to imshow
        norm: normalization passed to imshow
        ax: axes handed to init_ax before drawing

    Returns the image object created by imshow.
    """
    if cluster:
        # linkage expects the condensed distances that pdist returns; a
        # square distance matrix would be read as raw observation vectors.
        dists = sp.spatial.distance.pdist(df)
        links = sp.cluster.hierarchy.linkage(dists)
        Z = sp.cluster.hierarchy.dendrogram(links, no_plot=True)
        order = Z['leaves']
        # Leaves are positional indices, so reorder by position
        # (the old .ix indexer no longer exists in pandas).
        df = df.iloc[order, order]

    _, ax = init_ax(ax, figsize=(12, 8))
    img = ax.imshow(df, interpolation='Nearest', cmap=cmap, norm=norm)
    ax.set_yticks(range(len(df.index)))
    ax.set_yticklabels(df.index)
    ax.set_xticks(np.arange(len(df.columns)))
    ax.set_xticklabels(df.columns, rotation=360 - 90, ha='center')

    # Thick white lines on the cell borders give the "tiled" look.
    ax.hlines(np.arange(len(df.index) - 1) + .5, -.5, len(df.columns) - .5,
              color='white', lw=6)
    ax.vlines(np.arange(len(df.columns) - 1) + .5, -.5, len(df.index) - .5,
              color='white', lw=6)

    if cluster:
        # Rescale the dendrogram x coordinates onto the column positions
        # 0 .. n_leaves - 1 and push the branches to negative y.
        icoord = np.array(Z['icoord']) - np.array(Z['icoord']).min()
        icoord = icoord * ((len(Z['leaves']) - 1) / icoord.max())
        dcoord = -1 * np.array(Z['dcoord']) - .7
        for xs, ys, color in zip(icoord, dcoord, Z['color_list']):
            ax.plot(xs, ys, color=color, lw=2, alpha=.8)

    # A boolean is required here: the string 'off' is truthy and would
    # leave the top ticks switched on.
    ax.tick_params(axis='x', top=False)
    ax.set_frame_on(False)
    return img
def fischer_bar_chart(bin_vec, response_vec, ax=None, filename=None):
    """
    Bar chart of the cross-tabulation of bin_vec against response_vec.

    Inputs
        bin_vec: first argument to pd.crosstab (rows of the table)
        response_vec: second argument to pd.crosstab (columns of the table)
        ax: axes handed to init_ax before drawing
        filename: if given, the figure is saved to this path

    Returns the figure obtained from init_ax.
    """
    fig, ax = init_ax(ax)
    pd.crosstab(bin_vec, response_vec).plot(kind='bar', ax=ax)
    if filename is None:
        return fig
    fig.savefig(filename)
    return fig
def fancy_raster(df, cluster=False, cmap=plt.cm.get_cmap('Spectral'),
                 norm=None, ax=None):
    """
    Draw df as an image with white grid lines separating the cells.

    Inputs
        df: DataFrame to draw; its index labels the y ticks and its columns
            the x ticks.  With cluster=True the same leaf order is applied
            to rows and columns, so df is presumably square (e.g. a
            similarity matrix) -- confirm with callers.
        cluster: if True, hierarchically cluster the rows of df, reorder df
            by the dendrogram leaves and overlay the dendrogram on the axes
        cmap: colormap passed to imshow
        norm: normalization passed to imshow
        ax: axes handed to init_ax before drawing

    Returns the image object created by imshow.
    """
    if cluster:
        # linkage expects the condensed distances that pdist returns; a
        # square distance matrix would be read as raw observation vectors.
        dists = sp.spatial.distance.pdist(df)
        links = sp.cluster.hierarchy.linkage(dists)
        Z = sp.cluster.hierarchy.dendrogram(links, no_plot=True)
        order = Z['leaves']
        # Leaves are positional indices, so reorder by position
        # (the old .ix indexer no longer exists in pandas).
        df = df.iloc[order, order]

    _, ax = init_ax(ax, figsize=(12, 8))
    img = ax.imshow(df, interpolation='Nearest', cmap=cmap, norm=norm)
    ax.set_yticks(range(len(df.index)))
    ax.set_yticklabels(df.index)
    ax.set_xticks(np.arange(len(df.columns)))
    ax.set_xticklabels(df.columns, rotation=360 - 90, ha='center')

    # Thick white lines on the cell borders give the "tiled" look.
    ax.hlines(np.arange(len(df.index) - 1) + .5, -.5, len(df.columns) - .5,
              color='white', lw=6)
    ax.vlines(np.arange(len(df.columns) - 1) + .5, -.5, len(df.index) - .5,
              color='white', lw=6)

    if cluster:
        # Rescale the dendrogram x coordinates onto the column positions
        # 0 .. n_leaves - 1 and push the branches to negative y.
        icoord = np.array(Z['icoord']) - np.array(Z['icoord']).min()
        icoord = icoord * ((len(Z['leaves']) - 1) / icoord.max())
        dcoord = -1 * np.array(Z['dcoord']) - .7
        for xs, ys, color in zip(icoord, dcoord, Z['color_list']):
            ax.plot(xs, ys, color=color, lw=2, alpha=.8)

    # A boolean is required here: the string 'off' is truthy and would
    # leave the top ticks switched on.
    ax.tick_params(axis='x', top=False)
    ax.set_frame_on(False)
    return img
def violin_plot_pandas(bin_vec, real_vec, ann='p', order=None, ax=None,
                       filename=None):
    """
    http://pyinsci.blogspot.com/2009/09/violin-plot-with-matplotlib.html
    Wrapper around matplotlib's boxplot function to add violin profile.
    If drawing the violins fails, falls back to box_plot_pandas.

    Inputs
        bin_vec: Series of labels
        real_vec: Series of measurements to be grouped according to bin_vec
        ann: annotation drawn in the lower right of the axes:
            'p'        the 'p' entry of Stats.kruskal_pandas, as 'p = 1.0e-03'
            'p_fancy'  the same value rendered through latex_float
            None       no annotation
            otherwise  ann itself is used as the annotation text
        order: categories to plot, in order; defaults to the order of
            bin_vec.value_counts()
        ax: axes handed to init_ax before drawing
        filename: if given, the figure is saved to this path
    """
    fig, ax = init_ax(ax)
    ax.set_ylabel(real_vec.name)
    ax.set_xlabel(bin_vec.name)
    bin_vec, real_vec = match_series(bin_vec, real_vec)

    try:
        if order is None:
            categories = bin_vec.value_counts().index
        else:
            categories = order
        _violin_plot(ax, [real_vec[bin_vec == num] for num in categories],
                     pos=categories, bp=True)
        ax.set_xticklabels([str(c) + '\n(n=%i)' % sum(bin_vec == c)
                            for c in categories])
    except Exception:
        # Best-effort fallback to a plain box plot; Exception (rather than
        # a bare except) lets KeyboardInterrupt / SystemExit propagate.
        box_plot_pandas(bin_vec, real_vec, ax=ax)

    p_value = Stats.kruskal_pandas(bin_vec, real_vec)['p']
    # One exclusive chain: 'p_fancy' must not fall through to the generic
    # branch, which would also print the literal text 'p_fancy'.
    if ann == 'p_fancy':
        ax.annotate('$p = {}$'.format(latex_float(p_value)), (.95, -.02),
                    xycoords='axes fraction', ha='right', va='bottom',
                    size=14)
    elif ann == 'p':
        ax.annotate('p = {0:.1e}'.format(p_value), (.95, .02),
                    xycoords='axes fraction', ha='right', va='bottom',
                    size=12)
    elif ann is not None:
        ax.annotate(ann, (.95, .02), xycoords='axes fraction', ha='right',
                    va='bottom', size=12)

    if filename is not None:
        fig.savefig(filename)
def histo_compare(hit_vec, response_vec, ax=None):
    '''
    Split response_vec by hit_vec and compare histograms.
    Also plots the kde of the whole response_vec.

    Inputs
        hit_vec: vector used by split_a_by_b to split response_vec into
            two groups, drawn with the legend labels 'WT' and 'Mut'
        response_vec: measurements to be plotted
        ax: axes handed to init_ax before drawing

    Returns the figure obtained from init_ax.
    '''
    fig, ax = init_ax(ax)

    # Density estimate of the full response, evaluated on 200 points
    # spanning its range.
    kde1 = sp.stats.gaussian_kde(response_vec)
    x_eval = np.linspace(min(response_vec), max(response_vec), num=200)
    ax.plot(x_eval, kde1(x_eval), 'k-')

    miss, hit = split_a_by_b(response_vec, hit_vec)
    # density=True replaces the removed `normed` keyword and keeps the
    # histograms on the same scale as the kde curve.
    ax.hist(miss, bins=20, density=True, alpha=.2, label='WT')
    ax.hist(hit, bins=10, density=True, alpha=.5, label='Mut')
    ax.legend()
    return fig
def series_scatter(s1, s2, ax=None, ann='p', filename=None, **plot_args):
    """
    Scatter plot of s1 against s2 after aligning them with match_series.

    Inputs
        s1: Series plotted on the x axis; its name labels the axis
        s2: Series plotted on the y axis; its name labels the axis
        ax: axes handed to init_ax before drawing
        ann: 'p' annotates the 'p' entry of Tests.spearman_pandas in
            scientific notation, 'fancy_p' renders the same value through
            latex_float; any other value adds no annotation
        filename: if given, the figure is saved to this path
        plot_args: extra keyword arguments for ax.scatter; 's' defaults to
            75 and 'alpha' to .5 when not supplied
    """
    fig, ax = init_ax(ax, figsize=(6, 4))

    # Fill in marker defaults without overriding the caller's choices.
    plot_args.setdefault('s', 75)
    plot_args.setdefault('alpha', .5)

    xs, ys = match_series(s1, s2)
    ax.scatter(xs, ys, **plot_args)
    ax.set_xlabel(s1.name)
    ax.set_ylabel(s2.name)

    if ann in ('p', 'fancy_p'):
        p_value = Tests.spearman_pandas(s1, s2)['p']
        if ann == 'p':
            label = 'p = {0:.1e}'.format(p_value)
        else:
            label = '$p = {}$'.format(latex_float(p_value))
        ax.annotate(label, (.95, -.02), xycoords='axes fraction',
                    ha='right', va='bottom', size=14)

    if filename is not None:
        fig.savefig(filename)
def series_scatter(s1, s2, ax=None, ann='p', filename=None, **plot_args):
    """
    Scatter plot of s1 against s2 after aligning them with match_series.

    Inputs
        s1: Series plotted on the x axis; its name labels the axis
        s2: Series plotted on the y axis; its name labels the axis
        ax: axes handed to init_ax before drawing
        ann: 'p' annotates the 'p' entry of Tests.spearman_pandas in
            scientific notation, 'fancy_p' renders the same value through
            latex_float; any other value adds no annotation
        filename: if given, the figure is saved to this path
        plot_args: extra keyword arguments for ax.scatter; 's' defaults to
            75 and 'alpha' to .5 when not supplied
    """
    fig, ax = init_ax(ax, figsize=(6, 4))

    # Fill in marker defaults without overriding the caller's choices.
    plot_args.setdefault('s', 75)
    plot_args.setdefault('alpha', .5)

    xs, ys = match_series(s1, s2)
    ax.scatter(xs, ys, **plot_args)
    ax.set_xlabel(s1.name)
    ax.set_ylabel(s2.name)

    if ann in ('p', 'fancy_p'):
        p_value = Tests.spearman_pandas(s1, s2)['p']
        if ann == 'p':
            label = 'p = {0:.1e}'.format(p_value)
        else:
            label = '$p = {}$'.format(latex_float(p_value))
        ax.annotate(label, (.95, -.02), xycoords='axes fraction',
                    ha='right', va='bottom', size=14)

    if filename is not None:
        fig.savefig(filename)
def violin_plot_pandas(bin_vec, real_vec, ann='p', order=None, ax=None,
                       filename=None):
    """
    http://pyinsci.blogspot.com/2009/09/violin-plot-with-matplotlib.html
    Wrapper around matplotlib's boxplot function to add violin profile.
    If drawing the violins fails, falls back to box_plot_pandas.

    Inputs
        bin_vec: Series of labels
        real_vec: Series of measurements to be grouped according to bin_vec
        ann: annotation drawn in the lower right of the axes:
            'p'        the 'p' entry of Stats.kruskal_pandas, as 'p = 1.0e-03'
            'p_fancy'  the same value rendered through latex_float
            None       no annotation
            otherwise  ann itself is used as the annotation text
        order: categories to plot, in order; defaults to the order of
            bin_vec.value_counts()
        ax: axes handed to init_ax before drawing
        filename: if given, the figure is saved to this path
    """
    fig, ax = init_ax(ax)
    ax.set_ylabel(real_vec.name)
    ax.set_xlabel(bin_vec.name)
    bin_vec, real_vec = match_series(bin_vec, real_vec)

    try:
        if order is None:
            categories = bin_vec.value_counts().index
        else:
            categories = order
        _violin_plot(ax, [real_vec[bin_vec == num] for num in categories],
                     pos=categories, bp=True)
        ax.set_xticklabels([str(c) + '\n(n=%i)' % sum(bin_vec == c)
                            for c in categories])
    except Exception:
        # Best-effort fallback to a plain box plot; Exception (rather than
        # a bare except) lets KeyboardInterrupt / SystemExit propagate.
        box_plot_pandas(bin_vec, real_vec, ax=ax)

    p_value = Stats.kruskal_pandas(bin_vec, real_vec)['p']
    # One exclusive chain: 'p_fancy' must not fall through to the generic
    # branch, which would also print the literal text 'p_fancy'.
    if ann == 'p_fancy':
        ax.annotate('$p = {}$'.format(latex_float(p_value)), (.95, -.02),
                    xycoords='axes fraction', ha='right', va='bottom',
                    size=14)
    elif ann == 'p':
        ax.annotate('p = {0:.1e}'.format(p_value), (.95, .02),
                    xycoords='axes fraction', ha='right', va='bottom',
                    size=12)
    elif ann is not None:
        ax.annotate(ann, (.95, .02), xycoords='axes fraction', ha='right',
                    va='bottom', size=12)

    if filename is not None:
        fig.savefig(filename)
def count_plot(vec, name=None, ax=None):
    """
    Bar chart of how often each value occurs in vec, sorted by value.

    Inputs
        vec: Series whose values are counted
        name: x-axis label; falls back to vec.name when None
        ax: axes handed to init_ax before drawing
    """
    _, ax = init_ax(ax)
    counts = vec.value_counts().sort_index()
    counts.plot(kind='bar', ax=ax)
    ax.set_ylabel('# of Patients')
    if name is None:
        name = vec.name
    ax.set_xlabel(name)