def series_scatter(s1, s2, ax=None, ann='p', filename=None, **plot_args):
    """
    Draws a scatter plot of two Series against each other.

    Inputs
        s1: Series plotted on the x-axis, its name is used as the x label
        s2: Series plotted on the y-axis, its name is used as the y label
        ax: axis passed through to init_ax (default None)
        ann: 'p' annotates the 'p' entry of Tests.spearman_pandas(s1, s2)
             in scientific notation; 'fancy_p' annotates the same value
             rendered through latex_float.  'p_fancy' (the spelling used by
             violin_plot_pandas) is accepted as an alias of 'fancy_p'.
             Any other value adds no annotation.
        filename: if not None, the figure is saved to this path
        **plot_args: forwarded to ax.scatter; 's' defaults to 75 and
             'alpha' to .5 when not supplied
    """
    fig, ax = init_ax(ax, figsize=(6, 4))
    plot_args.setdefault('s', 75)
    plot_args.setdefault('alpha', .5)
    ax.scatter(*match_series(s1, s2), **plot_args)
    ax.set_xlabel(s1.name)
    ax.set_ylabel(s2.name)

    if ann == 'p':
        p_value = Tests.spearman_pandas(s1, s2)['p']
        ax.annotate('p = {0:.1e}'.format(p_value), (.95, -.02),
                    xycoords='axes fraction', ha='right', va='bottom',
                    size=14)
    elif ann in ('fancy_p', 'p_fancy'):
        # Both spellings are accepted so that this function and
        # violin_plot_pandas can be called with the same keyword.
        p_value = Tests.spearman_pandas(s1, s2)['p']
        ax.annotate('$p = {}$'.format(latex_float(p_value)), (.95, -.02),
                    xycoords='axes fraction', ha='right', va='bottom',
                    size=14)

    if filename is not None:
        fig.savefig(filename)
def series_scatter(s1, s2, ax=None, ann='p', filename=None, **plot_args):
    """
    Draws a scatter plot of two Series against each other.

    Inputs
        s1: Series plotted on the x-axis, its name is used as the x label
        s2: Series plotted on the y-axis, its name is used as the y label
        ax: axis passed through to init_ax (default None)
        ann: 'p' annotates the 'p' entry of Tests.spearman_pandas(s1, s2)
             in scientific notation; 'fancy_p' annotates the same value
             rendered through latex_float.  'p_fancy' (the spelling used by
             violin_plot_pandas) is accepted as an alias of 'fancy_p'.
             Any other value adds no annotation.
        filename: if not None, the figure is saved to this path
        **plot_args: forwarded to ax.scatter; 's' defaults to 75 and
             'alpha' to .5 when not supplied
    """
    fig, ax = init_ax(ax, figsize=(6, 4))
    plot_args.setdefault('s', 75)
    plot_args.setdefault('alpha', .5)
    ax.scatter(*match_series(s1, s2), **plot_args)
    ax.set_xlabel(s1.name)
    ax.set_ylabel(s2.name)

    if ann == 'p':
        p_value = Tests.spearman_pandas(s1, s2)['p']
        ax.annotate('p = {0:.1e}'.format(p_value), (.95, -.02),
                    xycoords='axes fraction', ha='right', va='bottom',
                    size=14)
    elif ann in ('fancy_p', 'p_fancy'):
        # Both spellings are accepted so that this function and
        # violin_plot_pandas can be called with the same keyword.
        p_value = Tests.spearman_pandas(s1, s2)['p']
        ax.annotate('$p = {}$'.format(latex_float(p_value)), (.95, -.02),
                    xycoords='axes fraction', ha='right', va='bottom',
                    size=14)

    if filename is not None:
        fig.savefig(filename)
def violin_plot_pandas(bin_vec, real_vec, ann='p', order=None, ax=None,
                       filename=None):
    """
    http://pyinsci.blogspot.com/2009/09/violin-plot-with-matplotlib.html
    Wrapper around matplotlib's boxplot function to add violin profile.

    Inputs
        bin_vec: Series of labels
        real_vec: Series of measurements to be grouped according to bin_vec
        ann: 'p' annotates the 'p' entry of Stats.kruskal_pandas in
             scientific notation; 'p_fancy' (or 'fancy_p', the spelling
             used by series_scatter) annotates it through latex_float;
             None adds no annotation; any other value is drawn as-is
        order: categories to plot, in order; defaults to the index of
             bin_vec.value_counts()
        ax: axis passed through to init_ax (default None)
        filename: if not None, the figure is saved to this path
    """
    fig, ax = init_ax(ax)
    ax.set_ylabel(real_vec.name)
    ax.set_xlabel(bin_vec.name)
    bin_vec, real_vec = match_series(bin_vec, real_vec)
    try:
        if order is None:
            categories = bin_vec.value_counts().index
        else:
            categories = order
        _violin_plot(ax, [real_vec[bin_vec == num] for num in categories],
                     pos=categories, bp=True)
        ax.set_xticklabels([str(c) + '\n(n=%i)' % sum(bin_vec == c)
                            for c in categories])
    except Exception:
        # Deliberate best-effort: if the violin drawing fails for any
        # reason, fall back to a plain box plot on the same axis.
        # Narrowed from a bare except so KeyboardInterrupt / SystemExit
        # are no longer swallowed.
        box_plot_pandas(bin_vec, real_vec, ax=ax)

    # A single if/elif chain: previously 'p_fancy' also fell through to
    # the free-text branch and printed the literal keyword on the plot.
    if ann in ('p_fancy', 'fancy_p'):
        p_value = Stats.kruskal_pandas(bin_vec, real_vec)['p']
        ax.annotate('$p = {}$'.format(latex_float(p_value)), (.95, -.02),
                    xycoords='axes fraction', ha='right', va='bottom',
                    size=14)
    elif ann == 'p':
        p_value = Stats.kruskal_pandas(bin_vec, real_vec)['p']
        ax.annotate('p = {0:.1e}'.format(p_value), (.95, .02),
                    xycoords='axes fraction', ha='right', va='bottom',
                    size=12)
    elif ann is not None:
        # Free-text annotation; the test is not computed since its result
        # would not be shown.
        ax.annotate(ann, (.95, .02), xycoords='axes fraction', ha='right',
                    va='bottom', size=12)

    if filename is not None:
        fig.savefig(filename)
def paired_boxplot_tumor_normal(df, sig=True, cutoffs=(.01, .00001),
                                order=None, ax=None):
    """
    Draws a paired boxplot given a DataFrame with both tumor and normal
    samples on the index. '01' and '11' are hard-coded as the ids for
    tumor/normal.

    Inputs
        df: DataFrame with a two-level row index; level 1 holds the
            '01' / '11' ids and each column becomes one pair of boxes
        sig: if True, mark columns whose paired test p-value is below
            the cutoffs and add the markers to the legend
        cutoffs: (single-star cutoff, double-star cutoff); indexed only,
            never modified.  The legend text is fixed at 10^-2 / 10^-5
            regardless of the values passed.
        order: column order to plot; defaults to decreasing median of
            the '11' rows
        ax: axis passed through to paired_boxplot
    """
    # Keep only level-0 entries that have exactly two rows.
    # NOTE(review): .ix / .order() / .as_matrix() are legacy pandas calls,
    # left as-is to match the pandas version the rest of the file targets.
    paired = df.groupby(level=0).size() == 2
    df = df.ix[paired[paired].index]
    if order is None:
        by_median = df.xs('11', level=1).median().order().index
        df = df[by_median[::-1]]
    else:
        df = df[order]

    tumor = list(df.xs('01', level=1).as_matrix().T)
    normal = list(df.xs('11', level=1).as_matrix().T)
    # Interleave so each column contributes (tumor, normal) side by side.
    boxes = [x for pair in zip(tumor, normal) for x in pair]
    ax1, bp = paired_boxplot(boxes, ax)

    def test(v):
        wide = v.unstack()
        return Stats.ttest_rel(wide['01'], wide['11'])

    res = df.apply(test).T
    p = res.p

    if sig:
        # Marker height (18) and x spacing (3.5 per column) are hard-coded.
        strong_pts = [(i * 3.5 + .5, 18) for i, pv in enumerate(p)
                      if pv < cutoffs[1]]
        if strong_pts:
            s1 = ax1.scatter(*zip(*strong_pts), marker='$**$',
                             label='$p<10^{-5}$', s=200)
        else:
            s1 = None
        # Lower bound is inclusive so a p-value exactly equal to
        # cutoffs[1] still receives the single-star marker.
        weak_pts = [(i * 3.5 + .5, 18) for i, pv in enumerate(p)
                    if cutoffs[1] <= pv < cutoffs[0]]
        if weak_pts:
            s2 = ax1.scatter(*zip(*weak_pts), marker='$*$',
                             label='$p<10^{-2}$', s=30)
        else:
            s2 = None

        # Only hand real artists to legend(); a tier with no points has
        # no handle and is left out together with its label.
        handles = list(bp['boxes'][:2])
        labels = ['Tumor', 'Normal']
        if s2 is not None:
            handles.append(s2)
            labels.append('$p<10^{-2}$')
        if s1 is not None:
            handles.append(s1)
            labels.append('$p<10^{-5}$')
        ax1.legend(handles, labels, loc='best', scatterpoints=1)
    else:
        ax1.legend(bp['boxes'][:2], ('Tumor', 'Normal'), loc='best')
    ax1.set_xticklabels(df.columns)
def violin_plot_pandas(bin_vec, real_vec, ann='p', order=None, ax=None,
                       filename=None):
    """
    http://pyinsci.blogspot.com/2009/09/violin-plot-with-matplotlib.html
    Wrapper around matplotlib's boxplot function to add violin profile.

    Inputs
        bin_vec: Series of labels
        real_vec: Series of measurements to be grouped according to bin_vec
        ann: 'p' annotates the 'p' entry of Stats.kruskal_pandas in
             scientific notation; 'p_fancy' (or 'fancy_p', the spelling
             used by series_scatter) annotates it through latex_float;
             None adds no annotation; any other value is drawn as-is
        order: categories to plot, in order; defaults to the index of
             bin_vec.value_counts()
        ax: axis passed through to init_ax (default None)
        filename: if not None, the figure is saved to this path
    """
    fig, ax = init_ax(ax)
    ax.set_ylabel(real_vec.name)
    ax.set_xlabel(bin_vec.name)
    bin_vec, real_vec = match_series(bin_vec, real_vec)
    try:
        if order is None:
            categories = bin_vec.value_counts().index
        else:
            categories = order
        _violin_plot(ax, [real_vec[bin_vec == num] for num in categories],
                     pos=categories, bp=True)
        ax.set_xticklabels([str(c) + '\n(n=%i)' % sum(bin_vec == c)
                            for c in categories])
    except Exception:
        # Deliberate best-effort: if the violin drawing fails for any
        # reason, fall back to a plain box plot on the same axis.
        # Narrowed from a bare except so KeyboardInterrupt / SystemExit
        # are no longer swallowed.
        box_plot_pandas(bin_vec, real_vec, ax=ax)

    # A single if/elif chain: previously 'p_fancy' also fell through to
    # the free-text branch and printed the literal keyword on the plot.
    if ann in ('p_fancy', 'fancy_p'):
        p_value = Stats.kruskal_pandas(bin_vec, real_vec)['p']
        ax.annotate('$p = {}$'.format(latex_float(p_value)), (.95, -.02),
                    xycoords='axes fraction', ha='right', va='bottom',
                    size=14)
    elif ann == 'p':
        p_value = Stats.kruskal_pandas(bin_vec, real_vec)['p']
        ax.annotate('p = {0:.1e}'.format(p_value), (.95, .02),
                    xycoords='axes fraction', ha='right', va='bottom',
                    size=12)
    elif ann is not None:
        # Free-text annotation; the test is not computed since its result
        # would not be shown.
        ax.annotate(ann, (.95, .02), xycoords='axes fraction', ha='right',
                    va='bottom', size=12)

    if filename is not None:
        fig.savefig(filename)
def boxplot_panel(hit_vec, response_df):
    """
    Draws a series of paired boxplots with the rows of the response_df
    split according to hit_vec.

    Inputs
        hit_vec: Series with exactly two distinct values, looked up by
            the column labels of response_df (unpacking its unique()
            values fails otherwise)
        response_df: DataFrame whose rows are each drawn as one pair of
            boxes, ordered by increasing Stats.anova p-value
    """
    # NOTE(review): .ix / .order() / .as_matrix() are legacy pandas calls,
    # left as-is to match the pandas version the rest of the file targets.
    b = response_df.copy()
    b.columns = pd.MultiIndex.from_arrays([b.columns, hit_vec.ix[b.columns]])
    b = b.T
    v1, v2 = hit_vec.unique()

    def test(v):
        # Pull the group label (index level 1) out next to the values.
        flat = v.reset_index(level=1)
        return Stats.anova(flat[v.index.names[1]], flat[v.name])

    res = b.apply(test).T
    p = res.p.order()
    b = b.ix[:, p.index]

    l1 = list(b.xs(v1, level=1).as_matrix().T)
    l2 = list(b.xs(v2, level=1).as_matrix().T)
    # Interleave so each row contributes its (v1, v2) boxes side by side.
    boxes = [x for pair in zip(l1, l2) for x in pair]
    ax1, bp = paired_boxplot(boxes)

    # Markers sit 20% above the largest per-row 90th percentile.
    y_lim = (response_df.T.quantile(.9).max()) * 1.2
    strong_pts = [(i * 3.5 + .5, y_lim) for i, pv in enumerate(p)
                  if pv < .00001]
    if strong_pts:
        s1 = ax1.scatter(*zip(*strong_pts), marker='$**$',
                         label='$p<10^{-5}$', s=200)
    else:
        s1 = None
    # Lower bound is inclusive so p == 1e-5 still gets the single star.
    weak_pts = [(i * 3.5 + .5, y_lim) for i, pv in enumerate(p)
                if .00001 <= pv < .01]
    if weak_pts:
        s2 = ax1.scatter(*zip(*weak_pts), marker='$*$',
                         label='$p<10^{-2}$', s=30)
    else:
        s2 = None

    ax1.set_xticklabels(b.columns)

    # Only hand real artists to legend(); a tier with no points has no
    # handle and is left out together with its label.
    handles = list(bp['boxes'][:2])
    labels = [v1, v2]
    if s2 is not None:
        handles.append(s2)
        labels.append('$p<10^{-2}$')
    if s1 is not None:
        handles.append(s1)
        labels.append('$p<10^{-5}$')
    ax1.legend(handles, labels, loc='best', scatterpoints=1)
def boxplot_panel(hit_vec, response_df):
    """
    Draws a series of paired boxplots with the rows of the response_df
    split according to hit_vec.

    Inputs
        hit_vec: Series with exactly two distinct values, looked up by
            the column labels of response_df (unpacking its unique()
            values fails otherwise)
        response_df: DataFrame whose rows are each drawn as one pair of
            boxes, ordered by increasing Stats.anova p-value
    """
    # NOTE(review): .ix / .order() / .as_matrix() are legacy pandas calls,
    # left as-is to match the pandas version the rest of the file targets.
    b = response_df.copy()
    b.columns = pd.MultiIndex.from_arrays([b.columns, hit_vec.ix[b.columns]])
    b = b.T
    v1, v2 = hit_vec.unique()

    def test(v):
        # Pull the group label (index level 1) out next to the values.
        flat = v.reset_index(level=1)
        return Stats.anova(flat[v.index.names[1]], flat[v.name])

    res = b.apply(test).T
    p = res.p.order()
    b = b.ix[:, p.index]

    l1 = list(b.xs(v1, level=1).as_matrix().T)
    l2 = list(b.xs(v2, level=1).as_matrix().T)
    # Interleave so each row contributes its (v1, v2) boxes side by side.
    boxes = [x for pair in zip(l1, l2) for x in pair]
    ax1, bp = paired_boxplot(boxes)

    # Markers sit 20% above the largest per-row 90th percentile.
    y_lim = (response_df.T.quantile(.9).max()) * 1.2
    strong_pts = [(i * 3.5 + .5, y_lim) for i, pv in enumerate(p)
                  if pv < .00001]
    if strong_pts:
        s1 = ax1.scatter(*zip(*strong_pts), marker='$**$',
                         label='$p<10^{-5}$', s=200)
    else:
        s1 = None
    # Lower bound is inclusive so p == 1e-5 still gets the single star.
    weak_pts = [(i * 3.5 + .5, y_lim) for i, pv in enumerate(p)
                if .00001 <= pv < .01]
    if weak_pts:
        s2 = ax1.scatter(*zip(*weak_pts), marker='$*$',
                         label='$p<10^{-2}$', s=30)
    else:
        s2 = None

    ax1.set_xticklabels(b.columns)

    # Only hand real artists to legend(); a tier with no points has no
    # handle and is left out together with its label.
    handles = list(bp['boxes'][:2])
    labels = [v1, v2]
    if s2 is not None:
        handles.append(s2)
        labels.append('$p<10^{-2}$')
    if s1 is not None:
        handles.append(s1)
        labels.append('$p<10^{-5}$')
    ax1.legend(handles, labels, loc='best', scatterpoints=1)
def exp_change(s):
    """
    Runs Tests.anova on s, grouped by the second level of its MultiIndex
    (eg. tumor/normal).

    The grouping variable is a Series holding the level-1 index values
    of s, indexed exactly like s itself.
    """
    groups = pd.Series(s.index.get_level_values(1), index=s.index)
    return Tests.anova(groups, s)