def boxplot_panel(hit_vec, response_df):
    """
    Draw a series of paired boxplots, one pair per row of response_df,
    with each row's values split into two groups according to hit_vec.

    Pairs are ordered left to right by ascending anova p-value, and pairs
    whose p-value falls in a significance tier are flagged with a star
    marker drawn above the boxes.

    Parameters
    ----------
    hit_vec : pd.Series
        Group label for each column of response_df. Must contain exactly
        two distinct values.
    response_df : pd.DataFrame
        Values to plot; one pair of boxes is drawn per row.

    Raises
    ------
    ValueError
        If hit_vec does not contain exactly two distinct values.
    """
    # NOTE(review): an identical definition of boxplot_panel follows this one
    # in the file and shadows it; one of the two should be removed.
    groups = hit_vec.unique()
    if len(groups) != 2:
        raise ValueError('hit_vec must have exactly two distinct values, '
                         'found %d' % len(groups))
    v1, v2 = groups

    b = response_df.copy()
    # Tag every column with its group label. reindex (rather than .loc)
    # keeps the old .ix behaviour of yielding NaN for columns that are
    # missing from hit_vec instead of raising.
    b.columns = pd.MultiIndex.from_arrays([b.columns,
                                           hit_vec.reindex(b.columns)])
    b = b.T

    def test(v):
        # Flatten the group level into a column so the labels and the
        # values share the same (level-0) index.
        flat = v.reset_index(level=1)
        return Stats.anova(flat[v.index.names[1]], flat[v.name])

    res = b.apply(test).T
    p = res['p'].sort_values()
    b = b.loc[:, p.index]

    # Interleave the two groups so each variable's boxes sit side by side.
    l1 = list(b.xs(v1, level=1).values.T)
    l2 = list(b.xs(v2, level=1).values.T)
    boxes = [x for t in zip(l1, l2) for x in t]
    ax1, bp = paired_boxplot(boxes)

    # Height at which the significance markers are drawn.
    y_lim = response_df.T.quantile(.9).max() * 1.2

    def mark(lower, upper, marker, label, size):
        # Scatter one marker per pair whose p-value lies in [lower, upper);
        # returns None when no pair qualifies.
        pts = [(i * 3.5 + .5, y_lim)
               for i, n in enumerate(p) if lower <= n < upper]
        if not pts:
            return None
        return ax1.scatter(*zip(*pts), marker=marker, label=label, s=size)

    # Half-open tiers: a p-value of exactly 1e-5 belongs to the weaker tier
    # instead of being dropped from both.
    s1 = mark(float('-inf'), 1e-5, '$**$', '$p<10^{-5}$', 200)
    s2 = mark(1e-5, .01, '$*$', '$p<10^{-2}$', 30)

    ax1.set_xticklabels(b.columns)

    # Only list tiers that were actually drawn; a None handle would
    # otherwise be passed straight to the legend.
    handles = list(bp['boxes'][:2])
    labels = [v1, v2]
    for handle, label in ((s2, '$p<10^{-2}$'), (s1, '$p<10^{-5}$')):
        if handle is not None:
            handles.append(handle)
            labels.append(label)
    ax1.legend(handles, labels, loc='best', scatterpoints=1)
def boxplot_panel(hit_vec, response_df):
    """
    Draw a series of paired boxplots, one pair per row of response_df,
    with each row's values split into two groups according to hit_vec.

    Pairs are ordered left to right by ascending anova p-value, and pairs
    whose p-value falls in a significance tier are flagged with a star
    marker drawn above the boxes.

    Parameters
    ----------
    hit_vec : pd.Series
        Group label for each column of response_df. Must contain exactly
        two distinct values.
    response_df : pd.DataFrame
        Values to plot; one pair of boxes is drawn per row.

    Raises
    ------
    ValueError
        If hit_vec does not contain exactly two distinct values.
    """
    # NOTE(review): an identical definition of boxplot_panel precedes this
    # one in the file; this later definition is the one that takes effect.
    groups = hit_vec.unique()
    if len(groups) != 2:
        raise ValueError('hit_vec must have exactly two distinct values, '
                         'found %d' % len(groups))
    v1, v2 = groups

    b = response_df.copy()
    # Tag every column with its group label. reindex (rather than .loc)
    # keeps the old .ix behaviour of yielding NaN for columns that are
    # missing from hit_vec instead of raising.
    b.columns = pd.MultiIndex.from_arrays([b.columns,
                                           hit_vec.reindex(b.columns)])
    b = b.T

    def test(v):
        # Flatten the group level into a column so the labels and the
        # values share the same (level-0) index.
        flat = v.reset_index(level=1)
        return Stats.anova(flat[v.index.names[1]], flat[v.name])

    res = b.apply(test).T
    p = res['p'].sort_values()
    b = b.loc[:, p.index]

    # Interleave the two groups so each variable's boxes sit side by side.
    l1 = list(b.xs(v1, level=1).values.T)
    l2 = list(b.xs(v2, level=1).values.T)
    boxes = [x for t in zip(l1, l2) for x in t]
    ax1, bp = paired_boxplot(boxes)

    # Height at which the significance markers are drawn.
    y_lim = response_df.T.quantile(.9).max() * 1.2

    def mark(lower, upper, marker, label, size):
        # Scatter one marker per pair whose p-value lies in [lower, upper);
        # returns None when no pair qualifies.
        pts = [(i * 3.5 + .5, y_lim)
               for i, n in enumerate(p) if lower <= n < upper]
        if not pts:
            return None
        return ax1.scatter(*zip(*pts), marker=marker, label=label, s=size)

    # Half-open tiers: a p-value of exactly 1e-5 belongs to the weaker tier
    # instead of being dropped from both.
    s1 = mark(float('-inf'), 1e-5, '$**$', '$p<10^{-5}$', 200)
    s2 = mark(1e-5, .01, '$*$', '$p<10^{-2}$', 30)

    ax1.set_xticklabels(b.columns)

    # Only list tiers that were actually drawn; a None handle would
    # otherwise be passed straight to the legend.
    handles = list(bp['boxes'][:2])
    labels = [v1, v2]
    for handle, label in ((s2, '$p<10^{-2}$'), (s1, '$p<10^{-5}$')):
        if handle is not None:
            handles.append(handle)
            labels.append(label)
    ax1.legend(handles, labels, loc='best', scatterpoints=1)
def exp_change(s):
    '''
    Run an anova of the values in s against the labels carried on the
    second level of its MultiIndex (e.g. tumor/normal).

    s is expected to be a Series with an index of at least two levels.
    Returns whatever Tests.anova returns.
    '''
    # NOTE(review): boxplot_panel in this file calls Stats.anova while this
    # function calls Tests.anova -- confirm both names are intended.
    labels = s.index.get_level_values(1)
    # Re-key the labels by the full index of s so they line up with it.
    grouping = pd.Series(labels, s.index)
    return Tests.anova(grouping, s)