Esempio n. 1
0
def series_scatter(s1, s2, ax=None, ann='p', filename=None, **plot_args):
    """
    Scatter two Series against each other on one axis.

    The plotted points are whatever match_series(s1, s2) returns; the x and
    y axis labels are taken from s1.name and s2.name.

    Inputs
        s1, s2: Series to compare
        ax: axis handed to init_ax, which supplies the (fig, ax) pair
        ann: 'p' writes the p-value from Tests.spearman_pandas in
             scientific notation, 'fancy_p' writes it via latex_float;
             any other value leaves the plot without annotation
        filename: when not None, the figure is saved to this path
        plot_args: forwarded to ax.scatter ('s' defaults to 75 and
                   'alpha' to .5 when not supplied)
    """
    fig, ax = init_ax(ax, figsize=(6, 4))

    # Fill in marker defaults without overriding caller-supplied values.
    plot_args.setdefault('s', 75)
    plot_args.setdefault('alpha', .5)

    matched = match_series(s1, s2)
    ax.scatter(*matched, **plot_args)
    ax.set_xlabel(s1.name)
    ax.set_ylabel(s2.name)

    # Build the annotation text first so that annotate is called in one place.
    label = None
    if ann == 'p':
        label = 'p = {0:.1e}'.format(Tests.spearman_pandas(s1, s2)['p'])
    elif ann == 'fancy_p':
        label = '$p = {}$'.format(
            latex_float(Tests.spearman_pandas(s1, s2)['p']))
    if label is not None:
        ax.annotate(label, (.95, -.02), xycoords='axes fraction',
                    ha='right', va='bottom', size=14)

    if filename is not None:
        fig.savefig(filename)
Esempio n. 2
0
def series_scatter(s1, s2, ax=None, ann='p', filename=None, **plot_args):
    """
    Draw a scatter plot of s1 against s2.

    The two Series are passed through match_series before plotting, and the
    axis labels come from s1.name (x) and s2.name (y).

    Inputs
        s1, s2: Series to compare
        ax: axis passed to init_ax, which returns the (fig, ax) pair used
        ann: 'p' annotates the p-value from Tests.spearman_pandas as
             'p = 1.2e-03'-style text, 'fancy_p' formats it with
             latex_float; other values add no annotation
        filename: path to save the figure to, or None to skip saving
        plot_args: extra keyword arguments for ax.scatter; 's' and 'alpha'
                   default to 75 and .5
    """
    fig, ax = init_ax(ax, figsize=(6, 4))

    # Defaults apply only to keys the caller did not pass.
    for key, default in (('s', 75), ('alpha', .5)):
        if key not in plot_args:
            plot_args[key] = default

    x_vals, y_vals = match_series(s1, s2)
    ax.scatter(x_vals, y_vals, **plot_args)
    ax.set_xlabel(s1.name)
    ax.set_ylabel(s2.name)

    # Shared placement for either flavour of p-value annotation.
    placement = dict(xycoords='axes fraction', ha='right', va='bottom',
                     size=14)
    if ann == 'p':
        p_value = Tests.spearman_pandas(s1, s2)['p']
        ax.annotate('p = {0:.1e}'.format(p_value), (.95, -.02), **placement)
    elif ann == 'fancy_p':
        p_value = Tests.spearman_pandas(s1, s2)['p']
        ax.annotate('$p = {}$'.format(latex_float(p_value)), (.95, -.02),
                    **placement)

    if filename is not None:
        fig.savefig(filename)
Esempio n. 3
0
def violin_plot_pandas(bin_vec,
                       real_vec,
                       ann='p',
                       order=None,
                       ax=None,
                       filename=None):
    """
    http://pyinsci.blogspot.com/2009/09/violin-plot-with-matplotlib.html
    Wrapper around matplotlib's boxplot function to add violin profile.

    If drawing the violins raises, the plot falls back to box_plot_pandas.

    Inputs
        bin_vec: Series of labels
        real_vec: Series of measurements to be grouped according to bin_vec
        ann: 'p' annotates the p-value from Stats.kruskal_pandas in
             scientific notation, 'p_fancy' formats it with latex_float,
             None adds no annotation, and any other value is drawn as-is
        order: categories (and their order) to plot; defaults to the index
               of bin_vec.value_counts()
        ax: axis passed to init_ax, which returns the (fig, ax) pair used
        filename: when not None, the figure is saved to this path
    """
    fig, ax = init_ax(ax)
    ax.set_ylabel(real_vec.name)
    ax.set_xlabel(bin_vec.name)
    bin_vec, real_vec = match_series(bin_vec, real_vec)
    try:
        if order is None:
            categories = bin_vec.value_counts().index
        else:
            categories = order
        _violin_plot(ax, [real_vec[bin_vec == num] for num in categories],
                     pos=categories,
                     bp=True)
        ax.set_xticklabels(
            [str(c) + '\n(n=%i)' % sum(bin_vec == c) for c in categories])
    except Exception:
        # Best-effort fallback to a plain box plot. Catching Exception
        # rather than everything lets KeyboardInterrupt and SystemExit
        # propagate.
        box_plot_pandas(bin_vec, real_vec, ax=ax)

    # One if/elif chain: 'p_fancy' must not fall through to the free-text
    # branch, which would draw the literal string 'p_fancy' as well.
    # The test is only run when a p-value is actually going to be shown.
    if ann == 'p_fancy':
        p_value = Stats.kruskal_pandas(bin_vec, real_vec)['p']
        ax.annotate('$p = {}$'.format(latex_float(p_value)), (.95, -.02),
                    xycoords='axes fraction',
                    ha='right',
                    va='bottom',
                    size=14)
    elif ann == 'p':
        p_value = Stats.kruskal_pandas(bin_vec, real_vec)['p']
        ax.annotate('p = {0:.1e}'.format(p_value), (.95, .02),
                    xycoords='axes fraction',
                    ha='right',
                    va='bottom',
                    size=12)
    elif ann is not None:
        ax.annotate(ann, (.95, .02),
                    xycoords='axes fraction',
                    ha='right',
                    va='bottom',
                    size=12)
    if filename is not None:
        fig.savefig(filename)
Esempio n. 4
0
def paired_boxplot_tumor_normal(df,
                                sig=True,
                                cutoffs=(.01, .00001),
                                order=None,
                                ax=None):
    """
    Draws a paired boxplot given a DataFrame with both tumor and normal
    samples on the index. '01' and '11' are hard-coded as the ids for
    tumor/normal.

    Inputs
        df: DataFrame whose index carries the '01' / '11' code on level 1;
            only level-0 entries with exactly two rows are kept
        sig: when True, columns are starred according to the p-value from
             Stats.ttest_rel between the '01' and '11' rows
        cutoffs: (loose, strict) p-value thresholds. p < strict is marked
                 '**', strict <= p < loose is marked '*'. The legend text
                 is fixed at 10^-2 / 10^-5 whatever values are passed.
        order: columns (and their order) to plot; defaults to descending
               median of the '11' rows
        ax: axis forwarded to paired_boxplot
    """
    is_pair = df.groupby(level=0).size() == 2
    # .loc / .sort_values() / .values replace .ix / .order() / .as_matrix(),
    # which are no longer available in current pandas releases.
    df = df.loc[is_pair[is_pair].index]
    if order is None:
        by_median = df.xs('11', level=1).median().sort_values().index
        df = df[by_median[::-1]]
    else:
        df = df[order]
    tumor = list(df.xs('01', level=1).values.T)
    normal = list(df.xs('11', level=1).values.T)
    # Interleave so every column contributes a tumor box then a normal box.
    boxes = [x for t in zip(tumor, normal) for x in t]
    ax1, bp = paired_boxplot(boxes, ax)

    def test(v):
        by_type = v.unstack()
        return Stats.ttest_rel(by_type['01'], by_type['11'])

    res = df.apply(test).T
    p = res.p

    if sig:
        # Markers sit at a fixed y value of 18; x follows the i * 3.5 + .5
        # spacing used for each column's pair of boxes.
        strict_pts = [(i * 3.5 + .5, 18) for i, pv in enumerate(p)
                      if pv < cutoffs[1]]
        # Lower bound is inclusive so p == cutoffs[1] is not left unmarked.
        loose_pts = [(i * 3.5 + .5, 18) for i, pv in enumerate(p)
                     if cutoffs[1] <= pv < cutoffs[0]]
        s1 = s2 = None
        if strict_pts:
            s1 = ax1.scatter(*zip(*strict_pts),
                             marker='$**$',
                             label='$p<10^{-5}$',
                             s=200)
        if loose_pts:
            s2 = ax1.scatter(*zip(*loose_pts),
                             marker='$*$',
                             label='$p<10^{-2}$',
                             s=30)
        # Only hand real artists to legend(): a marker class with no points
        # has no scatter handle to show.
        entries = list(zip(bp['boxes'][:2], ('Tumor', 'Normal')))
        entries += [(handle, text)
                    for handle, text in ((s2, '$p<10^{-2}$'),
                                         (s1, '$p<10^{-5}$'))
                    if handle is not None]
        ax1.legend([handle for handle, _ in entries],
                   [text for _, text in entries],
                   loc='best',
                   scatterpoints=1)
    else:
        ax1.legend(bp['boxes'][:2], ('Tumor', 'Normal'), loc='best')
    ax1.set_xticklabels(df.columns)
Esempio n. 5
0
def violin_plot_pandas(bin_vec, real_vec, ann='p', order=None, ax=None,
                       filename=None):
    """
    http://pyinsci.blogspot.com/2009/09/violin-plot-with-matplotlib.html
    Wrapper around matplotlib's boxplot function to add violin profile.

    If drawing the violins raises, the plot falls back to box_plot_pandas.

    Inputs
        bin_vec: Series of labels
        real_vec: Series of measurements to be grouped according to bin_vec
        ann: 'p' annotates the p-value from Stats.kruskal_pandas in
             scientific notation, 'p_fancy' formats it with latex_float,
             None adds no annotation, and any other value is drawn as-is
        order: categories (and their order) to plot; defaults to the index
               of bin_vec.value_counts()
        ax: axis passed to init_ax, which returns the (fig, ax) pair used
        filename: when not None, the figure is saved to this path
    """
    fig, ax = init_ax(ax)
    ax.set_ylabel(real_vec.name)
    ax.set_xlabel(bin_vec.name)
    bin_vec, real_vec = match_series(bin_vec, real_vec)
    try:
        if order is None:
            categories = bin_vec.value_counts().index
        else:
            categories = order
        _violin_plot(ax, [real_vec[bin_vec == num] for num in categories],
                     pos=categories, bp=True)
        ax.set_xticklabels([str(c) + '\n(n=%i)' % sum(bin_vec == c)
                            for c in categories])
    except Exception:
        # Best-effort fallback to a plain box plot. Catching Exception
        # rather than everything lets KeyboardInterrupt and SystemExit
        # propagate.
        box_plot_pandas(bin_vec, real_vec, ax=ax)

    # One if/elif chain: 'p_fancy' must not fall through to the free-text
    # branch, which would draw the literal string 'p_fancy' as well.
    # The test is only run when a p-value is actually going to be shown.
    if ann == 'p_fancy':
        p_value = Stats.kruskal_pandas(bin_vec, real_vec)['p']
        ax.annotate('$p = {}$'.format(latex_float(p_value)), (.95, -.02),
                    xycoords='axes fraction', ha='right', va='bottom', size=14)
    elif ann == 'p':
        p_value = Stats.kruskal_pandas(bin_vec, real_vec)['p']
        ax.annotate('p = {0:.1e}'.format(p_value), (.95, .02),
                    xycoords='axes fraction', ha='right', va='bottom', size=12)
    elif ann is not None:
        ax.annotate(ann, (.95, .02), xycoords='axes fraction', ha='right',
                    va='bottom', size=12)
    if filename is not None:
        fig.savefig(filename)
Esempio n. 6
0
def paired_boxplot_tumor_normal(df, sig=True, cutoffs=(.01, .00001),
                                order=None, ax=None):
    """
    Draws a paired boxplot given a DataFrame with both tumor and normal
    samples on the index. '01' and '11' are hard-coded as the ids for
    tumor/normal.

    Inputs
        df: DataFrame whose index carries the '01' / '11' code on level 1;
            only level-0 entries with exactly two rows are kept
        sig: when True, columns are starred according to the p-value from
             Stats.ttest_rel between the '01' and '11' rows
        cutoffs: (loose, strict) p-value thresholds. p < strict is marked
                 '**', strict <= p < loose is marked '*'. The legend text
                 is fixed at 10^-2 / 10^-5 whatever values are passed.
        order: columns (and their order) to plot; defaults to descending
               median of the '11' rows
        ax: axis forwarded to paired_boxplot
    """
    is_pair = df.groupby(level=0).size() == 2
    # .loc / .sort_values() / .values replace .ix / .order() / .as_matrix(),
    # which are no longer available in current pandas releases.
    df = df.loc[is_pair[is_pair].index]
    if order is None:
        by_median = df.xs('11', level=1).median().sort_values().index
        df = df[by_median[::-1]]
    else:
        df = df[order]
    tumor = list(df.xs('01', level=1).values.T)
    normal = list(df.xs('11', level=1).values.T)
    # Interleave so every column contributes a tumor box then a normal box.
    boxes = [x for t in zip(tumor, normal) for x in t]
    ax1, bp = paired_boxplot(boxes, ax)

    def test(v):
        by_type = v.unstack()
        return Stats.ttest_rel(by_type['01'], by_type['11'])

    res = df.apply(test).T
    p = res.p

    if sig:
        # Markers sit at a fixed y value of 18; x follows the i * 3.5 + .5
        # spacing used for each column's pair of boxes.
        strict_pts = [(i * 3.5 + .5, 18) for i, pv in enumerate(p)
                      if pv < cutoffs[1]]
        # Lower bound is inclusive so p == cutoffs[1] is not left unmarked.
        loose_pts = [(i * 3.5 + .5, 18) for i, pv in enumerate(p)
                     if cutoffs[1] <= pv < cutoffs[0]]
        s1 = s2 = None
        if strict_pts:
            s1 = ax1.scatter(*zip(*strict_pts), marker='$**$',
                             label='$p<10^{-5}$', s=200)
        if loose_pts:
            s2 = ax1.scatter(*zip(*loose_pts), marker='$*$',
                             label='$p<10^{-2}$', s=30)
        # Only hand real artists to legend(): a marker class with no points
        # has no scatter handle to show.
        entries = list(zip(bp['boxes'][:2], ('Tumor', 'Normal')))
        entries += [(handle, text)
                    for handle, text in ((s2, '$p<10^{-2}$'),
                                         (s1, '$p<10^{-5}$'))
                    if handle is not None]
        ax1.legend([handle for handle, _ in entries],
                   [text for _, text in entries],
                   loc='best', scatterpoints=1)
    else:
        ax1.legend(bp['boxes'][:2], ('Tumor', 'Normal'), loc='best')
    ax1.set_xticklabels(df.columns)
Esempio n. 7
0
def boxplot_panel(hit_vec, response_df):
    """
    Draws a series of paired boxplots with the rows of the response_df
    split according to hit_vec.

    Inputs
        hit_vec: Series labelling the columns of response_df; it must hold
                 exactly two distinct values, which become the two boxes of
                 each pair and the first two legend entries
        response_df: DataFrame with one row per plotted pair of boxes

    Rows are plotted in ascending order of the p-value from Stats.anova.
    Rows with p < 1e-5 are marked '**' and rows with 1e-5 <= p < 1e-2 are
    marked '*'.
    """
    b = response_df.copy()
    # reindex stands in for the removed .ix label lookup; columns missing
    # from hit_vec get NaN rather than raising.
    b.columns = pd.MultiIndex.from_arrays(
        [b.columns, hit_vec.reindex(b.columns)])
    b = b.T
    v1, v2 = hit_vec.unique()

    def test(v):
        flat = v.reset_index(level=1)
        return Stats.anova(flat[v.index.names[1]], flat[v.name])

    res = b.apply(test).T
    # sort_values / .loc / .values replace .order() / .ix / .as_matrix(),
    # which are no longer available in current pandas releases.
    p = res.p.sort_values()
    b = b.loc[:, p.index]

    l1 = list(b.xs(v1, level=1).values.T)
    l2 = list(b.xs(v2, level=1).values.T)

    # Interleave so every row contributes a v1 box followed by a v2 box.
    boxes = [x for t in zip(l1, l2) for x in t]
    ax1, bp = paired_boxplot(boxes)

    # Markers go at 1.2 times the largest per-row 90th percentile.
    y_lim = (response_df.T.quantile(.9).max()) * 1.2
    strict_pts = [(i * 3.5 + .5, y_lim) for i, pv in enumerate(p)
                  if pv < .00001]
    # Lower bound is inclusive so p == 1e-5 is not left unmarked.
    loose_pts = [(i * 3.5 + .5, y_lim) for i, pv in enumerate(p)
                 if .00001 <= pv < .01]
    s1 = s2 = None
    if strict_pts:
        s1 = ax1.scatter(*zip(*strict_pts), marker='$**$',
                         label='$p<10^{-5}$', s=200)
    if loose_pts:
        s2 = ax1.scatter(*zip(*loose_pts), marker='$*$',
                         label='$p<10^{-2}$', s=30)
    ax1.set_xticklabels(b.columns)

    # Only hand real artists to legend(): a marker class with no points has
    # no scatter handle to show.
    entries = list(zip(bp['boxes'][:2], (v1, v2)))
    entries += [(handle, text)
                for handle, text in ((s2, '$p<10^{-2}$'),
                                     (s1, '$p<10^{-5}$'))
                if handle is not None]
    ax1.legend([handle for handle, _ in entries],
               [text for _, text in entries],
               loc='best',
               scatterpoints=1)
Esempio n. 8
0
def boxplot_panel(hit_vec, response_df):
    """
    Draws a series of paired boxplots with the rows of the response_df
    split according to hit_vec.

    Inputs
        hit_vec: Series labelling the columns of response_df; it must hold
                 exactly two distinct values, which become the two boxes of
                 each pair and the first two legend entries
        response_df: DataFrame with one row per plotted pair of boxes

    Rows are plotted in ascending order of the p-value from Stats.anova.
    Rows with p < 1e-5 are marked '**' and rows with 1e-5 <= p < 1e-2 are
    marked '*'.
    """
    b = response_df.copy()
    # reindex stands in for the removed .ix label lookup; columns missing
    # from hit_vec get NaN rather than raising.
    b.columns = pd.MultiIndex.from_arrays([b.columns,
                                           hit_vec.reindex(b.columns)])
    b = b.T
    v1, v2 = hit_vec.unique()

    def test(v):
        flat = v.reset_index(level=1)
        return Stats.anova(flat[v.index.names[1]], flat[v.name])

    res = b.apply(test).T
    # sort_values / .loc / .values replace .order() / .ix / .as_matrix(),
    # which are no longer available in current pandas releases.
    p = res.p.sort_values()
    b = b.loc[:, p.index]

    l1 = list(b.xs(v1, level=1).values.T)
    l2 = list(b.xs(v2, level=1).values.T)

    # Interleave so every row contributes a v1 box followed by a v2 box.
    boxes = [x for t in zip(l1, l2) for x in t]
    ax1, bp = paired_boxplot(boxes)

    # Markers go at 1.2 times the largest per-row 90th percentile.
    y_lim = (response_df.T.quantile(.9).max()) * 1.2
    strict_pts = [(i * 3.5 + .5, y_lim) for i, pv in enumerate(p)
                  if pv < .00001]
    # Lower bound is inclusive so p == 1e-5 is not left unmarked.
    loose_pts = [(i * 3.5 + .5, y_lim) for i, pv in enumerate(p)
                 if .00001 <= pv < .01]
    s1 = s2 = None
    if strict_pts:
        s1 = ax1.scatter(*zip(*strict_pts), marker='$**$',
                         label='$p<10^{-5}$', s=200)
    if loose_pts:
        s2 = ax1.scatter(*zip(*loose_pts), marker='$*$',
                         label='$p<10^{-2}$', s=30)
    ax1.set_xticklabels(b.columns)

    # Only hand real artists to legend(): a marker class with no points has
    # no scatter handle to show.
    entries = list(zip(bp['boxes'][:2], (v1, v2)))
    entries += [(handle, text)
                for handle, text in ((s2, '$p<10^{-2}$'),
                                     (s1, '$p<10^{-5}$'))
                if handle is not None]
    ax1.legend([handle for handle, _ in entries],
               [text for _, text in entries],
               loc='best', scatterpoints=1)
Esempio n. 9
0
def exp_change(s):
    """
    Run Tests.anova on s against the labels held in the second level of
    its MultiIndex (eg. tumor/normal).

    The grouping Series holds the level-1 values and shares s's index.
    """
    level_one = s.index.get_level_values(1)
    groups = pd.Series(level_one, s.index)
    return Tests.anova(groups, s)
Esempio n. 10
0
def exp_change(s):
    """
    Anova of s across the variable stored on level 1 of its MultiIndex
    (eg. tumor/normal).

    Returns whatever Tests.anova returns for the (labels, values) pair.
    """
    idx = s.index
    labels = pd.Series(idx.get_level_values(1), idx)
    result = Tests.anova(labels, s)
    return result