예제 #1
0
def boxplot_panel(hit_vec, response_df):
    """
    Draws a series of paired boxplots, one pair per row of response_df,
    with the columns of response_df split into two groups by hit_vec.

    Each row is tested against the grouping with Stats.anova, the rows are
    sorted by the resulting p-value, and the pairs are drawn with
    paired_boxplot.  Rows with p < 1e-5 are flagged with '**' and rows with
    1e-5 <= p < 1e-2 with '*'.  Nothing is returned; the plot is drawn on
    the axes handed back by paired_boxplot.

    hit_vec:     Series giving a group label for every column of
                 response_df.  Must take exactly two distinct values.
    response_df: DataFrame of values to plot.

    Raises ValueError if hit_vec does not have exactly two distinct values.
    """
    b = response_df.copy()
    # Attach each column's group label as a second index level so the two
    # groups can be pulled apart with xs() after transposing.
    b.columns = pd.MultiIndex.from_arrays([b.columns, hit_vec.ix[b.columns]])
    b = b.T

    groups = hit_vec.unique()
    if len(groups) != 2:
        # Same exception type as the bare tuple-unpack, clearer message.
        raise ValueError('hit_vec must take exactly two distinct values '
                         '(found %d)' % len(groups))
    v1, v2 = groups

    def test(v):
        # Level 1 of the index carries the group label; the column named
        # after the series carries the values being compared.
        flat = v.reset_index(level=1)
        return Stats.anova(flat[v.index.names[1]], flat[v.name])

    res = b.apply(test).T
    p = res.p.order()  # order() sorts ascending by default
    b = b.ix[:, p.index]

    l1 = list(b.xs(v1, level=1).as_matrix().T)
    l2 = list(b.xs(v2, level=1).as_matrix().T)

    # Interleave so the two groups of each row sit next to one another.
    boxes = [x for t in zip(l1, l2) for x in t]
    ax1, bp = paired_boxplot(boxes)

    # Markers go at 1.2x the largest per-row 90th percentile.
    y_lim = (response_df.T.quantile(.9).max()) * 1.2
    # NOTE(review): the 3.5 * i + .5 x-position presumably mirrors the
    # spacing used inside paired_boxplot -- confirm against that helper.
    pts = [(i * 3.5 + .5, y_lim) for i, n in enumerate(p) if n < .00001]
    s1 = None
    if pts:
        s1 = ax1.scatter(*zip(*pts), marker='$**$', label='$p<10^{-5}$',
                         s=200)
    # >= keeps a p-value of exactly 1e-5 in the single-star tier instead of
    # dropping it from both tiers.
    pts = [(i * 3.5 + .5, y_lim) for i, n in enumerate(p)
           if (n < .01) and (n >= .00001)]
    s2 = None
    if pts:
        s2 = ax1.scatter(*zip(*pts), marker='$*$', label='$p<10^{-2}$', s=30)
    ax1.set_xticklabels(b.columns)

    # Only list significance markers that were actually drawn; a None
    # handle is not a valid legend entry.
    handles = list(bp['boxes'][:2])
    labels = [v1, v2]
    for artist, label in ((s2, '$p<10^{-2}$'), (s1, '$p<10^{-5}$')):
        if artist is not None:
            handles.append(artist)
            labels.append(label)
    ax1.legend(handles, labels, loc='best', scatterpoints=1)
예제 #2
0
파일: Boxplots.py 프로젝트: Krysia/TCGA
def boxplot_panel(hit_vec, response_df):
    """
    Draws a series of paired boxplots, one pair per row of response_df,
    with the columns of response_df split into two groups by hit_vec.

    Each row is tested against the grouping with Stats.anova, the rows are
    sorted by the resulting p-value, and the pairs are drawn with
    paired_boxplot.  Rows with p < 1e-5 are flagged with '**' and rows with
    1e-5 <= p < 1e-2 with '*'.  Nothing is returned; the plot is drawn on
    the axes handed back by paired_boxplot.

    hit_vec:     Series giving a group label for every column of
                 response_df.  Must take exactly two distinct values.
    response_df: DataFrame of values to plot.

    Raises ValueError if hit_vec does not have exactly two distinct values.
    """
    b = response_df.copy()
    # Attach each column's group label as a second index level so the two
    # groups can be pulled apart with xs() after transposing.
    b.columns = pd.MultiIndex.from_arrays([b.columns, hit_vec.ix[b.columns]])
    b = b.T

    groups = hit_vec.unique()
    if len(groups) != 2:
        # Same exception type as the bare tuple-unpack, clearer message.
        raise ValueError('hit_vec must take exactly two distinct values '
                         '(found %d)' % len(groups))
    v1, v2 = groups

    def test(v):
        # Level 1 of the index carries the group label; the column named
        # after the series carries the values being compared.
        flat = v.reset_index(level=1)
        return Stats.anova(flat[v.index.names[1]], flat[v.name])

    res = b.apply(test).T
    p = res.p.order()  # order() sorts ascending by default
    b = b.ix[:, p.index]

    l1 = list(b.xs(v1, level=1).as_matrix().T)
    l2 = list(b.xs(v2, level=1).as_matrix().T)

    # Interleave so the two groups of each row sit next to one another.
    boxes = [x for t in zip(l1, l2) for x in t]
    ax1, bp = paired_boxplot(boxes)

    # Markers go at 1.2x the largest per-row 90th percentile.
    y_lim = (response_df.T.quantile(.9).max()) * 1.2
    # NOTE(review): the 3.5 * i + .5 x-position presumably mirrors the
    # spacing used inside paired_boxplot -- confirm against that helper.
    pts = [(i * 3.5 + .5, y_lim) for i, n in enumerate(p) if n < .00001]
    s1 = None
    if pts:
        s1 = ax1.scatter(*zip(*pts), marker='$**$', label='$p<10^{-5}$',
                         s=200)
    # >= keeps a p-value of exactly 1e-5 in the single-star tier instead of
    # dropping it from both tiers.
    pts = [(i * 3.5 + .5, y_lim) for i, n in enumerate(p)
           if (n < .01) and (n >= .00001)]
    s2 = None
    if pts:
        s2 = ax1.scatter(*zip(*pts), marker='$*$', label='$p<10^{-2}$', s=30)
    ax1.set_xticklabels(b.columns)

    # Only list significance markers that were actually drawn; a None
    # handle is not a valid legend entry.
    handles = list(bp['boxes'][:2])
    labels = [v1, v2]
    for artist, label in ((s2, '$p<10^{-2}$'), (s1, '$p<10^{-5}$')):
        if artist is not None:
            handles.append(artist)
            labels.append(label)
    ax1.legend(handles, labels, loc='best', scatterpoints=1)
예제 #3
0
def exp_change(s):
    '''
    Runs Tests.anova on the series s, grouped by the labels stored in the
    second level of its MultiIndex (eg. tumor/normal), and returns the
    result unchanged.
    '''
    # Rebuild the level-1 labels as a Series aligned to s's own index so
    # both arguments share the same index.
    labels = s.index.get_level_values(1)
    grouping = pd.Series(labels, s.index)
    return Tests.anova(grouping, s)
예제 #4
0
def exp_change(s):
    '''
    Runs Tests.anova on the series s, grouped by the labels stored in the
    second level of its MultiIndex (eg. tumor/normal), and returns the
    result unchanged.
    '''
    # Rebuild the level-1 labels as a Series aligned to s's own index so
    # both arguments share the same index.
    labels = s.index.get_level_values(1)
    grouping = pd.Series(labels, s.index)
    return Tests.anova(grouping, s)