Ejemplo n.º 1
0
def box_plot_pandas(bin_vec, real_vec, ax=None):
    """
    Wrapper around matplotlib's boxplot function.

    Draws one box per distinct label in bin_vec.  Fliers, caps and whiskers
    are hidden so only the boxes and median lines remain visible, and boxes
    built from fewer than three observations are made fully transparent.

    Inputs
        bin_vec: Series of labels
        real_vec: Series of measurements to be grouped according to bin_vec
        ax: axes to draw on; passed to init_ax, which supplies the axes
            actually used
    """
    _, ax = init_ax(ax)
    bin_vec, real_vec = match_series(bin_vec, real_vec)
    # One group of measurements per label, in value_counts() order.
    categories = bin_vec.value_counts().index
    data = [real_vec[bin_vec == num] for num in categories]
    bp = ax.boxplot(data,
                    positions=range(len(categories)),
                    widths=.3,
                    patch_artist=True)
    if real_vec.name:
        ax.set_ylabel(real_vec.name)
    if bin_vec.name:
        ax.set_xlabel(bin_vec.name)

    # Plain loops: these calls are made only for their side effects, so
    # there is no reason to build throwaway lists with comprehensions.
    for key in ('fliers', 'caps', 'whiskers'):
        for artist in bp[key]:
            artist.set_visible(False)

    for median in bp['medians']:
        median.set_color(colors[0])
        median.set_lw(3)
        median.set_alpha(.8)

    for box, values in zip(bp['boxes'], data):
        box.set_color('grey')
        box.set_lw(3)
        # Too few observations for a meaningful box: hide it completely.
        box.set_alpha(0 if len(values) < 3 else .7)
Ejemplo n.º 2
0
def fancy_raster(df, cluster=False, cmap=plt.cm.get_cmap('Spectral'),
                 norm=None, ax=None):
    """
    Show df as an image with labelled ticks and white separator lines
    between the cells.

    Inputs
        df: DataFrame to display; its index labels the y ticks and its
            columns label the x ticks.  With cluster=True the same leaf
            order is applied to rows and columns, so df presumably has to
            be square -- TODO confirm against callers.
        cluster: if True, reorder df by hierarchical clustering of its rows
            and overlay the resulting dendrogram on the axes
        cmap, norm: forwarded to imshow
        ax: axes to draw on; passed to init_ax

    Returns the image object created by imshow.
    """
    if cluster:
        d = sp.spatial.distance.pdist(df)
        D = sp.spatial.distance.squareform(d)
        # NOTE(review): linkage receives the square matrix D rather than the
        # condensed distances d, so scipy treats the rows of D as
        # observation vectors.  Left unchanged to keep the clustering
        # results stable -- confirm whether linkage(d) was intended.
        Y = sp.cluster.hierarchy.linkage(D)
        Z = sp.cluster.hierarchy.dendrogram(Y, no_plot=True)
        order = Z['leaves']
        # Dendrogram leaves are integer positions, so index positionally.
        # (.ix mixed label and position lookup and no longer exists in
        # current pandas.)
        df = df.iloc[order, order]

    _, ax = init_ax(ax, figsize=(12, 8))
    img = ax.imshow(df, interpolation='Nearest', cmap=cmap, norm=norm)
    n_rows, n_cols = len(df.index), len(df.columns)
    ax.set_yticks(range(n_rows))
    ax.set_yticklabels(df.index)
    ax.set_xticks(np.arange(n_cols))
    ax.set_xticklabels(df.columns, rotation=360 - 90, ha='center')
    # Thick white lines on the cell boundaries separate the cells visually.
    ax.hlines(np.arange(n_rows - 1) + .5, -.5, n_cols - .5,
              color='white', lw=6)
    ax.vlines(np.arange(n_cols - 1) + .5, -.5, n_rows - .5,
              color='white', lw=6)

    if cluster:
        icoord = np.array(Z['icoord'])
        icoord = icoord - icoord.min()
        # Rescale the dendrogram x coordinates onto 0 .. n_leaves - 1 so the
        # leaves line up with the image columns.
        icoord = icoord * ((len(Z['leaves']) - 1) / icoord.max())

        # Negate the heights and shift them by .7 so the tree is drawn at
        # y <= -.7, outside the image cells.
        dcoord = -1 * np.array(Z['dcoord']) - .7
        for xs, ys, c in zip(icoord, dcoord, Z['color_list']):
            ax.plot(xs, ys, color=c, lw=2, alpha=.8)

        # Boolean rather than the string 'off', which is truthy and so does
        # not switch the top ticks off on current matplotlib.
        ax.tick_params(axis='x', top=False)
        ax.set_frame_on(False)
    return img
Ejemplo n.º 3
0
def fischer_bar_chart(bin_vec, response_vec, ax=None, filename=None):
    """
    Bar chart of the cross-tabulation of bin_vec against response_vec.

    Inputs
        bin_vec, response_vec: passed to pd.crosstab as rows and columns
        ax: axes to draw on; passed to init_ax
        filename: if given, the figure is saved to this path

    Returns the figure obtained from init_ax.
    """
    fig, ax = init_ax(ax)
    pd.crosstab(bin_vec, response_vec).plot(kind='bar', ax=ax)
    if filename is None:
        return fig
    fig.savefig(filename)
    return fig
Ejemplo n.º 4
0
def box_plot_pandas(bin_vec, real_vec, ax=None):
    """
    Wrapper around matplotlib's boxplot function.

    Draws one box per distinct label in bin_vec.  Fliers, caps and whiskers
    are hidden so only the boxes and median lines remain visible, and boxes
    built from fewer than three observations are made fully transparent.

    Inputs
        bin_vec: Series of labels
        real_vec: Series of measurements to be grouped according to bin_vec
        ax: axes to draw on; passed to init_ax, which supplies the axes
            actually used
    """
    _, ax = init_ax(ax)
    bin_vec, real_vec = match_series(bin_vec, real_vec)
    # One group of measurements per label, in value_counts() order.
    categories = bin_vec.value_counts().index
    data = [real_vec[bin_vec == num] for num in categories]
    bp = ax.boxplot(data, positions=range(len(categories)), widths=.3,
                    patch_artist=True)
    if real_vec.name:
        ax.set_ylabel(real_vec.name)
    if bin_vec.name:
        ax.set_xlabel(bin_vec.name)

    # Plain loops: these calls are made only for their side effects, so
    # there is no reason to build throwaway lists with comprehensions.
    for key in ('fliers', 'caps', 'whiskers'):
        for artist in bp[key]:
            artist.set_visible(False)

    for median in bp['medians']:
        median.set_color(colors[0])
        median.set_lw(3)
        median.set_alpha(.8)

    for box, values in zip(bp['boxes'], data):
        box.set_color('grey')
        box.set_lw(3)
        # Too few observations for a meaningful box: hide it completely.
        box.set_alpha(0 if len(values) < 3 else .7)
Ejemplo n.º 5
0
def fancy_raster(df, cluster=False, cmap=plt.cm.get_cmap('Spectral'),
                 norm=None, ax=None):
    """
    Show df as an image with labelled ticks and white separator lines
    between the cells.

    Inputs
        df: DataFrame to display; its index labels the y ticks and its
            columns label the x ticks.  With cluster=True the same leaf
            order is applied to rows and columns, so df presumably has to
            be square -- TODO confirm against callers.
        cluster: if True, reorder df by hierarchical clustering of its rows
            and overlay the resulting dendrogram on the axes
        cmap, norm: forwarded to imshow
        ax: axes to draw on; passed to init_ax

    Returns the image object created by imshow.
    """
    if cluster:
        d = sp.spatial.distance.pdist(df)
        D = sp.spatial.distance.squareform(d)
        # NOTE(review): linkage receives the square matrix D rather than the
        # condensed distances d, so scipy treats the rows of D as
        # observation vectors.  Left unchanged to keep the clustering
        # results stable -- confirm whether linkage(d) was intended.
        Y = sp.cluster.hierarchy.linkage(D)
        Z = sp.cluster.hierarchy.dendrogram(Y, no_plot=True)
        order = Z['leaves']
        # Dendrogram leaves are integer positions, so index positionally.
        # (.ix mixed label and position lookup and no longer exists in
        # current pandas.)
        df = df.iloc[order, order]

    _, ax = init_ax(ax, figsize=(12, 8))
    img = ax.imshow(df, interpolation='Nearest', cmap=cmap, norm=norm)
    n_rows, n_cols = len(df.index), len(df.columns)
    ax.set_yticks(range(n_rows))
    ax.set_yticklabels(df.index)
    ax.set_xticks(np.arange(n_cols))
    ax.set_xticklabels(df.columns, rotation=360 - 90, ha='center')
    # Thick white lines on the cell boundaries separate the cells visually.
    ax.hlines(np.arange(n_rows - 1) + .5, -.5, n_cols - .5,
              color='white', lw=6)
    ax.vlines(np.arange(n_cols - 1) + .5, -.5, n_rows - .5,
              color='white', lw=6)

    if cluster:
        icoord = np.array(Z['icoord'])
        icoord = icoord - icoord.min()
        # Rescale the dendrogram x coordinates onto 0 .. n_leaves - 1 so the
        # leaves line up with the image columns.
        icoord = icoord * ((len(Z['leaves']) - 1) / icoord.max())

        # Negate the heights and shift them by .7 so the tree is drawn at
        # y <= -.7, outside the image cells.
        dcoord = -1 * np.array(Z['dcoord']) - .7
        for xs, ys, c in zip(icoord, dcoord, Z['color_list']):
            ax.plot(xs, ys, color=c, lw=2, alpha=.8)

        # Boolean rather than the string 'off', which is truthy and so does
        # not switch the top ticks off on current matplotlib.
        ax.tick_params(axis='x', top=False)
        ax.set_frame_on(False)
    return img
Ejemplo n.º 6
0
def fischer_bar_chart(bin_vec, response_vec, ax=None, filename=None):
    """
    Bar chart of the cross-tabulation of bin_vec against response_vec.

    Inputs
        bin_vec, response_vec: passed to pd.crosstab as rows and columns
        ax: axes to draw on; passed to init_ax
        filename: if given, the figure is saved to this path

    Returns the figure obtained from init_ax.
    """
    fig, ax = init_ax(ax)
    pd.crosstab(bin_vec, response_vec).plot(kind='bar', ax=ax)
    if filename is None:
        return fig
    fig.savefig(filename)
    return fig
Ejemplo n.º 7
0
def violin_plot_pandas(bin_vec,
                       real_vec,
                       ann='p',
                       order=None,
                       ax=None,
                       filename=None):
    """
    http://pyinsci.blogspot.com/2009/09/violin-plot-with-matplotlib.html
    Wrapper around matplotlib's boxplot function to add violin profile.

    Inputs
        bin_vec: Series of labels
        real_vec: Series of measurements to be grouped according to bin_vec
        ann: 'p' or 'p_fancy' annotate the axes with the 'p' value returned
            by Stats.kruskal_pandas (formatted plainly, or through
            latex_float); any other non-None value is drawn verbatim; None
            disables the annotation
        order: categories to plot, in that order; defaults to the index of
            bin_vec.value_counts()
        ax: axes to draw on; passed to init_ax
        filename: if given, the figure is saved to this path
    """
    fig, ax = init_ax(ax)
    ax.set_ylabel(real_vec.name)
    ax.set_xlabel(bin_vec.name)
    bin_vec, real_vec = match_series(bin_vec, real_vec)
    try:
        if order is None:
            categories = bin_vec.value_counts().index
        else:
            categories = order
        _violin_plot(ax, [real_vec[bin_vec == num] for num in categories],
                     pos=categories,
                     bp=True)
        ax.set_xticklabels(
            [str(c) + '\n(n=%i)' % sum(bin_vec == c) for c in categories])
    except Exception:
        # Best-effort fallback to a plain box plot.  Exception (not a bare
        # except) so KeyboardInterrupt and SystemExit still propagate.
        box_plot_pandas(bin_vec, real_vec, ax=ax)

    if ann in ('p', 'p_fancy'):
        # Only run the test when its p-value is actually displayed.
        p_value = Stats.kruskal_pandas(bin_vec, real_vec)['p']
        if ann == 'p_fancy':
            ax.annotate('$p = {}$'.format(latex_float(p_value)), (.95, -.02),
                        xycoords='axes fraction',
                        ha='right',
                        va='bottom',
                        size=14)
        else:
            ax.annotate('p = {0:.1e}'.format(p_value), (.95, .02),
                        xycoords='axes fraction',
                        ha='right',
                        va='bottom',
                        size=12)
    elif ann is not None:
        # Reached only for custom text: 'p_fancy' must not fall through
        # here, otherwise the literal string is drawn on top of the plot.
        ax.annotate(ann, (.95, .02),
                    xycoords='axes fraction',
                    ha='right',
                    va='bottom',
                    size=12)
    if filename is not None:
        fig.savefig(filename)
Ejemplo n.º 8
0
def histo_compare(hit_vec, response_vec, ax=None):
    '''
    Split response_vec by hit_vec and compare histograms.
    Also plots the kde of the whole response_vec.

    Inputs
        hit_vec: vector used by split_a_by_b to split response_vec in two
        response_vec: measurements to plot
        ax: axes to draw on; passed to init_ax

    Returns the figure obtained from init_ax.
    '''
    fig, ax = init_ax(ax)
    kde1 = sp.stats.gaussian_kde(response_vec)
    # Evaluate the KDE on an even grid spanning the observed range.
    x_eval = np.linspace(min(response_vec), max(response_vec), num=200)
    ax.plot(x_eval, kde1(x_eval), 'k-')
    miss, hit = split_a_by_b(response_vec, hit_vec)
    # density=True replaces the normed keyword, which current matplotlib no
    # longer accepts; normalised histograms share the KDE curve's scale.
    ax.hist(miss, bins=20, density=True, alpha=.2, label='WT')
    ax.hist(hit, bins=10, density=True, alpha=.5, label='Mut')
    ax.legend()
    return fig
Ejemplo n.º 9
0
def histo_compare(hit_vec, response_vec, ax=None):
    '''
    Split response_vec by hit_vec and compare histograms.
    Also plots the kde of the whole response_vec.

    Inputs
        hit_vec: vector used by split_a_by_b to split response_vec in two
        response_vec: measurements to plot
        ax: axes to draw on; passed to init_ax

    Returns the figure obtained from init_ax.
    '''
    fig, ax = init_ax(ax)
    kde1 = sp.stats.gaussian_kde(response_vec)
    # Evaluate the KDE on an even grid spanning the observed range.
    x_eval = np.linspace(min(response_vec), max(response_vec), num=200)
    ax.plot(x_eval, kde1(x_eval), 'k-')
    miss, hit = split_a_by_b(response_vec, hit_vec)
    # density=True replaces the normed keyword, which current matplotlib no
    # longer accepts; normalised histograms share the KDE curve's scale.
    ax.hist(miss, bins=20, density=True, alpha=.2, label='WT')
    ax.hist(hit, bins=10, density=True, alpha=.5, label='Mut')
    ax.legend()
    return fig
Ejemplo n.º 10
0
def series_scatter(s1, s2, ax=None, ann='p', filename=None, **plot_args):
    """
    Scatter plot of s1 against s2.

    Inputs
        s1, s2: series to plot; they are passed through match_series before
            plotting, and their names label the x and y axes
        ax: axes to draw on; passed to init_ax
        ann: 'p' or 'fancy_p' annotate the axes with the 'p' value returned
            by Tests.spearman_pandas (formatted plainly, or through
            latex_float); any other value adds no annotation
        filename: if given, the figure is saved to this path
        plot_args: forwarded to ax.scatter; 's' defaults to 75 and 'alpha'
            to .5 when not supplied
    """
    fig, ax = init_ax(ax, figsize=(6, 4))
    plot_args.setdefault('s', 75)
    plot_args.setdefault('alpha', .5)
    matched = match_series(s1, s2)
    ax.scatter(*matched, **plot_args)
    ax.set_xlabel(s1.name)
    ax.set_ylabel(s2.name)

    if ann in ('p', 'fancy_p'):
        p_value = Tests.spearman_pandas(s1, s2)['p']
        if ann == 'p':
            text = 'p = {0:.1e}'.format(p_value)
        else:
            text = '$p = {}$'.format(latex_float(p_value))
        ax.annotate(text, (.95, -.02), xycoords='axes fraction',
                    ha='right', va='bottom', size=14)

    if filename is not None:
        fig.savefig(filename)
Ejemplo n.º 11
0
def series_scatter(s1, s2, ax=None, ann='p', filename=None, **plot_args):
    """
    Scatter plot of s1 against s2.

    Inputs
        s1, s2: series to plot; they are passed through match_series before
            plotting, and their names label the x and y axes
        ax: axes to draw on; passed to init_ax
        ann: 'p' or 'fancy_p' annotate the axes with the 'p' value returned
            by Tests.spearman_pandas (formatted plainly, or through
            latex_float); any other value adds no annotation
        filename: if given, the figure is saved to this path
        plot_args: forwarded to ax.scatter; 's' defaults to 75 and 'alpha'
            to .5 when not supplied
    """
    fig, ax = init_ax(ax, figsize=(6, 4))
    plot_args.setdefault('s', 75)
    plot_args.setdefault('alpha', .5)
    matched = match_series(s1, s2)
    ax.scatter(*matched, **plot_args)
    ax.set_xlabel(s1.name)
    ax.set_ylabel(s2.name)

    if ann in ('p', 'fancy_p'):
        p_value = Tests.spearman_pandas(s1, s2)['p']
        if ann == 'p':
            text = 'p = {0:.1e}'.format(p_value)
        else:
            text = '$p = {}$'.format(latex_float(p_value))
        ax.annotate(text, (.95, -.02), xycoords='axes fraction',
                    ha='right', va='bottom', size=14)

    if filename is not None:
        fig.savefig(filename)
Ejemplo n.º 12
0
def violin_plot_pandas(bin_vec, real_vec, ann='p', order=None, ax=None,
                       filename=None):
    """
    http://pyinsci.blogspot.com/2009/09/violin-plot-with-matplotlib.html
    Wrapper around matplotlib's boxplot function to add violin profile.

    Inputs
        bin_vec: Series of labels
        real_vec: Series of measurements to be grouped according to bin_vec
        ann: 'p' or 'p_fancy' annotate the axes with the 'p' value returned
            by Stats.kruskal_pandas (formatted plainly, or through
            latex_float); any other non-None value is drawn verbatim; None
            disables the annotation
        order: categories to plot, in that order; defaults to the index of
            bin_vec.value_counts()
        ax: axes to draw on; passed to init_ax
        filename: if given, the figure is saved to this path
    """
    fig, ax = init_ax(ax)
    ax.set_ylabel(real_vec.name)
    ax.set_xlabel(bin_vec.name)
    bin_vec, real_vec = match_series(bin_vec, real_vec)
    try:
        if order is None:
            categories = bin_vec.value_counts().index
        else:
            categories = order
        _violin_plot(ax, [real_vec[bin_vec == num] for num in categories],
                     pos=categories, bp=True)
        ax.set_xticklabels([str(c) + '\n(n=%i)' % sum(bin_vec == c)
                            for c in categories])
    except Exception:
        # Best-effort fallback to a plain box plot.  Exception (not a bare
        # except) so KeyboardInterrupt and SystemExit still propagate.
        box_plot_pandas(bin_vec, real_vec, ax=ax)

    if ann in ('p', 'p_fancy'):
        # Only run the test when its p-value is actually displayed.
        p_value = Stats.kruskal_pandas(bin_vec, real_vec)['p']
        if ann == 'p_fancy':
            ax.annotate('$p = {}$'.format(latex_float(p_value)), (.95, -.02),
                        xycoords='axes fraction', ha='right', va='bottom',
                        size=14)
        else:
            ax.annotate('p = {0:.1e}'.format(p_value), (.95, .02),
                        xycoords='axes fraction', ha='right', va='bottom',
                        size=12)
    elif ann is not None:
        # Reached only for custom text: 'p_fancy' must not fall through
        # here, otherwise the literal string is drawn on top of the plot.
        ax.annotate(ann, (.95, .02), xycoords='axes fraction', ha='right',
                    va='bottom', size=12)
    if filename is not None:
        fig.savefig(filename)
Ejemplo n.º 13
0
def count_plot(vec, name=None, ax=None):
    """
    Bar chart of how often each value occurs in vec, sorted by value.

    Inputs
        vec: Series whose values are counted
        name: x-axis label; vec.name is used when this is None
        ax: axes to draw on; passed to init_ax
    """
    _, ax = init_ax(ax)
    counts = vec.value_counts().sort_index()
    counts.plot(kind='bar', ax=ax)
    x_label = vec.name if name is None else name
    ax.set_xlabel(x_label)
    ax.set_ylabel('# of Patients')
Ejemplo n.º 14
0
def count_plot(vec, name=None, ax=None):
    """
    Bar chart of how often each value occurs in vec, sorted by value.

    Inputs
        vec: Series whose values are counted
        name: x-axis label; vec.name is used when this is None
        ax: axes to draw on; passed to init_ax
    """
    _, ax = init_ax(ax)
    counts = vec.value_counts().sort_index()
    counts.plot(kind='bar', ax=ax)
    x_label = vec.name if name is None else name
    ax.set_xlabel(x_label)
    ax.set_ylabel('# of Patients')