コード例 #1
0
def plot_OD_curve():
    """Draw the cadmium growth curves and save them as a PDF.

    Reads ``data/070513_cadmium.csv``, indexes it by its ``Hour`` column and
    plots every column whose name starts with ``'ave '`` against the index.
    The figure is written to ``<save_dir>/cadmium_growth.pdf``.
    """
    from src.plot_utils import apply_global_settings
    from src.colors import parula

    growth = pd.read_csv('data/070513_cadmium.csv').set_index('Hour')

    apply_global_settings()

    fig = plt.figure(figsize=(8, 6))

    # Averaged series only, drawn in reverse column order.
    is_average = growth.columns.str.startswith('ave ')
    series_names = list(reversed(growth.columns[is_average]))
    cmap = plt.get_cmap('magma_r')

    for rank, series in enumerate(series_names):
        # The second space-separated token of the column name goes in the
        # legend label.
        legend_text = "%s uM" % (series.split(' ')[1])
        # Sample the colormap from 0.1 upward so no line uses its lightest end.
        shade = cmap(0.1 + 0.9 * (rank * 1. / len(series_names)))
        plt.plot(growth.index, growth[series],
                 label=legend_text, color=shade, lw=2)

    plt.xlim(0, growth.index.max())
    plt.ylim(0, 1.7)
    plt.legend(ncol=4)
    plt.suptitle("Growth curve, cadmium", fontsize=24)
    plt.ylabel("Optical density, OD$_{600}$", fontsize=18)
    plt.xlabel("Time, hours", fontsize=18)

    out_path = '%s/cadmium_growth.pdf' % (save_dir)
    plt.savefig(out_path, transparent=True, dpi=scatter_dpi)
コード例 #2
0
def plot_compare(data, markers, colors, fill_styles, metric='r2',
                 rename=None, show_legend=False):
    """Plot per-model evaluation scores as grouped markers per time point.

    Each column of ``data`` is one model. Its values are plotted against the
    time-point slots (so a column is expected to hold one value per entry of
    ``times``), with the models of one slot spread horizontally.

    Parameters
    ----------
    data : DataFrame-like
        One column per model.
    markers, colors, fill_styles : sequence
        Marker, color and fill style for each model, indexed by column
        position.
    metric : str
        ``'mse'`` selects the MSE axis styling; any other value selects the
        R^2 styling.
    rename : mapping, optional
        Maps a column name to the label shown in the legend. Columns not in
        the mapping keep their own name.
    show_legend : bool
        Draw the legend to the right of the axes when true.
    """
    plot_utils.apply_global_settings()

    # ``None`` stands in for "no renames" so that no dict object is shared
    # between calls through the default argument.
    if rename is None:
        rename = {}

    model_names = data.columns

    fig, ax = plt.subplots(figsize=(9,4))
    fig.tight_layout(rect=[0.05, 0.05, 0.65, 0.9])

    times = [0, 7.5, 15, 30, 60, 120]
    # One x slot per time point (previously a hard-coded 6).
    x = np.arange(len(times))
    spacing = 0.09
    n_models = len(model_names)

    for i, model_name in enumerate(model_names):
        model_data = data[model_name]
        label = rename[model_name] if model_name in rename else model_name

        # Center the group of models on the slot, `spacing` apart.
        x_pos = x - spacing*(n_models-1)/2. + spacing*i

        ax.plot(x_pos, model_data, marker=markers[i],
                color=colors[i],
                label=label.replace('Promoter occupancy',
                                    'Small fragment occupancy'),
                markersize=7, alpha=1.,
                fillstyle=fill_styles[i], linewidth=0,
                zorder=10)

    if metric == 'mse':
        ax.set_ylim(1, 8.5)
        ax.set_ylabel('MSE', fontsize=12)
        ax.set_title("Model evaluation, MSE")

        # Faint horizontal guide lines.
        for level in np.arange(0, 10, 1):
            ax.axhline(y=level, linewidth=0.1, linestyle='solid',
                       color='#303030', zorder=1)
    else:
        ax.set_yticks(np.arange(-1.2, 1.2, 0.2))
        ax.set_ylim(-0.05, 1.0)
        ax.set_ylabel('Coefficient of determination, $R^2$', fontsize=16)
        ax.set_title("Model evaluation, $R^2$", fontsize=20)

        # Faint horizontal guide lines.
        for level in np.arange(0, 1.0, 0.1):
            ax.axhline(y=level, linewidth=0.1, linestyle='solid',
                       color='#303030', zorder=1)

    ax.set_xticks(np.arange(0, len(times)))
    ax.set_xticklabels(['%s\'' % t for t in times])

    # NOTE(review): the x-range starts at 0.5, so anything plotted around
    # x=0 (the first time slot) is outside the visible area - confirm that
    # this is intended.
    ax.set_xlim(.5, 5.5)
    ax.tick_params('x', labelsize=16, length=0, width=0, pad=10)

    if show_legend:
        ax.legend(bbox_to_anchor=(1.02, 1.), fontsize=14, frameon=False)

    # Vertical separators between neighbouring time slots.
    for i in range(len(times)):
        ax.axvline(x=i+0.5, linewidth=1.0, color='#303030')
コード例 #3
0
def plot_colorbars(small_peaks, write_path=None):
    """Draw two stacked stand-alone colorbars (transcription rate, occupancy).

    Both bars use ``small_peaks.im_scale`` as their limit. The first is
    scaled by ``1/1``, the second by ``1/small_peaks.bin_scale``.
    ``write_path`` is accepted but not used by this function.
    """
    from src.chromatin_heatmaps import _make_fake_cbar
    from src import plot_utils

    apply_global_settings(linewidth=2)

    fig, axs = plt.subplots(2, 1, figsize=(8,2))
    plt.subplots_adjust(hspace=0.0, wspace=0.0)
    fig.subplots_adjust(left=0.5)
    fig.patch.set_alpha(0.0)

    # One (title, divisor, tick format) triple per colorbar axis.
    bar_titles = ['Log$_2$ fold-change\ntranscription rate',
                  'Log$_2$ fold-change\nbinding occupancy']
    divisors = [1, small_peaks.bin_scale]
    tick_formats = ['%.0f', '%.2f']

    for bar_ax, bar_title, divisor, tick_format in zip(
            axs, bar_titles, divisors, tick_formats):
        limit = small_peaks.im_scale
        _make_fake_cbar(bar_ax, limit, bar_title, scale=1./divisor,
                        str_format=tick_format)
        plot_utils.format_spines(bar_ax, lw=1.2)
コード例 #4
0
def plot_cluster_lines(chrom_clustering):
    """Draw one row of three panels for every cluster.

    The clusters are the unique values of
    ``chrom_clustering.hc.clustered_data.cluster``. Each row is filled by
    ``chrom_clustering.plot_cluster``; only the last row is asked to draw
    x labels (``xlab=True``).

    Parameters
    ----------
    chrom_clustering : object
        Must expose ``hc.clustered_data`` and a ``plot_cluster`` method that
        takes three axes, the cluster id, ``title`` and ``xlab``.
    """
    apply_global_settings(titlepad=15)

    cluster_data = chrom_clustering.hc.clustered_data
    clusters = cluster_data.cluster.unique()
    n_clusters = len(clusters)

    # squeeze=False keeps `axs` two-dimensional even when there is a single
    # cluster; otherwise matplotlib returns a 1-D array and row indexing
    # below would hand back single Axes objects instead of rows.
    fig, axs = plt.subplots(n_clusters, 3,
                            figsize=(3 * 2, n_clusters * 2.2),
                            squeeze=False)
    fig.patch.set_alpha(0.0)

    fig.tight_layout(rect=[0.05, 0.05, 0.95, 0.95])
    plt.subplots_adjust(hspace=0.75, wspace=0.25)

    for i, (axs_row, cluster) in enumerate(zip(axs, clusters)):
        title = 'Cluster %d' % cluster
        chrom_clustering.plot_cluster(axs_row[0],
                                      axs_row[1],
                                      axs_row[2],
                                      cluster,
                                      title=title,
                                      xlab=(i == n_clusters - 1))

        # The figure background is transparent; keep the panels opaque white.
        for ax in axs_row:
            ax.patch.set_alpha(1.0)
            ax.patch.set_facecolor('white')
コード例 #5
0
def main():
    """Run every step of the "6 Reviewer Materials" stage in order."""
    banner = (
        "*******************************",
        "* 6    Reviewer Materials     *",
        "*******************************",
    )
    for banner_line in banner:
        print_fl(banner_line)

    print_preamble()

    # Make sure the output directory exists before anything is saved.
    mkdirs_safe([save_dir])

    plot_utils.apply_global_settings()

    # Shift edge analysis plots.
    shift_edge_analysis.main()

    # Additional scatter plots.
    scatters()

    xrate_vs_TPM()

    # DANPOS plots.
    danpos()

    # OD growth curve.
    plot_OD_curve()
コード例 #6
0
def plot_tf_regulon_heatmap(tf, small_peaks, is_high=True):
    """Heatmap of the ``times`` columns for the regulon of one TF.

    Rows are the entries of ``small_peaks.regulon_xrate`` whose ``tf`` equals
    ``tf``, indexed by ``orf_name`` and ordered by descending row mean.
    ``is_high`` is accepted but not used by this function.

    Returns the created figure.
    """
    apply_global_settings(titlepad=15)

    regulon_xrate = small_peaks.regulon_xrate
    regulon_rows = regulon_xrate[regulon_xrate.tf == tf]

    plot_data = regulon_rows.set_index('orf_name')[times]
    plot_data = plot_data.drop_duplicates()

    # Order rows from the highest to the lowest mean across time points.
    row_means = plot_data[times].mean(axis=1)
    row_order = row_means.sort_values(ascending=False).index
    plot_data = plot_data.loc[row_order]

    fig, ax = plt.subplots(1, 1, figsize=(4, 10))
    fig.tight_layout(rect=[0.1, 0.1, 0.9, 0.9])

    color_limit = small_peaks.im_scale
    ax.imshow(plot_data, vmin=-color_limit, vmax=color_limit,
              cmap='RdBu_r', aspect=1.)

    if len(plot_data) >= 50:
        # Too many rows for readable labels.
        ax.set_yticks([])
    else:
        gene_names = paper_orfs[['name']].loc[plot_data.index]['name']
        ax.set_yticks(np.arange(len(plot_data)))
        ax.set_yticklabels(gene_names)

    ax.set_title("%s regulon\ntranscription" % tf)

    ax.tick_params(axis='y', length=0, pad=4, labelsize=10)
    ax.set_xticks([])
    return fig
コード例 #7
0
    def plot_antisense(self, antisense=None):
        """Plot antisense values per cluster and time point.

        For every cluster (numbered ``1..N``) and every time point, a
        vertical bar spans the 25%-75% quantile range and a black diamond
        marks the median. When ``antisense`` is ``None`` the table is read
        from ``<rna_dir>/antisense_TPM.csv``.
        """
        apply_global_settings(titlepad=45)

        cluster_data = self.hc.clustered_data
        from src.datasets import read_orfs_data

        if antisense is None:
            antisense = read_orfs_data('%s/antisense_TPM.csv' % rna_dir)

        # Restrict to the clustered rows and attach their cluster id.
        data = antisense.loc[cluster_data.index].join(
            cluster_data[['cluster']])

        fig, ax = plt.subplots(figsize=(7, 4))
        fig.tight_layout(rect=[0.05, 0.1, 0.95, 0.8])
        times = [0.0, 7.5, 15, 30, 60, 120]

        num_clusters = len(data.cluster.unique())
        spacing = 0.13

        for c in range(1, num_clusters + 1):
            cluster_values = data[data.cluster == c][times]

            for slot, time in enumerate(times):
                values = cluster_values[time].values
                q75 = np.quantile(values, 0.75)
                q25 = np.quantile(values, 0.25)
                mid = np.median(values)

                # Spread the time points around the cluster's integer tick.
                x_pos = c + spacing * slot - spacing * 2.5
                ax.plot([x_pos, x_pos], [q75, q25],
                        linewidth=3.,
                        color='#FF5C5C',
                        alpha=1,
                        solid_capstyle='butt')
                ax.scatter(x_pos,
                           mid,
                           s=6,
                           marker='D',
                           color='black',
                           zorder=10)

        ax.set_xticks(np.arange(num_clusters + 1))
        ax.set_xlim(0.5, num_clusters + 0.5)

        ax.tick_params(axis='x', length=0, pad=10, labelsize=16)
        ax.tick_params(axis='y', labelsize=16)

        ax.set_ylabel('Transcripts per million', fontsize=18)
        ax.set_xlabel('Cluster', fontsize=18)
        ax.set_title('Antisense transcripts per cluster', fontsize=23)

        # Light separators between neighbouring clusters.
        for boundary in np.arange(1, num_clusters):
            ax.axvline(boundary + 0.5, color='#d0d0d0', linewidth=1)
コード例 #8
0
def plot_frag_len_dist(mnase_data,
                       title="Subsampled, merged fragment lengths",
                       normalize=True,
                       plt_legend=False):
    """Plot the fragment-length distribution for each time point.

    Rows of ``mnase_data`` are counted per ``(time, length)`` pair. For every
    entry of ``config.times`` the most frequent length is printed and the
    counts are drawn as one line, divided by their sum when ``normalize`` is
    true. The legend is drawn only when ``plt_legend`` is true.
    """
    from config import times
    from src.plot_utils import plot_density, apply_global_settings
    from src.timer import Timer

    per_pair = mnase_data.groupby(['time', 'length']).count()
    lengths = per_pair[['chr']].rename(columns={'chr': 'count'})

    timer = Timer()

    apply_global_settings()

    fig, ax = plt.subplots(figsize=(6, 4))
    fig.tight_layout(rect=[0.1, 0.1, 0.825, 0.85])

    cmap = plt.get_cmap('magma_r')

    for rank, time in enumerate(times):
        shade = cmap(float(rank) * 0.8 / 5. + 1. / 5)

        length_counts = lengths.loc[time]
        most_frequent = length_counts.idxmax().values[0]

        print("Most frequent length for %s: %d" % (str(time), most_frequent))

        if normalize:
            length_counts = length_counts / length_counts.sum()

        ax.plot(length_counts, color=shade, label="%s min" % str(time))

    ax.set_title(title, fontsize=20)
    ax.set_xlabel('Fragment length (bp)')
    ax.set_ylabel('Density')
    ax.set_ylim(0, 0.02)
    ax.set_xlim(0, 250)

    if plt_legend:
        ax.legend(bbox_to_anchor=(1.35, 1.), frameon=False)
コード例 #9
0
def plot_nuc_calls_cc():
    """Plot the summed nucleosomal cross-correlation at time 0 with 3 peaks.

    The first peak is the column with the largest sum. Each following peak is
    searched from 80 columns after the previous one up to column 499. The two
    peak-to-peak distances are logged through ``print_fl``.
    """
    from src.plot_utils import apply_global_settings
    from config import cross_corr_sense_path
    import matplotlib.patheffects as path_effects

    cross = pd.read_hdf(cross_corr_sense_path, 'cross_correlation')
    time = 0

    cur_cross = cross.loc['nucleosomal'].query('time == %s' % str(time))
    position_cols = cur_cross.columns
    cur_cross = cur_cross.reset_index().set_index('orf_name')[position_cols]

    def _strongest_from(first_position):
        # Column with the largest sum in [first_position, 500).
        return cur_cross[np.arange(first_position, 500)].sum().idxmax()

    summed = cur_cross.sum()
    peak_1 = summed.idxmax()
    peak_2 = _strongest_from(peak_1 + 80)
    peak_3 = _strongest_from(peak_2 + 80)

    print_fl("Computed nucleosome spacing:", log=True)
    print_fl("+1, +2 distance: %0.0f" % (peak_2 - peak_1), log=True)
    print_fl("+2, +3 distance: %0.0f" % (peak_3 - peak_2), log=True)

    apply_global_settings()

    fig, ax = plt.subplots(1, 1, figsize=(6, 4))
    fig.tight_layout(rect=[0.1, 0.1, 0.9, 0.9])

    ax.plot(summed)

    for peak in (peak_1, peak_2, peak_3):
        ax.axvline(peak, linestyle='solid', color='red', alpha=0.25, lw=3)
        annotation = ax.text(peak, 500, "TSS+%d" % peak, ha='center',
                             fontsize=12)
        # White outline keeps the label readable on top of the curve.
        annotation.set_path_effects([
            path_effects.Stroke(linewidth=10, foreground='white'),
            path_effects.Normal()
        ])

    tick_positions = np.arange(-200, 800, 100)
    ax.set_xticks(tick_positions)
    tick_labels = []
    for val in tick_positions:
        if val == 0:
            tick_labels.append('TSS')
        elif val < 0:
            tick_labels.append(str(val))
        else:
            tick_labels.append('+%d' % val)
    ax.set_xticklabels(tick_labels)

    ax.set_title("Gene body nucleosomes, 0 min", fontsize=24)
    ax.set_ylim(0, 600)
    ax.set_xlim(-200, 600)
    ax.set_xlabel('Position (bp)')
    ax.set_ylabel(
        'Cumulative nucleosome\ncross correlation score across genes')
コード例 #10
0
    def plot_half_lifes(self):
        """Plot the half-life distribution of every cluster.

        Half lives are read from the ``half_life`` column of
        ``data/half_life.csv`` and restricted to the clustered rows. For each
        cluster (numbered ``1..N``) a vertical bar spans the 25%-75% quantile
        range and a black diamond marks the median.

        The number of clusters is taken from the data instead of being fixed
        at 7; with seven clusters the figure is the same as before.
        """
        apply_global_settings(titlepad=20)

        cluster_data = self.hc.clustered_data
        from src.datasets import read_orfs_data
        half_lifes = read_orfs_data('data/half_life.csv')[['half_life']]

        data = half_lifes.loc[cluster_data.index]
        data = data.join(cluster_data[['cluster']])

        fig, ax = plt.subplots(figsize=(7, 5))
        fig.tight_layout(rect=[0.05, 0.1, 0.95, 0.8])

        num_clusters = len(data.cluster.unique())
        spacing = 0.13

        for c in range(1, num_clusters + 1):
            cur = data[data.cluster == c].half_life
            q75 = np.quantile(cur, 0.75)
            q25 = np.quantile(cur, 0.25)
            median = np.median(cur)

            # Same horizontal offset formula as before (slot 3 of 6).
            x = c + spacing * 3 - spacing * 2.5
            ax.plot([x, x], [q75, q25],
                    linewidth=6.,
                    color='#abd1fc',
                    alpha=1,
                    solid_capstyle='butt')
            ax.scatter(x, median, s=16, marker='D', color='black', zorder=10)

        ax.set_xticks(np.arange(num_clusters + 1))
        ax.set_xlim(0.5, num_clusters + 0.5)
        ax.set_ylim(0, 50)

        ax.tick_params(axis='x', length=0, pad=10, labelsize=16)
        ax.tick_params(axis='y', labelsize=16)

        ax.set_ylabel('Half life, min', fontsize=18)
        ax.set_xlabel('Cluster', fontsize=18)
        ax.set_title('Half lifes per cluster', fontsize=30)

        # Separators after every cluster, including the right-hand edge, as
        # the fixed-size version drew them.
        for boundary in np.arange(1, num_clusters + 1):
            ax.axvline(boundary + 0.5, color='#d0d0d0', linewidth=1)
コード例 #11
0
def plot_antisense_lengths():
    """Histogram of ``stop - start`` for the computed antisense boundaries.

    Boundaries are read from
    ``<rna_dir>/antisense_boundaries_computed.csv``; rows whose length is
    missing are dropped before plotting.
    """
    boundaries = read_orfs_data(
        '%s/antisense_boundaries_computed.csv' % rna_dir)

    from src.plot_utils import apply_global_settings
    apply_global_settings()

    fig, ax = plt.subplots(figsize=(4.5, 3))
    fig.tight_layout(rect=[0.05, 0.05, 0.95, 0.9])

    span_lengths = boundaries.stop - boundaries.start
    antisense_lengths = span_lengths.dropna()

    ax.hist(antisense_lengths, bins=25, linewidth=1, edgecolor='white')

    heading = "Antisense transcript lengths, N=%d" % len(antisense_lengths)
    ax.set_title(heading, fontsize=18)
    ax.set_xlabel("Length (bp)")
    ax.set_ylabel("# of genes")
コード例 #12
0
def plot_association(data, key, name, color):
    """Plot rows sorted by ``key`` next to their antisense fold-change.

    Left panel: the sorted ``key`` values as a scatter against their rank.
    Right panel: the sorted rows are cut into 10 consecutive, equally sized
    groups; for each group a violin of the ``'120.0_antisense_x_logfold'``
    column and a diamond at its median are drawn at the group's mid rank.

    Parameters
    ----------
    data : DataFrame
        Must contain the ``key`` column and ``'120.0_antisense_x_logfold'``.
    key : str
        Column to sort by and to plot in the left panel.
    name : str
        Human-readable name of ``key`` used in labels and titles.
    color :
        Scatter color of the left panel.
    """
    apply_global_settings(titlepad=10)

    prom_data = data.sort_values(key)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 8))
    ax1.scatter(prom_data[key], np.arange(len(prom_data)), s=10, color=color)
    ax1.set_xlim(-7, 7)
    ax1.set_ylim(0, len(prom_data))
    ax1.axvline(0, color='black', linestyle='solid', linewidth=1.5, zorder=1)
    ax1.set_yticks([])
    ax1.set_xlabel('$\\Delta$ %s z-score' % (name[0:1].upper() + name[1:]))
    ax1.axvline(1, color='red', linestyle='solid', linewidth=1.5, zorder=1,
                alpha=0.5)

    num_q = 10
    # Floor division: the group size is used as a slice bound, which must be
    # an integer (true division yields a float on Python 3).
    n_q = len(prom_data) // num_q

    for q in range(num_q):

        anti = prom_data['120.0_antisense_x_logfold'][(q*n_q):((q+1)*n_q)]
        y_center = n_q*q + n_q/2.

        plot_violin(anti, ax=ax2, bw=0.3, arange=(-10, 10, 0.05),
                    y_offset=y_center, mult=500., color='#c3abdb')

        # White-filled diamond with a black outline at the group median,
        # drawn explicitly on the right panel.
        group_median = np.median(anti)
        ax2.plot(group_median, y_center, markersize=7,
                 color='white', marker='D', zorder=4)
        ax2.plot(group_median, y_center, markersize=7,
                 color='black', fillstyle='none', marker='D', zorder=4)

    ax2.set_ylim(0, len(prom_data))
    ax2.set_xlim(-7, 7)
    ax2.set_xlabel('Log$_2 $ fold-change antisense transcripts')

    ax2.axvline(0, color='black', linestyle='solid', linewidth=1.5, zorder=2)
    ax2.set_yticks([])
    plt.suptitle('Antisense %s\n0-120 min' % name, fontsize=16)

    # Backslash escaped explicitly; the resulting text is unchanged.
    ax1.set_title("Sorted $\\Delta$ %s" % name)
    ax2.set_title("Log$_2 $ fold-change Antisense transcripts")
コード例 #13
0
def main():
    """Run every plotting stage of the "5 Figures" step in order."""
    for banner_line in ("*********************",
                        "* 5    Figures     *",
                        "*********************"):
        print_fl(banner_line)

    print_preamble()

    plot_utils.apply_global_settings()

    # (section header, stage function), executed top to bottom.
    stages = [
        ("\n------- Typhoon ----------\n", typhoon_plots),
        ("\n------- Line/Cross Plots ----------\n", summary_plots),
        ("\n------- Locus plots ----------\n", locus_plots),
        ("\n------- GO Plots ----------\n", go_bar_plots),
        ("\n------- Heatmap Plots ----------\n", plot_heatmaps),
        ("\n------- Regression Plots ----------\n", regression_plots),
        ("\n------- Antisense Plots ----------\n", antisense_plots),
        ("\n------- TF Plots ----------\n", tf_plots),
        ("\n------- Other ----------\n", misc_plots),
        ("\n------- Entropy ----------\n", entropy_examples),
        ("\n--------- Shift -----------\n", shift_plots),
    ]

    for header, run_stage in stages:
        print_fl(header)
        run_stage()
コード例 #14
0
ファイル: typhoon.py プロジェクト: HarteminkLab/cadmium-paper
def draw_example_mnase_seq(plotter, save_dir):
    """Save an example typhoon plot as ``<save_dir>/example_mnase_seq.pdf``.

    Plots the time-0 data of chromosome 2, positions 124380-125380, above a
    legend axis. ``plotter.linewidth`` is set to 1 while drawing and to 2.5
    afterwards, and the global plot settings are re-applied at the end.
    """
    from src.chromatin import filter_mnase

    apply_global_settings(linewidth=1.75)
    plotter.linewidth = 1

    span = (124380, 125380)
    span_start, span_stop = span
    example_data = filter_mnase(plotter.all_mnase_data,
                                span_start,
                                span_stop,
                                chrom=2,
                                time=0)

    fig, (ax, leg_ax) = plt.subplots(2, 1, figsize=(5, 4))
    fig.tight_layout(rect=[0.1, 0.1, 0.95, 0.945])
    plt.subplots_adjust(hspace=0.0, wspace=0.5)

    plotter.set_span_chrom(span, 2)
    plotter.plot_typhoon_time(ax, example_data, 0, scale_z=True)
    ax.set_xlim(*span)

    # Major/minor ticks: every 500/100 on x, every 100/50 on y.
    ax.set_xticks(np.arange(span_start, span_stop, 500))
    ax.set_xticks(np.arange(span_start, span_stop, 100), minor=True)
    ax.set_yticks(np.arange(0, 250, 100))
    ax.set_yticks(np.arange(0, 250, 50), minor=True)

    ax.tick_params(axis='y', labelsize=11.5, zorder=20)

    ax.set_xlabel("Position (bp)", fontsize=16)
    ax.set_ylabel("Fragment length (bp)", fontsize=16, labelpad=7)

    draw_legend(leg_ax, span, 500)

    out_path = '%s/%s.pdf' % (save_dir, 'example_mnase_seq')
    plt.savefig(out_path, transparent=True)

    # Put the plotter and the global settings back to fixed values.
    plotter.linewidth = 2.5
    apply_global_settings()
コード例 #15
0
def plot_gene_tfs_hm(tf, small_peaks, datastore):
    """Heatmap of peak-occupancy change and transcription for one TF's ORFs.

    Uses the rows of ``small_peaks.all_motifs`` whose ``tf`` equals ``tf``.
    The linked peaks of those rows are passed through ``difference`` and
    multiplied by 100, averaged per ORF, joined with the ORF's
    ``datastore.transcript_rate_logfold`` columns and sorted by the
    ``'120.0_sm_occ'`` column in descending order.
    """
    from config import times

    # Motif rows for the requested TF, and the ORFs they point to.
    tf_motifs = small_peaks.all_motifs[small_peaks.all_motifs.tf == tf]
    tf_orfs = tf_motifs.orf.values

    # Transcription data for those ORFs.
    xrate = datastore.transcript_rate_logfold[times].loc[tf_orfs]

    # Linked peaks for those motifs.
    peaks = small_peaks.linked_peaks_normalized
    peaks = peaks.loc[tf_motifs.peak.values]
    peaks_diff = difference(peaks)
    peaks_diff *= 100.0

    # Attach the ORF of every peak, then average the peaks of each ORF.
    peak_to_orf = tf_motifs.set_index('peak')[['orf']]
    plot_data = peaks_diff.join(peak_to_orf).reset_index()
    plot_data = plot_data.groupby('orf').mean()

    combined = plot_data[times].join(xrate[times],
                                     rsuffix='_logfold_TPM',
                                     lsuffix='_sm_occ')
    plot_data = combined.reset_index().groupby('index').mean()

    plot_data = plot_data.sort_values('120.0_sm_occ', ascending=False)
    names = list(plot_data.join(small_peaks.all_orfs[['name']])['name'])

    apply_global_settings()

    plt.figure(figsize=(8, 18))
    plt.imshow(plot_data, aspect=20./len(plot_data), vmin=-10, vmax=10,
               cmap='RdBu_r')
    plt.yticks(np.arange(len(plot_data)), names)
    plt.xticks([3, 9], ['Bin occupancy', 'Transcription'])
コード例 #16
0
def plot_antisense_vs_sense(antisense_logfold_TPM, sense_logfold_rate, time,
                            highlight=None):
    """Plot sense against antisense log2 fold-change for one time point.

    Parameters
    ----------
    antisense_logfold_TPM : DataFrame
        Antisense values; re-indexed to the rows of ``sense_logfold_rate``.
    sense_logfold_rate : DataFrame
        Sense values.
    time :
        Column to read from both tables; also formatted into the title.
    highlight : list, optional
        Forwarded to ``plot_distribution``. Defaults to an empty list.
    """
    # A fresh list per call: the former ``[]`` default was a single object
    # shared by every call and handed on to ``plot_distribution``.
    if highlight is None:
        highlight = []

    antisense_logfold_TPM = antisense_logfold_TPM.loc[sense_logfold_rate.index]

    apply_global_settings()

    sense_data = sense_logfold_rate[time]
    anti_data = antisense_logfold_TPM[time]

    ax = plot_distribution(sense_data, anti_data,
        "log$_2$ fold-change Sense transcription rate",
        "log$_2$ fold-change Antisense transcripts",
        highlight=highlight,
        xlim=(-8, 8), xstep=2,
        ylim=(-8, 8), ystep=2,
        pearson=False, aux_lw=1.5,
        plot_minor=False,
        title="Sense vs antisense\ntranscription, 0-%.0f min" % time)

    # Guide lines at -2 and +2 on both axes.
    for x in [-2, 2]:
        ax.axvline(x, linewidth=2, color='#505050', zorder=98)
        ax.axhline(x, linewidth=2, color='#505050', zorder=98)

        # NOTE(review): the vertical line is drawn a second time, slightly
        # wider (2.5 vs 2). Kept so the figure is unchanged - confirm
        # whether the duplicate is intended.
        ax.axvline(x, linestyle='solid', color='#505050', linewidth=2.5, zorder=98)
コード例 #17
0
def plot_tf_summary(small_peaks, head=None, tail=None):
    """Stem plot of the ``'mean'`` column of ``small_peaks.tf_mean_means``.

    With ``head`` or ``tail`` given, a small figure with one labelled stem
    per row is drawn for the selected rows. Otherwise a large overview
    figure of all rows is drawn. Rows whose name is in
    ``small_peaks.selected_high_tfs`` / ``selected_low_tfs`` are re-drawn in
    red / blue.
    """
    summ_dif = small_peaks.tf_mean_means

    # NOTE(review): `head` selects with .tail() and `tail` with .head(); the
    # original comments say the table is sorted descending - confirm the
    # naming against the sort order.
    if head is not None:
        summ_dif = summ_dif.tail(head)
    elif tail is not None:
        summ_dif = summ_dif.head(tail)

    summ_dif = summ_dif.reset_index().rename(columns={'index': 'name'})

    positions = summ_dif.index.values
    subset = not (head is None and tail is None)
    stem_lw = 7

    if not subset:
        apply_global_settings(linewidth=4, titlepad=80)
        fig, ax = plt.subplots(1, 1, figsize=(16, 10))
        fig.tight_layout(rect=[0.1, 0.1, 0.99, 0.75])
        ax.set_title("Transcription factor binding\noccupancy dynamics, "
            "0-120 min", fontsize=50)
        ax.tick_params(axis='y', length=10, pad=5, labelsize=22)

        ax.set_ylabel("Log$_2$ fold-change\nin average occupancy", fontsize=30)

        ax.set_xticks([])
        ax.set_yticks(np.arange(-0.6, 0.6, 0.1))
    else:
        apply_global_settings()

        width = 4 if head is not None else 3
        fig, ax = plt.subplots(1, 1, figsize=(width, 4))

        fig.tight_layout(rect=[0.15, 0.1, 0.99, 0.9])

        ax.set_xticks(positions)
        ax.set_xticklabels(summ_dif['name'].str.title(), rotation=90,
            ha='center', va='top')
        ax.tick_params(axis='x', length=0, pad=4, labelsize=13.5)
        ax.set_yticks(np.arange(-0.6, 0.6, 0.1))

        # Only the `tail` panel carries the y label; the `head` panel hides
        # its y ticks.
        if head is None:
            ax.set_ylabel("Log$_2$ fold-change\naverage occupancy", fontsize=14)
        else:
            ax.set_yticks([])

    ax.set_xlim(-0.75, len(positions)-1+0.75)

    plot_key = 'mean'

    if subset:
        # Faint guide from every stem's base downwards.
        for pos in np.arange(0, len(summ_dif)):
            ax.plot([pos, pos], [-10, 0], lw=1.5, linestyle='solid', color='#f9f9f9')

    # Grey stem for every row.
    for idx, row in summ_dif.iterrows():
        ax.plot([idx, idx], [0, row.loc[plot_key]], c='#c0c0c0', lw=stem_lw,
            solid_capstyle='butt')

    def _emphasize(selected_names, shade, guide_strength):
        # Re-draw the selected rows (and their tick labels) in `shade`.
        chosen = summ_dif[summ_dif['name'].isin(selected_names)]
        for idx, row in chosen.iterrows():
            if subset:
                ax.plot([idx, idx], [-10, 0], lw=1.5, linestyle='solid',
                        color=shade(guide_strength))
            ax.plot([idx, idx], [0, row.loc[plot_key]], c=shade(),
                    lw=stem_lw, solid_capstyle='butt')

        for ticklabel in ax.get_xticklabels():
            if ticklabel.get_text() in chosen['name'].str.title().values:
                ticklabel.set_color(shade())

    # High group first, then low group.
    _emphasize(small_peaks.selected_high_tfs.index, red, 0.075)
    _emphasize(small_peaks.selected_low_tfs.index, blue, 0.1)

    if not subset:
        high_n, low_n = small_peaks.view_high, small_peaks.view_low

        # Shaded bands behind the two ends of the overview.
        plot_rect(ax, -0.5, -1, low_n, 2, color='#f0f0f0', zorder=0)
        plot_rect(ax, len(summ_dif)-high_n+.5, -1, 20, 2, color='#f0f0f0', zorder=0)

    ax.set_ylim(-0.25, 0.25)
    ax.axhline(0, linewidth=2, color='black')
コード例 #18
0
def plot_tf_scatter(small_peaks, tf_name=None, tf_names=None, t0=0.0, t1=120.0,
    no_annotations=False, labeled_peaks=None, dpi=300):
    """Scatter the ``t0`` against the ``t1`` column of the promoter peaks.

    Without TF names, all peaks are drawn in grey and, unless
    ``no_annotations`` is set, the peaks returned by ``get_threshold_peaks``
    are marked as increased/decreased. With ``tf_name`` or ``tf_names``, the
    peaks of each named TF are overlaid (markers and colors are defined for
    two TFs). Rows joined from ``labeled_peaks`` get a text label.

    Returns ``(fig, ax)``.
    """
    apply_global_settings(dpi=dpi)

    linked_peaks = small_peaks.linked_peaks_normalized
    all_motifs = small_peaks.all_motifs

    plot_data = linked_peaks.loc[small_peaks.prom_peaks['name']].copy()

    fig, ax = plt.subplots(1, 1, figsize=(6.5, 6.5))
    fig.tight_layout(rect=[0.1, 0.1, 0.9, 0.9])

    x = plot_data[t0]
    y = plot_data[t1]

    # Dashed identity line from (0, 0) to (1, 1).
    slope, intercept = (1, 0)
    diag_x = np.array([0, 1])
    ax.plot(diag_x, diag_x*slope + intercept, c='gray', linestyle='dashed',
            linewidth=1)

    # A single TF name is treated as a one-element list.
    if tf_names is None and tf_name is not None:
        tf_names = [tf_name]

    if tf_names is not None:
        ax.scatter(x, y, s=1, c='#d0d0d0')

        markers = ['o', 'x']
        colors = [parula()(0.5), parula()(0.0)]
        selected_sc = []
        labels = []

        for slot, current_tf in enumerate(tf_names):
            tf_peak_ids = all_motifs[all_motifs.tf == current_tf].peak
            sel_peaks = linked_peaks.loc[tf_peak_ids]
            handle = ax.scatter(sel_peaks[t0], sel_peaks[t1],
                                color=colors[slot], marker=markers[slot])

            selected_sc.append(handle)
            labels.append("%s, N=%d" % (current_tf.title(), len(sel_peaks)))

        plt.legend(selected_sc, labels)

        plot_threshold_line(ax, small_peaks.fc_threshold)
    else:
        ax.scatter(x, y, s=1, c='#b0b0b0')

        if not no_annotations:
            high_peaks, low_peaks = get_threshold_peaks(small_peaks,
                plot_data, t0, t1)

            rising = plot_data[plot_data.index.isin(high_peaks)]
            sc1 = ax.scatter(rising[t0], rising[t1], s=20,
                             color=red(), marker='D', linewidth=1,
                             facecolor='none')

            falling = plot_data[plot_data.index.isin(low_peaks)]
            sc2 = ax.scatter(falling[t0], falling[t1], s=20,
                             color=blue(), marker='o', linewidth=1,
                             facecolor='none')

            plt.legend([sc1, sc2],
                ['Increased, N=%d' % len(high_peaks),
                 'Decreased, N=%d' % len(low_peaks)])

            plot_threshold_line(ax, 1)

    if labeled_peaks is not None:
        labeled_peaks = plot_data.join(labeled_peaks, how='inner')

        # Text slightly above each labelled point.
        for _, peak_row in labeled_peaks.iterrows():
            ax.text(peak_row.loc[t0], peak_row.loc[t1]+0.005, peak_row['name'],
                    ha='center', va='center', fontsize=13,
                    fontdict={'style':'italic'})

    ax.set_xlim(0., 0.12)
    ax.set_ylim(0., 0.12)
    ax.set_xlabel('Peak occupancy, 0 min')
    ax.set_ylabel('Peak occupancy, %s min' % (str(t1)))

    if tf_names is None:
        ax.set_title("Change in promoter small fragment\npeaks, 0-%s min, N=%d" %
                     (str(t1), len(x)), fontsize=20)
    else:
        tf_names = [tf.title() for tf in tf_names]
        ax.set_title("%s change in promoter small fragment\npeaks, 0-%.0f min" %
                     ("/".join(tf_names), t1), fontsize=20)

    return fig, ax
コード例 #19
0
def plot_entropy_example(plotter, orf, plot_span, title):
    """Typhoon plot of one ORF at time 120 with its entropy in the title.

    Data within 1000 positions of ``orf.TSS`` are re-centred on the TSS and
    counted. The cross-correlation metrics are computed for time 120 and the
    ``'triple'`` score for this ORF is filled under the typhoon plot. The
    entropy is calculated over the 150 positions starting at
    ``plot_span[0]``.
    """
    from src.chromatin import filter_mnase
    from src.utils import get_orf
    from src.reference_data import all_orfs_TSS_PAS
    import matplotlib.pyplot as plt
    from src.kernel_fitter import compute_triple_kernel
    from src.cross_correlation_kernel import MNaseSeqDensityKernel
    from src.transformations import exhaustive_counts
    from src.cross_correlation import compute_cross_correlation_metrics
    from src.entropy import calc_entropy
    from src.plot_utils import apply_global_settings

    half_window = 1000

    # Fragments around the TSS, expressed relative to it.
    span = (orf.TSS - 1000, orf.TSS + 1000)
    data = filter_mnase(plotter.all_mnase_data,
                        span[0],
                        span[1],
                        chrom=orf.chr,
                        time=120)
    data['orf_name'] = orf.name
    data.mid = data.mid - orf.TSS

    nuc_kernel = MNaseSeqDensityKernel(filepath=nuc_kernel_path)
    sm_kernel = MNaseSeqDensityKernel(filepath=sm_kernel_path)
    triple_kernel = compute_triple_kernel(nuc_kernel)

    wide_counts = exhaustive_counts((-half_window, half_window), (0, 250),
                                    'mid',
                                    'length',
                                    parent_keys=['orf_name', 'time'],
                                    data=data,
                                    returns='wide',
                                    log=False)

    metrics = compute_cross_correlation_metrics(wide_counts,
                                                nuc_kernel,
                                                sm_kernel,
                                                triple_kernel,
                                                times=[120.0])

    triple_cc = metrics.loc['triple'].loc[orf.name].loc[120]

    apply_global_settings()

    # Entropy over the 150 positions starting at the left plot edge.
    entropy_window = np.arange(plot_span[0], plot_span[0] + 150)
    value = calc_entropy(triple_cc[entropy_window].values)

    fig, ax = plt.subplots(1, 1, figsize=(1.5, 2.5))
    fig.tight_layout(rect=[0.0, 0.0, 1, 0.8])
    plt.subplots_adjust(hspace=0.0, wspace=0.5)

    plotter.set_span_chrom(plot_span, orf.chr)
    plotter.plot_typhoon_time(ax, data, 120, scale_z=True)
    ax.set_xlim(*plot_span)

    # No ticks at all on this small panel.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xticks([], minor=True)
    ax.set_yticks([], minor=True)

    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, length=0, pad=0)

    cc_positions = triple_cc.index.values.astype(int)
    cc_scores = triple_cc.values.astype(float)
    ax.fill_between(cc_positions, cc_scores, color='#28a098')

    ax.set_title("%s\n%.1f bits" % (title, value))
コード例 #20
0
ファイル: typhoon.py プロジェクト: HarteminkLab/cadmium-paper
def draw_example_rna_seq(plotter, save_dir):
    """Save an example RNA-seq figure as ``<save_dir>/example_rna_seq.pdf``.

    The figure has three stacked axes: ORF annotations (RPN12 and HXK1), the
    RNA-seq plot for chromosome 6 positions 252000-255500, and a hand-drawn
    legend with one line per strand and time point plus a 500 nt scale bar.
    The global plot settings are re-applied at the end.
    """
    from src.rna_seq_plotter import get_strand_colors

    apply_global_settings(linewidth=2.5)

    span = 252000, 255500
    rna_plotter = plotter.rna_seq_plotter
    orf_plotter = plotter.orfs_plotter
    orfs = plotter.orfs

    rna_plotter.set_span_chrom(span, 6)
    orf_plotter.set_span_chrom(span, 6)

    fig = plt.figure(figsize=(7, 6))
    grid_size = (4, 4)
    orf_ax = plt.subplot2grid(grid_size, (0, 0), colspan=4, rowspan=1)
    ax = plt.subplot2grid(grid_size, (1, 0), colspan=4, rowspan=1)
    leg_ax = plt.subplot2grid(grid_size, (2, 0), colspan=4, rowspan=2)

    fig.tight_layout(rect=[0.05, 0.03, 0.95, 0.945])
    plt.subplots_adjust(hspace=0.25, wspace=0.5)

    # Only the two example genes, with their `orf_name` blanked.
    custom_orfs = orfs[orfs.name.isin(['RPN12', 'HXK1'])]
    custom_orfs = custom_orfs.reset_index(drop=True)
    custom_orfs['orf_name'] = ''

    orf_plotter.plot_orf_annotations(orf_ax,
                                     orf_classes=['Verified'],
                                     custom_orfs=custom_orfs,
                                     should_auto_offset=False)
    rna_plotter.plot(ax=ax)
    orf_ax.set_ylim(-60, 60)
    ax.set_xlabel('Position (bp)', fontsize=24)

    # Legend geometry, in the legend axis' data units.
    offset = 390
    column_spacing = 750
    line_len = 400
    strand_spacing = 1800
    txt_space = 50
    y_start = 2
    rows_per_column = 3

    color_maps = list(reversed(get_strand_colors()))
    times = rna_plotter.times
    strands = 'Watson', 'Crick'

    def _legend_font():
        # Fresh dict for every text call.
        return {'fontname': 'Open Sans', 'fontweight': 'regular'}

    ax.tick_params(axis='y', labelsize=16, zorder=20)

    for strand_i, strand_name in enumerate(strands):
        strand_x = offset + strand_i * strand_spacing

        # Two columns of three entries: six time points per strand.
        for column in range(2):
            x_start = strand_x + column * column_spacing
            x_end = x_start + line_len

            for row in range(rows_per_column):
                time_i = column * rows_per_column + row
                y_plot = y_start - row

                leg_ax.plot([x_start, x_end], [y_plot, y_plot],
                            lw=4,
                            color=color_maps[strand_i][time_i])
                leg_ax.text(x_start - txt_space,
                            y_plot,
                            "%s'" % str(times[time_i]),
                            ha='right',
                            va='center',
                            fontdict=_legend_font(),
                            fontsize=14)

        # Strand name above its block of entries.
        leg_ax.text(strand_x + strand_spacing / 4.,
                    2.8,
                    strand_name,
                    ha='center',
                    va='bottom',
                    fontdict=_legend_font(),
                    fontsize=16)

    span_width = span[1] - span[0]

    # Scale bar with its label drawn on top.
    leg_ax.plot([20, 520], [6, 6],
                lw=24,
                color='#707070',
                solid_capstyle='butt')

    leg_ax.text(50,
                6,
                '500 nt',
                ha='left',
                va='center',
                color='white',
                fontdict=_legend_font(),
                fontsize=16)

    leg_ax.set_xlim(0, span_width)
    leg_ax.set_ylim(-3, 7)
    leg_ax.axis('off')
    plt.savefig('%s/example_rna_seq.pdf' % save_dir, transparent=True)

    apply_global_settings()
コード例 #21
0
def plot_antisense_calling(gene_name, rna_seq_pileup):
    """
    Plot the strand-specific RNA-seq pileup around a gene together with the
    boundaries of its called antisense transcript.

    A two-row figure is created: ORF annotations on the top axis and, on the
    bottom axis, the smoothed log2(pileup+1) of the '+' strand (drawn upwards,
    blue) and of the '-' strand (mirrored downwards, red). The plotted window
    is the gene's transcript padded by 1000 positions on each side. When the
    gene has an entry in the computed antisense boundaries table, its `start`
    and `stop` are marked with red vertical segments.

    gene_name -- gene identifier handed to `get_orf`
    rna_seq_pileup -- pileup table handed to `filter_rna_seq_pileup`; the
        filtered result must provide `strand`, `position` and `pileup`

    Nothing is returned and nothing is saved; the figure is left as the
    current matplotlib figure.
    """

    from src.rna_seq_plotter import get_smoothing_kernel
    from src.plot_utils import apply_global_settings
    from src.utils import get_orf
    from src.transcription import filter_rna_seq_pileup
    from src.transcript_boundaries import load_park_boundaries
    from src.plot_orf_annotations import ORFAnnotationPlotter
    from config import paper_orfs
    from src.reference_data import read_sgd_orfs, read_park_TSS_PAS
    from src.datasets import read_orfs_data

    all_orfs = read_sgd_orfs()
    all_orfs = all_orfs.join(read_park_TSS_PAS()[['TSS', 'PAS']])

    orfs_plotter = ORFAnnotationPlotter(orfs=all_orfs)

    antisense_boundaries = read_orfs_data('%s/antisense_boundaries_computed.csv' % rna_dir)

    park_boundaries = load_park_boundaries()
    park_boundaries = park_boundaries.join(paper_orfs[['name']])

    gene = get_orf(gene_name, park_boundaries)

    # window shown around the transcript
    padding = 1000
    plot_span = gene.transcript_start - padding, gene.transcript_stop + padding
    gene_pileup = filter_rna_seq_pileup(rna_seq_pileup,
                                        plot_span[0], plot_span[1], gene.chr)

    apply_global_settings(30)

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 5.))
    fig.tight_layout(rect=[0.1, 0, 1, 0.85])

    orfs_plotter.set_span_chrom(plot_span, gene.chr)
    orfs_plotter.plot_orf_annotations(ax1)

    # Aggregate only the numeric pileup column; summing the whole frame would
    # also try to aggregate the string `strand` column.
    plus_counts = gene_pileup[gene_pileup.strand == '+']\
        .groupby('position').pileup.sum()
    minus_counts = gene_pileup[gene_pileup.strand == '-']\
        .groupby('position').pileup.sum()

    # Put both strands on one shared position axis so each curve has exactly
    # one value per x; positions missing on a strand count as zero coverage.
    positions = plus_counts.index.union(minus_counts.index)
    plus_signal = np.log2(plus_counts.reindex(positions, fill_value=0) + 1)
    minus_signal = np.log2(minus_counts.reindex(positions, fill_value=0) + 1)

    smooth_kernel = get_smoothing_kernel(100, 20)
    plus_signal = np.convolve(plus_signal, smooth_kernel, mode='same')
    minus_signal = np.convolve(minus_signal, smooth_kernel, mode='same')

    ax2.plot(positions, plus_signal, color=plt.get_cmap('Blues')(0.5))
    ax2.plot(positions, -minus_signal, color=plt.get_cmap('Reds')(0.5))
    ax2.set_xlim(*plot_span)
    ax2.set_ylim(-15, 15)
    ax2.axhline(0, color='black')

    if gene.name in antisense_boundaries.index:
        anti_gene = antisense_boundaries.loc[gene.name]

        # The antisense transcript lies on the strand opposite to the gene:
        # '+' (upper half) for a '-' gene, '-' (lower half) otherwise. The
        # parentheses are required; without them the conditional binds to the
        # middle element only and a 3-tuple is built.
        y_plot = (0, 20) if gene.strand == '-' else (-20, 0)

        for boundary in (anti_gene.start, anti_gene.stop):
            ax2.plot([boundary, boundary], [y_plot[0], y_plot[1]],
                     color='red', linewidth=2.5, solid_capstyle='butt')

    ax2.set_xticks(np.arange(plot_span[0], plot_span[1], 500))
    ax2.set_xticklabels([])
    ax2.set_xticks(np.arange(plot_span[0], plot_span[1], 100), minor=True)

    ax2.tick_params(labelsize=14)
    ax2.set_ylabel("Sum log$_2$ (pileup+1)", fontsize=15)
    ax2.set_xlabel("Position (bp)", fontsize=15)

    ax1.set_title("Calling antisense transcripts", fontsize=26)

    # NOTE(review): fixed genomic positions independent of `gene_name`; they
    # look like markers for one particular locus - confirm they are wanted
    # for every gene before relying on this function generally.
    ax2.axvline(383344)
    ax2.axvline(384114)
コード例 #22
0
def plot_bar_counts(antisense_TPM_logfold, transcript_rate_logfold,
        time=120.0):
    """
    Bar chart of gene counts per (sense, antisense) log-fold-change bucket.

    Both inputs are inner-joined and the columns for `time` are binned into
    three buckets each (below -2, within [-2, 2], above 2). Every one of the
    nine combinations becomes one bar, grouped by the sense bucket; bars
    taller than 1250 are clipped and a break mark is drawn on the axis.
    """

    merged = antisense_TPM_logfold.join(transcript_rate_logfold,
        lsuffix='_antisense_x_logfold',
        rsuffix='_xrate',
        how='inner')

    apply_global_settings(titlepad=20)

    time_label = str(time)
    sense_key = '%s_xrate' % time_label
    antisense_key = '%s_antisense_x_logfold' % time_label

    # nudge the cut points so that exactly -2 and 2 land in the middle bucket
    eps = 1e-10
    lower_cut, upper_cut = -2-eps, 2+eps
    buckets = [
        (float('-inf'), lower_cut),
        (lower_cut, upper_cut),
        (upper_cut, float('inf')),
    ]

    names = ['Decreased, <-2', 'Unchanged, [-2, 2]', 'Increased, >2']

    blues = plt.get_cmap('Blues')
    reds = plt.get_cmap('Reds')
    grays = plt.get_cmap('Greys')
    palettes = (blues, grays, reds)

    # one palette per sense bucket, one shade per antisense bucket
    edge_colors = [cmap(shade) for cmap in palettes
                   for shade in (0.35, 0.5, 0.65)]
    face_colors = [cmap(shade) for cmap in palettes
                   for shade in (0.35, 0.25, 0.65)]

    fig, ax = plt.subplots(figsize=(7, 4.5))
    fig.tight_layout(rect=[0.1, 0.1, 0.75, 0.85])

    group_centers = []
    for sense_idx, (sense_lo, sense_hi) in enumerate(buckets):
        in_sense = ((merged[sense_key] >= sense_lo) &
                    (merged[sense_key] < sense_hi))

        for anti_idx, (anti_lo, anti_hi) in enumerate(buckets):
            in_anti = ((merged[antisense_key] >= anti_lo) &
                       (merged[antisense_key] < anti_hi))

            n_genes = len(merged[in_sense & in_anti])

            # clip very tall bars; the printed number stays the real count
            bar_height = 1275 if n_genes > 1250 else n_genes

            x_pos = sense_idx*3 + anti_idx*0.75 - 0.75
            bar_idx = sense_idx*3 + anti_idx

            # only the middle group feeds the legend, once per antisense bucket
            legend_label = names[anti_idx] if sense_idx == 1 else None

            ax.text(x_pos, bar_height+20, int(n_genes), ha='center')
            ax.bar(x_pos, bar_height,
                   color=edge_colors[bar_idx],
                   label=legend_label,
                   width=.5,
                   facecolor=face_colors[bar_idx],
                   linewidth=2,
                   edgecolor=edge_colors[bar_idx],
                   hatch='\\\\')

            # the middle bar of each group carries the group's tick
            if anti_idx == 1:
                group_centers.append(x_pos)

    ax.set_xticks(group_centers)
    ax.set_xticklabels(names, rotation=0, ha='center')
    ax.tick_params(axis='x', length=0, pad=10)
    ax.set_title('')
    ax.set_ylim(0, 1400)
    ax.set_ylabel('# of genes', labelpad=0)
    ax.set_xlabel('Sense transcription', labelpad=10)

    yticks = np.arange(0, 1400, 200)
    ax.set_yticks(yticks)
    tick_text = [str(tick) for tick in yticks]
    # top tick stands for everything above it because of the clipping
    tick_text[-1] = '>' + tick_text[-1]
    ax.set_yticklabels(tick_text)

    ax.legend(loc=2, title='Antisense transcripts',
              bbox_to_anchor=(1.0, 1.0), frameon=False)

    # separators between the three sense groups
    for divider in (0*3+1.5, 1*3+1.5):
        ax.axvline(divider, color='#F0F0F0', lw=2)

    ax.set_title("Frequency of sense and\nantisense transcription, 0-120 min", fontsize=18)

    # axis-break mark: a white gap framed by two gray strokes
    break_x = [2.67, 3.305]
    ax.plot(break_x, [1115, 1185], lw=4, color='white')
    ax.plot(break_x, [1100, 1170], lw=2, color=grays(0.5))
    ax.plot(break_x, [1130, 1200], lw=2, color=grays(0.5))
コード例 #23
0
def plot_tf_heatmap(small_peaks, lim=5, is_high=True):
    """
    Heatmap of transcription factors with the most increased (`is_high`) or
    most decreased occupancy.

    Three panels are drawn side by side from attributes of `small_peaks`: the
    transcription rate of each factor, its scaled binding occupancy and the
    mean transcription rate of its regulon. Row labels of factors listed in
    the selected high/low set are colored. `lim` is accepted but not used.
    """

    apply_global_settings(titlepad=15)

    fig, ax = plt.subplots(1, 1, figsize=(5, 4))
    fig.tight_layout(rect=[0.1, 0.1, 0.9, 0.9])

    if is_high:
        ranked_tfs = small_peaks.tf_mean_means.tail(small_peaks.view_high)
        highlighted = small_peaks.selected_high_tfs.index.values
    else:
        ranked_tfs = small_peaks.tf_mean_means.head(small_peaks.view_low)
        highlighted = small_peaks.selected_low_tfs.index.values

    # rows to show, reversed so the drawing order is flipped
    tf_table = small_peaks.tf_set
    chosen = tf_table[tf_table.index.isin(ranked_tfs.index)][::-1]

    # the three data panels: TF transcription, occupancy, regulon transcription
    tf_xrate = small_peaks.datastore.transcript_rate_logfold\
        .loc[chosen.orf_name]
    occupancy = small_peaks.tf_means_df.loc[chosen.index]*small_peaks.bin_scale
    regulon_xrate = small_peaks.mean_regulon_xrate\
        .loc[chosen.index.values]

    # a single empty column separates neighbouring panels
    spacer = np.zeros((len(occupancy), 1))
    heat = np.concatenate([tf_xrate.values, spacer, occupancy.values,
                           spacer, regulon_xrate.values], axis=1)
    n_rows, n_cols = heat.shape[0], heat.shape[1]

    ax.imshow(heat, vmin=-small_peaks.im_scale, vmax=small_peaks.im_scale,
        cmap='RdBu_r', origin='lower',
        extent=[0, n_cols, 0, n_rows], aspect=15./n_cols)

    ax.set_xlim(-0.1, n_cols+0.1)
    ax.set_ylim(-0.1, n_rows+0.1)

    hide_spines(ax)

    ax.set_yticks(np.arange(len(chosen))+0.5)
    ax.set_yticklabels([tf_name.title() for tf_name in chosen.index])

    ax.set_xticks([3, 10, 17])
    ax.set_xticklabels(['Transcription', 'Binding\noccupancy', 'Regulon\ntranscription'])

    ax.tick_params(axis='y', length=0, pad=2, labelsize=10)
    ax.tick_params(axis='x', length=0, pad=4, labelsize=11.5)

    direction = "increased" if is_high else "decreased"
    ax.set_title("Transcription factors\nwith %s occupancy" % direction,
        fontsize=16)

    # blank out the spacer columns ...
    for gap_x in [6, 13]:
        plot_rect(ax, gap_x, 0, 1, len(heat), color='white', fill=True,
            joinstyle='miter')

    # ... and frame each of the three panels
    for panel_x in [0, 7, 14]:
        plot_rect(ax, panel_x, 0, 6, len(heat), edgecolor='black', lw=2,
            fill=False, joinstyle='miter')

    label_color = red() if is_high else blue()

    # tick labels were title-cased above; compare in upper case
    for tick in ax.get_yticklabels():
        if tick.get_text().upper() in highlighted:
            tick.set_color(label_color)
コード例 #24
0
    def plot_bar(self, activated_genes=True, title=None):
        """
        Grouped horizontal bar chart of the scores in `self.terms_res`.

        For the 8 terms with the largest score in any of the three chromatin
        columns, one bar per column is drawn (small fragment occupancy,
        nucleosome disorganization, combined) and annotated with its value as
        a power of ten. `activated_genes` picks the red or blue palette, the
        default title wording and the x-axis ticks; `title` overrides the
        default title.
        """

        if title is None:
            direction = 'increase' if activated_genes else 'decrease'
            title = ("Greatest %s in various\nchromatin scores, N=300" % direction)

        plot_utils.apply_global_settings(30)

        table = self.terms_res[['Promoter small fragments',
                                'Nucleosome disorganization',
                                'Combined chromatin']]

        # drop all-zero terms, then keep the 8 with the largest single score
        table = table[table.sum(axis=1) > 0]
        ranking = table.max(axis=1).sort_values(ascending=True).index
        table = table.loc[ranking].tail(8)

        promoter_vals = table['Promoter small fragments'].values
        disorg_vals = table['Nucleosome disorganization'].values
        combined_vals = table['Combined chromatin'].values

        rows = np.arange(len(promoter_vals))
        bar_height = 0.225
        gap = 0.05

        fig, ax = plt.subplots(figsize=(11, 14))
        fig.tight_layout(rect=[0.35, 0.15, 0.90, 0.87])
        fig.patch.set_alpha(0.0)

        if activated_genes:
            cmap = plt.get_cmap('Reds')
            fills = [cmap(0.5), cmap(0.25), cmap(0.8)]
            edges = [cmap(0.8), cmap(0.6), cmap(0.8)]
        else:
            cmap = plt.get_cmap('Blues')
            fills = [cmap(0.5), cmap(0.3), cmap(0.9)]
            edges = [cmap(0.9), cmap(0.7), cmap(0.9)]

        # three bars per term: above, on and below the term's row
        promoter_rows = rows + (bar_height+gap)
        disorg_rows = rows
        combined_rows = rows - (bar_height+gap)

        ax.barh(promoter_rows, promoter_vals, bar_height,
            label='Small fragment occupancy',
            color=fills[0],
            alpha=1)

        # drawn one by one; only the first bar of a series gets a legend entry
        for idx, (row, val) in enumerate(zip(disorg_rows, disorg_vals)):
            ax.barh(y=row, width=val, height=bar_height,
                label='Nucleosome\ndisorganization' if idx == 0 else None,
                color=fills[2])

        for idx, (row, val) in enumerate(zip(combined_rows, combined_vals)):
            ax.barh(row, val, bar_height,
                label='Combined' if idx == 0 else None,
                color=fills[1],
                facecolor=fills[1],
                edgecolor=edges[1],
                hatch='\\\\',
                alpha=1,
                linewidth=2)

        # value labels sit 1% of the largest score to the right of each bar
        label_offset = table.max().max() / 100.
        series = [(promoter_vals, promoter_rows),
                  (disorg_vals, disorg_rows),
                  (combined_vals, combined_rows)]

        for vals, bar_rows in series:
            for val, row in zip(vals, bar_rows):
                if val > label_offset:
                    ax.text(val + label_offset, row, ("10$^{-%0.1f}$" % val),
                        va='center', fontsize=14,
                        fontdict={'family':'Open Sans'})

        ax.legend(loc=4, bbox_to_anchor=(0.5, -0.25),
            frameon=False, fontsize=18)
        ax.set_yticks(np.arange(len(table)))

        # thin separators between neighbouring terms
        for boundary in np.arange(1, len(promoter_vals)):
            ax.axhline(y=(boundary-0.5), color='#D0D0D0', linewidth=1)

        def format_term(term):
            # capitalize, shorten one known very long term, wrap long ones
            term = term[0:1].upper() + term[1:]

            if term == "Maturation of SSU-rRNA from tricistronic rRNA transcript (SSU-rRNA, 5.8S rRNA, LSU-rRNA)":
                term = "Maturation of SSU-rRNA"

            words = term.split(' ')

            if len(term) > 60:
                return '\n'.join([' '.join(words[:3]),
                                  ' '.join(words[3:7]),
                                  ' '.join(words[7:])])
            if len(term) > 30:
                return ' '.join(words[:2]) + '\n' + ' '.join(words[2:])
            return term

        ax.set_yticklabels([format_term(term) for term in table.index.values])

        plot_utils.format_ticks_font(ax)
        plot_utils.format_ticks_font(ax, which='y', fontsize=12)

        max_fdr = self.terms_res.max().max()

        if activated_genes:
            ticks = np.arange(0, 6, 1)
        else:
            ticks = np.arange(0, 100, 20)
            ax.set_xlim(0, round(max_fdr+15))

        ax.set_xticks(ticks)
        ax.set_title(title, fontsize=30)
        # scores are shown as negative exponents
        ax.set_xticklabels(-ticks)
        ax.set_xlabel('log$_{10}$ FDR', fontsize=20)

        ax.tick_params(axis='y', labelsize=18, length=0, pad=20)
        ax.tick_params(axis='x', labelsize=16, pad=10)
コード例 #25
0
ファイル: typhoon.py プロジェクト: HarteminkLab/cadmium-paper
    def plot_figure_setup(self):
        """
        Setup figure for time series subplots with connecting plots between

        Reads `self.times`, `self.dpi`, `self.figwidth`, `self.show_rna`,
        `self.show_orfs` and `self.span`; sets `self.linewidth`.

        The figure is a single-column grid: a two-row top axis, followed for
        every further row by a one-row connecting ("tween") axis and a two-row
        axis. The top axis is the ORF axis when `show_orfs` is set, and the
        first following axis is the RNA axis when `show_rna` is set (its tween
        axis then hosts the legend). All remaining two-row axes are time axes.

        Returns (fig, time_axes, tween_axes, orf_ax, rna_ax); `orf_ax` and
        `rna_ax` are None when the respective row is not shown.
        """

        # configuration
        n = len(self.times)

        titlepad = 10
        self.linewidth = 2

        plot_utils.apply_global_settings(titlepad=titlepad,
                                         linewidth=self.linewidth,
                                         dpi=self.dpi)

        show_rna = self.show_rna
        show_orfs = self.show_orfs
        plot_span = self.span

        add_rows = int(show_orfs + show_rna)
        nrows = n + add_rows
        grid_size = (nrows * 3 - 1, 1)

        # figure height follows the grid height; fall back to the default
        # width when none is configured
        figwidth = 23 if self.figwidth is None else self.figwidth
        fig = plt.figure(figsize=(figwidth, grid_size[0]))

        ax0 = plt.subplot2grid(grid_size, (0, 0), colspan=4, rowspan=2)
        ax0.set_xlim(*plot_span)

        time_axes = []
        tween_axes = []

        # stay None unless the corresponding row is requested
        rna_ax, orf_ax, leg_ax = None, None, None

        if show_orfs:
            orf_ax = ax0
        else:
            time_axes.append(ax0)

        for i in range(0, nrows - 1):

            # first grid row of this tween/time pair
            y = 2 + i * 3

            tween_ax = plt.subplot2grid(grid_size, (y, 0),
                                        colspan=4,
                                        rowspan=1,
                                        zorder=0)
            time_ax = plt.subplot2grid(grid_size, (y + 1, 0),
                                       colspan=4,
                                       rowspan=2,
                                       zorder=0.1)

            tween_ax.set_xlim(plot_span[0], plot_span[1])
            tween_ax.set_ylim(0, 10)

            time_ax.set_xlim(plot_span[0], plot_span[1])
            time_ax.set_ylim(0, 250)

            tween_ax.axis('off')
            tween_ax.xaxis.set_visible(False)

            if i == 0 and show_rna:
                rna_ax = time_ax
                leg_ax = tween_ax
            else:
                # between time subplots
                if i > 1 or not show_rna:
                    tween_axes.append(tween_ax)

                # time subplot
                time_axes.append(time_ax)

        # the legend lives above the RNA axis, so there is nothing to draw
        # into when the RNA row is hidden
        if leg_ax is not None:
            draw_legend(leg_ax, plot_span)

        # more padding for title for smaller plots
        if n == 3:
            fig.tight_layout(rect=[0.075, 0.1, 0.95, 0.93])
        else:
            fig.tight_layout(rect=[0.075, 0.1, 0.95, 0.945])

        plt.subplots_adjust(hspace=0.0, wspace=0.5)

        time_axes[-1].set_xlabel("Position (bp)", fontsize=24)

        if len(time_axes) > 2:
            # floor division keeps the index an int on Python 3 as well
            label_idx = max(len(time_axes) // 2 - 1, 1)
            time_axes[label_idx].set_ylabel("Fragment length (bp)",
                                            fontsize=24,
                                            labelpad=10)

        return fig, time_axes, tween_axes, orf_ax, rna_ax
コード例 #26
0
def plot_distribution(x_data,
                      y_data,
                      xlabel,
                      ylabel,
                      highlight=[],
                      title=None,
                      xlim=(-2.5, 2.5),
                      ylim=(-6, 10),
                      xstep=2,
                      ystep=2,
                      pearson=True,
                      ha='right',
                      va='bottom',
                      plot_aux='cross',
                      groups={},
                      highlight_format={},
                      aux_lw=1.5,
                      s=5,
                      markersize=53,
                      ax=None,
                      text_offset=None,
                      tight_layout=None,
                      dpi=300,
                      bw=None,
                      plot_lr=False,
                      titlesize=18,
                      xticks=None,
                      yticks=None,
                      plot_minor=True):
    """
    Density scatter plot of `y_data` against `x_data` with optional marginal
    densities, highlighted genes, gene groups and summary statistics.

    x_data, y_data -- pandas Series indexed by ORF name
    xlabel, ylabel -- axis labels
    highlight -- gene names to mark and label; resolved through
        `get_orf_name` and skipped when missing or outside `xlim`/`ylim`
    title -- title text; statistics are appended when `pearson`/`plot_lr`
    xlim, ylim -- axis limits, also used for the marginal density ranges
    xstep, ystep -- major tick spacing; minor ticks every 1 are added when
        the step is below 5 and `plot_minor` is set
    pearson -- append Pearson's r and p-value to the title
    ha, va -- default alignment of highlight labels relative to the marker
    plot_aux -- 'cross' (lines through 0), 'diag' (y=x line), 'both', or
        anything else for none
    groups -- {label: {'orfs': [...], 'color': ...}} outlined as diamonds
    highlight_format -- per-gene overrides with the optional keys 'marker',
        'color', 'filled' (presence fills the marker), 'ha' and 'va'
    aux_lw -- line width of the auxiliary lines
    s -- marker size passed to `plot_density_scatter`
    markersize -- marker size of highlighted genes
    ax -- axis to draw into; when None a new figure with top and right
        marginal density axes is created
    text_offset -- distance between marker and label, defaults to 0.5% of
        the x range
    tight_layout -- `rect` forwarded to `fig.tight_layout`
    dpi -- forwarded to `apply_global_settings`
    bw -- bandwidth passed to `plot_density_scatter`; defaults to
        [0.15, 0.15] when a new figure is created
    plot_lr -- draw a linear regression line and append R^2 to the title
    titlesize -- title font size
    xticks, yticks -- (start, stop, step) for `np.arange`, derived from the
        limits and steps when None

    The mutable defaults are never modified inside this function.

    Returns the main axis.
    """

    apply_global_settings(10, dpi=dpi)

    plot_default_ax = ax is None
    if plot_default_ax:
        fig = plt.figure(figsize=(6.5, 6.5))

        fig.patch.set_alpha(0.0)

        grid_len = 9
        grid_size = (grid_len, grid_len)

        # main axis plus one-cell-wide marginals on top and on the right
        ax = plt.subplot2grid(grid_size, (1, 0),
                              colspan=grid_len - 1,
                              rowspan=grid_len - 1)
        tax = plt.subplot2grid(grid_size, (0, 0),
                               colspan=grid_len - 1,
                               rowspan=1)
        rax = plt.subplot2grid(grid_size, (1, grid_len - 1),
                               colspan=1,
                               rowspan=grid_len - 1)
    else:
        # work on the caller's figure so `tight_layout` below has a target
        fig = ax.figure
        tax = None
        rax = None

    if len(groups) > 0 and plot_default_ax:
        fig.tight_layout(rect=[0.14, 0.15, 0.9, 0.9])

    if tight_layout is not None:
        fig.tight_layout(rect=tight_layout)

    if plot_default_ax:
        plt.subplots_adjust(hspace=0.05, wspace=0.04)

        if bw is None:
            bw = [0.15, 0.15]

        # each marginal is sampled and smoothed relative to its own range
        xspan_diff = xlim[1] - xlim[0]
        yspan_diff = ylim[1] - ylim[0]

        y = plot_density(x_data,
                         ax=tax,
                         arange=(xlim[0], xlim[1], xspan_diff * 1e-3),
                         bw=xspan_diff * 1e-2,
                         fill=True,
                         color='#a0a0a0')
        y_max = np.max(y)
        tax.set_xlim(*xlim)
        tax.set_ylim(y_max * -1e-1, y_max * 1.5)

        x = plot_density(y_data,
                         ax=rax,
                         arange=(ylim[0], ylim[1], yspan_diff * 1e-3),
                         bw=yspan_diff * 1e-2,
                         flip=True,
                         fill=True,
                         color='#a0a0a0')
        x_max = np.max(x)
        rax.set_ylim(*ylim)
        rax.set_xlim(x_max * -1e-1, x_max * 1.5)

        hide_spines(rax)
        hide_spines(tax)

    plot_density_scatter(x_data,
                         y_data,
                         s=s,
                         bw=bw,
                         ax=ax,
                         cmap=parula(),
                         alpha=1.,
                         zorder=20)

    # translucent white veil over the scatter so the markers drawn on top
    # of it stand out
    plot_rect(ax,
              xlim[0],
              ylim[0],
              xlim[1] - xlim[0],
              ylim[1] - ylim[0],
              'white',
              fill_alpha=0.5,
              zorder=90)

    for group_name, group in groups.items():
        group_orfs = group['orfs']
        group_x = x_data[x_data.index.isin(group_orfs)]
        group_y = y_data[y_data.index.isin(group_orfs)]
        ax.scatter(group_x,
                   group_y,
                   s=53,
                   facecolor='none',
                   color=group['color'],
                   zorder=98,
                   marker='D',
                   linewidth=1.5,
                   label=group_name,
                   rasterized=True)

    if text_offset is None:
        text_offset = (xlim[1] - xlim[0]) * 5e-3

    for gene_name in highlight:
        orf_name = get_orf_name(gene_name)
        if orf_name not in x_data.index:
            continue

        selected_x = x_data.loc[orf_name]
        selected_y = y_data.loc[orf_name]

        if selected_x > xlim[1] or selected_x < xlim[0]:
            continue
        if selected_y > ylim[1] or selected_y < ylim[0]:
            continue

        # per-gene overrides on top of the defaults
        gene_fmt = highlight_format.get(gene_name, {})
        marker = gene_fmt.get('marker', 'D')
        color = gene_fmt.get('color', '#c43323')
        facecolor = color if 'filled' in gene_fmt else 'none'
        cur_ha = gene_fmt.get('ha', ha)
        cur_va = gene_fmt.get('va', va)

        ax.scatter(selected_x,
                   selected_y,
                   s=markersize,
                   facecolor=facecolor,
                   color=color,
                   zorder=98,
                   marker=marker,
                   linewidth=1.5)

        # push the label away from the marker on the side it is anchored
        x_shift = {'right': -text_offset, 'center': 0}.get(cur_ha,
                                                           text_offset)
        y_shift = -text_offset if cur_va == 'top' else text_offset

        text = ax.text(selected_x + x_shift,
                       selected_y + y_shift,
                       gene_name,
                       fontdict={
                           'fontname': 'Open Sans',
                           'fontweight': 'regular',
                           'style': 'italic'
                       },
                       fontsize=12,
                       ha=cur_ha,
                       va=cur_va,
                       zorder=99)
        # white halo keeps the label readable on top of the points
        text.set_path_effects([
            path_effects.Stroke(linewidth=3, foreground='white'),
            path_effects.Normal()
        ])

    if xticks is None:
        xticks = xlim[0], xlim[1] + xstep, xstep

    ax.set_xticks(np.arange(*xticks))

    if yticks is None:
        yticks = ylim[0], ylim[1] + ystep, ystep

    ax.set_yticks(np.arange(*yticks))

    if xstep < 5 and plot_minor:
        ax.set_xticks(np.arange(xticks[0], xticks[1], 1), minor=True)

    if ystep < 5 and plot_minor:
        ax.set_yticks(np.arange(yticks[0], yticks[1], 1), minor=True)

    ax.tick_params(axis='x', pad=5, labelsize=15)
    ax.tick_params(axis='y', pad=5, labelsize=15)

    ax.set_xlim(*xlim)
    ax.set_ylim(*ylim)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    if len(groups) > 0:
        ax.legend(loc=1,
                  bbox_to_anchor=(0.475, -0.2),
                  frameon=False,
                  fontsize=14)

    if plot_aux == 'cross' or plot_aux == 'both':
        ax.axvline(0,
                   linestyle='solid',
                   color='#505050',
                   linewidth=aux_lw,
                   zorder=97)
        ax.axhline(0,
                   linestyle='solid',
                   color='#505050',
                   linewidth=aux_lw,
                   zorder=97)

    if plot_aux == 'diag' or plot_aux == 'both':
        # y = x, drawn well beyond the visible range
        ax.plot([xlim[0] * 2, xlim[1] * 2], [xlim[0] * 2, xlim[1] * 2],
                linestyle='solid',
                color='#505050',
                linewidth=aux_lw,
                zorder=97)

    if pearson:
        from src.math_utils import convert_to_latex_sci_not

        cor, pval = pearsonr(x_data, y_data)
        pval = convert_to_latex_sci_not(pval)

        stats = "Pearson's r=%.2f, p=%s" % (cor, pval)
        # without a title show the statistics alone instead of "None"
        title = stats if title is None else "%s\n%s" % (title, stats)

    if plot_lr:
        # plot linear regression
        reg = get_linear_model_coef(x_data, y_data)[0]
        line_x = np.arange(-100, 100)
        line_y = reg.predict(line_x.reshape(len(line_x), 1))
        ax.plot(line_x, line_y, zorder=100, c='gray', linestyle='dashed',
                lw=1.5)

        from sklearn.metrics import r2_score
        predicted = reg.predict(x_data.values.reshape(len(x_data), 1))
        r2 = r2_score(y_data, predicted)

        r2_label = "$R^2$=%.2f" % r2
        title = r2_label if title is None else "%s, %s" % (title, r2_label)

    if plot_default_ax:
        tax.set_title(title, fontsize=titlesize)
    else:
        ax.set_title(title, fontsize=titlesize)

    return ax
コード例 #27
0
def plot_ends_heatmap(orf_0_nuc_mid_counts,
                      orf_120_nuc_mid_counts,
                      orf_0_nuc_start_counts,
                      orf_120_nuc_start_counts,
                      orf_0_nuc_stop_counts,
                      orf_120_nuc_stop_counts,
                      head=None,
                      tail=None):
    """
    Plot heatmaps of the change in nucleosome fragment counts between the
    "0" and "120" inputs for fragment starts ("Left"), midpoints ("Middle")
    and stops ("Right").

    The top two grid rows hold one heatmap per group showing the "120" counts
    minus the "0" counts; the bottom row is filled by `plot_ends_comparison`
    from the same inputs.

    head -- when given, only the first `head` rows of each difference are
        shown and the title reads "downstream"
    tail -- used when `head` is None: only the last `tail` rows are shown and
        the title reads "upstream"

    NOTE(review): when both `head` and `tail` are None the title formatting
    with `%d` receives None and raises TypeError - confirm that callers
    always pass one of them.
    """

    apply_global_settings(titlepad=10)

    # pair up the "0" and "120" inputs per fragment feature, in panel order
    mids = [orf_0_nuc_mid_counts, orf_120_nuc_mid_counts]
    starts = [orf_0_nuc_start_counts, orf_120_nuc_start_counts]
    ends = [orf_0_nuc_stop_counts, orf_120_nuc_stop_counts]
    nuc_groups = [starts, mids, ends]
    names = ['Left', 'Middle', 'Right']

    fig = plt.figure(figsize=(6, 5))

    grid_size = (3, 3)
    rows, cols = 3, 3

    # top two grid rows: one heatmap axis per group
    ax0 = plt.subplot2grid(grid_size, (0, 0), colspan=1, rowspan=2)
    ax1 = plt.subplot2grid(grid_size, (0, 1), colspan=1, rowspan=2)
    ax2 = plt.subplot2grid(grid_size, (0, 2), colspan=1, rowspan=2)
    axs = [ax0, ax1, ax2]
    # per-panel shift of the visible x window (see set_xlim below)
    origins = [-50, 0, 50]

    # bottom grid row: axes handed to plot_ends_comparison
    ax0 = plt.subplot2grid(grid_size, (2, 0), colspan=1, rowspan=1)
    ax1 = plt.subplot2grid(grid_size, (2, 1), colspan=1, rowspan=1)
    ax2 = plt.subplot2grid(grid_size, (2, 2), colspan=1, rowspan=1)
    axs2 = [ax0, ax1, ax2]

    fig.tight_layout(rect=[0.075, 0.1, 0.95, 0.945])
    plt.subplots_adjust(hspace=0.1, wspace=0.3)

    (ax1, ax2, ax3) = axs

    for i in range(len(axs)):
        ax = axs[i]

        group_120 = nuc_groups[i][1]
        group_0 = nuc_groups[i][0]

        # change in counts between the two inputs
        data = group_120 - group_0

        # `head` takes precedence over `tail` when both are given
        if head is not None:
            data = data.head(head)
        elif tail is not None:
            data = data.tail(tail)

        # the columns are mapped onto x in [-500, 500]
        # (presumably positions relative to a reference point - TODO confirm)
        ax.imshow(data,
                  vmin=-5,
                  vmax=5,
                  aspect=300. / len(data),
                  cmap='RdBu_r',
                  extent=[-500, 500, 0, len(data)])
        # 200-wide window, shifted by the panel's origin
        ax.set_xlim(-50 + origins[i], 150 + origins[i])
        ax.set_yticks([])
        ax.set_xticks([])
        ax.set_title(names[i])
        ax.axvline(0, color='black', linestyle='dashed', linewidth=1)

    plot_ends_comparison(axs2,
                         orf_0_nuc_mid_counts,
                         orf_120_nuc_mid_counts,
                         orf_0_nuc_start_counts,
                         orf_120_nuc_start_counts,
                         orf_0_nuc_stop_counts,
                         orf_120_nuc_stop_counts,
                         head=head,
                         tail=tail)

    # title wording follows whichever of head/tail selected the rows
    if head is not None:
        topbot = "downstream"
        headtail = head
    else:
        topbot = "upstream"
        headtail = tail

    plt.suptitle("Greatest %d %s nucleosome\nfragments shift, 0-120 min" %
                 (headtail, topbot))