def compute_cross_correlations(strand='sense'):
    """Cross correlate every ORF for one strand and persist the results.

    Picks the ORF table, the per-chromosome output directory and the HDF5
    output path that belong to the requested strand, runs
    ``calculate_cross_correlation_all_chromosomes`` on the module-level
    ``all_mnase_data`` and writes:

    * the full cross-correlation table as zlib-compressed HDF5, and
    * the summary table as ``cross_correlation_summary_<strand>.csv`` under
      ``mnase_dir``.

    Args:
        strand: ``'antisense'`` selects the antisense inputs/outputs; every
            other value falls back to the sense ones. The value is embedded
            verbatim in the summary CSV file name.
    """
    from src.cross_correlation_kernel import MNaseSeqDensityKernel
    from src.cross_correlation import calculate_cross_correlation_all_chromosomes

    # Sense strand is the default; the antisense settings replace it below.
    orfs, chrom_dir, hdf_path = (
        paper_orfs, cc_sense_chrom_dir, cross_corr_sense_path)
    if strand == 'antisense':
        orfs, chrom_dir, hdf_path = (
            antisense_orfs, cc_antisense_chrom_dir, cross_corr_antisense_path)

    mkdirs_safe([chrom_dir])

    # The triple kernel is derived from the nucleosome kernel.
    nucleosome_kernel = MNaseSeqDensityKernel(filepath=nuc_kernel_path)
    small_kernel = MNaseSeqDensityKernel(filepath=sm_kernel_path)
    triple = compute_triple_kernel(nucleosome_kernel)

    n_orfs = len(orfs)
    print_fl("Cross correlating %d ORFs..." % n_orfs)

    full_cc, summary_cc = calculate_cross_correlation_all_chromosomes(
        all_mnase_data,
        orfs,
        nucleosome_kernel,
        small_kernel,
        triple,
        save_chrom_dir=chrom_dir,
        timer=timer,
        log=True,
        find_antisense=(strand == 'antisense'),
    )

    full_cc.to_hdf(hdf_path, 'cross_correlation',
                   mode='w', complevel=9, complib='zlib')

    summary_path = '%s/cross_correlation_summary_%s.csv' % (mnase_dir, strand)
    summary_cc.to_csv(summary_path)

    print_fl("Done.")
    timer.print_time()
    print_fl()
# Example no. 2
# 0
def danpos():
    """Run DANPOS on the time-0 MNase-seq data and fit kernels from its calls.

    Steps, in order:

    1. Write the MNase-seq rows with ``time == 0`` to ``mnase_0.bed`` under
       ``<OUTPUT_DIR>/danpos/``.
    2. Invoke ``scripts/6_reviewer_mats/run_danpos.sh`` with the BED file
       name, ``OUTPUT_DIR`` and the path of ``danpos.py`` (resolved relative
       to the current working directory).
    3. Read the pooled position calls and save a 100-bin histogram of the
       ``smt_value`` entries below 10000.
    4. Fit a nucleosome kernel from the 2500 calls with the largest
       ``smt_value``, save it as JSON, and plot it and the derived triple
       kernel.
    """
    from src.dpos_bed import create_bed_for_dpos
    import os
    from src.utils import run_cmd

    cwd = os.getcwd()

    out_dir = '%s/danpos/' % (OUTPUT_DIR)
    mkdirs_safe([out_dir])

    danpos_script = "%s/danpos-2.2.2/danpos.py" % cwd

    # Build the BED input for DANPOS from the time-0 fragments only.
    all_mnase = pd.read_hdf(mnase_seq_path, 'mnase_data')
    mnase_t0 = all_mnase[all_mnase.time == 0]

    bed_name = 'mnase_0.bed'
    bed_path = '%s/%s' % (out_dir, bed_name)
    create_bed_for_dpos(mnase_t0, bed_path)
    print_fl("Wrote %s" % bed_path)

    command = "scripts/6_reviewer_mats/run_danpos.sh %s %s %s" % (
        bed_name, OUTPUT_DIR, danpos_script)
    # The command's captured output is not used further.
    _stdout, _stderr = run_cmd(command, stdout_file=None)

    calls_path = '%s/result/pooled/mnase_0.smooth.positions.xls' % (out_dir)
    calls = pd.read_csv(calls_path, sep='\t')

    # Histogram of the summit values, restricted to values below 10000.
    summit_values = calls.smt_value
    plt.hist(summit_values[summit_values < 10000], bins=100)
    plt.savefig("%s/danpos_smt_pos.png" % out_dir)

    # Keep the 2500 highest-scoring calls and rename the columns that are
    # handed to the kernel fitter.
    ranked_calls = calls.sort_values('smt_value', ascending=False)
    column_names = {'chr': 'chromosome', 'smt_pos': 'position'}
    best_calls = ranked_calls.head(2500).rename(columns=column_names)

    from src.chromatin import collect_mnase
    from src.kernel_fitter import compute_nuc_kernel

    fitted_kernel = compute_nuc_kernel(mnase_t0, best_calls)
    fitted_kernel.save_kernel("%s/danpos_kernel.json" % out_dir)

    from src.kernel_fitter import compute_triple_kernel

    # NOTE(review): save_dir is not defined inside this function; presumably
    # it is a module-level name. Confirm it is not meant to be the DANPOS
    # output directory used for every other file written here.
    fitted_kernel.plot_kernel(kernel_type='nucleosome')
    plt.savefig('%s/danpos_nuc_kernel.pdf' % (save_dir), transparent=True)

    fitted_triple = compute_triple_kernel(fitted_kernel)
    fitted_triple.plot_kernel(kernel_type='triple')
    plt.savefig('%s/danpos_triple_kernel.pdf' % (save_dir), transparent=True)
    def __init__(self):
        """Create the typhoon plotter and load the density kernels.

        Sets ``typhoon_plotter``, ``sm_kernel``, ``nuc_kernel``,
        ``triple_kernel`` (derived from ``nuc_kernel``) and ``kernel_type``
        (initially ``'nucleosomal'``).
        """
        # The TSS/PAS table is still read, but joining its PAS column onto
        # the ORFs is currently disabled, so the result is unused.
        _park_tss_pas = read_park_TSS_PAS()
        plot_orfs = paper_orfs

        self.typhoon_plotter = TyphoonPlotter(
            mnase_path=mnase_seq_path,
            rna_seq_pileup_path=pileup_path,
            orfs=plot_orfs,
        )

        self.sm_kernel = MNaseSeqDensityKernel(filepath=sm_kernel_path)
        self.nuc_kernel = MNaseSeqDensityKernel(filepath=nuc_kernel_path)
        self.triple_kernel = compute_triple_kernel(self.nuc_kernel)

        self.kernel_type = 'nucleosomal'
def main():
    """Cross correlate the ORFs of one chromosome as a child task.

    Expects exactly two command-line arguments after the script name: the
    chromosome number and an antisense flag (``true``, case-insensitive,
    enables antisense mode; anything else means sense). Loads the kernels
    and the MNase-seq table, runs ``calculate_cross_correlation_chr`` for
    that chromosome, then reports completion through ``child_done``.

    Raises:
        ValueError: if ``sys.argv`` does not hold exactly three items or the
            chromosome argument is not an integer.
    """
    from src.kernel_fitter import compute_triple_kernel

    _script, chrom_arg, antisense_arg = tuple(sys.argv)
    antisense = antisense_arg.lower() == 'true'
    chrom = int(chrom_arg)

    banner = "Running cross correlation on chromosome %d, antisense: %s" % (
        chrom, str(antisense))
    print_fl(banner)

    name = task_name(antisense)
    timer = Timer()

    nucleosome_kernel = MNaseSeqDensityKernel(filepath=nuc_kernel_path)
    small_kernel = MNaseSeqDensityKernel(filepath=sm_kernel_path)
    triple = compute_triple_kernel(nucleosome_kernel)

    print_fl("Reading MNase-seq...", end='')
    mnase_table = pd.read_hdf(mnase_seq_path, 'mnase_data')
    print_fl("Done.")
    timer.print_time()
    print_fl()

    # Output directory and ORF set depend on the strand being processed.
    if antisense:
        chrom_dir = cc_antisense_chrom_dir
        orfs = antisense_orfs
    else:
        chrom_dir = cc_sense_chrom_dir
        orfs = read_orfs_data("%s/orfs_cd_paper_dataset.csv" % OUTPUT_DIR)

    calculate_cross_correlation_chr(
        mnase_table, orfs, chrom, antisense,
        nucleosome_kernel, small_kernel, triple,
        chrom_dir,
        log=True,
        timer=timer,
    )

    # Signal that this chromosome's task has finished.
    child_done(name, WATCH_TMP_DIR, chrom)
# Example no. 5
# 0
def plot_entropy_example(plotter, orf, plot_span, title,
                         time=120, entropy_window=150):
    """Plot one ORF's typhoon panel with its triple-kernel cross correlation.

    Filters the plotter's MNase-seq data to +/-1000 around the ORF's TSS at
    the requested time, recentres fragment midpoints on the TSS, computes
    the cross-correlation metrics, and draws a small figure whose title
    carries the entropy of the triple-kernel score over the first
    ``entropy_window`` positions of ``plot_span``.

    Args:
        plotter: object providing ``all_mnase_data``, ``set_span_chrom`` and
            ``plot_typhoon_time``.
        orf: record with ``TSS``, ``chr`` and ``name`` attributes.
        plot_span: ``(start, end)`` x-limits relative to the TSS;
            ``plot_span[0]`` is also the start of the entropy window.
        title: first line of the axes title.
        time: time point to plot (default 120, the previously hard-coded
            value).
        entropy_window: number of consecutive positions, starting at
            ``plot_span[0]``, that enter the entropy calculation (default
            150, the previously hard-coded value).
    """
    from src.chromatin import filter_mnase
    import matplotlib.pyplot as plt

    half_window = 1000
    span = (orf.TSS - half_window, orf.TSS + half_window)

    # Work on an explicit copy: the frame is modified below, and writing to
    # a filtered slice can raise SettingWithCopyWarning or touch the
    # plotter's shared data.
    data = filter_mnase(plotter.all_mnase_data,
                        span[0],
                        span[1],
                        chrom=orf.chr,
                        time=time).copy()
    data['orf_name'] = orf.name
    # Express fragment midpoints relative to the TSS.
    data['mid'] = data['mid'] - orf.TSS

    from src.kernel_fitter import compute_triple_kernel
    from src.cross_correlation_kernel import MNaseSeqDensityKernel

    nuc_kernel = MNaseSeqDensityKernel(filepath=nuc_kernel_path)
    sm_kernel = MNaseSeqDensityKernel(filepath=sm_kernel_path)
    triple_kernel = compute_triple_kernel(nuc_kernel)

    from src.transformations import exhaustive_counts
    from src.cross_correlation import compute_cross_correlation_metrics

    cur_wide_counts_df = exhaustive_counts((-half_window, half_window),
                                           (0, 250),
                                           'mid',
                                           'length',
                                           parent_keys=['orf_name', 'time'],
                                           data=data,
                                           returns='wide',
                                           log=False)

    cur_cc = compute_cross_correlation_metrics(cur_wide_counts_df,
                                               nuc_kernel,
                                               sm_kernel,
                                               triple_kernel,
                                               times=[float(time)])

    # Triple-kernel score for this ORF at the requested time.
    triple_cc = cur_cc.loc['triple'].loc[orf.name].loc[time]

    from src.entropy import calc_entropy
    from src.plot_utils import apply_global_settings

    apply_global_settings()

    window_positions = np.arange(plot_span[0], plot_span[0] + entropy_window)
    value = calc_entropy(triple_cc[window_positions].values)

    fig, ax = plt.subplots(1, 1, figsize=(1.5, 2.5))
    fig.tight_layout(rect=[0.0, 0.0, 1, 0.8])
    plt.subplots_adjust(hspace=0.0, wspace=0.5)

    plotter.set_span_chrom(plot_span, orf.chr)
    plotter.plot_typhoon_time(ax, data, time, scale_z=True)
    ax.set_xlim(*plot_span)

    # Strip all ticks, major and minor, from both axes.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xticks([], minor=True)
    ax.set_yticks([], minor=True)

    ax.tick_params(axis='x', length=0, pad=0)
    ax.tick_params(axis='y', length=0, pad=0)

    # Overlay the cross-correlation score as a filled curve.
    x = triple_cc.index.values.astype(int)
    y = triple_cc.values.astype(float)
    ax.fill_between(x, y, color='#28a098')

    ax.set_title("%s\n%.1f bits" % (title, value))
# Example no. 6
# 0
def misc_plots():
    """Render and save the miscellaneous figures.

    Writes, in order: the nucleosome, small and triple kernel plots; the
    nucleosome-call cross-correlation plot; the met4 time course and
    scatter; four scatter plots for the selected genes; the ORF-length and
    coverage plots; and fragment-length distributions for the plotter's
    MNase-seq data and for each of the two replicates in the merged file.

    Creates the ``met4``, ``scatters`` and ``kernels`` directories under
    ``OUTPUT_DIR`` and initialises the module-level ``plotter`` if it is
    still ``None``.
    """
    global plotter

    scatter_dpi = 200

    from src.met4 import plot_timecourse
    from src.chromatin_summary_plots import (
        plot_combined_vs_xrate, plot_sul_prom_disorg, plot_occ_vs_xrate,
        plot_disorg_vs_xrate, plot_diosorg_vs_occ, plot_frag_len_dist)
    from src.cross_correlation_kernel import MNaseSeqDensityKernel

    def save_figure(path_template, directory, **savefig_kwargs):
        # Every figure in this function is saved with a transparent
        # background.
        plt.savefig(path_template % (directory), transparent=True,
                    **savefig_kwargs)

    met4_dir = "%s/met4" % OUTPUT_DIR
    scatters_dir = "%s/scatters" % OUTPUT_DIR
    kernels_dir = "%s/kernels" % OUTPUT_DIR
    mkdirs_safe([met4_dir, scatters_dir, kernels_dir])

    # Kernel plots.
    nucleosome_kernel = MNaseSeqDensityKernel(filepath=nuc_kernel_path)
    nucleosome_kernel.plot_kernel(kernel_type='nucleosome')
    save_figure('%s/nuc_kernel.pdf', kernels_dir)

    small_kernel = MNaseSeqDensityKernel(filepath=sm_kernel_path)
    small_kernel.plot_kernel(kernel_type='small')
    save_figure('%s/sm_kernel.pdf', kernels_dir)

    from src.kernel_fitter import compute_triple_kernel
    triple = compute_triple_kernel(nucleosome_kernel)
    triple.plot_kernel(kernel_type='triple')
    save_figure('%s/triple_kernel.pdf', kernels_dir)

    from src.nucleosome_calling import plot_nuc_calls_cc
    plot_nuc_calls_cc()
    save_figure('%s/nuc_cross_cor_0_min.pdf', misc_figures_dir)

    # met4 plots
    plot_timecourse(datastore)
    save_figure('%s/met4_timecourse.pdf', met4_dir)

    plot_sul_prom_disorg(datastore)
    save_figure('%s/met4_scatter.pdf', met4_dir, dpi=scatter_dpi)

    # Scatter plots: each is drawn for the selected genes and then saved
    # under scatters_dir.
    scatter_jobs = (
        (plot_combined_vs_xrate, '%s/combined_vs_xrate.pdf'),
        (plot_occ_vs_xrate, '%s/small_vs_xrate.pdf'),
        (plot_disorg_vs_xrate, '%s/disorg_vs_xrate.pdf'),
        (plot_diosorg_vs_occ, '%s/disorg_vs_small.pdf'),
    )
    for draw_scatter, path_template in scatter_jobs:
        draw_scatter(datastore, selected_genes)
        save_figure(path_template, scatters_dir, dpi=scatter_dpi)

    plot_ORFs_len(misc_figures_dir)

    plot_coverage(misc_figures_dir)

    # Create the shared plotter on first use.
    if plotter is None:
        plotter = get_plotter()

    # plot sampled mnase data
    plot_frag_len_dist(plotter.all_mnase_data)
    save_figure("%s/frag_length_distribution.pdf", misc_figures_dir)

    print_fl("Load all MNase-seq data for fragment length distributions")
    merged_mnase = pd.read_hdf('%s/mnase_seq_merged_all.h5.z' % mnase_dir,
                               'mnase_data')
    # Split the merged table by its 'source' column before plotting.
    replicate_jobs = [
        (merged_mnase[merged_mnase['source'] == source_id],
         label, path_template)
        for source_id, label, path_template in (
            ('dm498_503', "Replicate 1",
             '%s/frag_length_distribution_repl1.pdf'),
            ('dm504_509', "Replicate 2",
             '%s/frag_length_distribution_repl2.pdf'),
        )
    ]
    print_fl("Done.")

    for replicate_mnase, label, path_template in replicate_jobs:
        plot_frag_len_dist(replicate_mnase, label, normalize=True)
        save_figure(path_template, misc_figures_dir)