Esempio n. 1
0
def run_patient_specific_permutations(n_tot, n_all, n_perm=1000):
    """
    Build a permutation null distribution for per-patient "specific" counts.

    On every iteration one random draw is made for each patient by shuffling
    with ``np.random.permutation(n_all)`` and keeping the first
    ``n_tot[pid]`` entries (i.e. sampling without replacement). All of the
    draws for that iteration are handed to ``setops.specific_features`` and
    the length of each returned element is recorded against the matching
    patient.

    :param n_tot: Mapping of patient ID -> number of entries to draw for that
        patient. Iteration order of this mapping fixes the order of the draws.
    :param n_all: Forwarded unchanged to ``np.random.permutation``; when it is
        an int the draws are indices taken from ``0 .. n_all - 1``.
    :param n_perm: Number of iterations to run.
    :return: Dict of patient ID -> list holding one count per iteration
        (so each list has ``n_perm`` entries).

    NOTE: because the draw is a slice, a request for more than ``n_all``
    entries is silently capped rather than raising.
    NOTE(review): ``setops.specific_features`` presumably returns, for each
    input, the members found in that input only - confirm against setops.
    """
    n_spec = dict((pid, []) for pid in n_tot)

    for _ in range(n_perm):
        # one independent draw per patient, made in the mapping's own order
        draws = [np.random.permutation(n_all)[:n_tot[pid]] for pid in n_tot]
        per_patient = setops.specific_features(*draws)

        # results come back in the same order as the inputs were supplied
        for pid, members in zip(n_tot.keys(), per_patient):
            n_spec[pid].append(len(members))

    return n_spec
Esempio n. 2
0
        dmr_res.to_pickle(fn, include_annotation=False)
        logger.info("Saved DMR results to %s", fn)

    dmr_res_all = dmr_res.results_significant

    # 1. check the phenomenon is still observed in our syngeneic comparisons
    # full list
    for_plot = dict([(pid, dmr_res_all['syngeneic_%s' % pid]) for pid in pids])
    plt_dict = bar_plot(for_plot, pids)
    plt_dict['fig'].tight_layout()
    plt_dict['fig'].savefig(os.path.join(outdir,
                                         "syngeneic_full_list_directions.png"),
                            dpi=200)

    # specific list
    spec_ix = setops.specific_features(*[for_plot[pid].keys() for pid in pids])
    for_plot = dict([(pid, dict([(k, for_plot[pid][k]) for k in s]))
                     for pid, s in zip(pids, spec_ix)])
    plt_dict = bar_plot(for_plot, pids)
    plt_dict['fig'].tight_layout()
    plt_dict['fig'].savefig(os.path.join(
        outdir, "syngeneic_specific_list_directions.png"),
                            dpi=200)

    # 2. check that we see the same phenomenon when we switch to the validation cohort comparator
    # full list
    for_plot = dict([(pid, dmr_res_all['consistency_%s' % pid])
                     for pid in pids])
    plt_dict = bar_plot(for_plot, pids)
    plt_dict['fig'].tight_layout()
    plt_dict['fig'].savefig(os.path.join(
Esempio n. 3
0
    dm_res = pd.read_excel(fn)
    # total number of (DM cluster, gene) pairs
    # n_all_de_dm = dm_res.shape[0]
    # reported elsewhere, so use that here (it's approximate)
    n_all_de_dm = 17000

    fn = os.path.join(HGIC_LOCAL_DIR, 'current', 'core_pipeline',
                      'rnaseq_methylation_combined',
                      'de_dmr_concordant_syngeneic_only.xlsx')
    de_dm_res = pd.read_excel(fn)
    # number of patient specific DE/DMRs
    de_dm_per_pat = {
        pid: de_dm_res.index[de_dm_res[pid] == 'Y']
        for pid in pids
    }
    n_tot_de_dm = {pid: de_dm_per_pat[pid].size for pid in pids}
    tt = setops.specific_features(*[de_dm_per_pat[pid] for pid in pids])
    n_ps_de_dmr = {pid: len(t) for pid, t in zip(pids, tt)}

    n_spec_perm_de_dm = run_patient_specific_permutations(n_tot_de_dm,
                                                          n_all_de_dm,
                                                          n_perm=n_perm)

    fig, axs = plot_perms_kde_vs_obs(
        n_spec_perm_de_dm,
        n_ps_de_dmr,
        xlabel='Number of patient-specific DE/DMRs',
        order=pids)
    fig.savefig(os.path.join(outdir, "patient_specific_de_dmr.png"), dpi=200)
    fig.savefig(os.path.join(outdir, "patient_specific_de_dmr.tiff"), dpi=200)
    fig.savefig(os.path.join(outdir, "patient_specific_de_dmr.pdf"))
    # set True to limit to the PIDs that are present in all comparisons
    limit_to_common_comparisons = False

    # compute specific DMRs and plot direction
    common_pids = sorted(setops.reduce_intersection(*pids_included.values()))

    dmrs_specific = {}
    for k in pids_included:
        if limit_to_common_comparisons:
            this_pids = common_pids
        else:
            this_pids = pids_included[k]
        dmrs_specific[k] = {}
        this = [all_results[k].results_significant[p] for p in this_pids]
        this_specific = setops.specific_features(*this)
        for p, cids, spec_dict in zip(this_pids, this_specific, this):
            if p not in dmrs_specific[k]:
                dmrs_specific[k][p] = {}
            for cid in cids:
                dmrs_specific[k][p][cid] = spec_dict[cid]

    for k in pids_included:

        if limit_to_common_comparisons:
            this_pids = common_pids
        else:
            this_pids = pids_included[k]

        # scale figure width according to number of PIDs
        fig_width = 5.5 + 0.24 * (len(this_pids) - 4)
Esempio n. 5
0
    de_by_direction = count_de_by_direction(de_res)

    plt_dict = bar_plot(de_res, pids)
    plt.setp(plt_dict["axs"][0].yaxis.get_ticklabels(), fontsize=fontsize)
    plt.setp(plt_dict["axs"][0].yaxis.get_label(), fontsize=fontsize)
    plt.setp(plt_dict["axs"][1].yaxis.get_ticklabels(), fontsize=fontsize)
    plt.setp(plt_dict["axs"][1].yaxis.get_label(), fontsize=fontsize)
    plt.setp(plt_dict["axs"][1].xaxis.get_ticklabels(), fontsize=fontsize)
    plt.setp(plt_dict["axs"][1].xaxis.get_label(), fontsize=fontsize)
    plt_dict['fig'].tight_layout()
    plt_dict['fig'].savefig(os.path.join(outdir,
                                         "syngeneic_full_list_directions.png"),
                            dpi=200)

    # specific list
    spec_ix = setops.specific_features(*[de_res[pid].index for pid in pids])
    for_plot = dict([(pid, de_res[pid].loc[s])
                     for pid, s in zip(pids, spec_ix)])
    plt_dict = bar_plot(for_plot, pids)
    plt.setp(plt_dict["axs"][0].yaxis.get_ticklabels(), fontsize=fontsize)
    plt.setp(plt_dict["axs"][0].yaxis.get_label(), fontsize=fontsize)
    plt.setp(plt_dict["axs"][1].yaxis.get_ticklabels(), fontsize=fontsize)
    plt.setp(plt_dict["axs"][1].yaxis.get_label(), fontsize=fontsize)
    plt.setp(plt_dict["axs"][1].xaxis.get_ticklabels(), fontsize=fontsize)
    plt.setp(plt_dict["axs"][1].xaxis.get_label(), fontsize=fontsize)
    plt_dict['fig'].tight_layout()
    plt_dict['fig'].savefig(os.path.join(
        outdir, "syngeneic_specific_list_directions.png"),
                            dpi=200)

    # We don't see the phenotype
        chrom_length[new_k] = cl[k]

    for pid in pids:
        fn = os.path.join(outdir, "%s_dmrs.bw" % pid)
        write_bigwig(
            dmr_res_s1[pid].results_significant,
            clusters,
            chrom_length,
            fn,
            chr_prefix=chr_prefix
        )

    # repeat for patient-specific DMRs
    patient_specific_cids = dict(zip(
        pids,
        setops.specific_features(*[dmr_res_s1[pid].results_significant for pid in pids])
    ))

    for pid in pids:
        fn = os.path.join(outdir, "%s_specific_dmrs.bw" % pid)
        this_res = dict([
            (cid, dmr_res_s1[pid].results[cid]) for cid in patient_specific_cids[pid]
        ])
        write_bigwig(
            this_res,
            clusters,
            chrom_length,
            fn,
            chr_prefix=chr_prefix
        )
Esempio n. 7
0
        dmr_res_s1 = dmr.DmrResultCollection.from_pickle(fn, anno=anno)
    else:
        raise IOError(
            "We require a pre-computed file, %s, which could not be found." %
            fn)

    # extract full (all significant) results
    dmr_res_all = dmr_res_s1.results_significant

    clusters = dmr_res_s1[pids[0]].clusters

    n_by_patient = dict([(pid, len(dmr_res_all[pid])) for pid in pids])

    specific_dmrs = dict(
        zip(pids,
            setops.specific_features(*[dmr_res_all[pid] for pid in pids])))
    n_by_patient_specific = dict([(pid, len(specific_dmrs[pid]))
                                  for pid in pids])

    ntot = sum(n_by_patient.values())

    # 1) Null: DMRs are picked uniformly randomly from the pool with variable marginal totals for each patient.
    # Marginal totals are given by the (real) number of DMRs in each patient.
    rvs = dict([(pid, [
        np.random.choice(range(ntot), replace=False, size=n_by_patient[pid])
        for i in range(n_iter)
    ]) for pid in pids])

    inters_1 = [[
        len(x)
        for x in setops.specific_features(*[rvs[pid][i] for pid in pids])
                               'dmr_%s' % t for t in relations_tss
                           ]].any(axis=1)].gene)]) for pid in pids])

    de_by_direction = same_de.count_de_by_direction(de_linked)

    plt_dict = same_de.bar_plot(de_linked, pids, figsize=(3, 4))
    plt_dict['fig'].tight_layout()
    plt.setp(common.get_children_recursive(plt_dict['fig'], plt.Text),
             fontsize=12)
    plt_dict['fig'].savefig(os.path.join(
        outdir, "de_linked_syngeneic_full_list_tss_directions.png"),
                            dpi=200)

    # patient-specific DMRs linked to DE genes

    spec_ix = setops.specific_features(
        *[dmr_res_all[pid].keys() for pid in pids])
    dm_specific = dict([(pid, dict([(k, dmr_res_all[pid][k]) for k in s]))
                        for pid, s in zip(pids, spec_ix)])

    # manually link these
    dm_specific_genes = {}
    for pid in pids:
        cl_ids = dm_specific[pid].keys()
        dm_specific_genes[pid] = setops.reduce_union(
            *[[t[0] for t in dmr_res_s1.clusters[c].genes] for c in cl_ids])

    de_linked_spec = dict([
        (pid, de_res_s1[pid].loc[de_res_s1[pid]['Gene Symbol'].isin(
            dm_specific_genes[pid])]) for pid in pids
    ])