def run_patient_specific_permutations(n_tot, n_all, n_perm=1000):
    """Simulate patient-specific feature counts under random assignment.

    On every permutation, each patient is given the first ``n_tot[pid]``
    entries of a fresh ``np.random.permutation(n_all)``, i.e. a draw without
    replacement from the pool. ``setops.specific_features`` is then called on
    the per-patient draws and the length of each returned collection is
    recorded against the corresponding patient.

    :param n_tot: Mapping from patient ID to the number of features to draw
        for that patient.
    :param n_all: Pool to draw from, in any form accepted by
        ``np.random.permutation`` (in practice the integer pool size).
    :param n_perm: Number of permutations to run.
    :return: Dictionary keyed by patient ID. Each value is a list with one
        entry per permutation holding that patient's specific-feature count.
    :raises ValueError: If any requested count is negative or larger than the
        pool.
    """
    # Fix the patient order once so draws and results are paired consistently.
    pids = list(n_tot)

    # np.random.permutation also accepts an array-like pool; support both.
    pool_size = len(n_all) if np.ndim(n_all) else n_all

    # A count outside [0, pool_size] cannot be honoured by the slice below: it
    # would quietly return too few (or, for negatives, the wrong) indices and
    # distort the simulated distribution. Fail loudly instead.
    for pid in pids:
        if not 0 <= n_tot[pid] <= pool_size:
            raise ValueError(
                "Cannot draw %s features for %r from a pool of %s."
                % (n_tot[pid], pid, pool_size)
            )

    n_spec = {pid: [] for pid in pids}
    for _ in range(n_perm):
        draws = [np.random.permutation(n_all)[:n_tot[pid]] for pid in pids]
        # presumably one collection per input, in input order, holding the
        # members unique to that input -- verify against setops
        for pid, specific in zip(pids, setops.specific_features(*draws)):
            n_spec[pid].append(len(specific))

    return n_spec
dmr_res.to_pickle(fn, include_annotation=False) logger.info("Saved DMR results to %s", fn) dmr_res_all = dmr_res.results_significant # 1. check the phenomenon is still observed in our syngeneic comparisons # full list for_plot = dict([(pid, dmr_res_all['syngeneic_%s' % pid]) for pid in pids]) plt_dict = bar_plot(for_plot, pids) plt_dict['fig'].tight_layout() plt_dict['fig'].savefig(os.path.join(outdir, "syngeneic_full_list_directions.png"), dpi=200) # specific list spec_ix = setops.specific_features(*[for_plot[pid].keys() for pid in pids]) for_plot = dict([(pid, dict([(k, for_plot[pid][k]) for k in s])) for pid, s in zip(pids, spec_ix)]) plt_dict = bar_plot(for_plot, pids) plt_dict['fig'].tight_layout() plt_dict['fig'].savefig(os.path.join( outdir, "syngeneic_specific_list_directions.png"), dpi=200) # 2. check that we see the same phenomenon when we switch to the validation cohort comparator # full list for_plot = dict([(pid, dmr_res_all['consistency_%s' % pid]) for pid in pids]) plt_dict = bar_plot(for_plot, pids) plt_dict['fig'].tight_layout() plt_dict['fig'].savefig(os.path.join(
dm_res = pd.read_excel(fn)

# Size of the pool of (DM cluster, gene) pairs used for the permutations.
# The row count of the table loaded above (dm_res.shape[0]) is deliberately
# not used; the approximate figure reported elsewhere is hard-coded instead.
n_all_de_dm = 17000

fn = os.path.join(
    HGIC_LOCAL_DIR,
    'current',
    'core_pipeline',
    'rnaseq_methylation_combined',
    'de_dmr_concordant_syngeneic_only.xlsx'
)
de_dm_res = pd.read_excel(fn)

# For each patient, the row labels whose entry in that patient's column is 'Y'
de_dm_per_pat = dict(
    (pid, de_dm_res.index[de_dm_res[pid] == 'Y']) for pid in pids
)
# total number of DE/DMRs per patient
n_tot_de_dm = dict((pid, de_dm_per_pat[pid].size) for pid in pids)

# observed number of patient-specific DE/DMRs
tt = setops.specific_features(*[de_dm_per_pat[pid] for pid in pids])
n_ps_de_dmr = dict((pid, len(t)) for pid, t in zip(pids, tt))

# simulated counterpart of the observed counts
n_spec_perm_de_dm = run_patient_specific_permutations(
    n_tot_de_dm,
    n_all_de_dm,
    n_perm=n_perm
)

fig, axs = plot_perms_kde_vs_obs(
    n_spec_perm_de_dm,
    n_ps_de_dmr,
    xlabel='Number of patient-specific DE/DMRs',
    order=pids
)

# same figure in three formats; no dpi is passed for the PDF
fig.savefig(os.path.join(outdir, "patient_specific_de_dmr.png"), dpi=200)
fig.savefig(os.path.join(outdir, "patient_specific_de_dmr.tiff"), dpi=200)
fig.savefig(os.path.join(outdir, "patient_specific_de_dmr.pdf"))
# When True, every comparison is restricted to the PIDs present in all of them
limit_to_common_comparisons = False

# PIDs shared by every comparison, in sorted order
common_pids = sorted(setops.reduce_intersection(*pids_included.values()))

# Patient-specific DMRs, stored as dmrs_specific[comparison][pid][cluster ID]
dmrs_specific = {}
for k in pids_included:
    this_pids = common_pids if limit_to_common_comparisons else pids_included[k]

    dmrs_specific[k] = {}
    this = [all_results[k].results_significant[p] for p in this_pids]
    this_specific = setops.specific_features(*this)
    for p, cids, spec_dict in zip(this_pids, this_specific, this):
        # create the entry even when the patient has no specific DMRs
        dmrs_specific[k].setdefault(p, {})
        for cid in cids:
            dmrs_specific[k][p][cid] = spec_dict[cid]

for k in pids_included:
    this_pids = common_pids if limit_to_common_comparisons else pids_included[k]

    # figure width grows with the number of PIDs (5.5 at four PIDs)
    fig_width = 5.5 + 0.24 * (len(this_pids) - 4)
def _set_axis_text_fontsize(axs, size):
    """Set the font size of tick labels and axis labels on a bar plot.

    Covers the y axis of ``axs[0]`` and the y and x axes of ``axs[1]``, in
    that order; tick labels are updated before the axis label on each axis.
    """
    for axis in (axs[0].yaxis, axs[1].yaxis, axs[1].xaxis):
        plt.setp(axis.get_ticklabels(), fontsize=size)
        plt.setp(axis.get_label(), fontsize=size)


de_by_direction = count_de_by_direction(de_res)

# full list
plt_dict = bar_plot(de_res, pids)
_set_axis_text_fontsize(plt_dict["axs"], fontsize)
plt_dict['fig'].tight_layout()
plt_dict['fig'].savefig(
    os.path.join(outdir, "syngeneic_full_list_directions.png"),
    dpi=200
)

# specific list: keep only the rows that setops reports as specific to each PID
spec_ix = setops.specific_features(*[de_res[pid].index for pid in pids])
for_plot = {pid: de_res[pid].loc[s] for pid, s in zip(pids, spec_ix)}
plt_dict = bar_plot(for_plot, pids)
_set_axis_text_fontsize(plt_dict["axs"], fontsize)
plt_dict['fig'].tight_layout()
plt_dict['fig'].savefig(
    os.path.join(outdir, "syngeneic_specific_list_directions.png"),
    dpi=200
)

# We don't see the phenotype
# NOTE(review): this assignment looks like the tail of a loop whose header
# lies above this excerpt -- confirm its indentation against the full file.
chrom_length[new_k] = cl[k]

# one "<pid>_dmrs.bw" file per patient, built from all significant DMRs
for pid in pids:
    fn = os.path.join(outdir, "%s_dmrs.bw" % pid)
    write_bigwig(
        dmr_res_s1[pid].results_significant,
        clusters,
        chrom_length,
        fn,
        chr_prefix=chr_prefix
    )

# repeat for patient-specific DMRs
patient_specific_cids = {
    pid: cids
    for pid, cids in zip(
        pids,
        setops.specific_features(
            *[dmr_res_s1[pid].results_significant for pid in pids]
        )
    )
}

for pid in pids:
    fn = os.path.join(outdir, "%s_specific_dmrs.bw" % pid)
    # values are looked up in .results (not .results_significant) for the
    # cluster IDs found to be specific among the significant results
    this_res = {
        cid: dmr_res_s1[pid].results[cid]
        for cid in patient_specific_cids[pid]
    }
    write_bigwig(
        this_res,
        clusters,
        chrom_length,
        fn,
        chr_prefix=chr_prefix
    )
dmr_res_s1 = dmr.DmrResultCollection.from_pickle(fn, anno=anno) else: raise IOError( "We require a pre-computed file, %s, which could not be found." % fn) # extract full (all significant) results dmr_res_all = dmr_res_s1.results_significant clusters = dmr_res_s1[pids[0]].clusters n_by_patient = dict([(pid, len(dmr_res_all[pid])) for pid in pids]) specific_dmrs = dict( zip(pids, setops.specific_features(*[dmr_res_all[pid] for pid in pids]))) n_by_patient_specific = dict([(pid, len(specific_dmrs[pid])) for pid in pids]) ntot = sum(n_by_patient.values()) # 1) Null: DMRs are picked uniformly randomly from the pool with variable marginal totals for each patient. # Marginal totals are given by the (real) number of DMRs in each patient. rvs = dict([(pid, [ np.random.choice(range(ntot), replace=False, size=n_by_patient[pid]) for i in range(n_iter) ]) for pid in pids]) inters_1 = [[ len(x) for x in setops.specific_features(*[rvs[pid][i] for pid in pids])
'dmr_%s' % t for t in relations_tss ]].any(axis=1)].gene)]) for pid in pids]) de_by_direction = same_de.count_de_by_direction(de_linked) plt_dict = same_de.bar_plot(de_linked, pids, figsize=(3, 4)) plt_dict['fig'].tight_layout() plt.setp(common.get_children_recursive(plt_dict['fig'], plt.Text), fontsize=12) plt_dict['fig'].savefig(os.path.join( outdir, "de_linked_syngeneic_full_list_tss_directions.png"), dpi=200) # patient-specific DMRs linked to DE genes spec_ix = setops.specific_features( *[dmr_res_all[pid].keys() for pid in pids]) dm_specific = dict([(pid, dict([(k, dmr_res_all[pid][k]) for k in s])) for pid, s in zip(pids, spec_ix)]) # manually link these dm_specific_genes = {} for pid in pids: cl_ids = dm_specific[pid].keys() dm_specific_genes[pid] = setops.reduce_union( *[[t[0] for t in dmr_res_s1.clusters[c].genes] for c in cl_ids]) de_linked_spec = dict([ (pid, de_res_s1[pid].loc[de_res_s1[pid]['Gene Symbol'].isin( dm_specific_genes[pid])]) for pid in pids ])