def run_dmr_analyses(data, comparisons, anno, dmr_params, verbose=True):
    """
    Compute DMRs for each of the supplied two-group comparisons.
    :param data: Pandas dataframe containing M values, columns are samples and rows are probes.
    :param comparisons: Dictionary, each key is a title, each value is a 2-element iterable containing lists of
    sample names. The comparison is run as group 1 - group 2 in each case.
    :param anno: Probe annotation, passed to dmr.DmrResults.
    :param dmr_params: Dictionary of parameters. The whole dictionary is unpacked into identify_clusters(); the keys
    'n_jobs', 'delta_m_min', 'dmr_test_method', 'alpha' and 'test_kwargs' are read for test_clusters().
    :param verbose: If True (default), log the title and both sample groups of each comparison before testing it.
    :return: dmr.DmrResultCollection built from one result object per comparison title.
    """
    # Clusters are identified once; each comparison then works on its own copy.
    dmr_res_obj = dmr.DmrResults(anno=anno)
    dmr_res_obj.identify_clusters(**dmr_params)

    dmr_res = {}
    for the_ttl, the_samples in comparisons.items():
        if verbose:
            logger.info(
                "Comparison %s. Group 1: %s. Group 2: %s.",
                the_ttl,
                ','.join(the_samples[0]),
                ','.join(the_samples[1]),
            )
        the_obj = dmr_res_obj.copy()
        the_obj.test_clusters(
            data,
            samples=the_samples,
            n_jobs=dmr_params['n_jobs'],
            min_median_change=dmr_params['delta_m_min'],
            method=dmr_params['dmr_test_method'],
            alpha=dmr_params['alpha'],
            **dmr_params['test_kwargs']
        )
        dmr_res[the_ttl] = the_obj

    return dmr.DmrResultCollection(**dmr_res)
def compute_dmr_clusters(anno, dmr_params):
    """
    Build probe clusters chromosome by chromosome and wrap them in a DmrResults object.
    :param anno: Annotation table; the 'CHR' and 'MAPINFO' columns are used.
    :param dmr_params: Dictionary; the keys 'n_min' and 'd_max' are passed to dmr.identify_cluster().
    :return: dmr.DmrResults holding the clusters, numbered consecutively from 0 across all chromosomes.
    """
    all_clusters = []
    for chrom in anno.CHR.unique():
        # sorted MAPINFO values of the probes on this chromosome
        positions = anno.loc[anno.CHR == chrom, 'MAPINFO'].sort_values()
        found = dmr.identify_cluster(positions, dmr_params['n_min'], dmr_params['d_max'])
        for members in found:
            # the running cluster ID is simply the number of clusters collected so far
            next_id = len(all_clusters)
            all_clusters.append(
                dmr.ProbeCluster(members, anno, cluster_id=next_id, chr=chrom)
            )
    return dmr.DmrResults(clusters=all_clusters, anno=anno)
def compute_cross_dmr(me_data, me_meta, anno, pids, dmr_params, external_references=(('GIBCO', 'NSC'),)):
    """
    Compute DMRs for every GBM group against every iNSC group and against each external reference group.
    Each comparison is defined in the order (GBM, comparator).
    :param me_data: Methylation data, passed unchanged to test_clusters().
    :param me_meta: Metadata table. Its index holds sample names (matched with str.contains) and its 'type' column
    is compared against 'GBM', 'iNSC' and the external reference types.
    :param anno: Probe annotation, passed to dmr.DmrResults.
    :param pids: Iterable of patient identifiers. Each is used as a pattern matched against the sample names.
    :param dmr_params: Dictionary of parameters. The whole dictionary is unpacked into identify_clusters(); the keys
    'n_jobs', 'delta_m_min', 'dmr_test_method' and 'test_kwargs' are read for test_clusters().
    :param external_references: Iterable of (name pattern, type) pairs defining additional comparator groups.
    :return: dmr.DmrResultCollection built from a nested dictionary, res[GBM pid][iNSC pid or reference name].
    """
    obj = dmr.DmrResults(anno=anno)
    obj.identify_clusters(**dmr_params)

    def _test_gbm_against(gbm_idx, name_pattern, sample_type):
        # Comparator samples: name matches the pattern AND the metadata type matches.
        other_idx = me_meta.index.str.contains(name_pattern) & (me_meta.loc[:, 'type'] == sample_type)
        # Control the comparison order explicitly (GBM first), as paired_dmr does, rather than relying on the
        # ordering of the dictionary returned by Index.groupby().
        the_samples = [
            me_meta.index[gbm_idx],
            me_meta.index[other_idx],
        ]
        the_obj = obj.copy()
        # NOTE(review): unlike paired_dmr, 'alpha' is not forwarded here - confirm whether that is intended.
        the_obj.test_clusters(
            me_data,
            samples=the_samples,
            n_jobs=dmr_params['n_jobs'],
            min_median_change=dmr_params['delta_m_min'],
            method=dmr_params['dmr_test_method'],
            **dmr_params['test_kwargs']
        )
        return the_obj

    res = {}

    # loop over GBM groups
    for pid1 in pids:
        res.setdefault(pid1, {})
        the_idx1 = me_meta.index.str.contains(pid1) & (me_meta.loc[:, 'type'] == 'GBM')

        # loop over iNSC groups
        for pid2 in pids:
            res[pid1][pid2] = _test_gbm_against(the_idx1, pid2, 'iNSC')

        # loop over external reference NSC groups
        for er, er_type in external_references:
            res[pid1][er] = _test_gbm_against(the_idx1, er, er_type)

    return dmr.DmrResultCollection(**res)
def paired_dmr(me_data, me_meta, anno, pids, dmr_params):
    """
    Run the matched GBM vs iNSC DMR comparison (in that order) for every patient.
    :param me_data: Methylation data, passed unchanged to test_clusters().
    :param me_meta: Metadata table. Its index holds sample names (matched with str.contains) and its 'type' column
    is compared against 'GBM' and 'iNSC'.
    :param anno: Probe annotation, passed to dmr.DmrResults.
    :param pids: Iterable of patient identifiers. Each is used as a pattern matched against the sample names.
    :param dmr_params: Dictionary of parameters. The whole dictionary is unpacked into identify_clusters(); the keys
    'n_jobs', 'delta_m_min', 'dmr_test_method', 'alpha' and 'test_kwargs' are read for test_clusters().
    :return: dmr.DmrResultCollection with one entry per patient identifier.
    """
    # identify clusters once, then test a fresh copy for each patient
    template = dmr.DmrResults(anno=anno)
    template.identify_clusters(**dmr_params)

    per_patient = {}
    for pid in pids:
        is_patient = me_meta.index.str.contains(pid)
        sample_type = me_meta.loc[:, 'type']
        gbm_mask = is_patient & (sample_type == 'GBM')
        insc_mask = is_patient & (sample_type == 'iNSC')

        # explicit ordering: GBM is group 1, iNSC is group 2
        groups = [me_meta.index[gbm_mask], me_meta.index[insc_mask]]

        patient_res = template.copy()
        patient_res.test_clusters(
            me_data,
            samples=groups,
            n_jobs=dmr_params['n_jobs'],
            min_median_change=dmr_params['delta_m_min'],
            method=dmr_params['dmr_test_method'],
            alpha=dmr_params['alpha'],
            **dmr_params['test_kwargs']
        )
        per_patient[pid] = patient_res

    return dmr.DmrResultCollection(**per_patient)
facecolor='w', framealpha=0.5) ax.set_ylim([-0.01, 1.01]) ax.set_xlabel("M value") ax.set_ylabel("ECDF") fig.tight_layout() fig.savefig(os.path.join(outdir, "methylation_ecdf.png"), dpi=200) ## TODO: linear interp along y axis to identify regions that could be found DMR due to norming differences? ## TODO: apply this to the hGIC project # 1) DMR: All shBMI1 vs all scramble, etc... (aggregating cell lines) dmr_res_obj = dmr.DmrResults(anno=anno) dmr_res_obj.identify_clusters(**dmr_params) comparisons = [ collections.OrderedDict([ ('shCHD7', ['3021_1_shC', 'C', 'ICb1299_shCHD7', 'p62_3_shChd7']), ('scramble', ['3021_1_Scr', 'S', 'ICb1299_Scr', 'p62_3_Scr']) ]), collections.OrderedDict([ ('shBMI1', ['3021_1_shB', 'B', 'ICb1299_shBMI1', 'p62_3_shBmi1']), ('scramble', ['3021_1_Scr', 'S', 'ICb1299_Scr', 'p62_3_Scr']) ]), collections.OrderedDict([ ('shCHD7shBMI1', ['3021_1_shB+C', 'B+C', 'ICb1299_shBMI1CHD7', 'p62_3_shB+C']), ('scramble', ['3021_1_Scr', 'S', 'ICb1299_Scr', 'p62_3_Scr'])
mdat_019_e = mdat['GBM019_P4'] mdat_019_l = mdat['GBM019Luc_P12'] mdat_019_exvivo = mdat.loc[:, ['GBM019Luc_P3_PDX1', 'GBM019Luc_P2_PDX2']] e_minus_l = pd.Series(mdat_019_e.values - mdat_019_l.values, index=mdat.index) exvivo_minus_l = mdat_019_exvivo.subtract(mdat_019_l.values, axis=0) raise StopIteration # DMR analysis dmr_params = consts.DMR_PARAMS dmr_params['n_jobs'] = mp.cpu_count() anno = loader.load_illumina_methylationepic_annotation() anno = anno.loc[mdat.index] ffpe_gic_dmrs = dmr.DmrResults(anno=anno) ffpe_gic_dmrs.identify_clusters(**dmr_params) # use only GIC and FFPE here this_mdat = mdat.loc[:, obj.meta.descriptor.isin(['In vitro GIC', 'Bulk GBM'] )] this_samples = this_mdat.columns.groupby(obj.meta.loc[this_mdat.columns, 'descriptor']) samples = [ this_samples['In vitro GIC'], this_samples['Bulk GBM'], ] ffpe_gic_dmrs.test_clusters(this_mdat, samples=samples, n_jobs=dmr_params['n_jobs'],