def run_dmr_analyses(data, comparisons, anno, dmr_params, verbose=True):
    """
    Identify probe clusters once, then test them separately for every requested comparison.

    :param data: Pandas dataframe containing M values, columns are samples and rows are probes. Passed
    unchanged to ``test_clusters``.
    :param comparisons: Dictionary, each key is a title, each value is a 2-element iterable containing lists
    of sample names. The comparison is run as group 1 - group 2 in each case.
    :param anno: Annotation object handed to ``dmr.DmrResults``.
    :param dmr_params: Dictionary of parameters. It is expanded in full into ``identify_clusters`` and must
    also provide the keys 'n_jobs', 'delta_m_min', 'dmr_test_method', 'alpha' and 'test_kwargs' (the latter
    is a dictionary expanded into ``test_clusters``).
    :param verbose: Not read by this function; kept in the signature for existing callers.
    :return: ``dmr.DmrResultCollection`` with one entry per comparison title.
    """
    clusters = dmr.DmrResults(anno=anno)
    clusters.identify_clusters(**dmr_params)

    results = {}
    for title, sample_groups in comparisons.items():
        first_group = ','.join(sample_groups[0])
        second_group = ','.join(sample_groups[1])
        logger.info(
            "Comparison %s. Group 1: %s. Group 2: %s.",
            title,
            first_group,
            second_group,
        )

        # Work on a copy so that the identified clusters are shared but the test results are not.
        tested = clusters.copy()
        tested.test_clusters(
            data,
            samples=sample_groups,
            n_jobs=dmr_params['n_jobs'],
            min_median_change=dmr_params['delta_m_min'],
            method=dmr_params['dmr_test_method'],
            alpha=dmr_params['alpha'],
            **dmr_params['test_kwargs']
        )
        results[title] = tested

    return dmr.DmrResultCollection(**results)
def run_dmr(me_meta, me_data, dmr_clusters, str_contains_dict, type1='iPSC', type2='FB', **dmr_params):
    """
    Test the supplied clusters for each named comparison, always as type1 - type2.

    :param me_meta: Sample metadata. The index holds the sample names and the `type` column holds the
    sample type.
    :param me_data: Methylation data, passed unchanged to ``test_clusters``.
    :param dmr_clusters: Object with identified clusters. It is copied for every comparison, so the input
    itself is not tested.
    :param str_contains_dict: Dictionary. Keys will be used to identify results. Values are 2-tuples with the
    strings used in the meta.index.str.contains() call that separates samples: the first string selects the
    type1 samples, the second the type2 samples.
    :param type1: Refers to the `type` column in meta. Samples of this type form group 1.
    :param type2: Refers to the `type` column in meta. Samples of this type form group 2.
    :param dmr_params: Must provide 'n_jobs', 'delta_m_min', 'dmr_test_method', 'alpha' and 'test_kwargs'
    (a dictionary expanded into ``test_clusters``).
    :return: ``dmr.DmrResultCollection`` keyed like ``str_contains_dict``.
    """
    collected = {}
    for label, patterns in str_contains_dict.items():
        pattern1, pattern2 = patterns

        in_group1 = me_meta.index.str.contains(pattern1) & (me_meta.loc[:, 'type'] == type1)
        in_group2 = me_meta.index.str.contains(pattern2) & (me_meta.loc[:, 'type'] == type2)
        selected = in_group1 | in_group2

        # Split the selected sample names by type, then pick the groups by key so that the
        # comparison direction is always type1 first, type2 second.
        selected_types = me_meta.loc[selected, 'type'].values
        names_by_type = me_meta.index[selected].groupby(selected_types)
        ordered_samples = [names_by_type[type1], names_by_type[type2]]

        tested = dmr_clusters.copy()
        tested.test_clusters(
            me_data,
            samples=ordered_samples,
            n_jobs=dmr_params['n_jobs'],
            min_median_change=dmr_params['delta_m_min'],
            method=dmr_params['dmr_test_method'],
            alpha=dmr_params['alpha'],
            **dmr_params['test_kwargs']
        )
        collected[label] = tested

    return dmr.DmrResultCollection(**collected)
def compute_cross_dmr(me_data, me_meta, anno, pids, dmr_params, external_references=(('GIBCO', 'NSC'),)):
    """
    Test every GBM group against every iNSC group and against each external reference group.

    Each comparison is run with the GBM samples as group 1 and the reference samples as group 2. The two
    groups are built explicitly (as in ``paired_dmr``) rather than taken from the values of a groupby
    dictionary, so the direction of the comparison does not depend on dictionary ordering.

    :param me_data: Methylation data, passed unchanged to ``test_clusters``.
    :param me_meta: Sample metadata. The index holds the sample names and the `type` column holds the
    sample type.
    :param anno: Annotation object handed to ``dmr.DmrResults``.
    :param pids: Sequence of patient ID strings. Each is used in meta.index.str.contains() to select both
    the GBM samples (outer loop) and the iNSC samples (inner loop).
    :param dmr_params: Dictionary of parameters. It is expanded in full into ``identify_clusters`` and must
    also provide the keys 'n_jobs', 'delta_m_min', 'dmr_test_method' and 'test_kwargs' (a dictionary
    expanded into ``test_clusters``).
    :param external_references: Iterable of (name, type) pairs. `name` is used both in
    meta.index.str.contains() and as the result key; `type` is matched against the `type` column.
    :return: ``dmr.DmrResultCollection`` built from a nested dictionary: the outer key is the GBM patient
    ID, the inner key is the iNSC patient ID or the external reference name.
    """
    obj = dmr.DmrResults(anno=anno)
    obj.identify_clusters(**dmr_params)

    sample_names = me_meta.index
    sample_types = me_meta.loc[:, 'type']

    def select_samples(name_pattern, sample_type):
        # Names of the samples that match the pattern and have the requested type.
        return sample_names[sample_names.str.contains(name_pattern) & (sample_types == sample_type)]

    # The reference groups do not depend on the GBM patient, so resolve them once.
    # iNSC groups come first, then the external references, matching the original result order
    # (an external reference whose name equals a patient ID overrides that patient's iNSC entry).
    references = [(pid, select_samples(pid, 'iNSC')) for pid in pids]
    references.extend((er, select_samples(er, er_type)) for er, er_type in external_references)

    res = {}
    # loop over GBM groups
    for pid1 in pids:
        gbm_samples = select_samples(pid1, 'GBM')
        this_res = res.setdefault(pid1, {})

        # loop over iNSC groups and external reference groups
        for ref_key, ref_samples in references:
            the_obj = obj.copy()
            # NOTE(review): unlike the other DMR runners in this file, `alpha` is not forwarded here.
            # Left unchanged to avoid altering results - confirm whether this is intentional.
            the_obj.test_clusters(
                me_data,
                samples=[gbm_samples, ref_samples],  # control comparison order: GBM - reference
                n_jobs=dmr_params['n_jobs'],
                min_median_change=dmr_params['delta_m_min'],
                method=dmr_params['dmr_test_method'],
                **dmr_params['test_kwargs']
            )
            this_res[ref_key] = the_obj

    return dmr.DmrResultCollection(**res)
def paired_dmr(me_data, me_meta, anno, pids, dmr_params):
    """
    Compute DMRs for the paired GBM - iNSC comparison (in that order) of every patient.

    :param me_data: Methylation data, passed unchanged to ``test_clusters``.
    :param me_meta: Sample metadata. The index holds the sample names and the `type` column holds the
    sample type.
    :param anno: Annotation object handed to ``dmr.DmrResults``.
    :param pids: Iterable of patient ID strings, each used in meta.index.str.contains() to pick that
    patient's samples.
    :param dmr_params: Dictionary of parameters. It is expanded in full into ``identify_clusters`` and must
    also provide the keys 'n_jobs', 'delta_m_min', 'dmr_test_method', 'alpha' and 'test_kwargs' (a
    dictionary expanded into ``test_clusters``).
    :return: ``dmr.DmrResultCollection`` with one entry per patient ID.
    """
    template = dmr.DmrResults(anno=anno)
    template.identify_clusters(**dmr_params)

    per_patient = {}
    for pid in pids:
        belongs_to_patient = me_meta.index.str.contains(pid)
        sample_types = me_meta.loc[:, 'type']

        # Build the two groups explicitly so that the comparison is always GBM first, iNSC second.
        gbm_names = me_meta.index[belongs_to_patient & (sample_types == 'GBM')]
        insc_names = me_meta.index[belongs_to_patient & (sample_types == 'iNSC')]

        tested = template.copy()
        tested.test_clusters(
            me_data,
            samples=[gbm_names, insc_names],
            n_jobs=dmr_params['n_jobs'],
            min_median_change=dmr_params['delta_m_min'],
            method=dmr_params['dmr_test_method'],
            alpha=dmr_params['alpha'],
            **dmr_params['test_kwargs']
        )
        per_patient[pid] = tested

    return dmr.DmrResultCollection(**per_patient)