) ix = ~e6194_obj.meta.cell_line.isin([ 'NA07057', 'HCT116', 'HEL46.11', ]) e6194_obj.filter_samples(ix) e6194_obj.meta.insert(1, 'array_type', 'EPIC') # GSE110544 (Banovich et al.; iPSC lines) (EPIC) # banov_obj = loader.load_reference('gse110544', norm_method=norm_method) # banov_obj.meta.insert(1, 'array_type', 'EPIC') # HipSci data hip_epic_obj = loader.hipsci(norm_method=norm_method, array_type='epic', n_sample=12) # combine all data obj = loader.loader.MultipleBatchLoader( # [me_obj, encode_epic_obj, e6194_obj, hip_epic_obj, banov_obj] [me_obj, encode_epic_obj, e6194_obj, hip_epic_obj]) meta = obj.meta dat_m = process.m_from_beta(obj.data) this_anno = anno.loc[dat_m.index] dmr_clusters = compute_dmr_clusters(this_anno, dmr_params) ipsc_ref_names_6194 = ['HEL139', 'HEL140', 'HEL141'] ipsc_ref_names_6194_n1 = [ 'HEL139.2_p17', 'HEL139.5_p14', 'HEL139.8_p13', 'HEL140.1_p12',
norm_method=norm_method, samples=our_samples) nazor_ldr = loader.load_reference('GSE31848', norm_method=norm_method) ix = nazor_ldr.meta.index.str.contains(r'(ES__WA)|(iPS__HDF)') ix = ix & (~nazor_ldr.meta.index.str.contains(r'HDF51IPS7') ) # this sample is an outlier, so remove it now nazor_ldr.filter_samples(ix) # Zhou et al.: lots of samples here, but we'll only keep 2 x ESC lines zhou_ldr = loader.load_reference('GSE92462_450K', norm_method=norm_method) ix = zhou_ldr.meta.index.str.contains(r'^H[19]ES') zhou_ldr.filter_samples(ix) hip_epic_ldr = loader.hipsci(norm_method=norm_method, n_sample=n_hipsci, array_type='epic') ## FIXME: this is required to avoid a BUG where the meta column gets renamed to batch_1 in all other loaders hip_epic_ldr.meta.drop('batch', axis=1, inplace=True) # Weltner et al. (E-MTAB-6194) e6194_ldr = loader.load_reference('E-MTAB-6194', norm_method=norm_method) ix = ~e6194_ldr.meta.cell_line.isin( ['NA07057', 'HCT116', 'HEL46.11', 'CCD-1112Sk (CRL-2429)']) e6194_ldr.filter_samples(ix) refs = [('Kim et al.', loader.gse38216( norm_method=norm_method, samples=['H9 ESC 1', 'H9 ESC 2', 'H9 NPC 1', 'H9 NPC 2'])), ('Morey et al.',
# r.batch_id = bid # r.meta.index = ["%s_%s" % (t, bid) for t in r.meta.index] # r.data.columns = r.meta.index # ref_obj = loader.loader.MultipleBatchLoader([t[1] for t in refs]) # ref_meta = ref_obj.meta.loc[ref_obj.meta.index.str.contains('ESC')] # ref_data = ref_data.loc[:, ref_meta.index] # combine all loaders # this will reduce the probe list to the intersection (i.e. 450K) ref_obj = loader.loader.MultipleBatchLoader(refs) ref_meta = ref_obj.meta ref_data = ref_obj.data.dropna() # HipSci data hip_epic_ldr = loader.hipsci(norm_method=norm_method, n_sample=12, array_type='epic') hip_epic_meta = hip_epic_ldr.meta hip_epic_data = hip_epic_ldr.data # hip_450k_meta, hip_450k_data = loader.hipsci(norm_method=norm_method, n_sample=30, array_type='450k') hip_ldr = loader.hipsci(norm_method=norm_method, n_sample=12, array_type='all') hip_meta = hip_ldr.meta hip_data = hip_ldr.data hip_meta.batch = ["HipSci (%s)" % t for t in hip_meta.array_type] # clustering genome-wide # iPSC, FB, ESC # mix of HipSci samples by array_type meta, dat = combine_data_meta(