def read_cpg_profiles(filenames, log=None, *args, **kwargs):
    """Read methylation profiles.

    Input files can be gzip compressed.

    Parameters
    ----------
    filenames: iterable
        File names of the CpG profiles to read. Each one is opened with
        `dat.GzipFile`.
    log: callable, optional
        If given, it is called with each file name before the file is read.
    *args, **kwargs
        Forwarded unchanged to `dat.read_cpg_profile`.

    Returns
    -------
    dict
        `dict (key, value)`, where `key` is the output name and `value` the CpG
        table. The dict is an `OrderedDict`, so iterating it yields the
        profiles in the order of `filenames`.
    """
    # One entry per sample; insertion order follows `filenames`.
    cpg_profiles = OrderedDict()
    for filename in filenames:
        if log:
            log(filename)
        # Wrapper to read gzip-compressed (or plain) files.
        cpg_file = dat.GzipFile(filename, 'r')
        try:
            # Output name is `filename` with its file extension removed.
            output_name = split_ext(filename)
            # Read the CpG profile from a TSV or bedGraph file; the result is a
            # `pandas.DataFrame` with columns `chromo`, `pos`, `value`.
            cpg_profile = dat.read_cpg_profile(cpg_file, sort=True,
                                               *args, **kwargs)
        finally:
            # Release the handle even if parsing fails.
            cpg_file.close()
        cpg_profiles[output_name] = cpg_profile
    return cpg_profiles
def read_cpg_profiles(filenames, log=None, *args, **kwargs):
    """Read methylation profiles.

    Input files can be gzip compressed.

    Parameters
    ----------
    filenames: iterable
        File names of the CpG profiles; each is opened via `dat.GzipFile`.
    log: callable, optional
        Called with the file name before each file is read, if provided.
    *args, **kwargs
        Passed through to `dat.read_cpg_profile`.

    Returns
    -------
    dict
        `dict (key, value)`, where `key` is the output name and `value` the CpG
        table. Entries are stored in an `OrderedDict` in input order.
    """
    cpg_profiles = OrderedDict()
    for filename in filenames:
        if log:
            log(filename)
        cpg_file = dat.GzipFile(filename, 'r')
        # try/finally guarantees the handle is closed when reading raises.
        try:
            output_name = split_ext(filename)
            cpg_profiles[output_name] = dat.read_cpg_profile(
                cpg_file, sort=True, *args, **kwargs)
        finally:
            cpg_file.close()
    return cpg_profiles
def read_cpg_profiles(filenames, *args, **kwargs):
    """Read several CpG profiles into an ordered mapping.

    Parameters
    ----------
    filenames: iterable
        File names to read; each is opened with `dat.GzipFile`.
    *args, **kwargs
        Forwarded to `dat.read_cpg_profile` (which is always called with
        `sort=True`).

    Returns
    -------
    OrderedDict
        Maps `split_ext(filename)` to the profile returned by
        `dat.read_cpg_profile`, in the order of `filenames`.
    """
    cpg_profiles = OrderedDict()
    for filename in filenames:
        cpg_file = dat.GzipFile(filename, 'r')
        try:
            output_name = split_ext(filename)
            cpg_profile = dat.read_cpg_profile(cpg_file, sort=True,
                                               *args, **kwargs)
        finally:
            # Close the file even when reading or name derivation fails.
            cpg_file.close()
        cpg_profiles[output_name] = cpg_profile
    return cpg_profiles
def annotate(anno_file, chromo, pos):
    """Annotate positions using the regions of an annotation file.

    The first three columns of the header-less table `anno_file` are read as
    chromosome, start and end. Regions on `chromo` are sorted by start, merged
    with `an.join_overlapping`, and `pos` is tested against the merged regions
    with `an.is_in`.

    Parameters
    ----------
    anno_file: str
        File name of the annotation table; opened with `dat.GzipFile`.
    chromo: str
        Chromosome to select. Chromosome names from the file are upper-cased
        and stripped of `CHR` before comparison, so `chromo` must be given in
        that normalized form.
    pos
        Positions to annotate; passed as is to `an.is_in`.

    Returns
    -------
    numpy.ndarray
        Result of `an.is_in(pos, start, end)` converted to dtype `int8`.
    """
    handle = dat.GzipFile(anno_file, 'r')
    try:
        anno = pd.read_table(handle, header=None, usecols=[0, 1, 2],
                             dtype={0: 'str', 1: 'int32', 2: 'int32'})
    finally:
        # Close the file even if parsing the table fails.
        handle.close()
    anno.columns = ['chromo', 'start', 'end']
    anno.chromo = anno.chromo.str.upper().str.replace('CHR', '')
    # Sort into a new frame rather than in place: in-place sorting of a
    # `.loc` selection raises SettingWithCopyWarning.
    anno = anno.loc[anno.chromo == chromo].sort_values('start')
    start, end = an.join_overlapping(anno.start.values, anno.end.values)
    return np.array(an.is_in(pos, start, end), dtype='int8')