Beispiel #1
0
def read_cpg_profiles(filenames, log=None, *args, **kwargs):
    """Read methylation profiles.

    Input files can be gzip compressed.

    Parameters
    ----------
    filenames
        Iterable of paths to the CpG profile files to read.
    log
        Optional callable. If truthy, it is called with each filename before
        that file is read.
    *args, **kwargs
        Forwarded unchanged to `dat.read_cpg_profile`.

    Returns
    -------
    dict
        `dict (key, value)`, where `key` is the output name and `value` the CpG
        table. The dict is an `OrderedDict`, so iteration follows the order of
        `filenames`.
    """
    # One entry per sample, kept in the order the files were given.
    cpg_profiles = OrderedDict()
    for filename in filenames:
        if log:
            log(filename)
        # Output name is the filename with its extension removed by
        # `split_ext`.
        output_name = split_ext(filename)
        # `dat.GzipFile` is the wrapper used to read gzip-compressed files.
        cpg_file = dat.GzipFile(filename, 'r')
        try:
            # Per the original notes, this yields a `pandas.DataFrame` with
            # columns `chromo`, `pos`, `value`.
            cpg_profiles[output_name] = dat.read_cpg_profile(
                cpg_file, *args, sort=True, **kwargs)
        finally:
            # Always release the handle, even if parsing fails.
            cpg_file.close()
    return cpg_profiles
Beispiel #2
0
def read_cpg_profiles(filenames, log=None, *args, **kwargs):
    """Read methylation profiles.

    Input files can be gzip compressed.

    Parameters
    ----------
    filenames
        Iterable of paths to the CpG profile files to read.
    log
        Optional callable; when truthy it is invoked with each filename
        before the file is opened.
    *args, **kwargs
        Passed through to `dat.read_cpg_profile`.

    Returns
    -------
    dict
        `dict (key, value)`, where `key` is the output name and `value` the CpG
        table. Entries are stored in an `OrderedDict` in input order.
    """

    cpg_profiles = OrderedDict()
    for filename in filenames:
        if log:
            log(filename)
        output_name = split_ext(filename)
        cpg_file = dat.GzipFile(filename, 'r')
        try:
            cpg_profile = dat.read_cpg_profile(cpg_file,
                                               *args,
                                               sort=True,
                                               **kwargs)
        finally:
            # Close the handle on both the success and the error path so a
            # failed read does not leak an open file.
            cpg_file.close()
        cpg_profiles[output_name] = cpg_profile
    return cpg_profiles
Beispiel #3
0
def read_cpg_profiles(filenames, *args, **kwargs):
    """Read several CpG profiles into an ordered mapping.

    Each file in `filenames` is opened through `dat.GzipFile`, parsed with
    `dat.read_cpg_profile(..., sort=True)` and stored under the name returned
    by `split_ext(filename)`. Extra positional and keyword arguments are
    forwarded to `dat.read_cpg_profile`.

    Returns
    -------
    OrderedDict
        Maps output name to the parsed CpG profile, in input order.
    """
    cpg_profiles = OrderedDict()
    for filename in filenames:
        output_name = split_ext(filename)
        cpg_file = dat.GzipFile(filename, 'r')
        try:
            cpg_profiles[output_name] = dat.read_cpg_profile(
                cpg_file, *args, sort=True, **kwargs)
        finally:
            # Guarantee the file is closed even when parsing raises.
            cpg_file.close()
    return cpg_profiles
Beispiel #4
0
def annotate(anno_file, chromo, pos):
    """Flag which positions fall inside annotated regions of a chromosome.

    Parameters
    ----------
    anno_file
        Path to an annotation table without a header row. Only the first
        three columns are read: chromosome, start and end.
    chromo
        Chromosome to select. It is compared against the annotation's
        chromosome names after those have been upper-cased and stripped of
        'CHR', so it must already be in that form (e.g. '1', 'X').
    pos
        Positions to test, passed as-is to `an.is_in`.

    Returns
    -------
    numpy.ndarray
        `int8` array built from the result of `an.is_in(pos, start, end)`;
        presumably 1 where a position lies in an annotated interval and 0
        otherwise -- confirm against `an.is_in`.
    """
    handle = dat.GzipFile(anno_file, 'r')
    try:
        anno = pd.read_table(handle, header=None, usecols=[0, 1, 2],
                             dtype={0: 'str', 1: 'int32', 2: 'int32'})
    finally:
        # Release the file even if parsing fails.
        handle.close()
    anno.columns = ['chromo', 'start', 'end']
    # Normalise chromosome names: 'chr1' / 'Chr1' -> '1'.
    anno.chromo = anno.chromo.str.upper().str.replace('CHR', '')
    # Filter and sort in one non-inplace step; sorting a `.loc` slice in
    # place triggers pandas' SettingWithCopyWarning.
    anno = anno.loc[anno.chromo == chromo].sort_values('start')
    # Merge overlapping intervals before the membership test.
    start, end = an.join_overlapping(anno.start.values, anno.end.values)
    return np.array(an.is_in(pos, start, end), dtype='int8')