コード例 #1
0
        yielded_score_dfm = list(self.__yield_score_dfm(snp_dfm))
        # result = pd.concat(yielded_score_dfm, axis=1)
        result = reduce(lambda left, right: pd.merge(left, right, on='name'),
                        yielded_score_dfm)
        return result

    def save_temp(self, _result):
        """Dump ``_result`` to ``self.temp_dest`` as tab-separated text.

        A header row is written and the index is left out.

        :param _result: object exposing ``to_csv`` (presumably a pandas
            DataFrame -- confirm against callers).
        """
        csv_options = dict(sep='\t', index=False, header=True)
        _result.to_csv(self.temp_dest, **csv_options)


if __name__ == '__main__':

    rc_util = RepliChipUtil()

    snp_bed_fn = os.path.join(sys_tool.find_directory("fsu_repli_chip"),
                              'RSNP_50kb.bed')
    snp_dfm = pd.read_table(snp_bed_fn,
                            header=None,
                            names=['chrom', 'chromStart', 'chromEnd', 'name'])

    rc_util.src_data_dir = sys_tool.find_directory("fsu_repli_chip")
    rc_util.src_data_fn = dict(
        FsuBg02esRep1="wgEncodeFsuRepliChipBg02esWaveSignalRep1.bed",
        FsuBg02esRep2="wgEncodeFsuRepliChipBg02esWaveSignalRep2.bed",
        FsuGm06990Rep1="wgEncodeFsuRepliChipGm06990WaveSignalRep1.bed",
        FsuGm06990Rep2="wgEncodeFsuRepliChipGm06990WaveSignalRep2.bed",
        FsuH1hescRep1="wgEncodeFsuRepliChipH1hescWaveSignalRep1.bed",
        FsuH1hescRep2="wgEncodeFsuRepliChipH1hescWaveSignalRep2.bed",
        FsuH1hescRep3="wgEncodeFsuRepliChipH1hescWaveSignalRep3.bed",
        FsuH7esRep1="wgEncodeFsuRepliChipH7esWaveSignalRep1.bed",
コード例 #2
0
ファイル: tf_util.py プロジェクト: ramseylab/cerenkov
                               'tfbsName': 'pwm'
                           })

        result = snp_dfm.merge(result,
                               how='left',
                               on=['name', 'chrom'],
                               copy=True)

        return result.loc[:, ['name', 'pwm']]

    def save_temp(self, _result):
        """Write ``_result`` to the path held in ``self.temp_dest``.

        Output is tab-delimited, includes the column header line, and
        omits the index.

        :param _result: object exposing ``to_csv`` (presumably a pandas
            DataFrame -- confirm against callers).
        """
        destination = self.temp_dest
        _result.to_csv(destination, sep='\t', header=True, index=False)


if __name__ == '__main__':
    # Ad-hoc driver: read the RSNP BED file, run the JASPAR TFBS utility on
    # it, and write whatever `extract` returns to a TSV file.
    bed_columns = ['chrom', 'chromStart', 'chromEnd', 'name']
    rsnp_bed_fn = os.path.join(find_directory('dhs'), "RSNP_50kb.bed")
    # The BED file has no header row, so column names are supplied here.
    rsnp_dfm = pd.read_table(rsnp_bed_fn, header=None, names=bed_columns)

    jaspar_tf_util = JasparTfbsUtil()
    jaspar_tf_util.src_data_dir = find_directory('Jaspar_TFBS')
    jaspar_tf_util.src_data_fn = 'jaspar_tfbs_ensembl_75_hg19.txt'

    result = jaspar_tf_util.extract(rsnp_dfm)

    # Tab-separated, with header, without the index.
    tsv_options = dict(sep='\t', header=True, index=False)
    result.to_csv("FOO_RSNP.tsv", **tsv_options)

# if __name__ == '__main__':
#     rsnp_bed_fn = "{}/RSNP_50kb.bed".format(find_directory('dhs'))
#     rsnp_dfm = pd.read_table(rsnp_bed_fn, header=None,
コード例 #3
0
ファイル: fitcons_util.py プロジェクト: ramseylab/cerenkov
                                                    index_names=False),
                              from_string=True)
        yielded_dfms = list(self.__yield_fitcons_dfm(snp_bed_obj))

        snp_fitcons = reduce(
            lambda left, right: pd.merge(left, right, on='name'), yielded_dfms)

        return snp_fitcons

    def save_temp(self, _result):
        """Persist ``_result`` at ``self.temp_dest`` in TSV form.

        The column header is emitted; the index is not.

        :param _result: object exposing ``to_csv`` (presumably a pandas
            DataFrame -- confirm against callers).
        """
        _result.to_csv(
            self.temp_dest,
            header=True,
            index=False,
            sep='\t',
        )


if __name__ == '__main__':
    fitcons_util = FitconsUtil()
    fitcons_util.src_data_dir = find_directory("fitcons")
    fitcons_util.src_data_fn = dict(
        fitConsGm="fc-gm-0.bed",
        fitConsH1="fc-h1-0.bed",
        fitConsHu="fc-hu-0.bed",
        fitConsI6="fc-i6-0.bed",
    )

    rsnp_dfm = pd.read_table(os.path.join(find_directory('fitcons'),
                                          "RSNP_50kb.bed"),
                             header=None,
                             names=['chrom', 'chromStart', 'chromEnd', 'name'])
    fitcons_util.temp_dest = 'foo_rsnp.txt'
    fitcons_util.extract(_input=rsnp_dfm)

    csnp_dfm = pd.read_table(os.path.join(find_directory('fitcons'),
コード例 #4
0
ファイル: eqtl_util.py プロジェクト: ramseylab/cerenkov
        # find minimum non-NAN 'P_Val' for each SNP
        result = result.groupby('name').agg(min).reset_index()

        # some SNPs has no non-NAN 'P_Val'; mark their aggregated 'P_VAL' NAN
        result = snp_dfm.merge(result, how='left', on='name', copy=True)

        result.loc[:, 'P_Val'] = result.loc[:, 'P_Val'].apply(
            self.__transform_p_value)
        result = result.rename(columns={'P_Val': 'eqtlPvalue'})
        return result.loc[:, ['name', 'eqtlPvalue']]

    def save_temp(self, _result):
        """Save ``_result`` as a tab-separated file at ``self.temp_dest``.

        The file starts with a header line and carries no index column.

        :param _result: object exposing ``to_csv`` (presumably a pandas
            DataFrame -- confirm against callers).
        """
        tsv_options = {'sep': '\t', 'header': True, 'index': False}
        _result.to_csv(self.temp_dest, **tsv_options)


if __name__ == '__main__':
    # Ad-hoc driver: load the RSNP BED file and run the eQTL utility on it,
    # printing the input shape and the value returned by `extract`.
    # NOTE(review): the BED file is looked up under the 'CADD' directory --
    # confirm that is the intended location for this driver.
    rsnp_bed_fn = os.path.join(find_directory('CADD'), "RSNP_50kb.bed")
    bed_columns = ['chrom', 'chromStart', 'chromEnd', 'name']
    # The BED file has no header row, so column names are supplied here.
    rsnp_dfm = pd.read_table(rsnp_bed_fn, header=None, names=bed_columns)
    print(rsnp_dfm.shape)

    eqtl_util = EqtlUtil()
    eqtl_util.src_data_dir = find_directory('eqtl')
    # eqtl_util.src_data_fn = ['Stomach.portal.eqtl', 'Heart_Left_Ventricle.portal.eqtl']
    eqtl_util.temp_dest = 'foo_rsnp.txt'

    result = eqtl_util.extract(rsnp_dfm)
    print(result)
コード例 #5
0
                                on=['name', 'chrom'],
                                copy=True)

        return snp_dfm.loc[:, ["name", "masterDhsScore", "masterDhsCount"
                               ]].fillna(0)

    def save_temp(self, _result):
        """Write selected columns of ``_result`` to ``self.temp_dest``.

        Only ``name``, ``masterDhsScore`` and ``masterDhsCount`` are written,
        in that order, as tab-separated text with a header line and without
        the index.

        :param _result: object exposing ``to_csv`` (presumably a pandas
            DataFrame containing the three columns -- confirm against
            callers).
        """
        output_columns = ["name", "masterDhsScore", "masterDhsCount"]
        tsv_options = dict(sep='\t', header=True, index=False)
        _result.to_csv(self.temp_dest, columns=output_columns, **tsv_options)


if __name__ == '__main__':
    rsnp_bed_fn = os.path.join(find_directory('dhs'), "RSNP_50kb.bed")
    rsnp_dfm = pd.read_table(rsnp_bed_fn,
                             header=None,
                             names=['chrom', 'chromStart', 'chromEnd', 'name'])
    csnp_bed_fn = os.path.join(find_directory('dhs'), "CSNP_50kb.bed")
    csnp_dfm = pd.read_table(csnp_bed_fn,
                             header=None,
                             names=['chrom', 'chromStart', 'chromEnd', 'name'])

    dhs_util = MasterDhsUtil()
    dhs_util.db_config_key = 'local_hg19'

    dhs_util.temp_dest = 'FOO_CSNP.tsv'
    dhs_util.extract(csnp_dfm)

    dhs_util.temp_dest = 'FOO_RSNP.tsv'
コード例 #6
0
    coord_util = CoordUtil()
    coord_util.db_config_key = db_config_key

    tf_util = TfUtil(reproduce_osu17=True)
    tf_util.db_config_key = db_config_key

    # jaspar_tfbs_util = JasparTfbsUtil()
    # jaspar_tfbs_util.src_data_dir = sys_tool.find_directory('Jaspar_TFBS')
    # jaspar_tfbs_util.src_data_fn = 'jaspar_tfbs_ensembl_75_hg19.txt'

    mst_dhs_util = MasterDhsUtil()
    mst_dhs_util.db_config_key = db_config_key

    uni_dhs_util = UniformDhsUtil()
    uni_dhs_util.src_data_dir = sys_tool.find_directory("dhs")
    uni_dhs_util.src_data_fn = "UniformDnaseIHS"

    phastcons_util = PhastconsUtil()
    phastcons_util.db_config_key = db_config_key

    tss_dist_util = TssDistUtil()
    tss_dist_util.db_config_key = db_config_key

    eqtl_util = EqtlUtil()
    eqtl_util.src_data_dir = sys_tool.find_directory('eqtl')

    gerp_util = GerpUtil()
    gerp_util.src_data_dir = sys_tool.find_directory("gerp")
    gerp_util.src_data_fn = "All_hg19_RS.bw"
コード例 #7
0
            'CTCF': 'CTCF_REG',
            'E': 'ENH',
            'PF': 'TSS_FLANK',
            'R': 'REP',
            'T': 'TRAN',
            'TSS': 'TSS',
            'WE': 'WEAK_ENH'
        }

        gwava_dfm = pd.DataFrame(results, index=names.keys()).T.rename(columns=names)

        snp_dfm = snp_dfm.merge(gwava_dfm, how='left', left_on='name', right_index=True, copy=True)

        return snp_dfm.fillna(0).drop(['chrom', 'chromStart', 'chromEnd'], axis=1)

    def save_temp(self, _result):
        """Store ``_result`` at ``self.temp_dest`` as tab-delimited text.

        Column names are written as the first line; the index is dropped.

        :param _result: object exposing ``to_csv`` (presumably a pandas
            DataFrame -- confirm against callers).
        """
        out_path = self.temp_dest
        _result.to_csv(out_path, header=True, index=False, sep='\t')


if __name__ == '__main__':
    # Ad-hoc driver: load the RSNP BED file and run the GWAVA utility on it.
    # NOTE(review): the BED file is looked up under the 'fsu_repli_chip'
    # directory -- confirm that is the intended location for this driver.
    bed_dir = sys_tool.find_directory("fsu_repli_chip")
    snp_bed_fn = os.path.join(bed_dir, 'RSNP_50kb.bed')
    # The BED file has no header row, so column names are supplied here.
    bed_columns = ['chrom', 'chromStart', 'chromEnd', 'name']
    snp_dfm = pd.read_table(snp_bed_fn, header=None, names=bed_columns)

    gwava_util = GwavaUtil()
    gwava_util.src_data_dir = sys_tool.find_directory('GWAVA')
    gwava_util.src_data_fn = 'segmentation.bed.gz'
    gwava_util.temp_dest = 'GWAVA_RSNP.txt'

    gwava_util.extract(_input=snp_dfm)