params.ctcf_reader_orientOnly.set_sites_orientation(
            input_folder +
            "CTCF/wgEncodeAwgTfbsHaibK562CtcfcPcr1xUniPk.narrowPeak-orient.bed"
        )
        params.ctcf_reader_orientOnly.keep_only_with_orient_data()
        # Set up the corresponding predictor generators and their options:
        OrientBlocksCTCFpg = OrientBlocksPredictorGenerator(
            params.ctcf_reader_orientOnly, params.window_size)
        ConvergentPairPG = ConvergentPairPredictorGenerator(
            params.ctcf_reader, binsize=params.window_size)

        # Read RNA-seq data.
        # RNA-seq file format: the table should provide the fields "gene", "start", "end", "chr" and "FPKM".
        # Columns with different names can be mapped through the `rename` argument below.
        params.RNAseqReader = RNAseqReader(fname=input_folder +
                                           "RNA-seq/rna-seqPolyA.tsvpre.txt",
                                           name="RNA")
        # read RNA-seq data and rename table fields
        params.RNAseqReader.read_file(rename={
            "Gene name": "gene",
            "Gene start (bp)": "start",
            "Gene end (bp)": "end",
            "Chromosome/scaffold name": "chr",
            "FPKM": "sigVal"
        },
                                      sep="\t")
        # Set up the corresponding predictor generators and their options:
        RNAseqPG = SmallChipSeqPredictorGenerator(
            params.RNAseqReader, window_size=params.window_size, N_closest=3)

        # List all the predictor generators that you want to use:
Beispiel #2
0
        #     #print(row["name"])
        #     params.met_reader = ChiPSeqReader(input_folder + 'methylation/'+ row["filename"], name=row['name'])
        #     params.met_reader.read_file(renamer={"0":"chr","1":"start","2":"end","4":"sigVal"})
        #     metPG.append(SmallChipSeqPredictorGenerator(params.met_reader,params.window_size,N_closest=4))
        # #Read cage data
        # cagePG = []
        # filemanes_df = pd.read_csv(input_folder + "cage/filenames.csv")
        # assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name'])
        # for index, row in filemanes_df.iterrows():
        #     #print(row["name"])
        #     params.cage_reader = ChiPSeqReader(input_folder + 'cage/' + row["filename"], name=row['name'])
        #     params.cage_reader.read_file(renamer={"0":"chr","1":"start","2":"end","4":"sigVal"})
        #     cagePG.append(SmallChipSeqPredictorGenerator(params.cage_reader,params.window_size,N_closest=4))
        # Read RNA-Seq data
        params.RNAseqReader = RNAseqReader(
            fname=input_folder + "RNA/GSE95111_genes.fpkm_table.txt.pre.txt",
            name="RNA")
        params.RNAseqReader.read_file(rename={
            "Gene name": "gene",
            "Gene start (bp)": "start",
            "Gene end (bp)": "end",
            "Chromosome/scaffold name": "chr",
            "shCtrl-1_0": "sigVal"
        },
                                      sep="\t")
        RNAseqPG = SmallChipSeqPredictorGenerator(
            params.RNAseqReader, window_size=params.window_size, N_closest=3)

        #Read E1 data
        params.eig_reader = E1Reader()
        params.eig_reader.read_files(
Beispiel #3
0
                                                     params.window_size,
                                                     N_closest=4)
    ctcf_reader_orientOnly = ChiPSeqReader(CTCF_file, name="CTCF")
    ctcf_reader_orientOnly.read_file()
    ctcf_reader_orientOnly.set_sites_orientation(CTCF_orient_file)
    ctcf_reader_orientOnly.keep_only_with_orient_data()
    # Set up the corresponding predictor generators and their options:
    OrientBlocksCTCFpg = OrientBlocksPredictorGenerator(
        ctcf_reader_orientOnly, params.window_size)
    ConvergentPairPG = ConvergentPairPredictorGenerator(
        params.ctcf_reader, binsize=params.window_size)

    # Read RNA-seq data.
    # RNA-seq file format: the table should provide the fields "gene", "start", "end", "chr" and "FPKM".
    # Columns with different names can be mapped through the `rename` argument below.
    params.RNAseqReader = RNAseqReader(RNA_seq_file, name="RNA")
    #read RNA-seq data and rename table fields
    params.RNAseqReader.read_file(rename={
        "FPKM": "sigVal",
        "Gene start (bp)": "start",
        "Gene end (bp)": "end",
        "Chromosome/scaffold name": "chr",
        "Gene name": "gene"
    },
                                  sep="\t")
    # Set up the corresponding predictor generators and their options:
    RNAseqPG = SmallChipSeqPredictorGenerator(params.RNAseqReader,
                                              window_size=params.window_size,
                                              N_closest=3)
    params.pgs = [
        OrientCtcfpg, NotOrientCTCFpg, OrientBlocksCTCFpg, RNAseqPG,
        # # for index, row in filemanes_df.iterrows():
        # #     #print(row["name"])
        # #     params.met_reader = ChiPSeqReader(input_folder + 'methylation/'+ row["filename"], name=row['name'])
        # #     params.met_reader.read_file(renamer={"0":"chr","1":"start","2":"end","4":"sigVal"})
        # #     metPG.append(SmallChipSeqPredictorGenerator(params.met_reader,params.window_size,N_closest=4))
        # # #Read cage data
        # # cagePG = []
        # # filemanes_df = pd.read_csv(input_folder + "cage/filenames.csv")
        # # # assert len(os.listdir(input_folder + 'cage/')) - 1 == len(filemanes_df['name'])
        # # for index, row in filemanes_df.iterrows():
        # #     #print(row["name"])
        # #     params.cage_reader = ChiPSeqReader(input_folder+"cage/GSM849365_hg19_wgEncodeRikenCageK562CellPapClusters.bed.gz", name=row['name'])# + "cage/" + row["filename"], name=row['name'])
        # #     params.cage_reader.read_file(renamer={"0":"chr","1":"start","2":"end","4":"sigVal"})
        # #     cagePG.append(SmallChipSeqPredictorGenerator(params.cage_reader,params.window_size,N_closest=4))
        #Read RNA-Seq data
        params.RNAseqReader = RNAseqReader(fname=input_folder + "RNA-seq/GSM2533845_NPC_rep1.txtpre.txt",
                                           name="RNA")
        params.RNAseqReader.read_file(rename={ "Gene name": "gene",
                              "Gene start (bp)": "start",
                              "Gene end (bp)": "end",
                              "Chromosome/scaffold name": "chr",
                              "fpkm": "sigVal"},
                      sep="\t")
        RNAseqPG = SmallChipSeqPredictorGenerator(params.RNAseqReader,
                                                  window_size=params.window_size,
                                                  N_closest=3)
        # #Read TSS data
        # params.TssReader=TssReader(fname=input_folder + "TSS/NCBI_refSeq_hg19.bed", name="TSS")
        # params.TssReader.read_file()
        # TSSPG=Distance_to_TSS_PG(params.TssReader)

        # logging.info('create chipPG')
        # chipPG = []
        # filenames_df = pd.read_csv(input_folder + "H1/Chip-seq/filenames.csv")
        # # assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name'])
        # # print(len(os.listdir(input_folder + 'peaks/')))
        # # print(len(filenames_df['name']))
        # # proteins=set(["RAD21", "SMC3", "POLR2A", "H3K27ac", "H3K27me3", "DNase-seq", "H3K9me3", "H3K4me1", "H3K4me2", "H3K4me3", "YY1"])
        # for index, row in filenames_df.iterrows():
        #     # if row["name"] in proteins:
        #         params.chip_reader = ChiPSeqReader(input_folder + 'H1/Chip-seq/' + row["filename"], name=row['name'])
        #         params.chip_reader.read_file()
        #         chipPG.append(SmallChipSeqPredictorGenerator(params.chip_reader,params.window_size,N_closest=4))

        #Read RNA-Seq data
        params.RNAseqReader = RNAseqReader(
            fname=input_folder +
            "mast_cells/RNA-seq/GSE75526_fpkm (1).pre.txt",
            name="RNA")
        params.RNAseqReader.read_file(rename={
            "Gene name": "gene",
            "Gene start (bp)": "start",
            "Gene end (bp)": "end",
            "Chromosome/scaffold name": "chr",
            "FPKM": "sigVal"
        },
                                      sep="\t")
        RNAseqPG = SmallChipSeqPredictorGenerator(
            params.RNAseqReader, window_size=params.window_size, N_closest=3)

        params.pgs = [
            OrientCtcfpg, NotOrientCTCFpg, OrientBlocksCTCFpg,
            ConvergentPairPG, RNAseqPG