Ejemplo n.º 1
0
 # Build one predictor generator per methylation track listed in
 # methylation/filenames.csv (the "filename" and "name" columns are read).
 logging.info('create metPG')
 metPG = []
 met_dir = input_folder + 'methylation/'
 met_tracks = pd.read_csv(input_folder + "methylation/filenames.csv")
 # NOTE: a directory-listing vs. CSV row-count sanity check used to sit here
 # and is currently disabled.
 for _, track in met_tracks.iterrows():
     # The reader is also stored on params, so after the loop
     # params.met_reader refers to the last track that was read.
     params.met_reader = ChiPSeqReader(met_dir + track["filename"],
                                       name=track['name'])
     # Relabel columns "0", "1", "2" and "4" with the names passed below.
     params.met_reader.read_file(renamer={
         "0": "chr",
         "1": "start",
         "2": "end",
         "4": "sigVal",
     })
     met_generator = SmallChipSeqPredictorGenerator(params.met_reader,
                                                    params.window_size,
                                                    N_closest=4)
     metPG.append(met_generator)
 # Read CAGE data: build one predictor generator per track listed in
 # cage/filenames.csv (the "filename" and "name" columns are read).
 cagePG = []
 filemanes_df = pd.read_csv(input_folder + "cage/filenames.csv")
 for index, row in filemanes_df.iterrows():
     # Open the file named in this CSV row. Previously a single hard-coded
     # file was opened on every iteration, so every generator wrapped the
     # same data and only the track name differed.
     params.cage_reader = ChiPSeqReader(input_folder + "cage/" + row["filename"],
                                        name=row['name'])
     # Relabel columns "0", "1", "2" and "4" with the names passed below
     # (presumably BED-style columns -- confirm against the input files).
     params.cage_reader.read_file(renamer={"0": "chr",
                                           "1": "start",
                                           "2": "end",
                                           "4": "sigVal"})
     cagePG.append(SmallChipSeqPredictorGenerator(params.cage_reader,
                                                  params.window_size,
                                                  N_closest=4))
 # Read RNA-Seq data: one reader over the expression table, wrapped in a
 # predictor generator.
 rna_seq_path = input_folder + "RNA-seq/rna-seqPolyA.tsvpre.txt"
 params.RNAseqReader = RNAseqReader(fname=rna_seq_path, name="RNA")
 # Column headers of the tab-separated table, mapped to the names handed to
 # the reader.
 rna_column_names = {
     "Gene name": "gene",
     "Gene start (bp)": "start",
     "Gene end (bp)": "end",
     "Chromosome/scaffold name": "chr",
     "FPKM": "sigVal",
 }
 params.RNAseqReader.read_file(rename=rna_column_names, sep="\t")
 RNAseqPG = SmallChipSeqPredictorGenerator(
     params.RNAseqReader,
     window_size=params.window_size,
     N_closest=3,
 )
 # #Read TSS data
Ejemplo n.º 2
0
 # metPG = []
 # filemanes_df = pd.read_csv(input_folder + "methylation/filenames.csv")
 # assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name'])
 # for index, row in filemanes_df.iterrows():
 #     #print(row["name"])
 #     params.met_reader = ChiPSeqReader(input_folder + 'methylation/'+ row["filename"], name=row['name'])
 #     params.met_reader.read_file(renamer={"0":"chr","1":"start","2":"end","4":"sigVal"})
 #     metPG.append(SmallChipSeqPredictorGenerator(params.met_reader,params.window_size,N_closest=4))
 # Read CAGE data: build one predictor generator per track listed in
 # cage/filenames.csv (the "filename" and "name" columns are read).
 cagePG = []
 filemanes_df = pd.read_csv(input_folder + "cage/filenames.csv")
 # NOTE: a directory-listing vs. CSV row-count sanity check used to sit here
 # and is currently disabled.
 for index, row in filemanes_df.iterrows():
     cage_path = input_folder + "cage/" + row["filename"]
     # The reader is also stored on params, so after the loop
     # params.cage_reader refers to the last track that was read.
     params.cage_reader = ChiPSeqReader(cage_path, name=row['name'])
     # Relabel columns "0", "1", "2" and "4" with the names passed below.
     params.cage_reader.read_file(
         renamer={"0": "chr", "1": "start", "2": "end", "4": "sigVal"})
     cage_generator = SmallChipSeqPredictorGenerator(params.cage_reader,
                                                     params.window_size,
                                                     N_closest=4)
     cagePG.append(cage_generator)
 # Read RNA-Seq data: create the reader over the expression table and keep
 # it on params.
 rna_seq_path = input_folder + "RNA-seq/rna-seqPolyA.tsvpre.txt"
 params.RNAseqReader = RNAseqReader(fname=rna_seq_path, name="RNA")