# NOTE(review): this section was recovered from a whitespace-collapsed line;
# confirm that the live/commented split below matches the intended script.

# Restrict the orientation-only CTCF reader to records with orientation data,
# then build the CTCF-based predictor generators.
params.ctcf_reader_orientOnly.keep_only_with_orient_data()
OrientBlocksCTCFpg = OrientBlocksPredictorGenerator(
    params.ctcf_reader_orientOnly, params.window_size)
ConvergentPairPG = ConvergentPairPredictorGenerator(
    params.ctcf_reader, binsize=params.window_size)

# Read other chip-seq data:
# one SmallChipSeqPredictorGenerator per row of peaks/filenames.csv.
logging.info('create chipPG')
chipPG = []
filenames_df = pd.read_csv(input_folder + "peaks/filenames.csv")
# assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name'])
# print(len(os.listdir(input_folder + 'peaks/')))
# print(len(filenames_df['name']))
# proteins=set(["RAD21", "SMC3", "POLR2A", "H3K27ac", "H3K27me3", "DNase-seq", "H3K9me3", "H3K4me1", "H3K4me2", "H3K4me3", "YY1"])
for index, row in filenames_df.iterrows():
    # if row["name"] in proteins:
    # The CSV stores the file name without the '.gz' suffix, which is added here.
    # The reader is also stored on params, so the last one stays reachable as
    # params.chip_reader after the loop.
    params.chip_reader = ChiPSeqReader(
        input_folder + 'peaks/' + row["filename"] + '.gz', name=row['name'])
    params.chip_reader.read_file()
    chipPG.append(
        SmallChipSeqPredictorGenerator(params.chip_reader, params.window_size, N_closest=4))
# assert len(chipPG)==len(proteins)

# Read methylation data:
# one SmallChipSeqPredictorGenerator per row of methylation/filenames.csv.
logging.info('create metPG')
metPG = []
# The spelling "filemanes_df" is kept because code outside this section may
# reference the name.
filemanes_df = pd.read_csv(input_folder + "methylation/filenames.csv")
# assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name'])
for index, row in filemanes_df.iterrows():
    # print(row["name"])
    params.met_reader = ChiPSeqReader(
        input_folder + 'methylation/' + row["filename"], name=row['name'])
    # Methylation files are read with a renamer that maps columns "0", "1", "2"
    # and "4" to chr, start, end and sigVal.
    params.met_reader.read_file(
        renamer={"0": "chr", "1": "start", "2": "end", "4": "sigVal"})
    metPG.append(
        SmallChipSeqPredictorGenerator(params.met_reader, params.window_size, N_closest=4))

#Read cage data
params.ctcf_reader_orientOnly, params.window_size) ConvergentPairPG = ConvergentPairPredictorGenerator( params.ctcf_reader, binsize=params.window_size) #Read other chip-seq data logging.info('create chipPG') chipPG = [] filenames_df = pd.read_csv(input_folder + "H1/Chip-seq/filenames.csv") # assert len(os.listdir(input_folder + 'peaks/')) - 1 == len(filenames_df['name']) # print(len(os.listdir(input_folder + 'peaks/'))) # print(len(filenames_df['name'])) # proteins=set(["RAD21", "SMC3", "POLR2A", "H3K27ac", "H3K27me3", "DNase-seq", "H3K9me3", "H3K4me1", "H3K4me2", "H3K4me3", "YY1"]) for index, row in filenames_df.iterrows(): # if row["name"] in proteins: params.chip_reader = ChiPSeqReader(input_folder + 'H1/Chip-seq/' + row["filename"], name=row['name']) params.chip_reader.read_file() chipPG.append( SmallChipSeqPredictorGenerator(params.chip_reader, params.window_size, N_closest=4)) # #Read RNA-Seq data # params.RNAseqReader = RNAseqReader(fname=input_folder + "RNA-seq/rna-seqPolyA.tsvpre.txt", # name="RNA") # params.RNAseqReader.read_file(rename={ "Gene name": "gene", # "Gene start (bp)": "start", # "Gene end (bp)": "end", # "Chromosome/scaffold name": "chr", # "FPKM": "sigVal"},