# Hard-coded run configuration (cell type K562, 5000-bp bins).
#
# The statements below were previously collapsed onto a single physical line
# that began with '#', so the interpreter treated all of them as one comment.
# They are restored here one statement per line.
#
# chr_num="12"
# conttype = "contacts.gz"
# NOTE(review): `conttype` is read by the loop below, but its only visible
# assignment is the commented-out line above; it has to be defined earlier in
# the file -- confirm. `Parameters` is likewise expected to be imported
# elsewhere in the file.
if __name__ == '__main__':  # Required for parallelization, at least on Windows
    # ,"chr10", "chr1"]:   <- leftover tail of an earlier loop header
    for conttype in [conttype]:
        print("hello")
        logging.basicConfig(
            format='%(asctime)s %(name)s: %(message)s',
            datefmt='%I:%M:%S',
            level=logging.DEBUG,
        )

        input_folder = "/mnt/scratch/ws/psbelokopytova/202001051010polina_data/3DPredictor/input/K562/"
        output_folder = "/mnt/scratch/ws/psbelokopytova/202001051010polina_data/3DPredictor/out/K562/5KB/all_predictors/"
        cell_type = "K562"
        # NOTE(review): per-chromosome values; their meaning and units are
        # not shown in this part of the file -- confirm against the consumer.
        lengths_dict = {
            'chr1': 1494930,
            'chr3': 609806,
            'chr5': 518646,
            'chr7': 682860,
            'chr11': 726290,
            'chr13': 115324,
        }

        params = Parameters()
        # Sequence resolution of the contacts data; also used to locate the
        # normalization-coefficient file.
        params.binsize = 5000
        # Region around a contact to be binned for predictors; usually equal
        # to binsize.
        params.window_size = params.binsize
        # Minimum distance between contacting regions.
        params.mindist = params.binsize * 2 + 1
        params.maxdist = 1500000
        # How many contacts to write to the output file.
        params.sample_size = 250000
        params.conttype = conttype
        params.max_cpus = 11
        params.keep_only_orient = False
        params.use_only_contacts_with_CTCF = "all_cont"  # "all_cont" or "cont_with_CTCF"

        # Set True to write one training file covering several chromosomes.
        write_all_chrms_in_file = False
        # Set True to use all contacts in the region, without empty contacts.
        fill_empty_contacts = False

        # Lazy %-style arguments: the message text is unchanged, but it is
        # only formatted when DEBUG records are actually emitted.
        logging.getLogger(__name__).debug("Using input folder %s", input_folder)
        # Read contacts data
# Run configuration taken from `args`.
#
# The statements below were previously collapsed onto a single physical line,
# which is not valid Python and let the first inline '#' comment swallow every
# statement after it. They are restored here one statement per line.
#
# NOTE(review): `args` is not defined in the visible part of the file; it is
# indexed by string keys and its values are passed to int() or concatenated
# with strings, so it is presumably a mapping of string options -- confirm.
# `Parameters` is likewise expected to be imported elsewhere in the file.
output_folder = args['output_folder']
cell_type = args['cell_type']
start = int(args['start'])
end = int(args['end'])
chromosome = 'chr' + args['chr_num']
hic_name = args['hic_name']
CTCF_file_name = args['CTCF_file_name']
# RNA_file_name = args['RNA_file_name']
# validate_chrs = args['validate_chrs'].split(",")
# for chr in validate_chrs:
#     chr = int(chr)

params = Parameters()
# Sequence resolution of the contacts data; also used to locate the
# normalization-coefficient file.
params.binsize = int(args['binsize'])
# Region around a contact to be binned for predictors; usually equal to
# binsize.
params.window_size = params.binsize
# Minimum distance between contacting regions.
params.mindist = params.binsize * 2 + 1
params.maxdist = 1500000
# params.sample_size = end - start
# How many contacts to write to the output file.
params.sample_size = 2
# params.conttype = conttype
params.max_cpus = int(args['max_cpus'])
params.keep_only_orient = False
params.use_only_contacts_with_CTCF = "all_cont"  # "all_cont" or "cont_with_CTCF"

rearrangement = False
# deletion = Interval("chr" + chr_num, start, end)

# Set True to write one training file covering several chromosomes.
write_all_chrms_in_file = False
# Set True to use all contacts in the region, without empty contacts.
fill_empty_contacts = False