if __name__ == '__main__': if len(sys.argv) < 2: print('TSS data must be given on the command line.') sys.exit() genome_fname = sys.argv[1] genome_path = os.path.join(_input_dir, genome_fname) chromosome_list = chr_tools.get_chr_list() all_tss = genome_info.get_all_tss_locations(genome_path, genome_chrcolname, genome_startcolname, chromosome_list) random_locs = random_genome_locs.generate_random_chr_pos(numb_rands_per_chr) tss_distances_dict, _ = integrate_data.calc_tss_dist(random_locs, all_tss, chromosome_list, os.path.join(_output_dir, output_filename)) '' tss_distances_list = [] for dist_list in tss_distances_dict.values(): # Each value is a list tss_distances_list.extend(dist_list) lessthan2kb, btwn2kb10kb, grtrthan10kb = \ integrate_data.bin_distances(tss_distances_list) # print lessthan2kb, btwn2kb10kb, grtrthan10kb '''
output_dist_colname = 'distance_to_exon' output_exonstart_colname = 'closest_exon_start' output_exonend_colname = 'closest_exon_end' output_event_colname = 'exon_or_nonexon' if __name__ == '__main__': if len(sys.argv) < 2: print('Reference genome info must be given on the command line.') sys.exit() ref_fname = sys.argv[1] chromosome_list = chr_tools.get_chr_list() # Create randomized tandem distribution file. random_genome_locs = random_genome_locs.generate_random_chr_pos(nrandopms_per_chr) # Create a mock textfile with appropriate rownames with open(os.path.join(outputdir, tandem_fname), 'wb') as randomfile: randomwriter = csv.writer(randomfile, delimiter='\t') # Writer colnames header = [tandem_chr_colname, tandem_start_colname] randomwriter.writerow(header) for c in chromosome_list: for pos in random_genome_locs[c]: randomwriter.writerow([c, pos]) # Initialize rf_data class, using tandem_path we just created above. rf_data = ref_and_tandem.data(ref_path=mydirs.joinpath(inputdir, ref_fname), tandem_path=os.path.join(outputdir, tandem_fname), tandem_output_path=mydirs.joinpath(outputdir,