def collect_and_write_peak_summary_stats(
        peaks, motifs, fasta, chipseq_reads,
        atacseq_reads, histone_mark_reads, frag_len, ofname):
    """Write per-peak summary statistics to a tab-separated file.

    The first peak is processed in this process so that the column names
    are known before any worker starts. Its header and values are written
    first; the remaining peaks are queued and handed to ``NTHREADS``
    ``process_peaks_worker`` processes through ``fork_and_wait``.

    Args:
        peaks: Iterable of regions. Each region is indexed as
            ``region[0]``, ``region[1]`` and ``region[2]``; the last two
            are numeric coordinates (presumably contig, start, stop --
            confirm against the caller).
        motifs: Passed through to ``load_peak_region`` and the workers.
        fasta: Passed through to ``load_peak_region`` and the workers.
        chipseq_reads: Passed through to ``load_peak_region`` and the
            workers.
        atacseq_reads: Passed through to ``load_peak_region`` and the
            workers.
        histone_mark_reads: Passed through to ``load_peak_region`` and the
            workers.
        frag_len: Fragment length. The first region is widened by
            ``2 * frag_len`` on each side, with the start clamped at 0.
        ofname: Path of the output file; it is opened with mode ``"w"``.

    Raises:
        ValueError: If ``peaks`` yields no regions, because the output
            columns cannot be determined without at least one peak.
    """
    # Take the first peak straight from the iterable instead of fetching
    # it back from the queue: a blocking get() on an empty queue would
    # hang forever when no peaks were supplied.
    no_peak = object()
    peaks_iter = iter(peaks)
    region = next(peaks_iter, no_peak)
    if region is no_peak:
        raise ValueError(
            "peaks is empty: cannot determine the summary stats columns")

    proc_queue = multiprocessing.Queue()
    for pk in peaks_iter:
        proc_queue.put(pk)

    # process a single peak so that we know what to name the columns
    peak = load_peak_region(
        fasta, region[0],
        max(0, region[1] - 2*frag_len), region[2] + 2*frag_len,
        atacseq_reads, histone_mark_reads,
        motifs, chipseq_reads, frag_len)
    header, vals = peak.calc_summary_stats()

    ofp = ProcessSafeOPStream(open(ofname, "w"))
    try:
        args = [proc_queue, ofp, motifs, fasta, chipseq_reads,
                atacseq_reads, histone_mark_reads, frag_len]
        ofp.write("\t".join(header) + "\n")
        ofp.write("\t".join(map(str, vals)) + "\n")
        fork_and_wait(NTHREADS, process_peaks_worker, args)
        # let the printing catch up
        time.sleep(0.1)
    finally:
        # Close the stream even if a write or a worker raised.
        ofp.close()
def main():
    """Extract predictors for every peak into a tab-separated file.

    Parses the command line with ``parse_arguments``, derives the column
    header from one sample peak, writes that header to the output file and
    then runs ``NTHREADS`` ``extract_data_worker`` processes through
    ``fork_and_wait``, all sharing the same output handle.

    Raises:
        IndexError: If ``peaks`` is empty, since no sample peak is
            available to derive the header from.
    """
    motifs, fasta, peaks = parse_arguments()
    peak_cntr = Counter()
    # Output names used for earlier runs:
    #   'predictors.E116_E117_E118.CTCF_REST.txt'
    #   'SELEX.output.txt'
    output_fname = 'SELEX.predictors.YY1.txt'

    # The sample peak is only used to obtain the header. Peak 100 is kept
    # as the sample when it exists; inputs with fewer peaks fall back to
    # the last peak instead of failing with an IndexError.
    try:
        sample_peak = peaks[100]
    except IndexError:
        sample_peak = peaks[-1]
    header, _ = load_summary_stats(sample_peak[1], fasta, motifs)

    with ThreadSafeFile(output_fname, 'w') as ofp:
        ofp.write("\t".join(header) + "\n")
        fork_and_wait(
            NTHREADS, extract_data_worker,
            (ofp, peak_cntr, motifs, fasta, peaks))
def score_regions(ofp, genome, regions, motifs, num_workers=36):
    """Write a header line to ``ofp`` and score ``regions`` in parallel.

    The header is the word ``region`` padded to 30 characters followed by
    one 15-character column per motif, named ``<tf_name>_<x>`` where
    ``<x>`` is the first element of ``motif.tf_species``. The regions are
    then placed on a multiprocessing queue that is consumed by
    ``score_regions_worker`` processes started through ``fork_and_wait``.

    Args:
        ofp: Output stream; must provide ``write``. It is also passed to
            every worker.
        genome: Passed through to the workers.
        regions: Iterable of regions to queue for the workers.
        motifs: Iterable of motif objects exposing ``tf_name`` and
            ``tf_species``. It is iterated here and also passed to the
            workers.
        num_workers: Number of worker processes requested from
            ``fork_and_wait``. Defaults to 36, the previously hard-coded
            value.
    """
    motif_columns = " ".join(
        (motif.tf_name + "_" + motif.tf_species[0]).ljust(15)
        for motif in motifs)
    ofp.write("region".ljust(30) + " " + motif_columns + "\n")

    regions_queue = multiprocessing.Queue()
    # Do not make this process wait at exit for the queue's feeder thread
    # to flush data that no worker consumed.
    regions_queue.cancel_join_thread()
    try:
        for region in regions:
            regions_queue.put(region)
        fork_and_wait(
            num_workers, score_regions_worker,
            (ofp, genome, regions_queue, motifs))
    finally:
        # Close the queue even if queuing or a worker raised.
        regions_queue.close()
def main():
    """Build transcript lines for every gene and write them to bins.gtf.

    Command line: the first argument is handed to ``load_gtf``; every
    remaining argument is opened as an unstranded ``RNAseqReads`` source
    and the sources are combined with ``MergedReads``. The genes are put
    on a multiprocessing queue that is consumed by 24
    ``build_transcripts_lines_worker`` processes started through
    ``fork_and_wait``, all sharing one output file.
    """
    annotation_fname = sys.argv[1]
    reads_fnames = sys.argv[2:]

    genes = load_gtf(annotation_fname)

    # One unstranded reads object per input file, merged into one source.
    reads_sources = []
    for reads_fname in reads_fnames:
        reads_sources.append(
            RNAseqReads(reads_fname).init(reads_are_stranded=False))
    reads = MergedReads(reads_sources)

    fl_dists = build_fl_dists_from_annotation(genes, reads)

    # Queue every gene up front; the workers pull from this queue.
    genes_queue = multiprocessing.Queue()
    for queued_gene in genes:
        genes_queue.put(queued_gene)

    with ThreadSafeFile("bins.gtf", "w") as ofp:
        # Track line first, before any worker can write.
        ofp.write("track type=gtf name=bin_expresion_test\n")
        worker_args = [genes_queue, reads, fl_dists, ofp]
        fork_and_wait(24, build_transcripts_lines_worker, worker_args)