Ejemplo n.º 1
0
def collect_and_write_peak_summary_stats(
        peaks, motifs, fasta,
        chipseq_reads,
        atacseq_reads, histone_mark_reads,
        frag_len, ofname):
    """Compute summary statistics for every peak and write them to `ofname`.

    The first peak is processed in the calling process so that the column
    names are known before any worker writes a row. The remaining peaks are
    placed on a multiprocessing queue and handed to `process_peaks_worker`
    through `fork_and_wait`.

    Args:
        peaks: Iterable of regions. Each region is indexed as region[0],
            region[1], region[2] (presumably contig, start, stop - confirm
            against the caller).
        motifs, fasta, chipseq_reads, atacseq_reads, histone_mark_reads:
            Passed through unchanged to `load_peak_region` and to the
            workers.
        frag_len: Fragment length. The first region is widened by
            2*frag_len on each side (start clamped at 0) before loading.
        ofname: Path of the tab-separated output file (opened with "w").

    Raises:
        ValueError: If `peaks` yields no regions. Previously this case
            blocked forever on an empty queue.
    """
    # Pull the first peak straight from the iterable instead of reading it
    # back from the queue: a blocking get() on an empty queue never returns.
    remaining_peaks = iter(peaks)
    no_peak = object()
    region = next(remaining_peaks, no_peak)
    if region is no_peak:
        raise ValueError(
            "peaks is empty: at least one peak is required to build the header")

    proc_queue = multiprocessing.Queue()
    for pk in remaining_peaks:
        proc_queue.put(pk)

    # process a single peak so that we know what to name the columns
    peak = load_peak_region(
        fasta,
        region[0], max(0, region[1]-2*frag_len), region[2]+2*frag_len,
        atacseq_reads, histone_mark_reads,
        motifs,
        chipseq_reads,
        frag_len)
    header, vals = peak.calc_summary_stats()

    ofp = ProcessSafeOPStream(open(ofname, "w"))
    try:
        args = [proc_queue, ofp, motifs, fasta,
                chipseq_reads, atacseq_reads, histone_mark_reads, frag_len]

        ofp.write("\t".join(header) + "\n")
        ofp.write("\t".join(map(str, vals)) + "\n")

        fork_and_wait(NTHREADS, process_peaks_worker, args)
        # let the printing catch up
        time.sleep(0.1)
    finally:
        # Always release the output stream, even when a worker or a write
        # fails part-way through.
        ofp.close()
Ejemplo n.º 2
0
def main():
    """Write the summary-stat header and launch the data extraction workers.

    Arguments come from `parse_arguments()`. The header is obtained by
    calling `load_summary_stats` on a single peak, written as a
    tab-separated line to the output file, and the rest of the work is
    delegated to `extract_data_worker` through `fork_and_wait`.
    """
    motifs, fasta, peaks = parse_arguments()
    peak_cntr = Counter()
    # Alternative output names left in the original source:
    #   'predictors.E116_E117_E118.CTCF_REST.txt'
    #   'SELEX.output.txt'
    output_fname = 'SELEX.predictors.YY1.txt'

    # Only the header is needed here; the stats of the probe peak are unused.
    # NOTE(review): index 100 is hard-coded, so this presumably fails when
    # fewer than 101 peaks are supplied - confirm the intent.
    probe_peak = peaks[100][1]
    column_names, _ = load_summary_stats(probe_peak, fasta, motifs)
    header_line = "\t".join(column_names) + "\n"

    worker_args = (ofp_args := None)  if False else None
    with ThreadSafeFile(output_fname, 'w') as out:
        out.write(header_line)
        worker_args = (out, peak_cntr, motifs, fasta, peaks)
        fork_and_wait(NTHREADS, extract_data_worker, worker_args)
def score_regions(ofp, genome, regions, motifs):
    """Write a header row to `ofp`, then score all regions with workers.

    The header is the word "region" padded to 30 characters followed by one
    15-character column label per motif, built from the motif's `tf_name`
    and the first element of its `tf_species`. Every region is placed on a
    multiprocessing queue that `score_regions_worker` receives through
    `fork_and_wait`.
    """
    column_labels = [
        (motif.tf_name + "_" + motif.tf_species[0]).ljust(15)
        for motif in motifs
    ]
    header_row = "region".ljust(30) + " " + " ".join(column_labels) + "\n"
    ofp.write(header_row)

    pending = multiprocessing.Queue()
    # Keep the queue's feeder thread from being joined at process exit.
    pending.cancel_join_thread()
    for item in regions:
        pending.put(item)

    worker_args = (ofp, genome, pending, motifs)
    fork_and_wait(36, score_regions_worker, worker_args)
    pending.close()
Ejemplo n.º 4
0
def main():
    """Build transcript lines for every gene and write them to bins.gtf.

    The first command-line argument is passed to `load_gtf`; each remaining
    argument is opened as an `RNAseqReads` source, initialised with
    `reads_are_stranded=False`, and combined into one `MergedReads` object.
    Genes are queued for `build_transcripts_lines_worker`, which is run
    through `fork_and_wait`.
    """
    genes = load_gtf(sys.argv[1])

    read_sources = []
    for fname in sys.argv[2:]:
        read_sources.append(RNAseqReads(fname).init(reads_are_stranded=False))
    reads = MergedReads(read_sources)

    fl_dists = build_fl_dists_from_annotation(genes, reads)

    # Work queue consumed by the workers: one entry per gene.
    genes_queue = multiprocessing.Queue()
    for gene in genes:
        genes_queue.put(gene)

    with ThreadSafeFile("bins.gtf", "w") as out:
        # Track line goes first, before any worker output.
        out.write("track type=gtf name=bin_expresion_test\n")
        worker_args = [genes_queue, reads, fl_dists, out]
        fork_and_wait(24, build_transcripts_lines_worker, worker_args)