def process_tophat_alignments(fastq_files, bam_file, gene_file,
                              max_fragment_length, output_fastq_files,
                              output_bam_file, unpaired=False, suffix="/"):
    """Filter paired-end alignments, keeping the pairs that are not gene-concordant.

    The input BAM is walked in step with the input FASTQ files. For each
    group of paired-end reads from the BAM, synchronize_bam_fastq() is
    called; when it reports the pair as unaligned the pair is skipped here
    (per the inline comment, that helper writes unmapped reads to the
    FASTQ outputs). Otherwise is_concordant() is evaluated and, when the
    pair is not gene-concordant, every read in both mates is written to the
    output BAM. Once the BAM is exhausted, any FASTQ records still pending
    are copied to the first two output FASTQ files.

    Parameters:
        fastq_files: paths of the input FASTQ files, opened for reading
            and parsed with parse_fastq().
        bam_file: path of the input BAM, opened with pysam.Samfile("rb").
        gene_file: passed to build_exon_interval_trees() to build the
            exon intervals and trees.
        max_fragment_length: forwarded to is_concordant().
        output_fastq_files: paths of the FASTQ files to create; entries 0
            and 1 receive the trailing FASTQ records.
        output_bam_file: path of the BAM to create, using the input BAM as
            header template.
        unpaired: when true the BAM is parsed with
            parse_unpaired_pe_reads(), otherwise with parse_pe_reads().
        suffix: forwarded to synchronize_bam_fastq().

    Returns:
        config.JOB_SUCCESS.

    Every file handle opened here is closed before returning, including
    when an exception propagates, so the outputs are flushed to disk.
    """
    # index genes
    exon_intervals, exon_trees = build_exon_interval_trees(gene_file)
    # Each handle is registered as soon as it is opened so the finally
    # clause can release whatever was acquired, even on a partial failure.
    open_handles = []
    try:
        # open input files
        bamfh = pysam.Samfile(bam_file, "rb")
        open_handles.append(bamfh)
        if unpaired:
            bam_iter = parse_unpaired_pe_reads(bamfh)
        else:
            bam_iter = parse_pe_reads(bamfh)
        fastq_iters = []
        for fq in fastq_files:
            fqfh = open(fq)
            open_handles.append(fqfh)
            fastq_iters.append(parse_fastq(fqfh))
        # open output files
        outfq = []
        for fq in output_fastq_files:
            outfh = open(fq, "w")
            open_handles.append(outfh)
            outfq.append(outfh)
        outbamfh = pysam.Samfile(output_bam_file, "wb", template=bamfh)
        open_handles.append(outbamfh)
        # iterate through fastq files and bam file
        # StopIteration raised anywhere in this loop (not only by the BAM
        # iterator) ends it; this matches the original control flow.
        try:
            while True:
                bam_pe_reads = next(bam_iter)
                # synchronize fastq and bam and write unmapped reads to a file
                is_unaligned = synchronize_bam_fastq(bam_pe_reads, fastq_iters,
                                                     outfq, suffix)
                if is_unaligned:
                    continue
                # if loop reaches this point then we have a paired-end
                # read where both pairs align.  now need to check if
                # the alignment is discordant
                _tx_concordant, gene_concordant = \
                    is_concordant(bamfh, bam_pe_reads, exon_intervals,
                                  exon_trees, max_fragment_length)
                if not gene_concordant:
                    for r in bam_pe_reads[0]:
                        outbamfh.write(r)
                    for r in bam_pe_reads[1]:
                        outbamfh.write(r)
        except StopIteration:
            pass
        # finish remaining fastq lines
        # NOTE(review): unlike write_pe_fastq(), these trailing records are
        # written without a mate suffix -- confirm that this is intended.
        try:
            while True:
                fqreads = [next(it) for it in fastq_iters]
                print >>outfq[0], fastq_to_string(fqreads[0])
                print >>outfq[1], fastq_to_string(fqreads[1])
        except StopIteration:
            pass
    finally:
        # Close in reverse order of opening (outputs first, input BAM last).
        for fh in reversed(open_handles):
            fh.close()
    return config.JOB_SUCCESS
def process_tophat_alignments(fastq_files, bam_file, gene_file,
                              max_fragment_length, output_fastq_files,
                              output_bam_file, unpaired=False, suffix="/"):
    """Filter paired-end alignments, keeping the pairs that are not gene-concordant.

    The input BAM is walked in step with the input FASTQ files. For each
    group of paired-end reads from the BAM, synchronize_bam_fastq() is
    called; when it reports the pair as unaligned the pair is skipped here
    (per the inline comment, that helper writes unmapped reads to the
    FASTQ outputs). Otherwise is_concordant() is evaluated and, when the
    pair is not gene-concordant, every read in both mates is written to the
    output BAM. Once the BAM is exhausted, any FASTQ records still pending
    are copied to the first two output FASTQ files.

    Parameters:
        fastq_files: paths of the input FASTQ files, opened for reading
            and parsed with parse_fastq().
        bam_file: path of the input BAM, opened with pysam.Samfile("rb").
        gene_file: passed to build_exon_interval_trees() to build the
            exon intervals and trees.
        max_fragment_length: forwarded to is_concordant().
        output_fastq_files: paths of the FASTQ files to create; entries 0
            and 1 receive the trailing FASTQ records.
        output_bam_file: path of the BAM to create, using the input BAM as
            header template.
        unpaired: when true the BAM is parsed with
            parse_unpaired_pe_reads(), otherwise with parse_pe_reads().
        suffix: forwarded to synchronize_bam_fastq().

    Returns:
        config.JOB_SUCCESS.

    Every file handle opened here is closed before returning, including
    when an exception propagates, so the outputs are flushed to disk.
    """
    # index genes
    exon_intervals, exon_trees = build_exon_interval_trees(gene_file)
    # Each handle is registered as soon as it is opened so the finally
    # clause can release whatever was acquired, even on a partial failure.
    open_handles = []
    try:
        # open input files
        bamfh = pysam.Samfile(bam_file, "rb")
        open_handles.append(bamfh)
        if unpaired:
            bam_iter = parse_unpaired_pe_reads(bamfh)
        else:
            bam_iter = parse_pe_reads(bamfh)
        fastq_iters = []
        for fq in fastq_files:
            fqfh = open(fq)
            open_handles.append(fqfh)
            fastq_iters.append(parse_fastq(fqfh))
        # open output files
        outfq = []
        for fq in output_fastq_files:
            outfh = open(fq, "w")
            open_handles.append(outfh)
            outfq.append(outfh)
        outbamfh = pysam.Samfile(output_bam_file, "wb", template=bamfh)
        open_handles.append(outbamfh)
        # iterate through fastq files and bam file
        # StopIteration raised anywhere in this loop (not only by the BAM
        # iterator) ends it; this matches the original control flow.
        try:
            while True:
                bam_pe_reads = next(bam_iter)
                # synchronize fastq and bam and write unmapped reads to a file
                is_unaligned = synchronize_bam_fastq(bam_pe_reads, fastq_iters,
                                                     outfq, suffix)
                if is_unaligned:
                    continue
                # if loop reaches this point then we have a paired-end
                # read where both pairs align.  now need to check if
                # the alignment is discordant
                _tx_concordant, gene_concordant = \
                    is_concordant(bamfh, bam_pe_reads, exon_intervals,
                                  exon_trees, max_fragment_length)
                if not gene_concordant:
                    for r in bam_pe_reads[0]:
                        outbamfh.write(r)
                    for r in bam_pe_reads[1]:
                        outbamfh.write(r)
        except StopIteration:
            pass
        # finish remaining fastq lines
        # NOTE(review): unlike write_pe_fastq(), these trailing records are
        # written without a mate suffix -- confirm that this is intended.
        try:
            while True:
                fqreads = [next(it) for it in fastq_iters]
                print >>outfq[0], fastq_to_string(fqreads[0])
                print >>outfq[1], fastq_to_string(fqreads[1])
        except StopIteration:
            pass
    finally:
        # Close in reverse order of opening (outputs first, input BAM last).
        for fh in reversed(open_handles):
            fh.close()
    return config.JOB_SUCCESS
def write_pe_fastq(fqreads, outfq, suffix):
    """Print both mates of a paired-end read to their FASTQ outputs.

    fqreads[0] is printed to outfq[0] and fqreads[1] to outfq[1]. Each
    record is rendered by fastq_to_string(), which receives `suffix`
    followed by the mate number ("1" or "2") as its `suffix` keyword.
    """
    for mate_index, mate_number in ((0, "1"), (1, "2")):
        mate_suffix = "%s%s" % (suffix, mate_number)
        print >>outfq[mate_index], fastq_to_string(fqreads[mate_index],
                                                   suffix=mate_suffix)
def write_pe_fastq(fqreads, outfq, suffix):
    """Print both mates of a paired-end read to their FASTQ outputs.

    fqreads[0] is printed to outfq[0] and fqreads[1] to outfq[1]. Each
    record is rendered by fastq_to_string(), which receives `suffix`
    followed by the mate number ("1" or "2") as its `suffix` keyword.
    """
    for mate_index, mate_number in ((0, "1"), (1, "2")):
        mate_suffix = "%s%s" % (suffix, mate_number)
        print >>outfq[mate_index], fastq_to_string(fqreads[mate_index],
                                                   suffix=mate_suffix)