예제 #1
0
def process_tophat_alignments(fastq_files,
                              bam_file,
                              gene_file,
                              max_fragment_length,
                              output_fastq_files,
                              output_bam_file,
                              unpaired=False,
                              suffix="/"):
    """Split paired-end alignments into leftover FASTQ and discordant BAM.

    Walks the read pairs of `bam_file` alongside the FASTQ inputs.  Pairs
    that `synchronize_bam_fastq` reports as unaligned are skipped here
    (that helper is handed the output FASTQ handles).  Every other pair is
    checked with `is_concordant` and written to `output_bam_file` when it
    is not gene-concordant.  FASTQ records still unread once the BAM is
    exhausted are appended to the output FASTQ files.

    All files opened by this function are closed before it returns or
    raises.

    Args:
        fastq_files: paths of the input FASTQ files; the final flush step
            reads records from the first two.
        bam_file: path of the input BAM file.
        gene_file: passed to `build_exon_interval_trees`.
        max_fragment_length: forwarded to `is_concordant`.
        output_fastq_files: paths of the output FASTQ files; index 0 and 1
            receive records from the matching input FASTQ.
        output_bam_file: path of the BAM file to create, using the input
            BAM as template.
        unpaired: when true, pairs are read with
            `parse_unpaired_pe_reads` instead of `parse_pe_reads`.
        suffix: forwarded to `synchronize_bam_fastq`.

    Returns:
        `config.JOB_SUCCESS`.
    """
    # index genes
    exon_intervals, exon_trees = build_exon_interval_trees(gene_file)
    # every handle opened below is registered here so the finally clause
    # can close it whether processing succeeds or fails
    handles = []
    try:
        # open input files
        bamfh = pysam.Samfile(bam_file, "rb")
        handles.append(bamfh)
        if unpaired:
            bam_iter = parse_unpaired_pe_reads(bamfh)
        else:
            bam_iter = parse_pe_reads(bamfh)
        fastq_iters = []
        for fq in fastq_files:
            fh = open(fq)
            handles.append(fh)
            fastq_iters.append(parse_fastq(fh))
        # open output files
        outfq = []
        for fq in output_fastq_files:
            fh = open(fq, "w")
            handles.append(fh)
            outfq.append(fh)
        outbamfh = pysam.Samfile(output_bam_file, "wb", template=bamfh)
        handles.append(outbamfh)
        # iterate through fastq files and bam file; the StopIteration
        # handler deliberately wraps the whole loop body, as before, so an
        # exhausted iterator inside a helper also ends the loop quietly
        try:
            while True:
                bam_pe_reads = next(bam_iter)
                # synchronize fastq and bam and write unmapped reads
                # to a file
                is_unaligned = synchronize_bam_fastq(bam_pe_reads,
                                                     fastq_iters,
                                                     outfq, suffix)
                if is_unaligned:
                    continue
                # if loop reaches this point then we have a paired-end
                # read where both pairs align.  now need to check if
                # the alignment is discordant
                _tx_concordant, gene_concordant = \
                    is_concordant(bamfh, bam_pe_reads, exon_intervals,
                                  exon_trees, max_fragment_length)
                if not gene_concordant:
                    for r in bam_pe_reads[0]:
                        outbamfh.write(r)
                    for r in bam_pe_reads[1]:
                        outbamfh.write(r)
        except StopIteration:
            pass
        # finish remaining fastq lines
        # NOTE(review): these records are written without `suffix`,
        # unlike write_pe_fastq -- confirm that this is intended
        try:
            while True:
                fqreads = [next(it) for it in fastq_iters]
                print >> outfq[0], fastq_to_string(fqreads[0])
                print >> outfq[1], fastq_to_string(fqreads[1])
        except StopIteration:
            pass
    finally:
        # close in reverse order of opening (output BAM first)
        for fh in reversed(handles):
            fh.close()
    return config.JOB_SUCCESS
def process_tophat_alignments(fastq_files, bam_file, gene_file,
                              max_fragment_length,
                              output_fastq_files,
                              output_bam_file,
                              unpaired=False,
                              suffix="/"):
    """Split paired-end alignments into leftover FASTQ and discordant BAM.

    Walks the read pairs of `bam_file` alongside the FASTQ inputs.  Pairs
    that `synchronize_bam_fastq` reports as unaligned are skipped here
    (that helper is handed the output FASTQ handles).  Every other pair is
    checked with `is_concordant` and written to `output_bam_file` when it
    is not gene-concordant.  FASTQ records still unread once the BAM is
    exhausted are appended to the output FASTQ files.

    All files opened by this function are closed before it returns or
    raises.

    Args:
        fastq_files: paths of the input FASTQ files; the final flush step
            reads records from the first two.
        bam_file: path of the input BAM file.
        gene_file: passed to `build_exon_interval_trees`.
        max_fragment_length: forwarded to `is_concordant`.
        output_fastq_files: paths of the output FASTQ files; index 0 and 1
            receive records from the matching input FASTQ.
        output_bam_file: path of the BAM file to create, using the input
            BAM as template.
        unpaired: when true, pairs are read with
            `parse_unpaired_pe_reads` instead of `parse_pe_reads`.
        suffix: forwarded to `synchronize_bam_fastq`.

    Returns:
        `config.JOB_SUCCESS`.
    """
    # index genes
    exon_intervals, exon_trees = build_exon_interval_trees(gene_file)
    # every handle opened below is registered here so the finally clause
    # can close it whether processing succeeds or fails
    handles = []
    try:
        # open input files
        bamfh = pysam.Samfile(bam_file, "rb")
        handles.append(bamfh)
        if unpaired:
            bam_iter = parse_unpaired_pe_reads(bamfh)
        else:
            bam_iter = parse_pe_reads(bamfh)
        fastq_iters = []
        for fq in fastq_files:
            fh = open(fq)
            handles.append(fh)
            fastq_iters.append(parse_fastq(fh))
        # open output files
        outfq = []
        for fq in output_fastq_files:
            fh = open(fq, "w")
            handles.append(fh)
            outfq.append(fh)
        outbamfh = pysam.Samfile(output_bam_file, "wb", template=bamfh)
        handles.append(outbamfh)
        # iterate through fastq files and bam file; the StopIteration
        # handler deliberately wraps the whole loop body, as before, so an
        # exhausted iterator inside a helper also ends the loop quietly
        try:
            while True:
                bam_pe_reads = next(bam_iter)
                # synchronize fastq and bam and write unmapped reads
                # to a file
                is_unaligned = synchronize_bam_fastq(bam_pe_reads,
                                                     fastq_iters,
                                                     outfq, suffix)
                if is_unaligned:
                    continue
                # if loop reaches this point then we have a paired-end
                # read where both pairs align.  now need to check if
                # the alignment is discordant
                _tx_concordant, gene_concordant = \
                    is_concordant(bamfh, bam_pe_reads, exon_intervals,
                                  exon_trees, max_fragment_length)
                if not gene_concordant:
                    for r in bam_pe_reads[0]:
                        outbamfh.write(r)
                    for r in bam_pe_reads[1]:
                        outbamfh.write(r)
        except StopIteration:
            pass
        # finish remaining fastq lines
        # NOTE(review): these records are written without `suffix`,
        # unlike write_pe_fastq -- confirm that this is intended
        try:
            while True:
                fqreads = [next(it) for it in fastq_iters]
                print >>outfq[0], fastq_to_string(fqreads[0])
                print >>outfq[1], fastq_to_string(fqreads[1])
        except StopIteration:
            pass
    finally:
        # close in reverse order of opening (output BAM first)
        for fh in reversed(handles):
            fh.close()
    return config.JOB_SUCCESS
예제 #3
0
def write_pe_fastq(fqreads, outfq, suffix):
    """Print both mates of a read pair to their respective FASTQ handles.

    Mate 1 (`fqreads[0]`) goes to `outfq[0]` and mate 2 (`fqreads[1]`) to
    `outfq[1]`.  Each record is rendered by `fastq_to_string`, which is
    given `suffix` followed by the mate number ("1" or "2") as its
    `suffix` keyword.
    """
    # one template per mate, in output order
    for mate_index, suffix_template in enumerate(("%s1", "%s2")):
        print >> outfq[mate_index], fastq_to_string(
            fqreads[mate_index], suffix=suffix_template % (suffix))
def write_pe_fastq(fqreads, outfq, suffix):
    """Print both mates of a read pair to their respective FASTQ handles.

    Mate 1 (`fqreads[0]`) goes to `outfq[0]` and mate 2 (`fqreads[1]`) to
    `outfq[1]`.  Each record is rendered by `fastq_to_string`, which is
    given `suffix` followed by the mate number ("1" or "2") as its
    `suffix` keyword.
    """
    def emit_mate(mate_index, suffix_template):
        # render one mate and print it to the handle at the same index
        print >>outfq[mate_index], fastq_to_string(
            fqreads[mate_index], suffix=suffix_template % (suffix))

    emit_mate(0, "%s1")
    emit_mate(1, "%s2")