Ejemplo n.º 1
0
    def _break_reads(self, contig, position, fout, min_read_length=250):
        '''Write all reads fetched from contig to fout, breaking reads that span position.

        A read whose alignment starts before position and ends after it is
        split at position (0-based, in the reference) into a piece whose id
        gets the suffix ".left" and a piece whose id gets the suffix ".right".
        A piece is only written if it passes the min_read_length check below.
        Reads that do not span position are written whole. The break point is
        approximate: indels in the alignment are ignored. Base qualities are
        dropped unless self.fastq_out is true.
        '''
        ignore_quality = not self.fastq_out
        # The context manager closes the BAM handle even if writing fails.
        with pysam.Samfile(self.bam, "rb") as sam_reader:
            for read in sam_reader.fetch(contig):
                seqs = []
                if read.pos < position < read.reference_end - 1:
                    # Offset of the break within the read, treating the
                    # alignment as ungapped.
                    split_point = position - read.pos
                    if split_point - 1 >= min_read_length:
                        sequence = mapping.aligned_read_to_read(
                            read, revcomp=False,
                            ignore_quality=ignore_quality).subseq(
                                0, split_point)
                        sequence.id += '.left'
                        seqs.append(sequence)
                    if read.query_length - split_point >= min_read_length:
                        sequence = mapping.aligned_read_to_read(
                            read, revcomp=False,
                            ignore_quality=ignore_quality).subseq(
                                split_point, read.query_length)
                        sequence.id += '.right'
                        seqs.append(sequence)
                else:
                    seqs.append(
                        mapping.aligned_read_to_read(
                            read, revcomp=False,
                            ignore_quality=ignore_quality))

                # Sequences were extracted with revcomp=False so the split is
                # done in alignment orientation; reverse-strand reads are
                # reverse complemented only afterwards.
                for seq in seqs:
                    if read.is_reverse:
                        seq.revcomp()
                    print(seq, file=fout)
Ejemplo n.º 2
0
    def test_aligned_read_to_read(self):
        '''Test aligned_read_to_read with default, revcomp=False and ignore_quality=True.'''
        infile = os.path.join(data_dir,
                              'mapping_test_aligned_read_to_read.bam')
        # Keep the BAM handle open while the alignments are in use, and make
        # sure it is closed even when an assertion fails.
        with pysam.Samfile(infile, "rb") as sam_reader:
            # The test file is expected to hold exactly two alignments.
            aln1, aln2 = list(sam_reader.fetch())
            read1_fq = pyfastaq.sequences.Fastq(
                'read1',
                'TGTGTAACACTCCACCTCTGGTTCCCAGAGTTCGGTATCCGGCCGATACTTGAGGATAGC',
                'IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGHFEDCBA')
            read1_fa = pyfastaq.sequences.Fasta(
                'read1',
                'TGTGTAACACTCCACCTCTGGTTCCCAGAGTTCGGTATCCGGCCGATACTTGAGGATAGC')
            self.assertEqual(read1_fq, mapping.aligned_read_to_read(aln1))
            self.assertEqual(
                read1_fq, mapping.aligned_read_to_read(aln1, revcomp=False))
            self.assertEqual(
                read1_fa,
                mapping.aligned_read_to_read(aln1, ignore_quality=True))

            read2 = pyfastaq.sequences.Fastq(
                'read2',
                'GATCGTCACGAAAGAACCAAGCCGGATCGTGGGAGGGGTACAACTCAGGTGAATTAACGT',
                'HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGFEDC')
            # For the second alignment, revcomp=False is expected to return
            # the reverse complement of the default result.
            read2_rev = copy.copy(read2)
            read2_rev.revcomp()
            self.assertEqual(read2, mapping.aligned_read_to_read(aln2))
            self.assertEqual(
                read2_rev, mapping.aligned_read_to_read(aln2, revcomp=False))
Ejemplo n.º 3
0
 def _all_reads_from_contig(self, contig, fout):
     '''Write every read fetched from the contig called "contig" to fout.

     Base qualities are dropped unless self.fastq_out is true.
     '''
     ignore_quality = not self.fastq_out
     # The context manager closes the BAM handle even if writing fails.
     with pysam.Samfile(self.bam, "rb") as sam_reader:
         for read in sam_reader.fetch(contig):
             print(mapping.aligned_read_to_read(
                 read, ignore_quality=ignore_quality),
                   file=fout)
Ejemplo n.º 4
0
    def _get_region(self, contig, start, end, fout, min_length=250):
        '''Write reads mapping to the given region of contig, trimming the part of each read outside the region.

        When start is 0 the reads are trimmed at their end (bases aligned
        past end are removed); otherwise they are trimmed at their start.
        Only sequences of at least min_length after trimming are written to
        fout. Base qualities are dropped unless self.fastq_out is true.
        '''
        # A region starting at position 0 means only the far side of the
        # region can cut through reads, so trim read ends rather than starts.
        trimming_end = (start == 0)
        # The context manager closes the BAM handle even if writing fails.
        with pysam.Samfile(self.bam, "rb") as sam_reader:
            for read in sam_reader.fetch(contig, start, end):
                # Extract in alignment orientation so the trimming offsets,
                # which come from reference coordinates, apply directly.
                seq = mapping.aligned_read_to_read(
                    read, ignore_quality=not self.fastq_out, revcomp=False)

                if trimming_end:
                    bases_off_end = max(0, read.reference_end - 1 - end)
                    seq = seq.subseq(
                        0, read.query_alignment_end - bases_off_end)
                else:
                    bases_off_start = max(0, start - read.pos + 1)
                    seq = seq.subseq(
                        bases_off_start + read.query_alignment_start,
                        len(seq))

                if read.is_reverse:
                    seq.revcomp()

                if len(seq) >= min_length:
                    print(seq, file=fout)
Ejemplo n.º 5
0
 def _get_all_unmapped_reads(self, fout):
     '''Write all unmapped reads in the BAM file to fout, without qualities.'''
     # The context manager closes the BAM handle even if writing fails.
     with pysam.Samfile(self.bam, "rb") as sam_reader:
         # until_eof=True iterates over the whole file, so reads that are
         # not assigned to any contig are visited as well.
         for read in sam_reader.fetch(until_eof=True):
             if read.is_unmapped:
                 print(mapping.aligned_read_to_read(read, ignore_quality=True),
                       file=fout)
Ejemplo n.º 6
0
    def test_aligned_read_to_read(self):
        '''Test aligned_read_to_read with default, revcomp=False and ignore_quality=True.'''
        infile = os.path.join(data_dir, 'mapping_test_aligned_read_to_read.bam')
        # Keep the BAM handle open while the alignments are in use, and make
        # sure it is closed even when an assertion fails.
        with pysam.Samfile(infile, "rb") as sam_reader:
            # The test file is expected to hold exactly two alignments.
            aln1, aln2 = list(sam_reader.fetch())
            read1_fq = pyfastaq.sequences.Fastq('read1', 'TGTGTAACACTCCACCTCTGGTTCCCAGAGTTCGGTATCCGGCCGATACTTGAGGATAGC', 'IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGHFEDCBA')
            read1_fa = pyfastaq.sequences.Fasta('read1', 'TGTGTAACACTCCACCTCTGGTTCCCAGAGTTCGGTATCCGGCCGATACTTGAGGATAGC')
            self.assertEqual(read1_fq, mapping.aligned_read_to_read(aln1))
            self.assertEqual(read1_fq, mapping.aligned_read_to_read(aln1, revcomp=False))
            self.assertEqual(read1_fa, mapping.aligned_read_to_read(aln1, ignore_quality=True))

            read2 = pyfastaq.sequences.Fastq('read2', 'GATCGTCACGAAAGAACCAAGCCGGATCGTGGGAGGGGTACAACTCAGGTGAATTAACGT', 'HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGFEDC')
            # For the second alignment, revcomp=False is expected to return
            # the reverse complement of the default result.
            read2_rev = copy.copy(read2)
            read2_rev.revcomp()
            self.assertEqual(read2, mapping.aligned_read_to_read(aln2))
            self.assertEqual(read2_rev, mapping.aligned_read_to_read(aln2, revcomp=False))
Ejemplo n.º 7
0
 def _exclude_region(self, contig, start, end, fout):
     '''Write reads from contig that do not overlap the given region to fout, without qualities.

     start and end follow the Python convention: start is 0-based and
     inclusive, end is exclusive.
     '''
     # Interval is built with an inclusive end, hence end - 1.
     exclude_interval = pyfastaq.intervals.Interval(start, end - 1)
     # The context manager closes the BAM handle even if writing fails.
     with pysam.Samfile(self.bam, "rb") as sam_reader:
         for read in sam_reader.fetch(contig):
             read_interval = pyfastaq.intervals.Interval(read.pos, read.reference_end - 1)
             if not read_interval.intersects(exclude_interval):
                 print(mapping.aligned_read_to_read(read, ignore_quality=True), file=fout)
Ejemplo n.º 8
0
 def _exclude_region(self, contig, start, end, fout):
     '''Write reads from contig that do not overlap the given region to fout, without qualities.

     start and end follow the Python convention: start is 0-based and
     inclusive, end is exclusive.
     '''
     # Interval is built with an inclusive end, hence end - 1.
     exclude_interval = pyfastaq.intervals.Interval(start, end - 1)
     # The context manager closes the BAM handle even if writing fails.
     with pysam.Samfile(self.bam, "rb") as sam_reader:
         for read in sam_reader.fetch(contig):
             read_interval = pyfastaq.intervals.Interval(
                 read.pos, read.reference_end - 1)
             if not read_interval.intersects(exclude_interval):
                 print(mapping.aligned_read_to_read(read,
                                                    ignore_quality=True),
                       file=fout)
Ejemplo n.º 9
0
    def _break_reads(self, contig, position, fout, min_read_length=250):
        '''Write all reads fetched from contig to fout, breaking reads that span position.

        A read whose alignment starts before position and ends after it is
        split at position (0-based, in the reference) into a piece whose id
        gets the suffix ".left" and a piece whose id gets the suffix ".right".
        A piece is only written if it passes the min_read_length check below.
        Reads that do not span position are written whole. The break point is
        approximate: indels in the alignment are ignored. Base qualities are
        always dropped.
        '''
        # The context manager closes the BAM handle even if writing fails.
        with pysam.Samfile(self.bam, "rb") as sam_reader:
            for read in sam_reader.fetch(contig):
                seqs = []
                if read.pos < position < read.reference_end - 1:
                    # Offset of the break within the read, treating the
                    # alignment as ungapped.
                    split_point = position - read.pos
                    if split_point - 1 >= min_read_length:
                        sequence = mapping.aligned_read_to_read(
                            read, revcomp=False,
                            ignore_quality=True).subseq(0, split_point)
                        sequence.id += '.left'
                        seqs.append(sequence)
                    if read.query_length - split_point >= min_read_length:
                        sequence = mapping.aligned_read_to_read(
                            read, revcomp=False,
                            ignore_quality=True).subseq(
                                split_point, read.query_length)
                        sequence.id += '.right'
                        seqs.append(sequence)
                else:
                    seqs.append(mapping.aligned_read_to_read(
                        read, revcomp=False, ignore_quality=True))

                # Sequences were extracted with revcomp=False so the split is
                # done in alignment orientation; reverse-strand reads are
                # reverse complemented only afterwards.
                for seq in seqs:
                    if read.is_reverse:
                        seq.revcomp()
                    print(seq, file=fout)
Ejemplo n.º 10
0
    def _get_region(self, contig, start, end, fout, min_length=250):
        '''Write reads mapping to the given region of contig, trimming the part of each read outside the region.

        When start is 0 the reads are trimmed at their end (bases aligned
        past end are removed); otherwise they are trimmed at their start.
        Only sequences of at least min_length after trimming are written to
        fout. Base qualities are always dropped.
        '''
        # A region starting at position 0 means only the far side of the
        # region can cut through reads, so trim read ends rather than starts.
        trimming_end = (start == 0)
        # The context manager closes the BAM handle even if writing fails.
        with pysam.Samfile(self.bam, "rb") as sam_reader:
            for read in sam_reader.fetch(contig, start, end):
                # Extract in alignment orientation so the trimming offsets,
                # which come from reference coordinates, apply directly.
                seq = mapping.aligned_read_to_read(
                    read, ignore_quality=True, revcomp=False)

                if trimming_end:
                    bases_off_end = max(0, read.reference_end - 1 - end)
                    seq.seq = seq.seq[:read.query_alignment_end - bases_off_end]
                else:
                    bases_off_start = max(0, start - read.pos + 1)
                    seq.seq = seq.seq[bases_off_start + read.query_alignment_start:]

                if read.is_reverse:
                    seq.revcomp()

                if len(seq) >= min_length:
                    print(seq, file=fout)
Ejemplo n.º 11
0
 def _get_all_unmapped_reads(self, fout):
     '''Write all unmapped reads in the BAM file to fout, without qualities.'''
     # The context manager closes the BAM handle even if writing fails.
     with pysam.Samfile(self.bam, "rb") as sam_reader:
         # until_eof=True iterates over the whole file, so reads that are
         # not assigned to any contig are visited as well.
         for read in sam_reader.fetch(until_eof=True):
             if read.is_unmapped:
                 print(mapping.aligned_read_to_read(read, ignore_quality=True), file=fout)
Ejemplo n.º 12
0
 def _all_reads_from_contig(self, contig, fout):
     '''Write every read fetched from the contig called "contig" to fout, without qualities.'''
     # The context manager closes the BAM handle even if writing fails.
     with pysam.Samfile(self.bam, "rb") as sam_reader:
         for read in sam_reader.fetch(contig):
             print(mapping.aligned_read_to_read(read, ignore_quality=True), file=fout)