def testUnsupportedCigarToken(self):
        # The 3X token is expected to be rejected with a RuntimeError whose
        # message names the offending token.
        cigar = '3M3X6M'
        seq = 'AAACAACCACCC'  # @IgnorePep8
        quality = 'BBBDDDEEEFFF'

        with self.assertRaises(RuntimeError) as result:
            apply_cigar(cigar, seq, quality)

        # Exceptions have no .message attribute on Python 3; args[0] holds
        # the message on both Python 2 and Python 3.
        self.assertEqual("Unsupported CIGAR token: '3X'.",
                         result.exception.args[0])
    def testInvalidCigar(self):
        # A CIGAR with '...' in the middle is expected to be rejected with
        # a RuntimeError that quotes the whole string.
        cigar = '3M...6M'
        seq = 'AAACAACCACCC'  # @IgnorePep8
        quality = 'BBBDDDEEEFFF'

        with self.assertRaises(RuntimeError) as result:
            apply_cigar(cigar, seq, quality)

        # Exceptions have no .message attribute on Python 3; args[0] holds
        # the message on both Python 2 and Python 3.
        self.assertEqual("Invalid CIGAR string: '3M...6M'.",
                         result.exception.args[0])
    def testLongCigar(self):
        # The CIGAR asks for 10 bases while the read only has 9, which is
        # expected to raise a RuntimeError.
        cigar = '10M'
        seq = 'AAACAACCA'  # @IgnorePep8
        quality = 'BBBDDDEEE'

        with self.assertRaises(RuntimeError) as result:
            apply_cigar(cigar, seq, quality)

        # Exceptions have no .message attribute on Python 3; args[0] holds
        # the message on both Python 2 and Python 3.
        self.assertEqual(
            "CIGAR string '10M' is too long for sequence 'AAACAACCA'.",
            result.exception.args[0])
Exemple #4
0
    def testUnsupportedCigarToken(self):
        # The 3X token is expected to be rejected with a RuntimeError whose
        # message names the offending token.
        cigar = '3M3X6M'
        seq     = 'AAACAACCACCC'  # @IgnorePep8
        quality = 'BBBDDDEEEFFF'

        with self.assertRaises(RuntimeError) as result:
            apply_cigar(cigar, seq, quality)

        # Exceptions have no .message attribute on Python 3; args[0] holds
        # the message on both Python 2 and Python 3.
        self.assertEqual(
            "Unsupported CIGAR token: '3X'.",
            result.exception.args[0])
Exemple #5
0
    def testLongCigar(self):
        # The CIGAR asks for 10 bases while the read only has 9, which is
        # expected to raise a RuntimeError.
        cigar = '10M'
        seq     = 'AAACAACCA'  # @IgnorePep8
        quality = 'BBBDDDEEE'

        with self.assertRaises(RuntimeError) as result:
            apply_cigar(cigar, seq, quality)

        # Exceptions have no .message attribute on Python 3; args[0] holds
        # the message on both Python 2 and Python 3.
        self.assertEqual(
            "CIGAR string '10M' is too long for sequence 'AAACAACCA'.",
            result.exception.args[0])
Exemple #6
0
    def testInvalidCigar(self):
        # A CIGAR with '...' in the middle is expected to be rejected with
        # a RuntimeError that quotes the whole string.
        malformed_cigar = '3M...6M'
        expected_message = "Invalid CIGAR string: '3M...6M'."

        with self.assertRaises(RuntimeError) as caught:
            apply_cigar(malformed_cigar, 'AAACAACCACCC', 'BBBDDDEEEFFF')

        actual_message = caught.exception.args[0]
        self.assertEqual(expected_message, actual_message)
Exemple #7
0
    def testUnsupportedCigarToken(self):
        # A CIGAR containing a 3X token is expected to raise RuntimeError
        # with a message naming that token.
        bad_cigar = '3M3X6M'
        bases = 'AAACAACCACCC'
        scores = 'BBBDDDEEEFFF'
        expected_message = "Unsupported CIGAR token: '3X'."

        with self.assertRaises(RuntimeError) as caught:
            apply_cigar(bad_cigar, bases, scores)

        self.assertEqual(expected_message, caught.exception.args[0])
Exemple #8
0
    def testLongCigar(self):
        # The CIGAR asks for 10 bases while the read only has 9, which is
        # expected to raise a RuntimeError.
        short_read = 'AAACAACCA'
        short_scores = 'BBBDDDEEE'
        expected_message = (
            "CIGAR string '10M' is too long for sequence 'AAACAACCA'.")

        with self.assertRaises(RuntimeError) as caught:
            apply_cigar('10M', short_read, short_scores)

        self.assertEqual(expected_message, caught.exception.args[0])
Exemple #9
0
    def testInvalidCigar(self):
        # A CIGAR with '...' in the middle is expected to be rejected with
        # a RuntimeError that quotes the whole string.
        cigar = '3M...6M'
        seq     = 'AAACAACCACCC'  # @IgnorePep8
        quality = 'BBBDDDEEEFFF'

        with self.assertRaises(RuntimeError) as result:
            apply_cigar(cigar, seq, quality)

        # Exceptions have no .message attribute on Python 3; args[0] holds
        # the message on both Python 2 and Python 3.
        self.assertEqual(
            "Invalid CIGAR string: '3M...6M'.",
            result.exception.args[0])
Exemple #10
0
def merge_reads(quality_cutoff, read_pair):
    """ Combine the two reads of a pair into a single merged sequence.

    Pairs whose reads name different regions, and pairs that have no mapped
    read at all, are skipped.
    @param quality_cutoff: minimum quality score for a base to be counted
    @param read_pair: a sequence of two sequences, each with fields from a
    SAM file record
    @return: (rname, mseq, merged_inserts, qual1, qual2) or None to skip the pair
    """
    first_read, second_read = read_pair
    regions_differ = second_read and first_read[2] != second_read[2]
    if regions_differ:
        # The two reads name different regions, so drop the pair.
        return None

    rname = None
    # One (cigar, seq, qual, pos) tuple for every mapped read, in pair order.
    mapped_reads = []
    for fields in read_pair:
        if not fields:
            continue
        # Only the first eleven columns are used; optional ones are dropped.
        (_qname, flag, rname, refpos_str, _mapq, cigar,
         _rnext, _pnext, _tlen, seq, qual) = fields[:11]
        if not is_unmapped_read(flag):
            mapped_reads.append((cigar, seq, qual, int(refpos_str)))

    if not mapped_reads:
        return None

    # apply_cigar() is given the recorded position minus one.
    cigar, seq, qual, pos = mapped_reads[0]
    seq1, qual1, ins1 = apply_cigar(cigar, seq, qual, pos - 1)
    if len(mapped_reads) > 1:
        cigar, seq, qual, pos = mapped_reads[1]
        seq2, qual2, ins2 = apply_cigar(cigar, seq, qual, pos - 1)
    else:
        # Only one mapped read: merge it against an empty partner.
        seq2, qual2, ins2 = '', '', None

    mseq = merge_pairs(seq1, seq2, qual1, qual2, q_cutoff=quality_cutoff)
    merged_inserts = merge_inserts(ins1, ins2, quality_cutoff)
    return rname, mseq, merged_inserts, qual1, qual2
Exemple #11
0
def merge_reads(quality_cutoff, read_pair):
    """ Build the merged sequence for one pair of SAM records.

    The pair is skipped when its reads name different regions or when
    neither read is mapped.
    @param quality_cutoff: minimum quality score for a base to be counted
    @param read_pair: a sequence of two sequences, each with fields from a
    SAM file record
    @return: (rname, mseq, merged_inserts, qual1, qual2) or None to skip the pair
    """
    read1, read2 = read_pair
    if read2:
        if read1[2] != read2[2]:
            # Region mismatch between the two reads: ignore the pair.
            return None

    # One apply_cigar() argument tuple per mapped read, in pair order.
    cigar_jobs = []
    for read in read_pair:
        if not read:
            continue
        # Optional fields past the eleventh column are ignored.
        (_qname, flag, rname, refpos_str, _mapq,
         cigar, _rnext, _pnext, _tlen, seq, qual) = read[:11]
        if is_unmapped_read(flag):
            continue
        # apply_cigar() is given the recorded position minus one.
        cigar_jobs.append((cigar, seq, qual, int(refpos_str) - 1))

    if len(cigar_jobs) == 0:
        return None

    seq1, qual1, ins1 = apply_cigar(*cigar_jobs[0])
    if len(cigar_jobs) > 1:
        seq2, qual2, ins2 = apply_cigar(*cigar_jobs[1])
    else:
        # A lone mapped read is merged against an empty partner.
        seq2 = qual2 = ''
        ins2 = None

    mseq = merge_pairs(seq1, seq2, qual1, qual2, q_cutoff=quality_cutoff)
    merged_inserts = merge_inserts(ins1, ins2, quality_cutoff)
    return rname, mseq, merged_inserts, qual1, qual2
Exemple #12
0
    def testSoftClipPositions(self):
        # apply_cigar is handed two empty sets and is expected to fill them
        # with the mapped and the soft-clipped positions respectively.
        mapped_positions = set()
        clipped_positions = set()

        result = apply_cigar(
            '3S6M',
            'AAACAACCA',
            'BBBDDDEEE',
            pos=4,
            mapped=mapped_positions,
            soft_clipped=clipped_positions)
        aligned_bases, aligned_scores, insertions = result

        self.assertEqual('----CAACCA', aligned_bases)
        self.assertEqual('!!!!DDDEEE', aligned_scores)
        self.assertEqual({}, insertions)
        self.assertEqual({4, 5, 6, 7, 8, 9}, mapped_positions)
        self.assertEqual({1, 2, 3}, clipped_positions)
Exemple #13
0
    def testLargeToken(self):
        # A two-digit token count (12M) is expected to pass the whole read
        # through unchanged, with no insertions.
        bases, scores = 'AAACAACCACCC', 'BBBBBBBBBBBB'

        result = apply_cigar('12M', bases, scores)
        aligned_bases, aligned_scores, insertions = result

        self.assertEqual(bases, aligned_bases)
        self.assertEqual(scores, aligned_scores)
        self.assertEqual({}, insertions)
Exemple #14
0
    def testInsertionLowQuality(self):
        # The inserted bases include a '*' quality; the insertion is still
        # expected to be removed from the read and reported under key 3.
        read_bases = 'AAACAACCACCC'
        read_scores = 'BBBD*DEEEFFF'
        expected_insertions = {3: ('CAA', 'D*D')}

        result = apply_cigar('3M3I6M', read_bases, read_scores)
        aligned_bases, aligned_scores, insertions = result

        self.assertEqual('AAACCACCC', aligned_bases)
        self.assertEqual('BBBEEEFFF', aligned_scores)
        self.assertEqual(expected_insertions, insertions)
Exemple #15
0
    def testSoftClip(self):
        # The three soft-clipped leading bases are expected to be dropped
        # from both the sequence and the quality string.
        read_bases = 'AAACAACCA'
        read_scores = 'BBBDDDEEE'

        result = apply_cigar('3S6M', read_bases, read_scores)
        aligned_bases, aligned_scores, insertions = result

        self.assertEqual('CAACCA', aligned_bases)
        self.assertEqual('DDDEEE', aligned_scores)
        self.assertEqual({}, insertions)
Exemple #16
0
    def testDeletion(self):
        # The 3D token is expected to show up as '---' in the sequence and
        # as three spaces in the quality string.
        read_bases = 'AAACAACCA'
        read_scores = 'BBBDDDEEE'

        result = apply_cigar('6M3D3M', read_bases, read_scores)
        aligned_bases, aligned_scores, insertions = result

        self.assertEqual('AAACAA---CCA', aligned_bases)
        self.assertEqual('BBBDDD   EEE', aligned_scores)
        self.assertEqual({}, insertions)
Exemple #17
0
    def testTrivial(self):
        # A single 9M token covering the whole read is expected to hand
        # back the bases and qualities unchanged, with no insertions.
        bases, scores = 'AAACAACCA', 'BBBBBBBBB'

        result = apply_cigar('9M', bases, scores)
        aligned_bases, aligned_scores, insertions = result

        self.assertEqual(bases, aligned_bases)
        self.assertEqual(scores, aligned_scores)
        self.assertEqual({}, insertions)
Exemple #18
0
    def testDeletion(self):
        # Expect the three deleted positions to be filled with '-' in the
        # sequence and with spaces in the quality string.
        deletion_cigar = '6M3D3M'
        raw_bases, raw_scores = 'AAACAACCA', 'BBBDDDEEE'
        wanted_bases, wanted_scores = 'AAACAA---CCA', 'BBBDDD   EEE'

        got_bases, got_scores, got_inserts = apply_cigar(
            deletion_cigar, raw_bases, raw_scores)

        self.assertEqual(wanted_bases, got_bases)
        self.assertEqual(wanted_scores, got_scores)
        self.assertEqual({}, got_inserts)
Exemple #19
0
    def testSoftClip(self):
        # Expect the 3S prefix to be removed, leaving only the six matched
        # bases and their qualities.
        clip_cigar = '3S6M'
        raw_bases, raw_scores = 'AAACAACCA', 'BBBDDDEEE'
        wanted_bases, wanted_scores = 'CAACCA', 'DDDEEE'

        got_bases, got_scores, got_inserts = apply_cigar(
            clip_cigar, raw_bases, raw_scores)

        self.assertEqual(wanted_bases, got_bases)
        self.assertEqual(wanted_scores, got_scores)
        self.assertEqual({}, got_inserts)
Exemple #20
0
    def testInsertionLowQuality(self):
        # One inserted base has quality '*'; the whole insertion is still
        # expected to be cut out of the read and reported under key 3.
        insert_cigar = '3M3I6M'
        raw_bases, raw_scores = 'AAACAACCACCC', 'BBBD*DEEEFFF'
        wanted_bases, wanted_scores = 'AAACCACCC', 'BBBEEEFFF'

        got_bases, got_scores, got_inserts = apply_cigar(
            insert_cigar, raw_bases, raw_scores)

        self.assertEqual(wanted_bases, got_bases)
        self.assertEqual(wanted_scores, got_scores)
        self.assertEqual({3: ('CAA', 'D*D')}, got_inserts)
Exemple #21
0
    def testLargeToken(self):
        # Expect a token with a two-digit count (12M) to return the read
        # exactly as given.
        wide_cigar = '12M'
        raw_bases, raw_scores = 'AAACAACCACCC', 'BBBBBBBBBBBB'

        got_bases, got_scores, got_inserts = apply_cigar(
            wide_cigar, raw_bases, raw_scores)

        self.assertEqual(raw_bases, got_bases)
        self.assertEqual(raw_scores, got_scores)
        self.assertEqual({}, got_inserts)
Exemple #22
0
    def testTrivial(self):
        # Expect an all-match CIGAR (9M) to return the read exactly as
        # given, with no insertions.
        match_cigar = '9M'
        raw_bases, raw_scores = 'AAACAACCA', 'BBBBBBBBB'

        got_bases, got_scores, got_inserts = apply_cigar(
            match_cigar, raw_bases, raw_scores)

        self.assertEqual(raw_bases, got_bases)
        self.assertEqual(raw_scores, got_scores)
        self.assertEqual({}, got_inserts)
Exemple #23
0
    def testPadding(self):
        # With pos=3 the output is expected to gain three leading '-'
        # bases and three leading '!' qualities.
        read_bases = 'AAACAACCACCC'
        read_scores = 'BBBDDDEEEFFF'
        start = 3

        result = apply_cigar('12M', read_bases, read_scores, start)
        aligned_bases, aligned_scores, insertions = result

        self.assertEqual('---AAACAACCACCC', aligned_bases)
        self.assertEqual('!!!BBBDDDEEEFFF', aligned_scores)
        self.assertEqual({}, insertions)
Exemple #24
0
    def testPadding(self):
        # Expect a start position of 3 to left-pad the sequence with '---'
        # and the quality string with '!!!'.
        match_cigar = '12M'
        raw_bases, raw_scores = 'AAACAACCACCC', 'BBBDDDEEEFFF'
        offset = 3
        wanted_bases, wanted_scores = '---AAACAACCACCC', '!!!BBBDDDEEEFFF'

        got_bases, got_scores, got_inserts = apply_cigar(
            match_cigar, raw_bases, raw_scores, offset)

        self.assertEqual(wanted_bases, got_bases)
        self.assertEqual(wanted_scores, got_scores)
        self.assertEqual({}, got_inserts)
    def testInsertionLowQuality(self):
        # The insertion contains a '*' quality and is still expected to be
        # reported under key 3 and removed from the aligned read.
        args = ('3M3I6M', 'AAACAACCACCC', 'BBBD*DEEEFFF')

        out_seq, out_qual, out_inserts = apply_cigar(*args)

        self.assertEqual('AAACCACCC', out_seq)
        self.assertEqual('BBBEEEFFF', out_qual)
        self.assertEqual({3: ('CAA', 'D*D')}, out_inserts)
    def testTrivial(self):
        # An all-match CIGAR is expected to leave the read untouched.
        original_seq = 'AAACAACCA'
        original_qual = 'BBBBBBBBB'

        out_seq, out_qual, out_inserts = apply_cigar(
            '9M', original_seq, original_qual)

        self.assertEqual(original_seq, out_seq)
        self.assertEqual(original_qual, out_qual)
        self.assertEqual({}, out_inserts)
    def testLargeToken(self):
        # A token whose count has two digits (12M) is expected to leave
        # the read untouched.
        original_seq = 'AAACAACCACCC'
        original_qual = 'BBBBBBBBBBBB'

        out_seq, out_qual, out_inserts = apply_cigar(
            '12M', original_seq, original_qual)

        self.assertEqual(original_seq, out_seq)
        self.assertEqual(original_qual, out_qual)
        self.assertEqual({}, out_inserts)
    def testSoftClip(self):
        # The first three bases are soft clipped and are expected to be
        # missing from the output.
        args = ('3S6M', 'AAACAACCA', 'BBBDDDEEE')

        out_seq, out_qual, out_inserts = apply_cigar(*args)

        self.assertEqual('CAACCA', out_seq)
        self.assertEqual('DDDEEE', out_qual)
        self.assertEqual({}, out_inserts)
    def testDeletion(self):
        # The deleted stretch is expected to appear as '---' with blank
        # (space) qualities.
        args = ('6M3D3M', 'AAACAACCA', 'BBBDDDEEE')

        out_seq, out_qual, out_inserts = apply_cigar(*args)

        self.assertEqual('AAACAA---CCA', out_seq)
        self.assertEqual('BBBDDD   EEE', out_qual)
        self.assertEqual({}, out_inserts)
    def testPadding(self):
        # A start position of 3 is expected to add three '-' bases and
        # three '!' qualities in front of the read.
        start = 3
        args = ('12M', 'AAACAACCACCC', 'BBBDDDEEEFFF', start)

        out_seq, out_qual, out_inserts = apply_cigar(*args)

        self.assertEqual('---AAACAACCACCC', out_seq)
        self.assertEqual('!!!BBBDDDEEEFFF', out_qual)
        self.assertEqual({}, out_inserts)
    def testInsertionInsideClipRegionWithOffset(self):
        # The read starts at 3 and the clip window at 4; the insertion is
        # expected to be reported under key 1.
        start, window_start, window_end = 3, 4, 20
        args = ('2M1I2M', 'TAGCT', 'AABCC', start, window_start, window_end)

        out_seq, out_qual, out_inserts = apply_cigar(*args)

        self.assertEqual('ACT', out_seq)
        self.assertEqual('ACC', out_qual)
        self.assertEqual({1: ('G', 'B')}, out_inserts)
    def testInsertionBeforeClip(self):
        # The insertion follows the first three bases and the clip window
        # starts at 3; it is expected to be reported under key 0.
        start, window_start, window_end = 0, 3, 8
        args = ('3M3I9M',
                'AAAGGGCAACCACCC',
                'BBBHHHDDDEEEFFF',
                start,
                window_start,
                window_end)

        out_seq, out_qual, out_inserts = apply_cigar(*args)

        self.assertEqual('CAACCA', out_seq)
        self.assertEqual('DDDEEE', out_qual)
        self.assertEqual({0: ('GGG', 'HHH')}, out_inserts)
    def testInsertionAfterClipping(self):
        # With clip_to=2 only 'ACT' is expected to survive, and the
        # insertion that follows it is expected to be left out.
        start, window_start, window_end = 0, 0, 2
        args = ('3M3I3M',
                "ACTTAGAAA",
                'AAABBBDDD',
                start,
                window_start,
                window_end)

        out_seq, out_qual, out_inserts = apply_cigar(*args)

        self.assertEqual('ACT', out_seq)
        self.assertEqual('AAA', out_qual)
        self.assertEqual({}, out_inserts)
    def testInsertionAtEndOfClipping(self):
        # With clip_to=3 one base past the insertion is kept, and the
        # insertion is expected to be reported under key 3.
        start, window_start, window_end = 0, 0, 3
        args = ('3M3I3M',
                "ACTTAGAAA",
                'AAABBBDDD',
                start,
                window_start,
                window_end)

        out_seq, out_qual, out_inserts = apply_cigar(*args)

        self.assertEqual('ACTA', out_seq)
        self.assertEqual('AAAD', out_qual)
        self.assertEqual({3: ('TAG', 'BBB')}, out_inserts)
    def testClipInsertionLowQuality(self):
        # The insertion inside the clip window carries a '*' quality and
        # is still expected to be reported, under key 3.
        start, window_start, window_end = 0, 3, 8
        args = ('6M3I6M',
                'AAACAAGGGCCACCC',
                'BBBDDDHH*EEEFFF',
                start,
                window_start,
                window_end)

        out_seq, out_qual, out_inserts = apply_cigar(*args)

        self.assertEqual('CAACCA', out_seq)
        self.assertEqual('DDDEEE', out_qual)
        self.assertEqual({3: ('GGG', 'HH*')}, out_inserts)
    def testClippingEverything(self):
        # The clip window (100-108) lies entirely beyond the 12-base read,
        # so empty strings are expected back.
        start, window_start, window_end = 0, 100, 108
        args = ('12M',
                'AAACAACCACCC',
                'BBBDDDEEEFFF',
                start,
                window_start,
                window_end)

        out_seq, out_qual, out_inserts = apply_cigar(*args)

        self.assertEqual('', out_seq)
        self.assertEqual('', out_qual)
        self.assertEqual({}, out_inserts)
    def testInsertionAfterClipRegionWithOffset(self):
        # The read starts at 10 and the clip window covers 10-13; the
        # insertion after the fifth base is expected to be left out.
        start, window_start, window_end = 10, 10, 13
        args = ('5M1I2M',
                'TAGCTCAG',
                'AAAAABCC',
                start,
                window_start,
                window_end)

        out_seq, out_qual, out_inserts = apply_cigar(*args)

        self.assertEqual('TAGC', out_seq)
        self.assertEqual('AAAA', out_qual)
        self.assertEqual({}, out_inserts)
Exemple #38
0
    def testInsertionAfterDeletion(self):
        # With a deletion ahead of it, the insertion is expected to be
        # keyed at 9, while the deleted bases appear as '---' with blank
        # qualities.
        read_bases = 'TTTCCCAAATTT'
        read_scores = '111222333444'

        aligned_bases, aligned_scores, insertions = apply_cigar(
            '3M3D3M3I3M', read_bases, read_scores)

        self.assertEqual('TTT---CCCTTT', aligned_bases)
        self.assertEqual('111   222444', aligned_scores)
        self.assertEqual({9: ('AAA', '333')}, insertions)
Exemple #39
0
    def testInsertionAfterInsertion(self):
        # Two insertions in one read: they are expected to be reported
        # under keys 3 and 6.
        read_bases = 'TTTGGGCCCAAATTT'
        read_scores = '111222333444555'

        aligned_bases, aligned_scores, insertions = apply_cigar(
            '3M3I3M3I3M', read_bases, read_scores)

        self.assertEqual('TTTCCCTTT', aligned_bases)
        self.assertEqual('111333555', aligned_scores)
        self.assertEqual({3: ('GGG', '222'), 6: ('AAA', '444')}, insertions)
Exemple #40
0
    def testInsertionAtEndOfClipping(self):
        # Expect a clip window ending at 3 to keep one base past the
        # insertion and to report the insertion under key 3.
        raw_bases, raw_scores = "ACTTAGAAA", 'AAABBBDDD'
        offset, first_kept, last_kept = 0, 0, 3

        got_bases, got_scores, got_inserts = apply_cigar(
            '3M3I3M', raw_bases, raw_scores, offset, first_kept, last_kept)

        self.assertEqual('ACTA', got_bases)
        self.assertEqual('AAAD', got_scores)
        self.assertEqual({3: ('TAG', 'BBB')}, got_inserts)
Exemple #41
0
    def testInsertionAfterClipping(self):
        # Expect a clip window ending at 2 to keep only 'ACT' and to leave
        # out the insertion that follows it.
        raw_bases, raw_scores = "ACTTAGAAA", 'AAABBBDDD'
        offset, first_kept, last_kept = 0, 0, 2

        got_bases, got_scores, got_inserts = apply_cigar(
            '3M3I3M', raw_bases, raw_scores, offset, first_kept, last_kept)

        self.assertEqual('ACT', got_bases)
        self.assertEqual('AAA', got_scores)
        self.assertEqual({}, got_inserts)
Exemple #42
0
    def testInsertionAfterClipping(self):
        # The clip window stops at 2, before the insertion, so only 'ACT'
        # and no insertions are expected.
        read_bases = "ACTTAGAAA"
        read_scores = 'AAABBBDDD'
        start, window_start, window_end = 0, 0, 2

        result = apply_cigar(
            '3M3I3M', read_bases, read_scores,
            start, window_start, window_end)
        aligned_bases, aligned_scores, insertions = result

        self.assertEqual('ACT', aligned_bases)
        self.assertEqual('AAA', aligned_scores)
        self.assertEqual({}, insertions)
Exemple #43
0
    def testInsertionAtEndOfClipping(self):
        # The clip window stops at 3, one base past the insertion, so the
        # insertion is expected to be reported under key 3.
        read_bases = "ACTTAGAAA"
        read_scores = 'AAABBBDDD'
        start, window_start, window_end = 0, 0, 3

        result = apply_cigar(
            '3M3I3M', read_bases, read_scores,
            start, window_start, window_end)
        aligned_bases, aligned_scores, insertions = result

        self.assertEqual('ACTA', aligned_bases)
        self.assertEqual('AAAD', aligned_scores)
        self.assertEqual({3: ('TAG', 'BBB')}, insertions)
Exemple #44
0
    def testClipInsertionLowQuality(self):
        # Expect the insertion inside the clip window to be reported under
        # key 3 even though one of its qualities is '*'.
        raw_bases, raw_scores = 'AAACAAGGGCCACCC', 'BBBDDDHH*EEEFFF'
        offset, first_kept, last_kept = 0, 3, 8

        got_bases, got_scores, got_inserts = apply_cigar(
            '6M3I6M', raw_bases, raw_scores, offset, first_kept, last_kept)

        self.assertEqual('CAACCA', got_bases)
        self.assertEqual('DDDEEE', got_scores)
        self.assertEqual({3: ('GGG', 'HH*')}, got_inserts)
Exemple #45
0
    def testClippingEverything(self):
        # Expect empty output when the clip window (100-108) lies entirely
        # beyond the 12-base read.
        raw_bases, raw_scores = 'AAACAACCACCC', 'BBBDDDEEEFFF'
        offset, first_kept, last_kept = 0, 100, 108

        got_bases, got_scores, got_inserts = apply_cigar(
            '12M', raw_bases, raw_scores, offset, first_kept, last_kept)

        self.assertEqual('', got_bases)
        self.assertEqual('', got_scores)
        self.assertEqual({}, got_inserts)
Exemple #46
0
    def testInsertionInsideClipRegionWithOffset(self):
        # Expect the insertion to be reported under key 1 when the read
        # starts at 3 and the clip window starts at 4.
        raw_bases, raw_scores = 'TAGCT', 'AABCC'
        offset, first_kept, last_kept = 3, 4, 20

        got_bases, got_scores, got_inserts = apply_cigar(
            '2M1I2M', raw_bases, raw_scores, offset, first_kept, last_kept)

        self.assertEqual('ACT', got_bases)
        self.assertEqual('ACC', got_scores)
        self.assertEqual({1: ('G', 'B')}, got_inserts)
Exemple #47
0
    def testInsertionBeforeClip(self):
        # Expect the insertion that follows the first three bases to be
        # reported under key 0 when the clip window starts at 3.
        raw_bases, raw_scores = 'AAAGGGCAACCACCC', 'BBBHHHDDDEEEFFF'
        offset, first_kept, last_kept = 0, 3, 8

        got_bases, got_scores, got_inserts = apply_cigar(
            '3M3I9M', raw_bases, raw_scores, offset, first_kept, last_kept)

        self.assertEqual('CAACCA', got_bases)
        self.assertEqual('DDDEEE', got_scores)
        self.assertEqual({0: ('GGG', 'HHH')}, got_inserts)