예제 #1
0
 def test_deletion_repeat(self):
     """Check that a 27-base deletion inside a repeat is moved 30 bases along.

     The input CIGAR puts the deletion after a 22-base match. The expected
     CIGAR lengthens that match to 52 and shortens the trailing match from
     74 to 44; every operation before it is unchanged.
     """
     query = (
         'GAGT'                    # 4S
         'GAGACTCTGT'              # 10=
         'GAA'                     # 3X
         'AAAGAAAAAAAAAA'          # 14=
         'A'                       # 1X
         'ATATATATATATATAAATATA'   # 21=
         'C'                       # 1X
         'ATATTATGTATCAAATATATAT'  # 22=, the 27D follows here in the input
         'TATGTGTAATATACATCATGTATCAAATATATATTATGTATAATATACATCATATATCAAATATATATTATGTG'
     )
     # deleted reference: TATGTGTAATATACATCATGTATCAAA
     # 76 query bases come before the deletion in the input CIGAR, so this
     # shows the read split at the original deletion point.
     print(query[:76], query[76:])

     # Operations shared by the input and the expected alignment; each use
     # below concatenates into a fresh list so the two stay independent.
     leading_ops = [
         (CIGAR.S, 4), (CIGAR.EQ, 10), (CIGAR.X, 3),
         (CIGAR.EQ, 14), (CIGAR.X, 1), (CIGAR.EQ, 21),
         (CIGAR.X, 1),
     ]
     aligned = MockRead(
         'name',
         reference_name='11_86018001-86018500',
         reference_start=28,
         cigar=leading_ops + [(CIGAR.EQ, 22), (CIGAR.D, 27), (CIGAR.EQ, 74)],
         query_sequence=query,
     )
     expected_cigar = leading_ops + [
         (CIGAR.EQ, 22 + 30), (CIGAR.D, 27), (CIGAR.EQ, 74 - 30),
     ]

     reference_seq = REFERENCE_GENOME[aligned.reference_name].seq
     standardized = hgvs_standardize_cigar(aligned, reference_seq)

     # Print the deleted sequence before and after the standardized CIGAR
     # is applied to the read.
     print(SamRead.deletion_sequences(aligned, REFERENCE_GENOME))
     aligned.cigar = standardized
     print(SamRead.deletion_sequences(aligned, REFERENCE_GENOME))

     self.assertEqual(expected_cigar, standardized)
예제 #2
0
 def test_bubble_sort_indel_sections_drop_mismatch_with_hardclipping(self):
     """Check indel merging on a read whose CIGAR starts with a hard clip.

     The interleaved deletion/insertion operations and the single mismatch
     are expected to collapse into one insertion of 6 followed by one
     deletion of 5. The leading hard clip and the flanking matches of 4
     and 6 stay as they are.
     """
     reference = 'ATAGGC' 'ATCT' 'ACGA' 'ACGA' 'ACGA' 'GATCGCTACG'
     # original
     # ATAGGCATCTACG   AA  CGAACGAGATCGCTACG
     #       ATCTC  TTT  TTCGAACG
     # expected
     # ATAGGCATCT      ACGAACGAACGAGATCGCTACG
     #       ATCTCTTTTT     CGAACG
     input_cigar = [
         (CIGAR.H, 10),
         (CIGAR.EQ, 4),
         (CIGAR.X, 1),
         (CIGAR.D, 2),
         (CIGAR.I, 3),
         (CIGAR.D, 2),
         (CIGAR.I, 2),
         (CIGAR.EQ, 6),
     ]
     hardclipped = MockRead(
         'name',
         1,
         6,
         reference_name='1',
         query_sequence='ATCTCTTTTTCGAACG',
         cigar=input_cigar,
     )

     # Debug output: deleted/inserted sequences of the input alignment and
     # the query with its length.
     genome = {'1': MockObject(seq=reference)}
     print(SamRead.deletion_sequences(hardclipped, genome))
     print(SamRead.insertion_sequences(hardclipped))
     print(hardclipped.query_sequence, len(hardclipped.query_sequence))

     expected_cigar = [
         (CIGAR.H, 10),
         (CIGAR.EQ, 4),
         (CIGAR.I, 6),
         (CIGAR.D, 5),
         (CIGAR.EQ, 6),
     ]
     self.assertEqual(
         expected_cigar,
         hgvs_standardize_cigar(hardclipped, reference),
     )
예제 #3
0
 def test_odd_deletion_in_repeat(self):
     """Check that a 3-base deletion in a repeat is moved 11 bases along.

     The input alignment is 4S13=3D63= and the expected result is
     4S24=3D52=.
     """
     reference = (
         'AAAGAAAAAAAAAAAAT'
         'ATATATATATA'
         'TAAATATACATATTATGTATCAAATATATATTATGTGTAATATACATCATGTATC'
     )
     query = (
         'TTTTAAAAAAAAAAAAT'
         'ATATATATATA'
         'ATATACATATTATGTATCAAATATATATTATGTGTAATATACATCATGTATC'
     )
     # 28 = 4 + 24, the soft clip plus the leading match of the expected
     # CIGAR; presumably printed to cross-check the trailing match length.
     print(len(query) - 28)

     aligned = MockRead(
         'name',
         reference_name='1',
         reference_start=4,
         cigar=convert_string_to_cigar('4S13=3D63='),
         query_sequence=query,
     )
     genome = {'1': MockObject(seq=reference)}
     expected_cigar = convert_string_to_cigar('4S24=3D52=')
     standardized = hgvs_standardize_cigar(aligned, reference)

     # Print the deleted sequence before and after the standardized CIGAR
     # is applied to the read.
     print(SamRead.deletion_sequences(aligned, genome))
     aligned.cigar = standardized
     print(SamRead.deletion_sequences(aligned, genome))

     self.assertEqual(expected_cigar, standardized)
예제 #4
0
 def test_deletions(self):
     """Check the sequences reported for the two deletions in 2=3D8=4D9=.

     The read starts at reference position 0 on '1' and is looked up in
     the fixture's ``self.reference_genome``.
     """
     expected = ['cde', 'nopq']
     two_deletion_read = MockRead(
         reference_start=0,
         reference_name='1',
         query_sequence='',
         cigar=convert_string_to_cigar('2=3D8=4D9='),
     )
     deleted = SamRead.deletion_sequences(
         two_deletion_read, self.reference_genome)
     self.assertEqual(expected, deleted)
예제 #5
0
 def test_deletions(self):
     """Check the sequences reported for the two deletions in 2=3D8=4D9=.

     The reference for '1' is the lowercase alphabet, so the 3-base
     deletion after 2 matches is 'cde' and the 4-base deletion after 8
     more matches is 'nopq'.
     """
     expected = ['cde', 'nopq']
     two_deletion_read = MockRead(
         reference_start=0,
         reference_name='1',
         query_sequence='',
         cigar=convert_string_to_cigar('2=3D8=4D9='),
     )
     genome = {'1': MockObject(seq='abcdefghijklmnopqrstuvwxyz')}
     deleted = SamRead.deletion_sequences(two_deletion_read, genome)
     assert deleted == expected
예제 #6
0
 def test_even_deletion_in_repeat(self):
     """Check that a 2-base deletion in a repeat is moved 11 bases along.

     The input alignment is 4S13=2D64= and the expected result is
     4S24=2D53=.
     """
     reference = (
         'AAAGAAAAAAAAAAAAT'
         'ATATATATATA'
         'TAAATATACATATTATGTATCAAATATATATTATGTGTAATATACATCATGTATC'
     )
     query = (
         'TTTTAAAAAAAAAAAAT'
         'ATATATATATA'
         'AATATACATATTATGTATCAAATATATATTATGTGTAATATACATCATGTATC'
     )
     # 28 = 4 + 24, the soft clip plus the leading match of the expected
     # CIGAR; presumably printed to cross-check the trailing match length.
     print(len(query) - 28)

     aligned = MockRead(
         'name',
         reference_name='1',
         reference_start=4,
         cigar=convert_string_to_cigar('4S13=2D64='),
         query_sequence=query,
     )
     genome = {'1': MockObject(seq=reference)}
     expected_cigar = convert_string_to_cigar('4S24=2D53=')
     standardized = hgvs_standardize_cigar(aligned, reference)

     # Print the deleted sequence before and after the standardized CIGAR
     # is applied to the read.
     print(SamRead.deletion_sequences(aligned, genome))
     aligned.cigar = standardized
     print(SamRead.deletion_sequences(aligned, genome))

     assert standardized == expected_cigar