Ejemplo n.º 1
0
 def test_insertions(self):
     """Check that insertion_sequences returns each inserted run of the read.

     The CIGAR string '5=3I12=4I9=' marks two insertions (3 and 4 bases)
     which line up with the 'kkk' and 'kkkk' runs of the query sequence.
     """
     cigar_tuples = convert_string_to_cigar('5=3I12=4I9=')
     mock_read = MockRead(
         reference_start=0,
         reference_name='1',
         query_sequence='abcdekkkfghijklmnopqkkkkrstuvwxyz',
         cigar=cigar_tuples,
     )
     observed = SamRead.insertion_sequences(mock_read)
     self.assertEqual(['kkk', 'kkkk'], observed)
Ejemplo n.º 2
0
 def test_bubble_sort_indel_sections_drop_mismatch_with_hardclipping(self):
     """Interleaved indels after a mismatch collapse into one I and one D.

     The input CIGAR has a mismatch followed by alternating deletions and
     insertions (X1, D2, I3, D2, I2). The standardized CIGAR is expected to
     hold a single insertion of 6 and a single deletion of 5, with the
     leading hard clip and the flanking exact matches left untouched.
     """
     ref = 'ATAGGC' 'ATCT' 'ACGA' 'ACGA' 'ACGA' 'GATCGCTACG'
     # original alignment (reference on top, read below)
     # ATAGGCATCTACG   AA  CGAACGAGATCGCTACG
     #       ATCTC  TTT  TTCGAACG
     # expected alignment after standardization
     # ATAGGCATCT      ACGAACGAACGAGATCGCTACG
     #       ATCTCTTTTT     CGAACG
     read = MockRead(
         # NOTE(review): the positional arguments are presumably the query
         # name, reference id and reference start (the read lines up with
         # offset 6 of ref above) -- confirm against MockRead's signature.
         'name',
         1,
         6,
         reference_name='1',
         query_sequence='ATCTCTTTTTCGAACG',
         cigar=[
             (CIGAR.H, 10),
             (CIGAR.EQ, 4),
             (CIGAR.X, 1),
             (CIGAR.D, 2),
             (CIGAR.I, 3),
             (CIGAR.D, 2),
             (CIGAR.I, 2),
             (CIGAR.EQ, 6),
         ],
     )
     # No debug printing here: assertEqual already reports both lists when
     # they differ, and this test should only exercise the function it names.
     self.assertEqual(
         [(CIGAR.H, 10), (CIGAR.EQ, 4), (CIGAR.I, 6), (CIGAR.D, 5),
          (CIGAR.EQ, 6)],
         hgvs_standardize_cigar(read, ref),
     )
Ejemplo n.º 3
0
 def test_complex(self):
     """Standardize a read with a long repeat insertion plus nearby events.

     The input places the 26-base insertion after 49 matching bases; the
     expected CIGAR places it after 52, i.e. three bases further along the
     read, and shortens the following match block from 18 to 15. The
     leading mismatch and the trailing X/I events are expected to be
     unchanged.
     """
     # Adjacent string literals are split to mirror the CIGAR segments.
     qseq = (
         'TATTTGGAAATATTTGTAAGATAGATGTCTCTG' 'C'
         'CTCCTTCTGTTTCTGTCTCTGTCTCTTGCACTCTCTCTCTCCCTCTCTT'
         'TCTCTCTCTCTCTCTCTCTCTCTCTC'
         'TCTATATATATATATATA'
         'T' 'A' 'T' 'C' 'T'
         'ACACACACACACACACAC')
     rseq = (
         'TATTTGGAAATATTTGTAAGATAGATGTCTCTG' 'T'
         'CTCCTTCTGTTTCTGTCTCTGTCTCTTGCACTCTCTCTCTCCCTCTCTT'
         'TCTATATATATATATATA'
         'C' 'A' 'C'
         'ACACACACACACACACAC')
     read = MockRead(
         'name', reference_name='mock', reference_start=0, query_sequence=qseq,
         cigar=[
             (CIGAR.EQ, 33), (CIGAR.X, 1), (CIGAR.EQ, 49), (CIGAR.I, 26),
             (CIGAR.EQ, 18), (CIGAR.X, 1), (CIGAR.EQ, 1), (CIGAR.I, 1),
             (CIGAR.EQ, 1), (CIGAR.I, 1), (CIGAR.EQ, 18)]
     )
     new_cigar = [
         (CIGAR.EQ, 33), (CIGAR.X, 1), (CIGAR.EQ, 52), (CIGAR.I, 26),
         (CIGAR.EQ, 15), (CIGAR.X, 1), (CIGAR.EQ, 1), (CIGAR.I, 1),
         (CIGAR.EQ, 1), (CIGAR.I, 1), (CIGAR.EQ, 18)]
     # No debug printing: on a mismatch assertEqual shows both CIGAR lists.
     std_cigar = hgvs_standardize_cigar(read, rseq)
     self.assertEqual(new_cigar, std_cigar)