def test_deletion_repeat(self):
    """Check the standardized CIGAR for a 27-base deletion inside a repeat.

    The input alignment has 22 matched bases directly before the deletion
    and 74 after it; the expected CIGAR has 52 before and 44 after, i.e.
    the same deletion is reported 30 bases further along.
    """
    flank_len = 76  # query bases consumed by the CIGAR before the deletion
    shift = 30  # number of bases the deletion is expected to move by
    query = ''.join((
        'GAGT',
        'GAGACTCTGT',
        'GAA',
        'AAAGAAAAAAAAAA',
        'A',
        'ATATATATATATATAAATATA',
        'C',
        'ATATTATGTATCAAATATATAT',
        'TATGTGTAATATACATCATGTATCAAATATATATTATGTATAATATACATCATATATCAAATATATATTATGTG',
    ))
    # deleted reference: TATGTGTAATATACATCATGTATCAAA
    print(query[:flank_len], query[flank_len:])

    # Operations that are identical in the input and the expected CIGAR.
    shared_ops = [
        (CIGAR.S, 4),
        (CIGAR.EQ, 10),
        (CIGAR.X, 3),
        (CIGAR.EQ, 14),
        (CIGAR.X, 1),
        (CIGAR.EQ, 21),
        (CIGAR.X, 1),
    ]
    read = MockRead(
        'name',
        reference_name='11_86018001-86018500',
        reference_start=28,
        cigar=shared_ops + [(CIGAR.EQ, 22), (CIGAR.D, 27), (CIGAR.EQ, 74)],
        query_sequence=query,
    )
    wanted = shared_ops + [
        (CIGAR.EQ, 22 + shift),
        (CIGAR.D, 27),
        (CIGAR.EQ, 74 - shift),
    ]

    reference_seq = REFERENCE_GENOME[read.reference_name].seq
    standardized = hgvs_standardize_cigar(read, reference_seq)

    # Print the deleted sequence under the input CIGAR, then under the
    # standardized one, to help diagnose a failure.
    print(SamRead.deletion_sequences(read, REFERENCE_GENOME))
    read.cigar = standardized
    print(SamRead.deletion_sequences(read, REFERENCE_GENOME))

    self.assertEqual(wanted, standardized)
def test_bubble_sort_indel_sections_drop_mismatch_with_hardclipping(self):
    """Check that interleaved indels after a hard clip merge into one I/D pair.

    The input CIGAR is 10H 4= 1X 2D 3I 2D 2I 6=; the expected result is
    10H 4= 6I 5D 6=, so the mismatch and the alternating deletion/insertion
    runs become a single insertion followed by a single deletion.
    """
    reference_seq = ''.join(('ATAGGC', 'ATCT', 'ACGA', 'ACGA', 'ACGA', 'GATCGCTACG'))
    # original
    # ATAGGCATCTACG   AA  CGAACGAGATCGCTACG
    #       ATCTC  TTT  TTCGAACG
    # expected
    # ATAGGCATCT      ACGAACGAACGAGATCGCTACG
    #       ATCTCTTTTT     CGAACG
    input_cigar = [
        (CIGAR.H, 10),
        (CIGAR.EQ, 4),
        (CIGAR.X, 1),
        (CIGAR.D, 2),
        (CIGAR.I, 3),
        (CIGAR.D, 2),
        (CIGAR.I, 2),
        (CIGAR.EQ, 6),
    ]
    read = MockRead(
        'name',
        1,
        6,
        reference_name='1',
        query_sequence='ATCTCTTTTTCGAACG',
        cigar=input_cigar,
    )

    # Diagnostic output describing the input alignment.
    genome = {'1': MockObject(seq=reference_seq)}
    print(SamRead.deletion_sequences(read, genome))
    print(SamRead.insertion_sequences(read))
    print(read.query_sequence, len(read.query_sequence))

    wanted = [
        (CIGAR.H, 10),
        (CIGAR.EQ, 4),
        (CIGAR.I, 6),
        (CIGAR.D, 5),
        (CIGAR.EQ, 6),
    ]
    standardized = hgvs_standardize_cigar(read, reference_seq)
    self.assertEqual(wanted, standardized)
def test_odd_deletion_in_repeat(self):
    """Check the standardized CIGAR for a 3-base deletion next to a repeat.

    The input CIGAR is 4S13=3D63= and the expected result is 4S24=3D52=,
    i.e. the deletion is reported 11 bases further along.
    """
    reference_seq = (
        'AAAGAAAAAAAAAAAAT'
        'ATATATATATA'
        'TAAATATACATATTATGTATCAAATATATATTATGTGTAATATACATCATGTATC'
    )
    query = (
        'TTTTAAAAAAAAAAAAT'
        'ATATATATATA'
        'ATATACATATTATGTATCAAATATATATTATGTGTAATATACATCATGTATC'
    )
    print(len(query) - 28)

    read = MockRead(
        'name',
        reference_name='1',
        reference_start=4,
        cigar=convert_string_to_cigar('4S13=3D63='),
        query_sequence=query,
    )
    genome = {'1': MockObject(seq=reference_seq)}
    wanted = convert_string_to_cigar('4S24=3D52=')
    standardized = hgvs_standardize_cigar(read, reference_seq)

    # Print the deleted sequence under the input CIGAR, then under the
    # standardized one, to help diagnose a failure.
    print(SamRead.deletion_sequences(read, genome))
    read.cigar = standardized
    print(SamRead.deletion_sequences(read, genome))

    self.assertEqual(wanted, standardized)
def test_deletions(self):
    """Check the deleted reference sequences reported for a two-deletion read.

    The read starts at reference position 0 with CIGAR 2=3D8=4D9=. The
    reference comes from ``self.reference_genome``, which is set up outside
    this method.
    """
    two_deletion_read = MockRead(
        reference_start=0,
        reference_name='1',
        query_sequence='',
        cigar=convert_string_to_cigar('2=3D8=4D9='),
    )
    wanted = ['cde', 'nopq']
    observed = SamRead.deletion_sequences(two_deletion_read, self.reference_genome)
    self.assertEqual(wanted, observed)
def test_deletions(self):
    """Check the deleted reference sequences reported for a two-deletion read.

    With the lowercase alphabet as reference and CIGAR 2=3D8=4D9= starting
    at position 0, the deleted stretches are 'cde' (after 'ab') and 'nopq'
    (after 'fghijklm').
    """
    genome = {'1': MockObject(seq='abcdefghijklmnopqrstuvwxyz')}
    two_deletion_read = MockRead(
        reference_start=0,
        reference_name='1',
        query_sequence='',
        cigar=convert_string_to_cigar('2=3D8=4D9='),
    )
    wanted = ['cde', 'nopq']
    observed = SamRead.deletion_sequences(two_deletion_read, genome)
    assert observed == wanted
def test_even_deletion_in_repeat(self):
    """Check the standardized CIGAR for a 2-base deletion next to a repeat.

    The input CIGAR is 4S13=2D64= and the expected result is 4S24=2D53=,
    i.e. the deletion is reported 11 bases further along.
    """
    reference_seq = ''.join((
        'AAAGAAAAAAAAAAAAT',
        'ATATATATATA',
        'TAAATATACATATTATGTATCAAATATATATTATGTGTAATATACATCATGTATC',
    ))
    query = ''.join((
        'TTTTAAAAAAAAAAAAT',
        'ATATATATATA',
        'AATATACATATTATGTATCAAATATATATTATGTGTAATATACATCATGTATC',
    ))
    print(len(query) - 28)

    read = MockRead(
        'name',
        reference_name='1',
        reference_start=4,
        cigar=convert_string_to_cigar('4S13=2D64='),
        query_sequence=query,
    )
    genome = {'1': MockObject(seq=reference_seq)}
    wanted = convert_string_to_cigar('4S24=2D53=')
    standardized = hgvs_standardize_cigar(read, reference_seq)

    # Print the deleted sequence under the input CIGAR, then under the
    # standardized one, to help diagnose a failure.
    print(SamRead.deletion_sequences(read, genome))
    read.cigar = standardized
    print(SamRead.deletion_sequences(read, genome))

    assert standardized == wanted