def test_single_duplication_with_trailing_untemp(self):
    """An inserted repeat copy followed by extra bases converts to a duplication.

    The CIGAR is 51= 27I 52=. The 51 matched bases end with one copy of the
    22-base repeat; the 27 inserted bases are a second copy of that repeat
    followed by ``GTCAA``. After ``align.convert_to_duplication`` only
    ``GTCAA`` is expected to remain as untemplated sequence.
    """
    read = MockRead(
        query_sequence=(
            'GGATGATTTACCTTGGGTAATGAAACTCA'
            'GATTTTGCTGTTGTTTTTGTTC'  # repeat copy covered by the leading 51=
            'GATTTTGCTGTTGTTTTTGTTC'  # repeat copy inside the 27-base insertion
            'GTCAA'  # remainder of the insertion
            'CAAAGTGTTTTATACTGATAAAGCAACCCCGGTTTAGCATTGCCATTGGTAA'),
        query_name='duplication_with_untemp',
        reference_id=2,
        reference_name='reference3',
        reference_start=1497,
        cigar=[(CIGAR.EQ, 51), (CIGAR.I, 27), (CIGAR.EQ, 52)],
        is_reverse=False)
    # repeat: GATTTTGCTGTTGTTTTTGTTC
    insertion = align.call_read_events(read)[0]
    duplication = align.convert_to_duplication(insertion, REFERENCE_GENOME)

    # expected value first, matching the other assertions in this test
    self.assertEqual('GTCAA', duplication.untemplated_seq)
    self.assertEqual(ORIENT.RIGHT, duplication.break1.orient)
    self.assertEqual(ORIENT.LEFT, duplication.break2.orient)
    self.assertEqual(1548, duplication.break2.start)
    self.assertEqual(1527, duplication.break1.start)
def test_ins_and_del(self):
    """An insertion and a deletion separated by matched bases give two events.

    The CIGAR is 10M 3I 5M 7D 5M. The first event is expected to carry the
    inserted ``GGG``; the second is expected to have breakpoints at 15 and 23.
    """
    cigar = [(CIGAR.M, 10), (CIGAR.I, 3), (CIGAR.M, 5), (CIGAR.D, 7), (CIGAR.M, 5)]
    read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=0,
        cigar=cigar,
        query_sequence='ACTGAATCGTGGGTAGCTGCTAG',
    )
    # NOTE(review): an older comment here said only the major deletion is
    # reported, but the assertions below expect both events.
    events = align.call_read_events(read)
    self.assertEqual(2, len(events))

    insertion = events[0]
    self.assertEqual(False, insertion.opposing_strands)
    self.assertEqual(10, insertion.break1.start)
    self.assertEqual(10, insertion.break1.end)
    self.assertEqual(11, insertion.break2.start)
    self.assertEqual(11, insertion.break2.end)
    self.assertEqual('GGG', insertion.untemplated_seq)

    deletion = events[1]
    self.assertEqual(False, deletion.opposing_strands)
    self.assertEqual(15, deletion.break1.start)
    self.assertEqual(15, deletion.break1.end)
    self.assertEqual(23, deletion.break2.start)
    self.assertEqual(23, deletion.break2.end)
def test_read_with_exons(self):
    """A contig aligned across several large deletions yields six events.

    The CIGAR string contains six ``D`` operations; the last one directly
    follows an ``8I``. Six events are expected in total, so presumably the
    adjacent ``8I``/``5883D`` pair is reported as one event (confirm against
    ``align.call_read_events``).
    """
    cigar_string = '68M678D50M15D34M6472D185M10240D158M891D74M8I5883D29M'
    contig = MockRead(
        query_sequence='CTTGAAGGAAACTGAATTCAAAAAGATCAAAGTGCTGGGCTCCGGTGCGTTCGGCACGGTGTATAAGGGACTCTGGATCCCAGAAGGTGAGAAAGTTAAAATTCCCGTCGCTATCAAGACATCTCCGAAAGCCAACAAGGAAATCCTCGATGAAGCCTACGTGATGGCCAGCGTGGACAACCCCCACGTGTGCCGCCTGCTGGGCATCTGCCTCACCTCCACCGTGCAGCTCATCATGCAGCTCATGCCCTTCGGCTGCCTCCTGGACTATGTCCGGGAACACAAAGACAATATTGGCTCCCAGTACCTGCTCAACTGGTGTGTGCAGATCGCAAAGGGCATGAACTACTTGGAGGACCGTCGCTTGGTGCACCGCGACCTGGCAGCCAGGAACGTACTGGTGAAAACACCGCAGCATGTCAAGATCACAGATTTTGGGCTGGCCAAACTGCTGGGTGCGGAAGAGAAAGAATACCATGCAGAAGGAGGCAAAGTGCCTATCAAGTGGATGGCATTGGAATCAATTTTACACAGAATCTATACCCACCAGAGTGATGTCTGGAGCTACGGGGTGACCGTTTGGGAGTTGATGACCTTTGGATCCAA',
        cigar=_cigar.convert_string_to_cigar(cigar_string),
        reference_name='7',
        reference_id=6,
        reference_start=55241669
    )
    events = align.call_read_events(contig)
    self.assertEqual(6, len(events))
def test_single_insertion(self):
    """The first event of a 10M 8I 5M read carries the eight inserted bases.

    Breakpoints are expected at 10 and 11 on the same strand, with
    ``GGGTAGCT`` as the untemplated sequence.
    """
    cigar = [(CIGAR.M, 10), (CIGAR.I, 8), (CIGAR.M, 5)]
    read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=0,
        cigar=cigar,
        query_sequence='ACTGAATCGTGGGTAGCTGCTAG',
    )
    event = align.call_read_events(read)[0]

    self.assertEqual(False, event.opposing_strands)
    self.assertEqual(10, event.break1.start)
    self.assertEqual(10, event.break1.end)
    self.assertEqual(11, event.break2.start)
    self.assertEqual(11, event.break2.end)
    self.assertEqual('GGGTAGCT', event.untemplated_seq)
def test_single_duplication(self):
    """First event of a read with a 6-base insertion directly before a 95-base deletion.

    The CIGAR is 65M 6I 95D 21M 17S. The event is expected to start at 27220,
    end at 27316 and carry ``AGACTT`` as untemplated sequence.

    NOTE(review): despite the test name, the event is not passed through
    ``convert_to_duplication`` here.
    """
    cigar = [
        (CIGAR.M, 65),
        (CIGAR.I, 6),
        (CIGAR.D, 95),
        (CIGAR.M, 21),
        (CIGAR.S, 17),
    ]
    read = MockRead(
        name='seq1',
        reference_name='gene3',
        reference_start=27155,
        cigar=cigar,
        query_sequence='TAGTTGGATCTCTGTGCTGACTGACTGACAGACAGACTTTAGTGTCTGTGTGCTGACTGACAGACAGACTTTAGTGTCTGTGTGCTGACT'
        'GACAGACTCTAGTAGTGTC'
    )
    event = align.call_read_events(read)[0]

    self.assertEqual(27220, event.break1.start)
    self.assertEqual(27316, event.break2.start)
    self.assertEqual('AGACTT', event.untemplated_seq)
def test_hardclipping(self):
    """A reverse-strand read whose CIGAR ends in ``113H`` yields one deletion event.

    The CIGAR ``12=1D25=113H`` has a single 1-base deletion. With
    ``is_stranded=True`` exactly one event is expected, with both breakpoints
    on the ``-`` strand at 71491956 (orient L) and 71491958 (orient R).
    """
    clipped = SamRead(reference_name='15')
    clipped.reference_start = 71491944
    clipped.cigar = _cigar.convert_string_to_cigar('12=1D25=113H')
    clipped.query_sequence = 'GTGTGTGGTGTGGGGTGTGTGGTGTGTGTGGTGTGTG'
    clipped.is_reverse = True

    left_break = Breakpoint('15', 71491956, orient='L', strand='-')
    right_break = Breakpoint('15', 71491958, orient='R', strand='-')
    expected = BreakpointPair(left_break, right_break, untemplated_seq='')

    called = align.call_read_events(clipped, is_stranded=True)
    self.assertEqual(1, len(called))
    only_event = called[0]
    self.assertEqual(expected.break1, only_event.break1)
    self.assertEqual(expected.break2, only_event.break2)
def test_single_insertion(self):
    """The first event of a 10M 8I 5M read carries the eight inserted bases.

    Breakpoints are expected at 10 and 11 on the same strand, with
    ``GGGTAGCT`` as the untemplated sequence.
    """
    cigar = [(CIGAR.M, 10), (CIGAR.I, 8), (CIGAR.M, 5)]
    read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=0,
        cigar=cigar,
        query_sequence='ACTGAATCGTGGGTAGCTGCTAG',
    )
    event = align.call_read_events(read)[0]

    assert event.opposing_strands is False
    assert event.break1.start == 10
    assert event.break1.end == 10
    assert event.break2.start == 11
    assert event.break2.end == 11
    assert event.untemplated_seq == 'GGGTAGCT'
def test_single_one_event(self):
    """An insertion directly followed by a deletion is reported as one event.

    The CIGAR is 10M 3I 7D 10M. Exactly one event is expected, with
    breakpoints at 10 and 18 and ``GGG`` as the untemplated sequence.
    """
    cigar = [(CIGAR.M, 10), (CIGAR.I, 3), (CIGAR.D, 7), (CIGAR.M, 10)]
    read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=0,
        cigar=cigar,
        query_sequence='ACTGAATCGTGGGTAGCTGCTAG',
    )
    events = align.call_read_events(read)
    self.assertEqual(1, len(events))

    event = events[0]
    self.assertEqual(False, event.opposing_strands)
    self.assertEqual(10, event.break1.start)
    self.assertEqual(10, event.break1.end)
    self.assertEqual(18, event.break2.start)
    self.assertEqual(18, event.break2.end)
    self.assertEqual('GGG', event.untemplated_seq)
def test_single_duplication_with_leading_untemp(self):
    """First event of a 126= 54I 93= read keeps the 54 inserted bases as untemplated.

    The event is checked as called, without conversion to a duplication:
    break1 is expected to be LEFT-oriented and break2 RIGHT-oriented.
    """
    sequence = (
        'CTCCCACCAGGAGCTCGTCCTCACCACGTCCTGCACCAGCACCTCCAGCTCCCGCAGCAGCGCCTCGCCCCCACGGTGCGCGCTCCGCGCCGGTTCC'
        'ATGGGCTCCGTAGGTTCCATGGGCTCCGTAGGTTCCATGGGCTCCGTAGGTTCCATGGGCTCCGTAGGTTCCATCGGCTCCGTGGGTTCCATGGACT'
        'CTGTGGGCTCGGGCCCGACGCGCACGGAGGACTGGAGGACTGGGGCGTGTGTCTGCGGTGCAGGCGAGGCGGGGCGGGC')
    read = MockRead(
        query_sequence=sequence,
        query_name='duplication_with_untemp',
        reference_id=16,
        reference_name='reference17',
        reference_start=1882,
        cigar=[(CIGAR.EQ, 126), (CIGAR.I, 54), (CIGAR.EQ, 93)],
        is_reverse=False)
    event = align.call_read_events(read)[0]

    expected_insert = 'AGGTTCCATGGGCTCCGTAGGTTCCATGGGCTCCGTAGGTTCCATCGGCTCCGT'
    self.assertEqual(expected_insert, event.untemplated_seq)
    self.assertEqual(ORIENT.LEFT, event.break1.orient)
    self.assertEqual(ORIENT.RIGHT, event.break2.orient)
def test_single_one_event(self):
    """An insertion directly followed by a deletion is reported as one event.

    The CIGAR is 10M 3I 7D 10M. Exactly one event is expected, with
    breakpoints at 10 and 18 and ``GGG`` as the untemplated sequence.
    """
    cigar = [(CIGAR.M, 10), (CIGAR.I, 3), (CIGAR.D, 7), (CIGAR.M, 10)]
    read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=0,
        cigar=cigar,
        query_sequence='ACTGAATCGTGGGTAGCTGCTAG',
    )
    events = align.call_read_events(read)
    assert len(events) == 1

    event = events[0]
    assert event.opposing_strands is False
    assert event.break1.start == 10
    assert event.break1.end == 10
    assert event.break2.start == 18
    assert event.break2.end == 18
    assert event.untemplated_seq == 'GGG'
def test_single_duplication_with_no_untemp(self):
    """An insertion that is exactly one repeat copy converts to a clean duplication.

    The CIGAR is 51= 22I 52= and the 22 inserted bases equal the repeat named
    in the comment below. After ``align.convert_to_duplication`` no
    untemplated sequence is expected.
    """
    sequence = (
        'GGATGATTTACCTTGGGTAATGAAACTCAGATTTTGCTGTTGTTTTTGTTCGATTTTGCTGTTGTTTTTGTTCCAAAGTGTTTTATACTGATAAAGCAACC'
        'CCGGTTTAGCATTGCCATTGGTAA')
    # NOTE(review): query_name says "with_untemp" although this case expects no
    # untemplated bases -- looks copy-pasted; confirm before changing.
    read = MockRead(
        query_sequence=sequence,
        query_name='duplication_with_untemp',
        reference_id=2,
        reference_name='reference3',
        reference_start=1497,
        cigar=[(CIGAR.EQ, 51), (CIGAR.I, 22), (CIGAR.EQ, 52)],
        is_reverse=False)
    # repeat: GATTTTGCTGTTGTTTTTGTTC
    insertion = align.call_read_events(read)[0]
    duplication = align.convert_to_duplication(insertion, REFERENCE_GENOME)

    self.assertEqual('', duplication.untemplated_seq)
    self.assertEqual(ORIENT.RIGHT, duplication.break1.orient)
    self.assertEqual(ORIENT.LEFT, duplication.break2.orient)
    self.assertEqual(duplication.break2.start, 1548)
    self.assertEqual(duplication.break1.start, 1527)
def test_ins_and_del(self):
    """An insertion and a deletion separated by matched bases give two events.

    The CIGAR is 10M 3I 5M 7D 5M. The first event is expected to carry the
    inserted ``GGG``; the second is expected to have breakpoints at 15 and 23.
    """
    cigar = [(CIGAR.M, 10), (CIGAR.I, 3), (CIGAR.M, 5), (CIGAR.D, 7), (CIGAR.M, 5)]
    read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=0,
        cigar=cigar,
        query_sequence='ACTGAATCGTGGGTAGCTGCTAG',
    )
    # NOTE(review): an older comment here said only the major deletion is
    # reported, but the assertions below expect both events.
    events = align.call_read_events(read)
    assert len(events) == 2

    insertion = events[0]
    assert insertion.opposing_strands is False
    assert insertion.break1.start == 10
    assert insertion.break1.end == 10
    assert insertion.break2.start == 11
    assert insertion.break2.end == 11
    assert insertion.untemplated_seq == 'GGG'

    deletion = events[1]
    assert deletion.opposing_strands is False
    assert deletion.break1.start == 15
    assert deletion.break1.end == 15
    assert deletion.break2.start == 23
    assert deletion.break2.end == 23