def test_read_pair_inversion_gap_in_query_coverage(self):
    # Inversion called from two alignments of the same 30-base sequence that
    # leave a two-base gap on the query; the uncovered bases are expected
    # back as untemplated sequence.
    #
    # Upper case = aligned, lower case = soft-clipped.
    # seq   AAATTTCCCGGGAATTCCGGATCGATCGAT
    # r1    AAATTTCCCGGGAATTccggatcgatcgat  +
    # r2c   aaatttcccgggaattccGGATCGATCGAT  -  (r2 drawn in the orientation of seq)
    # i     ----------------CC------------     (query bases covered by neither read)
    # r2    ATCGATCGATCCggaattcccgggaaattt     (query sequence as given to the mock)
    #
    # r2 has reference_start=99 and 12 aligned bases; break2 is expected at 111.
    contig = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
    forward_cigar = [(CIGAR.M, 16), (CIGAR.S, 14)]
    forward_read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=0,
        cigar=forward_cigar,
        query_sequence=contig,
        is_reverse=False,
    )
    reverse_cigar = [(CIGAR.M, 12), (CIGAR.S, 18)]
    reverse_read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=99,
        cigar=reverse_cigar,
        query_sequence=reverse_complement(contig),
        is_reverse=True,
    )

    event = align.call_paired_read_event(forward_read, reverse_read)

    # opposite strands with matching orientations
    self.assertEqual(STRAND.POS, event.break1.strand)
    self.assertEqual(STRAND.NEG, event.break2.strand)
    self.assertEqual(ORIENT.LEFT, event.break1.orient)
    self.assertEqual(ORIENT.LEFT, event.break2.orient)
    # the gap between the two aligned query ranges
    self.assertEqual('CC', event.untemplated_seq)
    self.assertEqual(16, event.break1.start)
    self.assertEqual(111, event.break2.start)
    self.assertEqual('AAATTTCCCGGGAATT', event.break1.seq)
    self.assertEqual(reverse_complement('GGATCGATCGAT'), event.break2.seq)
def test_read_pair_deletion_overlapping_query_coverage(self):
    # Deletion called from two same-strand alignments whose aligned query
    # ranges overlap by three bases (GGA).
    #
    # Upper case = aligned, lower case = soft-clipped.
    # seq   AAATTTCCCGGGAATTCCGGATCGATCGAT
    # r1    AAATTTCCCGGGAATTCCGGAtcgatcgat
    # r2    aaatttcccgggaattccGGATCGATCGAT
    contig = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
    # keyword arguments common to both mock alignments
    shared = dict(
        reference_id=0,
        reference_name='1',
        query_sequence=contig,
        is_reverse=False,
    )
    upstream = MockRead(
        reference_start=0,
        cigar=[(CIGAR.M, 21), (CIGAR.S, 9)],
        **shared
    )
    downstream = MockRead(
        reference_start=99,
        cigar=[(CIGAR.S, 18), (CIGAR.M, 12)],
        **shared
    )
    # sanity check on the mock: 21 aligned bases from reference_start=0
    self.assertEqual(21, upstream.reference_end)

    event = align.call_paired_read_event(upstream, downstream)

    self.assertEqual(STRAND.POS, event.break1.strand)
    self.assertEqual(STRAND.POS, event.break2.strand)
    self.assertEqual(ORIENT.LEFT, event.break1.orient)
    self.assertEqual(ORIENT.RIGHT, event.break2.orient)
    self.assertEqual('', event.untemplated_seq)
    self.assertEqual(21, event.break1.start)
    # the second breakpoint is expected three bases past r2's first aligned base
    self.assertEqual(103, event.break2.start)
    self.assertEqual('AAATTTCCCGGGAATTCCGGA', event.break1.seq)
    self.assertEqual('TCGATCGAT', event.break2.seq)
def test_read_pair_large_inversion_overlapping_query_coverage(self):
    # Inversion called from two alignments of one long sequence: read1 is the
    # forward alignment (125 soft-clipped bases, then 120 matched) and read2
    # is the reverse-strand alignment of the reverse complement (117
    # soft-clipped, then 8 matched, a 1-base deletion and 120 more aligned).
    # NOTE(review): the aligned query ranges appear to overlap by three
    # bases, which would explain the "+ 3" on the expected break2 position;
    # confirm against align.call_paired_read_event.
    s = 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
    read1 = MockRead(
        reference_id=3,
        reference_start=1114,
        cigar=[(CIGAR.S, 125), (CIGAR.EQ, 120)],
        query_sequence=s,
        is_reverse=False,
    )
    read2 = MockRead(
        reference_id=3,
        reference_start=2187,
        cigar=[(CIGAR.S, 117), (CIGAR.EQ, 8), (CIGAR.D, 1), (CIGAR.M, 120)],
        query_sequence=reverse_complement(s),
        is_reverse=True,
    )

    bpp = align.call_paired_read_event(read1, read2)

    self.assertEqual(STRAND.POS, bpp.break1.strand)
    self.assertEqual(STRAND.NEG, bpp.break2.strand)
    self.assertEqual(ORIENT.RIGHT, bpp.break1.orient)
    self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
    self.assertEqual('', bpp.untemplated_seq)
    self.assertEqual(1115, bpp.break1.start)
    self.assertEqual(2188 + 3, bpp.break2.start)
    # No debug printing of the breakpoint sequences here: assertEqual already
    # reports both values when the comparison fails.
    self.assertEqual(
        'TCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAG'
        'GGTTTTCATTTCTGTATGTTAAT',
        bpp.break1.seq,
    )
    self.assertEqual(
        'GCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATCCA'
        'AATTCTGTGTTTACAGGGCTTTCATGCTCAG',
        bpp.break2.seq,
    )
def test_read_pair_indel(self):
    # Two same-strand alignments that each cover only nine bases at one end
    # of the query; the twelve query bases between them are expected back as
    # untemplated sequence.
    #
    # Upper case = aligned, lower case = soft-clipped.
    #                                       query   reference
    # seq   AAATTTCCCGGGAATTCCGGATCGATCGAT  1-30    1-?
    # r1    AAATTTCCCgggaattccggatcgatcgat  1-9     1-9
    # r2    aaatttcccgggaattccggaTCGATCGAT  22-30   100-108
    # i     ---------GGGAATTCCGGA---------  10-21   n/a
    contig = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
    left_cigar = [(CIGAR.M, 9), (CIGAR.S, 21)]
    left_read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=0,
        cigar=left_cigar,
        query_sequence=contig,
        is_reverse=False,
    )
    right_cigar = [(CIGAR.S, 21), (CIGAR.M, 9)]
    right_read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=99,
        cigar=right_cigar,
        query_sequence=contig,
        is_reverse=False,
    )

    event = align.call_paired_read_event(left_read, right_read)

    self.assertEqual(STRAND.POS, event.break1.strand)
    self.assertEqual(STRAND.POS, event.break2.strand)
    self.assertEqual(ORIENT.LEFT, event.break1.orient)
    self.assertEqual(ORIENT.RIGHT, event.break2.orient)
    # query bases 10-21, covered by neither alignment
    self.assertEqual('GGGAATTCCGGA', event.untemplated_seq)
    self.assertEqual(9, event.break1.start)
    self.assertEqual(100, event.break2.start)
    self.assertEqual('AAATTTCCC', event.break1.seq)
    self.assertEqual('TCGATCGAT', event.break2.seq)
def test_read_pair_translocation(self):
    # Two same-strand alignments of one query placed on different reference
    # names ('2' for r1, '1' for r2). The expected pair lists '1' as break1
    # and '2' as break2, i.e. not in the order the reads are passed in.
    #
    # Upper case = aligned, lower case = soft-clipped.
    # seq   AAATTTCCCGGGAATTCCGGATCGATCGAT
    # r1    AAATTTCCCGGGAATTCCGGAtcgatcgat
    # r2    aaatttcccgggaattccggaTCGATCGAT
    contig = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
    read_on_chr2 = MockRead(
        reference_id=0,
        reference_name='2',
        reference_start=0,
        cigar=[(CIGAR.M, 21), (CIGAR.S, 9)],
        query_sequence=contig,
        is_reverse=False,
    )
    read_on_chr1 = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=99,
        cigar=[(CIGAR.S, 21), (CIGAR.M, 9)],
        query_sequence=contig,
        is_reverse=False,
    )

    event = align.call_paired_read_event(read_on_chr2, read_on_chr1)

    self.assertEqual(STRAND.POS, event.break1.strand)
    self.assertEqual(STRAND.POS, event.break2.strand)
    self.assertEqual(ORIENT.RIGHT, event.break1.orient)
    self.assertEqual(ORIENT.LEFT, event.break2.orient)
    self.assertEqual('1', event.break1.chr)
    self.assertEqual('2', event.break2.chr)
    # the aligned query ranges abut exactly, so nothing is left over
    self.assertEqual('', event.untemplated_seq)
def test_read_pair_large_inversion_overlapping_query_coverage(self):
    # Inversion called (with is_stranded=True) from two alignments of one
    # long sequence: read1 is the forward alignment (125 soft-clipped bases,
    # then 120 matched) and read2 is the reverse-strand alignment of the
    # reverse complement (117 soft-clipped, then 8 matched, a 1-base deletion
    # and 120 more aligned).
    # NOTE(review): the aligned query ranges appear to overlap by three
    # bases, which would explain the "+ 3" on the expected break2 position;
    # confirm against align.call_paired_read_event.
    s = 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
    read1 = MockRead(
        reference_id=3,
        reference_start=1114,
        cigar=[(CIGAR.S, 125), (CIGAR.EQ, 120)],
        query_sequence=s,
        is_reverse=False,
    )
    read2 = MockRead(
        reference_id=3,
        reference_start=2187,
        cigar=[(CIGAR.S, 117), (CIGAR.EQ, 8), (CIGAR.D, 1), (CIGAR.M, 120)],
        query_sequence=reverse_complement(s),
        is_reverse=True,
    )

    bpp = align.call_paired_read_event(read1, read2, is_stranded=True)

    assert bpp.break1.strand == STRAND.POS
    assert bpp.break2.strand == STRAND.NEG
    assert bpp.break1.orient == ORIENT.RIGHT
    assert bpp.break2.orient == ORIENT.RIGHT
    assert bpp.untemplated_seq == ''
    assert bpp.break1.start == 1115
    assert bpp.break2.start == 2188 + 3
    # No debug printing of the breakpoint sequences here: a failing
    # comparison below already surfaces the mismatching values.
    assert (
        bpp.break1.seq
        == 'TCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
    )
    assert (
        bpp.break2.seq
        == 'GCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATCCAAATTCTGTGTTTACAGGGCTTTCATGCTCAG'
    )
def test_read_pair_inversion_overlapping_query_coverage(self):
    # Inversion called (with is_stranded=True) from two alignments whose
    # aligned query ranges share three bases (GGA).
    #
    # Upper case = aligned, lower case = soft-clipped.
    # seq   AAATTTCCCGGGAATTCCGGATCGATCGAT
    # r1    AAATTTCCCGGGAATTCCGGAtcgatcgat  +
    # r2c   aaatttcccgggaattccGGATCGATCGAT  -  (r2 drawn in the orientation of seq)
    # i     ------------------GGA---------     (query bases covered by both reads)
    # r2    ATCGATCGATCCggaattcccgggaaattt     (query sequence as given to the mock)
    #
    # r2 has reference_start=99 and 12 aligned bases (ending at 111); the
    # expected break2 position is three less than that: 111 - 3 = 108.
    contig = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
    forward_cigar = [(CIGAR.M, 21), (CIGAR.S, 9)]
    forward_read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=0,
        cigar=forward_cigar,
        query_sequence=contig,
        is_reverse=False,
    )
    reverse_cigar = [(CIGAR.M, 12), (CIGAR.S, 18)]
    reverse_read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=99,
        cigar=reverse_cigar,
        query_sequence=reverse_complement(contig),
        is_reverse=True,
    )

    event = align.call_paired_read_event(forward_read, reverse_read, is_stranded=True)

    assert event.break1.strand == STRAND.POS
    assert event.break2.strand == STRAND.NEG
    assert event.break1.orient == ORIENT.LEFT
    assert event.break2.orient == ORIENT.LEFT
    assert event.untemplated_seq == ''
    assert event.break1.start == 21
    assert event.break2.start == 108
    assert event.break1.seq == 'AAATTTCCCGGGAATTCCGGA'
    # only nine bases are expected on the second breakpoint
    assert event.break2.seq == reverse_complement('TCGATCGAT')
def test_read_pair_deletion_overlapping_query_coverage(self):
    # Deletion called (with is_stranded=True) from two same-strand alignments
    # whose aligned query ranges overlap by three bases (GGA).
    #
    # Upper case = aligned, lower case = soft-clipped.
    # seq   AAATTTCCCGGGAATTCCGGATCGATCGAT
    # r1    AAATTTCCCGGGAATTCCGGAtcgatcgat
    # r2    aaatttcccgggaattccGGATCGATCGAT
    contig = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
    # keyword arguments common to both mock alignments
    shared = dict(
        reference_id=0,
        reference_name='1',
        query_sequence=contig,
        is_reverse=False,
    )
    upstream = MockRead(
        reference_start=0,
        cigar=[(CIGAR.M, 21), (CIGAR.S, 9)],
        **shared,
    )
    downstream = MockRead(
        reference_start=99,
        cigar=[(CIGAR.S, 18), (CIGAR.M, 12)],
        **shared,
    )
    # sanity check on the mock: 21 aligned bases from reference_start=0
    assert upstream.reference_end == 21

    event = align.call_paired_read_event(upstream, downstream, is_stranded=True)

    assert event.break1.strand == STRAND.POS
    assert event.break2.strand == STRAND.POS
    assert event.break1.orient == ORIENT.LEFT
    assert event.break2.orient == ORIENT.RIGHT
    assert event.untemplated_seq == ''
    assert event.break1.start == 21
    # the second breakpoint is expected three bases past r2's first aligned base
    assert event.break2.start == 103
    assert event.break1.seq == 'AAATTTCCCGGGAATTCCGGA'
    assert event.break2.seq == 'TCGATCGAT'
def test_read_pair_translocation(self):
    # Two same-strand alignments of one query placed on different reference
    # names ('2' for r1, '1' for r2), called with is_stranded=True. The
    # expected pair lists '1' as break1 and '2' as break2, i.e. not in the
    # order the reads are passed in.
    #
    # Upper case = aligned, lower case = soft-clipped.
    # seq   AAATTTCCCGGGAATTCCGGATCGATCGAT
    # r1    AAATTTCCCGGGAATTCCGGAtcgatcgat
    # r2    aaatttcccgggaattccggaTCGATCGAT
    contig = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
    read_on_chr2 = MockRead(
        reference_id=0,
        reference_name='2',
        reference_start=0,
        cigar=[(CIGAR.M, 21), (CIGAR.S, 9)],
        query_sequence=contig,
        is_reverse=False,
    )
    read_on_chr1 = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=99,
        cigar=[(CIGAR.S, 21), (CIGAR.M, 9)],
        query_sequence=contig,
        is_reverse=False,
    )

    event = align.call_paired_read_event(read_on_chr2, read_on_chr1, is_stranded=True)

    assert event.break1.strand == STRAND.POS
    assert event.break2.strand == STRAND.POS
    assert event.break1.orient == ORIENT.RIGHT
    assert event.break2.orient == ORIENT.LEFT
    assert event.break1.chr == '1'
    assert event.break2.chr == '2'
    # the aligned query ranges abut exactly, so nothing is left over
    assert event.untemplated_seq == ''
def test_read_pair_indel(self):
    # Two same-strand alignments (called with is_stranded=True) that each
    # cover only nine bases at one end of the query; the twelve query bases
    # between them are expected back as untemplated sequence.
    #
    # Upper case = aligned, lower case = soft-clipped.
    #                                       query   reference
    # seq   AAATTTCCCGGGAATTCCGGATCGATCGAT  1-30    1-?
    # r1    AAATTTCCCgggaattccggatcgatcgat  1-9     1-9
    # r2    aaatttcccgggaattccggaTCGATCGAT  22-30   100-108
    # i     ---------GGGAATTCCGGA---------  10-21   n/a
    contig = 'AAATTTCCCGGGAATTCCGGATCGATCGAT'  # 30
    left_cigar = [(CIGAR.M, 9), (CIGAR.S, 21)]
    left_read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=0,
        cigar=left_cigar,
        query_sequence=contig,
        is_reverse=False,
    )
    right_cigar = [(CIGAR.S, 21), (CIGAR.M, 9)]
    right_read = MockRead(
        reference_id=0,
        reference_name='1',
        reference_start=99,
        cigar=right_cigar,
        query_sequence=contig,
        is_reverse=False,
    )

    event = align.call_paired_read_event(left_read, right_read, is_stranded=True)

    assert event.break1.strand == STRAND.POS
    assert event.break2.strand == STRAND.POS
    assert event.break1.orient == ORIENT.LEFT
    assert event.break2.orient == ORIENT.RIGHT
    # query bases 10-21, covered by neither alignment
    assert event.untemplated_seq == 'GGGAATTCCGGA'
    assert event.break1.start == 9
    assert event.break2.start == 100
    assert event.break1.seq == 'AAATTTCCC'
    assert event.break2.seq == 'TCGATCGAT'