def test_insertion_to_duplication(self):
    # A 32 bp insertion (eight copies of CATA) is handed to
    # align.convert_to_duplication; the test expects it to come back as a
    # duplication spanning 60204588-60204611 with only the last 8 inserted
    # bases left as untemplated sequence.
    #
    # Call this case was derived from:
    # BPP(Breakpoint(3:60204611L), Breakpoint(3:60204612R), opposing=False, seq='CATACATACATACATACATACATACATACATA')
    # insertion contig [seq2] contig_alignment_score: 0.99, contig_alignment_mq: Interval(255, 255)
    # (3:60132614[seq2]140=71788D69=32I86=, None))
    inserted_seq = 'CATACATACATACATACATACATACATACATA'
    expected_dup_start = 60204588
    expected_dup_end = 60204611

    insertion = BreakpointPair(
        Breakpoint('3', 60204611, orient='L'),
        Breakpoint('3', 60204612, orient='R'),
        untemplated_seq=inserted_seq,
        opposing_strands=False,
    )

    # Only a short window of chromosome 3 is mocked; `offset` maps genomic
    # coordinates onto the literal below.
    chr3 = MockObject(
        seq=MockLongString(
            'CAGGGTCTGAGCTCTTAACTCTATACTGCCTACATACATACATACATACATACATATATACATACATATATAAATT',
            offset=60204555,
        )
    )
    genome = {'3': chr3}

    # Debug output: reference bases at the expected duplication start, printed
    # next to the expected untemplated remainder.
    print(genome['3'].seq[expected_dup_start:expected_dup_start + 8], 'CATACATA')

    # The pair was not produced from real reads, so attach placeholders.
    insertion.read1 = MockObject(query_sequence='', query_name=None)
    insertion.read2 = None

    dup = align.convert_to_duplication(insertion, genome)
    print(dup)

    self.assertEqual(ORIENT.RIGHT, dup.break1.orient)
    self.assertEqual(expected_dup_start, dup.break1.start)
    self.assertEqual(ORIENT.LEFT, dup.break2.orient)
    self.assertEqual(expected_dup_end, dup.break2.start)
    # CATACATACATACATACATACATACATACATA
    # ........................********
    # (dots: the 24 bases covered by the expected duplication span;
    #  stars: the 8 bases expected to stay untemplated)
    self.assertEqual('CATACATA', dup.untemplated_seq)
def test_single_duplication_with_trailing_untemp(self):
    # The read aligns as 51 matches, a 27 bp insertion and 52 matches. The
    # inserted bases are a second copy of the 22 bp repeat followed by 5
    # extra bases; the test expects a duplication of 1527-1548 that keeps
    # those 5 bases as untemplated sequence.
    left_flank = 'GGATGATTTACCTTGGGTAATGAAACTCA'
    repeat = 'GATTTTGCTGTTGTTTTTGTTC'
    untemplated = 'GTCAA'
    right_flank = 'CAAAGTGTTTTATACTGATAAAGCAACCCCGGTTTAGCATTGCCATTGGTAA'

    aln_start = 1497
    dup_end = aln_start + 51  # 1548, the last reference base before the insertion

    read = MockRead(
        query_sequence=left_flank + repeat + repeat + untemplated + right_flank,
        query_name='duplication_with_untemp',
        reference_id=2,
        reference_name='reference3',
        reference_start=aln_start,
        cigar=[(CIGAR.EQ, 51), (CIGAR.I, 27), (CIGAR.EQ, 52)],
        is_reverse=False,
    )

    # Debug output: the read, the reference under the first aligned segment,
    # and the reference window around the expected duplication.
    print(read)
    print(REFERENCE_GENOME['reference3'][aln_start:aln_start + 51])
    print(REFERENCE_GENOME['reference3'][dup_end - 21:dup_end + 1])

    insertion_call = align.call_read_events(read)[0]
    print(insertion_call)
    dup = align.convert_to_duplication(insertion_call, REFERENCE_GENOME)
    print(dup)

    self.assertEqual(untemplated, dup.untemplated_seq)
    self.assertEqual(ORIENT.RIGHT, dup.break1.orient)
    self.assertEqual(ORIENT.LEFT, dup.break2.orient)
    self.assertEqual(dup.break2.start, dup_end)
    self.assertEqual(dup.break1.start, 1527)
def test_single_duplication_with_no_untemp(self):
    # Same read layout as the trailing-untemplated case, except that the
    # 22 bp insertion is exactly one extra copy of the repeat. The test
    # expects a duplication of 1527-1548 with no untemplated sequence.
    query = (
        'GGATGATTTACCTTGGGTAATGAAACTCA'  # 29 bp left flank
        'GATTTTGCTGTTGTTTTTGTTC'  # repeat: GATTTTGCTGTTGTTTTTGTTC
        'GATTTTGCTGTTGTTTTTGTTC'  # second copy (the 22 bp insertion)
        'CAAAGTGTTTTATACTGATAAAGCAACC'  # right flank ...
        'CCGGTTTAGCATTGCCATTGGTAA'  # ... continued, 52 bp in total
    )
    # NOTE(review): query_name still says "with_untemp" although this read
    # carries no untemplated bases -- presumably copied from the sibling
    # test; left unchanged here.
    read = MockRead(
        query_sequence=query,
        query_name='duplication_with_untemp',
        reference_id=2,
        reference_name='reference3',
        reference_start=1497,
        cigar=[(CIGAR.EQ, 51), (CIGAR.I, 22), (CIGAR.EQ, 52)],
        is_reverse=False,
    )

    first_event = align.call_read_events(read)[0]
    dup = align.convert_to_duplication(first_event, REFERENCE_GENOME)

    self.assertEqual('', dup.untemplated_seq)
    self.assertEqual(ORIENT.RIGHT, dup.break1.orient)
    self.assertEqual(ORIENT.LEFT, dup.break2.orient)
    self.assertEqual(dup.break2.start, 1548)
    self.assertEqual(dup.break1.start, 1527)
def test_single_bp_insertion(self):
    # A single inserted 'T' between positions 121 and 122 is handed to
    # align.convert_to_duplication; the test expects a one-base duplication
    # at position 121 with no untemplated sequence left over.
    insertion = BreakpointPair(
        Breakpoint('3', 121, orient='L'),
        Breakpoint('3', 122, orient='R'),
        untemplated_seq='T',
        opposing_strands=False,
    )

    # Mocked slice of chromosome 3; `offset` maps genomic coordinates onto
    # the literal below.
    chr3 = MockObject(
        seq=MockLongString('ATCGAGCTACGGATCTTTTTTCGATCGATCAATA', offset=100)
    )
    genome = {'3': chr3}

    # Debug output: the reference bases leading up to the insertion site.
    print(genome['3'].seq[110:121])

    # The pair was not produced from real reads, so attach placeholders.
    insertion.read1 = MockObject(query_sequence='', query_name=None)
    insertion.read2 = None

    dup = align.convert_to_duplication(insertion, genome)
    print(dup)

    self.assertEqual(ORIENT.RIGHT, dup.break1.orient)
    self.assertEqual(121, dup.break1.start)
    self.assertEqual(ORIENT.LEFT, dup.break2.orient)
    self.assertEqual(121, dup.break2.start)
    self.assertEqual('', dup.untemplated_seq)