def test_shift_overaligned(self):
    """Standardize a read whose deletion overlaps the gap between two exons.

    The read is aligned as ``14=7D12=`` against a pre-transcript with exons
    (1, 12) and (20, 28); ``standardize_read`` is expected to hand back a
    read whose cigar is ``12=7N14=``.
    """
    # qwertyuiopas---kkkkk------dfghjklzxcvbnm
    # .......... ................
    parent_gene = Gene('1', 1, 1000, strand='+')
    pre_transcript = PreTranscript(
        exons=[(1, 12), (20, 28)],
        gene=parent_gene,
        strand='+',
    )
    # Register one Transcript per splicing pattern, then attach to the gene.
    for pattern in pre_transcript.generate_splicing_patterns():
        spliced = Transcript(pre_transcript, pattern)
        pre_transcript.transcripts.append(spliced)
    parent_gene.transcripts.append(pre_transcript)

    input_cigar = _cigar.convert_string_to_cigar('14=7D12=')
    read = SamRead(
        reference_name='1',
        reference_start=0,
        cigar=input_cigar,
        query_sequence='qwertyuiopasdfghjklzxcvbnm',
    )

    reference_genome = {'1': MockObject(seq='qwertyuiopasdfkkkkkdfghjklzxcvbnm')}
    # The fake bam cache only needs to report each read's reference name.
    bam_cache = MockObject(get_read_reference_name=lambda rec: rec.reference_name)
    evidence = TranscriptomeEvidence(
        annotations={},
        reference_genome=reference_genome,
        bam_cache=bam_cache,
        break1=Breakpoint('1', 1, orient='L', strand='+'),
        break2=Breakpoint('1', 10, orient='R', strand='+'),
        read_length=75,
        stdev_fragment_size=75,
        median_fragment_size=220,
    )
    evidence.overlapping_transcripts.add(pre_transcript)

    standardized = evidence.standardize_read(read)
    assert standardized.cigar == _cigar.convert_string_to_cigar('12=7N14=')
def setUp(self):
    """Create a transcriptome and a genome evidence object with identical settings.

    Both objects share the same breakpoint pair and library parameters; the
    transcriptome evidence additionally receives an empty annotations dict.
    """
    left_break = Breakpoint('1', 1051, 1051, 'L')
    right_break = Breakpoint('1', 1551, 1551, 'R')
    self.read_length = 50

    # Keyword options common to both evidence constructors.
    shared_options = {
        'opposing_strands': False,
        'read_length': self.read_length,
        'stdev_fragment_size': 100,
        'median_fragment_size': 100,
        'stdev_count_abnormal': 1,
    }

    # The two None arguments stand in for bam_cache and reference_genome.
    self.trans_ev = TranscriptomeEvidence(
        {},  # fake the annotations
        left_break,
        right_break,
        None,
        None,
        **shared_options,
    )
    self.genomic_ev = GenomeEvidence(
        left_break,
        right_break,
        None,
        None,
        **shared_options,
    )
class TestComputeFragmentSizes(unittest.TestCase):
    # Compares compute_fragment_size between genome and transcriptome evidence
    # objects that were built with the same breakpoints and library settings.

    def setUp(self):
        left_break = Breakpoint('1', 1051, 1051, 'L')
        right_break = Breakpoint('1', 1551, 1551, 'R')
        self.read_length = 50

        # Keyword options common to both evidence constructors.
        shared_options = {
            'opposing_strands': False,
            'read_length': self.read_length,
            'stdev_fragment_size': 100,
            'median_fragment_size': 100,
            'stdev_count_abnormal': 1,
        }

        # The two None arguments stand in for bam_cache and reference_genome.
        self.trans_ev = TranscriptomeEvidence(
            {},  # fake the annotations
            left_break,
            right_break,
            None,
            None,
            **shared_options,
        )
        self.genomic_ev = GenomeEvidence(
            left_break,
            right_break,
            None,
            None,
            **shared_options,
        )

    def test_genomic_vs_trans_no_annotations(self):
        # With no annotations the two evidence types should be identical.
        forward = MockRead(
            'name', '1', 1051 - self.read_length + 1, 1051, is_reverse=False
        )
        backward = MockRead(
            'name', '1', 2300, 2300 + self.read_length - 1, is_reverse=True
        )
        read, mate = mock_read_pair(forward, backward)

        trans_size = self.trans_ev.compute_fragment_size(read, mate)
        genomic_size = self.genomic_ev.compute_fragment_size(read, mate)
        self.assertEqual(trans_size, genomic_size)

    def test_reverse_reads(self):
        # The same fragment size is expected whichever read of the pair is
        # passed first, for both evidence types.
        read, mate = mock_read_pair(
            MockRead('name', '1', 1001, 1100, is_reverse=False),
            MockRead('name', '1', 2201, 2301, is_reverse=True),
        )
        for evidence in (self.genomic_ev, self.trans_ev):
            for first, second in ((read, mate), (mate, read)):
                self.assertEqual(
                    Interval(1300), evidence.compute_fragment_size(first, second)
                )
def test_deletion_in_exon(self, egfr_evidence):
    """Net size of a deletion between 55238890 and 55238899, with and without inserted sequence."""

    def transcriptome_distance(p1, p2):
        # Call distance unbound so the fixture object is passed as `self`.
        return TranscriptomeEvidence.distance(egfr_evidence, p1, p2)

    # (untemplated sequence, expected net size)
    cases = (
        ('', -8),
        ('GTAC', -4),
    )
    for inserted, expected_size in cases:
        bpp = BreakpointPair(
            Breakpoint('7', 55238890, orient=ORIENT.LEFT),
            Breakpoint('7', 55238899, orient=ORIENT.RIGHT),
            untemplated_seq=inserted,
        )
        assert bpp.net_size(transcriptome_distance) == Interval(expected_size)
def test_deletion_across_intron(self, egfr_evidence):
    """Net size of two breakpoint pairs, each spanning a pair of coordinate ranges noted inline."""

    def transcriptome_distance(p1, p2):
        # Call distance unbound so the fixture object is passed as `self`.
        return TranscriptomeEvidence.distance(egfr_evidence, p1, p2)

    # 55240539_55240621 55323947_55324313
    first_left = Breakpoint('7', 55240610, orient=ORIENT.LEFT)
    first_right = Breakpoint('7', 55323950, orient=ORIENT.RIGHT)
    bpp = BreakpointPair(first_left, first_right, untemplated_seq='GTAC')
    assert bpp.net_size(transcriptome_distance) == Interval(-10)

    # 55210998_55211181 55218987_55219055
    second_left = Breakpoint('7', 55211180, orient=ORIENT.LEFT)
    second_right = Breakpoint('7', 55218990, orient=ORIENT.RIGHT)
    bpp = BreakpointPair(second_left, second_right, untemplated_seq='')
    # Here the expected net size is a range rather than a single value.
    assert bpp.net_size(transcriptome_distance) == Interval(-4 - 135, -4)
def test_insertion_at_exon_start(self, egfr_evidence):
    """Net size of a six-base insertion whose right breakpoint is at 55238868."""

    def transcriptome_distance(p1, p2):
        # Call distance unbound so the fixture object is passed as `self`.
        return TranscriptomeEvidence.distance(egfr_evidence, p1, p2)

    # 55238868_55238906
    left_break = Breakpoint('7', 55233130, orient=ORIENT.LEFT)
    right_break = Breakpoint('7', 55238868, orient=ORIENT.RIGHT)
    bpp = BreakpointPair(left_break, right_break, untemplated_seq='TTATCG')

    assert bpp.net_size(transcriptome_distance) == Interval(6)
def test_indel_in_intron(self, egfr_evidence):
    """Net size of a breakpoint pair at 5523700/5523751 carrying six untemplated bases."""

    def transcriptome_distance(p1, p2):
        # Call distance unbound so the fixture object is passed as `self`.
        return TranscriptomeEvidence.distance(egfr_evidence, p1, p2)

    # 55238868_55238906
    # NOTE(review): the breakpoint positions below have one digit fewer than
    # the range quoted above -- confirm they are the intended coordinates.
    left_break = Breakpoint('7', 5523700, orient=ORIENT.LEFT)
    right_break = Breakpoint('7', 5523751, orient=ORIENT.RIGHT)
    bpp = BreakpointPair(left_break, right_break, untemplated_seq='TTATCG')

    assert bpp.net_size(transcriptome_distance) == Interval(-44)
def test_shift_no_transcripts(self):
    # With empty annotations and no overlapping transcripts added, the cigar
    # returned by exon_boundary_shift_cigar is expected to equal the input.
    # NOTE(review): the cigar consumes 32 query bases while query_sequence
    # holds 28 characters -- confirm the mismatch is intentional.
    cigar_string = '14=7D18='
    read = SamRead(
        reference_name='1',
        reference_start=0,
        cigar=_cigar.convert_string_to_cigar(cigar_string),
        query_sequence='qwertyuiopasdfdfghjklzxcvbnm',
    )

    reference_genome = {'1': MockObject(seq='qwertyuiopasdfkkkkkdfghjklzxcvbnm')}
    evidence = TranscriptomeEvidence(
        annotations={},
        reference_genome=reference_genome,
        bam_cache=None,
        break1=Breakpoint('1', 1, orient='L', strand='+'),
        break2=Breakpoint('1', 10, orient='R', strand='+'),
        read_length=75,
        stdev_fragment_size=75,
        median_fragment_size=220,
    )

    shifted_cigar = evidence.exon_boundary_shift_cigar(read)
    # Build the expectation afresh so it never aliases the read's own cigar.
    self.assertEqual(_cigar.convert_string_to_cigar(cigar_string), shifted_cigar)
def test_insertion_at_exon_start_mixed(self, egfr_evidence):
    """Net size of a six-base insertion between adjacent positions 55238867 and 55238868."""

    def transcriptome_distance(p1, p2):
        # Call distance unbound so the fixture object is passed as `self`.
        return TranscriptomeEvidence.distance(egfr_evidence, p1, p2)

    # EXON 15: 55232973-55233130
    # EXON 16: 55238868-55238906
    # EXON 17: 55240676-55240817
    left_break = Breakpoint('7', 55238867, orient=ORIENT.LEFT)
    right_break = Breakpoint('7', 55238868, orient=ORIENT.RIGHT)
    bpp = BreakpointPair(left_break, right_break, untemplated_seq='TTATCG')

    assert bpp.net_size(transcriptome_distance) == Interval(6)
def trans_evidence(read_length):
    """Build a TranscriptomeEvidence with empty annotations for the given read length.

    Args:
        read_length: value forwarded as the ``read_length`` keyword.

    Returns:
        The constructed TranscriptomeEvidence.
    """
    left_break = Breakpoint('1', 1051, 1051, 'L')
    right_break = Breakpoint('1', 1551, 1551, 'R')
    settings = {'validate.stdev_count_abnormal': 1}

    # The two None arguments stand in for bam_cache and reference_genome.
    evidence = TranscriptomeEvidence(
        {},  # fake the annotations
        left_break,
        right_break,
        None,
        None,
        opposing_strands=False,
        read_length=read_length,
        stdev_fragment_size=100,
        median_fragment_size=100,
        config=settings,
    )
    return evidence
def transcriptome_window(ev, breakpoint, transcripts=None):
    """Generate the transcriptome window for a breakpoint on the given evidence object.

    Args:
        ev: evidence object passed as ``self`` to the unbound
            ``TranscriptomeEvidence.generate_window``.
        breakpoint: breakpoint to generate the window for.
        transcripts: optional collection merged into
            ``ev.overlapping_transcripts`` before the window is generated;
            ignored when falsy.

    Returns:
        Whatever ``TranscriptomeEvidence.generate_window`` returns.
    """
    if not transcripts:
        return TranscriptomeEvidence.generate_window(ev, breakpoint)

    ev.overlapping_transcripts.update(transcripts)
    return TranscriptomeEvidence.generate_window(ev, breakpoint)
def egfr_distance(self, pos1, pos2):
    """Return the TranscriptomeEvidence distance between two positions.

    ``TranscriptomeEvidence.distance`` is called unbound with
    ``self.evidence`` supplied as its instance argument.
    """
    evidence = self.evidence
    result = TranscriptomeEvidence.distance(evidence, pos1, pos2)
    return result
def transcriptome_window(self, breakpoint, transcripts=None):
    """Generate the transcriptome window for a breakpoint using ``self.trans_evidence``.

    Args:
        breakpoint: breakpoint to generate the window for.
        transcripts: optional collection merged into
            ``self.trans_evidence.overlapping_transcripts`` before the window
            is generated; ignored when falsy.

    Returns:
        Whatever ``TranscriptomeEvidence.generate_window`` returns.
    """
    if not transcripts:
        return TranscriptomeEvidence.generate_window(self.trans_evidence, breakpoint)

    self.trans_evidence.overlapping_transcripts.update(transcripts)
    return TranscriptomeEvidence.generate_window(self.trans_evidence, breakpoint)