def setUp(self):
    """Create a three-exon negative-strand pre-transcript and a mocked evidence object.

    The mock carries only the attributes assigned below, plus a ``traverse``
    callable bound to ``TranscriptomeEvidence.traverse``.
    """
    exon_coords = [(1001, 1100), (1301, 1400), (1701, 1800)]
    self.transcript = PreTranscript(exon_coords, strand=STRAND.NEG)
    # one spliced transcript per generated splicing pattern
    for pattern in self.transcript.generate_splicing_patterns():
        spliced = Transcript(self.transcript, pattern)
        self.transcript.transcripts.append(spliced)

    self.trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={self.transcript},
    )

    def select_all(*pos):
        # positions are ignored: every overlapping transcript is always returned
        return self.trans_evidence.overlapping_transcripts

    self.trans_evidence._select_transcripts = select_all
    self.trans_evidence.traverse = partial(TranscriptomeEvidence.traverse, self.trans_evidence)
def build_transcript(gene, exons, cds_start, cds_end, domains, strand=None, is_best_transcript=False, name=None):
    """Build a pre-transcript with one spliced transcript and translation per splicing pattern.

    Args:
        gene: parent gene, or None. When given, the new pre-transcript is
            appended to ``gene.unspliced_transcripts``.
        exons: exon definitions passed straight through to ``PreTranscript``.
        cds_start: coding start passed to every ``Translation``.
        cds_end: coding end passed to every ``Translation``.
        domains: domains passed to every ``Translation``.
        strand: explicit strand; when None it is taken from ``gene.get_strand()``
            if a gene was supplied.
        is_best_transcript: forwarded to ``PreTranscript``.
        name: forwarded to ``PreTranscript``.

    Returns:
        The newly created ``PreTranscript``.
    """
    # Only fall back to the gene's strand when there is a gene to ask; the
    # original dereferenced ``gene`` unconditionally and failed with an
    # AttributeError for gene=None, strand=None.
    if strand is None and gene is not None:
        strand = gene.get_strand()

    pre_transcript = PreTranscript(
        exons,
        gene=gene,
        strand=strand,
        is_best_transcript=is_best_transcript,
        name=name,
    )
    if gene is not None:
        gene.unspliced_transcripts.append(pre_transcript)

    for spl in pre_transcript.generate_splicing_patterns():
        spliced = Transcript(pre_transcript, spl)
        pre_transcript.spliced_transcripts.append(spliced)
        # every spliced transcript receives its own translation over the same CDS
        translation = Translation(cds_start, cds_end, spliced, domains=domains)
        spliced.translations.append(translation)

    return pre_transcript
def test_shift_overaligned(self):
    # The read is aligned as 14 matches, a 7bp deletion and 12 matches, so the
    # gap opens two bases past the end of the first exon (1-12). Standardizing
    # against the transcript is expected to yield 12 matches, a 7bp skipped
    # region (N) and 14 matches.
    # qwertyuiopas---kkkkk------dfghjklzxcvbnm
    gene = Gene('1', 1, 1000, strand='+')
    transcript = PreTranscript(exons=[(1, 12), (20, 28)], gene=gene, strand='+')
    for pattern in transcript.generate_splicing_patterns():
        spliced = Transcript(transcript, pattern)
        transcript.transcripts.append(spliced)
    gene.transcripts.append(transcript)

    overaligned_cigar = _cigar.convert_string_to_cigar('14=7D12=')
    read = SamRead(
        reference_name='1',
        reference_start=0,
        cigar=overaligned_cigar,
        query_sequence='qwertyuiopasdfghjklzxcvbnm',
    )

    reference = {'1': MockObject(seq='qwertyuiopasdfkkkkkdfghjklzxcvbnm')}
    cache = MockObject(get_read_reference_name=lambda r: r.reference_name)
    first_break = Breakpoint('1', 1, orient='L', strand='+')
    second_break = Breakpoint('1', 10, orient='R', strand='+')
    evidence = TranscriptomeEvidence(
        annotations={},
        reference_genome=reference,
        bam_cache=cache,
        break1=first_break,
        break2=second_break,
        read_length=75,
        stdev_fragment_size=75,
        median_fragment_size=220,
    )
    evidence.overlapping_transcripts.add(transcript)

    new_read = evidence.standardize_read(read)
    assert new_read.cigar == _cigar.convert_string_to_cigar('12=7N14=')
def test_net_zero(self):
    # Breakpoints sit just inside the first exon (ends 1100) and the second exon
    # (starts 1301) with two untemplated bases between them.
    exon_coords = [(1001, 1100), (1301, 1400), (1701, 1800)]
    transcript = PreTranscript(exon_coords, strand=STRAND.POS)
    for pattern in transcript.generate_splicing_patterns():
        spliced = Transcript(transcript, pattern)
        transcript.transcripts.append(spliced)

    trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={transcript},
    )

    def select_all(*pos):
        # positions are ignored: every overlapping transcript is always returned
        return trans_evidence.overlapping_transcripts

    trans_evidence._select_transcripts = select_all
    trans_evidence.distance = partial(TranscriptomeEvidence.distance, trans_evidence)

    left_break = Breakpoint('1', 1099, orient=ORIENT.LEFT)
    right_break = Breakpoint('1', 1302, orient=ORIENT.RIGHT)
    bpp = BreakpointPair(left_break, right_break, untemplated_seq='TT')
    dist = partial(TranscriptomeEvidence.distance, trans_evidence)

    # default (genomic) distance first, then the transcriptome-aware distance
    genomic_size = bpp.net_size()
    assert genomic_size == Interval(-200)
    transcriptome_size = bpp.net_size(dist)
    assert transcriptome_size == Interval(0)
def setUp(self):
    """Create a four-exon KRAS pre-transcript plus mocked genome and transcriptome evidence."""
    gene = Gene('1', 1, 9999, name='KRAS', strand=STRAND.POS)
    exon_coords = [(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)]
    self.pre_transcript = PreTranscript(gene=gene, exons=exon_coords)
    gene.unspliced_transcripts.append(self.pre_transcript)
    # one spliced transcript per generated splicing pattern
    for pattern in self.pre_transcript.generate_splicing_patterns():
        spliced = Transcript(self.pre_transcript, pattern)
        self.pre_transcript.transcripts.append(spliced)
    self.annotations = {gene.chr: [gene]}

    self.genome_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
    )
    self.trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={self.pre_transcript},
    )

    def select_all(*pos):
        # positions are ignored: every overlapping transcript is always returned
        return self.trans_evidence.overlapping_transcripts

    self.trans_evidence._select_transcripts = select_all
    self.trans_evidence.traverse = partial(TranscriptomeEvidence.traverse, self.trans_evidence)
def test_empty_intron(self, distance_setup):
    # Second transcript whose last two exons are directly adjacent
    # (2001-2200 followed by 2201-2300), i.e. separated by an empty intron.
    t2 = PreTranscript([(1001, 1100), (1501, 1600), (2001, 2200), (2201, 2300)], strand='+')
    for patt in t2.generate_splicing_patterns():
        t2.transcripts.append(Transcript(t2, patt))
    # leftover debugging print() calls were removed here; they did not take part in the check
    distance_setup.trans_evidence.overlapping_transcripts.add(t2)
    dist = distance_setup.trans_evidence.distance(1001, 2301)
    assert dist == Interval(400, 400)
def test_multiple_transcripts(self, trans_window_setup):
    # fixture transcript exons: [(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)]
    b = Breakpoint(chr='1', start=1150, orient=ORIENT.RIGHT)
    gene = trans_window_setup.annotations['1'][0]

    # a second transcript on the same gene with a different exon structure
    second_exons = [(1001, 1100), (1200, 1300), (2100, 2200)]
    t2 = PreTranscript(gene=gene, exons=second_exons)
    for pattern in t2.generate_splicing_patterns():
        spliced = Transcript(t2, pattern)
        t2.transcripts.append(spliced)
    gene.transcripts.append(t2)

    # 989 - 2561
    # 989 - 3411
    candidates = [trans_window_setup.pre_transcript, t2]
    window = transcriptome_window(trans_window_setup.trans_evidence, b, candidates)
    assert window == Interval(1040, 3160)
def setup_by_strand(self, strand):
    """Create six exons, the matching reference sequence and a pre-transcript on ``strand``.

    Each exon is 100 bases of a single upper-case letter in the reference;
    everything outside the exons is lower-case ``a``.
    """
    exon_coords = [
        (100, 199),  # C
        (500, 599),  # G
        (1200, 1299),  # T
        (1500, 1599),  # C
        (1700, 1799),  # G
        (2000, 2099),  # C
    ]
    exons = [Exon(start, end, strand=strand) for start, end in exon_coords]
    self.ex1, self.ex2, self.ex3, self.ex4, self.ex5, self.ex6 = exons

    # (base, run length) pairs: leading flank, then alternating exon / intron runs
    runs = [
        ('a', 99), ('C', 100),
        ('a', 300), ('G', 100),
        ('a', 600), ('T', 100),
        ('a', 200), ('C', 100),
        ('a', 100), ('G', 100),
        ('a', 200), ('C', 100),
    ]
    self.reference_sequence = ''.join(base * length for base, length in runs)
    self.pre_transcript = PreTranscript(exons=exons, strand=strand)
def trans_window_setup():
    """Return a namespace holding a KRAS pre-transcript, annotations and mocked evidence objects.

    Both mocks carry a config built from ``DEFAULTS`` with
    ``validate.call_error`` overridden to 11.
    """
    gene = Gene('1', 1, 9999, name='KRAS', strand=STRAND.POS)
    exon_coords = [(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)]

    n = argparse.Namespace()
    n.pre_transcript = PreTranscript(gene=gene, exons=exon_coords)
    gene.unspliced_transcripts.append(n.pre_transcript)
    # one spliced transcript per generated splicing pattern
    for pattern in n.pre_transcript.generate_splicing_patterns():
        spliced = Transcript(n.pre_transcript, pattern)
        n.pre_transcript.transcripts.append(spliced)
    n.annotations = {gene.chr: [gene]}

    # each mock receives its own config dict
    n.genome_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        config={**DEFAULTS, 'validate.call_error': 11},
    )
    n.trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        overlapping_transcripts={n.pre_transcript},
        config={**DEFAULTS, 'validate.call_error': 11},
    )

    def select_all(*pos):
        # positions are ignored: every overlapping transcript is always returned
        return n.trans_evidence.overlapping_transcripts

    n.trans_evidence._select_transcripts = select_all
    n.trans_evidence.traverse = partial(TranscriptomeEvidence.traverse, n.trans_evidence)
    return n
def distance_setup():
    """Return a namespace with a four-exon '+' pre-transcript and a mock exposing ``distance``."""
    exon_coords = [(1001, 1100), (1501, 1600), (2001, 2100), (2201, 2300)]

    n = argparse.Namespace()
    n.transcript = PreTranscript(exon_coords, strand='+')
    # one spliced transcript per generated splicing pattern
    for pattern in n.transcript.generate_splicing_patterns():
        spliced = Transcript(n.transcript, pattern)
        n.transcript.transcripts.append(spliced)

    n.trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={n.transcript},
    )

    def select_all(*pos):
        # positions are ignored: every overlapping transcript is always returned
        return n.trans_evidence.overlapping_transcripts

    n.trans_evidence._select_transcripts = select_all
    n.trans_evidence.distance = partial(TranscriptomeEvidence.distance, n.trans_evidence)
    return n
class TestNetSizeTrans(unittest.TestCase):
    # Checks BreakpointPair.net_size with and without a transcriptome distance function.

    def setUp(self):
        """Create a three-exon positive-strand pre-transcript and a mock exposing ``distance``."""
        exon_coords = [(1001, 1100), (1301, 1400), (1701, 1800)]
        self.transcript = PreTranscript(exon_coords, strand=STRAND.POS)
        for pattern in self.transcript.generate_splicing_patterns():
            spliced = Transcript(self.transcript, pattern)
            self.transcript.transcripts.append(spliced)

        self.trans_evidence = MockObject(
            annotations={},
            read_length=100,
            max_expected_fragment_size=550,
            call_error=11,
            overlapping_transcripts={self.transcript},
        )

        def select_all(*pos):
            # positions are ignored: every overlapping transcript is always returned
            return self.trans_evidence.overlapping_transcripts

        self.trans_evidence._select_transcripts = select_all
        self.trans_evidence.distance = partial(TranscriptomeEvidence.distance, self.trans_evidence)

    def test_net_zero(self):
        # breakpoints just inside exon 1 (ends 1100) and exon 2 (starts 1301), two untemplated bases
        left_break = Breakpoint('1', 1099, orient=ORIENT.LEFT)
        right_break = Breakpoint('1', 1302, orient=ORIENT.RIGHT)
        bpp = BreakpointPair(left_break, right_break, untemplated_seq='TT')
        dist = partial(TranscriptomeEvidence.distance, self.trans_evidence)

        genomic_size = bpp.net_size()
        self.assertEqual(Interval(-200), genomic_size)
        transcriptome_size = bpp.net_size(dist)
        self.assertEqual(Interval(0), transcriptome_size)
def setUp(self):
    """Create a four-exon '+' strand pre-transcript and a mock exposing ``distance``."""
    exon_coords = [(1001, 1100), (1501, 1600), (2001, 2100), (2201, 2300)]
    self.transcript = PreTranscript(exon_coords, strand='+')
    # one spliced transcript per generated splicing pattern
    for pattern in self.transcript.generate_splicing_patterns():
        spliced = Transcript(self.transcript, pattern)
        self.transcript.transcripts.append(spliced)

    self.trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={self.transcript},
    )

    def select_all(*pos):
        # positions are ignored: every overlapping transcript is always returned
        return self.trans_evidence.overlapping_transcripts

    self.trans_evidence._select_transcripts = select_all
    self.trans_evidence.distance = partial(TranscriptomeEvidence.distance, self.trans_evidence)
class TestDistance(unittest.TestCase):
    # Exercises TranscriptomeEvidence.distance through a mock that only provides
    # the attributes assigned in setUp.

    def setUp(self):
        """Create a four-exon '+' strand pre-transcript and a mock exposing ``distance``."""
        self.transcript = PreTranscript([(1001, 1100), (1501, 1600), (2001, 2100), (2201, 2300)], strand='+')
        for patt in self.transcript.generate_splicing_patterns():
            self.transcript.transcripts.append(Transcript(self.transcript, patt))
        self.trans_evidence = MockObject(
            annotations={},
            read_length=100,
            max_expected_fragment_size=550,
            call_error=11,
            overlapping_transcripts={self.transcript},
        )
        # transcript selection ignores the positions and returns every overlapping transcript
        setattr(
            self.trans_evidence,
            '_select_transcripts',
            lambda *pos: self.trans_evidence.overlapping_transcripts,
        )
        setattr(
            self.trans_evidence,
            'distance',
            partial(TranscriptomeEvidence.distance, self.trans_evidence),
        )

    def test_exonic(self):
        # both positions lie in exons (first and second)
        self.assertEqual(Interval(149), self.trans_evidence.distance(1001, 1550))

    def test_intergenic_exonic(self):
        # start lies before the first exon, end inside the second exon
        dist = self.trans_evidence.distance(101, 1550)
        self.assertEqual(Interval(1049, 1049), dist)

    def test_intergenic_intergenic(self):
        # both positions lie before the first exon
        dist = self.trans_evidence.distance(101, 300)
        self.assertEqual(Interval(199), dist)

    def test_aligned_intronic(self):
        # both positions lie in the intron between the first and second exons
        dist = self.trans_evidence.distance(1102, 1499)
        self.assertEqual(Interval(5), dist)

    def test_indel_at_exon_boundary(self):
        # from the first intronic base after exon 1 to the first base of exon 2
        self.assertEqual(Interval(2), self.trans_evidence.distance(1101, 1501))

    def test_no_annotations(self):
        # an explicit empty list is passed as the third argument
        dist = self.trans_evidence.distance(101, 300, [])
        self.assertEqual(Interval(199), dist)

    def test_intergenic_intronic(self):
        # start lies before the first exon, end inside the first intron
        dist = self.trans_evidence.distance(101, 1400)
        self.assertEqual(Interval(1101), dist)

    def test_empty_intron(self):
        # Second transcript whose last two exons are directly adjacent
        # (2001-2200 followed by 2201-2300), i.e. separated by an empty intron.
        t2 = PreTranscript([(1001, 1100), (1501, 1600), (2001, 2200), (2201, 2300)], strand='+')
        for patt in t2.generate_splicing_patterns():
            t2.transcripts.append(Transcript(t2, patt))
        # leftover debugging print() calls were removed here; they did not take part in the check
        self.trans_evidence.overlapping_transcripts.add(t2)
        dist = self.trans_evidence.distance(1001, 2301)
        self.assertEqual(Interval(400, 400), dist)
def test_many_small_exons(self):
    # a nine-exon transcript on its own chromosome; the breakpoint sits one base
    # before the end of the last exon
    exon_coords = [
        (17271277, 17271984),
        (17272649, 17272709),
        (17275586, 17275681),
        (17275769, 17275930),
        (17276692, 17276817),
        (17277168, 17277388),  # 220
        (17277845, 17277888),  # 44
        (17278293, 17278378),  # 86
        (17279229, 17279592),  # 364
    ]
    g = Gene('fake', 17271277, 17279592, strand='+')
    pre_transcript = PreTranscript(gene=g, exons=exon_coords)
    g.transcripts.append(pre_transcript)
    for pattern in pre_transcript.generate_splicing_patterns():
        spliced = Transcript(pre_transcript, pattern)
        pre_transcript.transcripts.append(spliced)

    b = Breakpoint(chr='fake', start=17279591, orient=ORIENT.LEFT)
    expected = Interval(17277321, 17279701)
    self.assertEqual(expected, self.transcriptome_window(b, [pre_transcript]))
def setUp(self):
    """Create two identical contig-called genome events, two pre-transcripts and lookup tables."""

    def contig_call():
        # a fresh pair (and a fresh data dict) on every call so the two events share no state
        return BreakpointPair(
            Breakpoint('1', 1),
            Breakpoint('1', 10),
            opposing_strands=True,
            data={
                COLUMNS.event_type: SVTYPE.DEL,
                COLUMNS.call_method: CALL_METHOD.CONTIG,
                COLUMNS.fusion_sequence_fasta_id: None,
                COLUMNS.protocol: PROTOCOL.GENOME,
            },
        )

    self.gev1 = contig_call()
    self.gev2 = contig_call()

    self.ust1 = PreTranscript(
        exons=[(1, 100), (301, 400), (501, 600)], strand=STRAND.POS, name='t1'
    )
    self.ust2 = PreTranscript(
        exons=[(1001, 1100), (1301, 1400), (1501, 1600)], strand=STRAND.POS, name='t2'
    )

    # distance value per call method
    self.distances = {
        CALL_METHOD.CONTIG: 0,
        CALL_METHOD.FLANK: 0,
        CALL_METHOD.SPLIT: 10,
    }
    # pre-transcripts indexed by their name
    self.TRANSCRIPTS = {ust.name: ust for ust in (self.ust1, self.ust2)}
def pos_splicing_pattern():
    """Return a namespace with six positive-strand exons, a reference sequence and a pre-transcript.

    Each exon is 100 bases of a single upper-case letter in the reference;
    everything outside the exons is lower-case ``a``.
    """
    exon_coords = [
        (100, 199),  # C
        (500, 599),  # G
        (1200, 1299),  # T
        (1500, 1599),  # C
        (1700, 1799),  # G
        (2000, 2099),  # C
    ]
    exons = [Exon(start, end, strand=STRAND.POS) for start, end in exon_coords]

    n = argparse.Namespace()
    n.ex1, n.ex2, n.ex3, n.ex4, n.ex5, n.ex6 = exons

    # (base, run length) pairs: leading flank, then alternating exon / intron runs
    runs = [
        ('a', 99), ('C', 100),
        ('a', 300), ('G', 100),
        ('a', 600), ('T', 100),
        ('a', 200), ('C', 100),
        ('a', 100), ('G', 100),
        ('a', 200), ('C', 100),
    ]
    n.reference_sequence = ''.join(base * length for base, length in runs)
    n.pre_transcript = PreTranscript(exons=exons, strand=STRAND.POS)
    return n
def tranverse_trans_rev_setup():
    """Return a namespace with a negative-strand pre-transcript and a mock exposing ``traverse``."""
    exon_coords = [(1001, 1100), (1301, 1400), (1701, 1800)]

    n = argparse.Namespace()
    n.transcript = PreTranscript(exon_coords, strand=STRAND.NEG)
    # one spliced transcript per generated splicing pattern
    for pattern in n.transcript.generate_splicing_patterns():
        spliced = Transcript(n.transcript, pattern)
        n.transcript.transcripts.append(spliced)

    n.trans_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={n.transcript},
    )

    def select_all(*pos):
        # positions are ignored: every overlapping transcript is always returned
        return n.trans_evidence.overlapping_transcripts

    n.trans_evidence._select_transcripts = select_all
    n.trans_evidence.traverse = partial(TranscriptomeEvidence.traverse, n.trans_evidence)
    return n
class TestTraverseTransRev(unittest.TestCase):
    # Exercises TranscriptomeEvidence.traverse against a negative-strand transcript.

    def setUp(self):
        """Create a three-exon negative-strand pre-transcript and a mock exposing ``traverse``."""
        exon_coords = [(1001, 1100), (1301, 1400), (1701, 1800)]
        self.transcript = PreTranscript(exon_coords, strand=STRAND.NEG)
        for pattern in self.transcript.generate_splicing_patterns():
            spliced = Transcript(self.transcript, pattern)
            self.transcript.transcripts.append(spliced)

        self.trans_evidence = MockObject(
            annotations={},
            read_length=100,
            max_expected_fragment_size=550,
            call_error=11,
            overlapping_transcripts={self.transcript},
        )

        def select_all(*pos):
            # positions are ignored: every overlapping transcript is always returned
            return self.trans_evidence.overlapping_transcripts

        self.trans_evidence._select_transcripts = select_all
        self.trans_evidence.traverse = partial(TranscriptomeEvidence.traverse, self.trans_evidence)

    def test_left_before_transcript(self):
        # entirely upstream of the transcript: must agree with the genome traversal
        start, distance = 900, 500 - 1
        gpos = self.trans_evidence.traverse(start, distance, ORIENT.LEFT)
        self.assertEqual(Interval(401), gpos)
        self.assertEqual(gpos, GenomeEvidence.traverse(start, distance, ORIENT.LEFT))

    def test_left_after_transcript(self):
        # entirely downstream of the transcript: must agree with the genome traversal
        start, distance = 2200, 100
        gpos = self.trans_evidence.traverse(start, distance, ORIENT.LEFT)
        self.assertEqual(gpos, GenomeEvidence.traverse(start, distance, ORIENT.LEFT))
        self.assertEqual(Interval(2100), gpos)

    def test_left_after_transcript2(self):
        distance = 500 - 1
        gpos = self.trans_evidence.traverse(1900, distance, ORIENT.LEFT)
        self.assertEqual(Interval(901), gpos)

    def test_left_within_transcript_exonic(self):
        distance = 200 - 1
        gpos = self.trans_evidence.traverse(1750, distance, ORIENT.LEFT)
        self.assertEqual(Interval(1051), gpos)

    def test_left_within_exon(self):
        # start and end are checked separately here
        distance = 20 - 1
        gpos = self.trans_evidence.traverse(1750, distance, ORIENT.LEFT)
        self.assertEqual(1731, gpos.start)
        self.assertEqual(1731, gpos.end)

    def test_left_within_transcript_intronic(self):
        distance = 150 - 1
        gpos = self.trans_evidence.traverse(1600, distance, ORIENT.LEFT)
        self.assertEqual(Interval(1451), gpos)

    def test_right_before_transcript(self):
        distance = 100 - 1
        gpos = self.trans_evidence.traverse(500, distance, ORIENT.RIGHT)
        self.assertEqual(Interval(599), gpos)

    def test_right_before_transcript2(self):
        distance = 500 - 1
        gpos = self.trans_evidence.traverse(901, distance, ORIENT.RIGHT)
        self.assertEqual(Interval(1900), gpos)

    def test_right_after_transcript(self):
        distance = 100 - 1
        gpos = self.trans_evidence.traverse(2201, distance, ORIENT.RIGHT)
        self.assertEqual(Interval(2300), gpos)

    def test_right_within_transcript(self):
        distance = 100 - 1
        gpos = self.trans_evidence.traverse(1351, distance, ORIENT.RIGHT)
        self.assertEqual(Interval(1750), gpos)

    def test_right_within_exon(self):
        distance = 10 - 1
        gpos = self.trans_evidence.traverse(1351, distance, ORIENT.RIGHT)
        self.assertEqual(Interval(1360), gpos)
def setUp(self):
    """Create two pre-transcripts with the same exon coordinates on opposite strands."""
    # separate coordinate lists are built so the two transcripts share no list object
    self.pre_transcript = PreTranscript(
        [(101, 200), (301, 400), (501, 600)],
        strand=STRAND.POS,
    )
    self.n_ust = PreTranscript(
        [(101, 200), (301, 400), (501, 600)],
        strand=STRAND.NEG,
    )
def test_single_exon(self):
    # a single-exon transcript is expected to give exactly one, empty, NORMAL pattern
    single_exon = PreTranscript([(3, 4)], strand=STRAND.POS)
    patterns = single_exon.generate_splicing_patterns()
    assert len(patterns) == 1
    only_pattern = patterns[0]
    assert len(only_pattern) == 0
    assert only_pattern.splice_type == SPLICE_TYPE.NORMAL
def unspliced_transcript2():
    """Return a three-exon positive-strand pre-transcript named 't2'."""
    exon_coords = [(1001, 1100), (1301, 1400), (1501, 1600)]
    pre_transcript = PreTranscript(exons=exon_coords, strand=STRAND.POS, name='t2')
    return pre_transcript
def test_single_exon(self):
    # a single-exon transcript is expected to give exactly one, empty, NORMAL pattern
    single_exon = PreTranscript([(3, 4)], strand=STRAND.POS)
    patterns = single_exon.generate_splicing_patterns()
    self.assertEqual(1, len(patterns))
    only_pattern = patterns[0]
    self.assertEqual(0, len(only_pattern))
    self.assertEqual(SPLICE_TYPE.NORMAL, only_pattern.splice_type)
class TestSplicingPatterns(unittest.TestCase):
    # Checks the splicing patterns generated for a six-exon pre-transcript when
    # individual splice sites are marked as not intact. "a" and "d" in the test
    # names refer to an exon's start and end splice site respectively, as set in
    # each test body.

    def setUp(self):
        self.setup_by_strand(STRAND.POS)

    def setup_by_strand(self, strand):
        """Create six exons, the matching reference sequence and a pre-transcript on ``strand``."""
        self.ex1 = Exon(100, 199, strand=strand)  # C
        self.ex2 = Exon(500, 599, strand=strand)  # G
        self.ex3 = Exon(1200, 1299, strand=strand)  # T
        self.ex4 = Exon(1500, 1599, strand=strand)  # C
        self.ex5 = Exon(1700, 1799, strand=strand)  # G
        self.ex6 = Exon(2000, 2099, strand=strand)  # C
        # leading flank then introns: 99, 300, 600, 200, 100, 200
        reference_sequence = 'a' * 99 + 'C' * 100 + 'a' * 300 + 'G' * 100
        reference_sequence += 'a' * 600 + 'T' * 100 + 'a' * 200 + 'C' * 100
        reference_sequence += 'a' * 100 + 'G' * 100 + 'a' * 200 + 'C' * 100
        self.reference_sequence = reference_sequence
        self.pre_transcript = PreTranscript(
            exons=[self.ex1, self.ex2, self.ex3, self.ex4, self.ex5, self.ex6], strand=strand
        )

    def test_single_exon(self):
        t = PreTranscript([(3, 4)], strand=STRAND.POS)
        patt = t.generate_splicing_patterns()
        self.assertEqual(1, len(patt))
        self.assertEqual(0, len(patt[0]))
        self.assertEqual(SPLICE_TYPE.NORMAL, patt[0].splice_type)

    def test_normal_pattern_pos(self):
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(1, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex2.start,
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            [s.pos for s in patt[0]],
        )
        self.assertEqual(SPLICE_TYPE.NORMAL, patt[0].splice_type)

    def test_normal_pattern_neg(self):
        self.setup_by_strand(STRAND.NEG)
        self.assertTrue(self.pre_transcript.is_reverse)
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(1, len(patt))
        # positions are sorted before comparing on the negative strand
        self.assertEqual(
            [
                self.ex1.end, self.ex2.start,
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            sorted([s.pos for s in patt[0]]),
        )
        self.assertEqual(SPLICE_TYPE.NORMAL, patt[0].splice_type)

    def test_abrogate_a_pos(self):
        self.ex2.start_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(2, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            [s.pos for s in patt[0]],
        )
        self.assertEqual(SPLICE_TYPE.SKIP, patt[0].splice_type)
        self.assertEqual(
            [
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            [s.pos for s in patt[1]],
        )
        self.assertEqual(SPLICE_TYPE.RETAIN, patt[1].splice_type)

    def test_abrogate_a_neg(self):
        self.setup_by_strand(STRAND.NEG)
        self.ex2.start_splice_site.intact = False
        # both the patterns and their positions are sorted before comparing
        patt = sorted(self.pre_transcript.generate_splicing_patterns())
        self.assertEqual(2, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            sorted([s.pos for s in patt[0]]),
        )
        self.assertEqual(SPLICE_TYPE.SKIP, patt[0].splice_type)
        self.assertEqual(
            [
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            sorted([s.pos for s in patt[1]]),
        )
        self.assertEqual(SPLICE_TYPE.RETAIN, patt[1].splice_type)

    def test_abrogate_a_last_exon(self):
        self.ex6.start_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(1, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex2.start,
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
            ],
            [s.pos for s in patt[0]],
        )
        self.assertEqual(SPLICE_TYPE.RETAIN, patt[0].splice_type)

    def test_abrogate_d_first_exon(self):
        self.ex1.end_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(1, len(patt))
        self.assertEqual(
            [
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            [s.pos for s in patt[0]],
        )
        self.assertEqual(SPLICE_TYPE.RETAIN, patt[0].splice_type)

    def test_abrogate_ad(self):
        # NOTE(review): despite the "ad" name only the start splice site of exon 2
        # is abrogated, which makes this identical to test_abrogate_a_pos --
        # confirm whether ex2.end_splice_site was meant to be abrogated as well.
        self.ex2.start_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(2, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            [s.pos for s in patt[0]],
        )
        self.assertEqual(SPLICE_TYPE.SKIP, patt[0].splice_type)
        self.assertEqual(
            [
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            [s.pos for s in patt[1]],
        )
        self.assertEqual(SPLICE_TYPE.RETAIN, patt[1].splice_type)

    def test_abrogate_da(self):
        self.ex2.end_splice_site.intact = False
        self.ex3.start_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(1, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex2.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            [s.pos for s in patt[0]],
        )
        self.assertEqual(SPLICE_TYPE.RETAIN, patt[0].splice_type)

    def test_multiple_exons_or_multiple_introns_abrogate_ada(self):
        self.ex2.start_splice_site.intact = False
        self.ex2.end_splice_site.intact = False
        self.ex3.start_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(2, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            [s.pos for s in patt[0]],
        )
        self.assertEqual(SPLICE_TYPE.MULTI_SKIP, patt[0].splice_type)
        self.assertEqual(
            [
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            [s.pos for s in patt[1]],
        )
        self.assertEqual(SPLICE_TYPE.MULTI_RETAIN, patt[1].splice_type)

    def test_multiple_exons_or_multiple_introns_abrogate_dad(self):
        self.ex2.end_splice_site.intact = False
        self.ex3.start_splice_site.intact = False
        self.ex3.end_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(2, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex2.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            [s.pos for s in patt[0]],
        )
        self.assertEqual(SPLICE_TYPE.MULTI_RETAIN, patt[0].splice_type)
        self.assertEqual(
            [
                self.ex1.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start,
            ],
            [s.pos for s in patt[1]],
        )
        self.assertEqual(SPLICE_TYPE.MULTI_SKIP, patt[1].splice_type)

    def test_complex(self):
        self.ex2.end_splice_site.intact = False
        self.ex4.end_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(4, len(patt))
        # assertIn reports the actual splice types on failure, unlike assertTrue(x in y)
        self.assertIn(SPLICE_TYPE.COMPLEX, [p.splice_type for p in patt])
def negative_transcript():
    """Return a three-exon pre-transcript on the negative strand."""
    exon_coords = [(101, 200), (301, 400), (501, 600)]
    pre_transcript = PreTranscript(exon_coords, strand=STRAND.NEG)
    return pre_transcript
class TestTranscriptomeEvidenceWindow(unittest.TestCase):
    # Compares TranscriptomeEvidence.generate_window against either the genome
    # window or fixed expected intervals.

    def setUp(self):
        """Create a four-exon KRAS pre-transcript plus mocked genome and transcriptome evidence."""
        gene = Gene('1', 1, 9999, name='KRAS', strand=STRAND.POS)
        exon_coords = [(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)]
        self.pre_transcript = PreTranscript(gene=gene, exons=exon_coords)
        gene.unspliced_transcripts.append(self.pre_transcript)
        for pattern in self.pre_transcript.generate_splicing_patterns():
            spliced = Transcript(self.pre_transcript, pattern)
            self.pre_transcript.transcripts.append(spliced)
        self.annotations = {gene.chr: [gene]}

        self.genome_evidence = MockObject(
            annotations={},
            read_length=100,
            max_expected_fragment_size=550,
            call_error=11,
        )
        self.trans_evidence = MockObject(
            annotations={},
            read_length=100,
            max_expected_fragment_size=550,
            call_error=11,
            overlapping_transcripts={self.pre_transcript},
        )

        def select_all(*pos):
            # positions are ignored: every overlapping transcript is always returned
            return self.trans_evidence.overlapping_transcripts

        self.trans_evidence._select_transcripts = select_all
        self.trans_evidence.traverse = partial(TranscriptomeEvidence.traverse, self.trans_evidence)

    def transcriptome_window(self, breakpoint, transcripts=None):
        """Return the transcriptome window for ``breakpoint``.

        Any ``transcripts`` given are first added to the mock's overlapping
        transcripts, and they stay there for the remainder of the test.
        """
        evidence = self.trans_evidence
        if transcripts:
            evidence.overlapping_transcripts.update(transcripts)
        return TranscriptomeEvidence.generate_window(evidence, breakpoint)

    def genome_window(self, breakpoint):
        """Return the genome window for ``breakpoint`` using the mocked genome evidence."""
        return GenomeEvidence.generate_window(self.genome_evidence, breakpoint)

    def test_before_start(self):
        # upstream of the transcript the two windows are expected to agree
        for start in (100, 500):
            b = Breakpoint(chr='1', start=start, orient=ORIENT.RIGHT)
            self.assertEqual(self.genome_window(b), self.transcriptome_window(b))

    def test_after_end(self):
        b = Breakpoint(chr='1', start=6000, orient=ORIENT.RIGHT)
        genomic = self.genome_window(b)
        self.assertEqual(genomic, self.transcriptome_window(b))

    def test_exonic_long_exon(self):
        b = Breakpoint(chr='1', start=3200, orient=ORIENT.RIGHT)
        genomic = self.genome_window(b)
        self.assertEqual(genomic, self.transcriptome_window(b))

    def test_intronic_long_exon(self):
        b = Breakpoint(chr='1', start=2970, orient=ORIENT.RIGHT)
        genomic = self.genome_window(b)
        self.assertEqual(genomic, self.transcriptome_window(b))

    def test_intronic_long_intron(self):
        b = Breakpoint(chr='1', start=1800, orient=ORIENT.RIGHT)
        # the genome window is printed for comparison only
        print(self.genome_window(b))
        expected = Interval(1490, 2360)
        self.assertEqual(expected, self.transcriptome_window(b))

    def test_intronic_short_exon_right(self):
        b = Breakpoint(chr='1', start=1690, orient=ORIENT.RIGHT)
        # the genome window is printed for comparison only
        print(self.genome_window(b))
        expected = Interval(1580, 3500)
        self.assertEqual(expected, self.transcriptome_window(b))

    def test_intronic_short_exon_left(self):
        b = Breakpoint(chr='1', start=2200, orient=ORIENT.LEFT)
        expected = Interval(1440, 2310)
        self.assertEqual(expected, self.transcriptome_window(b))

    def test_multiple_transcripts(self):
        # setUp transcript exons: [(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)]
        b = Breakpoint(chr='1', start=1150, orient=ORIENT.RIGHT)
        gene = self.annotations['1'][0]
        second_exons = [(1001, 1100), (1200, 1300), (2100, 2200)]
        t2 = PreTranscript(gene=gene, exons=second_exons)
        for pattern in t2.generate_splicing_patterns():
            spliced = Transcript(t2, pattern)
            t2.transcripts.append(spliced)
        gene.transcripts.append(t2)
        # 989 - 2561
        # 989 - 3411
        expected = Interval(1040, 3160)
        self.assertEqual(expected, self.transcriptome_window(b, [self.pre_transcript, t2]))

    def test_many_small_exons(self):
        exon_coords = [
            (17271277, 17271984),
            (17272649, 17272709),
            (17275586, 17275681),
            (17275769, 17275930),
            (17276692, 17276817),
            (17277168, 17277388),  # 220
            (17277845, 17277888),  # 44
            (17278293, 17278378),  # 86
            (17279229, 17279592),  # 364
        ]
        g = Gene('fake', 17271277, 17279592, strand='+')
        pre_transcript = PreTranscript(gene=g, exons=exon_coords)
        g.transcripts.append(pre_transcript)
        for pattern in pre_transcript.generate_splicing_patterns():
            spliced = Transcript(pre_transcript, pattern)
            pre_transcript.transcripts.append(spliced)
        b = Breakpoint(chr='fake', start=17279591, orient=ORIENT.LEFT)
        expected = Interval(17277321, 17279701)
        self.assertEqual(expected, self.transcriptome_window(b, [pre_transcript]))
def positive_transcript():
    """Return a three-exon pre-transcript on the positive strand."""
    exon_coords = [(101, 200), (301, 400), (501, 600)]
    pre_transcript = PreTranscript(exon_coords, strand=STRAND.POS)
    return pre_transcript
def unspliced_transcript1():
    """Return a three-exon positive-strand pre-transcript named 't1'."""
    exon_coords = [(1, 100), (301, 400), (501, 600)]
    pre_transcript = PreTranscript(exons=exon_coords, strand=STRAND.POS, name='t1')
    return pre_transcript