def setUp(self):
        """Prepare a negative-strand pre-transcript and a mock evidence object.

        The pre-transcript has three exons and receives one ``Transcript`` per
        generated splicing pattern. The mock evidence exposes ``traverse``
        bound to ``TranscriptomeEvidence.traverse`` and a
        ``_select_transcripts`` hook that always returns its
        ``overlapping_transcripts`` set.
        """
        exon_coords = [(1001, 1100), (1301, 1400), (1701, 1800)]
        pre_tx = PreTranscript(exon_coords, strand=STRAND.NEG)
        self.transcript = pre_tx
        for pattern in pre_tx.generate_splicing_patterns():
            pre_tx.transcripts.append(Transcript(pre_tx, pattern))

        evidence = MockObject(
            annotations={},
            read_length=100,
            max_expected_fragment_size=550,
            call_error=11,
            overlapping_transcripts={pre_tx},
        )
        self.trans_evidence = evidence

        def _all_overlapping(*pos):
            # positions are ignored: every lookup yields the full set
            return self.trans_evidence.overlapping_transcripts

        evidence._select_transcripts = _all_overlapping
        evidence.traverse = partial(TranscriptomeEvidence.traverse, evidence)
Example #2
0
def build_transcript(gene,
                     exons,
                     cds_start,
                     cds_end,
                     domains,
                     strand=None,
                     is_best_transcript=False,
                     name=None):
    """Build a pre-transcript with its spliced transcripts and translations.

    One ``Transcript`` is created per splicing pattern of the new
    ``PreTranscript``, and each transcript receives a single ``Translation``
    spanning ``cds_start``..``cds_end``.

    Args:
        gene: parent gene; may be ``None``, in which case the pre-transcript
            is not registered on any gene.
        exons: exons handed to ``PreTranscript``.
        cds_start: coding start passed to ``Translation``.
        cds_end: coding end passed to ``Translation``.
        domains: domains passed to ``Translation``.
        strand: explicit strand; when ``None`` the gene's strand is used if a
            gene was given.
        is_best_transcript: forwarded to ``PreTranscript``.
        name: forwarded to ``PreTranscript``.

    Returns:
        The newly created ``PreTranscript``.
    """
    # Only ask the gene for its strand when there is a gene: ``gene`` is
    # allowed to be None (see the registration check below), and calling
    # ``None.get_strand()`` would fail with an AttributeError.
    if strand is None and gene is not None:
        strand = gene.get_strand()

    pre_transcript = PreTranscript(
        exons,
        gene=gene,
        strand=strand,
        is_best_transcript=is_best_transcript,
        name=name)
    if gene is not None:
        gene.unspliced_transcripts.append(pre_transcript)

    for spl in pre_transcript.generate_splicing_patterns():
        t = Transcript(pre_transcript, spl)
        pre_transcript.spliced_transcripts.append(t)

        tx = Translation(cds_start, cds_end, t, domains=domains)
        t.translations.append(tx)

    return pre_transcript
Example #3
0
 def test_shift_overaligned(self):
     """A '14=7D12=' read is expected to be standardized to '12=7N14='."""
     # qwertyuiopas---kkkkk------dfghjklzxcvbnm
     # ..........      ................
     parent_gene = Gene('1', 1, 1000, strand='+')
     pre_tx = PreTranscript(exons=[(1, 12), (20, 28)], gene=parent_gene, strand='+')
     for pattern in pre_tx.generate_splicing_patterns():
         pre_tx.transcripts.append(Transcript(pre_tx, pattern))
     parent_gene.transcripts.append(pre_tx)

     input_cigar = _cigar.convert_string_to_cigar('14=7D12=')
     read = SamRead(
         reference_name='1',
         reference_start=0,
         cigar=input_cigar,
         query_sequence='qwertyuiopasdfghjklzxcvbnm',
     )

     def _reference_name(r):
         return r.reference_name

     reference = {'1': MockObject(seq='qwertyuiopasdfkkkkkdfghjklzxcvbnm')}
     first_break = Breakpoint('1', 1, orient='L', strand='+')
     second_break = Breakpoint('1', 10, orient='R', strand='+')
     evidence = TranscriptomeEvidence(
         annotations={},
         reference_genome=reference,
         bam_cache=MockObject(get_read_reference_name=_reference_name),
         break1=first_break,
         break2=second_break,
         read_length=75,
         stdev_fragment_size=75,
         median_fragment_size=220,
     )
     evidence.overlapping_transcripts.add(pre_tx)

     new_read = evidence.standardize_read(read)
     expected_cigar = _cigar.convert_string_to_cigar('12=7N14=')
     assert new_read.cigar == expected_cigar
Example #4
0
    def test_net_zero(self):
        """Net size is -200 without a distance function and 0 with the transcriptome one."""
        exon_coords = [(1001, 1100), (1301, 1400), (1701, 1800)]
        pre_tx = PreTranscript(exon_coords, strand=STRAND.POS)
        for pattern in pre_tx.generate_splicing_patterns():
            pre_tx.transcripts.append(Transcript(pre_tx, pattern))

        trans_evidence = MockObject(
            annotations={},
            read_length=100,
            max_expected_fragment_size=550,
            call_error=11,
            overlapping_transcripts={pre_tx},
        )

        def _all_overlapping(*pos):
            # positions are ignored: every lookup yields the full set
            return trans_evidence.overlapping_transcripts

        trans_evidence._select_transcripts = _all_overlapping
        trans_evidence.distance = partial(TranscriptomeEvidence.distance, trans_evidence)

        left_break = Breakpoint('1', 1099, orient=ORIENT.LEFT)
        right_break = Breakpoint('1', 1302, orient=ORIENT.RIGHT)
        bpp = BreakpointPair(left_break, right_break, untemplated_seq='TT')

        dist = partial(TranscriptomeEvidence.distance, trans_evidence)
        assert bpp.net_size() == Interval(-200)
        assert bpp.net_size(dist) == Interval(0)
 def setUp(self):
     """Create a four-exon pre-transcript on a '+' gene plus genome/transcriptome mocks.

     The transcriptome mock gets ``traverse`` bound to
     ``TranscriptomeEvidence.traverse`` and a ``_select_transcripts`` hook
     returning its ``overlapping_transcripts`` set.
     """
     gene = Gene('1', 1, 9999, name='KRAS', strand=STRAND.POS)
     exon_coords = [(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)]
     pre_tx = PreTranscript(gene=gene, exons=exon_coords)
     self.pre_transcript = pre_tx
     gene.unspliced_transcripts.append(pre_tx)
     for pattern in pre_tx.generate_splicing_patterns():
         pre_tx.transcripts.append(Transcript(pre_tx, pattern))
     self.annotations = {gene.chr: [gene]}

     self.genome_evidence = MockObject(
         annotations={},
         read_length=100,
         max_expected_fragment_size=550,
         call_error=11,
     )
     evidence = MockObject(
         annotations={},
         read_length=100,
         max_expected_fragment_size=550,
         call_error=11,
         overlapping_transcripts={pre_tx},
     )
     self.trans_evidence = evidence

     def _all_overlapping(*pos):
         # positions are ignored: every lookup yields the full set
         return self.trans_evidence.overlapping_transcripts

     evidence._select_transcripts = _all_overlapping
     evidence.traverse = partial(TranscriptomeEvidence.traverse, evidence)
Example #6
0
 def test_empty_intron(self, distance_setup):
     """Distance from 1001 to 2301 is 400 once a transcript with abutting exons is added."""
     # the last two exons touch (2200 / 2201), leaving an empty intron
     exon_coords = [(1001, 1100), (1501, 1600), (2001, 2200), (2201, 2300)]
     t2 = PreTranscript(exon_coords, strand='+')
     for pattern in t2.generate_splicing_patterns():
         t2.transcripts.append(Transcript(t2, pattern))
     print(t2)
     evidence = distance_setup.trans_evidence
     print(evidence.overlapping_transcripts)
     evidence.overlapping_transcripts.add(t2)
     dist = evidence.distance(1001, 2301)
     assert dist == Interval(400, 400)
Example #7
0
 def test_multiple_transcripts(self, trans_window_setup):
     """Window for a RIGHT breakpoint at 1150 with two transcripts on the same gene."""
     #  [(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)])
     breakpoint_ = Breakpoint(chr='1', start=1150, orient=ORIENT.RIGHT)
     gene = trans_window_setup.annotations['1'][0]
     second_exons = [(1001, 1100), (1200, 1300), (2100, 2200)]
     t2 = PreTranscript(gene=gene, exons=second_exons)
     for pattern in t2.generate_splicing_patterns():
         t2.transcripts.append(Transcript(t2, pattern))
     gene.transcripts.append(t2)
     # 989 - 2561
     # 989 - 3411
     window = transcriptome_window(
         trans_window_setup.trans_evidence,
         breakpoint_,
         [trans_window_setup.pre_transcript, t2],
     )
     assert window == Interval(1040, 3160)
Example #8
0
 def setup_by_strand(self, strand):
     """Create six exons, a matching reference sequence and a pre-transcript on ``strand``.

     Each exon region of the reference is filled with a single upper-case
     base (C, G, T, C, G, C in order); everything else is lower-case 'a'.
     """
     exon_bounds = [
         (100, 199),  # C
         (500, 599),  # G
         (1200, 1299),  # T
         (1500, 1599),  # C
         (1700, 1799),  # G
         (2000, 2099),  # C
     ]
     exons = [Exon(start, end, strand=strand) for start, end in exon_bounds]
     self.ex1, self.ex2, self.ex3, self.ex4, self.ex5, self.ex6 = exons

     # introns: 99, 300, 600, 200, 100, ...
     segments = [
         ('a', 99), ('C', 100), ('a', 300), ('G', 100),
         ('a', 600), ('T', 100), ('a', 200), ('C', 100),
         ('a', 100), ('G', 100), ('a', 200), ('C', 100),
     ]
     self.reference_sequence = ''.join(base * count for base, count in segments)
     self.pre_transcript = PreTranscript(
         exons=[self.ex1, self.ex2, self.ex3, self.ex4, self.ex5, self.ex6],
         strand=strand,
     )
Example #9
0
def trans_window_setup():
    """Return a namespace holding a four-exon pre-transcript and evidence mocks.

    The namespace carries ``pre_transcript``, ``annotations``,
    ``genome_evidence`` and ``trans_evidence``; the latter has ``traverse``
    bound to ``TranscriptomeEvidence.traverse`` and a ``_select_transcripts``
    hook returning its ``overlapping_transcripts`` set.
    """
    n = argparse.Namespace()
    gene = Gene('1', 1, 9999, name='KRAS', strand=STRAND.POS)
    exon_coords = [(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)]
    pre_tx = PreTranscript(gene=gene, exons=exon_coords)
    n.pre_transcript = pre_tx
    gene.unspliced_transcripts.append(pre_tx)
    for pattern in pre_tx.generate_splicing_patterns():
        pre_tx.transcripts.append(Transcript(pre_tx, pattern))
    n.annotations = {gene.chr: [gene]}

    n.genome_evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        config={**DEFAULTS, 'validate.call_error': 11},
    )
    evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        overlapping_transcripts={pre_tx},
        config={**DEFAULTS, 'validate.call_error': 11},
    )
    n.trans_evidence = evidence

    def _all_overlapping(*pos):
        # positions are ignored: every lookup yields the full set
        return n.trans_evidence.overlapping_transcripts

    evidence._select_transcripts = _all_overlapping
    evidence.traverse = partial(TranscriptomeEvidence.traverse, evidence)
    return n
Example #10
0
def distance_setup():
    """Return a namespace with a four-exon '+' pre-transcript and a mock evidence.

    The mock evidence has ``distance`` bound to
    ``TranscriptomeEvidence.distance`` and a ``_select_transcripts`` hook
    returning its ``overlapping_transcripts`` set.
    """
    n = argparse.Namespace()
    exon_coords = [(1001, 1100), (1501, 1600), (2001, 2100), (2201, 2300)]
    pre_tx = PreTranscript(exon_coords, strand='+')
    n.transcript = pre_tx
    for pattern in pre_tx.generate_splicing_patterns():
        pre_tx.transcripts.append(Transcript(pre_tx, pattern))

    evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={pre_tx},
    )
    n.trans_evidence = evidence

    def _all_overlapping(*pos):
        # positions are ignored: every lookup yields the full set
        return n.trans_evidence.overlapping_transcripts

    evidence._select_transcripts = _all_overlapping
    evidence.distance = partial(TranscriptomeEvidence.distance, evidence)
    return n
Example #11
0
class TestNetSizeTrans(unittest.TestCase):
    """Net size of a breakpoint pair measured with the transcriptome distance."""

    def setUp(self):
        """Build a three-exon '+' pre-transcript and a mock evidence exposing ``distance``."""
        exon_coords = [(1001, 1100), (1301, 1400), (1701, 1800)]
        pre_tx = PreTranscript(exon_coords, strand=STRAND.POS)
        self.transcript = pre_tx
        for pattern in pre_tx.generate_splicing_patterns():
            pre_tx.transcripts.append(Transcript(pre_tx, pattern))

        evidence = MockObject(
            annotations={},
            read_length=100,
            max_expected_fragment_size=550,
            call_error=11,
            overlapping_transcripts={pre_tx},
        )
        self.trans_evidence = evidence

        def _all_overlapping(*pos):
            # positions are ignored: every lookup yields the full set
            return self.trans_evidence.overlapping_transcripts

        evidence._select_transcripts = _all_overlapping
        evidence.distance = partial(TranscriptomeEvidence.distance, evidence)

    def test_net_zero(self):
        # -200 without a distance function, 0 when the transcriptome
        # distance is supplied
        left_break = Breakpoint('1', 1099, orient=ORIENT.LEFT)
        right_break = Breakpoint('1', 1302, orient=ORIENT.RIGHT)
        bpp = BreakpointPair(left_break, right_break, untemplated_seq='TT')
        dist = partial(TranscriptomeEvidence.distance, self.trans_evidence)
        self.assertEqual(Interval(-200), bpp.net_size())
        self.assertEqual(Interval(0), bpp.net_size(dist))
Example #12
0
 def setUp(self):
     """Build a four-exon '+' pre-transcript and a mock evidence exposing ``distance``."""
     exon_coords = [(1001, 1100), (1501, 1600), (2001, 2100), (2201, 2300)]
     pre_tx = PreTranscript(exon_coords, strand='+')
     self.transcript = pre_tx
     for pattern in pre_tx.generate_splicing_patterns():
         pre_tx.transcripts.append(Transcript(pre_tx, pattern))

     evidence = MockObject(
         annotations={},
         read_length=100,
         max_expected_fragment_size=550,
         call_error=11,
         overlapping_transcripts={pre_tx},
     )
     self.trans_evidence = evidence

     def _all_overlapping(*pos):
         # positions are ignored: every lookup yields the full set
         return self.trans_evidence.overlapping_transcripts

     evidence._select_transcripts = _all_overlapping
     evidence.distance = partial(TranscriptomeEvidence.distance, evidence)
Example #13
0
class TestDistance(unittest.TestCase):
    """Checks of ``TranscriptomeEvidence.distance`` against a four-exon '+' transcript."""

    def setUp(self):
        """Build the pre-transcript and a mock evidence exposing ``distance``."""
        exon_coords = [(1001, 1100), (1501, 1600), (2001, 2100), (2201, 2300)]
        pre_tx = PreTranscript(exon_coords, strand='+')
        self.transcript = pre_tx
        for pattern in pre_tx.generate_splicing_patterns():
            pre_tx.transcripts.append(Transcript(pre_tx, pattern))

        evidence = MockObject(
            annotations={},
            read_length=100,
            max_expected_fragment_size=550,
            call_error=11,
            overlapping_transcripts={pre_tx},
        )
        self.trans_evidence = evidence

        def _all_overlapping(*pos):
            # positions are ignored: every lookup yields the full set
            return self.trans_evidence.overlapping_transcripts

        evidence._select_transcripts = _all_overlapping
        evidence.distance = partial(TranscriptomeEvidence.distance, evidence)

    def test_exonic(self):
        measured = self.trans_evidence.distance(1001, 1550)
        self.assertEqual(Interval(149), measured)

    def test_intergenic_exonic(self):
        measured = self.trans_evidence.distance(101, 1550)
        self.assertEqual(Interval(1049, 1049), measured)

    def test_intergenic_intergenic(self):
        measured = self.trans_evidence.distance(101, 300)
        self.assertEqual(Interval(199), measured)

    def test_aligned_intronic(self):
        measured = self.trans_evidence.distance(1102, 1499)
        self.assertEqual(Interval(5), measured)

    def test_indel_at_exon_boundary(self):
        measured = self.trans_evidence.distance(1101, 1501)
        self.assertEqual(Interval(2), measured)

    def test_no_annotations(self):
        # an explicit empty transcript list is passed as third argument
        measured = self.trans_evidence.distance(101, 300, [])
        self.assertEqual(Interval(199), measured)

    def test_intergenic_intronic(self):
        measured = self.trans_evidence.distance(101, 1400)
        self.assertEqual(Interval(1101), measured)

    def test_empty_intron(self):
        # the last two exons of t2 touch (2200 / 2201), leaving an empty intron
        exon_coords = [(1001, 1100), (1501, 1600), (2001, 2200), (2201, 2300)]
        t2 = PreTranscript(exon_coords, strand='+')
        for pattern in t2.generate_splicing_patterns():
            t2.transcripts.append(Transcript(t2, pattern))
        print(t2)
        print(self.trans_evidence.overlapping_transcripts)
        self.trans_evidence.overlapping_transcripts.add(t2)
        measured = self.trans_evidence.distance(1001, 2301)
        self.assertEqual(Interval(400, 400), measured)
Example #14
0
 def test_many_small_exons(self):
     # LEFT breakpoint near the end of a nine-exon '+' transcript
     gene = Gene('fake', 17271277, 17279592, strand='+')
     exon_coords = [
         (17271277, 17271984),
         (17272649, 17272709),
         (17275586, 17275681),
         (17275769, 17275930),
         (17276692, 17276817),
         (17277168, 17277388),  # 220
         (17277845, 17277888),  # 44
         (17278293, 17278378),  # 86
         (17279229, 17279592),  # 364
     ]
     pre_tx = PreTranscript(gene=gene, exons=exon_coords)
     gene.transcripts.append(pre_tx)
     for pattern in pre_tx.generate_splicing_patterns():
         pre_tx.transcripts.append(Transcript(pre_tx, pattern))
     left_break = Breakpoint(chr='fake', start=17279591, orient=ORIENT.LEFT)
     window = self.transcriptome_window(left_break, [pre_tx])
     self.assertEqual(Interval(17277321, 17279701), window)
Example #15
0
    def setUp(self):
        """Create two identically-configured breakpoint pairs and two named pre-transcripts.

        Also stores a per-call-method distance table and a name -> transcript
        lookup.
        """

        def _make_pair():
            # every call builds fresh breakpoints and a fresh data dict
            return BreakpointPair(
                Breakpoint('1', 1),
                Breakpoint('1', 10),
                opposing_strands=True,
                data={
                    COLUMNS.event_type: SVTYPE.DEL,
                    COLUMNS.call_method: CALL_METHOD.CONTIG,
                    COLUMNS.fusion_sequence_fasta_id: None,
                    COLUMNS.protocol: PROTOCOL.GENOME,
                },
            )

        self.gev1 = _make_pair()
        self.gev2 = _make_pair()

        self.ust1 = PreTranscript(
            exons=[(1, 100), (301, 400), (501, 600)],
            strand=STRAND.POS,
            name='t1',
        )
        self.ust2 = PreTranscript(
            exons=[(1001, 1100), (1301, 1400), (1501, 1600)],
            strand=STRAND.POS,
            name='t2',
        )
        self.distances = {
            CALL_METHOD.CONTIG: 0,
            CALL_METHOD.FLANK: 0,
            CALL_METHOD.SPLIT: 10,
        }
        self.TRANSCRIPTS = {tx.name: tx for tx in (self.ust1, self.ust2)}
Example #16
0
def pos_splicing_pattern():
    """Return a namespace with six '+' exons, a reference sequence and a pre-transcript.

    Each exon region of the reference is filled with a single upper-case base
    (C, G, T, C, G, C in order); everything else is lower-case 'a'.
    """
    n = argparse.Namespace()
    exon_bounds = [
        (100, 199),  # C
        (500, 599),  # G
        (1200, 1299),  # T
        (1500, 1599),  # C
        (1700, 1799),  # G
        (2000, 2099),  # C
    ]
    exons = [Exon(start, end, strand=STRAND.POS) for start, end in exon_bounds]
    n.ex1, n.ex2, n.ex3, n.ex4, n.ex5, n.ex6 = exons

    # introns: 99, 300, 600, 200, 100, ...
    segments = [
        ('a', 99), ('C', 100), ('a', 300), ('G', 100),
        ('a', 600), ('T', 100), ('a', 200), ('C', 100),
        ('a', 100), ('G', 100), ('a', 200), ('C', 100),
    ]
    n.reference_sequence = ''.join(base * count for base, count in segments)
    n.pre_transcript = PreTranscript(
        exons=[n.ex1, n.ex2, n.ex3, n.ex4, n.ex5, n.ex6], strand=STRAND.POS)
    return n
Example #17
0
def tranverse_trans_rev_setup():
    """Return a namespace with a three-exon negative-strand pre-transcript and mock evidence.

    The mock evidence has ``traverse`` bound to
    ``TranscriptomeEvidence.traverse`` and a ``_select_transcripts`` hook
    returning its ``overlapping_transcripts`` set.
    """
    n = argparse.Namespace()
    exon_coords = [(1001, 1100), (1301, 1400), (1701, 1800)]
    pre_tx = PreTranscript(exon_coords, strand=STRAND.NEG)
    n.transcript = pre_tx
    for pattern in pre_tx.generate_splicing_patterns():
        pre_tx.transcripts.append(Transcript(pre_tx, pattern))

    evidence = MockObject(
        annotations={},
        read_length=100,
        max_expected_fragment_size=550,
        call_error=11,
        overlapping_transcripts={pre_tx},
    )
    n.trans_evidence = evidence

    def _all_overlapping(*pos):
        # positions are ignored: every lookup yields the full set
        return n.trans_evidence.overlapping_transcripts

    evidence._select_transcripts = _all_overlapping
    evidence.traverse = partial(TranscriptomeEvidence.traverse, evidence)
    return n
class TestTraverseTransRev(unittest.TestCase):
    """Checks of ``TranscriptomeEvidence.traverse`` against a negative-strand transcript."""

    def setUp(self):
        """Build a three-exon negative-strand pre-transcript and a mock evidence exposing ``traverse``."""
        exon_coords = [(1001, 1100), (1301, 1400), (1701, 1800)]
        pre_tx = PreTranscript(exon_coords, strand=STRAND.NEG)
        self.transcript = pre_tx
        for pattern in pre_tx.generate_splicing_patterns():
            pre_tx.transcripts.append(Transcript(pre_tx, pattern))

        evidence = MockObject(
            annotations={},
            read_length=100,
            max_expected_fragment_size=550,
            call_error=11,
            overlapping_transcripts={pre_tx},
        )
        self.trans_evidence = evidence

        def _all_overlapping(*pos):
            # positions are ignored: every lookup yields the full set
            return self.trans_evidence.overlapping_transcripts

        evidence._select_transcripts = _all_overlapping
        evidence.traverse = partial(TranscriptomeEvidence.traverse, evidence)

    def test_left_before_transcript(self):
        # the transcriptome result is also compared with the genome traversal
        start, distance = 900, 500 - 1
        gpos = self.trans_evidence.traverse(start, distance, ORIENT.LEFT)
        self.assertEqual(Interval(401), gpos)
        genome_pos = GenomeEvidence.traverse(start, distance, ORIENT.LEFT)
        self.assertEqual(gpos, genome_pos)

    def test_left_after_transcript(self):
        # the transcriptome result is also compared with the genome traversal
        start, distance = 2200, 100
        gpos = self.trans_evidence.traverse(start, distance, ORIENT.LEFT)
        genome_pos = GenomeEvidence.traverse(start, distance, ORIENT.LEFT)
        self.assertEqual(gpos, genome_pos)
        self.assertEqual(Interval(2100), gpos)

    def test_left_after_transcript2(self):
        reached = self.trans_evidence.traverse(1900, 500 - 1, ORIENT.LEFT)
        self.assertEqual(Interval(901), reached)

    def test_left_within_transcript_exonic(self):
        reached = self.trans_evidence.traverse(1750, 200 - 1, ORIENT.LEFT)
        self.assertEqual(Interval(1051), reached)

    def test_left_within_exon(self):
        reached = self.trans_evidence.traverse(1750, 20 - 1, ORIENT.LEFT)
        self.assertEqual(1731, reached.start)
        self.assertEqual(1731, reached.end)

    def test_left_within_transcript_intronic(self):
        reached = self.trans_evidence.traverse(1600, 150 - 1, ORIENT.LEFT)
        self.assertEqual(Interval(1451), reached)

    def test_right_before_transcript(self):
        reached = self.trans_evidence.traverse(500, 100 - 1, ORIENT.RIGHT)
        self.assertEqual(Interval(599), reached)

    def test_right_before_transcript2(self):
        reached = self.trans_evidence.traverse(901, 500 - 1, ORIENT.RIGHT)
        self.assertEqual(Interval(1900), reached)

    def test_right_after_transcript(self):
        reached = self.trans_evidence.traverse(2201, 100 - 1, ORIENT.RIGHT)
        self.assertEqual(Interval(2300), reached)

    def test_right_within_transcript(self):
        reached = self.trans_evidence.traverse(1351, 100 - 1, ORIENT.RIGHT)
        self.assertEqual(Interval(1750), reached)

    def test_right_within_exon(self):
        reached = self.trans_evidence.traverse(1351, 10 - 1, ORIENT.RIGHT)
        self.assertEqual(Interval(1360), reached)
Example #19
0
 def setUp(self):
     """Create the same three-exon pre-transcript once per strand."""
     self.pre_transcript = PreTranscript(
         [(101, 200), (301, 400), (501, 600)], strand=STRAND.POS
     )
     self.n_ust = PreTranscript(
         [(101, 200), (301, 400), (501, 600)], strand=STRAND.NEG
     )
Example #20
0
 def test_single_exon(self):
     """A single-exon transcript yields one empty pattern of type NORMAL."""
     single_exon_tx = PreTranscript([(3, 4)], strand=STRAND.POS)
     patterns = single_exon_tx.generate_splicing_patterns()
     assert len(patterns) == 1
     assert len(patterns[0]) == 0
     assert patterns[0].splice_type == SPLICE_TYPE.NORMAL
Example #21
0
def unspliced_transcript2():
    """Return a three-exon positive-strand pre-transcript named 't2'."""
    exon_coords = [(1001, 1100), (1301, 1400), (1501, 1600)]
    return PreTranscript(exons=exon_coords, strand=STRAND.POS, name='t2')
Example #22
0
 def test_single_exon(self):
     # a single-exon transcript yields one empty pattern of type NORMAL
     single_exon_tx = PreTranscript([(3, 4)], strand=STRAND.POS)
     patterns = single_exon_tx.generate_splicing_patterns()
     self.assertEqual(1, len(patterns))
     self.assertEqual(0, len(patterns[0]))
     self.assertEqual(SPLICE_TYPE.NORMAL, patterns[0].splice_type)
Example #23
0
class TestSplicingPatterns(unittest.TestCase):
    """Tests of ``PreTranscript.generate_splicing_patterns``.

    Every test works on a six-exon pre-transcript and marks selected exon
    splice sites as not intact before generating the patterns. The expected
    values are the splice-site positions of each pattern and its
    ``SPLICE_TYPE``.
    """

    def setUp(self):
        # default fixture is the positive-strand transcript
        self.setup_by_strand(STRAND.POS)

    def setup_by_strand(self, strand):
        """Create six exons, a matching reference sequence and a pre-transcript on ``strand``."""
        # the trailing letter is the base that fills the exon in reference_sequence
        self.ex1 = Exon(100, 199, strand=strand)  # C
        self.ex2 = Exon(500, 599, strand=strand)  # G
        self.ex3 = Exon(1200, 1299, strand=strand)  # T
        self.ex4 = Exon(1500, 1599, strand=strand)  # C
        self.ex5 = Exon(1700, 1799, strand=strand)  # G
        self.ex6 = Exon(2000, 2099, strand=strand)  # C
        # introns: 99, 300, 600, 200, 100, ...
        reference_sequence = 'a' * 99 + 'C' * 100 + 'a' * 300 + 'G' * 100
        reference_sequence += 'a' * 600 + 'T' * 100 + 'a' * 200 + 'C' * 100
        reference_sequence += 'a' * 100 + 'G' * 100 + 'a' * 200 + 'C' * 100
        self.reference_sequence = reference_sequence
        self.pre_transcript = PreTranscript(exons=[self.ex1, self.ex2, self.ex3, self.ex4, self.ex5, self.ex6], strand=strand)

    def test_single_exon(self):
        # one exon: a single pattern with no splice sites, typed NORMAL
        t = PreTranscript([(3, 4)], strand=STRAND.POS)
        patt = t.generate_splicing_patterns()
        self.assertEqual(1, len(patt))
        self.assertEqual(0, len(patt[0]))
        self.assertEqual(SPLICE_TYPE.NORMAL, patt[0].splice_type)

    def test_normal_pattern_pos(self):
        # all sites intact: every exon end is paired with the next exon start
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(1, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex2.start,
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            [s.pos for s in patt[0]]
        )
        self.assertEqual(SPLICE_TYPE.NORMAL, patt[0].splice_type)

    def test_normal_pattern_neg(self):
        # same expectation on the negative strand; positions are sorted
        # before comparison so the pattern's own ordering does not matter
        self.setup_by_strand(STRAND.NEG)
        self.assertTrue(self.pre_transcript.is_reverse)
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(1, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex2.start,
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            sorted([s.pos for s in patt[0]])
        )
        self.assertEqual(SPLICE_TYPE.NORMAL, patt[0].splice_type)

    def test_abrogate_a_pos(self):
        # exon 2 start site not intact: expect a SKIP and a RETAIN pattern
        self.ex2.start_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(2, len(patt))

        self.assertEqual(
            [
                self.ex1.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            [s.pos for s in patt[0]]
        )
        self.assertEqual(SPLICE_TYPE.SKIP, patt[0].splice_type)

        self.assertEqual(
            [
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            [s.pos for s in patt[1]]
        )
        self.assertEqual(SPLICE_TYPE.RETAIN, patt[1].splice_type)

    def test_abrogate_a_neg(self):
        # negative-strand variant of test_abrogate_a_pos
        self.setup_by_strand(STRAND.NEG)
        self.ex2.start_splice_site.intact = False
        # NOTE(review): sorting the patterns presumably relies on the pattern
        # objects being orderable so that SKIP comes first - confirm
        patt = sorted(self.pre_transcript.generate_splicing_patterns())
        self.assertEqual(2, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            sorted([s.pos for s in patt[0]])
        )
        self.assertEqual(SPLICE_TYPE.SKIP, patt[0].splice_type)
        self.assertEqual(
            [
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            sorted([s.pos for s in patt[1]])
        )
        self.assertEqual(SPLICE_TYPE.RETAIN, patt[1].splice_type)

    def test_abrogate_a_last_exon(self):
        # last exon start site not intact: one RETAIN pattern ending at exon 5
        self.ex6.start_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(1, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex2.start,
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start
            ],
            [s.pos for s in patt[0]]
        )
        self.assertEqual(SPLICE_TYPE.RETAIN, patt[0].splice_type)

    def test_abrogate_d_first_exon(self):
        # first exon end site not intact: one RETAIN pattern starting at exon 2
        self.ex1.end_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(1, len(patt))
        self.assertEqual(
            [
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            [s.pos for s in patt[0]]
        )
        self.assertEqual(SPLICE_TYPE.RETAIN, patt[0].splice_type)

    def test_abrogate_ad(self):
        # NOTE(review): the name suggests two abrogated sites on exon 2, but
        # only the start site is changed, which makes this test identical to
        # test_abrogate_a_pos - confirm whether the end site should also be
        # marked not intact (the expected values would then need review)
        self.ex2.start_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(2, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            [s.pos for s in patt[0]]
        )
        self.assertEqual(SPLICE_TYPE.SKIP, patt[0].splice_type)

        self.assertEqual(
            [
                self.ex2.end, self.ex3.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            [s.pos for s in patt[1]]
        )
        self.assertEqual(SPLICE_TYPE.RETAIN, patt[1].splice_type)

    def test_abrogate_da(self):
        # exon 2 end and exon 3 start not intact: one RETAIN pattern without
        # a junction between exons 2 and 3
        self.ex2.end_splice_site.intact = False
        self.ex3.start_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(1, len(patt))
        self.assertEqual(
            [
                self.ex1.end, self.ex2.start,
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            [s.pos for s in patt[0]]
        )
        self.assertEqual(SPLICE_TYPE.RETAIN, patt[0].splice_type)

    def test_multiple_exons_or_multiple_introns_abrogate_ada(self):
        # exon 2 start, exon 2 end and exon 3 start not intact: expect a
        # MULTI_SKIP and a MULTI_RETAIN pattern
        self.ex2.start_splice_site.intact = False
        self.ex2.end_splice_site.intact = False
        self.ex3.start_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(2, len(patt))

        self.assertEqual(
            [
                self.ex1.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            [s.pos for s in patt[0]]
        )
        self.assertEqual(SPLICE_TYPE.MULTI_SKIP, patt[0].splice_type)

        self.assertEqual(
            [
                self.ex3.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            [s.pos for s in patt[1]]
        )
        self.assertEqual(SPLICE_TYPE.MULTI_RETAIN, patt[1].splice_type)

    def test_multiple_exons_or_multiple_introns_abrogate_dad(self):
        # exon 2 end, exon 3 start and exon 3 end not intact: expect a
        # MULTI_RETAIN and a MULTI_SKIP pattern
        self.ex2.end_splice_site.intact = False
        self.ex3.start_splice_site.intact = False
        self.ex3.end_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(2, len(patt))

        self.assertEqual(
            [
                self.ex1.end, self.ex2.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            [s.pos for s in patt[0]]
        )
        self.assertEqual(SPLICE_TYPE.MULTI_RETAIN, patt[0].splice_type)

        self.assertEqual(
            [
                self.ex1.end, self.ex4.start,
                self.ex4.end, self.ex5.start,
                self.ex5.end, self.ex6.start
            ],
            [s.pos for s in patt[1]]
        )
        self.assertEqual(SPLICE_TYPE.MULTI_SKIP, patt[1].splice_type)

    def test_complex(self):
        # two separate end sites not intact: four patterns, at least one COMPLEX
        self.ex2.end_splice_site.intact = False
        self.ex4.end_splice_site.intact = False
        patt = self.pre_transcript.generate_splicing_patterns()
        self.assertEqual(4, len(patt))
        self.assertTrue(SPLICE_TYPE.COMPLEX in [p.splice_type for p in patt])
Example #24
0
def negative_transcript():
    """Return a three-exon pre-transcript on the negative strand."""
    exon_coords = [(101, 200), (301, 400), (501, 600)]
    return PreTranscript(exon_coords, strand=STRAND.NEG)
class TestTranscriptomeEvidenceWindow(unittest.TestCase):
    """Tests for TranscriptomeEvidence.generate_window using mock evidence objects.

    The evidence classes are never instantiated: generate_window is called
    unbound with a MockObject standing in for ``self``. Several tests compare
    the transcriptome window against the GenomeEvidence window for the same
    breakpoint; the others pin an explicit expected Interval.
    """

    def setUp(self):
        """Create the gene, its four-exon transcript, and the two mock evidence objects."""
        gene = Gene('1', 1, 9999, name='KRAS', strand=STRAND.POS)
        self.pre_transcript = PreTranscript(gene=gene,
                                            exons=[(1001, 1100), (1401, 1500),
                                                   (1701, 1750), (3001, 4000)])
        gene.unspliced_transcripts.append(self.pre_transcript)
        for spl in self.pre_transcript.generate_splicing_patterns():
            self.pre_transcript.transcripts.append(
                Transcript(self.pre_transcript, spl))
        self.annotations = {gene.chr: [gene]}
        self.genome_evidence = MockObject(annotations={},
                                          read_length=100,
                                          max_expected_fragment_size=550,
                                          call_error=11)
        self.trans_evidence = MockObject(
            annotations={},
            read_length=100,
            max_expected_fragment_size=550,
            call_error=11,
            overlapping_transcripts={self.pre_transcript},
        )
        # Transcript selection ignores the position arguments and always
        # returns whatever is currently in overlapping_transcripts.
        self.trans_evidence._select_transcripts = (
            lambda *pos: self.trans_evidence.overlapping_transcripts
        )
        # Bind the real traverse implementation to the mock.
        self.trans_evidence.traverse = partial(
            TranscriptomeEvidence.traverse, self.trans_evidence)

    def transcriptome_window(self, breakpoint, transcripts=None):
        """Return the transcriptome evidence window for ``breakpoint``.

        Args:
            breakpoint: the Breakpoint to generate the window for.
            transcripts: optional iterable of transcripts added to the mock's
                ``overlapping_transcripts`` set before the window is generated.
        """
        if transcripts:
            self.trans_evidence.overlapping_transcripts.update(transcripts)
        return TranscriptomeEvidence.generate_window(self.trans_evidence,
                                                     breakpoint)

    def genome_window(self, breakpoint):
        """Return the genome evidence window for ``breakpoint``."""
        return GenomeEvidence.generate_window(self.genome_evidence, breakpoint)

    def test_before_start(self):
        """Breakpoints at 100 and 500 give the same window as the genome."""
        b = Breakpoint(chr='1', start=100, orient=ORIENT.RIGHT)
        self.assertEqual(self.genome_window(b), self.transcriptome_window(b))

        b = Breakpoint(chr='1', start=500, orient=ORIENT.RIGHT)
        self.assertEqual(self.genome_window(b), self.transcriptome_window(b))

    def test_after_end(self):
        """A breakpoint at 6000 gives the same window as the genome."""
        b = Breakpoint(chr='1', start=6000, orient=ORIENT.RIGHT)
        self.assertEqual(self.genome_window(b), self.transcriptome_window(b))

    def test_exonic_long_exon(self):
        """A breakpoint at 3200, inside exon (3001, 4000), matches the genome window."""
        b = Breakpoint(chr='1', start=3200, orient=ORIENT.RIGHT)
        self.assertEqual(self.genome_window(b), self.transcriptome_window(b))

    def test_intronic_long_exon(self):
        """A breakpoint at 2970, just before exon (3001, 4000), matches the genome window."""
        b = Breakpoint(chr='1', start=2970, orient=ORIENT.RIGHT)
        self.assertEqual(self.genome_window(b), self.transcriptome_window(b))

    def test_intronic_long_intron(self):
        """A right-oriented breakpoint at 1800 gives the window (1490, 2360)."""
        b = Breakpoint(chr='1', start=1800, orient=ORIENT.RIGHT)
        self.assertEqual(Interval(1490, 2360), self.transcriptome_window(b))

    def test_intronic_short_exon_right(self):
        """A right-oriented breakpoint at 1690 gives the window (1580, 3500)."""
        b = Breakpoint(chr='1', start=1690, orient=ORIENT.RIGHT)
        self.assertEqual(Interval(1580, 3500), self.transcriptome_window(b))

    def test_intronic_short_exon_left(self):
        """A left-oriented breakpoint at 2200 gives the window (1440, 2310)."""
        b = Breakpoint(chr='1', start=2200, orient=ORIENT.LEFT)
        self.assertEqual(Interval(1440, 2310), self.transcriptome_window(b))

    def test_multiple_transcripts(self):
        """With a second transcript on the gene, the window is (1040, 3160)."""
        # exons of self.pre_transcript:
        #  [(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)])
        b = Breakpoint(chr='1', start=1150, orient=ORIENT.RIGHT)
        gene = self.annotations['1'][0]
        t2 = PreTranscript(gene=gene,
                           exons=[(1001, 1100), (1200, 1300), (2100, 2200)])
        for patt in t2.generate_splicing_patterns():
            t2.transcripts.append(Transcript(t2, patt))
        gene.transcripts.append(t2)
        # 989 - 2561
        # 989 - 3411
        self.assertEqual(
            Interval(1040, 3160),
            self.transcriptome_window(b, [self.pre_transcript, t2]))

    def test_many_small_exons(self):
        """A left-oriented breakpoint near the end of a nine-exon transcript."""
        g = Gene('fake', 17271277, 17279592, strand='+')
        pre_transcript = PreTranscript(
            gene=g,
            exons=[
                (17271277, 17271984),
                (17272649, 17272709),
                (17275586, 17275681),
                (17275769, 17275930),
                (17276692, 17276817),
                (17277168, 17277388),  # 220
                (17277845, 17277888),  # 44
                (17278293, 17278378),  # 86
                (17279229, 17279592),  # 364
            ],
        )
        g.transcripts.append(pre_transcript)
        for patt in pre_transcript.generate_splicing_patterns():
            pre_transcript.transcripts.append(Transcript(pre_transcript, patt))
        b = Breakpoint(chr='fake', start=17279591, orient=ORIENT.LEFT)
        self.assertEqual(Interval(17277321, 17279701),
                         self.transcriptome_window(b, [pre_transcript]))
Example #26
0
def positive_transcript():
    """Build a three-exon PreTranscript on the positive strand."""
    exons = [(101, 200), (301, 400), (501, 600)]
    return PreTranscript(exons, strand=STRAND.POS)
Example #27
0
def unspliced_transcript1():
    """Build the three-exon, positive-strand PreTranscript named 't1'."""
    coordinates = [(1, 100), (301, 400), (501, 600)]
    return PreTranscript(exons=coordinates, strand=STRAND.POS, name='t1')