Ejemplo n.º 1
0
    def test_merging_stop_codons_4(self):
        """
        Merge a stop codon whose positions are divided between two exons.

        Situation:
            * 1 stop codon split in two exons

        Both strands are exercised; each half builds its own fixture and
        compares the returned CDS and UTR rows with the expected ones.
        """
        def record(feature, start, stop, strand):
            # Nine-field annotation row; only the feature name, the
            # coordinates and the strand vary within this test.
            return ['1', '.', feature, start, stop, '.', strand, '.', '.']

        # Positive strand: the stop codon is given as 40-40 (end of the
        # first exon) plus 60-61 (start of the second exon).
        intervals = list_to_intervals([
            record('transcript', '20', '70', '+'),
            record('stop_codon', '40', '40', '+'),
            record('stop_codon', '60', '61', '+'),
        ])
        exons = list_to_intervals([
            record('exon', '20', '40', '+'),
            record('exon', '60', '70', '+'),
        ])
        cdses = list_to_intervals([
            record('CDS', '30', '39', '+'),
        ])

        expected_cdses = [
            record('CDS', '30', '40', '+'),
            record('CDS', '60', '61', '+'),
        ]
        expected_utrs = [
            record('UTR5', '20', '29', '+'),
            record('UTR3', '62', '70', '+'),
        ]
        got_cdses, got_utrs = segment._get_non_cds_exons(
            cdses, exons, intervals)
        got_cdses = intervals_to_list(got_cdses)
        got_utrs = intervals_to_list(got_utrs)
        self.assertEqual(expected_cdses, got_cdses)
        self.assertEqual(expected_utrs, got_utrs)

        # Negative strand: the stop codon is given as 60-60 (start of the
        # upper exon) plus 39-40 (end of the lower exon).
        intervals = list_to_intervals([
            record('transcript', '20', '80', '-'),
            record('stop_codon', '39', '40', '-'),
            record('stop_codon', '60', '60', '-'),
        ])
        exons = list_to_intervals([
            record('exon', '20', '40', '-'),
            record('exon', '60', '80', '-'),
        ])
        cdses = list_to_intervals([
            record('CDS', '61', '65', '-'),
        ])

        expected_cdses = [
            record('CDS', '60', '65', '-'),
            record('CDS', '39', '40', '-'),
        ]
        expected_utrs = [
            record('UTR3', '20', '38', '-'),
            record('UTR5', '66', '80', '-'),
        ]
        got_cdses, got_utrs = segment._get_non_cds_exons(
            cdses, exons, intervals)
        got_cdses = intervals_to_list(got_cdses)
        got_utrs = intervals_to_list(got_utrs)
        self.assertEqual(expected_cdses, got_cdses)
        self.assertEqual(expected_utrs, got_utrs)
Ejemplo n.º 2
0
    def test_1(self):
        """
        Classify non-CDS exon parts when no stop codon is annotated.

        Situation:
            * no stop codons
            * 1 "empty" exon before first cds
            * 1 "empty" exon after last cds
            * 1 exons shared by UTR5 and CDS
            * 1 exons shared by UTR3 and CDS

        The same fixture is checked on the positive strand and then, after
        passing it through ``reverse_strand``, on the negative strand.
        """
        def record(feature, start, stop, strand):
            # Nine-field annotation row; only the feature name, the
            # coordinates and the strand vary within this test.
            return ['1', '.', feature, start, stop, '.', strand, '.', '.']

        # Positive strand: the transcript record is the only interval given.
        intervals = list_to_intervals([
            record('transcript', '20', '90', '+'),
        ])
        exons = list_to_intervals([
            record('exon', '20', '30', '+'),
            record('exon', '40', '50', '+'),
            record('exon', '60', '70', '+'),
            record('exon', '80', '90', '+'),
        ])
        cdses = list_to_intervals([
            record('CDS', '45', '50', '+'),
            record('CDS', '60', '65', '+'),
        ])

        # The CDS rows are expected back unchanged.
        expected_cdses = [
            record('CDS', '45', '50', '+'),
            record('CDS', '60', '65', '+'),
        ]
        expected_utrs = [
            record('UTR5', '20', '30', '+'),
            record('UTR5', '40', '44', '+'),
            record('UTR3', '66', '70', '+'),
            record('UTR3', '80', '90', '+'),
        ]
        got_cdses, got_utrs = segment._get_non_cds_exons(
            cdses, exons, intervals)
        got_cdses = intervals_to_list(got_cdses)
        got_utrs = intervals_to_list(got_utrs)
        self.assertEqual(expected_cdses, got_cdses)
        self.assertEqual(expected_utrs, got_utrs)

        # Negative strand: reuse the inputs and the expected CDS rows via
        # reverse_strand; the expected UTR rows keep their coordinates but
        # swap the UTR5/UTR3 labels.
        intervals, exons, cdses = (
            reverse_strand(group) for group in (intervals, exons, cdses))

        expected_cdses = reverse_strand(expected_cdses)
        expected_utrs = [
            record('UTR3', '20', '30', '-'),
            record('UTR3', '40', '44', '-'),
            record('UTR5', '66', '70', '-'),
            record('UTR5', '80', '90', '-'),
        ]

        got_cdses, got_utrs = segment._get_non_cds_exons(
            cdses, exons, intervals)
        got_cdses = intervals_to_list(got_cdses)
        got_utrs = intervals_to_list(got_utrs)
        self.assertEqual(expected_cdses, got_cdses)
        self.assertEqual(expected_utrs, got_utrs)
Ejemplo n.º 3
0
    def test_merging_stop_codons_1(self):
        """
        Merge a stop codon that coincides exactly with an existing CDS.

        Situation:
            * stop codon and CDS completely overlap

        On both strands the CDS rows are expected back unchanged and no UTR
        rows are expected, because the CDSes span the same coordinates as
        the exons.
        """
        def record(feature, start, stop, strand):
            # Nine-field annotation row; only the feature name, the
            # coordinates and the strand vary within this test.
            return ['1', '.', feature, start, stop, '.', strand, '.', '.']

        # Positive strand: only the transcript and its stop codon are given
        # as intervals; the stop codon equals the last CDS (60-62).
        intervals = list_to_intervals([
            record('transcript', '20', '62', '+'),
            record('stop_codon', '60', '62', '+'),
        ])
        exons = list_to_intervals([
            record('exon', '20', '40', '+'),
            record('exon', '60', '62', '+'),
        ])
        cdses = list_to_intervals([
            record('CDS', '20', '40', '+'),
            record('CDS', '60', '62', '+'),
        ])

        expected_cdses = [
            record('CDS', '20', '40', '+'),
            record('CDS', '60', '62', '+'),
        ]
        expected_utrs = []
        got_cdses, got_utrs = segment._get_non_cds_exons(
            cdses, exons, intervals)
        got_cdses = intervals_to_list(got_cdses)
        got_utrs = intervals_to_list(got_utrs)
        self.assertEqual(expected_cdses, got_cdses)
        self.assertEqual(expected_utrs, got_utrs)

        # Negative strand: the stop codon equals the lowest CDS (20-22).
        intervals = list_to_intervals([
            record('transcript', '20', '80', '-'),
            record('stop_codon', '20', '22', '-'),
        ])
        # The exons are placed on the same strand as the transcript, stop
        # codon and CDS records so that the fixture is self-consistent.
        exons = list_to_intervals([
            record('exon', '20', '22', '-'),
            record('exon', '60', '80', '-'),
        ])
        cdses = list_to_intervals([
            record('CDS', '20', '22', '-'),
            record('CDS', '60', '80', '-'),
        ])

        expected_cdses = [
            record('CDS', '20', '22', '-'),
            record('CDS', '60', '80', '-'),
        ]
        expected_utrs = []
        got_cdses, got_utrs = segment._get_non_cds_exons(
            cdses, exons, intervals)
        got_cdses = intervals_to_list(got_cdses)
        got_utrs = intervals_to_list(got_utrs)
        self.assertEqual(expected_cdses, got_cdses)
        self.assertEqual(expected_utrs, got_utrs)
Ejemplo n.º 4
0
    def test_merging_stop_codons_3(self):
        """
        Merge a stop codon that already lies inside the CDS.

        Situation:
            * 1 stop codon given on same exon as CDS, but inside CDS!

        On both strands the CDS row is expected back unchanged and the rest
        of the exon is expected as a single UTR3 row.
        """
        def record(feature, start, stop, strand):
            # Nine-field annotation row; only the feature name, the
            # coordinates and the strand vary within this test.
            return ['1', '.', feature, start, stop, '.', strand, '.', '.']

        # Positive strand: stop codon 63-65 sits at the end of CDS 60-65.
        intervals = list_to_intervals([
            record('transcript', '60', '70', '+'),
            record('stop_codon', '63', '65', '+'),
        ])
        exons = list_to_intervals([
            record('exon', '60', '70', '+'),
        ])
        cdses = list_to_intervals([
            record('CDS', '60', '65', '+'),
        ])

        expected_cdses = [
            record('CDS', '60', '65', '+'),
        ]
        expected_utrs = [
            record('UTR3', '66', '70', '+'),
        ]
        got_cdses, got_utrs = segment._get_non_cds_exons(
            cdses, exons, intervals)
        got_cdses = intervals_to_list(got_cdses)
        got_utrs = intervals_to_list(got_utrs)
        self.assertEqual(expected_cdses, got_cdses)
        self.assertEqual(expected_utrs, got_utrs)

        # Negative strand: stop codon 65-67 sits at the low-coordinate end
        # of CDS 65-70.
        intervals = list_to_intervals([
            record('transcript', '60', '70', '-'),
            record('stop_codon', '65', '67', '-'),
        ])
        exons = list_to_intervals([
            record('exon', '60', '70', '-'),
        ])
        cdses = list_to_intervals([
            record('CDS', '65', '70', '-'),
        ])

        expected_cdses = [
            record('CDS', '65', '70', '-'),
        ]
        expected_utrs = [
            record('UTR3', '60', '64', '-'),
        ]
        got_cdses, got_utrs = segment._get_non_cds_exons(
            cdses, exons, intervals)
        got_cdses = intervals_to_list(got_cdses)
        got_utrs = intervals_to_list(got_utrs)
        self.assertEqual(expected_cdses, got_cdses)
        self.assertEqual(expected_utrs, got_utrs)