Example #1
0
    def test_no_required_attributes(self):
        """
        A lone transcript record without ``transcript_id`` is rejected.

        The annotation consists of a single ``transcript`` row that carries
        only a ``gene_id`` attribute. Processing it is expected to fail with
        the "First element in gene content ..." error message.
        """
        records = [
            ['1', '.', 'transcript', '500', '600', '.', '+', '.', 'gene_id "G1";'],
        ]
        annotation = make_file_from_list(records)
        expected_error = "First element in gene content is neither gene or transcript!"

        with self.assertRaisesRegex(Exception, expected_error):
            # Consume the whole result so that every record gets processed.
            list(segment._get_gene_content(annotation, ['1', 'MT']))
Example #2
0
    def test_all_good(self):
        """
        Well-formed annotation is grouped into one dict per gene.

        Checked properties:

        * gene G1 comes back with its ``gene`` interval and the intervals of
          its transcripts T1 and T2,
        * gene G2 has no ``gene`` record in the input, yet the output still
          holds a ``gene`` interval for it (spanning 400-500),
        * the final record lies on chromosome 2, which is not among the
          requested chromosomes, so it does not appear in the output.
        """
        intervals = list_to_intervals([
            # G1: explicit gene record.
            ['1', '.', 'gene', '100', '300', '.', '+', '.',
             'gene_id "G1";'],
            # G1 / T1: transcript followed by its two exons.
            ['1', '.', 'transcript', '100', '250', '.', '+', '.',
             'gene_id "G1"; transcript_id "T1";'],
            ['1', '.', 'exon', '100', '150', '.', '+', '.',
             'gene_id "G1"; transcript_id "T1"; exon_number "1";'],
            ['1', '.', 'exon', '200', '250', '.', '+', '.',
             'gene_id "G1"; transcript_id "T1"; exon_number "2";'],
            # G1 / T2: transcript followed by its two exons.
            ['1', '.', 'transcript', '150', '300', '.', '+', '.',
             'gene_id "G1"; transcript_id "T2";'],
            ['1', '.', 'exon', '150', '200', '.', '+', '.',
             'gene_id "G1"; transcript_id "T2"; exon_number "1";'],
            ['1', '.', 'exon', '250', '300', '.', '+', '.',
             'gene_id "G1"; transcript_id "T2"; exon_number "2";'],
            # G2 / T3: no gene record, exons interleaved with CDS records.
            ['1', '.', 'transcript', '400', '500', '.', '+', '.',
             'gene_id "G2"; transcript_id "T3";'],
            ['1', '.', 'exon', '400', '430', '.', '+', '.',
             'gene_id "G2"; transcript_id "T3"; exon_number "1"'],
            ['1', '.', 'CDS', '410', '430', '.', '+', '.',
             'gene_id "G2"; transcript_id "T3";'],
            ['1', '.', 'exon', '470', '500', '.', '+', '.',
             'gene_id "G2"; transcript_id "T3"; exon_number "2"'],
            ['1', '.', 'CDS', '470', '490', '.', '+', '.',
             'gene_id "G2"; transcript_id "T3";'],
            # G3 / T4: sits on chromosome 2, outside the requested set.
            ['2', '.', 'CDS', '470', '490', '.', '+', '.',
             'gene_id "G3"; transcript_id "T4";'],
        ])
        annotation = make_file_from_list(intervals_to_list(intervals))

        genes = list(
            segment._get_gene_content(annotation, ['1', 'MT'], report_progress=True))
        # Exactly two genes are expected; unpacking fails otherwise.
        first_gene, second_gene = genes

        first_expected = {
            'gene': intervals[0],
            'T1': intervals[1:4],
            'T2': intervals[4:7],
        }

        # G2 has no gene record in the input, so build the interval that is
        # expected to stand in for it.
        synthetic_gene = create_interval_from_list(
            ['1', '.', 'gene', '400', '500', '.', '+', '.', 'gene_id "G2";'])
        second_expected = {
            'gene': synthetic_gene,
            # Everything from the T3 transcript onwards, minus the trailing
            # chromosome 2 record.
            'T3': intervals[7:-1],
        }

        self.assertEqual(first_gene, first_expected)
        self.assertEqual(second_gene, second_expected)
Example #3
0
    def test_already_processed_gene(self):
        """
        A record pointing back to an earlier gene triggers AssertionError.

        The last row is a transcript whose ``gene_id`` is G1, although the
        records of G2 have already started.
        """
        records = [
            # Gene G1 with one transcript.
            ['1', '.', 'gene', '100', '300', '.', '+', '.', 'gene_id "G1";'],
            ['1', '.', 'transcript', '100', '250', '.', '+', '.', 'gene_id "G1"; transcript_id "T1";'],
            # Gene G2 starts here ...
            ['1', '.', 'gene', '500', '700', '.', '+', '.', 'gene_id "G2";'],
            # ... but this transcript claims to belong to G1 again.
            ['1', '.', 'transcript', '500', '600', '.', '+', '.', 'gene_id "G1"; transcript_id "T3";'],
        ]
        annotation = make_file_from_list(records)

        with self.assertRaises(AssertionError):
            # Consume the whole result so that the offending row is reached.
            list(segment._get_gene_content(annotation, ['1', 'MT']))
Example #4
0
    def test_already_processed(self):
        """
        A record pointing back to an earlier transcript triggers AssertionError.

        The last row is an exon of T1, although the records of T2 (same gene)
        have already started.
        """
        records = [
            ['1', '.', 'gene', '100', '300', '.', '+', '.', 'gene_id "G1";'],
            # Transcript T1, immediately followed by transcript T2 ...
            ['1', '.', 'transcript', '100', '250', '.', '+', '.', 'gene_id "G1"; transcript_id "T1";'],
            ['1', '.', 'transcript', '150', '300', '.', '+', '.', 'gene_id "G1"; transcript_id "T2";'],
            # ... and then an exon that belongs to T1 again.
            ['1', '.', 'exon', '150', '200', '.', '+', '.', 'gene_id "G1"; transcript_id "T1"; exon_number "1";'],
        ]
        annotation = make_file_from_list(records)

        with self.assertRaises(AssertionError):
            # Only the first item is requested; the error is expected while
            # it is being produced.
            next(segment._get_gene_content(annotation, ['1', 'MT']))