def test_no_required_attributes(self):
    """
    A transcript record that carries only ``gene_id`` (no ``transcript_id``)
    must make ``_get_gene_content`` fail with the expected message.
    """
    records = [
        ['1', '.', 'transcript', '500', '600', '.', '+', '.', 'gene_id "G1";'],
    ]
    annotation = make_file_from_list(records)

    expected_error = "First element in gene content is neither gene or transcript!"
    with self.assertRaisesRegex(Exception, expected_error):
        # Consume the whole result inside the context manager so that an
        # error raised during iteration is caught by the assertion.
        list(segment._get_gene_content(annotation, ['1', 'MT']))
def test_all_good(self):
    """
    Happy path: two genes are reported.

    * G2 has no explicit 'gene' record in the input, yet the output is
      expected to contain a 'gene' interval for it.
    * The final record sits on chromosome 2 and is expected to be absent
      from the output.
    """
    rows = [
        # G1: explicit gene record followed by transcripts T1 and T2.
        ['1', '.', 'gene', '100', '300', '.', '+', '.', 'gene_id "G1";'],
        ['1', '.', 'transcript', '100', '250', '.', '+', '.', 'gene_id "G1"; transcript_id "T1";'],
        ['1', '.', 'exon', '100', '150', '.', '+', '.', 'gene_id "G1"; transcript_id "T1"; exon_number "1";'],
        ['1', '.', 'exon', '200', '250', '.', '+', '.', 'gene_id "G1"; transcript_id "T1"; exon_number "2";'],
        ['1', '.', 'transcript', '150', '300', '.', '+', '.', 'gene_id "G1"; transcript_id "T2";'],
        ['1', '.', 'exon', '150', '200', '.', '+', '.', 'gene_id "G1"; transcript_id "T2"; exon_number "1";'],
        ['1', '.', 'exon', '250', '300', '.', '+', '.', 'gene_id "G1"; transcript_id "T2"; exon_number "2";'],
        # G2: starts directly with transcript T3, there is no gene record.
        ['1', '.', 'transcript', '400', '500', '.', '+', '.', 'gene_id "G2"; transcript_id "T3";'],
        ['1', '.', 'exon', '400', '430', '.', '+', '.', 'gene_id "G2"; transcript_id "T3"; exon_number "1"'],
        ['1', '.', 'CDS', '410', '430', '.', '+', '.', 'gene_id "G2"; transcript_id "T3";'],
        ['1', '.', 'exon', '470', '500', '.', '+', '.', 'gene_id "G2"; transcript_id "T3"; exon_number "2"'],
        ['1', '.', 'CDS', '470', '490', '.', '+', '.', 'gene_id "G2"; transcript_id "T3";'],
        # G3: the only record on chromosome 2.
        ['2', '.', 'CDS', '470', '490', '.', '+', '.', 'gene_id "G3"; transcript_id "T4";'],
    ]
    gtf_data = list_to_intervals(rows)
    annotation = make_file_from_list(intervals_to_list(gtf_data))

    genes = list(segment._get_gene_content(annotation, ['1', 'MT'], report_progress=True))
    # Exactly two genes must come back; any other count fails the unpacking.
    first_gene, second_gene = genes

    expected_first = {
        'gene': gtf_data[0],
        'T1': gtf_data[1:4],
        'T2': gtf_data[4:7],
    }
    # Gene interval that the output is expected to contain for G2.
    inferred_gene = create_interval_from_list(
        ['1', '.', 'gene', '400', '500', '.', '+', '.', 'gene_id "G2";'])
    expected_second = {
        'gene': inferred_gene,
        # Everything from the T3 transcript up to (excluding) the last record.
        'T3': gtf_data[7:-1],
    }

    self.assertEqual(first_gene, expected_first)
    self.assertEqual(second_gene, expected_second)
def test_already_processed_gene(self):
    """
    A record belonging to G1 that shows up after G2 has started must
    trigger an ``AssertionError``.
    """
    records = [
        ['1', '.', 'gene', '100', '300', '.', '+', '.', 'gene_id "G1";'],
        ['1', '.', 'transcript', '100', '250', '.', '+', '.', 'gene_id "G1"; transcript_id "T1";'],
        ['1', '.', 'gene', '500', '700', '.', '+', '.', 'gene_id "G2";'],
        # Offending record: gene_id is G1 again although G2 is already open.
        ['1', '.', 'transcript', '500', '600', '.', '+', '.', 'gene_id "G1"; transcript_id "T3";'],
    ]
    annotation = make_file_from_list(records)

    with self.assertRaises(AssertionError):
        # Consume the whole result so the offending last record is reached.
        list(segment._get_gene_content(annotation, ['1', 'MT']))
def test_already_processed(self):
    """
    A record belonging to transcript T1 that shows up after transcript T2
    has started must trigger an ``AssertionError``.
    """
    records = [
        ['1', '.', 'gene', '100', '300', '.', '+', '.', 'gene_id "G1";'],
        ['1', '.', 'transcript', '100', '250', '.', '+', '.', 'gene_id "G1"; transcript_id "T1";'],
        ['1', '.', 'transcript', '150', '300', '.', '+', '.', 'gene_id "G1"; transcript_id "T2";'],
        # Offending record: exon of T1 listed after T2 was opened.
        ['1', '.', 'exon', '150', '200', '.', '+', '.', 'gene_id "G1"; transcript_id "T1"; exon_number "1";'],
    ]
    annotation = make_file_from_list(records)

    with self.assertRaises(AssertionError):
        # Only the first item is requested; the error is expected before
        # it can be produced.
        next(segment._get_gene_content(annotation, ['1', 'MT']))