def test_merging_stop_codons_4(self):
    """
    Check a stop codon whose positions are spread over two exons.

    Situation:
        * 1 stop codon split in two exons

    The scenario is exercised once per strand. In both cases the expected
    CDS list covers the given CDS plus both stop codon pieces, and the
    remaining exon parts are expected as UTR5 / UTR3.
    """
    # ---- Positive strand ----
    expected_cdses = [
        ['1', '.', 'CDS', '30', '40', '.', '+', '.', '.'],
        ['1', '.', 'CDS', '60', '61', '.', '+', '.', '.'],
    ]
    expected_utrs = [
        ['1', '.', 'UTR5', '20', '29', '.', '+', '.', '.'],
        ['1', '.', 'UTR3', '62', '70', '.', '+', '.', '.'],
    ]
    # Only the transcript and its stop codon pieces are supplied here.
    transcript = list_to_intervals([
        ['1', '.', 'transcript', '20', '70', '.', '+', '.', '.'],
        ['1', '.', 'stop_codon', '40', '40', '.', '+', '.', '.'],
        ['1', '.', 'stop_codon', '60', '61', '.', '+', '.', '.'],
    ])
    exon_regions = list_to_intervals([
        ['1', '.', 'exon', '20', '40', '.', '+', '.', '.'],
        ['1', '.', 'exon', '60', '70', '.', '+', '.', '.'],
    ])
    cds_regions = list_to_intervals([
        ['1', '.', 'CDS', '30', '39', '.', '+', '.', '.'],
    ])

    raw_cdses, raw_utrs = segment._get_non_cds_exons(
        cds_regions, exon_regions, transcript)
    got_cdses = intervals_to_list(raw_cdses)
    got_utrs = intervals_to_list(raw_utrs)
    self.assertEqual(expected_cdses, got_cdses)
    self.assertEqual(expected_utrs, got_utrs)

    # ---- Negative strand ----
    expected_cdses = [
        ['1', '.', 'CDS', '60', '65', '.', '-', '.', '.'],
        ['1', '.', 'CDS', '39', '40', '.', '-', '.', '.'],
    ]
    expected_utrs = [
        ['1', '.', 'UTR3', '20', '38', '.', '-', '.', '.'],
        ['1', '.', 'UTR5', '66', '80', '.', '-', '.', '.'],
    ]
    transcript = list_to_intervals([
        ['1', '.', 'transcript', '20', '80', '.', '-', '.', '.'],
        ['1', '.', 'stop_codon', '39', '40', '.', '-', '.', '.'],
        ['1', '.', 'stop_codon', '60', '60', '.', '-', '.', '.'],
    ])
    exon_regions = list_to_intervals([
        ['1', '.', 'exon', '20', '40', '.', '-', '.', '.'],
        ['1', '.', 'exon', '60', '80', '.', '-', '.', '.'],
    ])
    cds_regions = list_to_intervals([
        ['1', '.', 'CDS', '61', '65', '.', '-', '.', '.'],
    ])

    raw_cdses, raw_utrs = segment._get_non_cds_exons(
        cds_regions, exon_regions, transcript)
    got_cdses = intervals_to_list(raw_cdses)
    got_utrs = intervals_to_list(raw_utrs)
    self.assertEqual(expected_cdses, got_cdses)
    self.assertEqual(expected_utrs, got_utrs)
def test_1(self):
    """
    Check UTR classification for a transcript without stop codons.

    Situation:
        * no stop codons
        * 1 "empty" exon before first cds
        * 1 "empty" exon after last cds
        * 1 exons shared by UTR5 and CDS
        * 1 exons shared by UTR3 and CDS

    The positive strand is checked first; the same inputs are then passed
    through ``reverse_strand`` and checked again with the UTR5 / UTR3
    labels swapped.
    """
    # ---- Positive strand ----
    expected_cdses = [
        ['1', '.', 'CDS', '45', '50', '.', '+', '.', '.'],
        ['1', '.', 'CDS', '60', '65', '.', '+', '.', '.'],
    ]
    expected_utrs = [
        ['1', '.', 'UTR5', '20', '30', '.', '+', '.', '.'],
        ['1', '.', 'UTR5', '40', '44', '.', '+', '.', '.'],
        ['1', '.', 'UTR3', '66', '70', '.', '+', '.', '.'],
        ['1', '.', 'UTR3', '80', '90', '.', '+', '.', '.'],
    ]
    # A single transcript interval is enough for this scenario.
    transcript = list_to_intervals([
        ['1', '.', 'transcript', '20', '90', '.', '+', '.', '.'],
    ])
    exon_regions = list_to_intervals([
        ['1', '.', 'exon', '20', '30', '.', '+', '.', '.'],
        ['1', '.', 'exon', '40', '50', '.', '+', '.', '.'],
        ['1', '.', 'exon', '60', '70', '.', '+', '.', '.'],
        ['1', '.', 'exon', '80', '90', '.', '+', '.', '.'],
    ])
    cds_regions = list_to_intervals([
        ['1', '.', 'CDS', '45', '50', '.', '+', '.', '.'],
        ['1', '.', 'CDS', '60', '65', '.', '+', '.', '.'],
    ])

    raw_cdses, raw_utrs = segment._get_non_cds_exons(
        cds_regions, exon_regions, transcript)
    got_cdses = intervals_to_list(raw_cdses)
    got_utrs = intervals_to_list(raw_utrs)
    self.assertEqual(expected_cdses, got_cdses)
    self.assertEqual(expected_utrs, got_utrs)

    # ---- Negative strand: reuse the inputs with the strand flipped ----
    transcript = reverse_strand(transcript)
    exon_regions = reverse_strand(exon_regions)
    cds_regions = reverse_strand(cds_regions)
    expected_cdses = reverse_strand(expected_cdses)
    expected_utrs = [
        ['1', '.', 'UTR3', '20', '30', '.', '-', '.', '.'],
        ['1', '.', 'UTR3', '40', '44', '.', '-', '.', '.'],
        ['1', '.', 'UTR5', '66', '70', '.', '-', '.', '.'],
        ['1', '.', 'UTR5', '80', '90', '.', '-', '.', '.'],
    ]

    raw_cdses, raw_utrs = segment._get_non_cds_exons(
        cds_regions, exon_regions, transcript)
    got_cdses = intervals_to_list(raw_cdses)
    got_utrs = intervals_to_list(raw_utrs)
    self.assertEqual(expected_cdses, got_cdses)
    self.assertEqual(expected_utrs, got_utrs)
def test_merging_stop_codons_1(self):
    """
    Check a stop codon that lies entirely within an existing CDS.

    Situation:
        * stop codon and CDS completely overlap

    The CDS list is expected to come back unchanged and, because the
    exons and CDS have identical coordinates, no UTRs are expected. The
    scenario is exercised once per strand.
    """
    # ---- Positive strand ----
    intervals = list_to_intervals([
        # For this test only the transcript and its stop codon are needed.
        ['1', '.', 'transcript', '20', '62', '.', '+', '.', '.'],
        ['1', '.', 'stop_codon', '60', '62', '.', '+', '.', '.'],
    ])
    exons = list_to_intervals([
        ['1', '.', 'exon', '20', '40', '.', '+', '.', '.'],
        ['1', '.', 'exon', '60', '62', '.', '+', '.', '.'],
    ])
    cdses = list_to_intervals([
        ['1', '.', 'CDS', '20', '40', '.', '+', '.', '.'],
        ['1', '.', 'CDS', '60', '62', '.', '+', '.', '.'],
    ])
    expected_new_cdses = [
        ['1', '.', 'CDS', '20', '40', '.', '+', '.', '.'],
        ['1', '.', 'CDS', '60', '62', '.', '+', '.', '.'],
    ]
    expected_utrs = []

    new_cdses, utrs = segment._get_non_cds_exons(cdses, exons, intervals)
    new_cdses, utrs = intervals_to_list(new_cdses), intervals_to_list(utrs)
    self.assertEqual(expected_new_cdses, new_cdses)
    self.assertEqual(expected_utrs, utrs)

    # ---- Negative strand ----
    intervals = list_to_intervals([
        # For this test only the transcript and its stop codon are needed.
        ['1', '.', 'transcript', '20', '80', '.', '-', '.', '.'],
        ['1', '.', 'stop_codon', '20', '22', '.', '-', '.', '.'],
    ])
    # Exons are declared on the same strand as the transcript, stop codon
    # and CDS so the whole fixture describes one negative-strand transcript.
    exons = list_to_intervals([
        ['1', '.', 'exon', '20', '22', '.', '-', '.', '.'],
        ['1', '.', 'exon', '60', '80', '.', '-', '.', '.'],
    ])
    cdses = list_to_intervals([
        ['1', '.', 'CDS', '20', '22', '.', '-', '.', '.'],
        ['1', '.', 'CDS', '60', '80', '.', '-', '.', '.'],
    ])
    expected_new_cdses = [
        ['1', '.', 'CDS', '20', '22', '.', '-', '.', '.'],
        ['1', '.', 'CDS', '60', '80', '.', '-', '.', '.'],
    ]
    expected_utrs = []

    new_cdses, utrs = segment._get_non_cds_exons(cdses, exons, intervals)
    new_cdses, utrs = intervals_to_list(new_cdses), intervals_to_list(utrs)
    self.assertEqual(expected_new_cdses, new_cdses)
    self.assertEqual(expected_utrs, utrs)
def test_merging_stop_codons_3(self):
    """
    Check a stop codon located inside the CDS on the same exon.

    Situation:
        * 1 stop codon given on same exon as CDS, but inside CDS!

    The CDS is expected to come back unchanged, with the rest of the exon
    reported as UTR3. The scenario is exercised once per strand.
    """
    # ---- Positive strand ----
    expected_cdses = [
        ['1', '.', 'CDS', '60', '65', '.', '+', '.', '.'],
    ]
    expected_utrs = [
        ['1', '.', 'UTR3', '66', '70', '.', '+', '.', '.'],
    ]
    # Only the transcript and its stop codon are supplied here.
    transcript = list_to_intervals([
        ['1', '.', 'transcript', '60', '70', '.', '+', '.', '.'],
        ['1', '.', 'stop_codon', '63', '65', '.', '+', '.', '.'],
    ])
    exon_regions = list_to_intervals([
        ['1', '.', 'exon', '60', '70', '.', '+', '.', '.'],
    ])
    cds_regions = list_to_intervals([
        ['1', '.', 'CDS', '60', '65', '.', '+', '.', '.'],
    ])

    raw_cdses, raw_utrs = segment._get_non_cds_exons(
        cds_regions, exon_regions, transcript)
    got_cdses = intervals_to_list(raw_cdses)
    got_utrs = intervals_to_list(raw_utrs)
    self.assertEqual(expected_cdses, got_cdses)
    self.assertEqual(expected_utrs, got_utrs)

    # ---- Negative strand ----
    expected_cdses = [
        ['1', '.', 'CDS', '65', '70', '.', '-', '.', '.'],
    ]
    expected_utrs = [
        ['1', '.', 'UTR3', '60', '64', '.', '-', '.', '.'],
    ]
    transcript = list_to_intervals([
        ['1', '.', 'transcript', '60', '70', '.', '-', '.', '.'],
        ['1', '.', 'stop_codon', '65', '67', '.', '-', '.', '.'],
    ])
    exon_regions = list_to_intervals([
        ['1', '.', 'exon', '60', '70', '.', '-', '.', '.'],
    ])
    cds_regions = list_to_intervals([
        ['1', '.', 'CDS', '65', '70', '.', '-', '.', '.'],
    ])

    raw_cdses, raw_utrs = segment._get_non_cds_exons(
        cds_regions, exon_regions, transcript)
    got_cdses = intervals_to_list(raw_cdses)
    got_utrs = intervals_to_list(raw_utrs)
    self.assertEqual(expected_cdses, got_cdses)
    self.assertEqual(expected_utrs, got_utrs)