def test_intron_annotation(self):
    """intron annotations on a gene sequence should give the expected ends"""
    # each case: (gene symbol, transcript stable id, intron rank,
    #             expected first 10 bases, expected last 10 bases)
    cases = [
        ("IL2", "ENST00000226730", 1, "gtaagtatat", "actttcttag"),
        ("IL13", "ENST00000304506", 3, "gtaaggcatc", "tgtcctgcag"),
    ]
    for symbol, stable_id, rank, exp_seq5, exp_seq3 in cases:
        matching_genes = self.human.get_genes_matching(symbol=symbol)
        gene = asserted_one(matching_genes)
        annotated = gene.get_annotated_seq(feature_types="gene")

        # the intron annotation is looked up as "<transcript id>-<rank>"
        label = "%s-%d" % (stable_id, rank)
        intron = asserted_one(
            annotated.get_annotations_matching("intron", label))

        region = annotated.get_region_covering_all(intron)
        bases = str(region.get_slice())

        # expected ends are written in lower case, so upper-case them
        # before comparing against the retrieved sequence
        self.assertEqual(bases[:10], exp_seq5.upper())
        self.assertEqual(bases[-10:], exp_seq3.upper())
def test_intron_number(self):
    """transcripts should report the expected number of introns"""
    # each case: (gene stable id, transcript stable id, expected intron count)
    cases = [
        ("ENSG00000227268", "ENST00000445946", 0),
        ("ENSG00000132199", "ENST00000583771", 5),
        ("ENSG00000132199", "ENST00000340116", 14),
    ]
    for gene_id, transcript_id, exp_number in cases:
        matching_genes = self.human.get_genes_matching(stableid=gene_id)
        gene = asserted_one(matching_genes)

        candidates = [
            t for t in gene.transcripts if t.stableid == transcript_id
        ]
        transcript = asserted_one(candidates)

        if exp_number != 0:
            self.assertEqual(len(transcript.introns), exp_number)
        else:
            # with no introns expected, the attribute is compared against
            # None rather than having its length taken
            self.assertEqual(transcript.introns, None)
def test_intron(self):
    """intron rank, position and end sequences should be correct on either strand"""
    # IL2 is on - strand, IL13 is on + strand, both have three introns
    # each record: (rank, start, end, first 10 bases, last 10 bases)
    il2_expected = [
        (1, 122456203, 122456293, "gtaagtatat", "actttcttag"),
        (2, 122453853, 122456143, "gtaagtacaa", "attattctag"),
        (3, 122451862, 122453709, "gtaaggcatt", "tcttttatag"),
    ]
    il13_expected = [
        (1, 132658360, 132659417, "gtgagtgtcg", "gctcccacag"),
        (2, 132659471, 132659723, "gtaaggacct", "ctccccacag"),
        (3, 132659828, 132660174, "gtaaggcatc", "tgtcctgcag"),
    ]
    cases = [
        ("IL2", "ENST00000226730", il2_expected),
        ("IL13", "ENST00000304506", il13_expected),
    ]

    for symbol, stable_id, exp_introns in cases:
        gene = asserted_one(self.human.get_genes_matching(symbol=symbol))
        strand = gene.location.strand

        candidates = [t for t in gene.transcripts if t.stableid == stable_id]
        transcript = asserted_one(candidates)

        introns = transcript.introns
        self.assertEqual(len(introns), len(exp_introns))

        # lengths were just asserted equal, so pairing the two sequences
        # visits every intron alongside its expected record
        for intron, expected in zip(introns, exp_introns):
            loc = intron.location
            start, end = loc.start, loc.end
            seq = str(intron.seq)
            exp_rank, exp_start, exp_end, exp_seq5, exp_seq3 = expected

            # every intron should be on the same strand as its gene
            self.assertEqual(loc.strand, strand)
            # test the order using rank
            self.assertEqual(intron.rank, exp_rank)
            # test position
            self.assertEqual(start, exp_start)
            self.assertEqual(end, exp_end)
            # test sequence
            self.assertEqual(seq[:10], exp_seq5.upper())
            self.assertEqual(seq[-10:], exp_seq3.upper())