def test_wrong_cds(self):
    # Scenario: a six-exon, plus-strand transcript that also carries one
    # single-base CDS feature (48051996-48051996) inside its last exon.
    # finalize() must emit at least one record of level WARNING or higher
    # on the "null" logger; trim_coding(max_length=50) must then return a
    # transcript with the boundaries asserted at the end.
    exon_coords = [
        (47631264, 47631416),
        (47704590, 47704669),
        (47762671, 47762742),
        (47893062, 47893093),
        (47895572, 47895655),
        (48051942, 48051999),
    ]
    # The nine GFF columns of the CDS feature, joined with tabs below.
    cds_fields = (
        "15",
        "protein_coding",
        "CDS",
        "48051996",
        "48051996",
        ".",
        "+",
        "0",
        "ID=ENST00000560636.cds1;Parent=ENST00000560636",
    )

    transcript = Transcript()
    transcript.chrom = "15"
    transcript.source = "protein_coding"
    transcript.start = 47631264
    transcript.end = 48051999
    transcript.strand = "+"
    transcript.add_exons(exon_coords)
    transcript.id = "ENST00000560636"
    transcript.parent = "ENSG00000137872"

    cds_record = GffLine("\t".join(cds_fields))
    transcript.add_exon(cds_record)

    null_logger = Mikado.utilities.log_utils.create_null_logger()
    transcript.logger = null_logger

    # Only finalisation is expected to log the warning.
    with self.assertLogs("null", level="WARNING"):
        transcript.finalize()

    trimmed = trim_coding(transcript, null_logger, max_length=50)
    self.assertEqual(trimmed.start, 47631366)
    self.assertEqual(trimmed.end, 48051992)
def setUp(self):
    """Build the Chr5 transcript model and its sequence slices.

    Populates ``self.model_lines`` (raw GFF text), ``self.gff_lines``
    (parsed records), ``self.model`` (finalised transcript built from the
    first record plus the eleven exon records), ``self.exons`` (one slice
    of ``self.fasta`` per exon record) and ``self.model_fasta`` (the slice
    spanning the whole model).

    ``self.fasta`` is not created here; it is assumed to be provided by
    the enclosing test class and to be indexable by chromosome name.
    """
    # Prepare the model: one GFF record per line, nine columns each.
    self.model_lines = """Chr5 tair10 transcript 26584797 26595528 100 + . ID=c58_g1_i3.mrna1.19;Parent=c58_g1_i3.path1.19;Name=c58_g1_i3.mrna1.19;gene_name=c58_g1_i3
Chr5 tair10 exon 26584797 26584879 . + . ID=c58_g1_i3.mrna1.19.exon1;Parent=c58_g1_i3.mrna1.19
Chr5 tair10 exon 26585220 26585273 . + . ID=c58_g1_i3.mrna1.19.exon2;Parent=c58_g1_i3.mrna1.19
Chr5 tair10 exon 26585345 26585889 . + . ID=c58_g1_i3.mrna1.19.exon3;Parent=c58_g1_i3.mrna1.19
Chr5 tair10 exon 26585982 26586294 . + . ID=c58_g1_i3.mrna1.19.exon4;Parent=c58_g1_i3.mrna1.19
Chr5 tair10 exon 26586420 26586524 . + . ID=c58_g1_i3.mrna1.19.exon5;Parent=c58_g1_i3.mrna1.19
Chr5 tair10 exon 26586638 26586850 . + . ID=c58_g1_i3.mrna1.19.exon6;Parent=c58_g1_i3.mrna1.19
Chr5 tair10 exon 26586934 26586996 . + . ID=c58_g1_i3.mrna1.19.exon7;Parent=c58_g1_i3.mrna1.19
Chr5 tair10 exon 26587084 26587202 . + . ID=c58_g1_i3.mrna1.19.exon8;Parent=c58_g1_i3.mrna1.19
Chr5 tair10 exon 26587287 26587345 . + . ID=c58_g1_i3.mrna1.19.exon9;Parent=c58_g1_i3.mrna1.19
Chr5 tair10 exon 26587427 26587472 . + . ID=c58_g1_i3.mrna1.19.exon10;Parent=c58_g1_i3.mrna1.19
Chr5 tair10 exon 26595411 26595528 . + . ID=c58_g1_i3.mrna1.19.exon11;Parent=c58_g1_i3.mrna1.19"""

    # Re-join the columns with tabs before parsing, so the fixture parses
    # the same way whether the literal holds tabs or runs of spaces (no
    # column in this fixture contains internal whitespace). Blank lines
    # are skipped rather than handed to GffLine.
    self.gff_lines = [
        GffLine("\t".join(record.split()))
        for record in self.model_lines.split("\n")
        if record.strip()
    ]

    # First record is the transcript itself; the rest are its exons.
    self.model = Transcript(self.gff_lines[0])
    self.model.add_exons(self.gff_lines[1:])
    self.model.finalize()

    # GFF coordinates are 1-based and inclusive, hence the "start - 1".
    self.exons = [
        self.fasta[line.chrom][line.start - 1:line.end]
        for line in self.gff_lines[1:]
    ]
    self.assertEqual(sum(len(exon) for exon in self.exons), 1718, self.exons)

    # We need the whole genomic fragment
    self.model_fasta = self.fasta["Chr5"][self.model.start - 1:self.model.end]

    # Sanity checks: the first exon record and the first exon of the
    # finalised model must agree on their coordinates.
    self.assertEqual(self.gff_lines[1].start, 26584797)
    self.assertEqual(self.gff_lines[1].end, 26584879)
    self.assertEqual(self.model.exons[0][0], self.gff_lines[1].start)
    self.assertEqual(self.model.exons[0][1], self.gff_lines[1].end)
def setUp(self):
    """Load the TGACv1 minus-strand transcript and its expected phases.

    Sets ``self.transcript`` (built from the mRNA record, with every
    following record passed to ``add_exons``) and ``self.correct_phases``,
    which maps each CDS segment ``(start, end)`` to the phase the tests
    expect. Several of these expected values differ from the phase column
    written in the fixture itself. ``self.logger`` is read here but is
    set up outside this method.
    """
    fixture = """Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 mRNA 40282 46004 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960;Name=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2;aed=0.0;note=TRIAE_CS42_5DL_TGACv1_434051_AA1427960;confidence=High;has_start=True;has_stop=True;original_stop=True;protein_rank=P1;transcript_rank=T2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 exon 40282 40933 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon1;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 three_prime_UTR 40282 40720 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.three_prime_UTR1;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 CDS 40721 40933 . - 0 ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS1;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 CDS 41018 41111 . - 1 ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS2;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 exon 41018 41111 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon2;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 CDS 41227 41468 . - 0 ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS3;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 exon 41227 41468 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon3;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 CDS 41673 41831 . - 0 ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS4;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 exon 41673 41831 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon4;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 CDS 41946 42820 . - 2 ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS5;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 exon 41946 42820 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon5;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 CDS 42905 42913 . - 2 ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS6;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 exon 42905 42913 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon6;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 CDS 45373 45496 . - 0 ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS7;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 exon 45373 45496 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon7;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 CDS 45600 45651 . - 1 ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS8;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 exon 45600 45651 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon8;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 CDS 45726 45726 . - 2 ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS9;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 exon 45726 45726 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon9;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 CDS 45875 45893 . - 0 ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS10;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 exon 45875 46004 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon10;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL TGACv1 five_prime_UTR 45894 46004 . - . ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.five_prime_UTR1;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2"""

    # Parse every non-empty row, re-joining its columns with tabs.
    records = []
    for row in fixture.split("\n"):
        if not row:
            continue
        records.append(GffLine("\t".join(row.split())))

    # The first record is the mRNA; everything after it is a sub-feature.
    mrna_record, feature_records = records[0], records[1:]
    self.transcript = Transcript(mrna_record, logger=self.logger)
    self.transcript.add_exons(feature_records)

    # Expected phase for each CDS segment, keyed by (start, end).
    self.correct_phases = {
        (40721, 40933): 2,
        (41018, 41111): 0,
        (41227, 41468): 2,
        (41673, 41831): 2,
        (41946, 42820): 1,
        (42905, 42913): 1,
        (45373, 45496): 2,
        (45600, 45651): 0,
        (45726, 45726): 2,
        (45875, 45893): 0,
    }