Exemple #1
0
    def test_wrong_cds(self):
        """Trim a transcript whose only CDS feature is a single base.

        A six-exon, plus-strand model on chromosome 15 is assembled by hand
        and given one CDS feature covering only position 48051996.
        ``finalize`` must emit at least one record of level WARNING or above
        on the "null" logger. ``trim_coding`` is then called with
        ``max_length=50`` and its result must span 47631366-48051992.
        """
        exon_coords = [
            (47631264, 47631416),
            (47704590, 47704669),
            (47762671, 47762742),
            (47893062, 47893093),
            (47895572, 47895655),
            (48051942, 48051999),
        ]
        # Tab-separated GFF fields of the one-base CDS feature.
        cds_fields = [
            "15", "protein_coding", "CDS", "48051996", "48051996", ".", "+",
            "0", "ID=ENST00000560636.cds1;Parent=ENST00000560636",
        ]

        model = Transcript()
        model.chrom = "15"
        model.source = "protein_coding"
        model.start = 47631264
        model.end = 48051999
        model.strand = "+"
        model.add_exons(exon_coords)
        model.id = "ENST00000560636"
        model.parent = "ENSG00000137872"

        # The CDS lies inside the last exon (48051942-48051999).
        model.add_exon(GffLine("\t".join(cds_fields)))

        null_logger = Mikado.utilities.log_utils.create_null_logger()
        model.logger = null_logger
        # assertLogs fails the test unless something is logged at WARNING+.
        with self.assertLogs("null", level="WARNING"):
            model.finalize()

        result = trim_coding(model, null_logger, max_length=50)
        self.assertEqual(result.start, 47631366)
        self.assertEqual(result.end, 48051992)
    def setUp(self):
        """Build the Chr5 reference model and fetch the sequences it covers.

        Parses an embedded GFF3 record (one transcript line followed by
        eleven exon lines) into ``GffLine`` objects, builds and finalizes a
        ``Transcript`` from them, and slices the exon sequences and the whole
        transcript span out of ``self.fasta``. ``self.fasta`` is not created
        here; it has to be provided elsewhere on the test case.

        A few sanity assertions on the parsed coordinates and on the total
        exon length (1718) run as part of the fixture.
        """
        # Prepare the model
        self.model_lines = """Chr5	tair10	transcript	26584797	26595528	100	+	.	ID=c58_g1_i3.mrna1.19;Parent=c58_g1_i3.path1.19;Name=c58_g1_i3.mrna1.19;gene_name=c58_g1_i3
    Chr5	tair10	exon	26584797	26584879	.	+	.	ID=c58_g1_i3.mrna1.19.exon1;Parent=c58_g1_i3.mrna1.19
    Chr5	tair10	exon	26585220	26585273	.	+	.	ID=c58_g1_i3.mrna1.19.exon2;Parent=c58_g1_i3.mrna1.19
    Chr5	tair10	exon	26585345	26585889	.	+	.	ID=c58_g1_i3.mrna1.19.exon3;Parent=c58_g1_i3.mrna1.19
    Chr5	tair10	exon	26585982	26586294	.	+	.	ID=c58_g1_i3.mrna1.19.exon4;Parent=c58_g1_i3.mrna1.19
    Chr5	tair10	exon	26586420	26586524	.	+	.	ID=c58_g1_i3.mrna1.19.exon5;Parent=c58_g1_i3.mrna1.19
    Chr5	tair10	exon	26586638	26586850	.	+	.	ID=c58_g1_i3.mrna1.19.exon6;Parent=c58_g1_i3.mrna1.19
    Chr5	tair10	exon	26586934	26586996	.	+	.	ID=c58_g1_i3.mrna1.19.exon7;Parent=c58_g1_i3.mrna1.19
    Chr5	tair10	exon	26587084	26587202	.	+	.	ID=c58_g1_i3.mrna1.19.exon8;Parent=c58_g1_i3.mrna1.19
    Chr5	tair10	exon	26587287	26587345	.	+	.	ID=c58_g1_i3.mrna1.19.exon9;Parent=c58_g1_i3.mrna1.19
    Chr5	tair10	exon	26587427	26587472	.	+	.	ID=c58_g1_i3.mrna1.19.exon10;Parent=c58_g1_i3.mrna1.19
    Chr5	tair10	exon	26595411	26595528	.	+	.	ID=c58_g1_i3.mrna1.19.exon11;Parent=c58_g1_i3.mrna1.19"""

        # Continuation lines of the literal carry source indentation, so each
        # one is stripped before it is parsed.
        self.gff_lines = [
            GffLine(raw.strip()) for raw in self.model_lines.split("\n")
        ]

        # First record is the transcript itself, the rest are its exons.
        self.model = Transcript(self.gff_lines[0])
        self.model.add_exons(self.gff_lines[1:])
        self.model.finalize()

        # ``start - 1:end`` turns the inclusive start/end pair into a
        # Python half-open slice.
        exon_sequences = []
        for record in self.gff_lines[1:]:
            chrom_sequence = self.fasta[record.chrom]
            exon_sequences.append(chrom_sequence[record.start - 1:record.end])
        self.exons = exon_sequences

        total_exon_length = sum(len(sequence) for sequence in self.exons)
        self.assertEqual(total_exon_length, 1718, self.exons)

        # We need the whole genomic fragment
        chr5_sequence = self.fasta["Chr5"]
        self.model_fasta = chr5_sequence[self.model.start - 1:self.model.end]

        # The first exon must have survived parsing and finalization intact.
        first_exon_line = self.gff_lines[1]
        self.assertEqual(first_exon_line.start, 26584797)
        self.assertEqual(first_exon_line.end, 26584879)
        self.assertEqual(self.model.exons[0][0], first_exon_line.start)
        self.assertEqual(self.model.exons[0][1], first_exon_line.end)
Exemple #3
0
    def setUp(self):

        lines = """Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	mRNA	40282	46004	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960;Name=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2;aed=0.0;note=TRIAE_CS42_5DL_TGACv1_434051_AA1427960;confidence=High;has_start=True;has_stop=True;original_stop=True;protein_rank=P1;transcript_rank=T2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	exon	40282	40933	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon1;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	three_prime_UTR	40282	40720	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.three_prime_UTR1;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	CDS	40721	40933	.	-	0	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS1;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	CDS	41018	41111	.	-	1	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS2;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	exon	41018	41111	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon2;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	CDS	41227	41468	.	-	0	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS3;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	exon	41227	41468	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon3;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	CDS	41673	41831	.	-	0	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS4;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	exon	41673	41831	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon4;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	CDS	41946	42820	.	-	2	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS5;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	exon	41946	42820	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon5;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	CDS	42905	42913	.	-	2	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS6;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	exon	42905	42913	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon6;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	CDS	45373	45496	.	-	0	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS7;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	exon	45373	45496	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon7;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	CDS	45600	45651	.	-	1	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS8;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	exon	45600	45651	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon8;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	CDS	45726	45726	.	-	2	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS9;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	exon	45726	45726	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon9;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	CDS	45875	45893	.	-	0	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.CDS10;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	exon	45875	46004	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.exon10;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2
Triticum_aestivum_CS42_TGACv1_scaffold_434051_5DL	TGACv1	five_prime_UTR	45894	46004	.	-	.	ID=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2.five_prime_UTR1;Parent=TRIAE_CS42_5DL_TGACv1_434051_AA1427960.2"""

        lines = [GffLine("\t".join(_.split())) for _ in lines.split("\n") if _]
        self.transcript = Transcript(lines[0], logger=self.logger)
        self.transcript.add_exons(lines[1:])
        self.correct_phases = {(40721, 40933): 2,
                               (41018, 41111): 0,
                               (41227, 41468): 2,
                               (41673, 41831): 2,
                               (41946, 42820): 1,
                               (42905, 42913): 1,
                               (45373, 45496): 2,
                               (45600, 45651): 0,
                               (45726, 45726): 2,
                               (45875, 45893): 0}