Ejemplo n.º 1
0
    def testGetTranscriptsAnnot_withoutUTR_threeExons(self):
        """Check getTranscriptsAnnot() on a three-exon transcript without UTR.

        Every CDS has the same coordinates as its exon. Each query region is
        annotated against the transcript twice: first with exons and CDS on
        the "+" strand, then after switching all of them to the "-" strand.
        """
        # Fixture: three exons and their CDS with identical coordinates.
        exons = [
            Exon(10, 40, "+", "chr1", "fwd_exon_1"),
            Exon(91, 150, "+", "chr1", "fwd_exon_2"),
            Exon(201, 361, "+", "chr1", "fwd_exon_3"),
        ]
        coding_parts = [
            CDS(10, 40, "+", "chr1", "fwd_cds_1"),
            CDS(91, 150, "+", "chr1", "fwd_cds_2"),
            CDS(201, 361, "+", "chr1", "fwd_cds_3"),
        ]
        gene = Gene(10, 350, None, "chr1", "gene_1", {"id": "g_1"})
        transcript = Transcript(
            None, None, None, "chr1", "transcrit_1", {"id": "tr_1"},
            parent=gene,
            children=exons)
        protein = Protein(
            None, None, None, "chr1", "protein_1",
            children=coding_parts,
            transcript=transcript)

        # Query regions as (start, end, name, description).
        query_table = [
            (80, 100, "query_1", "starts before exon_2 ; ends in exon_2."),
            (100, 180, "query_2", "starts in exon_2 ; ends after exon_2."),
            (91, 150, "query_3",
             "starts at the start of exon_2 ; ends at the end of exon_2."),
            (80, 170, "query_4", "starts before exon_2 ; ends after exon_2."),
            (80, 230, "query_5", "starts before exon_2 ; ends in exon_3."),
            (100, 400, "query_6", "starts in exon_2 ; ends after exon_3."),
            (100, 250, "query_7", "starts in exon_2 ; ends in exon_3."),
            (80, 370, "query_8", "starts before exon_2 ; ends after exon_3."),
            (90, 151, "query_9",
             "starts just before exon_2 ; ends just after exon_2."),
        ]
        queries = [
            Region(q_start, q_end, None, "chr1", q_name, {"desc": q_desc})
            for q_start, q_end, q_name, q_desc in query_table
        ]

        def annot(start, end):
            # Expand two (EXON, INTRON, Protein_position) triples, one per
            # query boundary, into the position-specific expected fields.
            return {
                "start_EXON": start[0],
                "start_INTRON": start[1],
                "start_Protein_position": start[2],
                "end_EXON": end[0],
                "end_INTRON": end[1],
                "end_Protein_position": end[2]
            }

        def check_strand(strand, strand_code, expected_by_query):
            # Complete the expected records with the fields shared by every
            # query on this strand.
            shared_fields = {
                "SYMBOL": "gene_1",
                "Gene": "g_1",
                "Feature": "tr_1",
                "Feature_type": "Transcript",
                "STRAND": strand_code
            }
            for query_expected in expected_by_query.values():
                query_expected.update(shared_fields)
            # Put every exon and CDS on the requested strand, then re-sort
            # the children of their parents.
            for exon in transcript.children:
                exon.strand = strand
            for cds in protein.children:
                cds.strand = strand
            transcript.sortChildren()
            protein.sortChildren()
            # Assert: one annotation per query, for the single transcript.
            for query in queries:
                observed = getTranscriptsAnnot(query, [transcript])
                self.assertEqual([expected_by_query[query.name]], observed)

        # Expected forward 3 exons
        check_strand("+", "1", {
            "query_1": annot((None, "1/2", 11), ("2/3", None, 14)),
            "query_2": annot(("2/3", None, 14), (None, "2/2", 31)),
            "query_3": annot(("2/3", None, 11), ("2/3", None, 31)),
            "query_4": annot((None, "1/2", 11), (None, "2/2", 31)),
            "query_5": annot((None, "1/2", 11), ("3/3", None, 41)),
            "query_6": annot(("2/3", None, 14), ("3/3", None, 84)),
            "query_7": annot(("2/3", None, 14), ("3/3", None, 47)),
            "query_8": annot((None, "1/2", 11), ("3/3", None, 84)),
            "query_9": annot((None, "1/2", 11), (None, "2/2", 31)),
        })

        # Expected reverse 3 exons
        check_strand("-", "-1", {
            "query_1": annot(("2/3", None, 71), (None, "2/2", 74)),
            "query_2": annot((None, "1/2", 54), ("2/3", None, 71)),
            "query_3": annot(("2/3", None, 54), ("2/3", None, 74)),
            "query_4": annot((None, "1/2", 54), (None, "2/2", 74)),
            "query_5": annot(("1/3", None, 44), (None, "2/2", 74)),
            "query_6": annot(("1/3", None, 1), ("2/3", None, 71)),
            "query_7": annot(("1/3", None, 38), ("2/3", None, 71)),
            "query_8": annot(("1/3", None, 1), (None, "2/2", 74)),
            "query_9": annot((None, "1/2", 54), (None, "2/2", 74)),
        })