Ejemplo n.º 1
0
    def setUp(self):
        """Build the fixtures: a two-sequence FASTA, its .fai index and one protein per strand."""
        base_path = os.path.join(tempfile.gettempdir(), str(uuid.uuid1()))

        # Temporary files (removed by the test case's cleanup, if any)
        self.tmp_fasta_idx = base_path + ".fasta.fai"
        self.tmp_fasta = base_path + ".fasta"

        # Create sequence file (no trailing newline after the last line)
        fasta_lines = [
            ">one",
            "ATGCATGCATGCATGCATGCATGCATGCAT",
            "GCATGCATGCATGCATGCATGCATGCATGC",
            "ATGCAT",
            ">two another chromosome",
            "ATGCATGCATGCAT",
            "GCATGCATGCATGC",
        ]
        with open(self.tmp_fasta, "w") as fasta_handle:
            fasta_handle.write("\n".join(fasta_lines))

        # Proteins
        # 1 3 5 7 9  11 14 16 19 21        30 33 36 39 41  44 47 50
        # ATGCATGCAT GCATG CATGC ATGCATGCA TGCATGCATGC ATGCATGCATGCATGCATGC
        #            ..... *****           *******....      ......
        # prot_1           12345           6789 11
        exon_bounds = ((10, 20), (30, 40), (45, 50))
        for attr_name, strand in (("prot_1", "+"), ("prot_2", "-")):
            transcript = Transcript(
                None,
                None,
                strand,
                "one",
                children=[Exon(start, end, strand, "one") for start, end in exon_bounds],
            )
            setattr(self, attr_name, Protein(16, 36, strand, "one", transcript=transcript))

        # Create index: one tab-separated row per sequence
        index_rows = [
            "one\t66\t5\t30\t31",
            "two\t28\t98\t14\t15",
        ]
        with open(self.tmp_fasta_idx, "w") as index_handle:
            index_handle.write("\n".join(index_rows))
Ejemplo n.º 2
0
 def addProtein(self):
     """Check that Transcript.addProtein() links transcript and protein in both directions.

     NOTE(review): this name has no ``test`` prefix; if it lives in a
     unittest.TestCase, the default loader will not collect it. Confirm
     whether that is intended.
     """
     first = Protein(10, 30, "+", "chr1", "p1")
     second = Protein(32, 50, "+", "chr1", "p2")
     transcript = Transcript(name="tr1")

     def linked_names():
         # Names of the proteins currently attached to the transcript.
         return [prot.name for prot in transcript.proteins]

     # Nothing attached yet
     self.assertEqual(linked_names(), [])

     # Attach the first protein
     transcript.addProtein(first)
     self.assertEqual(linked_names(), ["p1"])  # Seen from the transcript
     self.assertEqual(first.transcript, transcript)  # Seen from the protein

     # Attach the second protein
     transcript.addProtein(second)
     self.assertEqual(linked_names(), ["p1", "p2"])  # Seen from the transcript
     self.assertEqual(first.transcript, transcript)  # Seen from the protein
     self.assertEqual(second.transcript, transcript)  # Seen from the protein
Ejemplo n.º 3
0
 def setUp(self):
     """Create, for each strand, three CDS and a protein built from them in shuffled order.

     The result is stored in ``self.fwd`` ("+" strand) and ``self.rvs``
     ("-" strand), two dicts with keys "cds_1", "cds_2", "cds_3" and "protein".
     """
     for attr_name, strand in (("fwd", "+"), ("rvs", "-")):
         features = {
             "cds_1": CDS(10, 30, strand, "chr1"),
             "cds_2": CDS(40, 70, strand, "chr1"),
             "cds_3": CDS(80, 99, strand, "chr1"),
         }
         setattr(self, attr_name, features)
         # Children are given out of genomic order on purpose (1, 3, 2).
         unordered_cds = [features[key] for key in ("cds_1", "cds_3", "cds_2")]
         features["protein"] = Protein(children=unordered_cds)
Ejemplo n.º 4
0
 def testSetProteins(self):
     """Check that proteins set via the constructor or the property are linked both ways."""

     def make_pair():
         # Fresh, unattached p1/p2 proteins for each scenario.
         return (Protein(10, 30, "+", "chr1", "p1"),
                 Protein(32, 50, "+", "chr1", "p2"))

     def check_links(transcript, proteins):
         # The transcript lists the proteins, and each protein points back to it.
         self.assertEqual([prot.name for prot in transcript.proteins],
                          [prot.name for prot in proteins])
         self.assertEqual([prot.transcript.name for prot in proteins],
                          [transcript.name for _ in proteins])

     # Proteins given to the constructor
     p1, p2 = make_pair()
     tr = Transcript(name="tr1", proteins=[p1, p2])
     check_links(tr, [p1, p2])

     # Proteins assigned through the property
     p1, p2 = make_pair()
     tr = Transcript(name="tr1")
     tr.proteins = [p1, p2]
     check_links(tr, [p1, p2])

     # Replacing the list detaches the proteins that are dropped
     p1, p2 = make_pair()
     p3 = Protein(54, 70, "+", "chr1", "p3")
     tr = Transcript(name="tr1", proteins=[p1, p3])
     tr.proteins = [p2, p3]
     self.assertIsNone(p1.transcript)
     self.assertNotIn(p1, tr.proteins)
     for kept in (p2, p3):
         self.assertEqual(kept.transcript, tr)
         self.assertIn(kept, tr.proteins)
Ejemplo n.º 5
0
 def delProtein(self):
     """Check that Transcript.delProtein() unlinks transcript and protein in both directions.

     NOTE(review): this name has no ``test`` prefix; if it lives in a
     unittest.TestCase, the default loader will not collect it. Confirm
     whether that is intended.
     """
     protein_1 = Protein(10, 30, "+", "chr1", "p1")
     protein_2 = Protein(32, 50, "+", "chr1", "p2")
     transcript_1 = Transcript(name="tr1", proteins=[protein_1, protein_2])
     # Init
     self.assertEqual([elt.name for elt in transcript_1.proteins],
                      ["p1", "p2"])  # Check from tr
     self.assertEqual(protein_1.transcript, transcript_1)  # Check from prot
     self.assertEqual(protein_2.transcript, transcript_1)  # Check from prot
     # Delete protein 2
     transcript_1.delProtein(protein_2)
     self.assertEqual([elt.name for elt in transcript_1.proteins],
                      ["p1"])  # Check from tr
     self.assertEqual(protein_1.transcript, transcript_1)  # Check from prot
     self.assertIsNone(protein_2.transcript)  # Check from prot
     # Delete protein 1 (the original called addProtein() here, which
     # contradicts the assertions below)
     transcript_1.delProtein(protein_1)
     self.assertEqual([elt.name for elt in transcript_1.proteins],
                      [])  # Check from tr
     self.assertIsNone(protein_1.transcript)  # Check from prot
     self.assertIsNone(protein_2.transcript)  # Check from prot
Ejemplo n.º 6
0
    def testGetTranscriptsAnnot_withoutUTR_oneExon(self):
        """Check getTranscriptsAnnot() on a one-exon transcript without UTR, on both strands."""
        only_exon = Exon(91, 150, "+", "chr1", "exon_2")
        only_cds = CDS(91, 150, "+", "chr1", "cds_1")
        gene = Gene(10, 350, None, "chr1", "gene_1", {"id": "g_1"})
        transcript = Transcript(None, None, None, "chr1", "transcrit_1",
                                {"id": "tr_1"},
                                parent=gene,
                                children=[only_exon])
        protein = Protein(None, None, None, "chr1", "protein_2",
                          children=[only_cds],
                          transcript=transcript)
        query_specs = [
            (80, 160, "query_1", "starts before exon_1 ; ends after exon_1."),
            (91, 150, "query_2", "starts at start of exon_1 ; ends at end of exon_1."),
            (100, 110, "query_3", "starts in exon_1 ; ends in exon_1."),
            (80, 100, "query_4", "starts before exon_1 ; ends in exon_1."),
            (110, 200, "query_5", "starts in exon_1 ; ends after exon_1."),
        ]
        queries = [
            Region(start, end, None, "chr1", name, {"desc": desc})
            for start, end, name, desc in query_specs
        ]

        # Each expected row follows the order of position_keys.
        position_keys = ("start_EXON", "start_INTRON", "start_Protein_position",
                         "end_EXON", "end_INTRON", "end_Protein_position")
        scenarios = [
            # Expected forward 1 exon
            ("+", "1", {
                "query_1": ("1/1", None, 1, "1/1", None, 20),
                "query_2": ("1/1", None, 1, "1/1", None, 20),
                "query_3": ("1/1", None, 4, "1/1", None, 7),
                "query_4": ("1/1", None, 1, "1/1", None, 4),
                "query_5": ("1/1", None, 7, "1/1", None, 20),
            }),
            # Expected reverse 1 exon
            ("-", "-1", {
                "query_1": ("1/1", None, 1, "1/1", None, 20),
                "query_2": ("1/1", None, 1, "1/1", None, 20),
                "query_3": ("1/1", None, 14, "1/1", None, 17),
                "query_4": ("1/1", None, 17, "1/1", None, 20),
                "query_5": ("1/1", None, 1, "1/1", None, 14),
            }),
        ]
        for strand, strand_label, rows in scenarios:
            expected = {}
            for query_name, row in rows.items():
                record = dict(zip(position_keys, row))
                # Fields shared by every annotation of this transcript
                record["SYMBOL"] = "gene_1"
                record["Gene"] = "g_1"
                record["Feature"] = "tr_1"
                record["Feature_type"] = "Transcript"
                record["STRAND"] = strand_label
                expected[query_name] = record
            # Apply the strand to every sub-feature and re-sort
            for exon in transcript.children:
                exon.strand = strand
            for cds in protein.children:
                cds.strand = strand
            transcript.sortChildren()
            protein.sortChildren()
            # Assert
            for query in queries:
                observed = getTranscriptsAnnot(query, [transcript])
                self.assertEqual([expected[query.name]], observed)
Ejemplo n.º 7
0
    def testGetTranscriptsAnnot_withoutUTR_threeExons(self):
        """Check getTranscriptsAnnot() on a three-exon transcript without UTR, on both strands."""
        spans = ((10, 40), (91, 150), (201, 361))
        exons = [
            Exon(start, end, "+", "chr1", "fwd_exon_{}".format(idx))
            for idx, (start, end) in enumerate(spans, 1)
        ]
        coding = [
            CDS(start, end, "+", "chr1", "fwd_cds_{}".format(idx))
            for idx, (start, end) in enumerate(spans, 1)
        ]
        gene = Gene(10, 350, None, "chr1", "gene_1", {"id": "g_1"})
        transcript = Transcript(None, None, None, "chr1", "transcrit_1",
                                {"id": "tr_1"},
                                parent=gene,
                                children=exons)
        protein = Protein(None, None, None, "chr1", "protein_1",
                          children=coding,
                          transcript=transcript)
        query_specs = [
            (80, 100, "query_1", "starts before exon_2 ; ends in exon_2."),
            (100, 180, "query_2", "starts in exon_2 ; ends after exon_2."),
            (91, 150, "query_3",
             "starts at the start of exon_2 ; ends at the end of exon_2."),
            (80, 170, "query_4", "starts before exon_2 ; ends after exon_2."),
            (80, 230, "query_5", "starts before exon_2 ; ends in exon_3."),
            (100, 400, "query_6", "starts in exon_2 ; ends after exon_3."),
            (100, 250, "query_7", "starts in exon_2 ; ends in exon_3."),
            (80, 370, "query_8", "starts before exon_2 ; ends after exon_3."),
            (90, 151, "query_9",
             "starts just before exon_2 ; ends just after exon_2."),
        ]
        queries = [
            Region(start, end, None, "chr1", name, {"desc": desc})
            for start, end, name, desc in query_specs
        ]

        # Each expected row follows the order of position_keys.
        position_keys = ("start_EXON", "start_INTRON", "start_Protein_position",
                         "end_EXON", "end_INTRON", "end_Protein_position")
        scenarios = [
            # Expected forward 3 exons
            ("+", "1", {
                "query_1": (None, "1/2", 11, "2/3", None, 14),
                "query_2": ("2/3", None, 14, None, "2/2", 31),
                "query_3": ("2/3", None, 11, "2/3", None, 31),
                "query_4": (None, "1/2", 11, None, "2/2", 31),
                "query_5": (None, "1/2", 11, "3/3", None, 41),
                "query_6": ("2/3", None, 14, "3/3", None, 84),
                "query_7": ("2/3", None, 14, "3/3", None, 47),
                "query_8": (None, "1/2", 11, "3/3", None, 84),
                "query_9": (None, "1/2", 11, None, "2/2", 31),
            }),
            # Expected reverse 3 exons
            ("-", "-1", {
                "query_1": ("2/3", None, 71, None, "2/2", 74),
                "query_2": (None, "1/2", 54, "2/3", None, 71),
                "query_3": ("2/3", None, 54, "2/3", None, 74),
                "query_4": (None, "1/2", 54, None, "2/2", 74),
                "query_5": ("1/3", None, 44, None, "2/2", 74),
                "query_6": ("1/3", None, 1, "2/3", None, 71),
                "query_7": ("1/3", None, 38, "2/3", None, 71),
                "query_8": ("1/3", None, 1, None, "2/2", 74),
                "query_9": (None, "1/2", 54, None, "2/2", 74),
            }),
        ]
        for strand, strand_label, rows in scenarios:
            expected = {}
            for query_name, row in rows.items():
                record = dict(zip(position_keys, row))
                # Fields shared by every annotation of this transcript
                record["SYMBOL"] = "gene_1"
                record["Gene"] = "g_1"
                record["Feature"] = "tr_1"
                record["Feature_type"] = "Transcript"
                record["STRAND"] = strand_label
                expected[query_name] = record
            # Apply the strand to every sub-feature and re-sort
            for exon in transcript.children:
                exon.strand = strand
            for cds in protein.children:
                cds.strand = strand
            transcript.sortChildren()
            protein.sortChildren()
            # Assert
            for query in queries:
                observed = getTranscriptsAnnot(query, [transcript])
                self.assertEqual([expected[query.name]], observed)
Ejemplo n.º 8
0
class TestProteinSeq(unittest.TestCase):
    """Tests of the Protein methods that read codons from an indexed FASTA."""

    def setUp(self):
        """Write a temporary FASTA and its .fai index, and build one protein per strand."""
        base_path = os.path.join(tempfile.gettempdir(), str(uuid.uuid1()))

        # Temporary files
        self.tmp_fasta_idx = base_path + ".fasta.fai"
        self.tmp_fasta = base_path + ".fasta"

        # Create sequence file (no trailing newline after the last line)
        fasta_lines = [
            ">one",
            "ATGCATGCATGCATGCATGCATGCATGCAT",
            "GCATGCATGCATGCATGCATGCATGCATGC",
            "ATGCAT",
            ">two another chromosome",
            "ATGCATGCATGCAT",
            "GCATGCATGCATGC",
        ]
        with open(self.tmp_fasta, "w") as fasta_handle:
            fasta_handle.write("\n".join(fasta_lines))

        # Proteins
        # 1 3 5 7 9  11 14 16 19 21        30 33 36 39 41  44 47 50
        # ATGCATGCAT GCATG CATGC ATGCATGCA TGCATGCATGC ATGCATGCATGCATGCATGC
        #            ..... *****           *******....      ......
        # prot_1           12345           6789 11
        exon_bounds = ((10, 20), (30, 40), (45, 50))
        for attr_name, strand in (("prot_1", "+"), ("prot_2", "-")):
            transcript = Transcript(
                None,
                None,
                strand,
                "one",
                children=[Exon(start, end, strand, "one") for start, end in exon_bounds],
            )
            setattr(self, attr_name, Protein(16, 36, strand, "one", transcript=transcript))

        # Create index: one tab-separated row per sequence
        index_rows = [
            "one\t66\t5\t30\t31",
            "two\t28\t98\t14\t15",
        ]
        with open(self.tmp_fasta_idx, "w") as index_handle:
            index_handle.write("\n".join(index_rows))

    def tearDown(self):
        """Remove the temporary FASTA and index if they still exist."""
        for tmp_path in (self.tmp_fasta, self.tmp_fasta_idx):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def testGetCodonSeqFromProtPos(self):
        """Check the codon sequence returned for several amino acid positions."""
        # (protein, amino acid position, expected codon)
        cases = [
            (self.prot_1, 1, "CAT"),
            (self.prot_1, 2, "GCT"),
            (self.prot_1, 4, "TGC"),
            (self.prot_2, 1, "GCA"),
            (self.prot_2, 3, "AGC"),
            (self.prot_2, 4, "ATG"),
        ]
        with IdxFastaIO(self.tmp_fasta) as reader:
            for protein, aa_pos, expected_codon in cases:
                observed = protein.getCodonSeqFromProtPos(aa_pos, reader)
                self.assertEqual(observed, expected_codon)
            with self.assertRaises(Exception):
                self.prot_1.getCodonSeqFromProtPos(8, reader)  # Not in protein

    def testGetCodonInfo(self):
        """Check the codon info tuple returned for several reference positions."""
        # (protein, reference position, expected tuple); the expected values
        # read as (amino acid position, position in codon, codon sequence)
        cases = [
            (self.prot_1, 16, (1, 1, "CAT")),
            (self.prot_1, 18, (1, 3, "CAT")),
            (self.prot_1, 19, (2, 1, "GCT")),
            (self.prot_1, 20, (2, 2, "GCT")),
            (self.prot_1, 34, (4, 1, "TGC")),
            (self.prot_1, 35, (4, 2, "TGC")),
            (self.prot_1, 36, (4, 3, "TGC")),
            (self.prot_2, 36, (1, 1, "GCA")),
            (self.prot_2, 34, (1, 3, "GCA")),
            (self.prot_2, 30, (3, 1, "AGC")),
            (self.prot_2, 20, (3, 2, "AGC")),
            (self.prot_2, 19, (3, 3, "AGC")),
            (self.prot_2, 18, (4, 1, "ATG")),
            (self.prot_2, 16, (4, 3, "ATG")),
        ]
        with IdxFastaIO(self.tmp_fasta) as reader:
            for protein, ref_pos, expected_info in cases:
                observed = protein.getCodonInfo(ref_pos, reader)
                self.assertEqual(observed, expected_info)
            with self.assertRaises(Exception):
                self.prot_1.getCodonInfo(14, reader)  # In exon but not in CDS
Ejemplo n.º 9
0
 def testGetCodonRefPos(self):
     """Check the reference positions returned by Protein.getCodonRefPos() on both strands.

     The protein spans 16-36 over exons 10-20 and 30-40, so the second
     codon on the forward strand (third on the reverse strand) straddles
     the intron. Codon 8 is used as the out-of-range case.
     """
     tr_1 = Transcript(None,
                       None,
                       "+",
                       "chr1",
                       children=[
                           Exon(10, 20, "+", "chr1"),
                           Exon(30, 40, "+", "chr1"),
                           Exon(45, 50, "+", "chr1")
                       ])
     prot_1 = Protein(16, 36, "+", "chr1", transcript=tr_1)
     tr_2 = Transcript(None,
                       None,
                       "-",
                       "chr1",
                       children=[
                           Exon(10, 20, "-", "chr1"),
                           Exon(30, 40, "-", "chr1"),
                           Exon(45, 50, "-", "chr1")
                       ])
     prot_2 = Protein(16, 36, "-", "chr1", transcript=tr_2)
     data = [
         {
             "prot": prot_1,
             "aa_pos": 1,
             "expected": [16, 17, 18]
         },
         {
             "prot": prot_1,
             "aa_pos": 2,
             "expected": [19, 20, 30]
         },
         {
             "prot": prot_1,
             "aa_pos": 3,
             "expected": [31, 32, 33]
         },
         {
             "prot": prot_1,
             "aa_pos": 4,
             "expected": [34, 35, 36]
         },
         {
             "prot": prot_2,
             "aa_pos": 1,
             "expected": [36, 35, 34]
         },
         {
             "prot": prot_2,
             "aa_pos": 2,
             "expected": [33, 32, 31]
         },
         {
             "prot": prot_2,
             "aa_pos": 3,
             "expected": [30, 20, 19]
         },
         {
             "prot": prot_2,
             "aa_pos": 4,
             "expected": [18, 17, 16]
         },
     ]
     for curr in data:
         self.assertEqual(curr["prot"].getCodonRefPos(curr["aa_pos"]),
                          curr["expected"])
     # Use the local prot_1: the original referenced self.prot_1, which this
     # method never defines, so an AttributeError could satisfy assertRaises
     # without getCodonRefPos() ever being called. The check does not depend
     # on the loop variable, so it runs once after the loop.
     with self.assertRaises(Exception):
         prot_1.getCodonRefPos(8)  # Not in protein
Ejemplo n.º 10
0
 def testGetCDSFromTranscript(self):
     res = []
     # One exon forward
     transcript_1 = Transcript(None,
                               None,
                               "+",
                               "chr1",
                               children=[Exon(100, 150, "+", "chr1")])
     res.append({
         "expected": [CDS(100, 150, "+", "chr1")],
         "observed":
         Protein(100, 150, "+", "chr1",
                 transcript=transcript_1).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(110, 140, "+", "chr1")],
         "observed":
         Protein(110, 140, "+", "chr1",
                 transcript=transcript_1).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(110, 150, "+", "chr1")],
         "observed":
         Protein(110, 150, "+", "chr1",
                 transcript=transcript_1).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(100, 140, "+", "chr1")],
         "observed":
         Protein(100, 140, "+", "chr1",
                 transcript=transcript_1).getCDSFromTranscript()
     })
     # Three exons forward
     transcript_2 = Transcript(None,
                               None,
                               "+",
                               "chr1",
                               children=[
                                   Exon(30, 80, "+", "chr1"),
                                   Exon(100, 150, "+", "chr1"),
                                   Exon(170, 200, "+", "chr1")
                               ])
     res.append({
         "expected": [CDS(100, 150, "+", "chr1")],
         "observed":
         Protein(100, 150, "+", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(110, 140, "+", "chr1")],
         "observed":
         Protein(110, 140, "+", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(110, 150, "+", "chr1")],
         "observed":
         Protein(110, 150, "+", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(100, 140, "+", "chr1")],
         "observed":
         Protein(100, 140, "+", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(30, 80, "+", "chr1"),
                      CDS(100, 150, "+", "chr1")],
         "observed":
         Protein(30, 150, "+", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(60, 80, "+", "chr1"),
                      CDS(100, 150, "+", "chr1")],
         "observed":
         Protein(60, 150, "+", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(80, 80, "+", "chr1"),
                      CDS(100, 150, "+", "chr1")],
         "observed":
         Protein(80, 150, "+", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [
             CDS(80, 80, "+", "chr1"),
             CDS(100, 150, "+", "chr1"),
             CDS(170, 170, "+", "chr1")
         ],
         "observed":
         Protein(80, 170, "+", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected":
         [CDS(100, 150, "+", "chr1"),
          CDS(170, 200, "+", "chr1")],
         "observed":
         Protein(100, 200, "+", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected":
         [CDS(100, 150, "+", "chr1"),
          CDS(170, 190, "+", "chr1")],
         "observed":
         Protein(100, 190, "+", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected":
         [CDS(110, 150, "+", "chr1"),
          CDS(170, 200, "+", "chr1")],
         "observed":
         Protein(110, 200, "+", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected":
         [CDS(110, 150, "+", "chr1"),
          CDS(170, 190, "+", "chr1")],
         "observed":
         Protein(110, 190, "+", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     # One exon reverse
     transcript_1 = Transcript(None,
                               None,
                               "-",
                               "chr1",
                               children=[Exon(100, 150, "-", "chr1")])
     res.append({
         "expected": [CDS(100, 150, "-", "chr1")],
         "observed":
         Protein(100, 150, "-", "chr1",
                 transcript=transcript_1).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(110, 140, "-", "chr1")],
         "observed":
         Protein(110, 140, "-", "chr1",
                 transcript=transcript_1).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(110, 150, "-", "chr1")],
         "observed":
         Protein(110, 150, "-", "chr1",
                 transcript=transcript_1).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(100, 140, "-", "chr1")],
         "observed":
         Protein(100, 140, "-", "chr1",
                 transcript=transcript_1).getCDSFromTranscript()
     })
     # Three exons reverse
     transcript_2 = Transcript(None,
                               None,
                               "-",
                               "chr1",
                               children=[
                                   Exon(170, 200, "-", "chr1"),
                                   Exon(100, 150, "-", "chr1"),
                                   Exon(30, 80, "-", "chr1")
                               ])
     res.append({
         "expected": [CDS(100, 150, "-", "chr1")],
         "observed":
         Protein(100, 150, "-", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(110, 140, "-", "chr1")],
         "observed":
         Protein(110, 140, "-", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(110, 150, "-", "chr1")],
         "observed":
         Protein(110, 150, "-", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(100, 140, "-", "chr1")],
         "observed":
         Protein(100, 140, "-", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(100, 150, "-", "chr1"),
                      CDS(30, 80, "-", "chr1")],
         "observed":
         Protein(30, 150, "-", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(100, 150, "-", "chr1"),
                      CDS(60, 80, "-", "chr1")],
         "observed":
         Protein(60, 150, "-", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [CDS(100, 150, "-", "chr1"),
                      CDS(80, 80, "-", "chr1")],
         "observed":
         Protein(80, 150, "-", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected": [
             CDS(170, 170, "-", "chr1"),
             CDS(100, 150, "-", "chr1"),
             CDS(80, 80, "-", "chr1")
         ],
         "observed":
         Protein(80, 170, "-", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected":
         [CDS(170, 200, "-", "chr1"),
          CDS(100, 150, "-", "chr1")],
         "observed":
         Protein(100, 200, "-", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected":
         [CDS(170, 190, "-", "chr1"),
          CDS(100, 150, "-", "chr1")],
         "observed":
         Protein(100, 190, "-", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected":
         [CDS(170, 200, "-", "chr1"),
          CDS(110, 150, "-", "chr1")],
         "observed":
         Protein(110, 200, "-", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     res.append({
         "expected":
         [CDS(170, 190, "-", "chr1"),
          CDS(110, 150, "-", "chr1")],
         "observed":
         Protein(110, 190, "-", "chr1",
                 transcript=transcript_2).getCDSFromTranscript()
     })
     # Launch evaluation
     for eval_pair in res:
         self.assertEqual(
             ", ".join([
                 curr_cds.getCoordinatesStr()
                 for curr_cds in eval_pair["expected"]
             ]),
             ", ".join([
                 curr_cds.getCoordinatesStr()
                 for curr_cds in eval_pair["observed"]
             ]),
         )
Ejemplo n.º 11
0
    def setUp(self):
        tmp_folder = tempfile.gettempdir()
        unique_id = str(uuid.uuid1())

        # Temporary files
        self.tmp_ensembl_in_gtf = os.path.join(tmp_folder,
                                               unique_id + "_ensembl_in.gtf")
        self.tmp_ensembl_out_gtf = os.path.join(tmp_folder,
                                                unique_id + "_ensembl_out.gtf")
        self.tmp_ncbi_in_gtf = os.path.join(tmp_folder,
                                            unique_id + "_ncbi_in.gtf")
        self.tmp_ncbi_out_gtf = os.path.join(tmp_folder,
                                             unique_id + "_ncbi_out.gtf")

        # Ensembl GTF
        self.ensembl_expected = [
            Gene(54770583, 54771134, "+", "6", "KRASP1", {
                "id": "ENSG00000220635"
            }, None, [
                Transcript(
                    54770583, 54771134, "+", "6", "KRASP1-201",
                    {"id": "ENST00000407852"}, None,
                    [Exon(54770583, 54771134, "+", "6", "ENST00000407852_e1")])
            ]),
            Gene(
                25204789, 25250936, "-", "12", "KRAS",
                {"id": "ENSG00000133703"}, None, [
                    Transcript(
                        25204789, 25250931, "-", "12", "KRAS-202",
                        {"id": "ENST00000311936"}, None, [
                            Exon(25250751, 25250931, "-", "12",
                                 "ENST00000311936_e1"),
                            Exon(25245274, 25245395, "-", "12",
                                 "ENST00000311936_e2"),
                            Exon(25227234, 25227412, "-", "12",
                                 "ENST00000311936_e3"),
                            Exon(25225614, 25225773, "-", "12",
                                 "ENST00000311936_e5"),
                            Exon(25204789, 25209911, "-", "12",
                                 "ENST00000311936_e6")
                        ], [
                            Protein(25209798, 25245384, "-", "12", None, None,
                                    None, [
                                        CDS(25245274, 25245384, "-", "12", ""),
                                        CDS(25227234, 25227412, "-", "12", ""),
                                        CDS(25225614, 25225773, "-", "12", ""),
                                        CDS(25209798, 25209911, "-", "12", "")
                                    ])
                        ]),
                    Transcript(
                        25209168, 25250936, "-", "12", "KRAS-204",
                        {"id": "ENST00000557334"}, None, [
                            Exon(25250751, 25250936, "-", "12",
                                 "ENST00000557334_e1"),
                            Exon(25245274, 25245395, "-", "12",
                                 "ENST00000557334_e2"),
                            Exon(25209168, 25209911, "-", "12",
                                 "ENST00000557334_e3")
                        ], [
                            Protein(25209798, 25245384, "-", "12", None, None,
                                    None, [
                                        CDS(25245274, 25245384, "-", "12", ""),
                                        CDS(25209798, 25209911, "-", "12", "")
                                    ])
                        ]),
                    Transcript(
                        25209431, 25250803, "-", "12", "KRAS-201",
                        {"id": "ENST00000256078"}, None, [
                            Exon(25250751, 25250803, "-", "12",
                                 "ENST00000256078_e1"),
                            Exon(25245274, 25245395, "-", "12",
                                 "ENST00000256078_e2"),
                            Exon(25227234, 25227412, "-", "12",
                                 "ENST00000256078_e3"),
                            Exon(25225614, 25225773, "-", "12",
                                 "ENST00000256078_e4"),
                            Exon(25215437, 25215560, "-", "12",
                                 "ENST00000256078_e5"),
                            Exon(25209431, 25209911, "-", "12",
                                 "ENST00000256078_e6")
                        ], [
                            Protein(25215444, 25245384, "-", "12", None, None,
                                    None, [
                                        CDS(25245274, 25245384, "-", "12", ""),
                                        CDS(25227234, 25227412, "-", "12", ""),
                                        CDS(25225614, 25225773, "-", "12", ""),
                                        CDS(25215444, 25215560, "-", "12", "")
                                    ])
                        ]),
                    Transcript(
                        25233819, 25250929, "-", "12", "KRAS-203",
                        {"id": "ENST00000556131"}, None, [
                            Exon(25250764, 25250929, "-", "12",
                                 "ENST00000556131_e1"),
                            Exon(25245274, 25245395, "-", "12",
                                 "ENST00000556131_e2"),
                            Exon(25233819, 25235226, "-", "12",
                                 "ENST00000556131_e3")
                        ], [
                            Protein(25235209, 25245384, "-", "12", None, None,
                                    None, [
                                        CDS(25245274, 25245384, "-", "12", ""),
                                        CDS(25235209, 25235226, "-", "12", "")
                                    ])
                        ])
                ])
        ]
        with open(self.tmp_ensembl_in_gtf, "w") as FH_gtf:
            FH_gtf.write("""#!genome-build GRCh38.p12
#!genome-version GRCh38
#!genome-date 2013-12
#!genome-build-accession NCBI:GCA_000001405.27
#!genebuild-last-updated 2018-07
6	havana	gene	54770583	54771134	.	+	.	gene_id \"ENSG00000220635\"; gene_version \"2\"; gene_name \"KRASP1\"; gene_source \"havana\"; gene_biotype \"processed_pseudogene\";
6	havana	transcript	54770583	54771134	.	+	.	gene_id \"ENSG00000220635\"; gene_version \"2\"; transcript_id \"ENST00000407852\"; transcript_version \"2\"; gene_name \"KRASP1\"; gene_source \"havana\"; gene_biotype \"processed_pseudogene\"; transcript_name \"KRASP1-201\"; transcript_source \"havana\"; transcript_biotype \"processed_pseudogene\"; tag \"basic\"; transcript_support_level \"NA\";
6	havana	exon	54770583	54771134	.	+	.	gene_id \"ENSG00000220635\"; gene_version \"2\"; transcript_id \"ENST00000407852\"; transcript_version \"2\"; exon_number \"1\"; gene_name \"KRASP1\"; gene_source \"havana\"; gene_biotype \"processed_pseudogene\"; transcript_name \"KRASP1-201\"; transcript_source \"havana\"; transcript_biotype \"processed_pseudogene\"; exon_id \"ENSE00001550689\"; exon_version \"2\"; tag \"basic\"; transcript_support_level \"NA\";
12	ensembl_havana	gene	25204789	25250936	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\";
12	ensembl_havana	transcript	25204789	25250931	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	exon	25250751	25250931	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; exon_number \"1\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; exon_id \"ENSE00001189804\"; exon_version \"4\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	exon	25245274	25245395	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; exon_number \"2\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; exon_id \"ENSE00000936617\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	CDS	25245274	25245384	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; exon_number \"2\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; protein_id \"ENSP00000308495\"; protein_version \"3\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	start_codon	25245382	25245384	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; exon_number \"2\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	exon	25227234	25227412	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; exon_number \"3\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; exon_id \"ENSE00001719809\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	CDS	25227234	25227412	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; exon_number \"3\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; protein_id \"ENSP00000308495\"; protein_version \"3\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	exon	25225614	25225773	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; exon_number \"4\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; exon_id \"ENSE00001644818\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	CDS	25225614	25225773	.	-	1	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; exon_number \"4\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; protein_id \"ENSP00000308495\"; protein_version \"3\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	exon	25204789	25209911	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; exon_number \"5\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; exon_id \"ENSE00002456976\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	CDS	25209798	25209911	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; exon_number \"5\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; protein_id \"ENSP00000308495\"; protein_version \"3\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	stop_codon	25209795	25209797	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; exon_number \"5\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	five_prime_utr	25250751	25250931	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	five_prime_utr	25245385	25245395	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	three_prime_utr	25204789	25209794	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000311936\"; transcript_version \"7\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-202\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8702\"; tag \"basic\"; transcript_support_level \"1\";
12	havana	transcript	25209168	25250936	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000557334\"; transcript_version \"5\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-204\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; tag \"basic\"; transcript_support_level \"5\";
12	havana	exon	25250751	25250936	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000557334\"; transcript_version \"5\"; exon_number \"1\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-204\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00002446502\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"5\";
12	havana	exon	25245274	25245395	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000557334\"; transcript_version \"5\"; exon_number \"2\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-204\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000936617\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"5\";
12	havana	CDS	25245274	25245384	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000557334\"; transcript_version \"5\"; exon_number \"2\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-204\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000452512\"; protein_version \"1\"; tag \"basic\"; transcript_support_level \"5\";
12	havana	start_codon	25245382	25245384	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000557334\"; transcript_version \"5\"; exon_number \"2\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-204\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; tag \"basic\"; transcript_support_level \"5\";
12	havana	exon	25209168	25209911	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000557334\"; transcript_version \"5\"; exon_number \"3\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-204\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00002464674\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"5\";
12	havana	CDS	25209798	25209911	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000557334\"; transcript_version \"5\"; exon_number \"3\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-204\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000452512\"; protein_version \"1\"; tag \"basic\"; transcript_support_level \"5\";
12	havana	stop_codon	25209795	25209797	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000557334\"; transcript_version \"5\"; exon_number \"3\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-204\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; tag \"basic\"; transcript_support_level \"5\";
12	havana	five_prime_utr	25250751	25250936	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000557334\"; transcript_version \"5\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-204\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; tag \"basic\"; transcript_support_level \"5\";
12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000557334\"; transcript_version \"5\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-204\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; tag \"basic\"; transcript_support_level \"5\";
12	havana	three_prime_utr	25209168	25209794	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000557334\"; transcript_version \"5\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-204\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; tag \"basic\"; transcript_support_level \"5\";
12	ensembl_havana	transcript	25209431	25250803	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	exon	25250751	25250803	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; exon_number \"1\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; exon_id \"ENSE00002513959\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	exon	25245274	25245395	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; exon_number \"2\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; exon_id \"ENSE00000936617\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	CDS	25245274	25245384	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; exon_number \"2\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; protein_id \"ENSP00000256078\"; protein_version \"4\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	start_codon	25245382	25245384	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; exon_number \"2\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	exon	25227234	25227412	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; exon_number \"3\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; exon_id \"ENSE00001719809\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	CDS	25227234	25227412	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; exon_number \"3\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; protein_id \"ENSP00000256078\"; protein_version \"4\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	exon	25225614	25225773	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; exon_number \"4\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; exon_id \"ENSE00001644818\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	CDS	25225614	25225773	.	-	1	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; exon_number \"4\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; protein_id \"ENSP00000256078\"; protein_version \"4\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	exon	25215437	25215560	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; exon_number \"5\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; exon_id \"ENSE00001189807\"; exon_version \"5\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	CDS	25215444	25215560	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; exon_number \"5\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; protein_id \"ENSP00000256078\"; protein_version \"4\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	stop_codon	25215441	25215443	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; exon_number \"5\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	exon	25209431	25209911	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; exon_number \"6\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; exon_id \"ENSE00002477035\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	five_prime_utr	25250751	25250803	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	five_prime_utr	25245385	25245395	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	three_prime_utr	25215437	25215440	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; tag \"basic\"; transcript_support_level \"1\";
12	ensembl_havana	three_prime_utr	25209431	25209911	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000256078\"; transcript_version \"8\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-201\"; transcript_source \"ensembl_havana\"; transcript_biotype \"protein_coding\"; tag \"CCDS\"; ccds_id \"CCDS8703\"; tag \"basic\"; transcript_support_level \"1\";
12	havana	transcript	25233819	25250929	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000556131\"; transcript_version \"1\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-203\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; tag \"basic\"; transcript_support_level \"1\";
12	havana	exon	25250764	25250929	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000556131\"; transcript_version \"1\"; exon_number \"1\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-203\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00002530521\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	havana	exon	25245274	25245395	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000556131\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-203\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00000936617\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	havana	CDS	25245274	25245384	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000556131\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-203\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000451856\"; protein_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	havana	start_codon	25245382	25245384	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000556131\"; transcript_version \"1\"; exon_number \"2\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-203\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; tag \"basic\"; transcript_support_level \"1\";
12	havana	exon	25233819	25235226	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000556131\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-203\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; exon_id \"ENSE00002478081\"; exon_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	havana	CDS	25235209	25235226	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000556131\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-203\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; protein_id \"ENSP00000451856\"; protein_version \"1\"; tag \"basic\"; transcript_support_level \"1\";
12	havana	stop_codon	25235206	25235208	.	-	0	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000556131\"; transcript_version \"1\"; exon_number \"3\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-203\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; tag \"basic\"; transcript_support_level \"1\";
12	havana	five_prime_utr	25250764	25250929	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000556131\"; transcript_version \"1\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-203\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; tag \"basic\"; transcript_support_level \"1\";
12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000556131\"; transcript_version \"1\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-203\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; tag \"basic; other\"; transcript_support_level \"1\";
12	havana	three_prime_utr	25233819	25235205	.	-	.	gene_id \"ENSG00000133703\"; gene_version \"11\"; transcript_id \"ENST00000556131\"; transcript_version \"1\"; gene_name \"KRAS\"; gene_source \"ensembl_havana\"; gene_biotype \"protein_coding\"; transcript_name \"KRAS-203\"; transcript_source \"havana\"; transcript_biotype \"protein_coding\"; tag \"basic;\"; transcript_support_level \"1\";"""
                         )

        # NCBI GTF
        # Expected annotation tree for the NCBI-style GTF fixture written to
        # self.tmp_ncbi_in_gtf. That file contains only exon and CDS records,
        # so every gene, transcript and protein listed here is an aggregate of
        # those records:
        #   - gene name and {"id": ...} mirror the gene_name / gene_id
        #     attributes of the records;
        #   - transcript start/end equal the lowest/highest exon coordinate
        #     sharing its transcript_id;
        #   - protein start/end equal the lowest/highest CDS coordinate of the
        #     transcript.
        # Positional arguments look like (start, end, strand, region, name,
        # annotations, parent, children[, proteins]) -- presumably; confirm
        # against the Gene/Transcript/Protein class signatures.
        # The "<transcript_id>_e<N>" exon names do not appear in the GTF text;
        # presumably they are what the reader generates -- TODO confirm.
        self.ncbi_expected = [
            # Single-exon gene on chromosome 6.
            Gene(54635272, 54640529, "+", "6", "KRASP1", {"id": "3844"}, None,
                 [
                     Transcript(
                         54635272, 54640529, "+", "6", "gene14201",
                         {"id": "gene14201"}, None,
                         [Exon(54635272, 54640529, "+", "6", "gene14201_e1")])
                 ]),
            # Minus-strand gene with two coding transcripts (rna36549 and
            # rna36550) that differ by one exon and by their first CDS.
            Gene(25357723, 25403865, "-", "12", "KRAS", {"id": "3845"}, None, [
                Transcript(
                    25357723, 25403865, "-", "12", "rna36549",
                    {"id": "rna36549"}, None, [
                        Exon(25357723, 25362845, "-", "12", "rna36549_e1"),
                        Exon(25378548, 25378707, "-", "12", "rna36549_e2"),
                        Exon(25380168, 25380346, "-", "12", "rna36549_e3"),
                        # NOTE(review): numbering jumps from _e3 to _e5 here
                        # although the GTF holds five exon records for
                        # rna36549 (rna36550 below is numbered _e1.._e6
                        # contiguously). Confirm whether this is intentional
                        # or whether exon names are simply not compared.
                        Exon(25398208, 25398329, "-", "12", "rna36549_e5"),
                        Exon(25403685, 25403865, "-", "12", "rna36549_e6")
                    ], [
                        # CDS entries carry an empty-string name.
                        Protein(25362729, 25398318, "-", "12", None, None,
                                None, [
                                    CDS(25362729, 25362845, "-", "12", ""),
                                    CDS(25378548, 25378707, "-", "12", ""),
                                    CDS(25380168, 25380346, "-", "12", ""),
                                    CDS(25398208, 25398318, "-", "12", "")
                                ])
                    ]),
                Transcript(25357723, 25403865, "-", "12", "rna36550", {
                    "id": "rna36550"
                }, None, [
                    Exon(25357723, 25362845, "-", "12", "rna36550_e1"),
                    Exon(25368371, 25368494, "-", "12", "rna36550_e2"),
                    Exon(25378548, 25378707, "-", "12", "rna36550_e3"),
                    Exon(25380168, 25380346, "-", "12", "rna36550_e4"),
                    Exon(25398208, 25398329, "-", "12", "rna36550_e5"),
                    Exon(25403685, 25403865, "-", "12", "rna36550_e6")
                ], [
                    Protein(25368375, 25398318, "-", "12", None, None, None, [
                        CDS(25368375, 25368494, "-", "12", ""),
                        CDS(25378548, 25378707, "-", "12", ""),
                        CDS(25380168, 25380346, "-", "12", ""),
                        CDS(25398208, 25398318, "-", "12", ""),
                    ])
                ])
            ]),
            # The same gene_id ("401577") and gene_name occur on both X and Y;
            # the expectation is one separate Gene per chromosome.
            Gene(2527306, 2529079, "+", "X", "CD99P1", {"id": "401577"}, None,
                 [
                     Transcript(
                         2527306, 2529079, "+", "X", "rna58916",
                         {"id": "rna58916"}, None, [
                             Exon(2527306, 2527522, "+", "X", "rna58916_e1"),
                             Exon(2529037, 2529079, "+", "X", "rna58916_e2")
                         ])
                 ]),
            Gene(2477306, 2479079, "+", "Y", "CD99P1", {"id": "401577"}, None,
                 [
                     Transcript(
                         2477306, 2479079, "+", "Y", "rna61353",
                         {"id": "rna61353"}, None, [
                             Exon(2477306, 2477522, "+", "Y", "rna61353_e1"),
                             Exon(2479037, 2479079, "+", "Y", "rna61353_e2")
                         ])
                 ])
        ]
        # Write the NCBI-style GTF fixture to self.tmp_ncbi_in_gtf. The file
        # holds only exon and CDS records (no gene or transcript lines), each
        # with transcript_id, gene_id and gene_name attributes. The literal
        # begins right after the opening triple quote and ends right before
        # the closing one, so the file has neither a leading blank line nor a
        # trailing newline; the \" escapes produce plain double quotes.
        with open(self.tmp_ncbi_in_gtf, "w") as FH_gtf:
            FH_gtf.write(
                """6	Curated Genomic	exon	54635272	54640529	.	+	.	transcript_id \"gene14201\"; gene_id \"3844\"; gene_name \"KRASP1\";
12	BestRefSeq	exon	25357723	25362845	.	-	.	transcript_id \"rna36549\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	exon	25378548	25378707	.	-	.	transcript_id \"rna36549\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	exon	25380168	25380346	.	-	.	transcript_id \"rna36549\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	exon	25398208	25398329	.	-	.	transcript_id \"rna36549\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	exon	25403685	25403865	.	-	.	transcript_id \"rna36549\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	CDS	25362729	25362845	.	-	0	transcript_id \"rna36549\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	CDS	25378548	25378707	.	-	1	transcript_id \"rna36549\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	CDS	25380168	25380346	.	-	0	transcript_id \"rna36549\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	CDS	25398208	25398318	.	-	0	transcript_id \"rna36549\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	exon	25357723	25362845	.	-	.	transcript_id \"rna36550\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	exon	25368371	25368494	.	-	.	transcript_id \"rna36550\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	exon	25378548	25378707	.	-	.	transcript_id \"rna36550\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	exon	25380168	25380346	.	-	.	transcript_id \"rna36550\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	exon	25398208	25398329	.	-	.	transcript_id \"rna36550\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	exon	25403685	25403865	.	-	.	transcript_id \"rna36550\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	CDS	25368375	25368494	.	-	0	transcript_id \"rna36550\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	CDS	25378548	25378707	.	-	1	transcript_id \"rna36550\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	CDS	25380168	25380346	.	-	0	transcript_id \"rna36550\"; gene_id \"3845\"; gene_name \"KRAS\";
12	BestRefSeq	CDS	25398208	25398318	.	-	0	transcript_id \"rna36550\"; gene_id \"3845\"; gene_name \"KRAS\";
X	BestRefSeq	exon	2527306	2527522	.	+	.	transcript_id \"rna58916\"; gene_id \"401577\"; gene_name \"CD99P1\";
X	BestRefSeq	exon	2529037	2529079	.	+	.	transcript_id \"rna58916\"; gene_id \"401577\"; gene_name \"CD99P1\";
Y	BestRefSeq	exon	2477306	2477522	.	+	.	transcript_id \"rna61353\"; gene_id \"401577\"; gene_name \"CD99P1\";
Y	BestRefSeq	exon	2479037	2479079	.	+	.	transcript_id \"rna61353\"; gene_id \"401577\"; gene_name \"CD99P1\";"""
            )