예제 #1
0
def construct_gene_object(ensembl, transcript_id):
    """Build a Transcript for a single Ensembl transcript ID.

    Four lookups are made on ``ensembl`` (genomic sequence plus coordinates,
    CDS sequence, CDS ranges, exon ranges) and the results are loaded into a
    newly created Transcript.

    Args:
        ensembl: object providing the ``get_*_for_transcript`` methods
            called below (an EnsemblRequest, per the original documentation)
        transcript_id: string for an Ensembl transcript ID

    Returns:
        the Transcript, with exons, CDS coordinates, CDS sequence and
        genomic sequence set.

    Raises:
        ValueError: per the original documentation, when the CDS taken from
            the genomic sequence and the CDS retrieved from Ensembl do not
            match. NOTE(review): that check is not visible in this
            function; presumably it lives in the Transcript methods.
    """
    # the value sent as expand= when fetching the genomic sequence is also
    # the value given as offset= when attaching it to the transcript
    flank = 10

    # sequence lookups first (genomic, then CDS) ...
    region = ensembl.get_genomic_seq_for_transcript(transcript_id, expand=flank)
    chrom, start, end, strand, gdna = region
    cds_seq = ensembl.get_cds_seq_for_transcript(transcript_id)

    # ... then the coordinate lookups (CDS, then exons)
    cds_coords = ensembl.get_cds_ranges_for_transcript(transcript_id)
    exon_coords = ensembl.get_exon_ranges_for_transcript(transcript_id)

    # coordinates go in before sequences
    tx = Transcript(transcript_id, chrom, start, end, strand)
    tx.set_exons(exon_coords, cds_coords)
    tx.set_cds(cds_coords)
    tx.add_cds_sequence(cds_seq)
    tx.add_genomic_sequence(gdna, offset=flank)

    return tx
예제 #2
0
 def test_set_cds(self):
     """ check that set_cds() stores CDS ranges and fixes out-of-exon ends
     """

     exon_ranges = [(0, 200), (800, 1000)]
     cds_ranges = [(100, 200), (800, 900)]

     # setting the CDS on a transcript which has no exons yet must fail
     with self.assertRaises(ValueError):
         bare = Transcript('test', '1', 0, 1000, '+')
         bare.set_cds(cds_ranges)

     # CDS ranges that sit inside the exons come back unchanged
     self.gene.set_exons(exon_ranges, cds_ranges)
     self.gene.set_cds(cds_ranges)
     expected = [
         {'start': 100, 'end': 200},
         {'start': 800, 'end': 900},
     ]
     self.assertEqual(self.gene.get_cds(), expected)

     # a CDS end two bases past an exon end (402 vs 400) is expected to
     # spill into the start of the following exon
     exon_ranges = [(0, 200), (300, 400), (800, 1000)]
     cds_ranges = [(100, 200), (300, 402)]
     self.gene.set_exons(exon_ranges, cds_ranges)
     self.gene.set_cds(cds_ranges)
     expected = [
         {'start': 100, 'end': 200},
         {'start': 300, 'end': 400},
         {'start': 800, 'end': 802},
     ]
     self.assertEqual(self.gene.get_cds(), expected)

     # a CDS start two bases before an exon start (298 vs 300) is expected
     # to spill into the end of the preceding exon
     cds_ranges = [(298, 400), (800, 1000)]
     self.gene.set_exons(exon_ranges, cds_ranges)
     self.gene.set_cds(cds_ranges)
     expected = [
         {'start': 198, 'end': 200},
         {'start': 300, 'end': 400},
         {'start': 800, 'end': 1000},
     ]
     self.assertEqual(self.gene.get_cds(), expected)
예제 #3
0
async def construct_gene_object(ensembl, transcript_id):
    """Build a Transcript for a single Ensembl transcript ID.

    The four lookups (genomic sequence plus coordinates, CDS sequence, CDS
    ranges, exon ranges) are awaited together through asyncio.gather()
    before the Transcript is assembled.

    Args:
        ensembl: object handed to each lookup helper (an EnsemblRequest,
            per the original documentation)
        transcript_id: string for an Ensembl transcript ID

    Returns:
        the Transcript, with exons, CDS coordinates, CDS sequence and
        genomic sequence set.

    Raises:
        ValueError: per the original documentation, when the CDS taken from
            the genomic sequence and the CDS retrieved from Ensembl do not
            match. NOTE(review): that check is not visible in this
            function; presumably it lives in the Transcript methods.
    """
    # gather() returns results in the order the awaitables are listed, so
    # the unpacking below relies on this ordering
    lookups = (
        get_genomic_seq_for_transcript(ensembl, transcript_id, expand=10),
        get_cds_seq_for_transcript(ensembl, transcript_id),
        get_cds_ranges_for_transcript(ensembl, transcript_id),
        get_exon_ranges_for_transcript(ensembl, transcript_id),
    )
    region, cds_sequence, cds_ranges, exon_ranges = await asyncio.gather(*lookups)
    chrom, start, end, strand, genomic_sequence = region

    # coordinates go in before sequences
    tx = Transcript(transcript_id, chrom, start, end, strand)
    tx.set_exons(exon_ranges, cds_ranges)
    tx.set_cds(cds_ranges)
    tx.add_cds_sequence(cds_sequence)
    tx.add_genomic_sequence(genomic_sequence, offset=10)

    return tx
예제 #4
0
 def construct_gene(self, name='TEST', chrom='1', start=100, end=179,
         strand='+', exons=None, cds=None):
     """ build a Transcript with exon and CDS coordinates set

     Args:
         name, chrom, start, end, strand: passed straight through to the
             Transcript constructor.
         exons: list of (start, end) tuples for the exons. When omitted or
             None, [(100, 119), (160, 179)] is used.
         cds: list of (start, end) tuples for the CDS. When omitted or None,
             [(110, 119), (160, 170)] is used.

     Returns:
         the Transcript, after set_exons() and set_cds() have been called.
     """

     # the default lists are created per call instead of in the signature,
     # so no list object is shared between separate calls
     if exons is None:
         exons = [(100, 119), (160, 179)]
     if cds is None:
         cds = [(110, 119), (160, 170)]

     tx = Transcript(name, chrom, start, end, strand)
     tx.set_exons(exons, cds)
     tx.set_cds(cds)

     return tx
예제 #5
0
def construct_gene_object(ensembl, transcript_id):
    """Assemble a Transcript from data requested for one transcript ID.

    Args:
        ensembl: object providing the ``get_*_for_transcript`` methods
            called below (an EnsemblRequest, per the original documentation)
        transcript_id: string for an Ensembl transcript ID

    Returns:
        the Transcript, carrying its coordinates, exon and CDS ranges, CDS
        sequence and genomic sequence.

    Raises:
        ValueError: per the original documentation, when the CDS taken from
            the genomic sequence and the CDS retrieved from Ensembl do not
            match. NOTE(review): that check is not visible in this
            function; presumably it lives in the Transcript methods.
    """

    # one number is used both as expand= for the genomic request and as
    # offset= when the sequence is attached to the transcript
    padding = 10

    # request order is kept: genomic sequence, CDS sequence, CDS ranges,
    # exon ranges
    genomic_result = ensembl.get_genomic_seq_for_transcript(
        transcript_id, expand=padding)
    chrom, start, end, strand, genomic = genomic_result
    coding = ensembl.get_cds_seq_for_transcript(transcript_id)
    cds = ensembl.get_cds_ranges_for_transcript(transcript_id)
    exons = ensembl.get_exon_ranges_for_transcript(transcript_id)

    # populate the transcript: ranges first, then the two sequences
    result = Transcript(transcript_id, chrom, start, end, strand)
    result.set_exons(exons, cds)
    result.set_cds(cds)
    result.add_cds_sequence(coding)
    result.add_genomic_sequence(genomic, offset=padding)

    return result
예제 #6
0
 def test_merge_coordinates(self):
     """ merging oddly overlapping exon sets gives the same result in
     either argument order
     """

     tx = Transcript("a", "1", 10, 20, "+")

     # the single exon in the second set overlaps both exons of the first
     split_exons = [{'start': 10, 'end': 20}, {'start': 25, 'end': 40}]
     spanning_exon = [{'start': 10, 'end': 30}]

     forward = tx.merge_coordinates(split_exons, spanning_exon)
     backward = tx.merge_coordinates(spanning_exon, split_exons)
     self.assertEqual(forward, backward)
예제 #7
0
    def test_add_genomic_sequencE_without_cds_coords(self):
        """ adding gDNA before the CDS coords are set raises ValueError
        """

        gdna = 'CGTAGACTGTACGCATCGATT'

        tx = Transcript("a", "1", 10, 20, "+")
        tx.set_exons([(10, 20)], [(10, 20)])

        # only set_exons() has been called so far, so this must fail
        with self.assertRaises(ValueError):
            tx.add_genomic_sequence(gdna, offset=5)

        # once set_cds() has been called the same call goes through
        tx.set_cds([(10, 20)])
        tx.add_genomic_sequence(gdna, offset=5)
예제 #8
0
    def test_fix_cds_boundary(self):
        """ check the ranges fix_cds_boundary() gives for positions that
        fall between exons
        """

        exon_ranges = [(1100, 1200), (1300, 1400), (1800, 1900)]
        cds_ranges = [(1300, 1400)]

        transcript = Transcript('test', '1', 0, 1000, '+')
        transcript.set_exons(exon_ranges, cds_ranges)

        # (position outside the exons, expected corrected range)
        cases = [
            (1295, {'start': 1195, 'end': 1200}),
            (1205, {'start': 1300, 'end': 1305}),
            (1402, {'start': 1800, 'end': 1802}),
            (1798, {'start': 1398, 'end': 1400}),
        ]
        for position, expected in cases:
            self.assertEqual(transcript.fix_cds_boundary(position), expected)

        # a position inside an exon is rejected; note this check runs on
        # self.gene rather than the transcript built above
        with self.assertRaises(ValueError):
            self.gene.fix_cds_boundary(1105)
 def test_add_genomic_sequencE_without_cds_coords(self):
     """ genomic sequence cannot be added until CDS coords have been set
     """

     sequence = 'CGTAGACTGTACGCATCGATT'
     transcript = Transcript("a", "1", 10, 20, "+")
     transcript.set_exons([(10, 20)], [(10, 20)])

     # exons alone are not enough: a ValueError is expected here
     with self.assertRaises(ValueError):
         transcript.add_genomic_sequence(sequence, offset=5)

     # after set_cds() the identical call is expected to succeed
     transcript.set_cds([(10, 20)])
     transcript.add_genomic_sequence(sequence, offset=5)
예제 #10
0
 def construct_gene(self):
     """ build a single-exon Transcript with CDS and genomic sequence set

     Returns:
         Transcript "TEST" on chromosome "1", + strand, spanning 0-70, with
         one exon and one CDS range at (5, 58) and both sequences attached.
     """

     exon_ranges = [(5, 58)]
     cds_ranges = [(5, 58)]

     tx = Transcript("TEST", "1", 0, 70, "+")
     tx.set_exons(exon_ranges, cds_ranges)
     tx.set_cds(cds_ranges)

     # the sequences are attached after the coordinates; the genomic
     # sequence is added without an explicit offset argument
     cds_seq = "ATGTGGGCTCCACCAGCAGCAATCATGGGATGGGCCCACCAAGAAGGTGGGTAA"
     genomic_seq = "GGGGGATGTGGGCTCCACCAGCAGCAATCATGGGATGGGCCCACCAAGAAGGTGGGTAACCAGGCCCC"
     tx.add_cds_sequence(cds_seq)
     tx.add_genomic_sequence(genomic_seq)

     return tx
예제 #11
0
 def test_fix_cds_boundary(self):
     """ check the corrected ranges from fix_cds_boundary() for positions
     lying between exons
     """

     exon_ranges = [(1100, 1200), (1300, 1400), (1800, 1900)]
     cds_ranges = [(1300, 1400)]

     transcript = Transcript('test', '1', 0, 1000, '+')
     transcript.set_exons(exon_ranges, cds_ranges)

     # position outside the exons -> range expected back
     expectations = (
         (1295, {'start': 1195, 'end': 1200}),
         (1205, {'start': 1300, 'end': 1305}),
         (1402, {'start': 1800, 'end': 1802}),
         (1798, {'start': 1398, 'end': 1400}),
     )
     for site, fixed in expectations:
         self.assertEqual(transcript.fix_cds_boundary(site), fixed)

     # a position within an exon must raise; this is checked on self.gene,
     # not on the transcript created above
     with self.assertRaises(ValueError):
         self.gene.fix_cds_boundary(1105)
예제 #12
0
 def test_get_de_novos_in_transcript(self):
     """ only sites inside the CDS of a transcript are returned
     """

     exons = [(10, 20), (30, 40), (90, 100)]
     cds = [(30, 40), (90, 95)]

     # a small transcript whose first exon is entirely non-coding
     transcript = Transcript("test1", '1', 10, 100, "+")
     transcript.set_exons(exons, cds)
     transcript.set_cds(cds)

     # (candidate sites, sites expected back)
     cases = [
         ([15, 35, 100], [35]),      # a single site lies in the CDS
         ([15, 35, 90], [35, 90]),   # two sites lie in the CDS
         ([], []),                   # nothing in, nothing out
     ]
     for candidates, in_cds in cases:
         self.assertEqual(
             get_de_novos_in_transcript(transcript, candidates), in_cds)
예제 #13
0
 def construct_gene(self, name='TEST', chrom='1', start=1000, end=2000,
         strand='+', exons=None, cds=None):
     """ build a Transcript with exon and CDS coordinates set

     Args:
         name, chrom, start, end, strand: passed straight through to the
             Transcript constructor.
         exons: list of (start, end) tuples for the exons. When omitted or
             None, [(1000, 1200), (1800, 2000)] is used.
         cds: list of (start, end) tuples for the CDS. When omitted or None,
             [(1100, 1200), (1800, 1900)] is used.

     Returns:
         the Transcript, after set_exons() and set_cds() have been called.
     """

     # the default lists are created per call instead of in the signature,
     # so no list object is shared between separate calls
     if exons is None:
         exons = [(1000, 1200), (1800, 2000)]
     if cds is None:
         cds = [(1100, 1200), (1800, 1900)]

     tx = Transcript(name, chrom, start, end, strand)
     tx.set_exons(exons, cds)
     tx.set_cds(cds)

     return tx
예제 #14
0
    def construct_gene(self,
                       name='TEST',
                       chrom='1',
                       start=100,
                       end=179,
                       strand='+',
                       exons=None,
                       cds=None):
        """ build a Transcript with exon and CDS coordinates set

        Args:
            name, chrom, start, end, strand: passed straight through to the
                Transcript constructor.
            exons: list of (start, end) tuples for the exons. When omitted
                or None, [(100, 119), (160, 179)] is used.
            cds: list of (start, end) tuples for the CDS. When omitted or
                None, [(110, 119), (160, 170)] is used.

        Returns:
            the Transcript, after set_exons() and set_cds() have been
            called.
        """

        # the default lists are created per call instead of in the
        # signature, so no list object is shared between separate calls
        if exons is None:
            exons = [(100, 119), (160, 179)]
        if cds is None:
            cds = [(110, 119), (160, 170)]

        tx = Transcript(name, chrom, start, end, strand)
        tx.set_exons(exons, cds)
        tx.set_cds(cds)

        return tx
예제 #15
0
    def test_get_de_novos_in_transcript(self):
        """ sites are kept only when they lie in the transcript's CDS
        """

        exons = [(10, 20), (30, 40), (90, 100)]
        cds = [(30, 40), (90, 95)]

        # small three-exon transcript; the CDS covers the second exon and
        # the first part of the third
        transcript = Transcript("test1", '1', 10, 100, "+")
        transcript.set_exons(exons, cds)
        transcript.set_cds(cds)

        # one of three sites falls in the CDS
        found = get_de_novos_in_transcript(transcript, [15, 35, 100])
        self.assertEqual(found, [35])

        # two of three sites fall in the CDS
        found = get_de_novos_in_transcript(transcript, [15, 35, 90])
        self.assertEqual(found, [35, 90])

        # an empty list of sites gives an empty list back
        found = get_de_novos_in_transcript(transcript, [])
        self.assertEqual(found, [])
예제 #16
0
 def test___add__cds_length_fixed(self):
     """ merging transcripts must not raise, even with fixed CDS coords
     """

     cds_seq = 'ACTGTACGCAT'

     first = Transcript("a", "1", 10, 20, "+")
     first.set_exons([(10, 20)], [(10, 20)])
     first.set_cds([(10, 20)])
     first.add_cds_sequence(cds_seq)
     first.add_genomic_sequence('CGTAGACTGTACGCATCGATT', offset=5)

     second = Transcript("b", "1", 0, 10, "+")
     second.set_exons([(0, 10)], [(0, 10)])
     second.set_cds([(0, 10)])
     second.add_cds_sequence(cds_seq)
     second.add_genomic_sequence('CGTAGACTGTACGCATCGTAG', offset=5)

     # there is no assertion: the test passes when the addition completes.
     # According to the original author, this raised an error until tx.cpp
     # was fixed to adjust an exon coordinate at the same time.
     merged = first + second
예제 #17
0
 def test___add__not_overlapping(self):
     ''' check __add__() on two transcripts which do not overlap
     '''

     left = Transcript("a", "1", 10, 50, "+")
     right = Transcript("b", "1", 60, 80, "+")

     left_range = [(10, 50)]
     left.set_exons(left_range, [(10, 50)])
     left.set_cds(left_range)
     left.add_genomic_sequence('N' * 40)

     right_range = [(60, 80)]
     right.set_exons(right_range, [(60, 80)])
     right.set_cds(right_range)
     right.add_genomic_sequence('N' * 20)

     # the merged genomic sequence is expected to cover the full 10-80 span
     merged = left + right
     self.assertEqual(len(merged.get_genomic_sequence()), 70)
예제 #18
0
 def test___add__(self):
     """ check the CDS regions produced by adding transcripts together
     """

     exons = [(10, 20), (50, 60), (90, 100)]
     cds_1 = [(55, 60), (90, 100)]
     cds_2 = [(50, 60), (90, 95)]

     a = Transcript("a", "1", 10, 100, "+")
     b = Transcript("b", "1", 10, 100, "+")
     c = Transcript("c", "1", 10, 100, "+")
     d = Transcript("d", "1", 10, 100, "+")

     # a and b share exons but differ in where the CDS starts and ends
     a.set_exons(exons, cds_1)
     a.set_cds(cds_1)

     b.set_exons(exons, cds_2)
     b.set_cds(cds_2)

     # c has one exon wider than the middle exon of a and b
     c.set_exons([(45, 65)], [(45, 65)])
     c.set_cds([(45, 65)])

     # d has one exon in a region where a has none
     d.set_exons([(30, 40)], [(30, 40)])
     d.set_cds([(30, 40)])

     # the sum of two transcripts holds the union of their CDS regions
     union_ab = [{'start': 50, 'end': 60}, {'start': 90, 'end': 100}]
     union_ac = [{'start': 45, 'end': 65}, {'start': 90, 'end': 100}]
     self.assertEqual((a + b).get_cds(), union_ab)
     self.assertEqual((a + c).get_cds(), union_ac)

     # swapping the operands gives the same CDS
     self.assertEqual((c + a).get_cds(), union_ac)

     # an exon unknown to the left operand is included in the result
     union_ad = [
         {'start': 30, 'end': 40},
         {'start': 55, 'end': 60},
         {'start': 90, 'end': 100},
     ]
     self.assertEqual((a + d).get_cds(), union_ad)

     # None on either side leaves the transcript as it was
     self.assertEqual(a + None, a)
     self.assertEqual(None + a, a)
예제 #19
0
 def set_transcript(self):
     """ construct the expected Transcript for ENST00000242577
     
     The coordinates and sequences are hard-coded: chromosome 12, + strand,
     three exons, with a CDS starting inside the second exon and ending
     inside the third.
     
     Returns:
         the Transcript with exons, CDS coordinates, CDS sequence and
         genomic sequence all set.
     """
     
     # three exons; the CDS ranges below start within the second exon and
     # stop within the third
     exon_ranges=[(120933859, 120934019), (120934219, 120934356),
         (120935876, 120936296)]
     cds_ranges=[(120934225, 120934356), (120935876, 120936013)]
     
     # coordinates are set before either sequence is added
     expected = Transcript("ENST00000242577", '12', 120933859, 120936296, "+")
     expected.set_exons(exon_ranges, cds_ranges)
     expected.set_cds(cds_ranges)
     
     # CDS sequence, one string written as adjacent literals joined by
     # line continuations
     cds = "ATGTGCGACCGAAAGGCCGTGATCAAAAATGCGGACATGTCGGAAGAGATGCAACAGGACTC" \
         "GGTGGAGTGCGCTACTCAGGCGCTGGAGAAATACAACATAGAGAAGGACATTGCGGCTCATATC" \
         "AAGAAGGAATTTGACAAGAAGTACAATCCCACCTGGCATTGCATCGTGGGGAGGAACTTCGGTA" \
         "GTTATGTGACACATGAAACCAAACACTTCATCTACTTCTACCTGGGCCAAGTGGCCATTCTTCT" \
         "GTTCAAATCTGGTTAA"
     
     # genomic sequence, written the same way
     # NOTE(review): presumably this includes the extra flanking bases
     # accounted for by offset=10 below - confirm
     genomic = "GGGCGGGGCCGGCGGGAGCAGGGCGGGGCCTGAGCACTAGGCGGCGGCGGCTGGCGTGGG" \
         "GCTGCTTAGATGCGCCACGGTTTCGGTAGCGACGGTATCTCTAGCCGGGCCTGAGCTGTGCTAGCA" \
         "CCTCCCCCAGGAGACCGTTGCAGTCGGCCAGCCCCCTTCTCCACGGTGAGAAACTCGGGGGGCCAG" \
         "GGGGTGTCCTCGCTGCCTTATTTCGCCCCACTCCGGACTTAGCCCTCCGCGTAGCCCGCGCTTCCT" \
         "GAGAAGTGGGGTGGGGGGCGTCGTCCCGTGGTGGCGCCGGCCGGGGTGGGGGCAGTTAGTGCCTGG" \
         "GGGGCGCGGCCCAACTCAACCCCTTACCCCAGGCCTTGCCCACTAGGTAACCATGTGCGACCGAAA" \
         "GGCCGTGATCAAAAATGCGGACATGTCGGAAGAGATGCAACAGGACTCGGTGGAGTGCGCTACTCA" \
         "GGCGCTGGAGAAATACAACATAGAGAAGGACATTGCGGCTCATATCAAGAAGGTGAGGATGGGCGC" \
         "GGGGGCCGATACGCAGCCGGGAGCAGGGGGTTCCTTCCCCCCGATCCTGCTTTCCTAAGGGCGCCT" \
         "GACAGGTCCCGGGAATACTGCTGGCGGCTTGGGGCGTAGAAGCTTCCAGAAAGGACGCAGATGCAT" \
         "TTTGCGCTCCTGTGGAGAAGACCAGACCCCCGGCGTCCGAAGTTTTTTTTTTTTTTTTTTTAATTA" \
         "CCCAGCTCCGCGGGGGGAAAGCGCCACCTAGCAACGGTATCTAAGATCAGGGAGCAGCGGTTCCCC" \
         "CTTCTGTGTGGTTCCTGCGCCGAGGATCCATCTGGGTGTTCCGGAGGGGGGAGCTGCGTGGGTGTT" \
         "TCCAGCCGGGCCGGGAGGAGATCTTGCCAGCCTTCCAGTGGGGAGTTGAGGGAAGGTGGTGGGTGG" \
         "TGGCGGGGCTGGGGGCTGGGGTAGGGGCTTGGTAAATGGCAGTCTAGAAAGCCGGCAGGACTGCCA" \
         "ACTTCTCGAGCAGTGTTTGCTGGAAGGGAAGAAAGCTGGCAGCCTAAGCCGTGGGAGGGTTCCAGT" \
         "CGAGAATGGGAAGATGAAAGACTTCAGATGGAACAGAAATAAATGCCTTTTTTGACAAACGCAGCA" \
         "GTGCGTGCCTCTAGCTTGCAAGAGCGTTACTCCCCTTCATAGCTTTAAAAGGTTTTCGCACTGCGT" \
         "GCAGTTAGAGTAGCTAAATCTTGTGTGACGCTCCACAAACACTTGTAAGAATTTTGCAGAGAAAGA" \
         "TAACCGTTGCCACCCAATGCCCCCCACAGGCATTCTACTCCCCAGTACCTCTTAGGGTGGGAGAAA" \
         "TGGTGAAGAGTTGTTCCTACAACTTGCTAACCTAGTGGACAGGGTAGTAGATTAGCATCATCCGGA" \
         "TAGATGTGAAGAGGACGGCTGTTTGGATAATAATTAAGGATAAAATTTGGCCAGTTGACAGATTCT" \
         "GTTTCCAGCAGTTTTTACAGCAACAGTGGAGTGCTTCAGTATTGTGTTCCTGTAAATTTAATTTTG" \
         "ATCCGCAATCATTTGGTATACAATGCTGTTTGAAGTTTTGTCCTATTGGAAAAGTCTTGTGTTGCA" \
         "GGGGTGCAGTTAAGATCTTTGTGATGAGGAATGGGATGGGCTAATTTTTTGCCGTTTTCTTGGAAT" \
         "TGGGGGCATGGCAAATACAGTAGGGTAGTTTAGTTCTCTACACAGAACATGATAAACTACACCTGT" \
         "TGATGTCACCGTCTGTCAATGAATATTATAGAAGGTATGAAGGTGTAATTACCATAATAACAAAAC" \
         "ACCCTGTCTTTAGGGCTGACCTTTCGTCCTTTGACCTCCTCAGCCTCCATTCCCATCTTCGCTCAG" \
         "ACTGCAAGTATGTTTGTATTAATGTACTATGTAGGCGGCTTGGAGCTGGGGAACATTCTTTCATTC" \
         "TAAGAATTTGCAGATGCTGACGTTCCTCCTTTCTGCCCCTACAGGCTCTGGCTTATCCAAGAGGCA" \
         "AACACTGACCTCTGGTAATTAAAATCCTAGTTCTTTTCTTTTGTCTTTTCCAGGAATTTGACAAGA" \
         "AGTACAATCCCACCTGGCATTGCATCGTGGGGAGGAACTTCGGTAGTTATGTGACACATGAAACCA" \
         "AACACTTCATCTACTTCTACCTGGGCCAAGTGGCCATTCTTCTGTTCAAATCTGGTTAAAAGCATG" \
         "GACTGTGCCACACACCCAGTGATCCATCCAAAAACAAGGACTGCAGCCTAAATTCCAAATACCAGA" \
         "GACTGAAATTTTCAGCCTTGCTAAGGGAACATCTCGATGTTTGAACCTTTGTTGTGTTTTGTACAG" \
         "GGCATTCTCTGTACTAGTTTGTCGTGGTTATAAAACAATTAGCAGAATAGCCTACATTTGTATTTA" \
         "TTTTCTATTCCATACTTCTGCCCACGTTGTTTTCTCTCAAAATCCATTCCTTTAAAAAATAAATCT" \
         "GATGCAGATGTGTATGTGTGTG"
     
     # attach both sequences to the transcript
     expected.add_cds_sequence(cds)
     expected.add_genomic_sequence(genomic, offset=10)
     
     return expected
예제 #20
0
    def set_transcript(self):
        """ construct the expected Transcript for ENST00000242577

        The coordinates and sequences are hard-coded: chromosome 12, +
        strand, three exons, with a CDS starting inside the second exon and
        ending inside the third.

        Returns:
            the Transcript with exons, CDS coordinates, CDS sequence and
            genomic sequence all set.
        """

        # three exons; the CDS ranges below start within the second exon and
        # stop within the third
        exon_ranges = [(120933859, 120934019), (120934219, 120934356),
                       (120935876, 120936296)]
        cds_ranges = [(120934225, 120934356), (120935876, 120936013)]

        # coordinates are set before either sequence is added
        expected = Transcript("ENST00000242577", '12', 120933859, 120936296,
                              "+")
        expected.set_exons(exon_ranges, cds_ranges)
        expected.set_cds(cds_ranges)

        # CDS sequence, one string written as adjacent literals joined by
        # line continuations
        cds = "ATGTGCGACCGAAAGGCCGTGATCAAAAATGCGGACATGTCGGAAGAGATGCAACAGGACTC" \
            "GGTGGAGTGCGCTACTCAGGCGCTGGAGAAATACAACATAGAGAAGGACATTGCGGCTCATATC" \
            "AAGAAGGAATTTGACAAGAAGTACAATCCCACCTGGCATTGCATCGTGGGGAGGAACTTCGGTA" \
            "GTTATGTGACACATGAAACCAAACACTTCATCTACTTCTACCTGGGCCAAGTGGCCATTCTTCT" \
            "GTTCAAATCTGGTTAA"

        # genomic sequence, written the same way
        # NOTE(review): presumably this includes the extra flanking bases
        # accounted for by offset=10 below - confirm
        genomic = "GGGCGGGGCCGGCGGGAGCAGGGCGGGGCCTGAGCACTAGGCGGCGGCGGCTGGCGTGGG" \
            "GCTGCTTAGATGCGCCACGGTTTCGGTAGCGACGGTATCTCTAGCCGGGCCTGAGCTGTGCTAGCA" \
            "CCTCCCCCAGGAGACCGTTGCAGTCGGCCAGCCCCCTTCTCCACGGTGAGAAACTCGGGGGGCCAG" \
            "GGGGTGTCCTCGCTGCCTTATTTCGCCCCACTCCGGACTTAGCCCTCCGCGTAGCCCGCGCTTCCT" \
            "GAGAAGTGGGGTGGGGGGCGTCGTCCCGTGGTGGCGCCGGCCGGGGTGGGGGCAGTTAGTGCCTGG" \
            "GGGGCGCGGCCCAACTCAACCCCTTACCCCAGGCCTTGCCCACTAGGTAACCATGTGCGACCGAAA" \
            "GGCCGTGATCAAAAATGCGGACATGTCGGAAGAGATGCAACAGGACTCGGTGGAGTGCGCTACTCA" \
            "GGCGCTGGAGAAATACAACATAGAGAAGGACATTGCGGCTCATATCAAGAAGGTGAGGATGGGCGC" \
            "GGGGGCCGATACGCAGCCGGGAGCAGGGGGTTCCTTCCCCCCGATCCTGCTTTCCTAAGGGCGCCT" \
            "GACAGGTCCCGGGAATACTGCTGGCGGCTTGGGGCGTAGAAGCTTCCAGAAAGGACGCAGATGCAT" \
            "TTTGCGCTCCTGTGGAGAAGACCAGACCCCCGGCGTCCGAAGTTTTTTTTTTTTTTTTTTTAATTA" \
            "CCCAGCTCCGCGGGGGGAAAGCGCCACCTAGCAACGGTATCTAAGATCAGGGAGCAGCGGTTCCCC" \
            "CTTCTGTGTGGTTCCTGCGCCGAGGATCCATCTGGGTGTTCCGGAGGGGGGAGCTGCGTGGGTGTT" \
            "TCCAGCCGGGCCGGGAGGAGATCTTGCCAGCCTTCCAGTGGGGAGTTGAGGGAAGGTGGTGGGTGG" \
            "TGGCGGGGCTGGGGGCTGGGGTAGGGGCTTGGTAAATGGCAGTCTAGAAAGCCGGCAGGACTGCCA" \
            "ACTTCTCGAGCAGTGTTTGCTGGAAGGGAAGAAAGCTGGCAGCCTAAGCCGTGGGAGGGTTCCAGT" \
            "CGAGAATGGGAAGATGAAAGACTTCAGATGGAACAGAAATAAATGCCTTTTTTGACAAACGCAGCA" \
            "GTGCGTGCCTCTAGCTTGCAAGAGCGTTACTCCCCTTCATAGCTTTAAAAGGTTTTCGCACTGCGT" \
            "GCAGTTAGAGTAGCTAAATCTTGTGTGACGCTCCACAAACACTTGTAAGAATTTTGCAGAGAAAGA" \
            "TAACCGTTGCCACCCAATGCCCCCCACAGGCATTCTACTCCCCAGTACCTCTTAGGGTGGGAGAAA" \
            "TGGTGAAGAGTTGTTCCTACAACTTGCTAACCTAGTGGACAGGGTAGTAGATTAGCATCATCCGGA" \
            "TAGATGTGAAGAGGACGGCTGTTTGGATAATAATTAAGGATAAAATTTGGCCAGTTGACAGATTCT" \
            "GTTTCCAGCAGTTTTTACAGCAACAGTGGAGTGCTTCAGTATTGTGTTCCTGTAAATTTAATTTTG" \
            "ATCCGCAATCATTTGGTATACAATGCTGTTTGAAGTTTTGTCCTATTGGAAAAGTCTTGTGTTGCA" \
            "GGGGTGCAGTTAAGATCTTTGTGATGAGGAATGGGATGGGCTAATTTTTTGCCGTTTTCTTGGAAT" \
            "TGGGGGCATGGCAAATACAGTAGGGTAGTTTAGTTCTCTACACAGAACATGATAAACTACACCTGT" \
            "TGATGTCACCGTCTGTCAATGAATATTATAGAAGGTATGAAGGTGTAATTACCATAATAACAAAAC" \
            "ACCCTGTCTTTAGGGCTGACCTTTCGTCCTTTGACCTCCTCAGCCTCCATTCCCATCTTCGCTCAG" \
            "ACTGCAAGTATGTTTGTATTAATGTACTATGTAGGCGGCTTGGAGCTGGGGAACATTCTTTCATTC" \
            "TAAGAATTTGCAGATGCTGACGTTCCTCCTTTCTGCCCCTACAGGCTCTGGCTTATCCAAGAGGCA" \
            "AACACTGACCTCTGGTAATTAAAATCCTAGTTCTTTTCTTTTGTCTTTTCCAGGAATTTGACAAGA" \
            "AGTACAATCCCACCTGGCATTGCATCGTGGGGAGGAACTTCGGTAGTTATGTGACACATGAAACCA" \
            "AACACTTCATCTACTTCTACCTGGGCCAAGTGGCCATTCTTCTGTTCAAATCTGGTTAAAAGCATG" \
            "GACTGTGCCACACACCCAGTGATCCATCCAAAAACAAGGACTGCAGCCTAAATTCCAAATACCAGA" \
            "GACTGAAATTTTCAGCCTTGCTAAGGGAACATCTCGATGTTTGAACCTTTGTTGTGTTTTGTACAG" \
            "GGCATTCTCTGTACTAGTTTGTCGTGGTTATAAAACAATTAGCAGAATAGCCTACATTTGTATTTA" \
            "TTTTCTATTCCATACTTCTGCCCACGTTGTTTTCTCTCAAAATCCATTCCTTTAAAAAATAAATCT" \
            "GATGCAGATGTGTATGTGTGTG"

        # attach both sequences to the transcript
        expected.add_cds_sequence(cds)
        expected.add_genomic_sequence(genomic, offset=10)

        return expected