def test_standard_code_lookup(self):
        """GeneticCodes entries are reachable by integer and by string id.

        A code built from ``self.ncbi_standard`` is compared, attribute by
        attribute, with ``GeneticCodes[1]`` and ``GeneticCodes['1']``; the
        ``changes`` method is then checked against ``GeneticCodes[2]``.
        """
        standard_aas = ('FFLLSSSSYY**CC*W'
                        'LLLLPPPPHHQQRRRR'
                        'IIIMTTTTNNKKSSRR'
                        'VVVVAAAADDEEGGGG')
        built = GeneticCode(*self.ncbi_standard)
        by_number = GeneticCodes[1]
        by_string = GeneticCodes['1']

        # all three objects are expected to expose identical properties
        for code in (built, by_number, by_string):
            self.assertEqual(code.code_sequence, standard_aas)
            self.assertEqual(
                code.start_codon_sequence,
                '---M---------------M--'
                '-------------M----------------------------')
            self.assertEqual(code.start_codons,
                             dict.fromkeys(('TTG', 'CTG', 'ATG'), 'M'))
            self.assertEqual(code.id, 1)
            self.assertEqual(code.name, 'Standard Nuclear')
            self.assertEqual(code['TTT'], 'F')
            for codon, flag in (('ATG', True), ('AAA', False)):
                self.assertEqual(code.is_start(codon), flag)
            for codon, flag in (('TAA', True), ('AAA', False)):
                self.assertEqual(code.is_stop(codon), flag)

        mito = GeneticCodes[2]
        self.assertEqual(mito.name, 'Vertebrate Mitochondrial')
        self.assertEqual(mito.is_start('AUU'), True)
        self.assertEqual(mito.is_stop('UGA'), False)

        # expected differences, keyed by codon, for each comparison direction
        std_vs_mito = {'AGA': 'R*', 'AGG': 'R*', 'ATA': 'IM', 'TGA': '*W'}
        mito_vs_std = {'AGA': '*R', 'AGG': '*R', 'ATA': 'MI', 'TGA': 'W*'}

        self.assertEqual(built.changes(mito), std_vs_mito)
        self.assertEqual(mito.changes(built), mito_vs_std)
        self.assertEqual(mito.changes(mito), {})
        # the same comparison, passing the code as a plain string
        self.assertEqual(mito.changes(standard_aas), mito_vs_std)
    def test_translate_six_frames(self):
        """translate_six_frames returns the expected six Protein objects.

        Exercised twice with the code built from ``self.sgc``: once on a
        str subclass whose ``rc`` method returns a canned string, and once
        on an ``RNA`` sequence. Both calls must give the same result.
        """

        class StubRNA(str):

            """str carrying a precomputed value returned by ``rc``"""
            def __new__(cls, forward, reverse):
                # only the forward sequence becomes the string value
                return super(StubRNA, cls).__new__(cls, forward)

            def __init__(self, forward, reverse):
                self.seq, self.rev = forward, reverse

            def rc(self):
                return self.rev

        frames = ('MLT*', 'C*HK', 'ANI', 'FMLA', 'LC*H', 'YVS')

        stub_rna = StubRNA('AUGCUAACAUAAA', 'UUUAUGUUAGCAU')
        #                   .  .  .  .  .    .  .  .  .  .
        code = GeneticCode(self.sgc)
        self.assertEqual(code.translate_six_frames(stub_rna),
                         [Protein(aas) for aas in frames])

        # should also actually work with an RNA or DNA sequence!!!
        real_rna = RNA('AUGCUAACAUAAA')
        self.assertEqual(code.translate_six_frames(real_rna),
                         [Protein(aas) for aas in frames])
 def test_stop_indexes(self):
     """get_stop_indices returns the expected indexes for each start.

     Uses the code built from ``self.sgc`` on ``DNA('ATGCTAACATAAA')``
     and checks the result for ``start`` values 0, 1 and 2.
     """
     code = GeneticCode(self.sgc)
     dna = DNA('ATGCTAACATAAA')
     wanted_by_start = (
         (0, [9]),
         (1, [4]),
         (2, []),
     )
     for start, wanted in wanted_by_start:
         self.assertEqual(code.get_stop_indices(dna, start=start), wanted)
 def test_standard_code(self):
     """Standard genetic code from NCBI should have correct properties.

     Builds a ``GeneticCode`` from ``self.ncbi_standard`` and checks its
     code and start-codon sequences, start codons, id, name, codon
     lookup, start/stop predicates and the contents of ``sense_codons``.
     """
     sgc = GeneticCode(*self.ncbi_standard)
     self.assertEqual(sgc.code_sequence, 'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRI'
                      'IIMTTTTNNKKSSRRVVVVAAAADDEEGGGG')
     self.assertEqual(sgc.start_codon_sequence, '---M---------------M------'
                      '---------M----------------------------')
     self.assertEqual(sgc.start_codons, {'TTG': 'M', 'CTG': 'M',
                                         'ATG': 'M'})
     self.assertEqual(sgc.id, 1)
     self.assertEqual(sgc.name, 'Standard Nuclear')
     self.assertEqual(sgc['UUU'], 'F')
     self.assertEqual(sgc.is_start('ATG'), True)
     self.assertEqual(sgc.is_start('AAA'), False)
     self.assertEqual(sgc.is_stop('UAA'), True)
     self.assertEqual(sgc.is_stop('AAA'), False)
     self.assertEqual(len(sgc.sense_codons), 61)
     # assertIn/assertNotIn show the member and container on failure,
     # unlike assertTrue/assertFalse on an already-evaluated ``in`` test
     self.assertIn('AAA', sgc.sense_codons)
     self.assertNotIn('TGA', sgc.sense_codons)
    def test_translate(self):
        """translate turns a nucleotide string into the expected Protein.

        Covers explicit start offsets (offsets 15 and 20 on a 15-character
        input must raise ValueError), the shortest inputs, a second
        genetic code, and input containing invalid codons.
        """
        allg_code = GeneticCode(self.allg)
        sgc_code = GeneticCode(self.sgc)
        mt_code = GeneticCode(self.mt)

        rna = 'AUGCAUGACUUUUGA'
        #      .  .  .  .  .        markers for codon start
        self.assertEqual(allg_code.translate(rna), Protein('GGGGG'))
        for start, residues in ((1, 'GGGG'), (2, 'GGGG'), (3, 'GGGG'),
                                (4, 'GGG'), (12, 'G'), (14, '')):
            self.assertEqual(allg_code.translate(rna, start),
                             Protein(residues))
        for bad_start in (15, 20):
            self.assertRaises(ValueError, allg_code.translate, rna, bad_start)

        self.assertEqual(sgc_code.translate(rna), Protein('MHDF*'))
        for start, residues in ((3, 'HDF*'), (6, 'DF*'), (9, 'F*'),
                                (12, '*'), (14, '')):
            self.assertEqual(sgc_code.translate(rna, start),
                             Protein(residues))

        # check shortest translatable sequences
        for short, residues in (('AAA', 'K'), ('', '')):
            self.assertEqual(sgc_code.translate(short), Protein(residues))

        # check that different code gives different results
        self.assertEqual(mt_code.translate(rna), Protein('MHDFW'))

        # check translation with invalid codon(s)
        self.assertEqual(sgc_code.translate('AAANNNCNC123UUU'),
                         Protein('KXXXF'))