Python genetic_code Examples, skbio.sequence.genetic_code Python Examples

Example #1

0

Show file

File: test_genetic_code.py Project: vivekiitkgp/scikit-bio

    def test_standard_code_lookup(self):
        """Check the standard code however it is obtained, then diff it against code 2.

        The code built directly with ``GeneticCode(*self.ncbi_standard)`` and
        the ones returned by ``genetic_code(1)``, ``genetic_code("1")`` and
        ``genetic_code()`` must all expose the same attribute values.  The
        second half checks ``changes`` between the standard code and the
        code returned by ``genetic_code(2)``.
        """
        expected_code = "FFLLSSSSYY**CC*WLLLLPPPPHHQQR" "RRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
        expected_start_seq = "---M---------------M--" "-------------M----------------------------"

        sgc_new = GeneticCode(*self.ncbi_standard)
        standard_codes = (sgc_new, genetic_code(1), genetic_code("1"), genetic_code())
        for code in standard_codes:
            self.assertEqual(code.code_sequence, expected_code)
            self.assertEqual(code.start_codon_sequence, expected_start_seq)
            self.assertEqual(code.start_codons, {"TTG": "M", "CTG": "M", "ATG": "M"})
            self.assertEqual(code.id, 1)
            self.assertEqual(code.name, "Standard Nuclear")
            self.assertEqual(code["TTT"], "F")
            self.assertEqual(code.is_start("ATG"), True)
            self.assertEqual(code.is_start("AAA"), False)
            self.assertEqual(code.is_stop("TAA"), True)
            self.assertEqual(code.is_stop("AAA"), False)

        mtgc = genetic_code(2)
        self.assertEqual(mtgc.name, "Vertebrate Mitochondrial")
        self.assertEqual(mtgc.is_start("AUU"), True)
        self.assertEqual(mtgc.is_stop("UGA"), False)

        # Each value pairs the residue in the calling code with the residue
        # in the code passed to changes().
        standard_vs_mito = {"AGA": "R*", "AGG": "R*", "ATA": "IM", "TGA": "*W"}
        mito_vs_standard = {"AGA": "*R", "AGG": "*R", "ATA": "MI", "TGA": "W*"}
        self.assertEqual(sgc_new.changes(mtgc), standard_vs_mito)
        self.assertEqual(mtgc.changes(sgc_new), mito_vs_standard)
        self.assertEqual(mtgc.changes(mtgc), {})
        # changes() is also called with a bare code string instead of an object.
        self.assertEqual(
            mtgc.changes("FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTT" "TNNKKSSRRVVVVAAAADDEEGGGG"),
            mito_vs_standard,
        )

Example #2

0

Show file

File: test_genetic_code.py Project: 7924102/scikit-bio

    def test_standard_code_lookup(self):
        """Standard code must look the same via GeneticCode, int id, str id and default.

        Afterwards the code returned by ``genetic_code(2)`` is checked and
        compared with the standard code through ``changes``.
        """
        code_string = ('FFLLSSSSYY**CC*WLLLLPPPPHHQQR'
                       'RRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG')
        start_string = ('---M---------------M--'
                        '-------------M----------------------------')
        start_codons = {'TTG': 'M', 'CTG': 'M', 'ATG': 'M'}

        sgc_new = GeneticCode(*self.ncbi_standard)
        sgc_number = genetic_code(1)
        sgc_string = genetic_code('1')
        sgc_empty = genetic_code()
        for candidate in (sgc_new, sgc_number, sgc_string, sgc_empty):
            self.assertEqual(candidate.code_sequence, code_string)
            self.assertEqual(candidate.start_codon_sequence, start_string)
            self.assertEqual(candidate.start_codons, start_codons)
            self.assertEqual(candidate.id, 1)
            self.assertEqual(candidate.name, 'Standard Nuclear')
            self.assertEqual(candidate['TTT'], 'F')
            self.assertEqual(candidate.is_start('ATG'), True)
            self.assertEqual(candidate.is_start('AAA'), False)
            self.assertEqual(candidate.is_stop('TAA'), True)
            self.assertEqual(candidate.is_stop('AAA'), False)

        mtgc = genetic_code(2)
        self.assertEqual(mtgc.name, 'Vertebrate Mitochondrial')
        self.assertEqual(mtgc.is_start('AUU'), True)
        self.assertEqual(mtgc.is_stop('UGA'), False)

        forward_diff = {'AGA': 'R*', 'AGG': 'R*', 'ATA': 'IM', 'TGA': '*W'}
        reverse_diff = {'AGA': '*R', 'AGG': '*R', 'ATA': 'MI', 'TGA': 'W*'}
        self.assertEqual(sgc_new.changes(mtgc), forward_diff)
        self.assertEqual(mtgc.changes(sgc_new), reverse_diff)
        self.assertEqual(mtgc.changes(mtgc), {})
        # Same comparison, but the other code is given as a plain string.
        raw_code = ('FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTT'
                    'TNNKKSSRRVVVVAAAADDEEGGGG')
        self.assertEqual(mtgc.changes(raw_code), reverse_diff)

Example #3

0

Show file

    def test_standard_code_lookup(self):
        """Verify the standard code for every lookup form and its diff to code 2.

        Lookup forms exercised: direct ``GeneticCode`` construction from
        ``self.ncbi_standard``, ``genetic_code(1)``, ``genetic_code('1')``
        and ``genetic_code()`` with no argument.
        """
        sgc_new = GeneticCode(*self.ncbi_standard)
        lookups = [sgc_new, genetic_code(1), genetic_code('1'), genetic_code()]

        # (codon, expected is_start, expected is_stop) probes; None = not probed.
        for sgc in lookups:
            self.assertEqual(
                sgc.code_sequence,
                'FFLLSSSSYY**CC*WLLLLPPPPHHQQR'
                'RRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG')
            self.assertEqual(
                sgc.start_codon_sequence,
                '---M---------------M--'
                '-------------M----------------------------')
            self.assertEqual(
                sgc.start_codons, {'TTG': 'M', 'CTG': 'M', 'ATG': 'M'})
            self.assertEqual(sgc.id, 1)
            self.assertEqual(sgc.name, 'Standard Nuclear')
            self.assertEqual(sgc['TTT'], 'F')
            self.assertEqual(sgc.is_start('ATG'), True)
            self.assertEqual(sgc.is_start('AAA'), False)
            self.assertEqual(sgc.is_stop('TAA'), True)
            self.assertEqual(sgc.is_stop('AAA'), False)

        mtgc = genetic_code(2)
        self.assertEqual(mtgc.name, 'Vertebrate Mitochondrial')
        self.assertEqual(mtgc.is_start('AUU'), True)
        self.assertEqual(mtgc.is_stop('UGA'), False)

        expected_from_standard = {
            'AGA': 'R*', 'AGG': 'R*', 'ATA': 'IM', 'TGA': '*W'}
        expected_from_mito = {
            'AGA': '*R', 'AGG': '*R', 'ATA': 'MI', 'TGA': 'W*'}

        self.assertEqual(sgc_new.changes(mtgc), expected_from_standard)
        self.assertEqual(mtgc.changes(sgc_new), expected_from_mito)
        self.assertEqual(mtgc.changes(mtgc), {})
        # A raw code string is accepted by changes() in place of a code object.
        self.assertEqual(
            mtgc.changes('FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTT'
                         'TNNKKSSRRVVVVAAAADDEEGGGG'),
            expected_from_mito)

Example #4

0

Show file

File: convert_dna_to_aa.py Project: danielparton/peanut

def two_dna_point_mutants_to_aa(wt_sequence):
    """
    wt_sequence (string) DNA sequence
    assumes starting from correct reading frame

    Returns the set of translated sequences for wt_sequence itself and for
    every variant in which position k1 is replaced by a different base and a
    later position k2 is set to any of the four bases (k2 may therefore keep
    its original base, as in the original implementation).
    Variants whose translation contains '*' before the last residue are
    left out.
    """
    # NOTE(review): the alphabet uses 'U'; a 'T' in wt_sequence never equals
    # 'U', so that substitution is not skipped -- confirm intended alphabet.
    bases = ['A','C','G','U']
    AA_sequences = set()
    orig_code = genetic_code(11)
    AA_sequences.add(orig_code.translate(wt_sequence).sequence)

    for k1, char1 in enumerate(wt_sequence[:-1]):
        # k2 must be an absolute index into wt_sequence. Enumerating the
        # slice wt_sequence[k1+1:] yields indices relative to the slice,
        # which produced strings of the wrong length.
        for k2 in range(k1+1, len(wt_sequence)):
            for mutant1 in bases:
                if mutant1 == char1:
                    continue
                for mutant2 in bases:
                    # Slices at the ends are simply empty, so no special
                    # cases are needed for the first or last position.
                    this_dna_string = (wt_sequence[:k1] + mutant1
                                       + wt_sequence[k1+1:k2] + mutant2
                                       + wt_sequence[k2+1:])
                    this_sequence = orig_code.translate(this_dna_string).sequence
                    if '*' in this_sequence[:-1]:
                        continue
                    AA_sequences.add(this_sequence)
    return AA_sequences

Example #5

0

Show file

File: convert_dna_to_aa.py Project: danielparton/peanut

def all_dna_point_mutants_to_aa(wt_sequence):
    """
    wt_sequence (string) DNA sequence
    assumes starting from correct reading frame

    Returns the set of translated sequences for wt_sequence and for every
    variant that differs from it at exactly one position.  Variants whose
    translation contains '*' before the last residue are left out.
    """
    orig_code = genetic_code(11)
    AA_sequences = {orig_code.translate(wt_sequence).sequence}

    for position, wt_base in enumerate(wt_sequence):
        # Everything around the mutated position stays untouched.
        upstream = wt_sequence[:position]
        downstream = wt_sequence[position+1:]
        for replacement in ('A','C','G','U'):
            if replacement == wt_base:
                continue
            protein = orig_code.translate(upstream+replacement+downstream).sequence
            if '*' not in protein[:-1]:
                AA_sequences.add(protein)
    return AA_sequences

Example #6

0

Show file

def dna_to_aa(sequence, try_frames=False):
    """
    Translate a DNA nucleotide sequence into an amino acid sequence

    Arguments:
    ----------
        sequence : str
            DNA nucleotide sequence
        Optional:
        ---------
            try_frames : Bool
                if True, the sequence is translated with translate_six_frames
                and the first translation with the fewest '*' characters is
                returned; if False the sequence is translated as given
                default = False
    Returns:
    --------
        aa_sequence : str
            sequence of one-letter amino acid codes
    """
    orig_code = genetic_code(11)

    if try_frames:
        frames = orig_code.translate_six_frames(DNASequence(sequence))
        stop_counts = [frame.sequence.count('*') for frame in frames]
        # index() returns the first frame that reaches the minimum count.
        best_frame = frames[stop_counts.index(min(stop_counts))]
        return best_frame.sequence

    return orig_code.translate(sequence).sequence

Example #7

0

Show file

def two_dna_point_mutants_to_aa(wt_sequence):
    """
    Finds all potential sequences which can be achieved by making 2 nucleotide
    mutations and translating to amino acid sequence
    Ignores mutations that lead to nonsense instead of missense mutations
    Assumes wt_sequence starts on the correct reading frame

    Makes double mutants via nested loops over every pair of positions
    (k1 < k2) and every base at each of them, is therefore VERY SLOW

    The base at k1 is always changed; the base at k2 may stay the same, so
    translations of single mutants are part of the result as well.

    Arguments:
    ----------
        wt_sequence : str
            DNA nucleotide sequence
    Returns:
    --------
        AA_sequences : set of str
            each str is a unique sequence of one-letter amino acid codes;
            the translation of wt_sequence itself is always included
    """
    # NOTE(review): the alphabet uses 'U'; a 'T' in wt_sequence never equals
    # 'U', so that substitution is not skipped -- confirm intended alphabet.
    bases = ['A', 'C', 'G', 'U']
    seq_len = len(wt_sequence)

    AA_sequences = set()
    orig_code = genetic_code(11)
    AA_sequences.add(orig_code.translate(wt_sequence).sequence)

    for k1, char1 in enumerate(wt_sequence[:-1]):
        head = wt_sequence[:k1]
        # Use absolute positions for k2. The previous code enumerated the
        # slice wt_sequence[k1 + 1:], got slice-relative indices and built
        # strings of the wrong length.
        for k2 in range(k1 + 1, seq_len):
            middle = wt_sequence[k1 + 1:k2]
            tail = wt_sequence[k2 + 1:]
            for mutant1 in bases:
                if mutant1 == char1:
                    continue
                for mutant2 in bases:
                    this_dna_string = head + mutant1 + middle + mutant2 + tail
                    this_sequence = orig_code.translate(
                        this_dna_string).sequence
                    # A stop before the final residue is a nonsense mutation.
                    if '*' in this_sequence[:-1]:
                        continue
                    AA_sequences.add(this_sequence)
    return AA_sequences

Example #8

0

Show file

File: primer_design.py Project: danielparton/peanut

def make_single_mutant(sequence,wt_res,res_num,mut_res,first_res=1):
    """
    Build forward and reverse primers that replace one residue's codon.

    sequence (string) DNA sequence
    wt_res (char) single letter amino acid code of wildtype residue to be mutated
    res_num (int) residue id number of residue to be mutated
    mut_res (char) single letter amino acid code of mutant residue
    first_res (int) residue id number of first residue in sequence (default = 1)

    DNA sequence needs to start with the first residue of the protein (no promoter, etc)
    DNA sequence should be only the kinase domain

    The sequence is translated, the wildtype residue is checked, a codon for
    the mutant residue is chosen, and a primer window around that codon is
    adjusted by check_melting_temp until it is accepted or grows past 45 nt.

    Desired mutation should require only a single nucleotide change; when no
    such codon exists a message is printed and make_mutant picks the codon.

    Returns (forward_primer, reverse_primer); the forward primer is lowercase.
    Raises IOError when wt_res is not the residue found at res_num.
    """
    orig_code = genetic_code(11)
    sequence = sequence.upper()
    aa_sequence = orig_code.translate(sequence).sequence

    residue_offset = res_num - first_res
    if str(wt_res) != aa_sequence[residue_offset]:
        raise IOError("Desired residue not found -- check wildtype residue name and id, and first residue id")

    # The codon of the residue of interest spans [codon_start, codon_end).
    codon_start = residue_offset*3
    codon_end = codon_start+3
    wt_codon = DNASequence(sequence[codon_start:codon_end])

    mut_codons = orig_code.synonyms[mut_res]
    # assumes distance() is the fraction of differing positions, so *3 == 1
    # means exactly one base differs -- TODO confirm
    one_base_away = [codon for codon in mut_codons
                     if wt_codon.distance(DNASequence(codon))*3 == 1]
    # When several codons qualify, the last one is used.
    mut_codon = one_base_away[-1] if one_base_away else None

    if not mut_codon:
        print("Cannot make desired mutant with a single base change")
        mut_codon = make_mutant(wt_codon, mut_codons)

    # Start with 11 nt on either side of the codon, clipped to the sequence.
    start_ix = max(0,codon_start-11)
    end_ix = min(len(sequence),codon_end+11)

    primer_accepted = False
    while not primer_accepted:
        if end_ix - start_ix > 45:
            print("Acceptable melting temp was not found")
            break
        forward_primer = (sequence[start_ix:codon_start]+mut_codon+sequence[codon_end:end_ix]).lower()
        # check_melting_temp reports acceptance and returns updated bounds.
        primer_accepted, start_ix, end_ix = check_melting_temp(forward_primer, start_ix, end_ix, len(sequence))

    reverse_primer = DNASequence(forward_primer).rc().sequence

    return forward_primer, reverse_primer

Example #9

0

Show file

File: convert_dna_to_aa.py Project: danielparton/peanut

def dna_to_aa(sequence, try_frames=False):
    """
    sequence (string) DNA sequence
    try_frames (bool) when True, translate with translate_six_frames and
        return the first translation with the fewest '*' characters;
        when False (default) translate the sequence as given
    """
    orig_code = genetic_code(11)

    if not try_frames:
        return orig_code.translate(sequence).sequence

    candidates = orig_code.translate_six_frames(DNASequence(sequence))

    def stop_count(aastring):
        return aastring.sequence.count('*')

    # min() keeps the first candidate among those tied for the fewest stops.
    return min(candidates, key=stop_count).sequence

Example #10

0

Show file

 def __init__(self, sequence, first_res=1):
     """
     Store the upper-cased wild type sequence and its translation.

     Arguments:
     ----------
         sequence : str
             original DNA nucleotide sequence, treated as wild type
         first_res : int
             residue id number of first residue in sequence (default = 1)
     """
     code = genetic_code(11)
     wild_type = sequence.upper()
     translated = code.translate(wild_type).sequence

     # Nothing is stored on the instance until translation has succeeded.
     self.sequence, self.aa_sequence = wild_type, translated
     self.first_res, self.orig_code = first_res, code

Example #11

0

Show file

def all_dna_point_mutants_to_aa(wt_sequence):
    """
    Collects the amino acid sequences reachable from wt_sequence by changing
    a single nucleotide, together with the translation of wt_sequence itself
    Mutants whose translation has a '*' before the last residue are dropped
    Assumes wt_sequence starts on the correct reading frame

    Arguments:
    ----------
        wt_sequence : str
            DNA nucleotide sequence
    Returns:
    --------
        AA_sequences : set of str
            each str is a unique sequence of one-letter amino acid codes
    """
    orig_code = genetic_code(11)
    AA_sequences = set()
    AA_sequences.add(orig_code.translate(wt_sequence).sequence)

    alphabet = ['A', 'C', 'G', 'U']
    for index, original_base in enumerate(wt_sequence):
        substitutes = [base for base in alphabet if base != original_base]
        for base in substitutes:
            # Empty slices cover the first and last positions automatically.
            mutated = wt_sequence[:index] + base + wt_sequence[index + 1:]
            translation = orig_code.translate(mutated).sequence
            if '*' in translation[:-1]:
                continue
            AA_sequences.add(translation)
    return AA_sequences

Example #12

0

Show file

 def test_genetic_code_with_invalid_id(self):
     """genetic_code(30) is expected to raise ValueError."""
     self.assertRaises(ValueError, genetic_code, 30)

Example #13

0

Show file

 def test_genetic_code_with_too_many_args(self):
     """Calling genetic_code with two positional arguments must raise TypeError."""
     self.assertRaises(TypeError, genetic_code, 1, 2)

Example #14

0

Show file

File: test_genetic_code.py Project: 7924102/scikit-bio

 def test_genetic_code_with_invalid_id(self):
     """An id of 30 must be rejected by genetic_code with ValueError."""
     invalid_id = 30
     self.assertRaises(ValueError, genetic_code, invalid_id)

Example #15

0

Show file

File: test_genetic_code.py Project: 7924102/scikit-bio

 def test_genetic_code_with_too_many_args(self):
     """genetic_code(1, 2) is expected to raise TypeError."""
     surplus_args = (1, 2)
     self.assertRaises(TypeError, genetic_code, *surplus_args)