def evaluate(chosenAA):
    '''
    Build one degenerate codon covering the chosen amino acids and report what it encodes.

    For every amino acid in chosenAA the codons are fetched with dna.GetCodons,
    split by codon position with sumupcodons, and each position is collapsed
    to a single symbol with degenerate(). The resulting triplet is expanded
    back to real codons and translated.

    Returns a tuple (triplet, TargetAA, OffTargetAA), where the last two values
    are whatever chosenvsresulting() returns for the chosen and resulting amino acids.
    '''
    #collect the codons of every requested amino acid
    codons_per_aa = [dna.GetCodons(aa, separate=True) for aa in chosenAA]

    #split the codons up into their first, second and third positions
    first, second, third = sumupcodons(codons_per_aa)
    print('allcodons', (first, second, third))

    #one degenerate symbol per codon position, reported as it is computed
    labels = ('degenerate1', 'degenerate2', 'degenerate3')
    symbols = []
    for label, bases in zip(labels, (first, second, third)):
        symbol = degenerate(bases)
        print(label, symbol)
        symbols.append(symbol)

    if all(symbol == 'N' for symbol in symbols):
        #special case: NNK is wanted instead of NNN
        triplet = symbols[0] + symbols[1] + 'K'
    else:
        triplet = symbols[0] + symbols[1] + symbols[2]

    #expand the degenerate triplet to the list of real codons it stands for
    per_position = [dna.UnAmb(triplet[index]) for index in range(3)]
    Realcodons = dna.combine(per_position)

    #translate every real codon to see which amino acids are actually encoded
    ResultingAA = [dna.Translate(codon) for codon in Realcodons]

    #sort the encoded amino acids into desired and undesired ones
    TargetAA, OffTargetAA = chosenvsresulting(chosenAA, ResultingAA)
    return (triplet, TargetAA, OffTargetAA)
def find_degenerate(self, AA_list):
    '''
    Method for finding a degenerate codon encoding a list of desired amino acids.

    The method finds the codon(s) with fewest off-target amino acids.
    To reduce redundancy, the method then goes through all the best codons
    (they all have the same number of off-target amino acids) and keeps the one
    that expands to the lowest number of real codons.
    If a later candidate expands to the same number of codons, it replaces the
    current best one when the current best has a stop codon ('*') among its
    off-target amino acids.

    The input is a list of upper case amino acids in single-letter code.
    The valid values are: FLSYCWPHERIMTNKVADQG* ('U' is also accepted by the validation).

    The output is a tuple of four items:
        best_triplet     the best degenerate codon, a string of three of the
                         following characters: GATCRYWSMKHBVDN
                         (None if no candidate codon was found)
        best_offtarget   sorted list of the off-target amino acids of that codon,
                         upper case single letter code (None if no candidate was found)
        alternatives     one list per equally good codon: [codon] + AA_list + off-targets
        all_alternatives one list per codon that was examined: [codon] + AA_list + off-targets
    '''
    #make sure input is OK
    #set membership is used so that only single letters pass (a substring test would also accept '' or 'FL')
    valid_aa = set('FLSYCWPHERIMTNKVADQG*U')
    assert all([s in valid_aa for s in AA_list]), 'Error, one or more of the amino acids %s are not valid.' % AA_list

    table = self.getTable()

    def expand(triplet):
        #expand a degenerate triplet; returns (list of real codons, sorted off-target amino acids)
        real_codons = dna.combine([dna.UnAmb(triplet[0]), dna.UnAmb(triplet[1]), dna.UnAmb(triplet[2])])
        resulting_aa = [dna.Translate(codon, table=table) for codon in real_codons]
        return real_codons, sorted(self.extra_list_elements(AA_list, resulting_aa))

    #get all codons for chosen amino acids
    regular_triplets = [dna.GetCodons(aa, table=table, separate=True, exclude=True) for aa in AA_list]

    #some of the codons are list of lists (happens when the amino acid has codons at different parts of the codon circle)
    #these are flattened into separate lists with which to go on further
    regular_triplets = self.flatten_codon_list(regular_triplets)

    best_score = None #lowest number of off-target amino acids seen so far
    good_triplets = [] #codons sharing that lowest number; initialised here so it exists even if nothing is examined
    all_alternatives = [] #to save the result of all possible triplets
    for codon_list in regular_triplets:
        #get all nucleotides for first, second and third position while retaining list structure
        first, second, third = self.sumupcodons(codon_list)

        #check which degenerate nucleotide can be used to find at least one match in each of the lists
        possible_triplets = dna.combine([dna.commonNuc(first), dna.commonNuc(second), dna.commonNuc(third)])

        #now go through them and see which is best
        for triplet in possible_triplets:
            _, offtarget = expand(triplet)

            #add to all options (each codon only once)
            if not any(s[0] == triplet for s in all_alternatives):
                all_alternatives.append([triplet] + AA_list + offtarget)

            #if there are fewer off-target amino acids with the new codon, keep it
            #None is tested first, ordering an int against None raises TypeError in Python 3
            if best_score is None or len(offtarget) < best_score:
                best_score = len(offtarget)
                good_triplets = [triplet]
            elif len(offtarget) == best_score:
                good_triplets.append(triplet)

    #the saved triplets all have the same number of off-target amino acids,
    #now keep the one with the lowest number of codons (to reduce ambiguity)
    best_triplet = None #for storing best degenerate triplet
    best_offtarget = None #for storing the off-target AA of the best triplet
    best_score = None #for storing the number of real codons of the best triplet
    alternatives = [] #for saving alternative triplets and their encoded amino acids
    for triplet in good_triplets:
        real_codons, offtarget = expand(triplet)

        #save alternatives stats (each codon only once)
        if not any(s[0] == triplet for s in alternatives):
            alternatives.append([triplet] + AA_list + offtarget)

        if best_score is None or len(real_codons) < best_score:
            #fewer codons than the best so far, save stats
            best_score = len(real_codons)
            best_triplet = triplet
            best_offtarget = offtarget
        elif len(real_codons) == best_score and '*' in best_offtarget:
            #same number of codons, replace the previous codon because it has an off-target stop
            best_score = len(real_codons)
            best_triplet = triplet
            best_offtarget = offtarget

    return best_triplet, best_offtarget, alternatives, all_alternatives
def find_degenerate(self, AA_list):
    '''
    Method for finding an ambiguous codon encoding a list of desired amino acids.

    The method finds the codon(s) with fewest off-target amino acids and, among
    those, keeps the first one that expands to the lowest number of real codons.

    The input is a list of upper case amino acids in the single-letter code.
    The valid values are: FLSYCWPHERIMTNKVADQG*

    The output is a tuple of the best ambiguous codon and the off-target amino acids.
    The ambiguous codon is a string of three of the following characters: GATCRYWSMKHBVDN
    The off-target amino acids is a sorted list of upper case amino acids in single letter code.
    Both values are None if no candidate codon was found.
    '''
    #make sure input is OK
    #set membership is used so that only single letters pass (a substring test would also accept '' or 'FL')
    valid_aa = set('FLSYCWPHERIMTNKVADQG*')
    assert all([s in valid_aa for s in AA_list]), 'Error, one or more of the amino acids %s are not valid.' % AA_list

    table = self.getTable()

    def real_codons_for(triplet):
        #expand a degenerate triplet to the list of real codons it stands for
        return dna.combine([dna.UnAmb(triplet[0]), dna.UnAmb(triplet[1]), dna.UnAmb(triplet[2])])

    def offtarget_for(real_codons):
        #translate the codons and return the sorted amino acids that were not asked for
        resulting_aa = [dna.Translate(codon, table=table) for codon in real_codons]
        return sorted(self.extra_list_elements(AA_list, resulting_aa))

    #get all codons for chosen amino acids
    regular_triplets = [dna.GetCodons(aa, table=table, separate=True) for aa in AA_list]

    #some of the codons are list of lists (happens when the amino acid has codons at different parts of the codon circle)
    #these are flattened into separate lists with which to go on further
    regular_triplets = self.flatten_codon_list(regular_triplets)

    best_score = None #lowest number of off-target amino acids seen so far
    good_triplets = [] #codons sharing that lowest number; initialised here so it exists even if nothing is examined
    for codon_list in regular_triplets:
        #get all nucleotides for first, second and third position while retaining list structure
        first, second, third = self.sumupcodons(codon_list)

        #check which degenerate nucleotide can be used to find at least one match in each of the lists
        possible_triplets = dna.combine([dna.commonNuc(first), dna.commonNuc(second), dna.commonNuc(third)])

        #now go through them and see which is best
        for triplet in possible_triplets:
            offtarget = offtarget_for(real_codons_for(triplet))

            #if there are fewer off-target amino acids with the new codon, keep it
            #None is tested first, ordering an int against None raises TypeError in Python 3
            if best_score is None or len(offtarget) < best_score:
                best_score = len(offtarget)
                good_triplets = [triplet]
            elif len(offtarget) == best_score:
                good_triplets.append(triplet)

    #the saved triplets all have the same number of off-target amino acids,
    #now keep the one with the lowest number of codons
    best_triplet = None #for storing best degenerate triplet
    best_offtarget = None #for storing the off-target AA of the best triplet
    best_score = None #for storing the number of real codons of the best triplet
    for triplet in good_triplets:
        real_codons = real_codons_for(triplet)

        #save the stats in case there are fewer codons
        if best_score is None or len(real_codons) < best_score:
            best_score = len(real_codons)
            best_triplet = triplet
            #the off-target list is only worked out for a codon that actually becomes the best one
            best_offtarget = offtarget_for(real_codons)

    return best_triplet, best_offtarget