def evaluate(chosenAA):
	'''
	Build a single degenerate codon for the chosen amino acids and report what it encodes.

	The codons of every amino acid in chosenAA are fetched with dna.GetCodons, split into
	their first, second and third positions by sumupcodons, and each position is collapsed
	to one degenerate base by degenerate(). Intermediate values are printed as they are computed.
	If all three positions come out as 'N', the codon NNK is used instead of NNN.

	The output is a tuple (triplet, TargetAA, OffTargetAA), where the last two values are
	whatever chosenvsresulting returns for the chosen amino acids and the amino acids
	translated from the expanded codon.
	'''
	#collect the codons of every requested amino acid
	codons_per_aa = [dna.GetCodons(aa, separate=True) for aa in chosenAA]

	#split the codons up into their first, second and third positions
	first, second, third = sumupcodons(codons_per_aa)
	print('allcodons', (first, second, third))

	#one degenerate base per codon position, printed right after it is computed
	per_position = []
	for label, bases in (('degenerate1', first), ('degenerate2', second), ('degenerate3', third)):
		symbol = degenerate(bases)
		print(label, symbol)
		per_position.append(symbol)

	#special case: NNN is replaced by NNK, otherwise the third position is used as computed
	if all(symbol == 'N' for symbol in per_position):
		last = 'K'
	else:
		last = per_position[2]
	triplet = per_position[0] + per_position[1] + last

	#expand the degenerate triplet into the list of real codons it stands for
	unambiguous = [dna.UnAmb(triplet[i]) for i in range(3)]
	Realcodons = dna.combine(unambiguous)

	#translate every real codon to see which amino acids the triplet encodes
	ResultingAA = [dna.Translate(codon) for codon in Realcodons]

	#separate the encoded amino acids into desired and undesired ones
	TargetAA, OffTargetAA = chosenvsresulting(chosenAA, ResultingAA)

	return (triplet, TargetAA, OffTargetAA)
Exemple #2
0
    def find_degenerate(self, AA_list):
        '''
        Method for finding a degenerate codon encoding a list of desired amino acids.

        The selection is done in two passes:
        1. Every candidate degenerate codon is scored by its number of off-target
           amino acids, and the candidates with the lowest number are kept.
        2. To reduce redundancy, the kept candidates are compared by the number of
           real codons they expand to and the one with the fewest codons wins.
           On a tie, the current best is replaced only if it has a stop codon ('*')
           among its off-target amino acids.

        The input is a list of upper case amino acids in single-letter code.
        The accepted values are: FLSYCWPHERIMTNKVADQG*U

        The output is a tuple of four items:
        best_triplet     - the best degenerate codon, built from the characters
                           GATCRYWSMKHBVDN, or None if no candidate was produced.
        best_offtarget   - sorted list of the off-target amino acids of best_triplet,
                           or None if no candidate was produced.
        alternatives     - one entry per distinct codon that survived the first pass,
                           each entry being [codon] + AA_list + off-target amino acids.
        all_alternatives - the same kind of entries for every distinct candidate codon.

        An AssertionError is raised if AA_list contains an invalid amino acid.
        '''
        #make sure input is OK
        assert all(
            s in 'FLSYCWPHERIMTNKVADQG*U' for s in AA_list
        ), 'Error, one or more of the amino acids %s are not valid.' % AA_list

        def _expand(triplet):
            '''Return (real codons, sorted off-target amino acids) for a degenerate triplet.'''
            #condense the different bases for position 1, 2, and 3 to a list of real codons
            real_codons = dna.combine([
                dna.UnAmb(triplet[0]),
                dna.UnAmb(triplet[1]),
                dna.UnAmb(triplet[2]),
            ])

            #check which AA these codons code for
            resulting_aa = [
                dna.Translate(codon, table=self.getTable())
                for codon in real_codons
            ]

            #compare which amino acids were desired with the ones resulting from the degenerate codon
            return real_codons, sorted(
                self.extra_list_elements(AA_list, resulting_aa))

        #get all codons for chosen amino acids
        regular_triplets = [
            dna.GetCodons(aa,
                          table=self.getTable(),
                          separate=True,
                          exclude=True) for aa in AA_list
        ]

        #some of the codons are list of lists (happens when the amino acid has codons at different parts of the codon circle)
        #I need to flatten this into separate lists with which go on further
        regular_triplets = self.flatten_codon_list(regular_triplets)

        best_score = None  #lowest number of off-target amino acids seen so far
        good_triplets = []  #bound up front so the second pass is safe when there are no candidates
        all_alternatives = []  #to save the result of all possible triplets
        for codon_list in regular_triplets:
            #get all nucleotides for first, second and third position while retaining list structure
            first, second, third = self.sumupcodons(codon_list)

            #check which degenerate nucleotide can be used to find at least one match in each of the lists
            possible_triplets = dna.combine([
                dna.commonNuc(first),
                dna.commonNuc(second),
                dna.commonNuc(third)
            ])

            #now go through them and see which is best
            for triplet in possible_triplets:
                _, offtarget = _expand(triplet)

                #add to all options (each triplet only once)
                if not any(s[0] == triplet for s in all_alternatives):
                    all_alternatives.append([triplet] + AA_list + offtarget)

                #if there are fewer off-target amino acids with the new codon, keep it
                #the None check has to come first, an int cannot be ordered against None in Python 3
                if best_score is None or len(offtarget) < best_score:
                    best_score = len(offtarget)
                    good_triplets = [triplet]
                elif len(offtarget) == best_score:
                    good_triplets.append(triplet)

        #the saved triplets all have the same number of off-target amino acids, now keep the one with the lowest number of codons (to reduce ambiguity)
        best_triplet = None  #for storing best degenerate triplet
        best_offtarget = None  #for storing the off-target AA of the best triplet
        best_score = None  #for storing the number of real codons of the best triplet
        alternatives = []  #for saving alternative triplets and their encoded amino acids
        for triplet in good_triplets:
            Realcodons, offtarget = _expand(triplet)

            #save alternatives stats (each triplet only once)
            if not any(s[0] == triplet for s in alternatives):
                alternatives.append([triplet] + AA_list + offtarget)

            #save the stats in case there are fewer codons
            fewer_codons = best_score is None or len(Realcodons) < best_score

            #if another codon has same stats as the previous best one, replace the previous codon if it has an off-target stop
            replaces_stop = (not fewer_codons
                             and len(Realcodons) == best_score
                             and '*' in best_offtarget)

            if fewer_codons or replaces_stop:
                best_score = len(Realcodons)
                best_triplet = triplet
                best_offtarget = offtarget

        return best_triplet, best_offtarget, alternatives, all_alternatives
	def find_degenerate(self, AA_list):
		'''
		Method for finding an ambiguous codon encoding a list of desired amino acids.
		The method first keeps the codons with the fewest off-target amino acids and then,
		among those, returns the first one that expands to the fewest real codons.

		The input is a list of upper case amino acids in the single-letter code.
		The valid values are: FLSYCWPHERIMTNKVADQG*

		The output is a tuple of the best ambiguous codon and the off-target amino acids.
		The ambiguous codon is a string of three of the following characters: GATCRYWSMKHBVDN
		The off-target amino acids is a sorted list of upper case amino acids in single letter code.
		If no candidate codon is produced, the output is (None, None).

		An AssertionError is raised if AA_list contains an invalid amino acid.
		'''
		#make sure input is OK
		assert all(s in 'FLSYCWPHERIMTNKVADQG*' for s in AA_list), 'Error, one or more of the amino acids %s are not valid.' % AA_list

		def _real_codons(triplet):
			'''Condense the bases for position 1, 2, and 3 of a degenerate triplet to a list of real codons.'''
			return dna.combine([dna.UnAmb(triplet[0]), dna.UnAmb(triplet[1]), dna.UnAmb(triplet[2])])

		def _offtarget(real_codons):
			'''Return the sorted amino acids encoded by real_codons that were not asked for.'''
			resulting_aa = [dna.Translate(codon, table=self.getTable()) for codon in real_codons]
			return sorted(self.extra_list_elements(AA_list, resulting_aa))

		#get all codons for chosen amino acids
		regular_triplets = [dna.GetCodons(aa, table=self.getTable(), separate=True) for aa in AA_list]

		#some of the codons are list of lists (happens when the amino acid has codons at different parts of the codon circle)
		#I need to flatten this into separate lists with which go on further
		regular_triplets = self.flatten_codon_list(regular_triplets)

		best_score = None #lowest number of off-target amino acids seen so far
		good_triplets = [] #bound up front so the second pass is safe when there are no candidates
		for codon_list in regular_triplets:
			#get all nucleotides for first, second and third position while retaining list structure
			first, second, third = self.sumupcodons(codon_list)

			#check which degenerate nucleotide can be used to find at least one match in each of the lists
			possible_triplets = dna.combine([dna.commonNuc(first), dna.commonNuc(second), dna.commonNuc(third)])

			#now go through them and see which is best
			for triplet in possible_triplets:
				offtarget = _offtarget(_real_codons(triplet))

				#if there are fewer off-target amino acids with the new codon, keep it
				#the None check has to come first, an int cannot be ordered against None in Python 3
				if best_score is None or len(offtarget) < best_score:
					best_score = len(offtarget)
					good_triplets = [triplet]
				elif len(offtarget) == best_score:
					good_triplets.append(triplet)

		#the saved triplets all have the same number of off-target amino acids, now keep the one with the lowest number of codons for each AA
		best_triplet = None #for storing best degenerate triplet
		best_offtarget = None #for storing the off-target AA of the best triplet
		best_score = None #for storing the number of real codons of the best triplet
		for triplet in good_triplets:
			Realcodons = _real_codons(triplet)

			#save the stats in case there are fewer codons (ties keep the earlier triplet)
			if best_score is None or len(Realcodons) < best_score:
				best_score = len(Realcodons)
				best_triplet = triplet
				best_offtarget = _offtarget(Realcodons)

		return best_triplet, best_offtarget