def update_statusbar(self, evt):
    '''
    Refresh the text in the first status bar field.

    With an active selection the field shows the selected range and its
    length; when the length is a multiple of three the translation of the
    selected DNA is appended (with the full amino acid name for a single
    codon). Without a selection the cursor position is shown instead.

    evt is accepted for event-handler compatibility and is not used.
    '''
    cursor = genbank.cursor_position
    selection = self.get_dna_selection()

    last = selection[1]
    if last == -1:
        # an end index of -1 marks "no selection": report the cursor only
        self.SetStatusText('Position: %s bp' % (cursor), 0)  # text in first field
        return

    first = selection[0]
    span = abs(first - last) + 1  # number of selected bases, inclusive

    if span % 3 != 0:
        # not a whole number of codons, nothing to translate
        translation_text = ""
    else:
        chosen = genbank.gb.GetDNA()[first - 1:last]
        residues = dna.Translate(chosen)
        if span == 3:
            # exactly one codon: also show the full amino acid name
            full_name = dna.protein.one_to_full(residues)
            translation_text = " amino acid: %s (%s)" % (full_name, residues)
        else:
            translation_text = " amino acid: %s" % (residues)

    self.SetStatusText(
        'Selection: %d to %d, %d bp %s' % (first, last, span, translation_text),
        0)  # text in first field
def evaluateTriplet(self, amb_codon):
    '''
    Evaluate a degenerate codon by computing which amino acids it codes for.

    amb_codon must be a string of exactly three characters taken from the
    IUPAC nucleotide ambiguity code GATCRYWSMKHBVDN; anything else fails an
    assertion.

    The method stores the encoded amino acids in self.target, records the
    codon, clears the off-target list, stores the alternative codons
    reported by find_degenerate (the extended list sorted by length) and
    finally stores the result of next_steps() as the possible amino acids.
    '''
    # validate the input before any state is modified
    is_three_letter_string = type(amb_codon) is str and len(amb_codon) == 3
    assert is_three_letter_string, 'Error, the degenerate codon must be a string three characters long.'
    assert re.match(
        '^[GATCRYWSMKHBVDN]{3}$', amb_codon
    ) is not None, 'Error, the codon %s is not valid. It may only use the chracters GATCRYWSMKHBVDN.' % amb_codon

    # translate every unambiguous codon and keep each amino acid once
    encoded = []
    for real_codon in dna.UnAmb(amb_codon):
        encoded.append(dna.Translate(real_codon, self.getTable()))
    self.target = list(set(encoded))
    self.setTriplet(amb_codon)
    self.setOffTarget([])

    # look up the alternative codons for the same target amino acids
    _best, _off, alternatives, all_options = self.find_degenerate(
        self.getTarget())
    self.setAlternatives(alternatives)
    by_length = sorted(all_options, key=len)
    self.setExtendedAlternatives(by_length)

    # amino acids that can be added without further off-target ones
    reachable = self.next_steps()
    self.setPossible(reachable)
def getCodonsPerAA(self):
    '''
    Count how many times each amino acid is coded for by the ambiguous codon.

    The stored triplet is expanded into its unambiguous codons, these are
    joined into one sequence and translated with the current table, and the
    translation is handed to protein.count_aa, whose result is returned
    (per the original documentation: a dictionary with upper case
    single-letter amino acid keys and integer values).
    '''
    real_codons = dna.UnAmb(self.getTriplet())
    joined = ''.join(real_codons)
    peptide = dna.Translate(joined, self.getTable())
    return protein.count_aa(peptide)
def translate_selection_reverse_complement(self):
    '''
    Translate the reverse complement of the selected DNA.

    Raises ValueError when nothing is selected (end index of -1). Otherwise
    the translation is passed to translate_output together with the
    selected (not reverse-complemented) DNA and the label
    'complement strand'.
    '''
    begin, end = self.get_selection()
    if end == -1:
        raise ValueError('Cannot translate an empty selection')

    sequence = genbank.gb.GetDNA(begin, end)
    peptide = dna.Translate(dna.RC(sequence))
    self.translate_output(peptide, sequence, 'complement strand')
def translate_selection(self):
    '''
    Translate the selected DNA.

    Raises ValueError when nothing is selected (end index of -1). Otherwise
    the translation is passed to translate_output together with the
    selected DNA and the label 'leading strand'.
    '''
    begin, end = self.get_selection()
    if end == -1:
        raise ValueError('Cannot translate an empty selection')

    sequence = genbank.gb.GetDNA(begin, end)
    peptide = dna.Translate(sequence)
    self.translate_output(peptide, sequence, 'leading strand')
def translate_feature(self):
    '''
    Translate the DNA of a genbank feature and show the result.

    The feature is taken from a fixed position (index 2) of
    genbank.gb.allgbfeatures.
    NOTE(review): the hard-coded index looks like a placeholder - confirm
    which feature is meant to be translated.
    '''
    chosen = genbank.gb.allgbfeatures[2]
    # element 4 of the feature is what getdnaforgbfeature expects; its
    # first seven characters are dropped when building the label
    sequence = genbank.gb.getdnaforgbfeature(chosen[4])
    peptide = dna.Translate(sequence)
    label = 'feature "%s"' % chosen[4][7:]
    self.translate_output(peptide, sequence, label)
def find_degenerate(self, AA_list):
    '''
    Find a degenerate codon encoding a list of desired amino acids.

    The method first finds the codon(s) with the fewest off-target amino
    acids. To reduce redundancy it then goes through these best codons
    (they all have the same number of off-target amino acids) and picks the
    one that expands to the lowest number of real codons. If the current
    best has an off-target stop codon, a later codon with the same number
    of real codons replaces it.

    The input is a list of upper case amino acids in single-letter code.
    The accepted characters are: FLSYCWPHERIMTNKVADQG*U
    Invalid input fails an assertion.

    The output is a tuple of four items:
      best_triplet     the best degenerate codon (three characters out of
                       GATCRYWSMKHBVDN), or None if no candidate was found
      best_offtarget   sorted list of the off-target amino acids of the
                       best codon, or None if no candidate was found
      alternatives     one entry per codon that ties for the fewest
                       off-target amino acids, each a list of the form
                       [triplet] + AA_list + offtarget
      all_alternatives same layout, but for every candidate codon examined
    '''
    #make sure input is OK
    assert all(
        s in 'FLSYCWPHERIMTNKVADQG*U' for s in AA_list
    ), 'Error, one or more of the amino acids %s are not valid.' % (AA_list,)

    table = self.getTable()

    def expand(triplet):
        '''Return (real codons, sorted off-target amino acids) for a degenerate triplet.'''
        #convert the triplet back to a list of real codons
        real_codons = dna.combine([
            dna.UnAmb(triplet[0]),
            dna.UnAmb(triplet[1]),
            dna.UnAmb(triplet[2])
        ])
        #check which AA these codons code for
        resulting_aa = [
            dna.Translate(codon, table=table) for codon in real_codons
        ]
        #compare the desired amino acids with the ones actually encoded
        return real_codons, sorted(
            self.extra_list_elements(AA_list, resulting_aa))

    #get all codons for chosen amino acids
    regular_triplets = [
        dna.GetCodons(aa, table=table, separate=True, exclude=True)
        for aa in AA_list
    ]

    #some of the codons are list of lists (happens when the amino acid has
    #codons at different parts of the codon circle); flatten this into
    #separate lists to go on with
    regular_triplets = self.flatten_codon_list(regular_triplets)

    best_score = None  #lowest number of off-target amino acids seen so far
    good_triplets = []  #triplets reaching best_score; stays empty if there are no candidates
    all_alternatives = []  #to save the result of all possible triplets
    for codon_list in regular_triplets:
        #get all nucleotides for first, second and third position while retaining list structure
        first, second, third = self.sumupcodons(codon_list)

        #check which degenerate nucleotide can be used to find at least one match in each of the lists
        possible_triplets = dna.combine([
            dna.commonNuc(first),
            dna.commonNuc(second),
            dna.commonNuc(third)
        ])

        #now go through them and see which is best
        for triplet in possible_triplets:
            _, offtarget = expand(triplet)

            #add to all options (each triplet only once)
            if not any(s[0] == triplet for s in all_alternatives):
                all_alternatives.append([triplet] + AA_list + offtarget)

            #if there are fewer off-target amino acids with the new codon, keep it.
            #The None test must come first: ordering an int against None
            #raises TypeError on Python 3.
            if best_score is None or len(offtarget) < best_score:
                best_score = len(offtarget)
                good_triplets = [triplet]
            elif len(offtarget) == best_score:
                good_triplets.append(triplet)

    #the saved triplets all have the same number of off-target amino acids,
    #now keep the one with the lowest number of codons (to reduce ambiguity)
    best_triplet = None  #for storing best degenerate triplet
    best_offtarget = None  #for storing the off-target AA of the best triplet
    best_score = None  #number of real codons of the best triplet
    alternatives = []  #for saving alternative triplets and their encoded amino acids
    for triplet in good_triplets:
        real_codons, offtarget = expand(triplet)

        #save alternatives stats (each triplet only once)
        if not any(s[0] == triplet for s in alternatives):
            alternatives.append([triplet] + AA_list + offtarget)

        codon_count = len(real_codons)
        fewer_codons = best_score is None or codon_count < best_score
        #on a tie, replace the previous best only if it has an off-target stop
        replaces_stop = (not fewer_codons and codon_count == best_score
                         and '*' in best_offtarget)
        if fewer_codons or replaces_stop:
            best_score = codon_count
            best_triplet = triplet
            best_offtarget = offtarget

    return best_triplet, best_offtarget, alternatives, all_alternatives
def Draw_wheel(self):
    '''
    Draw the codon wheel view.

    Three concentric rings show the first, second and third codon position
    (nucleotide order T, C, A, G), surrounded by a ring of amino acid
    segments obtained by translating the 64 codons with self.table.
    Nucleotides that are part of the current degenerate codon (self.codon,
    unless it is False) are drawn in the coding colour. Amino acid segments
    are filled according to membership in self.target, self.offtarget and
    self.possible, and each segment is also drawn on self.hidden_dc in a
    unique colour registered in self.catalog for hit testing. Finally the
    segment of self.highlighted is outlined.

    Sets self.xc, self.yc and self.Radius as a side effect.
    '''
    self.xc = 850/3 #centre of codon circle in x
    self.yc = 450/2 #centre of codon circle in y
    self.Radius = self.yc/1.2

    #These parameters determine the "thickness" of the nucleotide and amino acid sections
    first_nucleotide_thickness = self.Radius/3.0
    second_nucleotide_thickness = self.Radius/4.5
    third_nucleotide_thickness = self.Radius/10.0
    amino_acid_thickness = self.Radius/3.0

    #outer radius of each ring
    first_radius = first_nucleotide_thickness
    second_radius = first_nucleotide_thickness+second_nucleotide_thickness
    third_radius = first_nucleotide_thickness+second_nucleotide_thickness+third_nucleotide_thickness
    outer_radius = first_nucleotide_thickness+second_nucleotide_thickness+third_nucleotide_thickness+amino_acid_thickness

    codon_angle = 5.625 #degrees covered by one of the 64 codons

    #ring labels, generated in the wheel order T, C, A, G for each position
    bases = ['T', 'C', 'A', 'G']
    dinucleotides = [a+b for a in bases for b in bases]
    codons = [d+c for d in dinucleotides for c in bases]

    def bold_font(point_size):
        '''Bold sans-serif font; the size is truncated because wx.Font takes an integer point size.'''
        return wx.Font(pointSize=int(point_size), family=wx.FONTFAMILY_SWISS, style=wx.FONTSTYLE_NORMAL, weight=wx.FONTWEIGHT_BOLD)

    def draw_ring(labels, segment_angle, radius, thickness, step, text_radius, background, font_size, codon_part):
        '''Draw one nucleotide ring; codon_part picks the part of self.codon that the labels are compared with.'''
        self.gcdc.SetFont(bold_font(font_size))
        self.gcdc.SetPen(wx.Pen(colour=background, width=1))
        self.gcdc.SetBrush(wx.Brush(background))
        for i, label in enumerate(labels):
            #draw the background
            start_angle = 0 + segment_angle*i
            finish_angle = segment_angle+segment_angle*i
            pointlist = self.make_arc(self.xc, self.yc, start_angle, finish_angle, radius, thickness, step=step)
            self.gcdc.DrawPolygon(pointlist)

            #if nucleotide is part of degenerate codon it should have a different color
            text_color = self.nucleotide_color
            if self.codon is not False:
                if label.replace('U','T') in dna.UnAmb(codon_part(self.codon)):
                    text_color = self.coding_nucleotide_color
            self.gcdc.SetTextForeground((text_color))

            #draw the text: only the last letter of the label, centred in the segment
            letter = label[-1]
            text_extent = self.gcdc.GetTextExtent(letter)
            x1, y1 = self.AngleToPoints(self.xc, self.yc, text_radius, finish_angle-(finish_angle-start_angle)/2) #(centre_x, centre_y, radius, angle)
            self.gcdc.DrawText(letter, x1-text_extent[0]/2, y1-text_extent[1]/2)

    def aa_segments():
        '''Yield (amino acid, start angle, finish angle) for each run of consecutive codons with the same translation.'''
        start = 0
        width = 0
        current = dna.Translate(codons[0], self.table)
        for codon in codons:
            aa = dna.Translate(codon, self.table)
            if codon == 'GGG': #catch the last codon
                #the last codon is counted into the running segment and closes it.
                #NOTE(review): this relies on GGG translating to the same
                #amino acid as the codon before it - confirm for all tables.
                width += 1
                aa = None
            if current == aa:
                width += 1
            else:
                finish = start+codon_angle*width
                yield current, start, finish
                #re-set the parameters for the next round
                start = finish
                current = aa
                width = 1

    ###########################
    ## draw first nucleotide ##
    ###########################
    draw_ring(bases, 90, first_radius, first_nucleotide_thickness, 5,
              first_radius/2, self.first_nuc_background,
              self.Radius/6.5, lambda codon: codon[0])

    ############################
    ## draw second nucleotide ##
    ############################
    font_size = self.Radius/12.0
    if font_size < 1:
        print('The problem lies with the self.Radius/12.0. Seems like it is too small.')
        font_size = 10
    #the guarded font_size is what gets used (previously the fallback was ignored)
    draw_ring(dinucleotides, 22.5, second_radius, second_nucleotide_thickness, 0.5,
              first_nucleotide_thickness+second_nucleotide_thickness/2, self.second_nuc_background,
              font_size, lambda codon: codon[0:2])

    ###########################
    ## draw third nucleotide ##
    ###########################
    draw_ring(codons, codon_angle, third_radius, third_nucleotide_thickness, 0.1,
              first_nucleotide_thickness+second_nucleotide_thickness+third_nucleotide_thickness/2, self.third_nuc_background,
              self.Radius/25.0, lambda codon: codon)

    ############################################
    ## draw the amino acid segments and names ##
    ############################################
    self.gcdc.SetFont(bold_font(self.Radius/20.0))
    self.gcdc.SetTextForeground(('#000000'))

    for current_AA, start_angle, finish_angle in aa_segments():
        #pick the fill colour of the segment
        if current_AA in self.target: #if current AA is a selected one
            fill = self.target_color
        elif current_AA in self.offtarget: #if it is in the off-targets list
            fill = self.offtarget_color
        elif current_AA in self.possible: #if current AA is among the ones that may be selected without further off-targets
            fill = self.possible_color
        else: #otherwise use standard color
            fill = self.aa_background
        self.gcdc.SetPen(wx.Pen(colour=fill, width=0))
        self.gcdc.SetBrush(wx.Brush(fill))

        #draw the amino acid segment
        pointlist = self.make_arc(self.xc, self.yc, start_angle, finish_angle, outer_radius, amino_acid_thickness, step=0.1)
        self.gcdc.DrawPolygon(pointlist)

        #draw hidden color which is used for hittests
        #(presumably NextRGB also updates self.unique_color - verify)
        self.catalog[str(self.NextRGB()+(255,))] = current_AA
        self.hidden_dc.SetPen(wx.Pen(colour=self.unique_color, width=0))
        self.hidden_dc.SetBrush(wx.Brush(colour=self.unique_color))
        self.hidden_dc.DrawPolygon(pointlist)

        #draw the dividing line at the start of the segment; it reaches
        #further towards the centre on coarser ring boundaries
        self.gcdc.SetPen(wx.Pen(colour=self.line_color, width=1))
        inner_radius = outer_radius
        if start_angle in [0,90,180,270]:
            inner_radius = 0
        elif start_angle % 22.5 == 0:
            inner_radius = first_radius
        elif start_angle % codon_angle == 0:
            inner_radius = second_radius
        x1, y1 = self.AngleToPoints(self.xc, self.yc, inner_radius, start_angle)
        x2, y2 = self.AngleToPoints(self.xc, self.yc, outer_radius, start_angle)
        self.gcdc.DrawLine(x1, y1, x2, y2)

        #draw amino acid text
        text_angle = finish_angle-(finish_angle-start_angle)/2
        label = protein.one_to_three(current_AA)+' (%s)' % current_AA
        text_extent = self.gcdc.GetTextExtent(label)
        if finish_angle <= 180:
            text_radius = third_radius*1.05
        else:
            #on this half the text is rotated the other way, so it is anchored at its far end
            text_radius = third_radius*1.05 + text_extent[0]

        #need to adjust for text height. Imagine right angled triangle.
        #Adjacent is radius. Opposite is half of the text height.
        tanangle = (0.5*text_extent[1])/text_radius #calculate the Tan(angle)
        radians = math.atan(tanangle) #invert the tangent to get radians
        degrees = radians*(180/math.pi) #convert radians to degrees
        if finish_angle <= 180:
            text_position_angle = text_angle-degrees
            rotation = -text_angle+90
        else:
            text_position_angle = text_angle+degrees
            rotation = -text_angle-90
        tx, ty = self.AngleToPoints(self.xc, self.yc, text_radius, text_position_angle)
        self.gcdc.DrawRotatedText(label, tx, ty, rotation)

    ###########################################################################
    ## draw the highlighted amino acid (the one that the mouse hovers above) ##
    ###########################################################################
    self.gcdc.SetPen(wx.Pen(colour=self.aa_highlight, width=1))
    self.gcdc.SetBrush(wx.Brush(colour=(0,0,0,0))) #transparent
    for current_AA, start_angle, finish_angle in aa_segments():
        #if current AA is highlighted, redraw that segment with a different pen
        if current_AA == self.highlighted:
            pointlist = self.make_arc(self.xc, self.yc, start_angle, finish_angle, outer_radius, amino_acid_thickness, step=0.1)
            self.gcdc.DrawPolygon(pointlist)