def hillClimb(gene, direction, max_steps, alignment, regions, tolerance_fxn,
              scores, conservation_cutoff, error_rate, tracer):
    """Hill-climb on a gene's score by swapping codons of the same amino acid.

    Each iteration picks a random amino acid and a random region, then swaps
    the codons at two positions in that region that translate to that amino
    acid.  The swap is kept when the score (first value returned by
    getTranslationOutcomes) moves by more than 1e-5 in the requested
    direction and checkSequence() accepts the new sequence; otherwise the
    swap is undone.

    Args:
        gene: Coding sequence; split into codons by cai.splitByFrame(gene, 0).
        direction: Value compared against sign(new_score - score);
            presumably +1 to increase the score and -1 to decrease it.
        max_steps: The search stops once `step` reaches this value.
        alignment, tolerance_fxn, scores, conservation_cutoff: Passed through
            unchanged to getTranslationOutcomes.
        regions: Sequence of (start, end) index pairs into the protein; the
            end index is exclusive.
        error_rate: Passed through to getSiteErrorProbabilities.
        tracer: Object with write() and flush(); receives a header and one
            record per accepted swap.

    Returns:
        The codons joined into one string.  The gene is returned unchanged
        when no region contains two positions sharing one of the candidate
        amino acids (the loop could otherwise never terminate).
    """
    # Local import so the block does not depend on a module-level import.
    import sys

    codons = cai.splitByFrame(gene, 0)
    gc = translate._genetic_code
    all_alt_codons = [c for c in gc.keys() if 'U' not in c]
    prot = translate.TranslateRaw(gene, bad_aa='X')
    # Candidate amino acids; M and W are absent (under the standard genetic
    # code they have a single codon, so a swap could not change anything).
    letters = 'ACDEFGHIKLNPQRSTVY'
    eps = 1e-5

    def currentScore():
        # Score the codon list as it stands right now.
        seq = ''.join(codons)
        site_error_probs = getSiteErrorProbabilities(seq, error_rate)
        return getTranslationOutcomes(
            seq, alignment, tolerance_fxn, scores, conservation_cutoff,
            all_alt_codons, site_error_probs)[0]

    def anySwappablePair():
        # True if some region has >= 2 positions with the same candidate
        # amino acid, i.e. the main loop can ever perform a swap.
        for region in regions:
            counts = {}
            for i in range(region[0], region[1]):
                counts[prot[i]] = counts.get(prot[i], 0) + 1
            if any(counts.get(aa, 0) >= 2 for aa in letters):
                return True
        return False

    orig_score = currentScore()
    score = orig_score
    tracer.write("score\tprev.score\tdiff\torig.score\tstep\ttotal.step\n")
    tracer.write("# Starting hillclimb\n")

    # Iterations that find no pair are retried without advancing `step`, so
    # without this guard a gene with no swappable pair would loop forever.
    if max_steps > 0 and not anySwappablePair():
        return ''.join(codons)

    step = 0
    total_steps = 0
    while step < max_steps:
        # Pick an amino acid and a part of the protein at random.
        aa = random.choice(letters)
        region = random.choice(regions)
        # Positions in the region whose residue is that amino acid.
        indices = [i for i in range(region[0], region[1]) if prot[i] == aa]
        if len(indices) < 2:
            continue
        first, second = random.sample(indices, 2)

        # Swap the two synonymous codons and score the resulting gene.
        codons[first], codons[second] = codons[second], codons[first]
        new_score = currentScore()

        diff = new_score - score
        if (abs(diff) > eps and sign(diff) == direction
                and checkSequence(''.join(codons), gene, prot)):
            line = "#%s\n%s\t%d\t%d\n" % (
                ''.join(codons),
                pf([new_score, score, diff, orig_score]),
                step, total_steps)
            # Same output as the Python 2 statement `print line,` (line
            # already ends with a newline) but valid on Python 2 and 3.
            sys.stdout.write(line)
            tracer.write(line)
            tracer.flush()
            step = 0
            score = new_score
        else:
            # Not an accepted improvement: reverse the swap.
            codons[first], codons[second] = codons[second], codons[first]
        # NOTE(review): the original indentation was lost; both counters are
        # assumed to advance after every evaluated swap -- confirm.
        step += 1
        total_steps += 1
    return ''.join(codons)
def hillClimb(gene, direction, max_steps, alignment, regions, tolerance_fxn,
              scores, conservation_cutoff, error_rate, tracer):
    """Hill-climb on a gene's score by swapping codons of the same amino acid.

    Each iteration picks a random amino acid and a random region, then swaps
    the codons at two positions in that region that translate to that amino
    acid.  The swap is kept when the score (first value returned by
    getTranslationOutcomes) moves by more than 1e-5 in the requested
    direction and checkSequence() accepts the new sequence; otherwise the
    swap is undone.

    Args:
        gene: Coding sequence; split into codons by cai.splitByFrame(gene, 0).
        direction: Value compared against sign(new_score - score);
            presumably +1 to increase the score and -1 to decrease it.
        max_steps: The search stops once `step` reaches this value.
        alignment, tolerance_fxn, scores, conservation_cutoff: Passed through
            unchanged to getTranslationOutcomes.
        regions: Sequence of (start, end) index pairs into the protein; the
            end index is exclusive.
        error_rate: Passed through to getSiteErrorProbabilities.
        tracer: Object with write() and flush(); receives a header and one
            record per accepted swap.

    Returns:
        The codons joined into one string.  The gene is returned unchanged
        when no region contains two positions sharing one of the candidate
        amino acids (the loop could otherwise never terminate).
    """
    # Local import so the block does not depend on a module-level import.
    import sys

    codons = cai.splitByFrame(gene, 0)
    gc = translate._genetic_code
    all_alt_codons = [c for c in gc.keys() if 'U' not in c]
    prot = translate.TranslateRaw(gene, bad_aa='X')
    # Candidate amino acids; M and W are absent (under the standard genetic
    # code they have a single codon, so a swap could not change anything).
    letters = 'ACDEFGHIKLNPQRSTVY'
    eps = 1e-5

    def currentScore():
        # Score the codon list as it stands right now.
        seq = ''.join(codons)
        site_error_probs = getSiteErrorProbabilities(seq, error_rate)
        return getTranslationOutcomes(
            seq, alignment, tolerance_fxn, scores, conservation_cutoff,
            all_alt_codons, site_error_probs)[0]

    def anySwappablePair():
        # True if some region has >= 2 positions with the same candidate
        # amino acid, i.e. the main loop can ever perform a swap.
        for region in regions:
            counts = {}
            for i in range(region[0], region[1]):
                counts[prot[i]] = counts.get(prot[i], 0) + 1
            if any(counts.get(aa, 0) >= 2 for aa in letters):
                return True
        return False

    orig_score = currentScore()
    score = orig_score
    tracer.write("score\tprev.score\tdiff\torig.score\tstep\ttotal.step\n")
    tracer.write("# Starting hillclimb\n")

    # Iterations that find no pair are retried without advancing `step`, so
    # without this guard a gene with no swappable pair would loop forever.
    if max_steps > 0 and not anySwappablePair():
        return ''.join(codons)

    step = 0
    total_steps = 0
    while step < max_steps:
        # Pick an amino acid and a part of the protein at random.
        aa = random.choice(letters)
        region = random.choice(regions)
        # Positions in the region whose residue is that amino acid.
        indices = [i for i in range(region[0], region[1]) if prot[i] == aa]
        if len(indices) < 2:
            continue
        first, second = random.sample(indices, 2)

        # Swap the two synonymous codons and score the resulting gene.
        codons[first], codons[second] = codons[second], codons[first]
        new_score = currentScore()

        diff = new_score - score
        if (abs(diff) > eps and sign(diff) == direction
                and checkSequence(''.join(codons), gene, prot)):
            line = "#%s\n%s\t%d\t%d\n" % (
                ''.join(codons),
                pf([new_score, score, diff, orig_score]),
                step, total_steps)
            # Same output as the Python 2 statement `print line,` (line
            # already ends with a newline) but valid on Python 2 and 3.
            sys.stdout.write(line)
            tracer.write(line)
            tracer.flush()
            step = 0
            score = new_score
        else:
            # Not an accepted improvement: reverse the swap.
            codons[first], codons[second] = codons[second], codons[first]
        # NOTE(review): the original indentation was lost; both counters are
        # assumed to advance after every evaluated swap -- confirm.
        step += 1
        total_steps += 1
    return ''.join(codons)
def getSiteErrorProbabilities(gene, error_rate):
    """Return the probability of a translation error at each codon of `gene`.

    Every codon gets a raw weight from siteErrorWeight() (called with
    cai._yeast_relative_adaptiveness and the constant 10.0).  The weights are
    rescaled so that they sum to the number of codons, i.e. their mean is 1,
    and each rescaled weight is then multiplied by `error_rate`.

    Args:
        gene: Coding sequence; split into codons by cai.splitByFrame(gene, 0).
        error_rate: Factor applied to every rescaled weight.  Because the
            weights average 1, this is the mean per-codon error probability.

    Returns:
        A list with one value per codon, in codon order.

    Raises:
        ZeroDivisionError: If the gene has codons but their raw weights sum
            to zero.
        AssertionError: If the rescaled weights do not sum to the codon
            count within 1e-5.
    """
    codons = cai.splitByFrame(gene, 0)
    n_codons = len(codons)
    raw_weights = [
        siteErrorWeight(c, cai._yeast_relative_adaptiveness, 10.0)
        for c in codons
    ]
    # Rescale so the site weights sum to the length of the gene in codons.
    weight_sum = sum(raw_weights)
    site_error_wts = [n_codons * w / weight_sum for w in raw_weights]
    assert abs(sum(site_error_wts) - float(n_codons)) < 1e-5
    return [w * error_rate for w in site_error_wts]
def getSiteErrorProbabilities(gene, error_rate):
    """Return the probability of a translation error at each codon of `gene`.

    Every codon gets a raw weight from siteErrorWeight() (called with
    cai._yeast_relative_adaptiveness and the constant 10.0).  The weights are
    rescaled so that they sum to the number of codons, i.e. their mean is 1,
    and each rescaled weight is then multiplied by `error_rate`.

    Args:
        gene: Coding sequence; split into codons by cai.splitByFrame(gene, 0).
        error_rate: Factor applied to every rescaled weight.  Because the
            weights average 1, this is the mean per-codon error probability.

    Returns:
        A list with one value per codon, in codon order.

    Raises:
        ZeroDivisionError: If the gene has codons but their raw weights sum
            to zero.
        AssertionError: If the rescaled weights do not sum to the codon
            count within 1e-5.
    """
    codons = cai.splitByFrame(gene, 0)
    n_codons = len(codons)
    raw_weights = [
        siteErrorWeight(c, cai._yeast_relative_adaptiveness, 10.0)
        for c in codons
    ]
    # Rescale so the site weights sum to the length of the gene in codons.
    weight_sum = sum(raw_weights)
    site_error_wts = [n_codons * w / weight_sum for w in raw_weights]
    assert abs(sum(site_error_wts) - float(n_codons)) < 1e-5
    return [w * error_rate for w in site_error_wts]
def getTranslationOutcomes(gene, alignment, tolerance_fxn, scores,
                           conservation_cutoff, all_alt_codons,
                           site_error_probs):
    """Compute three whole-gene translation outcome probabilities.

    For every codon whose residue is not the bad-residue marker 'X', the
    alternative codons that differ from it at exactly one position
    (sequenceDiffs(codon, c) == 1) are enumerated.  Each alternative is
    weighted by probMistranslation(codon, alternative) -- presumably the
    probability of that particular misreading given an error at the site.
    Synonymous alternatives count as fully tolerated (1.0); for the others
    the tolerance comes from
    tolerance_fxn(aa, i, alignment, scores, conservation_cutoff).

    Args:
        gene: Coding sequence; translated with translate.TranslateRaw and
            split into codons by cai.splitByFrame(gene, 0).
        alignment, scores, conservation_cutoff: Passed through unchanged to
            `tolerance_fxn`.
        tolerance_fxn: Callable returning the tolerance of substituting
            amino acid `aa` at protein position `i`; its result is used as
            a multiplicative weight.
        all_alt_codons: Candidate codons; each must be a key of
            translate._genetic_code.
        site_error_probs: Per-codon error probabilities, indexed by codon
            position.

    Returns:
        A tuple (prob_fold, prob_acc, prob_no_error), each a product over
        the non-'X' sites, where e is the site's error probability:
            prob_fold:     1 - e * (1 - p_error_folds)
            prob_acc:      1 - e * (1 - p_synonymous)
            prob_no_error: 1 - e
    """
    gc = translate._genetic_code
    bad_aa = 'X'
    prot = translate.TranslateRaw(gene, bad_aa)
    codons = cai.splitByFrame(gene, 0)

    prob_fold = 1.0
    prob_acc = 1.0
    prob_no_error = 1.0
    for i, residue in enumerate(prot):
        if residue == bad_aa:
            continue
        codon = codons[i]
        # Alternative codons one substitution away from this codon.
        alt_codons = [
            c for c in all_alt_codons if sequenceDiffs(codon, c) == 1
        ]
        p_error_folds = 0.0  # Tolerance-weighted sum over all alternatives.
        p_synonymous = 0.0   # Weight of alternatives keeping the residue.
        for ac in alt_codons:
            prob_mis_error = probMistranslation(codon, ac)
            aa = gc[ac]
            if aa == residue:
                p_synonymous += prob_mis_error
                tol = 1.0
            else:
                tol = tolerance_fxn(aa, i, alignment, scores,
                                    conservation_cutoff)
            p_error_folds += prob_mis_error * tol

        # The original also derived p_missense / (1 - p_synonymous) here.
        # That value was never returned and the division could raise
        # ZeroDivisionError when p_synonymous == 1, so it has been removed.
        site_error = site_error_probs[i]
        prob_fold *= (1 - site_error * (1 - p_error_folds))
        prob_acc *= (1 - site_error * (1 - p_synonymous))
        prob_no_error *= (1 - site_error)
    return prob_fold, prob_acc, prob_no_error
def getTranslationOutcomes(gene, alignment, tolerance_fxn, scores,
                           conservation_cutoff, all_alt_codons,
                           site_error_probs):
    """Compute three whole-gene translation outcome probabilities.

    For every codon whose residue is not the bad-residue marker 'X', the
    alternative codons that differ from it at exactly one position
    (sequenceDiffs(codon, c) == 1) are enumerated.  Each alternative is
    weighted by probMistranslation(codon, alternative) -- presumably the
    probability of that particular misreading given an error at the site.
    Synonymous alternatives count as fully tolerated (1.0); for the others
    the tolerance comes from
    tolerance_fxn(aa, i, alignment, scores, conservation_cutoff).

    Args:
        gene: Coding sequence; translated with translate.TranslateRaw and
            split into codons by cai.splitByFrame(gene, 0).
        alignment, scores, conservation_cutoff: Passed through unchanged to
            `tolerance_fxn`.
        tolerance_fxn: Callable returning the tolerance of substituting
            amino acid `aa` at protein position `i`; its result is used as
            a multiplicative weight.
        all_alt_codons: Candidate codons; each must be a key of
            translate._genetic_code.
        site_error_probs: Per-codon error probabilities, indexed by codon
            position.

    Returns:
        A tuple (prob_fold, prob_acc, prob_no_error), each a product over
        the non-'X' sites, where e is the site's error probability:
            prob_fold:     1 - e * (1 - p_error_folds)
            prob_acc:      1 - e * (1 - p_synonymous)
            prob_no_error: 1 - e
    """
    gc = translate._genetic_code
    bad_aa = 'X'
    prot = translate.TranslateRaw(gene, bad_aa)
    codons = cai.splitByFrame(gene, 0)

    prob_fold = 1.0
    prob_acc = 1.0
    prob_no_error = 1.0
    for i, residue in enumerate(prot):
        if residue == bad_aa:
            continue
        codon = codons[i]
        # Alternative codons one substitution away from this codon.
        alt_codons = [
            c for c in all_alt_codons if sequenceDiffs(codon, c) == 1
        ]
        p_error_folds = 0.0  # Tolerance-weighted sum over all alternatives.
        p_synonymous = 0.0   # Weight of alternatives keeping the residue.
        for ac in alt_codons:
            prob_mis_error = probMistranslation(codon, ac)
            aa = gc[ac]
            if aa == residue:
                p_synonymous += prob_mis_error
                tol = 1.0
            else:
                tol = tolerance_fxn(aa, i, alignment, scores,
                                    conservation_cutoff)
            p_error_folds += prob_mis_error * tol

        # The original also derived p_missense / (1 - p_synonymous) here.
        # That value was never returned and the division could raise
        # ZeroDivisionError when p_synonymous == 1, so it has been removed.
        site_error = site_error_probs[i]
        prob_fold *= (1 - site_error * (1 - p_error_folds))
        prob_acc *= (1 - site_error * (1 - p_synonymous))
        prob_no_error *= (1 - site_error)
    return prob_fold, prob_acc, prob_no_error