Example #1
0
def hillClimb(gene, direction, max_steps, alignment, regions, tolerance_fxn,
              scores, conservation_cutoff, error_rate, tracer):
    """Hill-climb the translation score by swapping synonymous codons.

    Each iteration picks a random amino acid and a random region, swaps the
    codons at two positions in that region that encode that amino acid, and
    rescores the gene with getTranslationOutcomes.  A swap is kept when the
    score changes by more than a small epsilon, sign(diff) equals
    ``direction``, and checkSequence accepts the new sequence; otherwise the
    swap is undone.

    Parameters:
        gene: coding sequence, split into codons with cai.splitByFrame.
        direction: value compared against sign(new_score - score).
        max_steps: number of consecutive rejected swaps after which the
            search stops (the counter resets on every accepted swap).
        alignment, tolerance_fxn, scores, conservation_cutoff: passed
            through unchanged to getTranslationOutcomes.
        regions: sequence of (start, end) pairs; codon positions are drawn
            from range(start, end).
        error_rate: passed to getSiteErrorProbabilities.
        tracer: object with write() and flush(); receives a header line and
            one record per accepted swap.

    Returns:
        The final gene as a single string of joined codons.
    """
    import sys

    codons = cai.splitByFrame(gene, 0)
    gc = translate._genetic_code
    all_alt_codons = [c for c in gc if 'U' not in c]
    prot = translate.TranslateRaw(gene, bad_aa='X')
    letters = 'ACDEFGHIKLNPQRSTVY'
    eps = 1e-5

    start_seq = ''.join(codons)
    site_error_probs = getSiteErrorProbabilities(start_seq, error_rate)
    orig_score = getTranslationOutcomes(
        start_seq, alignment, tolerance_fxn, scores, conservation_cutoff,
        all_alt_codons, site_error_probs)[0]
    score = orig_score
    tracer.write("score\tprev.score\tdiff\torig.score\tstep\ttotal.step\n")
    tracer.write("# Starting hillclimb\n")

    # A failed draw below does not advance `step`, so if no region holds two
    # positions with the same amino acid the loop could never terminate.
    # Detect that case up front and return the gene unchanged.
    if max_steps > 0:
        swap_possible = any(
            sum(1 for i in range(region[0], region[1]) if prot[i] == aa) >= 2
            for region in regions for aa in letters)
        if not swap_possible:
            return start_seq

    step = 0
    total_steps = 0
    while step < max_steps:
        # Pick an amino acid and a part of the protein at random.
        aa = random.choice(letters)
        region = random.choice(regions)
        # Positions in the region whose codons encode that amino acid.
        indices = [i for i in range(region[0], region[1]) if prot[i] == aa]
        if len(indices) < 2:
            # Nothing to swap for this draw; redraw without counting a step.
            continue
        first, second = random.sample(indices, 2)
        codons[first], codons[second] = codons[second], codons[first]

        # Score the resulting gene.
        candidate = ''.join(codons)
        site_error_probs = getSiteErrorProbabilities(candidate, error_rate)
        new_score = getTranslationOutcomes(
            candidate, alignment, tolerance_fxn, scores, conservation_cutoff,
            all_alt_codons, site_error_probs)[0]

        diff = new_score - score
        if abs(diff) > eps and sign(diff) == direction and checkSequence(
                candidate, gene, prot):
            # Accepted: record it and restart the rejection counter.
            line = "#%s\n%s\t%d\t%d\n" % (
                candidate, pf([new_score, score, diff, orig_score]),
                step, total_steps)
            sys.stdout.write(line)
            tracer.write(line)
            tracer.flush()
            step = 0
            score = new_score
        else:
            # Rejected: undo the swap.
            codons[first], codons[second] = codons[second], codons[first]
            step += 1
        total_steps += 1
    return ''.join(codons)
Example #2
0
File: transacc.py Project: dad/base
def hillClimb(gene, direction, max_steps, alignment, regions, tolerance_fxn, scores, conservation_cutoff, error_rate, tracer):
	"""Hill-climb the translation score by swapping synonymous codons.

	Each iteration picks a random amino acid and a random region, swaps the
	codons at two positions in that region that encode that amino acid, and
	rescores the gene with getTranslationOutcomes.  A swap is kept when the
	score changes by more than a small epsilon, sign(diff) equals
	``direction``, and checkSequence accepts the new sequence; otherwise the
	swap is undone.

	Parameters:
		gene: coding sequence, split into codons with cai.splitByFrame.
		direction: value compared against sign(new_score - score).
		max_steps: number of consecutive rejected swaps after which the
			search stops (the counter resets on every accepted swap).
		alignment, tolerance_fxn, scores, conservation_cutoff: passed
			through unchanged to getTranslationOutcomes.
		regions: sequence of (start, end) pairs; codon positions are drawn
			from range(start, end).
		error_rate: passed to getSiteErrorProbabilities.
		tracer: object with write() and flush(); receives a header line and
			one record per accepted swap.

	Returns:
		The final gene as a single string of joined codons.
	"""
	import sys

	codons = cai.splitByFrame(gene,0)
	gc = translate._genetic_code
	all_alt_codons = [c for c in gc if 'U' not in c]
	prot = translate.TranslateRaw(gene,bad_aa='X')
	letters = 'ACDEFGHIKLNPQRSTVY'
	eps = 1e-5

	start_seq = ''.join(codons)
	site_error_probs = getSiteErrorProbabilities(start_seq, error_rate)
	orig_score = getTranslationOutcomes(start_seq, alignment, tolerance_fxn, scores, conservation_cutoff, all_alt_codons, site_error_probs)[0]
	score = orig_score
	tracer.write("score\tprev.score\tdiff\torig.score\tstep\ttotal.step\n")
	tracer.write("# Starting hillclimb\n")

	# A failed draw below does not advance `step`, so if no region holds two
	# positions with the same amino acid the loop could never terminate.
	# Detect that case up front and return the gene unchanged.
	if max_steps > 0:
		swap_possible = any(
			sum(1 for i in range(region[0],region[1]) if prot[i]==aa) >= 2
			for region in regions for aa in letters)
		if not swap_possible:
			return start_seq

	step = 0
	total_steps = 0
	while step < max_steps:
		# Pick an amino acid and a part of the protein at random.
		aa = random.choice(letters)
		region = random.choice(regions)
		# Positions in the region whose codons encode that amino acid.
		indices = [i for i in range(region[0],region[1]) if prot[i]==aa]
		if len(indices)<2:
			# Nothing to swap for this draw; redraw without counting a step.
			continue
		first, second = random.sample(indices, 2)
		codons[first], codons[second] = codons[second], codons[first]

		# Score the resulting gene.
		candidate = ''.join(codons)
		site_error_probs = getSiteErrorProbabilities(candidate, error_rate)
		new_score = getTranslationOutcomes(candidate, alignment, tolerance_fxn, scores, conservation_cutoff, all_alt_codons, site_error_probs)[0]

		diff = new_score - score
		if abs(diff)>eps and sign(diff) == direction and checkSequence(candidate, gene, prot):
			# Accepted: record it and restart the rejection counter.
			line = "#%s\n%s\t%d\t%d\n" %(candidate, pf([new_score, score, diff, orig_score]), step, total_steps)
			sys.stdout.write(line)
			tracer.write(line)
			tracer.flush()
			step = 0
			score = new_score
		else:
			# Rejected: undo the swap.
			codons[first], codons[second] = codons[second], codons[first]
			step += 1
		total_steps += 1
	return ''.join(codons)
Example #3
0
File: transacc.py Project: dad/base
def getSiteErrorProbabilities(gene, error_rate):
	"""Return the per-codon probability of a translation error.

	Each codon of ``gene`` gets a weight from siteErrorWeight (called with
	cai._yeast_relative_adaptiveness and the constant 10.0).  The weights
	are rescaled so that they sum to the number of codons, then multiplied
	by ``error_rate``.

	Parameters:
		gene: coding sequence, split into codons with cai.splitByFrame.
		error_rate: factor applied to every normalised site weight.

	Returns:
		A list with one probability per codon, in codon order (empty when
		the gene yields no codons).

	Raises:
		ZeroDivisionError: if the gene has codons but their weights sum
			to zero.
	"""
	codons = cai.splitByFrame(gene,0)
	n_codons = len(codons)
	raw_wts = [siteErrorWeight(c, cai._yeast_relative_adaptiveness, 10.0) for c in codons]
	# Float denominator so the rescaling cannot truncate under Python 2
	# should siteErrorWeight ever return integers.
	weight_sum = float(sum(raw_wts))
	# Site weights sum to the length of the gene.
	site_error_wts = [n_codons*w/weight_sum for w in raw_wts]
	assert abs(sum(site_error_wts) - float(n_codons)) < 1e-5
	return [w*error_rate for w in site_error_wts]
Example #4
0
def getSiteErrorProbabilities(gene, error_rate):
    """Return the per-codon probability of a translation error.

    Each codon of ``gene`` gets a weight from siteErrorWeight (called with
    cai._yeast_relative_adaptiveness and the constant 10.0).  The weights
    are rescaled so that they sum to the number of codons, then multiplied
    by ``error_rate``.

    Parameters:
        gene: coding sequence, split into codons with cai.splitByFrame.
        error_rate: factor applied to every normalised site weight.

    Returns:
        A list with one probability per codon, in codon order (empty when
        the gene yields no codons).

    Raises:
        ZeroDivisionError: if the gene has codons but their weights sum
            to zero.
    """
    codons = cai.splitByFrame(gene, 0)
    n_codons = len(codons)
    raw_wts = [
        siteErrorWeight(c, cai._yeast_relative_adaptiveness, 10.0)
        for c in codons
    ]
    # Float denominator so the rescaling cannot truncate under Python 2
    # should siteErrorWeight ever return integers.
    weight_sum = float(sum(raw_wts))
    # Site weights sum to the length of the gene.
    site_error_wts = [n_codons * w / weight_sum for w in raw_wts]
    assert abs(sum(site_error_wts) - float(n_codons)) < 1e-5
    return [w * error_rate for w in site_error_wts]
Example #5
0
def getTranslationOutcomes(gene, alignment, tolerance_fxn, scores,
                           conservation_cutoff, all_alt_codons,
                           site_error_probs):
    """Compute three whole-gene translation outcome probabilities.

    For every translated position that is not the bad-amino-acid marker
    'X', the codons in ``all_alt_codons`` that differ from the site's codon
    at exactly one position (per sequenceDiffs) are treated as the possible
    mistranslations, each weighted by probMistranslation(codon, alt).

    Parameters:
        gene: coding sequence; translated with translate.TranslateRaw and
            split into codons with cai.splitByFrame.
        alignment, scores, conservation_cutoff: passed through unchanged
            to ``tolerance_fxn``.
        tolerance_fxn: called as tolerance_fxn(aa, i, alignment, scores,
            conservation_cutoff) for each non-synonymous alternative; its
            result is used as the weight with which that error is tolerated.
        all_alt_codons: candidate codons to consider as mistranslations.
        site_error_probs: per-codon error probabilities, indexed by codon
            position.

    Returns:
        A tuple (prob_fold, prob_acc, prob_no_error), each a product over
        the scored sites:
        prob_fold     -- of 1 - p_err * (1 - p_error_folds)
        prob_acc      -- of 1 - p_err * (1 - p_synonymous)
        prob_no_error -- of 1 - p_err
        where p_err is the site's entry in ``site_error_probs``.
    """
    gc = translate._genetic_code
    bad_aa = 'X'
    prot = translate.TranslateRaw(gene, bad_aa)
    codons = cai.splitByFrame(gene, 0)

    prob_fold = 1.0
    prob_acc = 1.0
    prob_no_error = 1.0
    for i, residue in enumerate(prot):
        if residue == bad_aa:
            continue
        codon = codons[i]
        p_error_folds = 0.0  # Weight of errors at this site that are tolerated.
        p_synonymous = 0.0  # Weight of errors that keep the amino acid.
        for alt in all_alt_codons:
            # Only single-position differences count as mistranslations.
            if sequenceDiffs(codon, alt) != 1:
                continue
            p_alt = probMistranslation(codon, alt)
            alt_aa = gc[alt]
            if alt_aa == residue:
                # A synonymous error is always tolerated.
                p_synonymous += p_alt
                p_error_folds += p_alt
            else:
                p_error_folds += p_alt * tolerance_fxn(
                    alt_aa, i, alignment, scores, conservation_cutoff)
        p_site_error = site_error_probs[i]
        prob_fold *= (1 - p_site_error * (1 - p_error_folds))
        prob_acc *= (1 - p_site_error * (1 - p_synonymous))
        prob_no_error *= (1 - p_site_error)

    return prob_fold, prob_acc, prob_no_error
Example #6
0
File: transacc.py Project: dad/base
def getTranslationOutcomes(gene, alignment, tolerance_fxn, scores, conservation_cutoff, all_alt_codons, site_error_probs):
	"""Compute three whole-gene translation outcome probabilities.

	For every translated position that is not the bad-amino-acid marker
	'X', the codons in ``all_alt_codons`` that differ from the site's codon
	at exactly one position (per sequenceDiffs) are treated as the possible
	mistranslations, each weighted by probMistranslation(codon, alt).

	Parameters:
		gene: coding sequence; translated with translate.TranslateRaw and
			split into codons with cai.splitByFrame.
		alignment, scores, conservation_cutoff: passed through unchanged
			to ``tolerance_fxn``.
		tolerance_fxn: called as tolerance_fxn(aa, i, alignment, scores,
			conservation_cutoff) for each non-synonymous alternative; its
			result is used as the weight with which that error is tolerated.
		all_alt_codons: candidate codons to consider as mistranslations.
		site_error_probs: per-codon error probabilities, indexed by codon
			position.

	Returns:
		A tuple (prob_fold, prob_acc, prob_no_error), each a product over
		the scored sites:
		prob_fold     -- of 1 - p_err * (1 - p_error_folds)
		prob_acc      -- of 1 - p_err * (1 - p_synonymous)
		prob_no_error -- of 1 - p_err
		where p_err is the site's entry in ``site_error_probs``.
	"""
	gc = translate._genetic_code
	bad_aa = 'X'
	prot = translate.TranslateRaw(gene, bad_aa)
	codons = cai.splitByFrame(gene,0)

	prob_fold = 1.0
	prob_acc = 1.0
	prob_no_error = 1.0
	for i, residue in enumerate(prot):
		if residue == bad_aa:
			continue
		codon = codons[i]
		p_error_folds = 0.0 # Weight of errors at this site that are tolerated.
		p_synonymous = 0.0 # Weight of errors that keep the amino acid.
		for alt in all_alt_codons:
			# Only single-position differences count as mistranslations.
			if sequenceDiffs(codon,alt) != 1:
				continue
			p_alt = probMistranslation(codon, alt)
			alt_aa = gc[alt]
			if alt_aa == residue:
				# A synonymous error is always tolerated.
				p_synonymous += p_alt
				p_error_folds += p_alt
			else:
				p_error_folds += p_alt * tolerance_fxn(alt_aa, i, alignment, scores, conservation_cutoff)
		p_site_error = site_error_probs[i]
		prob_fold *= (1-p_site_error*(1-p_error_folds))
		prob_acc *= (1-p_site_error*(1-p_synonymous))
		prob_no_error *= (1-p_site_error)

	return prob_fold, prob_acc, prob_no_error