def gillespie(A, x0, tf, death=0, malthus=0, fs=None): """Given an array of fitnesses a mutation matrix and an initial population vector, sample the system until time t. Note: agrees with largest_eigenvector :) """ K = len(A) t = 0 x = np.array(x0) A = np.array(A) hist = [] hist = [(t, np.copy(x))] while t < tf: birth_rates = x.dot(A) death_rates = x * death rates = np.hstack([birth_rates, death_rates]) total_rate = 1.0 / np.sum(rates) dt = random.expovariate(total_rate) selection = inverse_cdf_sample(range(2 * K), rates, normalized=False) t += dt if selection < K: # birth reaction x[selection] += 1 else: selection -= K x[selection] -= 1 if malthus and np.sum(x) > malthus: choice = inverse_cdf_sample(range(K), x, normalized=False) x[choice] -= 1 if t < tf: hist.append((t, np.copy(x))) if random.random() < 0.0001: print t, x, x / float(np.sum(x)) last_state = hist[-1][1] hist.append((tf, last_state)) return hist
def estremo_gibbs(iterations=50000, verbose=False, every=1000, sigma=1, mu=-10, Ne=5): nu = Ne - 1 L = 10 N = 20 code, motif = (sample_code(L=10, sigma=1), random_motif(length=L, num_sites=N)) def log_f((code, motif)): eps = map(lambda x: -log(x), pw_prob_sites(motif, code)) return sum(nu * log(1 / (1 + exp(ep - mu))) for ep in eps) chain = [(code, motif[:])] print log_f((code, motif)) for iteration in trange(iterations): for i in range(N): site = motif[i] for j in range(L): b = site[j] log_ps = [] bps = [bp for bp in "ACGT" if not bp == b] for bp in bps: site_p = subst(site, bp, j) log_ps.append(log_f((code, [site_p]))) log_ps = [p - min(log_ps) for p in log_ps] bp = inverse_cdf_sample(bps, map(exp, log_ps), normalized=False) motif[i] = subst(site, bp, j) for k in range(L - 1): for b1 in "ACGT": for b2 in "ACGT": dws = [random.gauss(0, 0.1) for _ in range(10)] code_ps = [[d.copy() for d in code] for _ in range(10)] for code_p, dw in zip(code_ps, dws): code_p[k][b1, b2] += dw log_ps = [log_f((code_p, motif)) for code_p in code_ps] log_ps = [p - min(log_ps) for p in log_ps] code_p = inverse_cdf_sample(code_ps, map(exp, log_ps), normalized=False) code = code_p print log_f((code, motif)) chain.append((code, motif[:])) return chain x0 = (sample_code(L=10, sigma=1), random_motif(length=10, num_sites=20)) chain = mh(log_f, prop, x0, use_log=True, iterations=iterations, verbose=verbose, every=every) return chain
def update(chromosome,qf,koffs,verbose=False): """Do a single iteration of SSA Given: chromosome: binary vector describing occupation state of ith site qf: number of free copies in cytosol koffs: vector of off-rates Return: updated_chromosome updated_qf time at which transition chromosome -> updated_chromosome occurs""" # Determine which reactions can occur # all free sites can become bound at rate qf * 1 # all bound sites can become unbound at rate koff_i rates = [koffs[i] if bs else qf for i,bs in enumerate(chromosome)] sum_rate = sum(rates) idx = inverse_cdf_sample(range(G),normalize(rates)) time = random.expovariate(sum_rate) updated_chromosome = chromosome[:] if chromosome[idx]: # if reaction is an unbinding reaction... if verbose: print "unbinding at: ",idx updated_chromosome[idx] = 0 updated_qf = qf + 1 else: # a binding reaction... if verbose: print "binding at: ",idx updated_chromosome[idx] = 1 updated_qf = qf - 1 return updated_chromosome,updated_qf,time
def sample_path_ref2(qf,koffs,t_final,chromosome=None,verbose=False): """Simulate a sample path until time t_final and return the marginal occupancies. Integrates update, sample_path_ref framework. """ if chromosome is None: # then start from empty chromosome chromosome = [0] * len(koffs) t = 0 dt = 0 occs = [0 for c in chromosome] while t < t_final: rates = [koffs[i] if bs else qf for i,bs in enumerate(chromosome)] sum_rate = sum(rates) dt = random.expovariate(sum_rate) t += dt if t > t_final: dt = t_final - t + dt # update occupancies after deciding dt, before updating chromosome occs = zipWith(lambda occ,ch:occ + ch*dt,occs,chromosome) idx = inverse_cdf_sample(range(G),normalize(rates)) if chromosome[idx]: # if reaction is an unbinding reaction... if verbose: print "unbinding at: ",idx chromosome[idx] = 0 qf += 1 else: # a binding reaction... if verbose: print "binding at: ",idx chromosome[idx] = 1 qf -= 1 if verbose: print "t:",t,"dt:",dt,"q:",qf,"qbound:",sum(chromosome),"mean occ:",sum([occ/t for occ in occs]) return [occ/t_final for occ in occs]
def sample_site_from_copies(sigma, Ne, L, copies, ps=None):
    """Build a random site of length L with a sampled number of mismatches.

    The mismatch count k is drawn from range(L + 1) using ``ps`` as
    unnormalized weights; when ``ps`` is None it is obtained from
    ps_from_copies(sigma, Ne, L, copies).  The site consists of L - k "A"s
    and k letters drawn from "CGT", passed through permute and joined.
    """
    weights = ps_from_copies(sigma, Ne, L, copies) if ps is None else ps
    k = inverse_cdf_sample(range(L + 1), weights, normalized=False)
    matches = ["A" for _ in range(L - k)]
    mismatches = [random.choice("CGT") for _ in range(k)]
    return "".join(permute(matches + mismatches))
def moran_process(N=1000, turns=10000, init=sample_species, mutate=mutate, fitness=fitness, pop=None): if pop is None: pop = [(lambda spec: (spec, fitness(spec)))(sample_species()) for _ in trange(N)] hist = [] for turn in xrange(turns): fits = [f for (s, f) in pop] #print fits birth_idx = inverse_cdf_sample(range(N), fits, normalized=False) death_idx = random.randrange(N) #print birth_idx,death_idx mother, f = pop[birth_idx] daughter = mutate(mother) #print "mutated" pop[death_idx] = (daughter, fitness(daughter)) mean_fits = mean(fits) hist.append((f, mean_fits)) if turn % 10 == 0: mean_dna_ic = mean( [motif_ic(sites, correct=False) for ((sites, eps), _) in pop]) mean_rec_h = mean( [h_np(boltzmann(eps)) for ((dna, eps), _) in pop]) print turn, "sel_fit:", f, "mean_fit:", mean_fits, "mean_dna_ic:", mean_dna_ic, "mean_rec_h:", mean_rec_h return pop
def moran_process(N=1000,turns=10000,mean_site_muts=1,mean_rec_muts=1,init=sample_species,mutate=mutate, fitness=fitness,pop=None,print_modulus=100,hist_modulus=10): #ringer = (np.array([1]+[0]*(K-1)),sample_eps()) if pop is None: pop = [(lambda spec:(spec,fitness(spec)))(init()) for _ in trange(N)] # ringer = make_ringer() # pop[0] = (ringer,fitness(ringer)) #pop = [(ringer,fitness(ringer)) for _ in xrange(N)] site_mu = min(1/float(n*L) * mean_site_muts,1) rec_mu = min(1/float(K) * mean_rec_muts,1) hist = [] for turn in xrange(turns): fits = [f for (s,f) in pop] #print fits birth_idx = inverse_cdf_sample(range(N),fits,normalized=False) if birth_idx is None: return pop death_idx = random.randrange(N) #print birth_idx,death_idx mother,f = pop[birth_idx] daughter = mutate(mother,site_mu,rec_mu) #print "mutated" pop[death_idx] = (daughter,fitness(daughter)) mean_fits = mean(fits) #hist.append((f,mean_fits)) if turn % hist_modulus == 0: mean_dna_ic = mean([motif_ic(sites,correct=False) for ((sites,eps),_) in pop]) mean_rec = mean([recognizer_promiscuity(x) for (x,f) in pop]) mean_recced = mean([sites_recognized((dna,rec)) for ((dna,rec),_) in pop]) hist.append((turn,f,mean_fits,mean_dna_ic,mean_rec,mean_recced)) if turn % print_modulus == 0: print turn,"sel_fit:",f,"mean_fit:",mean_fits,"mean_dna_ic:",mean_dna_ic,"mean_rec_prom:",mean_rec return pop,hist
def moran_process(mean_rec_muts, mean_site_muts, N=1000, turns=10000, init=make_ringer2, mutate=mutate, fitness=fitness, pop=None): site_mu = mean_site_muts / float(n * L) bd_mu = mean_rec_muts / float(L) if pop is None: pop = [(lambda spec: (spec, fitness(spec)))(init()) for _ in trange(N)] hist = [] for turn in xrange(turns): fits = [f for (s, f) in pop] birth_idx = inverse_cdf_sample(range(N), fits, normalized=False) death_idx = random.randrange(N) #print birth_idx,death_idx mother, f = pop[birth_idx] daughter = mutate(mother, site_mu, bd_mu) #print "mutated" pop[death_idx] = (daughter, fitness(daughter)) mean_fits = mean(fits) hist.append((f, mean_fits)) if turn % 1000 == 0: mean_dna_ic = mean( [motif_ic(sites, correct=False) for ((bd, sites), _) in pop]) print turn, "sel_fit:", f, "mean_fit:", mean_fits, "mean_dna_ic:", mean_dna_ic return pop, hist
def simulate(ks,p,tf=10): n = len(ks) X = [1] * n + [0] * n + [p] #state is a vector of n empty sites, n complexes, protein t = 0 history = [(t,X[:])] while t < tf: p = X[-1] print "X:",X rates = [p*ks[i]*X[i] for i in range(n)] + [X[n + i] for i in range(n)] print "rates:",rates master_rate = float(sum(rates)) dt = expon.rvs(1,1/master_rate) print "dt:",dt print "normalized rates:",normalize(rates) j = inverse_cdf_sample(range(len(X)),normalize(rates)) print "chose reaction:",j if j < n: print "forming complex" #update state for complex formation X[j] = 0 X[n+j] = 1 X[-1] -= 1 else: #update state for complex dissociation print "dissolving complex" X[j] = 0 X[j-n] = 1 X[-1] += 1 t += dt history.append((t,X[:])) return history
def moran(fs, mus, n, t):
    """Run a Moran process of n individuals for t steps over K = len(fs) types.

    The population starts with n individuals assigned to uniformly random
    types.  Each step draws a parent type with weights ``fs * pop``, an
    offspring type with weights ``mus[parent, :]`` and a dying type with
    weights ``pop``.  Returns the final type frequencies.
    """
    K = len(fs)
    pop = np.zeros(K)
    placed = 0
    while placed < n:
        pop[random.randrange(K)] += 1
        placed += 1
    for _ in trange(t):
        parent = inverse_cdf_sample(range(K), fs * pop, normalized=False)
        child = inverse_cdf_sample(range(K), mus[parent, :], normalized=False)
        dying = inverse_cdf_sample(range(K), pop, normalized=False)
        pop[dying] -= 1
        pop[child] += 1
    total = np.sum(pop)
    return pop / total
def moran_tracking(fs, mus, tf, init_state=0):
    """Track a single trajectory of a population until tf.

    Assume mus are probabilities, i.e. row stochastic.

    The tracked lineage sits in one type i at a time.  After an exponential
    lifetime with rate ``fs[i]`` a candidate type j is drawn from
    ``mus[i, :]`` and adopted with probability ``fs[j] / (fs[j] + fs[i])``.

    Args:
        fs: per-type rates (length K).
        mus: K x K array whose rows are used as sampling probabilities.
        tf: final time.
        init_state: index of the starting type.

    Returns:
        List of ``(time, indicator_vector)`` pairs, one per visited state,
        closed by ``(tf, state_at_tf)``.  For ``tf <= 0`` the list holds
        only that closing entry with the initial state.
    """
    t = 0
    K = len(fs)
    x = np.zeros(K)
    i = init_state
    x[i] = 1
    hist = []
    while t < tf:
        hist.append((t, np.copy(x)))
        lifetime = random.expovariate(fs[i])
        t += lifetime
        j = inverse_cdf_sample(range(K), mus[i, :])
        # float() guards against integer division when fs holds ints
        if random.random() < fs[j] / float(fs[j] + fs[i]):
            # the tracked lineage switches to type j
            x[i] = 0
            x[j] = 1
            i = j
        assert np.sum(x) == 1
    # The last recorded state is the one occupied at tf, because the final
    # jump happened after tf.  With tf <= 0 nothing was recorded at all.
    last_state = hist[-1][1] if hist else np.copy(x)
    hist.append((tf, last_state))
    return hist
def prop(motif):
    """Propose a motif by mutating ``ringer`` a random number of times.

    The number of mutations is drawn from range(N) with probabilities
    ``ps``; ``N``, ``ps`` and ``ringer`` come from the enclosing scope.  The
    ``motif`` argument is not consulted.
    """
    num_mutations = inverse_cdf_sample(range(N), ps)
    return mutate_motif_k_times(ringer, num_mutations)
def entropy_from_ps(ps, N):
    """Return h() of the empirical frequencies of N draws from ps.

    N outcomes are drawn from range(len(ps)) with probabilities ``ps``; the
    per-outcome counts divided by N are passed to ``h``.
    """
    num_outcomes = len(ps)
    outcomes = range(num_outcomes)
    counts = [0] * num_outcomes
    for _ in xrange(N):
        drawn = inverse_cdf_sample(outcomes, ps)
        counts[drawn] += 1
    total = float(N)
    return h([count / total for count in counts])
def random_walk_eigenvector(A, iterations=50000):
    """Return visit frequencies of a random walk driven by the rows of A.

    Starting from a uniformly random state, each step records the current
    state and draws the next one from range(K) with probabilities
    ``A[state, :]``.
    """
    K = len(A)
    visits = np.zeros(K)
    current = random.randrange(K)
    for _ in xrange(iterations):
        visits[current] += 1
        row = A[current, :].tolist()
        current = inverse_cdf_sample(range(K), row)
    return visits / np.sum(visits)
def sample_log_odds(matrix, n, lamb=1):
    """Sample n strings over "ACGT", one letter per row of ``matrix``.

    Each row is turned into probabilities by normalizing
    ``exp(-lamb * ep)`` over its entries.
    """
    matrix_probs = []
    for row in matrix:
        boltzmann_weights = [exp(-lamb * ep) for ep in row]
        matrix_probs.append(normalize(boltzmann_weights))
    sites = []
    for _ in xrange(n):
        letters = [inverse_cdf_sample("ACGT", probs) for probs in matrix_probs]
        sites.append("".join(letters))
    return sites
def entropy_from_ps(ps, N):
    """Draw N outcomes from ``ps`` and return ``h`` of their frequencies."""
    K = len(ps)
    xs = range(K)
    tallies = [0] * K
    draws_left = N
    while draws_left > 0:
        tallies[inverse_cdf_sample(xs, ps)] += 1
        draws_left -= 1
    frequencies = [tally/float(N) for tally in tallies]
    return h(frequencies)
def sample_site(sigma, mu, Ne, L):
    """Build a random site of length L with a sampled number of mismatches.

    The mismatch count k is drawn from range(L + 1) with unnormalized
    weights ``phat(k, sigma, mu, Ne, L)``.  The site consists of L - k "A"s
    and k letters drawn from "CGT", passed through permute and joined.
    """
    mismatch_counts = range(L + 1)
    phats = [phat(k, sigma, mu, Ne, L) for k in mismatch_counts]
    k = inverse_cdf_sample(range(L + 1), phats, normalized=False)
    letters = ["A"] * (L - k)
    for _ in range(k):
        letters.append(random.choice("CGT"))
    return "".join(permute(letters))
def weighted_regress_dep(xs,ys, sample_points=100):
    """Fit a line to points resampled with weights favouring small |y|.

    Each (x, y) pair gets weight ``exp(-|y| / mean(|y|))``; ``sample_points``
    pairs are drawn with those unnormalized weights and a degree-1
    ``polyfit`` of the resampled points is returned.
    """
    avg_yval = mean(map(abs,ys))
    ws = []
    for y in ys:
        ws.append(exp(-abs(y)/avg_yval))
    regress_points = []
    for _ in xrange(sample_points):
        rx,ry = inverse_cdf_sample(zip(xs,ys),ws,normalized=False)
        regress_points.append((rx,ry))
    rxs,rys = transpose(regress_points)
    fit = polyfit(rxs,rys,1)
    return fit
def random_walk(A, iterations=50000):
    """Return the sequence of states visited by a random walk on A.

    Starts from a uniformly random state; the next state is drawn from
    range(K) using row ``A[state, :]`` as unnormalized weights.  The result
    is a float array of length ``iterations``.
    """
    K = len(A)
    path = np.zeros(iterations)
    current = random.randrange(K)
    turn = 0
    while turn < iterations:
        path[turn] = current
        weights = A[current, :].tolist()
        current = inverse_cdf_sample(range(K), weights, normalized=False)
        turn += 1
    return path
def moran_ancestor(fs, mus, n, t):
    """Run a Moran process with genealogy and tally common-ancestor types.

    Individuals are (index, type) pairs.  Each step draws a parent with
    weights fs[type], creates a child with a fresh index whose type is drawn
    with weights mus[parent_type, :], records the child's (type, parent
    index) in pop_hist and overwrites a uniformly chosen individual.

    Returns the tallied ancestor types divided by their total.

    NOTE(review): the nesting of the print / tally lines under
    ``if anc_idx > 0`` is reconstructed from a whitespace-mangled source;
    confirm against the original.  find_ancestor is presumably returning a
    non-positive index when there is no common ancestor -- verify.
    """
    K = len(fs)
    # founders get indices 1..n and uniformly random types
    pop = [(i, random.randrange(K)) for i in range(1, n + 1)]
    next_idx = n + 1
    # index -> (type, parent index); founders have parent index 0
    pop_hist = {idx: (typ, 0) for (idx, typ) in pop}
    ancestor_history = np.zeros(K)
    for time in trange(t):
        bidx, btype = inverse_cdf_sample(pop, [fs[typ] for idx, typ in pop],
                                         normalized=False)
        cidx = next_idx
        next_idx += 1
        ctype = inverse_cdf_sample(range(K), mus[btype, :], normalized=False)
        pop_hist[cidx] = (ctype, bidx)
        pop[random.randrange(n)] = (cidx, ctype)
        anc_idx = find_ancestor(pop, pop_hist)
        if anc_idx > 0:
            anc_typ, _ = pop_hist[anc_idx]
            print time, anc_idx, anc_typ if anc_idx > 0 else None
            ancestor_history[anc_typ] += 1
    return ancestor_history / np.sum(ancestor_history)
def est_ncp(ps,n,trials,verbose=False): accs = 0 A = range(len(ps)) for trial in xrange(trials): if verbose: if trial % verbose == 0: print "%s/%s\r" % (trial,trials), sys.stdout.flush() xs = [inverse_cdf_sample(ps) for i in range(n)] if len(set(xs)) == n: accs += 1 return accs/float(trials)
def mutation_motif_with_ic(n, L, desired_ic, epsilon=0.1):
    """Sample a motif near ``desired_ic`` by mutating an all-"A" ringer.

    For every k in 0..n*L the ringer is mutated k times; the motifs for
    which ``inrange(motif, desired_ic, epsilon)`` holds are kept (the whole
    batch is redrawn until at least one qualifies).  One of the kept motifs
    is then drawn with weight proportional to
    ``exp(log_prob_motif_with_mismatch(n, L, k))`` and passed through
    ``deringify_motif``.

    Args:
        n: number of sites.
        L: site length.
        desired_ic: target value handed to ``inrange``.
        epsilon: tolerance handed to ``inrange``.

    Returns:
        The deringified motif.
    """
    ringer = ["A" * L] * n
    N = L * n
    ks_rel_motifs = []
    while not ks_rel_motifs:
        motifs = [mutate_motif_k_times(ringer, k) for k in range(N + 1)]
        ks_rel_motifs = [(k, motif) for k, motif in enumerate(motifs)
                         if inrange(motif, desired_ic, epsilon)]
    ks, rel_motifs = transpose(ks_rel_motifs)
    # Weight the candidates by their mismatch probabilities, not by the raw
    # mismatch counts.  Working relative to the largest log-probability keeps
    # the weights from all underflowing to zero.
    log_ps = [log_prob_motif_with_mismatch(n, L, k) for k in ks]
    top = max(log_ps)
    weights = [exp(lp - top) for lp in log_ps]
    ringified_motif = inverse_cdf_sample(rel_motifs, weights, normalized=False)
    return deringify_motif(ringified_motif)
def mh_eigenvector3(fs, mus, iterations=50000): K = len(mus) print K hist = np.zeros(K) i = random.randrange(K) for _ in xrange(iterations): hist[i] += 1 trans_rates = mus[i, :] j = inverse_cdf_sample(range(K), trans_rates, normalized=False) if random.random() < fs[j] / fs[i]: i = j return hist / np.sum(hist)
def single_moran(fs, mus, iterations=50000): K = len(mus) print K hist = np.zeros(K) i = random.randrange(K) for _ in trange(iterations): hist[i] += 1 trans_rates = mus[i, :] j = inverse_cdf_sample(range(K), trans_rates, normalized=False) if random.random() < 1 / 2.0: #fs[j]/(fs[i]+fs[j]): i = j return hist / np.sum(hist)
def qs_sampling(fs, mus, iterations=50000):
    """Return state frequencies of a walk driven by Poisson offspring counts.

    From state i, offspring counts are drawn as Poisson(mus[i, k] * fs[i])
    for every type k, the count of type i is incremented by one, and the
    next state is drawn with those counts as unnormalized weights.
    """
    K = len(mus)
    visits = np.zeros(K)
    i = random.randrange(K)
    for _ in xrange(iterations):
        visits[i] += 1
        fi = fs[i]
        offspring = []
        for mu in mus[i, :]:
            offspring.append(scipy.stats.poisson(mu * fi).rvs())
        offspring[i] += 1
        i = inverse_cdf_sample(range(K), offspring, normalized=False)
    return visits / np.sum(visits)
def entropic_sampling(matrix, n=16):
    """sample motifs uniformly wrt fitness

    Builds a ringer motif for ``matrix`` with ``n`` sites, creates 10000
    replicate motifs by mutating the ringer k times (k drawn from
    range(n * L) with uniform probabilities) and computes log_fitness for
    each replicate.

    NOTE(review): nothing is returned as written and ``G`` is not defined
    inside this function (it has to come from module scope) -- the
    definition may be truncated; confirm against the original file.
    """
    L = len(matrix)
    ringer = ringer_motif(matrix, n)
    N = n * L
    # uniform probabilities over the candidate mutation counts
    ps = [1.0 / N for i in range(N)]
    ks = range(N)
    replicates = 10000
    motifs = [
        mutate_motif_k_times(ringer, inverse_cdf_sample(ks, ps))
        for i in trange(replicates)
    ]
    log_fs = [log_fitness(matrix, motif, G) for motif in motifs]
def sequential_sample_ref(ks,q):
    """Place q copies one at a time on G = len(ks) sites plus one extra slot.

    Each copy picks a position with probability proportional to the current
    weights (``ks`` followed by a weight of 1 for the extra slot).  A chosen
    site among the first G has its weight set to 0; the extra slot can be
    chosen repeatedly.  Returns the per-position counts (length G + 1).
    Raises Exception if one of the first G sites is ever chosen twice.
    """
    G = len(ks)
    chromosome = [0]*(G+1)
    new_ks = ks[:] + [1]
    for tf in range(q):
        Z = float(sum(new_ks))
        probs = [k/Z for k in new_ks]
        pos = inverse_cdf_sample(probs)
        chromosome[pos] += 1
        if pos < G:
            if chromosome[pos] > 1:
                raise Exception("Chromosome[%s] > 1" % pos)
            new_ks[pos] = 0
    return chromosome
def smart_rsa(ks,q,sampler=None,verbose=False,debug_efficiency=False):
    """Perform random sequential adsorption without rejection.

    Note: Not a method for sampling from equilibrium distribution!

    ks is a vector of the form [k0,k1,kg], i.e. off-rate k0 = 1.

    q positions are drawn one after another from the normalized weights;
    every chosen position other than 0 has its weight set to 0 afterwards.
    Returns the list of chosen positions.  ``sampler``, ``verbose`` and
    ``debug_efficiency`` are not used.
    """
    weights = ks[:]
    chosen = []
    for _ in range(q):
        position = inverse_cdf_sample(normalize(weights))
        chosen.append(position)
        if position > 0:
            weights[position] = 0
    return chosen
def maxent_motif_with_ic(n, L, desired_ic, tolerance=10**-2, beta=None):
    """sample motif from max ent distribution with mean desired_ic

    When ``beta`` is not supplied, desired_ic is first raised by
    ``L * 3 / (2 * log(2) * n)`` (so that motif_ic with its first-order
    correction comes out at the requested value) and beta is obtained from
    find_beta_for_mean_motif_ic.  One count vector per column is then drawn
    and expanded into a column; the columns are transposed into site strings.
    """
    if beta is None:
        # adjust the target upwards to compensate for the IC correction
        correction_per_col = 3 / (2 * log(2) * n)
        desired_ic += L * correction_per_col
        beta = find_beta_for_mean_motif_ic(n, L, desired_ic,
                                           tolerance=tolerance)
    ps = count_ps_from_beta(n, beta)
    cols = []
    for _ in range(L):
        count = inverse_cdf_sample(enumerate_counts(n), ps)
        cols.append(count)
    cols = [sample_col_from_count(count) for count in cols]
    return map("".join, transpose(cols))
def sella_hirsch_gibbs_sampler(Ne=1000,
                               n=16,
                               L=16,
                               G=5 * 10**6,
                               sigma=1,
                               init="random",
                               matrix=None,
                               x0=None,
                               iterations=50000):
    """Gibbs-sample motifs with weights exp((Ne - 1) * log_fitness).

    Each iteration visits every position (i, j) of the motif, builds the four
    motifs obtained by substituting each base there, and picks one with
    probability proportional to exp(log_f) (computed relative to the mean
    log_f of the four).

    ``matrix`` defaults to sample_matrix(L, sigma).  ``x0`` defaults to a
    random motif (init == "random"), the ringer motif (init == "ringer"), or
    ``init`` itself otherwise.

    Returns (matrix, chain), chain being the list of motifs starting with x0.

    NOTE(review): the loop nesting below (one chain entry and one optional
    print per iteration) is reconstructed from a whitespace-mangled source;
    confirm against the original.
    """
    if matrix is None:
        matrix = sample_matrix(L, sigma)
    if x0 is None:
        if init == "random":
            x0 = random_motif(L, n)
        elif init == "ringer":
            x0 = ringer_motif(matrix, n)
        else:
            x0 = init
    nu = Ne - 1

    def log_f(motif):
        return nu * log_fitness(matrix, motif, G)

    # NOTE(review): prop is defined but not called anywhere in this function.
    def prop(motif):
        #return mutate_motif_p(motif,1) # on average, 1 mutation per motif, (binomially distributed)
        return mutate_motif_p(
            motif,
            4)  # on average, 4 mutation per motif, (binomially distributed)

    motif = x0
    chain = [motif]
    for iteration in xrange(iterations):
        for i in range(n):
            for j in range(L):
                # the four candidate motifs differing only at (i, j)
                prop_motifs = [subst_motif(motif, i, j, b) for b in "ACGT"]
                log_fs = map(log_f, prop_motifs)
                # centre the log weights before exponentiating
                log_f_hat = mean(log_fs)
                log_fs_resid = [lf - log_f_hat for lf in log_fs]
                ps = normalize(map(exp, log_fs_resid))
                idx = inverse_cdf_sample(range(4), ps)
                motif = prop_motifs[idx]
        chain.append(motif)
        if iteration % 10 == 0:
            print "iterations:(%s/%s)" % (iteration,
                                          iterations), "log_f:", log_fs[idx]
    return matrix, chain
def random_partition(N, K):
    """Return a tuple of K parts summing to N, drawn part by part.

    With one part left it takes everything that remains.  Otherwise the part
    size i is drawn from 0..remaining with unnormalized weight
    ``num_parts(remaining - i, parts_left - 1, i)``, or 0 when i exceeds the
    previously drawn part.
    """
    remaining = N
    parts_left = K
    cap = N
    parts = []
    for _ in range(K):
        if parts_left == 1:
            size = remaining
        else:
            weights = []
            for i in range(remaining + 1):
                if i <= cap:
                    weights.append(num_parts(remaining - i, parts_left - 1, i))
                else:
                    weights.append(0)
            size = inverse_cdf_sample(range(remaining + 1), weights,
                                      normalized=False)
        parts.append(size)
        remaining -= size
        parts_left -= 1
        cap = size
    return tuple(parts)
def moran_process(fitness, mutate, init_species, N=1000, turns=10000, pop=None, diagnostic_modulus=100, diagnostics=lambda pop: mean([f for (s, f) in pop])): if pop is None: pop = [(lambda spec: (spec, fitness(spec)))(init_species()) for _ in trange(N)] for turn in xrange(turns): fits = [f for (s, f) in pop] birth_idx = inverse_cdf_sample(range(N), fits, normalized=False) death_idx = random.randrange(N) mother, f = pop[birth_idx] daughter = mutate(mother) pop[death_idx] = (daughter, fitness(daughter)) mean_fits = mean(fits) if turn % diagnostic_modulus == 0: print turn, diagnostics(pop) return pop
def moran_process(N=1000,turns=10000,init=sample_species,mutate=mutate,fitness=fitness,pop=None): if pop is None: pop = [(lambda spec:(spec,fitness(spec)))(sample_species()) for _ in trange(N)] hist = [] for turn in xrange(turns): fits = [f for (s,f) in pop] #print fits birth_idx = inverse_cdf_sample(range(N),fits,normalized=False) death_idx = random.randrange(N) #print birth_idx,death_idx mother,f = pop[birth_idx] daughter = mutate(mother) #print "mutated" pop[death_idx] = (daughter,fitness(daughter)) mean_fits = mean(fits) hist.append((f,mean_fits)) if turn % 10 == 0: mean_dna_ic = mean([motif_ic(sites,correct=False) for ((sites,eps),_) in pop]) mean_rec_h = mean([h_np(boltzmann(eps)) for ((dna,eps),_) in pop]) print turn,"sel_fit:",f,"mean_fit:",mean_fits,"mean_dna_ic:",mean_dna_ic,"mean_rec_h:",mean_rec_h return pop
def moran_process(N=1000,turns=10000,mu=10**-3,ep_sigma=10**-3): ringer = (np.array([1]+[0]*(K-1)),sample_eps()) # pop = [(lambda spec:(spec,fitness(spec)))(sample_species()) # for _ in xrange(N)] pop = [(ringer,fitness(ringer)) for _ in xrange(N)] hist = [] for turn in xrange(turns): fits = [f for (s,f) in pop] #print fits birth_idx = inverse_cdf_sample(range(N),fits,normalized=False) death_idx = random.randrange(N) #print birth_idx,death_idx mother,f = pop[birth_idx] daughter = mutate(mother,mu,ep_sigma) #print "mutated" pop[death_idx] = (daughter,fitness(daughter)) mean_fits = mean(fits) hist.append((f,mean_fits)) if turn % 100 == 0: mean_dna_h = mean([h_np(dna) for ((dna,eps),f) in pop]) mean_rec_h = mean([h_np(boltzmann(eps)) for ((dna,eps),f) in pop]) print turn,"mean_fitness:",f,mean_fits,mean_dna_h,mean_rec_h return hist
def moran_process(mean_rec_muts,mean_site_muts,N=1000,turns=10000, init=make_ringer2,mutate=mutate,fitness=fitness,pop=None): site_mu = mean_site_muts/float(n*L) bd_mu = mean_rec_muts/float(L) if pop is None: pop = [(lambda spec:(spec,fitness(spec)))(init()) for _ in trange(N)] hist = [] for turn in xrange(turns): fits = [f for (s,f) in pop] birth_idx = inverse_cdf_sample(range(N),fits,normalized=False) death_idx = random.randrange(N) #print birth_idx,death_idx mother,f = pop[birth_idx] daughter = mutate(mother,site_mu,bd_mu) #print "mutated" pop[death_idx] = (daughter,fitness(daughter)) mean_fits = mean(fits) hist.append((f,mean_fits)) if turn % 1000 == 0: mean_dna_ic = mean([motif_ic(sites,correct=False) for ((bd,sites),_) in pop]) print turn,"sel_fit:",f,"mean_fit:",mean_fits,"mean_dna_ic:",mean_dna_ic return pop,hist
def update(self, final_time):
    """Advance the simulation by at most one reaction.

    Evaluates every propensity at the current state and time, draws a
    waiting time via rexp(total propensity) and, if the reaction would occur
    before ``final_time``, picks a stoichiometry vector with the normalized
    propensities as probabilities, adds it to the state and records the new
    (time, state).  Otherwise ``finished_run`` is set and state and time are
    left as they were.

    Raises Exception("No possible reactions") when all propensities are 0.
    """
    propensities = []
    for propensity in self.propensities:
        propensities.append(propensity(self.state, self.time))
    self.logging(propensities)
    total_propensity = sum(propensities)
    if total_propensity == 0:
        raise Exception("No possible reactions")
    # time of the next reaction
    new_time = self.time + rexp(total_propensity)
    if not new_time < final_time:
        self.finished_run = True
        return
    # which reaction fires
    v = inverse_cdf_sample(self.stoich_vectors, normalize(propensities))
    self.state = zipWith(lambda x, y: x + y, self.state, v)
    self.time = new_time
    self.logging(str(self.time) + " " + str(self.state))
    self.history.append((self.time, self.state))
    self.reactions_performed += 1
def sample_site_from_copies(sigma,Ne,L,copies,ps=None):
    """Return a random length-L site with a sampled number of mismatches.

    The mismatch count k is drawn from range(L+1) with ``ps`` as
    unnormalized weights (ps_from_copies(sigma, Ne, L, copies) when ps is
    None).  The letters are L-k "A"s followed by k draws from "CGT", passed
    through permute before joining.
    """
    if ps is None:
        ps = ps_from_copies(sigma, Ne, L, copies)
    k = inverse_cdf_sample(range(L+1), ps,normalized=False)
    letters = ["A"] * (L-k)
    for _ in range(k):
        letters.append(random.choice("CGT"))
    shuffled = permute(letters)
    return "".join(shuffled)
def sample_site(sigma,mu,Ne,L):
    """Return a random length-L site with a sampled number of mismatches.

    The mismatch count k is drawn from range(L+1) with unnormalized weights
    phat(k, sigma, mu, Ne, L).  The letters are L-k "A"s followed by k draws
    from "CGT", passed through permute before joining.
    """
    phats = []
    for k in range(L+1):
        phats.append(phat(k,sigma,mu,Ne,L))
    k = inverse_cdf_sample(range(L+1), phats,normalized=False)
    matches = ["A" for _ in range(L-k)]
    mismatches = [random.choice("CGT") for _ in range(k)]
    return "".join(permute(matches + mismatches))
def rpower_law(alpha=2, M=1000):
    """Draw an integer from 1..M with weight proportional to 1 / i**alpha."""
    support = range(1, M + 1)
    weights = [1.0 / (i**alpha) for i in range(1, M + 1)]
    return inverse_cdf_sample(support, weights, normalized=False)
def sample_site_vb(matrix, mu, Ne):
    """Sample one letter of "ACGT" per row and join them into a site string.

    NOTE(review): the letters are drawn from ``new_mat``, which is not
    defined in this function, while the ``log_new_mat`` computed below is
    never used.  Presumably new_mat was meant to be derived from
    log_new_mat (e.g. by exponentiating and normalizing each row) -- confirm
    the intended transform before relying on this function.
    """
    nu = Ne - 1
    alpha = nu*exp(-mu)
    # one value -(1 + alpha * exp(ep)) per matrix entry
    log_new_mat = [([(-(1+alpha*(exp(ep)))) for ep in row]) for row in matrix]
    return "".join(inverse_cdf_sample("ACGT",ps) for ps in new_mat)
def rQ(xs):
    """MFA proposal

    Draws one value from range(K) per entry of ``mf_hs`` using
    ``boltzmann(mf_h)`` as probabilities; ``K`` and ``mf_hs`` come from the
    enclosing scope and ``xs`` is not consulted.
    """
    proposal = []
    for mf_h in mf_hs:
        proposal.append(inverse_cdf_sample(range(K), boltzmann(mf_h)))
    return proposal
def sample_from_matrix(matrix, lamb):
    """Sample a string over "ACGT", one letter per row of ``matrix``.

    Within a row each letter has unnormalized weight ``exp(-lamb * ep)``.
    """
    letters = []
    for row in matrix:
        weights = [exp(-lamb*ep) for ep in row]
        letters.append(inverse_cdf_sample("ACGT", weights, normalized=False))
    return "".join(letters)
def sample_ps(ps, N):
    """Return N independent draws from the categorical distribution ps.

    Args:
        ps: probabilities of the outcomes 0 .. len(ps) - 1.
        N: number of draws.

    Returns:
        List of N outcome indices.
    """
    # The outcomes are the indices of ps; their number is unrelated to the
    # number of draws N.
    outcomes = range(len(ps))
    return [inverse_cdf_sample(outcomes, ps) for _ in xrange(N)]
def sample_site():
    """Sample a string over "ACGT", one letter per distribution in ``pss``.

    ``pss`` comes from the enclosing scope.
    """
    letters = [inverse_cdf_sample("ACGT",ps) for ps in pss]
    return "".join(letters)
def sample_col_ents(ent_dist,L,a,b):
    """Pick one solution of interval_subset_sum over the keys of ent_dist.

    Every solution is weighted by the product of ``ent_dist[s]`` over its
    elements; the weights are used unnormalized.
    """
    solutions = interval_subset_sum(ent_dist.keys(),L,a,b)
    weights = []
    for sol in solutions:
        weights.append(prod(ent_dist[s] for s in sol))
    return inverse_cdf_sample(solutions,weights,normalized=False)
def update_spatial(chromosome,qf,koffs,verbose=False):
    """Do a single iteration of SSA with sliding reactions included.

    Given:
    chromosome: vector describing occupation state of ith site.
    Each site can be unbound (0), bound non-specifically (1) or bound
    specifically (2).  Non-specific binding has a free energy of -7 kbt,
    sequence-specific binding is as usual.  A TF bound non-specifically can
    slide 1 bp left or right with no change in free energy.
    qf: number of free copies in cytosol
    koffs: vector of off-rates

    Return:
    updated_chromosome (a copy; the input list is left untouched)
    updated_qf
    waiting time until the transition chromosome -> updated_chromosome

    Reaction codes: 'N' non-specific binding, 'S' switch to specific
    binding, 'F' fall off, 'L'/'R' slide left/right (circular, mod G),
    'U' specific -> non-specific.
    """
    # Determine which reactions can occur
    # free copies can bind non-specifically at rate qf*k_ns
    # non-specifically bound copies can transition to specific binding or slide left or right
    # specifically bound copies can transition to non-specific binding
    ep_ns = -7
    k_ns = exp(-beta*ep_ns)  # beta is read from module scope
    k1 = 1 # rate for reactions that happen on default simulation timescale
    G = len(chromosome)
    # NOTE(review): an 'N' reaction is listed for every site regardless of
    # its occupancy (the guarded variant is commented out below), so binding
    # can be drawn for an already occupied site -- confirm this is intended.
    reactions = [(i,'N',qf*k_ns) for i in xrange(G)]
    for i,c in enumerate(chromosome):
        # if c == 0 and qf > 0:
        #     reactions.append((i,'N',qf*k_ns))
        if c == 1: # if bound non-specifically
            # tf can bind specifically
            reactions.append((i,'S',k1))
            # tf can fall off
            reactions.append((i,'F',k1))
            # tf can slide
            if chromosome[(i-1)%G] == 0:
                reactions.append((i,'L',k1))
            if chromosome[(i+1)%G] == 0:
                reactions.append((i,'R',k1))
        elif c == 2:
            reactions.append((i,'U',koffs[i]))
    ###
    rates = [reaction[2] for reaction in reactions]
    sum_rate = sum(rates)
    chr_idx,rx_type,rate = inverse_cdf_sample(reactions,normalize(rates))
    time = random.expovariate(sum_rate)
    if verbose:
        print chr_idx,rx_type,rate,time
    updated_chromosome = chromosome[:]
    if rx_type == 'N': # tf binds non-specifically
        updated_chromosome[chr_idx] = 1
        updated_qf = qf - 1
    elif rx_type == 'S': # tf transitions to specific binding
        updated_chromosome[chr_idx] = 2
        updated_qf = qf
    elif rx_type == 'F':
        # tf falls off and returns to the free pool
        updated_chromosome[chr_idx] = 0
        updated_qf = qf + 1
    elif rx_type == 'L':
        # tf slides one position to the left
        updated_chromosome[chr_idx] = 0
        updated_chromosome[(chr_idx-1)%G] = 1
        updated_qf = qf
    elif rx_type == 'R':
        # tf slides one position to the right
        updated_chromosome[chr_idx] = 0
        updated_chromosome[(chr_idx+1)%G] = 1
        updated_qf = qf
    elif rx_type == 'U':
        # specifically bound tf becomes non-specifically bound
        updated_chromosome[chr_idx] = 1
        updated_qf = qf
    else:
        print "Didn't recognize reaction type:",rx_type
        assert False
    return updated_chromosome,updated_qf,time
def rcol(beta, N):
    """Return a column of N letters drawn from one random distribution.

    The distribution over "ACGT" is ``rvector(beta, K=4)`` and is drawn once
    per call.
    """
    ps = rvector(beta, K=4)
    col = []
    for _ in xrange(N):
        col.append(inverse_cdf_sample("ACGT", ps))
    return col
def rpower_law(alpha=2,M=1000):
    """Draw an integer i from 1..M with unnormalized weight 1.0/(i**alpha)."""
    values = range(1,M+1)
    weights = []
    for i in range(1,M+1):
        weights.append(1.0/(i**alpha))
    return inverse_cdf_sample(values,weights,normalized=False)
def rcol(beta, N):
    """Draw N letters of "ACGT" from a single ``rvector(beta, K=4)``."""
    letter_probs = rvector(beta, K=4)
    draws_left = N
    col = []
    while draws_left > 0:
        col.append(inverse_cdf_sample("ACGT",letter_probs))
        draws_left -= 1
    return col
def rQ():
    """Sample a string over "ACGT", one letter per distribution in ``qss``.

    ``qss`` comes from the enclosing scope.
    """
    letters = []
    for qs in qss:
        letters.append(inverse_cdf_sample("ACGT",qs))
    return "".join(letters)
def rQ():
    """Sample a string over "ACGT", one letter per weight vector in ``pss``.

    The weights are used unnormalized; ``pss`` comes from the enclosing
    scope.
    """
    letters = []
    for ps in pss:
        letters.append(inverse_cdf_sample("ACGT",ps,normalized=False))
    return "".join(letters)