def update_spatial_xs(q,ns_bound,s_bound,koffs,verbose=False):
    """Advance the spatial binding model by one randomly chosen reaction.

    Given:
    q: total copy number
    ns_bound: list of chromosomal positions holding a non-specifically
        bound copy
    s_bound: list of chromosomal positions holding a specifically bound copy
    koffs: chromosomal off-rates; koffs[i] is used as the rate of the 'U'
        reaction at position i, and len(koffs) is the chromosome length G
    verbose: if true, print the number of free copies and the chosen reaction

    Every candidate reaction is listed as a (position, type, rate) triple:
      'N'  a free copy binds non-specifically; offered at every position
           with rate qf*k_ns, where qf is the number of unbound copies
      'S'  a non-specifically bound copy becomes specifically bound
      'F'  a non-specifically bound copy falls off
      'L'  a non-specifically bound copy slides to (i-1) % G
      'R'  a non-specifically bound copy slides to (i+1) % G
      'U'  a specifically bound copy returns to non-specific binding
    Slides are only offered when the destination holds no bound copy.

    One reaction is drawn by inverse_cdf_sampler from the reactions and
    their normalized rates; the waiting time is drawn from
    random.expovariate with the summed rate.

    return:
    (updated_ns_bound, updated_s_bound, time) -- the inputs are copied,
    not modified in place.
    """
    ep_ns = -7
    k_ns = exp(-beta*ep_ns)
    k1 = 1 # rate for reactions that happen on default simulation timescale
    G = len(koffs)
    qf = q - (len(ns_bound) + len(s_bound))  # copies not bound anywhere
    if verbose:
        # A single pre-formatted string prints the same text whether print
        # is the Python 2 statement or the print function.
        print("qf: %s" % (qf,))
    # Build the occupancy lookup once instead of concatenating and scanning
    # ns_bound + s_bound twice for every bound copy.
    occupied = set(ns_bound)
    occupied.update(s_bound)
    ns_rate = qf*k_ns
    reactions = [(i,'N',ns_rate) for i in xrange(G)]
    for i in ns_bound:
        # tf can bind specifically
        reactions.append((i,'S',k1))
        # tf can fall off
        reactions.append((i,'F',k1))
        # tf can slide, but only onto an unoccupied neighbour
        if (i-1) % G not in occupied:
            reactions.append((i,'L',k1))
        if (i+1) % G not in occupied:
            reactions.append((i,'R',k1))
    for i in s_bound:
        reactions.append((i,'U',koffs[i]))
    rates = [reaction[2] for reaction in reactions]
    sum_rate = sum(rates)
    # Draw the reaction first and the waiting time second so the random
    # stream is consumed in the same order as before.
    chr_idx,rx_type,rate = inverse_cdf_sampler(reactions,normalize(rates))()
    wait_time = random.expovariate(sum_rate)
    updated_ns_bound = ns_bound[:]
    updated_s_bound = s_bound[:]
    if verbose:
        print("%s %s %s %s ns: %s s: %s" % (
            chr_idx, rx_type, rate, wait_time, len(ns_bound), len(s_bound)))
    if rx_type == 'N':
        # tf binds non-specifically
        updated_ns_bound.append(chr_idx)
    elif rx_type == 'S':
        # tf transitions to specific binding
        updated_ns_bound.remove(chr_idx)
        updated_s_bound.append(chr_idx)
    elif rx_type == 'F':
        updated_ns_bound.remove(chr_idx)
    elif rx_type == 'L':
        updated_ns_bound.remove(chr_idx)
        updated_ns_bound.append((chr_idx-1)%G)
    elif rx_type == 'R':
        updated_ns_bound.remove(chr_idx)
        updated_ns_bound.append((chr_idx+1)%G)
    elif rx_type == 'U':
        updated_s_bound.remove(chr_idx)
        updated_ns_bound.append(chr_idx)
    else:
        # Raised explicitly so the check survives running with -O.
        raise AssertionError("Didn't recognize reaction type: %s" % (rx_type,))
    return updated_ns_bound,updated_s_bound,wait_time
def gibbs_sample_fast(ks,xs,iterations,cur_ks=None,cur_Z=None):
    """Run `iterations` Gibbs sweeps over the TF positions.

    The per-site weights and their total are updated incrementally as each
    TF is lifted and re-placed, rather than being rebuilt for every TF.

    ks: a G-length vector of rate constants
    xs: a q-length vector such that x_i contains the position of the ith TF
        ([0 to G-1]), or G if off-chromosome
    iterations: number of full sweeps over all q TFs
    cur_ks, cur_Z: ignored -- both are recomputed from ks and xs before
        they are read

    Returns the updated position vector; it starts from xs[:] and xs is
    not assigned to.

    NOTE(review): the weight vector handed to inverse_cdf_sampler has
    length G with no extra off-chromosome entry, so presumably a resampled
    TF always lands on 0..G-1 -- confirm against inverse_cdf_sampler.
    NOTE(review): if xs holds the same on-chromosome position twice, the
    running total is decremented twice for that site -- confirm callers
    never pass duplicates.
    """
    G = len(ks)
    q = len(xs)
    xs_new = xs[:]
    weights = ks[:]
    total = float(sum(weights))
    # Zero out every site that already holds a TF.
    for pos in xs_new:
        if not pos < G:
            continue
        weights[pos] = 0
        total -= ks[pos]
    for _sweep in xrange(iterations):
        for j in range(q):
            old_pos = xs_new[j]
            if old_pos < G:
                # Lift TF j: its site becomes available again.
                weights[old_pos] = ks[old_pos]
                total += ks[old_pos]
            probs = [w/total for w in weights]
            draw = inverse_cdf_sampler(probs)
            new_pos = draw()
            xs_new[j] = new_pos
            if new_pos < G:
                # Re-place TF j: block the site it landed on.
                weights[new_pos] = 0
                total -= ks[new_pos]
    return xs_new
def make_sampler(ks):
    """Build a sampler over the indices of ks, weighted by ks.

    The weights are divided by their sum and the resulting probability
    list is handed to inverse_cdf_sampler; its return value is returned
    unchanged.

    Usage:
    >>> sampler = make_sampler(ks)
    >>> x = sampler()
    """
    total = float(sum(ks))
    probabilities = []
    for k in ks:
        probabilities.append(k/total)
    return inverse_cdf_sampler(probabilities)
def direct_sampling_ref(ks,q):
    """Rejection-sample q positions so that no site above 0 is used twice.

    ks is a vector of the form [k0,k1,...,kg], i.e. k0 = 1
    (presumably index 0 stands for the off-chromosome state, which is why
    it may be drawn any number of times -- confirm with callers).
    q: number of positions to draw per attempt

    Each attempt draws q values from a sampler built over ks/sum(ks).
    The attempt is accepted when every position in 1..len(ks) occurs at
    most once; otherwise all q values are redrawn. There is no retry
    limit.

    Returns the accepted list of q sampled positions.
    """
    Z = float(sum(ks))
    G = len(ks)
    ps = [k/Z for k in ks]
    sampler = inverse_cdf_sampler(ps)
    while True:
        ss = [sampler() for _ in range(q)]
        counts = Counter(ss)
        # Only positions that were actually drawn can collide, so inspect
        # the (at most q) drawn positions instead of scanning all G sites
        # on every attempt. The 0 < pos <= G bounds keep the same set of
        # checked positions as the previous range(1, G+1) scan.
        if all(count <= 1 for pos,count in counts.items() if 0 < pos <= G):
            return ss
def mh_simulate(iterations=50000,verbose=False,method="direct_sampling"):
    """Run Metropolis-Hastings over chromosome occupancy configurations.

    iterations: forwarded to mh
    verbose: forwarded to mh
    method: selects the proposal handed to mh
        "direct_sampling" -- propose with direct_sampling, proposal
            log-density log_dprop_direct
        "rsa" -- propose with rsa, proposal log-density log_dprop_rsa
        anything else -- propose with the local move `prop`, dprop=None

    The chain starts from the all-zero configuration of length config_len
    and targets logf(config) = -hamiltonian(config) with use_log=True.

    Relies on names defined outside this function: hamiltonian, positions,
    from_positions, config_len, ks, eps, beta, direct_sampling, rsa,
    falling_fac, product, inverse_cdf_sampler and mh.

    Returns whatever mh returns.
    """
    copy_number = 5

    def logf(config):
        return -hamiltonian(config)

    def prop(config):
        new_config = config[:]
        attached_tfs = sum(config) # number currently bound to chromosome
        r = random.random()
        if r < attached_tfs/float(copy_number):
            # choose a tf on the chromosome
            pos = random.choice(positions(config))
            new_config[pos] = 0
        # else: choose a tf off the chromosome
        new_pos = random.choice(range(config_len + 1))
        if new_pos < config_len:
            new_config[new_pos] = 1
        # else tf goes off chromosome
        return new_config

    Z = float(sum(ks))
    ps = [k/Z for k in ks]
    sampler = inverse_cdf_sampler(range(len(ks)),ps)

    def prop_direct(config):
        sample = direct_sampling(ks,copy_number,sampler=sampler)
        return from_positions(sample)

    def log_dprop_direct(config,old_config):
        occupancy = sum(config)
        # The occupied positions themselves are not needed here, only
        # their count, so positions(config) is no longer evaluated.
        return log(falling_fac(copy_number,occupancy)*product(exp(-beta*eps[i]*config[i]) for i in range(config_len)))

    def prop_rsa(config):
        sample = rsa(ks,copy_number)
        return from_positions(sample)

    def log_dprop_rsa(config,old_config):
        _ks = ks[:]
        prob = 1
        for i,x in enumerate(config):
            if x > 0:
                # float() keeps this a true division even if ks holds
                # integers (Python 2 `/` would otherwise truncate to 0);
                # it matches the float(sum(ks)) normalisation used above.
                prob *= _ks[i]/float(sum(_ks))
                # An occupied site is unavailable to later placements.
                _ks[i] = 0
        return log(prob)

    x0 = [0]*config_len
    if method == "direct_sampling":
        return mh(logf,prop_direct,x0,dprop=log_dprop_direct,verbose=verbose,use_log=True,iterations=iterations)
    elif method == "rsa":
        return mh(logf,prop_rsa,x0,dprop=log_dprop_rsa,verbose=verbose,use_log=True,iterations=iterations)
    else:
        return mh(logf,prop,x0,dprop=None,verbose=verbose,use_log=True,iterations=iterations)
def gibbs_sample(ks,xs):
    """Perform one Gibbs sweep, resampling each TF position in turn.

    ks: a G-length vector of rate constants
    xs: a q-length vector such that x_i contains the position of the ith TF
        ([0 to G-1]), or G if off-chromosome

    For TF j, site i keeps weight ks[i] unless it appears in the current
    position vector at a position other than TF j's own, in which case
    its weight is 0. An extra trailing weight of 1 is appended at index G.
    The weights are normalised and a new position is drawn from
    inverse_cdf_sampler.

    Returns the updated position vector; it starts from xs[:] and xs is
    not assigned to.
    """
    xs_new = xs[:]
    q = len(xs)
    for j in range(q):
        cur_pos = xs_new[j]
        # Sites blocked for TF j: everything in the position vector except
        # its own site. A set makes the per-site test O(1) instead of
        # scanning the whole position list for each of the G sites.
        blocked = set(xs_new)
        blocked.discard(cur_pos)
        cur_ks = [(0 if i in blocked else k) for i,k in enumerate(ks)]
        cur_ks.append(1)  # weight of the extra state at index G
        cur_Z = float(sum(cur_ks))
        sampler = inverse_cdf_sampler([cur_k/cur_Z for cur_k in cur_ks])
        xs_new[j] = sampler()
    return xs_new
def compare_alias_and_inverse_cdf():
    """Time inverse_cdf_sampler against alias_sampler on one weight vector.

    Builds a list of 5,000,000 weights, each random.random() * 50 / G.
    It then reports, via tic/toc, the time to construct each sampler and
    the time to draw 1,000,000 samples from it: inverse-CDF first, then
    alias.

    alias_sampler is taken from the enclosing module scope; the other
    helpers are imported locally.

    Returns (inv_samples, al_samples), the two lists of drawn samples.
    """
    from project_utils import inverse_cdf_sampler
    import time
    from utils import qqplot
    from ticktock import tic, toc

    def timed(label, thunk):
        # Bracket a single computation with tic/toc and pass its result on.
        tic(label)
        result = thunk()
        toc()
        return result

    G = 5000000
    num_samples = 1000000
    ps = [random.random() * 50 / float(G) for i in range(G)]

    inv_sampler = timed("inverse sampler", lambda: inverse_cdf_sampler(ps))
    inv_samples = timed("inverse sampling",
                        lambda: [inv_sampler() for i in range(num_samples)])
    al_sampler = timed("alias sampler", lambda: alias_sampler(ps))
    al_samples = timed("alias sampling",
                       lambda: [al_sampler() for i in range(num_samples)])
    return inv_samples, al_samples