def uniform_motif_with_ic_imh_ref(n, L, desired_ic, epsilon=0.1,
                                  iterations=None, verbose=False,
                                  num_chains=8):
    """Sample motifs whose IC lies within epsilon of desired_ic (reference IMH).

    A proposal is built by fitting ``beta`` with
    ``find_beta_for_mean_motif_ic`` against a corrected target IC
    (``desired_ic + L * 3 / (2 * log(2) * n)``), then drawing ``L`` column
    counts and turning each into a column.  The target log-density is 0 for
    motifs with ``abs(motif_ic(motif) - desired_ic) < epsilon`` and
    ``-10.0**100`` otherwise.

    Parameters:
        n, L: forwarded to the beta/count helpers; ``L`` columns are drawn
            per proposal (presumably n is the number of sites -- confirm
            against the helpers).
        desired_ic: centre of the accepted IC window.
        epsilon: half-width of the accepted IC window.
        iterations: if truthy, run one chain of that many iterations;
            otherwise run ``num_chains`` chains until convergence.
        verbose: accepted but not used; ``mh`` is always called with
            ``verbose=False``.
        num_chains: number of chains used in the convergence-checked mode.

    Returns:
        With ``iterations`` given, whatever ``mh`` returns for a single
        chain.  Otherwise a list of ``num_chains`` lists, returned as soon as
        the first value from ``gelman_rubin`` drops below 1.1.
    """
    per_column_correction = 3 / (2 * log(2) * n)
    target_ic_for_beta = desired_ic + L * per_column_correction
    beta = find_beta_for_mean_motif_ic(n, L, target_ic_for_beta)
    count_probs = count_ps_from_beta(n, beta)
    count_sampler = inverse_cdf_sampler(enumerate_counts(n), count_probs)

    def Q(motif):
        # Independence proposal: the current motif is ignored.  All counts
        # are drawn before any column is built, as in the original ordering.
        drawn_counts = [count_sampler() for _ in range(L)]
        columns = [sample_col_from_count(c) for c in drawn_counts]
        return ["".join(site) for site in transpose(columns)]

    def log_dQ(motif_p, motif):
        return beta * motif_ic(motif_p)

    def log_f(motif):
        if abs(motif_ic(motif) - desired_ic) < epsilon:
            return 0
        return -10.0**100

    def in_support(candidate):
        return log_f(candidate) > -1

    def propose_fresh():
        return Q(None)

    def run_chain(start, length):
        return mh(log_f, proposal=Q, dprop=log_dQ, x0=start,
                  iterations=length, use_log=True, verbose=False)

    if iterations:
        start = sample_until(in_support, propose_fresh, 1)[0]
        return run_chain(start, iterations)

    # No fixed length requested: use the Gelman-Rubin criterion, doubling
    # the segment length each round and restarting from each chain's tail.
    starts = sample_until(in_support, propose_fresh, num_chains)
    iterations = 100
    chains = [[] for _ in range(num_chains)]
    while True:
        for chain, start in zip(chains, starts):
            chain.extend(run_chain(start, iterations))
        ic_chains = mmap(motif_ic, chains)
        R_hat, _ = gelman_rubin(ic_chains)
        if R_hat < 1.1:
            return chains
        starts = [chain[-1] for chain in chains]
        iterations *= 2
def ror_experiment():
    """Print total motif MI over a 10x10 grid of (sigma, alpha) settings.

    For each sigma in ``np.linspace(0.1, 10, 10)`` and alpha in
    ``np.linspace(0, 1, 10)``, a matrix is drawn with
    ``sample_matrix(L, sigma)``, 100 sites scoring below
    ``-alpha * sigma * L`` are collected via ``sample_until``, and one line
    ``sigma alpha total_motif_mi(motif)`` is printed.  Returns nothing.
    """
    L = 10
    n = 100
    sigma_grid = np.linspace(0.1, 10, 10)
    alpha_grid = np.linspace(0, 1, 10)
    for sigma in sigma_grid:
        for alpha in alpha_grid:
            threshold = - alpha * sigma * L
            matrix = sample_matrix(L, sigma)

            def draw_site():
                return sample_motif_neglect_fg(matrix, 1, Ne=2)[0]

            def below_threshold(site):
                return score_seq(matrix, site) < threshold

            motif = sample_until(below_threshold, draw_site, n)
            # Single parenthesised argument: same output as the
            # space-separated print statement.
            print("%s %s %s" % (sigma, alpha, total_motif_mi(motif)))
def uniform_motif_with_ic_imh(n, L, desired_ic, epsilon=0.1, iterations=None,
                              verbose=False, beta=None, num_chains=8):
    """Sample motifs whose IC lies within epsilon of desired_ic via IMH.

    The proposal draws ``L`` column counts from a distribution derived from
    ``beta`` and converts each into a column.  The target log-density is 0
    when ``abs(motif_ic(motif) - desired_ic) < epsilon`` and ``-10.0**100``
    otherwise.

    Parameters:
        n, L: forwarded to the beta/count helpers; ``L`` columns are drawn
            per proposal (presumably n is the number of sites -- confirm
            against the helpers).
        desired_ic: centre of the accepted IC window.
        epsilon: half-width of the accepted IC window.
        iterations: chain length.  When not given (``None`` or 0) it is
            estimated as ``int(10 / ar)`` where ``ar`` is the value ``mh``
            returns with ``return_ar=True`` on pilot runs.
        verbose: accepted but not used; ``mh`` is always called with
            ``verbose=False``.
        beta: proposal parameter; fitted with
            ``find_beta_for_mean_motif_ic`` when ``None``.
        num_chains: accepted but not used by this function.

    Returns:
        Whatever ``mh`` returns for the final chain.
    """
    if beta is None:
        correction_per_col = 3 / (2 * log(2) * n)
        desired_ic_for_beta = desired_ic + L * correction_per_col
        beta = find_beta_for_mean_motif_ic(n, L, desired_ic_for_beta)
    ps = count_ps_from_beta(n, beta)
    count_sampler = inverse_cdf_sampler(enumerate_counts(n), ps)

    def Q(motif):
        # Independence proposal: the current motif is ignored.
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    def log_dQ(motif_p, motif):
        return beta * motif_ic(motif_p)

    def log_f(motif):
        in_range = abs(motif_ic(motif) - desired_ic) < epsilon
        return 0 if in_range else -10.0**100

    x0 = sample_until(lambda x: log_f(x) > -1, lambda: Q(None), 1)[0]

    if not iterations:
        # No length requested: run pilot chains of doubling length until a
        # non-zero value comes back from mh(return_ar=True), then size the
        # real run from it.  Previously this ran unconditionally and
        # overwrote any caller-supplied ``iterations``.
        ar = 0
        pilot_iterations = 100
        while ar == 0:
            ar = mh(log_f, proposal=Q, dprop=log_dQ, x0=x0,
                    iterations=pilot_iterations, use_log=True, verbose=False,
                    return_ar=True)
            pilot_iterations *= 2
        iterations = int(1.0 / ar * 10)

    chain = mh(log_f, proposal=Q, dprop=log_dQ, x0=x0,
               iterations=iterations, use_log=True, verbose=False)
    return chain
def uniform_motif_imh_tv(n, L, desired_ic, beta=None, epsilon=None, tv=0.01):
    """Run uniform IMH to within total variation bound tv.

    The proposal repeatedly draws motifs (``L`` columns built from sampled
    counts) until one has ``abs(motif_ic(m) - desired_ic) < epsilon``.  The
    chain length is ``ceil(log(tv) / log(1 - alpha))`` with
    ``alpha = exp(-2 * beta * epsilon)``.

    Parameters:
        n, L: forwarded to the beta/count helpers; ``L`` columns are drawn
            per proposal (presumably n is the number of sites -- confirm
            against the helpers).
        desired_ic: centre of the accepted IC window.
        beta: proposal parameter; fitted with
            ``find_beta_for_mean_motif_ic`` against a corrected target IC
            when ``None``.
        epsilon: half-width of the accepted IC window; defaults to
            ``1.0 / (2 * beta)``, in which case the chosen value is printed.
        tv: bound used to size the chain.

    Returns:
        Whatever ``mh`` returns for the chain.

    Side effects:
        Prints the chosen epsilon (only when defaulted) and the iteration
        count.
    """
    # Identity checks against None: ``== None`` dispatches to the operand's
    # __eq__, which is not guaranteed to yield a plain bool.
    if beta is None:
        correction_per_col = 3 / (2 * log(2) * n)
        desired_ic_for_beta = desired_ic + L * correction_per_col
        beta = find_beta_for_mean_motif_ic(n, L, desired_ic_for_beta)
    if epsilon is None:
        epsilon = 1.0 / (2 * beta)
        print("maximally efficient epsilon: %s" % (epsilon,))
    ps = count_ps_from_beta(n, beta)
    count_sampler = inverse_cdf_sampler(enumerate_counts(n), ps)

    def Qp(motif):
        # Raw (unfiltered) proposal; the argument is ignored.
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    def Q(motif):
        # Filtered proposal: redraw from Qp until the IC is in the window.
        return sample_until(lambda m: abs(motif_ic(m) - desired_ic) < epsilon,
                            lambda: Qp(None), 1)[0]

    def log_dQ(motif_p, motif):
        return beta * motif_ic(motif_p)

    def log_f(motif):
        in_range = abs(motif_ic(motif) - desired_ic) < epsilon
        return 0 if in_range else -10.0**100

    alpha = exp(-2 * beta * epsilon)
    iterations = int(ceil(log(tv) / log(1 - alpha)))
    print("iterations: %s" % (iterations,))
    x0 = sample_until(lambda x: log_f(x) > -1, lambda: Q(None), 1)[0]
    chain = mh(log_f, proposal=Q, dprop=log_dQ, x0=x0,
               iterations=iterations, use_log=True, verbose=False)
    return chain
def rQ():
    """Return one draw from rQ_raw that satisfies inrange(., desired_ic, epsilon).

    Delegates the retry loop to ``sample_until`` with its progress bar
    disabled and returns the first element of its result.  ``desired_ic``,
    ``epsilon`` and ``rQ_raw`` are taken from the surrounding scope.
    """
    def accept(candidate):
        return inrange(candidate, desired_ic, epsilon)

    accepted = sample_until(accept, rQ_raw, 1, progress_bar=False)
    return accepted[0]
def Q(motif):
    """Return one Qp draw whose IC is within epsilon of desired_ic.

    The ``motif`` argument is ignored.  ``Qp``, ``desired_ic`` and
    ``epsilon`` are taken from the surrounding scope; the retry loop is
    delegated to ``sample_until`` and the first element of its result is
    returned.
    """
    def ic_in_window(candidate):
        return abs(motif_ic(candidate) - desired_ic) < epsilon

    def draw_raw():
        return Qp(None)

    accepted = sample_until(ic_in_window, draw_raw, 1)
    return accepted[0]