def uniform_motifs_accept_reject(n, L, desired_ic, num_motifs, epsilon=0.1,
                                 beta=None, verbose=False):
    """Collect ``num_motifs`` motifs from ``uniform_motif_accept_reject``.

    The beta-dependent setup (count probabilities and the count sampler) is
    built once here and passed to every per-motif call.

    Args:
        n, L: forwarded unchanged to the helpers and the per-motif sampler.
        desired_ic: target value forwarded to the per-motif sampler.
        num_motifs: number of motifs to draw (iterated with ``trange``).
        epsilon: tolerance forwarded to the per-motif sampler.
        beta: when None, solved for with ``find_beta_for_mean_motif_ic``
            using ``desired_ic + L * 3 / (2 * log(2) * n)`` as the target.
        verbose: forwarded to the beta solver and to the per-motif sampler.

    Returns:
        A list with one entry per draw, each the return value of
        ``uniform_motif_accept_reject``.
    """
    if beta is None:
        total_correction = L * (3 / (2 * log(2) * n))
        beta = find_beta_for_mean_motif_ic(
            n, L, desired_ic + total_correction, verbose=verbose)

    count_probs = count_ps_from_beta(n, beta)
    sampler = inverse_cdf_sampler(enumerate_counts(n), count_probs)

    motifs = []
    for _ in trange(num_motifs):
        motifs.append(
            uniform_motif_accept_reject(
                n, L, desired_ic,
                epsilon=epsilon,
                beta=beta,
                ps=count_probs,
                count_sampler=sampler,
                verbose=verbose,
            )
        )
    return motifs
def uniform_motif_accept_reject(n, L, desired_ic, epsilon=0.1, beta=None, ps=None,
                                count_sampler=None, verbose=False):
    """Draw a single motif with an accept/reject loop.

    Candidates are assembled from ``L`` columns, each built by
    ``sample_col_from_count`` from a count drawn with ``count_sampler``, and
    are redrawn until ``inrange(candidate, desired_ic, epsilon)`` holds.  A
    candidate is returned when a uniform random number falls below
    ``1 / exp(beta * motif_ic(candidate) - beta * (desired_ic - epsilon))``.

    Args:
        n, L: forwarded to the helpers; ``L`` is the number of columns drawn.
        desired_ic: centre of the accepted range.
        epsilon: half-width of the accepted range.
        beta: when None, solved for with ``find_beta_for_mean_motif_ic``
            using ``desired_ic + L * 3 / (2 * log(2) * n)`` as the target.
        ps: count probabilities; computed from ``beta`` when None.
        count_sampler: zero-argument callable returning a count; built from
            ``enumerate_counts(n)`` and ``ps`` when None.
        verbose: print beta, the range bounds and periodic progress.

    Returns:
        The accepted motif as a list of strings (one per row of the
        transposed columns).
    """
    def report(*fields):
        # Single-argument print emits the same space-separated text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print(" ".join(str(field) for field in fields))

    report("uniform motif accept reject:", n, L, desired_ic, beta)

    target_ic_for_beta = desired_ic + L * (3 / (2 * log(2) * n))
    if target_ic_for_beta == 2 * L:
        # if we reach the upper limit, things break down: build every column
        # from the fixed count (0, 0, 0, n) instead of sampling counts.
        columns = [sample_col_from_count((0, 0, 0, n)) for _ in range(L)]
        return ["".join(site) for site in transpose(columns)]

    if beta is None:
        beta = find_beta_for_mean_motif_ic(n, L, target_ic_for_beta)
    if verbose:
        report("beta:", beta)
    if ps is None:
        ps = count_ps_from_beta(n, beta)
    if count_sampler is None:
        count_sampler = inverse_cdf_sampler(enumerate_counts(n), ps)

    def propose_unfiltered():
        # Draw all counts first, then the columns, keeping the order in which
        # the samplers are consumed.
        drawn_counts = [count_sampler() for _ in range(L)]
        columns = [sample_col_from_count(count) for count in drawn_counts]
        return ["".join(site) for site in transpose(columns)]

    def within_range(candidate):
        return inrange(candidate, desired_ic, epsilon)

    def propose():
        return sample_until(within_range, propose_unfiltered, 1,
                            progress_bar=False)[0]

    ic_low = desired_ic - epsilon
    ic_high = desired_ic + epsilon
    log_M = -beta * ic_low
    if verbose:
        report("Imin, Imax, log_M:", ic_low, ic_high, log_M)

    def acceptance_probability(candidate):
        return 1.0 / exp(beta * motif_ic(candidate) + log_M)

    attempts = 0
    while True:
        attempts += 1
        candidate = propose()
        if random.random() < acceptance_probability(candidate):
            return candidate
        if verbose and attempts % 100 == 0:
            report(attempts, acceptance_probability(candidate))
def uniform_motif_with_ic_imh_ref(n, L, desired_ic, epsilon=0.1, iterations=None,
                                  verbose=False, num_chains=8):
    """Run ``mh`` with a state-independent proposal restricted to an IC band.

    The target log-density is 0 when ``abs(motif_ic(motif) - desired_ic) <
    epsilon`` and ``-10.0**100`` otherwise.  Proposals are assembled from
    ``L`` columns drawn through a count sampler tuned by ``beta``; the
    proposal ignores the current state.

    Args:
        n, L: forwarded to the helpers; ``L`` is the number of columns drawn.
        desired_ic: centre of the accepted IC band.
        epsilon: half-width of the accepted IC band.
        iterations: if truthy, run one chain of this length.  Otherwise run
            ``num_chains`` chains in rounds (100 steps, then doubling) until
            the first value returned by ``gelman_rubin`` drops below 1.1.
        verbose: accepted but not used by this function.
        num_chains: number of parallel chains in the Gelman-Rubin mode.

    Returns:
        The return value of ``mh`` in fixed-iteration mode, or the list of
        ``num_chains`` accumulated chains in Gelman-Rubin mode.
    """
    target_ic_for_beta = desired_ic + L * (3 / (2 * log(2) * n))
    beta = find_beta_for_mean_motif_ic(n, L, target_ic_for_beta)
    ps = count_ps_from_beta(n, beta)
    count_sampler = inverse_cdf_sampler(enumerate_counts(n), ps)

    def propose(motif):
        # Independence proposal: the current state ``motif`` is not consulted.
        drawn_counts = [count_sampler() for _ in range(L)]
        columns = [sample_col_from_count(count) for count in drawn_counts]
        return ["".join(site) for site in transpose(columns)]

    def log_proposal(motif_p, motif):
        return beta * motif_ic(motif_p)

    def log_target(motif):
        if abs(motif_ic(motif) - desired_ic) < epsilon:
            return 0
        return -10.0**100

    def draw_starts(how_many):
        # Starting states are proposals that already lie inside the band.
        return sample_until(lambda x: log_target(x) > -1,
                            lambda: propose(None), how_many)

    def run_chain(start, length):
        return mh(log_target, proposal=propose, dprop=log_proposal, x0=start,
                  iterations=length, use_log=True, verbose=False)

    if iterations:
        return run_chain(draw_starts(1)[0], iterations)

    # use gelman rubin criterion
    starts = draw_starts(num_chains)
    iterations = 100
    chains = [[] for _ in range(num_chains)]
    while True:
        for chain, start in zip(chains, starts):
            chain.extend(run_chain(start, iterations))
        R_hat, _neff = gelman_rubin(mmap(motif_ic, chains))
        if R_hat < 1.1:
            return chains
        # Not converged yet: resume each chain from its last state and run
        # twice as long in the next round.
        starts = [chain[-1] for chain in chains]
        iterations *= 2
def uniform_motif_with_ic_imh(n, L, desired_ic, epsilon=0.1, iterations=None,
                              verbose=False, beta=None, num_chains=8):
    """Run one ``mh`` chain whose length is derived from a pilot acceptance rate.

    Pilot runs (100 steps, doubling while the reported acceptance rate is
    exactly 0) estimate the acceptance rate ``ar``; the final chain is then
    run for ``int(1.0 / ar * 10)`` steps from the same starting state.

    Args:
        n, L: forwarded to the helpers; ``L`` is the number of columns drawn.
        desired_ic: centre of the accepted IC band.
        epsilon: half-width of the accepted IC band.
        iterations: NOTE: overwritten before it is ever read, so the value
            passed by the caller has no effect.
        verbose: accepted but not used by this function.
        beta: when None, solved for with ``find_beta_for_mean_motif_ic``
            using ``desired_ic + L * 3 / (2 * log(2) * n)`` as the target.
        num_chains: accepted but not used by this function.

    Returns:
        The return value of the final ``mh`` call.
    """
    if beta is None:
        total_correction = L * (3 / (2 * log(2) * n))
        beta = find_beta_for_mean_motif_ic(n, L, desired_ic + total_correction)
    ps = count_ps_from_beta(n, beta)
    count_sampler = inverse_cdf_sampler(enumerate_counts(n), ps)

    def propose(motif):
        # Independence proposal: the current state ``motif`` is not consulted.
        drawn_counts = [count_sampler() for _ in range(L)]
        columns = [sample_col_from_count(count) for count in drawn_counts]
        return ["".join(site) for site in transpose(columns)]

    def log_proposal(motif_p, motif):
        return beta * motif_ic(motif_p)

    def log_target(motif):
        if abs(motif_ic(motif) - desired_ic) < epsilon:
            return 0
        return -10.0**100

    x0 = sample_until(lambda x: log_target(x) > -1, lambda: propose(None), 1)[0]

    def run(length, **extra):
        return mh(log_target, proposal=propose, dprop=log_proposal, x0=x0,
                  iterations=length, use_log=True, verbose=False, **extra)

    # first, determine probability of landing in range
    pilot_length = 100
    acceptance_rate = 0
    while acceptance_rate == 0:
        acceptance_rate = run(pilot_length, return_ar=True)
        pilot_length *= 2

    return run(int(1.0 / acceptance_rate * 10))
def uniform_motif_imh_tv(n, L, desired_ic, beta=None, epsilon=None, tv=0.01):
    """Run uniform IMH to within total variation bound ``tv``.

    The chain length is ``ceil(log(tv) / log(1 - alpha))`` with
    ``alpha = exp(-2 * beta * epsilon)``, i.e. the smallest number of steps
    ``k`` for which ``(1 - alpha) ** k <= tv`` (for ``0 < alpha < 1`` and
    ``0 < tv < 1``).  Proposals are redrawn until they satisfy
    ``abs(motif_ic(m) - desired_ic) < epsilon``, so every proposed state lies
    inside the band.

    Args:
        n, L: forwarded to the helpers; ``L`` is the number of columns drawn.
        desired_ic: centre of the accepted IC band.
        beta: when None, solved for with ``find_beta_for_mean_motif_ic``
            using ``desired_ic + L * 3 / (2 * log(2) * n)`` as the target.
        epsilon: half-width of the band; when None it defaults to
            ``1.0 / (2 * beta)`` (which makes ``alpha == exp(-1)``) and the
            chosen value is printed.
        tv: total variation bound used to size the chain.

    Returns:
        The return value of ``mh`` for the computed number of iterations.
    """
    # Identity tests: ``== None`` would dispatch to a custom ``__eq__`` if
    # beta/epsilon were ever array-like.
    if beta is None:
        # The IC correction is only needed when solving for beta.
        correction_per_col = 3 / (2 * log(2) * n)
        desired_ic_for_beta = desired_ic + L * correction_per_col
        beta = find_beta_for_mean_motif_ic(n, L, desired_ic_for_beta)
    if epsilon is None:
        epsilon = 1.0 / (2 * beta)
        # Single-argument print gives the same text under Python 2 and 3.
        print(" ".join(["maximally efficient epsilon:", str(epsilon)]))

    ps = count_ps_from_beta(n, beta)
    count_sampler = inverse_cdf_sampler(enumerate_counts(n), ps)

    def in_band(motif):
        return abs(motif_ic(motif) - desired_ic) < epsilon

    def Qp(motif):
        # Unrestricted independence proposal; ``motif`` is not consulted.
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    def Q(motif):
        # Proposal restricted to the band by redrawing until it lands inside.
        return sample_until(in_band, lambda: Qp(None), 1)[0]

    def log_dQ(motif_p, motif):
        return beta * motif_ic(motif_p)

    def log_f(motif):
        return 0 if in_band(motif) else -10.0**100

    alpha = exp(-2 * beta * epsilon)
    iterations = int(ceil(log(tv) / log(1 - alpha)))
    print(" ".join(["iterations:", str(iterations)]))

    # Start from a state that is already inside the band.
    x0 = sample_until(lambda x: log_f(x) > -1, lambda: Q(None), 1)[0]
    return mh(log_f, proposal=Q, dprop=log_dQ, x0=x0, iterations=iterations,
              use_log=True, verbose=False)