def uniform_motif(N, L, desired_ic, epsilon=0.1, beta=None, ps=None,
                  count_sampler=None, verbose=False):
    """Sample one motif whose IC lies within epsilon of desired_ic.

    The sampler is a two-stage accept/reject scheme:

    1. Proposals are built from L column-count vectors drawn with
       ``count_sampler`` and are re-drawn (via ``sample_until``) until
       ``inrange(motif, desired_ic, epsilon)`` holds.
    2. Each in-range proposal is accepted with probability
       ``1 / exp(beta * (motif_ic(motif) - (desired_ic - epsilon)))``,
       which is 1 at the lower edge of the window and undoes the
       ``exp(beta * IC)`` weighting of the proposal (presumably so that the
       result is uniform over the window -- confirm against the derivation).

    Args:
        N: Passed to the count helpers; the degenerate branch uses the count
            vector ``(0, 0, 0, N)``.
        L: Number of columns drawn per motif.
        desired_ic: Centre of the accepted information-content window.
        epsilon: Half-width of the accepted window.
        beta: Tilt parameter; found with ``find_beta_for_mean_motif_ic``
            when None.
        ps: Probabilities paired with ``enumerate_counts(N)``; derived from
            ``beta`` when None.
        count_sampler: Zero-argument callable returning one count vector;
            built from ``ps`` when None.
        verbose: Print diagnostics while sampling.

    Returns:
        A list of strings, one per row of the transposed sampled columns.
    """
    # NOTE(review): a second definition of uniform_motif appears later in
    # this file and replaces this one at import time.
    if verbose:
        print("%s %s %s %s %s"
              % ("uniform motif accept reject:", N, L, desired_ic, beta))

    # Same upward adjustment of the target as maxent_motif applies, with the
    # constant 3 hard-coded (presumably A - 1 for a 4-letter alphabet).
    correction_per_col = 3 / (2 * log(2) * N)
    desired_ic_for_beta = desired_ic + L * correction_per_col

    # NOTE(review): this is an exact float comparison; a target that misses
    # 2 * L by rounding error will fall through to the beta search.
    if desired_ic_for_beta == 2 * L:
        # if we reach the upper limit, things break down
        cols = [sample_col_from_count((0, 0, 0, N)) for _ in range(L)]
        return ["".join(site) for site in transpose(cols)]

    if beta is None:
        beta = find_beta_for_mean_motif_ic(N, L, desired_ic_for_beta)
        if verbose:
            print("%s %s" % ("beta:", beta))
    if ps is None:
        ps = count_ps_from_beta(N, beta)
    if count_sampler is None:
        count_sampler = inverse_cdf_sampler(enumerate_counts(N), ps)

    def rQ_raw():
        # Draw every count vector before realising any column so the order
        # in which the samplers are invoked stays the same as before.
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    def rQ():
        # One proposal restricted to the IC window.
        return sample_until(lambda M: inrange(M, desired_ic, epsilon),
                            rQ_raw, 1, progress_bar=False)[0]

    Imin = desired_ic - epsilon
    Imax = desired_ic + epsilon
    log_M = -beta * Imin
    if verbose:
        print("%s %s %s %s" % ("Imin, Imax, log_M:", Imin, Imax, log_M))

    def dQ(motif):
        return exp(beta * motif_ic(motif) + log_M)

    def AR(motif):
        return 1.0 / dQ(motif)

    trials = 0
    while True:
        trials += 1
        motif = rQ()
        r = random.random()
        # Evaluate once; it is reused by the diagnostic print below.
        accept_prob = AR(motif)
        if r < accept_prob:
            return motif
        if verbose and trials % 100 == 0:
            print("%s %s" % (trials, accept_prob))
def uniform_motifs(N, L, desired_ic, num_motifs, epsilon=0.1, beta=None,
                   verbose=False):
    """Return a list of num_motifs motifs, each drawn with uniform_motif.

    beta (when not supplied), the count probabilities and the count sampler
    are computed once here and handed to every uniform_motif call, so the
    per-motif calls do not repeat that setup.

    Args:
        N, L, desired_ic, epsilon, verbose: Forwarded to uniform_motif.
        num_motifs: Number of motifs to draw.
        beta: Tilt parameter; found with find_beta_for_mean_motif_ic when
            None.

    Returns:
        A list with one uniform_motif result per requested motif.
    """
    # NOTE(review): a second definition of uniform_motifs appears later in
    # this file and replaces this one at import time.
    if beta is None:
        # Adjusted target for the beta search only; uniform_motif still
        # receives the caller's desired_ic.
        per_column_shift = 3 / (2 * log(2) * N)
        shifted_ic = desired_ic + L * per_column_shift
        beta = find_beta_for_mean_motif_ic(N, L, shifted_ic, verbose=verbose)

    count_probs = count_ps_from_beta(N, beta)
    shared_sampler = inverse_cdf_sampler(enumerate_counts(N), count_probs)

    motifs = []
    for _ in trange(num_motifs):
        motifs.append(
            uniform_motif(N, L, desired_ic,
                          epsilon=epsilon,
                          beta=beta,
                          ps=count_probs,
                          count_sampler=shared_sampler,
                          verbose=verbose))
    return motifs
def maxent_motif(N, L, desired_ic, tolerance=10**-10, beta=None,
                 verbose=False, A=4):
    """Draw one motif from the max-ent distribution with mean desired_ic.

    Args:
        N: Passed to the count helpers.
        L: Number of columns drawn.
        desired_ic: Target mean information content.
        tolerance: Forwarded to find_beta_for_mean_motif_ic.
        beta: Tilt parameter; when None it is solved for after adjusting
            desired_ic upwards (see below).
        verbose: Print / forward progress diagnostics.
        A: Alphabet size forwarded to the helpers.

    Returns:
        A list of strings, one per row of the transposed sampled columns.
    """
    # NOTE(review): a second definition of maxent_motif appears later in
    # this file and replaces this one at import time.
    if beta is None:
        if verbose:
            print("finding beta")
        # Shift the target upwards so that motif_ic, called with its
        # first-order correction, comes out at the requested value.
        per_column_shift = (A - 1) / (2 * log(2) * N)
        desired_ic += L * per_column_shift
        beta = find_beta_for_mean_motif_ic(
            N, L, desired_ic, tolerance=tolerance, verbose=verbose, A=A)

    count_probs = count_ps_from_beta(N, beta, A=A)
    draw_counts = inverse_cdf_sampler(enumerate_counts(N, A), count_probs)

    # All count vectors are drawn first, then each is turned into a column.
    drawn_counts = [draw_counts() for _ in range(L)]
    columns = [sample_col_from_count(c, A=A) for c in drawn_counts]
    return ["".join(row) for row in transpose(columns)]
def maxent_motifs(N, L, desired_ic, num_motifs, tolerance=10**-10, A=4,
                  beta=None, countses=None, entropies=None, log_cols=None,
                  verbose=False):
    """Draw num_motifs motifs from the max-ent distribution with mean desired_ic.

    The count enumeration, per-count entropies, per-count log column
    multiplicities and beta are each computed only when the caller did not
    supply them, then shared by every sampled motif.

    Args:
        N: Passed to the count helpers.
        L: Number of columns per motif.
        desired_ic: Target mean information content.
        num_motifs: Number of motifs to return.
        tolerance: Forwarded to find_beta_for_mean_motif_ic.
        A: Alphabet size forwarded to the helpers.
        beta: Tilt parameter; when None it is solved for after adjusting
            desired_ic upwards by L * (A - 1) / (2 * log(2) * N).
        countses: Enumerated count vectors; built with enumerate_counts
            when None.
        entropies: Array with entropy_from_counts applied to each entry of
            countses; built when None.
        log_cols: Array with log_counts_to_cols applied to each entry of
            countses; built when None.
        verbose: Emit log messages and progress bars.

    Returns:
        A list of num_motifs motifs, each a list of strings.
    """
    # NOTE(review): a second definition of maxent_motifs appears later in
    # this file and replaces this one at import time.
    ### computational
    if countses is None:
        logger("countses", verbose)
        countses = enumerate_counts(N, A, verbose=verbose)
    if entropies is None:
        logger("entropies", verbose)
        # Show a progress bar only in verbose mode, as the log_cols step
        # does; previously this bar was displayed unconditionally.
        counts_iter = tqdm(countses) if verbose else countses
        entropies = np.array(
            [entropy_from_counts(counts) for counts in counts_iter])
    if log_cols is None:
        counts_iter = tqdm(countses) if verbose else countses
        logger("log_cols", verbose)
        log_cols = np.array(
            [log_counts_to_cols(counts, A=A) for counts in counts_iter])
    if beta is None:
        correction_per_col = (A - 1) / (2 * log(2) * N)
        desired_ic += L * correction_per_col
        beta = find_beta_for_mean_motif_ic(
            N, L, desired_ic, tolerance=tolerance, verbose=verbose, A=A,
            countses=countses, entropies=entropies, log_cols=log_cols)
    logger("beta: %s" % beta, verbose)
    logger("computing count ps from beta", verbose)
    ps = count_ps_from_beta(N, beta, A=A, verbose=verbose,
                            log_cols=log_cols, entropies=entropies)
    count_sampler = inverse_cdf_sampler(countses, ps)

    def sample():
        # Draw every count vector before realising any column so the order
        # in which the samplers are invoked is unchanged.
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count, A=A) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    # range yields the same indices as xrange and exists on Python 2 and 3.
    iterator = trange if verbose else range
    if verbose:
        print("sampling")
    return [sample() for _ in iterator(num_motifs)]
def uniform_motifs(N, L, desired_ic, num_motifs, epsilon=0.1, beta=None,
                   verbose=False):
    """Return a list of num_motifs motifs, each drawn with uniform_motif.

    beta (when not supplied), the count probabilities and the count sampler
    are computed once here and handed to every uniform_motif call, so the
    per-motif calls do not repeat that setup.

    Args:
        N, L, desired_ic, epsilon, verbose: Forwarded to uniform_motif.
        num_motifs: Number of motifs to draw.
        beta: Tilt parameter; found with find_beta_for_mean_motif_ic when
            None.

    Returns:
        A list with one uniform_motif result per requested motif.
    """
    # NOTE(review): uniform_motifs is also defined earlier in this file;
    # this later definition is the one that takes effect.
    if beta is None:
        # Adjusted target for the beta search only; uniform_motif still
        # receives the caller's desired_ic.
        per_column_shift = 3 / (2 * log(2) * N)
        shifted_ic = desired_ic + L * per_column_shift
        beta = find_beta_for_mean_motif_ic(N, L, shifted_ic, verbose=verbose)

    count_probs = count_ps_from_beta(N, beta)
    shared_sampler = inverse_cdf_sampler(enumerate_counts(N), count_probs)

    motifs = []
    for _ in trange(num_motifs):
        motifs.append(
            uniform_motif(N, L, desired_ic,
                          epsilon=epsilon,
                          beta=beta,
                          ps=count_probs,
                          count_sampler=shared_sampler,
                          verbose=verbose))
    return motifs
def maxent_motifs(N, L, desired_ic, num_motifs, tolerance=10**-10, A=4,
                  beta=None, countses=None, entropies=None, log_cols=None,
                  verbose=False):
    """Draw num_motifs motifs from the max-ent distribution with mean desired_ic.

    The count enumeration, per-count entropies, per-count log column
    multiplicities and beta are each computed only when the caller did not
    supply them, then shared by every sampled motif.

    Args:
        N: Passed to the count helpers.
        L: Number of columns per motif.
        desired_ic: Target mean information content.
        num_motifs: Number of motifs to return.
        tolerance: Forwarded to find_beta_for_mean_motif_ic.
        A: Alphabet size forwarded to the helpers.
        beta: Tilt parameter; when None it is solved for after adjusting
            desired_ic upwards by L * (A - 1) / (2 * log(2) * N).
        countses: Enumerated count vectors; built with enumerate_counts
            when None.
        entropies: Array with entropy_from_counts applied to each entry of
            countses; built when None.
        log_cols: Array with log_counts_to_cols applied to each entry of
            countses; built when None.
        verbose: Emit log messages and progress bars.

    Returns:
        A list of num_motifs motifs, each a list of strings.
    """
    # NOTE(review): maxent_motifs is also defined earlier in this file;
    # this later definition is the one that takes effect.
    ### computational
    if countses is None:
        logger("countses", verbose)
        countses = enumerate_counts(N, A, verbose=verbose)
    if entropies is None:
        logger("entropies", verbose)
        # Show a progress bar only in verbose mode, as the log_cols step
        # does; previously this bar was displayed unconditionally.
        counts_iter = tqdm(countses) if verbose else countses
        entropies = np.array(
            [entropy_from_counts(counts) for counts in counts_iter])
    if log_cols is None:
        counts_iter = tqdm(countses) if verbose else countses
        logger("log_cols", verbose)
        log_cols = np.array(
            [log_counts_to_cols(counts, A=A) for counts in counts_iter])
    if beta is None:
        correction_per_col = (A - 1) / (2 * log(2) * N)
        desired_ic += L * correction_per_col
        beta = find_beta_for_mean_motif_ic(
            N, L, desired_ic, tolerance=tolerance, verbose=verbose, A=A,
            countses=countses, entropies=entropies, log_cols=log_cols)
    logger("beta: %s" % beta, verbose)
    logger("computing count ps from beta", verbose)
    ps = count_ps_from_beta(N, beta, A=A, verbose=verbose,
                            log_cols=log_cols, entropies=entropies)
    count_sampler = inverse_cdf_sampler(countses, ps)

    def sample():
        # Draw every count vector before realising any column so the order
        # in which the samplers are invoked is unchanged.
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count, A=A) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    # range yields the same indices as xrange and exists on Python 2 and 3.
    iterator = trange if verbose else range
    if verbose:
        print("sampling")
    return [sample() for _ in iterator(num_motifs)]
def maxent_motif(N, L, desired_ic, tolerance=10**-10, beta=None,
                 verbose=False, A=4):
    """Draw one motif from the max-ent distribution with mean desired_ic.

    Args:
        N: Passed to the count helpers.
        L: Number of columns drawn.
        desired_ic: Target mean information content.
        tolerance: Forwarded to find_beta_for_mean_motif_ic.
        beta: Tilt parameter; when None it is solved for after adjusting
            desired_ic upwards (see below).
        verbose: Print / forward progress diagnostics.
        A: Alphabet size forwarded to the helpers.

    Returns:
        A list of strings, one per row of the transposed sampled columns.
    """
    # NOTE(review): maxent_motif is also defined earlier in this file;
    # this later definition is the one that takes effect.
    if beta is None:
        if verbose:
            print("finding beta")
        # Shift the target upwards so that motif_ic, called with its
        # first-order correction, comes out at the requested value.
        per_column_shift = (A - 1) / (2 * log(2) * N)
        desired_ic += L * per_column_shift
        beta = find_beta_for_mean_motif_ic(
            N, L, desired_ic, tolerance=tolerance, verbose=verbose, A=A)

    count_probs = count_ps_from_beta(N, beta, A=A)
    draw_counts = inverse_cdf_sampler(enumerate_counts(N, A), count_probs)

    # All count vectors are drawn first, then each is turned into a column.
    drawn_counts = [draw_counts() for _ in range(L)]
    columns = [sample_col_from_count(c, A=A) for c in drawn_counts]
    return ["".join(row) for row in transpose(columns)]
def uniform_motif(N, L, desired_ic, epsilon=0.1, beta=None, ps=None,
                  count_sampler=None, verbose=False):
    """Sample one motif whose IC lies within epsilon of desired_ic.

    The sampler is a two-stage accept/reject scheme:

    1. Proposals are built from L column-count vectors drawn with
       ``count_sampler`` and are re-drawn (via ``sample_until``) until
       ``inrange(motif, desired_ic, epsilon)`` holds.
    2. Each in-range proposal is accepted with probability
       ``1 / exp(beta * (motif_ic(motif) - (desired_ic - epsilon)))``,
       which is 1 at the lower edge of the window and undoes the
       ``exp(beta * IC)`` weighting of the proposal (presumably so that the
       result is uniform over the window -- confirm against the derivation).

    Args:
        N: Passed to the count helpers; the degenerate branch uses the count
            vector ``(0, 0, 0, N)``.
        L: Number of columns drawn per motif.
        desired_ic: Centre of the accepted information-content window.
        epsilon: Half-width of the accepted window.
        beta: Tilt parameter; found with ``find_beta_for_mean_motif_ic``
            when None.
        ps: Probabilities paired with ``enumerate_counts(N)``; derived from
            ``beta`` when None.
        count_sampler: Zero-argument callable returning one count vector;
            built from ``ps`` when None.
        verbose: Print diagnostics while sampling.

    Returns:
        A list of strings, one per row of the transposed sampled columns.
    """
    # NOTE(review): uniform_motif is also defined earlier in this file;
    # this later definition is the one that takes effect.
    if verbose:
        print("%s %s %s %s %s"
              % ("uniform motif accept reject:", N, L, desired_ic, beta))

    # Same upward adjustment of the target as maxent_motif applies, with the
    # constant 3 hard-coded (presumably A - 1 for a 4-letter alphabet).
    correction_per_col = 3 / (2 * log(2) * N)
    desired_ic_for_beta = desired_ic + L * correction_per_col

    # NOTE(review): this is an exact float comparison; a target that misses
    # 2 * L by rounding error will fall through to the beta search.
    if desired_ic_for_beta == 2 * L:
        # if we reach the upper limit, things break down
        cols = [sample_col_from_count((0, 0, 0, N)) for _ in range(L)]
        return ["".join(site) for site in transpose(cols)]

    if beta is None:
        beta = find_beta_for_mean_motif_ic(N, L, desired_ic_for_beta)
        if verbose:
            print("%s %s" % ("beta:", beta))
    if ps is None:
        ps = count_ps_from_beta(N, beta)
    if count_sampler is None:
        count_sampler = inverse_cdf_sampler(enumerate_counts(N), ps)

    def rQ_raw():
        # Draw every count vector before realising any column so the order
        # in which the samplers are invoked stays the same as before.
        counts = [count_sampler() for _ in range(L)]
        cols = [sample_col_from_count(count) for count in counts]
        return ["".join(site) for site in transpose(cols)]

    def rQ():
        # One proposal restricted to the IC window.
        return sample_until(lambda M: inrange(M, desired_ic, epsilon),
                            rQ_raw, 1, progress_bar=False)[0]

    Imin = desired_ic - epsilon
    Imax = desired_ic + epsilon
    log_M = -beta * Imin
    if verbose:
        print("%s %s %s %s" % ("Imin, Imax, log_M:", Imin, Imax, log_M))

    def dQ(motif):
        return exp(beta * motif_ic(motif) + log_M)

    def AR(motif):
        return 1.0 / dQ(motif)

    trials = 0
    while True:
        trials += 1
        motif = rQ()
        r = random.random()
        # Evaluate once; it is reused by the diagnostic print below.
        accept_prob = AR(motif)
        if r < accept_prob:
            return motif
        if verbose and trials % 100 == 0:
            print("%s %s" % (trials, accept_prob))