def avg_ic_from_theta(theta, N, L, trials=3):
    """Return the mean motif_ic over `trials` motifs generated from `theta`.

    theta is unpacked as (sigma, mu, Ne).  `trials` matrices are produced
    with sample_matrix(L, sigma) first; only afterwards is one motif drawn
    per matrix via sample_motif_cftp(matrix, mu, Ne, N).  The result is
    mean() of the motif_ic value of each motif.
    """
    sigma, mu, Ne = theta
    # Draw every matrix before any motif so the call order is unchanged.
    pssms = [sample_matrix(L, sigma) for _ in xrange(trials)]
    sampled = []
    for pssm in pssms:
        sampled.append(sample_motif_cftp(pssm, mu, Ne, N))
    return mean([motif_ic(m) for m in sampled])
def sample_motifs_evo_ic(motif, iterations=1000, verbose=False, theta=None):
    """Run evo_ic_sample_motif2 for `motif` and draw one motif per chain entry.

    Parameters:
        motif: sequence of sites; N = len(motif) and L = len(motif[0]).
        iterations: forwarded to evo_ic_sample_motif2.
        verbose: forwarded to evo_ic_sample_motif2 (previously this argument
            was accepted but a hard-coded False was passed instead).
        theta: forwarded to evo_ic_sample_motif2.

    Returns:
        (chain, motifs): the chain returned by evo_ic_sample_motif2 and, for
        each (sigma, mu, Ne) entry of it, the result of
        sample_motif_cftp(sample_matrix(L, sigma), mu, Ne, N).
    """
    N = len(motif)
    L = len(motif[0])
    # The target handed to the sampler is the motif_ic of the input motif.
    des_ic = motif_ic(motif)
    chain = evo_ic_sample_motif2(N, L, des_ic, iterations=iterations,
                                 verbose=verbose, theta=theta)
    motifs = [sample_motif_cftp(sample_matrix(L, sigma), mu, Ne, N)
              for (sigma, mu, Ne) in tqdm(chain)]
    return chain, motifs
def degradation_experiment():
    """Determine whether linear or pairwise models are more resistant to degradation.

    With fixed settings (L=10, N=50, Ne=5, sigma=1, mu=-10) this builds one
    motif from a matrix (sample_motif_cftp) and one from a code (last element
    returned by sample_pw_motif_mh), then evaluates a log-fitness for each
    motif and for 100 mutate_motif() variants of each.  The results are only
    bound to local names; nothing is returned.
    """
    L = 10
    N = 50
    Ne = 5
    nu = Ne - 1
    sigma = 1
    mu = -10
    num_mutants = 100

    matrix = sample_matrix(L, sigma)
    code = sample_code(L, sigma)
    li_motif = sample_motif_cftp(matrix, mu, Ne, N)
    pw_motif = sample_pw_motif_mh(code, N, Ne, mu, iterations=100000)[-1]

    def li_log_fitness(motif):
        # Per-site energies come from score_seq against the matrix.
        energies = [score_seq(matrix, site) for site in motif]
        return sum(-nu * log((1 + exp(energy - mu))) for energy in energies)

    def pw_log_fitness(motif):
        # Per-site energies are -log of the values from pw_prob_sites.
        energies = [-log(prob) for prob in pw_prob_sites(motif, code)]
        return sum(log(1 / (1 + exp(energy - mu))**nu) for energy in energies)

    li_base_fit = li_log_fitness(li_motif)
    li_mut_fits = []
    for _ in range(num_mutants):
        li_mut_fits.append(li_log_fitness(mutate_motif(li_motif)))

    pw_base_fit = pw_log_fitness(pw_motif)
    pw_mut_fits = []
    for _ in range(num_mutants):
        pw_mut_fits.append(pw_log_fitness(mutate_motif(pw_motif)))
Beispiel #4
0
def avg_ic_from_theta(theta, N, L, trials=3):
    """Average motif_ic across `trials` motifs built from theta.

    theta is unpacked as (sigma, mu, Ne).  All `trials` calls to
    sample_matrix(L, sigma) happen before the first call to
    sample_motif_cftp(matrix, mu, Ne, N); the motif_ic values of the
    resulting motifs are then averaged with mean().
    """
    sigma, mu, Ne = theta
    # Matrices first, motifs second: keeps the original call sequence.
    pssms = [sample_matrix(L, sigma) for _ in xrange(trials)]
    sampled = []
    for pssm in pssms:
        sampled.append(sample_motif_cftp(pssm, mu, Ne, N))
    return mean([motif_ic(m) for m in sampled])
 def f(theta):
     sigma, mu, Ne = theta
     matrices = [sample_matrix(L, sigma) for i in xrange(trials)]
     motifs = [sample_motif_cftp(matrix, mu, Ne, N) for matrix in matrices]
     ics = map(motif_ic,motifs)
     ic = mean(ics)
     print "sigma, mu, Ne:", sigma, mu, Ne
     print "mean IC:", ic
     return exp(-beta*(ic - des_ic)**2)
Beispiel #6
0
 def sample_mis(L, sigma, copy_factor, Ne, N, trials=100):
     """Return a list with one motif_mi value per trial.

     Each trial calls sample_matrix(L, sigma), derives mu from
     approx_mu(matrix, copy_factor * N), draws a motif with
     sample_motif_cftp(matrix, mu, Ne, N) and records motif_mi(motif).
     """

     def one_trial():
         # One independent matrix -> mu -> motif -> MI evaluation.
         pssm = sample_matrix(L, sigma)
         chem_pot = approx_mu(pssm, copy_factor * N)
         return motif_mi(sample_motif_cftp(pssm, chem_pot, Ne, N))

     return [one_trial() for _ in trange(trials)]
Beispiel #7
0
 def f(theta):
     sigma, mu, Ne = theta
     matrices = [sample_matrix(L, sigma) for i in xrange(trials)]
     motifs = [sample_motif_cftp(matrix, mu, Ne, N) for matrix in matrices]
     ics = map(motif_ic, motifs)
     ic = mean(ics)
     print "sigma, mu, Ne:", sigma, mu, Ne
     print "mean IC:", ic
     return exp(-beta * (ic - des_ic)**2)
def resample_from_post_chain(chain, N):
    """Draw a new motif for every chain entry from a re-sampled matrix.

    Given a chain of the form [(mat, mu, Ne)], perform the reduction:
    mat -> sigma -> mat' -> motif'

    Conclusion: heavily underestimates IC.
    """
    width = len(chain[0][0])
    # Stage 1: reduce every chain matrix with sigma_from_matrix.
    sigmas = [sigma_from_matrix(old_matrix) for (old_matrix, _mu, _Ne) in chain]
    # Stage 2: one sample_matrix call per sigma.
    fresh_matrices = [sample_matrix(width, s) for s in sigmas]
    # Stage 3: one motif per fresh matrix, reusing the entry's own mu and Ne.
    resampled = []
    for new_matrix, (_, mu, Ne) in tqdm(zip(fresh_matrices, chain)):
        resampled.append(sample_motif_cftp(new_matrix, mu, Ne, N))
    return resampled
Beispiel #9
0
def resample_from_post_chain(chain, N):
    """Re-derive motifs from a posterior chain via its matrices' sigmas.

    Given a chain of the form [(mat, mu, Ne)], perform the reduction:
    mat -> sigma -> mat' -> motif'

    Conclusion: heavily underestimates IC.
    """
    width = len(chain[0][0])
    # Stage 1: reduce every chain matrix with sigma_from_matrix.
    sigmas = [sigma_from_matrix(old_matrix) for (old_matrix, _mu, _Ne) in chain]
    # Stage 2: one sample_matrix call per sigma.
    fresh_matrices = [sample_matrix(width, s) for s in sigmas]
    # Stage 3: one motif per fresh matrix, reusing the entry's own mu and Ne.
    resampled = []
    for new_matrix, (_, mu, Ne) in tqdm(zip(fresh_matrices, chain)):
        resampled.append(sample_motif_cftp(new_matrix, mu, Ne, N))
    return resampled
Beispiel #10
0
def test_predict_ic(trials=100):
    pred_ics = []
    obs_ics = []
    for trial in trange(trials):
        sigma = random.random() * 5 + 0.1
        L = random.randrange(5, 15)
        matrix = sample_matrix(L, sigma)
        mu = random.random() * (-20)
        Ne = random.random() * 5 + 1
        pred_ic = predict_ic(matrix, mu, Ne)
        obs_ic = motif_ic(sample_motif_cftp(matrix, mu, Ne, n=100))
        pred_ics.append(pred_ic)
        obs_ics.append(obs_ic)
    r, p = scatter(pred_ics, obs_ics)
    print r, p
Beispiel #11
0
def sample_motifs_evo_ic(motif, iterations=1000, verbose=False, theta=None):
    """Run evo_ic_sample_motif2 for `motif` and draw one motif per chain entry.

    Parameters:
        motif: sequence of sites; N = len(motif) and L = len(motif[0]).
        iterations: forwarded to evo_ic_sample_motif2.
        verbose: forwarded to evo_ic_sample_motif2 (previously this argument
            was accepted but a hard-coded False was passed instead).
        theta: forwarded to evo_ic_sample_motif2.

    Returns:
        (chain, motifs): the chain returned by evo_ic_sample_motif2 and, for
        each (sigma, mu, Ne) entry of it, the result of
        sample_motif_cftp(sample_matrix(L, sigma), mu, Ne, N).
    """
    N = len(motif)
    L = len(motif[0])
    # The target handed to the sampler is the motif_ic of the input motif.
    des_ic = motif_ic(motif)
    chain = evo_ic_sample_motif2(N,
                                 L,
                                 des_ic,
                                 iterations=iterations,
                                 verbose=verbose,
                                 theta=theta)
    motifs = [
        sample_motif_cftp(sample_matrix(L, sigma), mu, Ne, N)
        for (sigma, mu, Ne) in tqdm(chain)
    ]
    return chain, motifs
def test_predict_ic(trials=100):
    pred_ics = []
    obs_ics = []
    for trial in trange(trials):
        sigma = random.random() * 5 + 0.1
        L = random.randrange(5, 15)
        matrix = sample_matrix(L, sigma)
        mu = random.random() * (-20)
        Ne = random.random() * 5 + 1
        pred_ic = predict_ic(matrix, mu, Ne)
        obs_ic = motif_ic(sample_motif_cftp(matrix, mu, Ne, n=100))
        pred_ics.append(pred_ic)
        obs_ics.append(obs_ic)
    r, p = scatter(pred_ics, obs_ics)
    print r, p
Beispiel #13
0
def posterior_chain(motif,
                    iterations=50000,
                    theta0=None,
                    sigma=1,
                    num_spoof_sites='N',
                    verbose=False):
    """do MH with doubly intractable MCMC one-point estimator"""
    L = len(motif[0])
    N = len(motif)
    if num_spoof_sites == 'N':
        num_spoof_sites = N  # should this be N or 1?
    if theta0 is None:
        matrix0 = [[0, 0, 0, 0] for i in range(L)]
        mu0 = -10
        Ne0 = 3
        theta = (matrix0, mu0, Ne0)
    else:
        theta = theta0
    log_f_theta = log_fhat(theta, motif)
    chain = []
    acceptances = 0
    for it in trange(iterations):
        theta_p = prop2(theta, sigma)
        log_f_theta_p = log_fhat(theta_p, motif)
        matrix_p, mu_p, Ne_p = theta_p
        xp = sample_motif_cftp(matrix_p, mu_p, Ne_p, num_spoof_sites)
        log_Z = log_fhat(theta, xp)
        log_Z_p = log_fhat(theta_p, xp)
        log_ar = log_f_theta_p - log_f_theta + N / num_spoof_sites * (log_Z -
                                                                      log_Z_p)
        if log(random.random()) < log_ar:
            theta = theta_p
            log_f_theta = log_f_theta_p
            log_Z = log_Z_p
            acceptances += 1
        chain.append(theta)
        if verbose:
            print "log(f), log_Z:", log_f_theta, log_Z
            print "mean_ep:", mean(score_seq(theta[0], site) for site in motif)
            print "mean_occ:", mean(occs(theta, motif))
            print "mu, Ne:", theta[1], theta[2]
    print "acceptances:", acceptances / float(it + 1)
    return chain
def sanity_check(trials = 1000):
    """Plot match-count distributions: analytic vs. MH vs. CFTP samples.

    Uses a fixed setup: L = 10, every matrix row is [-2, 0, 0, 0],
    mu = -10 and Ne = 2.  The analytic curve is the normalized exp() of a
    per-k log weight for k = 0..L.  The two empirical curves are the
    fraction of sites (out of `trials`) containing exactly k 'A'
    characters, for motifs from sample_motif_mh and sample_motif_cftp.
    All three curves are drawn with plt.plot; nothing is returned.
    """
    L = 10
    matrix = [[-2,0,0,0] for _ in range(L)]
    mu = -10
    Ne = 2
    nu = Ne - 1
    match_range = range(L+1)

    def log_weight(k):
        # Unnormalized log weight of a site with k matches.
        return (-nu * log(1+exp(-2*k - mu)) + log_choose(L,k)
                + k * log(1/4.0) + (L-k) * log(3/4.0))

    def match_frequencies(sites):
        # Fraction of sites having exactly k 'A' characters, for each k.
        counts = Counter([site.count('A') for site in sites])
        return [counts[k]/float(trials) for k in match_range]

    log_match_phats = [log_weight(k) for k in match_range]
    match_ps = normalize([exp(v) for v in log_match_phats])
    # MH motif is drawn before the CFTP motif, as in the original order.
    mh_match_ps = match_frequencies(sample_motif_mh(matrix, mu, Ne, trials))
    cftp_match_ps = match_frequencies(sample_motif_cftp(matrix, mu, Ne, trials))
    for series, name in ((match_ps, "Analytic"),
                         (mh_match_ps, "MH"),
                         (cftp_match_ps, "CFTP")):
        plt.plot(series, label=name)
    plt.xlabel("Matches")
    plt.ylabel("Frequency")
def posterior_chain(motif, iterations=50000, theta0=None, sigma=1, num_spoof_sites='N', verbose=False):
    """do MH with doubly intractable MCMC one-point estimator"""
    L = len(motif[0])
    N = len(motif)
    if num_spoof_sites == 'N':
        num_spoof_sites = N  # should this be N or 1?
    if theta0 is None:
        matrix0 = [[0,0,0,0] for i in range(L)]
        mu0 = -10
        Ne0 = 3
        theta = (matrix0, mu0, Ne0)
    else:
        theta = theta0
    log_f_theta = log_fhat(theta, motif)
    chain = []
    acceptances = 0
    for it in trange(iterations):
        theta_p = prop2(theta, sigma)
        log_f_theta_p = log_fhat(theta_p, motif)
        matrix_p, mu_p, Ne_p = theta_p
        xp = sample_motif_cftp(matrix_p, mu_p, Ne_p, num_spoof_sites)
        log_Z = log_fhat(theta, xp)
        log_Z_p = log_fhat(theta_p, xp)
        log_ar = log_f_theta_p - log_f_theta + N/num_spoof_sites * (log_Z - log_Z_p)
        if log(random.random()) < log_ar:
            theta = theta_p
            log_f_theta = log_f_theta_p
            log_Z = log_Z_p
            acceptances += 1
        chain.append(theta)
        if verbose:
            print "log(f), log_Z:", log_f_theta, log_Z
            print "mean_ep:", mean(score_seq(theta[0],site) for site in motif)
            print "mean_occ:", mean(occs(theta, motif))
            print "mu, Ne:", theta[1], theta[2]
    print "acceptances:", acceptances/float(it+1)
    return chain
Beispiel #16
0
def sanity_check(trials=1000):
    """Overlay analytic, MH and CFTP match-count distributions in one plot.

    Fixed setup: L = 10, each matrix row is [-2, 0, 0, 0], mu = -10,
    Ne = 2.  The analytic series is normalize() applied to exp() of a
    per-k log weight, k = 0..L.  The empirical series give, for motifs
    from sample_motif_mh and sample_motif_cftp, the share of sites (out of
    `trials`) with exactly k 'A' characters.  Plots via plt; returns None.
    """
    L = 10
    matrix = [[-2, 0, 0, 0] for _ in range(L)]
    mu = -10
    Ne = 2
    nu = Ne - 1
    match_range = range(L + 1)

    def log_weight(k):
        # Unnormalized log weight of a site with k matches.
        return (-nu * log(1 + exp(-2 * k - mu)) + log_choose(L, k) +
                k * log(1 / 4.0) + (L - k) * log(3 / 4.0))

    def match_frequencies(sites):
        # Fraction of sites having exactly k 'A' characters, for each k.
        counts = Counter([site.count('A') for site in sites])
        return [counts[k] / float(trials) for k in match_range]

    log_match_phats = [log_weight(k) for k in match_range]
    match_ps = normalize([exp(v) for v in log_match_phats])
    # MH motif is drawn before the CFTP motif, as in the original order.
    mh_match_ps = match_frequencies(sample_motif_mh(matrix, mu, Ne, trials))
    cftp_match_ps = match_frequencies(
        sample_motif_cftp(matrix, mu, Ne, trials))
    for series, name in ((match_ps, "Analytic"),
                         (mh_match_ps, "MH"),
                         (cftp_match_ps, "CFTP")):
        plt.plot(series, label=name)
    plt.xlabel("Matches")
    plt.ylabel("Frequency")
Beispiel #17
0
 def sample_motif((sigma, cf, Ne)):
     matrix = sample_matrix(L, sigma)
     mu = approx_mu(matrix, cf * N)
     return sample_motif_cftp(matrix, mu, Ne, N)
Beispiel #18
0
 def f(theta):
     """Return exp(-beta * (IC - des_ic)**2) for one motif drawn from theta.

     theta is unpacked as (matrix, mu, Ne).  N, beta and des_ic are free
     names that the surrounding scope must provide.
     """
     pssm, chem_pot, pop_size = theta
     sampled = sample_motif_cftp(pssm, chem_pot, pop_size, N)
     deviation = motif_ic(sampled) - des_ic
     return exp(-beta * deviation**2)
Beispiel #19
0
def eps_from_theta(theta, L, N=100):
    """Return score_seq(matrix, site) for each site of a motif drawn from theta.

    Parameters:
        theta: triple unpacked as (sigma, mu, Ne).  Previously it was never
            unpacked, so sigma, mu and Ne were unbound here (or silently
            taken from globals).
        L: first argument to sample_matrix.
        N: last argument to sample_motif_cftp (default 100).

    Returns:
        A list with one score_seq value per site of the sampled motif.
    """
    sigma, mu, Ne = theta
    matrix = sample_matrix(L, sigma)
    motif = sample_motif_cftp(matrix, mu, Ne, N)
    return [score_seq(matrix, site) for site in motif]
Beispiel #20
0
            theta = theta_p
            log_f_theta = log_f_theta_p
            log_Z = log_Z_p
            acceptances += 1
        chain.append(theta)
        if verbose:
            print "log(f), log_Z:", log_f_theta, log_Z
            print "mean_ep:", mean(score_seq(theta[0], site) for site in motif)
            print "mean_occ:", mean(occs(theta, motif))
            print "mu, Ne:", theta[1], theta[2]
    print "acceptances:", acceptances / float(it + 1)
    return chain


def motif_from_theta((matrix, mu, Ne), N):
    return sample_motif_cftp(matrix, mu, Ne, N)


def logmod(x):
    """Signed log transform: sign(x) * log(|x| + 1)."""
    magnitude = log(abs(x) + 1)
    return sign(x) * magnitude


def interpret_chain(chain, motif, filename=None):
    """Plot the logmod-transformed mean site energy along `chain`.

    For every state in `chain`, log_fhat(state, motif) and
    log_ZM_hack(state, len(motif)) are evaluated and their differences
    computed; none of these are used by the plotting visible here.  The
    plotted series is logmod() of the mean score_seq(state[0], site) over
    the motif's sites.  `filename` is accepted but not used in this body.
    """
    n_sites = len(motif)
    # All log_fhat evaluations happen before any log_ZM_hack evaluation.
    log_fhats = [log_fhat(state, motif) for state in chain]
    log_Zs = [log_ZM_hack(state, n_sites) for state in chain]
    log_ps = [lf - lz for (lf, lz) in zip(log_fhats, log_Zs)]
    mean_energies = []
    for state in chain:
        mean_energies.append(
            mean(score_seq(state[0], site) for site in motif))
    plt.plot([logmod(energy) for energy in mean_energies],
             label="Mean Site Energy (kBT)")
def eps_from_theta(theta, L, N=100):
    """Return score_seq(matrix, site) for each site of a motif drawn from theta.

    Parameters:
        theta: triple unpacked as (sigma, mu, Ne).  Previously it was never
            unpacked, so sigma, mu and Ne were unbound here (or silently
            taken from globals).
        L: first argument to sample_matrix.
        N: last argument to sample_motif_cftp (default 100).

    Returns:
        A list with one score_seq value per site of the sampled motif.
    """
    sigma, mu, Ne = theta
    matrix = sample_matrix(L, sigma)
    motif = sample_motif_cftp(matrix, mu, Ne, N)
    return [score_seq(matrix, site) for site in motif]
def observe_ic_from_theta(theta, L, num_matrices=3):
    """Return the mean motif_ic over `num_matrices` freshly sampled motifs.

    theta is unpacked as (sigma, mu, Ne).  Each motif comes from
    sample_motif_cftp(sample_matrix(L, sigma), mu, Ne, n=100).  The values
    are handed to mean() as a lazy generator.
    """
    sigma, mu, Ne = theta

    def sampled_ics():
        # One matrix and one motif per yielded value.
        for _ in range(num_matrices):
            pssm = sample_matrix(L, sigma)
            yield motif_ic(sample_motif_cftp(pssm, mu, Ne, n=100))

    return mean(sampled_ics())
 def f(theta):
     """Score theta = (matrix, mu, Ne) by the IC of one sampled motif.

     Returns exp(-beta * (motif_ic(motif) - des_ic)**2).  N, beta and
     des_ic are free names supplied by the surrounding scope.
     """
     pssm, chem_pot, pop_size = theta
     sampled = sample_motif_cftp(pssm, chem_pot, pop_size, N)
     deviation = motif_ic(sampled) - des_ic
     return exp(-beta * deviation**2)
Beispiel #24
0
def observe_ic_from_theta(theta, L, num_matrices=3):
    """Average motif_ic across `num_matrices` sampled motifs.

    theta is unpacked as (sigma, mu, Ne).  For each of the `num_matrices`
    rounds a matrix is produced by sample_matrix(L, sigma) and a motif by
    sample_motif_cftp(matrix, mu, Ne, n=100).  mean() receives the IC
    values as a lazy generator.
    """
    sigma, mu, Ne = theta

    def sampled_ics():
        # One matrix and one motif per yielded value.
        for _ in range(num_matrices):
            pssm = sample_matrix(L, sigma)
            yield motif_ic(sample_motif_cftp(pssm, mu, Ne, n=100))

    return mean(sampled_ics())
        if log(random.random()) < log_ar:
            theta = theta_p
            log_f_theta = log_f_theta_p
            log_Z = log_Z_p
            acceptances += 1
        chain.append(theta)
        if verbose:
            print "log(f), log_Z:", log_f_theta, log_Z
            print "mean_ep:", mean(score_seq(theta[0],site) for site in motif)
            print "mean_occ:", mean(occs(theta, motif))
            print "mu, Ne:", theta[1], theta[2]
    print "acceptances:", acceptances/float(it+1)
    return chain

def motif_from_theta((matrix, mu, Ne), N):
    return sample_motif_cftp(matrix, mu, Ne, N)

def logmod(x):
    """Return sign(x) * log(abs(x) + 1), a sign-preserving log transform."""
    magnitude = log(abs(x) + 1)
    return sign(x) * magnitude
    
def interpret_chain(chain, motif, filename=None):
    N = len(motif)
    log_fhats = [log_fhat(theta,motif) for theta in chain]
    log_Zs = [log_ZM_hack(theta,N) for theta in chain]
    log_ps = [lf - log_Z for (lf, log_Z) in zip(log_fhats, log_Zs)]
    plt.plot(map(logmod, [mean(score_seq(x[0],site) for site in motif) for x in chain]),
             label="Mean Site Energy (kBT)")
    plt.plot(map(logmod, [x[1] for x in chain]),label="$\mu$ (kBT)")
    plt.plot(map(logmod, [x[2] for x in chain]),label="$Ne$")
    plt.plot(map(logmod, log_fhats),label="log fhat")
    plt.plot(map(logmod, log_Zs),label="log_ZM")