Exemple #1
0
def avg_ic_from_theta(theta, N, L, trials=3):
    """Return the mean motif_ic of motifs sampled under theta.

    theta unpacks as (sigma, mu, Ne).  `trials` matrices are drawn with
    sample_matrix(L, sigma); one motif per matrix is then drawn with
    sample_motif_cftp(matrix, mu, Ne, N) and the motif_ic values are averaged.
    """
    sigma, mu, Ne = theta
    # Draw every matrix before sampling any motif so the helpers are invoked
    # in two separate passes.
    sampled_matrices = []
    for _ in xrange(trials):
        sampled_matrices.append(sample_matrix(L, sigma))
    sampled_motifs = []
    for mat in sampled_matrices:
        sampled_motifs.append(sample_motif_cftp(mat, mu, Ne, N))
    return mean([motif_ic(m) for m in sampled_motifs])
def sample_motifs_evo_ic(motif, iterations=1000, verbose=False, theta=None):
    """Sample a parameter chain targeting the IC of `motif`, then one motif per state.

    N and L are read from the motif (number of sites, length of the first
    site) and its motif_ic is used as the target passed to
    evo_ic_sample_motif2.  Each chain state is unpacked as (sigma, mu, Ne)
    and used to draw a fresh matrix and a motif from it.

    Returns:
        (chain, motifs): the chain from evo_ic_sample_motif2 and the list of
        motifs, one per chain state.
    """
    N = len(motif)
    L = len(motif[0])
    des_ic = motif_ic(motif)
    # Forward the caller's `verbose` flag (previously a literal False was
    # passed, so the parameter had no effect).
    chain = evo_ic_sample_motif2(N, L, des_ic, iterations=iterations,
                                 verbose=verbose, theta=theta)
    motifs = [sample_motif_cftp(sample_matrix(L, sigma), mu, Ne, N)
              for (sigma, mu, Ne) in tqdm(chain)]
    return chain, motifs
Exemple #3
0
def experiment2_():
    """Run one MH chain under the `code` model and one under a PSSM model.

    A code is drawn with sample_code and scored (via `score`) on 10000
    random sites; the standard deviation of those scores sets the sigma of
    the PSSM drawn with sample_matrix.  Both chains use mutate_site as the
    proposal and start from a random site.

    Returns:
        (apw_fits, linear_fits): each chain mapped through its own
        unnormalised target function.
    """
    L = 10
    sigma = 1
    mu = -10
    Ne = 2
    code = sample_code(L, sigma)
    sites = [random_site(L) for i in xrange(10000)]
    apw_eps = [score(code, site) for site in sites]
    site_sigma = sd(apw_eps)
    # Per-position sigma is site_sigma / sqrt(L); presumably chosen so the
    # linear model's site-energy spread matches the code model -- TODO confirm.
    pssm = sample_matrix(L, sqrt(site_sigma**2 / L))

    def apw_phat(site):
        ep = score(code, site)
        return 1 / (1 + exp(ep - mu))**(Ne - 1)

    def linear_phat(site):
        ep = score_seq(pssm, site)
        return 1 / (1 + exp(ep - mu))**(Ne - 1)

    apw_chain = mh(apw_phat, proposal=mutate_site, x0=random_site(L))
    linear_chain = mh(linear_phat, proposal=mutate_site, x0=random_site(L))
    apw_fits = map(apw_phat, apw_chain)
    linear_fits = map(linear_phat, linear_chain)
    return apw_fits, linear_fits
Exemple #4
0
def experiment3(trials=10):
    """Scatter-plot geometric-mean occupancy against site-score spread.

    For `trials` codes (sample_code / score) and `trials` PSSMs
    (sample_matrix / score_seq), computes the standard deviation of scores
    over 10000 random sites, runs one MH chain per model that captures the
    occupancy 1 / (1 + exp(ep - mu)) at each state, and plots the geometric
    mean of those occupancies on a log y-axis.  Nothing is returned.
    """
    mu = -10
    Ne = 5
    L = 10
    sigma = 1
    codes = [sample_code(L, sigma) for i in range(trials)]
    pssms = [sample_matrix(L, sigma) for i in range(trials)]
    sites = [random_site(L) for i in xrange(10000)]
    apw_site_sigmas = [
        sd([score(code, site) for site in sites]) for code in codes
    ]
    linear_site_sigmas = [
        sd([score_seq(pssm, site) for site in sites]) for pssm in pssms
    ]

    def apw_phat(code, site):
        ep = score(code, site)
        return 1 / (1 + exp(ep - mu))**(Ne - 1)

    def apw_occ(code, site):
        ep = score(code, site)
        return 1 / (1 + exp(ep - mu))

    def linear_phat(pssm, site):
        ep = score_seq(pssm, site)
        return 1 / (1 + exp(ep - mu))**(Ne - 1)

    def linear_occ(pssm, site):
        ep = score_seq(pssm, site)
        return 1 / (1 + exp(ep - mu))

    def geo_mean_occ(phat, occ):
        """Geometric mean of the occupancies captured along one MH chain."""
        chain = mh(phat,
                   proposal=mutate_site,
                   x0=random_site(L),
                   capture_state=occ)
        # The first captured value is skipped.  The logs are base 10, so the
        # mean must be undone with 10**; exp() here gave a value that was not
        # the geometric mean.
        return 10 ** mean(map(log10, chain[1:]))

    apw_mean_fits = [
        geo_mean_occ(lambda s, code=code: apw_phat(code, s),
                     lambda s, code=code: apw_occ(code, s))
        for code in tqdm(codes)
    ]
    linear_mean_fits = [
        geo_mean_occ(lambda s, pssm=pssm: linear_phat(pssm, s),
                     lambda s, pssm=pssm: linear_occ(pssm, s))
        for pssm in tqdm(pssms)
    ]
    plt.scatter(apw_site_sigmas, apw_mean_fits, label='apw')
    plt.scatter(linear_site_sigmas,
                linear_mean_fits,
                color='g',
                label='linear')
    plt.semilogy()
    plt.legend(loc='lower right')
def experiment3(trials=10):
    """Plot geometric-mean occupancy versus site-score spread for two models.

    `trials` codes (scored with `score`) and `trials` PSSMs (scored with
    `score_seq`) are drawn.  For each, the standard deviation of scores over
    10000 random sites is paired with the geometric mean of the occupancies
    1/(1+exp(ep-mu)) captured along an MH chain.  The two point sets are
    scattered on a log y-axis; nothing is returned.
    """
    mu = -10
    Ne = 5
    L = 10
    sigma = 1
    codes = [sample_code(L, sigma) for i in range(trials)]
    pssms = [sample_matrix(L, sigma) for i in range(trials)]
    sites = [random_site(L) for i in xrange(10000)]
    apw_site_sigmas = [sd([score(code, site) for site in sites]) for code in codes]
    linear_site_sigmas = [sd([score_seq(pssm, site) for site in sites]) for pssm in pssms]

    def apw_phat(code, site):
        ep = score(code, site)
        return 1/(1+exp(ep-mu))**(Ne-1)

    def apw_occ(code, site):
        ep = score(code, site)
        return 1/(1+exp(ep-mu))

    def linear_phat(pssm, site):
        ep = score_seq(pssm, site)
        return 1/(1+exp(ep-mu))**(Ne-1)

    def linear_occ(pssm, site):
        ep = score_seq(pssm, site)
        return 1/(1+exp(ep-mu))

    apw_mean_fits = []
    for code in tqdm(codes):
        occs = mh(lambda s: apw_phat(code, s), proposal=mutate_site, x0=random_site(L),
                  capture_state=lambda s: apw_occ(code, s))[1:]
        # Logs are taken in base 10, so invert with 10** (exp() did not give
        # the geometric mean).
        apw_mean_fits.append(10 ** mean(map(log10, occs)))

    linear_mean_fits = []
    for pssm in tqdm(pssms):
        occs = mh(lambda s: linear_phat(pssm, s), proposal=mutate_site, x0=random_site(L),
                  capture_state=lambda s: linear_occ(pssm, s))[1:]
        linear_mean_fits.append(10 ** mean(map(log10, occs)))

    plt.scatter(apw_site_sigmas, apw_mean_fits, label='apw')
    plt.scatter(linear_site_sigmas, linear_mean_fits, color='g', label='linear')
    plt.semilogy()
    plt.legend(loc='lower right')
def avg_ic_from_theta(theta, N, L, trials=3):
    """Average motif_ic over `trials` independently sampled motifs.

    theta is the triple (sigma, mu, Ne).  Matrices come from
    sample_matrix(L, sigma) and motifs from
    sample_motif_cftp(matrix, mu, Ne, N).
    """
    sigma, mu, Ne = theta
    mats = [sample_matrix(L, sigma) for _ in xrange(trials)]
    # The inner list is built completely (all motifs sampled) before any
    # motif_ic is evaluated.
    ic_values = [motif_ic(motif) for motif in
                 [sample_motif_cftp(mat, mu, Ne, N) for mat in mats]]
    return mean(ic_values)
def degradation_experiment():
    """Determine whether linear or pairwise models are more resistant to degradation.

    Samples one linear motif (sample_motif_cftp on a sampled matrix) and one
    pairwise motif (last state of sample_pw_motif_mh on a sampled code), then
    evaluates the log fitness of each motif and of 100 independently mutated
    copies (mutate_motif) of each.
    """
    L = 10
    N = 50
    Ne = 5
    nu = Ne - 1
    sigma = 1
    mu = -10
    matrix = sample_matrix(L, sigma)
    code = sample_code(L, sigma)
    li_motif = sample_motif_cftp(matrix, mu, Ne, N)
    pw_motif = sample_pw_motif_mh(code, N, Ne, mu, iterations=100000)[-1]

    def li_log_fitness(motif):
        """Sum over sites of -nu * log(1 + exp(ep - mu)), with ep from score_seq."""
        eps = [score_seq(matrix, site) for site in motif]
        return sum(-nu * log(1 + exp(ep - mu)) for ep in eps)

    def pw_log_fitness(motif):
        """Same log fitness, with ep = -log of each value from pw_prob_sites."""
        eps = [-log(prob) for prob in pw_prob_sites(motif, code)]
        # log(1 / x**nu) == -nu * log(x); taking the log first avoids raising
        # (1 + exp(.)) to the nu-th power, which can overflow, and matches
        # li_log_fitness.
        return sum(-nu * log(1 + exp(ep - mu)) for ep in eps)

    li_base_fit = li_log_fitness(li_motif)
    li_mut_fits = [li_log_fitness(mutate_motif(li_motif)) for i in range(100)]
    pw_base_fit = pw_log_fitness(pw_motif)
    pw_mut_fits = [pw_log_fitness(mutate_motif(pw_motif)) for i in range(100)]
    # NOTE(review): the four results above are neither returned nor used in
    # the code visible here -- confirm whether a return/plot is missing.
 def linear_fit(sigma, mu, Ne):
     """Mean captured occupancy along an MH chain for one sampled PSSM.

     A PSSM is drawn with sample_matrix(L, sigma) (L comes from the
     enclosing scope).  The chain targets 1/(1+exp(ep-mu))**(Ne-1), records
     linear_occ(pssm, mu, site) at each state, and the first 25000 recorded
     values are dropped before averaging.
     """
     pssm = sample_matrix(L, sigma)
     nu = Ne - 1

     def linear_phat(site):
         return 1/(1+exp(score_seq(pssm, site)-mu))**nu

     def record_occ(site):
         return linear_occ(pssm, mu, site)

     start = random_site(L)
     recorded = mh(linear_phat, proposal=mutate_site, x0=start,
                   capture_state=record_occ)
     return mean(recorded[25000:])
 def f(theta):
     sigma, mu, Ne = theta
     matrices = [sample_matrix(L, sigma) for i in xrange(trials)]
     motifs = [sample_motif_cftp(matrix, mu, Ne, N) for matrix in matrices]
     ics = map(motif_ic,motifs)
     ic = mean(ics)
     print "sigma, mu, Ne:", sigma, mu, Ne
     print "mean IC:", ic
     return exp(-beta*(ic - des_ic)**2)
Exemple #10
0
 def f(theta):
     sigma, mu, Ne = theta
     matrices = [sample_matrix(L, sigma) for i in xrange(trials)]
     motifs = [sample_motif_cftp(matrix, mu, Ne, N) for matrix in matrices]
     ics = map(motif_ic, motifs)
     ic = mean(ics)
     print "sigma, mu, Ne:", sigma, mu, Ne
     print "mean IC:", ic
     return exp(-beta * (ic - des_ic)**2)
Exemple #11
0
def test_log_ZS_gaussian(L, sigma=1):
    """Evaluate log_ZS_analytic and log_ZS_gaussian on one random parameter set.

    A matrix is drawn with sample_matrix(L, sigma), mu is drawn from
    [-10, 10) and Ne from [1, 3).  The comparison against
    log_ZS_importance is currently disabled.

    Returns:
        (ans_analytic, ans_gaussian)
    """
    matrix = sample_matrix(L, sigma)
    mu = random.random() * 20 - 10
    Ne = random.random() * 2 + 1
    params = (matrix, mu, Ne)
    analytic = log_ZS_analytic(params)
    gaussian = log_ZS_gaussian(params)
    return analytic, gaussian
def test_log_ZS_gaussian(L, sigma = 1):
    """Return (analytic, gaussian) estimates of log ZS for random parameters.

    The matrix comes from sample_matrix(L, sigma); mu = random()*20 - 10 and
    Ne = random()*2 + 1.  Both estimators receive the same
    (matrix, mu, Ne) triple.  The importance-sampling estimator
    (log_ZS_importance) is not evaluated.
    """
    matrix = sample_matrix(L, sigma)
    mu = random.random() * 20 - 10
    Ne = random.random() * 2 + 1
    theta = (matrix, mu, Ne)
    return log_ZS_analytic(theta), log_ZS_gaussian(theta)
def resample_from_post_chain(chain, N):
    """Resample one motif per chain state via mat -> sigma -> mat' -> motif'.

    `chain` has the form [(mat, mu, Ne)].  Each matrix is reduced to a sigma
    (sigma_from_matrix), a new matrix is drawn from that sigma, and a motif
    is sampled from the new matrix using the state's own mu and Ne.

    Conclusion: heavily underestimates IC.
    """
    L = len(chain[0][0])
    sigmas = []
    for (mat, _mu, _Ne) in chain:
        sigmas.append(sigma_from_matrix(mat))
    # All replacement matrices are drawn before any motif is sampled.
    matrices = [sample_matrix(L, s) for s in sigmas]
    motifs = []
    for matrix, (_, mu, Ne) in tqdm(zip(matrices, chain)):
        motifs.append(sample_motif_cftp(matrix, mu, Ne, N))
    return motifs
Exemple #14
0
    def linear_fit(sigma, mu, Ne):
        """Average the occupancies recorded along an MH chain for a fresh PSSM.

        The PSSM is sample_matrix(L, sigma), with L taken from the enclosing
        scope.  The chain's target is 1 / (1 + exp(ep - mu))**(Ne - 1) with
        ep = score_seq(pssm, site); the recorded quantity is
        linear_occ(pssm, mu, site).  The first 25000 recorded values are
        discarded before taking the mean.
        """
        pssm = sample_matrix(L, sigma)
        exponent = Ne - 1

        def linear_phat(site):
            return 1 / (1 + exp(score_seq(pssm, site) - mu))**exponent

        def capture(site):
            return linear_occ(pssm, mu, site)

        initial_site = random_site(L)
        recorded = mh(linear_phat,
                      proposal=mutate_site,
                      x0=initial_site,
                      capture_state=capture)
        return mean(recorded[25000:])
def experiment1():
    """Sample motifs on a (Ne, sigma) grid and compute their mean occupancies.

    `sigmas`, `Nes` and `n` are read from the enclosing/module scope.  For
    every Ne a row of matrices (one per sigma) is drawn; for every matrix ten
    motifs are sampled with that row's Ne, and the mean of mean_occupancy
    over those ten motifs is stored in `occs`.
    """
    matrices = [[sample_matrix(10, sigma) for sigma in sigmas] for _ in Nes]

    def sample_motifs(matrix, Ne):
        """Ten motifs for `matrix` at the given Ne."""
        return [
            sample_motif_cftp(matrix, approx_mu(matrix, 10 * n), Ne, n)
            for i in range(10)
        ]

    # Pair every row with its own Ne.  Previously a lambda read `Ne` after
    # the comprehension above had finished, so every row was sampled with the
    # last value in Nes.  The row-of-rows layout is what the zip below needs.
    motifses = [[sample_motifs(matrix, Ne) for matrix in matrix_row]
                for (Ne, matrix_row) in tqdm(zip(Nes, matrices))]

    occs = [[
        mean(
            mean_occupancy(matrix, m, approx_mu(matrix, 10 * n))
            for m in motif) for (matrix, motif) in zip(matrix_row, motif_row)
    ] for (matrix_row, motif_row) in zip(matrices, motifses)]
    # NOTE(review): `occs` is not returned or used in the code visible here.
Exemple #16
0
def resample_from_post_chain(chain, N):
    """Given a chain of the form [(mat, mu, Ne)], perform the reduction
    mat -> sigma -> mat' -> motif' and return the list of motifs.

    Conclusion: heavily underestimates IC.
    """
    L = len(chain[0][0])
    # Pass 1: summarise every matrix in the chain by its sigma.
    sigmas = [sigma_from_matrix(mat) for (mat, _mu, _Ne) in chain]
    # Pass 2: draw a replacement matrix for each sigma.
    matrices = []
    for sigma in sigmas:
        matrices.append(sample_matrix(L, sigma))
    # Pass 3: sample a motif from each replacement matrix with the mu and Ne
    # of the corresponding chain state.
    paired = zip(matrices, chain)
    return [
        sample_motif_cftp(new_matrix, mu, Ne, N)
        for new_matrix, (_, mu, Ne) in tqdm(paired)
    ]
def test_predict_ic(trials=100):
    pred_ics = []
    obs_ics = []
    for trial in trange(trials):
        sigma = random.random() * 5 + 0.1
        L = random.randrange(5, 15)
        matrix = sample_matrix(L, sigma)
        mu = random.random() * (-20)
        Ne = random.random() * 5 + 1
        pred_ic = predict_ic(matrix, mu, Ne)
        obs_ic = motif_ic(sample_motif_cftp(matrix, mu, Ne, n=100))
        pred_ics.append(pred_ic)
        obs_ics.append(obs_ic)
    r, p = scatter(pred_ics, obs_ics)
    print r, p
Exemple #18
0
def sample_motifs_evo_ic(motif, iterations=1000, verbose=False, theta=None):
    """Run evo_ic_sample_motif2 against the IC of `motif` and sample motifs.

    The number of sites N, the site length L (taken from the first site) and
    motif_ic(motif) are passed to evo_ic_sample_motif2.  Every state of the
    resulting chain is unpacked as (sigma, mu, Ne); a new matrix is drawn for
    it and a motif sampled from that matrix.

    Returns:
        (chain, motifs): the parameter chain and one motif per chain state.
    """
    N = len(motif)
    L = len(motif[0])
    des_ic = motif_ic(motif)
    # `verbose` is now forwarded; a hard-coded False used to be passed, which
    # silently ignored the caller's argument.
    chain = evo_ic_sample_motif2(N,
                                 L,
                                 des_ic,
                                 iterations=iterations,
                                 verbose=verbose,
                                 theta=theta)
    motifs = [
        sample_motif_cftp(sample_matrix(L, sigma), mu, Ne, N)
        for (sigma, mu, Ne) in tqdm(chain)
    ]
    return chain, motifs
def predict_ic_from_theta(theta, L):
    """Set up importance-sampling quantities for predicting IC from theta.

    theta unpacks as (sigma, mu, Ne).  A matrix is sampled, a target energy
    des_ep = max(mu - log(Ne - 1), ep_min + 1) is chosen, and `lamb` is
    solved by bisection on [-20, 20] so that the expected energy under
    psfm_from_matrix(matrix, lamb) equals des_ep.  100 sites are then drawn
    from that psfm and their log-weights under both distributions computed.

    NOTE(review): the visible code ends after computing log_ps / log_qs and
    returns nothing -- confirm whether the remainder is missing.
    """
    sigma, mu, Ne = theta
    nu = Ne - 1
    ep_star = mu - log(nu)
    matrix = sample_matrix(L, sigma)
    ep_min = sum(map(min, matrix))
    des_ep = max(ep_star, ep_min + 1)

    def f(lamb):
        """Expected site energy under psfm(lamb), minus the target des_ep."""
        psfm = psfm_from_matrix(matrix, lamb)
        return sum([sum(ep*p for ep, p in zip(eps, ps)) for eps, ps in zip(matrix, psfm)]) - des_ep

    # Solve for lamb first, then build the psfm from it.  `psfm` used to be
    # referenced here although it was only ever bound inside f().
    lamb = bisect_interval(f, -20, 20)
    psfm = psfm_from_matrix(matrix, lamb)
    log_psfm = [[log(p) for p in ps] for ps in psfm]
    sites = [sample_from_psfm(psfm) for i in range(100)]
    log_ps = [-nu*log(1+exp(score_seq(matrix, site) - mu)) for site in sites]
    log_qs = [score_seq(log_psfm, site) for site in sites]
Exemple #20
0
def test_predict_ic(trials=100):
    pred_ics = []
    obs_ics = []
    for trial in trange(trials):
        sigma = random.random() * 5 + 0.1
        L = random.randrange(5, 15)
        matrix = sample_matrix(L, sigma)
        mu = random.random() * (-20)
        Ne = random.random() * 5 + 1
        pred_ic = predict_ic(matrix, mu, Ne)
        obs_ic = motif_ic(sample_motif_cftp(matrix, mu, Ne, n=100))
        pred_ics.append(pred_ic)
        obs_ics.append(obs_ic)
    r, p = scatter(pred_ics, obs_ics)
    print r, p
Exemple #21
0
def L_sigma_plot(mu=-10):
    """Show sampled and closed-form occupancy side by side over an (L, sigma) grid.

    Left panel: for each sigma in linspace(0, 20, 100) and each L in 1..29,
    the mean over ten sampled matrices of the occupancy of the first site of
    ringer_motif(matrix, 1).  Right panel: 1 / (1 + exp(-L*sigma - mu)) on
    the same grid, with a colorbar.
    """
    def occupancy(matrix):
        best_site = ringer_motif(matrix, 1)[0]
        return 1 / (1 + exp(score_seq(matrix, best_site) - mu))

    Ls = range(1, 30)
    sigmas = np.linspace(0, 20, 100)

    occ_matrix = []
    for sigma in tqdm(sigmas):
        occ_row = []
        for L in Ls:
            occ_row.append(
                mean(occupancy(sample_matrix(L, sigma)) for i in range(10)))
        occ_matrix.append(occ_row)

    pred_matrix = []
    for sigma in sigmas:
        pred_matrix.append([1 / (1 + exp(-L * sigma - mu)) for L in Ls])

    for panel, grid in ((1, occ_matrix), (2, pred_matrix)):
        plt.subplot(1, 2, panel)
        plt.imshow(grid, interpolation='none', aspect='auto')
    plt.colorbar()
Exemple #22
0
def main_experiment(samples=30, iterations=10000, delta_ic=0.1):
    results_dict = {}
    for tf_idx, tf in enumerate(tfdf.tfs):
        print "starting on:", tf
        motif = getattr(tfdf, tf)
        if motif_ic(motif) < 5:
            print "excluding", tf, "for low IC"
            continue
        bio_ic = motif_ic(motif)
        n = len(motif)
        L = len(motif[0])
        matrix = matrix_from_motif(motif)
        sigma = sigma_from_matrix(matrix)
        mu = approximate_mu(matrix, n, G)
        Ne = estimate_Ne(matrix, mu, n, bio_ic)
        spoofs = []
        ar = 0
        spoof_trials = 0.0
        while len(spoofs) < samples:
            spoof_trials += 1
            matrix, chain = sella_hirsch_mh(Ne=Ne,
                                            mu=mu,
                                            n=1,
                                            matrix=sample_matrix(L, sigma),
                                            init='ringer',
                                            iterations=iterations)
            spoof_motif = concat(
                [random.choice(chain[iterations / 2:]) for i in range(n)])
            if abs(motif_ic(spoof_motif) - bio_ic) < delta_ic:
                spoofs.append(spoof_motif)
                ar += 1
            print "spoof acceptance rate:", ar / spoof_trials, len(
                spoofs), samples, spoof_trials
        #spoofs = [chain[-1] for (spoof_matrix,chain,Ne) in [spoof_motif(motif,Ne) for i in range(samples)]]
        results_dict[tf] = {
            fname: map(eval(fname), spoofs)
            for fname in "motif_ic motif_gini total_motif_mi".split()
        }
        print "finished:", tf, "(%s/%s)" % (tf_idx, len(tfdf.tfs))
        print bio_ic, mean_ci(results_dict[tf]['motif_ic'])
    return results_dict
Exemple #23
0
def predict_ic_from_theta(theta, L):
    """Prepare importance-sampling terms for an IC prediction from theta.

    theta is (sigma, mu, Ne).  After sampling a matrix, the target energy is
    des_ep = max(mu - log(Ne - 1), ep_min + 1), where ep_min is the sum of
    the per-row minima of the matrix.  `lamb` is found by bisect_interval on
    [-20, 20] so that the mean energy under psfm_from_matrix(matrix, lamb)
    hits des_ep; 100 sites are sampled from that psfm and scored under both
    the fitness model (log_ps) and the proposal (log_qs).

    NOTE(review): nothing is returned in the code visible here -- confirm
    whether the tail of the function is missing.
    """
    sigma, mu, Ne = theta
    nu = Ne - 1
    ep_star = mu - log(nu)
    matrix = sample_matrix(L, sigma)
    ep_min = sum(map(min, matrix))
    des_ep = max(ep_star, ep_min + 1)

    def f(lamb):
        """Mean site energy under psfm(lamb) minus des_ep (root-finding target)."""
        psfm = psfm_from_matrix(matrix, lamb)
        return sum([
            sum(ep * p for ep, p in zip(eps, ps))
            for eps, ps in zip(matrix, psfm)
        ]) - des_ep

    # `psfm` was previously read here although it only existed inside f(),
    # and `lamb` was solved after it was needed.  Solve first, then build.
    lamb = bisect_interval(f, -20, 20)
    psfm = psfm_from_matrix(matrix, lamb)
    log_psfm = [[log(p) for p in ps] for ps in psfm]
    sites = [sample_from_psfm(psfm) for i in range(100)]
    log_ps = [
        -nu * log(1 + exp(score_seq(matrix, site) - mu)) for site in sites
    ]
    log_qs = [score_seq(log_psfm, site) for site in sites]
Exemple #24
0
def sella_hirsch_mh_penalize_mu(Ne=5,
                                n=16,
                                L=16,
                                G=5 * 10**6,
                                sigma=1,
                                alpha=0.01,
                                init="random",
                                matrix=None,
                                x0=None,
                                iterations=50000,
                                p=None):
    print "p:", p
    if matrix is None:
        matrix = sample_matrix(L, sigma)
    if x0 is None:
        if init == "random":
            x0 = (random_motif(L, n), random.gauss(0, 1))
        elif init == "ringer":
            x0 = (ringer_motif(matrix, n), random.gauss(0, 1))
        elif init == "anti_ringer":
            x0 = (anti_ringer_motif(matrix, n), random.gauss(0, 1))
        else:
            x0 = init
    if p is None:
        p = 1.0 / (n * L)
    nu = Ne - 1

    def log_f((motif, mu)):
        return nu * log_fitness_penalize_mu(matrix, motif, mu, alpha)

    def prop((motif, mu)):
        motif_p = mutate_motif_p(motif,
                                 p)  # probability of mutation per basepair
        mu_p = mu + random.gauss(0, 0.1)
        return motif_p, mu_p

    chain = mh(log_f, prop, x0, use_log=True, iterations=iterations)
    return matrix, chain
Exemple #25
0
def sella_hirsch_mh(Ne=5,
                    n=16,
                    L=16,
                    sigma=1,
                    mu=0,
                    init="random",
                    matrix=None,
                    x0=None,
                    iterations=50000,
                    p=None):
    print "p:", p
    if matrix is None:
        matrix = sample_matrix(L, sigma)
    else:
        L = len(matrix)
    if x0 is None:
        if init == "random":
            x0 = random_motif(L, n)
        elif init == "ringer":
            x0 = ringer_motif(matrix, n)
        elif init == "anti_ringer":
            x0 = anti_ringer_motif(matrix, n)
        else:
            x0 = init
    if p is None:
        p = 1.0 / (n * L)
    nu = Ne - 1

    def log_f(motif):
        return nu * log_fitness(matrix, motif, mu)

    def prop(motif):
        motif_p = mutate_motif_p(motif,
                                 p)  # probability of mutation per basepair
        return motif_p

    chain = mh(log_f, prop, x0, use_log=True, iterations=iterations)
    return matrix, chain
Exemple #26
0
def spoof_motif(motif, Ne=None, iterations=10000):
    matrix = matrix_from_motif(motif)
    L = len(motif[0])
    n = len(motif)
    sigma = sigma_from_matrix(matrix)
    spoof_matrix = sample_matrix(L, sigma)
    bio_ic = motif_ic(motif)
    # this method of reading site_mu, site_sigma off of motif is slightly suspect...
    site_mu = site_mu_from_matrix(matrix_from_motif(motif))
    site_sigma = site_sigma_from_matrix(matrix_from_motif(motif))
    # now need to find mu, nu
    n = len(motif)
    assumed_copies = 10 * n
    mu = approximate_mu(matrix, assumed_copies, G)
    spoof_mu = approximate_mu(spoof_matrix, assumed_copies, G)
    if Ne is None:
        Ne = estimate_Ne(spoof_matrix, spoof_mu, n, bio_ic)
        print "chose Ne:", Ne
    spoof_matrix, chain = sella_hirsch_mh(Ne=Ne,
                                          matrix=spoof_matrix,
                                          mu=mu,
                                          n=n)
    return spoof_matrix, chain, Ne
def experiment2_():
    """Compare MH chains under a `code` model and a matched PSSM model.

    The standard deviation of `score(code, site)` over 10000 random sites
    determines the sigma passed to sample_matrix for the PSSM.  One MH chain
    (proposal mutate_site, random starting site) is run per model.

    Returns:
        (apw_fits, linear_fits): each chain mapped through the target
        function it was sampled from.
    """
    L = 10
    sigma = 1
    code = sample_code(L, sigma)
    mu = -10
    Ne = 2
    sites = [random_site(L) for i in xrange(10000)]
    site_sigma = sd([score(code, site) for site in sites])
    # sigma per position = site_sigma / sqrt(L); presumably to match the
    # overall site-score spread of the code model -- TODO confirm.
    pssm = sample_matrix(L, sqrt(site_sigma**2/L))

    def apw_phat(site):
        ep = score(code, site)
        return 1/(1+exp(ep-mu))**(Ne-1)

    def linear_phat(site):
        ep = score_seq(pssm, site)
        return 1/(1+exp(ep-mu))**(Ne-1)

    apw_chain = mh(apw_phat, proposal=mutate_site, x0=random_site(L))
    linear_chain = mh(linear_phat, proposal=mutate_site, x0=random_site(L))
    apw_fits = map(apw_phat, apw_chain)
    linear_fits = map(linear_phat, linear_chain)
    return apw_fits, linear_fits
Exemple #28
0
def eps_from_theta(theta, L, N=100):
    """Return score_seq(matrix, site) for every site of one sampled motif.

    theta unpacks as (sigma, mu, Ne).  The matrix is sample_matrix(L, sigma)
    and the motif is sample_motif_cftp(matrix, mu, Ne, N).
    """
    # theta was previously never unpacked, leaving sigma, mu and Ne unbound
    # inside the function.
    sigma, mu, Ne = theta
    matrix = sample_matrix(L, sigma)
    motif = sample_motif_cftp(matrix, mu, Ne, N)
    eps = [score_seq(matrix, site) for site in motif]
    return eps
def predict_ic_from_theta(theta, L, num_matrices=3):
    """Mean of predict_ic over `num_matrices` matrices sampled for theta.

    theta is (sigma, mu, Ne); each matrix is sample_matrix(L, sigma) and
    predict_ic is called with N=100.
    """
    sigma, mu, Ne = theta
    # A lazy generator is handed to mean(), one prediction per fresh matrix.
    predictions = (predict_ic(sample_matrix(L, sigma), mu, Ne, N=100)
                   for _ in range(num_matrices))
    return mean(predictions)
def sample_pair(sigma, Ne):
    """Sample a matrix and a motif drawn from it; return (matrix, motif).

    L and n are read from the enclosing/module scope.  mu is
    approx_mu(matrix, 10 * n).
    """
    matrix = sample_matrix(L, sigma)
    copies = 10 * n
    motif = sample_motif_cftp(matrix, approx_mu(matrix, copies), Ne, n)
    return (matrix, motif)
def sample_mean_occ(sigma, Ne):
    """Return mean_occupancy for one freshly sampled (matrix, motif) pair.

    L and n are read from the enclosing/module scope.  mu is
    approx_mu(matrix, 10 * n) and is used both to sample the motif and to
    evaluate its occupancy.
    """
    sampled_matrix = sample_matrix(L, sigma)
    chem_pot = approx_mu(sampled_matrix, 10 * n)
    sampled_motif = sample_motif_cftp(sampled_matrix, chem_pot, Ne, n)
    occ = mean_occupancy(sampled_matrix, sampled_motif, chem_pot)
    return occ
def observe_ic_from_theta(theta, L, num_matrices=3):
    """Mean motif_ic of motifs sampled from `num_matrices` fresh matrices.

    theta is (sigma, mu, Ne).  Each motif is sampled with n=100 from its own
    sample_matrix(L, sigma).
    """
    sigma, mu, Ne = theta

    def one_ic():
        matrix = sample_matrix(L, sigma)
        return motif_ic(sample_motif_cftp(matrix, mu, Ne, n=100))

    return mean(one_ic() for _ in range(num_matrices))
Exemple #33
0
def observe_ic_from_theta(theta, L, num_matrices=3):
    """Average the observed IC over `num_matrices` sampled matrices.

    theta unpacks as (sigma, mu, Ne).  For each repetition a matrix is drawn
    with sample_matrix(L, sigma), a motif is sampled from it with n=100, and
    its motif_ic is taken.
    """
    sigma, mu, Ne = theta
    # Evaluated lazily by mean(): matrix, then motif, then IC per repetition.
    observed = (
        motif_ic(sample_motif_cftp(sample_matrix(L, sigma), mu, Ne, n=100))
        for _rep in range(num_matrices))
    return mean(observed)
Exemple #34
0
def predict_ic_from_theta(theta, L, num_matrices=3):
    """Average predict_ic(matrix, mu, Ne, N=100) over freshly sampled matrices.

    theta unpacks as (sigma, mu, Ne); `num_matrices` matrices are drawn with
    sample_matrix(L, sigma), one per prediction.
    """
    sigma, mu, Ne = theta

    def one_prediction():
        return predict_ic(sample_matrix(L, sigma), mu, Ne, N=100)

    return mean(one_prediction() for _ in range(num_matrices))
Exemple #35
0
def random_genotype(n, L, linear_sigma, pairwise_sigma, copies):
    """Build a random genotype: (motif, copies, (pwm, pairwise_weights)).

    The motif is random_motif(L, n) and the pwm is
    sample_matrix(L, linear_sigma).  pairwise_weights holds L - 1 tables,
    each a 4x4 nested list of independent Gaussian draws with mean 0 and
    standard deviation pairwise_sigma.
    """
    motif = random_motif(L, n)
    pwm = sample_matrix(L, linear_sigma)
    pairwise_weights = []
    for _ in range(L - 1):
        table = []
        for _row in range(4):
            table.append(
                [random.gauss(0, pairwise_sigma) for _col in range(4)])
        pairwise_weights.append(table)
    recognizer = (pwm, pairwise_weights)
    return motif, copies, recognizer
def eps_from_theta(theta, L, N=100):
    """Sample a matrix and motif for theta and return the motif's site scores.

    theta is the triple (sigma, mu, Ne).  Returns the list of
    score_seq(matrix, site) for each site in the motif sampled with
    sample_motif_cftp(matrix, mu, Ne, N).
    """
    # Unpack theta explicitly; without this sigma, mu and Ne were undefined
    # locals.
    sigma, mu, Ne = theta
    matrix = sample_matrix(L, sigma)
    motif = sample_motif_cftp(matrix, mu, Ne, N)
    return [score_seq(matrix, site) for site in motif]