def estremo_gibbs(iterations=50000,
                  verbose=False,
                  every=1000,
                  sigma=1,
                  mu=-10,
                  Ne=5):
    """Alternately resample a motif and a pairwise code, one coordinate at a time.

    The chain starts from ``sample_code(L=10, sigma=1)`` and
    ``random_motif(length=10, num_sites=20)``.  Each sweep does two passes:

    1. For every site and every position, the base is replaced by one of the
       three *other* bases, drawn with weight proportional to ``exp(log_f)``
       of the resulting single site.
    2. For every ``k`` in ``range(L - 1)`` and every ordered base pair, ten
       Gaussian(0, 0.1) perturbations of ``code[k][b1, b2]`` are proposed and
       one is kept, drawn with weight proportional to ``exp(log_f)`` of the
       whole motif under the perturbed code.

    ``log_f`` of the current state is printed before the first sweep and
    after every sweep.

    Args:
        iterations: number of sweeps to run.
        verbose: accepted for signature compatibility; not used.
        every: accepted for signature compatibility; not used.
        sigma: accepted for signature compatibility; not used.
        mu: offset subtracted from each site energy inside ``log_f``.
        Ne: ``log_f`` scales each site's term by ``Ne - 1``.

    Returns:
        A list of ``(code, motif)`` tuples: the initial state followed by one
        entry per sweep.
    """
    nu = Ne - 1
    L = 10
    N = 20
    code = sample_code(L=L, sigma=1)
    motif = random_motif(length=L, num_sites=N)

    def log_f(state):
        # Explicit unpacking instead of a tuple parameter, which only
        # Python 2 accepts.
        cur_code, cur_motif = state
        eps = [-log(p) for p in pw_prob_sites(cur_motif, cur_code)]
        return sum(nu * log(1 / (1 + exp(ep - mu))) for ep in eps)

    def unnormalized_weights(log_ps):
        # Shift by the maximum so every exponent is <= 0; shifting by the
        # minimum gives the same relative weights but can overflow exp().
        top = max(log_ps)
        return [exp(p - top) for p in log_ps]

    chain = [(code, motif[:])]
    print(log_f((code, motif)))
    for iteration in trange(iterations):
        # Pass 1: update the motif one base at a time.
        for i in range(N):
            for j in range(L):
                # Re-read the site on every position so that substitutions
                # made at earlier positions are kept rather than overwritten.
                site = motif[i]
                b = site[j]
                # NOTE: the current base is excluded, so the base always changes.
                bps = [bp for bp in "ACGT" if bp != b]
                log_ps = [log_f((code, [subst(site, bp, j)])) for bp in bps]
                bp = inverse_cdf_sample(bps,
                                        unnormalized_weights(log_ps),
                                        normalized=False)
                motif[i] = subst(site, bp, j)
        # Pass 2: update the code one weight at a time.
        for k in range(L - 1):
            for b1 in "ACGT":
                for b2 in "ACGT":
                    dws = [random.gauss(0, 0.1) for _ in range(10)]
                    code_ps = [[d.copy() for d in code] for _ in range(10)]
                    for code_p, dw in zip(code_ps, dws):
                        code_p[k][b1, b2] += dw
                    log_ps = [log_f((code_p, motif)) for code_p in code_ps]
                    code = inverse_cdf_sample(code_ps,
                                              unnormalized_weights(log_ps),
                                              normalized=False)
        print(log_f((code, motif)))
        # Store a copy of the motif list because it is mutated in place.
        chain.append((code, motif[:]))
    return chain
# Example #2
# 0
def experiment3(trials=10):
    """Scatter-plot geometric-mean occupancy against score spread per model.

    Draws ``trials`` pairwise codes and ``trials`` PSSMs (L=10, sigma=1).
    For each model it computes:

    * the standard deviation of its score over 10000 random sites, and
    * the geometric mean of the occupancy ``1 / (1 + exp(ep - mu))`` recorded
      (via ``capture_state``) along an ``mh`` chain whose target is
      ``occupancy ** (Ne - 1)``; the first recorded entry is dropped.

    Both model families are drawn on one matplotlib scatter plot with a
    logarithmic y axis.  Nothing is returned.

    Args:
        trials: number of codes and number of PSSMs to sample.
    """
    mu = -10
    Ne = 5
    L = 10
    sigma = 1
    codes = [sample_code(L, sigma) for i in range(trials)]
    pssms = [sample_matrix(L, sigma) for i in range(trials)]
    sites = [random_site(L) for i in range(10000)]
    apw_site_sigmas = [
        sd([score(code, site) for site in sites]) for code in codes
    ]
    linear_site_sigmas = [
        sd([score_seq(pssm, site) for site in sites]) for pssm in pssms
    ]

    def geometric_mean_occupancy(score_fn, model):
        """Run one chain for ``model`` and return its geometric-mean occupancy."""

        def phat(site):
            ep = score_fn(model, site)
            return 1 / (1 + exp(ep - mu))**(Ne - 1)

        def occ(site):
            ep = score_fn(model, site)
            return 1 / (1 + exp(ep - mu))

        occupancies = mh(phat,
                         proposal=mutate_site,
                         x0=random_site(L),
                         capture_state=occ)[1:]
        # Invert log10 with a base-10 power (not exp) so that the result is
        # the true geometric mean of the occupancies.
        return 10 ** mean([log10(x) for x in occupancies])

    apw_mean_fits = [
        geometric_mean_occupancy(score, code) for code in tqdm(codes)
    ]
    linear_mean_fits = [
        geometric_mean_occupancy(score_seq, pssm) for pssm in tqdm(pssms)
    ]
    plt.scatter(apw_site_sigmas, apw_mean_fits, label='apw')
    plt.scatter(linear_site_sigmas,
                linear_mean_fits,
                color='g',
                label='linear')
    plt.semilogy()
    plt.legend(loc='lower right')
# Example #3
# 0
def experiment2_():
    """Compare fitness along MH chains for a pairwise code and a matched PSSM.

    A pairwise code is sampled and the standard deviation of its score over
    10000 random sites is measured.  A PSSM is then sampled with per-column
    scale ``sqrt(site_sigma**2 / L)``.  One ``mh`` chain is run per model,
    each targeting ``1 / (1 + exp(ep - mu))**(Ne - 1)``.

    Returns:
        A pair ``(apw_fits, linear_fits)``: each model's target function
        evaluated at every state of its own chain.
    """
    L, mu, Ne = 10, -10, 2
    sigma = 1
    code = sample_code(L, sigma)
    background = [random_site(L) for _ in range(10000)]
    site_sigma = sd([score(code, s) for s in background])
    pssm = sample_matrix(L, sqrt(site_sigma**2 / L))

    def fitness(ep):
        # Shared functional form of both target densities.
        return 1 / (1 + exp(ep - mu))**(Ne - 1)

    def apw_phat(site):
        return fitness(score(code, site))

    def linear_phat(site):
        return fitness(score_seq(pssm, site))

    def run_chain(phat):
        # Each chain starts from a freshly drawn random site.
        return mh(phat, proposal=mutate_site, x0=random_site(L))

    apw_chain = run_chain(apw_phat)
    linear_chain = run_chain(linear_phat)
    apw_fits = [apw_phat(state) for state in apw_chain]
    linear_fits = [linear_phat(state) for state in linear_chain]
    return apw_fits, linear_fits
def degradation_experiment():
    """Determine whether linear or pairwise models are more resistant to degradation.

    Samples one PSSM and one pairwise code (L=10, sigma=1), then one motif of
    N=50 sites for each model (``sample_motif_cftp`` for the linear model, the
    last state of ``sample_pw_motif_mh`` for the pairwise model).  For each
    motif it evaluates the log fitness of the motif itself and of 100
    independently mutated copies produced by ``mutate_motif``.

    Returns:
        A tuple ``(li_base_fit, li_mut_fits, pw_base_fit, pw_mut_fits)``:
        the unmutated log fitness and the list of 100 mutated log fitnesses
        for the linear model, followed by the same for the pairwise model.
    """
    L = 10
    N = 50
    Ne = 5
    nu = Ne - 1
    sigma = 1
    mu = -10
    matrix = sample_matrix(L, sigma)
    code = sample_code(L, sigma)
    li_motif = sample_motif_cftp(matrix, mu, Ne, N)
    pw_motif = sample_pw_motif_mh(code, N, Ne, mu, iterations=100000)[-1]

    def log_fitness(eps):
        # log(1 / (1 + exp(ep - mu))**nu) written as -nu * log(1 + exp(...)),
        # which avoids taking the log of a power that may underflow to zero.
        return sum(-nu * log(1 + exp(ep - mu)) for ep in eps)

    def li_log_fitness(motif):
        return log_fitness([score_seq(matrix, site) for site in motif])

    def pw_log_fitness(motif):
        return log_fitness([-log(p) for p in pw_prob_sites(motif, code)])

    li_base_fit = li_log_fitness(li_motif)
    li_mut_fits = [li_log_fitness(mutate_motif(li_motif)) for i in range(100)]
    pw_base_fit = pw_log_fitness(pw_motif)
    pw_mut_fits = [pw_log_fitness(mutate_motif(pw_motif)) for i in range(100)]
    # Hand the results back to the caller instead of discarding them.
    return li_base_fit, li_mut_fits, pw_base_fit, pw_mut_fits
def sanity_check():
    """Run ``cv_experiment`` on pairwise-sampled motifs and on maxent motifs.

    Builds 100 motifs of 100 sites each, every motif drawn with
    ``pw_sample_site`` from its own freshly sampled code
    (``sample_code(10, 1)``), then obtains a second collection from
    ``maxent_motifs(100, 10, 10, 100)`` and passes each collection to
    ``cv_experiment`` in that order.
    """
    pw_motifs = []
    for _ in range(100):
        # One new code per motif; all 100 sites of the motif share it.
        motif_code = sample_code(10, 1)
        pw_motifs.append([pw_sample_site(motif_code) for _site in range(100)])
    li_motifs = maxent_motifs(100, 10, 10, 100)
    for motif_set in (pw_motifs, li_motifs):
        cv_experiment(motif_set)
def experiment3(trials=10):
    """Plot geometric-mean occupancy versus score spread for both model types.

    ``trials`` pairwise codes and ``trials`` PSSMs are sampled with L=10 and
    sigma=1.  For every model, the x value is the standard deviation of its
    score over 10000 random sites.  The y value is the geometric mean of the
    occupancy ``1/(1+exp(ep-mu))`` recorded through ``capture_state`` along
    an ``mh`` chain targeting ``occupancy**(Ne-1)``, with the first recorded
    entry dropped.  The two families are scattered on a log-scaled y axis.
    Nothing is returned.

    Args:
        trials: number of codes and number of PSSMs to sample.
    """
    mu = -10
    Ne = 5
    L = 10
    sigma = 1
    codes = [sample_code(L, sigma) for i in range(trials)]
    pssms = [sample_matrix(L, sigma) for i in range(trials)]
    sites = [random_site(L) for i in range(10000)]
    apw_site_sigmas = [sd([score(code, site) for site in sites]) for code in codes]
    linear_site_sigmas = [sd([score_seq(pssm, site) for site in sites]) for pssm in pssms]

    def apw_phat(code, site):
        ep = score(code, site)
        return 1/(1+exp(ep-mu))**(Ne-1)

    def apw_occ(code, site):
        ep = score(code, site)
        return 1/(1+exp(ep-mu))

    def linear_phat(pssm, site):
        ep = score_seq(pssm, site)
        return 1/(1+exp(ep-mu))**(Ne-1)

    def linear_occ(pssm, site):
        ep = score_seq(pssm, site)
        return 1/(1+exp(ep-mu))

    def geometric_mean(xs):
        # log10 must be undone with a power of ten; using exp() here would
        # return the geometric mean raised to 1/ln(10) instead.
        return 10 ** mean([log10(x) for x in xs])

    apw_mean_fits = []
    for code in tqdm(codes):
        occs = mh(lambda s: apw_phat(code, s), proposal=mutate_site, x0=random_site(L),
                  capture_state=lambda s: apw_occ(code, s))[1:]
        apw_mean_fits.append(geometric_mean(occs))
    linear_mean_fits = []
    for pssm in tqdm(pssms):
        occs = mh(lambda s: linear_phat(pssm, s), proposal=mutate_site, x0=random_site(L),
                  capture_state=lambda s: linear_occ(pssm, s))[1:]
        linear_mean_fits.append(geometric_mean(occs))
    plt.scatter(apw_site_sigmas, apw_mean_fits, label='apw')
    plt.scatter(linear_site_sigmas, linear_mean_fits, color='g', label='linear')
    plt.semilogy()
    plt.legend(loc='lower right')
 def apw_fit(sigma, mu, Ne):
     """Return the mean captured value of an MH chain for a fresh pairwise code.

     A code is sampled with ``sample_code(L, sigma)`` and an ``mh`` chain is
     run with target ``1/(1+exp(ep-mu))**(Ne-1)``.  The chain records
     ``apw_occ(code, mu, s)`` for each state; the first 25000 recorded
     entries are discarded before averaging.

     ``L`` and ``apw_occ`` are not defined in this function; they are taken
     from the enclosing scope.
     """
     code = sample_code(L, sigma)
     exponent = Ne - 1

     def apw_phat(site):
         return 1/(1+exp(score(code, site)-mu))**exponent

     captured = mh(apw_phat, proposal=mutate_site, x0=random_site(L),
                   capture_state=lambda s: apw_occ(code, mu, s))
     return mean(captured[25000:])
# Example #8
# 0
    def apw_fit(sigma, mu, Ne):
        """Average the values captured along an MH chain for a new pairwise code.

        Samples a code via ``sample_code(L, sigma)``, runs ``mh`` against the
        target ``1 / (1 + exp(ep - mu))**(Ne - 1)`` while recording
        ``apw_occ(code, mu, s)`` per state, drops the first 25000 recorded
        entries and returns the mean of the rest.

        ``L`` and ``apw_occ`` come from the enclosing scope, which is not
        part of this function.
        """
        code = sample_code(L, sigma)

        def apw_phat(site):
            energy = score(code, site)
            denominator = (1 + exp(energy - mu))**(Ne - 1)
            return 1 / denominator

        def record(state):
            return apw_occ(code, mu, state)

        recorded = mh(apw_phat,
                      proposal=mutate_site,
                      x0=random_site(L),
                      capture_state=record)
        return mean(recorded[25000:])
def experiment1_():
    """Compare fitness along MH chains for a pairwise code and its linearization.

    Samples a pairwise code (L=10, sigma=1), derives a PSSM from it with
    ``linearize`` and runs one ``mh`` chain per model, each targeting
    ``1/(1+exp(ep-mu))**(Ne-1)`` with mu=-10 and Ne=2.

    Returns:
        ``(apw_fits, linear_fits)``: each model's target function evaluated
        at every state of its own chain.
    """
    L, mu, Ne = 10, -10, 2
    sigma = 1
    code = sample_code(L, sigma)
    pssm = linearize(code)

    def fitness(ep):
        # Common form of both target densities.
        return 1/(1+exp(ep-mu))**(Ne-1)

    def apw_phat(site):
        return fitness(score(code, site))

    def linear_phat(site):
        return fitness(score_seq(pssm, site))

    def run_chain(phat):
        # Every chain starts at its own random site.
        return mh(phat, proposal=mutate_site, x0=random_site(L))

    apw_chain = run_chain(apw_phat)
    linear_chain = run_chain(linear_phat)
    apw_fits = [apw_phat(state) for state in apw_chain]
    linear_fits = [linear_phat(state) for state in linear_chain]
    return apw_fits, linear_fits
# Example #10
# 0
def experiment1_():
    """Run MH chains under a pairwise code and under its linearized PSSM.

    The code is drawn with ``sample_code`` (L=10, sigma=1) and the PSSM is
    ``linearize(code)``.  Both chains use ``mutate_site`` as the proposal, a
    random starting site, and the target
    ``1 / (1 + exp(ep - mu))**(Ne - 1)`` (mu=-10, Ne=2) computed from the
    respective model's score.

    Returns:
        A 2-tuple of lists: the pairwise target evaluated on the pairwise
        chain, and the linear target evaluated on the linear chain.
    """
    L = 10
    sigma = 1
    mu = -10
    Ne = 2
    selection_power = Ne - 1
    code = sample_code(L, sigma)
    pssm = linearize(code)

    def apw_phat(site):
        return 1 / (1 + exp(score(code, site) - mu))**selection_power

    def linear_phat(site):
        return 1 / (1 + exp(score_seq(pssm, site) - mu))**selection_power

    fits = []
    chains = []
    # Run both chains first, then evaluate, matching the order in which
    # random numbers are consumed.
    for phat in (apw_phat, linear_phat):
        chains.append(mh(phat, proposal=mutate_site, x0=random_site(L)))
    for phat, chain in zip((apw_phat, linear_phat), chains):
        fits.append([phat(state) for state in chain])
    return fits[0], fits[1]
def estremo(iterations=50000, verbose=False, every=1, sigma=1, mu=-10, Ne=5):
    """Run an MH chain over joint ``(code, motif)`` states.

    The log target sums, over the motif's sites,
    ``(Ne - 1) * log(1 / (1 + exp(ep - mu)))`` with ``ep = -log(p)`` for each
    value ``p`` returned by ``pw_prob_sites(motif, code)``.  Each proposal
    adds Gaussian(0, ``sigma``) noise to one randomly chosen base-pair entry
    of one randomly chosen element of the code, and replaces the motif with
    ``mutate_motif(motif)``.

    Args:
        iterations, verbose, every: forwarded unchanged to ``mh``.
        sigma: standard deviation of the code perturbation in the proposal.
        mu: offset subtracted from each site energy in the log target.
        Ne: the log target scales each site's term by ``Ne - 1``.

    Returns:
        Whatever ``mh`` returns for this run.
    """
    nu = Ne - 1

    def log_f(state):
        code, motif = state
        energies = [-log(p) for p in pw_prob_sites(motif, code)]
        return sum(nu * log(1 / (1 + exp(ep - mu))) for ep in energies)

    def prop(state):
        code, motif = state
        # Copy every element so the current state is left untouched.
        new_code = [entry.copy() for entry in code]
        position = random.randrange(len(code))
        first = random.choice("ACGT")
        second = random.choice("ACGT")
        new_code[position][(first, second)] += random.gauss(0, sigma)
        new_motif = mutate_motif(motif)
        return (new_code, new_motif)

    start_code = sample_code(L=10, sigma=1)
    start_motif = random_motif(length=10, num_sites=20)
    x0 = (start_code, start_motif)
    return mh(log_f,
              prop,
              x0,
              use_log=True,
              iterations=iterations,
              verbose=verbose,
              every=every)
def experiment2_():
    """Run MH chains under a pairwise code and a variance-matched random PSSM.

    The standard deviation of the code's score is measured over 10000 random
    sites.  The PSSM is sampled with ``sample_matrix(L, sqrt(site_sigma**2/L))``.
    Both chains use ``mutate_site`` as the proposal, a random starting site,
    and the target ``1/(1+exp(ep-mu))**(Ne-1)`` (mu=-10, Ne=2) computed from
    the respective model's score.

    Returns:
        A 2-tuple of lists: the pairwise target evaluated on the pairwise
        chain, and the linear target evaluated on the linear chain.
    """
    L = 10
    sigma = 1
    mu = -10
    Ne = 2
    selection_power = Ne - 1
    code = sample_code(L, sigma)
    random_sites = [random_site(L) for _ in range(10000)]
    site_sigma = sd([score(code, site) for site in random_sites])
    pssm = sample_matrix(L, sqrt(site_sigma**2/L))

    def apw_phat(site):
        return 1/(1+exp(score(code, site)-mu))**selection_power

    def linear_phat(site):
        return 1/(1+exp(score_seq(pssm, site)-mu))**selection_power

    targets = (apw_phat, linear_phat)
    # Run both chains before evaluating either, matching the order in which
    # random numbers are consumed.
    chains = []
    for phat in targets:
        chains.append(mh(phat, proposal=mutate_site, x0=random_site(L)))
    fits = []
    for phat, chain in zip(targets, chains):
        fits.append([phat(state) for state in chain])
    return fits[0], fits[1]