def estremo_gibbs(iterations=50000,
                  verbose=False,
                  every=1000,
                  sigma=1,
                  mu=-10,
                  Ne=5):
    nu = Ne - 1
    L = 10
    N = 20
    code, motif = (sample_code(L=10,
                               sigma=1), random_motif(length=L, num_sites=N))

    def log_f((code, motif)):
        eps = map(lambda x: -log(x), pw_prob_sites(motif, code))
        return sum(nu * log(1 / (1 + exp(ep - mu))) for ep in eps)

    chain = [(code, motif[:])]
    print log_f((code, motif))
    for iteration in trange(iterations):
        for i in range(N):
            site = motif[i]
            for j in range(L):
                b = site[j]
                log_ps = []
                bps = [bp for bp in "ACGT" if not bp == b]
                for bp in bps:
                    site_p = subst(site, bp, j)
                    log_ps.append(log_f((code, [site_p])))
                log_ps = [p - min(log_ps) for p in log_ps]
                bp = inverse_cdf_sample(bps,
                                        map(exp, log_ps),
                                        normalized=False)
                motif[i] = subst(site, bp, j)
        for k in range(L - 1):
            for b1 in "ACGT":
                for b2 in "ACGT":
                    dws = [random.gauss(0, 0.1) for _ in range(10)]
                    code_ps = [[d.copy() for d in code] for _ in range(10)]
                    for code_p, dw in zip(code_ps, dws):
                        code_p[k][b1, b2] += dw
                    log_ps = [log_f((code_p, motif)) for code_p in code_ps]
                    log_ps = [p - min(log_ps) for p in log_ps]
                    code_p = inverse_cdf_sample(code_ps,
                                                map(exp, log_ps),
                                                normalized=False)
                    code = code_p
        print log_f((code, motif))
        chain.append((code, motif[:]))
    return chain

    x0 = (sample_code(L=10, sigma=1), random_motif(length=10, num_sites=20))
    chain = mh(log_f,
               prop,
               x0,
               use_log=True,
               iterations=iterations,
               verbose=verbose,
               every=every)
    return chain
Ejemplo n.º 2
0
def validation_plot(L=10, N=50, ref_trials=1000):
    """Plot an empirical IC tail curve against three p-value estimates.

    Draws `ref_trials` motifs with random_motif(L, N), sorts their
    motif_ic(..., correct=True) values and plots them against
    1 - linspace(0, 1, ref_trials) as the empirical complementary CDF.  On
    the same axes it plots exp(ic_log_pvalue(N, L, ic, method=m)) for the
    methods "MC", "UB" and "analytic" at ten evenly spaced IC values in
    [0, 10].  The y-axis is logarithmic, the x-axis is limited to [0, 1.2],
    and the figure is displayed with plt.show().
    """
    check_points = np.linspace(0, 10, 10)

    ics_ref = [
        motif_ic(random_motif(L, N), correct=True) for _ in trange(ref_trials)
    ]
    ics_ref.sort()
    tail = 1 - np.linspace(0, 1, len(ics_ref))
    plt.plot(ics_ref,
             tail,
             label="Empirical Complementary CDF",
             marker='o',
             linestyle='')

    # One curve per estimation method, drawn in this order.
    curves = (
        ("MC", "Importance Sampling Estimate"),
        ("UB", "Analytic Upper Bound"),
        ("analytic", "Analytic P-value"),
    )
    for method, label in curves:
        pvalues = [
            exp(ic_log_pvalue(N, L, ic, method=method)) for ic in check_points
        ]
        plt.plot(check_points, pvalues, label=label)

    plt.semilogy()
    plt.legend()
    plt.xlabel("Information Content (bits)")
    plt.ylabel("P-value")
    plt.xlim(0, 1.2)
    plt.show()
Ejemplo n.º 3
0
def log_ZM_empirical_ref3(theta, N, trials=1000):
    """Estimate log Z_M from log_fhat evaluated on random one-site motifs.

    Evaluates log_fhat(theta, random_motif(L, 1)) `trials` times with
    L = len(theta[0]), combines the values with logsum and subtracts
    log(trials), adds L * log(4), and multiplies the result by N.

    Args:
        theta: model parameters; only len(theta[0]) is read here, the rest is
            passed through to log_fhat.
        N: multiplier applied to the per-site estimate.
        trials: number of random one-site motifs to draw.

    Returns:
        N * (L * log(4) + logsum(samples) - log(trials)).
    """
    width = len(theta[0])
    samples = []
    for _ in range(trials):
        samples.append(log_fhat(theta, random_motif(width, 1)))
    log_mean = logsum(samples) - log(trials)
    return N * (width * log(4) + log_mean)
Ejemplo n.º 4
0
def log_ZM_empirical_ref3(theta, N, trials=1000):
    """Estimate log Z_M from log_fhat evaluated on random one-site motifs.

    Evaluates log_fhat(theta, random_motif(L, 1)) `trials` times with
    L = len(theta[0]), combines the values with logsum and subtracts
    log(trials), adds L * log(4), and multiplies the result by N.

    Args:
        theta: model parameters; only len(theta[0]) is read here, the rest is
            passed through to log_fhat.
        N: multiplier applied to the per-site estimate.
        trials: number of random one-site motifs to draw.

    Returns:
        N * (L * log(4) + logsum(samples) - log(trials)).
    """
    width = len(theta[0])
    samples = []
    for _ in range(trials):
        samples.append(log_fhat(theta, random_motif(width, 1)))
    log_mean = logsum(samples) - log(trials)
    return N * (width * log(4) + log_mean)
def best_ic_motif(L, n, trials):
    """Return the highest-IC motif among `trials` random draws.

    Each draw is random_motif(L, n), scored with motif_ic(..., correct=False).
    The running best score is updated whenever a draw beats it, so the motif
    returned is the best one seen (the first such motif on ties).

    Args:
        L: first argument forwarded to random_motif.
        n: second argument forwarded to random_motif.
        trials: number of random motifs to draw.

    Returns:
        The best-scoring motif, or None when `trials` is 0.
    """
    best_motif = None
    best_ic = float("-inf")  # guarantees the first draw is always accepted
    for _ in trange(trials):
        motif = random_motif(L, n)
        cur_ic = motif_ic(motif, correct=False)
        if cur_ic > best_ic:
            best_ic = cur_ic
            best_motif = motif
    return best_motif
Ejemplo n.º 6
0
def match_ic_mi(N,
                L,
                des_ic,
                des_mi,
                iterations=50000,
                take_stock=None,
                eta=0.01,
                alpha=1,
                beta=0):
    if take_stock is None:
        take_stock = int((N * L) * log(N * L))
    x = random_motif(L, N)
    xs = [None] * iterations
    ics = [0.0] * iterations
    mis = [0.0] * iterations
    alphas = [0.0] * iterations
    betas = [0.0] * iterations
    ic = motif_ic(x)
    mi = total_motif_mi(x)
    accepts = 0
    for i in xrange(iterations):
        # if i == iterations/2:
        #     eta *= 0.1
        xp = mutate_motif(x)
        icp = motif_ic(xp)
        mip = total_motif_mi(xp)
        log_y = (alpha * ic + beta * mi)
        log_yp = (alpha * icp + beta * mip)
        if log(random.random()) < log_yp - log_y:
            accepts += 1
            x = xp
            ic = icp
            mi = mip
        ics[i] = (ic)
        mis[i] = (mi)
        xs[i] = (x)
        #print sum(site.count("A") for site in x)

        alphas[i] = (alpha)
        betas[i] = (beta)
        if i > 0 and i % take_stock == 0:
            if i < iterations / 10:
                mean_ic = mean(ics[i - take_stock:i])
                mean_mi = mean(mis[i - take_stock:i])
                alpha += eta * (des_ic - mean_ic) * exp(
                    -i / (10 * float(iterations)))
                beta += eta * (des_mi - mean_mi) * exp(
                    -i / (10 * float(iterations)))
            else:
                mean_ic = mean(ics[i - take_stock:i])
                mean_mi = mean(mis[i - take_stock:i])
                alpha = poly1d(polyfit(ics[:i], alphas[:i], 1))(des_ic)
                beta = poly1d(polyfit(mis[:i], betas[:i], 1))(des_mi)
            fmt_string = (
                "mean ic: % 1.2f, mean mi: % 1.2f, alpha: % 1.2f, beta: % 1.2f"
                % (mean_ic, mean_mi, alpha, beta))
            print i, "AR:", accepts / (i + 1.0), fmt_string
    return xs, ics, mis, alphas, betas
def sample_pw_motif_mh(code, N, Ne, mu, iterations=50000):
    """Sample motifs for a fixed pairwise code with Metropolis-Hastings.

    The log-density of a motif is sum(nu * log(1 / (1 + exp(ep - mu)))) with
    ep = -log(p) for every value returned by pw_prob_sites(motif, code) and
    nu = Ne - 1.  The chain starts at random_motif(len(code) + 1, N) and
    proposes with mutate_motif.

    Args:
        code: pairwise code; its length plus one is used as the site length.
        N: second argument to random_motif for the starting motif.
        Ne: sets nu = Ne - 1.
        mu: offset subtracted from each ep inside the exponent.
        iterations: forwarded to mh.

    Returns:
        Whatever mh returns for these arguments.
    """
    nu = Ne - 1

    def log_f(motif):
        eps = [-log(x) for x in pw_prob_sites(motif, code)]
        # nu multiplies the log instead of being an exponent inside it:
        # (1 + exp(ep - mu))**nu overflows once nu * (ep - mu) exceeds ~709,
        # whereas this form is the same quantity without the power.
        return sum(nu * log(1 / (1 + exp(ep - mu))) for ep in eps)

    prop = mutate_motif
    L = len(code) + 1
    x0 = random_motif(L, N)
    return mh(log_f, prop, x0, cache=True, use_log=True, iterations=iterations)
Ejemplo n.º 8
0
def sella_hirsch_mh_sampling(n=16,L=16,G=1000,N=100,sigma=1,iterations=50000):
    """Run Metropolis-Hastings on motifs scored by mismatches from "A".

    Each site's energy is sigma times its number of non-"A" characters; the
    foreground term is the sum over sites of exp(-sigma * energy), and the
    fitness is foreground / (foreground + compute_Zb(n, L, sigma, G)).  The
    chain's log-density is (N - 1) * log(fitness).  Proposals come from
    mutate_motif_p(motif, 4.0 / (n * L)) and the chain starts at
    random_motif(n, L).

    Returns:
        The chain returned by mh.
    """
    background = compute_Zb(n, L, sigma, G)
    exponent = N - 1

    def log_p(motif):
        foreground = 0
        for site in motif:
            energy = sigma * sum(1 for base in site if base != "A")
            foreground += exp(-sigma * energy)
        return exponent * log(foreground / (foreground + background))

    def proposal(motif):
        return mutate_motif_p(motif, 4.0 / (n * L))

    start = random_motif(n, L)
    return mh(log_p, proposal, start, use_log=True, iterations=iterations)
Ejemplo n.º 9
0
def validation_plot(L=10, N=50, ref_trials=1000):
    """Plot an empirical IC tail curve against three p-value estimates.

    Draws `ref_trials` motifs with random_motif(L, N), sorts their
    motif_ic(..., correct=True) values and plots them against
    1 - linspace(0, 1, ref_trials) as the empirical complementary CDF.  On
    the same axes it plots exp(ic_log_pvalue(N, L, ic, method=m)) for the
    methods "MC", "UB" and "analytic" at ten evenly spaced IC values in
    [0, 10].  The y-axis is logarithmic, the x-axis is limited to [0, 1.2],
    and the figure is displayed with plt.show().
    """
    check_points = np.linspace(0, 10, 10)

    ics_ref = [
        motif_ic(random_motif(L, N), correct=True) for _ in trange(ref_trials)
    ]
    ics_ref.sort()
    tail = 1 - np.linspace(0, 1, len(ics_ref))
    plt.plot(ics_ref, tail, label="Empirical Complementary CDF",
             marker='o', linestyle='')

    # One curve per estimation method, drawn in this order.
    curves = (
        ("MC", "Importance Sampling Estimate"),
        ("UB", "Analytic Upper Bound"),
        ("analytic", "Analytic P-value"),
    )
    for method, label in curves:
        pvalues = [
            exp(ic_log_pvalue(N, L, ic, method=method)) for ic in check_points
        ]
        plt.plot(check_points, pvalues, label=label)

    plt.semilogy()
    plt.legend()
    plt.xlabel("Information Content (bits)")
    plt.ylabel("P-value")
    plt.xlim(0, 1.2)
    plt.show()
Ejemplo n.º 10
0
def sella_hirsch_mh_penalize_mu(Ne=5,
                                n=16,
                                L=16,
                                G=5 * 10**6,
                                sigma=1,
                                alpha=0.01,
                                init="random",
                                matrix=None,
                                x0=None,
                                iterations=50000,
                                p=None):
    print "p:", p
    if matrix is None:
        matrix = sample_matrix(L, sigma)
    if x0 is None:
        if init == "random":
            x0 = (random_motif(L, n), random.gauss(0, 1))
        elif init == "ringer":
            x0 = (ringer_motif(matrix, n), random.gauss(0, 1))
        elif init == "anti_ringer":
            x0 = (anti_ringer_motif(matrix, n), random.gauss(0, 1))
        else:
            x0 = init
    if p is None:
        p = 1.0 / (n * L)
    nu = Ne - 1

    def log_f((motif, mu)):
        return nu * log_fitness_penalize_mu(matrix, motif, mu, alpha)

    def prop((motif, mu)):
        motif_p = mutate_motif_p(motif,
                                 p)  # probability of mutation per basepair
        mu_p = mu + random.gauss(0, 0.1)
        return motif_p, mu_p

    chain = mh(log_f, prop, x0, use_log=True, iterations=iterations)
    return matrix, chain
Ejemplo n.º 11
0
def sella_hirsch_mh(Ne=5,
                    n=16,
                    L=16,
                    sigma=1,
                    mu=0,
                    init="random",
                    matrix=None,
                    x0=None,
                    iterations=50000,
                    p=None):
    print "p:", p
    if matrix is None:
        matrix = sample_matrix(L, sigma)
    else:
        L = len(matrix)
    if x0 is None:
        if init == "random":
            x0 = random_motif(L, n)
        elif init == "ringer":
            x0 = ringer_motif(matrix, n)
        elif init == "anti_ringer":
            x0 = anti_ringer_motif(matrix, n)
        else:
            x0 = init
    if p is None:
        p = 1.0 / (n * L)
    nu = Ne - 1

    def log_f(motif):
        return nu * log_fitness(matrix, motif, mu)

    def prop(motif):
        motif_p = mutate_motif_p(motif,
                                 p)  # probability of mutation per basepair
        return motif_p

    chain = mh(log_f, prop, x0, use_log=True, iterations=iterations)
    return matrix, chain
Ejemplo n.º 12
0
def sella_hirsch_mh_sampling(n=16,
                             L=16,
                             G=1000,
                             N=100,
                             sigma=1,
                             iterations=50000):
    """Run Metropolis-Hastings on motifs scored by mismatches from "A".

    Each site's energy is sigma times its number of non-"A" characters; the
    foreground term is the sum over sites of exp(-sigma * energy), and the
    fitness is foreground / (foreground + compute_Zb(n, L, sigma, G)).  The
    chain's log-density is (N - 1) * log(fitness).  Proposals come from
    mutate_motif_p(motif, 4.0 / (n * L)) and the chain starts at
    random_motif(n, L).

    Returns:
        The chain returned by mh.
    """
    background = compute_Zb(n, L, sigma, G)
    exponent = N - 1

    def log_p(motif):
        foreground = 0
        for site in motif:
            energy = sigma * sum(1 for base in site if base != "A")
            foreground += exp(-sigma * energy)
        return exponent * log(foreground / (foreground + background))

    def proposal(motif):
        return mutate_motif_p(motif, 4.0 / (n * L))

    start = random_motif(n, L)
    return mh(log_p, proposal, start, use_log=True, iterations=iterations)
def estremo(iterations=50000, verbose=False, every=1, sigma=1, mu=-10, Ne=5):
    """Run Metropolis-Hastings jointly over a pairwise code and a motif.

    The state is a (code, motif) pair.  Its log-density is
    sum(nu * log(1 / (1 + exp(ep - mu)))) with ep = -log(p) for every value
    returned by pw_prob_sites(motif, code) and nu = Ne - 1.  A proposal copies
    the code, adds a Gaussian(0, sigma) step to one randomly chosen
    (position, base pair) weight, and replaces the motif with
    mutate_motif(motif).  The chain starts from sample_code(L=10, sigma=1)
    and random_motif(length=10, num_sites=20).

    Args:
        iterations, verbose, every: forwarded to mh.
        sigma: standard deviation of the weight perturbation.
        mu: offset subtracted from each ep inside the exponent.
        Ne: sets nu = Ne - 1.

    Returns:
        The chain returned by mh.
    """
    nu = Ne - 1

    def log_f(state):
        code, motif = state
        total = 0
        for prob in pw_prob_sites(motif, code):
            ep = -log(prob)
            total += nu * log(1 / (1 + exp(ep - mu)))
        return total

    def prop(state):
        code, motif = state
        new_code = [weights.copy() for weights in code]
        position = random.randrange(len(code))
        first = random.choice("ACGT")
        second = random.choice("ACGT")
        new_code[position][(first, second)] += random.gauss(0, sigma)
        return (new_code, mutate_motif(motif))

    start = (sample_code(L=10, sigma=1),
             random_motif(length=10, num_sites=20))
    return mh(log_f,
              prop,
              start,
              use_log=True,
              iterations=iterations,
              verbose=verbose,
              every=every)
Ejemplo n.º 14
0
def random_genotype(n, L, linear_sigma, pairwise_sigma, copies):
    """Build a random genotype triple.

    Args:
        n: second argument to random_motif.
        L: first argument to random_motif and sample_matrix; L - 1 pairwise
            weight tables are generated.
        linear_sigma: passed to sample_matrix.
        pairwise_sigma: standard deviation of each Gaussian pairwise weight.
        copies: returned unchanged as the second element.

    Returns:
        (motif, copies, (pwm, pairwise_weights)), where pairwise_weights is a
        list of L - 1 tables, each a 4 x 4 nested list of Gaussian(0,
        pairwise_sigma) draws.
    """
    motif = random_motif(L, n)
    pwm = sample_matrix(L, linear_sigma)
    pairwise_weights = []
    for _ in range(L - 1):
        table = []
        for _ in range(4):
            table.append([random.gauss(0, pairwise_sigma) for _ in range(4)])
        pairwise_weights.append(table)
    return motif, copies, (pwm, pairwise_weights)
Ejemplo n.º 15
0
def log_ZM_empirical_ref2(theta, N, trials=1000):
    """Estimate log Z_M from log_fhat evaluated on random N-site motifs.

    Evaluates log_fhat(theta, random_motif(L, N)) `trials` times with
    L = len(theta[0]) and returns
    N * L * log(4) + logsum(samples) - log(trials).

    Args:
        theta: model parameters; only len(theta[0]) is read here, the rest is
            passed through to log_fhat.
        N: second argument to random_motif and factor in the N * L * log(4)
            term.
        trials: number of random motifs to draw.
    """
    width = len(theta[0])
    samples = []
    for _ in range(trials):
        samples.append(log_fhat(theta, random_motif(width, N)))
    log_space_size = N * width * log(4)
    return log_space_size + logsum(samples) - log(trials)
Ejemplo n.º 16
0
def log_ZM_empirical_ref2(theta, N, trials=1000):
    """Estimate log Z_M from log_fhat evaluated on random N-site motifs.

    Evaluates log_fhat(theta, random_motif(L, N)) `trials` times with
    L = len(theta[0]) and returns
    N * L * log(4) + logsum(samples) - log(trials).

    Args:
        theta: model parameters; only len(theta[0]) is read here, the rest is
            passed through to log_fhat.
        N: second argument to random_motif and factor in the N * L * log(4)
            term.
        trials: number of random motifs to draw.
    """
    width = len(theta[0])
    samples = []
    for _ in range(trials):
        samples.append(log_fhat(theta, random_motif(width, N)))
    log_space_size = N * width * log(4)
    return log_space_size + logsum(samples) - log(trials)
def motif_mh(L,n,desired_ic):
    """Unfinished Metropolis-Hastings driver for motifs weighted by IC.

    Builds a starting motif with random_motif(L, n) and defines a log-weight
    mu * motif_ic(motif, correct=False), then calls mh() with no arguments.

    NOTE(review): this looks like an incomplete stub -- `x0`, `logf` and
    `desired_ic` are never used, and every other mh call in this file passes
    a log-density, a proposal and a start state.  The intended wiring (and
    how mu should be chosen from desired_ic) cannot be determined from here.
    """
    # Start state; currently not passed on to mh.
    x0 = random_motif(L,n)
    def logf(motif,mu):
        # mu scales the motif's information content (computed with
        # correct=False); currently not passed on to mh.
        return (mu*motif_ic(motif,correct=False))
    # NOTE(review): called with no arguments -- presumably fails unless mh
    # has defaults for everything; confirm against mh's definition.
    return mh()
Ejemplo n.º 18
0
 def init_species():
     """Return random_motif(n, L); n and L are looked up in an outer scope."""
     species = random_motif(n, L)
     return species
Ejemplo n.º 19
0
 def init_species():
     """Return random_motif(n, L); n and L are looked up in an outer scope."""
     species = random_motif(n, L)
     return species