Esempio n. 1
0
def experiment2_():
    """Run one `mh` chain per model and return the fitness of every state.

    A model from `sample_code` (scored with `score`) is compared with a
    matrix from `sample_matrix` (scored with `score_seq`).  The matrix is
    drawn with parameter ``sqrt(site_sigma**2 / L)``, where `site_sigma` is
    the standard deviation of the first model's scores over 10000 random
    sites.

    Returns:
        ``(apw_fits, linear_fits)``: the target function mapped over each
        chain returned by `mh`.
    """
    site_len = 10
    mu = -10
    Ne = 2
    nu = Ne - 1

    def fitness(ep):
        # Shared target: 1 / (1 + exp(ep - mu))**(Ne - 1).
        return 1 / (1 + exp(ep - mu))**nu

    code = sample_code(site_len, 1)
    background = [random_site(site_len) for _ in xrange(10000)]
    site_sigma = sd([score(code, s) for s in background])
    pssm = sample_matrix(site_len, sqrt(site_sigma**2 / site_len))

    def apw_phat(site):
        return fitness(score(code, site))

    def linear_phat(site):
        return fitness(score_seq(pssm, site))

    apw_chain = mh(apw_phat, proposal=mutate_site, x0=random_site(site_len))
    linear_chain = mh(linear_phat,
                      proposal=mutate_site,
                      x0=random_site(site_len))
    return map(apw_phat, apw_chain), map(linear_phat, linear_chain)
Esempio n. 2
0
def experiment3(trials=10):
    """Scatter-plot geometric-mean occupancy against score spread.

    For each of `trials` models drawn with `sample_code` (scored with
    `score`) and with `sample_matrix` (scored with `score_seq`):

    1. the standard deviation of the model's score over 10000 random sites
       is computed, and
    2. `mh` is run with target ``1 / (1 + exp(ep - mu))**(Ne - 1)`` while
       ``1 / (1 + exp(ep - mu))`` is passed as `capture_state`; the
       geometric mean of the values `mh` returns (first entry dropped) is
       taken.

    Both families are scattered on the current matplotlib axes with a
    log-scaled y axis.  Nothing is returned.

    Parameters:
        trials: number of models drawn per family.
    """
    mu = -10
    Ne = 5
    L = 10
    sigma = 1
    nu = Ne - 1
    codes = [sample_code(L, sigma) for i in range(trials)]
    pssms = [sample_matrix(L, sigma) for i in range(trials)]
    sites = [random_site(L) for i in xrange(10000)]
    apw_site_sigmas = [
        sd([score(code, site) for site in sites]) for code in codes
    ]
    linear_site_sigmas = [
        sd([score_seq(pssm, site) for site in sites]) for pssm in pssms
    ]

    def geometric_mean_occupancy(energy):
        """Run one chain for the scoring function `energy`."""

        def occ(site):
            return 1 / (1 + exp(energy(site) - mu))

        def phat(site):
            return 1 / (1 + exp(energy(site) - mu))**nu

        recorded = mh(phat,
                      proposal=mutate_site,
                      x0=random_site(L),
                      capture_state=occ)[1:]
        # The logs are base 10, so the mean has to be undone with 10**x.
        # The previous exp(mean(log10(...))) mixed bases and did not yield
        # the geometric mean.
        return 10 ** mean([log10(value) for value in recorded])

    # Bind each model as a default argument so the scorer cannot pick up a
    # later loop value.
    apw_mean_fits = [
        geometric_mean_occupancy(lambda s, code=code: score(code, s))
        for code in tqdm(codes)
    ]
    linear_mean_fits = [
        geometric_mean_occupancy(lambda s, pssm=pssm: score_seq(pssm, s))
        for pssm in tqdm(pssms)
    ]
    plt.scatter(apw_site_sigmas, apw_mean_fits, label='apw')
    plt.scatter(linear_site_sigmas,
                linear_mean_fits,
                color='g',
                label='linear')
    plt.semilogy()
    plt.legend(loc='lower right')
def sample_model(model, iterations=50000,x0=None):
    """Run `mh` on `score(model, site)` in log space and return the chain.

    Parameters:
        model: scored with `score`; only its length is inspected here.
        iterations: forwarded to `mh`.
        x0: starting site.  When None, a random site of the inferred length
            is used.

    Returns:
        Whatever `mh` returns for this run.
    """
    k = len(model)
    # Recover the site length from the number of model entries, assuming
    # k = L*(L-1)/2 (one entry per pair of positions).  The expression is
    # (1 + sqrt(1 + 8k))/2 plus 0.5, and int() truncates the extra half.
    L = int(1 + sqrt(1+8*k)/2)
    if x0 is None:
        x0 = random_site(L)
    # Pass the resolved x0 through; previously a fresh random site was always
    # used and the caller's x0 was silently ignored.
    chain = mh(lambda s:score(model,s),
               proposal=mutate_site,
               x0=x0,
               use_log=True, iterations=iterations)
    return chain
Esempio n. 4
0
def sample_model(model, iterations=50000, x0=None):
    """Run `mh` on `score(model, site)` in log space and return the chain.

    Parameters:
        model: scored with `score`; only its length is inspected here.
        iterations: forwarded to `mh`.
        x0: starting site.  When None, a random site of the inferred length
            is used.

    Returns:
        Whatever `mh` returns for this run.
    """
    k = len(model)
    # Recover the site length from the number of model entries, assuming
    # k = L*(L-1)/2 (one entry per pair of positions).  The expression is
    # (1 + sqrt(1 + 8k))/2 plus 0.5, and int() truncates the extra half.
    L = int(1 + sqrt(1 + 8 * k) / 2)
    if x0 is None:
        x0 = random_site(L)
    # Pass the resolved x0 through; previously a fresh random site was always
    # used and the caller's x0 was silently ignored.
    chain = mh(lambda s: score(model, s),
               proposal=mutate_site,
               x0=x0,
               use_log=True,
               iterations=iterations)
    return chain
def sample_site_cftp_dep(matrix, mu, Ne):
    """Set up starting trajectories and a substitution helper for `matrix`.

    NOTE(review): the body visible here only builds local values and defines
    helpers; it has no return statement and never uses `log_phat`,
    `first_site`, `last_site` or `trajs`.  The definition looks truncated --
    confirm against the full source before relying on it.
    """
    L = len(matrix)
    def log_phat(s):
        # Log of (1 + exp(ep - mu))**(-nu), with nu = Ne - 1.
        ep = score_seq(matrix,s)
        nu = Ne - 1
        return -nu*log(1 + exp(ep - mu))
    first_site = "A"*L
    last_site = "T"*L
    # Per row, pick the base (in "ACGT" order) with the smallest / largest entry.
    best_site = "".join(["ACGT"[argmin(row)] for row in matrix])
    worst_site = "".join(["ACGT"[argmax(row)] for row in matrix])
    # Five single-element trajectories: best site, three random sites, worst site.
    trajs = [[best_site],[random_site(L)],[random_site(L)],[random_site(L)], [worst_site]]
    # Shadows any outer `mutate_site`; takes a (position, base index) pair.
    # Tuple parameters are Python 2 only syntax.
    def mutate_site(site,(ri,rb)):
        return subst(site,"ACGT"[rb],ri)
def sample_motif_ar_tilted(matrix, mu, Ne, N):
    """Collect N sites by accept/reject over uniformly random sites.

    Each candidate from `random_site` is scored with `score_seq` and accepted
    with probability ``phat(ep) / pmode``, where
    ``phat(ep) = (1 / (1 + exp(ep - mu)))**(Ne - 1)``.

    NOTE(review): `pmode` is not assigned anywhere in this function (only
    `dmode` is).  Unless it is a module-level name, the loop raises
    NameError.  The tilted PSFM values computed below are also never used by
    the sampling loop -- confirm the intended proposal and bound.

    Returns:
        List of N accepted sites.
    """
    nu = Ne - 1
    L = len(matrix)
    # Smallest / largest attainable score: sum of per-row minima / maxima.
    ep_min, ep_max, L = sum(map(min,matrix)), sum(map(max,matrix)), len(matrix)
    site_sigma = site_sigma_from_matrix(matrix)
    # phat(ep) times dnorm(ep, 0, site_sigma), zeroed outside [ep_min, ep_max].
    density = lambda ep:(1/(1+exp(ep-mu)))**(Ne-1) * dnorm(ep,0,site_sigma)*(ep_min <= ep <= ep_max)
    # Negated derivative of log(density) w.r.t. ep (taking dnorm as a zero-mean
    # normal density); its root is the mode.
    d_density = lambda ep:ep/site_sigma**2 + nu/(1+exp(mu-ep))
    phat = lambda ep:(1/(1+exp(ep-mu)))**(Ne-1)
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min + 1 # don't want mode right on the nose of ep_min for sampling purposes, so offset it a bit
    dmode = density(mode)
    # calculate mean epsilon via rejection sampling
    motif = []
    def mean_ep(lamb):
        # Sum of matrix entries weighted by the matching PSFM entries.
        psfm = psfm_from_matrix(matrix, lamb=lamb)
        return sum([ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
                    for (ep, p) in zip(mat_row, psfm_row)])
    # Find the tilt whose mean_ep equals the mode.
    lamb = bisect_interval(lambda l:mean_ep(l) - mode, -20, 20)
    tilted_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted_psfm = [map(log,row) for row in tilted_psfm]
    while len(motif) < N:
        site = random_site(L)
        ep = score_seq(matrix, site)
        if random.random() < phat(ep)/pmode:
            motif.append(site)    
    return motif
def experiment3(trials=10):
    """Scatter-plot geometric-mean occupancy against score spread.

    For each of `trials` models drawn with `sample_code` (scored with
    `score`) and with `sample_matrix` (scored with `score_seq`):

    1. the standard deviation of the model's score over 10000 random sites
       is computed, and
    2. `mh` is run with target ``1/(1+exp(ep-mu))**(Ne-1)`` while
       ``1/(1+exp(ep-mu))`` is passed as `capture_state`; the geometric mean
       of the values `mh` returns (first entry dropped) is taken.

    Both families are scattered on the current matplotlib axes with a
    log-scaled y axis.  Nothing is returned.

    Parameters:
        trials: number of models drawn per family.
    """
    mu = -10
    Ne = 5
    L = 10
    sigma = 1
    nu = Ne - 1
    codes = [sample_code(L, sigma) for i in range(trials)]
    pssms = [sample_matrix(L, sigma) for i in range(trials)]
    sites = [random_site(L) for i in xrange(10000)]
    apw_site_sigmas = [sd([score(code,site) for site in sites]) for code in codes]
    linear_site_sigmas = [sd([score_seq(pssm,site) for site in sites]) for pssm in pssms]
    def geometric_mean_occupancy(energy):
        """Run one chain for the scoring function `energy`."""
        def occ(site):
            return 1/(1+exp(energy(site)-mu))
        def phat(site):
            return 1/(1+exp(energy(site)-mu))**nu
        recorded = mh(phat, proposal=mutate_site, x0=random_site(L),
                      capture_state=occ)[1:]
        # The logs are base 10, so the mean has to be undone with 10**x.
        # The previous exp(mean(log10(...))) mixed bases and did not yield
        # the geometric mean.
        return 10 ** mean([log10(value) for value in recorded])
    # Bind each model as a default argument so the scorer cannot pick up a
    # later loop value.
    apw_mean_fits = [geometric_mean_occupancy(lambda s, code=code: score(code, s))
                     for code in tqdm(codes)]
    linear_mean_fits = [geometric_mean_occupancy(lambda s, pssm=pssm: score_seq(pssm, s))
                        for pssm in tqdm(pssms)]
    plt.scatter(apw_site_sigmas, apw_mean_fits, label='apw')
    plt.scatter(linear_site_sigmas, linear_mean_fits, color='g',label='linear')
    plt.semilogy()
    plt.legend(loc='lower right')
Esempio n. 8
0
def sample_motif_ar_tilted(matrix, mu, Ne, N):
    """Collect N sites by accept/reject over uniformly random sites.

    Each candidate from `random_site` is scored with `score_seq` and accepted
    with probability ``phat(ep) / pmode``, where
    ``phat(ep) = (1 / (1 + exp(ep - mu)))**(Ne - 1)``.

    NOTE(review): `pmode` is not assigned anywhere in this function (only
    `dmode` is).  Unless it is a module-level name, the loop raises
    NameError.  The tilted PSFM values computed below are also never used by
    the sampling loop -- confirm the intended proposal and bound.

    Returns:
        List of N accepted sites.
    """
    nu = Ne - 1
    L = len(matrix)
    # Smallest / largest attainable score: sum of per-row minima / maxima.
    ep_min, ep_max, L = sum(map(min, matrix)), sum(map(max,
                                                       matrix)), len(matrix)
    site_sigma = site_sigma_from_matrix(matrix)
    # phat(ep) times dnorm(ep, 0, site_sigma), zeroed outside [ep_min, ep_max].
    density = lambda ep: (1 / (1 + exp(ep - mu)))**(Ne - 1) * dnorm(
        ep, 0, site_sigma) * (ep_min <= ep <= ep_max)
    # Negated derivative of log(density) w.r.t. ep (taking dnorm as a zero-mean
    # normal density); its root is the mode.
    d_density = lambda ep: ep / site_sigma**2 + nu / (1 + exp(mu - ep))
    phat = lambda ep: (1 / (1 + exp(ep - mu)))**(Ne - 1)
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min + 1  # don't want mode right on the nose of ep_min for sampling purposes, so offset it a bit
    dmode = density(mode)
    # calculate mean epsilon via rejection sampling
    motif = []

    def mean_ep(lamb):
        # Sum of matrix entries weighted by the matching PSFM entries.
        psfm = psfm_from_matrix(matrix, lamb=lamb)
        return sum([
            ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
            for (ep, p) in zip(mat_row, psfm_row)
        ])

    # Find the tilt whose mean_ep equals the mode.
    lamb = bisect_interval(lambda l: mean_ep(l) - mode, -20, 20)
    tilted_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted_psfm = [map(log, row) for row in tilted_psfm]
    while len(motif) < N:
        site = random_site(L)
        ep = score_seq(matrix, site)
        if random.random() < phat(ep) / pmode:
            motif.append(site)
    return motif
 def apw_fit(sigma, mu, Ne):
     """Mean of the `apw_occ` values from one `mh` run on a fresh code.

     A model is drawn with `sample_code(L, sigma)`; `mh` targets
     ``1/(1+exp(ep-mu))**(Ne-1)`` with ``ep = score(code, site)``, and the
     first 25000 entries of its result are discarded before averaging.
     `L` and `apw_occ` come from the enclosing scope.
     """
     code = sample_code(L, sigma)
     nu = Ne - 1

     def fitness(site):
         return 1/(1+exp(score(code, site)-mu))**nu

     start = random_site(L)
     recorded = mh(fitness, proposal=mutate_site, x0=start,
                   capture_state=lambda s: apw_occ(code, mu, s))
     return mean(recorded[25000:])
 def linear_fit(sigma, mu, Ne):
     """Mean of the `linear_occ` values from one `mh` run on a fresh matrix.

     A matrix is drawn with `sample_matrix(L, sigma)`; `mh` targets
     ``1/(1+exp(ep-mu))**(Ne-1)`` with ``ep = score_seq(pssm, site)``, and
     the first 25000 entries of its result are discarded before averaging.
     `L` and `linear_occ` come from the enclosing scope.
     """
     pssm = sample_matrix(L, sigma)
     nu = Ne - 1

     def fitness(site):
         return 1/(1+exp(score_seq(pssm, site)-mu))**nu

     start = random_site(L)
     recorded = mh(fitness, proposal=mutate_site, x0=start,
                   capture_state=lambda s: linear_occ(pssm, mu, s))
     return mean(recorded[25000:])
Esempio n. 11
0
def select_sites_by_occupancy(matrix, mu, n):
    """Collect `n` random sites, each accepted with its occupancy.

    A candidate from `random_site` is kept with probability
    ``1 / (1 + exp(score_seq(matrix, site) - mu))``.  The running count is
    printed after every accepted site.

    Returns:
        List of the `n` accepted sites, in acceptance order.
    """
    width = len(matrix)
    selected = []
    while len(selected) < n:
        candidate = random_site(width)
        draw = random.random()
        occupancy = 1 / (1 + exp(score_seq(matrix, candidate) - mu))
        if draw < occupancy:
            selected.append(candidate)
            print(len(selected))
    return selected
Esempio n. 12
0
def alignment_simulation():
    """Compute `motif_ic` for 100000 random alignments of 50 sequences.

    Fifty random sequences of length 100 are generated once.  Each trial
    cuts one width-10 window at a random offset from every sequence and
    passes the windows to `motif_ic`.  The values are collected locally; the
    function as written returns None.
    """
    seq_len, window, num_seqs, num_trials = 100, 10, 50, 100000
    num_offsets = seq_len - window + 1
    seqs = [random_site(seq_len) for _ in range(num_seqs)]

    def random_alignment():
        # Draw every offset first, then slice, so the random stream is
        # consumed in the same order as before.
        starts = [random.randrange(num_offsets) for _ in range(num_seqs)]
        windows = []
        for seq, start in zip(seqs, starts):
            windows.append(seq[start:start + window])
        return windows

    ics = []
    for _ in trange(num_trials):
        ics.append(motif_ic(random_alignment()))
Esempio n. 13
0
def mr_system_mh(alphas,G=100000.0,n=16,L=10):
    """Run `mh` over (matrix, motif) systems weighted by exp(-scale * sse).

    The chain starts from an all-zero L x 4 matrix and `n` random sites of
    length L, proposes with `propose(matrix, motif)`, and runs for 100000
    iterations with ``every=1000`` and ``verbose=True``.

    Returns:
        Whatever `mh` returns for this run.
    """
    scale = 10000  # lower means less stringent

    def target(system):
        matrix, motif = system
        return exp(-(sse(matrix, motif, alphas, G, n) * scale))

    def step(system):
        matrix, motif = system
        return propose(matrix, motif)

    start_matrix = [[0] * 4 for _ in range(L)]
    start_motif = [random_site(L) for _ in range(n)]
    return mh(target,
              step,
              (start_matrix, start_motif),
              iterations=100000,
              every=1000,
              verbose=True)
def experiment1_():
    """Run one `mh` chain for a code and one for its `linearize`d matrix.

    The model from `sample_code` is scored with `score`; the matrix from
    `linearize(code)` is scored with `score_seq`.  Both chains target
    ``1/(1+exp(ep-mu))**(Ne-1)``.

    Returns:
        ``(apw_fits, linear_fits)``: the target function mapped over each
        chain returned by `mh`.
    """
    site_len = 10
    mu = -10
    Ne = 2
    nu = Ne - 1

    def fitness(ep):
        return 1/(1+exp(ep-mu))**nu

    code = sample_code(site_len, 1)
    pssm = linearize(code)

    def apw_phat(site):
        return fitness(score(code, site))

    def linear_phat(site):
        return fitness(score_seq(pssm, site))

    apw_chain = mh(apw_phat, proposal=mutate_site, x0=random_site(site_len))
    linear_chain = mh(linear_phat, proposal=mutate_site,
                      x0=random_site(site_len))
    return map(apw_phat, apw_chain), map(linear_phat, linear_chain)
def sample_site_ar(matrix, mu, Ne, lamb=None, modulus=10**6, return_ar=False):
    """Draw one site by accept/reject against a tilted matrix proposal.

    The target weight of a site with score ``ep = score_seq(matrix, site)``
    is ``(1 + exp(ep - mu))**(-nu)`` with ``nu = Ne - 1``.  Candidates come
    from ``sample_from_matrix(matrix, lamb)``; their unnormalised log weight
    is taken to be ``-lamb * ep`` (see `log_dQ`).

    Parameters:
        matrix: sequence of per-position rows, each indexed in "ACGT" order.
        mu: offset subtracted from the score inside the exponential.
        Ne: only ``nu = Ne - 1`` is used; when ``nu == 0`` a uniform
            `random_site` is returned immediately.
        lamb: tilt passed to `sample_from_matrix`; defaults to ``nu / 2.0``.
        modulus: a progress line is printed every `modulus` trials.
        return_ar: if true, return the number of trials needed instead of
            the accepted site.

    Returns:
        The accepted site, or the trial count when `return_ar` is true.
    """
    nu = Ne - 1
    if nu == 0:
        return random_site(len(matrix))
    if lamb is None:
        lamb = nu/2.0
    L = len(matrix)
    def rQ():
        # Proposal draw.
        return sample_from_matrix(matrix, lamb)
    #log_Z = sum(log(sum(exp(-lamb*ep) for ep in col)) for col in matrix)
    def log_dQ(site):
        # Unnormalised log proposal weight: -lamb times the site's score.
        log_numer = -lamb*sum(row["ACGT".index(b)] for b,row in zip(site,matrix))
        return  log_numer# - log_Z
    def log_fit(site):
        # Log target weight of the site.
        return -nu*log(1+exp(score_seq(matrix,site)-mu))
    ep_max = sum(max(row) for row in matrix)
    ep_min = sum(min(row) for row in matrix)
    alpha = lamb/float(nu)
    def find_logM(ep):
        # Equals lamb*ep - nu*log(1 + exp(ep - mu)), i.e. log target minus
        # log proposal weight written as a function of the score.
        return nu*log((exp(alpha*ep)/(1+exp(ep-mu))))
    def log_M_p(ep):
        # Derivative w.r.t. ep of exp(alpha*ep)/(1 + exp(ep - mu)).
        term1 = alpha*exp(alpha*ep)/(exp(ep-mu)+1)
        term2 = -exp(alpha*ep+ep-mu)/((exp(ep-mu)+1)**2)
        return term1 + term2
    if alpha != 1 and alpha/(1-alpha) > 0:
        # Stationary point of find_logM: exp(ep - mu) = alpha/(1 - alpha).
        ep_crit = log(alpha/(1-alpha)) + mu
    else:
        # No interior stationary point: pick an endpoint from the sign of the
        # derivative at ep = 0.
        deriv = log_M_p(0)
        if deriv < 0:
            ep_crit = ep_min
        else:
            ep_crit = ep_max
    # Log of the bounding constant used in the acceptance ratio.
    log_M = find_logM(ep_crit)
    # print "choosing from:",find_logM(ep_min), find_logM(ep_max)
    # print "log_M:",log_M
    trials = 0
    while True:
        trials += 1
        s = rQ()
        log_f = log_fit(s)
        log_prop = log_dQ(s) + log_M
        log_ar = log_f - log_prop
        log_r = log(random.random())
        accept = log_r < log_ar
        #print trials, s, "*" if accept else " ",log_r, log_ar, log_f, log_prop
        # Sanity checks: the target weight and the acceptance ratio must both
        # be below 1 (stripped when Python runs with -O).
        assert log_f < 0
        assert log_ar < 0
        if trials % modulus == 0:
            print trials, s, "*" if accept else " ",log_r, log_ar, log_f, log_prop
        if accept:
            if return_ar:
                return trials
            else:
                return s
def roc_experiment(motif, trials=10**5):
    """Compare the AUC of pairwise and linear models fitted to `motif`.

    Motif sites serve as positives and `trials` random sites of the same
    length as negatives.  Each model scores both sets and `roc_curve` is
    called once per model (the linear call passes ``color='g'``).

    Returns:
        ``(li_auc, pw_auc)``: the fourth value returned by `roc_curve` for
        the linear and the pairwise model respectively.
    """
    pairwise = pairwise_model_from_motif(motif)
    linear = linear_model_from_motif(motif)
    width = len(motif[0])
    negatives = [random_site(width) for _ in trange(trials)]

    def pairwise_scores(sites):
        return [pw_prob_site(s, pairwise) for s in sites]

    def linear_scores(sites):
        return [linear_prob_site(s, linear) for s in sites]

    pw_pos = pairwise_scores(motif)
    pw_neg = pairwise_scores(tqdm(negatives))
    li_pos = linear_scores(motif)
    li_neg = linear_scores(tqdm(negatives))
    _, _, _, pw_auc = roc_curve(pw_pos, pw_neg)
    _, _, _, li_auc = roc_curve(li_pos, li_neg, color='g')
    return li_auc, pw_auc
Esempio n. 17
0
def alignment_simulation():
    """Compute `motif_ic` for 100000 random alignments of 50 sequences.

    Fifty random sequences of length 100 are generated once.  Each trial
    cuts one width-10 window at a random offset from every sequence and
    passes the windows to `motif_ic`.  The values are collected locally; the
    function as written returns None.
    """
    seq_len, window, num_seqs, num_trials = 100, 10, 50, 100000
    num_offsets = seq_len - window + 1
    seqs = [random_site(seq_len) for _ in range(num_seqs)]

    def random_alignment():
        # Draw every offset first, then slice, so the random stream is
        # consumed in the same order as before.
        starts = [random.randrange(num_offsets) for _ in range(num_seqs)]
        windows = []
        for seq, start in zip(seqs, starts):
            windows.append(seq[start:start + window])
        return windows

    ics = []
    for _ in trange(num_trials):
        ics.append(motif_ic(random_alignment()))
Esempio n. 18
0
    def apw_fit(sigma, mu, Ne):
        """Mean of the `apw_occ` values from one `mh` run on a fresh code.

        A model is drawn with `sample_code(L, sigma)`; `mh` targets
        ``1 / (1 + exp(ep - mu))**(Ne - 1)`` with ``ep = score(code, site)``,
        and the first 25000 entries of its result are discarded before
        averaging.  `L` and `apw_occ` come from the enclosing scope.
        """
        code = sample_code(L, sigma)
        nu = Ne - 1

        def fitness(site):
            return 1 / (1 + exp(score(code, site) - mu))**nu

        start = random_site(L)
        recorded = mh(fitness,
                      proposal=mutate_site,
                      x0=start,
                      capture_state=lambda s: apw_occ(code, mu, s))
        return mean(recorded[25000:])
Esempio n. 19
0
    def linear_fit(sigma, mu, Ne):
        """Mean of the `linear_occ` values from one `mh` run on a fresh matrix.

        A matrix is drawn with `sample_matrix(L, sigma)`; `mh` targets
        ``1 / (1 + exp(ep - mu))**(Ne - 1)`` with
        ``ep = score_seq(pssm, site)``, and the first 25000 entries of its
        result are discarded before averaging.  `L` and `linear_occ` come
        from the enclosing scope.
        """
        pssm = sample_matrix(L, sigma)
        nu = Ne - 1

        def fitness(site):
            return 1 / (1 + exp(score_seq(pssm, site) - mu))**nu

        start = random_site(L)
        recorded = mh(fitness,
                      proposal=mutate_site,
                      x0=start,
                      capture_state=lambda s: linear_occ(pssm, mu, s))
        return mean(recorded[25000:])
Esempio n. 20
0
def train_pairwise_model(motif,
                         pc=1 / 16.0,
                         decay_timescale=10000,
                         take_stock=1000,
                         eta=0.01,
                         stop_crit=0.01):
    """Fit pairwise weights so sampled pair frequencies match those of `motif`.

    A Metropolis chain over sites runs under the current weights (log score
    ``score(ws, x)``).  Periodically, N sites are drawn from the most recent
    stretch of the chain, their pairwise frequencies are compared with the
    motif's, and every weight is moved by ``eta * (target - current)``.

    Parameters:
        motif: list of equal-length sites; supplies L, N and the targets.
        pc: pseudocount forwarded to `get_pairwise_freqs` for the targets.
        decay_timescale: only appears in the progress print below.
        take_stock: initial number of iterations between weight updates.
        eta: step size of the weight update.
        stop_crit: training stops once the squared error per column pair
            drops below this value.

    Returns:
        The list of weight dictionaries `ws`.
    """
    L = len(motif[0])
    N = len(motif)
    fs = get_pairwise_freqs(motif, pc=pc)
    # One zero-initialised weight table per pair of columns, keyed by dinucleotide.
    ws = [{(b1, b2): 0
           for (b1, b2) in dinucs} for _ in range(int(choose(L, 2)))]
    x = random_site(L)
    log_y = score(ws, x)
    chain = []
    # sses = [0.0] * (int(iterations/take_stock) + 1)
    #chain = []
    #for iteration in xrange(iterations):
    iteration = 0
    stock_counter = take_stock
    while True:
        # Metropolis step on the log scale.
        xp = mutate_site(x)
        log_yp = score(ws, xp)
        if log(random.random()) < log_yp - log_y:
            x = xp
            log_y = log_yp
        chain.append(x)
        if iteration > 0 and iteration % stock_counter == 0:
            # Estimate current frequencies from N sites taken from the last
            # `stock_counter` chain entries.
            current_fs = get_pairwise_freqs(
                sample(N,
                       chain[iteration - stock_counter:iteration],
                       replace=False))
            sse = 0
            for w, f, cur_f in zip(ws, fs, current_fs):
                for b1, b2 in dinucs:
                    delta = f[b1, b2] - cur_f[b1, b2]
                    sse += delta**2
                    w[b1, b2] += eta * (
                        delta)  #* exp(-iteration/float(decay_timescale))
            #sses[iteration/take_stock] = sse
            sse_per_col_pair = sse / choose(L, 2)
            print iteration, stock_counter, sse_per_col_pair, exp(
                -iteration / float(decay_timescale)), ws[0]['A', 'A']
            # Grow the interval by 0 or 1 at random.
            stock_counter += random.randrange(2)
            #print "motif_ic:", motif_ic(chain[iteration-stock_counter : iteration])
            if iteration > 0 and sse_per_col_pair < stop_crit:
                print "breaking:", sse, sse_per_col_pair
                break
            log_y = score(ws, x)  # recalculate this because weights change
            #stock_counter += take_stock * (iteration > take_stock)
        iteration += 1
    return ws
Esempio n. 21
0
def site_mh(matrix, mu, Ne, iterations=50000):
    """Run `mh` in log space on `log_Pe` of each site's score.

    The log target is ``log_Pe(score_seq(matrix, site), site_mu, site_sigma,
    mu, Ne)``, with `site_mu` and `site_sigma` derived from `matrix`.
    Proposals come from `mutate_site` and the chain starts at a random site
    of length ``len(matrix)``.

    Returns:
        Whatever `mh` returns for this run.
    """
    site_mu = site_mu_from_matrix(matrix)
    site_sigma = site_sigma_from_matrix(matrix)

    def log_target(site):
        ep = score_seq(matrix, site)
        return log_Pe(ep, site_mu, site_sigma, mu, Ne)

    def propose_site(site):
        return mutate_site(site)

    start = random_site(len(matrix))
    return mh(log_target,
              propose_site,
              x0=start,
              use_log=True,
              iterations=iterations)
Esempio n. 22
0
def mr_system_sa(alphas,init_system=None,G=100000.0,n=16,L=10,
              sse_epsilon=0.0001,proposal=propose,scale=1000,
              iterations=10000,return_trajectory=False):
    """Minimise the scaled `sse` of a (matrix, motif) system with `anneal`.

    Without `init_system` the search starts from an all-zero L x 4 matrix
    and `n` random sites of length L.  The objective is
    ``sse(matrix, motif, alphas, G, n) * scale`` and the stopping criterion
    handed to `anneal` is ``sse_epsilon * scale``.

    Returns:
        Whatever `anneal` returns.
    """
    if init_system is not None:
        matrix, motif = init_system
    else:
        matrix = [[0] * 4 for _ in range(L)]
        motif = [random_site(L) for _ in range(n)]

    def objective(system):
        sys_matrix, sys_motif = system
        return sse(sys_matrix, sys_motif, alphas, G, n) * scale

    def step(system):
        sys_matrix, sys_motif = system
        return proposal(sys_matrix, sys_motif)

    return anneal(objective,
                  step,
                  (matrix, motif),
                  iterations=iterations,
                  stopping_crit=sse_epsilon * scale,
                  return_trajectory=return_trajectory)
def experiment2_():
    """Run one `mh` chain per model and return the fitness of every state.

    A model from `sample_code` (scored with `score`) is compared with a
    matrix from `sample_matrix` (scored with `score_seq`).  The matrix is
    drawn with parameter ``sqrt(site_sigma**2/L)``, where `site_sigma` is
    the standard deviation of the first model's scores over 10000 random
    sites.

    Returns:
        ``(apw_fits, linear_fits)``: the target function mapped over each
        chain returned by `mh`.
    """
    site_len = 10
    mu = -10
    Ne = 2
    nu = Ne - 1

    def fitness(ep):
        # Shared target: 1/(1+exp(ep-mu))**(Ne-1).
        return 1/(1+exp(ep-mu))**nu

    code = sample_code(site_len, 1)
    background = [random_site(site_len) for _ in xrange(10000)]
    site_sigma = sd([score(code, s) for s in background])
    pssm = sample_matrix(site_len, sqrt(site_sigma**2/site_len))

    def apw_phat(site):
        return fitness(score(code, site))

    def linear_phat(site):
        return fitness(score_seq(pssm, site))

    apw_chain = mh(apw_phat, proposal=mutate_site, x0=random_site(site_len))
    linear_chain = mh(linear_phat, proposal=mutate_site,
                      x0=random_site(site_len))
    return map(apw_phat, apw_chain), map(linear_phat, linear_chain)
Esempio n. 24
0
def experiment1_():
    """Run one `mh` chain for a code and one for its `linearize`d matrix.

    The model from `sample_code` is scored with `score`; the matrix from
    `linearize(code)` is scored with `score_seq`.  Both chains target
    ``1 / (1 + exp(ep - mu))**(Ne - 1)``.

    Returns:
        ``(apw_fits, linear_fits)``: the target function mapped over each
        chain returned by `mh`.
    """
    site_len = 10
    mu = -10
    Ne = 2
    nu = Ne - 1

    def fitness(ep):
        return 1 / (1 + exp(ep - mu))**nu

    code = sample_code(site_len, 1)
    pssm = linearize(code)

    def apw_phat(site):
        return fitness(score(code, site))

    def linear_phat(site):
        return fitness(score_seq(pssm, site))

    apw_chain = mh(apw_phat, proposal=mutate_site, x0=random_site(site_len))
    linear_chain = mh(linear_phat,
                      proposal=mutate_site,
                      x0=random_site(site_len))
    return map(apw_phat, apw_chain), map(linear_phat, linear_chain)
Esempio n. 25
0
def sample_uniform_energy(matrix):
    """Rejection-sample a site so that accepted scores are roughly uniform.

    Random sites are accepted with probability inversely proportional to a
    normal density evaluated at their score, where the normal's mean and
    variance are the sums of the per-row means and (uncorrected) variances
    of `matrix`.

    Parameters:
        matrix: sequence of per-position rows; its length sets the site length.

    Returns:
        The first accepted site.
    """
    # The site length must match the matrix; it was previously read from an
    # undefined/global `L`.
    L = len(matrix)
    mu = sum(map(mean, matrix))
    sigma = sqrt(sum(map(lambda x:variance(x,correct=False), matrix)))
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    # 1/pdf is largest at whichever extreme score lies farther from mu, so M
    # bounds it over [ep_min, ep_max] and keeps `ar` at or below 1.
    M_min = 1/norm.pdf(ep_min, mu, sigma)
    M_max = 1/norm.pdf(ep_max, mu, sigma)
    M = max(M_min, M_max)
    trials = 0
    while True:
        trials += 1
        if trials % 10000 == 0:
            print(trials)
        site = random_site(L)
        ep = score_seq(matrix, site)
        ar = 1/(M*norm.pdf(ep, mu, sigma))
        if random.random() < ar:
            return site
Esempio n. 26
0
def sample_uniform_energy(matrix):
    """Rejection-sample a site so that accepted scores are roughly uniform.

    Random sites are accepted with probability inversely proportional to a
    normal density evaluated at their score, where the normal's mean and
    variance are the sums of the per-row means and (uncorrected) variances
    of `matrix`.

    Parameters:
        matrix: sequence of per-position rows; its length sets the site length.

    Returns:
        The first accepted site.
    """
    # The site length must match the matrix; it was previously read from an
    # undefined/global `L`.
    L = len(matrix)
    mu = sum(map(mean, matrix))
    sigma = sqrt(sum(map(lambda x: variance(x, correct=False), matrix)))
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    # 1/pdf is largest at whichever extreme score lies farther from mu, so M
    # bounds it over [ep_min, ep_max] and keeps `ar` at or below 1.
    M_min = 1 / norm.pdf(ep_min, mu, sigma)
    M_max = 1 / norm.pdf(ep_max, mu, sigma)
    M = max(M_min, M_max)
    trials = 0
    while True:
        trials += 1
        if trials % 10000 == 0:
            print(trials)
        site = random_site(L)
        ep = score_seq(matrix, site)
        ar = 1 / (M * norm.pdf(ep, mu, sigma))
        if random.random() < ar:
            return site
def train_pairwise_model(motif, pc=1/16.0, decay_timescale=10000, take_stock=1000, eta=0.01, stop_crit=0.01):
    """Fit pairwise weights so sampled pair frequencies match those of `motif`.

    A Metropolis chain over sites runs under the current weights (log score
    ``score(ws, x)``).  Periodically, N sites are drawn from the most recent
    stretch of the chain, their pairwise frequencies are compared with the
    motif's, and every weight is moved by ``eta * (target - current)``.

    Parameters:
        motif: list of equal-length sites; supplies L, N and the targets.
        pc: pseudocount forwarded to `get_pairwise_freqs` for the targets.
        decay_timescale: only appears in the progress print below.
        take_stock: initial number of iterations between weight updates.
        eta: step size of the weight update.
        stop_crit: training stops once the squared error per column pair
            drops below this value.

    Returns:
        The list of weight dictionaries `ws`.
    """
    L = len(motif[0])
    N = len(motif)
    fs = get_pairwise_freqs(motif, pc=pc)
    # One zero-initialised weight table per pair of columns, keyed by dinucleotide.
    ws = [{(b1, b2):0 for (b1,b2) in dinucs} for _ in range(int(choose(L,2)))]
    x = random_site(L)
    log_y = score(ws, x)
    chain = []
    # sses = [0.0] * (int(iterations/take_stock) + 1)
    #chain = []
    #for iteration in xrange(iterations):
    iteration = 0
    stock_counter = take_stock
    while True:
        # Metropolis step on the log scale.
        xp = mutate_site(x)
        log_yp = score(ws, xp)
        if log(random.random()) < log_yp - log_y:
            x = xp
            log_y = log_yp
        chain.append(x)
        if iteration > 0 and iteration % stock_counter == 0:
            # Estimate current frequencies from N sites taken from the last
            # `stock_counter` chain entries.
            current_fs = get_pairwise_freqs(sample(N,chain[iteration-stock_counter : iteration], replace=False))
            sse = 0
            for w, f, cur_f in zip(ws, fs, current_fs):
                for b1, b2 in dinucs:
                    delta = f[b1, b2] - cur_f[b1,b2]
                    sse += delta**2
                    w[b1, b2] += eta*(delta) #* exp(-iteration/float(decay_timescale))
            #sses[iteration/take_stock] = sse
            sse_per_col_pair = sse/choose(L,2)
            print iteration, stock_counter, sse_per_col_pair, exp(-iteration/float(decay_timescale)), ws[0]['A','A']
            # Grow the interval by 0 or 1 at random.
            stock_counter += random.randrange(2)
            #print "motif_ic:", motif_ic(chain[iteration-stock_counter : iteration])
            if iteration > 0 and sse_per_col_pair < stop_crit:
                print "breaking:", sse, sse_per_col_pair
                break
            log_y = score(ws, x) # recalculate this because weights change
            #stock_counter += take_stock * (iteration > take_stock)
        iteration += 1
    return ws
def sample_site_bf(matrix, mu, Ne, ringer_site=None, verbose=False):
    """Sample site of length L from stationary fitness distribution under
    E(s) at effective population Ne, chemical potential mu.  (bf for
    brute force)
    """
    nu = Ne - 1
    L = len(matrix)
    if ringer_site is None:
        ringer_site = ringer_motif(matrix,1)[0]
    def phat(s):
        ep = score_seq(matrix,s)
        return (1 + exp(ep - mu))**(-nu)
    phat_max = phat(ringer_site)
    trials = 0
    while True:
        trials += 1
        site = random_site(L)
        ar = phat(site)/phat_max
        if random.random() < ar:
            if verbose:
                print trials, ar
            return site
Esempio n. 29
0
def mr_system(alphas,init_system=None,G=100000.0,n=16,L=10,
              sse_epsilon=0.00000001,use_annealing=True,scale=1000,
              iterations=10000,motif_prob=0.5,verbose=False):
    """Search (matrix, motif) systems with `anneal` or with `mh`.

    Without `init_system` the search starts from an all-zero L x 4 matrix
    and `n` random sites of length L.  Proposals come from
    ``propose(matrix, motif, motif_prob=motif_prob)``.

    With `use_annealing`, `anneal` minimises ``sse(...) * scale`` with
    stopping criterion ``sse_epsilon * scale``.  Otherwise `mh` samples with
    weight ``exp(sse(...) * -scale)``; that branch always passes
    ``every=100`` and ``verbose=True`` regardless of `verbose`.

    Returns:
        Whatever `anneal` or `mh` returns.
    """
    def raw_sse(system):
        sys_matrix, sys_motif = system
        return sse(sys_matrix, sys_motif, alphas, G, n)

    def step(system):
        sys_matrix, sys_motif = system
        return propose(sys_matrix, sys_motif, motif_prob=motif_prob)

    if init_system is not None:
        matrix, motif = init_system
    else:
        matrix = [[0] * 4 for _ in range(L)]
        motif = [random_site(L) for _ in range(n)]
    start = (matrix, motif)

    if not use_annealing:
        return mh(lambda system: exp(raw_sse(system) * -scale),
                  step,
                  start,
                  iterations=iterations,
                  every=100, verbose=True)
    return anneal(lambda system: raw_sse(system) * scale,
                  step,
                  start,
                  iterations=iterations,
                  stopping_crit=sse_epsilon * scale, verbose=verbose)
Esempio n. 30
0
def sample_species2():
    """Return ``(bd, sites)``: L random picks from `aas` and n copies of one site.

    `L`, `n` and `aas` are read from the enclosing module scope.  All `n`
    entries of `sites` are the same random site.
    """
    binding_domain = []
    for _ in range(L):
        binding_domain.append(random.choice(aas))
    shared_site = random_site(L)
    return (binding_domain, [shared_site] * n)
 def sample_apw_site():
     """Run `mh` on `apw_phat` from a fresh random site and return its result."""
     start = random_site(L)
     return mh(apw_phat, proposal=mutate_site, x0=start)
Esempio n. 32
0
 def sample_apw_site():
     """Run `mh` on `apw_phat` from a fresh random site and return its result."""
     start = random_site(L)
     return mh(apw_phat, proposal=mutate_site, x0=start)
Esempio n. 33
0
def log_Z_analytic((matrix, mu, Ne), N):
    """compute log_Z analytically"""
    acc = 0
    nu = Ne - 1
    L = len(matrix)
    for kmer in kmers(L):
        ep = score_seq(matrix, "".join(kmer))
        acc += (1/(1+exp(ep-mu)))**(Ne-1)
    return N * log(acc)

def log_ZS_naive((matrix, mu, Ne), trials=1000):
    acc = 0
    nu = Ne - 1
    L = len(matrix)
    for i in xrange(trials):
        ep = score_seq(matrix, random_site(L))
        acc += (1/(1+exp(ep-mu)))**(Ne-1)
    mean_Zs = acc / trials
    return L * log(4) + log(mean_Zs)

def log_ZM_naive((matrix, mu, Ne), N, trials=1000):
    return N * log_ZS_naive((matrix, mu, Ne), trials=1000)
    
def log_ZS_hack((matrix, mu, Ne), N):
    L = len(matrix)
    mat_mu = sum(map(mean,matrix))
    mat_sigma = sqrt(sum(map(lambda xs:variance(xs,correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log((Ne-1)), mat_mu, mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    return log_Zs
Esempio n. 34
0
from sample import direct_sampling,rsa
from project_utils import score_seq,sample_average,inverse_cdf_sampler,falling_fac
from utils import random_site,pairs,mh,maybesave,transpose,product
from matplotlib import pyplot as plt
from math import log,exp
import random
random.seed(1)

# Toy genome: five copies of "ACGTTGCA", 80 random bases, five more copies,
# then 10 random bases.
genome = "ACGTTGCA" * 5 + random_site(80) + "ACGTTGCA" * 5 + random_site(10)
G = len(genome)
beta = 1
# One row per position; each row has a single -2 entry.  If the columns are in
# "ACGT" order the lowest-scoring site is "ACGTTGCA" -- TODO confirm ordering.
energy_matrix = [[-2,0,0,0],
                 [-0,-2,0,0],
                 [-0,0,-2,0],
                 [-0,0,0,-2],
                 [-0,0,0,-2],
                 [-0,0,-2,0],
                 [-0,-2,0,0],
                 [-2,0,0,0]]

w = len(energy_matrix)
config_len = G-w
# NOTE(review): the comment below says -2 but the value assigned is -8 --
# confirm which one is intended.
interaction_energy = -8 # TFs in contact get -2 added to configuration energy
exclusion_energy = 1000000
# Score of every length-w window of the genome, and exp(-score) for each.
eps =[score_seq(energy_matrix,genome[i:i+w]) for i in range(G-w+1)]
ks = [exp(-ep) for ep in eps]
    
def positions(config):
    """Return the indices of `config` whose entries are greater than zero."""
    occupied = []
    for index, value in enumerate(config):
        if value > 0:
            occupied.append(index)
    return occupied

def from_positions(poses):
def sample_Zb_terms(L,sigma,trials=10000):
    """Score `trials` random sites against one freshly sampled matrix.

    The matrix comes from `sample_matrix(L, sigma)`; each site has length L.

    Returns:
        List of `score_seq` values, one per random site.
    """
    matrix = sample_matrix(L, sigma)
    energies = []
    for _ in xrange(trials):
        energies.append(score_seq(matrix, random_site(L)))
    return energies
Esempio n. 36
0
    """compute log_Z analytically"""
    acc = 0
    nu = Ne - 1
    L = len(matrix)
    for kmer in kmers(L):
        ep = score_seq(matrix, "".join(kmer))
        acc += (1 / (1 + exp(ep - mu)))**(Ne - 1)
    return N * log(acc)


def log_ZS_naive((matrix, mu, Ne), trials=1000):
    acc = 0
    nu = Ne - 1
    L = len(matrix)
    for i in xrange(trials):
        ep = score_seq(matrix, random_site(L))
        acc += (1 / (1 + exp(ep - mu)))**(Ne - 1)
    mean_Zs = acc / trials
    return L * log(4) + log(mean_Zs)


def log_ZM_naive((matrix, mu, Ne), N, trials=1000):
    return N * log_ZS_naive((matrix, mu, Ne), trials=1000)


def log_ZS_hack((matrix, mu, Ne), N):
    # Approximates a log partition term from a normal CDF over site scores.
    # NOTE(review): this copy ends after computing log_perc_below_threshold
    # and has no return statement; it looks truncated -- confirm against the
    # full source.
    L = len(matrix)
    # Mean and standard deviation of the score: sums of per-row means and
    # (uncorrected) variances.
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log((Ne - 1)), mat_mu,
                                           mat_sigma)
Esempio n. 37
0
def mh_motif(n,w,desired_ic,epsilon,scale=10,iterations=10000):
    """Search for a motif whose `motif_ic` is near `desired_ic` via `mh`.

    The chain starts from `n` random sites of width `w`, proposes with
    `mutate_motif`, and weights a motif by
    ``exp(-abs(desired_ic - motif_ic(m)) * scale)``.  `epsilon` is accepted
    but not used by this function.

    Returns:
        Whatever `mh` returns for this run.
    """
    start = [random_site(w) for _ in range(n)]

    def weight(candidate):
        gap = abs(desired_ic - motif_ic(candidate))
        return exp(-gap * scale)

    return mh(weight, mutate_motif, start, iterations=iterations)
Esempio n. 38
0
def sample_sites(n=n,L=L):
    """Return a list of `n` random sites of length `L`.

    Both defaults are taken from the module-level `n` and `L` at definition
    time.
    """
    sites = []
    for _ in range(n):
        sites.append(random_site(L))
    return sites
def Zb_from_matrix_ref(matrix,G):
    """Sum exp(-score) over `G` random sites scored against `matrix`.

    Each site has length ``len(matrix)`` and is scored with `score_seq`.
    """
    width = len(matrix)
    energies = np.array(
        [score_seq(matrix, random_site(width)) for _ in trange(G)])
    boltzmann_weights = np.exp(-energies)
    return np.sum(boltzmann_weights)
Esempio n. 40
0
File: data.py Progetto: poneill/amic
"""
This file contains data for the genome (GENOME) and DNA binding domain (TRUE_ENERGY_MATRIX)
"""
from utils import random_site

W = 10 # width of DNA binding domain

# W identical rows, each with -2 in the first column and 0 elsewhere.
TRUE_ENERGY_MATRIX = ([[-2,0,0,0] for i in range(W)]) 
# NOTE(review): genome.fa is opened and read at import time, but `lines` is
# not used below while the GENOME assignment from it stays commented out.
# Importing this module therefore still requires the file to exist.
with open('genome.fa') as f:
    lines = f.readlines()
#GENOME = lines[1]
# A random 5,000,000-base sequence is used in place of the file contents.
GENOME = random_site(5000000)
L = len(GENOME)
MEAN_FRAG_LENGTH = 250

# Mon May 12 19:20:38 EDT 2014
# toy genome of 10k bases, MEAN_FRAG_LENGTH = 50 works perfectly