def sample_motif_ar_tilted(matrix, mu, Ne, N):
    """Collect N sites by accept/reject sampling against the weight ``phat``.

    Candidates are drawn with ``random_site(L)`` and scored with
    ``score_seq(matrix, site)``.  A candidate with score ``ep`` is kept when
    a uniform draw falls below ``phat(ep) / phat(mode)``, where
    ``phat(ep) = (1 / (1 + exp(ep - mu))) ** (Ne - 1)``.

    Args:
        matrix: sequence of rows of numbers; its length is the site length
            and the per-row minima are summed to obtain ``ep_min``.
        mu: offset subtracted from the score inside ``phat``.
        Ne: exponent parameter; ``Ne - 1`` is the power applied in ``phat``.
        N: number of accepted sites to return.

    Returns:
        A list of N accepted sites, in order of acceptance.
    """
    nu = Ne - 1
    L = len(matrix)
    ep_min = sum(map(min, matrix))
    site_sigma = site_sigma_from_matrix(matrix)
    # Function whose root bisect_interval locates to obtain `mode`.
    d_density = lambda ep: ep / site_sigma**2 + nu / (1 + exp(mu - ep))
    phat = lambda ep: (1 / (1 + exp(ep - mu)))**nu
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        # don't want mode right on the nose of ep_min for sampling purposes,
        # so offset it a bit
        mode = ep_min + 1
    # The acceptance test below used `pmode`, which was never assigned (only
    # an unused `dmode` was), so the first candidate raised NameError.
    # NOTE(review): phat decreases with ep, so candidates with ep < mode get a
    # ratio above 1 and are always accepted -- confirm this normaliser is the
    # intended envelope.
    pmode = phat(mode)

    def mean_ep(lamb):
        psfm = psfm_from_matrix(matrix, lamb=lamb)
        return sum([ep * p
                    for (mat_row, psfm_row) in zip(matrix, psfm)
                    for (ep, p) in zip(mat_row, psfm_row)])

    # NOTE(review): the tilted PSFM is solved for and built here, but the loop
    # below still proposes with random_site(L); these values are unused.
    lamb = bisect_interval(lambda l: mean_ep(l) - mode, -20, 20)
    tilted_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted_psfm = [[log(p) for p in row] for row in tilted_psfm]

    motif = []
    while len(motif) < N:
        site = random_site(L)
        ep = score_seq(matrix, site)
        if random.random() < phat(ep) / pmode:
            motif.append(site)
    return motif
def sample_motif_ar_tilted(matrix, mu, Ne, N):
    """Collect N sites by accept/reject sampling against the weight ``phat``.

    Candidates are drawn with ``random_site(L)`` and scored with
    ``score_seq(matrix, site)``.  A candidate with score ``ep`` is kept when
    a uniform draw falls below ``phat(ep) / phat(mode)``, where
    ``phat(ep) = (1 / (1 + exp(ep - mu))) ** (Ne - 1)``.

    Args:
        matrix: sequence of rows of numbers; its length is the site length
            and the per-row minima are summed to obtain ``ep_min``.
        mu: offset subtracted from the score inside ``phat``.
        Ne: exponent parameter; ``Ne - 1`` is the power applied in ``phat``.
        N: number of accepted sites to return.

    Returns:
        A list of N accepted sites, in order of acceptance.
    """
    nu = Ne - 1
    L = len(matrix)
    ep_min = sum(map(min, matrix))
    site_sigma = site_sigma_from_matrix(matrix)
    # Function whose root bisect_interval locates to obtain `mode`.
    d_density = lambda ep: ep / site_sigma**2 + nu / (1 + exp(mu - ep))
    phat = lambda ep: (1 / (1 + exp(ep - mu)))**nu
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        # don't want mode right on the nose of ep_min for sampling purposes,
        # so offset it a bit
        mode = ep_min + 1
    # The acceptance test below used `pmode`, which was never assigned (only
    # an unused `dmode` was), so the first candidate raised NameError.
    # NOTE(review): phat decreases with ep, so candidates with ep < mode get a
    # ratio above 1 and are always accepted -- confirm this normaliser is the
    # intended envelope.
    pmode = phat(mode)

    def mean_ep(lamb):
        psfm = psfm_from_matrix(matrix, lamb=lamb)
        return sum([ep * p
                    for (mat_row, psfm_row) in zip(matrix, psfm)
                    for (ep, p) in zip(mat_row, psfm_row)])

    # NOTE(review): the tilted PSFM is solved for and built here, but the loop
    # below still proposes with random_site(L); these values are unused.
    lamb = bisect_interval(lambda l: mean_ep(l) - mode, -20, 20)
    tilted_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted_psfm = [[log(p) for p in row] for row in tilted_psfm]

    motif = []
    while len(motif) < N:
        site = random_site(L)
        ep = score_seq(matrix, site)
        if random.random() < phat(ep) / pmode:
            motif.append(site)
    return motif
def mean_ep(lamb):
    """Return the sum of ``ep * p`` over paired entries of matrix and PSFM.

    The PSFM is built by ``psfm_from_matrix(matrix, lamb=lamb)``.  ``matrix``
    is not a parameter; it is resolved from the surrounding scope.

    Args:
        lamb: value forwarded to ``psfm_from_matrix`` as ``lamb``.

    Returns:
        The summed products, accumulated row by row.

    Raises:
        Exception: if building the PSFM or summing fails.  The message
            carries ``lamb``, ``matrix`` and the underlying error.
    """
    try:
        psfm = psfm_from_matrix(matrix, lamb=lamb)
        return sum([ep * p
                    for (mat_row, psfm_row) in zip(matrix, psfm)
                    for (ep, p) in zip(mat_row, psfm_row)])
    except Exception as err:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.  The context that used to be printed to
        # stdout now travels with the exception itself.
        raise Exception("mean_ep failed for lamb=%r, matrix=%r: %r"
                        % (lamb, matrix, err))
def rejection_sample_site((matrix, mu, Ne)): psfm = psfm_from_matrix(matrix) log_psfm = [[log(p) for p in row] for row in psfm] log_psfm_prob = lambda site:score_seq(log_psfm, site) log_M = -sum(map(max,psfm)) sites = [sample_from_psfm(psfm) for _ in xrange(trials)] log_fs = [log_fhat((matrix, mu, Ne), [site]) for site in sites] log_qs = [log_psfm_prob(site) for site in sites] ars = [exp(log_f - (log_q + log_M)) for log_f, log_q in zip(log_fs, log_qs)]
def mean_ep(lamb):
    """Return the sum of ``ep * p`` over paired entries of matrix and PSFM.

    The PSFM is built by ``psfm_from_matrix(matrix, lamb=lamb)``.  ``matrix``
    is not a parameter; it is resolved from the surrounding scope.

    Args:
        lamb: value forwarded to ``psfm_from_matrix`` as ``lamb``.

    Returns:
        The summed products, accumulated row by row.

    Raises:
        Exception: if building the PSFM or summing fails.  The message
            carries ``lamb``, ``matrix`` and the underlying error.
    """
    try:
        psfm = psfm_from_matrix(matrix, lamb=lamb)
        return sum([ep * p
                    for (mat_row, psfm_row) in zip(matrix, psfm)
                    for (ep, p) in zip(mat_row, psfm_row)])
    except Exception as err:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.  The context that used to be printed to
        # stdout now travels with the exception itself.
        raise Exception("mean_ep failed for lamb=%r, matrix=%r: %r"
                        % (lamb, matrix, err))
def rejection_sample_site((matrix, mu, Ne)): psfm = psfm_from_matrix(matrix) log_psfm = [[log(p) for p in row] for row in psfm] log_psfm_prob = lambda site: score_seq(log_psfm, site) log_M = -sum(map(max, psfm)) sites = [sample_from_psfm(psfm) for _ in xrange(trials)] log_fs = [log_fhat((matrix, mu, Ne), [site]) for site in sites] log_qs = [log_psfm_prob(site) for site in sites] ars = [ exp(log_f - (log_q + log_M)) for log_f, log_q in zip(log_fs, log_qs) ]
def sample_site_imh(matrix, mu, Ne, lamb, iterations=None):
    """Return the last element of an ``mh`` run that proposes from a tilted PSFM.

    The proposal ignores its argument and draws from
    ``psfm_from_matrix(matrix, lamb=lamb)``; the proposal log-density is the
    site's score under the elementwise log of that PSFM.  The target
    log-weight is ``-(Ne - 1) * log(1 + exp(score_seq(matrix, site) - mu))``.

    Args:
        matrix: rows of numbers; its length sets the default ``iterations``.
        mu: offset subtracted from the site score in the target.
        Ne: ``Ne - 1`` scales the target log-weight.
        lamb: forwarded to ``psfm_from_matrix`` as ``lamb``.
        iterations: defaults to ``10 * len(matrix)`` when None.

    Returns:
        ``mh(...)[-1]``.
    """
    nu = Ne - 1
    L = len(matrix)
    if iterations is None:
        iterations = 10 * L
    # NOTE(review): `iterations` is resolved above but is not passed to mh()
    # below -- confirm whether it should be forwarded.

    def log_phat(site):
        return -nu * log(1 + exp(score_seq(matrix, site) - mu))

    tilted_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted_psfm = [[log(p) for p in row] for row in tilted_psfm]

    def prop(_):
        return sample_from_psfm(tilted_psfm)

    def log_dprop(xp, _):
        return score_seq(log_tilted_psfm, xp)

    chain = mh(log_phat, proposal=prop, dprop=log_dprop, x0=prop(None),
               use_log=True)
    return chain[-1]
def predict_ic(matrix, mu, Ne, N=100):
    """Return ``sum(2 - h(col))`` over the columns of a tilted PSFM.

    Steps:
      1. Rejection-sample N energies uniformly on ``[ep_min, ep_max]``
         against ``density`` and take their mean.
      2. Solve by bisection on ``[-20, 20]`` for the ``lamb`` at which
         ``mean_ep(lamb)`` equals that mean.
      3. Build the PSFM for that ``lamb`` and sum ``2 - h(col)`` over its
         columns (``h`` is presumably a per-column entropy in bits --
         confirm against its definition).

    Args:
        matrix: rows of numbers; per-row minima/maxima are summed to give
            ``ep_min`` / ``ep_max``.
        mu: offset subtracted from the energy inside the density.
        Ne: ``Ne - 1`` is the power applied in the density.
        N: number of accepted energy samples to average.

    Returns:
        The summed ``2 - h(col)`` value.

    Raises:
        Exception: if the bisection for ``lamb`` (or ``mean_ep`` inside it)
            fails.  The message carries the inputs and the underlying error.
    """
    nu = Ne - 1
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    site_sigma = site_sigma_from_matrix(matrix)
    density = lambda ep: ((1 / (1 + exp(ep - mu)))**nu
                          * dnorm(ep, 0, site_sigma)
                          * (ep_min <= ep <= ep_max))
    d_density = lambda ep: ep / site_sigma**2 + nu / (1 + exp(mu - ep))
    mode = bisect_interval(d_density, -100, 100)
    # density() is zero outside [ep_min, ep_max].  Clamp the mode on both
    # sides so the envelope height dmode is taken inside the support; a mode
    # above ep_max previously gave dmode == 0 and a division by zero below.
    mode = min(max(mode, ep_min), ep_max)
    dmode = density(mode)
    # calculate mean epsilon via rejection sampling
    eps = []
    while len(eps) < N:
        ep = random.random() * (ep_max - ep_min) + ep_min
        if random.random() < density(ep) / dmode:
            eps.append(ep)
    des_mean_ep = mean(eps)

    def mean_ep(lamb):
        try:
            psfm = psfm_from_matrix(matrix, lamb=lamb)
            return sum([ep * p
                        for (mat_row, psfm_row) in zip(matrix, psfm)
                        for (ep, p) in zip(mat_row, psfm_row)])
        except Exception as err:
            # Carry the context in the exception instead of printing it.
            raise Exception("mean_ep failed for lamb=%r, matrix=%r: %r"
                            % (lamb, matrix, err))

    try:
        lamb = bisect_interval(lambda l: mean_ep(l) - des_mean_ep, -20, 20)
    except Exception as err:
        raise Exception(
            "predict_ic could not solve for lamb "
            "(matrix=%r, mu=%r, Ne=%r): %r" % (matrix, mu, Ne, err))
    tilted_psfm = psfm_from_matrix(matrix, lamb)
    return sum([2 - h(col) for col in tilted_psfm])
def sample_site_imh(matrix, mu, Ne, lamb, iterations=None):
    """Return the last element of an ``mh`` run that proposes from a tilted PSFM.

    The proposal ignores its argument and draws from
    ``psfm_from_matrix(matrix, lamb=lamb)``; the proposal log-density is the
    site's score under the elementwise log of that PSFM.  The target
    log-weight is ``-(Ne - 1) * log(1 + exp(score_seq(matrix, site) - mu))``.

    Args:
        matrix: rows of numbers; its length sets the default ``iterations``.
        mu: offset subtracted from the site score in the target.
        Ne: ``Ne - 1`` scales the target log-weight.
        lamb: forwarded to ``psfm_from_matrix`` as ``lamb``.
        iterations: defaults to ``10 * len(matrix)`` when None.

    Returns:
        ``mh(...)[-1]``.
    """
    nu = Ne - 1
    L = len(matrix)
    if iterations is None:
        iterations = 10 * L
    # NOTE(review): `iterations` is resolved above but is not passed to mh()
    # below -- confirm whether it should be forwarded.

    def log_phat(site):
        return -nu * log(1 + exp(score_seq(matrix, site) - mu))

    tilted_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted_psfm = [[log(p) for p in row] for row in tilted_psfm]

    def prop(_):
        return sample_from_psfm(tilted_psfm)

    def log_dprop(xp, _):
        return score_seq(log_tilted_psfm, xp)

    chain = mh(log_phat, proposal=prop, dprop=log_dprop, x0=prop(None),
               use_log=True)
    return chain[-1]
def predict_ic(matrix, mu, Ne, N=100):
    """Return ``sum(2 - h(col))`` over the columns of a tilted PSFM.

    Steps:
      1. Rejection-sample N energies uniformly on ``[ep_min, ep_max]``
         against ``density`` and take their mean.
      2. Solve by bisection on ``[-20, 20]`` for the ``lamb`` at which
         ``mean_ep(lamb)`` equals that mean.
      3. Build the PSFM for that ``lamb`` and sum ``2 - h(col)`` over its
         columns (``h`` is presumably a per-column entropy in bits --
         confirm against its definition).

    Args:
        matrix: rows of numbers; per-row minima/maxima are summed to give
            ``ep_min`` / ``ep_max``.
        mu: offset subtracted from the energy inside the density.
        Ne: ``Ne - 1`` is the power applied in the density.
        N: number of accepted energy samples to average.

    Returns:
        The summed ``2 - h(col)`` value.

    Raises:
        Exception: if the bisection for ``lamb`` (or ``mean_ep`` inside it)
            fails.  The message carries the inputs and the underlying error.
    """
    nu = Ne - 1
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    site_sigma = site_sigma_from_matrix(matrix)
    density = lambda ep: ((1 / (1 + exp(ep - mu)))**nu
                          * dnorm(ep, 0, site_sigma)
                          * (ep_min <= ep <= ep_max))
    d_density = lambda ep: ep / site_sigma**2 + nu / (1 + exp(mu - ep))
    mode = bisect_interval(d_density, -100, 100)
    # density() is zero outside [ep_min, ep_max].  Clamp the mode on both
    # sides so the envelope height dmode is taken inside the support; a mode
    # above ep_max previously gave dmode == 0 and a division by zero below.
    mode = min(max(mode, ep_min), ep_max)
    dmode = density(mode)
    # calculate mean epsilon via rejection sampling
    eps = []
    while len(eps) < N:
        ep = random.random() * (ep_max - ep_min) + ep_min
        if random.random() < density(ep) / dmode:
            eps.append(ep)
    des_mean_ep = mean(eps)

    def mean_ep(lamb):
        try:
            psfm = psfm_from_matrix(matrix, lamb=lamb)
            return sum([ep * p
                        for (mat_row, psfm_row) in zip(matrix, psfm)
                        for (ep, p) in zip(mat_row, psfm_row)])
        except Exception as err:
            # Carry the context in the exception instead of printing it.
            raise Exception("mean_ep failed for lamb=%r, matrix=%r: %r"
                            % (lamb, matrix, err))

    try:
        lamb = bisect_interval(lambda l: mean_ep(l) - des_mean_ep, -20, 20)
    except Exception as err:
        raise Exception(
            "predict_ic could not solve for lamb "
            "(matrix=%r, mu=%r, Ne=%r): %r" % (matrix, mu, Ne, err))
    tilted_psfm = psfm_from_matrix(matrix, lamb)
    return sum([2 - h(col) for col in tilted_psfm])
def mean_ep(lamb):
    """Sum ``energy * probability`` over paired entries of matrix and PSFM.

    The PSFM comes from ``psfm_from_matrix(matrix, lamb=lamb)``; ``matrix``
    is taken from the surrounding scope rather than passed in.
    """
    weights = psfm_from_matrix(matrix, lamb=lamb)
    total = 0
    # Accumulate row by row, left to right within each row.
    for energies, probs in zip(matrix, weights):
        for energy, prob in zip(energies, probs):
            total += energy * prob
    return total
def f(lamb):
    """Return the PSFM-weighted energy sum for ``lamb`` minus ``des_ep``.

    ``matrix`` and ``des_ep`` are read from the surrounding scope.  Each row
    is summed on its own before the row totals are added together.
    """
    weights = psfm_from_matrix(matrix, lamb)
    row_totals = []
    for energies, probs in zip(matrix, weights):
        row_total = 0
        for energy, prob in zip(energies, probs):
            row_total += energy * prob
        row_totals.append(row_total)
    return sum(row_totals) - des_ep
def log_Z_hack((matrix, mu, Ne), N): L = len(matrix) mat_mu = sum(map(mean, matrix)) mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix))) log_perc_below_threshold = norm.logcdf(mu - log((Ne - 1)), mat_mu, mat_sigma) log_Zs = L * log(4) + log_perc_below_threshold ans_ref = ((N * L * log(4)) + log_perc_below_threshold) ans = N * log_Zs return ans def log_ZS_importance((matrix, mu, Ne), trials=1000): L = len(matrix) psfm = psfm_from_matrix(matrix) log_psfm = [[log(p) for p in row] for row in psfm] log_psfm_prob = lambda site: score_seq(log_psfm, site) sites = [sample_from_psfm(psfm) for _ in xrange(trials)] mean_ZS = mean( exp( log_fhat((matrix, mu, Ne), [site]) + log(1.0 / 4**L) - log_psfm_prob(site)) for site in sites) ZS = 4**L * mean_ZS return log(ZS) def log_ZS_importance_ref((matrix, mu, Ne), trials=1000): L = len(matrix) psfm = [[0.25] * 4 for _ in range(L)] log_psfm = [[log(p) for p in row] for row in psfm]
def mean_ep(lamb):
    """Sum ``energy * probability`` over paired entries of matrix and PSFM.

    The PSFM comes from ``psfm_from_matrix(matrix, lamb=lamb)``; ``matrix``
    is taken from the surrounding scope rather than passed in.
    """
    weights = psfm_from_matrix(matrix, lamb=lamb)
    total = 0
    # Accumulate row by row, left to right within each row.
    for energies, probs in zip(matrix, weights):
        for energy, prob in zip(energies, probs):
            total += energy * prob
    return total
def f(lamb):
    """Return the PSFM-weighted energy sum for ``lamb`` minus ``des_ep``.

    ``matrix`` and ``des_ep`` are read from the surrounding scope.  Each row
    is summed on its own before the row totals are added together.
    """
    weights = psfm_from_matrix(matrix, lamb)
    row_totals = []
    for energies, probs in zip(matrix, weights):
        row_total = 0
        for energy, prob in zip(energies, probs):
            row_total += energy * prob
        row_totals.append(row_total)
    return sum(row_totals) - des_ep
log_ZS = log_ZS_hack((matrix, mu, Ne), N) return N * log_ZS def log_Z_hack((matrix, mu, Ne), N): L = len(matrix) mat_mu = sum(map(mean,matrix)) mat_sigma = sqrt(sum(map(lambda xs:variance(xs,correct=False), matrix))) log_perc_below_threshold = norm.logcdf(mu - log((Ne-1)), mat_mu, mat_sigma) log_Zs = L * log(4) + log_perc_below_threshold ans_ref = ((N*L * log(4)) + log_perc_below_threshold) ans = N * log_Zs return ans def log_ZS_importance((matrix, mu, Ne), trials=1000): L = len(matrix) psfm = psfm_from_matrix(matrix) log_psfm = [[log(p) for p in row] for row in psfm] log_psfm_prob = lambda site:score_seq(log_psfm, site) sites = [sample_from_psfm(psfm) for _ in xrange(trials)] mean_ZS = mean(exp(log_fhat((matrix, mu, Ne), [site]) + log(1.0/4**L) - log_psfm_prob(site)) for site in sites) ZS = 4**L * mean_ZS return log(ZS) def log_ZS_importance_ref((matrix, mu, Ne), trials=1000): L = len(matrix) psfm = [[0.25]*4 for _ in range(L)] log_psfm = [[log(p) for p in row] for row in psfm] log_psfm_prob = lambda site:score_seq(log_psfm, site) sites = [sample_from_psfm(psfm) for _ in xrange(trials)] mean_ZS = mean(exp(log_fhat((matrix, mu, Ne), [site]) + log(1.0/4**L) - log_psfm_prob(site))