コード例 #1
0
def sample_motif_ar_tilted(matrix, mu, Ne, N):
    """Collect N sites by accept/reject sampling against ``phat``.

    Candidate sites come from ``random_site(L)`` and are scored with
    ``score_seq(matrix, site)``.  A candidate with score ``ep`` is kept when
    a uniform draw falls below ``phat(ep) / phat(mode)``, where
    ``phat(ep) = (1 / (1 + exp(ep - mu))) ** (Ne - 1)`` and ``mode`` is the
    value returned by ``bisect_interval(d_density, -100, 100)`` (replaced by
    ``ep_min + 1`` when that value lies below ``ep_min``).

    Args:
        matrix: sequence of rows of numeric scores; ``len(matrix)`` is used
            as the site length, and the sums of the row minima / maxima give
            ``ep_min`` / ``ep_max``.
        mu: offset subtracted from the score inside ``phat`` and ``density``.
        Ne: ``Ne - 1`` is the exponent applied in ``phat``.
        N: number of accepted sites to collect.

    Returns:
        List of the N accepted sites, in acceptance order.

    NOTE(review): ``dmode``, ``lamb``, ``tilted_psfm`` and
    ``log_tilted_psfm`` are computed but not used by the sampling loop, and
    candidates are drawn with ``random_site`` rather than from the tilted
    PSFM -- confirm whether the tilted proposal was meant to be wired in.
    """
    nu = Ne - 1
    ep_min, ep_max, L = sum(map(min,matrix)), sum(map(max,matrix)), len(matrix)
    site_sigma = site_sigma_from_matrix(matrix)
    density = lambda ep:(1/(1+exp(ep-mu)))**(Ne-1) * dnorm(ep,0,site_sigma)*(ep_min <= ep <= ep_max)
    d_density = lambda ep:ep/site_sigma**2 + nu/(1+exp(mu-ep))
    phat = lambda ep:(1/(1+exp(ep-mu)))**(Ne-1)
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min + 1 # don't want mode right on the nose of ep_min for sampling purposes, so offset it a bit
    dmode = density(mode)
    # Scale for the acceptance ratio below.  This name was previously used
    # without ever being bound, so the sampling loop raised NameError.
    # NOTE(review): for Ne > 1 phat decreases with ep, so any site scoring
    # below `mode` has ratio > 1 and is always accepted -- confirm that
    # phat(mode) is the intended envelope.
    pmode = phat(mode)
    motif = []
    def mean_ep(lamb):
        # Sum of every matrix entry times the matching PSFM entry at this lamb.
        psfm = psfm_from_matrix(matrix, lamb=lamb)
        return sum([ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
                    for (ep, p) in zip(mat_row, psfm_row)])
    # Search lamb in [-20, 20] so that mean_ep(lamb) matches the mode.
    lamb = bisect_interval(lambda l:mean_ep(l) - mode, -20, 20)
    tilted_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted_psfm = [map(log,row) for row in tilted_psfm]
    while len(motif) < N:
        site = random_site(L)
        ep = score_seq(matrix, site)
        if random.random() < phat(ep)/pmode:
            motif.append(site)
    return motif
コード例 #2
0
def sample_motif_ar_tilted(matrix, mu, Ne, N):
    """Collect N sites by accept/reject sampling against ``phat``.

    Candidate sites come from ``random_site(L)`` and are scored with
    ``score_seq(matrix, site)``.  A candidate with score ``ep`` is kept when
    a uniform draw falls below ``phat(ep) / phat(mode)``, where
    ``phat(ep) = (1 / (1 + exp(ep - mu))) ** (Ne - 1)`` and ``mode`` is the
    value returned by ``bisect_interval(d_density, -100, 100)`` (replaced by
    ``ep_min + 1`` when that value lies below ``ep_min``).

    Args:
        matrix: sequence of rows of numeric scores; ``len(matrix)`` is used
            as the site length, and the sums of the row minima / maxima give
            ``ep_min`` / ``ep_max``.
        mu: offset subtracted from the score inside ``phat`` and ``density``.
        Ne: ``Ne - 1`` is the exponent applied in ``phat``.
        N: number of accepted sites to collect.

    Returns:
        List of the N accepted sites, in acceptance order.

    NOTE(review): ``dmode``, ``lamb``, ``tilted_psfm`` and
    ``log_tilted_psfm`` are computed but not used by the sampling loop, and
    candidates are drawn with ``random_site`` rather than from the tilted
    PSFM -- confirm whether the tilted proposal was meant to be wired in.
    """
    nu = Ne - 1
    ep_min, ep_max, L = sum(map(min, matrix)), sum(map(max,
                                                       matrix)), len(matrix)
    site_sigma = site_sigma_from_matrix(matrix)
    density = lambda ep: (1 / (1 + exp(ep - mu)))**(Ne - 1) * dnorm(
        ep, 0, site_sigma) * (ep_min <= ep <= ep_max)
    d_density = lambda ep: ep / site_sigma**2 + nu / (1 + exp(mu - ep))
    phat = lambda ep: (1 / (1 + exp(ep - mu)))**(Ne - 1)
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min + 1  # don't want mode right on the nose of ep_min for sampling purposes, so offset it a bit
    dmode = density(mode)
    # Scale for the acceptance ratio below.  This name was previously used
    # without ever being bound, so the sampling loop raised NameError.
    # NOTE(review): for Ne > 1 phat decreases with ep, so any site scoring
    # below `mode` has ratio > 1 and is always accepted -- confirm that
    # phat(mode) is the intended envelope.
    pmode = phat(mode)
    motif = []

    def mean_ep(lamb):
        # Sum of every matrix entry times the matching PSFM entry at this lamb.
        psfm = psfm_from_matrix(matrix, lamb=lamb)
        return sum([
            ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
            for (ep, p) in zip(mat_row, psfm_row)
        ])

    # Search lamb in [-20, 20] so that mean_ep(lamb) matches the mode.
    lamb = bisect_interval(lambda l: mean_ep(l) - mode, -20, 20)
    tilted_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted_psfm = [map(log, row) for row in tilted_psfm]
    while len(motif) < N:
        site = random_site(L)
        ep = score_seq(matrix, site)
        if random.random() < phat(ep) / pmode:
            motif.append(site)
    return motif
コード例 #3
0
 def mean_ep(lamb):
     try:
         psfm = psfm_from_matrix(matrix, lamb=lamb)
         return sum([ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
                     for (ep, p) in zip(mat_row, psfm_row)])
     except:
         print matrix, lamb
         raise Exception
コード例 #4
0
def rejection_sample_site((matrix, mu, Ne)):
    """Draw candidate sites from the matrix's PSFM and compute a ratio per site.

    Takes a single ``(matrix, mu, Ne)`` tuple.  Sites are sampled from
    ``psfm_from_matrix(matrix)``; for each one, ``log_fhat`` and the
    log-probability under the PSFM are combined into ``ars``.

    NOTE(review): only the opening part of this function is visible here --
    it ends after computing ``ars`` without returning anything, and
    ``trials`` is not bound inside it (presumably a module-level name or a
    lost parameter; verify against the full file).
    """
    psfm = psfm_from_matrix(matrix)
    log_psfm = [[log(p) for p in row] for row in psfm]
    # Log-probability of a site under the PSFM, via the log-PSFM entries.
    log_psfm_prob = lambda site:score_seq(log_psfm, site)
    # NOTE(review): this negates a sum of raw per-row maxima of the PSFM,
    # not a sum of their logs -- confirm this is the intended log envelope.
    log_M = -sum(map(max,psfm))
    sites = [sample_from_psfm(psfm) for _ in xrange(trials)]
    log_fs = [log_fhat((matrix, mu, Ne), [site]) for site in sites]
    log_qs = [log_psfm_prob(site) for site in sites]
    # exp(log f - (log q + log M)) per site; presumably acceptance ratios.
    ars = [exp(log_f - (log_q + log_M)) for log_f, log_q in zip(log_fs, log_qs)]
コード例 #5
0
 def mean_ep(lamb):
     try:
         psfm = psfm_from_matrix(matrix, lamb=lamb)
         return sum([
             ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
             for (ep, p) in zip(mat_row, psfm_row)
         ])
     except:
         print matrix, lamb
         raise Exception
コード例 #6
0
def rejection_sample_site((matrix, mu, Ne)):
    """Draw candidate sites from the matrix's PSFM and compute a ratio per site.

    Takes a single ``(matrix, mu, Ne)`` tuple.  Sites are sampled from
    ``psfm_from_matrix(matrix)``; for each one, ``log_fhat`` and the
    log-probability under the PSFM are combined into ``ars``.

    NOTE(review): only the opening part of this function is visible here --
    it ends after computing ``ars`` without returning anything, and
    ``trials`` is not bound inside it (presumably a module-level name or a
    lost parameter; verify against the full file).
    """
    psfm = psfm_from_matrix(matrix)
    log_psfm = [[log(p) for p in row] for row in psfm]
    # Log-probability of a site under the PSFM, via the log-PSFM entries.
    log_psfm_prob = lambda site: score_seq(log_psfm, site)
    # NOTE(review): this negates a sum of raw per-row maxima of the PSFM,
    # not a sum of their logs -- confirm this is the intended log envelope.
    log_M = -sum(map(max, psfm))
    sites = [sample_from_psfm(psfm) for _ in xrange(trials)]
    log_fs = [log_fhat((matrix, mu, Ne), [site]) for site in sites]
    log_qs = [log_psfm_prob(site) for site in sites]
    # exp(log f - (log q + log M)) per site; presumably acceptance ratios.
    ars = [
        exp(log_f - (log_q + log_M)) for log_f, log_q in zip(log_fs, log_qs)
    ]
コード例 #7
0
def sample_site_imh(matrix, mu, Ne, lamb, iterations=None):
    """Return the final element of an ``mh`` run over sites.

    The target log-weight of a site is
    ``-(Ne - 1) * log(1 + exp(score_seq(matrix, site) - mu))``.  Proposals
    are drawn from ``psfm_from_matrix(matrix, lamb=lamb)`` without looking
    at the current state, and the proposal log-density is the site's score
    against the element-wise log of that PSFM.

    Args:
        matrix: score matrix; its length sets the default iteration count.
        mu: offset subtracted from the site score in the target.
        Ne: ``Ne - 1`` scales the target log-weight.
        lamb: forwarded to ``psfm_from_matrix`` for the proposal PSFM.
        iterations: defaults to ``10 * len(matrix)`` when None.

    Returns:
        The last item of whatever sequence ``mh`` returns.

    NOTE(review): ``iterations`` is computed but never forwarded to ``mh``
    -- confirm whether it should be passed through.
    """
    exponent = Ne - 1
    width = len(matrix)
    if iterations is None:
        iterations = 10 * width

    proposal_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_proposal_psfm = [map(log, column) for column in proposal_psfm]

    def log_target(site):
        return -exponent * log(1 + exp(score_seq(matrix, site) - mu))

    def draw(_):
        # The current state is ignored: every proposal is a fresh PSFM draw.
        return sample_from_psfm(proposal_psfm)

    def log_draw_density(xp, _):
        return score_seq(log_proposal_psfm, xp)

    chain = mh(log_target, proposal=draw, dprop=log_draw_density,
               x0=draw(None), use_log=True)
    return chain[-1]
コード例 #8
0
def predict_ic(matrix, mu, Ne, N=100):
    nu = Ne - 1
    ep_min, ep_max, L = sum(map(min, matrix)), sum(map(max,
                                                       matrix)), len(matrix)
    site_sigma = site_sigma_from_matrix(matrix)
    density = lambda ep: (1 / (1 + exp(ep - mu)))**(Ne - 1) * dnorm(
        ep, 0, site_sigma) * (ep_min <= ep <= ep_max)
    d_density = lambda ep: ep / site_sigma**2 + nu / (1 + exp(mu - ep))
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min
    dmode = density(mode)
    # calculate mean epsilon via rejection sampling
    eps = []
    while len(eps) < N:
        ep = random.random() * (ep_max - ep_min) + ep_min
        if random.random() < density(ep) / dmode:
            eps.append(ep)
    #return eps
    des_mean_ep = mean(eps)
    des_mean_ep_analytic = integrate.quad(lambda ep: ep * density(ep), ep_min,
                                          ep_max)

    # print "des_means:", des_mean_ep, des_mean_ep_analytic
    # print "min ep: %s max_ep: %s des_mean_ep: %s" % (ep_min, ep_max, des_mean_ep)
    def mean_ep(lamb):
        try:
            psfm = psfm_from_matrix(matrix, lamb=lamb)
            return sum([
                ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
                for (ep, p) in zip(mat_row, psfm_row)
            ])
        except:
            print matrix, lamb
            raise Exception

    try:
        lamb = bisect_interval(lambda l: mean_ep(l) - des_mean_ep, -20, 20)
    except:
        print matrix, mu, Ne
        raise Exception
    tilted_psfm = psfm_from_matrix(matrix, lamb)
    return sum([2 - h(col) for col in tilted_psfm])
コード例 #9
0
def sample_site_imh(matrix, mu, Ne, lamb, iterations=None):
    """Return the final element of an ``mh`` run over sites.

    The target log-weight of a site is
    ``-(Ne - 1) * log(1 + exp(score_seq(matrix, site) - mu))``.  Proposals
    are drawn from ``psfm_from_matrix(matrix, lamb=lamb)`` without looking
    at the current state, and the proposal log-density is the site's score
    against the element-wise log of that PSFM.

    Args:
        matrix: score matrix; its length sets the default iteration count.
        mu: offset subtracted from the site score in the target.
        Ne: ``Ne - 1`` scales the target log-weight.
        lamb: forwarded to ``psfm_from_matrix`` for the proposal PSFM.
        iterations: defaults to ``10 * len(matrix)`` when None.

    Returns:
        The last item of whatever sequence ``mh`` returns.

    NOTE(review): ``iterations`` is computed but never forwarded to ``mh``
    -- confirm whether it should be passed through.
    """
    power = Ne - 1
    site_length = len(matrix)
    if iterations is None:
        iterations = 10 * site_length

    tilted = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted = [map(log, entries) for entries in tilted]

    def target_log_weight(site):
        score = score_seq(matrix, site)
        return -power * log(1 + exp(score - mu))

    def propose(_):
        # Independent of the current state: always a fresh draw.
        return sample_from_psfm(tilted)

    def proposal_log_density(xp, _):
        return score_seq(log_tilted, xp)

    samples = mh(
        target_log_weight,
        proposal=propose,
        dprop=proposal_log_density,
        x0=propose(None),
        use_log=True,
    )
    return samples[-1]
コード例 #10
0
def predict_ic(matrix, mu, Ne, N=100):
    nu = Ne - 1
    ep_min, ep_max, L = sum(map(min,matrix)), sum(map(max,matrix)), len(matrix)
    site_sigma = site_sigma_from_matrix(matrix)
    density = lambda ep:(1/(1+exp(ep-mu)))**(Ne-1) * dnorm(ep,0,site_sigma)*(ep_min <= ep <= ep_max)
    d_density = lambda ep:ep/site_sigma**2 + nu/(1+exp(mu-ep))
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min
    dmode = density(mode)
    # calculate mean epsilon via rejection sampling
    eps = []
    while len(eps) < N:
        ep = random.random() * (ep_max - ep_min) + ep_min
        if random.random() < density(ep)/dmode:
            eps.append(ep)
    #return eps
    des_mean_ep = mean(eps)
    des_mean_ep_analytic = integrate.quad(lambda ep:ep*density(ep), ep_min, ep_max)
    # print "des_means:", des_mean_ep, des_mean_ep_analytic
    # print "min ep: %s max_ep: %s des_mean_ep: %s" % (ep_min, ep_max, des_mean_ep)
    def mean_ep(lamb):
        try:
            psfm = psfm_from_matrix(matrix, lamb=lamb)
            return sum([ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
                        for (ep, p) in zip(mat_row, psfm_row)])
        except:
            print matrix, lamb
            raise Exception
    try:
        lamb = bisect_interval(lambda l:mean_ep(l) - des_mean_ep, -20, 20)
    except:
        print matrix, mu, Ne
        raise Exception
    tilted_psfm = psfm_from_matrix(matrix, lamb)
    return sum([2 - h(col) for col in tilted_psfm])
コード例 #11
0
 def mean_ep(lamb):
     """Return the PSFM-weighted sum of the entries of ``matrix`` at ``lamb``.

     ``matrix`` is read from the enclosing scope; the weights come from
     ``psfm_from_matrix(matrix, lamb=lamb)``.
     """
     weights = psfm_from_matrix(matrix, lamb=lamb)
     products = []
     for score_row, weight_row in zip(matrix, weights):
         # Pair each score with the weight at the same position in its row.
         products.extend(
             score * weight for score, weight in zip(score_row, weight_row))
     return sum(products)
コード例 #12
0
 def f(lamb):
     """Return the PSFM-weighted sum of ``matrix`` at ``lamb`` minus ``des_ep``.

     ``matrix`` and ``des_ep`` are read from the enclosing scope.  Each row's
     weighted sum is computed first; the row sums are then added together.
     """
     weights = psfm_from_matrix(matrix, lamb)
     row_totals = []
     for score_row, weight_row in zip(matrix, weights):
         row_totals.append(
             sum(score * weight for score, weight in zip(score_row, weight_row)))
     return sum(row_totals) - des_ep
コード例 #13
0
def log_Z_hack((matrix, mu, Ne), N):
    L = len(matrix)
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log((Ne - 1)), mat_mu,
                                           mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    ans_ref = ((N * L * log(4)) + log_perc_below_threshold)
    ans = N * log_Zs
    return ans


def log_ZS_importance((matrix, mu, Ne), trials=1000):
    L = len(matrix)
    psfm = psfm_from_matrix(matrix)
    log_psfm = [[log(p) for p in row] for row in psfm]
    log_psfm_prob = lambda site: score_seq(log_psfm, site)
    sites = [sample_from_psfm(psfm) for _ in xrange(trials)]
    mean_ZS = mean(
        exp(
            log_fhat((matrix, mu, Ne), [site]) + log(1.0 / 4**L) -
            log_psfm_prob(site)) for site in sites)
    ZS = 4**L * mean_ZS
    return log(ZS)


def log_ZS_importance_ref((matrix, mu, Ne), trials=1000):
    """Begin an importance-sampling computation with a uniform PSFM.

    NOTE(review): only the opening lines of this function are visible here;
    it stops after building ``log_psfm``, so its return value cannot be
    documented from this view.
    """
    L = len(matrix)
    # One row per matrix row, each with four entries of 0.25.
    psfm = [[0.25] * 4 for _ in range(L)]
    log_psfm = [[log(p) for p in row] for row in psfm]
コード例 #14
0
 def mean_ep(lamb):
     """Return the PSFM-weighted sum of the entries of ``matrix`` at ``lamb``.

     ``matrix`` is read from the enclosing scope; the weights come from
     ``psfm_from_matrix(matrix, lamb=lamb)``.
     """
     weights = psfm_from_matrix(matrix, lamb=lamb)
     products = []
     for score_row, weight_row in zip(matrix, weights):
         # Pair each score with the weight at the same position in its row.
         products.extend(
             score * weight for score, weight in zip(score_row, weight_row))
     return sum(products)
コード例 #15
0
 def f(lamb):
     """Return the PSFM-weighted sum of ``matrix`` at ``lamb`` minus ``des_ep``.

     ``matrix`` and ``des_ep`` are read from the enclosing scope.  Each row's
     weighted sum is computed first; the row sums are then added together.
     """
     weights = psfm_from_matrix(matrix, lamb)
     row_totals = []
     for score_row, weight_row in zip(matrix, weights):
         row_totals.append(
             sum(score * weight for score, weight in zip(score_row, weight_row)))
     return sum(row_totals) - des_ep
コード例 #16
0
    log_ZS = log_ZS_hack((matrix, mu, Ne), N)
    return N * log_ZS

def log_Z_hack((matrix, mu, Ne), N):
    L = len(matrix)
    mat_mu = sum(map(mean,matrix))
    mat_sigma = sqrt(sum(map(lambda xs:variance(xs,correct=False), matrix)))
    log_perc_below_threshold = norm.logcdf(mu - log((Ne-1)), mat_mu, mat_sigma)
    log_Zs = L * log(4) + log_perc_below_threshold
    ans_ref = ((N*L * log(4)) +  log_perc_below_threshold)
    ans = N * log_Zs
    return ans

def log_ZS_importance((matrix, mu, Ne), trials=1000):
    L = len(matrix)
    psfm = psfm_from_matrix(matrix)
    log_psfm = [[log(p) for p in row] for row in psfm]
    log_psfm_prob = lambda site:score_seq(log_psfm, site)
    sites = [sample_from_psfm(psfm) for _ in xrange(trials)]
    mean_ZS = mean(exp(log_fhat((matrix, mu, Ne), [site]) + log(1.0/4**L) - log_psfm_prob(site))
                  for site in sites)
    ZS = 4**L * mean_ZS
    return log(ZS)

def log_ZS_importance_ref((matrix, mu, Ne), trials=1000):
    L = len(matrix)
    psfm = [[0.25]*4 for _ in range(L)]
    log_psfm = [[log(p) for p in row] for row in psfm]
    log_psfm_prob = lambda site:score_seq(log_psfm, site)
    sites = [sample_from_psfm(psfm) for _ in xrange(trials)]
    mean_ZS = mean(exp(log_fhat((matrix, mu, Ne), [site]) + log(1.0/4**L) - log_psfm_prob(site))