Ejemplo n.º 1
0
def cluster(xs):
    """Split the scalars ``xs`` between two Gaussian components.

    The components start at ``min(xs)`` and ``max(xs)`` with sigma 1.  Ten
    rounds follow: every point is assigned to the component whose ``dnorm``
    value is strictly larger (ties go to component 0), then each component's
    mu and sigma are recomputed with ``mean`` and ``sd(..., correct=False)``
    from its own points.  A sigma of exactly 0 is replaced by the other
    component's sigma.  The current parameters are printed after each round.

    Returns a one-argument function giving, for a value ``x``, component 1's
    density divided by the sum of both component densities.
    """
    mu0, mu1 = min(xs), max(xs)
    sigma0 = sigma1 = 1
    for _ in range(10):
        xs0, xs1 = [], []
        for x in xs:
            p0 = dnorm(x, mu0, sigma0)
            p1 = dnorm(x, mu1, sigma1)
            # Strict comparison: a tie keeps the point in component 0.
            if p1 > p0:
                xs1.append(x)
            else:
                xs0.append(x)
        mu0 = mean(xs0)
        sigma0 = sd(xs0, correct=False)
        mu1 = mean(xs1)
        sigma1 = sd(xs1, correct=False)
        # A degenerate (zero-width) component borrows the other one's width.
        if sigma0 == 0:
            sigma0 = sigma1
        if sigma1 == 0:
            sigma1 = sigma0
        report = "mu0: {} sigma0: {} mu1: {}: sigma1: {} xs0: {} xs1: {}".format(
            mu0, sigma0, mu1, sigma1, len(xs0), len(xs1))
        print(report)

    def f(x):
        dens0 = dnorm(x, mu0, sigma0)
        dens1 = dnorm(x, mu1, sigma1)
        return dens1 / (dens0 + dens1)

    return f
 def marginal(i, j):
     """Set up two integrals for the matrix entry ``matrix[i][j]``.

     Reads ``matrix``, ``f``, ``site_mu``, ``site_sigma``, ``ep_min`` and
     ``ep_max`` from the enclosing scope.

     NOTE(review): ``nom`` and ``denom`` are computed but nothing is returned
     in the visible code -- confirm whether a return statement is missing.
     """
     # Drop the row whose index equals j.
     # NOTE(review): j is used as a row index here but as a column index in
     # matrix[i][j] below -- confirm which one is intended.
     red_matrix = [row for jp, row in enumerate(matrix) if not j == jp]
     red_site_mu = site_mu_from_matrix(red_matrix)
     red_site_sigma = site_sigma_from_matrix(red_matrix)
     ep = matrix[i][j]
     # Integral over [ep_min, ep_max] of f(ep + ep_rest) weighted by the
     # reduced-matrix dnorm.
     nom = integrate.quad(lambda ep_rest:f(ep + ep_rest)*dnorm(ep_rest, red_site_mu, red_site_sigma),
                          ep_min, ep_max)
     # Integral over [ep_min, ep_max] of f(ep_rest) weighted by the
     # full-matrix dnorm.
     denom = integrate.quad(lambda ep_rest:f(ep_rest)*dnorm(ep_rest, site_mu, site_sigma), ep_min, ep_max)
def test_dphidsigma():
    """Evaluate ``dphidsigma`` and a ``diff``-based estimate at a random point.

    ``x``, ``mu`` and ``sigma`` are drawn, in that order, with
    ``random.random()``.  Returns the pair ``(pred, obs)`` where ``pred`` is
    ``dphidsigma(x, mu, sigma)`` and ``obs`` is ``diff`` applied to ``dnorm``
    viewed as a function of sigma, at ``sigma``, with third argument
    ``10**-10``.  No comparison is made here; that is left to the caller.
    """
    x, mu, sigma = random.random(), random.random(), random.random()

    def density_of_sigma(s):
        return dnorm(x, mu, s)

    predicted = dphidsigma(x, mu, sigma)
    observed = diff(density_of_sigma, sigma, 10**-10)
    return predicted, observed
Ejemplo n.º 4
0
def sample_motif_ar_tilted(matrix, mu, Ne, N):
    """Draw N sites by accept-reject sampling with weight phat(ep).

    Sites are proposed with ``random_site(L)`` and scored with
    ``score_seq(matrix, site)``.  A proposal with score ``ep`` is accepted
    with probability ``phat(ep) / pmode`` where
    ``phat(ep) = (1 / (1 + exp(ep - mu)))**(Ne - 1)``.

    Args:
        matrix: sequence of rows of scores; ``L = len(matrix)``.
        mu: offset subtracted from the score inside ``phat``.
        Ne: ``phat`` is raised to the power ``Ne - 1``.
        N: number of accepted sites to collect.

    Returns:
        A list of N accepted sites.
    """
    nu = Ne - 1
    L = len(matrix)
    ep_min, ep_max = sum(map(min, matrix)), sum(map(max, matrix))
    site_sigma = site_sigma_from_matrix(matrix)
    density = lambda ep:(1/(1+exp(ep-mu)))**(Ne-1) * dnorm(ep,0,site_sigma)*(ep_min <= ep <= ep_max)
    d_density = lambda ep:ep/site_sigma**2 + nu/(1+exp(mu-ep))
    phat = lambda ep:(1/(1+exp(ep-mu)))**(Ne-1)
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min + 1 # don't want mode right on the nose of ep_min for sampling purposes, so offset it a bit
    dmode = density(mode)
    motif = []
    def mean_ep(lamb):
        psfm = psfm_from_matrix(matrix, lamb=lamb)
        return sum([ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
                    for (ep, p) in zip(mat_row, psfm_row)])
    # NOTE(review): lamb and the tilted PSFM are computed but the sampling
    # loop below still proposes with random_site(L); the tilted proposal
    # suggested by the function name is not wired in yet.
    lamb = bisect_interval(lambda l:mean_ep(l) - mode, -20, 20)
    tilted_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted_psfm = [map(log,row) for row in tilted_psfm]
    # The acceptance test previously divided by an undefined name `pmode`
    # (NameError on the first proposal).  phat is monotone in ep, so its
    # maximum over [ep_min, ep_max] sits at one of the end points; dividing by
    # that maximum keeps the acceptance probability in [0, 1].
    # Assumes score_seq returns one matrix entry per row summed, so that every
    # score lies in [ep_min, ep_max] -- TODO confirm.
    pmode = max(phat(ep_min), phat(ep_max))
    while len(motif) < N:
        site = random_site(L)
        ep = score_seq(matrix, site)
        if random.random() < phat(ep)/pmode:
            motif.append(site)
    return motif
def test_dphidmu():
    """Evaluate ``dphidmu`` and a ``diff``-based estimate at a random point.

    ``x``, ``mu`` and ``sigma`` are drawn, in that order, with
    ``random.random()``.  Returns the pair ``(pred, obs)`` where ``pred`` is
    ``dphidmu(x, mu, sigma)`` and ``obs`` is ``diff`` applied to ``dnorm``
    viewed as a function of mu, at ``mu``, with third argument ``10**-10``.
    No comparison is made here; that is left to the caller.
    """
    x, mu, sigma = random.random(), random.random(), random.random()

    def density_of_mu(m):
        return dnorm(x, m, sigma)

    predicted = dphidmu(x, mu, sigma)
    observed = diff(density_of_mu, mu, 10**-10)
    return predicted, observed
Ejemplo n.º 6
0
def sample_motif_ar_tilted(matrix, mu, Ne, N):
    """Draw N sites by accept-reject sampling with weight phat(ep).

    Sites are proposed with ``random_site(L)`` and scored with
    ``score_seq(matrix, site)``.  A proposal with score ``ep`` is accepted
    with probability ``phat(ep) / pmode`` where
    ``phat(ep) = (1 / (1 + exp(ep - mu)))**(Ne - 1)``.

    Args:
        matrix: sequence of rows of scores; ``L = len(matrix)``.
        mu: offset subtracted from the score inside ``phat``.
        Ne: ``phat`` is raised to the power ``Ne - 1``.
        N: number of accepted sites to collect.

    Returns:
        A list of N accepted sites.
    """
    nu = Ne - 1
    L = len(matrix)
    ep_min, ep_max = sum(map(min, matrix)), sum(map(max, matrix))
    site_sigma = site_sigma_from_matrix(matrix)
    density = lambda ep: (1 / (1 + exp(ep - mu)))**(Ne - 1) * dnorm(
        ep, 0, site_sigma) * (ep_min <= ep <= ep_max)
    d_density = lambda ep: ep / site_sigma**2 + nu / (1 + exp(mu - ep))
    phat = lambda ep: (1 / (1 + exp(ep - mu)))**(Ne - 1)
    mode = bisect_interval(d_density, -100, 100)
    if mode < ep_min:
        mode = ep_min + 1  # don't want mode right on the nose of ep_min for sampling purposes, so offset it a bit
    dmode = density(mode)
    motif = []

    def mean_ep(lamb):
        psfm = psfm_from_matrix(matrix, lamb=lamb)
        return sum([
            ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
            for (ep, p) in zip(mat_row, psfm_row)
        ])

    # NOTE(review): lamb and the tilted PSFM are computed but the sampling
    # loop below still proposes with random_site(L); the tilted proposal
    # suggested by the function name is not wired in yet.
    lamb = bisect_interval(lambda l: mean_ep(l) - mode, -20, 20)
    tilted_psfm = psfm_from_matrix(matrix, lamb=lamb)
    log_tilted_psfm = [map(log, row) for row in tilted_psfm]
    # The acceptance test previously divided by an undefined name `pmode`
    # (NameError on the first proposal).  phat is monotone in ep, so its
    # maximum over [ep_min, ep_max] sits at one of the end points; dividing by
    # that maximum keeps the acceptance probability in [0, 1].
    # Assumes score_seq returns one matrix entry per row summed, so that every
    # score lies in [ep_min, ep_max] -- TODO confirm.
    pmode = max(phat(ep_min), phat(ep_max))
    while len(motif) < N:
        site = random_site(L)
        ep = score_seq(matrix, site)
        if random.random() < phat(ep) / pmode:
            motif.append(site)
    return motif
Ejemplo n.º 7
0
def log_ZS_sophisticated(params):
    """Approximate log Z_S by fitting a Gaussian at the integrand's mode.

    The integrand is ``dnorm(ep, mat_mu, mat_sigma) * (1 + exp(ep - mu))**-nu``
    with ``nu = Ne - 1``, ``mat_mu`` the sum of the row means and
    ``mat_sigma`` the square root of the summed row variances
    (``variance(..., correct=False)``).  The mode is the root of the
    derivative of the log integrand, and the integral is estimated as the
    integrand's peak height divided by the peak height of a Gaussian whose
    width is taken from the curvature at the mode.

    Args:
        params: a ``(matrix, mu, Ne)`` triple, passed as one argument.

    Returns:
        ``L * log(4) + log(mean_ZS)`` with ``L = len(matrix)``.

    Raises:
        Whatever ``secant_interval`` raises; the inputs are printed first.
    """
    # Explicit unpacking instead of a tuple parameter: same call contract,
    # but also valid syntax on Python 3.
    matrix, mu, Ne = params
    L = len(matrix)
    nu = Ne - 1
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    # Derivative of the log integrand with respect to ep.
    dfde = lambda ep: -nu*exp(ep-mu)/(1+exp(ep-mu)) - (ep-mat_mu)/mat_sigma**2
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    try:
        mode = secant_interval(dfde, ep_min - 20, ep_max + 20)
    except Exception:
        # Show the offending inputs, then re-raise the original error so its
        # type and traceback are not lost.
        print((matrix, mu, Ne))
        raise
    # Second derivative of the log integrand at the mode.
    kappa = -nu*(exp(mu-mode)/(1+exp(mu-mode))**2) - 1/mat_sigma**2
    sigma_approx = sqrt(-1/kappa)
    integrand = lambda ep: dnorm(ep, mat_mu, mat_sigma) * (1+exp(ep-mu))**-nu
    gauss_max = dnorm(mode, mode, sigma_approx)
    integrand_max = integrand(mode)
    mean_ZS = integrand_max / gauss_max
    return L * log(4) + log(mean_ZS)
Ejemplo n.º 8
0
def log_ZS_sophisticated(params):
    """Approximate log Z_S by fitting a Gaussian at the integrand's mode.

    The integrand is ``dnorm(ep, mat_mu, mat_sigma) * (1 + exp(ep - mu))**-nu``
    with ``nu = Ne - 1``, ``mat_mu`` the sum of the row means and
    ``mat_sigma`` the square root of the summed row variances
    (``variance(..., correct=False)``).  The mode is the root of the
    derivative of the log integrand, and the integral is estimated as the
    integrand's peak height divided by the peak height of a Gaussian whose
    width is taken from the curvature at the mode.

    Args:
        params: a ``(matrix, mu, Ne)`` triple, passed as one argument.

    Returns:
        ``L * log(4) + log(mean_ZS)`` with ``L = len(matrix)``.

    Raises:
        Whatever ``secant_interval`` raises; the inputs are printed first.
    """
    # Explicit unpacking instead of a tuple parameter: same call contract,
    # but also valid syntax on Python 3.
    matrix, mu, Ne = params
    L = len(matrix)
    nu = Ne - 1
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda xs: variance(xs, correct=False), matrix)))
    # Derivative of the log integrand with respect to ep.
    dfde = lambda ep: -nu * exp(ep - mu) / (1 + exp(ep - mu)) - (
        ep - mat_mu) / mat_sigma**2
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    try:
        mode = secant_interval(dfde, ep_min - 20, ep_max + 20)
    except Exception:
        # Show the offending inputs, then re-raise the original error so its
        # type and traceback are not lost.
        print((matrix, mu, Ne))
        raise
    # Second derivative of the log integrand at the mode.
    kappa = -nu * (exp(mu - mode) / (1 + exp(mu - mode))**2) - 1 / mat_sigma**2
    sigma_approx = sqrt(-1 / kappa)
    integrand = lambda ep: dnorm(ep, mat_mu, mat_sigma) * (
        1 + exp(ep - mu))**-nu
    gauss_max = dnorm(mode, mode, sigma_approx)
    integrand_max = integrand(mode)
    mean_ZS = integrand_max / gauss_max
    return L * log(4) + log(mean_ZS)
Ejemplo n.º 9
0
def predict_ic(matrix, mu, Ne, N=100):
    """Predict a summed ``2 - h(row)`` score for a tilted PSFM of ``matrix``.

    Steps:
      1. Rejection-sample N scores ``ep`` uniformly on ``[ep_min, ep_max]``
         against ``density(ep) = (1/(1+exp(ep-mu)))**(Ne-1) *
         dnorm(ep, 0, site_sigma)`` and take their mean.
      2. Find the tilt ``lamb`` in ``[-20, 20]`` for which the expected score
         under ``psfm_from_matrix(matrix, lamb=lamb)`` equals that mean.
      3. Return ``sum(2 - h(row))`` over the rows of the tilted PSFM
         (``h`` is presumably a per-row entropy in bits -- TODO confirm).

    Args:
        matrix: sequence of rows of scores.
        mu: offset subtracted from the score inside the density.
        Ne: the occupancy factor is raised to the power ``Ne - 1``.
        N: number of accepted rejection samples (default 100).

    Raises:
        Whatever ``psfm_from_matrix`` / ``bisect_interval`` raise; the inputs
        are printed before the error is re-raised.
    """
    nu = Ne - 1
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    site_sigma = site_sigma_from_matrix(matrix)

    def density(ep):
        # The trailing comparison zeroes the density outside [ep_min, ep_max].
        return ((1 / (1 + exp(ep - mu)))**nu * dnorm(ep, 0, site_sigma) *
                (ep_min <= ep <= ep_max))

    def d_density(ep):
        # Negative derivative of log(density); its root is the mode.
        return ep / site_sigma**2 + nu / (1 + exp(mu - ep))

    mode = bisect_interval(d_density, -100, 100)
    # Clamp on both sides.  d_density is increasing, so the density has a
    # single peak and its maximum on the interval is at the clamped mode.
    # Without the upper clamp a mode above ep_max gives dmode == 0 and a
    # division by zero in the acceptance test.
    mode = min(max(mode, ep_min), ep_max)
    dmode = density(mode)
    # calculate mean epsilon via rejection sampling
    eps = []
    while len(eps) < N:
        ep = random.random() * (ep_max - ep_min) + ep_min
        if random.random() < density(ep) / dmode:
            eps.append(ep)
    des_mean_ep = mean(eps)

    def mean_ep(lamb):
        try:
            psfm = psfm_from_matrix(matrix, lamb=lamb)
            return sum([
                ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
                for (ep, p) in zip(mat_row, psfm_row)
            ])
        except Exception:
            # Report the inputs, then let the original error propagate.
            print("{} {}".format(matrix, lamb))
            raise

    try:
        lamb = bisect_interval(lambda l: mean_ep(l) - des_mean_ep, -20, 20)
    except Exception:
        print("{} {} {}".format(matrix, mu, Ne))
        raise
    tilted_psfm = psfm_from_matrix(matrix, lamb)
    return sum([2 - h(col) for col in tilted_psfm])
Ejemplo n.º 10
0
def predict_ic(matrix, mu, Ne, N=100):
    """Predict a summed ``2 - h(row)`` score for a tilted PSFM of ``matrix``.

    Steps:
      1. Rejection-sample N scores ``ep`` uniformly on ``[ep_min, ep_max]``
         against ``density(ep) = (1/(1+exp(ep-mu)))**(Ne-1) *
         dnorm(ep, 0, site_sigma)`` and take their mean.
      2. Find the tilt ``lamb`` in ``[-20, 20]`` for which the expected score
         under ``psfm_from_matrix(matrix, lamb=lamb)`` equals that mean.
      3. Return ``sum(2 - h(row))`` over the rows of the tilted PSFM
         (``h`` is presumably a per-row entropy in bits -- TODO confirm).

    Args:
        matrix: sequence of rows of scores.
        mu: offset subtracted from the score inside the density.
        Ne: the occupancy factor is raised to the power ``Ne - 1``.
        N: number of accepted rejection samples (default 100).

    Raises:
        Whatever ``psfm_from_matrix`` / ``bisect_interval`` raise; the inputs
        are printed before the error is re-raised.
    """
    nu = Ne - 1
    ep_min, ep_max = sum(map(min,matrix)), sum(map(max,matrix))
    site_sigma = site_sigma_from_matrix(matrix)
    # The trailing comparison zeroes the density outside [ep_min, ep_max].
    density = lambda ep:(1/(1+exp(ep-mu)))**(Ne-1) * dnorm(ep,0,site_sigma)*(ep_min <= ep <= ep_max)
    # Negative derivative of log(density); its root is the mode.
    d_density = lambda ep:ep/site_sigma**2 + nu/(1+exp(mu-ep))
    mode = bisect_interval(d_density, -100, 100)
    # Clamp on both sides.  d_density is increasing, so the density has a
    # single peak and its maximum on the interval is at the clamped mode.
    # Without the upper clamp a mode above ep_max gives dmode == 0 and a
    # division by zero in the acceptance test.
    if mode < ep_min:
        mode = ep_min
    elif mode > ep_max:
        mode = ep_max
    dmode = density(mode)
    # calculate mean epsilon via rejection sampling
    eps = []
    while len(eps) < N:
        ep = random.random() * (ep_max - ep_min) + ep_min
        if random.random() < density(ep)/dmode:
            eps.append(ep)
    des_mean_ep = mean(eps)
    def mean_ep(lamb):
        try:
            psfm = psfm_from_matrix(matrix, lamb=lamb)
            return sum([ep * p for (mat_row, psfm_row) in zip(matrix, psfm)
                        for (ep, p) in zip(mat_row, psfm_row)])
        except Exception:
            # Report the inputs, then let the original error propagate.
            print("{} {}".format(matrix, lamb))
            raise
    try:
        lamb = bisect_interval(lambda l:mean_ep(l) - des_mean_ep, -20, 20)
    except Exception:
        print("{} {} {}".format(matrix, mu, Ne))
        raise
    tilted_psfm = psfm_from_matrix(matrix, lamb)
    return sum([2 - h(col) for col in tilted_psfm])
Ejemplo n.º 11
0
 def interp(xstar):
     """Return the ``dnorm``-weighted average of ``ys`` evaluated at ``xstar``.

     Each ``(x, y)`` pair from ``zip(xs, ys)`` contributes ``y`` weighted by
     ``dnorm(xstar, x, sigma)``; the total is divided by the sum of the
     weights over all of ``xs``.  ``xs``, ``ys`` and ``sigma`` come from the
     enclosing scope.
     """
     weighted_total = 0
     for x, y in zip(xs, ys):
         weighted_total = weighted_total + y * dnorm(xstar, x, sigma)
     weight_total = 0
     for x in xs:
         weight_total = weight_total + dnorm(xstar, x, sigma)
     return weighted_total / weight_total
def dphidmu(x, mu, sigma):
    """Return ``(x - mu) / sigma**2 * dnorm(x, mu, sigma)``.

    This is the closed form that ``test_dphidmu`` compares against a
    numerical derivative of ``dnorm`` with respect to ``mu``.
    """
    sigma_sq = float(sigma**2)  # float() guards against integer division
    scaled_offset = (x - mu) / sigma_sq
    return scaled_offset * dnorm(x, mu, sigma)
def dphidsigma(x, mu, sigma):
    """Return the derivative of ``dnorm(x, mu, sigma)`` with respect to sigma.

    With sigma the standard deviation of the normal density phi,

        d(phi)/d(sigma) = phi * ((x - mu)**2 / sigma**3 - 1 / sigma).

    The second term was previously ``1 / sigma**2``, which is not the
    derivative of the ``1 / sigma`` normalisation factor.  The ``1.0``
    factors keep the divisions in floating point when integers are passed
    under Python 2.
    """
    return dnorm(x, mu, sigma) * ((x - mu)**2 * 1.0 / sigma**3 - 1.0 / sigma)
Ejemplo n.º 14
0
    if random.random() < 0.5: # flip a coin and update weight matrix or mu
        altered_col = random.randrange(w) # pick a column to alter
        altered_row = random.randrange(4) # pick a row to alter
        dw = random.gauss(0,MAT_SIGMA) # add N(0,2) noise
        new_mat[altered_col][altered_row] += dw
        new_fwd_eps,new_rev_eps = update_scores_np(fwd_eps,rev_eps,altered_col,altered_row,dw,w,genome)
    else:
        new_mu += random.gauss(0,MU_SIGMA)
        new_fwd_eps,new_rev_eps = fwd_eps,rev_eps # careful about returning copy...?
    return ((new_mat,new_mu),(new_fwd_eps,new_rev_eps))

def log_dprop(proposed, current):
    """Return the log proposal density of moving from ``current`` to ``proposed``.

    Both arguments are states of the form ``((matrix, mu), eps)``.  The
    proposal either perturbs a single matrix entry or perturbs mu, each with
    probability 1/2, so:

    * if the summed entry-wise matrix difference is non-zero, the step is
      scored as a matrix move: ``log(0.5 * dnorm(dmat, 0, MAT_SIGMA))``;
    * otherwise it is scored as a mu move:
      ``log(0.5 * dnorm(dmu, 0, MU_SIGMA))``.

    The mu branch previously used ``MAT_SIGMA``, although mu steps are drawn
    with ``random.gauss(0, MU_SIGMA)`` in the proposal code of this file.
    """
    # Explicit unpacking instead of nested tuple parameters: same call
    # contract, but also valid syntax on Python 3.
    (matp, mup), _epsp = proposed
    (mat, mu), _eps = current
    dmat = sum(xp - x
               for (rowp, row) in zip(matp, mat)
               for (xp, x) in zip(rowp, row))
    dmu = mup - mu
    if dmat != 0:
        return log(1/2.0 * dnorm(dmat, 0, MAT_SIGMA))
    return log(1/2.0 * dnorm(dmu, 0, MU_SIGMA))
    
def capture_state(state):
    """Return the ``mat_and_mu`` half of a ``(mat_and_mu, site_scores)`` state.

    The state is still passed as a single two-element argument, exactly as
    before; it is unpacked explicitly because tuple parameters in a ``def``
    are a syntax error on Python 3.  A state that does not have exactly two
    elements raises, as it did with the tuple parameter.
    """
    mat_and_mu, _site_scores = state
    return mat_and_mu

def complete_log_likelihood(((matrix,mu),eps),mapped_reads,num_cells=NUM_CELLS_RECOVERED):
    """Compute log likelihood of matrix, given chip seq data

    The first argument is a nested pair ``((matrix, mu), eps)``.

    NOTE(review): the visible body stops after the setup lines below and
    returns nothing; ``mapped_reads`` and ``num_cells`` are not used in the
    part shown -- confirm against the full definition.
    """
    # Progress trace on every call.
    print "entering complete log likelihood"
    # Result of fd_solve_np(eps, mu) extended with w - 1 trailing zeros
    # (w is a module-level name, not a parameter).
    ps = np.append(fd_solve_np(eps,mu),[0]*(w-1))
    G = len(ps)
    #print "G=",G
    # if random.random() < 1:#0.01:
    #     pprint(matrix)
def dlgn(x, mu, sigma):
    """Return the density at ``x`` of ``1/(1+exp(N(mu, sigma**2)))``.

    ``x`` is mapped back through ``log(1/x - 1)``, the ``dnorm`` value there
    is taken, and the result is divided by ``x * (1 - x)``.
    """
    normal_arg = log(1 / x - 1)
    jacobian_denominator = x * (1 - x)
    return dnorm(normal_arg, mu, sigma) / jacobian_denominator
Ejemplo n.º 16
0
def Pe(ep, site_mu, site_sigma, mu, Ne):
    """Return ``(1/Z) * (1 + exp(ep - mu))**-nu * dnorm(ep, site_mu, site_sigma)``.

    ``nu`` is ``Ne - 1`` and the normaliser ``Z`` is
    ``norm.cdf(mu - log(nu), site_mu, site_sigma)``.
    """
    nu = Ne - 1
    Z = norm.cdf(mu - log(nu), site_mu, site_sigma)
    inverse_Z = 1 / Z
    occupancy_term = 1 / (1 + exp(ep - mu))**nu
    return inverse_Z * occupancy_term * dnorm(ep, site_mu, site_sigma)
Ejemplo n.º 17
0
 def f(xp):
     """Return ``mean`` of ``dnorm(xp, mu=x, sigma=sigma)`` over every ``x`` in ``xs``.

     ``xs`` and ``sigma`` come from the enclosing scope.  The values are
     handed to ``mean`` lazily, as a generator.
     """
     kernel_values = (dnorm(xp, mu=center, sigma=sigma) for center in xs)
     return mean(kernel_values)
Ejemplo n.º 18
0
 def f(x):
     """Return component 1's ``dnorm`` value at ``x`` over the sum of both.

     ``mu0``, ``sigma0``, ``mu1`` and ``sigma1`` come from the enclosing
     scope.
     """
     dens0 = dnorm(x, mu0, sigma0)
     dens1 = dnorm(x, mu1, sigma1)
     return dens1 / (dens0 + dens1)
Ejemplo n.º 19
0
        ep = score_seq(matrix, site)
        ar = 1 / (M * norm.pdf(ep, mu, sigma))
        if random.random() < ar:
            return site


def log_ZS_gaussian(params, trials=1000, integration='quad'):
    """Estimate ``mean_ZS`` under a Gaussian approximation of site scores.

    Site scores are modelled as normal with mean ``mat_mu`` (sum of row
    means) and standard deviation ``mat_sigma`` (square root of the summed
    ``variance(..., correct=False)`` of the rows); ``mean_ZS`` is the
    expectation of ``(1 + exp(ep - mu))**-nu`` with ``nu = Ne - 1``.

    Args:
        params: a ``(matrix, mu, Ne)`` triple, passed as one argument.
        trials: number of draws for the ``'mc'`` method.
        integration: ``'quad'`` integrates over ``[ep_min, ep_max]`` with
            ``integrate.quad``; ``'mc'`` averages ``f`` over Gaussian draws.

    Raises:
        Whatever ``integrate.quad`` raises; the inputs are printed first.

    NOTE(review): the visible body computes ``mean_ZS`` but has no return
    statement -- confirm against the full definition.
    """
    # Explicit unpacking instead of a tuple parameter: same call contract,
    # but also valid syntax on Python 3.
    matrix, mu, Ne = params
    nu = Ne - 1
    L = len(matrix)
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda x: variance(x, correct=False), matrix)))
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    p = lambda x: norm.pdf(x, mat_mu, mat_sigma)
    f = lambda x: (1 + exp(x - mu))**-nu
    integrand = lambda ep: dnorm(ep, mat_mu, mat_sigma) * (1 + exp(ep - mu)
                                                           )**-nu
    log_integrand = lambda ep: log(dnorm(ep, mat_mu, mat_sigma)) + -nu * log(
        1 + exp(ep - mu))
    if integration == 'quad':
        try:
            mean_ZS, err = integrate.quad(integrand,
                                          ep_min,
                                          ep_max,
                                          epsabs=10**-15)
        except Exception:
            # The handler used to print the undefined name `mue`, so a
            # NameError replaced the real failure.  Print the inputs and
            # re-raise the original error instead.
            print((matrix, mu, Ne))
            raise
    elif integration == 'mc':
        mean_ZS = mean(
            f(random.gauss(mat_mu, mat_sigma)) for _ in range(trials))
Ejemplo n.º 20
0
 def P(ep, mu, alpha):
     """Return ``(exp(-alpha*mu) / (1 + exp(ep - mu)))**nu * dnorm(ep, site_mu, site_sigma)``.

     ``nu``, ``site_mu`` and ``site_sigma`` come from the enclosing scope.
     """
     per_copy_weight = 1 / (1 + exp(ep - mu)) * exp(-alpha * mu)
     score_density = dnorm(ep, site_mu, site_sigma)
     return per_copy_weight**nu * score_density
Ejemplo n.º 21
0
 def log_prior(params):
     """Return the log prior of a ``(matrix, mu, Ne)`` triple.

     The prior is the sum of three terms:

     * ``log(dnorm(ep, 0, 1))`` for every matrix entry ``ep``;
     * ``log(dnorm(mu, 0, 10))`` for ``mu``;
     * ``-Ne`` for ``Ne`` (the log of ``exp(-Ne)``).

     Args:
         params: a ``(matrix, mu, Ne)`` triple, passed as one argument.
     """
     # Explicit unpacking instead of a tuple parameter: same call contract,
     # but also valid syntax on Python 3.
     matrix, mu, Ne = params
     log_matrix_prior = sum(
         [log(dnorm(ep, 0, 1)) for row in matrix for ep in row])
     log_mu_prior = log(dnorm(mu, 0, 10))
     # Written as -Ne rather than log(exp(-Ne)): exp(-Ne) underflows to 0.0
     # for Ne above roughly 745, which makes the log fail.
     log_Ne_prior = -Ne
     return log_matrix_prior + log_mu_prior + log_Ne_prior
Ejemplo n.º 22
0
        site = random_site(L)
        ep = score_seq(matrix, site)
        ar = 1/(M*norm.pdf(ep, mu, sigma))
        if random.random() < ar:
            return site

def log_ZS_gaussian((matrix, mu, Ne), trials=1000, integration='quad'):
    nu = Ne - 1
    L = len(matrix)
    mat_mu = sum(map(mean, matrix))
    mat_sigma = sqrt(sum(map(lambda x:variance(x,correct=False), matrix)))
    ep_min = sum(map(min, matrix))
    ep_max = sum(map(max, matrix))
    p = lambda x:norm.pdf(x, mat_mu, mat_sigma)
    f = lambda x: (1+exp(x-mu))**-nu
    integrand = lambda ep:dnorm(ep, mat_mu, mat_sigma) * (1+exp(ep-mu))**-nu
    log_integrand = lambda ep:log(dnorm(ep, mat_mu, mat_sigma)) + -nu*log(1+exp(ep-mu))
    if integration == 'quad':
        try:
            mean_ZS, err = integrate.quad(integrand, ep_min, ep_max,epsabs=10**-15)
        except:
            print (matrix, mue, Ne)
            raise Exception
    elif integration == 'mc':
        mean_ZS = mean(f(random.gauss(mat_mu, mat_sigma)) for _ in xrange(trials))
    elif integration == 'uniform':
        dx = (ep_max - ep_min)/trials
        mean_ZS = sum([p(x)*f(x) for x in np.linspace(ep_min, ep_max,trials)]) * dx
    elif integration == 'hack':
        mean_ZS = norm.cdf(mu - log(nu), mat_mu, mat_sigma)
    else:
Ejemplo n.º 23
0
 def log_prior(params):
     """Return the log prior of a ``(matrix, mu, Ne)`` triple.

     The prior is the sum of three terms:

     * ``log(dnorm(ep, 0, 1))`` for every matrix entry ``ep``;
     * ``log(dnorm(mu, 0, 10))`` for ``mu``;
     * ``-Ne`` for ``Ne`` (the log of ``exp(-Ne)``).

     Args:
         params: a ``(matrix, mu, Ne)`` triple, passed as one argument.
     """
     # Explicit unpacking instead of a tuple parameter: same call contract,
     # but also valid syntax on Python 3.
     matrix, mu, Ne = params
     log_matrix_prior = sum([log(dnorm(ep,0,1)) for row in matrix for ep in row])
     log_mu_prior = log(dnorm(mu,0,10))
     # Written as -Ne rather than log(exp(-Ne)): exp(-Ne) underflows to 0.0
     # for Ne above roughly 745, which makes the log fail.
     log_Ne_prior = -Ne
     return log_matrix_prior + log_mu_prior + log_Ne_prior
def dvar(x, mu, sigma):
    """Return ``dnorm(log(exp(x) - 1), mu, sigma) * exp(x) / (exp(x) - 1)``.

    ``exp(x)`` is evaluated once and reused for the three places it occurs.
    """
    exp_x = exp(x)
    exp_x_minus_one = exp_x - 1
    return dnorm(log(exp_x_minus_one), mu, sigma) * exp_x / exp_x_minus_one