Ejemplo n.º 1
0
def slda_update_log_phi(text, log_phi, log_gamma, log_beta, y_d, eta, sigma_squared):
    """
        Log-space variant of the sLDA phi update for one document.

        Same as update_phi_lda_E_step but in log probability space.

        log_phi is unpacked as a 2-D (N, K) array; it is overwritten in
        place, passed to graphlib.log_row_normalize, and returned.
        log_gamma is exponentiated before being handed to
        graphlib.dirichlet_expectation.
        text must be an np.ndarray (checked via ensure) and is used to
        index the columns of log_beta.
        y_d, eta and sigma_squared enter through the response-dependent
        terms computed below.

        NOTE(review): several intermediate values take np.log of
        quantities that can be negative (see inline notes), which would
        produce NaN -- confirm the intended algebra before relying on
        this function.
    """
    (N, K) = log_phi.shape

    # Column-wise log-sum-exp of log_phi (reduction over axis 0).
    log_phi_sum = logsumexp(log_phi, axis=0)
    Ns = (N * sigma_squared)
    ElogTheta = graphlib.dirichlet_expectation(np.exp(log_gamma))

    # front is negative whenever N and sigma_squared are positive.
    front = (-1.0 / (2 * N * Ns))
    pC = (1.0 * y_d / Ns * eta)  
    eta_dot_eta = front * (eta * eta)
    # NOTE(review): the summed terms are linear-space values and may be
    # negative, in which case np.log yields NaN -- confirm.
    log_const = np.log(ElogTheta + pC + eta_dot_eta)

    # NOTE(review): front < 0 for positive N and sigma_squared, so this
    # is the log of a negative number for any positive eta entry -- confirm.
    log_right_eta_times_const = np.log(front * 2 * eta)

    ensure(isinstance(text, np.ndarray))

    # if text is in array form, do an approximate fast matrix update
    # NOTE(review): adding -1 in log space scales by e**-1 rather than
    # negating; presumably this was meant to mirror a linear-space
    # "-(phi - phi_sum)" term -- confirm.
    log_phi_minus_n = -1 + (logsumexp([log_phi, (-1 + log_phi_sum)]))

    # Combine three terms with logsumexp along axis 0 of the stacked list:
    # the log_beta columns selected by text (transposed), the logdotexp
    # product built from log_phi_minus_n and eta, and log_const.
    log_phi[:,:] = logsumexp([log_beta[:,text].T, 
                              logdotexp(np.matrix(logdotexp(log_phi_minus_n, np.log(eta))).T, 
                                        np.matrix(log_right_eta_times_const)), 
                              log_const,], axis=0)

    # Return value is discarded; presumably normalizes log_phi in place.
    graphlib.log_row_normalize(log_phi)

    return log_phi
Ejemplo n.º 2
0
def initialize_beta(num_topics, num_words):
    """Return a randomly initialized beta in probability space.

        Takes the number of topics and the vocabulary size (both integers),
        delegates to initialize_log_beta, and exponentiates its result.
        The result is intended to be a (topics x words) matrix of word
        probabilities whose rows each sum to 1.
    """
    return np.exp(initialize_log_beta(num_topics, num_words))
Ejemplo n.º 3
0
def partial_slda_update_phi(text, phi, gamma, beta, y_d, eta, sigma_squared):
    """Update phi for sLDA when only the first len(eta) topics predict y.

        Same as the sLDA phi update, but eta may be shorter than the total
        number of topics, so only the first Ks = len(eta) topic columns
        receive the response-dependent terms.

        text: either an np.ndarray of word indices (fast, approximate
            matrix update) or any object accepted by iterwords (exact
            word-by-word update).
        phi: 2-D (N, K) array; overwritten in place and returned.
        gamma: passed to graphlib.dirichlet_expectation.
        beta: indexed as beta[:, word]; its log is taken.
        y_d, eta, sigma_squared: response value, regression weights
            (length Ks <= K) and response variance.

        Returns phi (the same object that was passed in).
    """
    (N, K) = phi.shape
    Ks = len(eta)

    phi_sum = np.sum(phi[:,:Ks], axis=0)
    Ns = (N * sigma_squared)
    ElogTheta = graphlib.dirichlet_expectation(gamma)

    front = (-1.0 / (2 * N * Ns))
    eta_dot_eta = front * (eta * eta)
    pC = ((1.0 * y_d / Ns) * eta) + eta_dot_eta

    right_eta_times_const = (front * 2 * eta)

    if isinstance(text, np.ndarray):
        # if text is in array form, do an approximate fast matrix update
        # (every row uses the phi values from before this call).
        phi_minus_n = -(phi[:,:Ks] - phi_sum)
        phi[:,:] = ElogTheta + np.log(beta[:,text].T)
        phi[:,:Ks] += pC
        # (N,) outer (Ks,) -> (N, Ks): per-word eta . phi_{-n} times the
        # per-topic constant.
        phi[:,:Ks] += np.outer(np.dot(phi_minus_n, eta), right_eta_times_const)
        graphlib.log_row_normalize(phi)
        phi[:,:] = np.exp(phi[:,:])
    else:
        # otherwise, iterate through each word
        for n, word, _count in iterwords(text):
            # remove this word's contribution so phi_sum is phi_{-n}
            phi_sum -= phi[n,:Ks]

            pB = np.log(beta[:,word])
            pD = (np.dot(eta, phi_sum) * right_eta_times_const)

            # must exponentiate and normalize immediately!
            phi[n,:] = ElogTheta + pB
            # pC and pD have length Ks, so they may only touch the first
            # Ks topics (adding them to the full row breaks when Ks < K).
            phi[n,:Ks] += pC + pD
            phi[n,:] -= graphlib.logsumexp(phi[n,:]) # normalize in logspace
            phi[n,:] = np.exp(phi[n,:])

            # add this back into the sum
            # unlike in LDA, this cannot be computed in parallel
            phi_sum += phi[n,:Ks]
    return phi
Ejemplo n.º 4
0
    def test_exp(self):
        """Compare numpypy's element-wise exp with math.exp.

        Only the first four entries of the input array are checked; an
        OverflowError from math.exp is treated as an expected +inf.
        """
        import math
        from numpypy import array, exp

        values = array([-5.0, -0.0, 0.0, 12345678.0, float("inf"), -float("inf"), -12343424.0])
        results = exp(values)
        for idx in range(4):
            try:
                expected = math.exp(values[idx])
            except OverflowError:
                expected = float("inf")
            assert results[idx] == expected
Ejemplo n.º 5
0
def _unoptimized_slda_update_phi(text, phi, gamma, beta, y_d, eta, sigma_squared):
    """
        Word-by-word sLDA update of phi (straightforward reference version).

        phi is an N x K matrix and is updated in place, one row per word.
        gamma is a K-size vector.

        Each row n is set proportional to

            exp{ E[log theta | gamma]
                 + log beta[:, w_n]
                 + (y / (N sigma^2)) eta
                 - [2 (eta . phi_{-n}) eta + (eta * eta)] / (2 N^2 sigma^2) }

        where phi_{-n} is the column sum of phi with row n removed.
        Returns phi.
    """
    num_words, num_topics = phi.shape

    topic_totals = np.sum(phi, axis=0)
    scaled_var = (num_words * sigma_squared)
    expected_log_theta = graphlib.dirichlet_expectation(gamma)
    ensure(len(expected_log_theta) == num_topics)

    response_term = (1.0 * y_d / scaled_var * eta)
    eta_squared = (eta * eta)
    quad_scale = (-1.0 / (2 * num_words * scaled_var))

    for n, word, _count in iterwords(text):
        # Take the current word out of the running totals (phi_{-n}).
        topic_totals -= phi[n]
        ensure(len(topic_totals) == num_topics)

        log_word_prob = np.log(beta[:, word])
        eta_dot_rest = np.dot(eta, topic_totals)
        penalty = (quad_scale * (((2 * eta_dot_rest * eta) + eta_squared)))
        ensure(len(log_word_prob) == num_topics)
        ensure(len(response_term) == num_topics)
        ensure(len(penalty) == num_topics)

        # Normalize in log space before exponentiating, for numerical
        # stability; the row must be finalized before the next word.
        phi[n, :] = expected_log_theta + log_word_prob + response_term + penalty
        phi[n, :] -= graphlib.logsumexp(phi[n, :])
        phi[n, :] = np.exp(phi[n, :])

        # Put the refreshed row back into the totals; because each word
        # depends on the others, this loop is inherently sequential.
        topic_totals += phi[n]

    return phi
Ejemplo n.º 6
0
def lda_update_phi(text, phi, gamma, beta, normalize=True, logspace=False):
    """
        Vectorized LDA update of phi.

        phi is an N x K matrix (overwritten in place and returned).
        gamma is a K-size vector.
        text must be an np.ndarray of word indices into beta's columns.

        Every row n is set to

            E[log theta | gamma] + log beta[:, w_n]

        then optionally row-normalized in log space (normalize=True) and
        optionally exponentiated back to probabilities (logspace=False).
    """
    (num_words, num_topics) = phi.shape

    expected_log_theta = graphlib.dirichlet_expectation(gamma)

    # todo: call a log version of this in slda and others!
    ensure(isinstance(text, np.ndarray))
    log_word_probs = np.log(beta[:, text].T)
    phi[:, :] = expected_log_theta + log_word_probs

    if normalize:
        graphlib.log_row_normalize(phi)

    if logspace:
        return phi

    phi[:, :] = np.exp(phi[:, :])
    return phi
Ejemplo n.º 7
0
def logistic_sigmoid(v):
    """Evaluate the logistic function 1 / (1 + e^(-v)) at v."""
    denominator = 1 + np.exp(-v)
    return 1.0 / denominator
Ejemplo n.º 8
0
            do_var = ptr(do_var)
        getattr(ff, name_fit)(P.size, ptr(P), x.size, ptr(x), ptr(y),
                              ptr(ydata), ptr(a), do_var)
        return P

    return fun, fun_diff, fun_rms, fun_fit


# Module-level bindings: each factory call is unpacked into four names
# (<name>, <name>_diff, <name>_rms, <name>_fit). The string argument is
# presumably the prefix of the underlying compiled routines -- confirm
# against the factory definitions.
e2, e2_diff, e2_rms, e2_fit = _fun_factory('_e2')
IV3, IV3_diff, IV3_rms, IV3_fit = _fun_factory('_IV3')
IVdbl, IVdbl_diff, IVdbl_rms, IVdbl_fit = _fun_factory('_IVdbl')

# These models are built with _fun_a_factory instead of _fun_factory;
# presumably they take an additional argument -- TODO confirm.
IV4, IV4_diff, IV4_rms, IV4_fit = _fun_a_factory('_IV4')
IV5, IV5_diff, IV5_rms, IV5_fit = _fun_a_factory('_IV5')
IV6, IV6_diff, IV6_rms, IV6_fit = _fun_a_factory('_IV6')
IVdbl2, IVdbl2_diff, IVdbl2_rms, IVdbl2_fit = _fun_a_factory('_IVdbl2')

if __name__ == "__main__":
    # Quick manual check: evaluate e2 and print it next to an inline
    # numpy expression for the same inputs.
    try:
        # Prefer PyPy's numpypy when it is available.
        import numpypy as np
    except ImportError:
        import numpy as np

    P = np.array([1., 1.])
    x = np.arange(4.)
    y = np.empty(x.shape, x.dtype)

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print(e2(P, x, y))

    # Presumably the reference value e2 should reproduce:
    # P[0] * exp(-P[1] * x).
    print(P[0] * np.exp(-P[1] * x))