Beispiel #1
0
def initialize_log_uniform(matrix):
    """Overwrite ``matrix`` with the log of ``initialize_uniform(matrix)``.

    The result of ``initialize_uniform`` is passed through ``np.log`` and
    written back into ``matrix`` in place; the same object is returned.
    One-dimensional inputs are assigned through a 1-D slice, everything else
    through a 2-D slice.
    """
    is_vector = len(matrix.shape) == 1
    log_values = np.log(initialize_uniform(matrix))
    if is_vector:
        matrix[:] = log_values
    else:
        matrix[:, :] = log_values
    return matrix
Beispiel #2
0
def slda_update_log_phi(text, log_phi, log_gamma, log_beta, y_d, eta, sigma_squared):
    """
        Same as update_phi_lda_E_step but in log probability space.

        Overwrites ``log_phi`` in place with the updated values, passes it to
        ``graphlib.log_row_normalize`` and returns the same object.

        text: must be an np.ndarray (checked through ``ensure``); it is used
            to index the second axis of ``log_beta``.
        log_phi: 2-D array whose shape is unpacked as (N, K).
        log_gamma: exponentiated before being handed to
            ``graphlib.dirichlet_expectation``.
        log_beta: indexed as ``log_beta[:, text]``.
        y_d, eta, sigma_squared: response value, coefficient vector and
            variance used to build the supervised terms below.
    """
    (N, K) = log_phi.shape

    # log of the per-column totals of exp(log_phi)
    log_phi_sum = logsumexp(log_phi, axis=0)
    Ns = (N * sigma_squared)
    ElogTheta = graphlib.dirichlet_expectation(np.exp(log_gamma))

    # front == -1 / (2 * N^2 * sigma_squared)
    front = (-1.0 / (2 * N * Ns))
    pC = (1.0 * y_d / Ns * eta)  
    eta_dot_eta = front * (eta * eta)
    # NOTE(review): this takes the log of a sum of linear-space terms; any
    # non-positive entry turns into NaN/-inf here -- confirm this is intended.
    log_const = np.log(ElogTheta + pC + eta_dot_eta)

    # NOTE(review): `front` is negative whenever sigma_squared > 0, so this
    # log is NaN for every positive entry of eta -- confirm against callers.
    log_right_eta_times_const = np.log(front * 2 * eta)

    ensure(isinstance(text, np.ndarray))

    # if text is in array form, do an approximate fast matrix update
    # NOTE(review): the list mixes log_phi with the shifted column totals;
    # presumably these have different shapes -- verify logsumexp accepts that.
    log_phi_minus_n = -1 + (logsumexp([log_phi, (-1 + log_phi_sum)]))

    # Combine the word term, the eta interaction term and the constant term
    # with a logsumexp across the three stacked operands (axis=0).
    log_phi[:,:] = logsumexp([log_beta[:,text].T, 
                              logdotexp(np.matrix(logdotexp(log_phi_minus_n, np.log(eta))).T, 
                                        np.matrix(log_right_eta_times_const)), 
                              log_const,], axis=0)

    # return value is ignored, so this presumably normalizes rows in place
    graphlib.log_row_normalize(log_phi)

    return log_phi
Beispiel #3
0
def lda_elbo_entropy(gamma, phi):
    """Entropy of the variational distribution q in LDA for one document.

    Accepts phi (N x K) matrix of per-word topic responsibilities and
            gamma (a K-size vector) of variational Dirichlet parameters.

    Returns a double: the entropy term of the ELBO,

    H(q) = - sum_n sum_k phi[n,k] * log(phi[n,k])
           - log Gamma(sum_k gamma[k]) + sum_k log Gamma(gamma[k])
           - sum_k (gamma[k] - 1) * E[log theta_k]

    Entries of phi that are exactly zero contribute nothing to the first
    term (the usual 0 * log 0 = 0 convention) instead of producing NaN.
    """
    (_, K) = phi.shape
    ensure(len(gamma) == K)

    elbo = 0.0

    # Drop exact zeros before taking the log: 0 * log(0) evaluates to NaN in
    # floating point and would poison the whole bound. Negative or NaN entries
    # are deliberately kept so that invalid input still surfaces as NaN.
    phi_values = np.asarray(phi)
    nonzero = phi_values[phi_values != 0]
    elbo += -1 * np.sum(nonzero * np.log(nonzero))

    elbo += -1 * gammaln(np.sum(gamma))
    elbo += np.sum(gammaln(gamma))

    ElogTheta = graphlib.dirichlet_expectation(gamma)
    ensure(ElogTheta.shape == gamma.shape)
    elbo += -1 * np.sum((gamma - 1) * ElogTheta)

    return elbo
Beispiel #4
0
def partial_slda_update_phi(text, phi, gamma, beta, y_d, eta, sigma_squared):
    """Same as the sLDA phi update, but eta may cover fewer topics than phi.

    Only the first ``len(eta)`` topics (columns ``:Ks`` of phi) contribute to
    the response y, so the supervised terms are added to those columns only;
    the remaining columns receive the plain LDA update.

    text: either an np.ndarray of word indices (fast, approximate matrix
        update) or anything ``iterwords`` can walk (exact per-word update).
    phi: (N x K) matrix, overwritten in place and returned.
    gamma: passed to ``graphlib.dirichlet_expectation``.
    beta: topic-word matrix indexed as ``beta[:, word]``.
    y_d: response value of the document.
    eta: coefficient vector of length Ks <= K.
    sigma_squared: response variance.
    """
    (N, K) = phi.shape
    Ks = len(eta)

    phi_sum = np.sum(phi[:,:Ks], axis=0)
    Ns = (N * sigma_squared)
    ElogTheta = graphlib.dirichlet_expectation(gamma)

    front = (-1.0 / (2 * N * Ns))
    eta_dot_eta = front * (eta * eta)
    pC = ((1.0 * y_d / Ns) * eta) + eta_dot_eta

    right_eta_times_const = (front * 2 * eta)

    if isinstance(text, np.ndarray):
        # if text is in array form, do an approximate fast matrix update:
        # every row uses the phi totals from before this update
        phi_minus_n = -(phi[:,:Ks] - phi_sum)
        phi[:,:] = ElogTheta + np.log(beta[:,text].T)
        phi[:,:Ks] += pC
        # outer product: one scalar (phi_-n . eta) per word times the eta constants
        phi[:,:Ks] += np.outer(np.dot(phi_minus_n, eta), right_eta_times_const)
        graphlib.log_row_normalize(phi)
        phi[:,:] = np.exp(phi[:,:])
    else:
        # otherwise, iterate through each word
        for n, word, count in iterwords(text):
            phi_sum -= phi[n,:Ks]

            pB = np.log(beta[:,word])
            pD = (np.dot(eta, phi_sum) * right_eta_times_const)

            # must exponentiate and normalize immediately!
            phi[n,:] = ElogTheta + pB
            # pC and pD have length Ks, so they may only touch the supervised
            # columns; adding them to the full row breaks whenever Ks < K
            phi[n,:Ks] += pC + pD
            phi[n,:] -= graphlib.logsumexp(phi[n,:]) # normalize in logspace
            phi[n,:] = np.exp(phi[n,:])

            # add this back into the sum
            # unlike in LDA, this cannot be computed in parallel
            phi_sum += phi[n,:Ks]
    return phi
Beispiel #5
0
def calculate_EZ_from_big_log_phi(big_log_phi):
    """
        Log-space E[Z] for one big log-phi matrix (like ((Nd+Nc) x (K+J))).

        E[Z] = (1/N) * sum_n phi_n, so in log space this is the logsumexp of
        every column minus log(N), where N is the number of rows.

        Returns the resulting vector (one entry per column).
    """
    num_rows, _num_cols = big_log_phi.shape
    log_column_totals = logsumexp(big_log_phi, axis=0)
    return log_column_totals - np.log(num_rows)
Beispiel #6
0
def calculate_EZ_from_small_log_phis(log_phi1, log_phi2):
    """
        Log-space E[Z] from two small log-phi matrices (like (Nd x K) and (Nc x J)).

        E[Z] = (1/N) * sum_n phi_n with N = Nd + Nc. Each matrix is reduced
        over its rows with logsumexp, the two resulting vectors are joined
        end to end, and log(N) is subtracted.

        Returns the final vector of length K + J.
    """
    Ndc = log_phi1.shape[0] + log_phi2.shape[0]
    # Both reductions are 1-D vectors, so they must be joined along axis 0;
    # asking for axis=1 on 1-D input is rejected by current NumPy.
    log_column_totals = np.concatenate((logsumexp(log_phi1, axis=0),
                                        logsumexp(log_phi2, axis=0)))
    return log_column_totals - np.log(Ndc)
Beispiel #7
0
def _unoptimized_slda_update_phi(text, phi, gamma, beta, y_d, eta, sigma_squared):
    """
        Word-by-word phi update for sLDA (reference, unoptimized version).
        phi is N x K matrix, updated in place and returned.
        gamma is a K-size vector.

     Update rule for each word n of the document:
     phi[n] ∝ exp{ E[log theta | gamma] +
                   E[log p(w_n | beta_1:K)] +
                   (y / (N sigma^2)) eta -
                   [2 (eta^T phi_{-n}) eta + (eta ∘ eta)] / (2 N^2 sigma^2) }

     Note that E[log p(w_n | beta_1:K)] = log beta[:, w_n].
     phi_{-n} is the sum of all rows of phi except row n; because each row
     update feeds into the next one, the rows are processed sequentially.
    """
    (num_words, num_topics) = phi.shape

    running_total = np.sum(phi, axis=0)
    scaled_variance = (num_words * sigma_squared)
    expected_log_theta = graphlib.dirichlet_expectation(gamma)
    ensure(len(expected_log_theta) == num_topics)

    response_term = (1.0 * y_d / scaled_variance * eta)
    eta_squared = (eta * eta)
    quadratic_scale = (-1.0 / (2 * num_words * scaled_variance))

    for n, word, _count in iterwords(text):
        # take this word's current responsibilities out of the total
        running_total -= phi[n]
        ensure(len(running_total) == num_topics)

        log_word_prob = np.log(beta[:, word])
        interaction = 2 * np.dot(eta, running_total) * eta
        quadratic_term = quadratic_scale * (interaction + eta_squared)
        ensure(len(log_word_prob) == num_topics)
        ensure(len(response_term) == num_topics)
        ensure(len(quadratic_term) == num_topics)

        # normalize in log space first, then exponentiate, for stability
        phi[n, :] = expected_log_theta + log_word_prob + response_term + quadratic_term
        phi[n, :] -= graphlib.logsumexp(phi[n, :])
        phi[n, :] = np.exp(phi[n, :])

        # put the refreshed row back; later words depend on it
        running_total += phi[n]

    return phi
Beispiel #8
0
def lda_elbo_terms(document, alpha, beta, gamma, phi):
    """
    Sum of the ELBO terms of one document that are shared with plain LDA:

    E[log p(theta | alpha)] + sum_n E[log p(Z_n | theta)]
                            + sum_n E[log p(w_n | Z_n, beta_1:K)]

    with
    E[log p(theta | a)]      = log Gamma(sum_k a_k) - sum_k log Gamma(a_k)
                               + sum_k (a_k - 1) E[log theta_k]
    E[log p(Z_n | theta)]    = sum_k phi[n,k] E[log theta_k]
    E[log p(w_n | Z_n, beta)] = sum_k phi[n,k] log beta[k, w_n]

    (E[log theta_k] = Psi(gamma_k) - Psi(sum_j gamma_j).)

    ``document`` may be an np.ndarray of word indices (vectorized path) or
    anything ``iterwords`` can walk (per-word path).
    """
    _num_words, _num_topics = phi.shape
    total = 0.0

    # Dirichlet prior: normalizer first ...
    total += gammaln(np.sum(alpha)) - np.sum(gammaln(alpha))

    # ... then the (alpha - 1)-weighted expected log theta
    expected_log_theta = graphlib.dirichlet_expectation(gamma)
    total += np.sum((alpha - 1) * expected_log_theta)

    if isinstance(document, np.ndarray):
        # all words at once: phi ∘ (E[log theta] + log beta[:, w]^T)
        log_word_probs = np.log(beta[:, document]).T
        return total + np.sum(phi * (expected_log_theta + log_word_probs))

    # topic-assignment and word-likelihood terms share the phi[n] weights,
    # so they are folded into a single sum per word
    for position, word, _count in iterwords(document):
        per_topic = expected_log_theta + np.log(beta[:, word])
        total += np.sum(phi[position] * per_topic)

    return total
Beispiel #9
0
def calculate_EZZT_from_small_log_phis(phi1, phi2):
    """
        Log-space E[Z Z^T] from two small log-phi matrices.

        Accepts phi1 (Nd x K) and phi2 (Nc x J), both holding log values.
        Returns the final matrix ((K+J) x (K+J)), also in log space.

        E[Z Z^T] = (1/N^2) (sum_n sum_{m != n} phi_n phi_m^T + sum_n diag{phi_n})
        with N = Nd + Nc.

        Within one matrix the n == m pairs are excluded by adding -1000 to the
        diagonal of the log-space pair matrix; across the two matrices every
        pair is a distinct word pair, so nothing is masked.
    """
    Nd, K = phi1.shape
    Nc, J = phi2.shape
    (Ndc, KJ) = (Nd + Nc, K + J)
    inner_sum = np.zeros((KJ, KJ))

    p1 = np.matrix(phi1)
    p2 = np.matrix(phi2)

    # Square masks with -1000 on the diagonal (np.diag builds a matrix from a
    # vector; np.diagonal would try to extract one and fail on 1-D input).
    same_word_mask_d = np.diag(np.ones(Nd) * -1000)
    same_word_mask_c = np.diag(np.ones(Nc) * -1000)

    # logdotexp is presumably log(exp(A) . exp(B)); a column times a row
    # therefore yields the log of every pairwise product.
    for i in range(K):
        for j in range(K):
            m = logdotexp(np.matrix(p1[:,i]), np.matrix(p1[:,j]).T)
            m += same_word_mask_d
            inner_sum[i,j] = logsumexp(m.flatten())

    for i in range(J):
        for j in range(J):
            m = logdotexp(np.matrix(p2[:,i]), np.matrix(p2[:,j]).T)
            m += same_word_mask_c
            inner_sum[K+i,K+j] = logsumexp(m.flatten())

    for i in range(K):
        for j in range(J):
            m = logdotexp(np.matrix(p1[:,i]), np.matrix(p2[:,j]).T)
            inner_sum[i,K+j] = logsumexp(m.flatten())

    for i in range(J):
        for j in range(K):
            m = logdotexp(np.matrix(p2[:,i]), np.matrix(p1[:,j]).T)
            inner_sum[K+i,j] = logsumexp(m.flatten())

    # sum_n diag{phi_n}: per-topic totals, joined end to end (both are 1-D,
    # so the default axis 0 is the only valid one)
    big_phi_sum = np.concatenate((logsumexp(phi1, axis=0),
                                  logsumexp(phi2, axis=0)))
    ensure(big_phi_sum.shape == (KJ,))
    for i in range(KJ):
        inner_sum[i,i] = logsumexp([inner_sum[i,i], big_phi_sum[i]])

    # divide by N^2 in log space
    inner_sum -= np.log(Ndc * Ndc)
    return inner_sum
Beispiel #10
0
    def growth(self):
        '''Build one tab-separated line of log-growth rates.

        For every Fourier mode n the line holds three values (T, omega, psi),
        each being log|current| - log|previous| sampled at row Nz//3 of the
        field, where a zero amplitude counts as 0 instead of log(0). The
        sampled rows are then stored as the new "previous" values.
        '''
        row = self.Nz // 3

        def log_amplitude(value):
            # guard against log(0)
            if value != 0:
                return np.log(abs(value))
            return 0

        def rate(current, previous, mode):
            delta = (log_amplitude(current.field[row, mode])
                     - log_amplitude(abs(previous[mode])))
            return str(delta)

        cells = []
        for mode in range(self.NFourier):
            cells.append(rate(self.T, self.T_old, mode) + "\t")
            cells.append(rate(self.omega, self.omega_old, mode) + "\t")
            cells.append(rate(self.psi, self.psi_old, mode) + "\t")
        line = "".join(cells)

        # remember the current amplitudes for the next call
        self.T_old = self.T.field[row, :].copy()
        self.omega_old = self.omega.field[row, :].copy()
        self.psi_old = self.psi.field[row, :].copy()

        return line + "\n"
Beispiel #11
0
def np_log(a):
    """Natural log of a scalar or of a 1-D / 2-D nd array.

    Outside PyPy this simply defers to ``np.log``. Under PyPy (as reported by
    ``ispypy``) the log is taken element by element with ``math.log``: arrays
    are copied into a fresh zero-initialized array of the same shape, anything
    that is not an nd array is treated as a single number.
    """
    if not ispypy():
        return np.log(a)

    if not isinstance(a, np.ndarray):
        return math.log(a)

    n = np.zeros(a.shape)
    if len(a.shape) == 1:
        for i in xrange(len(a)):
            n[i] = math.log(a[i])
        return n

    # only vectors and matrices are supported on this path
    assert len(a.shape) == 2
    for i in xrange(a.shape[0]):
        for j in xrange(a.shape[1]):
            n[i,j] = math.log(a[i,j])
    return n
Beispiel #12
0
def lm_elbo_y_from_small_phis(y, eta, phiD, phiC, sigma_squared):
    """
    Response term of the ELBO for one document, computed from two small phis.

    E[log p(y | Z_1:N, eta, sigma^2)]
        = (-1/2) log(2 pi sigma^2)
          - (1 / (2 sigma^2)) [y^2 - 2 y eta^T E[Z] + eta^T E[Z Z^T] eta]

    E[Z] and E[Z Z^T] come from ``calculate_EZ_from_small_phis`` and
    ``calculate_EZZT_from_small_phis`` applied to (phiD, phiC).

    Test:
    Should be the same as slda_elbo_y when phiD and phiC are catercorner concatenated.
    """
    log_normalizer = -0.5 * np.log(2 * np.pi * sigma_squared)

    expected_z = calculate_EZ_from_small_phis(phiD, phiC)
    expected_zzt = calculate_EZZT_from_small_phis(phiD, phiC)
    quadratic_form = np.dot(np.dot(eta, expected_zzt), eta)

    bracket = y*y - (2 * y * np.dot(eta, expected_z)) + quadratic_form
    return log_normalizer + (-0.5 / sigma_squared) * bracket
Beispiel #13
0
def slda_elbo_y(y, eta, phi, sigma_squared):
    """
    Response term of the sLDA ELBO for one document.

    E[log p(y | Z_1:N, eta, sigma^2)]
        = (-1/2) log(2 pi sigma^2)
          - (1 / (2 sigma^2)) [y^2 - 2 y eta^T E[Z] + eta^T E[Z Z^T] eta]

    E[Z] and E[Z Z^T] are obtained from ``calculate_EZ(phi)`` and
    ``calculate_EZZT(phi)``.
    """
    log_normalizer = -0.5 * np.log(2 * np.pi * sigma_squared)

    expected_z = calculate_EZ(phi)
    expected_zzt = calculate_EZZT(phi)
    quadratic_form = np.dot(np.dot(eta, expected_zzt), eta)

    bracket = y*y - (2 * y * np.dot(eta, expected_z)) + quadratic_form
    return log_normalizer + (-0.5 / sigma_squared) * bracket
Beispiel #14
0
def lda_update_phi(text, phi, gamma, beta, normalize=True, logspace=False):
    """
        Vectorized phi update for LDA.
        phi is N x K matrix, overwritten in place and returned.
        gamma is a K-size vector.

     Update rule:
     phi[n] ∝ exp{ E[log theta | gamma] + E[log p(w_n | beta_1:K)] }

     where E[log p(w_n | beta_1:K)] = log beta[:, w_n].

     normalize: when true, hand the log-space values to
         ``graphlib.log_row_normalize`` before anything else happens.
     logspace: when true, leave phi as logs; otherwise exponentiate it.
     text must be an np.ndarray of word indices (checked through ``ensure``).
    """
    _num_words, _num_topics = phi.shape

    expected_log_theta = graphlib.dirichlet_expectation(gamma)

    # todo: call a log version of this in slda and others!
    ensure(isinstance(text, np.ndarray))
    log_word_probs = np.log(beta[:, text].T)
    phi[:, :] = expected_log_theta + log_word_probs

    if normalize:
        graphlib.log_row_normalize(phi)

    if logspace:
        return phi

    phi[:, :] = np.exp(phi[:, :])
    return phi