def slda_update_log_phi(text, log_phi, log_gamma, log_beta, y_d, eta, sigma_squared):
    """Log-probability-space version of the sLDA phi update.

    The original author describes this as the same computation as
    ``update_phi_lda_E_step``, carried out on log probabilities.

    Arguments:
      text: word indices for the document; must be a numpy ndarray (this is
        checked with ``ensure``). Used to select columns of ``log_beta``.
      log_phi: 2-D array (its shape is unpacked as N x K). Overwritten in
        place with the updated, row-normalized values.
      log_gamma: exponentiated and handed to ``graphlib.dirichlet_expectation``.
      log_beta: 2-D array indexed as ``log_beta[:, text]``.
      y_d: response value for the document.
      eta: coefficient vector; combined elementwise with the per-topic terms.
      sigma_squared: variance term; multiplied by N to scale the response terms.

    Returns the same ``log_phi`` object that was passed in.
    """
    num_words, _ = log_phi.shape

    column_totals = logsumexp(log_phi, axis=0)
    scaled_var = num_words * sigma_squared
    e_log_theta = graphlib.dirichlet_expectation(np.exp(log_gamma))

    coeff = -1.0 / (2 * num_words * scaled_var)
    response_term = 1.0 * y_d / scaled_var * eta
    eta_sq_term = coeff * (eta * eta)

    # NOTE(review): both logs below are taken of quantities that look like
    # they can be non-positive (coeff is negative) -- confirm this is intended.
    log_const = np.log(e_log_theta + response_term + eta_sq_term)
    log_eta_scaled = np.log(coeff * 2 * eta)

    ensure(isinstance(text, np.ndarray))

    # if text is in array form, do an approximate fast matrix update
    leave_one_out = -1 + logsumexp([log_phi, (-1 + column_totals)])

    word_term = log_beta[:, text].T
    eta_weighted = logdotexp(leave_one_out, np.log(eta))
    cross_term = logdotexp(np.matrix(eta_weighted).T, np.matrix(log_eta_scaled))

    log_phi[:, :] = logsumexp([word_term, cross_term, log_const], axis=0)

    graphlib.log_row_normalize(log_phi)
    return log_phi
def initialize_beta(num_topics, num_words):
    """Build a random beta matrix in probability space.

    Takes the integer number of topics and the integer vocabulary size,
    obtains a log-space beta from ``initialize_log_beta`` and exponentiates
    it. Per the original documentation the result is a TxW matrix of word
    probabilities drawn from a random Dirichlet, with each row summing to 1.
    """
    return np.exp(initialize_log_beta(num_topics, num_words))
def partial_slda_update_phi(text, phi, gamma, beta, y_d, eta, sigma_squared):
    """sLDA phi update where only the first ``len(eta)`` topics affect y.

    Same idea as the full sLDA phi update, but ``eta`` may be shorter than
    the total number of topics K. Only the leading ``Ks = len(eta)`` columns
    of ``phi`` receive the response-dependent terms; the remaining columns
    get the plain LDA terms only.

    Arguments:
      text: either a numpy ndarray of word indices (fast, approximate matrix
        update) or anything accepted by ``iterwords`` (exact per-word update).
      phi: N x K array; updated in place and returned.
      gamma: passed to ``graphlib.dirichlet_expectation``.
      beta: 2-D array indexed as ``beta[:, word]``.
      y_d: response value for the document.
      eta: coefficient vector of length Ks <= K.
      sigma_squared: variance term; multiplied by N to scale the response terms.

    Returns the same ``phi`` object that was passed in.
    """
    N, _ = phi.shape
    Ks = len(eta)

    phi_sum = np.sum(phi[:, :Ks], axis=0)
    Ns = N * sigma_squared
    ElogTheta = graphlib.dirichlet_expectation(gamma)

    front = -1.0 / (2 * N * Ns)
    eta_dot_eta = front * (eta * eta)
    pC = ((1.0 * y_d / Ns) * eta) + eta_dot_eta
    right_eta_times_const = front * 2 * eta

    if isinstance(text, np.ndarray):
        # if text is in array form, do an approximate fast matrix update
        phi_minus_n = -(phi[:, :Ks] - phi_sum)
        phi[:, :] = ElogTheta + np.log(beta[:, text].T)
        phi[:, :Ks] += pC
        # Row n gets (eta . phi_minus_n[n]) * right_eta_times_const.
        phi[:, :Ks] += np.outer(np.dot(phi_minus_n, eta), right_eta_times_const)
        graphlib.log_row_normalize(phi)
        phi[:, :] = np.exp(phi[:, :])
    else:
        # otherwise, iterate through each word
        for n, word, _count in iterwords(text):
            phi_sum -= phi[n, :Ks]

            pB = np.log(beta[:, word])
            pD = np.dot(eta, phi_sum) * right_eta_times_const

            # must exponentiate and normalize immediately!
            phi[n, :] = ElogTheta + pB
            # pC and pD have length Ks, so they may only touch the first Ks
            # topics (adding them to the full row fails when Ks < K).
            phi[n, :Ks] += pC + pD
            phi[n, :] -= graphlib.logsumexp(phi[n, :])  # normalize in logspace
            phi[n, :] = np.exp(phi[n, :])

            # add this back into the sum
            # unlike in LDA, this cannot be computed in parallel
            phi_sum += phi[n, :Ks]
    return phi
def test_exp(self):
    """Check numpypy ``exp`` against ``math.exp`` element by element.

    ``math.exp`` raises OverflowError for very large finite inputs, so that
    case is mapped to ``inf`` before comparing.
    """
    import math
    from numpypy import array, exp

    a = array([-5.0, -0.0, 0.0, 12345678.0, float("inf"),
               -float("inf"), -12343424.0])
    b = exp(a)
    # Walk the whole array so the inf, -inf and large-negative inputs are
    # verified too, not only the first four entries.
    for i in range(len(a)):
        try:
            res = math.exp(a[i])
        except OverflowError:
            res = float("inf")
        assert b[i] == res
def _unoptimized_slda_update_phi(text, phi, gamma, beta, y_d, eta, sigma_squared):
    """Word-by-word sLDA update of phi (straightforward, unoptimized form).

    phi is an N x K matrix and gamma a K-size vector. For each word n the
    row phi[n] is set proportional to

        exp{ E[log theta | gamma] + log beta[:, w_n] + (y / (N sigma^2)) eta
             - [2 (eta . phi_minus_n) eta + (eta * eta)] / (2 N^2 sigma^2) }

    where phi_minus_n is the column sum of phi with row n left out, and
    E[log p(w_n | beta)] is taken to be log beta[:, w_n].

    phi is modified in place and returned.
    """
    num_words, num_topics = phi.shape

    running_total = np.sum(phi, axis=0)
    scaled_var = num_words * sigma_squared

    e_log_theta = graphlib.dirichlet_expectation(gamma)
    ensure(len(e_log_theta) == num_topics)

    response_term = 1.0 * y_d / scaled_var * eta
    eta_squared = eta * eta
    coeff = -1.0 / (2 * num_words * scaled_var)

    for row, word, _count in iterwords(text):
        # Leave the current word out of the running column sum.
        running_total -= phi[row]
        ensure(len(running_total) == num_topics)

        log_word_prob = np.log(beta[:, word])
        penalty = coeff * ((2 * np.dot(eta, running_total) * eta) + eta_squared)

        ensure(len(log_word_prob) == num_topics)
        ensure(len(response_term) == num_topics)
        ensure(len(penalty) == num_topics)

        # The row has to be normalized and exponentiated right away because
        # the next word's update reads it through running_total.
        # Normalizing in log space before exp is for numerical stability.
        phi[row, :] = e_log_theta + log_word_prob + response_term + penalty
        phi[row, :] -= graphlib.logsumexp(phi[row, :])
        phi[row, :] = np.exp(phi[row, :])

        # Put the refreshed row back into the sum; unlike plain LDA this
        # makes the update inherently sequential.
        running_total += phi[row]

    return phi
def lda_update_phi(text, phi, gamma, beta, normalize=True, logspace=False):
    """Plain LDA update of the variational phi matrix.

    phi is an N x K matrix and gamma a K-size vector. Each row is set to

        E[log theta | gamma] + log beta[:, w_n]

    i.e. the log of the unnormalized phi, using log beta[:, w_n] for
    E[log p(w_n | beta)].

    Arguments:
      text: numpy ndarray of word indices (checked with ``ensure``).
      phi: array overwritten in place with the result.
      gamma: passed to ``graphlib.dirichlet_expectation``.
      beta: 2-D array indexed as ``beta[:, text]``.
      normalize: when true, rows are normalized in log space.
      logspace: when true, the result is left as logs; otherwise it is
        exponentiated before returning.

    Returns the same ``phi`` object that was passed in.
    """
    _num_words, _num_topics = phi.shape

    e_log_theta = graphlib.dirichlet_expectation(gamma)

    # TODO: call a log version of this in slda and others!
    ensure(isinstance(text, np.ndarray))

    log_word_probs = np.log(beta[:, text].T)
    phi[:, :] = e_log_theta + log_word_probs

    if normalize:
        graphlib.log_row_normalize(phi)

    if logspace:
        return phi

    phi[:, :] = np.exp(phi)
    return phi
def logistic_sigmoid(v):
    """Evaluate the logistic function 1 / (1 + e^(-v)) at ``v``."""
    decay = np.exp(-v)
    denominator = 1 + decay
    return 1.0 / denominator
do_var = ptr(do_var) getattr(ff, name_fit)(P.size, ptr(P), x.size, ptr(x), ptr(y), ptr(ydata), ptr(a), do_var) return P return fun, fun_diff, fun_rms, fun_fit e2, e2_diff, e2_rms, e2_fit = _fun_factory('_e2') IV3, IV3_diff, IV3_rms, IV3_fit = _fun_factory('_IV3') IVdbl, IVdbl_diff, IVdbl_rms, IVdbl_fit = _fun_factory('_IVdbl') IV4, IV4_diff, IV4_rms, IV4_fit = _fun_a_factory('_IV4') IV5, IV5_diff, IV5_rms, IV5_fit = _fun_a_factory('_IV5') IV6, IV6_diff, IV6_rms, IV6_fit = _fun_a_factory('_IV6') IVdbl2, IVdbl2_diff, IVdbl2_rms, IVdbl2_fit = _fun_a_factory('_IVdbl2') if __name__ == "__main__": try: import numpypy as np except ImportError: import numpy as np P = np.array([1., 1.]) x = np.arange(4.) y = np.empty(x.shape, x.dtype) print e2(P, x, y) print P[0] * np.exp(-P[1] * x)