Example #1
def entropy(X, k=1):
    '''
    Returns the entropy of X,
    given as array X = array(n,dx)
    where
      n = number of samples
      dx = number of dimensions
    
    Optionally:
      k = number of nearest neighbors for density estimation
    '''
    # Distance to kth nearest neighbor
    r = nearest_distances(X, k) # squared distances
    n, d = X.shape
    volume_unit_ball = (pi ** (.5 * d)) / gamma(.5 * d + 1)
    '''
    F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures
    for Continuous Random Variables. Advances in Neural Information
    Processing Systems 21 (NIPS). Vancouver (Canada), December.
     
    return .5*d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k)
    '''
    '''
    Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy
    of a random vector. Probl. Inf. Transm. 23, 95-101.
    See also: Evans, D. 2008 A computationally efficient estimator for
    mutual information, Proc. R. Soc. A 464 (2093), 1203-1215.
    and:
    Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
    information. Phys Rev E 69(6 Pt 2):066138.
    '''
    return .5 * d * mean(log(r)) + log(volume_unit_ball) + psi(n) - psi(k)
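
As a quick sanity check of the estimator above, the following minimal sketch (an addition of this write-up, not part of the original snippet) stands in for the nearest_distances helper with a scipy cKDTree query and compares the k-NN estimate against the analytic entropy of a standard Gaussian. Since cKDTree returns plain (not squared) distances, the leading factor is d rather than .5*d.

# Hedged usage sketch: Kozachenko-Leonenko entropy on Gaussian data.
import numpy as np
from scipy.spatial import cKDTree
from scipy.special import psi, gamma

def kl_entropy(X, k=1):
    n, d = X.shape
    # distance to the k-th nearest neighbour, excluding the point itself
    r = cKDTree(X).query(X, k=k + 1)[0][:, -1]
    volume_unit_ball = np.pi ** (.5 * d) / gamma(.5 * d + 1)
    return d * np.mean(np.log(r)) + np.log(volume_unit_ball) + psi(n) - psi(k)

X = np.random.randn(5000, 3)
print(kl_entropy(X, k=3))                  # k-NN estimate
print(.5 * 3 * np.log(2 * np.pi * np.e))   # analytic entropy of N(0, I_3)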
Example #2
    def compute_elbo(self, doc_ids, doc_cnt, doc_links):
        """ compute evidence lower bound for trained model
        """
        elbo = 0

        e_log_theta = psi(self.gamma) - psi(np.sum(self.gamma, 1))[:, np.newaxis]  # D x K
        log_beta = np.log(self.beta + eps)

        for di in xrange(self.n_doc):
            words = doc_ids[di]
            cnt = doc_cnt[di]

            elbo += np.sum(cnt * (self.phi[di] * log_beta[:, words]))  # E_q[log p(w_{d,n}|\beta,z_{d,n})]
            elbo += np.sum((self.alpha - 1.0) * e_log_theta[di, :])  # E_q[log p(\theta_d | alpha)]
            elbo += np.sum(self.phi[di].T * e_log_theta[di, :])  # E_q[log p(z_{d,n}|\theta_d)]

            elbo += (
                -gammaln(np.sum(self.gamma[di, :]))
                + np.sum(gammaln(self.gamma[di, :]))
                - np.sum((self.gamma[di, :] - 1.0) * (e_log_theta[di, :]))
            )  # - E_q[log q(theta|gamma)]
            elbo += -np.sum(cnt * self.phi[di] * np.log(self.phi[di]))  # - E_q[log q(z|phi)]

            for adi in doc_links[di]:
                elbo += (
                    np.dot(self.eta, self.pi[di] * self.pi[adi]) + self.nu
                )  # E_q[log p(y_{d1,d2}|z_{d1},z_{d2},\eta,\nu)]

        return elbo
Example #3
def estimate_alpha_from_counts(D, K, initial_alpha, counts, n_iter=1000):
    """
    Estimate posterior alpha of a Dirichlet multinomial from samples of the multinomial counts.
    This implements the fixed-point update described in Minka, T. P. (2000). Estimating a Dirichlet
    distribution. Technical report, MIT.
    """

    counts = counts.astype(float)
    sdata = np.sum(counts, axis=1)

    # initialise old and new alphas before iteration
    alpha_old = np.ones(K) * initial_alpha
    for i in range(n_iter):

        sa = np.sum(alpha_old)
        temp = np.tile(alpha_old, (D, 1))
        g = np.sum(psi(counts + temp), axis=0) - D*psi(alpha_old)
        h = np.sum(psi(sdata + sa)) - D*psi(sa)
        alpha_new = alpha_old * (g/h)
        if np.max(np.abs(alpha_new-alpha_old)) < 1e-6:
            break
        if np.isnan(np.min(alpha_new)): # prevent NaN from propagating
            return alpha_old

        # set alpha_new to alpha_old for the next iteration update
        alpha_old = alpha_new

    return alpha_new
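
A hedged usage sketch for the function above (the synthetic data and seed are illustrative assumptions; numpy and scipy.special.psi are assumed to be imported as in the snippet): counts are drawn from a Dirichlet-multinomial with a known alpha, and the fixed-point iteration should land close to it.

# Usage sketch: recover a known Dirichlet parameter from simulated counts.
import numpy as np

np.random.seed(0)
true_alpha = np.array([2.0, 5.0, 3.0])
D, K = 2000, 3
thetas = np.random.dirichlet(true_alpha, size=D)
counts = np.array([np.random.multinomial(100, th) for th in thetas])

alpha_hat = estimate_alpha_from_counts(D, K, initial_alpha=1.0, counts=counts)
print(alpha_hat)  # should be roughly [2, 5, 3]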
Example #4
    def loglik(self,obs=None):

        dim=self.__dim__

        if obs is None:
            obs=numpy.arange(dim)
        else:
            assert numpy.ndim(obs)==1

        pi=self.__param__.pi
        alpha=self.__param__.alpha

        if numpy.isfinite(alpha):

            val=numpy.zeros(numpy.size(obs))

            val[:]=-numpy.inf

            ind,=numpy.where(pi[obs]>0.0)

            # Evaluate the expected log-likelihood.
            val[ind]=special.psi(alpha*pi[obs[ind]])-special.psi(alpha)

            return val

        else:
            return numpy.log(pi[obs])
Example #5
    def compute_moments_and_cgf(self, phi, mask=True):
        r"""
        Compute the moments and :math:`g(\phi)`.

        .. math::

           \overline{\mathbf{u}}  (\boldsymbol{\phi})
           &=
           \begin{bmatrix}
             \psi(\phi_1) - \psi(\sum_d \phi_{1,d})
           \end{bmatrix}
           \\
           g_{\boldsymbol{\phi}} (\boldsymbol{\phi})
           &=
           TODO
        """

        if np.any(np.asanyarray(phi) <= 0):
            raise ValueError("Natural parameters should be positive")

        sum_gammaln = np.sum(special.gammaln(phi[0]), axis=-1)
        gammaln_sum = special.gammaln(np.sum(phi[0], axis=-1))
        psi_sum = special.psi(np.sum(phi[0], axis=-1, keepdims=True))
        
        # Moments <log x>
        u0 = special.psi(phi[0]) - psi_sum
        u = [u0]
        # G
        g = gammaln_sum - sum_gammaln

        return (u, g)
Example #6
def localVB(doc, alpha, k, expElogBeta):
  # Global Variables
  VAR_MAX_ITER = 10
  VAR_CONVERGED = 0.01
  isConverged = 0
  # Initialization
  ids = [id for id,_ in doc]
  cts = np.array([cnt for _,cnt in doc])
  
  gamma = np.asarray([1.0 / k  for i in xrange(k)]) # 1*k
  expElogTheta = np.exp(psi(gamma)) # 1*k
  
  expElogBetaD = expElogBeta_k_by_d(expElogBeta,k,ids)  # k*d
  phinorm = np.dot(expElogTheta,expElogBetaD) + 1e-100 # 1 * d
  
  for round in xrange(VAR_MAX_ITER):

    lastgamma = gamma
    gamma = alpha + expElogTheta * np.dot(cts/phinorm, expElogBetaD.T)
    
    expElogTheta = np.exp(psi(gamma))
    phinorm = np.dot(expElogTheta,expElogBetaD) + 1e-100 # 1 * d
    # isConverged ?
    meanchange = np.mean(abs(gamma - lastgamma))
    if (meanchange < VAR_CONVERGED):
      isConverged = 1
      break

  # calculate phi
  
  phi_cts = np.dot( np.dot( np.diag(expElogTheta), expElogBetaD), np.diag(cts/phinorm))

  return phi_cts,ids,gamma,isConverged
Example #7
File: lda.py Project: laiguokun/LDA
def Mstep(max_iter):
	global alpha,beta,Gamma,Phi,doc,doc_cnt;
	#update beta
	for i in range(K):
		for v in range(voca_size):
			beta[i][v] = 0;
		for d in range(doc_size):
			for n in range(len(doc[d])):
				beta[i][doc[d][n]] += doc_cnt[d][n] * Phi[d][n][i];
	beta_sum = sum_matrix(beta, 0);
	for k in range(K):
		for i in range(voca_size):
			beta[k][i] = beta[k][i]/beta_sum[k];
	#update alpha
	last = 0;
	iter_num = 0;
	const = 0;
	for d in range(doc_size):
		gamma_sum = sum_vector(Gamma[d]);
		for i in range(K):
			const += (sp.psi(Gamma[d][i]) - sp.psi(gamma_sum));	
	now = -compute_alpha_mle(alpha);
	origin = now;
	while (abs(last - now) > 1e-9 and iter_num < max_iter):
		da = K * (doc_size * (sp.psi(alpha * K) - sp.psi(alpha))) + const;
		dda = K * (doc_size * (K * sp.polygamma(1, alpha * K) - sp.polygamma(1, alpha)));
		dx = -da/dda;
		alpha = backtrack(alpha,dx,da,0.01,0.5);
		last = now;
		now = -compute_alpha_mle(alpha);
		iter_num += 1;
	if (now < origin):
		print('error alpha');
Example #8
 def expected_log_m(self):
     """
     Compute the expected log probability of each block
     :return:
     """
     E_log_m = psi(self.mf_pi) - psi(self.mf_pi.sum())
     return E_log_m
Example #9
	def run_e_step(self):
		""" compute variational expectations 
		"""
		ll = 0.

		for p in xrange(self.N):
			for q in xrange(self.N):
				new_phi = np.zeros(self.K)

				for g in xrange(self.K):
					new_phi[g] = np.exp(psi(self.gamma[p,g])-psi(np.sum(self.gamma[p,:]))) * np.prod(( (self.B[g,:]**self.Y[p,q]) 
						* ((1.-self.B[g,:])**(1.-self.Y[p,q])) ) 
						** self.phi[q,p,:] )
				self.phi[p,q,:] = new_phi/np.sum(new_phi)

				new_phi = np.zeros(self.K)
				for h in xrange(self.K):
					new_phi[h] = np.exp(psi(self.gamma[q,h])-psi(np.sum(self.gamma[q,:]))) * np.prod(( (self.B[:,h]**self.Y[p,q]) 
						* ((1.-self.B[:,h])**(1.-self.Y[p,q])) ) 
						** self.phi[p,q,:] )
				self.phi[q,p,:] = new_phi/np.sum(new_phi)

				for k in xrange(self.K):
					self.gamma[p,k] = self.alpha[k] + np.sum(self.phi[p,:,k]) + np.sum(self.phi[:,p,k])
					self.gamma[q,k] = self.alpha[k] + np.sum(self.phi[q,:,k]) + np.sum(self.phi[:,q,k])

		return ll
Example #10
    def maximization(self):
        sNo = self.p.sNo

        for i in range(sNo):
            self.p.avgPi[i] = (self.p.uPiArr[i] + self.p.gmMat[0][i]) / (self.p.sumUPi + 1.0)
            self.p.avgLnPi[i] = spsp.psi(self.p.uPiArr[i] + self.p.gmMat[0][i])
            self.p.avgLnPi[i] -= spsp.psi(self.p.sumUPi + 1.0)

            for j in range(sNo):
                self.p.avgA[i][j] = (self.p.uAMat[i][j] + self.p.Nij[i][j]) / (self.p.sumUAArr[i] + self.p.Nii[i])
                self.p.avgLnA[i][j] = spsp.psi(self.p.uAMat[i][j] + self.p.Nij[i][j])
                self.p.avgLnA[i][j] -= spsp.psi(self.p.sumUAArr[i] + self.p.Nii[i])

            self.p.btMu[i] = self.p.uBtArr[i] + self.p.Ni[i]
            self.p.mu0[i] = (self.p.uBtArr[i] * self.p.uMuArr[i] + self.p.Ni[i] * self.p.barX[i]) / self.p.btMu[i]
            self.p.aLm[i] = self.p.uAArr[i] + self.p.Ni[i] / 2.0
            self.p.bLm[i] = self.p.uBArr[i] + (self.p.NiSi[i] / 2.0)
            self.p.bLm[i] += (
                self.p.uBtArr[i]
                * self.p.Ni[i]
                * (self.p.barX[i] - self.p.uMuArr[i]) ** 2.0
                / 2.0
                / (self.p.uBtArr[i] + self.p.Ni[i])
            )

            self.p.avgMu[i] = self.p.mu0[i]
            self.p.avgLm[i] = self.p.aLm[i] / self.p.bLm[i]
            self.p.avgLnLm[i] = spsp.psi(self.p.aLm[i]) - math.log(self.p.bLm[i])
Example #11
    def update_V(self, corpus):
        lb = 0 

        sumLnZ = np.sum(psi(corpus.A) - np.log(corpus.B), 0)        # K dim

        tmp = np.dot(corpus.R, corpus.w)  # M x K
        sum_r_w = np.sum(tmp, 0)
        assert len(sum_r_w) == self.K

        for i in xrange(self.c_a_max_step):
            one_V = 1-self.V
            stickLeft = self.getStickLeft(self.V)       # prod(1-V_(dim-1))
            p = self.V * stickLeft

            psiV = psi(self.beta * p)

            vVec = - self.beta*stickLeft*sum_r_w + self.beta*stickLeft*sumLnZ - corpus.M*self.beta*stickLeft*psiV;

            for k in xrange(self.K):
                tmp1 = self.beta*sum(sum_r_w[k+1:]*p[k+1:]/one_V[k]);
                tmp2 = self.beta*sum(sumLnZ[k+1:]*p[k+1:]/one_V[k]);
                tmp3 = corpus.M*self.beta*sum(psiV[k+1:]*p[k+1:]/one_V[k]);
                vVec[k] = vVec[k] + tmp1 - tmp2;
                vVec[k] = vVec[k] + tmp3;
                vVec[k] = vVec[k] 
            vVec[:self.K-2] -= (self.alpha-1)/one_V[:self.K-2];
            vVec[self.K-1] = 0;
            step_stick = self.getstepSTICK(self.V,vVec,sum_r_w,sumLnZ,self.beta,self.alpha,corpus.M);
            self.V = self.V + step_stick*vVec;
            self.p = self.getP(self.V)

        lb += self.K*gammaln(self.alpha+1) - self.K*gammaln(self.alpha) + np.sum((self.alpha-1)*np.log(1-self.V[:self.K-1]))
        if self.is_verbose:
            print 'p(V)-q(V) %f' % lb
        return lb
Example #12
    def update_alpha(self, gammat, rho):
        """
        Update parameters for the Dirichlet prior on the per-document
        topic weights `alpha` given the last `gammat`.

        Uses Newton's method, described in **Huang: Maximum Likelihood Estimation of Dirichlet Distribution Parameters.** (http://www.stanford.edu/~jhuang11/research/dirichlet/dirichlet.pdf)

        """
        N = float(len(gammat))
        logphat = sum(dirichlet_expectation(gamma) for gamma in gammat) / N
        dalpha = numpy.copy(self.alpha)
        gradf = N * (psi(numpy.sum(self.alpha)) - psi(self.alpha) + logphat)

        c = N * polygamma(1, numpy.sum(self.alpha))
        q = -N * polygamma(1, self.alpha)

        b = numpy.sum(gradf / q) / ( 1 / c + numpy.sum(1 / q))

        dalpha = -(gradf - b) / q

        if all(rho() * dalpha + self.alpha > 0):
            self.alpha += rho() * dalpha
        else:
            logger.warning("updated alpha not positive")
        logger.info("optimized alpha %s" % list(self.alpha))

        return self.alpha
Example #13
    def variation_update(self):
        #update phi, gamma
        e_log_theta = psi(self.gamma) - psi(np.sum(self.gamma, 1))[:,np.newaxis]

        new_beta = np.zeros([self.K, self.V])

        for di in xrange(self.D):
            words = self.doc_ids[di]
            cnt = self.doc_cnt[di]
            doc_len = np.sum(cnt)

            new_phi = np.log(self.beta[:,words]+eps) + e_log_theta[di,:][:,np.newaxis]

            gradient = np.zeros(self.K)
            for adi in self.doc_links[di]:
                gradient += self.eta * self.pi[adi,:] / doc_len

            new_phi += gradient[:,np.newaxis]
            new_phi = np.exp(new_phi)
            new_phi = new_phi/np.sum(new_phi,0)

            self.phi[di] = new_phi

            self.pi[di,:] = np.sum(cnt * self.phi[di],1)/np.sum(cnt * self.phi[di])
            self.gamma[di,:] = np.sum(cnt * self.phi[di], 1) + self.alpha
            new_beta[:, words] += (cnt * self.phi[di])

        self.beta = new_beta / np.sum(new_beta, 1)[:,np.newaxis]
Example #14
def _fit_s(D, a0, logp, tol=1e-7, maxiter=1000):
    '''Assuming a fixed mean for Dirichlet distribution, maximize likelihood
    for preicision a.k.a. s'''
    N, K = D.shape
    s1 = a0.sum()
    m = a0 / s1
    mlogp = (m*logp).sum()
    for i in xrange(maxiter):
        s0 = s1
        g = psi(s1) - (m*psi(s1*m)).sum() + mlogp
        h = _trigamma(s1) - ((m**2)*_trigamma(s1*m)).sum()

        if g + s1 * h < 0:
            s1 = 1/(1/s0 + g/h/(s0**2))
        if s1 <= 0:
            s1 = s0 * exp(-g/(s0*h + g)) # Newton on log s
        if s1 <= 0:
            s1 = 1/(1/s0 + g/((s0**2)*h + 2*s0*g)) # Newton on 1/s
        if s1 <= 0:
            s1 = s0 - g/h # Newton
        if s1 <= 0:
            raise Exception('Unable to update s from {}'.format(s0))

        a = s1 * m
        if abs(s1 - s0) < tol:
            return a

    raise Exception('Failed to converge after {} iterations, s is {}'
            .format(maxiter, s1))
Example #15
def rice_homomorf_est(image, SNR = 0, LPF = 4.8, mode = 2, config = build_default()):
    window_size = config['ex_window_size']
    (M2, Sigma_n) = em_ml_rice2D(image, config['ex_iterations'], [window_size, window_size])
    Sigma_n2 = lpf(Sigma_n, config['lpf_f_SNR'])
    M1 = filter2B(image, numpy.ones((5, 5)) / 25)

    if numpy.size(SNR) == 1 and numpy.all(SNR == 0):  # SNR not supplied: estimate it from the data
        SNR = M2 / Sigma_n

    Rn = abs(image - M1)
    lRn = numpy.log(Rn * (Rn != 0) + 0.001 * (Rn == 0))
    LPF2 = lpf(lRn, LPF)
    Mapa2 = numpy.exp(LPF2)
    MapaG = Mapa2 * 2 / numpy.sqrt(2) * numpy.exp(-special.psi(1)/2.)

    LocalMean = 0
    if mode == 1:
        LocalMean = M1
    elif mode == 2:
        LocalMean = M2

    Rn = numpy.abs(image - LocalMean)
    lRn = numpy.log(Rn * (Rn != 0) + 0.001 * (Rn == 0))
    LPF2 = lpf(lRn, LPF)
    Fc1 = correct_rice_gauss(SNR)
    LPF1 = LPF2 - Fc1
    LPF1 = lpf(LPF1, config['lpf_f_Rice'], 2.0)
    Mapa1 = numpy.exp(LPF1)
    MapaR = Mapa1*2/numpy.sqrt(2)*numpy.exp(-special.psi(1)/2.)
    return MapaR, MapaG
Example #16
def chaowangjost(counts):
    """Entropy calculation using Chao, Wang, Jost correction.
    doi: 10.1111/2041-210X.12108

    Parameters
    ----------
    counts : list
        bin counts

    Returns
    -------
    entropy : float
    """
    n_samples = npsum(counts)
    bcbc = bincount(counts.astype(int))
    if len(bcbc) < 3:
        return grassberger(counts)
    if bcbc[2] == 0:
        if bcbc[1] == 0:
            A = 1.
        else:
            A = 2. / ((n_samples - 1.) * (bcbc[1] - 1.) + 2.)
    else:
        A = 2. * bcbc[2] / ((n_samples - 1.) * (bcbc[1] - 1.) +
                            2. * bcbc[2])
    pr = arange(1, int(n_samples))
    pr = 1. / pr * (1. - A) ** pr
    entropy = npsum(counts / n_samples * (psi(n_samples) -
                    nan_to_num(psi(counts))))

    if bcbc[1] > 0 and A != 1.:
        entropy += nan_to_num(bcbc[1] / n_samples *
                              (1 - A) ** (1 - n_samples *
                                          (-log(A) - npsum(pr))))
    return entropy
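
The sum over counts / n_samples * (psi(n_samples) - psi(counts)) above is the digamma-based replacement for the naive plug-in entropy. A short self-contained sketch (simulated counts only, an addition of this write-up) contrasts the two terms:

# Standalone illustration of the psi-based entropy term used above.
import numpy as np
from scipy.special import psi

rng = np.random.default_rng(0)
p = np.array([0.5, 0.25, 0.125, 0.125])
counts = rng.multinomial(200, p).astype(float)
counts = counts[counts > 0]            # guard against psi(0)
n = counts.sum()

plugin = -np.sum((counts / n) * np.log(counts / n))
psi_based = np.sum((counts / n) * (psi(n) - psi(counts)))
print(plugin, psi_based, -np.sum(p * np.log(p)))  # plug-in, psi-based, true entropy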
Example #17
def entropy(X, k=1):
    ''' Returns the entropy of X.
    Parameters
    ===========
    X : array-like, shape (n_samples, n_features)
        The data the entropy of which is computed
    k : int, optional
        number of nearest neighbors for density estimation
    Notes
    ======
    Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy
    of a random vector. Probl. Inf. Transm. 23, 95-101.
    See also: Evans, D. 2008 A computationally efficient estimator for
    mutual information, Proc. R. Soc. A 464 (2093), 1203-1215.
    and:
    Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
    information. Phys Rev E 69(6 Pt 2):066138.
    '''

    # Distance to kth nearest neighbor
    r = nearest_distances(X, k) # squared distances
    n, d = X.shape
    volume_unit_ball = (pi**(.5*d)) / gamma(.5*d + 1)
    '''
    F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures
    for Continuous Random Variables. Advances in Neural Information
    Processing Systems 21 (NIPS). Vancouver (Canada), December.
    return d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k)
    '''
    return (d*np.mean(np.log(r + np.finfo(X.dtype).eps))
            + np.log(volume_unit_ball) + psi(n) - psi(k))
Example #18
def knn_mutinf(x, y, k=None, boxsize=None):
    """Entropy calculation

    Parameters
    ----------
    x : array_like, shape = (n_samples, n_dim)
        Independent variable
    y : array_like, shape = (n_samples, n_dim)
        Independent variable
    k : int
        Number of nearest neighbors.
    boxsize : float (or None)
        Wrap space between [0., boxsize)
    Returns
    -------
    mi : float
    """

    data = hstack((x, y))

    k = k if k else max(3, int(data.shape[0] * 0.01))

    # Find nearest neighbors in joint space, p=inf means max-norm
    dvec = nearest_distances(data, k=k)
    a, b, c, d = (
        avgdigamma(atleast_2d(x).reshape(data.shape[0], -1), dvec),
        avgdigamma(atleast_2d(y).reshape(data.shape[0], -1), dvec),
        psi(k),
        psi(data.shape[0]),
    )
    return -a - b + c + d
Example #19
def _e_log_beta(c0,d0,c,d):
    ''' Calculates expectation of log pdf of beta distributed parameter'''
    log_C    = gammaln(c0 + d0) - gammaln(c0) - gammaln(d0)
    psi_cd   = psi(c+d)
    log_mu   = (c0 - 1) * ( psi(c) - psi_cd )
    log_i_mu = (d0 - 1) * ( psi(d) - psi_cd )
    return np.sum(log_C + log_mu + log_i_mu)
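
A hedged numerical check of the identity this function relies on, E_q[log mu] = psi(c) - psi(c + d) for mu ~ Beta(c, d) (the concrete numbers are illustrative):

# Monte Carlo check of E[log mu] under a Beta distribution.
import numpy as np
from scipy.special import psi

c, d = 2.0, 5.0
print(psi(c) - psi(c + d))                               # closed form
print(np.log(np.random.beta(c, d, size=200000)).mean())  # Monte Carlo, close agreement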
Example #20
 def _update_resps(self, X, alphaK, *args):
     '''
     Updates distribution of latent variable with Dirichlet prior
     '''
     e_log_weights = psi(alphaK) - psi(np.sum(alphaK))
     return self._update_resps_parametric(X,e_log_weights,self.n_components,
                                          *args)
Example #21
File: lda.py Project: laiguokun/LDA
def compute_mle(d):
	global alpha,beta,Gamma,Phi,doc,timer,doc_cnt;
	res = 0;
	res += sp.gammaln(K * alpha);
	res -= K * sp.gammaln(alpha);
	gamma_sum = sum_vector(Gamma[d]);
	length = len(doc[d]);
	psi = [];

	for i in range(K):
		psi.append(sp.psi(Gamma[d][i]) - sp.psi(gamma_sum));

	for i in range(K):
		res += (alpha - 1) * psi[i];
		res += sp.gammaln(Gamma[d][i]);
		res -= (Gamma[d][i] - 1) * psi[i];

	now = time.time();

	for n in range(length):
		for i in range(K):
			res += doc_cnt[d][n] * Phi[d][n][i] * psi[i];
#			res -= doc_cnt[d][n] * Phi[d][n][i] * math.log(Phi[d][n][i]);
			res += doc_cnt[d][n] * Phi[d][n][i] * math.log(beta[i][doc[d][n]]/Phi[d][n][i]);

	timer += time.time() - now;
	res -= sp.gammaln(gamma_sum);
	return res;
Example #22
    def update_Z(self, corpus, iter):
        lb = 0
        bp = self.beta * self.p

        corpus.A = bp + corpus.phi_doc
        # taylor approximation on E[\sum lnZ]
        xi = np.sum(corpus.A / corpus.B, 1)
        E_inv_w = np.zeros([corpus.M, corpus.K])
        ln_E_w = np.zeros([corpus.M, corpus.K])
        for mi in xrange(corpus.M):
            E_inv_w[mi, :] = np.prod((corpus.w_A / corpus.w_B)[corpus.R[mi, :] == 1, :], 0)
            ln_E_w[mi, :] = np.sum((np.log(corpus.w_B) - psi(corpus.w_A)) * corpus.R[mi, :][:, np.newaxis], 0)

        if iter < self.hdp_init_step:
            corpus.B = 1.0 + (corpus.Nm / xi)[:, np.newaxis]
        else:
            corpus.B = E_inv_w + (corpus.Nm / xi)[:, np.newaxis]

        # expectation of p(Z)
        lb += np.sum(
            -bp * ln_E_w + (bp - 1) * (psi(corpus.A) - np.log(corpus.B)) - E_inv_w * (corpus.A / corpus.B) - gammaln(bp)
        )

        # entropy of q(Z)
        lb -= np.sum(
            corpus.A * np.log(corpus.B)
            + (corpus.A - 1) * (psi(corpus.A) - np.log(corpus.B))
            - corpus.A
            - gammaln(corpus.A)
        )
        if self.is_verbose:
            print "p(z)-q(z) %f" % lb
        return lb
Example #23
def knn_entropy(*args, k=None, boxsize=None):
    """Entropy calculation

    Parameters
    ----------
    args : numpy.ndarray, shape = (n_samples, ) or (n_samples, n_dims)
        Data of which to calculate entropy. Each array must have the same
        number of samples.
    k : int
        Number of nearest neighbors.
    boxsize : float (or None)
        Wrap space between [0., boxsize)
    Returns
    -------
    entropy : float
    """
    data = vstack((args)).T
    n_samples = data.shape[0]
    k = k if k else max(3, int(data.shape[0] * 0.01))
    n_dims = data.shape[1]

    nneighbor = nearest_distances(data, k=k)
    const = psi(n_samples) - psi(k) + n_dims * log(2)

    return (const + n_dims * log(nneighbor).mean())
Example #24
    def update_beta(self, corpus):
        ElogW = np.log(corpus.w_B) - psi(corpus.w_A)
        lnZ = psi(corpus.A) - np.log(corpus.B)

        first = np.zeros([corpus.M, self.K])
        for mi in xrange(corpus.M):
            first[mi, :] = -self.p * np.sum(ElogW[corpus.R[mi, :] == 1, :], 0)
        # first_sum = np.sum(first)
        # second = np.sum(lnZ * self.p)

        # for i in xrange(1):
        #     last = - corpus.M * np.sum(self.p*psi(self.beta * self.p))
        #     gradient = first_sum + second + last
        #     gradient /= corpus.M * np.sum(self.p * self.p * psi(self.beta*self.p))
        #     step = self.getstepBeta(gradient, self.beta, first, lnZ, self.p, corpus)
        #     self.beta += step*gradient

        # since beta does not change a lot, this way is more efficient
        candidate = np.linspace(-1, 1, 31)
        f = np.zeros(len(candidate))
        for i in xrange(len(candidate)):
            step = candidate[i]
            new_beta = self.beta + self.beta * step
            if new_beta < 0:
                f[i] = -np.inf
            else:
                bp = new_beta * self.p
                f[i] = np.sum(new_beta * first) + np.sum(bp * lnZ) - np.sum(corpus.M * gammaln(bp))
        best_idx = f.argsort()[-1]
        maxstep = candidate[best_idx]
        self.beta += self.beta * maxstep

        if self.is_verbose:
            print "new beta = %.2f, %.2f" % (self.beta, candidate[best_idx])
Example #25
def e_step_one_iter(alpha, beta, docs, phi, ips):
    M, K = docs.size, alpha.size


    for m in xrange(M):
        N_m = docs[m].size
        psi_sum_ips = psi(ips[m, :].sum())
        for n in xrange(N_m):
            for i in xrange(K):
                E_q = psi(ips[m, i]) - psi_sum_ips
                phi[m][n, i] = (beta[i, docs[m][n]] *
                                np.exp(E_q))
        phi[m] /= phi[m].sum(axis=1)[:, None]  # normalize phi
        ips[m] = alpha + phi[m].sum(axis=0)


    # gradient computation
    grad_ips = np.zeros(ips.shape, dtype=np.float64)
    for m in xrange(M):
        for i in xrange(K):
            grad_ips[m, i]\
                = (polygamma(1, ips[m, i]) * (alpha[i] + phi[m][:, i].sum() - ips[m, i]) -
                   polygamma(1, ips[m, :].sum()) * (alpha.sum() + phi[m].sum() - ips[m, :].sum()))

    return (phi, ips, grad_ips)
Example #26
def dkl_wishart(a1,B1,a2,B2):
    """
    returns the KL divergence bteween two Wishart distribution of
    parameters (a1,B1) and (a2,B2),
    where a1 and a2 are degrees of freedom
    B1 and B2 are scale matrices
    """
    from scipy.special import psi,gammaln
    from numpy.linalg import det,pinv
    tiny = 1.e-15
    # fixme: check size
    dim = B1.shape[0]
    d1 = max(det(B1),tiny)
    d2 = max(det(B2),tiny)
    lgc = dim*(dim-1)*np.log(np.pi)/4
    lg1 = lgc
    lg2 = lgc
    lw1 = -np.log(d1) + dim*np.log(2)
    lw2 = -np.log(d2) + dim*np.log(2)
    for i in range(dim):
        lg1 += gammaln((a1-i)/2)
        lg2 += gammaln((a2-i)/2)
        lw1 += psi((a1-i)/2)
        lw2 += psi((a2-i)/2)
    lz1 = 0.5*a1*dim*np.log(2)-0.5*a1*np.log(d1)+lg1
    lz2 = 0.5*a2*dim*np.log(2)-0.5*a2*np.log(d2)+lg2
    dkl = (a1-dim-1)*lw1-(a2-dim-1)*lw2-a1*dim
    dkl += a1*np.trace(np.dot(B2,pinv(B1)))
    dkl /=2
    dkl += (lz2-lz1)
    return dkl
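
A quick sanity check (illustrative, assuming numpy is imported as np alongside the function above): the divergence between identical Wishart distributions should be numerically zero, and positive otherwise.

# Sanity check for dkl_wishart above.
import numpy as np

B = np.eye(3)
print(dkl_wishart(5.0, B, 5.0, B))        # ~0.0
print(dkl_wishart(5.0, B, 8.0, 2.0 * B))  # > 0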
Example #27
    def get_vlb(self):
        vlb = 0

        # Get the VLB of the expected class assignments
        E_ln_m = self.mf_expected_log_m()
        for n in xrange(self.N):
            # Add the cross entropy of p(c | m)
            vlb += Discrete().negentropy(E_x=self.mf_m[n,:], E_ln_p=E_ln_m)

            # Subtract the negative entropy of q(c)
            vlb -= Discrete(self.mf_m[n,:]).negentropy()

        # Get the VLB of the connection probability matrix
        # Add the cross entropy of p(p | tau1, tau0)
        vlb += Beta(self.tau1, self.tau0).\
            negentropy(E_ln_p=(psi(self.mf_tau1) - psi(self.mf_tau0 + self.mf_tau1)),
                       E_ln_notp=(psi(self.mf_tau0) - psi(self.mf_tau0 + self.mf_tau1))).sum()

        # Subtract the negative entropy of q(p)
        vlb -= Beta(self.mf_tau1, self.mf_tau0).negentropy().sum()

        # Get the VLB of the block probability vector, m
        # Add the cross entropy of p(m | pi)
        vlb += Dirichlet(self.pi).negentropy(E_ln_g=self.mf_expected_log_m())

        # Subtract the negative entropy of q(m)
        vlb -= Dirichlet(self.mf_pi).negentropy()

        for c1 in xrange(self.C):
            for c2 in xrange(self.C):
                vlb += self.weight_models[c1][c2].get_vlb()

        return vlb
Example #28
    def compute_moments_and_cgf(self, phi, mask=True):
        r"""
        Compute the moments and :math:`g(\phi)`.

        .. math::

           \overline{\mathbf{u}}  (\boldsymbol{\phi})
           &=
           \begin{bmatrix}
             \psi(\phi_1) - \psi(\sum_d \phi_{1,d})
           \end{bmatrix}
           \\
           g_{\boldsymbol{\phi}} (\boldsymbol{\phi})
           &=
           TODO
        """
        sum_gammaln = np.sum(special.gammaln(phi[0]), axis=-1)
        gammaln_sum = special.gammaln(np.sum(phi[0], axis=-1))
        psi_sum = special.psi(np.sum(phi[0], axis=-1, keepdims=True))
        
        # Moments <log x>
        u0 = special.psi(phi[0]) - psi_sum
        u = [u0]
        # G
        g = gammaln_sum - sum_gammaln

        return (u, g)
Example #29
  def estimate_abundances(self):
    """
    Compute expectations and variances of the log relative abundances (log rho)
    of each target. Use these to compute 95% confidence intervals of the relative
    abundances themselves.
    """
    log_theta = np.zeros(self.ntargs)
    sd_log_theta = np.zeros(self.ntargs)
    for t in xrange(self.ntargs):
      log_theta[t] = psi(self.alpha[t]) - psi(self.alpha[t]+self.beta[t])
      var_log_theta = polygamma(1,self.alpha[t]) - polygamma(1,
         self.alpha[t]+self.beta[t])
      for j in xrange(t):
        log_theta[t] += psi(self.beta[j]) - psi(self.alpha[j]+self.beta[j])
        var_log_theta += polygamma(1,self.beta[j]) - polygamma(1,
           self.alpha[j]+self.beta[j])
      sd_log_theta[t] = sqrt(var_log_theta)
    self.log_theta = log_theta
    self.sd_log_theta = sd_log_theta
    theta_ci_low = np.zeros(self.ntargs)
    theta_ci_hi = np.zeros(self.ntargs)
    for t in xrange(self.ntargs):
      self.targ_samp_prob[t] = exp(log_theta[t])
      theta_ci_low[t] = exp(log_theta[t] - ci95sd * sd_log_theta[t])
      theta_ci_hi[t] = exp(log_theta[t] + ci95sd * sd_log_theta[t])

    # Compute relative abundances and confidence limits
    w = self.targ_samp_prob / self.eff_len
    self.rho = w / sum(w)
    w_low = theta_ci_low / self.eff_len
    self.rho_ci_low = w_low / sum(w_low)
    w_hi = theta_ci_hi / self.eff_len
    self.rho_ci_hi = w_hi / sum(w_hi)
Example #30
def dirichlet_expectation(alpha):
    """
    For a vector theta ~ Dir(alpha), computes E[log(theta)] given alpha.
    """
    if (len(alpha.shape) == 1):
        return(psi(alpha) - psi(n.sum(alpha)))
    return(psi(alpha) - psi(n.sum(alpha, 1))[:, n.newaxis])
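
A hedged numerical check (keeping the snippet's n alias for numpy): the closed form psi(alpha) - psi(sum(alpha)) should match a Monte Carlo average of log(theta) over Dirichlet draws.

# Monte Carlo check of dirichlet_expectation above.
import numpy as n
from scipy.special import psi

alpha = n.array([0.5, 1.0, 3.0])
print(dirichlet_expectation(alpha))
print(n.log(n.random.dirichlet(alpha, size=200000)).mean(axis=0))  # agrees to ~2 decimals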
Example #31
def update_alpha_fp(alpha, theta, sentence_subj, tol=1e-12):
# Fixed point method in [Minka00]
    K = np.size(alpha, 0)
    M, S = np.shape(theta)

    for k in xrange(K):
        theta_k = theta[sentence_subj == k, :]
        log_p = 1.0 / M * np.sum(np.log(theta_k), 0)
        print log_p
        while True:
            oldnorm = np.linalg.norm(alpha[k])
            alpha[k] = inversepsi(psi(np.sum(alpha[k])) + log_p)
            if abs(np.linalg.norm(alpha[k]) - oldnorm) < tol:
                break

    return alpha
Example #32
 def findAlphaBeta(self):
     # ADJUST ALPHA AND BETA BY USING MINKA'S FIXED-POINT ITERATION
     numerator = 0
     denominator = 0
     for d in range(self.DOCS):
         numerator += psi(self.cntDT[d] + self.alpha) - psi(self.alpha)
         denominator += psi(np.sum(self.cntDT[d] + self.alpha)) - psi(
             np.sum(self.alpha))
     self.alpha *= numerator / denominator  # UPDATE ALPHA
     numerator = 0
     denominator = 0
     for z in range(self.TOPICS):
         numerator += np.sum(
             psi(self.cntTW[z] + self.beta) - psi(self.beta))
         denominator += psi(np.sum(self.cntTW[z] + self.beta)) - psi(
             self.VOCABS * self.beta)
     self.beta = (self.beta * numerator) / (self.VOCABS * denominator
                                            )  # UPDATE BETA
Example #33
    def _expec_s(self):
        if not self.use_svi:
            return super(GPClassifierSVI, self)._expec_s()

        self.old_s = self.s
        invK_mm_expecFF = self.invK_mm.dot(
            self.uS + self.um_minus_mu0.dot(self.um_minus_mu0.T))
        self.rate_s = self.rate_s0 + 0.5 * np.trace(invK_mm_expecFF)
        # Update expectation of s. See approximations for Binary Gaussian Process Classification, Hannes Nickisch
        self.s = self.shape_s / self.rate_s
        self.Elns = psi(self.shape_s) - np.log(self.rate_s)
        if self.verbose:
            logging.debug("Updated inverse output scale: " + str(self.s))

        self.Ks_mm = self.K_mm / self.s
        self.invKs_mm = self.invK_mm * self.s
        self.Ks_nm = self.K_nm / self.s
Example #34
 def _init_component(self, m, dim):
     assert self.mode_dims[m] == dim
     K = self.n_components
     s = self.smoothness
     if not self.debug:
         gamma_DK = s * rn.gamma(s, 1. / s, size=(dim, K))
         delta_DK = s * rn.gamma(s, 1. / s, size=(dim, K))
     else:
         gamma_DK = s * np.ones((dim, K))
         delta_DK = s * np.ones((dim, K))
     self.gamma_DK_M[m] = gamma_DK
     self.delta_DK_M[m] = delta_DK
     self.E_DK_M[m] = gamma_DK / delta_DK
     self.sumE_MK[m, :] = self.E_DK_M[m].sum(axis=0)
     self.G_DK_M[m] = np.exp(sp.psi(gamma_DK) - np.log(delta_DK))
     if m == 0 or not self.debug:
         self.beta_M[m] = 1. / self.E_DK_M[m].mean()
Example #35
 def update(self, X):
     Y = np.zeros(self.K)
     XY = np.zeros((self.K, self.N))
     for x in X:
         L = np.array([
             psi(self.phi[k]) - self.tau[k]**(-1) -
             ((x - self.mu[k])**2).sum() / 2 for k in range(self.K)
         ])
         y = np.exp(L) / np.exp(L).sum()
         Y += y
         XY += np.array([x * y[k] for k in range(self.K)])
     self.phi = self.phi + Y
     self.mu = np.array([
         (self.tau[k] * self.mu[k] + XY[k]) / (self.tau[k] + Y[k])
         for k in range(self.K)
     ])
     self.tau = self.tau + Y
Example #36
    def compute_likelihood(self, u, gamma, digamma_gamma, gammaSum, phiO, phiD,
                           phiT, betaO, betaD, betaT, docs, idx_corpus_o,
                           idx_corpus_d, idx_corpus_t):
        J = self.J
        K = self.K
        L = self.L
        alpha = self.alpha

        likelihood = 0
        digsum = psi(gammaSum)
        likelihood = loggamma(
            alpha * J * K * L) - J * K * L * loggamma(alpha) - (
                loggamma(gammaSum))  # 1.1, 1.2, 1.3

        for j in range(J):
            for k in range(K):
                for l in range(L):
                    likelihood += (alpha - 1) * (
                        digamma_gamma[j, k, l] - digsum) + loggamma(
                            gamma[u, j, k, l]) - (gamma[u, j, k, l] - 1) * (
                                digamma_gamma[j, k, l] - digsum
                            )  # 2.1, 2.2, 2.3
                    for w in range(len(idx_corpus_o[u])
                                   ):  #  int(docs.iloc[u]['wordcount'])
                        if phiO[w, j] > 0 and phiD[w, k] > 0 and phiT[w,
                                                                      l] > 0:
                            likelihood += phiO[w,
                                               j] * phiD[w, k] * phiT[w, l] * (
                                                   digamma_gamma[j, k, l] -
                                                   digsum)  # 3.1
        for j in range(self.J):
            for wo in range(len(idx_corpus_o[u])):
                if phiO[wo, j] > 0:
                    likelihood += -phiO[wo, j] * math.log(phiO[wo, j]) + phiO[
                        wo, j] * betaO[j, idx_corpus_o[u][wo]]  # 3.2 O; 3.3 O
        for k in range(self.K):
            for wd in range(len(idx_corpus_d[u])):
                if phiD[wd, k] > 0:
                    likelihood += -phiD[wd, k] * math.log(phiD[wd, k]) + phiD[
                        wd, k] * betaD[k, idx_corpus_d[u][wd]]  # 3.2 D; 3.3 D
        for l in range(self.L):
            for wt in range(len(idx_corpus_t[u])):
                if phiT[wt, l] > 0:
                    likelihood += -phiT[wt, l] * math.log(phiT[wt, l]) + phiT[
                        wt, l] * betaT[l, idx_corpus_t[u][wt]]  # 3.2 T; 3.3 T
        return likelihood
Example #37
 def wishpart(self, k):
     part1 = sum(
         [psi((self.Vr[k] + 1 - d) / 2) for d in range(1, self.D + 1)])
     part1 += self.D * np.log(2) + np.log(np.linalg.det(self.VW[k]))
     part1 *= (self.r - self.Vr[k]) / 2
     part2 = np.linalg.inv(self.VW[k]) - np.linalg.inv(self.W)
     part2 = np.dot(part2, self.Vr[k] * self.VW[k])
     part2 = 0.5 * np.trace(part2)
     part3 = -(self.r / 2) * np.log(np.linalg.det(self.W))
     part4 = (self.Vr[k] / 2) * np.log(np.linalg.det(self.VW[k]))
     part5 = (self.Vr[k] - self.r) * (self.D / 2) * np.log(2)
     part6 = sum(
         [loggamma((self.Vr[k] + 1 - d) / 2) for d in range(1, self.D + 1)])
     part6 -= sum(
         [loggamma((self.r + 1 - d) / 2) for d in range(1, self.D + 1)])
     res = part1 + part2 + part3 + part4 + part5 + part6
     return res
Example #38
def _fixedpoint(D, tol=1e-7, maxiter=None):
    '''Simple fixed point iteration method for MLE of Dirichlet distribution'''
    N, K = D.shape
    logp = log(D).mean(axis=0)
    a0 = _init_a(D)

    # Start updating
    if maxiter is None:
        maxiter = sys.maxint
    for i in xrange(maxiter):
        a1 = _ipsi(psi(a0.sum()) + logp)
        # if norm(a1-a0) < tol:
        if abs(loglikelihood(D, a1)-loglikelihood(D, a0)) < tol: # much faster
            return a1
        a0 = a1
    raise Exception('Failed to converge after {} iterations, values are {}.'
                    .format(maxiter, a1))
Example #39
 def predictFactor(self):
     """Predict expected factor values from prior parameters"""
     for conditioner in self.conditionerRanges:
         
         nu=self.pseudoCounts[conditioner]
         
         fsum=0.0
         
         ccond=tuple()
         if conditioner != (None,):
             ccond=conditioner
 
         for condrv in self.conditionedRanges:
             self.factor[condrv+ccond]=np.exp(spf.psi(nu*self.naturalParams[(condrv,conditioner)])-spf.psi(nu))
             fsum+=self.factor[condrv+ccond]
         for condrv in self.conditionedRanges:
             self.factor[condrv+ccond]/=fsum
Example #40
def invpsi(x):
    r"""
    Inverse digamma (psi) function.

    The digamma function is the derivative of the log gamma function.
    This calculates the value Y > 0 for a value X such that digamma(Y) = X.

    See: http://www4.ncsu.edu/~pfackler/
    """
    L = 1.0
    y = np.exp(x)
    while (L > 1e-10):
        y += L * np.sign(x - special.psi(y))
        L /= 2
    # Ad hoc by Jaakko
    y[x < -10] = -1 / x[x < -10]
    return y
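
A small round-trip check (illustrative, assuming numpy and scipy.special are imported as above): applying invpsi to psi(y) should approximately recover y for arguments away from the pole at zero.

# Round-trip check for invpsi above.
import numpy as np
from scipy import special

y = np.array([0.2, 1.0, 5.0, 50.0])
print(invpsi(special.psi(y)))  # approximately [0.2, 1.0, 5.0, 50.0]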
Example #41
    def update_alpha_beta(self):

        # Update Beta
        x = 0
        y = 0
        for z in range(self.TOPICS):
            x += np.sum(psi(self.cntTW[z] + self.beta) - psi(self.beta))
            y += psi(np.sum(self.cntTW[z] + self.beta)) - psi(
                self.VOCABS * self.beta)
        self.beta = (self.beta * x) / (self.VOCABS * y)  # UPDATE BETA

        # Update Alpha
        x = 0
        y = 0
        for d in range(self.DOCS):
            y += psi(np.sum(self.cntDT[d] + self.alpha)) - psi(
                np.sum(self.alpha))
            x += psi(self.cntDT[d] + self.alpha) - psi(self.alpha)
        self.alpha *= x / y  # UPDATE ALPHA
Example #42
    def _expec_lnPi(self, posterior=True):
        self.expec_responsibilities()
        self.expec_weights()

        # check if E_t has been initialised. Only update alpha if it has. Otherwise E[lnPi] is given by the prior
        if np.any(self.E_t) and posterior:
            self._post_Alpha()
        sumAlpha = np.sum(self.alpha, 1)
        psiSumAlpha = psi(sumAlpha)
        for j in range(self.nclasses):
            for s in range(self.nscores):
                self.lnPi[:,
                          s, :] = (psi(self.alpha[:, s, :]) -
                                   psiSumAlpha)[np.newaxis, :]  #.dot(self.r)

        # need to update the cluster pseudo-count distributions first to get new expected eta and beta
        #translate \eta and \beta to \alpha.
        worker_counts = self.alpha - self.alpha_tr  #the counts for each worker
        self.a = np.zeros((self.nclasses, self.nscores, self.nclusters))
        self.b = np.zeros((self.nclasses, self.nscores, self.nclusters))

        for j in range(self.nclasses):
            # v_j^(k) ~ Beta( \beta_j^{q_k} ), where q_k is the cluster ID of worker k
            logv_j = psi(self.beta[j, :, :].dot(self.r.T)) - psi(
                self.beta[j, :, :].dot(self.r.T) +
                np.sum(worker_counts[j, :, :], axis=0)[np.newaxis, :])

            # s^(k)_{j, l} ~ Antoniak( n^(k)_{j, l}, \beta_j^{q_k} \eta_{j, l}^{q_k} )
            #The exact computation of the expected number of tables is given in:
            # A Note on the Implementation of Hierarchical Dirichlet Processes, Phil Blunsom et al.
            #The antoniak distribution is explained in: Distributed Algorithms for Topic Models, David Newman et al.
            s_j = np.zeros((self.nscores, self.nclusters))
            for l in range(self.nscores):
                counts = worker_counts[j, l, :][:, np.newaxis]
                conc = (self.beta[j, 0, :] * self.eta[j, l, :])[np.newaxis, :]
                # For the updates to eta and beta, we take an expectation of ln p(s^(k)_{j, l}) over cluster membership of k by
                # computing s^(k) using a weighted sum with weights p(q_k = m)
                # -- this follows from the equations in Moreno and Teh
                s_jl = conc * (psi(conc + counts) - psi(conc))  # nclusters x K
                s_j[l, :] = np.sum(s_jl * self.r, axis=0)

            # \eta_j^(m) ~ Dir( sum_{ k where q_k=m } s^(k)_{j, .} + \phi_j \gamma_j )
            # We need to determine expectation of \eta
            self.phigamma[
                j, :, :] = s_j + self.phi0[j, :, :] * self.gamma0[j, :, :]
            self.eta[j, :, :] = self.phigamma[j, :, :] / np.sum(
                self.phigamma[j, :, :], axis=0)[np.newaxis, :]

            # \beta_j^(k) ~ Gamma( sum_{k where q_k=m} sum_{l} s_{j, l}^(k) + a_j, b_j - sum_{k where q_k=m} log(v_{j}^(k) ) )
            # we need expectation of beta
            self.a[j, :, :] = np.sum(s_j, axis=0) + self.a0[j]
            self.b[j, :, :] = self.b0[j] - logv_j.dot(self.r)
        self.beta = self.a / self.b
Example #43
    def partialLogL_alt(self, problem, allpars, fitIndex):
        """
        Return the partial derivative of log( likelihood ) to the parameters.

        Parameters
        ----------
        problem : Problem
            to be solved
        allpars : array_like
            parameters of the problem
        fitIndex : array_like
            indices of parameters to be fitted

        """
        self.ncalls += 1

        scale = allpars[-2]
        power = allpars[-1]
        res = problem.residuals(allpars[:-2])

        ars = numpy.abs(res / scale)
        rsp = numpy.power(ars, power)
        if problem.weights is not None:
            rsp = rsp * problem.weights

        dLdm = power * rsp / res
        dM = problem.partial(allpars[:-2])

        dL = numpy.zeros(len(fitIndex), dtype=float)
        i = 0
        for k in fitIndex:
            if k >= 0:
                dL[i] = numpy.sum(dLdm * dM[:, k])
                i += 1
            elif k == -2:
                dL[-2] = -problem.sumweight / scale + power * numpy.sum(
                    rsp) / scale
            else:
                # special.psi( x ) is the same as special.polygamma( 0, x )
                dldp = problem.sumweight * (power + special.psi(1.0 / power))
                dldp /= (power * power)
                dldp -= (numpy.sum(rsp * numpy.log(ars)))
                dL[-1] = dldp

        return dL
Example #44
    def nextPartialData(self, problem, allpars, fitIndex, mockdata=None):
        """
        Return the partial derivative of all elements of the log( likelihood )
        to the parameters.

        Parameters
        ----------
        problem : Problem
            to be solved
        allpars : array_like
            parameters of the problem
        fitIndex : array_like
            indices of parameters to be fitted
        mockdata : array_like
            as calculated by the model

        """
        param = allpars[:-2]
        res = problem.residuals(param, mockdata=mockdata)
        scale = allpars[-2]
        power = allpars[-1]

        ars = numpy.abs(res / scale)
        rsp = numpy.power(ars, power)
        if problem.weights is not None:
            rsp = rsp * problem.weights
            wgt = problem.weights
        else:
            wgt = 1.0

        dLdm = power * rsp / res
        dM = problem.partial(param)
        ##      TBD import mockdata into partial
        #        dM = problem.partial( param, mockdata=mockdata )

        # special.psi( x ) is the same as special.polygamma( 0, x )
        dlp = wgt * (power + special.psi(1.0 / power)) / (power * power)

        for k in fitIndex:
            if k >= 0:
                yield (dLdm * dM[:, k])
            elif k == -2:
                yield (power * rsp - wgt) / scale
            else:
                yield dlp - rsp * numpy.log(ars)
Example #45
def _update_em_full(X, L, F, a, fix):
    fix_l, fix_f, fix_a = fix

    e = sys.float_info.min
    ## update a
    if not fix_a:
        LFt = L @ F.T
        start = time.time()
        # res = minimize(_obj_a, 1, method='nelder-mead',args = (X,LFt, a),
        #               options={'xtol': 1e-5, 'disp': False, 'maxiter':10})

        I, J = X.shape
        C1 = np.sum(psi(X + a.reshape(1, -1)) - log(LFt + a.reshape(1, -1)),
                    axis=0)
        C2 = np.sum((X + a.reshape(1, -1)) / (LFt + a.reshape(1, -1)), axis=0)
        params = [I, J, C1, C2]

        res = minimize(_obj_a,
                       a,
                       method='nelder-mead',
                       args=(params),
                       options={
                           'xtol': 1e-5,
                           'disp': False,
                           'maxiter': 50
                       })
        # res = minimize(_obj_a, 1, method='Newton-CG', jac=_obj_a_der,args = (params),
        # 			   options={'xtol': 1e-5, 'disp': False, 'maxiter':10})
        runtime = time.time() - start
        a = res.x
    ## update L
    if not fix_l:
        #LFt = L @ F.T
        M1 = (X / LFt) @ F
        M2 = ((X + a.reshape(1, -1)) / (LFt + a.reshape(1, -1))) @ F
        L = L * (M1 / M2)
        L = np.clip(L, a_min=e, a_max=None)
    ## update F
    if not fix_f:
        LFt = L @ F.T
        N1 = (X / LFt).T @ L
        N2 = ((X + a.reshape(1, -1)) / (LFt + a.reshape(1, -1))).T @ L
        F = F * (N1 / N2)
        F = np.clip(F, a_min=e, a_max=None)
    return L, F, a
Example #46
    def predict_features(self, features):
        # get the expected log word likelihoods of each token
        self.features = np.array(features)
        # compute ERho
        ElnRho = []
        for j in range(self.L):
            ElnL = np.sum(psi(
                (self.nu[j] + 1 + np.arange(1, self.D + 1)) /
                2)) + self.D * np.log(2.) + np.log(np.linalg.det(self.W[j]))
            Ecov = self.D / self.beta[j] + self.nu[j] * (
                self.features - self.m[j][None, :]) @ self.W[j] @ (
                    self.features - self.m[j][None, :]).T
            ElnRho.append(ElnL - self.D / 2.0 * np.log(2 * np.pi) - 0.5 * Ecov)
        lnptext_given_t = np.array(ElnRho).T
        lnptext_given_t -= logsumexp(lnptext_given_t, axis=1)[:, None]
        self.ElnRho = lnptext_given_t

        return lnptext_given_t  # N x nclasses where N is number of tokens/data points
Example #47
def uniform_divergence(x, tx, m=2):
    x = normalize(x, tx)
    cx = Counter(x)
    xk = np.array(cx.keys(), dtype=float)
    xk.sort()
    delta = np.zeros(len(xk))
    if len(xk) > 1:
        delta[0] = xk[1] - xk[0]
        delta[1:-1] = (xk[m:] - xk[:-m]) / m
        delta[-1] = xk[-1] - xk[-2]
    else:
        delta = np.array(np.sqrt(12))
    counter = np.array([cx[i] for i in xk], dtype=float)
    delta = delta / np.sum(delta)
    hx = np.sum(counter * np.log(counter / delta)) / len(x)
    hx -= np.log(len(x))
    hx += (psi(m) - np.log(m))
    return hx
Example #48
    def estimation(self, y):
        """ Estimate Shannon entropy.
        
        Parameters
        ----------
        y : (number of samples, dimension)-ndarray
            One row of y corresponds to one sample.
    
        Returns
        -------
        h : float
            Estimated Shannon entropy.
            
        References
        ----------
        M. N. Goria, Nikolai N. Leonenko, V. V. Mergel, and P. L. Novi 
        Inverardi. A new class of random vector entropy estimators and its 
        applications in testing statistical hypotheses. Journal of 
        Nonparametric Statistics, 17: 277-297, 2005. (S={k})
        
        Harshinder Singh, Neeraj Misra, Vladimir Hnizdo, Adam Fedorowicz
        and Eugene Demchuk. Nearest neighbor estimates of entropy.
        American Journal of Mathematical and Management Sciences, 23,
        301-321, 2003. (S={k})
        
        L. F. Kozachenko and Nikolai N. Leonenko. A statistical estimate
        for the entropy of a random vector. Problems of Information
        Transmission, 23:9-16, 1987. (S={1})
        
        Examples
        --------
        h = co.estimation(y)

        """

        num_of_samples, dim = y.shape
        distances_yy = knn_distances(y, y, True, self.knn_method, self.k,
                                     self.eps, 2)[0]
        v = volume_of_the_unit_ball(dim)
        distances_yy[:, self.k - 1][distances_yy[:, self.k - 1] == 0] = 1e-6
        h = log(num_of_samples - 1) - psi(self.k) + log(v) + \
            dim * sum(log(distances_yy[:, self.k-1])) / num_of_samples

        return h
Example #49
    def partialLogL(self, model, parlist, fitIndex):
        """
        Return the partial derivative of log( likelihood ) to the parameters.

        Parameters
        ----------
        model : Model
            model to calculate mock data
        parlist : array_like
            parameters of the problem
        fitIndex : array_like
            indices of the parameters to be fitted

        """
        self.ncalls += 1
        np = model.npchain
        scale = parlist[np]
        power = parlist[np + 1]
        res = self.getResiduals(model, parlist[:np])

        ars = numpy.abs(res / scale)
        rsp = numpy.power(ars, power)
        if self.weights is not None:
            rsp = rsp * self.weights

        dLdm = power * rsp / res
        dM = model.partial(self.xdata, parlist[:np])

        dL = numpy.zeros(len(fitIndex), dtype=float)
        i = 0
        for k in fitIndex:
            if k < np:
                dL[i] = numpy.sum(dLdm * dM[:, k])
            elif k == np:
                dL[i] = -self.sumweight / scale + power * numpy.sum(
                    rsp) / scale
            else:
                # special.psi( x ) is the same as special.polygamma( 0, x )
                dL[i] = self.sumweight * (power + special.psi(1.0 / power))
                dL[i] /= (power * power)
                dL[i] -= (numpy.sum(rsp * numpy.log(ars)))
            i += 1

        return dL
Example #50
    def klgamma(self, pa, pb, qa, qb):

        ## The KL distance for the gamma distribution. It is not used, but ported from the MATLAB code.

        n = max([pb.shape[1], pa.shape[1]])

        if pa.shape[1] == 1:
            pa = pa * np.ones((1, n))
        if pb.shape[1] == 1:
            pb = pb * np.ones((1, n))

        qa = qa * np.ones((1, n))
        qb = qb * np.ones((1, n))

        kl = sum(pa * np.log(pb) - gammaln(pa) - qa * np.log(qb) +
                 gammaln(qa) + (pa - qa) * (psi(pa) - np.log(pb)) -
                 (pb - qb) * pa / pb)

        return kl
Example #51
    def update_resp(self, Xf):
        """Updates the responsibilities matrix, based on the current
        goodness-of-fit of the classifiers, and the current gating weight
        vectors. Xf is the gateing feature matrix.
        """
        R, cls = self.R, self.cls
        Dy = float(cls[0].W.shape[0])

        # fill R with goodness-of-fit data from classifiers
        for k in xrange(R.shape[1]):
            cl = cls[k]
            tau_ak, tau_bk = cl.tau_ak, cl.tau_bk
            # k'th column is exp( Dy/2 E[ln Tk] - 1/2 (E[Tk] res + Dy var) )
            R[:, k] = exp(0.5 * (Dy * (psi(tau_ak) - log(tau_bk)) -
                                 (tau_ak / tau_bk) * cl.res + Dy * cl.var))
        # multiply with current gating
        R *= self.gating_matrix(Xf)
        # normalise row vectors
        R /= sum(R, 1).reshape(R.shape[0], 1)
Example #52
    def _estimate_alpha_beta(self):
        # ADJUST ALPHA AND BETA BY USING MINKA'S FIXED-POINT ITERATION
        numerator = 0
        denominator = 0
        previous_min = np.min(self.alpha)
        for r in range(self.nb_records):
            numerator += psi(self.cnt_rk[r] + self.alpha) - psi(self.alpha)
            denominator += psi(np.sum(self.cnt_rk[r] + self.alpha)) - psi(
                np.sum(self.alpha))
        self.alpha = self.alpha * (numerator / denominator)  # UPDATE ALPHA

        if 0 in self.alpha:  #THIS CASE IS VERY RARE AND HAPPENS WHEN A HIDDEN CLASS K HAS RECEIVED 0 ASSIGNMENTS
            print(
                "|----WARNING: alpha = 0 encountered"
            )  #FORCE THE 0 ALPHAS TO THE MINIMUM BETWEEN (THE SMALLEST NON-NULL ALPHA, 1/NB_RECORDS,
            self.alpha[self.alpha == 0] = min(
                previous_min, np.min(self.alpha[self.alpha > 0]),
                1.0 / self.nb_records)  #AND THE SMALLEST ALPHA BEFORE UPDATE

        for f in range(self.nb_features):
            numerator = 0
            denominator = 0
            previous_min = np.min(self.beta[f])
            for k in range(self.nb_hclass):
                numerator += psi(self.cnt_kv[f][k] + self.beta[f]) - psi(
                    self.beta[f])
                denominator += psi(
                    np.sum(self.cnt_kv[f][k] + self.beta[f])) - psi(
                        np.sum(self.beta[f]))
            self.beta[f] = self.beta[f] * (numerator / denominator
                                           )  # UPDATE BETA

            if 0 in self.beta[
                    f]:  #THIS CASE IS VERY RARE AND HAPPENS WHEN A VALUE DOES NOT HAVE ANY OCCURRENCE IN THE CORPUS
                print(
                    "|----WARNING: beta = 0 encountered"
                )  #FORCE THE 0 BETAS TO THE MINIMUM BETWEEN (THE SMALLEST NON-NULL BETA, 1/NB_VALUES,
                self.beta[f][self.beta[f] == 0] = min(
                    previous_min, np.min(self.beta[f][self.beta[f] > 0]), 1.0 /
                    self.vocab_size[f])  #AND THE SMALLEST APLHA BEFORE UPDATE