def update_global_params_VB(self, SS, rho=None, mergeCompA=None,
                            mergeCompB=None, sortorder=None, **kwargs):
    ''' Update global stick-breaking parameters rho, omega.

    Parameters
    ----------
    SS : suff-stat object with attribute K (number of active components)
    mergeCompA, mergeCompB : int, optional
        When provided, components A and B are treated as merged and the
        update is a fast heuristic instead of a numerical optimization.

    Post Condition
    --------------
    Attributes rho, omega, K updated in place; cached values cleared.
    '''
    if mergeCompA is None:
        # Standard case:
        # Update via gradient descent.
        rho, omega = self._find_optimum_rhoomega(SS, **kwargs)
    else:
        # Special update case for merges:
        # Fast, heuristic update for rho and omega directly from existing
        # values. Pool beta mass of comp B into comp A, then drop B.
        beta = rho2beta_active(self.rho)
        beta[mergeCompA] += beta[mergeCompB]
        beta = np.delete(beta, mergeCompB, axis=0)
        rho = beta2rho(beta, SS.K)
        # Copy before the in-place += below: without the copy, self.omega
        # is mutated through the alias before reassignment, leaving the
        # object half-updated if an error occurs mid-method.
        omega = self.omega.copy()
        omega[mergeCompA] += omega[mergeCompB]
        omega = np.delete(omega, mergeCompB, axis=0)
    self.rho = rho
    self.omega = omega
    self.K = SS.K
    self.ClearCache()
def setParamsFromBeta(self, K, beta=None, oldWay=1):
    """ Set params to reasonable values given comp probabilities.

    Parameters
    --------
    K : int
        number of components
    beta : 1D array, size K. optional, default=[1/K 1/K ... 1/K]
        probability of each component
    oldWay : int/bool, optional, default=1
        If truthy, use the legacy recipe: fixed omega magnitude and
        rho derived from beta2rho. Otherwise use the moment-matching
        eta1/eta0 construction below.

    Post Condition for VB
    ---------
    Attributes rho, omega set so q(beta) has properties:
    * mean of (nearly) beta, allowing for some small remaining mass.
    * moderate variance.
    """
    self.ClearCache()
    K = int(K)
    self.K = K
    if beta is None:
        # Default: uniform distribution over the K components.
        beta = 1.0 / K * np.ones(K)
    # Input sanity checks (stripped under python -O; callers should
    # already pass valid probabilities).
    assert beta.ndim == 1
    assert np.sum(beta) <= 1.0 + 1e-9
    assert beta.size == self.K
    if oldWay:
        if np.allclose(beta.sum(), 1.0):
            # beta uses all the mass: invent a small leftover chunk.
            betaRem = np.minimum(0.05, 1. / (K))
        else:
            # beta already leaves some mass for the remainder.
            betaRem = 1 - np.sum(beta)
        betaWithRem = np.hstack([beta, betaRem])
        betaWithRem /= betaWithRem.sum()
        self.rho = beta2rho(betaWithRem, self.K)
        # Fixed-magnitude omega => moderate variance for q(beta).
        self.omega = (10 + self.gamma) * np.ones(self.K)
        return
    if beta.size == K:
        # Append in small remaining/leftover mass
        betaRem = np.minimum(1.0 / (2 * K), 0.05)
        betaWithRem = np.hstack([beta * (1.0 - betaRem), betaRem])
        assert np.allclose(np.sum(betaWithRem), 1.0)
    else:
        # Caller supplied the remainder mass explicitly as entry K.
        assert beta.size == K + 1
        betaWithRem = beta
    # Convert beta to eta1, eta0
    # theta scales the probabilities into pseudo-counts; eta0[k] is the
    # total mass of all components after k (reverse cumulative sum),
    # so each (eta1[k], eta0[k]) parameterizes one stick-breaking Beta.
    theta = self.K * betaWithRem
    eta1 = theta[:-1].copy()
    eta0 = theta[::-1].cumsum()[::-1][1:]
    # Mean of Beta(eta1, eta0) is eta1/(eta1+eta0); omega is its
    # concentration (sum of pseudo-counts).
    self.rho = eta1 / (eta1 + eta0)
    self.omega = eta1 + eta0
def calcHardMergeGap(self, SS, kA=0, kB=1, curLalloc=None,
                     returnRhoOmega=False):
    ''' Compute gain in ELBO from merger of cluster pair (kA, kB).

    Parameters
    ----------
    SS : suff-stat object with attributes K and nDoc
    kA, kB : int
        indices of the pair to merge; kB's mass is absorbed into kA
    curLalloc : float, optional
        precomputed allocation ELBO for the current model; computed
        here if not provided (lets callers amortize across many pairs)
    returnRhoOmega : bool, optional
        if True, also return the proposed rho and omega

    Returns
    -------
    gainL : float
        propLalloc - curLalloc; positive means the merge improves ELBO
    '''
    gamma = self.gamma
    alpha = self.alpha
    curOmega = self.omega
    # Proposed omega just drops one entry to get size K-1
    # (heuristic: exact per-index bookkeeping is not needed here).
    propOmega = self.omega[1:]
    curRho = self.rho
    # Proposed beta: pool kB's expected mass into kA, then remove kB.
    propEbeta = rho2beta_active(self.rho)
    propEbeta[kA] += propEbeta[kB]
    propEbeta = np.delete(propEbeta, kB)
    propRho = beta2rho(propEbeta, propEbeta.size)
    if curLalloc is None:
        curLalloc = L_alloc(alpha=alpha, gamma=gamma, nDoc=SS.nDoc,
                            rho=curRho, omega=curOmega, todict=0)
    propLalloc = L_alloc(alpha=alpha, gamma=gamma, nDoc=SS.nDoc,
                         rho=propRho, omega=propOmega, todict=0)
    # NOTE: a long commented-out block that re-derived this gain
    # term-by-term (via cBeta/digamma) was removed as dead code.
    gainLalloc = propLalloc - curLalloc
    if returnRhoOmega:
        return gainLalloc, propRho, propOmega
    return gainLalloc