import numpy as np
from scipy.special import digamma, gammaln

# NOTE: these excerpts come from several bnpy modules; helpers such as
# as2D, kvec, rho2beta, SuffStatBag, NumericUtil, calcRlogR,
# calcRlogRdotv, calcSparseRlogR, calcSparseRlogRdotv, and LOGTWOPI
# are defined elsewhere in the package.


def calcMargLik(self, SS):
    ''' Calculate marginal likelihood of assignments, summed over all comps
    '''
    theta = self.gamma / SS.K + SS.N
    cPrior = gammaln(self.gamma) - SS.K * gammaln(self.gamma / SS.K)
    cPost = gammaln(np.sum(theta)) - np.sum(gammaln(theta))
    return cPrior - cPost

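# Standalone sanity check (not from the original module): a minimal worked
# example of the Dirichlet-multinomial marginal above. With gamma=2, K=2,
# the prior is Dir(1, 1), and placing one item in each component has
# marginal probability E[w1 * w2] = (1 * 1) / (2 * 3) = 1/6.
import numpy as np
from scipy.special import gammaln

gamma, K = 2.0, 2
N = np.array([1.0, 1.0])
theta = gamma / K + N
cPrior = gammaln(gamma) - K * gammaln(gamma / K)
cPost = gammaln(np.sum(theta)) - np.sum(gammaln(theta))
assert np.allclose(np.exp(cPrior - cPost), 1.0 / 6.0)
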
def E_logpV(self):
    logNormC = gammaln(self.alpha0 + self.alpha1) \
        - gammaln(self.alpha0) - gammaln(self.alpha1)
    logBetaPDF = (self.alpha1 - 1) * self.ElogV \
        + (self.alpha0 - 1) * self.Elog1mV
    if self.truncType == 'z':
        return self.K * logNormC + logBetaPDF.sum()
    elif self.truncType == 'v':
        return self.K * logNormC + logBetaPDF[:-1].sum()

def log_pdf_dirichlet(self, wvec=None, avec=None):
    ''' Return scalar log probability for Dir(wvec | avec)
    '''
    if wvec is None:
        wvec = self.w
    if avec is None:
        avec = (self.gamma / self.K) * np.ones(self.K)
    logC = gammaln(np.sum(avec)) - np.sum(gammaln(avec))
    return logC + np.sum((avec - 1.0) * np.log(wvec + 1e-100))

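# Standalone sanity check (assumes scipy >= 0.15 for scipy.stats.dirichlet):
# the formula above should match scipy's Dirichlet log-pdf; the 1e-100
# guard only matters when some weight is exactly zero.
import numpy as np
from scipy.special import gammaln
from scipy.stats import dirichlet

wvec = np.array([0.2, 0.3, 0.5])
avec = np.array([1.0, 2.0, 3.0])
logC = gammaln(np.sum(avec)) - np.sum(gammaln(avec))
logpdf = logC + np.sum((avec - 1.0) * np.log(wvec + 1e-100))
assert np.allclose(logpdf, dirichlet.logpdf(wvec, avec))
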
def E_logqV(self):
    logNormC = gammaln(self.qalpha0 + self.qalpha1) \
        - gammaln(self.qalpha0) - gammaln(self.qalpha1)
    logBetaPDF = (self.qalpha1 - 1) * self.ElogV \
        + (self.qalpha0 - 1) * self.Elog1mV
    if self.truncType == 'z':
        return logNormC.sum() + logBetaPDF.sum()
    elif self.truncType == 'v':
        # Skip last entry because entropy of Beta(1,0) = 0:
        # the truncated final stick is deterministic.
        return logNormC[:-1].sum() + logBetaPDF[:-1].sum()

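# Standalone sanity check: each summand of E_logqV is the negative entropy
# of one Beta(qalpha1[k], qalpha0[k]) factor, so for any single stick it
# should agree with scipy.stats.beta's entropy, sign flipped.
import numpy as np
from scipy.special import digamma, gammaln
from scipy.stats import beta

a1, a0 = 3.0, 5.0
ElogV = digamma(a1) - digamma(a1 + a0)
Elog1mV = digamma(a0) - digamma(a1 + a0)
logNormC = gammaln(a1 + a0) - gammaln(a1) - gammaln(a0)
negH = logNormC + (a1 - 1) * ElogV + (a0 - 1) * Elog1mV
assert np.allclose(negH, -beta(a1, a0).entropy())
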
def log_pdf_dirichlet(self, wvec=None, avec=None):
    """ Return scalar log probability for Dir(wvec | avec)
    """
    if wvec is None:
        wvec = self.w
    if avec is None:
        avec = self.alpha0 * np.ones(self.K)
    logC = gammaln(np.sum(avec)) - np.sum(gammaln(avec))
    return logC + np.sum((avec - 1.0) * np.log(wvec))

def calcMargLik(self, SS):
    """ Calculate marginal likelihood of assignments, summed over all comps
    """
    mask = SS.N > 0
    Nvec = SS.N[mask]
    K = Nvec.size
    return gammaln(self.gamma0) \
        + K * np.log(self.gamma0) \
        + np.sum(gammaln(Nvec)) \
        - gammaln(np.sum(Nvec) + self.gamma0)

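# Standalone worked check: the return value is the log of the CRP
# exchangeable partition probability,
#   gamma0^K * Gamma(gamma0) * prod_k Gamma(N_k) / Gamma(N + gamma0).
# With gamma0 = 1 and counts [2, 1], that probability is 1! * 0! / 3! = 1/6.
import numpy as np
from scipy.special import gammaln

gamma0 = 1.0
Nvec = np.array([2.0, 1.0])
K = Nvec.size
logp = gammaln(gamma0) + K * np.log(gamma0) \
    + np.sum(gammaln(Nvec)) - gammaln(np.sum(Nvec) + gamma0)
assert np.allclose(np.exp(logp), 1.0 / 6.0)
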
def c_Beta(a1, a0):
    ''' Evaluate cumulant function of the Beta distribution

    When input is vectorized, we compute sum over all entries.

    Returns
    -------
    c : scalar real
    '''
    return np.sum(gammaln(a1 + a0)) \
        - np.sum(gammaln(a1)) - np.sum(gammaln(a0))

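# Standalone sanity check: the scalar cumulant is the negative log of the
# Beta function, so it should match scipy.special.betaln with sign flipped.
import numpy as np
from scipy.special import betaln, gammaln

a1, a0 = 2.5, 4.0
c = gammaln(a1 + a0) - gammaln(a1) - gammaln(a0)
assert np.allclose(c, -betaln(a1, a0))
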
def log_pdf_dirichlet(PiMat, alphavec):
    ''' Return scalar log probability for Dir(PiMat | alphavec)
    '''
    PiMat = as2D(PiMat + 1e-100)
    J, K = PiMat.shape
    if isinstance(alphavec, float):
        alphavec = alphavec * np.ones(K)
    elif alphavec.ndim == 0:
        alphavec = alphavec * np.ones(K)
    assert alphavec.size == K
    cDir = gammaln(np.sum(alphavec)) - np.sum(gammaln(alphavec))
    # One normalization constant per row of PiMat: J of them, not K.
    # (The two coincide only for square inputs like transition matrices.)
    return J * cDir + np.sum(np.dot(np.log(PiMat), alphavec - 1.0))

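# Standalone sanity check of the J * cDir normalizer: the matrix log-pdf
# should equal the sum of scipy.stats.dirichlet log-pdfs over the rows.
import numpy as np
from scipy.special import gammaln
from scipy.stats import dirichlet

PiMat = np.array([[0.2, 0.8], [0.5, 0.5]])
alphavec = np.array([1.5, 2.5])
J, K = PiMat.shape
cDir = gammaln(np.sum(alphavec)) - np.sum(gammaln(alphavec))
logp = J * cDir + np.sum(np.dot(np.log(PiMat), alphavec - 1.0))
rowwise = sum(dirichlet.logpdf(row, alphavec) for row in PiMat)
assert np.allclose(logp, rowwise)
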
def L_alloc_no_slack(self):
    ''' Compute allocation term of objective function, without slack term

    Returns
    -------
    L : scalar float
    '''
    N = self.theta.shape[0]
    K = self.K
    prior_cDir = N * (gammaln(self.alpha) - K * gammaln(self.alpha / K))
    post_cDir = np.sum(gammaln(np.sum(self.theta, axis=1))) - \
        np.sum(gammaln(self.theta))
    return prior_cDir - post_cDir

def c_Beta_ReturnVec(eta1, eta0):
    ''' Evaluate cumulant of Beta distribution for vector of parameters

    Parameters
    -------
    eta1 : 1D array, size K
        represents ON pseudo-count parameter of the Beta
    eta0 : 1D array, size K
        represents OFF pseudo-count parameter of the Beta

    Returns
    -------
    cvec : 1D array, size K
    '''
    return gammaln(eta1 + eta0) - gammaln(eta1) - gammaln(eta0)

def c_Beta(eta1, eta0):
    ''' Evaluate cumulant function of Beta distribution

    Parameters
    -------
    eta1 : 1D array, size K
        represents ON pseudo-count parameter of the Beta
    eta0 : 1D array, size K
        represents OFF pseudo-count parameter of the Beta

    Returns
    -------
    c : float
        = \sum_k c_B(eta1[k], eta0[k])
    '''
    return np.sum(gammaln(eta1 + eta0) - gammaln(eta1) - gammaln(eta0))

def elbo_alloc(self):
    K = self.K
    normPinit = gammaln(K * self.startAlpha) \
        - K * gammaln(self.startAlpha)
    normQinit = gammaln(np.sum(self.startTheta)) \
        - np.sum(gammaln(self.startTheta))
    normPtrans = K * gammaln(K * self.transAlpha + self.kappa) \
        - K * (K - 1) * gammaln(self.transAlpha) \
        - K * gammaln(self.transAlpha + self.kappa)
    normQtrans = np.sum(gammaln(np.sum(self.transTheta, axis=1))) \
        - np.sum(gammaln(self.transTheta))
    return normPinit + normPtrans - normQinit - normQtrans

def E_cDalphabeta_surrogate(alpha, rho, omega):
    ''' Compute expected value of cumulant function of alpha * beta.

    Returns
    -------
    csur : scalar float
    '''
    K = rho.size
    eta1 = rho * omega
    eta0 = (1 - rho) * omega
    digammaBoth = digamma(eta1 + eta0)
    ElogU = digamma(eta1) - digammaBoth
    Elog1mU = digamma(eta0) - digammaBoth
    # kvec is a bnpy helper: kvec(K) = K + 1 - np.arange(1, K + 1),
    # i.e. the descending vector [K, K-1, ..., 1].
    OFFcoef = kvec(K)
    calpha = gammaln(alpha) + (K + 1) * np.log(alpha)
    return calpha + np.sum(ElogU) + np.inner(OFFcoef, Elog1mU)

def get_log_norm_const(self):
    ''' Calculate log normalization constant (aka log partition function)
        for this Gauss-Gamma distribution.

    p(mu, Lam) = NormalGamma(mu, Lam | a, b, m, kappa)
               = 1/Z f(mu | Lam) g(Lam), where Z is const w.r.t. mu, Lam

    Normalization constant = Z = \int f() g() dmu dLam

    Returns
    --------
    logZ : float
    '''
    D = self.D
    a = self.a
    b = self.b
    logNormConstNormal = 0.5 * D * (LOGTWOPI - np.log(self.kappa))
    logNormConstGamma = np.sum(gammaln(a)) - np.inner(a, np.log(b))
    return logNormConstNormal + logNormConstGamma

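# Standalone numerical check for D=1, assuming (this is an assumption, not
# taken from the source) the unnormalized density has the usual
# Normal-Gamma form
#   f(mu | lam) g(lam) = lam^{1/2} exp(-kappa*lam*(mu - m)^2 / 2)
#                        * lam^{a-1} exp(-b*lam),
# whose mu-integral has the closed form sqrt(2*pi / (kappa*lam)).
import numpy as np
from scipy.integrate import quad
from scipy.special import gammaln

LOGTWOPI = np.log(2 * np.pi)
a, b, kappa = 2.0, 3.0, 0.5
logZ = 0.5 * (LOGTWOPI - np.log(kappa)) + gammaln(a) - a * np.log(b)

def integrand(lam):
    # mu already integrated out in closed form
    return np.sqrt(lam) * np.sqrt(2 * np.pi / (kappa * lam)) \
        * lam ** (a - 1) * np.exp(-b * lam)

Z, _ = quad(integrand, 0, np.inf)
assert np.allclose(np.log(Z), logZ)
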
def calcMergeTermsFromSeparateLP(
        Data=None, LPa=None, SSa=None, LPb=None, SSb=None, mUIDPairs=None):
    ''' Compute merge terms that combine two comps from separate LP dicts.

    Returns
    -------
    Mdict : dict of key, array-value pairs
    '''
    M = len(mUIDPairs)
    m_sumLogPi = np.zeros(M)
    m_gammalnTheta = np.zeros(M)
    m_slackTheta = np.zeros(M)
    m_Hresp = np.zeros(M)
    assert np.allclose(LPa['digammaSumTheta'], LPb['digammaSumTheta'])
    for m, (uidA, uidB) in enumerate(mUIDPairs):
        kA = SSa.uid2k(uidA)
        kB = SSb.uid2k(uidB)
        m_resp = LPa['resp'][:, kA] + LPb['resp'][:, kB]
        if hasattr(Data, 'word_count') and \
                Data.nUniqueToken == m_resp.shape[0]:
            m_Hresp[m] = -1 * calcRlogRdotv(
                m_resp[:, np.newaxis], Data.word_count)
        else:
            m_Hresp[m] = -1 * calcRlogR(m_resp[:, np.newaxis])
        DTC_vec = LPa['DocTopicCount'][:, kA] + LPb['DocTopicCount'][:, kB]
        theta_vec = LPa['theta'][:, kA] + LPb['theta'][:, kB]
        m_gammalnTheta[m] = np.sum(gammaln(theta_vec))
        ElogPi_vec = digamma(theta_vec) - LPa['digammaSumTheta']
        m_sumLogPi[m] = np.sum(ElogPi_vec)
        # slack = (Ndm - theta_dm) * E[log pi_dm]
        slack_vec = ElogPi_vec
        slack_vec *= (DTC_vec - theta_vec)
        m_slackTheta[m] = np.sum(slack_vec)
    return dict(
        Hresp=m_Hresp,
        gammalnTheta=m_gammalnTheta,
        slackTheta=m_slackTheta,
        sumLogPi=m_sumLogPi)

def c_Func(avec, K=0):
    ''' Evaluate cumulant function of the Dirichlet distribution

    Returns
    -------
    c : scalar real
    '''
    if isinstance(avec, float) or avec.ndim == 0:
        assert K > 0
        avec = avec * np.ones(K)
        return gammaln(np.sum(avec)) - np.sum(gammaln(avec))
    elif avec.ndim == 1:
        return gammaln(np.sum(avec)) - np.sum(gammaln(avec))
    else:
        return np.sum(gammaln(np.sum(avec, axis=1))) - np.sum(gammaln(avec))

def c_Dir(AMat, arem=None):
    ''' Evaluate cumulant function of the Dir distribution

    When input is vectorized, we compute sum over all entries.

    Returns
    -------
    c : scalar real
    '''
    AMat = np.asarray(AMat)
    D = AMat.shape[0]
    if arem is None:
        if AMat.ndim == 1:
            return gammaln(np.sum(AMat)) - np.sum(gammaln(AMat))
        else:
            return np.sum(gammaln(np.sum(AMat, axis=1))) \
                - np.sum(gammaln(AMat))
    return np.sum(gammaln(np.sum(AMat, axis=1) + arem)) \
        - np.sum(gammaln(AMat)) \
        - D * np.sum(gammaln(arem))

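# Standalone sanity check: the vectorized 2D branch should equal the 1D
# cumulant summed over rows.
import numpy as np
from scipy.special import gammaln

AMat = np.array([[1.0, 2.0], [3.0, 4.0]])
c_rows = sum(gammaln(row.sum()) - gammaln(row).sum() for row in AMat)
c_2D = np.sum(gammaln(np.sum(AMat, axis=1))) - np.sum(gammaln(AMat))
assert np.allclose(c_rows, c_2D)
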
def calc_log_norm_const(cls, a, b, m, kappa):
    # D was undefined here; recover the dimension from the mean vector.
    D = m.size
    # Sign matches get_log_norm_const above: Z ~ (2*pi / kappa)^(D/2),
    # so log(kappa) enters with a minus.
    logNormConstNormal = 0.5 * D * (LOGTWOPI - np.log(kappa))
    logNormConstGamma = np.sum(gammaln(a)) - np.inner(a, np.log(b))
    return logNormConstNormal + logNormConstGamma

def E_logpW(self):
    ''' Bishop PRML eq. 10.73
    '''
    return gammaln(self.K * self.alpha0) \
        - self.K * gammaln(self.alpha0) \
        + (self.alpha0 - 1) * self.Elogw.sum()

def c_Dir(tvec):
    return gammaln(tvec.sum()) - gammaln(tvec).sum()

def E_logqW(self):
    """ Bishop PRML eq. 10.76
    """
    return gammaln(self.alpha.sum()) - gammaln(self.alpha).sum() \
        + np.inner(self.alpha - 1, self.Elogw)

def E_logpW(self):
    ''' Bishop PRML eq. 10.73
    '''
    return gammaln(self.gamma) \
        - self.K * gammaln(self.gamma / self.K) \
        + (self.gamma / self.K - 1) * self.Elogw.sum()

def E_logqW(self):
    ''' Bishop PRML eq. 10.76
    '''
    return gammaln(self.theta.sum()) - gammaln(self.theta).sum() \
        + np.inner(self.theta - 1, self.Elogw)

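# Standalone sanity check: E_logqW is the negative entropy of Dir(theta),
# so it should match scipy.stats.dirichlet.entropy with the sign flipped.
import numpy as np
from scipy.special import digamma, gammaln
from scipy.stats import dirichlet

theta = np.array([2.0, 3.0, 4.0])
Elogw = digamma(theta) - digamma(theta.sum())
E_logqW = gammaln(theta.sum()) - gammaln(theta).sum() \
    + np.inner(theta - 1, Elogw)
assert np.allclose(E_logqW, -dirichlet.entropy(theta))
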
def E_logpW(self):
    """ Bishop PRML eq. 10.73
    """
    return gammaln(self.K * self.alpha0) \
        - self.K * gammaln(self.alpha0) \
        + (self.alpha0 - 1) * self.Elogw.sum()

def calcELBO_NonlinearTerms(Data=None, SS=None, LP=None, todict=0,
                            rho=None, Ebeta=None, alpha=None,
                            resp=None, nDoc=None, DocTopicCount=None,
                            theta=None, thetaRem=None,
                            ElogPi=None, ElogPiRem=None,
                            sumLogPi=None, sumLogPiRem=None,
                            sumLogPiRemVec=None,
                            Hresp=None,
                            slackTheta=None, slackThetaRem=None,
                            gammalnTheta=None, gammalnSumTheta=None,
                            gammalnThetaRem=None,
                            thetaEmptyComp=None, ElogPiEmptyComp=None,
                            ElogPiOrigComp=None,
                            gammalnThetaOrigComp=None,
                            slackThetaOrigComp=None,
                            returnMemoizedDict=0, **kwargs):
    """ Calculate ELBO objective terms non-linear in suff stats.
    """
    if resp is not None:
        N, K = resp.shape
    elif LP is not None:
        if 'resp' in LP:
            N, K = LP['resp'].shape
        else:
            N, K = LP['spR'].shape

    if Ebeta is None:
        Ebeta = rho2beta(rho, returnSize='K+1')

    if LP is not None:
        DocTopicCount = LP['DocTopicCount']
        nDoc = DocTopicCount.shape[0]
        theta = LP['theta']
        thetaRem = LP['thetaRem']
        ElogPi = LP['ElogPi']
        ElogPiRem = LP['ElogPiRem']
        sumLogPi = np.sum(ElogPi, axis=0)
        sumLogPiRem = np.sum(ElogPiRem)
        if 'thetaEmptyComp' in LP:
            thetaEmptyComp = LP['thetaEmptyComp']
            ElogPiEmptyComp = LP['ElogPiEmptyComp']
            ElogPiOrigComp = LP['ElogPiOrigComp']
            gammalnThetaOrigComp = LP['gammalnThetaOrigComp']
            slackThetaOrigComp = LP['slackThetaOrigComp']
            HrespOrigComp = LP['HrespOrigComp']
    elif SS is not None:
        sumLogPi = SS.sumLogPi
        nDoc = SS.nDoc
        if hasattr(SS, 'sumLogPiRemVec'):
            sumLogPiRemVec = SS.sumLogPiRemVec
        else:
            sumLogPiRem = SS.sumLogPiRem

    if DocTopicCount is not None and theta is None:
        theta = DocTopicCount + alpha * Ebeta[:-1]
        thetaRem = alpha * Ebeta[-1]

    if theta is not None and ElogPi is None:
        digammasumtheta = digamma(theta.sum(axis=1) + thetaRem)
        ElogPi = digamma(theta) - digammasumtheta[:, np.newaxis]
        ElogPiRem = digamma(thetaRem) - digammasumtheta[:, np.newaxis]
    if sumLogPi is None and ElogPi is not None:
        sumLogPi = np.sum(ElogPi, axis=0)
        sumLogPiRem = np.sum(ElogPiRem)

    if Hresp is None:
        if SS is not None and SS.hasELBOTerm('Hresp'):
            Hresp = SS.getELBOTerm('Hresp')
        else:
            if hasattr(Data, 'word_count') and N == Data.word_count.size:
                if resp is not None:
                    Hresp = -1 * NumericUtil.calcRlogRdotv(
                        resp, Data.word_count)
                elif 'resp' in LP:
                    Hresp = -1 * NumericUtil.calcRlogRdotv(
                        LP['resp'], Data.word_count)
                elif 'spR' in LP:
                    Hresp = calcSparseRlogRdotv(
                        v=Data.word_count, **LP)
                else:
                    raise ValueError("Missing resp assignments!")
            else:
                if resp is not None:
                    Hresp = -1 * NumericUtil.calcRlogR(resp)
                elif 'resp' in LP:
                    Hresp = -1 * NumericUtil.calcRlogR(LP['resp'])
                elif 'spR' in LP:
                    assert 'nnzPerRow' in LP
                    Hresp = calcSparseRlogR(**LP)
                else:
                    raise ValueError("Missing resp assignments!")

    if slackTheta is None:
        if SS is not None and SS.hasELBOTerm('slackTheta'):
            slackTheta = SS.getELBOTerm('slackTheta')
            slackThetaRem = SS.getELBOTerm('slackThetaRem')
        else:
            slackTheta = DocTopicCount - theta
            slackTheta *= ElogPi
            slackTheta = np.sum(slackTheta, axis=0)
            slackThetaRem = -1 * np.sum(thetaRem * ElogPiRem)

    if gammalnTheta is None:
        if SS is not None and SS.hasELBOTerm('gammalnTheta'):
            gammalnSumTheta = SS.getELBOTerm('gammalnSumTheta')
            gammalnTheta = SS.getELBOTerm('gammalnTheta')
            gammalnThetaRem = SS.getELBOTerm('gammalnThetaRem')
        else:
            sumTheta = np.sum(theta, axis=1) + thetaRem
            gammalnSumTheta = np.sum(gammaln(sumTheta))
            gammalnTheta = np.sum(gammaln(theta), axis=0)
            gammalnThetaRem = theta.shape[0] * gammaln(thetaRem)

    if thetaEmptyComp is not None:
        gammalnThetaEmptyComp = nDoc * gammaln(thetaEmptyComp) - \
            gammalnThetaOrigComp
        slackThetaEmptyComp = -np.sum(thetaEmptyComp * ElogPiEmptyComp) - \
            slackThetaOrigComp

    if returnMemoizedDict:
        Mdict = dict(Hresp=Hresp,
                     slackTheta=slackTheta,
                     slackThetaRem=slackThetaRem,
                     gammalnTheta=gammalnTheta,
                     gammalnThetaRem=gammalnThetaRem,
                     gammalnSumTheta=gammalnSumTheta)
        if thetaEmptyComp is not None:
            Mdict['HrespEmptyComp'] = -1 * HrespOrigComp
            Mdict['gammalnThetaEmptyComp'] = gammalnThetaEmptyComp
            Mdict['slackThetaEmptyComp'] = slackThetaEmptyComp
        return Mdict

    # First, compute all local-only terms
    Lentropy = np.sum(Hresp)
    Lslack = slackTheta.sum() + slackThetaRem
    LcDtheta = -1 * (gammalnSumTheta - gammalnTheta.sum() - gammalnThetaRem)

    # For stochastic (soVB), we need to scale up these terms
    # Only used when --doMemoELBO is set to 0 (not recommended)
    if SS is not None and SS.hasAmpFactor():
        Lentropy *= SS.ampF
        Lslack *= SS.ampF
        LcDtheta *= SS.ampF

    # Next, compute the slack term
    alphaEbeta = alpha * Ebeta
    Lslack_alphaEbeta = np.sum(alphaEbeta[:-1] * sumLogPi)
    if sumLogPiRemVec is not None:
        Ebeta_gt = 1 - np.cumsum(Ebeta[:-1])
        Lslack_alphaEbeta += alpha * np.inner(Ebeta_gt, sumLogPiRemVec)
    else:
        Lslack_alphaEbeta += alphaEbeta[-1] * sumLogPiRem
    Lslack += Lslack_alphaEbeta

    if todict:
        return dict(
            Lslack=Lslack,
            Lentropy=Lentropy,
            LcDtheta=LcDtheta,
            Lslack_alphaEbeta=Lslack_alphaEbeta)
    return LcDtheta + Lslack + Lentropy

def calcSummaryStats(Dslice, LP=None,
                     alpha=None, alphaEbeta=None,
                     doTrackTruncationGrowth=0,
                     doPrecompEntropy=0,
                     doPrecompMergeEntropy=0,
                     mergePairSelection=None,
                     mPairIDs=None,
                     trackDocUsage=0,
                     **kwargs):
    """ Calculate summary from local parameters for given data slice.

    Parameters
    -------
    Data : bnpy data object
    LP : local param dict with fields
        resp : Data.nObs x K array,
            where resp[n,k] = posterior resp of comp k
    doPrecompEntropy : boolean flag
        indicates whether to precompute ELBO terms in advance
        used for memoized learning algorithms (moVB)

    Returns
    -------
    SS : SuffStatBag with K components
        Relevant fields
        * nDoc : scalar float
            Counts total documents available in provided data.
        * sumLogPi : 1D array, size K
            Entry k equals \sum_{d in docs} E[ \log \pi_{dk} ]
        * sumLogPiRem : scalar float
            Equals sum over docs of probability of inactive topics.

        Also has optional ELBO field when precompELBO is True
        * Hvec : 1D array, size K
            Vector of entropy contributions from each comp.
            Hvec[k] = \sum_{n=1}^N H[q(z_n)], a function of 'resp'
    """
    if mPairIDs is None:
        M = 0
    else:
        M = len(mPairIDs)
    K = LP['DocTopicCount'].shape[1]
    if 'digammaSumTheta' not in LP:
        digammaSumTheta = digamma(LP['theta'].sum(axis=1) + LP['thetaRem'])
        LP['digammaSumTheta'] = digammaSumTheta  # Used for merges
    if 'ElogPi' not in LP:
        LP['ElogPiRem'] = digamma(LP['thetaRem']) - LP['digammaSumTheta']
        LP['ElogPi'] = digamma(LP['theta']) - \
            LP['digammaSumTheta'][:, np.newaxis]

    SS = SuffStatBag(K=K, D=Dslice.dim, M=M)
    SS.setField('nDoc', Dslice.nDoc, dims=None)
    SS.setField('sumLogPi', np.sum(LP['ElogPi'], axis=0), dims='K')
    if 'ElogPiEmptyComp' in LP:
        sumLogPiEmptyComp = np.sum(LP['ElogPiEmptyComp']) - \
            np.sum(LP['ElogPiOrigComp'])
        SS.setField('sumLogPiEmptyComp', sumLogPiEmptyComp, dims=None)
    if doTrackTruncationGrowth:
        remvec = np.zeros(K)
        remvec[K - 1] = np.sum(LP['ElogPiRem'])
        SS.setField('sumLogPiRemVec', remvec, dims='K')
    else:
        SS.setField('sumLogPiRem', np.sum(LP['ElogPiRem']), dims=None)

    if doPrecompEntropy:
        Mdict = calcELBO_NonlinearTerms(Data=Dslice,
                                        LP=LP, returnMemoizedDict=1)
        if type(Mdict['Hresp']) == float:
            # SPARSE HARD ASSIGNMENTS
            SS.setELBOTerm('Hresp', Mdict['Hresp'], dims=None)
        else:
            SS.setELBOTerm('Hresp', Mdict['Hresp'], dims=('K',))
        SS.setELBOTerm('slackTheta', Mdict['slackTheta'], dims='K')
        SS.setELBOTerm('gammalnTheta', Mdict['gammalnTheta'], dims='K')
        if 'ElogPiEmptyComp' in LP:
            SS.setELBOTerm('slackThetaEmptyComp',
                           Mdict['slackThetaEmptyComp'])
            SS.setELBOTerm('gammalnThetaEmptyComp',
                           Mdict['gammalnThetaEmptyComp'])
            SS.setELBOTerm('HrespEmptyComp', Mdict['HrespEmptyComp'])
        else:
            SS.setELBOTerm('gammalnSumTheta',
                           Mdict['gammalnSumTheta'], dims=None)
            SS.setELBOTerm('slackThetaRem',
                           Mdict['slackThetaRem'], dims=None)
            SS.setELBOTerm('gammalnThetaRem',
                           Mdict['gammalnThetaRem'].sum(), dims=None)

    if doPrecompMergeEntropy:
        if mPairIDs is None:
            raise NotImplementedError("TODO: all pairs for merges")
        m_Hresp = calcHrespForSpecificMergePairs(LP, Dslice, mPairIDs)
        if m_Hresp is not None:
            SS.setMergeTerm('Hresp', m_Hresp, dims=('M'))
        m_sumLogPi = np.zeros(M)
        m_gammalnTheta = np.zeros(M)
        m_slackTheta = np.zeros(M)
        for m, (kA, kB) in enumerate(mPairIDs):
            theta_vec = LP['theta'][:, kA] + LP['theta'][:, kB]
            ElogPi_vec = digamma(theta_vec) - LP['digammaSumTheta']
            m_gammalnTheta[m] = np.sum(gammaln(theta_vec))
            m_sumLogPi[m] = np.sum(ElogPi_vec)
            # slack = (Ndm - theta_dm) * E[log pi_dm]
            slack_vec = ElogPi_vec
            slack_vec *= -1 * (alphaEbeta[kA] + alphaEbeta[kB])
            m_slackTheta[m] = np.sum(slack_vec)
        SS.setMergeTerm('gammalnTheta', m_gammalnTheta, dims=('M'))
        SS.setMergeTerm('sumLogPi', m_sumLogPi, dims=('M'))
        SS.setMergeTerm('slackTheta', m_slackTheta, dims=('M'))
        # Uncomment this for verification of merge calculations.
        # for (kA, kB) in mPairIDs:
        #     self.verifySSForMergePair(Data, SS, LP, kA, kB)
    # .... end merge computations

    # Selection terms (computes doc-topic correlation)
    if mergePairSelection is not None:
        if mergePairSelection.count('corr') > 0:
            Tmat = LP['DocTopicCount']
            SS.setSelectionTerm('DocTopicPairMat',
                                np.dot(Tmat.T, Tmat), dims=('K', 'K'))
            SS.setSelectionTerm(
                'DocTopicSum', np.sum(Tmat, axis=0), dims='K')

    if trackDocUsage:
        # Track num of times a topic appears nontrivially in a doc
        DocUsage = np.sum(LP['DocTopicCount'] > 0.01, axis=0)
        SS.setSelectionTerm('DocUsageCount', DocUsage, dims='K')
        Pi = LP['theta'] / LP['theta'].sum(axis=1)[:, np.newaxis]
        SumPi = np.sum(Pi, axis=0)
        SS.setSelectionTerm('SumPi', SumPi, dims='K')
    return SS