def E_logqZ(self, Data, LP):
    ''' Calculate E[ log q(z) ] for each active topic.

    Returns
    -------
    ElogqZ : 1D array, size K
    '''
    if hasattr(Data, 'word_count'):
        return NumericUtil.calcRlogRdotv(LP['resp'], Data.word_count)
    else:
        return NumericUtil.calcRlogR(LP['resp'])
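# E_logqZ delegates to NumericUtil for speed. Below is a minimal
# plain-NumPy sketch of the assumed contract (not the library code):
# calcRlogR(R) returns column-wise sums of R * log R, and
# calcRlogRdotv(R, v) additionally weights each row n by v[n].
# The helper names are hypothetical, for illustration only.


def _naive_calcRlogR(R):
    ''' Column sums of R * log R, for resp matrix R with entries > 0. '''
    return np.sum(R * np.log(R), axis=0)


def _naive_calcRlogRdotv(R, v):
    ''' Same as above, but row n is weighted by count v[n]. '''
    return np.dot(v, R * np.log(R))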
def L_entropy(Data=None, LP=None, resp=None, returnVector=0):
    """ Calculate entropy of soft assignments term in ELBO objective.

    Returns
    -------
    L_entropy : scalar float
    """
    spR = None
    if LP is not None:
        if 'resp' in LP:
            resp = LP['resp']
        elif 'spR' in LP:
            spR = LP['spR']
            N, K = spR.shape
        else:
            raise ValueError("LP dict missing resp or spR")
    if resp is not None:
        N, K = resp.shape
    if hasattr(Data, 'word_count') and N == Data.word_count.size:
        if resp is not None:
            Hvec = -1 * NumericUtil.calcRlogRdotv(resp, Data.word_count)
        elif spR is not None:
            Hvec = calcSparseRlogRdotv(v=Data.word_count, **LP)
        else:
            raise ValueError("Missing resp assignments!")
    else:
        if resp is not None:
            Hvec = -1 * NumericUtil.calcRlogR(resp)
        elif spR is not None:
            assert 'nnzPerRow' in LP
            Hvec = calcSparseRlogR(**LP)
        else:
            raise ValueError("Missing resp assignments!")
    assert Hvec.size == K
    assert Hvec.min() >= -1e-6
    if returnVector:
        return Hvec
    return Hvec.sum()
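# Usage sketch for L_entropy on the dense-resp path, with toy data
# (hypothetical example, assuming numpy is imported as np at module top).
# Rows of resp must sum to one; per-topic entropies are nonnegative,
# and their sum equals the scalar return value.


def _demo_L_entropy():
    rng = np.random.RandomState(0)
    resp = rng.rand(10, 3)
    resp /= resp.sum(axis=1)[:, np.newaxis]
    Hvec = L_entropy(resp=resp, returnVector=1)
    assert Hvec.shape == (3,)
    assert Hvec.min() >= -1e-6
    assert np.allclose(Hvec.sum(), L_entropy(resp=resp))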
def makeExpansionLPFromZ_HDPTopicModel(
        Dslice=None, curModel=None, curLPslice=None,
        ktarget=None, xInitSS=None, targetZ=None,
        atomType=None, chosenDataIDs=None, emptyPiFrac=None, **kwargs):
    ''' Create expanded local parameters from Z assignments on target subset.

    Returns
    -------
    xLP : dict with fields
        resp : N x Kfresh
        DocTopicCount : D x Kfresh
        theta : D x Kfresh
        ElogPi : D x Kfresh
    '''
    Kfresh = targetZ.max() + 1
    N = curLPslice['resp'].shape[0]
    # Compute prior probability of each proposed comp
    xPiVec, emptyPi = make_xPiVec_and_emptyPi(
        curModel=curModel, ktarget=ktarget, Kfresh=Kfresh,
        xInitSS=xInitSS, **kwargs)
    xalphaPi = curModel.allocModel.alpha * xPiVec
    emptyalphaPi = curModel.allocModel.alpha * emptyPi
    # Compute likelihood under each proposed comp
    xObsModel = curModel.obsModel.copy()
    xObsModel.update_global_params(xInitSS)
    xLPslice = xObsModel.calc_local_params(Dslice)
    # Initialize xresp so each atom is normalized.
    # This is the "default" for non-target atoms.
    xresp = xLPslice['E_log_soft_ev']
    xresp += np.log(xalphaPi)  # log prior probability
    xresp -= xresp.max(axis=1)[:, np.newaxis]
    assert np.allclose(xresp.max(axis=1), 0.0)
    np.exp(xresp, out=xresp)
    xresp /= xresp.sum(axis=1)[:, np.newaxis]
    # Now, replace all targeted atoms with an all-or-nothing assignment
    if atomType == 'doc' and curModel.getAllocModelName().count('HDP'):
        if curModel.getObsModelName().count('Mult'):
            for pos, d in enumerate(chosenDataIDs):
                start = Dslice.doc_range[d]
                stop = Dslice.doc_range[d + 1]
                xresp[start:stop, :] = 1e-100
                xresp[start:stop, targetZ[pos]] = 1.0
        elif curModel.getObsModelName().count('Bern'):
            # For all words in each targeted doc,
            # assign them to the corresponding cluster in targetZ
            for pos, d in enumerate(chosenDataIDs):
                bstart = Dslice.vocab_size * d
                bstop = Dslice.vocab_size * (d + 1)
                xresp[bstart:bstop, :] = 1e-100
                xresp[bstart:bstop, targetZ[pos]] = 1.0
                # words_d = Dslice.word_id[
                #     Dslice.doc_range[d]:Dslice.doc_range[d + 1]]
                # xresp[bstart + words_d, :] = 1e-100
                # xresp[bstart + words_d, targetZ[pos]] = 1.0
    else:
        for pos, n in enumerate(chosenDataIDs):
            xresp[n, :] = 1e-100
            xresp[n, targetZ[pos]] = 1.0
    assert np.allclose(1.0, xresp.sum(axis=1))
    # Make resp consistent with ktarget comp
    xresp *= curLPslice['resp'][:, ktarget][:, np.newaxis]
    np.maximum(xresp, 1e-100, out=xresp)
    # Create xDocTopicCount
    xDocTopicCount = np.zeros((Dslice.nDoc, Kfresh))
    for d in range(Dslice.nDoc):
        start = Dslice.doc_range[d]
        stop = Dslice.doc_range[d + 1]
        if hasattr(Dslice, 'word_id') and \
                curModel.getObsModelName().count('Mult'):
            xDocTopicCount[d] = np.dot(
                Dslice.word_count[start:stop],
                xresp[start:stop])
        elif hasattr(Dslice, 'word_id') and \
                curModel.getObsModelName().count('Bern'):
            bstart = d * Dslice.vocab_size
            bstop = (d + 1) * Dslice.vocab_size
            xDocTopicCount[d] = np.sum(xresp[bstart:bstop], axis=0)
        else:
            xDocTopicCount[d] = np.sum(xresp[start:stop], axis=0)
    # Create xtheta
    xtheta = xDocTopicCount + xalphaPi[np.newaxis, :]
    # Package up into xLPslice
    xLPslice['resp'] = xresp
    xLPslice['DocTopicCount'] = xDocTopicCount
    xLPslice['theta'] = xtheta
    assert np.allclose(xDocTopicCount.sum(axis=1),
                       curLPslice['DocTopicCount'][:, ktarget])
    assert np.allclose(
        xtheta.sum(axis=1) + emptyalphaPi,
        curLPslice['theta'][:, ktarget])
    # Compute other LP quantities related to log prob (topic | doc)
    # and fill these into the expanded LP dict
    digammaSumTheta = curLPslice['digammaSumTheta'].copy()
    xLPslice['digammaSumTheta'] = digammaSumTheta
    xLPslice['ElogPi'] = \
        digamma(xLPslice['theta']) - digammaSumTheta[:, np.newaxis]
    xLPslice['thetaRem'] = curLPslice['thetaRem'].copy()
    xLPslice['ElogPiRem'] = curLPslice['ElogPiRem'].copy()
    # Compute quantities related to leaving ktarget almost empty,
    # as we expand and transfer mass to other comps
    if emptyalphaPi > 0:
        thetaEmptyComp = emptyalphaPi
        ElogPiEmptyComp = digamma(thetaEmptyComp) - digammaSumTheta
        xLPslice['thetaEmptyComp'] = thetaEmptyComp
        xLPslice['ElogPiEmptyComp'] = ElogPiEmptyComp
    # Compute quantities related to OrigComp, the original target cluster.
    # These need to be tracked and turned into relevant summaries
    # so that they can be used to create a valid proposal state "propSS"
    xLPslice['ElogPiOrigComp'] = curLPslice['ElogPi'][:, ktarget]
    xLPslice['gammalnThetaOrigComp'] = \
        np.sum(gammaln(curLPslice['theta'][:, ktarget]))
    slack = curLPslice['DocTopicCount'][:, ktarget] - \
        curLPslice['theta'][:, ktarget]
    xLPslice['slackThetaOrigComp'] = np.sum(
        slack * curLPslice['ElogPi'][:, ktarget])
    if hasattr(Dslice, 'word_count') and \
            xLPslice['resp'].shape[0] == Dslice.word_count.size:
        xLPslice['HrespOrigComp'] = -1 * NumericUtil.calcRlogRdotv(
            curLPslice['resp'][:, ktarget], Dslice.word_count)
    else:
        xLPslice['HrespOrigComp'] = -1 * NumericUtil.calcRlogR(
            curLPslice['resp'][:, ktarget])
    return xLPslice
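# The asserts in makeExpansionLPFromZ_HDPTopicModel enforce a
# mass-conservation invariant: the expansion only redistributes the
# responsibility mass that curLPslice assigned to ktarget. A toy
# numeric sketch of the key scaling step (hypothetical helper,
# assuming numpy as np):


def _demo_expansion_mass_conservation():
    rng = np.random.RandomState(0)
    # Normalized resp over Kfresh=4 proposed comps, for 6 atoms
    xresp = rng.rand(6, 4)
    xresp /= xresp.sum(axis=1)[:, np.newaxis]
    # Mass each atom gave the target comp: curLPslice['resp'][:, ktarget]
    targetMass = 0.5 * rng.rand(6)
    xresp *= targetMass[:, np.newaxis]
    # Each atom's expanded resp sums to exactly its original target mass
    assert np.allclose(xresp.sum(axis=1), targetMass)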
def restrictedLocalStep_HDPTopicModel(
        Dslice=None,
        curLPslice=None,
        ktarget=0,
        xObsModel=None,
        xalphaPi=None,
        thetaEmptyComp=0.0,
        xInitLPslice=None,
        b_localStepSingleDoc='fast',
        isExpansion=True,
        **kwargs):
    ''' Perform restricted local step for HDPTopicModel, one doc at a time.

    Returns
    -------
    xLPslice : dict with updated fields
        Fields with learned values
        * resp : N x Kfresh
        * DocTopicCount : nDoc x Kfresh
        * theta : nDoc x Kfresh
        * ElogPi : nDoc x Kfresh
        Fields copied directly from curLPslice
        * digammaSumTheta : 1D array, size nDoc
        * thetaRem : scalar
        * ElogPiRem : scalar
        * thetaEmptyComp
        * ElogPiEmptyComp
    '''
    Kfresh = xObsModel.K
    assert Kfresh == xalphaPi.size
    # Compute conditional likelihoods for every data atom
    xLPslice = xObsModel.calc_local_params(Dslice)
    assert 'E_log_soft_ev' in xLPslice
    # Initialize DocTopicCount and theta
    xLPslice['resp'] = xLPslice['E_log_soft_ev']
    xLPslice['DocTopicCount'] = np.zeros((Dslice.nDoc, Kfresh))
    xLPslice['theta'] = np.zeros((Dslice.nDoc, Kfresh))
    xLPslice['_nIters'] = -1 * np.ones(Dslice.nDoc)
    xLPslice['_maxDiff'] = -1 * np.ones(Dslice.nDoc)
    if b_localStepSingleDoc == 'fast':
        restrictedLocalStepForSingleDoc_Func = \
            restrictedLocalStepForSingleDoc_HDPTopicModel
    else:
        print('SLOW<<<!!')
        restrictedLocalStepForSingleDoc_Func = \
            restrictedLocalStepForSingleDoc_HDPTopicModel_SlowerButStable
    # Fill in these fields, one doc at a time
    for d in range(Dslice.nDoc):
        xLPslice = restrictedLocalStepForSingleDoc_Func(
            d=d,
            Dslice=Dslice,
            curLPslice=curLPslice,
            xLPslice=xLPslice,
            xInitLPslice=xInitLPslice,
            ktarget=ktarget,
            Kfresh=Kfresh,
            xalphaPi=xalphaPi,
            obsModelName=xObsModel.__class__.__name__,
            **kwargs)
    # Compute other LP quantities related to log prob (topic | doc)
    # and fill these into the expanded LP dict
    digammaSumTheta = curLPslice['digammaSumTheta'].copy()
    xLPslice['digammaSumTheta'] = digammaSumTheta
    xLPslice['ElogPi'] = \
        digamma(xLPslice['theta']) - digammaSumTheta[:, np.newaxis]
    xLPslice['thetaRem'] = curLPslice['thetaRem'].copy()
    xLPslice['ElogPiRem'] = curLPslice['ElogPiRem'].copy()
    # Compute quantities related to leaving ktarget almost empty,
    # as we expand and transfer mass to other comps
    if thetaEmptyComp > 0:
        ElogPiEmptyComp = digamma(thetaEmptyComp) - digammaSumTheta
        xLPslice['thetaEmptyComp'] = thetaEmptyComp
        xLPslice['ElogPiEmptyComp'] = ElogPiEmptyComp
    if isExpansion:
        # Compute quantities related to OrigComp, the original target
        # cluster. These need to be tracked and turned into relevant
        # summaries so they can be used to create a valid proposal
        # state "propSS"
        xLPslice['ElogPiOrigComp'] = curLPslice['ElogPi'][:, ktarget]
        xLPslice['gammalnThetaOrigComp'] = \
            np.sum(gammaln(curLPslice['theta'][:, ktarget]))
        slack = curLPslice['DocTopicCount'][:, ktarget] - \
            curLPslice['theta'][:, ktarget]
        xLPslice['slackThetaOrigComp'] = np.sum(
            slack * curLPslice['ElogPi'][:, ktarget])
        if hasattr(Dslice, 'word_count') and \
                xLPslice['resp'].shape[0] == Dslice.word_count.size:
            xLPslice['HrespOrigComp'] = -1 * NumericUtil.calcRlogRdotv(
                curLPslice['resp'][:, ktarget], Dslice.word_count)
        else:
            xLPslice['HrespOrigComp'] = -1 * NumericUtil.calcRlogR(
                curLPslice['resp'][:, ktarget])
    return xLPslice
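# restrictedLocalStepForSingleDoc_HDPTopicModel is defined elsewhere.
# Below is a conceptual, simplified sketch of the fixed-point iteration
# it is assumed to perform for one doc: alternate updates of q(pi_d)
# and q(z_d) over only the Kfresh comps, while keeping each atom's
# total mass pinned at curLPslice's target-comp resp. Names and
# convergence details are hypothetical, not the library's code.


def _sketch_restricted_step_one_doc(logLik_d, targetResp_d, xalphaPi,
                                    nIters=50, convThr=1e-5):
    ''' logLik_d : Nd x Kfresh log soft evidence for one doc.
        targetResp_d : size Nd, mass on ktarget for each atom.
    '''
    DocTopicCount_d = np.zeros(xalphaPi.size)
    for _ in range(nIters):
        # Update q(pi_d): theta from current restricted counts
        theta_d = DocTopicCount_d + xalphaPi
        ElogPi_d = digamma(theta_d) - digamma(theta_d.sum())
        # Update q(z_d): normalize over fresh comps, then rescale so
        # each atom keeps exactly its original target-comp mass
        logResp_d = logLik_d + ElogPi_d[np.newaxis, :]
        logResp_d -= logResp_d.max(axis=1)[:, np.newaxis]
        resp_d = np.exp(logResp_d)
        resp_d /= resp_d.sum(axis=1)[:, np.newaxis]
        resp_d *= targetResp_d[:, np.newaxis]
        newCount_d = resp_d.sum(axis=0)
        converged = np.abs(newCount_d - DocTopicCount_d).sum() < convThr
        DocTopicCount_d = newCount_d
        if converged:
            break
    return resp_d, DocTopicCount_d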
def calcELBO_NonlinearTerms(Data=None, SS=None, LP=None, todict=0,
                            rho=None, Ebeta=None, alpha=None,
                            resp=None, nDoc=None, DocTopicCount=None,
                            theta=None, thetaRem=None,
                            ElogPi=None, ElogPiRem=None,
                            sumLogPi=None, sumLogPiRem=None,
                            sumLogPiRemVec=None,
                            Hresp=None, slackTheta=None, slackThetaRem=None,
                            gammalnTheta=None, gammalnSumTheta=None,
                            gammalnThetaRem=None,
                            thetaEmptyComp=None, ElogPiEmptyComp=None,
                            ElogPiOrigComp=None,
                            gammalnThetaOrigComp=None,
                            slackThetaOrigComp=None,
                            returnMemoizedDict=0, **kwargs):
    """ Calculate ELBO objective terms that are non-linear in suff stats.
    """
    if resp is not None:
        N, K = resp.shape
    elif LP is not None:
        if 'resp' in LP:
            N, K = LP['resp'].shape
        else:
            N, K = LP['spR'].shape
    if Ebeta is None:
        Ebeta = rho2beta(rho, returnSize='K+1')
    if LP is not None:
        DocTopicCount = LP['DocTopicCount']
        nDoc = DocTopicCount.shape[0]
        theta = LP['theta']
        thetaRem = LP['thetaRem']
        ElogPi = LP['ElogPi']
        ElogPiRem = LP['ElogPiRem']
        sumLogPi = np.sum(ElogPi, axis=0)
        sumLogPiRem = np.sum(ElogPiRem)
        if 'thetaEmptyComp' in LP:
            thetaEmptyComp = LP['thetaEmptyComp']
            ElogPiEmptyComp = LP['ElogPiEmptyComp']
            ElogPiOrigComp = LP['ElogPiOrigComp']
            gammalnThetaOrigComp = LP['gammalnThetaOrigComp']
            slackThetaOrigComp = LP['slackThetaOrigComp']
            HrespOrigComp = LP['HrespOrigComp']
    elif SS is not None:
        sumLogPi = SS.sumLogPi
        nDoc = SS.nDoc
        if hasattr(SS, 'sumLogPiRemVec'):
            sumLogPiRemVec = SS.sumLogPiRemVec
        else:
            sumLogPiRem = SS.sumLogPiRem
    if DocTopicCount is not None and theta is None:
        theta = DocTopicCount + alpha * Ebeta[:-1]
        thetaRem = alpha * Ebeta[-1]
    if theta is not None and ElogPi is None:
        digammaSumTheta = digamma(theta.sum(axis=1) + thetaRem)
        ElogPi = digamma(theta) - digammaSumTheta[:, np.newaxis]
        ElogPiRem = digamma(thetaRem) - digammaSumTheta
    if sumLogPi is None and ElogPi is not None:
        sumLogPi = np.sum(ElogPi, axis=0)
        sumLogPiRem = np.sum(ElogPiRem)
    if Hresp is None:
        if SS is not None and SS.hasELBOTerm('Hresp'):
            Hresp = SS.getELBOTerm('Hresp')
        else:
            if hasattr(Data, 'word_count') and N == Data.word_count.size:
                if resp is not None:
                    Hresp = -1 * NumericUtil.calcRlogRdotv(
                        resp, Data.word_count)
                elif 'resp' in LP:
                    Hresp = -1 * NumericUtil.calcRlogRdotv(
                        LP['resp'], Data.word_count)
                elif 'spR' in LP:
                    Hresp = calcSparseRlogRdotv(
                        v=Data.word_count, **LP)
                else:
                    raise ValueError("Missing resp assignments!")
            else:
                if resp is not None:
                    Hresp = -1 * NumericUtil.calcRlogR(resp)
                elif 'resp' in LP:
                    Hresp = -1 * NumericUtil.calcRlogR(LP['resp'])
                elif 'spR' in LP:
                    assert 'nnzPerRow' in LP
                    Hresp = calcSparseRlogR(**LP)
                else:
                    raise ValueError("Missing resp assignments!")
    if slackTheta is None:
        if SS is not None and SS.hasELBOTerm('slackTheta'):
            slackTheta = SS.getELBOTerm('slackTheta')
            slackThetaRem = SS.getELBOTerm('slackThetaRem')
        else:
            slackTheta = DocTopicCount - theta
            slackTheta *= ElogPi
            slackTheta = np.sum(slackTheta, axis=0)
            slackThetaRem = -1 * np.sum(thetaRem * ElogPiRem)
    if gammalnTheta is None:
        if SS is not None and SS.hasELBOTerm('gammalnTheta'):
            gammalnSumTheta = SS.getELBOTerm('gammalnSumTheta')
            gammalnTheta = SS.getELBOTerm('gammalnTheta')
            gammalnThetaRem = SS.getELBOTerm('gammalnThetaRem')
        else:
            sumTheta = np.sum(theta, axis=1) + thetaRem
            gammalnSumTheta = np.sum(gammaln(sumTheta))
            gammalnTheta = np.sum(gammaln(theta), axis=0)
            gammalnThetaRem = theta.shape[0] * gammaln(thetaRem)
    if thetaEmptyComp is not None:
        gammalnThetaEmptyComp = nDoc * gammaln(thetaEmptyComp) - \
            gammalnThetaOrigComp
        slackThetaEmptyComp = -np.sum(thetaEmptyComp * ElogPiEmptyComp) - \
            slackThetaOrigComp
    if returnMemoizedDict:
        Mdict = dict(Hresp=Hresp,
                     slackTheta=slackTheta,
                     slackThetaRem=slackThetaRem,
                     gammalnTheta=gammalnTheta,
                     gammalnThetaRem=gammalnThetaRem,
                     gammalnSumTheta=gammalnSumTheta)
        if thetaEmptyComp is not None:
            Mdict['HrespEmptyComp'] = -1 * HrespOrigComp
            Mdict['gammalnThetaEmptyComp'] = gammalnThetaEmptyComp
            Mdict['slackThetaEmptyComp'] = slackThetaEmptyComp
        return Mdict
    # First, compute all local-only terms
    Lentropy = np.sum(Hresp)
    Lslack = slackTheta.sum() + slackThetaRem
    LcDtheta = -1 * (gammalnSumTheta - gammalnTheta.sum() - gammalnThetaRem)
    # For stochastic (soVB), we need to scale up these terms.
    # Only used when --doMemoELBO is set to 0 (not recommended).
    if SS is not None and SS.hasAmpFactor():
        Lentropy *= SS.ampF
        Lslack *= SS.ampF
        LcDtheta *= SS.ampF
    # Next, compute the slack term
    alphaEbeta = alpha * Ebeta
    Lslack_alphaEbeta = np.sum(alphaEbeta[:-1] * sumLogPi)
    if sumLogPiRemVec is not None:
        Ebeta_gt = 1 - np.cumsum(Ebeta[:-1])
        Lslack_alphaEbeta += alpha * np.inner(Ebeta_gt, sumLogPiRemVec)
    else:
        Lslack_alphaEbeta += alphaEbeta[-1] * sumLogPiRem
    Lslack += Lslack_alphaEbeta
    if todict:
        return dict(
            Lslack=Lslack,
            Lentropy=Lentropy,
            LcDtheta=LcDtheta,
            Lslack_alphaEbeta=Lslack_alphaEbeta)
    return LcDtheta + Lslack + Lentropy
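# Sanity sketch (toy values, hypothetical helper): at the coordinate-
# ascent optimum theta = DocTopicCount + alpha * Ebeta[:-1], the slack
# contribution sum_{d,k} (N_dk - theta_dk + alpha * Ebeta_k) ElogPi_dk
# cancels exactly. This is why slackTheta and the alpha * Ebeta part
# (Lslack_alphaEbeta) are tracked separately above: the split stays
# valid even when theta is not at its optimum. The Rem terms cancel
# the same way and are omitted here to keep the sketch minimal.


def _demo_slack_cancels_at_optimum():
    rng = np.random.RandomState(0)
    alpha, D, K = 0.5, 4, 3
    Ebeta = rng.dirichlet(np.ones(K + 1))
    DocTopicCount = 10 * rng.rand(D, K)
    theta = DocTopicCount + alpha * Ebeta[:-1]
    thetaRem = alpha * Ebeta[-1]
    dSumTheta = digamma(theta.sum(axis=1) + thetaRem)
    ElogPi = digamma(theta) - dSumTheta[:, np.newaxis]
    slackTheta = np.sum((DocTopicCount - theta) * ElogPi, axis=0)
    Lslack = slackTheta.sum() + np.sum(
        alpha * Ebeta[:-1] * ElogPi.sum(axis=0))
    assert np.allclose(Lslack, 0.0)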
def restrictedLocalStep_HDPTopicModel(
        Dslice=None,
        curLPslice=None,
        ktarget=0,
        kabsorbList=None,
        xObsModel=None,
        xalphaPi=None,
        xInitSS=None,
        nUpdateSteps=3,
        doBuildOnInit=False,
        convThr=0.5,
        thetaEmptyComp=0.0,
        **kwargs):
    ''' Compute local parameters for HDPTopicModel via restricted local step.

    Returns
    -------
    xLPslice : dict with updated fields
        Fields with learned values
        * resp : N x Kfresh
        * DocTopicCount : nDoc x Kfresh
        * theta : nDoc x Kfresh
        * ElogPi : nDoc x Kfresh
        Fields copied directly from curLPslice
        * digammaSumTheta : 1D array, size nDoc
        * thetaRem : scalar
        * ElogPiRem : scalar
    '''
    if doBuildOnInit:
        xWholeSS = xInitSS.copy()
    Kfresh = xObsModel.K
    assert Kfresh == xalphaPi.size
    xLPslice = dict()
    # Default warm_start initialization for DocTopicCount
    # by copying the previous counts at all absorbing states
    if kabsorbList is None:
        xLPslice['DocTopicCount'] = np.zeros((Dslice.nDoc, Kfresh))
        xLPslice['resp'] = np.zeros(
            (curLPslice['resp'].shape[0], Kfresh))
    else:
        # Initialize DocTopicCounts by copying those from absorbing states
        xLPslice['DocTopicCount'] = \
            curLPslice['DocTopicCount'][:, kabsorbList].copy()
        # Initialize resp by copying existing resp for absorbing states.
        # Note: this is NOT consistent with some docs in DocTopicCount,
        # but that will get fixed by the restricted step.
        xLPslice['resp'] = \
            curLPslice['resp'][:, kabsorbList].copy()
    xLPslice['theta'] = \
        xLPslice['DocTopicCount'] + xalphaPi[np.newaxis, :]
    xLPslice['_nIters'] = -1 * np.ones(Dslice.nDoc)
    xLPslice['_maxDiff'] = -1 * np.ones(Dslice.nDoc)
    for step in range(nUpdateSteps):
        # Compute conditional likelihoods for every data atom
        xLPslice = xObsModel.calc_local_params(Dslice, xLPslice)
        assert 'E_log_soft_ev' in xLPslice
        assert 'obsModelName' in xLPslice
        # Fill in these fields, one doc at a time
        for d in range(Dslice.nDoc):
            xLPslice = restrictedLocalStepForSingleDoc_HDPTopicModel(
                d=d,
                Dslice=Dslice,
                curLPslice=curLPslice,
                xLPslice=xLPslice,
                ktarget=ktarget,
                kabsorbList=kabsorbList,
                xalphaPi=xalphaPi,
                thetaEmptyComp=thetaEmptyComp,
                **kwargs)
        isLastStep = step == nUpdateSteps - 1
        if not isLastStep:
            xSS = xObsModel.calcSummaryStats(Dslice, None, xLPslice)
            # Increment
            if doBuildOnInit:
                xSS.setUIDs(xWholeSS.uids)
                xWholeSS += xSS
            else:
                xWholeSS = xSS
            # Global step
            xObsModel.update_global_params(xWholeSS)
            # Decrement stats
            if doBuildOnInit:
                xWholeSS -= xSS
            # Assess early stopping
            if step > 0:
                thr = np.sum(np.abs(prevCountVec - xSS.getCountVec()))
                if thr < convThr:
                    break
            prevCountVec = xSS.getCountVec()
    # Compute other LP quantities related to log prob (topic | doc)
    # and fill these into the expanded LP dict
    digammaSumTheta = curLPslice['digammaSumTheta'].copy()
    xLPslice['digammaSumTheta'] = digammaSumTheta
    xLPslice['ElogPi'] = \
        digamma(xLPslice['theta']) - digammaSumTheta[:, np.newaxis]
    xLPslice['thetaRem'] = curLPslice['thetaRem'].copy()
    xLPslice['ElogPiRem'] = curLPslice['ElogPiRem'].copy()
    # Compute quantities related to leaving ktarget almost empty,
    # as we expand and transfer mass to other comps
    if thetaEmptyComp > 0:
        ElogPiEmptyComp = digamma(thetaEmptyComp) - digammaSumTheta
        xLPslice['thetaEmptyComp'] = thetaEmptyComp
        xLPslice['ElogPiEmptyComp'] = ElogPiEmptyComp
    # Compute quantities related to OrigComp, the original target cluster.
    # These need to be tracked and turned into relevant summaries
    # so that they can be used to create a valid proposal state "propSS"
    xLPslice['ElogPiOrigComp'] = curLPslice['ElogPi'][:, ktarget]
    xLPslice['gammalnThetaOrigComp'] = \
        np.sum(gammaln(curLPslice['theta'][:, ktarget]))
    slack = curLPslice['DocTopicCount'][:, ktarget] - \
        curLPslice['theta'][:, ktarget]
    xLPslice['slackThetaOrigComp'] = np.sum(
        slack * curLPslice['ElogPi'][:, ktarget])
    if hasattr(Dslice, 'word_count') and \
            xLPslice['resp'].shape[0] == Dslice.word_count.size:
        xLPslice['HrespOrigComp'] = -1 * NumericUtil.calcRlogRdotv(
            curLPslice['resp'][:, ktarget], Dslice.word_count)
    else:
        xLPslice['HrespOrigComp'] = -1 * NumericUtil.calcRlogR(
            curLPslice['resp'][:, ktarget])
    return xLPslice
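# The early-stopping rule above compares successive count vectors in
# L1 norm against convThr. A minimal standalone illustration of that
# test (hypothetical helper; countVecs stands in for the sequence of
# xSS.getCountVec() values gathered across update steps):


def _demo_count_convergence(countVecs, convThr=0.5):
    ''' Return index of first step whose counts moved less than convThr. '''
    prevCountVec = None
    for step, countVec in enumerate(countVecs):
        if prevCountVec is not None:
            if np.sum(np.abs(prevCountVec - countVec)) < convThr:
                return step
        prevCountVec = countVec
    return len(countVecs) - 1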