def learn_rhoomega_fromFixedCounts(DocTopicCount_d=None,
                                   nDoc=0,
                                   alpha=None,
                                   gamma=None,
                                   initrho=None,
                                   initomega=None,
                                   maxiter=1000):
    ''' Estimate rho/omega by coordinate ascent with doc-topic counts fixed.

    Alternates between a closed-form update of theta (all nDoc documents
    share the single count vector DocTopicCount_d) and a numerical update
    of rho/omega, until the implied beta vector stops changing or maxiter
    sweeps elapse.

    Parameters
    ----------
    DocTopicCount_d : 1D array, size K
        Topic counts for one representative document.
    nDoc : int
        Number of identical documents these counts stand in for.
    alpha : float, document-level concentration.
    gamma : float, corpus-level concentration.
    initrho : optional 1D array, size K, warm-start for rho.
    initomega : optional 1D array, size K, warm-start for omega.
    maxiter : int, default 1000
        Safety cap on coordinate-ascent sweeps. New backward-compatible
        parameter: converging runs behave exactly as before; it only
        prevents an infinite loop when beta oscillates without meeting
        the tolerance. Mirrors the maxiter guard in the matrix variant
        of this routine elsewhere in this file.

    Returns
    -------
    rho : 1D array, size K
    omega : 1D array, size K
    '''
    Nd = np.sum(DocTopicCount_d)
    K = DocTopicCount_d.size
    # Initialize rho/omega, warm-starting when values are provided.
    if initrho is None:
        rho = OptimizerRhoOmega.create_initrho(K)
    else:
        rho = initrho
    if initomega is None:
        omega = OptimizerRhoOmega.create_initomega(K, nDoc, gamma)
    else:
        omega = initomega
    # Report the objective at the initial point.
    evalELBOandPrint(
        rho=rho, omega=omega,
        DocTopicCount=np.tile(DocTopicCount_d, (nDoc, 1)),
        alpha=alpha,
        gamma=gamma,
        msg='init',
    )
    betaK = rho2beta(rho, returnSize="K")
    prevbetaK = np.zeros_like(betaK)
    iterid = 0
    # Iterate until beta stops moving (L1 change below tolerance).
    while np.sum(np.abs(betaK - prevbetaK)) > 0.000001:
        iterid += 1
        if iterid > maxiter:
            # Safety cap; see docstring. Without this the loop could
            # spin forever if the two coordinate updates oscillate.
            break
        # Closed-form theta update given current beta.
        theta_d = DocTopicCount_d + alpha * betaK
        thetaRem = alpha * (1 - np.sum(betaK))
        assert np.allclose(theta_d.sum() + thetaRem, alpha + Nd)
        digammaSum = digamma(theta_d.sum() + thetaRem)
        Elogpi_d = digamma(theta_d) - digammaSum
        ElogpiRem = digamma(thetaRem) - digammaSum
        # All nDoc docs are identical, so the sufficient statistic
        # is just nDoc times the per-document expectation.
        sumLogPi = nDoc * np.hstack([Elogpi_d, ElogpiRem])
        # Numerical rho/omega update given the theta summaries.
        rho, omega, f, Info = OptimizerRhoOmega.\
            find_optimum_multiple_tries(
                alpha=alpha,
                gamma=gamma,
                sumLogPi=sumLogPi,
                nDoc=nDoc,
                initrho=rho,
                initomega=omega,
                approx_grad=1,
            )
        prevbetaK = betaK.copy()
        betaK = rho2beta(rho, returnSize="K")
        # Progress printout: every early sweep, then every 10th.
        if iterid < 5 or iterid % 10 == 0:
            evalELBOandPrint(
                rho=rho, omega=omega,
                DocTopicCount=np.tile(DocTopicCount_d, (nDoc, 1)),
                alpha=alpha,
                gamma=gamma,
                msg=str(iterid),
            )
    return rho, omega
def L_top(rho=None, omega=None, alpha=None, gamma=None, kappa=0,
          startAlpha=0, **kwargs):
    ''' Evaluate the top-level term of the surrogate objective.

    Returns
    -------
    Ltop : scalar float
    '''
    if startAlpha == 0:
        startAlpha = alpha
    K = rho.size
    # Posterior Beta(postA, postB) parameters of each stick variable u_k.
    postA = rho * omega
    postB = (1 - rho) * omega
    dgOmega = digamma(omega)
    ElogU = digamma(postA) - dgOmega
    Elog1mU = digamma(postB) - dgOmega
    # Difference of Beta normalization constants: prior minus posterior.
    cBetaDiff = K * c_Beta(1.0, gamma) - c_Beta(postA, postB)
    logAlphaTerm = K * K * np.log(alpha) + K * np.log(startAlpha)
    if kappa > 0:
        # Sticky variant: extra terms involving kappa.
        uCoef = K + 1.0 - postA
        oneMinusUCoef = K * OptimizerRhoOmega.kvec(K) + 1.0 + gamma - postB
        EbetaSum = np.sum(rho2beta(rho, returnSize='K'))
        betaTerm = EbetaSum * (np.log(alpha + kappa) - np.log(kappa))
        kappaTerm = K * (np.log(kappa) - np.log(alpha + kappa))
    else:
        uCoef = (K + 1) + 1.0 - postA
        oneMinusUCoef = (K + 1) * OptimizerRhoOmega.kvec(K) + gamma - postB
        betaTerm = 0
        kappaTerm = 0
    logUTerm = np.inner(uCoef, ElogU) + np.inner(oneMinusUCoef, Elog1mU)
    return logAlphaTerm + kappaTerm + betaTerm + cBetaDiff + logUTerm
def calcELBO_LinearTerms(SS=None,
                         StartStateCount=None, TransStateCount=None,
                         rho=None, omega=None, Ebeta=None,
                         startTheta=None, transTheta=None,
                         startAlpha=0, alpha=0, kappa=None, gamma=None,
                         afterGlobalStep=0, todict=0, **kwargs):
    """ Calculate ELBO objective terms that are linear in suff stats.

    Returns
    -------
    L : scalar float
        L is sum of any term in ELBO that is const/linear wrt suff stats.
    """
    # Allocation part: top-level stick term minus Dirichlet normalizers.
    Lalloc = L_top(rho=rho, omega=omega, alpha=alpha, gamma=gamma,
                   kappa=kappa, startAlpha=startAlpha) \
        - c_Dir(transTheta) - c_Dir(startTheta)
    if afterGlobalStep:
        return dict(Lalloc=Lalloc, Lslack=0) if todict else Lalloc
    K = rho.size
    if Ebeta is None:
        Ebeta = rho2beta(rho, returnSize='K+1')
    if SS is not None:
        StartStateCount = SS.StartStateCount
        TransStateCount = SS.TransStateCount
    # Pad suff stats with a zero final column representing the
    # aggregate of all inactive states.
    if StartStateCount.size == K:
        StartStateCount = np.hstack([StartStateCount, 0])
    if TransStateCount.shape[-1] == K:
        TransStateCount = np.hstack([TransStateCount, np.zeros((K, 1))])
    # Slack term for the initial-state Dirichlet.
    ElogPiStart = digamma(startTheta) - digamma(startTheta.sum())
    startSlack = np.inner(
        StartStateCount + startAlpha * Ebeta - startTheta, ElogPiStart)
    # Slack term for each transition-row Dirichlet; the kappa sticky
    # bonus lives on the diagonal of the active-state columns.
    priorTransCount = alpha * np.tile(Ebeta, (K, 1))
    priorTransCount[:, :K] += kappa * np.eye(K)
    ElogPiTrans = digamma(transTheta) - \
        digamma(np.sum(transTheta, axis=1))[:, np.newaxis]
    transSlack = np.sum(
        (TransStateCount + priorTransCount - transTheta) * ElogPiTrans)
    if todict:
        return dict(Lalloc=Lalloc, Lslack=startSlack + transSlack)
    return Lalloc + startSlack + transSlack
def DocTopicCount_to_sumLogPi(
        rho=None, omega=None, betaK=None,
        DocTopicCount=None, alpha=None, gamma=None, **kwargs):
    ''' Compute expected-log-pi sufficient statistics from fixed counts.

    Returns
    -------
    sumLogPiActiveVec : 1D array, size K
    sumLogPiRemVec : 1D array, size K (only last entry nonzero)
    LP : dict of local parameters (theta, thetaRem, ElogPi, ...)
    '''
    K = rho.size
    if betaK is None:
        betaK = rho2beta(rho, returnSize="K")
    # Dirichlet posterior over each document's topic probabilities.
    theta = DocTopicCount + alpha * betaK[np.newaxis, :]
    thetaRem = alpha * (1 - np.sum(betaK))
    # Sanity check: each row's pseudo-count mass is alpha + doc total.
    assert np.allclose(
        theta.sum(axis=1) + thetaRem,
        alpha + DocTopicCount.sum(axis=1))
    digammaSumTheta = digamma(theta.sum(axis=1) + thetaRem)
    ElogPi = digamma(theta) - digammaSumTheta[:, np.newaxis]
    ElogPiRem = digamma(thetaRem) - digammaSumTheta
    sumLogPiActiveVec = ElogPi.sum(axis=0)
    # Remainder mass is attributed entirely to the last position.
    sumLogPiRemVec = np.zeros(K)
    sumLogPiRemVec[K - 1] = ElogPiRem.sum()
    LP = dict(
        ElogPi=ElogPi,
        ElogPiRem=ElogPiRem,
        digammaSumTheta=digammaSumTheta,
        theta=theta,
        thetaRem=thetaRem)
    return sumLogPiActiveVec, sumLogPiRemVec, LP
def test_FixedCount_GlobalStepOnce(self, K=2, gamma=10.0, alpha=5.0,
                                   DocTopicCount_d=[100. / 2, 100 / 2]):
    ''' Given fixed counts, run one global update to rho/omega.

    Verify that regardless of initialization, the recovered beta value
    is roughly the same.
    '''
    # NOTE: Python 2 print statements throughout this method.
    print ''
    DocTopicCount_d = np.asarray(DocTopicCount_d, dtype=np.float64)
    print '------------- alpha %6.3f gamma %6.3f' % (alpha, gamma)
    print '------------- DocTopicCount [%s]' % (
        np2flatstr(DocTopicCount_d, fmt='%d'), )
    print '------------- DocTopicProb [%s]' % (np2flatstr(
        DocTopicCount_d / DocTopicCount_d.sum(), fmt='%.3f'), )
    Nd = np.sum(DocTopicCount_d)
    # Build theta from a uniform beta of 1/(K+1) over K active topics
    # plus the remainder topic.
    theta_d = DocTopicCount_d + alpha * 1.0 / (K + 1) * np.ones(K)
    thetaRem = alpha * 1 / (K + 1)
    # Pseudo-count mass must equal alpha plus the observed count total.
    assert np.allclose(theta_d.sum() + thetaRem, alpha + Nd)
    digammaSum = digamma(theta_d.sum() + thetaRem)
    Elogpi_d = digamma(theta_d) - digammaSum
    ElogpiRem = digamma(thetaRem) - digammaSum
    # Repeat the experiment at several corpus sizes.
    for nDoc in [1, 10, 100, 1000]:
        sumLogPi = nDoc * np.hstack([Elogpi_d, ElogpiRem])
        # Now, run inference from many inits to find optimal rho/omega
        Results = list()
        # initrho of None uses the optimizer's default init;
        # integer values seed a random init instead.
        for initrho in [None, 1, 2, 3]:
            initomega = None
            if isinstance(initrho, int):
                PRNG = np.random.RandomState(initrho)
                initrho = PRNG.rand(K)
                initomega = 100 * PRNG.rand(K)
            rho, omega, f, Info = OptimizerRhoOmega.\
                find_optimum_multiple_tries(
                    alpha=alpha,
                    gamma=gamma,
                    sumLogPi=sumLogPi,
                    nDoc=nDoc,
                    initrho=initrho,
                    initomega=initomega,
                )
            betaK = rho2beta(rho, returnSize='K')
            Info.update(nDoc=nDoc, alpha=alpha, gamma=gamma,
                        rho=rho, omega=omega, betaK=betaK)
            Results.append(Info)
        pprintResult(Results)
        # All inits must land on (nearly) the same beta.
        beta1 = Results[0]['betaK']
        for i in range(1, len(Results)):
            beta_i = Results[i]['betaK']
            assert np.allclose(beta1, beta_i, atol=0.0001, rtol=0)
def E_beta(self):
    ''' Get vector of probabilities for active and inactive topics.

    Lazily computed from self.rho on first access, then cached.

    Returns
    -------
    beta : 1D array, size K + 1
        beta[k] gives probability of comp. k under this model.
        beta[K] (last index) is aggregated over all inactive topics.
    '''
    try:
        return self.Ebeta
    except AttributeError:
        # First access: derive from stick-breaking weights and cache.
        self.Ebeta = rho2beta(self.rho)
        return self.Ebeta
def evalELBOandPrint(DocTopicCount=None, alpha=None, gamma=None,
                     rho=None, omega=None, msg='', nDoc=None, **kwargs):
    ''' Check on the objective: evaluate, print, and return it.

    Fixes two defects visible at call sites in this file:
    * callers pass extra keyword args (theta=, thetaRem=, f=,
      sumLogPiActiveVec=, ...) which the old strict signature rejected
      with TypeError; they are now accepted and ignored via **kwargs.
    * callers assign the result (Ltro = evalELBOandPrint(...)) but the
      old version implicitly returned None; the ELBO value L is now
      returned. Both changes are backward-compatible.

    Parameters
    ----------
    DocTopicCount : 2D array, nDoc x K
    alpha, gamma : concentration hyperparameters
    rho, omega : 1D arrays, size K
    msg : str label prefixed to the printed line
    nDoc : optional int; defaults to DocTopicCount.shape[0]

    Returns
    -------
    L : scalar float, the evaluated objective
    '''
    L = calcELBO_FixedDocTopicCountIgnoreEntropy(
        DocTopicCount=DocTopicCount,
        alpha=alpha,
        gamma=gamma,
        rho=rho,
        omega=omega)
    if nDoc is None:
        nDoc = DocTopicCount.shape[0]
    betaK = rho2beta(rho, returnSize='K')
    betastr = np2flatstr(betaK, fmt="%.4f")
    omstr = np2flatstr(omega, fmt="%6.2f")
    # Print per-document objective so values are comparable across nDoc.
    print('%10s % .6e beta %s | omega %s' % (
        msg, L / float(nDoc), betastr, omstr))
    return L
def calcELBO_NonlinearTerms(Data=None, SS=None, LP=None, todict=0,
                            rho=None, Ebeta=None, alpha=None,
                            resp=None, nDoc=None, DocTopicCount=None,
                            theta=None, thetaRem=None,
                            ElogPi=None, ElogPiRem=None,
                            sumLogPi=None, sumLogPiRem=None,
                            sumLogPiRemVec=None,
                            Hresp=None, slackTheta=None, slackThetaRem=None,
                            gammalnTheta=None, gammalnSumTheta=None,
                            gammalnThetaRem=None,
                            thetaEmptyComp=None, ElogPiEmptyComp=None,
                            ElogPiOrigComp=None,
                            gammalnThetaOrigComp=None,
                            slackThetaOrigComp=None,
                            returnMemoizedDict=0, **kwargs):
    """ Calculate ELBO objective terms non-linear in suff stats.

    Every quantity can be supplied directly as a keyword, or derived
    in order of preference from LP (local params), then SS (suff
    stats), then recomputed from more primitive inputs. The cascading
    blocks below fill in whatever was not provided.

    Returns
    -------
    Either a dict of memoized ELBO terms (returnMemoizedDict=1),
    a dict of named terms (todict=1), or the scalar sum
    LcDtheta + Lslack + Lentropy.
    """
    # Determine N (num tokens/items) and K (num active comps)
    # from whichever responsibility representation is available.
    if resp is not None:
        N, K = resp.shape
    elif LP is not None:
        if 'resp' in LP:
            N, K = LP['resp'].shape
        else:
            # sparse responsibility matrix
            N, K = LP['spR'].shape
    if Ebeta is None:
        Ebeta = rho2beta(rho, returnSize='K+1')
    # --- Fill in fields from LP (preferred) or SS.
    if LP is not None:
        DocTopicCount = LP['DocTopicCount']
        nDoc = DocTopicCount.shape[0]
        theta = LP['theta']
        thetaRem = LP['thetaRem']
        ElogPi = LP['ElogPi']
        ElogPiRem = LP['ElogPiRem']
        sumLogPi = np.sum(ElogPi, axis=0)
        sumLogPiRem = np.sum(ElogPiRem)
        if 'thetaEmptyComp' in LP:
            thetaEmptyComp = LP['thetaEmptyComp']
            ElogPiEmptyComp = LP['ElogPiEmptyComp']
            ElogPiOrigComp = LP['ElogPiOrigComp']
            gammalnThetaOrigComp = LP['gammalnThetaOrigComp']
            slackThetaOrigComp = LP['slackThetaOrigComp']
            HrespOrigComp = LP['HrespOrigComp']
    elif SS is not None:
        sumLogPi = SS.sumLogPi
        nDoc = SS.nDoc
        if hasattr(SS, 'sumLogPiRemVec'):
            sumLogPiRemVec = SS.sumLogPiRemVec
        else:
            sumLogPiRem = SS.sumLogPiRem
    # --- Derive theta from counts if not given.
    if DocTopicCount is not None and theta is None:
        theta = DocTopicCount + alpha * Ebeta[:-1]
        thetaRem = alpha * Ebeta[-1]
    # --- Derive expected log pi from theta if not given.
    if theta is not None and ElogPi is None:
        digammasumtheta = digamma(theta.sum(axis=1) + thetaRem)
        ElogPi = digamma(theta) - digammasumtheta[:, np.newaxis]
        # NOTE(review): this makes ElogPiRem a (nDoc, 1) 2D array,
        # unlike the 1D form used elsewhere in this file; downstream
        # code here only ever np.sum's it, so values agree — confirm
        # intent before relying on its shape.
        ElogPiRem = digamma(thetaRem) - digammasumtheta[:, np.newaxis]
    if sumLogPi is None and ElogPi is not None:
        sumLogPi = np.sum(ElogPi, axis=0)
        sumLogPiRem = np.sum(ElogPiRem)
    # --- Entropy of responsibilities: reuse memoized value if stored.
    if Hresp is None:
        if SS is not None and SS.hasELBOTerm('Hresp'):
            Hresp = SS.getELBOTerm('Hresp')
        else:
            # Weight by word_count when Data is bag-of-words shaped
            # to match N (one row per unique doc-word pair).
            if hasattr(Data, 'word_count') and N == Data.word_count.size:
                if resp is not None:
                    Hresp = -1 * NumericUtil.calcRlogRdotv(
                        resp, Data.word_count)
                elif 'resp' in LP:
                    Hresp = -1 * NumericUtil.calcRlogRdotv(
                        LP['resp'], Data.word_count)
                elif 'spR' in LP:
                    Hresp = calcSparseRlogRdotv(
                        v=Data.word_count, **LP)
                else:
                    raise ValueError("Missing resp assignments!")
            else:
                if resp is not None:
                    Hresp = -1 * NumericUtil.calcRlogR(resp)
                elif 'resp' in LP:
                    Hresp = -1 * NumericUtil.calcRlogR(LP['resp'])
                elif 'spR' in LP:
                    assert 'nnzPerRow' in LP
                    Hresp = calcSparseRlogR(**LP)
                else:
                    raise ValueError("Missing resp assignments!")
    # --- Slack terms (count - theta) * ElogPi.
    if slackTheta is None:
        if SS is not None and SS.hasELBOTerm('slackTheta'):
            slackTheta = SS.getELBOTerm('slackTheta')
            slackThetaRem = SS.getELBOTerm('slackThetaRem')
        else:
            slackTheta = DocTopicCount - theta
            slackTheta *= ElogPi
            slackTheta = np.sum(slackTheta, axis=0)
            slackThetaRem = -1 * np.sum(thetaRem * ElogPiRem)
    # --- Dirichlet normalizer pieces (gammaln of theta).
    if gammalnTheta is None:
        if SS is not None and SS.hasELBOTerm('gammalnTheta'):
            gammalnSumTheta = SS.getELBOTerm('gammalnSumTheta')
            gammalnTheta = SS.getELBOTerm('gammalnTheta')
            gammalnThetaRem = SS.getELBOTerm('gammalnThetaRem')
        else:
            sumTheta = np.sum(theta, axis=1) + thetaRem
            gammalnSumTheta = np.sum(gammaln(sumTheta))
            gammalnTheta = np.sum(gammaln(theta), axis=0)
            gammalnThetaRem = theta.shape[0] * gammaln(thetaRem)
    # --- Correction terms for an "empty" component, if tracked.
    # NOTE(review): if thetaEmptyComp is passed directly (not via LP),
    # HrespOrigComp below would be undefined — looks like this path is
    # only reached through LP; confirm against callers.
    if thetaEmptyComp is not None:
        gammalnThetaEmptyComp = nDoc * gammaln(thetaEmptyComp) - \
            gammalnThetaOrigComp
        slackThetaEmptyComp = -np.sum(thetaEmptyComp * ElogPiEmptyComp) - \
            slackThetaOrigComp
    if returnMemoizedDict:
        Mdict = dict(Hresp=Hresp,
                     slackTheta=slackTheta,
                     slackThetaRem=slackThetaRem,
                     gammalnTheta=gammalnTheta,
                     gammalnThetaRem=gammalnThetaRem,
                     gammalnSumTheta=gammalnSumTheta)
        if thetaEmptyComp is not None:
            Mdict['HrespEmptyComp'] = -1 * HrespOrigComp
            Mdict['gammalnThetaEmptyComp'] = gammalnThetaEmptyComp
            Mdict['slackThetaEmptyComp'] = slackThetaEmptyComp
        return Mdict
    # First, compute all local-only terms
    Lentropy = np.sum(Hresp)
    Lslack = slackTheta.sum() + slackThetaRem
    LcDtheta = -1 * (gammalnSumTheta - gammalnTheta.sum() - gammalnThetaRem)
    # For stochastic (soVB), we need to scale up these terms
    # Only used when --doMemoELBO is set to 0 (not recommended)
    if SS is not None and SS.hasAmpFactor():
        Lentropy *= SS.ampF
        Lslack *= SS.ampF
        LcDtheta *= SS.ampF
    # Next, compute the slack term
    alphaEbeta = alpha * Ebeta
    Lslack_alphaEbeta = np.sum(alphaEbeta[:-1] * sumLogPi)
    if sumLogPiRemVec is not None:
        # Vector form: weight each remainder slot by the tail mass
        # of Ebeta beyond that position.
        Ebeta_gt = 1 - np.cumsum(Ebeta[:-1])
        Lslack_alphaEbeta += alpha * np.inner(Ebeta_gt, sumLogPiRemVec)
    else:
        Lslack_alphaEbeta += alphaEbeta[-1] * sumLogPiRem
    Lslack += Lslack_alphaEbeta
    if todict:
        return dict(
            Lslack=Lslack,
            Lentropy=Lentropy,
            LcDtheta=LcDtheta,
            Lslack_alphaEbeta=Lslack_alphaEbeta)
    return LcDtheta + Lslack + Lentropy
def learn_rhoomega_fromFixedCounts(DocTopicCount=None,
                                   nDoc=0,
                                   canShuffleInit='byUsage',
                                   canShuffle=None,
                                   maxiter=5,
                                   warmStart_rho=1,
                                   alpha=None,
                                   gamma=None,
                                   initrho=None,
                                   initomega=None,
                                   **kwargs):
    ''' Learn rho/omega by coordinate ascent with DocTopicCount fixed.

    Optionally re-sorts components at init (canShuffleInit) and each
    sweep (canShuffle), tracks a handful of active and empty
    components, and records per-sweep snapshots of beta and counts.

    Returns
    -------
    rho : 1D array, size K
    omega : 1D array, size K
    Snapshots : dict of per-iteration trace lists
    '''
    assert nDoc == DocTopicCount.shape[0]
    K = DocTopicCount.shape[1]
    didShuffle = 0
    # --- Optional one-time initial reordering of columns.
    if canShuffleInit:
        if canShuffleInit.lower().count('byusage'):
            print 'INITIAL SORTING BY USAGE'
            avgPi = calcAvgPiFromDocTopicCount(DocTopicCount)
            bigtosmall = argsort_bigtosmall_stable(avgPi)
        elif canShuffleInit.lower().count('bycount'):
            print 'INITIAL SORTING BY COUNT'
            bigtosmall = argsort_bigtosmall_stable(
                DocTopicCount.sum(axis=0))
        elif canShuffleInit.lower().count('random'):
            print 'INITIAL SORTING RANDOMLY'
            PRNG = np.random.RandomState(0)
            bigtosmall = np.arange(K)
            PRNG.shuffle(bigtosmall)
        else:
            bigtosmall = np.arange(K)
        # Now, sort.
        if not np.allclose(bigtosmall, np.arange(K)):
            DocTopicCount = DocTopicCount[:, bigtosmall]
            didShuffle = 1
    avgPi = calcAvgPiFromDocTopicCount(DocTopicCount)
    sortedids = argsort_bigtosmall_stable(avgPi)
    # NOTE(review): a falsy canShuffleInit would crash on .lower()
    # here; default 'byUsage' means this path is normally safe.
    if canShuffleInit.lower().count('byusage'):
        assert np.allclose(sortedids, np.arange(K))
    # Find UIDs of comps to track
    emptyUIDs = np.flatnonzero(DocTopicCount.sum(axis=0) < 0.0001)
    if emptyUIDs.size >= 3:
        firstEmptyUID = emptyUIDs.min()
        lastEmptyUID = emptyUIDs.max()
        # Python 2 integer division picks the middle empty comp.
        middleEmptyUID = emptyUIDs[len(emptyUIDs)/2]
        trackEmptyUIDs = [firstEmptyUID, middleEmptyUID, lastEmptyUID]
        emptyLabels = ['first', 'middle', 'last']
    elif emptyUIDs.size == 2:
        trackEmptyUIDs = [emptyUIDs.min(), emptyUIDs.max()]
        emptyLabels = ['first', 'last']
    elif emptyUIDs.size == 1:
        firstEmptyUID = emptyUIDs.min()
        trackEmptyUIDs = [firstEmptyUID]
        emptyLabels = ['first']
    else:
        trackEmptyUIDs = []
        emptyLabels = []
    trackActiveUIDs = list()
    activeLabels = list()
    # Track the top 5 active columns of DocTopicCount
    for pos in range(0, np.minimum(5, K)):
        if sortedids[pos] not in emptyUIDs:
            trackActiveUIDs.append(sortedids[pos])
            activeLabels.append('max+%d' % (pos))
    # Find the minnonemptyID
    for pos in range(K-1, 0, -1):
        curid = sortedids[pos]
        if curid not in emptyUIDs:
            break
    minnonemptyPos = pos
    # Track the 5 smallest active columns of DocTopicCount
    nBeyond5 = np.minimum(5, K - len(emptyUIDs) - 5)
    for i in range(-1 * (nBeyond5-1), 1):
        trackActiveUIDs.append(sortedids[minnonemptyPos + i])
        activeLabels.append('min+%d' % (-1 * i))
    assert np.all(avgPi[trackActiveUIDs] > 0)
    assert np.allclose(avgPi[trackEmptyUIDs], 0.0)
    assert is_sorted_bigtosmall(avgPi[trackActiveUIDs])
    nDocToDisplay = np.minimum(nDoc, 10)
    # Initialize rho
    if initrho is None:
        rho = OptimizerRhoOmegaBetter.make_initrho(K, nDoc, gamma)
    else:
        # If columns were shuffled above, permute the warm-start too.
        if didShuffle:
            rho, _ = reorder_rho(initrho, bigtosmall)
        else:
            rho = initrho
    # Initialize omega
    if initomega is None:
        omega = OptimizerRhoOmegaBetter.make_initomega(K, nDoc, gamma)
    else:
        omega = initomega
    # ELBO value of initial state
    Ltro = evalELBOandPrint(
        rho=rho, omega=omega,
        nDoc=nDoc,
        DocTopicCount=DocTopicCount,
        alpha=alpha,
        gamma=gamma,
        msg='init',
    )
    # Per-iteration trace storage.
    Snapshots = dict()
    Snapshots['DTCSum'] = list()
    Snapshots['DTCUsage'] = list()
    Snapshots['beta'] = list()
    Snapshots['Lscore'] = list()
    Snapshots['activeLabels'] = activeLabels
    Snapshots['emptyLabels'] = emptyLabels
    Snapshots['pos_trackActive'] = list()
    Snapshots['pos_trackEmpty'] = list()
    Snapshots['beta_trackActive'] = list()
    Snapshots['beta_trackEmpty'] = list()
    Snapshots['count_trackActive'] = list()
    Snapshots['count_trackEmpty'] = list()
    Snapshots['beta_trackRem'] = list()
    LtroList = list()
    LtroList.append(Ltro)
    betaK = rho2beta(rho, returnSize="K")
    iterid = 0
    prevbetaK = np.zeros_like(betaK)
    prevrho = rho.copy()
    # Iterate until beta stops moving, capped at maxiter sweeps.
    while np.sum(np.abs(betaK - prevbetaK)) > 0.0000001:
        iterid += 1
        if iterid > maxiter:
            break
        # Take Snapshots of Learned Params
        Snapshots['Lscore'].append(Ltro)
        Snapshots['DTCSum'].append(DocTopicCount.sum(axis=0))
        Snapshots['DTCUsage'].append((DocTopicCount > 0.001).sum(axis=0))
        Snapshots['beta'].append(betaK)
        Snapshots['pos_trackActive'].append(trackActiveUIDs)
        Snapshots['pos_trackEmpty'].append(trackEmptyUIDs)
        Snapshots['beta_trackActive'].append(betaK[trackActiveUIDs])
        Snapshots['beta_trackEmpty'].append(betaK[trackEmptyUIDs])
        Snapshots['beta_trackRem'].append(1.0 - betaK.sum())
        Snapshots['count_trackActive'].append(
            DocTopicCount.sum(axis=0)[trackActiveUIDs])
        Snapshots['count_trackEmpty'].append(
            DocTopicCount.sum(axis=0)[trackEmptyUIDs])
        # Sort by beta
        didShuffle = 0
        tlabel = '_t'
        if iterid > 1 and canShuffle and canShuffle.lower().count('bybeta'):
            bigtosmall = argsort_bigtosmall_stable(betaK)
            if not np.allclose(bigtosmall, np.arange(K)):
                trackActiveUIDs = mapToNewPos(trackActiveUIDs, bigtosmall)
                trackEmptyUIDs = mapToNewPos(trackEmptyUIDs, bigtosmall)
                rho, betaK = reorder_rho(rho, bigtosmall)
                DocTopicCount = DocTopicCount[:, bigtosmall]
                didShuffle = 1
                tlabel = '_ts'
        # Update theta
        sumLogPiActiveVec, sumLogPiRemVec, LP = DocTopicCount_to_sumLogPi(
            rho=rho, omega=omega,
            DocTopicCount=DocTopicCount,
            alpha=alpha, gamma=gamma,
            **kwargs)
        # Show ELBO with freshly-optimized theta value.
        Ltro = evalELBOandPrint(
            rho=rho, omega=omega,
            DocTopicCount=DocTopicCount,
            theta=LP['theta'],
            thetaRem=LP['thetaRem'],
            nDoc=nDoc,
            sumLogPiActiveVec=sumLogPiActiveVec,
            sumLogPiRemVec=sumLogPiRemVec,
            alpha=alpha,
            gamma=gamma,
            f=None,
            msg=str(iterid) + tlabel,
        )
        LtroList.append(Ltro)
        if not LtroList[-1] >= LtroList[-2]:
            if didShuffle:
                print 'NOT MONOTONIC! just after theta update with SHUFFLE!'
            else:
                print 'NOT MONOTONIC! just after theta standard update'
        didELBODrop = 0
        # --- Optional per-sweep reordering by the chosen criterion.
        if canShuffle:
            if canShuffle.lower().count('bysumlogpi'):
                bigtosmall = argsort_bigtosmall_stable(
                    sumLogPiActiveVec)
            elif canShuffle.lower().count('bycounts'):
                bigtosmall = argsort_bigtosmall_stable(
                    DocTopicCount.sum(axis=0))
            elif canShuffle.lower().count('byusage'):
                estPi = DocTopicCount / \
                    DocTopicCount.sum(axis=1)[:, np.newaxis]
                avgPi = np.sum(estPi, axis=0)
                bigtosmall = argsort_bigtosmall_stable(avgPi)
            else:
                bigtosmall = np.arange(K)
            if not np.allclose(bigtosmall, np.arange(K)):
                trackActiveUIDs = mapToNewPos(trackActiveUIDs, bigtosmall)
                trackEmptyUIDs = mapToNewPos(trackEmptyUIDs, bigtosmall)
                rho, betaK = reorder_rho(rho, bigtosmall)
                sumLogPiActiveVec = sumLogPiActiveVec[bigtosmall]
                DocTopicCount = DocTopicCount[:, bigtosmall]
                LP['theta'] = LP['theta'][:, bigtosmall]
                didShuffle = 1
                # Show ELBO with freshly-optimized rho value.
                # NOTE(review): nesting of this evaluation inside the
                # shuffle branch is reconstructed from a collapsed
                # source layout — confirm against original file.
                Ltro = evalELBOandPrint(
                    rho=rho, omega=omega,
                    DocTopicCount=DocTopicCount,
                    theta=LP['theta'],
                    thetaRem=LP['thetaRem'],
                    nDoc=nDoc,
                    sumLogPiActiveVec=sumLogPiActiveVec,
                    sumLogPiRemVec=sumLogPiRemVec,
                    alpha=alpha,
                    gamma=gamma,
                    f=None,
                    msg=str(iterid) + "_ss",
                )
                LtroList.append(Ltro)
                if not LtroList[-1] >= LtroList[-2]:
                    print 'NOT MONOTONIC! just after %s shuffle update!' % (
                        canShuffle)
                    didELBODrop = 1
        prevrho[:] = rho
        # Update rhoomega
        if warmStart_rho:
            initrho = rho
        else:
            initrho = None
        rho, omega, f, Info = OptimizerRhoOmegaBetter.\
            find_optimum_multiple_tries(
                alpha=alpha,
                gamma=gamma,
                sumLogPiActiveVec=sumLogPiActiveVec,
                sumLogPiRemVec=sumLogPiRemVec,
                nDoc=nDoc,
                initrho=initrho,
                initomega=omega,
                approx_grad=1,
                do_grad_omega=0,
            )
        prevbetaK[:] = betaK
        betaK = rho2beta(rho, returnSize="K")
        # Show ELBO with freshly-optimized rho value.
        Ltro = evalELBOandPrint(
            rho=rho, omega=omega,
            DocTopicCount=DocTopicCount,
            theta=LP['theta'],
            thetaRem=LP['thetaRem'],
            nDoc=nDoc,
            sumLogPiActiveVec=sumLogPiActiveVec,
            sumLogPiRemVec=sumLogPiRemVec,
            alpha=alpha,
            gamma=gamma,
            f=f,
            msg=str(iterid) + "_r",
        )
        LtroList.append(Ltro)
        if not LtroList[-1] >= LtroList[-2]:
            print 'NOT MONOTONIC! just after rho update!'
        if didELBODrop:
            # Check whether the combined shuffle+rho step recovered
            # the drop recorded at the shuffle stage.
            if LtroList[-1] >= LtroList[-3]:
                print 'Phew. Combined update of sorting then optimizing rho OK'
            else:
                print 'WHOA! Combined update of sorting then' + \
                    ' optimizing rho beta NOT MONOTONIC'
    # --- Final snapshot after the loop exits.
    Snapshots['Lscore'].append(Ltro)
    Snapshots['DTCSum'].append(DocTopicCount.sum(axis=0))
    Snapshots['DTCUsage'].append((DocTopicCount > 0.001).sum(axis=0))
    Snapshots['beta'].append(betaK)
    Snapshots['pos_trackActive'].append(trackActiveUIDs)
    Snapshots['pos_trackEmpty'].append(trackEmptyUIDs)
    Snapshots['beta_trackActive'].append(betaK[trackActiveUIDs])
    Snapshots['beta_trackEmpty'].append(betaK[trackEmptyUIDs])
    Snapshots['beta_trackRem'].append(1.0 - betaK.sum())
    Snapshots['count_trackActive'].append(
        DocTopicCount.sum(axis=0)[trackActiveUIDs])
    Snapshots['count_trackEmpty'].append(
        DocTopicCount.sum(axis=0)[trackEmptyUIDs])
    # --- Human-readable summary printout.
    print '\nEmpty cluster ids (%d of %d)' % (
        len(trackEmptyUIDs), len(emptyUIDs))
    print '-----------------'
    print ' '.join(['% 10d' % (x) for x in trackEmptyUIDs])
    print '\nSelected active clusters to track'
    print '---------------------------------'
    print ' '.join(['% 10d' % (x) for x in trackActiveUIDs])
    print ' '.join(['% .3e' % (x) for x in avgPi[trackActiveUIDs]])
    print '\nDocTopicCount for %d of %d docs' % (nDocToDisplay, nDoc)
    print '---------------------------------'
    for n in range(nDocToDisplay):
        print ' '.join([
            '% 9.2f' % (x) for x in DocTopicCount[n, trackActiveUIDs]])
    print '\nFinal sumLogPiActiveVec'
    print '---------------------------------'
    print ' '.join(['% .3e' % (x)
                    for x in sumLogPiActiveVec[trackActiveUIDs]])
    print 'is sumLogPiActiveVec sorted?', \
        is_sorted_bigtosmall(sumLogPiActiveVec)
    return rho, omega, Snapshots
def reorder_rho(rho, bigtosmallIDs):
    ''' Permute stick-breaking weights to follow a new component order.

    Parameters
    ----------
    rho : 1D array, size K
    bigtosmallIDs : 1D array of ints, the desired column permutation

    Returns
    -------
    newrho : 1D array, size K, rho re-derived from the permuted beta
    newbetaK : 1D array, size K, active-topic probabilities, permuted
    '''
    permutedBetaK = rho2beta(rho, returnSize='K')[bigtosmallIDs]
    newrho = OptimizerRhoOmegaBetter.beta2rho(permutedBetaK, rho.size)
    return newrho, permutedBetaK