def _estimate_u(self, SS, **kwargs):
    ''' Calculate best 2*K-vector u via L-BFGS gradient descent,
        performing multiple tries in case of numerical issues
    '''
    if hasattr(self, 'U1') and self.U1.size == self.K:
        initU = np.hstack([self.U1, self.U0])
    else:
        # Use the prior
        initU = np.hstack([np.ones(self.K), self.alpha0 * np.ones(self.K)])
    sumLogPi = np.hstack([SS.sumLogPiActive, SS.sumLogPiUnused])
    try:
        u, fofu, Info = OptimHDP.estimate_u_multiple_tries(
            sumLogPi=sumLogPi, nDoc=SS.nDoc,
            gamma=self.gamma, alpha0=self.alpha0,
            initU=initU)
    except ValueError as error:
        if str(error).count('FAILURE') == 0:
            raise error
        if hasattr(self, 'U1') and self.U1.size == self.K:
            Log.error('***** Optim failed. Stay put. ' + str(error))
            return  # EXIT with current state, failed to update
        else:
            Log.error('***** Optim failed. Stuck at prior. ' + str(error))
            u = initU  # fall back on the prior otherwise
    return u
def set_helper_params(self):
    ''' Set dependent attribs of this model, given the primary params U1, U0
        This includes expectations of various stick-breaking quantities
    '''
    assert self.U1.size == self.K
    assert self.U0.size == self.K
    E = OptimHDP._calcExpectations(self.U1, self.U0)
    self.Ebeta = E['beta']
    self.Elogv = E['logv']
    self.Elog1mv = E['log1-v']
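# For reference: OptimHDP._calcExpectations is defined elsewhere. The
# quantities it returns follow from standard Beta / stick-breaking
# identities for independent sticks v_k ~ Beta(U1[k], U0[k]). A minimal
# illustrative stand-in (an assumption about its contract, not the
# actual OptimHDP code):

def _calc_stick_expectations_sketch(U1, U0):
    import numpy as np
    from scipy.special import digamma
    Ev = U1 / (U1 + U0)
    E = dict()
    # E[log v_k] and E[log(1-v_k)] for Beta-distributed sticks
    E['logv'] = digamma(U1) - digamma(U1 + U0)
    E['log1-v'] = digamma(U0) - digamma(U1 + U0)
    # E[beta_k] = E[v_k] * prod_{j<k} (1 - E[v_j]); the final entry is
    # the leftover mass prod_j (1 - E[v_j])
    remMass = np.hstack([1.0, np.cumprod(1.0 - Ev)])
    E['beta'] = np.hstack([Ev * remMass[:-1], remMass[-1]])
    return E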
def set_global_params(self, hmodel=None, U1=None, U0=None, K=0,
                      beta=None, topic_prior=None,
                      Ebeta=None, EbetaLeftover=None,
                      theta=None, **kwargs):
    if hmodel is not None:
        self.K = hmodel.allocModel.K
        self.U1 = hmodel.allocModel.U1
        self.U0 = hmodel.allocModel.U0
        self.set_helper_params()
        return
    if U1 is not None and U0 is not None:
        self.U1 = U1
        self.U0 = U0
        self.K = U1.size
        self.set_helper_params()
        return
    if Ebeta is not None and EbetaLeftover is not None:
        Ebeta = np.squeeze(Ebeta)
        EbetaLeftover = np.squeeze(EbetaLeftover)
        beta = np.hstack([Ebeta, EbetaLeftover])
        self.K = beta.size - 1
    elif beta is not None:
        assert beta.size == K
        beta = np.hstack([beta, np.min(beta) / 100.])
        beta = beta / np.sum(beta)
        self.K = beta.size - 1
    else:
        raise ValueError('Bad parameters. Vector beta not specified.')
    # Now, use the specified value of beta to find the best U1, U0
    assert beta.size == self.K + 1
    assert abs(np.sum(beta) - 1.0) < 0.001
    vMean = OptimHDP.beta2v(beta)
    # for each k=1,2...K
    # find the multiplier vMass[k] such that both are true
    # 1) vMass[k] * vMean[k] > 1.0
    # 2) vMass[k] * (1-vMean[k]) > self.alpha0
    vMass = np.maximum(1. / vMean, self.alpha0 / (1. - vMean))
    self.U1 = vMass * vMean
    self.U0 = vMass * (1 - vMean)
    assert np.all(self.U1 >= 1.0 - 0.00001)
    assert np.all(self.U0 >= self.alpha0 - 0.00001)
    assert self.U1.size == self.K
    assert self.U0.size == self.K
    ####################################### Set Global Params for Theta
    if theta is not None and beta is not None:
        self.theta = theta
    else:
        self.theta = np.ones((self.nNodeTotal, self.K + 1))
    self.set_helper_params()
def set_helper_params(self):
    ''' Set dependent attribs of this model, given the primary params U1, U0
        This includes expectations of various stick-breaking quantities
    '''
    E = OptimHDP._calcExpectations(self.U1, self.U0)
    self.Ebeta = E['beta']
    self.Elogv = E['logv']
    self.Elog1mv = E['log1-v']
    self.ElogEps1 = np.log(self.epsilon)
    self.ElogEps0 = np.log(1 - self.epsilon)
    self.ElogTheta = digamma(self.theta) \
        - digamma(np.sum(self.theta, axis=1))[:, np.newaxis]
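# The ElogTheta update above uses the standard Dirichlet identity
#   E[log pi_k] = digamma(theta_k) - digamma(sum_j theta_j),
# applied row-wise to self.theta. A quick standalone Monte Carlo
# sanity check of that identity (illustrative only):

def _check_dirichlet_Elog_identity():
    import numpy as np
    from scipy.special import digamma
    theta = np.array([2.0, 5.0, 1.5])
    exact = digamma(theta) - digamma(theta.sum())
    samples = np.random.default_rng(0).dirichlet(theta, size=200000)
    approx = np.log(samples).mean(axis=0)
    assert np.allclose(exact, approx, atol=0.02)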
def insert_global_params(self, beta=None, **kwargs):
    Knew = beta.size
    # Pad beta with a tiny leftover-mass entry and renormalize,
    # so beta2v yields exactly Knew new stick fractions
    beta = np.hstack([beta, np.min(beta) / 100.])
    beta = beta / np.sum(beta)
    vMean = OptimHDP.beta2v(beta)
    vMass = np.maximum(1. / vMean, self.alpha0 / (1. - vMean))
    self.K += Knew
    self.U1 = np.append(self.U1, vMass * vMean)
    self.U0 = np.append(self.U0, vMass * (1 - vMean))
    assert self.U1.size == self.K
    assert self.U0.size == self.K
    self.set_helper_params()
def _convert_beta2u(self, beta):
    ''' Given a vector beta (size K+1),
        return educated guess for vectors u1, u0

        Returns
        --------
        U1 : 1D array, size K
        U0 : 1D array, size K
    '''
    assert abs(np.sum(beta) - 1.0) < 0.001
    vMean = OptimHDP.beta2v(beta)
    # for each k=1,2...K
    # find the multiplier vMass[k] such that both are true
    # 1) vMass[k] * vMean[k] > 1.0
    # 2) vMass[k] * (1-vMean[k]) > self.alpha0
    vMass = np.maximum(1. / vMean, self.alpha0 / (1. - vMean))
    U1 = vMass * vMean
    U0 = vMass * (1 - vMean)
    return U1, U0
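# The vMass rule above picks, per stick, the smallest scale satisfying
# both constraints at once, while leaving the stick mean U1/(U1+U0)
# equal to vMean. A small numeric check; beta2v_sketch is an assumed
# stand-in for OptimHDP.beta2v (the usual stick-breaking inversion):

def _check_beta2u_constraints():
    import numpy as np

    def beta2v_sketch(beta):
        # v_k = beta_k / (mass remaining before stick k)
        rem = 1.0 - np.hstack([0.0, np.cumsum(beta[:-1])])
        return beta[:-1] / rem[:-1]

    alpha0 = 5.0
    beta = np.array([0.5, 0.3, 0.15, 0.05])  # size K+1; last = leftover
    vMean = beta2v_sketch(beta)              # size K
    vMass = np.maximum(1. / vMean, alpha0 / (1. - vMean))
    U1, U0 = vMass * vMean, vMass * (1. - vMean)
    assert np.all(U1 >= 1.0 - 1e-5) and np.all(U0 >= alpha0 - 1e-5)
    assert np.allclose(U1 / (U1 + U0), vMean)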