def updateThetaAndThetaRem(
        SS=None, K=None, NodeStateCount=None, rho=None,
        alpha=1.0, gamma=10.0):
    ''' Update parameters theta to maximize objective given suff stats.

    Returns
    ---------
    theta : 2D array, nNodes x K
    thetaRem : scalar
    '''
    if K is None:
        K = SS.K
    if NodeStateCount is None:
        NodeStateCount = SS.NodeStateCount
    nNodes = NodeStateCount.shape[0]
    if rho is None or rho.size != K:
        rho = OptimizerRhoOmegaBetter.make_initrho(K, nNodes, gamma)

    # Calculate E_q[alpha * Beta_l] for l = 1, ..., K+1
    Ebeta = StickBreakUtil.rho2beta(rho, returnSize='K')
    alphaEbeta = alpha * Ebeta
    alphaEbetaRem = alpha * (1 - Ebeta.sum())

    theta = alphaEbeta + NodeStateCount
    thetaRem = alphaEbetaRem
    return theta, thetaRem
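
# Illustrative sketch (not part of the original module): the stick-breaking
# transform behind StickBreakUtil.rho2beta maps the K stick fractions rho to
# expected appearance probabilities beta_k = rho_k * prod_{l<k} (1 - rho_l).
# All names and values below are toy assumptions, used only to show the
# shapes involved in the theta update above.
def _sketch_updateThetaAndThetaRem():
    import numpy as np

    def rho2beta_active(rho):
        # beta_k = rho_k * prod_{l<k} (1 - rho_l), returning only the K
        # active entries (mirrors returnSize='K').
        stickRemainder = np.hstack([1.0, np.cumprod(1.0 - rho[:-1])])
        return rho * stickRemainder

    K, nNodes, alpha = 3, 4, 1.0
    rho = np.asarray([0.5, 0.5, 0.5])
    NodeStateCount = np.arange(nNodes * K, dtype=np.float64).reshape(nNodes, K)

    Ebeta = rho2beta_active(rho)                 # [0.5, 0.25, 0.125]
    theta = alpha * Ebeta + NodeStateCount       # nNodes x K
    thetaRem = alpha * (1.0 - Ebeta.sum())       # leftover stick mass
    return theta, thetaRem
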
def _calcTheta(self, SS):
    ''' Update parameters theta to maximize objective given suff stats.

    Returns
    ---------
    transTheta : 2D array, size K x K+1
    startTheta : 1D array, size K+1
    '''
    K = SS.K
    if not hasattr(self, 'rho') or self.rho.size != K:
        self.rho = OptimizerRhoOmega.create_initrho(K)

    # Calculate E_q[alpha * Beta_l] for l = 1, ..., K+1
    Ebeta = StickBreakUtil.rho2beta(self.rho)
    alphaEBeta = self.transAlpha * Ebeta

    # transTheta_kl = M_kl + E_q[alpha * Beta_l] + kappa * 1_{k==l}
    transTheta = np.zeros((K, K + 1))
    transTheta += alphaEBeta[np.newaxis, :]
    transTheta[:K, :K] += SS.TransStateCount + self.kappa * np.eye(K)

    # startTheta_l = r_1l + E_q[alpha * Beta_l]  (where r_1l = 0 for l > K)
    startTheta = self.startAlpha * Ebeta
    startTheta[:K] += SS.StartStateCount
    return transTheta, startTheta
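
# Illustrative sketch (not part of the original module): transTheta and
# startTheta are Dirichlet pseudo-counts for each row of the transition
# matrix and for the start distribution, so downstream code can take the
# usual Dirichlet expectation
#     E_q[log pi_kl] = digamma(theta_kl) - digamma(sum_l theta_kl).
# The values below are toy assumptions.
def _sketch_ElogPi_from_transTheta():
    import numpy as np
    from scipy.special import digamma

    # Toy parameters for K=2 states plus the remainder column, standing in
    # for the transTheta returned above.
    transTheta = np.asarray([[4.0, 2.0, 1.0],
                             [1.5, 3.5, 1.0]])
    ElogPi = digamma(transTheta) \
        - digamma(transTheta.sum(axis=1))[:, np.newaxis]
    return ElogPi
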
def calcHardMergeGap(self, SS, kA, kB):
    ''' Calculate scalar improvement in ELBO for hard merge of comps kA, kB

    Does *not* include any entropy.

    Returns
    ---------
    L : scalar
    '''
    m_K = SS.K - 1
    m_SS = SuffStatBag(K=SS.K, D=0)
    m_SS.setField('StartStateCount', SS.StartStateCount.copy(), dims='K')
    m_SS.setField('TransStateCount', SS.TransStateCount.copy(),
                  dims=('K', 'K'))
    m_SS.mergeComps(kA, kB)

    # Create candidate beta vector
    m_beta = StickBreakUtil.rho2beta(self.rho)
    m_beta[kA] += m_beta[kB]
    m_beta = np.delete(m_beta, kB, axis=0)

    # Create candidate rho and omega vectors
    m_rho = StickBreakUtil.beta2rho(m_beta, m_K)
    m_omega = np.delete(self.omega, kB)

    # Create candidate startTheta
    m_startTheta = self.startAlpha * m_beta.copy()
    m_startTheta[:m_K] += m_SS.StartStateCount

    # Create candidate transTheta
    m_transTheta = self.alpha * np.tile(m_beta, (m_K, 1))
    if self.kappa > 0:
        m_transTheta[:, :m_K] += self.kappa * np.eye(m_K)
    m_transTheta[:, :m_K] += m_SS.TransStateCount

    # Evaluate objective func. for both candidate and current model
    Lcur = calcELBO_LinearTerms(
        SS=SS, rho=self.rho, omega=self.omega,
        startTheta=self.startTheta, transTheta=self.transTheta,
        alpha=self.alpha, startAlpha=self.startAlpha,
        gamma=self.gamma, kappa=self.kappa)
    Lprop = calcELBO_LinearTerms(
        SS=m_SS, rho=m_rho, omega=m_omega,
        startTheta=m_startTheta, transTheta=m_transTheta,
        alpha=self.alpha, startAlpha=self.startAlpha,
        gamma=self.gamma, kappa=self.kappa)

    # Note: This gap relies on the fact that all nonlinear terms are
    # entropies, which are deliberately excluded from this comparison.
    return Lprop - Lcur
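
# Illustrative sketch (not part of the original module): the merge candidate
# pools kB's expected appearance probability into kA and drops the kB entry
# before re-deriving rho. Toy values only.
def _sketch_merge_beta():
    import numpy as np

    beta = np.asarray([0.4, 0.3, 0.2, 0.1])    # 3 active states + remainder
    kA, kB = 0, 2

    m_beta = beta.copy()
    m_beta[kA] += m_beta[kB]                   # pool kB's mass into kA
    m_beta = np.delete(m_beta, kB)             # drop kB's entry
    # m_beta is now [0.6, 0.3, 0.1]; the remainder mass is unchanged.
    return m_beta
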
def L_slack(self, SS):
    ''' Compute slack term of the allocation objective function.

    Returns
    -------
    L : scalar float
    '''
    ElogPi, ElogPiRem = self.E_logPi(returnRem=1)
    Ebeta = StickBreakUtil.rho2beta(self.rho, returnSize='K')

    Q = SS.NodeStateCount + self.alpha * Ebeta - self.theta
    Lslack = np.sum(Q * ElogPi)

    alphaEbetaRem = self.alpha * (1.0 - Ebeta.sum())
    LslackRem = np.sum((alphaEbetaRem - self.thetaRem) * ElogPiRem)
    return Lslack + LslackRem
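
# Illustrative check (not part of the original module): when theta is set by
# updateThetaAndThetaRem above, theta = alpha * Ebeta + NodeStateCount, so the
# coefficient Q = NodeStateCount + alpha * Ebeta - theta is exactly zero and
# the slack term vanishes at the optimum. Toy values only.
def _sketch_slack_vanishes_at_optimum():
    import numpy as np

    alpha = 1.0
    Ebeta = np.asarray([0.5, 0.25, 0.125])               # toy E[beta], size K
    NodeStateCount = np.asarray([[3.0, 1.0, 0.0],
                                 [2.0, 2.0, 1.0]])       # toy counts
    theta = alpha * Ebeta + NodeStateCount               # optimal update
    Q = NodeStateCount + alpha * Ebeta - theta
    assert np.allclose(Q, 0.0)
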