def packParamBagForPost(
        pnu_K=None, ptau_K=None, w_KE=None, P_KEE=None,
        Post=None, **kwargs):
    ''' Coerce provided arrays to fixed rank and store them in a ParamBag.

    Args
    ----
    pnu_K, ptau_K : passed through as1D, stored with dims 'K'
    w_KE : passed through as2D, stored with dims ('K', 'E')
    P_KEE : passed through as3D, stored with dims ('K', 'E', 'E')
    Post : optional existing ParamBag to fill; created when None.

    Returns
    -------
    Post : ParamBag, with K clusters
    '''
    pnu_K = as1D(pnu_K)
    ptau_K = as1D(ptau_K)
    w_KE = as2D(w_KE)
    P_KEE = as3D(P_KEE)

    # Sizes are read off the coerced arrays.
    nComp = pnu_K.size
    nWeight = w_KE.shape[1]

    if Post is None:
        Post = ParamBag(K=nComp, D=nWeight - 1, E=nWeight)
    elif not hasattr(Post, 'E'):
        # A provided bag without an E attribute gets one attached.
        Post.E = nWeight
    assert Post.K == nComp
    assert Post.D == nWeight - 1
    assert Post.E == nWeight

    fieldSpecs = (
        ('pnu_K', pnu_K, ('K')),
        ('ptau_K', ptau_K, ('K')),
        ('w_KE', w_KE, ('K', 'E')),
        ('P_KEE', P_KEE, ('K', 'E', 'E')),
    )
    for fieldName, arr, fieldDims in fieldSpecs:
        Post.setField(fieldName, arr, dims=fieldDims)
    return Post
def calcGlobalParams(self, **kwargs):
    ''' Rebuild attribute GP from scratch.

    Copies D and K from self.patchModel.obsModel, allocates a fresh
    ParamBag as self.GP, then runs the three _calc*GP steps in order.
    Keyword args are forwarded only to _calcUGP.
    '''
    obsModel = self.patchModel.obsModel
    self.D = obsModel.D
    self.K = obsModel.K
    self.GP = ParamBag(K=self.K, D=self.D)
    for fillStep in (self._calcAllocGP, self._calcObsGP):
        fillStep()
    self._calcUGP(**kwargs)
def setEstParams(self, obsModel=None, SS=None, LP=None, Data=None,
                 Sigma=None, **kwargs):
    ''' Set attribute EstParams (field Sigma) from one of several sources.

    Sources are tried in this order:
    1. obsModel : copy its EstParams and return.
    2. LP and Data : compute summary stats, then update from them.
    3. SS : update from the provided summary stats.
    4. Sigma : store directly, with K read from Sigma.shape[0].

    Post Condition
    --------------
    Attributes EstParams and K are set.
    '''
    self.ClearCache()

    if obsModel is not None:
        copiedParams = obsModel.EstParams.copy()
        self.EstParams = copiedParams
        self.K = copiedParams.K
        return

    if not (LP is None or Data is None):
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        nComp = Sigma.shape[0]
        self.EstParams = ParamBag(K=nComp, D=self.D)
        self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    else:
        self.updateEstParams(SS)
    self.K = self.EstParams.K
def setPostFactors(self, obsModel=None, SS=None, LP=None, Data=None,
                   nu=0, B=0, M=0, V=0, **kwargs):
    ''' Set attribute Post (fields nu, B, M, V) from one of several sources.

    Sources are tried in this order:
    1. obsModel : copy its Post if it has one, otherwise convert its
       EstParams via setPostFromEstParams; then return.
    2. LP and Data : compute summary stats, then update from them.
    3. SS : update from the provided summary stats.
    4. nu, B, M, V : store directly. K, D, E are read from the shape of
       M after as3D, and D, E must match self.D, self.E.

    Post Condition
    --------------
    Attributes Post and K are set.
    '''
    self.ClearCache()

    if obsModel is not None:
        if not hasattr(obsModel, 'Post'):
            self.setPostFromEstParams(obsModel.EstParams)
        else:
            self.Post = obsModel.Post.copy()
        self.K = self.Post.K
        return

    if not (LP is None or Data is None):
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        M = as3D(M)
        B = as3D(B)
        V = as3D(V)
        nComp, nRow, nCol = M.shape
        assert nRow == self.D
        assert nCol == self.E
        self.Post = ParamBag(K=nComp, D=self.D, E=self.E)
        fieldSpecs = (
            ('nu', as1D(nu), ('K')),
            ('B', B, ('K', 'D', 'D')),
            ('M', M, ('K', 'D', 'E')),
            ('V', V, ('K', 'E', 'E')),
        )
        for fieldName, arr, fieldDims in fieldSpecs:
            self.Post.setField(fieldName, arr, dims=fieldDims)
    else:
        self.updatePost(SS)
    self.K = self.Post.K
def setEstParams(self, obsModel=None, SS=None, LP=None, Data=None,
                 phi=None, topics=None, **kwargs):
    ''' Set attribute EstParams (field phi) from one of several sources.

    A non-None `topics` overrides `phi`.

    Sources are tried in this order:
    1. obsModel : copy its EstParams and return.
    2. LP and Data : compute summary stats, then update from them.
    3. SS : update from the provided summary stats.
    4. phi : store directly, with K, D read from phi.shape[0], phi.shape[1].

    Post Condition
    --------------
    Attributes EstParams and K are set.
    '''
    phi = phi if topics is None else topics
    self.ClearCache()

    if obsModel is not None:
        copiedParams = obsModel.EstParams.copy()
        self.EstParams = copiedParams
        self.K = copiedParams.K
        return

    if not (LP is None or Data is None):
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        self.EstParams = ParamBag(K=phi.shape[0], D=phi.shape[1])
        self.EstParams.setField('phi', phi, dims=('K', 'D'))
    else:
        self.updateEstParams(SS)
    self.K = self.EstParams.K
def setPostFactors(self, obsModel=None, SS=None, LP=None, Data=None,
                   lam1=None, lam0=None, **kwargs):
    ''' Set attribute Post (fields lam1, lam0) from one of several sources.

    Sources are tried in this order:
    1. obsModel : copy its Post if it has one (and set K), otherwise
       convert its EstParams via setPostFromEstParams; then return.
    2. LP and Data : compute summary stats, then update from them.
    3. SS : update from the provided summary stats.
    4. lam1, lam0 : store directly. If the last axis does not match
       self.D, both arrays are transposed, provided the first axis does.

    Raises
    ------
    ValueError : if neither axis of lam1 has length self.D.
    '''
    self.ClearCache()

    if obsModel is not None:
        if not hasattr(obsModel, 'Post'):
            self.setPostFromEstParams(obsModel.EstParams)
        else:
            self.Post = obsModel.Post.copy()
            self.K = self.Post.K
        return

    if not (LP is None or Data is None):
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        lam1 = as2D(lam1)
        lam0 = as2D(lam0)
        if lam1.shape[-1] != self.D:
            # Last axis is wrong; accept only if a transpose fixes it.
            if lam1.shape[0] != self.D:
                raise ValueError("Bad dimension for lam1, lam0")
            lam1 = lam1.T.copy()
            lam0 = lam0.T.copy()
        nComp = lam1.shape[0]
        self.Post = ParamBag(K=nComp, D=self.D)
        for fieldName, arr in (('lam1', lam1), ('lam0', lam0)):
            self.Post.setField(fieldName, arr, dims=self.CompDims + ('D',))
    else:
        self.updatePost(SS)
    self.K = self.Post.K
def updateEstParams_MAP(self, SS):
    ''' Update attribute EstParams for all comps given suff stats.

    Update uses the MAP objective for point estimation.

    Args
    ----
    SS : suff stats providing K, D, N, x and xxT.

    Post Condition
    ---------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
        self.EstParams = ParamBag(K=SS.K, D=SS.D)

    Prior = self.Prior
    nu = Prior.nu + SS.N
    kappa = Prior.kappa + SS.N
    # Prior contribution shared by every component's B matrix.
    PB = Prior.B + Prior.kappa * np.outer(Prior.m, Prior.m)

    m = np.empty((SS.K, SS.D))
    B = np.empty((SS.K, SS.D, SS.D))
    # range (not xrange) so this also runs on Python 3.
    for k in range(SS.K):
        km_x = Prior.kappa * Prior.m + SS.x[k]
        m[k] = 1.0 / kappa[k] * km_x
        B[k] = PB + SS.xxT[k] - 1.0 / kappa[k] * np.outer(km_x, km_x)

    mu, Sigma = MAPEstParams_inplace(nu, B, m, kappa)
    self.EstParams.setField('mu', mu, dims=('K', 'D'))
    self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    self.K = SS.K
def setPostFromEstParams(self, EstParams, Data=None, nTotalTokens=0,
                         **kwargs):
    ''' Set attribute Post based on values in EstParams.

    Args
    ----
    EstParams : bag with attributes K, D and field phi.
    Data : optional; when given, nTotalTokens is replaced by
        Data.word_count.sum().
    nTotalTokens : scalar (int, float, or 0-dim array) split evenly
        across the K comps, or an array with one entry per comp.
    lam (keyword) : if provided and not None, stored as-is in place of
        the value computed from phi.

    Post Condition
    --------------
    Attributes Post (field lam) and K are set.
    '''
    K = EstParams.K
    D = EstParams.D

    if Data is not None:
        nTotalTokens = Data.word_count.sum()
    # np.ndim handles Python ints and floats as well as numpy scalars;
    # reading .ndim directly fails on a plain Python float.
    if np.ndim(nTotalTokens) == 0:
        nTotalTokens = float(nTotalTokens) / float(K) * np.ones(K)
    else:
        nTotalTokens = np.asarray(nTotalTokens)

    if np.any(nTotalTokens == 0):
        priorScale = self.Prior.lam.sum()
        warnings.warn("Enforcing minimum scale of %.3f for lam" % (
            priorScale))
        nTotalTokens = np.maximum(nTotalTokens, priorScale)

    lam = kwargs.get('lam')
    if lam is None:
        WordCounts = EstParams.phi * nTotalTokens[:, np.newaxis]
        assert WordCounts.max() > 0
        lam = WordCounts + self.Prior.lam

    self.Post = ParamBag(K=K, D=D)
    self.Post.setField('lam', lam, dims=('K', 'D'))
    self.K = K
def setEstParams(self, obsModel=None, SS=None, LP=None, Data=None,
                 phi=None, **kwargs):
    ''' Set attribute EstParams to provided values.

    Sources are tried in this order:
    1. obsModel : copy its EstParams and return.
    2. LP and Data : compute summary stats, then update from them.
    3. SS : update from the provided summary stats.
    4. phi : store directly, with K, D read from phi.shape[0], phi.shape[1].

    Post Condition
    --------------
    Attributes EstParams and K are set.
    '''
    self.ClearCache()

    if obsModel is not None:
        copiedParams = obsModel.EstParams.copy()
        self.EstParams = copiedParams
        self.K = copiedParams.K
        return

    haveLocalParams = LP is not None and Data is not None
    if haveLocalParams:
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        nComp, nDim = phi.shape[0], phi.shape[1]
        self.EstParams = ParamBag(K=nComp, D=nDim)
        self.EstParams.setField('phi', phi, dims=('K', 'D',))
    else:
        self.updateEstParams(SS)
    self.K = self.EstParams.K
def setEstParams(self, obsModel=None, SS=None, LP=None, Data=None,
                 mu=None, Sigma=None, **kwargs):
    ''' Set attribute EstParams (fields mu, Sigma) from one of several sources.

    Sources are tried in this order:
    1. obsModel : copy its EstParams and return.
    2. LP and Data : compute summary stats, then update from them.
    3. SS : update from the provided summary stats.
    4. mu, Sigma : store directly. K and D are read from Sigma after
       as3D, and mu is transposed if its first axis does not equal K.

    Post Condition
    --------------
    Attributes EstParams and K are set.
    '''
    self.ClearCache()

    if obsModel is not None:
        copiedParams = obsModel.EstParams.copy()
        self.EstParams = copiedParams
        self.K = copiedParams.K
        return

    if not (LP is None or Data is None):
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        Sigma = as3D(Sigma)
        nComp, nDim, _ = Sigma.shape
        mu = as2D(mu)
        if mu.shape[0] != nComp:
            mu = mu.T
        assert mu.shape[0] == nComp
        assert mu.shape[1] == nDim
        self.EstParams = ParamBag(K=nComp, D=nDim)
        self.EstParams.setField('mu', mu, dims=('K', 'D'))
        self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    else:
        self.updateEstParams(SS)
    self.K = self.EstParams.K
def createPrior(self, Data, nu=0, B=None, m=None, kappa=None,
                MMat='zero', ECovMat=None, sF=1.0, **kwargs):
    ''' Initialize Prior ParamBag attribute.

    Args
    ----
    Data : dataset; Data.X is read only when MMat == 'data' and m is
        None. Also forwarded to createECovMatFromUserInput.
    nu : scalar, raised to at least D + 2.
    B : optional array or scalar. When None, it is computed as
        ECovMat * (nu - D - 1).
    m : optional array or scalar. When None, it is the column mean of
        Data.X if MMat == 'data', otherwise zeros of length D.
    kappa : optional scalar, raised to at least 1e-8 (None counts as 0).
    ECovMat : array, string, or None; a None or string value is resolved
        through createECovMatFromUserInput(D, Data, ECovMat, sF).

    Post Condition
    ------
    Prior expected covariance matrix set to match provided value.
    '''
    D = self.D
    nu = np.maximum(nu, D + 2)
    if B is None:
        if ECovMat is None or isinstance(ECovMat, str):
            ECovMat = createECovMatFromUserInput(D, Data, ECovMat, sF)
        B = ECovMat * (nu - D - 1)

    # np.ndim also accepts plain Python scalars, which have no .ndim.
    # np.float64 replaces the np.float alias, which NumPy has removed.
    if np.ndim(B) == 1:
        B = np.asarray([B], dtype=np.float64)
    elif np.ndim(B) == 0:
        B = np.asarray([[B]], dtype=np.float64)

    if m is None:
        if MMat == 'data':
            m = np.mean(Data.X, axis=0)
        else:
            m = np.zeros(D)
    elif np.ndim(m) < 1:
        m = np.asarray([m], dtype=np.float64)

    kappa = np.maximum(kappa or 0, 1e-8)

    self.Prior = ParamBag(K=0, D=D)
    self.Prior.setField('nu', nu, dims=None)
    self.Prior.setField('kappa', kappa, dims=None)
    self.Prior.setField('m', m, dims=('D'))
    self.Prior.setField('B', B, dims=('D', 'D'))
def setEstParams(self, obsModel=None, SS=None, LP=None, Data=None,
                 A=None, Sigma=None, **kwargs):
    ''' Initialize EstParams attribute with fields A, Sigma.

    Sources are tried in this order:
    1. obsModel : copy its EstParams and return.
    2. LP and Data : compute summary stats, then update from them.
    3. SS : update from the provided summary stats.
    4. A, Sigma : store directly after as3D, with K, D read from
       A.shape[0], A.shape[1].

    Post Condition
    --------------
    Attributes EstParams and K are set.
    '''
    self.ClearCache()
    if obsModel is not None:
        self.EstParams = obsModel.EstParams.copy()
        self.K = self.EstParams.K
        return

    if LP is not None and Data is not None:
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is not None:
        self.updateEstParams(SS)
    else:
        A = as3D(A)
        Sigma = as3D(Sigma)
        self.EstParams = ParamBag(K=A.shape[0], D=A.shape[1])
        self.EstParams.setField('A', A, dims=('K', 'D', 'D'))
        self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    # Keep K in sync with the stored parameters on every path.
    self.K = self.EstParams.K
def setPostFactors(self, obsModel=None, SS=None, LP=None, Data=None,
                   lam=None, WordCounts=None, **kwargs):
    ''' Set attribute Post (field lam) from one of several sources.

    Sources are tried in this order:
    1. obsModel : copy its Post if it has one (and set K), otherwise
       convert its EstParams via setPostFromEstParams; then return.
    2. LP and Data : compute summary stats, then update from them.
    3. SS : update from the provided summary stats.
    4. lam / WordCounts : when WordCounts is given, the stored value is
       as2D(WordCounts) + lam; otherwise it is as2D(lam).
    '''
    self.ClearCache()

    if obsModel is not None:
        if not hasattr(obsModel, 'Post'):
            self.setPostFromEstParams(obsModel.EstParams)
        else:
            self.Post = obsModel.Post.copy()
            self.K = self.Post.K
        return

    if not (LP is None or Data is None):
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        lam = as2D(lam) if WordCounts is None else as2D(WordCounts) + lam
        nComp, nDim = lam.shape
        self.Post = ParamBag(K=nComp, D=nDim)
        self.Post.setField('lam', lam, dims=('K', 'D'))
    else:
        self.updatePost(SS)
    self.K = self.Post.K
def setPostFactors(self, obsModel=None, SS=None, LP=None, Data=None,
                   nu=0, B=0, m=0, kappa=0, **kwargs):
    ''' Set attribute Post (fields nu, B, m, kappa) from one of several sources.

    Sources are tried in this order:
    1. obsModel : copy its Post if it has one (and set K), otherwise
       convert its EstParams via setPostFromEstParams; then return.
    2. LP and Data : compute summary stats, then update from them.
    3. SS : update from the provided summary stats.
    4. nu, B, m, kappa : store directly. m goes through as2D and is
       transposed if its second axis does not equal self.D; K is the
       first axis of the result.
    '''
    self.ClearCache()

    if obsModel is not None:
        if not hasattr(obsModel, 'Post'):
            self.setPostFromEstParams(obsModel.EstParams)
        else:
            self.Post = obsModel.Post.copy()
            self.K = self.Post.K
        return

    if not (LP is None or Data is None):
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        m = as2D(m)
        if m.shape[1] != self.D:
            m = m.T.copy()
        nComp, _ = m.shape
        newPost = ParamBag(K=nComp, D=self.D)
        self.Post = newPost
        newPost.setField('nu', as1D(nu), dims=('K'))
        newPost.setField('B', B, dims=('K', 'D', 'D'))
        newPost.setField('m', m, dims=('K', 'D'))
        newPost.setField('kappa', as1D(kappa), dims=('K'))
    else:
        self.updatePost(SS)
    self.K = self.Post.K
def setEstParams(self, obsModel=None, SS=None, LP=None, Data=None,
                 mu=None, sigma=None, Sigma=None, **kwargs):
    ''' Create EstParams ParamBag with fields mu, sigma.

    Sources are tried in this order:
    1. obsModel : copy its EstParams and return.
    2. LP and Data : compute summary stats, then update from them.
    3. SS : update from the provided summary stats.
    4. mu with sigma or Sigma : store directly. A 3-dim Sigma takes
       precedence; the diagonal of each Sigma[k] becomes row k of sigma.

    Post Condition
    --------------
    Attributes EstParams and K are set.
    '''
    self.ClearCache()
    if obsModel is not None:
        self.EstParams = obsModel.EstParams.copy()
        self.K = self.EstParams.K
        return

    if LP is not None and Data is not None:
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is not None:
        self.updateEstParams(SS)
    else:
        K = mu.shape[0]
        if Sigma is not None:
            assert Sigma.ndim == 3
            # Pull every per-component diagonal in one call. np.array
            # copies, because np.diagonal returns a read-only view.
            sigma = np.array(
                np.diagonal(Sigma, axis1=1, axis2=2), dtype=np.float64)
        assert sigma.ndim == 2
        self.EstParams = ParamBag(K=K, D=mu.shape[1])
        self.EstParams.setField('mu', mu, dims=('K', 'D'))
        self.EstParams.setField('sigma', sigma, dims=('K', 'D'))
    self.K = self.EstParams.K
def packParamBagForPost(nu=None, beta=None, m=None, kappa=None,
                        D=None, Post=None, **kwargs):
    ''' Store fields nu, beta, m, kappa in a ParamBag.

    Args
    ----
    nu, kappa : passed through as1D, stored with dims 'K'
    beta, m : passed through as2D, stored with dims ('K', 'D'); each is
        transposed if its second axis does not equal D.
    D : optional; defaults to the second axis of m after as2D.
    Post : optional existing ParamBag to fill; must match K and D.

    Returns
    -------
    Post : ParamBag
    '''
    m = as2D(m)
    beta = as2D(beta)

    if D is None:
        D = m.shape[1]
    if m.shape[1] != D:
        m = m.T.copy()
    if beta.shape[1] != D:
        beta = beta.T.copy()
    nComp, _ = m.shape

    if Post is not None:
        assert isinstance(Post, ParamBag)
        assert Post.K == nComp
        assert Post.D == D
    else:
        Post = ParamBag(K=nComp, D=D)

    Post.setField('nu', as1D(nu), dims=('K'))
    Post.setField('beta', beta, dims=('K', 'D'))
    Post.setField('m', m, dims=('K', 'D'))
    Post.setField('kappa', as1D(kappa), dims=('K'))
    return Post
def updateEstParams_MaxLik(self, SS):
    ''' Update attribute EstParams for all comps given suff stats.

    Update uses the maximum likelihood objective for point estimation.

    Args
    ----
    SS : suff stats providing K, D, N, ppT, pxT and xxT.

    Post Condition
    ---------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
        self.EstParams = ParamBag(K=SS.K, D=SS.D)

    minCovMat = self.min_covar * np.eye(SS.D)
    A = np.zeros((SS.K, self.D, self.D))
    Sigma = np.zeros((SS.K, self.D, self.D))
    # range (not xrange) so this also runs on Python 3.
    for k in range(SS.K):
        # Add small pos multiple of identity to make invertible
        # TODO: This is source of potential stability issues.
        A[k] = np.linalg.solve(SS.ppT[k] + minCovMat, SS.pxT[k]).T
        Sigma[k] = SS.xxT[k] \
            - 2 * np.dot(SS.pxT[k].T, A[k].T) \
            + np.dot(A[k], np.dot(SS.ppT[k], A[k].T))
        Sigma[k] /= SS.N[k]
        # NOTE: Sigma[k] is not symmetrized here
        # (i.e. no 0.5 * (Sigma[k] + Sigma[k].T) step).
        Sigma[k] += minCovMat

    self.EstParams.setField('A', A, dims=('K', 'D', 'D'))
    self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    self.K = SS.K
def setEstParamsFromPost(self, Post=None):
    ''' Set attribute EstParams based on values in Post.

    Field phi is computed elementwise as lam1 / (lam1 + lam0).
    When Post is None, self.Post is used.

    Post Condition
    --------------
    Attributes EstParams and K are set.
    '''
    srcPost = self.Post if Post is None else Post
    self.EstParams = ParamBag(K=srcPost.K, D=srcPost.D)
    lamTotal = srcPost.lam1 + srcPost.lam0
    self.EstParams.setField(
        'phi', srcPost.lam1 / lamTotal, dims=('K', 'D',))
    self.K = self.EstParams.K
def setEstParamsFromPost(self, Post):
    ''' Convert from Post (nu, B) to EstParams (Sigma).

    Each EstParam is set to its posterior mean: Sigma[k] is
    B[k] / (nu[k] - D - 1).

    Post Condition
    --------------
    Attributes EstParams and K are set.
    '''
    nDim = Post.D
    self.EstParams = ParamBag(K=Post.K, D=nDim)
    # One denominator per component, broadcast over each D x D matrix.
    denom = Post.nu - nDim - 1
    Sigma = Post.B / denom[:, np.newaxis, np.newaxis]
    self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    self.K = self.EstParams.K
def createParamBagForPrior(
        Data=None, D=0,
        pnu=0, ptau=None, w_E=0,
        P_EE=None, P_diag_E=None, P_diag_val=1.0,
        Prior=None,
        **kwargs):
    ''' Initialize Prior ParamBag attribute.

    Args
    ----
    Data : optional dataset; when given, D is read from Data.dim.
    D : int dimension, used only when Data is None.
    pnu, ptau : scalars, each raised to at least 1e-9. A ptau of None
        (the default) is treated as unspecified and becomes 1e-9.
    w_E : scalar or 1D array; tiled up to length E when shorter.
    P_EE : optional E x E matrix. When None, a diagonal matrix is built
        from P_diag_E, or from P_diag_val if P_diag_E is also None.
    Prior : optional existing ParamBag to fill; created when None.

    Returns
    -------
    Prior : ParamBag
        with dimension attributes K, D, E
        with parameter attributes pnu, ptau, w_E, P_EE
        plus derived fields Pw_E = P_EE . w_E and wPw_1 = w_E . Pw_E
    '''
    if Data is None:
        D = int(D)
    else:
        D = int(Data.dim)
    E = D + 1

    # Init parameters of 1D Wishart prior on delta
    pnu = np.maximum(pnu, 1e-9)
    # np.maximum(None, ...) raises TypeError on Python 3, so map the
    # default None to 0 before applying the floor.
    if ptau is None:
        ptau = 0
    ptau = np.maximum(ptau, 1e-9)

    # Initialize precision matrix of the weight vector
    if P_EE is not None:
        P_EE = np.asarray(P_EE)
    elif P_diag_E is not None:
        P_EE = np.diag(np.asarray(P_diag_E))
    else:
        P_EE = np.diag(P_diag_val * np.ones(E))
    assert P_EE.ndim == 2
    assert P_EE.shape == (E, E)

    # Initialize mean of the weight vector
    w_E = as1D(np.asarray(w_E))
    if w_E.size < E:
        w_E = np.tile(w_E, E)[:E]
    assert w_E.ndim == 1
    assert w_E.size == E

    if Prior is None:
        Prior = ParamBag(K=0, D=D, E=E)
    if not hasattr(Prior, 'E'):
        Prior.E = E
    assert Prior.D == D
    assert Prior.E == E

    Prior.setField('pnu', pnu, dims=None)
    Prior.setField('ptau', ptau, dims=None)
    Prior.setField('w_E', w_E, dims=('E'))
    Prior.setField('P_EE', P_EE, dims=('E', 'E'))

    # Derived quantities stored alongside the raw parameters.
    Pw_E = np.dot(P_EE, w_E)
    wPw_1 = np.dot(w_E, Pw_E)
    Prior.setField('Pw_E', Pw_E, dims=('E'))
    Prior.setField('wPw_1', wPw_1, dims=None)
    return Prior
def setEstParamsFromPost(self, Post):
    ''' Convert from Post (nu, B, m, kappa) to EstParams (mu, Sigma).

    Each EstParam is set to its posterior mean: mu is a copy of m, and
    Sigma[k] is B[k] / (nu[k] - D - 1).

    Post Condition
    --------------
    Attributes EstParams and K are set.
    '''
    estBag = ParamBag(K=Post.K, D=Post.D)
    self.EstParams = estBag
    mu = Post.m.copy()
    # One denominator per component, broadcast over each D x D matrix.
    denom = Post.nu[:, np.newaxis, np.newaxis] - Post.D - 1
    Sigma = Post.B / denom
    estBag.setField('mu', mu, dims=('K', 'D'))
    estBag.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    self.K = self.EstParams.K
def setEstParamsFromPost(self, Post):
    ''' Convert from Post to EstParams.

    A is a copy of Post.M, and Sigma[k] is B[k] / (nu[k] - D - 1).

    Post Condition
    --------------
    Attributes EstParams and K are set.
    '''
    nDim = Post.D
    self.EstParams = ParamBag(K=Post.K, D=nDim)
    A = Post.M.copy()
    # One denominator per component, broadcast over each D x D matrix.
    denom = Post.nu - nDim - 1
    Sigma = Post.B / denom[:, np.newaxis, np.newaxis]
    for fieldName, arr in (('A', A), ('Sigma', Sigma)):
        self.EstParams.setField(fieldName, arr, dims=('K', 'D', 'D'))
    self.K = self.EstParams.K
def setEstParamsFromPost(self, Post):
    ''' Convert from Post (nu, beta, m, kappa) to EstParams (mu, sigma).

    Each EstParam is set to its posterior mean: mu is a copy of m, and
    row k of sigma is beta[k] / (nu[k] - 2).

    Post Condition
    --------------
    Attributes EstParams and K are set.
    '''
    self.EstParams = ParamBag(K=Post.K, D=self.D)
    mu = Post.m.copy()
    # One denominator per component, broadcast across the D columns.
    denom = Post.nu - 2
    sigma = Post.beta / denom[:, np.newaxis]
    for fieldName, arr in (('mu', mu), ('sigma', sigma)):
        self.EstParams.setField(fieldName, arr, dims=('K', 'D'))
    self.K = self.EstParams.K
def setEstParamsFromPost(self, Post=None, **kwargs):
    ''' Convert from Post (lam) to EstParams (phi).

    Each EstParam is set to its posterior mean: every row of lam is
    divided by its own sum. When Post is None, self.Post is used.

    Post Condition
    --------------
    Attributes EstParams and K are set.
    '''
    srcPost = self.Post if Post is None else Post
    self.EstParams = ParamBag(K=srcPost.K, D=srcPost.D)
    rowTotals = np.sum(srcPost.lam, axis=1)
    phi = srcPost.lam / rowTotals[:, np.newaxis]
    self.EstParams.setField('phi', phi, dims=('K', 'D'))
    self.K = self.EstParams.K
def createPrior(self, Data, lam=1.0, min_phi=1e-100, **kwargs):
    ''' Initialize Prior ParamBag attribute.

    Args
    ----
    Data : not read by this method.
    lam : scalar or array with D entries. A scalar is expanded to a
        length-D vector.
    min_phi : stored as attribute min_phi.

    Post Condition
    --------------
    Attributes min_phi, Prior (field lam) and prior_cFunc are set.
    '''
    D = self.D
    self.min_phi = min_phi
    self.Prior = ParamBag(K=0, D=D)
    # np.float64 replaces the np.float alias, which NumPy has removed.
    lam = np.asarray(lam, dtype=np.float64)
    if lam.ndim == 0:
        lam = lam * np.ones(D)
    assert lam.size == D
    self.Prior.setField('lam', lam, dims=('D'))
    self.prior_cFunc = c_Func(lam)
def updateEstParams_MAP(self, SS):
    ''' Update attribute EstParams for all comps given suff stats.

    Update uses the MAP objective for point estimation.
    Not implemented yet: after clearing the cache and (re)allocating
    EstParams if needed, this always raises.

    Raises
    ------
    NotImplementedError : always.
    '''
    self.ClearCache()
    if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
        self.EstParams = ParamBag(K=SS.K, D=SS.D)
    # NotImplemented is a comparison sentinel, not an exception class;
    # calling it raises TypeError. NotImplementedError is the right one.
    raise NotImplementedError('TODO')
def updateEstParams_MAP(self, SS):
    ''' Update attribute EstParams for all comps given suff stats.

    Update uses the MAP objective for point estimation: each row of
    phi is (WordCounts + lam - 1), normalized to sum to one.

    Args
    ----
    SS : suff stats providing K, D and WordCounts.

    Post Condition
    ---------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
        self.EstParams = ParamBag(K=SS.K, D=SS.D)
    phi = SS.WordCounts + self.Prior.lam - 1
    phi /= phi.sum(axis=1)[:, np.newaxis]
    self.EstParams.setField('phi', phi, dims=('K', 'D'))
    # Keep K in sync with the suff stats, as the post condition promises.
    self.K = SS.K
def updatePost(self, SS):
    ''' Update attribute Post for all comps given suff stats.

    Optimizes the variational objective for approximating the posterior.
    Post is replaced by the ParamBag returned from calcPostParamsFromSS.

    Post Condition
    --------------
    Attributes K and Post updated in-place.
    '''
    self.ClearCache()
    needsFreshBag = not hasattr(self, 'Post') or self.Post.K != SS.K
    if needsFreshBag:
        self.Post = ParamBag(K=SS.K, D=SS.D, E=SS.D+1)
    newPost = calcPostParamsFromSS(
        SS=SS, Prior=self.Prior, returnParamBag=True)
    self.Post = newPost
    self.K = SS.K
def setPostFromEstParams(self, EstParams, Data=None, nTotalTokens=1,
                         **kwargs):
    ''' Set attribute Post based on values in EstParams.

    Args
    ----
    EstParams : bag with attributes K, D and field phi.
    Data : not read by this method.
    nTotalTokens : scale applied to phi and (1 - phi) before adding
        the prior values.

    Post Condition
    --------------
    Attributes Post (fields lam1, lam0) and K are set.
    '''
    K = EstParams.K
    D = EstParams.D

    phi = EstParams.phi
    lam1 = phi * nTotalTokens + self.Prior.lam1
    # Scale the complement (1 - phi), not 1 - phi * nTotalTokens, which
    # turns negative once nTotalTokens exceeds 1. The two forms agree
    # at the default nTotalTokens=1.
    lam0 = (1 - phi) * nTotalTokens + self.Prior.lam0

    self.Post = ParamBag(K=K, D=D)
    self.Post.setField('lam1', lam1, dims=('K', 'D'))
    self.Post.setField('lam0', lam0, dims=('K', 'D'))
    self.K = K
def updatePost(self, SS):
    ''' Update attribute Post for all comps given suff stats.

    Update uses the variational objective. The value returned by
    self.calcPostParams(SS) is stored as field lam.

    Post Condition
    ---------
    Attributes K and Post updated in-place.
    '''
    self.ClearCache()
    canReuseBag = hasattr(self, 'Post') and self.Post.K == SS.K
    if not canReuseBag:
        self.Post = ParamBag(K=SS.K, D=SS.D)
    newLam = self.calcPostParams(SS)
    self.Post.setField('lam', newLam, dims=('K', 'D'))
    self.K = SS.K