예제 #1
0
    def setPostFromEstParams(self,
                             EstParams,
                             Data=None,
                             nTotalTokens=0,
                             **kwargs):
        ''' Set attribute Post based on values in EstParams.

        Args
        ----
        EstParams : object with attributes K, D and phi
            Point estimates to convert into posterior parameters.
            phi is scaled row-wise by the per-component token counts,
            so it is presumably (K, D) -- confirm against caller.
        Data : optional dataset
            When provided, nTotalTokens is replaced by
            Data.word_count.sum().
        nTotalTokens : scalar or 1D array with one entry per component
            A scalar is divided evenly among the K components.
        **kwargs
            If kwargs['lam'] exists and is not None, it is stored as-is
            instead of being derived from EstParams.phi.

        Post Condition
        ---------
        Attributes Post (field lam) and K are overwritten.
        '''
        K = EstParams.K
        D = EstParams.D

        if Data is not None:
            nTotalTokens = Data.word_count.sum()
        # Coerce up front so plain Python floats and lists are accepted;
        # they have no .ndim attribute and used to raise AttributeError.
        nTotalTokens = np.asarray(nTotalTokens, dtype=np.float64)
        if nTotalTokens.ndim == 0:
            nTotalTokens = float(nTotalTokens) / float(K) * np.ones(K)
        if np.any(nTotalTokens == 0):
            # A zero count would produce an all-zero WordCounts row,
            # so every entry is raised to at least the prior's total mass.
            priorScale = self.Prior.lam.sum()
            warnings.warn("Enforcing minimum scale of %.3f for lam" %
                          (priorScale))
            nTotalTokens = np.maximum(nTotalTokens, priorScale)

        if 'lam' in kwargs and kwargs['lam'] is not None:
            lam = kwargs['lam']
        else:
            WordCounts = EstParams.phi * nTotalTokens[:, np.newaxis]
            assert WordCounts.max() > 0
            lam = WordCounts + self.Prior.lam

        self.Post = ParamBag(K=K, D=D)
        self.Post.setField('lam', lam, dims=('K', 'D'))
        self.K = K
예제 #2
0
    def setPostFactors(self,
                       obsModel=None,
                       SS=None,
                       LP=None,
                       Data=None,
                       lam=None,
                       WordCounts=None,
                       **kwargs):
        ''' Set attribute Post from whichever input is provided.

        Inputs are considered in this order:
        1. obsModel : copy its Post if it has one, otherwise convert
           its EstParams via setPostFromEstParams; then return.
        2. LP together with Data : summary stats are computed from them.
        3. SS : handed to self.updatePost.
        4. lam, optionally offset by WordCounts : stored directly.

        Post Condition
        ---------
        Attribute Post is set. K is synced to Post.K, except on the
        obsModel-without-Post path, which relies on setPostFromEstParams.
        '''
        self.ClearCache()
        if obsModel is not None:
            if not hasattr(obsModel, 'Post'):
                self.setPostFromEstParams(obsModel.EstParams)
                return
            self.Post = obsModel.Post.copy()
            self.K = self.Post.K
            return

        if not (LP is None or Data is None):
            SS = self.calcSummaryStats(Data, None, LP)

        if SS is None:
            # No suff stats anywhere: fall back to explicit parameters.
            if WordCounts is None:
                lam = as2D(lam)
            else:
                lam = as2D(WordCounts) + lam
            nComp, nDim = lam.shape
            self.Post = ParamBag(K=nComp, D=nDim)
            self.Post.setField('lam', lam, dims=('K', 'D'))
        else:
            self.updatePost(SS)
        self.K = self.Post.K
예제 #3
0
    def updateEstParams_MAP(self, SS):
        ''' Update attribute EstParams for all comps given suff stats.

        Update uses the MAP objective for point estimation.

        Args
        ----
        SS : suff stats object
            Reads attributes K, D, N, x and xxT; x and xxT are indexed
            per component.

        Post Condition
        ---------
        Attributes K and EstParams updated in-place.
        '''
        self.ClearCache()
        if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
            self.EstParams = ParamBag(K=SS.K, D=SS.D)

        Prior = self.Prior
        nu = Prior.nu + SS.N
        kappa = Prior.kappa + SS.N
        PB = Prior.B + Prior.kappa * np.outer(Prior.m, Prior.m)
        # Same for every component, so compute it once outside the loop.
        priorKM = Prior.kappa * Prior.m

        m = np.empty((SS.K, SS.D))
        B = np.empty((SS.K, SS.D, SS.D))
        # range (not the Python-2-only xrange) works on Python 2 and 3.
        for k in range(SS.K):
            km_x = priorKM + SS.x[k]
            m[k] = 1.0 / kappa[k] * km_x
            B[k] = PB + SS.xxT[k] - 1.0 / kappa[k] * np.outer(km_x, km_x)

        mu, Sigma = MAPEstParams_inplace(nu, B, m, kappa)
        self.EstParams.setField('mu', mu, dims=('K', 'D'))
        self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
        self.K = SS.K
예제 #4
0
    def setEstParams(self,
                     obsModel=None,
                     SS=None,
                     LP=None,
                     Data=None,
                     phi=None,
                     topics=None,
                     **kwargs):
        ''' Fill attribute EstParams (field phi) from the given input.

        Inputs are considered in this order:
        1. obsModel : its EstParams are copied, then the method returns.
        2. LP together with Data : summary stats are computed from them.
        3. SS : handed to self.updateEstParams.
        4. phi, or its alias topics (topics wins when both are given).

        Post Condition
        ---------
        Attributes EstParams and K are set.
        '''
        phi = phi if topics is None else topics

        self.ClearCache()
        if obsModel is not None:
            self.EstParams = obsModel.EstParams.copy()
            self.K = self.EstParams.K
            return

        haveLPAndData = LP is not None and Data is not None
        if haveLPAndData:
            SS = self.calcSummaryStats(Data, None, LP)

        if SS is None:
            nComp, nDim = phi.shape[0], phi.shape[1]
            self.EstParams = ParamBag(K=nComp, D=nDim)
            self.EstParams.setField('phi', phi, dims=('K', 'D'))
        else:
            self.updateEstParams(SS)
        self.K = self.EstParams.K
예제 #5
0
    def setEstParams(self,
                     obsModel=None,
                     SS=None,
                     LP=None,
                     Data=None,
                     phi=None,
                     **kwargs):
        ''' Set attribute EstParams to provided values.

        The first usable input wins: obsModel (its EstParams are copied),
        then LP with Data (converted to suff stats), then SS (passed to
        self.updateEstParams), and finally the explicit array phi.

        Post Condition
        ---------
        Attributes EstParams and K are set.
        '''
        self.ClearCache()
        if obsModel is not None:
            self.EstParams = obsModel.EstParams.copy()
            self.K = self.EstParams.K
            return

        if not (LP is None or Data is None):
            SS = self.calcSummaryStats(Data, None, LP)

        if SS is None:
            # Dimensions are taken from the first two axes of phi.
            nRow = phi.shape[0]
            nCol = phi.shape[1]
            self.EstParams = ParamBag(K=nRow, D=nCol)
            self.EstParams.setField('phi', phi, dims=('K', 'D'))
        else:
            self.updateEstParams(SS)
        self.K = self.EstParams.K
예제 #6
0
    def setPostFactors(self,
                       obsModel=None,
                       SS=None,
                       LP=None,
                       Data=None,
                       nu=0,
                       B=0,
                       **kwargs):
        ''' Set attribute Post (fields nu, B) from the given input.

        Inputs are considered in this order:
        1. obsModel : copy its Post if it has one, otherwise convert
           its EstParams via setPostFromEstParams; then return.
        2. LP together with Data : summary stats are computed from them.
        3. SS : handed to self.updatePost.
        4. nu and B : stored directly; B must expose .shape, so the
           integer default is only a placeholder for the other paths.

        Post Condition
        ---------
        Attribute Post is set. K is synced to Post.K, except on the
        obsModel-without-Post path, which relies on setPostFromEstParams.
        '''
        self.ClearCache()
        if obsModel is not None:
            if not hasattr(obsModel, 'Post'):
                self.setPostFromEstParams(obsModel.EstParams)
                return
            self.Post = obsModel.Post.copy()
            self.K = self.Post.K
            return

        if not (LP is None or Data is None):
            SS = self.calcSummaryStats(Data, None, LP)

        if SS is None:
            nComp = B.shape[0]
            self.Post = ParamBag(K=nComp, D=self.D)
            self.Post.setField('nu', as1D(nu), dims='K')
            self.Post.setField('B', B, dims=('K', 'D', 'D'))
        else:
            self.updatePost(SS)
        self.K = self.Post.K
예제 #7
0
    def updateEstParams_MaxLik(self, SS):
        ''' Update attribute EstParams for all comps given suff stats.

        Update uses the maximum likelihood objective for point estimation.

        Args
        ----
        SS : suff stats object
            Reads attributes K, D, E, N, xxT, ppT and pxT; the last four
            are indexed per component.

        Post Condition
        ---------
        Attributes K and EstParams updated in-place.
        '''
        self.ClearCache()
        if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
            self.EstParams = ParamBag(K=SS.K, D=SS.D, E=SS.E)
        minCovMat = self.min_covar * np.eye(SS.D)
        if SS.E == SS.D:
            # Same size, so the D x D regularizer can be reused.
            minCovMat_EE = minCovMat
        else:
            minCovMat_EE = self.min_covar * np.eye(SS.E)
        A = np.zeros((SS.K, self.D, self.E))
        Sigma = np.zeros((SS.K, self.D, self.D))
        # range (not the Python-2-only xrange) works on Python 2 and 3.
        for k in range(SS.K):
            # Add small pos multiple of identity to make invertible
            # TODO: This is source of potential stability issues.
            A[k] = np.linalg.solve(SS.ppT[k] + minCovMat_EE, SS.pxT[k]).T

            Sigma[k] = SS.xxT[k] \
                - 2 * np.dot(SS.pxT[k].T, A[k].T) \
                + np.dot(A[k], np.dot(SS.ppT[k], A[k].T))
            Sigma[k] /= SS.N[k]
            # NOTE: Sigma[k] is not explicitly symmetrized here.
            Sigma[k] += minCovMat
        self.EstParams.setField('A', A, dims=('K', 'D', 'E'))
        self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
        self.K = SS.K
예제 #8
0
    def setEstParams(self,
                     obsModel=None,
                     SS=None,
                     LP=None,
                     Data=None,
                     A=None,
                     Sigma=None,
                     **kwargs):
        ''' Initialize EstParams attribute with fields A, Sigma.

        Inputs are considered in this order:
        1. obsModel : its EstParams are copied, then the method returns.
        2. LP together with Data : summary stats are computed from them.
        3. SS : handed to self.updateEstParams.
        4. A and Sigma : passed through as3D and stored directly, with
           K, D, E read from the three axes of A.

        Post Condition
        ---------
        Attributes EstParams and K are set.
        '''
        self.ClearCache()
        if obsModel is not None:
            self.EstParams = obsModel.EstParams.copy()
            self.K = self.EstParams.K
            return

        if LP is not None and Data is not None:
            SS = self.calcSummaryStats(Data, None, LP)

        if SS is not None:
            self.updateEstParams(SS)
        else:
            A = as3D(A)
            Sigma = as3D(Sigma)
            self.EstParams = ParamBag(K=A.shape[0], D=A.shape[1], E=A.shape[2])
            self.EstParams.setField('A', A, dims=('K', 'D', 'E'))
            self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
        # Keep K in step with the stored parameters on every path;
        # previously it stayed stale when A and Sigma were given directly.
        self.K = self.EstParams.K
예제 #9
0
    def setEstParams(self,
                     obsModel=None,
                     SS=None,
                     LP=None,
                     Data=None,
                     mu=None,
                     Sigma=None,
                     **kwargs):
        ''' Create EstParams ParamBag with fields mu, Sigma

        Inputs are considered in this order:
        1. obsModel : its EstParams are copied, then the method returns.
        2. LP together with Data : summary stats are computed from them.
        3. SS : handed to self.updateEstParams (K is not touched here
           on this path).
        4. mu and Sigma : stored directly. K and D come from Sigma;
           mu is transposed if its first axis does not match K.
        '''
        self.ClearCache()
        if obsModel is not None:
            self.EstParams = obsModel.EstParams.copy()
            self.K = self.EstParams.K
            return

        if not (LP is None or Data is None):
            SS = self.calcSummaryStats(Data, None, LP)

        if SS is not None:
            self.updateEstParams(SS)
            return

        Sigma = as3D(Sigma)
        nComp, nDim, _ = Sigma.shape
        mu = as2D(mu)
        if mu.shape[0] != nComp:
            # Tolerate mu arriving as (D, K).
            mu = mu.T
        assert mu.shape[0] == nComp
        assert mu.shape[1] == nDim
        self.EstParams = ParamBag(K=nComp, D=nDim)
        self.EstParams.setField('mu', mu, dims=('K', 'D'))
        self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
        self.K = self.EstParams.K
예제 #10
0
def packParamBagForPost(nu=None,
                        beta=None,
                        m=None,
                        kappa=None,
                        D=None,
                        Post=None,
                        **kwargs):
    ''' Store nu, beta, m, kappa as fields of a ParamBag and return it.

    Args
    ----
    nu, kappa : passed through as1D and stored with dims 'K'.
    beta, m : passed through as2D and stored with dims ('K', 'D').
        Either is transposed (and copied) when its second axis does
        not equal D.
    D : int, optional
        Defaults to the second axis of m.
    Post : ParamBag, optional
        Existing bag to fill; its K and D must match. A new bag is
        created when omitted.

    Returns
    -------
    Post : ParamBag
    '''
    m = as2D(m)
    beta = as2D(beta)

    if D is None:
        D = m.shape[1]

    if m.shape[1] != D:
        m = m.T.copy()
    if beta.shape[1] != D:
        beta = beta.T.copy()
    nComp, _ = m.shape
    if Post is not None:
        assert isinstance(Post, ParamBag)
        assert Post.K == nComp
        assert Post.D == D
    else:
        Post = ParamBag(K=nComp, D=D)
    Post.setField('nu', as1D(nu), dims='K')
    Post.setField('beta', beta, dims=('K', 'D'))
    Post.setField('m', m, dims=('K', 'D'))
    Post.setField('kappa', as1D(kappa), dims='K')
    return Post
예제 #11
0
    def setEstParams(self,
                     obsModel=None,
                     SS=None,
                     LP=None,
                     Data=None,
                     Sigma=None,
                     **kwargs):
        ''' Create EstParams ParamBag with fields Sigma

        The first usable input wins: obsModel (its EstParams are copied),
        then LP with Data (converted to suff stats), then SS (passed to
        self.updateEstParams), and finally the explicit array Sigma,
        whose first axis gives K while D is taken from self.D.

        Post Condition
        ---------
        Attributes EstParams and K are set.
        '''
        self.ClearCache()
        if obsModel is not None:
            self.EstParams = obsModel.EstParams.copy()
            self.K = self.EstParams.K
            return

        if not (LP is None or Data is None):
            SS = self.calcSummaryStats(Data, None, LP)

        if SS is None:
            nComp = Sigma.shape[0]
            self.EstParams = ParamBag(K=nComp, D=self.D)
            self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
        else:
            self.updateEstParams(SS)
        self.K = self.EstParams.K
예제 #12
0
    def setEstParams(self,
                     obsModel=None,
                     SS=None,
                     LP=None,
                     Data=None,
                     mu=None,
                     sigma=None,
                     Sigma=None,
                     **kwargs):
        ''' Create EstParams ParamBag with fields mu, sigma

        Inputs are considered in this order:
        1. obsModel : its EstParams are copied, then the method returns.
        2. LP together with Data : summary stats are computed from them.
        3. SS : handed to self.updateEstParams.
        4. mu with either sigma (2D) or Sigma (3D). When Sigma is given,
           only the diagonal of each Sigma[k] is kept and it overrides
           any sigma argument.

        Post Condition
        ---------
        Attributes EstParams and K are set.
        '''
        self.ClearCache()
        if obsModel is not None:
            self.EstParams = obsModel.EstParams.copy()
            self.K = self.EstParams.K
            return

        if LP is not None and Data is not None:
            SS = self.calcSummaryStats(Data, None, LP)

        if SS is not None:
            self.updateEstParams(SS)
        else:
            K = mu.shape[0]
            if Sigma is not None:
                assert Sigma.ndim == 3
                # Pull all K diagonals in one call. This avoids the
                # Python-2-only xrange loop and never leaves rows of an
                # np.empty buffer unfilled. astype() returns a writable
                # float64 copy of the read-only diagonal view.
                sigma = np.diagonal(Sigma, axis1=1, axis2=2).astype(
                    np.float64)
            assert sigma.ndim == 2
            self.EstParams = ParamBag(K=K, D=mu.shape[1])
            self.EstParams.setField('mu', mu, dims=('K', 'D'))
            self.EstParams.setField('sigma', sigma, dims=('K', 'D'))
        self.K = self.EstParams.K
예제 #13
0
 def setEstParamsFromPost(self, Post):
     ''' Build EstParams (field Sigma) from Post (fields nu, B).

     Each Sigma[k] is Post.B[k] divided by (Post.nu[k] - D - 1),
     described by the original author as the posterior mean.

     Post Condition
     ---------
     Attributes EstParams and K are set.
     '''
     nDim = Post.D
     self.EstParams = ParamBag(K=Post.K, D=nDim)
     # One scalar per component, broadcast over each D x D matrix.
     scale = (Post.nu - nDim - 1)[:, None, None]
     Sigma = Post.B / scale
     self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
     self.K = self.EstParams.K
예제 #14
0
 def setEstParamsFromPost(self, Post):
     ''' Build EstParams (mu, sigma) from Post (nu, beta, m, kappa).

     mu is a copy of Post.m; sigma[k] is Post.beta[k] / (Post.nu[k] - 2).
     The original author describes these as posterior means.

     Post Condition
     ---------
     Attributes EstParams and K are set.
     '''
     self.EstParams = ParamBag(K=Post.K, D=self.D)
     mu = Post.m.copy()
     # One scalar per component, broadcast across the D columns.
     scale = (Post.nu - 2)[:, None]
     sigma = Post.beta / scale
     self.EstParams.setField('mu', mu, dims=('K', 'D'))
     self.EstParams.setField('sigma', sigma, dims=('K', 'D'))
     self.K = self.EstParams.K
예제 #15
0
 def setEstParamsFromPost(self, Post=None, **kwargs):
     ''' Build EstParams (field phi) from Post (field lam).

     Each row of phi is the matching row of Post.lam divided by its sum.
     Falls back to self.Post when Post is not provided.

     Post Condition
     ---------
     Attributes EstParams and K are set.
     '''
     if Post is None:
         Post = self.Post
     self.EstParams = ParamBag(K=Post.K, D=Post.D)
     rowTotals = np.sum(Post.lam, axis=1)[:, None]
     phi = Post.lam / rowTotals
     self.EstParams.setField('phi', phi, dims=('K', 'D'))
     self.K = self.EstParams.K
예제 #16
0
 def setEstParamsFromPost(self, Post):
     ''' Build EstParams (mu, Sigma) from Post (nu, B, m, kappa).

     mu is a copy of Post.m; Sigma[k] is Post.B[k] divided by
     (Post.nu[k] - D - 1). The original author describes these as
     posterior means.

     Post Condition
     ---------
     Attributes EstParams and K are set.
     '''
     self.EstParams = ParamBag(K=Post.K, D=Post.D)
     mu = Post.m.copy()
     # Reshape nu to (K, 1, 1) so it broadcasts over each D x D matrix.
     nuCol = Post.nu[:, None, None]
     Sigma = Post.B / (nuCol - Post.D - 1)
     self.EstParams.setField('mu', mu, dims=('K', 'D'))
     self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
     self.K = self.EstParams.K
예제 #17
0
 def setEstParamsFromPost(self, Post):
     ''' Build EstParams (fields A, Sigma) from Post (fields M, B, nu).

     A is a copy of Post.M; Sigma[k] is Post.B[k] divided by
     (Post.nu[k] - D - 1).

     Post Condition
     ---------
     Attributes EstParams and K are set.
     '''
     nDim = Post.D
     self.EstParams = ParamBag(K=Post.K, D=nDim, E=Post.E)
     A = Post.M.copy()
     # One scalar per component, broadcast over each D x D matrix.
     scale = (Post.nu - nDim - 1)[:, None, None]
     Sigma = Post.B / scale
     self.EstParams.setField('A', A, dims=('K', 'D', 'E'))
     self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
     self.K = self.EstParams.K
예제 #18
0
 def setEstParamsFromPost(self, Post=None):
     ''' Build EstParams (field phi) from Post (fields lam1, lam0).

     phi is lam1 / (lam1 + lam0), computed elementwise.
     Falls back to self.Post when Post is not provided.

     Post Condition
     ---------
     Attributes EstParams and K are set.
     '''
     if Post is None:
         Post = self.Post
     self.EstParams = ParamBag(K=Post.K, D=Post.D)
     lamTotal = Post.lam1 + Post.lam0
     phi = Post.lam1 / lamTotal
     self.EstParams.setField('phi', phi, dims=('K', 'D'))
     self.K = self.EstParams.K
예제 #19
0
 def createPrior(self, Data, lam=1.0, min_phi=1e-100, **kwargs):
     ''' Initialize Prior ParamBag attribute.

     Args
     ----
     Data : unused by this method.
     lam : scalar or array of size D
         A scalar is expanded to a length-D vector.
     min_phi : float
         Stored as attribute min_phi.

     Post Condition
     ---------
     Attributes min_phi, Prior (field lam) and prior_cFunc are set.
     '''
     D = self.D
     self.min_phi = min_phi
     self.Prior = ParamBag(K=0, D=D)
     # np.float was only an alias of the builtin float and no longer
     # exists in NumPy >= 1.24; np.float64 is the same dtype everywhere.
     lam = np.asarray(lam, dtype=np.float64)
     if lam.ndim == 0:
         lam = lam * np.ones(D)
     assert lam.size == D
     self.Prior.setField('lam', lam, dims=('D'))
     self.prior_cFunc = c_Func(lam)
예제 #20
0
    def updateEstParams_MAP(self, SS):
        ''' Update attribute EstParams for all comps given suff stats.

        Update uses the MAP objective for point estimation.

        NOT YET IMPLEMENTED: after clearing the cache and (re)allocating
        EstParams when its size does not match SS, this always raises.

        Raises
        ------
        NotImplementedError
        '''
        self.ClearCache()
        if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
            self.EstParams = ParamBag(K=SS.K, D=SS.D, E=SS.E)
        # NotImplemented is a comparison sentinel, not an exception class;
        # calling it raised an unrelated TypeError.
        raise NotImplementedError('TODO')
예제 #21
0
    def updateEstParams_MAP(self, SS):
        ''' Update attribute EstParams for all comps given suff stats.

        Update uses the MAP objective for point estimation:
        phi is (SS.WordCounts + Prior.lam - 1), normalized so that each
        row sums to one.

        Post Condition
        ---------
        Attributes K and EstParams updated in-place.
        '''
        self.ClearCache()
        if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
            self.EstParams = ParamBag(K=SS.K, D=SS.D)
        # NOTE(review): entries go negative when count + lam < 1;
        # confirm callers only use this with lam >= 1.
        phi = SS.WordCounts + self.Prior.lam - 1
        phi /= phi.sum(axis=1)[:, np.newaxis]
        self.EstParams.setField('phi', phi, dims=('K', 'D'))
        # Honor the documented post condition; K was left stale before.
        self.K = SS.K
예제 #22
0
    def updatePost(self, SS):
        ''' Recompute attribute Post for every component from suff stats.

        The new lam values come from self.calcPostParams(SS); the
        original author describes this as the variational update.

        Post Condition
        ---------
        Attributes K and Post updated in-place.
        '''
        self.ClearCache()
        # Reuse the existing bag unless it is missing or sized differently.
        bagIsStale = not hasattr(self, 'Post') or self.Post.K != SS.K
        if bagIsStale:
            self.Post = ParamBag(K=SS.K, D=SS.D)

        postLam = self.calcPostParams(SS)
        self.Post.setField('lam', postLam, dims=('K', 'D'))
        self.K = SS.K
예제 #23
0
    def updateEstParams_MaxLik(self, SS):
        ''' Update attribute EstParams for all comps given suff stats.

        Update uses the maximum likelihood objective for point estimation:
        phi is SS.WordCounts divided row-wise by SS.SumWordCounts, then
        floored at self.min_phi.

        Post Condition
        ---------
        Attributes K and EstParams updated in-place.
        '''
        self.ClearCache()
        if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
            self.EstParams = ParamBag(K=SS.K, D=SS.D)
        phi = SS.WordCounts / SS.SumWordCounts[:, np.newaxis]
        # prevent entries from reaching exactly 0
        np.maximum(phi, self.min_phi, out=phi)
        self.EstParams.setField('phi', phi, dims=('K', 'D'))
        # Honor the documented post condition; K was left stale before.
        self.K = SS.K
예제 #24
0
    def updateEstParams_MAP(self, SS):
        ''' Update attribute EstParams for all comps given suff stats.

        Update uses the MAP objective for point estimation:
        phi = (Count1 + lam1 - 1) / (Count1 + Count0 + lam1 + lam0 - 2),
        computed elementwise.

        Post Condition
        ---------
        Attributes K and EstParams updated in-place.
        '''
        self.ClearCache()
        if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
            self.EstParams = ParamBag(K=SS.K, D=SS.D)
        phi_numer = SS.Count1 + self.Prior.lam1 - 1
        phi_denom = SS.Count1 + SS.Count0 + \
            self.Prior.lam1 + self.Prior.lam0 - 2
        phi = phi_numer / phi_denom
        self.EstParams.setField('phi', phi, dims=('K', 'D'))
        # Honor the documented post condition; K was left stale before.
        self.K = SS.K
예제 #25
0
def createParamBagForPrior(Data,
                           D=0,
                           nu=0,
                           beta=None,
                           m=None,
                           kappa=None,
                           MMat='zero',
                           ECovMat=None,
                           sF=1.0,
                           Prior=None,
                           **kwargs):
    ''' Initialize ParamBag of parameters which specify prior.

    Args
    ----
    Data : dataset or None
        When given, D is read from Data.dim and the D argument is ignored.
    D : int
        Dimension used only when Data is None.
    nu : scalar
        Raised to at least D + 2.
    beta : scalar or array, optional
        When omitted, set to diag(ECovMat) * (nu - 2).
    m : scalar or array, optional
        When omitted, set to the column sums of Data.X if MMat == 'data',
        otherwise to zeros of length D.
    kappa : scalar, optional
        Raised to at least 1e-8; None (the default) yields 1e-8.
    ECovMat : array, str or None
        A str or None is resolved through createECovMatFromUserInput,
        together with sF.
    Prior : ParamBag, optional
        Existing bag to fill; must satisfy Prior.D == D.

    Returns
    -------
    Prior : ParamBag
    '''
    if Data is None:
        D = int(D)
    else:
        D = int(Data.dim)
    nu = np.maximum(nu, D + 2)
    # np.maximum cannot compare None with a float on Python 3, so the
    # default is mapped to the floor value explicitly.
    if kappa is None:
        kappa = 1e-8
    else:
        kappa = np.maximum(kappa, 1e-8)
    if beta is None:
        if ECovMat is None or isinstance(ECovMat, str):
            ECovMat = createECovMatFromUserInput(D, Data, ECovMat, sF)
        beta = np.diag(ECovMat) * (nu - 2)
    else:
        # Coercing first lets plain Python scalars through (they have no
        # .ndim). np.float64 replaces np.float, removed in NumPy 1.24.
        beta = np.atleast_1d(np.asarray(beta, dtype=np.float64))
    if m is None:
        if MMat == 'data':
            # NOTE(review): this is the column *sum* of the data, not the
            # mean; kept as-is, but confirm that is intended.
            m = np.sum(Data.X, axis=0)
        else:
            m = np.zeros(D)
    else:
        m = np.atleast_1d(np.asarray(m, dtype=np.float64))
    if Prior is None:
        Prior = ParamBag(K=0, D=D)
    assert Prior.D == D
    Prior.setField('nu', nu, dims=None)
    Prior.setField('kappa', kappa, dims=None)
    Prior.setField('m', m, dims=('D'))
    Prior.setField('beta', beta, dims=('D'))
    return Prior
예제 #26
0
    def setPostFromEstParams(self,
                             EstParams,
                             Data=None,
                             nTotalTokens=1,
                             **kwargs):
        ''' Set attribute Post based on values in EstParams.

        Args
        ----
        EstParams : object with attributes K, D and phi
        Data : unused by this method.
        nTotalTokens : scalar (or array broadcastable against phi)
            Pseudo-count budget split between the "on" and "off"
            parameters according to phi.

        Post Condition
        ---------
        Attributes Post (fields lam1, lam0) and K are overwritten.
        '''
        K = EstParams.K
        D = EstParams.D

        phi = EstParams.phi
        lam1 = phi * nTotalTokens + self.Prior.lam1
        # The "off" pseudo-count is the remaining share of the budget,
        # (1 - phi) * n. The old form 1 - phi * n matched this only for
        # n == 1 and went negative for larger budgets.
        lam0 = (1 - phi) * nTotalTokens + self.Prior.lam0

        self.Post = ParamBag(K=K, D=D)
        self.Post.setField('lam1', lam1, dims=('K', 'D'))
        self.Post.setField('lam0', lam0, dims=('K', 'D'))
        self.K = K
예제 #27
0
    def createPrior(self, Data, nu=0, B=None, ECovMat=None, sF=1.0, **kwargs):
        ''' Build attribute Prior, a ParamBag with fields nu and B.

        nu is raised to at least D + 2. When B is not supplied it is
        set to ECovMat * (nu - D - 1); an ECovMat that is None or a str
        is first resolved via createECovMatFromUserInput (with sF).

        Post Condition
        ------
        Prior expected covariance matrix set to match provided value.
        '''
        nDim = self.D
        nu = np.maximum(nu, nDim + 2)
        if B is not None:
            B = as2D(B)
        else:
            needsLookup = ECovMat is None or isinstance(ECovMat, str)
            if needsLookup:
                ECovMat = createECovMatFromUserInput(nDim, Data, ECovMat, sF)
            B = ECovMat * (nu - nDim - 1)
        self.Prior = ParamBag(K=0, D=nDim)
        self.Prior.setField('nu', nu, dims=None)
        self.Prior.setField('B', B, dims=('D', 'D'))
예제 #28
0
    def updateEstParams_MaxLik(self, SS):
        ''' Update attribute EstParams for all comps given suff stats.

        Update uses the maximum likelihood objective for point estimation:
        Sigma[k] = SS.xxT[k] / SS.N[k] + min_covar * I.

        Post Condition
        ---------
        Attributes K and EstParams updated in-place.
        '''
        self.ClearCache()
        if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
            self.EstParams = ParamBag(K=SS.K, D=SS.D)

        minCovMat = self.min_covar * np.eye(SS.D)
        # Start every component at the regularizer, then add its estimate.
        covMat = np.tile(minCovMat, (SS.K, 1, 1))
        # range (not the Python-2-only xrange) works on Python 2 and 3.
        for k in range(SS.K):
            covMat[k] += SS.xxT[k] / SS.N[k]
        self.EstParams.setField('Sigma', covMat, dims=('K', 'D', 'D'))
        self.K = SS.K
예제 #29
0
    def setPostFromEstParams(self, EstParams, Data=None, N=None):
        ''' Set attribute Post based on values in EstParams.

        Args
        ----
        EstParams : object with attributes K, D and Sigma
        Data : optional dataset
            When provided, N is replaced by Data.nObsTotal.
        N : scalar or 1D array with one entry per component
            A scalar is divided evenly among the K components.

        Post Condition
        ---------
        Attributes Post (fields nu, B) and K are overwritten.
        '''
        K = EstParams.K
        D = EstParams.D
        if Data is not None:
            N = Data.nObsTotal
        # np.float was only an alias of the builtin float and no longer
        # exists in NumPy >= 1.24; np.float64 is the same dtype everywhere.
        N = np.asarray(N, dtype=np.float64)
        if N.ndim == 0:
            N = float(N) / K * np.ones(K)

        nu = self.Prior.nu + N
        B = np.zeros((K, D, D))
        # range (not the Python-2-only xrange) works on Python 2 and 3.
        for k in range(K):
            # Chosen so that B[k] / (nu[k] - D - 1) gives back Sigma[k].
            B[k] = (nu[k] - D - 1) * EstParams.Sigma[k]
        self.Post = ParamBag(K=K, D=D)
        self.Post.setField('nu', nu, dims=('K'))
        self.Post.setField('B', B, dims=('K', 'D', 'D'))
        self.K = K
예제 #30
0
 def createPrior(self,
                 Data,
                 lam1=1.0,
                 lam0=1.0,
                 priorMean=None,
                 priorScale=None,
                 eps_phi=1e-8,
                 **kwargs):
     ''' Initialize Prior ParamBag attribute.

     Args
     ----
     Data : dataset
         Read only when priorMean contains "data": uses
         Data.getDocTypeBinaryMatrix() if Data has attribute word_id,
         otherwise Data.X.
     lam1, lam0 : scalar or array of size D
         Used directly when priorMean is None or a string containing
         'none' (case-insensitive).
     priorMean : None, str, or array-like
         A str containing "data" derives the mean from Data, clipped to
         [0.05, 0.95]. Any other non-None value is treated as the mean.
     priorScale : scalar
         Required whenever lam1/lam0 are derived from a mean:
         lam1 = priorScale * mean, lam0 = priorScale * (1 - mean).
     eps_phi : float
         Stored as attribute eps_phi.

     Post Condition
     ---------
     Attributes eps_phi and Prior (fields lam1, lam0) are set.
     '''
     D = self.D
     self.eps_phi = eps_phi
     self.Prior = ParamBag(K=0, D=D)
     # Only string-like values have .lower(); calling it on an array used
     # to raise AttributeError, so the explicit-mean branch never ran.
     isText = hasattr(priorMean, 'lower')
     if priorMean is None or (isText and priorMean.lower().count('none')):
         # np.float64 replaces np.float, which NumPy 1.24 removed.
         lam1 = np.asarray(lam1, dtype=np.float64)
         lam0 = np.asarray(lam0, dtype=np.float64)
     elif isinstance(priorMean, str) and priorMean.count("data"):
         assert priorScale is not None
         priorScale = float(priorScale)
         if hasattr(Data, 'word_id'):
             X = Data.getDocTypeBinaryMatrix()
             dataMean = np.mean(X, axis=0)
         else:
             dataMean = np.mean(Data.X, axis=0)
         dataMean = np.minimum(dataMean, 0.95)  # Make prior more smooth
         dataMean = np.maximum(dataMean, 0.05)
         lam1 = priorScale * dataMean
         lam0 = priorScale * (1 - dataMean)
     else:
         assert priorScale is not None
         priorScale = float(priorScale)
         priorMean = np.asarray(priorMean, dtype=np.float64)
         lam1 = priorScale * priorMean
         lam0 = priorScale * (1 - priorMean)
     if lam1.ndim == 0:
         lam1 = lam1 * np.ones(D)
     if lam0.ndim == 0:
         lam0 = lam0 * np.ones(D)
     assert lam1.size == D
     assert lam0.size == D
     self.Prior.setField('lam1', lam1, dims=('D', ))
     self.Prior.setField('lam0', lam0, dims=('D', ))