Example #1
    def setPostFactors(self, obsModel=None, SS=None, LP=None, Data=None,
                       nu=0, B=0, m=0, kappa=0,
                       **kwargs):
        ''' Set attribute Post to provided values.
        '''
        self.ClearCache()
        if obsModel is not None:
            if hasattr(obsModel, 'Post'):
                self.Post = obsModel.Post.copy()
                self.K = self.Post.K
            else:
                self.setPostFromEstParams(obsModel.EstParams)
            return

        if LP is not None and Data is not None:
            SS = self.calcSummaryStats(Data, None, LP)

        if SS is not None:
            self.updatePost(SS)
        else:
            m = as2D(m)
            if m.shape[1] != self.D:
                m = m.T.copy()
            K, _ = m.shape
            self.Post = ParamBag(K=K, D=self.D)
            self.Post.setField('nu', as1D(nu), dims=('K'))
            self.Post.setField('B', B, dims=('K', 'D', 'D'))
            self.Post.setField('m', m, dims=('K', 'D'))
            self.Post.setField('kappa', as1D(kappa), dims=('K'))
        self.K = self.Post.K
Example #2
def buildCostMatrix(zHat, zTrue):
    ''' Construct cost matrix for alignment of estimated and true sequences

    Args
    --------
    zHat : 1D array
        each entry is an integer label in {0, 1, ... Kest-1}
    zTrue : 1D array
        each entry is an integer label in {0, 1, ... Ktrue-1}
        with optional negative state labels

    Returns
    --------
    CostMatrix : 2D array, size K x K, where K = max(Ktrue, Kest)
        CostMatrix[j, k] = number of timesteps where true label j
        is assigned but estimated label k is not.
    '''
    zHat = as1D(zHat)
    zTrue = as1D(zTrue)
    Ktrue = int(np.max(zTrue)) + 1
    Kest = int(np.max(zHat)) + 1
    K = np.maximum(Ktrue, Kest)
    CostMatrix = np.zeros((K, K))
    for ktrue in range(K):
        for kest in range(K):
            CostMatrix[ktrue, kest] = np.sum(
                np.logical_and(zTrue == ktrue, zHat != kest))
    return CostMatrix
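To see the construction concretely, here is a small usage sketch, assuming buildCostMatrix above (and its as1D/NumPy imports) are in scope. With true labels {0, 1, 2} and an estimate that swaps 0 and 1, the zero entries mark the cheapest pairing:

import numpy as np

zTrue = np.array([0, 0, 1, 1, 2, 2])
zHat = np.array([1, 1, 0, 0, 2, 2])
C = buildCostMatrix(zHat, zTrue)
print(C)
# [[2. 0. 2.]
#  [0. 2. 2.]
#  [2. 2. 0.]]
# Zeros at (0,1), (1,0), (2,2): the optimal relabeling swaps 0 and 1.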
Example #3
def packParamBagForPost(nu=None,
                        beta=None,
                        m=None,
                        kappa=None,
                        D=None,
                        Post=None,
                        **kwargs):
    ''' Parse provided array args and pack into parameter bag

    Returns
    -------
    Post : ParamBag, with K clusters
    '''
    m = as2D(m)
    beta = as2D(beta)

    if D is None:
        D = m.shape[1]

    if m.shape[1] != D:
        m = m.T.copy()
    if beta.shape[1] != D:
        beta = beta.T.copy()
    K, _ = m.shape
    if Post is None:
        Post = ParamBag(K=K, D=D)
    else:
        assert isinstance(Post, ParamBag)
        assert Post.K == K
        assert Post.D == D
    Post.setField('nu', as1D(nu), dims=('K'))
    Post.setField('beta', beta, dims=('K', 'D'))
    Post.setField('m', m, dims=('K', 'D'))
    Post.setField('kappa', as1D(kappa), dims=('K'))
    return Post
Example #4
def packParamBagForPost(pnu_K=None,
                        ptau_K=None,
                        w_KE=None,
                        P_KEE=None,
                        Post=None,
                        **kwargs):
    ''' Parse provided array args and pack into parameter bag

    Returns
    -------
    Post : ParamBag, with K clusters
    '''
    pnu_K = as1D(pnu_K)
    ptau_K = as1D(ptau_K)
    w_KE = as2D(w_KE)
    P_KEE = as3D(P_KEE)

    K = pnu_K.size
    E = w_KE.shape[1]
    if Post is None:
        Post = ParamBag(K=K, D=E - 1, E=E)
    elif not hasattr(Post, 'E'):
        Post.E = E
    assert Post.K == K
    assert Post.D == E - 1
    assert Post.E == E
    Post.setField('pnu_K', pnu_K, dims=('K'))
    Post.setField('ptau_K', ptau_K, dims=('K'))
    Post.setField('w_KE', w_KE, dims=('K', 'E'))
    Post.setField('P_KEE', P_KEE, dims=('K', 'E', 'E'))
    return Post
Example #5
    def __init__(self, word_id=None, word_count=None, doc_range=None,
                 vocab_size=0, vocabList=None, vocabfile=None,
                 summary=None,
                 nDocTotal=None, TrueParams=None, **kwargs):
        ''' Constructor for BagOfWordsData object.

        Represents bag-of-words dataset via several 1D vectors,
        where each entry gives the id/count of a single token.

        Parameters
        -------
        word_id : 1D array, size U
            word_id[i] gives the int type for distinct word i.
            Range: [0, 1, 2, 3, ... vocab_size-1]
        word_count : 1D array, size U
            word_count[i] gives the number of times distinct word i
            appears in its corresponding document.
            Range: [1, 2, 3, ...]
        doc_range : 1D array, size nDoc+1
            Defines section of each corpus-wide vector which belongs
            to each document d:
            word_id_d = self.word_id[doc_range[d]:doc_range[d+1]]
            word_ct_d = self.word_count[doc_range[d]:doc_range[d+1]]
        vocab_size : int
            size of set of possible vocabulary words
        vocabList : list of strings
            specifies the word associated with each vocab type id
        nDoc : int
            number of groups (documents) represented in memory
            by this object.
        nDocTotal : int
            total size of the corpus
            will differ from nDoc when this dataset represents a
            small minibatch of some larger corpus.
        TrueParams : None [default], or dict of true parameters.
        '''
        self.word_id = as1D(toCArray(word_id, dtype=np.int32))
        self.word_count = as1D(toCArray(word_count, dtype=np.float64))
        self.doc_range = as1D(toCArray(doc_range, dtype=np.int32))
        self.vocab_size = int(vocab_size)

        if summary is not None:
            self.summary = summary

        # Store "true" parameters that generated toy-data, if provided
        if TrueParams is not None:
            self.TrueParams = TrueParams

        # Add dictionary of vocab words, if provided
        if vocabList is not None:
            self.vocabList = vocabList
        elif vocabfile is not None:
            with open(vocabfile, 'r') as f:
                self.vocabList = [x.strip() for x in f.readlines()]
        else:
            self.vocabList = None
        if vocab_size == 0 and self.vocabList is not None:
            self.vocab_size = len(self.vocabList)
        self._verify_attributes()
        self._set_corpus_size_attributes(nDocTotal)
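The doc_range convention is easiest to see on a toy corpus; this self-contained sketch (plain NumPy, hypothetical values) shows how the flat per-token vectors slice into documents:

import numpy as np

# Toy corpus: 2 documents over a 5-word vocabulary
word_id = np.array([0, 2, 4, 1, 2], dtype=np.int32)
word_count = np.array([1., 3., 1., 2., 1.])
doc_range = np.array([0, 3, 5], dtype=np.int32)

nDoc = doc_range.size - 1
for d in range(nDoc):
    start, stop = doc_range[d], doc_range[d + 1]
    print('doc %d: word_id=%s word_count=%s' % (
        d, word_id[start:stop], word_count[start:stop]))
# doc 0: word_id=[0 2 4] word_count=[1. 3. 1.]
# doc 1: word_id=[1 2] word_count=[2. 1.]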
Example #6
    def __init__(self, X=None, doc_range=None, nDocTotal=None,
                 Xprev=None, TrueZ=None,
                 TrueParams=None, fileNames=None, summary=None, **kwargs):
        ''' Create an instance of GroupXData for provided array X

        Post Condition
        ---------
        self.X : 2D array, size N x D
            with standardized dtype, alignment, byteorder.
        self.Xprev : 2D array, size N x D
            with standardized dtype, alignment, byteorder.
        self.doc_range : 1D array, size nDoc+1
        '''
        self.X = as2D(toCArray(X, dtype=np.float64))
        self.doc_range = as1D(toCArray(doc_range, dtype=np.int32))
        if summary is not None:
            self.summary = summary
        if Xprev is not None:
            self.Xprev = as2D(toCArray(Xprev, dtype=np.float64))

        # Verify attributes are consistent
        self._set_dependent_params(doc_range, nDocTotal)
        self._check_dims()

        # Add optional true parameters / true hard labels
        if TrueParams is not None:
            self.TrueParams = dict()
            for key, arr in TrueParams.items():
                self.TrueParams[key] = toCArray(arr)

        if TrueZ is not None:
            if not hasattr(self, 'TrueParams'):
                self.TrueParams = dict()
            self.TrueParams['Z'] = as1D(toCArray(TrueZ))
            self.TrueParams['K'] = np.unique(self.TrueParams['Z']).size

        # Add optional source files for each group/sequence
        if fileNames is not None:
            if hasattr(fileNames, 'shape') and fileNames.shape == (1, 1):
                fileNames = fileNames[0, 0]
            if len(fileNames) > 1:
                self.fileNames = [str(x).strip()
                                  for x in np.squeeze(fileNames)]
            else:
                self.fileNames = [str(fileNames[0])]
        # Add extra data attributes custom for the dataset
        for key in kwargs:
            if hasattr(self, key):
                continue
            if not key.startswith("__"):
                arr = np.squeeze(as1D(kwargs[key]))
                if arr.shape == ():
                    try:
                        arr = float(arr)
                    except TypeError:
                        continue
                setattr(self, key, arr)
Example #7
def calcHammingDistance(zTrue, zHat, excludeNegLabels=1, verbose=0,
                        **kwargs):
    ''' Compute Hamming distance: fraction of timesteps with different labels.

    Result is normalized to lie within [0, 1].

    Args
    --------
    zHat : 1D array
        each entry is an integer label in {0, 1, ... Kest-1}
    zTrue : 1D array
        each entry is an integer label in {0, 1, ... Ktrue-1}

    Returns
    ------
    d : float
        Normalized Hamming distance from zTrue to zHat, in [0, 1].

    Examples
    ------
    >>> calcHammingDistance([0, 0, 1, 1], [0, 0, 1, 1])
    0.0
    >>> calcHammingDistance([0, 0, 1, 1], [0, 0, 1, 2])
    0.25
    >>> calcHammingDistance([0, 0, 1, 1], [1, 1, 0, 0])
    1.0
    >>> calcHammingDistance([1, 1, 0, -1], [1, 1, 0, 0])
    0.0
    >>> calcHammingDistance([-1, -1, -2, 3], [1, 2, 3, 3])
    0.0
    >>> calcHammingDistance([-1, -1, 0, 1], [1, 2, 0, 1], excludeNegLabels=1)
    0.0
    >>> calcHammingDistance([-1, -1, 0, 1], [1, 2, 0, 1], excludeNegLabels=0)
    0.5
    '''
    zHat = as1D(zHat)
    zTrue = as1D(zTrue)
    if excludeNegLabels:
        assert np.sum(zHat < 0) == 0
        good_tstep_mask = zTrue >= 0
        nGood = np.sum(good_tstep_mask)
        if verbose and np.sum(good_tstep_mask) < zTrue.size:
            print('EXCLUDED %d/%d timesteps' % (np.sum(zTrue < 0), zTrue.size))
        dist = np.sum(zTrue[good_tstep_mask] != zHat[good_tstep_mask])
        dist = dist/float(nGood)
    else:
        dist = np.sum(zTrue != zHat) / float(zHat.size)
    return dist
Example #8
def getPrefixForLapQuery(taskpath, lapQuery):
    ''' Search among checkpoint laps for one nearest to query.

    Returns
    --------
    prefix : str
        For lap 1, prefix = 'Lap0001.000'.
        For lap 5.5, prefix = 'Lap0005.500'.
    lap : float
        lap checkpoint for saved params closest to lapQuery
    '''
    try:
        saveLaps = np.loadtxt(os.path.join(taskpath, 'snapshot_lap.txt'))
    except IOError:
        fileList = glob.glob(os.path.join(taskpath, 'Lap*Topic*'))
        if len(fileList) == 0:
            fileList = glob.glob(os.path.join(taskpath, 'Lap*.log_prob_w'))
        assert len(fileList) > 0
        saveLaps = list()
        for fpath in sorted(fileList):
            basename = fpath.split(os.path.sep)[-1]
            lapstr = basename[3:11]
            saveLaps.append(float(lapstr))
        saveLaps = np.sort(np.asarray(saveLaps))

    saveLaps = as1D(saveLaps)
    if lapQuery is None:
        bestLap = saveLaps[-1]  # take final saved value
    else:
        distances = np.abs(lapQuery - saveLaps)
        bestLap = saveLaps[np.argmin(distances)]
    return makePrefixForLap(bestLap), bestLap
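makePrefixForLap is not shown in this listing; judging only from the docstring's examples, a consistent implementation would look like the sketch below (an assumption, not necessarily the package's exact code):

def makePrefixForLap(lap):
    # Zero-padded to width 8 with 3 decimals: 1 -> 'Lap0001.000'
    return 'Lap%08.3f' % lap

assert makePrefixForLap(1) == 'Lap0001.000'
assert makePrefixForLap(5.5) == 'Lap0005.500'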
Example #9
def eta2pi(eta_d):
    ''' Transform unconstrained real vector to a point on the simplex

    Inverse of pi2eta (Example #18): softmax with the last logit fixed at 0.

    Returns
    -------
    pi_d : 1D array, size eta_d.size + 1, positive entries summing to one
    '''
    eta_d = as1D(np.asarray(eta_d))
    pi_d = np.ones(eta_d.size+1)
    pi_d[:-1] = np.exp(eta_d)
    pi_d[:-1] += 1e-100
    pi_d /= (1.0 + np.sum(pi_d[:-1]))
    return pi_d
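A quick check of the simplex property, assuming eta2pi above is in scope: any real vector of length K-1 maps to a strictly positive length-K vector summing to one.

import numpy as np

eta = np.array([-1.0, 0.0, 1.0])
pi = eta2pi(eta)
assert pi.size == eta.size + 1
assert np.all(pi > 0)
assert np.isclose(pi.sum(), 1.0)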
Example #10
    def setPostFactors(self, obsModel=None, SS=None, LP=None, Data=None,
                       nu=0, B=0, M=0, V=0,
                       **kwargs):
        ''' Set Post attribute to provided values.
        '''
        self.ClearCache()
        if obsModel is not None:
            if hasattr(obsModel, 'Post'):
                self.Post = obsModel.Post.copy()
            else:
                self.setPostFromEstParams(obsModel.EstParams)
            self.K = self.Post.K
            return

        if LP is not None and Data is not None:
            SS = self.calcSummaryStats(Data, None, LP)

        if SS is not None:
            self.updatePost(SS)
        else:
            M = as3D(M)
            B = as3D(B)
            V = as3D(V)

            K, D, E = M.shape
            assert D == self.D
            assert E == self.E
            self.Post = ParamBag(K=K, D=self.D, E=self.E)
            self.Post.setField('nu', as1D(nu), dims=('K'))
            self.Post.setField('B', B, dims=('K', 'D', 'D'))
            self.Post.setField('M', M, dims=('K', 'D', 'E'))
            self.Post.setField('V', V, dims=('K', 'E', 'E'))
        self.K = self.Post.K
Example #11
def generateRandomBinaryDataFromMixture(**kwargs):
    for key in Defaults:
        if key not in kwargs:
            kwargs[key] = Defaults[key]
    phi = makePhi(**kwargs)
    # phi[k, :] holds the D Bernoulli rates for cluster k (see its use
    # below), so the cluster count and dimension follow from its shape
    K, D = phi.shape
    nObsTotal = kwargs['nObsTotal']

    PRNG = np.random.RandomState(kwargs['seed'])

    # Select number of observations from each cluster
    beta = 1.0 / K * np.ones(K)
    if nObsTotal < 2 * K:
        # force examples from every cluster
        nPerCluster = np.ceil(nObsTotal / K) * np.ones(K)
    else:
        nPerCluster = as1D(PRNG.multinomial(nObsTotal, beta, size=1))
    nPerCluster = np.int32(nPerCluster)

    # Generate data from each cluster!
    X = np.zeros((nObsTotal, D))
    Z = np.zeros(nObsTotal, dtype=np.int32)
    start = 0
    for k in range(K):
        stop = start + nPerCluster[k]
        X[start:stop] = np.float64(
            PRNG.rand(nPerCluster[k], D) < phi[k, :][np.newaxis, :])
        Z[start:stop] = k
        start = stop

    TrueParams = dict()
    TrueParams['beta'] = beta
    TrueParams['phi'] = phi
    TrueParams['Z'] = Z
    return XData(X, TrueParams=TrueParams)
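The heart of this generator is the line filling X[start:stop]: coordinate d of a cluster-k sample is Bernoulli with rate phi[k, d]. That step can be checked in isolation with a self-contained sketch (hypothetical K, D, and rates; no Defaults or makePhi needed):

import numpy as np

PRNG = np.random.RandomState(0)
K, D, n_k = 3, 5, 10000
phi = PRNG.rand(K, D)  # phi[k, d] = P(X[n, d] = 1 | cluster k)

for k in range(K):
    X_k = np.float64(PRNG.rand(n_k, D) < phi[k, :][np.newaxis, :])
    # Empirical rates converge to phi[k] as n_k grows
    assert np.allclose(X_k.mean(axis=0), phi[k], atol=0.05)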
Example #12
def createParamBagForPrior(
        Data=None, D=0,
        pnu=0, ptau=None, w_E=0,
        P_EE=None, P_diag_E=None, P_diag_val=1.0,
        Prior=None,
        **kwargs):
    ''' Initialize Prior ParamBag attribute.

    Returns
    -------
    Prior : ParamBag
        with dimension attributes K, D, E
        with parameter attributes pnu, ptau, w_E, P_EE
    '''
    if Data is None:
        D = int(D)
    else:
        D = int(Data.dim)
    E = D + 1

    # Init parameters of 1D Wishart prior on delta
    pnu = np.maximum(pnu, 1e-9)
    ptau = np.maximum(ptau, 1e-9)

    # Initialize precision matrix of the weight vector
    if P_EE is not None:
        P_EE = np.asarray(P_EE)
    elif P_diag_E is not None:
        P_EE = np.diag(np.asarray(P_diag_E))
    else:
        P_EE = np.diag(P_diag_val * np.ones(E))
    assert P_EE.ndim == 2
    assert P_EE.shape == (E, E)

    # Initialize mean of the weight vector
    w_E = as1D(np.asarray(w_E))
    if w_E.size < E:
        w_E = np.tile(w_E, E)[:E]
    assert w_E.ndim == 1
    assert w_E.size == E

    if Prior is None:
        Prior = ParamBag(K=0, D=D, E=E)
    if not hasattr(Prior, 'E'):
        Prior.E = E
    assert Prior.D == D
    assert Prior.E == E
    Prior.setField('pnu', pnu, dims=None)
    Prior.setField('ptau', ptau, dims=None)
    Prior.setField('w_E', w_E, dims=('E'))
    Prior.setField('P_EE', P_EE, dims=('E', 'E'))

    Pw_E = np.dot(P_EE, w_E)
    wPw_1 = np.dot(w_E, Pw_E)
    Prior.setField('Pw_E', Pw_E, dims=('E'))
    Prior.setField('wPw_1', wPw_1, dims=None)
    return Prior
Example #13
def calcSummaryStats(Data, SS, LP, **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Returns
    --------
    SS : SuffStatBag object, with K components.
    '''
    if not hasattr(Data, 'X_NE'):
        Data.X_NE = np.hstack([Data.X, np.ones(Data.nObs)[:, np.newaxis]])

    Y_N = Data.Y
    X_NE = Data.X_NE
    E = X_NE.shape[1]

    if 'resp' in LP:
        # Dense responsibility calculations
        resp = LP['resp']
        K = resp.shape[1]
        S_yy_K = dotATB(resp, np.square(Y_N)).flatten()
        S_yx_KE = dotATB(resp, Y_N * X_NE)

        # Expected outer product
        S_xxT_KEE = np.zeros((K, E, E))
        sqrtResp_k_N = np.sqrt(resp[:, 0])
        sqrtR_X_k_NE = sqrtResp_k_N[:, np.newaxis] * X_NE
        S_xxT_KEE[0] = dotATA(sqrtR_X_k_NE)
        for k in range(1, K):
            np.sqrt(resp[:, k], out=sqrtResp_k_N)
            np.multiply(sqrtResp_k_N[:, np.newaxis], X_NE, out=sqrtR_X_k_NE)
            S_xxT_KEE[k] = dotATA(sqrtR_X_k_NE)
    else:
        # Sparse responsibilities ('spR') are not supported here yet
        raise NotImplementedError(
            "TODO: summary stats from sparse responsibilities")

    if SS is None:
        SS = SuffStatBag(K=K, D=Data.dim, E=E)
    elif not hasattr(SS, 'E'):
        SS._Fields.E = E
    SS.setField('xxT_KEE', S_xxT_KEE, dims=('K', 'E', 'E'))
    SS.setField('yx_KE', S_yx_KE, dims=('K', 'E'))
    SS.setField('yy_K', S_yy_K, dims=('K'))
    # Expected count for each k
    # Usually computed by allocmodel. But just in case...
    if not hasattr(SS, 'N'):
        if 'resp' in LP:
            SS.setField('N', LP['resp'].sum(axis=0), dims='K')
        else:
            SS.setField('N', as1D(toCArray(LP['spR'].sum(axis=0))), dims='K')

    #SS.setField("N_K", SS.N, dims="K")
    return SS
Example #14
    def calcSmoothedMu(self, X, W=None):
        ''' Compute smoothed estimate of mean of statistic xxT.

        Args
        ----
        X : 2D array, size N x D

        Returns
        -------
        Mu_1 : 2D array, size D x D
            Expected value of Cov[ X[n] ]
        Mu_2 : 1D array, size D
            Expected value of Mean[ X[n] ]
        '''
        if X is None:
            Mu1 = self.Prior.B / self.Prior.nu
            Mu2 = self.Prior.m
            return Mu1, Mu2

        if X.ndim == 1:
            X = X[np.newaxis, :]
        N, D = X.shape
        # Compute suff stats
        if W is None:
            sum_wxxT = np.dot(X.T, X)
            sum_wx = np.sum(X, axis=0)
            sum_w = X.shape[0]
        else:
            W = as1D(W)
            sqrtWX = np.sqrt(W)[:, np.newaxis] * X
            sum_wxxT = np.dot(sqrtWX.T, sqrtWX)
            sum_wx = np.dot(W, X)
            sum_w = np.sum(W)

        kappa = self.Prior.kappa + sum_w
        m = (self.Prior.m * self.Prior.kappa + sum_wx) / kappa
        Mu_2 = m

        prior_kmmT = self.Prior.kappa * np.outer(self.Prior.m, self.Prior.m)
        post_kmmT = kappa * np.outer(m, m)
        B = sum_wxxT + self.Prior.B + prior_kmmT - post_kmmT
        Mu_1 = B / (self.Prior.nu + sum_w)

        assert Mu_1.ndim == 2
        assert Mu_1.shape == (D, D)
        assert Mu_2.shape == (D, )
        return Mu_1, Mu_2
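The m computed above is the standard conjugate update: a convex combination of the prior mean and the (weighted) data mean, with the prior acting as kappa pseudo-observations. A self-contained numerical illustration of that shrinkage, using hypothetical prior values:

import numpy as np

kappa0, m0 = 5.0, np.zeros(2)  # prior: 5 pseudo-observations at the origin
X = np.random.RandomState(0).randn(100, 2) + 3.0

sum_w = X.shape[0]
sum_wx = X.sum(axis=0)
m = (kappa0 * m0 + sum_wx) / (kappa0 + sum_w)

# Posterior mean sits between prior mean (0) and sample mean (~3),
# pulled strongly toward the data because sum_w >> kappa0
xbar = X.mean(axis=0)
assert np.all(m > m0) and np.all(m < xbar)
print(m, xbar)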
Example #15
    def calcSmoothedMu(self, X, W=None):
        ''' Compute smoothed estimate of mean of statistic xxT.

        Args
        ----
        X : 2D array, size N x D

        Returns
        -------
        Mu_1 : 1D array, size D
            Expected value of Var[ X[n,d] ]
        Mu_2 : 1D array, size D
            Expected value of Mean[ X[n] ]
        '''
        if X is None:
            Mu1 = self.Prior.beta / self.Prior.nu
            Mu2 = self.Prior.m
            return Mu1, Mu2

        if X.ndim == 1:
            X = X[np.newaxis, :]
        N, D = X.shape
        # Compute suff stats
        if W is None:
            sum_wxx = np.sum(np.square(X), axis=0)
            sum_wx = np.sum(X, axis=0)
            sum_w = X.shape[0]
        else:
            W = as1D(W)
            sum_wxx = np.dot(W, np.square(X))
            sum_wx = np.dot(W, X)
            sum_w = np.sum(W)

        post_kappa = self.Prior.kappa + sum_w
        post_m = (self.Prior.m * self.Prior.kappa + sum_wx) / post_kappa
        Mu_2 = post_m

        prior_kmm = self.Prior.kappa * (self.Prior.m * self.Prior.m)
        post_kmm = post_kappa * (post_m * post_m)
        post_beta = sum_wxx + self.Prior.beta + prior_kmm - post_kmm
        Mu_1 = post_beta / (self.Prior.nu + sum_w)

        assert Mu_1.ndim == 1
        assert Mu_1.shape == (D, )
        assert Mu_2.shape == (D, )
        return Mu_1, Mu_2
Example #16
def calcSummaryStats(Data, SS, LP, **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Returns
    --------
    SS : SuffStatBag object, with K components.
    '''
    X = Data.X
    D = Data.dim
    if 'resp' in LP:
        resp = LP['resp']
        K = resp.shape[1]
        # Compute expected outer-product statistic
        S_xxT = np.zeros((K, Data.dim, Data.dim))
        sqrtResp_k = np.sqrt(resp[:, 0])
        sqrtRX_k = sqrtResp_k[:, np.newaxis] * Data.X
        S_xxT[0] = dotATA(sqrtRX_k)
        for k in range(1, K):
            np.sqrt(resp[:, k], out=sqrtResp_k)
            np.multiply(sqrtResp_k[:, np.newaxis], Data.X, out=sqrtRX_k)
            S_xxT[k] = dotATA(sqrtRX_k)

        # Debug-only sanity check: recompute the per-component outer
        # products the straightforward way and confirm they match.
        sqrtResp = np.sqrt(resp)
        xxT = np.zeros((K, D, D))
        for k in range(K):
            xxT[k] = dotATA(sqrtResp[:, k][:, np.newaxis] * Data.X)
        assert np.allclose(xxT, S_xxT)
    else:
        spR = LP['spR']
        K = spR.shape[1]
        # Compute expected outer-product statistic
        S_xxT = calcSpRXXT(X=X, spR_csr=spR)

    if SS is None:
        SS = SuffStatBag(K=K, D=D)
    # Expected outer-product for each state k
    SS.setField('xxT', S_xxT, dims=('K', 'D', 'D'))
    # Expected count for each k
    #  Usually computed by allocmodel. But sometimes not (eg TopicModel)
    if not hasattr(SS, 'N'):
        if 'resp' in LP:
            SS.setField('N', LP['resp'].sum(axis=0), dims='K')
        else:
            SS.setField('N', as1D(toCArray(LP['spR'].sum(axis=0))), dims='K')
    return SS
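The sqrt(resp) trick used above rests on the identity sum_n r[n,k] x_n x_n^T = A_k^T A_k with A_k = sqrt(r[:,k])[:, None] * X, which turns a weighted sum of N outer products into one dense matrix product. A short NumPy verification (dotATA(A) plays the role of A.T @ A):

import numpy as np

PRNG = np.random.RandomState(0)
N, D = 200, 4
X = PRNG.randn(N, D)
r_k = PRNG.rand(N)  # responsibilities for a single component k

direct = np.einsum('n,nd,ne->de', r_k, X, X)  # explicit weighted sum
sqrtRX = np.sqrt(r_k)[:, np.newaxis] * X
assert np.allclose(direct, sqrtRX.T @ sqrtRX)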
Example #17
def calcSummaryStats(Data, SS, LP, DataAtomType='doc', **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Returns
    --------
    SS : SuffStatBag object, with K components.
    '''
    if 'resp' in LP:
        K = LP['resp'].shape[1]
    else:
        K = LP['spR'].shape[1]
        nnzPerRow = LP['nnzPerRow']
    if SS is None:
        SS = SuffStatBag(K=K, D=Data.vocab_size)
    if DataAtomType == 'doc':
        # X : 2D sparse matrix, size nDoc x vocab_size
        X = Data.getSparseDocTypeCountMatrix()
        # WordCounts : 2D array, size K x vocab_size
        # obtained by sparse matrix multiply
        # here, '*' operator does this because X is sparse matrix type
        Nvec = None
        if 'resp' in LP:
            WordCounts = LP['resp'].T * X
            if not hasattr(SS, 'N'):
                Nvec = LP['resp'].sum(axis=0)
        else:
            WordCounts = (LP['spR'].T * X).toarray()
            if not hasattr(SS, 'N'):
                Nvec = as1D(toCArray(LP['spR'].sum(axis=0)))
        if Nvec is not None:
            SS.setField('N', Nvec, dims=('K'))
    else:
        # 2D sparse matrix, size V x N
        X = Data.getSparseTokenTypeCountMatrix()
        if 'resp' in LP:
            WordCounts = (X * LP['resp']).T  # matrix-matrix product
        else:
            WordCounts = (X * LP['spR']).T.toarray()
    SS.setField('WordCounts', WordCounts, dims=('K', 'D'))
    SS.setField('SumWordCounts', np.sum(WordCounts, axis=1), dims=('K'))
    return SS
    """
Example #18
def pi2eta(pi_d):
    ''' Transform vector on simplex to unconstrained real vector

    Returns
    -------
    eta : 1D array, size K-1

    Examples
    --------
    >>> print(float(pi2eta(eta2pi(0.42))))
    0.42

    >>> print(float(pi2eta(eta2pi(-1.337))))
    -1.337

    >>> print(pi2eta(eta2pi([-1, 0, 1])))
    [-1.  0.  1.]
    '''
    pi_d = as1D(np.asarray(pi_d))
    eta_d = pi_d[:-1] / pi_d[-1]
    np.log(eta_d, out=eta_d)
    return eta_d
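Because eta2pi (Example #9) pins the last coordinate's logit to zero, pi2eta recovers it exactly by taking log-ratios against the last simplex entry. A round-trip check in both directions, assuming both functions are in scope:

import numpy as np

eta = np.array([0.3, -2.0, 1.7])
assert np.allclose(pi2eta(eta2pi(eta)), eta)

pi = np.array([0.2, 0.3, 0.5])
assert np.allclose(eta2pi(pi2eta(pi)), pi)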
Example #19
def calcSummaryStats(Data, SS, LP, **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Returns
    --------
    SS : SuffStatBag object, with K components.
    '''
    X = Data.X
    if 'resp' in LP:
        resp = LP['resp']
        K = resp.shape[1]
        # 1/2: Compute mean statistic
        S_x = dotATB(resp, X)
        # 2/2: Compute expected outer-product statistic
        S_xx = calcRXX_withDenseResp(resp, X)
    else:
        spR = LP['spR']
        K = spR.shape[1]
        # 1/2: Compute mean statistic
        S_x = spR.T * X
        # 2/2: Compute expected outer-product statistic
        S_xx = calcSpRXX(X=X, spR_csr=spR)
    if SS is None:
        SS = SuffStatBag(K=K, D=Data.dim)
    # Expected mean for each state k
    SS.setField('x', S_x, dims=('K', 'D'))
    # Expected sum-of-squares for each state k
    SS.setField('xx', S_xx, dims=('K', 'D'))
    # Expected count for each k
    #  Usually computed by allocmodel. But sometimes not (eg TopicModel)
    if not hasattr(SS, 'N'):
        if 'resp' in LP:
            SS.setField('N', LP['resp'].sum(axis=0), dims='K')
        else:
            SS.setField('N', as1D(toCArray(LP['spR'].sum(axis=0))), dims='K')
    return SS
Example #20
    def calcSmoothedMu(self, X, W=None):
        ''' Compute smoothed estimate of mean of statistic xxT.

        Args
        ----
        X : 2D array, size N x D

        Returns
        -------
        Mu : 2D array, size D x D
        '''
        Prior_nu = self.Prior.nu - self.D - 1
        # Prior_nu = self.Prior.nu

        if X is None:
            Mu = self.Prior.B / (Prior_nu)
            return Mu
        if X.ndim == 1:
            X = X[np.newaxis, :]
        N, D = X.shape
        # Compute suff stats
        if W is None:
            sum_wxxT = np.dot(X.T, X)
            sum_w = X.shape[0]
        else:
            W = as1D(W)
            wX = np.sqrt(W)[:, np.newaxis] * X
            sum_wxxT = np.dot(wX.T, wX)
            sum_w = np.sum(W)
        Mu = (self.Prior.B + sum_wxxT) / (Prior_nu + sum_w)
        assert Mu.ndim == 2
        assert Mu.shape == (D, D)
        return Mu
Example #21
def plotGauss1DFromHModel(hmodel,
                          compListToPlot=None,
                          compsToHighlight=None,
                          activeCompIDs=None,
                          MaxKToDisplay=50,
                          proba_thr=0.0001,
                          ax_handle=None,
                          Colors=Colors,
                          dataset=None,
                          **kwargs):
    ''' Make line plot of pdf for each component (1D observations).
    '''
    if ax_handle is not None:
        pylab.sca(ax_handle)

    if compsToHighlight is not None:
        compsToHighlight = as1D(np.asarray(compsToHighlight))
    else:
        compsToHighlight = list()
    if compListToPlot is None:
        compListToPlot = np.arange(0, hmodel.obsModel.K)
    if activeCompIDs is None:
        activeCompIDs = np.arange(0, hmodel.obsModel.K)

    # Load appearance probabilities as single vector
    if hmodel.allocModel.K == hmodel.obsModel.K:
        w = hmodel.allocModel.get_active_comp_probs()
    else:
        w = np.ones(hmodel.obsModel.K)

    if dataset is not None:
        if hasattr(dataset, 'X'):
            pylab.hist(dataset.X[:, 0], 50, density=1)
            #Xtile = np.tile(Data.X[:, 0], (2, 1))
            #ys = 0.1 * np.arange(2)
            #pylab.plot(Xtile, ys, 'k-')

    nSkip = 0
    nGood = 0
    for ii, compID in enumerate(compListToPlot):
        if compID not in activeCompIDs:
            continue

        kk = np.flatnonzero(activeCompIDs == compID)
        assert kk.size == 1
        kk = kk[0]

        if w[kk] < proba_thr and compID not in compsToHighlight:
            nSkip += 1
            continue

        mu = hmodel.obsModel.get_mean_for_comp(kk)
        sigma2 = hmodel.obsModel.get_covar_mat_for_comp(kk)

        if len(compsToHighlight) == 0 or compID in compsToHighlight:
            color = Colors[ii % len(Colors)]
            plotGauss1D(mu, sigma2, color=color)
        elif kk not in compsToHighlight:
            plotGauss1D(mu, sigma2, color='k')

        nGood += 1
        if nGood >= MaxKToDisplay:
            print('DISPLAY LIMIT EXCEEDED. Showing %d/%d components'
                  % (nGood, len(activeCompIDs)))
            break
    if nSkip > 0:
        print('SKIPPED %d comps with size below %.2f' % (nSkip, proba_thr))
Example #22
def loadTopicModel(matfilepath,
                   queryLap=None,
                   prefix=None,
                   returnWordCounts=0,
                   returnTPA=0,
                   normalizeTopics=0,
                   normalizeProbs=0,
                   **kwargs):
    ''' Load saved topic model

    Returns
    -------
    topics : 2D array, K x vocab_size (if returnTPA)
    probs : 1D array, size K (if returnTPA)
    alpha : scalar (if returnTPA)
    hmodel : HModel
    WordCounts : 2D array, size K x vocab_size (if returnWordCounts)
    '''
    if prefix is None:
        prefix, lapQuery = getPrefixForLapQuery(matfilepath, queryLap)
    # avoids circular import
    from bnpy.HModel import HModel
    if len(glob.glob(os.path.join(matfilepath, "*.log_prob_w"))) > 0:
        return loadTopicModelFromMEDLDA(matfilepath,
                                        prefix,
                                        returnTPA=returnTPA)

    snapshotList = glob.glob(os.path.join(matfilepath, 'Lap*TopicSnapshot'))
    matfileList = glob.glob(os.path.join(matfilepath, 'Lap*TopicModel.mat'))
    if len(snapshotList) > 0:
        if prefix is None:
            snapshotList.sort()
            snapshotPath = snapshotList[-1]
        else:
            snapshotPath = None
            for curPath in snapshotList:
                if curPath.count(prefix):
                    snapshotPath = curPath
        return loadTopicModelFromTxtFiles(snapshotPath,
                                          normalizeTopics=normalizeTopics,
                                          normalizeProbs=normalizeProbs,
                                          returnWordCounts=returnWordCounts,
                                          returnTPA=returnTPA)

    if prefix is not None:
        matfilepath = os.path.join(matfilepath, prefix + 'TopicModel.mat')
    Mdict = loadDictFromMatfile(matfilepath)
    if 'SparseWordCount_data' in Mdict:
        data = np.asarray(Mdict['SparseWordCount_data'], dtype=np.float64)
        K = int(Mdict['K'])
        vocab_size = int(Mdict['vocab_size'])
        try:
            indices = Mdict['SparseWordCount_indices']
            indptr = Mdict['SparseWordCount_indptr']
            WordCounts = scipy.sparse.csr_matrix((data, indices, indptr),
                                                 shape=(K, vocab_size))
        except KeyError:
            rowIDs = Mdict['SparseWordCount_i'] - 1
            colIDs = Mdict['SparseWordCount_j'] - 1
            WordCounts = scipy.sparse.csr_matrix((data, (rowIDs, colIDs)),
                                                 shape=(K, vocab_size))
        Mdict['WordCounts'] = WordCounts.toarray()
    if returnTPA:
        # Load topics : 2D array, K x vocab_size
        if 'WordCounts' in Mdict:
            topics = Mdict['WordCounts'] + Mdict['lam']
        else:
            topics = Mdict['topics']
        topics = as2D(toCArray(topics, dtype=np.float64))
        assert topics.ndim == 2
        K = topics.shape[0]
        if normalizeTopics:
            topics /= topics.sum(axis=1)[:, np.newaxis]

        # Load probs : 1D array, size K
        try:
            probs = Mdict['probs']
        except KeyError:
            probs = (1.0 / K) * np.ones(K)
        probs = as1D(toCArray(probs, dtype=np.float64))
        assert probs.ndim == 1
        assert probs.size == K
        if normalizeProbs:
            probs = probs / np.sum(probs)

        # Load alpha : scalar float > 0
        try:
            alpha = float(Mdict['alpha'])
        except KeyError:
            if 'alpha' in os.environ:
                alpha = float(os.environ['alpha'])
            else:
                raise ValueError('Unknown parameter alpha')
        if 'eta' in Mdict:
            return topics, probs, alpha, as1D(toCArray(Mdict['eta']))
        return topics, probs, alpha

    infAlg = 'VB'
    if 'gamma' in Mdict:
        aPriorDict = dict(alpha=Mdict['alpha'], gamma=Mdict['gamma'])
        HDPTopicModel = AllocModelConstructorsByName['HDPTopicModel']
        amodel = HDPTopicModel(infAlg, aPriorDict)
    else:
        FiniteTopicModel = AllocModelConstructorsByName['FiniteTopicModel']
        amodel = FiniteTopicModel(infAlg, dict(alpha=Mdict['alpha']))
    omodel = ObsModelConstructorsByName['Mult'](infAlg, **Mdict)
    hmodel = HModel(amodel, omodel)
    hmodel.set_global_params(**Mdict)
    if returnWordCounts:
        return hmodel, Mdict['WordCounts']
    return hmodel
Example #23
    def from_dict(self, Dict):
        self.inferType = Dict['inferType']
        self.K = Dict['K']
        self.uhat = as1D(Dict['uhat'])
Example #24
def alignEstimatedStateSeqToTruth(zHat, zTrue, useInfo=None, returnInfo=False):
    ''' Relabel the states in zHat to minimize the Hamming distance to zTrue

    Args
    --------
    zHat : 1D array
        each entry is an integer label in {0, 1, ... Kest-1}
    zTrue : 1D array
        each entry is an integer label in {0, 1, ... Ktrue-1}

    Returns
    --------
    zHatAligned : 1D array
        relabeled version of zHat that aligns to zTrue
    AInfo : dict
        information about the alignment
    '''
    zHat = as1D(zHat)
    zTrue = as1D(zTrue)
    Kest = zHat.max() + 1
    Ktrue = zTrue.max() + 1

    if useInfo is None:
        if not hasMunkres:
            raise ImportError(
                "alignEstimatedStateSeqToTruth requires the munkres package." +
                " Please install via 'pip install munkres'")

        CostMatrix = buildCostMatrix(zHat, zTrue)
        MunkresAlg = munkres.Munkres()
        tmpAlignedRowColPairs = MunkresAlg.compute(CostMatrix)
        AlignedRowColPairs = list()
        OrigToAlignedMap = dict()
        AlignedToOrigMap = dict()
        for (ktrue, kest) in tmpAlignedRowColPairs:
            if kest < Kest:
                AlignedRowColPairs.append((ktrue, kest))
                OrigToAlignedMap[kest] = ktrue
                AlignedToOrigMap[ktrue] = kest
    else:
        # Unpack existing alignment info
        AlignedRowColPairs = useInfo['AlignedRowColPairs']
        CostMatrix = useInfo['CostMatrix']
        AlignedToOrigMap = useInfo['AlignedToOrigMap']
        OrigToAlignedMap = useInfo['OrigToAlignedMap']
        Ktrue = useInfo['Ktrue']
        Kest = useInfo['Kest']

        assert np.allclose(Ktrue, zTrue.max() + 1)
        Khat = zHat.max() + 1

        # Account for extra states present in zHat
        # that have never been aligned before.
        # They should align to the next available UID in set
        # [Ktrue, Ktrue+1, Ktrue+2, ...]
        # so they don't get confused for a true label
        ktrueextra = np.max([r for r, c in AlignedRowColPairs])
        ktrueextra = int(np.maximum(ktrueextra + 1, Ktrue))
        for khat in np.arange(Kest, Khat + 1):
            if khat in OrigToAlignedMap:
                continue
            OrigToAlignedMap[khat] = ktrueextra
            AlignedToOrigMap[ktrueextra] = khat
            AlignedRowColPairs.append((ktrueextra, khat))
            ktrueextra += 1

    zHatA = -1 * np.ones_like(zHat)
    for kest in np.unique(zHat):
        mask = zHat == kest
        zHatA[mask] = OrigToAlignedMap[kest]
    assert np.all(zHatA >= 0)

    if returnInfo:
        return zHatA, dict(CostMatrix=CostMatrix,
                           AlignedRowColPairs=AlignedRowColPairs,
                           OrigToAlignedMap=OrigToAlignedMap,
                           AlignedToOrigMap=AlignedToOrigMap,
                           Ktrue=Ktrue,
                           Kest=Kest)
    else:
        return zHatA
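The Munkres (Hungarian) step simply selects the row/column pairing with minimal total cost. On the toy cost matrix from Example #2, where the estimate swapped labels 0 and 1 (assuming the munkres package is installed; Munkres().compute() returns a list of (row, column) pairs):

from munkres import Munkres

C = [[2, 0, 2],
     [0, 2, 2],
     [2, 2, 0]]
pairs = Munkres().compute(C)
print(pairs)  # [(0, 1), (1, 0), (2, 2)]: relabel estimated 1->0, 0->1, 2->2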
Example #25
def alignEstimatedStateSeqToTruth(zHat, zTrue, useInfo=None,
                                  returnInfo=False,
                                  standardize_order_of_extras=True):
    ''' Relabel the states in zHat to minimize the Hamming distance to zTrue

    Args
    --------
    zHat : 1D array
        each entry is an integer label in {0, 1, ... Kest-1}
    zTrue : 1D array
        each entry is an integer label in {0, 1, ... Ktrue-1}

    Returns
    --------
    zHatAligned : 1D array
        relabeled version of zHat that aligns to zTrue
    AInfo : dict
        information about the alignment
    '''
    zHat = as1D(zHat)
    zTrue = as1D(zTrue)
    Kest = zHat.max() + 1
    Ktrue = zTrue.max() + 1

    if useInfo is None:
        if not hasMunkres:
            raise ImportError(
                "alignEstimatedStateSeqToTruth requires the munkres package."
                + " Please install via 'pip install munkres'")

        CostMatrix = buildCostMatrix(zHat, zTrue)
        MunkresAlg = munkres.Munkres()
        tmpAlignedRowColPairs = MunkresAlg.compute(CostMatrix)
        AlignedRowColPairs = list()
        OrigToAlignedMap = dict()
        AlignedToOrigMap = dict()
        for (ktrue, kest) in tmpAlignedRowColPairs:
            if kest < Kest:
                AlignedRowColPairs.append((ktrue, kest))
                OrigToAlignedMap[kest] = ktrue
                AlignedToOrigMap[ktrue] = kest

        if (standardize_order_of_extras and Ktrue < Kest
                and np.unique(zTrue).size < np.unique(zHat).size):
            # All extra states will have ids in Ktrue, Ktrue+1, Ktrue+2, ....
            # Break ties by assigning these in ascending order by first appearance
            extra_states = []
            for (kt, ke) in AlignedRowColPairs:
                if kt >= Ktrue and ke in zHat:
                    extra_states.append(ke)
            rank = dict()
            for x in sorted(extra_states):
                match_ids = np.flatnonzero(zHat == x)
                if match_ids.size == 0:
                    continue
                rank[x] = match_ids[0]

            ## Renumber by rank from smallest to largest
            old_ids, rank_vals = zip(*list(rank.items()))
            old_ids = np.asarray(old_ids)
            new_ids = Ktrue + np.argsort(np.asarray(rank_vals))
            old2new = dict(zip(old_ids, new_ids))
            OrigToAlignedMap = dict()
            AlignedToOrigMap = dict()
            for a, b in AlignedRowColPairs:
                newa = old2new.get(b, a)
                OrigToAlignedMap[b] = newa
                AlignedToOrigMap[newa] = b
            AlignedRowColPairs = list(AlignedToOrigMap.items())

    else:
        # Unpack existing alignment info
        AlignedRowColPairs = useInfo['AlignedRowColPairs']
        CostMatrix = useInfo['CostMatrix']
        AlignedToOrigMap = useInfo['AlignedToOrigMap']
        OrigToAlignedMap = useInfo['OrigToAlignedMap']
        Ktrue = useInfo['Ktrue']
        Kest = useInfo['Kest']

        assert Ktrue >= zTrue.max() + 1
        
        Khat = zHat.max() + 1

        # Account for extra states present in zHat
        # that have never been aligned before.
        # They should align to the next available UID in set
        # [Ktrue, Ktrue+1, Ktrue+2, ...]
        # so they don't get confused for a true label
        ktrueextra = np.max([r for r, c in AlignedRowColPairs])
        ktrueextra = int(np.maximum(ktrueextra + 1, Ktrue))
        for khat in np.arange(Kest, Khat + 1):
            if khat in OrigToAlignedMap:
                continue
            OrigToAlignedMap[khat] = ktrueextra
            AlignedToOrigMap[ktrueextra] = khat
            AlignedRowColPairs.append((ktrueextra, khat))
            ktrueextra += 1

    zHatA = -1 * np.ones_like(zHat)
    for kest in np.unique(zHat):
        mask = zHat == kest
        zHatA[mask] = OrigToAlignedMap[kest]
    assert np.all(zHatA >= 0)

    if returnInfo:
        return zHatA, dict(CostMatrix=CostMatrix,
                           AlignedRowColPairs=AlignedRowColPairs,
                           OrigToAlignedMap=OrigToAlignedMap,
                           AlignedToOrigMap=AlignedToOrigMap,
                           Ktrue=Ktrue,
                           Kest=Kest)
    else:
        return zHatA
Example #26
def loadTopicModelFromTxtFiles(snapshotPath,
                               returnTPA=False,
                               returnWordCounts=False,
                               normalizeProbs=True,
                               normalizeTopics=True,
                               **kwargs):
    ''' Load from snapshot text files.

    Returns
    -------
    hmodel
    '''
    Mdict = dict()
    possibleKeys = [
        'K', 'probs', 'alpha', 'beta', 'lam', 'gamma', 'nTopics', 'nTypes',
        'vocab_size'
    ]
    keyMap = dict(beta='lam', nTopics='K', nTypes='vocab_size')
    for key in possibleKeys:
        try:
            arr = np.loadtxt(snapshotPath + "/%s.txt" % (key))
            if key in keyMap:
                Mdict[keyMap[key]] = arr
            else:
                Mdict[key] = arr
        except Exception:
            pass
    assert 'K' in Mdict
    assert 'lam' in Mdict
    K = int(Mdict['K'])
    V = int(Mdict['vocab_size'])

    if os.path.exists(snapshotPath + "/topics.txt"):
        Mdict['topics'] = np.loadtxt(snapshotPath + "/topics.txt")
        Mdict['topics'] = as2D(toCArray(Mdict['topics'], dtype=np.float64))
        assert Mdict['topics'].ndim == 2
        assert Mdict['topics'].shape == (K, V)
    else:
        TWC_data = np.loadtxt(snapshotPath + "/TopicWordCount_data.txt")
        TWC_inds = np.loadtxt(snapshotPath + "/TopicWordCount_indices.txt",
                              dtype=np.int32)
        if os.path.exists(snapshotPath + "/TopicWordCount_cscindptr.txt"):
            TWC_cscindptr = np.loadtxt(snapshotPath +
                                       "/TopicWordCount_cscindptr.txt",
                                       dtype=np.int32)
            TWC = scipy.sparse.csc_matrix((TWC_data, TWC_inds, TWC_cscindptr),
                                          shape=(K, V))
        else:
            TWC_csrindptr = np.loadtxt(snapshotPath +
                                       "/TopicWordCount_indptr.txt",
                                       dtype=np.int32)
            TWC = scipy.sparse.csr_matrix((TWC_data, TWC_inds, TWC_csrindptr),
                                          shape=(K, V))

        Mdict['WordCounts'] = TWC.toarray()

    if returnTPA:
        # Load topics : 2D array, K x vocab_size
        if 'WordCounts' in Mdict:
            topics = Mdict['WordCounts'] + Mdict['lam']
        else:
            topics = Mdict['topics']
        topics = as2D(toCArray(topics, dtype=np.float64))
        assert topics.ndim == 2
        K = topics.shape[0]
        if normalizeTopics:
            topics /= topics.sum(axis=1)[:, np.newaxis]

        # Load probs : 1D array, size K
        try:
            probs = Mdict['probs']
        except KeyError:
            probs = (1.0 / K) * np.ones(K)
        probs = as1D(toCArray(probs, dtype=np.float64))
        assert probs.ndim == 1
        assert probs.size == K
        if normalizeProbs:
            probs = probs / np.sum(probs)

        # Load alpha : scalar float > 0
        try:
            alpha = float(Mdict['alpha'])
        except KeyError:
            if 'alpha' in os.environ:
                alpha = float(os.environ['alpha'])
            else:
                raise ValueError('Unknown parameter alpha')
        return topics, probs, alpha

    # BUILD HMODEL FROM LOADED TXT
    infAlg = 'VB'
    # avoids circular import
    from bnpy.HModel import HModel
    if 'gamma' in Mdict:
        aPriorDict = dict(alpha=Mdict['alpha'], gamma=Mdict['gamma'])
        HDPTopicModel = AllocModelConstructorsByName['HDPTopicModel']
        amodel = HDPTopicModel(infAlg, aPriorDict)
    else:
        FiniteTopicModel = AllocModelConstructorsByName['FiniteTopicModel']
        amodel = FiniteTopicModel(infAlg, dict(alpha=Mdict['alpha']))
    omodel = ObsModelConstructorsByName['Mult'](infAlg, **Mdict)
    hmodel = HModel(amodel, omodel)
    hmodel.set_global_params(**Mdict)
    if returnWordCounts:
        return hmodel, Mdict['WordCounts']
    return hmodel
Example #27
    def __init__(self,
                 X=None,
                 nObsTotal=None,
                 TrueZ=None,
                 Xprev=None,
                 Y=None,
                 TrueParams=None,
                 name=None,
                 summary=None,
                 dtype='auto',
                 row_names=None,
                 column_names=None,
                 y_column_names=None,
                 xprev_column_names=None,
                 do_copy=True,
                 **kwargs):
        ''' Constructor for XData instance given in-memory dense array X.

        Post Condition
        ---------
        self.X : 2D array, size N x D
            with standardized dtype, alignment, byteorder.
        '''
        if dtype == 'auto':
            dtype = X.dtype
        if not do_copy and X.dtype == dtype:
            self.X = as2D(X)
        else:
            self.X = as2D(toCArray(X, dtype=dtype))

        if Xprev is not None:
            self.Xprev = as2D(toCArray(Xprev, dtype=dtype))
        if Y is not None:
            self.Y = as2D(toCArray(Y, dtype=dtype))

        # Verify attributes are consistent
        self._set_dependent_params(nObsTotal=nObsTotal)
        self._check_dims(do_copy=do_copy)

        # Add optional true parameters / true hard labels
        if TrueParams is not None:
            self.TrueParams = TrueParams
        if TrueZ is not None:
            if not hasattr(self, 'TrueParams'):
                self.TrueParams = dict()
            self.TrueParams['Z'] = as1D(toCArray(TrueZ))
            self.TrueParams['K'] = np.unique(self.TrueParams['Z']).size
        if summary is not None:
            self.summary = summary
        if name is not None:
            self.name = str(name)

        # Add optional row names
        if row_names is None:
            self.row_names = map(str, range(self.nObs))
        else:
            assert len(row_names) == self.nObs
            self.row_names = map(str, row_names)

        # Add optional column names
        if column_names is None:
            self.column_names = map(lambda n: "dim_%d" % n, range(self.dim))
        else:
            assert len(column_names) == self.dim
            self.column_names = map(str, column_names)
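A minimal construction, assuming bnpy is installed and this class is exposed as bnpy.data.XData (its usual location in that package):

import numpy as np
from bnpy.data import XData

X = np.random.randn(100, 3)
dataset = XData(X=X, name='toy_gaussian')
print(dataset.nObs, dataset.dim)  # 100 3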
Example #28
if __name__ == '__main__':
    N = 7
    muG = np.asarray([0.02, 0.1, 0.5, 0.9, 0.98])
    muG = N * np.vstack([muG, 1.0-muG]).T.copy()
    print(muG)

    phiG = mu2phi(muG, N)
    print(phiG)
    print(phi2mu(mu2phi(muG, N), N))

    print('BREGMAN Dist Mat:')
    print(bregmanDiv(muG, muG, N))

    for row in range(phiG.shape[0]):
        phi = as1D(phiG[row])
        mu = phi2mu(phi, N)

        print('phi = %.3f' % (phi))
        print('mu = ', mu)
        print('----------')
        cdf = 0.0
        for xVec in generateAllXForFixedN(N):
            pdf = pdf_Phi(xVec, phi, N)
            cdf += pdf
            print('%.3f %.3f %.3f %s' % (
                cdf,
                pdf,
                pdf_Mu(xVec, mu, N),
                xVec))
            #print pdf_Phi(xVec, mu2phi(mu,N), N),
Example #29
    def from_dict(self, Dict):
        self.inferType = Dict['inferType']
        self.K = Dict['K']
        self.rho = as1D(Dict['rho'])
        self.omega = as1D(Dict['omega'])
Example #30
        numPi_d, numf, numInfo = estimatePi2(ids_d, cts_d, topics, alpha,
            scale=1.0, #/np.sum(cts_d),
            approx_grad=True)
        print("Numerical Pi[%d]:\n %s" % (d, pi2str(numPi_d)))

        estPi_d, f, Info = estimatePi2(ids_d, cts_d, topics, alpha,
            scale=1.0, #/np.sum(cts_d))
            approx_grad=False,
            )
        print("Estimated Pi[%d]:\n %s" % (d, pi2str(estPi_d)))

        
        PRNG = np.random.RandomState(d)
        nMatch = 0
        nRep = 10
        for rep in range(nRep):
            initPi_d = as1D(PRNG.dirichlet(K*np.ones(K), size=1))
            estPiFromRand, f2, I2 = estimatePi2(ids_d, cts_d, topics, alpha,
                scale=1.0, #/np.sum(cts_d),
                piInit=initPi_d,
                approx_grad=False)
            if np.allclose(estPi_d, estPiFromRand, rtol=0, atol=atol):
                nMatch += 1
            else:
                print("initrandom Pi[%d]:\n %s" % (
                    d, pi2str(estPiFromRand)))
                print(f)
                print(f2)
        print("%d/%d random inits within %s" % (nMatch, nRep, atol))