def test_correctness(X=None,
                     R=None,
                     nnzPerRow=2,
                     N=100,
                     K=3,
                     D=2,
                     funcList=None,
                     prefix='calcRXX_'):
    ''' Verify that all candidate implementations return the same answer.

    Each function in funcList is called exactly once with the same keyword
    arguments, then every pair of results is compared via np.allclose.

    Parameters
    ----------
    X, R, nnzPerRow, N, K, D :
        Forwarded unchanged to _make_kwarg_dict, which builds the keyword
        arguments handed to every function under test.
    funcList : sequence of callables, optional
        Implementations to compare.
        If None, obtained from make_funcList(prefix=prefix).
    prefix : str
        Passed to make_funcList when funcList is None.
        If it contains 'calcRlogR' and the problem has nnzPerRow == 1,
        results are flattened (and summed where needed) before comparing.

    Raises
    ------
    AssertionError
        If any pair of functions disagrees; the message names the pair.

    Post Condition
    --------------
    Prints one confirmation line to stdout when all pairs agree.
    '''
    if funcList is None:
        funcList = make_funcList(prefix=prefix)
    kwargs = _make_kwarg_dict(X=X, R=R, nnzPerRow=nnzPerRow, N=N, K=K, D=D)

    # Run every implementation once up front. Recomputing inside the
    # pairwise loop would call each function once per pair it appears in.
    answers = [func(**kwargs) for func in funcList]

    for i, raw_i in enumerate(answers):
        for j in range(i + 1, len(answers)):
            # Work on per-pair names so the cached results stay untouched
            # for later comparisons.
            ans_i = raw_i
            ans_j = answers[j]

            if prefix.count('calcRlogR') and kwargs['nnzPerRow'] == 1:
                # SPARSE routine gives scalar 0.0
                # but DENSE routine gives vector of all zeros
                ans_i = as1D(toCArray(ans_i))
                ans_j = as1D(toCArray(ans_j))
                # Collapse the longer result to a scalar total so that it
                # is comparable with the short one.
                if ans_i.size < K:
                    ans_j = np.sum(ans_j)
                elif ans_j.size < K:
                    ans_i = np.sum(ans_i)
            assert np.allclose(ans_i, ans_j), (
                'functions %s and %s give different answers' % (
                    getattr(funcList[i], '__name__', repr(funcList[i])),
                    getattr(funcList[j], '__name__', repr(funcList[j]))))
    print('  all pairs of funcs give same answer')
Exemple #2
0
def eta2pi(eta_Km1):
    ''' Transform unconstrained real vector to vector on simplex

    Each of the first K-1 output entries is proportional to
    exp(eta_Km1[k]) + 1e-100, the final entry is proportional to 1,
    and the whole vector is normalized to sum to one.

    Args
    ----
    eta_Km1 : scalar or array-like with K-1 entries
        Converted via np.asarray and then as1D before use.

    Returns
    -------
    pi_K : 1D array, size K
    '''
    eta_Km1 = as1D(np.asarray(eta_Km1))
    K = eta_Km1.size + 1
    pi_K = np.ones(K)
    # View onto the first K-1 slots; writes below land directly in pi_K.
    head_Km1 = pi_K[:-1]
    head_Km1[:] = np.exp(eta_Km1)
    # Tiny offset keeps every entry nonzero even when exp() underflows.
    head_Km1 += 1e-100
    # The last slot still holds 1.0, which accounts for the "1.0 +" term.
    normConst = 1.0 + np.sum(head_Km1)
    pi_K /= normConst
    return pi_K
Exemple #3
0
def pi2eta(pi_K):
    ''' Transform vector on simplex to unconstrained real vector

    Computes the log of each of the first K-1 entries relative to
    the final entry: eta[k] = log(pi_K[k] / pi_K[K-1]).

    Args
    ----
    pi_K : array-like with K entries
        Converted via np.asarray and then as1D before use.

    Returns
    -------
    eta_Km1 : 1D array, size K-1

    Examples
    --------
    # py2 vs py3 round error without format str
    >>> print("%.5f" % float(pi2eta(eta2pi(0.42))))
    0.42000

    >>> print(float(pi2eta(eta2pi(-1.337))))
    -1.337

    >>> print(pi2eta(eta2pi([-1, 0, 1])))
    [-1.  0.  1.]
    '''
    probs_K = as1D(np.asarray(pi_K))
    lastProb = probs_K[-1]
    # The division allocates a new array, so the caller's input
    # is never modified.
    return np.log(probs_K[:-1] / lastProb)
Exemple #4
0
def calcSummaryStats(Data,
                     LP,
                     doPrecompEntropy=False,
                     doPrecompMergeEntropy=False,
                     mPairIDs=None,
                     mergePairSelection=None,
                     trackDocUsage=False,
                     **kwargs):
    r""" Calculate sufficient statistics for global updates.

    Parameters
    ----------
    Data : bnpy data object
        Only its `dim` attribute (or `vocab_size`, when `dim` is absent)
        is read here, to set D of the returned bag.
    LP : local param dict with either of the fields
        resp : Data.nObs x K array,
            where resp[n,k] = posterior resp of comp k
        spR : sparse assignment matrix with K columns
            Used only when 'resp' is absent. In that case 'nnzPerRow'
            is also read when doPrecompMergeEntropy is set.
    doPrecompEntropy : boolean flag
        indicates whether to precompute ELBO terms in advance
        used for memoized learning algorithms (moVB)
    doPrecompMergeEntropy : boolean flag
        indicates whether to precompute ELBO terms in advance
        for certain merge candidates.
    mPairIDs : sequence of merge candidates, optional
        Its length defines M, the size of the merge terms.
        Required when doPrecompMergeEntropy is set.
        (Presumably each entry is a pair of component ids.)
    mergePairSelection : unused by this function
    trackDocUsage : boolean flag
        indicates whether to count, for each comp, how many rows of
        LP['resp'] exceed 0.01. Requires the dense 'resp' field.
    **kwargs : ignored by this function

    Returns
    -------
    SS : SuffStatBag with K components
        Summarizes for this mixture model, with fields
        * N : 1D array, size K
            N[k] = expected number of items assigned to comp k

        Also has optional ELBO term when doPrecompEntropy is True
        * Hresp : 1D array, size K
            Value of 'Hresp' produced by calcELBO_NonlinearTerms.
            Stored without dims instead when that value is a plain float.
            Presumably the per-comp entropy
            Hresp[k] = - \sum_{n=1}^N resp[n,k] log resp[n,k]
            (confirm against calcELBO_NonlinearTerms).

        Also has optional Merge term when doPrecompMergeEntropy is True
        * Hresp : 1D array, size M
            One value for each merge candidate in mPairIDs.
            Not set for sparse LP with nnzPerRow <= 1.

        Also has optional Selection term when trackDocUsage is True
        * DocUsageCount : 1D array, size K

    Raises
    ------
    ValueError
        If doPrecompMergeEntropy is set and LP has neither resp nor spR.
    """
    # Both None and an empty sequence give M = 0.
    M = 0 if mPairIDs is None else len(mPairIDs)

    if 'resp' in LP:
        Nvec = np.sum(LP['resp'], axis=0)
        K = Nvec.size
    else:
        # Sparse assignment case
        Nvec = as1D(toCArray(LP['spR'].sum(axis=0)))
        K = LP['spR'].shape[1]

    D = Data.dim if hasattr(Data, 'dim') else Data.vocab_size
    SS = SuffStatBag(K=K, D=D, M=M)
    SS.setField('N', Nvec, dims='K')

    if doPrecompEntropy:
        Mdict = calcELBO_NonlinearTerms(LP=LP, returnMemoizedDict=1)
        Hresp = Mdict['Hresp']
        # Exact-type test on purpose: isinstance(..., float) would also
        # match numpy float64 scalars, which subclass the builtin float,
        # and so would change which branch those values take.
        if type(Hresp) is float:
            # SPARSE HARD ASSIGNMENTS
            SS.setELBOTerm('Hresp', Hresp, dims=None)
        else:
            SS.setELBOTerm('Hresp', Hresp, dims=('K', ))

    if doPrecompMergeEntropy:
        m_Hresp = None
        if 'resp' in LP:
            m_Hresp = -1 * NumericUtil.calcRlogR_specificpairs(
                LP['resp'], mPairIDs)
        elif 'spR' in LP:
            # With at most one nonzero per row no merge term is stored.
            if LP['nnzPerRow'] > 1:
                m_Hresp = calcSparseMergeRlogR(spR_csr=LP['spR'],
                                               nnzPerRow=LP['nnzPerRow'],
                                               mPairIDs=mPairIDs)
        else:
            raise ValueError("Need resp or spR in LP")
        if m_Hresp is not None:
            assert m_Hresp.size == len(mPairIDs)
            SS.setMergeTerm('Hresp', m_Hresp, dims='M')

    if trackDocUsage:
        # NOTE(review): reads LP['resp'] unconditionally, so this raises
        # KeyError for sparse-only LP. Confirm whether callers ever
        # request usage tracking in the sparse case.
        Usage = np.sum(LP['resp'] > 0.01, axis=0)
        SS.setSelectionTerm('DocUsageCount', Usage, dims='K')

    return SS