def test_correctness(X=None, R=None, nnzPerRow=2, N=100, K=3, D=2,
                     funcList=None, prefix='calcRXX_'):
    ''' Verify that every pair of candidate functions returns the same answer

    Each function in funcList is called with the same keyword arguments
    (built by _make_kwarg_dict) and the outputs of every unordered pair
    are compared with np.allclose. An AssertionError is raised on the
    first mismatching pair.

    Args
    ----
    X, R, nnzPerRow, N, K, D : forwarded as-is to _make_kwarg_dict
    funcList : optional sequence of callables to compare
        When None, the list is obtained from make_funcList(prefix=prefix).
    prefix : str
        Used to build the default funcList, and to detect the
        'calcRlogR' family, whose outputs need harmonizing (see below).
    '''
    if funcList is None:
        funcList = make_funcList(prefix=prefix)
    kwargs = _make_kwarg_dict(
        X=X, R=R, nnzPerRow=nnzPerRow, N=N, K=K, D=D)

    nFuncs = len(funcList)
    pairIDs = [(a, b) for a in range(nFuncs) for b in range(a + 1, nFuncs)]
    for a, b in pairIDs:
        # Both functions are re-evaluated for every pair.
        lhs = funcList[a](**kwargs)
        rhs = funcList[b](**kwargs)
        if 'calcRlogR' in prefix and kwargs['nnzPerRow'] == 1:
            # SPARSE routine gives scalar 0.0
            # but DENSE routine gives vector of all zeros.
            # Collapse the longer answer to a scalar so the two compare.
            lhs = as1D(toCArray(lhs))
            rhs = as1D(toCArray(rhs))
            if lhs.size < K:
                rhs = np.sum(rhs)
            elif rhs.size < K:
                lhs = np.sum(lhs)
        assert np.allclose(lhs, rhs)
    print(' all pairs of funcs give same answer')
def eta2pi(eta_Km1):
    ''' Transform unconstrained real vector to vector on the simplex

    Computes pi_K[k] proportional to exp(eta_Km1[k]) + 1e-100 for the
    first K-1 entries and proportional to 1 for the final entry, then
    normalizes by 1 + sum of the first K-1 unnormalized entries.

    Args
    ----
    eta_Km1 : scalar or array-like, flattened to 1D of size K-1

    Returns
    -------
    pi_K : 1D array, size K
    '''
    eta = as1D(np.asarray(eta_Km1))
    pi_K = np.ones(eta.size + 1)
    # 'head' is a view of all but the last entry; writes go into pi_K.
    head = pi_K[:-1]
    head[:] = np.exp(eta)
    # Tiny offset keeps every entry strictly positive even if exp
    # underflows (presumably so the log taken by pi2eta stays finite).
    head += 1e-100
    normConst = 1.0 + np.sum(head)
    pi_K /= normConst
    return pi_K
def pi2eta(pi_K):
    ''' Transform vector on simplex to unconstrained real vector

    Each of the first K-1 entries is divided by the last entry,
    and the natural log of that ratio is returned.

    Returns
    -------
    eta_Km1 : 1D array, size K-1

    Examples
    --------
    # py2 vs py3 round error without format str
    >>> print("%.5f" % float(pi2eta(eta2pi(0.42))))
    0.42000
    >>> print(float(pi2eta(eta2pi(-1.337))))
    -1.337
    >>> print(pi2eta(eta2pi([-1, 0, 1])))
    [-1.  0.  1.]
    '''
    simplexVec = as1D(np.asarray(pi_K))
    lastEntry = simplexVec[-1]
    # Division allocates a fresh array, so the caller's input is untouched.
    ratio_Km1 = simplexVec[:-1] / lastEntry
    return np.log(ratio_Km1)
def calcSummaryStats(Data, LP,
                     doPrecompEntropy=False,
                     doPrecompMergeEntropy=False,
                     mPairIDs=None,
                     mergePairSelection=None,
                     trackDocUsage=False,
                     **kwargs):
    r""" Calculate sufficient statistics for global updates.

    Parameters
    -------
    Data : bnpy data object
        Must expose attribute `dim`, or else `vocab_size`;
        that value is passed as D to the SuffStatBag.
    LP : local param dict with one of the fields
        resp : Data.nObs x K array,
            where resp[n,k] = posterior resp of comp k
        spR : sparse assignment matrix with K columns, used when 'resp'
            is absent. Field 'nnzPerRow' is also read from LP when
            doPrecompMergeEntropy is True.
    doPrecompEntropy : boolean flag
        indicates whether to precompute ELBO terms in advance
        used for memoized learning algorithms (moVB)
    doPrecompMergeEntropy : boolean flag
        indicates whether to precompute ELBO terms in advance
        for certain merge candidates.
    mPairIDs : optional sequence of merge candidate pairs
        Its length M sizes the merge terms of the returned bag.
    mergePairSelection : accepted for interface compatibility,
        not used inside this function.
    trackDocUsage : boolean flag
        indicates whether to count, for each comp, the number of rows
        whose responsibility exceeds 0.01.
    **kwargs : ignored.

    Returns
    -------
    SS : SuffStatBag with K components
        Summarizes for this mixture model, with fields
        * N : 1D array, size K
            N[k] = expected number of items assigned to comp k

        Also has optional ELBO term when doPrecompEntropy is True
        * Hresp : value of field 'Hresp' from calcELBO_NonlinearTerms
            Stored with dims=('K',), or with dims=None when the value
            is a plain Python float (sparse hard assignments).
            Presumably Hresp[k] = - \sum_{n=1}^N resp[n,k] log resp[n,k]
            (TODO confirm against calcELBO_NonlinearTerms).

        Also has optional Merge term when doPrecompMergeEntropy is True
        * Hresp : 1D array, size M
            One scalar entropy term per merge candidate in mPairIDs.
            Omitted for sparse LP when LP['nnzPerRow'] <= 1.

        Also has optional Selection term when trackDocUsage is True
        * DocUsageCount : 1D array, size K

    Raises
    ------
    KeyError
        If LP contains neither 'resp' nor 'spR'.
    """
    M = 0 if mPairIDs is None else len(mPairIDs)

    if 'resp' in LP:
        Nvec = np.sum(LP['resp'], axis=0)
        K = Nvec.size
    else:
        # Sparse assignment case
        Nvec = as1D(toCArray(LP['spR'].sum(axis=0)))
        K = LP['spR'].shape[1]

    D = Data.dim if hasattr(Data, 'dim') else Data.vocab_size
    SS = SuffStatBag(K=K, D=D, M=M)
    SS.setField('N', Nvec, dims='K')

    if doPrecompEntropy:
        Mdict = calcELBO_NonlinearTerms(LP=LP, returnMemoizedDict=1)
        Hresp = Mdict['Hresp']
        # Exact-type check on purpose: isinstance(..., float) would also
        # match np.float64 scalars and change which branch they take.
        if type(Hresp) is float:
            # SPARSE HARD ASSIGNMENTS
            SS.setELBOTerm('Hresp', Hresp, dims=None)
        else:
            SS.setELBOTerm('Hresp', Hresp, dims=('K', ))

    if doPrecompMergeEntropy:
        m_Hresp = None
        if 'resp' in LP:
            m_Hresp = -1 * NumericUtil.calcRlogR_specificpairs(
                LP['resp'], mPairIDs)
        elif 'spR' in LP:
            # NOTE(review): unlike the dense branch, this result is not
            # negated here -- presumably the helper handles the sign;
            # confirm against calcSparseMergeRlogR.
            if LP['nnzPerRow'] > 1:
                m_Hresp = calcSparseMergeRlogR(
                    spR_csr=LP['spR'],
                    nnzPerRow=LP['nnzPerRow'],
                    mPairIDs=mPairIDs)
        else:
            # Defensive only: the Nvec computation above already fails
            # with KeyError when LP has neither field.
            raise ValueError("Need resp or spR in LP")
        if m_Hresp is not None:
            assert m_Hresp.size == len(mPairIDs)
            SS.setMergeTerm('Hresp', m_Hresp, dims='M')

    if trackDocUsage:
        if 'resp' in LP:
            Usage = np.sum(LP['resp'] > 0.01, axis=0)
        else:
            # Sparse assignment case: entries that are not stored are
            # zero, so only stored entries can exceed the threshold.
            Usage = np.asarray((LP['spR'] > 0.01).sum(axis=0)).ravel()
        SS.setSelectionTerm('DocUsageCount', Usage, dims='K')
    return SS