import numpy as np
import numpy.linalg as la
import scipy.sparse as ssp
import scipy.special as fns

# LN_OF_2_PI, LN_OF_2_PI_E, USE_NIW_PRIOR, the NIW_* constants, safe_log,
# safe_log_det, scaledSelfSoftDot and the sparseScalar* helpers are assumed
# to be imported from this project's utility modules.


def var_bound(data, modelState, queryState):
    '''
    Determines the variational bound. Values are mutated in place, but are
    reset afterwards to their initial values, so it's safe to call in a
    serial manner.
    '''
    # Unpack the structs, for ease of access and efficiency
    W, L, X = data.words, data.links, data.feats
    D, _ = W.shape
    outMeans, outVarcs, inMeans, inVarcs, inDocCov, docLens = \
        queryState.outMeans, queryState.outVarcs, queryState.inMeans, \
        queryState.inVarcs, queryState.inDocCov, queryState.docLens
    K, topicMean, topicCov, outDocCov, vocab, A, dtype = \
        modelState.K, modelState.topicMean, modelState.topicCov, \
        modelState.outDocCov, modelState.vocab, modelState.A, modelState.dtype

    # Calculate some implicit variables
    itopicCov = la.inv(topicCov)

    bound = 0

    expMeansOut = np.exp(outMeans - outMeans.max(axis=1)[:, np.newaxis])
    expMeansIn  = np.exp(inMeans  - inMeans.max(axis=0)[np.newaxis, :])
    lse_at_k    = expMeansIn.sum(axis=0)

    # Distribution over document topics
    bound -= (D * K) / 2. * LN_OF_2_PI
    bound -= D / 2. * safe_log_det(outDocCov * topicCov)
    diff   = outMeans - topicMean[np.newaxis, :]
    bound -= 0.5 * np.sum(diff.dot(itopicCov) * diff * (1. / outDocCov))
    bound -= (0.5 / outDocCov) * np.sum(outVarcs * np.diag(itopicCov)[np.newaxis, :])  # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.

    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.log(outVarcs).sum()

    # Distribution over document in-links
    inDocPre = np.reciprocal(inDocCov)
    bound -= (D * K) / 2. * LN_OF_2_PI
    bound -= D / 2. * safe_log_det(topicCov)
    bound -= K / 2. * safe_log(inDocCov).sum()  # was K/2, which floors under integer division
    diff   = inMeans - outMeans
    bound -= 0.5 * np.sum(diff.dot(itopicCov) * diff * inDocPre[:, np.newaxis])
    bound -= 0.5 * np.sum((inVarcs * inDocPre[:, np.newaxis]) * np.diag(itopicCov)[np.newaxis, :])  # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.

    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.log(inVarcs).sum()

    # Distribution over topic assignments E[p(Z)] and E[p(Y)]
    W_weights = sparseScalarQuotientOfDot(W, expMeansOut, vocab)  # D x V: [W / TB], the quotient of the original over the reconstructed doc-term matrix
    top_sums  = expMeansOut * (W_weights.dot(vocab.T))            # D x K

    L_weights  = sparseScalarQuotientOfNormedDot(L, expMeansOut, expMeansIn, lse_at_k)
    top_sums  += expMeansOut * (L_weights.dot(expMeansIn) / lse_at_k[np.newaxis, :])

    # E[p(Z,Y)]
    linkLens = np.squeeze(np.array(L.sum(axis=1)))
    bound += np.sum(outMeans * top_sums)
    bound -= np.sum((docLens + linkLens) * np.log(np.sum(expMeansOut, axis=1)))

    # H[Z]
    bound += ((W_weights.dot(vocab.T)) * expMeansOut * outMeans).sum() \
           + ((W_weights.dot((np.log(vocab) * vocab).T)) * expMeansOut).sum() \
           - np.trace(sparseScalarProductOfSafeLnDot(W_weights, expMeansOut, vocab).dot(vocab.T).dot(expMeansOut.T))

    # H[Y]
    docVocab = (expMeansIn / lse_at_k[np.newaxis, :]).T.copy()
    bound += ((L_weights.dot(docVocab.T)) * expMeansOut * outMeans).sum() \
           + ((L_weights.dot((np.log(docVocab) * docVocab).T)) * expMeansOut).sum() \
           - np.trace(sparseScalarProductOfSafeLnDot(L_weights, expMeansOut, docVocab).dot(docVocab.T).dot(expMeansOut.T))

    # E[p(W)]
    vlv = np.log(vocab) * vocab
    bound += np.trace(expMeansOut.T.dot(W_weights.dot(vlv.T)))

    # E[p(L)]
    dld = np.log(docVocab) * docVocab
    bound += np.trace(expMeansOut.T.dot(L_weights.dot(dld.T)))

    return bound
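
# The bound above never materialises the dense D x V reconstruction
# expMeansOut.dot(vocab): sparseScalarQuotientOfDot evaluates the quotient
# W / (ThetaB) only at W's non-zero entries. The project's own helper is not
# shown here; the sketch below (name and all) is a hypothetical stand-in
# inferred from how it is called above, not the library's implementation.
def sparse_scalar_quotient_of_dot_sketch(W, T, V):
    """R[d, v] = W[d, v] / (T @ V)[d, v], evaluated only where W is non-zero."""
    R = W.tocoo(copy=True).astype(np.float64)
    # (T @ V)[d, v] for just the observed (d, v) pairs, via a single einsum
    R.data = R.data / np.einsum('ij,ji->i', T[R.row, :], V[:, R.col])
    return R.tocsr()
# Restricting the product to the nnz positions keeps the cost at
# O(nnz * K) rather than the O(D * V * K) of a dense reconstruction.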
def var_bound(data, modelState, queryState):
    '''
    Determines the variational bound. Values are mutated in place, but are
    reset afterwards to their initial values, so it's safe to call in a
    serial manner.
    '''
    # Unpack the structs, for ease of access and efficiency
    W, L, X = data.words, data.links, data.feats
    D, _ = W.shape
    means, varcs, docLens = queryState.means, queryState.varcs, queryState.docLens
    K, topicMean, topicCov, vocab, A = \
        modelState.K, modelState.topicMean, modelState.topicCov, \
        modelState.vocab, modelState.A

    # Calculate some implicit variables
    itopicCov = la.inv(topicCov)

    bound = 0

    expMeansOut = np.exp(means - means.max(axis=1)[:, np.newaxis])
    expMeansIn  = np.exp(means - means.max(axis=0)[np.newaxis, :])
    lse_at_k    = expMeansIn.sum(axis=0)

    if USE_NIW_PRIOR:
        pseudoObsMeans = K + NIW_PSEUDO_OBS_MEAN
        pseudoObsVar   = K + NIW_PSEUDO_OBS_VAR

        # distribution over topic covariance
        bound -= 0.5 * K * pseudoObsVar * np.log(NIW_PSI)
        bound -= 0.5 * K * pseudoObsVar * np.log(2)
        bound -= fns.multigammaln(pseudoObsVar / 2., K)
        bound -= 0.5 * (pseudoObsVar + K - 1) * safe_log_det(topicCov)
        bound += 0.5 * NIW_PSI * np.trace(itopicCov)

        # and its entropy is a constant, which we skip

        # distribution over means
        bound -= 0.5 * K * np.log(1. / pseudoObsMeans) * safe_log_det(topicCov)
        bound -= 0.5 / pseudoObsMeans * (topicMean).T.dot(itopicCov).dot(topicMean)

        # and its entropy
        bound += 0.5 * safe_log_det(topicCov)  # + a constant

    # Distribution over document topics
    bound -= (D * K) / 2. * LN_OF_2_PI
    bound -= D / 2. * safe_log_det(topicCov)  # was la.det(topicCov); the Gaussian log-density needs the log-determinant
    diff   = means - topicMean[np.newaxis, :]
    bound -= 0.5 * np.sum(diff.dot(itopicCov) * diff)
    bound -= 0.5 * np.sum(varcs * np.diag(itopicCov)[np.newaxis, :])  # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.

    # And its entropy
    # bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs))

    # Distribution over word-topic assignments, and words, and the former's
    # entropy, and similarly for out-links. This is somewhat jumbled to
    # avoid repeatedly taking the exp and log of the means.
    W_weights  = sparseScalarQuotientOfDot(W, expMeansOut, vocab)  # D x V: [W / TB], the quotient of the original over the reconstructed doc-term matrix
    w_top_sums = expMeansOut * (W_weights.dot(vocab.T))            # D x K

    L_weights  = sparseScalarQuotientOfNormedDot(L, expMeansOut, expMeansIn, lse_at_k)
    l_top_sums = L_weights.dot(expMeansIn) / lse_at_k[np.newaxis, :] * expMeansOut  # computed but not referenced below

    bound += np.sum(docLens * np.log(np.sum(expMeansOut, axis=1)))
    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeansOut, vocab).data)

    # means = np.log(expMeans, out=expMeans)
    # means = safe_log(expMeansOut, out=means)

    bound += np.sum(means * w_top_sums)
    bound += np.sum(2 * ssp.diags(docLens, 0) * means.dot(A) * means)
    bound -= 2. * scaledSelfSoftDot(means, docLens)
    bound -= 0.5 * np.sum(docLens[:, np.newaxis] * w_top_sums * (np.diag(A))[np.newaxis, :])

    bound -= np.sum(means * w_top_sums)

    return bound
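
# Both variants lean on safe_log_det for the Gaussian normalisation terms.
# Assuming it simply guards the log-determinant against underflow and
# non-positive-definite input, a minimal sketch (not the project's actual
# helper) could be:
def safe_log_det_sketch(cov):
    """log |cov| via slogdet, which factorises the matrix rather than forming
    the determinant itself, so it stays finite for small or ill-scaled input."""
    sign, logdet = la.slogdet(np.atleast_2d(cov))
    if sign <= 0:
        raise ValueError("covariance matrix is not positive definite")
    return logdet
# e.g. safe_log_det_sketch(np.eye(3) * 1e-200) gives roughly -1381.55, where
# np.log(la.det(...)) would return -inf because the determinant underflows.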