Example #1
def var_bound(data, modelState, queryState):
    '''
    Computes the variational bound. Values are mutated in place, but are
    reset to their initial values afterwards, so it is safe to call
    serially.
    '''
    
    # Unpack the structs, for ease of access and efficiency
    W, L, X  = data.words, data.links, data.feats
    D,_ = W.shape
    outMeans, outVarcs, inMeans, inVarcs, inDocCov, docLens = \
        queryState.outMeans, queryState.outVarcs, queryState.inMeans, \
        queryState.inVarcs, queryState.inDocCov, queryState.docLens
    K, topicMean, topicCov, outDocCov, vocab, A, dtype = \
        modelState.K, modelState.topicMean, modelState.topicCov, \
        modelState.outDocCov, modelState.vocab, modelState.A, modelState.dtype

    # Calculate some implicit variables
    itopicCov = la.inv(topicCov)
    
    bound = 0

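    # Subtract the row/column maximum before exponentiating to avoid overflow
    # (the usual log-sum-exp stabilisation)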
    expMeansOut = np.exp(outMeans - outMeans.max(axis=1)[:, np.newaxis])
    expMeansIn  = np.exp(inMeans - inMeans.max(axis=0)[np.newaxis, :])
    lse_at_k    = expMeansIn.sum(axis=0)

    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * safe_log_det(outDocCov * topicCov)
    diff   = outMeans - topicMean[np.newaxis,:]
    bound -= (0.5 / outDocCov) * np.sum(diff.dot(itopicCov) * diff)
    bound -= (0.5 / outDocCov) * np.sum(outVarcs * np.diag(itopicCov)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.

    # And its entropy
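    # (entropy of D diagonal Gaussians: 0.5*K*ln(2*pi*e) + 0.5*sum_k ln(sigma_dk^2) per doc)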
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.log(outVarcs).sum()

    # Distribution over document in-links
    inDocPre = np.reciprocal(inDocCov)
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * safe_log_det(topicCov)
    bound -= K/2. * safe_log(inDocCov).sum()
    diff   = inMeans - outMeans
    bound -= 0.5 * np.sum(diff.dot(itopicCov) * diff * inDocPre[:,np.newaxis])
    bound -= 0.5 * np.sum((inVarcs * inDocPre[:,np.newaxis]) * np.diag(itopicCov)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.

    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.log(inVarcs).sum()

    # Distribution over topic assignments E[p(Z)] and E[p(Y)]
    W_weights  = sparseScalarQuotientOfDot(W, expMeansOut, vocab)  # D x V   [W / TB] is the quotient of the original over the reconstructed doc-term matrix
    top_sums   = expMeansOut * (W_weights.dot(vocab.T)) # D x K

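    # Quotient of the observed link matrix over its reconstruction, analogous to W_weights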
    L_weights  = sparseScalarQuotientOfNormedDot(L, expMeansOut, expMeansIn, lse_at_k)
    top_sums  += expMeansOut * (L_weights.dot(expMeansIn) / lse_at_k[np.newaxis, :])

    # E[p(Z,Y)]
    linkLens = np.squeeze(np.array(L.sum(axis=1)))
    bound += np.sum(outMeans * top_sums)
    bound -= np.sum((docLens + linkLens) * np.log(np.sum(expMeansOut, axis=1)))

    # H[Z]
    bound += ((W_weights.dot(vocab.T)) * expMeansOut * outMeans).sum() \
           + ((W_weights.dot((np.log(vocab) * vocab).T)) * expMeansOut).sum() \
           - np.trace(sparseScalarProductOfSafeLnDot(W_weights, expMeansOut, vocab).dot(vocab.T).dot(expMeansOut.T))

    # H[Y]
    docVocab = (expMeansIn / lse_at_k[np.newaxis,:]).T.copy()
    bound += ((L_weights.dot(docVocab.T)) * expMeansOut * outMeans).sum() \
           + ((L_weights.dot((np.log(docVocab) * docVocab).T)) * expMeansOut).sum() \
           - np.trace(sparseScalarProductOfSafeLnDot(L_weights, expMeansOut, docVocab).dot(docVocab.T).dot(expMeansOut.T))

    # E[p(W)]
    vlv = np.log(vocab) * vocab
    bound += np.trace(expMeansOut.T.dot(W_weights.dot(vlv.T)))

    # E[p(L)]
    dld = np.log(docVocab) * docVocab
    bound += np.trace(expMeansOut.T.dot(L_weights.dot(dld.T)))

    return bound
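
The max-subtraction used for expMeansOut and expMeansIn above is the standard
log-sum-exp stabilisation. A minimal self-contained sketch of the idea (the
helper name stable_softmax_rows is ours, not part of the original code):

import numpy as np

def stable_softmax_rows(M):
    # Subtracting the row maximum leaves the normalised ratio unchanged
    # (the shift cancels) but keeps exp() from overflowing.
    shifted = M - M.max(axis=1)[:, np.newaxis]
    e = np.exp(shifted)
    return e / e.sum(axis=1)[:, np.newaxis]

M = np.array([[1000.0, 1001.0],
              [-5.0,    3.0]])
print(stable_softmax_rows(M))  # finite probabilities; np.exp(M) alone would overflow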
Example #2
def var_bound(data, modelState, queryState):
    '''
    Computes the variational bound. Values are mutated in place, but are
    reset to their initial values afterwards, so it is safe to call
    serially.
    '''
    
    # Unpack the structs, for ease of access and efficiency
    W, L, X  = data.words, data.links, data.feats
    D,_ = W.shape
    means, varcs, docLens = queryState.means, queryState.varcs, queryState.docLens
    K, topicMean, topicCov, vocab, A = modelState.K, modelState.topicMean, modelState.topicCov, modelState.vocab, modelState.A
    
    # Calculate some implicit variables
    itopicCov = la.inv(topicCov)
    
    bound = 0

    expMeansOut = np.exp(means - means.max(axis=1)[:, np.newaxis])
    expMeansIn  = np.exp(means - means.max(axis=0)[np.newaxis, :])
    lse_at_k    = expMeansIn.sum(axis=0)
    
    if USE_NIW_PRIOR:
        pseudoObsMeans = K + NIW_PSEUDO_OBS_MEAN
        pseudoObsVar   = K + NIW_PSEUDO_OBS_VAR

        # distribution over topic covariance
        bound -= 0.5 * K * pseudoObsVar * log(NIW_PSI)
        bound -= 0.5 * K * pseudoObsVar * log(2)
        bound -= fns.multigammaln(pseudoObsVar / 2., K)
        bound -= 0.5 * (pseudoObsVar + K - 1) * safe_log_det(topicCov)
        bound += 0.5 * NIW_PSI * np.trace(itopicCov)

        # and its entropy
        # is a constant which we skip
        
        # distribution over means
        bound -= 0.5 * (K * log(1./pseudoObsMeans) + safe_log_det(topicCov))
        bound -= 0.5 / pseudoObsMeans * (topicMean).T.dot(itopicCov).dot(topicMean)
        
        # and its entropy
        bound += 0.5 * safe_log_det(topicCov) # +  a constant
        
    
    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * safe_log_det(topicCov)
    diff   = means - topicMean[np.newaxis,:]
    bound -= 0.5 * np.sum(diff.dot(itopicCov) * diff)
    bound -= 0.5 * np.sum(varcs * np.diag(itopicCov)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
       
    # And its entropy
#     bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs)) 


    # Distribution over word-topic assignments and words, and the former's
    # entropy; similarly for out-links. This is somewhat jumbled to
    # avoid repeatedly taking the exp and log of the means.
    W_weights  = sparseScalarQuotientOfDot(W, expMeansOut, vocab)  # D x V   [W / TB] is the quotient of the original over the reconstructed doc-term matrix
    w_top_sums = expMeansOut * (W_weights.dot(vocab.T)) # D x K

    L_weights  = sparseScalarQuotientOfNormedDot(L, expMeansOut, expMeansIn, lse_at_k)
    l_top_sums = L_weights.dot(expMeansIn) / lse_at_k[np.newaxis, :] * expMeansOut
    
    bound += np.sum(docLens * np.log(np.sum(expMeansOut, axis=1)))
    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeansOut, vocab).data)
    # means = np.log(expMeans, out=expMeans)
    # means = safe_log(expMeansOut, out=means)
    
    bound += np.sum(means * w_top_sums)
    bound += np.sum(2 * ssp.diags(docLens,0) * means.dot(A) * means)
    bound -= 2. * scaledSelfSoftDot(means, docLens)
    bound -= 0.5 * np.sum(docLens[:,np.newaxis] * w_top_sums * (np.diag(A))[np.newaxis,:])
    
    bound -= np.sum(means * w_top_sums)
    
    
    return bound
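
Both examples lean on project helpers such as safe_log_det, safe_log, and the
sparseScalar* routines, which are defined elsewhere in the codebase. As a rough
sketch (our assumption of what safe_log_det might do, not the project's actual
implementation), a numerically safe log-determinant of a positive-definite
covariance is typically computed via a Cholesky factorisation:

import numpy as np
import numpy.linalg as la

def safe_log_det(cov):
    # log|cov| = 2 * sum_i log(L_ii) for the Cholesky factor cov = L L^T;
    # this avoids the overflow/underflow of log(la.det(cov)) for large K.
    chol = la.cholesky(cov)
    return 2.0 * np.sum(np.log(np.diag(chol)))

cov = 2.0 * np.eye(3)
print(safe_log_det(cov))  # 3 * ln(2) ~= 2.079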