Example No. 1
def log_likelihood(data, modelState, queryState):
    ''' 
    Return the log-likelihood of the given data W according to the model
    and the parameters inferred for the entries in W stored in the 
    queryState object.
    '''
    probs = rowwise_softmax(queryState.means)
    doc_dist = colwise_softmax(queryState.means)

    word_likely = np.sum( \
        sparseScalarProductOfSafeLnDot(\
            data.words, \
            probs, \
            modelState.vocab \
        ).data \
    )

    link_likely = np.sum( \
        sparseScalarProductOfSafeLnDot(\
            data.links, \
            probs, \
            doc_dist \
        ).data \
    )

    return word_likely + link_likely
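All of these examples lean on a handful of helpers that are not shown here. As a point of reference only, the sketch below is an assumption inferred from how the calls are used (e.g. `rowwise_softmax(queryState.means)` above, and the `.data` attribute of the value returned by `sparseScalarProductOfSafeLnDot`), not the project's actual implementations: the latter appears to compute W ∘ ln(topics · vocab) at the non-zero entries of W only.

# Minimal reference sketch -- an assumption based on usage, not the real helpers.
import numpy as np
import scipy.sparse as ssp

def rowwise_softmax_sketch(means):
    # Turn each row of unnormalised scores into a probability distribution.
    m = means - means.max(axis=1)[:, np.newaxis]   # guard against overflow
    np.exp(m, out=m)
    m /= m.sum(axis=1)[:, np.newaxis]
    return m

def sparse_product_of_safe_ln_dot_sketch(W, topics, vocab, eps=1e-300):
    # Element-wise product of the sparse D x T count matrix W with the "safe"
    # log of the reconstructed doc-term matrix topics.dot(vocab), evaluated
    # only at the non-zero entries of W, so the dense D x T product is never built.
    W = W.tocoo()
    recon = np.einsum("nk,kn->n", topics[W.row, :], vocab[:, W.col])
    vals = W.data * np.log(np.maximum(recon, eps))
    return ssp.coo_matrix((vals, (W.row, W.col)), shape=W.shape)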
Example No. 2
def log_likelihood(data, modelState, queryState):
    '''
    Return the log-likelihood of the given data according to the model
    and the parameters inferred for datapoints in the query-state object.

    Returns the total log-likelihood summed over all D documents.
    '''
    topicProbs = topicDists(queryState)
    wordLikely = sparseScalarProductOfSafeLnDot(data.words, topicProbs, wordDists(modelState)).sum()

    docProbs      = np.empty((modelState.K, data.doc_count), dtype=modelState.dtype)
    docProbs[:,:] = topicProbs.T  # K x D copy of the per-document topic distributions
    linkLikely    = sparseScalarProductOfSafeLnDot(data.links, topicProbs, docProbs).sum()
    
    return wordLikely + linkLikely
Example No. 3
def log_likelihood(modelState, X, W, queryState):
    '''
    Returns the log likelihood of the given features and words according to the
    given model.
    
    modelState - the model, provided by #train() - to use to evaluate the data
    X          - the DxF matrix of features
    W          - the DxT matrix of words
    
    Return:
        The marginal likelihood of the data
    '''
    if W.dtype.kind == 'i':      # for the sparseScalarProductOfSafeLnDot() method to work
        W = W.astype(DTYPE)
    
    F, T, vocab = modelState.F, modelState.T, modelState.vocab
    assert X.shape[1] == F, "Model is trained to expect " + str(F) + " features but feature-matrix has " + str(X.shape[1]) + " features"
    assert W.shape[1] == T, "Model is trained to expect " + str(T) + " words, but word-matrix has " + str(W.shape[1]) + " words"
   
    expLmda  = queryState.expLmda
    row_sums = expLmda.sum(axis=1)
    expLmda /= row_sums[:, np.newaxis] # converts it to a true distribution
    
    likely = np.sum (sparseScalarProductOfSafeLnDot(W, expLmda, vocab).data)
    
    # Revert expLmda to its original value as this is a ref to, not a copy of, the original matrix
    expLmda *= row_sums[:, np.newaxis]
    
    return likely
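The normalise / compute / revert dance above is needed because, as the final comment notes, `queryState.expLmda` is a reference to the query state's own matrix rather than a copy. As a hypothetical alternative sketch only, the same value can be computed by normalising into a temporary D x K array, trading one extra copy for leaving the query state untouched:

# Hypothetical sketch: normalise into a temporary instead of mutating
# queryState.expLmda in place, so no revert step is needed. Uses the same
# sparseScalarProductOfSafeLnDot helper as the examples above.
import numpy as np

def log_likelihood_no_mutation(modelState, W, queryState):
    lmda_dist = queryState.expLmda / queryState.expLmda.sum(axis=1)[:, np.newaxis]
    return np.sum(sparseScalarProductOfSafeLnDot(W, lmda_dist, modelState.vocab).data)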
Example No. 4
def var_bound(data, model, query, topicDistOverride=None):
    '''
    Determines the variational bounds.
    '''
    bound = 0

    # Unpack the structs, for ease of access and efficiency
    docLens, topicMeans = \
        query.docLens, query.topicDists
    K, topicPrior, vocabPrior, corpusTopicDist, dtype = \
        model.K, model.topicPrior, model.vocabPrior, model.corpusTopicDist, model.dtype
    wordDistMat = model.wordDists  # unpacked under a new name so the wordDists() helper used below isn't shadowed

    tops = topicDistOverride \
        if topicDistOverride is not None \
        else topicDists(query)

    # Unpack the corpus and its dimensions
    W = data.words
    D, T = W.shape

    wordLikely = sparseScalarProductOfSafeLnDot(data.words, tops, wordDists(model)).sum()
    topicLikely = topicMeans.dot(fns.digamma(corpusTopicDist) - fns.digamma(corpusTopicDist.sum()))


    # Expected joint
    like = W.dot(safe_log(wordDistMat).T) # D x K
    like += corpusTopicDist[np.newaxis,:]
    like *= safe_log(topicMeans)

    # Entropy
    ent = (-topicMeans * safe_log(topicMeans)).sum()

    return like.sum() + ent
Example No. 5
def log_likelihood(data, modelState, queryState):
    '''
    Return the log-likelihood of the given data W according to the model
    and the parameters inferred for the entries in W stored in the
    queryState object.

    Returns the total log-likelihood summed over all D documents.
    '''
    return sparseScalarProductOfSafeLnDot(data.words, topicDists(queryState),
                                          wordDists(modelState)).sum()
Example No. 6
def log_likelihood(data, model, query):
    '''
    Return the log-likelihood of the given data W according to the model
    and the parameters inferred for the entries in W stored in the
    queryState object.
    
    '''
    W = data.words if data.words.dtype == model.dtype else data.words.astype(
        model.dtype)
    return sparseScalarProductOfSafeLnDot(W, topicDists(query),
                                          wordDists(model)).sum()
Example No. 7
def log_likelihood(data, modelState, queryState):
    ''' 
    Return the log-likelihood of the given data W according to the model
    and the parameters inferred for the entries in W stored in the 
    queryState object.
    '''
    return np.sum( \
        sparseScalarProductOfSafeLnDot(\
            data.words, \
            rowwise_softmax(queryState.means), \
            modelState.vocab \
        ).data \
    )
Example No. 8
def log_likelihood_point(data, model, query, topicDistOverride=None):
    '''
    Return the log-likelihood of the given data according to the model
    and the parameters inferred for datapoints in the query-state object

    Returns the total log-likelihood summed over all D documents.
    '''
    tops = topicDistOverride \
        if topicDistOverride is not None \
        else topicDists(query)
    wordLikely = sparseScalarProductOfSafeLnDot(data.words, tops,
                                                wordDists(model)).sum()
    return wordLikely
Example No. 9
def log_likelihood_point(data, modelState, queryState):
    '''
    Return the log-likelihood of the given data W according to the model
    and the parameters inferred for the entries in W stored in the
    queryState object.

    Returns the total log-likelihood summed over all D documents.
    '''
    n_dk, n_kt = queryState.n_dk, modelState.n_kt
    a, b = modelState.topicPrior, modelState.vocabPrior

    if type(a) is float or np.isscalar(a):
        a = constantArray((modelState.K, ), a, modelState.dtype)
    W = data.words if data.words.dtype == modelState.dtype \
        else data.words.astype(modelState.dtype)

    n_dk += a[np.newaxis, :]
    n_kt += b

    # Scale to create distributions over doc-topics and topic-vocabs
    doc_norm = n_dk.sum(axis=1)
    voc_norm = n_kt.sum(axis=1)

    n_dk /= doc_norm[:, np.newaxis]
    n_kt /= voc_norm[:, np.newaxis]

    # Use distributions to create log-likelihood. This could be made
    # faster still by not materializing the (admittedly sparse) matrix
    ln_likely = sparseScalarProductOfSafeLnDot(W, n_dk, n_kt).sum()

    # Rescale back to word-counts
    n_dk *= doc_norm[:, np.newaxis]
    n_kt *= voc_norm[:, np.newaxis]

    n_dk -= a[np.newaxis, :]
    n_kt -= b

    return ln_likely
Example No. 10
def log_likelihood(data, modelState, queryState):
    '''
    Return the log-likelihood of the given data W and X according to the model
    and the parameters inferred for the entries in W and X stored in the
    queryState object.

    Returns the total log-likelihood summed over all D documents.
    '''
    wordLikely = sparseScalarProductOfSafeLnDot(data.words,
                                                topicDists(queryState),
                                                wordDists(modelState)).sum()

    # For likelihood it's a bit tricky. In theory, given d =/= p, and letting
    # c_d = 1/n_d, where n_d is the word count of document d, it's
    #
    #   ln p(y_dp|weights) = E[\sum_k weights[k] * (c_d \sum_n z_dnk) * (c_p \sum_n z_pnk)]
    #                      = \sum_k weights[k] * c_d * E[\sum_n z_dnk] * c_p * E[\sum_n z_pnk]
    #                      = \sum_k weights[k] * topicDistsMean[d,k] * topicDistsMean[p,k]
    #
    #
    # where topicDistsMean[d,k] is the mean of the k-th element of the Dirichlet parameterised
    # by topicDist[d,:]
    #
    # However in the related paper on Supervised LDA, which uses this trick of average z_dnk,
    # they explicitly say that in the likelihood calculation they use the expectation
    # according to the _variational_ approximate posterior distribution q(z_dn) instead of the
    # actual distribution p(z_dn|topicDist), and thus
    #
    # E[\sum_n z_dnk] = \sum_n E_q[z_dnk]
    #
    # There's no detail of the likelihood in either of the RTM papers, so we use the
    # variational approach

    linkLikely = 0

    return wordLikely + linkLikely
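As a hypothetical sketch only, the link term that the comment above derives but leaves at zero could be accumulated over the observed links as follows. Here `weights` (a K-vector of link regression weights) is an assumed parameter that this function never actually defines or uses.

# Hypothetical sketch: accumulate, over observed links, the quantity the
# comment above writes as
#   ln p(y_dp|weights) = \sum_k weights[k] * topicDistsMean[d,k] * topicDistsMean[p,k]
# This is not part of the function above.
import numpy as np

def link_term_sketch(links, topicDistsMean, weights):
    links = links.tocoo()                      # sparse D x D link indicator matrix
    d, p = links.row, links.col
    per_link = np.einsum("nk,nk->n",
                         topicDistsMean[d, :] * weights[np.newaxis, :],
                         topicDistsMean[p, :])
    return per_link.sum()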
Example No. 11
def var_bound(data, modelState, queryState):
    '''
    Determines the variational bounds. Values are mutated in place, but are
    reset afterwards to their initial values. So it's safe to call in a serial
    manner.
    '''
    
    # Unpack the structs, for ease of access and efficiency
    W   = data.words
    D,_ = W.shape
    means, expMeans, varcs, docLens = queryState.means, queryState.expMeans, queryState.varcs, queryState.docLens
    K, topicMean, sigT, vocab, vocabPrior, A = modelState.K, modelState.topicMean, modelState.sigT, modelState.vocab, modelState.vocabPrior, modelState.A
    
    # Calculate some implicit variables
    isigT = la.inv(sigT)
    
    bound = 0
    
    if USE_NIW_PRIOR:
        pseudoObsMeans = K + NIW_PSEUDO_OBS_MEAN
        pseudoObsVar   = K + NIW_PSEUDO_OBS_VAR

        # distribution over topic covariance
        bound -= 0.5 * K * pseudoObsVar * log(NIW_PSI)
        bound -= 0.5 * K * pseudoObsVar * log(2)
        bound -= fns.multigammaln(pseudoObsVar / 2., K)
        bound -= 0.5 * (pseudoObsVar + K - 1) * safe_log_det(sigT)
        bound += 0.5 * NIW_PSI * np.trace(isigT)

        # and its entropy
        # is a constant which we skip
        
        # distribution over means
        bound -= 0.5 * K * log(1./pseudoObsMeans) * safe_log_det(sigT)
        bound -= 0.5 / pseudoObsMeans * (topicMean).T.dot(isigT).dot(topicMean)
        
        # and its entropy
        bound += 0.5 * safe_log_det(sigT) # +  a constant
        
    
    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * safe_log_det(sigT) # log-determinant, as in the other bounds' lnDetSigT term
    diff   = means - topicMean[np.newaxis,:]
    bound -= 0.5 * np.sum (diff.dot(isigT) * diff)
    bound -= 0.5 * np.sum(varcs * np.diag(isigT)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
       
    # And its entropy
#     bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs)) 
    
    # Distribution over word-topic assignments and words and the former's
    # entropy. This is somewhat jumbled to avoid repeatedly taking the
    # exp and log of the means
    expMeans = np.exp(means - means.max(axis=1)[:,np.newaxis], out=expMeans)
    R = sparseScalarQuotientOfDot(W, expMeans, vocab)  # D x V   [W / TB] is the quotient of the original over the reconstructed doc-term matrix
    V = expMeans * (R.dot(vocab.T)) # D x K
    
    bound += np.sum(docLens * np.log(np.sum(expMeans, axis=1)))
    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeans, vocab).data)
    
    bound += np.sum(means * V)
    bound += np.sum(2 * ssp.diags(docLens,0) * means.dot(A) * means)
    bound -= 2. * scaledSelfSoftDot(means, docLens)
    bound -= 0.5 * np.sum(docLens[:,np.newaxis] * V * (np.diag(A))[np.newaxis,:])
    
    bound -= np.sum(means * V) 
    
    
    return bound
Example No. 12
def var_bound(data, modelState, queryState):
    '''
    Determines the variational bounds. Values are mutated in place, but are
    reset afterwards to their initial values. So it's safe to call repeatedly.
    '''

    # Unpack the structs, for ease of access and efficiency
    W, X = data.words, data.feats
    D, T, F = W.shape[0], W.shape[1], X.shape[1]
    means, docLens = queryState.means, queryState.docLens
    K, A, U, Y, V, covA, tv, ltv, fv, lfv, vocab, vocabPrior, dtype = \
        modelState.K, modelState.A, modelState.U, modelState.Y,  modelState.V, modelState.covA, modelState.tv, modelState.ltv, modelState.fv, modelState.lfv, modelState.vocab, modelState.vocabPrior, modelState.dtype

    H = 0.5 * (np.eye(K) - np.ones((K, K), dtype=dtype) / K)
    Log2Pi = log(2 * pi)

    bound = 0

    # U and V are parameters with no distribution

    #
    # Y has a normal distribution; its covariance is unfortunately an expensive computation
    #
    P, Q = U.shape[1], V.shape[1]
    covY = np.eye(P * Q) * (lfv * ltv)
    covY += np.kron(V.T.dot(V), U.T.dot(U))
    covY = la.inv(covY, overwrite_a=True)
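    # (Note: written out, the covariance assembled above is
    #  covY = (lfv*ltv * I_{P*Q} + kron(V'V, U'U))^{-1}, a (P*Q) x (P*Q)
    #  matrix inverse -- which is why the comment above calls it expensive.)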

    # The expected likelihood of Y
    bound -= 0.5 * P * Q * Log2Pi
    bound -= 0.5 * P * Q * log(ltv * lfv)
    bound -= 0.5 / (lfv * ltv) * np.sum(
        Y * Y)  # 5x faster than np.trace(Y.dot(Y.T))
    bound -= 0.5 * np.trace(covY) * (lfv * ltv)
    # the traces of the posterior+prior covariance products cancel out across likelihoods

    # The entropy of Y
    bound += 0.5 * P * Q * (Log2Pi + 1) + 0.5 * safe_log_det(covY)

    #
    # A has a normal distribution
    #
    F, K = A.shape[0], A.shape[1]
    diff = A - U.dot(Y).dot(V.T)
    diff *= diff

    # The expected likelihood of A
    bound -= 0.5 * K * F * Log2Pi
    bound -= 0.5 * K * F * log(tv * fv)
    bound -= 0.5 / (fv * tv) * np.sum(diff)

    # The entropy of A
    bound += 0.5 * F * K * (Log2Pi + 1) + 0.5 * K * safe_log_det(covA)

    #
    # Theta, the matrix of means, has a normal distribution. Its row-covariance is diagonal
    # (i.e. it's several independent multi-var normal distros). The posterior is made
    # up of D K-dimensional normals with diagonal covariances
    #
    # We iterate through the topics in batches, to control memory use
    batchSize = min(BatchSize, D)
    batchCount = ceil(D / batchSize)
    feats = np.ndarray(shape=(batchSize, F), dtype=dtype)
    tops = np.ndarray(shape=(batchSize, K), dtype=dtype)
    trace = 0
    for b in range(0, batchCount):
        start = b * batchSize
        end = min(start + batchSize, D)
        batchSize = min(batchSize, end - start)

        feats[:batchSize, :] = X[start:end, :].toarray()
        np.dot(feats[:batchSize, :], A, out=tops[:batchSize, :])
        tops[:batchSize, :] -= means[start:end, :]
        tops[:batchSize, :] *= tops[:batchSize, :]
        trace += np.sum(tops[:batchSize, :])
    feats = None

    # The expected likelihood of the topic-assignments
    bound -= 0.5 * D * K * Log2Pi
    bound -= 0.5 * D * K * log(tv)
    bound -= 0.5 / tv * trace

    bound -= 0.5 * tv * np.sum(covA)  # this trace doesn't cancel as we don't have a posterior on tv

    # The entropy of the topic-assignments
    bound += 0.5 * D * K * (Log2Pi + 1) + 0.5 * np.sum(covA)

    # Distribution over word-topic assignments and words and the former's
    # entropy. This is somewhat jumbled to avoid repeatedly taking the
    # exp and log of the means
    # Again we batch this for safety
    batchSize = min(BatchSize, D)
    batchCount = ceil(D / batchSize)
    V = np.ndarray(shape=(batchSize, K), dtype=dtype)
    for b in range(0, batchCount):
        start = b * batchSize
        end = min(start + batchSize, D)
        batchSize = min(batchSize, end - start)

        meansBatch = means[start:end, :]
        docLensBatch = docLens[start:end]

        np.exp(meansBatch - meansBatch.max(axis=1)[:, np.newaxis],
               out=tops[:batchSize, :])
        expMeansBatch = tops[:batchSize, :]
        R = sparseScalarQuotientOfDot(
            W, expMeansBatch, vocab, start=start, end=end
        )  # BatchSize x V:   [W / TB] is the quotient of the original over the reconstructed doc-term matrix
        V[:batchSize, :] = expMeansBatch * (R[:batchSize, :].dot(vocab.T)
                                            )  # BatchSize x K
        VBatch = V[:batchSize, :]

        bound += np.sum(docLensBatch * np.log(np.sum(expMeansBatch, axis=1)))
        bound += np.sum(
            sparseScalarProductOfSafeLnDot(W,
                                           expMeansBatch,
                                           vocab,
                                           start=start,
                                           end=end).data)

        bound += np.sum(meansBatch * VBatch)
        bound += np.sum(2 * ssp.diags(docLensBatch, 0) * meansBatch.dot(H) *
                        meansBatch)
        bound -= 2. * scaledSelfSoftDot(meansBatch, docLensBatch)
        bound -= 0.5 * np.sum(docLensBatch[:, np.newaxis] * VBatch *
                              (np.diag(H))[np.newaxis, :])

        bound -= np.sum(meansBatch * VBatch)

    return bound
Example No. 13
def var_bound(data, modelState, queryState, XTX = None):
    '''
    Determines the variational bounds. Values are mutated in place, but are
    reset afterwards to their initial values. So it's safe to call in a serial
    manner.
    '''
    # Unpack the structs, for ease of access and efficiency
    W, X = data.words, data.feats
    D, _ = W.shape
    means, varcs, lxi, s, docLens = queryState.means, queryState.varcs, queryState.lxi, queryState.s, queryState.docLens
    F, P, K, A, R_A, fv, Y, R_Y, lfv, V, sigT, vocab, dtype = modelState.F, modelState.P, modelState.K, modelState.A, modelState.R_A, modelState.fv, modelState.Y, modelState.R_Y, modelState.lfv, modelState.V, modelState.sigT, modelState.vocab, modelState.dtype
    
    # Calculate some implicit variables
    xi = ctm._deriveXi(means, varcs, s)
    isigT = la.inv(sigT)
    #lnDetSigT = np.log(la.det(sigT))
    lnDetSigT = lnDetOfDiagMat(sigT)
    verifyProper(lnDetSigT, "lnDetSigT")
    
    if XTX is None:
        XTX = X.T.dot(X)
    
    bound = 0
    
    # Distribution over latent space
    bound -= (P*K)/2. * LN_OF_2_PI
    bound -= P * lnDetSigT
    bound -= K * P * log(lfv)
    bound -= 0.5 * np.sum(1./lfv * isigT.dot(Y) * Y)
    bound -= 0.5 * K * np.trace(R_Y)
    
    # And its entropy
    detR_Y = safeDet(R_Y, "R_Y")
    bound += 0.5 * LN_OF_2_PI_E + P/2. * lnDetSigT + K/2. * log(detR_Y)
    
    # Distribution over mapping from features to topics
    diff   = (A - Y.dot(V))
    bound -= (F*K)/2. * LN_OF_2_PI
    bound -= F * lnDetSigT
    bound -= K * P * log(fv)
    bound -= 0.5 * np.sum (1./lfv * isigT.dot(diff) * diff)
    bound -= 0.5 * K * np.trace(R_A)
    
    # And its entropy
    detR_A = safeDet(R_A, "R_A")
    bound += 0.5 * LN_OF_2_PI_E + F/2. * lnDetSigT + K/2. * log(detR_A)
    
    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * lnDetSigT
    diff   = means - X.dot(A.T)
    bound -= 0.5 * np.sum (diff.dot(isigT) * diff)
    bound -= 0.5 * np.sum(varcs * np.diag(isigT)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
    bound -= 0.5 * K * np.trace(XTX.dot(R_A))
       
    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs)) 
    
    # Distribution over word-topic assignments
    # This also takes into account all the variables that 
    # constitute the bound on log(sum_j exp(mean_j)) and
    # also incorporates the implicit entropy of Z_dvk
    bound -= np.sum((means*means + varcs) * docLens[:,np.newaxis] * lxi)
    bound += np.sum(means * 2 * docLens[:,np.newaxis] * s[:,np.newaxis] * lxi)
    bound += np.sum(means * -0.5 * docLens[:,np.newaxis])
    # The last term of line 1 gets cancelled out by part of the first term in line 2
    # so neither are included here.
    
    row_maxes = means.max(axis=1)
    means -= row_maxes[:,np.newaxis]
    expMeans = np.exp(means, out=means)
    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeans, vocab).data)
    
    bound -= np.sum(docLens[:,np.newaxis] * lxi * ((s*s)[:,np.newaxis] - (xi * xi)))
    bound += np.sum(0.5 * docLens[:,np.newaxis] * (s[:,np.newaxis] + xi))
#    bound -= np.sum(docLens[:,np.newaxis] * safe_log_one_plus_exp_of(xi))
    bound -= scaledSumOfLnOnePlusExp(docLens, xi)
    
    bound -= np.dot(s, docLens)
    
    means = np.log(expMeans, out=expMeans)
    means += row_maxes[:,np.newaxis]
    
    return bound
Example No. 14
def var_bound(data, modelState, queryState, XTX=None):
    '''
    Determines the variational bounds. Values are mutated in place, but are
    reset afterwards to their initial values. So it's safe to call in a serial
    manner.
    '''

    # Unpack the structs, for ease of access and efficiency
    W, X = data.words, data.feats
    D, _ = W.shape
    means, expMeans, varcs, docLens = queryState.means, queryState.expMeans, queryState.varcs, queryState.docLens
    F, P, K, A, R_A, fv, Y, R_Y, lfv, V, sigT, vocab, Ab, dtype = modelState.F, modelState.P, modelState.K, modelState.A, modelState.R_A, modelState.fv, modelState.Y, modelState.R_Y, modelState.lfv, modelState.V, modelState.sigT, modelState.vocab, modelState.Ab, modelState.dtype

    # Calculate some implicit variables
    isigT = la.inv(sigT)
    lnDetSigT = lnDetOfDiagMat(sigT)
    verifyProper(lnDetSigT, "lnDetSigT")

    if XTX is None:
        XTX = X.T.dot(X)

    bound = 0

    # Distribution over latent space
    bound -= (P * K) / 2. * LN_OF_2_PI
    bound -= P * lnDetSigT
    bound -= K * P * log(lfv)
    bound -= 0.5 * np.sum(1. / lfv * isigT.dot(Y) * Y)
    bound -= 0.5 * K * np.trace(R_Y)

    # And its entropy
    detR_Y = safeDet(R_Y, "R_Y")
    bound += 0.5 * LN_OF_2_PI_E + P / 2. * lnDetSigT + K / 2. * log(detR_Y)

    # Distribution over mapping from features to topics
    diff = (A - Y.dot(V))
    bound -= (F * K) / 2. * LN_OF_2_PI
    bound -= F * lnDetSigT
    bound -= K * P * log(fv)
    bound -= 0.5 * np.sum(1. / lfv * isigT.dot(diff) * diff)
    bound -= 0.5 * K * np.trace(R_A)

    # And its entropy
    detR_A = safeDet(R_A, "R_A")
    bound += 0.5 * LN_OF_2_PI_E + F / 2. * lnDetSigT + K / 2. * log(detR_A)

    # Distribution over document topics
    bound -= (D * K) / 2. * LN_OF_2_PI
    bound -= D / 2. * lnDetSigT
    diff = means - X.dot(A.T)
    bound -= 0.5 * np.sum(diff.dot(isigT) * diff)
    bound -= 0.5 * np.sum(
        varcs * np.diag(isigT)[np.newaxis, :]
    )  # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
    bound -= 0.5 * K * np.trace(XTX.dot(R_A))

    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs))

    # Distribution over word-topic assignments, and their entropy
    # and distribution over words. This is re-arranged as we need
    # means for some parts, and exp(means) for other parts
    expMeans = np.exp(means - means.max(axis=1)[:, np.newaxis], out=expMeans)
    R = sparseScalarQuotientOfDot(
        W, expMeans, vocab
    )  # D x V   [W / TB] is the quotient of the original over the reconstructed doc-term matrix
    S = expMeans * (R.dot(vocab.T))  # D x K

    bound += np.sum(docLens * np.log(np.sum(expMeans, axis=1)))
    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeans, vocab).data)

    bound += np.sum(means * S)
    bound += np.sum(2 * ssp.diags(docLens, 0) * means.dot(Ab) * means)
    bound -= 2. * scaledSelfSoftDot(means, docLens)
    bound -= 0.5 * np.sum(docLens[:, np.newaxis] * S *
                          (np.diag(Ab))[np.newaxis, :])

    bound -= np.sum(means * S)

    return bound
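For reference, the document-topic entropy term used in this bound and the previous one,

    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs))

is the standard entropy of D independent K-dimensional Gaussians with diagonal covariances,

    H[q(\theta)] = \frac{DK}{2} \ln(2\pi e) + \frac{1}{2} \sum_{d,k} \ln \mathrm{varcs}_{dk}

so LN_OF_2_PI_E is evidently ln(2*pi*e).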
Example No. 15
def var_bound(data, modelState, queryState):
    '''
    Determines the variational bounds. Values are mutated in place, but are
    reset afterwards to their initial values. So it's safe to call in a serial
    manner.
    '''

    # Unpack the structs, for ease of access and efficiency
    W, L, X = data.words, data.links, data.feats
    D, _ = W.shape
    means, varcs, docLens = queryState.means, queryState.varcs, queryState.docLens
    K, topicMean, topicCov, vocab, A = modelState.K, modelState.topicMean, modelState.topicCov, modelState.vocab, modelState.A

    # Calculate some implicit variables
    itopicCov = la.inv(topicCov)

    bound = 0

    expMeansOut = np.exp(means - means.max(axis=1)[:, np.newaxis])
    expMeansIn = np.exp(means - means.max(axis=0)[np.newaxis, :])
    lse_at_k = expMeansIn.sum(axis=0)

    if USE_NIW_PRIOR:
        pseudoObsMeans = K + NIW_PSEUDO_OBS_MEAN
        pseudoObsVar = K + NIW_PSEUDO_OBS_VAR

        # distribution over topic covariance
        bound -= 0.5 * K * pseudoObsVar * log(NIW_PSI)
        bound -= 0.5 * K * pseudoObsVar * log(2)
        bound -= fns.multigammaln(pseudoObsVar / 2., K)
        bound -= 0.5 * (pseudoObsVar + K - 1) * safe_log_det(topicCov)
        bound += 0.5 * NIW_PSI * np.trace(itopicCov)

        # and its entropy
        # is a constant which we skip

        # distribution over means
        bound -= 0.5 * K * log(1. / pseudoObsMeans) * safe_log_det(topicCov)
        bound -= 0.5 / pseudoObsMeans * (
            topicMean).T.dot(itopicCov).dot(topicMean)

        # and its entropy
        bound += 0.5 * safe_log_det(topicCov)  # +  a constant

    # Distribution over document topics
    bound -= (D * K) / 2. * LN_OF_2_PI
    bound -= D / 2. * safe_log_det(topicCov)  # log-determinant, as in the other bounds' lnDetSigT term
    diff = means - topicMean[np.newaxis, :]
    bound -= 0.5 * np.sum(diff.dot(itopicCov) * diff)
    bound -= 0.5 * np.sum(
        varcs * np.diag(itopicCov)[np.newaxis, :]
    )  # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.

    # And its entropy
    #     bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs))

    # Distribution over word-topic assignments and words and the former's
    # entropy, and similarly for out-links. This is somewhat jumbled to
    # avoid repeatedly taking the exp and log of the means
    W_weights = sparseScalarQuotientOfDot(
        W, expMeansOut, vocab
    )  # D x V   [W / TB] is the quotient of the original over the reconstructed doc-term matrix
    w_top_sums = expMeansOut * (W_weights.dot(vocab.T))  # D x K

    L_weights = sparseScalarQuotientOfNormedDot(L, expMeansOut, expMeansIn,
                                                lse_at_k)
    l_top_sums = L_weights.dot(expMeansIn) / lse_at_k[
        np.newaxis, :] * expMeansOut

    bound += np.sum(docLens * np.log(np.sum(expMeansOut, axis=1)))
    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeansOut, vocab).data)
    # means = np.log(expMeans, out=expMeans)
    #means = safe_log(expMeansOut, out=means)

    bound += np.sum(means * w_top_sums)
    bound += np.sum(2 * ssp.diags(docLens, 0) * means.dot(A) * means)
    bound -= 2. * scaledSelfSoftDot(means, docLens)
    bound -= 0.5 * np.sum(docLens[:, np.newaxis] * w_top_sums *
                          (np.diag(A))[np.newaxis, :])

    bound -= np.sum(means * w_top_sums)

    return bound