Code Example #1
File: mtm3.py  Project: budgefeeney/sidetopics
def log_likelihood (data, modelState, queryState):
    ''' 
    Return the log-likelihood of the given data W according to the model
    and the parameters inferred for the entries in W stored in the 
    queryState object.
    '''
    probs = rowwise_softmax(queryState.outMeans)
    doc_dist = colwise_softmax(queryState.inMeans)

    # Log-likelihood of the observed words
    word_likely = np.sum(
        sparseScalarProductOfSafeLnDot(data.words, probs, modelState.vocab).data
    )

    # Log-likelihood of the observed links
    link_likely = np.sum(
        sparseScalarProductOfSafeLnDot(data.links, probs, doc_dist).data
    )

    return word_likely + link_likely
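
All of these examples lean on a small set of helper functions from the sidetopics project. As a rough guide to what they compute, here is a minimal illustrative sketch, with behaviour inferred only from how the helpers are called in the examples (it is not the project's own implementation): `rowwise_softmax` and `colwise_softmax` turn a matrix of unnormalised means into per-row or per-column distributions, and `sparseScalarProductOfSafeLnDot(W, A, B)` evaluates `W[d,v] * ln((A @ B)[d,v])` only at the non-zero entries of the sparse matrix `W`, flooring the argument of the log so it never produces `-inf`.

import numpy as np
import scipy.sparse as ssp

def rowwise_softmax(M):
    # Softmax applied independently to each row, stabilised by subtracting the row max
    e = np.exp(M - M.max(axis=1)[:, np.newaxis])
    return e / e.sum(axis=1)[:, np.newaxis]

def colwise_softmax(M):
    # Softmax applied independently to each column
    e = np.exp(M - M.max(axis=0)[np.newaxis, :])
    return e / e.sum(axis=0)[np.newaxis, :]

def sparseScalarProductOfSafeLnDot(W, A, B):
    # Illustrative only: W[d,v] * ln((A @ B)[d,v]) evaluated solely where W is non-zero.
    # The real helper is vectorised differently and supports extra arguments
    # (e.g. the start/end batching used in some of the var_bound examples).
    Wc = W.tocoo()
    dots = np.einsum('ik,ki->i', A[Wc.row, :], B[:, Wc.col])   # (A @ B) at W's non-zeros
    data = Wc.data * np.log(np.maximum(dots, 1e-300))          # "safe" log: floor before taking ln
    return ssp.coo_matrix((data, (Wc.row, Wc.col)), shape=W.shape)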
Code Example #2
def log_likelihood (data, modelState, queryState):
    '''
    Return the log-likelihood of the given data according to the model
    and the parameters inferred for datapoints in the query-state object

    Actually returns a vector of D document specific log likelihoods
    '''
    topicProbs = topicDists(queryState)
    wordLikely = sparseScalarProductOfSafeLnDot(data.words, topicProbs, wordDists(modelState)).sum()

    docProbs      = np.empty((modelState.K, data.doc_count), dtype=modelState.dtype)
    docProbs[:,:] = topicProbs.T
    # Score the observed links: each linked document is represented by its topic distribution
    linkLikely    = sparseScalarProductOfSafeLnDot(data.links, topicProbs, docProbs).sum()
    
    return wordLikely + linkLikely
Code Example #3
def log_likelihood(data, modelState, queryState):
    """ 
    Return the log-likelihood of the given data W according to the model
    and the parameters inferred for the entries in W stored in the 
    queryState object.
    """
    return np.sum(sparseScalarProductOfSafeLnDot(data.words, rowwise_softmax(queryState.means), modelState.vocab).data)
Code Example #4
File: rtm.py  Project: budgefeeney/sidetopics
def log_likelihood (data, modelState, queryState):
    '''
    Return the log-likelihood of the given data W and X according to the model
    and the parameters inferred for the entries in W and X stored in the
    queryState object.

    Actually returns a vector of D document specific log likelihoods
    '''
    wordLikely = sparseScalarProductOfSafeLnDot(data.words, topicDists(queryState), wordDists(modelState)).sum()
    
    # For likelihood it's a bit tricky. In theory, given d =/= p, and letting 
    # c_d = 1/n_d, where n_d is the word count of document d, it's 
    #
    #   ln p(y_dp|weights) = E[\sum_k weights[k] * (c_d \sum_n z_dnk) * (c_p \sum_n z_pnk)]
    #                      = \sum_k weights[k] * c_d * E[\sum_n z_dnk] * c_p * E[\sum_n z_pnk]
    #                      = \sum_k weights[k] * topicDistsMean[d,k] * topicDistsMean[p,k]
    #                      
    #
    # where topicDistsMean[d,k] is the mean of the k-th element of the Dirichlet parameterised
    # by topicDist[d,:]
    #
    # However in the related paper on Supervised LDA, which uses this trick of average z_dnk,
    # they explicitly say that in the likelihood calculation they use the expectation
    # according to the _variational_ approximate posterior distribution q(z_dn) instead of the
    # actual distribution p(z_dn|topicDist), and thus
    #
    # E[\sum_n z_dnk] = \sum_n E_q[z_dnk] 
    #
    # There's no detail of the likelihood in either of the RTM papers, so we use the
    # variational approach
    
    linkLikely = 0
    
    return wordLikely + linkLikely
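
The comment above spells out what the omitted link term would look like under the variational approximation. Purely as an illustration of that formula, and not the project's code, a sketch with assumed names (`links` for the sparse D x D adjacency matrix, `topicMeans` for the D x K matrix of expected topic proportions, `weights` for the K regression weights) might be:

import numpy as np

def link_log_likelihood(links, topicMeans, weights):
    # ln p(y_dp | weights) ~ sum_k weights[k] * topicMeans[d,k] * topicMeans[p,k],
    # summed over the observed links (d, p). Hypothetical helper for illustration only.
    L = links.tocoo()
    scores = np.einsum('ik,k,ik->i',
                       topicMeans[L.row, :], weights, topicMeans[L.col, :])
    return scores.sum()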
Code Example #5
def log_likelihood (data, modelState, queryState):
    '''
    Return the log-likelihood of the given data W according to the model
    and the parameters inferred for the entries in W stored in the
    queryState object.
    
    Actually returns a vector of D document specific log likelihoods
    '''
    n_dk, n_kt = queryState.n_dk, modelState.n_kt
    a, b       = modelState.topicPrior, modelState.vocabPrior
   
    n_dk += a[np.newaxis,:]
    n_kt += b

    # Scale to create distributions over doc-topics and topic-vocabs
    doc_norm = n_dk.sum(axis = 1)
    voc_norm = n_kt.sum(axis = 1)
    
    n_dk /= doc_norm[:,np.newaxis]
    n_kt /= voc_norm[:,np.newaxis]
    
    # Use distributions to create log-likelihood. This could be made
    # faster still by not materializing the (admittedly sparse) matrix
    ln_likely = sparseScalarProductOfSafeLnDot(data.words, n_dk, n_kt).sum()
    
    # Rescale back to word-counts
    n_dk *= doc_norm[:,np.newaxis]
    n_kt *= voc_norm[:,np.newaxis]
    
    n_dk -= a[np.newaxis, :]
    n_kt -= b
    
    return ln_likely
Code Example #6
def log_likelihood (data, modelState, queryState):
    '''
    Return the log-likelihood of the given data W according to the model
    and the parameters inferred for the entries in W stored in the
    queryState object.
    
    Actually returns a vector of D document specific log likelihoods
    '''
    return sparseScalarProductOfSafeLnDot(data.words, topicDists(queryState), wordDists(modelState)).sum()
Code Example #7
File: dmr.py  Project: budgefeeney/sidetopics
def log_likelihood (data, model, query):
    '''
    Return the log-likelihood of the given data W according to the model
    and the parameters inferred for the entries in W stored in the
    queryState object.
    
    '''
    W = data.words if data.words.dtype == model.dtype else data.words.astype(model.dtype)
    return sparseScalarProductOfSafeLnDot(W, topicDists(query), wordDists(model)).sum()
Code Example #8
def log_likelihood(data, model, query, topicDistOverride=None):
    """
    Return the log-likelihood of the given data according to the model
    and the parameters inferred for datapoints in the query-state object

    Actually returns a vector of D document specific log likelihoods
    """
    tops = topicDistOverride if topicDistOverride is not None else topicDists(query)
    wordLikely = sparseScalarProductOfSafeLnDot(data.words, tops, wordDists(model)).sum()

    return wordLikely
Code Example #9
File: ctm.py  Project: budgefeeney/sidetopics
def var_bound(data, modelState, queryState):
    '''
    Determines the variational bounds. Values are mutated in place, but are
    reset afterwards to their initial values. So it's safe to call in a serial
    manner.
    '''

    # Unpack the structs, for ease of access and efficiency
    W   = data.words
    D,_ = W.shape
    means, expMeans, varcs, lxi, s, docLens = queryState.means, queryState.expMeans, queryState.varcs, queryState.lxi, queryState.s, queryState.docLens
    K, topicMean, sigT, vocab     = modelState.K, modelState.topicMean, modelState.sigT, modelState.vocab
    
    # Calculate some implicit  variables
    xi = _deriveXi(means, varcs, s)
    isigT = la.inv(sigT)
    
    bound = 0
    
    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * np.log(la.det(sigT))   # log-determinant of the topic covariance
    diff   = means - topicMean[np.newaxis,:]
    bound -= 0.5 * np.sum (diff.dot(isigT) * diff)
    bound -= 0.5 * np.sum (varcs * np.diag(isigT)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
       
    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs)) 
    
    # Distribution over word-topic assignments
    # This also takes into account all the variables that 
    # constitute the bound on log(sum_j exp(mean_j)) and
    # also incorporates the implicit entropy of Z_dvk
    bound -= np.sum((means*means + varcs) * docLens[:,np.newaxis] * lxi)
    bound += np.sum(means * 2 * docLens[:,np.newaxis] * s[:,np.newaxis] * lxi)
    bound += np.sum(means * -0.5 * docLens[:,np.newaxis])
    # The last term of line 1 gets cancelled out by part of the first term in line 2
    # so neither are included here
    
    expMeans = np.exp(means - means.max(axis=1)[:,np.newaxis], out=expMeans)
    bound -= -np.sum(sparseScalarProductOfSafeLnDot(W, expMeans, vocab).data)
    
    bound -= np.sum(docLens[:,np.newaxis] * lxi * ((s*s)[:,np.newaxis] - (xi * xi)))
    bound += np.sum(0.5 * docLens[:,np.newaxis] * (s[:,np.newaxis] + xi))
#    bound -= np.sum(docLens[:,np.newaxis] * safe_log_one_plus_exp_of(xi))
    bound -= scaledSumOfLnOnePlusExp(docLens, xi)
    
    bound -= np.dot(s, docLens)
    
    
    return bound
Code Example #10
def var_bound(data, modelState, queryState):
    """
    Determines the variational bounds. Values are mutated in place, but are
    reset afterwards to their initial values. So it's safe to call repeatedly.
    """

    # Unpack the structs, for ease of access and efficiency
    W, X = data.words, data.feats
    D, T, F = W.shape[0], W.shape[1], X.shape[1]
    means, docLens = queryState.means, queryState.docLens
    K, A, U, Y, V, covA, tv, ltv, fv, lfv, vocab, vocabPrior, dtype = (
        modelState.K,
        modelState.A,
        modelState.U,
        modelState.Y,
        modelState.V,
        modelState.covA,
        modelState.tv,
        modelState.ltv,
        modelState.fv,
        modelState.lfv,
        modelState.vocab,
        modelState.vocabPrior,
        modelState.dtype,
    )

    H = 0.5 * (np.eye(K) - np.ones((K, K), dtype=dtype) / K)
    Log2Pi = log(2 * pi)

    bound = 0

    # U and V are parameters with no distribution

    #
    # Y has a normal distribution; its covariance is unfortunately expensive to compute
    #
    P, Q = U.shape[1], V.shape[1]
    covY = np.eye(P * Q) * (lfv * ltv)
    covY += np.kron(V.T.dot(V), U.T.dot(U))
    covY = la.inv(covY, overwrite_a=True)

    # The expected likelihood of Y
    bound -= 0.5 * P * Q * Log2Pi
    bound -= 0.5 * P * Q * log(ltv * lfv)
    bound -= 0.5 / (lfv * ltv) * np.sum(Y * Y)  # 5x faster than np.trace(Y.dot(Y.T))
    bound -= 0.5 * np.trace(covY) * (lfv * ltv)
    # the traces of the posterior+prior covariance products cancel out across likelihoods

    # The entropy of Y
    bound += 0.5 * P * Q * (Log2Pi + 1) + 0.5 * safe_log_det(covY)

    #
    # A has a normal distribution
    #
    F, K = A.shape[0], A.shape[1]
    diff = A - U.dot(Y).dot(V.T)
    diff *= diff

    # The expected likelihood of A
    bound -= 0.5 * K * F * Log2Pi
    bound -= 0.5 * K * F * log(tv * fv)
    bound -= 0.5 / (fv * tv) * np.sum(diff)

    # The entropy of A
    bound += 0.5 * F * K * (Log2Pi + 1) + 0.5 * K * safe_log_det(covA)

    #
    # Theta, the matrix of means, has a normal distribution. Its row-covariance is
    # diagonal (i.e. it is several independent multivariate normals). The posterior
    # is made up of D K-dimensional normals with diagonal covariances
    #
    # We iterate through the topics in batches, to control memory use
    batchSize = min(BatchSize, D)
    batchCount = ceil(D / batchSize)
    feats = np.ndarray(shape=(batchSize, F), dtype=dtype)
    tops = np.ndarray(shape=(batchSize, K), dtype=dtype)
    trace = 0
    for b in range(0, batchCount):
        start = b * batchSize
        end = min(start + batchSize, D)
        batchSize = min(batchSize, end - start)

        feats[:batchSize, :] = X[start:end, :].toarray()
        np.dot(feats[:batchSize, :], A, out=tops[:batchSize, :])
        tops[:batchSize, :] -= means[start:end, :]
        tops[:batchSize, :] *= tops[:batchSize, :]
        trace += np.sum(tops[:batchSize, :])
    feats = None

    # The expected likelihood of the topic-assignments
    bound -= 0.5 * D * K * Log2Pi
    bound -= 0.5 * D * K * log(tv)
    bound -= 0.5 / tv * trace

    bound -= 0.5 * tv * np.sum(covA)  # this trace doesn't cancel as we
    # don't have a posterior on tv
    # The entropy of the topic-assignments
    bound += 0.5 * D * K * (Log2Pi + 1) + 0.5 * np.sum(covA)

    # Distribution over word-topic assignments, the words themselves, and the
    # former's entropy. This is somewhat jumbled to avoid repeatedly taking the
    # exp and log of the means.
    # Again we batch this for safety
    batchSize = min(BatchSize, D)
    batchCount = ceil(D / batchSize)
    V = np.ndarray(shape=(batchSize, K), dtype=dtype)  # scratch buffer; shadows the model's V factor, which is not used again below
    for b in range(0, batchCount):
        start = b * batchSize
        end = min(start + batchSize, D)
        batchSize = min(batchSize, end - start)

        meansBatch = means[start:end, :]
        docLensBatch = docLens[start:end]

        np.exp(meansBatch - meansBatch.max(axis=1)[:, np.newaxis], out=tops[:batchSize, :])
        expMeansBatch = tops[:batchSize, :]
        R = sparseScalarQuotientOfDot(
            W, expMeansBatch, vocab, start=start, end=end
        )  # BatchSize x V:   [W / TB] is the quotient of the original over the reconstructed doc-term matrix
        V[:batchSize, :] = expMeansBatch * (R[:batchSize, :].dot(vocab.T))  # BatchSize x K
        VBatch = V[:batchSize, :]

        bound += np.sum(docLensBatch * np.log(np.sum(expMeansBatch, axis=1)))
        bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeansBatch, vocab, start=start, end=end).data)

        bound += np.sum(meansBatch * VBatch)
        bound += np.sum(2 * ssp.diags(docLensBatch, 0) * meansBatch.dot(H) * meansBatch)
        bound -= 2.0 * scaledSelfSoftDot(meansBatch, docLensBatch)
        bound -= 0.5 * np.sum(docLensBatch[:, np.newaxis] * VBatch * (np.diag(H))[np.newaxis, :])

        bound -= np.sum(meansBatch * VBatch)

    return bound
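
The bound computations above and below also rely on `sparseScalarQuotientOfDot(W, expMeans, vocab)`, which, judging from how it is used, is the element-wise quotient of the observed doc-term matrix over the reconstructed one, evaluated only at the non-zero entries of `W`. A minimal illustrative version (ignoring the start/end batching arguments used above) might look like:

import numpy as np
import scipy.sparse as ssp

def sparseScalarQuotientOfDot(W, A, B):
    # Illustrative only: W[d,v] / (A @ B)[d,v] at W's non-zero positions.
    Wc = W.tocoo()
    denom = np.einsum('ik,ki->i', A[Wc.row, :], B[:, Wc.col])   # reconstruction at W's non-zeros
    data = Wc.data / np.maximum(denom, 1e-300)                  # guard against division by zero
    return ssp.coo_matrix((data, (Wc.row, Wc.col)), shape=W.shape)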
Code Example #11
File: mtm3.py  Project: budgefeeney/sidetopics
def var_bound(data, modelState, queryState):
    '''
    Determines the variational bounds. Values are mutated in place, but are
    reset afterwards to their initial values. So it's safe to call in a serial
    manner.
    '''
    
    # Unpack the structs, for ease of access and efficiency
    W, L, X  = data.words, data.links, data.feats
    D,_ = W.shape
    outMeans, outVarcs, inMeans, inVarcs, inDocCov, docLens = queryState.outMeans, queryState.outVarcs, queryState.inMeans, queryState.inVarcs, queryState.inDocCov, queryState.docLens
    K, topicMean, topicCov, outDocCov, vocab, A, dtype = modelState.K, modelState.topicMean, modelState.topicCov, modelState.outDocCov, modelState.vocab, modelState.A, modelState.dtype

    # Calculate some implicit  variables
    itopicCov = la.inv(topicCov)
    
    bound = 0

    expMeansOut = np.exp(outMeans - outMeans.max(axis=1)[:, np.newaxis])
    expMeansIn  = np.exp(inMeans - inMeans.max(axis=0)[np.newaxis, :])
    lse_at_k    = expMeansIn.sum(axis=0)

    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * safe_log_det(outDocCov * topicCov)
    diff   = outMeans - topicMean[np.newaxis,:]
    bound -= 0.5 * np.sum (diff.dot(itopicCov) * diff * 1./outDocCov)
    bound -= (0.5 / outDocCov) * np.sum(outVarcs * np.diag(itopicCov)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.

    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.log(outVarcs).sum()

    # Distribution over document in-links
    inDocPre = np.reciprocal(inDocCov)
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * safe_log_det(topicCov)
    bound -= K/2 * safe_log(inDocCov).sum()
    diff   = inMeans - outMeans
    bound -= 0.5 * np.sum (diff.dot(itopicCov) * diff * inDocPre[:,np.newaxis])
    bound -= 0.5 * np.sum((inVarcs * inDocPre[:,np.newaxis]) * np.diag(itopicCov)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.

    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.log(inVarcs).sum()

    # Distribution over topic assignments E[p(Z)] and E[p(Y)]
    W_weights  = sparseScalarQuotientOfDot(W, expMeansOut, vocab)  # D x V   [W / TB] is the quotient of the original over the reconstructed doc-term matrix
    top_sums   = expMeansOut * (W_weights.dot(vocab.T)) # D x K

    L_weights  = sparseScalarQuotientOfNormedDot(L, expMeansOut, expMeansIn, lse_at_k)
    top_sums  += expMeansOut * (L_weights.dot(expMeansIn) / lse_at_k[np.newaxis, :])

    # E[p(Z,Y)]
    linkLens = np.squeeze(np.array(L.sum(axis=1)))
    bound += np.sum(outMeans * top_sums)
    bound -= np.sum((docLens + linkLens) * np.log(np.sum(expMeansOut, axis=1)))

    # H[Z]
    bound += ((W_weights.dot(vocab.T)) * expMeansOut * outMeans).sum() \
           + ((W_weights.dot((np.log(vocab) * vocab).T)) * expMeansOut).sum() \
           - np.trace(sparseScalarProductOfSafeLnDot(W_weights, expMeansOut, vocab).dot(vocab.T).dot(expMeansOut.T))

    # H[Y]
    docVocab = (expMeansIn / lse_at_k[np.newaxis,:]).T.copy()
    bound += ((L_weights.dot(docVocab.T)) * expMeansOut * outMeans).sum() \
           + ((L_weights.dot((np.log(docVocab) * docVocab).T)) * expMeansOut).sum() \
           - np.trace(sparseScalarProductOfSafeLnDot(L_weights, expMeansOut, docVocab).dot(docVocab.T).dot(expMeansOut.T))

    # E[p(W)]
    vlv = np.log(vocab) * vocab
    bound += np.trace(expMeansOut.T.dot(W_weights.dot(vlv.T)))

    # E[p(L)]
    dld = np.log(docVocab) * docVocab
    bound += np.trace(expMeansOut.T.dot(L_weights.dot(dld.T)))

    return bound
Code Example #12
def var_bound(data, modelState, queryState):
    '''
    Determines the variational bounds. Values are mutated in place, but are
    reset afterwards to their initial values. So it's safe to call in a serial
    manner.
    '''
    
    # Unpack the structs, for ease of access and efficiency
    W   = data.words
    D,_ = W.shape
    means, expMeans, varcs, docLens = queryState.means, queryState.expMeans, queryState.varcs, queryState.docLens
    K, topicMean, sigT, vocab, vocabPrior, A = modelState.K, modelState.topicMean, modelState.sigT, modelState.vocab, modelState.vocabPrior, modelState.A
    
    # Calculate some implicit  variables
    isigT = la.inv(sigT)
    
    bound = 0
    
    if USE_NIW_PRIOR:
        pseudoObsMeans = K + NIW_PSEUDO_OBS_MEAN
        pseudoObsVar   = K + NIW_PSEUDO_OBS_VAR

        # distribution over topic covariance
        bound -= 0.5 * K * pseudoObsVar * log(NIW_PSI)
        bound -= 0.5 * K * pseudoObsVar * log(2)
        bound -= fns.multigammaln(pseudoObsVar / 2., K)
        bound -= 0.5 * (pseudoObsVar + K - 1) * safe_log_det(sigT)
        bound += 0.5 * NIW_PSI * np.trace(isigT)

        # and its entropy
        # is a constant which we skip
        
        # distribution over means
        bound -= 0.5 * K * log(1./pseudoObsMeans) * safe_log_det(sigT)
        bound -= 0.5 / pseudoObsMeans * (topicMean).T.dot(isigT).dot(topicMean)
        
        # and its entropy
        bound += 0.5 * safe_log_det(sigT) # +  a constant
        
    
    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * np.log(la.det(sigT))   # log-determinant of the topic covariance
    diff   = means - topicMean[np.newaxis,:]
    bound -= 0.5 * np.sum (diff.dot(isigT) * diff)
    bound -= 0.5 * np.sum(varcs * np.diag(isigT)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
       
    # And its entropy
#     bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs)) 
    
    # Distribution over word-topic assignments, the words themselves, and the
    # former's entropy. This is somewhat jumbled to avoid repeatedly taking the
    # exp and log of the means
    expMeans = np.exp(means - means.max(axis=1)[:,np.newaxis], out=expMeans)
    R = sparseScalarQuotientOfDot(W, expMeans, vocab)  # D x V   [W / TB] is the quotient of the original over the reconstructed doc-term matrix
    V = expMeans * (R.dot(vocab.T)) # D x K
    
    bound += np.sum(docLens * np.log(np.sum(expMeans, axis=1)))
    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeans, vocab).data)
    
    bound += np.sum(means * V)
    bound += np.sum(2 * ssp.diags(docLens,0) * means.dot(A) * means)
    bound -= 2. * scaledSelfSoftDot(means, docLens)
    bound -= 0.5 * np.sum(docLens[:,np.newaxis] * V * (np.diag(A))[np.newaxis,:])
    
    bound -= np.sum(means * V) 
    
    
    return bound
Code Example #13
File: stm_yv.py  Project: budgefeeney/sidetopics
def var_bound(data, modelState, queryState, XTX = None):
    '''
    Determines the variational bounds. Values are mutated in place, but are
    reset afterwards to their initial values. So it's safe to call in a serial
    manner.
    '''
    # Unpack the structs, for ease of access and efficiency
    W, X = data.words, data.feats
    D, _ = W.shape
    means, varcs, lxi, s, docLens = queryState.means, queryState.varcs, queryState.lxi, queryState.s, queryState.docLens
    F, P, K, A, R_A, fv, Y, R_Y, lfv, V, sigT, vocab, dtype = modelState.F, modelState.P, modelState.K, modelState.A, modelState.R_A, modelState.fv, modelState.Y, modelState.R_Y, modelState.lfv, modelState.V, modelState.sigT, modelState.vocab, modelState.dtype
    
    # Calculate some implicit  variables
    xi = ctm._deriveXi(means, varcs, s)
    isigT = la.inv(sigT)
    #lnDetSigT = np.log(la.det(sigT))
    lnDetSigT = lnDetOfDiagMat(sigT)
    verifyProper(lnDetSigT, "lnDetSigT")
    
    if XTX is None:
        XTX = X.T.dot(X)
    
    bound = 0
    
    # Distribution over latent space
    bound -= (P*K)/2. * LN_OF_2_PI
    bound -= P * lnDetSigT
    bound -= K * P * log(lfv)
    bound -= 0.5 * np.sum(1./lfv * isigT.dot(Y) * Y)
    bound -= 0.5 * K * np.trace(R_Y)
    
    # And its entropy
    detR_Y = safeDet(R_Y, "R_Y")
    bound += 0.5 * LN_OF_2_PI_E + P/2. * lnDetSigT + K/2. * log(detR_Y)
    
    # Distribution over mapping from features to topics
    diff   = (A - Y.dot(V))
    bound -= (F*K)/2. * LN_OF_2_PI
    bound -= F * lnDetSigT
    bound -= K * P * log(fv)
    bound -= 0.5 * np.sum (1./lfv * isigT.dot(diff) * diff)
    bound -= 0.5 * K * np.trace(R_A)
    
    # And its entropy
    detR_A = safeDet(R_A, "R_A")
    bound += 0.5 * LN_OF_2_PI_E + F/2. * lnDetSigT + K/2. * log(detR_A)
    
    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * lnDetSigT
    diff   = means - X.dot(A.T)
    bound -= 0.5 * np.sum (diff.dot(isigT) * diff)
    bound -= 0.5 * np.sum(varcs * np.diag(isigT)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
    bound -= 0.5 * K * np.trace(XTX.dot(R_A))
       
    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs)) 
    
    # Distribution over word-topic assignments
    # This also takes into account all the variables that 
    # constitute the bound on log(sum_j exp(mean_j)) and
    # also incorporates the implicit entropy of Z_dvk
    bound -= np.sum((means*means + varcs) * docLens[:,np.newaxis] * lxi)
    bound += np.sum(means * 2 * docLens[:,np.newaxis] * s[:,np.newaxis] * lxi)
    bound += np.sum(means * -0.5 * docLens[:,np.newaxis])
    # The last term of line 1 gets cancelled out by part of the first term in line 2
    # so neither are included here.
    
    row_maxes = means.max(axis=1)
    means -= row_maxes[:,np.newaxis]
    expMeans = np.exp(means, out=means)
    bound -= -np.sum(sparseScalarProductOfSafeLnDot(W, expMeans, vocab).data)
    
    bound -= np.sum(docLens[:,np.newaxis] * lxi * ((s*s)[:,np.newaxis] - (xi * xi)))
    bound += np.sum(0.5 * docLens[:,np.newaxis] * (s[:,np.newaxis] + xi))
#    bound -= np.sum(docLens[:,np.newaxis] * safe_log_one_plus_exp_of(xi))
    bound -= scaledSumOfLnOnePlusExp(docLens, xi)
    
    bound -= np.dot(s, docLens)
    
    means = np.log(expMeans, out=expMeans)
    means += row_maxes[:,np.newaxis]
    
    return bound
Code Example #14
def var_bound(data, modelState, queryState, XTX=None):
    '''
    Determines the variational bounds. Values are mutated in place, but are
    reset afterwards to their initial values. So it's safe to call in a serial
    manner.
    '''
    
    # Unpack the structs, for ease of access and efficiency
    W, X = data.words, data.feats
    D, _ = W.shape
    means, expMeans, varcs, docLens = queryState.means, queryState.expMeans, queryState.varcs, queryState.docLens
    F, P, K, A, R_A, fv, Y, R_Y, lfv, V, sigT, vocab, Ab, dtype = modelState.F, modelState.P, modelState.K, modelState.A, modelState.R_A, modelState.fv, modelState.Y, modelState.R_Y, modelState.lfv, modelState.V, modelState.sigT, modelState.vocab, modelState.Ab, modelState.dtype
    
    # Calculate some implicit  variables
    isigT = la.inv(sigT)
    lnDetSigT = lnDetOfDiagMat(sigT)
    verifyProper(lnDetSigT, "lnDetSigT")
    
    if XTX is None:
        XTX = X.T.dot(X)
    
    bound = 0
    
    # Distribution over latent space
    bound -= (P*K)/2. * LN_OF_2_PI
    bound -= P * lnDetSigT
    bound -= K * P * log(lfv)
    bound -= 0.5 * np.sum(1./lfv * isigT.dot(Y) * Y)
    bound -= 0.5 * K * np.trace(R_Y)
    
    # And its entropy
    detR_Y = safeDet(R_Y, "R_Y")
    bound += 0.5 * LN_OF_2_PI_E + P/2. * lnDetSigT + K/2. * log(detR_Y)
    
    # Distribution over mapping from features to topics
    diff   = (A - Y.dot(V))
    bound -= (F*K)/2. * LN_OF_2_PI
    bound -= F * lnDetSigT
    bound -= K * P * log(fv)
    bound -= 0.5 * np.sum (1./lfv * isigT.dot(diff) * diff)
    bound -= 0.5 * K * np.trace(R_A)
    
    # And its entropy
    detR_A = safeDet(R_A, "R_A")
    bound += 0.5 * LN_OF_2_PI_E + F/2. * lnDetSigT + K/2. * log(detR_A)
    
    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * lnDetSigT
    diff   = means - X.dot(A.T)
    bound -= 0.5 * np.sum (diff.dot(isigT) * diff)
    bound -= 0.5 * np.sum(varcs * np.diag(isigT)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
    bound -= 0.5 * K * np.trace(XTX.dot(R_A))
       
    # And its entropy
    bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs)) 
        
    # Distribution over word-topic assignments, their entropy, and the
    # distribution over words. This is re-arranged as we need the means
    # for some parts and exp(means) for others
    expMeans = np.exp(means - means.max(axis=1)[:,np.newaxis], out=expMeans)
    R = sparseScalarQuotientOfDot(W, expMeans, vocab)  # D x V   [W / TB] is the quotient of the original over the reconstructed doc-term matrix
    S = expMeans * (R.dot(vocab.T)) # D x K
    
    bound += np.sum(docLens * np.log(np.sum(expMeans, axis=1)))
    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeans, vocab).data)

    bound += np.sum(means * S)
    bound += np.sum(2 * ssp.diags(docLens,0) * means.dot(Ab) * means)
    bound -= 2. * scaledSelfSoftDot(means, docLens)
    bound -= 0.5 * np.sum(docLens[:,np.newaxis] * S * (np.diag(Ab))[np.newaxis,:])
    
    bound -= np.sum(means * S) 
    
    return bound
Code Example #15
File: mtm2.py  Project: budgefeeney/sidetopics
def var_bound(data, modelState, queryState):
    '''
    Determines the variational bounds. Values are mutated in place, but are
    reset afterwards to their initial values. So it's safe to call in a serial
    manner.
    '''
    
    # Unpack the structs, for ease of access and efficiency
    W, L, X  = data.words, data.links, data.feats
    D,_ = W.shape
    means, varcs, docLens = queryState.means, queryState.varcs, queryState.docLens
    K, topicMean, topicCov, vocab, A = modelState.K, modelState.topicMean, modelState.topicCov, modelState.vocab, modelState.A
    
    # Calculate some implicit  variables
    itopicCov = la.inv(topicCov)
    
    bound = 0

    expMeansOut = np.exp(means - means.max(axis=1)[:, np.newaxis])
    expMeansIn  = np.exp(means - means.max(axis=0)[np.newaxis, :])
    lse_at_k    = expMeansIn.sum(axis=0)
    
    if USE_NIW_PRIOR:
        pseudoObsMeans = K + NIW_PSEUDO_OBS_MEAN
        pseudoObsVar   = K + NIW_PSEUDO_OBS_VAR

        # distribution over topic covariance
        bound -= 0.5 * K * pseudoObsVar * log(NIW_PSI)
        bound -= 0.5 * K * pseudoObsVar * log(2)
        bound -= fns.multigammaln(pseudoObsVar / 2., K)
        bound -= 0.5 * (pseudoObsVar + K - 1) * safe_log_det(topicCov)
        bound += 0.5 * NIW_PSI * np.trace(itopicCov)

        # and its entropy
        # is a constant which we skip
        
        # distribution over means
        bound -= 0.5 * K * log(1./pseudoObsMeans) * safe_log_det(topicCov)
        bound -= 0.5 / pseudoObsMeans * (topicMean).T.dot(itopicCov).dot(topicMean)
        
        # and its entropy
        bound += 0.5 * safe_log_det(topicCov) # +  a constant
        
    
    # Distribution over document topics
    bound -= (D*K)/2. * LN_OF_2_PI
    bound -= D/2. * np.log(la.det(topicCov))   # log-determinant of the topic covariance
    diff   = means - topicMean[np.newaxis,:]
    bound -= 0.5 * np.sum (diff.dot(itopicCov) * diff)
    bound -= 0.5 * np.sum(varcs * np.diag(itopicCov)[np.newaxis,:]) # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
       
    # And its entropy
#     bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs)) 


    # Distribution over word-topic assignments, the words themselves, and the
    # former's entropy, and similarly for out-links. This is somewhat jumbled
    # to avoid repeatedly taking the exp and log of the means
    W_weights  = sparseScalarQuotientOfDot(W, expMeansOut, vocab)  # D x V   [W / TB] is the quotient of the original over the reconstructed doc-term matrix
    w_top_sums = expMeansOut * (W_weights.dot(vocab.T)) # D x K

    L_weights  = sparseScalarQuotientOfNormedDot(L, expMeansOut, expMeansIn, lse_at_k)
    l_top_sums = L_weights.dot(expMeansIn) / lse_at_k[np.newaxis, :] * expMeansOut
    
    bound += np.sum(docLens * np.log(np.sum(expMeansOut, axis=1)))
    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeansOut, vocab).data)
    # means = np.log(expMeans, out=expMeans)
    #means = safe_log(expMeansOut, out=means)
    
    bound += np.sum(means * w_top_sums)
    bound += np.sum(2 * ssp.diags(docLens,0) * means.dot(A) * means)
    bound -= 2. * scaledSelfSoftDot(means, docLens)
    bound -= 0.5 * np.sum(docLens[:,np.newaxis] * w_top_sums * (np.diag(A))[np.newaxis,:])
    
    bound -= np.sum(means * w_top_sums)
    
    
    return bound