コード例 #1
0
ファイル: mtm3.py プロジェクト: budgefeeney/sidetopics
def log_likelihood(data, modelState, queryState):
    '''
    Return the log-likelihood of the given data according to the model
    and the per-document parameters held in the queryState object.

    The result is the sum of two terms, one for the words and one for
    the links, each obtained by summing the stored entries of the sparse
    matrix returned by sparseScalarProductOfSafeLnDot.

    :param data: the dataset; its words and links matrices are read
    :param modelState: the model; its vocab is read
    :param queryState: the inferred parameters; its outMeans and inMeans
    are read
    :return: the word term plus the link term
    '''
    # Both terms share the row-wise softmax of the outMeans
    out_dists = rowwise_softmax(queryState.outMeans)
    in_dists = colwise_softmax(queryState.inMeans)

    # Word term: the words matrix against out_dists and the vocabulary
    word_terms = sparseScalarProductOfSafeLnDot(
        data.words, out_dists, modelState.vocab
    )
    word_total = np.sum(word_terms.data)

    # Link term: the links matrix against out_dists and in_dists
    link_terms = sparseScalarProductOfSafeLnDot(
        data.links, out_dists, in_dists
    )
    link_total = np.sum(link_terms.data)

    return word_total + link_total
コード例 #2
0
ファイル: mtm3.py プロジェクト: budgefeeney/sidetopics
def link_probs(model, train_tops, query_tops, min_link_probs, docSubset=None):
    '''
    Compute the probability of a link from each link-origin (query)
    document to every link-target (training) document, keeping only the
    probabilities that reach that origin document's minimum. Storing just
    those entries is meant to avoid materialising a complete dense matrix
    while still retaining what is needed to determine the mean average
    precisions.

    :param model: the trained model; only its dtype is read here, and it
    is used as the dtype of the stored probabilities
    :param train_tops: the representations of the link-target documents
    :param query_tops: the representations of the link-origin documents
    :param min_link_probs: the minimum link probability for each document
    in the subset. It is indexed by position within docSubset (i.e. by
    output row), not by document id
    :param docSubset: a list of documents (row indices into
    query_tops.outMeans) to consider for evaluation. If None, all
    documents are considered.
    :return: a (hopefully) sparse len(docSubset)xD CSR matrix of link
    probabilities, where D is the number of rows in train_tops.outMeans
    '''
    # Work out the dimensions of the result
    num_targets = train_tops.outMeans.shape[0]
    if docSubset is None:
        docSubset = list(range(query_tops.outMeans.shape[0]))
    num_queries = len(docSubset)

    # The softmax transform of the targets is shared by every query
    target_dists = colwise_softmax(train_tops.inMeans)

    # Accumulate the retained entries as COO triplets
    row_idx, col_idx, entries = [], [], []
    for out_row, src in enumerate(docSubset):
        src_dist = softmax(query_tops.outMeans[src, :])
        scores = target_dists.dot(src_dist)

        # A small tolerance stops rounding error from dropping entries
        # that sit exactly at the minimum
        threshold = min_link_probs[out_row] - 1E-9
        keep = np.where(scores >= threshold)[0]

        row_idx += [out_row] * len(keep)
        col_idx += list(keep)
        entries += list(scores[keep])

    # Convert the lists to numpy arrays with explicit dtypes, assemble
    # the COO matrix and hand it back in CSR form
    coo_rows = np.array(row_idx, dtype=np.int32)
    coo_cols = np.array(col_idx, dtype=np.int32)
    coo_vals = np.array(entries, dtype=model.dtype)

    result = ssp.coo_matrix(
        (coo_vals, (coo_rows, coo_cols)),
        shape=(num_queries, num_targets),
    )
    return result.tocsr()
コード例 #3
0
ファイル: mtm2.py プロジェクト: budgefeeney/sidetopics
def link_probs(model, topics, min_link_probs):
    '''
    Generate the probability of a link for all possible pairs of documents,
    but only store those probabilities that are bigger than or equal to the
    minimum. This ensures, hopefully, that we don't materialise a complete
    DxD matrix, but rather the minimum needed to determine the mean
    average precisions

    :param model: the trained model; only its dtype is read here, and it
        is used as the dtype of the stored probabilities
    :param topics: the topics for each of the documents we're generating
        links for; its means matrix (one row per document) is read
    :param min_link_probs: the minimum link probability for each document,
        indexed by document row
    :return: a (hopefully) sparse DxD CSR matrix of link probabilities,
        where D is the number of rows in topics.means
    '''
    # We build the result up as a COO matrix
    rows = []
    cols = []
    vals = []

    # Calculate the softmax transform parameters
    D = topics.means.shape[0]
    linkDist = colwise_softmax(topics.means)

    # Infer the link probabilities, one origin document at a time so that
    # only a single row of the DxD matrix is ever held densely
    for d in range(D):
        topDistAtD = softmax(topics.means[d, :])
        probs      = linkDist.dot(topDistAtD)

        # The 1E-9 tolerance stops rounding error from dropping entries
        # that sit exactly at the minimum
        relevant   = np.where(probs >= min_link_probs[d] - 1E-9)[0]

        rows.extend([d] * len(relevant))
        cols.extend(relevant)
        vals.extend(probs[relevant])

    # Build the COO matrix, then convert it to CSR. Converts lists to numpy
    # arrays to ensure appropriate dtypes
    r = np.array(rows, dtype=np.int32)
    c = np.array(cols, dtype=np.int32)
    v = np.array(vals, dtype=model.dtype)

    return ssp.coo_matrix((v, (r, c)), shape=(D, D)).tocsr()