예제 #1
0
# Example: querying a gensim LDA model.
# NOTE(review): `lda` and `dct` are not defined in this snippet; they are
# presumably an already-trained LdaModel and the Dictionary built from its
# corpus — confirm against the surrounding code.

# ----- Topic distribution of a single document -----
# Parameters of get_document_topics:
#   (bow, minimum_probability=None, minimum_phi_value=None, per_word_topics=False)
# Parameters:
#   bow (list) – Bag-of-words representation of the document to get topics for.
#   minimum_probability (float) – Ignore topics with probability below this value (None by default). If set to None, a value of 1e-8 is used to prevent 0s.
#   per_word_topics (bool) – If True, also returns a list of topics, sorted in descending order of most likely topics for each word. It also returns a list of word_ids and each word's corresponding topics' phi values, multiplied by feature length (i.e., word count).
#   minimum_phi_value (float) – If per_word_topics is True, this represents a lower bound on the term probabilities that are included (None by default). If set to None, a value of 1e-8 is used to prevent 0s.
# Returns:
#   Topic distribution for the given document bow, as a list of (topic_id, topic_probability) 2-tuples.
# Lower-case, strip and whitespace-split the sentence, then convert the tokens
# to a bag-of-words with `dct`.
test = dct.doc2bow("I love Kitten".lower().strip().split())
print(lda.get_document_topics(test))
# Same document queried through the model's indexing syntax.
# NOTE(review): presumably equivalent to the call above — confirm in gensim docs.
print(lda[test])

# ----- Topics associated with a single word -----
# Parameters of get_term_topics: (word_id, minimum_probability=None)
# Returns the topics associated with the given word.
# Each topic is represented as a tuple of (topic_id, term_probability).
print(lda.get_term_topics(0))

# ----- Print the composition of one specific topic -----
# NOTE(review): the original comment on this call listed the parameters as
# (word_id, minimum_probability=None) and the output as [(word, probability), … ].
# That signature belongs to get_term_topics; the gensim docs describe
# get_topic_terms(topicid, topn=10) returning [(word_id, probability), … ] — confirm.
print(lda.get_topic_terms(0))
# Parameters of show_topic: (topicno, topn=10)
# Output: list, format: [(word, probability), … ].
print(lda.show_topic(0))
# Parameters of print_topic: (topicno, topn=10)
# Output: string, format: ‘-0.340 * “category” + 0.298 * “$M$” + 0.183 * “algebra” + … ‘.
print(lda.print_topic(0))

# ----- Print the composition of all topics -----
# Default parameters of show_topics: (num_topics=10, num_words=10, log=False, formatted=True)
# Output: list of (topic_id, string) pairs, format: [(0, ‘-0.340 * “category” + 0.298 * “$M$” + 0.183 * “algebra” + … ‘), ...]
print(lda.show_topics())
예제 #2
0
def get_term_topics(model: LdaModel, dictionary: Dictionary, term: str):
    """Return the topics associated with *term*, or ``None`` if it is unknown.

    The term is translated to its integer id through ``dictionary.token2id``
    and that id is handed to ``model.get_term_topics``.

    Args:
        model: Model whose ``get_term_topics`` method is queried.
        dictionary: Object exposing a ``token2id`` mapping (token -> id).
        term: Token to look up.

    Returns:
        Whatever ``model.get_term_topics`` returns for the term's id, or
        ``None`` when the term is not a key of ``dictionary.token2id``.
    """
    try:
        word_id = dictionary.token2id[term]
    except KeyError:
        # Unknown token: there is no id to query the model with.
        return None
    return model.get_term_topics(word_id)
예제 #3
0
#-*- coding: utf-8 -*-
import pickle

from gensim.corpora import Dictionary
from gensim.models import LdaModel

# Example script: build an LdaModel from a pickled corpus and run three queries.

# Load the corpus from disk.
# NOTE: pickle.load can execute arbitrary code while unpickling; only use it
# on files from a trusted source.
# NOTE(review): the file presumably holds a list of tokenized documents
# (it is passed to Dictionary() and each element to doc2bow() below) — confirm.
with open("../data/corpus_test.pkl", "rb") as f:
    corpus = pickle.load(f)

# Build the Dictionary from the loaded documents, then rebind `corpus` to the
# bag-of-words form of each document.
corpus_dictionary = Dictionary(corpus)
corpus = [corpus_dictionary.doc2bow(text) for text in corpus]

# Inputs for the model: the bag-of-words corpus and the number of topics.
CORPUS = corpus
TOPIC_NUM = 10
# Only corpus and num_topics are passed; every other LdaModel option is left
# at its default.
lda = LdaModel(corpus=CORPUS, num_topics=TOPIC_NUM)

# Query the model with a hand-written bag-of-words.
# NOTE(review): presumably (word_id, count) pairs, i.e. word ids 0 and 1 once
# each — confirm.
# NOTE(review): despite the "*_matrix" names, these calls presumably return
# lists of 2-tuples rather than matrices — confirm in gensim docs.
doc_topic_matrix = lda.get_document_topics([(0, 1), (1, 1)])
# Argument 1 is presumably a word id here ...
term_topic_matrix = lda.get_term_topics(1)
# ... and presumably a topic id here — confirm.
topic_term_matrix = lda.get_topic_terms(1)