# Demo of the query methods exposed by a trained gensim LdaModel.
# NOTE(review): assumes `dct` (a gensim Dictionary) and `lda` (a trained
# LdaModel) are defined earlier -- they are not visible in this snippet.

# ----- Topics of a single document -----
# get_document_topics(bow, minimum_probability=None, minimum_phi_value=None,
#                     per_word_topics=False)
# Parameters:
#   bow (list) - Bag-of-words representation of the document to get topics for.
#   minimum_probability (float) - Ignore topics with probability below this
#       value (None by default). If set to None, a value of 1e-8 is used to
#       prevent 0s.
#   per_word_topics (bool) - If True, also returns a list of topics, sorted in
#       descending order of most likely topics for that word. It also returns
#       a list of word_ids and each word's corresponding topics' phi_values,
#       multiplied by feature length (i.e. word count).
#   minimum_phi_value (float) - If per_word_topics is True, this represents a
#       lower bound on the term probabilities that are included (None by
#       default). If set to None, a value of 1e-8 is used to prevent 0s.
# Returns:
#   Topic distribution for the given document bow, as a list of
#   (topic_id, topic_probability) 2-tuples.
test = dct.doc2bow("I love Kitten".lower().strip().split())
print(lda.get_document_topics(test))
# Indexing the model with a bag-of-words document also yields its topics.
print(lda[test])

# ----- Topics associated with a single word -----
# get_term_topics(word_id, minimum_probability=None)
# Returns the topics relevant to the given word; each topic is represented
# as a tuple of (topic_id, term_probability).
print(lda.get_term_topics(0))

# ----- Composition of one specific topic -----
# get_topic_terms(topicid, topn=10)
# Returns a list of (word_id, probability) pairs.
print(lda.get_topic_terms(0))
# show_topic(topicid, topn=10)
# Returns a list of (word, probability) pairs.
print(lda.show_topic(0))
# print_topic(topicno, topn=10)
# Returns a string such as '0.340*"category" + 0.298*"$M$" + 0.183*"algebra" + ...'.
print(lda.print_topic(0))

# ----- Composition of all topics -----
# show_topics(num_topics=10, num_words=10, log=False, formatted=True)
# With formatted=True, returns a list of (topic_id, topic_string) pairs such as
# [(0, '0.340*"category" + 0.298*"$M$" + 0.183*"algebra" + ...'), ...].
print(lda.show_topics())
def get_term_topics(
    model: "LdaModel", dictionary: "Dictionary", term: str
) -> "list[tuple[int, float]] | None":
    """Return the topics associated with *term*, looked up by its surface form.

    The term is translated to its integer id through ``dictionary.token2id``
    and that id is passed to ``model.get_term_topics``.

    The gensim type annotations are quoted so that defining this function
    does not require those names to be imported yet.

    Args:
        model: Trained LDA model that is queried for the term's topics.
        dictionary: Mapping object whose ``token2id`` attribute maps tokens
            to integer ids.
        term: Token to look up.

    Returns:
        The result of ``model.get_term_topics(word_id)``, or ``None`` when
        *term* is not present in ``dictionary.token2id``.
    """
    word_id = dictionary.token2id.get(term)
    # Compare against None explicitly: 0 is a valid word id but is falsy.
    if word_id is None:
        return None
    return model.get_term_topics(word_id)
#-*- coding: utf-8 -*-
# Train a small LDA model on a pickled corpus and pull out example
# document-topic, term-topic and topic-term results.
import pickle

from gensim.corpora import Dictionary
from gensim.models import LdaModel

# SECURITY NOTE: pickle.load can execute arbitrary code while deserializing;
# only load this file if it comes from a trusted source.
# NOTE(review): presumably the pickle holds a list of tokenized documents
# (list of lists of str), since it is fed to Dictionary() -- confirm.
with open("../data/corpus_test.pkl", "rb") as f:
    corpus = pickle.load(f)

# Build the token <-> id mapping, then convert every document to
# bag-of-words form.
corpus_dictionary = Dictionary(corpus)
corpus = [corpus_dictionary.doc2bow(text) for text in corpus]

CORPUS = corpus
TOPIC_NUM = 10

# Pass the dictionary as id2word so that show_topic()/print_topic() print
# the actual words instead of numeric ids.
lda = LdaModel(corpus=CORPUS, num_topics=TOPIC_NUM, id2word=corpus_dictionary)

# Topic distribution of a tiny document containing word ids 0 and 1 once each.
doc_topic_matrix = lda.get_document_topics([(0, 1), (1, 1)])
# Topics associated with word id 1.
term_topic_matrix = lda.get_term_topics(1)
# Most probable word ids (with probabilities) for topic 1.
topic_term_matrix = lda.get_topic_terms(1)