print("Topic %d:" % (topic_idx)) print(" ".join([ tf_feature_names[i] for i in topic.argsort()[:-args.n_top_words - 1:-1] ])) for i in topic.argsort()[:-args.n_top_words - 1:-1]: print("%.4f" % (topic[i] / sum(topic)), end=' ') print() else: word2id = {} for idx, word in enumerate(tf_feature_names): word2id[word] = idx docs = {} for doc_idx, doc in enumerate(corpus): doc_list = [word2id[w] for w in doc.split() if w in word2id.keys()] docs[doc_idx] = doc_list print("Begin training.") model = LDA_GIBBS(docs, args.topic, args.iter, tf_feature_names) model.learn() print(model.get_perplexity()) for t_id, t in enumerate(model.phi): print("Topic %d:" % (t_id)) print(" ".join([ tf_feature_names[i] for i in t.argsort()[:-args.n_top_words - 1:-1] ])) for i in t.argsort()[:-args.n_top_words - 1:-1]: print("%.4f" % t[i], end=' ') print()