proba(term | topic) = beta[topic][term] We shall for each topic find the top 20 words that contribute to a document being classified as said topic """ top_20_per_topic = np.argsort(self.beta * (-1), axis=1) for i in range(self.nb_topics): for j in range(self.nb_terms): if top_20_per_topic[i][j] < 20: print(self.index[j], end=" ") print() if __name__ == "__main__": """ Example of application using newsgroups """ from sklearn.datasets import fetch_20newsgroups train = fetch_20newsgroups(subset='train', remove=('headers', 'footers', 'quotes')) pp = Preprocessing() index, bow = pp.build_bow(pp.corpus_preproc(train["data"])) lda = LDA(5, bow, index, alpha=0.1, set_alpha=True) lda.estimation(max_iter_em=100, max_iter_var=10) lda.display_word_topic_association()