# stemming the words paragraphs = hf.stem(paragraphs) dictionary = gensim.corpora.Dictionary(paragraphs) # get stopwords stopIds = hf.get_stop_wordids(stopwords, dictionary) dictionary.filter_tokens(stopIds) bags = [] for p in paragraphs: bags.append(dictionary.doc2bow(p)) tfidf_model = gensim.models.TfidfModel(bags) tfidf_corpus = tfidf_model[bags] idf_matrix = gensim.similarities.MatrixSimilarity(tfidf_corpus) lsi_model = gensim.models.LsiModel(tfidf_corpus, id2word=dictionary, num_topics=100) lsi_corpus = lsi_model[bags] lsi_matrix = gensim.similarities.MatrixSimilarity(lsi_corpus) print(lsi_model.show_topics()) """ t = Tasks() t.task_one() t.task_two() t.task_three() t.task_four()