Beispiel #1
0
# Build TF-IDF and LSI similarity indexes over the paragraph collection.
#
# Pipeline: stem -> dictionary -> drop stop words -> bag-of-words ->
# TF-IDF weighting -> LSI projection, with a MatrixSimilarity index built
# for both the TF-IDF and the LSI representation.

# Stem the words of every paragraph.
# NOTE(review): presumably `paragraphs` is a list of token lists both before
# and after this call (gensim's Dictionary requires that) -- confirm in hf.stem.
paragraphs = hf.stem(paragraphs)

# Map every distinct token to an integer id.
dictionary = gensim.corpora.Dictionary(paragraphs)

# Resolve the stop words to their dictionary ids and remove them from the
# dictionary, so the bag-of-words conversion below no longer emits them.
stopIds = hf.get_stop_wordids(stopwords, dictionary)
dictionary.filter_tokens(stopIds)

# One sparse bag-of-words vector per paragraph.
bags = [dictionary.doc2bow(p) for p in paragraphs]

# TF-IDF model fitted on the bag-of-words corpus, plus its similarity index.
tfidf_model = gensim.models.TfidfModel(bags)
tfidf_corpus = tfidf_model[bags]
idf_matrix = gensim.similarities.MatrixSimilarity(tfidf_corpus)

# LSI model trained on the TF-IDF weighted corpus.
lsi_model = gensim.models.LsiModel(tfidf_corpus, id2word=dictionary, num_topics=100)
# Project the same TF-IDF vectors the model was trained on. Feeding the raw
# bag-of-words counts here would index documents in a different weighting
# than the one the LSI topics were fitted to.
lsi_corpus = lsi_model[tfidf_corpus]
lsi_matrix = gensim.similarities.MatrixSimilarity(lsi_corpus)

print(lsi_model.show_topics())

"""
t = Tasks()
t.task_one()
t.task_two()
t.task_three()
t.task_four()