def train_lda(data=SST_KAGGLE, num_topics=30, save_model=True):
    """Train an LDA topic model on a dataset and optionally save it to disk.

    Args:
        data: Dataset selector forwarded to ``get_documents``. It is also
            concatenated into the output file name, so it must be a string.
        num_topics: Number of topics to fit.
        save_model: When true, the model is saved to
            ``lda_pickled_path + data + ".pkl"``.

    Returns:
        The trained ``models.LdaMulticore`` instance, so the result is still
        usable when ``save_model`` is false.
    """
    documents = get_documents(data=data)
    corpus = MyCorpus(documents=documents)

    lda = models.LdaMulticore(
        corpus,
        id2word=corpus.dictionary,
        num_topics=num_topics,
        workers=2,
        chunksize=10000,
        iterations=100,
    )

    if save_model:
        fname = Path(lda_pickled_path + data + ".pkl")
        # The save API documents `fname` as a str (assuming `models` is
        # gensim.models), so hand over the string form of the same path.
        lda.save(fname=str(fname))

    return lda
def train_lsi(data=SST_KAGGLE, num_topics=100, save_model=True):
    """Train an LSI model on a TF-IDF weighted corpus and optionally save it.

    Args:
        data: Dataset selector forwarded to ``get_documents``. It is also
            concatenated into the output file name, so it must be a string.
        num_topics: Number of latent dimensions to fit.
        save_model: When true, the model is saved to
            ``lda_pickled_path + data + ".pkl"``.

    Returns:
        The trained ``models.LsiModel`` instance, so the result is still
        usable when ``save_model`` is false.
    """
    documents = get_documents(data=data)
    corpus = MyCorpus(documents=documents)

    # Re-weight the corpus with TF-IDF before fitting the LSI model.
    tfidf = models.TfidfModel(corpus)
    corpus_tfidf = tfidf[corpus]

    lsi = models.LsiModel(
        corpus_tfidf,
        id2word=corpus.dictionary,
        num_topics=num_topics,
        chunksize=10000,
    )

    if save_model:
        # NOTE(review): this is the same location train_lda writes to, so for
        # the same `data` one model overwrites the other. Kept unchanged
        # because loaders elsewhere may rely on this path -- confirm and give
        # LSI its own path if the collision is unintended.
        fname = Path(lda_pickled_path + data + ".pkl")
        # The save API documents `fname` as a str (assuming `models` is
        # gensim.models), so hand over the string form of the same path.
        lsi.save(fname=str(fname))

    return lsi