Esempio n. 1
0
def train_lda(data=SST_KAGGLE, num_topics=30, save_model=True):
    """Train an LDA topic model on a dataset and optionally save it.

    Args:
        data: Dataset identifier passed to ``get_documents``. It is also
            concatenated into the output file name, so it is expected to be
            a string.
        num_topics: Number of topics requested from the model.
        save_model: When true, save the trained model to
            ``lda_pickled_path + data + ".pkl"``.

    Returns:
        The trained ``models.LdaMulticore`` instance, so the result is still
        usable when ``save_model`` is false.
    """
    documents = get_documents(data=data)
    corpus = MyCorpus(documents=documents)
    lda = models.LdaMulticore(
        corpus,
        id2word=corpus.dictionary,
        num_topics=num_topics,
        workers=2,
        chunksize=10000,
        iterations=100,
    )
    if save_model:
        fname = Path(lda_pickled_path + data + ".pkl")
        # gensim documents ``fname`` as a str (or file handle); pass a plain
        # string instead of the Path object to stay within that contract.
        lda.save(fname=str(fname))
    return lda
Esempio n. 2
0
def train_lsi(data=SST_KAGGLE, num_topics=100, save_model=True):
    """Train an LSI model on a TF-IDF weighted corpus and optionally save it.

    Args:
        data: Dataset identifier passed to ``get_documents``. It is also
            concatenated into the output file name, so it is expected to be
            a string.
        num_topics: Number of topics requested from the model.
        save_model: When true, save the trained model to
            ``lda_pickled_path + data + ".pkl"``.

    Returns:
        The trained ``models.LsiModel`` instance, so the result is still
        usable when ``save_model`` is false.
    """
    documents = get_documents(data=data)
    corpus = MyCorpus(documents=documents)
    # Re-weight the corpus with TF-IDF before fitting the LSI model.
    tfidf = models.TfidfModel(corpus)
    corpus_tfidf = tfidf[corpus]
    lsi = models.LsiModel(
        corpus_tfidf,
        id2word=corpus.dictionary,
        num_topics=num_topics,
        chunksize=10000,
    )
    if save_model:
        # NOTE(review): this is the same path expression train_lda uses, so
        # saving both models for one dataset writes to the same file --
        # confirm whether a dedicated LSI location is intended.
        fname = Path(lda_pickled_path + data + ".pkl")
        # gensim documents ``fname`` as a str (or file handle); pass a plain
        # string instead of the Path object to stay within that contract.
        lsi.save(fname=str(fname))
    return lsi