import time

from sklearn.decomposition import LatentDirichletAllocation

# dbConnect, load_corpus, save_model, store_in_db and the config module `c`
# are project helpers defined elsewhere in this repo.

def generate_topics():
    db, cursor = dbConnect()
    for domain in c.domains:
        start_time = time.time()
        papers, tf, feature_names = load_corpus(domain, db)
        # lda, feature_names = load_model(domain, c.domain_topics[domain])
        lda = LatentDirichletAllocation(
            n_components=c.domain_topics[domain],  # `n_topics` was renamed `n_components` in scikit-learn 0.19
            max_iter=5,
            learning_method='online',
            learning_offset=50.,
            random_state=0)
        lda.fit(tf)

        # ---------- MODEL EVALUATION PARAMETERS --------------------------
        # perplexity() and score() infer the doc-topic distribution internally,
        # so there is no need to call the private _e_step() by hand.
        perplexity = lda.perplexity(tf)
        score = lda.score(tf)  # approximate log likelihood of the corpus
        topic_paper_dist = lda.transform(tf)  # per-paper topic distribution
        print("for", c.domain_topics[domain], domain,
              "topics ==> perplexity:", perplexity, "log likelihood:", score)

        save_model(lda, domain, c.domain_topics[domain], feature_names)
        # lda, feature_names = load_model(domain, c.domain_topics[domain])
        store_in_db(db, lda, topic_paper_dist, papers, feature_names, domain)
        print("--- time for " + domain + ": " +
              str((time.time() - start_time) / 60) + " minutes ---")
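# The save_model / load_model helpers called above are not shown in this file.
# Below is a minimal sketch of what they might look like, assuming the fitted
# model and vocabulary are persisted with joblib under a `models/` directory;
# the path layout and directory name are assumptions, not the project's actual
# code, though the signatures match the call sites above.

import os

import joblib

def save_model(lda, domain, n_topics, feature_names, model_dir='models'):
    """Persist a fitted LDA model and its vocabulary to disk (sketch)."""
    os.makedirs(model_dir, exist_ok=True)
    path = os.path.join(model_dir, '{}_{}.joblib'.format(domain, n_topics))
    joblib.dump({'lda': lda, 'feature_names': feature_names}, path)

def load_model(domain, n_topics, model_dir='models'):
    """Load a previously saved LDA model and vocabulary (sketch)."""
    path = os.path.join(model_dir, '{}_{}.joblib'.format(domain, n_topics))
    payload = joblib.load(path)
    return payload['lda'], payload['feature_names']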
import logging

from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

log = logging.getLogger(__name__)

# `texts`, `n_topics`, `course_ids`, `file_names` and the dump_* helpers are
# defined elsewhere in this script.
termfreq_vectorizer = CountVectorizer()
texts_vectored = termfreq_vectorizer.fit_transform(texts)

log.info("learning lda model")
NORMALIZED = True
lda_model = LatentDirichletAllocation(
    n_components=n_topics,  # `n_topics` was renamed `n_components` in scikit-learn 0.19
    learning_method='batch',
    evaluate_every=10,
    n_jobs=2,
    verbose=10,
    doc_topic_prior=None,   # defaults to 1 / n_components
    topic_word_prior=None)  # defaults to 1 / n_components

if NORMALIZED:
    log.debug("fitting normalized")
    # fit_transform() returns row-normalized doc-topic distributions
    content_lda = lda_model.fit_transform(texts_vectored)
else:
    log.debug("fitting unnormalized")
    lda_model.fit(texts_vectored)
    # the private _e_step() returns the raw (unnormalized) variational
    # doc-topic matrix instead of the normalized transform() output
    content_lda, _ = lda_model._e_step(texts_vectored,
                                       cal_sstats=False, random_init=False)

log.debug("components_ shape: {}".format(lda_model.components_.shape))
log.debug("content_lda shape: {}".format(content_lda.shape))

dump_course_topic_distribs(course_ids, file_names, content_lda,
                           'lda_course_topic_distribs_{}.tsv'.format(n_topics))
dump_topic_word_distribs(lda_model, termfreq_vectorizer,
                         'lda_topic_word_distribs_{}.tsv'.format(n_topics),
                         threshold=0.25)
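# dump_topic_word_distribs is also defined elsewhere. Below is a minimal
# sketch of one plausible implementation, assuming it writes one TSV row per
# (topic, word) pair whose normalized weight exceeds `threshold`; the column
# layout and per-topic normalization are assumptions, not the project's
# actual code.

def dump_topic_word_distribs(lda_model, vectorizer, out_path, threshold=0.25):
    """Write `topic_id<TAB>word<TAB>weight` rows for words above threshold (sketch)."""
    # get_feature_names_out() requires scikit-learn >= 1.0; older versions
    # use get_feature_names()
    feature_names = vectorizer.get_feature_names_out()
    with open(out_path, 'w') as out:
        for topic_idx, weights in enumerate(lda_model.components_):
            # normalize so each topic's word weights sum to 1
            distrib = weights / weights.sum()
            # walk words in descending weight order, stop below threshold
            for word_idx in distrib.argsort()[::-1]:
                if distrib[word_idx] < threshold:
                    break
                out.write('{}\t{}\t{:.4f}\n'.format(
                    topic_idx, feature_names[word_idx], distrib[word_idx]))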