Exemple #1
0
def test_lda_inference_gd():
    """Check ``lda_inference_gd`` on documents from ``LdaSampleGenerator``.

    For ``max_iter`` of 5 and of 1000 the rows of the inferred
    document-topic matrix must sum to one.  The document likelihood must
    also rank: 1000 iterations > 5 iterations > uniform distribution.
    """
    rng = np.random.RandomState(2)
    # The draw order (topics, words, documents) fixes the problem sizes.
    n_topics = rng.randint(15, 20)
    n_words = rng.randint(400, 500)
    n_doc = rng.randint(100, 200)

    generator = LdaSampleGenerator(
        n_topics=n_topics,
        n_words=n_words,
        min_doc_size=3,
        mean_doc_size=10,
        doc_topic_prior=1.,
        topic_word_prior=1.,
        random_state=0,
    )
    _true_distr, doc_word_mtx = generator.generate_documents(n_doc)
    alpha = generator.doc_topic_prior_
    beta = generator.topic_word_distr_

    # Baseline: every document spreads its mass evenly over all topics.
    uniform_shape = (n_doc, n_topics)
    uniform_distr = np.ones(uniform_shape) / n_topics
    ll_uniform = doc_likelihood(doc_word_mtx, uniform_distr, alpha, beta)

    expected_row_sums = np.ones(n_doc)
    likelihoods = []
    for n_iter in (5, 1000):
        inferred = lda_inference_gd(doc_word_mtx, alpha, beta,
                                    max_iter=n_iter)
        assert_array_almost_equal(expected_row_sums, inferred.sum(axis=1))
        likelihoods.append(
            doc_likelihood(doc_word_mtx, inferred, alpha, beta))

    ll_few_iter, ll_many_iter = likelihoods
    assert_greater(ll_many_iter, ll_few_iter)
    assert_greater(ll_few_iter, ll_uniform)
Exemple #2
0
def test_doc_likelihood():
    """Check that ``doc_likelihood`` prefers the generating distribution.

    The likelihood computed with the document-topic distribution returned
    by the generator must exceed the likelihood computed with a uniform
    document-topic distribution.
    """
    rng = np.random.RandomState(2)
    # The draw order (topics, words, documents) fixes the problem sizes.
    n_topics = rng.randint(15, 20)
    n_words = rng.randint(400, 500)
    n_doc = rng.randint(100, 200)

    generator = LdaSampleGenerator(
        n_topics=n_topics,
        n_words=n_words,
        min_doc_size=3,
        mean_doc_size=10,
        doc_topic_prior=1.,
        topic_word_prior=1.,
        random_state=0,
    )
    true_distr, doc_word_mtx = generator.generate_documents(n_doc)
    alpha = generator.doc_topic_prior_
    beta = generator.topic_word_distr_

    ll_true = doc_likelihood(doc_word_mtx, true_distr, alpha, beta)

    # Baseline: every document spreads its mass evenly over all topics.
    uniform_shape = (n_doc, n_topics)
    uniform_distr = np.ones(uniform_shape) / n_topics
    ll_uniform = doc_likelihood(doc_word_mtx, uniform_distr, alpha, beta)

    assert_greater(ll_true, ll_uniform)
Exemple #3
0
def test_lda_inference_vi():
    """Check ``lda_inference_vi`` on documents from ``LdaSampleGenerator``.

    Inference runs on a smoothed copy of the topic-word distribution,
    while likelihoods are evaluated with the unsmoothed one.  After row
    normalisation the document likelihood must rank:
    1000 iterations > 5 iterations > uniform distribution.
    """
    rng = np.random.RandomState(4)
    # The draw order (topics, words, documents) fixes the problem sizes.
    n_topics = rng.randint(15, 20)
    n_words = rng.randint(400, 500)
    smooth_param = 0.01
    n_doc = rng.randint(100, 200)

    generator = LdaSampleGenerator(
        n_topics=n_topics,
        n_words=n_words,
        min_doc_size=3,
        mean_doc_size=10,
        doc_topic_prior=1.,
        topic_word_prior=1.,
        random_state=2,
    )
    _true_distr, doc_word_mtx = generator.generate_documents(n_doc)
    alpha = generator.doc_topic_prior_
    beta = generator.topic_word_distr_

    # Blend beta with a constant term of smooth_param / n_words.
    keep_weight = 1. - smooth_param
    constant_term = smooth_param / n_words
    smooth_beta = keep_weight * beta + constant_term

    # Baseline: every document spreads its mass evenly over all topics.
    uniform_shape = (n_doc, n_topics)
    uniform_distr = np.ones(uniform_shape) / n_topics
    ll_uniform = doc_likelihood(doc_word_mtx, uniform_distr, alpha, beta)

    likelihoods = []
    for n_iter in (5, 1000):
        inferred = lda_inference_vi(doc_word_mtx, alpha, smooth_beta,
                                    max_iter=n_iter)
        # Rescale each row in place so that it sums to one.
        inferred /= inferred.sum(axis=1)[:, np.newaxis]
        likelihoods.append(
            doc_likelihood(doc_word_mtx, inferred, alpha, beta))

    ll_few_iter, ll_many_iter = likelihoods
    assert_greater(ll_many_iter, ll_few_iter)
    assert_greater(ll_few_iter, ll_uniform)