コード例 #1
0
ファイル: test_hdp.py プロジェクト: chyikwei/bnp
def test_partial_fit_after_fit():
    """Compare ``fit`` + ``partial_fit`` against ``partial_fit`` alone.

    One model is fully fitted and then partially fitted on the same matrix;
    a second model built from identical parameters only runs ``partial_fit``.
    Their ``transform`` outputs must be almost equal, which is the observable
    consequence of ``partial_fit`` resetting the global parameters.
    """
    doc_word = make_uniform_doc_word_matrix(
        n_topics=10, words_per_topic=3, docs_per_topic=3)

    hdp_kwargs = dict(
        n_topic_truncate=20,
        n_doc_truncate=5,
        learning_method='batch',
        max_iter=10,
        random_state=1,
    )

    # Model that has already been through a full fit on the data.
    refit_model = HierarchicalDirichletProcess(**hdp_kwargs)
    refit_model.fit(doc_word)
    refit_model.partial_fit(doc_word)

    # Reference model that only ever ran partial_fit.
    fresh_model = HierarchicalDirichletProcess(**hdp_kwargs)
    fresh_model.partial_fit(doc_word)

    refit_topics = refit_model.transform(doc_word)
    fresh_topics = fresh_model.transform(doc_word)
    assert_almost_equal(refit_topics, fresh_topics)
コード例 #2
0
ファイル: test_hdp.py プロジェクト: chyikwei/bnp
def test_hdp_transform():
    """Check ``transform`` both before and after the model is fitted.

    Before fitting, ``transform`` must raise ``NotFittedError`` with a message
    that starts with "no 'lambda_' attribute". After fitting, the output must
    have one row per input row and ``n_topic_truncate`` columns.
    """
    # Named once so the column-count assertion below cannot drift from the
    # value passed to the model.
    n_topic_truncate = 20

    doc_word = make_uniform_doc_word_matrix(
        n_topics=10, words_per_topic=3, docs_per_topic=3)

    model = HierarchicalDirichletProcess(
        n_topic_truncate=n_topic_truncate,
        n_doc_truncate=5,
        learning_method='batch',
        max_iter=10,
    )

    # Calling transform on an unfitted model has to fail loudly.
    assert_raises_regexp(NotFittedError, r"^no 'lambda_' attribute",
                         model.transform, doc_word)

    model.fit(doc_word)
    doc_topics = model.transform(doc_word)
    assert_equal(doc_topics.shape[0], doc_word.shape[0])
    assert_equal(doc_topics.shape[1], n_topic_truncate)
コード例 #3
0
ファイル: test_hdp.py プロジェクト: chyikwei/bnp
def test_hdp_fit_transform():
    """Check ``fit_transform`` against ``fit`` followed by ``transform``.

    Two models are built from the same parameters (including the same
    ``random_state``); the two code paths must give almost equal outputs.
    """
    doc_word = make_uniform_doc_word_matrix(
        n_topics=10, words_per_topic=3, docs_per_topic=3)

    hdp_kwargs = dict(
        n_topic_truncate=20,
        n_doc_truncate=5,
        learning_method='batch',
        max_iter=10,
        random_state=1,
    )

    # Two-step path: fit, then transform.
    two_step_model = HierarchicalDirichletProcess(**hdp_kwargs)
    two_step_model.fit(doc_word)
    two_step_topics = two_step_model.transform(doc_word)

    # One-step path on a separate, identically configured model.
    one_step_model = HierarchicalDirichletProcess(**hdp_kwargs)
    one_step_topics = one_step_model.fit_transform(doc_word)

    assert_almost_equal(two_step_topics, one_step_topics)
コード例 #4
0
                                   total_samples=1e6,
                                   max_doc_update_iter=200,
                                   verbose=1,
                                   mean_change_tol=1e-3,
                                   random_state=100)

# Run five partial_fit passes, each on a reshuffled copy of the corpus.
# NOTE(review): `rs` is defined outside this excerpt; if it is a fixed integer
# seed, every pass receives the same permutation -- confirm that is intended.
for pass_idx in range(5):
    start = time()
    print("iter %d" % pass_idx)
    hdp.partial_fit(shuffle(tf, random_state=rs))
    print("done in %0.3fs." % (time() - start))

print("\nTopics in HDP model:")
# NOTE(review): scikit-learn 1.2 removed `get_feature_names` in favour of
# `get_feature_names_out` -- check against the scikit-learn version in use.
tf_feature_names = tf_vectorizer.get_feature_names()
print_top_words(hdp, tf_feature_names, n_top_words)

# Report the strongest topics for each target group.
print("\nTop topics in each group:")
train_topics = hdp.transform(tf)
# Divide every row by its own sum.
row_totals = np.sum(train_topics, axis=1)
train_topics = train_topics / row_totals[:, np.newaxis]
for group_id, group_label in enumerate(target_names):
    # Rows whose target equals this group's index.
    members = np.where(train_targets == group_id)[0]
    group_mean = np.mean(train_topics[members, :], axis=0)
    # Reversed tail of the ascending argsort: the n_top_topics largest means,
    # biggest first.
    ranked = group_mean.argsort()[:-n_top_topics - 1:-1]
    print("group: %s:" % group_label)
    summary = ", ".join(["#%d (%.3f)" % (topic, group_mean[topic])
                         for topic in ranked])
    print("top topics: %s" % summary)
    print()