コード例 #1
0
ファイル: test_hdp.py プロジェクト: chyikwei/bnp
def test_hdp_dense_input():
    """Check that HDP output does not depend on the input container.

    The same document-word matrix is handed to three identically
    configured models: once via ``todense()``, once via ``toarray()`` and
    once exactly as ``make_uniform_doc_word_matrix`` returned it.  The
    ``fit_transform`` results must be almost equal.
    """
    X = make_uniform_doc_word_matrix(
        n_topics=10, words_per_topic=3, docs_per_topic=3)
    # Both conversions happen up front, before any model is fitted.
    inputs = (X.todense(), X.toarray(), X)

    params = dict(
        n_topic_truncate=20,
        n_doc_truncate=5,
        learning_method='batch',
        max_iter=10,
        random_state=1,
    )

    # A fresh model per input, so no fitted state is shared between runs.
    from_dense, from_array, from_original = [
        HierarchicalDirichletProcess(**params).fit_transform(data)
        for data in inputs
    ]

    assert_almost_equal(from_dense, from_array)
    assert_almost_equal(from_array, from_original)
コード例 #2
0
ファイル: test_hdp.py プロジェクト: chyikwei/bnp
def test_likelihood_check():
    """Check that ``check_doc_likelihood`` does not change the result.

    Two models that differ only in ``check_doc_likelihood`` (True vs.
    False) are fitted on the same data; their ``fit_transform`` outputs
    must be almost equal.
    """
    X = make_uniform_doc_word_matrix(
        n_topics=10, words_per_topic=3, docs_per_topic=3)

    # Settings common to both models.
    shared = dict(
        n_topic_truncate=20,
        n_doc_truncate=5,
        learning_method='batch',
        max_iter=10,
        random_state=1,
        evaluate_every=1,
    )

    checked = HierarchicalDirichletProcess(
        check_doc_likelihood=True, **shared)
    ret_checked = checked.fit_transform(X)

    unchecked = HierarchicalDirichletProcess(
        check_doc_likelihood=False, **shared)
    ret_unchecked = unchecked.fit_transform(X)

    assert_almost_equal(ret_checked, ret_unchecked)
コード例 #3
0
 def test_uniform_diag_matrix(self):
     """Test that one word and one doc per topic gives an identity matrix.

     The number of topics is drawn from ``self.rand``; the dense form of
     the generated matrix is compared against ``np.eye(n_topics)``.
     """
     n_topics = self.rand.randint(100, 200)
     generated = make_uniform_doc_word_matrix(
         n_topics=n_topics,
         words_per_topic=1,
         docs_per_topic=1,
     )
     expected = np.eye(n_topics)
     assert_array_equal(generated.toarray(), expected)
コード例 #4
0
 def test_shuffle_uniform_diag_matrix(self):
     """Test shuffled diag matrix with make uniform matrix.

     With one word and one doc per topic and ``shuffle=True``, at least
     one diagonal entry of the dense matrix is expected to be below 1.
     """
     n_topics = self.rand.randint(100, 200)
     shuffled = make_uniform_doc_word_matrix(
         n_topics=n_topics,
         words_per_topic=1,
         docs_per_topic=1,
         shuffle=True,
     )
     dense = shuffled.toarray()
     # any() stops at the first diagonal entry that is below 1.
     diag_shift = any(dense[i, i] < 1. for i in range(n_topics))
     assert_true(diag_shift)
コード例 #5
0
    def test_make_uniform_matrix(self):
        """Test matrix shape and words per document.

        Random sizes are drawn from ``self.rand``.  The generated matrix
        must have ``n_topics * docs_per_topic`` rows and
        ``n_topics * words_per_topic`` columns, and every row must sum to
        ``words_per_topic``.
        """
        # Draw order is kept as-is so the random sequence is unchanged.
        n_topics = self.rand.randint(100, 200)
        words_per_topic = self.rand.randint(10, 20)
        docs_per_topic = self.rand.randint(100, 2000)

        matrix = make_uniform_doc_word_matrix(
            n_topics=n_topics,
            words_per_topic=words_per_topic,
            docs_per_topic=docs_per_topic,
        )

        n_docs = n_topics * docs_per_topic
        assert_equal(matrix.shape[0], n_docs)
        assert_equal(matrix.shape[1], n_topics * words_per_topic)

        # Row sums are taken on the matrix as returned rather than on
        # ``matrix.toarray()``: at the upper end of the random ranges the
        # dense copy would hold roughly 400k x 4k entries.
        # NOTE(review): assumes ``matrix`` is a scipy.sparse object (it
        # exposes ``toarray``) -- confirm.
        row_sum = np.asarray(matrix.sum(axis=1)).ravel()
        assert_array_equal(row_sum, np.repeat(words_per_topic, n_docs))
コード例 #6
0
ファイル: test_hdp.py プロジェクト: chyikwei/bnp
def test_hdp_transform():
    """Test HDP ``transform`` before and after fitting.

    Calling ``transform`` on an unfitted model must raise
    ``NotFittedError``.  After ``fit`` the output must have one row per
    row of the input and ``n_topic_truncate`` columns.
    """
    n_topic_truncate = 20

    X = make_uniform_doc_word_matrix(
        n_topics=10, words_per_topic=3, docs_per_topic=3)

    model = HierarchicalDirichletProcess(
        n_topic_truncate=n_topic_truncate,
        n_doc_truncate=5,
        learning_method='batch',
        max_iter=10,
    )

    # Unfitted model: transform has to fail.
    assert_raises_regexp(
        NotFittedError, r"^no 'lambda_' attribute", model.transform, X)

    model.fit(X)
    doc_topics = model.transform(X)
    assert_equal(doc_topics.shape[0], X.shape[0])
    assert_equal(doc_topics.shape[1], n_topic_truncate)
コード例 #7
0
ファイル: test_hdp.py プロジェクト: chyikwei/bnp
def test_hdp_fit_transform():
    """Test that ``fit_transform`` matches ``fit`` followed by ``transform``.

    Two models built from the same parameters (including
    ``random_state``) are run on the same data, one through each path.
    """
    X = make_uniform_doc_word_matrix(
        n_topics=10, words_per_topic=3, docs_per_topic=3)

    params = dict(
        n_topic_truncate=20,
        n_doc_truncate=5,
        learning_method='batch',
        max_iter=10,
        random_state=1,
    )

    # Path 1: separate fit and transform calls.
    two_step = HierarchicalDirichletProcess(**params)
    two_step.fit(X)
    two_step_out = two_step.transform(X)

    # Path 2: the combined call on a fresh model.
    one_step_out = HierarchicalDirichletProcess(**params).fit_transform(X)

    assert_almost_equal(two_step_out, one_step_out)
コード例 #8
0
ファイル: test_hdp.py プロジェクト: chyikwei/bnp
def test_hdp_invalid_parameters():
    """Test HDP invalid parameters.

    ``fit`` must raise ``ValueError`` with a message matching the given
    pattern for a negative ``max_iter`` and for an unknown
    ``learning_method``.
    """
    X = make_uniform_doc_word_matrix(
        n_topics=10, words_per_topic=3, docs_per_topic=3)

    # Valid settings shared by every case.
    base = dict(n_topic_truncate=10, n_doc_truncate=3, random_state=0)

    # (invalid override, regex the error message must match)
    cases = (
        (dict(max_iter=-1), r"^Invalid "),
        (dict(learning_method='na'), r"^Invalid 'learning_method'"),
    )
    for override, pattern in cases:
        model = HierarchicalDirichletProcess(**dict(base, **override))
        assert_raises_regexp(ValueError, pattern, model.fit, X)
コード例 #9
0
ファイル: test_hdp.py プロジェクト: chyikwei/bnp
def test_hdp_topic_distribution():
    """Test HDP ``topic_distribution``.

    The call must raise ``NotFittedError`` on an unfitted model; after
    ``fit`` the returned values must sum to 1.0.
    """
    X = make_uniform_doc_word_matrix(
        n_topics=10, words_per_topic=3, docs_per_topic=3)

    model = HierarchicalDirichletProcess(
        n_topic_truncate=20,
        n_doc_truncate=5,
        learning_method='batch',
        max_iter=10,
        random_state=1,
    )

    # Unfitted model: the call has to fail.
    assert_raises_regexp(
        NotFittedError, r"^no 'lambda_' attribute", model.topic_distribution)

    model.fit(X)
    total = np.sum(model.topic_distribution())
    assert_almost_equal(total, 1.0)
コード例 #10
0
ファイル: test_hdp.py プロジェクト: chyikwei/bnp
def test_partial_fit_after_fit():
    """Test running ``partial_fit`` after ``fit``.

    A model that was fitted and then partially fitted must transform the
    data the same way as a fresh model that was only partially fitted,
    i.e. ``partial_fit`` should reset global parameters.
    """
    X = make_uniform_doc_word_matrix(
        n_topics=10, words_per_topic=3, docs_per_topic=3)

    params = dict(
        n_topic_truncate=20,
        n_doc_truncate=5,
        learning_method='batch',
        max_iter=10,
        random_state=1,
    )

    # Model A: full fit first, then partial_fit on the same data.
    refitted = HierarchicalDirichletProcess(**params)
    refitted.fit(X)
    refitted.partial_fit(X)

    # Model B: partial_fit only.
    fresh = HierarchicalDirichletProcess(**params)
    fresh.partial_fit(X)

    assert_almost_equal(refitted.transform(X), fresh.transform(X))