def test_lda_transform_mismatch(): # test `n_features` mismatch in partial_fit and transform rng = np.random.RandomState(0) X = rng.randint(4, size=(20, 10)) X_2 = rng.randint(4, size=(10, 8)) n_components = rng.randint(3, 6) lda = LatentDirichletAllocation(n_components=n_components, random_state=rng) lda.partial_fit(X) with pytest.raises(ValueError, match=r"^The provided data has"): lda.partial_fit(X_2)
def test_lda_partial_fit_dim_mismatch(): # test `n_features` mismatch in `partial_fit` rng = np.random.RandomState(0) n_components = rng.randint(3, 6) n_col = rng.randint(6, 10) X_1 = np.random.randint(4, size=(10, n_col)) X_2 = np.random.randint(4, size=(10, n_col + 1)) lda = LatentDirichletAllocation(n_components=n_components, learning_offset=5., total_samples=20, random_state=rng) lda.partial_fit(X_1) with pytest.raises(ValueError, match=r"^The provided data has"): lda.partial_fit(X_2)
def test_lda_partial_fit_multi_jobs(): # Test LDA online training with multi CPU rng = np.random.RandomState(0) n_components, X = _build_sparse_mtx() lda = LatentDirichletAllocation(n_components=n_components, n_jobs=2, learning_offset=5., total_samples=30, random_state=rng) for i in range(2): lda.partial_fit(X) correct_idx_grps = [(0, 1, 2), (3, 4, 5), (6, 7, 8)] for c in lda.components_: top_idx = set(c.argsort()[-3:][::-1]) assert tuple(sorted(top_idx)) in correct_idx_grps
def test_lda_partial_fit(): # Test LDA online learning (`partial_fit` method) # (same as test_lda_batch) rng = np.random.RandomState(0) n_components, X = _build_sparse_mtx() lda = LatentDirichletAllocation(n_components=n_components, learning_offset=10., total_samples=100, random_state=rng) for i in range(3): lda.partial_fit(X) correct_idx_grps = [(0, 1, 2), (3, 4, 5), (6, 7, 8)] for c in lda.components_: top_idx = set(c.argsort()[-3:][::-1]) assert tuple(sorted(top_idx)) in correct_idx_grps