def test_pca_score_consistency_solvers(svd_solver):
    # The score of a fitted PCA should not depend on the SVD solver used:
    # compare the model fitted with `svd_solver` against the 'full' solver.
    digits, _ = datasets.load_digits(return_X_y=True)

    # Both estimators share everything except the solver.
    shared_params = dict(n_components=30, random_state=0)
    full_model = PCA(svd_solver='full', **shared_params).fit(digits)
    other_model = PCA(svd_solver=svd_solver, **shared_params).fit(digits)

    full_score = full_model.score(digits)
    other_score = other_model.score(digits)
    assert_allclose(full_score, other_score, rtol=5e-6)
def test_pca_score(svd_solver):
    # Sanity-check the probabilistic PCA score on isotropic Gaussian data
    # (standard deviation 0.1) centred on (3, 4, 5).
    n_samples, n_features = 1000, 3
    center = np.array([3, 4, 5])
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features) * .1 + center

    model = PCA(n_components=2, svd_solver=svd_solver)
    model.fit(X)
    train_score = model.score(X)

    # Negative entropy of an n_features-dimensional isotropic Gaussian with
    # variance 0.1**2; the score on the training data should be within 5%
    # of this value.
    reference = -0.5 * np.log(2 * np.pi * np.exp(1) * 0.1**2) * n_features
    assert_allclose(train_score / reference, 1, rtol=5e-2)

    # Data drawn with twice the spread must score lower under the same model.
    wider_X = rng.randn(n_samples, n_features) * .2 + center
    wider_score = model.score(wider_X)
    assert train_score > wider_score

    # A whitened model fitted on the same data must also score lower.
    whitened_model = PCA(n_components=2, whiten=True, svd_solver=svd_solver)
    whitened_model.fit(X)
    whitened_score = whitened_model.score(X)
    assert train_score > whitened_score
def test_pca_score3():
    # Model selection with the probabilistic PCA score: the data consist of
    # unit Gaussian noise plus a single random factor along (3, 4, 5), so the
    # held-out score should be highest for exactly one component.
    n_samples, n_features = 200, 3
    rng = np.random.RandomState(0)
    direction = np.array([3, 4, 5])
    offset = np.array([1, 0, 7])

    def draw_sample():
        # Draw the noise before the latent factor so the random stream is
        # consumed in a fixed order for both samples.
        noise = rng.randn(n_samples, n_features)
        latent = rng.randn(n_samples, 1)
        return noise + latent * direction + offset

    X_train = draw_sample()
    X_test = draw_sample()

    # Fit on the training sample and score on the held-out sample for each
    # candidate number of components (0, 1, 2).
    held_out_scores = [
        PCA(n_components=k, svd_solver='full').fit(X_train).score(X_test)
        for k in range(n_features)
    ]
    assert np.argmax(held_out_scores) == 1