Esempio n. 1
0
def test_pca_bad_solver():
    """Fitting with an unrecognised ``svd_solver`` is expected to fail.

    Builds a small 5x4 random matrix from a fixed seed and asserts that
    calling ``fit`` on a ``dd.PCA`` configured with
    ``svd_solver="bad_argument"`` raises ``ValueError``.
    """
    rng = np.random.RandomState(0)
    data = rng.rand(5, 4)
    estimator = dd.PCA(svd_solver="bad_argument", n_components=3)
    assert_raises(ValueError, estimator.fit, data)
Esempio n. 2
0
def test_whitening():
    """Check that whitened PCA output has unit variance per component.

    For every combination of solver in ``solver_list`` and ``copy`` flag:

    * with ``whiten=True`` the output of ``fit_transform`` must have
      (sample) standard deviation 1 and mean 0 along every component;
    * with ``whiten=False`` the per-component standard deviations must
      still vary (their spread is pinned to 74.1 at one decimal place).
    """
    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 80
    n_components = 30
    rank = 50

    # Low-rank data with correlated features. The left factor is drawn
    # before the right one so the seeded random stream is consumed in a
    # fixed order; the numeric thresholds below depend on it.
    left_factor = rng.randn(n_samples, rank)
    spectrum = np.diag(np.linspace(10.0, 1.0, rank))
    right_factor = rng.randn(rank, n_features)
    X = np.dot(left_factor, np.dot(spectrum, right_factor))

    # Scale the first 50 features by 3 so that they spread much more
    # widely than the remaining 30.
    X[:, :50] *= 3

    assert X.shape == (n_samples, n_features)

    # Sanity check: the per-feature standard deviation varies a lot.
    assert X.std(axis=0).std() > 43.8
    dX = da.from_array(X, chunks=(50, n_features))

    for solver, copy in product(solver_list, (True, False)):
        # Settings shared by the whitening and non-whitening estimators.
        shared_kwargs = dict(
            n_components=n_components,
            copy=copy,
            svd_solver=solver,
            random_state=0,
        )

        # --- whiten while projecting onto the lower-dimensional subspace
        # Work on a copy so the original array survives across iterations.
        whiten_input = dX.copy()
        whitening_pca = dd.PCA(whiten=True, iterated_power=4,
                               **shared_kwargs)
        X_whitened = whitening_pca.fit_transform(whiten_input.copy())
        assert X_whitened.shape == (n_samples, n_components)

        # NOTE: ``fit_transform`` is deliberately not compared against a
        # separate ``transform`` call here; the two results differ for
        # the randomized solver.

        expected_std = np.ones(n_components)
        assert_almost_equal(X_whitened.std(ddof=1, axis=0),
                            expected_std,
                            decimal=6)
        expected_mean = np.zeros(n_components)
        assert_almost_equal(X_whitened.mean(axis=0), expected_mean)

        # --- same projection without whitening
        plain_input = dX.copy()
        plain_pca = dd.PCA(whiten=False, **shared_kwargs).fit(plain_input)
        X_unwhitened = plain_pca.transform(plain_input)
        assert X_unwhitened.shape == (n_samples, n_components)

        # Without whitening the components keep differing variances.
        component_std_spread = X_unwhitened.std(axis=0).std()
        assert_almost_equal(component_std_spread, 74.1, decimal=1)
Esempio n. 3
0
def test_basic():
    """Default ``dd.PCA`` and ``sd.PCA`` must agree after fitting.

    Fits a default-constructed ``dd.PCA`` on the module-level ``dX`` and
    a default-constructed ``sd.PCA`` on the module-level ``X``, then
    compares the two fitted estimators with ``assert_estimator_equal``.
    """
    dd_pca = dd.PCA()
    dd_pca.fit(dX)

    sd_pca = sd.PCA()
    sd_pca.fit(X)

    assert_estimator_equal(dd_pca, sd_pca)