def test_init_arpack_v0(seed):
    """Check that ``_init_arpack_v0`` samples from a uniform(-1, 1)
    distribution and is reproducible for a fixed random state."""
    n = 1000
    candidate = _init_arpack_v0(n, seed)

    # Re-draw from an identically seeded generator: the vectors must match.
    reference = check_random_state(seed).uniform(-1, 1, size=n)
    assert_allclose(candidate, reference)
# Example #2 (score: 0)
    def _decompose_truncated(self, mat):
        """Validate ``n_components`` and compute a truncated SVD of ``mat``.

        Dispatches to ARPACK (``scipy.sparse.linalg.svds``) or to
        ``randomized_svd`` depending on ``self._fit_svd_solver``, then
        zeroes out components whose singular value falls below
        ``self.tol``.

        Returns
        -------
        U, S, Vt : the truncated SVD factors, with signs made
            deterministic via ``svd_flip`` (the randomized path flips
            internally).
        """

        # NOTE: the checks below are order-sensitive. The range check runs
        # first, so a non-numeric n_components raises a TypeError from the
        # comparison itself rather than the isinstance message below.
        if not 1 <= self.n_components <= self.n_samples_:
            raise ValueError("n_components=%r must be between 1 and "
                             "n_samples=%r with "
                             "svd_solver='%s'" % (
                                 self.n_components,
                                 self.n_samples_,
                                 self.svd_solver,
                             ))
        elif not isinstance(self.n_components, numbers.Integral):
            # In-range but fractional (e.g. a float) is rejected here.
            raise ValueError(
                "n_components=%r must be of type int "
                "when greater than or equal to 1, was of type=%r" %
                (self.n_components, type(self.n_components)))
        elif self.svd_solver == "arpack" and self.n_components == self.n_samples_:
            # ARPACK cannot compute the full spectrum: k must be < n_samples.
            # NOTE(review): this branch tests `self.svd_solver`, while the
            # dispatch below tests `self._fit_svd_solver` — confirm these are
            # meant to differ (resolved vs. user-requested solver).
            raise ValueError("n_components=%r must be strictly less than "
                             "n_samples=%r with "
                             "svd_solver='%s'" % (
                                 self.n_components,
                                 self.n_samples_,
                                 self.svd_solver,
                             ))

        random_state = check_random_state(self.random_state)

        if self._fit_svd_solver == "arpack":
            # Deterministic starting vector so ARPACK results are reproducible.
            v0 = _init_arpack_v0(min(mat.shape), random_state)
            U, S, Vt = svds(mat, k=self.n_components, tol=self.tol, v0=v0)
            # svds doesn't abide by scipy.linalg.svd/randomized_svd
            # conventions, so reverse its outputs.
            S = S[::-1]
            # flip eigenvectors' sign to enforce deterministic output
            U, Vt = svd_flip(U[:, ::-1], Vt[::-1])

        # We have already eliminated all other solvers, so this must be "randomized"
        else:
            # sign flipping is done inside
            U, S, Vt = randomized_svd(
                mat,
                n_components=self.n_components,
                n_iter=self.iterated_power,
                flip_sign=True,
                random_state=random_state,
            )

        # Suppress numerically negligible components (singular value < tol).
        U[:, S < self.tol] = 0.0
        Vt[S < self.tol] = 0.0
        S[S < self.tol] = 0.0

        return U, S, Vt
# Example #3 (score: 0)
def norm_diff(A, norm=2, msg=True, random_state=None):
    """
    Compute the norm diff with the original matrix, when randomized
    SVD is called with *params.

    norm: 2 => spectral; 'fro' => Frobenius
    """
    if msg:
        print(f"... computing {norm} norm ...")

    if norm != 2:
        # Dense and sparse matrices need different norm routines.
        norm_fn = sp.sparse.linalg.norm if sp.sparse.issparse(A) else sp.linalg.norm
        return norm_fn(A, ord=norm)

    # Spectral norm: largest singular value via ARPACK, seeded with a
    # deterministic starting vector. (sp.linalg.norm(A, ord=2) is slow.)
    v0 = _init_arpack_v0(min(A.shape), random_state)
    return sp.sparse.linalg.svds(A, k=1, return_singular_vectors=False, v0=v0)
# Example #4 (score: 0)
def test_randomized_eigsh_compared_to_others(k):
    """Check that `_randomized_eigsh` is similar to other `eigsh`

    Tests that for a random PSD matrix, `_randomized_eigsh` provides results
    comparable to LAPACK (scipy.linalg.eigh) and ARPACK
    (scipy.sparse.linalg.eigsh).

    Note: some versions of ARPACK do not support k=n_features.
    """

    # make a random PSD matrix
    n_features = 200
    X = make_sparse_spd_matrix(n_features, random_state=0)

    # compare two versions of randomized
    # rough and fast
    eigvals, eigvecs = _randomized_eigsh(
        X, n_components=k, selection="module", n_iter=25, random_state=0
    )
    # more accurate but slow (TODO find realistic settings here)
    eigvals_qr, eigvecs_qr = _randomized_eigsh(
        X,
        n_components=k,
        n_iter=25,
        n_oversamples=20,
        random_state=0,
        power_iteration_normalizer="QR",
        selection="module",
    )

    # with LAPACK
    # NOTE(review): the `eigvals=` keyword was deprecated in SciPy 1.5 and
    # removed in SciPy 1.9 in favor of `subset_by_index=` — confirm the
    # pinned SciPy version before upgrading.
    eigvals_lapack, eigvecs_lapack = linalg.eigh(
        X, eigvals=(n_features - k, n_features - 1)
    )
    # eigh returns eigenvalues in ascending order; sort descending to match
    # the randomized solvers' convention.
    indices = eigvals_lapack.argsort()[::-1]
    eigvals_lapack = eigvals_lapack[indices]
    eigvecs_lapack = eigvecs_lapack[:, indices]

    # -- eigenvalues comparison
    assert eigvals_lapack.shape == (k,)
    # comparison precision
    assert_array_almost_equal(eigvals, eigvals_lapack, decimal=6)
    assert_array_almost_equal(eigvals_qr, eigvals_lapack, decimal=6)

    # -- eigenvectors comparison
    assert eigvecs_lapack.shape == (n_features, k)
    # flip eigenvectors' sign to enforce deterministic output
    # (zero Vt placeholder: only the U-side sign convention is needed here)
    dummy_vecs = np.zeros_like(eigvecs).T
    eigvecs, _ = svd_flip(eigvecs, dummy_vecs)
    eigvecs_qr, _ = svd_flip(eigvecs_qr, dummy_vecs)
    eigvecs_lapack, _ = svd_flip(eigvecs_lapack, dummy_vecs)
    assert_array_almost_equal(eigvecs, eigvecs_lapack, decimal=4)
    assert_array_almost_equal(eigvecs_qr, eigvecs_lapack, decimal=6)

    # comparison ARPACK ~ LAPACK (some ARPACK implems do not support k=n)
    if k < n_features:
        # deterministic ARPACK starting vector for reproducibility
        v0 = _init_arpack_v0(n_features, random_state=0)
        # "LA" largest algebraic <=> selection="value" in randomized_eigsh
        eigvals_arpack, eigvecs_arpack = eigsh(
            X, k, which="LA", tol=0, maxiter=None, v0=v0
        )
        indices = eigvals_arpack.argsort()[::-1]
        # eigenvalues
        eigvals_arpack = eigvals_arpack[indices]
        assert_array_almost_equal(eigvals_lapack, eigvals_arpack, decimal=10)
        # eigenvectors
        eigvecs_arpack = eigvecs_arpack[:, indices]
        eigvecs_arpack, _ = svd_flip(eigvecs_arpack, dummy_vecs)
        assert_array_almost_equal(eigvecs_arpack, eigvecs_lapack, decimal=8)