Example #1
0
 def test_var_with_ddof(self):
     """Sparse `var` must agree with dense `np.var` when ddof is non-zero."""
     dense = np.random.uniform(0, 10, (20, 100))
     for ax in (None, 0, 1):
         np.testing.assert_almost_equal(
             np.var(dense, axis=ax, ddof=10),
             var(csr_matrix(dense), axis=ax, ddof=10),
         )
Example #2
0
 def test_var_with_ddof(self):
     """Dense and sparse variance computations agree for ddof=10 on every axis."""
     sample = np.random.uniform(0, 10, (20, 100))
     sparse_sample = csr_matrix(sample)
     for axis in [None, 0, 1]:
         expected = np.var(sample, axis=axis, ddof=10)
         observed = var(sparse_sample, axis=axis, ddof=10)
         np.testing.assert_almost_equal(expected, observed)
Example #3
0
 def test_var(self):
     """Sparse `var` matches `np.var` for every dataset and axis (incl. None)."""
     for arr in self.data:
         for ax in chain((None,), range(len(arr.shape))):
             # Can't use array_equal here due to differences on 1e-16 level
             expected = np.var(arr, axis=ax)
             observed = var(csr_matrix(arr), axis=ax)
             np.testing.assert_array_almost_equal(observed, expected)
Example #4
0
 def test_var(self):
     """Variance over sparse input equals np.var over the dense equivalent."""
     for dense in self.data:
         axes = chain((None,), range(len(dense.shape)))
         sparse = csr_matrix(dense)
         for axis in axes:
             # Can't use array_equal here due to differences on 1e-16 level
             np.testing.assert_array_almost_equal(
                 var(sparse, axis=axis),
                 np.var(dense, axis=axis),
             )
Example #5
0
File: pca.py  Project: tchen0123/orange3
    def _fit_truncated(self, X, n_components, svd_solver):
        """Fit the model by computing truncated SVD (by ARPACK or randomized) on X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data. NOTE: for ``svd_solver='arpack'`` the data is
            centered in place, i.e. the caller's array is mutated.
        n_components : int
            Number of components to keep; must satisfy
            ``1 <= n_components <= min(n_samples, n_features)``
            (strictly less than the minimum for 'arpack').
        svd_solver : str
            Either ``'arpack'`` or ``'randomized'``.

        Returns
        -------
        U, S, V
            Truncated SVD factors, with singular values sorted in
            decreasing order and deterministic component signs.

        Raises
        ------
        ValueError
            If ``n_components`` is invalid or ``svd_solver`` is not one of
            the two supported solvers.
        """
        n_samples, n_features = X.shape

        if isinstance(n_components, six.string_types):
            raise ValueError(
                "n_components=%r cannot be a string with svd_solver='%s'" %
                (n_components, svd_solver)
            )
        elif not 1 <= n_components <= min(n_samples, n_features):
            raise ValueError(
                "n_components=%r must be between 1 and min(n_samples, "
                "n_features)=%r with svd_solver='%s'" % (
                    n_components, min(n_samples, n_features), svd_solver
                )
            )
        elif not isinstance(n_components, (numbers.Integral, np.integer)):
            raise ValueError(
                "n_components=%r must be of type int when greater than or "
                "equal to 1, was of type=%r" % (n_components, type(n_components))
            )
        elif svd_solver == "arpack" and n_components == min(n_samples, n_features):
            raise ValueError(
                "n_components=%r must be strictly less than min(n_samples, "
                "n_features)=%r with svd_solver='%s'" % (
                    n_components, min(n_samples, n_features), svd_solver
                )
            )

        random_state = check_random_state(self.random_state)

        self.mean_ = X.mean(axis=0)
        # ddof=1 (unbiased) to stay consistent with the (n_samples - 1)
        # divisor used for explained_variance_ below.
        total_var = ut.var(X, axis=0, ddof=1)

        if svd_solver == "arpack":
            # Center data — in place, mutating the caller's array.
            X -= self.mean_
            # random init solution, as ARPACK does it internally
            v0 = random_state.uniform(-1, 1, size=min(X.shape))
            U, S, V = sp.linalg.svds(X, k=n_components, tol=self.tol, v0=v0)
            # svds doesn't abide by scipy.linalg.svd/randomized_svd
            # conventions, so reverse its outputs.
            S = S[::-1]
            # flip eigenvectors' sign to enforce deterministic output
            U, V = svd_flip(U[:, ::-1], V[::-1])
        elif svd_solver == "randomized":
            # sign flipping is done inside
            U, S, V = randomized_pca(
                X,
                n_components=n_components,
                n_iter=self.iterated_power,
                flip_sign=True,
                random_state=random_state,
            )
        else:
            # BUGFIX: previously an unrecognized solver fell through with
            # U, S, V unbound, raising an opaque UnboundLocalError below.
            raise ValueError(
                "svd_solver=%r must be either 'arpack' or 'randomized'"
                % svd_solver
            )

        self.n_samples_, self.n_features_ = n_samples, n_features
        self.components_ = V
        self.n_components_ = n_components

        # Get variance explained by singular values
        self.explained_variance_ = (S ** 2) / (n_samples - 1)
        self.explained_variance_ratio_ = self.explained_variance_ / total_var.sum()
        self.singular_values_ = S.copy()  # Store the singular values.

        if self.n_components_ < min(n_features, n_samples):
            # Average variance of the discarded components (PPCA noise term).
            self.noise_variance_ = (total_var.sum() - self.explained_variance_.sum())
            self.noise_variance_ /= min(n_features, n_samples) - n_components
        else:
            self.noise_variance_ = 0

        return U, S, V