def test_var_with_ddof(self):
    """Check ``var`` on a CSR matrix against ``np.var`` when ``ddof=10``.

    A random 20x100 dense array is compared over the whole array
    (``axis=None``) and along each of its two axes.
    """
    ddof = 10
    dense = np.random.uniform(0, 10, (20, 100))
    for axis in (None, 0, 1):
        dense_var = np.var(dense, axis=axis, ddof=ddof)
        sparse_var = var(csr_matrix(dense), axis=axis, ddof=ddof)
        np.testing.assert_almost_equal(dense_var, sparse_var)
def test_var(self):
    """Check ``var`` on CSR matrices against ``np.var`` on the same data.

    Every entry of ``self.data`` is compared over the whole array
    (``axis=None``) and along each axis reported by its ``shape``.
    """
    for dense in self.data:
        axes = chain((None,), range(len(dense.shape)))
        for axis in axes:
            sparse_var = var(csr_matrix(dense), axis=axis)
            dense_var = np.var(dense, axis=axis)
            # Can't use array_equal here due to differences on 1e-16 level
            np.testing.assert_array_almost_equal(sparse_var, dense_var)
def _fit_truncated(self, X, n_components, svd_solver):
    """Fit the model by computing truncated SVD (by ARPACK or randomized) on X.

    Parameters
    ----------
    X : 2-D array-like
        Data to decompose. Must expose ``shape`` and ``mean(axis=0)``.
        NOTE(review): with ``svd_solver="arpack"`` the data is centred in
        place (``X -= self.mean_``); presumably the caller passes a copy
        when the original must be preserved -- confirm against the caller.
    n_components : int
        Number of components to compute. Must satisfy
        ``1 <= n_components <= min(n_samples, n_features)``, and be strictly
        smaller than that bound when ``svd_solver="arpack"``.
    svd_solver : {"arpack", "randomized"}
        Which truncated solver to run.

    Returns
    -------
    U, S, V
        The factors returned by the selected solver. For "arpack" the
        outputs of ``svds`` are reversed and sign-flipped with ``svd_flip``.

    Raises
    ------
    ValueError
        If ``svd_solver`` is not one of the two supported values, or if
        ``n_components`` is a string, out of range, not an integer, or equal
        to ``min(n_samples, n_features)`` with the "arpack" solver.

    Notes
    -----
    Sets ``mean_``, ``n_samples_``, ``n_features_``, ``components_``,
    ``n_components_``, ``explained_variance_``,
    ``explained_variance_ratio_``, ``singular_values_`` and
    ``noise_variance_`` on ``self``.
    """
    # Reject unknown solvers before touching any state: otherwise neither
    # solver branch below runs and U, S, V are never bound, which surfaces
    # as an UnboundLocalError after ``self.mean_`` has already been changed.
    if svd_solver not in ("arpack", "randomized"):
        raise ValueError(
            "svd_solver=%r is not supported by _fit_truncated; expected "
            "'arpack' or 'randomized'" % (svd_solver,)
        )

    n_samples, n_features = X.shape
    max_components = min(n_samples, n_features)

    if isinstance(n_components, six.string_types):
        raise ValueError(
            "n_components=%r cannot be a string with svd_solver='%s'"
            % (n_components, svd_solver)
        )
    elif not 1 <= n_components <= max_components:
        raise ValueError(
            "n_components=%r must be between 1 and min(n_samples, "
            "n_features)=%r with svd_solver='%s'"
            % (n_components, max_components, svd_solver)
        )
    elif not isinstance(n_components, (numbers.Integral, np.integer)):
        raise ValueError(
            "n_components=%r must be of type int when greater than or "
            "equal to 1, was of type=%r" % (n_components, type(n_components))
        )
    elif svd_solver == "arpack" and n_components == max_components:
        raise ValueError(
            "n_components=%r must be strictly less than min(n_samples, "
            "n_features)=%r with svd_solver='%s'"
            % (n_components, max_components, svd_solver)
        )

    random_state = check_random_state(self.random_state)

    self.mean_ = X.mean(axis=0)
    total_var = ut.var(X, axis=0, ddof=1)

    if svd_solver == "arpack":
        # Center data
        X -= self.mean_
        # random init solution, as ARPACK does it internally
        v0 = random_state.uniform(-1, 1, size=min(X.shape))
        U, S, V = sp.linalg.svds(X, k=n_components, tol=self.tol, v0=v0)
        # svds doesn't abide by scipy.linalg.svd/randomized_svd
        # conventions, so reverse its outputs.
        S = S[::-1]
        # flip eigenvectors' sign to enforce deterministic output
        U, V = svd_flip(U[:, ::-1], V[::-1])
    elif svd_solver == "randomized":
        # sign flipping is done inside
        # NOTE(review): X is not centred here; presumably randomized_pca
        # handles the mean itself -- confirm against its implementation.
        U, S, V = randomized_pca(
            X,
            n_components=n_components,
            n_iter=self.iterated_power,
            flip_sign=True,
            random_state=random_state,
        )

    self.n_samples_, self.n_features_ = n_samples, n_features
    self.components_ = V
    self.n_components_ = n_components

    # Get variance explained by singular values
    total_var_sum = total_var.sum()
    self.explained_variance_ = (S ** 2) / (n_samples - 1)
    self.explained_variance_ratio_ = self.explained_variance_ / total_var_sum
    self.singular_values_ = S.copy()  # Store the singular values.

    if self.n_components_ < max_components:
        # Variance not captured by the kept components, averaged over the
        # components that were left out.
        residual_var = total_var_sum - self.explained_variance_.sum()
        self.noise_variance_ = residual_var / (max_components - n_components)
    else:
        self.noise_variance_ = 0

    return U, S, V