def _fit_full_daal4py(self, X, n_components):
    """Fit PCA with daal4py via a full decomposition, then truncate.

    A full decomposition (all ``min(n_samples, n_features)`` components)
    is computed first so the component signs can be flipped
    deterministically; the fitted attributes are then trimmed to the
    resolved ``n_components``. Returns the full-size ``(U, S, V)``.
    """
    n_samples, n_features = X.shape

    # Full rank is required up front: sign flipping needs every component.
    self._fit_daal4py(X, min(n_samples, n_features))
    left = self._transform_daal4py(
        X, whiten=True, check_X=False, scale_eigenvalues=True)
    right = self.components_
    left, right = svd_flip(left, right)
    # Keep private full-size copies to return; the attributes below are
    # truncated afterwards.
    left = left.copy()
    right = right.copy()
    sing_vals = self.singular_values_.copy()

    # Resolve a symbolic / fractional n_components into an integer count.
    if n_components == 'mle':
        n_components = _infer_dimension_(
            self.explained_variance_, n_samples, n_features)
    elif 0 < n_components < 1.0:
        n_components = _n_components_from_fraction(
            self.explained_variance_ratio_, n_components)

    # Compute noise covariance using Probabilistic PCA model
    # The sigma2 maximum likelihood (cf. eq. 12.46)
    max_rank = min(n_features, n_samples)
    if n_components < max_rank:
        self.noise_variance_ = self.explained_variance_[n_components:].mean()
    else:
        self.noise_variance_ = 0.

    self.n_samples_, self.n_features_ = n_samples, n_features
    self.components_ = self.components_[:n_components]
    self.n_components_ = n_components
    self.explained_variance_ = self.explained_variance_[:n_components]
    self.explained_variance_ratio_ = \
        self.explained_variance_ratio_[:n_components]
    self.singular_values_ = self.singular_values_[:n_components]

    return left, sing_vals, right
def _fit_full(self, X, n_components):
    """Fit the model by computing full SVD on X.

    Centers ``X`` in place, computes a full SVD (via daal4py for tall
    float32/float64 matrices, NumPy otherwise), resolves ``n_components``
    ('mle' / fraction / int) and stores the truncated fitted attributes
    on ``self``. Returns the full-size ``(U, S, V)``.
    """
    n_samples, n_features = X.shape
    _validate_n_components(n_components, n_samples, n_features)

    # Center data (in place — X is assumed to be a private copy).
    self.mean_ = np.mean(X, axis=0)
    X -= self.mean_

    # daal4py SVD only handles tall matrices of supported float dtypes;
    # fall back to NumPy's LAPACK-backed SVD otherwise.
    if X.shape[0] > X.shape[1] and (X.dtype == np.float64 or
                                    X.dtype == np.float32):
        U, S, V = _daal4py_svd(X)
    else:
        U, S, V = np.linalg.svd(X, full_matrices=False)
    # flip eigenvectors' sign to enforce deterministic output
    U, V = svd_flip(U, V)

    components_ = V

    # Get variance explained by singular values
    explained_variance_ = (S ** 2) / (n_samples - 1)
    total_var = explained_variance_.sum()
    explained_variance_ratio_ = explained_variance_ / total_var

    # Postprocess the number of components required
    if n_components == 'mle':
        n_components = \
            _infer_dimension_(explained_variance_, n_samples, n_features)
    elif 0 < n_components < 1.0:
        # Smallest number of components whose cumulative explained
        # variance exceeds the requested fraction. Use the shared helper
        # for consistency with the other _fit_full* variants in this
        # module (replaces a hand-rolled cumsum + searchsorted).
        n_components = _n_components_from_fraction(
            explained_variance_ratio_, n_components)

    # Compute noise covariance using Probabilistic PCA model
    # The sigma2 maximum likelihood (cf. eq. 12.46)
    if n_components < min(n_features, n_samples):
        self.noise_variance_ = explained_variance_[n_components:].mean()
    else:
        self.noise_variance_ = 0.

    self.n_samples_, self.n_features_ = n_samples, n_features
    self.components_ = components_[:n_components]
    self.n_components_ = n_components
    self.explained_variance_ = explained_variance_[:n_components]
    self.explained_variance_ratio_ = \
        explained_variance_ratio_[:n_components]
    self.singular_values_ = S[:n_components]

    return U, S, V
def _fit_full_vanilla(self, X, n_components):
    """Fit the model by computing full SVD on X (pure NumPy path)."""
    n_samples, n_features = X.shape

    # Center the data in place.
    self.mean_ = np.mean(X, axis=0)
    X -= self.mean_

    U, S, V = np.linalg.svd(X, full_matrices=False)
    # Sign-flip so the decomposition is deterministic.
    U, V = svd_flip(U, V)
    components_ = V

    # Per-component explained variance and its ratio of the total.
    explained_variance_ = (S ** 2) / (n_samples - 1)
    explained_variance_ratio_ = (
        explained_variance_ / explained_variance_.sum())

    # Resolve 'mle' / fractional n_components into an integer count.
    if n_components == 'mle':
        n_components = _infer_dimension(explained_variance_, n_samples)
    elif 0 < n_components < 1.0:
        n_components = _n_components_from_fraction(
            explained_variance_ratio_, n_components)

    # Noise covariance under the Probabilistic PCA model
    # (sigma2 maximum likelihood, cf. eq. 12.46).
    max_rank = min(n_features, n_samples)
    if n_components < max_rank:
        self.noise_variance_ = explained_variance_[n_components:].mean()
    else:
        self.noise_variance_ = 0.

    self.n_samples_, self.n_features_ = n_samples, n_features
    self.components_ = components_[:n_components]
    self.n_components_ = n_components
    self.explained_variance_ = explained_variance_[:n_components]
    self.explained_variance_ratio_ = \
        explained_variance_ratio_[:n_components]
    self.singular_values_ = S[:n_components]

    return U, S, V
def _fit_full_daal4py(self, X, n_components): n_samples, n_features = X.shape # due to need to flip components, need to do full decomposition self._fit_daal4py(X, min(n_samples, n_features)) U = self._transform_daal4py(X, whiten=True, check_X=False, scale_eigenvalues=True) V = self.components_ U, V = svd_flip(U, V) U = U.copy() V = V.copy() S = self.singular_values_.copy() if n_components == 'mle': n_components = \ _infer_dimension_(self.explained_variance_, n_samples, n_features) elif 0 < n_components < 1.0: # number of components for which the cumulated explained # variance percentage is superior to the desired threshold ratio_cumsum = stable_cumsum(self.explained_variance_ratio_) n_components = np.searchsorted(ratio_cumsum, n_components) + 1 # Compute noise covariance using Probabilistic PCA model # The sigma2 maximum likelihood (cf. eq. 12.46) if n_components < min(n_features, n_samples): self.noise_variance_ = self.explained_variance_[ n_components:].mean() else: self.noise_variance_ = 0. self.n_samples_, self.n_features_ = n_samples, n_features self.components_ = self.components_[:n_components] self.n_components_ = n_components self.explained_variance_ = self.explained_variance_[:n_components] self.explained_variance_ratio_ = \ self.explained_variance_ratio_[:n_components] self.singular_values_ = self.singular_values_[:n_components] return U, S, V