def _calc(self, data, ret_obj):
    """
    Calculate SVD (wrap numpy SVD).

    The reduced (``full_matrices=False``) decomposition is stored on the
    instance as ``self._U``, ``self._s`` and ``self._Vh``.

    Parameters
    ----------
    data : ndarray
        Data to decompose. When ``self.rng`` is not None only
        ``data[..., self.rng]`` (a selection along the last axis) is used.
    ret_obj : object
        Not used by this method; kept so the call signature is unchanged.

    Returns
    -------
    bool
        True if the decomposition succeeded, False if it raised an error.
    """
    try:
        # The indexing stays inside the try block so that an invalid
        # self.rng is reported as a failure (False), as it always was.
        target = data if self.rng is None else data[..., self.rng]
        self._U, self._s, self._Vh = _svd(target, full_matrices=False)
    except Exception:
        # Best-effort contract: any ordinary error means "not calculated".
        # KeyboardInterrupt / SystemExit are deliberately NOT caught here,
        # so a user interrupt is no longer turned into a False return.
        return False
    return True
def pca(D, n_components=None):
    """
    Principal component analysis

    Parameters
    ----------
    D : ndarray [n_sample, n_features]
        Data
    n_components : int
        Number of components to calculate (using scipy.sparse.linalg.svds).
        If None use numpy.linalg.svd and return all n_features components.
        svds is applied to an [n_features, n_features] matrix and needs
        fewer components than that size, so requests of n_features or more
        are reduced to n_features - 1. A request equal to n_sample is
        reduced by one.

    Returns
    -------
    Tuple with 3 items:
        Scores (T), Loadings (W), eigenvalues (singular values-squared)

    Notes
    -----
    The data are mean-centered per feature before decomposition and the
    scores are the projection of the centered data, so T.mean(axis=0) is
    (numerically) zero.

    The eigenvalues are those of Dcenter.T @ Dcenter and are not divided
    by the number of samples; hence T.var(axis=0) equals s2 / n_sample
    rather than s2 itself.
    """
    Dcenter = D - D.mean(axis=0, keepdims=True)
    scatter = _np.dot(Dcenter.T, Dcenter)

    if n_components is None:
        # Note: s2 contains trivial (numerically zero) values whenever the
        # centered data are rank deficient: s2 always has n_features
        # entries, but the rank of Dcenter is at most
        # min(n_sample - 1, n_features).
        W, s2, _ = _svd(scatter, full_matrices=False)
    else:
        n_sample, n_features = Dcenter.shape
        if n_components == n_sample:
            n_components -= 1
        # svds decomposes the n_features x n_features scatter matrix and
        # rejects k >= n_features, so clamp instead of letting it raise.
        n_components = min(n_components, n_features - 1)

        W, s2, _ = _svds(scatter, k=n_components)

        # svds does not sort by variance; thus, manually sorting from
        # biggest to smallest variance
        order = _np.argsort(s2)[::-1]
        W = W[:, order]
        s2 = s2[order]

    # Project the *centered* data so the scores have zero mean.
    T = _np.dot(Dcenter, W)
    return T, W, s2
def svd(X):
    """
    Reduced singular value decomposition of X.

    Thin wrapper that calls the module's SVD routine with
    ``full_matrices=False`` and returns its result unchanged.
    """
    factors = _svd(X, full_matrices=False)
    return factors