def _sparse_svd():
    """Compute a (possibly truncated) SVD of ``self.data`` via ``data * data^T``.

    The symmetric eigen-decomposition of ``data * data^T`` gives the left
    singular vectors and the squared singular values; the right singular
    vectors are recovered as ``S^-1 * U^T * data``.  The results are written
    to ``self.U``, ``self.S`` and ``self.V``; nothing is returned.

    NOTE(review): ``self`` is not a parameter here -- presumably this is a
    nested helper closing over the enclosing method's ``self``, and
    ``self.data`` is a scipy sparse matrix (``*`` is used as a matrix
    product and ``todense()`` is called on the result); confirm.
    """
    # for some reason arpack does not allow computation of rank(A)
    # eigenvectors, so at most (number of rows - 1) of them are requested
    AA = self.data * self.data.transpose()

    n_rows = self.data.shape[0]
    if n_rows > 1:
        # do not compute full rank if a smaller k is desired
        if 0 < self._k < n_rows - 1:
            k = self._k
        else:
            k = n_rows - 1

        # scipy 0.9 renamed eigen_symmetric to eigsh and later removed the
        # old name; use whichever one this scipy provides
        if hasattr(linalg, 'eigsh'):
            values, u_vectors = linalg.eigsh(AA, k=k)
        else:
            values, u_vectors = linalg.eigen_symmetric(AA, k=k)
    else:
        values, u_vectors = eigh(AA.todense())

    # get rid of too low eigenvalues
    keep = values > self._EPS
    u_vectors = u_vectors[:, keep]
    values = values[keep]

    # argsort sorts in ascending order -> reverse it for largest first
    order = np.argsort(values)[::-1]
    values = values[order]
    self.U = scipy.sparse.csc_matrix(u_vectors[:, order])

    # compute S (singular values are the square roots of the eigenvalues)
    self.S = scipy.sparse.csc_matrix(np.diag(np.sqrt(values)))

    # and the inverse of it
    S_inv = scipy.sparse.csc_matrix(np.diag(1.0 / np.sqrt(values)))

    # compute V from it
    self.V = self.U.transpose() * self.data
    self.V = S_inv * self.V
def _sparse_left_svd():
    """Compute an SVD of ``self.data`` via ``data^T * data``.

    The symmetric eigen-decomposition of ``data^T * data`` gives the right
    singular vectors and the squared singular values; the left singular
    vectors are recovered as ``data * V * S^-1``.  The results are written
    to ``self.U``, ``self.S`` and ``self.V`` (``V`` is stored transposed);
    nothing is returned.

    NOTE(review): ``self`` is not a parameter here -- presumably this is a
    nested helper closing over the enclosing method's ``self``; confirm.
    """
    # for some reason arpack does not allow computation of rank(A)
    # eigenvectors, so (number of columns - 1) of them are requested
    AA = self.data.transpose() * self.data

    n_cols = self.data.shape[1]
    if n_cols > 1:
        # scipy 0.9 renamed eigen_symmetric to eigsh and later removed the
        # old name; use whichever one this scipy provides
        if hasattr(linalg, 'eigsh'):
            values, v_vectors = linalg.eigsh(AA, k=n_cols - 1)
        else:
            values, v_vectors = linalg.eigen_symmetric(AA, k=n_cols - 1)
    else:
        values, v_vectors = self._eig(AA.todense())

    # get rid of too low eigenvalues
    keep = values > self._EPS
    v_vectors = v_vectors[:, keep]
    values = values[keep]

    # argsort sorts in ascending order -> reverse it for largest first
    order = np.argsort(values)[::-1]
    values = values[order]
    self.V = scipy.sparse.csc_matrix(v_vectors[:, order])

    # compute S (singular values are the square roots of the eigenvalues)
    self.S = scipy.sparse.csc_matrix(np.diag(np.sqrt(values)))

    # and the inverse of it
    S_inv = scipy.sparse.csc_matrix(np.diag(1.0 / np.sqrt(values)))

    self.U = self.data * self.V * S_inv
    self.V = self.V.transpose()
def _sparse_left_svd():
    """Compute a (possibly truncated) SVD of ``self.data`` via ``data^T * data``.

    The symmetric eigen-decomposition of ``data^T * data`` gives the right
    singular vectors and the squared singular values; the left singular
    vectors are recovered as ``data * V * S^-1``.  The results are written
    to ``self.U``, ``self.S`` and ``self.V`` (``V`` is stored transposed);
    nothing is returned.

    NOTE(review): ``self`` is not a parameter here -- presumably this is a
    nested helper closing over the enclosing method's ``self``; confirm.
    """
    # for some reason arpack does not allow computation of rank(A)
    # eigenvectors, so at most (number of columns - 1) are requested
    AA = self.data.transpose() * self.data

    n_cols = self.data.shape[1]
    if n_cols > 1:
        # do not compute full rank if a smaller k is desired
        if 0 < self._k < n_cols - 1:
            k = self._k
        else:
            k = n_cols - 1

        # scipy 0.9 renamed eigen_symmetric to eigsh and later removed the
        # old name; use whichever one this scipy provides
        if hasattr(linalg, 'eigsh'):
            values, v_vectors = linalg.eigsh(AA, k=k)
        else:
            values, v_vectors = linalg.eigen_symmetric(AA, k=k)
    else:
        values, v_vectors = eigh(AA.todense())

    # get rid of too low eigenvalues
    keep = values > self._EPS
    v_vectors = v_vectors[:, keep]
    values = values[keep]

    # argsort sorts in ascending order -> reverse it for largest first
    order = np.argsort(values)[::-1]
    values = values[order]
    self.V = scipy.sparse.csc_matrix(v_vectors[:, order])

    # compute S (singular values are the square roots of the eigenvalues)
    self.S = scipy.sparse.csc_matrix(np.diag(np.sqrt(values)))

    # and the inverse of it
    S_inv = scipy.sparse.csc_matrix(np.diag(1.0 / np.sqrt(values)))

    self.U = self.data * self.V * S_inv
    self.V = self.V.transpose()
def arpack_eigsh(A, **kwargs):
    """Compat wrapper around ARPACK's symmetric eigen-solver.

    Scipy 0.9 renamed ``eigen_symmetric`` to ``eigsh``; later releases made
    the ``scipy.sparse.linalg.eigen.arpack`` module private, so the public
    ``scipy.sparse.linalg.eigsh`` is tried first and the legacy module is
    only used as a fallback for old installations.

    Parameters
    ----------
    A : matrix or operator accepted by the underlying solver
    **kwargs : forwarded unchanged to the solver (e.g. ``k``, ``which``)

    Returns
    -------
    Whatever the underlying solver returns (eigenvalues and eigenvectors
    by default).
    """
    try:
        from scipy.sparse.linalg import eigsh as solver
    except ImportError:
        # very old scipy: fall back to the historical module layout
        from scipy.sparse.linalg.eigen import arpack
        if hasattr(arpack, 'eigsh'):
            solver = arpack.eigsh
        else:
            solver = arpack.eigen_symmetric
    return solver(A, **kwargs)
def arpack_eigsh(A, **kwargs):
    """Compat function for sparse symmetric eigen vectors decomposition

    Scipy 0.9 renamed eigen_symmetric to eigsh in
    scipy.sparse.linalg.eigen.arpack, and later releases made that module
    private.  The public ``scipy.sparse.linalg.eigsh`` is therefore
    preferred; the legacy module is only consulted when the public name is
    missing.

    Parameters
    ----------
    A : matrix or operator accepted by the underlying solver
    **kwargs : forwarded unchanged to the solver (e.g. ``k``, ``which``)

    Returns
    -------
    Whatever the underlying solver returns (eigenvalues and eigenvectors
    by default).
    """
    try:
        from scipy.sparse.linalg import eigsh
    except ImportError:
        # pre-0.9 style installation: look the solver up in the old module
        from scipy.sparse.linalg.eigen import arpack
        eigsh = getattr(arpack, 'eigsh', None)
        if eigsh is None:
            eigsh = arpack.eigen_symmetric
    return eigsh(A, **kwargs)
def eval_evec(self, d, typ, k, which, **kwds):
    """Check ``eigen_symmetric`` on one test case.

    The matrix ``d['mat']`` is cast to ``typ`` and decomposed; the computed
    eigenvalues are compared with ``self.get_exact_eval(d, typ, k, which)``
    and every eigenpair is checked against ``A * v == lambda * v``.  Both
    comparisons use ``_ndigits[typ]`` decimals.  Extra keyword arguments are
    forwarded to ``eigen_symmetric``.
    """
    matrix = d['mat'].astype(typ)
    expected_values = self.get_exact_eval(d, typ, k, which)
    values, vectors = eigen_symmetric(matrix, k, which=which, **kwds)

    # check eigenvalues
    assert_array_almost_equal(values, expected_values,
                              decimal=_ndigits[typ])

    # check eigenvectors A*evec=eval*evec
    for col in range(k):
        vector = vectors[:, col]
        assert_array_almost_equal(dot(matrix, vector),
                                  values[col] * vector,
                                  decimal=_ndigits[typ])
def eval_evec(self, d, typ, k, which, **kwds):
    """Validate the eigenpairs returned by ``eigen_symmetric`` for one case.

    ``d['mat']`` is converted to dtype ``typ`` before solving.  The reference
    eigenvalues come from ``self.get_exact_eval``; the tolerance (number of
    decimals) for every comparison is looked up in ``_ndigits`` by ``typ``.
    Remaining keyword arguments go straight to ``eigen_symmetric``.
    """
    mat = d['mat'].astype(typ)
    reference = self.get_exact_eval(d, typ, k, which)
    eigvals, eigvecs = eigen_symmetric(mat, k, which=which, **kwds)

    # check eigenvalues
    assert_array_almost_equal(eigvals, reference, decimal=_ndigits[typ])

    # check eigenvectors: A * v must equal lambda * v for each pair
    for i in range(k):
        lhs = dot(mat, eigvecs[:, i])
        rhs = eigvals[i] * eigvecs[:, i]
        assert_array_almost_equal(lhs, rhs, decimal=_ndigits[typ])
def kpca(data, k):
    """
    Performs the eigen decomposition of the kernel matrix.

    arguments:
    * data: 2D numpy array representing the symmetric kernel matrix.
    * k: number of principal components to keep.

    return:
    * w: the k algebraically largest eigen values, in the reverse of the
         order returned by the solver (highest to lowest, assuming the
         solver returns them in ascending order).
    * u: the corresponding eigen vectors. u[:,i] is the vector
         corresponding to w[i]

    Notes: If you want to perform the full decomposition, consider using
    'full_kpca' instead.
    """
    eigvals, eigvecs = eigen_symmetric(data, k=k, which='LA')
    # flip values and columns together so the pairing is preserved
    backwards = slice(None, None, -1)
    return eigvals[backwards], eigvecs[:, backwards]
def _sparse_left_svd():
    """Compute a (possibly truncated) SVD of ``self.data`` via ``data^T * data``.

    The symmetric eigen-decomposition of ``data^T * data`` gives the right
    singular vectors and the squared singular values; the left singular
    vectors are recovered as ``data * V * S^-1``.  The results are written
    to ``self.U``, ``self.S`` and ``self.V`` (``V`` is stored transposed);
    nothing is returned.

    NOTE(review): ``self`` is not a parameter here -- presumably this is a
    nested helper closing over the enclosing method's ``self``; confirm.
    """
    # for some reason arpack does not allow computation of rank(A)
    # eigenvectors, so at most (number of columns - 1) are requested
    AA = self.data.transpose() * self.data

    if self.data.shape[1] > 1:
        # do not compute full rank if a smaller k is desired
        if 0 < self._k < AA.shape[1] - 1:
            k = self._k
        else:
            k = self.data.shape[1] - 1

        # Feature-detect the solver instead of comparing the version string
        # with '0.9.0': that comparison sent every later scipy release to
        # eigen_symmetric, which no longer exists there.
        if hasattr(linalg, 'eigsh'):
            values, v_vectors = linalg.eigsh(AA, k=k)
        else:
            values, v_vectors = linalg.eigen_symmetric(AA, k=k)
    else:
        values, v_vectors = eigh(AA.todense())

    # get rid of negative/too low eigenvalues
    kept = np.where(values > self._EPS)[0]
    v_vectors = v_vectors[:, kept]
    values = values[kept]

    # sort eigenvectors according to largest value
    # (argsort sorts in ascending order -> access is backwards)
    idx = np.argsort(values)[::-1]
    values = values[idx]
    self.V = scipy.sparse.csc_matrix(v_vectors[:, idx])

    # compute S as a sparse diagonal matrix of the singular values
    singular = np.sqrt(values)
    size = len(idx)
    self.S = scipy.sparse.spdiags(singular, 0, size, size, format='csc')

    # and the inverse of it
    S_inv = scipy.sparse.spdiags(1.0 / singular, 0, size, size, format='csc')

    self.U = self.data * self.V * S_inv
    self.V = self.V.transpose()
def _sparse_left_svd():
    """Compute a (possibly truncated) SVD of ``self.data`` via ``data^T * data``.

    Eigenvectors of ``data^T * data`` become the right singular vectors,
    the square roots of its eigenvalues the singular values, and the left
    singular vectors follow as ``data * V * S^-1``.  Results are stored in
    ``self.U``, ``self.S`` and ``self.V`` (``V`` transposed); nothing is
    returned.

    NOTE(review): ``self`` is not a parameter here -- presumably this is a
    nested helper closing over the enclosing method's ``self``; confirm.
    """
    # for some reason arpack does not allow computation of rank(A)
    # eigenvectors, so at most (number of columns - 1) are requested
    AA = self.data.transpose() * self.data

    if self.data.shape[1] > 1:
        # do not compute full rank if a smaller k is desired
        if self._k > 0 and self._k < AA.shape[1] - 1:
            k = self._k
        else:
            k = self.data.shape[1] - 1

        # The previous test ``scipy.version.version == '0.9.0'`` only
        # matched that exact release; any newer scipy fell through to the
        # removed eigen_symmetric.  Detect the available name instead.
        solver = getattr(linalg, 'eigsh', None)
        if solver is None:
            solver = linalg.eigen_symmetric
        values, v_vectors = solver(AA, k=k)
    else:
        values, v_vectors = eigh(AA.todense())

    # get rid of negative/too low eigenvalues
    s = np.where(values > self._EPS)[0]
    v_vectors = v_vectors[:, s]
    values = values[s]

    # sort eigenvectors according to largest value
    # (argsort sorts in ascending order -> access is backwards)
    idx = np.argsort(values)[::-1]
    values = values[idx]
    self.V = scipy.sparse.csc_matrix(v_vectors[:, idx])

    # compute S
    tmp_val = np.sqrt(values)
    n_kept = len(idx)
    self.S = scipy.sparse.spdiags(tmp_val, 0, n_kept, n_kept, format='csc')

    # and the inverse of it
    S_inv = scipy.sparse.spdiags(1.0 / tmp_val, 0, n_kept, n_kept,
                                 format='csc')

    self.U = self.data * self.V * S_inv
    self.V = self.V.transpose()
def pca(data, k):
    """
    Performs the eigen decomposition of the covariance matrix.

    arguments:
    * data: 2D numpy array where each row is a sample and
            each column a feature.
    * k: number of principal components to keep.

    return:
    * w: the k algebraically largest eigen values of the covariance
         matrix, in the reverse of the order returned by the solver
         (highest to lowest, assuming the solver returns them in
         ascending order).
    * u: the corresponding eigen vectors. u[:,i] is the vector
         corresponding to w[i]

    Notes: If the number of samples is much smaller than the number
    of features, you should consider the use of 'svd_pca'.
    """
    # np.cov expects variables in rows, hence the transpose
    covariance = np.cov(data.T)
    eigvals, eigvecs = eigen_symmetric(covariance, k=k, which='LA')
    # flip values and columns together so the pairing is preserved
    backwards = slice(None, None, -1)
    return eigvals[backwards], eigvecs[:, backwards]
def _sparse_right_svd():
    """Compute a (possibly truncated) SVD of ``self.data`` via ``data * data^T``.

    The symmetric eigen-decomposition of ``data * data^T`` gives the left
    singular vectors and the squared singular values; the right singular
    vectors are recovered as ``S^-1 * U^T * data``.  The results are written
    to ``self.U``, ``self.S`` and ``self.V``; nothing is returned.

    NOTE(review): ``self`` is not a parameter here -- presumably this is a
    nested helper closing over the enclosing method's ``self``; confirm.
    """
    # for some reason arpack does not allow computation of rank(A)
    # eigenvectors, so at most (number of rows - 1) of them are requested
    AA = self.data * self.data.transpose()

    n_rows = self.data.shape[0]
    if n_rows > 1:
        # only compute a few eigenvectors ...
        if 0 < self._k < n_rows - 1:
            k = self._k
        else:
            k = n_rows - 1

        # Feature-detect the solver instead of comparing the version string
        # with "0.9.0": that comparison sent every later scipy release to
        # eigen_symmetric, which no longer exists there.
        if hasattr(linalg, "eigsh"):
            values, u_vectors = linalg.eigsh(AA, k=k)
        else:
            values, u_vectors = linalg.eigen_symmetric(AA, k=k)
    else:
        values, u_vectors = eigh(AA.todense())

    # get rid of negative/too low eigenvalues
    kept = np.where(values > self._EPS)[0]
    u_vectors = u_vectors[:, kept]
    values = values[kept]

    # sort eigenvectors according to largest value
    # argsort sorts in ascending order -> access is backwards
    idx = np.argsort(values)[::-1]
    values = values[idx]
    self.U = scipy.sparse.csc_matrix(u_vectors[:, idx])

    # compute S as a sparse diagonal matrix of the singular values
    singular = np.sqrt(values)
    size = len(idx)
    self.S = scipy.sparse.spdiags(singular, 0, size, size, format="csc")

    # and the inverse of it
    S_inv = scipy.sparse.spdiags(1.0 / singular, 0, size, size, format="csc")

    # compute V from it
    self.V = self.U.transpose() * self.data
    self.V = S_inv * self.V
def _sparse_right_svd():
    """Compute a (possibly truncated) SVD of ``self.data`` via ``data * data^T``.

    Eigenvectors of ``data * data^T`` become the left singular vectors, the
    square roots of its eigenvalues the singular values, and the right
    singular vectors follow as ``S^-1 * U^T * data``.  Results are stored in
    ``self.U``, ``self.S`` and ``self.V``; nothing is returned.

    NOTE(review): ``self`` is not a parameter here -- presumably this is a
    nested helper closing over the enclosing method's ``self``; confirm.
    """
    # for some reason arpack does not allow computation of rank(A)
    # eigenvectors, so at most (number of rows - 1) of them are requested
    AA = self.data * self.data.transpose()

    if self.data.shape[0] > 1:
        # only compute a few eigenvectors ...
        if self._k > 0 and self._k < self.data.shape[0] - 1:
            k = self._k
        else:
            k = self.data.shape[0] - 1

        # The previous test ``scipy.version.version == '0.9.0'`` only
        # matched that exact release; any newer scipy fell through to the
        # removed eigen_symmetric.  Detect the available name instead.
        solver = getattr(linalg, 'eigsh', None)
        if solver is None:
            solver = linalg.eigen_symmetric
        values, u_vectors = solver(AA, k=k)
    else:
        values, u_vectors = eigh(AA.todense())

    # get rid of negative/too low eigenvalues
    s = np.where(values > self._EPS)[0]
    u_vectors = u_vectors[:, s]
    values = values[s]

    # sort eigenvectors according to largest value
    # argsort sorts in ascending order -> access is backwards
    idx = np.argsort(values)[::-1]
    values = values[idx]
    self.U = scipy.sparse.csc_matrix(u_vectors[:, idx])

    # compute S
    tmp_val = np.sqrt(values)
    n_kept = len(idx)
    self.S = scipy.sparse.spdiags(tmp_val, 0, n_kept, n_kept, format='csc')

    # and the inverse of it
    S_inv = scipy.sparse.spdiags(1.0 / tmp_val, 0, n_kept, n_kept,
                                 format='csc')

    # compute V from it
    self.V = self.U.transpose() * self.data
    self.V = S_inv * self.V
def extern_pca(data, k):
    """
    Performs the eigen decomposition of the covariance matrix based
    on the eigen decomposition of the exterior product matrix.

    arguments:
    * data: 2D numpy array where each row is a sample and
            each column a feature.
    * k: number of principal components to keep.

    return:
    * w: the eigen values of the covariance matrix, in the reverse of the
         order returned by the solver (highest to lowest, assuming the
         solver returns them in ascending order).
    * u: the corresponding eigen vectors. u[:,i] is the vector
         corresponding to w[i]

    Notes: This function computes PCA based on the exterior product
    matrix (X*X.T, whose eigen values are rescaled by 1/(n-1)) instead of
    the covariance matrix (X.T*X/(n-1)) and uses relations based on the
    singular value decomposition to compute the final eigen vectors. While
    this can be much faster when the number of samples is much smaller
    than the number of features, it can lead to loss of precision.

    The (centered) data matrix X can be decomposed as:
        X.T = U * S * v.T
    One computes the eigen decomposition of:
        X * X.T = v*S^2*v.T
    and the eigen vectors of the covariance matrix are computed as:
        U = X.T * v * S^(-1)
    """
    data_m = data - data.mean(0)
    K = np.dot(data_m, data_m.T)
    w, v = eigen_symmetric(K, k=k, which='LA')
    # Project with the *centered* matrix, as in the derivation above.  Using
    # the raw data only gives the same result through cancellation of the
    # mean term, which costs precision when the mean is large.
    U = np.dot(data_m.T, v / np.sqrt(w))
    # eigen values of X*X.T are (n-1) times those of the covariance matrix
    return w[::-1] / (len(data) - 1), U[:, ::-1]
def nvecs(self, n, r, flipsign = True):
    """
    Compute the leading mode-n vectors for a tensor

    Computes the eigenvectors belonging to the r largest-magnitude
    eigenvalues of Xn*Xn' (where Xn is the mode-n matricization of X),
    which provides information about the mode-n fibers. In two dimensions,
    the r leading mode-1 vectors are the same as the r left singular
    vectors and the r leading mode-2 vectors are the same as the r right
    singular vectors.

    Parameters
    ----------
    n : int, mode of the matricization of the tensor
    r : int, number of leading eigenvalues/eigenvectors to compute
    flipsign : bool, if true each returned column is sign-normalized so
        that its largest-magnitude element is positive

    Returns
    -------
    M : array with one eigenvector per column (r columns), in the order
        returned by the ARPACK solver
    """
    import numpy as np
    try:
        from scipy.sparse.linalg import eigsh
    except ImportError:
        # scipy < 0.9 only shipped the solver under its old name
        from scipy.sparse.linalg.eigen.arpack import eigen_symmetric as eigsh

    Xn = self.matricization(n)
    Y = np.dot(Xn, Xn.T)
    # the solver returns (eigenvalues, eigenvectors); only vectors are used
    _, vecs = eigsh(Y, r, which = 'LM')

    if flipsign:
        # Eigenvectors are only defined up to sign; flip every column whose
        # largest-magnitude entry is negative.
        cols = np.arange(vecs.shape[1])
        pivots = vecs[np.abs(vecs).argmax(axis=0), cols]
        vecs[:, pivots.real < 0] *= -1
    return vecs
def spectral_embedding(adjacency, k=8, mode=None):
    """ Spectral embedding: project the samples on the first k
        eigenvectors of the normalized graph Laplacian.

        Parameters
        -----------
        adjacency: array-like or sparse matrix, shape: (p, p)
            The adjacency matrix of the graph to embed.
        k: integer, optional
            The dimension of the projection subspace.
        mode: {None, 'arpack' or 'amg'}
            The eigenvalue decomposition strategy to use. AMG (Algebraic
            MultiGrid) is much faster, but requires pyamg to be
            installed.  With None, 'amg' is used when pyamg can be
            imported and 'arpack' otherwise.

        Returns
        --------
        embedding: array, shape: (k, p)
            The reduced samples, one row per eigenvector (the
            transposed eigenvector matrix scaled by the Laplacian's
            diagonal term).

        Raises
        ------
        ValueError
            If ``mode`` is not one of the supported values, or if the
            AMG path yields an embedding with a single row.

        Notes
        ------
        The graph should contain only one connected component; otherwise
        the results make little sense.
    """
    from scipy import sparse
    from scipy.sparse.linalg import lobpcg
    try:
        from scipy.sparse.linalg import eigsh
    except ImportError:
        # scipy < 0.9 only shipped the solver under its old name
        from scipy.sparse.linalg.eigen.arpack import eigen_symmetric as eigsh
    try:
        from pyamg import smoothed_aggregation_solver
        amg_loaded = True
    except ImportError:
        amg_loaded = False

    n_nodes = adjacency.shape[0]
    # XXX: Should we check that the matrix given is symmetric
    if not amg_loaded:
        warnings.warn('pyamg not available, using scipy.sparse')
    if mode is None:
        mode = ('amg' if amg_loaded else 'arpack')
    laplacian, dd = graph_laplacian(adjacency,
                                    normed=True, return_diag=True)
    if (mode == 'arpack'
            or not sparse.isspmatrix(laplacian)
            or n_nodes < 5 * k):  # threshold under which lobpcg has bugs
        # We need to put the diagonal at zero
        if not sparse.isspmatrix(laplacian):
            # ``.flat`` strides over the flattened array, so every
            # (n_nodes + 1)-th element is a diagonal entry.  Slicing the
            # 2-D array directly would zero whole rows instead.
            laplacian.flat[::n_nodes + 1] = 0
        else:
            laplacian = laplacian.tocoo()
            diag_idx = (laplacian.row == laplacian.col)
            laplacian.data[diag_idx] = 0
            # If the matrix has a small number of diagonals (as in the
            # case of structured matrices coming from images), the
            # dia format might be best suited for matvec products:
            n_diags = np.unique(laplacian.row - laplacian.col).size
            if n_diags <= 7:
                # 3 or less outer diagonals on each side
                laplacian = laplacian.todia()
            else:
                # csr has the fastest matvec and is thus best suited to
                # arpack
                laplacian = laplacian.tocsr()
        lambdas, diffusion_map = eigsh(-laplacian, k=k, which='LA')
        # reverse the row order returned by the solver
        embedding = diffusion_map.T[::-1] * dd
    elif mode == 'amg':
        # Use AMG to get a preconditioner and speed up the eigenvalue
        # problem.
        # lobpcg needs the native float (``np.float`` was only an alias of
        # the builtin and has been removed from recent numpy)
        laplacian = laplacian.astype(float)
        ml = smoothed_aggregation_solver(laplacian.tocsr())
        X = np.random.rand(laplacian.shape[0], k)
        X[:, 0] = 1. / dd.ravel()
        M = ml.aspreconditioner()
        lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12,
                                        largest=False)
        embedding = diffusion_map.T * dd
        if embedding.shape[0] == 1:
            raise ValueError
    else:
        raise ValueError("Unknown value for mode: '%s'." % mode)
    return embedding