def _isomap(self, X):
    """Compute the Isomap embedding of ``X``.

    The adjacency (distance) matrix obtained from
    ``self.get_Adjacency_matrix(X)`` is turned into a matrix of graph
    shortest-path distances, centered, and eigen-decomposed. The leading
    ``self.n_components`` eigenvectors, each scaled by the square root of
    its eigenvalue, form the embedding.

    Parameters
    ----------
    X : input handed unchanged to ``self.get_Adjacency_matrix``.

    Returns
    -------
    embedding : array
        ``[sqrt(lambda_1) * V_1, ..., sqrt(lambda_d) * V_d]`` with
        ``d = self.n_components``; presumably one row per sample
        (TODO confirm against ``eigen_decomposition``).
    """
    # Adjacency (distance) matrix first, then all-pairs shortest paths.
    graph_distance_mat = graph_shortest_path(
        self.get_Adjacency_matrix(X),
        method=self.path_method,
        directed=False)

    centered_mat = centeralise_matrix(graph_distance_mat)

    # Solver choice and dimensionality are read from the estimator itself;
    # neither is a local name or an argument of this method.
    n_components = self.n_components
    eigenvalues, eigenvectors = eigen_decomposition(
        centered_mat, n_components,
        eigen_solver=self.eigen_solver,
        seed=self.seed, largest=True,
        solver_kwds=self.solver_kwds)

    # NOTE(review): no re-sorting is done here; this relies on
    # eigen_decomposition returning eigenpairs ordered largest-first.
    # Confirm against the solver in use.
    embedding = (eigenvectors[:, :n_components]
                 * np.sqrt(eigenvalues[:n_components]))
    return embedding
def fit(self, X, eigen_solver=None, input_type='data'):
    """Fit the model from data in X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples
        and n_features is the number of features.
    eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}
        The eigenvalue decomposition strategy to use. When not None and
        different from ``self.eigen_solver`` it replaces the stored
        solver. AMG requires pyamg to be installed. It can be faster on
        very large, sparse problems, but may also lead to instabilities.
    input_type : string, one of: 'data', 'distance', 'affinity'.
        The values of input data X. (default = 'data')
        Stored on ``self.input_type`` unless None.

    Returns
    -------
    self : object
        Returns the instance itself, with ``embedding_`` set.
    """
    if input_type is not None:
        self.input_type = input_type

    # Build the geometry only when one has not been attached yet.
    if not isinstance(self.Geometry, geom.Geometry):
        self.fit_geometry(X)

    # The caller may switch eigen solver between calls, e.g. after a
    # failed decomposition.
    if eigen_solver is not None and eigen_solver != self.eigen_solver:
        self.eigen_solver = eigen_solver

    # Don't re-compute these if it's already been done. This might be the
    # case if an eigendecomposition fails and a different solver is
    # selected.
    if self.distance_matrix is None:
        self.distance_matrix = self.Geometry.get_distance_matrix()
    if self.graph_distance_matrix is None:
        self.graph_distance_matrix = graph_shortest_path(
            self.distance_matrix,
            method=self.path_method,
            directed=False)
    if self.centered_matrix is None:
        self.centered_matrix = center_matrix(self.graph_distance_matrix)

    random_state = check_random_state(self.random_state)
    self.embedding_ = isomap(
        self.Geometry,
        n_components=self.n_components,
        eigen_solver=self.eigen_solver,
        random_state=random_state,
        eigen_tol=self.eigen_tol,
        path_method=self.path_method,
        distance_matrix=self.distance_matrix,
        graph_distance_matrix=self.graph_distance_matrix,
        centered_matrix=self.centered_matrix)
    return self
def isomap(geom, n_components=8, eigen_solver='auto',
           random_state=None, path_method='auto',
           distance_matrix=None, graph_distance_matrix=None,
           centered_matrix=None, solver_kwds=None):
    """Compute an Isomap embedding.

    Any of the intermediate matrices may be supplied to skip the
    corresponding step; only what is missing is computed.

    Parameters
    ----------
    geom : a Geometry object from megaman.geometry.geometry
        Only consulted when no intermediate matrix is supplied.
    n_components : integer, optional
        The dimension of the projection subspace.
    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
        'auto' :
            algorithm will attempt to choose the best method for input data
        'dense' :
            use standard dense matrix operations for the eigenvalue
            decomposition. For this method, M must be an array or matrix
            type. This method should be avoided for large problems.
        'arpack' :
            use arnoldi iteration in shift-invert mode. For this method,
            M may be a dense matrix, sparse matrix, or general linear
            operator. Warning: ARPACK can be unstable for some problems.
            It is best to try several random seeds in order to check
            results.
        'lobpcg' :
            Locally Optimal Block Preconditioned Conjugate Gradient Method.
            A preconditioned eigensolver for large symmetric positive
            definite (SPD) generalized eigenproblems.
        'amg' :
            AMG requires pyamg to be installed. It can be faster on very
            large, sparse problems, but may also lead to instabilities.
    random_state : int seed, RandomState instance, or None (default)
        Forwarded to ``eigen_decomposition``; a pseudo random number
        generator used for the initialization of the lobpcg eigen vectors
        decomposition when eigen_solver == 'amg'.
    path_method : string, method for computing graph shortest path.
        One of : 'auto', 'D', 'FW', 'BF', 'J'. See
        scipy.sparse.csgraph.shortest_path for more information.
    distance_matrix : sparse Ndarray (n_obs, n_obs), optional.
        Pairwise distance matrix; sparse zeros considered 'infinite'.
    graph_distance_matrix : Ndarray (n_obs, n_obs), optional.
        Pairwise graph distance matrix. Output of graph_shortest_path.
    centered_matrix : Ndarray (n_obs, n_obs), optional.
        Centered version of graph_distance_matrix.
    solver_kwds : any additional keyword arguments to pass to the
        selected eigen_solver

    Returns
    -------
    embedding : array, shape=(n_samples, n_components)
        The reduced samples:
        ``[sqrt(lambda_1) * V_1, ..., sqrt(lambda_d) * V_d]``.
    """
    if centered_matrix is None:
        if graph_distance_matrix is None:
            # Step 1: fall back on the geometry for the distance matrix.
            # Only needed when the shortest-path matrix must be built, so
            # a supplied graph_distance_matrix never triggers it.
            if distance_matrix is None:
                distance_matrix = geom.adjacency_matrix
                if distance_matrix is None:
                    distance_matrix = geom.compute_adjacency_matrix()

            # Step 2: use graph_shortest_path to construct D_G.
            # WARNING: D_G is an (NxN) DENSE matrix!!
            graph_distance_matrix = graph_shortest_path(
                distance_matrix, method=path_method, directed=False)

        # Step 3: center graph distance matrix.
        centered_matrix = center_matrix(graph_distance_matrix)

    # Step 4: compute d largest eigenvectors/values of centered_matrix.
    lambdas, diffusion_map = eigen_decomposition(
        centered_matrix, n_components, largest=True,
        eigen_solver=eigen_solver, random_state=random_state,
        solver_kwds=solver_kwds)

    # Step 5: order eigenpairs largest-first, then scale each eigenvector
    # by the square root of its eigenvalue.
    order = np.argsort(lambdas)[::-1]
    lambdas = lambdas[order]
    diffusion_map = diffusion_map[:, order]
    embedding = (diffusion_map[:, :n_components]
                 * np.sqrt(lambdas[:n_components]))
    return embedding
def fit(self, X, y=None, input_type='data'):
    """Fit the model from data in X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples
        and n_features is the number of features.

        If input_type is 'distance':

        X : array-like, shape (n_samples, n_samples),
        Interpret X as precomputed distance or adjacency graph
        computed from samples.
    y : ignored
        Not used by this method.
    input_type : string, one of: 'data', 'distance'.
        The values of input data X. (default = 'data')

    Returns
    -------
    self : object
        Returns the instance itself, with ``embedding_`` set.
    """
    X = self._validate_input(X, input_type)
    self.fit_geometry(X, input_type)

    # Make sure the cache slots exist before they are inspected.
    for cache_attr in ('distance_matrix',
                       'graph_distance_matrix',
                       'centered_matrix'):
        if not hasattr(self, cache_attr):
            setattr(self, cache_attr, None)

    # Cached matrices are kept as-is: recomputing is avoided, e.g. when an
    # eigendecomposition failed and fit is retried with another solver.
    if self.distance_matrix is None:
        adjacency = self.geom_.adjacency_matrix
        if adjacency is None:
            adjacency = self.geom_.compute_adjacency_matrix()
        self.distance_matrix = adjacency

    if self.graph_distance_matrix is None:
        shortest_paths = graph_shortest_path(self.distance_matrix,
                                             method=self.path_method,
                                             directed=False)
        self.graph_distance_matrix = shortest_paths

    if self.centered_matrix is None:
        self.centered_matrix = center_matrix(self.graph_distance_matrix)

    isomap_kwargs = dict(
        n_components=self.n_components,
        eigen_solver=self.eigen_solver,
        random_state=self.random_state,
        path_method=self.path_method,
        distance_matrix=self.distance_matrix,
        graph_distance_matrix=self.graph_distance_matrix,
        centered_matrix=self.centered_matrix,
        solver_kwds=self.solver_kwds,
    )
    self.embedding_ = isomap(self.geom_, **isomap_kwargs)
    return self
def isomap(geom, n_components=8, eigen_solver='auto',
           random_state=None, path_method='auto',
           distance_matrix=None, graph_distance_matrix=None,
           centered_matrix=None, solver_kwds=None):
    """Compute an Isomap embedding.

    Intermediate results (distance matrix, graph distance matrix, centered
    matrix) can be passed in to avoid recomputing them; a step runs only
    when its output was not supplied and is actually needed downstream.

    Parameters
    ----------
    geom : a Geometry object from megaman.geometry.geometry
        Source of the adjacency matrix when none of the intermediate
        matrices is supplied.
    n_components : integer, optional
        The dimension of the projection subspace.
    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
        'auto' :
            algorithm will attempt to choose the best method for input data
        'dense' :
            use standard dense matrix operations for the eigenvalue
            decomposition. For this method, M must be an array or matrix
            type. This method should be avoided for large problems.
        'arpack' :
            use arnoldi iteration in shift-invert mode. For this method,
            M may be a dense matrix, sparse matrix, or general linear
            operator. Warning: ARPACK can be unstable for some problems.
            It is best to try several random seeds in order to check
            results.
        'lobpcg' :
            Locally Optimal Block Preconditioned Conjugate Gradient Method.
            A preconditioned eigensolver for large symmetric positive
            definite (SPD) generalized eigenproblems.
        'amg' :
            AMG requires pyamg to be installed. It can be faster on very
            large, sparse problems, but may also lead to instabilities.
    random_state : int seed, RandomState instance, or None (default)
        Passed through to ``eigen_decomposition``; a pseudo random number
        generator used for the initialization of the lobpcg eigen vectors
        decomposition when eigen_solver == 'amg'.
    path_method : string, method for computing graph shortest path.
        One of : 'auto', 'D', 'FW', 'BF', 'J'. See
        scipy.sparse.csgraph.shortest_path for more information.
    distance_matrix : sparse Ndarray (n_obs, n_obs), optional.
        Pairwise distance matrix; sparse zeros considered 'infinite'.
    graph_distance_matrix : Ndarray (n_obs, n_obs), optional.
        Pairwise graph distance matrix. Output of graph_shortest_path.
    centered_matrix : Ndarray (n_obs, n_obs), optional.
        Centered version of graph_distance_matrix.
    solver_kwds : any additional keyword arguments to pass to the
        selected eigen_solver

    Returns
    -------
    embedding : array, shape=(n_samples, n_components)
        The reduced samples.
    """
    # The shortest-path matrix has to be built only if neither it nor its
    # centered form was handed in.
    build_graph_distances = (graph_distance_matrix is None
                             and centered_matrix is None)

    # Step 1: use geometry to calculate the distance matrix, but only when
    # step 2 will consume it.
    if build_graph_distances and distance_matrix is None:
        if geom.adjacency_matrix is None:
            distance_matrix = geom.compute_adjacency_matrix()
        else:
            distance_matrix = geom.adjacency_matrix

    # Step 2: use graph_shortest_path to construct D_G.
    # WARNING: D_G is an (NxN) DENSE matrix!!
    if build_graph_distances:
        graph_distance_matrix = graph_shortest_path(distance_matrix,
                                                    method=path_method,
                                                    directed=False)

    # Step 3: center graph distance matrix.
    if centered_matrix is None:
        centered_matrix = center_matrix(graph_distance_matrix)

    # Step 4: compute d largest eigenvectors/values of centered_matrix.
    lambdas, diffusion_map = eigen_decomposition(centered_matrix,
                                                 n_components,
                                                 largest=True,
                                                 eigen_solver=eigen_solver,
                                                 random_state=random_state,
                                                 solver_kwds=solver_kwds)

    # Step 5: return Y = [sqrt(lambda_1)*V_1, ..., sqrt(lambda_d)*V_d],
    # with eigenpairs sorted largest-first.
    descending = np.argsort(lambdas)[::-1]
    lambdas = lambdas[descending]
    diffusion_map = diffusion_map[:, descending]
    scale = np.sqrt(lambdas[0:n_components])
    return diffusion_map[:, 0:n_components] * scale
def fit(self, X, y=None, input_type='data'):
    """Fit the model from data in X and store the embedding.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples
        and n_features is the number of features.

        If input_type is 'distance':

        X : array-like, shape (n_samples, n_samples),
        Interpret X as precomputed distance or adjacency graph
        computed from samples.
    y : ignored
        Never read inside this method.
    input_type : string, one of: 'data', 'distance'.
        The values of input data X. (default = 'data')

    Returns
    -------
    self : object
        Returns the instance itself; the result is in ``embedding_``.
    """
    validated = self._validate_input(X, input_type)
    self.fit_geometry(validated, input_type)

    # Create any missing cache attribute so the checks below are safe.
    cache_names = ('distance_matrix', 'graph_distance_matrix',
                   'centered_matrix')
    for name in cache_names:
        if not hasattr(self, name):
            setattr(self, name, None)

    # Don't re-compute what is already cached. This matters when an
    # eigendecomposition failed and a different solver is tried.
    if self.distance_matrix is None:
        geometry = self.geom_
        if geometry.adjacency_matrix is None:
            self.distance_matrix = geometry.compute_adjacency_matrix()
        else:
            self.distance_matrix = geometry.adjacency_matrix

    if self.graph_distance_matrix is None:
        self.graph_distance_matrix = graph_shortest_path(
            self.distance_matrix,
            method=self.path_method,
            directed=False,
        )

    if self.centered_matrix is None:
        centered = center_matrix(self.graph_distance_matrix)
        self.centered_matrix = centered

    embedding = isomap(
        self.geom_,
        n_components=self.n_components,
        eigen_solver=self.eigen_solver,
        random_state=self.random_state,
        path_method=self.path_method,
        distance_matrix=self.distance_matrix,
        graph_distance_matrix=self.graph_distance_matrix,
        centered_matrix=self.centered_matrix,
        solver_kwds=self.solver_kwds,
    )
    self.embedding_ = embedding
    return self