コード例 #1
0
     def _isomap(self, X):
          """Compute an Isomap-style embedding for X.

          Pipeline, as implemented below:

          1. ``self.get_Adjacency_matrix(X)`` produces the adjacency
             (distance) matrix, which ``graph_shortest_path`` turns into
             graph shortest-path distances (undirected).
          2. ``centeralise_matrix`` centers the graph distance matrix.
          3. ``eigen_decomposition`` is asked for ``self.n_components``
             eigenpairs with ``largest=True``.
          4. Each returned eigenvector is scaled by the square root of its
             eigenvalue: ``Y = [sqrt(lambda_1)*V_1, ..., sqrt(lambda_d)*V_d]``.

          Parameters
          ----------
          X : object
              Input forwarded unchanged to ``self.get_Adjacency_matrix``.

          Returns
          -------
          embedding : array
              ``eigenvectors[:, :n_components] * sqrt(eigenvalues[:n_components])``.
          """
          # Adjacency (distance) matrix first, then graph shortest paths.
          graph_distance_mat = graph_shortest_path(
               self.get_Adjacency_matrix(X),
               method=self.path_method,
               directed=False)

          # Center the graph distance matrix.
          centered_mat = centeralise_matrix(graph_distance_mat)

          # The original body referenced bare `eigen_solver` and
          # `n_components`, which are not bound anywhere in this method and
          # raised NameError. Both are read from the instance instead.
          # NOTE(review): assumes the solver choice is stored as
          # `self.eigen_solver` (like `self.n_components`) -- confirm against
          # the class constructor.
          n_components = self.n_components
          eigenvalues, eigenvectors = eigen_decomposition(
               centered_mat, n_components, eigen_solver=self.eigen_solver,
               seed=self.seed, largest=True,
               solver_kwds=self.solver_kwds)

          # NOTE(review): no re-sorting is done here; the eigenpairs are
          # presumed to come back from eigen_decomposition already ordered
          # largest-first -- verify against that helper.
          embedding = eigenvectors[:, 0:n_components] * np.sqrt(eigenvalues[0:n_components])

          return embedding
コード例 #2
0
ファイル: isomap.py プロジェクト: Jerryzcn/Mmani
    def fit(self, X, eigen_solver = None, input_type = 'data'):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples in the number of samples
            and n_features is the number of features.

        eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}
            The eigenvalue decomposition strategy to use. AMG requires pyamg
            to be installed. It can be faster on very large, sparse problems,
            but may also lead to instabilities. When not None, it replaces
            ``self.eigen_solver`` before the embedding is computed.

        input_type : string, one of: 'data', 'distance', 'affinity'.
            The values of input data X. (default = 'data')
            When not None, it is stored on ``self.input_type``.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        if input_type is not None:
            self.input_type = input_type
        # Only build the geometry when one has not been attached yet.
        if not isinstance(self.Geometry, geom.Geometry):
            self.fit_geometry(X)
        # Allow the caller to switch eigen solvers between fits.
        # (Previously compared against the misspelled name `eigen_sovler`,
        # which raised NameError whenever a solver was passed in.)
        if ((eigen_solver is not None) and (eigen_solver != self.eigen_solver)):
            self.eigen_solver = eigen_solver

        # Don't re-compute these if it has already been done. This can happen
        # when an eigendecomposition fails and a different solver is selected.
        if self.distance_matrix is None:
            self.distance_matrix = self.Geometry.get_distance_matrix()
        if self.graph_distance_matrix is None:
            self.graph_distance_matrix = graph_shortest_path(self.distance_matrix,
                                                             method = self.path_method,
                                                             directed = False)
        if self.centered_matrix is None:
            self.centered_matrix = center_matrix(self.graph_distance_matrix)

        random_state = check_random_state(self.random_state)
        self.embedding_ = isomap(self.Geometry, n_components=self.n_components,
                                 eigen_solver=self.eigen_solver,
                                 random_state=random_state,
                                 eigen_tol = self.eigen_tol,
                                 path_method = self.path_method,
                                 distance_matrix = self.distance_matrix,
                                 graph_distance_matrix = self.graph_distance_matrix,
                                 centered_matrix = self.centered_matrix)
        return self
コード例 #3
0
ファイル: isomap.py プロジェクト: dawong94/megaman
def isomap(geom, n_components=8, eigen_solver='auto',
           random_state=None, path_method='auto',
           distance_matrix=None, graph_distance_matrix = None,
           centered_matrix=None, solver_kwds=None):
    """Compute an Isomap embedding from a Geometry object.

    Any intermediate matrix that is supplied is reused; missing ones are
    derived in order (distance -> graph distance -> centered). The centered
    matrix is then eigendecomposed and each eigenvector is scaled by the
    square root of its eigenvalue.

    Parameters
    ----------
    geom : a Geometry object from megaman.geometry.geometry
    n_components : integer, optional
        The dimension of the projection subspace.
    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
        'auto' :
            the algorithm attempts to pick the best method for the input.
        'dense' :
            standard dense matrix operations; the matrix must be an array
            or matrix type. Should be avoided for large problems.
        'arpack' :
            Arnoldi iteration in shift-invert mode; accepts a dense matrix,
            sparse matrix, or general linear operator.
            Warning: ARPACK can be unstable for some problems. It is best
            to try several random seeds in order to check results.
        'lobpcg' :
            Locally Optimal Block Preconditioned Conjugate Gradient Method,
            a preconditioned eigensolver for large symmetric positive
            definite (SPD) generalized eigenproblems.
        'amg' :
            requires pyamg to be installed. Can be faster on very large,
            sparse problems, but may also lead to instabilities.
    random_state : int seed, RandomState instance, or None (default)
        Pseudo random number generator used to initialise the lobpcg
        eigenvector decomposition when eigen_solver == 'amg'.
        By default, arpack is used.
    path_method : string, one of 'auto', 'D', 'FW', 'BF', 'J'
        Method used for the graph shortest path computation. See
        scipy.sparse.csgraph.shortest_path for more information.
    distance_matrix : sparse Ndarray (n_obs, n_obs), optional
        Pairwise distance matrix; sparse zeros are considered 'infinite'.
    graph_distance_matrix : Ndarray (n_obs, n_obs), optional
        Pairwise graph distance matrix (output of graph_shortest_path).
    centered_matrix : Ndarray (n_obs, n_obs), optional
        Centered version of graph_distance_matrix.
    solver_kwds : additional keyword arguments for the selected eigen_solver

    Returns
    -------
    embedding : array, shape=(n_samples, n_components)
        The reduced samples.
    """
    # Steps 1-3 are only needed when no centered matrix was handed in.
    if centered_matrix is None:
        # Step 1: obtain the distance matrix from the geometry if missing.
        if distance_matrix is None:
            if geom.adjacency_matrix is None:
                distance_matrix = geom.compute_adjacency_matrix()
            else:
                distance_matrix = geom.adjacency_matrix

        # Step 2: graph shortest paths give D_G.
        # WARNING: D_G is an (NxN) DENSE matrix!
        if graph_distance_matrix is None:
            graph_distance_matrix = graph_shortest_path(distance_matrix,
                                                        method=path_method,
                                                        directed=False)

        # Step 3: center the graph distance matrix.
        centered_matrix = center_matrix(graph_distance_matrix)

    # Step 4: d largest eigenvalues/eigenvectors of the centered matrix.
    eigvals, eigvecs = eigen_decomposition(centered_matrix, n_components,
                                           largest=True,
                                           eigen_solver=eigen_solver,
                                           random_state=random_state,
                                           solver_kwds=solver_kwds)

    # Step 5: Y = [sqrt(lambda_1)*V_1, ..., sqrt(lambda_d)*V_d], with the
    # eigenpairs ordered from largest to smallest eigenvalue.
    top = np.argsort(eigvals)[::-1][0:n_components]
    return eigvecs[:, top] * np.sqrt(eigvals[top])
コード例 #4
0
ファイル: isomap.py プロジェクト: dawong94/megaman
    def fit(self, X, y=None, input_type='data'):
        """Fit the model from data in X and store the embedding.

        Parameters
        ----------
        X : array-like
            With input_type 'data': shape (n_samples, n_features), the
            training vectors. With input_type 'distance': shape
            (n_samples, n_samples), interpreted as a precomputed distance
            or adjacency graph computed from the samples.

        y : not used by this method.

        input_type : string, one of: 'data', 'distance'.
            The values of input data X. (default = 'data')

        Returns
        -------
        self : object
            Returns the instance itself, with ``embedding_`` set.

        Notes
        -----
        ``distance_matrix``, ``graph_distance_matrix`` and
        ``centered_matrix`` are cached on the instance and only computed
        when they are None, so a retry (e.g. after a failed
        eigendecomposition with a different solver) does not redo them.
        """
        X = self._validate_input(X, input_type)
        self.fit_geometry(X, input_type)

        # Make sure every cache slot exists before it is inspected.
        for cache_name in ('distance_matrix',
                           'graph_distance_matrix',
                           'centered_matrix'):
            if not hasattr(self, cache_name):
                setattr(self, cache_name, None)

        # Fill in whichever intermediate results are still missing.
        if self.distance_matrix is None:
            adjacency = self.geom_.adjacency_matrix
            if adjacency is None:
                adjacency = self.geom_.compute_adjacency_matrix()
            self.distance_matrix = adjacency

        if self.graph_distance_matrix is None:
            shortest_paths = graph_shortest_path(self.distance_matrix,
                                                 method = self.path_method,
                                                 directed = False)
            self.graph_distance_matrix = shortest_paths

        if self.centered_matrix is None:
            self.centered_matrix = center_matrix(self.graph_distance_matrix)

        isomap_kwargs = dict(n_components=self.n_components,
                             eigen_solver=self.eigen_solver,
                             random_state=self.random_state,
                             path_method=self.path_method,
                             distance_matrix=self.distance_matrix,
                             graph_distance_matrix=self.graph_distance_matrix,
                             centered_matrix=self.centered_matrix,
                             solver_kwds=self.solver_kwds)
        self.embedding_ = isomap(self.geom_, **isomap_kwargs)
        return self
コード例 #5
0
def isomap(geom,
           n_components=8,
           eigen_solver='auto',
           random_state=None,
           path_method='auto',
           distance_matrix=None,
           graph_distance_matrix=None,
           centered_matrix=None,
           solver_kwds=None):
    """Embed the samples described by ``geom`` using Isomap.

    Precomputed intermediates (distance matrix, graph distance matrix,
    centered matrix) are used when given; otherwise each is derived from
    the previous stage. The result is the eigenvectors of the centered
    matrix scaled by the square roots of their eigenvalues.

    Parameters
    ----------
    geom : a Geometry object from megaman.geometry.geometry
    n_components : integer, optional
        The dimension of the projection subspace.
    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
        'auto' :
            let the algorithm try to choose the best method for the input.
        'dense' :
            standard dense matrix operations for the eigenvalue
            decomposition; requires an array or matrix type and should be
            avoided for large problems.
        'arpack' :
            Arnoldi iteration in shift-invert mode; works on a dense
            matrix, sparse matrix, or general linear operator.
            Warning: ARPACK can be unstable for some problems. It is best
            to try several random seeds in order to check results.
        'lobpcg' :
            Locally Optimal Block Preconditioned Conjugate Gradient Method.
            A preconditioned eigensolver for large symmetric positive
            definite (SPD) generalized eigenproblems.
        'amg' :
            AMG requires pyamg to be installed. It can be faster on very
            large, sparse problems, but may also lead to instabilities.
    random_state : int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization of
        the lobpcg eigen vectors decomposition when eigen_solver == 'amg'.
        By default, arpack is used.
    path_method : string, one of 'auto', 'D', 'FW', 'BF', 'J'
        Method for computing the graph shortest path. See
        scipy.sparse.csgraph.shortest_path for more information.
    distance_matrix : sparse Ndarray (n_obs, n_obs), optional
        Pairwise distance matrix; sparse zeros considered 'infinite'.
    graph_distance_matrix : Ndarray (n_obs, n_obs), optional
        Pairwise graph distance matrix. Output of graph_shortest_path.
    centered_matrix : Ndarray (n_obs, n_obs), optional
        Centered version of graph_distance_matrix.
    solver_kwds : any additional keyword arguments to pass to the selected
        eigen_solver

    Returns
    -------
    embedding : array, shape=(n_samples, n_components)
        The reduced samples.
    """
    # Nothing upstream has to be built when a centered matrix is supplied.
    must_center = centered_matrix is None

    # Step 1: distance matrix, taken from the geometry when not given.
    if must_center and distance_matrix is None:
        distance_matrix = (geom.compute_adjacency_matrix()
                           if geom.adjacency_matrix is None
                           else geom.adjacency_matrix)

    # Step 2: graph shortest paths -> D_G.
    # WARNING: D_G is an (NxN) DENSE matrix!
    if must_center and graph_distance_matrix is None:
        graph_distance_matrix = graph_shortest_path(
            distance_matrix, method=path_method, directed=False)

    # Step 3: center the graph distance matrix.
    if must_center:
        centered_matrix = center_matrix(graph_distance_matrix)

    # Step 4: d largest eigenvalues/eigenvectors of the centered matrix.
    solver_args = dict(largest=True,
                       eigen_solver=eigen_solver,
                       random_state=random_state,
                       solver_kwds=solver_kwds)
    lambdas, diffusion_map = eigen_decomposition(
        centered_matrix, n_components, **solver_args)

    # Step 5: order eigenpairs largest-first and return
    # Y = [sqrt(lambda_1)*V_1, ..., sqrt(lambda_d)*V_d].
    descending = np.argsort(lambdas)[::-1]
    ordered_values = lambdas[descending]
    ordered_vectors = diffusion_map[:, descending]
    scale = np.sqrt(ordered_values[:n_components])
    return ordered_vectors[:, :n_components] * scale
コード例 #6
0
    def fit(self, X, y=None, input_type='data'):
        """Fit the Isomap model to X; the result is kept in ``embedding_``.

        Parameters
        ----------
        X : array-like
            For input_type 'data', shape (n_samples, n_features): the
            training vectors. For input_type 'distance', shape
            (n_samples, n_samples): a precomputed distance or adjacency
            graph computed from the samples.

        y : accepted but not used in this method.

        input_type : string, one of: 'data', 'distance'.
            The values of input data X. (default = 'data')

        Returns
        -------
        self : object
            Returns the instance itself.

        Notes
        -----
        The distance, graph-distance and centered matrices are stored on
        the instance and are only computed while they are None; this avoids
        recomputation when an eigendecomposition fails and a different
        solver is selected.
        """
        X = self._validate_input(X, input_type)
        self.fit_geometry(X, input_type)

        # Create any cache attribute that has never been set.
        for slot in ('distance_matrix', 'graph_distance_matrix',
                     'centered_matrix'):
            try:
                getattr(self, slot)
            except AttributeError:
                setattr(self, slot, None)

        geometry = self.geom_

        # Distance matrix: prefer the geometry's adjacency matrix, computing
        # it only if the geometry does not have one yet.
        if self.distance_matrix is None:
            if geometry.adjacency_matrix is None:
                self.distance_matrix = geometry.compute_adjacency_matrix()
            else:
                self.distance_matrix = geometry.adjacency_matrix

        # Graph shortest-path distances.
        if self.graph_distance_matrix is None:
            self.graph_distance_matrix = graph_shortest_path(
                self.distance_matrix,
                method=self.path_method,
                directed=False)

        # Centered graph distance matrix.
        if self.centered_matrix is None:
            graph_distances = self.graph_distance_matrix
            self.centered_matrix = center_matrix(graph_distances)

        embedding = isomap(
            geometry,
            n_components=self.n_components,
            eigen_solver=self.eigen_solver,
            random_state=self.random_state,
            path_method=self.path_method,
            distance_matrix=self.distance_matrix,
            graph_distance_matrix=self.graph_distance_matrix,
            centered_matrix=self.centered_matrix,
            solver_kwds=self.solver_kwds)
        self.embedding_ = embedding
        return self