Example #1
0
    def fit(self, X, y=None):
        """Compute the spectral embedding of X and store it on the estimator.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

            If self.input_type is 'distance_matrix', or 'affinity':
            X : array-like, shape (n_samples, n_samples),
            Interpret X as precomputed distance or adjacency graph
            computed from samples.
        y : ignored
            Not referenced by this method.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        # Only build a Geometry from X when the estimator does not hold one yet.
        has_geometry = isinstance(self.Geometry, geom.Geometry)
        if not has_geometry:
            self.fit_geometry(X)

        rng = check_random_state(self.random_state)
        geometry = self.Geometry
        self.embedding_ = spectral_embedding(
            geometry,
            n_components=self.n_components,
            eigen_solver=self.eigen_solver,
            random_state=rng,
            eigen_tol=self.eigen_tol,
            drop_first=self.drop_first,
            diffusion_maps=self.diffusion_maps,
        )

        # Expose the matrices held by the Geometry as fitted attributes.
        self.affinity_matrix_ = self.Geometry.affinity_matrix
        self.laplacian_matrix_ = self.Geometry.laplacian_matrix
        self.laplacian_matrix_type_ = self.Geometry.laplacian_type
        return self
Example #2
0
    def fit(self, X, y=None):
        """Compute the LTSA embedding of X and store it on the estimator.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

            If self.input_type is 'distance_matrix', or 'affinity':
            X : array-like, shape (n_samples, n_samples),
            Interpret X as precomputed distance or adjacency graph
            computed from samples.
        y : ignored
            Not referenced by this method.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        # Only build a Geometry from X when the estimator does not hold one yet.
        has_geometry = isinstance(self.Geometry, geom.Geometry)
        if not has_geometry:
            self.fit_geometry(X)

        rng = check_random_state(self.random_state)
        # ltsa returns a pair; it is stored as embedding_ and error_.
        result = ltsa(
            self.Geometry,
            n_components=self.n_components,
            eigen_solver=self.eigen_solver,
            tol=self.tol,
            random_state=rng,
            max_iter=self.max_iter,
        )
        self.embedding_, self.error_ = result
        return self
Example #3
0
    def fit(self, X, eigen_solver=None, input_type='data'):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples
            and n_features is the number of features.

        input_type : string, one of: 'data', 'distance', 'affinity'.
            The values of input data X. (default = 'data')
            When not None it is stored on ``self.input_type`` before the
            geometry is fitted.

        eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}
            The eigenvalue decomposition strategy to use. AMG requires pyamg
            to be installed. It can be faster on very large, sparse problems,
            but may also lead to instabilities.
            When not None and different from ``self.eigen_solver`` it
            replaces the estimator's current solver.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        if input_type is not None:
            self.input_type = input_type
        if not isinstance(self.Geometry, geom.Geometry):
            self.fit_geometry(X)

        # Allow the caller to switch solvers between calls (e.g. after a
        # failed decomposition).  The original compared against a misspelled
        # name, which raised NameError whenever a solver was passed.
        if eigen_solver is not None and eigen_solver != self.eigen_solver:
            self.eigen_solver = eigen_solver

        # Don't re-compute these if it's already been done.  This might be the
        # case if an eigendecomposition fails and a different solver is
        # selected on a second call.
        if self.distance_matrix is None:
            self.distance_matrix = self.Geometry.get_distance_matrix()
        if self.graph_distance_matrix is None:
            self.graph_distance_matrix = graph_shortest_path(
                self.distance_matrix,
                method=self.path_method,
                directed=False)
        if self.centered_matrix is None:
            self.centered_matrix = center_matrix(self.graph_distance_matrix)

        random_state = check_random_state(self.random_state)
        self.embedding_ = isomap(
            self.Geometry,
            n_components=self.n_components,
            eigen_solver=self.eigen_solver,
            random_state=random_state,
            eigen_tol=self.eigen_tol,
            path_method=self.path_method,
            distance_matrix=self.distance_matrix,
            graph_distance_matrix=self.graph_distance_matrix,
            centered_matrix=self.centered_matrix)
        return self
Example #4
0
def eigen_decomposition(G, n_components=8, eigen_solver=None,
                       random_state=None, eigen_tol=0.0,
                       drop_first=True, largest=True):
    """
    Compute a partial eigendecomposition of G with the requested solver.

    Parameters
    ----------
    G : 2d numpy/scipy array. Potentially sparse.
        The matrix to find the eigendecomposition of
    n_components : integer, optional
        The number of eigenvectors to return

    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
        None is treated as 'auto'.
        auto : 'arpack' when G has more than 200 rows, otherwise 'dense'.
        dense  : use standard dense matrix operations for the eigenvalue
                    decomposition.  For this method, M must be an array
                    or matrix type.  This method should be avoided for
                    large problems.
        arpack : use arnoldi iteration in shift-invert mode.
                    For this method, M may be a dense matrix, sparse matrix,
                    or general linear operator.
                    Warning: ARPACK can be unstable for some problems.  It is
                    best to try several random seeds in order to check results.
        lobpcg : Locally Optimal Block Preconditioned Conjugate Gradient Method.
            a preconditioned eigensolver for large symmetric positive definite
            (SPD) generalized eigenproblems.
        amg : AMG requires pyamg to be installed. It can be faster on very large,
            sparse problems, but may also lead to instabilities.

    random_state : int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization of the
        lobpcg eigen vectors decomposition when eigen_solver is 'amg' or
        'lobpcg'.

    eigen_tol : float, optional, default=0.0
        Stopping criterion for eigendecomposition when using arpack eigen_solver

    drop_first : bool, optional, default=True
        When True, ``n_components`` is incremented by one so that one extra
        eigenpair is computed and returned; discarding it is left to the
        caller.

    largest : bool, optional, default=True
        True to request the largest eigenvalues, False for the smallest.

    Returns
    -------
    lambdas, diffusion_map : eigenvalues, eigenvectors

    Raises
    ------
    ValueError
        If ``eigen_solver`` is unknown, if 'amg' is requested without pyamg,
        if ``largest`` is not a bool, or if 'amg'/'lobpcg' is used on a
        non-symmetric matrix.
    """
    n_nodes = G.shape[0]
    if eigen_solver is None:
        eigen_solver = 'auto'
    elif eigen_solver not in eigen_solvers:
        # Both values must be passed as one tuple; otherwise '%' only sees the
        # first one and raises TypeError instead of this ValueError.
        raise ValueError("Unknown value for eigen_solver: '%s'. "
                         "Should be: '%s'"
                         % (eigen_solver, eigen_solvers))
    if eigen_solver == 'auto':
        if n_nodes > 200:
            eigen_solver = 'arpack'
        else:
            eigen_solver = 'dense'

    # pyamg is optional: only fail when the caller explicitly asked for it.
    try:
        from pyamg import smoothed_aggregation_solver
    except ImportError:
        if eigen_solver == "amg":
            raise ValueError("The eigen_solver was set to 'amg', but pyamg is "
                             "not available.")
    # Check input values
    if not isinstance(largest, bool):
        raise ValueError("largest should be True if you want largest eigenvalues otherwise False")
    random_state = check_random_state(random_state)
    if drop_first:
        n_components = n_components + 1
    # Check for symmetry
    is_symmetric = _is_symmetric(G)
    # Convert G to best type for eigendecomposition.  tocsr() returns a new
    # matrix, so its result has to be kept.
    if sparse.issparse(G) and G.getformat() != 'csr':
        G = G.tocsr()
    # The builtin float is what the removed ``np.float`` alias pointed to.
    G = G.astype(float)

    if eigen_solver == 'lobpcg' and n_nodes < 5 * n_components + 1:
        warnings.warn("lobpcg has problems with small number of nodes. Using dense eigh")
        eigen_solver = 'dense'

    # Try Eigen Methods:
    if eigen_solver == 'arpack':
        if is_symmetric:
            which = 'LM' if largest else 'SM'
            lambdas, diffusion_map = eigsh(G, k=n_components, which=which,
                                           tol=eigen_tol)
        else:
            which = 'LR' if largest else 'SR'
            lambdas, diffusion_map = eigs(G, k=n_components, which=which,
                                          tol=eigen_tol)
        lambdas = np.real(lambdas)
        diffusion_map = np.real(diffusion_map)
    elif eigen_solver == 'amg':
        if not is_symmetric:
            raise ValueError("lobpcg requires symmetric matrices.")
        if not sparse.issparse(G):
            warnings.warn("AMG works better for sparse matrices")
        # Use AMG to get a preconditioner and speed up the eigenvalue problem.
        ml = smoothed_aggregation_solver(check_array(G, accept_sparse=['csr']))
        M = ml.aspreconditioner()
        n_find = min(n_nodes, 5 + 2 * n_components)
        X = random_state.rand(n_nodes, n_find)
        X[:, 0] = (G.diagonal()).ravel()
        lambdas, diffusion_map = lobpcg(G, X, M=M, largest=largest)
        sort_order = np.argsort(lambdas)
        if largest:
            sort_order = sort_order[::-1]
        lambdas = lambdas[sort_order][:n_components]
        diffusion_map = diffusion_map[:, sort_order][:, :n_components]
    elif eigen_solver == "lobpcg":
        if not is_symmetric:
            raise ValueError("lobpcg requires symmetric matrices.")
        n_find = min(n_nodes, 5 + 2 * n_components)
        X = random_state.rand(n_nodes, n_find)
        lambdas, diffusion_map = lobpcg(G, X, largest=largest)
        sort_order = np.argsort(lambdas)
        if largest:
            sort_order = sort_order[::-1]
        lambdas = lambdas[sort_order][:n_components]
        diffusion_map = diffusion_map[:, sort_order][:, :n_components]
    elif eigen_solver == 'dense':
        if sparse.issparse(G):
            G = G.toarray()
        if is_symmetric:
            # eigh always returns eigenvalues in ascending order
            lambdas, diffusion_map = eigh(G)
        else:
            lambdas, diffusion_map = eig(G)
            # eig makes no ordering guarantee; sort ascending by real part so
            # the reverse/truncate step below selects the intended eigenpairs.
            ascending = np.argsort(lambdas.real)
            lambdas = lambdas[ascending]
            diffusion_map = diffusion_map[:, ascending]
        if largest:
            lambdas = lambdas[::-1]  # reverse order the e-values
            diffusion_map = diffusion_map[:, ::-1]  # reverse order the vectors
        lambdas = lambdas[:n_components]
        diffusion_map = diffusion_map[:, :n_components]
    return (lambdas, diffusion_map)
Example #5
0
def null_space(M, k, k_skip=1, eigen_solver='arpack', tol=1E-6, max_iter=100,
               random_state=None):
    """
    Find the null space of a matrix M: eigenvectors associated with 0 eigenvalues

    Parameters
    ----------
    M : {array, matrix, sparse matrix, LinearOperator}
        Input covariance matrix: should be symmetric positive semi-definite
    k : integer
        Number of eigenvalues/vectors to return
    k_skip : integer, optional
        Number of low eigenvalues to skip.
    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
        auto : 'arpack' when M has more than 200 rows and k + k_skip < 10,
            otherwise 'dense'.
        dense  : use standard dense matrix operations for the eigenvalue
                    decomposition.  For this method, M must be an array
                    or matrix type.  This method should be avoided for
                    large problems.
        arpack : use arnoldi iteration in shift-invert mode.
                    For this method, M may be a dense matrix, sparse matrix,
                    or general linear operator.
                    Warning: ARPACK can be unstable for some problems.  It is
                    best to try several random seeds in order to check results.
        lobpcg : Locally Optimal Block Preconditioned Conjugate Gradient Method.
            a preconditioned eigensolver for large symmetric positive definite
            (SPD) generalized eigenproblems.
        amg : AMG requires pyamg to be installed. It can be faster on very large,
            sparse problems, but may also lead to instabilities.
    tol : float, optional
        Tolerance for 'arpack' method.
        Not used if eigen_solver=='dense'.
    max_iter : maximum number of iterations for 'arpack' method
        not used if eigen_solver=='dense'
    random_state: numpy.RandomState or int, optional
        The generator or seed used to determine the starting vector for arpack
        iterations.  Defaults to numpy.random.

    Returns
    -------
    eigen_vectors : array, shape (n, k)
        The k eigenvectors that follow the first ``k_skip`` ones.
    error : float
        Sum of the eigenvalues belonging to the returned eigenvectors.

    Raises
    ------
    ValueError
        If ARPACK fails with a RuntimeError, or ``eigen_solver`` is not
        recognized.
    """
    if eigen_solver == 'auto':
        if M.shape[0] > 200 and k + k_skip < 10:
            eigen_solver = 'arpack'
        else:
            eigen_solver = 'dense'

    if eigen_solver == 'arpack':
        random_state = check_random_state(random_state)
        v0 = random_state.rand(M.shape[0])
        try:
            eigen_values, eigen_vectors = eigsh(M, k + k_skip, sigma=0.0,
                                                tol=tol, maxiter=max_iter,
                                                v0=v0)
        except RuntimeError as msg:
            raise ValueError("Error in determining null-space with ARPACK. "
                             "Error message: '%s'. "
                             "Note that method='arpack' can fail when the "
                             "weight matrix is singular or otherwise "
                             "ill-behaved.  method='dense' is recommended. "
                             "See online documentation for more information."
                             % msg)

        return eigen_vectors[:, k_skip:], np.sum(eigen_values[k_skip:])
    elif eigen_solver == 'dense':
        if hasattr(M, 'toarray'):
            M = M.toarray()
        # Ask for one eigenpair more than needed (the index range is
        # inclusive), but never beyond the last valid index.
        last_index = min(k + k_skip, M.shape[0] - 1)
        eigen_values, eigen_vectors = eigh(M, subset_by_index=(0, last_index),
                                           overwrite_a=True)
        index = np.argsort(np.abs(eigen_values))
        eigen_vectors = eigen_vectors[:, index]
        eigen_values = eigen_values[index]
        # Skip k_skip pairs and keep the next k.  The previous upper bound of
        # k + 1 only produced k columns when k_skip happened to be 1.
        keep = slice(k_skip, k + k_skip)
        return eigen_vectors[:, keep], np.sum(eigen_values[keep])
    elif (eigen_solver == 'amg' or eigen_solver == 'lobpcg'):
        n_components = min(k + k_skip + 10, M.shape[0])

        def _shifted_decomposition(shift):
            # M should be positive semi-definite; adding shift * I makes it
            # positive definite.  The shift is always applied to the original
            # M so that subtracting it again recovers M's eigenvalues.
            shifted = shift * sparse.identity(M.shape[0]) + M
            return eigen_decomposition(shifted, n_components,
                                       eigen_solver=eigen_solver,
                                       drop_first=False,
                                       largest=False)

        shift = 1.0
        try:
            eigen_values, eigen_vectors = _shifted_decomposition(shift)
        except LinAlgError:  # try again with bigger increase
            warnings.warn("LOBPCG failed the first time. Increasing Pos Def adjustment.")
            shift = 2.0
            eigen_values, eigen_vectors = _shifted_decomposition(shift)
        eigen_values = eigen_values - shift
        index = np.argsort(np.abs(eigen_values))
        eigen_values = eigen_values[index]
        eigen_vectors = eigen_vectors[:, index]
        keep = slice(k_skip, k + k_skip)
        return eigen_vectors[:, keep], np.sum(eigen_values[keep])
    else:
        raise ValueError("Unrecognized eigen_solver '%s'" % eigen_solver)
Example #6
0
def isomap(Geometry, n_components=8, eigen_solver=None,
           random_state=None, eigen_tol=1e-12, path_method='auto',
           distance_matrix=None, graph_distance_matrix=None,
           centered_matrix=None):
    """
    Compute an Isomap embedding from a Geometry object.

    Each intermediate matrix (distance, graph distance, centered) is computed
    only when it is not supplied and is still needed by a later step.

    Parameters
    ----------
    Geometry : a Geometry object from Mmani.geometry.geometry

    n_components : integer, optional
        The dimension of the projection subspace.

    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
        auto : algorithm will attempt to choose the best method for input data
        dense  : use standard dense matrix operations for the eigenvalue
                    decomposition.  For this method, M must be an array
                    or matrix type.  This method should be avoided for
                    large problems.
        arpack : use arnoldi iteration in shift-invert mode.
                    For this method, M may be a dense matrix, sparse matrix,
                    or general linear operator.
                    Warning: ARPACK can be unstable for some problems.  It is
                    best to try several random seeds in order to check results.
        lobpcg : Locally Optimal Block Preconditioned Conjugate Gradient Method.
            a preconditioned eigensolver for large symmetric positive definite
            (SPD) generalized eigenproblems.
        amg : AMG requires pyamg to be installed. It can be faster on very large,
            sparse problems, but may also lead to instabilities.

    random_state : int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization of the
        lobpcg eigen vectors decomposition when eigen_solver == 'amg'.
        By default, arpack is used.

    eigen_tol : float, optional, default=1e-12
        Stopping criterion for the eigendecomposition of the centered matrix
        when using arpack eigen_solver.

    path_method : string, method for computing graph shortest path. One of :
        'auto', 'D', 'FW', 'BF', 'J'. See scipy.sparse.csgraph.shortest_path
        for more information.

    distance_matrix : sparse Ndarray (n_obs, n_obs), optional. Pairwise distance matrix
        sparse zeros considered 'infinite'.

    graph_distance_matrix : Ndarray (n_obs, n_obs), optional. Pairwise graph distance
        matrix. Output of graph_shortest_path.

    centered_matrix : Ndarray (n_obs, n_obs), optional. Centered version of
        graph_distance_matrix

    Returns
    -------
    embedding : array, shape=(n_samples, n_components)
        The reduced samples.

    Raises
    ------
    RuntimeError
        If ``Geometry`` is not a ``geom.Geometry`` instance.
    """
    random_state = check_random_state(random_state)

    if not isinstance(Geometry, geom.Geometry):
        # One concatenated message; two separate arguments would render as a
        # tuple in the exception text.
        raise RuntimeError("Geometry object not Mmani.embedding.geometry "
                           "Geometry class")

    # Step 1: use geometry to calculate the distance matrix
    if distance_matrix is None and centered_matrix is None:
        distance_matrix = Geometry.get_distance_matrix()

    # Step 2: use graph_shortest_path to construct D_G
    ## WARNING: D_G is an (NxN) DENSE matrix!!
    if graph_distance_matrix is None and centered_matrix is None:
        graph_distance_matrix = graph_shortest_path(distance_matrix,
                                                    method=path_method,
                                                    directed=False)

    # Step 3: center graph distance matrix
    if centered_matrix is None:
        centered_matrix = center_matrix(graph_distance_matrix)

    # Step 4: compute d largest eigenvectors/values of centered_matrix
    lambdas, diffusion_map = eigen_decomposition(centered_matrix, n_components,
                                                 eigen_solver, random_state,
                                                 eigen_tol, largest=True)

    # Step 5:
    # return Y = [sqrt(lambda_1)*V_1, ..., sqrt(lambda_d)*V_d]
    ind = np.argsort(lambdas)[::-1]  # sort largest first
    lambdas = lambdas[ind]
    diffusion_map = diffusion_map[:, ind]
    embedding = diffusion_map[:, 0:n_components] * np.sqrt(lambdas[0:n_components])
    return embedding
Example #7
0
def spectral_embedding(
    Geometry, n_components=8, eigen_solver=None, random_state=None, eigen_tol=0.0, drop_first=True, diffusion_maps=False
):
    """Project the sample on the first eigen vectors of the graph Laplacian.

    The adjacency matrix is used to compute a normalized graph Laplacian
    whose spectrum (especially the eigen vectors associated to the
    smallest eigen values) has an interpretation in terms of minimal
    number of cuts necessary to split the graph into comparably sized
    components.

    This embedding can also 'work' even if the ``adjacency`` variable is
    not strictly the adjacency matrix of a graph but more generally
    an affinity or similarity matrix between samples (for instance the
    heat kernel of a euclidean distance matrix or a k-NN matrix).

    However care must taken to always make the affinity matrix symmetric
    so that the eigen vector decomposition works as expected.

    Parameters
    ----------
    Geometry : a Geometry object from Mmani.embedding.geometry

    n_components : integer, optional
        The dimension of the projection subspace.

    eigen_solver : {'auto', 'dense', 'arpack', 'lobpcg', or 'amg'}
        auto : algorithm will attempt to choose the best method for input data
        dense  : use standard dense matrix operations for the eigenvalue
                    decomposition.  For this method, M must be an array
                    or matrix type.  This method should be avoided for
                    large problems.
        arpack : use arnoldi iteration in shift-invert mode.
                    For this method, M may be a dense matrix, sparse matrix,
                    or general linear operator.
                    Warning: ARPACK can be unstable for some problems.  It is
                    best to try several random seeds in order to check results.
        lobpcg : Locally Optimal Block Preconditioned Conjugate Gradient Method.
            a preconditioned eigensolver for large symmetric positive definite
            (SPD) generalized eigenproblems.
        amg : AMG requires pyamg to be installed. It can be faster on very large,
            sparse problems, but may also lead to instabilities.

    random_state : int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization of the
        lobpcg eigen vectors decomposition when eigen_solver == 'amg'.
        By default, arpack is used.

    eigen_tol : float, optional, default=0.0
        Stopping criterion for eigendecomposition of the Laplacian matrix
        when using arpack eigen_solver.

    drop_first : bool, optional, default=True
        Whether to drop the first eigenvector. For spectral embedding, this
        should be True as the first eigenvector should be constant vector for
        connected graph, but for spectral clustering, this should be kept as
        False to retain the first eigenvector.

    diffusion_maps : boolean, optional, default=False
        Whether to return the diffusion map version by re-scaling the
        embedding by the square root of the eigenvalues.

    Returns
    -------
    embedding : array, shape=(n_samples, n_components)
        The reduced samples.

    Raises
    ------
    RuntimeError
        If ``Geometry`` is not a ``geom.Geometry`` instance.

    Notes
    -----
    Spectral embedding is most useful when the graph has one connected
    component. If there graph has many components, the first few eigenvectors
    will simply uncover the connected components of the graph.

    References
    ----------
    * http://en.wikipedia.org/wiki/LOBPCG

    * Toward the Optimal Preconditioned Eigensolver: Locally Optimal
      Block Preconditioned Conjugate Gradient Method
      Andrew V. Knyazev
      http://dx.doi.org/10.1137%2FS1064827500366124
    """
    random_state = check_random_state(random_state)

    if not isinstance(Geometry, geom.Geometry):
        raise RuntimeError("Geometry object not Mmani.embedding.geometry Geometry class")
    affinity_matrix = Geometry.get_affinity_matrix()
    if not _graph_is_connected(affinity_matrix):
        warnings.warn("Graph is not fully connected, spectral embedding may not work as expected.")

    laplacian = Geometry.get_laplacian_matrix(return_lapsym=True, symmetrize=True)
    n_nodes = laplacian.shape[0]
    lapl_type = Geometry.laplacian_type

    re_normalize = False
    if eigen_solver in ["amg", "lobpcg"]:  # these methods require a symmetric positive definite matrix!
        if lapl_type not in ["symmetricnormalized", "unnormalized"]:
            re_normalize = True
            # If lobpcg (or amg with lobpcg) is chosen and
            # If the Laplacian is non-symmetric then we need to extract:
            # the w (weight) vector from geometry
            # and the symmetric Laplacian = S.
            # The actual Laplacian is L = W^{-1}S  (Where W is the diagonal matrix of w)
            # Which has the same spectrum as: L* = W^{-1/2}SW^{-1/2} which is symmetric
            # We calculate the eigen-decomposition of L*: [D, V]
            # then use W^{-1/2}V  to compute the eigenvectors of L
            # See (Handbook for Cluster Analysis Chapter 2 Proposition 1).
            # However, since we censor the affinity matrix A at a radius it is not guaranteed
            # to be positive definite. But since L = W^{-1}S has maximum eigenvalue 1 (stochastic matrix)
            # and L* has the same spectrum it also has largest e-value of 1.
            # therefore if we look at I - L* then this has smallest eigenvalue of 0 and so
            # must be positive semi-definite. It also has the same spectrum as L* but
            # lambda(I - L*) = 1 - lambda(L*).
            # Finally, since we want positive definite not semi-definite we use (1+epsilon)*I
            # instead of I to make the smallest eigenvalue epsilon.
            epsilon = 2
            w = np.array(Geometry.w)
            symmetrized_laplacian = Geometry.laplacian_symmetric.copy()
            if sparse.issparse(symmetrized_laplacian):
                # The .row/.col index arrays used below only exist on the COO
                # format, so convert first (a no-op when already COO).
                symmetrized_laplacian = symmetrized_laplacian.tocoo()
                symmetrized_laplacian.data /= np.sqrt(w[symmetrized_laplacian.row])
                symmetrized_laplacian.data /= np.sqrt(w[symmetrized_laplacian.col])
                symmetrized_laplacian = (1 + epsilon) * sparse.identity(n_nodes) - symmetrized_laplacian
            else:
                symmetrized_laplacian /= np.sqrt(w)
                symmetrized_laplacian /= np.sqrt(w[:, np.newaxis])
                # Assign back to the same name: a misspelled target here used
                # to drop the shift, so the unshifted matrix was decomposed.
                symmetrized_laplacian = (1 + epsilon) * np.identity(n_nodes) - symmetrized_laplacian

    if re_normalize:
        print("using symmetrized laplacian")
        lambdas, diffusion_map = eigen_decomposition(
            symmetrized_laplacian, n_components + 1, eigen_solver, random_state, eigen_tol, drop_first, largest=False
        )
        lambdas = -lambdas + epsilon
        diffusion_map /= np.sqrt(w[:, np.newaxis])  # put back on original Laplacian space
        diffusion_map /= np.linalg.norm(diffusion_map, axis=0)  # norm 1 vectors
    else:
        lambdas, diffusion_map = eigen_decomposition(
            laplacian, n_components + 1, eigen_solver, random_state, eigen_tol, drop_first, largest=True
        )

    # Order eigenpairs by decreasing eigenvalue and pin the leading one to 0.
    ind = np.argsort(lambdas)[::-1]
    lambdas = lambdas[ind]
    lambdas[0] = 0
    diffusion_map = diffusion_map[:, ind]
    if diffusion_maps:
        diffusion_map = diffusion_map * np.sqrt(lambdas)
    if drop_first:
        embedding = diffusion_map[:, 1 : (n_components + 1)]
    else:
        embedding = diffusion_map[:, :n_components]
    return embedding