def test_graph_laplacian():
    for mat in (np.arange(10) * np.arange(10)[:, np.newaxis],
                np.ones((7, 7)),
                np.eye(19),
                np.vander(np.arange(4)) + np.vander(np.arange(4)).T,):
        sp_mat = sparse.csr_matrix(mat)
        for normed in (True, False):
            laplacian = graph_laplacian(mat, normed=normed)
            n_nodes = mat.shape[0]
            if not normed:
                np.testing.assert_array_almost_equal(laplacian.sum(axis=0),
                                                     np.zeros(n_nodes))
            np.testing.assert_array_almost_equal(laplacian.T, laplacian)
            np.testing.assert_array_almost_equal(
                laplacian, graph_laplacian(sp_mat, normed=normed).toarray())
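For reference, a minimal sketch of the two properties this test asserts (the unnormalized Laplacian has zero row sums and is symmetric). It assumes scipy.sparse.csgraph.laplacian, the function that later replaced graph_laplacian, and a tiny made-up matrix:

import numpy as np
from scipy.sparse.csgraph import laplacian

A = np.array([[0., 1., 2.],
              [1., 0., 3.],
              [2., 3., 0.]])
D = np.diag(A.sum(axis=0))      # degree matrix
L = laplacian(A, normed=False)  # combinatorial Laplacian

np.testing.assert_array_almost_equal(L, D - A)                    # L = D - A
np.testing.assert_array_almost_equal(L.sum(axis=0), np.zeros(3))  # zero row sums
np.testing.assert_array_almost_equal(L, L.T)                      # symmetric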
Example #2
def my_uniteigenvector_zeroeigenvalue_cluster(k):
    G = nx.read_gpickle('data/undirected(fortest).gpickle')
    A = nx.adjacency_matrix(G, nodelist=G.nodes()[:-1], weight='weight')
    #A=A.toarray()
    #np.fill_diagonal(A,0.01) #add node with its own weight to itself
    #Tri = np.diag(np.sum(A, axis=1))
    #L = Tri - A
    #Tri_1 = np.diag(np.reciprocal(np.sqrt(Tri).diagonal()))
    #Ls = Tri_1.dot(L).dot(Tri_1)

    Ls, dd = graph_laplacian(A, normed=True, return_diag=True)

    eigenvalue_n, eigenvector_n = eigsh(Ls * (-1), k=k,
                                        sigma=1.0, which='LM',
                                        tol=0.0)

    #for ic,vl in enumerate(eigenvalue_n):
    #    if abs(vl-0)<=1e-10:
    #        eigenvector_n[:, ic] = np.full(len(G.nodes()[:-1]),1.0 / math.sqrt(len(G.nodes()[:-1]))) # zero eigenvalue

    eigenvector_n[:, -1] = np.full(len(G.nodes()[:-1]), 1.0 / math.sqrt(len(G.nodes()[:-1])))  # zero eigenvalue

    for ir, n in enumerate(eigenvector_n):
        eigenvector_n[ir] = n / float(np.linalg.norm(n))  # normalize to unit vector

    _, labels, _ = k_means(eigenvector_n, k, random_state=None,
                           n_init=100)
    return labels
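A quick check of the zero-eigenvalue substitution above (a sketch: for a connected graph, the null eigenvector of the symmetric normalized Laplacian is proportional to the square root of the degrees, so the constant vector 1/sqrt(n) used here is exact only when the graph is regular; the toy graph is an assumption):

import numpy as np
from scipy.linalg import eigh
from scipy.sparse.csgraph import laplacian

A = np.array([[0., 1., 1., 0.],
              [1., 0., 1., 1.],
              [1., 1., 0., 1.],
              [0., 1., 1., 0.]])
L_sym = laplacian(A, normed=True)
w, v = eigh(L_sym)              # ascending eigenvalues; w[0] is ~0
u = v[:, 0]                     # eigenvector of the zero eigenvalue

expected = np.sqrt(A.sum(axis=0))
expected /= np.linalg.norm(expected)
assert np.allclose(np.abs(u), expected)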
Example #3
 def fit(self, X, y, unlabeled_data=None):
   num_data = X.shape[0] + unlabeled_data.shape[0]
   num_labeled = X.shape[0]
   num_unlabeled = unlabeled_data.shape[0]
   labeled = np.zeros((num_data,), dtype=np.float32)
   labeled[0:num_labeled] = 1.0
   if issparse(X):
     self.X_ = vstack((util.cast_to_float32(X),
                       util.cast_to_float32(unlabeled_data)), format='csr')
   else:
     self.X_ = np.concatenate((util.cast_to_float32(X),
                               util.cast_to_float32(unlabeled_data)))
   self.gamma = (
       self.gamma if self.gamma is not None else 1.0 / X.shape[1])
   self.kernel_params = {'gamma':self.gamma, 'degree':self.degree, 'coef0':self.coef0}
   kernel_matrix = pairwise_kernels(self.X_, metric=self.kernel,
                                    filter_params=True, **self.kernel_params)
   A = np.dot(np.diag(labeled), kernel_matrix)
   if self.nu2 != 0:
     if self.kernel == 'rbf':
       laplacian_kernel_matrix = kernel_matrix
     else:
       laplacian_kernel_matrix = rbf_kernel(self.X_, gamma=self.gamma)
     laplacian_x_kernel = np.dot(graph_laplacian(
         laplacian_kernel_matrix, normed=self.normalize_laplacian), kernel_matrix)
     A += self.nu2 * laplacian_x_kernel
   y = np.concatenate((y, -np.ones((num_unlabeled,), dtype=np.float32)),
                      axis=0)
   super(LapRLSC, self).fit(A, y, class_for_unlabeled=-1)
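Why the nu2 term acts as a smoothness penalty (a sketch of the standard Laplacian quadratic-form identity, with made-up toy data): for any vector f, f.T @ L @ f equals half the weighted sum of squared differences over all pairs, so penalizing it discourages predictions that differ between strongly connected points.

import numpy as np
from scipy.sparse.csgraph import laplacian

rng = np.random.RandomState(0)
W = rng.rand(6, 6)
W = 0.5 * (W + W.T)        # symmetric edge weights
np.fill_diagonal(W, 0.0)

L = laplacian(W, normed=False)
f = rng.randn(6)

quad = f @ L @ f
pairwise = 0.5 * sum(W[i, j] * (f[i] - f[j]) ** 2
                     for i in range(6) for j in range(6))
assert np.isclose(quad, pairwise)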
Example #4
def spectralcluster(A,
                    n_cluster,
                    n_neighbors=6,
                    random_state=None,
                    eigen_tol=0.0):
    #maps = spectral_embedding(affinity, n_components=n_components,eigen_solver=eigen_solver,random_state=random_state,eigen_tol=eigen_tol, drop_first=False)

    # dd is diag
    laplacian, dd = graph_laplacian(A, normed=True, return_diag=True)
    # set the diagonal of the laplacian matrix and convert it to a sparse
    # format well suited for eigenvalue decomposition
    laplacian = _set_diag(laplacian, 1)

    # diffusion_map is eigenvectors
    # LM largest eigenvalues
    laplacian *= -1
    eigenvalues, eigenvectors = eigsh(laplacian,
                                      k=n_cluster,
                                      sigma=1.0,
                                      which='LM',
                                      tol=eigen_tol)
    y = eigenvectors.T[n_cluster::-1] * dd
    y = _deterministic_vector_sign_flip(y)[:n_cluster].T

    random_state = check_random_state(random_state)
    centroids, labels, _ = k_means(y, n_cluster, random_state=random_state)

    return eigenvalues, y, centroids, labels
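A hypothetical call of the spectralcluster helper above (a sketch: the blob data and the rbf affinity are assumptions, and the snippet's own imports such as graph_laplacian, _set_diag, eigsh, k_means and check_random_state are presumed in scope):

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(20, 2),            # blob 1
               rng.randn(20, 2) + 10.0])    # blob 2, far away

A = rbf_kernel(X, gamma=1.0)
eigenvalues, y, centroids, labels = spectralcluster(A, n_cluster=2,
                                                    random_state=0)
# labels should separate the two blobs (up to label permutation)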
Example #5
def caculate_key_frame(filenames):
    vector_data = generate_vector(filenames)
    adjacent_matrix = gen_matrix_gausses(vector_data)
    lap_m, diag_m = graph_laplacian(adjacent_matrix, normed=True, return_diag=True)
    w, v = eig(lap_m)
    w = np.sort(w)[::-1]
    cum_w = np.cumsum(w)
    print(cum_w)
    print(np.diff(cum_w))
Example #7
def test_graph_laplacian():
    for mat in (
            np.arange(10) * np.arange(10)[:, np.newaxis],
            np.ones((7, 7)),
            np.eye(19),
            np.vander(np.arange(4)) + np.vander(np.arange(4)).T,
    ):
        sp_mat = sparse.csr_matrix(mat)
        for normed in (True, False):
            laplacian = graph_laplacian(mat, normed=normed)
            n_nodes = mat.shape[0]
            if not normed:
                np.testing.assert_array_almost_equal(laplacian.sum(axis=0),
                                                     np.zeros(n_nodes))
            np.testing.assert_array_almost_equal(laplacian.T, laplacian)
            np.testing.assert_array_almost_equal(
                laplacian,
                graph_laplacian(sp_mat, normed=normed).toarray())
 def _build_graph(self):
     n_samples = len(self.similarity_matrix)
     laplacian = graph_laplacian(self.similarity_matrix, normed=True)
     laplacian = -laplacian
     if sparse.isspmatrix(laplacian):
          diag_mask = (laplacian.row == laplacian.col)
         laplacian.data[diag_mask] = 0.0
     else:
         laplacian.flat[::n_samples + 1] = 0.0
     return laplacian
 def _build_graph(self):
     """Graph matrix for Label Spreading computes the graph laplacian"""
     # compute affinity matrix (or gram matrix)
     if self.kernel == 'knn':
         self.nn_fit = None
     n_samples = self.X_.shape[0]
     affinity_matrix = self._get_kernel(self.X_)
     laplacian = graph_laplacian(affinity_matrix, normed=True)
     laplacian = -laplacian
     if sparse.isspmatrix(laplacian):
         diag_mask = (laplacian.row == laplacian.col)
         laplacian.data[diag_mask] = 0.0
     else:
         laplacian.flat[::n_samples + 1] = 0.0  # set diag to 0.0
     return laplacian
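Both _build_graph variants return the normalized adjacency S = D^(-1/2) * A * D^(-1/2): for a zero-diagonal affinity, negating the normalized Laplacian I - S and zeroing the diagonal leaves exactly S. A small numeric check (a sketch, assuming scipy.sparse.csgraph.laplacian and made-up weights):

import numpy as np
from scipy.sparse.csgraph import laplacian

rng = np.random.RandomState(0)
A = rng.rand(5, 5)
A = 0.5 * (A + A.T)       # symmetric affinity
np.fill_diagonal(A, 0.0)

S = -laplacian(A, normed=True)
np.fill_diagonal(S, 0.0)

d = A.sum(axis=0)
np.testing.assert_array_almost_equal(S, A / np.sqrt(np.outer(d, d)))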
def test_spectral_embedding_unnormalized():
    # Test that spectral_embedding is also processing unnormalized laplacian correctly
    random_state = np.random.RandomState(36)
    data = random_state.randn(10, 30)
    sims = rbf_kernel(data)
    n_components = 8
    embedding_1 = spectral_embedding(sims, norm_laplacian=False, n_components=n_components, drop_first=False)

    # Verify using manual computation with dense eigh
    laplacian, dd = graph_laplacian(sims, normed=False, return_diag=True)
    _, diffusion_map = eigh(laplacian)
    embedding_2 = diffusion_map.T[:n_components] * dd
    embedding_2 = _deterministic_vector_sign_flip(embedding_2).T

    assert_array_almost_equal(embedding_1, embedding_2)
Example #11
 def seriation(self, A):
     n_components = 2
     eigen_tol = 0.00001
     if sparse.issparse(A):
         A = A.todense()
     np.fill_diagonal(A, 0)
     laplacian, dd = graph_laplacian(A, return_diag=True)
     laplacian *= -1
     lambdas, diffusion_map = eigsh(laplacian,
                                    k=n_components,
                                    sigma=1.0,
                                    which='LM',
                                    tol=eigen_tol)
     embedding = diffusion_map.T[n_components::-1]  # * dd
     sort_index = np.argsort(embedding[1])
     return sort_index
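A self-contained sketch of the seriation idea above: sorting by the Fiedler vector (the eigenvector of the second-smallest Laplacian eigenvalue) recovers a banded ordering. Dense eigh stands in for the shift-invert eigsh for brevity, and the toy chain matrix is an assumption:

import numpy as np
from scipy.linalg import eigh
from scipy.sparse.csgraph import laplacian

n = 10
A = np.diag(np.ones(n - 1), 1) + np.diag(np.ones(n - 1), -1)  # chain graph
perm = np.random.RandomState(0).permutation(n)
A_shuffled = A[np.ix_(perm, perm)]                            # hide the order

L = laplacian(A_shuffled, normed=False)
w, v = eigh(L)
fiedler = v[:, 1]                 # second-smallest eigenvector
order = np.argsort(fiedler)       # recovers the chain order (or its reverse)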
Example #12
def test_arpack_eigsh_initialization():
    # Non-regression test that shows null-space computation is better with 
    # initialization of eigsh from [-1,1] instead of [0,1]
    random_state = check_random_state(42)

    A = random_state.rand(50, 50)
    A = np.dot(A.T, A)  # create s.p.d. matrix
    A = graph_laplacian(A) + 1e-7 * np.identity(A.shape[0])
    k = 5

    # Test if eigsh is working correctly
    # New initialization [-1,1] (as in original ARPACK)
    # Was [0,1] before, with which this test could fail
    v0 = random_state.uniform(-1,1, A.shape[0])
    w, _ = eigsh(A, k=k, sigma=0.0, v0=v0)

    # Eigenvalues of s.p.d. matrix should be nonnegative, w[0] is smallest
    assert_greater_equal(w[0], 0)
Example #15
    def cluster(self, affinities):
        laplacian, diagonal = graphutil.graph_laplacian(affinities,
                                                        normed=True,
                                                        return_diag=True)

        self.embedding = self.embed(laplacian, diagonal, self.k, self.tol)

        centroid_vals, self.labels, _ = k_means(self.embedding,
                                                self.k,
                                                random_state=self.rand,
                                                n_init=self.n_init,
                                                init=self.init_centroids)

        self.centroids = []
        for c in centroid_vals:
            self.centroids.append(
                np.argmin([np.sum((c - e)**2) for e in self.embedding]))

        return self.labels
Example #16
    def _check_manifold_gradient(self):
        print "Gradient verification for the manifold regularization term...",
        n_examples = 10
        n_features = 5
        beta = np.random.rand() * 100.0
        epsilon = 1e-6

        # Initialization
        O = np.random.randint(0, 2, (n_examples, n_features))
        A = np.random.rand(n_examples, n_examples)
        A = np.dot(A.T, A)  # Make it symmetric
        np.fill_diagonal(A, 100.)
        L = graph_laplacian(A)
        w = np.random.rand(n_features)

        # Compute the gradient according to the expression
        OLO = np.dot(np.dot(O.T, L), O)
        gradient = 4 * beta * np.dot(OLO, w) / (n_examples * L.shape[0]**2)

        # Compute the empirical gradient estimate
        def loss(w):
            l = 0.0
            for i in range(O.shape[0]):
                for j in range(O.shape[0]):
                    l += A[i, j] * (np.dot(w, O[i]) - np.dot(w, O[j]))**2
            l /= (n_examples * L.shape[0]**2)
            return beta * l

        # Check the gradient for each component of w
        for i in range(w.shape[0]):
            w_1 = w.copy()
            w_2 = w.copy()
            w_1[i] += epsilon
            w_2[i] -= epsilon
            empirical_gradient = (loss(w_1) - loss(w_2)) / (2 * epsilon)
            if not np.allclose(empirical_gradient, gradient[i]):
                print "FAILED. Expected gradient: %.8f   Calculated gradient: %.8f" % (
                    empirical_gradient, gradient[i])
                return False
        else:
            print "PASSED"
            return True
Example #17
def create_laplacian(Adjacency,
                     norm_lap=None,
                     sparse=None):
    """Finds the Graph Laplacian from a Weighted Adjacency Matrix

    Parameters
    ----------
    * Adjacency       - a sparse NxN array

    Returns
    -------
    * Laplacian       - an NxN laplacian array
    * Diagonal        - an NxN diagonal array
    """
    L, D = graph_laplacian(Adjacency, normed=norm_lap,
                           return_diag=True)
    D = spdiags(data=D,
                diags=[0],
                m=Adjacency.shape[0],
                n=Adjacency.shape[0])
    return L, D
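A hypothetical usage of create_laplacian (a sketch: the 3-node path adjacency is made up, and graph_laplacian and spdiags are presumed imported as in the snippet):

import numpy as np
from scipy.sparse import csr_matrix

A = csr_matrix(np.array([[0., 1., 0.],
                         [1., 0., 1.],
                         [0., 1., 0.]]))
L, D = create_laplacian(A)
print(L.toarray())   # combinatorial Laplacian D - A
print(D.toarray())   # node degrees on the diagonal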
Example #18
def predict_k(affinity_matrix):
	
	normed_laplacian, dd = graph_laplacian(affinity_matrix, normed=True, return_diag=True)
	laplacian = _set_diag(normed_laplacian, 1, norm_laplacian=True)

	n_components = affinity_matrix.shape[0] - 1

	eigenvalues, eigenvectors = eigsh(-laplacian, k=n_components, which="LM", sigma=1.0, maxiter=5000)
	eigenvalues = -eigenvalues[::-1]  # Reverse and sign inversion.

	max_gap = 0
	gap_pre_index = 0
	for i in range(1, eigenvalues.size):
		gap = eigenvalues[i] - eigenvalues[i - 1]
		if gap > max_gap:
			max_gap = gap
			gap_pre_index = i - 1

	k = gap_pre_index + 1

	return k
Example #19
def spectralcluster(A, n_cluster, n_neighbors=6, random_state=None, eigen_tol=0.0):
    #maps = spectral_embedding(affinity, n_components=n_components,eigen_solver=eigen_solver,random_state=random_state,eigen_tol=eigen_tol, drop_first=False)

    # dd is diag
    laplacian, dd = graph_laplacian(A, normed=True, return_diag=True)
    # set the diagonal of the laplacian matrix and convert it to a sparse
    # format well suited for eigenvalue decomposition
    laplacian = _set_diag(laplacian, 1)
    
    # diffusion_map is eigenvectors
    # LM largest eigenvalues
    laplacian *= -1
    eigenvalues, eigenvectors = eigsh(laplacian, k=n_cluster,
                                      sigma=1.0, which='LM',
                                      tol=eigen_tol)
    y = eigenvectors.T[n_cluster::-1] * dd
    y = _deterministic_vector_sign_flip(y)[:n_cluster].T

    random_state = check_random_state(random_state)
    centroids, labels, _ = k_means(y, n_cluster, random_state=random_state)

    return eigenvalues, y, centroids, labels
Example #20
def create_laplacian(Adjacency,
                     norm_lap=None,
                     method='Personal',
                     sparse=None):
    """Finds the Graph Laplacian from a Weighted Adjacency Matrix

    Parameters
    ----------
    * Adjacency       - a sparse NxN array

    Returns
    -------
    * Laplacian       - an NxN laplacian array
    * Diagonal        - an NxN diagonal array
    """

    if method in ['personal', 'Personal']:

        D = spdiags(data=np.squeeze(np.asarray(Adjacency.sum(axis=1))),
                    diags=[0],
                    m=Adjacency.shape[0],
                    n=Adjacency.shape[0])
        return D-Adjacency, D


    elif method in ['sklearn', 'scikit']:

        L, D = graph_laplacian(Adjacency, normed=norm_lap,
                               return_diag=True)
        D = spdiags(data=D,
                    diags=[0],
                    m=Adjacency.shape[0],
                    n=Adjacency.shape[0])
        return L, D

    else:
        raise ValueError('Unrecognized Graph Laplacian '
                         'construction method.')
Example #21
import service.prodbox
from sklearn.manifold import SpectralEmbedding, spectral_embedding_
from sklearn.cluster import SpectralClustering
from sklearn.utils.sparsetools import connected_components
from sklearn.neighbors import kneighbors_graph
from sklearn.utils.graph import graph_laplacian
import numpy as np
from sklearn.utils.arpack import eigsh

app = service.prodbox.CinemaService()

X = app.getWeightedSearchFeatures(15)

graph = kneighbors_graph(X, 10)
lap = graph_laplacian(graph, True)

from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=30, algorithm="arpack")
lap = spectral_embedding_._set_diag(lap, 1)
svd.fit(-lap)

eigenvalues = np.diag(svd.components_ * (-lap).todense() * svd.components_.T)

eigenvalues2, _ = eigsh(-lap, k=30, which='LM', sigma=1)
print(eigenvalues)

print(eigenvalues2)

se = SpectralEmbedding(n_components=30,
                       eigen_solver='arpack',
                       affinity="nearest_neighbors")
def predict_k(affinity_matrix):
    """
    Predict number of clusters based on the eigengap.

    Parameters
    ----------
    affinity_matrix : array-like or sparse matrix, shape: (n_samples, n_samples)
        adjacency matrix.
        Each element of this matrix contains a measure of similarity between two of the data points.

    Returns
    ----------
    k : integer
        estimated number of clusters.

    Note
    ---------
    If the graph is not fully connected, each connected component
    contributes a zero eigenvalue and is counted as a single cluster.

    References
    ----------
    A Tutorial on Spectral Clustering, 2007
        Luxburg, Ulrike
        http://www.kyb.mpg.de/fileadmin/user_upload/files/publications/attachments/Luxburg07_tutorial_4488%5b0%5d.pdf

    """

    """
    If normed=True, L = D^(-1/2) * (D - A) * D^(-1/2) else L = D - A.
    normed=True is recommended.
    """
    normed_laplacian, dd = graph_laplacian(affinity_matrix, normed=True, return_diag=True)
    laplacian = _set_diag(normed_laplacian, 1)

    """
    n_components size is N - 1.
    Setting N - 1 may lead to slow execution time...
    """
    n_components = affinity_matrix.shape[0] - 1

    """
    shift-invert mode
    The shift-invert mode provides more than just a fast way to obtain a few small eigenvalues.
    http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html

    The normalized Laplacian has eigenvalues between 0 and 2.
    I - L has eigenvalues between -1 and 1.
    """
    eigenvalues, eigenvectors = eigsh(-laplacian, k=n_components, which="LM", sigma=1.0, maxiter=5000)
    eigenvalues = -eigenvalues[::-1]  # Reverse and sign inversion.

    max_gap = 0
    gap_pre_index = 0
    for i in range(1, eigenvalues.size):
        gap = eigenvalues[i] - eigenvalues[i - 1]
        if gap > max_gap:
            max_gap = gap
            gap_pre_index = i - 1

    k = gap_pre_index + 1

    return k
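A hypothetical sanity check of predict_k (a sketch: the three well-separated blobs and the rbf affinity are assumptions; with a clear eigengap after the third-smallest eigenvalue the function should return 3):

import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(15, 2) + c
               for c in ([0, 0], [8, 8], [-8, 8])])
affinity = rbf_kernel(X, gamma=0.5)
print(predict_k(affinity))   # expected: 3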
import service.prodbox
from sklearn.manifold import SpectralEmbedding, spectral_embedding_
from sklearn.cluster import SpectralClustering
from sklearn.utils.sparsetools import connected_components
from sklearn.neighbors import kneighbors_graph
from sklearn.utils.graph import graph_laplacian
import numpy as np
from sklearn.utils.arpack import eigsh

app = service.prodbox.CinemaService()

X = app.getWeightedSearchFeatures(15)

graph = kneighbors_graph(X, 10)
lap = graph_laplacian(graph, True)

from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=30, algorithm="arpack")
lap = spectral_embedding_._set_diag(lap, 1)
svd.fit(-lap)

eigenvalues = np.diag(svd.components_ * (-lap).todense() * svd.components_.T)

eigenvalues2, _ = eigsh(-lap, k=30, which='LM', sigma=1)
print(eigenvalues)

print(eigenvalues2)

se = SpectralEmbedding(n_components=30, eigen_solver='arpack', affinity="nearest_neighbors")
se.fit(X)
Example #24
def laplacian_matrix(B, d, normed=False):
    A = similarity_matrix(B, d)
    # return np.diag(np.sum(A, axis=0)) - A
    return graph_laplacian(A, normed=normed)
Example #25
def predict_k(affinity_matrix):
    """
    Predict number of clusters based on the eigengap.

    Parameters
    ----------
    affinity_matrix : array-like or sparse matrix, shape: (n_samples, n_samples)
        adjacency matrix.
        Each element of this matrix contains a measure of similarity between two of the data points.

    Returns
    ----------
    k : integer
        estimated number of clusters.

    Note
    ---------
    If the graph is not fully connected, each connected component
    contributes a zero eigenvalue and is counted as a single cluster.

    References
    ----------
    A Tutorial on Spectral Clustering, 2007
        Luxburg, Ulrike
        http://www.kyb.mpg.de/fileadmin/user_upload/files/publications/attachments/Luxburg07_tutorial_4488%5b0%5d.pdf

    """
    """
    If normed=True, L = D^(-1/2) * (D - A) * D^(-1/2) else L = D - A.
    normed=True is recommended.
    """
    normed_laplacian, dd = graph_laplacian(affinity_matrix,
                                           normed=True,
                                           return_diag=True)
    laplacian = _set_diag(normed_laplacian, 1)
    """
    n_components size is N - 1.
    Setting N - 1 may lead to slow execution time...
    """
    n_components = affinity_matrix.shape[0] - 1
    """
    shift-invert mode
    The shift-invert mode provides more than just a fast way to obtain a few small eigenvalues.
    http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html

    The normalized Laplacian has eigenvalues between 0 and 2.
    I - L has eigenvalues between -1 and 1.
    """
    eigenvalues, eigenvectors = eigsh(-laplacian,
                                      k=n_components,
                                      which="LM",
                                      sigma=1.0,
                                      maxiter=5000)
    eigenvalues = -eigenvalues[::-1]  # Reverse and sign inversion.

    max_gap = 0
    gap_pre_index = 0
    for i in range(1, eigenvalues.size):
        gap = eigenvalues[i] - eigenvalues[i - 1]
        if gap > max_gap:
            max_gap = gap
            gap_pre_index = i - 1

    k = gap_pre_index + 1

    return k
Example #26
percent = perc_matrix.div(perc_matrix['total'],
                          axis='index') * 100  # calculate row percentage
percent = percent.drop(['total'], axis=1)  # drop total column

cat_perc = []
for cat in matrix.columns:
    cat_tuple = (cat, matrix[cat].mean())
    cat_perc.append(cat_tuple)
# sort category percentages
cat_perc = sorted(cat_perc, key=lambda x: x[1])

graph = cosine_similarity(matrix)  # use cosine similarity, as in Noulas et al.

# https://github.com/mingmingyang/auto_spectral_clustering/blob/master/autosp.py
# how to calculate spectral clusters
norm_laplacian, dd = graph_laplacian(graph, normed=True, return_diag=True)
laplacian = _set_diag(norm_laplacian, 1, norm_laplacian=True)
n_components = graph.shape[0] - 1

eigenvalues, eigenvectors = eigsh(-laplacian,
                                  k=n_components,
                                  which="LM",
                                  sigma=1.0,
                                  maxiter=5000)
eigenvalues = -eigenvalues[::-1]

max_gap = 0
gap_pre_index = 0
for i in range(1, eigenvalues.size):
    gap = eigenvalues[i] - eigenvalues[i - 1]
    if gap > max_gap:
Example #27
    def spectral_embedding(self,
                           adjacency,
                           n_components=8,
                           eigen_solver=None,
                           random_state=None,
                           eigen_tol=0.0,
                           drop_first=True):
        """
        see original at https://github.com/scikit-learn/scikit-learn/blob/14031f6/sklearn/manifold/spectral_embedding_.py#L133
        customization 1: return lambdas with the embedded matrix.
        customization 2: norm_laplacian is always True
        """
        norm_laplacian = True
        adjacency = check_symmetric(adjacency)

        try:
            from pyamg import smoothed_aggregation_solver
        except ImportError:
            if eigen_solver == "amg":
                raise ValueError(
                    "The eigen_solver was set to 'amg', but pyamg is "
                    "not available.")

        if eigen_solver is None:
            eigen_solver = 'arpack'
        elif eigen_solver not in ('arpack', 'lobpcg', 'amg'):
            raise ValueError("Unknown value for eigen_solver: '%s'."
                             "Should be 'amg', 'arpack', or 'lobpcg'" %
                             eigen_solver)

        random_state = check_random_state(random_state)

        n_nodes = adjacency.shape[0]
        # Whether to drop the first eigenvector
        if drop_first:
            n_components = n_components + 1

        if not _graph_is_connected(adjacency):
            warnings.warn("Graph is not fully connected, spectral embedding"
                          " may not work as expected.")

        laplacian, dd = graph_laplacian(adjacency,
                                        normed=norm_laplacian,
                                        return_diag=True)
        if (eigen_solver == 'arpack' or eigen_solver != 'lobpcg' and
            (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)):
            # lobpcg used with eigen_solver='amg' has bugs for low number of nodes
            # for details see the source code in scipy:
            # https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen
            # /lobpcg/lobpcg.py#L237
            # or matlab:
            # http://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
            laplacian = _set_diag(laplacian, 1, norm_laplacian)

            # Here we'll use shift-invert mode for fast eigenvalues
            # (see http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
            #  for a short explanation of what this means)
            # Because the normalized Laplacian has eigenvalues between 0 and 2,
            # I - L has eigenvalues between -1 and 1.  ARPACK is most efficient
            # when finding eigenvalues of largest magnitude (keyword which='LM')
            # and when these eigenvalues are very large compared to the rest.
            # For very large, very sparse graphs, I - L can have many, many
            # eigenvalues very near 1.0.  This leads to slow convergence.  So
            # instead, we'll use ARPACK's shift-invert mode, asking for the
            # eigenvalues near 1.0.  This effectively spreads-out the spectrum
            # near 1.0 and leads to much faster convergence: potentially an
            # orders-of-magnitude speedup over simply using keyword which='LA'
            # in standard mode.
            try:
                # We are computing the opposite of the laplacian inplace so as
                # to spare a memory allocation of a possibly very large array
                laplacian *= -1
                lambdas, diffusion_map = eigsh(laplacian,
                                               k=n_components,
                                               sigma=1.0,
                                               which='LM',
                                               tol=eigen_tol)
                embedding = diffusion_map.T[n_components::-1] * dd

            except RuntimeError:
                # When submatrices are exactly singular, an LU decomposition
                # in arpack fails. We fallback to lobpcg
                eigen_solver = "lobpcg"
                # Revert the laplacian to its opposite to have lobpcg work
                laplacian *= -1

        if eigen_solver == 'amg':
            # Use AMG to get a preconditioner and speed up the eigenvalue
            # problem.
            if not sparse.issparse(laplacian):
                warnings.warn("AMG works better for sparse matrices")
            # lobpcg needs double precision floats
            laplacian = check_array(laplacian,
                                    dtype=np.float64,
                                    accept_sparse=True)
            laplacian = _set_diag(laplacian, 1, norm_laplacian)
            ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
            M = ml.aspreconditioner()
            X = random_state.rand(laplacian.shape[0], n_components + 1)
            X[:, 0] = dd.ravel()
            lambdas, diffusion_map = lobpcg(laplacian,
                                            X,
                                            M=M,
                                            tol=1.e-12,
                                            largest=False)
            embedding = diffusion_map.T * dd
            if embedding.shape[0] == 1:
                raise ValueError

        elif eigen_solver == "lobpcg":
            # lobpcg needs double precision floats
            laplacian = check_array(laplacian,
                                    dtype=np.float64,
                                    accept_sparse=True)
            if n_nodes < 5 * n_components + 1:
                # see note above under arpack why lobpcg has problems with small
                # number of nodes
                # lobpcg will fallback to eigh, so we short circuit it
                if sparse.isspmatrix(laplacian):
                    laplacian = laplacian.toarray()
                lambdas, diffusion_map = eigh(laplacian)
                embedding = diffusion_map.T[:n_components] * dd
            else:
                laplacian = _set_diag(laplacian, 1, norm_laplacian)
                # We increase the number of eigenvectors requested, as lobpcg
                # doesn't behave well in low dimension
                X = random_state.rand(laplacian.shape[0], n_components + 1)
                X[:, 0] = dd.ravel()
                lambdas, diffusion_map = lobpcg(laplacian,
                                                X,
                                                tol=1e-15,
                                                largest=False,
                                                maxiter=2000)
                embedding = diffusion_map.T[:n_components] * dd
                if embedding.shape[0] == 1:
                    raise ValueError

        embedding = _deterministic_vector_sign_flip(embedding)
        if drop_first:
            return embedding[1:n_components].T, lambdas
        else:
            return embedding[:n_components].T, lambdas
Example #28
def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
                       random_state=None, eigen_tol=0.0,
                       norm_laplacian=True, drop_first=True,
                       mode=None):
    """Project the sample on the first eigen vectors of the graph Laplacian.
    MMP:TO CHANGE THIS

    The adjacency matrix is used to compute a normalized graph Laplacian
    whose spectrum (especially the eigen vectors associated to the
    smallest eigen values) has an interpretation in terms of minimal
    number of cuts necessary to split the graph into comparably sized
    components.

    This embedding can also 'work' even if the ``adjacency`` variable is
    not strictly the adjacency matrix of a graph but more generally
    an affinity or similarity matrix between samples (for instance the
    heat kernel of a euclidean distance matrix or a k-NN matrix).

    However, care must be taken to always make the affinity matrix symmetric
    so that the eigen vector decomposition works as expected.

    Parameters
    ----------
    adjacency : array-like or sparse matrix, shape: (n_samples, n_samples)
        The adjacency matrix of the graph to embed.

    n_components : integer, optional
        The dimension of the projection subspace.

    eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}
        The eigenvalue decomposition strategy to use. AMG requires pyamg
        to be installed. It can be faster on very large, sparse problems,
        but may also lead to instabilities.

    random_state : int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization of the
        lobpcg eigen vectors decomposition when eigen_solver == 'amg'.
        By default, arpack is used.

    eigen_tol : float, optional, default=0.0
        Stopping criterion for eigendecomposition of the Laplacian matrix
        when using arpack eigen_solver.

    drop_first : bool, optional, default=True
        Whether to drop the first eigenvector. For spectral embedding, this
        should be True as the first eigenvector should be constant vector for
        connected graph, but for spectral clustering, this should be kept as
        False to retain the first eigenvector.

    Returns
    -------
    embedding : array, shape=(n_samples, n_components)
        The reduced samples.

    Notes
    -----
    Spectral embedding is most useful when the graph has one connected
    component. If the graph has many components, the first few eigenvectors
    will simply uncover the connected components of the graph.

    References
    ----------
    * http://en.wikipedia.org/wiki/LOBPCG

    * Toward the Optimal Preconditioned Eigensolver: Locally Optimal
      Block Preconditioned Conjugate Gradient Method
      Andrew V. Knyazev
      http://dx.doi.org/10.1137%2FS1064827500366124
    """

    try:
        from pyamg import smoothed_aggregation_solver
    except ImportError:
        if eigen_solver == "amg" or mode == "amg":
            raise ValueError("The eigen_solver was set to 'amg', but pyamg is "
                             "not available.")

    if mode is not None:
        warnings.warn("'mode' was renamed to eigen_solver "
                      "and will be removed in 0.15.",
                      DeprecationWarning)
        eigen_solver = mode

    if eigen_solver is None:
        eigen_solver = 'arpack'
    elif eigen_solver not in ('arpack', 'lobpcg', 'amg'):
        raise ValueError("Unknown value for eigen_solver: '%s'."
                         "Should be 'amg', 'arpack', or 'lobpcg'"
                         % eigen_solver)

    random_state = check_random_state(random_state)

    n_nodes = adjacency.shape[0]
    # Whether to drop the first eigenvector
    if drop_first:
        n_components = n_components + 1
    # Check that the matrices given is symmetric
    if ((not sparse.isspmatrix(adjacency) and
         not np.all((adjacency - adjacency.T) < 1e-10)) or
        (sparse.isspmatrix(adjacency) and
         not np.all((adjacency - adjacency.T).data < 1e-10))):
        warnings.warn("Graph adjacency matrix should be symmetric. "
                      "Converted to be symmetric by average with its "
                      "transpose.")
        adjacency = .5 * (adjacency + adjacency.T)

    if not _graph_is_connected(adjacency):
        warnings.warn("Graph is not fully connected, spectral embedding"
                      " may not work as expected.")

    laplacian, dd = graph_laplacian(adjacency,
                                    normed=norm_laplacian, return_diag=True)
    if (eigen_solver == 'arpack'
        or eigen_solver != 'lobpcg' and
            (not sparse.isspmatrix(laplacian)
             or n_nodes < 5 * n_components)):
        # lobpcg used with eigen_solver='amg' has bugs for low number of nodes
        # for details see the source code in scipy:
        # https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen
        # /lobpcg/lobpcg.py#L237
        # or matlab:
        # http://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
        laplacian = _set_diag(laplacian, 1)

        # Here we'll use shift-invert mode for fast eigenvalues
        # (see http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
        #  for a short explanation of what this means)
        # Because the normalized Laplacian has eigenvalues between 0 and 2,
        # I - L has eigenvalues between -1 and 1.  ARPACK is most efficient
        # when finding eigenvalues of largest magnitude (keyword which='LM')
        # and when these eigenvalues are very large compared to the rest.
        # For very large, very sparse graphs, I - L can have many, many
        # eigenvalues very near 1.0.  This leads to slow convergence.  So
        # instead, we'll use ARPACK's shift-invert mode, asking for the
        # eigenvalues near 1.0.  This effectively spreads-out the spectrum
        # near 1.0 and leads to much faster convergence: potentially an
        # orders-of-magnitude speedup over simply using keyword which='LA'
        # in standard mode.
        try:
            lambdas, diffusion_map = eigsh(-laplacian, k=n_components,
                                           sigma=1.0, which='LM',
                                           tol=eigen_tol)
            embedding = diffusion_map.T[n_components::-1] * dd
        except RuntimeError:
            # When submatrices are exactly singular, an LU decomposition
            # in arpack fails. We fallback to lobpcg
            eigen_solver = "lobpcg"

    if eigen_solver == 'amg':
        # Use AMG to get a preconditioner and speed up the eigenvalue
        # problem.
        if not sparse.issparse(laplacian):
            warnings.warn("AMG works better for sparse matrices")
        laplacian = laplacian.astype(np.float64)  # lobpcg needs native floats
        laplacian = _set_diag(laplacian, 1)
        ml = smoothed_aggregation_solver(atleast2d_or_csr(laplacian))
        M = ml.aspreconditioner()
        X = random_state.rand(laplacian.shape[0], n_components + 1)
        X[:, 0] = dd.ravel()
        lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12,
                                        largest=False)
        embedding = diffusion_map.T * dd
        if embedding.shape[0] == 1:
            raise ValueError

    elif eigen_solver == "lobpcg":
        laplacian = laplacian.astype(np.float64)  # lobpcg needs native floats
        if n_nodes < 5 * n_components + 1:
            # see note above under arpack why lobpcg has problems with small
            # number of nodes
            # lobpcg will fallback to symeig, so we short circuit it
            if sparse.isspmatrix(laplacian):
                laplacian = laplacian.todense()
            lambdas, diffusion_map = symeig(laplacian)
            embedding = diffusion_map.T[:n_components] * dd
        else:
            # lobpcg needs native floats
            laplacian = laplacian.astype(np.float64)
            laplacian = _set_diag(laplacian, 1)
            # We increase the number of eigenvectors requested, as lobpcg
            # doesn't behave well in low dimension
            X = random_state.rand(laplacian.shape[0], n_components + 1)
            X[:, 0] = dd.ravel()
            lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-15,
                                            largest=False, maxiter=2000)
            embedding = diffusion_map.T[:n_components] * dd
            if embedding.shape[0] == 1:
                raise ValueError
    if drop_first:
        return embedding[1:n_components].T
    else:
        return embedding[:n_components].T
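A hypothetical call of this spectral_embedding variant (a sketch: the ring data and the k-NN graph are assumptions, and the function's own imports are presumed in scope):

import numpy as np
from sklearn.neighbors import kneighbors_graph

theta = np.linspace(0, 2 * np.pi, 60, endpoint=False)
X = np.c_[np.cos(theta), np.sin(theta)]   # points on a circle
graph = kneighbors_graph(X, 2)            # sparse k-NN connectivity graph
graph = 0.5 * (graph + graph.T)           # symmetrize up front

embedding = spectral_embedding(graph, n_components=2, drop_first=True)
print(embedding.shape)                    # (60, 2)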
Example #30
# -*- coding: utf-8 -*-
import numpy as np
from sklearn.utils.graph import graph_laplacian

def assign_undirected_weight(W, i, j, v):
    W[i,j] = W[j,i] = v

n = 5
W = np.zeros((n,n))
assign_undirected_weight(W,0,1,0.08)
assign_undirected_weight(W,0,2,0.09)
assign_undirected_weight(W,1,2,0.45)
assign_undirected_weight(W,1,3,0.22)
assign_undirected_weight(W,1,4,0.24)
assign_undirected_weight(W,2,3,0.2)
assign_undirected_weight(W,2,4,0.19)
assign_undirected_weight(W,3,4,1)

adjacency = W
print(W)
laplacian, dd = graph_laplacian(adjacency, normed=True, return_diag=True)

print(laplacian)
print(dd)
Example #31
test_data = np.array(test_data)
test_label = np.array(test_label)

train_data = np.concatenate((train_data, test_data))
train_label = np.concatenate((train_label, test_label))

print('nsample: ' + str(train_data.shape[0]))
print('nclass: ' + str(np.unique(train_label).shape[0]))

gamma_value = 5.0
affinity_matrix = rbf_kernel(train_data, gamma=gamma_value)
from sklearn.utils.graph import graph_laplacian
from sklearn.utils.extmath import _deterministic_vector_sign_flip
### calculate laplacian matrix
laplacian, dd = graph_laplacian(affinity_matrix, normed=True, return_diag=True)
laplacian *= -1.

nclass = np.unique(train_label).shape[0]
nsample = train_data.shape[0]

#### Configuring AdaGrad
print('mini batch size = 100')
master_stepsize = 0.0025
outer_iter = 600
nsampleround = 50
ncols = 2
auto_corr = 0.0
ndim = nclass
print('nsampleround: ' + str(nsampleround))
print('ncols: ' + str(ncols))
Example #32
    def fit(self, X, X_species, y, orthologs, species_graph_adjacency,
            species_graph_names):
        """Fit the model

        Parameters
        ----------
        X: array_like, dtype=float, shape=(n_examples, n_features)
            The feature vectors of each labeled example.
        X_species: array_like, dtype=str, shape=(n_examples,)
            The name of the species to which each example belongs.
        y: array_like, dtype=float, shape(n_examples,)
            The labels of the examples in X.
        orthologs: dict
            A dictionary in which the keys are indices of X and the values are another dict, which contains
            the orthologous sequences and their species names. TIP: use an HDF5 file to store this information if the data
            doesn't fit into memory. Note: assumes that there is at most 1 ortholog per species.

            ex: {0: {"species": ["species1", "species5", "species2"],
                     "X": [[0, 2, 1, 4],    # Ortholog 1
                           [9, 4, 3, 1],    # Ortholog 2
                           [0, 0, 2, 1]]},  # Ortholog 3
                 1: {"species": ["species1", "species3"],
                     "X": [[1, 4, 7, 6],
                           [4, 4, 9, 3]]}}
        species_graph_adjacency: array_like, dtype=float, shape=(n_species, n_species)
            The adjacency matrix of the species graph.
        species_graph_names: array_like, dtype=str, shape(n_species,)
            The names of the species in the graph. The names should follow the same order as the adjacency matrix.
            ex: species_graph_names[4] corresponds to species_graph_adjacency[4] and species_graph_adjacency[:, 4].

        Note
        ----
        It is recommended to center the features vectors for the examples and their orthologs using a standard scaler.
        (see: http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html).

        """
        # Create a mapping between species names and indices in the graph adjacency matrix
        idx_by_species = dict(
            zip(species_graph_names, range(len(species_graph_names))))

        if self.fit_intercept:
            X = np.hstack((X, np.ones(X.shape[0]).reshape(
                -1, 1)))  # Add a feature for each example that serves as bias

        # Precompute the laplacian of the species graph
        # Note: we assume that there is one entry per species. This sacrifices a bit of memory, but allows the precomputation
        #       of the graph laplacian.
        L = graph_laplacian(species_graph_adjacency,
                            normed=self.normalize_laplacian)
        L *= 2.0 * self.beta

        matrix_to_invert = np.zeros((X.shape[1], X.shape[1]))

        # Compute the Phi^t x L x Phi product, where L is the block diagonal matrix with blocks equal to variable L
        for i, x in enumerate(X):
            # H5py doesn't support integer keys
            if isinstance(orthologs, h.File):
                i = str(i)

            if len(orthologs[i]["species"]) > 0:
                # Load the orthologs of X and create a matrix that also contains x
                x_orthologs_species = [
                    idx_by_species[s] for s in orthologs[i]["species"]
                ]
                x_orthologs_feats = orthologs[i]["X"]
                if self.fit_intercept:
                    x_orthologs_feats = np.hstack(
                        (x_orthologs_feats,
                         np.ones(x_orthologs_feats.shape[0]).reshape(
                             -1, 1)))  # Add this bias term

                X_tmp = np.zeros(
                    (len(species_graph_names), x_orthologs_feats.shape[1]))
                X_tmp[x_orthologs_species] = x_orthologs_feats
                X_tmp[idx_by_species[X_species[i]]] = x

                # Compute the efficient product and add it to the nasty product
                matrix_to_invert += np.dot(np.dot(X_tmp.T, L), X_tmp)

        # Compute the Phi^T x Phi matrix product that includes the labeled examples only
        matrix_to_invert += np.dot(X.T, X)

        # Compute the alpha * I product
        matrix_to_invert += self.alpha * np.eye(X.shape[1])

        # Compute the value of w, the predictor that minimizes the objective function
        self.w = np.dot(np.dot(np.linalg.inv(matrix_to_invert), X.T),
                        y).reshape(-1, )
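Reading the closed form off the code above (an inference from the code, not a statement in the source): with \Phi_i the matrix that stacks example i together with its orthologs (X_tmp in the loop) and L the species-graph Laplacian, fit() solves the regularized least-squares problem

    \min_w \; \|Xw - y\|_2^2 + \alpha \|w\|_2^2 + 2\beta \sum_i w^\top \Phi_i^\top L \, \Phi_i \, w

whose normal equations are exactly the system inverted above:

    \left( X^\top X + \alpha I + 2\beta \sum_i \Phi_i^\top L \, \Phi_i \right) w = X^\top y

(the factor 2*beta is folded into L by the line L *= 2.0 * self.beta).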
Example #33
def spectral_embedding(adjacency,
                       n_components=8,
                       eigen_solver=None,
                       random_state=None,
                       eigen_tol=0.0,
                       norm_laplacian=True,
                       drop_first=True):
    """Project the sample on the first eigenvectors of the graph Laplacian.

    The adjacency matrix is used to compute a normalized graph Laplacian
    whose spectrum (especially the eigenvectors associated to the
    smallest eigenvalues) has an interpretation in terms of minimal
    number of cuts necessary to split the graph into comparably sized
    components.

    This embedding can also 'work' even if the ``adjacency`` variable is
    not strictly the adjacency matrix of a graph but more generally
    an affinity or similarity matrix between samples (for instance the
    heat kernel of a euclidean distance matrix or a k-NN matrix).

    However, care must be taken to always make the affinity matrix symmetric
    so that the eigenvector decomposition works as expected.

    Read more in the :ref:`User Guide <spectral_embedding>`.

    Parameters
    ----------
    adjacency : array-like or sparse matrix, shape: (n_samples, n_samples)
        The adjacency matrix of the graph to embed.

    n_components : integer, optional, default 8
        The dimension of the projection subspace.

    eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}, default None
        The eigenvalue decomposition strategy to use. AMG requires pyamg
        to be installed. It can be faster on very large, sparse problems,
        but may also lead to instabilities.

    random_state : int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization of the
        lobpcg eigenvectors decomposition when eigen_solver == 'amg'.
        By default, arpack is used.

    eigen_tol : float, optional, default=0.0
        Stopping criterion for eigendecomposition of the Laplacian matrix
        when using arpack eigen_solver.

    drop_first : bool, optional, default=True
        Whether to drop the first eigenvector. For spectral embedding, this
        should be True as the first eigenvector should be constant vector for
        connected graph, but for spectral clustering, this should be kept as
        False to retain the first eigenvector.

    norm_laplacian : bool, optional, default=True
        If True, then compute normalized Laplacian.

    Returns
    -------
    embedding : array, shape=(n_samples, n_components)
        The reduced samples.

    Notes
    -----
    Spectral embedding is most useful when the graph has one connected
    component. If the graph has many components, the first few eigenvectors
    will simply uncover the connected components of the graph.

    References
    ----------
    * https://en.wikipedia.org/wiki/LOBPCG

    * Toward the Optimal Preconditioned Eigensolver: Locally Optimal
      Block Preconditioned Conjugate Gradient Method
      Andrew V. Knyazev
      http://dx.doi.org/10.1137%2FS1064827500366124
    """
    adjacency = check_symmetric(adjacency)

    try:
        from pyamg import smoothed_aggregation_solver
    except ImportError:
        if eigen_solver == "amg":
            raise ValueError("The eigen_solver was set to 'amg', but pyamg is "
                             "not available.")

    if eigen_solver is None:
        eigen_solver = 'arpack'
    elif eigen_solver not in ('arpack', 'lobpcg', 'amg'):
        raise ValueError("Unknown value for eigen_solver: '%s'."
                         "Should be 'amg', 'arpack', or 'lobpcg'" %
                         eigen_solver)

    random_state = check_random_state(random_state)

    n_nodes = adjacency.shape[0]
    # Whether to drop the first eigenvector
    if drop_first:
        n_components = n_components + 1

    if not _graph_is_connected(adjacency):
        warnings.warn("Graph is not fully connected, spectral embedding"
                      " may not work as expected.")

    laplacian, dd = graph_laplacian(adjacency,
                                    normed=norm_laplacian,
                                    return_diag=True)
    if (eigen_solver == 'arpack' or eigen_solver != 'lobpcg' and
        (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)):
        # lobpcg used with eigen_solver='amg' has bugs for low number of nodes
        # for details see the source code in scipy:
        # https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen
        # /lobpcg/lobpcg.py#L237
        # or matlab:
        # http://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
        laplacian = _set_diag(laplacian, 1, norm_laplacian)

        # Here we'll use shift-invert mode for fast eigenvalues
        # (see http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
        #  for a short explanation of what this means)
        # Because the normalized Laplacian has eigenvalues between 0 and 2,
        # I - L has eigenvalues between -1 and 1.  ARPACK is most efficient
        # when finding eigenvalues of largest magnitude (keyword which='LM')
        # and when these eigenvalues are very large compared to the rest.
        # For very large, very sparse graphs, I - L can have many, many
        # eigenvalues very near 1.0.  This leads to slow convergence.  So
        # instead, we'll use ARPACK's shift-invert mode, asking for the
        # eigenvalues near 1.0.  This effectively spreads-out the spectrum
        # near 1.0 and leads to much faster convergence: potentially an
        # orders-of-magnitude speedup over simply using keyword which='LA'
        # in standard mode.
        try:
            # We are computing the opposite of the laplacian inplace so as
            # to spare a memory allocation of a possibly very large array
            laplacian *= -1
            v0 = random_state.uniform(-1, 1, laplacian.shape[0])
            lambdas, diffusion_map = eigsh(laplacian,
                                           k=n_components,
                                           sigma=1.0,
                                           which='LM',
                                           tol=eigen_tol,
                                           v0=v0)
            # According to the model's assumptions, the original
            # multiplication was changed to division
            embedding = diffusion_map.T[n_components::-1] / dd
        except RuntimeError:
            # When submatrices are exactly singular, an LU decomposition
            # in arpack fails. We fallback to lobpcg
            eigen_solver = "lobpcg"
            # Revert the laplacian to its opposite to have lobpcg work
            laplacian *= -1

    if eigen_solver == 'amg':
        # Use AMG to get a preconditioner and speed up the eigenvalue
        # problem.
        if not sparse.issparse(laplacian):
            warnings.warn("AMG works better for sparse matrices")
        # lobpcg needs double precision floats
        laplacian = check_array(laplacian,
                                dtype=np.float64,
                                accept_sparse=True)
        laplacian = _set_diag(laplacian, 1, norm_laplacian)
        ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
        M = ml.aspreconditioner()
        X = random_state.rand(laplacian.shape[0], n_components + 1)
        X[:, 0] = dd.ravel()
        lambdas, diffusion_map = lobpcg(laplacian,
                                        X,
                                        M=M,
                                        tol=1.e-12,
                                        largest=False)
        # According to the model's assumptions, the original
        # multiplication was changed to division
        embedding = diffusion_map.T / dd
        if embedding.shape[0] == 1:
            raise ValueError

    elif eigen_solver == "lobpcg":
        # lobpcg needs double precision floats
        laplacian = check_array(laplacian,
                                dtype=np.float64,
                                accept_sparse=True)
        if n_nodes < 5 * n_components + 1:
            # see note above under arpack why lobpcg has problems with small
            # number of nodes
            # lobpcg will fallback to eigh, so we short circuit it
            if sparse.isspmatrix(laplacian):
                laplacian = laplacian.toarray()
            lambdas, diffusion_map = eigh(laplacian)
            # According to the model's assumptions, the original
            # multiplication was changed to division
            embedding = diffusion_map.T[:n_components] / dd
        else:
            laplacian = _set_diag(laplacian, 1, norm_laplacian)
            # We increase the number of eigenvectors requested, as lobpcg
            # doesn't behave well in low dimension
            X = random_state.rand(laplacian.shape[0], n_components + 1)
            X[:, 0] = dd.ravel()
            lambdas, diffusion_map = lobpcg(laplacian,
                                            X,
                                            tol=1e-15,
                                            largest=False,
                                            maxiter=2000)
            # According to the model's assumptions, the original
            # multiplication was changed to division
            embedding = diffusion_map.T[:n_components] / dd
            if embedding.shape[0] == 1:
                raise ValueError

    embedding = _deterministic_vector_sign_flip(embedding)
    if drop_first:
        return embedding[1:n_components].T
    else:
        return embedding[:n_components].T