Esempio n. 1
0
    def _svd(self, array, n_components, n_discard):
        """Returns first `n_components` left and right singular
        vectors u and v, discarding the first `n_discard`.

        """
        if self.svd_method == "randomized":
            kwargs = {}
            if self.n_svd_vecs is not None:
                kwargs["n_oversamples"] = self.n_svd_vecs
            u, _, vt = randomized_svd(array, n_components, random_state=self.random_state, **kwargs)

        elif self.svd_method == "arpack":
            u, _, vt = svds(array, k=n_components, ncv=self.n_svd_vecs)
            if np.any(np.isnan(vt)):
                # some eigenvalues of A * A.T are negative, causing
                # sqrt() to be np.nan. This causes some vectors in vt
                # to be np.nan.
                _, v = eigsh(safe_sparse_dot(array.T, array), ncv=self.n_svd_vecs)
                vt = v.T
            if np.any(np.isnan(u)):
                _, u = eigsh(safe_sparse_dot(array, array.T), ncv=self.n_svd_vecs)

        assert_all_finite(u)
        assert_all_finite(vt)
        u = u[:, n_discard:]
        vt = vt[n_discard:]
        return u, vt.T
Esempio n. 2
0
    def _svd(self, array, n_components, n_discard):
        """Returns first `n_components` left and right singular
        vectors u and v, discarding the first `n_discard`.

        """
        if self.svd_method == 'randomized':
            kwargs = {}
            if self.n_svd_vecs is not None:
                kwargs['n_oversamples'] = self.n_svd_vecs
            u, _, vt = randomized_svd(array,
                                      n_components,
                                      random_state=self.random_state,
                                      **kwargs)

        elif self.svd_method == 'arpack':
            u, _, vt = svds(array, k=n_components, ncv=self.n_svd_vecs)
            if np.any(np.isnan(vt)):
                # some eigenvalues of A * A.T are negative, causing
                # sqrt() to be np.nan. This causes some vectors in vt
                # to be np.nan.
                _, v = eigsh(safe_sparse_dot(array.T, array),
                             ncv=self.n_svd_vecs)
                vt = v.T
            if np.any(np.isnan(u)):
                _, u = eigsh(safe_sparse_dot(array, array.T),
                             ncv=self.n_svd_vecs)

        assert_all_finite(u)
        assert_all_finite(vt)
        u = u[:, n_discard:]
        vt = vt[n_discard:]
        return u, vt.T
def custom_svd(array, n_components, n_discard, n_svd_vecs):
    """Truncated SVD via ``svds`` with an ``eigsh`` fallback on NaNs.

    Computes `n_components` singular vectors of `array` (``ncv`` is set to
    `n_svd_vecs`), recomputes a factor through ``eigsh`` if it contains NaN,
    checks both factors with ``assert_all_finite`` and returns them with the
    first `n_discard` vectors removed, as ``(u, v)``.
    """
    left, _, right_t = svds(array, k=n_components, ncv=n_svd_vecs)

    # NaN in the right factor: recompute it from array.T * array.
    if np.isnan(right_t).any():
        _, right = eigsh(safe_sparse_dot(array.T, array), ncv=n_svd_vecs)
        right_t = right.T

    # NaN in the left factor: recompute it from array * array.T.
    if np.isnan(left).any():
        _, left = eigsh(safe_sparse_dot(array, array.T), ncv=n_svd_vecs)

    for factor in (left, right_t):
        assert_all_finite(factor)

    return left[:, n_discard:], right_t[n_discard:].T
Esempio n. 4
0
def spectralcluster(A,
                    n_cluster,
                    n_neighbors=6,
                    random_state=None,
                    eigen_tol=0.0):
    """Cluster the graph `A` with k-means on a spectral embedding.

    The normalized Laplacian of `A` is negated and `n_cluster` eigenpairs
    are computed with ``eigsh`` in shift-invert mode around 1.0. The
    eigenvectors are scaled by the diagonal term returned by
    ``graph_laplacian``, sign-normalized and passed to ``k_means``.

    `n_neighbors` is accepted but not used by this function.

    Returns
    -------
    (eigenvalues, y, centroids, labels), where `y` is the embedding fed to
    ``k_means``.
    """
    # `dd` is the diagonal term returned alongside the Laplacian.
    lap, dd = graph_laplacian(A, normed=True, return_diag=True)
    # Set the diagonal of the Laplacian and convert it to a sparse format
    # well suited for eigenvalue decomposition.
    lap = _set_diag(lap, 1)

    # Negate in place, then ask for the eigenvalues of largest magnitude
    # around sigma=1.0.
    lap *= -1
    eigenvalues, eigenvectors = eigsh(
        lap, k=n_cluster, sigma=1.0, which='LM', tol=eigen_tol)

    # Reverse the eigenvector order, rescale by `dd`, fix the signs.
    scaled = eigenvectors.T[n_cluster::-1] * dd
    y = _deterministic_vector_sign_flip(scaled)[:n_cluster].T

    rng = check_random_state(random_state)
    centroids, labels, _ = k_means(y, n_cluster, random_state=rng)
    return eigenvalues, y, centroids, labels
Esempio n. 5
0
def my_uniteigenvector_zeroeigenvalue_cluster(k):
    """Spectral clustering of the test graph into `k` clusters.

    Loads the graph stored in 'data/undirected(fortest).gpickle', drops its
    last node, builds the normalized graph Laplacian of the weighted
    adjacency matrix and computes `k` eigenpairs of the negated Laplacian
    with ``eigsh`` (shift-invert around 1.0). The last eigenvector column is
    replaced by the constant unit vector, every row is scaled to unit length
    and the rows are clustered with ``k_means`` (``n_init=100``).

    Parameters
    ----------
    k : int
        Number of eigenvectors to compute and number of clusters.

    Returns
    -------
    labels
        Cluster labels as returned by ``k_means``.
    """
    # NOTE(review): gpickle files are pickle-based; only load trusted files.
    G = nx.read_gpickle('data/undirected(fortest).gpickle')

    # Materialise the node list once. list() keeps the slice working whether
    # G.nodes() returns a plain list or a non-sliceable view object.
    nodelist = list(G.nodes())[:-1]
    n_nodes = len(nodelist)
    A = nx.adjacency_matrix(G, nodelist=nodelist, weight='weight')

    Ls, dd = graph_laplacian(A, normed=True, return_diag=True)

    eigenvalue_n, eigenvector_n = eigsh(Ls * (-1), k=k,
                                        sigma=1.0, which='LM',
                                        tol=0.0)

    # Overwrite the last column (treated as the zero-eigenvalue vector) with
    # the constant unit vector.
    eigenvector_n[:, -1] = np.full(n_nodes, 1.0 / math.sqrt(n_nodes))

    # Normalize every row to a unit vector.
    row_norms = np.linalg.norm(eigenvector_n, axis=1)
    eigenvector_n = eigenvector_n / row_norms[:, None]

    _, labels, _ = k_means(eigenvector_n, k, random_state=None,
                           n_init=100)
    return labels
Esempio n. 6
0
 def embed(self, laplacian, diagonal, k, tol=0):
     """Embed using eigenvectors of the negated `laplacian`.

     Computes ``k + 1`` eigenpairs of ``-laplacian`` with ``eigsh`` in
     'SM' mode, reverses the eigenvector order, multiplies by `diagonal`
     and drops the first vector, leaving `k` coordinates per sample. The
     result is passed through ``scale(..., axis=1)`` when ``self.do_scale``
     is truthy.
     """
     n_vecs = k + 1
     _, vecs = eigsh(-laplacian, k=n_vecs, which='SM', tol=tol)
     rows = vecs.T[n_vecs::-1] * diagonal
     coords = rows[1:n_vecs].T
     return scale(coords, axis=1) if self.do_scale else coords
Esempio n. 7
0
def runEmbed(data, n_components):
    """Embedding from the leading eigenpairs of `data`.

    Computes `n_components` eigenpairs with ``eigsh``, reverses their order,
    divides every eigenvector by the first one and scales the remaining
    vectors by ``lambda / (1 - lambda)``. The returned array therefore has
    ``n_components - 1`` columns.
    """
    eigvals, eigvecs = eigsh(data, k=n_components)

    # Reverse the order in which eigsh returned the pairs.
    eigvals = eigvals[::-1]
    eigvecs = eigvecs[:, ::-1]

    # Express every eigenvector relative to the first one.
    leading = eigvecs[:, 0]
    psi = eigvecs / leading[:, None]

    # Weights for all but the first eigenvalue.
    rest = eigvals[1:]
    weights = rest / (1 - rest)

    return psi[:, 1:(n_components + 1)] * weights[:n_components][None, :]
Esempio n. 8
0
def runEmbed(data, n_components):
    """Return an embedding built from `n_components` eigenpairs of `data`.

    The eigenpairs from ``eigsh`` are reversed, each eigenvector is divided
    element-wise by the first one, and columns 1.. are multiplied by
    ``lambda / (1 - lambda)`` of the matching eigenvalue.
    """
    lambdas, vectors = eigsh(data, k=n_components)
    lambdas, vectors = lambdas[::-1], vectors[:, ::-1]

    # Column 0 is the reference vector every other column is divided by.
    reference = vectors[:, 0][:, None]
    psi = vectors / reference

    tail = lambdas[1:]
    scale_factors = (tail / (1 - tail))[:n_components]

    selected = psi[:, 1:(n_components + 1)]
    embedding = selected * scale_factors[None, :]
    return embedding
Esempio n. 9
0
 def seriation(self, A):
     """Return an ordering of the samples from a spectral embedding of `A`.

     A sparse `A` is densified first. Its diagonal is then zeroed in place
     (so a dense `A` passed by the caller is modified). Two eigenpairs of
     the negated graph Laplacian are computed with ``eigsh`` (shift-invert
     around 1.0) and the indices that sort the second row of the reversed
     eigenvector matrix are returned.
     """
     n_components = 2
     eigen_tol = 0.00001

     if sparse.issparse(A):
         A = A.todense()
     np.fill_diagonal(A, 0)

     lap, dd = graph_laplacian(A, return_diag=True)
     lap *= -1
     _, vecs = eigsh(lap,
                     k=n_components,
                     sigma=1.0,
                     which='LM',
                     tol=eigen_tol)

     # Reverse the eigenvector order; `dd` is deliberately not applied.
     reversed_vecs = vecs.T[n_components::-1]
     return np.argsort(reversed_vecs[1])
Esempio n. 10
0
def main(argv):
    """Embed a correlation matrix and cluster it for a range of k.

    Reads the variable 'data' from './test.mat' (opened with h5py), builds a
    normalized affinity matrix, computes an embedding from its leading
    eigenpairs and runs k-means for every cluster count from `comp_min` up
    to (but excluding) `comp_max`. The label vectors are written to
    './test_results.mat'.

    `argv` is accepted but not used.
    """
    # Set defaults:
    n_components_embedding = 25
    comp_min = 2
    comp_max = 20 + 1
    varname = 'data'
    filename = './test'

    # Import files. The context manager closes the HDF5 file once the data
    # has been copied into memory.
    with h5py.File('%s.mat' % filename, 'r') as f:
        dataCorr = np.array(f.get('%s' % varname))

    # Prep matrix
    K = (dataCorr + 1) / 2.
    v = np.sqrt(np.sum(K, axis=1))
    A = K / (v[:, None] * v[None, :])
    del K
    A = np.squeeze(A * [A > 0])

    # Run embedding
    lambdas, vectors = eigsh(A, k=n_components_embedding)
    lambdas = lambdas[::-1]
    vectors = vectors[:, ::-1]
    psi = vectors / vectors[:, 0][:, None]
    lambdas = lambdas[1:] / (1 - lambdas[1:])
    embedding = psi[:, 1:(n_components_embedding +
                          1)] * lambdas[:n_components_embedding][None, :]

    # Run kmeans clustering

    def kmeans(embedding, n_components):
        """Cluster `embedding` into `n_components` groups; float labels."""
        # NOTE(review): `n_jobs` is kept from the original call; confirm the
        # installed scikit-learn still accepts it.
        est = KMeans(n_clusters=n_components,
                     n_jobs=-1,
                     init='k-means++',
                     n_init=300)
        # Only the labels are needed, so fit() replaces fit_transform().
        est.fit(embedding)
        # Builtin float is what the removed `np.float` alias pointed to.
        return est.labels_.astype(float)

    # range() works on both Python 2 and 3 (xrange is Python 2 only).
    results = [kmeans(embedding, n_components)
               for n_components in range(comp_min, comp_max)]

    savemat(('%s_results.mat' % filename), {'results': results})
 def runFiedler(conn):
     """Two-column embedding of the connectivity matrix `conn`.

     Adapted from https://github.com/margulies/topography
     `conn` is shifted to ``(conn + 1) / 2``, normalized by the square roots
     of its row sums, and non-positive entries are zeroed. Three eigenpairs
     are computed with ``eigsh``; after dividing by the first eigenvector
     the remaining two are scaled by ``lambda / (1 - lambda)``.
     """
     n_components_embedding = 2
     n_eig = n_components_embedding + 1

     # prep for embedding
     K = (conn + 1) / 2.
     row_scale = np.sqrt(np.sum(K, axis=1))
     A = K / (row_scale[:, None] * row_scale[None, :])
     del K
     A = np.squeeze(A * [A > 0])

     # diffusion embedding
     eigvals, eigvecs = eigsh(A, k=n_eig)
     del A
     eigvals = eigvals[::-1]
     eigvecs = eigvecs[:, ::-1]
     psi = eigvecs / eigvecs[:, 0][:, None]
     weights = eigvals[1:] / (1 - eigvals[1:])
     return psi[:, 1:(n_eig + 1)] * weights[:n_eig][None, :]
Esempio n. 12
0
def test_arpack_eigsh_initialization():
    """Non-regression test for the eigsh starting vector.

    Null-space computation is better when eigsh is initialized from
    [-1, 1] (as in original ARPACK) rather than [0, 1]; with the latter
    this test could fail.
    """
    rng = check_random_state(42)

    # Symmetric positive (semi-)definite input, turned into a graph
    # Laplacian and shifted slightly away from singularity.
    base = rng.rand(50, 50)
    spd = np.dot(base.T, base)
    A = graph_laplacian(spd) + 1e-7 * np.identity(spd.shape[0])
    k = 5

    # Starting vector drawn from [-1, 1].
    v0 = rng.uniform(-1, 1, A.shape[0])
    eigenvalues, _ = eigsh(A, k=k, sigma=0.0, v0=v0)

    # Eigenvalues of an s.p.d. matrix should be nonnegative; the first
    # returned value is the smallest.
    assert_greater_equal(eigenvalues[0], 0)
Esempio n. 13
0
def test_arpack_eigsh_initialization():
    """Check that eigsh seeded from [-1, 1] yields a nonnegative spectrum.

    Non-regression test: initializing eigsh from [-1, 1] instead of [0, 1]
    gives a better null-space computation.
    """
    n_samples = 50
    k = 5
    random_state = check_random_state(42)

    samples = random_state.rand(n_samples, n_samples)
    gram = np.dot(samples.T, samples)  # s.p.d. matrix
    A = laplacian(gram) + 1e-7 * np.identity(gram.shape[0])

    # New initialization [-1, 1] (as in original ARPACK); it was [0, 1]
    # before, with which this test could fail.
    start = random_state.uniform(-1, 1, A.shape[0])
    w, _ = eigsh(A, k=k, sigma=0.0, v0=start)

    # w[0] is the smallest eigenvalue and must not be negative.
    smallest = w[0]
    assert_greater_equal(smallest, 0)
Esempio n. 14
0
    def _fit_transform(self, K):
        """ Fit's using kernel K"""
        # center kernel
        K = self._centerer.fit_transform(K)

        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)

        # compute eigenvectors
        if self.eigen_solver == 'auto':
            if K.shape[0] > 200 and n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        if eigen_solver == 'dense':
            self.lambdas_, self.alphas_ = linalg.eigh(
                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))
            self.evals_, self.evecs_ = linalg.eigh(K)

        elif eigen_solver == 'arpack':
            self.lambdas_, self.alphas_ = eigsh(K,
                                                n_components,
                                                which="LA",
                                                tol=self.tol,
                                                maxiter=self.max_iter)

        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.alphas_ = self.alphas_[:, indices]

        # remove eigenvectors with a zero eigenvalue
        if self.remove_zero_eig or self.n_components is None:
            self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        return K
def main(argv):
    """Run the embedding + k-means pipeline on './test.mat'.

    The variable 'data' is loaded from the HDF5 file './test.mat', rescaled
    to ``(data + 1) / 2`` and normalized by the square roots of its row
    sums. An embedding is derived from 25 eigenpairs of that matrix, and
    k-means is run on it for each cluster count in
    ``range(comp_min, comp_max)``. All label vectors are saved under the
    key 'results' in './test_results.mat'.

    `argv` is accepted but not used.
    """
    # Set defaults:
    n_components_embedding = 25
    comp_min = 2
    comp_max = 20 + 1
    varname = 'data'
    filename = './test'

    # Import files; `with` guarantees the file handle is released.
    with h5py.File(('%s.mat' % filename), 'r') as f:
        dataCorr = np.array(f.get('%s' % varname))

    # Prep matrix
    K = (dataCorr + 1) / 2.
    v = np.sqrt(np.sum(K, axis=1))
    A = K/(v[:, None] * v[None, :])
    del K
    A = np.squeeze(A * [A > 0])

    # Run embedding
    lambdas, vectors = eigsh(A, k=n_components_embedding)
    lambdas = lambdas[::-1]
    vectors = vectors[:, ::-1]
    psi = vectors/vectors[:, 0][:, None]
    lambdas = lambdas[1:] / (1 - lambdas[1:])
    embedding = psi[:, 1:(n_components_embedding + 1)] * lambdas[:n_components_embedding][None, :]

    # Run kmeans clustering

    def kmeans(embedding, n_components):
        """Return the k-means labels of `embedding` as a float array."""
        # NOTE(review): `n_jobs` is preserved from the original; verify it
        # is supported by the scikit-learn version in use.
        est = KMeans(n_clusters=n_components, n_jobs=-1, init='k-means++', n_init=300)
        # fit() is enough: the transformed distances were never used.
        est.fit(embedding)
        labels = est.labels_
        # `np.float` was only an alias of the builtin float.
        data = labels.astype(float)
        return data

    results = list()
    # range() instead of the Python-2-only xrange().
    for n_components in range(comp_min, comp_max):
        results.append(kmeans(embedding, n_components))

    savemat(('%s_results.mat' % filename), {'results': results})
Esempio n. 16
0
def DoFiedler(conn):
    """Compute a two-column embedding from the matrix `conn`.

    The input is mapped to ``(conn + 1) / 2``, divided by the outer product
    of the square roots of its row sums, and non-positive entries are set
    to zero. Three eigenpairs are obtained from ``eigsh``; the eigenvectors
    are divided by the first one and the remaining two are weighted by
    ``lambda / (1 - lambda)``.
    """
    n_components_embedding = 2
    n_requested = n_components_embedding + 1

    # prep for embedding
    K = (conn + 1) / 2.
    root_sums = np.sqrt(np.sum(K, axis=1))
    A = K / (root_sums[:, None] * root_sums[None, :])
    del K
    A = np.squeeze(A * [A > 0])

    # diffusion embedding
    values, vecs = eigsh(A, k=n_requested)
    del A
    values, vecs = values[::-1], vecs[:, ::-1]

    first = vecs[:, 0]
    psi = vecs / first[:, None]

    remaining = values[1:]
    weights = remaining / (1 - remaining)

    embedding = psi[:, 1:(n_requested + 1)] * weights[:n_requested][None, :]
    return embedding
    def _fit_transform(self, K):
        """ Fit's using kernel K"""
        # center kernel
        K = self._centerer.fit_transform(K)

        if self.n_components is None:
            n_components = K.shape[0]
        else:
            n_components = min(K.shape[0], self.n_components)

        # compute eigenvectors
        if self.eigen_solver == 'auto':
            if K.shape[0] > 200 and n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        if eigen_solver == 'dense':
            self.lambdas_, self.alphas_ = linalg.eigh(
                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))
            self.evals_, self.evecs_ = linalg.eigh(K)

        elif eigen_solver == 'arpack':
            self.lambdas_, self.alphas_ = eigsh(K, n_components,
                                                which="LA",
                                                tol=self.tol,
                                                maxiter=self.max_iter)

        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.alphas_ = self.alphas_[:, indices]

        # remove eigenvectors with a zero eigenvalue
        if self.remove_zero_eig or self.n_components is None:
            self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        return K
Esempio n. 18
0
def predict_k(affinity_matrix):
    """Estimate the number of clusters from the largest eigengap.

    Builds the normalized graph Laplacian of `affinity_matrix`, computes
    ``N - 1`` of its eigenvalues through ``eigsh`` on the negated matrix
    (shift-invert around 1.0) and returns the 1-based position of the
    eigenvalue that precedes the largest gap between consecutive values.
    """
    normed_laplacian, dd = graph_laplacian(
        affinity_matrix, normed=True, return_diag=True)
    laplacian = _set_diag(normed_laplacian, 1, norm_laplacian=True)

    n_components = affinity_matrix.shape[0] - 1

    eigenvalues, eigenvectors = eigsh(
        -laplacian, k=n_components, which="LM", sigma=1.0, maxiter=5000)
    eigenvalues = -eigenvalues[::-1]  # Reverse and sign inversion.

    # Keep the first strictly largest gap between neighbouring eigenvalues.
    best_gap = 0
    best_index = 0
    neighbours = zip(eigenvalues[:-1], eigenvalues[1:])
    for index, (lower, upper) in enumerate(neighbours):
        gap = upper - lower
        if gap > best_gap:
            best_gap, best_index = gap, index

    return best_index + 1
Esempio n. 19
0
def spectralcluster(A, n_cluster, n_neighbors=6, random_state=None, eigen_tol=0.0):
    """Spectral clustering of the affinity graph `A` into `n_cluster` groups.

    Steps: normalized graph Laplacian -> negate -> `n_cluster` eigenpairs
    via ``eigsh`` (shift-invert, sigma=1.0) -> rescale by the Laplacian's
    diagonal term and fix signs -> ``k_means``.

    `n_neighbors` is part of the signature but is not used here.

    Returns ``(eigenvalues, y, centroids, labels)`` with `y` the embedding
    that was clustered.
    """
    # dd is the diagonal term returned with the Laplacian
    normed, dd = graph_laplacian(A, normed=True, return_diag=True)

    # Set the diagonal and convert to a sparse format well suited for
    # eigenvalue decomposition.
    negated = _set_diag(normed, 1)
    negated *= -1

    # Largest-magnitude eigenvalues in shift-invert mode.
    solver_options = dict(k=n_cluster, sigma=1.0, which='LM', tol=eigen_tol)
    eigenvalues, eigenvectors = eigsh(negated, **solver_options)

    y = eigenvectors.T[n_cluster::-1] * dd
    y = _deterministic_vector_sign_flip(y)
    y = y[:n_cluster].T

    random_state = check_random_state(random_state)
    centroids, labels, _ = k_means(y, n_cluster, random_state=random_state)

    return eigenvalues, y, centroids, labels
Esempio n. 20
0
def DoFiedler(conn):
    """Compute a four-column embedding from the matrix `conn`.

    `conn` is shifted to ``(conn + 1) / 2``, divided element-wise by the
    outer product of the square roots of its row sums, and non-positive
    entries are zeroed. Five eigenpairs are requested from ``eigsh``; all
    eigenvectors are divided by the first one, and the other four are
    weighted by ``lambda / (1 - lambda)``.
    """
    n_components_embedding = 5

    # K: similarity / kernel matrix. The shift makes it non-negative for
    # inputs in [-1, 1].
    K = (conn + 1) / 2.
    # Square roots of the row sums of K.
    root_row_sums = np.sqrt(np.sum(K, axis=1))
    # Outer product of those square roots, used to normalize K.
    D = root_row_sums[:, None] * root_row_sums[None, :]
    A = K / D
    del K
    A = np.squeeze(A * [A > 0])

    lambdas, vectors = eigsh(A, k=n_components_embedding)
    del A

    # Reverse the order of eigenvalues and eigenvectors.
    lambdas = lambdas[::-1]
    vectors = vectors[:, ::-1]

    # Divide by the first eigenvector, then continue from the second pair.
    psi = vectors / vectors[:, 0][:, None]
    tail = lambdas[1:]
    weights = tail / (1 - tail)

    columns = psi[:, 1:(n_components_embedding + 1)]
    return columns * weights[:n_components_embedding][None, :]
 # Remember the original length so results can be padded back to it later.
 fullsize = len(dataAll)
 del dataAll
 
 # correlate
 dataCorr = np.corrcoef(np.transpose(np.array(dataNorm)))
 del dataNorm
 # replace NaN correlations by 0
 dataCorr[np.isnan(dataCorr)] = 0
 # prep for embedding: shift to (corr + 1) / 2 and normalize by the square
 # roots of the row sums
 K = (dataCorr + 1) / 2.  
 del dataCorr
 v = np.sqrt(np.sum(K, axis=1)) 
 A = K/(v[:, None] * v[None, :])  
 del K
 # zero out non-positive entries
 A = np.squeeze(A * [A > 0])
 # diffusion embedding
 lambdas, vectors = eigsh(A, k=n_components_embedding+1)  
 del A
 # reverse the order returned by eigsh
 lambdas = lambdas[::-1]  
 vectors = vectors[:, ::-1]  
 # divide every eigenvector by the first one
 psi = vectors/vectors[:, 0][:, None]  
 lambdas = lambdas[1:] / (1 - lambdas[1:])  
 embedding = psi[:, 1:(n_components_embedding + 1 + 1)] * lambdas[:n_components_embedding+1][None, :]  
 # kmeans clustering, once per cluster count in [comp_min, comp_max]
 results = []
 for n_components in xrange(comp_min, comp_max+1):
     est = KMeans(n_clusters=n_components, n_jobs=-1, init='k-means++', n_init=300)
     est.fit_transform(embedding)
     labels = est.labels_
     clust = labels.astype(np.float)
     # reinsert zeros:
     padded = np.zeros(fullsize)
Esempio n. 22
0
    def spectral_embedding(self,
                           adjacency,
                           n_components=8,
                           eigen_solver=None,
                           random_state=None,
                           eigen_tol=0.0,
                           drop_first=True):
        """Embed a graph using eigenvectors of its normalized Laplacian.

        Adapted from scikit-learn; see the original at
        https://github.com/scikit-learn/scikit-learn/blob/14031f6/sklearn/manifold/spectral_embedding_.py#L133

        Customizations relative to the original:

        1. The eigenvalues (``lambdas``) are returned together with the
           embedded matrix.
        2. ``norm_laplacian`` is always True.

        Parameters
        ----------
        adjacency : matrix
            Adjacency matrix of the graph. It is passed through
            ``check_symmetric`` before use.
        n_components : int, default 8
            Number of embedding coordinates to return.
        eigen_solver : {None, 'arpack', 'lobpcg', 'amg'}
            Eigen solver to use; None selects 'arpack'. 'amg' requires
            pyamg to be importable.
        random_state : passed to ``check_random_state``
            Source of the random starting block used by the lobpcg-based
            solvers.
        eigen_tol : float, default 0.0
            Tolerance passed to ``eigsh`` on the arpack path.
        drop_first : bool, default True
            When True, one additional eigenvector is computed and the first
            one is removed from the returned embedding.

        Returns
        -------
        embedding : array
            The selected eigenvectors, one column per coordinate
            (presumably shape (n_nodes, n_components) - TODO confirm).
        lambdas : array
            Eigenvalues as returned by whichever solver produced the
            embedding. On the arpack path these belong to the negated
            Laplacian.

        Raises
        ------
        ValueError
            If `eigen_solver` is unknown, if 'amg' is requested without
            pyamg, or if a lobpcg-based solver yields a single vector.
        """
        norm_laplacian = True
        adjacency = check_symmetric(adjacency)

        try:
            from pyamg import smoothed_aggregation_solver
        except ImportError:
            # pyamg is optional; it is only an error if 'amg' was requested.
            if eigen_solver == "amg":
                raise ValueError(
                    "The eigen_solver was set to 'amg', but pyamg is "
                    "not available.")

        if eigen_solver is None:
            eigen_solver = 'arpack'
        elif eigen_solver not in ('arpack', 'lobpcg', 'amg'):
            raise ValueError("Unknown value for eigen_solver: '%s'."
                             "Should be 'amg', 'arpack', or 'lobpcg'" %
                             eigen_solver)

        random_state = check_random_state(random_state)

        n_nodes = adjacency.shape[0]
        # Whether to drop the first eigenvector
        if drop_first:
            n_components = n_components + 1

        if not _graph_is_connected(adjacency):
            warnings.warn("Graph is not fully connected, spectral embedding"
                          " may not work as expected.")

        laplacian, dd = graph_laplacian(adjacency,
                                        normed=norm_laplacian,
                                        return_diag=True)
        # The arpack path is taken when explicitly requested, and also for
        # 'amg' when the Laplacian is dense or the graph is small.
        if (eigen_solver == 'arpack' or eigen_solver != 'lobpcg' and
            (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)):
            # lobpcg used with eigen_solver='amg' has bugs for low number of nodes
            # for details see the source code in scipy:
            # https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen
            # /lobpcg/lobpcg.py#L237
            # or matlab:
            # http://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
            laplacian = _set_diag(laplacian, 1, norm_laplacian)

            # Here we'll use shift-invert mode for fast eigenvalues
            # (see http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
            #  for a short explanation of what this means)
            # Because the normalized Laplacian has eigenvalues between 0 and 2,
            # I - L has eigenvalues between -1 and 1.  ARPACK is most efficient
            # when finding eigenvalues of largest magnitude (keyword which='LM')
            # and when these eigenvalues are very large compared to the rest.
            # For very large, very sparse graphs, I - L can have many, many
            # eigenvalues very near 1.0.  This leads to slow convergence.  So
            # instead, we'll use ARPACK's shift-invert mode, asking for the
            # eigenvalues near 1.0.  This effectively spreads-out the spectrum
            # near 1.0 and leads to much faster convergence: potentially an
            # orders-of-magnitude speedup over simply using keyword which='LA'
            # in standard mode.
            try:
                # We are computing the opposite of the laplacian inplace so as
                # to spare a memory allocation of a possibly very large array
                laplacian *= -1
                lambdas, diffusion_map = eigsh(laplacian,
                                               k=n_components,
                                               sigma=1.0,
                                               which='LM',
                                               tol=eigen_tol)
                # Reverse the eigenvector order and rescale by dd.
                embedding = diffusion_map.T[n_components::-1] * dd

            except RuntimeError:
                # When submatrices are exactly singular, an LU decomposition
                # in arpack fails. We fallback to lobpcg
                eigen_solver = "lobpcg"
                # Revert the laplacian to its opposite to have lobpcg work
                laplacian *= -1

        if eigen_solver == 'amg':
            # Use AMG to get a preconditioner and speed up the eigenvalue
            # problem.
            if not sparse.issparse(laplacian):
                warnings.warn("AMG works better for sparse matrices")
            # lobpcg needs double precision floats
            laplacian = check_array(laplacian,
                                    dtype=np.float64,
                                    accept_sparse=True)
            laplacian = _set_diag(laplacian, 1, norm_laplacian)
            ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
            M = ml.aspreconditioner()
            # Random starting block whose first column is set to dd.
            X = random_state.rand(laplacian.shape[0], n_components + 1)
            X[:, 0] = dd.ravel()
            lambdas, diffusion_map = lobpcg(laplacian,
                                            X,
                                            M=M,
                                            tol=1.e-12,
                                            largest=False)
            embedding = diffusion_map.T * dd
            if embedding.shape[0] == 1:
                raise ValueError

        elif eigen_solver == "lobpcg":
            # lobpcg needs double precision floats
            laplacian = check_array(laplacian,
                                    dtype=np.float64,
                                    accept_sparse=True)
            if n_nodes < 5 * n_components + 1:
                # see note above under arpack why lobpcg has problems with small
                # number of nodes
                # lobpcg will fallback to eigh, so we short circuit it
                if sparse.isspmatrix(laplacian):
                    laplacian = laplacian.toarray()
                lambdas, diffusion_map = eigh(laplacian)
                embedding = diffusion_map.T[:n_components] * dd
            else:
                laplacian = _set_diag(laplacian, 1, norm_laplacian)
                # We increase the number of eigenvectors requested, as lobpcg
                # doesn't behave well in low dimension
                X = random_state.rand(laplacian.shape[0], n_components + 1)
                X[:, 0] = dd.ravel()
                lambdas, diffusion_map = lobpcg(laplacian,
                                                X,
                                                tol=1e-15,
                                                largest=False,
                                                maxiter=2000)
                embedding = diffusion_map.T[:n_components] * dd
                if embedding.shape[0] == 1:
                    raise ValueError

        embedding = _deterministic_vector_sign_flip(embedding)
        # Customization: the eigenvalues are returned next to the embedding.
        if drop_first:
            return embedding[1:n_components].T, lambdas
        else:
            return embedding[:n_components].T, lambdas
Esempio n. 23
0
def null_space(M, k, k_skip=1, eigen_solver='dense', tol=1E-6, max_iter=100,
               random_state=None):
    """
    Find the null space of a matrix M.

    Parameters
    ----------
    M : {array, matrix, sparse matrix, LinearOperator}
        Input covariance matrix: should be symmetric positive semi-definite

    k : integer
        Number of eigenvalues/vectors to return

    k_skip : integer, optional
        Number of low eigenvalues to skip.

    eigen_solver : string, {'auto', 'arpack', 'dense'}
        auto : 'arpack' is chosen when M has more than 200 rows and
               ``k + k_skip < 10``, otherwise 'dense'.
        arpack : use ``eigsh`` in shift-invert mode (``sigma=0.0``).
                 Warning: ARPACK can be unstable for some problems.  It is
                 best to try several random seeds in order to check results.
        dense  : use standard dense matrix operations for the eigenvalue
                 decomposition.  A sparse M is converted with ``toarray()``.
                 This method should be avoided for large problems.

    tol : float, optional
        Tolerance for 'arpack' method.
        Not used if eigen_solver=='dense'.

    max_iter : maximum number of iterations for 'arpack' method
        not used if eigen_solver=='dense'

    random_state: numpy.RandomState or int, optional
        The generator or seed used to determine the starting vector for
        arpack iterations.

    Returns
    -------
    With the 'dense' solver, a 3-tuple:

    embedding_vectors : array
        The eigenvectors with index ``k_skip .. k + k_skip - 1``, ordered
        by the absolute value of their eigenvalue.
    eigenvalues : array
        All eigenvalues of M, in descending order.
    eigenvectors : array
        All eigenvectors of M as columns, ordered like `eigenvalues`.

    With the 'arpack' solver, a 2-tuple ``(eigen_vectors[:, k_skip:],
    eigen_values[k_skip:])``.

    Raises
    ------
    ValueError
        If ARPACK raises a RuntimeError, or if `eigen_solver` is not
        recognized.
    """
    if eigen_solver == 'auto':
        if M.shape[0] > 200 and k + k_skip < 10:
            eigen_solver = 'arpack'
        else:
            eigen_solver = 'dense'

    if eigen_solver not in ('arpack', 'dense'):
        raise ValueError("Unrecognized eigen_solver '%s'" % eigen_solver)

    if eigen_solver == 'arpack':
        random_state = check_random_state(random_state)
        start_vector = random_state.rand(M.shape[0])
        try:
            values, vecs = eigsh(M, k + k_skip, sigma=0.0,
                                 tol=tol, maxiter=max_iter,
                                 v0=start_vector)
        except RuntimeError as msg:
            raise ValueError("Error in determining null-space with ARPACK. "
                             "Error message: '%s'. "
                             "Note that method='arpack' can fail when the "
                             "weight matrix is singular or otherwise "
                             "ill-behaved.  method='dense' is recommended. "
                             "See online documentation for more information."
                             % msg)
        return vecs[:, k_skip:], values[k_skip:]

    # Dense solver.
    if hasattr(M, 'toarray'):
        M = M.toarray()

    # Requested slice of the spectrum, ordered by absolute eigenvalue.
    subset_values, subset_vecs = eigh(M, eigvals=(k_skip, k + k_skip - 1))
    by_magnitude = np.argsort(np.abs(subset_values))

    # Full decomposition, largest eigenvalue first. This call may overwrite
    # M, so it has to come after the subset computation.
    all_values, all_vecs = eigh(M, overwrite_a=True)
    descending = np.argsort(all_values)[::-1]
    all_values = all_values[descending]
    all_vecs = all_vecs[:, descending]

    return subset_vecs[:, by_magnitude], all_values, all_vecs
def predict_k(affinity_matrix):
    """
    Predict number of clusters based on the eigengap.

    Parameters
    ----------
    affinity_matrix : array-like or sparse matrix, shape: (n_samples, n_samples)
        adjacency matrix.
        Each element of this matrix contains a measure of similarity between
        two of the data points.

    Returns
    ----------
    k : integer
        estimated number of clusters.

    Note
    ---------
    If the graph is not fully connected, the zero component counts as a
    single cluster.

    References
    ----------
    A Tutorial on Spectral Clustering, 2007
        Luxburg, Ulrike
        http://www.kyb.mpg.de/fileadmin/user_upload/files/publications/attachments/Luxburg07_tutorial_4488%5b0%5d.pdf

    """
    # If normed=True, L = D^(-1/2) * (D - A) * D^(-1/2) else L = D - A.
    # normed=True is recommended.
    normed_laplacian, dd = graph_laplacian(
        affinity_matrix, normed=True, return_diag=True)
    laplacian = _set_diag(normed_laplacian, 1)

    # n_components size is N - 1.
    # Setting N - 1 may lead to slow execution time...
    n_components = affinity_matrix.shape[0] - 1

    # Shift-invert mode: it provides more than just a fast way to obtain a
    # few small eigenvalues.
    # http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
    #
    # The normalized Laplacian has eigenvalues between 0 and 2.
    # I - L has eigenvalues between -1 and 1.
    eigenvalues, eigenvectors = eigsh(
        -laplacian, k=n_components, which="LM", sigma=1.0, maxiter=5000)
    eigenvalues = -eigenvalues[::-1]  # Reverse and sign inversion.

    # Find the first strictly largest gap between consecutive eigenvalues.
    widest_gap = 0
    index_before_gap = 0
    position = 1
    while position < eigenvalues.size:
        gap = eigenvalues[position] - eigenvalues[position - 1]
        if gap > widest_gap:
            widest_gap = gap
            index_before_gap = position - 1
        position += 1

    return index_before_gap + 1
Esempio n. 25
0
    cat_tuple = (cat, matrix[cat].mean())
    cat_perc.append(cat_tuple)
# sort category percentages
cat_perc = sorted(cat_perc, key=lambda x: x[1])

graph = cosine_similarity(matrix)  # use cosine similarity, as in Noulas et al.

# https://github.com/mingmingyang/auto_spectral_clustering/blob/master/autosp.py
# how to calculate spectral clusters
norm_laplacian, dd = graph_laplacian(graph, normed=True, return_diag=True)
laplacian = _set_diag(norm_laplacian, 1, norm_laplacian=True)
# Request N - 1 eigenvalues of the N x N Laplacian.
n_components = graph.shape[0] - 1

# Shift-invert around 1.0 on the negated Laplacian.
eigenvalues, eigenvectors = eigsh(-laplacian,
                                  k=n_components,
                                  which="LM",
                                  sigma=1.0,
                                  maxiter=5000)
# Reverse the order and undo the sign flip.
eigenvalues = -eigenvalues[::-1]

# Eigengap heuristic: locate the first strictly largest gap between
# consecutive eigenvalues.
max_gap = 0
gap_pre_index = 0
for i in range(1, eigenvalues.size):
    gap = eigenvalues[i] - eigenvalues[i - 1]
    if gap > max_gap:
        max_gap = gap
        gap_pre_index = i - 1

# Estimated number of clusters.
k = gap_pre_index + 1

# Function-call form prints the same value on Python 2 and is valid
# syntax on Python 3 (the bare `print k` statement is not).
print(k)
Esempio n. 26
0
def predict_k(affinity_matrix):
    """
    Estimate the number of clusters from the largest eigengap.

    Parameters
    ----------
    affinity_matrix : array-like or sparse matrix, shape: (n_samples, n_samples)
        Adjacency matrix. Each entry holds a measure of similarity between
        two of the data points.

    Returns
    -------
    k : integer
        Estimated number of clusters: one plus the index of the eigenvalue
        that precedes the widest gap between consecutive eigenvalues.

    Notes
    -----
    Note carried over from the original author: if the graph is not fully
    connected, the zero component is treated as a single cluster.

    References
    ----------
    A Tutorial on Spectral Clustering, 2007
        Luxburg, Ulrike
        http://www.kyb.mpg.de/fileadmin/user_upload/files/publications/attachments/Luxburg07_tutorial_4488%5b0%5d.pdf

    """
    # With normed=True, L = D^(-1/2) * (D - A) * D^(-1/2); otherwise L = D - A.
    # The normalized form is the recommended one.
    normalized, _ = graph_laplacian(affinity_matrix,
                                    normed=True,
                                    return_diag=True)
    shifted = _set_diag(normalized, 1)

    # Ask for N - 1 eigenvalues. Requesting that many may be slow.
    n_requested = affinity_matrix.shape[0] - 1

    # Shift-invert mode (sigma=1.0), see
    # http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
    # The normalized Laplacian has eigenvalues between 0 and 2, so
    # I - L has eigenvalues between -1 and 1.
    raw_values, _ = eigsh(-shifted,
                          k=n_requested,
                          which="LM",
                          sigma=1.0,
                          maxiter=5000)
    # Reverse the order and invert the sign.
    spectrum = -raw_values[::-1]

    # Scan consecutive pairs and remember where the widest gap starts.
    widest = 0
    widest_at = 0
    for idx, (lower, upper) in enumerate(zip(spectrum[:-1], spectrum[1:])):
        width = upper - lower
        if width > widest:
            widest, widest_at = width, idx

    return widest_at + 1
Esempio n. 27
0
def spectral_embedding(laplacian,
                       n_components=8,
                       eigen_solver=None,
                       random_state=None,
                       eigen_tol=1e-20,
                       drop_first=False):
    """Project the samples on the first eigenvectors of a given Laplacian.

    This is a variation of the scikit-learn spectral embedding function:
    it takes an already computed Laplacian matrix as input instead of an
    adjacency matrix.

    The embedding can also 'work' even if the underlying matrix is not
    strictly derived from the adjacency matrix of a graph but more generally
    from an affinity or similarity matrix between samples (for instance the
    heat kernel of a euclidean distance matrix or a k-NN matrix).

    However, care must be taken to always make the affinity matrix symmetric
    so that the eigenvector decomposition works as expected.

    Read more in the :ref:`User Guide <spectral_embedding>`.

    Parameters
    ----------
    laplacian : array-like or sparse matrix, shape: (n_samples, n_samples)
        The laplacian matrix of the graph to embed.

    n_components : integer, optional, default 8
        The dimension of the projection subspace.

    eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}, default None
        The eigenvalue decomposition strategy to use. AMG requires pyamg
        to be installed. It can be faster on very large, sparse problems,
        but may also lead to instabilities. None selects 'arpack'.

    random_state : int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization of the
        lobpcg eigenvectors decomposition when eigen_solver is 'amg' or
        'lobpcg'.

    eigen_tol : float, optional, default=1e-20
        Stopping criterion for eigendecomposition of the Laplacian matrix
        when using arpack eigen_solver.

    drop_first : bool, optional, default=False
        Whether to drop the first eigenvector. For spectral embedding, this
        should be True as the first eigenvector should be constant vector for
        connected graph, but for spectral clustering, this should be kept as
        False to retain the first eigenvector.

    Returns
    -------
    embedding : array, shape=(n_samples, n_components)
        The reduced samples.

    Raises
    ------
    ValueError
        If eigen_solver is 'amg' but pyamg cannot be imported, if
        eigen_solver is not one of the accepted values, or if the amg /
        lobpcg path yields an embedding with a single row.

    Notes
    -----
    Spectral embedding is most useful when the graph has one connected
    component. If the graph has many components, the first few eigenvectors
    will simply uncover the connected components of the graph.

    References
    ----------
    * http://en.wikipedia.org/wiki/LOBPCG

    * Toward the Optimal Preconditioned Eigensolver: Locally Optimal
      Block Preconditioned Conjugate Gradient Method
      Andrew V. Knyazev
      http://dx.doi.org/10.1137%2FS1064827500366124
    """

    # pyamg is optional: its absence is only an error when 'amg' was requested.
    try:
        from pyamg import smoothed_aggregation_solver
    except ImportError:
        if eigen_solver == "amg":
            raise ValueError("The eigen_solver was set to 'amg', but pyamg is "
                             "not available.")

    if eigen_solver is None:
        eigen_solver = 'arpack'
    elif eigen_solver not in ('arpack', 'lobpcg', 'amg'):
        raise ValueError("Unknown value for eigen_solver: '%s'."
                         "Should be 'amg', 'arpack', or 'lobpcg'" %
                         eigen_solver)

    random_state = check_random_state(random_state)

    n_nodes = laplacian.shape[0]
    # Whether to drop the first eigenvector: one extra vector is computed so
    # that n_components remain after the first one is discarded.
    if drop_first:
        n_components = n_components + 1

    # Diagonal of the input Laplacian, read before `_set_diag` is applied.
    # NOTE(review): for a dense ndarray `diagonal()` may return a view of the
    # input; if `_set_diag` writes in place, `dd` could change too - confirm.
    dd = laplacian.diagonal()

    if (eigen_solver == 'arpack' or eigen_solver != 'lobpcg' and
        (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components)):
        # lobpcg used with eigen_solver='amg' has bugs for low number of nodes
        # for details see the source code in scipy:
        # https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen
        # /lobpcg/lobpcg.py#L237
        # or matlab:
        # http://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
        laplacian = _set_diag(laplacian, 1)

        # Here we'll use shift-invert mode for fast eigenvalues
        # (see http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
        #  for a short explanation of what this means)
        # Because the normalized Laplacian has eigenvalues between 0 and 2,
        # I - L has eigenvalues between -1 and 1.  ARPACK is most efficient
        # when finding eigenvalues of largest magnitude (keyword which='LM')
        # and when these eigenvalues are very large compared to the rest.
        # For very large, very sparse graphs, I - L can have many, many
        # eigenvalues very near 1.0.  This leads to slow convergence.  So
        # instead, we'll use ARPACK's shift-invert mode, asking for the
        # eigenvalues near 1.0.  This effectively spreads-out the spectrum
        # near 1.0 and leads to much faster convergence: potentially an
        # orders-of-magnitude speedup over simply using keyword which='LA'
        # in standard mode.
        try:
            # We are computing the opposite of the laplacian inplace so as
            # to spare a memory allocation of a possibly very large array
            # NOTE(review): `laplacian` is supplied by the caller here, so
            # this in-place negation may be visible outside the function
            # unless `_set_diag` returned a copy - confirm.
            laplacian *= -1
            lambdas, diffusion_map = eigsh(laplacian,
                                           k=n_components,
                                           sigma=1.0,
                                           which='LM',
                                           tol=eigen_tol)
            # Reverse the order of the eigenvectors and scale them by dd.
            embedding = diffusion_map.T[n_components::-1] * dd
        except RuntimeError:
            # When submatrices are exactly singular, an LU decomposition
            # in arpack fails. We fallback to lobpcg
            eigen_solver = "lobpcg"
            # Revert the laplacian to its opposite to have lobpcg work
            laplacian *= -1

    # This is a separate `if`, not an `elif`: when 'amg' was requested the
    # branch below runs even if the ARPACK branch above already produced an
    # embedding, and its result replaces that embedding.
    if eigen_solver == 'amg':
        # Use AMG to get a preconditioner and speed up the eigenvalue
        # problem.
        if not sparse.issparse(laplacian):
            warnings.warn("AMG works better for sparse matrices")
        # lobpcg needs double precision floats
        laplacian = check_array(laplacian,
                                dtype=np.float64,
                                accept_sparse=True)
        laplacian = _set_diag(laplacian, 1)
        ml = smoothed_aggregation_solver(check_array(laplacian, 'csr'))
        M = ml.aspreconditioner()
        # Random initial block with one extra column; the first column is
        # replaced by dd.
        X = random_state.rand(laplacian.shape[0], n_components + 1)
        X[:, 0] = dd.ravel()
        lambdas, diffusion_map = lobpcg(laplacian,
                                        X,
                                        M=M,
                                        tol=1.e-12,
                                        largest=False)
        embedding = diffusion_map.T * dd
        # A single-row embedding is rejected.
        if embedding.shape[0] == 1:
            raise ValueError

    elif eigen_solver == "lobpcg":
        # lobpcg needs double precision floats
        laplacian = check_array(laplacian,
                                dtype=np.float64,
                                accept_sparse=True)
        if n_nodes < 5 * n_components + 1:
            # see note above under arpack why lobpcg has problems with small
            # number of nodes
            # lobpcg will fallback to eigh, so we short circuit it
            if sparse.isspmatrix(laplacian):
                laplacian = laplacian.toarray()
            lambdas, diffusion_map = eigh(laplacian)
            embedding = diffusion_map.T[:n_components] * dd
        else:
            laplacian = _set_diag(laplacian, 1)
            # We increase the number of eigenvectors requested, as lobpcg
            # doesn't behave well in low dimension
            X = random_state.rand(laplacian.shape[0], n_components + 1)
            X[:, 0] = dd.ravel()
            lambdas, diffusion_map = lobpcg(laplacian,
                                            X,
                                            tol=1e-15,
                                            largest=False,
                                            maxiter=2000)
            embedding = diffusion_map.T[:n_components] * dd
            # A single-row embedding is rejected.
            if embedding.shape[0] == 1:
                raise ValueError

    embedding = _deterministic_vector_sign_flip(embedding)
    # Rows of `embedding` are eigenvectors; transpose so that rows are samples.
    if drop_first:
        return embedding[1:n_components].T
    else:
        return embedding[:n_components].T
Esempio n. 28
0
from sklearn.utils.arpack import eigsh

# Exploratory script: compares eigenvalue estimates of the negated Laplacian
# obtained through TruncatedSVD with those returned by eigsh, then fits a
# SpectralEmbedding on the same features.
app = service.prodbox.CinemaService()

# Feature matrix fetched from the service (the meaning of the argument 15 is
# not visible here).
X = app.getWeightedSearchFeatures(15)

# presumably a 10-nearest-neighbours graph and its normalized Laplacian
# (second positional argument True) - TODO confirm against the helpers.
graph = kneighbors_graph(X, 10)
lap = graph_laplacian(graph, True)

from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=30, algorithm="arpack")
# Overwrite the Laplacian diagonal with 1 before decomposing -lap.
lap = spectral_embedding_._set_diag(lap, 1)
svd.fit(-lap)

# Diagonal of components_ * (-lap) * components_.T.
# NOTE(review): relies on `.todense()` returning a matrix type so that `*`
# acts as a matrix product - confirm.
eigenvalues = np.diag(svd.components_ * (-lap).todense() * svd.components_.T)

# Same number of eigenvalues from ARPACK in shift-invert mode, for comparison.
eigenvalues2, _ = eigsh(-lap, k=30, which='LM', sigma=1)
print(eigenvalues)

print(eigenvalues2)

se = SpectralEmbedding(n_components=30,
                       eigen_solver='arpack',
                       affinity="nearest_neighbors")
se.fit(X)

app.quit()

# TODO : check budget distribution, draw budget conditionally
out = connected_components(graph)
Esempio n. 29
0
def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
                       random_state=None, eigen_tol=0.0,
                       norm_laplacian=True, drop_first=True,
                       mode=None):
    """Project the sample on the first eigen vectors of the graph Laplacian.
    MMP:TO CHANGE THIS

    The adjacency matrix is used to compute a normalized graph Laplacian
    whose spectrum (especially the eigen vectors associated to the
    smallest eigen values) has an interpretation in terms of minimal
    number of cuts necessary to split the graph into comparably sized
    components.

    This embedding can also 'work' even if the ``adjacency`` variable is
    not strictly the adjacency matrix of a graph but more generally
    an affinity or similarity matrix between samples (for instance the
    heat kernel of a euclidean distance matrix or a k-NN matrix).

    However, care must be taken to always make the affinity matrix symmetric
    so that the eigen vector decomposition works as expected.

    Parameters
    ----------
    adjacency : array-like or sparse matrix, shape: (n_samples, n_samples)
        The adjacency matrix of the graph to embed. It is always replaced
        by the average of itself and its transpose before use.

    n_components : integer, optional
        The dimension of the projection subspace.

    eigen_solver : {None, 'arpack', 'lobpcg', or 'amg'}
        The eigenvalue decomposition strategy to use. AMG requires pyamg
        to be installed. It can be faster on very large, sparse problems,
        but may also lead to instabilities. None selects 'arpack'.

    random_state : int seed, RandomState instance, or None (default)
        A pseudo random number generator used for the initialization of the
        lobpcg eigen vectors decomposition when eigen_solver is 'amg' or
        'lobpcg'.

    eigen_tol : float, optional, default=0.0
        Stopping criterion for eigendecomposition of the Laplacian matrix
        when using arpack eigen_solver.

    norm_laplacian : bool, optional, default=True
        Forwarded as ``normed`` to ``graph_laplacian``.

    drop_first : bool, optional, default=True
        Whether to drop the first eigenvector. For spectral embedding, this
        should be True as the first eigenvector should be constant vector for
        connected graph, but for spectral clustering, this should be kept as
        False to retain the first eigenvector.

    mode : deprecated
        Former name of ``eigen_solver``. When given, a DeprecationWarning is
        issued and its value overrides ``eigen_solver``.

    Returns
    -------
    embedding : array, shape=(n_samples, n_components)
        The reduced samples.

    Raises
    ------
    ValueError
        If 'amg' is requested but pyamg cannot be imported, if eigen_solver
        is not one of the accepted values, or if the amg / lobpcg path
        yields an embedding with a single row.

    Notes
    -----
    Spectral embedding is most useful when the graph has one connected
    component. If the graph has many components, the first few eigenvectors
    will simply uncover the connected components of the graph.

    References
    ----------
    * http://en.wikipedia.org/wiki/LOBPCG

    * Toward the Optimal Preconditioned Eigensolver: Locally Optimal
      Block Preconditioned Conjugate Gradient Method
      Andrew V. Knyazev
      http://dx.doi.org/10.1137%2FS1064827500366124
    """

    # pyamg is optional: its absence is only an error when 'amg' was requested.
    try:
        from pyamg import smoothed_aggregation_solver
    except ImportError:
        if eigen_solver == "amg" or mode == "amg":
            raise ValueError("The eigen_solver was set to 'amg', but pyamg is "
                             "not available.")

    if mode is not None:
        warnings.warn("'mode' was renamed to eigen_solver "
                      "and will be removed in 0.15.",
                      DeprecationWarning)
        eigen_solver = mode

    if eigen_solver is None:
        eigen_solver = 'arpack'
    elif eigen_solver not in ('arpack', 'lobpcg', 'amg'):
        raise ValueError("Unknown value for eigen_solver: '%s'."
                         "Should be 'amg', 'arpack', or 'lobpcg'"
                         % eigen_solver)

    random_state = check_random_state(random_state)

    n_nodes = adjacency.shape[0]
    # Whether to drop the first eigenvector: one extra vector is computed so
    # that n_components remain after the first one is discarded.
    if drop_first:
        n_components = n_components + 1
    # Check that the matrices given is symmetric
    if ((not sparse.isspmatrix(adjacency) and
         not np.all((adjacency - adjacency.T) < 1e-10)) or
        (sparse.isspmatrix(adjacency) and
         not np.all((adjacency - adjacency.T).data < 1e-10))):
        warnings.warn("Graph adjacency matrix should be symmetric. "
                      "Converted to be symmetric by average with its "
                      "transpose.")
    # The symmetrization is applied unconditionally; the warning above is
    # only emitted when the input was detected as asymmetric.
    adjacency = .5 * (adjacency + adjacency.T)

    if not _graph_is_connected(adjacency):
        warnings.warn("Graph is not fully connected, spectral embedding"
                      " may not work as expected.")

    laplacian, dd = graph_laplacian(adjacency,
                                    normed=norm_laplacian, return_diag=True)
    if (eigen_solver == 'arpack'
        or eigen_solver != 'lobpcg' and
            (not sparse.isspmatrix(laplacian)
             or n_nodes < 5 * n_components)):
        # lobpcg used with eigen_solver='amg' has bugs for low number of nodes
        # for details see the source code in scipy:
        # https://github.com/scipy/scipy/blob/v0.11.0/scipy/sparse/linalg/eigen
        # /lobpcg/lobpcg.py#L237
        # or matlab:
        # http://www.mathworks.com/matlabcentral/fileexchange/48-lobpcg-m
        laplacian = _set_diag(laplacian, 1)

        # Here we'll use shift-invert mode for fast eigenvalues
        # (see http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
        #  for a short explanation of what this means)
        # Because the normalized Laplacian has eigenvalues between 0 and 2,
        # I - L has eigenvalues between -1 and 1.  ARPACK is most efficient
        # when finding eigenvalues of largest magnitude (keyword which='LM')
        # and when these eigenvalues are very large compared to the rest.
        # For very large, very sparse graphs, I - L can have many, many
        # eigenvalues very near 1.0.  This leads to slow convergence.  So
        # instead, we'll use ARPACK's shift-invert mode, asking for the
        # eigenvalues near 1.0.  This effectively spreads-out the spectrum
        # near 1.0 and leads to much faster convergence: potentially an
        # orders-of-magnitude speedup over simply using keyword which='LA'
        # in standard mode.
        try:
            lambdas, diffusion_map = eigsh(-laplacian, k=n_components,
                                           sigma=1.0, which='LM',
                                           tol=eigen_tol)
            # Reverse the order of the eigenvectors and scale them by dd.
            embedding = diffusion_map.T[n_components::-1] * dd
        except RuntimeError:
            # When submatrices are exactly singular, an LU decomposition
            # in arpack fails. We fallback to lobpcg
            eigen_solver = "lobpcg"

    # This is a separate `if`, not an `elif`: when 'amg' was requested the
    # branch below runs even if the ARPACK branch above already produced an
    # embedding, and its result replaces that embedding.
    if eigen_solver == 'amg':
        # Use AMG to get a preconditioner and speed up the eigenvalue
        # problem.
        if not sparse.issparse(laplacian):
            warnings.warn("AMG works better for sparse matrices")
        # lobpcg needs double precision floats. `np.float` was only an alias
        # of the builtin float and no longer exists in recent NumPy releases,
        # so the explicit 64-bit type is used.
        laplacian = laplacian.astype(np.float64)
        laplacian = _set_diag(laplacian, 1)
        ml = smoothed_aggregation_solver(atleast2d_or_csr(laplacian))
        M = ml.aspreconditioner()
        # Random initial block with one extra column; the first column is
        # replaced by dd.
        X = random_state.rand(laplacian.shape[0], n_components + 1)
        X[:, 0] = dd.ravel()
        lambdas, diffusion_map = lobpcg(laplacian, X, M=M, tol=1.e-12,
                                        largest=False)
        embedding = diffusion_map.T * dd
        # A single-row embedding is rejected.
        if embedding.shape[0] == 1:
            raise ValueError

    elif eigen_solver == "lobpcg":
        # lobpcg needs double precision floats (see the note on np.float64
        # in the amg branch). This conversion covers both sub-branches below.
        laplacian = laplacian.astype(np.float64)
        if n_nodes < 5 * n_components + 1:
            # see note above under arpack why lobpcg has problems with small
            # number of nodes
            # lobpcg will fallback to symeig, so we short circuit it
            if sparse.isspmatrix(laplacian):
                laplacian = laplacian.todense()
            lambdas, diffusion_map = symeig(laplacian)
            embedding = diffusion_map.T[:n_components] * dd
        else:
            laplacian = _set_diag(laplacian, 1)
            # We increase the number of eigenvectors requested, as lobpcg
            # doesn't behave well in low dimension
            X = random_state.rand(laplacian.shape[0], n_components + 1)
            X[:, 0] = dd.ravel()
            lambdas, diffusion_map = lobpcg(laplacian, X, tol=1e-15,
                                            largest=False, maxiter=2000)
            embedding = diffusion_map.T[:n_components] * dd
            # A single-row embedding is rejected.
            if embedding.shape[0] == 1:
                raise ValueError
    # Rows of `embedding` are eigenvectors; transpose so that rows are samples.
    if drop_first:
        return embedding[1:n_components].T
    else:
        return embedding[:n_components].T
import numpy as np
from sklearn.utils.arpack import eigsh

# Exploratory script: compares eigenvalue estimates of the negated Laplacian
# obtained through TruncatedSVD with those returned by eigsh, then fits a
# SpectralEmbedding on the same features.
app = service.prodbox.CinemaService()

# Feature matrix fetched from the service (the meaning of the argument 15 is
# not visible here).
X = app.getWeightedSearchFeatures(15)

# presumably a 10-nearest-neighbours graph and its normalized Laplacian
# (second positional argument True) - TODO confirm against the helpers.
graph = kneighbors_graph(X, 10)
lap = graph_laplacian(graph, True)

from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components = 30, algorithm="arpack")
# Overwrite the Laplacian diagonal with 1 before decomposing -lap.
lap = spectral_embedding_._set_diag(lap, 1)
svd.fit(-lap)

# Diagonal of components_ * (-lap) * components_.T.
# NOTE(review): relies on `.todense()` returning a matrix type so that `*`
# acts as a matrix product - confirm.
eigenvalues = np.diag(svd.components_ * (-lap).todense() * svd.components_.T)

# Same number of eigenvalues from ARPACK in shift-invert mode, for comparison.
eigenvalues2, _ = eigsh(-lap, k=30, which='LM', sigma=1)
print(eigenvalues)

print(eigenvalues2)

se = SpectralEmbedding(n_components = 30, eigen_solver='arpack', affinity="nearest_neighbors")
se.fit(X)

app.quit()

# TODO : check budget distribution, draw budget conditionally
out = connected_components(graph)

Esempio n. 31
0
def null_space(M, k, k_skip=1, eigen_solver='arpack', tol=1E-6, max_iter=100,
               random_state=None):
    """
    Compute eigenvectors spanning the (approximate) null space of M.

    Parameters
    ----------
    M : {array, matrix, sparse matrix, LinearOperator}
        Input covariance matrix: should be symmetric positive semi-definite

    k : integer
        Number of eigenvalues/vectors to return

    k_skip : integer, optional
        Number of low eigenvalues to skip.

    eigen_solver : string, {'auto', 'arpack', 'dense'}
        auto : pick 'arpack' when M has more than 200 rows and
                    k + k_skip < 10, otherwise 'dense'.
        arpack : use arnoldi iteration in shift-invert mode.
                    For this method, M may be a dense matrix, sparse matrix,
                    or general linear operator.
                    Warning: ARPACK can be unstable for some problems.  It is
                    best to try several random seeds in order to check results.
        dense  : use standard dense matrix operations for the eigenvalue
                    decomposition.  For this method, M must be an array
                    or matrix type.  This method should be avoided for
                    large problems.

    tol : float, optional
        Tolerance for 'arpack' method.
        Not used if eigen_solver=='dense'.

    max_iter : maximum number of iterations for 'arpack' method
        not used if eigen_solver=='dense'

    random_state: numpy.RandomState or int, optional
        The generator or seed used to determine the starting vector for arpack
        iterations.  Defaults to numpy.random.

    Returns
    -------
    (vectors, total) : tuple
        The selected eigenvectors (as columns) and the sum of the
        corresponding eigenvalues.

    Raises
    ------
    ValueError
        If ARPACK raises a RuntimeError, or if eigen_solver is not one of
        the recognized values.
    """
    # Resolve 'auto' into one of the two concrete solvers first.
    if eigen_solver == 'auto':
        prefers_arpack = M.shape[0] > 200 and k + k_skip < 10
        eigen_solver = 'arpack' if prefers_arpack else 'dense'

    if eigen_solver == 'dense':
        if hasattr(M, 'toarray'):
            M = M.toarray()
        # Only the eigenpairs with indices k_skip .. k + k_skip - 1 are
        # requested, so the skipped ones never appear in the output.
        values, vectors = eigh(
            M, eigvals=(k_skip, k + k_skip - 1), overwrite_a=True)
        # Order the columns by increasing absolute eigenvalue.
        by_magnitude = np.argsort(np.abs(values))
        return vectors[:, by_magnitude], np.sum(values)

    if eigen_solver != 'arpack':
        raise ValueError("Unrecognized eigen_solver '%s'" % eigen_solver)

    rng = check_random_state(random_state)
    # initialize with [-1,1] as in ARPACK
    start_vector = rng.uniform(-1, 1, M.shape[0])
    try:
        values, vectors = eigsh(M, k + k_skip, sigma=0.0,
                                tol=tol, maxiter=max_iter,
                                v0=start_vector)
    except RuntimeError as msg:
        raise ValueError("Error in determining null-space with ARPACK. "
                         "Error message: '%s'. "
                         "Note that method='arpack' can fail when the "
                         "weight matrix is singular or otherwise "
                         "ill-behaved.  method='dense' is recommended. "
                         "See online documentation for more information."
                         % msg)

    # Discard the first k_skip eigenpairs from both outputs.
    return vectors[:, k_skip:], np.sum(values[k_skip:])