def make_semiposdef(
    matrix: np.ndarray,
    maxiter: Optional[int] = 10,
    tol: Optional[float] = 1e-12,
    verbose: Optional[bool] = False,
) -> np.ndarray:
    """Make quadratic matrix positive semi-definite by increasing its eigenvalues

    The input is first symmetrised as ``0.5 * (matrix + matrix.T)``. While the
    smallest eigenvalue is below ``tol``, ``(|e| + tol) * I`` is added to the
    matrix, for at most ``maxiter`` rounds. The input object itself is not
    modified.

    Parameters
    ----------
    matrix : array_like of shape (N,N)
        the matrix to process (dense array or scipy sparse matrix)
    maxiter : int, optional
        the maximum number of iterations for increasing the eigenvalues,
        defaults to 10
    tol : float, optional
        tolerance for deciding if pos. semi-def., defaults to 1e-12
    verbose : bool, optional
        If True print smallest eigenvalue of the resulting matrix, if False
        (default) be quiet

    Returns
    -------
    (N,N) array_like
        quadratic positive semi-definite matrix

    Raises
    ------
    ValueError
        If matrix is not square.
    """
    n, m = matrix.shape
    if n != m:
        raise ValueError("Matrix has to be quadratic")

    # sparse matrices go through ARPACK, dense ones through LAPACK
    sparse_input = issparse(matrix)

    def _smallest_eigenvalue(mat):
        if sparse_input:
            return np.real(
                eigs(mat, which="SR", return_eigenvectors=False)
            ).min()
        return np.real(np.linalg.eigvals(mat)).min()

    # enforce symmetric matrix (this also creates a new object, so the
    # caller's matrix is left untouched)
    matrix = 0.5 * (matrix + matrix.T)
    identity = eye(n, format=matrix.format) if sparse_input else np.eye(n)

    e = _smallest_eigenvalue(matrix)
    count = 0

    # increase the eigenvalues until matrix is positive semi-definite;
    # `count` is advanced in both the sparse and the dense case so that
    # `maxiter` is always honoured
    while e < tol and count < maxiter:
        matrix = matrix + (np.absolute(e) + tol) * identity
        e = _smallest_eigenvalue(matrix)
        count += 1

    if verbose:
        print("Final result of make_semiposdef: smallest eigenvalue is %e" % e)

    return matrix
def calculate_one(self, evsearch, searchnum):
    """
    Calculate single mode using fixed point iterations
    calculate_one(evsearch, searchnum)

    Starting from the eigenvalue guess ``evsearch``, the boundary conditions
    and the shift-invert operator are updated with the current eigenvalue,
    ``searchnum`` eigenpairs are computed, and the eigenvalue closest to the
    current one is tracked. This repeats until the residue drops below
    ``self.abc_convergence`` or ``self.abc_iterations`` is reached.

    Returns the mode object and the eigenvalues of the last solve.
    """
    m0 = self.m0
    wl = self.wl
    # only the end of the boundary-condition rows is needed below
    _, bcend = self.equation.diff.bc_extents()
    coord = self.wg.get_coord(self.base_shape, border=1)

    # Create new mode
    mode = self.mode_class(coord=coord, symmetry=self.wg.symmetry, m0=m0,
                           wl=wl, evalue=evsearch, wg=self.wg)
    mode.right = ones(mode.shape, dtype=self.dtype)
    mode.discard_vectors = self.discard_vectors

    residue = inf
    numit = 0
    evtol = 1e-10
    while residue > self.abc_convergence and numit < self.abc_iterations:
        numit += 1

        # Update equation with new BCs and update LU decomp
        self.update_lambda(mode.evalue)
        if self.equation.coord.rmin == 0:
            self.si.update(self.matrix, uprows=bcend)
        else:
            self.si.set_shift(self.matrix, complex(mode.evalue))

        # Solve linear eigenproblem
        if use_arpack:
            evals, revecs = eigs(self.si, k=searchnum,
                                 which='LM', return_eigenvectors=True)
            evals = self.si.eigenvalue_transform(evals)
        else:
            evals, revecs = eigs(self.si, searchnum, tol=evtol)
            revecs = revecs.T

        # Locate closest eigenvalue
        itrack = absolute(evals - mode.evalue).argmin()
        # Measure the eigenvalue change BEFORE overwriting mode.evalue;
        # computing it afterwards always yields zero.
        evconverge = abs(mode.evalue - evals[itrack])
        mode.evalue = evals[itrack]
        mode.right = asarray(revecs)[:, itrack]

        # Residue and convergence
        residue = mode.residue = self.residue(mode)
        mode.convergence += [residue]
        mode.track += [mode.evalue]

        logging.debug("[%d] neff: %s conv: %.3g, res:%.3g"
                      % (numit, mode.neff, evconverge, mode.residue))

    mode.iterations = numit

    # Discard vectors at this stage, to free up memory if we can
    mode.compress()
    return mode, evals
def make_semiposdef(matrix, maxiter=10, tol=1e-12, verbose=False):
    """
    Make quadratic matrix positive semi-definite by increasing its eigenvalues

    The input is first symmetrised as ``0.5 * (matrix + matrix.T)``. While the
    smallest eigenvalue is below ``tol``, ``(|e| + tol) * I`` is added to the
    matrix, for at most ``maxiter`` rounds. The input object itself is not
    modified.

    Parameters
    ----------
    matrix : (N,N) array_like
        dense array or scipy sparse matrix
    maxiter: int
        the maximum number of iterations for increasing the eigenvalues
    tol: float
        tolerance for deciding if pos. semi-def.
    verbose: bool
        If True print the smallest eigenvalue of the resulting matrix.

    Returns
    -------
    (N,N) array_like
        quadratic positive semi-definite matrix

    Raises
    ------
    ValueError
        If matrix is not square.
    """
    n, m = matrix.shape
    if n != m:
        raise ValueError("Matrix has to be quadratic")

    # sparse matrices go through ARPACK, dense ones through LAPACK
    sparse_input = issparse(matrix)

    def _smallest_eigenvalue(mat):
        if sparse_input:
            return np.real(
                eigs(mat, which="SR", return_eigenvectors=False)
            ).min()
        return np.real(np.linalg.eigvals(mat)).min()

    # enforce symmetric matrix (this also creates a new object, so the
    # caller's matrix is left untouched)
    matrix = 0.5 * (matrix + matrix.T)
    identity = eye(n, format=matrix.format) if sparse_input else np.eye(n)

    e = _smallest_eigenvalue(matrix)
    count = 0

    # increase the eigenvalues until matrix is positive semi-definite;
    # `count` is advanced in both the sparse and the dense case so that
    # `maxiter` is always honoured
    while e < tol and count < maxiter:
        matrix = matrix + (np.absolute(e) + tol) * identity
        e = _smallest_eigenvalue(matrix)
        count += 1

    if verbose:
        print("Final result of make_semiposdef: smallest eigenvalue is %e" % e)

    return matrix
def main(): #K = 2 #N = 9 #fileMat,labList = fileTomatrix('german.txt') K = 10 N = 9 fileMat,labList = fileTomatrix('mnist.txt') disMat = np.zeros((np.shape(fileMat)[0],np.shape(fileMat)[0])) graphMat = disMat.copy() disMat = distance.cdist(fileMat,fileMat,'euclidean') print 1 for i in range(np.shape(disMat)[0]): temp = np.argpartition(disMat[i],(1,N+1)) result_args = temp[1:N+1] graphMat[i][result_args] = 1 print 2 D = graphMat.sum(1) D = sparse.spdiags(D, 0, np.shape(graphMat)[0], np.shape(graphMat)[0]).toarray() L = D - graphMat vals,vecs = arpack.eigs(L,k=K,tol=0,which='LM') print vecs clustermatch,randcluster = k_means(vecs,labList,K) a,b = calcuPuri(clustermatch,labList,K) print a,b
def worker():
    """Solve a 50x50 tridiagonal problem with eigs and then eigsh.

    Uses ``v0`` as the start vector and appends each set of eigenvalues to
    ``results``; both names come from the enclosing scope.
    """
    x = diags([1, -2, 1], [-1, 0, 1], shape=(50, 50))
    for solver in (eigs, eigsh):
        w, v = solver(x, k=3, v0=v0)
        results.append(w)
def computeEigenValsVectors(mat, dimensionReductionNum):
    """Return the largest-magnitude eigenpairs of ``mat``.

    ``dimensionReductionNum`` eigenvalues/eigenvectors are requested from
    ARPACK with ``tol=0``. A debug message is logged when
    ``pv.outputDebugMsg`` is set.
    """
    values, vectors = arpack.eigs(
        mat,
        k=dimensionReductionNum,
        tol=0,
        which="LM",
    )

    if pv.outputDebugMsg:
        Utils.logMessage("\nCompute eigen values vectors finished")

    return values, vectors
def eval_evec(self, d, typ, k, which, v0=None):
    """Check eigs() against the exact eigenvalues stored in ``d``.

    ``d`` provides the matrix ('mat'), a default start vector ('v0') and the
    exact eigenvalues ('eval'). ``typ`` is the dtype character the matrix is
    cast to, ``k`` the number of eigenpairs and ``which`` the ARPACK
    selection mode. Both the eigenvalues and the relation
    A*evec = eval*evec are asserted.
    """
    a = d['mat'].astype(typ)
    # identity test: `v0 == None` is evaluated element-wise when v0 is an
    # array and cannot be used as a condition
    if v0 is None:
        v0 = d['v0']

    # get exact eigenvalues
    exact_eval = d['eval'].astype(typ.upper())
    ind = self.sort_choose(exact_eval, typ, k, which)
    exact_eval = exact_eval[ind]
    if verbose >= 3:
        print("exact")
        print(exact_eval)

    # compute eigenvalues (local names avoid shadowing the builtin `eval`)
    evals, evecs = eigs(a, k, which=which, v0=v0)
    ind = self.sort_choose(evals, typ, k, which)
    evals = evals[ind]
    evecs = evecs[:, ind]
    if verbose >= 3:
        print(evals)

    # check eigenvalues
    # check eigenvectors A*evec=eval*evec
    for i in range(k):
        assert_almost_equal_cc(evals[i], exact_eval[i],
                               decimal=_ndigits[typ])
        assert_array_almost_equal_cc(dot(a, evecs[:, i]),
                                     evals[i] * evecs[:, i],
                                     decimal=_ndigits[typ])
def eval_evec(self, d, typ, k, which, v0=None):
    """Check eigs() against the exact eigenvalues stored in ``d``.

    ``d`` provides the matrix ('mat'), a default start vector ('v0') and the
    exact eigenvalues ('eval'). ``typ`` is the dtype character the matrix is
    cast to, ``k`` the number of eigenpairs and ``which`` the ARPACK
    selection mode. Both the eigenvalues and the relation
    A*evec = eval*evec are asserted.
    """
    a = d['mat'].astype(typ)
    # identity test: `v0 == None` is evaluated element-wise when v0 is an
    # array and cannot be used as a condition
    if v0 is None:
        v0 = d['v0']

    # get exact eigenvalues
    exact_eval = d['eval'].astype(typ.upper())
    ind = self.sort_choose(exact_eval, typ, k, which)
    exact_eval = exact_eval[ind]
    if verbose >= 3:
        print("exact")
        print(exact_eval)

    # compute eigenvalues (local names avoid shadowing the builtin `eval`)
    evals, evecs = eigs(a, k, which=which, v0=v0)
    ind = self.sort_choose(evals, typ, k, which)
    evals = evals[ind]
    evecs = evecs[:, ind]
    if verbose >= 3:
        print(evals)

    # check eigenvalues
    # check eigenvectors A*evec=eval*evec
    for i in range(k):
        assert_almost_equal_cc(evals[i], exact_eval[i],
                               decimal=_ndigits[typ])
        assert_array_almost_equal_cc(dot(a, evecs[:, i]),
                                     evals[i] * evecs[:, i],
                                     decimal=_ndigits[typ])
def calculate(self, number=inf):
    """Find modes of the linearized system and refine each one.

    The shift-invert operator is created on first use, candidate eigenvalues
    are computed around ``self.evapprox``, optionally restricted to the
    interval derived from ``self.bracket``, and every candidate is refined
    with ``calculate_one``. Returns ``self.modes``.
    """
    m0 = self.m0
    # NOTE(review): `number` is clamped here but not used further down.
    number = min(number, self.totalnumber)

    logging.info("Center solver. Finding modes, m0=%d, wl=%.3g" % (m0, self.wl))

    # Create new matrix if there is no existing one
    if self.si is None:
        self.si = ShiftInvertBlock(overwrite=self.overwrite)
        self.create()

    # Time mode solve
    tick = timer.timer()
    tick.start()

    # Eigenvalue interval corresponding to the bracket
    evrange = array(self.bracket)**2 * self.k0**2

    # Update equation with new BCs and update LU decomp
    self.update_lambda(self.evapprox, not self.overwrite)
    self.si.set_shift(self.matrix, self.evapprox + 0j)

    # Find modes of linearized system
    if use_arpack:
        cev = eigen(self.si, k=self.totalnumber, which='LM',
                    return_eigenvectors=False)
        cev = self.si.eigenvalue_transform(cev)
    else:
        cev, cvecs = eigs(self.si, self.totalnumber, tol=1e-6)

    # Filter cev within range. A list is built explicitly because the
    # result is indexed and measured with len() below, which a lazy
    # Python 3 `filter` object does not support.
    if self.ignore_outside_interval:
        ev_low, ev_high = min(evrange), max(evrange)
        cev = [x for x in cev if ev_low <= real(x) <= ev_high]

    # Refine modes
    for ii in range(len(cev)):
        evsearch = cev[ii]

        # If the matrix is overwritten we must recreate it
        if self.overwrite:
            self.generate()
        self.si.set_shift(self.matrix, evsearch + 0j)

        # Calculate next mode
        mode, evals = self.calculate_one(evsearch, self.searchnumber)

        # Add mode to list
        if (mode.residue < self.abc_convergence) or self.add_if_unconverged:
            self.modes += [mode]

        avtime = tick.lap() / (ii + 1)
        logging.info("Mode #%d [%d/%.3gs], neff=%s, res: %.2e"
                     % (self.numbersolved, mode.iterations, avtime,
                        mode.neff, mode.residue))

    self.add_time(tick.lap())

    # Clean up if calculation is finished!
    if self.isfinished():
        self.finalize()

    return self.modes
def test_no_convergence(self):
    """eigs() must raise ArpackNoConvergence and expose partial results.

    With only 30 iterations allowed the solve is expected not to converge.
    The eigenpairs attached to the exception must still satisfy
    m*v = w*v to the precision configured for type 'D'.
    """
    np.random.seed(1234)
    m = np.random.rand(30, 30) + 1j*np.random.rand(30, 30)
    try:
        w, v = eigs(m, 3, which='LM', v0=m[:, 0], maxiter=30)
        raise AssertionError("Spurious no-error exit")
    # `except X as err` is valid on Python 2.6+ and Python 3, unlike the
    # comma form
    except ArpackNoConvergence as err:
        k = len(err.eigenvalues)
        if k <= 0:
            raise AssertionError("Spurious no-eigenvalues-found case")
        w, v = err.eigenvalues, err.eigenvectors
        for ww, vv in zip(w, v.T):
            assert_array_almost_equal(dot(m, vv), ww*vv,
                                      decimal=_ndigits['D'])
def test_standard_nonsymmetric_no_convergence():
    """A 5-iteration eigs() run must fail but still return valid pairs.

    The partially converged eigenpairs carried by ArpackNoConvergence are
    checked against m*v = w*v with the double-precision test tolerances.
    """
    np.random.seed(1234)
    m = generate_matrix(30, complex=True)
    tol, rtol, atol = _get_test_tolerance('d')

    try:
        eigs(m, 4, which='LM', v0=m[:, 0], maxiter=5, tol=tol)
    except ArpackNoConvergence as err:
        found_values = err.eigenvalues
        found_vectors = err.eigenvectors
        if len(found_values) <= 0:
            raise AssertionError("Spurious no-eigenvalues-found case")
        for value, vector in zip(found_values, found_vectors.T):
            assert_allclose(dot(m, vector), value * vector,
                            rtol=rtol, atol=atol)
    else:
        # reaching this point means the solver converged unexpectedly
        raise AssertionError("Spurious no-error exit")
def correlation_length(B):
    """Constructs the mixed transfermatrix and returns correlation length.

    Parameters
    ----------
    B : sequence of arrays
        One tensor per site; axis 0 is contracted with the conjugate tensor
        and axes 1 and 2 are the two bond indices (as used by the
        contractions below).

    Returns
    -------
    float
        ``-L / log(|eta_2|)`` with ``eta_2`` the second largest-magnitude
        eigenvalue of the transfer matrix and ``L = len(B)``.
    """
    chi = B[0].shape[1]
    L = len(B)

    T = np.tensordot(B[0], np.conj(B[0]), axes=(0, 0))  # a,b,a',b'
    T = T.transpose(0, 2, 1, 3)  # a,a',b,b'
    for i in range(1, L):
        T = np.tensordot(T, B[i], axes=(2, 1))  # a,a',b',i,b
        T = np.tensordot(T, np.conj(B[i]), axes=([2, 3], [1, 0]))  # a,a',b,b'
    dim = chi**2
    T = np.reshape(T, (dim, dim))

    # Obtain the 2nd largest eigenvalue
    if dim >= 20:
        eta = arp.eigs(T, k=2, which='LM', return_eigenvectors=False, ncv=20)
    else:
        # ARPACK requires ncv <= dim, so ncv=20 fails for small bond
        # dimensions; such tiny matrices are diagonalised densely instead.
        eta = np.linalg.eigvals(T)
        eta = eta[np.argsort(np.abs(eta))[-2:]]
    return -L / np.log(np.min(np.abs(eta)))
def test_eigs_for_k_greater():
    """Test eigs() for k beyond limits.

    For a dense 4x4 generalized problem, eigs() with k = 3, 4, 5 is expected
    to return the same result as the dense `eig`. LinearOperator or sparse
    inputs in that regime must raise TypeError.
    """
    A_sparse = diags([1, -2, 1], [-1, 0, 1], shape=(4, 4))  # sparse
    A = generate_matrix(4, sparse=False)
    M_dense = np.random.random((4, 4))
    M_sparse = generate_matrix(4, sparse=True)
    M_linop = aslinearoperator(M_dense)
    eig_tuple1 = eig(A, b=M_dense)
    eig_tuple2 = eig(A, b=M_sparse)

    # (mass matrix, requested k, expected dense result)
    dense_fallback_cases = [
        (M_dense, 3, eig_tuple1),
        (M_dense, 4, eig_tuple1),
        (M_dense, 5, eig_tuple1),
        (M_sparse, 5, eig_tuple2),
    ]

    with suppress_warnings() as sup:
        sup.filter(RuntimeWarning)

        for mass, num, expected in dense_fallback_cases:
            assert_equal(eigs(A, M=mass, k=num), expected)

        # M as LinearOperator
        assert_raises(TypeError, eigs, A, M=M_linop, k=3)

        # Test 'A' for different types
        assert_raises(TypeError, eigs, aslinearoperator(A), k=3)
        assert_raises(TypeError, eigs, A_sparse, k=3)
def test_regression_arpackng_1315():
    """Check that issue arpack-ng/#1315 is not present.

    Adapted from arpack-ng/TESTS/bug_1315_single.c
    If this fails, then the installed ARPACK library is faulty.
    """
    size = 1000
    nev = 9
    for dtype in (np.float32, np.float64):
        np.random.seed(1234)

        # diagonal matrix with eigenvalues 1..size
        spectrum = np.arange(1, size + 1).astype(dtype)
        A = diags([spectrum], [0], shape=(size, size))

        v0 = np.random.rand(size).astype(dtype)
        w, _ = eigs(A, k=nev, ncv=2 * nev + 1, which="LM", v0=v0)

        expected = np.sort(spectrum[-nev:])
        assert_allclose(np.sort(w), expected, rtol=1e-4)
def test_regression_arpackng_1315():
    """Check that issue arpack-ng/#1315 is not present.

    Adapted from arpack-ng/TESTS/bug_1315_single.c
    If this fails, then the installed ARPACK library is faulty.
    """
    size = 1000
    nev = 9
    for dtype in (np.float32, np.float64):
        np.random.seed(1234)

        # diagonal matrix with eigenvalues 1..size
        spectrum = np.arange(1, size + 1).astype(dtype)
        A = diags([spectrum], [0], shape=(size, size))

        v0 = np.random.rand(size).astype(dtype)
        w, _ = eigs(A, k=nev, ncv=2 * nev + 1, which="LM", v0=v0)

        expected = np.sort(spectrum[-nev:])
        assert_allclose(np.sort(w), expected, rtol=1e-4)
def correlation_length(self):
    """Diagonalize transfer matrix to obtain the correlation length.

    Only valid for ``self.bc == 'infinite'``. The tensors in ``self.Bs`` are
    contracted as (vL, i, vR). Returns ``-self.L / log(|eta_2|)`` with
    ``eta_2`` the second largest-magnitude eigenvalue of the transfer
    matrix of one unit cell.
    """
    # public import location; the former `scipy.sparse.linalg.eigen.arpack`
    # path is not importable in current SciPy releases
    from scipy.sparse.linalg import eigs

    assert self.bc == 'infinite'  # works only in the infinite case
    B = self.Bs[0]  # vL i vR
    chi = B.shape[0]
    T = np.tensordot(B, np.conj(B), axes=[1, 1])  # vL [i] vR, vL* [i*] vR*
    T = np.transpose(T, [0, 2, 1, 3])  # vL vL* vR vR*
    for i in range(1, self.L):
        B = self.Bs[i]
        T = np.tensordot(T, B, axes=[2, 0])  # vL vL* [vR] vR*, [vL] i vR
        T = np.tensordot(T, np.conj(B),
                         axes=[[2, 3], [0, 1]])  # vL vL* [vR*] [i] vR, [vL*] [i*] vR*
    dim = chi**2
    T = np.reshape(T, (dim, dim))

    # Obtain the 2nd largest eigenvalue
    if dim >= 20:
        eta = eigs(T, k=2, which='LM', return_eigenvectors=False, ncv=20)
    else:
        # ARPACK requires ncv <= dim, so ncv=20 fails for small bond
        # dimensions; such tiny matrices are diagonalised densely instead.
        eta = np.linalg.eigvals(T)
        eta = eta[np.argsort(np.abs(eta))[-2:]]
    return -self.L / np.log(np.min(np.abs(eta)))
def test_ticket_1459_arpack_crash():
    """Regression test for ticket 1459: eigs() must not crash.

    The specific start vector below is used with a seeded random 6x6 matrix.
    XXX: this test does not seem to catch the issue for float32, but the
    same fix was made there, just to be sure.
    """
    start_vector = [-0.71063568258907849895,
                    -0.83185111795729227424,
                    -0.34365925382227402451,
                    0.46122533684552280420,
                    -0.58001341115969040629,
                    -0.78844877570084292984e-01]
    N = 6
    k = 2

    for dtype in (np.float32, np.float64):
        np.random.seed(2301)
        A = np.random.random((N, N)).astype(dtype)
        v0 = np.array(start_vector, dtype=dtype)

        # Should not crash:
        evals, evecs = eigs(A, k, v0=v0)
def test_ticket_1459_arpack_crash():
    """Regression test for ticket 1459: eigs() must not crash.

    The specific start vector below is used with a seeded random 6x6 matrix.
    This test does not seem to catch the issue for float32, but the same
    fix was made there, just to be sure.
    """
    start_vector = [-0.71063568258907849895,
                    -0.83185111795729227424,
                    -0.34365925382227402451,
                    0.46122533684552280420,
                    -0.58001341115969040629,
                    -0.78844877570084292984e-01]
    N = 6
    k = 2

    for dtype in (np.float32, np.float64):
        np.random.seed(2301)
        A = np.random.random((N, N)).astype(dtype)
        v0 = np.array(start_vector, dtype=dtype)

        # Should not crash:
        evals, evecs = eigs(A, k, v0=v0)
def extern_pca(data, k):
    """
    PCA through the eigen decomposition of the sample-by-sample product
    matrix instead of the feature covariance matrix.

    arguments:
    * data: 2D numpy array where each row is a sample and each column a
      feature.
    * k: number of principal components to keep.

    return:
    * w: the k largest-magnitude eigenvalues as returned by eigs, divided by
      (number of samples - 1).
    * u: the corresponding vectors. u[:,i] belongs to w[i].

    Notes: with X the centered data, X.T = U * S * v.T, so the eigen
    decomposition of X * X.T = v * S^2 * v.T is computed and the feature-space
    vectors follow as U = X.T * v * S^(-1). This is cheaper when there are
    far fewer samples than features but can lose precision.
    """
    centered = data - data.mean(0)
    gram = np.dot(centered, centered.T)

    w, v = eigs(gram, k=k, which='LM')

    # map the sample-space eigenvectors to feature space and rescale
    rescaled = v / np.sqrt(w)
    U = np.dot(data.T, rescaled)

    n_minus_one = len(data) - 1
    return w / n_minus_one, U
def extern_pca(data, k):
    """
    PCA through the eigen decomposition of the sample-by-sample product
    matrix instead of the feature covariance matrix.

    arguments:
    * data: 2D numpy array where each row is a sample and each column a
      feature.
    * k: number of principal components to keep.

    return:
    * w: the k largest-magnitude eigenvalues as returned by eigs, divided by
      (number of samples - 1).
    * u: the corresponding vectors. u[:,i] belongs to w[i].

    Notes: with X the centered data, X.T = U * S * v.T, so the eigen
    decomposition of X * X.T = v * S^2 * v.T is computed and the feature-space
    vectors follow as U = X.T * v * S^(-1). This is cheaper when there are
    far fewer samples than features but can lose precision.
    """
    centered = data - data.mean(0)
    gram = np.dot(centered, centered.T)

    w, v = eigs(gram, k=k, which='LM')

    # map the sample-space eigenvectors to feature space and rescale
    rescaled = v / np.sqrt(w)
    U = np.dot(data.T, rescaled)

    n_minus_one = len(data) - 1
    return w / n_minus_one, U
def test_real_eigs_real_k_subset():
    """Eigenvalues found for k must be contained in those found for k+1.

    Runs eigs() on a seeded random sparse 10x10 matrix for every selection
    mode, three shift settings and two real dtypes, and checks the residual
    of each solve, the subset property and (for 'LM') the sort order.
    """
    np.random.seed(1)

    n = 10
    A = rand(n, n, density=0.5)
    A.data *= 2
    A.data -= 1

    v0 = np.ones(n)

    whichs = ['LM', 'SM', 'LR', 'SR', 'LI', 'SI']
    sigmas = [None, 0, 5]
    dtypes = [np.float32, np.float64]

    for which in whichs:
        for sigma in sigmas:
            for dtype in dtypes:
                atol = np.sqrt(np.finfo(dtype).eps)
                A_typed = A.astype(dtype)
                v0_typed = v0.astype(dtype)
                prev_w = np.array([], dtype=dtype)

                for k in range(1, 9):
                    w, z = eigs(A_typed, k=k, which=which, sigma=sigma,
                                v0=v0_typed, tol=0)
                    residual = np.linalg.norm(A.dot(z) - z * w)
                    assert_allclose(residual, 0, atol=atol)

                    # Check that the set of eigenvalues for `k` is a subset
                    # of that for `k+1`
                    dist = abs(prev_w[:, None] - w).min(axis=1)
                    assert_allclose(dist, 0, atol=atol)

                    prev_w = w

                    # Check sort order
                    d = w if sigma is None else 1 / (w - sigma)

                    if which == 'LM':
                        # ARPACK is systematic for 'LM', but sort order
                        # appears not well defined for other modes
                        assert np.all(np.diff(abs(d)) <= 1e-6)
def calculate_large_eigens(matrix, k=1):
    """Calculate the largest-magnitude eigenvalues and corresponding
    eigenvectors of a matrix.

    Sparse input is solved with `eigs` for exactly k pairs; any other input
    is fully decomposed with `eig` and then truncated to k pairs.

    Args:
        matrix: a numpy 2D array or scipy sparse matrix.
        k: an integer, how many eigenvalues and eigenvectors to calculate.
            Default is 1

    Returns:
        values, vectors, reordered by `reorder_eigensystem` using the
        negated magnitude as sort key.
    """
    def negated_magnitude(value):
        return -np.abs(value)  # opposite of magnitude

    if not issparse(matrix):
        values, vectors = reorder_eigensystem(eig(matrix),
                                              key=negated_magnitude)
        return values[0:k], vectors[:, 0:k]

    partial = eigs(matrix, k=k, which='LM')
    return reorder_eigensystem(partial, key=negated_magnitude)
def kpca(data, k):
    """
    Eigen decomposition of a kernel matrix, truncated to k components.

    arguments:
    * data: 2D numpy array representing the symmetric kernel matrix.
    * k: number of principal components to keep.

    return:
    * w: the k largest-magnitude eigenvalues, in the order eigs returns them.
    * u: the corresponding eigen vectors. u[:,i] is the vector corresponding
      to w[i]

    Notes: If you want to perform the full decomposition, consider using
    'full_kpca' instead.
    """
    eigenvalues, eigenvectors = eigs(data, k=k, which='LM')
    return eigenvalues, eigenvectors
def kpca(data, k):
    """
    Eigen decomposition of a kernel matrix, truncated to k components.

    arguments:
    * data: 2D numpy array representing the symmetric kernel matrix.
    * k: number of principal components to keep.

    return:
    * w: the k largest-magnitude eigenvalues, in the order eigs returns them.
    * u: the corresponding eigen vectors. u[:,i] is the vector corresponding
      to w[i]

    Notes: If you want to perform the full decomposition, consider using
    'full_kpca' instead.
    """
    eigenvalues, eigenvectors = eigs(data, k=k, which='LM')
    return eigenvalues, eigenvectors
def chebyshev_polynomials(adj, k):
    """Calculate Chebyshev polynomials up to order k. Return a list of sparse
    matrices (tuple representation).

    The polynomials are evaluated on the Laplacian ``I - normalize_adj(adj)``
    rescaled with its largest-real-part eigenvalue, using the recurrence
    T_n = 2 * L_scaled * T_{n-1} - T_{n-2}.
    """
    n_nodes = adj.shape[0]

    laplacian = sp.eye(n_nodes) - normalize_adj(adj)
    largest_eigval, _ = eigs(laplacian, 1, which='LR', maxiter=5000)
    scaled_laplacian = (2. / largest_eigval[0]) * laplacian - sp.eye(n_nodes)

    # T_0 = I and T_1 = scaled Laplacian seed the recurrence
    polynomials = [sp.eye(n_nodes), scaled_laplacian]

    for _ in range(2, k + 1):
        previous, before_previous = polynomials[-1], polynomials[-2]
        lap_copy = sp.csr_matrix(scaled_laplacian, copy=True)
        polynomials.append(2 * lap_copy.dot(previous) - before_previous)

    return sparse_to_tuple(polynomials)
def pca(data, k):
    """
    Eigen decomposition of the covariance matrix, truncated to k components.

    arguments:
    * data: 2D numpy array where each row is a sample and each column a
      feature.
    * k: number of principal components to keep.

    return:
    * w: the k largest-magnitude eigenvalues of the covariance matrix, in the
      order eigs returns them.
    * u: the corresponding eigen vectors. u[:,i] is the vector corresponding
      to w[i]

    Notes: If the number of samples is much smaller than the number of
    features, you should consider the use of 'svd_pca'.
    """
    covariance = np.cov(data.T)

    # which='LM' asks for the k eigenvalues of largest magnitude
    eigenvalues, eigenvectors = eigs(covariance, k=k, which='LM')
    return eigenvalues, eigenvectors
def pca(data, k):
    """
    Eigen decomposition of the covariance matrix, truncated to k components.

    arguments:
    * data: 2D numpy array where each row is a sample and each column a
      feature.
    * k: number of principal components to keep.

    return:
    * w: the k largest-magnitude eigenvalues of the covariance matrix, in the
      order eigs returns them.
    * u: the corresponding eigen vectors. u[:,i] is the vector corresponding
      to w[i]

    Notes: If the number of samples is much smaller than the number of
    features, you should consider the use of 'svd_pca'.
    """
    covariance = np.cov(data.T)

    # which='LM' asks for the k eigenvalues of largest magnitude
    eigenvalues, eigenvectors = eigs(covariance, k=k, which='LM')
    return eigenvalues, eigenvectors
def spectalCluster(data, sigma, num_clusters):
    """Fixed-sigma spectral clustering; prints one cluster label per row.

    ``data`` is squared element-wise and turned into a similarity matrix
    with a Gaussian kernel of width ``sigma``. The matrix is scaled from both
    sides by the inverse square root of its row sums, ``num_clusters``
    largest-magnitude eigenvectors are computed and row-normalised, and
    kMeans is run on them. Nothing is returned.
    """
    print("将邻接矩阵转换成相似矩阵")
    # Only the sigma != 0 case is handled for now
    print("Fixed-sigma谱聚类")
    squared = sparse.csc_matrix.multiply(data, data)
    exponent = -squared / (2 * sigma * sigma)
    # expm1 keeps the result sparse; adding sign**2 puts the "+1" back on
    # the stored entries only
    exp_minus_one = sparse.csc_matrix.expm1(exponent)
    stored_ones = sparse.csc_matrix.multiply(sparse.csc_matrix.sign(exponent),
                                             sparse.csc_matrix.sign(exponent))
    S = exp_minus_one + stored_ones

    # Convert to the Laplacian matrix
    print("将相似矩阵转换成Laplacian矩阵")
    row_sums = S.sum(1)  # the similarity matrix is symmetric
    inv_sqrt_degree = sqrt(1 / row_sums)
    n = len(inv_sqrt_degree)
    scaling = sparse.spdiags(inv_sqrt_degree.T, 0, n, n)
    L = scaling * S * scaling

    # Compute eigenvalues and eigenvectors
    print("求特征值和特征向量")
    vals, vecs = arpack.eigs(L, k=num_clusters, tol=0, which="LM")

    # Run k-means
    print("利用K-Means对特征向量聚类")
    # Normalise every row of vecs
    row_norms = sqrt(multiply(vecs, vecs).sum(1))
    n_rows, n_cols = shape(vecs)
    for r in range(n_rows):
        for c in range(n_cols):
            vecs[r, c] = vecs[r, c] / row_norms[r]

    myCentroids, clustAssing = kMeans(vecs, num_clusters)

    for r in range(shape(clustAssing)[0]):
        print(clustAssing[r, 0])
def spectral_clustering(similarity, sigma, clusters):
    """Fixed-sigma spectral clustering; returns kMeans' cluster assignment.

    ``similarity`` is converted to a sparse matrix, squared element-wise and
    passed through a Gaussian kernel of width ``sigma``. The result is scaled
    from both sides by the inverse square root of its row sums, ``clusters``
    largest-magnitude eigenvectors are computed and row-normalised, and
    kMeans is run on them.
    """
    print("get matrix S")
    sparse_similarity = sparse.csr_matrix(similarity)
    squared = sparse.csc_matrix.multiply(sparse_similarity, sparse_similarity)
    exponent = -squared / (2 * sigma * sigma)
    # expm1 keeps the result sparse; adding sign**2 puts the "+1" back on
    # the stored entries only
    exp_minus_one = sparse.csc_matrix.expm1(exponent)
    stored_ones = sparse.csc_matrix.multiply(sparse.csc_matrix.sign(exponent),
                                             sparse.csc_matrix.sign(exponent))
    S = exp_minus_one + stored_ones

    print("get matrix L")
    row_sums = S.sum(1)
    inv_sqrt_degree = sqrt(1 / row_sums)
    n = len(inv_sqrt_degree)
    scaling = sparse.spdiags(inv_sqrt_degree.T, 0, n, n)
    L = scaling * S * scaling

    print("get the eigenvalue and eigenvector")
    values, vectors = arpack.eigs(L, k=clusters, tol=0, which="LM")

    print(" vectors normalization")
    row_norms = sqrt(multiply(vectors, vectors).sum(1))
    n_rows, n_cols = shape(vectors)
    for r in range(n_rows):
        for c in range(n_cols):
            vectors[r][c] = vectors[r][c] / row_norms[r]

    print("perform k-means")
    centers, clusters_category = kMeans(vectors, clusters)
    # (printing of the individual assignments is disabled)
    return clusters_category
def A_matvec(x):
    """Matvec stand-in that runs a nested eigs() solve.

    The argument is ignored: a fixed 50x50 tridiagonal matrix is built, its
    single largest-magnitude eigenpair is computed, and the eigenvector
    divided by the eigenvalue is returned.
    """
    tridiagonal = diags([1, -2, 1], [-1, 0, 1], shape=(50, 50))
    eigenvalues, eigenvectors = eigs(tridiagonal, k=1)
    return eigenvectors / eigenvalues[0]
def compute(inv_dict, save=True):
    '''
    Compute the requested graph invariants and save each one to disk.

    @param inv_dict: is a dict optionally containing any of these:
      - inv_dict['edge']: boolean for global edge count
      - inv_dict['ver']: boolean for global vertex number
      - inv_dict['tri']: boolean for local triangle count
      - inv_dict['tri_fn']: the path of a precomputed triangle count (.npy)
      - inv_dict['eig']: boolean for eigenvalues and eigenvectors
      - inv_dict['eigvl_fn']: the path of a precomputed eigenvalues (.npy)
      - inv_dict['eigvect_fn']: the path of a precomputed eigenvectors (.npy)
      - inv_dict['deg']: boolean for local degree count
      - inv_dict['deg_fn']: the path of a precomputed degree count (.npy)
      - inv_dict['ss1']: boolean for scan 1 statistic
      - inv_dict['cc']: boolean for clustering coefficient
      - inv_dict['mad']: boolean for maximum average degree
      - inv_dict['save_dir']: the base path where all invariants will create
        sub-dirs & be saved
    @param save: boolean for auto save or not. TODO: use this

    @return: the updated inv_dict (with the *_fn entries of everything that
      was saved), or an error message string if the graph or the stored
      eigen decomposition could not be loaded.
    '''
    # Populate inv_dict
    inv_dict = populate_inv_dict(inv_dict)

    if inv_dict['save_dir'] is None:
        inv_dict['save_dir'] = os.path.dirname(inv_dict['graph_fn'])

    # Use the in-memory graph when one is supplied; otherwise (key missing
    # or value None) load it from file so that G is always defined.
    if 'G' in inv_dict and inv_dict['G'] is not None:
        G = inv_dict['G']
    elif inv_dict['graphsize'] == 'b' or inv_dict['graphsize'] == 'big':
        G = loadAdjMat(inv_dict['graph_fn'], inv_dict['lcc_fn'])  # TODO: test
    else:
        # small graphs
        G = loadAnyMat(inv_dict['graph_fn'], inv_dict['data_elem'])
        if isinstance(G, str):
            print(G)
            return G  # Error message

    num_nodes = G.shape[0]  # number of nodes

    # CC requires deg_fn and tri_fn. Load if available
    if inv_dict['cc']:
        # if either #tri or deg is undefined
        if not inv_dict['tri_fn']:
            inv_dict['tri'] = True
        if not inv_dict['deg_fn']:
            inv_dict['deg'] = True
        cc_array = np.zeros(num_nodes)

    # All invariants that require eigenvalues
    if (inv_dict['tri'] and not inv_dict['tri_fn']) or inv_dict['mad']:
        if not inv_dict['eigvl_fn']:
            inv_dict['eig'] = True

    # Only create arrays if the computation will be done
    if inv_dict['tri']:
        if inv_dict['tri_fn']:
            tri_array = np.load(inv_dict['tri_fn'])  # load if precomputed
        else:
            tri_array = np.zeros(num_nodes)  # local triangle count

    if inv_dict['deg'] or inv_dict['edge']:  # edge is global number of edges
        inv_dict['deg'] = True
        if inv_dict['deg_fn']:
            deg_array = np.load(inv_dict['deg_fn'])
        else:
            deg_array = np.zeros(num_nodes)  # Vertex degrees of all vertices

    if inv_dict['ss1']:
        # Induced subgraph edge number i.e scan statistic
        ss1_array = np.zeros(num_nodes)

    # Number of eigenvalues: the requested value when it is usable,
    # otherwise capped at 100 and at num_nodes - 2.
    if (not inv_dict['k'] or inv_dict['k'] > 100
            or inv_dict['k'] > G.shape[0] - 2):
        k = min(100, G.shape[0] - 2)  # Maximum of 100 eigenvalues
    else:
        k = inv_dict['k']

    start = time()
    # Calculate Eigenvalues & Eigen vectors
    if inv_dict['eig']:
        if not (inv_dict['eigvl_fn'] or inv_dict['eigvect_fn']):
            l, u = arpack.eigs(G, k=k, which='LM')  # LanczosMethod(A,0)
            print('Time taken to calc Eigenvalues: %f secs\n' % (time() - start))
        else:
            try:
                l = np.load(inv_dict['eigvl_fn'])
                # eigenvectors go to `u` only; a chained assignment here
                # would overwrite the eigenvalues just loaded
                u = np.load(inv_dict['eigvect_fn'])
            except Exception:
                return "[IOERROR: ]Eigenvalues failed to load"

    # All other invariants
    start = time()
    #### For loop ####
    compute_tri = inv_dict['tri'] and not inv_dict['tri_fn']
    compute_deg = inv_dict['deg'] and not inv_dict['deg_fn']
    if inv_dict['cc'] or inv_dict['ss1'] or compute_tri or compute_deg:
        for j in range(num_nodes):
            # tri
            if compute_tri:
                # Divide by six because we count locally
                tri_array[j] = abs(round(
                    (sum(np.power(l.real, 3) * (u[j][:].real**2))) / 6.0))

            # ss1 & deg
            if inv_dict['ss1'] or compute_deg:
                nbors = G[:, j].nonzero()[0]
                # deg
                if compute_deg:
                    deg_array[j] = nbors.shape[0]
                # ss1
                if inv_dict['ss1']:
                    if nbors.shape[0] > 0:
                        nbors_mat = G[:, nbors][nbors, :]
                        # scan stat 1; divide by two because of symmetric
                        # matrix
                        ss1_array[j] = nbors.shape[0] + (nbors_mat.nnz / 2.0)
                    else:
                        # zero neighbors hence zero cardinality induced
                        # subgraph
                        ss1_array[j] = 0

            # cc
            if inv_dict['cc']:
                if deg_array[j] > 2:
                    cc_array[j] = (2.0 * tri_array[j]) / (
                        deg_array[j] * (deg_array[j] - 1))  # Jari et al
                else:
                    cc_array[j] = 0

        print('Time taken to compute loop dependent invariants: %f secs\n'
              % (time() - start))
    ### End For ###

    # global edge
    if inv_dict['edge']:
        edge_count = deg_array.sum()

    # global vertices is num_nodes

    # MAD
    if inv_dict['mad']:
        max_ave_deg = np.max(l.real)

    # Computation complete - handle the saving now ...
    base_name = getBaseName(inv_dict['graph_fn'])

    # Top eigenvalues & eigenvectors
    if not inv_dict['eigvl_fn'] and inv_dict['eig']:
        eigvDir = os.path.join(inv_dict['save_dir'], "Eigen")

        # Immediately write eigs to file
        inv_dict['eigvl_fn'] = os.path.join(eigvDir, base_name + '_eigvl.npy')
        inv_dict['eigvect_fn'] = os.path.join(eigvDir, base_name + '_eigvect.npy')
        createSave(inv_dict['eigvl_fn'], l.real)  # eigenvalues
        createSave(inv_dict['eigvect_fn'], u)  # eigenvectors
        print('Eigenvalues and eigenvectors saved ...')

    # Triangle count
    if not inv_dict['tri_fn'] and inv_dict['tri']:
        triDir = os.path.join(inv_dict['save_dir'], "Triangle")
        inv_dict['tri_fn'] = os.path.join(triDir, base_name + '_triangles.npy')
        createSave(inv_dict['tri_fn'], tri_array)
        print('Triangle Count saved ...')

    # Degree count
    if not inv_dict['deg_fn'] and inv_dict['deg']:
        degDir = os.path.join(inv_dict['save_dir'], "Degree")
        inv_dict['deg_fn'] = os.path.join(degDir, base_name + '_degree.npy')
        createSave(inv_dict['deg_fn'], deg_array)
        print('Degree saved ...')

    # MAD
    if inv_dict['mad']:
        MADdir = os.path.join(inv_dict['save_dir'], "MAD")
        inv_dict['mad_fn'] = os.path.join(MADdir, base_name + '_mad.npy')
        createSave(inv_dict['mad_fn'], max_ave_deg)
        print('Maximum average Degree saved ...')

    # Scan Statistic 1
    if inv_dict['ss1']:
        ss1Dir = os.path.join(inv_dict['save_dir'], "SS1")
        inv_dict['ss1_fn'] = os.path.join(ss1Dir, base_name + '_scanstat1.npy')
        createSave(inv_dict['ss1_fn'], ss1_array)  # save it
        print('Scan 1 statistic saved ...')

    # Clustering coefficient
    if inv_dict['cc']:
        ccDir = os.path.join(inv_dict['save_dir'], "ClustCoeff")
        inv_dict['cc_fn'] = os.path.join(ccDir, base_name + '_clustcoeff.npy')
        createSave(inv_dict['cc_fn'], cc_array)  # save it
        print('Clustering coefficient saved ...')

    # Global Vertices
    if inv_dict['ver']:
        vertDir = os.path.join(inv_dict['save_dir'], "Globals")
        inv_dict['ver_fn'] = os.path.join(vertDir, base_name + '_numvert.npy')
        createSave(inv_dict['ver_fn'], num_nodes)  # save it
        print('Global vertices number saved ...')

    # Global number of edges
    if inv_dict['edge']:
        edgeDir = os.path.join(inv_dict['save_dir'], "Globals")
        inv_dict['edge_fn'] = os.path.join(edgeDir, base_name + '_numedges.npy')
        createSave(inv_dict['edge_fn'], edge_count)  # save it
        print('Global edge number saved ...')

    # TODO: Fix code this breaks. Originally was
    # [tri_fn, deg_fn, MAD_fn, eigvl_fn, eigvect_fn]
    return inv_dict
def test_eigs_operator():
    """Check inferring LinearOperator dtype.

    The operator wraps ``np.fft.fft`` without an explicit dtype; the
    eigenvalues returned by eigs() must be double-precision complex.
    """
    fft_op = LinearOperator((6, 6), np.fft.fft)
    w, v = eigs(fft_op, k=3)
    # np.complex128 is the canonical name; the `np.complex_` alias no longer
    # exists in NumPy 2.0
    assert_equal(w.dtype, np.complex128)
def calculate(self, number=inf):
    """Find modes of the linearized system and refine each one.

    The shift-invert operator is created on first use, candidate eigenvalues
    are computed around ``self.evapprox``, optionally restricted to the
    interval derived from ``self.bracket``, and every candidate is refined
    with ``calculate_one``. Returns ``self.modes``.
    """
    m0 = self.m0
    k0 = self.k0
    number = min(number, self.totalnumber)

    logging.info("Center solver. Finding modes, m0=%d, wl=%.3g" % (m0, self.wl))

    # A missing solver means the matrix has not been built yet
    if self.si is None:
        self.si = ShiftInvertBlock(overwrite=self.overwrite)
        self.create()

    # Stopwatch for the whole solve
    tick = timer.timer()
    tick.start()

    # Eigenvalue interval corresponding to the bracket
    evrange = array(self.bracket)**2 * self.k0**2

    # Refresh boundary conditions and the LU decomposition
    self.update_lambda(self.evapprox, not self.overwrite)
    self.si.set_shift(self.matrix, self.evapprox + 0j)

    # Candidate eigenvalues of the linearized system
    if use_arpack:
        cev = eigen(self.si, k=self.totalnumber, which='LM',
                    return_eigenvectors=False)
        cev = self.si.eigenvalue_transform(cev)
    else:
        cev, cvecs = eigs(self.si, self.totalnumber, tol=1e-6)

    # Keep only candidates whose real part lies inside the interval
    if self.ignore_outside_interval:
        inside = []
        for candidate in cev:
            if min(evrange) <= real(candidate) <= max(evrange):
                inside.append(candidate)
        cev = inside

    # Refine every candidate
    for ii, evsearch in enumerate(cev):
        # An overwritten matrix has to be regenerated first
        if self.overwrite:
            self.generate()
        self.si.set_shift(self.matrix, evsearch + 0j)

        mode, evals = self.calculate_one(evsearch, self.searchnumber)

        keep = (mode.residue < self.abc_convergence) or self.add_if_unconverged
        if keep:
            self.modes += [mode]

        avtime = tick.lap() / (ii + 1)
        message = "Mode #%d [%d/%.3gs], neff=%s, res: %.2e" % (
            self.numbersolved, mode.iterations, avtime, mode.neff,
            mode.residue)
        logging.info(message)

    self.add_time(tick.lap())

    # Clean up if calculation is finished!
    if self.isfinished():
        self.finalize()

    return self.modes
def calculate_one(self, evsearch, searchnum):
    """
    Calculate single mode using fixed point iterations
    calculate_one(evsearch, searchnum)

    evsearch: starting eigenvalue estimate for the new mode.
    searchnum: number of eigenvalues requested from each linear solve.

    Each iteration rebuilds the operator at the current eigenvalue estimate,
    solves the linear eigenproblem and keeps the eigenpair closest to the
    estimate. Iteration stops when the residue drops below
    ``self.abc_convergence`` or after ``self.abc_iterations`` passes.

    Returns the tuple (mode, evals) where evals are the eigenvalues of the
    last linear solve.
    """
    m0 = self.m0
    wl = self.wl
    # Only the end of the boundary-condition extent is used below
    _, bcend = self.equation.diff.bc_extents()
    coord = self.wg.get_coord(self.base_shape, border=1)

    #Create new mode
    mode = self.mode_class(coord=coord, symmetry=self.wg.symmetry, m0=m0, wl=wl,
                           evalue=evsearch, wg=self.wg)
    mode.right = ones(mode.shape, dtype=self.dtype)
    mode.discard_vectors = self.discard_vectors

    residue = inf
    numit = 0
    evtol = 1e-10
    while residue > self.abc_convergence and numit < self.abc_iterations:
        numit += 1

        #Update equation with new BCs and update LU decomp
        self.update_lambda(mode.evalue)
        if self.equation.coord.rmin == 0:
            self.si.update(self.matrix, uprows=bcend)
        else:
            self.si.set_shift(self.matrix, complex(mode.evalue))

        #Solve linear eigenproblem
        if use_arpack:
            evals, revecs = eigs(self.si, k=searchnum, \
                which='LM', return_eigenvectors=True)
            evals = self.si.eigenvalue_transform(evals)
        else:
            evals, revecs = eigs(self.si, searchnum, tol=evtol)
            revecs = revecs.T

        #Locate closest eigenvalue
        itrack = absolute(evals - mode.evalue).argmin()

        # Change in the eigenvalue estimate for this iteration. This has to
        # be measured before mode.evalue is overwritten, otherwise it is
        # always zero.
        evconverge = abs(evals[itrack] - mode.evalue)

        mode.evalue = evals[itrack]
        mode.right = asarray(revecs)[:, itrack]

        #Residue and convergence
        residue = mode.residue = self.residue(mode)
        mode.convergence += [residue]
        mode.track += [mode.evalue]

        logging.debug("[%d] neff: %s conv: %.3g, res:%.3g" % (numit, mode.neff, evconverge, mode.residue))

    mode.iterations = numit

    #Discard vectors at this stage, to free up memory if we can
    mode.compress()

    return mode, evals
def compute(inv_dict, save=True):
    '''
    Compute the requested graph invariants and write each computed one to disk.

    @param inv_dict: is a dict optionally containing any of these:
      - inv_dict['G']: an already loaded adjacency matrix (used instead of loading graph_fn)
      - inv_dict['graph_fn']: the path of the graph file
      - inv_dict['graphsize']: 'b' or 'big' loads the graph with loadAdjMat, anything else with loadAnyMat
      - inv_dict['k']: number of eigenvalues to compute (capped at 100 and at number of nodes - 2)
      - inv_dict['edge']: boolean for global edge count
      - inv_dict['ver']: boolean for global vertex number
      - inv_dict['tri']: boolean for local triangle count
      - inv_dict['tri_fn']: the path of a precomputed triangle count (.npy)
      - inv_dict['eig']: boolean for eigenvalues and eigenvectors
      - inv_dict['eigvl_fn']: the path of a precomputed eigenvalues (.npy)
      - inv_dict['eigvect_fn']: the path of a precomputed eigenvectors (.npy)
      - inv_dict['deg']: boolean for local degree count
      - inv_dict['deg_fn']: the path of a precomputed degree count (.npy)
      - inv_dict['ss1']: boolean for scan 1 statistic
      - inv_dict['cc']: boolean for clustering coefficient
      - inv_dict['mad']: boolean for maximum average degree
      - inv_dict['save_dir']: the base path where all invariants will create sub-dirs & be saved
        (defaults to the directory of graph_fn)
    @param save: boolean for auto save or not. TODO: use this (currently ignored)

    @return: inv_dict with the '*_fn' entries of every invariant saved here filled in,
      or an error message string if the graph or the precomputed eigen data failed to load.
    '''
    # Populate inv_dict
    inv_dict = populate_inv_dict(inv_dict)

    if inv_dict['save_dir'] is None:
        inv_dict['save_dir'] = os.path.dirname(inv_dict['graph_fn'])

    # Use the supplied graph if there is one, otherwise load it from disk.
    # A 'G' key holding None falls through to loading instead of leaving G unbound.
    G = inv_dict.get('G')
    if G is None:
        if inv_dict['graphsize'] in ('b', 'big'):
            G = loadAdjMat(inv_dict['graph_fn'], inv_dict['lcc_fn'])  # TODO: test
        else:
            # small graphs
            G = loadAnyMat(inv_dict['graph_fn'], inv_dict['data_elem'])
        if isinstance(G, str):
            print(G)
            return G  # Error message

    num_nodes = G.shape[0]  # number of nodes
    base_name = getBaseName(inv_dict['graph_fn'])

    # CC requires the degree and triangle counts. Compute them unless precomputed
    if inv_dict['cc']:
        if not inv_dict['tri_fn']:
            inv_dict['tri'] = True
        if not inv_dict['deg_fn']:
            inv_dict['deg'] = True
        cc_array = np.zeros(num_nodes)

    # All invariants that require eigenvalues
    if (inv_dict['tri'] and not inv_dict['tri_fn']) or inv_dict['mad']:
        if not inv_dict['eigvl_fn']:
            inv_dict['eig'] = True

    # Only create arrays if the computation will be done.
    # cc also needs the triangle array when it comes from a precomputed file.
    if inv_dict['tri'] or inv_dict['cc']:
        if inv_dict['tri_fn']:
            tri_array = np.load(inv_dict['tri_fn'])  # load if precomputed
        else:
            tri_array = np.zeros(num_nodes)  # local triangle count

    if inv_dict['edge']:  # edge is global number of edges, derived from degrees
        inv_dict['deg'] = True
    # cc also needs the degree array when it comes from a precomputed file.
    if inv_dict['deg'] or inv_dict['cc']:
        if inv_dict['deg_fn']:
            deg_array = np.load(inv_dict['deg_fn'])
        else:
            deg_array = np.zeros(num_nodes)  # Vertex degrees of all vertices

    if inv_dict['ss1']:
        ss1_array = np.zeros(num_nodes)  # Induced subgraph edge number i.e scan statistic

    # Number of eigenvalues: the requested k, limited to a maximum of 100
    # eigenvalues and to num_nodes - 2.
    max_k = min(100, num_nodes - 2)
    k = inv_dict['k']
    if not k or k > max_k:
        k = max_k

    start = time()
    # Calculate Eigenvalues & Eigen vectors
    if inv_dict['eig']:
        if not (inv_dict['eigvl_fn'] or inv_dict['eigvect_fn']):
            eigvals, eigvecs = arpack.eigs(G, k=k, which='LM')  # LanczosMethod(A,0)
            print('Time taken to calc Eigenvalues: %f secs\n' % (time() - start))
        else:
            try:
                eigvals = np.load(inv_dict['eigvl_fn'])
                eigvecs = np.load(inv_dict['eigvect_fn'])
            except Exception:
                return "[IOERROR: ]Eigenvalues failed to load"

    # All other invariants
    start = time()

    # Flags are constant for the whole loop, so evaluate them once
    compute_tri = bool(inv_dict['tri'] and not inv_dict['tri_fn'])
    compute_deg = bool(inv_dict['deg'] and not inv_dict['deg_fn'])
    compute_ss1 = bool(inv_dict['ss1'])
    compute_cc = bool(inv_dict['cc'])

    #### For loop ####
    if compute_cc or compute_ss1 or compute_tri or compute_deg:
        if compute_tri:
            eigval_cubes = np.power(eigvals.real, 3)

        for j in range(num_nodes):
            # tri
            if compute_tri:
                # Divide by six because we count locally
                tri_array[j] = abs(round(sum(eigval_cubes * (eigvecs[j][:].real ** 2)) / 6.0))

            # ss1 & deg
            if compute_ss1 or compute_deg:
                nbors = G[:, j].nonzero()[0]
                # deg
                if compute_deg:
                    deg_array[j] = nbors.shape[0]
                # ss1
                if compute_ss1:
                    if nbors.shape[0] > 0:
                        nbors_mat = G[:, nbors][nbors, :]
                        # scan stat 1. Divide by two because of symmetric matrix
                        ss1_array[j] = nbors.shape[0] + (nbors_mat.nnz / 2.0)
                    else:
                        ss1_array[j] = 0  # zero neighbors hence zero cardinality induced subgraph

            # cc
            if compute_cc:
                # NOTE(review): vertices of degree exactly 2 get cc = 0 here - confirm
                # the threshold is intended to be 2 rather than 1.
                if deg_array[j] > 2:
                    cc_array[j] = (2.0 * tri_array[j]) / (deg_array[j] * (deg_array[j] - 1))  # Jari et al
                else:
                    cc_array[j] = 0

        print('Time taken to compute loop dependent invariants: %f secs\n' % (time() - start))
    ### End For ###

    # global edge
    if inv_dict['edge']:
        edge_count = deg_array.sum()

    # global vertices is num_nodes

    # MAD
    # NOTE(review): eigvals is only bound when inv_dict['eig'] is set; with 'mad'
    # requested, a precomputed eigvl_fn given and 'eig' false it is never loaded.
    if inv_dict['mad']:
        max_ave_deg = np.max(eigvals.real)

    # Computation complete - handle the saving now ...

    def _save(fn_key, subdir, suffix, data):
        # Record the output path under fn_key and write data to it
        inv_dict[fn_key] = os.path.join(inv_dict['save_dir'], subdir, base_name + suffix)
        createSave(inv_dict[fn_key], data)

    # Top eigenvalues & eigenvectors
    if not inv_dict['eigvl_fn'] and inv_dict['eig']:
        # Immediately write eigs to file
        _save('eigvl_fn', "Eigen", '_eigvl.npy', eigvals.real)  # eigenvalues
        _save('eigvect_fn', "Eigen", '_eigvect.npy', eigvecs)  # eigenvectors
        print('Eigenvalues and eigenvectors saved ...')

    # Triangle count
    if compute_tri:
        _save('tri_fn', "Triangle", '_triangles.npy', tri_array)
        print('Triangle Count saved ...')

    # Degree count
    if compute_deg:
        _save('deg_fn', "Degree", '_degree.npy', deg_array)
        print('Degree saved ...')

    # MAD
    if inv_dict['mad']:
        _save('mad_fn', "MAD", '_mad.npy', max_ave_deg)
        print('Maximum average Degree saved ...')

    # Scan Statistic 1
    if inv_dict['ss1']:
        _save('ss1_fn', "SS1", '_scanstat1.npy', ss1_array)
        print('Scan 1 statistic saved ...')

    # Clustering coefficient
    if inv_dict['cc']:
        _save('cc_fn', "ClustCoeff", '_clustcoeff.npy', cc_array)
        print('Clustering coefficient saved ...')

    # Global Vertices
    if inv_dict['ver']:
        _save('ver_fn', "Globals", '_numvert.npy', num_nodes)
        print('Global vertices number saved ...')

    # Global number of edges
    if inv_dict['edge']:
        _save('edge_fn', "Globals", '_numedges.npy', edge_count)
        print('Global edge number saved ...')

    return inv_dict  # TODO: Fix code this breaks. Originally was [tri_fn, deg_fn, MAD_fn, eigvl_fn, eigvect_fn]