def testMatrixApprox(self):
    """
    Check Nystrom.matrixApprox on a random symmetric matrix A = B B^T, given
    both as a dense array and as a scipy CSR matrix.

    For each representation, the approximation requested with the integer 10
    (the matrix dimension) must be closer to A than one built from 5 randomly
    chosen indices, and must match A to within the tolerance. Finally the
    dense and sparse results are compared for index subsets of size 1 to 8.
    """
    eps = 10**-6
    A = numpy.random.rand(10, 10)
    A = A.dot(A.T)
    numRows = A.shape[0]

    # Dense matrix: 5 random indices versus the integer argument 10
    subset = numpy.sort(numpy.random.permutation(numRows)[0:5])
    partial = Nystrom.matrixApprox(A, subset)
    complete = Nystrom.matrixApprox(A, 10)
    completeErr = numpy.linalg.norm(A - complete)
    self.assertTrue(completeErr < numpy.linalg.norm(A - partial))
    self.assertTrue(completeErr < eps)

    # Same checks on a sparse matrix, using the sparse norm helper
    As = scipy.sparse.csr_matrix(A)
    subset = numpy.sort(numpy.random.permutation(numRows)[0:5])
    partial = Nystrom.matrixApprox(As, subset)
    complete = Nystrom.matrixApprox(As, 10)
    completeErr = SparseUtils.norm(As - complete)
    self.assertTrue(completeErr < SparseUtils.norm(As - partial))
    self.assertTrue(completeErr < eps)

    # Dense and sparse solutions must coincide for identical index subsets
    for size in range(1, 9):
        subset = numpy.sort(numpy.random.permutation(numRows)[0:size])
        sparseApprox = Nystrom.matrixApprox(As, subset)
        denseApprox = Nystrom.matrixApprox(A, subset)
        gap = numpy.linalg.norm(denseApprox - numpy.array(sparseApprox.todense()))
        self.assertTrue(gap < eps)
def testEig(self):
    """
    Check Nystrom.eig on symmetric matrices built from a random A = B B^T
    whose spectrum is shifted by -1 (possibly indefinite) or +1 (positive
    definite).

    With the integer argument 10 (the matrix dimension) the factorisation
    V diag(lmbda) V^T must reproduce the matrix to within the tolerance. For
    the positive definite matrix, every index subset of size 2..10 must give
    an error smaller than the norm of the matrix itself.
    """
    tol = 10**-5

    # Test with an indefinite matrix: the eigenvalues of B B^T are shifted
    # down by 1, so some of them may become negative. eigh is used because
    # the matrix is symmetric; it returns real eigenvalues and orthonormal
    # eigenvectors, which the reconstruction U diag(.) U^T relies on.
    A = numpy.random.rand(10, 10)
    A = A.dot(A.T)
    w, U = numpy.linalg.eigh(A)
    A = U.dot(numpy.diag(w-1)).dot(U.T)

    n = 10
    lmbda, V = Nystrom.eig(A, n)
    AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
    self.assertTrue(numpy.linalg.norm(A - AHat) < tol)

    # No accuracy bound is asserted for index subsets of the shifted-down
    # matrix (the original check was disabled); this loop only verifies that
    # the decomposition runs and that its factors multiply back together.
    for n in range(2, 11):
        inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:n])
        lmbda, V = Nystrom.eig(A, inds)
        AHat = V.dot(numpy.diag(lmbda)).dot(V.T)

    # Test with a positive definite matrix (eigenvalues shifted up by 1)
    A = numpy.random.rand(10, 10)
    A = A.dot(A.T)
    w, U = numpy.linalg.eigh(A)
    A = U.dot(numpy.diag(w+1)).dot(U.T)

    # Approximation should be good when n < 10
    for n in range(2, 11):
        inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:n])
        lmbda, V = Nystrom.eig(A, inds)
        AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
        self.assertTrue(numpy.linalg.norm(A - AHat) < numpy.linalg.norm(A))
def testEigpsd2(self):
    """
    Check Nystrom.eigpsd when the input is a scipy CSR matrix.

    A random symmetric matrix with eigenvalues shifted up by 1 is converted
    to CSR form. With the integer argument 10 (the matrix dimension) the
    factorisation must reproduce the matrix within the tolerance. For index
    subsets of size 2..10 the error must be below the norm of the matrix and
    must agree with the error of Nystrom.matrixApprox on the dense matrix.
    """
    #These tests are on sparse matrices
    tol = 10**-5

    A = numpy.random.rand(10, 10)
    A = A.dot(A.T)
    # The matrix is symmetric, so eigh is the appropriate solver: real
    # eigenvalues and orthonormal eigenvectors.
    w, U = numpy.linalg.eigh(A)
    A = U.dot(numpy.diag(w+1)).dot(U.T)
    As = scipy.sparse.csr_matrix(A)

    n = 10
    lmbda, V = Nystrom.eigpsd(As, n)
    AHat = scipy.sparse.csr_matrix(V.dot(numpy.diag(lmbda)).dot(V.T))
    self.assertTrue(numpy.linalg.norm(A - AHat) < tol)

    for n in range(2, 11):
        inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:n])
        lmbda, V = Nystrom.eigpsd(As, inds)
        AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
        AHat2 = Nystrom.matrixApprox(A, inds)
        self.assertTrue(numpy.linalg.norm(A - AHat) < numpy.linalg.norm(A))
        # assertAlmostEquals was a deprecated alias and no longer exists in
        # Python 3.12+; assertAlmostEqual is the supported spelling.
        self.assertAlmostEqual(numpy.linalg.norm(A - AHat), numpy.linalg.norm(A - AHat2))
def testEigpsd3(self):
    """
    Check Nystrom.eigpsd against Nystrom.matrixApprox on a 1000 x 1000 matrix.

    The matrix is P D P^T where D is block diagonal: an m x m positive
    definite block followed by a scaled identity whose entries are small
    relative to that block. Its largest eigenvalue is then replaced by 5000.
    For 11 approximation ranks between 2 and max_k, the eigpsd error must be
    below the norm of the matrix and must match the matrixApprox error to a
    number of places derived from the tolerance.
    """
    # These tests are on big matrices
    tol = 10**-3
    n = 1000                # size of the matrices
    m = 100                 # rank of the matrices
    max_k = int(m*1.1)      # maximum rank of the approximation

    # relevant matrix
    Arel = numpy.random.rand(m, m)
    Arel = Arel.dot(Arel.T)
    w, U = numpy.linalg.eigh(Arel)
    Arel = U.dot(numpy.diag(w+1)).dot(U.T)
    tolArel = tol*numpy.linalg.norm(Arel)

    # big matrix
    P = numpy.random.rand(n, n)
    A = P.dot(scipy.linalg.block_diag(Arel, numpy.identity(n-m)/numpy.sqrt(n-m)*tolArel/10)).dot(P.T)

    #Resulting matrix is really badly conditioned so we reduce the largest eigenvalue
    lmbda, V = numpy.linalg.eigh(A)
    inds = numpy.argsort(lmbda)
    lmbda[inds[-1]] = 5000
    A = (V*lmbda).dot(V.T)

    # Both values are constant across the loop below, so compute them once.
    normA = numpy.linalg.norm(A)
    tolA = tol*normA
    # places is zero or negative whenever tolA >= 1, i.e. the comparison then
    # rounds the difference to units, tens, ... rather than to decimals.
    places = -int(numpy.log10(tolA))

    for k in map(int, 2+numpy.array(range(11))*(max_k-2)/10):
        inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:k])
        lmbda, V = Nystrom.eigpsd(A, inds)
        AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
        AHat2 = Nystrom.matrixApprox(A, inds)

        a = numpy.linalg.norm(A - AHat)
        b = numpy.linalg.norm(A - AHat2)
        self.assertTrue(a < normA)
        # assertAlmostEquals was a deprecated alias and no longer exists in
        # Python 3.12+; assertAlmostEqual is the supported spelling.
        self.assertAlmostEqual(a, b, places=places, msg= "both approximations differ: " + str(a) + " != " + str(b) + " within " + str(places) + " places (with rank " + str(k) + " approximation)")
def run():
    """
    Plot the exact spectrum of a normalised Laplacian next to the eigenvalues
    returned by Nystrom.eigpsd.

    The weight matrix comes from createDataset(). Both eigenvalue arrays are
    printed and then drawn on the same figure, which is shown interactively.
    """
    weights = createDataset()

    graph = SparseGraph(GeneralVertexList(weights.shape[0]))
    graph.setWeightMatrix(weights)
    laplacian = graph.normalisedLaplacianSym()

    # Exact eigenvalues first, then the Nystrom ones requested with n = 100
    exactVals, exactVecs = numpy.linalg.eigh(laplacian)
    approxVals, approxVecs = Nystrom.eigpsd(laplacian, 100)

    print(exactVals)
    print(approxVals)

    plt.figure(1)
    for vals in (exactVals, approxVals):
        plt.plot(numpy.arange(vals.shape[0]), vals)
    plt.show()

#run()
def testEigpsd(self):
    """
    Check Nystrom.eigpsd on dense symmetric matrices.

    Two matrices are tested: a random B B^T with its eigenvalues shifted up
    by 1, and the same matrix with one eigenvalue replaced. In both cases the
    integer argument 10 (the matrix dimension) must reproduce the matrix
    within the tolerance, and index subsets of size 2..10 must give an error
    below the norm of the matrix that agrees with Nystrom.matrixApprox.
    """
    tol = 10**-3

    A = numpy.random.rand(10, 10)
    A = A.dot(A.T)
    # The matrix is symmetric, so eigh is the appropriate solver: real
    # eigenvalues (in ascending order) and orthonormal eigenvectors.
    w, U = numpy.linalg.eigh(A)
    A = U.dot(numpy.diag(w+1)).dot(U.T)

    n = 10
    lmbda, V = Nystrom.eigpsd(A, n)
    AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
    self.assertTrue(numpy.linalg.norm(A - AHat) < tol)

    #Approximation should be good when n < 10
    for n in range(2, 11):
        inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:n])
        lmbda, V = Nystrom.eigpsd(A, inds)
        AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
        AHat2 = Nystrom.matrixApprox(A, inds)
        self.assertTrue(numpy.linalg.norm(A - AHat) < numpy.linalg.norm(A))
        # assertAlmostEquals was a deprecated alias and no longer exists in
        # Python 3.12+; assertAlmostEqual is the supported spelling.
        self.assertAlmostEqual(numpy.linalg.norm(A - AHat), numpy.linalg.norm(A - AHat2))

    #Now let's test on positive semi-definite
    # NOTE(review): w[9] is zeroed but 1 is still added to every eigenvalue,
    # so the resulting matrix has that eigenvalue equal to 1 and remains
    # positive definite. Confirm whether a true zero eigenvalue was intended.
    w[9] = 0
    A = U.dot(numpy.diag(w+1)).dot(U.T)

    n = 10
    lmbda, V = Nystrom.eigpsd(A, n)
    AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
    self.assertTrue(numpy.linalg.norm(A - AHat) < tol)

    #Approximation should be good when n < 10
    for n in range(2, 11):
        inds = numpy.sort(numpy.random.permutation(A.shape[0])[0:n])
        lmbda, V = Nystrom.eigpsd(A, inds)
        AHat = V.dot(numpy.diag(lmbda)).dot(V.T)
        AHat2 = Nystrom.matrixApprox(A, inds)
        self.assertTrue(numpy.linalg.norm(A - AHat) < numpy.linalg.norm(A))
        self.assertAlmostEqual(numpy.linalg.norm(A - AHat), numpy.linalg.norm(A - AHat2))
lastL = initialL lastOmegas = [initialOmega]*len(IASCL) lastQs = [initialQ]*len(IASCL) #Compute exact eigenvalues omega, Q = numpy.linalg.eigh(L.todense()) inds = numpy.flipud(numpy.argsort(omega)) omega, Q = omega[inds], Q[:, inds] omegak, Qk = omega[0:k], Q[:, 0:k] omegakbot, Qkbot = omega[k:], Q[:, k:] #Nystrom method print("Running Nystrom") for j, nystromN in enumerate(nystromNs): omega2, Q2 = Nystrom.eigpsd(L, nystromN) inds = numpy.flipud(numpy.argsort(omega2)) omega2, Q2 = omega2[inds], Q2[:, inds] omega2k, Q2k = omega2[0:k], Q2[:, 0:k] # errors[i, j] += computeBound(L, omega, Q, omega2k, Q2k, k) errors[i, j] += computeSinTheta(Qkbot, Q2k) #Randomised SVD method print("Running Random SVD") for j, r in enumerate(randSVDVecs): Q4, omega4, R4 = RandomisedSVD.svd(L, r) inds = numpy.flipud(numpy.argsort(omega4)) omega4, Q4 = omega4[inds], Q4[:, inds] omega4k, Q4k = omega4[0:k], Q4[:, 0:k]
print(omega) print(Q) omegaHat, Qhat = numpy.linalg.eigh(L2.todense()) inds = numpy.argsort(omegaHat) omegaHat, Qhat = omegaHat[inds], Qhat[:, inds] omegaHatk, Qhatk = omegaHat[0:k], Qhat[:, 0:k] print(computeInnerProd(Qk, Qhatk)) for i, nystromN in enumerate(nystromNs): print(nystromN) #omega2, Q2 = numpy.linalg.eigh(L.todense()) omega2, Q2 = Nystrom.eigpsd(L, int(nystromN)) inds = numpy.flipud(numpy.argsort(omega2)) omega2, Q2 = omega2[inds], Q2[:, inds] omega2k, Q2k = omega2[0:k], Q2[:, 0:k] errors[i, 0] = computeBound(L, omega, Q, omega2k, Q2k, k) innerProds[i, 0] = computeInnerProd(Qk, Q2k) #omega2, Q2 = numpy.linalg.eigh(L2.todense()) omega2, Q2 = Nystrom.eigpsd(L2, int(nystromN)) inds = numpy.argsort(omega2) omega2, Q2 = omega2[inds], Q2[:, inds] omega2k, Q2k = omega2[0:k], Q2[:, 0:k] print(omega2) errors[i, 1] = computeBound(L2, omegaHat, Qhat, omega2k, Q2k, k)
def clusterFromIterator(self, graphListIterator, verbose=False):
    """
    Find a set of clusters for the graphs given by the iterator. If verbose
    is true then each iteration is timed and bounded, and the results are
    returned as lists.

    The difference between a weight matrix and the previous one should be
    positive.

    For every weight matrix the method computes an eigen-decomposition with
    the algorithm selected by self.alg ("IASC", "nystrom", "exact",
    "efficientNystrom" or "randomisedSvd"), then clusters the rows of the
    leading self.k1 eigenvectors with k-means. From the second graph on,
    k-means is seeded with centroids derived from the previous clustering.

    :param graphListIterator: iterable yielding one weight matrix per graph.
    :param verbose: when true, also return timings and bounds.
    :return: the list of cluster assignments (one entry per graph). If
        verbose is true, a tuple (clustersList, times, boundList) where times
        is an array with one row per graph holding the decomposition time and
        the k-means time, and boundList holds the bounds recorded by the IASC
        branch when self.computeBound is set.
    :raises ValueError: if self.alg is not one of the supported algorithms.
    """
    clustersList = []
    decompositionTimeList = []
    kMeansTimeList = []
    boundList = []
    i = 0

    for subW in graphListIterator:
        if __debug__:
            Parameter.checkSymmetric(subW)

        if self.logStep and i % self.logStep == 0:
            logging.debug("Graph index: " + str(i))
        # NOTE(review): nesting reconstructed from a whitespace-mangled
        # source; confirm this message is meant to be logged for every graph.
        logging.debug("Clustering graph of size " + str(subW.shape))
        if self.alg != "efficientNystrom":
            ABBA = GraphUtils.shiftLaplacian(subW)

        # --- Eigen value decomposition ---
        startTime = time.time()
        if self.alg == "IASC":
            if i % self.T != 0:
                # Approximate update from the previous decomposition
                omega, Q = self.approxUpdateEig(subW, ABBA, omega, Q)

                if self.computeBound:
                    inds = numpy.flipud(numpy.argsort(omega))
                    Q = Q[:, inds]
                    omega = omega[inds]
                    # omegaKbot and AKbot come from the most recent exact
                    # decomposition (the else branch below).
                    bounds = self.pertBound(omega, Q, omegaKbot, AKbot, self.k2)

                    #Now use accurate values of norm of R and delta
                    rank = Util.rank(ABBA.todense())
                    gamma, U = scipy.sparse.linalg.eigsh(ABBA, rank-1, which="LM", ncv = ABBA.shape[0])
                    bounds2 = self.realBound(omega, Q, gamma, AKbot, self.k2)
                    boundList.append([i, bounds[0], bounds[1], bounds2[0], bounds2[1]])
            else:
                logging.debug("Computing exact eigenvectors")
                self.storeInformation(subW, ABBA)

                if self.computeBound:
                    rank = Util.rank(ABBA.todense())
                    omega, Q = scipy.sparse.linalg.eigsh(ABBA, rank-1, which="LM", ncv = ABBA.shape[0])
                    inds = numpy.flipud(numpy.argsort(omega))
                    # Keep everything past the k2 largest eigenvalues for the
                    # bound computations of the following approximate steps.
                    omegaKbot = omega[inds[self.k2:]]
                    QKbot = Q[:, inds[self.k2:]]
                    AKbot = (QKbot*omegaKbot).dot(QKbot.T)
                else:
                    omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2, ABBA.shape[0]-1), which="LM", ncv = min(10*self.k2, ABBA.shape[0]))
        elif self.alg == "nystrom":
            omega, Q = Nystrom.eigpsd(ABBA, self.k3)
        elif self.alg == "exact":
            omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k1, ABBA.shape[0]-1), which="LM", ncv = min(15*self.k1, ABBA.shape[0]))
        elif self.alg == "efficientNystrom":
            omega, Q = EfficientNystrom.eigWeight(subW, self.k2, self.k1)
        elif self.alg == "randomisedSvd":
            Q, omega, R = RandomisedSVD.svd(ABBA, self.k4)
        else:
            raise ValueError("Invalid Algorithm: " + str(self.alg))

        decompositionTimeList.append(time.time()-startTime)

        if self.alg == "IASC":
            self.storeInformation(subW, ABBA)

        # --- Kmeans ---
        startTime = time.time()
        inds = numpy.flipud(numpy.argsort(omega))

        standardiser = Standardiser()
        #For some very strange reason we get an overflow when computing the
        #norm of the rows of Q even though its elements are bounded by 1.
        #If that happens, log it and fall back to the un-normalised
        #eigenvectors so that V is always defined for this graph (previously
        #V was left unbound on the first graph, or stale from the previous
        #graph, when the exception was raised).
        leadingVecs = Q[:, inds[0:self.k1]].real
        try:
            V = standardiser.normaliseArray(leadingVecs.T).T
        except FloatingPointError as e:
            logging.warning("FloatingPointError: " + str(e))
            V = leadingVecs
        V = VqUtils.whiten(V)

        if i == 0:
            centroids, distortion = vq.kmeans(V, self.k1, iter=self.nb_iter_kmeans)
        else:
            # Seed k-means from the previous clustering of the shared vertices
            centroids = self.findCentroids(V, clusters[:subW.shape[0]])
            if centroids.shape[0] < self.k1:
                # Top up with randomly chosen rows of V to reach k1 centroids
                nb_missing_centroids = self.k1 - centroids.shape[0]
                random_centroids = V[numpy.random.randint(0, V.shape[0], nb_missing_centroids),:]
                centroids = numpy.vstack((centroids, random_centroids))
            centroids, distortion = vq.kmeans(V, centroids) #iter can only be 1
        clusters, distortion = vq.vq(V, centroids)
        kMeansTimeList.append(time.time()-startTime)

        clustersList.append(clusters)

        if ProfileUtils.memory() > 10**9:
            ProfileUtils.memDisplay(locals())

        i += 1

    if verbose:
        return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, boundList
    else:
        return clustersList