def testNormalisedLaplacianSym(self):
    """Check shiftLaplacian against normalisedLaplacianSym, and the zero-degree case.

    For a connected random graph, shiftLaplacian(W) + normalisedLaplacianSym(W)
    should equal 2*I. For a weight matrix with zero rows/columns, the
    corresponding diagonal entries of the normalised Laplacian must be zero.
    """
    numVertices = 10
    numFeatures = 0
    vList = VertexList(numVertices, numFeatures)
    graph = SparseGraph(vList)

    # Generate a small scale-free graph so every vertex has nonzero degree.
    ell = 2
    m = 2
    generator = BarabasiAlbertGenerator(ell, m)
    graph = generator.generate(graph)

    W = graph.getSparseWeightMatrix()
    L = GraphUtils.shiftLaplacian(W)
    L2 = graph.normalisedLaplacianSym()

    tol = 10**-6
    # shift Laplacian is defined as 2I - Lsym, so the two must sum to 2I.
    self.assertTrue(numpy.linalg.norm(L + L2 - 2*numpy.eye(numVertices)) < tol)

    # Test zero rows/cols: only vertices 0 and 1 are connected; the isolated
    # vertices 2..4 must have a zero diagonal (no division-by-zero degree).
    W = scipy.sparse.csr_matrix((5, 5))
    W[1, 0] = 1
    W[0, 1] = 1
    L = GraphUtils.normalisedLaplacianSym(W)

    for i in range(2, 5):
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(L[i, i], 0)
def testNormalisedLaplacianSym(self):
    """Check shiftLaplacian against normalisedLaplacianSym, and the zero-degree case.

    For a connected random graph, shiftLaplacian(W) + normalisedLaplacianSym(W)
    should equal 2*I. For a weight matrix with zero rows/columns, the
    corresponding diagonal entries of the normalised Laplacian must be zero.
    """
    numVertices = 10
    numFeatures = 0
    vList = VertexList(numVertices, numFeatures)
    graph = SparseGraph(vList)

    # Generate a small scale-free graph so every vertex has nonzero degree.
    ell = 2
    m = 2
    generator = BarabasiAlbertGenerator(ell, m)
    graph = generator.generate(graph)

    W = graph.getSparseWeightMatrix()
    L = GraphUtils.shiftLaplacian(W)
    L2 = graph.normalisedLaplacianSym()

    tol = 10**-6
    # shift Laplacian is defined as 2I - Lsym, so the two must sum to 2I.
    self.assertTrue(
        numpy.linalg.norm(L + L2 - 2 * numpy.eye(numVertices)) < tol)

    # Test zero rows/cols: only vertices 0 and 1 are connected; the isolated
    # vertices 2..4 must have a zero diagonal (no division-by-zero degree).
    W = scipy.sparse.csr_matrix((5, 5))
    W[1, 0] = 1
    W[0, 1] = 1
    L = GraphUtils.normalisedLaplacianSym(W)

    for i in range(2, 5):
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(L[i, i], 0)
def testShiftLaplacian(self):
    """shiftLaplacian(W) must equal 2*I - normalisedLaplacianSym() on a random graph."""
    numVertices = 10
    numFeatures = 0
    vList = VertexList(numVertices, numFeatures)
    graph = SparseGraph(vList)

    # Small scale-free graph so every vertex has nonzero degree.
    ell = 2
    m = 2
    generator = BarabasiAlbertGenerator(ell, m)
    graph = generator.generate(graph)

    W = graph.getSparseWeightMatrix()
    L = GraphUtils.shiftLaplacian(W)
    L2 = 2*numpy.eye(numVertices) - graph.normalisedLaplacianSym()

    tol = 10**-6
    self.assertTrue(numpy.linalg.norm(L - L2) < tol)
def testShiftLaplacian(self):
    """shiftLaplacian(W) must equal 2*I - normalisedLaplacianSym() on a random graph."""
    numVertices = 10
    numFeatures = 0
    vList = VertexList(numVertices, numFeatures)
    graph = SparseGraph(vList)

    # Small scale-free graph so every vertex has nonzero degree.
    ell = 2
    m = 2
    generator = BarabasiAlbertGenerator(ell, m)
    graph = generator.generate(graph)

    W = graph.getSparseWeightMatrix()
    L = GraphUtils.shiftLaplacian(W)
    L2 = 2 * numpy.eye(numVertices) - graph.normalisedLaplacianSym()

    tol = 10**-6
    self.assertTrue(numpy.linalg.norm(L - L2) < tol)
numRepetitions = 20 #numRepetitions = 1 saveResults = False resultsDir = PathDefaults.getOutputDir() + "cluster/" fileName = resultsDir + "ErrorBoundNystrom.npy" if saveResults: for r in range(numRepetitions): i = 0 iterator = BoundGraphIterator(changeEdges=50, numGraphs=numGraphs, numClusterVertices=numClusterVertices, numClusters=k, p=0.1) for W in iterator: print("i="+str(i)) L = GraphUtils.shiftLaplacian(W) if i == 0: initialL = L initialOmega, initialQ = numpy.linalg.eigh(L.todense()) inds = numpy.flipud(numpy.argsort(initialOmega)) initialOmega, initialQ = initialOmega[inds], initialQ[:, inds] #Fix for weird error in EigenAdd2 later on initialQ = numpy.array(initialQ) initialQk = initialQ[:, 0:k] # for IASC lastL = initialL lastOmegas = [initialOmega]*len(IASCL) lastQs = [initialQ]*len(IASCL) #Compute exact eigenvalues
def clusterFromIterator(self, graphListIterator, verbose=False):
    """
    Find a set of clusters for each graph weight matrix yielded by the iterator.

    For each graph: compute an eigendecomposition of its shift Laplacian using
    the algorithm selected by self.alg ("IASC", "nystrom", "exact",
    "efficientNystrom" or "randomisedSvd"), then run k-means on the top-k1
    eigenvectors to produce cluster labels.

    :param graphListIterator: iterator yielding symmetric sparse weight matrices.
        The difference between a weight matrix and the previous one should be
        positive.
    :param verbose: if True, also return per-iteration timings and an
        eigen-quality dict {"boundList": ..., "sinThetaList": ...}.
    :return: clustersList, or (clustersList, timings array, eigenQuality) when
        verbose is True.
    """
    clustersList = []
    decompositionTimeList = []
    kMeansTimeList = []
    boundList = []
    sinThetaList = []
    i = 0

    for subW in graphListIterator:
        # Symmetry check only runs when assertions are enabled.
        if __debug__:
            Parameter.checkSymmetric(subW)

        if self.logStep and i % self.logStep == 0:
            logging.debug("Graph index: " + str(i))
        logging.debug("Clustering graph of size " + str(subW.shape))
        # efficientNystrom works directly on the weight matrix; every other
        # algorithm decomposes the shift Laplacian.
        if self.alg != "efficientNystrom":
            ABBA = GraphUtils.shiftLaplacian(subW)

        # --- Eigen value decomposition ---
        startTime = time.time()
        if self.alg == "IASC":
            # Every T-th graph is recomputed exactly; in between, the previous
            # omega/Q (carried over from the last iteration) are updated
            # incrementally.
            if i % self.T != 0:
                omega, Q = self.approxUpdateEig(subW, ABBA, omega, Q)

                if self.computeBound:
                    inds = numpy.flipud(numpy.argsort(omega))
                    Q = Q[:, inds]
                    omega = omega[inds]
                    # Perturbation bound using quantities cached from the last
                    # exact decomposition (omegaKbot, AKbot).
                    bounds = self.pertBound(omega, Q, omegaKbot, AKbot, self.k2)
                    #boundList.append([i, bounds[0], bounds[1]])

                    #Now use accurate values of norm of R and delta
                    rank = Util.rank(ABBA.todense())
                    gamma, U = scipy.sparse.linalg.eigsh(ABBA, rank - 1, which="LM", ncv=ABBA.shape[0])
                    #logging.debug("gamma=" + str(gamma))
                    bounds2 = self.realBound(omega, Q, gamma, AKbot, self.k2)
                    boundList.append(
                        [bounds[0], bounds[1], bounds2[0], bounds2[1]])
            else:
                logging.debug("Computing exact eigenvectors")
                self.storeInformation(subW, ABBA)

                if self.computeBound:
                    #omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2*2, ABBA.shape[0]-1), which="LM", ncv = min(10*self.k2, ABBA.shape[0]))
                    rank = Util.rank(ABBA.todense())
                    omega, Q = scipy.sparse.linalg.eigsh(ABBA, rank - 1, which="LM", ncv=ABBA.shape[0])

                    # Cache the "bottom" spectrum (beyond the top k2) for the
                    # perturbation bounds used on subsequent approx updates.
                    inds = numpy.flipud(numpy.argsort(omega))
                    omegaKbot = omega[inds[self.k2:]]
                    QKbot = Q[:, inds[self.k2:]]
                    AKbot = (QKbot * omegaKbot).dot(QKbot.T)

                    omegaSort = numpy.flipud(numpy.sort(omega))
                    # Exact steps contribute a zero bound entry.
                    boundList.append([0] * 4)
                else:
                    omega, Q = scipy.sparse.linalg.eigsh(
                        ABBA,
                        min(self.k2, ABBA.shape[0] - 1),
                        which="LM",
                        ncv=min(10 * self.k2, ABBA.shape[0]))
        elif self.alg == "nystrom":
            omega, Q = Nystrom.eigpsd(ABBA, self.k3)
        elif self.alg == "exact":
            omega, Q = scipy.sparse.linalg.eigsh(
                ABBA,
                min(self.k1, ABBA.shape[0] - 1),
                which="LM",
                ncv=min(15 * self.k1, ABBA.shape[0]))
        elif self.alg == "efficientNystrom":
            omega, Q = EfficientNystrom.eigWeight(subW, self.k2, self.k1)
        elif self.alg == "randomisedSvd":
            Q, omega, R = RandomisedSVD.svd(ABBA, self.k4)
        else:
            raise ValueError("Invalid Algorithm: " + str(self.alg))

        if self.computeSinTheta:
            # sin-theta distance between the approximate top-k1 subspace and
            # the exact one (norm of cross-projection onto the exact
            # complement).
            omegaExact, QExact = scipy.linalg.eigh(ABBA.todense())
            inds = numpy.flipud(numpy.argsort(omegaExact))
            QExactKbot = QExact[:, inds[self.k1:]]
            inds = numpy.flipud(numpy.argsort(omega))
            QApproxK = Q[:, inds[:self.k1]]
            sinThetaList.append(
                scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))

        decompositionTimeList.append(time.time() - startTime)

        if self.alg == "IASC":
            self.storeInformation(subW, ABBA)

        # --- Kmeans ---
        startTime = time.time()
        inds = numpy.flipud(numpy.argsort(omega))

        standardiser = Standardiser()
        #For some very strange reason we get an overflow when computing the
        #norm of the rows of Q even though its elements are bounded by 1.
        #We'll ignore it for now
        # NOTE(review): if normaliseArray raises on the very first iteration,
        # V is unbound on the next line (NameError) — confirm this path can't
        # trigger before i > 0. logging.warn is also a deprecated alias for
        # logging.warning.
        try:
            V = standardiser.normaliseArray(Q[:, inds[0:self.k1]].real.T).T
        except FloatingPointError as e:
            logging.warn("FloatingPointError: " + str(e))
        V = VqUtils.whiten(V)
        if i == 0:
            centroids, distortion = vq.kmeans(V, self.k1, iter=self.nb_iter_kmeans)
        else:
            # Warm-start k-means from the previous clustering; top up with
            # random rows of V if some clusters vanished.
            centroids = self.findCentroids(V, clusters[:subW.shape[0]])
            if centroids.shape[0] < self.k1:
                nb_missing_centroids = self.k1 - centroids.shape[0]
                random_centroids = V[numpy.random.randint(
                    0, V.shape[0], nb_missing_centroids), :]
                centroids = numpy.vstack((centroids, random_centroids))
            centroids, distortion = vq.kmeans(
                V, centroids)  #iter can only be 1
        clusters, distortion = vq.vq(V, centroids)
        kMeansTimeList.append(time.time() - startTime)

        clustersList.append(clusters)

        #logging.debug("subW.shape: " + str(subW.shape))
        #logging.debug("len(clusters): " + str(len(clusters)))
        #from sandbox.util.ProfileUtils import ProfileUtils
        #logging.debug("Total memory usage: " + str(ProfileUtils.memory()/10**6) + "MB")
        # Dump locals when memory usage exceeds 1GB to help diagnose leaks.
        if ProfileUtils.memory() > 10**9:
            ProfileUtils.memDisplay(locals())

        i += 1

    if verbose:
        eigenQuality = {
            "boundList": boundList,
            "sinThetaList": sinThetaList
        }
        return clustersList, numpy.array(
            (decompositionTimeList, kMeansTimeList)).T, eigenQuality
    else:
        return clustersList
# Experiment fragment: build a planted-partition random graph (dense blocks of
# probability pClust on a background of probability p), then inspect the
# conditioning of the shift Laplacian's eigenvector matrix.
# NOTE(review): numVertices, numClusters, endClusterSize, vList and
# subgraphIndicesList are defined outside this fragment — verify against the
# full script.
p = 0.05
pClust = 0.3

W = numpy.ones((numVertices, numVertices))*p
for i in range(numClusters):
    # Raise the within-cluster edge probability on each diagonal block.
    W[endClusterSize*i:endClusterSize*(i+1), endClusterSize*i:endClusterSize*(i+1)] = pClust

P = numpy.random.rand(numVertices, numVertices)
# NOTE(review): numpy.float was removed in NumPy 1.20+ — this line needs the
# builtin float (or numpy.float64) on modern NumPy.
W = numpy.array(P < W, numpy.float)

# Zero the upper triangle (including the diagonal indices returned by
# triu_indices) and symmetrise, so W is an undirected adjacency matrix.
upTriInds = numpy.triu_indices(numVertices)
W[upTriInds] = 0
W = W + W.T

graph = SparseGraph(vList)
graph.setWeightMatrix(W)

L = GraphUtils.shiftLaplacian(scipy.sparse.csr_matrix(W))
u, V = numpy.linalg.eig(L.todense())
print(V.shape)
print(numpy.linalg.cond(V))

# run with exact eigenvalue decomposition
logging.info("Running exact method")
graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)

# Disabled debugging block (triple-quoted string continues beyond this chunk).
"""
for W in graphIterator:
    graph = SparseGraph(GeneralVertexList(W.shape[0]))
    graph.setWeightMatrixSparse(W)
    components = graph.findConnectedComponents()
    print(graph)
def clusterFromIterator(self, graphListIterator, verbose=False):
    """
    Find a set of clusters for each graph weight matrix yielded by the iterator.

    For each graph: compute an eigendecomposition of its shift Laplacian using
    the algorithm selected by self.alg ("IASC", "nystrom", "exact",
    "efficientNystrom" or "randomisedSvd"), then run k-means on the top-k1
    eigenvectors to produce cluster labels.

    :param graphListIterator: iterator yielding symmetric sparse weight matrices.
        The difference between a weight matrix and the previous one should be
        positive.
    :param verbose: if True, also return per-iteration timings and boundList.
    :return: clustersList, or (clustersList, timings array, boundList) when
        verbose is True.
    """
    clustersList = []
    decompositionTimeList = []
    kMeansTimeList = []
    boundList = []
    i = 0

    for subW in graphListIterator:
        # Symmetry check only runs when assertions are enabled.
        if __debug__:
            Parameter.checkSymmetric(subW)

        if self.logStep and i % self.logStep == 0:
            logging.debug("Graph index: " + str(i))
        logging.debug("Clustering graph of size " + str(subW.shape))
        # efficientNystrom works directly on the weight matrix; every other
        # algorithm decomposes the shift Laplacian.
        if self.alg!="efficientNystrom":
            ABBA = GraphUtils.shiftLaplacian(subW)

        # --- Eigen value decomposition ---
        startTime = time.time()
        if self.alg=="IASC":
            # Every T-th graph is recomputed exactly; in between, the previous
            # omega/Q (carried over from the last iteration) are updated
            # incrementally.
            if i % self.T != 0:
                omega, Q = self.approxUpdateEig(subW, ABBA, omega, Q)

                if self.computeBound:
                    inds = numpy.flipud(numpy.argsort(omega))
                    Q = Q[:, inds]
                    omega = omega[inds]
                    # Perturbation bound using quantities cached from the last
                    # exact decomposition (omegaKbot, AKbot).
                    bounds = self.pertBound(omega, Q, omegaKbot, AKbot, self.k2)
                    #boundList.append([i, bounds[0], bounds[1]])

                    #Now use accurate values of norm of R and delta
                    rank = Util.rank(ABBA.todense())
                    gamma, U = scipy.sparse.linalg.eigsh(ABBA, rank-1, which="LM", ncv = ABBA.shape[0])
                    #logging.debug("gamma=" + str(gamma))
                    bounds2 = self.realBound(omega, Q, gamma, AKbot, self.k2)
                    boundList.append([i, bounds[0], bounds[1], bounds2[0], bounds2[1]])
            else:
                logging.debug("Computing exact eigenvectors")
                self.storeInformation(subW, ABBA)

                if self.computeBound:
                    #omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2*2, ABBA.shape[0]-1), which="LM", ncv = min(10*self.k2, ABBA.shape[0]))
                    rank = Util.rank(ABBA.todense())
                    omega, Q = scipy.sparse.linalg.eigsh(ABBA, rank-1, which="LM", ncv = ABBA.shape[0])

                    # Cache the "bottom" spectrum (beyond the top k2) for the
                    # perturbation bounds used on subsequent approx updates.
                    inds = numpy.flipud(numpy.argsort(omega))
                    omegaKbot = omega[inds[self.k2:]]
                    QKbot = Q[:, inds[self.k2:]]
                    AKbot = (QKbot*omegaKbot).dot(QKbot.T)

                    omegaSort = numpy.flipud(numpy.sort(omega))
                else:
                    omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2, ABBA.shape[0]-1),
                        which="LM", ncv = min(10*self.k2, ABBA.shape[0]))
        elif self.alg == "nystrom":
            omega, Q = Nystrom.eigpsd(ABBA, self.k3)
        elif self.alg == "exact":
            omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k1, ABBA.shape[0]-1),
                which="LM", ncv = min(15*self.k1, ABBA.shape[0]))
        elif self.alg == "efficientNystrom":
            omega, Q = EfficientNystrom.eigWeight(subW, self.k2, self.k1)
        elif self.alg == "randomisedSvd":
            Q, omega, R = RandomisedSVD.svd(ABBA, self.k4)
        else:
            raise ValueError("Invalid Algorithm: " + str(self.alg))

        decompositionTimeList.append(time.time()-startTime)

        if self.alg=="IASC":
            self.storeInformation(subW, ABBA)

        # --- Kmeans ---
        startTime = time.time()
        inds = numpy.flipud(numpy.argsort(omega))

        standardiser = Standardiser()
        #For some very strange reason we get an overflow when computing the
        #norm of the rows of Q even though its elements are bounded by 1.
        #We'll ignore it for now
        # NOTE(review): if normaliseArray raises on the very first iteration,
        # V is unbound on the next line (NameError) — confirm this path can't
        # trigger before i > 0. logging.warn is also a deprecated alias for
        # logging.warning.
        try:
            V = standardiser.normaliseArray(Q[:, inds[0:self.k1]].real.T).T
        except FloatingPointError as e:
            logging.warn("FloatingPointError: " + str(e))
        V = VqUtils.whiten(V)
        if i == 0:
            centroids, distortion = vq.kmeans(V, self.k1, iter=self.nb_iter_kmeans)
        else:
            # Warm-start k-means from the previous clustering; top up with
            # random rows of V if some clusters vanished.
            centroids = self.findCentroids(V, clusters[:subW.shape[0]])
            if centroids.shape[0] < self.k1:
                nb_missing_centroids = self.k1 - centroids.shape[0]
                random_centroids = V[numpy.random.randint(0, V.shape[0], nb_missing_centroids),:]
                centroids = numpy.vstack((centroids, random_centroids))
            centroids, distortion = vq.kmeans(V, centroids) #iter can only be 1
        clusters, distortion = vq.vq(V, centroids)
        kMeansTimeList.append(time.time()-startTime)

        clustersList.append(clusters)

        #logging.debug("subW.shape: " + str(subW.shape))
        #logging.debug("len(clusters): " + str(len(clusters)))
        #from apgl.util.ProfileUtils import ProfileUtils
        #logging.debug("Total memory usage: " + str(ProfileUtils.memory()/10**6) + "MB")
        # Dump locals when memory usage exceeds 1GB to help diagnose leaks.
        if ProfileUtils.memory() > 10**9:
            ProfileUtils.memDisplay(locals())

        i += 1

    if verbose:
        return clustersList, numpy.array((decompositionTimeList,
            kMeansTimeList)).T, boundList
    else:
        return clustersList