def testResize(self):
    """Check that SparseUtils.resize both shrinks and grows a CSR matrix.

    Shrinking must keep all entries inside the new bounds; growing must
    keep every original entry (and hence the nnz count) unchanged.
    """
    numRows = 10
    numCols = 10
    A = scipy.sparse.rand(numRows, numCols, 0.1, "csr")

    # Shrink: entries within the (5, 5) window are preserved.
    B = SparseUtils.resize(A, (5, 5))
    # assertEquals is a deprecated alias (removed in Python 3.12);
    # use assertEqual instead.
    self.assertEqual(B.shape, (5, 5))
    for i in range(5):
        for j in range(5):
            self.assertEqual(B[i, j], A[i, j])

    # Grow: shape changes, but all original entries and nnz survive.
    B = SparseUtils.resize(A, (15, 15))
    self.assertEqual(B.shape, (15, 15))
    self.assertEqual(B.nnz, A.nnz)
    for i in range(numRows):
        for j in range(numCols):
            self.assertEqual(B[i, j], A[i, j])
def testResize(self):
    """Verify SparseUtils.resize for both a smaller and a larger target shape.

    When shrinking, the surviving window must match the source matrix;
    when growing, every original entry (and the nnz count) must be kept.
    """
    numRows = 10
    numCols = 10
    A = scipy.sparse.rand(numRows, numCols, 0.1, "csr")

    # Resize down to (5, 5): compare the overlapping region element-wise.
    B = SparseUtils.resize(A, (5, 5))
    # Fixed deprecated assertEquals (removed in Python 3.12) -> assertEqual.
    self.assertEqual(B.shape, (5, 5))
    for i in range(5):
        for j in range(5):
            self.assertEqual(B[i, j], A[i, j])

    # Resize up to (15, 15): nothing is lost, nnz is unchanged.
    B = SparseUtils.resize(A, (15, 15))
    self.assertEqual(B.shape, (15, 15))
    self.assertEqual(B.nnz, A.nnz)
    for i in range(numRows):
        for j in range(numCols):
            self.assertEqual(B[i, j], A[i, j])
def cluster(self, graphIterator, verbose=False):
    """
    Find a set of clusters using the graph and list of subgraph indices.

    For each graph W yielded by graphIterator, the k smallest eigenvectors
    of the random-walk normalised Laplacian are obtained: recomputed from
    scratch every self.T graphs, otherwise incrementally updated from the
    previous decomposition. The whitened eigenvectors are then clustered
    with k-means.

    :param graphIterator: iterable yielding sparse adjacency matrices W
        (sizes may change between graphs).
    :param verbose: if True, also return timing and eigen-quality data.
    :return: clustersList, or when verbose is True the tuple
        (clustersList, array of (decomposition, k-means) times per graph,
        eigenQuality dict with "boundList" and "sinThetaList").
    """
    tol = 10**-6  # NOTE(review): unused in this method -- looks like a leftover
    clustersList = []
    decompositionTimeList = []
    kMeansTimeList = []
    boundList = []  # never filled here; returned empty inside eigenQuality
    sinThetaList = []
    numpy.random.seed(self.seed)
    iter = 0  # graph counter (NOTE: shadows the builtin `iter`)
    for W in graphIterator:
        startTime = time.time()
        logging.debug("Graph index:" + str(iter))
        startTime = time.time()  # reassigned immediately; timing starts here
        if iter % self.T != 0:
            # Incremental path. Safe on state: iter == 0 on the first graph,
            # so the else-branch below runs first and defines lmbda, Q, lastW.
            # --- Figure out the similarity changes in existing edges ---
            n = lastW.shape[0]
            deltaW = W.copy()
            # Vertices are removed: shrink the new graph's copy to compare
            # against the previous one.
            if n > W.shape[0]:
                #deltaW = Util.extendArray(deltaW, lastW.shape)
                deltaW = SparseUtils.resize(deltaW, lastW.shape)
            # Vertices added: rebuild lastW with W's (larger) shape, keeping
            # all of its nonzero entries.
            elif n < W.shape[0]:
                lastWInds = lastW.nonzero()
                lastWVal = scipy.zeros(len(lastWInds[0]))
                for i, j, k in zip(lastWInds[0], lastWInds[1], range(len(lastWInds[0]))):
                    lastWVal[k] = lastW[i, j]
                lastW = scipy.sparse.csr_matrix((lastWVal, lastWInds), shape=W.shape)
            deltaW = deltaW - lastW
            # --- Update the decomposition ---
            if n < W.shape[0]:
                # Pad Q with zero rows for the newly added vertices.
                # Q = numpy.r_[Q, numpy.zeros((W.shape[0]-Q.shape[0], Q.shape[1]))]
                Q = numpy.r_[Q, numpy.zeros((W.shape[0] - Q.shape[0], Q.shape[1]))]
            lmbda, Q = self.__updateEigenSystem(lmbda, Q, deltaW, lastW)
            # --- resize the decomposition if the graph is losing vertices ---
            if n > W.shape[0]:
                Q = Q[0:W.shape[0], :]
        else:
            logging.debug("Recomputing eigensystem")
            # We want to solve the generalized eigen problem $L.v = lambda.D.v$
            # with L and D hermitians.
            # scipy.sparse.linalg does not solve this problem actually (it
            # solves it, forgetting about hermitian information, from version
            # 0.11).
            # So we will solve $D^{-1}.L.v = lambda.v$, where $D^{-1}.L$ is
            # no longer hermitian.
            L = GraphUtils.normalisedLaplacianRw(W)
            lmbda, Q = scipy.sparse.linalg.eigs(
                L, min(self.k, L.shape[0] - 1), which="SM",
                ncv=min(20 * self.k, L.shape[0]),
                v0=numpy.random.rand(L.shape[0]))
            # n = L.shape[0]
            # inds = list(range(n))
            # Lprime = 2*scipy.sparse.csr_matrix( ([1]*n, (inds,inds)), shape=(n,n))-L
            # lmbda, Q = scipy.sparse.linalg.eigs(Lprime, min(self.k, L.shape[0]-1), which="LM", ncv = min(20*self.k, L.shape[0]), v0=numpy.random.rand(L.shape[0]))
            # lmbda = 2-lmbda
            # eigs works on a non-hermitian operator, so drop any (tiny)
            # imaginary components.
            lmbda = lmbda.real
            Q = Q.real
        if self.computeSinTheta:
            # Quality diagnostic: compare the current k-dimensional subspace
            # against a dense, exact eigendecomposition (expensive).
            L = GraphUtils.normalisedLaplacianRw(W)
            lmbdaExact, QExact = scipy.linalg.eig(L.todense())
            lmbdaExact = lmbdaExact.real
            QExact = QExact.real
            indsExact = numpy.argsort(lmbdaExact)
            # Exact eigenvectors OUTSIDE the k smallest: projection of the
            # approximate basis onto these measures the subspace error.
            QExactKbot = QExact[:, indsExact[self.k:]]
            # UQExactKbot, sQExactKbot, VhQExactKbot = scipy.linalg.svd(QExactKbot)
            inds = numpy.argsort(lmbda)
            QApproxK = Q[:, inds[:self.k]]
            # UQApproxK, sQApproxK, VhQApproxK = scipy.linalg.svd(QApproxK)
            # sinThetaList.append(scipy.linalg.norm(UQExactKbot.T.dot(UQApproxK)))
            sinThetaList.append(scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))
            # print("blop", UQExactKbot.shape, UQApproxK.shape, sinThetaList[-1])
            # UQExactK, sQExactK, VhQExactK = scipy.linalg.svd(QExact[:, indsExact[:self.k]])
            # print("blop", scipy.linalg.norm(UQExactKbot.T.dot(UQExactK)))
            # print("blop", lmbdaExact[indsExact[:10]], lmbda[inds[:10]], sep = "\n")
            # quit()
        decompositionTimeList.append(time.time() - startTime)
        # Now do actual clustering: whiten the eigenvector rows and run k-means.
        startTime = time.time()
        V = VqUtils.whiten(Q)
        centroids, distortion = vq.kmeans(V, self.k, iter=self.kmeansIter)
        clusters, distortion = vq.vq(V, centroids)
        clustersList.append(clusters)
        kMeansTimeList.append(time.time() - startTime)
        lastW = W.copy()
        iter += 1
    if verbose:
        eigenQuality = {"boundList": boundList, "sinThetaList": sinThetaList}
        return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, eigenQuality
    else:
        return clustersList
def cluster(self, graphIterator, verbose=False):
    """
    Find a set of clusters using the graph and list of subgraph indices.

    Every self.T graphs the eigendecomposition of the random-walk
    normalised Laplacian is recomputed from scratch; in between it is
    incrementally updated from the previous graph's decomposition. The
    whitened eigenvectors are then clustered using k-means.

    :param graphIterator: iterable yielding sparse adjacency matrices W
        (the number of vertices may change between graphs).
    :param verbose: if True, also return timing and eigen-quality data.
    :return: clustersList, or when verbose is True the tuple
        (clustersList, array of (decomposition, k-means) times per graph,
        eigenQuality dict with "boundList" and "sinThetaList").
    """
    tol = 10**-6  # NOTE(review): unused in this method -- looks like a leftover
    clustersList = []
    decompositionTimeList = []
    kMeansTimeList = []
    boundList = []  # never filled here; returned empty inside eigenQuality
    sinThetaList = []
    numpy.random.seed(self.seed)
    iter = 0  # graph counter (NOTE: shadows the builtin `iter`)
    for W in graphIterator:
        startTime = time.time()
        logging.debug("Graph index:" + str(iter))
        startTime = time.time()  # reassigned immediately; timing starts here
        if iter % self.T != 0:
            # Incremental path. iter == 0 on the first graph, so the
            # else-branch below always runs first and defines lmbda, Q, lastW.
            # --- Figure out the similarity changes in existing edges ---
            n = lastW.shape[0]
            deltaW = W.copy()
            # Vertices are removed: shrink the new graph's copy down to the
            # previous graph's shape before differencing.
            if n > W.shape[0]:
                #deltaW = Util.extendArray(deltaW, lastW.shape)
                deltaW = SparseUtils.resize(deltaW, lastW.shape)
            # Vertices added: rebuild lastW at W's (larger) shape, keeping
            # all of its nonzero entries.
            elif n < W.shape[0]:
                lastWInds = lastW.nonzero()
                lastWVal = scipy.zeros(len(lastWInds[0]))
                for i, j, k in zip(lastWInds[0], lastWInds[1], range(len(lastWInds[0]))):
                    lastWVal[k] = lastW[i, j]
                lastW = scipy.sparse.csr_matrix((lastWVal, lastWInds), shape=W.shape)
            deltaW = deltaW - lastW
            # --- Update the decomposition ---
            if n < W.shape[0]:
                # Pad Q with zero rows for the newly added vertices.
                # Q = numpy.r_[Q, numpy.zeros((W.shape[0]-Q.shape[0], Q.shape[1]))]
                Q = numpy.r_[Q, numpy.zeros((W.shape[0] - Q.shape[0], Q.shape[1]))]
            lmbda, Q = self.__updateEigenSystem(lmbda, Q, deltaW, lastW)
            # --- resize the decomposition if the graph is losing vertices ---
            if n > W.shape[0]:
                Q = Q[0:W.shape[0], :]
        else:
            logging.debug("Recomputing eigensystem")
            # We want to solve the generalized eigen problem $L.v = lambda.D.v$
            # with L and D hermitians.
            # scipy.sparse.linalg does not solve this problem actually (it
            # solves it, forgetting about hermitian information, from version
            # 0.11).
            # So we will solve $D^{-1}.L.v = lambda.v$, where $D^{-1}.L$ is
            # no longer hermitian.
            L = GraphUtils.normalisedLaplacianRw(W)
            lmbda, Q = scipy.sparse.linalg.eigs(
                L, min(self.k, L.shape[0] - 1), which="SM",
                ncv=min(20 * self.k, L.shape[0]),
                v0=numpy.random.rand(L.shape[0]))
            # n = L.shape[0]
            # inds = list(range(n))
            # Lprime = 2*scipy.sparse.csr_matrix( ([1]*n, (inds,inds)), shape=(n,n))-L
            # lmbda, Q = scipy.sparse.linalg.eigs(Lprime, min(self.k, L.shape[0]-1), which="LM", ncv = min(20*self.k, L.shape[0]), v0=numpy.random.rand(L.shape[0]))
            # lmbda = 2-lmbda
            # eigs works on a non-hermitian operator, so drop any (tiny)
            # imaginary components.
            lmbda = lmbda.real
            Q = Q.real
        if self.computeSinTheta:
            # Quality diagnostic: compare the current k-dimensional subspace
            # against a dense, exact eigendecomposition (expensive).
            L = GraphUtils.normalisedLaplacianRw(W)
            lmbdaExact, QExact = scipy.linalg.eig(L.todense())
            lmbdaExact = lmbdaExact.real
            QExact = QExact.real
            indsExact = numpy.argsort(lmbdaExact)
            # Exact eigenvectors OUTSIDE the k smallest: the norm of the
            # approximate basis projected onto them measures subspace error.
            QExactKbot = QExact[:, indsExact[self.k:]]
            # UQExactKbot, sQExactKbot, VhQExactKbot = scipy.linalg.svd(QExactKbot)
            inds = numpy.argsort(lmbda)
            QApproxK = Q[:, inds[:self.k]]
            # UQApproxK, sQApproxK, VhQApproxK = scipy.linalg.svd(QApproxK)
            # sinThetaList.append(scipy.linalg.norm(UQExactKbot.T.dot(UQApproxK)))
            sinThetaList.append(scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))
            # print("blop", UQExactKbot.shape, UQApproxK.shape, sinThetaList[-1])
            # UQExactK, sQExactK, VhQExactK = scipy.linalg.svd(QExact[:, indsExact[:self.k]])
            # print("blop", scipy.linalg.norm(UQExactKbot.T.dot(UQExactK)))
            # print("blop", lmbdaExact[indsExact[:10]], lmbda[inds[:10]], sep = "\n")
            # quit()
        decompositionTimeList.append(time.time() - startTime)
        # Now do actual clustering: whiten the eigenvector rows and run k-means.
        startTime = time.time()
        V = VqUtils.whiten(Q)
        centroids, distortion = vq.kmeans(V, self.k, iter=self.kmeansIter)
        clusters, distortion = vq.vq(V, centroids)
        clustersList.append(clusters)
        kMeansTimeList.append(time.time() - startTime)
        lastW = W.copy()
        iter += 1
    if verbose:
        eigenQuality = {"boundList": boundList, "sinThetaList": sinThetaList}
        return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, eigenQuality
    else:
        return clustersList