def testEigWeight(self):
    tol = 10**-3
    n = 100
    W = numpy.random.rand(n, n)
    W = W.dot(W.T)
    w, U = numpy.linalg.eig(W)
    W = scipy.sparse.csr_matrix(W)

    k = 4
    m = 5
    lmbda, V = EfficientNystrom.eigWeight(W, m, k)
    MHat = V.dot(numpy.diag(lmbda)).dot(V.T)

    I = scipy.sparse.eye(n, n)
    L = GraphUtils.normalisedLaplacianSym(W)
    M = I - L

    #print(V)
    numpy.linalg.norm(M.todense() - MHat)
    #print(numpy.linalg.norm(M.todense()))
    #self.assertTrue(numpy.linalg.norm(W - WHat) < tol)

    # For fixed k, increasing m should improve the approximation (though not
    # necessarily monotonically)
    lastError = 10
    for m in range(k + 1, n + 1, 10):
        lmbda, V = EfficientNystrom.eigWeight(W, m, k)
        #print(V)
        MHat = V.dot(numpy.diag(lmbda)).dot(V.T)
        error = numpy.linalg.norm(M.todense() - MHat)
        self.assertTrue(error <= lastError)
        lastError = error
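# Illustrative sketch (not part of the test suite): a minimal plain Nystrom
# approximation of a symmetric PSD matrix, showing why the error in the test
# above should shrink towards the best rank-k error as the number of sampled
# columns m grows. EfficientNystrom.eigWeight presumably uses a related
# weighted scheme; its exact algorithm is not shown here, and the helper below
# only assumes a generic symmetric PSD matrix and standard numpy calls.
import numpy

def nystromApprox(M, m, k):
    # Sample m columns uniformly, form C = M[:, inds] and the m x m block
    # W = M[inds, inds], then approximate M ~ C pinv_k(W) C.T (rank <= k)
    n = M.shape[0]
    inds = numpy.random.choice(n, m, replace=False)
    C = M[:, inds]
    W = M[numpy.ix_(inds, inds)]
    w, U = numpy.linalg.eigh(W)
    topk = numpy.argsort(w)[-k:]   # keep the k largest eigenvalues of W
    WinvK = U[:, topk].dot(numpy.diag(1.0 / w[topk])).dot(U[:, topk].T)
    return C.dot(WinvK).dot(C.T)

if __name__ == "__main__":
    numpy.random.seed(21)
    n, k = 100, 4
    A = numpy.random.rand(n, n)
    M = A.dot(A.T)                 # symmetric PSD test matrix
    for m in range(k + 1, n + 1, 20):
        print(m, numpy.linalg.norm(M - nystromApprox(M, m, k)))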
graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
clustListNings = ningsClusterer.cluster(graphIterator)

logging.debug("Running random SVD method")
graphIterator = ThreeClustIterator(p, numClusters, r).getIterator()
clustListRandSVD = randSvdCluster.clusterFromIterator(graphIterator, False)

# Compute the Rand index error for each iteration
# error: proportion of pairs of vertices (x, y) s.t.
# (cl(x) == cl(y)) != (learned_cl(x) == learned_cl(y))
for it in range(len(ThreeClustIterator().subgraphIndicesList)):
    indicesList = ThreeClustIterator().subgraphIndicesList[it]
    numUsedVertices = len(indicesList)

    for i in range(len(k2s)):
        clustErrApprox[t, it, r, i] += GraphUtils.randIndex(clustListApprox[i][it], indicesList)
    clustErrExact[t, it, r] += GraphUtils.randIndex(clustListExact[it], indicesList)
    clustErrNystrom[t, it, r] += GraphUtils.randIndex(clustListNystrom[it], indicesList)
    if do_Nings:
        clustErrNings[t, it, r] += GraphUtils.randIndex(clustListNings[it], indicesList)
    clustErrRandSvd[t, it, r] += GraphUtils.randIndex(clustListRandSVD[it], indicesList)

numpy.savez(fileName, clustErrApprox, clustErrExact, clustErrNystrom, clustErrNings, clustErrRandSvd)
logging.debug("Saved results as " + fileName)
# Otherwise, load previously computed results
else:
    errors = numpy.load(fileName)
    clustErrApprox, clustErrExact, clustErrNystrom, clustErrNings, clustErrRandSvd = \
        errors["arr_0"], errors["arr_1"], errors["arr_2"], errors["arr_3"], errors["arr_4"]

meanClustErrExact = clustErrExact.mean(2)
meanClustErrApprox = clustErrApprox.mean(2)
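# Illustrative helper (not the library call): the error accumulated above is
# the pairwise-disagreement form of the Rand index described in the comment.
# GraphUtils.randIndex is assumed to return this quantity; the standalone
# function below computes it directly for two labellings.
import itertools
import numpy

def pairwiseClusterError(learnedLabels, trueLabels):
    learnedLabels = numpy.asarray(learnedLabels)
    trueLabels = numpy.asarray(trueLabels)
    disagreements = 0
    pairs = 0
    for x, y in itertools.combinations(range(len(trueLabels)), 2):
        sameTrue = trueLabels[x] == trueLabels[y]
        sameLearned = learnedLabels[x] == learnedLabels[y]
        disagreements += int(sameTrue != sameLearned)
        pairs += 1
    return disagreements / float(pairs)

# A perfect clustering up to label permutation gives error 0
print(pairwiseClusterError([1, 1, 0, 0], [0, 0, 2, 2]))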
if not os.path.exists(resultsDir):
    logging.warn("Directory did not exist: " + resultsDir + ", created.")
    os.makedirs(resultsDir)

iterator = getIterator()
subgraphIndicesList = []
for W in iterator:
    logging.debug("Graph size " + str(W.shape[0]))
    subgraphIndicesList.append(range(W.shape[0]))

# Try to find the number of clusters at the end of the sequence by looking at the eigengap
k = 2
if findEigs:
    L = GraphUtils.normalisedLaplacianSym(W)
    logging.debug("Computing eigenvalues")
    omega, Q = scipy.sparse.linalg.eigsh(L, min(k, L.shape[0] - 1), which="SM", ncv=min(20 * k, L.shape[0]))
    omegaDiff = numpy.diff(omega)
else:
    omega = numpy.zeros(k)
    omegaDiff = numpy.zeros(k - 1)

# No obvious number of clusters and there are many edges
graph = SparseGraph(W.shape[0], W=W)
logging.debug("Computing graph statistics")
graphStats = GraphStatistics()
statsMatrix = graphStats.sequenceScalarStats(graph, subgraphIndicesList, slowStats=False)
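# Illustrative sketch of the eigengap heuristic mentioned in the comment above:
# given the smallest eigenvalues omega of the normalised Laplacian, a common
# choice of cluster count is the index of the largest consecutive gap. This
# helper is an assumption for illustration only, not the selection rule used by
# the script (which simply records omegaDiff).
import numpy

def clustersFromEigengap(omega):
    omega = numpy.sort(omega)
    gaps = numpy.diff(omega)
    # k clusters if the gap between eigenvalues k-1 and k is the largest
    return int(numpy.argmax(gaps)) + 1

# Three eigenvalues near zero followed by a jump suggests 3 clusters
print(clustersFromEigengap(numpy.array([0.0, 0.01, 0.02, 0.6, 0.7])))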
# Examine how the symmetric normalised Laplacian changes when a single edge is
# added to a random weight matrix.
import numpy
import scipy.sparse
from apgl.graph import GraphUtils
from sandbox.util.Util import Util

numpy.set_printoptions(suppress=True, precision=3)

n = 10
W1 = scipy.sparse.rand(n, n, 0.5).todense()
W1 = W1.T.dot(W1)

W2 = W1.copy()
W2[1, 2] = 1
W2[2, 1] = 1

print("W1=" + str(W1))
print("W2=" + str(W2))

L1 = GraphUtils.normalisedLaplacianSym(scipy.sparse.csr_matrix(W1))
L2 = GraphUtils.normalisedLaplacianSym(scipy.sparse.csr_matrix(W2))

deltaL = L2 - L1
print("L1=" + str(L1.todense()))
print("L2=" + str(L2.todense()))
print("deltaL=" + str(deltaL.todense()))
print("rank(deltaL)=" + str(Util.rank(deltaL.todense())))
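# Illustrative aside (plain numpy, independent of the script above): for the
# unnormalised Laplacian L = D - W, adding a single edge (i, j) changes L by
# (e_i - e_j)(e_i - e_j)^T, a rank-1 update. The script above probes what
# happens for the normalised Laplacian, where the change is no longer rank one
# in general because the degree renormalisation rescales whole rows and columns
# of the affected vertices. Quick check of the rank-1 claim:
import numpy

def unnormalisedLaplacian(W):
    return numpy.diag(numpy.asarray(W).sum(1)) - W

numpy.random.seed(21)
n = 10
W1 = numpy.random.rand(n, n)
W1 = W1.T.dot(W1)
W2 = W1.copy()
W2[1, 2] += 1
W2[2, 1] += 1
deltaL = unnormalisedLaplacian(W2) - unnormalisedLaplacian(W1)
print(numpy.linalg.matrix_rank(deltaL))   # prints 1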
logging.debug("Running random SVD method") resRandSVDList = [] for i in range(len(k4s)): graphIterator = ThreeClustIterator(p, numClusters, r).getIterator() resRandSVDList.append(randSvdClusterers[i].clusterFromIterator(graphIterator, True)) # computer rand index error for each iteration # error: proportion of pairs of vertices (x,y) s.t. # (cl(x) == cl(y)) != (learned_cl(x) == learned_cl(y)) for it in range(len(ThreeClustIterator().subgraphIndicesList)): indicesList = ThreeClustIterator().subgraphIndicesList[it] numUsedVertices = len(indicesList) for k in range(len(k2s)): clustErrApprox[t, it, r, k] = GraphUtils.randIndex(resApproxList[k][0][it], indicesList) clustErrExact[t, it, r] = GraphUtils.randIndex(resExact[0][it], indicesList) for k in range(len(k3s)): clustErrNystrom[t, it, r, k] = GraphUtils.randIndex(resNystromList[k][0][it], indicesList) if do_Nings: clustErrNings[t, it, r] = GraphUtils.randIndex(resNings[0][it], indicesList) for k in range(len(k4s)): clustErrRandSvd[t, it, r, k] = GraphUtils.randIndex(resRandSVDList[k][0][it], indicesList) # store sin(Theta) for k in range(len(k2s)): sinThetaApprox[t, :, r, k] = resApproxList[k][2]["sinThetaList"] sinThetaExact[t, :, r] = resExact[2]["sinThetaList"] for k in range(len(k3s)): sinThetaNystrom[t, :, r, k] = resNystromList[k][2]["sinThetaList"] if do_Nings:
def cluster(self, graphIterator, verbose=False):
    """
    Find a set of clusters using the graph and list of subgraph indices.
    """
    tol = 10**-6
    clustersList = []
    decompositionTimeList = []
    kMeansTimeList = []
    boundList = []
    sinThetaList = []
    numpy.random.seed(self.seed)

    iter = 0
    for W in graphIterator:
        startTime = time.time()
        logging.debug("Graph index:" + str(iter))

        startTime = time.time()
        if iter % self.T != 0:
            # --- Figure out the similarity changes in existing edges ---
            n = lastW.shape[0]
            deltaW = W.copy()
            # Vertices are removed
            if n > W.shape[0]:
                #deltaW = Util.extendArray(deltaW, lastW.shape)
                deltaW = SparseUtils.resize(deltaW, lastW.shape)
            # Vertices are added
            elif n < W.shape[0]:
                lastWInds = lastW.nonzero()
                lastWVal = scipy.zeros(len(lastWInds[0]))
                for i, j, k in zip(lastWInds[0], lastWInds[1], range(len(lastWInds[0]))):
                    lastWVal[k] = lastW[i, j]
                lastW = scipy.sparse.csr_matrix((lastWVal, lastWInds), shape=W.shape)
            deltaW = deltaW - lastW

            # --- Update the decomposition ---
            if n < W.shape[0]:
                Q = numpy.r_[Q, numpy.zeros((W.shape[0] - Q.shape[0], Q.shape[1]))]
            lmbda, Q = self.__updateEigenSystem(lmbda, Q, deltaW, lastW)

            # --- Resize the decomposition if the graph is losing vertices ---
            if n > W.shape[0]:
                Q = Q[0:W.shape[0], :]
        else:
            logging.debug("Recomputing eigensystem")
            # We want to solve the generalised eigenproblem L v = lambda D v with
            # L and D Hermitian. scipy.sparse.linalg does not actually solve this
            # problem (from version 0.11 it solves it, but ignores the Hermitian
            # structure), so instead we solve D^{-1} L v = lambda v, where
            # D^{-1} L is no longer Hermitian.
            L = GraphUtils.normalisedLaplacianRw(W)
            lmbda, Q = scipy.sparse.linalg.eigs(L, min(self.k, L.shape[0] - 1), which="SM", ncv=min(20 * self.k, L.shape[0]), v0=numpy.random.rand(L.shape[0]))
            #n = L.shape[0]
            #inds = list(range(n))
            #Lprime = 2*scipy.sparse.csr_matrix(([1]*n, (inds, inds)), shape=(n, n)) - L
            #lmbda, Q = scipy.sparse.linalg.eigs(Lprime, min(self.k, L.shape[0]-1), which="LM", ncv=min(20*self.k, L.shape[0]), v0=numpy.random.rand(L.shape[0]))
            #lmbda = 2 - lmbda
            lmbda = lmbda.real
            Q = Q.real

        if self.computeSinTheta:
            L = GraphUtils.normalisedLaplacianRw(W)
            lmbdaExact, QExact = scipy.linalg.eig(L.todense())
            lmbdaExact = lmbdaExact.real
            QExact = QExact.real
            indsExact = numpy.argsort(lmbdaExact)
            QExactKbot = QExact[:, indsExact[self.k:]]
            #UQExactKbot, sQExactKbot, VhQExactKbot = scipy.linalg.svd(QExactKbot)
            inds = numpy.argsort(lmbda)
            QApproxK = Q[:, inds[:self.k]]
            #UQApproxK, sQApproxK, VhQApproxK = scipy.linalg.svd(QApproxK)
            #sinThetaList.append(scipy.linalg.norm(UQExactKbot.T.dot(UQApproxK)))
            sinThetaList.append(scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))

        decompositionTimeList.append(time.time() - startTime)

        # Now do the actual clustering
        startTime = time.time()
        V = VqUtils.whiten(Q)
        centroids, distortion = vq.kmeans(V, self.k, iter=self.kmeansIter)
        clusters, distortion = vq.vq(V, centroids)
        clustersList.append(clusters)
        kMeansTimeList.append(time.time() - startTime)

        lastW = W.copy()
        iter += 1

    if verbose:
        eigenQuality = {"boundList": boundList, "sinThetaList": sinThetaList}
        return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, eigenQuality
    else:
        return clustersList
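# Illustrative aside on the computeSinTheta branch above: with orthonormal
# bases, the Frobenius norm of QExactKbot.T.dot(QApproxK) aggregates the sines
# of the principal angles between the approximate k-dimensional eigenspace and
# the exact one; it is 0 when the subspaces coincide. In the method above the
# eigenvector matrices come from scipy.linalg.eig and are not necessarily
# orthonormal, so this is only the idealised picture. Minimal self-contained
# check with numpy/scipy:
import numpy
import scipy.linalg

numpy.random.seed(21)
n, k = 6, 2
QExact, _ = numpy.linalg.qr(numpy.random.rand(n, n))   # orthonormal basis of R^n
QExactK, QExactKbot = QExact[:, :k], QExact[:, k:]

# Same subspace: sin(Theta) is 0 up to rounding
print(scipy.linalg.norm(QExactKbot.T.dot(QExactK)))
# A slightly perturbed subspace: small but nonzero sin(Theta)
QApproxK, _ = numpy.linalg.qr(QExactK + 0.1 * numpy.random.rand(n, k))
print(scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))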
def cluster(self, graphIterator, verbose=False):
    """
    Find a set of clusters using the graph and list of subgraph indices.
    """
    tol = 10**-6
    clustersList = []
    decompositionTimeList = []
    kMeansTimeList = []
    boundList = []
    numpy.random.seed(self.seed)

    iter = 0
    for W in graphIterator:
        startTime = time.time()
        logging.debug("Graph index:" + str(iter))

        startTime = time.time()
        if iter % self.T != 0:
            # --- Figure out the similarity changes in existing edges ---
            n = lastW.shape[0]
            deltaW = W.copy()
            # Vertices are removed
            if n > W.shape[0]:
                #deltaW = Util.extendArray(deltaW, lastW.shape)
                deltaW = SparseUtils.resize(deltaW, lastW.shape)
            # Vertices are added
            elif n < W.shape[0]:
                lastWInds = lastW.nonzero()
                lastWVal = scipy.zeros(len(lastWInds[0]))
                for i, j, k in zip(lastWInds[0], lastWInds[1], range(len(lastWInds[0]))):
                    lastWVal[k] = lastW[i, j]
                lastW = scipy.sparse.csr_matrix((lastWVal, lastWInds), shape=W.shape)
            deltaW = deltaW - lastW

            # --- Update the decomposition ---
            if n < W.shape[0]:
                Q = numpy.r_[Q, numpy.zeros((W.shape[0] - Q.shape[0], Q.shape[1]))]
            lmbda, Q = self.__updateEigenSystem(lmbda, Q, deltaW, lastW)

            # --- Resize the decomposition if the graph is losing vertices ---
            if n > W.shape[0]:
                Q = Q[0:W.shape[0], :]
        else:
            logging.debug("Recomputing eigensystem")
            # We want to solve the generalised eigenproblem L v = lambda D v with
            # L and D Hermitian. scipy.sparse.linalg does not actually solve this
            # problem (from version 0.11 it solves it, but ignores the Hermitian
            # structure), so instead we solve D^{-1} L v = lambda v, where
            # D^{-1} L is no longer Hermitian.
            L = GraphUtils.normalisedLaplacianRw(W)
            lmbda, Q = scipy.sparse.linalg.eigs(L, min(self.k, L.shape[0] - 1), which="SM", ncv=min(20 * self.k, L.shape[0]), v0=numpy.random.rand(L.shape[0]))
            lmbda = lmbda.real
            Q = Q.real

        decompositionTimeList.append(time.time() - startTime)

        # Now do the actual clustering
        startTime = time.time()
        V = VqUtils.whiten(Q)
        centroids, distortion = vq.kmeans(V, self.k, iter=self.kmeansIter)
        clusters, distortion = vq.vq(V, centroids)
        clustersList.append(clusters)
        kMeansTimeList.append(time.time() - startTime)

        lastW = W.copy()
        iter += 1

    if verbose:
        return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, boundList
    else:
        return clustersList
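# Illustrative sketch of the idea behind the iter % self.T branch above: rather
# than recomputing the eigensystem for every graph in the sequence, an existing
# decomposition is updated from the change deltaW. __updateEigenSystem itself
# is not shown here, and this snippet only demonstrates the simplest
# first-order perturbation update, lambda_i' ~ lambda_i + q_i^T deltaA q_i,
# for a small symmetric change deltaA to a symmetric matrix A.
import numpy

numpy.random.seed(21)
n = 50
A = numpy.random.rand(n, n)
A = (A + A.T) / 2
lmbda, Q = numpy.linalg.eigh(A)

deltaA = 1e-3 * numpy.random.rand(n, n)
deltaA = (deltaA + deltaA.T) / 2

# First-order estimate of the new eigenvalues versus an exact recomputation
lmbdaUpdated = lmbda + numpy.diag(Q.T.dot(deltaA).dot(Q))
lmbdaExact = numpy.linalg.eigvalsh(A + deltaA)
print(numpy.max(numpy.abs(numpy.sort(lmbdaUpdated) - lmbdaExact)))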