clustListExact = exactClusterer.clusterFromIterator(graphIterator, False) logging.debug("Running approximate method") clustListApprox = [] for i in range(len(k2s)): graphIterator = ThreeClustIterator(p, numClusters, r).getIterator() clustListApprox.append(iascClusterers[i].clusterFromIterator(graphIterator, False)) logging.debug("Running Nystrom method") graphIterator = ThreeClustIterator(p, numClusters, r).getIterator() clustListNystrom = nystromClusterer.clusterFromIterator(graphIterator, False) if do_Nings: logging.debug("Running Nings method") graphIterator = ThreeClustIterator(p, numClusters, r).getIterator() clustListNings = ningsClusterer.cluster(graphIterator) logging.debug("Running random SVD method") graphIterator = ThreeClustIterator(p, numClusters, r).getIterator() clustListRandSVD = randSvdCluster.clusterFromIterator(graphIterator, False) # computer rand index error for each iteration # error: proportion of pairs of vertices (x,y) s.t. # (cl(x) == cl(y)) != (learned_cl(x) == learned_cl(y)) for it in range(len(ThreeClustIterator().subgraphIndicesList)): indicesList = ThreeClustIterator().subgraphIndicesList[it] numUsedVertices = len(indicesList) for i in range(len(k2s)): clustErrApprox[t, it, r, i] += GraphUtils.randIndex(clustListApprox[i][it], indicesList) clustErrExact[t, it, r] += GraphUtils.randIndex(clustListExact[it], indicesList)
def testCluster(self):
    """Run NingSpectralClustering.cluster over three small graph sequences.

    The sequences are built from slices of the weight matrix of a
    SparseGraph:

    1. 6x6, 7x7 and 8x8 snapshots of an 8-vertex graph, with one symmetric
       weight added to the matrix before the second and third snapshots.
    2. 5x5, 6x6 and 7x7 snapshots of a 7-vertex graph, with edges added to
       the graph between snapshots.
    3. The final 7x7 matrix followed by its leading 5x5 and 4x4 blocks
       (rows/columns removed).

    For the third sequence the clustering of the 5x5 snapshot, restricted
    to its first four entries, is asserted equal to the clustering of the
    4x4 snapshot, and the first clustering is compared with a spectral
    clustering computed directly via scipy.
    """
    # Call form of print: valid on Python 3 and prints the same text on
    # Python 2.
    print("< testCluster >")

    numVertices = 8
    graph = SparseGraph(GeneralVertexList(numVertices))
    for u, v in [(0, 1), (0, 2), (1, 2), (3, 4), (3, 5), (4, 5), (0, 3)]:
        graph.addEdge(u, v)

    W = graph.getWeightMatrix()

    # Snapshots are copied so the later in-place updates of W do not leak
    # into the earlier entries.
    snapshots = []
    snapshots.append(W[0:6, 0:6].copy())
    W[1, 6] += 1
    W[6, 1] += 1
    snapshots.append(W[0:7, 0:7].copy())
    W[4, 7] += 1
    W[7, 4] += 1
    snapshots.append(W.copy())
    graphIterator = iter(snapshots)

    k = 2
    clusterer = NingSpectralClustering(k)
    clustersList = clusterer.cluster(toSparseGraphListIterator(graphIterator))

    #Why are the bottom rows of Q still zero?

    #Try example in which only edges change
    numVertices = 7
    graph = SparseGraph(GeneralVertexList(numVertices))
    for u, v in [(0, 1), (0, 2), (1, 2), (3, 4)]:
        graph.addEdge(u, v)

    WList = []
    W = graph.getWeightMatrix()
    WList.append(W[0:5, 0:5].copy())

    for u, v in [(3, 5), (4, 5)]:
        graph.addEdge(u, v)
    W = graph.getWeightMatrix()
    WList.append(W[0:6, 0:6].copy())

    for u, v in [(0, 6), (1, 6), (2, 6)]:
        graph.addEdge(u, v)
    W = graph.getWeightMatrix()
    WList.append(W[0:7, 0:7].copy())

    iterator = iter(WList)
    clustersList = clusterer.cluster(toSparseGraphListIterator(iterator))

    #Seems to work, amazingly
    #print(clustersList)

    #Try removing rows/cols
    W2 = W[0:5, 0:5]
    W3 = W[0:4, 0:4]
    WList = [W, W2, W3]
    iterator = iter(WList)
    clustersList = clusterer.cluster(toSparseGraphListIterator(iterator))

    #nptst.assert_array_equal(clustersList[0][0:5], clustersList[1])
    nptst.assert_array_equal(clustersList[1][0:4], clustersList[2])

    #Make sure 1st clustering (without updates) is correct
    L = GraphUtils.normalisedLaplacianRw(scipy.sparse.csr_matrix(W))
    # Seed numpy so the random start vector v0 passed to eigs is repeatable.
    numpy.random.seed(21)
    lmbda, Q = scipy.sparse.linalg.eigs(
        L,
        min(k, L.shape[0]-1),
        which="SM",
        ncv=min(20*k, L.shape[0]),
        v0=numpy.random.rand(L.shape[0]),
    )

    V = VqUtils.whiten(Q)
    centroids, distortion = vq.kmeans(V, k, iter=20)
    clusters, distortion = vq.vq(V, centroids)

    #This should be equal but the eigenvector computation is unstable
    #even with repeated runs (and no way to set the seed)
    # NOTE(review): given the comment above, this assertion may be flaky --
    # confirm it is meant to stay enabled.
    nptst.assert_array_equal(clusters, clustersList[0])
graphIterator = getGraphIterator() clustListNystrom, timeListNystrom, eigenQualityNystrom = nystromClusterer.clusterFromIterator( graphIterator, verbose=True ) if args.runRandomisedSvd: # run with Nystrom approximation logging.info("Running randomised svd method") graphIterator = getGraphIterator() clustListRSvd, timeListRSvd, eigenQualityRSvd = RSvdClusterer.clusterFromIterator(graphIterator, verbose=True) if args.runNing: # run with Ning's incremental approximation logging.info("Running Nings method") graphIterator = getGraphIterator() clustListNings, timeListNings, eigenQualityNings = ningsClusterer.cluster(graphIterator, verbose=True) # print clusters if args.runExact: logging.info("learned clustering with exact eigenvalue decomposition") for i in range(len(clustersExact)): clusters = clustersExact[i] print(clusters) if args.runIASC: logging.info("learned clustering with our approximation approach") for i in range(len(clustListApprox)): clusters = clustListApprox[i] print(clusters) if args.runNing: logging.info("learned clustering with Nings approximation approach") for i in range(len(clustListNings)):
logging.debug("Running approximate method") resApproxList = [] for i in range(len(k2s)): graphIterator = ThreeClustIterator(p, numClusters, r).getIterator() resApproxList.append(iascClusterers[i].clusterFromIterator(graphIterator, True)) logging.debug("Running Nystrom method") resNystromList = [] for i in range(len(k3s)): graphIterator = ThreeClustIterator(p, numClusters, r).getIterator() resNystromList.append(nystromClusterers[i].clusterFromIterator(graphIterator, True)) if do_Nings: logging.debug("Running Nings method") graphIterator = ThreeClustIterator(p, numClusters, r).getIterator() resNings = ningsClusterer.cluster(graphIterator, True) logging.debug("Running random SVD method") resRandSVDList = [] for i in range(len(k4s)): graphIterator = ThreeClustIterator(p, numClusters, r).getIterator() resRandSVDList.append(randSvdClusterers[i].clusterFromIterator(graphIterator, True)) # computer rand index error for each iteration # error: proportion of pairs of vertices (x,y) s.t. # (cl(x) == cl(y)) != (learned_cl(x) == learned_cl(y)) for it in range(len(ThreeClustIterator().subgraphIndicesList)): indicesList = ThreeClustIterator().subgraphIndicesList[it] numUsedVertices = len(indicesList) for k in range(len(k2s)):