def testFindCentroids(self):
    # findCentroids must return one row per cluster, each row being the
    # mean of the rows of V assigned to that cluster.
    V = numpy.random.rand(10, 3)
    labels = numpy.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

    clusterer = IterativeSpectralClustering(2, 2)
    found = clusterer.findCentroids(V, labels)

    # Compute the expected centroids directly from the two halves of V.
    expected = numpy.zeros((2, 3))
    expected[0, :] = numpy.mean(V[0:5, :], 0)
    expected[1, :] = numpy.mean(V[5:, :], 0)

    self.assertTrue(numpy.linalg.norm(found - expected) < 10**-6)
def cluster():
    """Cluster the Bemol graph sequence and print the clusters found."""
    numClusters = 20      # k1: numCluster to learn
    numEigenvectors = 40  # k2: numEigenVector kept

    dataDir = PathDefaults.getDataDir() + "cluster/"
    graphIterator = getBemolGraphIterator(dataDir)

    #===========================================
    # cluster
    print("compute clusters")
    clusterer = IterativeSpectralClustering(numClusters, numEigenvectors)
    clustersList = clusterer.clusterFromIterator(graphIterator, True)

    for clusters in clustersList:
        print(clusters)
class IterativeSpectralClusteringProfile(object):
    """Profiling harness: runs IASC clustering over a growing Erdos-Renyi graph."""

    def __init__(self):
        numVertices = 1000
        graph = SparseGraph(GeneralVertexList(numVertices))
        graph = ErdosRenyiGenerator(0.1).generate(graph)

        # Subgraph sizes 100, 110, ..., up to (but excluding) numVertices.
        subgraphIndicesList = [range(size) for size in range(100, numVertices, 10)]

        self.graph = graph
        self.subgraphIndicesList = subgraphIndicesList
        self.clusterer = IterativeSpectralClustering(5, 100, T=10, alg="IASC")

    def profileClusterFromIterator(self):
        iterator = IncreasingSubgraphListIterator(self.graph, self.subgraphIndicesList)
        dataDir = PathDefaults.getDataDir() + "cluster/"
        #iterator = getBemolGraphIterator(dataDir)

        def run():
            clusterList, timeList, boundList = self.clusterer.clusterFromIterator(iterator, verbose=True)
            print(timeList.cumsum(0))

        ProfileUtils.profile('run()', globals(), locals())
class IterativeSpectralClusteringProfile(object):
    """Profile IterativeSpectralClustering on a random graph with growing subgraphs."""

    def __init__(self):
        vertexCount = 1000
        randomGraph = SparseGraph(GeneralVertexList(vertexCount))
        edgeProbability = 0.1
        randomGraph = ErdosRenyiGenerator(edgeProbability).generate(randomGraph)

        # Build the schedule of increasing subgraph sizes: 100, 110, ...
        indicesList = []
        size = 100
        while size < vertexCount:
            indicesList.append(range(size))
            size += 10

        self.graph = randomGraph
        self.subgraphIndicesList = indicesList
        self.clusterer = IterativeSpectralClustering(5, 100, T=10, alg="IASC")

    def profileClusterFromIterator(self):
        iterator = IncreasingSubgraphListIterator(self.graph, self.subgraphIndicesList)
        dataDir = PathDefaults.getDataDir() + "cluster/"
        #iterator = getBemolGraphIterator(dataDir)

        def run():
            clusterList, timeList, boundList = self.clusterer.clusterFromIterator(
                iterator, verbose=True)
            print(timeList.cumsum(0))

        ProfileUtils.profile('run()', globals(), locals())
def __init__(self):
    """Build the random test graph, the subgraph schedule and the clusterer."""
    n = 1000
    g = SparseGraph(GeneralVertexList(n))
    g = ErdosRenyiGenerator(0.1).generate(g)

    self.graph = g
    # Subgraphs of size 100, 110, ..., below n.
    self.subgraphIndicesList = [range(i) for i in range(100, n, 10)]
    self.clusterer = IterativeSpectralClustering(5, 100, T=10, alg="IASC")
def testClusterOnPurchases(self):
    """Cluster a synthetic stream of (product, user, week, year) purchases.

    Fixed: the original used Python 2 constructs — ``range(...) * int``
    followed by in-place ``sort()`` (a range object cannot be multiplied
    or sorted in Python 3) and ``itertools.izip`` (removed in Python 3).
    """
    # Create a list of purchases and cluster it
    numProd = 30
    numUser = 30
    numPurchasesPerDate = 10
    numDate = 10
    numPurchase = numPurchasesPerDate * numDate

    listProd = numpy.random.randint(0, numProd, numPurchase)
    listUser = numpy.random.randint(0, numUser, numPurchase)
    # third week is the same as first one
    listProd[numPurchasesPerDate*3:numPurchasesPerDate*4] = listProd[:numPurchasesPerDate]
    listUser[numPurchasesPerDate*3:numPurchasesPerDate*4] = listUser[:numPurchasesPerDate]
    # Each week index appears numPurchasesPerDate times, in ascending order.
    listWeek = sorted(list(range(numDate)) * numPurchasesPerDate)
    listYear = [2011] * numPurchase
    # zip replaces the removed itertools.izip (Python 3).
    purchasesList = [list(tup) for tup in zip(listProd, listUser, listWeek, listYear)]

    k1 = 10
    k2 = 10
    clusterer = IterativeSpectralClustering(k1, k2)

    # Test full computation of eigenvectors
    graphIterator = DatedPurchasesGraphListIterator(purchasesList)
    clustersList = clusterer.clusterFromIterator(graphIterator, False)

    for i in range(len(clustersList)):
        clusters = clustersList[i]
        print(clusters)

    # Now test approximation of eigenvectors
    graphIterator = DatedPurchasesGraphListIterator(purchasesList)
    clustersList = clusterer.clusterFromIterator(graphIterator, True)

    for i in range(len(clustersList)):
        clusters = clustersList[i]
def testIncreasingSubgraphListIterator(self):
    """Run every clustering variant over an increasing subgraph sequence."""
    # Create a small graph and try the iterator increasing the number of vertices.
    numVertices = 50
    graph = SparseGraph(GeneralVertexList(numVertices))
    graph = BarabasiAlbertGenerator(2, 2).generate(graph)

    shuffled = numpy.random.permutation(numVertices)
    subgraphIndicesList = [shuffled[0:5], shuffled]
    graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)

    # Try a much longer sequence of vertices
    subgraphIndicesList = [range(size) for size in range(10, numVertices)]
    graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)

    k1 = 3
    k2 = 6
    clusterer = IterativeSpectralClustering(k1, k2)
    clustersList = clusterer.clusterFromIterator(graphIterator)

    # Now test the Nystrom method
    graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
    clusterer = IterativeSpectralClustering(k1, alg="nystrom")
    clustersList = clusterer.clusterFromIterator(graphIterator)

    # Test efficient Nystrom method
    graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
    clusterer = IterativeSpectralClustering(k1, alg="efficientNystrom")
    clustersList = clusterer.clusterFromIterator(graphIterator)
# Sample the detection-date range every monthStep months (30-day months)
# and run the exact clustering method once on the full graph.
detectionIndex = fInds["detectDate"]
vertexArray = graph.getVertexList().getVertices()
detections = vertexArray[:, detectionIndex]

startYear = 1900  # detection days appear to be counted from this year — TODO confirm
daysInMonth = 30
monthStep = 3

# Day offsets from the earliest to the latest detection, plus the final day.
dayList = list(range(int(numpy.min(detections)), int(numpy.max(detections)), daysInMonth*monthStep))
dayList.append(numpy.max(detections))

subgraphIndicesList = []
subgraphIndicesList.append(range(graph.getNumVertices()))

k1 = 25    # number of clusters to learn
k2 = 2*k1  # number of eigenvectors kept
clusterer = IterativeSpectralClustering(k1, k2)
clusterer.nb_iter_kmeans = 20

logging.info("Running exact method")
iterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
clusterListExact, timeListExact, boundList = clusterer.clusterFromIterator(iterator, False, verbose=True)
clusters = clusterListExact[0]

subgraphIndicesList = []
#minGraphSize = 100
minGraphSize = 500

#Generate subgraph indices list
for i in dayList:
    logging.info("Date: " + str(DateUtils.getDateStrFromDay(i, startYear)))
print(u) plt.plot(numpy.arange(u.shape[0]), u) plt.show() """

# Candidate values for k2 (IASC eigenvectors kept).
k2s = [3, 6, 12, 24]

if saveResults:
    numClusters = 3
    k1 = numClusters  # number of clusters to learn
    k3 = 90           # Nystrom parameter
    k4 = 90           # randomised SVD parameter
    T = 8 # index of iteration where exact decomposition is computed

    # One clusterer per algorithm variant under comparison.
    exactClusterer = IterativeSpectralClustering(k1, alg="exact")
    iascClusterers = []
    for k2 in k2s:
        iascClusterers.append(IterativeSpectralClustering(k1, k2, alg="IASC", T=T))
    nystromClusterer = IterativeSpectralClustering(k1, k3=k3, alg="nystrom")
    ningsClusterer = NingSpectralClustering(k1, T=T)
    randSvdCluster = IterativeSpectralClustering(k1, k4=k4, alg="randomisedSvd")

    numRepetitions = 50
    #numRepetitions = 2
    do_Nings = True

    # Error accumulators: (graph-density p, graph index, repetition[, k2]).
    clustErrApprox = numpy.zeros((ps.shape[0], numGraphs, numRepetitions, len(k2s)))
    clustErrExact = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))
    clustErrNings = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))
    clustErrNystrom = numpy.zeros((ps.shape[0], numGraphs, numRepetitions))
""" for W in graphIterator: graph = SparseGraph(GeneralVertexList(W.shape[0])) graph.setWeightMatrixSparse(W) components = graph.findConnectedComponents() print(graph) L = GraphUtils.shiftLaplacian(graph.getSparseWeightMatrix()) u, V = numpy.linalg.eig(L.todense()) inds = numpy.argsort(u) u = u[inds] k = 20 print((u[0:k]**2).sum()) print((u[k:]**2).sum()) """ numGraphs = len(subgraphIndicesList) k1 = 3 k2 = 3 clusterer = IterativeSpectralClustering(k1, k2) clusterer.nb_iter_kmeans = 20 clusterer.computeBound = True clusterList, timeList, boundList = clusterer.clusterFromIterator(graphIterator, verbose=True) boundList = numpy.array(boundList) print(boundList)
def testClusterOnIncreasingGraphs(self):
    """Cluster a planted-partition graph over an increasing vertex sequence.

    Fixed: ``numVertices/2`` yields a float slice bound under Python 3
    (now ``//``); ``numpy.float`` was removed in NumPy 1.24 (now builtin
    ``float``); deprecated ``assertEquals`` replaced with ``assertEqual``;
    dropped a dead ``GeneralVertexList`` assignment that was immediately
    overwritten.
    """
    # Create a large graph and try the clustering.
    numClusters = 3
    ClusterSize = 30
    numFeatures = 0
    pNoise = 0  # edge probability between clusters
    pClust = 1  # edge probability within a cluster

    numVertices = numClusters*ClusterSize
    vList = VertexList(numVertices, numFeatures)
    graph = SparseGraph(vList)

    # Generate matrix of probabilities: dense blocks on the diagonal.
    W = numpy.ones((numVertices, numVertices))*pNoise
    for i in range(numClusters):
        W[ClusterSize*i:ClusterSize*(i+1), ClusterSize*i:ClusterSize*(i+1)] = pClust
    P = numpy.random.rand(numVertices, numVertices)
    W = numpy.array(P < W, float)

    # Zero the upper triangle then symmetrise so W is a valid weight matrix.
    upTriInds = numpy.triu_indices(numVertices)
    W[upTriInds] = 0
    W = W + W.T
    graph = SparseGraph(vList)
    graph.setWeightMatrix(W)

    indices = numpy.random.permutation(numVertices)
    # Integer division keeps the slice bound an int under Python 3.
    subgraphIndicesList = [indices[0:numVertices//2], indices]

    k1 = numClusters
    k2 = 10
    clusterer = IterativeSpectralClustering(k1, k2)

    # Test full computation of eigenvectors
    graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
    clustersList = clusterer.clusterFromIterator(graphIterator, False)

    self.assertEqual(len(clustersList), len(subgraphIndicesList))
    for i in range(len(clustersList)):
        clusters = clustersList[i]
        self.assertEqual(len(subgraphIndicesList[i]), len(clusters))
        #print(clusters)

    # Test full computation of eigenvectors with iterator
    graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
    clustersList = clusterer.clusterFromIterator(graphIterator, False)

    self.assertEqual(len(clustersList), len(subgraphIndicesList))
    for i in range(len(clustersList)):
        clusters = clustersList[i]
        self.assertEqual(len(subgraphIndicesList[i]), len(clusters))
        #print(clusters)

    # Now test approximation of eigenvectors with iterator
    graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
    clustersList2 = clusterer.clusterFromIterator(graphIterator)

    for i in range(len(clustersList2)):
        clusters = clustersList2[i]
        self.assertEqual(len(subgraphIndicesList[i]), len(clusters))
        #print(clusters)

    # Test case where 2 graphs are identical
    subgraphIndicesList = []
    subgraphIndicesList.append(range(graph.getNumVertices()))
    subgraphIndicesList.append(range(graph.getNumVertices()))
    graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)
    clustersList = clusterer.clusterFromIterator(graphIterator, True)
            # NOTE(review): this is the tail of a function whose header lies
            # outside this chunk; loop nesting over (v1, v2) is inferred — confirm.
            # True iff v1 and v2 fall in the same block of clust_size vertices
            # (presumably the planted clusters).
            same_cl = (v1 // clust_size) == (v2 // clust_size)
            # True iff the learned clustering also groups them together.
            same_learned_cl = learnedClustering[v1] == learnedClustering[v2]
            # Count a disagreement whenever the two verdicts differ.
            error += same_cl != same_learned_cl
    # Normalise by the number of unordered vertex pairs: n*(n-1)/2.
    return float(error) * 2 / (numVertices) / (numVertices - 1)

# =========================================================================
# =========================================================================
# run
# =========================================================================
# =========================================================================
numIter = len(range(args.startingIteration, args.endingIteration))

logging.info("compute clusters")
# One clusterer per algorithm variant; all report sin-theta angles.
exactClusterer = IterativeSpectralClustering(args.k1, alg="exact", computeSinTheta=True)
approxClusterer = IterativeSpectralClustering(args.k1, args.k2, T=args.exactFreq, alg="IASC", computeSinTheta=True)
nystromClusterer = IterativeSpectralClustering(args.k1, k3=args.k3, alg="nystrom", computeSinTheta=True)
RSvdClusterer = IterativeSpectralClustering(args.k1, k4=args.k4, alg="randomisedSvd", computeSinTheta=True)
ningsClusterer = NingSpectralClustering(args.k1, T=args.exactFreq, computeSinTheta=True)

exactClusterer.nb_iter_kmeans = 20
approxClusterer.nb_iter_kmeans = 20
nystromClusterer.nb_iter_kmeans = 20
RSvdClusterer.nb_iter_kmeans = 20
ningsClusterer.nb_iter_kmeans = 20

# exactClusterer.computeBound = args.computeBound # computeBound not implemented for exactClusterer
approxClusterer.computeBound = args.computeBound
# nystromClusterer.computeBound = args.computeBound # computeBound not implemented for nystromClusterer
def runExperiment(self):
    """
    Run the selected clustering experiments and save results
    """
    args = self.algoArgs  # experiment configuration

    if args.runIASC:
        logging.debug("Running approximate method")

        for k2 in args.k2s:
            logging.debug("k2=" + str(k2))
            clusterer = IterativeSpectralClustering(args.k1, k2=k2, T=args.T, alg="IASC", logStep=self.logStep)
            clusterer.nb_iter_kmeans = 20
            clusterer.computeBound = args.computeBound
            iterator = self.getIterator()
            clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)

            resultsFileName = self.resultsDir + "ResultsIASC_k1=" + str(args.k1) + "_k2=" + str(k2) + "_T=" + str(args.T) + ".npz"
            self.recordResults(clusterList, timeList, resultsFileName)

    if args.runExact:
        logging.debug("Running exact method")
        clusterer = IterativeSpectralClustering(args.k1, alg="exact", logStep=self.logStep)
        clusterer.nb_iter_kmeans = 20
        iterator = self.getIterator()
        clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)

        resultsFileName = self.resultsDir + "ResultsExact_k1=" + str(args.k1) + ".npz"
        self.recordResults(clusterList, timeList, resultsFileName)

    if args.runNystrom:
        logging.debug("Running Nystrom method")

        for k3 in args.k3s:
            logging.debug("k3=" + str(k3))
            clusterer = IterativeSpectralClustering(args.k1, k3=k3, alg="nystrom", logStep=self.logStep)
            clusterer.nb_iter_kmeans = 20
            clusterer.computeBound = args.computeBound
            iterator = self.getIterator()
            clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)

            resultsFileName = self.resultsDir + "ResultsNystrom_k1=" + str(args.k1) + "_k3=" + str(k3) + ".npz"
            self.recordResults(clusterList, timeList, resultsFileName)

    if args.runRandomisedSvd:
        logging.debug("Running randomised SVD method")

        for k4 in args.k4s:
            logging.debug("k4=" + str(k4))
            clusterer = IterativeSpectralClustering(args.k1, k4=k4, alg="randomisedSvd", logStep=self.logStep)
            clusterer.nb_iter_kmeans = 20
            clusterer.computeBound = args.computeBound
            iterator = self.getIterator()
            clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)

            resultsFileName = self.resultsDir + "ResultsRandomisedSvd_k1=" + str(args.k1) + "_k4=" + str(k4) + ".npz"
            self.recordResults(clusterList, timeList, resultsFileName)

    if args.runEfficientNystrom:
        logging.debug("Running efficient Nystrom method")

        for k3 in args.k3s:
            logging.debug("k3=" + str(k3))
            clusterer = IterativeSpectralClustering(args.k1, k3=k3, alg="efficientNystrom", logStep=self.logStep)
            clusterer.nb_iter_kmeans = 20
            clusterer.computeBound = args.computeBound
            iterator = self.getIterator()
            clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)

            resultsFileName = self.resultsDir + "ResultsEfficientNystrom_k1=" + str(args.k1) + "_k3=" + str(k3) + ".npz"
            self.recordResults(clusterList, timeList, resultsFileName)

    if args.runModularity:
        logging.info("Running modularity clustering")
        clusterer = IterativeModularityClustering(args.k1)
        iterator = self.getIterator()
        clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)

        resultsFileName = self.resultsDir + "ResultsModularity_k1=" + str(args.k1) + ".npz"
        self.recordResults(clusterList, timeList, resultsFileName)

    if args.runNing:
        logging.info("Running Nings method")
        iterator = self.getIterator()
        clusterer = NingSpectralClustering(args.k1, T=args.T)
        # NingSpectralClustering exposes cluster() rather than clusterFromIterator().
        clusterList, timeList, boundList = clusterer.cluster(iterator, verbose=True)

        resultsFileName = self.resultsDir + "ResultsNing_k1=" + str(args.k1) + "_T=" + str(args.T) + ".npz"
        self.recordResults(clusterList, timeList, resultsFileName)

    logging.info("All done: see you around!")
""" k2s = [3, 6, 12, 24, 150] k3s = [3, 24, 90] k4s = [3, 24] # debug of IASC #k2s = [3, 6, 12, 24, 150] #k3s = [3] #k4s = [3] if saveResults: numClusters = 3 k1 = numClusters T = 8 # index of iteration where exact decomposition is computed exactClusterer = IterativeSpectralClustering(k1, alg="exact", computeSinTheta=True) iascClusterers = [] for k2 in k2s: iascClusterers.append(IterativeSpectralClustering(k1, k2, alg="IASC", computeSinTheta=True, T=T)) nystromClusterers = [] for k3 in k3s: nystromClusterers.append(IterativeSpectralClustering(k1, k3=k3, alg="nystrom", computeSinTheta=True)) ningsClusterer = NingSpectralClustering(k1, T=T, computeSinTheta=True) randSvdClusterers = [] for k4 in k4s: randSvdClusterers.append(IterativeSpectralClustering(k1, k4=k4, alg="randomisedSvd", computeSinTheta=True)) numRepetitions = 50 # numRepetitions = 2 do_Nings = True