def testNormalisedLaplacianRw(self):
    """
    Check that GraphUtils.normalisedLaplacianRw applied to the sparse weight
    matrix agrees with the graph's own normalisedLaplacianRw method, and that
    vertices whose rows/columns are all zero get a zero diagonal entry.
    """
    numVertices = 10
    numFeatures = 0

    vList = VertexList(numVertices, numFeatures)
    graph = SparseGraph(vList)

    ell = 2
    m = 2
    generator = BarabasiAlbertGenerator(ell, m)
    graph = generator.generate(graph)

    W = graph.getSparseWeightMatrix()
    L = GraphUtils.normalisedLaplacianRw(W)

    L2 = graph.normalisedLaplacianRw()

    tol = 10**-6
    self.assertTrue(numpy.linalg.norm(L - L2) < tol)

    # Test zero rows/cols: only vertices 0 and 1 are connected, so rows and
    # columns 2..4 of W are empty.
    W = scipy.sparse.csr_matrix((5, 5))
    W[1, 0] = 1
    W[0, 1] = 1
    L = GraphUtils.normalisedLaplacianRw(W)

    for i in range(2, 5):
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(L[i, i], 0)
Ejemplo n.º 2
0
    def recordResults(self, clusterList, timeList, fileName):
        """
        Compute quality measures for each clustering and save them to disk.

        For the i-th clustering the next weight matrix is pulled from
        self.getIterator() and three values are stored: modularity, k-way
        normalised cut and the number of distinct cluster labels. The number
        of rows of each weight matrix is stored as graph information.

        :param clusterList: One clustering (array of labels) per graph.
        :param timeList: Timing information, passed through to numpy.savez unchanged.
        :param fileName: Destination given to numpy.savez.
        """
        graphIterator = self.getIterator()
        measureRows = []
        graphInfoRows = []
        logging.debug("Computing cluster measures")

        for index, clustering in enumerate(clusterList):
            Util.printIteration(index, self.logStep, len(clusterList))
            weights = next(graphIterator)

            # Modularity, k-way normalised cut and cluster count
            measureRows.append([
                GraphUtils.modularity(weights, clustering),
                GraphUtils.kwayNormalisedCut(weights, clustering),
                len(numpy.unique(clustering)),
            ])

            # Graph size
            graphInfoRows.append([weights.shape[0]])

        measures = numpy.array(measureRows)
        graphInfo = numpy.array(graphInfoRows)

        numpy.savez(fileName, measures, timeList, graphInfo)
        logging.debug("Saved file as " + fileName)
Ejemplo n.º 3
0
    def testNormalisedLaplacianRw(self):
        """
        Check that GraphUtils.normalisedLaplacianRw applied to the sparse
        weight matrix agrees with the graph's own normalisedLaplacianRw
        method, and that vertices whose rows/columns are all zero get a zero
        diagonal entry.
        """
        numVertices = 10
        numFeatures = 0

        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList)

        ell = 2
        m = 2
        generator = BarabasiAlbertGenerator(ell, m)
        graph = generator.generate(graph)

        W = graph.getSparseWeightMatrix()
        L = GraphUtils.normalisedLaplacianRw(W)

        L2 = graph.normalisedLaplacianRw()

        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(L - L2) < tol)

        # Test zero rows/cols: only vertices 0 and 1 are connected, so rows
        # and columns 2..4 of W are empty.
        W = scipy.sparse.csr_matrix((5, 5))
        W[1, 0] = 1
        W[0, 1] = 1
        L = GraphUtils.normalisedLaplacianRw(W)

        for i in range(2, 5):
            # assertEqual replaces the deprecated assertEquals alias
            self.assertEqual(L[i, i], 0)
Ejemplo n.º 4
0
 def testRandIndex(self):
     """
     Check GraphUtils.randIndex against hand-computed values for three
     relabellings of a six-element clustering.
     """
     clustering1 = numpy.array([1, 1, 1, 2, 2, 2])

     # Same partition with the two labels swapped
     clustering2 = numpy.array([2, 2, 2, 1, 1, 1])
     self.assertEqual(GraphUtils.randIndex(clustering1, clustering2), 0.0)

     clustering2 = numpy.array([2, 2, 2, 1, 1, 2])
     self.assertEqual(GraphUtils.randIndex(clustering1, clustering2), 1/3.0)

     clustering2 = numpy.array([1, 2, 2, 1, 1, 2])
     self.assertEqual(GraphUtils.randIndex(clustering1, clustering2), 16/30.0)
Ejemplo n.º 5
0
    def testVertexLabelPairs(self):
        """
        Check the rows produced by GraphUtils.vertexLabelPairs.

        First with one feature per vertex equal to the vertex index, where the
        result must equal the edge array itself; then with two random features
        per vertex, for both values of the second DenseGraph constructor
        argument.
        """
        tol = 10**-6

        numVertices = 6
        numFeatures = 1
        vList = VertexList(numVertices, numFeatures)
        vList.setVertices(numpy.array([numpy.arange(0, 6)]).T)

        graph = DenseGraph(vList, True)
        weightedEdges = (
            (0, 1, 0.1),
            (1, 3, 0.1),
            (0, 2, 0.2),
            (2, 3, 0.5),
            (0, 4, 0.1),
            (3, 4, 0.1),
        )
        for source, target, weight in weightedEdges:
            graph.addEdge(source, target, weight)

        edges = graph.getAllEdges()

        X = GraphUtils.vertexLabelPairs(graph, edges)
        self.assertTrue(numpy.linalg.norm(X - edges) < tol)

        # A reordered subset of the edges must give the matching rows
        rowOrder = [5, 2, 1]
        X = GraphUtils.vertexLabelPairs(graph, edges[rowOrder, :])
        self.assertTrue(numpy.linalg.norm(X - edges[rowOrder, :]) < tol)

        # Two random features per vertex
        numVertices = 6
        numFeatures = 2
        vList = VertexList(numVertices, numFeatures)
        vList.setVertices(numpy.random.randn(numVertices, numFeatures))

        # Each case: (second DenseGraph argument, expected (first, second)
        # vertex for each output row). The False case is the one the original
        # test labels "directed graphs".
        cases = (
            (True, ((1, 0), (3, 1))),
            (False, ((0, 1), (1, 3))),
        )

        for undirected, expectedRows in cases:
            graph = DenseGraph(vList, undirected)
            graph.addEdge(0, 1, 0.1)
            graph.addEdge(1, 3, 0.1)

            edges = graph.getAllEdges()
            X = GraphUtils.vertexLabelPairs(graph, edges)

            for row, (first, second) in enumerate(expectedRows):
                firstPart = X[row, 0:numFeatures]
                self.assertTrue(numpy.linalg.norm(firstPart - vList.getVertex(first)) < tol)
                secondPart = X[row, numFeatures:numFeatures*2]
                self.assertTrue(numpy.linalg.norm(secondPart - vList.getVertex(second)) < tol)
    def testRandIndex(self):
        """
        Check GraphUtils.randIndex against hand-computed values for three
        relabellings of a six-element clustering.
        """
        clustering1 = numpy.array([1, 1, 1, 2, 2, 2])

        # Same partition with the two labels swapped
        clustering2 = numpy.array([2, 2, 2, 1, 1, 1])
        self.assertEqual(GraphUtils.randIndex(clustering1, clustering2), 0.0)

        clustering2 = numpy.array([2, 2, 2, 1, 1, 2])
        self.assertEqual(GraphUtils.randIndex(clustering1, clustering2),
                         1 / 3.0)

        clustering2 = numpy.array([1, 2, 2, 1, 1, 2])
        self.assertEqual(GraphUtils.randIndex(clustering1, clustering2),
                         16 / 30.0)
Ejemplo n.º 7
0
    def learnModel(self, graph):
        """
        Fit self.predictor on the vertex-label pairs of every edge in the graph,
        using the edge values as targets.

        For an undirected graph each edge is included in both orientations.

        :param graph: The input graph to learn from.
        :type graph: class:`apgl.graph.AbstractSingleGraph`
        """
        sizeText = str(graph.getNumVertices())
        logging.info("Learning model on graph of size " + sizeText)
        logging.info("Regressor: " + str(self.predictor))

        edges = graph.getAllEdges()

        if graph.isUndirected():
            # Append the reversed (second, first) copy of every edge
            reversedEdges = numpy.c_[edges[:, 1], edges[:, 0]]
            edges = numpy.r_[edges, reversedEdges]

        features = GraphUtils.vertexLabelPairs(graph, edges)
        targets = graph.getEdgeValues(edges)

        # Regress the edge values onto the pair features
        logging.info("Number of vertex pairs " + str(features.shape))
        gc.collect()
        self.predictor.learnModel(features, targets)
    def vectorStatistics(self, graph, treeStats=False, eigenStats=True):
        """
        Find a series of statistics for the given input graph which can be
        represented as vector values.

        :param graph: The graph to analyse; checked to be an AbstractMatrixGraph.
        :param treeStats: If True, also compute tree size and tree depth distributions.
        :param eigenStats: If true and the graph has vertices, compute the
            eigenvalue statistics; otherwise those entries are empty arrays.
        :returns: A dict with keys "inDegreeDist", "outDegreeDist", "hopCount",
            "triangleDist", "maxEigVector", "eigenDist", plus "componentsDist"
            (undirected graphs with at least one component only) and
            "treeSizesDist"/"treeDepthsDist" (only when treeStats is True).
        """
        Parameter.checkClass(graph, AbstractMatrixGraph)
        Parameter.checkBoolean(treeStats)
        statsDict = {}

        statsDict["inDegreeDist"] = graph.inDegreeDistribution()
        statsDict["outDegreeDist"] = graph.degreeDistribution()
        logging.debug("Computing hop counts")
        P = graph.findAllDistances(False)
        statsDict["hopCount"] = graph.hopCount(P)
        logging.debug("Computing triangle count")
        if graph.getNumVertices() != 0:
            statsDict["triangleDist"] = numpy.bincount(
                graph.triangleSequence())
        else:
            statsDict["triangleDist"] = numpy.array([])

        #Get the distribution of component sizes
        logging.debug("Finding distribution of component sizes")

        if graph.isUndirected():
            components = graph.findConnectedComponents()
            if len(components) != 0:
                # The numpy.int alias was removed from NumPy; the builtin int
                # is the same dtype.
                statsDict["componentsDist"] = numpy.bincount(
                    numpy.array([len(c) for c in components], int))

        if graph.getNumVertices() != 0 and eigenStats:
            logging.debug("Computing eigenvalues/vectors")
            W = graph.getWeightMatrix()
            #Make sure weight matrix is symmetric
            W = (W + W.T) / 2
            eigenDistribution, V = numpy.linalg.eig(W)
            i = numpy.argmax(eigenDistribution)
            statsDict["maxEigVector"] = V[:, i]
            # Positive eigenvalues only, in descending order
            statsDict["eigenDist"] = numpy.flipud(
                numpy.sort(eigenDistribution[eigenDistribution > 0]))
            gc.collect()
        else:
            statsDict["maxEigVector"] = numpy.array([])
            statsDict["eigenDist"] = numpy.array([])

        if treeStats:
            logging.debug("Computing statistics on trees")
            trees = graph.findTrees()
            statsDict["treeSizesDist"] = numpy.bincount(
                [len(x) for x in trees])
            treeDepths = [
                GraphUtils.treeDepth((graph.subgraph(x))) for x in trees
            ]
            statsDict["treeDepthsDist"] = numpy.bincount(treeDepths)

        return statsDict
Ejemplo n.º 9
0
    def testTreeDepth(self):
        """
        Check GraphUtils.treeDepth on two small graphs: edges 0-1, 0-2, 2-3
        (expected depth 2) and the same with an extra edge 3-4 (expected
        depth 3).
        """
        numVertices = 4
        numFeatures = 1

        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList, False)
        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(2, 3)
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(GraphUtils.treeDepth(graph), 2)

        numVertices = 5
        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList, False)
        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(2, 3)
        graph.addEdge(3, 4)
        self.assertEqual(GraphUtils.treeDepth(graph), 3)
    def testTreeDepth(self):
        """
        Check GraphUtils.treeDepth on two small graphs: edges 0-1, 0-2, 2-3
        (expected depth 2) and the same with an extra edge 3-4 (expected
        depth 3).
        """
        numVertices = 4
        numFeatures = 1

        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList, False)
        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(2, 3)
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(GraphUtils.treeDepth(graph), 2)

        numVertices = 5
        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList, False)
        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(2, 3)
        graph.addEdge(3, 4)
        self.assertEqual(GraphUtils.treeDepth(graph), 3)
Ejemplo n.º 11
0
    def learnModel(self, graph):
        """
        Train self.learningAlg on the labelled examples that
        GraphUtils.vertexLabelExamples builds from the graph (according to the
        original documentation, pairs for both edges and non-edges), after
        passing the examples through self.preprocessor.

        The graph is stored on the instance as self.graph.
        """
        windowSize = self.windowSize
        maxWindowSize = graph.getNumVertices()
        Parameter.checkInt(windowSize, 1, maxWindowSize)
        self.graph = graph

        examples, labels = GraphUtils.vertexLabelExamples(graph)
        processedExamples = self.preprocessor.learn(examples)
        self.learningAlg.learnModel(processedExamples, labels)
Ejemplo n.º 12
0
    def predictEdges(self, graph, edges):
        """
        Predict a value for each of the given edges using self.predictor.

        The vertex list of the graph must have at least one feature.

        :param graph: The graph whose vertex labels are used to build the features.
        :param edges: A numpy array consisting of the edges to make predictions over.
        :returns: The output of self.predictor.predict on the edge features.
        """
        numFeatures = graph.getVertexList().getNumFeatures()
        Parameter.checkInt(numFeatures, 1, float('inf'))

        numEdges = edges.shape[0]
        logging.info("Making prediction over " + str(numEdges) + " edges")

        features = GraphUtils.vertexLabelPairs(graph, edges)
        return self.predictor.predict(features)
Ejemplo n.º 13
0
    def vectorStatistics(self, graph, treeStats=False, eigenStats=True):
        """
        Find a series of statistics for the given input graph which can be
        represented as vector values.

        :param graph: The graph to analyse; checked to be an AbstractMatrixGraph.
        :param treeStats: If True, also compute tree size and tree depth distributions.
        :param eigenStats: If true and the graph has vertices, compute the
            eigenvalue statistics; otherwise those entries are empty arrays.
        :returns: A dict with keys "inDegreeDist", "outDegreeDist", "hopCount",
            "triangleDist", "maxEigVector", "eigenDist", plus "componentsDist"
            (undirected graphs with at least one component only) and
            "treeSizesDist"/"treeDepthsDist" (only when treeStats is True).
        """
        Parameter.checkClass(graph, AbstractMatrixGraph)
        Parameter.checkBoolean(treeStats)
        statsDict = {}

        statsDict["inDegreeDist"] = graph.inDegreeDistribution()
        statsDict["outDegreeDist"] = graph.degreeDistribution()
        logging.debug("Computing hop counts")
        P = graph.findAllDistances(False)
        statsDict["hopCount"] = graph.hopCount(P)
        logging.debug("Computing triangle count")
        if graph.getNumVertices() != 0:
            statsDict["triangleDist"] = numpy.bincount(graph.triangleSequence())
        else:
            statsDict["triangleDist"] = numpy.array([])

        #Get the distribution of component sizes
        logging.debug("Finding distribution of component sizes")

        if graph.isUndirected():
            components = graph.findConnectedComponents()
            if len(components) != 0:
                # The numpy.int alias was removed from NumPy; the builtin int
                # is the same dtype.
                componentSizes = numpy.array([len(c) for c in components], int)
                statsDict["componentsDist"] = numpy.bincount(componentSizes)

        if graph.getNumVertices() != 0 and eigenStats:
            logging.debug("Computing eigenvalues/vectors")
            W = graph.getWeightMatrix()
            #Make sure weight matrix is symmetric
            W = (W + W.T)/2
            eigenDistribution, V = numpy.linalg.eig(W)
            i = numpy.argmax(eigenDistribution)
            statsDict["maxEigVector"] = V[:, i]
            # Positive eigenvalues only, in descending order
            statsDict["eigenDist"] = numpy.flipud(numpy.sort(eigenDistribution[eigenDistribution > 0]))
            gc.collect()
        else:
            statsDict["maxEigVector"] = numpy.array([])
            statsDict["eigenDist"] = numpy.array([])

        if treeStats:
            logging.debug("Computing statistics on trees")
            trees = graph.findTrees()
            statsDict["treeSizesDist"] = numpy.bincount([len(x) for x in trees])
            treeDepths = [GraphUtils.treeDepth((graph.subgraph(x))) for x in trees]
            statsDict["treeDepthsDist"] = numpy.bincount(treeDepths)

        return statsDict
    def testModularity(self):
        """
        Check GraphUtils.modularity for a two-cluster labelling, with both
        dense and CSR weight matrices.

        The first graph is two triangles with self-loops (vertices 0-2 with
        the default edge weight, vertices 3-5 with weight 2), expected
        modularity 4/9. The second is an all-ones matrix, expected 0.
        """
        numVertices = 6
        graph = SparseGraph(GeneralVertexList(numVertices))

        graph.addEdge(0, 0)
        graph.addEdge(1, 1)
        graph.addEdge(2, 2)
        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(2, 1)

        graph.addEdge(3, 4, 2)
        graph.addEdge(3, 5, 2)
        graph.addEdge(4, 5, 2)
        graph.addEdge(3, 3, 2)
        graph.addEdge(4, 4, 2)
        graph.addEdge(5, 5, 2)

        W = graph.getWeightMatrix()
        clustering = numpy.array([0, 0, 0, 1, 1, 1])

        #This is the same as the igraph result
        Q = GraphUtils.modularity(W, clustering)
        self.assertEqual(Q, 4.0 / 9.0)

        Ws = scipy.sparse.csr_matrix(W)
        Q = GraphUtils.modularity(Ws, clustering)
        self.assertEqual(Q, 4.0 / 9.0)

        W = numpy.ones((numVertices, numVertices))
        Q = GraphUtils.modularity(W, clustering)

        self.assertEqual(Q, 0.0)

        Ws = scipy.sparse.csr_matrix(W)
        Q = GraphUtils.modularity(Ws, clustering)
        self.assertEqual(Q, 0.0)
Ejemplo n.º 15
0
    def testModularity(self):
        """
        Check GraphUtils.modularity for a two-cluster labelling, with both
        dense and CSR weight matrices.

        The first graph is two triangles with self-loops (vertices 0-2 with
        the default edge weight, vertices 3-5 with weight 2), expected
        modularity 4/9. The second is an all-ones matrix, expected 0.
        """
        numVertices = 6
        graph = SparseGraph(GeneralVertexList(numVertices))

        graph.addEdge(0,0)
        graph.addEdge(1,1)
        graph.addEdge(2,2)
        graph.addEdge(0,1)
        graph.addEdge(0,2)
        graph.addEdge(2,1)

        graph.addEdge(3,4,2)
        graph.addEdge(3,5,2)
        graph.addEdge(4,5,2)
        graph.addEdge(3,3,2)
        graph.addEdge(4,4,2)
        graph.addEdge(5,5,2)

        W = graph.getWeightMatrix()
        clustering = numpy.array([0,0,0,1,1,1])

        #This is the same as the igraph result
        Q = GraphUtils.modularity(W, clustering)
        self.assertEqual(Q, 4.0/9.0)

        Ws = scipy.sparse.csr_matrix(W)
        Q = GraphUtils.modularity(Ws, clustering)
        self.assertEqual(Q, 4.0/9.0)

        W = numpy.ones((numVertices, numVertices))
        Q = GraphUtils.modularity(W, clustering)

        self.assertEqual(Q, 0.0)

        Ws = scipy.sparse.csr_matrix(W)
        Q = GraphUtils.modularity(Ws, clustering)
        self.assertEqual(Q, 0.0)
Ejemplo n.º 16
0
    def predictEdges(self, graph, edges):
        """
        Predict a value for each of the given edges using self.predictor.

        The vertex list of the graph must have at least one feature.

        :param graph: The graph whose vertex labels are used to build the features.
        :param edges: A numpy array consisting of the edges to make predictions over.
        :returns: The output of self.predictor.predict on the edge features.
        """
        vertexList = graph.getVertexList()
        Parameter.checkInt(vertexList.getNumFeatures(), 1, float('inf'))

        edgeCountText = str(edges.shape[0])
        logging.info("Making prediction over " + edgeCountText + " edges")

        pairFeatures = GraphUtils.vertexLabelPairs(graph, edges)
        predictions = self.predictor.predict(pairFeatures)
        return predictions
Ejemplo n.º 17
0
 def testModularityMatrix(self):
     """
     Compare GraphUtils.modularityMatrix, entry by entry, with a reference
     matrix computed directly from W for a 5-vertex graph with edges 0-1
     and 2-3.
     """
     W = scipy.sparse.csr_matrix((5, 5))
     W[1, 0] = 1
     W[0, 1] = 1
     W[2, 3] = 1
     W[3, 2] = 1
     B = GraphUtils.modularityMatrix(W)

     B2 = numpy.zeros((5,5))
     # Column sums of W
     d = numpy.array(W.sum(0).ravel()).ravel()
     # Half the number of stored entries of W
     m = W.getnnz()/2

     for i in range(5):
         for j in range(5):
             # NOTE(review): this evaluates as (d[i]*d[j]/2)*m; the usual
             # modularity matrix uses d[i]*d[j]/(2*m). Confirm against
             # GraphUtils.modularityMatrix before changing.
             B2[i,j] = W[i,j] - d[i]*d[j]/2*m
             # assertEqual replaces the deprecated assertEquals alias
             self.assertEqual(B2[i, j], B[i, j])
    def testModularityMatrix(self):
        """
        Compare GraphUtils.modularityMatrix, entry by entry, with a reference
        matrix computed directly from W for a 5-vertex graph with edges 0-1
        and 2-3.
        """
        W = scipy.sparse.csr_matrix((5, 5))
        W[1, 0] = 1
        W[0, 1] = 1
        W[2, 3] = 1
        W[3, 2] = 1
        B = GraphUtils.modularityMatrix(W)

        B2 = numpy.zeros((5, 5))
        # Column sums of W
        d = numpy.array(W.sum(0).ravel()).ravel()
        # Half the number of stored entries of W
        m = W.getnnz() / 2

        for i in range(5):
            for j in range(5):
                # NOTE(review): this evaluates as (d[i]*d[j]/2)*m; the usual
                # modularity matrix uses d[i]*d[j]/(2*m). Confirm against
                # GraphUtils.modularityMatrix before changing.
                B2[i, j] = W[i, j] - d[i] * d[j] / 2 * m
                # assertEqual replaces the deprecated assertEquals alias
                self.assertEqual(B2[i, j], B[i, j])
    def testKwayNormalisedCut(self):
        """
        Check GraphUtils.kwayNormalisedCut for dense and CSR weight matrices.

        Cases: two disjoint triangles split into their own clusters (0.0),
        the same with a bridging edge 2-3 (1/7), a three-cluster labelling of
        that graph (61/105), and two vertices with no edges (0.0).
        """
        numVertices = 6
        graph = SparseGraph(GeneralVertexList(numVertices))

        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(2, 1)

        graph.addEdge(3, 4)
        graph.addEdge(3, 5)
        graph.addEdge(5, 4)

        W = graph.getWeightMatrix()
        clustering = numpy.array([0, 0, 0, 1, 1, 1])

        self.assertEqual(GraphUtils.kwayNormalisedCut(W, clustering), 0.0)

        #Try sparse W
        Ws = scipy.sparse.csr_matrix(W)
        self.assertEqual(GraphUtils.kwayNormalisedCut(Ws, clustering), 0.0)

        graph.addEdge(2, 3)
        W = graph.getWeightMatrix()
        self.assertEqual(GraphUtils.kwayNormalisedCut(W, clustering), 1.0 / 7)

        Ws = scipy.sparse.csr_matrix(W)
        self.assertEqual(GraphUtils.kwayNormalisedCut(Ws, clustering),
                         1.0 / 7)

        clustering = numpy.array([0, 0, 0, 1, 1, 2])
        self.assertEqual(GraphUtils.kwayNormalisedCut(W, clustering),
                         61.0 / 105)

        self.assertEqual(GraphUtils.kwayNormalisedCut(Ws, clustering),
                         61.0 / 105)

        #Test two vertices without any edges
        W = numpy.zeros((2, 2))
        clustering = numpy.array([0, 1])
        self.assertEqual(GraphUtils.kwayNormalisedCut(W, clustering), 0.0)

        Ws = scipy.sparse.csr_matrix(W)
        self.assertEqual(GraphUtils.kwayNormalisedCut(Ws, clustering), 0.0)
Ejemplo n.º 20
0
    def testKwayNormalisedCut(self):
        """
        Check GraphUtils.kwayNormalisedCut for dense and CSR weight matrices.

        Cases: two disjoint triangles split into their own clusters (0.0),
        the same with a bridging edge 2-3 (1/7), a three-cluster labelling of
        that graph (61/105), and two vertices with no edges (0.0).
        """
        numVertices = 6
        graph = SparseGraph(GeneralVertexList(numVertices))

        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(2, 1)

        graph.addEdge(3, 4)
        graph.addEdge(3, 5)
        graph.addEdge(5, 4)

        W = graph.getWeightMatrix()
        clustering = numpy.array([0,0,0, 1,1,1])

        self.assertEqual(GraphUtils.kwayNormalisedCut(W, clustering), 0.0)

        #Try sparse W
        Ws = scipy.sparse.csr_matrix(W)
        self.assertEqual(GraphUtils.kwayNormalisedCut(Ws, clustering), 0.0)

        graph.addEdge(2, 3)
        W = graph.getWeightMatrix()
        self.assertEqual(GraphUtils.kwayNormalisedCut(W, clustering), 1.0/7)

        Ws = scipy.sparse.csr_matrix(W)
        self.assertEqual(GraphUtils.kwayNormalisedCut(Ws, clustering), 1.0/7)

        clustering = numpy.array([0,0,0, 1,1,2])
        self.assertEqual(GraphUtils.kwayNormalisedCut(W, clustering), 61.0/105)

        self.assertEqual(GraphUtils.kwayNormalisedCut(Ws, clustering), 61.0/105)

        #Test two vertices without any edges
        W = numpy.zeros((2, 2))
        clustering = numpy.array([0, 1])
        self.assertEqual(GraphUtils.kwayNormalisedCut(W, clustering), 0.0)

        Ws = scipy.sparse.csr_matrix(W)
        self.assertEqual(GraphUtils.kwayNormalisedCut(Ws, clustering), 0.0)
    def testShiftLaplacian(self):
        """
        Check that GraphUtils.shiftLaplacian(W) equals
        2*I - graph.normalisedLaplacianSym() on a generated 10-vertex graph.
        """
        numVertices = 10
        numFeatures = 0

        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList)

        ell = 2
        m = 2
        generator = BarabasiAlbertGenerator(ell, m)
        graph = generator.generate(graph)

        W = graph.getSparseWeightMatrix()
        L = GraphUtils.shiftLaplacian(W)

        # Reference value built from the graph's own symmetric Laplacian
        L2 = 2 * numpy.eye(numVertices) - graph.normalisedLaplacianSym()

        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(L - L2) < tol)
Ejemplo n.º 22
0
    def testShiftLaplacian(self):
        """
        Check that GraphUtils.shiftLaplacian(W) equals
        2*I - graph.normalisedLaplacianSym() on a generated 10-vertex graph.
        """
        numVertices = 10
        numFeatures = 0

        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList)

        ell = 2
        m = 2
        generator = BarabasiAlbertGenerator(ell, m)
        graph = generator.generate(graph)

        W = graph.getSparseWeightMatrix()
        L = GraphUtils.shiftLaplacian(W)

        # Reference value built from the graph's own symmetric Laplacian
        L2 = 2*numpy.eye(numVertices) - graph.normalisedLaplacianSym()

        tol = 10**-6
        self.assertTrue(numpy.linalg.norm(L - L2) < tol)
Ejemplo n.º 23
0
    def learnModel(self, graph):
        """
        Fit self.predictor on the vertex-label pairs of every edge in the graph,
        using the edge values as targets.

        For an undirected graph each edge is included in both orientations.

        :param graph: The input graph to learn from.
        :type graph: class:`apgl.graph.AbstractSingleGraph`
        """
        graphSize = graph.getNumVertices()
        logging.info("Learning model on graph of size " + str(graphSize))
        logging.info("Regressor: " + str(self.predictor))

        edges = graph.getAllEdges()

        if graph.isUndirected():
            # Append the reversed (second, first) copy of every edge
            flipped = numpy.c_[edges[:, 1], edges[:, 0]]
            edges = numpy.r_[edges, flipped]

        pairFeatures = GraphUtils.vertexLabelPairs(graph, edges)
        edgeValues = graph.getEdgeValues(edges)

        # Regress the edge values onto the pair features
        logging.info("Number of vertex pairs " + str(pairFeatures.shape))
        gc.collect()
        self.predictor.learnModel(pairFeatures, edgeValues)
Ejemplo n.º 24
0
#Plot bound as Nystrom cols change 


# Advance the graph iterator with the built-in next() rather than the
# Python 2 style .next() method.
# NOTE(review): W and nystromNs are reassigned a few lines below, so only the
# side effect of advancing the iterator survives from these two lines.
W = next(iterator)
nystromNs = numpy.arange(200, 1000, 50) 

#Same plots with Fowlkes dataset 
#There is no eigengap in this case so bound does poorly 
W = scipy.sparse.csr_matrix(createDataset(sigma=1.5))
nystromNs = numpy.arange(20, 151, 10) 
k = 2

# One row per Nystrom size, one column per method
errors = numpy.zeros((len(nystromNs), numMethods))  
innerProds = numpy.zeros((len(nystromNs), numMethods))  

L = GraphUtils.shiftLaplacian(W)
L2 = GraphUtils.normalisedLaplacianSym(W)

print(L2.todense())

#Find connected components 
graph = SparseGraph(GeneralVertexList(W.shape[0]))
graph.setWeightMatrix(W)
components = graph.findConnectedComponents()
print(len(components)) 

#Compute exact eigenvalues 
omega, Q = numpy.linalg.eigh(L.todense())
# Reorder eigenvalues (and matching eigenvector columns) in descending order
inds = numpy.flipud(numpy.argsort(omega)) 
omega, Q = omega[inds], Q[:, inds]
# Keep the k largest eigenvalues and their eigenvectors
omegak, Qk = omega[0:k], Q[:, 0:k]    
Ejemplo n.º 25
0
# Either run the clustering experiment numRepetitions times and save the
# collected bound lists, or load previously saved bound lists and print their
# mean and standard deviation over repetitions.
if saveResults: 
    errors = numpy.zeros((numGraphs, numRepetitions))  
    # One (numGraphs x 5) table of bounds per repetition
    allBoundLists = numpy.zeros((numRepetitions, numGraphs, 5))
    
    for r in range(numRepetitions): 
        # A fresh iterator is created for every repetition
        iterator = BoundGraphIterator(numGraphs=numGraphs)
        
        clusterer = IterativeSpectralClustering(k1, k2, T=100, computeBound=True, alg="IASC")
        clusterer.nb_iter_kmeans = 20
        logging.debug("Starting clustering")
        clusterList, timeList, boundList = clusterer.clusterFromIterator(iterator, verbose=True)
        allBoundLists[r, :, :] = numpy.array(boundList)
        
        
        # Compare each clustering with the iterator's reference clustering.
        # NOTE(review): errors is filled here but is not part of what
        # numpy.save writes below.
        for i in range(len(clusterList)): 
            errors[i, r] = GraphUtils.randIndex(clusterList[i], iterator.realClustering)
            
    # Mean over repetitions
    print(allBoundLists.mean(0))
    
    numpy.save(fileName, allBoundLists)
    logging.debug("Saved results as " + fileName)
else: 
    allBoundLists = numpy.load(fileName) 
    # Mean and standard deviation over the repetition axis
    boundList = allBoundLists.mean(0)
    stdBoundList = allBoundLists.std(0)
    # Column 0 of the std table is overwritten with the mean's column 0;
    # presumably that column is an index/x-axis value rather than a
    # measurement -- TODO confirm.
    stdBoundList[:, 0] = boundList[:, 0]
    
    # Line styles; not used within the visible part of this script
    plotStyles1 = ['k-', 'k--', 'k-.', 'k:', 'b--', 'b-.', 'g-', 'g--', 'g-.', 'r-', 'r--', 'r-.']    
    print(boundList)
    print(stdBoundList)
    def scalarStatistics(self, graph, slowStats=True, treeStats=False):
        """
        Compute scalar statistics of the graph and return them as one vector.

        The vector has self.numStats entries, each stored at the position
        given by the corresponding self.*Index attribute. Entries that are not
        computed keep their initial value of -1.

        :param graph: The graph to analyse; checked to be an AbstractSingleGraph.
        :param slowStats: If True, also compute the distance-based statistics
            (diameter, effective diameter, power law fit, geodesic distances).
        :param treeStats: If True, also compute the statistics on trees.
        :returns: A numpy array of length self.numStats.
        """
        Parameter.checkClass(graph, AbstractSingleGraph)
        Parameter.checkBoolean(slowStats)
        Parameter.checkBoolean(treeStats)

        # -1 marks a statistic that has not been computed
        stats = -numpy.ones(self.numStats)
        stats[self.numVerticesIndex] = graph.getNumVertices()
        stats[self.numEdgesIndex] = graph.getNumEdges()
        stats[self.numDirEdgesIndex] = graph.getNumDirEdges()
        stats[self.densityIndex] = graph.density()

        if graph.isUndirected():
            components = graph.findConnectedComponents()
            stats[self.numComponentsIndex] = len(components)
            stats[self.numNonSingletonComponentsIndex] = sum(
                1 for comp in components if len(comp) > 1)
            stats[self.numTriOrMoreComponentsIndex] = sum(
                1 for comp in components if len(comp) > 2)

            if len(components) == 0:
                stats[self.maxComponentSizeIndex] = 0
                stats[self.maxComponentEdgesIndex] = 0
                stats[self.meanComponentSizeIndex] = 0
                stats[self.geodesicDistMaxCompIndex] = 0
            else:
                # The first component returned is the one treated as the
                # maximum component
                firstComponent = components[0]
                firstComponentGraph = graph.subgraph(list(firstComponent))
                stats[self.maxComponentSizeIndex] = len(firstComponent)

                if len(components) >= 2:
                    stats[self.secondComponentSizeIndex] = len(components[1])

                stats[self.maxComponentEdgesIndex] = firstComponentGraph.getNumEdges()
                totalSize = sum(len(comp) for comp in components)
                stats[self.meanComponentSizeIndex] = totalSize / float(
                    stats[self.numComponentsIndex])
                stats[self.maxCompMeanDegreeIndex] = numpy.mean(
                    firstComponentGraph.outDegreeSequence())

        if graph.getNumVertices() == 0:
            stats[self.meanDegreeIndex] = 0
        else:
            stats[self.meanDegreeIndex] = numpy.mean(graph.outDegreeSequence())

        if slowStats:
            # All-pairs distances, shared by the statistics below
            if self.useFloydWarshall:
                logging.debug("Running Floyd-Warshall")
                distances = graph.floydWarshall(False)
            else:
                logging.debug("Running Dijkstra's algorithm")
                distances = graph.findAllDistances(False)

            stats[self.diameterIndex] = graph.diameter(P=distances)
            stats[self.effectiveDiameterIndex] = graph.effectiveDiameter(
                self.q, P=distances)
            stats[self.powerLawIndex] = graph.fitPowerLaw()[0]
            stats[self.geodesicDistanceIndex] = graph.geodesicDistance(
                P=distances)
            stats[self.harmonicGeoDistanceIndex] = graph.harmonicGeodesicDistance(
                P=distances)

            # components only exists for undirected graphs; the first test
            # guards the second
            if graph.isUndirected() and len(components) != 0:
                firstComponentInds = list(components[0])
                stats[self.geodesicDistMaxCompIndex] = graph.geodesicDistance(
                    P=distances, vertexInds=firstComponentInds)

        if treeStats:
            logging.debug("Computing statistics on trees")
            trees = graph.findTrees()
            stats[self.numTreesIndex] = len(trees)
            stats[self.numNonSingletonTreesIndex] = sum(
                1 for tree in trees if len(tree) > 1)

            treeSizes = [len(tree) for tree in trees]
            stats[self.meanTreeSizeIndex] = numpy.mean(treeSizes)
            depths = [
                GraphUtils.treeDepth(graph.subgraph(list(tree)))
                for tree in trees
            ]
            stats[self.meanTreeDepthIndex] = numpy.mean(depths)

            if len(trees) != 0:
                firstTreeGraph = graph.subgraph(trees[0])
                stats[self.maxTreeSizeIndex] = len(trees[0])
                stats[self.maxTreeDepthIndex] = GraphUtils.treeDepth(
                    firstTreeGraph)

                if len(trees) >= 2:
                    nextTreeGraph = graph.subgraph(trees[1])
                    stats[self.secondTreeSizeIndex] = len(trees[1])
                    stats[self.secondTreeDepthIndex] = GraphUtils.treeDepth(
                        nextTreeGraph)

        return stats
Ejemplo n.º 27
0
    def scalarStatistics(self, graph, slowStats=True, treeStats=False):
        """
        Compute scalar statistics of the graph and return them as one vector.

        The vector has self.numStats entries, each stored at the position
        given by the corresponding self.*Index attribute. Entries that are not
        computed keep their initial value of -1.

        :param graph: The graph to analyse; checked to be an AbstractSingleGraph.
        :param slowStats: If True, also compute the distance-based statistics
            (diameter, effective diameter, power law fit, geodesic distances).
        :param treeStats: If True, also compute the statistics on trees.
        :returns: A numpy array of length self.numStats.
        """
        Parameter.checkClass(graph, AbstractSingleGraph)
        Parameter.checkBoolean(slowStats)
        Parameter.checkBoolean(treeStats)

        # -1 marks a statistic that has not been computed
        result = -numpy.ones(self.numStats)
        result[self.numVerticesIndex] = graph.getNumVertices()
        result[self.numEdgesIndex] = graph.getNumEdges()
        result[self.numDirEdgesIndex] = graph.getNumDirEdges()
        result[self.densityIndex] = graph.density()

        if graph.isUndirected():
            logging.debug("Finding connected components")
            comps = graph.findConnectedComponents()
            logging.debug("Done")
            result[self.numComponentsIndex] = len(comps)

            result[self.numNonSingletonComponentsIndex] = sum(1 for c in comps if len(c) > 1)
            result[self.numTriOrMoreComponentsIndex] = sum(1 for c in comps if len(c) > 2)

            logging.debug("Studying max component")
            if len(comps) == 0:
                result[self.maxComponentSizeIndex] = 0
                result[self.maxComponentEdgesIndex] = 0
                result[self.meanComponentSizeIndex] = 0
                result[self.geodesicDistMaxCompIndex] = 0
            else:
                # The first component returned is the one treated as the
                # maximum component
                headComp = comps[0]
                headCompGraph = graph.subgraph(list(headComp))
                result[self.maxComponentSizeIndex] = len(headComp)

                if len(comps) >= 2:
                    result[self.secondComponentSizeIndex] = len(comps[1])

                result[self.maxComponentEdgesIndex] = headCompGraph.getNumEdges()
                sizeSum = sum(len(c) for c in comps)
                result[self.meanComponentSizeIndex] = sizeSum/float(result[self.numComponentsIndex])
                result[self.maxCompMeanDegreeIndex] = numpy.mean(headCompGraph.outDegreeSequence())

        if graph.getNumVertices() == 0:
            result[self.meanDegreeIndex] = 0
        else:
            result[self.meanDegreeIndex] = numpy.mean(graph.outDegreeSequence())

        if slowStats:
            # All-pairs distances, shared by the statistics below
            if self.useFloydWarshall:
                logging.debug("Running Floyd-Warshall")
                allDists = graph.floydWarshall(False)
            else:
                logging.debug("Running Dijkstra's algorithm")
                allDists = graph.findAllDistances(False)

            result[self.diameterIndex] = graph.diameter(P=allDists)
            result[self.effectiveDiameterIndex] = graph.effectiveDiameter(self.q, P=allDists)
            result[self.powerLawIndex] = graph.fitPowerLaw()[0]
            result[self.geodesicDistanceIndex] = graph.geodesicDistance(P=allDists)
            result[self.harmonicGeoDistanceIndex] = graph.harmonicGeodesicDistance(P=allDists)

            # comps only exists for undirected graphs; the first test guards
            # the second
            if graph.isUndirected() and len(comps) != 0:
                headInds = list(comps[0])
                result[self.geodesicDistMaxCompIndex] = graph.geodesicDistance(P=allDists, vertexInds=headInds)

        if treeStats:
            logging.debug("Computing statistics on trees")
            trees = graph.findTrees()
            result[self.numTreesIndex] = len(trees)
            result[self.numNonSingletonTreesIndex] = sum(1 for t in trees if len(t) > 1)

            sizes = [len(t) for t in trees]
            result[self.meanTreeSizeIndex] = numpy.mean(sizes)
            depths = [GraphUtils.treeDepth(graph.subgraph(list(t))) for t in trees]
            result[self.meanTreeDepthIndex] = numpy.mean(depths)

            if len(trees) != 0:
                headTreeGraph = graph.subgraph(trees[0])
                result[self.maxTreeSizeIndex] = len(trees[0])
                result[self.maxTreeDepthIndex] = GraphUtils.treeDepth(headTreeGraph)

                if len(trees) >= 2:
                    followingTreeGraph = graph.subgraph(trees[1])
                    result[self.secondTreeSizeIndex] = len(trees[1])
                    result[self.secondTreeDepthIndex] = GraphUtils.treeDepth(followingTreeGraph)

        return result
    def testVertexLabelPairs(self):
        """
        Exercise GraphUtils.vertexLabelPairs on three graphs: one whose
        vertices are labelled with their own index, one with random
        two-feature vertices, and a directed graph over the same vertex list.
        """
        tol = 10**-6

        def assertClose(actual, expected):
            #Two arrays agree when the norm of their difference is below tol
            self.assertTrue(numpy.linalg.norm(actual - expected) < tol)

        #Each vertex carries its own index as its single feature, so the
        #label pairs must reproduce the edge array itself
        numVertices = 6
        numFeatures = 1
        vList = VertexList(numVertices, numFeatures)
        vList.setVertices(numpy.array([numpy.arange(0, 6)]).T)

        graph = DenseGraph(vList, True)
        weightedEdges = [(0, 1, 0.1), (1, 3, 0.1), (0, 2, 0.2),
                         (2, 3, 0.5), (0, 4, 0.1), (3, 4, 0.1)]
        for source, target, weight in weightedEdges:
            graph.addEdge(source, target, weight)

        edges = graph.getAllEdges()
        assertClose(GraphUtils.vertexLabelPairs(graph, edges), edges)

        #Same check on a reordered subset of the edges
        someEdges = edges[[5, 2, 1], :]
        assertClose(GraphUtils.vertexLabelPairs(graph, someEdges), someEdges)

        #Try a bigger graph
        numVertices = 6
        numFeatures = 2
        vList = VertexList(numVertices, numFeatures)
        vList.setVertices(numpy.random.randn(numVertices, numFeatures))

        graph = DenseGraph(vList, True)
        graph.addEdge(0, 1, 0.1)
        graph.addEdge(1, 3, 0.1)

        edges = graph.getAllEdges()
        X = GraphUtils.vertexLabelPairs(graph, edges)

        #(row of X, vertex expected in the first half, vertex in the second)
        for row, first, second in [(0, 1, 0), (1, 3, 1)]:
            assertClose(X[row, 0:numFeatures], vList.getVertex(first))
            assertClose(X[row, numFeatures:numFeatures * 2],
                        vList.getVertex(second))

        #Try directed graphs
        graph = DenseGraph(vList, False)
        graph.addEdge(0, 1, 0.1)
        graph.addEdge(1, 3, 0.1)

        edges = graph.getAllEdges()
        X = GraphUtils.vertexLabelPairs(graph, edges)

        for row, first, second in [(0, 0, 1), (1, 1, 3)]:
            assertClose(X[row, 0:numFeatures], vList.getVertex(first))
            assertClose(X[row, numFeatures:numFeatures * 2],
                        vList.getVertex(second))
Ejemplo n.º 29
0
        # Split the current line on whitespace; the first two fields are
        # treated as the two endpoints of an edge.
        vals = line.split()
        
    # Translate both endpoint ids through the indexer and remember the results.
    # NOTE(review): indexer.append presumably returns the integer index
    # assigned to the id, since the values are used as array indices below.
    node1Inds.append(indexer.append(vals[0]))
    node2Inds.append(indexer.append(vals[1]))
    
    node1Inds = numpy.array(node1Inds)
    node2Inds = numpy.array(node2Inds)
    
    # One row/column per entry of the indexer's id dictionary
    m = len(indexer.getIdDict())    
    
    # Dense 0/1 matrix of the recorded pairs, then averaged with its
    # transpose so that the result is symmetric.
    A = numpy.zeros((m, m))
    A[node1Inds, node2Inds] = 1
    A = (A+A.T)/2
    
    A = scipy.sparse.csr_matrix(A)
    L = GraphUtils.normalisedLaplacianSym(A)
    Ls.append(L)
    
    # Request the m-2 eigenvalues of smallest magnitude; only the real part
    # of the eigenvalues is kept and the pairs are sorted in ascending order.
    u, V = scipy.sparse.linalg.eigs(L, k=m-2, which="SM")
    u = u.real 
    inds = numpy.argsort(u)
    u = u[inds]
    V = V[:, inds]
    us.append(u)
    

    # k0 is the first position whose eigenvalue exceeds 0.01; k is the
    # position of the largest gap between consecutive eigenvalues from k0
    # onwards. Note that k is counted from k0, not from the start of u.
    k0 = numpy.where(u > 0.01)[0][0]
    k = numpy.argmax(numpy.diff(u[k0:]))
    
    ks.append(k)
    
    def clusterFromIterator(self, graphListIterator, verbose=False):
        """
        Cluster every graph produced by graphListIterator.

        For each weight matrix an eigendecomposition related to its shifted
        Laplacian is computed with the method named by self.alg ("IASC",
        "nystrom", "exact", "efficientNystrom" or "randomisedSvd"). The
        eigenvectors with the self.k1 largest eigenvalues are normalised,
        whitened and clustered with k-means. The first graph uses a fresh
        k-means run; later graphs start from the centroids implied by the
        previous clustering.

        The difference between a weight matrix and the previous one should be
        positive.

        :param graphListIterator: iterable of weight matrices; each one is
            checked for symmetry when __debug__ is set.
        :param verbose: if True, timings and eigenvector quality measures are
            returned as well.
        :return: the list of cluster assignments (one array per graph) when
            verbose is False. Otherwise a tuple (clustersList, times,
            eigenQuality) where times has one row per graph holding the
            decomposition and k-means durations, and eigenQuality is a dict
            with the keys "boundList" and "sinThetaList".
        :raises ValueError: if self.alg is not one of the known algorithms.
        """
        clustersList = []
        decompositionTimeList = []
        kMeansTimeList = []
        boundList = []
        sinThetaList = []
        i = 0

        for subW in graphListIterator:
            if __debug__:
                Parameter.checkSymmetric(subW)

            if self.logStep and i % self.logStep == 0:
                logging.debug("Graph index: " + str(i))
            logging.debug("Clustering graph of size " + str(subW.shape))
            if self.alg != "efficientNystrom":
                ABBA = GraphUtils.shiftLaplacian(subW)

            # --- Eigen value decomposition ---
            startTime = time.time()
            if self.alg == "IASC":
                if i % self.T != 0:
                    #Approximate update from the previous decomposition
                    omega, Q = self.approxUpdateEig(subW, ABBA, omega, Q)

                    if self.computeBound:
                        inds = numpy.flipud(numpy.argsort(omega))
                        Q = Q[:, inds]
                        omega = omega[inds]
                        bounds = self.pertBound(omega, Q, omegaKbot, AKbot,
                                                self.k2)

                        #Now use accurate values of norm of R and delta
                        rank = Util.rank(ABBA.todense())
                        gamma, U = scipy.sparse.linalg.eigsh(ABBA,
                                                             rank - 1,
                                                             which="LM",
                                                             ncv=ABBA.shape[0])
                        bounds2 = self.realBound(omega, Q, gamma, AKbot,
                                                 self.k2)
                        boundList.append(
                            [bounds[0], bounds[1], bounds2[0], bounds2[1]])
                else:
                    #Every self.T graphs (including the first one) the
                    #eigenvectors are recomputed from scratch
                    logging.debug("Computing exact eigenvectors")
                    self.storeInformation(subW, ABBA)

                    if self.computeBound:
                        rank = Util.rank(ABBA.todense())
                        omega, Q = scipy.sparse.linalg.eigsh(ABBA,
                                                             rank - 1,
                                                             which="LM",
                                                             ncv=ABBA.shape[0])
                        #Keep the part of the spectrum below the top self.k2
                        #eigenvalues; the bounds of later iterations use it
                        inds = numpy.flipud(numpy.argsort(omega))
                        omegaKbot = omega[inds[self.k2:]]
                        QKbot = Q[:, inds[self.k2:]]
                        AKbot = (QKbot * omegaKbot).dot(QKbot.T)

                        boundList.append([0] * 4)
                    else:
                        omega, Q = scipy.sparse.linalg.eigsh(
                            ABBA,
                            min(self.k2, ABBA.shape[0] - 1),
                            which="LM",
                            ncv=min(10 * self.k2, ABBA.shape[0]))

            elif self.alg == "nystrom":
                omega, Q = Nystrom.eigpsd(ABBA, self.k3)
            elif self.alg == "exact":
                omega, Q = scipy.sparse.linalg.eigsh(
                    ABBA,
                    min(self.k1, ABBA.shape[0] - 1),
                    which="LM",
                    ncv=min(15 * self.k1, ABBA.shape[0]))
            elif self.alg == "efficientNystrom":
                omega, Q = EfficientNystrom.eigWeight(subW, self.k2, self.k1)
            elif self.alg == "randomisedSvd":
                Q, omega, R = RandomisedSVD.svd(ABBA, self.k4)
            else:
                raise ValueError("Invalid Algorithm: " + str(self.alg))

            if self.computeSinTheta:
                #NOTE(review): ABBA is not computed when self.alg is
                #"efficientNystrom", so this branch cannot be combined with
                #that algorithm. The time spent here is also included in the
                #decomposition time recorded below - confirm both are intended.
                omegaExact, QExact = scipy.linalg.eigh(ABBA.todense())
                inds = numpy.flipud(numpy.argsort(omegaExact))
                QExactKbot = QExact[:, inds[self.k1:]]
                inds = numpy.flipud(numpy.argsort(omega))
                QApproxK = Q[:, inds[:self.k1]]
                sinThetaList.append(
                    scipy.linalg.norm(QExactKbot.T.dot(QApproxK)))

            decompositionTimeList.append(time.time() - startTime)

            if self.alg == "IASC":
                self.storeInformation(subW, ABBA)

            # --- Kmeans ---
            startTime = time.time()
            inds = numpy.flipud(numpy.argsort(omega))

            standardiser = Standardiser()
            #For some very strange reason we get an overflow when computing the
            #norm of the rows of Q even though its elements are bounded by 1.
            try:
                V = standardiser.normaliseArray(Q[:, inds[0:self.k1]].real.T).T
            except FloatingPointError as e:
                logging.warning("FloatingPointError: " + str(e))
                #Without a fallback V would be undefined for the first graph
                #and stale (from the previous graph) for later ones. Redo the
                #normalisation with numpy floating point errors ignored.
                with numpy.errstate(all="ignore"):
                    V = standardiser.normaliseArray(
                        Q[:, inds[0:self.k1]].real.T).T
            V = VqUtils.whiten(V)
            if i == 0:
                centroids, distortion = vq.kmeans(V,
                                                  self.k1,
                                                  iter=self.nb_iter_kmeans)
            else:
                #Warm start from the previous clustering, padding with randomly
                #chosen rows of V if fewer than self.k1 centroids were found
                centroids = self.findCentroids(V, clusters[:subW.shape[0]])
                if centroids.shape[0] < self.k1:
                    nb_missing_centroids = self.k1 - centroids.shape[0]
                    random_centroids = V[numpy.random.randint(
                        0, V.shape[0], nb_missing_centroids), :]
                    centroids = numpy.vstack((centroids, random_centroids))
                centroids, distortion = vq.kmeans(
                    V, centroids)  #iter can only be 1
            clusters, distortion = vq.vq(V, centroids)
            kMeansTimeList.append(time.time() - startTime)

            clustersList.append(clusters)

            #Dump the local variables when memory use exceeds 10**9
            if ProfileUtils.memory() > 10**9:
                ProfileUtils.memDisplay(locals())

            i += 1

        if verbose:
            eigenQuality = {
                "boundList": boundList,
                "sinThetaList": sinThetaList
            }
            return clustersList, numpy.array(
                (decompositionTimeList, kMeansTimeList)).T, eigenQuality
        else:
            return clustersList
Ejemplo n.º 31
0
# Edge probabilities: p everywhere, pClust inside the diagonal blocks
p = 0.05
pClust = 0.3

# Matrix of edge probabilities with one diagonal block of side endClusterSize
# per cluster
W = numpy.ones((numVertices, numVertices))*p
for i in range(numClusters):
    W[endClusterSize*i:endClusterSize*(i+1), endClusterSize*i:endClusterSize*(i+1)] = pClust

# Draw the edges: an entry becomes 1.0 when its uniform sample falls below the
# probability. The builtin float is used as dtype because the numpy.float
# alias no longer exists in current NumPy releases; the dtype is the same.
P = numpy.random.rand(numVertices, numVertices)
W = numpy.array(P < W, float)

# Clear the upper triangle (diagonal included) and mirror the strictly lower
# triangle so that W is symmetric with a zero diagonal
upTriInds = numpy.triu_indices(numVertices)
W[upTriInds] = 0
W = W + W.T
graph = SparseGraph(vList)
graph.setWeightMatrix(W)

# Print the shape and the condition number of the eigenvector matrix of the
# shifted Laplacian
L = GraphUtils.shiftLaplacian(scipy.sparse.csr_matrix(W))
u, V = numpy.linalg.eig(L.todense())
print(V.shape)
print(numpy.linalg.cond(V))

# run with exact eigenvalue decomposition
logging.info("Running exact method")
graphIterator = IncreasingSubgraphListIterator(graph, subgraphIndicesList)

"""
for W in graphIterator: 
    graph = SparseGraph(GeneralVertexList(W.shape[0]))
    graph.setWeightMatrixSparse(W) 
    components = graph.findConnectedComponents()
    print(graph)
    
Ejemplo n.º 32
0
# Number of times the experiment below is repeated
numRepetitions = 20 
#numRepetitions = 1

# The experiment only runs when saveResults is True
saveResults = False
resultsDir = PathDefaults.getOutputDir() + "cluster/"
fileName = resultsDir + "ErrorBoundNystrom.npy"

if saveResults: 
    for r in range(numRepetitions): 
        # i counts the graphs of the current repetition
        i = 0 
        iterator = BoundGraphIterator(changeEdges=50, numGraphs=numGraphs, numClusterVertices=numClusterVertices, numClusters=k, p=0.1)
        
        for W in iterator: 
            print("i="+str(i))
            L = GraphUtils.shiftLaplacian(W)
          
            if i == 0: 
                # First graph: full dense eigendecomposition of the shifted
                # Laplacian, reordered so that eigenvalues are descending
                initialL = L
                initialOmega, initialQ = numpy.linalg.eigh(L.todense())
                inds = numpy.flipud(numpy.argsort(initialOmega))
                initialOmega, initialQ = initialOmega[inds], initialQ[:, inds]
                #Fix for weird error in EigenAdd2 later on 
                initialQ = numpy.array(initialQ)
                # Eigenvectors of the k largest eigenvalues
                initialQk = initialQ[:, 0:k]
                # for IASC: one starting entry per element of IASCL. The list
                # repetition makes every entry refer to the same array object.
                lastL = initialL
                lastOmegas = [initialOmega]*len(IASCL)
                lastQs = [initialQ]*len(IASCL)
            
            #Compute exact eigenvalues 
    def clusterFromIterator(self, graphListIterator, verbose=False):
        """
        Cluster every graph produced by graphListIterator.

        For each weight matrix an eigendecomposition related to its shifted
        Laplacian is computed with the method named by self.alg ("IASC",
        "nystrom", "exact", "efficientNystrom" or "randomisedSvd"). The
        eigenvectors with the self.k1 largest eigenvalues are normalised,
        whitened and clustered with k-means. The first graph uses a fresh
        k-means run; later graphs start from the centroids implied by the
        previous clustering.

        The difference between a weight matrix and the previous one should be
        positive.

        :param graphListIterator: iterable of weight matrices; each one is
            checked for symmetry when __debug__ is set.
        :param verbose: if True, timings and perturbation bounds are returned
            as well.
        :return: the list of cluster assignments (one array per graph) when
            verbose is False. Otherwise a tuple (clustersList, times,
            boundList) where times has one row per graph holding the
            decomposition and k-means durations, and boundList holds one
            entry [i, bound, bound, realBound, realBound] per approximate
            IASC update when self.computeBound is set.
        :raises ValueError: if self.alg is not one of the known algorithms.
        """
        clustersList = []
        decompositionTimeList = [] 
        kMeansTimeList = [] 
        boundList = []
        i = 0

        for subW in graphListIterator:
            if __debug__:
                Parameter.checkSymmetric(subW)

            if self.logStep and i % self.logStep == 0:
                logging.debug("Graph index: " + str(i))
            logging.debug("Clustering graph of size " + str(subW.shape))
            if self.alg!="efficientNystrom": 
                ABBA = GraphUtils.shiftLaplacian(subW)

            # --- Eigen value decomposition ---
            startTime = time.time()
            if self.alg=="IASC": 
                if i % self.T != 0:
                    #Approximate update from the previous decomposition
                    omega, Q = self.approxUpdateEig(subW, ABBA, omega, Q)   
                    
                    if self.computeBound:
                        inds = numpy.flipud(numpy.argsort(omega))
                        Q = Q[:, inds]
                        omega = omega[inds]
                        bounds = self.pertBound(omega, Q, omegaKbot, AKbot, self.k2)
                        
                        #Now use accurate values of norm of R and delta   
                        rank = Util.rank(ABBA.todense())
                        gamma, U = scipy.sparse.linalg.eigsh(ABBA, rank-1, which="LM", ncv = ABBA.shape[0])
                        bounds2 = self.realBound(omega, Q, gamma, AKbot, self.k2)                  
                        boundList.append([i, bounds[0], bounds[1], bounds2[0], bounds2[1]])      
                else: 
                    #Every self.T graphs (including the first one) the
                    #eigenvectors are recomputed from scratch
                    logging.debug("Computing exact eigenvectors")
                    self.storeInformation(subW, ABBA)

                    if self.computeBound: 
                        rank = Util.rank(ABBA.todense())
                        omega, Q = scipy.sparse.linalg.eigsh(ABBA, rank-1, which="LM", ncv = ABBA.shape[0])
                        #Keep the part of the spectrum below the top self.k2
                        #eigenvalues; the bounds of later iterations use it
                        inds = numpy.flipud(numpy.argsort(omega))
                        omegaKbot = omega[inds[self.k2:]]  
                        QKbot = Q[:, inds[self.k2:]] 
                        AKbot = (QKbot*omegaKbot).dot(QKbot.T)
                    else: 
                        omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k2, ABBA.shape[0]-1), which="LM", ncv = min(10*self.k2, ABBA.shape[0]))
                            
            elif self.alg == "nystrom":
                omega, Q = Nystrom.eigpsd(ABBA, self.k3)
            elif self.alg == "exact": 
                omega, Q = scipy.sparse.linalg.eigsh(ABBA, min(self.k1, ABBA.shape[0]-1), which="LM", ncv = min(15*self.k1, ABBA.shape[0]))
            elif self.alg == "efficientNystrom":
                omega, Q = EfficientNystrom.eigWeight(subW, self.k2, self.k1)
            elif self.alg == "randomisedSvd": 
                Q, omega, R = RandomisedSVD.svd(ABBA, self.k4)
            else:
                raise ValueError("Invalid Algorithm: " + str(self.alg))

            decompositionTimeList.append(time.time()-startTime)                  
                  
            if self.alg=="IASC":
                self.storeInformation(subW, ABBA)
            
            # --- Kmeans ---
            startTime = time.time()
            inds = numpy.flipud(numpy.argsort(omega))

            standardiser = Standardiser()
            #For some very strange reason we get an overflow when computing the
            #norm of the rows of Q even though its elements are bounded by 1.
            try:
                V = standardiser.normaliseArray(Q[:, inds[0:self.k1]].real.T).T
            except FloatingPointError as e:
                logging.warning("FloatingPointError: " + str(e))
                #Without a fallback V would be undefined for the first graph
                #and stale (from the previous graph) for later ones. Redo the
                #normalisation with numpy floating point errors ignored.
                with numpy.errstate(all="ignore"):
                    V = standardiser.normaliseArray(Q[:, inds[0:self.k1]].real.T).T
            V = VqUtils.whiten(V)
            if i == 0:
                centroids, distortion = vq.kmeans(V, self.k1, iter=self.nb_iter_kmeans)
            else:
                #Warm start from the previous clustering, padding with randomly
                #chosen rows of V if fewer than self.k1 centroids were found
                centroids = self.findCentroids(V, clusters[:subW.shape[0]])
                if centroids.shape[0] < self.k1:
                    nb_missing_centroids = self.k1 - centroids.shape[0]
                    random_centroids = V[numpy.random.randint(0, V.shape[0], nb_missing_centroids),:]
                    centroids = numpy.vstack((centroids, random_centroids))
                centroids, distortion = vq.kmeans(V, centroids) #iter can only be 1
            clusters, distortion = vq.vq(V, centroids)
            kMeansTimeList.append(time.time()-startTime)

            clustersList.append(clusters)

            #Dump the local variables when memory use exceeds 10**9
            if ProfileUtils.memory() > 10**9:
                ProfileUtils.memDisplay(locals())

            i += 1

        if verbose:
            return clustersList, numpy.array((decompositionTimeList, kMeansTimeList)).T, boundList
        else:
            return clustersList