def vectorStatistics(self, graph, treeStats=False, eigenStats=True):
        """
        Find a series of statistics for the given input graph which can be
        represented as vector values.

        :param graph: the graph to analyse; checked to be an AbstractMatrixGraph.
        :param treeStats: whether to add the tree size and tree depth
            distributions; checked with Parameter.checkBoolean.
        :param eigenStats: whether to compute the eigenvalue statistics of the
            symmetrised weight matrix.
        :return: a dict keyed by statistic name. "inDegreeDist",
            "outDegreeDist", "hopCount", "triangleDist", "maxEigVector" and
            "eigenDist" are always present; "componentsDist" is only set for
            an undirected graph with at least one component; "treeSizesDist"
            and "treeDepthsDist" are only set when treeStats is True.
        """
        Parameter.checkClass(graph, AbstractMatrixGraph)
        Parameter.checkBoolean(treeStats)
        statsDict = {}

        statsDict["inDegreeDist"] = graph.inDegreeDistribution()
        statsDict["outDegreeDist"] = graph.degreeDistribution()
        logging.debug("Computing hop counts")
        P = graph.findAllDistances(False)
        statsDict["hopCount"] = graph.hopCount(P)
        logging.debug("Computing triangle count")
        if graph.getNumVertices() != 0:
            statsDict["triangleDist"] = numpy.bincount(
                graph.triangleSequence())
        else:
            # No vertices: report an empty distribution.
            statsDict["triangleDist"] = numpy.array([])

        # Get the distribution of component sizes
        logging.debug("Finding distribution of component sizes")

        if graph.isUndirected():
            components = graph.findConnectedComponents()
            if len(components) != 0:
                # The builtin int replaces numpy.int, an alias of it that was
                # deprecated in NumPy 1.20 and removed in NumPy 1.24.
                componentSizes = numpy.array(
                    [len(c) for c in components], int)
                statsDict["componentsDist"] = numpy.bincount(componentSizes)

        if graph.getNumVertices() != 0 and eigenStats:
            logging.debug("Computing eigenvalues/vectors")
            # Average the weight matrix with its transpose so it is symmetric.
            W = graph.getWeightMatrix()
            W = (W + W.T) / 2
            # NOTE(review): numpy.linalg.eigh targets symmetric input; eig is
            # kept so eigenvector sign/ordering stays as before - confirm
            # before switching.
            eigenDistribution, V = numpy.linalg.eig(W)
            i = numpy.argmax(eigenDistribution)
            statsDict["maxEigVector"] = V[:, i]
            # Keep only the positive eigenvalues, largest first.
            statsDict["eigenDist"] = numpy.flipud(
                numpy.sort(eigenDistribution[eigenDistribution > 0]))
            # Presumably frees the decomposition temporaries promptly.
            gc.collect()
        else:
            statsDict["maxEigVector"] = numpy.array([])
            statsDict["eigenDist"] = numpy.array([])

        if treeStats:
            logging.debug("Computing statistics on trees")
            trees = graph.findTrees()
            statsDict["treeSizesDist"] = numpy.bincount(
                [len(x) for x in trees])
            treeDepths = [
                GraphUtils.treeDepth((graph.subgraph(x))) for x in trees
            ]
            statsDict["treeDepthsDist"] = numpy.bincount(treeDepths)

        return statsDict
# Example #2
# 0
    def testTreeDepth(self):
        """
        Check GraphUtils.treeDepth on two small graphs built edge by edge.
        """
        numFeatures = 1

        # Edges 0-1, 0-2, 2-3: the chain 0-2-3 has two edges.
        numVertices = 4
        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList, False)
        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(2, 3)
        # assertEqual replaces the deprecated alias assertEquals, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(GraphUtils.treeDepth(graph), 2)

        # Same graph extended by the edge 3-4: the chain 0-2-3-4 has three edges.
        numVertices = 5
        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList, False)
        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(2, 3)
        graph.addEdge(3, 4)
        self.assertEqual(GraphUtils.treeDepth(graph), 3)
    def testTreeDepth(self):
        """
        Check GraphUtils.treeDepth on two small graphs built edge by edge.
        """
        numFeatures = 1

        # First graph: 4 vertices with edges 0-1, 0-2 and 2-3; expected depth 2.
        numVertices = 4
        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList, False)
        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(2, 3)
        # The alias assertEquals is deprecated and no longer exists in
        # Python 3.12+, so the canonical assertEqual is used.
        self.assertEqual(GraphUtils.treeDepth(graph), 2)

        # Second graph: one more vertex hanging off vertex 3; expected depth 3.
        numVertices = 5
        vList = VertexList(numVertices, numFeatures)
        graph = SparseGraph(vList, False)
        graph.addEdge(0, 1)
        graph.addEdge(0, 2)
        graph.addEdge(2, 3)
        graph.addEdge(3, 4)
        self.assertEqual(GraphUtils.treeDepth(graph), 3)
    def vectorStatistics(self, graph, treeStats=False, eigenStats=True):
        """
        Find a series of statistics for the given input graph which can be
        represented as vector values.

        :param graph: the graph to analyse; checked to be an AbstractMatrixGraph.
        :param treeStats: whether to add the tree size and tree depth
            distributions; checked with Parameter.checkBoolean.
        :param eigenStats: whether to compute the eigenvalue statistics of the
            symmetrised weight matrix.
        :return: a dict keyed by statistic name. "inDegreeDist",
            "outDegreeDist", "hopCount", "triangleDist", "maxEigVector" and
            "eigenDist" are always present; "componentsDist" is only set for
            an undirected graph with at least one component; "treeSizesDist"
            and "treeDepthsDist" are only set when treeStats is True.
        """
        Parameter.checkClass(graph, AbstractMatrixGraph)
        Parameter.checkBoolean(treeStats)
        statsDict = {}

        statsDict["inDegreeDist"] = graph.inDegreeDistribution()
        statsDict["outDegreeDist"] = graph.degreeDistribution()
        logging.debug("Computing hop counts")
        P = graph.findAllDistances(False)
        statsDict["hopCount"] = graph.hopCount(P)
        logging.debug("Computing triangle count")
        if graph.getNumVertices() != 0:
            statsDict["triangleDist"] = numpy.bincount(graph.triangleSequence())
        else:
            # No vertices: report an empty distribution.
            statsDict["triangleDist"] = numpy.array([])

        # Get the distribution of component sizes
        logging.debug("Finding distribution of component sizes")

        if graph.isUndirected():
            components = graph.findConnectedComponents()
            if len(components) != 0:
                # The builtin int replaces numpy.int, an alias of it that was
                # deprecated in NumPy 1.20 and removed in NumPy 1.24.
                componentSizes = numpy.array([len(c) for c in components], int)
                statsDict["componentsDist"] = numpy.bincount(componentSizes)

        if graph.getNumVertices() != 0 and eigenStats:
            logging.debug("Computing eigenvalues/vectors")
            # Average the weight matrix with its transpose so it is symmetric.
            W = graph.getWeightMatrix()
            W = (W + W.T)/2
            # NOTE(review): numpy.linalg.eigh targets symmetric input; eig is
            # kept so eigenvector sign/ordering stays as before - confirm
            # before switching.
            eigenDistribution, V = numpy.linalg.eig(W)
            i = numpy.argmax(eigenDistribution)
            statsDict["maxEigVector"] = V[:, i]
            # Keep only the positive eigenvalues, largest first.
            statsDict["eigenDist"] = numpy.flipud(numpy.sort(eigenDistribution[eigenDistribution > 0]))
            # Presumably frees the decomposition temporaries promptly.
            gc.collect()
        else:
            statsDict["maxEigVector"] = numpy.array([])
            statsDict["eigenDist"] = numpy.array([])

        if treeStats:
            logging.debug("Computing statistics on trees")
            trees = graph.findTrees()
            statsDict["treeSizesDist"] = numpy.bincount([len(x) for x in trees])
            treeDepths = [GraphUtils.treeDepth((graph.subgraph(x))) for x in trees]
            statsDict["treeDepthsDist"] = numpy.bincount(treeDepths)

        return statsDict
    def scalarStatistics(self, graph, slowStats=True, treeStats=False):
        """
        Compute the scalar-valued statistics of a graph and pack them into a
        single array, one slot per statistic.

        Slots are addressed through the ``self.*Index`` attributes. A statistic
        that is not computed on a given call keeps its initial value of -1.

        :param graph: the graph to analyse; checked to be an AbstractSingleGraph.
        :param slowStats: if True, also compute the statistics derived from
            the all-pairs distance matrix (diameter, effective diameter,
            geodesic distances) and the power law fit.
        :param treeStats: if True, also compute the statistics on trees.
        :return: an array of length self.numStats.
        """
        Parameter.checkClass(graph, AbstractSingleGraph)
        Parameter.checkBoolean(slowStats)
        Parameter.checkBoolean(treeStats)

        # Every slot starts at -1 so that skipped statistics stay recognisable.
        stats = -numpy.ones(self.numStats)
        stats[self.numVerticesIndex] = graph.getNumVertices()
        stats[self.numEdgesIndex] = graph.getNumEdges()
        stats[self.numDirEdgesIndex] = graph.getNumDirEdges()
        stats[self.densityIndex] = graph.density()

        # Component statistics are only gathered for undirected graphs.
        if graph.isUndirected():
            logging.debug("Finding connected components")
            components = graph.findConnectedComponents()
            logging.debug("Done")
            numComponents = len(components)
            stats[self.numComponentsIndex] = numComponents

            stats[self.numNonSingletonComponentsIndex] = sum(
                1 for comp in components if len(comp) > 1)
            stats[self.numTriOrMoreComponentsIndex] = sum(
                1 for comp in components if len(comp) > 2)

            logging.debug("Studying max component")
            if numComponents == 0:
                stats[self.maxComponentSizeIndex] = 0
                stats[self.maxComponentEdgesIndex] = 0
                stats[self.meanComponentSizeIndex] = 0
                stats[self.geodesicDistMaxCompIndex] = 0
            else:
                # The first component is used as the "max" component;
                # presumably findConnectedComponents orders by decreasing size.
                largest = components[0]
                largestGraph = graph.subgraph(list(largest))
                stats[self.maxComponentSizeIndex] = len(largest)

                if numComponents >= 2:
                    stats[self.secondComponentSizeIndex] = len(components[1])

                stats[self.maxComponentEdgesIndex] = largestGraph.getNumEdges()
                totalSize = sum(len(comp) for comp in components)
                stats[self.meanComponentSizeIndex] = totalSize / float(numComponents)
                stats[self.maxCompMeanDegreeIndex] = numpy.mean(
                    largestGraph.outDegreeSequence())

        if graph.getNumVertices() == 0:
            stats[self.meanDegreeIndex] = 0
        else:
            stats[self.meanDegreeIndex] = numpy.mean(graph.outDegreeSequence())

        if slowStats:
            # One all-pairs distance matrix is shared by every statistic below.
            if self.useFloydWarshall:
                logging.debug("Running Floyd-Warshall")
                distances = graph.floydWarshall(False)
            else:
                logging.debug("Running Dijkstra's algorithm")
                distances = graph.findAllDistances(False)

            stats[self.diameterIndex] = graph.diameter(P=distances)
            stats[self.effectiveDiameterIndex] = graph.effectiveDiameter(
                self.q, P=distances)
            stats[self.powerLawIndex] = graph.fitPowerLaw()[0]
            stats[self.geodesicDistanceIndex] = graph.geodesicDistance(
                P=distances)
            stats[self.harmonicGeoDistanceIndex] = graph.harmonicGeodesicDistance(
                P=distances)

            # components only exists for undirected graphs, hence the order
            # of the two tests.
            if graph.isUndirected() and len(components) != 0:
                stats[self.geodesicDistMaxCompIndex] = graph.geodesicDistance(
                    P=distances, vertexInds=list(components[0]))

        if not treeStats:
            return stats

        logging.debug("Computing statistics on trees")
        trees = graph.findTrees()
        numTrees = len(trees)
        stats[self.numTreesIndex] = numTrees
        stats[self.numNonSingletonTreesIndex] = sum(
            1 for tree in trees if len(tree) > 1)

        # NOTE(review): with no trees numpy.mean receives an empty list and
        # yields nan - confirm that is the intended value.
        treeSizes = [len(tree) for tree in trees]
        stats[self.meanTreeSizeIndex] = numpy.mean(treeSizes)
        treeDepths = [
            GraphUtils.treeDepth(graph.subgraph(list(tree))) for tree in trees
        ]
        stats[self.meanTreeDepthIndex] = numpy.mean(treeDepths)

        if numTrees == 0:
            return stats

        firstTreeGraph = graph.subgraph(trees[0])
        stats[self.maxTreeSizeIndex] = len(trees[0])
        stats[self.maxTreeDepthIndex] = GraphUtils.treeDepth(firstTreeGraph)

        if numTrees >= 2:
            nextTreeGraph = graph.subgraph(trees[1])
            stats[self.secondTreeSizeIndex] = len(trees[1])
            stats[self.secondTreeDepthIndex] = GraphUtils.treeDepth(nextTreeGraph)

        return stats
    def scalarStatistics(self, graph, slowStats=True, treeStats=False):
        """
        Compute the scalar-valued statistics of a graph and return them as one
        array, with each statistic stored at its ``self.*Index`` position.

        Statistics that are not computed on a given call keep the initial
        value of -1.

        :param graph: the graph to analyse; checked to be an AbstractSingleGraph.
        :param slowStats: if True, also compute the statistics derived from
            the all-pairs distance matrix (diameter, effective diameter,
            geodesic distances) and the power law fit.
        :param treeStats: if True, also compute the statistics on trees.
        :return: an array of length self.numStats.
        """
        Parameter.checkClass(graph, AbstractSingleGraph)
        Parameter.checkBoolean(slowStats)
        Parameter.checkBoolean(treeStats)

        # Start every slot at -1; anything left untouched reads as "not computed".
        result = -numpy.ones(self.numStats)
        result[self.numVerticesIndex] = graph.getNumVertices()
        result[self.numEdgesIndex] = graph.getNumEdges()
        result[self.numDirEdgesIndex] = graph.getNumDirEdges()
        result[self.densityIndex] = graph.density()

        # Component statistics are only gathered for undirected graphs.
        if graph.isUndirected():
            components = graph.findConnectedComponents()
            componentCount = len(components)
            result[self.numComponentsIndex] = componentCount

            result[self.numNonSingletonComponentsIndex] = sum(
                1 for comp in components if len(comp) > 1)
            result[self.numTriOrMoreComponentsIndex] = sum(
                1 for comp in components if len(comp) > 2)

            if componentCount == 0:
                result[self.maxComponentSizeIndex] = 0
                result[self.maxComponentEdgesIndex] = 0
                result[self.meanComponentSizeIndex] = 0
                result[self.geodesicDistMaxCompIndex] = 0
            else:
                # The first component is used as the "max" component;
                # presumably findConnectedComponents orders by decreasing size.
                firstComponent = components[0]
                firstComponentGraph = graph.subgraph(list(firstComponent))
                result[self.maxComponentSizeIndex] = len(firstComponent)

                if componentCount >= 2:
                    result[self.secondComponentSizeIndex] = len(components[1])

                result[self.maxComponentEdgesIndex] = (
                    firstComponentGraph.getNumEdges())
                sizeTotal = sum(len(comp) for comp in components)
                result[self.meanComponentSizeIndex] = (
                    sizeTotal / float(componentCount))
                result[self.maxCompMeanDegreeIndex] = numpy.mean(
                    firstComponentGraph.outDegreeSequence())

        if graph.getNumVertices() == 0:
            result[self.meanDegreeIndex] = 0
        else:
            result[self.meanDegreeIndex] = numpy.mean(
                graph.outDegreeSequence())

        if slowStats:
            # One all-pairs distance matrix is shared by every statistic below.
            if self.useFloydWarshall:
                logging.debug("Running Floyd-Warshall")
                allDistances = graph.floydWarshall(False)
            else:
                logging.debug("Running Dijkstra's algorithm")
                allDistances = graph.findAllDistances(False)

            result[self.diameterIndex] = graph.diameter(P=allDistances)
            result[self.effectiveDiameterIndex] = graph.effectiveDiameter(
                self.q, P=allDistances)
            result[self.powerLawIndex] = graph.fitPowerLaw()[0]
            result[self.geodesicDistanceIndex] = graph.geodesicDistance(
                P=allDistances)
            result[self.harmonicGeoDistanceIndex] = (
                graph.harmonicGeodesicDistance(P=allDistances))

            # components only exists for undirected graphs, hence the order
            # of the two tests.
            if graph.isUndirected() and len(components) != 0:
                result[self.geodesicDistMaxCompIndex] = graph.geodesicDistance(
                    P=allDistances, vertexInds=list(components[0]))

        if not treeStats:
            return result

        logging.debug("Computing statistics on trees")
        trees = graph.findTrees()
        treeCount = len(trees)
        result[self.numTreesIndex] = treeCount
        result[self.numNonSingletonTreesIndex] = sum(
            1 for tree in trees if len(tree) > 1)

        # NOTE(review): with no trees numpy.mean receives an empty list and
        # yields nan - confirm that is the intended value.
        sizes = [len(tree) for tree in trees]
        result[self.meanTreeSizeIndex] = numpy.mean(sizes)
        depths = [
            GraphUtils.treeDepth(graph.subgraph(list(tree))) for tree in trees
        ]
        result[self.meanTreeDepthIndex] = numpy.mean(depths)

        if treeCount == 0:
            return result

        firstTreeGraph = graph.subgraph(trees[0])
        result[self.maxTreeSizeIndex] = len(trees[0])
        result[self.maxTreeDepthIndex] = GraphUtils.treeDepth(firstTreeGraph)

        if treeCount >= 2:
            nextTreeGraph = graph.subgraph(trees[1])
            result[self.secondTreeSizeIndex] = len(trees[1])
            result[self.secondTreeDepthIndex] = GraphUtils.treeDepth(
                nextTreeGraph)

        return result