def setSelfEdges(self, selfEdges):
     """
     Store the flag that says whether self edges are allowed.

     The argument is passed through ``Parameter.checkBoolean`` before it is
     assigned to ``self.selfEdges``.

     :param selfEdges: whether to allow self edges
     :type selfEdges: :class:`bool`
     """
     Parameter.checkBoolean(selfEdges)
     self.selfEdges = selfEdges
Exemple #2
0
    def diameter(self, useWeights=False, P=None):
        """
        Finds the diameter of a graph i.e. the longest finite shortest path.
        If P is not provided, the shortest paths are computed by calling
        ``self.floydWarshall(useWeights)``, so ``useWeights`` decides whether
        edge weights are taken into account.

        :param useWeights: Whether to use edge weights to compute a diameter.
        :type useWeights: :class:`bool`

        :param P: An optional nxn matrix whose ijth entry is the shortest path from i to j.
        :type P: :class:`ndarray`

        :returns:  The diameter of this graph: 0 when the graph has no edges, otherwise the largest non-infinite entry of P, as an int when useWeights is False and as a float otherwise.

        :raises ValueError: if P is given but is not a numpy array of shape (numVertices, numVertices).
        """
        Parameter.checkBoolean(useWeights)

        # Identity comparison with None: ``P != None`` on an ndarray is an
        # elementwise comparison whose truth value is ambiguous.
        if P is not None:
            n = self.getNumVertices()
            if not (isinstance(P, numpy.ndarray) and P.shape == (n, n)):
                # str() is required because shape is a tuple; getattr guards
                # against objects that have no shape attribute at all.
                logging.debug("P.shape = " + str(getattr(P, "shape", None)) +
                              " W.shape = " + str(self.W.shape))
                raise ValueError("P must be array of same size as weight matrix of graph")

        if self.getNumEdges() == 0:
            return 0

        if P is None:
            P = self.floydWarshall(useWeights)

        # Boolean-mask indexing already yields a new array, so P is never
        # modified and no defensive copy is needed.
        longest = numpy.max(P[P != float('inf')])

        if useWeights:
            return float(longest)
        return int(longest)
 def setSampleReplace(self, sampleReplace):
     """
     Store the flag that decides whether sampling is done with replacement.

     The argument is passed through ``Parameter.checkBoolean`` before it is
     assigned to ``self.sampleReplace``.

     :param sampleReplace: A boolean to decide whether to sample with replacement.
     :type sampleReplace: :class:`bool`
     """
     Parameter.checkBoolean(sampleReplace)
     self.sampleReplace = sampleReplace
Exemple #4
0
 def setSelfEdges(self, selfEdges):
     """
     Store the flag that says whether self edges are allowed.

     The argument is passed through ``Parameter.checkBoolean`` before it is
     assigned to ``self.selfEdges``.

     :param selfEdges: whether to allow self edges
     :type selfEdges: :class:`bool`
     """
     Parameter.checkBoolean(selfEdges)
     self.selfEdges = selfEdges
    def sequenceVectorStats(self,
                            graph,
                            subgraphIndices,
                            treeStats=False,
                            eigenStats=True):
        """
        Pass in a graph and a list of subgraph indices and return a list of
        vector statistics. Element i of the result is the dict produced by
        vectorStatistics for the subgraph built from subgraphIndices[i].

        Each entry of subgraphIndices is validated with Parameter.checkList
        (using Parameter.checkInt and the bounds [0, graph.getNumVertices()]).
        treeStats is validated as a boolean; eigenStats is forwarded as given.
        """
        Parameter.checkClass(graph, AbstractMatrixGraph)
        for vertexInds in subgraphIndices:
            Parameter.checkList(vertexInds, Parameter.checkInt,
                                [0, graph.getNumVertices()])
        Parameter.checkBoolean(treeStats)

        total = len(subgraphIndices)
        results = []

        for step, vertexInds in enumerate(subgraphIndices):
            Util.printIteration(step, self.vectorPrintStep, total)
            current = graph.subgraph(vertexInds)
            stats = self.vectorStatistics(current, treeStats, eigenStats)
            results.append(stats)

        return results
    def sequenceScalarStats(self,
                            graph,
                            subgraphIndices,
                            slowStats=True,
                            treeStats=False):
        """
        Pass in a graph and a list of subgraph indices and return a matrix of
        scalar statistics. Row i holds the output of scalarStatistics for the
        subgraph built from subgraphIndices[i]; the matrix has self.numStats
        columns.

        Each entry of subgraphIndices is validated with Parameter.checkList
        (using Parameter.checkInt and the bounds [0, graph.getNumVertices()]),
        and both flags are validated as booleans.
        """
        Parameter.checkClass(graph, AbstractMatrixGraph)
        for vertexInds in subgraphIndices:
            Parameter.checkList(vertexInds, Parameter.checkInt,
                                [0, graph.getNumVertices()])
        Parameter.checkBoolean(slowStats)
        Parameter.checkBoolean(treeStats)

        total = len(subgraphIndices)
        result = numpy.zeros((total, self.numStats))

        for row, vertexInds in enumerate(subgraphIndices):
            Util.printIteration(row, self.printStep, total)
            current = graph.subgraph(vertexInds)
            result[row, :] = self.scalarStatistics(current, slowStats,
                                                   treeStats)

        return result
    def vectorStatistics(self, graph, treeStats=False, eigenStats=True):
        """
        Find a series of statistics for the given input graph which can be represented 
        as vector values.

        The returned dict contains the keys "inDegreeDist", "outDegreeDist",
        "hopCount", "triangleDist", "maxEigVector" and "eigenDist".
        "componentsDist" is added only for undirected graphs with at least one
        connected component, and "treeSizesDist"/"treeDepthsDist" only when
        treeStats is True.

        :param graph: the graph to analyse, checked against AbstractMatrixGraph.

        :param treeStats: whether to compute the tree size and depth distributions.
        :type treeStats: :class:`bool`

        :param eigenStats: whether to compute the eigenvalue statistics; when false (or the graph has no vertices) empty arrays are stored instead.

        :returns: A dict mapping statistic names to arrays.
        """
        Parameter.checkClass(graph, AbstractMatrixGraph)
        Parameter.checkBoolean(treeStats)
        statsDict = {}

        statsDict["inDegreeDist"] = graph.inDegreeDistribution()
        statsDict["outDegreeDist"] = graph.degreeDistribution()
        logging.debug("Computing hop counts")
        P = graph.findAllDistances(False)
        statsDict["hopCount"] = graph.hopCount(P)
        logging.debug("Computing triangle count")
        if graph.getNumVertices() != 0:
            statsDict["triangleDist"] = numpy.bincount(
                graph.triangleSequence())
        else:
            statsDict["triangleDist"] = numpy.array([])

        #Get the distribution of component sizes
        logging.debug("Finding distribution of component sizes")

        if graph.isUndirected():
            components = graph.findConnectedComponents()
            if len(components) != 0:
                #The numpy.int alias was removed from NumPy; it was the builtin int
                componentSizes = numpy.array([len(c) for c in components],
                                             dtype=int)
                statsDict["componentsDist"] = numpy.bincount(componentSizes)

        if graph.getNumVertices() != 0 and eigenStats:
            logging.debug("Computing eigenvalues/vectors")
            W = graph.getWeightMatrix()
            #Make sure weight matrix is symmetric
            W = (W + W.T) / 2
            eigenDistribution, V = numpy.linalg.eig(W)
            i = numpy.argmax(eigenDistribution)
            statsDict["maxEigVector"] = V[:, i]
            #Positive eigenvalues only, sorted in descending order
            statsDict["eigenDist"] = numpy.flipud(
                numpy.sort(eigenDistribution[eigenDistribution > 0]))
            gc.collect()
        else:
            statsDict["maxEigVector"] = numpy.array([])
            statsDict["eigenDist"] = numpy.array([])

        if treeStats:
            logging.debug("Computing statistics on trees")
            trees = graph.findTrees()
            statsDict["treeSizesDist"] = numpy.bincount(
                [len(x) for x in trees])
            treeDepths = [
                GraphUtils.treeDepth((graph.subgraph(x))) for x in trees
            ]
            statsDict["treeDepthsDist"] = numpy.bincount(treeDepths)

        return statsDict
Exemple #8
0
    def checkBoolean(self):
        """
        Check that Parameter.checkBoolean accepts True and False and raises
        ValueError for the integers 0 and 1 and for a string.
        """
        for accepted in (True, False):
            Parameter.checkBoolean(accepted)

        for rejected in (0, 1, "s"):
            self.assertRaises(ValueError, Parameter.checkBoolean, rejected)
    def checkBoolean(self):
        """
        Check that Parameter.checkBoolean accepts True and False and raises
        ValueError for the integers 0 and 1 and for a string.
        """
        for accepted in (True, False):
            Parameter.checkBoolean(accepted)

        for rejected in (0, 1, "s"):
            self.assertRaises(ValueError, Parameter.checkBoolean, rejected)
    def vectorStatistics(self, graph, treeStats=False, eigenStats=True):
        """
        Find a series of statistics for the given input graph which can be represented 
        as vector values.

        The returned dict contains the keys "inDegreeDist", "outDegreeDist",
        "hopCount", "triangleDist", "maxEigVector" and "eigenDist".
        "componentsDist" is added only for undirected graphs with at least one
        connected component, and "treeSizesDist"/"treeDepthsDist" only when
        treeStats is True.

        :param graph: the graph to analyse, checked against AbstractMatrixGraph.

        :param treeStats: whether to compute the tree size and depth distributions.
        :type treeStats: :class:`bool`

        :param eigenStats: whether to compute the eigenvalue statistics; when false (or the graph has no vertices) empty arrays are stored instead.

        :returns: A dict mapping statistic names to arrays.
        """
        Parameter.checkClass(graph, AbstractMatrixGraph)
        Parameter.checkBoolean(treeStats)
        statsDict = {}

        statsDict["inDegreeDist"] = graph.inDegreeDistribution()
        statsDict["outDegreeDist"] = graph.degreeDistribution()
        logging.debug("Computing hop counts")
        P = graph.findAllDistances(False)
        statsDict["hopCount"] = graph.hopCount(P)
        logging.debug("Computing triangle count")
        if graph.getNumVertices() != 0:
            statsDict["triangleDist"] = numpy.bincount(graph.triangleSequence())
        else:
            statsDict["triangleDist"] = numpy.array([])

        #Get the distribution of component sizes
        logging.debug("Finding distribution of component sizes")

        if graph.isUndirected():
            components = graph.findConnectedComponents()
            if len(components) != 0:
                #The numpy.int alias was removed from NumPy; it was the builtin int
                componentSizes = numpy.array([len(c) for c in components], dtype=int)
                statsDict["componentsDist"] = numpy.bincount(componentSizes)

        if graph.getNumVertices() != 0 and eigenStats:
            logging.debug("Computing eigenvalues/vectors")
            W = graph.getWeightMatrix()
            #Make sure weight matrix is symmetric
            W = (W + W.T)/2
            eigenDistribution, V = numpy.linalg.eig(W)
            i = numpy.argmax(eigenDistribution)
            statsDict["maxEigVector"] = V[:, i]
            #Positive eigenvalues only, sorted in descending order
            statsDict["eigenDist"] = numpy.flipud(numpy.sort(eigenDistribution[eigenDistribution > 0]))
            gc.collect()
        else:
            statsDict["maxEigVector"] = numpy.array([])
            statsDict["eigenDist"] = numpy.array([])

        if treeStats:
            logging.debug("Computing statistics on trees")
            trees = graph.findTrees()
            statsDict["treeSizesDist"] = numpy.bincount([len(x) for x in trees])
            treeDepths = [GraphUtils.treeDepth((graph.subgraph(x))) for x in trees]
            statsDict["treeDepthsDist"] = numpy.bincount(treeDepths)

        return statsDict
    def __init__(self, vertices, undirected=True, W=None, sizeHint=1000):
        """
        Create a PySparseGraph with a given AbstractVertexList or number of 
        vertices, and specify whether it is directed. One can optionally pass 
        in a sparse matrix W which is used as the weight matrix of the 
        graph. Different kinds of sparse matrix can impact the speed of various
        operations. The currently supported sparse matrix types are: ll_mat. 

        :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.  
        
        :param undirected: a boolean variable to indicate if the graph is undirected.
        :type undirected: :class:`boolean`

        :param W: a square sparse matrix of the same size as the number of vertices, or None to create the default one.

        :param sizeHint: the expected number of edges in the graph for efficient memory usage.
        :type sizeHint: :class:`int`

        :raises ValueError: if vertices is neither an AbstractVertexList nor an int, or if W is given but is not an spmatrix.LLMatType of matching shape.
        """
        Parameter.checkBoolean(undirected)

        if isinstance(vertices, AbstractVertexList):
            self.vList = vertices
        elif isinstance(vertices, int):
            self.vList = GeneralVertexList(vertices)
        else:
            raise ValueError("Invalid vList parameter: " + str(vertices))

        numVertices = len(self.vList)

        #Compare with None by identity so the matrix type's own comparison
        #operators are never invoked
        if W is not None and not (isinstance(W, spmatrix.LLMatType)
                                  and W.shape == (numVertices, numVertices)):
            raise ValueError(
                "Input argument W must be None or spmatrix.ll_mat of size " +
                str(numVertices))

        self.undirected = undirected

        if W is None:
            #Should use ll_mat_sym for undirected graphs but it has several unimplemented methods
            self.W = spmatrix.ll_mat(numVertices, numVertices, sizeHint)
        else:
            self.W = W
            #The next line is for error checking mainly
            self.setWeightMatrix(W)
    def sequenceVectorStats(self, graph, subgraphIndices, treeStats=False, eigenStats=True):
        """
        Pass in a graph and a list of subgraph indices and return a list of
        vector statistics. Element i of the result is the dict produced by
        vectorStatistics for the subgraph built from subgraphIndices[i].

        Each entry of subgraphIndices is validated with Parameter.checkList
        (using Parameter.checkInt and the bounds [0, graph.getNumVertices()]).
        treeStats is validated as a boolean; eigenStats is forwarded as given.
        """
        Parameter.checkClass(graph, AbstractMatrixGraph)
        for vertexInds in subgraphIndices:
            Parameter.checkList(vertexInds, Parameter.checkInt, [0, graph.getNumVertices()])
        Parameter.checkBoolean(treeStats)

        total = len(subgraphIndices)
        results = []

        for step, vertexInds in enumerate(subgraphIndices):
            Util.printIteration(step, self.vectorPrintStep, total)
            current = graph.subgraph(vertexInds)
            stats = self.vectorStatistics(current, treeStats, eigenStats)
            results.append(stats)

        return results
    def meanSeqScalarStats(self, graphList, slowStats=True, treeStats=False):
        """
        Pass in a list of tuples (graph, subgraphIndices) and return the mean and
        standard deviation, taken over the graphs, of the matrices produced by
        sequenceScalarStats. Each row corresponds to the statistics on one subgraph.
        All graphs must be the same size and computed from the same distribution,
        and the number of subgraphs must be the same (it is read from the first tuple).

        Returns -1 when graphList is empty, otherwise the tuple (mean, std).
        """
        Parameter.checkBoolean(slowStats)
        Parameter.checkBoolean(treeStats)

        numGraphs = len(graphList)
        if numGraphs == 0:
            return -1

        numSubgraphs = len(graphList[0][1])
        # One slice along the last axis per input graph.
        stacked = numpy.zeros((numSubgraphs, self.numStats, numGraphs))

        for k, (graph, subgraphIndices) in enumerate(graphList):
            stacked[:, :, k] = self.sequenceScalarStats(graph, subgraphIndices, slowStats, treeStats)

        meanStats = numpy.mean(stacked, 2)
        stdStats = numpy.std(stacked, 2)
        return meanStats, stdStats
    def sequenceScalarStats(self, graph, subgraphIndices, slowStats=True, treeStats=False):
        """
        Pass in a graph and a list of subgraph indices and return a matrix of
        scalar statistics. Row i holds the output of scalarStatistics for the
        subgraph built from subgraphIndices[i]; the matrix has self.numStats
        columns.

        Each entry of subgraphIndices is validated with Parameter.checkList
        (using Parameter.checkInt and the bounds [0, graph.getNumVertices()]),
        and both flags are validated as booleans.
        """
        Parameter.checkClass(graph, AbstractMatrixGraph)
        for vertexInds in subgraphIndices:
            Parameter.checkList(vertexInds, Parameter.checkInt, [0, graph.getNumVertices()])
        Parameter.checkBoolean(slowStats)
        Parameter.checkBoolean(treeStats)

        total = len(subgraphIndices)
        result = numpy.zeros((total, self.numStats))

        for row, vertexInds in enumerate(subgraphIndices):
            Util.printIteration(row, self.printStep, total)
            logging.debug("Subgraph size: " + str(len(vertexInds)))
            current = graph.subgraph(vertexInds)
            result[row, :] = self.scalarStatistics(current, slowStats, treeStats)

        return result
    def __init__(self, vertices, undirected=True, W=None, dtype=float):
        """
        Create a DenseGraph with a given AbstractVertexList or number of 
        vertices, and specify whether it is directed. One can optionally pass 
        in a numpy array W which is used as the weight matrix of the 
        graph. 

        :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.  
        
        :param undirected: a boolean variable to indicate if the graph is undirected.
        :type undirected: :class:`boolean`

        :param W: a numpy array of the same size as vertices, or None to create the default one.
        
        :param dtype: the data type of the weight matrix if W is not specified e.g numpy.int8. Defaults to the builtin float, which is what the removed numpy.float alias referred to.

        :raises ValueError: if vertices is neither an AbstractVertexList nor an int, or if W is given but is not a numpy array of matching shape.
        """
        Parameter.checkBoolean(undirected)

        if isinstance(vertices, AbstractVertexList):
            self.vList = vertices
        elif isinstance(vertices, int):
            self.vList = GeneralVertexList(vertices)
        else:
            raise ValueError("Invalid vList parameter: " + str(vertices))

        numVertices = len(self.vList)

        #Identity test: "W != None" on an ndarray compares elementwise and its
        #truth value is ambiguous
        if W is not None and not (isinstance(W, numpy.ndarray)
                                  and W.shape == (numVertices, numVertices)):
            raise ValueError(
                "Input argument W must be None or numpy array of size " +
                str(numVertices))

        self.undirected = undirected

        if W is None:
            self.W = numpy.zeros((numVertices, numVertices), dtype=dtype)
        else:
            self.W = W
            #The next line is for error checking mainly
            self.setWeightMatrix(W)
    def meanSeqScalarStats(self, graphList, slowStats=True, treeStats=False):
        """
        Pass in a list of tuples (graph, subgraphIndices) and return the mean and
        standard deviation, taken over the graphs, of the matrices produced by
        sequenceScalarStats. Each row corresponds to the statistics on one subgraph.
        All graphs must be the same size and computed from the same distribution,
        and the number of subgraphs must be the same (it is read from the first tuple).

        Returns -1 when graphList is empty, otherwise the tuple (mean, std).
        """
        Parameter.checkBoolean(slowStats)
        Parameter.checkBoolean(treeStats)

        numGraphs = len(graphList)
        if numGraphs == 0:
            return -1

        numSubgraphs = len(graphList[0][1])
        # One slice along the last axis per input graph.
        stacked = numpy.zeros((numSubgraphs, self.numStats, numGraphs))

        for k, (graph, subgraphIndices) in enumerate(graphList):
            stacked[:, :, k] = self.sequenceScalarStats(
                graph, subgraphIndices, slowStats, treeStats)

        meanStats = numpy.mean(stacked, 2)
        stdStats = numpy.std(stacked, 2)
        return meanStats, stdStats
    def __init__(self, vertices, undirected=True, dtype=float):
        """
        Create a sparse graph using sppy csarray with a given AbstractVertexList, and specify whether directed.

        :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.  
        
        :param undirected: a boolean variable to indicate if the graph is undirected.
        :type undirected: :class:`boolean`

        :param dtype: the data type for the weight matrix, e.g numpy.int8. Defaults to the builtin float, which is what the removed numpy.float alias referred to.

        :raises ValueError: if vertices is neither an AbstractVertexList nor an int.
        """
        Parameter.checkBoolean(undirected)

        if isinstance(vertices, AbstractVertexList):
            self.vList = vertices
        elif isinstance(vertices, int):
            self.vList = GeneralVertexList(vertices)
        else:
            raise ValueError("Invalid vList parameter: " + str(vertices))

        numVertices = self.vList.getNumVertices()
        self.W = sppy.csarray((numVertices, numVertices), dtype)
        self.undirected = undirected
    def __init__(self, vertices, undirected=True, W=None, sizeHint=1000):
        """
        Create a PySparseGraph with a given AbstractVertexList or number of 
        vertices, and specify whether it is directed. One can optionally pass 
        in a sparse matrix W which is used as the weight matrix of the 
        graph. Different kinds of sparse matrix can impact the speed of various
        operations. The currently supported sparse matrix types are: ll_mat. 

        :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.  
        
        :param undirected: a boolean variable to indicate if the graph is undirected.
        :type undirected: :class:`boolean`

        :param W: a square sparse matrix of the same size as the number of vertices, or None to create the default one.

        :param sizeHint: the expected number of edges in the graph for efficient memory usage.
        :type sizeHint: :class:`int`

        :raises ValueError: if vertices is neither an AbstractVertexList nor an int, or if W is given but is not an spmatrix.LLMatType of matching shape.
        """
        Parameter.checkBoolean(undirected)

        if isinstance(vertices, AbstractVertexList):
            self.vList = vertices
        elif isinstance(vertices, int):
            self.vList = GeneralVertexList(vertices)
        else:
            raise ValueError("Invalid vList parameter: " + str(vertices))

        numVertices = len(self.vList)

        #Compare with None by identity so the matrix type's own comparison
        #operators are never invoked
        if W is not None and not (isinstance(W, spmatrix.LLMatType) and W.shape == (numVertices, numVertices)):
            raise ValueError("Input argument W must be None or spmatrix.ll_mat of size " + str(numVertices))

        self.undirected = undirected

        if W is None:
            #Should use ll_mat_sym for undirected graphs but it has several unimplemented methods
            self.W = spmatrix.ll_mat(numVertices, numVertices, sizeHint)
        else:
            self.W = W
            #The next line is for error checking mainly
            self.setWeightMatrix(W)
Exemple #19
0
    def __init__(self, vertices, undirected=True, dtype=float):
        """
        Create a sparse graph using sppy csarray with a given AbstractVertexList, and specify whether directed.

        :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.  
        
        :param undirected: a boolean variable to indicate if the graph is undirected.
        :type undirected: :class:`boolean`

        :param dtype: the data type for the weight matrix, e.g numpy.int8. Defaults to the builtin float, which is what the removed numpy.float alias referred to.

        :raises ValueError: if vertices is neither an AbstractVertexList nor an int.
        """
        Parameter.checkBoolean(undirected)

        if isinstance(vertices, AbstractVertexList):
            self.vList = vertices
        elif isinstance(vertices, int):
            self.vList = GeneralVertexList(vertices)
        else:
            raise ValueError("Invalid vList parameter: " + str(vertices))

        numVertices = self.vList.getNumVertices()
        self.W = sppy.csarray((numVertices, numVertices), dtype)
        self.undirected = undirected
    def __init__(self, vertices, undirected=True, W=None, dtype=float):
        """
        Create a DenseGraph with a given AbstractVertexList or number of 
        vertices, and specify whether it is directed. One can optionally pass 
        in a numpy array W which is used as the weight matrix of the 
        graph. 

        :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.  
        
        :param undirected: a boolean variable to indicate if the graph is undirected.
        :type undirected: :class:`boolean`

        :param W: a numpy array of the same size as vertices, or None to create the default one.
        
        :param dtype: the data type of the weight matrix if W is not specified e.g numpy.int8. Defaults to the builtin float, which is what the removed numpy.float alias referred to.

        :raises ValueError: if vertices is neither an AbstractVertexList nor an int, or if W is given but is not a numpy array of matching shape.
        """
        Parameter.checkBoolean(undirected)

        if isinstance(vertices, AbstractVertexList):
            self.vList = vertices
        elif isinstance(vertices, int):
            self.vList = GeneralVertexList(vertices)
        else:
            raise ValueError("Invalid vList parameter: " + str(vertices))

        numVertices = len(self.vList)

        #Identity test: "W != None" on an ndarray compares elementwise and its
        #truth value is ambiguous
        if W is not None and not (isinstance(W, numpy.ndarray) and W.shape == (numVertices, numVertices)):
            raise ValueError("Input argument W must be None or numpy array of size " + str(numVertices))

        self.undirected = undirected

        if W is None:
            self.W = numpy.zeros((numVertices, numVertices), dtype=dtype)
        else:
            self.W = W
            #The next line is for error checking mainly
            self.setWeightMatrix(W)
Exemple #21
0
 def setIsLeafNode(self, leafNode):
     """
     Store the leaf-node flag on ``self.leafNode``.

     The argument is passed through ``Parameter.checkBoolean`` first.

     :param leafNode: the value to store as the leaf-node flag
     :type leafNode: :class:`bool`
     """
     Parameter.checkBoolean(leafNode)
     self.leafNode = leafNode
Exemple #22
0
 def setPure(self, pure):
     """
     Store the ``pure`` flag on ``self.pure``.

     The argument is passed through ``Parameter.checkBoolean`` first.

     :param pure: the value to store as the pure flag
     :type pure: :class:`bool`
     """
     Parameter.checkBoolean(pure)
     self.pure = pure
    def scalarStatistics(self, graph, slowStats=True, treeStats=False):
        """
        Compute the scalar-valued statistics of the given graph and return them
        as a vector of length self.numStats. Each statistic is written at the
        position given by the matching ``self.*Index`` attribute; entries that
        are not computed keep the initial value -1.

        Component statistics are only filled in for undirected graphs, the
        distance based statistics only when slowStats is True, and the tree
        statistics only when treeStats is True.
        """
        Parameter.checkClass(graph, AbstractSingleGraph)
        Parameter.checkBoolean(slowStats)
        Parameter.checkBoolean(treeStats)

        # Start from -1 everywhere so uncomputed statistics are recognisable.
        stats = numpy.full(self.numStats, -1.0)
        stats[self.numVerticesIndex] = graph.getNumVertices()
        stats[self.numEdgesIndex] = graph.getNumEdges()
        stats[self.numDirEdgesIndex] = graph.getNumDirEdges()
        stats[self.densityIndex] = graph.density()

        if graph.isUndirected():
            components = graph.findConnectedComponents()
            numComponents = len(components)
            stats[self.numComponentsIndex] = numComponents
            stats[self.numNonSingletonComponentsIndex] = sum(
                1 for comp in components if len(comp) > 1)
            stats[self.numTriOrMoreComponentsIndex] = sum(
                1 for comp in components if len(comp) > 2)

            if numComponents == 0:
                stats[self.maxComponentSizeIndex] = 0
                stats[self.maxComponentEdgesIndex] = 0
                stats[self.meanComponentSizeIndex] = 0
                stats[self.geodesicDistMaxCompIndex] = 0
            else:
                # The first component returned is the one reported as "max".
                firstComponent = components[0]
                firstCompGraph = graph.subgraph(list(firstComponent))
                stats[self.maxComponentSizeIndex] = len(firstComponent)

                if numComponents >= 2:
                    stats[self.secondComponentSizeIndex] = len(components[1])

                stats[self.maxComponentEdgesIndex] = firstCompGraph.getNumEdges()
                totalSize = sum(len(comp) for comp in components)
                stats[self.meanComponentSizeIndex] = totalSize / float(numComponents)
                stats[self.maxCompMeanDegreeIndex] = numpy.mean(
                    firstCompGraph.outDegreeSequence())

        if graph.getNumVertices() == 0:
            stats[self.meanDegreeIndex] = 0
        else:
            stats[self.meanDegreeIndex] = numpy.mean(graph.outDegreeSequence())

        if slowStats:
            # All-pairs distances are computed once and shared by the
            # statistics below.
            if self.useFloydWarshall:
                logging.debug("Running Floyd-Warshall")
                distances = graph.floydWarshall(False)
            else:
                logging.debug("Running Dijkstra's algorithm")
                distances = graph.findAllDistances(False)

            stats[self.diameterIndex] = graph.diameter(P=distances)
            stats[self.effectiveDiameterIndex] = graph.effectiveDiameter(
                self.q, P=distances)
            stats[self.powerLawIndex] = graph.fitPowerLaw()[0]
            stats[self.geodesicDistanceIndex] = graph.geodesicDistance(
                P=distances)
            stats[self.harmonicGeoDistanceIndex] = graph.harmonicGeodesicDistance(
                P=distances)

            if graph.isUndirected() and len(components) != 0:
                stats[self.geodesicDistMaxCompIndex] = graph.geodesicDistance(
                    P=distances, vertexInds=list(components[0]))

        if treeStats:
            logging.debug("Computing statistics on trees")
            trees = graph.findTrees()
            stats[self.numTreesIndex] = len(trees)
            stats[self.numNonSingletonTreesIndex] = sum(
                1 for tree in trees if len(tree) > 1)

            stats[self.meanTreeSizeIndex] = numpy.mean(
                [len(tree) for tree in trees])
            depths = [
                GraphUtils.treeDepth(graph.subgraph(list(tree)))
                for tree in trees
            ]
            stats[self.meanTreeDepthIndex] = numpy.mean(depths)

            if len(trees) != 0:
                firstTreeGraph = graph.subgraph(trees[0])
                stats[self.maxTreeSizeIndex] = len(trees[0])
                stats[self.maxTreeDepthIndex] = GraphUtils.treeDepth(
                    firstTreeGraph)

                if len(trees) >= 2:
                    nextTreeGraph = graph.subgraph(trees[1])
                    stats[self.secondTreeSizeIndex] = len(trees[1])
                    stats[self.secondTreeDepthIndex] = GraphUtils.treeDepth(
                        nextTreeGraph)

        return stats
    def __init__(self,
                 vertices,
                 undirected=True,
                 W=None,
                 dtype=float,
                 frmt="csr"):
        """
        Create a SparseGraph with a given AbstractVertexList or number of 
        vertices, and specify whether it is directed. One can optionally pass 
        in a sparse matrix W which is used as the weight matrix of the 
        graph. Different kinds of sparse matrix can impact the speed of various
        operations. The currently supported sparse matrix types are: lil_matrix, 
        csr_matrix, csc_matrix and dok_matrix. The default sparse matrix is 
        csr_matrix. 

        :param vertices: the initial set of vertices as a AbstractVertexList object, or an int to specify the number of vertices in which case vertices are stored in a GeneralVertexList.  
        
        :param undirected: a boolean variable to indicate if the graph is undirected.
        :type undirected: :class:`boolean`

        :param W: a square sparse matrix of the same size as the number of vertices, or None to create the default one.
        
        :param dtype: the data type of the sparse matrix if W is not specified. Defaults to the builtin float, which is what the removed numpy.float alias referred to.
        
        :param frmt: the format of the sparse matrix: lil, csr or csc if W is not specified. The value is validated even when W is given.

        :raises ValueError: if vertices is neither an AbstractVertexList nor an int, if W is given but is not a sparse matrix of matching shape, or if frmt is not one of "lil", "csr" or "csc".
        """
        Parameter.checkBoolean(undirected)

        if isinstance(vertices, AbstractVertexList):
            self.vList = vertices
        elif isinstance(vertices, int):
            self.vList = GeneralVertexList(vertices)
        else:
            raise ValueError("Invalid vList parameter: " + str(vertices))

        numVertices = self.vList.getNumVertices()

        #Identity test: comparing a sparse matrix with None using != goes
        #through the matrix comparison operators
        if W is not None and not (sparse.issparse(W)
                                  and W.shape == (numVertices, numVertices)):
            raise ValueError(
                "Input argument W must be None or sparse matrix of size " +
                str(numVertices))

        self.undirected = undirected

        if frmt == "lil":
            matrix = sparse.lil_matrix
        elif frmt == "csr":
            matrix = sparse.csr_matrix
        elif frmt == "csc":
            matrix = sparse.csc_matrix
        else:
            #str() so that a non-string frmt still produces the ValueError
            raise ValueError("Invalid sparse matrix format: " + str(frmt))

        if W is not None:
            self.W = W
            #The next line is for error checking mainly
            self.setWeightMatrix(W)
        elif numVertices == 0:
            #Terrible hack alert:  can't create a zero size sparse matrix, so we settle
            #for one of size 1. Better is to create a new class.
            self.W = matrix((1, 1), dtype=dtype)
        else:
            self.W = matrix((numVertices, numVertices), dtype=dtype)
    def scalarStatistics(self, graph, slowStats=True, treeStats=False):
        """
        Compute the statistics of the input graph which are scalar values and
        return them together as a single vector.

        :param graph: The graph to compute statistics for.
        :type graph: :class:`AbstractSingleGraph`

        :param slowStats: Whether to compute the shortest path based statistics (diameter, effective diameter, geodesic distances) and the power law fit.
        :type slowStats: :class:`bool`

        :param treeStats: Whether to compute statistics on the trees returned by graph.findTrees().
        :type treeStats: :class:`bool`

        :returns: An array of length self.numStats in which any statistic that was not computed is -1.
        """
        #This method is a bit of a mess
        Parameter.checkClass(graph, AbstractSingleGraph)
        Parameter.checkBoolean(slowStats)
        Parameter.checkBoolean(treeStats)

        #Every entry starts at -1 so statistics which are skipped can be recognised
        stats = -numpy.ones(self.numStats)
        stats[self.numVerticesIndex] = graph.getNumVertices()
        stats[self.numEdgesIndex] = graph.getNumEdges()
        stats[self.numDirEdgesIndex] = graph.getNumDirEdges()
        stats[self.densityIndex] = graph.density()

        #Component statistics are only computed for undirected graphs
        if graph.isUndirected():
            logging.debug("Finding connected components")
            components = graph.findConnectedComponents()
            logging.debug("Done")
            numComponents = len(components)
            stats[self.numComponentsIndex] = numComponents
            stats[self.numNonSingletonComponentsIndex] = sum(1 for comp in components if len(comp) > 1)
            stats[self.numTriOrMoreComponentsIndex] = sum(1 for comp in components if len(comp) > 2)

            logging.debug("Studying max component")
            if numComponents == 0:
                stats[self.maxComponentSizeIndex] = 0
                stats[self.maxComponentEdgesIndex] = 0
                stats[self.meanComponentSizeIndex] = 0
                stats[self.geodesicDistMaxCompIndex] = 0
            else:
                #The first component is used as the max component; presumably
                #findConnectedComponents orders by decreasing size - TODO confirm
                largest = components[0]
                maxCompGraph = graph.subgraph(list(largest))
                stats[self.maxComponentSizeIndex] = len(largest)

                if numComponents >= 2:
                    stats[self.secondComponentSizeIndex] = len(components[1])

                stats[self.maxComponentEdgesIndex] = maxCompGraph.getNumEdges()
                totalSize = sum(len(comp) for comp in components)
                stats[self.meanComponentSizeIndex] = totalSize/float(numComponents)
                stats[self.maxCompMeanDegreeIndex] = numpy.mean(maxCompGraph.outDegreeSequence())

        #The mean of an empty degree sequence is undefined, so use 0 for an empty graph
        stats[self.meanDegreeIndex] = numpy.mean(graph.outDegreeSequence()) if graph.getNumVertices() != 0 else 0

        if slowStats:
            #The same distance matrix is shared by all the path based statistics
            if self.useFloydWarshall:
                logging.debug("Running Floyd-Warshall")
                distances = graph.floydWarshall(False)
            else:
                logging.debug("Running Dijkstra's algorithm")
                distances = graph.findAllDistances(False)

            stats[self.diameterIndex] = graph.diameter(P=distances)
            stats[self.effectiveDiameterIndex] = graph.effectiveDiameter(self.q, P=distances)
            stats[self.powerLawIndex] = graph.fitPowerLaw()[0]
            stats[self.geodesicDistanceIndex] = graph.geodesicDistance(P=distances)
            stats[self.harmonicGeoDistanceIndex] = graph.harmonicGeodesicDistance(P=distances)

            #components only exists when the graph is undirected, hence the order of the test
            if graph.isUndirected() and len(components) != 0:
                stats[self.geodesicDistMaxCompIndex] = graph.geodesicDistance(P=distances, vertexInds=list(components[0]))

        if treeStats:
            logging.debug("Computing statistics on trees")
            trees = graph.findTrees()
            numTrees = len(trees)
            stats[self.numTreesIndex] = numTrees
            stats[self.numNonSingletonTreesIndex] = sum(1 for tree in trees if len(tree) > 1)

            stats[self.meanTreeSizeIndex] = numpy.mean([len(tree) for tree in trees])
            depths = [GraphUtils.treeDepth(graph.subgraph(list(tree))) for tree in trees]
            stats[self.meanTreeDepthIndex] = numpy.mean(depths)

            #As with components, the first two trees are used as the max and second trees
            if numTrees != 0:
                firstTreeGraph = graph.subgraph(trees[0])
                stats[self.maxTreeSizeIndex] = len(trees[0])
                stats[self.maxTreeDepthIndex] = GraphUtils.treeDepth(firstTreeGraph)

                if numTrees >= 2:
                    nextTreeGraph = graph.subgraph(trees[1])
                    stats[self.secondTreeSizeIndex] = len(trees[1])
                    stats[self.secondTreeDepthIndex] = GraphUtils.treeDepth(nextTreeGraph)

        return stats
    def scalarStatistics(self, graph, slowStats=True, treeStats=False):
        """
        Find a series of statistics for the given input graph which can be represented
        as scalar values. Return results as a vector.

        :param graph: The undirected graph to analyse. It must provide the methods used below: is_directed, vcount, ecount, as_directed, density, components, subgraph, degree, diameter and average_path_length.

        :param slowStats: Whether to compute the diameter and the geodesic distances.
        :type slowStats: :class:`bool`

        :param treeStats: Checked to be a boolean but otherwise unused in this implementation.
        :type treeStats: :class:`bool`

        :returns: An array of length self.numStats in which any statistic that was not computed is -1.

        :raises ValueError: If graph is directed.
        """
        if graph.is_directed():
            raise ValueError("Only works on undirected graphs")

        #This method is a bit of a mess
        Parameter.checkBoolean(slowStats)
        Parameter.checkBoolean(treeStats)

        #Every entry starts at -1 so statistics which are skipped can be recognised
        statsArray = numpy.ones(self.numStats)*-1
        statsArray[self.numVerticesIndex] = graph.vcount()
        statsArray[self.numEdgesIndex] = graph.ecount()
        statsArray[self.numDirEdgesIndex] = graph.as_directed().ecount()
        statsArray[self.densityIndex] = graph.density()

        logging.debug("Finding connected components")
        subComponents = graph.components()
        logging.debug("Done")
        statsArray[self.numComponentsIndex] = len(subComponents)

        nonSingletonSubComponents = [c for c in subComponents if len(c) > 1]
        statsArray[self.numNonSingletonComponentsIndex] = len(nonSingletonSubComponents)

        triOrMoreSubComponents = [c for c in subComponents if len(c) > 2]
        statsArray[self.numTriOrMoreComponentsIndex] = len(triOrMoreSubComponents)

        #inds lists the component indices in order of decreasing component size
        componentSizes = numpy.array([len(c) for c in subComponents])
        inds = numpy.flipud(numpy.argsort(componentSizes))

        logging.debug("Studying max component")
        if len(subComponents) != 0:
            maxCompGraph = graph.subgraph(subComponents[inds[0]])
            statsArray[self.maxComponentSizeIndex] = len(subComponents[inds[0]])

            if len(subComponents) >= 2:
                statsArray[self.secondComponentSizeIndex] = len(subComponents[inds[1]])

            statsArray[self.maxComponentEdgesIndex] = maxCompGraph.ecount()
            statsArray[self.meanComponentSizeIndex] = componentSizes.mean()
            statsArray[self.maxCompMeanDegreeIndex] = numpy.mean(maxCompGraph.degree(mode=igraph.OUT))
        else:
            statsArray[self.maxComponentSizeIndex] = 0
            statsArray[self.maxComponentEdgesIndex] = 0
            statsArray[self.meanComponentSizeIndex] = 0
            statsArray[self.geodesicDistMaxCompIndex] = 0

        if graph.vcount() != 0:
            statsArray[self.meanDegreeIndex] = numpy.mean(graph.degree(mode=igraph.OUT))
        else:
            statsArray[self.meanDegreeIndex] = 0

        if slowStats:
            #Note: effectiveDiameterIndex and powerLawIndex are not computed here
            #and so keep their initial value of -1
            logging.debug("Computing diameter")
            statsArray[self.diameterIndex] = graph.diameter()
            logging.debug("Computing geodesic distance")
            statsArray[self.geodesicDistanceIndex] = graph.average_path_length()

            if len(subComponents) != 0:
                #maxCompGraph is defined whenever there is at least one component.
                #Use the subgraph rather than passing a precomputed distance matrix
                #and vertex indices, which this method never has available.
                statsArray[self.geodesicDistMaxCompIndex] = maxCompGraph.average_path_length()

        return statsArray