Exemple #1
0
    def testGetAllEdgeIndices(self):
        """Check that getAllEdgeIndices returns one valid index pair per edge.

        Each row of the result is read as a pair of positions into
        getAllVertexIds(); the edge between the two referenced vertices must
        be present with value 1 (what getEdge reports here for edges added
        without an explicit value).
        """
        cases = [
            # Default-constructed graph.
            ((), [("a", "b"), ("a", "c"), ("a", "d"), ("d", "e")]),
            # DictGraph(False): directed graph, including a reciprocal edge.
            ((False,), [("a", "b"), ("b", "a"), ("a", "c"), ("a", "d"), ("d", "e")]),
        ]

        for constructorArgs, edges in cases:
            graph = DictGraph(*constructorArgs)
            for source, target in edges:
                graph.addEdge(source, target)

            edgeIndices = graph.getAllEdgeIndices()
            keys = graph.getAllVertexIds()

            self.assertEqual(edgeIndices.shape[0], graph.getNumEdges())
            for i in range(edgeIndices.shape[0]):
                # Cast both positions so list indexing works whatever the
                # dtype of the returned index array is.
                first = keys[int(edgeIndices[i, 0])]
                second = keys[int(edgeIndices[i, 1])]
                self.assertEqual(graph.getEdge(first, second), 1)
Exemple #2
0
    def testRemoveVertex(self):
        """Removing a vertex must also remove every edge touching it."""

        def removeAndCheck(g, vertex, goneEdges):
            # Remove one vertex, then confirm it and the listed edges are gone.
            g.removeVertex(vertex)
            self.assertFalse(g.vertexExists(vertex))
            for u, v in goneEdges:
                self.assertFalse(g.edgeExists(u, v))

        # Default-constructed graph
        undirected = DictGraph()
        for u, v in [(0, 1), (0, 2), (0, 3), (1, 2), (2, 3), (3, 4)]:
            undirected.addEdge(u, v)

        removeAndCheck(undirected, 4, [(3, 4)])
        removeAndCheck(undirected, 3, [(2, 3), (0, 3)])
        removeAndCheck(undirected, 2, [(1, 2), (0, 2)])

        remainingIds = undirected.getAllVertexIds()
        self.assertTrue(remainingIds == [0, 1])
        remainingEdges = undirected.getAllEdges()
        self.assertTrue(remainingEdges == [(0, 1)])

        # Same exercise on a directed graph
        directed = DictGraph(False)
        for u, v in [(0, 1), (1, 0), (0, 3), (1, 2), (2, 3), (3, 4)]:
            directed.addEdge(u, v)

        removeAndCheck(directed, 0, [(0, 1), (0, 3), (1, 0)])
        removeAndCheck(directed, 2, [(1, 2), (2, 3)])

        remainingIds = directed.getAllVertexIds()
        self.assertTrue(remainingIds == [1, 3, 4])
        remainingEdges = directed.getAllEdges()
        self.assertTrue(remainingEdges == [(3, 4)])
Exemple #3
0
    def testGetWeightMatrix(self):
        """Check getWeightMatrix against getEdge for every vertex pair.

        An entry equal to 1 must correspond to an edge whose value is 1; any
        other entry must correspond to a missing edge (getEdge returns None).
        The test assumes the matrix is indexed in getAllVertexIds() order.
        """
        edges = [("a", "b"), ("a", "c"), ("a", "d"), ("d", "e")]

        # Default-constructed graph first, then a directed one.
        for graph in (DictGraph(), DictGraph(False)):
            for source, target in edges:
                graph.addEdge(source, target)

            W = graph.getWeightMatrix()
            # Re-read the ids for each graph rather than reusing the previous
            # graph's list, so positions always refer to the graph under test.
            keys = graph.getAllVertexIds()

            for i, rowKey in enumerate(keys):
                for j, colKey in enumerate(keys):
                    if W[i, j] == 1:
                        self.assertEqual(graph.getEdge(rowKey, colKey), 1)
                    else:
                        self.assertIsNone(graph.getEdge(rowKey, colKey))
Exemple #4
0
    def testGetAllVertexIds(self):
        """Vertex ids must include edge endpoints and explicitly set vertices."""
        dictGraph = DictGraph(True)
        dictGraph.addEdge(1, 2, 12)
        dictGraph.addEdge(1, 3, 18)
        # Vertex 5 has no edges; it only exists because of setVertex.
        dictGraph.setVertex(5, 12)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(dictGraph.getAllVertexIds(), [1, 2, 3, 5])
Exemple #5
0
    def testSetVertices(self):
        """setVertices followed by getVertices must round-trip ids and values."""
        graph = DictGraph()

        vertexIndices = [1, 2, 3]
        vertices = ["a", "b", "c"]

        graph.setVertices(vertexIndices, vertices)

        vertexIndices2 = graph.getAllVertexIds()
        vertices2 = graph.getVertices(vertexIndices2)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(vertexIndices, vertexIndices2)
        self.assertEqual(vertices, vertices2)
Exemple #6
0
    def testDijkstrasAlgorithm(self):
        """Compare dijkstrasAlgorithm output with hand-computed distances.

        Covers a small branching graph, a graph containing an isolated
        vertex, a five-vertex ring, and a graph whose vertex ids are strings.
        """

        def sameDistances(actual, expected):
            # Element-wise comparison collapsed into a single truth value.
            return (actual == numpy.array(expected)).all()

        unitEdges = [(0, 1), (1, 2), (1, 3), (2, 4)]
        # Row k holds the expected distances from the k-th vertex.
        expectedRows = [
            [0, 1, 2, 2, 3],
            [1, 0, 1, 1, 2],
            [2, 1, 0, 2, 1],
            [2, 1, 2, 0, 3],
            [3, 2, 1, 3, 0],
        ]

        branching = DictGraph()
        for u, v in unitEdges:
            branching.addEdge(u, v, 1)
        branching.setVertex(4, 1)

        for source, expected in enumerate(expectedRows):
            self.assertTrue(sameDistances(branching.dijkstrasAlgorithm(source), expected))

        # A graph with an isolated vertex: its distance is infinite
        withIsolated = DictGraph()
        withIsolated.setVertex(5, 1)
        for u, v in [(0, 1), (1, 2), (1, 3)]:
            withIsolated.addEdge(u, v, 1)

        self.assertTrue(sameDistances(withIsolated.dijkstrasAlgorithm(0), [0, 1, 2, 2, numpy.inf]))

        # Five vertices joined in a ring
        ring = DictGraph()
        for u, v in [(0, 1), (1, 2), (2, 3), (3, 4), (4, 0)]:
            ring.addEdge(u, v, 1)

        self.assertTrue(sameDistances(ring.dijkstrasAlgorithm(0), [0, 1, 2, 2, 1]))

        # Same shape as the first graph, but with string vertex ids
        letters = "abcde"
        named = DictGraph()
        for u, v in unitEdges:
            named.addEdge(letters[u], letters[v], 1)

        # Reorder results by sorted vertex id before comparing.
        inds = Util.argsort(named.getAllVertexIds())
        for letter, expected in zip(letters, expectedRows):
            self.assertTrue(sameDistances(named.dijkstrasAlgorithm(letter)[inds], expected))
Exemple #7
0
 def testDegreeSequence(self):
     """degreeSequence must list degrees in getAllVertexIds() order.

     The expected values give the edgeless vertex "a" degree 0 and count
     the self-loop on "e" twice (one edge to "d" plus the loop gives 3).
     """
     graph = DictGraph()
     graph.setVertex("a", 10)
     graph["b", "c"] = 1
     graph["b", "d"] = 1
     graph["d", "e"] = 1
     graph["e", "e"] = 1

     expectedDegrees = {"a": 0, "b": 2, "c": 1, "d": 2, "e": 3}

     # Compute the sequence once instead of once per vertex.
     degrees = graph.degreeSequence()
     observedDegrees = {}
     for position, vertexId in enumerate(graph.getAllVertexIds()):
         observedDegrees[vertexId] = degrees[position]

     self.assertEqual(observedDegrees, expectedDegrees)
Exemple #8
0
    def testAdjacencyList(self):
        """Every adjacencyList entry must match an existing edge and its value.

        neighbourIndices[i] is read as positions into getAllVertexIds() for
        the neighbours of the i-th vertex, and neighbourWeights[i][k] as the
        value of the edge to the k-th of those neighbours.
        """
        graph = DictGraph()
        for source, target in [("a", "b"), ("b", "c"), ("b", "d"), ("c", "e")]:
            graph.addEdge(source, target, 1)
        # "f" has no edges.
        graph.setVertex("f", 1)

        neighbourIndices, neighbourWeights = graph.adjacencyList()
        vertexIds = graph.getAllVertexIds()

        for i, neighbours in enumerate(neighbourIndices):
            for k, j in enumerate(neighbours):
                self.assertTrue(graph.edgeExists(vertexIds[i], vertexIds[j]))
                self.assertEqual(graph[vertexIds[i], vertexIds[j]], neighbourWeights[i][k])
Exemple #9
0
    def testGetSparseWeightMatrix(self):
        """Check getSparseWeightMatrix against getEdge for every vertex pair.

        Edges whose value is not a number must appear as 1 in the matrix,
        numeric edge values must appear unchanged, and a zero entry must
        correspond to a missing edge (getEdge returns None). The test
        assumes the matrix is indexed in getAllVertexIds() order.
        """

        def checkMatrix(graph):
            # Compare every matrix entry with the edge stored in the graph.
            W = graph.getSparseWeightMatrix()
            # Read the ids from the graph under test, not from a previous one.
            keys = graph.getAllVertexIds()

            for i, rowKey in enumerate(keys):
                for j, colKey in enumerate(keys):
                    edge = graph.getEdge(rowKey, colKey)
                    if graph.edgeExists(rowKey, colKey) and not isinstance(edge, numbers.Number):
                        self.assertEqual(1, W[i, j])
                    elif W[i, j] != 0:
                        self.assertEqual(edge, W[i, j])
                    else:
                        self.assertIsNone(edge)

        graph = DictGraph()
        graph.addEdge("a", "b")
        graph.addEdge("a", "c")
        graph.addEdge("a", "d", "blah")
        graph.addEdge("d", "e", -1.1)
        graph.addEdge("c", "b", 2)
        checkMatrix(graph)

        # Try a directed graph
        graph = DictGraph(False)
        graph.addEdge("a", "b")
        graph.addEdge("a", "c", "test")
        graph.addEdge("a", "d")
        graph.addEdge("d", "e")
        graph.addEdge("c", "a", 0.1)
        checkMatrix(graph)
    def __init__(self, minGraphSize=500, maxGraphSize=None, dayStep=30):
        """Load the Cit-HepTh citation data and build the graph used later.

        Reads the edge list and the paper dates from the "cluster/" data
        directory, stores each dated paper's date (in days since 1990-01-01)
        as its vertex value, symmetrises the citation graph and keeps the
        first connected component returned by findConnectedComponents().

        :param minGraphSize: stored unchanged as self.minGraphSize.
        :param maxGraphSize: stored unchanged as self.maxGraphSize.
        :param dayStep: stored unchanged as self.dayStep.
        """
        dataDir = PathDefaults.getDataDir() + "cluster/"
        edgesFilename = dataDir + "Cit-HepTh.txt"
        dateFilename = dataDir + "Cit-HepTh-dates.txt"

        # Vertex ids are kept as the stripped strings read from the file:
        # they are digit strings that can start with zero.
        edges = []
        with open(edgesFilename, 'r') as edgesFile:
            # Skip the four header lines.
            for _ in range(4):
                edgesFile.readline()

            for line in edgesFile:
                vertex1, _, vertex2 = line.partition("\t")
                edges.append([vertex1.strip(), vertex2.strip()])

        logging.info("Loaded edge file %s with %s edges", edgesFilename, len(edges))

        # Keep a directed edge graph
        graph = DictGraph(False)
        graph.addEdges(edges)
        logging.info("Created directed citation graph with %s edges and %s vertices", graph.getNumEdges(), graph.getNumVertices())

        # Each dated paper gets its upload date as vertex value. The papers it
        # directly cites are included too, even if they have no date of their own.
        startDate = datetime.date(1990, 1, 1)
        numLines = 0
        subgraphIds = []

        with open(dateFilename, 'r') as dateFile:
            # Skip the single header line.
            dateFile.readline()

            for line in dateFile:
                vertexId, _, dateText = line.partition("\t")
                vertexId = vertexId.strip()
                inputDate = datetime.datetime.strptime(dateText.strip(), "%Y-%m-%d").date()

                if graph.vertexExists(vertexId):
                    days = (inputDate - startDate).days

                    graph.vertices[vertexId] = days
                    subgraphIds.append(vertexId)

                    # A cited paper must have been written no later than the
                    # paper citing it, so pull its date back when necessary.
                    for neighbour in graph.neighbours(vertexId):
                        neighbourDays = graph.getVertex(neighbour)
                        if neighbourDays is None:
                            graph.setVertex(neighbour, days)
                            subgraphIds.append(neighbour)
                        elif days < neighbourDays:
                            graph.setVertex(neighbour, days)

                numLines += 1

        subgraphIds = set(subgraphIds)
        graph = graph.subgraph(list(subgraphIds))
        logging.debug(graph)
        logging.info("Loaded date file %s with %s dates and %s lines", dateFilename, len(subgraphIds), numLines)

        # Symmetrise the directed weight matrix.
        W = graph.getSparseWeightMatrix()
        W = W + W.T

        vList = VertexList(W.shape[0], 1)
        vList.setVertices(numpy.array([graph.getVertices(graph.getAllVertexIds())]).T)

        # Note: we have 16 self edges and some two-way citations so this graph
        # has fewer edges than the directed one
        self.graph = SparseGraph(vList, W=W)
        logging.debug(self.graph)

        # Keep only components[0]; this is meant to be the largest component.
        # NOTE(review): confirm findConnectedComponents() returns the largest
        # component first.
        components = self.graph.findConnectedComponents()
        self.graph = self.graph.subgraph(components[0])

        logging.debug("Largest component graph: %s", self.graph)

        self.minGraphSize = minGraphSize
        self.maxGraphSize = maxGraphSize
        self.dayStep = dayStep
    def testGetIterator(self):
        """Check the matrices yielded by CitationIterGenerator's iterator.

        Every yielded matrix must be symmetric and must contain the previous
        one as its leading square block. The size and number of non-zeros of
        the final matrix are then compared with values recomputed directly
        from the raw data files.
        """
        generator = CitationIterGenerator()
        iterator = generator.getIterator()

        # The next() builtin works on Python 2.6+ and Python 3, whereas
        # iterator.next() only exists on Python 2.
        lastW = next(iterator)

        for W in iterator:
            self.assertEqual((W - W.T).getnnz(), 0)
            self.assertEqual((lastW - W[0:lastW.shape[0], 0:lastW.shape[0]]).getnnz(), 0)
            lastW = W

        numVertices = W.shape[0]

        # Now compute the vertexIds manually:
        dataDir = PathDefaults.getDataDir() + "cluster/"
        edgesFilename = dataDir + "Cit-HepTh.txt"
        dateFilename = dataDir + "Cit-HepTh-dates.txt"

        # Ids can start with zero, so prefix "1" before converting to int to
        # keep them distinct (numpy cannot load them directly as numbers).
        edges = []
        with open(edgesFilename, 'r') as edgesFile:
            # Skip the four header lines.
            for _ in range(4):
                edgesFile.readline()

            for line in edgesFile:
                vertex1, _, vertex2 = line.partition("\t")
                edges.append([int("1" + vertex1.strip()), int("1" + vertex2.strip())])

        # numpy.int was only an alias of the builtin int and has been removed
        # from NumPy, so use int directly.
        edges = numpy.array(edges, int)

        # Check file read correctly
        self.assertTrue((edges[0, :] == numpy.array([11001, 19304045])).all())
        self.assertTrue((edges[1, :] == numpy.array([11001, 19308122])).all())
        self.assertTrue((edges[9, :] == numpy.array([11001, 19503124])).all())
        vertexIds1 = numpy.unique(edges)
        logging.info("Number of graph vertices: " + str(vertexIds1.shape[0]))

        vertexIds2 = []
        with open(dateFilename, 'r') as dateFile:
            # Skip the single header line.
            dateFile.readline()

            for line in dateFile:
                vertexId, _, _ = line.partition("\t")
                vertexIds2.append(int("1" + vertexId.strip()))

        # Check file read correctly
        vertexIds2 = numpy.array(vertexIds2, int)
        expectedFirstIds = numpy.array([19203201, 19203202, 19203203, 19203204, 19203205, 19203206, 19203207, 19203208, 19203209, 19203210], int)
        self.assertTrue((vertexIds2[0:10] == expectedFirstIds).all())
        vertexIds2 = numpy.unique(vertexIds2)

        graph = DictGraph(False)
        graph.addEdges(edges)

        # Find the set of vertices with known citation: every dated vertex
        # plus the vertices it points to.
        vertices = []
        vertexId2Set = set(vertexIds2.tolist())
        for i in graph.getAllVertexIds():
            # NOTE(review): i is a vertex id, not a loop counter - confirm this
            # is what printIteration is meant to receive.
            Util.printIteration(i, 50000, edges.shape[0])
            if i in vertexId2Set:
                vertices.append(i)
                vertices.extend(graph.neighbours(i))

        logging.debug("Number of final vertices: " + str(numVertices))
        uniqueVertices = numpy.unique(numpy.array(vertices))
        numVertices2 = uniqueVertices.shape[0]
        self.assertEqual(numVertices, numVertices2)

        # Now compare the weight matrices using the undirected graph
        # Note the order of vertices is different from the iterator
        graph = DictGraph()
        graph.addEdges(edges)
        subgraph = graph.subgraph(uniqueVertices)
        W2 = subgraph.getSparseWeightMatrix()

        self.assertEqual(W.getnnz(), W2.getnnz())