Exemple #1
0
    def testSeparatePapersAuthorsTopicSharedConferenceGraph(self):
        """
          Two papers with different authors and different title keywords, both published
          at 'Conference One': the expected graph holds a single, shared conference node.
        """

        # Sample input: paper id -> parsed paper record
        firstRecord = {
            'id': 0,
            'arnetid': 1,
            'authors': ['Author One'],
            'conference': 'Conference One',
            'references': [],
            'title': 'Databases',
            'year': 1995,
        }
        secondRecord = {
            'id': 1,
            'arnetid': 2,
            'authors': ['Author Two'],
            'conference': 'Conference One',
            'references': [],
            'title': 'All The Knowledge',
            'year': 1999,
        }
        parsedData = {0: firstRecord, 1: secondRecord}

        expectedGraph = GraphFactory.createInstance()

        # Author, topic and conference ids are not part of the input; they are
        # expected to count up from zero within each node type
        authorOne = Author(0, 'Author One')
        authorTwo = Author(1, 'Author Two')
        databasesPaper = Paper(0, 'Databases')
        knowledgePaper = Paper(1, 'All The Knowledge')
        databasesTopic = Topic(0, ['databas'])
        knowledgeTopic = Topic(1, ['knowledg'])
        sharedConference = Conference(0, 'Conference One')

        expectedNodes = [
            authorOne,
            authorTwo,
            databasesPaper,
            knowledgePaper,
            databasesTopic,
            knowledgeTopic,
            sharedConference,
        ]
        for node in expectedNodes:
            expectedGraph.addNode(node)

        # Every relationship in this scenario is expected in both directions
        for author, paper in [(authorOne, databasesPaper), (authorTwo, knowledgePaper)]:
            expectedGraph.addBothEdges(author, paper, Authorship())
        for paper, topic in [(databasesPaper, databasesTopic), (knowledgePaper, knowledgeTopic)]:
            expectedGraph.addBothEdges(paper, topic, Mention())
        for paper in [databasesPaper, knowledgePaper]:
            expectedGraph.addBothEdges(paper, sharedConference, Publication())

        builtGraph = self.dataImporter.buildGraph(parsedData)

        self.assertGraphsEqual(builtGraph, expectedGraph)
Exemple #2
0
    def testCoAuthorsGraph(self):
        """
          Three authors, two of whom co-author the first paper. Both papers are titled
          'Databases' (so one shared topic is expected) and appear at different conferences.
        """

        # Sample input: paper id -> parsed paper record
        coAuthoredRecord = {
            'id': 0,
            'arnetid': 1,
            'authors': ['Author One', 'Author Three'],
            'conference': 'Conference One',
            'references': [],
            'title': 'Databases',
            'year': 1995,
        }
        singleAuthorRecord = {
            'id': 1,
            'arnetid': 2,
            'authors': ['Author Two'],
            'conference': 'Conference Two',
            'references': [],
            'title': 'Databases',
            'year': 1999,
        }
        parsedData = {0: coAuthoredRecord, 1: singleAuthorRecord}

        expectedGraph = GraphFactory.createInstance()

        # Author, topic and conference ids are not part of the input. 'Author Three' is
        # listed on the first paper, so it is expected to get id 1 ahead of 'Author Two'
        authorOne = Author(0, 'Author One')
        authorTwo = Author(2, 'Author Two')
        authorThree = Author(1, 'Author Three')
        firstPaper = Paper(0, 'Databases')
        secondPaper = Paper(1, 'Databases')
        sharedTopic = Topic(0, ['databas'])
        conferenceOne = Conference(0, 'Conference One')
        conferenceTwo = Conference(1, 'Conference Two')

        expectedNodes = [
            authorOne,
            authorTwo,
            authorThree,
            firstPaper,
            secondPaper,
            sharedTopic,
            conferenceOne,
            conferenceTwo,
        ]
        for node in expectedNodes:
            expectedGraph.addNode(node)

        # Every relationship in this scenario is expected in both directions
        authorships = [
            (authorOne, firstPaper),
            (authorThree, firstPaper),
            (authorTwo, secondPaper),
        ]
        for author, paper in authorships:
            expectedGraph.addBothEdges(author, paper, Authorship())
        for paper in [firstPaper, secondPaper]:
            expectedGraph.addBothEdges(paper, sharedTopic, Mention())
        for paper, conference in [(firstPaper, conferenceOne), (secondPaper, conferenceTwo)]:
            expectedGraph.addBothEdges(paper, conference, Publication())

        builtGraph = self.dataImporter.buildGraph(parsedData)

        self.assertGraphsEqual(builtGraph, expectedGraph)
Exemple #3
0
    def testMutualCitationGraph(self):
        """
          Two papers that reference each other: citation edges are expected in both
          directions between them, alongside the usual author/topic/conference edges.
        """

        # Sample input: paper id -> parsed paper record (each one references the other)
        firstRecord = {
            'id': 0,
            'arnetid': 1,
            'authors': ['Author One'],
            'conference': 'Conference One',
            'references': [1],
            'title': 'Databases',
            'year': 1999,
        }
        secondRecord = {
            'id': 1,
            'arnetid': 2,
            'authors': ['Author Two'],
            'conference': 'Conference Two',
            'references': [0],
            'title': 'Databases',
            'year': 1999,
        }
        parsedData = {0: firstRecord, 1: secondRecord}

        expectedGraph = GraphFactory.createInstance()

        # Author, topic and conference ids are not part of the input; they are
        # expected to count up from zero within each node type
        authorOne = Author(0, 'Author One')
        authorTwo = Author(1, 'Author Two')
        firstPaper = Paper(0, 'Databases')
        secondPaper = Paper(1, 'Databases')
        sharedTopic = Topic(0, ['databas'])
        conferenceOne = Conference(0, 'Conference One')
        conferenceTwo = Conference(1, 'Conference Two')

        expectedNodes = [
            authorOne,
            authorTwo,
            firstPaper,
            secondPaper,
            sharedTopic,
            conferenceOne,
            conferenceTwo,
        ]
        for node in expectedNodes:
            expectedGraph.addNode(node)

        for author, paper in [(authorOne, firstPaper), (authorTwo, secondPaper)]:
            expectedGraph.addBothEdges(author, paper, Authorship())
        for paper in [firstPaper, secondPaper]:
            expectedGraph.addBothEdges(paper, sharedTopic, Mention())
        for paper, conference in [(firstPaper, conferenceOne), (secondPaper, conferenceTwo)]:
            expectedGraph.addBothEdges(paper, conference, Publication())

        # Citations are only symmetric here because the two papers cite each other
        expectedGraph.addBothEdges(firstPaper, secondPaper, Citation())

        builtGraph = self.dataImporter.buildGraph(parsedData)

        self.assertGraphsEqual(builtGraph, expectedGraph)
 def termLineParser(line):
     """
       Parse one line made of two whitespace-separated tokens: a topic id and a term.

       Returns a (topicId, topic) pair. The topic is the one already registered in
       stemmedTermMap for the stemmed term if there is one; otherwise a new Topic is
       created and registered, unless the stemmed term is in self.stopWords, in which
       case the topic is None. Unpacking fails with ValueError if the line does not
       split into exactly two tokens.
     """
     rawTopicId, rawTerm = line.split()
     # The id token is cleaned with __removeControlCharacters before the int conversion
     topicId = int(self.__removeControlCharacters(rawTopicId))
     stemmedTerm = self.stemmer.stemWord(rawTerm)

     # Reuse the topic previously created for this stem
     if stemmedTerm in stemmedTermMap:
         return topicId, stemmedTermMap[stemmedTerm]

     # Stop words (checked on the stemmed form) produce no topic and are not registered
     if stemmedTerm in self.stopWords:
         return topicId, None

     newTopic = Topic(topicId, [stemmedTerm])
     stemmedTermMap[stemmedTerm] = newTopic
     return topicId, newTopic
    def buildGraph(self, parsedData):
        """
          Assemble the DBLP graph from the parsed paper records.

          parsedData maps each paper id to a record from which the 'title', 'references',
          'conference' and 'authors' entries are read. Conferences and authors are
          de-duplicated by name and topics by title keyword; each new one receives the
          number of previously seen nodes of its type as id. Authorship, mention and
          publication edges are added in both directions, citation edges only from the
          citing paper to the cited one. Raises KeyError if a referenced paper id is not
          itself a key of parsedData.
        """

        dblpGraph = GraphFactory.createInstance()

        # Lookup tables for the nodes created so far
        authorsByName = {}
        papersById = {}
        topicsByKeyword = {}
        conferencesByName = {}
        referencesByPaperId = {}  # Paper id -> ids of the papers it references

        # First pass: all nodes, and every edge except the citations
        for paperId in parsedData:
            record = parsedData[paperId]

            currentPaper = Paper(paperId, record['title'])
            referencesByPaperId[paperId] = record['references']

            # Look up the conference of this paper, registering it on first sight
            venueName = record['conference']
            venue = conferencesByName.get(venueName)
            if venue is None:
                venue = Conference(len(conferencesByName), venueName)
                conferencesByName[venueName] = venue
                dblpGraph.addNode(venue)

            # Look up the authors of this paper, registering unseen ones
            writers = []
            for writerName in record['authors']:
                writer = authorsByName.get(writerName)
                if writer is None:
                    writer = Author(len(authorsByName), writerName)
                    authorsByName[writerName] = writer
                    dblpGraph.addNode(writer)
                writers.append(writer)

            # Title keywords act as topics; link each one to the paper both ways
            for word in self.__extractKeywords(record['title']):
                subject = topicsByKeyword.get(word)
                if subject is None:
                    subject = Topic(len(topicsByKeyword), [word])
                    topicsByKeyword[word] = subject
                    dblpGraph.addNode(subject)
                for source, target in ((subject, currentPaper), (currentPaper, subject)):
                    dblpGraph.addEdge(source, target, Mention())

            # Register the paper itself
            papersById[paperId] = currentPaper
            dblpGraph.addNode(currentPaper)

            # Link the paper to its authors and to its conference, both ways
            for writer in writers:
                for source, target in ((currentPaper, writer), (writer, currentPaper)):
                    dblpGraph.addEdge(source, target, Authorship())
            for source, target in ((currentPaper, venue), (venue, currentPaper)):
                dblpGraph.addEdge(source, target, Publication())

        # Second pass: citations, once every paper node exists
        for citingId, citedIds in referencesByPaperId.items():
            citingPaper = papersById[citingId]
            for citedId in citedIds:
                dblpGraph.addEdge(citingPaper, papersById[citedId], Citation())

        return dblpGraph