def __init__(self, graph, metaPath=None, symmetric=False, conserveMemory=False):
        """
          Set up a meta path similarity strategy: pass the graph to the parent constructor, then record the
          meta path settings and create the meta path utility used by this strategy.

            @param  graph           Graph forwarded unchanged to the parent class constructor

            @param  metaPath        Meta path object where the list of classes contains classes of nodes in the
                                    graph, and weights in [0,1] containing the importance of the meta path

            @param  symmetric       Whether or not to enforce that meta paths must be symmetric

            @param  conserveMemory  Flag stored on the instance as 'conserveMemory'

            For example, with 'symmetric' enabled and meta path 'ABC', only the mirrored path is counted: 'ABCCBA'
            for the even-length form, or 'ABCBA' for the odd-length form. NOTE(review): earlier wording referred
            to an 'evenLength' flag that is not a parameter of this constructor - confirm where it is defined.
        """

        # The parent constructor runs first so the assignments below happen on an initialised instance
        super(MetaPathSimilarityStrategy, self).__init__(graph)

        # Record the caller's settings verbatim
        self.metaPath, self.symmetric, self.conserveMemory = metaPath, symmetric, conserveMemory

        # Utility object used for meta path queries
        self.metaPathUtility = EdgeBasedMetaPathUtility()
class MetaPathSimilarityStrategy(SimilarityStrategy):
    """
      Generic class for similarity strategies that use meta paths
    """

    def __init__(self, graph, metaPath=None, symmetric=False, conserveMemory=False):
        """
          Constructs a meta path similarity strategy, storing the meta path data for this strategy.

            @param  graph           Graph forwarded to the parent strategy constructor

            @param  metaPath        Meta path object where the list of classes contains classes of nodes in the
                                    graph, and weights in [0,1] containing the importance of the meta path

            @param  symmetric       Whether or not to enforce that meta paths must be symmetric

            @param  conserveMemory  Flag stored on the instance; it is not consulted anywhere in this class

            For example, with 'symmetric' enabled and meta path 'ABC', only the mirrored path is counted: 'ABCCBA'
            for the even-length form, or 'ABCBA' for the odd-length form. NOTE(review): earlier wording referred
            to an 'evenLength' flag that is not a parameter of this constructor - confirm where it is defined.
        """

        super(MetaPathSimilarityStrategy, self).__init__(graph)

        self.metaPath = metaPath
        self.symmetric = symmetric
        self.conserveMemory = conserveMemory

        self.metaPathUtility = EdgeBasedMetaPathUtility()

    def findMostSimilarNodes(self, source, number=None, conserveMemory=False):
        """
          Simply find the similarity scores between this node and all reachable nodes on this meta path, and
          return the highest-scoring nodes. Note that if there are fewer eligible nodes than "number", only that
          many nodes are returned.

            @param  source          Node whose neighbors along the meta path are scored

            @param  number          Maximum number of nodes to return; defaults to self.n when None

            @param  conserveMemory  Not used by this implementation

            @return List of nodes (without their scores), highest score first, excluding 'source' itself and any
                    node whose class differs from the class of 'source'

            Side effect: each computed score is stored in self.similarityScores[source]. The ranking covers every
            score recorded there for 'source', not only the ones computed by this call.
        """

        # If no number is provided, default to the number of nodes in the graph
        if number is None:
            number = self.n

        # Get similarity scores for all entries
        reachableNodes = self.metaPathUtility.findMetaPathNeighbors(self.graph, source, self.metaPath)
        for reachableNode in reachableNodes:
            self.similarityScores[source][reachableNode] = self.findSimilarityScore(source, reachableNode)

        # Sort by increasing score. items() is used instead of iteritems() so this runs on Python 2 and 3.
        scoredNodes = sorted(self.similarityScores[source].items(), key=operator.itemgetter(1))

        # Remove source and nodes of different types. The list is sorted ascending and reversed afterwards
        # (rather than sorted with reverse=True) so the relative order of equal scores stays as it always was.
        candidates = [
            node for node, score in scoredNodes
            if node != source and node.__class__ == source.__class__
        ]
        candidates.reverse()

        number = min([number, len(candidates)])
        return candidates[:number]
    def __init__(self,
                 graph,
                 metaPath = None,
                 symmetric = False,
                 conserveMemory = False):
        """
          Set up a meta path similarity strategy: pass the graph to the parent constructor, then record the
          meta path settings and create the meta path utility used by this strategy.

            @param  graph           Graph forwarded unchanged to the parent class constructor

            @param  metaPath        Meta path object where the list of classes contains classes of nodes in the
                                    graph, and weights in [0,1] containing the importance of the meta path

            @param  symmetric       Whether or not to enforce that meta paths must be symmetric

            @param  conserveMemory  Flag stored on the instance as 'conserveMemory'

            For example, with 'symmetric' enabled and meta path 'ABC', only the mirrored path is counted: 'ABCCBA'
            for the even-length form, or 'ABCBA' for the odd-length form. NOTE(review): earlier wording referred
            to an 'evenLength' flag that is not a parameter of this constructor - confirm where it is defined.
        """

        # The parent constructor runs first so the assignments below happen on an initialised instance
        super(MetaPathSimilarityStrategy, self).__init__(graph)

        # Record the caller's settings verbatim
        self.metaPath, self.symmetric, self.conserveMemory = metaPath, symmetric, conserveMemory

        # Utility object used for meta path queries
        self.metaPathUtility = EdgeBasedMetaPathUtility()
    def run(self):
        """
          Build the skewed citation/publication sample graph (without randomness) and print, via self.output,
          the conference-to-author adjacency tables for the CPA and CPPA meta paths, the per-author citation and
          publication counts, and the similarity scores of several strategies for six fixed authors.
        """

        self.graph, authorMap, conference, citationsPublications = \
            SampleGraphUtility.constructSkewedCitationPublicationExample(introduceRandomness=False)

        # Get the nodes we care about, in the order they are displayed
        authors = [authorMap[name] for name in ("Alice", "Bob", "Carol", "Dave", "Ed", "Frank")]
        metaPathUtility = EdgeBasedMetaPathUtility()

        def drawTable(tableRows):
            # Render a list of rows as a text table and emit it
            table = texttable.Texttable()
            table.add_rows(tableRows)
            self.output(table.draw())

        # Output adjacency matrices: one single-row table (the conference) per meta path
        for heading, path in (
            ("\nCPA Adjacency Matrix:", [Conference, Paper, Author]),
            ("\nCPPA Adjacency Matrix:", [Conference, Paper, Paper, Author]),
        ):
            self.output(heading)
            matrix, nodesIndex = metaPathUtility.getAdjacencyMatrixFromGraph(self.graph, path, project=True)
            drawTable([
                ["Conference"] + [author.name for author in authors],
                [conference.name] + [matrix[nodesIndex[conference]][nodesIndex[author]] for author in authors],
            ])

        # Total citation & publication counts (element 0 is shown as citations, element 1 as publications)
        self.output("\nCitation & Publication Counts")
        drawTable([
            ["Measure"] + [author.name for author in authors],
            ["Citations"] + [citationsPublications[author][0] for author in authors],
            ["Publications"] + [citationsPublications[author][1] for author in authors],
        ])

        # Output NeighborSim scores for both meta paths
        # NOTE(review): the titles say "PathSim" although the strategy class is NeighborSimStrategy - confirm
        cpaStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Author], symmetric=True)
        self.outputSimilarityScores(authorMap, authors, cpaStrategy, "APCPA PathSim")
        cppaStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Paper, Author])
        self.outputSimilarityScores(authorMap, authors, cppaStrategy, "APPCPPA PathSim")

        # Shape-based strategies with nothing omitted, at three weights each
        shapeStrategies = [
            (FlattenedMatrixStrategy, "FlatMat"),
            (VectorProductStrategy, "VectorProduct"),
        ]
        for strategyClass, label in shapeStrategies:
            for weight in [1.0, 0.5, 0]:
                shapeStrategy = strategyClass(
                    self.graph, weight=weight, omit=[], metaPath=[Conference, Paper, Paper, Author], symmetric=True
                )
                title = "APPCPPA %s ShapeSim (%1.2f weight)" % (label, weight)
                self.outputSimilarityScores(authorMap, authors, shapeStrategy, title)

        # Same vector-product strategy at full weight, this time with omit=[0]
        fullWeight = 1.0
        omittingStrategy = VectorProductStrategy(
            self.graph, weight=fullWeight, omit=[0], metaPath=[Conference, Paper, Paper, Author], symmetric=True
        )
        omittingTitle = "APPCPPA VectorProduct ShapeSim omitting CPC (%1.2f weight)" % fullWeight
        self.outputSimilarityScores(authorMap, authors, omittingStrategy, omittingTitle)

        # Output recursive pathsim strategy score(s)
        recursiveStrategy = RecursivePathSimStrategy(self.graph, [Conference, Paper, Paper, Author])
        self.outputSimilarityScores(authorMap, authors, recursiveStrategy, "APPCPPA Recursive PathSim")
class MetaPathSimilarityStrategy(SimilarityStrategy):
    """
      Generic class for similarity strategies that use meta paths
    """
    def __init__(self,
                 graph,
                 metaPath=None,
                 symmetric=False,
                 conserveMemory=False):
        """
          Constructs a meta path similarity strategy, storing the meta path data for this strategy.

            @param  graph           Graph forwarded to the parent strategy constructor

            @param  metaPath        Meta path object where the list of classes contains classes of nodes in the
                                    graph, and weights in [0,1] containing the importance of the meta path

            @param  symmetric       Whether or not to enforce that meta paths must be symmetric

            @param  conserveMemory  Flag stored on the instance; it is not consulted anywhere in this class

            For example, with 'symmetric' enabled and meta path 'ABC', only the mirrored path is counted: 'ABCCBA'
            for the even-length form, or 'ABCBA' for the odd-length form. NOTE(review): earlier wording referred
            to an 'evenLength' flag that is not a parameter of this constructor - confirm where it is defined.
        """

        super(MetaPathSimilarityStrategy, self).__init__(graph)

        self.metaPath = metaPath
        self.symmetric = symmetric
        self.conserveMemory = conserveMemory

        self.metaPathUtility = EdgeBasedMetaPathUtility()

    def findMostSimilarNodes(self, source, number=None, conserveMemory=False):
        """
          Simply find the similarity scores between this node and all reachable nodes on this meta path, and
          return the highest-scoring nodes. Note that if there are fewer eligible nodes than "number", only that
          many nodes are returned.

            @param  source          Node whose neighbors along the meta path are scored

            @param  number          Maximum number of nodes to return; defaults to self.n when None

            @param  conserveMemory  Not used by this implementation

            @return List of nodes (without their scores), highest score first, excluding 'source' itself and any
                    node whose class differs from the class of 'source'

            Side effect: each computed score is stored in self.similarityScores[source]. The ranking covers every
            score recorded there for 'source', not only the ones computed by this call.
        """

        # If no number is provided, default to the number of nodes in the graph
        if number is None:
            number = self.n

        # Get similarity scores for all entries
        reachableNodes = self.metaPathUtility.findMetaPathNeighbors(
            self.graph, source, self.metaPath)
        scoresForSource = self.similarityScores[source]
        for reachableNode in reachableNodes:
            scoresForSource[reachableNode] = self.findSimilarityScore(
                source, reachableNode)

        # Sort by increasing score. items() replaces iteritems() so the method
        # runs unchanged on both Python 2 and Python 3.
        scoredNodes = sorted(self.similarityScores[source].items(),
                             key=operator.itemgetter(1))

        # Remove source and nodes of different types. Sorting ascending and
        # reversing afterwards (instead of reverse=True) keeps the relative
        # order of equal scores exactly as it has always been.
        candidates = [
            node for node, score in scoredNodes
            if node != source and node.__class__ == source.__class__
        ]
        candidates.reverse()

        number = min([number, len(candidates)])
        return candidates[:number]
예제 #6
0
 def _getImplementation(self):
     """
       Create and return a new EdgeBasedMetaPathUtility instance.
     """
     implementation = EdgeBasedMetaPathUtility()
     return implementation
    def run(self):
        """
          Build PathSim "example three" with extra authors and an explicit citation map, then print (via
          self.output) the projected author/conference adjacency table, the author/author citation table, the
          in/out citation totals, and the similarity scores of several strategies and weighted combinations.
        """

        # Citation counts handed to the sample-graph builder; both levels are keyed by author name
        citationMap = {
            'Mike':  {'Mike': 0,  'Jim': 0,  'Mary': 0,  'Bob': 0,  'Ann': 0, 'Joe': 0,  'Nancy': 0},
            'Jim':   {'Mike': 20, 'Jim': 0,  'Mary': 20, 'Bob': 20, 'Ann': 0, 'Joe': 20, 'Nancy': 0},
            'Mary':  {'Mike': 1,  'Jim': 10, 'Mary': 0,  'Bob': 1,  'Ann': 0, 'Joe': 1,  'Nancy': 0},
            'Bob':   {'Mike': 1,  'Jim': 10, 'Mary': 1,  'Bob': 0,  'Ann': 0, 'Joe': 1,  'Nancy': 0},
            'Ann':   {'Mike': 0,  'Jim': 0,  'Mary': 0,  'Bob': 0,  'Ann': 0, 'Joe': 0,  'Nancy': 0},
            'Joe':   {'Mike': 0,  'Jim': 0,  'Mary': 0,  'Bob': 0,  'Ann': 0, 'Joe': 0,  'Nancy': 0},
            'Nancy': {'Mike': 1,  'Jim': 10, 'Mary': 1,  'Bob': 1,  'Ann': 0, 'Joe': 1,  'Nancy': 0},
        }

        self.graph, authorMap, conferenceMap = SampleGraphUtility.constructPathSimExampleThree(
            extraAuthorsAndCitations=True, citationMap=citationMap)

        # Get the nodes we care about, in display order
        conferences = [conferenceMap[name] for name in ('SIGMOD', 'VLDB', 'ICDE', 'KDD')]
        authors = [authorMap[name] for name in ('Mike', 'Jim', 'Mary', 'Bob', 'Ann', 'Joe', 'Nancy')]
        metaPathUtility = EdgeBasedMetaPathUtility()

        def drawTable(tableRows):
            # Render a list of rows as a text table and emit it
            table = texttable.Texttable()
            table.add_rows(tableRows)
            self.output(table.draw())

        # Project a 2-typed heterogeneous graph over adapted PathSim example
        publicationProjectedGraph = metaPathUtility.createHeterogeneousProjection(
            self.graph, [Author, Paper, Conference], symmetric=True)
        self.output('\nAdjacency Matrix (Projected):')
        publicationRows = [['Author'] + [conference.name for conference in conferences]]
        for author in authors:
            publicationRows.append([author.name] + [
                publicationProjectedGraph.getNumberOfEdges(author, conference) for conference in conferences
            ])
        drawTable(publicationRows)

        # Project a homogeneous citation graph over adapted PathSim example
        citationProjectedGraph = metaPathUtility.createHomogeneousProjection(
            self.graph, [Author, Paper, Paper, Author])
        self.output('\nCitation Matrix:')
        citationRows = [['Author'] + [author.name for author in authors]]
        for author in authors:
            citationRows.append([author.name] + [
                citationProjectedGraph.getNumberOfEdges(author, otherAuthor) for otherAuthor in authors
            ])
        drawTable(citationRows)

        # Output total out/in citations: edges arriving at / leaving each author in the citation projection
        self.output('\nCitations Total:')
        totalRows = [['Author', 'In', 'Out']]
        for author in authors:
            inCount = sum(citationProjectedGraph.getNumberOfEdges(otherAuthor, author) for otherAuthor in authors)
            outCount = sum(citationProjectedGraph.getNumberOfEdges(author, otherAuthor) for otherAuthor in authors)
            totalRows.append([author.name, inCount, outCount])
        drawTable(totalRows)

        # Get PathSim similarity scores
        pathSimStrategy = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], True)
        self.outputSimilarityScores(authorMap, authors, pathSimStrategy, 'APCPA PathSim')

        # Output SimRank-related scores
        strategy = SimRankStrategy(self.graph, [Author, Paper, Paper, Author], symmetric=True)
        self.outputSimilarityScores(authorMap, authors, strategy, "SimRank")

        # Output the projected PageRank/HITS similarity scores
        distanceAlgorithms = [('PageRank', PageRankDistanceStrategy), ('HITS', HITSDistanceStrategy)]
        for name, algorithm in distanceAlgorithms:
            strategy = algorithm(self.graph, [Author, Paper, Paper, Author], symmetric=True)
            self.outputSimilarityScores(authorMap, authors, strategy, "%s-Distance" % name)

        # Get NeighborSim similarity scores
        inNeighborSimStrategy = NeighborSimStrategy(self.graph, [Author, Paper, Paper, Author])
        self.outputSimilarityScores(authorMap, authors, inNeighborSimStrategy, 'APPA NeighborSim-In')
        outNeighborSimStrategy = NeighborSimStrategy(
            self.graph, [Author, Paper, Paper, Author], reversed=True, smoothed=True)
        self.outputSimilarityScores(authorMap, authors, outNeighborSimStrategy, 'APPA NeighborSim-Out')

        # Equal-weight combination of PathSim and a freshly built SimRank strategy
        simRankStrategy = SimRankStrategy(self.graph, [Author, Paper, Paper, Author], symmetric=True)
        simRank = AggregateSimilarityStrategy(self.graph, [pathSimStrategy, simRankStrategy], [0.5, 0.5])
        self.outputSimilarityScores(authorMap, authors, simRank, 'APCPA Pathsim, APPA SimRank')

        # Combination of PathSim with both NeighborSim directions, weighted 0.6 / 0.2 / 0.2
        combinedNeighborSim = AggregateSimilarityStrategy(
            self.graph, [pathSimStrategy, inNeighborSimStrategy, outNeighborSimStrategy], [0.6, 0.2, 0.2])
        self.outputSimilarityScores(
            authorMap, authors, combinedNeighborSim, 'APCPA Pathsim, APPA NeighborSim-Combined')
예제 #8
0
    def run(self):
        """
          Build the multi-disciplinary author sample graph (with the indirect author) and print, via
          self.output, the author/author citation table, the author/conference adjacency table, the total
          citation counts, and the similarity scores of PathSim, NeighborSim and the propagation variants.
        """

        self.graph, authorMap, conferenceMap, totalCitationCount = \
            SampleGraphUtility.constructMultiDisciplinaryAuthorExample(indirectAuthor = True)

        # Get the nodes we care about, in display order
        conferences = [conferenceMap[name] for name in ('VLDB', 'KDD')]
        authors = [authorMap[letter] for letter in 'ABCDEFGHIJ']
        self.metaPathUtility = EdgeBasedMetaPathUtility()

        def drawTable(tableRows):
            # Render a list of rows as a text table and emit it
            table = texttable.Texttable()
            table.add_rows(tableRows)
            self.output(table.draw())

        # Build homogeneous projection of network (authors, with edges for times authors cite each other)
        projectedGraph = self.metaPathUtility.createHomogeneousProjection(self.graph, [Author, Paper, Paper, Author])
        authorCitationCounts = dict(
            (
                author,
                dict(
                    (otherAuthor, projectedGraph.getNumberOfEdges(author, otherAuthor))
                    for otherAuthor in projectedGraph.getNodes()
                ),
            )
            for author in projectedGraph.getNodes()
        )

        # Output the adjacency matrix for authors-authors in the graph
        self.output('\nCitation Matrix:')
        citationRows = [['Author'] + [author.name for author in authors]]
        for author in authors:
            citationRows.append(
                [author.name] + [authorCitationCounts[author][otherAuthor] for otherAuthor in authors])
        drawTable(citationRows)

        # Output the adjacency matrix for authors & conferences in the graph
        self.output('\nAdjacency Matrix:')
        projectedGraph = self.metaPathUtility.createHeterogeneousProjection(self.graph, [Author, Paper, Conference])
        adjacencyRows = [[''] + [conference.name for conference in conferences]]
        for author in authors:
            adjacencyRows.append(
                [author.name] + [projectedGraph.getNumberOfEdges(author, conference) for conference in conferences])
        drawTable(adjacencyRows)

        # Output total citation counts (looked up by author name)
        self.output('\nTotal Citation Counts:')
        drawTable([
            [author.name for author in authors],
            ['%d' % totalCitationCount[author.name] for author in authors],
        ])

        # Output the PathSim similarity scores
        pathSimStrategy = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], True)
        self.outputSimilarityScores(authorMap, authors, pathSimStrategy, "PathSim")

        # Output the NeighborSim similarity scores for the CPPA and APPA meta paths
        cppaStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Paper, Author])
        self.outputSimilarityScores(authorMap, authors, cppaStrategy, "NeighborSim (CPPA)")
        appaStrategy = NeighborSimStrategy(self.graph, [Author, Paper, Paper, Author])
        self.outputSimilarityScores(authorMap, authors, appaStrategy, "NeighborSim (APPA)")

        # Constant weight propagation strategy at increasing iteration counts
        constantRuns = [
            (2, "NeighborSim-ConstantPropagation-2"),
            (3, "NeighborSim-ConstantPropagation-3"),
            (4, "NeighborSim-ConstantPropagation-4"),
            (50, "NeighborSim-ConstantPropagation-50"),
        ]
        for iterationCount, title in constantRuns:
            propagated = NeighborSimConstantPropagationStrategy(
                self.graph, [Author, Paper, Paper, Author], iterations = iterationCount)
            self.outputSimilarityScores(authorMap, authors, propagated, title)

        # Preferential attachment propagation strategy at the same iteration counts
        weightedRuns = [
            (2, "NeighborSim-WeightedPropagation-2"),
            (3, "NeighborSim-WeightedPropagation-3"),
            (4, "NeighborSim-WeightedPropagation-4"),
            (50, "NeighborSim-WeightedPropagation-50"),
        ]
        for iterationCount, title in weightedRuns:
            propagated = NeighborSimConstantPreferentialAttachmentStrategy(
                self.graph, [Author, Paper, Paper, Author], iterations = iterationCount)
            self.outputSimilarityScores(authorMap, authors, propagated, title)

        # Neighbor citation count difference strategy
        citeCountStrategy = NeighborSimStrategy(self.graph, [Paper, Paper, Author], commonNeighbors = False)
        self.outputSimilarityScores(
            authorMap, authors, citeCountStrategy, "NeighborSim-CiteCountDiff", citationCounts = totalCitationCount)
    def run(self):
        """
          Build PathSim "example three" with extra authors and an explicit citation map, then print (via
          self.output) the projected author/conference adjacency table, the author/author citation table, the
          in/out citation totals, and the similarity scores of several strategies and weighted combinations.
        """

        # Citation counts handed to the sample-graph builder. Each row below becomes the inner dictionary of
        # the author named in the trailing comment; columns follow the order of authorNames.
        authorNames = ['Mike', 'Jim', 'Mary', 'Bob', 'Ann', 'Joe', 'Nancy']
        citationCounts = [
            [0,  0,  0,  0,  0, 0,  0],   # Mike
            [20, 0,  20, 20, 0, 20, 0],   # Jim
            [1,  10, 0,  1,  0, 1,  0],   # Mary
            [1,  10, 1,  0,  0, 1,  0],   # Bob
            [0,  0,  0,  0,  0, 0,  0],   # Ann
            [0,  0,  0,  0,  0, 0,  0],   # Joe
            [1,  10, 1,  1,  0, 1,  0],   # Nancy
        ]
        citationMap = dict(
            (rowName, dict(zip(authorNames, rowCounts)))
            for rowName, rowCounts in zip(authorNames, citationCounts)
        )

        self.graph, authorMap, conferenceMap = SampleGraphUtility.constructPathSimExampleThree(
            extraAuthorsAndCitations=True, citationMap = citationMap
        )

        # Get the nodes we care about, in display order
        conferences = [conferenceMap[key] for key in ('SIGMOD', 'VLDB', 'ICDE', 'KDD')]
        authors = [authorMap[key] for key in authorNames]
        metaPathUtility = EdgeBasedMetaPathUtility()

        def emit(tableRows):
            # Render a list of rows as a text table and emit it
            renderer = texttable.Texttable()
            renderer.add_rows(tableRows)
            self.output(renderer.draw())

        # Project a 2-typed heterogeneous graph over adapted PathSim example
        publicationProjectedGraph = metaPathUtility.createHeterogeneousProjection(
            self.graph, [Author, Paper, Conference], symmetric = True
        )
        self.output('\nAdjacency Matrix (Projected):')
        header = ['Author'] + [conference.name for conference in conferences]
        body = [
            [author.name] + [
                publicationProjectedGraph.getNumberOfEdges(author, conference) for conference in conferences
            ]
            for author in authors
        ]
        emit([header] + body)

        # Project a homogeneous citation graph over adapted PathSim example
        citationProjectedGraph = metaPathUtility.createHomogeneousProjection(
            self.graph, [Author, Paper, Paper, Author]
        )
        self.output('\nCitation Matrix:')
        header = ['Author'] + [author.name for author in authors]
        body = [
            [author.name] + [
                citationProjectedGraph.getNumberOfEdges(author, otherAuthor) for otherAuthor in authors
            ]
            for author in authors
        ]
        emit([header] + body)

        # Output total out/in citations: edges arriving at / leaving each author in the citation projection
        self.output('\nCitations Total:')
        totals = [['Author', 'In', 'Out']]
        for author in authors:
            incoming = sum(citationProjectedGraph.getNumberOfEdges(otherAuthor, author) for otherAuthor in authors)
            outgoing = sum(citationProjectedGraph.getNumberOfEdges(author, otherAuthor) for otherAuthor in authors)
            totals.append([author.name, incoming, outgoing])
        emit(totals)

        # Get PathSim similarity scores
        pathSimStrategy = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], True)
        self.outputSimilarityScores(authorMap, authors, pathSimStrategy, 'APCPA PathSim')

        # Output SimRank-related scores
        strategy = SimRankStrategy(self.graph, [Author, Paper, Paper, Author], symmetric=True)
        self.outputSimilarityScores(authorMap, authors, strategy, "SimRank")

        # Output the projected PageRank/HITS similarity scores
        for name, algorithm in (('PageRank', PageRankDistanceStrategy), ('HITS', HITSDistanceStrategy)):
            strategy = algorithm(self.graph, [Author, Paper, Paper, Author], symmetric=True)
            self.outputSimilarityScores(authorMap, authors, strategy, "%s-Distance" % name)

        # Get NeighborSim similarity scores
        inNeighborSimStrategy = NeighborSimStrategy(self.graph, [Author, Paper, Paper, Author])
        self.outputSimilarityScores(authorMap, authors, inNeighborSimStrategy, 'APPA NeighborSim-In')
        outNeighborSimStrategy = NeighborSimStrategy(
            self.graph, [Author, Paper, Paper, Author], reversed=True, smoothed=True
        )
        self.outputSimilarityScores(authorMap, authors, outNeighborSimStrategy, 'APPA NeighborSim-Out')

        # Equal-weight combination of PathSim and a freshly built SimRank strategy
        simRankStrategy = SimRankStrategy(self.graph, [Author, Paper, Paper, Author], symmetric=True)
        simRank = AggregateSimilarityStrategy(self.graph, [pathSimStrategy, simRankStrategy], [0.5, 0.5])
        self.outputSimilarityScores(authorMap, authors, simRank, 'APCPA Pathsim, APPA SimRank')

        # Combination of PathSim with both NeighborSim directions, weighted 0.6 / 0.2 / 0.2
        combinedNeighborSim = AggregateSimilarityStrategy(
            self.graph, [pathSimStrategy, inNeighborSimStrategy, outNeighborSimStrategy], [0.6, 0.2, 0.2]
        )
        self.outputSimilarityScores(
            authorMap, authors, combinedNeighborSim, 'APCPA Pathsim, APPA NeighborSim-Combined'
        )
    def run(self):
        """
          Build PathSim "example three" and print, via self.output, the APC and CPA adjacency tables, their
          matrix product (APCPA), and the similarity scores of SimRank (on the full graph and on the projected
          author/conference graph, with and without a squared-neighbor normalization), NeighborSim and PathSim.
        """

        self.graph, authorMap, conferenceMap = SampleGraphUtility.constructPathSimExampleThree()

        # Get the nodes we care about
        conferences = [
            conferenceMap['SIGMOD'],
            conferenceMap['VLDB'],
            conferenceMap['ICDE'],
            conferenceMap['KDD']
        ]
        authors = [
            authorMap['Mike'],
            authorMap['Jim'],
            authorMap['Mary'],
            authorMap['Bob'],
            authorMap['Ann'],
        ]
        metaPathUtility = EdgeBasedMetaPathUtility()

        self.output('\nAPC Adjacency Matrix:')
        apcadjMatrix, apcNodesIndex = metaPathUtility.getAdjacencyMatrixFromGraph(self.graph, [Author, Paper, Conference], project=True)
        adjMatrixTable = texttable.Texttable()
        rows = [['Author'] + [conference.name for conference in conferences]]
        rows += [[author.name] + [apcadjMatrix[apcNodesIndex[author]][apcNodesIndex[conference]] for conference in conferences] for author in authors]
        adjMatrixTable.add_rows(rows)
        self.output(adjMatrixTable.draw())

        # Each matrix is read through the node index returned together with it. Previously the index of this
        # second call was discarded and the APC index was reused, which is only right if both calls happen to
        # number the nodes identically.
        self.output('\nCPA Adjacency Matrix:')
        cpaadjMatrix, cpaNodesIndex = metaPathUtility.getAdjacencyMatrixFromGraph(self.graph, [Conference, Paper, Author], project=True)
        adjMatrixTable = texttable.Texttable()
        rows = [['Conference'] + [author.name for author in authors]]
        rows += [[conference.name] + [cpaadjMatrix[cpaNodesIndex[conference]][cpaNodesIndex[author]] for author in authors] for conference in conferences]
        adjMatrixTable.add_rows(rows)
        self.output(adjMatrixTable.draw())

        # Rows of the product follow the APC matrix's rows, columns follow the CPA matrix's columns
        # NOTE(review): the product itself assumes APC columns and CPA rows share one node numbering - confirm
        self.output('\nAPCPA Adjacency Matrix (Computed):')
        adjMatrix = numpy.dot(apcadjMatrix, cpaadjMatrix)
        adjMatrixTable = texttable.Texttable()
        rows = [['Author'] + [author.name for author in authors]]
        rows += [[author.name] + [adjMatrix[apcNodesIndex[author]][cpaNodesIndex[otherAuthor]] for otherAuthor in authors] for author in authors]
        adjMatrixTable.add_rows(rows)
        self.output(adjMatrixTable.draw())

        # Output homogeneous simrank comparison
        homogeneousSimRankStrategy = SimRankStrategy(self.graph)
        self.outputSimilarityScores(authorMap, authors, homogeneousSimRankStrategy, 'Homogeneous SimRank')

        projectedGraph = metaPathUtility.createHeterogeneousProjection(self.graph, [Author, Paper, Conference], symmetric = True)

        # Output heterogeneous simrank comparison
        heterogeneousSimRankStrategy = SimRankStrategy(projectedGraph)
        self.outputSimilarityScores(authorMap, authors, heterogeneousSimRankStrategy, 'APC Heterogeneous SimRank')

        # Output heterogeneous simrank w/ squared neighbors comparison
        def sqNeighborsNorm(graph, a, b, sim):
            # Product of the squared predecessor counts of both nodes; 'sim' is part of the callback
            # signature but is not used here
            aNeighbors, bNeighbors = graph.getPredecessors(a), graph.getPredecessors(b)
            return float(len(aNeighbors)**2 * len(bNeighbors)**2)
        heterogeneousSquaredSimRankStrategy = SimRankStrategy(projectedGraph, normalization=sqNeighborsNorm)
        self.outputSimilarityScores(authorMap, authors, heterogeneousSquaredSimRankStrategy, 'Squared Heterogeneous SimRank')

        # Output NeighborSim similarity scores
        neighborSimStrategy = NeighborSimStrategy(self.graph, [Author, Paper, Conference], symmetric=True)
        self.outputSimilarityScores(authorMap, authors, neighborSimStrategy, 'APC NeighborSim')

        # Output the PathSim similarity scores
        pathsimStrategy = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], symmetric=True)
        self.outputSimilarityScores(authorMap, authors, pathsimStrategy, 'APCPA PathSim')
    def testConstructPathSimExampleThree(self):
        """
          Tests the construction of "Example 3" from PathSim paper. Specifically, checks the number of
          Author-Paper-Conference meta paths found between every author and every conference against the
          adjacency matrix shown in this example.
        """

        graph, authorMap, conferenceMap = SampleGraphUtility.constructPathSimExampleThree()
        metaPath = [Author, Paper, Conference]

        metaPathUtility = EdgeBasedMetaPathUtility()

        # Expected adjacency matrix: one row per author, one column per entry of 'conferenceNames'
        conferenceNames = ['SIGMOD', 'VLDB', 'ICDE', 'KDD']
        expectedAdjacency = [
            ('Mike', [2, 1, 0, 0]),
            ('Jim', [50, 20, 0, 0]),
            ('Mary', [2, 0, 1, 0]),
            ('Bob', [2, 1, 0, 0]),
            ('Ann', [0, 0, 1, 1]),
        ]

        for authorName, expectedCounts in expectedAdjacency:
            for conferenceName, expectedCount in zip(conferenceNames, expectedCounts):
                foundPaths = metaPathUtility.findMetaPaths(
                    graph, authorMap[authorName], conferenceMap[conferenceName], metaPath
                )

                # 'assertEqual' is used instead of the deprecated 'assertEquals' alias, which newer
                # versions of unittest no longer provide; the message pinpoints the failing matrix cell
                self.assertEqual(
                    expectedCount,
                    len(foundPaths),
                    'Unexpected number of meta paths from %s to %s' % (authorName, conferenceName)
                )
    def run(self):
        """
          Runs the experiment on the skewed citation / publication example graph (built without randomness):
          outputs the conference-to-author adjacency rows for the CPA and CPPA meta paths, the citation and
          publication counts per author, and then the similarity scores of each configured strategy.
        """

        exampleData = SampleGraphUtility.constructSkewedCitationPublicationExample(introduceRandomness=False)
        self.graph, authorMap, conference, citationsPublications = exampleData

        # Get the nodes we care about
        authorNames = ['Alice', 'Bob', 'Carol', 'Dave', 'Ed', 'Frank']
        authors = [authorMap[authorName] for authorName in authorNames]
        metaPathUtility = EdgeBasedMetaPathUtility()

        def outputConferenceAdjacency(title, metaPath):
            # Outputs the single conference row of the projected adjacency matrix for the given meta path
            self.output(title)
            matrix, index = metaPathUtility.getAdjacencyMatrixFromGraph(self.graph, metaPath, project=True)
            table = texttable.Texttable()
            headerRow = ['Conference'] + [author.name for author in authors]
            conferenceRow = [conference.name] + [matrix[index[conference]][index[author]] for author in authors]
            table.add_rows([headerRow, conferenceRow])
            self.output(table.draw())

        # Output adjacency matrices
        outputConferenceAdjacency('\nCPA Adjacency Matrix:', [Conference, Paper, Author])
        outputConferenceAdjacency('\nCPPA Adjacency Matrix:', [Conference, Paper, Paper, Author])

        # Total citation & publication counts (index 0 is read as citations, index 1 as publications)
        self.output('\nCitation & Publication Counts')
        countsTable = texttable.Texttable()
        countsTable.add_rows([
            ['Measure'] + [author.name for author in authors],
            ['Citations'] + [citationsPublications[author][0] for author in authors],
            ['Publications'] + [citationsPublications[author][1] for author in authors],
        ])
        self.output(countsTable.draw())

        # Output NeighborSim scores for both meta paths (titles are kept exactly as reported so far)
        cpaNeighborSim = NeighborSimStrategy(self.graph, [Conference, Paper, Author], symmetric=True)
        self.outputSimilarityScores(authorMap, authors, cpaNeighborSim, 'APCPA PathSim')
        cppaNeighborSim = NeighborSimStrategy(self.graph, [Conference, Paper, Paper, Author])
        self.outputSimilarityScores(authorMap, authors, cppaNeighborSim, 'APPCPPA PathSim')

        # Shape-based strategies with nothing omitted, at each level of the 'weight' parameter
        shapeStrategies = [
            (FlattenedMatrixStrategy, 'FlatMat'),
            (VectorProductStrategy, 'VectorProduct'),
        ]
        for strategyClass, strategyLabel in shapeStrategies:
            for weight in [1.0, 0.5, 0]:
                title = 'APPCPPA %s ShapeSim (%1.2f weight)' % (strategyLabel, weight)
                shapeStrategy = strategyClass(
                    self.graph, weight=weight, omit=[], metaPath=[Conference, Paper, Paper, Author], symmetric=True
                )
                self.outputSimilarityScores(authorMap, authors, shapeStrategy, title)

        # Vector product strategy once more at full weight, this time passing omit=[0]
        fullWeight = 1.0
        omittingStrategy = VectorProductStrategy(
            self.graph, weight=fullWeight, omit=[0], metaPath=[Conference, Paper, Paper, Author], symmetric=True
        )
        self.outputSimilarityScores(
            authorMap,
            authors,
            omittingStrategy,
            'APPCPPA VectorProduct ShapeSim omitting CPC (%1.2f weight)' % fullWeight
        )

        # Output recursive pathsim strategy score(s)
        recursiveStrategy = RecursivePathSimStrategy(self.graph, [Conference, Paper, Paper, Author])
        self.outputSimilarityScores(authorMap, authors, recursiveStrategy, 'APPCPPA Recursive PathSim')
    def run(self):
        """
          Runs the experiment on the multi-disciplinary author example graph: outputs the author-to-author
          citation matrix, the author-to-conference adjacency matrix and the total citation counts, followed
          by the similarity scores of each configured strategy.
        """

        exampleData = SampleGraphUtility.constructMultiDisciplinaryAuthorExample()
        self.graph, authorMap, conferenceMap, totalCitationCount = exampleData

        # Get the nodes we care about
        conferences = [conferenceMap[conferenceName] for conferenceName in ('VLDB', 'KDD')]
        authors = [authorMap[authorName] for authorName in ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I')]
        self.metaPathUtility = EdgeBasedMetaPathUtility()

        # Build homogeneous projection of network (authors, with edges for times authors cite each other)
        citationGraph = self.metaPathUtility.createHomogeneousProjection(self.graph, [Author, Paper, Paper, Author])
        authorCitationCounts = {}
        for citingAuthor in citationGraph.getNodes():
            countsForAuthor = {}
            for citedAuthor in citationGraph.getNodes():
                countsForAuthor[citedAuthor] = citationGraph.getNumberOfEdges(citingAuthor, citedAuthor)
            authorCitationCounts[citingAuthor] = countsForAuthor

        # Output the adjacency matrix for authors-authors in the graph
        self.output('\nCitation Matrix:')
        citationTable = texttable.Texttable()
        citationRows = [['Author'] + [author.name for author in authors]]
        for author in authors:
            citationRows.append(
                [author.name] + [authorCitationCounts[author][otherAuthor] for otherAuthor in authors]
            )
        citationTable.add_rows(citationRows)
        self.output(citationTable.draw())

        # Output the adjacency matrix for authors & conferences in the graph
        self.output('\nAdjacency Matrix:')
        adjacencyTable = texttable.Texttable()
        authorConferenceGraph = self.metaPathUtility.createHeterogeneousProjection(
            self.graph, [Author, Paper, Conference]
        )
        adjacencyRows = [[''] + [conference.name for conference in conferences]]
        for author in authors:
            adjacencyRows.append(
                [author.name] + [
                    authorConferenceGraph.getNumberOfEdges(author, conference) for conference in conferences
                ]
            )
        adjacencyTable.add_rows(adjacencyRows)
        self.output(adjacencyTable.draw())

        # Output total citation counts (looked up by author name rather than by author node)
        self.output('\nTotal Citation Counts:')
        nameRow = [author.name for author in authors]
        countRow = ['%d' % totalCitationCount[author.name] for author in authors]
        citationCountTable = texttable.Texttable()
        citationCountTable.add_rows([nameRow, countRow])
        self.output(citationCountTable.draw())

        # Output the NeighborSim similarity scores
        neighborSimStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Paper, Author])
        self.outputSimilarityScores(authorMap, authors, neighborSimStrategy, "NeighborSim")

        # Output the PathSim similarity scores (third argument is passed positionally, as before)
        pathSimStrategy = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], True)
        self.outputSimilarityScores(authorMap, authors, pathSimStrategy, "PathSim")

        # Output SimRank-related scores
        simRankStrategy = SimRankStrategy(self.graph, [Author, Paper, Paper, Author])
        self.outputSimilarityScores(authorMap, authors, simRankStrategy, "SimRank")

        # Output the equally weighted aggregate of the SimRank and PathSim strategies
        aggregateStrategy = AggregateSimilarityStrategy(self.graph, [simRankStrategy, pathSimStrategy], [0.5, 0.5])
        self.outputSimilarityScores(authorMap, authors, aggregateStrategy, 'APCPA Pathsim, APPA SimRank')

        # Output the projected PageRank/HITS similarity scores
        distanceStrategies = [
            ('PageRank', PageRankDistanceStrategy),
            ('HITS', HITSDistanceStrategy),
        ]
        for strategyName, strategyClass in distanceStrategies:
            # Node sets are rebuilt for every strategy so that each one receives its own set object
            firstArea = tuple(authorMap[authorName] for authorName in ('A', 'B', 'C', 'D', 'E', 'I'))
            secondArea = tuple(authorMap[authorName] for authorName in ('F', 'G', 'H', 'D', 'E', 'I'))
            researchAreas = set([firstArea, secondArea])
            distanceStrategy = strategyClass(
                self.graph, [Author, Paper, Paper, Author], nodeSets=researchAreas, symmetric=True
            )
            self.outputSimilarityScores(authorMap, authors, distanceStrategy, "%s-Distance" % strategyName)
    def run(self):
        """
          Runs the experiment on "Example 3" from the PathSim paper: outputs the APC and CPA adjacency
          matrices, their product (labelled as the computed APCPA matrix), and then the similarity scores
          of the SimRank, NeighborSim and PathSim strategies for the selected authors.
        """

        exampleData = SampleGraphUtility.constructPathSimExampleThree()
        self.graph, authorMap, conferenceMap = exampleData

        # Get the nodes we care about
        conferences = [conferenceMap[conferenceName] for conferenceName in ('SIGMOD', 'VLDB', 'ICDE', 'KDD')]
        authors = [authorMap[authorName] for authorName in ('Mike', 'Jim', 'Mary', 'Bob', 'Ann')]
        metaPathUtility = EdgeBasedMetaPathUtility()

        def outputTable(rows):
            # Renders the given rows (header row first) as a text table and outputs it
            table = texttable.Texttable()
            table.add_rows(rows)
            self.output(table.draw())

        self.output('\nAPC Adjacency Matrix:')
        apcadjMatrix, nodesIndex = metaPathUtility.getAdjacencyMatrixFromGraph(
            self.graph, [Author, Paper, Conference], project=True
        )
        apcRows = [['Author'] + [conference.name for conference in conferences]]
        for author in authors:
            apcRows.append(
                [author.name] + [apcadjMatrix[nodesIndex[author]][nodesIndex[conference]] for conference in conferences]
            )
        outputTable(apcRows)

        self.output('\nCPA Adjacency Matrix:')
        # The index returned by this call is discarded; the index of the APC call is used for every lookup
        cpaadjMatrix, _unusedIndex = metaPathUtility.getAdjacencyMatrixFromGraph(
            self.graph, [Conference, Paper, Author], project=True
        )
        cpaRows = [['Conference'] + [author.name for author in authors]]
        for conference in conferences:
            cpaRows.append(
                [conference.name] + [cpaadjMatrix[nodesIndex[conference]][nodesIndex[author]] for author in authors]
            )
        outputTable(cpaRows)

        self.output('\nAPCPA Adjacency Matrix (Computed):')
        apcpaMatrix = numpy.dot(apcadjMatrix, cpaadjMatrix)
        apcpaRows = [['Author'] + [author.name for author in authors]]
        for author in authors:
            apcpaRows.append(
                [author.name] + [apcpaMatrix[nodesIndex[author]][nodesIndex[otherAuthor]] for otherAuthor in authors]
            )
        outputTable(apcpaRows)

        # Output homogeneous simrank comparison
        homogeneousStrategy = SimRankStrategy(self.graph)
        self.outputSimilarityScores(authorMap, authors, homogeneousStrategy, 'Homogeneous SimRank')

        projectedGraph = metaPathUtility.createHeterogeneousProjection(
            self.graph, [Author, Paper, Conference], symmetric=True
        )

        # Output heterogeneous simrank comparison
        heterogeneousStrategy = SimRankStrategy(projectedGraph)
        self.outputSimilarityScores(authorMap, authors, heterogeneousStrategy, 'APC Heterogeneous SimRank')

        # Output heterogeneous simrank w/ squared neighbors comparison
        def sqNeighborsNorm(graph, a, b, sim):
            # Product of the squared predecessor counts of both nodes; 'sim' is accepted but not used
            aCount = len(graph.getPredecessors(a))
            bCount = len(graph.getPredecessors(b))
            return float(aCount ** 2 * bCount ** 2)

        squaredStrategy = SimRankStrategy(projectedGraph, normalization=sqNeighborsNorm)
        self.outputSimilarityScores(authorMap, authors, squaredStrategy, 'Squared Heterogeneous SimRank')

        # Output NeighborSim similarity scores
        neighborSimStrategy = NeighborSimStrategy(self.graph, [Author, Paper, Conference], symmetric=True)
        self.outputSimilarityScores(authorMap, authors, neighborSimStrategy, 'APC NeighborSim')

        # Output the PathSim similarity scores
        pathSimStrategy = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], symmetric=True)
        self.outputSimilarityScores(authorMap, authors, pathSimStrategy, 'APCPA PathSim')