def __init__(self, graph, metaPath=None, symmetric=False, conserveMemory=False):
    """
      Set up a meta path similarity strategy and remember its meta path settings.

      @param  graph           Graph forwarded to the parent class initializer
      @param  metaPath        Meta path object where the list of classes contains classes of nodes in the
                              graph, and weights in [0,1] containing the importance of the meta path
      @param  symmetric       Whether or not to enforce that meta paths must be symmetric
      @param  conserveMemory  Flag stored on the instance unchanged; this constructor does not interpret it

      Example for meta path 'ABC': when 'symmetric' and 'evenLength' are both true only the meta path
      'ABCCBA' is counted; when 'symmetric' is true while 'evenLength' is false only 'ABCBA' is counted.
      NOTE(review): 'evenLength' is not a parameter of this constructor -- confirm where it is handled.
    """
    super(MetaPathSimilarityStrategy, self).__init__(graph)

    # Caller-supplied configuration is stored verbatim
    self.metaPath, self.symmetric = metaPath, symmetric
    self.conserveMemory = conserveMemory

    # Every instance gets its own edge-based meta path helper
    self.metaPathUtility = EdgeBasedMetaPathUtility()
class MetaPathSimilarityStrategy(SimilarityStrategy):
    """
    Generic class for similarity strategies that use meta paths
    """

    def __init__(self, graph, metaPath = None, symmetric = False, conserveMemory = False):
        """
          Constructs a meta path similarity strategy, storing the meta path data for this strategy.

          @param  graph           Graph passed through to the SimilarityStrategy initializer
          @param  metaPath        Meta path object where the list of classes contains classes of nodes in the
                                  graph, and weights in [0,1] containing the importance of the meta path
          @param  symmetric       Whether or not to enforce that meta paths must be symmetric
          @param  conserveMemory  Flag stored on the instance as-is; it is not interpreted by this class

          For example, for meta path 'ABC': if 'symmetric' and 'evenLength' are both 'true' we will only
          count meta path 'ABCCBA', and if 'symmetric' is 'true' while 'evenLength' is 'false' we will only
          count meta paths 'ABCBA'.
          NOTE(review): 'evenLength' is not a parameter of this constructor -- confirm where it is handled.
        """
        super(MetaPathSimilarityStrategy, self).__init__(graph)

        self.metaPath = metaPath
        self.symmetric = symmetric
        self.conserveMemory = conserveMemory
        self.metaPathUtility = EdgeBasedMetaPathUtility()

    def findMostSimilarNodes(self, source, number=None, conserveMemory=False):
        """
          Find the similarity scores between this node and all reachable nodes on this meta path, and
          return the best-scoring nodes first. Note that if there are fewer candidate nodes than "number",
          all of the candidates are returned.

          @param  source          Node whose most similar peers are wanted
          @param  number          Maximum number of nodes to return; when None, self.n is used
          @param  conserveMemory  Accepted for interface compatibility; not read by this method
          @return List of nodes (without scores) that have the same class as source, excluding source
                  itself, ordered from highest to lowest similarity score
        """

        # If no number is provided, default to self.n (the number of nodes in the graph, per the
        # original comment -- the attribute itself is set outside this class)
        if number is None:
            number = self.n

        # Refresh the cached scores for every node reachable from source along the meta path
        neighbors = self.metaPathUtility.findMetaPathNeighbors(self.graph, source, self.metaPath)
        for neighbor in neighbors:
            self.similarityScores[source][neighbor] = self.findSimilarityScore(source, neighbor)

        # Sort by increasing score. items() is used instead of iteritems() because the latter does not
        # exist on Python 3, while items() is available on both Python 2 and Python 3.
        scoredNodes = sorted(self.similarityScores[source].items(), key=operator.itemgetter(1))

        # Drop the source itself and nodes of a different class, then flip to decreasing score. The
        # "ascending stable sort, then reverse" order is kept on purpose so ties come out as before.
        candidates = [
            node for node, _ in scoredNodes
            if node != source and node.__class__ == source.__class__
        ]
        candidates.reverse()

        # Slicing already clamps to the list length, so no explicit min() is needed
        return candidates[:number]
def __init__(self, graph, metaPath = None, symmetric = False, conserveMemory = False):
    """
      Create a similarity strategy that works on meta paths and keep the supplied settings.

      @param  graph           Graph forwarded to the parent class initializer
      @param  metaPath        Meta path object where the list of classes contains classes of nodes in the
                              graph, and weights in [0,1] containing the importance of the meta path
      @param  symmetric       Whether or not to enforce that meta paths must be symmetric
      @param  conserveMemory  Flag kept on the instance unchanged; not interpreted by this constructor

      Example for meta path 'ABC': with 'symmetric' and 'evenLength' both true only 'ABCCBA' is counted;
      with 'symmetric' true and 'evenLength' false only 'ABCBA' is counted.
      NOTE(review): 'evenLength' is not an argument here -- confirm where it is handled.
    """
    super(MetaPathSimilarityStrategy, self).__init__(graph)

    # Store the caller's configuration exactly as given
    self.metaPath, self.symmetric = metaPath, symmetric
    self.conserveMemory = conserveMemory

    # Helper object used for meta path queries
    self.metaPathUtility = EdgeBasedMetaPathUtility()
def run(self):
    """
      Run the experiment on the skewed citation/publication sample graph (built without randomness).

      Prints, via self.output, the conference-to-author adjacency rows for the CPA and CPPA meta paths
      and the per-author citation/publication counts, then reports similarity scores for several
      strategies through self.outputSimilarityScores.
    """
    example = SampleGraphUtility.constructSkewedCitationPublicationExample(introduceRandomness=False)
    self.graph, authorMap, conference, citationsPublications = example

    # Authors to report on, in display order
    authors = [authorMap[name] for name in ("Alice", "Bob", "Carol", "Dave", "Ed", "Frank")]

    utility = EdgeBasedMetaPathUtility()

    def labelledAuthorRow(label):
        # A row made of a leading label followed by every author's name
        return [label] + [person.name for person in authors]

    # One single-row adjacency table (the conference against each author) per meta path
    adjacencyReports = (
        ("\nCPA Adjacency Matrix:", [Conference, Paper, Author]),
        ("\nCPPA Adjacency Matrix:", [Conference, Paper, Paper, Author]),
    )
    for heading, pathClasses in adjacencyReports:
        self.output(heading)
        matrix, index = utility.getAdjacencyMatrixFromGraph(self.graph, pathClasses, project=True)
        table = texttable.Texttable()
        table.add_rows([
            labelledAuthorRow("Conference"),
            [conference.name] + [matrix[index[conference]][index[person]] for person in authors],
        ])
        self.output(table.draw())

    # Citation (entry 0) and publication (entry 1) counts per author
    self.output("\nCitation & Publication Counts")
    countsTable = texttable.Texttable()
    countsTable.add_rows([
        labelledAuthorRow("Measure"),
        ["Citations"] + [citationsPublications[person][0] for person in authors],
        ["Publications"] + [citationsPublications[person][1] for person in authors],
    ])
    self.output(countsTable.draw())

    # NeighborSim over CPA (symmetric) and over CPPA
    symmetricCpaStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Author], symmetric=True)
    self.outputSimilarityScores(authorMap, authors, symmetricCpaStrategy, "APCPA PathSim")
    cppaStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, cppaStrategy, "APPCPPA PathSim")

    # Shape-based strategies with nothing omitted, at each weight
    shapeStrategies = (
        (FlattenedMatrixStrategy, "FlatMat"),
        (VectorProductStrategy, "VectorProduct"),
    )
    for strategyClass, label in shapeStrategies:
        for weight in (1.0, 0.5, 0):
            shapeStrategy = strategyClass(
                self.graph, weight=weight, omit=[], metaPath=[Conference, Paper, Paper, Author], symmetric=True
            )
            title = "APPCPPA %s ShapeSim (%1.2f weight)" % (label, weight)
            self.outputSimilarityScores(authorMap, authors, shapeStrategy, title)

    # Vector product once more at full weight, this time with omit=[0]
    fullWeight = 1.0
    omittingStrategy = VectorProductStrategy(
        self.graph, weight=fullWeight, omit=[0], metaPath=[Conference, Paper, Paper, Author], symmetric=True
    )
    title = "APPCPPA VectorProduct ShapeSim omitting CPC (%1.2f weight)" % fullWeight
    self.outputSimilarityScores(authorMap, authors, omittingStrategy, title)

    # Recursive PathSim over CPPA
    recursiveStrategy = RecursivePathSimStrategy(self.graph, [Conference, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, recursiveStrategy, "APPCPPA Recursive PathSim")
class MetaPathSimilarityStrategy(SimilarityStrategy):
    """
    Generic class for similarity strategies that use meta paths
    """

    def __init__(self, graph, metaPath=None, symmetric=False, conserveMemory=False):
        """
          Constructs a meta path similarity strategy, storing the meta path data for this strategy.

          @param  graph           Graph passed through to the SimilarityStrategy initializer
          @param  metaPath        Meta path object where the list of classes contains classes of nodes in the
                                  graph, and weights in [0,1] containing the importance of the meta path
          @param  symmetric       Whether or not to enforce that meta paths must be symmetric
          @param  conserveMemory  Flag stored on the instance as-is; it is not interpreted by this class

          For example, for meta path 'ABC': if 'symmetric' and 'evenLength' are both 'true' we will only
          count meta path 'ABCCBA', and if 'symmetric' is 'true' while 'evenLength' is 'false' we will only
          count meta paths 'ABCBA'.
          NOTE(review): 'evenLength' is not a parameter of this constructor -- confirm where it is handled.
        """
        super(MetaPathSimilarityStrategy, self).__init__(graph)

        self.metaPath = metaPath
        self.symmetric = symmetric
        self.conserveMemory = conserveMemory
        self.metaPathUtility = EdgeBasedMetaPathUtility()

    def findMostSimilarNodes(self, source, number=None, conserveMemory=False):
        """
          Find the similarity scores between this node and all reachable nodes on this meta path, and
          return the best-scoring nodes first. Note that if there are fewer candidate nodes than "number",
          all of the candidates are returned.

          @param  source          Node whose most similar peers are wanted
          @param  number          Maximum number of nodes to return; when None, self.n is used
          @param  conserveMemory  Accepted for interface compatibility; not read by this method
          @return List of nodes (without scores) that have the same class as source, excluding source
                  itself, ordered from highest to lowest similarity score
        """

        # If no number is provided, default to self.n (the number of nodes in the graph, per the
        # original comment -- the attribute itself is set outside this class)
        if number is None:
            number = self.n

        # Get similarity scores for all nodes reachable from source along the meta path
        reachableNodes = self.metaPathUtility.findMetaPathNeighbors(self.graph, source, self.metaPath)
        for reachableNode in reachableNodes:
            self.similarityScores[source][reachableNode] = self.findSimilarityScore(source, reachableNode)

        # Sort by increasing score. dict.iteritems() was removed in Python 3; items() behaves the same
        # for this purpose and exists on both Python 2 and Python 3.
        ascendingScores = sorted(self.similarityScores[source].items(), key=operator.itemgetter(1))

        # Remove source and nodes of different types, then reverse to get decreasing score. Reversing
        # the stable ascending sort (rather than sorting with reverse=True) keeps the tie order as before.
        mostSimilarNodes = [
            node for node, _ in ascendingScores
            if node != source and node.__class__ == source.__class__
        ]
        mostSimilarNodes.reverse()

        # A slice never runs past the end of the list, so it already caps the result length
        return mostSimilarNodes[:number]
def _getImplementation(self):
    """
      Build and return a fresh EdgeBasedMetaPathUtility instance.

      @return A newly constructed EdgeBasedMetaPathUtility
    """
    implementation = EdgeBasedMetaPathUtility()
    return implementation
def run(self):
    """
      Run the experiment on PathSim "example three" extended with extra authors and citations.

      Prints (via self.output) the projected author/conference adjacency table, the author/author
      citation table and the in/out citation totals, then reports similarity scores for PathSim,
      SimRank, PageRank/HITS distance, NeighborSim and two aggregate strategies through
      self.outputSimilarityScores.
    """
    # Nested counts handed to the sample graph builder; outer and inner keys are author names
    citationMap = {
        'Mike': {'Mike': 0, 'Jim': 0, 'Mary': 0, 'Bob': 0, 'Ann': 0, 'Joe': 0, 'Nancy': 0},
        'Jim': {'Mike': 20, 'Jim': 0, 'Mary': 20, 'Bob': 20, 'Ann': 0, 'Joe': 20, 'Nancy': 0},
        'Mary': {'Mike': 1, 'Jim': 10, 'Mary': 0, 'Bob': 1, 'Ann': 0, 'Joe': 1, 'Nancy': 0},
        'Bob': {'Mike': 1, 'Jim': 10, 'Mary': 1, 'Bob': 0, 'Ann': 0, 'Joe': 1, 'Nancy': 0},
        'Ann': {'Mike': 0, 'Jim': 0, 'Mary': 0, 'Bob': 0, 'Ann': 0, 'Joe': 0, 'Nancy': 0},
        'Joe': {'Mike': 0, 'Jim': 0, 'Mary': 0, 'Bob': 0, 'Ann': 0, 'Joe': 0, 'Nancy': 0},
        'Nancy': {'Mike': 1, 'Jim': 10, 'Mary': 1, 'Bob': 1, 'Ann': 0, 'Joe': 1, 'Nancy': 0}
    }
    example = SampleGraphUtility.constructPathSimExampleThree(
        extraAuthorsAndCitations=True, citationMap=citationMap
    )
    self.graph, authorMap, conferenceMap = example

    # Nodes to report on, in display order
    conferences = [conferenceMap[key] for key in ('SIGMOD', 'VLDB', 'ICDE', 'KDD')]
    authors = [authorMap[key] for key in ('Mike', 'Jim', 'Mary', 'Bob', 'Ann', 'Joe', 'Nancy')]

    utility = EdgeBasedMetaPathUtility()

    # Each strategy receives its own list built from this tuple, as in the original inline literals
    citationPath = (Author, Paper, Paper, Author)

    def renderTable(tableRows):
        # Draw the given rows as a text table and return the resulting string
        table = texttable.Texttable()
        table.add_rows(tableRows)
        return table.draw()

    # Author x conference edge counts in the symmetric Author-Paper-Conference projection
    publicationGraph = utility.createHeterogeneousProjection(
        self.graph, [Author, Paper, Conference], symmetric=True
    )
    self.output('\nAdjacency Matrix (Projected):')
    rows = [['Author'] + [venue.name for venue in conferences]]
    rows.extend(
        [person.name] + [publicationGraph.getNumberOfEdges(person, venue) for venue in conferences]
        for person in authors
    )
    self.output(renderTable(rows))

    # Author x author edge counts in the Author-Paper-Paper-Author projection
    citationGraph = utility.createHomogeneousProjection(self.graph, list(citationPath))
    self.output('\nCitation Matrix:')
    rows = [['Author'] + [person.name for person in authors]]
    rows.extend(
        [person.name] + [citationGraph.getNumberOfEdges(person, other) for other in authors]
        for person in authors
    )
    self.output(renderTable(rows))

    # Per-author totals: edges arriving from, and leaving to, the listed authors
    self.output('\nCitations Total:')
    rows = [['Author', 'In', 'Out']]
    for person in authors:
        incoming = sum(citationGraph.getNumberOfEdges(other, person) for other in authors)
        outgoing = sum(citationGraph.getNumberOfEdges(person, other) for other in authors)
        rows.append([person.name, incoming, outgoing])
    self.output(renderTable(rows))

    # PathSim over Author-Paper-Conference-Paper-Author
    pathSimStrategy = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], True)
    self.outputSimilarityScores(authorMap, authors, pathSimStrategy, 'APCPA PathSim')

    # SimRank over the citation meta path
    simRankOnly = SimRankStrategy(self.graph, list(citationPath), symmetric=True)
    self.outputSimilarityScores(authorMap, authors, simRankOnly, "SimRank")

    # PageRank / HITS distance strategies over the citation meta path
    distanceStrategies = (('PageRank', PageRankDistanceStrategy), ('HITS', HITSDistanceStrategy))
    for name, strategyClass in distanceStrategies:
        distanceStrategy = strategyClass(self.graph, list(citationPath), symmetric=True)
        self.outputSimilarityScores(authorMap, authors, distanceStrategy, "%s-Distance" % name)

    # NeighborSim with default options and with reversed/smoothed options
    inNeighborSimStrategy = NeighborSimStrategy(self.graph, list(citationPath))
    self.outputSimilarityScores(authorMap, authors, inNeighborSimStrategy, 'APPA NeighborSim-In')
    outNeighborSimStrategy = NeighborSimStrategy(
        self.graph, list(citationPath), reversed=True, smoothed=True
    )
    self.outputSimilarityScores(authorMap, authors, outNeighborSimStrategy, 'APPA NeighborSim-Out')

    # Equal-weight aggregate of PathSim and a fresh SimRank strategy
    simRankStrategy = SimRankStrategy(self.graph, list(citationPath), symmetric=True)
    pathSimPlusSimRank = AggregateSimilarityStrategy(
        self.graph, [pathSimStrategy, simRankStrategy], [0.5, 0.5]
    )
    self.outputSimilarityScores(authorMap, authors, pathSimPlusSimRank, 'APCPA Pathsim, APPA SimRank')

    # Aggregate of PathSim with both NeighborSim variants
    combinedNeighborSim = AggregateSimilarityStrategy(
        self.graph, [pathSimStrategy, inNeighborSimStrategy, outNeighborSimStrategy], [0.6, 0.2, 0.2]
    )
    self.outputSimilarityScores(
        authorMap, authors, combinedNeighborSim, 'APCPA Pathsim, APPA NeighborSim-Combined'
    )
def run(self):
    """
      Run the experiment on the multi-disciplinary author sample graph (built with indirectAuthor=True).

      Prints (via self.output) the author/author citation table, the author/conference adjacency table
      and the total citation counts, then reports PathSim, NeighborSim and propagated NeighborSim
      scores through self.outputSimilarityScores.
    """
    example = SampleGraphUtility.constructMultiDisciplinaryAuthorExample(indirectAuthor=True)
    self.graph, authorMap, conferenceMap, totalCitationCount = example

    # Nodes to report on, in display order
    conferences = [conferenceMap[key] for key in ('VLDB', 'KDD')]
    authors = [authorMap[key] for key in ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J')]

    self.metaPathUtility = EdgeBasedMetaPathUtility()

    # Homogeneous author projection: edge counts between every pair of projected nodes
    citationGraph = self.metaPathUtility.createHomogeneousProjection(
        self.graph, [Author, Paper, Paper, Author]
    )
    authorCitationCounts = dict(
        (
            fromNode,
            dict(
                (toNode, citationGraph.getNumberOfEdges(fromNode, toNode))
                for toNode in citationGraph.getNodes()
            ),
        )
        for fromNode in citationGraph.getNodes()
    )

    # Author x author table
    self.output('\nCitation Matrix:')
    citationTable = texttable.Texttable()
    rows = [['Author'] + [person.name for person in authors]]
    rows.extend(
        [person.name] + [authorCitationCounts[person][other] for other in authors]
        for person in authors
    )
    citationTable.add_rows(rows)
    self.output(citationTable.draw())

    # Author x conference table from the Author-Paper-Conference projection
    self.output('\nAdjacency Matrix:')
    adjacencyTable = texttable.Texttable()
    venueGraph = self.metaPathUtility.createHeterogeneousProjection(
        self.graph, [Author, Paper, Conference]
    )
    rows = [[''] + [venue.name for venue in conferences]]
    rows.extend(
        [person.name] + [venueGraph.getNumberOfEdges(person, venue) for venue in conferences]
        for person in authors
    )
    adjacencyTable.add_rows(rows)
    self.output(adjacencyTable.draw())

    # Total citation counts, looked up by author name
    self.output('\nTotal Citation Counts:')
    rows = [
        [person.name for person in authors],
        ['%d' % totalCitationCount[person.name] for person in authors],
    ]
    citationCountTable = texttable.Texttable()
    citationCountTable.add_rows(rows)
    self.output(citationCountTable.draw())

    # PathSim over Author-Paper-Conference-Paper-Author
    pathSimStrategy = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], True)
    self.outputSimilarityScores(authorMap, authors, pathSimStrategy, "PathSim")

    # NeighborSim over CPPA, then over APPA
    plainNeighborSimRuns = (
        ([Conference, Paper, Paper, Author], "NeighborSim (CPPA)"),
        ([Author, Paper, Paper, Author], "NeighborSim (APPA)"),
    )
    for pathClasses, title in plainNeighborSimRuns:
        neighborSimStrategy = NeighborSimStrategy(self.graph, pathClasses)
        self.outputSimilarityScores(authorMap, authors, neighborSimStrategy, title)

    # Constant weight propagation at increasing iteration counts
    constantPropagationRuns = (
        (2, "NeighborSim-ConstantPropagation-2"),
        (3, "NeighborSim-ConstantPropagation-3"),
        (4, "NeighborSim-ConstantPropagation-4"),
        (50, "NeighborSim-ConstantPropagation-50"),
    )
    for iterationCount, title in constantPropagationRuns:
        propagatedStrategy = NeighborSimConstantPropagationStrategy(
            self.graph, [Author, Paper, Paper, Author], iterations=iterationCount
        )
        self.outputSimilarityScores(authorMap, authors, propagatedStrategy, title)

    # Preferential attachment propagation at the same iteration counts
    preferentialAttachmentRuns = (
        (2, "NeighborSim-WeightedPropagation-2"),
        (3, "NeighborSim-WeightedPropagation-3"),
        (4, "NeighborSim-WeightedPropagation-4"),
        (50, "NeighborSim-WeightedPropagation-50"),
    )
    for iterationCount, title in preferentialAttachmentRuns:
        propagatedStrategy = NeighborSimConstantPreferentialAttachmentStrategy(
            self.graph, [Author, Paper, Paper, Author], iterations=iterationCount
        )
        self.outputSimilarityScores(authorMap, authors, propagatedStrategy, title)

    # Neighbor citation count difference strategy
    citeCountStrategy = NeighborSimStrategy(self.graph, [Paper, Paper, Author], commonNeighbors=False)
    self.outputSimilarityScores(
        authorMap, authors, citeCountStrategy, "NeighborSim-CiteCountDiff", citationCounts=totalCitationCount
    )
def run(self):
    """
      Run the experiment on PathSim "example three" extended with extra authors and citations.

      Writes three tables through self.output (projected author/conference adjacency, author/author
      citations, in/out citation totals) and then reports the scores of PathSim, SimRank,
      PageRank/HITS distance, NeighborSim and two aggregate strategies via
      self.outputSimilarityScores.
    """
    # Nested counts passed to the sample graph builder; both key levels are author names
    citationMap = {
        'Mike': {'Mike': 0, 'Jim': 0, 'Mary': 0, 'Bob': 0, 'Ann': 0, 'Joe': 0, 'Nancy': 0},
        'Jim': {'Mike': 20, 'Jim': 0, 'Mary': 20, 'Bob': 20, 'Ann': 0, 'Joe': 20, 'Nancy': 0},
        'Mary': {'Mike': 1, 'Jim': 10, 'Mary': 0, 'Bob': 1, 'Ann': 0, 'Joe': 1, 'Nancy': 0},
        'Bob': {'Mike': 1, 'Jim': 10, 'Mary': 1, 'Bob': 0, 'Ann': 0, 'Joe': 1, 'Nancy': 0},
        'Ann': {'Mike': 0, 'Jim': 0, 'Mary': 0, 'Bob': 0, 'Ann': 0, 'Joe': 0, 'Nancy': 0},
        'Joe': {'Mike': 0, 'Jim': 0, 'Mary': 0, 'Bob': 0, 'Ann': 0, 'Joe': 0, 'Nancy': 0},
        'Nancy': {'Mike': 1, 'Jim': 10, 'Mary': 1, 'Bob': 1, 'Ann': 0, 'Joe': 1, 'Nancy': 0}
    }
    builtExample = SampleGraphUtility.constructPathSimExampleThree(
        extraAuthorsAndCitations=True, citationMap=citationMap
    )
    self.graph, authorMap, conferenceMap = builtExample

    # Nodes to report on, in display order
    conferences = [conferenceMap[label] for label in ('SIGMOD', 'VLDB', 'ICDE', 'KDD')]
    authors = [authorMap[label] for label in ('Mike', 'Jim', 'Mary', 'Bob', 'Ann', 'Joe', 'Nancy')]

    utility = EdgeBasedMetaPathUtility()

    def appaPath():
        # A new Author-Paper-Paper-Author list per call, matching the original inline literals
        return [Author, Paper, Paper, Author]

    def drawRows(tableRows):
        # Render rows with a fresh Texttable and hand back the drawn text
        table = texttable.Texttable()
        table.add_rows(tableRows)
        return table.draw()

    # Author x conference edge counts in the symmetric Author-Paper-Conference projection
    publicationGraph = utility.createHeterogeneousProjection(
        self.graph, [Author, Paper, Conference], symmetric=True
    )
    self.output('\nAdjacency Matrix (Projected):')
    tableRows = [['Author'] + [venue.name for venue in conferences]]
    for writer in authors:
        edgeCounts = [publicationGraph.getNumberOfEdges(writer, venue) for venue in conferences]
        tableRows.append([writer.name] + edgeCounts)
    self.output(drawRows(tableRows))

    # Author x author edge counts in the Author-Paper-Paper-Author projection
    citationGraph = utility.createHomogeneousProjection(self.graph, appaPath())
    self.output('\nCitation Matrix:')
    tableRows = [['Author'] + [writer.name for writer in authors]]
    for writer in authors:
        edgeCounts = [citationGraph.getNumberOfEdges(writer, peer) for peer in authors]
        tableRows.append([writer.name] + edgeCounts)
    self.output(drawRows(tableRows))

    # Per-author totals: edges arriving from, and leaving to, the listed authors
    self.output('\nCitations Total:')
    tableRows = [['Author', 'In', 'Out']]
    for writer in authors:
        incoming = sum(citationGraph.getNumberOfEdges(peer, writer) for peer in authors)
        outgoing = sum(citationGraph.getNumberOfEdges(writer, peer) for peer in authors)
        tableRows.append([writer.name, incoming, outgoing])
    self.output(drawRows(tableRows))

    # PathSim over Author-Paper-Conference-Paper-Author
    pathSimStrategy = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], True)
    self.outputSimilarityScores(authorMap, authors, pathSimStrategy, 'APCPA PathSim')

    # SimRank over the citation meta path
    plainSimRank = SimRankStrategy(self.graph, appaPath(), symmetric=True)
    self.outputSimilarityScores(authorMap, authors, plainSimRank, "SimRank")

    # PageRank / HITS distance strategies over the citation meta path
    for name, strategyClass in (('PageRank', PageRankDistanceStrategy), ('HITS', HITSDistanceStrategy)):
        distanceStrategy = strategyClass(self.graph, appaPath(), symmetric=True)
        self.outputSimilarityScores(authorMap, authors, distanceStrategy, "%s-Distance" % name)

    # NeighborSim with default options and with reversed/smoothed options
    inNeighborSimStrategy = NeighborSimStrategy(self.graph, appaPath())
    self.outputSimilarityScores(authorMap, authors, inNeighborSimStrategy, 'APPA NeighborSim-In')
    outNeighborSimStrategy = NeighborSimStrategy(self.graph, appaPath(), reversed=True, smoothed=True)
    self.outputSimilarityScores(authorMap, authors, outNeighborSimStrategy, 'APPA NeighborSim-Out')

    # Equal-weight aggregate of PathSim and a fresh SimRank strategy
    simRankStrategy = SimRankStrategy(self.graph, appaPath(), symmetric=True)
    pathSimWithSimRank = AggregateSimilarityStrategy(
        self.graph, [pathSimStrategy, simRankStrategy], [0.5, 0.5]
    )
    self.outputSimilarityScores(authorMap, authors, pathSimWithSimRank, 'APCPA Pathsim, APPA SimRank')

    # Aggregate of PathSim with both NeighborSim variants
    combinedNeighborSim = AggregateSimilarityStrategy(
        self.graph, [pathSimStrategy, inNeighborSimStrategy, outNeighborSimStrategy], [0.6, 0.2, 0.2]
    )
    self.outputSimilarityScores(
        authorMap, authors, combinedNeighborSim, 'APCPA Pathsim, APPA NeighborSim-Combined'
    )
def run(self):
    """
      Run the experiment on the unmodified PathSim "example three" graph.

      Prints (via self.output) the APC and CPA adjacency tables and their matrix product, then reports
      homogeneous SimRank, projected (heterogeneous) SimRank with and without a squared-neighbor
      normalization, NeighborSim and PathSim scores through self.outputSimilarityScores.
    """
    self.graph, authorMap, conferenceMap = SampleGraphUtility.constructPathSimExampleThree()

    # Nodes to report on, in display order
    conferences = [conferenceMap[key] for key in ('SIGMOD', 'VLDB', 'ICDE', 'KDD')]
    authors = [authorMap[key] for key in ('Mike', 'Jim', 'Mary', 'Bob', 'Ann')]

    utility = EdgeBasedMetaPathUtility()

    def renderTable(tableRows):
        # Draw the given rows as a text table and return the resulting string
        table = texttable.Texttable()
        table.add_rows(tableRows)
        return table.draw()

    # Author x conference table
    self.output('\nAPC Adjacency Matrix:')
    apcMatrix, nodesIndex = utility.getAdjacencyMatrixFromGraph(
        self.graph, [Author, Paper, Conference], project=True
    )
    rows = [['Author'] + [venue.name for venue in conferences]]
    rows.extend(
        [person.name] + [apcMatrix[nodesIndex[person]][nodesIndex[venue]] for venue in conferences]
        for person in authors
    )
    self.output(renderTable(rows))

    # Conference x author table
    self.output('\nCPA Adjacency Matrix:')
    # NOTE(review): the index returned by this second call is discarded and the lookups below keep
    # using the index from the APC call -- presumably both calls index nodes identically; confirm.
    cpaMatrix, _unusedIndex = utility.getAdjacencyMatrixFromGraph(
        self.graph, [Conference, Paper, Author], project=True
    )
    rows = [['Conference'] + [person.name for person in authors]]
    rows.extend(
        [venue.name] + [cpaMatrix[nodesIndex[venue]][nodesIndex[person]] for person in authors]
        for venue in conferences
    )
    self.output(renderTable(rows))

    # Author x author table from the product of the two matrices above
    self.output('\nAPCPA Adjacency Matrix (Computed):')
    apcpaMatrix = numpy.dot(apcMatrix, cpaMatrix)
    rows = [['Author'] + [person.name for person in authors]]
    rows.extend(
        [person.name] + [apcpaMatrix[nodesIndex[person]][nodesIndex[other]] for other in authors]
        for person in authors
    )
    self.output(renderTable(rows))

    # SimRank directly on the full graph
    homogeneousSimRank = SimRankStrategy(self.graph)
    self.outputSimilarityScores(authorMap, authors, homogeneousSimRank, 'Homogeneous SimRank')

    # SimRank on the symmetric Author-Paper-Conference projection
    projectedGraph = utility.createHeterogeneousProjection(
        self.graph, [Author, Paper, Conference], symmetric=True
    )
    heterogeneousSimRank = SimRankStrategy(projectedGraph)
    self.outputSimilarityScores(authorMap, authors, heterogeneousSimRank, 'APC Heterogeneous SimRank')

    # SimRank on the projection, normalized by the squared predecessor counts of both nodes
    def sqNeighborsNorm(graph, a, b, sim):
        # |pred(a)|^2 * |pred(b)|^2 as a float; 'sim' is accepted but not used
        predecessorProduct = len(graph.getPredecessors(a)) * len(graph.getPredecessors(b))
        return float(predecessorProduct ** 2)

    squaredSimRank = SimRankStrategy(projectedGraph, normalization=sqNeighborsNorm)
    self.outputSimilarityScores(authorMap, authors, squaredSimRank, 'Squared Heterogeneous SimRank')

    # NeighborSim over Author-Paper-Conference
    neighborSim = NeighborSimStrategy(self.graph, [Author, Paper, Conference], symmetric=True)
    self.outputSimilarityScores(authorMap, authors, neighborSim, 'APC NeighborSim')

    # PathSim over Author-Paper-Conference-Paper-Author
    pathSim = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], symmetric=True)
    self.outputSimilarityScores(authorMap, authors, pathSim, 'APCPA PathSim')
def testConstructPathSimExampleThree(self):
    """
      Tests the construction of "Example 3" from PathSim paper. Specifically, checks adjacency matrix
      shown in this example for Author-Paper-Conference meta paths.

      For every (author, conference) pair listed below, the number of meta paths returned by
      findMetaPaths must equal the expected count.
    """

    graph, authorMap, conferenceMap = SampleGraphUtility.constructPathSimExampleThree()
    metaPath = [Author, Paper, Conference]
    metaPathUtility = EdgeBasedMetaPathUtility()

    # Expected number of Author-Paper-Conference meta paths; the columns follow conferenceNames
    conferenceNames = ('SIGMOD', 'VLDB', 'ICDE', 'KDD')
    expectedPathCounts = (
        ('Mike', (2, 1, 0, 0)),
        ('Jim', (50, 20, 0, 0)),
        ('Mary', (2, 0, 1, 0)),
        ('Bob', (2, 1, 0, 0)),
        ('Ann', (0, 0, 1, 1)),
    )

    for authorName, expectedCounts in expectedPathCounts:
        for conferenceName, expectedCount in zip(conferenceNames, expectedCounts):
            foundPaths = metaPathUtility.findMetaPaths(
                graph, authorMap[authorName], conferenceMap[conferenceName], metaPath
            )
            # assertEqual replaces the deprecated assertEquals alias, which newer unittest
            # versions no longer provide
            self.assertEqual(
                expectedCount, len(foundPaths),
                'Unexpected meta path count for %s -> %s' % (authorName, conferenceName)
            )
def run(self):
    """
      Run the experiment on the skewed citation/publication sample graph (built without randomness).

      Writes the CPA and CPPA conference-to-author adjacency rows and the per-author
      citation/publication counts through self.output, then reports similarity scores for several
      strategies via self.outputSimilarityScores.
    """
    builtExample = SampleGraphUtility.constructSkewedCitationPublicationExample(introduceRandomness=False)
    self.graph, authorMap, conference, citationsPublications = builtExample

    # Authors to report on, in display order
    authors = [authorMap[label] for label in ('Alice', 'Bob', 'Carol', 'Dave', 'Ed', 'Frank')]

    utility = EdgeBasedMetaPathUtility()

    def authorHeader(firstCell):
        # Row consisting of a leading cell followed by the name of every author
        return [firstCell] + [writer.name for writer in authors]

    def drawRows(tableRows):
        # Render rows with a fresh Texttable and hand back the drawn text
        table = texttable.Texttable()
        table.add_rows(tableRows)
        return table.draw()

    # One single-row adjacency table (the conference against each author) per meta path
    for heading, pathClasses in (
        ('\nCPA Adjacency Matrix:', [Conference, Paper, Author]),
        ('\nCPPA Adjacency Matrix:', [Conference, Paper, Paper, Author]),
    ):
        self.output(heading)
        matrix, index = utility.getAdjacencyMatrixFromGraph(self.graph, pathClasses, project=True)
        conferenceRow = [conference.name] + [matrix[index[conference]][index[writer]] for writer in authors]
        self.output(drawRows([authorHeader('Conference'), conferenceRow]))

    # Citation (entry 0) and publication (entry 1) counts per author
    self.output('\nCitation & Publication Counts')
    self.output(drawRows([
        authorHeader('Measure'),
        ['Citations'] + [citationsPublications[writer][0] for writer in authors],
        ['Publications'] + [citationsPublications[writer][1] for writer in authors],
    ]))

    # NeighborSim over CPA (symmetric) and over CPPA
    symmetricCpaStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Author], symmetric=True)
    self.outputSimilarityScores(authorMap, authors, symmetricCpaStrategy, 'APCPA PathSim')
    cppaStrategy = NeighborSimStrategy(self.graph, [Conference, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, cppaStrategy, 'APPCPPA PathSim')

    # Shape-based strategies with nothing omitted, at each weight
    for strategyClass, label in ((FlattenedMatrixStrategy, 'FlatMat'), (VectorProductStrategy, 'VectorProduct')):
        for weight in (1.0, 0.5, 0):
            shapeStrategy = strategyClass(
                self.graph, weight=weight, omit=[], metaPath=[Conference, Paper, Paper, Author], symmetric=True
            )
            title = 'APPCPPA %s ShapeSim (%1.2f weight)' % (label, weight)
            self.outputSimilarityScores(authorMap, authors, shapeStrategy, title)

    # Vector product once more at full weight, this time with omit=[0]
    fullWeight = 1.0
    omittingStrategy = VectorProductStrategy(
        self.graph, weight=fullWeight, omit=[0], metaPath=[Conference, Paper, Paper, Author], symmetric=True
    )
    title = 'APPCPPA VectorProduct ShapeSim omitting CPC (%1.2f weight)' % fullWeight
    self.outputSimilarityScores(authorMap, authors, omittingStrategy, title)

    # Recursive PathSim over CPPA
    recursiveStrategy = RecursivePathSimStrategy(self.graph, [Conference, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, recursiveStrategy, 'APPCPPA Recursive PathSim')
def run(self):
    """
      Run the multi-disciplinary author experiment. Builds the sample graph, emits the
      author-to-author edge counts of the APPA projection, the author-to-conference edge counts
      of the APC projection and the total citation counts, and then emits similarity scores for
      NeighborSim, PathSim, SimRank, an equally weighted SimRank/PathSim aggregate, and the
      PageRank / HITS distance strategies.
    """

    self.graph, authorMap, conferenceMap, totalCitationCount = \
        SampleGraphUtility.constructMultiDisciplinaryAuthorExample()

    # Nodes reported on, in the row / column order used by the tables below
    conferences = [conferenceMap[key] for key in ('VLDB', 'KDD')]
    authors = [authorMap[key] for key in ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I')]

    self.metaPathUtility = EdgeBasedMetaPathUtility()

    # Author-author projection; record the edge count for every ordered pair of its nodes
    authorGraph = self.metaPathUtility.createHomogeneousProjection(self.graph, [Author, Paper, Paper, Author])
    edgeCounts = {
        node: {
            otherNode: authorGraph.getNumberOfEdges(node, otherNode)
            for otherNode in authorGraph.getNodes()
        }
        for node in authorGraph.getNodes()
    }

    # Author x author table of the counts gathered above
    self.output('\nCitation Matrix:')
    citationTable = texttable.Texttable()
    citationRows = [['Author'] + [author.name for author in authors]]
    for rowAuthor in authors:
        citationRows.append([rowAuthor.name] + [edgeCounts[rowAuthor][colAuthor] for colAuthor in authors])
    citationTable.add_rows(citationRows)
    self.output(citationTable.draw())

    # Author x conference table taken from the APC projection
    self.output('\nAdjacency Matrix:')
    adjacencyTable = texttable.Texttable()
    authorConferenceGraph = self.metaPathUtility.createHeterogeneousProjection(
        self.graph, [Author, Paper, Conference]
    )
    adjacencyRows = [[''] + [conference.name for conference in conferences]]
    for rowAuthor in authors:
        adjacencyRows.append(
            [rowAuthor.name] +
            [authorConferenceGraph.getNumberOfEdges(rowAuthor, conference) for conference in conferences]
        )
    adjacencyTable.add_rows(adjacencyRows)
    self.output(adjacencyTable.draw())

    # Total citation counts, looked up by author name
    self.output('\nTotal Citation Counts:')
    nameRow = [author.name for author in authors]
    totalsRow = ['%d' % totalCitationCount[author.name] for author in authors]
    totalsTable = texttable.Texttable()
    totalsTable.add_rows([nameRow, totalsRow])
    self.output(totalsTable.draw())

    # NeighborSim scores
    neighborSim = NeighborSimStrategy(self.graph, [Conference, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, neighborSim, "NeighborSim")

    # PathSim scores
    pathSim = PathSimStrategy(self.graph, [Author, Paper, Conference, Paper, Author], True)
    self.outputSimilarityScores(authorMap, authors, pathSim, "PathSim")

    # SimRank scores
    simRank = SimRankStrategy(self.graph, [Author, Paper, Paper, Author])
    self.outputSimilarityScores(authorMap, authors, simRank, "SimRank")

    # Equal-weight aggregate of the SimRank and PathSim strategies above
    aggregate = AggregateSimilarityStrategy(self.graph, [simRank, pathSim], [0.5, 0.5])
    self.outputSimilarityScores(authorMap, authors, aggregate, 'APCPA Pathsim, APPA SimRank')

    # Distance strategies, each given the same two author groups as node sets
    distanceStrategies = (
        ('PageRank', PageRankDistanceStrategy),
        ('HITS', HITSDistanceStrategy),
    )
    for label, distanceStrategyClass in distanceStrategies:
        researchAreas = {
            tuple(authorMap[key] for key in ('A', 'B', 'C', 'D', 'E', 'I')),
            tuple(authorMap[key] for key in ('F', 'G', 'H', 'D', 'E', 'I')),
        }
        distanceStrategy = distanceStrategyClass(
            self.graph, [Author, Paper, Paper, Author], nodeSets=researchAreas, symmetric=True
        )
        self.outputSimilarityScores(authorMap, authors, distanceStrategy, "%s-Distance" % label)
def run(self):
    """
      Run the third PathSim example experiment. Builds the sample graph, emits the APC and CPA
      adjacency matrices plus their matrix product (the computed APCPA matrix), and then emits
      similarity scores for homogeneous SimRank, SimRank on the APC projection (with the default
      and with a squared-neighbor normalization), NeighborSim and PathSim.

      Each adjacency matrix is read through the node index returned by the same call that
      produced it, rather than reusing the index of an earlier call.
    """

    self.graph, authorMap, conferenceMap = SampleGraphUtility.constructPathSimExampleThree()

    # Get the nodes we care about
    conferences = [
        conferenceMap['SIGMOD'],
        conferenceMap['VLDB'],
        conferenceMap['ICDE'],
        conferenceMap['KDD']
    ]
    authors = [
        authorMap['Mike'],
        authorMap['Jim'],
        authorMap['Mary'],
        authorMap['Bob'],
        authorMap['Ann'],
    ]
    metaPathUtility = EdgeBasedMetaPathUtility()

    # Author x conference adjacency
    self.output('\nAPC Adjacency Matrix:')
    apcadjMatrix, apcNodesIndex = metaPathUtility.getAdjacencyMatrixFromGraph(
        self.graph, [Author, Paper, Conference], project=True
    )
    adjMatrixTable = texttable.Texttable()
    rows = [['Author'] + [conference.name for conference in conferences]]
    rows += [
        [author.name] + [
            apcadjMatrix[apcNodesIndex[author]][apcNodesIndex[conference]] for conference in conferences
        ]
        for author in authors
    ]
    adjMatrixTable.add_rows(rows)
    self.output(adjMatrixTable.draw())

    # Conference x author adjacency, read with the index returned for this matrix
    self.output('\nCPA Adjacency Matrix:')
    cpaadjMatrix, cpaNodesIndex = metaPathUtility.getAdjacencyMatrixFromGraph(
        self.graph, [Conference, Paper, Author], project=True
    )
    adjMatrixTable = texttable.Texttable()
    rows = [['Conference'] + [author.name for author in authors]]
    rows += [
        [conference.name] + [
            cpaadjMatrix[cpaNodesIndex[conference]][cpaNodesIndex[author]] for author in authors
        ]
        for conference in conferences
    ]
    adjMatrixTable.add_rows(rows)
    self.output(adjMatrixTable.draw())

    # Product of the two matrices: its rows follow the APC matrix's rows and its columns follow
    # the CPA matrix's columns, so each axis is looked up in the matching index.
    # NOTE(review): the product is only meaningful if the APC column order matches the CPA row
    # order -- confirm against getAdjacencyMatrixFromGraph.
    self.output('\nAPCPA Adjacency Matrix (Computed):')
    adjMatrix = numpy.dot(apcadjMatrix, cpaadjMatrix)
    adjMatrixTable = texttable.Texttable()
    rows = [['Author'] + [author.name for author in authors]]
    rows += [
        [author.name] + [
            adjMatrix[apcNodesIndex[author]][cpaNodesIndex[otherAuthor]] for otherAuthor in authors
        ]
        for author in authors
    ]
    adjMatrixTable.add_rows(rows)
    self.output(adjMatrixTable.draw())

    # Output homogeneous simrank comparison
    homogeneousSimRankStrategy = SimRankStrategy(self.graph)
    self.outputSimilarityScores(authorMap, authors, homogeneousSimRankStrategy, 'Homogeneous SimRank')

    projectedGraph = metaPathUtility.createHeterogeneousProjection(
        self.graph, [Author, Paper, Conference], symmetric=True
    )

    # Output heterogeneous simrank comparison
    heterogeneousSimRankStrategy = SimRankStrategy(projectedGraph)
    self.outputSimilarityScores(authorMap, authors, heterogeneousSimRankStrategy, 'APC Heterogeneous SimRank')

    # Output heterogeneous simrank w/ squared neighbors comparison
    def sqNeighborsNorm(graph, a, b, sim):
        # Product of the squared predecessor counts of both nodes; 'sim' is accepted but unused
        aNeighbors, bNeighbors = graph.getPredecessors(a), graph.getPredecessors(b)
        return float(len(aNeighbors)**2 * len(bNeighbors)**2)

    heterogeneousSquaredSimRankStrategy = SimRankStrategy(projectedGraph, normalization=sqNeighborsNorm)
    self.outputSimilarityScores(
        authorMap, authors, heterogeneousSquaredSimRankStrategy, 'Squared Heterogeneous SimRank'
    )

    # Output NeighborSim similarity scores
    neighborSimStrategy = NeighborSimStrategy(self.graph, [Author, Paper, Conference], symmetric=True)
    self.outputSimilarityScores(authorMap, authors, neighborSimStrategy, 'APC NeighborSim')

    # Output the PathSim similarity scores
    pathsimStrategy = PathSimStrategy(
        self.graph, [Author, Paper, Conference, Paper, Author], symmetric=True
    )
    self.outputSimilarityScores(authorMap, authors, pathsimStrategy, 'APCPA PathSim')