def testFindAllSimilarityFromNodeOnPathSimExampleThree(self):
    """
      Tests similarity for all other nodes given a single node, using example 3 from PathSim paper.

      Builds the sample graph, asks for up to five nodes most similar to 'Mike' along the
      Author-Paper-Conference-Paper-Author meta path, and checks that exactly Bob, Mary and Jim
      are returned, in that order.
    """

    # Only the graph and the author lookup are needed; the conference map is discarded.
    graph, authorMap, _ = SampleGraphUtility.constructPathSimExampleThree()
    metaPath = [Author, Paper, Conference, Paper, Author]
    strategy = PathSimStrategy(graph, metaPath)

    mike = authorMap['Mike']
    # Five nodes are requested, but the expected result holds only three entries.
    mostSimilarNodes = strategy.findMostSimilarNodes(mike, 5)

    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    expectedNodes = [authorMap['Bob'], authorMap['Mary'], authorMap['Jim']]
    self.assertEqual(expectedNodes, mostSimilarNodes)
def testFindAllSimilarityFromNodeOnPathSimExampleThree(self):
    """
      Tests similarity for all other nodes given a single node, using example 3 from PathSim paper.

      The query node is the author 'Mike' and the meta path is
      Author-Paper-Conference-Paper-Author. The ranking returned by the strategy must equal
      [Bob, Mary, Jim]; since lists are compared, the order matters.
    """

    # The third returned value (conference map) is not used by this test.
    graph, authorMap, _ = SampleGraphUtility.constructPathSimExampleThree()
    strategy = PathSimStrategy(graph, [Author, Paper, Conference, Paper, Author])

    # Request up to five results; the assertion below expects three to come back.
    mostSimilarNodes = strategy.findMostSimilarNodes(authorMap['Mike'], 5)

    # Use assertEqual: the assertEquals spelling is a deprecated alias, removed in Python 3.12.
    self.assertEqual(
        [authorMap['Bob'], authorMap['Mary'], authorMap['Jim']],
        mostSimilarNodes
    )
def run(self):
    """
      For each query conference, output a table of its most similar conferences under PathSim
      on the Conference-Paper-Author-Paper-Conference (CPAPC) meta path, followed by the nDCG
      of that ranking against the loaded relevance labels.

      Raises AssertionError if a query name does not match exactly one node in the graph.
    """

    # NOTE(review): the meaning of the third argument (True) is not visible here; confirm
    # against PathSimStrategy's constructor.
    strategy = PathSimStrategy(
        self.graph, [Conference, Paper, Author, Paper, Conference], True)
    experimentHelper = LabeledExperimentHelper(
        os.path.join('data', 'dbis', 'query_label', 'PathSim'))

    conferenceQueryNames = [
        'SIGMOD Conference', 'VLDB', 'ICDE', 'PODS', 'EDBT', 'DASFAA', 'KDD',
        'ICDM', 'PKDD', 'SDM', 'PAKDD', 'WWW', 'SIGIR', 'TREC', 'APWeb'
    ]

    # Number of similar conferences requested per query (loop-invariant).
    number = 10

    for conferenceQueryName in conferenceQueryNames:
        conferences = experimentHelper.getNodesByAttribute(
            self.graph, 'name', conferenceQueryName)
        # Each query name is expected to identify a single conference node.
        assert (len(conferences) == 1)
        target = list(conferences)[0]

        # Output the top ten most similar conferences on the CPAPC meta path
        self.output(
            '\n\nTop Ten Similar Conferences to %s (CPAPC meta path):'
            % conferenceQueryName)
        mostSimilarNodes = strategy.findMostSimilarNodes(target, number)

        resultsTable = texttable.Texttable()
        headerRow = [['Rank', 'Conference', 'Relevance']]
        # Iterate over the nodes actually returned (capped at `number`) instead of indexing
        # 0..number-1, so a shorter result list cannot raise IndexError.
        dataRows = [
            [rank, node.name, experimentHelper.getLabelForNode(target, node)]
            for rank, node in enumerate(mostSimilarNodes[:number], 1)
        ]
        resultsTable.add_rows(headerRow + dataRows)
        self.output(resultsTable.draw())

        # Output the nDCG for these results
        self.output(
            '%1.3f' % CumulativeGainMeasures.normalizedDiscountedCumulativeGain(
                target, mostSimilarNodes, experimentHelper.labelDictionary))
def run(self):
    """
      Run the labeled PathSim experiment over a fixed list of query conferences.

      For every query name this looks up the single matching conference node, outputs a
      ranked table (rank, conference name, relevance label) of its most similar conferences
      on the CPAPC meta path, and then outputs the nDCG of the ranking formatted to three
      decimal places.

      Raises AssertionError if a query name does not match exactly one node in the graph.
    """

    # NOTE(review): the purpose of the trailing True argument is not shown in this block;
    # verify against PathSimStrategy.
    metaPath = [Conference, Paper, Author, Paper, Conference]
    strategy = PathSimStrategy(self.graph, metaPath, True)
    experimentHelper = LabeledExperimentHelper(os.path.join('data', 'dbis', 'query_label', 'PathSim'))

    conferenceQueryNames = [
        'SIGMOD Conference', 'VLDB', 'ICDE', 'PODS', 'EDBT', 'DASFAA', 'KDD',
        'ICDM', 'PKDD', 'SDM', 'PAKDD', 'WWW', 'SIGIR', 'TREC', 'APWeb'
    ]

    # How many similar conferences to request for each query; constant across iterations.
    number = 10

    for conferenceQueryName in conferenceQueryNames:
        conferences = experimentHelper.getNodesByAttribute(self.graph, 'name', conferenceQueryName)
        # A query name must resolve to exactly one conference node.
        assert(len(conferences) == 1)
        target = list(conferences)[0]

        # Output the top ten most similar conferences on the CPAPC meta path
        self.output('\n\nTop Ten Similar Conferences to %s (CPAPC meta path):' % conferenceQueryName)
        mostSimilarNodes = strategy.findMostSimilarNodes(target, number)

        # Build one row per returned node, capped at `number`. Indexing 0..number-1 directly
        # would raise IndexError whenever fewer than `number` nodes come back.
        dataRows = []
        for rank, node in enumerate(mostSimilarNodes[:number], 1):
            relevance = experimentHelper.getLabelForNode(target, node)
            dataRows.append([rank, node.name, relevance])

        conferenceTable = texttable.Texttable()
        headerRow = [['Rank', 'Conference', 'Relevance']]
        conferenceTable.add_rows(headerRow + dataRows)
        self.output(conferenceTable.draw())

        # Output the nDCG for these results
        nDCG = CumulativeGainMeasures.normalizedDiscountedCumulativeGain(
            target, mostSimilarNodes, experimentHelper.labelDictionary)
        self.output('%1.3f' % nDCG)