예제 #1
0
    def testFindAllSimilarityFromNodeOnPathSimExampleThree(self):
        """
          Tests similarity for all other nodes given a single node, using example 3 from PathSim paper.

          Builds the sample graph, asks the PathSim strategy for up to five nodes most
          similar to 'Mike' along the Author-Paper-Conference-Paper-Author meta path, and
          checks that the result is exactly [Bob, Mary, Jim], in that order.
        """

        # The third returned value (the conference map) is not needed by this test
        graph, authorMap, _ = SampleGraphUtility.constructPathSimExampleThree()
        metaPath = [Author, Paper, Conference, Paper, Author]
        strategy = PathSimStrategy(graph, metaPath)

        mike = authorMap['Mike']
        # Five results are requested, but only three are expected back
        mostSimilarNodes = strategy.findMostSimilarNodes(mike, 5)

        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual([authorMap['Bob'], authorMap['Mary'], authorMap['Jim']], mostSimilarNodes)
예제 #2
0
    def testFindAllSimilarityFromNodeOnPathSimExampleThree(self):
        """
          Tests similarity for all other nodes given a single node, using example 3 from PathSim paper.

          The query node is the author 'Mike' and the meta path is
          Author-Paper-Conference-Paper-Author. Up to five similar nodes are requested;
          the expected ranking is Bob, then Mary, then Jim.
        """

        # Only the graph and the author lookup are used; the conference map is ignored
        graph, authorMap, _ = SampleGraphUtility.constructPathSimExampleThree()
        metaPath = [Author, Paper, Conference, Paper, Author]
        strategy = PathSimStrategy(graph, metaPath)

        mike = authorMap['Mike']
        mostSimilarNodes = strategy.findMostSimilarNodes(mike, 5)

        # Order matters here: the comparison is list-to-list.
        # assertEqual is used because the assertEquals alias is deprecated
        # and no longer exists in Python 3.12+.
        expectedNodes = [authorMap['Bob'], authorMap['Mary'], authorMap['Jim']]
        self.assertEqual(expectedNodes, mostSimilarNodes)
    def run(self):
        """
          Outputs, for each query conference, a ranked table of its most similar
          conferences followed by the nDCG of that ranking.

          Similarity is computed by a PathSimStrategy over self.graph using the
          Conference-Paper-Author-Paper-Conference meta path. Relevance labels come
          from a LabeledExperimentHelper constructed with data/dbis/query_label/PathSim.
          All text is emitted through self.output.

          Raises AssertionError if a query name does not match exactly one node.
        """

        # NOTE(review): the meaning of the third positional argument (True) is not
        # visible from here -- confirm against PathSimStrategy's constructor.
        strategy = PathSimStrategy(
            self.graph, [Conference, Paper, Author, Paper, Conference], True)
        experimentHelper = LabeledExperimentHelper(
            os.path.join('data', 'dbis', 'query_label', 'PathSim'))
        conferenceQueryNames = [
            'SIGMOD Conference', 'VLDB', 'ICDE', 'PODS', 'EDBT', 'DASFAA',
            'KDD', 'ICDM', 'PKDD', 'SDM', 'PAKDD', 'WWW', 'SIGIR', 'TREC',
            'APWeb'
        ]
        # Number of similar conferences requested per query (loop-invariant)
        number = 10

        for conferenceQueryName in conferenceQueryNames:
            conferences = experimentHelper.getNodesByAttribute(
                self.graph, 'name', conferenceQueryName)
            assert len(conferences) == 1, (
                'expected exactly one conference named %r, found %d' %
                (conferenceQueryName, len(conferences)))
            target = list(conferences)[0]

            # Output the top ten most similar conferences on the CPAPC meta path
            self.output(
                '\n\nTop Ten Similar Conferences to %s (CPAPC meta path):' %
                conferenceQueryName)
            mostSimilarNodes = strategy.findMostSimilarNodes(target, number)
            similarityTable = texttable.Texttable()
            headerRow = [['Rank', 'Conference', 'Relevance']]
            # Iterate over the nodes actually returned instead of indexing 0..number-1:
            # the strategy may return fewer than `number` nodes, which previously
            # raised IndexError. Ranks are 1-based.
            dataRows = [
                [rank, node.name,
                 experimentHelper.getLabelForNode(target, node)]
                for rank, node in enumerate(mostSimilarNodes[:number], 1)
            ]
            similarityTable.add_rows(headerRow + dataRows)
            self.output(similarityTable.draw())

            # Output the nDCG for these results
            self.output(
                '%1.3f' %
                CumulativeGainMeasures.normalizedDiscountedCumulativeGain(
                    target, mostSimilarNodes,
                    experimentHelper.labelDictionary))
    def run(self):
        """
          Runs the labeled PathSim experiment over a fixed list of query conferences.

          For every query name, the matching conference node is looked up in self.graph,
          its most similar conferences are found along the
          Conference-Paper-Author-Paper-Conference meta path, and two things are sent to
          self.output: a Rank / Conference / Relevance table and the nDCG of the ranking
          formatted with three decimals.

          Raises AssertionError if a query name does not match exactly one node.
        """

        # NOTE(review): the trailing True passed to PathSimStrategy is an opaque flag
        # from this vantage point -- confirm its meaning in PathSimStrategy.
        strategy = PathSimStrategy(self.graph, [Conference, Paper, Author, Paper, Conference], True)
        experimentHelper = LabeledExperimentHelper(os.path.join('data', 'dbis', 'query_label', 'PathSim'))
        conferenceQueryNames = [
            'SIGMOD Conference',
            'VLDB',
            'ICDE',
            'PODS',
            'EDBT',
            'DASFAA',
            'KDD',
            'ICDM',
            'PKDD',
            'SDM',
            'PAKDD',
            'WWW',
            'SIGIR',
            'TREC',
            'APWeb'
        ]
        # How many similar conferences to request for each query; same for every query
        number = 10

        for conferenceQueryName in conferenceQueryNames:
            conferences = experimentHelper.getNodesByAttribute(self.graph, 'name', conferenceQueryName)
            assert len(conferences) == 1, 'expected exactly one conference named %r, found %d' % (conferenceQueryName, len(conferences))
            target = list(conferences)[0]

            # Output the top ten most similar conferences on the CPAPC meta path
            self.output('\n\nTop Ten Similar Conferences to %s (CPAPC meta path):' % conferenceQueryName)
            mostSimilarNodes = strategy.findMostSimilarNodes(target, number)
            similarityTable = texttable.Texttable()
            headerRow = [['Rank', 'Conference', 'Relevance']]
            # Build one row per returned node (1-based rank). Walking the result list,
            # rather than indexing a fixed range of `number` positions, avoids an
            # IndexError when fewer than `number` similar nodes are returned.
            dataRows = []
            for rank, node in enumerate(mostSimilarNodes[:number], 1):
                dataRows.append([rank, node.name, experimentHelper.getLabelForNode(target, node)])
            similarityTable.add_rows(headerRow + dataRows)
            self.output(similarityTable.draw())

            # Output the nDCG for these results
            ndcg = CumulativeGainMeasures.normalizedDiscountedCumulativeGain(target, mostSimilarNodes, experimentHelper.labelDictionary)
            self.output('%1.3f' % ndcg)