Ejemplo n.º 1
0
    def run(self):
        """
          Execute the retrieval phase, then the ranking phase, and store the ranking under self.dictionary['results'].

          Progress is announced through self.update(...) before each phase and once more ("done") at the very end.
          NOTE(review): update() is expected to release the lock so that a waiting callback fires -- confirm
          against its definition, which is not visible from here.
        """

        # --- Retrieval: query for the entity's attribute values and dump them to a scratch file ---
        self.update("Running retrieval phase")
        resultLimit = 50
        facade = GoogleSearchFacade(resultLimit, True, [])
        entityIds = [self.entityId]
        retrieval = EntityAttributeValues(entityIds, facade, ExactAttributeValuesQueryBuilder(), resultLimit)
        retrieval.run()

        # The scratch file name is derived from the entity id; the ranking phase is handed the same name below
        scratchPath = "tmp-output" + self.entityId
        retrieval.printResults(scratchPath)

        # --- Ranking: score the retrieved results with every extension, in this order ---
        self.update("Running ranking phase")
        rankingExtensions = []
        rankingExtensions.append(PageRankExtension())
        rankingExtensions.append(YQLKeywordExtension())
        rankingExtensions.append(ExpandedYQLKeywordExtension())
        rankingExtensions.append(BaselineScoreExtension())
        ranker = RankingExperiment(scratchPath, self.entity, retrieval, rankingExtensions, True, True)

        # Publish the ranking before announcing completion, so it is already in place when "done" is signalled
        self.dictionary['results'] = ranker.rank()

        # Final status update
        self.update("done")
        ]

        for experiment in experiments:

            print "Running experiment %s for %s" % (experiment[0], entityId)

            # Find the project root & open the input entity
            projectRoot = str(os.getcwd())
            projectRoot = projectRoot[: projectRoot.find("EntityQuerier") + len("EntityQuerier")]
            entity = load(open(projectRoot + "/entities/%s.json" % entityId))

            # Rank the results
            entityName = entityId.replace(" ", "").replace("-", "")
            retrievalResults = "/experiments/retrieval/results/%s/AttributeValues" % entityName
            extensions = []
            rankingExperiment = RankingExperiment(projectRoot + retrievalResults, entity, experiment[1], extensions)
            results = rankingExperiment.rank()

            # Output the ranking results
            outputTitle = "Results Summary (for top %d results):\n"
            outputFile = "dmoz/" + entityName + "/" + experiment[0]
            outputRankingResults(entityId, outputFile, outputTitle, projectRoot, results)

        # Cleanup all the indexes
        try:
            output = subprocess.check_call(
                [
                    "rm",
                    "-rf",
                    ".dmoz-10000-index",
                    ".dmoz-1000-index",