Beispiel #1
0
    def doGrid(self):
        """Run the parameter grid search over trigger, booster and edge settings.

        Trigger examples are built once for ``self.optData``. Every
        (trigger, booster, edge) combination is then classified with the
        classifier model file addressed by its parameters, and the resulting
        XML is handed to ``self.evaluateGrid`` together with the best results
        found so far. Afterwards the best booster value is saved as
        "recallAdjustParameter" to ``self.model`` and ``self.combinedModel``;
        when ``self.fullGrid`` is set, the best trigger and edge classifier
        models are also added to ``self.model``. Intermediate "grid-*" files
        under ``self.workDir`` are deleted before returning.
        """
        print >> sys.stderr, "--------- Parameter grid search ---------"
        # Build trigger examples
        self.triggerDetector.buildExamples(
            self.model, [self.optData],
            [self.workDir + "grid-trigger-examples"])

        # The model key holding the classifier parameters depends on whether
        # the full grid is searched.
        if self.fullGrid:
            parameterKey = "classifier-parameters-train"
        else:
            parameterKey = "classifier-parameter"
        stepParams = {
            "trigger":
            Parameters.get(self.model.getStr(
                self.triggerDetector.tag + parameterKey,
                defaultIfNotExist=""),
                           valueListKey="c"),
            "booster":
            [float(value) for value in self.recallAdjustParameters.split(",")],
            "edge":
            Parameters.get(self.model.getStr(
                self.edgeDetector.tag + parameterKey,
                defaultIfNotExist=""),
                           valueListKey="c")
        }

        # Expand the classifier parameters into individual combinations, each
        # represented by its string form.
        for step in ["trigger", "edge"]:
            stepParams[step] = [
                Parameters.toString(combination)
                for combination in Parameters.getCombinations(stepParams[step])
            ]
        stepOrder = ["trigger", "booster", "edge"]
        print >> sys.stderr, "Parameters", [
            stepParams[step] for step in stepOrder
        ]
        paramCombinations = combine(*[stepParams[step] for step in stepOrder])
        print >> sys.stderr, "Combinations", paramCombinations
        # Turn each positional combination into a dictionary keyed by step
        paramCombinations = [{
            "trigger": combination[0],
            "booster": combination[1],
            "edge": combination[2]
        } for combination in paramCombinations]

        #paramCombinations = Parameters.getCombinations(ALL_PARAMS, ["trigger", "booster", "edge"])
        prevParams = None
        EDGE_MODEL_STEM = os.path.join(
            self.edgeDetector.workDir,
            os.path.normpath(self.model.path) + "-edge-models/model")
        TRIGGER_MODEL_STEM = os.path.join(
            self.triggerDetector.workDir,
            os.path.normpath(self.model.path) + "-trigger-models/model")
        self.structureAnalyzer.load(self.model)
        bestResults = None
        numCombinations = len(paramCombinations)
        for index, params in enumerate(paramCombinations):
            print >> sys.stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            print >> sys.stderr, "Processing params", str(
                index + 1) + "/" + str(numCombinations), params
            print >> sys.stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            # Triggers and Boost (the trigger predictions are recalculated only when the relevant parameters change)
            if (prevParams is None
                    or prevParams["trigger"] != params["trigger"]
                    or prevParams["booster"] != params["booster"]):
                print >> sys.stderr, "Classifying trigger examples for parameters", "trigger:" + str(
                    params["trigger"]), "booster:" + str(params["booster"])
                xml = self.triggerDetector.classifyToXML(
                    self.optData,
                    self.model,
                    self.workDir + "grid-trigger-examples",
                    self.workDir + "grid-",
                    classifierModel=TRIGGER_MODEL_STEM +
                    Parameters.toId(params["trigger"]),
                    recallAdjust=params["booster"],
                    useExistingExamples=True)
            prevParams = params
            ## Build edge examples
            #self.edgeDetector.buildExamples(self.model, [xml], [self.workDir+"grid-edge-examples"], [self.optData])
            # Classify with pre-defined model
            edgeClassifierModel = EDGE_MODEL_STEM + Parameters.toId(
                params["edge"])
            # NOTE(review): "xml" is rebound to the edge output here, so when
            # the trigger step is skipped on the next iteration the edge
            # classifier receives the previous edge output instead of the
            # trigger predictions - confirm this is intended.
            xml = self.edgeDetector.classifyToXML(
                xml,
                self.model,
                self.workDir + "grid-edge-examples",
                self.workDir + "grid-",
                classifierModel=edgeClassifierModel,
                goldData=self.optData)
            bestResults = self.evaluateGrid(xml, params, bestResults)
        # Remove remaining intermediate grid files
        for tag1 in ["edge", "trigger", "unmerging"]:
            for tag2 in ["examples", "pred.xml.gz"]:
                gridFile = self.workDir + "grid-" + tag1 + "-" + tag2
                if os.path.exists(gridFile):
                    os.remove(gridFile)
        print >> sys.stderr, "Parameter grid search complete"
        print >> sys.stderr, "Tested", numCombinations, "combinations"
        print >> sys.stderr, "Best parameters:", bestResults[0]
        print >> sys.stderr, "Best result:", bestResults[2]  # f-score
        # Save grid model
        bestParams = bestResults[0]
        self.saveStr("recallAdjustParameter", str(bestParams["booster"]),
                     self.model)
        self.saveStr("recallAdjustParameter", str(bestParams["booster"]),
                     self.combinedModel, False)
        if self.fullGrid:  # define best models
            # NOTE(review): during the search the model files were addressed
            # with Parameters.toId(...), whereas str(...) is appended here -
            # confirm both produce the same path.
            self.triggerDetector.addClassifierModel(
                self.model,
                TRIGGER_MODEL_STEM + str(bestParams["trigger"]),
                bestParams["trigger"])
            self.edgeDetector.addClassifierModel(
                self.model, EDGE_MODEL_STEM + str(bestParams["edge"]),
                bestParams["edge"])
        # Remove work files. The step tags carry no trailing dash, so every
        # suffix must supply the "-" separator itself; otherwise names such as
        # "grid-triggerexamples.gz" are probed and nothing is removed.
        for stepTag in [
                self.workDir + "grid-trigger", self.workDir + "grid-edge",
                self.workDir + "grid-unmerging"
        ]:
            for fileStem in [
                    "-classifications", "-classifications.log",
                    "-examples.gz", "-pred.xml.gz"
            ]:
                if os.path.exists(stepTag + fileStem):
                    os.remove(stepTag + fileStem)
Beispiel #2
0
    def doGrid(self):
        """Run the parameter grid search over trigger, booster and edge settings.

        Trigger examples are built once for ``self.optData``. Every
        (trigger, booster, edge) combination is then classified with the
        classifier model file addressed by its parameters, and the resulting
        XML is handed to ``self.evaluateGrid`` together with the best results
        found so far. Afterwards the best booster value is saved as
        "recallAdjustParameter" to ``self.model`` and ``self.combinedModel``;
        when ``self.fullGrid`` is set, the best trigger and edge classifier
        models are also added to ``self.model``. Intermediate "grid-*" files
        under ``self.workDir`` are deleted before returning.
        """
        print >> sys.stderr, "--------- Parameter grid search ---------"
        # Build trigger examples
        # NOTE(review): the examples are written with a ".gz" suffix here, but
        # classifyToXML below is given the name without it - confirm that the
        # detector resolves both to the same file.
        self.triggerDetector.buildExamples(self.model, [self.optData], [self.workDir+"grid-trigger-examples.gz"])

        # The model key holding the classifier parameters depends on whether
        # the full grid is searched.
        if self.fullGrid:
            parameterKey = "classifier-parameters-train"
        else:
            parameterKey = "classifier-parameter"
        stepParams = {
            "trigger":Parameters.get(self.model.getStr(self.triggerDetector.tag+parameterKey, defaultIfNotExist=""), valueListKey="c"),
            "booster":[float(value) for value in self.recallAdjustParameters.split(",")],
            "edge":Parameters.get(self.model.getStr(self.edgeDetector.tag+parameterKey, defaultIfNotExist=""), valueListKey="c")}

        # Expand the classifier parameters into individual combinations, each
        # represented by its string form.
        for step in ["trigger", "edge"]:
            stepParams[step] = [Parameters.toString(combination) for combination in Parameters.getCombinations(stepParams[step])]
        stepOrder = ["trigger", "booster", "edge"]
        print >> sys.stderr, [stepParams[step] for step in stepOrder]
        paramCombinations = combine(*[stepParams[step] for step in stepOrder])
        print >> sys.stderr, paramCombinations
        # Turn each positional combination into a dictionary keyed by step
        paramCombinations = [{"trigger":combination[0], "booster":combination[1], "edge":combination[2]} for combination in paramCombinations]

        #paramCombinations = Parameters.getCombinations(ALL_PARAMS, ["trigger", "booster", "edge"])
        prevParams = None
        EDGE_MODEL_STEM = os.path.join(self.edgeDetector.workDir, os.path.normpath(self.model.path)+"-edge-models/model")
        TRIGGER_MODEL_STEM = os.path.join(self.triggerDetector.workDir, os.path.normpath(self.model.path)+"-trigger-models/model")
        self.structureAnalyzer.load(self.model)
        bestResults = None
        numCombinations = len(paramCombinations)
        for index, params in enumerate(paramCombinations):
            print >> sys.stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            print >> sys.stderr, "Processing params", str(index+1) + "/" + str(numCombinations), params
            print >> sys.stderr, "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            # Triggers and Boost: the trigger predictions depend on both the
            # trigger parameters and the booster (passed as recallAdjust), so
            # they must be recalculated when either of them changes.
            if prevParams is None or prevParams["trigger"] != params["trigger"] or prevParams["booster"] != params["booster"]:
                print >> sys.stderr, "Classifying trigger examples for parameters", "trigger:" + str(params["trigger"]), "booster:" + str(params["booster"])
                xml = self.triggerDetector.classifyToXML(self.optData, self.model, self.workDir+"grid-trigger-examples", self.workDir+"grid-", classifierModel=TRIGGER_MODEL_STEM + Parameters.toId(params["trigger"]), recallAdjust=params["booster"])
            prevParams = params
            ## Build edge examples
            #self.edgeDetector.buildExamples(self.model, [xml], [self.workDir+"grid-edge-examples"], [self.optData])
            # Classify with pre-defined model
            edgeClassifierModel = EDGE_MODEL_STEM + Parameters.toId(params["edge"])
            xml = self.edgeDetector.classifyToXML(xml, self.model, self.workDir+"grid-edge-examples", self.workDir+"grid-", classifierModel=edgeClassifierModel, goldData=self.optData)
            bestResults = self.evaluateGrid(xml, params, bestResults)
        # Remove remaining intermediate grid files
        for tag1 in ["edge", "trigger", "unmerging"]:
            for tag2 in ["examples", "pred.xml.gz"]:
                gridFile = self.workDir+"grid-"+tag1+"-"+tag2
                if os.path.exists(gridFile):
                    os.remove(gridFile)
        print >> sys.stderr, "Parameter grid search complete"
        print >> sys.stderr, "Tested", numCombinations, "combinations"
        print >> sys.stderr, "Best parameters:", bestResults[0]
        print >> sys.stderr, "Best result:", bestResults[2] # f-score
        # Save grid model
        bestParams = bestResults[0]
        self.saveStr("recallAdjustParameter", str(bestParams["booster"]), self.model)
        self.saveStr("recallAdjustParameter", str(bestParams["booster"]), self.combinedModel, False)
        if self.fullGrid: # define best models
            self.triggerDetector.addClassifierModel(self.model, TRIGGER_MODEL_STEM+str(bestParams["trigger"]), bestParams["trigger"])
            self.edgeDetector.addClassifierModel(self.model, EDGE_MODEL_STEM+str(bestParams["edge"]), bestParams["edge"])
        # Remove work files. The step tags carry no trailing dash, so every
        # suffix must supply the "-" separator itself; this is what lets the
        # "grid-trigger-examples.gz" file built above be cleaned up.
        for stepTag in [self.workDir+"grid-trigger", self.workDir+"grid-edge", self.workDir+"grid-unmerging"]:
            for fileStem in ["-classifications", "-classifications.log", "-examples.gz", "-pred.xml.gz"]:
                if os.path.exists(stepTag+fileStem):
                    os.remove(stepTag+fileStem)