Ejemplo n.º 1
0
 def classify(self, data, model, output, parse=None, task=None, goldData=None, workDir=None, fromStep=None, omitSteps=None, validate=False):
     """
     Classify 'data' with a trained model and write the predictions using 'output' as the file name stem.

     The predictions are written to output + "-pred.xml.gz" and evaluated with
     EvaluateInteractionXML. If the BioNLP Shared Task parameters resolved for
     the model request it, the predictions are also converted to Shared Task
     format (output + "-events.zip" or output + "-events.tar.gz") and
     evaluated with self.stEvaluator.

     @param data: The input to classify. Also passed to EvaluateInteractionXML.run together with the predictions.
     @param model: The model to use, passed through self.openModel.
     @param output: The stem for the names of all output files.
     @param parse: The parse name. If None, it is read from the model (key self.tag + "parse").
     @param task: The task passed to the Shared Task evaluator. If None, it is read from the model (key self.tag + "task") when needed.
     @param goldData: Forwarded to self.classifyToXML.
     @param workDir: The working directory. If None, self.setTempWorkDir() is called.
     @param fromStep: Accepted but not used by this method.
     @param omitSteps: Accepted but not used by this method.
     @param validate: If True, the predictions are validated with self.structureAnalyzer and written from memory, otherwise the prediction file is copied from the working directory.
     """
     model = self.openModel(model, "r")
     self.enterState(self.STATE_CLASSIFY)
     self.setWorkDir(workDir)
     if workDir is None:
         self.setTempWorkDir()
     try:
         # NOTE(review): the model was already opened above; this second call looks redundant
         # but is kept because openModel is not visible here -- confirm before removing.
         model = self.openModel(model, "r")
         if parse is None:
             parse = self.getStr(self.tag+"parse", model)
         workOutputTag = os.path.join(self.workDir, os.path.basename(output) + "-")
         xml = self.classifyToXML(data, model, None, workOutputTag,
             model.get(self.tag+"classifier-model", defaultIfNotExist=None), goldData, parse, float(model.getStr("recallAdjustParameter", defaultIfNotExist=1.0)))
         if validate:
             self.structureAnalyzer.load(model)
             self.structureAnalyzer.validate(xml)
             ETUtils.write(xml, output+"-pred.xml.gz")
         else:
             shutil.copy2(workOutputTag+self.tag+"pred.xml.gz", output+"-pred.xml.gz")
         EvaluateInteractionXML.run(self.evaluator, xml, data, parse)
         stParams = self.getBioNLPSharedTaskParams(self.bioNLPSTParams, model)
         if stParams["convert"]:
             extension = ".zip" if (stParams["convert"] == "zip") else ".tar.gz"
             stOutput = output+"-events" + extension
             Utils.STFormat.ConvertXML.toSTFormat(xml, stOutput, outputTag=stParams["a2Tag"], writeExtra=(stParams["scores"] == True))
             if stParams["evaluate"]:
                 if task is None:
                     task = self.getStr(self.tag+"task", model)
                 self.stEvaluator.evaluate(stOutput, task)
     finally:
         # Clean up the work directory even if classification or evaluation fails
         self.deleteTempWorkDir()
     self.exitState()
Ejemplo n.º 2
0
def evaluateChemProt(xml, gold):
    """
    Evaluate predictions with both the internal evaluator and the ChemProt evaluator.

    The predictions are first compared against 'gold' with EvaluateIXML. They
    are then exported with the "EXPORT_CHEMPROT" preprocessor step to a TSV
    file in a temporary directory, which is given to ChemProtEvaluator. The
    temporary directory is removed afterwards, also when the export or the
    evaluation fails.

    @param xml: The predictions, passed to EvaluateIXML.run and Preprocessor.process.
    @param gold: The gold data, passed to EvaluateIXML.run.
    """
    EvaluateIXML.run(AveragingMultiClassEvaluator, xml, gold, "McCC")
    preprocessor = Preprocessor(steps=["EXPORT_CHEMPROT"])
    tempDir = tempfile.mkdtemp()
    print >> sys.stderr, "Using temporary evaluation directory", tempDir
    try:
        tsvPath = os.path.join(tempDir, "predictions.tsv")
        preprocessor.process(xml, tsvPath)
        ChemProtEvaluator().evaluateTSV(tsvPath, tempDir)
    finally:
        # mkdtemp leaves deletion to the caller, so clean up on every exit path
        print >> sys.stderr, "Removing temporary evaluation directory", tempDir
        shutil.rmtree(tempDir)
Ejemplo n.º 3
0
def evaluateChemProt(xml, gold):
    """
    Evaluate predictions with both the internal evaluator and the ChemProt evaluator.

    The predictions are first compared against 'gold' with EvaluateIXML. They
    are then exported with the "EXPORT_CHEMPROT" preprocessor step to a TSV
    file in a temporary directory, which is given to ChemProtEvaluator. The
    temporary directory is removed afterwards, also when the export or the
    evaluation fails.

    @param xml: The predictions, passed to EvaluateIXML.run and Preprocessor.process.
    @param gold: The gold data, passed to EvaluateIXML.run.
    """
    EvaluateIXML.run(AveragingMultiClassEvaluator, xml, gold, "McCC")
    preprocessor = Preprocessor(steps=["EXPORT_CHEMPROT"])
    tempDir = tempfile.mkdtemp()
    print >> sys.stderr, "Using temporary evaluation directory", tempDir
    try:
        tsvPath = os.path.join(tempDir, "predictions.tsv")
        preprocessor.process(xml, tsvPath)
        ChemProtEvaluator().evaluateTSV(tsvPath, tempDir)
    finally:
        # mkdtemp leaves deletion to the caller, so clean up on every exit path
        print >> sys.stderr, "Removing temporary evaluation directory", tempDir
        shutil.rmtree(tempDir)
Ejemplo n.º 4
0
 def classify(self, data, model, output, parse=None, task=None):
     """
     Classify 'data' with a trained model, evaluate the predictions and
     write them in Shared Task format to output + ".tar.gz".

     @param data: The input to classify. Also passed to EvaluateInteractionXML.run together with the predictions.
     @param model: The model to use, passed through self.openModel.
     @param output: The stem for the names of the output files.
     @param parse: The parse name. If None, it is read from the model (key self.tag + "parse").
     @param task: The task for self.stEvaluator. If None, it is read from the model (key self.tag + "task").
     """
     self.enterState(self.STATE_CLASSIFY)
     model = self.openModel(model, "r")
     # Settings not given by the caller are read from the model
     if parse is None:
         parse = self.getStr(self.tag+"parse", model)
     if task is None:
         task = self.getStr(self.tag+"task", model)
     # Classification
     outputTag = output + "-"
     classifierModel = model.get(self.tag+"classifier-model")
     recallAdjust = float(model.get("recallAdjustParameter"))
     xml = self.classifyToXML(data, model, None, outputTag, classifierModel, None, parse, recallAdjust)
     # Evaluation and Shared Task format export
     EvaluateInteractionXML.run(self.evaluator, xml, data, parse)
     stArchive = output+".tar.gz"
     STFormat.ConvertXML.toSTFormat(xml, stArchive, outputTag="a2")
     if self.stEvaluator is not None:
         self.stEvaluator.evaluate(stArchive, task)
     self.exitState()
Ejemplo n.º 5
0
 def trainUnmergingDetector(self):
     """
     Run the training steps of the unmerging detector.

     The steps "SELF-TRAIN-EXAMPLES-FOR-UNMERGING", "UNMERGING-EXAMPLES",
     "BEGIN-UNMERGING-MODEL" and "END-UNMERGING-MODEL" are each passed to
     self.checkStep, and are executed only if self.unmerging is set.
     """
     predictions = None
     if not self.unmerging:
         print >> sys.stderr, "No unmerging"

     if self.checkStep("SELF-TRAIN-EXAMPLES-FOR-UNMERGING", self.unmerging) and self.unmerging:
         # Classify the training data with the trigger and edge detectors
         if not self.doUnmergingSelfTraining:
             print >> sys.stderr, "No self-training for unmerging"
         else:
             triggerStyle = copy.copy(Parameters.get(self.triggerExampleStyle))
             edgeStyle = copy.copy(Parameters.get(self.edgeExampleStyle))
             unmergingStyle = Parameters.get(self.unmergingExampleStyle)
             # A sentence limit set for unmerging is applied to both detectors (allows limiting to a subcorpus)
             if "sentenceLimit" in unmergingStyle and unmergingStyle["sentenceLimit"]:
                 for style in (triggerStyle, edgeStyle):
                     style["sentenceLimit"] = unmergingStyle["sentenceLimit"]
             extraTag = self.workDir+"unmerging-extra-"
             predictions = self.triggerDetector.classifyToXML(self.trainData, self.model, None, extraTag, exampleStyle=triggerStyle)
             predictions = self.edgeDetector.classifyToXML(predictions, self.model, None, extraTag, exampleStyle=edgeStyle)
             assert predictions is not None
             EvaluateInteractionXML.run(self.edgeDetector.evaluator, predictions, self.trainData, self.parse)

     if self.checkStep("UNMERGING-EXAMPLES", self.unmerging) and self.unmerging:
         # The example builder is given the files without the "-nodup" marker in their names
         goldOpt = self.optData.replace("-nodup", "")
         goldTrain = self.trainData.replace("-nodup", "")
         if self.doUnmergingSelfTraining:
             if predictions is None:
                 # Use the prediction file written by the self-training step
                 predictions = self.workDir+"unmerging-extra-edge-pred.xml.gz"
             inputs = [goldOpt, [goldTrain, predictions]]
             golds = [goldOpt, [goldTrain, goldTrain]]
         else:
             inputs = [goldOpt, goldTrain]
             golds = [goldOpt, goldTrain]
         exampleFiles = [self.workDir+"unmerging-opt-examples.gz", self.workDir+"unmerging-train-examples.gz"]
         self.unmergingDetector.buildExamples(self.model, inputs, exampleFiles, golds,
                                              exampleStyle=self.unmergingExampleStyle, saveIdsToModel=True)
         predictions = None

     if self.checkStep("BEGIN-UNMERGING-MODEL", self.unmerging) and self.unmerging:
         self.unmergingDetector.beginModel(None, self.model,
                                           self.workDir+"unmerging-train-examples.gz",
                                           self.workDir+"unmerging-opt-examples.gz")

     if self.checkStep("END-UNMERGING-MODEL", self.unmerging) and self.unmerging:
         self.unmergingDetector.endModel(None, self.model, self.workDir+"unmerging-opt-examples.gz")
         print >> sys.stderr, "Adding unmerging classifier model to test-set event model"
         if self.combinedModel is not None:
             # Copy the unmerging settings, id sets and classifier model to the combined model
             self.combinedModel.addStr("unmerging-example-style", self.model.getStr("unmerging-example-style"))
             for member in ("unmerging-ids.classes", "unmerging-ids.features"):
                 self.combinedModel.insert(self.model.get(member), member)
             classifierModel = self.model.get("unmerging-classifier-model", True)
             classifierParameter = self.model.getStr("unmerging-classifier-parameter")
             self.unmergingDetector.addClassifierModel(self.combinedModel, classifierModel, classifierParameter)
             self.combinedModel.save()
Ejemplo n.º 6
0
 def classify(self, data, model, output, parse=None, task=None):
     """
     Classify 'data' with a trained model, evaluate the predictions and
     write them in Shared Task format to output + ".tar.gz".

     @param data: The input to classify. Also passed to EvaluateInteractionXML.run together with the predictions.
     @param model: The model to use, passed through self.openModel.
     @param output: The stem for the names of the output files.
     @param parse: The parse name. If None, it is read from the model (key self.tag + "parse").
     @param task: The task for self.stEvaluator. If None, it is read from the model (key self.tag + "task").
     """
     self.enterState(self.STATE_CLASSIFY)
     model = self.openModel(model, "r")
     # Settings not given by the caller are read from the model
     if parse is None:
         parse = self.getStr(self.tag + "parse", model)
     if task is None:
         task = self.getStr(self.tag + "task", model)
     # Classification
     outputTag = output + "-"
     classifierModel = model.get(self.tag + "classifier-model")
     recallAdjust = float(model.get("recallAdjustParameter"))
     xml = self.classifyToXML(data, model, None, outputTag, classifierModel,
                              None, parse, recallAdjust)
     # Evaluation and Shared Task format export
     EvaluateInteractionXML.run(self.evaluator, xml, data, parse)
     stArchive = output + ".tar.gz"
     STFormat.ConvertXML.toSTFormat(xml, stArchive, outputTag="a2")
     if self.stEvaluator is not None:
         self.stEvaluator.evaluate(stArchive, task)
     self.exitState()
Ejemplo n.º 7
0
 def classify(self,
              data,
              model,
              output,
              parse=None,
              task=None,
              goldData=None,
              workDir=None,
              fromStep=None,
              omitSteps=None,
              validate=False):
     """
     Classify 'data' with a trained model and write the predictions using 'output' as the file name stem.

     The predictions are written to output + "-pred.xml.gz" and evaluated with
     EvaluateInteractionXML. If the BioNLP Shared Task parameters resolved for
     the model request it, the predictions are also converted to Shared Task
     format (output + "-events.zip" or output + "-events.tar.gz") and
     evaluated with self.stEvaluator.

     @param data: The input to classify. Also passed to EvaluateInteractionXML.run together with the predictions.
     @param model: The model to use, passed through self.openModel.
     @param output: The stem for the names of all output files.
     @param parse: The parse name. If None, it is read from the model (key self.tag + "parse").
     @param task: The task passed to the Shared Task evaluator. If None, it is read from the model (key self.tag + "task") when needed.
     @param goldData: Forwarded to self.classifyToXML.
     @param workDir: The working directory. If None, self.setTempWorkDir() is called.
     @param fromStep: Accepted but not used by this method.
     @param omitSteps: Accepted but not used by this method.
     @param validate: If True, the predictions are validated with self.structureAnalyzer and written from memory, otherwise the prediction file is copied from the working directory.
     """
     model = self.openModel(model, "r")
     self.enterState(self.STATE_CLASSIFY)
     self.setWorkDir(workDir)
     if workDir is None:
         self.setTempWorkDir()
     try:
         # NOTE(review): the model was already opened above; this second call looks redundant
         # but is kept because openModel is not visible here -- confirm before removing.
         model = self.openModel(model, "r")
         if parse is None:
             parse = self.getStr(self.tag + "parse", model)
         workOutputTag = os.path.join(self.workDir,
                                      os.path.basename(output) + "-")
         xml = self.classifyToXML(
             data, model, None, workOutputTag,
             model.get(self.tag + "classifier-model", defaultIfNotExist=None),
             goldData, parse,
             float(model.getStr("recallAdjustParameter",
                                defaultIfNotExist=1.0)))
         if validate:
             self.structureAnalyzer.load(model)
             self.structureAnalyzer.validate(xml)
             ETUtils.write(xml, output + "-pred.xml.gz")
         else:
             shutil.copy2(workOutputTag + self.tag + "pred.xml.gz",
                          output + "-pred.xml.gz")
         EvaluateInteractionXML.run(self.evaluator, xml, data, parse)
         stParams = self.getBioNLPSharedTaskParams(self.bioNLPSTParams, model)
         if stParams["convert"]:
             extension = ".zip" if (stParams["convert"] == "zip") else ".tar.gz"
             stOutput = output + "-events" + extension
             Utils.STFormat.ConvertXML.toSTFormat(
                 xml,
                 stOutput,
                 outputTag=stParams["a2Tag"],
                 writeExtra=(stParams["scores"] == True))
             if stParams["evaluate"]:
                 if task is None:
                     task = self.getStr(self.tag + "task", model)
                 self.stEvaluator.evaluate(stOutput, task)
     finally:
         # Clean up the work directory even if classification or evaluation fails
         self.deleteTempWorkDir()
     self.exitState()
Ejemplo n.º 8
0
 def evaluateGrid(self, xml, params, bestResults):
     """
     Evaluate the predictions of one parameter grid point and keep track of the best point.

     The predicted edges are first evaluated with EvaluateInteractionXML. If
     self.unmerging is set, they are then processed by the unmerging detector.
     If self.bioNLPSTParams["evaluate"] is set, the result is converted to
     Shared Task format and evaluated with self.stEvaluator. The Shared Task
     result is used for ranking when available, otherwise the
     EvaluateInteractionXML result is used.

     @param xml: The predicted edges, or None if there are none.
     @param params: The parameters of this grid point, stored in the returned tuple.
     @param bestResults: None, or the (params, result, score) tuple returned by an earlier call.
     @return: bestResults, replaced with the (params, result, score) tuple of this grid point if its score is higher.
     """
     if xml is None:
         print >> sys.stderr, "No predicted edges"
         return bestResults
     # TODO: Where should the EvaluateInteractionXML evaluator come from?
     EIXMLResult = EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, self.optData, self.parse)
     flatDir = self.workDir+"grid-flat-geniaformat"
     unmergingDir = self.workDir+"grid-unmerging-geniaformat"
     # Convert to ST-format
     stFormatDir = None
     if self.unmerging:
         xml = self.unmergingDetector.classifyToXML(xml, self.model, None, self.workDir+"grid-", goldData=self.optData)
         if self.bioNLPSTParams["evaluate"]:
             Utils.STFormat.ConvertXML.toSTFormat(xml, unmergingDir, "a2")
             stFormatDir = unmergingDir
     elif self.bioNLPSTParams["evaluate"]:
         Utils.STFormat.ConvertXML.toSTFormat(xml, flatDir, "a2")
         stFormatDir = flatDir
     # Attempt shared task evaluation
     stEvaluation = None
     if self.bioNLPSTParams["evaluate"]:
         stEvaluation = self.stEvaluator.evaluate(stFormatDir, self.task)
     if stEvaluation is not None:
         result, score = stEvaluation, stEvaluation[0]
     else: # If shared task evaluation was not done (failed or not requested) fall back to internal evaluation
         result, score = EIXMLResult, EIXMLResult.getData().fscore
     # Compare against the stored score rather than the stored result object, as the
     # previous best may have been produced by the other kind of evaluation.
     if bestResults is None or score > bestResults[2]:
         bestResults = (params, result, score)
     # Remove ST-format files
     for stDir in (flatDir, unmergingDir):
         if os.path.exists(stDir):
             shutil.rmtree(stDir)
     return bestResults
Ejemplo n.º 9
0
 def evaluateGrid(self, xml, params, bestResults):
     """
     Evaluate the predictions of one parameter grid point and keep track of the best point.

     The predicted edges are evaluated with EvaluateInteractionXML and
     converted to Shared Task format. If self.unmerging is set, they are also
     processed by the unmerging detector and that result is converted as well.
     The last converted directory is evaluated with self.stEvaluator. The
     Shared Task result is used for ranking when it is not None, otherwise the
     EvaluateInteractionXML result is used.

     @param xml: The predicted edges, or None if there are none.
     @param params: The parameters of this grid point, stored in the returned tuple.
     @param bestResults: None, or the (params, result, score) tuple returned by an earlier call.
     @return: bestResults, replaced with the (params, result, score) tuple of this grid point if its score is higher.
     """
     if xml is None:
         print >> sys.stderr, "No predicted edges"
         return bestResults
     # TODO: Where should the EvaluateInteractionXML evaluator come from?
     EIXMLResult = EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, self.optData, self.parse)
     flatDir = self.workDir+"grid-flat-geniaformat"
     unmergingDir = self.workDir+"grid-unmerging-geniaformat"
     # Convert to ST-format
     STFormat.ConvertXML.toSTFormat(xml, flatDir, "a2")
     stFormatDir = flatDir
     if self.unmerging:
         xml = self.unmergingDetector.classifyToXML(xml, self.model, None, self.workDir+"grid-", goldData=self.optData.replace("-nodup", ""))
         STFormat.ConvertXML.toSTFormat(xml, unmergingDir, "a2")
         stFormatDir = unmergingDir
     stEvaluation = self.stEvaluator.evaluate(stFormatDir, self.task)
     if stEvaluation is not None:
         result, score = stEvaluation, stEvaluation[0]
     else: # No shared task result, fall back to internal evaluation
         result, score = EIXMLResult, EIXMLResult.getData().fscore
     # Compare against the stored score rather than the stored result object, as the
     # previous best may have been produced by the other kind of evaluation.
     if bestResults is None or score > bestResults[2]:
         bestResults = (params, result, score)
     # Remove ST-format files
     shutil.rmtree(flatDir)
     if os.path.exists(unmergingDir):
         shutil.rmtree(unmergingDir)
     return bestResults
Ejemplo n.º 10
0
 def classify(self, data, model, output, parse=None, task=None, fromStep=None, toStep=None):
     """
     Classify 'data' by running the selected steps of the event detection pipeline.

     The steps are "TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS" and
     "ST-CONVERT". Each one is executed only if self.checkStep accepts it.
     A step that has no result from the previous step in memory gets its
     input through self.getWorkFile.

     @param data: The input to classify, stored with self.initVariables.
     @param model: The model to use, opened with self.openModel.
     @param output: The stem for the names of all output files.
     @param parse: The parse name, stored with self.initVariables.
     @param task: The task, stored with self.initVariables.
     @param fromStep: Passed to self.enterState.
     @param toStep: Passed to self.enterState.
     """
     BINARY_RECALL_MODE = False # TODO: make a parameter
     predictions = None
     self.initVariables(classifyData=data, model=model, xml=None, task=task, parse=parse)
     stepNames = ["TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS", "ST-CONVERT"]
     self.enterState(self.STATE_CLASSIFY, stepNames, fromStep, toStep)
     self.model = self.openModel(self.model, "r")

     if self.checkStep("TRIGGERS"):
         recallAdjust = float(self.getStr("recallAdjustParameter", self.model))
         predictions = self.triggerDetector.classifyToXML(self.classifyData, self.model, None, output + "-",
                                                          parse=self.parse, recallAdjust=recallAdjust)

     if self.checkStep("EDGES"):
         predictions = self.getWorkFile(predictions, output + "-recall-adjusted.xml.gz")
         predictions = self.edgeDetector.classifyToXML(predictions, self.model, None, output + "-", parse=self.parse)
         assert predictions is not None
         # Without an explicit parse, use the one stored for the edge detector
         if self.parse is not None:
             edgeParse = self.parse
         else:
             edgeParse = self.getStr(self.edgeDetector.tag+"parse", self.model)
         EvaluateInteractionXML.run(self.edgeDetector.evaluator, predictions, None, edgeParse)

     if self.checkStep("UNMERGING"):
         if not self.model.hasMember("unmerging-classifier-model"):
             print >> sys.stderr, "No model for unmerging"
         else:
             # To avoid running out of memory, always use file on disk
             predictions = self.getWorkFile(None, output + "-edge-pred.xml.gz")
             goldData = None
             if type(self.classifyData) in types.StringTypes:
                 # Gold data is used only if the file without "-nodup" in its name exists
                 goldCandidate = self.classifyData.replace("-nodup", "")
                 if os.path.exists(goldCandidate):
                     goldData = goldCandidate
             predictions = self.unmergingDetector.classifyToXML(predictions, self.model, None, output + "-",
                                                                goldData=goldData, parse=self.parse)

     if self.checkStep("MODIFIERS"):
         if not self.model.hasMember("modifier-classifier-model"):
             print >> sys.stderr, "No model for modifier detection"
         else:
             candidates = [output + "-unmerging-pred.xml.gz", output + "-edge-pred.xml.gz"]
             predictions = self.getWorkFile(predictions, candidates)
             predictions = self.modifierDetector.classifyToXML(predictions, self.model, None, output + "-", parse=self.parse)

     if self.checkStep("ST-CONVERT"):
         candidates = [output + "-modifier-pred.xml.gz", output + "-unmerging-pred.xml.gz", output + "-edge-pred.xml.gz"]
         predictions = self.getWorkFile(predictions, candidates)
         STFormat.ConvertXML.toSTFormat(predictions, output+"-events.tar.gz", outputTag="a2", writeScores=self.stWriteScores)
         if self.stEvaluator is not None:
             task = self.task
             if task is None:
                 task = self.getStr(self.edgeDetector.tag+"task", self.model)
             self.stEvaluator.evaluate(output + "-events.tar.gz", task)
     self.exitState()
Ejemplo n.º 11
0
 def classify(self, data, model, output, parse=None, task=None, goldData=None, workDir=None, fromStep=None, omitSteps=None):
     """
     Classify 'data' with a trained model and write the predictions using 'output' as the file name stem.

     The predictions are copied to output + "-pred.xml.gz" and evaluated with
     EvaluateInteractionXML. If the BioNLP Shared Task parameters resolved for
     the model request it, the predictions are also converted to Shared Task
     format (output + "-events.tar.gz") and evaluated with self.stEvaluator.

     @param data: The input to classify. Also passed to EvaluateInteractionXML.run together with the predictions.
     @param model: The model to use, passed through self.openModel.
     @param output: The stem for the names of all output files.
     @param parse: The parse name. If None, it is read from the model (key self.tag + "parse").
     @param task: The task passed to the Shared Task evaluator. If None, it is read from the model (key self.tag + "task").
     @param goldData: Forwarded to self.classifyToXML.
     @param workDir: The working directory. If None, self.setTempWorkDir() is called.
     @param fromStep: Accepted but not used by this method.
     @param omitSteps: Accepted but not used by this method.
     """
     model = self.openModel(model, "r")
     self.enterState(self.STATE_CLASSIFY)
     self.setWorkDir(workDir)
     if workDir is None:
         self.setTempWorkDir()
     try:
         # NOTE(review): the model was already opened above; this second call looks redundant
         # but is kept because openModel is not visible here -- confirm before removing.
         model = self.openModel(model, "r")
         if parse is None:
             parse = self.getStr(self.tag+"parse", model)
         if task is None:
             task = self.getStr(self.tag+"task", model)
         workOutputTag = os.path.join(self.workDir, os.path.basename(output) + "-")
         xml = self.classifyToXML(data, model, None, workOutputTag,
             model.get(self.tag+"classifier-model"), goldData, parse, float(model.getStr("recallAdjustParameter", defaultIfNotExist=1.0)))
         shutil.copy2(workOutputTag+self.tag+"pred.xml.gz", output+"-pred.xml.gz")
         EvaluateInteractionXML.run(self.evaluator, xml, data, parse)
         stParams = self.getBioNLPSharedTaskParams(self.bioNLPSTParams, model)
         if stParams["convert"]:
             stOutput = output+"-events.tar.gz"
             Utils.STFormat.ConvertXML.toSTFormat(xml, stOutput, outputTag="a2")
             if stParams["evaluate"]:
                 self.stEvaluator.evaluate(stOutput, task)
     finally:
         # Clean up the work directory even if classification or evaluation fails
         self.deleteTempWorkDir()
     self.exitState()
Ejemplo n.º 12
0
 def evaluateGrid(self, xml, params, bestResults):
     """
     Evaluate the predictions of one parameter grid point and keep track of the best point.

     The predicted edges are first evaluated with EvaluateInteractionXML. If
     self.unmerging is set, they are then processed by the unmerging detector.
     If self.bioNLPSTParams["evaluate"] is set, the result is converted to
     Shared Task format and evaluated with self.stEvaluator. The Shared Task
     result is used for ranking when available, otherwise the
     EvaluateInteractionXML result is used.

     @param xml: The predicted edges, or None if there are none.
     @param params: The parameters of this grid point, stored in the returned tuple.
     @param bestResults: None, or the (params, result, score) tuple returned by an earlier call.
     @return: bestResults, replaced with the (params, result, score) tuple of this grid point if its score is higher.
     """
     if xml is None:
         print >> sys.stderr, "No predicted edges"
         return bestResults
     # TODO: Where should the EvaluateInteractionXML evaluator come from?
     EIXMLResult = EvaluateInteractionXML.run(
         self.edgeDetector.evaluator, xml, self.optData, self.parse)
     flatDir = self.workDir + "grid-flat-geniaformat"
     unmergingDir = self.workDir + "grid-unmerging-geniaformat"
     # Convert to ST-format
     stFormatDir = None
     if self.unmerging:
         xml = self.unmergingDetector.classifyToXML(
             xml,
             self.model,
             None,
             self.workDir + "grid-",
             goldData=self.optData)
         if self.bioNLPSTParams["evaluate"]:
             Utils.STFormat.ConvertXML.toSTFormat(xml, unmergingDir, "a2")
             stFormatDir = unmergingDir
     elif self.bioNLPSTParams["evaluate"]:
         Utils.STFormat.ConvertXML.toSTFormat(xml, flatDir, "a2")
         stFormatDir = flatDir
     # Attempt shared task evaluation
     stEvaluation = None
     if self.bioNLPSTParams["evaluate"]:
         stEvaluation = self.stEvaluator.evaluate(stFormatDir, self.task)
     if stEvaluation is not None:
         result, score = stEvaluation, stEvaluation[0]
     else:  # If shared task evaluation was not done (failed or not requested) fall back to internal evaluation
         result, score = EIXMLResult, EIXMLResult.getData().fscore
     # Compare against the stored score rather than the stored result object, as the
     # previous best may have been produced by the other kind of evaluation.
     if bestResults is None or score > bestResults[2]:
         bestResults = (params, result, score)
     # Remove ST-format files
     for stDir in (flatDir, unmergingDir):
         if os.path.exists(stDir):
             shutil.rmtree(stDir)
     return bestResults
Ejemplo n.º 13
0
    def evaluateGrid(self, xml, params, bestResults):
        """
        Evaluate the predictions of one parameter grid point and keep track of the best point.

        The predicted edges are evaluated with EvaluateInteractionXML and
        converted to Shared Task format. If self.unmerging is set, they are also
        processed by the unmerging detector and that result is converted as well.
        The last converted directory is evaluated with self.stEvaluator. The
        Shared Task result is used for ranking when it is not None, otherwise the
        EvaluateInteractionXML result is used.

        @param xml: The predicted edges, or None if there are none.
        @param params: The parameters of this grid point, stored in the returned tuple.
        @param bestResults: None, or the (params, result, score) tuple returned by an earlier call.
        @return: bestResults, replaced with the (params, result, score) tuple of this grid point if its score is higher.
        """
        if xml is None:
            print >> sys.stderr, "No predicted edges"
            return bestResults
        # TODO: Where should the EvaluateInteractionXML evaluator come from?
        EIXMLResult = EvaluateInteractionXML.run(
            self.edgeDetector.evaluator, xml, self.optData, self.parse)
        flatDir = self.workDir + "grid-flat-geniaformat"
        unmergingDir = self.workDir + "grid-unmerging-geniaformat"
        # Convert to ST-format
        STFormat.ConvertXML.toSTFormat(xml, flatDir, "a2")
        stFormatDir = flatDir
        if self.unmerging:
            xml = self.unmergingDetector.classifyToXML(
                xml,
                self.model,
                None,
                self.workDir + "grid-",
                goldData=self.optData.replace("-nodup", ""))
            STFormat.ConvertXML.toSTFormat(xml, unmergingDir, "a2")
            stFormatDir = unmergingDir
        stEvaluation = self.stEvaluator.evaluate(stFormatDir, self.task)
        if stEvaluation is not None:
            result, score = stEvaluation, stEvaluation[0]
        else:  # No shared task result, fall back to internal evaluation
            result, score = EIXMLResult, EIXMLResult.getData().fscore
        # Compare against the stored score rather than the stored result object, as the
        # previous best may have been produced by the other kind of evaluation.
        if bestResults is None or score > bestResults[2]:
            bestResults = (params, result, score)
        # Remove ST-format files
        shutil.rmtree(flatDir)
        if os.path.exists(unmergingDir):
            shutil.rmtree(unmergingDir)
        return bestResults
Ejemplo n.º 14
0
# Experiment script: classify the trigger examples of the test file with an
# existing model, write the predictions to interaction XML and evaluate them
# against the gold test file.

# Candidate values for the classifier's "c" parameter. Only referenced by the
# commented-out optimize() call below.
CLASSIFIER_PARAMS="c:25000,50000,87500"
WORKDIR="/usr/share/biotext/GeniaChallenge/SharedTaskTriggerTest"
# Name used for both the parse and the tokenization arguments below
PARSE_TOK="split-Charniak-Lease"

# NOTE(review): presumably selects WORKDIR as the working directory and starts logging -- confirm against the helpers
workdir(WORKDIR, False)
log()


# Trigger detection

#Gazetteer.run(TRAIN_FILE, "gazetteer-train")
#GeneralEntityTypeRecognizer.run(TRAIN_FILE, "trigger-train-examples", PARSE_TOK, PARSE_TOK, "style:typed", "trigger-ids")

# Build the test examples, classify them with a previously trained model and evaluate the classifications.
# NOTE(review): the model name uses c=75000, which is not one of the values in CLASSIFIER_PARAMS -- confirm it exists
GeneralEntityTypeRecognizer.run(TEST_FILE, "trigger-test-examples", PARSE_TOK, PARSE_TOK, "style:typed", "trigger-ids")
Cls.test("trigger-test-examples", "trigger-param-opt/model-c_75000", "trigger-test-classifications")
evaluator = Ev.evaluate("trigger-test-examples", "trigger-test-classifications", "trigger-ids.class_names")

#evaluator = optimize(Cls, Ev, "trigger-train-examples", "trigger-test-examples",\
#    "trigger-ids.class_names", CLASSIFIER_PARAMS, "trigger-param-opt")[0]

# Write the classifications to interaction XML based on the test file
ExampleUtils.writeToInteractionXML(evaluator.classifications, TEST_FILE, "test-predicted-triggers.xml", "trigger-ids.class_names", PARSE_TOK, PARSE_TOK)

# RecallAdjust.run("test-predicted-triggers.xml",1.0,"test-predicted-triggers-adj.xml")
# ix.splitMergedElements("test-predicted-triggers-adj.xml", "test-predicted-triggers-adj-split.xml")
# ix.recalculateIds("test-predicted-triggers-adj-split.xml", "test-predicted-triggers-adj-split-recids.xml", True)
# EvaluateInteractionXML.run(Ev, "test-predicted-triggers-adj-split-recids.xml", GOLD_TEST_FILE, PARSE_TOK, PARSE_TOK)

# Split merged elements, recalculate the ids and evaluate the result against the gold test file
ix.splitMergedElements("test-predicted-triggers.xml", "test-predicted-triggers-split.xml")
ix.recalculateIds("test-predicted-triggers-split.xml", "test-predicted-triggers-split-recids.xml", True)
EvaluateInteractionXML.run(Ev, "test-predicted-triggers-split-recids.xml", GOLD_TEST_FILE, PARSE_TOK, PARSE_TOK)
Ejemplo n.º 15
0
 def trainUnmergingDetector(self):
     """
     Run the training steps of the unmerging detector.

     The steps "SELF-TRAIN-EXAMPLES-FOR-UNMERGING", "UNMERGING-EXAMPLES",
     "BEGIN-UNMERGING-MODEL" and "END-UNMERGING-MODEL" are each passed to
     self.checkStep, and are executed only if self.unmerging is set.
     """
     predictions = None
     if not self.unmerging:
         print >> sys.stderr, "No unmerging"

     if self.checkStep("SELF-TRAIN-EXAMPLES-FOR-UNMERGING", self.unmerging) and self.unmerging:
         # Classify the training data with the trigger and edge detectors
         if not self.doUnmergingSelfTraining:
             print >> sys.stderr, "No self-training for unmerging"
         else:
             triggerStyle = copy.copy(Parameters.get(self.triggerExampleStyle))
             edgeStyle = copy.copy(Parameters.get(self.edgeExampleStyle))
             unmergingStyle = Parameters.get(self.unmergingExampleStyle)
             # A sentence limit set for unmerging is applied to both detectors (allows limiting to a subcorpus)
             if "sentenceLimit" in unmergingStyle and unmergingStyle["sentenceLimit"]:
                 for style in (triggerStyle, edgeStyle):
                     style["sentenceLimit"] = unmergingStyle["sentenceLimit"]
             extraTag = self.workDir + "unmerging-extra-"
             predictions = self.triggerDetector.classifyToXML(
                 self.trainData, self.model, None, extraTag, exampleStyle=triggerStyle)
             predictions = self.edgeDetector.classifyToXML(
                 predictions, self.model, None, extraTag, exampleStyle=edgeStyle)
             assert predictions is not None
             EvaluateInteractionXML.run(self.edgeDetector.evaluator, predictions,
                                        self.trainData, self.parse)

     if self.checkStep("UNMERGING-EXAMPLES", self.unmerging) and self.unmerging:
         # The example builder is given the files without the "-nodup" marker in their names
         goldOpt = self.optData.replace("-nodup", "")
         goldTrain = self.trainData.replace("-nodup", "")
         if self.doUnmergingSelfTraining:
             if predictions is None:
                 # Use the prediction file written by the self-training step
                 predictions = self.workDir + "unmerging-extra-edge-pred.xml.gz"
             inputs = [goldOpt, [goldTrain, predictions]]
             golds = [goldOpt, [goldTrain, goldTrain]]
         else:
             inputs = [goldOpt, goldTrain]
             golds = [goldOpt, goldTrain]
         exampleFiles = [
             self.workDir + "unmerging-opt-examples.gz",
             self.workDir + "unmerging-train-examples.gz"
         ]
         self.unmergingDetector.buildExamples(
             self.model, inputs, exampleFiles, golds,
             exampleStyle=self.unmergingExampleStyle,
             saveIdsToModel=True)
         predictions = None

     if self.checkStep("BEGIN-UNMERGING-MODEL", self.unmerging) and self.unmerging:
         self.unmergingDetector.beginModel(
             None, self.model,
             self.workDir + "unmerging-train-examples.gz",
             self.workDir + "unmerging-opt-examples.gz")

     if self.checkStep("END-UNMERGING-MODEL", self.unmerging) and self.unmerging:
         self.unmergingDetector.endModel(
             None, self.model, self.workDir + "unmerging-opt-examples.gz")
         print >> sys.stderr, "Adding unmerging classifier model to test-set event model"
         if self.combinedModel is not None:
             # Copy the unmerging settings, id sets and classifier model to the combined model
             self.combinedModel.addStr(
                 "unmerging-example-style",
                 self.model.getStr("unmerging-example-style"))
             for member in ("unmerging-ids.classes", "unmerging-ids.features"):
                 self.combinedModel.insert(self.model.get(member), member)
             classifierModel = self.model.get("unmerging-classifier-model", True)
             classifierParameter = self.model.getStr("unmerging-classifier-parameter")
             self.unmergingDetector.addClassifierModel(
                 self.combinedModel, classifierModel, classifierParameter)
             self.combinedModel.save()
Ejemplo n.º 16
0
def train(output, task=None, detector=None, inputFiles=None, models=None, parse=None,
          processUnmerging=None, processModifiers=None, 
          bioNLPSTParams=None, preprocessorParams=None, exampleStyles=None, 
          classifierParams=None,  doFullGrid=False, deleteOutput=False, copyFrom=None, 
          log="log.txt", step=None, omitSteps=None, debug=False, connection=None, subset=None, 
          folds=None, corpusDir=None, corpusPreprocessing=None, evaluator=None):
    """
    Train a new model for event or relation detection.
    
    The function runs up to four top-level steps, selected with 'step' and
    'omitSteps': "TRAIN" (train the detector and save its type to the models),
    "DEVEL" (classify the devel set), "EMPTY" (classify an emptied devel set)
    and "TEST" (classify the test set, if the test file exists).
    
    @param output: A directory where output files will appear.
    @param task: If defined, overridable default settings are used for many of the training parameters. Must be one of the supported TEES tasks.
    @param detector: a Detector object, or a string defining one to be imported
    @param inputFiles: A dictionary of file names, with keys "train", "devel" and, "test"
    @param models: A dictionary of file names defining the place for the new models, with keys "devel" and, "test"
    @param parse: The parse element name in the training interaction XML
    @param processUnmerging: Use the unmerging step of EventDetector. True, False or None for task default.
    @param processModifiers: Use the modifier detection step of EventDetector. True, False or None for task default.
    @param bioNLPSTParams: Parameters controlling BioNLP ST format output.
    @param preprocessorParams: Parameters controlling the preprocessor. Not used for training, but saved to the model for use when classifying.
    @param exampleStyles: A parameter set for controlling example builders.
    @param classifierParams: A parameter set for controlling classifiers.
    @param doFullGrid: Whether all parameters, as opposed to just recall adjustment, are tested in the EventDetector grid search.
    @param deleteOutput: Remove an existing output directory
    @param copyFrom: Copy an existing output directory for use as a template
    @param log: An optional alternative name for the log file. None is for no logging.
    @param step: A step=substep pair, where the steps are "TRAIN", "DEVEL", "EMPTY" and "TEST"
    @param omitSteps: step=substep parameters, where multiple substeps can be defined.
    @param debug: In debug mode, more output is shown, and some temporary intermediate files are saved
    @param connection: A parameter set defining a local or remote connection for training the classifier
    @param subset: A parameter set for making subsets of input files
    @param folds: A dictionary with keys "train", "devel" and "test", passed to getTaskSettings and getFolds.
    @param corpusDir: Forwarded to getTaskSettings.
    @param corpusPreprocessing: Preprocessor steps applied to the input files before training. The first step must be "MERGE_SETS" and the last "DIVIDE_SETS". Requires 'task' to be defined, as the task name is used for the preprocessed file names.
    @param evaluator: An evaluator definition passed to getDetector and importClass. If one is resolved, it replaces the detector's evaluator.
    """
    # Insert default arguments where needed
    inputFiles = setDictDefaults(inputFiles, {"train":None, "devel":None, "test":None})
    models = setDictDefaults(models, {"devel":"model-devel", "test":"model-test"})
    exampleStyles = setDictDefaults(exampleStyles, {"examples":None, "trigger":None, "edge":None, "unmerging":None, "modifiers":None})
    classifierParams = setDictDefaults(classifierParams, {"examples":None, "trigger":None, "recall":None, "edge":None, "unmerging":None, "modifiers":None})
    subset = setDictDefaults(Parameters.get(subset), {"train":None, "devel":None, "test":None, "seed":0, "all":None})
    folds = setDictDefaults(folds, {"train":None, "devel":None, "test":None})
    processUnmerging = getDefinedBool(processUnmerging)
    processModifiers = getDefinedBool(processModifiers)
    # Initialize working directory
    workdir(output, deleteOutput, copyFrom, log)
    # Get task specific parameters
    useKerasDetector = False
    # 'detector' may also be a Detector object (see the docstring), which has no
    # lower() method, so only string-like values are inspected for "keras".
    if detector is not None and hasattr(detector, "lower") and "keras" in detector.lower():
        print >> sys.stderr, "Using a Keras Detector"
        useKerasDetector = True
        if detector.lower() == "keras":
            detector = None
    detector, bioNLPSTParams, preprocessorParams, folds = getTaskSettings(task, detector, 
        bioNLPSTParams, preprocessorParams, inputFiles, exampleStyles, classifierParams, folds, corpusDir=corpusDir, useKerasDetector=useKerasDetector)
    # Learn training settings from input files
    detector = learnSettings(inputFiles, detector, classifierParams, task, exampleStyles, useKerasDetector=useKerasDetector)
    # Get corpus subsets
    getFolds(inputFiles, folds)
    getSubsets(inputFiles, subset)
    if task is not None:
        task = task.replace("-FULL", "")
        # The sub-task check is nested under the None guard: testing '"." in task'
        # with task == None (the default) raises a TypeError.
        if "." in task:
            _, subTask = getSubTask(task)
            # Modifier detection is switched off for every sub-task except 3
            if subTask != 3:
                processModifiers = False
    # Preprocess the corpus if required
    if corpusPreprocessing is not None:
        if task is None:
            # The task name is part of the preprocessed corpus file names below
            raise ValueError("corpusPreprocessing requires the 'task' parameter to be defined")
        preprocessor = Preprocessor(steps=corpusPreprocessing)
        assert preprocessor.steps[0].name == "MERGE_SETS"
        assert preprocessor.steps[-1].name == "DIVIDE_SETS"
        preprocessedCorpusDir = os.path.join(output, "corpus")
        preprocessor.process(inputFiles, os.path.join(preprocessedCorpusDir, task))
        # Point each defined input set to its preprocessed counterpart
        for setName in inputFiles.keys():
            if inputFiles[setName] is not None:
                inputFiles[setName] = os.path.join(preprocessedCorpusDir, task + "-" + setName + ".xml")
    # Define processing steps
    selector, detectorSteps, omitDetectorSteps = getSteps(step, omitSteps, ["TRAIN", "DEVEL", "EMPTY", "TEST"])
    
    # Initialize the detector
    detector, detectorName = getDetector(detector, evaluator=evaluator)
    evaluator, evaluatorName = importClass(evaluator, "evaluator")
    detector = detector() # initialize object
    if evaluator is not None:
        print >> sys.stderr, "Using evaluator", evaluator.__name__
        detector.evaluator = evaluator
    detector.debug = debug
    detector.bioNLPSTParams = detector.getBioNLPSharedTaskParams(bioNLPSTParams)
    connection = getConnection(connection)
    detector.setConnection(connection)
    connection.debug = debug
    if deleteOutput:
        connection.clearWorkDir()
    
    # Train
    if selector.check("TRAIN"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------------ Train Detector ------------------"
        print >> sys.stderr, "----------------------------------------------------"
        # EventDetector takes separate styles and parameters for each of its
        # sub-detectors, other detectors take a single set of each.
        if not isinstance(detector, EventDetector):
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["examples"], classifierParams["examples"], parse, None, task,
                           fromStep=detectorSteps["TRAIN"], workDir="training", testData=inputFiles["test"])
        else:
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["trigger"], exampleStyles["edge"], exampleStyles["unmerging"], exampleStyles["modifiers"],
                           classifierParams["trigger"], classifierParams["edge"], classifierParams["unmerging"], classifierParams["modifiers"],
                           classifierParams["recall"], processUnmerging, processModifiers, 
                           doFullGrid, task, parse, None,
                           fromStep=detectorSteps["TRAIN"], workDir="training", testData=inputFiles["test"])
        # Save the detector type
        for modelPath in [models["devel"], models["test"]]:
            if modelPath is not None and os.path.exists(modelPath):
                savedModel = Model(modelPath, "a")
                savedModel.addStr("detector", detectorName)
                if evaluatorName is not None:
                    # Stored under its own key. The evaluator name was previously
                    # written to the "detector" key as well, replacing the
                    # detector name saved on the line above.
                    savedModel.addStr("evaluator", evaluatorName)
                if preprocessorParams is not None:
                    preprocessor = Preprocessor()
                    savedModel.addStr("preprocessorParams", Parameters.toString(preprocessor.getParameters(preprocessorParams)))
                savedModel.save()
                savedModel.close()
    if selector.check("DEVEL"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Check devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        detector.classify(inputFiles["devel"], models["devel"], "classification-devel/devel", goldData=inputFiles["devel"], fromStep=detectorSteps["DEVEL"], workDir="classification-devel")
    if selector.check("EMPTY"):
        # By passing an emptied devel set through the prediction system, we can check that we get the same predictions
        # as in the DEVEL step, ensuring the model does not use leaked information.
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Empty devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        # Choose how much annotation is removed from the devel set
        removalScope = "non-given"
        if "names" in str(exampleStyles["examples"]) or "names" in str(exampleStyles["trigger"]):
            removalScope = "all"
        elif "Edge" in detector.__class__.__name__:
            removalScope = "interactions"
        detector.classify(getEmptyCorpus(inputFiles["devel"], scope=removalScope), models["devel"], "classification-empty/devel-empty", fromStep=detectorSteps["EMPTY"], workDir="classification-empty")
        print >> sys.stderr, "*** Evaluate empty devel classification ***"
        if os.path.exists("classification-empty/devel-empty-pred.xml.gz"):
            EvaluateInteractionXML.run(detector.evaluator, "classification-empty/devel-empty-pred.xml.gz", inputFiles["devel"], parse)
        else:
            print >> sys.stderr, "No output file for evaluation"
    if selector.check("TEST"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------- Test set classification --------------"
        print >> sys.stderr, "----------------------------------------------------"
        if inputFiles["test"] is None or not os.path.exists(inputFiles["test"]):
            print >> sys.stderr, "Skipping, test file", inputFiles["test"], "does not exist"
        else:
            # Fall back to the devel model if no test model is defined
            testModel = models["test"] if models["test"] is not None else models["devel"]
            detector.classify(inputFiles["test"], testModel, "classification-test/test", fromStep=detectorSteps["TEST"], workDir="classification-test")
            if detector.bioNLPSTParams["convert"]:
                # The archive type follows the "convert" setting
                extension = ".zip" if (detector.bioNLPSTParams["convert"] == "zip") else ".tar.gz"
                Utils.STFormat.Compare.compare("classification-test/test-events" + extension, "classification-devel/devel-events" + extension, "a2")
    # Stop logging
    if log is not None:
        Stream.closeLog(log)
Ejemplo n.º 17
0
    def classify(self, data, model, output, parse=None, task=None, goldData=None, fromStep=None, toStep=None, omitSteps=None, workDir=None):
        """
        Classify 'data' with the trigger, edge, unmerging and modifier detectors in turn.
        
        The steps are "TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS" and "ST-CONVERT".
        Intermediate files are written with the prefix <workDir>/<basename of output>-
        and the last prediction file found is copied to <output>-pred.xml.gz.
        
        @param data: The data to classify, passed to the trigger detector
        @param model: The model, opened for reading with openModel
        @param output: Path stem for the final output files
        @param parse: The parse name. If None, the edge detector's parse is read from the model for evaluation.
        @param task: The task name for the shared task evaluator. If None, it is read from the model.
        @param goldData: Gold data for evaluation. If None, predictions are evaluated against the classified data.
        @param fromStep: Passed to enterState
        @param toStep: Passed to enterState
        @param omitSteps: Passed to enterState
        @param workDir: Directory for intermediate files. If None, a temporary work directory is set up.
        """
        xml = None
        model = self.openModel(model, "r")
        self.initVariables(classifyData=data, model=model, xml=None, task=task, parse=parse)
        self.enterState(self.STATE_CLASSIFY, ["TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS", "ST-CONVERT"], fromStep, toStep, omitSteps)
        self.setWorkDir(workDir)
        if workDir is None:
            self.setTempWorkDir()
        workOutputTag = os.path.join(self.workDir, os.path.basename(output) + "-")
        self.model = self.openModel(self.model, "r")
        stParams = self.getBioNLPSharedTaskParams(self.bioNLPSTParams, model)
        # Predictions are evaluated against the gold data when it is given,
        # otherwise against the classified data itself.
        evaluationData = goldData if goldData is not None else self.classifyData
        if self.checkStep("TRIGGERS"):
            xml = self.triggerDetector.classifyToXML(self.classifyData, self.model, None, workOutputTag, goldData=goldData, parse=self.parse, recallAdjust=float(self.getStr("recallAdjustParameter", self.model)))
        if self.checkStep("EDGES"):
            xml = self.getWorkFile(xml, workOutputTag + "trigger-pred.xml.gz")
            xml = self.edgeDetector.classifyToXML(xml, self.model, None, workOutputTag, goldData=goldData, parse=self.parse)
            assert xml != None
            edgeParse = self.parse
            if edgeParse is None:
                edgeParse = self.getStr(self.edgeDetector.tag+"parse", self.model)
            EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, evaluationData, edgeParse)
        if self.checkStep("UNMERGING"):
            if self.model.getStr("unmerging-classifier-parameter", None) != None:
                # To avoid running out of memory, always use file on disk
                xml = self.getWorkFile(None, workOutputTag + "edge-pred.xml.gz")
                xml = self.unmergingDetector.classifyToXML(xml, self.model, None, workOutputTag, goldData=goldData, parse=self.parse)
                # Evaluate after unmerging
                edgeParse = self.parse
                if edgeParse is None:
                    edgeParse = self.getStr(self.edgeDetector.tag+"parse", self.model)
                EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, evaluationData, edgeParse)
            else:
                print >> sys.stderr, "No model for unmerging"
        if self.checkStep("MODIFIERS"):
            if self.model.hasMember("modifier-classifier-model"):
                xml = self.getWorkFile(xml, [workOutputTag + "unmerging-pred.xml.gz", workOutputTag + "edge-pred.xml.gz"])
                xml = self.modifierDetector.classifyToXML(xml, self.model, None, workOutputTag, goldData=goldData, parse=self.parse)
            else:
                print >> sys.stderr, "No model for modifier detection"
        if self.checkStep("ST-CONVERT"):
            if stParams["convert"]:
                xml = self.getWorkFile(xml, [workOutputTag + "modifier-pred.xml.gz", workOutputTag + "unmerging-pred.xml.gz", workOutputTag + "edge-pred.xml.gz"])
                # The archive type follows the "convert" setting, so that the file
                # name matches the one callers derive from the same setting.
                # Previously the name was always "-events.tar.gz".
                extension = ".zip" if (stParams["convert"] == "zip") else ".tar.gz"
                eventsFile = output + "-events" + extension
                Utils.STFormat.ConvertXML.toSTFormat(xml, eventsFile, outputTag=stParams["a2Tag"], writeExtra=(stParams["scores"] == True))
                if stParams["evaluate"]:
                    task = self.task
                    if task is None:
                        task = self.getStr(self.edgeDetector.tag+"task", self.model)
                    self.stEvaluator.evaluate(eventsFile, task)
            else:
                print >> sys.stderr, "No BioNLP shared task format conversion"
        # Copy the output of the last step that produced a file to the final location
        finalXMLFile = self.getWorkFile(None, [workOutputTag + "modifier-pred.xml.gz", workOutputTag + "unmerging-pred.xml.gz", workOutputTag + "edge-pred.xml.gz"])
        if finalXMLFile != None:
            shutil.copy2(finalXMLFile, output+"-pred.xml.gz")
        self.deleteTempWorkDir()
        self.exitState()
Ejemplo n.º 18
0
 def classify(self,
              data,
              model,
              output,
              parse=None,
              task=None,
              fromStep=None,
              toStep=None):
     """
     Classify 'data' with the trigger, edge, unmerging and modifier detectors in turn.

     The steps are "TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS" and
     "ST-CONVERT". All intermediate and final files are written with the
     prefix <output>-.

     @param data: The data to classify, passed to the trigger detector
     @param model: The model, opened for reading with openModel
     @param output: Path stem for the output files
     @param parse: The parse name. If None, the edge detector's parse is read from the model for evaluation.
     @param task: The task name for the shared task evaluator. If None, it is read from the model.
     @param fromStep: Passed to enterState
     @param toStep: Passed to enterState
     """
     # TODO: make binary recall mode a parameter
     xml = None
     self.initVariables(classifyData=data, model=model, xml=None, task=task, parse=parse)
     stepNames = ["TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS", "ST-CONVERT"]
     self.enterState(self.STATE_CLASSIFY, stepNames, fromStep, toStep)
     self.model = self.openModel(self.model, "r")
     outputTag = output + "-"

     if self.checkStep("TRIGGERS"):
         recall = float(self.getStr("recallAdjustParameter", self.model))
         xml = self.triggerDetector.classifyToXML(self.classifyData, self.model, None, outputTag,
                                                  parse=self.parse, recallAdjust=recall)

     if self.checkStep("EDGES"):
         xml = self.getWorkFile(xml, outputTag + "recall-adjusted.xml.gz")
         xml = self.edgeDetector.classifyToXML(xml, self.model, None, outputTag, parse=self.parse)
         assert xml != None
         # Fall back to the parse stored in the model when none was given
         edgeParse = self.parse
         if edgeParse == None:
             edgeParse = self.getStr(self.edgeDetector.tag + "parse", self.model)
         EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml, None, edgeParse)

     if self.checkStep("UNMERGING"):
         if not self.model.hasMember("unmerging-classifier-model"):
             print >> sys.stderr, "No model for unmerging"
         else:
             # To avoid running out of memory, always use file on disk
             xml = self.getWorkFile(None, outputTag + "edge-pred.xml.gz")
             # When the classified data is a file name, the same name without
             # "-nodup" is used as gold data if such a file exists.
             goldData = None
             if type(self.classifyData) in types.StringTypes:
                 goldCandidate = self.classifyData.replace("-nodup", "")
                 if os.path.exists(goldCandidate):
                     goldData = goldCandidate
             xml = self.unmergingDetector.classifyToXML(xml, self.model, None, outputTag,
                                                        goldData=goldData, parse=self.parse)

     if self.checkStep("MODIFIERS"):
         if not self.model.hasMember("modifier-classifier-model"):
             print >> sys.stderr, "No model for modifier detection"
         else:
             modifierInputs = [outputTag + "unmerging-pred.xml.gz", outputTag + "edge-pred.xml.gz"]
             xml = self.getWorkFile(xml, modifierInputs)
             xml = self.modifierDetector.classifyToXML(xml, self.model, None, outputTag, parse=self.parse)

     if self.checkStep("ST-CONVERT"):
         convertInputs = [outputTag + "modifier-pred.xml.gz",
                          outputTag + "unmerging-pred.xml.gz",
                          outputTag + "edge-pred.xml.gz"]
         xml = self.getWorkFile(xml, convertInputs)
         eventsFile = outputTag + "events.tar.gz"
         STFormat.ConvertXML.toSTFormat(xml, eventsFile, outputTag="a2", writeScores=self.stWriteScores)
         if self.stEvaluator != None:
             evaluationTask = self.task
             if evaluationTask == None:
                 evaluationTask = self.getStr(self.edgeDetector.tag + "task", self.model)
             self.stEvaluator.evaluate(eventsFile, evaluationTask)
     self.exitState()
Ejemplo n.º 19
0
    def classify(self,
                 data,
                 model,
                 output,
                 parse=None,
                 task=None,
                 goldData=None,
                 fromStep=None,
                 toStep=None,
                 omitSteps=None,
                 workDir=None):
        """
        Classify 'data' with the trigger, edge, unmerging and modifier detectors in turn.

        The steps are "TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS" and
        "ST-CONVERT". Intermediate files are written with the prefix
        <workDir>/<basename of output>- and the last prediction file found is
        copied to <output>-pred.xml.gz.

        @param data: The data to classify, passed to the trigger detector
        @param model: The model, opened for reading with openModel
        @param output: Path stem for the final output files
        @param parse: The parse name. If None, the edge detector's parse is read from the model for evaluation.
        @param task: The task name for the shared task evaluator. If None, it is read from the model.
        @param goldData: Gold data for evaluation. If None, predictions are evaluated against the classified data.
        @param fromStep: Passed to enterState
        @param toStep: Passed to enterState
        @param omitSteps: Passed to enterState
        @param workDir: Directory for intermediate files. If None, a temporary work directory is set up.
        """
        xml = None
        model = self.openModel(model, "r")
        self.initVariables(classifyData=data,
                           model=model,
                           xml=None,
                           task=task,
                           parse=parse)
        self.enterState(
            self.STATE_CLASSIFY,
            ["TRIGGERS", "EDGES", "UNMERGING", "MODIFIERS", "ST-CONVERT"],
            fromStep, toStep, omitSteps)
        self.setWorkDir(workDir)
        if workDir is None:
            self.setTempWorkDir()
        workOutputTag = os.path.join(self.workDir,
                                     os.path.basename(output) + "-")
        self.model = self.openModel(self.model, "r")
        stParams = self.getBioNLPSharedTaskParams(self.bioNLPSTParams, model)
        # Candidate prediction files, from the last pipeline stage to the first
        edgePredFile = workOutputTag + "edge-pred.xml.gz"
        unmergingPredFile = workOutputTag + "unmerging-pred.xml.gz"
        modifierPredFile = workOutputTag + "modifier-pred.xml.gz"
        # Predictions are evaluated against the gold data when it is given,
        # otherwise against the classified data itself.
        evaluationData = goldData if goldData is not None else self.classifyData
        if self.checkStep("TRIGGERS"):
            xml = self.triggerDetector.classifyToXML(
                self.classifyData,
                self.model,
                None,
                workOutputTag,
                goldData=goldData,
                parse=self.parse,
                recallAdjust=float(
                    self.getStr("recallAdjustParameter", self.model)))
        if self.checkStep("EDGES"):
            xml = self.getWorkFile(xml, workOutputTag + "trigger-pred.xml.gz")
            xml = self.edgeDetector.classifyToXML(xml,
                                                  self.model,
                                                  None,
                                                  workOutputTag,
                                                  goldData=goldData,
                                                  parse=self.parse)
            assert xml != None
            edgeParse = self.parse
            if edgeParse is None:
                edgeParse = self.getStr(self.edgeDetector.tag + "parse",
                                        self.model)
            EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml,
                                       evaluationData, edgeParse)
        if self.checkStep("UNMERGING"):
            if self.model.getStr("unmerging-classifier-parameter",
                                 None) != None:
                # To avoid running out of memory, always use file on disk
                xml = self.getWorkFile(None, edgePredFile)
                xml = self.unmergingDetector.classifyToXML(xml,
                                                           self.model,
                                                           None,
                                                           workOutputTag,
                                                           goldData=goldData,
                                                           parse=self.parse)
                # Evaluate after unmerging
                edgeParse = self.parse
                if edgeParse is None:
                    edgeParse = self.getStr(self.edgeDetector.tag + "parse",
                                            self.model)
                EvaluateInteractionXML.run(self.edgeDetector.evaluator, xml,
                                           evaluationData, edgeParse)
            else:
                print >> sys.stderr, "No model for unmerging"
        if self.checkStep("MODIFIERS"):
            if self.model.hasMember("modifier-classifier-model"):
                xml = self.getWorkFile(xml, [unmergingPredFile, edgePredFile])
                xml = self.modifierDetector.classifyToXML(xml,
                                                          self.model,
                                                          None,
                                                          workOutputTag,
                                                          goldData=goldData,
                                                          parse=self.parse)
            else:
                print >> sys.stderr, "No model for modifier detection"
        if self.checkStep("ST-CONVERT"):
            if stParams["convert"]:
                xml = self.getWorkFile(
                    xml, [modifierPredFile, unmergingPredFile, edgePredFile])
                # The archive type follows the "convert" setting, so that the
                # file name matches the one callers derive from the same
                # setting. Previously the name was always "-events.tar.gz".
                if stParams["convert"] == "zip":
                    extension = ".zip"
                else:
                    extension = ".tar.gz"
                eventsFile = output + "-events" + extension
                Utils.STFormat.ConvertXML.toSTFormat(
                    xml,
                    eventsFile,
                    outputTag=stParams["a2Tag"],
                    writeExtra=(stParams["scores"] == True))
                if stParams["evaluate"]:
                    task = self.task
                    if task is None:
                        task = self.getStr(self.edgeDetector.tag + "task",
                                           self.model)
                    self.stEvaluator.evaluate(eventsFile, task)
            else:
                print >> sys.stderr, "No BioNLP shared task format conversion"
        # Copy the output of the last step that produced a file to the final
        # location
        finalXMLFile = self.getWorkFile(
            None, [modifierPredFile, unmergingPredFile, edgePredFile])
        if finalXMLFile != None:
            shutil.copy2(finalXMLFile, output + "-pred.xml.gz")
        self.deleteTempWorkDir()
        self.exitState()
Ejemplo n.º 20
0
# Trigger detection experiment on the test set. Each call below reads files
# written by the previous one, so the statement order matters.
# The disabled lines are earlier stages of the experiment (gazetteer and
# training example generation), presumably run once beforehand -- TODO confirm.
#Gazetteer.run(TRAIN_FILE, "gazetteer-train")
#GeneralEntityTypeRecognizer.run(TRAIN_FILE, "trigger-train-examples", PARSE_TOK, PARSE_TOK, "style:typed", "trigger-ids")

# Write the test set examples to "trigger-test-examples"
GeneralEntityTypeRecognizer.run(TEST_FILE, "trigger-test-examples", PARSE_TOK,
                                PARSE_TOK, "style:typed", "trigger-ids")
# Classify the examples with a fixed model file from the "trigger-param-opt"
# directory (presumably the output of the disabled optimize() call below)
Cls.test("trigger-test-examples", "trigger-param-opt/model-c_75000",
         "trigger-test-classifications")
# Evaluate the classifications; the returned evaluator is used below for its
# 'classifications' attribute
evaluator = Ev.evaluate("trigger-test-examples",
                        "trigger-test-classifications",
                        "trigger-ids.class_names")

# Disabled alternative: obtain the evaluator from a parameter optimization run
#evaluator = optimize(Cls, Ev, "trigger-train-examples", "trigger-test-examples",\
#    "trigger-ids.class_names", CLASSIFIER_PARAMS, "trigger-param-opt")[0]

# Write the classifications into interaction XML based on the test file
ExampleUtils.writeToInteractionXML(evaluator.classifications, TEST_FILE,
                                   "test-predicted-triggers.xml",
                                   "trigger-ids.class_names", PARSE_TOK,
                                   PARSE_TOK)

# Disabled variant of the steps below, with a recall adjustment step first
# RecallAdjust.run("test-predicted-triggers.xml",1.0,"test-predicted-triggers-adj.xml")
# ix.splitMergedElements("test-predicted-triggers-adj.xml", "test-predicted-triggers-adj-split.xml")
# ix.recalculateIds("test-predicted-triggers-adj-split.xml", "test-predicted-triggers-adj-split-recids.xml", True)
# EvaluateInteractionXML.run(Ev, "test-predicted-triggers-adj-split-recids.xml", GOLD_TEST_FILE, PARSE_TOK, PARSE_TOK)

# Split merged elements, recalculate the ids, and evaluate the resulting
# file against the gold test file
ix.splitMergedElements("test-predicted-triggers.xml",
                       "test-predicted-triggers-split.xml")
ix.recalculateIds("test-predicted-triggers-split.xml",
                  "test-predicted-triggers-split-recids.xml", True)
EvaluateInteractionXML.run(Ev, "test-predicted-triggers-split-recids.xml",
                           GOLD_TEST_FILE, PARSE_TOK, PARSE_TOK)