コード例 #1
0
ファイル: Preprocessor.py プロジェクト: jbjorne/TEES
 def __init__(self, steps, parseName="McCC", requireEntities=False):
     """
     Create the preprocessor and initialize its ToolChain base class.

     steps           -- forwarded unchanged to ToolChain.__init__
     parseName       -- stored as self.parseName
     requireEntities -- stored as self.requireEntities
     """
     # The parse settings are assigned before ToolChain.__init__ runs, so
     # they already exist on the instance while the base class initializes.
     self.parseName = parseName
     self.requireEntities = requireEntities
     ToolChain.__init__(self, steps)
     self.modelParameterStringName = "preprocessorParams"
コード例 #2
0
ファイル: Preprocessor.py プロジェクト: jbjorne/Tdevel
 def __init__(self):
     """
     Initialize the ToolChain base class and register the preprocessing steps.

     Each entry below is one addStep() argument list; entries are registered
     in the order they are written. The last entry (DIVIDE-SETS) is
     registered without a file name argument.
     """
     ToolChain.__init__(self)
     # presumably (step name, function, default arguments, intermediate
     # file name) -- verify against ToolChain.addStep
     stepDefinitions = (
         ("CONVERT", self.convert,
          {"dataSetNames": None, "corpusName": None},
          "documents.xml"),
         ("SPLIT-SENTENCES", Tools.GeniaSentenceSplitter.makeSentences,
          {"debug": False, "postProcess": True},
          "sentences.xml"),
         ("NER", Tools.BANNER.run,
          {"elementName": "entity", "processElement": "sentence",
           "debug": False, "splitNewlines": True},
          "ner.xml"),
         ("PARSE", Tools.CharniakJohnsonParser.parse,
          {"parseName": "McCC", "requireEntities": False, "debug": False},
          "parse.xml"),
         ("CONVERT-PARSE", Tools.StanfordParser.convertXML,
          {"parser": "McCC", "debug": False},
          "converted-parse.xml"),
         ("SPLIT-NAMES", ProteinNameSplitter.mainFunc,
          {"parseName": "McCC"},
          "split-names.xml"),
         ("FIND-HEADS", FindHeads.findHeads,
          {"parse": "McCC", "removeExisting": True},
          "heads.xml"),
         ("DIVIDE-SETS", self.divideSets,
          {"outputStem": None, "saveCombined": True}),
     )
     for definition in stepDefinitions:
         self.addStep(*definition)
コード例 #3
0
 def __init__(self, steps, parseName="McCC", requireEntities=False):
     """
     Build a preprocessor on top of ToolChain.

     The two parse-related options are kept as instance attributes
     (self.parseName, self.requireEntities); `steps` is handed to
     ToolChain.__init__ as-is.
     """
     # Keep these assignments ahead of the base-class constructor: the
     # attributes are then available during ToolChain.__init__.
     self.parseName = parseName
     self.requireEntities = requireEntities
     ToolChain.__init__(self, steps)
     self.modelParameterStringName = "preprocessorParams"
コード例 #4
0
ファイル: Preprocessor.py プロジェクト: ConesaLab/Padhoc
    def process(self,
                source,
                output,
                parameters=None,
                model=None,
                sourceDataSetNames=None,
                fromStep=None,
                toStep=None,
                omitSteps=None):
        """
        Run the tool chain on `source`, refusing to omit the CONVERT step.

        All arguments except `sourceDataSetNames` are forwarded unchanged to
        ToolChain.process; `sourceDataSetNames` is accepted but not used by
        this method.

        omitSteps -- None, a single step name (string) or a collection of
                     step names.

        Returns whatever ToolChain.process returns.
        Raises Exception if `omitSteps` names the "CONVERT" step.
        """
        if omitSteps is not None:
            if isinstance(omitSteps, types.StringTypes):
                # Compare a string exactly: a substring test ('in') would
                # also reject other step names that merely contain
                # "CONVERT", such as "CONVERT-PARSE".
                # NOTE(review): assumes a string is a single step name --
                # confirm against ToolChain's handling of omitSteps.
                convertOmitted = (omitSteps == "CONVERT")
            else:
                convertOmitted = "CONVERT" in omitSteps
            if convertOmitted:
                raise Exception("Preprocessor step 'CONVERT' may not be omitted")

        # Run the tool chain
        return ToolChain.process(self, source, output, parameters, model,
                                 fromStep, toStep, omitSteps)
コード例 #5
0
 def process(self, source, output, parameters=None, model=None,
             sourceDataSetNames=None, fromStep=None, toStep=None,
             omitSteps=None):
     """
     Run the tool chain by delegating to ToolChain.process.

     Every argument except `sourceDataSetNames` is passed through
     unchanged; `sourceDataSetNames` is accepted but not used here.

     Returns the value returned by ToolChain.process.
     """
     return ToolChain.process(self, source, output, parameters, model,
                              fromStep, toStep, omitSteps)
コード例 #6
0
ファイル: Preprocessor.py プロジェクト: jbjorne/TEES
 def save(self, input, output=None):
     """
     Save `input`, choosing the method from the pattern in `output`.

     - `output` is None or contains no "*": delegate to ToolChain.save.
     - `output` ends with "*.xml": call self.divideSets with the text
       before the "*" (trailing "-" characters removed) as the stem.
     - any other "*" pattern: the text before the "*" is the export
       directory (created if missing) and the text after it, with
       surrounding dots stripped, is the single format given to self.export.

     Returns the result of ToolChain.save or self.divideSets; the export
     branch returns None.
     """
     # 'output' defaults to None, and '"*" in None' raises TypeError, so
     # the None case has to be handled before the membership test.
     if output is None or "*" not in output:
         return ToolChain.save(self, input, output)
     if output.endswith("*.xml"):
         return self.divideSets(input, output.split("*")[0].rstrip("-"))
     exportPath, extension = output.split("*")
     extension = extension.strip(".")
     if not os.path.exists(exportPath):
         os.makedirs(exportPath)
     self.export(input, exportPath, [extension])
コード例 #7
0
 def save(self, input, output=None):
     """
     Write `input` to the destination described by `output`.

     A "*" in `output` selects a special mode:
     - "<stem>*.xml": self.divideSets(input, stem), where the stem is the
       text before the "*" with trailing "-" characters removed.
     - "<dir>*<ext>": self.export(input, dir, [ext]) after creating `dir`
       if it does not exist; dots around `ext` are stripped.
     Without a "*" (or when `output` is None) the call is delegated to
     ToolChain.save.

     Returns what divideSets / ToolChain.save return; None after an export.
     """
     # Guard the default value: a membership test on None raises TypeError.
     if output is None or "*" not in output:
         return ToolChain.save(self, input, output)
     if output.endswith("*.xml"):
         return self.divideSets(input, output.split("*")[0].rstrip("-"))
     exportPath, extension = output.split("*")
     extension = extension.strip(".")
     if not os.path.exists(exportPath):
         os.makedirs(exportPath)
     self.export(input, exportPath, [extension])
コード例 #8
0
 def process(self,
             source,
             output=None,
             model=None,
             fromStep=None,
             toStep=None,
             omitSteps=None,
             logPath=None):
     """
     Run the preprocessing steps on `source`, optionally writing a log file.

     logPath -- None or the string "None": no log file is opened.
                "AUTO": derive the log path from `output` (the text before
                a "*" if present, trailing "-", "/" and "\\" removed, plus
                "-log.txt"); no log file if `output` is None.
                Any other value is used as the log file path.

     The remaining arguments are forwarded to ToolChain.process.

     Returns the value returned by ToolChain.process.
     Raises Exception if no steps are defined.
     """
     if logPath == "AUTO":
         if output != None:
             logPath = output
             if "*" in logPath:
                 logPath = logPath.split("*")[0].rstrip("-")
             logPath = logPath.rstrip("/").rstrip("\\") + "-log.txt"
         else:
             logPath = None
     elif logPath == "None":
         logPath = None
     if logPath != None:
         logDir = os.path.dirname(logPath)
         # dirname is "" for a log path without a directory part, and
         # os.makedirs("") fails, so only create a directory that is named.
         if logDir and not os.path.exists(logDir):
             os.makedirs(logDir)
         Stream.openLog(logPath)
     print >> sys.stderr, "Preprocessor steps:", [
         x.name for x in self.steps
     ]
     if len(self.steps) == 0:
         raise Exception("No preprocessing steps defined")
     # Run the tool chain
     xml = ToolChain.process(self, source, output, model, fromStep, toStep,
                             omitSteps)
     # NOTE: the log is closed only on this normal path; if an exception
     # propagates from above, Stream.closeLog is not called.
     if logPath != None:
         Stream.closeLog(logPath)
     return xml
コード例 #9
0
 def __init__(self):
     """
     Initialize the ToolChain base class, then register the preprocessing
     steps CONVERT, SPLIT-SENTENCES, NER, PARSE, CONVERT-PARSE, SPLIT-NAMES,
     FIND-HEADS and DIVIDE-SETS, in that order, through addStep.
     """
     ToolChain.__init__(self)
     register = self.addStep
     # Arguments per call: step name, function, default arguments and
     # (for all but DIVIDE-SETS) a file name -- presumably the step's
     # intermediate output file; verify against ToolChain.addStep.
     register("CONVERT", self.convert,
              {"dataSetNames": None, "corpusName": None},
              "documents.xml")
     register("SPLIT-SENTENCES", Tools.GeniaSentenceSplitter.makeSentences,
              {"debug": False, "postProcess": True},
              "sentences.xml")
     register("NER", Tools.BANNER.run,
              {"elementName": "entity", "processElement": "sentence",
               "debug": False, "splitNewlines": True},
              "ner.xml")
     register("PARSE", Tools.CharniakJohnsonParser.parse,
              {"parseName": "McCC", "requireEntities": False, "debug": False},
              "parse.xml")
     register("CONVERT-PARSE", Tools.StanfordParser.convertXML,
              {"parser": "McCC", "debug": False},
              "converted-parse.xml")
     register("SPLIT-NAMES", ProteinNameSplitter.mainFunc,
              {"parseName": "McCC"},
              "split-names.xml")
     register("FIND-HEADS", FindHeads.findHeads,
              {"parse": "McCC", "removeExisting": True},
              "heads.xml")
     register("DIVIDE-SETS", self.divideSets,
              {"outputStem": None, "saveCombined": True})
コード例 #10
0
ファイル: Preprocessor.py プロジェクト: chengkun-wu/PWTEES
 def process(self, source, output, parameters=None, model=None,
             sourceDataSetNames=None, fromStep=None, toStep=None,
             omitSteps=None):
     """
     Thin wrapper around ToolChain.process.

     Forwards source, output, parameters, model, fromStep, toStep and
     omitSteps unchanged and returns the result. `sourceDataSetNames` is
     part of the signature but is not used by this method.
     """
     return ToolChain.process(
         self, source, output, parameters, model, fromStep, toStep, omitSteps)
コード例 #11
0
ファイル: Preprocessor.py プロジェクト: DUT-LiuYang/TEES
 def process(self, source, output, parameters=None, model=None, sourceDataSetNames=None, fromStep=None, toStep=None, omitSteps=None):
     """
     Run the tool chain, rejecting any request to omit the CONVERT step.

     source, output, parameters, model, fromStep, toStep and omitSteps are
     forwarded unchanged to ToolChain.process. `sourceDataSetNames` is
     accepted but not used by this method.

     omitSteps -- None, one step name as a string, or a collection of
                  step names.

     Returns the value returned by ToolChain.process.
     Raises Exception when `omitSteps` names the "CONVERT" step.
     """
     if omitSteps is not None:
         if isinstance(omitSteps, types.StringTypes):
             # Exact comparison for a string: '"CONVERT" in omitSteps' would
             # be a substring test and would also match "CONVERT-PARSE".
             # NOTE(review): assumes a string holds a single step name --
             # confirm against ToolChain's handling of omitSteps.
             convertOmitted = (omitSteps == "CONVERT")
         else:
             convertOmitted = "CONVERT" in omitSteps
         if convertOmitted:
             raise Exception("Preprocessor step 'CONVERT' may not be omitted")

     # Run the tool chain
     return ToolChain.process(self, source, output, parameters, model, fromStep, toStep, omitSteps)
コード例 #12
0
ファイル: Preprocessor.py プロジェクト: jbjorne/TEES
 def process(self, source, output=None, model=None, fromStep=None, toStep=None, omitSteps=None, logPath=None):
     """
     Run the preprocessing steps on `source`, with optional logging.

     logPath -- "AUTO" derives the log file from `output`: the text before
                a "*" if one is present, with trailing "-", "/" and "\\"
                removed, followed by "-log.txt". With "AUTO" and no
                `output`, or with None / the string "None", no log is
                opened. Any other value is used as the log file path.

     source, output, model, fromStep, toStep and omitSteps are forwarded to
     ToolChain.process, whose return value is returned.

     Raises Exception if self.steps is empty.
     """
     if logPath == "AUTO":
         if output != None:
             logPath = output
             if "*" in logPath:
                 logPath = logPath.split("*")[0].rstrip("-")
             logPath = logPath.rstrip("/").rstrip("\\") + "-log.txt"
         else:
             logPath = None
     elif logPath == "None":
         logPath = None
     if logPath != None:
         logDir = os.path.dirname(logPath)
         # A log path without a directory part has dirname "", which
         # os.makedirs cannot create; skip directory creation in that case.
         if logDir and not os.path.exists(logDir):
             os.makedirs(logDir)
         Stream.openLog(logPath)
     print >> sys.stderr, "Preprocessor steps:", [x.name for x in self.steps]
     if len(self.steps) == 0:
         raise Exception("No preprocessing steps defined")
     # Run the tool chain
     xml = ToolChain.process(self, source, output, model, fromStep, toStep, omitSteps)
     # NOTE: Stream.closeLog is reached only when no exception was raised
     # above; on an error the log stays open.
     if logPath != None:
         Stream.closeLog(logPath)
     return xml
コード例 #13
0
ファイル: Preprocessor.py プロジェクト: ConesaLab/Padhoc
 def __init__(self):
     """
     Initialize the ToolChain base class and set this instance's
     modelParameterStringName to "preprocessorParams".
     """
     # The attribute is assigned after the base-class constructor has run.
     # NOTE(review): presumably the name under which preprocessing
     # parameters are looked up in a model -- confirm in ToolChain.
     ToolChain.__init__(self)
     self.modelParameterStringName = "preprocessorParams"
コード例 #14
0
ファイル: Preprocessor.py プロジェクト: DUT-LiuYang/TEES
 def __init__(self):
     """
     Run ToolChain.__init__ and then set modelParameterStringName to
     "preprocessorParams" on the new instance.
     """
     # Assigned after ToolChain.__init__ so this value is the one that
     # remains on the instance once construction finishes.
     ToolChain.__init__(self)
     self.modelParameterStringName = "preprocessorParams"