コード例 #1
0
ファイル: EvaluateEPE.py プロジェクト: jbjorne/TEES
def run(inPath, outPath, subDirs, model, connection, numJobs, subTask=3, posTags=None, useTestSet=False, clear=True, debug=False, force=False, training=True, preprocessorSteps=None, subset=None):
    """
    Collect parses, import them into a corpus and optionally train a model.

    All intermediate results are stored under outPath: collected parses in
    "<outPath>/parses" and the preprocessed corpus in "<outPath>/corpus".
    Either stage is skipped if its directory already holds results.

    @param inPath: input location, passed to combineParses
    @param outPath: the work directory, created if it does not exist
    @param subDirs: passed to combineParses together with inPath
    @param model: model name; used to build the corpus file pattern and,
        with "." + subTask appended when subTask > 0, passed to train
    @param connection: connection string; "$JOBS" is replaced with numJobs
    @param numJobs: the value substituted for "$JOBS" in connection
    @param subTask: if greater than zero, appended to the model name
    @param posTags: passed as the "posTags" argument of the IMPORT_PARSE step
    @param useTestSet: if True, the file pattern is model + ".+\\.xml",
        otherwise only the "-devel.xml" and "-train.xml" files are matched
    @param clear: if True, an existing non-empty outPath may be removed
    @param debug: set for all preprocessor steps and passed to train
    @param force: if True, remove outPath without asking for confirmation
    @param training: if True, train is called after preprocessing
    @param preprocessorSteps: list of preprocessor step names; if None a
        default list is used
    @param subset: passed to train
    """
    # Remove existing non-empty work directory, if requested to do so
    if os.path.exists(outPath) and os.listdir(outPath) and clear:
        if force or ask("Output directory '" + outPath + "' exists, remove?"):
            print >> sys.stderr, "Output directory exists, removing", outPath
            shutil.rmtree(outPath)
    # Create work directory if needed
    if not os.path.exists(outPath):
        print >> sys.stderr, "Making output directory", outPath
        os.makedirs(outPath)
    
    # Begin logging
    logPath = beginLog(outPath)
    
    # Collect the parse files, unless a previous run already did so
    parseDir = os.path.join(outPath, "parses")
    if not os.path.exists(parseDir) or not os.listdir(parseDir):
        parseDir = combineParses(inPath, parseDir, subDirs)
    else:
        print >> sys.stderr, "Using collected parses from", parseDir
    
    # Import the parses, unless the corpus directory already exists
    corpusDir = os.path.join(outPath, "corpus")
    if not os.path.exists(corpusDir):
        if preprocessorSteps is None:
            preprocessorSteps = ["MERGE_SETS", "REMOVE_ANALYSES", "REMOVE_HEADS", "MERGE_SENTENCES", "IMPORT_PARSE", "SPLIT_NAMES", "FIND_HEADS", "DIVIDE_SETS"]
        preprocessor = Preprocessor(preprocessorSteps)
        preprocessor.setArgForAllSteps("debug", debug)
        # NOTE(review): a caller-supplied step list is assumed to include IMPORT_PARSE -- confirm
        importStep = preprocessor.getStep("IMPORT_PARSE")
        importStep.setArg("parseDir", parseDir)
        importStep.setArg("posTags", posTags)
        # Raw strings keep the regex escapes ("\.") from being read as string escapes
        if useTestSet:
            modelPattern = model + r".+\.xml"
        else:
            modelPattern = model + r"-devel\.xml|" + model + r"-train\.xml"
        preprocessor.process(modelPattern, os.path.join(corpusDir, model), logPath=None)
    else:
        print >> sys.stderr, "Using imported parses from", corpusDir
    
    # Train the model
    if training:
        connection = connection.replace("$JOBS", str(numJobs))
        if subTask > 0:
            model = model + "." + str(subTask)
        train(outPath, model, parse="McCC", debug=debug, connection=connection, corpusDir=corpusDir, subset=subset, log=None) #classifierParams={"examples":None, "trigger":"150000", "recall":None, "edge":"7500", "unmerging":"2500", "modifiers":"10000"})
        
    # Close the log
    endLog(logPath)
コード例 #2
0
ファイル: EvaluateEPE.py プロジェクト: Mu-Y/BioNLPST
def run(inPath,
        outPath,
        subDirs,
        model,
        connection,
        numJobs,
        subTask=3,
        posTags=None,
        useTestSet=False,
        clear=True,
        debug=False,
        force=False,
        training=True,
        preprocessorSteps=None,
        subset=None):
    """
    Run the parse evaluation pipeline inside the work directory outPath.

    The parses are first collected with combineParses into
    "<outPath>/parses", then imported by a Preprocessor into
    "<outPath>/corpus", and finally, if requested, a model is trained with
    train. Collection is skipped when the parses directory is non-empty and
    importing is skipped when the corpus directory exists.

    @param inPath: input location, passed to combineParses
    @param outPath: the work directory, created if missing
    @param subDirs: passed to combineParses together with inPath
    @param model: model name used in the corpus file pattern; when
        subTask > 0 the name passed to train is model + "." + subTask
    @param connection: connection string where "$JOBS" becomes numJobs
    @param numJobs: replacement value for "$JOBS" in connection
    @param subTask: appended to the model name if greater than zero
    @param posTags: "posTags" argument for the IMPORT_PARSE step
    @param useTestSet: if True, match model + ".+\\.xml"; otherwise match
        only the "-devel.xml" and "-train.xml" files of the model
    @param clear: allow removing an existing, non-empty outPath
    @param debug: debug flag for all preprocessor steps and for train
    @param force: remove outPath without asking the user
    @param training: whether to call train after preprocessing
    @param preprocessorSteps: preprocessor step names, or None for the
        default list
    @param subset: passed to train
    """
    # Remove existing non-empty work directory, if requested to do so
    if os.path.exists(outPath) and os.listdir(outPath) and clear:
        if force or ask("Output directory '" + outPath + "' exists, remove?"):
            print >> sys.stderr, "Output directory exists, removing", outPath
            shutil.rmtree(outPath)
    # Create work directory if needed
    if not os.path.exists(outPath):
        print >> sys.stderr, "Making output directory", outPath
        os.makedirs(outPath)

    # Begin logging
    logPath = beginLog(outPath)

    # Collect the parse files (reused if already collected)
    parseDir = os.path.join(outPath, "parses")
    if os.path.exists(parseDir) and os.listdir(parseDir):
        print >> sys.stderr, "Using collected parses from", parseDir
    else:
        parseDir = combineParses(inPath, parseDir, subDirs)

    # Import the parses (reused if the corpus directory exists)
    corpusDir = os.path.join(outPath, "corpus")
    if os.path.exists(corpusDir):
        print >> sys.stderr, "Using imported parses from", corpusDir
    else:
        if preprocessorSteps is None:
            preprocessorSteps = [
                "MERGE_SETS", "REMOVE_ANALYSES", "REMOVE_HEADS",
                "MERGE_SENTENCES", "IMPORT_PARSE", "SPLIT_NAMES", "FIND_HEADS",
                "DIVIDE_SETS"
            ]
        preprocessor = Preprocessor(preprocessorSteps)
        preprocessor.setArgForAllSteps("debug", debug)
        # NOTE(review): custom step lists are assumed to contain IMPORT_PARSE -- confirm
        preprocessor.getStep("IMPORT_PARSE").setArg("parseDir", parseDir)
        preprocessor.getStep("IMPORT_PARSE").setArg("posTags", posTags)
        # The patterns are regular expressions, so they are written as raw
        # strings to keep "\." from being treated as a string escape
        if useTestSet:
            modelPattern = model + r".+\.xml"
        else:
            modelPattern = (model + r"-devel\.xml|" +
                            model + r"-train\.xml")
        preprocessor.process(modelPattern,
                             os.path.join(corpusDir, model),
                             logPath=None)

    # Train the model
    if training:
        connection = connection.replace("$JOBS", str(numJobs))
        if subTask > 0:
            model = model + "." + str(subTask)
        train(
            outPath,
            model,
            parse="McCC",
            debug=debug,
            connection=connection,
            corpusDir=corpusDir,
            subset=subset,
            log=None
        )  #classifierParams={"examples":None, "trigger":"150000", "recall":None, "edge":"7500", "unmerging":"2500", "modifiers":"10000"})

    # Close the log
    endLog(logPath)
コード例 #3
0
    (options, args) = optparser.parse_args()

    #     if options.steps != None:
    #         options.steps = [x.strip() for x in options.steps.split(",")]
    #     if options.omitSteps != None:
    #         options.omitSteps = options.omitSteps.split(",")
    #
    preprocessor = Preprocessor(options.steps, options.parseName,
                                options.requireEntities)
    if options.steps == None:
        print >> sys.stderr, preprocessor.getHelpString()
    else:
        preprocessor.setArgForAllSteps("debug", options.debug)
        if preprocessor.hasStep("CONVERT"):
            if options.corpus != None:
                preprocessor.getStep("CONVERT").setArg("corpusName",
                                                       options.corpus)
            if options.dataSetNames != None:
                preprocessor.getStep("CONVERT").setArg("dataSetNames",
                                                       options.dataSetNames)
        if options.parseDir:
            preprocessor.getStep("IMPORT_PARSE").setArg(
                "parseDir", options.parseDir)
        if options.exportFormats and preprocessor.hasStep("EXPORT"):
            preprocessor.getStep("EXPORT").setArg(
                "formats", options.exportFormats.split(","))
        if options.importFormats:
            if preprocessor.hasStep("LOAD"):
                preprocessor.getStep("LOAD").setArg(
                    "extensions", options.importFormats.split(","))
            if preprocessor.hasStep("IMPORT_PARSE"):
                preprocessor.getStep("IMPORT_PARSE").setArg(
コード例 #4
0
ファイル: preprocess.py プロジェクト: jbjorne/TEES
    debug.add_option("--debug", default=False, action="store_true", dest="debug", help="Set debug mode for all steps")
    optparser.add_option_group(debug)
    (options, args) = optparser.parse_args()
    
#     if options.steps != None:
#         options.steps = [x.strip() for x in options.steps.split(",")]
#     if options.omitSteps != None:
#         options.omitSteps = options.omitSteps.split(",")
#         
    preprocessor = Preprocessor(options.steps, options.parseName, options.requireEntities)
    if options.steps == None:
        print >> sys.stderr, preprocessor.getHelpString()
    else:
        preprocessor.setArgForAllSteps("debug", options.debug)
        if preprocessor.hasStep("CONVERT"):
            if options.corpus != None:
                preprocessor.getStep("CONVERT").setArg("corpusName", options.corpus)
            if options.dataSetNames != None:
                preprocessor.getStep("CONVERT").setArg("dataSetNames", options.dataSetNames)
        if options.parseDir:
            preprocessor.getStep("IMPORT_PARSE").setArg("parseDir", options.parseDir)
        if options.exportFormats and preprocessor.hasStep("EXPORT"):
            preprocessor.getStep("EXPORT").setArg("formats", options.exportFormats.split(","))
        if options.importFormats:
            if preprocessor.hasStep("LOAD"):
                preprocessor.getStep("LOAD").setArg("extensions", options.importFormats.split(","))
            if preprocessor.hasStep("IMPORT_PARSE"):
                preprocessor.getStep("IMPORT_PARSE").setArg("extensions", options.importFormats.split(","))
        #if options.intermediateFiles:
        #    preprocessor.setIntermediateFiles(True)
        preprocessor.process(options.input, options.output, model=None, logPath=options.logPath)