Beispiel #1
0
# Choose the working directory: take it from options.workdir, or create a
# fresh temporary directory when none was given.
WORKDIR = options.workdir
if WORKDIR is None: # None is a singleton, so test identity rather than equality
    WORKDIR = tempfile.mkdtemp()
workdir(WORKDIR, options.clearAll) # Select a working directory, optionally remove existing files
# Logging is on by default; options.noLog turns it off.
if not options.noLog:
    log(options.clearAll, True, INPUT_TAG + "-" + options.eventTag + ".log") # Start logging into a file in working directory
print >> sys.stderr, "Work directory at", WORKDIR

# Starts out unset; the PREPROCESS step below points it at the preprocessor's
# "FIND-HEADS" output file when that file already exists.
eventDetectionInput = None
# Create and configure the preprocessor before any of its output paths are
# queried with getOutputPath().
preprocessor = Preprocessor()
preprocessor.debug = options.debug
preprocessor.source = options.input # This has to be defined already here, needs to be fixed later
preprocessor.compressIntermediateFiles = True # save space
preprocessor.intermediateFilesAtSource = True # create output at source file location
preprocessor.requireEntitiesForParsing = True # parse only sentences which contain BANNER entities
if selector.check("PREPROCESS"):
    if os.path.exists(preprocessor.getOutputPath("FIND-HEADS")):
        print >> sys.stderr, "Preprocessor output", preprocessor.getOutputPath("FIND-HEADS"), "exists, skipping preprocessing."
        eventDetectionInput = preprocessor.getOutputPath("FIND-HEADS")
    else:
        print >> sys.stderr, "Preprocessor output", preprocessor.getOutputPath("FIND-HEADS"), "does not exist"
        print >> sys.stderr, "------------ Preprocessing ------------"
        # Remove some of the unnecessary intermediate files
        preprocessor.setIntermediateFile("CONVERT", None)
        preprocessor.setIntermediateFile("SPLIT-SENTENCES", None)
        preprocessor.setIntermediateFile("PARSE", None)
        preprocessor.setIntermediateFile("CONVERT-PARSE", None)
        preprocessor.setIntermediateFile("SPLIT-NAMES", None)
        # Process input into interaction XML
        omitPreprocessorSteps=["DIVIDE-SETS"]
        if options.omitPreprocessorSteps != None:
Beispiel #2
0
     shutil.copytree(options.copyFrom, WORKDIR)
     workdir(WORKDIR, False)
 else:
     workdir(WORKDIR, options.clearAll) # Select a working directory, optionally remove existing files
 # Logging is on by default; options.noLog turns it off.
 if not options.noLog:
     Stream.openLog("log.txt")
     #log() # Start logging into a file in working directory
 
 # Resolve the detector class from its dotted module path. The class must have
 # the same name as the last path component (e.g. "pkg.Foo" provides "Foo").
 print >> sys.stderr, "Importing detector", options.detector
 # eval() accepts expressions only, so passing it a "from ... import ..."
 # statement always raised SyntaxError. Import the module explicitly instead;
 # this also avoids evaluating a command-line string as code.
 import importlib # local import: only needed for this dynamic lookup
 Detector = getattr(importlib.import_module(options.detector), options.detector.split(".")[-1])
 detector = Detector()
 detector.debug = options.debug
 detector.stWriteScores = True # write confidence scores into additional st-format files
 # The debug flag is also set on whatever object setConnection() returns.
 detector.setConnection(getConnection(options.connection)).debug = options.debug
 # Pre-calculate all the required SVM models
 if selector.check("TRAIN"):
     # Banner: the step title framed by two identical rule lines
     trainBannerRule = "----------------------------------------------------"
     for trainBannerLine in (trainBannerRule,
                             "------------------ Train Detector ------------------",
                             trainBannerRule):
         print >> sys.stderr, trainBannerLine
     if not options.singleStage:
         # Default path: train() receives the trigger/edge/unmerging/modifier settings
         detector.train(
             trainFile,
             develFile,
             options.develModel,
             options.testModel,
             TRIGGER_FEATURE_PARAMS,
             EDGE_FEATURE_PARAMS,
             "",
             options.modifierStyle,
             options.triggerParams,
             options.edgeParams,
             options.uParams,
             options.modifierParams,
             options.recallAdjustParams,
             options.unmerging,
             options.modifiers,
             options.fullGrid,
             fullTaskId,
             options.parse,
             None,
             fromStep=detectorStep["TRAIN"],
             workDir="training")
     else:
         # Single-stage path: train() receives one example style and one parameter set
         detector.train(
             trainFile,
             develFile,
             options.develModel,
             options.testModel,
             options.exampleStyle,
             options.exampleParams,
             options.parse,
             None,
             fullTaskId,
             fromStep=detectorStep["TRAIN"],
             workDir="training")
 if selector.check("DEVEL"):
Beispiel #3
0
#newTrainFile = makeSubset(TRAIN_FILE, options.task + "-train-nodup" + options.extraTag + downSampleTag + ".xml", options.downSampleTrain, options.downSampleSeed)
#makeSubset(TRAIN_FILE.replace("-nodup", ""), options.task + "-train" + options.extraTag + downSampleTag + ".xml", options.downSampleTrain, options.downSampleSeed)
#TRAIN_FILE = newTrainFile

# Report the selected task, appending the sub-task number when one is set.
if subTask is not None: # None is a singleton, so test identity rather than equality
    print >> sys.stderr, "Task:", options.task + "." + str(subTask)
else:
    print >> sys.stderr, "Task:", options.task

# Create the event detector and pass the debug flag on to it.
eventDetector = EventDetector()
eventDetector.debug = options.debug
eventDetector.stWriteScores = True # write confidence scores into additional st-format files
#eventDetector.setCSCConnection(options.csc, os.path.join("CSCConnection",WORKDIR.lstrip("/")))
# The debug flag is also set on whatever object setConnection() returns.
eventDetector.setConnection(getConnection(options.connection)).debug = options.debug
# Pre-calculate all the required SVM models
if selector.check("TRAIN"):
    # Banner: the step title framed by two identical rule lines
    trainBannerRule = "----------------------------------------------------"
    for trainBannerLine in (trainBannerRule,
                            "--------------- Train Event Detector ---------------",
                            trainBannerRule):
        print >> sys.stderr, trainBannerLine
    eventDetector.train(
        TRAIN_FILE,
        TEST_FILE,
        options.develModel,
        options.testModel,
        TRIGGER_FEATURE_PARAMS,
        EDGE_FEATURE_PARAMS,
        "",
        options.modifierStyle,
        options.triggerParams,
        options.edgeParams,
        options.uParams,
        options.modifierParams,
        options.recallAdjustParams,
        options.unmerging,
        options.modifiers,
        options.fullGrid,
        fullTaskId,
        options.parse,
        options.tokenization,
        fromStep=detectorStep["TRAIN"],
        workDir="training")
if selector.check("DEVEL"):
    print >> sys.stderr, "----------------------------------------------------"
    print >> sys.stderr, "------------ Check devel classification ------------"
Beispiel #4
0
#makeSubset(TRAIN_FILE.replace("-nodup", ""), options.task + "-train" + options.extraTag + downSampleTag + ".xml", options.downSampleTrain, options.downSampleSeed)
#TRAIN_FILE = newTrainFile

# Report the selected task, appending the sub-task number when one is set.
if subTask is not None:  # None is a singleton, so test identity rather than equality
    print >> sys.stderr, "Task:", options.task + "." + str(subTask)
else:
    print >> sys.stderr, "Task:", options.task

# Create the event detector and pass the debug flag on to it.
eventDetector = EventDetector()
eventDetector.debug = options.debug
eventDetector.stWriteScores = True  # write confidence scores into additional st-format files
#eventDetector.setCSCConnection(options.csc, os.path.join("CSCConnection",WORKDIR.lstrip("/")))
# The debug flag is also set on whatever object setConnection() returns.
eventDetector.setConnection(getConnection(
    options.connection)).debug = options.debug
# Pre-calculate all the required SVM models
if selector.check("TRAIN"):
    print >> sys.stderr, "----------------------------------------------------"
    print >> sys.stderr, "--------------- Train Event Detector ---------------"
    print >> sys.stderr, "----------------------------------------------------"
    eventDetector.train(TRAIN_FILE,
                        TEST_FILE,
                        options.develModel,
                        options.testModel,
                        TRIGGER_FEATURE_PARAMS,
                        EDGE_FEATURE_PARAMS,
                        "",
                        options.modifierStyle,
                        options.triggerParams,
                        options.edgeParams,
                        options.uParams,
                        options.modifierParams,