WORKDIR = options.workdir if WORKDIR == None: WORKDIR = tempfile.mkdtemp() workdir(WORKDIR, options.clearAll) # Select a working directory, optionally remove existing files if not options.noLog: log(options.clearAll, True, INPUT_TAG + "-" + options.eventTag + ".log") # Start logging into a file in working directory print >> sys.stderr, "Work directory at", WORKDIR eventDetectionInput = None preprocessor = Preprocessor() preprocessor.debug = options.debug preprocessor.source = options.input # This has to be defined already here, needs to be fixed later preprocessor.compressIntermediateFiles = True # save space preprocessor.intermediateFilesAtSource = True # create output at source file location preprocessor.requireEntitiesForParsing = True # parse only sentences which contain BANNER entities if selector.check("PREPROCESS"): if os.path.exists(preprocessor.getOutputPath("FIND-HEADS")): print >> sys.stderr, "Preprocessor output", preprocessor.getOutputPath("FIND-HEADS"), "exists, skipping preprocessing." eventDetectionInput = preprocessor.getOutputPath("FIND-HEADS") else: print >> sys.stderr, "Preprocessor output", preprocessor.getOutputPath("FIND-HEADS"), "does not exist" print >> sys.stderr, "------------ Preprocessing ------------" # Remove some of the unnecessary intermediate files preprocessor.setIntermediateFile("CONVERT", None) preprocessor.setIntermediateFile("SPLIT-SENTENCES", None) preprocessor.setIntermediateFile("PARSE", None) preprocessor.setIntermediateFile("CONVERT-PARSE", None) preprocessor.setIntermediateFile("SPLIT-NAMES", None) # Process input into interaction XML omitPreprocessorSteps=["DIVIDE-SETS"] if options.omitPreprocessorSteps != None:
shutil.copytree(options.copyFrom, WORKDIR) workdir(WORKDIR, False) else: workdir(WORKDIR, options.clearAll) # Select a working directory, optionally remove existing files if not options.noLog: Stream.openLog("log.txt") #log() # Start logging into a file in working directory print >> sys.stderr, "Importing detector", options.detector Detector = eval("from " + options.detector + " import " + options.detector.split(".")[-1]) detector = Detector() detector.debug = options.debug detector.stWriteScores = True # write confidence scores into additional st-format files detector.setConnection(getConnection(options.connection)).debug = options.debug # Pre-calculate all the required SVM models if selector.check("TRAIN"): print >> sys.stderr, "----------------------------------------------------" print >> sys.stderr, "------------------ Train Detector ------------------" print >> sys.stderr, "----------------------------------------------------" if options.singleStage: detector.train(trainFile, develFile, options.develModel, options.testModel, options.exampleStyle, options.exampleParams, options.parse, None, fullTaskId, fromStep=detectorStep["TRAIN"], workDir="training") else: detector.train(trainFile, develFile, options.develModel, options.testModel, TRIGGER_FEATURE_PARAMS, EDGE_FEATURE_PARAMS, "", options.modifierStyle, options.triggerParams, options.edgeParams, options.uParams, options.modifierParams, options.recallAdjustParams, options.unmerging, options.modifiers, options.fullGrid, fullTaskId, options.parse, None, fromStep=detectorStep["TRAIN"], workDir="training") if selector.check("DEVEL"):
# NOTE(review): statement nesting below was reconstructed from a
# whitespace-collapsed fragment -- confirm against the original layout.
#newTrainFile = makeSubset(TRAIN_FILE, options.task + "-train-nodup" + options.extraTag + downSampleTag + ".xml", options.downSampleTrain, options.downSampleSeed)
#makeSubset(TRAIN_FILE.replace("-nodup", ""), options.task + "-train" + options.extraTag + downSampleTag + ".xml", options.downSampleTrain, options.downSampleSeed)
#TRAIN_FILE = newTrainFile

# Report the task, with the subtask appended as "<task>.<subTask>" when set.
if subTask is not None:
    print >> sys.stderr, "Task:", options.task + "." + str(subTask)
else:
    print >> sys.stderr, "Task:", options.task

eventDetector = EventDetector()
eventDetector.debug = options.debug
eventDetector.stWriteScores = True  # write confidence scores into additional st-format files
#eventDetector.setCSCConnection(options.csc, os.path.join("CSCConnection",WORKDIR.lstrip("/")))
# The debug flag is set on the object returned by setConnection().
eventDetector.setConnection(getConnection(options.connection)).debug = options.debug

# Pre-calculate all the required SVM models
if selector.check("TRAIN"):
    print >> sys.stderr, "----------------------------------------------------"
    print >> sys.stderr, "--------------- Train Event Detector ---------------"
    print >> sys.stderr, "----------------------------------------------------"
    eventDetector.train(TRAIN_FILE, TEST_FILE,
                        options.develModel, options.testModel,
                        TRIGGER_FEATURE_PARAMS, EDGE_FEATURE_PARAMS, "",
                        options.modifierStyle,
                        options.triggerParams, options.edgeParams,
                        options.uParams, options.modifierParams,
                        options.recallAdjustParams, options.unmerging,
                        options.modifiers, options.fullGrid, fullTaskId,
                        options.parse, options.tokenization,
                        fromStep=detectorStep["TRAIN"], workDir="training")
if selector.check("DEVEL"):
    print >> sys.stderr, "----------------------------------------------------"
    print >> sys.stderr, "------------ Check devel classification ------------"
#makeSubset(TRAIN_FILE.replace("-nodup", ""), options.task + "-train" + options.extraTag + downSampleTag + ".xml", options.downSampleTrain, options.downSampleSeed) #TRAIN_FILE = newTrainFile if subTask != None: print >> sys.stderr, "Task:", options.task + "." + str(subTask) else: print >> sys.stderr, "Task:", options.task eventDetector = EventDetector() eventDetector.debug = options.debug eventDetector.stWriteScores = True # write confidence scores into additional st-format files #eventDetector.setCSCConnection(options.csc, os.path.join("CSCConnection",WORKDIR.lstrip("/"))) eventDetector.setConnection(getConnection( options.connection)).debug = options.debug # Pre-calculate all the required SVM models if selector.check("TRAIN"): print >> sys.stderr, "----------------------------------------------------" print >> sys.stderr, "--------------- Train Event Detector ---------------" print >> sys.stderr, "----------------------------------------------------" eventDetector.train(TRAIN_FILE, TEST_FILE, options.develModel, options.testModel, TRIGGER_FEATURE_PARAMS, EDGE_FEATURE_PARAMS, "", options.modifierStyle, options.triggerParams, options.edgeParams, options.uParams, options.modifierParams,