# Batch-run setup: mark the run as started, prepare the work directory and
# logging, configure the preprocessor, and decide whether the preprocessing
# step still has to be run for this input.

# INPUT_TAG = INPUT_TAG[:-len(".tar.gz")]
open(INPUT_TAG + "-STARTED", "w").close()  # Mark process status (creates/truncates the marker file)

# Start logging
WORKDIR = options.workdir
if WORKDIR is None:
    # No work directory was given, so fall back to a fresh temporary one
    WORKDIR = tempfile.mkdtemp()
workdir(WORKDIR, options.clearAll)  # Select a working directory, optionally remove existing files
if not options.noLog:
    # Start logging into a file in working directory
    log(options.clearAll, True, INPUT_TAG + "-" + options.eventTag + ".log")
print >> sys.stderr, "Work directory at", WORKDIR

# Stays None unless an existing preprocessor output is found below
eventDetectionInput = None

preprocessor = Preprocessor()
preprocessor.debug = options.debug
preprocessor.source = options.input  # This has to be defined already here, needs to be fixed later
preprocessor.compressIntermediateFiles = True  # save space
preprocessor.intermediateFilesAtSource = True  # create output at source file location
preprocessor.requireEntitiesForParsing = True  # parse only sentences which contain BANNER entities

if selector.check("PREPROCESS"):
    if os.path.exists(preprocessor.getOutputPath("FIND-HEADS")):
        # The "FIND-HEADS" output already exists: reuse it instead of preprocessing again
        print >> sys.stderr, "Preprocessor output", preprocessor.getOutputPath("FIND-HEADS"), "exists, skipping preprocessing."
        eventDetectionInput = preprocessor.getOutputPath("FIND-HEADS")
    else:
        print >> sys.stderr, "Preprocessor output", preprocessor.getOutputPath("FIND-HEADS"), "does not exist"
        print >> sys.stderr, "------------ Preprocessing ------------"
        # Remove some of the unnecessary intermediate files
        preprocessor.setIntermediateFile("CONVERT", None)
        preprocessor.setIntermediateFile("SPLIT-SENTENCES", None)
        preprocessor.setIntermediateFile("PARSE", None)
        preprocessor.setIntermediateFile("CONVERT-PARSE", None)