def getSteps(step, omitSteps, mainSteps):
    """
    Work out where processing starts and which steps are skipped.

    Both 'step' and 'omitSteps' are handed to Parameters.get together with
    'mainSteps'. The results are used here as dictionaries keyed by main
    step name (presumably one key per entry of 'mainSteps' -- confirm
    against Parameters.get).

    Start specification values are interpreted as follows:
      - None: processing does not start from this main step
      - True: processing starts from the beginning of this main step
      - a string: processing starts from the named substep of this main step
    At most one main step may have a non-None value.

    Omit specification values: True means the whole main step is omitted,
    any other value is passed through unchanged as the substeps to skip.

    Returns a tuple (selector, fromSubStep, omitSubSteps):
      - selector: a StepSelector built from 'mainSteps', the main step to
        start from (or None) and the list of wholly omitted main steps
      - fromSubStep: dict of main step -> substep name to start from, or None
      - omitSubSteps: dict of main step -> substeps to skip, or None when the
        value was True (the main step is then omitted through the selector)

    Raises AssertionError if more than one starting point is given or if a
    starting substep is neither True nor a string.
    """
    # Determine the substep to start from, for the main step from which processing starts
    startSpec = Parameters.get(step, mainSteps)
    fromMainStep = None
    fromSubStep = {}
    for mainStep in startSpec.keys():
        startValue = startSpec[mainStep]
        fromSubStep[mainStep] = startValue
        if startValue is None:
            continue
        # Processing can start from one place only
        assert fromMainStep is None, \
            "Multiple starting steps defined: %s and %s" % (fromMainStep, mainStep)
        fromMainStep = mainStep
        # Equality (not identity) is kept on purpose so that a value of 1 is
        # still treated as True, exactly as before.
        if startValue == True:
            fromSubStep[mainStep] = None
        else:
            # No list allowed, processing can start from one place only
            assert isinstance(startValue, types.StringTypes), \
                "Starting substep for %s must be a single name, got %r" % (mainStep, startValue)

    # Determine the steps to omit
    omitSpec = Parameters.get(omitSteps, mainSteps)
    omitSubSteps = {}
    omitMainSteps = []
    for mainStep in omitSpec.keys():
        omitValue = omitSpec[mainStep]
        omitSubSteps[mainStep] = omitValue
        if omitValue == True:
            # The whole main step is skipped by the selector, so there are no
            # individual substeps left to skip.
            omitMainSteps.append(mainStep)
            omitSubSteps[mainStep] = None

    # Report the starting point
    if fromMainStep is not None:
        startSubStep = fromSubStep[fromMainStep]
        if startSubStep is not None:
            sys.stderr.write("Starting process from step %s, substep %s\n" % (fromMainStep, startSubStep))
        else:
            sys.stderr.write("Starting process from step %s\n" % (fromMainStep,))

    # Initialize the main step selector
    selector = StepSelector(mainSteps, fromStep=fromMainStep, omitSteps=omitMainSteps)
    return selector, fromSubStep, omitSubSteps
# Remaining command line options
optparser.add_option("-c", "--corpus", default="PMC11", dest="corpus",
                     help="corpus name for preprocessing")
optparser.add_option("-o", "--output", default=None, dest="output",
                     help="output directory")
optparser.add_option("-w", "--workdir", default=None, dest="workdir",
                     help="work directory")
optparser.add_option("-m", "--model", default=None, dest="model",
                     help="model file or directory")
optparser.add_option("-p", "--parse", default="split-McClosky", dest="parse",
                     help="Parse XML element name")
optparser.add_option("--eventTag", default="GE", dest="eventTag", help="")
optparser.add_option("--step", default=None, dest="step", help="")
optparser.add_option("--detectorStep", default=None, dest="detectorStep", help="")
optparser.add_option("--omitPreprocessorSteps", default=None,
                     dest="omitPreprocessorSteps", help="")
optparser.add_option("--csc", default="", dest="csc", help="")
# Boolean switches
optparser.add_option("--noLog", default=False, action="store_true",
                     dest="noLog", help="")
optparser.add_option("--clearAll", default=False, action="store_true",
                     dest="clearAll", help="Delete all files")
optparser.add_option("--debug", default=False, action="store_true",
                     dest="debug", help="")
options, args = optparser.parse_args()

# The process has two main steps; --step chooses the one to start from
selector = StepSelector(["PREPROCESS", "EVENTS"], fromStep=options.step)

# Derive the input stem, which is used for naming the output files
options.input = options.input.rstrip("/")
if options.output is not None:
    # Output files go into the output directory, named after the last
    # component of the input path
    if not os.path.exists(options.output):
        os.makedirs(options.output)
    INPUT_TAG = os.path.join(options.output, options.input.rsplit("/", 1)[-1])
else:
    # Without an output directory the output files are written next to the input
    INPUT_TAG = options.input
# Disabled: stripping of a ".tar.gz" suffix from the stem
#if os.path.isfile(options.input):
#    if INPUT_TAG.endswith(".tar.gz"):
#        INPUT_TAG = INPUT_TAG[:-len(".tar.gz")]

# Mark the process status by creating an empty STEM-STARTED file
open(INPUT_TAG + "-STARTED", "w").close()

# Start logging
# Remaining command line options
optparser.add_option("--clearAll", default=False, action="store_true",
                     dest="clearAll", help="Delete all files")
optparser.add_option("--debug", default=False, action="store_true",
                     dest="debug", help="More verbose output")
optparser.add_option("-u", "--unmerging", default=False, action="store_true",
                     dest="unmerging", help="SVM unmerging")
optparser.add_option("-m", "--modifiers", default=False, action="store_true",
                     dest="modifiers", help="Train model for modifier detection")
# Task 3 (speculation and negation) resources; the defaults point into the user's home directory
optparser.add_option(
    "--speculationModel",
    default=os.path.expanduser("~/biotext/BioNLP2011/tests/task3/task3TrainGE-EPI-ID/speculation-models/model-c_150000"),
    dest="speculationModel",
    help="SVM-multiclass speculation model")
optparser.add_option(
    "--negationModel",
    default=os.path.expanduser("~/biotext/BioNLP2011/tests/task3/task3TrainGE-EPI-ID/negation-models/model-c_16000"),
    dest="negationModel",
    help="SVM-multiclass negation model")
optparser.add_option(
    "--task3Ids",
    default=os.path.expanduser("~/biotext/BioNLP2011/tests/task3/task3TrainGE-EPI-ID/genia-task3-ids"),
    dest="task3Ids",
    help="Speculation & negation SVM example class and feature id file stem (files = STEM.class_names and STEM.feature_names)")
options, args = optparser.parse_args()

# --step has the form MAINSTEP or MAINSTEP.SUBSTEP. The main step goes to the
# selector, the substep (if any) is stored for that main step only.
step = options.step
detectorStep = dict.fromkeys(["TRAIN", "DEVEL", "EMPTY", "TEST"])  # substep per main step, None by default
if options.step is not None and "." in options.step:
    step = options.step.split(".")[0]
    detectorStep[step] = options.step.split(".")[1]
selector = StepSelector(["TRAIN", "DEVEL", "EMPTY", "TEST"], fromStep=step)

# Check options
# NOTE(review): the block nesting was reconstructed from flattened source;
# confirm that the mode is forced to POST-GRID only when classifying.
if options.classify:
    print("Classifying with existing models")
    options.mode = "POST-GRID"
assert options.output is not None
assert options.task in ("OLD.1", "OLD.2", "CO", "REL", "GE", "GE.1", "GE.2", "EPI", "ID", "BB")

# A task id of the form TASK.N selects subtask N; without a suffix the subtask is 2
fullTaskId = options.task
subTask = 2
if "." in options.task:
    options.task, subTask = options.task.split(".")
    subTask = int(subTask)

#dataPath = "/home/jari/biotext/BioNLP2011/data/main-tasks/"
if options.task == "REL":
    dataPath = os.path.expanduser("~/biotext/BioNLP2011/data/supporting-tasks/REL/")
# Remaining command line switches
optparser.add_option("--noLog", default=False, action="store_true",
                     dest="noLog", help="")
optparser.add_option("--noTestSet", default=False, action="store_true",
                     dest="noTestSet", help="")
optparser.add_option("--clearAll", default=False, action="store_true",
                     dest="clearAll", help="Delete all files")
optparser.add_option("--debug", default=False, action="store_true",
                     dest="debug", help="More verbose output")
options, args = optparser.parse_args()

# Validate options
assert options.output is not None
assert options.task in ("GE00", "GE09.1", "GE09.2", "GE", "GE.1", "GE.2",
                        "EPI", "ID", "BB", "BI", "CO", "REL", "REN")

# --step has the form MAINSTEP or MAINSTEP.SUBSTEP. The main step goes to the
# selector, the substep (if any) is stored for that main step only.
step = options.step
detectorStep = dict.fromkeys(["TRAIN", "DEVEL", "EMPTY", "TEST"])  # substep per main step, None by default
if options.step is not None and "." in options.step:
    step = options.step.split(".")[0]
    detectorStep[step] = options.step.split(".")[1]
selector = StepSelector(["TRAIN", "DEVEL", "EMPTY", "TEST"], fromStep=step)

# A task id of the form TASK.N selects subtask N; without a suffix the subtask is 2
fullTaskId = options.task
subTask = 2
if "." in options.task:
    options.task, subTask = options.task.split(".")
    subTask = int(subTask)

# Default input files: <dataPath><task>/<task>-<set><extraTag>.xml
if options.task is not None:
    dataPath = os.path.expanduser("~/biotext/BioNLP2011/data/main-tasks/")
    trainFile = dataPath + options.task + "/" + options.task + "-train" + options.extraTag + ".xml"
    develFile = dataPath + options.task + "/" + options.task + "-devel" + options.extraTag + ".xml"
    # test set never uses extratag
    # NOTE(review): the comment above is carried over from the original code,
    # but extraTag IS appended to the test file name below -- confirm which
    # of the two is intended.
    testFile = dataPath + options.task + "/" + options.task + "-test" + options.extraTag + ".xml"

# Optional overrides for input files
if options.trainFile is not None:
    trainFile = options.trainFile
if options.develFile is not None:
    develFile = options.develFile
if options.testFile is not None:
    testFile = options.testFile
help= "Speculation & negation SVM example class and feature id file stem (files = STEM.class_names and STEM.feature_names)" ) (options, args) = optparser.parse_args() step = options.step detectorStep = { "TRAIN": None, "DEVEL": None, "EMPTY": None, "TEST": None } # TRAIN substep if options.step != None and "." in options.step: step = options.step.split(".")[0] detectorStep[step] = options.step.split(".")[1] selector = StepSelector(["TRAIN", "DEVEL", "EMPTY", "TEST"], fromStep=step) # Check options if options.classify: print "Classifying with existing models" options.mode = "POST-GRID" assert options.output != None assert options.task in [ "OLD.1", "OLD.2", "CO", "REL", "GE", "GE.1", "GE.2", "EPI", "ID", "BB" ] fullTaskId = options.task subTask = 2 if "." in options.task: options.task, subTask = options.task.split(".") subTask = int(subTask) #dataPath = "/home/jari/biotext/BioNLP2011/data/main-tasks/"