def test_create_AP_labels(self):
    """Check the labels produced by ``Model.create_AP_labels``.

    Three concrete complexes form the ordering of a transition system with
    four encoded states (state 3 is the initial one).  Two atomic
    propositions are labelled: one over an abstract complex and one over
    the first concrete complex.  The returned state labels and proposition
    labels are compared with hand-written expectations.
    """
    model = Model(set(), collections.Counter(), dict(), set())
    parser = Parser("rate_complex")

    def parse_complex(text):
        # The parsed complex is the first child of the parse result's data.
        return parser.parse(text).data.children[0]

    first = parse_complex("K(S{i},T{a}).B{o}::cyt")
    second = parse_complex("K(S{a},T{a}).B{o}::cyt")
    third = parse_complex("K(S{a},T{i}).B{o}::cyt")
    abstract = parse_complex("K(S{a}).B{_}::cyt")
    ordering = (first, second, third)

    propositions = [
        Core.Formula.AtomicProposition(abstract, " >= ", "3"),
        Core.Formula.AtomicProposition(first, " < ", 2),
    ]

    state_a = State(np.array((1, 2, 2)))
    state_b = State(np.array((5, 1, 1)))
    state_c = State(np.array((2, 4, 3)))
    state_d = State(np.array((1, 4, 3)))
    encoding = {state_a: 1, state_b: 2, state_c: 3, state_d: 4}

    expected_AP_labels = {
        propositions[0]: 'property_0',
        propositions[1]: 'property_1',
    }
    expected_state_labels = {
        1: {'property_0', 'property_1'},
        3: {'property_0', 'init'},
        4: {'property_0', 'property_1'},
    }

    ts = TS.TransitionSystem.TransitionSystem(ordering)
    ts.states_encoding = encoding
    ts.init = 3

    state_labels, AP_labels = model.create_AP_labels(propositions, ts, 0)
    self.assertEqual(state_labels, expected_state_labels)
    self.assertEqual(AP_labels, expected_AP_labels)
def getDetector(detector, model=None, evaluator=None):
    """
    Resolve the detector to use and import it.

    @param detector: the detector specification; when None, it is read from
        the "detector" string stored in the model
    @param model: passed to Model(model, "r") when detector is None; must
        not be None in that case
    @param evaluator: accepted for interface compatibility; not used in this
        function
    @return: whatever importClass(detector, "detector") returns
    """
    # Get the detector
    if detector is None:
        assert model is not None, "a model is required when no detector is given"
        model = Model(model, "r")
        try:
            detector = model.getStr("detector")
        finally:
            # Close the model even if reading the detector name fails
            model.close()
    return importClass(detector, "detector")
def test_create_complex_labels(self):
    """Check the labels and formulas from ``Model.create_complex_labels``.

    Two concrete complexes and one abstract complex are labelled against an
    ordering of three concrete complexes.  The returned label mapping and
    the list of PRISM formulas are compared with hand-written expectations.
    """
    model = Model(set(), collections.Counter(), dict(), set())
    parser = Parser("rate_complex")

    def parse_complex(text):
        # The parsed complex is the first child of the parse result's data.
        return parser.parse(text).data.children[0]

    first = parse_complex("K(S{i},T{a}).B{o}::cyt")
    second = parse_complex("K(S{a},T{a}).B{o}::cyt")
    third = parse_complex("K(S{a},T{i}).B{o}::cyt")
    abstract = parse_complex("K(S{a}).B{_}::cyt")

    ordering = (first, second, third)
    complexes = [second, abstract, first]

    expected_labels = {
        second: "VAR_1",
        abstract: "ABSTRACT_VAR_12",
        first: "VAR_0",
    }
    expected_formulas = ['ABSTRACT_VAR_12 = VAR_1+VAR_2; // K(S{a}).B{_}::cyt']

    labels, prism_formulas = model.create_complex_labels(complexes, ordering)
    self.assertEqual(labels, expected_labels)
    self.assertEqual(prism_formulas, expected_formulas)
def GetTrainingData(self, inputDim, dataPercent):
    """Return a shuffled 70/30 split of the cached data set.

    On the first call (while ``self.allDataList`` is empty) the data is
    loaded through ``Model``: for every entry returned by ``GetAllPar()``
    up to ``totalData - int(totalData * (1 - dataPercent))`` items are
    requested via ``GetDataLimited`` and accumulated into
    ``self.allDataList``.

    Args:
        inputDim: forwarded unchanged to ``Model.GetDataLimited``.
        dataPercent: fraction used to compute the per-entry item limit.

    Returns:
        A ``(trainingData, validationData)`` tuple.  The first
        ``int(len * 0.7)`` shuffled items are the training part and all
        remaining items are the validation part.
    """
    if len(self.allDataList) == 0:
        m = Model()
        pairs = m.GetAllPar()
        totalData = m.GetTotalData()
        limit = totalData - int(totalData * (1 - dataPercent))
        allData = []
        for p in pairs:
            # extend() avoids rebuilding the whole list for every entry
            allData.extend(m.GetDataLimited(inputDim, p[0], limit))
            print(p[0] + " " + str(len(allData)))
        self.allDataList = allData

    # NOTE(review): the list is reshuffled in place on every call; the
    # original layout was ambiguous about whether this should only happen
    # right after loading - confirm against callers.
    random.shuffle(self.allDataList)

    split = int(len(self.allDataList) * 0.7)
    trainingData = self.allDataList[:split]
    # The validation part starts exactly at the split index.  It used to
    # start at split + 1, which silently dropped one sample from both sets.
    validationData = self.allDataList[split:]
    return trainingData, validationData
def getDetector(detector, model=None):
    """
    Resolve a detector specification into a detector class and its name.

    @param detector: a detector class, a string naming one, or None. A string
        is either a dotted module path whose last component is also the class
        name (e.g. "A.B" means "from A.B import B"), or a full statement of
        the form "from MODULE import NAME". When None, the string is read
        from the "detector" entry of the model.
    @param model: passed to Model(model, "r") when detector is None; must not
        be None in that case
    @return: a (detector, detectorName) tuple. For string input detectorName
        is the string as given, otherwise it is the class's __name__.
    @raise ImportError: if the string cannot be parsed, the module cannot be
        imported, or the module lacks the requested name
    """
    import importlib  # local import: only this function needs it

    # Get the detector
    if detector is None:
        assert model is not None, "a model is required when no detector is given"
        model = Model(model, "r")
        try:
            detector = model.getStr("detector")
        finally:
            # Close the model even if reading the detector name fails
            model.close()
    if isinstance(detector, types.StringTypes):
        print >> sys.stderr, "Importing detector", detector
        detectorName = detector
        # The string may come from a model file, so it is parsed and imported
        # explicitly rather than being passed to exec/eval. The previous
        # "from ..." branch evaluated text containing spaces and therefore
        # always failed with a SyntaxError.
        if detector.startswith("from "):
            parts = detector.split()
            if len(parts) != 4 or parts[2] != "import":
                raise ImportError("Cannot parse detector import statement '" + detector + "'")
            moduleName, className = parts[1], parts[3]
        else:
            moduleName, className = detector, detector.split(".")[-1]
        module = importlib.import_module(moduleName)
        try:
            detector = getattr(module, className)
        except AttributeError:
            # Same exception type that "from X import Y" raises
            raise ImportError("cannot import name " + className)
    else: # assume it is a class
        detectorName = detector.__name__
        print >> sys.stderr, "Using detector", detectorName
    return detector, detectorName
def analyseData(self):
    """Fit a model on the pre-form workspace and show the post-analysis options.

    The workspace returned by ``self.pre_form.value()`` supplies the
    algorithm, its parameters, the learning type and the data.  For
    'Clustering' the model is fitted and evaluated on the same data;
    otherwise it is fitted on the 'Training' data/labels and evaluated on
    the 'Testing' data.  The fitted model and its predictions are stored in
    the workspace under 'Model' and 'Predicted', the existing post-options
    widget is closed and replaced by a new ``PostAnalysisOptions``, and
    ``self.built`` is set to True.
    """
    workspace = self.pre_form.value()
    estimator = Model(workspace['Algorithm'], workspace['Parameters'])
    dataset = workspace['Data']

    supervised = not (workspace['Learning Type'] == 'Clustering')
    if supervised:
        training = dataset.post_data['Training']
        estimator.fitData(training.data, training.labels)
        predictions = estimator.predictData(dataset.post_data['Testing'].data)
    else:
        estimator.fitData(dataset.post_data)
        predictions = estimator.predictData(dataset.post_data)

    workspace['Model'] = estimator
    workspace['Predicted'] = predictions

    # Replace the previous options widget with one built from the new results.
    self.post_options.close()
    del self.post_options
    self.post_options = PostAnalysisOptions(self, workspace)
    self.rightScroll.setWidget(self.post_options)
    self.built = True
def openModel(self, model, mode="r"):
    """
    Return an open model, opening it first when given as a string.

    @param model: either a string, which is passed to Model(model, mode), or
        any other object, which is returned unchanged
    @param mode: the mode passed to Model when model is a string
    @return: the Model opened here, or the object that was passed in. Models
        opened here are also appended to self.modelsToClose.
    """
    # isinstance also accepts subclasses of the string types, which the
    # previous exact type comparison rejected
    if not isinstance(model, types.StringTypes):
        return model
    opened = Model(model, mode)
    self.modelsToClose.append(opened)
    return opened
def setUp(self): # agents self.s1 = StructureAgent("X", set()) self.s2 = StructureAgent("Y", set()) self.s3 = StructureAgent("Z", set()) self.c1 = Complex([self.s1], "rep") self.c2 = Complex([self.s2], "rep") self.c3 = Complex([self.s3], "rep") # rules sequence_1 = (self.s1,) mid_1 = 1 compartments_1 = ["rep"] complexes_1 = [(0, 0)] pairs_1 = [(0, None)] rate_1 = Rate("k1*[X()::rep]") self.r1 = Rule(sequence_1, mid_1, compartments_1, complexes_1, pairs_1, rate_1) sequence_2 = (self.s3, self.s1) mid_2 = 1 compartments_2 = ["rep"] * 2 complexes_2 = [(0, 0), (1, 1)] pairs_2 = [(0, 1)] self.r2 = Rule(sequence_2, mid_2, compartments_2, complexes_2, pairs_2, None) sequence_3 = (self.s2,) mid_3 = 0 compartments_3 = ["rep"] complexes_3 = [(0, 0)] pairs_3 = [(None, 0)] rate_3 = Rate("1.0/(1.0+([X()::rep])**4.0)") self.r3 = Rule(sequence_3, mid_3, compartments_3, complexes_3, pairs_3, rate_3) # inits self.inits = collections.Counter({self.c1: 2, self.c2: 1}) # defs self.defs = {'k1': 0.05, 'k2': 0.12} self.model = Model({self.r1, self.r2, self.r3}, self.inits, self.defs, set()) # model self.model_str_1 = """ #! rules X()::rep => @ k1*[X()::rep] Z()::rep => X()::rep => Y()::rep @ 1/(1+([X()::rep])**4) #! inits 2 X()::rep Y()::rep #! definitions k1 = 0.05 k2 = 0.12 """ self.model_parser = Parser("model") self.model_str_2 = """ #! rules X(K{i})::rep => X(K{p})::rep @ k1*[X()::rep] X(T{a})::rep => X(T{o})::rep @ k2*[Z()::rep] => Y(P{f})::rep @ 1/(1+([X()::rep])**4) #! inits 2 X(K{c}, T{e}).X(K{c}, T{j})::rep Y(P{g}, N{l})::rep #! 
definitions k1 = 0.05 k2 = 0.12 """ # vectors ordering = (self.c1, self.c2, self.c3) self.rate_parser = Parser("rate") rate_expr = "1/(1+([X()::rep])**4)" rate_1 = Rate(self.rate_parser.parse(rate_expr).data) rate_1.vectorize(ordering, dict()) rate_expr = "k1*[X()::rep]" rate_2 = Rate(self.rate_parser.parse(rate_expr).data) rate_2.vectorize(ordering, {"k1": 0.05}) init = State(np.array([2, 1, 0])) vector_reactions = {VectorReaction(State(np.array([0, 0, 0])), State(np.array([0, 1, 0])), rate_1), VectorReaction(State(np.array([1, 0, 0])), State(np.array([0, 0, 0])), rate_2), VectorReaction(State(np.array([0, 0, 1])), State(np.array([1, 0, 0])), None)} self.vm_1 = VectorModel(vector_reactions, init, ordering, None) # wrong models self.model_wrong_1 = \ """#! rules X(K{i})::rep => X(K{p})::rep @ k1*[X()::rep] X(T{a})::rep => X(T{o}):;rep @ k2*[Z()::rep] => Y(P{f})::rep @ 1/(1+([X()::rep])**4) #! inits 2 X(K{c}, T{e}).X(K{c}, T{j})::rep Y(P{g}, N{l})::rep #! definitions k1 = 0.05 k2 = 0.12 """ self.model_wrong_2 = \ """#! rules X(K{i})::rep => X(K{p})::rep @ k1*[X()::rep] X(T{a})::rep = X(T{o})::rep @ k2*[Z()::rep] => Y(P{f})::rep @ 1/(1+([X()::rep])**4) #! inits 2 X(K{c}, T{e}).X(K{c}, T{j})::rep Y(P{g}, N{l})::rep #! definitions k1 = 0.05 k2 = 0.12 """ self.model_with_comments = """ #! rules // commenting X(K{i})::rep => X(K{p})::rep @ k1*[X()::rep] // also here X(T{a})::rep => X(T{o})::rep @ k2*[Z()::rep] => Y(P{f})::rep @ 1/(1+([X()::rep])**4) // ** means power (^) #! inits // here 2 X(K{c}, T{e}).X(K{c}, T{j})::rep Y(P{g}, N{l})::rep // comment just 1 item #! definitions // and k1 = 0.05 // also k2 = 0.12 """ self.model_with_complexes = """ #! rules // commenting X(T{a}):XX::rep => X(T{o}):XX::rep @ k2*[X().X()::rep] K{i}:X():XYZ::rep => K{p}:X():XYZ::rep @ k1*[X().Y().Z()::rep] // also here => P{f}:XP::rep @ 1/(1+([X().P{_}::rep])**4) // ** means power (^) #! inits // here 2 X(K{c}, T{e}).X(K{c}, T{j})::rep Y(P{g}, N{l})::rep // comment just 1 item #! 
definitions // and k1 = 0.05 // also k2 = 0.12 #! complexes XYZ = X().Y().Z() // a big complex XX = X().X() XP = X().P{_} """ self.model_without_complexes = """ #! rules // commenting X(T{a}).X()::rep => X(T{o}).X()::rep @ k2*[X().X()::rep] X(K{i}).Y().Z()::rep => X(K{p}).Y().Z()::rep @ k1*[X().Y().Z()::rep] // also here => X().P{f}::rep @ 1/(1+([X().P{_}::rep])**4) // ** means power (^) #! inits // here 2 X(K{c}, T{e}).X(K{c}, T{j})::rep Y(P{g}, N{l})::rep // comment just 1 item #! definitions // and k1 = 0.05 // also k2 = 0.12 """ self.model_with_variable = """ #! rules // commenting T{a}:X():?::rep => T{o}:X():?::rep @ k2*[X().X()::rep] ; ? = { XX, XY } K{i}:X():XY::rep => K{p}:X():XY::rep @ k1*[X().Y().Z().X()::rep] // also here #! inits // here 2 X(K{c}, T{e}).X(K{c}, T{j})::rep #! definitions // and k1 = 0.05 // also k2 = 0.12 #! complexes XX = X().X() XY = X().Y() """ self.model_without_variable = """ #! rules // commenting X(K{i}).Y()::rep => X(K{p}).Y()::rep @ k1*[X().Y().Z().X()::rep] X(T{a}).X()::rep => X(T{o}).X()::rep @ k2*[X().X()::rep] X(T{a}).Y()::rep => X(T{o}).Y()::rep @ k2*[X().X()::rep] #! inits // here 2 X(K{c}, T{e}).X(K{c}, T{j})::rep #! definitions // and k1 = 0.05 // also k2 = 0.12 """ self.model_with_redundant = """ #! rules K(S{u}).B()::cyt => K(S{p})::cyt + B()::cyt + D(A{_})::cell @ 3*[K().B()::cyt]/2*v_1 K().B()::cyt => K()::cyt + B()::cyt + D(A{_})::cell @ 3*[K().B()::cyt]/2*v_1 K().K()::cyt => K()::cyt + K()::cyt K(S{i}).K()::cyt => K(S{a})::cyt + K()::cyt K(S{i}, T{p}).K()::cyt => K(S{a}, T{p})::cyt + K()::cyt #! inits 2 X(K{c}, T{e}).X(K{c}, T{j})::rep #! definitions v_1 = 0.05 k2 = 0.12 """ self.model_without_redundant = """ #! rules K().B()::cyt => K()::cyt + B()::cyt + D(A{_})::cell @ 3*[K().B()::cyt]/2*v_1 K().K()::cyt => K()::cyt + K()::cyt #! inits 2 X(K{c}, T{e}).X(K{c}, T{j})::rep #! definitions v_1 = 0.05 k2 = 0.12 """ self.model_with_context = """ #! 
rules K(S{i}).B(T{a})::cyt => K(S{i})::cyt + B(T{a})::cyt @ 3*[K(S{i}).B(T{a})::cyt]/2*v_1 A{p}.K(S{i},T{i})::cyt => A{i}::cyt + K(S{a},T{a})::cyt K(S{i},T{i})::cyt => K(S{a},T{i})::cyt #! inits 2 K(S{i}).B(T{a})::cyt 1 A{p}.K(S{i},T{i})::cyt #! definitions v_1 = 0.05 k2 = 0.12 """ self.model_without_context = """ #! rules K().B()::cyt => K()::cyt + B()::cyt @ 3*[K().B()::cyt]/2*v_1 A{_}.K()::cyt => A{_}::cyt + K()::cyt #! inits 2 K().B()::cyt 1 A{_}.K()::cyt #! definitions v_1 = 0.05 k2 = 0.12 """ self.model_reachable = """ #! rules K(S{i}).B()::cyt => K(S{a})::cyt + B()::cyt @ 3*[K(S{i}).B()::cyt]/2*v_1 K(S{a})::cyt + A{i}::cyt => K(S{a}).A{i}::cyt K().A{i}::cyt => K().A{a}::cyt #! inits 2 K(S{i}).B()::cyt 1 A{i}::cyt #! definitions v_1 = 0.05 k2 = 0.12 """ self.model_nonreachable = """ #! rules K(S{i}).B()::cyt => K(S{a})::cyt + B()::cyt @ 3*[K(S{i}).B()::cyt]/2*v_1 K(S{a})::cyt + A{i}::cyt => K(S{a}).A{i}::cyt #! inits 2 K(S{i}).B()::cyt 1 A{i}::cyt #! definitions v_1 = 0.05 k2 = 0.12 """ self.model_parametrised = """ #! rules // commenting X(K{i})::rep => X(K{p})::rep @ k1*[X()::rep] // also here X(T{a})::rep => X(T{o})::rep @ k2*[Z()::rep] => Y(P{f})::rep @ 1/(v_3+([X()::rep])**4) // ** means power (^) #! inits 2 X(K{c}, T{e}).X(K{c}, T{j})::rep Y(P{g}, N{l})::rep // comment just 1 item #! definitions k1 = 0.05 """ self.miyoshi = """
def train(output, task=None, detector=None, inputFiles=None, models=None, parse=None,
          processUnmerging=None, processModifiers=None, isSingleStage=False,
          bioNLPSTParams=None, preprocessorParams=None, exampleStyles=None,
          classifierParams=None, doFullGrid=False, deleteOutput=False, copyFrom=None,
          log="log.txt", step=None, omitSteps=None, debug=False, connection=None):
    """
    Train a new model for event or relation detection.

    The processing steps are, in order, "TRAIN", "DEVEL", "EMPTY" and "TEST";
    each one runs only if the step selector accepts it.

    @param output: A directory where output files will appear.
    @param task: If defined, overridable default settings are used for many of the training parameters. Must be one of the supported TEES tasks.
    @param detector: a Detector object, or a string defining one to be imported
    @param inputFiles: A dictionary of file names, with keys "train", "devel" and, "test"
    @param models: A dictionary of file names defining the place for the new models, with keys "devel" and, "test"
    @param parse: The parse element name in the training interaction XML
    @param processUnmerging: Use the unmerging step of EventDetector. True, False or None for task default.
    @param processModifiers: Use the modifier detection step of EventDetector. True, False or None for task default.
    @param isSingleStage: False for EventDetector, True for a single stage detector.
    @param bioNLPSTParams: Parameters controlling BioNLP ST format output.
    @param preprocessorParams: Parameters controlling the preprocessor. Not used for training, but saved to the model for use when classifying.
    @param exampleStyles: A parameter set for controlling example builders.
    @param classifierParams: A parameter set for controlling classifiers.
    @param doFullGrid: Whether all parameters, as opposed to just recall adjustment, are tested in the EventDetector grid search.
    @param deleteOutput: Remove an existing output directory
    @param copyFrom: Copy an existing output directory for use as a template
    @param log: An optional alternative name for the log file. None is for no logging.
    @param step: A step=substep pair, where the steps are "TRAIN", "DEVEL", "EMPTY" and "TEST"
    @param omitSteps: step=substep parameters, where multiple substeps can be defined.
    @param debug: In debug mode, more output is shown, and some temporary intermediate files are saved
    @param connection: A parameter set defining a local or remote connection for training the classifier
    """
    # Insert default arguments where needed
    inputFiles = Parameters.get(inputFiles, {"train":None, "devel":None, "test":None})
    models = Parameters.get(models, {"devel":None, "test":None})
    exampleStyles = Parameters.get(exampleStyles, {"examples":None, "trigger":None, "edge":None, "unmerging":None, "modifiers":None})
    classifierParams = Parameters.get(classifierParams, {"examples":None, "trigger":None, "recall":None, "edge":None, "unmerging":None, "modifiers":None})
    processUnmerging = getDefinedBool(processUnmerging)
    processModifiers = getDefinedBool(processModifiers)
    # Initialize working directory
    workdir(output, deleteOutput, copyFrom, log)
    # Get task specific parameters
    detector, processUnmerging, processModifiers, isSingleStage, bioNLPSTParams, preprocessorParams, exampleStyles, classifierParams, removeNamesFromEmpty = getTaskSettings(task, detector, processUnmerging, processModifiers, isSingleStage, bioNLPSTParams, preprocessorParams, inputFiles, exampleStyles, classifierParams)
    if task != None:
        task = task.replace("-MINI", "").replace("-FULL", "")
    # Define processing steps
    selector, detectorSteps, omitDetectorSteps = getSteps(step, omitSteps, ["TRAIN", "DEVEL", "EMPTY", "TEST"])

    # Initialize the detector
    detector, detectorName = getDetector(detector)
    detector = detector() # initialize object
    detector.debug = debug
    detector.bioNLPSTParams = detector.getBioNLPSharedTaskParams(bioNLPSTParams)
    #detector.useBioNLPSTFormat = useBioNLPSTFormat # classify-output and grid evaluation in ST-format
    #detector.stWriteScores = True # write confidence scores into additional st-format files
    connection = getConnection(connection)
    detector.setConnection(connection)
    connection.debug = debug
    if deleteOutput:
        connection.clearWorkDir()

    # Train
    if selector.check("TRAIN"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------------ Train Detector ------------------"
        print >> sys.stderr, "----------------------------------------------------"
        if isSingleStage:
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["examples"], classifierParams["examples"], parse, None, task,
                           fromStep=detectorSteps["TRAIN"], workDir="training")
        else:
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["trigger"], exampleStyles["edge"], exampleStyles["unmerging"], exampleStyles["modifiers"],
                           classifierParams["trigger"], classifierParams["edge"], classifierParams["unmerging"], classifierParams["modifiers"],
                           classifierParams["recall"], processUnmerging, processModifiers,
                           doFullGrid, task, parse, None,
                           fromStep=detectorSteps["TRAIN"], workDir="training")
        # Save the detector type
        for modelPath in [models["devel"], models["test"]]:
            # A model path left at its None default must be skipped here:
            # os.path.exists(None) raises a TypeError instead of returning False.
            if modelPath is None or not os.path.exists(modelPath):
                continue
            model = Model(modelPath, "a")
            try:
                model.addStr("detector", detectorName)
                if preprocessorParams != None:
                    preprocessor = Preprocessor()
                    model.addStr("preprocessorParams", Parameters.toString(preprocessor.getParameters(preprocessorParams)))
                model.save()
            finally:
                # Close the model even if writing the metadata fails
                model.close()
    if selector.check("DEVEL"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Check devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        detector.classify(inputFiles["devel"], models["devel"], "classification-devel/devel", goldData=inputFiles["devel"], fromStep=detectorSteps["DEVEL"], workDir="classification-devel")
    if selector.check("EMPTY"):
        # By passing an emptied devel set through the prediction system, we can check that we get the same predictions
        # as in the DEVEL step, ensuring the model does not use leaked information.
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Empty devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        detector.classify(getEmptyCorpus(inputFiles["devel"], removeNames=removeNamesFromEmpty), models["devel"], "classification-empty/devel-empty", fromStep=detectorSteps["EMPTY"], workDir="classification-empty")
    if selector.check("TEST"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------- Test set classification --------------"
        print >> sys.stderr, "----------------------------------------------------"
        if inputFiles["test"] == None or not os.path.exists(inputFiles["test"]):
            print >> sys.stderr, "Skipping, test file", inputFiles["test"], "does not exist"
        else:
            detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
            detector.classify(inputFiles["test"], models["test"], "classification-test/test", fromStep=detectorSteps["TEST"], workDir="classification-test")
            if detector.bioNLPSTParams["convert"]:
                Utils.STFormat.Compare.compare("classification-test/test-events.tar.gz", "classification-devel/devel-events.tar.gz", "a2")
def train(output,
          task=None,
          detector=None,
          inputFiles=None,
          models=None,
          parse=None,
          processUnmerging=None,
          processModifiers=None,
          bioNLPSTParams=None,
          preprocessorParams=None,
          exampleStyles=None,
          classifierParams=None,
          doFullGrid=False,
          deleteOutput=False,
          copyFrom=None,
          log="log.txt",
          step=None,
          omitSteps=None,
          debug=False,
          connection=None,
          subset=None,
          folds=None):
    """
    Train a new model for event or relation detection.

    The processing steps are, in order, "TRAIN", "DEVEL", "EMPTY" and "TEST";
    each one runs only if the step selector accepts it. The function does not
    return a value.

    @param output: A directory where output files will appear.
    @param task: If defined, overridable default settings are used for many of the training parameters. Must be one of the supported TEES tasks.
    @param detector: a Detector object, or a string defining one to be imported
    @param inputFiles: A dictionary of file names, with keys "train", "devel" and, "test"
    @param models: A dictionary of file names defining the place for the new models, with keys "devel" and, "test"
    @param parse: The parse element name in the training interaction XML
    @param processUnmerging: Use the unmerging step of EventDetector. True, False or None for task default.
    @param processModifiers: Use the modifier detection step of EventDetector. True, False or None for task default.
    @param bioNLPSTParams: Parameters controlling BioNLP ST format output.
    @param preprocessorParams: Parameters controlling the preprocessor. Not used for training, but saved to the model for use when classifying.
    @param exampleStyles: A parameter set for controlling example builders.
    @param classifierParams: A parameter set for controlling classifiers.
    @param doFullGrid: Whether all parameters, as opposed to just recall adjustment, are tested in the EventDetector grid search.
    @param deleteOutput: Remove an existing output directory
    @param copyFrom: Copy an existing output directory for use as a template
    @param log: An optional alternative name for the log file. None is for no logging.
    @param step: A step=substep pair, where the steps are "TRAIN", "DEVEL", "EMPTY" and "TEST"
    @param omitSteps: step=substep parameters, where multiple substeps can be defined.
    @param debug: In debug mode, more output is shown, and some temporary intermediate files are saved
    @param connection: A parameter set defining a local or remote connection for training the classifier
    @param subset: A parameter set for making subsets of input files
    @param folds: A dictionary with keys "train", "devel" and "test" (each defaulting to None) that is passed to getFolds together with inputFiles
    """
    # Insert default arguments where needed
    inputFiles = setDictDefaults(inputFiles, {
        "train": None,
        "devel": None,
        "test": None
    })
    models = setDictDefaults(models, {"devel": None, "test": None})
    exampleStyles = setDictDefaults(
        exampleStyles, {
            "examples": None,
            "trigger": None,
            "edge": None,
            "unmerging": None,
            "modifiers": None
        })
    classifierParams = setDictDefaults(
        classifierParams, {
            "examples": None,
            "trigger": None,
            "recall": None,
            "edge": None,
            "unmerging": None,
            "modifiers": None
        })
    subset = setDictDefaults(Parameters.get(subset), {
        "train": None,
        "devel": None,
        "test": None,
        "seed": 0,
        "all": None
    })
    folds = setDictDefaults(folds, {
        "train": None,
        "devel": None,
        "test": None
    })
    processUnmerging = getDefinedBool(processUnmerging)
    processModifiers = getDefinedBool(processModifiers)
    # Initialize working directory
    workdir(output, deleteOutput, copyFrom, log)
    # Get task specific parameters
    detector, bioNLPSTParams, preprocessorParams = getTaskSettings(
        task, detector, bioNLPSTParams, preprocessorParams, inputFiles,
        exampleStyles, classifierParams)
    # Learn training settings from input files
    detector = learnSettings(inputFiles, detector, classifierParams)
    # Get corpus subsets
    # NOTE(review): the return values are discarded, so these calls presumably
    # update inputFiles in place - confirm against getFolds/getSubsets
    getFolds(inputFiles, folds)
    getSubsets(inputFiles, subset)
    # Strip the "-FULL" suffix from the task name before it is passed on
    if task != None:
        task = task.replace("-FULL", "")
    # Define processing steps
    selector, detectorSteps, omitDetectorSteps = getSteps(
        step, omitSteps, ["TRAIN", "DEVEL", "EMPTY", "TEST"])

    # Initialize the detector
    detector, detectorName = getDetector(detector)
    detector = detector()  # initialize object
    detector.debug = debug
    detector.bioNLPSTParams = detector.getBioNLPSharedTaskParams(
        bioNLPSTParams)
    #detector.useBioNLPSTFormat = useBioNLPSTFormat # classify-output and grid evaluation in ST-format
    #detector.stWriteScores = True # write confidence scores into additional st-format files
    connection = getConnection(connection)
    detector.setConnection(connection)
    connection.debug = debug
    if deleteOutput:
        connection.clearWorkDir()

    # Train
    if selector.check("TRAIN"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------------ Train Detector ------------------"
        print >> sys.stderr, "----------------------------------------------------"
        # Single stage detectors take one example style and one classifier
        # parameter set; other detectors take one per stage
        if isinstance(detector, SingleStageDetector):
            detector.train(inputFiles["train"],
                           inputFiles["devel"],
                           models["devel"],
                           models["test"],
                           exampleStyles["examples"],
                           classifierParams["examples"],
                           parse,
                           None,
                           task,
                           fromStep=detectorSteps["TRAIN"],
                           workDir="training")
        else:
            detector.train(inputFiles["train"],
                           inputFiles["devel"],
                           models["devel"],
                           models["test"],
                           exampleStyles["trigger"],
                           exampleStyles["edge"],
                           exampleStyles["unmerging"],
                           exampleStyles["modifiers"],
                           classifierParams["trigger"],
                           classifierParams["edge"],
                           classifierParams["unmerging"],
                           classifierParams["modifiers"],
                           classifierParams["recall"],
                           processUnmerging,
                           processModifiers,
                           doFullGrid,
                           task,
                           parse,
                           None,
                           fromStep=detectorSteps["TRAIN"],
                           workDir="training")
        # Save the detector type
        # Only models that were requested (not None) and exist on disk are updated
        for model in [models["devel"], models["test"]]:
            if model != None and os.path.exists(model):
                model = Model(model, "a")
                model.addStr("detector", detectorName)
                if preprocessorParams != None:
                    preprocessor = Preprocessor()
                    model.addStr(
                        "preprocessorParams",
                        Parameters.toString(
                            preprocessor.getParameters(preprocessorParams)))
                model.save()
                model.close()
    if selector.check("DEVEL"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Check devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        #detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
        detector.classify(inputFiles["devel"],
                          models["devel"],
                          "classification-devel/devel",
                          goldData=inputFiles["devel"],
                          fromStep=detectorSteps["DEVEL"],
                          workDir="classification-devel")
    if selector.check("EMPTY"):
        # By passing an emptied devel set through the prediction system, we can check that we get the same predictions
        # as in the DEVEL step, ensuring the model does not use leaked information.
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Empty devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        #detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
        # removeNames is set when "names" occurs in the string form of the
        # "examples" or "trigger" example style
        detector.classify(getEmptyCorpus(
            inputFiles["devel"],
            removeNames=("names" in str(exampleStyles["examples"])
                         or "names" in str(exampleStyles["trigger"]))),
                          models["devel"],
                          "classification-empty/devel-empty",
                          fromStep=detectorSteps["EMPTY"],
                          workDir="classification-empty")
    if selector.check("TEST"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------- Test set classification --------------"
        print >> sys.stderr, "----------------------------------------------------"
        if inputFiles["test"] == None or not os.path.exists(
                inputFiles["test"]):
            print >> sys.stderr, "Skipping, test file", inputFiles[
                "test"], "does not exist"
        else:
            #detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
            detector.classify(inputFiles["test"],
                              models["test"],
                              "classification-test/test",
                              fromStep=detectorSteps["TEST"],
                              workDir="classification-test")
            # Compare the test set output archive with the devel set output archive
            if detector.bioNLPSTParams["convert"]:
                Utils.STFormat.Compare.compare(
                    "classification-test/test-events.tar.gz",
                    "classification-devel/devel-events.tar.gz", "a2")
def train(output, task=None, detector=None, inputFiles=None, models=None, parse=None,
          processUnmerging=None, processModifiers=None,
          bioNLPSTParams=None, preprocessorParams=None, exampleStyles=None,
          classifierParams=None, doFullGrid=False, deleteOutput=False, copyFrom=None,
          log="log.txt", step=None, omitSteps=None, debug=False, connection=None, subset=None,
          folds=None, corpusDir=None, corpusPreprocessing=None, evaluator=None):
    """
    Train a new model for event or relation detection.

    The function runs up to four steps, selected with 'step' and 'omitSteps':
    "TRAIN" (train the detector and record its type in the model files),
    "DEVEL" (classify the devel set), "EMPTY" (classify an emptied devel set
    and evaluate the result) and "TEST" (classify the test set, if it exists).

    @param output: A directory where output files will appear.
    @param task: If defined, overridable default settings are used for many of the training parameters. Must be one of the supported TEES tasks.
    @param detector: a Detector object, or a string defining one to be imported
    @param inputFiles: A dictionary of file names, with keys "train", "devel" and "test"
    @param models: A dictionary of file names defining the place for the new models, with keys "devel" and "test"
    @param parse: The parse element name in the training interaction XML
    @param processUnmerging: Use the unmerging step of EventDetector. True, False or None for task default.
    @param processModifiers: Use the modifier detection step of EventDetector. True, False or None for task default.
    @param bioNLPSTParams: Parameters controlling BioNLP ST format output.
    @param preprocessorParams: Parameters controlling the preprocessor. Not used for training, but saved to the model for use when classifying.
    @param exampleStyles: A parameter set for controlling example builders.
    @param classifierParams: A parameter set for controlling classifiers.
    @param doFullGrid: Whether all parameters, as opposed to just recall adjustment, are tested in the EventDetector grid search.
    @param deleteOutput: Remove an existing output directory
    @param copyFrom: Copy an existing output directory for use as a template
    @param log: An optional alternative name for the log file. None is for no logging.
    @param step: A step=substep pair, where the steps are "TRAIN", "DEVEL", "EMPTY" and "TEST"
    @param omitSteps: step=substep parameters, where multiple substeps can be defined.
    @param debug: In debug mode, more output is shown, and some temporary intermediate files are saved
    @param connection: A parameter set defining a local or remote connection for training the classifier
    @param subset: A parameter set for making subsets of input files
    @param folds: A dictionary with keys "train", "devel" and "test", passed to getTaskSettings and getFolds
    @param corpusDir: Passed to getTaskSettings as the corpus directory
    @param corpusPreprocessing: Steps for a Preprocessor that is run on the input files before training. The first step must be "MERGE_SETS" and the last "DIVIDE_SETS". Requires 'task' to be defined, as it is used for naming the preprocessed files.
    @param evaluator: An evaluator, resolved with importClass. If defined, it replaces the detector's evaluator.
    """
    # Insert default arguments where needed
    inputFiles = setDictDefaults(inputFiles, {"train":None, "devel":None, "test":None})
    models = setDictDefaults(models, {"devel":"model-devel", "test":"model-test"})
    exampleStyles = setDictDefaults(exampleStyles, {"examples":None, "trigger":None, "edge":None, "unmerging":None, "modifiers":None})
    classifierParams = setDictDefaults(classifierParams, {"examples":None, "trigger":None, "recall":None, "edge":None, "unmerging":None, "modifiers":None})
    subset = setDictDefaults(Parameters.get(subset), {"train":None, "devel":None, "test":None, "seed":0, "all":None})
    folds = setDictDefaults(folds, {"train":None, "devel":None, "test":None})
    processUnmerging = getDefinedBool(processUnmerging)
    processModifiers = getDefinedBool(processModifiers)
    # Initialize working directory
    workdir(output, deleteOutput, copyFrom, log)
    # Get task specific parameters
    useKerasDetector = False
    # The detector may also be given as an object (see the docstring), in which
    # case it has no lower() method and cannot be a Keras detector name.
    if detector != None and hasattr(detector, "lower") and "keras" in detector.lower():
        print >> sys.stderr, "Using a Keras Detector"
        useKerasDetector = True
        if detector.lower() == "keras":
            # A plain "keras" only selects the detector family; the actual
            # detector is then chosen by getTaskSettings / learnSettings.
            detector = None
    detector, bioNLPSTParams, preprocessorParams, folds = getTaskSettings(task, detector,
        bioNLPSTParams, preprocessorParams, inputFiles, exampleStyles, classifierParams, folds,
        corpusDir=corpusDir, useKerasDetector=useKerasDetector)
    # Learn training settings from input files
    detector = learnSettings(inputFiles, detector, classifierParams, task, exampleStyles, useKerasDetector=useKerasDetector)
    # Get corpus subsets
    getFolds(inputFiles, folds)
    getSubsets(inputFiles, subset)
    if task != None:
        task = task.replace("-FULL", "")
        if "." in task:
            # For tasks with a subtask, modifiers are processed only for subtask 3
            _, subTask = getSubTask(task)
            if subTask != 3:
                processModifiers = False
    # Preprocess the corpus if required
    if corpusPreprocessing != None:
        preprocessor = Preprocessor(steps=corpusPreprocessing)
        assert preprocessor.steps[0].name == "MERGE_SETS"
        assert preprocessor.steps[-1].name == "DIVIDE_SETS"
        preprocessedCorpusDir = os.path.join(output, "corpus")
        #outputFiles = {x:os.path.join(preprocessedCorpusDir, os.path.basename(inputFiles[x])) for x in inputFiles}
        preprocessor.process(inputFiles, os.path.join(preprocessedCorpusDir, task))
        #inputFiles = outputFiles
        # Redirect every defined input file to its preprocessed counterpart
        for setName in inputFiles.keys():
            if inputFiles[setName] != None:
                inputFiles[setName] = os.path.join(preprocessedCorpusDir, task + "-" + setName + ".xml")
    # Define processing steps
    selector, detectorSteps, omitDetectorSteps = getSteps(step, omitSteps, ["TRAIN", "DEVEL", "EMPTY", "TEST"])

    # Initialize the detector
    detector, detectorName = getDetector(detector, evaluator=evaluator)
    evaluator, evaluatorName = importClass(evaluator, "evaluator")
    detector = detector() # initialize object
    if evaluator != None:
        print >> sys.stderr, "Using evaluator", evaluator.__name__
        detector.evaluator = evaluator
    detector.debug = debug
    detector.bioNLPSTParams = detector.getBioNLPSharedTaskParams(bioNLPSTParams)
    #detector.useBioNLPSTFormat = useBioNLPSTFormat # classify-output and grid evaluation in ST-format
    #detector.stWriteScores = True # write confidence scores into additional st-format files
    connection = getConnection(connection)
    detector.setConnection(connection)
    connection.debug = debug
    if deleteOutput:
        connection.clearWorkDir()

    # Train
    if selector.check("TRAIN"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------------ Train Detector ------------------"
        print >> sys.stderr, "----------------------------------------------------"
        if not isinstance(detector, EventDetector):
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["examples"], classifierParams["examples"], parse, None, task,
                           fromStep=detectorSteps["TRAIN"], workDir="training", testData=inputFiles["test"])
        else:
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["trigger"], exampleStyles["edge"], exampleStyles["unmerging"], exampleStyles["modifiers"],
                           classifierParams["trigger"], classifierParams["edge"], classifierParams["unmerging"], classifierParams["modifiers"],
                           classifierParams["recall"], processUnmerging, processModifiers,
                           doFullGrid, task, parse, None,
                           fromStep=detectorSteps["TRAIN"], workDir="training", testData=inputFiles["test"])
        # Save the detector type
        for modelPath in [models["devel"], models["test"]]:
            if modelPath != None and os.path.exists(modelPath):
                model = Model(modelPath, "a")
                model.addStr("detector", detectorName)
                if evaluatorName != None:
                    # Stored under its own key: writing the evaluator name to
                    # "detector" would replace the detector name saved above.
                    # NOTE(review): confirm that model readers look up "evaluator".
                    model.addStr("evaluator", evaluatorName)
                if preprocessorParams != None:
                    preprocessor = Preprocessor()
                    model.addStr("preprocessorParams", Parameters.toString(preprocessor.getParameters(preprocessorParams)))
                model.save()
                model.close()
    if selector.check("DEVEL"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Check devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        #detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
        detector.classify(inputFiles["devel"], models["devel"], "classification-devel/devel", goldData=inputFiles["devel"], fromStep=detectorSteps["DEVEL"], workDir="classification-devel")
    if selector.check("EMPTY"):
        # By passing an emptied devel set through the prediction system, we can check that we get the same predictions
        # as in the DEVEL step, ensuring the model does not use leaked information.
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Empty devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        #detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
        # Choose how much annotation getEmptyCorpus removes from the devel set
        removalScope = "non-given"
        if "names" in str(exampleStyles["examples"]) or "names" in str(exampleStyles["trigger"]):
            removalScope = "all"
        elif "Edge" in detector.__class__.__name__:
            removalScope = "interactions"
        detector.classify(getEmptyCorpus(inputFiles["devel"], scope=removalScope), models["devel"], "classification-empty/devel-empty", fromStep=detectorSteps["EMPTY"], workDir="classification-empty")
        print >> sys.stderr, "*** Evaluate empty devel classification ***"
        if os.path.exists("classification-empty/devel-empty-pred.xml.gz"):
            EvaluateInteractionXML.run(detector.evaluator, "classification-empty/devel-empty-pred.xml.gz", inputFiles["devel"], parse)
        else:
            print >> sys.stderr, "No output file for evaluation"
    if selector.check("TEST"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------- Test set classification --------------"
        print >> sys.stderr, "----------------------------------------------------"
        if inputFiles["test"] == None or not os.path.exists(inputFiles["test"]):
            print >> sys.stderr, "Skipping, test file", inputFiles["test"], "does not exist"
        else:
            #detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
            # Fall back to the devel model if no separate test model is defined
            detector.classify(inputFiles["test"], models["test"] if models["test"] != None else models["devel"], "classification-test/test", fromStep=detectorSteps["TEST"], workDir="classification-test")
            if detector.bioNLPSTParams["convert"]:
                # The archive extension depends on the chosen conversion format
                extension = ".zip" if (detector.bioNLPSTParams["convert"] == "zip") else ".tar.gz"
                Utils.STFormat.Compare.compare("classification-test/test-events" + extension, "classification-devel/devel-events" + extension, "a2")
    # Stop logging
    if log != None:
        Stream.closeLog(log)