Exemple #1
0
    def test_create_AP_labels(self):
        model = Model(set(), collections.Counter(), dict(), set())

        complex_parser = Parser("rate_complex")
        complex_1 = complex_parser.parse("K(S{i},T{a}).B{o}::cyt").data.children[0]
        complex_2 = complex_parser.parse("K(S{a},T{a}).B{o}::cyt").data.children[0]
        complex_3 = complex_parser.parse("K(S{a},T{i}).B{o}::cyt").data.children[0]

        complex_abstract = complex_parser.parse("K(S{a}).B{_}::cyt").data.children[0]

        ordering = (complex_1, complex_2, complex_3)

        APs = [Core.Formula.AtomicProposition(complex_abstract, " >= ", "3"),
               Core.Formula.AtomicProposition(complex_1, " < ", 2)]

        s1 = State(np.array((1, 2, 2)))
        s2 = State(np.array((5, 1, 1)))
        s3 = State(np.array((2, 4, 3)))
        s4 = State(np.array((1, 4, 3)))

        states_encoding = {s1: 1, s2: 2, s3: 3, s4: 4}

        result_AP_lables = {APs[0]: 'property_0', APs[1]: 'property_1'}
        result_state_labels = {1: {'property_0', 'property_1'},
                               3: {'property_0', 'init'},
                               4: {'property_0', 'property_1'}}

        ts = TS.TransitionSystem.TransitionSystem(ordering)
        ts.states_encoding = states_encoding
        ts.init = 3

        state_labels, AP_lables = model.create_AP_labels(APs, ts, 0)
        self.assertEqual(state_labels, result_state_labels)
        self.assertEqual(AP_lables, result_AP_lables)
Exemple #2
0
def getDetector(detector, model=None, evaluator=None):
    # Get the detector
    if detector == None:
        assert model != None
        model = Model(model, "r")
        detector = model.getStr("detector")
        model.close()
    return importClass(detector, "detector")
Exemple #3
0
    def test_create_complex_labels(self):
        model = Model(set(), collections.Counter(), dict(), set())
        complex_parser = Parser("rate_complex")
        complex_1 = complex_parser.parse("K(S{i},T{a}).B{o}::cyt").data.children[0]
        complex_2 = complex_parser.parse("K(S{a},T{a}).B{o}::cyt").data.children[0]
        complex_3 = complex_parser.parse("K(S{a},T{i}).B{o}::cyt").data.children[0]
        complex_abstract = complex_parser.parse("K(S{a}).B{_}::cyt").data.children[0]

        ordering = (complex_1, complex_2, complex_3)
        complexes = [complex_2, complex_abstract, complex_1]

        result_labels = {complex_2: "VAR_1",complex_abstract: "ABSTRACT_VAR_12", complex_1: "VAR_0"}
        result_formulas = ['ABSTRACT_VAR_12 = VAR_1+VAR_2; // K(S{a}).B{_}::cyt']

        labels, prism_formulas = model.create_complex_labels(complexes, ordering)
        self.assertEqual(labels, result_labels)
        self.assertEqual(prism_formulas, result_formulas)
Exemple #4
0
 def GetTrainingData(self, inputDim, dataPercent):
     if len(self.allDataList) == 0:
         m = Model()
         pares = m.GetAllPar()
         totalData = m.GetTotalData()
         partTotalDAta = totalData - int(totalData * (1 - dataPercent))
         allData = []
         for p in pares:
             allData = allData + m.GetDataLimited(inputDim, p[0],
                                                  partTotalDAta)
             print(p[0] + "   " + str(len(allData)))
         self.allDataList = allData
     random.shuffle(self.allDataList)
     traingData = self.allDataList[0:int(len(self.allDataList) * 0.7)]
     validationData = self.allDataList[(int(len(self.allDataList) * 0.7)) +
                                       1:len(self.allDataList)]
     return traingData, validationData
Exemple #5
0
def getDetector(detector, model=None):
    # Get the detector
    if detector == None:
        assert model != None
        model = Model(model, "r")
        detector = model.getStr("detector")
        model.close()
    if type(detector) in types.StringTypes:
        print >> sys.stderr, "Importing detector", detector
        detectorName = detector
        if detector.startswith("from"):
            exec detector
            detector = eval(detector.split(".")[-1])
        else:
            exec "from " + detector + " import " + detector.split(".")[-1]
            detector = eval(detector.split(".")[-1])
    else: # assume it is a class
        detectorName = detector.__name__
        print >> sys.stderr, "Using detector", detectorName
        detector = detector
    return detector, detectorName
Exemple #6
0
def getDetector(detector, model=None):
    # Get the detector
    if detector == None:
        assert model != None
        model = Model(model, "r")
        detector = model.getStr("detector")
        model.close()
    if type(detector) in types.StringTypes:
        print >> sys.stderr, "Importing detector", detector
        detectorName = detector
        if detector.startswith("from"):
            exec detector
            detector = eval(detector.split(".")[-1])
        else:
            exec "from " + detector + " import " + detector.split(".")[-1]
            detector = eval(detector.split(".")[-1])
    else:  # assume it is a class
        detectorName = detector.__name__
        print >> sys.stderr, "Using detector", detectorName
        detector = detector
    return detector, detectorName
Exemple #7
0
    def analyseData(self):
        workspace = self.pre_form.value()
        mdl = Model(workspace['Algorithm'], workspace['Parameters'])

        if workspace['Learning Type'] == 'Clustering':
            mdl.fitData(workspace['Data'].post_data)
            pred = mdl.predictData(workspace['Data'].post_data)

        else:
            mdl.fitData(workspace['Data'].post_data['Training'].data,
                        workspace['Data'].post_data['Training'].labels)
            pred = mdl.predictData(workspace['Data'].post_data['Testing'].data)

        workspace['Model'] = mdl
        workspace['Predicted'] = pred
        self.post_options.close()
        del (self.post_options)
        self.post_options = PostAnalysisOptions(self, workspace)
        self.rightScroll.setWidget(self.post_options)
        self.built = True
 def openModel(self, model, mode="r"):
     if type(model) in types.StringTypes:
         model = Model(model, mode)
         self.modelsToClose.append(model)
     return model
Exemple #9
0
    def setUp(self):
        # agents

        self.s1 = StructureAgent("X", set())
        self.s2 = StructureAgent("Y", set())
        self.s3 = StructureAgent("Z", set())

        self.c1 = Complex([self.s1], "rep")
        self.c2 = Complex([self.s2], "rep")
        self.c3 = Complex([self.s3], "rep")

        #  rules

        sequence_1 = (self.s1,)
        mid_1 = 1
        compartments_1 = ["rep"]
        complexes_1 = [(0, 0)]
        pairs_1 = [(0, None)]
        rate_1 = Rate("k1*[X()::rep]")

        self.r1 = Rule(sequence_1, mid_1, compartments_1, complexes_1, pairs_1, rate_1)

        sequence_2 = (self.s3, self.s1)
        mid_2 = 1
        compartments_2 = ["rep"] * 2
        complexes_2 = [(0, 0), (1, 1)]
        pairs_2 = [(0, 1)]

        self.r2 = Rule(sequence_2, mid_2, compartments_2, complexes_2, pairs_2, None)

        sequence_3 = (self.s2,)
        mid_3 = 0
        compartments_3 = ["rep"]
        complexes_3 = [(0, 0)]
        pairs_3 = [(None, 0)]
        rate_3 = Rate("1.0/(1.0+([X()::rep])**4.0)")

        self.r3 = Rule(sequence_3, mid_3, compartments_3, complexes_3, pairs_3, rate_3)

        # inits

        self.inits = collections.Counter({self.c1: 2, self.c2: 1})

        # defs

        self.defs = {'k1': 0.05, 'k2': 0.12}

        self.model = Model({self.r1, self.r2, self.r3}, self.inits, self.defs, set())
        # model

        self.model_str_1 = """
        #! rules
        X()::rep => @ k1*[X()::rep]
        Z()::rep => X()::rep
        => Y()::rep @ 1/(1+([X()::rep])**4)

        #! inits
        2 X()::rep
        Y()::rep
        
        #! definitions
        k1 = 0.05
        k2 = 0.12   
        """

        self.model_parser = Parser("model")

        self.model_str_2 = """
        #! rules
        X(K{i})::rep => X(K{p})::rep @ k1*[X()::rep]
        X(T{a})::rep => X(T{o})::rep @ k2*[Z()::rep]
        => Y(P{f})::rep @ 1/(1+([X()::rep])**4)

        #! inits
        2 X(K{c}, T{e}).X(K{c}, T{j})::rep
        Y(P{g}, N{l})::rep

        #! definitions
        k1 = 0.05
        k2 = 0.12
        """

        # vectors

        ordering = (self.c1, self.c2, self.c3)

        self.rate_parser = Parser("rate")
        rate_expr = "1/(1+([X()::rep])**4)"
        rate_1 = Rate(self.rate_parser.parse(rate_expr).data)
        rate_1.vectorize(ordering, dict())

        rate_expr = "k1*[X()::rep]"
        rate_2 = Rate(self.rate_parser.parse(rate_expr).data)
        rate_2.vectorize(ordering, {"k1": 0.05})

        init = State(np.array([2, 1, 0]))

        vector_reactions = {VectorReaction(State(np.array([0, 0, 0])), State(np.array([0, 1, 0])), rate_1),
                            VectorReaction(State(np.array([1, 0, 0])), State(np.array([0, 0, 0])), rate_2),
                            VectorReaction(State(np.array([0, 0, 1])), State(np.array([1, 0, 0])), None)}

        self.vm_1 = VectorModel(vector_reactions, init, ordering, None)

        # wrong models

        self.model_wrong_1 = \
            """#! rules
            X(K{i})::rep => X(K{p})::rep @ k1*[X()::rep]
            X(T{a})::rep => X(T{o}):;rep @ k2*[Z()::rep]
            => Y(P{f})::rep @ 1/(1+([X()::rep])**4)

            #! inits
            2 X(K{c}, T{e}).X(K{c}, T{j})::rep
            Y(P{g}, N{l})::rep

            #! definitions
            k1 = 0.05
            k2 = 0.12
            """

        self.model_wrong_2 = \
            """#! rules
            X(K{i})::rep => X(K{p})::rep @ k1*[X()::rep]
            X(T{a})::rep = X(T{o})::rep @ k2*[Z()::rep]
            => Y(P{f})::rep @ 1/(1+([X()::rep])**4)

            #! inits
            2 X(K{c}, T{e}).X(K{c}, T{j})::rep
            Y(P{g}, N{l})::rep

            #! definitions
            k1 = 0.05
            k2 = 0.12
            """

        self.model_with_comments = """
            #! rules
            // commenting
            X(K{i})::rep => X(K{p})::rep @ k1*[X()::rep] // also here
            X(T{a})::rep => X(T{o})::rep @ k2*[Z()::rep]
            => Y(P{f})::rep @ 1/(1+([X()::rep])**4) // ** means power (^)

            #! inits
            // here
            2 X(K{c}, T{e}).X(K{c}, T{j})::rep
            Y(P{g}, N{l})::rep // comment just 1 item

            #! definitions
            // and
            k1 = 0.05 // also
            k2 = 0.12
            """
        
        self.model_with_complexes = """
            #! rules
            // commenting
            X(T{a}):XX::rep => X(T{o}):XX::rep @ k2*[X().X()::rep]
            K{i}:X():XYZ::rep => K{p}:X():XYZ::rep @ k1*[X().Y().Z()::rep] // also here
            => P{f}:XP::rep @ 1/(1+([X().P{_}::rep])**4) // ** means power (^)

            #! inits
            // here
            2 X(K{c}, T{e}).X(K{c}, T{j})::rep
            Y(P{g}, N{l})::rep // comment just 1 item

            #! definitions
            // and
            k1 = 0.05 // also
            k2 = 0.12

            #! complexes
            XYZ = X().Y().Z() // a big complex
            XX = X().X()
            XP = X().P{_}
            """

        self.model_without_complexes = """
            #! rules
            // commenting
            X(T{a}).X()::rep => X(T{o}).X()::rep @ k2*[X().X()::rep]
            X(K{i}).Y().Z()::rep => X(K{p}).Y().Z()::rep @ k1*[X().Y().Z()::rep] // also here
            => X().P{f}::rep @ 1/(1+([X().P{_}::rep])**4) // ** means power (^)

            #! inits
            // here
            2 X(K{c}, T{e}).X(K{c}, T{j})::rep
            Y(P{g}, N{l})::rep // comment just 1 item

            #! definitions
            // and
            k1 = 0.05 // also
            k2 = 0.12
            """

        self.model_with_variable = """
            #! rules
            // commenting
            T{a}:X():?::rep => T{o}:X():?::rep @ k2*[X().X()::rep] ; ? = { XX, XY }
            K{i}:X():XY::rep => K{p}:X():XY::rep @ k1*[X().Y().Z().X()::rep] // also here

            #! inits
            // here
            2 X(K{c}, T{e}).X(K{c}, T{j})::rep

            #! definitions
            // and
            k1 = 0.05 // also
            k2 = 0.12

            #! complexes
            XX = X().X()
            XY = X().Y()
            """

        self.model_without_variable = """
            #! rules
            // commenting
            X(K{i}).Y()::rep => X(K{p}).Y()::rep @ k1*[X().Y().Z().X()::rep]
            X(T{a}).X()::rep => X(T{o}).X()::rep @ k2*[X().X()::rep]
            X(T{a}).Y()::rep => X(T{o}).Y()::rep @ k2*[X().X()::rep]

            #! inits
            // here
            2 X(K{c}, T{e}).X(K{c}, T{j})::rep

            #! definitions
            // and
            k1 = 0.05 // also
            k2 = 0.12
            """

        self.model_with_redundant = """
            #! rules
            K(S{u}).B()::cyt => K(S{p})::cyt + B()::cyt + D(A{_})::cell @ 3*[K().B()::cyt]/2*v_1
            K().B()::cyt => K()::cyt + B()::cyt + D(A{_})::cell @ 3*[K().B()::cyt]/2*v_1
            K().K()::cyt => K()::cyt + K()::cyt
            K(S{i}).K()::cyt => K(S{a})::cyt + K()::cyt
            K(S{i}, T{p}).K()::cyt => K(S{a}, T{p})::cyt + K()::cyt

            #! inits
            2 X(K{c}, T{e}).X(K{c}, T{j})::rep

            #! definitions
            v_1 = 0.05
            k2 = 0.12
            """

        self.model_without_redundant = """
            #! rules
            K().B()::cyt => K()::cyt + B()::cyt + D(A{_})::cell @ 3*[K().B()::cyt]/2*v_1
            K().K()::cyt => K()::cyt + K()::cyt

            #! inits
            2 X(K{c}, T{e}).X(K{c}, T{j})::rep

            #! definitions
            v_1 = 0.05
            k2 = 0.12
            """

        self.model_with_context = """
            #! rules
            K(S{i}).B(T{a})::cyt => K(S{i})::cyt + B(T{a})::cyt @ 3*[K(S{i}).B(T{a})::cyt]/2*v_1
            A{p}.K(S{i},T{i})::cyt => A{i}::cyt + K(S{a},T{a})::cyt
            K(S{i},T{i})::cyt => K(S{a},T{i})::cyt

            #! inits
            2 K(S{i}).B(T{a})::cyt
            1 A{p}.K(S{i},T{i})::cyt

            #! definitions
            v_1 = 0.05
            k2 = 0.12
            """

        self.model_without_context = """
            #! rules
            K().B()::cyt => K()::cyt + B()::cyt @ 3*[K().B()::cyt]/2*v_1
            A{_}.K()::cyt => A{_}::cyt + K()::cyt

            #! inits
            2 K().B()::cyt
            1 A{_}.K()::cyt

            #! definitions
            v_1 = 0.05
            k2 = 0.12
            """

        self.model_reachable = """
            #! rules
            K(S{i}).B()::cyt => K(S{a})::cyt + B()::cyt @ 3*[K(S{i}).B()::cyt]/2*v_1
            K(S{a})::cyt + A{i}::cyt => K(S{a}).A{i}::cyt
            K().A{i}::cyt => K().A{a}::cyt

            #! inits
            2 K(S{i}).B()::cyt
            1 A{i}::cyt

            #! definitions
            v_1 = 0.05
            k2 = 0.12
            """

        self.model_nonreachable = """
            #! rules
            K(S{i}).B()::cyt => K(S{a})::cyt + B()::cyt @ 3*[K(S{i}).B()::cyt]/2*v_1
            K(S{a})::cyt + A{i}::cyt => K(S{a}).A{i}::cyt

            #! inits
            2 K(S{i}).B()::cyt
            1 A{i}::cyt

            #! definitions
            v_1 = 0.05
            k2 = 0.12
            """
        
        self.model_parametrised = """
            #! rules
            // commenting
            X(K{i})::rep => X(K{p})::rep @ k1*[X()::rep] // also here
            X(T{a})::rep => X(T{o})::rep @ k2*[Z()::rep]
            => Y(P{f})::rep @ 1/(v_3+([X()::rep])**4) // ** means power (^)

            #! inits
            2 X(K{c}, T{e}).X(K{c}, T{j})::rep
            Y(P{g}, N{l})::rep // comment just 1 item

            #! definitions
            k1 = 0.05
            """

        self.miyoshi = """
Exemple #10
0
def train(output, task=None, detector=None, inputFiles=None, models=None, parse=None,
          processUnmerging=None, processModifiers=None, isSingleStage=False, 
          bioNLPSTParams=None, preprocessorParams=None, exampleStyles=None, 
          classifierParams=None,  doFullGrid=False, deleteOutput=False, copyFrom=None, 
          log="log.txt", step=None, omitSteps=None, debug=False, connection=None):
    """
    Train a new model for event or relation detection.
    
    @param output: A directory where output files will appear.
    @param task: If defined, overridable default settings are used for many of the training parameters. Must be one of the supported TEES tasks.
    @param detector: a Detector object, or a string defining one to be imported
    @param inputFiles: A dictionary of file names, with keys "train", "devel" and, "test"
    @param models: A dictionary of file names defining the place for the new models, with keys "devel" and, "test"
    @param parse: The parse element name in the training interaction XML
    @param processUnmerging: Use the unmerging step of EventDetector. True, False or None for task default.
    @param processModifiers: Use the modifier detection step of EventDetector. True, False or None for task default.
    @param isSingleStage: False for EventDetector, True for a single stage detector.
    @param bioNLPSTParams: Parameters controlling BioNLP ST format output.
    @param preprocessorParams: Parameters controlling the preprocessor. Not used for training, but saved to the model for use when classifying.
    @param exampleStyles: A parameter set for controlling example builders.
    @param classifierParams: A parameter set for controlling classifiers.
    @param doFullGrid: Whether all parameters, as opposed to just recall adjustment, are tested in the EventDetector grid search.
    @param deleteOutput: Remove an existing output directory
    @param copyFrom: Copy an existing output directory for use as a template
    @param log: An optional alternative name for the log file. None is for no logging.
    @param step: A step=substep pair, where the steps are "TRAIN", "DEVEL", "EMPTY" and "TEST"
    @param omitSteps: step=substep parameters, where multiple substeps can be defined.
    @param debug: In debug mode, more output is shown, and some temporary intermediate files are saved
    @param connection: A parameter set defining a local or remote connection for training the classifier
    """
    # Insert default arguments where needed
    inputFiles = Parameters.get(inputFiles, {"train":None, "devel":None, "test":None})
    models = Parameters.get(models, {"devel":None, "test":None})
    exampleStyles = Parameters.get(exampleStyles, {"examples":None, "trigger":None, "edge":None, "unmerging":None, "modifiers":None})
    classifierParams = Parameters.get(classifierParams, {"examples":None, "trigger":None, "recall":None, "edge":None, "unmerging":None, "modifiers":None})
    processUnmerging = getDefinedBool(processUnmerging)
    processModifiers = getDefinedBool(processModifiers)
    # Initialize working directory
    workdir(output, deleteOutput, copyFrom, log)
    # Get task specific parameters
    detector, processUnmerging, processModifiers, isSingleStage, bioNLPSTParams, preprocessorParams, exampleStyles, classifierParams, removeNamesFromEmpty = getTaskSettings(task, 
        detector, processUnmerging, processModifiers, isSingleStage, bioNLPSTParams, preprocessorParams, inputFiles, exampleStyles, classifierParams)   
    if task != None: task = task.replace("-MINI", "").replace("-FULL", "")
    # Define processing steps
    selector, detectorSteps, omitDetectorSteps = getSteps(step, omitSteps, ["TRAIN", "DEVEL", "EMPTY", "TEST"])
    
    # Initialize the detector
    detector, detectorName = getDetector(detector)
    detector = detector() # initialize object
    detector.debug = debug
    detector.bioNLPSTParams = detector.getBioNLPSharedTaskParams(bioNLPSTParams)
    #detector.useBioNLPSTFormat = useBioNLPSTFormat # classify-output and grid evaluation in ST-format
    #detector.stWriteScores = True # write confidence scores into additional st-format files
    connection = getConnection(connection)
    detector.setConnection(connection)
    connection.debug = debug
    if deleteOutput:
        connection.clearWorkDir()
    
    # Train
    if selector.check("TRAIN"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------------ Train Detector ------------------"
        print >> sys.stderr, "----------------------------------------------------"
        if isSingleStage:
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["examples"], classifierParams["examples"], parse, None, task,
                           fromStep=detectorSteps["TRAIN"], workDir="training")
        else:
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["trigger"], exampleStyles["edge"], exampleStyles["unmerging"], exampleStyles["modifiers"],
                           classifierParams["trigger"], classifierParams["edge"], classifierParams["unmerging"], classifierParams["modifiers"],
                           classifierParams["recall"], processUnmerging, processModifiers, 
                           doFullGrid, task, parse, None,
                           fromStep=detectorSteps["TRAIN"], workDir="training")
        # Save the detector type
        for model in [models["devel"], models["test"]]:
            if os.path.exists(model):
                model = Model(model, "a")
                model.addStr("detector", detectorName)
                if preprocessorParams != None:
                    preprocessor = Preprocessor()
                    model.addStr("preprocessorParams", Parameters.toString(preprocessor.getParameters(preprocessorParams)))
                model.save()
                model.close()
    if selector.check("DEVEL"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Check devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        detector.classify(inputFiles["devel"], models["devel"], "classification-devel/devel", goldData=inputFiles["devel"], fromStep=detectorSteps["DEVEL"], workDir="classification-devel")
    if selector.check("EMPTY"):
        # By passing an emptied devel set through the prediction system, we can check that we get the same predictions
        # as in the DEVEL step, ensuring the model does not use leaked information.
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Empty devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        detector.classify(getEmptyCorpus(inputFiles["devel"], removeNames=removeNamesFromEmpty), models["devel"], "classification-empty/devel-empty", fromStep=detectorSteps["EMPTY"], workDir="classification-empty")
    if selector.check("TEST"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------- Test set classification --------------"
        print >> sys.stderr, "----------------------------------------------------"
        if inputFiles["test"] == None or not os.path.exists(inputFiles["test"]):
            print >> sys.stderr, "Skipping, test file", inputFiles["test"], "does not exist"
        else:
            detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
            detector.classify(inputFiles["test"], models["test"], "classification-test/test", fromStep=detectorSteps["TEST"], workDir="classification-test")
            if detector.bioNLPSTParams["convert"]:
                Utils.STFormat.Compare.compare("classification-test/test-events.tar.gz", "classification-devel/devel-events.tar.gz", "a2")
Exemple #11
0
def train(output,
          task=None,
          detector=None,
          inputFiles=None,
          models=None,
          parse=None,
          processUnmerging=None,
          processModifiers=None,
          bioNLPSTParams=None,
          preprocessorParams=None,
          exampleStyles=None,
          classifierParams=None,
          doFullGrid=False,
          deleteOutput=False,
          copyFrom=None,
          log="log.txt",
          step=None,
          omitSteps=None,
          debug=False,
          connection=None,
          subset=None,
          folds=None):
    """
    Train a new model for event or relation detection.
    
    @param output: A directory where output files will appear.
    @param task: If defined, overridable default settings are used for many of the training parameters. Must be one of the supported TEES tasks.
    @param detector: a Detector object, or a string defining one to be imported
    @param inputFiles: A dictionary of file names, with keys "train", "devel" and, "test"
    @param models: A dictionary of file names defining the place for the new models, with keys "devel" and, "test"
    @param parse: The parse element name in the training interaction XML
    @param processUnmerging: Use the unmerging step of EventDetector. True, False or None for task default.
    @param processModifiers: Use the modifier detection step of EventDetector. True, False or None for task default.
    @param bioNLPSTParams: Parameters controlling BioNLP ST format output.
    @param preprocessorParams: Parameters controlling the preprocessor. Not used for training, but saved to the model for use when classifying.
    @param exampleStyles: A parameter set for controlling example builders.
    @param classifierParams: A parameter set for controlling classifiers.
    @param doFullGrid: Whether all parameters, as opposed to just recall adjustment, are tested in the EventDetector grid search.
    @param deleteOutput: Remove an existing output directory
    @param copyFrom: Copy an existing output directory for use as a template
    @param log: An optional alternative name for the log file. None is for no logging.
    @param step: A step=substep pair, where the steps are "TRAIN", "DEVEL", "EMPTY" and "TEST"
    @param omitSteps: step=substep parameters, where multiple substeps can be defined.
    @param debug: In debug mode, more output is shown, and some temporary intermediate files are saved
    @param connection: A parameter set defining a local or remote connection for training the classifier
    @param subset: A parameter set for making subsets of input files
    """
    # Insert default arguments where needed
    inputFiles = setDictDefaults(inputFiles, {
        "train": None,
        "devel": None,
        "test": None
    })
    models = setDictDefaults(models, {"devel": None, "test": None})
    exampleStyles = setDictDefaults(
        exampleStyles, {
            "examples": None,
            "trigger": None,
            "edge": None,
            "unmerging": None,
            "modifiers": None
        })
    classifierParams = setDictDefaults(
        classifierParams, {
            "examples": None,
            "trigger": None,
            "recall": None,
            "edge": None,
            "unmerging": None,
            "modifiers": None
        })
    subset = setDictDefaults(Parameters.get(subset), {
        "train": None,
        "devel": None,
        "test": None,
        "seed": 0,
        "all": None
    })
    folds = setDictDefaults(folds, {
        "train": None,
        "devel": None,
        "test": None
    })
    processUnmerging = getDefinedBool(processUnmerging)
    processModifiers = getDefinedBool(processModifiers)
    # Initialize working directory
    workdir(output, deleteOutput, copyFrom, log)
    # Get task specific parameters
    detector, bioNLPSTParams, preprocessorParams = getTaskSettings(
        task, detector, bioNLPSTParams, preprocessorParams, inputFiles,
        exampleStyles, classifierParams)
    # Learn training settings from input files
    detector = learnSettings(inputFiles, detector, classifierParams)
    # Get corpus subsets
    getFolds(inputFiles, folds)
    getSubsets(inputFiles, subset)
    if task != None:
        task = task.replace("-FULL", "")
    # Define processing steps
    selector, detectorSteps, omitDetectorSteps = getSteps(
        step, omitSteps, ["TRAIN", "DEVEL", "EMPTY", "TEST"])

    # Initialize the detector
    detector, detectorName = getDetector(detector)
    detector = detector()  # initialize object
    detector.debug = debug
    detector.bioNLPSTParams = detector.getBioNLPSharedTaskParams(
        bioNLPSTParams)
    #detector.useBioNLPSTFormat = useBioNLPSTFormat # classify-output and grid evaluation in ST-format
    #detector.stWriteScores = True # write confidence scores into additional st-format files
    connection = getConnection(connection)
    detector.setConnection(connection)
    connection.debug = debug
    if deleteOutput:
        connection.clearWorkDir()

    # Train
    if selector.check("TRAIN"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------------ Train Detector ------------------"
        print >> sys.stderr, "----------------------------------------------------"
        if isinstance(detector, SingleStageDetector):
            detector.train(inputFiles["train"],
                           inputFiles["devel"],
                           models["devel"],
                           models["test"],
                           exampleStyles["examples"],
                           classifierParams["examples"],
                           parse,
                           None,
                           task,
                           fromStep=detectorSteps["TRAIN"],
                           workDir="training")
        else:
            detector.train(inputFiles["train"],
                           inputFiles["devel"],
                           models["devel"],
                           models["test"],
                           exampleStyles["trigger"],
                           exampleStyles["edge"],
                           exampleStyles["unmerging"],
                           exampleStyles["modifiers"],
                           classifierParams["trigger"],
                           classifierParams["edge"],
                           classifierParams["unmerging"],
                           classifierParams["modifiers"],
                           classifierParams["recall"],
                           processUnmerging,
                           processModifiers,
                           doFullGrid,
                           task,
                           parse,
                           None,
                           fromStep=detectorSteps["TRAIN"],
                           workDir="training")
        # Save the detector type
        for model in [models["devel"], models["test"]]:
            if model != None and os.path.exists(model):
                model = Model(model, "a")
                model.addStr("detector", detectorName)
                if preprocessorParams != None:
                    preprocessor = Preprocessor()
                    model.addStr(
                        "preprocessorParams",
                        Parameters.toString(
                            preprocessor.getParameters(preprocessorParams)))
                model.save()
                model.close()
    if selector.check("DEVEL"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Check devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        #detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
        detector.classify(inputFiles["devel"],
                          models["devel"],
                          "classification-devel/devel",
                          goldData=inputFiles["devel"],
                          fromStep=detectorSteps["DEVEL"],
                          workDir="classification-devel")
    if selector.check("EMPTY"):
        # By passing an emptied devel set through the prediction system, we can check that we get the same predictions
        # as in the DEVEL step, ensuring the model does not use leaked information.
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Empty devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        #detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
        detector.classify(getEmptyCorpus(
            inputFiles["devel"],
            removeNames=("names" in str(exampleStyles["examples"])
                         or "names" in str(exampleStyles["trigger"]))),
                          models["devel"],
                          "classification-empty/devel-empty",
                          fromStep=detectorSteps["EMPTY"],
                          workDir="classification-empty")
    if selector.check("TEST"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------- Test set classification --------------"
        print >> sys.stderr, "----------------------------------------------------"
        if inputFiles["test"] == None or not os.path.exists(
                inputFiles["test"]):
            print >> sys.stderr, "Skipping, test file", inputFiles[
                "test"], "does not exist"
        else:
            #detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
            detector.classify(inputFiles["test"],
                              models["test"],
                              "classification-test/test",
                              fromStep=detectorSteps["TEST"],
                              workDir="classification-test")
            if detector.bioNLPSTParams["convert"]:
                Utils.STFormat.Compare.compare(
                    "classification-test/test-events.tar.gz",
                    "classification-devel/devel-events.tar.gz", "a2")
Exemple #12
0
def train(output, task=None, detector=None, inputFiles=None, models=None, parse=None,
          processUnmerging=None, processModifiers=None, 
          bioNLPSTParams=None, preprocessorParams=None, exampleStyles=None, 
          classifierParams=None,  doFullGrid=False, deleteOutput=False, copyFrom=None, 
          log="log.txt", step=None, omitSteps=None, debug=False, connection=None, subset=None, 
          folds=None, corpusDir=None, corpusPreprocessing=None, evaluator=None):
    """
    Train a new model for event or relation detection.
    
    @param output: A directory where output files will appear.
    @param task: If defined, overridable default settings are used for many of the training parameters. Must be one of the supported TEES tasks.
    @param detector: a Detector object, or a string defining one to be imported
    @param inputFiles: A dictionary of file names, with keys "train", "devel" and, "test"
    @param models: A dictionary of file names defining the place for the new models, with keys "devel" and, "test"
    @param parse: The parse element name in the training interaction XML
    @param processUnmerging: Use the unmerging step of EventDetector. True, False or None for task default.
    @param processModifiers: Use the modifier detection step of EventDetector. True, False or None for task default.
    @param bioNLPSTParams: Parameters controlling BioNLP ST format output.
    @param preprocessorParams: Parameters controlling the preprocessor. Not used for training, but saved to the model for use when classifying.
    @param exampleStyles: A parameter set for controlling example builders.
    @param classifierParams: A parameter set for controlling classifiers.
    @param doFullGrid: Whether all parameters, as opposed to just recall adjustment, are tested in the EventDetector grid search.
    @param deleteOutput: Remove an existing output directory
    @param copyFrom: Copy an existing output directory for use as a template
    @param log: An optional alternative name for the log file. None is for no logging.
    @param step: A step=substep pair, where the steps are "TRAIN", "DEVEL", "EMPTY" and "TEST"
    @param omitSteps: step=substep parameters, where multiple substeps can be defined.
    @param debug: In debug mode, more output is shown, and some temporary intermediate files are saved
    @param connection: A parameter set defining a local or remote connection for training the classifier
    @param subset: A parameter set for making subsets of input files
    """
    # Insert default arguments where needed
    inputFiles = setDictDefaults(inputFiles, {"train":None, "devel":None, "test":None})
    models = setDictDefaults(models, {"devel":"model-devel", "test":"model-test"})
    exampleStyles = setDictDefaults(exampleStyles, {"examples":None, "trigger":None, "edge":None, "unmerging":None, "modifiers":None})
    classifierParams = setDictDefaults(classifierParams, {"examples":None, "trigger":None, "recall":None, "edge":None, "unmerging":None, "modifiers":None})
    subset = setDictDefaults(Parameters.get(subset), {"train":None, "devel":None, "test":None, "seed":0, "all":None})
    folds = setDictDefaults(folds, {"train":None, "devel":None, "test":None})
    processUnmerging = getDefinedBool(processUnmerging)
    processModifiers = getDefinedBool(processModifiers)
    # Initialize working directory
    workdir(output, deleteOutput, copyFrom, log)
    # Get task specific parameters
    useKerasDetector = False
    if detector != None and "keras" in detector.lower():
        print >> sys.stderr, "Using a Keras Detector"
        useKerasDetector = True
        if detector.lower() == "keras":
            detector = None
    detector, bioNLPSTParams, preprocessorParams, folds = getTaskSettings(task, detector, 
        bioNLPSTParams, preprocessorParams, inputFiles, exampleStyles, classifierParams, folds, corpusDir=corpusDir, useKerasDetector=useKerasDetector)
    # Learn training settings from input files
    detector = learnSettings(inputFiles, detector, classifierParams, task, exampleStyles, useKerasDetector=useKerasDetector)   
    # Get corpus subsets   
    getFolds(inputFiles, folds)
    getSubsets(inputFiles, subset)
    if task != None: 
        task = task.replace("-FULL", "")
    if "." in task:
        _, subTask = getSubTask(task)
        if subTask != 3:
            processModifiers = False
    # Preprocess the corpus if required
    if corpusPreprocessing != None:
        preprocessor = Preprocessor(steps=corpusPreprocessing)
        assert preprocessor.steps[0].name == "MERGE_SETS"
        assert preprocessor.steps[-1].name == "DIVIDE_SETS"
        preprocessedCorpusDir = os.path.join(output, "corpus")
        #outputFiles = {x:os.path.join(preprocessedCorpusDir, os.path.basename(inputFiles[x])) for x in inputFiles}
        preprocessor.process(inputFiles, os.path.join(preprocessedCorpusDir, task))
        #inputFiles = outputFiles
        for setName in inputFiles.keys():
            if inputFiles[setName] != None:
                inputFiles[setName] = os.path.join(preprocessedCorpusDir, task + "-" + setName + ".xml")
    # Define processing steps
    selector, detectorSteps, omitDetectorSteps = getSteps(step, omitSteps, ["TRAIN", "DEVEL", "EMPTY", "TEST"])
    
    # Initialize the detector
    detector, detectorName = getDetector(detector, evaluator=evaluator)
    evaluator, evaluatorName = importClass(evaluator, "evaluator")
    detector = detector() # initialize object
    if evaluator != None:
        print >> sys.stderr, "Using evaluator", evaluator.__name__
        detector.evaluator = evaluator
    detector.debug = debug
    detector.bioNLPSTParams = detector.getBioNLPSharedTaskParams(bioNLPSTParams)
    #detector.useBioNLPSTFormat = useBioNLPSTFormat # classify-output and grid evaluation in ST-format
    #detector.stWriteScores = True # write confidence scores into additional st-format files
    connection = getConnection(connection)
    detector.setConnection(connection)
    connection.debug = debug
    if deleteOutput:
        connection.clearWorkDir()
    
    # Train
    if selector.check("TRAIN"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------------ Train Detector ------------------"
        print >> sys.stderr, "----------------------------------------------------"
        if not isinstance(detector, EventDetector):
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["examples"], classifierParams["examples"], parse, None, task,
                           fromStep=detectorSteps["TRAIN"], workDir="training", testData=inputFiles["test"])
        else:
            detector.train(inputFiles["train"], inputFiles["devel"], models["devel"], models["test"],
                           exampleStyles["trigger"], exampleStyles["edge"], exampleStyles["unmerging"], exampleStyles["modifiers"],
                           classifierParams["trigger"], classifierParams["edge"], classifierParams["unmerging"], classifierParams["modifiers"],
                           classifierParams["recall"], processUnmerging, processModifiers, 
                           doFullGrid, task, parse, None,
                           fromStep=detectorSteps["TRAIN"], workDir="training", testData=inputFiles["test"])
        # Save the detector type
        for model in [models["devel"], models["test"]]:
            if model != None and os.path.exists(model):
                model = Model(model, "a")
                model.addStr("detector", detectorName)
                if evaluatorName != None:
                    model.addStr("detector", evaluatorName)
                if preprocessorParams != None:
                    preprocessor = Preprocessor()
                    model.addStr("preprocessorParams", Parameters.toString(preprocessor.getParameters(preprocessorParams)))
                model.save()
                model.close()
    if selector.check("DEVEL"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Check devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        #detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
        detector.classify(inputFiles["devel"], models["devel"], "classification-devel/devel", goldData=inputFiles["devel"], fromStep=detectorSteps["DEVEL"], workDir="classification-devel")
    if selector.check("EMPTY"):
        # By passing an emptied devel set through the prediction system, we can check that we get the same predictions
        # as in the DEVEL step, ensuring the model does not use leaked information.
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------ Empty devel classification ------------"
        print >> sys.stderr, "----------------------------------------------------"
        #detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
        removalScope = "non-given"
        if "names" in str(exampleStyles["examples"]) or "names" in str(exampleStyles["trigger"]):
            removalScope = "all"
        elif "Edge" in detector.__class__.__name__:
            removalScope = "interactions"
        detector.classify(getEmptyCorpus(inputFiles["devel"], scope=removalScope), models["devel"], "classification-empty/devel-empty", fromStep=detectorSteps["EMPTY"], workDir="classification-empty")
        print >> sys.stderr, "*** Evaluate empty devel classification ***"
        if os.path.exists("classification-empty/devel-empty-pred.xml.gz"):
            EvaluateInteractionXML.run(detector.evaluator, "classification-empty/devel-empty-pred.xml.gz", inputFiles["devel"], parse)
        else:
            print >> sys.stderr, "No output file for evaluation"
    if selector.check("TEST"):
        print >> sys.stderr, "----------------------------------------------------"
        print >> sys.stderr, "------------- Test set classification --------------"
        print >> sys.stderr, "----------------------------------------------------"
        if inputFiles["test"] == None or not os.path.exists(inputFiles["test"]):
            print >> sys.stderr, "Skipping, test file", inputFiles["test"], "does not exist"
        else:
            #detector.bioNLPSTParams["scores"] = False # the evaluation server doesn't like additional files
            detector.classify(inputFiles["test"], models["test"] if models["test"] != None else models["devel"], "classification-test/test", fromStep=detectorSteps["TEST"], workDir="classification-test")
            if detector.bioNLPSTParams["convert"]:
                extension = ".zip" if (detector.bioNLPSTParams["convert"] == "zip") else ".tar.gz" 
                Utils.STFormat.Compare.compare("classification-test/test-events" + extension, "classification-devel/devel-events" + extension, "a2")
    # Stop logging
    if log != None:
        Stream.closeLog(log)