Example #1
 def test(cls, examples, modelPath, output=None, parameters=None, timeout=None):
     if type(examples) == types.ListType:
         print >> sys.stderr, "Classifying", len(examples), "with All-True Classifier"
         examples, predictions = self.filterClassificationSet(examples, False)
         testPath = self.tempDir+"/test.dat"
         Example.writeExamples(examples, testPath)
     else:
         print >> sys.stderr, "Classifying file", examples, "with All-True Classifier"
         testPath = examples
         examples = Example.readExamples(examples,False)
     print >> sys.stderr, "Note! Classification must be binary"
     #examples, predictions = self.filterClassificationSet(examples, True)
     predictions = []
     for example in examples:
         #predictions.append( (example, example[1]) )
         predictions.append( [2] ) #[example[1]] )
     
     if output == None:
         output = "predictions"
     f = open(output, "wt")
     for p in predictions:
         f.write(str(p[0])+"\n")
     f.close()
         
     return predictions
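
Across these snippets each example appears to be a list of the form [id, class, featureDict, extras]: example[1] is read as the class label, example[2] as a sparse feature dictionary, and example[3] as an extra-attributes dictionary (see the crossValidate examples further down). A minimal sketch of that inferred structure and of the writeExamples call, assuming the project's Example module is importable; ids, values and the path are placeholders:

import Example  # the project's example I/O module (import path assumed)

# Hypothetical examples following the structure inferred from these snippets:
# [id, classId, {featureId: value, ...}, {extra attributes}]
examples = [
    ["example_d0.s0.e1", 1, {3: 1.0, 17: 0.5}, {}],
    ["example_d0.s0.e2", 2, {5: 1.0}, {}],
]
# Write them to disk in the project's SVM-style example format.
Example.writeExamples(examples, "train.dat")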
Example #2
 def train(self, examples, parameters=None):
     self.isBinary = self.isBinaryProblem(examples)
     examples = self.filterTrainingSet(examples)
     ExampleUtils.writeExamples(examples, self.tempDir + "/train.dat")
     #prepare parameters:
     if parameters.has_key("c"):
         assert (not parameters.has_key("C"))
         parameters["C"] = parameters["c"]
         del parameters["c"]
     totalExamples = float(sum(self.classes.values()))
     weight_label = self.classes.keys()
     weight_label.sort()
     weight = []
     for k in weight_label:
         weight.append(1.0 - self.classes[k] / totalExamples)
     libSVMparam = svm.svm_parameter(nr_weight=len(self.classes),
                                     weight_label=weight_label,
                                     weight=weight,
                                     **parameters)
     labels = []
     samples = []
     for example in examples:
         labels.append(example[1])
         samples.append(example[2])
     problem = svm.svm_problem(labels, samples)
     self.model = svm.svm_model(problem, libSVMparam)
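
The class-weighting scheme above gives each class the weight 1 - (class count / total count), so under-represented classes count for more in the libsvm objective. A self-contained sketch of that computation with made-up class counts:

# Made-up class counts: classId -> number of training examples.
classes = {1: 800, 2: 150, 3: 50}
totalExamples = float(sum(classes.values()))        # 1000.0
weight_label = sorted(classes.keys())               # [1, 2, 3]
weight = [1.0 - classes[k] / totalExamples for k in weight_label]
# weight == [0.2, 0.85, 0.95]: the rarest class (3) gets the largest weight.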
Example #3
    def test(cls,
             examples,
             modelPath,
             output=None,
             parameters=None,
             timeout=None):
        if type(examples) == types.ListType:
            print >> sys.stderr, "Classifying", len(
                examples), "with All-Correct Classifier"
            examples, predictions = self.filterClassificationSet(
                examples, False)
            testPath = self.tempDir + "/test.dat"
            Example.writeExamples(examples, testPath)
        else:
            print >> sys.stderr, "Classifying file", examples, "with All-Correct Classifier"
            testPath = examples
            examples = Example.readExamples(examples, False)
        #examples, predictions = self.filterClassificationSet(examples, True)
        predictions = []
        for example in examples:
            #predictions.append( (example, example[1]) )
            predictions.append([example[1]])

        if output == None:
            output = "predictions"
        f = open(output, "wt")
        for p in predictions:
            f.write(str(p[0]) + "\n")
        f.close()

        return predictions
Example #4
 def classify(self, examples, parameters=None):
     if type(examples) == types.StringType:
         testFilePath = examples
         predictions = []
         realClasses = []
         exampleFile = open(examples,"rt")
         for line in exampleFile.readlines():
             realClasses.append(int(line.split(" ",1)[0].strip()))
         exampleFile.close()
     elif type(examples) == types.ListType:
         examples, predictions = self.filterClassificationSet(examples, True)
         Example.writeExamples(examples, self.tempDir+"/test.dat")
         testFilePath = self.tempDir+"/test.dat"
     args = [self.classifyBin]
     if parameters != None:
         self.__addParametersToSubprocessCall(args, parameters)
     args += [testFilePath, self.tempDir+"/model", self.tempDir+"/predictions"]
     #print args
     subprocess.call(args, stdout = self.debugFile)
     os.remove(self.tempDir+"/model")
     predictionsFile = open(self.tempDir+"/predictions", "rt")
     lines = predictionsFile.readlines()
     predictionsFile.close()
     #predictions = []
     for i in range(len(lines)):
         if type(examples) == types.ListType:
             predictions.append( (examples[i],float(lines[i]),self.type,lines[i]) )
         else:
             predictions.append( ([None,realClasses[i]],float(lines[i]),self.type) )
     return predictions
Example #5
    def buildGraphKernelFeatures(self, sentenceGraph, path):
        edgeList = []
        depGraph = sentenceGraph.dependencyGraph
        pt = path
        for i in range(1, len(path)):
            edgeList.extend(depGraph.getEdges(pt[i], pt[i - 1]))
            edgeList.extend(depGraph.getEdges(pt[i - 1], pt[i]))
        edges = edgeList
        adjacencyMatrix, labels = self._buildAdjacencyMatrix(
            sentenceGraph, path, edges)
        node_count = 2 * len(sentenceGraph.tokens) + len(
            sentenceGraph.dependencies)

        if sentenceGraph.sentenceElement.attrib["id"] == "LLL.d0.s0":
            adjacencyMatrixToHtml(adjacencyMatrix, labels,
                                  "LLL.d0.s0_adjacency_matrix.html")

        allPathsMatrix = self._prepareMatrix(adjacencyMatrix, node_count)
        self._matrixToFeatures(allPathsMatrix, labels)
        if sentenceGraph.sentenceElement.attrib["id"] == "LLL.d0.s0":
            adjacencyMatrixToHtml(allPathsMatrix, labels,
                                  "LLL.d0.s0_all_paths_matrix.html")
            commentLines = []
            commentLines.extend(self.featureSet.toStrings())
            example = [
                "example_" + self.entity1.attrib["id"] + "_" +
                self.entity2.attrib["id"], "unknown", self.features
            ]
            ExampleUtils.writeExamples([example], "LLL.d0.s0_example.txt",
                                       commentLines)
Example #6
 def train(cls, examples, parameters, outputFile=None): #, timeout=None):
     """
     Train the SVM-multiclass classifier on a set of examples.
     
     @type examples: string (filename) or list (or iterator) of examples
     @param examples: a list or file containing examples in SVM-format
     @type parameters: a dictionary or string
     @param parameters: parameters for the classifier
     @type outputFile: string
     @param outputFile: the name of the model file to be written
     """
     timer = Timer()
     parameters = cls.getParams(parameters)
     
     # If examples are in a list, they will be written to a file for SVM-multiclass
     if type(examples) == types.ListType:
         print >> sys.stderr, "Training SVM-MultiClass on", len(examples), "examples"
         trainPath = self.tempDir+"/train.dat"
         examples = self.filterTrainingSet(examples)
         Example.writeExamples(examples, trainPath)
     else:
         print >> sys.stderr, "Training SVM-MultiClass on file", examples
         trainPath = cls.stripComments(examples)
     args = ["/home/jari/Programs/liblinear-1.5-poly2/train"]
     cls.__addParametersToSubprocessCall(args, parameters)
     if outputFile == None:
         args += [trainPath, "model"]
         logFile = open("svmmulticlass.log","at")
     else:
         args += [trainPath, outputFile]
         logFile = open(outputFile+".log","wt")
     rv = subprocess.call(args, stdout = logFile)
     logFile.close()
     print >> sys.stderr, timer.toString()
     return rv
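
A hypothetical call to the train method above; the class name follows the SVMMultiClassClassifier references elsewhere on this page, and the file names and parameter values are placeholders rather than values from the original project:

rv = SVMMultiClassClassifier.train("train.dat",
                                   parameters={"c": 1000},
                                   outputFile="model")
# rv is the return code of the external training binary (0 on success);
# a "model.log" file is written alongside the model.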
Example #7
 def test(cls, examples, modelPath, output=None, parameters=None, forceInternal=False): # , timeout=None):
     """
     Classify examples with a pre-trained model.
     
     @type examples: string (filename) or list (or iterator) of examples
     @param examples: a list or file containing examples in SVM-format
     @type modelPath: string
     @param modelPath: filename of the pre-trained model file
     @type parameters: a dictionary or string
     @param parameters: parameters for the classifier
     @type output: string
     @param output: the name of the predictions file to be written
     @type forceInternal: Boolean
     @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
     """
     if forceInternal or Settings.SVMMultiClassDir == None:
         return cls.testInternal(examples, modelPath, output)
     timer = Timer()
     if type(examples) == types.ListType:
         print >> sys.stderr, "Classifying", len(examples), "with SVM-MultiClass model", modelPath
         examples, predictions = self.filterClassificationSet(examples, False)
         testPath = self.tempDir+"/test.dat"
         Example.writeExamples(examples, testPath)
     else:
         print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath
         testPath = cls.stripComments(examples)
         examples = Example.readExamples(examples,False)
     args = ["/home/jari/Programs/liblinear-1.5-poly2/predict"]
     if modelPath == None:
         modelPath = "model"
     if parameters != None:
         parameters = copy.copy(parameters)
         if parameters.has_key("c"):
             del parameters["c"]
         if parameters.has_key("predefined"):
             parameters = copy.copy(parameters)
             modelPath = os.path.join(parameters["predefined"][0],"classifier/model")
             del parameters["predefined"]
         self.__addParametersToSubprocessCall(args, parameters)
     if output == None:
         output = "predictions"
         logFile = open("svmmulticlass.log","at")
     else:
         logFile = open(output+".log","wt")
     args += [testPath, modelPath, output]
     #if timeout == None:
     #    timeout = -1
     #print args
     subprocess.call(args, stdout = logFile, stderr = logFile)
     predictionsFile = open(output, "rt")
     lines = predictionsFile.readlines()
     predictionsFile.close()
     predictions = []
     for i in range(len(lines)):
         predictions.append( [int(lines[i].split()[0])] + lines[i].split()[1:] )
         #predictions.append( (examples[i],int(lines[i].split()[0]),"multiclass",lines[i].split()[1:]) )
     print >> sys.stderr, timer.toString()
     return predictions
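
Correspondingly, a hypothetical call to the test method above; each returned prediction is a list whose first element is the predicted class id parsed from the predictions file, followed by the remaining whitespace-separated values of that line (file names are placeholders):

predictions = SVMMultiClassClassifier.test("test.dat", "model",
                                           output="predictions")
for prediction in predictions:
    predictedClassId = prediction[0]   # int: first column of the predictions file
    rawValues = prediction[1:]         # remaining columns, still as strings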
Example #8
 def classify(self, examples, parameters=None):
     examples, predictions = self.filterClassificationSet(
         examples, self.isBinary)
     ExampleUtils.writeExamples(examples, self.tempDir + "/test.dat")
     for i in range(len(examples)):
         if self.isBinary:
             predictedClass = self.model.predict(examples[i][2])
             predictions.append((examples[i], predictedClass, "binary"))
         else:
             predictedClass = self.model.predict(examples[i][2])
             predictions.append((examples[i], predictedClass, "multiclass"))
     return predictions
Example #9
 def classify(self, examples, parameters=None):
     examples, predictions = self.filterClassificationSet(examples, self.isBinary)
     ExampleUtils.writeExamples(examples, self.tempDir+"/test.dat")
     for i in range(len(examples)):
         if self.isBinary:
             predictedClass = self.model.predict(examples[i][2])
             predictions.append( (examples[i],predictedClass,"binary") )
         else:
             predictedClass = self.model.predict(examples[i][2])
             predictions.append( (examples[i],predictedClass,"multiclass") )
     return predictions
         
         
Example #10
 def buildGraphKernelFeatures(self, sentenceGraph, path):
     edgeList = []
     depGraph = sentenceGraph.dependencyGraph
     pt = path
     for i in range(1, len(path)):
         edgeList.extend(depGraph.getEdges(pt[i], pt[i-1]))
         edgeList.extend(depGraph.getEdges(pt[i-1], pt[i]))
     edges = edgeList
     adjacencyMatrix, labels = self._buildAdjacencyMatrix(sentenceGraph, path, edges)
     node_count = 2*len(sentenceGraph.tokens) + len(sentenceGraph.dependencies)
     
     if sentenceGraph.sentenceElement.attrib["id"] == "LLL.d0.s0":
         adjacencyMatrixToHtml(adjacencyMatrix, labels, "LLL.d0.s0_adjacency_matrix.html")
     
     allPathsMatrix = self._prepareMatrix(adjacencyMatrix, node_count)
     self._matrixToFeatures(allPathsMatrix, labels)
     if sentenceGraph.sentenceElement.attrib["id"] == "LLL.d0.s0":
         adjacencyMatrixToHtml(allPathsMatrix, labels, "LLL.d0.s0_all_paths_matrix.html")
         commentLines = []
         commentLines.extend(self.featureSet.toStrings())
         example = ["example_"+self.entity1.attrib["id"]+"_"+self.entity2.attrib["id"],"unknown",self.features]
         ExampleUtils.writeExamples([example],"LLL.d0.s0_example.txt",commentLines)
Example #11
    def train(self, examples, parameters=None, outputDir=None):
        timeout = -1
        if type(examples) == types.StringType:
            trainFilePath = examples
        elif type(examples) == types.ListType:
            examples = self.filterTrainingSet(examples)
            parameters = copy.copy(parameters)
            if parameters.has_key("style"):
                if "no_duplicates" in parameters["style"]:
                    examples = Example.removeDuplicates(examples)
                del parameters["style"]
            Example.writeExamples(examples, self.tempDir+"/train.dat")
            trainFilePath = self.tempDir+"/train.dat"

        if parameters.has_key("timeout"):
            timeout = parameters["timeout"]
            del parameters["timeout"]        
        args = [self.trainBin]
        if parameters != None:
            self.__addParametersToSubprocessCall(args, parameters)
        args += [trainFilePath, self.tempDir+"/model"]
        return killableprocess.call(args, stdout = self.debugFile, timeout = timeout)
Example #12
 def train(self, examples, parameters=None):
     self.isBinary = self.isBinaryProblem(examples)
     examples = self.filterTrainingSet(examples)
     ExampleUtils.writeExamples(examples, self.tempDir+"/train.dat")
     #prepare parameters:
     if parameters.has_key("c"):
         assert(not parameters.has_key("C"))
         parameters["C"] = parameters["c"]
         del parameters["c"]
     totalExamples = float(sum(self.classes.values()))
     weight_label = self.classes.keys()
     weight_label.sort()
     weight = []
     for k in weight_label:
         weight.append(1.0-self.classes[k]/totalExamples)
     libSVMparam = svm.svm_parameter(nr_weight = len(self.classes), weight_label=weight_label, weight=weight, **parameters)
     labels = []
     samples = []
     for example in examples:
         labels.append(example[1])
         samples.append(example[2])
     problem = svm.svm_problem(labels, samples)
     self.model = svm.svm_model(problem, libSVMparam)
Example #13
    def train(self, examples, parameters=None, outputDir=None):
        timeout = -1
        if type(examples) == types.StringType:
            trainFilePath = examples
        elif type(examples) == types.ListType:
            examples = self.filterTrainingSet(examples)
            parameters = copy.copy(parameters)
            if parameters.has_key("style"):
                if "no_duplicates" in parameters["style"]:
                    examples = Example.removeDuplicates(examples)
                del parameters["style"]
            Example.writeExamples(examples, self.tempDir + "/train.dat")
            trainFilePath = self.tempDir + "/train.dat"

        if parameters.has_key("timeout"):
            timeout = parameters["timeout"]
            del parameters["timeout"]
        args = [self.trainBin]
        if parameters != None:
            self.__addParametersToSubprocessCall(args, parameters)
        args += [trainFilePath, self.tempDir + "/model"]
        return killableprocess.call(args,
                                    stdout=self.debugFile,
                                    timeout=timeout)
Example #14
 def classify(self, examples, parameters=None):
     if type(examples) == types.StringType:
         testFilePath = examples
         predictions = []
         realClasses = []
         exampleFile = open(examples, "rt")
         for line in exampleFile.readlines():
             realClasses.append(int(line.split(" ", 1)[0].strip()))
         exampleFile.close()
     elif type(examples) == types.ListType:
         examples, predictions = self.filterClassificationSet(
             examples, True)
         Example.writeExamples(examples, self.tempDir + "/test.dat")
         testFilePath = self.tempDir + "/test.dat"
     args = [self.classifyBin]
     if parameters != None:
         self.__addParametersToSubprocessCall(args, parameters)
     args += [
         testFilePath, self.tempDir + "/model",
         self.tempDir + "/predictions"
     ]
     #print args
     subprocess.call(args, stdout=self.debugFile)
     os.remove(self.tempDir + "/model")
     predictionsFile = open(self.tempDir + "/predictions", "rt")
     lines = predictionsFile.readlines()
     predictionsFile.close()
     #predictions = []
     for i in range(len(lines)):
         if type(examples) == types.ListType:
             predictions.append(
                 (examples[i], float(lines[i]), self.type, lines[i]))
         else:
             predictions.append(
                 ([None, realClasses[i]], float(lines[i]), self.type))
     return predictions
Example #15
    def train(cls, examples, parameters, outputFile=None):  #, timeout=None):
        """
        Train the SVM-multiclass classifier on a set of examples.
        
        @type examples: string (filename) or list (or iterator) of examples
        @param examples: a list or file containing examples in SVM-format
        @type parameters: a dictionary or string
        @param parameters: parameters for the classifier
        @type outputFile: string
        @param outputFile: the name of the model file to be written
        """
        timer = Timer()
        parameters = cls.getParams(parameters)

        # If examples are in a list, they will be written to a file for SVM-multiclass
        if type(examples) == types.ListType:
            print >> sys.stderr, "Training SVM-MultiClass on", len(
                examples), "examples"
            trainPath = self.tempDir + "/train.dat"
            examples = self.filterTrainingSet(examples)
            Example.writeExamples(examples, trainPath)
        else:
            print >> sys.stderr, "Training SVM-MultiClass on file", examples
            trainPath = cls.stripComments(examples)
        args = ["/home/jari/Programs/liblinear-1.5-poly2/train"]
        cls.__addParametersToSubprocessCall(args, parameters)
        if outputFile == None:
            args += [trainPath, "model"]
            logFile = open("svmmulticlass.log", "at")
        else:
            args += [trainPath, outputFile]
            logFile = open(outputFile + ".log", "wt")
        rv = subprocess.call(args, stdout=logFile)
        logFile.close()
        print >> sys.stderr, timer.toString()
        return rv
Example #16
 def test(cls, examples, modelPath, output=None, parameters=None, forceInternal=False, classIds=None): # , timeout=None):
     """
     Classify examples with a pre-trained model.
     
     @type examples: string (filename) or list (or iterator) of examples
     @param examples: a list or file containing examples in SVM-format
     @type modelPath: string
     @param modelPath: filename of the pre-trained model file
     @type parameters: a dictionary or string
     @param parameters: parameters for the classifier
     @type output: string
     @param output: the name of the predictions file to be written
     @type forceInternal: Boolean
     @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
     """
     if type(parameters) == types.StringType:
         parameters = splitParameters(parameters)
     timer = Timer()
     if type(examples) == types.ListType:
         print >> sys.stderr, "Classifying", len(examples), "with SVM-MultiClass model", modelPath
         examples, predictions = self.filterClassificationSet(examples, False)
         testPath = self.tempDir+"/test.dat"
         Example.writeExamples(examples, testPath)
     else:
         print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath
         testPath = examples
         examples = Example.readExamples(examples,False)
     if parameters != None:
         parameters = copy.copy(parameters)
         if parameters.has_key("c"):
             del parameters["c"]
         if parameters.has_key("predefined"):
             parameters = copy.copy(parameters)
             modelPath = os.path.join(parameters["predefined"][0],"classifier/model")
             del parameters["predefined"]
     # Read model
     if modelPath == None:
         modelPath = "model-multilabel"
     classModels = {}
     if modelPath.endswith(".gz"):
         f = gzip.open(modelPath, "rt")
     else:
         f = open(modelPath, "rt")
     thresholds = {}
     for line in f:
         key, value, threshold = line.split()
         classModels[key] = value
         if threshold != "None":
             thresholds[key] = float(threshold)
         else:
             thresholds[key] = 0.0
     f.close()
     mergedPredictions = []
     if type(classIds) == types.StringType:
         classIds = IdSet(filename=classIds)
     #print classModels
     print "Thresholds", thresholds
     classifierBin = Settings.SVMMultiClassDir+"/svm_multiclass_classify"
     print parameters
     if "classifier" in parameters and "svmperf" in parameters["classifier"]:
         classifierBin = Settings.SVMPerfDir+"/svm_perf_classify"
         parameters = copy.copy(parameters)
         del parameters["classifier"]
     for className in classIds.getNames():
         if className != "neg" and not "---" in className:
             classId = classIds.getId(className)
             if thresholds[str(className)] != 0.0:
                 print >> sys.stderr, "Classifying", className, "with threshold", thresholds[str(className)]
             else:
                 print >> sys.stderr, "Classifying", className
             args = [classifierBin]
             #self.__addParametersToSubprocessCall(args, parameters)
             classOutput = "predictions" + ".cls-" + className
             logFile = open("svmmulticlass" + ".cls-" + className + ".log","at")
             args += [testPath, classModels[str(className)], classOutput]
             print args
             subprocess.call(args, stdout = logFile, stderr = logFile)
             cls.addPredictions(classOutput, mergedPredictions, classId, len(classIds.Ids), threshold=thresholds[str(className)])
     print >> sys.stderr, timer.toString()
     
     predFileName = output
     f = open(predFileName, "wt")
     for mergedPred in mergedPredictions:
         if len(mergedPred[0]) > 1 and "1" in mergedPred[0]:
             mergedPred[0].remove("1")
         mergedPred[1] = str(mergedPred[1])
         mergedPred[0] = ",".join(sorted(list(mergedPred[0])))
         f.write(" ".join(mergedPred) + "\n")
     f.close()
     
     return mergedPredictions
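
In this multi-label variant, modelPath is expected to name a plain-text index rather than a single model file: each line holds a class name, the path of that class's per-class model, and a decision threshold (or the string None). A hypothetical three-line index matching the key, value, threshold = line.split() loop above; class and file names are made up:

Phosphorylation models/Phosphorylation.model 0.35
Binding models/Binding.model None
neg models/neg.model None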
Example #17
def crossValidate(exampleBuilder, corpusElements, examples, options, timer):
    parameterOptimizationSet = None
    constantParameterOptimizationSet = None
    if options.paramOptData != None:
        print >> sys.stderr, "Separating parameter optimization set"
        parameterOptimizationDivision = Example.makeCorpusDivision(corpusElements, float(options.paramOptData))
        exampleSets = Example.divideExamples(examples, parameterOptimizationDivision)
        constantParameterOptimizationSet = exampleSets[0]
        parameterOptimizationSet = constantParameterOptimizationSet
        optDocs = 0
        for k,v in parameterOptimizationDivision.iteritems():
            if v == 0:
                del corpusElements.documentsById[k]
                optDocs += 1
        print >> sys.stderr, "  Documents for parameter optimization:", optDocs
    discardedParameterCombinations = []

    print >> sys.stderr, "Dividing data into folds"
    corpusFolds = Example.makeCorpusFolds(corpusElements, options.folds[0])
    exampleSets = Example.divideExamples(examples, corpusFolds)
    
    keys = exampleSets.keys()
    keys.sort()
    evaluations = []
    for key in keys:
        testSet = exampleSets[key]
        for example in testSet:
            example[3]["visualizationSet"] = key + 1
        trainSet = []
        for key2 in keys:
            if key != key2:
                trainSet.extend(exampleSets[key2])
        print >> sys.stderr, "Fold", str(key + 1)
        # Create classifier object
        if options.output != None:
            if not os.path.exists(options.output+"/fold"+str(key+1)):
                os.mkdir(options.output+"/fold"+str(key+1))
#                if not os.path.exists(options.output+"/fold"+str(key+1)+"/classifier"):
#                    os.mkdir(options.output+"/fold"+str(key+1)+"/classifier")
            classifier = Classifier(workDir = options.output + "/fold"+str(key + 1))
        else:
            classifier = Classifier()
        classifier.featureSet = exampleBuilder.featureSet
        # Optimize ####################
        # Check whether there is need for included param opt set
        if parameterOptimizationSet == None and options.folds[1] == 0: # 8-1-1 folds
            assert(len(keys) > 1)
            if keys.index(key) == 0:
                parameterOptimizationSetKey = keys[-1]
            else:
                parameterOptimizationSetKey = keys[keys.index(key)-1]
            parameterOptimizationSet = exampleSets[parameterOptimizationSetKey]
            trainSet = []
            for key2 in keys:
                if key2 != key and key2 != parameterOptimizationSetKey:
                    trainSet.extend(exampleSets[key2])

        if parameterOptimizationSet != None: # constant external parameter optimization set
            evaluationArgs = {"classSet":exampleBuilder.classSet}
            if options.parameters != None:
                paramDict = splitParameters(options.parameters)
                bestResults = classifier.optimize([trainSet], [parameterOptimizationSet], paramDict, Evaluation, evaluationArgs, combinationsThatTimedOut=discardedParameterCombinations)
            else:
                bestResults = classifier.optimize([trainSet], [parameterOptimizationSet], evaluationClass=Evaluation, evaluationArgs=evaluationArgs, combinationsThatTimedOut=discardedParameterCombinations)
        else: # nested x-fold parameter optimization
            assert (options.folds[1] >= 2)
            optimizationFolds = Example.makeExampleFolds(trainSet, options.folds[1])
            optimizationSets = Example.divideExamples(trainSet, optimizationFolds)
            optimizationSetList = []
            optSetKeys = optimizationSets.keys()
            optSetKeys.sort()
            for optSetKey in optSetKeys:
                optimizationSetList.append(optimizationSets[optSetKey])
            evaluationArgs = {"classSet":exampleBuilder.classSet}
            if options.parameters != None:
                paramDict = splitParameters(options.parameters)
                bestResults = classifier.optimize(optimizationSetList, optimizationSetList, paramDict, Evaluation, evaluationArgs, combinationsThatTimedOut=discardedParameterCombinations)
            else:
                bestResults = classifier.optimize(optimizationSetList, optimizationSetList, evaluationClass=Evaluation, evaluationArgs=evaluationArgs, combinationsThatTimedOut=discardedParameterCombinations)
        
        # Classify
        print >> sys.stderr, "Classifying test data"
        bestParams = bestResults[2]
        if bestParams.has_key("timeout"):
            del bestParams["timeout"]
        print >> sys.stderr, "Parameters:", bestParams
        print >> sys.stderr, "Training",
        startTime = time.time()
        classifier.train(trainSet, bestParams)
        print >> sys.stderr, "(Time spent:", time.time() - startTime, "s)"
        print >> sys.stderr, "Testing",
        startTime = time.time()
        predictions = classifier.classify(testSet)
        if options.output != None:
            pdict = []
            fieldnames = ["class","prediction","id","fold"]
            for p in predictions:
                if "typed" in exampleBuilder.styles:
                    pdict.append( {"class":exampleBuilder.classSet.getName(p[0][1]), "prediction":exampleBuilder.classSet.getName(p[1]), "id":p[0][0], "fold":key} )
                else:
                    pdict.append( {"class":p[0][1], "prediction":p[1], "id":p[0][0], "fold":key} )
            TableUtils.addToCSV(pdict, options.output +"/predictions.csv", fieldnames)
        print >> sys.stderr, "(Time spent:", time.time() - startTime, "s)"
        
        # Calculate statistics
        evaluation = Evaluation(predictions, classSet=exampleBuilder.classSet)
        print >> sys.stderr, evaluation.toStringConcise()
        print >> sys.stderr, timer.toString()
        evaluations.append(evaluation)
        
        # Save example sets
        if options.output != None:
            print >> sys.stderr, "Saving example sets to", options.output
            Example.writeExamples(exampleSets[0], options.output +"/fold"+str(key+1) + "/examplesTest.txt")
            Example.writeExamples(exampleSets[1], options.output +"/fold"+str(key+1) + "/examplesTrain.txt")
            if parameterOptimizationSet == None:
                for k,v in optimizationSets.iteritems():
                    Example.writeExamples(v, options.output +"/fold"+str(key+1) + "/examplesOptimizationSet" + str(k) + ".txt")
            else:
                Example.writeExamples(parameterOptimizationSet, options.output +"/fold"+str(key+1) + "/examplesOptimizationSetPredefined.txt")
            TableUtils.writeCSV(bestResults[2], options.output +"/fold"+str(key+1) + "/parameters.csv")
            evaluation.saveCSV(options.output +"/fold"+str(key+1) + "/results.csv")
            print >> sys.stderr, "Compressing folder"
            zipTree(options.output, "fold"+str(key+1))
        
        parameterOptimizationSet = constantParameterOptimizationSet
    
    print >> sys.stderr, "Cross-validation Results"
    for i in range(len(evaluations)):
        print >> sys.stderr, evaluations[i].toStringConcise("  Fold "+str(i)+": ")
    averageResult = Evaluation.average(evaluations)
    print >> sys.stderr, averageResult.toStringConcise("  Avg: ")
    pooledResult = Evaluation.pool(evaluations)
    print >> sys.stderr, pooledResult.toStringConcise("  Pool: ")
    if options.output != None:
        for i in range(len(evaluations)):
            evaluations[i].saveCSV(options.output+"/results.csv", i)
        averageResult.saveCSV(options.output+"/results.csv", "Avg")
        pooledResult.saveCSV(options.output+"/results.csv", "Pool")
        averageResult.saveCSV(options.output+"/resultsAverage.csv")
        pooledResult.saveCSV(options.output+"/resultsPooled.csv")
    # Visualize
    if options.visualization != None:
        visualize(sentences, pooledResult.classifications, options, exampleBuilder)
    
    # Save interactionXML
    if options.resultsToXML != None:
        classSet = None
        if "typed" in exampleBuilder.styles:
            classSet = exampleBuilder.classSet
        Example.writeToInteractionXML(pooledResult.classifications, corpusElements, options.resultsToXML, classSet)
Example #18
                    
    return examples

if __name__=="__main__":
    # Import Psyco if available
    try:
        import psyco
        psyco.full()
        print >> sys.stderr, "Found Psyco, using"
    except ImportError:
        print >> sys.stderr, "Psyco not installed"

    from optparse import OptionParser
    import os
    optparser = OptionParser(usage="%prog [options]\nCreate an html visualization for a corpus.")
    optparser.add_option("-i", "--input", default=None, dest="input", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-o", "--output", default=None, dest="output", help="Output directory, useful for debugging")
    (options, args) = optparser.parse_args()
    
    print >> sys.stderr, "Reading input from " + options.input
    examples = readARFF(options.input)
    if options.output == None:
        if options.input.rsplit(".",1)[-1] == "arff":
            options.output = options.input.rsplit(".",1)[0] + ".examples"
        else:
            options.output = options.input + ".examples"
    print >> sys.stderr, "Writing output to " + options.output
    ExampleUtils.writeExamples(examples, options.output)
    
    
Example #19
    variantExamples = ExampleUtils.readExamples(
        os.path.join(options.variant, "test-triggers.examples"))

    invariantFeatureSet = IdSet()
    invariantFeatureSet.load(
        os.path.join(options.invariant, "feature_names.txt"))
    invariantClassSet = IdSet()
    invariantClassSet.load(os.path.join(options.invariant, "class_names.txt"))

    variantFeatureSet = IdSet()
    variantFeatureSet.load(
        os.path.join(options.variant, "test-triggers.examples.feature_names"))
    variantClassSet = IdSet()
    variantClassSet.load(
        os.path.join(options.variant, "test-triggers.examples.class_names"))

    counter = ProgressCounter(len(variantExamples))
    for example in variantExamples:
        counter.update()
        example[1] = invariantClassSet.getId(
            variantClassSet.getName(example[1]))
        newFeatures = {}
        for k, v in example[2].iteritems():
            newFeatures[invariantFeatureSet.getId(
                variantFeatureSet.getName(k))] = v
        example[2] = newFeatures

    ExampleUtils.writeExamples(
        variantExamples, os.path.join(options.variant,
                                      "realignedExamples.txt"))
Example #20
 else:
     print >> sys.stderr, "Using predefined model"
     bestResults = [None,None,{}]
     for k,v in classifierParamDict.iteritems():
         bestResults[2][k] = v
     featureSet = IdSet()
     featureSet.load(os.path.join(classifierParamDict["predefined"][0], "feature_names.txt"))
     classSet = None
     if os.path.exists(os.path.join(classifierParamDict["predefined"][0], "class_names.txt")):
         classSet = IdSet()
         classSet.load(os.path.join(classifierParamDict["predefined"][0], "class_names.txt"))
     exampleBuilder = ExampleBuilder(featureSet=featureSet, classSet=classSet, **splitParameters(options.exampleBuilderParameters))
 # Save training sets
 if options.output != None:
     print >> sys.stderr, "Saving example sets to", options.output
     Example.writeExamples(exampleSets[0], options.output + "/examplesTrain.txt")
     if not classifierParamDict.has_key("predefined"):
         Example.writeExamples(optimizationSets[0], options.output + "/examplesOptimizationTest.txt")
         Example.writeExamples(optimizationSets[1], options.output + "/examplesOptimizationTrain.txt")
     TableUtils.writeCSV(bestResults[2], options.output +"/best_parameters.csv")
 
 # Optimize and train
 if options.output != None:
     classifier = Classifier(workDir = options.output + "/classifier")
 else:
     classifier = Classifier()
 classifier.featureSet = exampleBuilder.featureSet
 if hasattr(exampleBuilder,"classSet"):
     classifier.classSet = exampleBuilder.classSet
 print >> sys.stderr, "Classifying test data"
 if bestResults[2].has_key("timeout"):
Example #21
def crossValidate(exampleBuilder, corpusElements, examples, options, timer):
    parameterOptimizationSet = None
    constantParameterOptimizationSet = None
    if options.paramOptData != None:
        print >> sys.stderr, "Separating parameter optimization set"
        parameterOptimizationDivision = Example.makeCorpusDivision(
            corpusElements, float(options.paramOptData))
        exampleSets = Example.divideExamples(examples,
                                             parameterOptimizationDivision)
        constantParameterOptimizationSet = exampleSets[0]
        parameterOptimizationSet = constantParameterOptimizationSet
        optDocs = 0
        for k, v in parameterOptimizationDivision.iteritems():
            if v == 0:
                del corpusElements.documentsById[k]
                optDocs += 1
        print >> sys.stderr, "  Documents for parameter optimization:", optDocs
    discardedParameterCombinations = []

    print >> sys.stderr, "Dividing data into folds"
    corpusFolds = Example.makeCorpusFolds(corpusElements, options.folds[0])
    exampleSets = Example.divideExamples(examples, corpusFolds)

    keys = exampleSets.keys()
    keys.sort()
    evaluations = []
    for key in keys:
        testSet = exampleSets[key]
        for example in testSet:
            example[3]["visualizationSet"] = key + 1
        trainSet = []
        for key2 in keys:
            if key != key2:
                trainSet.extend(exampleSets[key2])
        print >> sys.stderr, "Fold", str(key + 1)
        # Create classifier object
        if options.output != None:
            if not os.path.exists(options.output + "/fold" + str(key + 1)):
                os.mkdir(options.output + "/fold" + str(key + 1))


#                if not os.path.exists(options.output+"/fold"+str(key+1)+"/classifier"):
#                    os.mkdir(options.output+"/fold"+str(key+1)+"/classifier")
            classifier = Classifier(workDir=options.output + "/fold" +
                                    str(key + 1))
        else:
            classifier = Classifier()
        classifier.featureSet = exampleBuilder.featureSet
        # Optimize ####################
        # Check whether there is need for included param opt set
        if parameterOptimizationSet == None and options.folds[
                1] == 0:  # 8-1-1 folds
            assert (len(keys) > 1)
            if keys.index(key) == 0:
                parameterOptimizationSetKey = keys[-1]
            else:
                parameterOptimizationSetKey = keys[keys.index(key) - 1]
            parameterOptimizationSet = exampleSets[parameterOptimizationSetKey]
            trainSet = []
            for key2 in keys:
                if key2 != key and key2 != parameterOptimizationSetKey:
                    trainSet.extend(exampleSets[key2])

        if parameterOptimizationSet != None:  # constant external parameter optimization set
            evaluationArgs = {"classSet": exampleBuilder.classSet}
            if options.parameters != None:
                paramDict = splitParameters(options.parameters)
                bestResults = classifier.optimize(
                    [trainSet], [parameterOptimizationSet],
                    paramDict,
                    Evaluation,
                    evaluationArgs,
                    combinationsThatTimedOut=discardedParameterCombinations)
            else:
                bestResults = classifier.optimize(
                    [trainSet], [parameterOptimizationSet],
                    evaluationClass=Evaluation,
                    evaluationArgs=evaluationArgs,
                    combinationsThatTimedOut=discardedParameterCombinations)
        else:  # nested x-fold parameter optimization
            assert (options.folds[1] >= 2)
            optimizationFolds = Example.makeExampleFolds(
                trainSet, options.folds[1])
            optimizationSets = Example.divideExamples(trainSet,
                                                      optimizationFolds)
            optimizationSetList = []
            optSetKeys = optimizationSets.keys()
            optSetKeys.sort()
            for optSetKey in optSetKeys:
                optimizationSetList.append(optimizationSets[optSetKey])
            evaluationArgs = {"classSet": exampleBuilder.classSet}
            if options.parameters != None:
                paramDict = splitParameters(options.parameters)
                bestResults = classifier.optimize(
                    optimizationSetList,
                    optimizationSetList,
                    paramDict,
                    Evaluation,
                    evaluationArgs,
                    combinationsThatTimedOut=discardedParameterCombinations)
            else:
                bestResults = classifier.optimize(
                    optimizationSetList,
                    optimizationSetList,
                    evaluationClass=Evaluation,
                    evaluationArgs=evaluationArgs,
                    combinationsThatTimedOut=discardedParameterCombinations)

        # Classify
        print >> sys.stderr, "Classifying test data"
        bestParams = bestResults[2]
        if bestParams.has_key("timeout"):
            del bestParams["timeout"]
        print >> sys.stderr, "Parameters:", bestParams
        print >> sys.stderr, "Training",
        startTime = time.time()
        classifier.train(trainSet, bestParams)
        print >> sys.stderr, "(Time spent:", time.time() - startTime, "s)"
        print >> sys.stderr, "Testing",
        startTime = time.time()
        predictions = classifier.classify(testSet)
        if options.output != None:
            pdict = []
            fieldnames = ["class", "prediction", "id", "fold"]
            for p in predictions:
                if "typed" in exampleBuilder.styles:
                    pdict.append({
                        "class":
                        exampleBuilder.classSet.getName(p[0][1]),
                        "prediction":
                        exampleBuilder.classSet.getName(p[1]),
                        "id":
                        p[0][0],
                        "fold":
                        key
                    })
                else:
                    pdict.append({
                        "class": p[0][1],
                        "prediction": p[1],
                        "id": p[0][0],
                        "fold": key
                    })
            TableUtils.addToCSV(pdict, options.output + "/predictions.csv",
                                fieldnames)
        print >> sys.stderr, "(Time spent:", time.time() - startTime, "s)"

        # Calculate statistics
        evaluation = Evaluation(predictions, classSet=exampleBuilder.classSet)
        print >> sys.stderr, evaluation.toStringConcise()
        print >> sys.stderr, timer.toString()
        evaluations.append(evaluation)

        # Save example sets
        if options.output != None:
            print >> sys.stderr, "Saving example sets to", options.output
            Example.writeExamples(
                exampleSets[0],
                options.output + "/fold" + str(key + 1) + "/examplesTest.txt")
            Example.writeExamples(
                exampleSets[1],
                options.output + "/fold" + str(key + 1) + "/examplesTrain.txt")
            if parameterOptimizationSet == None:
                for k, v in optimizationSets.iteritems():
                    Example.writeExamples(
                        v, options.output + "/fold" + str(key + 1) +
                        "/examplesOptimizationSet" + str(k) + ".txt")
            else:
                Example.writeExamples(
                    parameterOptimizationSet, options.output + "/fold" +
                    str(key + 1) + "/examplesOptimizationSetPredefined.txt")
            TableUtils.writeCSV(
                bestResults[2],
                options.output + "/fold" + str(key + 1) + "/parameters.csv")
            evaluation.saveCSV(options.output + "/fold" + str(key + 1) +
                               "/results.csv")
            print >> sys.stderr, "Compressing folder"
            zipTree(options.output, "fold" + str(key + 1))

        parameterOptimizationSet = constantParameterOptimizationSet

    print >> sys.stderr, "Cross-validation Results"
    for i in range(len(evaluations)):
        print >> sys.stderr, evaluations[i].toStringConcise("  Fold " +
                                                            str(i) + ": ")
    averageResult = Evaluation.average(evaluations)
    print >> sys.stderr, averageResult.toStringConcise("  Avg: ")
    pooledResult = Evaluation.pool(evaluations)
    print >> sys.stderr, pooledResult.toStringConcise("  Pool: ")
    if options.output != None:
        for i in range(len(evaluations)):
            evaluations[i].saveCSV(options.output + "/results.csv", i)
        averageResult.saveCSV(options.output + "/results.csv", "Avg")
        pooledResult.saveCSV(options.output + "/results.csv", "Pool")
        averageResult.saveCSV(options.output + "/resultsAverage.csv")
        pooledResult.saveCSV(options.output + "/resultsPooled.csv")
    # Visualize
    if options.visualization != None:
        visualize(sentences, pooledResult.classifications, options,
                  exampleBuilder)

    # Save interactionXML
    if options.resultsToXML != None:
        classSet = None
        if "typed" in exampleBuilder.styles:
            classSet = exampleBuilder.classSet
        Example.writeToInteractionXML(pooledResult.classifications,
                                      corpusElements, options.resultsToXML,
                                      classSet)
Example #22
    
    defaultAnalysisFilename = "/usr/share/biotext/ComplexPPI/BioInferForComplexPPIVisible.xml"
    optparser = OptionParser(usage="%prog [options]\nCreate an html visualization for a corpus.")
    optparser.add_option("-i", "--invariant", default=None, dest="invariant", help="Corpus in analysis format", metavar="FILE")
    optparser.add_option("-v", "--variant", default=None, dest="variant", help="Corpus in analysis format", metavar="FILE")
    (options, args) = optparser.parse_args()
    
    #invariantExamples = ExampleUtils.readExamples(os.path.join(options.invariant, "examples.txt"))
    variantExamples = ExampleUtils.readExamples(os.path.join(options.variant, "test-triggers.examples"))
    
    invariantFeatureSet = IdSet()
    invariantFeatureSet.load(os.path.join(options.invariant, "feature_names.txt"))
    invariantClassSet = IdSet()
    invariantClassSet.load(os.path.join(options.invariant, "class_names.txt"))

    variantFeatureSet = IdSet()
    variantFeatureSet.load(os.path.join(options.variant, "test-triggers.examples.feature_names"))
    variantClassSet = IdSet()
    variantClassSet.load(os.path.join(options.variant, "test-triggers.examples.class_names"))
    
    counter = ProgressCounter(len(variantExamples))
    for example in variantExamples:
        counter.update()
        example[1] = invariantClassSet.getId(variantClassSet.getName(example[1]))
        newFeatures = {}
        for k,v in example[2].iteritems():
            newFeatures[ invariantFeatureSet.getId(variantFeatureSet.getName(k)) ] = v
        example[2] = newFeatures
        
    ExampleUtils.writeExamples(variantExamples, os.path.join(options.variant, "realignedExamples.txt"))
Example #23
    def test(cls,
             examples,
             modelPath,
             output=None,
             parameters=None,
             forceInternal=False,
             classIds=None):  # , timeout=None):
        """
        Classify examples with a pre-trained model.
        
        @type examples: string (filename) or list (or iterator) of examples
        @param examples: a list or file containing examples in SVM-format
        @type modelPath: string
        @param modelPath: filename of the pre-trained model file
        @type parameters: a dictionary or string
        @param parameters: parameters for the classifier
        @type output: string
        @param output: the name of the predictions file to be written
        @type forceInternal: Boolean
        @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
        """
        if type(parameters) == types.StringType:
            parameters = splitParameters(parameters)
        timer = Timer()
        if type(examples) == types.ListType:
            print >> sys.stderr, "Classifying", len(
                examples), "with SVM-MultiClass model", modelPath
            examples, predictions = self.filterClassificationSet(
                examples, False)
            testPath = self.tempDir + "/test.dat"
            Example.writeExamples(examples, testPath)
        else:
            print >> sys.stderr, "Classifying file", examples, "with SVM-MultiClass model", modelPath
            testPath = examples
            examples = Example.readExamples(examples, False)
        if parameters != None:
            parameters = copy.copy(parameters)
            if parameters.has_key("c"):
                del parameters["c"]
            if parameters.has_key("predefined"):
                parameters = copy.copy(parameters)
                modelPath = os.path.join(parameters["predefined"][0],
                                         "classifier/model")
                del parameters["predefined"]
        # Read model
        if modelPath == None:
            modelPath = "model-multilabel"
        classModels = {}
        if modelPath.endswith(".gz"):
            f = gzip.open(modelPath, "rt")
        else:
            f = open(modelPath, "rt")
        thresholds = {}
        for line in f:
            key, value, threshold = line.split()
            classModels[key] = value
            if threshold != "None":
                thresholds[key] = float(threshold)
            else:
                thresholds[key] = 0.0
        f.close()
        mergedPredictions = []
        if type(classIds) == types.StringType:
            classIds = IdSet(filename=classIds)
        #print classModels
        print "Thresholds", thresholds
        classifierBin = Settings.SVMMultiClassDir + "/svm_multiclass_classify"
        print parameters
        if "classifier" in parameters and "svmperf" in parameters["classifier"]:
            classifierBin = Settings.SVMPerfDir + "/svm_perf_classify"
            parameters = copy.copy(parameters)
            del parameters["classifier"]
        for className in classIds.getNames():
            if className != "neg" and not "---" in className:
                classId = classIds.getId(className)
                if thresholds[str(className)] != 0.0:
                    print >> sys.stderr, "Classifying", className, "with threshold", thresholds[
                        str(className)]
                else:
                    print >> sys.stderr, "Classifying", className
                args = [classifierBin]
                #self.__addParametersToSubprocessCall(args, parameters)
                classOutput = "predictions" + ".cls-" + className
                logFile = open("svmmulticlass" + ".cls-" + className + ".log",
                               "at")
                args += [testPath, classModels[str(className)], classOutput]
                print args
                subprocess.call(args, stdout=logFile, stderr=logFile)
                cls.addPredictions(classOutput,
                                   mergedPredictions,
                                   classId,
                                   len(classIds.Ids),
                                   threshold=thresholds[str(className)])
        print >> sys.stderr, timer.toString()

        predFileName = output
        f = open(predFileName, "wt")
        for mergedPred in mergedPredictions:
            if len(mergedPred[0]) > 1 and "1" in mergedPred[0]:
                mergedPred[0].remove("1")
            mergedPred[1] = str(mergedPred[1])
            mergedPred[0] = ",".join(sorted(list(mergedPred[0])))
            f.write(" ".join(mergedPred) + "\n")
        f.close()

        return mergedPredictions
Example #24
 def test(cls,
          examples,
          modelPath,
          output=None,
          parameters=None,
          forceInternal=False,
          classIds=None):  # , timeout=None):
     """
     Classify examples with a pre-trained model.
     
     @type examples: string (filename) or list (or iterator) of examples
     @param examples: a list or file containing examples in SVM-format
     @type modelPath: string
     @param modelPath: filename of the pre-trained model file
     @type parameters: a dictionary or string
     @param parameters: parameters for the classifier
     @type output: string
     @param output: the name of the predictions file to be written
     @type forceInternal: Boolean
     @param forceInternal: Use python classifier even if SVM Multiclass binary is defined in Settings.py
     """
     #if forceInternal or Settings.SVMMultiClassDir == None:
     #    return cls.testInternal(examples, modelPath, output)
     timer = Timer()
     if type(examples) == types.ListType:
         print >> sys.stderr, "Classifying", len(
             examples), "with SVM-Light model", modelPath
         examples, predictions = self.filterClassificationSet(
             examples, False)
         testPath = self.tempDir + "/test.dat"
         Example.writeExamples(examples, testPath)
     else:
         print >> sys.stderr, "Classifying file", examples, "with SVM-Light model", modelPath
         testPath = examples
         #examples = Example.readExamples(examples,False)
     if os.environ.has_key("METAWRK"):
         args = [SVMMultiClassClassifier.louhiBinDir + "/svm_classify"]
     else:
         args = [self.binDir + "/svm_classify"]
     if modelPath == None:
         modelPath = "model"
     if parameters != None:
         parameters = copy.copy(parameters)
         if parameters.has_key("c"):
             del parameters["c"]
         if parameters.has_key("predefined"):
             parameters = copy.copy(parameters)
             modelPath = os.path.join(parameters["predefined"][0],
                                      "classifier/model")
             del parameters["predefined"]
         self.__addParametersToSubprocessCall(args, parameters)
     if output == None:
         output = "predictions"
         logFile = open("svmlight.log", "at")
     else:
         logFile = open(output + ".log", "wt")
     args += [testPath, modelPath, output]
     #if timeout == None:
     #    timeout = -1
     #print args
     subprocess.call(args, stdout=logFile, stderr=logFile)
     predictionsFile = open(output, "rt")
     lines = predictionsFile.readlines()
     predictionsFile.close()
     predictions = []
     for i in range(len(lines)):
         predictions.append([int(lines[i].split()[0])] +
                            lines[i].split()[1:])
         #predictions.append( (examples[i],int(lines[i].split()[0]),"multiclass",lines[i].split()[1:]) )
     print >> sys.stderr, timer.toString()
     return predictions
Example #25
        classSet = None
        if os.path.exists(
                os.path.join(classifierParamDict["predefined"][0],
                             "class_names.txt")):
            classSet = IdSet()
            classSet.load(
                os.path.join(classifierParamDict["predefined"][0],
                             "class_names.txt"))
        exampleBuilder = ExampleBuilder(featureSet=featureSet,
                                        classSet=classSet,
                                        **splitParameters(
                                            options.exampleBuilderParameters))
    # Save training sets
    if options.output != None:
        print >> sys.stderr, "Saving example sets to", options.output
        Example.writeExamples(exampleSets[0],
                              options.output + "/examplesTrain.txt")
        if not classifierParamDict.has_key("predefined"):
            Example.writeExamples(
                optimizationSets[0],
                options.output + "/examplesOptimizationTest.txt")
            Example.writeExamples(
                optimizationSets[1],
                options.output + "/examplesOptimizationTrain.txt")
        TableUtils.writeCSV(bestResults[2],
                            options.output + "/best_parameters.csv")

    # Optimize and train
    if options.output != None:
        classifier = Classifier(workDir=options.output + "/classifier")
    else:
        classifier = Classifier()