Code example #1
    def __init__(self, examples=None, predictions=None, classSet=None):
        if type(classSet) == types.StringType:  # class names are in file
            classSet = IdSet(filename=classSet)
        if type(predictions) == types.StringType:  # predictions are in file
            predictions = ExampleUtils.loadPredictions(predictions)
        if type(examples) == types.StringType:  # examples are in file
            examples = ExampleUtils.readExamples(examples, False)

        self.keep = set(["CPR:3", "CPR:4", "CPR:5", "CPR:6", "CPR:9"])

        self.classSet = classSet
        self.results = None
        self.internal = None
        if predictions != None:
            for example in examples:
                if example[3] != None:
                    print >> sys.stderr, "ChemProt Evaluator:"
                    self._calculateExamples(examples, predictions)
                else:
                    print >> sys.stderr, "No example extra info, skipping ChemProt evaluation"
                break
            self.internal = AveragingMultiClassEvaluator(
                examples, predictions, classSet)
            print >> sys.stderr, "AveragingMultiClassEvaluator:"
            print >> sys.stderr, self.internal.toStringConcise()
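
All the snippets in this collection are Python 2 code (types.StringType, print >> sys.stderr, itertools.izip). For reference, a minimal Python 3 sketch of the same load-if-filename dispatch, assuming the IdSet and ExampleUtils interfaces used above, could look like this:

import sys

def resolveInputs(examples, predictions, classSet):
    # Hypothetical Python 3 port of the dispatch pattern in these examples;
    # assumes the same IdSet and ExampleUtils interfaces as above.
    if isinstance(classSet, str):  # class names are in file
        classSet = IdSet(filename=classSet)
    if isinstance(predictions, str):  # predictions are in file
        print("Reading predictions from", predictions, file=sys.stderr)
        predictions = ExampleUtils.loadPredictions(predictions)
    if isinstance(examples, str):  # examples are in file
        print("Reading examples from", examples, file=sys.stderr)
        examples = ExampleUtils.readExamples(examples, False)
    return examples, predictions, classSet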
Code example #2
File: SingleStageDetector.py Project: ayoshiaki/TEES
    def classifyToXML(self, data, model, exampleFileName=None, tag="", classifierModel=None, goldData=None, parse=None, recallAdjust=None, compressExamples=True, exampleStyle=None):
        model = self.openModel(model, "r")
        if parse == None:
            parse = self.getStr(self.tag+"parse", model)
        if exampleFileName == None:
            exampleFileName = tag+self.tag+"examples"
            if compressExamples:
                exampleFileName += ".gz"
        self.buildExamples(model, [data], [exampleFileName], [goldData], parse=parse, exampleStyle=exampleStyle)
        if classifierModel == None:
            classifierModel = model.get(self.tag+"classifier-model", defaultIfNotExist=None)
        #else:
        #    assert os.path.exists(classifierModel), classifierModel
        classifier = self.getClassifier(model.getStr(self.tag+"classifier-parameter", defaultIfNotExist=None))()
        classifier.classify(exampleFileName, tag+self.tag+"classifications", classifierModel, finishBeforeReturn=True)
        threshold = model.getStr(self.tag+"threshold", defaultIfNotExist=None, asType=float)
        predictions = ExampleUtils.loadPredictions(tag+self.tag+"classifications", recallAdjust, threshold=threshold)
        evaluator = self.evaluator.evaluate(exampleFileName, predictions, model.get(self.tag+"ids.classes"))
        #outputFileName = tag+"-"+self.tag+"pred.xml.gz"
        #exampleStyle = self.exampleBuilder.getParameters(model.getStr(self.tag+"example-style"))
        if exampleStyle == None:
            exampleStyle = Parameters.get(model.getStr(self.tag+"example-style")) # no checking, but these should already have passed the ExampleBuilder
        self.structureAnalyzer.load(model)
        return self.exampleWriter.write(exampleFileName, predictions, data, tag+self.tag+"pred.xml.gz", model.get(self.tag+"ids.classes"), parse, exampleStyle=exampleStyle, structureAnalyzer=self.structureAnalyzer)
#        if evaluator.getData().getTP() + evaluator.getData().getFP() > 0:
#            return self.exampleWriter.write(exampleFileName, predictions, data, outputFileName, model.get(self.tag+"ids.classes"), parse)
#        else:
#            # TODO: e.g. interactions must be removed if task does unmerging
#            print >> sys.stderr, "No positive", self.tag + "predictions, XML file", outputFileName, "unchanged from input"
#            if type(data) in types.StringTypes: # assume its a file
#                shutil.copy(data, outputFileName)
#            else: # assume its an elementtree
#                ETUtils.write(data, outputFileName)
#            #print >> sys.stderr, "No positive predictions, XML file", tag+self.tag+"pred.xml", "not written"
#            return data #None
Code example #3
    def __init__(self, examples, predictions=None, classSet=None):
        if type(classSet) == types.StringType:  # class names are in file
            classSet = IdSet(filename=classSet)
        if type(predictions) == types.StringType:  # predictions are in file
            predictions = ExampleUtils.loadPredictions(predictions)
        if type(examples) == types.StringType:  # examples are in file
            examples = ExampleUtils.readExamples(examples, False)

        SharedTaskEvaluator.corpusElements = Core.SentenceGraph.loadCorpus(
            SharedTaskEvaluator.corpusFilename, SharedTaskEvaluator.parse,
            SharedTaskEvaluator.tokenization)
        # Build interaction xml
        xml = BioTextExampleWriter.write(
            examples, predictions, SharedTaskEvaluator.corpusElements, None,
            SharedTaskEvaluator.ids + ".class_names",
            SharedTaskEvaluator.parse, SharedTaskEvaluator.tokenization)
        #xml = ExampleUtils.writeToInteractionXML(examples, predictions, SharedTaskEvaluator.corpusElements, None, "genia-direct-event-ids.class_names", SharedTaskEvaluator.parse, SharedTaskEvaluator.tokenization)
        # Convert to GENIA format
        gifxmlToGenia(xml,
                      SharedTaskEvaluator.geniaDir,
                      task=SharedTaskEvaluator.task,
                      verbose=False)
        # Use GENIA evaluation tool
        self.results = evaluateSharedTask(SharedTaskEvaluator.geniaDir,
                                          task=SharedTaskEvaluator.task,
                                          evaluations=["approximate"],
                                          verbose=False)
Code example #4
    def __init__(self, examples, predictions=None, classSet=None):
        if type(classSet) == types.StringType:  # class names are in file
            classSet = IdSet(filename=classSet)
        if type(predictions) == types.StringType:  # predictions are in file
            predictions = ExampleUtils.loadPredictions(predictions)
        if type(examples) == types.StringType:  # examples are in file
            examples = ExampleUtils.readExamples(examples, False)

        self.classSet = classSet
        # define class ids in alphabetical order
        if classSet != None:
            classNames = sorted(classSet.Ids.keys())
        else:
            classNames = []
        # make an ordered list of class ids
        self.classes = []
        for className in classNames:
            self.classes.append(classSet.getId(className))
        # create data structures for per-class evaluation
        self.dataByClass = {}
        for cls in self.classes:
            self.dataByClass[cls] = EvaluationData()
        # hack for unnamed classes
        if len(self.dataByClass) == 0:
            self.dataByClass[1] = EvaluationData()
            self.dataByClass[2] = EvaluationData()

        #self.untypedUndirected = None
        self.untypedCurrentMajorId = None
        self.untypedPredictionQueue = []
        self.untypedUndirected = EvaluationData()
        #self.AUC = None
        if predictions != None:
            self._calculate(examples, predictions)
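
The per-class bookkeeping above is built on EvaluationData. A minimal stand-in, inferred only from the members these snippets actually touch (_tp, _fp, _fn, calculateFScore, fscore), might look as follows; the real TEES class tracks more state:

class EvaluationData(object):
    # Minimal sketch inferred from usage in these examples, not the full
    # TEES implementation.
    def __init__(self):
        self._tp = 0  # true positives
        self._fp = 0  # false positives
        self._tn = 0  # true negatives
        self._fn = 0  # false negatives
        self.fscore = 0.0

    def calculateFScore(self):
        # Standard binary F1 from the current counts.
        if self._tp == 0:
            self.fscore = 0.0
        else:
            precision = float(self._tp) / (self._tp + self._fp)
            recall = float(self._tp) / (self._tp + self._fn)
            self.fscore = 2 * precision * recall / (precision + recall)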
Code example #5
    def __init__(self, examples, predictions=None, classSet=None):
        if type(classSet) == types.StringType: # class names are in file
            classSet = IdSet(filename=classSet)
        if type(predictions) == types.StringType: # predictions are in file
            predictions = ExampleUtils.loadPredictions(predictions)
        if type(examples) == types.StringType: # examples are in file
            examples = ExampleUtils.readExamples(examples, False)

        self.classSet = classSet
        # define class ids in alphabetical order
        if classSet != None:
            classNames = sorted(classSet.Ids.keys())
        else:
            classNames = []
        # make an ordered list of class ids
        self.classes = []
        for className in classNames:
            self.classes.append(classSet.getId(className))
        # create data structures for per-class evaluation
        self.dataByClass = {}
        for cls in self.classes:
            self.dataByClass[cls] = EvaluationData()
        # hack for unnamed classes
        if len(self.dataByClass) == 0:
            self.dataByClass[1] = EvaluationData()
            self.dataByClass[2] = EvaluationData()
        
        #self.untypedUndirected = None
        self.untypedCurrentMajorId = None
        self.untypedPredictionQueue = []
        self.untypedUndirected = EvaluationData()
        #self.AUC = None
        if predictions != None:
            self._calculate(examples, predictions)
Code example #6
File: SingleStageDetector.py Project: jbjorne/Tdevel
    def classifyToXML(self, data, model, exampleFileName=None, tag="", classifierModel=None, goldData=None, parse=None, recallAdjust=None, compressExamples=True):
        model = self.openModel(model, "r")
        if parse == None:
            parse = self.getStr(self.tag+"parse", model)
        if exampleFileName == None:
            exampleFileName = tag+self.tag+"examples"
            if compressExamples:
                exampleFileName += ".gz"
            self.buildExamples(model, [data], [exampleFileName], [goldData], parse=parse)
        if classifierModel == None:
            classifierModel = model.get(self.tag+"classifier-model")
        else:
            assert os.path.exists(classifierModel), classifierModel
        classifier = self.Classifier()
        classifier.classify(exampleFileName, tag+self.tag+"classifications", classifierModel, finishBeforeReturn=True)
        predictions = ExampleUtils.loadPredictions(tag+self.tag+"classifications", recallAdjust)
        evaluator = self.evaluator.evaluate(exampleFileName, predictions, model.get(self.tag+"ids.classes"))
        #outputFileName = tag+"-"+self.tag+"pred.xml.gz"
        return self.exampleWriter.write(exampleFileName, predictions, data, tag+self.tag+"pred.xml.gz", model.get(self.tag+"ids.classes"), parse)
#        if evaluator.getData().getTP() + evaluator.getData().getFP() > 0:
#            return self.exampleWriter.write(exampleFileName, predictions, data, outputFileName, model.get(self.tag+"ids.classes"), parse)
#        else:
#            # TODO: e.g. interactions must be removed if task does unmerging
#            print >> sys.stderr, "No positive", self.tag + "predictions, XML file", outputFileName, "unchanged from input"
#            if type(data) in types.StringTypes: # assume its a file
#                shutil.copy(data, outputFileName)
#            else: # assume its an elementtree
#                ETUtils.write(data, outputFileName)
#            #print >> sys.stderr, "No positive predictions, XML file", tag+self.tag+"pred.xml", "not written"
#            return data #None
Code example #7
 def loadExamples(self, examples, predictions):
     if type(predictions) == types.StringType:
         print >> sys.stderr, "Reading predictions from", predictions
         predictions = ExampleUtils.loadPredictions(predictions)
     if type(examples) == types.StringType:
         print >> sys.stderr, "Reading examples from", examples
         examples = ExampleUtils.readExamples(examples, False)
     return examples, predictions
Code example #8
File: ExternalClassifier.py Project: jbjorne/TEES
 def optimize(self, examples, outDir, parameters, classifyExamples, classIds, step="BOTH", evaluator=None, determineThreshold=False, timeout=None, downloadAllModels=False):
     assert step in ["BOTH", "SUBMIT", "RESULTS"], step
     outDir = os.path.abspath(outDir)
     # Initialize training (or reconnect to existing jobs)
     combinations = Parameters.getCombinations(Parameters.get(parameters, valueListKey="c")) #Core.OptimizeParameters.getParameterCombinations(parameters)
     trained = []
     for combination in combinations:
         trained.append( self.train(examples, outDir, combination, classifyExamples, replaceRemoteExamples=(len(trained) == 0), dummy=(step == "RESULTS")) )
     if step == "SUBMIT": # Return already
         classifier = copy.copy(self)
         classifier.setState("OPTIMIZE")
         return classifier
     
     # Wait for the training to finish
     finalJobStatus = self.connection.waitForJobs([x.getJob() for x in trained])
     # Evaluate the results
     print >> sys.stderr, "Evaluating results"
     #Stream.setIndent(" ")
     bestResult = None
     if evaluator == None:
         evaluator = self.defaultEvaluator
     for i in range(len(combinations)):
         id = trained[i].parameterIdStr
         #Stream.setIndent(" ")
         # Get predictions
         predictions = None
         if trained[i].getStatus() == "FINISHED":
             predictions = trained[i].downloadPredictions()
         else:
             print >> sys.stderr, "No results for combination" + id
             continue
         if downloadAllModels:
             trained[i].downloadModel()
         # Compare to other results
         print >> sys.stderr, "*** Evaluating results for combination" + id + " ***"
         threshold = None
         if determineThreshold:
             print >> sys.stderr, "Thresholding, original micro =",
             evaluation = evaluator.evaluate(classifyExamples, predictions, classIds, os.path.join(outDir, "evaluation-before-threshold" + id + ".csv"), verbose=False)
             print >> sys.stderr, evaluation.microF.toStringConcise()
             threshold, bestF = evaluator.threshold(classifyExamples, predictions)
             print >> sys.stderr, "threshold =", threshold, "at binary fscore", str(bestF)[0:6]
         evaluation = evaluator.evaluate(classifyExamples, ExampleUtils.loadPredictions(predictions, threshold=threshold), classIds, os.path.join(outDir, "evaluation" + id + ".csv"))
         if bestResult == None or evaluation.compare(bestResult[0]) > 0: #: averageResult.fScore > bestResult[1].fScore:
             bestResult = [evaluation, trained[i], combinations[i], threshold]
         if not self.connection.isLocal():
             os.remove(predictions) # remove predictions to save space
     #Stream.setIndent()
     if bestResult == None:
         raise Exception("No results for any parameter combination")
     print >> sys.stderr, "*** Evaluation complete", finalJobStatus, "***"
     print >> sys.stderr, "Selected parameters", bestResult[2]
     classifier = copy.copy(bestResult[1])
     classifier.threshold = bestResult[3]
     classifier.downloadModel()
     return classifier
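
The optimize() method above trains one job per parameter combination produced by Parameters.getCombinations. As an illustration of what that expansion amounts to (this is not the TEES Parameters API), a value grid expands into one dictionary per combination:

import itertools

def getCombinations(grid):
    # Illustrative grid expansion, not the TEES Parameters module: build one
    # parameter dict for every combination of the listed values.
    keys = sorted(grid)
    return [dict(zip(keys, values))
            for values in itertools.product(*[grid[k] for k in keys])]

# getCombinations({"c": [0.5, 1.0], "j": [1]})
# -> [{'c': 0.5, 'j': 1}, {'c': 1.0, 'j': 1}]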
Code example #9
    def classifyToXML(self,
                      data,
                      model,
                      exampleFileName=None,
                      tag="",
                      classifierModel=None,
                      goldData=None,
                      parse=None,
                      recallAdjust=None,
                      compressExamples=True):
        model = self.openModel(model, "r")
        if parse == None:
            parse = self.getStr(self.tag + "parse", model)
        if exampleFileName == None:
            exampleFileName = tag + self.tag + "examples"
            if compressExamples:
                exampleFileName += ".gz"
            self.buildExamples(model, [data], [exampleFileName], [goldData],
                               parse=parse)
        if classifierModel == None:
            classifierModel = model.get(self.tag + "classifier-model")
        else:
            assert os.path.exists(classifierModel), classifierModel
        classifier = self.Classifier()
        classifier.classify(exampleFileName,
                            tag + self.tag + "classifications",
                            classifierModel,
                            finishBeforeReturn=True)
        predictions = ExampleUtils.loadPredictions(
            tag + self.tag + "classifications", recallAdjust)
        evaluator = self.evaluator.evaluate(
            exampleFileName, predictions, model.get(self.tag + "ids.classes"))
        #outputFileName = tag+"-"+self.tag+"pred.xml.gz"
        return self.exampleWriter.write(exampleFileName, predictions, data,
                                        tag + self.tag + "pred.xml.gz",
                                        model.get(self.tag + "ids.classes"),
                                        parse)


#        if evaluator.getData().getTP() + evaluator.getData().getFP() > 0:
#            return self.exampleWriter.write(exampleFileName, predictions, data, outputFileName, model.get(self.tag+"ids.classes"), parse)
#        else:
#            # TODO: e.g. interactions must be removed if task does unmerging
#            print >> sys.stderr, "No positive", self.tag + "predictions, XML file", outputFileName, "unchanged from input"
#            if type(data) in types.StringTypes: # assume its a file
#                shutil.copy(data, outputFileName)
#            else: # assume its an elementtree
#                ETUtils.write(data, outputFileName)
#            #print >> sys.stderr, "No positive predictions, XML file", tag+self.tag+"pred.xml", "not written"
#            return data #None
Code example #10
File: SharedTaskEvaluator.py Project: jbjorne/Tdevel
 def __init__(self, examples, predictions=None, classSet=None):
     if type(classSet) == types.StringType: # class names are in file
         classSet = IdSet(filename=classSet)
     if type(predictions) == types.StringType: # predictions are in file
         predictions = ExampleUtils.loadPredictions(predictions)
     if type(examples) == types.StringType: # examples are in file
         examples = ExampleUtils.readExamples(examples, False)
     
     SharedTaskEvaluator.corpusElements = Core.SentenceGraph.loadCorpus(SharedTaskEvaluator.corpusFilename, SharedTaskEvaluator.parse, SharedTaskEvaluator.tokenization)
     # Build interaction xml
     xml = BioTextExampleWriter.write(examples, predictions, SharedTaskEvaluator.corpusElements, None, SharedTaskEvaluator.ids+".class_names", SharedTaskEvaluator.parse, SharedTaskEvaluator.tokenization)
     #xml = ExampleUtils.writeToInteractionXML(examples, predictions, SharedTaskEvaluator.corpusElements, None, "genia-direct-event-ids.class_names", SharedTaskEvaluator.parse, SharedTaskEvaluator.tokenization)
     # Convert to GENIA format
     gifxmlToGenia(xml, SharedTaskEvaluator.geniaDir, task=SharedTaskEvaluator.task, verbose=False)
     # Use GENIA evaluation tool
     self.results = evaluateSharedTask(SharedTaskEvaluator.geniaDir, task=SharedTaskEvaluator.task, evaluations=["approximate"], verbose=False)
Code example #11
    def __init__(self, examples, predictions=None, classSet=None):
        if type(classSet) == types.StringType: # class names are in file
            classSet = IdSet(filename=classSet)
        if type(predictions) == types.StringType: # predictions are in file
            predictions = ExampleUtils.loadPredictions(predictions)
        if type(examples) == types.StringType: # examples are in file
            examples = ExampleUtils.readExamples(examples, False)

        self.classSet = classSet
        self.dataByClass = defaultdict(EvaluationData)
        
        #self.untypedUndirected = None
        self.untypedCurrentMajorId = None
        self.untypedPredictionQueue = []
        self.untypedUndirected = EvaluationData()
        #self.AUC = None
        if predictions != None:
            self._calculate(examples, predictions)
Code example #12
    def __init__(self, examples, predictions=None, classSet=None):
        if type(classSet) == types.StringType:  # class names are in file
            classSet = IdSet(filename=classSet)
        if type(predictions) == types.StringType:  # predictions are in file
            predictions = ExampleUtils.loadPredictions(predictions)
        if type(examples) == types.StringType:  # examples are in file
            examples = ExampleUtils.readExamples(examples, False)

        self.classSet = classSet
        self.dataByClass = defaultdict(EvaluationData)

        #self.untypedUndirected = None
        self.untypedCurrentMajorId = None
        self.untypedPredictionQueue = []
        self.untypedUndirected = EvaluationData()
        #self.AUC = None
        if predictions != None:
            self._calculate(examples, predictions)
Code example #13
 def __init__(self, examples=None, predictions=None, classSet=None):
     if type(classSet) == types.StringType:  # class names are in file
         classSet = IdSet(filename=classSet)
     if type(predictions) == types.StringType:  # predictions are in file
         predictions = ExampleUtils.loadPredictions(predictions)
     if type(examples) == types.StringType:  # examples are in file
         examples = ExampleUtils.readExamples(examples, False)
     #self.examples = examples
     #self.predictions = predictions
     self.truePositives = 0
     self.falsePositives = 0
     self.trueNegatives = 0
     self.falseNegatives = 0
     self.precision = None
     self.recall = None
     self.fScore = None
     self.AUC = None
     self.type = "binary"
     if predictions != None:
         self._calculate(examples, predictions)
Code example #14
File: BXEvaluator.py Project: thiagoki/Tdevel
 def __init__(self, examples, predictions=None, classSet=None):
     if type(classSet) == types.StringType: # class names are in file
         classSet = IdSet(filename=classSet)
     if type(predictions) == types.StringType: # predictions are in file
         predictions = ExampleUtils.loadPredictions(predictions)
     if type(examples) == types.StringType: # examples are in file
         examples = ExampleUtils.readExamples(examples, False)
     
     corpusElements = Core.SentenceGraph.loadCorpus(BXEvaluator.corpusFilename, BXEvaluator.parse, BXEvaluator.tokenization)
     # Build interaction xml
     xml = BioTextExampleWriter.write(examples, predictions, corpusElements, None, BXEvaluator.ids+".class_names", BXEvaluator.parse, BXEvaluator.tokenization)
     xml = ix.splitMergedElements(xml, None)
     xml = ix.recalculateIds(xml, None, True)
     #xml = ExampleUtils.writeToInteractionXML(examples, predictions, SharedTaskEvaluator.corpusElements, None, "genia-direct-event-ids.class_names", SharedTaskEvaluator.parse, SharedTaskEvaluator.tokenization)
     # Convert to GENIA format
     STFormat.ConvertXML.toSTFormat(xml, BXEvaluator.geniaDir, outputTag="a2")
     #gifxmlToGenia(xml, BXEvaluator.geniaDir, task=SharedTaskEvaluator.task, verbose=False)
     # Use GENIA evaluation tool
     self.results = BioNLP11GeniaTools.evaluateBX(BXEvaluator.geniaDir, corpusName=BXEvaluator.corpusTag)
     corpusElements = None
Code example #15
File: BinaryEvaluator.py Project: ninjin/TEES
 def __init__(self, examples=None, predictions=None, classSet=None):
     if type(classSet) == types.StringType:  # class names are in file
         classSet = IdSet(filename=classSet)
     if type(predictions) == types.StringType:  # predictions are in file
         predictions = ExampleUtils.loadPredictions(predictions)
     if type(examples) == types.StringType:  # examples are in file
         examples = ExampleUtils.readExamples(examples, False)
     # self.examples = examples
     # self.predictions = predictions
     self.truePositives = 0
     self.falsePositives = 0
     self.trueNegatives = 0
     self.falseNegatives = 0
     self.precision = None
     self.recall = None
     self.fScore = None
     self.AUC = None
     self.type = "binary"
     if predictions != None:
         self._calculate(examples, predictions)
Code example #16
    def threshold(cls, examples, predictions):
        # Make negative confidence score / true class pairs
        if type(examples) in types.StringTypes:
            examples = ExampleUtils.readExamples(examples, False)
        if type(predictions) in types.StringTypes:
            predictions = ExampleUtils.loadPredictions(predictions)
        pairs = []
        realPositives = 0
        for example, prediction in itertools.izip(examples, predictions):
            trueClass = example[1]
            assert trueClass > 0  # multiclass classification uses non-negative integers
            if trueClass > 1:
                realPositives += 1
            negClassValue = prediction[1]
            pairs.append((negClassValue, trueClass))
        pairs.sort(reverse=True)
        realNegatives = len(pairs) - realPositives

        # When starting thresholding, all examples are considered positive
        binaryF = EvaluationData()
        binaryF._tp = realPositives
        binaryF._fp = realNegatives
        binaryF._fn = 0
        binaryF.calculateFScore()
        fscore = binaryF.fscore
        threshold = pairs[0][0] - 1.

        # Turn one example negative at a time
        for pair in pairs:
            if pair[1] == 1:  # the real class is negative
                binaryF._fp -= 1  # false positive -> true negative
            else:  # the real class is a positive class
                binaryF._tp -= 1  # true positive -> ...
                binaryF._fn += 1  # ... false negative
            binaryF.calculateFScore()
            if binaryF.fscore > fscore:
                fscore = binaryF.fscore
                threshold = pair[0] + 0.00000001
        return threshold, fscore
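
The sweep works because the pairs are sorted by descending negative-class score: it starts with every example predicted positive, then flips one example negative at a time, recomputing the binary F-score incrementally and keeping the best threshold. The same idea restated with plain counters:

def sweepThreshold(pairs):
    # pairs: (negative-class score, true class) tuples, class 1 = negative.
    # Assumes at least one pair. Restates the sweep above with plain counters.
    pairs = sorted(pairs, reverse=True)
    tp = sum(1 for score, cls in pairs if cls > 1)
    fp = len(pairs) - tp
    fn = 0

    def fscore(tp, fp, fn):
        return 2.0 * tp / (2 * tp + fp + fn) if tp > 0 else 0.0

    best = (fscore(tp, fp, fn), pairs[0][0] - 1.0)
    for score, cls in pairs:
        if cls == 1:  # real class is negative: false positive -> true negative
            fp -= 1
        else:         # real class is positive: true positive -> false negative
            tp -= 1
            fn += 1
        f = fscore(tp, fp, fn)
        if f > best[0]:
            best = (f, score + 0.00000001)
    return best[1], best[0]  # threshold, best fscore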
Code example #17
 def threshold(cls, examples, predictions):
     # Make negative confidence score / true class pairs
     if type(examples) in types.StringTypes:
         examples = ExampleUtils.readExamples(examples, False)
     if type(predictions) in types.StringTypes:
         predictions = ExampleUtils.loadPredictions(predictions)
     pairs = []
     realPositives = 0
     for example, prediction in itertools.izip(examples, predictions):
         trueClass = example[1]
         assert(trueClass > 0) # multiclass classification uses non-negative integers
         if trueClass > 1:
             realPositives += 1
         negClassValue = prediction[1]
         pairs.append( (negClassValue, trueClass) )
     pairs.sort(reverse=True)
     realNegatives = len(pairs) - realPositives
     
     # When starting thresholding, all examples are considered positive
     binaryF = EvaluationData()
     binaryF._tp = realPositives
     binaryF._fp = realNegatives
     binaryF._fn = 0
     binaryF.calculateFScore()
     fscore = binaryF.fscore
     threshold = pairs[0][0]-1.
     
     # Turn one example negative at a time
     for pair in pairs:
         if pair[1] == 1: # the real class is negative
             binaryF._fp -= 1 # false positive -> true negative
         else: # the real class is a positive class
             binaryF._tp -= 1 # true positive -> ...
             binaryF._fn += 1 # ... false negative
         binaryF.calculateFScore()
         if binaryF.fscore > fscore:
             fscore = binaryF.fscore
             threshold = pair[0]+0.00000001
     return threshold, fscore        
Code example #18
File: ChemProtEvaluator.py Project: jbjorne/TEES
    def __init__(self, examples=None, predictions=None, classSet=None):
        if type(classSet) == types.StringType: # class names are in file
            classSet = IdSet(filename=classSet)
        if type(predictions) == types.StringType: # predictions are in file
            predictions = ExampleUtils.loadPredictions(predictions)
        if type(examples) == types.StringType: # examples are in file
            examples = ExampleUtils.readExamples(examples, False)
        
        self.keep = set(["CPR:3", "CPR:4", "CPR:5", "CPR:6", "CPR:9"])

        self.classSet = classSet
        self.results = None
        self.internal = None
        if predictions != None:
            for example in examples:
                if example[3] != None:
                    print >> sys.stderr, "ChemProt Evaluator:"
                    self._calculateExamples(examples, predictions)
                else:
                    print >> sys.stderr, "No example extra info, skipping ChemProt evaluation"
                break
            self.internal = AveragingMultiClassEvaluator(examples, predictions, classSet)
            print >> sys.stderr, "AveragingMultiClassEvaluator:"
            print >> sys.stderr, self.internal.toStringConcise()
Code example #19
File: ThresholdTest.py Project: jbjorne/Tdevel
def threshold(examples, predictionsDir=None, classSet=None):
    if type(classSet) == types.StringType: # class names are in file
        classSet = IdSet(filename=classSet)
    classIds = set()
    if type(examples) == types.StringType: # examples are in file
        examplesTemp = ExampleUtils.readExamples(examples, False)
        examples = []
        for example in examplesTemp:
            examples.append(example)
            classIds.add(example[1])
    classIds = list(classIds)
    classIds.sort()
    
    #multilabel = MultiLabelMultiClassEvaluator(None, None, classSet)
    #multilabel._calculate(examples, predictions)
    #print multilabel.toStringConcise(title="multilabel")
    
    bestThrF = [0]
    bestBaseF = [0]
    predFileNames = []
    for filename in os.listdir(predictionsDir):
        if "predictions" in filename:
            predFileNames.append( (int(filename.rsplit("_")[-1]), filename) )
    predFileNames.sort()
    for predFileName in predFileNames:
        predictionsTemp = ExampleUtils.loadPredictions(os.path.join(predictionsDir, predFileName[1]))
        predictions = []
        for prediction in predictionsTemp:
            predictions.append(prediction)
    
        baseEv = AveragingMultiClassEvaluator(None, None, classSet)
        baseEv._calculate(examples, predictions)
        print "============================"
        print predFileName[1]
        print "============================"
        #print baseEv.toStringConcise(title="baseline")
        
        baseLineF = baseEv.microF.fscore
        for step in [0]:
            for classId in [1]: #classIds:
                cls = None
                if classSet != None:
                    cls = classSet.getName(classId)
                else:
                    cls = str(classId)
                bestF = thresholdClass(examples, predictions, classId, baseLineF)
                for prediction in predictions:
                    prediction[classId] -= bestF[2][0] + 0.00000001
                changed = 0
                for prediction in predictions:
                    maxVal = -999999
                    maxClass = None
                    for i in range(1, len(prediction)):
                        if prediction[i] > maxVal:
                            maxVal = prediction[i]
                            maxClass = i
                    if maxClass != prediction[0]:
                        prediction[0] = maxClass
                        changed += 1
                print step, cls, "changed", changed, bestF[0]
                baseLineF = bestF[0]
        
        if bestF[0] > bestThrF[0]:
            bestThrF = (bestF[0], predFileName[1], bestF[1], bestF[2], bestF[3])
        if baseEv.microF.fscore > bestBaseF[0]:
            bestBaseF = (baseEv.microF.fscore, predFileName[1], baseEv.microF.toStringConcise())
    
        print "-------- Baseline ------------"
        print baseEv.toStringConcise()
        print "-------- Best ------------"
        print bestF[0], bestF[1], bestF[2]
        print bestF[3]
        thEv = AveragingMultiClassEvaluator(None, None, classSet)
        thEv._calculate(examples, predictions)
        print thEv.toStringConcise()
    
    print "=============== All Best ==============="
    print "Threshold", bestThrF
    print "Base", bestBaseF
    sys.exit()
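    # NOTE: everything below this sys.exit() call is unreachable leftover
    # code; 'pairs' is never defined in this function.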
    
    memPredictions = []
    bestEv = baseEv
    bestPair = [None, None, None]
    for p in predictions:
        memPredictions.append(p)
    for pair in pairs:
        modifier = pair[0] + 0.00000001
        changedClass = 0
        for pred in memPredictions:
            negPred = pred[1] - modifier  
            maxVal = negPred
            maxClass = 1
            for i in range(2, len(pred)):
                if pred[i] > maxVal:
                    maxVal = pred[i]
                    maxClass = i
            if pred[0] != maxClass:
                changedClass += 1
            pred[0] = maxClass
        ev = AveragingMultiClassEvaluator(None)
        ev._calculate(examples, memPredictions)
        print pair[0], pair[2], changedClass
        print ev.toStringConcise()
        if ev.compare(bestEv) == 1:
            print "Improved"
            bestPair = pair
            bestEv = ev
    
    print "---------------------------------------------"
    print baseEv.toStringConcise(title="baseline")
    print bestPair[0], bestPair[2] 
    print bestEv.toStringConcise(title="best") 
Code example #20
    def classifyToXML(self,
                      data,
                      model,
                      exampleFileName=None,
                      tag="",
                      classifierModel=None,
                      goldData=None,
                      parse=None,
                      recallAdjust=None,
                      compressExamples=True,
                      exampleStyle=None,
                      useExistingExamples=False):
        model = self.openModel(model, "r")
        if parse == None:
            parse = self.getStr(self.tag + "parse", model)
        if useExistingExamples:
            assert exampleFileName != None
            assert os.path.exists(exampleFileName)
        if exampleFileName == None:
            exampleFileName = tag + self.tag + "examples"
            if compressExamples:
                exampleFileName += ".gz"
        if not useExistingExamples:
            self.buildExamples(model, [data], [exampleFileName], [goldData],
                               parse=parse,
                               exampleStyle=exampleStyle)
        if classifierModel == None:
            classifierModel = model.get(self.tag + "classifier-model",
                                        defaultIfNotExist=None)
        #else:
        #    assert os.path.exists(classifierModel), classifierModel
        classifier = self.getClassifier(
            model.getStr(self.tag + "classifier-parameter",
                         defaultIfNotExist=None))()
        classifier.classify(exampleFileName,
                            tag + self.tag + "classifications",
                            classifierModel,
                            finishBeforeReturn=True)
        threshold = model.getStr(self.tag + "threshold",
                                 defaultIfNotExist=None,
                                 asType=float)
        predictions = ExampleUtils.loadPredictions(tag + self.tag +
                                                   "classifications",
                                                   recallAdjust,
                                                   threshold=threshold)
        evaluator = self.evaluator.evaluate(
            exampleFileName, predictions, model.get(self.tag + "ids.classes"))
        #outputFileName = tag+"-"+self.tag+"pred.xml.gz"
        #exampleStyle = self.exampleBuilder.getParameters(model.getStr(self.tag+"example-style"))
        if exampleStyle == None:
            exampleStyle = Parameters.get(
                model.getStr(self.tag + "example-style")
            )  # no checking, but these should already have passed the ExampleBuilder
        self.structureAnalyzer.load(model)
        return self.exampleWriter.write(
            exampleFileName,
            predictions,
            data,
            tag + self.tag + "pred.xml.gz",
            model.get(self.tag + "ids.classes"),
            parse,
            exampleStyle=exampleStyle,
            structureAnalyzer=self.structureAnalyzer)


#        if evaluator.getData().getTP() + evaluator.getData().getFP() > 0:
#            return self.exampleWriter.write(exampleFileName, predictions, data, outputFileName, model.get(self.tag+"ids.classes"), parse)
#        else:
#            # TODO: e.g. interactions must be removed if task does unmerging
#            print >> sys.stderr, "No positive", self.tag + "predictions, XML file", outputFileName, "unchanged from input"
#            if type(data) in types.StringTypes: # assume its a file
#                shutil.copy(data, outputFileName)
#            else: # assume its an elementtree
#                ETUtils.write(data, outputFileName)
#            #print >> sys.stderr, "No positive predictions, XML file", tag+self.tag+"pred.xml", "not written"
#            return data #None
Code example #21
    def determineThreshold(self, examples, predictions):
        if type(predictions) == types.StringType:  # predictions are in file
            predictions = ExampleUtils.loadPredictions(predictions)
        if type(examples) == types.StringType:  # examples are in file
            examples = ExampleUtils.readExamples(examples, False)

        examplesByClass = {}
        for cls in self.classes:
            examplesByClass[cls] = []
        # prepare examples
        for example, prediction in itertools.izip(examples, predictions):
            # Check true class for multilabel
            trueClass = example[1]
            trueClassName = self.classSet.getName(trueClass)
            assert trueClass > 0  # multiclass classification uses non-negative integers
            if "---" in trueClassName:
                trueClass = set()
                for name in trueClassName.split("---"):
                    trueClass.add(self.classSet.getId(name))
            else:
                trueClass = [trueClass]
            # Check prediction for multilabel
            predictedClasses = prediction[0]
            if type(predictedClasses) == types.IntType:
                predictedClasses = [predictedClasses]

            for predType in predictedClasses:
                if predType != 1:
                    exTrueClass = 1
                    if predType in trueClass:
                        exTrueClass = 2
                    examplesByClass[predType].append(
                        (prediction[predType], exTrueClass, 2))
            # positives are negatives for other classes
            for cls in self.classes:
                if cls not in predictedClasses:
                    exTrueClass = 1
                    if cls in trueClass:
                        exTrueClass = 2
                    examplesByClass[cls].append(
                        (prediction[cls], exTrueClass, 1))
        # do the thresholding
        thresholdByClass = {}
        for cls in self.classes:
            if cls == 1:
                continue
            thresholdByClass[cls] = 0.0
            examplesByClass[cls].sort()
            # Start with all below zero being negative, and all above it being what is predicted
            ev = EvaluationData()
            for example in examplesByClass[cls]:
                #print example
                if example[0] < 0.0:
                    updateF(ev, example[1], 2, 1)  # always negative
                else:
                    updateF(ev, example[1], example[2], 1)  # what is predicted
            count = 0
            bestF = [self.dataByClass[cls].fscore, None, (0.0, None), None]
            for example in examplesByClass[cls]:
                if example[0] < 0.0:
                    # Remove original example
                    updateF(ev, example[1], 2, -1)
                    # Add new example
                    updateF(ev, example[1], example[2], 1)
                    # Calculate F for this point
                else:
                    # Remove original example
                    updateF(ev, example[1], example[2], -1)
                    # Add new example
                    updateF(ev, example[1], 1, 1)
                    # Calculate F for this point
                ev.calculateFScore()
                #print example, ev.toStringConcise()
                count += 1
                #if self.classSet.getName(cls) == "Binding":
                #    print count, example, ev.toStringConcise()
                if ev.fscore > bestF[0]:
                    bestF = (ev.fscore, count, example, ev.toStringConcise())
                    self.dataByClass[cls] = copy.copy(ev)
            print >> sys.stderr, "Threshold", self.classSet.getName(cls), bestF
            if bestF[2][0] != 0.0:
                thresholdByClass[cls] = bestF[2][0] + 0.00000001
            else:
                thresholdByClass[cls] = 0.0
        #print thresholdByClass
        self.thresholds = thresholdByClass
        #self._calculate(examples, predictions, thresholdByClass)
        #print >> sys.stderr, "Optimal", self.toStringConcise()
        return thresholdByClass
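
One detail in the example above: multilabel gold classes arrive as a single class name that joins the member names with "---", which the code splits back into a set of class ids. Illustration with hypothetical class names:

# Hypothetical illustration of the "---" multilabel encoding handled above.
trueClassName = "Cause---Theme"
if "---" in trueClassName:
    memberNames = trueClassName.split("---")  # ['Cause', 'Theme']
    # each member name is then mapped back to an id via classSet.getId(name)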
Code example #22
    def optimize(self,
                 examples,
                 outDir,
                 parameters,
                 classifyExamples,
                 classIds,
                 step="BOTH",
                 evaluator=None,
                 determineThreshold=False,
                 timeout=None,
                 downloadAllModels=False):
        assert step in ["BOTH", "SUBMIT", "RESULTS"], step
        outDir = os.path.abspath(outDir)
        # Initialize training (or reconnect to existing jobs)
        combinations = Parameters.getCombinations(
            Parameters.get(parameters, valueListKey="c")
        )  #Core.OptimizeParameters.getParameterCombinations(parameters)
        trained = []
        for combination in combinations:
            trained.append(
                self.train(examples,
                           outDir,
                           combination,
                           classifyExamples,
                           replaceRemoteExamples=(len(trained) == 0),
                           dummy=(step == "RESULTS")))
        if step == "SUBMIT":  # Return already
            classifier = copy.copy(self)
            classifier.setState("OPTIMIZE")
            return classifier

        # Wait for the training to finish
        finalJobStatus = self.connection.waitForJobs(
            [x.getJob() for x in trained])
        # Evaluate the results
        print >> sys.stderr, "Evaluating results"
        #Stream.setIndent(" ")
        bestResult = None
        if evaluator == None:
            evaluator = self.defaultEvaluator
        for i in range(len(combinations)):
            id = trained[i].parameterIdStr
            #Stream.setIndent(" ")
            # Get predictions
            predictions = None
            if trained[i].getStatus() == "FINISHED":
                predictions = trained[i].downloadPredictions()
            else:
                print >> sys.stderr, "No results for combination" + id
                continue
            if downloadAllModels:
                trained[i].downloadModel()
            # Compare to other results
            print >> sys.stderr, "*** Evaluating results for combination" + id + " ***"
            threshold = None
            if determineThreshold:
                print >> sys.stderr, "Thresholding, original micro =",
                evaluation = evaluator.evaluate(
                    classifyExamples,
                    predictions,
                    classIds,
                    os.path.join(outDir,
                                 "evaluation-before-threshold" + id + ".csv"),
                    verbose=False)
                print >> sys.stderr, evaluation.microF.toStringConcise()
                threshold, bestF = evaluator.threshold(classifyExamples,
                                                       predictions)
                print >> sys.stderr, "threshold =", threshold, "at binary fscore", str(
                    bestF)[0:6]
            evaluation = evaluator.evaluate(
                classifyExamples,
                ExampleUtils.loadPredictions(predictions, threshold=threshold),
                classIds, os.path.join(outDir, "evaluation" + id + ".csv"))
            if bestResult == None or evaluation.compare(
                    bestResult[0]
            ) > 0:  #: averageResult.fScore > bestResult[1].fScore:
                bestResult = [
                    evaluation, trained[i], combinations[i], threshold
                ]
            if not self.connection.isLocal():
                os.remove(predictions)  # remove predictions to save space
        #Stream.setIndent()
        if bestResult == None:
            raise Exception("No results for any parameter combination")
        print >> sys.stderr, "*** Evaluation complete", finalJobStatus, "***"
        print >> sys.stderr, "Selected parameters", bestResult[2]
        classifier = copy.copy(bestResult[1])
        classifier.threshold = bestResult[3]
        classifier.downloadModel()
        return classifier
Code example #23
File: ThresholdTest.py Project: thiagoki/Tdevel
def threshold(examples, predictionsDir=None, classSet=None):
    if type(classSet) == types.StringType:  # class names are in file
        classSet = IdSet(filename=classSet)
    classIds = set()
    if type(examples) == types.StringType:  # examples are in file
        examplesTemp = ExampleUtils.readExamples(examples, False)
        examples = []
        for example in examplesTemp:
            examples.append(example)
            classIds.add(example[1])
    classIds = list(classIds)
    classIds.sort()

    #multilabel = MultiLabelMultiClassEvaluator(None, None, classSet)
    #multilabel._calculate(examples, predictions)
    #print multilabel.toStringConcise(title="multilabel")

    bestThrF = [0]
    bestBaseF = [0]
    predFileNames = []
    for filename in os.listdir(predictionsDir):
        if "predictions" in filename:
            predFileNames.append((int(filename.rsplit("_")[-1]), filename))
    predFileNames.sort()
    for predFileName in predFileNames:
        predictionsTemp = ExampleUtils.loadPredictions(
            os.path.join(predictionsDir, predFileName[1]))
        predictions = []
        for prediction in predictionsTemp:
            predictions.append(prediction)

        baseEv = AveragingMultiClassEvaluator(None, None, classSet)
        baseEv._calculate(examples, predictions)
        print "============================"
        print predFileName[1]
        print "============================"
        #print baseEv.toStringConcise(title="baseline")

        baseLineF = baseEv.microF.fscore
        for step in [0]:
            for classId in [1]:  #classIds:
                cls = None
                if classSet != None:
                    cls = classSet.getName(classId)
                else:
                    cls = str(classId)
                bestF = thresholdClass(examples, predictions, classId,
                                       baseLineF)
                for prediction in predictions:
                    prediction[classId] -= bestF[2][0] + 0.00000001
                changed = 0
                for prediction in predictions:
                    maxVal = -999999
                    maxClass = None
                    for i in range(1, len(prediction)):
                        if prediction[i] > maxVal:
                            maxVal = prediction[i]
                            maxClass = i
                    if maxClass != prediction[0]:
                        prediction[0] = maxClass
                        changed += 1
                print step, cls, "changed", changed, bestF[0]
                baseLineF = bestF[0]

        if bestF[0] > bestThrF[0]:
            bestThrF = (bestF[0], predFileName[1], bestF[1], bestF[2],
                        bestF[3])
        if baseEv.microF.fscore > bestBaseF[0]:
            bestBaseF = (baseEv.microF.fscore, predFileName[1],
                         baseEv.microF.toStringConcise())

        print "-------- Baseline ------------"
        print baseEv.toStringConcise()
        print "-------- Best ------------"
        print bestF[0], bestF[1], bestF[2]
        print bestF[3]
        thEv = AveragingMultiClassEvaluator(None, None, classSet)
        thEv._calculate(examples, predictions)
        print thEv.toStringConcise()

    print "=============== All Best ==============="
    print "Threshold", bestThrF
    print "Base", bestBaseF
    sys.exit()
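    # NOTE: everything below this sys.exit() call is unreachable leftover
    # code; 'pairs' is never defined in this function.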

    memPredictions = []
    bestEv = baseEv
    bestPair = [None, None, None]
    for p in predictions:
        memPredictions.append(p)
    for pair in pairs:
        modifier = pair[0] + 0.00000001
        changedClass = 0
        for pred in memPredictions:
            negPred = pred[1] - modifier
            maxVal = negPred
            maxClass = 1
            for i in range(2, len(pred)):
                if pred[i] > maxVal:
                    maxVal = pred[i]
                    maxClass = i
            if pred[0] != maxClass:
                changedClass += 1
            pred[0] = maxClass
        ev = AveragingMultiClassEvaluator(None)
        ev._calculate(examples, memPredictions)
        print pair[0], pair[2], changedClass
        print ev.toStringConcise()
        if ev.compare(bestEv) == 1:
            print "Improved"
            bestPair = pair
            bestEv = ev

    print "---------------------------------------------"
    print baseEv.toStringConcise(title="baseline")
    print bestPair[0], bestPair[2]
    print bestEv.toStringConcise(title="best")
Code example #24
File: MultiLabelEvaluator.py Project: jbjorne/Tdevel
 def determineThreshold(self, examples, predictions):
     if type(predictions) == types.StringType: # predictions are in file
         predictions = ExampleUtils.loadPredictions(predictions)
     if type(examples) == types.StringType: # examples are in file
         examples = ExampleUtils.readExamples(examples, False)
         
     examplesByClass = {}
     for cls in self.classes:
         examplesByClass[cls] = []
     # prepare examples
     for example, prediction in itertools.izip(examples, predictions):
         # Check true class for multilabel
         trueClass = example[1]
         trueClassName = self.classSet.getName(trueClass)
         assert(trueClass > 0) # multiclass classification uses non-negative integers
         if "---" in trueClassName:
             trueClass = set()
             for name in trueClassName.split("---"):
                 trueClass.add(self.classSet.getId(name))
         else:
             trueClass = [trueClass]
         # Check prediction for multilabel
         predictedClasses = prediction[0]
         if type(predictedClasses) == types.IntType:
             predictedClasses = [predictedClasses]
         
         for predType in predictedClasses:
             if predType != 1:
                 exTrueClass = 1
                 if predType in trueClass:
                     exTrueClass = 2
                 examplesByClass[predType].append( (prediction[predType], exTrueClass, 2) )
         # positives are negatives for other classes
         for cls in self.classes:
             if cls not in predictedClasses:
                 exTrueClass = 1
                 if cls in trueClass:
                     exTrueClass = 2
                 examplesByClass[cls].append( (prediction[cls], exTrueClass, 1) )
     # do the thresholding
     thresholdByClass = {}
     for cls in self.classes:
         if cls == 1:
             continue
         thresholdByClass[cls] = 0.0
         examplesByClass[cls].sort()
         # Start with all below zero being negative, and all above it being what is predicted
         ev = EvaluationData()
         for example in examplesByClass[cls]:
             #print example
             if example[0] < 0.0:
                 updateF(ev, example[1], 2, 1) # always negative
             else:
                 updateF(ev, example[1], example[2], 1) # what is predicted
         count = 0
         bestF = [self.dataByClass[cls].fscore, None, (0.0, None), None]
         for example in examplesByClass[cls]:
             if example[0] < 0.0:
                 # Remove original example
                 updateF(ev, example[1], 2, -1)
                 # Add new example
                 updateF(ev, example[1], example[2], 1)
                 # Calculate F for this point
             else:
                 # Remove original example
                 updateF(ev, example[1], example[2], -1)
                 # Add new example
                 updateF(ev, example[1], 1, 1)
                 # Calculate F for this point
             ev.calculateFScore()
             #print example, ev.toStringConcise()
             count += 1
             #if self.classSet.getName(cls) == "Binding":
             #    print count, example, ev.toStringConcise()
             if ev.fscore > bestF[0]:
                 bestF = (ev.fscore, count, example, ev.toStringConcise())
                 self.dataByClass[cls] = copy.copy(ev)
         print >> sys.stderr, "Threshold", self.classSet.getName(cls), bestF
         if bestF[2][0] != 0.0:
             thresholdByClass[cls] = bestF[2][0] + 0.00000001
         else:
             thresholdByClass[cls] = 0.0
     #print thresholdByClass
     self.thresholds = thresholdByClass
     #self._calculate(examples, predictions, thresholdByClass)
     #print >> sys.stderr, "Optimal", self.toStringConcise()
     return thresholdByClass