Example #1
    def __init__(self, examples, predictions=None, classSet=None):
        if type(classSet) == types.StringType:  # class names are in file
            classSet = IdSet(filename=classSet)
        if type(predictions) == types.StringType:  # predictions are in file
            predictions = ExampleUtils.loadPredictions(predictions)
        if type(examples) == types.StringType:  # examples are in file
            examples = ExampleUtils.readExamples(examples, False)

        self.classSet = classSet
        # define class ids in alphabetical order
        if classSet != None:
            classNames = sorted(classSet.Ids.keys())
        else:
            classNames = []
        # make an ordered list of class ids
        self.classes = []
        for className in classNames:
            self.classes.append(classSet.getId(className))
        # create data structures for per-class evaluation
        self.dataByClass = {}
        for cls in self.classes:
            self.dataByClass[cls] = EvaluationData()
        # hack for unnamed classes
        if len(self.dataByClass) == 0:
            self.dataByClass[1] = EvaluationData()
            self.dataByClass[2] = EvaluationData()

        #self.untypedUndirected = None
        self.untypedCurrentMajorId = None
        self.untypedPredictionQueue = []
        self.untypedUndirected = EvaluationData()
        #self.AUC = None
        if predictions != None:
            self._calculate(examples, predictions)
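
EvaluationData, IdSet and ExampleUtils are imported from elsewhere in the project and are not shown on this page. As a rough guide to what the other examples expect from EvaluationData, here is a minimal sketch of such an accumulator; the method names are taken from the calls in these snippets, but the internals are an assumption, not the project's actual implementation:

    class EvaluationData(object):
        """Minimal sketch of the per-class accumulator these examples rely on (assumed)."""

        def __init__(self):
            self._tp = self._tn = self._fp = self._fn = 0
            self.precision = 0.0
            self.recall = 0.0
            self.fscore = "N/A"  # stays "N/A" until calculateFScore() is called

        def addTP(self): self._tp += 1
        def addTN(self): self._tn += 1
        def addFP(self): self._fp += 1
        def addFN(self): self._fn += 1

        def getNumInstances(self):
            # examples whose true label was this class
            return self._tp + self._fn

        def getFP(self):
            return self._fp

        def calculateFScore(self):
            # standard precision/recall/F1 with zero-safe denominators
            if self._tp + self._fp > 0:
                self.precision = self._tp / float(self._tp + self._fp)
            if self._tp + self._fn > 0:
                self.recall = self._tp / float(self._tp + self._fn)
            if self.precision + self.recall > 0:
                self.fscore = (2 * self.precision * self.recall /
                               (self.precision + self.recall))
            else:
                self.fscore = 0.0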
Example #2
    def __init__(self, examples, predictions=None, classSet=None):
        if type(classSet) == types.StringType:  # class names are in file
            classSet = IdSet(filename=classSet)
        if type(predictions) == types.StringType:  # predictions are in file
            predictions = ExampleUtils.loadPredictions(predictions)
        if type(examples) == types.StringType:  # examples are in file
            examples = ExampleUtils.readExamples(examples, False)

        self.classSet = classSet
        self.dataByClass = defaultdict(EvaluationData)

        #self.untypedUndirected = None
        self.untypedCurrentMajorId = None
        self.untypedPredictionQueue = []
        self.untypedUndirected = EvaluationData()
        #self.AUC = None
        if predictions != None:
            self._calculate(examples, predictions)
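
Compared with Example #1, this variant swaps the eagerly built per-class dictionary (and its "hack for unnamed classes") for collections.defaultdict, which constructs an EvaluationData lazily the first time a class id is touched. A small illustration of that behavior, using a stand-in class rather than the real EvaluationData:

    from collections import defaultdict

    class Tally(object):  # stand-in for EvaluationData, just for illustration
        def __init__(self):
            self.tp = 0

    dataByClass = defaultdict(Tally)
    dataByClass[5].tp += 1  # key 5 springs into existence on first access
    print(sorted(dataByClass.keys()))  # -> [5]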
Example #3
    @classmethod
    def threshold(cls, examples, predictions):
        # Make negative confidence score / true class pairs
        if type(examples) in types.StringTypes:
            examples = ExampleUtils.readExamples(examples, False)
        if type(predictions) in types.StringTypes:
            predictions = ExampleUtils.loadPredictions(predictions)
        pairs = []
        realPositives = 0
        for example, prediction in itertools.izip(examples, predictions):
            trueClass = example[1]
            assert trueClass > 0  # multiclass classification uses positive integers
            if trueClass > 1:
                realPositives += 1
            negClassValue = prediction[1]
            pairs.append((negClassValue, trueClass))
        pairs.sort(reverse=True)
        realNegatives = len(pairs) - realPositives

        # When starting thresholding, all examples are considered positive
        binaryF = EvaluationData()
        binaryF._tp = realPositives
        binaryF._fp = realNegatives
        binaryF._fn = 0
        binaryF.calculateFScore()
        fscore = binaryF.fscore
        threshold = pairs[0][0] - 1.

        # Turn one example negative at a time
        for pair in pairs:
            if pair[1] == 1:  # the real class is negative
                binaryF._fp -= 1  # false positive -> true negative
            else:  # the real class is a positive class
                binaryF._tp -= 1  # true positive -> ...
                binaryF._fn += 1  # ... false negative
            binaryF.calculateFScore()
            if binaryF.fscore > fscore:
                fscore = binaryF.fscore
                threshold = pair[0] + 0.00000001
        return threshold, fscore
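
For intuition, here is a self-contained re-trace of the same sweep on made-up (negClassValue, trueClass) pairs; it mirrors the logic above without depending on EvaluationData (class 1 is the negative class, anything greater is positive):

    # made-up (negative-class score, true class) pairs; 1 = negative, 2 = positive
    pairs = sorted([(-2.0, 2), (-0.5, 2), (0.3, 1), (1.1, 1)], reverse=True)

    tp = sum(1 for _, c in pairs if c > 1)  # start: every example counted positive
    fp = len(pairs) - tp
    fn = 0
    best_fscore = 2.0 * tp / (2.0 * tp + fp + fn)
    best_threshold = pairs[0][0] - 1.0

    for score, trueClass in pairs:  # turn one example negative at a time
        if trueClass == 1:
            fp -= 1  # false positive -> true negative
        else:
            tp -= 1  # true positive -> ...
            fn += 1  # ... false negative
        fscore = 2.0 * tp / (2.0 * tp + fp + fn) if tp + fp + fn > 0 else 0.0
        if fscore > best_fscore:
            best_fscore = fscore
            best_threshold = score + 0.00000001

    print((best_threshold, best_fscore))  # -> (0.30000001, 1.0)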
Example #4
    def _calculate(self, examples, predictions):
        """
        The actual evaluation
        """
        #self._calculateUntypedUndirected(examples, predictions)
        # First count instances
        self.microF = EvaluationData()
        self.binaryF = EvaluationData()
        #self.classifications = []
        #assert(len(examples) == len(predictions))
        #for i in range(len(examples)):
        for example, prediction in itertools.izip(examples, predictions):
            #            self._queueUntypedUndirected(example, prediction)
            #example = examples[i] # examples and predictions are in matching lists
            #prediction = predictions[i] # examples and predictions are in matching lists
            trueClass = example[1]
            assert trueClass > 0  # multiclass classification uses positive integers
            predictedClass = prediction[0]
            #print predictedClass
            assert predictedClass > 0  # multiclass classification uses positive integers
            if predictedClass == trueClass:  # correct classification
                # correctly classified for its class -> true positive for that class
                self.dataByClass[trueClass].addTP()
                if trueClass != 1:  # a positive example -> correct = true positive
                    #self.classifications.append("tp")
                    #self.classifications.append((prediction[0],"tp",self.type,prediction[1],prediction[3]))
                    self.microF.addTP()
                    self.binaryF.addTP()
                else:  # a negative example -> correct = true negative
                    #self.classifications.append((prediction[0],"tn",self.type,prediction[1],prediction[3]))
                    #self.classifications.append("tn")
                    self.microF.addTN()
                    self.binaryF.addTN()
                for cls in self.classes:
                    # this example was correctly classified for its class,
                    # so it is also correctly classified for each class,
                    # i.e. true negative for them
                    if cls != trueClass:
                        self.dataByClass[cls].addTN()
            else:  # predictedClass != trueClass:
                # prediction was incorrect -> false positive for the predicted class
                self.dataByClass[predictedClass].addFP()
                if predictedClass == 1:  # positive example, negative prediction -> incorrect = false negative
                    #self.classifications.append("fn")
                    #self.classifications.append((prediction[0],"fn",self.type,prediction[1],prediction[3]))
                    self.microF.addFN()
                    self.binaryF.addFN()
                else:  # positive but incorrect prediction -> false positive
                    #self.classifications.append("fp")
                    #self.classifications.append((prediction[0],"fp",self.type,prediction[1],prediction[3]))
                    self.microF.addFP()
                    if trueClass == 1:
                        self.binaryF.addFP()
                    else:
                        self.binaryF.addTP()
                for cls in self.classes:
                    if cls == trueClass:  # example not found -> false negative
                        self.dataByClass[cls].addFN()
                    elif cls != predictedClass:
                        self.dataByClass[cls].addTN()

        # Process remaining untyped undirected examples and calculate untyped undirected f-score
        #self._processUntypedUndirectedQueue()
        #self.untypedUndirected.calculateFScore()

        # Then calculate statistics
        for cls in self.classes:
            self.dataByClass[cls].calculateFScore()
        self.microF.calculateFScore()
        self.binaryF.calculateFScore()

        # Finally calculate macro-f-score
        # macro-average is simply the unweighted average of per-class f-scores
        numClassesWithInstances = 0
        self.macroF = EvaluationData()
        self.macroF.precision = 0.0
        self.macroF.recall = 0.0
        self.macroF.fscore = 0.0
        for cls in self.classes:
            if (self.dataByClass[cls].getNumInstances() > 0
                    or self.dataByClass[cls].getFP() > 0
                ) and cls != self.classSet.getId("neg", False):
                numClassesWithInstances += 1
                self.macroF.precision += self.dataByClass[cls].precision
                self.macroF.recall += self.dataByClass[cls].recall
                if self.dataByClass[cls].fscore != "N/A":
                    self.macroF.fscore += self.dataByClass[cls].fscore
        if numClassesWithInstances > 0:
            if self.macroF.precision != 0:
                self.macroF.precision /= float(numClassesWithInstances)
            if self.macroF.recall != 0:
                self.macroF.recall /= float(numClassesWithInstances)
            if self.macroF.fscore != 0:
                self.macroF.fscore /= float(numClassesWithInstances)
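
To see what microF and macroF measure, here is a hand-checkable toy computation. One caveat: the method above records exactly one outcome per example for microF, so on a confusion between two positive classes its micro counts differ slightly from the textbook pooled version used here:

    # class ids: 1 = negative class, 2 and 3 = positive classes (made-up data)
    gold      = [2, 2, 3, 1, 1]
    predicted = [2, 3, 3, 1, 2]

    counts = dict((c, {"tp": 0, "fp": 0, "fn": 0}) for c in (2, 3))
    for g, p in zip(gold, predicted):
        for c in (2, 3):
            if p == c and g == c:
                counts[c]["tp"] += 1  # correct positive prediction
            elif p == c:
                counts[c]["fp"] += 1  # predicted c, but c was wrong
            elif g == c:
                counts[c]["fn"] += 1  # c was the truth, but was not predicted

    def f1(tp, fp, fn):
        return 2.0 * tp / (2.0 * tp + fp + fn) if tp + fp + fn > 0 else 0.0

    # micro-F pools the raw counts; macro-F averages the per-class F-scores
    micro = f1(sum(c["tp"] for c in counts.values()),
               sum(c["fp"] for c in counts.values()),
               sum(c["fn"] for c in counts.values()))
    macro = sum(f1(**c) for c in counts.values()) / len(counts)
    print("micro=%.3f macro=%.3f" % (micro, macro))  # micro=0.571 macro=0.583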
Example #5
    def _calculate(self, examples, predictions, thresholds=None):
        """
        The actual evaluation
        """
        for cls in self.classes:
            self.dataByClass[cls] = EvaluationData()
        #self._calculateUntypedUndirected(examples, predictions)
        # First count instances
        self.microF = EvaluationData()
        self.binaryF = EvaluationData()
        #self.classifications = []
        #assert(len(examples) == len(predictions))
        #for i in range(len(examples)):

        # Prepare offsets for thresholding
        self.thresholds = thresholds
        offsets = [None] + len(self.classSet.Ids) * [0.0]
        for cls in self.classSet.Ids.keys():
            if thresholds != None and cls in thresholds:
                offsets[cls] = thresholds[cls]
        #print self.classes, offsets

        # Calculate results
        for example, prediction in itertools.izip(examples, predictions):
            #self._queueUntypedUndirected(example, prediction)
            # Check true class for multilabel
            trueClass = example[1]
            trueClassName = self.classSet.getName(trueClass)
            assert trueClass > 0  # multiclass classification uses positive integers
            if "---" in trueClassName:
                trueClass = set()
                for name in trueClassName.split("---"):
                    trueClass.add(self.classSet.getId(name))
            else:
                trueClass = [trueClass]
            # Check prediction for multilabel
            predictedClasses = prediction[0]
            if type(predictedClasses) == types.IntType:
                predictedClasses = [predictedClasses]
            # Thresholding
            if thresholds != None:
                for i in range(2, len(prediction)):
                    if prediction[i] != "N/A":
                        if prediction[i] < 0.0 and prediction[i] - offsets[i] > 0.0:
                            if predictedClasses == [1]:
                                predictedClasses = []
                            predictedClasses.append(i)
                        elif prediction[i] > 0.0 and prediction[i] - offsets[i] < 0.0:
                            predictedClasses.remove(i)
                            if len(predictedClasses) == 0:
                                predictedClasses = [1]

            for predictedClass in predictedClasses:
                #print predictedClass
                assert predictedClass > 0  # multiclass classification uses positive integers
                if predictedClass in trueClass:  # correct classification
                    # correctly classified for its class -> true positive for that class
                    self.dataByClass[predictedClass].addTP()
                    if predictedClass != 1:  # a positive example -> correct = true positive
                        #self.classifications.append("tp")
                        #self.classifications.append((prediction[0],"tp",self.type,prediction[1],prediction[3]))
                        self.microF.addTP()
                        self.binaryF.addTP()
                    else:  # a negative example -> correct = true negative
                        #self.classifications.append((prediction[0],"tn",self.type,prediction[1],prediction[3]))
                        #self.classifications.append("tn")
                        self.microF.addTN()
                        self.binaryF.addTN()
                    for cls in self.classes:
                        # this example was correctly classified for its class,
                        # so it is also correctly classified for each class,
                        # i.e. true negative for them
                        if cls != predictedClass:
                            if cls not in predictedClasses:
                                self.dataByClass[cls].addTN()
                else:  # predictedClass != trueClass:
                    # prediction was incorrect -> false positive for the predicted class
                    self.dataByClass[predictedClass].addFP()
                    if predictedClass == 1:  # positive example, negative prediction -> incorrect = false negative
                        #self.classifications.append("fn")
                        #self.classifications.append((prediction[0],"fn",self.type,prediction[1],prediction[3]))
                        self.microF.addFN()
                        self.binaryF.addFN()
                    else:  # positive but incorrect prediction -> false positive
                        #self.classifications.append("fp")
                        #self.classifications.append((prediction[0],"fp",self.type,prediction[1],prediction[3]))
                        self.microF.addFP()
                        if 1 in trueClass:
                            self.binaryF.addFP()
                        else:
                            self.binaryF.addTP()
                    for cls in self.classes:
                        if cls in trueClass:  # example not found -> false negative
                            if cls not in predictedClasses:
                                self.dataByClass[cls].addFN()
                        elif cls != predictedClass:
                            self.dataByClass[cls].addTN()

        # Process remaining untyped undirected examples and calculate untyped undirected f-score
        #self._processUntypedUndirectedQueue()
        #self.untypedUndirected.calculateFScore()

        # Then calculate statistics
        for cls in self.classes:
            self.dataByClass[cls].calculateFScore()
        self.microF.calculateFScore()
        self.binaryF.calculateFScore()

        # Finally calculate macro-f-score
        # macro-average is simply the unweighted average of per-class f-scores
        numClassesWithInstances = 0
        self.macroF = EvaluationData()
        self.macroF.precision = 0.0
        self.macroF.recall = 0.0
        self.macroF.fscore = 0.0
        for cls in self.classes:
            if (self.dataByClass[cls].getNumInstances() > 0
                    or self.dataByClass[cls].getFP() > 0
                ) and cls != self.classSet.getId("neg", False):
                numClassesWithInstances += 1
                self.macroF.precision += self.dataByClass[cls].precision
                self.macroF.recall += self.dataByClass[cls].recall
                if self.dataByClass[cls].fscore != "N/A":
                    self.macroF.fscore += self.dataByClass[cls].fscore
        if numClassesWithInstances > 0:
            if self.macroF.precision != 0:
                self.macroF.precision /= float(numClassesWithInstances)
            if self.macroF.recall != 0:
                self.macroF.recall /= float(numClassesWithInstances)
            if self.macroF.fscore != 0:
                self.macroF.fscore /= float(numClassesWithInstances)
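
The offsets logic above shifts each class's margin before deciding multilabel membership. A hedged illustration of just that block with made-up numbers, assuming the prediction layout [predictedClasses, negScore, score_for_class_2, score_for_class_3, ...] implied by the indexing (a dict stands in for the offsets list):

    # prediction[i] is the margin for class i (i >= 2); below zero means
    # "class i not predicted". prediction[0] holds the predicted class list.
    prediction = [[1], "N/A", -0.4, -0.8]  # everything negative by default
    thresholds = {2: -0.5}  # lower the bar for class 2 by 0.5

    predictedClasses = list(prediction[0])
    for i in range(2, len(prediction)):
        if prediction[i] == "N/A":
            continue
        offset = thresholds.get(i, 0.0)
        if prediction[i] < 0.0 and prediction[i] - offset > 0.0:
            if predictedClasses == [1]:  # drop the explicit negative label
                predictedClasses = []
            predictedClasses.append(i)
        elif prediction[i] > 0.0 and prediction[i] - offset < 0.0:
            predictedClasses.remove(i)
            if len(predictedClasses) == 0:  # nothing left -> negative class
                predictedClasses = [1]

    print(predictedClasses)  # -> [2]: class 2 now clears its lowered bar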
Example #6
    def determineThreshold(self, examples, predictions):
        if type(predictions) == types.StringType:  # predictions are in file
            predictions = ExampleUtils.loadPredictions(predictions)
        if type(examples) == types.StringType:  # examples are in file
            examples = ExampleUtils.readExamples(examples, False)

        examplesByClass = {}
        for cls in self.classes:
            examplesByClass[cls] = []
        # prepare examples
        for example, prediction in itertools.izip(examples, predictions):
            # Check true class for multilabel
            trueClass = example[1]
            trueClassName = self.classSet.getName(trueClass)
            assert trueClass > 0  # multiclass classification uses positive integers
            if "---" in trueClassName:
                trueClass = set()
                for name in trueClassName.split("---"):
                    trueClass.add(self.classSet.getId(name))
            else:
                trueClass = [trueClass]
            # Check prediction for multilabel
            predictedClasses = prediction[0]
            if type(predictedClasses) == types.IntType:
                predictedClasses = [predictedClasses]

            for predType in predictedClasses:
                if predType != 1:
                    exTrueClass = 1
                    if predType in trueClass:
                        exTrueClass = 2
                    examplesByClass[predType].append(
                        (prediction[predType], exTrueClass, 2))
            # positives are negatives for other classes
            for cls in self.classes:
                if cls not in predictedClasses:
                    exTrueClass = 1
                    if cls in trueClass:
                        exTrueClass = 2
                    examplesByClass[cls].append(
                        (prediction[cls], exTrueClass, 1))
        # do the thresholding
        thresholdByClass = {}
        for cls in self.classes:
            if cls == 1:
                continue
            thresholdByClass[cls] = 0.0
            examplesByClass[cls].sort()
            # Start with all below zero being negative, and all above it being what is predicted
            ev = EvaluationData()
            for example in examplesByClass[cls]:
                #print example
                if example[0] < 0.0:
                    updateF(ev, example[1], 2, 1)  # always negative
                else:
                    updateF(ev, example[1], example[2], 1)  # what is predicted
            count = 0
            bestF = [self.dataByClass[cls].fscore, None, (0.0, None), None]
            for example in examplesByClass[cls]:
                if example[0] < 0.0:
                    # Remove original example
                    updateF(ev, example[1], 2, -1)
                    # Add new example
                    updateF(ev, example[1], example[2], 1)
                    # Calculate F for this point
                else:
                    # Remove original example
                    updateF(ev, example[1], example[2], -1)
                    # Add new example
                    updateF(ev, example[1], 1, 1)
                    # Calculate F for this point
                ev.calculateFScore()
                #print example, ev.toStringConcise()
                count += 1
                #if self.classSet.getName(cls) == "Binding":
                #    print count, example, ev.toStringConcise()
                if ev.fscore > bestF[0]:
                    bestF = (ev.fscore, count, example, ev.toStringConcise())
                    self.dataByClass[cls] = copy.copy(ev)
            print >> sys.stderr, "Threshold", self.classSet.getName(cls), bestF
            if bestF[2][0] != 0.0:
                thresholdByClass[cls] = bestF[2][0] + 0.00000001
            else:
                thresholdByClass[cls] = 0.0
        #print thresholdByClass
        self.thresholds = thresholdByClass
        #self._calculate(examples, predictions, thresholdByClass)
        #print >> sys.stderr, "Optimal", self.toStringConcise()
        return thresholdByClass
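
The updateF helper is not shown on this page. Judging from the call sites, it adjusts one of an EvaluationData's TP/FP/FN/TN counters by a +1/-1 delta for a (true class, predicted class) pair, where class 1 means negative and class 2 positive in this binarized view. The mapping below is therefore a guess at its shape, not the project's actual code:

    def updateF(ev, trueClass, predictedClass, delta):
        # Hypothetical reconstruction (the real updateF is defined elsewhere).
        # Binarized convention assumed: 1 = negative class, 2 = positive class.
        if predictedClass != 1:  # predicted positive
            if trueClass == predictedClass:
                ev._tp += delta  # predicted positive, truly positive
            else:
                ev._fp += delta  # predicted positive, truly negative
        else:  # predicted negative
            if trueClass != 1:
                ev._fn += delta  # missed a positive example
            else:
                ev._tn += delta  # correctly left negative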