Ejemplo n.º 1
0
class ClassificationMeasurements(BaseObject):
    """ Keeps running statistics about a classifier so that performance
    metrics can be computed at any given moment.

    It combines a ConfusionMatrix object with a few additional counters
    (sample count, majority-class hits and no-change hits) to compute a
    range of performance metrics.

    Only two pieces of information are needed to keep the statistics
    updated: the predictions and the true labels.

    At any given moment, it can compute the following statistics: accuracy,
    kappa, kappa_t, kappa_m, majority_class and error rate.

    Parameters
    ----------
    targets: list
        A list containing the possible labels. The list is stored by
        reference and labels observed later are appended to it.

    dtype: data type (Default: numpy.int64)
        The data type of the existing labels. It is forwarded to the
        ConfusionMatrix constructor.

    """
    def __init__(self, targets=None, dtype=np.int64):
        super().__init__()
        self.targets = targets
        self.n_targets = len(targets) if targets is not None else 0
        self.confusion_matrix = ConfusionMatrix(self.n_targets, dtype)
        self.last_true_label = None
        self.last_prediction = None
        self.last_sample = None
        # Accumulated weight of all the samples seen so far.
        self.sample_count = 0
        # Accumulated weight of the samples a majority-class predictor
        # would have classified correctly (used by kappa_m).
        self.majority_classifier = 0
        # Accumulated weight of the samples whose true label equals the
        # previous true label (used by kappa_t).
        self.correct_no_change = 0

    def reset(self):
        """ Clears every accumulated statistic.

        The known targets are kept; the confusion matrix is restarted with
        the current number of targets.

        """
        self.n_targets = len(self.targets) if self.targets is not None else 0
        self.last_true_label = None
        self.last_prediction = None
        self.last_sample = None
        self.sample_count = 0
        self.majority_classifier = 0
        self.correct_no_change = 0
        self.confusion_matrix.restart(self.n_targets)

    def add_result(self, y_true, y_pred, weight=1.0):
        """ Updates its statistics with the results of a prediction.

        Parameters
        ----------
        y_true: int
            The true label.

        y_pred: int
            The classifier's prediction

        weight: float
            Sample's weight

        """
        check_weights(weight)

        true_idx = self._get_target_index(y_true, True)
        pred_idx = self._get_target_index(y_pred, True)
        # NOTE(review): the weight is added to sample_count but is not
        # handed to the confusion matrix -- confirm whether
        # ConfusionMatrix.update should receive it as well.
        self.confusion_matrix.update(true_idx, pred_idx)
        self.sample_count += weight

        # get_majority_class() yields an index into self.targets, so it has
        # to be compared with the index of the true label rather than with
        # the label itself.
        if self.get_majority_class() == true_idx:
            self.majority_classifier += weight
        if self.last_true_label == y_true:
            self.correct_no_change += weight

        self.last_true_label = y_true
        self.last_prediction = y_pred

    def get_last(self):
        """ Returns the most recent (true label, prediction) pair.

        Returns
        -------
        tuple
            The last true label and the last prediction, both None if no
            result was added since creation or the last reset.

        """
        return self.last_true_label, self.last_prediction

    def get_majority_class(self):
        """ Computes the true majority class.

        Returns
        -------
        int
            The index, in the self.targets list, of the true class with
            the largest row total in the confusion matrix. Index 0 is
            returned if every row total is zero, and False is returned
            if no target is known.

        """
        if (self.n_targets is None) or (self.n_targets == 0):
            return False
        majority_class = 0
        max_total = 0.0
        for i in range(self.n_targets):
            # Row totals are compared directly: dividing all of them by
            # the same sample count would not change which one is largest
            # and would fail when no sample has been seen yet.
            row_total = sum(
                (self.confusion_matrix.value_at(i, j)
                 for j in range(self.n_targets)), 0.0)
            if row_total > max_total:
                max_total = row_total
                majority_class = i

        return majority_class

    def get_accuracy(self):
        """ Computes the accuracy.

        Returns
        -------
        float
            The accuracy, or 0.0 if no sample has been seen yet.

        """
        # Explicit guard: dividing a NumPy scalar by zero yields nan and a
        # warning instead of raising ZeroDivisionError.
        if self.sample_count == 0:
            return 0.0
        n, _ = self.confusion_matrix.shape()
        correct = sum(
            (self.confusion_matrix.value_at(i, i) for i in range(n)), 0.0)
        return correct / self.sample_count

    def get_incorrectly_classified_ratio(self):
        """ Computes the error rate.

        Returns
        -------
        float
            One minus the accuracy.

        """
        return 1.0 - self.get_accuracy()

    def _get_target_index(self, target, add_label=False):
        """ Computes the index of an element in the self.targets list.
        Also reshapes the ConfusionMatrix and adds new found targets
        if add_label is True.

        Parameters
        ----------
        target: int
            A class label.

        add_label: bool
            Either to add new found labels to the targets list or not.

        Returns
        -------
        int
            The target index in the self.targets list, or None if the
            label is unknown and was not added.

        """
        if self.targets is None:
            if not add_label:
                return None
            self.targets = []
        if add_label and (target not in self.targets):
            self.targets.append(target)
            self.n_targets = len(self.targets)
            self.confusion_matrix.reshape(self.n_targets, self.n_targets)
        for index, known_target in enumerate(self.targets):
            if known_target == target:
                return index
        return None

    @staticmethod
    def _kappa_statistic(p0, pc):
        """ Applies the formula shared by all the kappa variants.

        Parameters
        ----------
        p0: float
            The observed accuracy.

        pc: float
            The accuracy of the reference (chance, no-change or majority)
            classifier.

        Returns
        -------
        float
            (p0 - pc) / (1 - pc), or 1 when pc equals 1.

        """
        if pc == 1:
            return 1
        return (p0 - pc) / (1.0 - pc)

    def get_kappa(self):
        """ Computes the Cohen's kappa coefficient.

        Returns
        -------
        float
            The Cohen's kappa coefficient, or 0.0 if no sample has been
            seen yet.

        """
        if self.sample_count == 0:
            return 0.0
        p0 = self.get_accuracy()
        pc = 0.0
        n_rows, _ = self.confusion_matrix.shape()
        for i in range(n_rows):
            row_ratio = np.sum(self.confusion_matrix.row(i)) / self.sample_count
            column_ratio = \
                np.sum(self.confusion_matrix.column(i)) / self.sample_count
            pc += row_ratio * column_ratio
        return self._kappa_statistic(p0, pc)

    def get_kappa_t(self):
        """ Computes the Cohen's kappa T coefficient. This measures the
        temporal correlation between samples.

        Returns
        -------
        float
            The Cohen's kappa T coefficient.

        """
        p0 = self.get_accuracy()
        if self.sample_count != 0:
            pc = self.correct_no_change / self.sample_count
        else:
            pc = 0
        return self._kappa_statistic(p0, pc)

    def get_kappa_m(self):
        """ Computes the Cohen's kappa M coefficient.

        Returns
        -------
        float
            The Cohen's kappa M coefficient.

        """
        p0 = self.get_accuracy()
        if self.sample_count != 0:
            pc = self.majority_classifier / self.sample_count
        else:
            pc = 0
        return self._kappa_statistic(p0, pc)

    @property
    def _matrix(self):
        # Shortcut to the `matrix` attribute of the confusion matrix.
        return self.confusion_matrix.matrix

    def get_info(self):
        """ Builds a one-line summary of the current statistics.

        Returns
        -------
        str
            The class name followed by the sample count, accuracy, kappa,
            kappa_t, kappa_m and majority class.

        """
        return ''.join([
            '{}:'.format(type(self).__name__),
            ' - sample_count: {}'.format(self.sample_count),
            ' - accuracy: {:.6f}'.format(self.get_accuracy()),
            ' - kappa: {:.6f}'.format(self.get_kappa()),
            ' - kappa_t: {:.6f}'.format(self.get_kappa_t()),
            ' - kappa_m: {:.6f}'.format(self.get_kappa_m()),
            ' - majority_class: {}'.format(self.get_majority_class()),
        ])

    def get_class_type(self):
        """ Returns the string identifying this kind of object. """
        return 'measurement'
Ejemplo n.º 2
0
class WindowClassificationMeasurements(BaseObject):
    """ This class will maintain a fixed sized window of the newest information
    about one classifier. It can provide, as requested, any of the relevant
    current metrics about the classifier, measured inside the window.

    To keep track of statistics inside a window, the class will use a
    ConfusionMatrix object, alongside FastBuffers, to simulate fixed sized
    windows of the important classifier's attributes.

    Its functionality is somewhat similar to that of the
    ClassificationMeasurements class. The difference is that the statistics
    kept by this class are local, or partial, while the statistics kept by
    the ClassificationMeasurements class are global.

    At any given moment, it can compute the following statistics: accuracy,
    kappa, kappa_t, kappa_m, majority_class and error rate.

    Parameters
    ----------
    targets: list
        A list containing the possible labels. The list is stored by
        reference and labels observed later are appended to it.

    dtype: data type (Default: numpy.int64)
        The data type of the existing labels. It is forwarded to the
        ConfusionMatrix constructor.

    window_size: int (Default: 200)
        The width of the window. Determines how many samples the object
        can see.

    """
    def __init__(self, targets=None, dtype=np.int64, window_size=200):
        super().__init__()
        self.targets = targets
        self.n_targets = len(targets) if targets is not None else 0
        self.confusion_matrix = ConfusionMatrix(self.n_targets, dtype)
        self.last_class = None

        self.window_size = window_size
        self.true_labels = FastBuffer(window_size)
        self.predictions = FastBuffer(window_size)
        # Incremented every time a sample pair is evicted from the window.
        self.temp = 0
        self.last_prediction = None
        self.last_true_label = None
        self.last_sample = None

        # Counters used by kappa_m and kappa_t respectively. Each one has a
        # companion buffer storing -1 for a sample that incremented the
        # counter and 0 otherwise, so that the contribution of a sample
        # can be undone once it leaves the window.
        self.majority_classifier = 0
        self.correct_no_change = 0
        self.majority_classifier_correction = FastBuffer(window_size)
        self.correct_no_change_correction = FastBuffer(window_size)

    def reset(self):
        """ Clears every accumulated statistic.

        The known targets and the window size are kept; the buffers are
        recreated and the confusion matrix is restarted with the current
        number of targets.

        """
        self.n_targets = len(self.targets) if self.targets is not None else 0

        self.true_labels = FastBuffer(self.window_size)
        self.predictions = FastBuffer(self.window_size)
        self.temp = 0
        self.last_prediction = None
        self.last_true_label = None
        self.last_sample = None

        self.majority_classifier = 0
        self.correct_no_change = 0
        self.confusion_matrix.restart(self.n_targets)
        self.majority_classifier_correction = FastBuffer(self.window_size)
        self.correct_no_change_correction = FastBuffer(self.window_size)

    def add_result(self, y_true, y_pred):
        """ Updates its statistics with the results of a prediction.
        If needed it will remove samples from the observation window.

        Parameters
        ----------
        y_true: int
            The true label.

        y_pred: int
            The classifier's prediction

        """
        true_idx = self._get_target_index(y_true, True)
        pred_idx = self._get_target_index(y_pred, True)
        old_true = self.true_labels.add_element(np.array([y_true]))
        old_predict = self.predictions.add_element(np.array([y_pred]))

        # Verify if it's needed to decrease the count of any label
        # pair in the confusion matrix
        if (old_true is not None) and (old_predict is not None):
            self.temp += 1
            self.confusion_matrix.remove(
                self._get_target_index(old_true[0]),
                self._get_target_index(old_predict[0]))
            # Undo the contribution of the evicted sample; peek() is
            # presumed to return the oldest stored correction -- confirm
            # against FastBuffer.
            self.correct_no_change += self.correct_no_change_correction.peek()
            self.majority_classifier += \
                self.majority_classifier_correction.peek()

        # Verify if it's needed to increase the majority_classifier count.
        # get_majority_class() yields an index into self.targets, so it has
        # to be compared with the index of the true label rather than with
        # the label itself.
        majority_idx = self.get_majority_class()
        if (majority_idx is not None) and (majority_idx == true_idx):
            self.majority_classifier += 1
            self.majority_classifier_correction.add_element([-1])
        else:
            self.majority_classifier_correction.add_element([0])

        # Verify if it's needed to increase the correct_no_change count
        if (self.last_true_label is not None) and \
                (self.last_true_label == y_true):
            self.correct_no_change += 1
            self.correct_no_change_correction.add_element([-1])
        else:
            self.correct_no_change_correction.add_element([0])

        self.confusion_matrix.update(true_idx, pred_idx)

        self.last_true_label = y_true
        self.last_prediction = y_pred

    def get_last(self):
        """ Returns the most recent (true label, prediction) pair.

        Returns
        -------
        tuple
            The last true label and the last prediction, both None if no
            result was added since creation or the last reset.

        """
        return self.last_true_label, self.last_prediction

    def get_majority_class(self):
        """ Computes the window/current true majority class.

        Returns
        -------
        int
            The index, in the self.targets list, of the true class with
            the largest row total in the confusion matrix. Index 0 is
            returned if every row total is zero, and None is returned
            if no target is known.

        """
        if (self.n_targets is None) or (self.n_targets == 0):
            return None
        majority_class = 0
        max_total = 0.0
        for i in range(self.n_targets):
            # Row totals are compared directly: dividing all of them by
            # the same window size would not change which one is largest
            # and would fail while the window is still empty.
            row_total = sum(
                (self.confusion_matrix.value_at(i, j)
                 for j in range(self.n_targets)), 0.0)
            if row_total > max_total:
                max_total = row_total
                majority_class = i

        return majority_class

    def get_accuracy(self):
        """ Computes the window/current accuracy.

        Returns
        -------
        float
            The window/current accuracy, or 0.0 if the window is empty.

        """
        current_size = self.true_labels.get_current_size()
        # Explicit guard: dividing a NumPy scalar by zero yields nan and a
        # warning instead of raising ZeroDivisionError.
        if current_size == 0:
            return 0.0
        n, _ = self.confusion_matrix.shape()
        correct = sum(
            (self.confusion_matrix.value_at(i, i) for i in range(n)), 0.0)
        return correct / current_size

    def get_incorrectly_classified_ratio(self):
        """ Computes the window/current error rate.

        Returns
        -------
        float
            One minus the window/current accuracy.

        """
        return 1.0 - self.get_accuracy()

    def _get_target_index(self, target, add=False):
        """ Computes the index of an element in the self.targets list.
        Also reshapes the ConfusionMatrix and adds new found targets
        if add is True.

        Parameters
        ----------
        target: int
            A class label.

        add: bool
            Either to add new found labels to the targets list or not.

        Returns
        -------
        int
            The target index in the self.targets list, or None if the
            label is unknown and was not added.

        """
        if self.targets is None:
            if not add:
                return None
            self.targets = []
        if add and (target not in self.targets):
            self.targets.append(target)
            self.n_targets = len(self.targets)
            self.confusion_matrix.reshape(self.n_targets, self.n_targets)
        for index, known_target in enumerate(self.targets):
            if known_target == target:
                return index
        return None

    @staticmethod
    def _kappa_statistic(p0, pc):
        """ Applies the formula shared by all the kappa variants.

        Parameters
        ----------
        p0: float
            The observed accuracy.

        pc: float
            The accuracy of the reference (chance, no-change or majority)
            classifier.

        Returns
        -------
        float
            (p0 - pc) / (1 - pc), or 1 when pc equals 1.

        """
        if pc == 1:
            return 1
        return (p0 - pc) / (1.0 - pc)

    def get_kappa(self):
        """ Computes the window/current Cohen's kappa coefficient.

        Returns
        -------
        float
            The window/current Cohen's kappa coefficient, or 0.0 if the
            window is empty.

        """
        current_size = self.true_labels.get_current_size()
        if current_size == 0:
            return 0.0
        p0 = self.get_accuracy()
        pc = 0.0
        n_rows, _ = self.confusion_matrix.shape()
        for i in range(n_rows):
            row_ratio = np.sum(self.confusion_matrix.row(i)) / current_size
            column_ratio = \
                np.sum(self.confusion_matrix.column(i)) / current_size
            pc += row_ratio * column_ratio

        return self._kappa_statistic(p0, pc)

    def get_kappa_t(self):
        """ Computes the window/current Cohen's kappa T coefficient. This measures
        the temporal correlation between samples.

        Returns
        -------
        float
            The window/current Cohen's kappa T coefficient.

        """
        p0 = self.get_accuracy()
        if self.sample_count != 0:
            pc = self.correct_no_change / self.sample_count
        else:
            pc = 0
        return self._kappa_statistic(p0, pc)

    def get_kappa_m(self):
        """ Computes the window/current Cohen's kappa M coefficient.

        Returns
        -------
        float
            The window/current Cohen's kappa M coefficient.

        """
        p0 = self.get_accuracy()
        if self.sample_count != 0:
            pc = self.majority_classifier / self.sample_count
        else:
            pc = 0
        return self._kappa_statistic(p0, pc)

    @property
    def _matrix(self):
        # Shortcut to the `matrix` attribute of the confusion matrix.
        return self.confusion_matrix.matrix

    @property
    def sample_count(self):
        # Number of samples currently held in the true-label buffer.
        return self.true_labels.get_current_size()

    def get_class_type(self):
        """ Returns the string identifying this kind of object. """
        return 'measurement'

    def get_info(self):
        """ Builds a one-line summary of the window/current statistics.

        Returns
        -------
        str
            The class name followed by the sample count, window size,
            accuracy, kappa, kappa_t, kappa_m and majority class.

        """
        return ''.join([
            '{}:'.format(type(self).__name__),
            ' - sample_count: {}'.format(self.sample_count),
            ' - window_size: {}'.format(self.window_size),
            ' - accuracy: {:.6f}'.format(self.get_accuracy()),
            ' - kappa: {:.6f}'.format(self.get_kappa()),
            ' - kappa_t: {:.6f}'.format(self.get_kappa_t()),
            ' - kappa_m: {:.6f}'.format(self.get_kappa_m()),
            ' - majority_class: {}'.format(self.get_majority_class()),
        ])