Example #1
def matthews_corrcoef(y_true, y_pred, sample_weight=None):
    import numpy as np  # added so the excerpt is self-contained
    from sklearn.preprocessing import LabelEncoder
    from sklearn.metrics import confusion_matrix
    from sklearn.metrics._classification import _check_targets

    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    if y_type not in {'binary', 'multiclass'}:
        raise ValueError('%s is not supported' % y_type)
    lb = LabelEncoder()
    lb.fit(np.hstack([y_true, y_pred]))
    y_true = lb.transform(y_true)
    y_pred = lb.transform(y_pred)
    C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
    t_sum = C.sum(axis=1)
    p_sum = C.sum(axis=0)
    n_correct = np.trace(C)
    n_samples = p_sum.sum()
    cov_ytyp = n_correct * n_samples - np.dot(t_sum, p_sum)
    cov_ypyp = n_samples ** 2 - np.dot(p_sum, p_sum)
    cov_ytyt = n_samples ** 2 - np.dot(t_sum, t_sum)
    mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)
    if np.isnan(mcc):
        return 0.0
    else:
        return mcc
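A quick numeric check of the excerpt above (a minimal sketch; it assumes the function is defined in the current session):

import numpy as np

y_true = np.array([1, 1, 1, 0])
y_pred = np.array([1, 0, 1, 1])
print(matthews_corrcoef(y_true, y_pred))  # -0.333..., i.e. -1/3 for this toy case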
Example #2
def precision_score(
    y_true: Union[Sequence[int], np.ndarray, pd.Series],
    y_pred: Union[Sequence[int], np.ndarray, pd.Series],
    problem: str = "Binary",
    positive_class: Union[str, int] = None,
) -> float:
    """Calculates the precision, ``tp / (tp + fp)``, for a binary or one-vs-rest multiclass problem."""
    problem_true, y_true, y_pred = _check_targets(y_true, y_pred)
    if problem.casefold() == "binary":
        tp, fp, fn, tn = get_classification_labels(y_true, y_pred)
    elif problem.casefold() == "multiclass":
        # Compare against None explicitly so a falsy positive class such as 0 is still accepted
        if positive_class is not None:
            if isinstance(positive_class, (str, int)):
                new_y_true = np.where(y_true == positive_class, 1, 0)
                new_y_pred = np.where(y_pred == positive_class, 1, 0)
                tp, fp, fn, tn = get_classification_labels(
                    new_y_true, new_y_pred)
            else:
                raise Exception(
                    "Cannot discern positive class for multiclass problem")
        else:
            raise Exception("Cannot calculate precision score with None")
    else:
        raise ValueError("Cannot determine problem type")
    return tp / (tp + fp)
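A minimal usage sketch, assuming precision_score, get_classification_labels (Example #4 below) and sklearn's _check_targets are all importable in the current scope:

y_true = [1, 0, 1, 1, 0, 1]
y_pred = [1, 0, 0, 1, 1, 1]
print(precision_score(y_true, y_pred, problem="Binary"))  # tp=3, fp=1 -> 0.75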
Example #3
def ClassPredictionErrorViz(self):
    y_type, y_true, y_pred = _check_targets(self.y_true, self.y_pred)
    if y_type not in ("binary", "multiclass"):
        raise YellowbrickValueError("{} is not supported".format(y_type))
    # Get the indices of the unique labels
    indices = unique_labels(self.y_true, self.y_pred)
    labels = self.classes
    predictions_ = np.array([[
        (self.y_pred[self.y_true == label_t] == label_p).sum()
        for label_p in indices
    ] for label_t in indices])
    fig, ax = plt.subplots(ncols=1, nrows=1)
    legend_kws = {"bbox_to_anchor": (1.04, 0.5), "loc": "center left"}
    bar_stack(
        predictions_,
        ax,
        labels=list(self.classes),
        ticks=self.classes,
        legend_kws=legend_kws,
    )
    # Set the title
    ax.set_title("Class Prediction Error for {}".format(self.name))
    # Set the axes labels
    ax.set_xlabel("Actual Class")
    ax.set_ylabel("Number of Predicted Class")
    # Compute the ceiling for the y limit
    cmax = max([sum(predictions) for predictions in predictions_])
    ax.set_ylim(0, cmax + cmax * 0.1)
    # Ensure the legend fits on the figure
    fig.tight_layout(rect=[0, 0, 0.90, 1])
    fig.savefig(self.path_to_save + "/ClassPredictionError_" + self.name +
                ".pdf")
    return ax
Example #4
def get_classification_labels(
    y_true: Union[Sequence[int], np.ndarray, pd.Series],
    y_pred: Union[Sequence[int], np.ndarray, pd.Series],
) -> Tuple[int, int, int, int]:
    """Calculates the true positive, false positive, false negative and true negative values for a classification
    problem.

    Parameters
    ----------
    y_true: list or array like
        The true, or the expected, values of our problem
    y_pred: list or array like
        The predicted values of our problem

    Returns
    -------
    The true positive, false positive, false negative and true negative values for our classification problem
    """
    problem_true, y_true, y_pred = _check_targets(y_true, y_pred)
    if len(np.unique(y_true)) > 2:
        raise Exception("We have more than two classes for a Binary problem")
    if len(np.unique(y_pred)) > 2:
        raise Exception("We have more than two classes for a Binary problem")
    label_1 = sorted(np.unique(y_true))[1]
    label_0 = sorted(np.unique(y_true))[0]
    true_positive = len(np.where((y_true == label_1) & (y_pred == label_1))[0])
    false_positive = len(
        np.where((y_true == label_0) & (y_pred == label_1))[0])
    false_negative = len(
        np.where((y_true == label_1) & (y_pred == label_0))[0])
    true_negative = len(np.where((y_true == label_0) & (y_pred == label_0))[0])
    return true_positive, false_positive, false_negative, true_negative
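A short sketch of the returned counts, assuming the function above and sklearn's _check_targets are in scope:

y_true = [1, 0, 1, 1, 0, 1]
y_pred = [1, 0, 0, 1, 1, 1]
tp, fp, fn, tn = get_classification_labels(y_true, y_pred)
print(tp, fp, fn, tn)  # 3 1 1 1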
Example #5
def balanced_accuracy(solution, prediction):
    y_type, solution, prediction = _check_targets(solution, prediction)

    if y_type not in ["binary", "multiclass", 'multilabel-indicator']:
        raise ValueError(f"{y_type} is not supported")

    if y_type == 'binary':
        # Do not transform into any multiclass representation
        pass

    elif y_type == 'multiclass':
        n = len(solution)
        unique_sol, encoded_sol = np.unique(solution, return_inverse=True)
        unique_pred, encoded_pred = np.unique(prediction, return_inverse=True)
        classes = np.unique(np.concatenate((unique_sol, unique_pred)))
        map_sol = np.array([np.where(classes == c)[0][0] for c in unique_sol])
        map_pred = np.array(
            [np.where(classes == c)[0][0] for c in unique_pred])
        # one hot encoding
        sol_ohe = np.zeros((n, len(classes)))
        pred_ohe = np.zeros((n, len(classes)))
        sol_ohe[np.arange(n), map_sol[encoded_sol]] = 1
        pred_ohe[np.arange(n), map_pred[encoded_pred]] = 1
        solution = sol_ohe
        prediction = pred_ohe

    elif y_type == 'multilabel-indicator':
        solution = solution.toarray()
        prediction = prediction.toarray()
    else:
        raise NotImplementedError(
            f'bac_metric does not support task type {y_type}')

    fn = np.sum(np.multiply(solution, (1 - prediction)), axis=0, dtype=float)
    tp = np.sum(np.multiply(solution, prediction), axis=0, dtype=float)
    # Bounding to avoid division by 0
    eps = 1e-15
    tp = np.maximum(eps, tp)
    pos_num = np.maximum(eps, tp + fn)
    tpr = tp / pos_num  # true positive rate (sensitivity)

    if y_type in ('binary', 'multilabel-indicator'):
        tn = np.sum(np.multiply((1 - solution), (1 - prediction)),
                    axis=0,
                    dtype=float)
        fp = np.sum(np.multiply((1 - solution), prediction),
                    axis=0,
                    dtype=float)
        tn = np.maximum(eps, tn)
        neg_num = np.maximum(eps, tn + fp)
        tnr = tn / neg_num  # true negative rate (specificity)
        bac = 0.5 * (tpr + tnr)
    elif y_type == 'multiclass':
        bac = tpr
    else:
        raise ValueError(y_type)

    return np.mean(bac)  # average over all classes
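A quick check of the binary branch (a sketch; assumes the function above, numpy as np and sklearn's _check_targets are available):

solution = [0, 1, 0, 0, 1, 0]
prediction = [0, 1, 0, 0, 0, 1]
print(balanced_accuracy(solution, prediction))  # 0.5 * (1/2 + 3/4) = 0.625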
Example #6
def _assert_binary(y1, y2=None):

    if y2 is None:
        y2 = y1

    y_type, _, _ = _check_targets(y1, y2)

    if y_type != 'binary':
        raise ValueError('y_true and y_pred must be binary.')
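A minimal sanity check, assuming _assert_binary and sklearn's _check_targets are importable here:

_assert_binary([0, 1, 1, 0])  # passes silently: both targets are binary
_assert_binary([0, 1, 2, 1])  # raises ValueError: y_true and y_pred must be binary.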
Example #7

    def score(self, X, y):
        """
        Generates a 2D array where each row corresponds to a true class and
        each column counts the predictions made for each predicted class

        Parameters
        ----------
        X : ndarray or DataFrame of shape n x m
            A matrix of n instances with m features

        y : ndarray or Series of length n
            An array or series of target or class values

        Returns
        -------
        score_ : float
            Global accuracy score
        """
        # Must be computed before calling super
        # We're relying on predict to raise NotFitted
        y_pred = self.predict(X)
        y_type, y_true, y_pred = _check_targets(y, y_pred)
        if y_type not in ("binary", "multiclass"):
            raise YellowbrickValueError("{} is not supported".format(y_type))

        # Get the indices of the unique labels
        indices = unique_labels(y_true, y_pred)
        labels = self._labels()

        # Call super to compute self.score_ and verify classes
        try:
            super(ClassPredictionError, self).score(X, y)
        except ModelError as e:
            # raise visualizer-specific errors
            if labels is not None and len(labels) < len(indices):
                raise NotImplementedError(
                    "filtering classes is currently not supported")
            else:
                raise e

        # Ensure all labels are used
        if labels is not None and len(labels) > len(indices):
            raise ModelError(
                "y and y_pred contain zero values for one of the specified classes"
            )

        # Create a table of predictions whose rows are the true classes
        # and whose columns are the predicted classes; each element
        # is the count of predictions for that class that match the true
        # value of that class.
        self.predictions_ = np.array([[(y_pred[y == label_t] == label_p).sum()
                                       for label_p in indices]
                                      for label_t in indices])

        self.draw()
        return self.score_
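A hedged end-to-end sketch of how this score() method is normally reached, assuming the surrounding class is yellowbrick's ClassPredictionError:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from yellowbrick.classifier import ClassPredictionError

X, y = make_classification(n_samples=200, n_classes=3, n_informative=4, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

viz = ClassPredictionError(LogisticRegression(max_iter=1000))
viz.fit(X_train, y_train)   # fits the wrapped estimator
viz.score(X_test, y_test)   # builds self.predictions_ and draws the stacked bars
viz.show()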
Example #8
def _calc_score(function: str,
                y_true,
                y_pred,
                sample_weight=None,
                force_multilabel=False):
    """
    Implement all scores above.
    :param function: Name of the score function, mapped in _FUNCTIONS
    :param y_true: True target values
    :param y_pred: Predicted target values
    :param sample_weight: Optional per-sample weights
    :param force_multilabel: If True, always compute one score per label
    :return: A single score, or a dict mapping each label to its score
    """
    _FUNCTIONS = {
        "brier_score": _bs_from_cm,
        "critical_success_index": _csi_from_cm,
        "peirce_skill_score": _pss_from_cm,
        "odds_ratio": _or_from_cm,
        "odds_ratio_skill_score": _orss_from_cm
    }
    y_type, y_true, y_pred = clf_metrics._check_targets(y_true, y_pred)
    if force_multilabel:
        labels = np.unique(y_true)
        cm = clf_metrics.multilabel_confusion_matrix(
            y_true,
            y_pred,
            sample_weight=sample_weight,
            samplewise=None,
            labels=labels)
        val = {labels[i]: _FUNCTIONS[function](cm[i]) for i in range(len(cm))}
    else:
        if y_type == "binary":
            cm = confusion_matrix(y_true, y_pred)
            val = _FUNCTIONS[function](cm)

        elif y_type.startswith("multiclass"):
            labels = np.unique(y_true)
            cm = clf_metrics.multilabel_confusion_matrix(
                y_true,
                y_pred,
                sample_weight=sample_weight,
                samplewise=None,
                labels=labels)
            val = {
                labels[i]: _FUNCTIONS[function](cm[i])
                for i in range(len(cm))
            }

        else:
            val = np.nan
            warnings.warn("%s could not be calculated: undefined y_type %s" %
                          (function, y_type))

    return val
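Usage depends on the per-confusion-matrix helpers registered in _FUNCTIONS (they are not shown here). Assuming those helpers are defined alongside this excerpt and that clf_metrics refers to sklearn.metrics._classification (which provides both _check_targets and multilabel_confusion_matrix), a call could look like:

y_true = [0, 1, 1, 0, 1, 0]
y_pred = [0, 1, 0, 0, 1, 1]
_calc_score("peirce_skill_score", y_true, y_pred)                         # scalar for the binary case
_calc_score("peirce_skill_score", y_true, y_pred, force_multilabel=True)  # dict of per-label scores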
Example #9
def negative_predictive_score(
    y_true: Union[Sequence[int], np.ndarray, pd.Series],
    y_pred: Union[Sequence[int], np.ndarray, pd.Series],
    problem: str = "Binary",
    positive_class: Union[str, int] = None,
) -> float:
    """Also known as problem II error score. Calculates the percentage of true negatives we correctly identified compared to
    the number of true negative and false negatives.

    Parameters
    ----------
    y_true: list or array like
        The true, or the expected, values of our problem
    y_pred: list or array like
        The predicted values of our problem
    problem: str, ['binary', 'multiclass'], default='binary'
        Whether our problem is a binary classification or a multiclass classification problem
    positive_class: int or str, default=None
        If problem=='multiclass' then the class we are denoting as 'success' or 'positive' (i.e., the one marked as a 1).

    Returns
    -------
    The negative predictive score
    """
    problem_true, y_true, y_pred = _check_targets(y_true, y_pred)
    if problem.casefold() == "binary":
        tp, fp, fn, tn = get_classification_labels(y_true, y_pred)
    elif problem.casefold() == "multiclass":
        # Compare against None explicitly so a falsy positive class such as 0 is still accepted
        if positive_class is not None:
            if isinstance(positive_class, (str, int)):
                new_y_true = np.where(y_true == positive_class, 1, 0)
                new_y_pred = np.where(y_pred == positive_class, 1, 0)
                tp, fp, fn, tn = get_classification_labels(
                    new_y_true, new_y_pred)
            else:
                raise Exception(
                    "Cannot discern positive class for multiclass problem")
        else:
            raise Exception(
                "Cannot calculate negative predictive score with None")
    else:
        raise ValueError("Cannot determine problem type")
    return tn / (tn + fn)
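A one-vs-rest sketch for a three-class problem, assuming the function above and get_classification_labels (Example #4) are in scope:

y_true = ["cat", "dog", "pig", "dog", "dog"]
y_pred = ["cat", "dog", "dog", "cat", "dog"]
print(negative_predictive_score(y_true, y_pred,
                                problem="multiclass",
                                positive_class="dog"))  # tn=1, fn=1 -> 0.5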
Example #10
def specificity_score(
    y_true: Union[Sequence[int], np.ndarray, pd.Series],
    y_pred: Union[Sequence[int], np.ndarray, pd.Series],
    problem: str = "Binary",
    positive_class: Union[str, int] = None,
) -> float:
    """Calculates the specificity of a classification problem

    Parameters
    ----------
    y_true: list or array like
        The true, or the expected, values of our problem
    y_pred: list or array like
        The predicted values of our problem
    problem: {'binary', 'multiclass'}
        Whether our problem is a binary classification or a multiclass classification problem
    positive_class: int or str, default=None
        If problem=='multiclass' then the class we are denoting as 'success' or 'positive' (i.e., the one marked as a 1).

    Returns
    -------
    The specificity score
    """
    problem_true, y_true, y_pred = _check_targets(y_true, y_pred)
    if problem.casefold() == "binary":
        tp, fp, fn, tn = get_classification_labels(y_true, y_pred)
    elif problem.casefold() == "multiclass":
        # Compare against None explicitly so a falsy positive class such as 0 is still accepted
        if positive_class is not None:
            if isinstance(positive_class, (str, int)):
                new_y_true = np.where(y_true == positive_class, 1, 0)
                new_y_pred = np.where(y_pred == positive_class, 1, 0)
                tp, fp, fn, tn = get_classification_labels(
                    new_y_true, new_y_pred)
            else:
                raise TypeError(
                    "Cannot discern positive class for multiclass problem")
        else:
            raise ValueError("Cannot calculate specificity score with None")
    else:
        raise ValueError("Cannot determine problem type")
    return tn / (tn + fp)
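A binary sketch, again assuming the function above and get_classification_labels (Example #4) are in scope:

y_true = [1, 0, 1, 1, 0, 0]
y_pred = [1, 0, 0, 1, 1, 0]
print(specificity_score(y_true, y_pred))  # tn=2, fp=1 -> 0.666...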
Example #11

def sensitivity_specificity_support(
        y_true,
        y_pred,
        *,
        labels=None,
        pos_label=1,
        average=None,
        warn_for=("sensitivity", "specificity"),
        sample_weight=None,
):
    """Compute sensitivity, specificity, and support for each class

    The sensitivity is the ratio ``tp / (tp + fn)`` where ``tp`` is the number
    of true positives and ``fn`` the number of false negatives. The sensitivity
    quantifies the ability to avoid false negatives [1]_.

    The specificity is the ratio ``tn / (tn + fp)`` where ``tn`` is the number
    of true negatives and ``fp`` the number of false positives. The specificity
    quantifies the ability to avoid false positives [1]_.

    The support is the number of occurrences of each class in ``y_true``.

    In binary classification, if ``pos_label is None`` and ``average`` is
    ``'weighted'``, this function returns the average sensitivity and
    specificity.

    Read more in the :ref:`User Guide <sensitivity_specificity>`.

    Parameters
    ----------
    y_true : ndarray of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : ndarray of shape (n_samples,)
        Estimated targets as returned by a classifier.

    labels : list, default=None
        The set of labels to include when ``average != 'binary'``, and their
        order if ``average is None``. Labels present in the data can be
        excluded, for example to calculate a multiclass average ignoring a
        majority negative class, while labels not present in the data will
        result in 0 components in a macro average. For multilabel targets,
        labels are column indices. By default, all labels in ``y_true`` and
        ``y_pred`` are used in sorted order.

    pos_label : str or int, default=1
        The class to report if ``average='binary'`` and the data is binary.
        If the data are multiclass, this will be ignored;
        setting ``labels=[pos_label]`` and ``average != 'binary'`` will report
        scores for that label only.

    average : str, default=None
        If ``None``, the scores for each class are returned. Otherwise, this
        determines the type of averaging performed on the data:

        ``'binary'``:
            Only report results for the class specified by ``pos_label``.
            This is applicable only if targets (``y_{true,pred}``) are binary.
        ``'micro'``:
            Calculate metrics globally by counting the total true positives,
            false negatives and false positives.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean.  This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average, weighted
            by support (the number of true instances for each label). This
            alters 'macro' to account for label imbalance; it can result in an
            F-score that is not between precision and recall.
        ``'samples'``:
            Calculate metrics for each instance, and find their average (only
            meaningful for multilabel classification where this differs from
            :func:`accuracy_score`).

    warn_for : tuple or set of {"sensitivity", "specificity"}, for internal use
        This determines which warnings will be made in the case that this
        function is being used to return only one of its metrics.

    sample_weight : ndarray of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    sensitivity : float (if ``average`` is not None) or ndarray of \
            shape (n_unique_labels,)
        The sensitivity metric.

    specificity : float (if ``average`` is not None) or ndarray of \
            shape (n_unique_labels,)
        The specificity metric.

    support : int (if ``average`` is not None) or ndarray of \
            shape (n_unique_labels,)
        The number of occurrences of each label in ``y_true``.

    References
    ----------
    .. [1] `Wikipedia entry for the Sensitivity and specificity
           <https://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_

    Examples
    --------
    >>> import numpy as np
    >>> from imblearn.metrics import sensitivity_specificity_support
    >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])
    >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])
    >>> sensitivity_specificity_support(y_true, y_pred, average='macro')
    (0.33333333333333331, 0.66666666666666663, None)
    >>> sensitivity_specificity_support(y_true, y_pred, average='micro')
    (0.33333333333333331, 0.66666666666666663, None)
    >>> sensitivity_specificity_support(y_true, y_pred, average='weighted')
    (0.33333333333333331, 0.66666666666666663, None)
    """
    average_options = (None, "micro", "macro", "weighted", "samples")
    if average not in average_options and average != "binary":
        raise ValueError("average has to be one of " + str(average_options))

    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    present_labels = unique_labels(y_true, y_pred)

    if average == "binary":
        if y_type == "binary":
            if pos_label not in present_labels:
                if len(present_labels) < 2:
                    # Only negative labels
                    return (0.0, 0.0, 0)
                else:
                    raise ValueError("pos_label=%r is not a valid label: %r" %
                                     (pos_label, present_labels))
            labels = [pos_label]
        else:
            raise ValueError("Target is %s but average='binary'. Please "
                             "choose another average setting." % y_type)
    elif pos_label not in (None, 1):
        warnings.warn(
            "Note that pos_label (set to %r) is ignored when "
            "average != 'binary' (got %r). You may use "
            "labels=[pos_label] to specify a single positive class." %
            (pos_label, average),
            UserWarning,
        )

    if labels is None:
        labels = present_labels
        n_labels = None
    else:
        n_labels = len(labels)
        labels = np.hstack(
            [labels,
             np.setdiff1d(present_labels, labels, assume_unique=True)])

    # Calculate tp_sum, pred_sum, true_sum ###

    if y_type.startswith("multilabel"):
        raise ValueError("imblearn does not support multilabel")
    elif average == "samples":
        raise ValueError("Sample-based precision, recall, fscore is "
                         "not meaningful outside multilabel "
                         "classification. See the accuracy_score instead.")
    else:
        le = LabelEncoder()
        le.fit(labels)
        y_true = le.transform(y_true)
        y_pred = le.transform(y_pred)
        sorted_labels = le.classes_

        # labels are now from 0 to len(labels) - 1 -> use bincount
        tp = y_true == y_pred
        tp_bins = y_true[tp]
        if sample_weight is not None:
            tp_bins_weights = np.asarray(sample_weight)[tp]
        else:
            tp_bins_weights = None

        if len(tp_bins):
            tp_sum = np.bincount(tp_bins,
                                 weights=tp_bins_weights,
                                 minlength=len(labels))
        else:
            # Pathological case
            true_sum = pred_sum = tp_sum = np.zeros(len(labels))
        if len(y_pred):
            pred_sum = np.bincount(y_pred,
                                   weights=sample_weight,
                                   minlength=len(labels))
        if len(y_true):
            true_sum = np.bincount(y_true,
                                   weights=sample_weight,
                                   minlength=len(labels))

        # Compute the true negative
        tn_sum = y_true.size - (pred_sum + true_sum - tp_sum)

        # Retain only selected labels
        indices = np.searchsorted(sorted_labels, labels[:n_labels])
        tp_sum = tp_sum[indices]
        true_sum = true_sum[indices]
        pred_sum = pred_sum[indices]
        tn_sum = tn_sum[indices]

    if average == "micro":
        tp_sum = np.array([tp_sum.sum()])
        pred_sum = np.array([pred_sum.sum()])
        true_sum = np.array([true_sum.sum()])
        tn_sum = np.array([tn_sum.sum()])

    # Finally, we have all our sufficient statistics. Divide! #

    with np.errstate(divide="ignore", invalid="ignore"):
        # Divide, and on zero-division, set scores to 0 and warn:

        # Oddly, we may get an "invalid" rather than a "divide" error
        # here.
        specificity = _prf_divide(
            tn_sum,
            tn_sum + pred_sum - tp_sum,
            "specificity",
            "predicted",
            average,
            warn_for,
        )
        sensitivity = _prf_divide(tp_sum, true_sum, "sensitivity", "true",
                                  average, warn_for)

    # Average the results

    if average == "weighted":
        weights = true_sum
        if weights.sum() == 0:
            return 0, 0, None
    elif average == "samples":
        weights = sample_weight
    else:
        weights = None

    if average is not None:
        assert average != "binary" or len(specificity) == 1
        specificity = np.average(specificity, weights=weights)
        sensitivity = np.average(sensitivity, weights=weights)
        true_sum = None  # return no support

    return sensitivity, specificity, true_sum
Example #12
def confusion_matrix(solution,
                     prediction,
                     labels=None,
                     weights=None,
                     normalize=None,
                     output_format='numpy_array'):
    """
        Computes the confusion matrix for given true and predicted targets
        Parameters:
            solution - true targets
            prediction - predicted targets
            labels - list of labels for which confusion matrix should be calculated
            weights - list of weights of each target
            normalize - should the output be normalized. Can take values {'true', 'pred', 'all'}
            output_format - output format of the matrix. Can take values {'python_list', 'numpy_array', 'pandas_dataframe'}
        TODO : Add dedicated confusion_matrix function to AbstractLearner
    """
    y_type, solution, prediction = _check_targets(solution, prediction)
    # Only binary and multiclass data is supported
    if y_type not in ("binary", "multiclass"):
        raise ValueError(f'{y_type} dataset is not currently supported')

    if labels is None:
        labels = unique_labels(solution, prediction)
    else:
        # Ensure that label contains only 1-D binary or multi-class array
        labels_type = type_of_target(labels)
        if labels_type not in ("binary", "multiclass"):
            raise ValueError(f'{labels_type} labels are not supported')
        labels = np.array(labels)

    if weights is None:
        weights = np.ones(solution.size, dtype=int)
    else:
        # Ensure that weights contains only 1-D integer or float array
        weights_type = type_of_target(weights)
        if weights_type not in ("binary", "multiclass", "continuous"):
            raise ValueError(f'{weights_type} weights are not supported')
        weights = np.array(weights)

    n_labels = labels.size
    if n_labels == 0:
        raise ValueError("Labels cannot be empty")
    elif (np.unique(labels)).size != n_labels:
        raise ValueError("Labels cannot have duplicates")

    if solution.size == 0 or prediction.size == 0:
        return np.zeros((n_labels, n_labels), dtype=int)

    label_to_index = {y: x for x, y in enumerate(labels)}

    check_consistent_length(solution, prediction, weights)

    # Invalidate indexes with target labels outside the accepted set of labels
    valid_indexes = np.logical_and(np.in1d(solution, labels),
                                   np.in1d(prediction, labels))
    solution = np.array(
        [label_to_index.get(i) for i in solution[valid_indexes]])
    prediction = np.array(
        [label_to_index.get(i) for i in prediction[valid_indexes]])
    weights = weights[valid_indexes]
    # For high precision
    matrix_dtype = np.int64 if weights.dtype.kind in {'i', 'u', 'b'
                                                      } else np.float64
    cm = coo_matrix((weights, (solution, prediction)),
                    shape=(n_labels, n_labels),
                    dtype=matrix_dtype).toarray()
    with np.errstate(all='ignore'):
        if normalize == 'true':
            cm = cm / cm.sum(axis=1, keepdims=True)
        elif normalize == 'pred':
            cm = cm / cm.sum(axis=0, keepdims=True)
        elif normalize == 'all':
            cm = cm / cm.sum()
        cm = np.nan_to_num(cm)
    if output_format == 'python_list':
        return cm.tolist()
    elif output_format == 'numpy_array':
        return cm
    elif output_format == 'pandas_dataframe':
        cm_df = pd.DataFrame(data=cm, index=labels, columns=labels)
        return cm_df
    else:
        return cm
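A small sketch of the default and DataFrame output formats (assumes numpy as np, pandas as pd, scipy's coo_matrix and the scikit-learn helpers used above are imported):

solution = [0, 1, 2, 2, 1]
prediction = [0, 2, 2, 2, 1]
print(confusion_matrix(solution, prediction))
# [[1 0 0]
#  [0 1 1]
#  [0 0 2]]
print(confusion_matrix(solution, prediction, normalize='true',
                       output_format='pandas_dataframe'))  # row-normalised DataFrame indexed by label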
Example #13
def score_metrics(y_true, y_pred, metrics=['accuracy_score']):
    '''
  Scikit-learn compatibility API for Scorer usage.
  Evaluate the required score-metric using the Scorer object.

  Parameters
  ----------
    y_true : array-like
      List of true labels

    y_pred : array-like
      List of predicted labels

    metrics : str or array-like
      List of metric-names to evaluate

  Returns
  -------
    metrics : float or array-like
      The required metrics

  Example
  -------
  >>> from scorer import sklearn_api
  >>>
  >>> y_true = ['a', 'b', 'a', 'a', 'b', 'c', 'c', 'a', 'a', 'b', 'c', 'a']
  >>> y_pred = ['b', 'b', 'a', 'c', 'b', 'a', 'c', 'b', 'a', 'b', 'a', 'a']
  >>>
  >>> metrics = sklearn_api.score_metrics(y_true, y_pred, metrics='accuracy_score')

  Or you can use the scorer metrics inside a sklearn pipeline like

  Example
  -------
  >>> from scorer import sklearn_api
  >>> from sklearn.svm import SVC
  >>> from sklearn.metrics import make_scorer
  >>> from sklearn.model_selection import cross_val_score
  >>> from sklearn.datasets import load_iris
  >>>
  >>> X, y = load_iris(return_X_y=True)
  >>> clf = SVC(kernel='linear', C=1.)
  >>> my_scorer = make_scorer(sklearn_api.score_metrics, metrics='accuracy_score')
  >>>
  >>>  scores = cross_val_score(clf,  # classifier
  >>>                           X,  # training data
  >>>                           y,  # training labels
  >>>                           cv=5,  # split data randomly into 5 parts: 4 for training, 1 for scoring
  >>>                           scoring=my_scorer,  # which scoring metric?
  >>>                          )

  '''

    y_type, y_true, y_pred = _check_targets(y_true, y_pred)

    if y_type not in {'binary', 'multiclass'}:
        raise ValueError('{0} is not supported'.format(y_type))

    scorer = Scorer()
    available_metrics = scorer._get_available_metrics

    # convert str to iterable
    if isinstance(metrics, str):
        metrics = [metrics]

    # check metric params

    if not all(metric in available_metrics for metric in metrics):
        raise ValueError('score_metrics error: metric {0} not found. \
                      Available metrics are {1}'.format(
            metrics, ','.join(available_metrics)))

    scorer.evaluate(y_true, y_pred)

    metrics = [available_metrics[metric] for metric in metrics]
    results = [scorer[metric] for metric in metrics]

    return results if len(results) > 1 else results[0]
Example #14
def check_type(actual, predicted):
    if actual is not None and predicted is not None:
        _check_targets(actual, predicted)
    else:
        # len() on a None input would itself raise, so report the problem directly
        # instead of interpolating the input lengths into the message.
        raise MetricValueError('The inputs actual and predicted should not be None')
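A minimal sketch of the intended behaviour, assuming MetricValueError is defined in the project this excerpt comes from:

check_type([0, 1, 1, 0], [0, 1, 0, 0])  # delegates validation to _check_targets and passes
check_type(None, [0, 1, 0, 0])          # raises MetricValueError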
Example #15
def macro_averaged_mean_absolute_error(y_true, y_pred, *, sample_weight=None):
    """Compute Macro-Averaged Mean Absolute Error (MA-MAE)
    for imbalanced ordinal classification.

    This function computes the MAE for each class and averages them,
    giving an equal weight to each class.

    Read more in the :ref:`User Guide <macro_averaged_mean_absolute_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated targets as returned by a classifier.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    loss : float or ndarray of floats
        Macro-Averaged MAE output is non-negative floating point.
        The best value is 0.0.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import mean_absolute_error
    >>> from imblearn.metrics import macro_averaged_mean_absolute_error
    >>> y_true_balanced = [1, 1, 2, 2]
    >>> y_true_imbalanced = [1, 2, 2, 2]
    >>> y_pred = [1, 2, 1, 2]
    >>> mean_absolute_error(y_true_balanced, y_pred)
    0.5
    >>> mean_absolute_error(y_true_imbalanced, y_pred)
    0.25
    >>> macro_averaged_mean_absolute_error(y_true_balanced, y_pred)
    0.5
    >>> macro_averaged_mean_absolute_error(y_true_imbalanced, y_pred)
    0.16666666666666666
    """
    _, y_true, y_pred = _check_targets(y_true, y_pred)
    if sample_weight is not None:
        sample_weight = column_or_1d(sample_weight)
    else:
        sample_weight = np.ones(y_true.shape)
    check_consistent_length(y_true, y_pred, sample_weight)
    labels = unique_labels(y_true, y_pred)
    mae = []
    for possible_class in labels:
        indices = np.flatnonzero(y_true == possible_class)

        mae.append(
            mean_absolute_error(
                y_true[indices],
                y_pred[indices],
                sample_weight=sample_weight[indices],
            ))

    return np.sum(mae) / len(mae)