def matthews_corrcoef(y_true, y_pred, sample_weight=None):
    from sklearn.preprocessing import LabelEncoder
    from sklearn.metrics import confusion_matrix
    from sklearn.metrics._classification import _check_targets

    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    if y_type not in {'binary', 'multiclass'}:
        raise ValueError('%s is not supported' % y_type)

    lb = LabelEncoder()
    lb.fit(np.hstack([y_true, y_pred]))
    y_true = lb.transform(y_true)
    y_pred = lb.transform(y_pred)

    C = confusion_matrix(y_true, y_pred, sample_weight=sample_weight)
    t_sum = C.sum(axis=1)
    p_sum = C.sum(axis=0)
    n_correct = np.trace(C)
    n_samples = p_sum.sum()
    cov_ytyp = n_correct * n_samples - np.dot(t_sum, p_sum)
    cov_ypyp = n_samples ** 2 - np.dot(p_sum, p_sum)
    cov_ytyt = n_samples ** 2 - np.dot(t_sum, t_sum)
    mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)

    if np.isnan(mcc):
        return 0.0
    else:
        return mcc
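# A minimal usage sketch for the matthews_corrcoef implementation above. It
# assumes numpy is imported as ``np`` at module level and the function above
# is in scope; the labels are illustrative only.
yt = [1, 1, 0, 0]
yp = [1, 0, 0, 0]
# One false negative out of four samples; the coefficient works out to
# 4 / sqrt(8 * 6) ~= 0.577, the same value sklearn.metrics.matthews_corrcoef
# reports for these labels.
print(matthews_corrcoef(yt, yp))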
def precision_score(
    y_true: Union[Sequence[int], np.ndarray, pd.Series],
    y_pred: Union[Sequence[int], np.ndarray, pd.Series],
    problem: str = "Binary",
    positive_class: Union[str, int] = None,
) -> float:
    """Calculates the precision of a classification problem: tp / (tp + fp).

    Parameters
    ----------
    y_true: list or array like
        The true, or the expected, values of our problem
    y_pred: list or array like
        The predicted values of our problem
    problem: {'binary', 'multiclass'}
        Whether our problem is a binary classification or a multiclass
        classification problem
    positive_class: int or str, default=None
        If problem=='multiclass' then the class we are denoting as 'success'
        or 'positive' (i.e., the one marked as a 1).

    Returns
    -------
    The precision score
    """
    problem_true, y_true, y_pred = _check_targets(y_true, y_pred)
    if problem.casefold() == "binary":
        tp, fp, fn, tn = get_classification_labels(y_true, y_pred)
    elif problem.casefold() == "multiclass":
        if positive_class:
            if isinstance(positive_class, (str, int)):
                # Collapse the problem to one-vs-rest for the chosen class
                new_y_true = np.where(y_true == positive_class, 1, 0)
                new_y_pred = np.where(y_pred == positive_class, 1, 0)
                tp, fp, fn, tn = get_classification_labels(
                    new_y_true, new_y_pred)
            else:
                raise Exception(
                    "Cannot discern positive class for multiclass problem")
        else:
            raise Exception(
                "Cannot calculate precision score when positive_class is None")
    else:
        raise ValueError("Cannot determine problem type")
    return tp / (tp + fp)
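# Hedged usage sketch for precision_score above. It assumes numpy as ``np``
# and get_classification_labels (defined below in this collection) are
# available; the string class labels are illustrative.
yt = np.array(['a', 'b', 'a', 'b'])
yp = np.array(['a', 'a', 'a', 'b'])
# One-vs-rest on class 'a': tp = 2, fp = 1, so precision = 2 / 3.
print(precision_score(yt, yp, problem="multiclass", positive_class='a'))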
def ClassPredictionErrorViz(self):
    y_type, y_true, y_pred = _check_targets(self.y_true, self.y_pred)
    if y_type not in ("binary", "multiclass"):
        raise YellowbrickValueError("{} is not supported".format(y_type))

    # Get the indices of the unique labels
    indices = unique_labels(self.y_true, self.y_pred)
    labels = self.classes

    predictions_ = np.array([
        [(self.y_pred[self.y_true == label_t] == label_p).sum()
         for label_p in indices]
        for label_t in indices
    ])

    fig, ax = plt.subplots(ncols=1, nrows=1)
    legend_kws = {"bbox_to_anchor": (1.04, 0.5), "loc": "center left"}
    bar_stack(
        predictions_,
        ax,
        labels=list(self.classes),
        ticks=self.classes,
        legend_kws=legend_kws,
    )

    # Set the title
    ax.set_title("Class Prediction Error for {}".format(self.name))

    # Set the axes labels
    ax.set_xlabel("Actual Class")
    ax.set_ylabel("Number of Predicted Class")

    # Compute the ceiling for the y limit
    cmax = max([sum(predictions) for predictions in predictions_])
    ax.set_ylim(0, cmax + cmax * 0.1)

    # Ensure the legend fits on the figure
    fig.tight_layout(rect=[0, 0, 0.90, 1])
    fig.savefig(self.path_to_save + "/ClassPredictionError_" + self.name + ".pdf")
    return ax
def get_classification_labels(
    y_true: Union[Sequence[int], np.ndarray, pd.Series],
    y_pred: Union[Sequence[int], np.ndarray, pd.Series],
) -> Tuple[int, int, int, int]:
    """Calculates the true positive, false positive, false negative and true
    negative counts for a binary classification problem.

    Parameters
    ----------
    y_true: list or array like
        The true, or the expected, values of our problem
    y_pred: list or array like
        The predicted values of our problem

    Returns
    -------
    The true positive, false positive, false negative and true negative
    counts for our classification problem
    """
    problem_true, y_true, y_pred = _check_targets(y_true, y_pred)
    if len(np.unique(y_true)) > 2:
        raise Exception("We have more than two classes for a binary problem")
    if len(np.unique(y_pred)) > 2:
        raise Exception("We have more than two classes for a binary problem")

    label_1 = sorted(np.unique(y_true))[1]
    label_0 = sorted(np.unique(y_true))[0]

    true_positive = len(np.where((y_true == label_1) & (y_pred == label_1))[0])
    false_positive = len(np.where((y_true == label_0) & (y_pred == label_1))[0])
    false_negative = len(np.where((y_true == label_1) & (y_pred == label_0))[0])
    true_negative = len(np.where((y_true == label_0) & (y_pred == label_0))[0])
    return true_positive, false_positive, false_negative, true_negative
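# Hedged usage sketch for get_classification_labels: assumes numpy as ``np``
# and the function above in scope; the 0/1 labels are illustrative.
yt = np.array([1, 0, 1, 1, 0])
yp = np.array([1, 0, 0, 1, 1])
# Expected counts for these labels: tp=2, fp=1, fn=1, tn=1.
tp, fp, fn, tn = get_classification_labels(yt, yp)
print(tp, fp, fn, tn)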
def balanced_accuracy(solution, prediction):
    y_type, solution, prediction = _check_targets(solution, prediction)

    if y_type not in ["binary", "multiclass", 'multilabel-indicator']:
        raise ValueError(f"{y_type} is not supported")

    if y_type == 'binary':
        # Do not transform into any multiclass representation
        pass

    elif y_type == 'multiclass':
        n = len(solution)
        unique_sol, encoded_sol = np.unique(solution, return_inverse=True)
        unique_pred, encoded_pred = np.unique(prediction, return_inverse=True)
        classes = np.unique(np.concatenate((unique_sol, unique_pred)))
        map_sol = np.array([np.where(classes == c)[0][0] for c in unique_sol])
        map_pred = np.array(
            [np.where(classes == c)[0][0] for c in unique_pred])
        # one hot encoding
        sol_ohe = np.zeros((n, len(classes)))
        pred_ohe = np.zeros((n, len(classes)))
        sol_ohe[np.arange(n), map_sol[encoded_sol]] = 1
        pred_ohe[np.arange(n), map_pred[encoded_pred]] = 1
        solution = sol_ohe
        prediction = pred_ohe

    elif y_type == 'multilabel-indicator':
        solution = solution.toarray()
        prediction = prediction.toarray()

    else:
        raise NotImplementedError(
            f'bac_metric does not support task type {y_type}')

    fn = np.sum(np.multiply(solution, (1 - prediction)), axis=0, dtype=float)
    tp = np.sum(np.multiply(solution, prediction), axis=0, dtype=float)

    # Bounding to avoid division by 0
    eps = 1e-15
    tp = np.maximum(eps, tp)
    pos_num = np.maximum(eps, tp + fn)
    tpr = tp / pos_num  # true positive rate (sensitivity)

    if y_type in ('binary', 'multilabel-indicator'):
        tn = np.sum(np.multiply((1 - solution), (1 - prediction)),
                    axis=0, dtype=float)
        fp = np.sum(np.multiply((1 - solution), prediction),
                    axis=0, dtype=float)
        tn = np.maximum(eps, tn)
        neg_num = np.maximum(eps, tn + fp)
        tnr = tn / neg_num  # true negative rate (specificity)
        bac = 0.5 * (tpr + tnr)
    elif y_type == 'multiclass':
        bac = tpr
    else:
        raise ValueError(y_type)

    return np.mean(bac)  # average over all classes
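# Quick sanity check for balanced_accuracy above (assumes numpy as ``np`` and
# the function in scope; the labels are illustrative). With one false
# negative and no false positives, tpr = 0.5 and tnr = 1.0, so the balanced
# accuracy should be 0.75, matching sklearn.metrics.balanced_accuracy_score.
print(balanced_accuracy(np.array([1, 1, 0, 0]), np.array([1, 0, 0, 0])))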
def _assert_binary(y1, y2=None):
    if y2 is None:
        y2 = y1
    y_type, _, _ = _check_targets(y1, y2)
    if y_type != 'binary':
        raise ValueError('y_true and y_pred must be binary.')
def score(self, X, y):
    """
    Generates a 2D array where each row is the count of the
    predicted classes and each column is the true class

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features

    y : ndarray or Series of length n
        An array or series of target or class values

    Returns
    -------
    score_ : float
        Global accuracy score
    """
    # Must be computed before calling super
    # We're relying on predict to raise NotFitted
    y_pred = self.predict(X)
    y_type, y_true, y_pred = _check_targets(y, y_pred)
    if y_type not in ("binary", "multiclass"):
        raise YellowbrickValueError("{} is not supported".format(y_type))

    # Get the indices of the unique labels
    indices = unique_labels(y_true, y_pred)
    labels = self._labels()

    # Call super to compute self.score_ and verify classes
    try:
        super(ClassPredictionError, self).score(X, y)
    except ModelError as e:
        # raise visualizer-specific errors
        if labels is not None and len(labels) < len(indices):
            raise NotImplementedError(
                "filtering classes is currently not supported")
        else:
            raise e

    # Ensure all labels are used
    if labels is not None and len(labels) > len(indices):
        raise ModelError(
            "y and y_pred contain zero values for one of the specified classes"
        )

    # Create a table of predictions whose rows are the true classes
    # and whose columns are the predicted classes; each element
    # is the count of predictions for that class that match the true
    # value of that class.
    self.predictions_ = np.array([
        [(y_pred[y == label_t] == label_p).sum() for label_p in indices]
        for label_t in indices
    ])

    self.draw()
    return self.score_
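# The method above is Yellowbrick's ClassPredictionError.score. A hedged,
# minimal end-to-end sketch, assuming yellowbrick and scikit-learn are
# installed; the dataset and classifier choices are illustrative.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from yellowbrick.classifier import ClassPredictionError

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
viz = ClassPredictionError(RandomForestClassifier(random_state=0))
viz.fit(X_train, y_train)
viz.score(X_test, y_test)  # populates viz.predictions_ and draws the bars
viz.show()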
def _calc_score(function: str, y_true, y_pred, sample_weight=None,
                force_multilabel=False):
    """
    Implement all scores above

    :param function: Name of the function, mapped in _FUNCTIONS
    :param y_true: true targets
    :param y_pred: predicted targets
    :param sample_weight: optional per-sample weights
    :param force_multilabel: compute the score per label even for binary data
    :return: a single score (binary) or a dict mapping label -> score
    """
    _FUNCTIONS = {
        "brier_score": _bs_from_cm,
        "critical_success_index": _csi_from_cm,
        "peirce_skill_score": _pss_from_cm,
        "odds_ratio": _or_from_cm,
        "odds_ratio_skill_score": _orss_from_cm,
    }

    y_type, y_true, y_pred = clf_metrics._check_targets(y_true, y_pred)

    if force_multilabel:
        labels = np.unique(y_true)
        cm = clf_metrics.multilabel_confusion_matrix(
            y_true, y_pred, sample_weight=sample_weight, samplewise=None,
            labels=labels)
        val = {labels[i]: _FUNCTIONS[function](cm[i]) for i in range(len(cm))}
    else:
        if y_type == "binary":
            cm = confusion_matrix(y_true, y_pred)
            val = _FUNCTIONS[function](cm)
        elif y_type.startswith("multiclass"):
            labels = np.unique(y_true)
            cm = clf_metrics.multilabel_confusion_matrix(
                y_true, y_pred, sample_weight=sample_weight, samplewise=None,
                labels=labels)
            val = {
                labels[i]: _FUNCTIONS[function](cm[i])
                for i in range(len(cm))
            }
        else:
            val = np.nan
            warnings.warn("%s could not be calculated: unsupported y_type %s"
                          % (function, y_type))
    return val
def negative_predictive_score(
    y_true: Union[Sequence[int], np.ndarray, pd.Series],
    y_pred: Union[Sequence[int], np.ndarray, pd.Series],
    problem: str = "Binary",
    positive_class: Union[str, int] = None,
) -> float:
    """Also known as the negative predictive value (NPV). Calculates the
    percentage of predicted negatives that are truly negative, i.e. the
    number of true negatives compared to the number of true negatives plus
    false negatives.

    Parameters
    ----------
    y_true: list or array like
        The true, or the expected, values of our problem
    y_pred: list or array like
        The predicted values of our problem
    problem: str, ['binary', 'multiclass'], default='binary'
        Whether our problem is a binary classification or a multiclass
        classification problem
    positive_class: int or str, default=None
        If problem=='multiclass' then the class we are denoting as 'success'
        or 'positive' (i.e., the one marked as a 1).

    Returns
    -------
    The negative predictive score
    """
    problem_true, y_true, y_pred = _check_targets(y_true, y_pred)
    if problem.casefold() == "binary":
        tp, fp, fn, tn = get_classification_labels(y_true, y_pred)
    elif problem.casefold() == "multiclass":
        if positive_class:
            if isinstance(positive_class, (str, int)):
                new_y_true = np.where(y_true == positive_class, 1, 0)
                new_y_pred = np.where(y_pred == positive_class, 1, 0)
                tp, fp, fn, tn = get_classification_labels(
                    new_y_true, new_y_pred)
            else:
                raise Exception(
                    "Cannot discern positive class for multiclass problem")
        else:
            raise Exception(
                "Cannot calculate negative predictive score when "
                "positive_class is None")
    else:
        raise ValueError("Cannot determine problem type")
    return tn / (tn + fn)
def specificity_score(
    y_true: Union[Sequence[int], np.ndarray, pd.Series],
    y_pred: Union[Sequence[int], np.ndarray, pd.Series],
    problem: str = "Binary",
    positive_class: Union[str, int] = None,
) -> float:
    """Calculates the specificity of a classification problem: tn / (tn + fp).

    Parameters
    ----------
    y_true: list or array like
        The true, or the expected, values of our problem
    y_pred: list or array like
        The predicted values of our problem
    problem: {'binary', 'multiclass'}
        Whether our problem is a binary classification or a multiclass
        classification problem
    positive_class: int or str, default=None
        If problem=='multiclass' then the class we are denoting as 'success'
        or 'positive' (i.e., the one marked as a 1).

    Returns
    -------
    The specificity score
    """
    problem_true, y_true, y_pred = _check_targets(y_true, y_pred)
    if problem.casefold() == "binary":
        tp, fp, fn, tn = get_classification_labels(y_true, y_pred)
    elif problem.casefold() == "multiclass":
        if positive_class:
            if isinstance(positive_class, (str, int)):
                new_y_true = np.where(y_true == positive_class, 1, 0)
                new_y_pred = np.where(y_pred == positive_class, 1, 0)
                tp, fp, fn, tn = get_classification_labels(
                    new_y_true, new_y_pred)
            else:
                raise TypeError(
                    "Cannot discern positive class for multiclass problem")
        else:
            raise ValueError(
                "Cannot calculate specificity score when positive_class is None")
    else:
        raise ValueError("Cannot determine problem type")
    return tn / (tn + fp)
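# Hedged usage sketch for specificity_score: assumes numpy as ``np`` and the
# helpers above; the string labels are illustrative only.
yt = np.array(['cat', 'dog', 'dog', 'cat'])
yp = np.array(['cat', 'dog', 'cat', 'cat'])
# Treating 'cat' as the positive class: tn = 1, fp = 1, so specificity = 0.5.
print(specificity_score(yt, yp, problem="multiclass", positive_class='cat'))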
def sensitivity_specificity_support(
    y_true,
    y_pred,
    *,
    labels=None,
    pos_label=1,
    average=None,
    warn_for=("sensitivity", "specificity"),
    sample_weight=None,
):
    """Compute sensitivity, specificity, and support for each class.

    The sensitivity is the ratio ``tp / (tp + fn)`` where ``tp`` is the number
    of true positives and ``fn`` the number of false negatives. The
    sensitivity quantifies the ability to avoid false negatives [1]_.

    The specificity is the ratio ``tn / (tn + fp)`` where ``tn`` is the number
    of true negatives and ``fp`` the number of false positives. The
    specificity quantifies the ability to avoid false positives [1]_.

    The support is the number of occurrences of each class in ``y_true``.

    In binary classification, if ``pos_label is None`` and ``average`` is
    ``'weighted'``, this function returns the average sensitivity and
    specificity.

    Read more in the :ref:`User Guide <sensitivity_specificity>`.

    Parameters
    ----------
    y_true : ndarray of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : ndarray of shape (n_samples,)
        Estimated targets as returned by a classifier.

    labels : list, default=None
        The set of labels to include when ``average != 'binary'``, and their
        order if ``average is None``. Labels present in the data can be
        excluded, for example to calculate a multiclass average ignoring a
        majority negative class, while labels not present in the data will
        result in 0 components in a macro average. For multilabel targets,
        labels are column indices. By default, all labels in ``y_true`` and
        ``y_pred`` are used in sorted order.

    pos_label : str or int, default=1
        The class to report if ``average='binary'`` and the data is binary.
        If the data are multiclass, this will be ignored; setting
        ``labels=[pos_label]`` and ``average != 'binary'`` will report scores
        for that label only.

    average : str, default=None
        If ``None``, the scores for each class are returned. Otherwise, this
        determines the type of averaging performed on the data:

        ``'binary'``:
            Only report results for the class specified by ``pos_label``.
            This is applicable only if targets (``y_{true,pred}``) are binary.
        ``'micro'``:
            Calculate metrics globally by counting the total true positives,
            false negatives and false positives.
        ``'macro'``:
            Calculate metrics for each label, and find their unweighted mean.
            This does not take label imbalance into account.
        ``'weighted'``:
            Calculate metrics for each label, and find their average, weighted
            by support (the number of true instances for each label). This
            alters 'macro' to account for label imbalance; it can result in an
            F-score that is not between precision and recall.
        ``'samples'``:
            Calculate metrics for each instance, and find their average (only
            meaningful for multilabel classification where this differs from
            :func:`accuracy_score`).

    warn_for : tuple or set of {{"sensitivity", "specificity"}}, for internal use
        This determines which warnings will be made in the case that this
        function is being used to return only one of its metrics.

    sample_weight : ndarray of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    sensitivity : float (if `average is None`) or ndarray of \
            shape (n_unique_labels,)
        The sensitivity metric.

    specificity : float (if `average is None`) or ndarray of \
            shape (n_unique_labels,)
        The specificity metric.

    support : int (if `average is None`) or ndarray of \
            shape (n_unique_labels,)
        The number of occurrences of each label in ``y_true``.

    References
    ----------
    .. [1] `Wikipedia entry for the Sensitivity and specificity
           <https://en.wikipedia.org/wiki/Sensitivity_and_specificity>`_

    Examples
    --------
    >>> import numpy as np
    >>> from imblearn.metrics import sensitivity_specificity_support
    >>> y_true = np.array(['cat', 'dog', 'pig', 'cat', 'dog', 'pig'])
    >>> y_pred = np.array(['cat', 'pig', 'dog', 'cat', 'cat', 'dog'])
    >>> sensitivity_specificity_support(y_true, y_pred, average='macro')
    (0.33333333333333331, 0.66666666666666663, None)
    >>> sensitivity_specificity_support(y_true, y_pred, average='micro')
    (0.33333333333333331, 0.66666666666666663, None)
    >>> sensitivity_specificity_support(y_true, y_pred, average='weighted')
    (0.33333333333333331, 0.66666666666666663, None)
    """
    average_options = (None, "micro", "macro", "weighted", "samples")
    if average not in average_options and average != "binary":
        raise ValueError("average has to be one of " + str(average_options))

    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    present_labels = unique_labels(y_true, y_pred)

    if average == "binary":
        if y_type == "binary":
            if pos_label not in present_labels:
                if len(present_labels) < 2:
                    # Only negative labels
                    return (0.0, 0.0, 0)
                else:
                    raise ValueError(
                        "pos_label=%r is not a valid label: %r"
                        % (pos_label, present_labels))
            labels = [pos_label]
        else:
            raise ValueError(
                "Target is %s but average='binary'. Please "
                "choose another average setting." % y_type)
    elif pos_label not in (None, 1):
        warnings.warn(
            "Note that pos_label (set to %r) is ignored when "
            "average != 'binary' (got %r). You may use "
            "labels=[pos_label] to specify a single positive class."
            % (pos_label, average),
            UserWarning,
        )

    if labels is None:
        labels = present_labels
        n_labels = None
    else:
        n_labels = len(labels)
        labels = np.hstack(
            [labels, np.setdiff1d(present_labels, labels, assume_unique=True)])

    # Calculate tp_sum, pred_sum, true_sum ###
    if y_type.startswith("multilabel"):
        raise ValueError("imblearn does not support multilabel")
    elif average == "samples":
        raise ValueError("Sample-based precision, recall, fscore is "
                         "not meaningful outside multilabel "
                         "classification. See the accuracy_score instead.")
    else:
        le = LabelEncoder()
        le.fit(labels)
        y_true = le.transform(y_true)
        y_pred = le.transform(y_pred)
        sorted_labels = le.classes_

        # labels are now from 0 to len(labels) - 1 -> use bincount
        tp = y_true == y_pred
        tp_bins = y_true[tp]
        if sample_weight is not None:
            tp_bins_weights = np.asarray(sample_weight)[tp]
        else:
            tp_bins_weights = None

        if len(tp_bins):
            tp_sum = np.bincount(tp_bins, weights=tp_bins_weights,
                                 minlength=len(labels))
        else:
            # Pathological case
            true_sum = pred_sum = tp_sum = np.zeros(len(labels))
        if len(y_pred):
            pred_sum = np.bincount(y_pred, weights=sample_weight,
                                   minlength=len(labels))
        if len(y_true):
            true_sum = np.bincount(y_true, weights=sample_weight,
                                   minlength=len(labels))

        # Compute the true negative
        tn_sum = y_true.size - (pred_sum + true_sum - tp_sum)

        # Retain only selected labels
        indices = np.searchsorted(sorted_labels, labels[:n_labels])
        tp_sum = tp_sum[indices]
        true_sum = true_sum[indices]
        pred_sum = pred_sum[indices]
        tn_sum = tn_sum[indices]

    if average == "micro":
        tp_sum = np.array([tp_sum.sum()])
        pred_sum = np.array([pred_sum.sum()])
        true_sum = np.array([true_sum.sum()])
        tn_sum = np.array([tn_sum.sum()])

    # Finally, we have all our sufficient statistics. Divide!
    with np.errstate(divide="ignore", invalid="ignore"):
        # Divide, and on zero-division, set scores to 0 and warn:
        # Oddly, we may get an "invalid" rather than a "divide" error here.
        specificity = _prf_divide(
            tn_sum,
            tn_sum + pred_sum - tp_sum,
            "specificity",
            "predicted",
            average,
            warn_for,
        )
        sensitivity = _prf_divide(
            tp_sum, true_sum, "sensitivity", "true", average, warn_for)

    # Average the results
    if average == "weighted":
        weights = true_sum
        if weights.sum() == 0:
            return 0, 0, None
    elif average == "samples":
        weights = sample_weight
    else:
        weights = None

    if average is not None:
        assert average != "binary" or len(specificity) == 1
        specificity = np.average(specificity, weights=weights)
        sensitivity = np.average(sensitivity, weights=weights)
        true_sum = None  # return no support

    return sensitivity, specificity, true_sum
def confusion_matrix(solution, prediction, labels=None, weights=None,
                     normalize=None, output_format='numpy_array'):
    """
    Computes the confusion matrix for given true and predicted targets.

    Parameters:
        solution - true targets
        prediction - predicted targets
        labels - list of labels for which the confusion matrix should be
            calculated
        weights - list of weights for each target
        normalize - whether the output should be normalized. Can take values
            {'true', 'pred', 'all'}
        output_format - output format of the matrix. Can take values
            {'python_list', 'numpy_array', 'pandas_dataframe'}

    TODO : Add dedicated confusion_matrix function to AbstractLearner
    """
    y_type, solution, prediction = _check_targets(solution, prediction)

    # Only binary and multiclass data is supported
    if y_type not in ("binary", "multiclass"):
        raise ValueError(f'{y_type} dataset is not currently supported')

    if labels is None:
        labels = unique_labels(solution, prediction)
    else:
        # Ensure that labels contain only a 1-D binary or multiclass array
        labels_type = type_of_target(labels)
        if labels_type not in ("binary", "multiclass"):
            raise ValueError(f'{labels_type} labels are not supported')
        labels = np.array(labels)

    if weights is None:
        weights = np.ones(solution.size, dtype=int)
    else:
        # Ensure that weights contain only a 1-D integer or float array
        weights_type = type_of_target(weights)
        if weights_type not in ("binary", "multiclass", "continuous"):
            raise ValueError(f'{weights_type} weights are not supported')
        weights = np.array(weights)

    n_labels = labels.size
    if n_labels == 0:
        raise ValueError("Labels cannot be empty")
    elif (np.unique(labels)).size != n_labels:
        raise ValueError("Labels cannot have duplicates")

    if solution.size == 0 or prediction.size == 0:
        return np.zeros((n_labels, n_labels), dtype=int)

    label_to_index = {y: x for x, y in enumerate(labels)}
    check_consistent_length(solution, prediction, weights)

    # Invalidate indexes with target labels outside the accepted set of labels
    valid_indexes = np.logical_and(np.in1d(solution, labels),
                                   np.in1d(prediction, labels))
    solution = np.array(
        [label_to_index.get(i) for i in solution[valid_indexes]])
    prediction = np.array(
        [label_to_index.get(i) for i in prediction[valid_indexes]])
    weights = weights[valid_indexes]

    # For high precision
    matrix_dtype = np.int64 if weights.dtype.kind in {'i', 'u', 'b'} else np.float64
    cm = coo_matrix((weights, (solution, prediction)),
                    shape=(n_labels, n_labels),
                    dtype=matrix_dtype).toarray()

    with np.errstate(all='ignore'):
        if normalize == 'true':
            cm = cm / cm.sum(axis=1, keepdims=True)
        elif normalize == 'pred':
            cm = cm / cm.sum(axis=0, keepdims=True)
        elif normalize == 'all':
            cm = cm / cm.sum()
        cm = np.nan_to_num(cm)

    if output_format == 'python_list':
        return cm.tolist()
    elif output_format == 'numpy_array':
        return cm
    elif output_format == 'pandas_dataframe':
        cm_df = pd.DataFrame(data=cm, index=labels, columns=labels)
        return cm_df
    else:
        return cm
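# Hedged usage sketch for the confusion_matrix implementation above (assumes
# numpy as ``np`` and the scipy/pandas/sklearn helpers it relies on are
# imported at module level; the labels are illustrative).
sol = np.array([0, 1, 1, 2])
pred = np.array([0, 1, 2, 2])
# Raw counts: rows are true classes, columns are predicted classes.
print(confusion_matrix(sol, pred))
# Row-normalised: each true-class row sums to 1.0, e.g. the row for class 1
# becomes [0.0, 0.5, 0.5].
print(confusion_matrix(sol, pred, normalize='true'))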
def score_metrics(y_true, y_pred, metrics=['accuracy_score']):
    '''
    Scikit-learn compatibility API for Scorer usage.
    Evaluate the required score-metric using the Scorer object.

    Parameters
    ----------
    y_true : array-like
        List of true labels

    y_pred : array-like
        List of predicted labels

    metrics : str or array-like
        List of metric-names to evaluate

    Returns
    -------
    metrics : float or array-like
        The required metrics

    Example
    -------
    >>> from scorer import sklearn_api
    >>>
    >>> y_true = ['a', 'b', 'a', 'a', 'b', 'c', 'c', 'a', 'a', 'b', 'c', 'a']
    >>> y_pred = ['b', 'b', 'a', 'c', 'b', 'a', 'c', 'b', 'a', 'b', 'a', 'a']
    >>>
    >>> metrics = sklearn_api.score_metrics(y_true, y_pred, metrics='accuracy_score')

    Or you can use the scorer metrics inside a sklearn pipeline like

    Example
    -------
    >>> from scorer import sklearn_api
    >>> from sklearn.svm import SVC
    >>> from sklearn.metrics import make_scorer
    >>> from sklearn.model_selection import cross_val_score
    >>> from sklearn.datasets import load_iris
    >>>
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = SVC(kernel='linear', C=1.)
    >>> my_scorer = make_scorer(sklearn_api.score_metrics, metrics='accuracy_score')
    >>>
    >>> scores = cross_val_score(clf,       # classifier
    >>>                          X,         # training data
    >>>                          y,         # training labels
    >>>                          cv=5,      # split data randomly into 5 parts: 4 for training, 1 for scoring
    >>>                          scoring=my_scorer,  # which scoring metric?
    >>>                          )
    '''
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)

    if y_type not in {'binary', 'multiclass'}:
        raise ValueError('{0} is not supported'.format(y_type))

    scorer = Scorer()
    available_metrics = scorer._get_available_metrics

    # convert str to iterable
    if isinstance(metrics, str):
        metrics = [metrics]

    # check metric params
    if not all(metric in available_metrics for metric in metrics):
        raise ValueError('score_metrics error: metric {0} not found. '
                         'Available metrics are {1}'.format(
                             metrics, ','.join(available_metrics)))

    scorer.evaluate(y_true, y_pred)

    metrics = [available_metrics[metric] for metric in metrics]
    results = [scorer[metric] for metric in metrics]

    return results if len(results) > 1 else results[0]
def check_type(actual, predicted):
    if actual is not None and predicted is not None:
        _check_targets(actual, predicted)
    else:
        raise MetricValueError(
            'The inputs {0} and {1} should not be None'.format(actual, predicted))
def macro_averaged_mean_absolute_error(y_true, y_pred, *, sample_weight=None):
    """Compute Macro-Averaged Mean Absolute Error (MA-MAE) for imbalanced
    ordinal classification.

    This function computes the MAE for each class and averages them, giving
    an equal weight to each class.

    Read more in the :ref:`User Guide <macro_averaged_mean_absolute_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated targets as returned by a classifier.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    loss : float or ndarray of floats
        Macro-Averaged MAE output is non-negative floating point. The best
        value is 0.0.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import mean_absolute_error
    >>> from imblearn.metrics import macro_averaged_mean_absolute_error
    >>> y_true_balanced = [1, 1, 2, 2]
    >>> y_true_imbalanced = [1, 2, 2, 2]
    >>> y_pred = [1, 2, 1, 2]
    >>> mean_absolute_error(y_true_balanced, y_pred)
    0.5
    >>> mean_absolute_error(y_true_imbalanced, y_pred)
    0.25
    >>> macro_averaged_mean_absolute_error(y_true_balanced, y_pred)
    0.5
    >>> macro_averaged_mean_absolute_error(y_true_imbalanced, y_pred)
    0.16666666666666666
    """
    _, y_true, y_pred = _check_targets(y_true, y_pred)
    if sample_weight is not None:
        sample_weight = column_or_1d(sample_weight)
    else:
        sample_weight = np.ones(y_true.shape)
    check_consistent_length(y_true, y_pred, sample_weight)

    labels = unique_labels(y_true, y_pred)
    mae = []
    for possible_class in labels:
        indices = np.flatnonzero(y_true == possible_class)
        mae.append(
            mean_absolute_error(
                y_true[indices],
                y_pred[indices],
                sample_weight=sample_weight[indices],
            ))

    return np.sum(mae) / len(mae)