    def get_score(self, labels: Union[List, np.ndarray, pd.Series],
                  predictions: Union[List, np.ndarray, pd.Series]) -> float:
        """
        The Theil index is the generalized entropy index with :math:`\\alpha = 1`.
        See Generalized Entropy index.

        Parameters
        ----------
        labels: Union[List, np.ndarray, pd.Series]
            Binary ground truth labels for the provided dataset (0/1).
        predictions: Union[List, np.ndarray, pd.Series]
            Binary predictions from some black-box classifier (0/1).

        Returns
        ----------
        float
            Theil Index of the classifier.
        """
        check_input_type(labels)
        check_input_type(predictions)

        # Check input shape
        check_input_shape(labels)
        check_input_shape(predictions)

        # Check input content
        check_binary(labels)
        check_binary(predictions)

        # Check the actual contents of the arrays
        check_elementwise_input_type(labels)
        check_elementwise_input_type(predictions)

        # Check that our arrays are all the same length
        check_true(
            len(labels) == len(predictions),
            AssertionError(
                "Shapes of inputs do not match. You supplied labels: "
                f"{len(labels)} and predictions: {len(predictions)}."))

        # Convert lists
        y_pred = check_and_convert_list_types(predictions)
        y_true = check_and_convert_list_types(labels)

        y_pred = (y_pred == self.positive_label_name).astype(np.float64)
        y_true = (y_true == self.positive_label_name).astype(np.float64)

        b = 1 + y_pred - y_true

        return float(np.mean(np.log((b / np.mean(b))**b) / np.mean(b)))
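The benefit vector b scores each instance as 2 for a false positive, 0 for a false negative, and 1 for a correct prediction; the Theil index then measures inequality across those benefits. A minimal, self-contained numpy sketch of the same computation (hypothetical inputs, no class wrapper or input checks):

import numpy as np

# Hypothetical 0/1 labels and predictions: one false positive, one false negative.
y_true = np.array([1, 0, 1, 1, 0], dtype=np.float64)
y_pred = np.array([1, 1, 0, 1, 0], dtype=np.float64)

b = 1 + y_pred - y_true          # benefit vector: [1, 2, 0, 1, 1]
mu = np.mean(b)                  # 1.0

# Keeping b in the exponent makes the b == 0 term contribute log(0**0) = log(1) = 0.
theil = float(np.mean(np.log((b / mu)**b) / mu))
print(theil)                     # ~0.277; a perfect classifier scores 0.0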
    @staticmethod
    def _check_input_mitigation(labels, predictions, likelihoods, is_member):
        """
        Check the following aspects:
        1) whether input is or can be converted to numpy arrays
        2) labels, predictions and is_member need to be binary
        3) likelihoods need to be between 0 and 1
        4) all arrays need to be of the same length
        """

        if labels is not None:
            # Check labels can be converted to numpy array
            msg = "Input type not allowed for {name}, allowed are numpy array, Pandas Series, or lists."
            labels = check_or_convert_numpy_array(labels, msg)
            # Check labels are binary
            check_binary(labels)

        # Check predictions
        msg = "Input type not allowed for predictions, allowed are numpy array, Pandas Series, or lists."
        predictions = check_or_convert_numpy_array(predictions, msg)

        # Check predictions are binary
        check_binary(predictions)

        # Check likelihoods
        msg = "Input type not allowed for likelihoods, allowed are numpy array, Pandas Series, or lists."
        likelihoods = check_or_convert_numpy_array(likelihoods, msg)

        # Check likelihoods between 0 and 1
        check_true(np.all(likelihoods >= 0) and np.all(likelihoods <= 1),
                   ValueError("Likelihoods must be between 0 and 1."))

        # Check is_member
        msg = "Input type not allowed for is_member, allowed are numpy array, Pandas Series, or lists."
        is_member = check_or_convert_numpy_array(is_member, msg)

        # Check is_member is binary
        check_binary(is_member)

        # Check shapes match
        if labels is not None:
            check_true(labels.shape[0] == predictions.shape[0], InputShapeError("", "Input shapes do not match."))

        check_true(predictions.shape[0] == likelihoods.shape[0] and
                   likelihoods.shape[0] == is_member.shape[0],
                   InputShapeError("", "Input shapes do not match."))

        return labels, predictions, likelihoods, is_member
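For illustration, here is a self-contained version of the same checks with the library helpers (check_or_convert_numpy_array, check_true, InputShapeError) replaced by plain numpy and built-in exceptions; the function name is hypothetical:

import numpy as np

def _validate_mitigation_inputs(predictions, likelihoods, is_member):
    """Illustrative stand-in for the helper-based checks above."""
    predictions = np.asarray(predictions)
    likelihoods = np.asarray(likelihoods)
    is_member = np.asarray(is_member)

    # Binary check: at most two distinct values.
    if np.unique(predictions).size > 2:
        raise ValueError("predictions must be binary.")
    if np.unique(is_member).size > 2:
        raise ValueError("is_member must be binary.")

    # Likelihoods must lie in [0, 1].
    if not (np.all(likelihoods >= 0) and np.all(likelihoods <= 1)):
        raise ValueError("Likelihoods must be between 0 and 1.")

    # All arrays must have the same length.
    if not (predictions.shape[0] == likelihoods.shape[0] == is_member.shape[0]):
        raise ValueError("Input shapes do not match.")

    return predictions, likelihoods, is_member

# Raises ValueError: likelihood out of range.
# _validate_mitigation_inputs([0, 1], [0.2, 1.3], [1, 0])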
Example #3
    def get_score(self,
                  labels: Union[List, np.ndarray, pd.Series],
                  predictions: Union[List, np.ndarray, pd.Series],
                  alpha: float = 2) -> float:
        """Generalized entropy index is proposed as a unified individual and group fairness measure in [3]_.
        With :math:`b_i = \\hat{y}_i - y_i + 1`:

        .. math::

           \\mathcal{E}(\\alpha) = \\begin{cases}
              \\frac{1}{n \\alpha (\\alpha-1)}\\sum_{i=1}^n\\left[\\left(\\frac{b_i}{\\mu}\\right)^\\alpha - 1\\right] &
              \\alpha \\ne 0, 1, \\\\
              \\frac{1}{n}\\sum_{i=1}^n\\frac{b_{i}}{\\mu}\\ln\\frac{b_{i}}{\\mu} & \\alpha=1, \\\\
            -\\frac{1}{n}\\sum_{i=1}^n\\ln\\frac{b_{i}}{\\mu},& \\alpha=0.
            \\end{cases}

        Parameters
        ----------
        labels: Union[List, np.ndarray, pd.Series]
            Binary ground truth labels for the provided dataset (0/1).
        predictions: Union[List, np.ndarray, pd.Series]
            Binary predictions from some black-box classifier (0/1).
        alpha: float
            Parameter that regulates weight given to distances between values at different parts of the distribution.
            Default value is 2.

        Returns
        ----------
        float
            Generalized Entropy Index of the classifier.

        References
        ----------
            .. [3] T. Speicher, H. Heidari, N. Grgic-Hlaca, K. P. Gummadi, A. Singla, A. Weller, and M. B. Zafar,
               "A Unified Approach to Quantifying Algorithmic Unfairness: Measuring Individual and Group Unfairness
               via Inequality Indices," ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 2018.
        """

        # Check input types
        check_input_type(labels)
        check_input_type(predictions)

        # Check input shapes
        check_input_shape(labels)
        check_input_shape(predictions)

        # Check input content
        check_binary(labels)
        check_binary(predictions)

        # Check the actual contents of the arrays
        check_elementwise_input_type(labels)
        check_elementwise_input_type(predictions)

        # Check that our arrays are all the same length
        check_true(
            len(labels) == len(predictions),
            AssertionError(
                "Shapes of inputs do not match. You supplied labels: "
                f"{len(labels)} and predictions: {len(predictions)}."))

        # Convert
        y_pred = check_and_convert_list_types(predictions)
        y_true = check_and_convert_list_types(labels)

        y_pred = (y_pred == self.positive_label_name).astype(np.float64)
        y_true = (y_true == self.positive_label_name).astype(np.float64)

        b = 1 + y_pred - y_true

        if alpha == 1:
            # Moving b inside the log keeps the b == 0 terms well-defined
            return float(np.mean(np.log((b / np.mean(b))**b) / np.mean(b)))
        elif alpha == 0:
            # Mean log deviation
            return float(-np.mean(np.log(b / np.mean(b))))
        else:
            return float(np.mean((b / np.mean(b))**alpha - 1) / (alpha * (alpha - 1)))
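As a quick sanity check of the three branches, a hypothetical numpy-only evaluation of the index on one small benefit vector (values approximate):

import numpy as np

b = np.array([1.0, 2.0, 1.0, 1.0])  # benefit vector with a single false positive
mu = np.mean(b)                     # 1.25

alpha_2 = float(np.mean((b / mu)**2 - 1) / (2 * (2 - 1)))  # ~0.060
alpha_1 = float(np.mean(np.log((b / mu)**b) / mu))         # Theil index, ~0.054
alpha_0 = float(-np.mean(np.log(b / mu)))                  # ~0.050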
    def test_check_binary_valid(self):
        arr = np.array([1, 2, 1, 2])
        check_binary(arr)

    def test_check_binary_invalid(self):
        arr = np.array([1, 2, 1, 3])
        with self.assertRaises(ValueError):
            check_binary(arr)

    def test_check_binary_more_than_two_values(self):
        with self.assertRaises(ValueError):
            check_binary([1, 2, 3])

    def test_check_binary_invalid_type(self):
        with self.assertWarns(UserWarning):
            with self.assertRaises(TypeError):
                check_binary({1, 2})

    def test_check_binary_df(self):
        check_binary(pd.DataFrame.from_dict({'a': [1, 2, 2]})['a'])

    def test_check_binary_np(self):
        check_binary(np.array([1, 2, 2]))
Example #10
    def test_check_binary_list(self):
        check_binary([1, 2, 2])
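For reference, a minimal sketch of a check_binary helper consistent with these tests; this is an assumption about its behavior, not the library's actual implementation:

import warnings
import numpy as np
import pandas as pd

def check_binary(values):
    """Illustrative sketch: binary means at most two distinct values."""
    if isinstance(values, (list, np.ndarray, pd.Series)):
        if np.unique(np.asarray(values)).size > 2:
            raise ValueError("Values must be binary (at most two distinct values).")
    else:
        # Unsupported containers (e.g., sets) warn and then fail, matching the tests.
        warnings.warn("Unexpected input type.", UserWarning)
        raise TypeError(f"Input type {type(values)} not supported.")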