def get_score(self,
              labels: Union[List, np.ndarray, pd.Series],
              predictions: Union[List, np.ndarray, pd.Series]) -> float:
    """
    Compute the Theil index of a binary classifier.

    The Theil index is the generalized entropy index with :math:`\\alpha = 1`.
    See Generalized Entropy index.

    Parameters
    ----------
    labels: Union[List, np.ndarray, pd.Series]
        Binary ground truth labels for the provided dataset (0/1).
    predictions: Union[List, np.ndarray, pd.Series]
        Binary predictions from some black-box classifier (0/1).

    Returns
    ----------
    Theil Index of the classifier.
    """
    # Apply each validator to labels, then predictions, in this order:
    # container type, shape, binary content, element types.
    validators = (
        check_input_type,
        check_input_shape,
        check_binary,
        check_elementwise_input_type,
    )
    for validate in validators:
        validate(labels)
        validate(predictions)

    # Both inputs must contain the same number of entries
    same_length = len(labels) == len(predictions)
    length_error = AssertionError(
        "Shapes of inputs do not match. You supplied labels :"
        f"{len(labels)} and predictions: {len(predictions)}")
    check_true(same_length, length_error)

    # Convert list inputs
    predicted = check_and_convert_list_types(predictions)
    actual = check_and_convert_list_types(labels)

    # Encode "equals the positive label" as 1.0 and anything else as 0.0
    positive = self.positive_label_name
    predicted = (predicted == positive).astype(np.float64)
    actual = (actual == positive).astype(np.float64)

    # b_i = 1 + y_hat_i - y_i
    b_values = 1 + predicted - actual
    b_mean = np.mean(b_values)

    # The exponent sits inside the log, so entries with b == 0 contribute
    # log(1) = 0 rather than 0 * log(0).
    log_terms = np.log((b_values / b_mean) ** b_values)
    return float(np.mean(log_terms / b_mean))
def _check_input_mitigation(labels, predictions, likelihoods, is_member):
    """
    Validate the mitigation inputs and return them in converted form.

    Check the following aspects:
    1) whether input is or can be converted to numpy arrays
    2) labels, predictions and is_member need to be binary
    3) likelihoods need to be between 0 and 1
    4) all arrays need to be of the same length

    Parameters
    ----------
    labels:
        Binary ground truth labels, or None. When None, every check on
        labels is skipped and None is returned in its place.
    predictions:
        Binary predictions.
    likelihoods:
        Likelihood values, each required to lie in [0, 1].
    is_member:
        Binary membership indicators.

    Returns
    ----------
    Tuple ``(labels, predictions, likelihoods, is_member)`` holding the values
    returned by ``check_or_convert_numpy_array`` (labels stays None if it was
    passed as None).

    Raises
    ----------
    ValueError
        If any likelihood lies outside the interval [0, 1].
    InputShapeError
        If the inputs do not all have the same length along their first axis.

    Errors raised by ``check_or_convert_numpy_array`` and ``check_binary``
    propagate unchanged.
    """
    # Single template for all four inputs. It is formatted with the concrete
    # input name before being handed to the helper, so the message never
    # contains a raw "{name}" placeholder.
    type_msg = "Input type not allowed for {name}, allowed are numpy array, Pandas Series, or lists."

    if labels is not None:
        # Check labels can be converted to numpy array
        labels = check_or_convert_numpy_array(labels, type_msg.format(name="labels"))

        # Check labels are binary
        check_binary(labels)

    # Check predictions can be converted and are binary
    predictions = check_or_convert_numpy_array(predictions, type_msg.format(name="predictions"))
    check_binary(predictions)

    # Check likelihoods can be converted and lie between 0 and 1
    likelihoods = check_or_convert_numpy_array(likelihoods, type_msg.format(name="likelihoods"))
    check_true(all(likelihoods >= 0) and all(likelihoods <= 1),
               ValueError("Likelihood can be only between 0 and 1."))

    # Check is_member can be converted and is binary
    is_member = check_or_convert_numpy_array(is_member, type_msg.format(name="is_member"))
    check_binary(is_member)

    # Check shapes match; labels only takes part when it was supplied
    if labels is not None:
        check_true(labels.shape[0] == predictions.shape[0],
                   InputShapeError("", "Input shapes do not match."))
    check_true(predictions.shape[0] == likelihoods.shape[0] and likelihoods.shape[0] == is_member.shape[0],
               InputShapeError("", "Input shapes do not match."))

    return labels, predictions, likelihoods, is_member
def get_score(self,
              labels: Union[List, np.ndarray, pd.Series],
              predictions: Union[List, np.ndarray, pd.Series],
              alpha: float = 2) -> float:
    """
    Compute the generalized entropy index of a binary classifier.

    Generalized entropy index is proposed as a unified individual and group
    fairness measure in [3]_. With :math:`b_i = \\hat{y}_i - y_i + 1`:

    .. math::

        \\mathcal{E}(\\alpha) = \\begin{cases}
            \\frac{1}{n \\alpha (\\alpha-1)}\\sum_{i=1}^n\\left[\\left(\\frac{b_i}{\\mu}\\right)^\\alpha - 1\\right] & \\alpha \\ne 0, 1, \\\\
            \\frac{1}{n}\\sum_{i=1}^n\\frac{b_{i}}{\\mu}\\ln\\frac{b_{i}}{\\mu} & \\alpha=1, \\\\
            -\\frac{1}{n}\\sum_{i=1}^n\\ln\\frac{b_{i}}{\\mu},& \\alpha=0.
        \\end{cases}

    Parameters
    ----------
    labels: Union[List, np.ndarray, pd.Series]
        Binary ground truth labels for the provided dataset (0/1).
    predictions: Union[List, np.ndarray, pd.Series]
        Binary predictions from some black-box classifier (0/1).
    alpha: float
        Parameter that regulates weight given to distances between values at
        different parts of the distribution. Default value is 2.

    Returns
    ----------
    General Entropy Index of the classifier, always as a built-in ``float``
    regardless of which ``alpha`` branch is taken.

    References
    ----------
    .. [3] T. Speicher, H. Heidari, N. Grgic-Hlaca, K. P. Gummadi, A. Singla,
       A. Weller, and M. B. Zafar, A Unified Approach to Quantifying
       Algorithmic Unfairness: Measuring Individual and Group Unfairness via
       Inequality Indices, ACM SIGKDD International Conference on Knowledge
       Discovery and Data Mining, 2018.
    """
    # Check input types
    check_input_type(labels)
    check_input_type(predictions)

    # Check input shapes
    check_input_shape(labels)
    check_input_shape(predictions)

    # Check input content
    check_binary(labels)
    check_binary(predictions)

    # Check the actual contents of the arrays
    check_elementwise_input_type(labels)
    check_elementwise_input_type(predictions)

    # Check that our arrays are all the same length
    check_true(
        len(labels) == len(predictions),
        AssertionError(
            "Shapes of inputs do not match. You supplied labels :"
            f"{len(labels)} and predictions: {len(predictions)}"))

    # Convert list inputs, then encode "equals the positive label" as 1.0/0.0
    y_pred = check_and_convert_list_types(predictions)
    y_true = check_and_convert_list_types(labels)

    y_pred = (y_pred == self.positive_label_name).astype(np.float64)
    y_true = (y_true == self.positive_label_name).astype(np.float64)

    # b_i = 1 + y_hat_i - y_i, and mu is its mean
    b = 1 + y_pred - y_true
    mu = np.mean(b)

    # Every branch is wrapped in float() so the return type matches the
    # annotation instead of being float for alpha == 1 and np.float64 otherwise.
    if alpha == 1:
        # moving the b inside the log allows for 0 values
        return float(np.mean(np.log((b / mu) ** b) / mu))
    if alpha == 0:
        return float(-np.mean(np.log(b / mu)))
    return float(np.mean((b / mu) ** alpha - 1) / (alpha * (alpha - 1)))
def test_check_binary_valid(self):
    # An array holding exactly two distinct values must pass without raising
    check_binary(np.array([1, 2, 1, 2]))
def test_check_binary_invalid(self):
    # A third distinct value in the array must be rejected with ValueError
    three_valued = np.array([1, 2, 1, 3])
    self.assertRaises(ValueError, check_binary, three_valued)
def test_check_binary_more_than_two_values(self):
    # A plain list with three distinct values must be rejected with ValueError
    three_valued = [1, 2, 3]
    self.assertRaises(ValueError, check_binary, three_valued)
def test_check_binary_invalid_type(self):
    # A set is not an accepted container: the call is expected to emit a
    # UserWarning and raise TypeError. The warning context stays outermost.
    with self.assertWarns(UserWarning), self.assertRaises(TypeError):
        check_binary({1, 2})
def test_check_binary_df(self):
    # A DataFrame column (a pandas Series) with two distinct values must pass
    frame = pd.DataFrame.from_dict({'a': [1, 2, 2]})
    column = frame['a']
    check_binary(column)
def test_check_binary_np(self):
    # A numpy array with two distinct values must pass without raising
    values = np.array([1, 2, 2])
    check_binary(values)
def test_check_binary_list(self):
    # A plain Python list with two distinct values must pass without raising
    values = [1, 2, 2]
    check_binary(values)