def test_fairness_quick_start_example(self):
    # Data
    binary_predictions = [1, 1, 0, 1, 0, 0]
    multi_class_predictions = ["a", "b", "c", "b", "a", "a"]
    multi_class_multi_label_predictions = [["a", "b"], ["b", "c"], ["b"],
                                           ["a", "b"], ["c", "a"], ["c"]]
    is_member = [0, 0, 0, 1, 1, 1]
    classes = ["a", "b", "c"]

    # Metric (see also other available metrics)
    metric = BinaryFairnessMetrics.StatisticalParity()
    multi_metric = MultiClassFairnessMetrics.StatisticalParity(list_of_classes=classes)

    # Score
    binary_score = metric.get_score(binary_predictions, is_member)
    multi_scores = multi_metric.get_scores(multi_class_predictions, is_member)
    multi_label_scores = multi_metric.get_scores(multi_class_multi_label_predictions, is_member)

    # Results
    self.assertEqual(metric.description,
                     "Measures the difference in statistical parity between two groups")
    self.assertEqual(metric.lower_bound, -0.2)
    self.assertEqual(metric.upper_bound, 0.2)
    self.assertEqual(metric.ideal_value, 0)
    self.assertEqual(binary_score, -0.3333333333333333)
    self.assertListEqual(multi_scores,
                         [0.3333333333333333, 0.0, -0.3333333333333333])
    self.assertListEqual(multi_label_scores,
                         [0.3333333333333333, -0.6666666666666667, 0.3333333333333333])

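# A minimal reference sketch of what statistical parity computes, included only
# to document the expected values in the quick-start test above. Assumption
# (an illustration, not Jurity's implementation): the score is
# P(y_pred = 1 | member) - P(y_pred = 1 | non-member). For the quick-start
# data the member positive rate is 1/3 and the non-member rate is 2/3, giving
# the asserted score of -1/3.
def _statistical_parity_sketch(y_pred, is_member):
    y_pred, is_member = np.asarray(y_pred), np.asarray(is_member)
    member_rate = y_pred[is_member == 1].mean()
    non_member_rate = y_pred[is_member == 0].mean()
    return member_rate - non_member_rate
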
def test_stat_parity_multi_label(self):
    # Group membership
    is_member = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

    # Predictions - 3 classes
    y_pred = [['a', 'b'], ['b', 'c'], ['b'], ['a', 'b'], ['c', 'a'],
              ['c'], ['a', 'b'], [], ['a', 'b'], ['c']]

    # Classes for multi-class classification
    classes = ['a', 'b', 'c']

    # Multiclass fairness metric
    multi_metric = MultiClassFairnessMetrics.StatisticalParity(list_of_classes=classes)
    result = multi_metric.get_scores(y_pred, is_member)

    # Per-class scores should match the binary metric on one-hot encoded labels
    one_hot = multi_metric._one_hot_encode_classes(y_pred)
    binary_metric = BinaryFairnessMetrics.StatisticalParity()
    assert np.isclose(binary_metric.get_score(one_hot['a'], is_member), result[0], atol=0.001)
    assert np.isclose(binary_metric.get_score(one_hot['b'], is_member), result[1], atol=0.001)
    assert np.isclose(binary_metric.get_score(one_hot['c'], is_member), result[2], atol=0.001)

def test_stat_parity_multiclass_normal_list(self):
    # Renamed from test_stat_parity_normal_list to avoid shadowing the binary
    # test of the same name further below

    # Group membership
    is_member = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

    # Predictions - 3 classes
    y_pred = ['a', 'b', 'b', 'a', 'c', 'c', 'a', 'b', 'a', 'c']

    # Classes for multi-class classification
    classes = ['a', 'b', 'c']

    # Multiclass fairness metric
    multi_metric = MultiClassFairnessMetrics.StatisticalParity(list_of_classes=classes)
    result = multi_metric.get_scores(y_pred, is_member)

    # Get one-hot encoded 0-1 arrays for each class
    y_pred_a = convert_one_vs_rest('a', y_pred)
    y_pred_b = convert_one_vs_rest('b', y_pred)
    y_pred_c = convert_one_vs_rest('c', y_pred)

    # Create a binary metric to test whether binary and multiclass give the same output
    binary_metric = BinaryFairnessMetrics.StatisticalParity()
    assert binary_metric.get_score(y_pred_a, is_member) == result[0]
    assert binary_metric.get_score(y_pred_b, is_member) == result[1]
    assert binary_metric.get_score(y_pred_c, is_member) == result[2]

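# `convert_one_vs_rest` is imported from the test utilities and is not defined
# in this file. As an assumption for readability: based on how the tests use
# it, an equivalent helper would map multi-class predictions to a binary
# one-vs-rest vector for a given class (illustration only, not the library's
# implementation):
def _one_vs_rest_sketch(positive_class, y_pred):
    return [1 if label == positive_class else 0 for label in y_pred]
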
def test_stat_parity_edge_2(self):
    # Data: edge case where statistical parity == -1
    # (members have a 0% positive rate, non-members 100%)
    y_pred = np.array([1, 1, 1, 1, 0, 0, 0, 0])
    is_member = np.array([0, 0, 0, 0, 1, 1, 1, 1])

    # Metric
    metric = BinaryFairnessMetrics.StatisticalParity()
    score = metric.get_score(y_pred, is_member)
    assert score == -1

def test_stat_parity_invalid_np(self):
    # Data: mismatched lengths (5 predictions vs. 6 memberships)
    y_pred = np.array([1, 1, 0, 1, 0])
    is_member = np.array([0, 0, 0, 1, 1, 1])

    # Metric
    metric = BinaryFairnessMetrics.StatisticalParity()

    # Score
    with self.assertRaises(InputShapeError):
        metric.get_score(y_pred, is_member)

def test_stat_parity_normal_list(self):
    # Data
    y_pred = [1, 1, 0, 1, 0, 0]
    is_member = [0, 0, 0, 1, 1, 1]

    # Metric
    metric = BinaryFairnessMetrics.StatisticalParity()

    # Score
    score = metric.get_score(y_pred, is_member)
    assert np.isclose(score, -0.3333, atol=0.001)

def test_stat_parity_invalid_df(self):
    # Data: y_pred contains a non-binary value (2), which is invalid
    my_df = pd.DataFrame.from_dict({
        'y_pred': [1, 1, 0, 1, 0, 2],
        'is_member': [0, 0, 0, 1, 1, 1]
    })

    # Metric
    metric = BinaryFairnessMetrics.StatisticalParity()

    # Score
    with self.assertRaises(ValueError):
        metric.get_score(my_df['y_pred'], my_df['is_member'])

def test_binary_matches_multiclass_stat_parity(self):
    binary_predictions = [0, 1, 0, 0, 1, 1]
    is_member = [0, 1, 1, 0, 0, 1]

    # Binary metric
    metric = BinaryFairnessMetrics.StatisticalParity()
    score = metric.get_score(binary_predictions, is_member)

    # Multiclass metric over the classes [0, 1]: the one-vs-rest encoding of
    # class 1 is the prediction vector itself, so its score must match the
    # binary score
    classes = [0, 1]
    multi_metric = MultiClassFairnessMetrics.StatisticalParity(list_of_classes=classes)
    multi_score = multi_metric.get_scores(binary_predictions, is_member)
    assert score == multi_score[1]

def test_all_scores_multi_class_multi_label_stat_parity(self):
    # Group membership
    is_member = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

    # Predictions - 3 classes
    y_pred = [['a', 'b'], ['b', 'c'], ['b'], ['a', 'b'], ['c', 'a'],
              ['c'], ['a', 'b'], [], ['a', 'b'], ['c']]

    # Classes for multi-class classification
    classes = ['a', 'b', 'c']

    # Multiclass fairness metrics
    all_scores = MultiClassFairnessMetrics.get_all_scores(
        predictions=y_pred, is_member=is_member, list_of_classes=classes)
    all_scores = all_scores.reset_index()

    # Get one-hot encoded 0-1 arrays for each class
    mlb = MultiLabelBinarizer(classes=classes)
    predictions = pd.Series(y_pred)
    one_hot = pd.DataFrame(mlb.fit_transform(predictions),
                           columns=mlb.classes_,
                           index=predictions.index)
    y_pred_a = one_hot['a']
    y_pred_b = one_hot['b']
    y_pred_c = one_hot['c']

    # Create a binary metric to test whether binary and multiclass give the same output
    # Statistical parity
    binary_metric = BinaryFairnessMetrics.StatisticalParity()
    binary_score_a = binary_metric.get_score(y_pred_a, is_member)
    binary_score_b = binary_metric.get_score(y_pred_b, is_member)
    binary_score_c = binary_metric.get_score(y_pred_c, is_member)

    stat_parity_row = all_scores.loc[all_scores['Metric'] == 'Statistical Parity']
    multi_score_a = stat_parity_row['a'].values[0]
    multi_score_b = stat_parity_row['b'].values[0]
    multi_score_c = stat_parity_row['c'].values[0]

    self.assertAlmostEqual(binary_score_a, multi_score_a)
    self.assertAlmostEqual(binary_score_b, multi_score_b)
    self.assertAlmostEqual(binary_score_c, multi_score_c)

def test_stat_parity_normal_df(self):
    # Data
    my_df = pd.DataFrame.from_dict({
        'y_pred': [1, 1, 0, 1, 0, 0],
        'is_member': [0, 0, 0, 1, 1, 1]
    })

    # Metric
    metric = BinaryFairnessMetrics.StatisticalParity()

    # Score
    score = metric.get_score(my_df['y_pred'], my_df['is_member'])
    assert np.isclose(score, -0.3333, atol=0.001)

def test_all_scores_multi_class_stat_parity(self):
    # Group membership
    is_member = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

    # Predictions - 3 classes
    y_pred = ['a', 'b', 'b', 'a', 'a', 'c', 'a', 'a', 'c', 'b']

    # Classes for multi-class classification
    classes = ['a', 'b', 'c']

    # Multiclass fairness metrics
    all_scores = MultiClassFairnessMetrics.get_all_scores(
        predictions=y_pred, is_member=is_member, list_of_classes=classes)
    all_scores = all_scores.reset_index()

    # Get one-hot encoded 0-1 arrays for each class
    y_pred_a = convert_one_vs_rest('a', y_pred)
    y_pred_b = convert_one_vs_rest('b', y_pred)
    y_pred_c = convert_one_vs_rest('c', y_pred)

    # Create a binary metric to test whether binary and multiclass give the same output
    # Statistical parity
    binary_metric = BinaryFairnessMetrics.StatisticalParity()
    binary_score_a = binary_metric.get_score(y_pred_a, is_member)
    binary_score_b = binary_metric.get_score(y_pred_b, is_member)
    binary_score_c = binary_metric.get_score(y_pred_c, is_member)

    stat_parity_row = all_scores.loc[all_scores['Metric'] == 'Statistical Parity']
    multi_score_a = stat_parity_row['a'].values[0]
    multi_score_b = stat_parity_row['b'].values[0]
    multi_score_c = stat_parity_row['c'].values[0]

    self.assertAlmostEqual(binary_score_a, multi_score_a)
    self.assertAlmostEqual(binary_score_b, multi_score_b)
    self.assertAlmostEqual(binary_score_c, multi_score_c)