# Assumed setup for the snippets below (they appear to exercise the jurity
# fairness library; the import paths are an assumption):
#   import numpy as np
#   import pandas as pd
#   from jurity.fairness import BinaryFairnessMetrics, MultiClassFairnessMetrics
#   from jurity.mitigation import BinaryMitigation
# plus helpers such as convert_one_vs_rest and InputShapeError from the same library.
    def test_mitigation_quick_start_example(self):
        # Data
        labels = [1, 1, 0, 1, 0, 0, 1, 0]
        predictions = [0, 0, 0, 1, 1, 1, 1, 0]
        likelihoods = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.1]
        is_member = [0, 0, 0, 0, 1, 1, 1, 1]

        # Bias Mitigation
        mitigation = BinaryMitigation.EqualizedOdds()

        # Training: Learn mixing rates from the labeled data
        mitigation.fit(labels, predictions, likelihoods, is_member)

        # Testing: Mitigate bias in predictions
        fair_predictions, fair_likelihoods = mitigation.transform(
            predictions, likelihoods, is_member)

        # Results: Fairness before and after
        before_scores = BinaryFairnessMetrics().get_all_scores(
            labels, predictions, is_member)
        after_scores = BinaryFairnessMetrics().get_all_scores(
            labels, fair_predictions, is_member)

        before_scores_check = {
            'Average Odds': 0.667,
            'Disparate Impact': 3.0,
            'Equal Opportunity': 0.667,
            'FNR difference': -0.667,
            'Generalized Entropy Index': 0.25,
            'Predictive Equality': 0.667,
            'Statistical Parity': 0.5,
            'Theil Index': 0.347
        }

        after_scores_check = {
            'Average Odds': 0.0,
            'Disparate Impact': 1.0,
            'Equal Opportunity': 0.333,
            'FNR difference': -0.333,
            'Generalized Entropy Index': 0.14,
            'Predictive Equality': -0.333,
            'Statistical Parity': 0.0,
            'Theil Index': 0.193
        }

        self.assertDictEqual(before_scores["Value"].to_dict(),
                             before_scores_check)
        self.assertDictEqual(after_scores["Value"].to_dict(),
                             after_scores_check)
    def test_gei_alpha_more_than_one(self):

        np.random.seed(1)

        # test bounds
        metric = BinaryFairnessMetrics.GeneralizedEntropyIndex()
        my_dict = {}
        for _ in range(1000):
            random = np.random.choice([0, 1], 10)
            if str(random) in my_dict:
                continue
            else:
                my_dict[str(random)] = (random[:5], random[5:])

        with self.assertWarns(
                RuntimeWarning):  # division by 0 in certain instances.
            # iterate over the sampled (y_true, y_pred) combinations
            alpha = 3
            results = []
            vals = []
            for y_true, y_pred in my_dict.values():
                vals.append([y_true, y_pred])
                results.append(metric.get_score(y_true, y_pred, alpha=alpha))

            assert min(results) == 0
            assert max(results) == (np.power(5, alpha - 1) - 1) / (alpha *
                                                                   (alpha - 1))
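
# For reference, a minimal standalone sketch of the generalized entropy index that
# the test above exercises, assuming numpy is imported as np. The benefit
# definition b_i = y_pred_i - y_true_i + 1 and the helper name are assumptions for
# illustration; this is not the library's implementation.
def generalized_entropy_index_sketch(y_true, y_pred, alpha=2):
    b = np.asarray(y_pred) - np.asarray(y_true) + 1  # per-observation benefit
    mu = b.mean()  # zero only when every prediction is a false negative
    # GEI(alpha) = 1 / (n * alpha * (alpha - 1)) * sum((b_i / mu) ** alpha - 1)
    return float(np.mean((b / mu) ** alpha - 1) / (alpha * (alpha - 1)))

# Concentrating all benefit in one of the five observations gives
# (5 ** (alpha - 1) - 1) / (alpha * (alpha - 1)), the maximum asserted above, while
# identical benefits give the minimum of 0.
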
    def test_theil_boundaries(self):
        np.random.seed(1)

        # test bounds
        metric = BinaryFairnessMetrics.TheilIndex()
        my_dict = {}
        for _ in range(1000):
            random = np.random.choice([0, 1], 10)
            if str(random) in my_dict:
                continue
            else:
                my_dict[str(random)] = (random[:5], random[5:])

        with self.assertWarns(
                RuntimeWarning):  # division by 0 in certain instances.
            # iterate over the sampled (y_true, y_pred) combinations
            results_theil = []
            vals = []

            for y_true, y_pred in my_dict.values():
                vals.append([y_true, y_pred])
                results_theil.append(metric.get_score(y_true, y_pred))

            assert min(results_theil) == 0
            assert max(results_theil) == np.log(5)
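
# A matching sketch for the Theil index, the alpha = 1 limit of the generalized
# entropy index, under the same assumed benefit definition and the convention
# 0 * log(0) = 0; again not the library's implementation.
def theil_index_sketch(y_true, y_pred):
    b = np.asarray(y_pred) - np.asarray(y_true) + 1
    mu = b.mean()
    with np.errstate(divide="ignore", invalid="ignore"):
        terms = np.where(b == 0, 0.0, (b / mu) * np.log(b / mu))
    return float(np.mean(terms))

# Concentrating all benefit in one of the five observations gives log(5), the
# maximum asserted above.
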
    def test_fairness_quick_start_example(self):
        # Data
        binary_predictions = [1, 1, 0, 1, 0, 0]
        multi_class_predictions = ["a", "b", "c", "b", "a", "a"]
        multi_class_multi_label_predictions = [["a", "b"], ["b", "c"], ["b"],
                                               ["a", "b"], ["c", "a"], ["c"]]
        is_member = [0, 0, 0, 1, 1, 1]
        classes = ["a", "b", "c"]

        # Metric (see also other available metrics)
        metric = BinaryFairnessMetrics.StatisticalParity()
        multi_metric = MultiClassFairnessMetrics.StatisticalParity(
            list_of_classes=classes)

        # Score
        binary_score = metric.get_score(binary_predictions, is_member)
        multi_scores = multi_metric.get_scores(multi_class_predictions,
                                               is_member)
        multi_label_scores = multi_metric.get_scores(
            multi_class_multi_label_predictions, is_member)

        # Results
        self.assertEqual(
            metric.description,
            "Measures the difference in statistical parity between two groups")
        self.assertEqual(metric.lower_bound, -0.2)
        self.assertEqual(metric.upper_bound, 0.2)
        self.assertEqual(metric.ideal_value, 0)
        self.assertEqual(binary_score, -0.3333333333333333)
        self.assertListEqual(multi_scores,
                             [0.3333333333333333, 0.0, -0.3333333333333333])
        self.assertListEqual(
            multi_label_scores,
            [0.3333333333333333, -0.6666666666666667, 0.3333333333333333])
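        # Hand check for multi_scores[0] (class "a", one-vs-rest): the predictions
        # reduce to [1, 0, 0, 0, 1, 1], giving a positive rate of 2/3 for the
        # is_member == 1 group and 1/3 for the is_member == 0 group, i.e. 0.333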
    def test_disp_impact_multilabel(self):

        # Group membership
        is_member = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

        # Predictions - 3 classes
        y_pred = [['a', 'b'], ['b', 'c'], ['b'], ['a', 'b'], ['c', 'a'], ['c'],
                  ['a', 'b'], [], ['a', 'b'], ['c']]

        # classes for multi-class classification
        classes = ['a', 'b', 'c']

        # Multiclass Fairness Metric
        multi_metric = MultiClassFairnessMetrics.DisparateImpact(
            list_of_classes=classes)

        result = multi_metric.get_scores(y_pred, is_member)

        one_hot = multi_metric._one_hot_encode_classes(y_pred)

        binary_metric = BinaryFairnessMetrics.DisparateImpact()

        assert np.isclose(binary_metric.get_score(one_hot['a'], is_member),
                          result[0],
                          atol=0.001)
        assert np.isclose(binary_metric.get_score(one_hot['b'], is_member),
                          result[1],
                          atol=0.001)
        assert np.isclose(binary_metric.get_score(one_hot['c'], is_member),
                          result[2],
                          atol=0.001)
    def test_disp_impact_normal_list(self):

        # Group membership
        is_member = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

        # Predictions - 3 classes
        y_pred = ['a', 'b', 'b', 'a', 'c', 'c', 'a', 'b', 'a', 'c']

        # classes for multi-class classification
        classes = ['a', 'b', 'c']

        # Multiclass Fairness Metric
        multi_metric = MultiClassFairnessMetrics.DisparateImpact(
            list_of_classes=classes)

        result = multi_metric.get_scores(y_pred, is_member)

        # get one-hot encoded 0-1 like arrays for each class
        y_pred_a = convert_one_vs_rest('a', y_pred)
        y_pred_b = convert_one_vs_rest('b', y_pred)
        y_pred_c = convert_one_vs_rest('c', y_pred)
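        # For illustration, y_pred_a is presumably the 0/1 indicator of 'a' in
        # y_pred, i.e. [1, 0, 0, 1, 0, 0, 1, 0, 1, 0]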

        # create a binary metric to test whether binary and multiclass give the same output
        binary_metric = BinaryFairnessMetrics.DisparateImpact()

        assert binary_metric.get_score(y_pred_a, is_member) == result[0]
        assert binary_metric.get_score(y_pred_b, is_member) == result[1]
        assert binary_metric.get_score(y_pred_c, is_member) == result[2]
    def test_all_scores_valid(self):

        # test standard pandas table creation
        y_true = np.array([1, 1, 1, 0, 1, 0, 1, 1, 1, 1])
        y_pred = np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 1])
        is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

        df = BinaryFairnessMetrics.get_all_scores(y_true, y_pred, is_member)
        assert isinstance(df, pd.DataFrame)

        assert self.extract_metric_from_df('Statistical Parity', df) == 0.
        assert self.extract_metric_from_df('Average Odds', df) == 0.375
        assert self.extract_metric_from_df('Disparate Impact', df) == 1.
        assert self.extract_metric_from_df('FNR difference', df) == 0.25
        assert self.extract_metric_from_df('Predictive Equality', df) == 1.00
        assert self.extract_metric_from_df('Generalized Entropy Index',
                                           df) == 1.375
        assert np.isclose(self.extract_metric_from_df('Theil Index', df),
                          1.263,
                          atol=0.01)
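        # Hand check for Average Odds (assuming it averages the FPR and TPR
        # differences between the is_member == 1 and is_member == 0 groups):
        # the FPRs are 1 and 0 and the TPRs are 0 and 0.25, so
        # 0.5 * ((1 - 0) + (0 - 0.25)) = 0.375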

        attr = 'Ideal Value'
        assert self.extract_metric_from_df('Average Odds', df, attr) == 0
        assert self.extract_metric_from_df('Disparate Impact', df, attr) == 1
        assert self.extract_metric_from_df('Equal Opportunity', df, attr) == 0
        assert self.extract_metric_from_df('FNR difference', df, attr) == 0
        assert self.extract_metric_from_df('Generalized Entropy Index', df,
                                           attr) == 0
        assert self.extract_metric_from_df('Predictive Equality', df,
                                           attr) == 0
        assert self.extract_metric_from_df('Statistical Parity', df, attr) == 0
        assert self.extract_metric_from_df('Theil Index', df, attr) == 0

        attr = 'Lower Bound'
        assert self.extract_metric_from_df('Average Odds', df, attr) == -.2
        assert self.extract_metric_from_df('Disparate Impact', df, attr) == .8
        assert self.extract_metric_from_df('Equal Opportunity', df,
                                           attr) == -.2
        assert self.extract_metric_from_df('FNR difference', df, attr) == -.2
        assert self.extract_metric_from_df('Generalized Entropy Index', df,
                                           attr) == 0.
        assert self.extract_metric_from_df('Predictive Equality', df,
                                           attr) == -.2
        assert self.extract_metric_from_df('Statistical Parity', df,
                                           attr) == -.2
        assert self.extract_metric_from_df('Theil Index', df, attr) == 0.

        attr = 'Upper Bound'
        assert self.extract_metric_from_df('Average Odds', df, attr) == .2
        assert self.extract_metric_from_df('Disparate Impact', df, attr) == 1.2
        assert self.extract_metric_from_df('Equal Opportunity', df, attr) == .2
        assert self.extract_metric_from_df('FNR difference', df, attr) == .2
        assert not np.isfinite(
            self.extract_metric_from_df('Generalized Entropy Index', df, attr))
        assert self.extract_metric_from_df('Predictive Equality', df,
                                           attr) == .2
        assert self.extract_metric_from_df('Statistical Parity', df,
                                           attr) == .2
        assert not np.isfinite(
            self.extract_metric_from_df('Theil Index', df, attr))
    def test_theil_normal_invalid(self):

        # Metric
        metric = BinaryFairnessMetrics.TheilIndex()

        y_true = [0, 1, 1, 0, 1, 1, 1, 0, 1, -1]
        y_pred = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
        with self.assertRaises(ValueError):
            metric.get_score(y_true, y_pred)
    def test_gei_normal_invalid(self):

        # Metric
        metric = BinaryFairnessMetrics.GeneralizedEntropyIndex()

        y_true = np.array([0, 1, 1, 0, 1, 1, 1, 0, 1, -1])
        y_pred = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

        with self.assertRaises(ValueError):
            metric.get_score(y_true, y_pred)
    def test_pred_equality_edge_5(self):

        # edge case of -1
        y_pred = np.array([0, 0, 0, 0, 1, 1, 1, 1])
        y_true = np.array([0, 0, 0, 1, 1, 1, 1, 0])
        is_member = np.array([1, 1, 1, 1, 0, 0, 0, 0])

        metric = BinaryFairnessMetrics.PredictiveEquality()

        assert metric.get_score(y_true, y_pred, is_member) == -1
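        # The -1 above follows from the FPR difference (assuming predictive
        # equality is the FPR of the is_member == 1 group minus that of the
        # is_member == 0 group): 0 - 1 = -1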
    def test_stat_parity_edge_2(self):

        # Data: edge case stat parity == -1
        y_pred = np.array([1, 1, 1, 1, 0, 0, 0, 0])
        is_member = np.array([0, 0, 0, 0, 1, 1, 1, 1])

        # Metric
        metric = BinaryFairnessMetrics.StatisticalParity()
        score = metric.get_score(y_pred, is_member)

        assert score == -1
    def test_disp_impact_normal_list(self):

        # Metric
        metric = BinaryFairnessMetrics.DisparateImpact()

        # Data
        is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

        # test a medium number
        y_pred = np.array([0, 0, 0, 1, 1, 1, 0, 0, 0, 0])
        assert metric.get_score(y_pred, is_member) == 2
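        # Hand check (assuming disparate impact is the positive-prediction rate of
        # the is_member == 1 group divided by that of the is_member == 0 group):
        # (2 / 5) / (1 / 5) = 2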
    def test_theil_normal_list(self):

        # Metric
        metric = BinaryFairnessMetrics.TheilIndex()

        y_true = [0, 1, 1, 0, 1, 1, 1, 0, 1, 0]
        y_pred = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

        score = metric.get_score(y_true, y_pred)
        assert isinstance(score, float)
        assert np.isclose(score, 0.413, atol=0.01)
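        # The same value falls out of the Theil index sketch earlier in this
        # listing: b = y_pred - y_true + 1 = [1, 0, 0, 1, 0, 1, 1, 2, 1, 2] with
        # mu = 0.9 gives an index of roughly 0.413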
    def test_disp_impact_edge3(self):

        # Metric
        metric = BinaryFairnessMetrics.DisparateImpact()

        # Data
        is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

        # test 1
        y_pred = np.array([0, 0, 0, 0, 1, 1, 0, 0, 0, 0])
        assert metric.get_score(y_pred, is_member) == 1
    def test_gei_normal_list(self):

        # Metric
        metric = BinaryFairnessMetrics.GeneralizedEntropyIndex()

        y_true = [0, 1, 1, 0, 1, 1, 1, 0, 1, 0]
        y_pred = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

        score = metric.get_score(y_true, y_pred)
        assert isinstance(score, float)
        assert np.isclose(score, 0.302, atol=0.01)
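        # Consistent with the generalized entropy index sketch earlier, evaluated
        # at alpha = 2 (apparently the default here):
        # (1 / 20) * (13 / 0.81 - 10) is roughly 0.302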
    def test_avg_odds_diff_edge_4(self):

        # Data: edge case of 1
        y_true = np.array([0, 0, 0, 1, 1, 1, 1, 0])
        y_pred = np.array([1, 1, 1, 1, 0, 0, 0, 0])
        is_member = np.array([1, 1, 1, 1, 0, 0, 0, 0])

        # Metric
        metric = BinaryFairnessMetrics.AverageOdds()

        # Score
        assert metric.get_score(y_true, y_pred, is_member) == 1
    def test_fnr_diff_edge2(self):

        # Metric
        metric = BinaryFairnessMetrics.FNRDifference()

        # edge case of -1
        y_true = np.array([0, 1, 1, 0, 1, 1, 1, 0, 1, 0])
        y_pred = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

        is_member = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

        assert metric.get_score(y_true, y_pred, is_member) == -1
    def test_fnr_diff_normal_invalid(self):

        # Metric
        metric = BinaryFairnessMetrics.FNRDifference()

        # Data
        y_true = np.array([0, 1, 1, 0, 1, 1, 1, 0, 1, 2])
        y_pred = np.array([0, 0, 1, 0, 0, 1, 1, 1, 0, 0])

        is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
        with self.assertRaises(ValueError):
            metric.get_score(y_true, y_pred, is_member)
    def test_stat_parity_invalid_np(self):

        # Data
        y_pred = np.array([1, 1, 0, 1, 0])
        is_member = np.array([0, 0, 0, 1, 1, 1])

        # Metric
        metric = BinaryFairnessMetrics.StatisticalParity()

        # Score
        with self.assertRaises(InputShapeError):
            metric.get_score(y_pred, is_member)
    def test_pred_equality_edge_1(self):

        # Data: edge case - one group's ground truth has no negatives, so its FPR
        # is undefined and the metric returns None
        y_true = np.array([0, 0, 0, 1, 1, 1, 1, 1])
        y_pred = np.array([0, 0, 0, 0, 0, 0, 0, 0])
        is_member = np.array([1, 1, 1, 1, 0, 0, 0, 0])

        # Metric
        metric = BinaryFairnessMetrics.PredictiveEquality()

        with self.assertWarns(UserWarning):
            assert metric.get_score(y_true, y_pred, is_member) is None
    def test_pred_equality_edge_4(self):

        # Data: edge case of 1
        y_true = np.array([0, 0, 0, 1, 1, 1, 1, 0])
        y_pred = np.array([1, 1, 1, 1, 0, 0, 0, 0])
        is_member = np.array([1, 1, 1, 1, 0, 0, 0, 0])

        # Metric
        metric = BinaryFairnessMetrics.PredictiveEquality()

        # Score
        assert metric.get_score(y_true, y_pred, is_member) == 1
    def test_disp_impact_edge2(self):

        # Metric
        metric = BinaryFairnessMetrics.DisparateImpact()

        # Data
        is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])

        # test no positives in unprotected
        y_pred = np.array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0])

        with self.assertWarns(UserWarning):
            assert metric.get_score(y_pred, is_member) == 1
    def test_equal_opp_normal_invalid(self):

        # Data
        y_true = np.array([1, 0, 0, 0, 1, 1, 0, 2])
        y_pred = np.array([0, 1, 1, 1, 1, 1, 1, 0])
        is_member = np.array([1, 1, 1, 1, 0, 0, 0, 0])

        # Metric
        metric = BinaryFairnessMetrics.EqualOpportunity()

        # Score
        with self.assertRaises(ValueError):
            metric.get_score(y_true, y_pred, is_member)
    def test_fnr_diff_normal_list(self):

        # Metric
        metric = BinaryFairnessMetrics.FNRDifference()

        # Data
        y_true = [0, 1, 1, 0, 1, 1, 1, 0, 1, 0]
        y_pred = [0, 0, 1, 0, 0, 1, 1, 1, 0, 0]

        is_member = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0])
        assert np.isclose(metric.get_score(y_true, y_pred, is_member),
                          0.333,
                          atol=0.001)
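        # Hand check (assuming the metric is the FNR of the is_member == 1 group
        # minus that of the is_member == 0 group): 2/3 - 1/3 = 0.333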
    def test_disp_impact_normal_df(self):

        # Metric
        metric = BinaryFairnessMetrics.DisparateImpact()

        # medium number
        my_df = pd.DataFrame.from_dict({
            'y_pred': [0, 0, 0, 1, 1, 1, 0, 0, 0, 0],
            'is_member': [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]
        })

        # Score
        assert metric.get_score(my_df['y_pred'], my_df['is_member']) == 2
    def test_stat_parity_normal_list(self):

        # Data
        y_pred = [1, 1, 0, 1, 0, 0]
        is_member = [0, 0, 0, 1, 1, 1]

        # Metric
        metric = BinaryFairnessMetrics.StatisticalParity()

        # Score
        score = metric.get_score(y_pred, is_member)

        assert np.isclose(score, -0.3333, atol=0.001)
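        # Hand check (assuming statistical parity is the positive-prediction rate
        # of the is_member == 1 group minus that of the is_member == 0 group):
        # 1/3 - 2/3 = -0.333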
    def test_binary_matches_multiclass_disp_impact(self):
        binary_predictions = [0, 1, 0, 0, 1, 1]
        is_member = [0, 1, 1, 0, 0, 1]

        metric = BinaryFairnessMetrics.DisparateImpact()
        score = metric.get_score(binary_predictions, is_member)

        classes = [0, 1]
        multi_metric = MultiClassFairnessMetrics.DisparateImpact(
            list_of_classes=classes)
        multi_score = multi_metric.get_scores(binary_predictions, is_member)

        assert score == multi_score[1]
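        # The scores agree because the one-vs-rest encoding of class 1 over 0/1
        # predictions reproduces the predictions themselves, so the multiclass
        # score for class 1 reduces to the binary metric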
    def test_equal_opp_edge_3(self):

        # Data: ground truth is homogeneous within each group - returns nan
        y_true = np.array([0, 0, 0, 0, 1, 1, 1, 1])
        y_pred = np.array([0, 1, 1, 1, 1, 1, 1, 0])
        is_member = np.array([1, 1, 1, 1, 0, 0, 0, 0])

        # Metric
        metric = BinaryFairnessMetrics.EqualOpportunity()

        with self.assertWarns(
                UserWarning):  # division by zero caught inside numpy
            metric.get_score(y_true, y_pred, is_member)
    def test_stat_parity_invalid_df(self):

        # Data
        my_df = pd.DataFrame.from_dict({
            'y_pred': [1, 1, 0, 1, 0, 2],
            'is_member': [0, 0, 0, 1, 1, 1]
        })

        # Metric
        metric = BinaryFairnessMetrics.StatisticalParity()

        # Score
        with self.assertRaises(ValueError):
            metric.get_score(my_df['y_pred'], my_df['is_member'])
    def test_theil_normal_df(self):

        my_df = pd.DataFrame.from_dict({
            'y_true': [0, 1, 1, 0, 1, 1, 1, 0, 1, 0],
            'y_pred': [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
        })

        # Metric
        metric = BinaryFairnessMetrics.TheilIndex()
        # Score
        score = metric.get_score(my_df['y_true'], my_df['y_pred'])

        assert isinstance(score, float)
        assert np.isclose(score, 0.413, atol=0.01)