    def test_custom_grid(self, transformX, transformY, transformA):

        # Creating a standard grid with the default parameters
        grid_size = 10
        grid_limit = 2.0
        grid_offset = 0.1

        disparity_moment = EqualizedOdds()
        X, y, A = _quick_data(False)

        disparity_moment.load_data(X, y, sensitive_features=A)

        grid = _GridGenerator(
            grid_size, grid_limit,
            disparity_moment.pos_basis, disparity_moment.neg_basis,
            disparity_moment.neg_basis_present, False, grid_offset).grid

        # Creating a custom grid by selecting only a few columns from the grid to try out
        indices = [7, 3, 4]
        grid = grid.iloc[:, indices]

        gs = GridSearch(
            estimator=LogisticRegression(solver='liblinear'),
            constraints=EqualizedOdds(),
            grid=grid,
        )

        # Check that fit runs successfully with the custom grid
        gs.fit(
            transformX(X),
            transformY(y),
            sensitive_features=transformA(A))

        # Check that it trained the correct number of predictors
        assert len(gs.predictors_) == len(grid.columns)
    def test_valid_inputs(self, transformX, transformY, transformA, A_two_dim):
        gs = GridSearch(self.estimator, self.disparity_criterion, grid_size=2)
        X, Y, A = _quick_data(A_two_dim)
        gs.fit(transformX(X),
               transformY(Y),
               sensitive_features=transformA(A))
        assert_n_grid_search_results(2, gs)
    def test_valid_inputs(self, transformX, transformY, transformA):
        gs = GridSearch(self.estimator, self.disparity_criterion, grid_size=2)
        X, Y, A = self._quick_data()
        gs.fit(transformX(X),
               transformY(Y),
               sensitive_features=transformA(A))
        assert len(gs.all_results) == 2
    def test_grid_size_warning_up_to_5_sensitive_feature_group(
            self, transformX, transformY, transformA, A_two_dim, n_groups,
            caplog):
        if isinstance(self.disparity_criterion, EqualizedOdds):
            pytest.skip(
                'With EqualizedOdds there would be multiple warnings due to higher grid '
                'dimensionality.')

        grid_size = 10
        gs = GridSearch(self.estimator,
                        self.disparity_criterion,
                        grid_size=grid_size)
        X, Y, A = _quick_data(A_two_dim, n_groups=n_groups)

        caplog.set_level(logging.WARNING)
        gs.fit(transformX(X), transformY(Y), sensitive_features=transformA(A))

        # don't expect the dimension warning;
        # but expect the grid size warning for large numbers of groups
        log_records = caplog.get_records('call')

        # n_groups groups in total, but one is not part of the basis,
        # so the grid has n_groups - 1 dimensions
        grid_dimensions = n_groups - 1

        if 2**(n_groups - 1) > grid_size:
            assert len(log_records) == 1
            size_log_record = log_records[0]
            assert GRID_SIZE_WARN_TEMPLATE.format(grid_size, 2**grid_dimensions) \
                in size_log_record.msg.format(*size_log_record.args)
        else:
            assert len(log_records) == 0
Example #5
def test_bgl_unmitigated_same(A_two_dim):
    a0_count = 4
    a1_count = 4

    a0_label = 2
    a1_label = 3

    a0_factor = 1
    a1_factor = 16

    X, y, A = _simple_regression_data(a0_count, a1_count, a0_factor, a1_factor,
                                      a0_label, a1_label, A_two_dim)

    estimator = LinearRegression()

    unmitigated_estimator = copy.deepcopy(estimator)
    unmitigated_estimator.fit(X, y)

    # Do the grid search with a single grid point of balanced Lagrange
    # multipliers, which should reproduce the unmitigated estimator
    idx = pd.Int64Index(sorted([a0_label, a1_label]))
    lagrange_balanced_series = pd.Series([1.0, 1.0], index=idx)
    grid_df = pd.DataFrame(lagrange_balanced_series)

    target = GridSearch(estimator,
                        constraints=GroupLossMoment(ZeroOneLoss()),
                        grid=grid_df)
    target.fit(X, y, sensitive_features=A)

    raw_coef = unmitigated_estimator.coef_
    gs_coef = target.best_result.predictor.coef_
    # Can't quite get exact match, but this should be very close
    assert np.allclose(raw_coef, gs_coef, rtol=1e-10, atol=1e-7)
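
Note that pd.Int64Index was removed in pandas 2.0; if the example above is run against a recent pandas, the grid index can be built with the plain Index constructor instead. This is only a compatibility sketch of that one change, reusing a0_label and a1_label from the test above:

import pandas as pd

# pandas >= 2.0 equivalent of pd.Int64Index(sorted([a0_label, a1_label]))
idx = pd.Index(sorted([a0_label, a1_label]), dtype="int64")
lagrange_balanced_series = pd.Series([1.0, 1.0], index=idx)
grid_df = pd.DataFrame(lagrange_balanced_series)
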
def test_single_y_class():
    # Setup with data designed to result in "all single class"
    # at some point in the grid
    X_dict = {
        "c": [0, 1, 4, 1, 5, 1, 6, 0, 2, 4],
        "d": [1, 5, 1, 6, 2, 3, 5, 1, 5, 2]
    }
    X = pd.DataFrame(X_dict)

    # Set y to a constant
    y_val = 1
    y = np.full(10, y_val)
    A = ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'b']

    estimator = LogisticRegression(solver='liblinear',
                                   fit_intercept=True,
                                   random_state=97)

    grid_search = GridSearch(copy.deepcopy(estimator),
                             constraints=DemographicParity(),
                             grid_size=3,
                             grid_limit=0.1,
                             grid_offset=5)

    # We want to avoid an exception on the following line
    grid_search.fit(X, y, sensitive_features=A)

    # Check all predictors are DummyClassifiers
    test_X_dict = {"c": [134534, 27381], "d": [1923, 14123]}
    test_X = pd.DataFrame(test_X_dict)
    for p in grid_search.predictors_:
        assert isinstance(p, DummyClassifier)
        assert np.array_equal(p.predict(test_X), [y_val, y_val])
Example #7
def gridSearch(model, X_train, Y_train, A_train, grid_size):
    """
    Generates a sequence of relabellings and reweightings, and trains a predictor for each. 
    Only applicable for binary feature.
    
    Parameters:
    x_train: input data for training model
    y_train: list of ground truths
    model: the unmitigated algorthmic model
    
    Returns a dataframe of the different predictors and its accuracy scores and disparity scores.
    
    """
    sweep = GridSearch(model,
                       constraints=DemographicParity(),
                       grid_size=grid_size)

    sweep.fit(X_train, Y_train, sensitive_features=A_train)

    # Extract the full set of predictors from the `GridSearch` object
    # (newer fairlearn releases expose these through the public `predictors_` attribute)
    predictors = sweep._predictors
    """
    Remove the predictors which are dominated in the error-disparity space by others from the sweep 
    (note that the disparity will only be calculated for the protected attribute; 
    other potentially protected attributes will not be mitigated)
   
    In general, one might not want to do this, since there may be other considerations beyond the strict 
    optimisation of error and disparity (of the given protected attribute).
    """
    errors, disparities = [], []
    for m in predictors:
        classifier = m.predict

        error = ErrorRate()
        error.load_data(X_train,
                        pd.Series(Y_train),
                        sensitive_features=A_train)
        disparity = DemographicParity()
        disparity.load_data(X_train,
                            pd.Series(Y_train),
                            sensitive_features=A_train)

        errors.append(error.gamma(classifier)[0])
        disparities.append(disparity.gamma(classifier).max())

    all_results = pd.DataFrame({
        "predictor": predictors,
        "error": errors,
        "disparity": disparities
    })

    non_dominated = []
    for row in all_results.itertuples():
        errors_for_lower_or_eq_disparity = all_results["error"][
            all_results["disparity"] <= row.disparity]
        if row.error <= errors_for_lower_or_eq_disparity.min():
            non_dominated.append(row.predictor)

    return non_dominated
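
A minimal usage sketch of the gridSearch helper above, assuming X_train, Y_train and A_train have already been prepared by the caller and that a liblinear logistic regression is an acceptable base model (both are placeholders, not part of the original example):

from sklearn.linear_model import LogisticRegression

# Sweep 10 grid points and keep only the predictors that are not dominated
# in the error-disparity trade-off.
base_model = LogisticRegression(solver='liblinear', fit_intercept=True)
pareto_predictors = gridSearch(base_model, X_train, Y_train, A_train, grid_size=10)

# Each returned object is a fitted predictor; choose among them according to
# the error/disparity trade-off that suits the application.
print(len(pareto_predictors), "non-dominated predictors")
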
    def __mitigation_with_gridsearch(self, X_train, A_train, Y_train, fitted_model):
        sweep = GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),
                           constraints=DemographicParity(),
                           grid_size=70)
        sweep.fit(X_train, Y_train, sensitive_features=A_train.diabetic)
        predictors = sweep._predictors

        return predictors
    def test_no_predict_proba_before_fit(self):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, _, _ = _quick_data()

        with pytest.raises(NotFittedError) as execInfo:
            gs.predict_proba(X)

        assert not_fitted_error_msg.format(GridSearch.__name__) == execInfo.value.args[0]
Example #10
    def test_Y_is_None(self, transformX, transformA, A_two_dim):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, _, A = self._quick_data()

        with pytest.raises(ValueError) as execInfo:
            gs.fit(transformX(X), None, sensitive_features=transformA(A))

        assert _MESSAGE_Y_NONE == execInfo.value.args[0]
Example #11
    def test_no_predict_proba_before_fit(self):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, _, _ = self._quick_data()

        with pytest.raises(NotFittedError) as execInfo:
            gs.predict_proba(X)

        assert _NO_PREDICT_BEFORE_FIT == execInfo.value.args[0]
Example #12
def test_bgl_unfair(A_two_dim):
    a0_count = 5
    a1_count = 7

    a0_label = 2
    a1_label = 3

    a0_factor = 1
    a1_factor = 16

    grid_size = 7

    X, Y, A = _simple_regression_data(
        a0_count, a1_count, a0_factor, a1_factor, a0_label, a1_label, A_two_dim
    )

    bgl_square_loss = BoundedGroupLoss(SquareLoss(-np.inf, np.inf))
    grid_search = GridSearch(
        LinearRegression(), constraints=bgl_square_loss, grid_size=grid_size
    )

    grid_search.fit(X, Y, sensitive_features=A)

    assert_n_grid_search_results(grid_size, grid_search)

    test_X = pd.DataFrame(
        {
            "actual_feature": [0.2, 0.7],
            "sensitive_features": [a0_label, a1_label],
            "constant_ones_feature": [1, 1],
        }
    )

    best_predict = grid_search.predict(test_X)
    assert np.allclose([-1.91764706, 9.61176471], best_predict)

    all_predict = [predictor.predict(test_X) for predictor in grid_search.predictors_]

    # TODO: investigate where the different outcomes for the first grid point are from, likely
    # due to some ignored data points at the edge resulting in another solution with the same
    # least squares loss (i.e. both solutions acceptable).
    # Reflects https://github.com/fairlearn/fairlearn/issues/265
    assert logging_all_close([[3.2, 11.2]], [all_predict[0]]) or logging_all_close(
        [[3.03010885, 11.2]], [all_predict[0]]
    )

    assert logging_all_close(
        [
            [-3.47346939, 10.64897959],
            [-2.68, 10.12],
            [-1.91764706, 9.61176471],
            [-1.18461538, 9.12307692],
            [-0.47924528, 8.65283019],
            [0.2, 0.7],
        ],
        all_predict[1:],
    )
Example #13
    def test_X_is_None(self, transformY, transformA, A_two_dim):
        gs = GridSearch(self.estimator, self.disparity_criterion, grid_size=3)
        _, Y, A = self._quick_data(A_two_dim)

        with pytest.raises(ValueError) as execInfo:
            gs.fit(None, transformY(Y), sensitive_features=transformA(A))

        assert "Expected 2D array, got scalar array instead" in execInfo.value.args[
            0]
Example #14
    def test_X_is_None(self, transformY, transformA):
        gs = GridSearch(self.estimator, self.disparity_criterion, grid_size=3)
        _, Y, A = self._quick_data()

        message = str("Must supply X")
        with pytest.raises(ValueError) as execInfo:
            gs.fit(None, transformY(Y), sensitive_features=transformA(A))

        assert message == execInfo.value.args[0]
Example #15
    def test_Y_is_None(self, transformX, transformA):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, _, A = self._quick_data()

        message = str("Must supply y")
        with pytest.raises(ValueError) as execInfo:
            gs.fit(transformX(X), None, sensitive_features=transformA(A))

        assert message == execInfo.value.args[0]
Example #16
    def test_no_predict_proba_before_fit(self):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, _, _ = self._quick_data()

        message = str("Must call fit before attempting to make predictions")
        with pytest.raises(NotFittedException) as execInfo:
            gs.predict_proba(X)

        assert message == execInfo.value.args[0]
    def test_many_sensitive_feature_groups_warning(
        self, transformX, transformY, transformA, A_two_dim, caplog
    ):
        # The purpose of this test case is to create enough groups to trigger certain expected
        # warnings. The scenario should still work and succeed.
        grid_size = 10
        gs = GridSearch(
            self.estimator,
            self.disparity_criterion,
            grid_size=grid_size,
            sample_weight_name=self.sample_weight_name,
        )
        X, Y, A = _quick_data(A_two_dim)

        if A_two_dim:
            A[0][0] = 0
            A[0][1] = 0
            A[1][0] = 1
            A[1][1] = 1
            A[2][0] = 2
            A[2][1] = 2
            A[3][0] = 3
            A[3][1] = 3
            A[4][0] = 4
            A[4][1] = 4
            A[5][0] = 5
            A[5][1] = 5
        else:
            A[0] = 0
            A[1] = 1
            A[2] = 2
            A[3] = 3
            A[4] = 4
            A[5] = 5

        caplog.set_level(logging.WARNING)
        gs.fit(transformX(X), transformY(Y), sensitive_features=transformA(A))

        log_records = caplog.get_records("call")
        dimension_log_record = log_records[0]
        size_log_record = log_records[1]
        if isinstance(self.disparity_criterion, EqualizedOdds):
            # not every label occurs with every group
            grid_dimensions = 10
        else:
            # 6 groups total, but one is not part of the basis, so 5 dimensions
            grid_dimensions = 5

        # expect both the dimension warning and the grid size warning
        assert len(log_records) == 2
        assert GRID_DIMENSION_WARN_TEMPLATE.format(
            grid_dimensions, GRID_DIMENSION_WARN_THRESHOLD
        ) in dimension_log_record.msg.format(*dimension_log_record.args)
        assert GRID_SIZE_WARN_TEMPLATE.format(
            grid_size, 2**grid_dimensions
        ) in size_log_record.msg.format(*size_log_record.args)
    def test_Y_df_bad_columns(self, transformX, transformA, A_two_dim):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, Y, A = _quick_data(A_two_dim)

        Y_two_col_df = pd.DataFrame({"a": Y, "b": Y})
        with pytest.raises(ValueError) as execInfo:
            gs.fit(transformX(X),
                   Y_two_col_df,
                   sensitive_features=transformA(A))
        assert get_sklearn_expected_1d_message() in execInfo.value.args[0]
    def test_Y_ndarray_bad_columns(self, transformX, transformA, A_two_dim):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, Y, A = _quick_data(A_two_dim)

        Y_two_col_ndarray = np.stack((Y, Y), -1)
        with pytest.raises(ValueError) as execInfo:
            gs.fit(transformX(X),
                   Y_two_col_ndarray,
                   sensitive_features=transformA(A))
        assert get_sklearn_expected_1d_message() in execInfo.value.args[0]
    def test_Y_ndarray_bad_columns(self, transformX, transformA, A_two_dim):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, Y, A = _utilities._quick_data(A_two_dim)

        Y_two_col_ndarray = np.stack((Y, Y), -1)
        with pytest.raises(ValueError) as execInfo:
            gs.fit(transformX(X),
                   Y_two_col_ndarray,
                   sensitive_features=transformA(A))

        assert "bad input shape" in execInfo.value.args[0]
    def test_Y_df_bad_columns(self, transformX, transformA, A_two_dim):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, Y, A = _utilities._quick_data(A_two_dim)

        Y_two_col_df = pd.DataFrame({"a": Y, "b": Y})
        with pytest.raises(ValueError) as execInfo:
            gs.fit(transformX(X),
                   Y_two_col_df,
                   sensitive_features=transformA(A))

        assert "bad input shape" in execInfo.value.args[0]
Example #22
    def test_Y_not_0_1(self, transformX, transformY, transformA, A_two_dim):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, Y, A = self._quick_data(A_two_dim)
        Y = Y + 1

        with pytest.raises(ValueError) as execInfo:
            gs.fit(transformX(X),
                   transformY(Y),
                   sensitive_features=transformA(A))

        assert _LABELS_NOT_0_1_ERROR_MESSAGE == execInfo.value.args[0]
Example #23
    def test_X_Y_different_rows(self, transformX, transformY, transformA):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, _, A = self._quick_data()
        Y = np.random.randint(2, size=len(A) + 1)

        message = str("X and y must have same number of rows")
        with pytest.raises(RuntimeError) as execInfo:
            gs.fit(transformX(X),
                   transformY(Y),
                   sensitive_features=transformA(A))

        assert message == execInfo.value.args[0]
Example #24
    def test_Y_not_0_1(self, transformX, transformY, transformA):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, Y, A = self._quick_data()
        Y = Y + 1

        message = str("Supplied y labels are not 0 or 1")
        with pytest.raises(RuntimeError) as execInfo:
            gs.fit(transformX(X),
                   transformY(Y),
                   sensitive_features=transformA(A))

        assert message == execInfo.value.args[0]
Example #25
    def test_Y_df_bad_columns(self, transformX, transformA):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, Y, A = self._quick_data()

        Y_two_col_df = pd.DataFrame({"a": Y, "b": Y})
        message = str("y is a DataFrame with more than one column")
        with pytest.raises(RuntimeError) as execInfo:
            gs.fit(transformX(X),
                   Y_two_col_df,
                   sensitive_features=transformA(A))

        assert message == execInfo.value.args[0]
Example #26
    def test_Y_ndarray_bad_columns(self, transformX, transformA):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, Y, A = self._quick_data()

        Y_two_col_ndarray = np.stack((Y, Y), -1)
        message = str("y is an ndarray with more than one column")
        with pytest.raises(RuntimeError) as execInfo:
            gs.fit(transformX(X),
                   Y_two_col_ndarray,
                   sensitive_features=transformA(A))

        assert message == execInfo.value.args[0]
    def test_X_Y_different_rows(self, transformX, transformY, transformA, A_two_dim):
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, _, A = _quick_data()
        Y = np.random.randint(2, size=len(A)+1)

        with pytest.raises(ValueError) as execInfo:
            gs.fit(transformX(X),
                   transformY(Y),
                   sensitive_features=transformA(A))

        expected_exception_message = "Found input variables with inconsistent numbers of samples"
        assert expected_exception_message in execInfo.value.args[0]
Example #28
def run_gridsearch_classification(estimator, moment):
    """Run classification test with GridSearch."""
    X, Y, A = fetch_adult()

    num_predictors = 5
    gs = GridSearch(
        estimator,
        constraints=moment,
        grid_size=num_predictors)
    gs.fit(X, Y, sensitive_features=A)

    assert len(gs.predictors_) == num_predictors
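
A hedged invocation of the smoke-test helper above; the estimator and constraint are just one plausible pairing, and fetch_adult is assumed to be the surrounding module's utility that returns (X, Y, A):

from sklearn.linear_model import LogisticRegression
from fairlearn.reductions import DemographicParity

# Sketch only: five grid points with a demographic parity constraint.
run_gridsearch_classification(LogisticRegression(solver='liblinear'),
                              DemographicParity())
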
def run_clean(fairness_constraints):
    print(f"Start running experiment with clean data.")

    unmitigated_predictor = LogisticRegression(solver='liblinear',
                                               fit_intercept=True)

    # unmitigated_predictor.fit(X_train, Y_train)
    unmitigated_predictor.fit(X_train, Y_train)
    sweep = GridSearch(LogisticRegression(solver='liblinear',
                                          fit_intercept=True),
                       constraints=EqualizedOdds(),
                       grid_size=71)

    sweep.fit(X_train, Y_train, sensitive_features=A_train)
    predictors = ([unmitigated_predictor]
                  + [z.predictor for z in sweep.all_results])

    all_results_train, all_results_test = [], []
    for predictor in predictors:
        prediction_train = predictor.predict(X_train)
        prediction_test = predictor.predict(X_test)

        all_results_train.append({
            'accuracy': accuracy(prediction_train, Y_train),
            'violation': violation(prediction_train, Y_train, A_train)
        })
        all_results_test.append({
            'accuracy': accuracy(prediction_test, Y_test),
            'violation': violation(prediction_test, Y_test, A_test)
        })
    # print(all_results_train)
    # print(all_results_test)

    best_train, best_test = [], []
    for constraint in fairness_constraints:
        best = 0.0
        for result in all_results_train:
            if result['violation'] <= constraint and result['accuracy'] > best:
                best = result['accuracy']
        best_train.append(best)

        best = 0.0
        for result in all_results_test:
            if result['violation'] <= constraint and result['accuracy'] > best:
                best = result['accuracy']
        best_test.append(best)

    return best_train, best_test
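
A short sketch of how run_clean might be driven; the thresholds are illustrative, and the train/test splits (X_train, Y_train, A_train, X_test, Y_test, A_test) plus the accuracy and violation helpers are assumed to be defined at module level, as the function requires:

import numpy as np

# Hypothetical sweep over allowed fairness violations from 0.0 to 0.3.
fairness_constraints = np.linspace(0.0, 0.3, 31)
best_train, best_test = run_clean(fairness_constraints)

# best_train[i] / best_test[i] give the highest accuracy achieved by any
# predictor whose violation stays within fairness_constraints[i].
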
Example #30
    def test_Y_ternary(self, transformX, transformY, transformA, A_two_dim):
        gs = GridSearch(self.estimator, self.disparity_criterion,
                        sample_weight_name=self.sample_weight_name)
        X, Y, A = _quick_data(A_two_dim)
        Y[0] = 0
        Y[1] = 1
        Y[2] = 2

        with pytest.raises(ValueError) as execInfo:
            gs.fit(transformX(X),
                   transformY(Y),
                   sensitive_features=transformA(A))

        assert _LABELS_NOT_0_1_ERROR_MESSAGE == execInfo.value.args[0]