コード例 #1
0
 def test_valid_inputs(self, transformX, transformY, transformA, A_two_dim):
     """Smoke test: fitting on transformed valid data yields one result per grid point."""
     features, labels, sensitive = _quick_data(A_two_dim)
     searcher = GridSearch(self.estimator, self.disparity_criterion, grid_size=2)
     searcher.fit(
         transformX(features),
         transformY(labels),
         sensitive_features=transformA(sensitive))
     # grid_size=2 above, so exactly two models should have been trained
     assert_n_grid_search_results(2, searcher)
コード例 #2
0
    def test_grid_size_warning_up_to_5_sensitive_feature_group(
            self, transformX, transformY, transformA, A_two_dim, n_groups,
            caplog):
        """Check the grid-size warning fires exactly when 2**dimensions exceeds grid_size.

        The dimension warning must not appear; only the grid-size warning is
        expected, and only for large enough numbers of groups.
        """
        if isinstance(self.disparity_criterion, EqualizedOdds):
            pytest.skip(
                'With EqualizedOdds there would be multiple warnings due to higher grid '
                'dimensionality.')

        grid_size = 10
        gs = GridSearch(self.estimator,
                        self.disparity_criterion,
                        grid_size=grid_size)
        X, Y, A = _quick_data(A_two_dim, n_groups=n_groups)

        caplog.set_level(logging.WARNING)
        gs.fit(transformX(X), transformY(Y), sensitive_features=transformA(A))

        # don't expect the dimension warning;
        # but expect the grid size warning for large numbers of groups
        log_records = caplog.get_records('call')

        # n_groups groups total, but one is not part of the basis,
        # so the grid has n_groups - 1 dimensions
        grid_dimensions = n_groups - 1

        # Use grid_dimensions consistently (previously the condition
        # re-derived 2**(n_groups - 1) while the assertion used the variable).
        if 2**grid_dimensions > grid_size:
            assert len(log_records) == 1
            size_log_record = log_records[0]
            assert GRID_SIZE_WARN_TEMPLATE.format(grid_size, 2**grid_dimensions) \
                in size_log_record.msg.format(*size_log_record.args)
        else:
            assert len(log_records) == 0
コード例 #3
0
    def test_custom_grid(self, transformX, transformY, transformA):
        """Fit GridSearch with a user-supplied grid and verify one predictor per column."""
        # Parameters matching the defaults of the standard grid generator
        grid_size = 10
        grid_limit = 2.0
        grid_offset = 0.1

        disparity_moment = EqualizedOdds()
        X, y, A = _quick_data(False)
        disparity_moment.load_data(X, y, sensitive_features=A)

        # Generate the full standard grid first...
        full_grid = _GridGenerator(
            grid_size, grid_limit,
            disparity_moment.pos_basis, disparity_moment.neg_basis,
            disparity_moment.neg_basis_present, False, grid_offset).grid

        # ...then keep only a hand-picked subset of columns as the custom grid
        selected_columns = [7, 3, 4]
        grid = full_grid.iloc[:, selected_columns]

        gs = GridSearch(
            estimator=LogisticRegression(solver='liblinear'),
            constraints=EqualizedOdds(),
            grid=grid,
        )

        # Fitting with the custom grid must succeed
        gs.fit(
            transformX(X),
            transformY(y),
            sensitive_features=transformA(A))

        # One trained predictor is expected per grid column
        assert len(gs.predictors_) == len(grid.columns)
コード例 #4
0
    def test_no_predict_proba_before_fit(self):
        """predict_proba on an unfitted GridSearch must raise NotFittedError."""
        searcher = GridSearch(self.estimator, self.disparity_criterion)
        X, _, _ = _quick_data()

        with pytest.raises(NotFittedError) as execInfo:
            searcher.predict_proba(X)

        # The exact not-fitted message is part of the public contract
        assert execInfo.value.args[0] == _NO_PREDICT_BEFORE_FIT
コード例 #5
0
    def test_no_predict_proba_before_fit(self):
        """predict_proba on an unfitted GridSearch must raise NotFittedError."""
        searcher = GridSearch(self.estimator, self.disparity_criterion)
        X, _, _ = _quick_data()

        with pytest.raises(NotFittedError) as execInfo:
            searcher.predict_proba(X)

        # The message template is parameterized with the estimator class name
        expected = not_fitted_error_msg.format(GridSearch.__name__)
        assert execInfo.value.args[0] == expected
コード例 #6
0
def calculate_grid(grid_limit, grid_size, disparity_moment, grid_offset=None):
    """Build a Lagrange-multiplier grid for *disparity_moment* on quick test data.

    Loads `_quick_data` into the moment, then returns the grid produced by
    `_GridGenerator` for the given size, limit, and optional offset.
    """
    X, y, A = _quick_data()
    disparity_moment.load_data(X, y, sensitive_features=A)

    generator = _GridGenerator(
        grid_size,
        grid_limit,
        disparity_moment.pos_basis,
        disparity_moment.neg_basis,
        disparity_moment.neg_basis_present,
        False,
        grid_offset,
    )
    return generator.grid
コード例 #7
0
    def test_many_sensitive_feature_groups_warning(
        self, transformX, transformY, transformA, A_two_dim, caplog
    ):
        # Create enough sensitive-feature groups to trigger both the grid
        # dimension warning and the grid size warning; fitting must still succeed.
        grid_size = 10
        gs = GridSearch(
            self.estimator,
            self.disparity_criterion,
            grid_size=grid_size,
            sample_weight_name=self.sample_weight_name,
        )
        X, Y, A = _quick_data(A_two_dim)

        # Force six distinct groups into the sensitive feature(s)
        for group in range(6):
            if A_two_dim:
                A[group][0] = group
                A[group][1] = group
            else:
                A[group] = group

        caplog.set_level(logging.WARNING)
        gs.fit(transformX(X), transformY(Y), sensitive_features=transformA(A))

        log_records = caplog.get_records("call")
        dimension_log_record = log_records[0]
        size_log_record = log_records[1]
        if isinstance(self.disparity_criterion, EqualizedOdds):
            # not every label occurs with every group
            grid_dimensions = 10
        else:
            # 6 groups total, but one is not part of the basis, so 5 dimensions
            grid_dimensions = 5

        # expect both the dimension warning and the grid size warning
        assert len(log_records) == 2
        assert GRID_DIMENSION_WARN_TEMPLATE.format(
            grid_dimensions, GRID_DIMENSION_WARN_THRESHOLD
        ) in dimension_log_record.msg.format(*dimension_log_record.args)
        assert GRID_SIZE_WARN_TEMPLATE.format(
            grid_size, 2**grid_dimensions
        ) in size_log_record.msg.format(*size_log_record.args)
コード例 #8
0
    def test_Y_ndarray_bad_columns(self, transformX, transformA, A_two_dim):
        """A two-column ndarray label argument must be rejected with a ValueError."""
        searcher = GridSearch(self.estimator, self.disparity_criterion)
        X, Y, A = _quick_data(A_two_dim)

        # Stack Y against itself to make an (n, 2) array of labels
        bad_labels = np.stack((Y, Y), -1)
        with pytest.raises(ValueError) as execInfo:
            searcher.fit(transformX(X),
                         bad_labels,
                         sensitive_features=transformA(A))
        assert get_sklearn_expected_1d_message() in execInfo.value.args[0]
コード例 #9
0
    def test_Y_df_bad_columns(self, transformX, transformA, A_two_dim):
        """A two-column DataFrame label argument must be rejected with a ValueError."""
        searcher = GridSearch(self.estimator, self.disparity_criterion)
        X, Y, A = _quick_data(A_two_dim)

        # Duplicate Y into two DataFrame columns to make the labels 2-D
        bad_labels = pd.DataFrame({"a": Y, "b": Y})
        with pytest.raises(ValueError) as execInfo:
            searcher.fit(transformX(X),
                         bad_labels,
                         sensitive_features=transformA(A))
        assert get_sklearn_expected_1d_message() in execInfo.value.args[0]
コード例 #10
0
    def test_Y_not_0_1(self, transformX, transformY, transformA, A_two_dim):
        """Labels outside {0, 1} must be rejected with the dedicated error message."""
        searcher = GridSearch(self.estimator, self.disparity_criterion)
        X, Y, A = _quick_data(A_two_dim)
        # Shift all labels by one so they become {1, 2} instead of {0, 1}
        shifted_labels = Y + 1

        with pytest.raises(ValueError) as execInfo:
            searcher.fit(transformX(X),
                         transformY(shifted_labels),
                         sensitive_features=transformA(A))

        assert execInfo.value.args[0] == _LABELS_NOT_0_1_ERROR_MESSAGE
コード例 #11
0
 def test_eps(self):
     """Check that bounding GroupLossMoment shifts gamma by exactly the upper bound.

     With upper_bound=eps, `gamma(predictor) - bound()` should equal
     `gamma(predictor) - eps` elementwise.
     """
     X, Y, A = _quick_data()
     eps = 0.01
     self.estimator = LinearRegression().fit(X, Y)

     # Wrap the fitted estimator's predict as the callable the moment expects
     def predictor(x):
         return self.estimator.predict(x)

     self.disparity_criterion = GroupLossMoment(
         SquareLoss(-np.inf, np.inf), upper_bound=eps)
     self.disparity_criterion.load_data(X, Y, sensitive_features=A)
     bnd = self.disparity_criterion.bound()
     # Evaluate gamma once and reuse it (previously computed twice)
     loss = self.disparity_criterion.gamma(predictor)
     loss_eps = loss - bnd
     # assert is a statement, not a function: no call-style parentheses
     assert np.all(np.isclose(loss - eps, loss_eps))
コード例 #12
0
    def test_X_Y_different_rows(self, transformX, transformY, transformA, A_two_dim):
        """Mismatched row counts between X and Y must raise a ValueError.

        Fix: honor the A_two_dim fixture when generating the data (it was
        previously accepted but ignored), matching test_X_A_different_rows.
        """
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, _, A = _quick_data(A_two_dim)
        # One extra label row relative to X and A
        Y = np.random.randint(2, size=len(A) + 1)

        with pytest.raises(ValueError) as execInfo:
            gs.fit(transformX(X),
                   transformY(Y),
                   sensitive_features=transformA(A))

        expected_exception_message = "Found input variables with inconsistent numbers of samples"
        assert expected_exception_message in execInfo.value.args[0]
コード例 #13
0
    def test_Y_ternary(self, transformX, transformY, transformA, A_two_dim):
        """Labels with three distinct values must be rejected as non-binary."""
        searcher = GridSearch(self.estimator, self.disparity_criterion,
                              sample_weight_name=self.sample_weight_name)
        X, Y, A = _quick_data(A_two_dim)
        # Force at least three distinct label values into Y
        for index, label in enumerate((0, 1, 2)):
            Y[index] = label

        with pytest.raises(ValueError) as execInfo:
            searcher.fit(transformX(X),
                         transformY(Y),
                         sensitive_features=transformA(A))

        assert execInfo.value.args[0] == _LABELS_NOT_0_1_ERROR_MESSAGE
コード例 #14
0
    def test_X_A_different_rows(self, transformX, transformY, transformA, A_two_dim):
        """Mismatched row counts between X and sensitive features must raise."""
        searcher = GridSearch(
            self.estimator,
            self.disparity_criterion,
            sample_weight_name=self.sample_weight_name,
        )
        X, Y, _ = _quick_data(A_two_dim)
        # One extra sensitive-feature row relative to X and Y
        A = np.random.randint(2, size=len(Y) + 1)
        if A_two_dim:
            A = np.stack((A, A), -1)

        with pytest.raises(ValueError) as execInfo:
            searcher.fit(
                transformX(X), transformY(Y), sensitive_features=transformA(A))

        expected_exception_message = (
            "Found input variables with inconsistent numbers of samples"
        )
        assert expected_exception_message in execInfo.value.args[0]
コード例 #15
0
    def test_sensitive_feature_non_binary(self, transformX, transformY,
                                          transformA, A_two_dim):
        """Sensitive features with three distinct values must be rejected."""
        searcher = GridSearch(self.estimator, self.disparity_criterion)
        X, Y, A = _quick_data(A_two_dim)

        # Force at least three distinct group values into A
        for group in range(3):
            if A_two_dim:
                A[group][0] = group
                A[group][1] = group
            else:
                A[group] = group

        with pytest.raises(ValueError) as execInfo:
            searcher.fit(transformX(X),
                         transformY(Y),
                         sensitive_features=transformA(A))

        assert execInfo.value.args[0] == _SENSITIVE_FEATURES_NON_BINARY_ERROR_MESSAGE