Example #1
def test_bgl_lagrange_specifications(A_two_dim):
    a0_count = 13
    a1_count = 4

    a0_label = 5
    a1_label = 3

    a0_factor = 1
    a1_factor = 16

    X, y, A = _simple_regression_data(a0_count, a1_count,
                                      a0_factor, a1_factor,
                                      a0_label, a1_label, A_two_dim)

    estimator = LinearRegression()

    # Build a grid of five Lagrange multiplier vectors spanning the tradeoffs
    # between the two groups
    idx = pd.Int64Index(sorted([a0_label, a1_label]))
    l0_series = pd.Series([2.0, 0.0], index=idx)
    l1_series = pd.Series([1.5, 0.5], index=idx)
    l2_series = pd.Series([1.0, 1.0], index=idx)
    l3_series = pd.Series([0.5, 1.5], index=idx)
    l4_series = pd.Series([0.0, 2.0], index=idx)
    grid_df = pd.concat([l0_series,
                         l1_series,
                         l2_series,
                         l3_series,
                         l4_series],
                        axis=1)

    grid_search1 = GridSearch(copy.deepcopy(estimator),
                              constraints=BoundedGroupLoss(ZeroOneLoss()),
                              grid_size=5)

    grid_search2 = GridSearch(copy.deepcopy(estimator),
                              constraints=BoundedGroupLoss(ZeroOneLoss()),
                              grid=grid_df)

    tradeoffs = [0, 0.25, 0.5, 0.75, 1]

    grid_search1.fit(X, y, sensitive_features=A)
    grid_search2.fit(X, y, sensitive_features=A)

    assert_n_grid_search_results(len(tradeoffs), grid_search1)
    assert_n_grid_search_results(len(tradeoffs), grid_search2)

    # Check we generated the same multipliers
    for i in range(len(tradeoffs)):
        lm1 = grid_search1.lambda_vecs_[i]
        lm2 = grid_search2.lambda_vecs_[i]
        assert lm1.equals(lm2)

    # Check the models are the same
    for i in range(len(tradeoffs)):
        coef1 = grid_search1.predictors_[i].coef_
        coef2 = grid_search2.predictors_[i].coef_
        assert np.array_equal(coef1, coef2)
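These snippets come from fairlearn's test suite and omit the file-level imports and the local helpers (`_simple_regression_data`, `_get_data`, `assert_n_grid_search_results`, `LeastSquaresBinaryClassifierLearner`, `LeastSquaresRegressor`, and the `_Lagrangian` internals). A minimal sketch of the imports they appear to rely on, assuming a recent fairlearn release (exact module paths may differ between versions):

import copy
from copy import deepcopy
import pickle

import numpy as np
import pandas as pd
import pytest

from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression

from fairlearn.reductions import (
    AbsoluteLoss, BoundedGroupLoss, DemographicParity, EqualizedOdds,
    ErrorRate, ErrorRateParity, ExponentiatedGradient, FalsePositiveRateParity,
    GridSearch, SquareLoss, TruePositiveRateParity, ZeroOneLoss,
)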
Example #2
def test_bgl_unmitigated_same(A_two_dim):
    a0_count = 4
    a1_count = 4

    a0_label = 2
    a1_label = 3

    a0_factor = 1
    a1_factor = 16

    X, y, A = _simple_regression_data(a0_count, a1_count, a0_factor, a1_factor,
                                      a0_label, a1_label, A_two_dim)

    estimator = LinearRegression()

    unmitigated_estimator = copy.deepcopy(estimator)
    unmitigated_estimator.fit(X, y)

    # Do the grid search with a single, balanced Lagrange multiplier vector,
    # which should reproduce the unmitigated estimator
    idx = pd.Int64Index(sorted([a0_label, a1_label]))
    lagrange_balanced_series = pd.Series([1.0, 1.0], index=idx)
    grid_df = pd.DataFrame(lagrange_balanced_series)

    target = GridSearch(estimator,
                        constraints=GroupLossMoment(ZeroOneLoss()),
                        grid=grid_df)
    target.fit(X, y, sensitive_features=A)

    raw_coef = unmitigated_estimator.coef_
    gs_coef = target.best_result.predictor.coef_
    # Can't quite get exact match, but this should be very close
    assert np.allclose(raw_coef, gs_coef, rtol=1e-10, atol=1e-7)
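Note that Example #1 builds the constraint as `BoundedGroupLoss(ZeroOneLoss())` while Example #2 uses `GroupLossMoment(ZeroOneLoss())`; `GroupLossMoment` appears to be the older fairlearn name for the same bounded-group-loss moment. Under that assumption, the equivalent construction in recent releases would be:

from fairlearn.reductions import BoundedGroupLoss, ZeroOneLoss

# Equivalent to GroupLossMoment(ZeroOneLoss()) in older versions; the upper_bound
# value here is purely illustrative.
constraint = BoundedGroupLoss(ZeroOneLoss(), upper_bound=0.01)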
Example #3
def test_call_oracle(Constraints, eps, estimator, mocker):
    X, y, A = _get_data(A_two_dim=False)
    # Using a real estimator here with a mocked `fit` method since we don't actually
    # want to fit one, but rather care about having that object's fit method called exactly once.
    estimator.fit = mocker.MagicMock(name="fit")
    if issubclass(Constraints, LossMoment):
        constraints = Constraints(ZeroOneLoss())
    else:
        constraints = Constraints()

    lagrangian = _Lagrangian(
        X=X,
        y=y,
        estimator=estimator,
        constraints=deepcopy(constraints),
        B=1 / eps,
        sensitive_features=A,
    )

    # Set up initial lambda vector based on a 0-initialized theta and use separate constraints
    # object for it to avoid the dependence on the lagrangian object.
    lambda_vec, new_weights, new_labels = get_lambda_new_weights_and_labels(
        constraints, X, y, A)

    _ = lagrangian._call_oracle(lambda_vec)

    # Ideally we'd prefer calling assert_called_once_with(args) but that is not compatible with
    # pandas data structures.
    assert len(estimator.fit.mock_calls) == 1
    _, args, kwargs = estimator.fit.mock_calls[0]
    assert (args[0] == X).all().all()
    assert (args[1] == new_labels).all()
    assert (kwargs["sample_weight"] == new_weights).all()
    assert lagrangian.n_oracle_calls == 1
    assert len(lagrangian.oracle_execution_times) == 1
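The helper `get_lambda_new_weights_and_labels` is not shown here, but the quantities it produces mirror how the reductions approach hands a reweighted, relabeled dataset to the oracle. A rough sketch of that computation, reusing X, y, A, estimator, constraints, and lambda_vec from the test and assuming freshly constructed moment objects (an approximation of the internals, not fairlearn's exact code):

from fairlearn.reductions import ErrorRate

objective = ErrorRate()                 # default objective for classification moments
objective.load_data(X, y, sensitive_features=A)
constraints.load_data(X, y, sensitive_features=A)

# Combine the objective's and the constraints' signed weights, relabel each row by
# the sign of the result, and train on the magnitudes as sample weights.
signed = objective.signed_weights() + constraints.signed_weights(lambda_vec)
new_labels = 1 * (signed > 0)
new_weights = signed.abs()              # fairlearn additionally rescales these to sum to n
estimator.fit(X, new_labels, sample_weight=new_weights)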
Example #4
def test_objective_constraints_compatibility(Constraints, Objective):
    X, y, A = _get_data(A_two_dim=False)
    estimator = LeastSquaresBinaryClassifierLearner()

    if issubclass(Constraints, LossMoment):
        constraints = Constraints(ZeroOneLoss())
    else:
        constraints = Constraints()

    if issubclass(Objective, LossMoment):
        objective = Objective(ZeroOneLoss())
    else:
        objective = Objective()

    if objective._moment_type() != constraints._moment_type():
        with pytest.raises(ValueError) as execInfo:
            _ = _Lagrangian(
                X=X,
                y=y,
                estimator=estimator,
                constraints=deepcopy(constraints),
                objective=objective,
                B=1.0,
                sensitive_features=A,
            )
        assert (_MESSAGE_BAD_OBJECTIVE.format(objective._moment_type(),
                                              constraints._moment_type())
                in execInfo.value.args[0])
    else:
        # No exception raised
        _ = _Lagrangian(
            X=X,
            y=y,
            estimator=estimator,
            constraints=deepcopy(constraints),
            objective=objective,
            B=1.0,
            sensitive_features=A,
        )
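For context on the branch above, a concrete mismatch would be a classification constraint paired with a loss-based objective. A hedged sketch reusing X, y, A, and estimator from the test (the `MeanLoss` import path is an assumption and may differ between fairlearn versions):

from fairlearn.reductions import DemographicParity, ZeroOneLoss
from fairlearn.reductions._moments.bounded_group_loss import MeanLoss  # assumed path

# DemographicParity is a classification moment and MeanLoss a loss moment, so their
# _moment_type() values differ and construction should raise the error checked above.
with pytest.raises(ValueError):
    _Lagrangian(
        X=X,
        y=y,
        estimator=estimator,
        constraints=DemographicParity(),
        objective=MeanLoss(ZeroOneLoss()),
        B=1.0,
        sensitive_features=A,
    )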
Example #5
def test_call_oracle_single_y_value(Constraints, eps, y_value, mocker):
    X_dict = {
        "c": [0, 1, 4, 1, 5, 1, 6, 0, 2, 4],
        "d": [1, 5, 1, 6, 2, 3, 5, 1, 5, 2],
    }
    X = pd.DataFrame(X_dict)

    # Try with both possible y values for binary classification to ensure that
    # constraints that focus only on positives or negatives can handle the
    # case where none of the rows apply to them.
    y = pd.Series([y_value] * 10)
    A = pd.Series([0, 0, 0, 0, 0, 0, 0, 0, 0, 1])

    # Mock the estimator; the oracle is expected to return a dummy predictor
    # without ever fitting it
    estimator = mocker.MagicMock()
    if issubclass(Constraints, LossMoment):
        constraints = Constraints(ZeroOneLoss(), upper_bound=eps)
    else:
        constraints = Constraints(difference_bound=eps)

    lagrangian = _Lagrangian(
        X=X,
        y=y,
        estimator=estimator,
        constraints=deepcopy(constraints),
        B=1 / eps,
        sensitive_features=A,
    )

    # Set up initial lambda vector based on a 0-initialized theta and use separate constraints
    # object for it to avoid the dependence on the lagrangian object.
    lambda_vec = get_lambda_vec(constraints, X, y, A)

    test_X_dict = {"c": [10000], "d": [2000000]}
    test_X = pd.DataFrame(test_X_dict)

    result_estimator = lagrangian._call_oracle(lambda_vec)
    assert isinstance(result_estimator, DummyClassifier)
    assert result_estimator.predict(test_X) == y_value
    assert lagrangian.n_oracle_calls_dummy_returned == 1

    # Make sure the mocked estimator wasn't called
    assert len(estimator.method_calls) == 0
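The dummy predictor asserted above behaves like scikit-learn's DummyClassifier fitted on a single class. A minimal sanity check of that behaviour, reusing X, y, y_value, and test_X from the test (a sketch of the expected behaviour, not fairlearn's internals):

from sklearn.dummy import DummyClassifier

# With only one label value present, a constant classifier reproduces it everywhere.
dummy = DummyClassifier(strategy="constant", constant=y_value).fit(X, y)
assert (dummy.predict(test_X) == y_value).all()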
Example #6
def test_call_oracle(Constraints, eps, mocker):
    X, y, A = _get_data(A_two_dim=False)
    # Using a mocked estimator here since we don't actually want to fit one, but rather care about
    # having that object's fit method called exactly once.
    estimator = mocker.MagicMock()
    if issubclass(Constraints, LossMoment):
        constraints = Constraints(ZeroOneLoss())
    else:
        constraints = Constraints()

    # ExponentiatedGradient pickles and unpickles the estimator, which isn't possible for the mock
    # object, so we mock that process as well. It sets the result from pickle.loads as the
    # estimator, so we can simply overwrite the return value to be our mocked estimator object.
    mocker.patch('pickle.dumps')
    pickle.loads = mocker.MagicMock(return_value=estimator)

    lagrangian = _Lagrangian(X, A, y, estimator, deepcopy(constraints),
                             1 / eps)

    # Set up initial lambda vector based on a 0-initialized theta and use separate constraints
    # object for it to avoid the dependence on the lagrangian object.
    lambda_vec, new_weights, new_labels = get_lambda_new_weights_and_labels(
        constraints, X, y, A)

    _ = lagrangian._call_oracle(lambda_vec)

    # Ideally we'd prefer calling assert_called_once_with(args) but that is not compatible with
    # pandas data structures.
    assert len(estimator.method_calls) == 1
    name, args, kwargs = estimator.method_calls[0]
    assert name == 'fit'
    assert len(args) == 2
    assert len(kwargs) == 1
    assert (args[0] == X).all().all()
    assert (args[1] == new_labels).all()
    assert (kwargs['sample_weight'] == new_weights).all()
    assert lagrangian.n_oracle_calls == 1
    assert len(lagrangian.oracle_execution_times) == 1
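Example #6 is an older variant of Example #3: the `_Lagrangian` is constructed with positional arguments, and because a `MagicMock` cannot be pickled, the pickle round trip that the lagrangian performs on the estimator is stubbed out. A hedged sketch of what is being mocked, reusing the names from the test (an approximation of the internals):

# Roughly what happens inside the lagrangian when it clones the estimator for the oracle:
pickled = pickle.dumps(estimator)         # patched above, since MagicMock is not picklable
oracle_estimator = pickle.loads(pickled)  # patched above to hand back the same mock object
oracle_estimator.fit(X, new_labels, sample_weight=new_weights)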
Example #7
 def setup_method(self, method):
     self.estimator = LinearRegression()
     self.disparity_criterion = GroupLossMoment(ZeroOneLoss())
Example #8
 def setup_method(self, method):
     self.estimator = LinearRegression()
     eps = 0.01
     self.disparity_criterion = BoundedGroupLoss(ZeroOneLoss(), upper_bound=eps)
     self.sample_weight_name = 'sample_weight'
Example #9
def test_lagrangian_eval(eps, Constraints, use_Q_callable, opt_lambda):
    X, y, A = _get_data(A_two_dim=False)
    estimator = LeastSquaresBinaryClassifierLearner()

    if issubclass(Constraints, LossMoment):
        task_type = "regression"
        constraints = Constraints(ZeroOneLoss(), upper_bound=eps)
    else:
        task_type = "classification"
        constraints = Constraints(difference_bound=eps)

    # epsilon (and thereby also B) only affects L_high and L
    B = 1 / eps

    lagrangian = _Lagrangian(
        X=X,
        y=y,
        estimator=estimator,
        constraints=deepcopy(constraints),
        B=B,
        opt_lambda=opt_lambda,
        sensitive_features=A,
    )

    lambda_vec = get_lambda_vec(constraints, X, y, A)

    # call oracle to determine error and gamma and calculate exp
    fitted_estimator = lagrangian._call_oracle(lambda_vec)

    def h(X):
        return fitted_estimator.predict(X)

    best_h_error = lagrangian.obj.gamma(h)[0]
    best_h_gamma = lagrangian.constraints.gamma(h)

    # opt_lambda affects only the calculation of L
    if opt_lambda:
        projected_lambda = constraints.project_lambda(lambda_vec)
        L_expected = (best_h_error + np.sum(projected_lambda * best_h_gamma) -
                      eps * np.sum(projected_lambda))
    else:
        L_expected = (best_h_error + np.sum(lambda_vec * best_h_gamma) -
                      eps * np.sum(lambda_vec))

    L_high_expected = best_h_error + B * (best_h_gamma.max() - eps)

    # manually set errors and gammas which would otherwise be done in the best_h step
    lagrangian.errors = pd.Series([best_h_error])
    lagrangian.gammas = pd.Series([best_h_gamma])

    # call _eval to get the desired results L, L_high, gamma, error;
    # _eval is compatible with a callable h or a vector Q
    Q_vec = pd.Series([1.0])
    L, L_high, gamma, error = lagrangian._eval(h if use_Q_callable else Q_vec,
                                               lambda_vec)

    # in this particular example the estimator is always the same
    expected_estimator_weights = {
        "regression": pd.Series({"X1": 0.541252, "X2": 0.454293, "X3": 0.019203}),
        "classification": pd.Series({"X1": 0.538136, "X2": 0.457627, "X3": 0.021186}),
    }
    assert (np.isclose(
        fitted_estimator.weights,
        expected_estimator_weights[task_type],
        atol=1.0e-6,
    )).all()

    assert L == pytest.approx(L_expected, abs=_PRECISION)
    assert L_high == pytest.approx(L_high_expected, abs=_PRECISION)
    assert error == 0.25
    assert (gamma == best_h_gamma).all()
Example #10
 def setup_method(self, method):
     self.estimator = LinearRegression()
     eps = 0.01
     self.disparity_criterion = GroupLossMoment(ZeroOneLoss(),
                                                upper_bound=eps)
Example #11
class TestExponentiatedGradientSmoke:
    smoke_test_data = [
        {
            "constraint_class": DemographicParity,
            "eps": 0.100,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.100000,
            "error": 0.250000,
            "n_oracle_calls": 32,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.100,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": -0.020000,
            "error": 0.250000,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 2,
            "ratio": 0.8
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.050,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.050000,
            "error": 0.266522,
            "n_oracle_calls": 23,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.050,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": -0.020000,
            "error": 0.25,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 2,
            "ratio": 0.8
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.020,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.020000,
            "error": 0.332261,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5
        },
        # ================================================
        {
            "constraint_class": DemographicParity,
            "eps": 0.020,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": -0.020000,
            "error": 0.25,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 2,
            "ratio": 0.8
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.010,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.010000,
            "error": 0.354174,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.010,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": -0.020000,
            "error": 0.25,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 2,
            "ratio": 0.8
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.365130,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": -0.020000,
            "error": 0.25,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 2,
            "ratio": 0.8
        },
        # ================================================
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.100,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.100000,
            "error": 0.309333,
            "n_oracle_calls": 21,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 4
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.100,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.100000,
            "error": 0.25,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 4,
            "ratio": 0.8
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.050,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.050000,
            "error": 0.378827,
            "n_oracle_calls": 19,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.050,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.050000,
            "error": 0.277016,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 4,
            "ratio": 0.8
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.020,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.020000,
            "error": 0.421531,
            "n_oracle_calls": 19,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6
        },
        # ================================================
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.020,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.020000,
            "error": 0.296612,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 4,
            "ratio": 0.8
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.010,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.010000,
            "error": 0.435765,
            "n_oracle_calls": 19,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.010,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.010000,
            "error": 0.303145,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 4,
            "ratio": 0.8
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.442883,
            "n_oracle_calls": 19,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.306411,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 4,
            "ratio": 0.8
        },
        # ================================================
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.1,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.100000,
            "error": 0.25625,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.1,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.092857,
            "error": 0.25,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3,
            "ratio": 0.8
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.05,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.049999,
            "error": 0.3,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.05,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.050000,
            "error": 0.253472,
            "n_oracle_calls": 26,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6,
            "ratio": 0.8
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.02,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.019999,
            "error": 0.326250,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3
        },
        # ================================================
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.02,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.020000,
            "error": 0.268055,
            "n_oracle_calls": 26,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5,
            "ratio": 0.8
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.01,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.010000,
            "error": 0.325555,
            "n_oracle_calls": 18,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 4
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.01,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.010000,
            "error": 0.272916,
            "n_oracle_calls": 26,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5,
            "ratio": 0.8
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.329444,
            "n_oracle_calls": 19,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.275347,
            "n_oracle_calls": 26,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5,
            "ratio": 0.8
        },
        # ================================================
        {
            "constraint_class": TruePositiveRateParity,
            "eps": 0.005,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.25,
            "n_oracle_calls": 16,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 2
        },
        {
            "constraint_class": FalsePositiveRateParity,
            "eps": 0.005,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.427133,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3
        }
    ]
    smoke_test_data_flipped = [{
        "constraint_class": TruePositiveRateParity,
        "eps": 0.005,
        "best_gap": 0.0,
        "last_iter": 5,
        "best_iter": 5,
        "disp": 0.005000,
        "error": 0.427133,
        "n_oracle_calls": 17,
        "n_oracle_calls_dummy_returned": 0,
        "n_predictors": 3
    }, {
        "constraint_class": FalsePositiveRateParity,
        "eps": 0.005,
        "best_gap": 0.0,
        "last_iter": 5,
        "best_iter": 5,
        "disp": 0.005000,
        "error": 0.25,
        "n_oracle_calls": 16,
        "n_oracle_calls_dummy_returned": 0,
        "n_predictors": 2
    }, {
        "constraint_class": EqualizedOdds,
        "eps": 0.005,
        "best_gap": 0.000000,
        "last_iter": 5,
        "best_iter": 5,
        "disp": 0.005000,
        "error": 0.442883,
        "n_oracle_calls": 19,
        "n_oracle_calls_dummy_returned": 0,
        "n_predictors": 6
    }]

    smoke_test_data_regression = [
        {
            "constraint_class": BoundedGroupLoss,
            "loss": SquareLoss(0, 1),
            "eps": 0.01,
            "best_gap": 0.003905,
            "last_iter": 6,
            "best_iter": 6,
            "upper_bound": 0.01,  # infeasible
            "disp": [
                0.178333, 0.178333, 0.178333, 0.178333, 0.178333, 0.178333,
                0.028045, 0.178333, 0.178333, 0.178333, 0.030853, 0.178333,
                0.178333, 0.178333, 0.178333, 0.178333
            ],
            "error": [
                0.1035, 0.1035, 0.1035, 0.1035, 0.1035, 0.1035, 0.024412,
                0.1035, 0.1035, 0.1035, 0.025691, 0.1035, 0.1035, 0.1035,
                0.1035, 0.1035
            ],
            "weights": [0, 0, 0, 0, 0, 0, 0.956748, 0, 0, 0, 0.043251,
                        0, 0, 0, 0, 0, 0],
            "n_oracle_calls": 23,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 16
        },
        {
            "constraint_class": BoundedGroupLoss,
            "loss": SquareLoss(0, 1),
            "eps": 0.01,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "upper_bound": 0.05,  # feasible
            "disp": [
                0.178333, 0.178333, 0.036690, 0.178333, 0.178333, 0.178333,
                0.178333
            ],
            "error": [0.1035, 0.1035, 0.021988, 0.1035, 0.1035, 0.1035, 0.1035],
            "weights": [0, 0, 1, 0, 0, 0, 0],
            "n_oracle_calls": 32,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 7
        },
        {
            "constraint_class": BoundedGroupLoss,
            "loss": SquareLoss(0, 1),
            "eps": 0.01,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "max_iter": 20,
            "nu": 1e-6,
            "upper_bound": 0.05,  # feasible
            "disp": [
                0.178333, 0.178333, 0.036690, 0.178333, 0.178333, 0.178333,
                0.178333
            ],
            "error": [0.1035, 0.1035, 0.021988, 0.1035, 0.1035, 0.1035, 0.1035],
            "weights": [0, 0, 1, 0, 0, 0, 0],
            "n_oracle_calls": 29,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 7
        },
        {
            "constraint_class": BoundedGroupLoss,
            "loss": ZeroOneLoss(),
            "eps": 0.01,
            "best_gap": 0.007185,
            "last_iter": 5,
            "best_iter": 5,
            "upper_bound": 0.01,  # infeasible
            "disp": [
                0.383333, 0.383333, 0.383333, 0.383333, 0.1479, 0.383333,
                0.383333, 0.383333, 0.140256, 0.383333, 0.383333, 0.383333,
                0.383333, 0.383333
            ],
            "error": [
                0.255, 0.255, 0.255, 0.255, 0.140198, 0.255, 0.255, 0.255,
                0.135674, 0.255, 0.255, 0.255, 0.255, 0.255
            ],
            "weights": [0, 0, 0, 0, 0.221468, 0, 0, 0, 0.778531, 0, 0, 0, 0, 0],
            "n_oracle_calls": 20,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 14
        },
        {
            "constraint_class": BoundedGroupLoss,
            "loss": ZeroOneLoss(),
            "eps": 0.01,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "upper_bound": 0.2,  # feasible
            "disp": [0.383333, 0.383333, 0.166918],
            "error": [0.255, 0.255, 0.116949],
            "weights": [0, 0, 1],
            "n_oracle_calls": 20,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3
        },
        {
            "constraint_class": BoundedGroupLoss,
            "loss": ZeroOneLoss(),
            "eps": 0.01,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "max_iter": 20,
            "nu": 1e-6,
            "upper_bound": 0.2,  # feasible
            "disp": [0.383333, 0.383333, 0.166918],
            "error": [0.255, 0.255, 0.116949],
            "weights": [0, 0, 1],
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3
        },
        {
            "constraint_class": BoundedGroupLoss,
            "loss": AbsoluteLoss(0, 1),
            "eps": 0.01,
            "best_gap": 0.007185,
            "last_iter": 5,
            "best_iter": 5,
            "upper_bound": 0.01,  # infeasible
            "disp": [
                0.383333, 0.383333, 0.383333, 0.383333, 0.1479, 0.383333,
                0.383333, 0.383333, 0.140256, 0.383333, 0.383333, 0.383333,
                0.383333, 0.383333
            ],
            "error": [
                0.255, 0.255, 0.255, 0.255, 0.140198, 0.255, 0.255, 0.255,
                0.135674, 0.255, 0.255, 0.255, 0.255, 0.255
            ],
            "weights": [0, 0, 0, 0, 0.221468, 0, 0, 0, 0.778531, 0, 0, 0, 0, 0],
            "n_oracle_calls": 20,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 14
        },
    ]

    def run_smoke_test_binary_classification(self, data, flipped=False):
        learner = LeastSquaresBinaryClassifierLearner()
        if "ratio" in data.keys():
            disparity_moment = data["constraint_class"](
                ratio_bound_slack=data["eps"], ratio_bound=data["ratio"])
        else:
            disparity_moment = data["constraint_class"](
                difference_bound=data["eps"])

        # Create Exponentiated Gradient object with a copy of the constraint.
        # The original disparity_moment object is used for validation, so the
        # assumption is that the moment logic is correct in these tests.
        expgrad = ExponentiatedGradient(learner,
                                        constraints=deepcopy(disparity_moment),
                                        eps=data["eps"])

        X, y, A = _get_data(A_two_dim=False, flip_y=flipped)

        expgrad.fit(X, y, sensitive_features=A)

        self._assert_expgrad_state(expgrad, data)

        # select probability of predicting 1
        def Q(X):
            return expgrad._pmf_predict(X)[:, 1]

        default_objective = ErrorRate()
        disparity_moment.load_data(X, y, sensitive_features=A)
        default_objective.load_data(X, y, sensitive_features=A)
        disparity = disparity_moment.gamma(Q).max()
        error = default_objective.gamma(Q)[0]
        assert disparity == pytest.approx(data["disp"], abs=_PRECISION)
        assert error == pytest.approx(data["error"], abs=_PRECISION)

    @pytest.mark.parametrize("testdata", smoke_test_data)
    def test_smoke(self, testdata):
        self.run_smoke_test_binary_classification(testdata)

    @pytest.mark.parametrize("testdata", smoke_test_data_flipped)
    def test_smoke_flipped(self, testdata):
        self.run_smoke_test_binary_classification(testdata, flipped=True)

    @pytest.mark.parametrize("data", smoke_test_data_regression)
    def test_smoke_regression(self, data):
        learner = LeastSquaresRegressor()
        disparity_moment = data["constraint_class"](
            loss=data["loss"], upper_bound=data["upper_bound"])

        # Create Exponentiated Gradient object with a copy of the constraint.
        # The original disparity_moment object is used for validation, so the
        # assumption is that the moment logic is correct in these tests.
        expgrad = ExponentiatedGradient(learner,
                                        constraints=deepcopy(disparity_moment),
                                        eps=data["eps"],
                                        nu=data.get('nu'),
                                        max_iter=data.get("max_iter", 50))

        X, y, A = _get_data(A_two_dim=False, y_as_scores=True)

        expgrad.fit(X, y, sensitive_features=A)

        self._assert_expgrad_state(expgrad, data)

        # check all predictors
        disparity_moment.load_data(X, y, sensitive_features=A)
        for i in range(len(expgrad.predictors_)):

            def Q(X):
                return expgrad._pmf_predict(X)[i]

            default_objective = MeanLoss(data["loss"])
            default_objective.load_data(X, y, sensitive_features=A)
            disparity = disparity_moment.gamma(Q).max()
            error = default_objective.gamma(Q)[0]
            assert disparity == pytest.approx(data["disp"][i], abs=_PRECISION)
            assert error == pytest.approx(data["error"][i], abs=_PRECISION)
            assert expgrad.weights_[i] == pytest.approx(data['weights'][i],
                                                        abs=_PRECISION)

        assert sum(expgrad.weights_) == pytest.approx(1, abs=_PRECISION)

    @pytest.mark.parametrize("Constraints", [
        TruePositiveRateParity, FalsePositiveRateParity, DemographicParity,
        EqualizedOdds, ErrorRateParity
    ])
    def test_simple_fit_predict_binary_classification(self, Constraints):
        X, y, sensitive_features = _get_data()
        estimator = LeastSquaresBinaryClassifierLearner()
        expgrad = ExponentiatedGradient(estimator, Constraints())
        expgrad.fit(X, y, sensitive_features=sensitive_features)
        expgrad.predict(X)

    @pytest.mark.parametrize("constraints", [
        BoundedGroupLoss(loss=SquareLoss(0, 1), upper_bound=0.01),
        BoundedGroupLoss(loss=AbsoluteLoss(0, 1), upper_bound=0.01),
        BoundedGroupLoss(loss=ZeroOneLoss(), upper_bound=0.01)
    ])
    def test_simple_fit_predict_regression(self, constraints):
        X, y, sensitive_features = _get_data(y_as_scores=True)
        estimator = LeastSquaresRegressor()
        expgrad = ExponentiatedGradient(estimator, constraints)
        expgrad.fit(X, y, sensitive_features=sensitive_features)
        expgrad.predict(X)

    def test_single_y_value(self):
        # Set up data designed to result in an "all single class" oracle
        # problem at some point during the iterations
        X_dict = {"c": [10, 50, 10]}
        X = pd.DataFrame(X_dict)

        y = [1, 1, 1]
        A = ['a', 'b', 'b']

        estimator = LogisticRegression(solver='liblinear',
                                       fit_intercept=True,
                                       random_state=97)
        expgrad = ExponentiatedGradient(estimator, DemographicParity())

        # Following line should not throw an exception
        expgrad.fit(X, y, sensitive_features=A)

        # Check the predictors for a ConstantPredictor
        test_X_dict = {"c": [1, 2, 3, 4, 5, 6]}
        test_X = pd.DataFrame(test_X_dict)
        assert expgrad.n_oracle_calls_dummy_returned_ > 0
        assert len(expgrad.oracle_execution_times_) == expgrad.n_oracle_calls_
        for p in expgrad.predictors_:
            assert isinstance(p, DummyClassifier)
            assert np.array_equal(p.predict(test_X), [1, 1, 1, 1, 1, 1])

    def _assert_expgrad_state(self, expgrad, data):
        n_predictors = len(expgrad.predictors_)
        assert expgrad.best_gap_ == pytest.approx(data["best_gap"],
                                                  abs=_PRECISION)
        assert expgrad.best_gap_ < expgrad.nu
        assert expgrad.last_iter_ == data["last_iter"]
        assert expgrad.best_iter_ == data["best_iter"]
        assert expgrad.last_iter_ >= _MIN_ITER
        assert expgrad.n_oracle_calls_ == data["n_oracle_calls"]
        assert expgrad.n_oracle_calls_dummy_returned_ == data[
            "n_oracle_calls_dummy_returned"]
        assert n_predictors == data["n_predictors"]
        assert len(expgrad.oracle_execution_times_) == expgrad.n_oracle_calls_
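The binary smoke tests above evaluate the randomized predictor through `_pmf_predict`, which returns one row per sample with the probabilities of predicting 0 and 1; column 1 is the probability of predicting 1 that gets fed into `gamma()`. A quick sanity check, assuming a fitted `expgrad` and a feature matrix `X` as in the tests:

pmf = expgrad._pmf_predict(X)           # shape (n_samples, 2)
assert pmf.shape[1] == 2
assert np.allclose(pmf.sum(axis=1), 1.0)
Q = pmf[:, 1]                           # P(prediction == 1), as used by Q(X) above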
Example #12
class TestExponentiatedGradientSmoke:
    smoke_test_data = [
        {
            "constraint_class": DemographicParity,
            "eps": 0.100,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.100000,
            "error": 0.250000,
            "n_oracle_calls": 32,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3,
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.100,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": -0.020000,
            "error": 0.250000,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 2,
            "ratio": 0.8,
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.050,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.050000,
            "error": 0.266522,
            "n_oracle_calls": 23,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6,
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.050,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": -0.020000,
            "error": 0.25,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 2,
            "ratio": 0.8,
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.020,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.020000,
            "error": 0.332261,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5,
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.020,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": -0.020000,
            "error": 0.25,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 2,
            "ratio": 0.8,
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.010,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.010000,
            "error": 0.354174,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5,
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.010,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": -0.020000,
            "error": 0.25,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 2,
            "ratio": 0.8,
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.365130,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5,
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": -0.020000,
            "error": 0.25,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 2,
            "ratio": 0.8,
        },
        # ================================================
        {
            "constraint_class": DemographicParity,
            "eps": 0.050,
            "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}),
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.050000,
            "error": 0.407142,
            "n_oracle_calls": 18,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 4,
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.050,
            "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}),
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.050000,
            "error": 0.263830,
            "n_oracle_calls": 21,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 3,
            "ratio": 0.8,
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.020,
            "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}),
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.020000,
            "error": 0.422,
            "n_oracle_calls": 19,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5,
        },
        {
            "constraint_class": DemographicParity,
            "eps": 0.020,
            "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}),
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.020000,
            "error": 0.286170,
            "n_oracle_calls": 21,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 3,
            "ratio": 0.8,
        },
        # ================================================
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.100,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.100000,
            "error": 0.309333,
            "n_oracle_calls": 21,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 4,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.100,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.100000,
            "error": 0.25,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 4,
            "ratio": 0.8,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.050,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.050000,
            "error": 0.378827,
            "n_oracle_calls": 19,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.050,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.050000,
            "error": 0.277016,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 4,
            "ratio": 0.8,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.020,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.020000,
            "error": 0.421531,
            "n_oracle_calls": 19,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.020,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.020000,
            "error": 0.296612,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 4,
            "ratio": 0.8,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.010,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.010000,
            "error": 0.435765,
            "n_oracle_calls": 19,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.010,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.010000,
            "error": 0.303145,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 4,
            "ratio": 0.8,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.442883,
            "n_oracle_calls": 19,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.306411,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 4,
            "ratio": 0.8,
        },
        # ================================================
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.050,
            "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}),
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.050000,
            "error": 0.4125,
            "n_oracle_calls": 23,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.050,
            "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}),
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.050000,
            "error": 0.324067,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 4,
            "ratio": 0.8,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.020,
            "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}),
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.020000,
            "error": 0.435,
            "n_oracle_calls": 23,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.020,
            "objective": ErrorRate(costs={"fn": 2.0, "fp": 1.0}),
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.020000,
            "error": 0.339179,
            "n_oracle_calls": 22,
            "n_oracle_calls_dummy_returned": 12,
            "n_predictors": 4,
            "ratio": 0.8,
        },
        # ================================================
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.1,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.100000,
            "error": 0.25625,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3,
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.1,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.092857,
            "error": 0.25,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3,
            "ratio": 0.8,
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.05,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.049999,
            "error": 0.3,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3,
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.05,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.050000,
            "error": 0.253472,
            "n_oracle_calls": 26,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6,
            "ratio": 0.8,
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.02,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.019999,
            "error": 0.326250,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3,
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.02,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.020000,
            "error": 0.268055,
            "n_oracle_calls": 26,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5,
            "ratio": 0.8,
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.01,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.010000,
            "error": 0.325555,
            "n_oracle_calls": 18,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 4,
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.01,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.010000,
            "error": 0.272916,
            "n_oracle_calls": 26,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5,
            "ratio": 0.8,
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.329444,
            "n_oracle_calls": 19,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5,
        },
        {
            "constraint_class": ErrorRateParity,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.275347,
            "n_oracle_calls": 26,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 5,
            "ratio": 0.8,
        },
        # ================================================
        {
            "constraint_class": TruePositiveRateParity,
            "eps": 0.005,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.25,
            "n_oracle_calls": 16,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 2,
        },
        {
            "constraint_class": FalsePositiveRateParity,
            "eps": 0.005,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.427133,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3,
        },
    ]
    smoke_test_data_flipped = [
        {
            "constraint_class": TruePositiveRateParity,
            "eps": 0.005,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.427133,
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3,
        },
        {
            "constraint_class": FalsePositiveRateParity,
            "eps": 0.005,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.25,
            "n_oracle_calls": 16,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 2,
        },
        {
            "constraint_class": EqualizedOdds,
            "eps": 0.005,
            "best_gap": 0.000000,
            "last_iter": 5,
            "best_iter": 5,
            "disp": 0.005000,
            "error": 0.442883,
            "n_oracle_calls": 19,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 6,
        },
    ]

    smoke_test_data_regression = [
        {
            "constraint_class": BoundedGroupLoss,
            "loss": SquareLoss(0, 1),
            "eps": 0.01,
            "best_gap": 0.003905,
            "last_iter": 6,
            "best_iter": 6,
            "upper_bound": 0.01,  # infeasible
            "disp": [
                0.178333,
                0.178333,
                0.178333,
                0.178333,
                0.178333,
                0.178333,
                0.028045,
                0.178333,
                0.178333,
                0.178333,
                0.030853,
                0.178333,
                0.178333,
                0.178333,
                0.178333,
                0.178333,
            ],
            "error": [
                0.1035,
                0.1035,
                0.1035,
                0.1035,
                0.1035,
                0.1035,
                0.024412,
                0.1035,
                0.1035,
                0.1035,
                0.025691,
                0.1035,
                0.1035,
                0.1035,
                0.1035,
                0.1035,
            ],
            "weights": [
                0,
                0,
                0,
                0,
                0,
                0,
                0.956748,
                0,
                0,
                0,
                0.043251,
                0,
                0,
                0,
                0,
                0,
                0,
            ],
            "n_oracle_calls": 23,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 16,
        },
        {
            "constraint_class": BoundedGroupLoss,
            "loss": SquareLoss(0, 1),
            "eps": 0.01,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "upper_bound": 0.05,  # feasible
            "disp": [
                0.178333,
                0.178333,
                0.036690,
                0.178333,
                0.178333,
                0.178333,
                0.178333,
            ],
            "error": [0.1035, 0.1035, 0.021988, 0.1035, 0.1035, 0.1035, 0.1035],
            "weights": [0, 0, 1, 0, 0, 0, 0],
            "n_oracle_calls": 32,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 7,
        },
        {
            "constraint_class": BoundedGroupLoss,
            "loss": SquareLoss(0, 1),
            "eps": 0.01,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "max_iter": 20,
            "nu": 1e-6,
            "upper_bound": 0.05,  # feasible
            "disp": [
                0.178333,
                0.178333,
                0.036690,
                0.178333,
                0.178333,
                0.178333,
                0.178333,
            ],
            "error": [0.1035, 0.1035, 0.021988, 0.1035, 0.1035, 0.1035, 0.1035],
            "weights": [0, 0, 1, 0, 0, 0, 0],
            "n_oracle_calls": 29,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 7,
        },
        {
            "constraint_class": BoundedGroupLoss,
            "loss": ZeroOneLoss(),
            "eps": 0.01,
            "best_gap": 0.007185,
            "last_iter": 5,
            "best_iter": 5,
            "upper_bound": 0.01,  # infeasible
            "disp": [
                0.383333,
                0.383333,
                0.383333,
                0.383333,
                0.1479,
                0.383333,
                0.383333,
                0.383333,
                0.140256,
                0.383333,
                0.383333,
                0.383333,
                0.383333,
                0.383333,
            ],
            "error": [
                0.255,
                0.255,
                0.255,
                0.255,
                0.140198,
                0.255,
                0.255,
                0.255,
                0.135674,
                0.255,
                0.255,
                0.255,
                0.255,
                0.255,
            ],
            "weights": [0, 0, 0, 0, 0.221468, 0, 0, 0, 0.778531, 0, 0, 0, 0, 0],
            "n_oracle_calls": 20,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 14,
        },
        {
            "constraint_class": BoundedGroupLoss,
            "loss": ZeroOneLoss(),
            "eps": 0.01,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "upper_bound": 0.2,  # feasible
            "disp": [0.383333, 0.383333, 0.166918],
            "error": [0.255, 0.255, 0.116949],
            "weights": [0, 0, 1],
            "n_oracle_calls": 20,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3,
        },
        {
            "constraint_class": BoundedGroupLoss,
            "loss": ZeroOneLoss(),
            "eps": 0.01,
            "best_gap": 0.0,
            "last_iter": 5,
            "best_iter": 5,
            "max_iter": 20,
            "nu": 1e-6,
            "upper_bound": 0.2,  # feasible
            "disp": [0.383333, 0.383333, 0.166918],
            "error": [0.255, 0.255, 0.116949],
            "weights": [0, 0, 1],
            "n_oracle_calls": 17,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 3,
        },
        {
            "constraint_class": BoundedGroupLoss,
            "loss": AbsoluteLoss(0, 1),
            "eps": 0.01,
            "best_gap": 0.007185,
            "last_iter": 5,
            "best_iter": 5,
            "upper_bound": 0.01,  # infeasible
            "disp": [
                0.383333,
                0.383333,
                0.383333,
                0.383333,
                0.1479,
                0.383333,
                0.383333,
                0.383333,
                0.140256,
                0.383333,
                0.383333,
                0.383333,
                0.383333,
                0.383333,
            ],
            "error": [
                0.255,
                0.255,
                0.255,
                0.255,
                0.140198,
                0.255,
                0.255,
                0.255,
                0.135674,
                0.255,
                0.255,
                0.255,
                0.255,
                0.255,
            ],
            "weights": [0, 0, 0, 0, 0.221468, 0, 0, 0, 0.778531, 0, 0, 0, 0, 0],
            "n_oracle_calls": 20,
            "n_oracle_calls_dummy_returned": 0,
            "n_predictors": 14,
        },
    ]

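    # Shared helper: fit ExponentiatedGradient on the synthetic classification
    # data, compare its fitted state against the expected values in `data`, and
    # verify the disparity and error achieved by the randomized predictor.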
    def run_smoke_test_binary_classification(self, data, flipped=False):
        learner = LeastSquaresBinaryClassifierLearner()
        if "ratio" in data.keys():
            disparity_moment = data["constraint_class"](
                ratio_bound_slack=data["eps"], ratio_bound=data["ratio"]
            )
        else:
            disparity_moment = data["constraint_class"](difference_bound=data["eps"])
        if "objective" in data.keys():
            objective_moment = deepcopy(data["objective"])
        else:
            objective_moment = ErrorRate()

        # Create Exponentiated Gradient object with a copy of the constraint.
        # The original disparity_moment object is used for validation, so the
        # assumption is that the moment logic is correct in these tests.
        expgrad = ExponentiatedGradient(
            learner,
            constraints=deepcopy(disparity_moment),
            objective=deepcopy(objective_moment),
            eps=data["eps"],
        )

        X, y, A = _get_data(A_two_dim=False, flip_y=flipped)

        expgrad.fit(X, y, sensitive_features=A)

        self._assert_expgrad_state(expgrad, data)

        # select probability of predicting 1
        def Q(X):
            return expgrad._pmf_predict(X)[:, 1]

        disparity_moment.load_data(X, y, sensitive_features=A)
        objective_moment.load_data(X, y, sensitive_features=A)
        disparity = disparity_moment.gamma(Q).max()
        error = objective_moment.gamma(Q)[0]
        assert disparity == pytest.approx(data["disp"], abs=_PRECISION)
        assert error == pytest.approx(data["error"], abs=_PRECISION)

    @pytest.mark.parametrize("testdata", smoke_test_data)
    def test_smoke(self, testdata):
        self.run_smoke_test_binary_classification(testdata)

    @pytest.mark.parametrize("testdata", smoke_test_data_flipped)
    def test_smoke_flipped(self, testdata):
        self.run_smoke_test_binary_classification(testdata, flipped=True)

    @pytest.mark.parametrize("data", smoke_test_data_regression)
    def test_smoke_regression(self, data):
        learner = LeastSquaresRegressor()
        disparity_moment = data["constraint_class"](
            loss=data["loss"], upper_bound=data["upper_bound"]
        )

        # Create Exponentiated Gradient object with a copy of the constraint.
        # The original disparity_moment object is used for validation, so the
        # assumption is that the moment logic is correct in these tests.
        expgrad = ExponentiatedGradient(
            learner,
            constraints=deepcopy(disparity_moment),
            eps=data["eps"],
            nu=data.get("nu"),
            max_iter=data.get("max_iter", 50),
        )

        X, y, A = _get_data(A_two_dim=False, y_as_scores=True)

        expgrad.fit(X, y, sensitive_features=A)

        self._assert_expgrad_state(expgrad, data)

        # check all predictors
        disparity_moment.load_data(X, y, sensitive_features=A)
        for i in range(len(expgrad.predictors_)):

            def Q(X):
                return expgrad._pmf_predict(X)[i]

            default_objective = MeanLoss(data["loss"])
            default_objective.load_data(X, y, sensitive_features=A)
            disparity = disparity_moment.gamma(Q).max()
            error = default_objective.gamma(Q)[0]
            assert disparity == pytest.approx(data["disp"][i], abs=_PRECISION)
            assert error == pytest.approx(data["error"][i], abs=_PRECISION)
            assert expgrad.weights_[i] == pytest.approx(
                data["weights"][i], abs=_PRECISION
            )

        assert sum(expgrad.weights_) == pytest.approx(1, abs=_PRECISION)

    @pytest.mark.parametrize(
        "Constraints",
        [
            TruePositiveRateParity,
            FalsePositiveRateParity,
            DemographicParity,
            EqualizedOdds,
            ErrorRateParity,
        ],
    )
    def test_simple_fit_predict_binary_classification(self, Constraints):
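        # Smoke test only: check that fit and predict run without raising for
        # each classification constraint.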
        X, y, sensitive_features = _get_data()
        estimator = LeastSquaresBinaryClassifierLearner()
        expgrad = ExponentiatedGradient(estimator, Constraints())
        expgrad.fit(X, y, sensitive_features=sensitive_features)
        expgrad.predict(X)

    @pytest.mark.parametrize(
        "constraints",
        [
            BoundedGroupLoss(loss=SquareLoss(0, 1), upper_bound=0.01),
            BoundedGroupLoss(loss=AbsoluteLoss(0, 1), upper_bound=0.01),
            BoundedGroupLoss(loss=ZeroOneLoss(), upper_bound=0.01),
        ],
    )
    def test_simple_fit_predict_regression(self, constraints):
        X, y, sensitive_features = _get_data(y_as_scores=True)
        estimator = LeastSquaresRegressor()
        expgrad = ExponentiatedGradient(estimator, constraints)
        expgrad.fit(X, y, sensitive_features=sensitive_features)
        expgrad.predict(X)

    def test_single_y_value(self):
        # Setup with data designed to result in "all single class" labels for
        # the oracle, forcing it to return dummy predictors at some point
        X_dict = {"c": [10, 50, 10]}
        X = pd.DataFrame(X_dict)

        y = [1, 1, 1]
        A = ["a", "b", "b"]

        estimator = LogisticRegression(
            solver="liblinear", fit_intercept=True, random_state=97
        )
        expgrad = ExponentiatedGradient(estimator, DemographicParity())

        # Following line should not throw an exception
        expgrad.fit(X, y, sensitive_features=A)

        # Check that every predictor is a DummyClassifier that always predicts 1
        test_X_dict = {"c": [1, 2, 3, 4, 5, 6]}
        test_X = pd.DataFrame(test_X_dict)
        assert expgrad.n_oracle_calls_dummy_returned_ > 0
        assert len(expgrad.oracle_execution_times_) == expgrad.n_oracle_calls_
        for p in expgrad.predictors_:
            assert isinstance(p, DummyClassifier)
            assert np.array_equal(p.predict(test_X), [1, 1, 1, 1, 1, 1])

    def _assert_expgrad_state(self, expgrad, data):
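        # Compare the fitted ExponentiatedGradient attributes against the
        # expected values recorded in the smoke-test data dictionaries.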
        n_predictors = len(expgrad.predictors_)
        assert expgrad.best_gap_ == pytest.approx(data["best_gap"], abs=_PRECISION)
        assert expgrad.best_gap_ < expgrad.nu
        assert expgrad.last_iter_ == data["last_iter"]
        assert expgrad.best_iter_ == data["best_iter"]
        assert expgrad.last_iter_ >= _MIN_ITER
        assert expgrad.n_oracle_calls_ == data["n_oracle_calls"]
        assert (
            expgrad.n_oracle_calls_dummy_returned_
            == data["n_oracle_calls_dummy_returned"]
        )
        assert n_predictors == data["n_predictors"]
        assert len(expgrad.oracle_execution_times_) == expgrad.n_oracle_calls_

    @pytest.mark.parametrize("eps", [0.05, 0.02])
    @pytest.mark.parametrize("ratio", [None, 0.8])
    @pytest.mark.parametrize("pos_copies", [0, 1, 2])
    def test_error_rate_consistency(self, eps, ratio, pos_copies):
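        # Up-weighting false negatives via ErrorRate costs should be equivalent
        # to duplicating the positive examples, so the "costs" and "sampling"
        # runs below are expected to reach (approximately) the same state.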
        learner = LeastSquaresBinaryClassifierLearner()
        if ratio is None:
            constraints_moment = EqualizedOdds(difference_bound=eps)
        else:
            constraints_moment = EqualizedOdds(ratio_bound=ratio, ratio_bound_slack=eps)

        results = {}
        for method in ["costs", "sampling"]:
            X, y, A = _get_data()

            if method == "sampling":
                select = y == 1
                X = pd.concat((X,) + (X.loc[select, :],) * pos_copies).values
                y = pd.concat((y,) + (y[select],) * pos_copies).values
                A = pd.concat((A,) + (A[select],) * pos_copies).values
                objective_moment = ErrorRate()
            else:
                objective_moment = ErrorRate(costs={"fn": 1.0 + pos_copies, "fp": 1.0})

            expgrad = ExponentiatedGradient(
                learner,
                constraints=deepcopy(constraints_moment),
                objective=deepcopy(objective_moment),
                eps=eps,
                nu=1e-3,
            )

            expgrad.fit(X, y, sensitive_features=A)

            # select probability of predicting 1
            def Q(X):
                return expgrad._pmf_predict(X)[:, 1]

            constraints_eval = deepcopy(constraints_moment)
            constraints_eval.load_data(X, y, sensitive_features=A)
            disparity = constraints_eval.gamma(Q).max()

            objective_eval = deepcopy(objective_moment)
            objective_eval.load_data(X, y, sensitive_features=A)
            total_error = objective_eval.gamma(Q)[0] * len(y)
            results[method] = {
                "error": objective_eval.gamma(Q)[0],
                "total_error": total_error,
                "disp": disparity,
                "n_predictors": len(expgrad.predictors_),
                "best_gap": expgrad.best_gap_,
                "last_iter": expgrad.last_iter_,
                "best_iter": expgrad.best_iter_,
                "n_oracle_calls": expgrad.n_oracle_calls_,
                "n_oracle_calls_dummy_returned": expgrad.n_oracle_calls_dummy_returned_,
            }

        self._assert_expgrad_two_states(results["costs"], results["sampling"])

    def _assert_expgrad_two_states(self, state1, state2):
        assert state1["total_error"] == pytest.approx(
            state2["total_error"], abs=_PRECISION
        )
        assert state1["disp"] == pytest.approx(state2["disp"], abs=_PRECISION)
        assert state1["n_predictors"] == state2["n_predictors"]
        assert state1["best_gap"] == pytest.approx(state2["best_gap"], abs=_PRECISION)
        assert state1["last_iter"] == state2["last_iter"]
        assert state1["best_iter"] == state2["best_iter"]
        assert state1["n_oracle_calls"] == state2["n_oracle_calls"]
        assert (
            state1["n_oracle_calls_dummy_returned"]
            == state2["n_oracle_calls_dummy_returned"]
        )