Example #1
def test_call_oracle(Constraints, eps, estimator, mocker):
    X, y, A = _get_data(A_two_dim=False)
    # Use a real estimator with a mocked `fit` method: we don't actually want to
    # fit it, we only care that its fit method is called exactly once.
    estimator.fit = mocker.MagicMock(name="fit")
    if issubclass(Constraints, LossMoment):
        constraints = Constraints(ZeroOneLoss())
    else:
        constraints = Constraints()

    lagrangian = _Lagrangian(
        X=X,
        y=y,
        estimator=estimator,
        constraints=deepcopy(constraints),
        B=1 / eps,
        sensitive_features=A,
    )

    # Set up the initial lambda vector based on a 0-initialized theta, using a
    # separate constraints object to avoid depending on the lagrangian object.
    lambda_vec, new_weights, new_labels = get_lambda_new_weights_and_labels(
        constraints, X, y, A)

    _ = lagrangian._call_oracle(lambda_vec)

    # Ideally we'd prefer calling assert_called_once_with(args) but that is not compatible with
    # pandas data structures.
    assert len(estimator.fit.mock_calls) == 1
    _, args, kwargs = estimator.fit.mock_calls[0]
    assert (args[0] == X).all().all()
    assert (args[1] == new_labels).all()
    assert (kwargs["sample_weight"] == new_weights).all()
    assert lagrangian.n_oracle_calls == 1
    assert len(lagrangian.oracle_execution_times) == 1
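
Examples #1 and #6 call a get_lambda_new_weights_and_labels helper whose body never appears above. As a reading aid, here is a minimal sketch of what it plausibly does, reconstructed from the inline computation in Example #4; the body and return order are assumptions, not the actual test helper.

import numpy as np
import pandas as pd


def get_lambda_new_weights_and_labels(constraints, X, y, A):
    # Sketch only: mirrors the inline steps of Example #4.
    constraints.load_data(X, y, sensitive_features=A)
    objective = constraints.default_objective()
    objective.load_data(X, y, sensitive_features=A)

    # Initial lambda vector from a 0-initialized theta.
    theta = pd.Series(0, constraints.index)
    lambda_vec = np.exp(theta) / (1 + np.exp(theta).sum())

    # Reduce to a weighted classification problem (cf. Example #4).
    signed_weights = (objective.signed_weights() +
                      constraints.signed_weights(lambda_vec))
    new_labels = 1 * (signed_weights > 0)
    new_weights = signed_weights.abs()
    new_weights = y.shape[0] * new_weights / new_weights.sum()
    return lambda_vec, new_weights, new_labels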
Example #2
def test_lagrangian_eval(eps, Constraints, use_Q_callable, opt_lambda):
    X, y, A = _get_data(A_two_dim=False)
    estimator = LeastSquaresBinaryClassifierLearner()
    constraints = Constraints()

    # epsilon (and thereby also B) only affects L_high and L
    B = 1 / eps

    lagrangian = _Lagrangian(X, A, y, estimator, deepcopy(constraints), eps, B,
                             opt_lambda=opt_lambda)

    # set up initial lambda vector based on a 0-initialized theta
    constraints.load_data(X, y, sensitive_features=A)
    objective = constraints.default_objective()
    objective.load_data(X, y, sensitive_features=A)
    theta = pd.Series(0, constraints.index)
    lambda_vec = np.exp(theta) / (1 + np.exp(theta).sum())

    # call the oracle to determine error and gamma, then compute the expected values
    fitted_estimator = lagrangian._call_oracle(lambda_vec)
    def h(X):
        return fitted_estimator.predict(X)
    best_h_error = lagrangian.obj.gamma(h)[0]
    best_h_gamma = lagrangian.constraints.gamma(h)

    # opt_lambda affects only the calculation of L
    if opt_lambda:
        projected_lambda = constraints.project_lambda(lambda_vec)
        L_expected = (best_h_error + np.sum(projected_lambda * best_h_gamma) -
                      eps * np.sum(projected_lambda))
    else:
        L_expected = (best_h_error + np.sum(lambda_vec * best_h_gamma) -
                      eps * np.sum(lambda_vec))

    L_high_expected = best_h_error + B * (best_h_gamma.max() - eps)

    # manually set errors and gammas which would otherwise be done in the best_h step
    lagrangian.errors = pd.Series([best_h_error])
    lagrangian.gammas = pd.Series([best_h_gamma])

    # call _eval to get the desired results L, L_high, gamma, error;
    # _eval is compatible with a callable h or a vector Q
    Q_vec = pd.Series([1.0])
    L, L_high, gamma, error = lagrangian._eval(h if use_Q_callable else Q_vec, lambda_vec)

    # in this particular example the estimator is always the same
    expected_estimator_weights = pd.Series({
        'X1': 0.538136,
        'X2': 0.457627,
        'X3': 0.021186})
    assert (np.isclose(fitted_estimator.weights, expected_estimator_weights, atol=1.e-6)).all()

    assert L == L_expected
    assert L_high == L_high_expected
    assert error == 0.25
    assert (gamma == best_h_gamma).all()
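
The expected values in this test restate the two quantities _eval returns: the Lagrangian of h at the multiplier lambda, and the pessimistic variant in which lambda is replaced by the worst-case multiplier of magnitude B. A standalone sketch of the two formulas; the function name is illustrative, not fairlearn API.

import numpy as np


def lagrangian_values(error, gamma, lambda_vec, eps, B):
    # L      = error + lambda . gamma - eps * sum(lambda)
    # L_high = error + B * (max(gamma) - eps)
    L = error + np.sum(lambda_vec * gamma) - eps * np.sum(lambda_vec)
    L_high = error + B * (np.max(gamma) - eps)
    return L, L_high


# Toy numbers only, to show the shapes of the two quantities.
L, L_high = lagrangian_values(error=0.25,
                              gamma=np.array([0.1, -0.05]),
                              lambda_vec=np.array([0.3, 0.3]),
                              eps=0.05, B=20.0)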
Example #3
def test_objective_constraints_compatibility(Constraints, Objective):
    X, y, A = _get_data(A_two_dim=False)
    estimator = LeastSquaresBinaryClassifierLearner()

    if issubclass(Constraints, LossMoment):
        constraints = Constraints(ZeroOneLoss())
    else:
        constraints = Constraints()

    if issubclass(Objective, LossMoment):
        objective = Objective(ZeroOneLoss())
    else:
        objective = Objective()

    if objective._moment_type() != constraints._moment_type():
        with pytest.raises(ValueError) as excinfo:
            _ = _Lagrangian(
                X=X,
                y=y,
                estimator=estimator,
                constraints=deepcopy(constraints),
                objective=objective,
                B=1.0,
                sensitive_features=A,
            )
        assert (_MESSAGE_BAD_OBJECTIVE.format(objective._moment_type(),
                                              constraints._moment_type())
                in excinfo.value.args[0])
    else:
        # No exception raised
        _ = _Lagrangian(
            X=X,
            y=y,
            estimator=estimator,
            constraints=deepcopy(constraints),
            objective=objective,
            B=1.0,
            sensitive_features=A,
        )
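
For reference, the guard this test exercises has the following shape: construction fails with a ValueError whenever the objective and constraints disagree on their moment type. This is a hypothetical restatement of the behavior asserted above, not fairlearn's actual implementation.

def check_moment_compatibility(objective, constraints, message_template):
    # Hypothetical helper mirroring the behavior the test asserts.
    if objective._moment_type() != constraints._moment_type():
        raise ValueError(message_template.format(objective._moment_type(),
                                                 constraints._moment_type()))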
Example #4
def test_call_oracle(Constraints, eps, mocker):
    X, y, A = _get_data(A_two_dim=False)
    # Use a mocked estimator since we don't actually want to fit one; we only
    # care that its fit method is called exactly once.
    estimator = mocker.MagicMock()
    constraints = Constraints()

    # ExponentiatedGradient pickles and unpickles the estimator, which isn't possible for the mock
    # object, so we mock that process as well. It sets the result from pickle.loads as the
    # estimator, so we can simply overwrite the return value to be our mocked estimator object.
    mocker.patch('pickle.dumps')
    pickle.loads = mocker.MagicMock(return_value=estimator)

    lagrangian = _Lagrangian(X, A, y, estimator, deepcopy(constraints), eps,
                             1 / eps)

    # Set up the initial lambda vector based on a 0-initialized theta, using a
    # separate constraints object to avoid depending on the lagrangian object.
    constraints.load_data(X, y, sensitive_features=A)
    objective = constraints.default_objective()
    objective.load_data(X, y, sensitive_features=A)
    theta = pd.Series(0, constraints.index)
    lambda_vec = np.exp(theta) / (1 + np.exp(theta).sum())

    signed_weights = (objective.signed_weights() +
                      constraints.signed_weights(lambda_vec))
    redY = 1 * (signed_weights > 0)
    redW = signed_weights.abs()
    redW = y.shape[0] * redW / redW.sum()

    _ = lagrangian._call_oracle(lambda_vec)

    # Ideally we'd prefer calling assert_called_once_with(args) but that is not compatible with
    # pandas data structures.
    assert len(estimator.method_calls) == 1
    name, args, kwargs = estimator.method_calls[0]
    assert name == 'fit'
    assert len(args) == 2
    assert len(kwargs) == 1
    assert (args[0] == X).all().all()
    assert (args[1] == redY).all()
    assert (kwargs['sample_weight'] == redW).all()
    assert lagrangian.n_oracle_calls == 1
    assert len(lagrangian.oracle_execution_times) == 1
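
The redY/redW block above is the cost-sensitive reduction the oracle is trained on: the sign of each combined weight becomes the new label, and the magnitudes, rescaled to sum to n, become the sample weights. A toy run on made-up signed weights:

import pandas as pd

signed_weights = pd.Series([0.8, -0.2, 0.5, -0.1])  # made-up values

redY = 1 * (signed_weights > 0)       # label 1 where the weight is positive
redW = signed_weights.abs()           # magnitudes become sample weights
redW = len(redW) * redW / redW.sum()  # rescale so the weights sum to n

print(redY.tolist())           # [1, 0, 1, 0]
print(redW.round(2).tolist())  # [2.0, 0.5, 1.25, 0.25]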
Example #5
def test_call_oracle_single_y_value(Constraints, eps, y_value, mocker):
    X_dict = {
        "c": [0, 1, 4, 1, 5, 1, 6, 0, 2, 4],
        "d": [1, 5, 1, 6, 2, 3, 5, 1, 5, 2],
    }
    X = pd.DataFrame(X_dict)

    # Try with both possible y values for binary classification to ensure that
    # constraints that focus only on positives or negatives can handle the
    # case where none of the rows apply to them.
    y = pd.Series([y_value] * 10)
    A = pd.Series([0, 0, 0, 0, 0, 0, 0, 0, 0, 1])

    # Mock the estimator; it should never actually be fitted in this test
    estimator = mocker.MagicMock()
    if issubclass(Constraints, LossMoment):
        constraints = Constraints(ZeroOneLoss(), upper_bound=eps)
    else:
        constraints = Constraints(difference_bound=eps)

    lagrangian = _Lagrangian(
        X=X,
        y=y,
        estimator=estimator,
        constraints=deepcopy(constraints),
        B=1 / eps,
        sensitive_features=A,
    )

    # Set up the initial lambda vector based on a 0-initialized theta, using a
    # separate constraints object to avoid depending on the lagrangian object.
    lambda_vec = get_lambda_vec(constraints, X, y, A)

    test_X_dict = {"c": [10000], "d": [2000000]}
    test_X = pd.DataFrame(test_X_dict)

    result_estimator = lagrangian._call_oracle(lambda_vec)
    assert isinstance(result_estimator, DummyClassifier)
    assert result_estimator.predict(test_X) == y_value
    assert lagrangian.n_oracle_calls_dummy_returned == 1

    # Make sure the mocked estimator wasn't called
    assert len(estimator.method_calls) == 0
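
get_lambda_vec is likewise not defined above; Examples #5 and #8 call it where Example #7 spells the same steps out inline. A sketch under that assumption (the body is a reconstruction, not the actual test helper):

import numpy as np
import pandas as pd


def get_lambda_vec(constraints, X, y, A):
    # Sketch only: mirrors the inline computation in Example #7.
    constraints.load_data(X, y, sensitive_features=A)
    objective = constraints.default_objective()
    objective.load_data(X, y, sensitive_features=A)
    # Initial lambda vector from a 0-initialized theta.
    theta = pd.Series(0, constraints.index)
    return np.exp(theta) / (1 + np.exp(theta).sum())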
Example #6
def test_call_oracle(Constraints, eps, mocker):
    X, y, A = _get_data(A_two_dim=False)
    # Use a mocked estimator since we don't actually want to fit one; we only
    # care that its fit method is called exactly once.
    estimator = mocker.MagicMock()
    if issubclass(Constraints, LossMoment):
        constraints = Constraints(ZeroOneLoss())
    else:
        constraints = Constraints()

    # ExponentiatedGradient pickles and unpickles the estimator, which isn't possible for the mock
    # object, so we mock that process as well. It sets the result from pickle.loads as the
    # estimator, so we can simply overwrite the return value to be our mocked estimator object.
    mocker.patch('pickle.dumps')
    pickle.loads = mocker.MagicMock(return_value=estimator)

    lagrangian = _Lagrangian(X, A, y, estimator, deepcopy(constraints),
                             1 / eps)

    # Set up the initial lambda vector based on a 0-initialized theta, using a
    # separate constraints object to avoid depending on the lagrangian object.
    lambda_vec, new_weights, new_labels = get_lambda_new_weights_and_labels(
        constraints, X, y, A)

    _ = lagrangian._call_oracle(lambda_vec)

    # Ideally we'd prefer calling assert_called_once_with(args) but that is not compatible with
    # pandas data structures.
    assert len(estimator.method_calls) == 1
    name, args, kwargs = estimator.method_calls[0]
    assert name == 'fit'
    assert len(args) == 2
    assert len(kwargs) == 1
    assert (args[0] == X).all().all()
    assert (args[1] == new_labels).all()
    assert (kwargs['sample_weight'] == new_weights).all()
    assert lagrangian.n_oracle_calls == 1
    assert len(lagrangian.oracle_execution_times) == 1
Example #7
def test_call_oracle_single_y_value(Constraints, eps, mocker):
    X_dict = {
        "c": [0, 1, 4, 1, 5, 1, 6, 0, 2, 4],
        "d": [1, 5, 1, 6, 2, 3, 5, 1, 5, 2]
    }
    X = pd.DataFrame(X_dict)

    y = pd.Series([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    A = pd.Series([0, 0, 0, 0, 0, 0, 0, 0, 0, 1])

    # We mock the estimator, but we only patch it for pickling
    estimator = mocker.MagicMock()
    mocker.patch('pickle.dumps')

    constraints = Constraints()

    lagrangian = _Lagrangian(X, A, y, estimator, deepcopy(constraints), eps,
                             1 / eps)

    # Set up the initial lambda vector based on a 0-initialized theta, using a
    # separate constraints object to avoid depending on the lagrangian object.
    constraints.load_data(X, y, sensitive_features=A)
    objective = constraints.default_objective()
    objective.load_data(X, y, sensitive_features=A)
    theta = pd.Series(0, constraints.index)
    lambda_vec = np.exp(theta) / (1 + np.exp(theta).sum())

    test_X_dict = {"c": [10000], "d": [2000000]}
    test_X = pd.DataFrame(test_X_dict)

    result_estimator = lagrangian._call_oracle(lambda_vec)
    assert isinstance(result_estimator, DummyClassifier)
    assert result_estimator.predict(test_X) == 1
    assert lagrangian.n_oracle_calls_dummy_returned == 1

    # Make sure the mocked estimator wasn't called
    assert len(estimator.method_calls) == 0
Example #8
def test_lagrangian_eval(eps, Constraints, use_Q_callable, opt_lambda):
    X, y, A = _get_data(A_two_dim=False)
    estimator = LeastSquaresBinaryClassifierLearner()

    if issubclass(Constraints, LossMoment):
        task_type = "regression"
        constraints = Constraints(ZeroOneLoss(), upper_bound=eps)
    else:
        task_type = "classification"
        constraints = Constraints(difference_bound=eps)

    # epsilon (and thereby also B) only affects L_high and L
    B = 1 / eps

    lagrangian = _Lagrangian(
        X=X,
        y=y,
        estimator=estimator,
        constraints=deepcopy(constraints),
        B=B,
        opt_lambda=opt_lambda,
        sensitive_features=A,
    )

    lambda_vec = get_lambda_vec(constraints, X, y, A)

    # call the oracle to determine error and gamma, then compute the expected values
    fitted_estimator = lagrangian._call_oracle(lambda_vec)

    def h(X):
        return fitted_estimator.predict(X)

    best_h_error = lagrangian.obj.gamma(h)[0]
    best_h_gamma = lagrangian.constraints.gamma(h)

    # opt_lambda affects only the calculation of L
    if opt_lambda:
        projected_lambda = constraints.project_lambda(lambda_vec)
        L_expected = (best_h_error + np.sum(projected_lambda * best_h_gamma) -
                      eps * np.sum(projected_lambda))
    else:
        L_expected = (best_h_error + np.sum(lambda_vec * best_h_gamma) -
                      eps * np.sum(lambda_vec))

    L_high_expected = best_h_error + B * (best_h_gamma.max() - eps)

    # manually set errors and gammas which would otherwise be done in the best_h step
    lagrangian.errors = pd.Series([best_h_error])
    lagrangian.gammas = pd.Series([best_h_gamma])

    # call _eval to get the desired results L, L_high, gamma, error;
    # _eval is compatible with a callable h or a vector Q
    Q_vec = pd.Series([1.0])
    L, L_high, gamma, error = lagrangian._eval(h if use_Q_callable else Q_vec,
                                               lambda_vec)

    # in this particular example the estimator is always the same
    expected_estimator_weights = {
        "regression": pd.Series({
            "X1": 0.541252,
            "X2": 0.454293,
            "X3": 0.019203,
        }),
        "classification": pd.Series({
            "X1": 0.538136,
            "X2": 0.457627,
            "X3": 0.021186,
        }),
    }
    assert (np.isclose(
        fitted_estimator.weights,
        expected_estimator_weights[task_type],
        atol=1.0e-6,
    )).all()

    assert L == pytest.approx(L_expected, abs=_PRECISION)
    assert L_high == pytest.approx(L_high_expected, abs=_PRECISION)
    assert error == 0.25
    assert (gamma == best_h_gamma).all()
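
Note the comparison style: Example #2 checks L and L_high with exact ==, while this version uses pytest.approx with an absolute tolerance _PRECISION (defined elsewhere in the test module), which is the robust pattern for floating-point results. For instance, with any small tolerance:

import pytest

assert 0.1 + 0.2 != 0.3                           # exact equality trips on round-off
assert 0.1 + 0.2 == pytest.approx(0.3, abs=1e-6)  # a tolerance absorbs it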