def test_no_predict_before_fit(self):
        """Check that ``predict`` on an unfitted GridSearch raises NotFittedError.

        The first argument of the raised exception must equal
        ``not_fitted_error_msg`` formatted with the GridSearch class name.
        """
        unfitted = GridSearch(self.estimator, self.disparity_criterion)
        features, _, _ = _quick_data()

        with pytest.raises(NotFittedError) as raised:
            unfitted.predict(features)

        expected_message = not_fitted_error_msg.format(GridSearch.__name__)
        assert expected_message == raised.value.args[0]
Ejemplo n.º 2
0
    def test_no_predict_before_fit(self):
        """Check that ``predict`` on an unfitted GridSearch raises NotFittedError.

        The first argument of the raised exception must equal the
        ``_NO_PREDICT_BEFORE_FIT`` message constant.
        """
        unfitted = GridSearch(self.estimator, self.disparity_criterion)
        features, _, _ = self._quick_data()

        with pytest.raises(NotFittedError) as raised:
            unfitted.predict(features)

        assert _NO_PREDICT_BEFORE_FIT == raised.value.args[0]
Ejemplo n.º 3
0
    def test_no_predict_before_fit(self):
        """Check that ``predict`` on an unfitted GridSearch raises NotFittedException.

        The first argument of the raised exception must equal the literal
        "must call fit" message below.
        """
        gs = GridSearch(self.estimator, self.disparity_criterion)
        X, _, _ = self._quick_data()

        # A string literal is already a str; no conversion call is needed.
        message = "Must call fit before attempting to make predictions"
        with pytest.raises(NotFittedException) as exc_info:
            gs.predict(X)

        assert message == exc_info.value.args[0]
def test_demographicparity_fair_uneven_populations(A_two_dim):
    """Run GridSearch with DemographicParity on simple threshold data.

    Fits an 11-point grid over a liblinear LogisticRegression, then checks
    the number of results, the selected predictor's class predictions and
    probabilities on two probe rows, and the class predictions of the
    first predictor in the grid.

    Args:
        A_two_dim: forwarded unchanged to ``_simple_threshold_data``.
    """
    # Variant of test_demographicparity_already_fair, which has unequal
    # populations in the two classes
    # Also allow the threshold to be adjustable
    # NOTE(review): number_a0 and number_a1 are both 4 below, so the two
    # groups are the same size here - confirm whether that is intended.

    score_threshold = 0.625

    number_a0 = 4
    number_a1 = 4

    a0_label = 17
    a1_label = 37

    grid_size = 11

    X, Y, A = _simple_threshold_data(number_a0, number_a1,
                                     score_threshold, score_threshold,
                                     a0_label, a1_label, A_two_dim)

    target = GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),
                        constraints=DemographicParity(),
                        grid_size=grid_size)

    target.fit(X, Y, sensitive_features=A)
    assert len(target.all_results) == grid_size

    test_X = pd.DataFrame({"actual_feature": [0.2, 0.7],
                           "sensitive_features": [a0_label, a1_label],
                           "constant_ones_feature": [1, 1]})

    # Previously this prediction was stored and then overwritten without
    # being checked. The expected labels follow from the probabilities
    # asserted just below: class 1 is the more likely class only for the
    # second probe row.
    sample_results = target.predict(test_X)
    assert np.array_equal(sample_results, [0, 1])

    sample_proba = target.predict_proba(test_X)
    assert np.allclose(sample_proba, [[0.53748641, 0.46251359], [0.46688736, 0.53311264]])

    # The first predictor in the grid is expected to flip both labels.
    first_results = target.all_results[0].predictor.predict(test_X)
    assert np.array_equal(first_results, [1, 0])
Ejemplo n.º 5
0
def test_bgl_unfair(A_two_dim):
    """Run GridSearch with BoundedGroupLoss (square loss) on regression data.

    Fits a 7-point grid over LinearRegression, then checks the selected
    predictor's output and the output of every predictor in the grid on
    two probe rows.

    Args:
        A_two_dim: forwarded unchanged to ``_simple_regression_data``.
    """
    a0_count, a1_count = 5, 7
    a0_label, a1_label = 2, 3
    a0_factor, a1_factor = 1, 16
    grid_size = 7

    X, Y, A = _simple_regression_data(
        a0_count, a1_count, a0_factor, a1_factor, a0_label, a1_label, A_two_dim
    )

    constraint = BoundedGroupLoss(SquareLoss(-np.inf, np.inf))
    mitigator = GridSearch(
        LinearRegression(), constraints=constraint, grid_size=grid_size
    )
    mitigator.fit(X, Y, sensitive_features=A)

    assert_n_grid_search_results(grid_size, mitigator)

    probe = pd.DataFrame(
        {
            "actual_feature": [0.2, 0.7],
            "sensitive_features": [a0_label, a1_label],
            "constant_ones_feature": [1, 1],
        }
    )

    selected_output = mitigator.predict(probe)
    assert np.allclose([-1.91764706, 9.61176471], selected_output)

    per_predictor = [p.predict(probe) for p in mitigator.predictors_]
    first_output = per_predictor[0]
    remaining_outputs = per_predictor[1:]

    # TODO: investigate where the different outcomes for the first grid point are from, likely
    # due to some ignored data points at the edge resulting in another solution with the same
    # least squares loss (i.e. both solutions acceptable).
    # Reflects https://github.com/fairlearn/fairlearn/issues/265
    first_is_acceptable = logging_all_close(
        [[3.2, 11.2]], [first_output]
    ) or logging_all_close([[3.03010885, 11.2]], [first_output])
    assert first_is_acceptable

    expected_remaining = [
        [-3.47346939, 10.64897959],
        [-2.68, 10.12],
        [-1.91764706, 9.61176471],
        [-1.18461538, 9.12307692],
        [-0.47924528, 8.65283019],
        [0.2, 0.7],
    ]
    assert logging_all_close(expected_remaining, remaining_outputs)
Ejemplo n.º 6
0
def test_demographicparity_fair_uneven_populations_with_grid_offset(
        A_two_dim, offset):
    """Run GridSearch with DemographicParity and a non-zero grid offset.

    Builds a ``grid_offset`` Series indexed by (sign, event, group_id),
    fits an 11-point grid over a liblinear LogisticRegression, and checks
    the selected predictor's labels and probabilities as well as the
    labels produced by the first predictor in the grid.

    Args:
        A_two_dim: forwarded unchanged to ``_simple_threshold_data``.
        offset: data used to populate the ``grid_offset`` Series.
    """
    # Grid of Lagrangian multipliers has some initial offset

    score_threshold = 0.625
    number_a0, number_a1 = 4, 4
    a0_label, a1_label = 17, 37
    grid_size = 11

    offset_index = pd.MultiIndex.from_product(
        [["+", "-"], ["all"], [a0_label, a1_label]],
        names=["sign", "event", "group_id"],
    )
    grid_offset = pd.Series(offset, index=offset_index)

    X, Y, A = _simple_threshold_data(
        number_a0, number_a1,
        score_threshold, score_threshold,
        a0_label, a1_label,
        A_two_dim,
    )

    mitigator = GridSearch(
        LogisticRegression(solver="liblinear", fit_intercept=True),
        constraints=DemographicParity(),
        grid_size=grid_size,
        grid_offset=grid_offset,
    )
    mitigator.fit(X, Y, sensitive_features=A)
    assert_n_grid_search_results(grid_size, mitigator)

    probe = pd.DataFrame({
        "actual_feature": [0.2, 0.7],
        "sensitive_features": [a0_label, a1_label],
        "constant_ones_feature": [1, 1],
    })

    selected_labels = mitigator.predict(probe)
    assert np.array_equal(selected_labels, [0, 1])

    selected_proba = mitigator.predict_proba(probe)
    expected_proba = [[0.55069845, 0.44930155], [0.41546008, 0.58453992]]
    assert np.allclose(selected_proba, expected_proba)

    first_labels = mitigator.predictors_[0].predict(probe)
    assert np.array_equal(first_labels, [1, 0])
Ejemplo n.º 7
0
def lagrangian(constraint, model, constraint_weight, grid_size, X_train,
               Y_train, A_train, X_test):
    """Fit a fairness-constrained GridSearch and predict on ``X_test``.

    The base estimator is looked up as ``models[model]`` and treated as a
    black-box estimator by GridSearch. Construction, fitting and prediction
    are all included in the measured time.

    Args:
        constraint: 'DP' for DemographicParity or 'EO' for EqualizedOdds.
        model: key used to look up the base estimator in ``models``.
        constraint_weight: forwarded to GridSearch as ``constraint_weight``.
        grid_size: forwarded to GridSearch as ``grid_size``.
        X_train: training features passed to ``fit``.
        Y_train: training labels passed to ``fit``.
        A_train: sensitive features passed to ``fit``.
        X_test: features passed to ``predict``.

    Returns:
        A tuple ``(Y_pred, elapsed)`` where ``elapsed`` is the result of
        ``time_diff_in_microseconds`` applied to the end/start difference.

    Raises:
        ValueError: if ``constraint`` is neither 'DP' nor 'EO'.
    """
    start_time = datetime.now()

    # Resolve the constraint first so an unknown name fails with a clear
    # error instead of an UnboundLocalError further down.
    if constraint == 'DP':
        constraints = DemographicParity()
    elif constraint == 'EO':
        constraints = EqualizedOdds()
    else:
        raise ValueError(
            "Unsupported constraint {!r}; expected 'DP' or 'EO'".format(constraint))

    clf = GridSearch(models[model],
                     constraints=constraints,
                     constraint_weight=constraint_weight,
                     grid_size=grid_size)
    clf.fit(X_train, Y_train, sensitive_features=A_train)
    Y_pred = clf.predict(X_test)
    end_time = datetime.now()
    return Y_pred, time_diff_in_microseconds(end_time - start_time)
def evaluate(weight, X_train, y_train, X_test, y_test, sex_train, sex_test,
             index):
    """Fit a DemographicParity GridSearch and record its error and disparity.

    A GradientBoostingClassifier is mitigated with a 10-point grid using
    ``weight`` as the constraint weight. The resulting overall error and
    the value returned by ``demographic`` are appended to
    ``errorlist[index]`` and ``dplist[index]`` and printed.

    Args:
        weight: forwarded to GridSearch as ``constraint_weight``.
        X_train: training features passed to ``fit``.
        y_train: training labels passed to ``fit``.
        X_test: features passed to ``predict``.
        y_test: true labels used by the summary metrics.
        sex_train: sensitive features passed to ``fit``.
        sex_test: sensitive features used by the summary metrics.
        index: position in ``errorlist`` / ``dplist`` to append to.
    """
    mitigator = GridSearch(GradientBoostingClassifier(),
                           DemographicParity(),
                           grid_size=10,
                           constraint_weight=weight)
    mitigator.fit(X_train, y_train, sensitive_features=sex_train)
    predictions = mitigator.predict(X_test)

    accuracy_by_group = group_summary(
        accuracy_score, y_test, predictions, sensitive_features=sex_test)
    selection_rates = selection_rate_group_summary(
        y_test, predictions, sensitive_features=sex_test)

    # Error is the complement of the overall accuracy.
    error = 1 - accuracy_by_group["overall"]
    dp = demographic(selection_rates)

    errorlist[index].append(error)
    dplist[index].append(dp)
    print("error:%f,dp:%f" % (error, dp))
Ejemplo n.º 9
0
def run_gridsearch_classification(estimator, moment):
    """Run classification test with GridSearch.

    Fits an unmitigated copy of ``estimator`` and an 11-point GridSearch on
    the data returned by ``fetch_adult``, then asserts that for every entry
    of the constraint's ``gamma`` on the test split, the mitigated
    violation is no larger in magnitude than the unmitigated one.

    Args:
        estimator: base estimator; deep-copied for the unmitigated baseline.
        moment: constraint passed to GridSearch; deep-copied for verification.
    """
    X_train, Y_train, A_train, X_test, Y_test, A_test = fetch_adult()

    # Copies are taken before GridSearch sees the originals.
    verification_moment = copy.deepcopy(moment)
    baseline = copy.deepcopy(estimator)
    baseline.fit(X_train, Y_train)

    num_predictors = 11
    mitigated = GridSearch(estimator, constraints=moment, grid_size=num_predictors)
    mitigated.fit(X_train, Y_train, sensitive_features=A_train)

    assert len(mitigated.predictors_) == num_predictors

    verification_moment.load_data(X_test, Y_test, sensitive_features=A_test)

    def predict_unmitigated(x):
        return baseline.predict(x)

    def predict_mitigated(x):
        return mitigated.predict(x)

    gamma_unmitigated = verification_moment.gamma(predict_unmitigated)
    gamma_mitigated = verification_moment.gamma(predict_mitigated)

    for key in gamma_mitigated.index:
        mitigated_violation = abs(gamma_mitigated[key])
        baseline_violation = abs(gamma_unmitigated[key])
        assert mitigated_violation <= baseline_violation, "Checking {0}".format(key)
Ejemplo n.º 10
0
def test_bgl_unfair(A_two_dim):
    """Run GridSearch with GroupLossMoment (square loss) on regression data.

    Fits a 7-point grid over LinearRegression, then checks the selected
    predictor's output and the output of every grid result on two probe
    rows.

    Args:
        A_two_dim: forwarded unchanged to ``_simple_regression_data``.
    """
    a0_count, a1_count = 5, 7
    a0_label, a1_label = 2, 3
    a0_factor, a1_factor = 1, 16

    X, Y, A = _simple_regression_data(a0_count, a1_count,
                                      a0_factor, a1_factor,
                                      a0_label, a1_label,
                                      A_two_dim)

    constraint = GroupLossMoment(SquareLoss(-np.inf, np.inf))
    mitigator = GridSearch(LinearRegression(),
                           constraints=constraint,
                           grid_size=7)
    mitigator.fit(X, Y, sensitive_features=A)

    assert len(mitigator.all_results) == 7

    probe = pd.DataFrame({
        "actual_feature": [0.2, 0.7],
        "sensitive_features": [a0_label, a1_label],
        "constant_ones_feature": [1, 1]
    })

    selected_output = mitigator.predict(probe)
    assert np.allclose([-1.91764706, 9.61176471], selected_output)

    per_result_output = [
        result.predictor.predict(probe) for result in mitigator.all_results
    ]
    expected_outputs = [
        [3.2, 11.2],
        [-3.47346939, 10.64897959],
        [-2.68, 10.12],
        [-1.91764706, 9.61176471],
        [-1.18461538, 9.12307692],
        [-0.47924528, 8.65283019],
        [0.2, 0.7],
    ]
    assert logging_all_close(expected_outputs, per_result_output)
Ejemplo n.º 11
0
def test_bgl_unfair():
    """Run GridSearch with a GroupLossMoment disparity metric on regression data.

    Fits a 7-point grid over LinearRegression using
    ``SimpleRegressionQualityMetric`` as the quality metric, then checks
    the selected predictor's output and ``posterior_predict`` on two probe
    rows.
    """
    a0_count, a1_count = 5, 7
    a0_label, a1_label = 2, 3
    a0_factor, a1_factor = 1, 16

    X, Y, A = _simple_regression_data(a0_count, a1_count,
                                      a0_factor, a1_factor,
                                      a0_label, a1_label)

    disparity = moments.GroupLossMoment(moments.ZeroOneLoss())
    mitigator = GridSearch(LinearRegression(),
                           disparity_metric=disparity,
                           quality_metric=SimpleRegressionQualityMetric(),
                           grid_size=7)
    mitigator.fit(X, Y, sensitive_features=A)

    assert len(mitigator.all_results) == 7

    probe = pd.DataFrame({
        "actual_feature": [0.2, 0.7],
        "sensitive_features": [a0_label, a1_label],
        "constant_ones_feature": [1, 1]
    })

    selected_output = mitigator.predict(probe)
    assert np.allclose([-1.91764706, 9.61176471], selected_output)

    posterior_output = mitigator.posterior_predict(probe)
    expected_outputs = [
        [3.2, 11.2],
        [-3.47346939, 10.64897959],
        [-2.68, 10.12],
        [-1.91764706, 9.61176471],
        [-1.18461538, 9.12307692],
        [-0.47924528, 8.65283019],
        [0.2, 0.7],
    ]
    assert np.allclose(expected_outputs, posterior_output)