# --- Code example #1 ---
 def test_valid_inputs(self, transformX, transformY, transformA, A_two_dim):
     """Check GridSearch fits cleanly for every input-transformation combo."""
     X, Y, A = _quick_data(A_two_dim)
     grid_search = GridSearch(self.estimator, self.disparity_criterion,
                              grid_size=2)
     grid_search.fit(transformX(X),
                     transformY(Y),
                     sensitive_features=transformA(A))
     assert_n_grid_search_results(2, grid_search)
# --- Code example #2 ---
def test_bgl_lagrange_specifications(A_two_dim):
    """Verify grid_size and an explicit grid give identical BGL grid searches.

    Builds the five-point Lagrange-multiplier grid by hand and checks that
    GridSearch with ``grid_size=5`` generates the same multipliers and fits
    the same models as GridSearch handed the explicit grid DataFrame.
    """
    a0_count = 13
    a1_count = 4

    a0_label = 5
    a1_label = 3

    a0_factor = 1
    a1_factor = 16

    X, y, A = _simple_regression_data(a0_count, a1_count,
                                      a0_factor, a1_factor,
                                      a0_label, a1_label, A_two_dim)

    estimator = LinearRegression()

    # Construct the Lagrange-multiplier grid explicitly, one column per
    # tradeoff point. pd.Int64Index was deprecated in pandas 1.4 and removed
    # in pandas 2.0; pd.Index with an int64 dtype is the supported spelling.
    idx = pd.Index(sorted([a0_label, a1_label]), dtype="int64")
    l0_series = pd.Series([2.0, 0.0], index=idx)
    l1_series = pd.Series([1.5, 0.5], index=idx)
    l2_series = pd.Series([1.0, 1.0], index=idx)
    l3_series = pd.Series([0.5, 1.5], index=idx)
    l4_series = pd.Series([0.0, 2.0], index=idx)
    grid_df = pd.concat([l0_series,
                         l1_series,
                         l2_series,
                         l3_series,
                         l4_series],
                        axis=1)

    grid_search1 = GridSearch(copy.deepcopy(estimator),
                              constraints=BoundedGroupLoss(ZeroOneLoss()),
                              grid_size=5)

    grid_search2 = GridSearch(copy.deepcopy(estimator),
                              constraints=BoundedGroupLoss(ZeroOneLoss()),
                              grid=grid_df)

    tradeoffs = [0, 0.25, 0.5, 0.75, 1]

    grid_search1.fit(X, y, sensitive_features=A)
    grid_search2.fit(X, y, sensitive_features=A)

    assert_n_grid_search_results(len(tradeoffs), grid_search1)
    assert_n_grid_search_results(len(tradeoffs), grid_search2)

    # Check we generated the same multipliers
    for i in range(len(tradeoffs)):
        lm1 = grid_search1.lambda_vecs_[i]
        lm2 = grid_search2.lambda_vecs_[i]
        assert lm1.equals(lm2)

    # Check the models are the same
    for i in range(len(tradeoffs)):
        coef1 = grid_search1.predictors_[i].coef_
        coef2 = grid_search2.predictors_[i].coef_
        assert np.array_equal(coef1, coef2)
# --- Code example #3 ---
def test_bgl_unfair(A_two_dim):
    """BoundedGroupLoss grid search on deliberately skewed regression data.

    Fits a grid of mitigated linear regressors and pins down both the best
    predictor's output and the per-grid-point predictions on two probe rows.
    """
    # Two groups of different sizes with very different target scales.
    a0_count = 5
    a1_count = 7

    a0_label = 2
    a1_label = 3

    a0_factor = 1
    a1_factor = 16

    grid_size = 7

    X, Y, A = _simple_regression_data(
        a0_count, a1_count, a0_factor, a1_factor, a0_label, a1_label, A_two_dim
    )

    constraint = BoundedGroupLoss(SquareLoss(-np.inf, np.inf))
    grid_search = GridSearch(
        LinearRegression(), constraints=constraint, grid_size=grid_size
    )

    grid_search.fit(X, Y, sensitive_features=A)

    assert_n_grid_search_results(grid_size, grid_search)

    test_X = pd.DataFrame(
        {
            "actual_feature": [0.2, 0.7],
            "sensitive_features": [a0_label, a1_label],
            "constant_ones_feature": [1, 1],
        }
    )

    assert np.allclose([-1.91764706, 9.61176471], grid_search.predict(test_X))

    all_predict = [p.predict(test_X) for p in grid_search.predictors_]

    # TODO: investigate where the different outcomes for the first grid point are from, likely
    # due to some ignored data points at the edge resulting in another solution with the same
    # least squares loss (i.e. both solutions acceptable).
    # Reflects https://github.com/fairlearn/fairlearn/issues/265
    first_point_ok = (
        logging_all_close([[3.2, 11.2]], [all_predict[0]])
        or logging_all_close([[3.03010885, 11.2]], [all_predict[0]])
    )
    assert first_point_ok

    expected_rest = [
        [-3.47346939, 10.64897959],
        [-2.68, 10.12],
        [-1.91764706, 9.61176471],
        [-1.18461538, 9.12307692],
        [-0.47924528, 8.65283019],
        [0.2, 0.7],
    ]
    assert logging_all_close(expected_rest, all_predict[1:])
# --- Code example #4 ---
def test_can_specify_and_generate_lambda_vecs(A_two_dim):
    """An explicit 3-column multiplier grid and grid_size=3 must agree.

    Fits one GridSearch with a generated grid and one with a hand-built
    grid DataFrame, then checks that multipliers and fitted coefficients
    match at every grid point.
    """
    score_threshold = 0.4

    number_a0 = 32
    number_a1 = 24

    a0_label = 11
    a1_label = 3

    X, y, A = _simple_threshold_data(number_a0, number_a1, score_threshold,
                                     score_threshold, a0_label, a1_label)

    estimator = LogisticRegression(solver="liblinear",
                                   fit_intercept=True,
                                   random_state=97)

    # Hand-build the (sign, event, group) multiplier index and three columns:
    # negative, zero, and positive Lagrange multiplier vectors.
    midx = pd.MultiIndex.from_product(
        [["+", "-"], ["all"], sorted([a0_label, a1_label])],
        names=["sign", "event", "group_id"])
    lagrange_negative_series = pd.Series([0.0, 0.0, 0.0, 2.0], index=midx)
    lagrange_zero_series = pd.Series(np.zeros(4), index=midx)
    lagrange_positive_series = pd.Series([0.0, 2.0, 0.0, 0.0], index=midx)
    grid_df = pd.concat(
        [lagrange_negative_series,
         lagrange_zero_series,
         lagrange_positive_series],
        axis=1)

    grid_search1 = GridSearch(copy.deepcopy(estimator),
                              constraints=DemographicParity(),
                              grid_size=3)

    grid_search2 = GridSearch(copy.deepcopy(estimator),
                              constraints=DemographicParity(),
                              grid=grid_df)

    # Try both ways of specifying the Lagrange multipliers
    grid_search2.fit(X, y, sensitive_features=A)
    grid_search1.fit(X, y, sensitive_features=A)

    assert_n_grid_search_results(3, grid_search1)
    assert_n_grid_search_results(3, grid_search2)

    # Check we generated the same multipliers
    for i in range(3):
        assert grid_search1.lambda_vecs_[i].equals(grid_search2.lambda_vecs_[i])

    # Check the models are the same
    for i in range(3):
        assert np.array_equal(grid_search1.predictors_[i].coef_,
                              grid_search2.predictors_[i].coef_)
# --- Code example #5 ---
def test_demographicparity_fair_uneven_populations_with_grid_offset(
        A_two_dim, offset):
    """DemographicParity grid search where the Lagrange-multiplier grid
    carries an initial constant offset."""
    score_threshold = 0.625

    number_a0 = 4
    number_a1 = 4

    a0_label = 17
    a1_label = 37

    grid_size = 11

    # Constant offset series over the (sign, event, group) multiplier index.
    midx = pd.MultiIndex.from_product(
        [["+", "-"], ["all"], [a0_label, a1_label]],
        names=["sign", "event", "group_id"])
    grid_offset = pd.Series(offset, index=midx)

    X, Y, A = _simple_threshold_data(
        number_a0,
        number_a1,
        score_threshold,
        score_threshold,
        a0_label,
        a1_label,
        A_two_dim,
    )

    grid_search = GridSearch(
        LogisticRegression(solver="liblinear", fit_intercept=True),
        constraints=DemographicParity(),
        grid_size=grid_size,
        grid_offset=grid_offset,
    )

    grid_search.fit(X, Y, sensitive_features=A)
    assert_n_grid_search_results(grid_size, grid_search)

    test_X = pd.DataFrame({
        "actual_feature": [0.2, 0.7],
        "sensitive_features": [a0_label, a1_label],
        "constant_ones_feature": [1, 1],
    })

    # Best predictor's labels and probabilities on the two probe rows.
    assert np.array_equal(grid_search.predict(test_X), [0, 1])
    assert np.allclose(grid_search.predict_proba(test_X),
                       [[0.55069845, 0.44930155], [0.41546008, 0.58453992]])

    # The first grid point's predictor flips the labels.
    assert np.array_equal(grid_search.predictors_[0].predict(test_X), [1, 0])
# --- Code example #6 ---
def test_demographicparity_fair_uneven_populations(A_two_dim):
    """DemographicParity grid search on threshold data.

    Variant of test_demographicparity_already_fair, which has unequal
    populations in the two classes; the threshold is also adjustable.
    """
    score_threshold = 0.625

    number_a0 = 4
    number_a1 = 4

    a0_label = 17
    a1_label = 37

    grid_size = 11

    X, Y, A = _simple_threshold_data(
        number_a0,
        number_a1,
        score_threshold,
        score_threshold,
        a0_label,
        a1_label,
        A_two_dim,
    )

    grid_search = GridSearch(
        LogisticRegression(solver="liblinear", fit_intercept=True),
        constraints=DemographicParity(),
        grid_size=grid_size,
    )

    grid_search.fit(X, Y, sensitive_features=A)
    assert_n_grid_search_results(grid_size, grid_search)

    test_X = pd.DataFrame({
        "actual_feature": [0.2, 0.7],
        "sensitive_features": [a0_label, a1_label],
        "constant_ones_feature": [1, 1],
    })

    # Best predictor's labels and probabilities on the two probe rows.
    assert np.array_equal(grid_search.predict(test_X), [0, 1])
    assert np.allclose(grid_search.predict_proba(test_X),
                       [[0.53748641, 0.46251359], [0.46688736, 0.53311264]])

    # The first grid point's predictor flips the labels.
    assert np.array_equal(grid_search.predictors_[0].predict(test_X), [1, 0])
# --- Code example #7 ---
def test_lambda_vec_zero_unchanged_model(A_two_dim):
    """An all-zero Lagrange multiplier must reproduce the unmitigated model."""
    score_threshold = 0.6

    number_a0 = 64
    number_a1 = 24

    a0_label = 7
    a1_label = 22

    X, y, A = _simple_threshold_data(
        number_a0,
        number_a1,
        score_threshold,
        score_threshold,
        a0_label,
        a1_label,
        A_two_dim,
    )

    estimator = LogisticRegression(solver="liblinear",
                                   fit_intercept=True,
                                   random_state=97)

    # Baseline: the same estimator trained without any mitigation.
    unmitigated_estimator = copy.deepcopy(estimator)
    unmitigated_estimator.fit(X, y)

    # Single-column grid holding an all-zero Lagrange multiplier vector.
    midx = pd.MultiIndex.from_product(
        [["+", "-"], ["all"], [a0_label, a1_label]],
        names=["sign", "event", "group_id"])
    grid_df = pd.DataFrame(pd.Series(np.zeros(4), index=midx))

    grid_search = GridSearch(estimator,
                             constraints=DemographicParity(),
                             grid=grid_df)
    grid_search.fit(X, y, sensitive_features=A)
    assert_n_grid_search_results(1, grid_search)

    # A zero multiplier must leave the fitted coefficients untouched.
    assert np.array_equal(
        grid_search.predictors_[grid_search.best_idx_].coef_,
        unmitigated_estimator.coef_)