def test_no_predict_proba_before_fit(self):
    """Check that ``predict_proba`` on an unfitted GridSearch raises.

    The raised ``NotFittedError`` must carry ``not_fitted_error_msg``
    formatted with the ``GridSearch`` class name as its first argument.
    """
    unfitted_search = GridSearch(
        self.estimator,
        self.disparity_criterion,
        sample_weight_name=self.sample_weight_name,
    )
    features, _, _ = _quick_data()

    with pytest.raises(NotFittedError) as raised:
        unfitted_search.predict_proba(features)

    expected_message = not_fitted_error_msg.format(GridSearch.__name__)
    assert expected_message == raised.value.args[0]
def test_no_predict_proba_before_fit(self):
    """Check that ``predict_proba`` on an unfitted GridSearch raises.

    The raised ``NotFittedException`` must carry the fixed
    "must call fit" message as its first argument.
    """
    unfitted_search = GridSearch(self.estimator, self.disparity_criterion)
    features, _, _ = self._quick_data()
    expected_message = "Must call fit before attempting to make predictions"

    with pytest.raises(NotFittedException) as raised:
        unfitted_search.predict_proba(features)

    assert expected_message == raised.value.args[0]
def test_demographicparity_fair_uneven_populations(A_two_dim):
    """Fit a DemographicParity GridSearch on threshold data and check its output.

    Verifies the number of grid results, the labels and probabilities
    returned by the fitted GridSearch for two probe rows, and the labels
    returned by the first predictor in ``all_results``.

    Parameters
    ----------
    A_two_dim
        Forwarded unchanged to ``_simple_threshold_data``; presumably a
        fixture selecting the shape of the sensitive-feature array
        (confirm against the fixture definition).
    """
    # Variant of test_demographicparity_already_fair, which has unequal
    # populations in the two classes
    # Also allow the threshold to be adjustable
    # NOTE(review): both groups are currently given 4 samples each.
    score_threshold = 0.625

    number_a0 = 4
    number_a1 = 4

    a0_label = 17
    a1_label = 37

    grid_size = 11

    X, Y, A = _simple_threshold_data(
        number_a0, number_a1,
        score_threshold, score_threshold,
        a0_label, a1_label,
        A_two_dim,
    )

    target = GridSearch(
        LogisticRegression(solver='liblinear', fit_intercept=True),
        constraints=DemographicParity(),
        grid_size=grid_size,
    )
    target.fit(X, Y, sensitive_features=A)
    assert len(target.all_results) == grid_size

    test_X = pd.DataFrame({
        "actual_feature": [0.2, 0.7],
        "sensitive_features": [a0_label, a1_label],
        "constant_ones_feature": [1, 1],
    })

    # Previously this result was computed and then silently overwritten.
    # The expected labels follow from the probabilities asserted below
    # (class-1 probability 0.46 -> 0, 0.53 -> 1), assuming predict and
    # predict_proba are served by the same selected predictor.
    sample_results = target.predict(test_X)
    assert np.array_equal(sample_results, [0, 1])

    sample_proba = target.predict_proba(test_X)
    assert np.allclose(sample_proba, [[0.53748641, 0.46251359],
                                      [0.46688736, 0.53311264]])

    # The first grid point on its own yields the opposite labelling.
    first_results = target.all_results[0].predictor.predict(test_X)
    assert np.array_equal(first_results, [1, 0])
def test_demographicparity_fair_uneven_populations_with_grid_offset(
        A_two_dim, offset):
    """Fit a DemographicParity GridSearch whose multiplier grid is offset.

    Builds a ``grid_offset`` Series indexed by (sign, event, group_id),
    fits the search on threshold data, then checks the number of grid
    results, the labels and probabilities from the fitted GridSearch,
    and the labels from the first entry of ``predictors_``.

    Parameters
    ----------
    A_two_dim
        Forwarded unchanged to ``_simple_threshold_data``.
    offset
        Value(s) used as the data of the ``grid_offset`` Series.
    """
    # Grid of Lagrangian multipliers has some initial offset
    threshold = 0.625
    group0_count, group1_count = 4, 4
    group0_label, group1_label = 17, 37
    n_grid_points = 11

    offset_index = pd.MultiIndex.from_product(
        [["+", "-"], ["all"], [group0_label, group1_label]],
        names=["sign", "event", "group_id"],
    )
    grid_offset = pd.Series(offset, index=offset_index)

    X, Y, A = _simple_threshold_data(
        group0_count, group1_count,
        threshold, threshold,
        group0_label, group1_label,
        A_two_dim,
    )

    base_estimator = LogisticRegression(solver="liblinear", fit_intercept=True)
    search = GridSearch(
        base_estimator,
        constraints=DemographicParity(),
        grid_size=n_grid_points,
        grid_offset=grid_offset,
    )
    search.fit(X, Y, sensitive_features=A)
    assert_n_grid_search_results(n_grid_points, search)

    probe_rows = pd.DataFrame({
        "actual_feature": [0.2, 0.7],
        "sensitive_features": [group0_label, group1_label],
        "constant_ones_feature": [1, 1],
    })

    # Labels and probabilities from the fitted search itself.
    predicted_labels = search.predict(probe_rows)
    assert np.array_equal(predicted_labels, [0, 1])

    predicted_proba = search.predict_proba(probe_rows)
    expected_proba = [[0.55069845, 0.44930155],
                      [0.41546008, 0.58453992]]
    assert np.allclose(predicted_proba, expected_proba)

    # Labels from the first predictor in the grid.
    first_predictor_labels = search.predictors_[0].predict(probe_rows)
    assert np.array_equal(first_predictor_labels, [1, 0])