def test_smoke_extra_arg(self, transform_y_t, transform_y_p, transform_gid):
    y_t = transform_y_t([0, 0, 1, 1, 0, 1, 1, 1])
    y_p = transform_y_p([0, 1, 1, 1, 1, 0, 0, 1])
    gid = transform_gid([0, 0, 0, 0, 1, 1, 1, 1])

    # Run with the argument defaulted
    result = metrics.group_summary(mock_func_extra_arg, y_t, y_p, sensitive_features=gid)
    assert result.overall == 5
    assert len(result.by_group) == 2
    assert result.by_group[0] == 2
    assert result.by_group[1] == 3

    # Run with the argument specified
    result = metrics.group_summary(mock_func_extra_arg, y_t, y_p, sensitive_features=gid, my_arg=2)
    assert result.overall == 10
    assert len(result.by_group) == 2
    assert result.by_group[0] == 4
    assert result.by_group[1] == 6
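# The tests in this class rely on mock metric functions defined at module
# level elsewhere in the test file. The definitions below are a sketch
# reconstructed from the assertions, not the original source:
def mock_func(y_true, y_pred):
    # Ignores predictions; the "metric" is simply the sum of the true labels
    return np.sum(y_true)


def mock_func_weight(y_true, y_pred, sample_weight):
    # Weighted sum of the true labels
    return np.sum(np.multiply(y_true, sample_weight))


def mock_func_extra_arg(y_true, y_pred, my_arg=1):
    # Extra keyword argument scales the result; defaults to 1
    return my_arg * np.sum(y_true)


def mock_func_matrix_return(y_true, y_pred):
    # Returns a matrix, so scalar-only helpers such as
    # group_min_from_summary are expected to raise ValueError
    return np.ones([len(y_true), np.sum(y_pred)])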
def test_true_weight_length_mismatch(self, transform_y_t, transform_s_w):
    y_t = transform_y_t([0, 0, 1, 1, 0, 1, 1, 1])
    y_p = [0, 1, 1, 1, 1, 0, 0, 0]
    gid = [0, 0, 0, 0, 1, 1, 2, 3]
    s_w = transform_s_w([1, 1, 1, 1, 2, 2, 3])  # one element short of y_t

    with pytest.raises(ValueError) as exception_context:
        _ = metrics.group_summary(
            mock_func_weight, y_t, y_p, sensitive_features=gid, sample_weight=s_w)

    expected = "Array sample_weight is not the same size as y_true"
    assert exception_context.value.args[0] == expected
def test_matrix_metric(self, transform_y_t, transform_y_p, transform_gid):
    a = "ABC"
    b = "DEF"
    c = "GHI"
    y_t = transform_y_t([0, 0, 1, 1, 0, 1, 1, 1])
    y_p = transform_y_p([0, 1, 1, 1, 1, 0, 0, 1])
    gid = transform_gid([a, a, a, b, b, c, c, c])

    result = metrics.group_summary(mock_func_matrix_return, y_t, y_p, sensitive_features=gid)
    assert np.array_equal(result.overall, np.ones([8, 5]))
    assert np.array_equal(result.by_group[a], np.ones([3, 2]))
    assert np.array_equal(result.by_group[b], np.ones([2, 2]))
    assert np.array_equal(result.by_group[c], np.ones([3, 1]))
def test_smoke(self, transform_y_t, transform_y_p, transform_gid):
    y_t = transform_y_t([0, 0, 1, 1, 0, 1, 1, 1])
    y_p = transform_y_p([0, 1, 1, 1, 1, 0, 0, 1])
    gid = transform_gid([0, 0, 0, 0, 1, 1, 1, 1])

    result = metrics.group_summary(mock_func, y_t, y_p, sensitive_features=gid)
    assert result.overall == 5
    assert len(result.by_group) == 2
    assert result.by_group[0] == 2
    assert result.by_group[1] == 3
    assert metrics.group_min_from_summary(result) == 2
    assert metrics.group_max_from_summary(result) == 3
    assert metrics.difference_from_summary(result) == 1
    assert metrics.ratio_from_summary(result) == pytest.approx(0.6666666667)
def test_true_predict_length_mismatch(self, transform_y_a, transform_y_p):
    y_a = transform_y_a([0, 0, 1, 1, 0, 1, 1, 1])
    y_p = transform_y_p([0, 1, 1, 1, 1, 0, 0])  # one element short of y_a
    gid = [0, 0, 0, 0, 1, 1, 2, 2]
    s_w = [1, 1, 1, 1, 2, 2, 3, 3]

    with pytest.raises(ValueError) as exception_context:
        _ = metrics.group_summary(
            mock_func_weight, y_a, y_p, sensitive_features=gid, sample_weight=s_w)

    expected = "Array y_pred is not the same size as y_true"
    assert exception_context.value.args[0] == expected
def test_groups_only_one_element(self):
    y_t = [1, 2]
    y_p = [1, 2]
    gid = [0, 1]

    def sum_lengths(y_true, y_pred):
        return len(y_true) + len(y_pred)

    result = metrics.group_summary(sum_lengths, y_t, y_p, sensitive_features=gid)
    assert result.overall == 4
    assert result.by_group[0] == 2
    assert result.by_group[1] == 2
    assert metrics.group_min_from_summary(result) == 2
    assert metrics.group_max_from_summary(result) == 2
    assert metrics.difference_from_summary(result) == 0
    assert metrics.ratio_from_summary(result) == 1
def test_single_element_input(self):
    y_t = [0]
    y_p = [0]
    gid = [0]
    s_w = [0]

    def sum_lengths(y_true, y_pred, sample_weight):
        return len(y_true) + len(y_pred) + len(sample_weight)

    result = metrics.group_summary(
        sum_lengths, y_t, y_p, sensitive_features=gid, sample_weight=s_w)
    assert result.overall == 3
    assert result.by_group[0] == 3
    assert metrics.group_min_from_summary(result) == 3
    assert metrics.group_max_from_summary(result) == 3
    assert metrics.difference_from_summary(result) == 0
    assert metrics.ratio_from_summary(result) == 1
def test_negative_results(self):
    y_t = [0, 0, 1, 1, 0, 1, 1, 1]
    y_p = [0, 1, 1, 1, 1, 0, 0, 1]
    gid = [0, 0, 0, 0, 0, 1, 1, 1]

    def negative_results(y_true, y_pred):
        return -(len(y_true) + len(y_pred))

    result = metrics.group_summary(negative_results, y_t, y_p, sensitive_features=gid)
    assert result.overall == -16
    assert result.by_group[0] == -10
    assert result.by_group[1] == -6
    assert metrics.group_min_from_summary(result) == -10
    assert metrics.group_max_from_summary(result) == -6
    assert metrics.difference_from_summary(result) == 4
    # The ratio is undefined for negative metric values
    assert np.isnan(metrics.ratio_from_summary(result))
def test_with_weights(self, transform_y_t, transform_y_p, transform_gid, transform_s_w):
    y_t = transform_y_t([0, 0, 1, 1, 0, 1, 1, 1])
    y_p = transform_y_p([0, 1, 1, 1, 1, 0, 0, 1])
    gid = transform_gid([0, 0, 0, 0, 1, 1, 2, 2])
    s_w = transform_s_w([1, 1, 1, 1, 2, 2, 3, 3])

    result = metrics.group_summary(
        mock_func_weight, y_t, y_p, sensitive_features=gid, sample_weight=s_w)
    assert result.overall == 10
    assert len(result.by_group) == 3
    assert result.by_group[0] == 2
    assert result.by_group[1] == 2
    assert result.by_group[2] == 6
    assert metrics.group_min_from_summary(result) == 2
    assert metrics.group_max_from_summary(result) == 6
    assert metrics.difference_from_summary(result) == 4
    assert metrics.ratio_from_summary(result) == pytest.approx(0.33333333333333)
def test_string_groups(self, transform_y_t, transform_y_p, transform_gid):
    a = "ABC"
    b = "DEF"
    c = "GHI"
    y_t = transform_y_t([0, 0, 1, 1, 0, 1, 1, 1])
    y_p = transform_y_p([0, 1, 1, 1, 1, 0, 0, 1])
    gid = transform_gid([a, a, a, b, b, c, c, c])

    result = metrics.group_summary(mock_func, y_t, y_p, sensitive_features=gid)
    assert result.overall == 5
    assert len(result.by_group) == 3
    assert result.by_group[a] == 1
    assert result.by_group[b] == 1
    assert result.by_group[c] == 3
    assert metrics.group_min_from_summary(result) == 1
    assert metrics.group_max_from_summary(result) == 3
    assert metrics.difference_from_summary(result) == 2
    assert metrics.ratio_from_summary(result) == pytest.approx(0.33333333333333)
def test_metric_results_zero(self):
    y_t = [0, 0, 1, 1, 0, 1, 1, 1]
    y_p = [0, 1, 1, 1, 1, 0, 0, 1]
    gid = [0, 0, 0, 0, 0, 1, 1, 1]

    def zero_results(y_true, y_pred):
        # Arrays will always be the same length
        return len(y_true) - len(y_pred)

    result = metrics.group_summary(zero_results, y_t, y_p, sensitive_features=gid)
    assert result.overall == 0
    assert result.by_group[0] == 0
    assert result.by_group[1] == 0
    assert metrics.group_min_from_summary(result) == 0
    assert metrics.group_max_from_summary(result) == 0
    assert metrics.difference_from_summary(result) == 0
    # Special case: min and max are both zero, so the ratio is defined as 1
    assert metrics.ratio_from_summary(result) == 1
def test_matrix_metric_other_properties(self):
    a = "ABC"
    b = "DEF"
    c = "GHI"
    y_t = [0, 0, 1, 1, 0, 1, 1, 1]
    y_p = [0, 1, 1, 1, 1, 0, 0, 1]
    gid = [a, a, a, b, b, c, c, c]

    result = metrics.group_summary(mock_func_matrix_return, y_t, y_p, sensitive_features=gid)

    # The scalar-only helpers should fail on matrix-valued results
    with pytest.raises(ValueError):
        _ = metrics.group_min_from_summary(result)
    with pytest.raises(ValueError):
        _ = metrics.group_max_from_summary(result)
    with pytest.raises(ValueError):
        _ = metrics.difference_from_summary(result)
    with pytest.raises(ValueError):
        _ = metrics.ratio_from_summary(result)
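# The transform_y_t / transform_y_p / transform_gid / transform_s_w arguments
# used above are pytest fixtures that convert plain lists into the various
# input types group_summary must accept. Their definitions are not shown in
# this file; a minimal conftest.py sketch, assuming list, ndarray, and Series
# variants (the parametrization is an assumption, not the original fixtures):
import numpy as np
import pandas as pd
import pytest


@pytest.fixture(params=[lambda x: x, np.asarray, pd.Series])
def transform_y_t(request):
    # Each test runs once per input type; the fixture yields the converter
    return request.param


# transform_y_p, transform_gid, and transform_s_w would be defined the same
# way, so every test is exercised against each combination of input types.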
def evaluate(eps, X_train, y_train, X_test, y_test, sex_train, sex_test, index):
    estimator = GradientBoostingClassifier()
    constraints = DemographicParity()
    egsolver = ExponentiatedGradient(estimator, constraints, eps=eps)
    egsolver.fit(X_train, y_train, sensitive_features=sex_train)
    y_pred = egsolver.predict(X_test)
    # print("y_pred", y_pred)

    group_summary_adult = group_summary(accuracy_score, y_test, y_pred,
                                        sensitive_features=sex_test)
    selection_rate_summary = selection_rate_group_summary(
        y_test, y_pred, sensitive_features=sex_test)

    error = 1 - group_summary_adult["overall"]
    dp = demographic(selection_rate_summary)
    errorlist[index].append(error)
    dplist[index].append(dp)
    print("error:%f,dp:%f" % (error, dp))
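# `demographic` is not defined in this snippet. Judging by its use, it reduces
# a selection-rate summary to a demographic-parity gap; a plausible sketch
# (an assumption, not the original definition):
def demographic(selection_rate_summary):
    rates = list(selection_rate_summary["by_group"].values())
    # Demographic parity difference: largest gap in selection rates
    return max(rates) - min(rates)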
def evaluate(weight, X_train, y_train, X_test, y_test, sex_train, sex_test, index):
    estimator = GradientBoostingClassifier()
    constraints = DemographicParity()
    gssolver = GridSearch(estimator, constraints, grid_size=10, constraint_weight=weight)
    gssolver.fit(X_train, y_train, sensitive_features=sex_train)
    y_pred = gssolver.predict(X_test)
    # print("y_pred", y_pred)

    group_summary_adult = group_summary(accuracy_score, y_test, y_pred,
                                        sensitive_features=sex_test)
    selection_rate_summary = selection_rate_group_summary(
        y_test, y_pred, sensitive_features=sex_test)

    error = 1 - group_summary_adult["overall"]
    dp = demographic(selection_rate_summary)
    errorlist[index].append(error)
    dplist[index].append(dp)
    print("error:%f,dp:%f" % (error, dp))
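# Neither script shows the harness that drives evaluate(); errorlist and
# dplist are module-level accumulators indexed by parameter position. A
# minimal sketch for the constraint_weight variant above (the weight grid,
# repeat count, and data splits are assumptions):
weights = [0.2, 0.4, 0.6, 0.8]
errorlist = [[] for _ in weights]
dplist = [[] for _ in weights]

for index, weight in enumerate(weights):
    for _ in range(5):  # repeat to average out training randomness
        evaluate(weight, X_train, y_train, X_test, y_test,
                 sex_train, sex_test, index)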
        else:
            y_true.append(0)
    # print(data["Sex"][:10])
    # print(y_true[:100])
    return pd.DataFrame(data), np.array(y_true)'''

X, y_true = shap.datasets.adult()  # readfrom("adult.data")
y_true = y_true * 1  # convert boolean labels to 0/1 integers
sex = X['Sex'].apply(lambda sex: "female" if sex == 0 else "male")

classifier = DecisionTreeClassifier()
classifier.fit(X, y_true)
y_pred = classifier.predict(X)

result1 = metrics.group_summary(accuracy_score, y_true, y_pred, sensitive_features=sex)
print("group_summary", result1)
result2 = metrics.selection_rate_group_summary(y_true, y_pred, sensitive_features=sex)
print("selection_rate_group_summary", result2)

# FairlearnDashboard(sensitive_features=sex,
#                    sensitive_feature_names=['sex'],
#                    y_true=y_true,
#                    y_pred={"initial model": y_pred})

np.random.seed(0)
constraint = DemographicParity()
classifier = DecisionTreeClassifier()
mitigator = ExponentiatedGradient(classifier, constraint)
# linear regression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression

reg = LinearRegression()
reg.fit(X_train, Y_train)
lr_pred = reg.predict(X_test)
print("MSE: ", mean_squared_error(y_pred=lr_pred, y_true=Y_test))
print("RMSE: ", mean_squared_error(y_pred=lr_pred, y_true=Y_test, squared=False))
print("R^2: ", r2_score(y_true=Y_test, y_pred=lr_pred))

from fairlearn.metrics import group_summary

print("Under Bounded Group Loss constraint, MSE summary: {}".format(
    group_summary(mean_squared_error, Y_test, lr_pred, sensitive_features=A_test)))

results = group_summary(mean_squared_error, Y_test, lr_pred, sensitive_features=A_test)
cls_error = results['overall']
error_0 = results['by_group'][0]
error_1 = results['by_group'][1]
print("err_gap: ", np.abs(error_0 - error_1))

# save to file
f_out_np = 'data/insurance.npz'
np.savez(f_out_np, x_train=X_train, x_test=X_test,