def evaluate(eps, X_train, y_train, X_test, y_test, sex_train, sex_test, index):
    """Fit an ExponentiatedGradient mitigator and record error / demographic parity.

    Trains a GradientBoostingClassifier under a DemographicParity constraint
    with slack ``eps`` on the training split, predicts on the test split, and
    appends the overall error and the demographic-parity gap to the
    module-level ``errorlist`` / ``dplist`` accumulators at position ``index``.
    """
    base_learner = GradientBoostingClassifier()
    parity_constraint = DemographicParity()
    mitigator = ExponentiatedGradient(base_learner, parity_constraint, eps=eps)
    mitigator.fit(X_train, y_train, sensitive_features=sex_train)

    predictions = mitigator.predict(X_test)
    accuracy_summary = group_summary(
        accuracy_score, y_test, predictions, sensitive_features=sex_test)
    selection_rates = selection_rate_group_summary(
        y_test, predictions, sensitive_features=sex_test)

    # Overall misclassification rate and demographic-parity gap for this eps.
    error = 1 - accuracy_summary["overall"]
    dp = demographic(selection_rates)
    errorlist[index].append(error)
    dplist[index].append(dp)
    print("error:%f,dp:%f" % (error, dp))
def test_selection_rate_group_summary():
    """Weighted selection rate with an explicit pos_label, overall and by group."""
    labels_true = ["a", "b", "a", "b", "b", "b", "a", "a"]
    labels_pred = ["a", "b", "a", "a", "b", "b", "a", "b"]
    sample_weights = [1, 2, 3, 4, 1, 2, 4, 8]
    sensitive = ["r", "r", "r", "r", "q", "q", "q", "q"]

    summary = metrics.selection_rate_group_summary(
        labels_true,
        labels_pred,
        sensitive_features=sensitive,
        pos_label="b",
        sample_weight=sample_weights)

    assert summary.overall == 0.52
    assert summary.by_group["r"] == 0.2
    assert summary.by_group["q"] == pytest.approx(0.73333333)
def evaluate(weight, X_train, y_train, X_test, y_test, sex_train, sex_test, index):
    """Fit a GridSearch mitigator and record error / demographic parity.

    Trains a GradientBoostingClassifier via a GridSearch reduction (grid size
    10) under a DemographicParity constraint with the given constraint
    ``weight``, predicts on the test split, and appends the overall error and
    the demographic-parity gap to the module-level ``errorlist`` / ``dplist``
    accumulators at position ``index``.
    """
    base_learner = GradientBoostingClassifier()
    parity_constraint = DemographicParity()
    mitigator = GridSearch(base_learner, parity_constraint,
                           grid_size=10, constraint_weight=weight)
    mitigator.fit(X_train, y_train, sensitive_features=sex_train)

    predictions = mitigator.predict(X_test)
    accuracy_summary = group_summary(
        accuracy_score, y_test, predictions, sensitive_features=sex_test)
    selection_rates = selection_rate_group_summary(
        y_test, predictions, sensitive_features=sex_test)

    # Overall misclassification rate and demographic-parity gap for this weight.
    error = 1 - accuracy_summary["overall"]
    dp = demographic(selection_rates)
    errorlist[index].append(error)
    dplist[index].append(dp)
    print("error:%f,dp:%f" % (error, dp))
X, y_true = shap.datasets.adult() #readfrom("adult.data") y_true = y_true * 1 sex = X['Sex'].apply(lambda sex: "female" if sex == 0 else "male") classifier = DecisionTreeClassifier() classifier.fit(X, y_true) y_pred = classifier.predict(X) result1 = metrics.group_summary(accuracy_score, y_true, y_pred, sensitive_features=sex) print("group_summary", result1) result2 = metrics.selection_rate_group_summary(y_true, y_pred, sensitive_features=sex) print("selection_rate_group_summary", result2) # FairlearnDashboard(sensitive_features=sex, # sensitive_feature_names=['sex'], # y_true=y_true, # y_pred={"initial model": y_pred}) np.random.seed(0) constraint = DemographicParity() classifier = DecisionTreeClassifier() mitigator = ExponentiatedGradient(classifier, constraint) #print("constructing mitigator") mitigator.fit(X, y_true, sensitive_features=sex) y_pred_mitigated = mitigator.predict(X) result2_mitigated = metrics.selection_rate_group_summary(
# Copyright (c) Microsoft Corporation and Fairlearn contributors.
# Licensed under the MIT License.

"""Produce plot of selection rates for the quickstart guide."""

from bokeh.plotting import figure, show

from fairlearn.metrics import selection_rate_group_summary
from fairlearn.datasets import fetch_adult

# Adult census dataset; positive label is income above $50K.
data = fetch_adult(as_frame=True)
X = data.data
y_true = (data.target == '>50K') * 1
sex = X['sex']

# Selection rate of the ground truth per group: y_true is deliberately passed
# as both y_true and y_pred, giving the fraction of each group labeled >$50K.
selection_rates = selection_rate_group_summary(y_true, y_true,
                                               sensitive_features=sex)

xs = list(selection_rates.by_group.keys())
ys = [selection_rates.by_group[s] for s in xs]

p = figure(x_range=xs, plot_height=480, plot_width=640,
           title="Fraction earning over $50,000",  # fixed typo: was "$50,0000"
           toolbar_location=None, tools="")
p.vbar(x=xs, top=ys, width=0.9)

p.y_range.start = 0
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
# NOTE(review): `show` is imported but not called in this visible span —
# presumably show(p) follows beyond this chunk; confirm against the full file.