def evaluate(eps, X_train, y_train, X_test, y_test, sex_train, sex_test,
             index):
    """Train an ExponentiatedGradient mitigator and record its test metrics.

    A ``GradientBoostingClassifier`` is wrapped in ``ExponentiatedGradient``
    with a ``DemographicParity`` constraint and the given ``eps``, fitted on
    the training split (``sex_train`` as the sensitive feature) and used to
    predict ``X_test``.

    The overall error (``1 -`` the ``"overall"`` entry of the accuracy group
    summary) and the value returned by ``demographic`` for the selection-rate
    summary are appended to ``errorlist[index]`` and ``dplist[index]``
    respectively, then printed. Nothing is returned.
    """
    mitigator = ExponentiatedGradient(GradientBoostingClassifier(),
                                      DemographicParity(),
                                      eps=eps)
    mitigator.fit(X_train, y_train, sensitive_features=sex_train)
    predictions = mitigator.predict(X_test)

    # Per-group summaries on the held-out split, keyed by sex_test.
    accuracy_summary = group_summary(
        accuracy_score, y_test, predictions, sensitive_features=sex_test)
    rate_summary = selection_rate_group_summary(
        y_test, predictions, sensitive_features=sex_test)

    error = 1 - accuracy_summary["overall"]
    dp = demographic(rate_summary)

    # Results are accumulated in the module-level lists, one slot per index.
    errorlist[index].append(error)
    dplist[index].append(dp)
    print("error:%f,dp:%f" % (error, dp))
Ejemplo n.º 2
0
def test_selection_rate_group_summary():
    """Check weighted selection rates overall and per sensitive group.

    With ``pos_label=b`` the expected values are the weight of the
    predictions equal to ``b`` divided by the total weight:

    * overall: (2 + 1 + 2 + 8) / 25 = 0.52
    * group r: 2 / (1 + 2 + 3 + 4) = 0.2
    * group q: (1 + 2 + 8) / (1 + 2 + 4 + 8) = 0.7333...
    """
    a = "a"
    b = "b"
    q = "q"
    r = "r"

    y_true = [a, b, a, b, b, b, a, a]
    y_pred = [a, b, a, a, b, b, a, b]
    weight = [1, 2, 3, 4, 1, 2, 4, 8]
    groups = [r, r, r, r, q, q, q, q]

    result = metrics.selection_rate_group_summary(
        y_true, y_pred,
        sensitive_features=groups,
        pos_label=b,
        sample_weight=weight)

    # All three values are computed floats; compare with a tolerance so the
    # test does not depend on the exact order of floating-point operations.
    assert result.overall == pytest.approx(0.52)
    assert result.by_group[r] == pytest.approx(0.2)
    assert result.by_group[q] == pytest.approx(0.73333333)
def evaluate(weight, X_train, y_train, X_test, y_test, sex_train, sex_test,
             index):
    """Train a GridSearch mitigator and record its test metrics.

    A ``GradientBoostingClassifier`` is wrapped in ``GridSearch`` with a
    ``DemographicParity`` constraint, ``grid_size=10`` and
    ``constraint_weight=weight``. It is fitted on the training split with
    ``sex_train`` as the sensitive feature and used to predict ``X_test``.

    The overall error (``1 -`` the ``"overall"`` entry of the accuracy group
    summary) is appended to ``errorlist[index]``; the value ``demographic``
    returns for the selection-rate summary is appended to ``dplist[index]``.
    Both are printed. Nothing is returned.
    """
    base_model = GradientBoostingClassifier()
    parity = DemographicParity()
    search = GridSearch(
        base_model, parity, grid_size=10, constraint_weight=weight)
    search.fit(X_train, y_train, sensitive_features=sex_train)
    test_predictions = search.predict(X_test)

    # Accuracy and selection rate on the test split, grouped by sex_test.
    per_group_accuracy = group_summary(
        accuracy_score,
        y_test,
        test_predictions,
        sensitive_features=sex_test,
    )
    per_group_selection = selection_rate_group_summary(
        y_test,
        test_predictions,
        sensitive_features=sex_test,
    )

    error = 1 - per_group_accuracy["overall"]
    dp = demographic(per_group_selection)

    # Store both metrics in the module-level result lists under this index.
    errorlist[index].append(error)
    dplist[index].append(dp)
    print("error:%f,dp:%f" % (error, dp))
Ejemplo n.º 4
0
# Load the adult dataset bundled with shap as features X and labels y_true.
X, y_true = shap.datasets.adult()  #readfrom("adult.data")
# Multiplying by 1 presumably turns boolean labels into 0/1 integers
# (TODO confirm the label dtype returned by shap).
y_true = y_true * 1
# Sensitive feature: map the 'Sex' column to labels (0 -> "female",
# anything else -> "male").
sex = X['Sex'].apply(lambda sex: "female" if sex == 0 else "male")

# Unmitigated baseline: a decision tree fitted on the full dataset.
classifier = DecisionTreeClassifier()
classifier.fit(X, y_true)

# Predictions are made on the same data the tree was trained on, so the
# summaries below describe in-sample behaviour.
y_pred = classifier.predict(X)
# Accuracy overall and per value of `sex`.
result1 = metrics.group_summary(accuracy_score,
                                y_true,
                                y_pred,
                                sensitive_features=sex)
print("group_summary", result1)
# Selection rate overall and per value of `sex` for the baseline model.
result2 = metrics.selection_rate_group_summary(y_true,
                                               y_pred,
                                               sensitive_features=sex)
print("selection_rate_group_summary", result2)
# FairlearnDashboard(sensitive_features=sex,
#                        sensitive_feature_names=['sex'],
#                        y_true=y_true,
#                        y_pred={"initial model": y_pred})

# Seed NumPy's global RNG before mitigation, presumably so the mitigated
# results are reproducible. Note the baseline fit above ran before seeding.
np.random.seed(0)
# Mitigation: wrap a fresh decision tree in ExponentiatedGradient under a
# demographic-parity constraint, again fitting and predicting on the full data.
constraint = DemographicParity()
classifier = DecisionTreeClassifier()
mitigator = ExponentiatedGradient(classifier, constraint)
#print("constructing mitigator")
mitigator.fit(X, y_true, sensitive_features=sex)
y_pred_mitigated = mitigator.predict(X)
result2_mitigated = metrics.selection_rate_group_summary(
Ejemplo n.º 5
0
# Copyright (c) Microsoft Corporation and Fairlearn contributors.
# Licensed under the MIT License.
"""Produce plot of selection rates for the quickstart guide."""
from bokeh.plotting import figure, show
from fairlearn.metrics import selection_rate_group_summary
from fairlearn.datasets import fetch_adult

# Load the adult census dataset as pandas objects.
data = fetch_adult(as_frame=True)
X = data.data
# Binary label: 1 where the target is '>50K', 0 otherwise.
y_true = (data.target == '>50K') * 1
sex = X['sex']

# The true labels are passed in both the y_true and y_pred slots, so the
# "selection rate" here is the fraction of positive labels in each group
# rather than a property of any model.
selection_rates = selection_rate_group_summary(y_true,
                                               y_true,
                                               sensitive_features=sex)

# Group names become the categorical x axis; their rates are the bar heights.
xs = list(selection_rates.by_group.keys())
ys = [selection_rates.by_group[s] for s in xs]

# Title fixed: the label threshold is '>50K', i.e. $50,000 (the original
# text had an extra zero).
p = figure(x_range=xs,
           plot_height=480,
           plot_width=640,
           title="Fraction earning over $50,000",
           toolbar_location=None,
           tools="")

p.vbar(x=xs, top=ys, width=0.9)

# Anchor the y axis at zero and remove visual clutter from the chart.
p.y_range.start = 0
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None