def setup_method(self, method):
     """Attach the objects under test to this instance.

     Stores a ``LogisticRegression`` using the ``liblinear`` solver as
     ``self.estimator`` and a ``DemographicParity`` instance as
     ``self.disparity_criterion``. The ``method`` argument is not used.
     """
     base_learner = LogisticRegression(solver='liblinear')
     self.estimator = base_learner
     self.disparity_criterion = DemographicParity()
Esempio n. 2
0
# The :class:`fairlearn.reductions.GridSearch` class implements a simplified version of the
# exponentiated gradient reduction of `Agarwal et al. 2018 <https://arxiv.org/abs/1803.02453>`_.
# The user supplies a standard ML estimator, which is treated as a blackbox.
# `GridSearch` works by generating a sequence of relabellings and reweightings, and
# trains a predictor for each.
#
# For this example, we specify demographic parity (on the sensitive feature of sex) as
# the fairness metric.
# Demographic parity requires that individuals are offered the opportunity (are approved
# for a loan in this example) independent of membership in the sensitive class (i.e., females
# and males should be offered loans at the same rate).
# We are using this metric for the sake of simplicity; in general, the appropriate fairness
# metric will not be obvious.

sweep = GridSearch(LogisticRegression(solver='liblinear', fit_intercept=True),
                   constraints=DemographicParity(),
                   grid_size=71)

# %%
# Our algorithms provide :code:`fit()` and :code:`predict()` methods, so they behave in a similar manner
# to other ML packages in Python.
# We do however have to specify one extra argument to :code:`fit()` - the column of sensitive
# feature labels. The number of predictors to generate in our sweep is controlled by the
# :code:`grid_size` argument already passed to the :code:`GridSearch` constructor above.
#
# After :code:`fit()` completes, we extract the full set of predictors from the
# :class:`fairlearn.reductions.GridSearch` object.

sweep.fit(X_train, Y_train, sensitive_features=A_train)

predictors = sweep.predictors_
Esempio n. 3
0
def test_gridsearch_classification():
    """Run ``ptc.run_gridsearch_classification`` with a Keras-wrapped estimator.

    The estimator is a ``KerasClassifier`` built from ``create_model`` and the
    disparity moment is a default ``DemographicParity``.
    """
    keras_estimator = KerasClassifier(build_fn=create_model)
    parity_moment = DemographicParity()
    ptc.run_gridsearch_classification(keras_estimator, parity_moment)
Esempio n. 4
0
def test_gridsearch_classification():
    """Run ``ptc.run_gridsearch_classification`` with a default XGBoost classifier.

    The disparity moment is a default ``DemographicParity``.
    """
    xgb_estimator = xgb.XGBClassifier()
    parity_moment = DemographicParity()
    ptc.run_gridsearch_classification(xgb_estimator, parity_moment)
Esempio n. 5
0
def test_construct_and_load():
    """Check what ``DemographicParity`` exposes after ``load_data`` on rigged data.

    The data comes from ``simple_binary_threshold_data`` with 10 samples
    labelled 2 and 30 samples labelled 3 in the sensitive feature. The test
    then inspects ``tags``, ``index``, ``prob_event``, ``prob_group_event``,
    ``neg_basis``, ``pos_basis`` and ``neg_basis_present`` on the moment.
    """
    parity = DemographicParity()
    assert parity.short_name == "DemographicParity"

    # Rigged data: group sizes and sensitive-feature labels are fixed so the
    # expected fractions below can be written out exactly.
    n_a0, n_a1 = 10, 30
    n_total = n_a0 + n_a1
    label_a0, label_a1 = 2, 3

    X, Y, A = simple_binary_threshold_data(n_a0, n_a1,
                                           0.2, 0.7,
                                           label_a0, label_a1)

    parity.load_data(X, Y, sensitive_features=A)
    assert parity.data_loaded
    assert parity.total_samples == n_total

    # tags: labels and group ids mirror the inputs; every row has event 'all'
    assert parity.tags['label'].equals(pd.Series(Y))
    assert parity.tags['group_id'].equals(pd.Series(A))
    assert parity.tags['event'].map(lambda event: event == 'all').all()

    # index: the product of sign x event x group label
    expected_index = pd.MultiIndex.from_product(
        [['+', '-'], ['all'], [label_a0, label_a1]],
        names=[_SIGN, _EVENT, _GROUP_ID])
    assert parity.index.equals(expected_index)

    # prob_event: a single 'all' event that every sample belongs to
    assert len(parity.prob_event.index) == 1
    assert parity.prob_event.loc['all'] == 1

    # prob_group_event: the fraction of the population carrying each label
    assert len(parity.prob_group_event.index) == 2
    for group_label, group_size in ((label_a0, n_a0), (label_a1, n_a1)):
        assert parity.prob_group_event.loc[('all', group_label)] \
            == group_size / n_total

    # Keys shared by both basis checks, in (sign, event, label) order.
    basis_keys = [
        ('+', 'all', label_a0),
        ('+', 'all', label_a1),
        ('-', 'all', label_a0),
        ('-', 'all', label_a1),
    ]

    # neg_basis: column 0 selects the '-' entry of the first label
    assert len(parity.neg_basis.index) == 4
    for key, expected in zip(basis_keys, (0, 0, 1, 0)):
        assert parity.neg_basis[0][key] == expected

    # pos_basis: column 0 selects the '+' entry of the first label
    assert len(parity.pos_basis.index) == 4
    for key, expected in zip(basis_keys, (1, 0, 0, 0)):
        assert parity.pos_basis[0][key] == expected

    # neg_basis_present: one entry, and it is truthy
    assert len(parity.neg_basis_present) == 1
    assert parity.neg_basis_present[0]
Esempio n. 6
0
# Baseline: summarise accuracy_score for the initial predictions, passing
# `sex` as the sensitive feature, and print whatever group_summary returns.
result1 = metrics.group_summary(accuracy_score,
                                y_true,
                                y_pred,
                                sensitive_features=sex)
print("group_summary", result1)
# Same inputs, summarised with the selection-rate helper instead.
result2 = metrics.selection_rate_group_summary(y_true,
                                               y_pred,
                                               sensitive_features=sex)
print("selection_rate_group_summary", result2)
# NOTE(review): disabled dashboard call for the initial model on its own; the
# active call at the end of this snippet shows both models side by side.
# FairlearnDashboard(sensitive_features=sex,
#                        sensitive_feature_names=['sex'],
#                        y_true=y_true,
#                        y_pred={"initial model": y_pred})

# Seed NumPy's global RNG before mitigation (presumably so the run below is
# repeatable; confirm which components actually draw from it).
np.random.seed(0)
# Mitigation: wrap a decision tree in ExponentiatedGradient with a
# DemographicParity constraint, then fit and predict on the same X.
constraint = DemographicParity()
classifier = DecisionTreeClassifier()
mitigator = ExponentiatedGradient(classifier, constraint)
#print("constructing mitigator")
mitigator.fit(X, y_true, sensitive_features=sex)
y_pred_mitigated = mitigator.predict(X)
# Selection-rate summary for the mitigated predictions, for comparison with
# result2 above.
result2_mitigated = metrics.selection_rate_group_summary(
    y_true, y_pred_mitigated, sensitive_features=sex)
print("selection_rate_group_summary mitigated", result2_mitigated)
# Dashboard fed with both prediction sets, keyed by display name.
FairlearnDashboard(sensitive_features=sex,
                   sensitive_feature_names=['sex'],
                   y_true=y_true,
                   y_pred={
                       "initial model": y_pred,
                       "mitigated model": y_pred_mitigated
                   })
Esempio n. 7
0
def test_gridsearch_classification():
    """Run ``ptc.run_gridsearch_classification`` with an XGBoost classifier.

    The classifier is created with ``use_label_encoder=False`` and the
    disparity moment is a default ``DemographicParity``.
    """
    xgb_estimator = xgb.XGBClassifier(use_label_encoder=False)
    parity_moment = DemographicParity()
    ptc.run_gridsearch_classification(xgb_estimator, parity_moment)
Esempio n. 8
0
def test_gridsearch_classification():
    """Run ``ptc.run_gridsearch_classification`` with the model from ``create_model()``.

    The disparity moment is a default ``DemographicParity``.
    """
    built_model = create_model()
    parity_moment = DemographicParity()
    ptc.run_gridsearch_classification(built_model, parity_moment)
 def setup_method(self, method):
     """Attach a scaler + logistic-regression pipeline and its settings.

     Stores a two-step ``Pipeline`` (``'scaler'`` then ``'logistic'``) as
     ``self.estimator``, a ``DemographicParity`` instance as
     ``self.disparity_criterion``, and the string
     ``'logistic__sample_weight'`` as ``self.sample_weight_name``.
     The ``method`` argument is not used.
     """
     pipeline_steps = [
         ('scaler', StandardScaler()),
         ('logistic', LogisticRegression(solver='liblinear')),
     ]
     self.estimator = Pipeline(pipeline_steps)
     self.disparity_criterion = DemographicParity()
     # The prefix matches the 'logistic' step name above (presumably so the
     # sample weights are routed to that step; confirm against the consumer).
     self.sample_weight_name = 'logistic__sample_weight'
Esempio n. 10
0
def test_perf(perf_test_configuration, request):
    """Time one mitigator's fit and predict and assert it stays within budget.

    The configuration selects a dataset from ``datasets``, a predictor from
    ``models`` and a mitigator by class name. The clock covers constructing
    the mitigator, ``fit`` on the training split and ``predict`` on the test
    split. Fitting the unconstrained predictor happens before the clock
    starts and is not counted.

    Args:
        perf_test_configuration: object exposing the ``dataset``,
            ``predictor``, ``mitigator`` and ``max_time_consumption``
            attributes read below.
        request: only ``request.node.name`` is read, for logging
            (presumably the pytest ``request`` fixture).

    Raises:
        ValueError: if the configured dataset or mitigator is not one of
            the cases handled here.
        AssertionError: if the measured time exceeds
            ``perf_test_configuration.max_time_consumption``.
    """
    print("Starting with test case {}".format(request.node.name))
    print("Downloading dataset")
    dataset = datasets[perf_test_configuration.dataset]()
    X_train, X_test = dataset.get_X()
    y_train, y_test = dataset.get_y()
    print("Done downloading dataset")

    # Pick the sensitive-feature column (and fix up label dtypes) per dataset.
    if perf_test_configuration.dataset == "adult_uci":
        # sensitive feature is 8th column (sex)
        sensitive_features_train = X_train[:, 7]
        sensitive_features_test = X_test[:, 7]
    elif perf_test_configuration.dataset == "diabetes_sklearn":
        # sensitive feature is 2nd column (sex)
        # features have been scaled, but sensitive feature needs to be str or int
        sensitive_features_train = X_train[:, 1].astype(str)
        sensitive_features_test = X_test[:, 1].astype(str)
        # labels can't be floats as of now
        y_train = y_train.astype(int)
        y_test = y_test.astype(int)
    elif perf_test_configuration.dataset == "compas":
        # sensitive feature is either race or sex
        # TODO add another case where we use sex as well, or both (?)
        sensitive_features_train, sensitive_features_test = dataset.get_sensitive_features(
            'race')
        y_train = y_train.astype(int)
        y_test = y_test.astype(int)
    else:
        raise ValueError("Sensitive features unknown for dataset {}".format(
            perf_test_configuration.dataset))

    print("Fitting estimator")
    # Two separate instances: `estimator` is handed unfitted to the reduction
    # mitigators, while `unconstrained_predictor` is fitted here, outside the
    # timed region, for ThresholdOptimizer.
    estimator = models[perf_test_configuration.predictor]()
    unconstrained_predictor = models[perf_test_configuration.predictor]()
    unconstrained_predictor.fit(X_train, y_train)
    print("Done fitting estimator")

    start_time = time()
    if perf_test_configuration.mitigator == ThresholdOptimizer.__name__:
        mitigator = ThresholdOptimizer(
            unconstrained_predictor=unconstrained_predictor,
            constraints=DEMOGRAPHIC_PARITY)
    elif perf_test_configuration.mitigator == ExponentiatedGradient.__name__:
        mitigator = ExponentiatedGradient(estimator=estimator,
                                          constraints=DemographicParity())
    elif perf_test_configuration.mitigator == GridSearch.__name__:
        mitigator = GridSearch(estimator=estimator,
                               constraints=DemographicParity())
    else:
        # ValueError (not bare Exception) to match the unknown-dataset branch.
        raise ValueError("Unknown mitigation technique.")

    print("Fitting mitigator")

    mitigator.fit(X_train,
                  y_train,
                  sensitive_features=sensitive_features_train)

    # Only ThresholdOptimizer.predict is given the sensitive features and a
    # random_state; the other mitigators predict from X alone.
    if perf_test_configuration.mitigator == ThresholdOptimizer.__name__:
        mitigator.predict(X_test,
                          sensitive_features=sensitive_features_test,
                          random_state=1)
    else:
        mitigator.predict(X_test)

    # TODO evaluate accuracy/fairness tradeoff

    total_time = time() - start_time
    print("Total time taken: {}s".format(total_time))
    print("Maximum allowed time: {}s".format(
        perf_test_configuration.max_time_consumption))
    assert total_time <= perf_test_configuration.max_time_consumption
    print(
        "\n\n===============================================================\n\n"
    )