Example #1
    def test_input_X_unchanged(self, transformA, transformY, transformX,
                               mocker):
        # This test ensures that X is passed to the underlying estimator unchanged. For y and
        # sample_weight, ExponentiatedGradient applies its own required transformations, so the
        # estimator receives them as pandas.Series.
        X, y, A = _get_data()

        transformed_X = transformX(X)
        transformed_y = transformY(y)
        transformed_A = transformA(A)

        # Mock out the estimator's fit and predict methods since we don't actually want to fit
        # anything; we only care that fit is called exactly twice through the best_h calls.
        estimator = LeastSquaresBinaryClassifierLearner()
        estimator.predict = mocker.Mock(return_value=y)
        estimator.fit = mocker.MagicMock()
        # restrict ExponentiatedGradient to a single iteration
        expgrad = ExponentiatedGradient(estimator,
                                        constraints=DemographicParity(),
                                        max_iter=1)
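        # ExponentiatedGradient copies the estimator (via copy.deepcopy here) before each
        # oracle call; returning our instrumented estimator from the patched deepcopy keeps
        # its fit call count observable.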
        mocker.patch('copy.deepcopy', return_value=estimator)
        expgrad.fit(transformed_X,
                    transformed_y,
                    sensitive_features=transformed_A)

        # ensure that the input data wasn't changed by our mitigator before being passed to the
        # underlying estimator
        assert estimator.fit.call_count == 2
        args, kwargs = estimator.fit.call_args
        assert len(args) == 2  # X and y
        assert len(kwargs) == 1  # sample_weight
        assert isinstance(args[0], type(transformed_X))
        assert isinstance(args[1], pd.Series)
        assert isinstance(kwargs['sample_weight'], pd.Series)
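For orientation, a minimal, self-contained sketch of the public workflow these tests exercise; the toy data and the LogisticRegression base estimator below are illustrative assumptions, not taken from any example on this page:

import pandas as pd
from sklearn.linear_model import LogisticRegression
from fairlearn.reductions import ExponentiatedGradient, DemographicParity

# Toy data: one numeric feature, 0/1 labels, a binary sensitive feature.
X = pd.DataFrame({"score": [1, 2, 3, 4, 5, 6, 7, 8]})
y = pd.Series([0, 0, 0, 1, 0, 1, 1, 1])
A = pd.Series(["a", "a", "b", "b", "a", "b", "a", "b"])

mitigator = ExponentiatedGradient(LogisticRegression(solver="liblinear"),
                                  constraints=DemographicParity(),
                                  eps=0.05)
mitigator.fit(X, y, sensitive_features=A)
y_pred = mitigator.predict(X)  # samples from the learned randomized classifier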
Example #2
    def test_argument_types(self, transformX, transformY, transformA):
        # This is an expanded-out version of one of the smoke tests
        expgrad = ExponentiatedGradient(self.learner,
                                        constraints=DemographicParity(),
                                        eps=0.1)
        expgrad.fit(transformX(self.X),
                    transformY(self.y),
                    sensitive_features=transformA(self.A))

        res = expgrad._expgrad_result._as_dict()
        Q = res["best_classifier"]
        res["n_classifiers"] = len(res["classifiers"])

        disp = DemographicParity()
        disp.load_data(self.X, self.y, sensitive_features=self.A)
        error = ErrorRate()
        error.load_data(self.X, self.y, sensitive_features=self.A)
        res["disp"] = disp.gamma(Q).max()
        res["error"] = error.gamma(Q)[0]

        assert res["best_gap"] == pytest.approx(0.0000, abs=self._PRECISION)
        assert res["last_t"] == 5
        assert res["best_t"] == 5
        assert res["disp"] == pytest.approx(0.1, abs=self._PRECISION)
        assert res["error"] == pytest.approx(0.25, abs=self._PRECISION)
        assert res["n_oracle_calls"] == 32
        assert res["n_classifiers"] == 3
Example #3
    def run_smoke_test_binary_classification(self, data, flipped=False):
        learner = LeastSquaresBinaryClassifierLearner()
        if "ratio" in data.keys():
            disparity_moment = data["constraint_class"](
                ratio_bound_slack=data["eps"], ratio_bound=data["ratio"])
        else:
            disparity_moment = data["constraint_class"](
                difference_bound=data["eps"])

        # Create Exponentiated Gradient object with a copy of the constraint.
        # The original disparity_moment object is used for validation, so the
        # assumption is that the moment logic is correct in these tests.
        expgrad = ExponentiatedGradient(learner,
                                        constraints=deepcopy(disparity_moment),
                                        eps=data["eps"])

        X, y, A = _get_data(A_two_dim=False, flip_y=flipped)

        expgrad.fit(X, y, sensitive_features=A)

        self._assert_expgrad_state(expgrad, data)

        # select probability of predicting 1
        def Q(X):
            return expgrad._pmf_predict(X)[:, 1]

        default_objective = ErrorRate()
        disparity_moment.load_data(X, y, sensitive_features=A)
        default_objective.load_data(X, y, sensitive_features=A)
        disparity = disparity_moment.gamma(Q).max()
        error = default_objective.gamma(Q)[0]
        assert disparity == pytest.approx(data["disp"], abs=_PRECISION)
        assert error == pytest.approx(data["error"], abs=_PRECISION)
Example #4
    def test_argument_types(self, transformX, transformY, transformA,
                            A_two_dim):
        # This is an expanded-out version of one of the smoke tests
        X, y, A = _get_data(A_two_dim)
        merged_A = _map_into_single_column(A)

        expgrad = ExponentiatedGradient(LeastSquaresBinaryClassifierLearner(),
                                        constraints=DemographicParity(),
                                        eps=0.1)
        expgrad.fit(transformX(X),
                    transformY(y),
                    sensitive_features=transformA(A))

        res = expgrad._expgrad_result._as_dict()
        Q = res["best_classifier"]
        res["n_classifiers"] = len(res["classifiers"])

        disp = DemographicParity()
        disp.load_data(X, y, sensitive_features=merged_A)
        error = ErrorRate()
        error.load_data(X, y, sensitive_features=merged_A)
        res["disp"] = disp.gamma(Q).max()
        res["error"] = error.gamma(Q)[0]

        assert res["best_gap"] == pytest.approx(0.0000, abs=_PRECISION)
        assert res["last_t"] == 5
        assert res["best_t"] == 5
        assert res["disp"] == pytest.approx(0.1, abs=_PRECISION)
        assert res["error"] == pytest.approx(0.25, abs=_PRECISION)
        assert res["n_oracle_calls"] == 32
        assert res["n_classifiers"] == 3
Example #5
    def test_smoke_regression(self, data):
        learner = LeastSquaresRegressor()
        disparity_moment = data["constraint_class"](
                loss=data["loss"],
                upper_bound=data["upper_bound"])

        # Create Exponentiated Gradient object with a copy of the constraint.
        # The original disparity_moment object is used for validation, so the
        # assumption is that the moment logic is correct in these tests.
        expgrad = ExponentiatedGradient(
            learner,
            constraints=deepcopy(disparity_moment),
            eps=data["eps"],
            nu=data.get('nu'),
            max_iter=data.get("max_iter", 50))

        X, y, A = _get_data(A_two_dim=False, y_as_scores=True)

        expgrad.fit(X, y, sensitive_features=A)

        self._assert_expgrad_state(expgrad, data)

        # check all predictors
        disparity_moment.load_data(X, y, sensitive_features=A)
        for i in range(len(expgrad.predictors_)):
            def Q(X): return expgrad._pmf_predict(X)[i]
            default_objective = MeanLoss(data["loss"])
            default_objective.load_data(X, y, sensitive_features=A)
            disparity = disparity_moment.gamma(Q).max()
            error = default_objective.gamma(Q)[0]
            assert disparity == pytest.approx(data["disp"][i], abs=_PRECISION)
            assert error == pytest.approx(data["error"][i], abs=_PRECISION)
            assert expgrad.weights_[i] == pytest.approx(data['weights'][i], abs=_PRECISION)

        assert sum(expgrad.weights_) == pytest.approx(1, abs=_PRECISION)
Example #6
    def run_smoke_test(self, data, flipped=False):
        ratio = 1.0
        if "ratio" in data.keys():
            ratio = data["ratio"]
        expgrad = ExponentiatedGradient(
            self.learner,
            constraints=data["cons_class"](ratio=ratio),
            eps=data["eps"])
        expgrad.fit(self.X, (self.flipped_y if flipped else self.y),
                    sensitive_features=self.A)

        def Q(X):
            return expgrad._pmf_predict(X)[:, 1]

        n_predictors = len(expgrad._predictors)

        disparity_moment = data["cons_class"](ratio=ratio)
        disparity_moment.load_data(self.X,
                                   (self.flipped_y if flipped else self.y),
                                   sensitive_features=self.A)
        error = ErrorRate()
        error.load_data(self.X, (self.flipped_y if flipped else self.y),
                        sensitive_features=self.A)
        disparity = disparity_moment.gamma(Q).max()
        error = error.gamma(Q)[0]

        assert expgrad._best_gap == pytest.approx(data["best_gap"],
                                                  abs=self._PRECISION)
        assert expgrad._last_t == data["last_t"]
        assert expgrad._best_t == data["best_t"]
        assert disparity == pytest.approx(data["disp"], abs=self._PRECISION)
        assert error == pytest.approx(data["error"], abs=self._PRECISION)
        assert expgrad._n_oracle_calls == data["n_oracle_calls"]
        assert n_predictors == data["n_predictors"]
Example #7
    def test_single_y_value(self):
        # Setup with data designed to result in "all single class"
        # at some point in the grid
        X_dict = {"c": [10, 50, 10]}
        X = pd.DataFrame(X_dict)

        y = [1, 1, 1]
        A = ['a', 'b', 'b']

        estimator = LogisticRegression(solver='liblinear',
                                       fit_intercept=True,
                                       random_state=97)
        expgrad = ExponentiatedGradient(estimator, DemographicParity())

        # Following line should not throw an exception
        expgrad.fit(X, y, sensitive_features=A)

        # Check that each returned predictor is a constant (dummy) classifier
        test_X_dict = {"c": [1, 2, 3, 4, 5, 6]}
        test_X = pd.DataFrame(test_X_dict)
        assert expgrad.n_oracle_calls_dummy_returned_ > 0
        assert len(expgrad.oracle_execution_times_) == expgrad.n_oracle_calls_
        for p in expgrad.predictors_:
            assert isinstance(p, DummyClassifier)
            assert np.array_equal(p.predict(test_X), [1, 1, 1, 1, 1, 1])
Example #8
    def run_smoke_test(self, data):
        expgrad = ExponentiatedGradient(self.learner, constraints=data["cons_class"](),
                                        eps=data["eps"])
        expgrad.fit(self.X, self.y, sensitive_features=self.A)

        res = expgrad._expgrad_result._as_dict()
        Q = res["best_classifier"]
        res["n_classifiers"] = len(res["classifiers"])

        disp = data["cons_class"]()
        disp.load_data(self.X, self.y, sensitive_features=self.A)
        error = moments.MisclassificationError()
        error.load_data(self.X, self.y, sensitive_features=self.A)
        res["disp"] = disp.gamma(Q).max()
        res["error"] = error.gamma(Q)[0]

        assert res["best_gap"] == pytest.approx(
            data["best_gap"], abs=self._PRECISION)
        assert res["last_t"] == data["last_t"]
        assert res["best_t"] == data["best_t"]
        assert res["disp"] == pytest.approx(data["disp"], abs=self._PRECISION)
        assert res["error"] == pytest.approx(
            data["error"], abs=self._PRECISION)
        assert res["n_oracle_calls"] == data["n_oracle_calls"]
        assert res["n_classifiers"] == data["n_classifiers"]
Example #9
    def test_argument_types(self, transformX, transformY, transformA,
                            A_two_dim):
        # This is an expanded-out version of one of the smoke tests
        X, y, A = _get_data(A_two_dim)
        merged_A = _map_into_single_column(A)

        expgrad = ExponentiatedGradient(LeastSquaresBinaryClassifierLearner(),
                                        constraints=DemographicParity(),
                                        eps=0.1)
        expgrad.fit(transformX(X),
                    transformY(y),
                    sensitive_features=transformA(A))

        Q = expgrad._best_classifier
        n_classifiers = len(expgrad._classifiers)

        disparity_moment = DemographicParity()
        disparity_moment.load_data(X, y, sensitive_features=merged_A)
        error = ErrorRate()
        error.load_data(X, y, sensitive_features=merged_A)
        disparity = disparity_moment.gamma(Q).max()
        error = error.gamma(Q)[0]

        assert expgrad._best_gap == pytest.approx(0.0000, abs=_PRECISION)
        assert expgrad._last_t == 5
        assert expgrad._best_t == 5
        assert disparity == pytest.approx(0.1, abs=_PRECISION)
        assert error == pytest.approx(0.25, abs=_PRECISION)
        assert expgrad._n_oracle_calls == 32
        assert n_classifiers == 3
Example #10
def test_equalized_odds():
    # Have to do this one longhand, since it combines tpr and fpr
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=632753)
    X_dummy = pd.get_dummies(X)

    metrics = {"tpr": true_positive_rate, "fpr": false_positive_rate}

    unmitigated = LogisticRegression()
    unmitigated.fit(X_dummy, y)
    y_pred = unmitigated.predict(X_dummy)
    mf_unmitigated = MetricFrame(
        metrics=metrics,
        y_true=y,
        y_pred=y_pred,
        sensitive_features=X["sens"],
        control_features=X["ctrl"],
    )

    expgrad_basic = ExponentiatedGradient(
        LogisticRegression(),
        constraints=EqualizedOdds(difference_bound=0.01),
        eps=0.01)
    expgrad_basic.fit(X_dummy, y, sensitive_features=X["sens"])
    y_pred_basic = expgrad_basic.predict(X_dummy, random_state=9235)
    mf_basic = MetricFrame(
        metrics=metrics,
        y_true=y,
        y_pred=y_pred_basic,
        sensitive_features=X["sens"],
        control_features=X["ctrl"],
    )

    expgrad_control = ExponentiatedGradient(
        LogisticRegression(),
        constraints=EqualizedOdds(difference_bound=0.01),
        eps=0.01)
    expgrad_control.fit(X_dummy,
                        y,
                        sensitive_features=X["sens"],
                        control_features=X["ctrl"])
    y_pred_control = expgrad_control.predict(X_dummy, random_state=8152)
    mf_control = MetricFrame(
        metrics=metrics,
        y_true=y,
        y_pred=y_pred_control,
        sensitive_features=X["sens"],
        control_features=X["ctrl"],
    )

    compare_unmitigated = mf_control.difference(
        method="to_overall") <= mf_unmitigated.difference(method="to_overall")
    print(compare_unmitigated)

    compare_basic = mf_control.difference(
        method="to_overall") <= mf_basic.difference(method="to_overall")
    print(compare_basic)

    assert compare_basic.values.reshape(6).all()
    assert compare_unmitigated.values.reshape(6).all()
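The assertions above compare per-group metric differences "to overall". A tiny illustration of what MetricFrame.difference(method="to_overall") reports, on made-up data of ours rather than the loan scenario used in the test:

import pandas as pd
from sklearn.metrics import recall_score
from fairlearn.metrics import MetricFrame

y_true = pd.Series([1, 0, 1, 1, 0, 1])
y_pred = pd.Series([1, 0, 0, 1, 0, 1])
sf = pd.Series(["a", "a", "a", "b", "b", "b"])

mf = MetricFrame(metrics=recall_score, y_true=y_true, y_pred=y_pred,
                 sensitive_features=sf)
print(mf.overall)                          # recall on all rows combined
print(mf.by_group)                         # recall per sensitive-feature group
print(mf.difference(method="to_overall"))  # max |group value - overall value|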
Example #11
    def test_binary_classifier_0_1_required(self):
        X, y, A = _get_data()
        y = 2 * y

        expgrad = ExponentiatedGradient(LogisticRegression(),
                                        constraints=DemographicParity(),
                                        max_iter=1)
        with pytest.raises(ValueError) as execInfo:
            expgrad.fit(X, y, sensitive_features=(A))
        assert _LABELS_NOT_0_1_ERROR_MESSAGE == execInfo.value.args[0]
Example #12
def run_comparisons(moment, metric_fn):
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=163)
    X_dummy = pd.get_dummies(X)

    mf_input = MetricFrame(metric_fn, y, y,
                           sensitive_features=X['sens'],
                           control_features=X['ctrl'])

    print("Metric for input:\n", mf_input.by_group)
    print("Input Metric differences:\n", mf_input.difference(method='to_overall'), "\n")

    unmitigated = LogisticRegression()
    unmitigated.fit(X_dummy, y)
    y_pred = unmitigated.predict(X_dummy)
    mf_unmitigated = MetricFrame(metric_fn,
                                 y, y_pred,
                                 sensitive_features=X['sens'],
                                 control_features=X['ctrl'])
    print("Unmitigated metric:\n", mf_unmitigated.by_group)
    print("Unmitigated metric differences:\n",
          mf_unmitigated.difference(method='to_overall'), "\n")

    expgrad_basic = ExponentiatedGradient(
        LogisticRegression(),
        constraints=moment(),
        eps=0.005)
    expgrad_basic.fit(X_dummy, y, sensitive_features=X['sens'])
    y_pred_basic = expgrad_basic.predict(X_dummy, random_state=8235)
    mf_basic = MetricFrame(metric_fn, y, y_pred_basic,
                           sensitive_features=X['sens'],
                           control_features=X['ctrl'])
    print("Basic expgrad metric:\n", mf_basic.by_group)
    print("Basic expgrad metric differences:\n",
          mf_basic.difference(method='to_overall'), "\n")

    expgrad_control = ExponentiatedGradient(
        LogisticRegression(),
        constraints=moment(),
        eps=0.005)
    expgrad_control.fit(X_dummy, y,
                        sensitive_features=X['sens'],
                        control_features=X['ctrl'])
    y_pred_control = expgrad_control.predict(X_dummy, random_state=852)
    mf_control = MetricFrame(metric_fn, y, y_pred_control,
                             sensitive_features=X['sens'],
                             control_features=X['ctrl'])
    print("expgrad_control metric:\n", mf_control.by_group)
    print("expgrad_control metric differences:\n",
          mf_control.difference(method='to_overall'))

    assert (mf_control.difference(method='to_overall') <=
            mf_unmitigated.difference(method='to_overall')).all()

    assert (mf_control.difference(method='to_overall') <=
            mf_basic.difference(method='to_overall')).all()
Example #13
    def test_error_rate_consistency(self, eps, ratio, pos_copies):
        learner = LeastSquaresBinaryClassifierLearner()
        if ratio is None:
            constraints_moment = EqualizedOdds(difference_bound=eps)
        else:
            constraints_moment = EqualizedOdds(ratio_bound=ratio, ratio_bound_slack=eps)

        results = {}
        for method in ["costs", "sampling"]:
            X, y, A = _get_data()

            if method == "sampling":
                select = y == 1
                X = pd.concat((X,) + (X.loc[select, :],) * pos_copies).values
                y = pd.concat((y,) + (y[select],) * pos_copies).values
                A = pd.concat((A,) + (A[select],) * pos_copies).values
                objective_moment = ErrorRate()
            else:
                objective_moment = ErrorRate(costs={"fn": 1.0 + pos_copies, "fp": 1.0})

            expgrad = ExponentiatedGradient(
                learner,
                constraints=deepcopy(constraints_moment),
                objective=deepcopy(objective_moment),
                eps=eps,
                nu=1e-3,
            )

            expgrad.fit(X, y, sensitive_features=A)

            # select probability of predicting 1
            def Q(X):
                return expgrad._pmf_predict(X)[:, 1]

            constraints_eval = deepcopy(constraints_moment)
            constraints_eval.load_data(X, y, sensitive_features=A)
            disparity = constraints_eval.gamma(Q).max()

            objective_eval = deepcopy(objective_moment)
            objective_eval.load_data(X, y, sensitive_features=A)
            total_error = objective_eval.gamma(Q)[0] * len(y)
            results[method] = {
                "error": objective_eval.gamma(Q)[0],
                "total_error": total_error,
                "disp": disparity,
                "n_predictors": len(expgrad.predictors_),
                "best_gap": expgrad.best_gap_,
                "last_iter": expgrad.last_iter_,
                "best_iter": expgrad.best_iter_,
                "n_oracle_calls": expgrad.n_oracle_calls_,
                "n_oracle_calls_dummy_returned": expgrad.n_oracle_calls_dummy_returned_,
            }

        self._assert_expgrad_two_states(results["costs"], results["sampling"])
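The "costs" branch above relies on ErrorRate accepting asymmetric false-positive/false-negative costs as the objective. A small stand-alone sketch of that objective, using toy data of ours rather than the test's _get_data():

import numpy as np
import pandas as pd
from fairlearn.reductions import ErrorRate

X = pd.DataFrame({"x": [0, 1, 2, 3]})
y = pd.Series([0, 0, 1, 1])
A = pd.Series(["a", "b", "a", "b"])

# Weight false negatives three times as heavily as false positives.
objective = ErrorRate(costs={"fp": 1.0, "fn": 3.0})
objective.load_data(X, y, sensitive_features=A)

def predict_all_zero(X):
    # A predictor that always outputs the negative class.
    return np.zeros(len(X))

print(objective.gamma(predict_all_zero))  # cost-weighted error of that predictor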
Example #14
def run_expgrad_classification(estimator, moment):
    """Run classification test with ExponentiatedGradient."""
    X, Y, A = fetch_adult()

    expgrad = ExponentiatedGradient(
        estimator,
        constraints=moment)
    expgrad.fit(X, Y, sensitive_features=A)

    assert expgrad.n_oracle_calls_ > 1
    assert len(expgrad.predictors_) > 1
Example #15
def run(fairness_constraints, use_proxy=False):
    print(f"Start running experiment with Proxy: {use_proxy}.")
    all_results = {}
    all_results['eps'] = fairness_constraints
    all_results['accuracy'] = {'train': [], 'test': []}

    all_results['violation'] = {'train': [], 'test': []}

    all_results['violation_male'] = {'train': [], 'test': []}

    all_results['violation_female'] = {'train': [], 'test': []}

    for eps in fairness_constraints:
        begin = time.time()

        if use_proxy:
            sweep = ExponentiatedGradient(
                LogisticRegression(solver='liblinear', fit_intercept=True),
                constraints=ProxyEqualizedOdds(error_rate=error_rate),
                eps=eps)
        else:
            sweep = ExponentiatedGradient(LogisticRegression(
                solver='liblinear', fit_intercept=True),
                                          constraints=EqualizedOdds(),
                                          eps=eps)

        try:
            sweep.fit(X_train, Y_noised, sensitive_features=A_train)

            prediction_train = sweep.predict(X_train)
            prediction_test = sweep.predict(X_test)
        except Exception:
            print(f"Fairlearn can't fit at fairness constraint {eps}")
            continue

        all_results['accuracy']['train'].append(
            accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(
            accuracy(prediction_test, Y_test))

        all_results['violation']['train'].append(
            violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(
            violation(prediction_test, Y_test, A_test))

        all_results['violation_male']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=1))

        all_results['violation_female']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=0))

        print(
            f"Running fairness constraint: {eps}, "
            f"Training Accuracy: {all_results['accuracy']['train']}, "
            f"Test Accuracy: {all_results['accuracy']['test']}, "
            f"Training Violation: {all_results['violation']['train']}, "
            f"Test Violation: {all_results['violation']['test']}, "
            f"Time cost: {time.time() - begin}"
        )

    return all_results
Example #16
def run_estimation(fairness_constraints, proxy=False, lnl=False):
    print(
        f"Start running experiment with Proxy: {proxy}, Learning with Noisy Labels: {lnl}."
    )
    all_results_train, all_results_test = [], []

    for eps in fairness_constraints:
        begin = time.time()

        if proxy and lnl:
            clf = ExponentiatedGradient(
                LogisticRegression(solver='liblinear', fit_intercept=True),
                constraints=ProxyEqualizedOdds2(delta=delta),
                eps=eps)
            sweep = LearningWithNoisyLabels(clf=clf)
        elif proxy:
            sweep = ExponentiatedGradient(
                LogisticRegression(solver='liblinear', fit_intercept=True),
                constraints=ProxyEqualizedOdds2(delta=delta),
                eps=eps)

        elif lnl:
            clf = ExponentiatedGradient(LogisticRegression(solver='liblinear',
                                                           fit_intercept=True),
                                        constraints=EqualizedOdds(),
                                        eps=eps)
            sweep = LearningWithNoisyLabels(clf=clf)
        else:
            sweep = ExponentiatedGradient(LogisticRegression(
                solver='liblinear', fit_intercept=True),
                                          constraints=EqualizedOdds(),
                                          eps=eps)

        sweep.fit(X_train, Y_noised, sensitive_features=A_train)

        prediction_train = sweep.predict(X_train)
        prediction_test = sweep.predict(X_test)

        accuracy_train = accuracy(prediction_train, Y_train)
        accuracy_test = accuracy(prediction_test, Y_test)
        all_results_train.append(accuracy_train)
        all_results_test.append(accuracy_test)

        print(
            f"Running fairness constraint: {eps}, "
            f"Training Accuracy: {accuracy_train}, Test Accuracy: {accuracy_test}, "
            f"Training Violation: {violation(prediction_train, Y_train, A_train)}, "
            f"Test Violation: {violation(prediction_test, Y_test, A_test)}, "
            f"Time cost: {time.time() - begin}"
        )

    return all_results_train, all_results_test
Example #17
    def test_argument_types_ratio_bound(self, transformX, transformY,
                                        transformA, A_two_dim):
        # This is an expanded-out version of one of the smoke tests
        X, y, A = _get_data(A_two_dim)
        merged_A = _map_into_single_column(A)

        transformed_X = transformX(X)
        transformed_y = transformY(y)
        transformed_A = transformA(A)
        eps = 0.1
        ratio = 1.0

        expgrad = ExponentiatedGradient(
            LeastSquaresBinaryClassifierLearner(),
            constraints=DemographicParity(ratio_bound_slack=eps,
                                          ratio_bound=ratio),
            eps=eps,
        )
        expgrad.fit(transformed_X,
                    transformed_y,
                    sensitive_features=transformed_A)

        def Q(X):
            return expgrad._pmf_predict(X)[:, 1]

        n_predictors = len(expgrad.predictors_)

        disparity_moment = DemographicParity(ratio_bound_slack=eps,
                                             ratio_bound=ratio)
        disparity_moment.load_data(X, y, sensitive_features=merged_A)
        error = ErrorRate()
        error.load_data(X, y, sensitive_features=merged_A)
        disparity = disparity_moment.gamma(Q).max()
        disp = disparity_moment.gamma(Q)
        disp_eps = disparity_moment.gamma(Q) - disparity_moment.bound()
        error = error.gamma(Q)[0]

        assert expgrad.best_gap_ == pytest.approx(0.0000, abs=_PRECISION)
        assert expgrad.last_iter_ == 5
        assert expgrad.best_iter_ == 5
        assert disparity == pytest.approx(0.1, abs=_PRECISION)
        assert np.all(np.isclose(disp - eps, disp_eps))
        assert error == pytest.approx(0.25, abs=_PRECISION)
        assert expgrad.n_oracle_calls_ == 32
        assert n_predictors == 3
Example #18
def evaluate(eps, X_train, y_train, X_test, y_test, sex_train, sex_test,
             index):
    estimator = GradientBoostingClassifier()
    constraints = DemographicParity()
    egsolver = ExponentiatedGradient(estimator, constraints, eps=eps)
    egsolver.fit(X_train, y_train, sensitive_features=sex_train)
    y_pred = egsolver.predict(X_test)
    # print("y_pred",y_pred)
    group_summary_adult = group_summary(accuracy_score,
                                        y_test,
                                        y_pred,
                                        sensitive_features=sex_test)
    selection_rate_summary = selection_rate_group_summary(
        y_test, y_pred, sensitive_features=sex_test)
    error = 1 - group_summary_adult["overall"]
    dp = demographic(selection_rate_summary)
    errorlist[index].append(error)
    dplist[index].append(dp)
    print("error:%f,dp:%f" % (error, dp))
Example #19
    def fit(self, X, y, sample_weight=None):
        from fairlearn.reductions import ExponentiatedGradient, DemographicParity
        _estimator = self._estimator
        constraints = self.constraints
        eps = float(self.eps)
        nu = float(self.nu)
        max_iter = int(self.max_iter)
        eta0 = float(self.eta0) # renamed from eta_mul
        # run_linprog_step = self.run_linprog_step # missing
        # sample_weight_name = self.sample_weight_name #missing

        # For now the sensitive feature is always the first one
        sensitive_features = X[:,0]

        constraints = DemographicParity(difference_bound=eps)
        self.estimator = ExponentiatedGradient(
            _estimator, constraints, eps=eps, T=max_iter, nu=nu, eta_mul=eta0)
        # Pass sensitive_features by keyword and don't rely on fit() returning the mitigator.
        self.estimator.fit(X, y, sensitive_features=sensitive_features)
        return self
Example #20
def fit(train: DataTuple, args):
    """Fit a model."""
    try:
        from fairlearn.reductions import (
            ConditionalSelectionRate,
            DemographicParity,
            EqualizedOdds,
            ExponentiatedGradient,
        )
    except ImportError as e:
        raise RuntimeError(
            "In order to use Agarwal, install fairlearn==0.4.6.") from e

    fairness_class: ConditionalSelectionRate
    if args.fairness == "DP":
        fairness_class = DemographicParity()
    else:
        fairness_class = EqualizedOdds()

    if args.classifier == "SVM":
        model = select_svm(C=args.C, kernel=args.kernel, seed=args.seed)
    else:
        random_state = np.random.RandomState(seed=args.seed)
        model = LogisticRegression(solver="liblinear",
                                   random_state=random_state,
                                   max_iter=5000,
                                   C=args.C)

    data_x = train.x
    data_y = train.y[train.y.columns[0]]
    data_a = train.s[train.s.columns[0]]

    exponentiated_gradient = ExponentiatedGradient(model,
                                                   constraints=fairness_class,
                                                   eps=args.eps,
                                                   T=args.iters)
    exponentiated_gradient.fit(data_x, data_y, sensitive_features=data_a)

    min_class_label = train.y[train.y.columns[0]].min()
    exponentiated_gradient.min_class_label = min_class_label

    return exponentiated_gradient
Example #21
    def test_simple_fit_predict(self):
        estimator = LeastSquaresBinaryClassifierLearner()
        constraints = DemographicParity()
        expgrad = ExponentiatedGradient(estimator, constraints)
        expgrad.fit(pd.DataFrame(X1), pd.Series(labels),
                    sensitive_features=pd.Series(sensitive_features))
        expgrad.predict(pd.DataFrame(X1))
Example #22
def run_corrupt(fairness_constraints):
    all_results = {}
    all_results['eps'] = fairness_constraints
    all_results['accuracy'] = {'train': [], 'test': []}

    all_results['violation'] = {'train': [], 'test': []}

    all_results['violation_male'] = {'train': [], 'test': []}

    all_results['violation_female'] = {'train': [], 'test': []}

    for eps in fairness_constraints:
        begin = time.time()

        print(f"[INFO][RUN] Corrupt")
        sweep = ExponentiatedGradient(LogisticRegression(solver='liblinear', fit_intercept=True),
                            constraints=EqualizedOdds(),
                            eps=eps)        

        try:
            sweep.fit(X_train, Y_noised, sensitive_features=A_train)

            prediction_train = sweep.predict(X_train)
            prediction_test = sweep.predict(X_test)
        except Exception:
            print(f"Fairlearn can't fit at fairness constraint {eps}")
            continue

        all_results['accuracy']['train'].append(accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(accuracy(prediction_test, Y_test))

        all_results['violation']['train'].append(violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(violation(prediction_test, Y_test, A_test))

        all_results['violation_male']['train'].append(violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(violation(prediction_test, Y_test, A_test, grp=1))         

        all_results['violation_female']['train'].append(violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(violation(prediction_test, Y_test, A_test, grp=0))
        print(f"Running fairness constraint: {eps}, Training Accuracy: {all_results['accuracy']['train'][-1]}, Test Accuracy: {all_results['accuracy']['test'][-1]}, Training Violation: {all_results['violation']['train'][-1]}, Test Violation: {all_results['violation']['test'][-1]}, Time cost: {time.time() - begin}")

    acc = np.array(all_results['accuracy']['test'])
    v = np.array(all_results['violation']['test'])
    all_results['accuracy']['mean'] = acc.mean()
    all_results['accuracy']['std'] = acc.std()
    all_results['violation']['mean'] = v.mean()
    all_results['violation']['std'] = v.std()
    return all_results
Example #23
def reduction(name, fold, num_X=0):
    # load data
    train_data, test_data, columns, _, train_y_fair, train_y_proxy, test_y_fair, test_y_proxy = load_data(
        name, fold, num_X=num_X, use_fair=False)

    train_X = train_data.drop(columns=columns)
    train_y_true = train_data[DATA2D[name]]
    train_sex = train_data[DATA2S[name]]

    # learn
    learn = ExponentiatedGradient(LogisticRegression(solver='liblinear',
                                                     fit_intercept=True),
                                  constraints=DemographicParity())

    learn.fit(train_X, train_y_true, sensitive_features=train_sex)

    # predict
    test_X = test_data.drop(columns=columns)
    prob_test = learn._pmf_predict(test_X)[:, 1]
    prob_train = learn._pmf_predict(train_X)[:, 1]
    s_train = train_sex.astype(bool)
    s_test = np.array(test_data[DATA2S[name]]).astype(bool)

    save_file(name, num_X, fold, "Reduction", prob_train, s_train,
              train_y_fair, train_y_proxy, prob_test, s_test, test_y_fair,
              test_y_proxy)
Example #24
    def test_input_X_unchanged(self, transformA, transformY, transformX,
                               mocker):
        # This test ensures that X is passed to the underlying estimator unchanged. For y and
        # sample_weight, ExponentiatedGradient applies its own required transformations, so the
        # estimator receives them as pandas.Series.
        X, y, A = _get_data()

        transformed_X = transformX(X)
        transformed_y = transformY(y)
        transformed_A = transformA(A)

        # Using a mocked estimator here since we don't actually want to fit one, but rather care
        # about having that object's fit method called exactly twice through the best_h calls.
        estimator = mocker.MagicMock()
        estimator.predict = mocker.MagicMock(return_value=y)
        # ExponentiatedGradient pickles and unpickles the estimator, which isn't possible for the
        # mock object, so we patch import of pickle as well. It sets the result from pickle.loads
        # as the estimator, so we can simply overwrite the return value to be our mocked estimator
        # object.
        mocker.patch('pickle.dumps')
        mocker.patch('pickle.loads', return_value=estimator)

        # restrict ExponentiatedGradient to a single iteration
        expgrad = ExponentiatedGradient(estimator,
                                        constraints=DemographicParity(),
                                        max_iter=1)
        expgrad.fit(transformed_X,
                    transformed_y,
                    sensitive_features=transformed_A)

        # ensure that the input data wasn't changed by our mitigator before being passed to the
        # underlying estimator
        assert estimator.fit.call_count == 2
        for name, args, kwargs in estimator.method_calls:
            if name == 'fit':
                assert len(args) == 2  # X and y
                assert len(kwargs) == 1  # sample_weight
                assert isinstance(args[0], type(transformed_X))
                assert isinstance(args[1], pd.Series)
                assert isinstance(kwargs['sample_weight'], pd.Series)
Example #25
    def run_smoke_test(self, data, flipped=False):
        if flipped:
            y = self.flipped_y
        else:
            y = self.y

        ratio = 1.0
        if "ratio" in data.keys():
            ratio = data["ratio"]
        expgrad = ExponentiatedGradient(
            self.learner,
            constraints=data["cons_class"](ratio=ratio),
            eps=data["eps"])
        expgrad.fit(self.X, y, sensitive_features=self.A)

        def Q(X):
            return expgrad._pmf_predict(X)[:, 1]

        n_predictors = len(expgrad.predictors_)

        disparity_moment = data["cons_class"](ratio=ratio)
        disparity_moment.load_data(self.X, y, sensitive_features=self.A)
        error = ErrorRate()
        error.load_data(self.X, y, sensitive_features=self.A)
        disparity = disparity_moment.gamma(Q).max()
        error = error.gamma(Q)[0]

        assert expgrad.best_gap_ == pytest.approx(data["best_gap"],
                                                  abs=self._PRECISION)
        assert expgrad.last_iter_ == data["last_iter"]
        assert expgrad.best_iter_ == data["best_iter"]
        assert expgrad.last_iter_ >= _MIN_ITER
        assert disparity == pytest.approx(data["disp"], abs=self._PRECISION)
        assert error == pytest.approx(data["error"], abs=self._PRECISION)
        assert expgrad.n_oracle_calls_ == data["n_oracle_calls"]
        assert expgrad.n_oracle_calls_dummy_returned_ == data[
            "n_oracle_calls_dummy_returned"]
        assert n_predictors == data["n_predictors"]
        assert len(expgrad.oracle_execution_times_) == expgrad.n_oracle_calls_
Example #26
def run_expgrad_classification(estimator, moment):
    """Run classification test with ExponentiatedGradient."""
    X_train, Y_train, A_train, X_test, Y_test, A_test = fetch_adult()
    verification_moment = copy.deepcopy(moment)

    unmitigated = copy.deepcopy(estimator)
    unmitigated.fit(X_train, Y_train)

    expgrad = ExponentiatedGradient(estimator, constraints=moment)
    expgrad.fit(X_train, Y_train, sensitive_features=A_train)

    assert expgrad.n_oracle_calls_ > 1
    assert len(expgrad.predictors_) > 1

    verification_moment.load_data(X_test, Y_test, sensitive_features=A_test)
    gamma_unmitigated = verification_moment.gamma(
        lambda x: unmitigated.predict(x))
    gamma_mitigated = verification_moment.gamma(lambda x: expgrad.predict(x))

    for idx in gamma_mitigated.index:
        assert abs(gamma_mitigated[idx]) <= abs(
            gamma_unmitigated[idx]), "Checking {0}".format(idx)
Example #27
def train_and_predict(train: DataTuple, test: TestTuple, args: AgarwalArgs):
    """Train a logistic regression model and compute predictions on the given test data."""
    random.seed(888)
    np.random.seed(888)

    fairness_class: ConditionalSelectionRate
    if args.fairness == "DP":
        fairness_class = DemographicParity()
    else:
        fairness_class = EqualizedOdds()

    if args.classifier == "SVM":
        model = select_svm(args.C, args.kernel)
    else:
        model = LogisticRegression(solver="liblinear",
                                   random_state=888,
                                   max_iter=5000,
                                   C=args.C)

    data_x = train.x
    data_y = train.y[train.y.columns[0]]
    data_a = train.s[train.s.columns[0]]

    exponentiated_gradient = ExponentiatedGradient(model,
                                                   constraints=fairness_class,
                                                   eps=args.eps,
                                                   T=args.iters)
    exponentiated_gradient.fit(data_x, data_y, sensitive_features=data_a)

    randomized_predictions = exponentiated_gradient.predict(test.x)
    preds = pd.DataFrame(randomized_predictions, columns=["preds"])

    min_class_label = train.y[train.y.columns[0]].min()
    if preds["preds"].min() != preds["preds"].max():
        preds = preds.replace(preds["preds"].min(), min_class_label)
    return preds
Example #28
    def run_smoke_test(self, data):
        expgrad = ExponentiatedGradient(self.learner,
                                        constraints=data["cons_class"](),
                                        eps=data["eps"])
        expgrad.fit(self.X, self.y, sensitive_features=self.A)

        Q = expgrad._best_classifier
        n_classifiers = len(expgrad._classifiers)

        disparity_moment = data["cons_class"]()
        disparity_moment.load_data(self.X, self.y, sensitive_features=self.A)
        error = ErrorRate()
        error.load_data(self.X, self.y, sensitive_features=self.A)
        disparity = disparity_moment.gamma(Q).max()
        error = error.gamma(Q)[0]

        assert expgrad._best_gap == pytest.approx(data["best_gap"],
                                                  abs=self._PRECISION)
        assert expgrad._last_t == data["last_t"]
        assert expgrad._best_t == data["best_t"]
        assert disparity == pytest.approx(data["disp"], abs=self._PRECISION)
        assert error == pytest.approx(data["error"], abs=self._PRECISION)
        assert expgrad._n_oracle_calls == data["n_oracle_calls"]
        assert n_classifiers == data["n_classifiers"]
Example #29
    def test_sample_weights_argument(self):
        estimator = Pipeline([('scaler', StandardScaler()),
                              ('logistic',
                               LogisticRegression(solver='liblinear'))])

        X, y, A = _get_data()

        expgrad = ExponentiatedGradient(estimator,
                                        constraints=DemographicParity(),
                                        max_iter=1)

        with pytest.raises(ValueError) as execInfo:
            expgrad.fit(X, y, sensitive_features=(A))
        assert 'Pipeline.fit does not accept the sample_weight parameter' \
            in execInfo.value.args[0]

        expgrad = ExponentiatedGradient(
            estimator,
            constraints=DemographicParity(),
            max_iter=1,
            sample_weight_name='logistic__sample_weight')
        expgrad.fit(X, y, sensitive_features=(A))
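The second fit above works because scikit-learn Pipelines route fit parameters to their steps via a "<step name>__<parameter>" prefix, which is what sample_weight_name='logistic__sample_weight' spells out. A stand-alone sketch of that routing on toy data of ours:

import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

pipe = Pipeline([('scaler', StandardScaler()),
                 ('logistic', LogisticRegression(solver='liblinear'))])

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0, 0, 1, 1])
w = np.array([1.0, 1.0, 2.0, 2.0])

# Pipeline.fit has no plain sample_weight argument; the weights must be addressed
# to the step that consumes them, hence the 'logistic__' prefix.
pipe.fit(X, y, logistic__sample_weight=w)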
Example #30
def test_random_state_exponentiated_gradient():
    """Test that the random_state argument works as expected.

    This test case reproduces the problem reported in issue 588 if the
    random_state does not work as intended within Exponentiated Gradient.
    https://github.com/fairlearn/fairlearn/issues/588
    """
    X_train, X_test, y_train, y_test, race_train, race_test = _get_test_data()

    # Train a simple logistic regression model
    lr = LogisticRegression(max_iter=1000, random_state=0)
    lr.fit(X_train, y_train)

    # Train the ExponentiatedGradient mitigator
    expgrad = ExponentiatedGradient(estimator=lr, constraints=EqualizedOdds())
    expgrad.fit(X_train, y_train, sensitive_features=race_train)

    # Predictions for a fixed random_state must be reproducible; a different seed may differ
    y_pred_test = expgrad.predict(X_test, random_state=0)
    for _ in range(100):
        assert (y_pred_test == expgrad.predict(X_test, random_state=0)).all()
    assert (y_pred_test != expgrad.predict(X_test, random_state=1)).any()