Example #1
0
def run_corrupt(fairness_constraints):
    """Sweep ExponentiatedGradient over ``fairness_constraints`` on noised labels.

    For each eps, fits a liblinear LogisticRegression under an EqualizedOdds
    constraint on (X_train, Y_noised) and records train/test accuracy plus
    overall and per-group fairness violations.  Adds mean/std of the test
    metrics before returning the results dict.

    Relies on module-level globals: X_train, X_test, Y_train, Y_test,
    Y_noised, A_train, A_test, accuracy, violation.
    """
    all_results = {}
    all_results['eps'] = fairness_constraints
    all_results['accuracy'] = {'train': [], 'test': []}
    all_results['violation'] = {'train': [], 'test': []}
    all_results['violation_male'] = {'train': [], 'test': []}
    all_results['violation_female'] = {'train': [], 'test': []}

    for eps in fairness_constraints:
        begin = time.time()

        print(f"[INFO][RUN] Corrupt")
        sweep = ExponentiatedGradient(
            LogisticRegression(solver='liblinear', fit_intercept=True),
            constraints=EqualizedOdds(),
            eps=eps)

        try:
            sweep.fit(X_train, Y_noised, sensitive_features=A_train)
            prediction_train = sweep.predict(X_train)
            prediction_test = sweep.predict(X_test)
        except Exception:
            # BUG FIX: the original bare ``except: pass`` fell through to the
            # bookkeeping below and referenced predictions that were never
            # assigned (NameError) or were stale from the previous eps.
            print(f"Fairlearn can't fit at fairness constraint {eps}")
            continue

        all_results['accuracy']['train'].append(accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(accuracy(prediction_test, Y_test))

        all_results['violation']['train'].append(violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(violation(prediction_test, Y_test, A_test))

        all_results['violation_male']['train'].append(violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(violation(prediction_test, Y_test, A_test, grp=1))

        all_results['violation_female']['train'].append(violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(violation(prediction_test, Y_test, A_test, grp=0))
        print(f"Running fairness constraint: {eps}, Training Accuracy: {all_results['accuracy']['train'][-1]}, Test Accuracy: {all_results['accuracy']['test'][-1]}, Training Violation: {all_results['violation']['train'][-1]}, Test Violation: {all_results['violation']['test'][-1]}, Time cost: {time.time() - begin}")

    # Summary statistics over the sweep (test split only).
    acc = np.array(all_results['accuracy']['test'])
    v = np.array(all_results['violation']['test'])
    all_results['accuracy']['mean'] = acc.mean()
    all_results['accuracy']['std'] = acc.std()
    all_results['violation']['mean'] = v.mean()
    all_results['violation']['std'] = v.std()
    return all_results
Example #2
0
 def test_simple_fit_predict(self):
     """Smoke test: ExponentiatedGradient fits and predicts without error."""
     learner = LeastSquaresBinaryClassifierLearner()
     mitigator = ExponentiatedGradient(learner, DemographicParity())
     features = pd.DataFrame(X1)
     mitigator.fit(features, pd.Series(labels),
                   sensitive_features=pd.Series(sensitive_features))
     mitigator.predict(features)
def run(fairness_constraints, use_proxy=False):
    """Sweep ExponentiatedGradient over ``fairness_constraints``.

    When ``use_proxy`` is True, a ProxyEqualizedOdds constraint (which
    corrects for label noise via ``error_rate``) is used instead of plain
    EqualizedOdds.  Records train/test accuracy and overall / per-group
    fairness violations for every eps.

    Relies on module-level globals: X_train, X_test, Y_train, Y_test,
    Y_noised, A_train, A_test, error_rate, accuracy, violation.
    """
    print(f"Start running experiment with Proxy: {use_proxy}.")
    all_results = {}
    all_results['eps'] = fairness_constraints
    all_results['accuracy'] = {'train': [], 'test': []}
    all_results['violation'] = {'train': [], 'test': []}
    all_results['violation_male'] = {'train': [], 'test': []}
    all_results['violation_female'] = {'train': [], 'test': []}

    for eps in fairness_constraints:
        begin = time.time()

        if use_proxy:
            sweep = ExponentiatedGradient(
                LogisticRegression(solver='liblinear', fit_intercept=True),
                constraints=ProxyEqualizedOdds(error_rate=error_rate),
                eps=eps)
        else:
            sweep = ExponentiatedGradient(
                LogisticRegression(solver='liblinear', fit_intercept=True),
                constraints=EqualizedOdds(),
                eps=eps)

        try:
            sweep.fit(X_train, Y_noised, sensitive_features=A_train)
            prediction_train = sweep.predict(X_train)
            prediction_test = sweep.predict(X_test)
        except Exception:
            # BUG FIX: the original bare ``except: pass`` fell through to the
            # bookkeeping below and referenced predictions that were never
            # assigned (NameError) or were stale from the previous eps.
            print(f"Fairlearn can't fit at fairness constraint {eps}")
            continue

        all_results['accuracy']['train'].append(
            accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(
            accuracy(prediction_test, Y_test))

        all_results['violation']['train'].append(
            violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(
            violation(prediction_test, Y_test, A_test))

        all_results['violation_male']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=1))

        all_results['violation_female']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=0))

        print(
            f"Running fairness constraint: {eps}, Training Accuracy: {all_results['accuracy']['train']}, Test Accuracy: {all_results['accuracy']['test']}, Training Violation: {all_results['violation']['train']}, Test Violation: {all_results['violation']['test']}, Time cost: {time.time() - begin}"
        )

    return all_results
def test_equalized_odds():
    """Check EqualizedOdds mitigation with control features reduces disparity.

    Done longhand since equalized odds combines tpr and fpr.
    """
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=632753)
    X_dummy = pd.get_dummies(X)

    metrics = {"tpr": true_positive_rate, "fpr": false_positive_rate}

    def _frame(predictions):
        # Disaggregate tpr/fpr by the sensitive and control columns.
        return MetricFrame(
            metrics=metrics,
            y_true=y,
            y_pred=predictions,
            sensitive_features=X["sens"],
            control_features=X["ctrl"],
        )

    # Unmitigated baseline.
    unmitigated = LogisticRegression()
    unmitigated.fit(X_dummy, y)
    mf_unmitigated = _frame(unmitigated.predict(X_dummy))

    # Mitigation without control features.
    expgrad_basic = ExponentiatedGradient(
        LogisticRegression(),
        constraints=EqualizedOdds(difference_bound=0.01),
        eps=0.01)
    expgrad_basic.fit(X_dummy, y, sensitive_features=X["sens"])
    mf_basic = _frame(expgrad_basic.predict(X_dummy, random_state=9235))

    # Mitigation with control features.
    expgrad_control = ExponentiatedGradient(
        LogisticRegression(),
        constraints=EqualizedOdds(difference_bound=0.01),
        eps=0.01)
    expgrad_control.fit(X_dummy,
                        y,
                        sensitive_features=X["sens"],
                        control_features=X["ctrl"])
    mf_control = _frame(expgrad_control.predict(X_dummy, random_state=8152))

    compare_unmitigated = mf_control.difference(
        method="to_overall") <= mf_unmitigated.difference(method="to_overall")
    print(compare_unmitigated)

    compare_basic = mf_control.difference(
        method="to_overall") <= mf_basic.difference(method="to_overall")
    print(compare_basic)

    assert compare_basic.values.reshape(6).all()
    assert compare_unmitigated.values.reshape(6).all()
Example #5
0
def run_comparisons(moment, metric_fn):
    """Compare unmitigated vs. ExponentiatedGradient mitigation, with and without control features.

    Trains three models on a synthetic loan scenario and asserts that the
    mitigation fitted with ``control_features`` has per-group metric
    differences no larger than both the unmitigated model and the basic
    (no-control-features) mitigation.

    Relies on module-level globals: n, f, sfs, ibs, loan_scenario_generator.
    """
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=163)
    X_dummy = pd.get_dummies(X)

    # Reference frame: metric of the true labels against themselves.
    mf_input = MetricFrame(metric_fn, y, y,
                           sensitive_features=X['sens'],
                           control_features=X['ctrl'])

    print("Metric for input:\n", mf_input.by_group)
    print("Input Metric differences:\n", mf_input.difference(method='to_overall'), "\n")

    # Unmitigated baseline model.
    unmitigated = LogisticRegression()
    unmitigated.fit(X_dummy, y)
    y_pred = unmitigated.predict(X_dummy)
    mf_unmitigated = MetricFrame(metric_fn,
                                 y, y_pred,
                                 sensitive_features=X['sens'],
                                 control_features=X['ctrl'])
    print("Unmitigated metric:\n", mf_unmitigated.by_group)
    print("Unmitigated metric differences:\n",
          mf_unmitigated.difference(method='to_overall'), "\n")

    # Mitigation constrained only on the sensitive feature.
    expgrad_basic = ExponentiatedGradient(
        LogisticRegression(),
        constraints=moment(),
        eps=0.005)
    expgrad_basic.fit(X_dummy, y, sensitive_features=X['sens'])
    y_pred_basic = expgrad_basic.predict(X_dummy, random_state=8235)
    mf_basic = MetricFrame(metric_fn, y, y_pred_basic,
                           sensitive_features=X['sens'],
                           control_features=X['ctrl'])
    print("Basic expgrad metric:\n", mf_basic.by_group)
    print("Basic expgrad metric differences:\n",
          mf_basic.difference(method='to_overall'), "\n")

    # Mitigation that additionally conditions on the control feature.
    expgrad_control = ExponentiatedGradient(
        LogisticRegression(),
        constraints=moment(),
        eps=0.005)
    expgrad_control.fit(X_dummy, y,
                        sensitive_features=X['sens'],
                        control_features=X['ctrl'])
    y_pred_control = expgrad_control.predict(X_dummy, random_state=852)
    mf_control = MetricFrame(metric_fn, y, y_pred_control,
                             sensitive_features=X['sens'],
                             control_features=X['ctrl'])
    print("expgrad_control metric:\n", mf_control.by_group)
    print("expgrad_control metric differences:\n",
          mf_control.difference(method='to_overall'))

    # Control-feature mitigation should dominate both alternatives.
    assert (mf_control.difference(method='to_overall') <=
            mf_unmitigated.difference(method='to_overall')).all()

    assert (mf_control.difference(method='to_overall') <=
            mf_basic.difference(method='to_overall')).all()
Example #6
0
def test_random_state_exponentiated_gradient():
    """Test that the random_state argument works as expected.

    This test case reproduces the problem reported in issue 588 if the
    random_state does not work as intended within Exponentiated Gradient.
    https://github.com/fairlearn/fairlearn/issues/588
    """
    X_train, X_test, y_train, y_test, race_train, race_test = _get_test_data()

    # Fit a plain logistic regression as the base estimator.
    base_model = LogisticRegression(max_iter=1000, random_state=0)
    base_model.fit(X_train, y_train)

    # Mitigate with ExponentiatedGradient under equalized odds.
    mitigator = ExponentiatedGradient(estimator=base_model,
                                      constraints=EqualizedOdds())
    mitigator.fit(X_train, y_train, sensitive_features=race_train)

    # The same seed must always reproduce identical predictions...
    reference = mitigator.predict(X_test, random_state=0)
    for _ in range(100):
        assert (reference == mitigator.predict(X_test, random_state=0)).all()
    # ...while a different seed must change at least one prediction.
    assert (reference != mitigator.predict(X_test, random_state=1)).any()
def evaluate(eps, X_train, y_train, X_test, y_test, sex_train, sex_test,
             index):
    """Fit a DemographicParity-constrained GBM and record its error and DP gap."""
    egsolver = ExponentiatedGradient(GradientBoostingClassifier(),
                                     DemographicParity(), eps=eps)
    egsolver.fit(X_train, y_train, sensitive_features=sex_train)
    predictions = egsolver.predict(X_test)
    # print("y_pred",y_pred)
    acc_summary = group_summary(accuracy_score,
                                y_test,
                                predictions,
                                sensitive_features=sex_test)
    sel_rate_summary = selection_rate_group_summary(
        y_test, predictions, sensitive_features=sex_test)
    error = 1 - acc_summary["overall"]
    dp = demographic(sel_rate_summary)
    # Results accumulate into module-level lists, keyed by ``index``.
    errorlist[index].append(error)
    dplist[index].append(dp)
    print("error:%f,dp:%f" % (error, dp))
Example #8
0
def run_expgrad_classification(estimator, moment):
    """Run classification test with ExponentiatedGradient."""
    X_train, Y_train, A_train, X_test, Y_test, A_test = fetch_adult()
    verification_moment = copy.deepcopy(moment)

    # Baseline fitted without any fairness mitigation.
    unmitigated = copy.deepcopy(estimator)
    unmitigated.fit(X_train, Y_train)

    expgrad = ExponentiatedGradient(estimator, constraints=moment)
    expgrad.fit(X_train, Y_train, sensitive_features=A_train)

    # The reduction should have iterated more than once.
    assert expgrad.n_oracle_calls_ > 1
    assert len(expgrad.predictors_) > 1

    # Compare constraint violations (gamma) on the held-out split.
    verification_moment.load_data(X_test, Y_test, sensitive_features=A_test)
    gamma_unmitigated = verification_moment.gamma(unmitigated.predict)
    gamma_mitigated = verification_moment.gamma(expgrad.predict)

    for idx in gamma_mitigated.index:
        message = "Checking {0}".format(idx)
        assert abs(gamma_mitigated[idx]) <= abs(gamma_unmitigated[idx]), message
Example #9
0
def train_and_predict(train: DataTuple, test: TestTuple, args: AgarwalArgs):
    """Train a logistic regression model and compute predictions on the given test data."""
    random.seed(888)
    np.random.seed(888)

    fairness_class: ConditionalSelectionRate
    fairness_class = DemographicParity() if args.fairness == "DP" else EqualizedOdds()

    if args.classifier == "SVM":
        base_model = select_svm(args.C, args.kernel)
    else:
        base_model = LogisticRegression(solver="liblinear",
                                        random_state=888,
                                        max_iter=5000,
                                        C=args.C)

    # Fairlearn wants 1-D labels and sensitive attributes.
    features = train.x
    labels = train.y[train.y.columns[0]]
    sensitive = train.s[train.s.columns[0]]

    mitigator = ExponentiatedGradient(base_model,
                                      constraints=fairness_class,
                                      eps=args.eps,
                                      T=args.iters)
    mitigator.fit(features, labels, sensitive_features=sensitive)

    preds = pd.DataFrame(mitigator.predict(test.x), columns=["preds"])

    # Re-map the smaller predicted class onto the training data's minimum
    # class label, but only when both classes actually occur.
    min_class_label = train.y[train.y.columns[0]].min()
    if preds["preds"].min() != preds["preds"].max():
        preds = preds.replace(preds["preds"].min(), min_class_label)
    return preds
 def test_simple_fit_predict_regression(self, constraints):
     """Smoke test: ExponentiatedGradient handles regression targets."""
     X, y, sensitive_features = _get_data(y_as_scores=True)
     mitigator = ExponentiatedGradient(LeastSquaresRegressor(), constraints)
     mitigator.fit(X, y, sensitive_features=sensitive_features)
     mitigator.predict(X)
Example #11
0
                                y_pred,
                                sensitive_features=sex)
print("group_summary", result1)
# Disaggregated selection rates for the unmitigated predictions.
result2 = metrics.selection_rate_group_summary(y_true,
                                               y_pred,
                                               sensitive_features=sex)
print("selection_rate_group_summary", result2)
# FairlearnDashboard(sensitive_features=sex,
#                        sensitive_feature_names=['sex'],
#                        y_true=y_true,
#                        y_pred={"initial model": y_pred})

# Mitigate with ExponentiatedGradient under a DemographicParity constraint.
np.random.seed(0)
constraint = DemographicParity()
classifier = DecisionTreeClassifier()
mitigator = ExponentiatedGradient(classifier, constraint)
#print("constructing mitigator")
mitigator.fit(X, y_true, sensitive_features=sex)
y_pred_mitigated = mitigator.predict(X)
# Selection rates after mitigation, for comparison with result2 above.
result2_mitigated = metrics.selection_rate_group_summary(
    y_true, y_pred_mitigated, sensitive_features=sex)
print("selection_rate_group_summary mitigated", result2_mitigated)
# Dashboard comparing the initial and mitigated models side by side.
FairlearnDashboard(sensitive_features=sex,
                   sensitive_feature_names=['sex'],
                   y_true=y_true,
                   y_pred={
                       "initial model": y_pred,
                       "mitigated model": y_pred_mitigated
                   })
#FairlearnDashboard(sensitive_features=sex, sensitive_feature_names=['sex'],y_true=y_true,y_pred={"initial model": y_pred})
Example #12
0
def run_surrogate(fairness_constraints, est=False):
    """Sweep ExponentiatedGradient with a SurrogateLoss base classifier.

    When ``est`` is False the known ``noise_matrix`` is supplied to the
    surrogate loss; otherwise the surrogate estimates the noise itself.
    Records train/test accuracy and overall / per-group fairness violations,
    plus mean/std of the test metrics.

    Relies on module-level globals: X_train, X_test, Y_train, Y_test,
    Y_noised, A_train, A_test, noise_matrix, error_rate, accuracy, violation.
    """
    print(f"[INFO][RUN] Surrogate Loss.")
    all_results = {}
    all_results['eps'] = fairness_constraints
    all_results['accuracy'] = {'train': [], 'test': []}
    all_results['violation'] = {'train': [], 'test': []}
    all_results['violation_male'] = {'train': [], 'test': []}
    all_results['violation_female'] = {'train': [], 'test': []}

    for eps in fairness_constraints:
        begin = time.time()

        if not est:
            surrogate_clf = SurrogateLoss(
                clf=LogisticRegression(solver='liblinear', fit_intercept=True),
                noise_matrix=noise_matrix)
        else:
            surrogate_clf = SurrogateLoss(
                clf=LogisticRegression(solver='liblinear', fit_intercept=True))

        sweep = ExponentiatedGradient(
            surrogate_clf,
            constraints=ProxyEqualizedOdds(error_rate=error_rate),
            eps=eps)

        sweep.fit(X_train, Y_noised, sensitive_features=A_train)

        prediction_train = sweep.predict(X_train)
        prediction_test = sweep.predict(X_test)

        all_results['accuracy']['train'].append(accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(accuracy(prediction_test, Y_test))

        all_results['violation']['train'].append(violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(violation(prediction_test, Y_test, A_test))

        # BUG FIX: these per-group lists previously stored *accuracy* values,
        # so 'violation_male'/'violation_female' never contained violations.
        # Record the group-restricted violation, mirroring run_corrupt().
        all_results['violation_male']['train'].append(violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(violation(prediction_test, Y_test, A_test, grp=1))

        all_results['violation_female']['train'].append(violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(violation(prediction_test, Y_test, A_test, grp=0))

        print(f"Running fairness constraint: {eps}, Training Accuracy: {all_results['accuracy']['train'][-1]}, Test Accuracy: {all_results['accuracy']['test'][-1]}, Training Violation: {all_results['violation']['train'][-1]}, Test Violation: {all_results['violation']['test'][-1]}, Time cost: {time.time() - begin}")

    # Summary statistics over the sweep (test split only).
    acc = np.array(all_results['accuracy']['test'])
    v = np.array(all_results['violation']['test'])
    all_results['accuracy']['mean'] = acc.mean()
    all_results['accuracy']['std'] = acc.std()
    all_results['violation']['mean'] = v.mean()
    all_results['violation']['std'] = v.std()
    return all_results
Example #13
0
def run_peerloss(fairness_constraints, alpha=0.5, est=False):
    """Sweep ExponentiatedGradient with a PeerLoss base classifier.

    ``delta`` (1 - sum of the two per-group error rates) is computed from
    either the estimated or the known error rates depending on ``est``.
    Records train/test accuracy and overall / per-group fairness violations,
    plus mean/std of the test metrics.

    Relies on module-level globals: X_train, X_test, Y_train, Y_test,
    Y_noised, A_train, A_test, error_rate, est_error_rate, accuracy,
    violation.
    """
    print(f"[INFO][RUN] Peer Loss with alpha = {alpha}")
    all_results = {}
    all_results['eps'] = fairness_constraints
    all_results['accuracy'] = {'train': [], 'test': []}
    all_results['violation'] = {'train': [], 'test': []}
    all_results['violation_male'] = {'train': [], 'test': []}
    all_results['violation_female'] = {'train': [], 'test': []}

    if est:
        delta = [1 - est_error_rate[i][0] - est_error_rate[i][1] for i in range(len(est_error_rate))]
    else:
        delta = [1 - error_rate[i][0] - error_rate[i][1] for i in range(len(error_rate))]

    for eps in fairness_constraints:
        begin = time.time()

        sweep = ExponentiatedGradient(
            PeerLoss(A_train, delta=delta, alpha=alpha),
            constraints=EqualizedOdds(),
            eps=eps)

        sweep.fit(X_train, Y_noised, sensitive_features=A_train)

        prediction_train = sweep.predict(X_train)
        prediction_test = sweep.predict(X_test)

        all_results['accuracy']['train'].append(accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(accuracy(prediction_test, Y_test))

        all_results['violation']['train'].append(violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(violation(prediction_test, Y_test, A_test))

        # BUG FIX: these per-group lists previously stored *accuracy* values,
        # so 'violation_male'/'violation_female' never contained violations.
        # Record the group-restricted violation, mirroring run_corrupt().
        all_results['violation_male']['train'].append(violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(violation(prediction_test, Y_test, A_test, grp=1))

        all_results['violation_female']['train'].append(violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(violation(prediction_test, Y_test, A_test, grp=0))

        print(f"Running fairness constraint: {eps}, Training Accuracy: {all_results['accuracy']['train'][-1]}, Test Accuracy: {all_results['accuracy']['test'][-1]}, Training Violation: {all_results['violation']['train'][-1]}, Test Violation: {all_results['violation']['test'][-1]}, Time cost: {time.time() - begin}")

    # Summary statistics over the sweep (test split only).
    acc = np.array(all_results['accuracy']['test'])
    v = np.array(all_results['violation']['test'])
    all_results['accuracy']['mean'] = acc.mean()
    all_results['accuracy']['std'] = acc.std()
    all_results['violation']['mean'] = v.mean()
    all_results['violation']['std'] = v.std()
    return all_results
class ExponentiatedGradientDT(AutoSklearnClassificationAlgorithm):
    """Auto-sklearn wrapper: DecisionTree mitigated by ExponentiatedGradient.

    Uses a DemographicParity constraint whose difference bound is tied to the
    ``eps`` hyperparameter.  The sensitive feature is assumed to be the first
    column of X.
    """

    def __init__(self, eps, max_iter, nu, eta0,
                 random_state=None):
        from sklearn.tree import DecisionTreeClassifier
        # Base learner passed to the reduction.
        self._estimator = DecisionTreeClassifier()
        # Placeholder constraint; fit() rebuilds it with difference_bound=eps.
        self.constraints = DemographicParity(difference_bound=0.01)
        self.eps = eps
        self.max_iter = max_iter
        self.nu = nu
        self.eta0 = eta0
        self.random_state = random_state
        # Fitted ExponentiatedGradient instance (None until fit()).
        self.estimator = None

    def fit(self, X, y, sample_weight=None):
        from fairlearn.reductions import ExponentiatedGradient, DemographicParity
        eps = float(self.eps)
        nu = float(self.nu)
        max_iter = int(self.max_iter)
        eta0 = float(self.eta0)  # renamed from eta_mul

        # For now the sensitive feature is always the first column.
        sensitive_features = X[:, 0]

        constraints = DemographicParity(difference_bound=eps)
        self.estimator = ExponentiatedGradient(
            self._estimator, constraints, eps=eps, T=max_iter, nu=nu,
            eta_mul=eta0)
        # BUG FIX: fairlearn's ExponentiatedGradient.fit takes
        # sensitive_features as a keyword argument, and does not return self,
        # so the original ``self.estimator = self.estimator.fit(X, y,
        # sensitive_features)`` both mis-passed the argument and rebound
        # self.estimator to fit()'s return value.
        self.estimator.fit(X, y, sensitive_features=sensitive_features)
        return self

    def predict(self, X):
        # Guard against calls before fit(); original raised
        # NotImplementedError, kept for interface compatibility.
        if self.estimator is None:
            raise NotImplementedError
        return self.estimator.predict(X)

    def predict_proba(self, X):
        if self.estimator is None:
            raise NotImplementedError()
        probas = self.estimator.predict_proba(X)
        probas = convert_multioutput_multiclass_to_multilabel(probas)
        return probas

    @staticmethod
    def get_properties(dataset_properties=None):
        return {'shortname': 'ExpGrad',
                'name': 'Exponentiated Gradient Decision Tree Classifier',
                'handles_regression': False,
                'handles_classification': True,
                'handles_multiclass': False,
                'handles_multilabel': False,
                'handles_multioutput': False,
                'is_deterministic': False,
                'input': (DENSE, SPARSE, UNSIGNED_DATA),
                'output': (PREDICTIONS,)}

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        # FIXME: This is missing quite a few, implement later.
        eps = UniformFloatHyperparameter(
            'eps', 0., 1., default_value=0.01)
        max_iter = UniformIntegerHyperparameter(
            "max_iter", 1, 1000, default_value=50)
        eta0 = UniformFloatHyperparameter(
            'eta0', 1e-4, 1e3, default_value=2.0)
        nu = UniformFloatHyperparameter(
            'nu', 0, 1, default_value=1e-6)

        cs.add_hyperparameters([eps, max_iter, eta0, nu])

        return cs
Example #15
0
# Post-processing mitigation: threshold optimization on top of the pipeline's
# predicted probabilities.
threshold_optimizer = ThresholdOptimizer(
    estimator=pipeline,
    constraints="demographic_parity",
    predict_method="predict_proba",
    prefit=False,
)
threshold_optimizer.fit(X_train, y_train, sensitive_features=A_train)
print(threshold_optimizer.predict(X_test, sensitive_features=A_test))
# Dump the fitted per-group interpolation thresholds for inspection.
print(
    json.dumps(
        threshold_optimizer.interpolated_thresholder_.interpolation_dict,
        default=str,
        indent=4,
    ))
plot_threshold_optimizer(threshold_optimizer)

# %%
# Similarly, :class:`fairlearn.reductions.ExponentiatedGradient` works with
# pipelines. Since it requires the :code:`sample_weight` parameter of the
# underlying estimator internally we need to provide it with the correct
# way of passing :code:`sample_weight` to just the :code:`"classifier"` step
# using the step name followed by two underscores and :code:`sample_weight`.

exponentiated_gradient = ExponentiatedGradient(
    estimator=pipeline,
    constraints=DemographicParity(),
    sample_weight_name="classifier__sample_weight",
)
exponentiated_gradient.fit(X_train, y_train, sensitive_features=A_train)
print(exponentiated_gradient.predict(X_test))
Example #16
0
# Sweep of PeerLoss + ExponentiatedGradient over alphas x fairness constraints,
# logging accuracy/violation for each combination.
fairness_constraints = [0.008 * i for i in range(1, 11)]
all_results_train = [[] for _ in range(len(alphas))]
all_results_test = [[] for _ in range(len(alphas))]

# BUG FIX: open the log via a ``with`` block so it is flushed and closed even
# if a sweep raises (the original never closed ``fp``).
with open('logs/peer_loss_result.txt', 'w') as fp:
    for i, alpha in enumerate(alphas):
        for eps in fairness_constraints:
            sweep = ExponentiatedGradient(
                PeerLoss(A_train, delta, alpha=alpha),
                constraints=ProxyEqualizedOdds(error_rate=error_rate),
                # constraints=EqualizedOdds(),
                eps=eps)
            sweep.fit(X_train, Y_noised, sensitive_features=A_train)

            prediction_train = sweep.predict(X_train)
            prediction_test = sweep.predict(X_test)

            accuracy_train = accuracy(prediction_train, Y_train)
            accuracy_test = accuracy(prediction_test, Y_test)
            violation_train = violation(prediction_train, Y_train, A_train)
            violation_test = violation(prediction_test, Y_test, A_test)
            all_results_train[i].append(accuracy_train)
            all_results_test[i].append(accuracy_test)

            print(
                f"Running alpha {alpha}, fairness constraint {eps}, Train Accuracy {accuracy_train}, Test Accuracy {accuracy_test}, Train Violation {violation_train}, Test Violation {violation_test}."
            )
            fp.write(
                f"{alpha},{eps},{accuracy_train},{accuracy_test},{violation_train},{violation_test}\n"
            )
 def test_simple_fit_predict_binary_classification(self, Constraints):
     """Smoke test: ExponentiatedGradient fits and predicts for binary labels."""
     X, y, sensitive_features = _get_data()
     mitigator = ExponentiatedGradient(LeastSquaresBinaryClassifierLearner(),
                                       Constraints())
     mitigator.fit(X, y, sensitive_features=sensitive_features)
     mitigator.predict(X)
def run_estimation(fairness_constraints, isEstimate=True):
    """Sweep ExponentiatedGradient with the ProxyEqualizedOdds2 constraint.

    ``delta`` (a per-group noise margin) is either estimated from the noised
    labels via a nearest-neighbour heuristic (``isEstimate=True``) or
    computed from the known ``error_rate``.

    Relies on module-level globals: X_train, X_test, Y_train, Y_test,
    Y_noised, A_train, A_test, error_rate, accuracy, violation.
    """
    def NearestNeighbor(X, A, i):
        """Index of the closest point to X[i] within the same group A[i]."""
        # Initial search radius: the larger of the distances to the first
        # two points (an arbitrary but valid upper bound to shrink from).
        distance = max(np.linalg.norm(X[i] - X[0]),
                       np.linalg.norm(X[i] - X[1]))
        nn = 0
        for j in range(len(X)):
            if i == j:
                continue
            if A[i] == A[j] and np.linalg.norm(X[i] - X[j]) < distance:
                distance = np.linalg.norm(X[i] - X[j])
                nn = j
        return nn

    def estimate_delta(X, A, Y):
        """Estimate per-group delta from label agreement between neighbours."""
        c1 = np.array([0., 0.])
        t = np.array([0., 0.])
        num = np.array([0., 0.])
        for i in range(len(X)):
            num[int(A[i])] += 1.
            if Y[i] == 1:
                j = NearestNeighbor(X, A, i)
                t[int(A[i])] += Y[i] == Y[j]
                c1[int(A[i])] += 1
        c1 = 2 * c1 / num
        c2 = 2 * t / num
        print(f"c1: {c1}, c2: {c2}")
        return np.sqrt(2 * c2 - c1 * c1)

    if isEstimate:
        print(f"Start running proxy fairness constraint with estimated delta.")
        delta = estimate_delta(X_train.values, A_train.values, Y_noised)
        print(f"Estimated delta is {delta}.")
    else:
        print("Start running proxy fairness constraint with known delta.")
        delta = np.array([
            1 - error_rate[0][0] - error_rate[0][1],
            1 - error_rate[1][0] - error_rate[1][1]
        ])
        print(f"The known delta is {delta}.")

    all_results = {}
    all_results['eps'] = fairness_constraints
    all_results['accuracy'] = {'train': [], 'test': []}
    all_results['violation'] = {'train': [], 'test': []}
    all_results['violation_male'] = {'train': [], 'test': []}
    all_results['violation_female'] = {'train': [], 'test': []}

    for eps in fairness_constraints:
        begin = time.time()

        sweep = ExponentiatedGradient(
            LogisticRegression(solver='liblinear', fit_intercept=True),
            constraints=ProxyEqualizedOdds2(delta=delta),
            eps=eps)

        try:
            sweep.fit(X_train, Y_noised, sensitive_features=A_train)
            prediction_train = sweep.predict(X_train)
            prediction_test = sweep.predict(X_test)
        except Exception:
            # BUG FIX: the original bare ``except: pass`` fell through to the
            # bookkeeping below and referenced predictions that were never
            # assigned (NameError) or were stale from the previous eps.
            print(f"Fairlearn can't fit at fairness constraint {eps}")
            continue

        all_results['accuracy']['train'].append(
            accuracy(prediction_train, Y_train))
        all_results['accuracy']['test'].append(
            accuracy(prediction_test, Y_test))

        all_results['violation']['train'].append(
            violation(prediction_train, Y_train, A_train))
        all_results['violation']['test'].append(
            violation(prediction_test, Y_test, A_test))

        all_results['violation_male']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=1))
        all_results['violation_male']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=1))

        all_results['violation_female']['train'].append(
            violation(prediction_train, Y_train, A_train, grp=0))
        all_results['violation_female']['test'].append(
            violation(prediction_test, Y_test, A_test, grp=0))

        print(
            f"Running fairness constraint: {eps}, Training Accuracy: {all_results['accuracy']['train']}, Test Accuracy: {all_results['accuracy']['test']}, Training Violation: {all_results['violation']['train']}, Test Violation: {all_results['violation']['test']}, Time cost: {time.time() - begin}"
        )

    return all_results