def _selected_label_compare(moment, metric, selected_label):
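    """Check that the moment's gamma() matches MetricFrame differences.

    Each (sign, event, group) entry reported by gamma() should equal the
    difference between the per-group metric and the overall metric (with the
    sign flipped for '-' entries); here the event string also carries the
    selected y label.
    """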
    # Similar to _simple_compare, but we need to worry about the y label
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=7132752)
    X_dummy = pd.get_dummies(X)

    est = LogisticRegression()
    est.fit(X_dummy, y)
    y_pred = est.predict(X_dummy)

    target = moment()
    target.load_data(np.asarray(X_dummy), np.asarray(y),
                     sensitive_features=X['sens'],
                     control_features=X['ctrl'])

    # gamma measures the constraint violation relative to the overall value
    results = target.gamma(est.predict)
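    # results is indexed by (sign, event, group) tuples; the '+' and '-'
    # rows hold the two signed halves of each constraint violation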

    # Compute the constraint violation using the metrics
    mf_pred = MetricFrame(metrics=metric, y_true=y, y_pred=y_pred,
                          sensitive_features=X['sens'],
                          control_features=X['ctrl'])
    diffs = mf_pred.by_group - mf_pred.overall

    # Compare (with a very small amount of wriggle room)
    for ib in ibs:
        for sf in sfs:
            # Format defined within utility_parity._combine_event_and_control
            label_format = "control={0},label={1}"
            label = label_format.format(ib, selected_label)
            assert diffs[(ib, sf)] == pytest.approx(results[('+', label, sf)],
                                                    rel=1e-10, abs=1e-12)
            assert diffs[(ib, sf)] == pytest.approx(-results[('-', label, sf)],
                                                    rel=1e-10, abs=1e-12)


def _simple_compare(moment, metric):
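    """Check that the moment's gamma() matches MetricFrame differences.

    Fits a LogisticRegression on the loan scenario data, then verifies that
    each (sign, event, group) entry of gamma() equals the signed difference
    between the per-group metric and the overall metric.
    """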
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=7632752)
    X_dummy = pd.get_dummies(X)

    est = LogisticRegression()
    est.fit(X_dummy, y)
    y_pred = est.predict(X_dummy)

    target = moment()
    target.load_data(np.asarray(X_dummy), np.asarray(y),
                     sensitive_features=X['sens'],
                     control_features=X['ctrl'])

    # gamma measures the constraint violation relative to the overall value
    results = target.gamma(est.predict)

    # Compute the constraint violation using the metrics
    mf_pred = MetricFrame(metrics=metric, y_true=y, y_pred=y_pred,
                          sensitive_features=X['sens'],
                          control_features=X['ctrl'])
    diffs = mf_pred.by_group - mf_pred.overall

    # Compare (with a very small amount of wriggle room)
    for ib in ibs:
        for sf in sfs:
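            # Format defined within utility_parity._combine_event_and_control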
            event_format = "control={0},all"
            assert diffs[(ib, sf)] == pytest.approx(results[('+', event_format.format(ib), sf)],
                                                    rel=1e-10, abs=1e-12)
            assert diffs[(ib, sf)] == pytest.approx(-results[('-', event_format.format(ib), sf)],
                                                    rel=1e-10, abs=1e-12)


def test_equalized_odds():
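    """Check EqualizedOdds mitigation with control features.

    Compares tpr/fpr disparities (relative to the overall value) for an
    unmitigated model, ExponentiatedGradient without control features, and
    ExponentiatedGradient with control features; the last should be no worse
    on every (control value, metric) cell.
    """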
    # Have to do this one longhand, since it combines tpr and fpr
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=632753)
    X_dummy = pd.get_dummies(X)

    metrics = {"tpr": true_positive_rate, "fpr": false_positive_rate}

    unmitigated = LogisticRegression()
    unmitigated.fit(X_dummy, y)
    y_pred = unmitigated.predict(X_dummy)
    mf_unmitigated = MetricFrame(
        metrics=metrics,
        y_true=y,
        y_pred=y_pred,
        sensitive_features=X["sens"],
        control_features=X["ctrl"],
    )

    expgrad_basic = ExponentiatedGradient(
        LogisticRegression(),
        constraints=EqualizedOdds(difference_bound=0.01),
        eps=0.01)
    expgrad_basic.fit(X_dummy, y, sensitive_features=X["sens"])
    y_pred_basic = expgrad_basic.predict(X_dummy, random_state=9235)
    mf_basic = MetricFrame(
        metrics=metrics,
        y_true=y,
        y_pred=y_pred_basic,
        sensitive_features=X["sens"],
        control_features=X["ctrl"],
    )

    expgrad_control = ExponentiatedGradient(
        LogisticRegression(),
        constraints=EqualizedOdds(difference_bound=0.01),
        eps=0.01)
    expgrad_control.fit(X_dummy,
                        y,
                        sensitive_features=X["sens"],
                        control_features=X["ctrl"])
    y_pred_control = expgrad_control.predict(X_dummy, random_state=8152)
    mf_control = MetricFrame(
        metrics=metrics,
        y_true=y,
        y_pred=y_pred_control,
        sensitive_features=X["sens"],
        control_features=X["ctrl"],
    )

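    # With control features, the disparity relative to the overall value
    # should be no larger than either baseline for every (ctrl, metric) cell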
    compare_unmitigated = mf_control.difference(
        method="to_overall") <= mf_unmitigated.difference(method="to_overall")
    print(compare_unmitigated)

    compare_basic = mf_control.difference(
        method="to_overall") <= mf_basic.difference(method="to_overall")
    print(compare_basic)

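    # Flatten the (control value x metric) comparison frames and require
    # every cell to hold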
    assert compare_basic.values.reshape(6).all()
    assert compare_unmitigated.values.reshape(6).all()


def run_comparisons(moment, metric_fn):
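    """Compare mitigation with and without control features for a moment.

    Prints the per-group metric and its difference from the overall value for
    the input labels, an unmitigated LogisticRegression, ExponentiatedGradient
    without control features, and ExponentiatedGradient with control features,
    then asserts that the control-feature run is no worse than the other two.
    """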
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=163)
    X_dummy = pd.get_dummies(X)

    mf_input = MetricFrame(metrics=metric_fn, y_true=y, y_pred=y,
                           sensitive_features=X['sens'],
                           control_features=X['ctrl'])

    print("Metric for input:\n", mf_input.by_group)
    print("Input Metric differences:\n", mf_input.difference(method='to_overall'), "\n")

    unmitigated = LogisticRegression()
    unmitigated.fit(X_dummy, y)
    y_pred = unmitigated.predict(X_dummy)
    mf_unmitigated = MetricFrame(metrics=metric_fn,
                                 y_true=y, y_pred=y_pred,
                                 sensitive_features=X['sens'],
                                 control_features=X['ctrl'])
    print("Unmitigated metric:\n", mf_unmitigated.by_group)
    print("Unmitigated metric differences:\n",
          mf_unmitigated.difference(method='to_overall'), "\n")

    expgrad_basic = ExponentiatedGradient(
        LogisticRegression(),
        constraints=moment(),
        eps=0.005)
    expgrad_basic.fit(X_dummy, y, sensitive_features=X['sens'])
    y_pred_basic = expgrad_basic.predict(X_dummy, random_state=8235)
    mf_basic = MetricFrame(metrics=metric_fn, y_true=y, y_pred=y_pred_basic,
                           sensitive_features=X['sens'],
                           control_features=X['ctrl'])
    print("Basic expgrad metric:\n", mf_basic.by_group)
    print("Basic expgrad metric differences:\n",
          mf_basic.difference(method='to_overall'), "\n")

    expgrad_control = ExponentiatedGradient(
        LogisticRegression(),
        constraints=moment(),
        eps=0.005)
    expgrad_control.fit(X_dummy, y,
                        sensitive_features=X['sens'],
                        control_features=X['ctrl'])
    y_pred_control = expgrad_control.predict(X_dummy, random_state=852)
    mf_control = MetricFrame(metrics=metric_fn, y_true=y, y_pred=y_pred_control,
                             sensitive_features=X['sens'],
                             control_features=X['ctrl'])
    print("expgrad_control metric:\n", mf_control.by_group)
    print("expgrad_control metric differences:\n",
          mf_control.difference(method='to_overall'))

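    # Mitigating with control features should not increase any per-group
    # difference relative to the unmitigated or basic-mitigation runs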
    assert (mf_control.difference(method='to_overall') <=
            mf_unmitigated.difference(method='to_overall')).all()

    assert (mf_control.difference(method='to_overall') <=
            mf_basic.difference(method='to_overall')).all()