Example #1
0
def test_post_sep_fit_pred(
    toy_train_test: TrainValPair, post_model: PostAlgorithm, name: str, num_pos: int
) -> None:
    """Exercise a post-processing model through separate ``fit`` and ``predict`` calls.

    A logistic regression is run on the train set and evaluated on the
    concatenation of train and test, so that hard predictions exist for both.
    The post-processing model is then fitted on the train-set predictions and
    applied to the test-set predictions.

    Args:
        toy_train_test: fixture supplying the (train, test) pair.
        post_model: the post-processing algorithm under test.
        name: the name ``post_model`` is expected to report.
        num_pos: expected number of positive predictions after post-processing.
    """
    train, test = toy_train_test
    combined = em.concat_tt([train, test], ignore_index=True)

    in_model: InAlgorithm = LR()
    assert in_model is not None
    assert in_model.name == "Logistic Regression (C=1.0)"

    predictions: Prediction = in_model.run(train, combined)

    # Separate the predictions: the first ``n_train`` rows belong to the train
    # set, the remainder to the test set (re-indexed from zero).
    n_train = train.y.shape[0]
    pred_train = predictions.hard.iloc[:n_train]
    pred_test = predictions.hard.iloc[n_train:].reset_index(drop=True)
    assert np.count_nonzero(pred_test.values == 1) == 44
    assert np.count_nonzero(pred_test.values == 0) == 36

    assert post_model.name == name
    fitted = post_model.fit(Prediction(pred_train), train)
    fair_preds = fitted.predict(Prediction(pred_test), test)
    assert np.count_nonzero(fair_preds.hard.values == 1) == num_pos
    assert np.count_nonzero(fair_preds.hard.values == 0) == len(fair_preds) - num_pos

    per_group = em.metric_per_sensitive_attribute(fair_preds, test, ProbPos())
    diffs = em.diff_per_sensitive_attribute(per_group)
    # Only DPFlip is required to equalise the positive rate across groups.
    if not isinstance(post_model, DPFlip):
        return
    for diff in diffs.values():
        assert pytest.approx(diff, abs=1e-2) == 0
Example #2
0
def test_dp_flip_inverted_s(toy_train_test: TrainValPair) -> None:
    """Run DPFlip on data whose sensitive attribute has been inverted (``s -> 1 - s``).

    After post-processing, the per-group difference in positive-prediction
    probability must be zero to within 1e-2.
    """
    orig_train, orig_test = toy_train_test
    # Swap the two sensitive groups in both splits.
    train = orig_train.replace(s=1 - orig_train.s)
    test = orig_test.replace(s=1 - orig_test.s)
    combined = em.concat_tt([train, test], ignore_index=True)

    in_model: InAlgorithm = LR()
    assert in_model is not None
    assert in_model.name == "Logistic Regression (C=1.0)"

    predictions: Prediction = in_model.run(train, combined)

    # Separate the predictions: the first ``n_train`` rows belong to the train
    # set, the remainder to the test set (re-indexed from zero).
    n_train = train.y.shape[0]
    pred_train = predictions.hard.iloc[:n_train]
    pred_test = predictions.hard.iloc[n_train:].reset_index(drop=True)
    assert np.count_nonzero(pred_test.values == 1) == 44
    assert np.count_nonzero(pred_test.values == 0) == 36

    post_model: PostAlgorithm = DPFlip()
    assert post_model.name == "DemPar. Post Process"
    fair_preds = post_model.run(
        Prediction(pred_train), train, Prediction(pred_test), test
    )
    assert np.count_nonzero(fair_preds.hard.values == 1) == 57
    assert np.count_nonzero(fair_preds.hard.values == 0) == 23

    per_group = em.metric_per_sensitive_attribute(fair_preds, test, ProbPos())
    diffs = em.diff_per_sensitive_attribute(per_group)
    for diff in diffs.values():
        assert pytest.approx(diff, abs=1e-2) == 0
Example #3
0
def test_ppv_diff(toy_train_val: TrainValPair) -> None:
    """Check the per-group PPV of an SVM and the gap between the two groups."""
    train, test = toy_train_val
    model: InAlgorithm = SVM()
    predictions: Prediction = model.run(train, test)
    results = em.metric_per_sensitive_attribute(predictions, test, PPV())
    assert PPV().name == "PPV"
    assert results == {
        "sensitive-attr_0": approx(0.857, abs=0.001),
        "sensitive-attr_1": approx(0.903, abs=0.001),
    }
    diff = em.diff_per_sensitive_attribute(results)
    # The two group values asserted above pin the gap to |0.857 - 0.903| = 0.046
    # (+/- 0.002). The previous tolerance (abs=0.1 around 0.05) was wider than
    # the expected value itself and so checked almost nothing.
    assert diff["sensitive-attr_0-sensitive-attr_1"] == approx(0.046, abs=0.003)
Example #4
0
def test_bcr_diff(toy_train_val: TrainValPair):
    """Check the per-group BCR of an SVM and the gap between the two groups."""
    train, test = toy_train_val
    svm: InAlgorithm = SVM()
    preds: Prediction = svm.run(train, test)
    bcr_per_group = em.metric_per_sensitive_attribute(preds, test, BCR())
    assert BCR().name == "BCR"

    expected_per_group = {
        "sensitive-attr_0": approx(0.921, abs=0.001),
        "sensitive-attr_1": approx(0.892, abs=0.001),
    }
    assert bcr_per_group == expected_per_group

    bcr_gap = em.diff_per_sensitive_attribute(bcr_per_group)
    expected_gap = approx(0.029, abs=0.001)
    assert bcr_gap["sensitive-attr_0-sensitive-attr_1"] == expected_gap
Example #5
0
def test_tpr_diff(toy_train_val: TrainValPair) -> None:
    """Check the per-group TPR of an SVM and the gap between the two groups."""
    train, test = toy_train_val
    model: InAlgorithm = SVM()
    predictions: Prediction = model.run(train, test)
    tprs = em.metric_per_sensitive_attribute(predictions, test, TPR())
    assert TPR().name == "TPR"
    assert tprs == {
        "sensitive-attr_0": approx(0.923, abs=0.001),
        "sensitive-attr_1": approx(1.0, abs=0.001),
    }
    tpr_diff = em.diff_per_sensitive_attribute(tprs)
    # A leftover debugging print of ``tpr_diff`` was removed here; pytest already
    # shows the compared values when the assertion fails.
    assert tpr_diff["sensitive-attr_0-sensitive-attr_1"] == approx(0.077, abs=0.001)
Example #6
0
def test_tpr_diff_non_binary_race() -> None:
    """Check per-group TPRs and pairwise TPR gaps on Adult with ``Race`` as the sensitive attribute.

    The expected dictionaries are compared as a whole, so a missing or extra
    group (or group pair) fails the test. Looping over the returned items
    would pass vacuously if the metric returned nothing.
    """
    data: DataTuple = load_data(em.adult("Race"))
    train, test = train_test_split(data)
    model: InAlgorithm = SVM()
    predictions: Prediction = model.run_test(train, test)
    tprs = em.metric_per_sensitive_attribute(predictions, test, TPR())
    assert TPR().name == "TPR"

    expected_tprs = {
        "race_0": approx(0.37, abs=0.01),
        "race_1": approx(0.12, abs=0.01),
        "race_2": approx(0.14, abs=0.01),
        "race_3": approx(0.12, abs=0.01),
        "race_4": approx(0.16, abs=0.01),
    }
    assert tprs == expected_tprs

    tpr_diff = em.diff_per_sensitive_attribute(tprs)
    # One entry for every unordered pair of the five groups.
    expected_diffs = {
        "race_0-race_1": approx(0.25, abs=0.01),
        "race_0-race_2": approx(0.23, abs=0.01),
        "race_0-race_3": approx(0.25, abs=0.01),
        "race_0-race_4": approx(0.20, abs=0.01),
        "race_1-race_2": approx(0.01, abs=0.01),
        "race_1-race_3": approx(0.00, abs=0.01),
        "race_1-race_4": approx(0.04, abs=0.01),
        "race_2-race_3": approx(0.01, abs=0.01),
        "race_2-race_4": approx(0.04, abs=0.01),
        "race_3-race_4": approx(0.04, abs=0.01),
    }
    assert tpr_diff == expected_diffs
Example #7
0
def _compute_inv_cv(preds: Prediction, actual: DataTuple) -> float:
    """Return the first per-group difference in ``ProbPos`` for ``preds`` on ``actual``.

    The ``ProbPos`` metric is computed per sensitive attribute, the pairwise
    differences are taken, and the first value in iteration order is returned.
    """
    prob_pos_per_group = em.metric_per_sensitive_attribute(preds, actual, ProbPos())
    pairwise_diffs = em.diff_per_sensitive_attribute(prob_pos_per_group)
    values = iter(pairwise_diffs.values())
    return next(values)