Example #1
def toy_train_val() -> TrainTestPair:
    """By making this a fixture, pytest can cache the result."""
    data: DataTuple = em.toy().load()
    train: DataTuple
    test: DataTuple
    train, test = em.train_test_split(data)
    return TrainTestPair(train, test)
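
The snippet above shows the fixture body without the decorator that registers it with pytest. A minimal sketch of how it would typically be declared and consumed, assuming TrainTestPair and DataTuple are exposed at the package top level (as the other examples suggest) and that TrainTestPair unpacks like the named tuple its positional construction implies; the session scope and the consuming test are illustrative additions, not part of the original:

import pytest
import ethicml as em
from ethicml import DataTuple, TrainTestPair


@pytest.fixture(scope="session")  # compute the split once and reuse it across tests
def toy_train_val() -> TrainTestPair:
    """By making this a fixture, pytest can cache the result."""
    data: DataTuple = em.toy().load()
    train, test = em.train_test_split(data)
    return TrainTestPair(train, test)


def test_split_sizes(toy_train_val: TrainTestPair) -> None:
    # pytest injects the cached pair by matching the parameter name to the fixture name
    train, test = toy_train_val
    assert len(train) > len(test)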
Example #2
def test_empty_evaluate():
    """Test empty evaluate."""
    empty_result = em.evaluate_models([em.toy()], repeats=3, delete_prev=True)
    expected_result = pd.DataFrame(
        [], columns=["dataset", "scaler", "transform", "model", "split_id"])
    expected_result = expected_result.set_index(
        ["dataset", "scaler", "transform", "model", "split_id"])
    pd.testing.assert_frame_equal(empty_result, expected_result)
Example #3
def test_run_alg_suite():
    """Test run alg suite."""
    dataset = em.adult(split="Race-Binary")
    datasets: List[em.Dataset] = [dataset, em.toy()]
    preprocess_models: List[em.PreAlgorithm] = [em.Upsampler()]
    inprocess_models: List[em.InAlgorithm] = [em.LR(), em.SVM(kernel="linear")]
    postprocess_models: List[em.PostAlgorithm] = []
    metrics: List[em.Metric] = [em.Accuracy(), em.CV()]
    per_sens_metrics: List[em.Metric] = [em.Accuracy(), em.TPR()]
    parallel_results = em.evaluate_models_async(
        datasets=datasets,
        preprocess_models=preprocess_models,
        inprocess_models=inprocess_models,
        postprocess_models=postprocess_models,
        metrics=metrics,
        per_sens_metrics=per_sens_metrics,
        repeats=1,
        test_mode=True,
        topic="pytest",
    )
    results = em.evaluate_models(
        datasets,
        preprocess_models,
        inprocess_models,
        postprocess_models,
        metrics,
        per_sens_metrics,
        repeats=1,
        test_mode=True,
        delete_prev=True,
        topic="pytest",
    )
    pd.testing.assert_frame_equal(parallel_results, results, check_like=True)

    files = os.listdir(Path(".") / "results")
    file_names = [
        "pytest_Adult Race-Binary_Upsample uniform.csv",
        "pytest_Adult Race-Binary_no_transform.csv",
        "pytest_Toy_Upsample uniform.csv",
        "pytest_Toy_no_transform.csv",
    ]
    assert len(files) == 4
    assert sorted(files) == file_names

    for file in file_names:
        written_file = pd.read_csv(Path(f"./results/{file}"))
        assert (written_file["seed"][0], written_file["seed"][1]) == (0, 0)
        assert written_file.shape == (2, 16)

    reloaded = em.load_results("Adult Race-Binary", "Upsample uniform",
                               "pytest")
    assert reloaded is not None
    read = pd.read_csv(
        Path(".") / "results" /
        "pytest_Adult Race-Binary_Upsample uniform.csv")
    read = read.set_index(
        ["dataset", "scaler", "transform", "model", "split_id"])
    pd.testing.assert_frame_equal(reloaded, read)
Example #4
def test_sequential_split():
    """Test sequential split."""
    data: DataTuple = em.load_data(em.toy())
    train: DataTuple
    test: DataTuple
    train, test, _ = em.SequentialSplit(train_percentage=0.8)(data)
    assert all(data.x.iloc[0] == train.x.iloc[0])
    assert all(data.x.iloc[-1] == test.x.iloc[-1])
    assert len(train) == 320
    assert len(test) == 80
Example #5
def test_train_test_split():
    """Test train test split."""
    data: DataTuple = em.load_data(em.toy())
    train_test: Tuple[DataTuple, DataTuple] = em.train_test_split(data)
    train, test = train_test
    assert train is not None
    assert test is not None
    assert train.x.shape[0] > test.x.shape[0]
    assert train.x["a1"].values[0] == 0.2365572108691669
    assert train.x["a2"].values[0] == approx(0.008603090240657633, abs=1e-6)

    assert train.x.shape[0] == train.s.shape[0]
    assert train.s.shape[0] == train.y.shape[0]

    num_samples = len(data)

    len_default = math.floor((num_samples / 100) * 80)
    assert train.s.shape[0] == len_default
    assert test.s.shape[0] == num_samples - len_default

    len_0_9 = math.floor((num_samples / 100) * 90)
    train, test = em.train_test_split(data, train_percentage=0.9)
    assert train.s.shape[0] == len_0_9
    assert test.s.shape[0] == num_samples - len_0_9

    len_0_7 = math.floor((num_samples / 100) * 70)
    train, test = em.train_test_split(data, train_percentage=0.7)
    assert train.s.shape[0] == len_0_7
    assert test.s.shape[0] == num_samples - len_0_7

    len_0_5 = math.floor((num_samples / 100) * 50)
    train, test = em.train_test_split(data, train_percentage=0.5)
    assert train.s.shape[0] == len_0_5
    assert test.s.shape[0] == num_samples - len_0_5

    len_0_3 = math.floor((num_samples / 100) * 30)
    train, test = em.train_test_split(data, train_percentage=0.3)
    assert train.s.shape[0] == len_0_3
    assert test.s.shape[0] == num_samples - len_0_3

    len_0_1 = math.floor((num_samples / 100) * 10)
    train, test = em.train_test_split(data, train_percentage=0.1)
    assert train.s.shape[0] == len_0_1
    assert test.s.shape[0] == num_samples - len_0_1

    len_0_0 = math.floor((num_samples / 100) * 0)
    train, test = em.train_test_split(data, train_percentage=0.0)
    assert train.s.shape[0] == len_0_0
    assert train.name == "Toy - Train"
    assert test.s.shape[0] == num_samples - len_0_0
    assert test.name == "Toy - Test"
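
All of the size checks above come from the same floor arithmetic. As a quick sanity check for the default 80% split, assuming the toy dataset has 400 rows (the figure given for it in the dataset table further down):

import math

num_samples = 400  # rows in the toy dataset, per the DT entry in a later example
len_default = math.floor((num_samples / 100) * 80)
assert len_default == 320               # matches the SequentialSplit sizes in Example #4
assert num_samples - len_default == 80  # remaining rows form the test set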
Example #6
def test_random_seed():
    """Test random seed."""
    data: DataTuple = em.load_data(em.toy())
    train_test_0: Tuple[DataTuple, DataTuple] = em.train_test_split(data)
    train_0, test_0 = train_test_0
    assert train_0 is not None
    assert test_0 is not None
    assert train_0.x.shape[0] > test_0.x.shape[0]
    assert train_0.x["a1"].values[0] == 0.2365572108691669
    assert train_0.x["a2"].values[0] == approx(0.008603090240657633, abs=1e-6)

    assert train_0.x.shape[0] == train_0.s.shape[0]
    assert train_0.s.shape[0] == train_0.y.shape[0]

    train_test_1: Tuple[DataTuple, DataTuple] = em.train_test_split(data, random_seed=1)
    train_1, test_1 = train_test_1
    assert train_1 is not None
    assert test_1 is not None
    assert train_1.x.shape[0] > test_1.x.shape[0]
    assert train_1.x["a1"].values[0] == 1.3736566330173798
    assert train_1.x["a2"].values[0] == approx(0.21742296144957174, abs=1e-6)

    assert train_1.x.shape[0] == train_1.s.shape[0]
    assert train_1.s.shape[0] == train_1.y.shape[0]

    train_test_2: Tuple[DataTuple, DataTuple] = em.train_test_split(data, random_seed=2)
    train_2, test_2 = train_test_2
    assert train_2 is not None
    assert test_2 is not None
    assert train_2.x.shape[0] > test_2.x.shape[0]
    assert train_2.x["a1"].values[0] == 1.2255705960148289
    assert train_2.x["a2"].values[0] == -1.208089015454192

    assert train_2.x.shape[0] == train_2.s.shape[0]
    assert train_2.s.shape[0] == train_2.y.shape[0]

    train_test_3: Tuple[DataTuple, DataTuple] = em.train_test_split(data, random_seed=3)
    train_3, test_3 = train_test_3
    assert train_3 is not None
    assert test_3 is not None
    assert train_3.x.shape[0] > test_3.x.shape[0]
    assert train_3.x["a1"].values[0] == approx(0.21165963748018515, abs=1e-6)
    assert train_3.x["a2"].values[0] == -2.425137404779957

    assert train_3.x.shape[0] == train_3.s.shape[0]
    assert train_3.s.shape[0] == train_3.y.shape[0]
Example #7
def test_threaded_agarwal():
    """Test threaded agarwal."""
    models: List[InAlgorithmAsync] = [
        Agarwal(dir='/tmp', classifier="SVM", fairness="EqOd")
    ]

    class AssertResult(Metric):
        _name = "assert_result"

        def score(self, prediction, actual) -> float:
            return (np.count_nonzero(prediction.hard.values == 1) == 45
                    and np.count_nonzero(prediction.hard.values == 0) == 35)

    results = evaluate_models_async(datasets=[toy()],
                                    inprocess_models=models,
                                    metrics=[AssertResult()],
                                    delete_prev=True)
    assert results["assert_result"].iloc[0]
Example #8
def test_run_alg_suite_wrong_metrics():
    """Test run alg suite wrong metrics."""
    datasets: List[em.Dataset] = [em.toy(), em.adult()]
    preprocess_models: List[em.PreAlgorithm] = [em.Upsampler()]
    inprocess_models: List[em.InAlgorithm] = [em.SVM(kernel="linear"), em.LR()]
    postprocess_models: List[em.PostAlgorithm] = []
    metrics: List[em.Metric] = [em.Accuracy(), em.CV()]
    per_sens_metrics: List[em.Metric] = [em.Accuracy(), em.TPR(), em.CV()]
    with pytest.raises(em.MetricNotApplicable):
        em.evaluate_models(
            datasets,
            preprocess_models,
            inprocess_models,
            postprocess_models,
            metrics,
            per_sens_metrics,
            repeats=1,
            test_mode=True,
        )
Example #9
def test_plot_evals():
    """Test plot evals."""
    results: Results = evaluate_models(
        datasets=[adult(), toy()],
        preprocess_models=[Upsampler(strategy="preferential")],
        inprocess_models=[LR(), SVM(kernel="linear"),
                          Kamiran()],
        metrics=[Accuracy(), CV()],
        per_sens_metrics=[TPR(), ProbPos()],
        repeats=3,
        test_mode=True,
        delete_prev=True,
    )
    assert results["seed"][0] == results["seed"][1] == results["seed"][2] == 0
    assert results["seed"][3] == results["seed"][4] == results["seed"][
        5] == 2410
    assert results["seed"][6] == results["seed"][7] == results["seed"][
        8] == 4820

    figs_and_plots: List[Tuple[plt.Figure, plt.Axes]]  # type: ignore[name-defined]

    # plot with metrics
    figs_and_plots = plot_results(results, Accuracy(), ProbPos())
    # num(datasets) * num(preprocess) * num(accuracy combinations) * num(prob_pos combinations)
    # TODO: the "+ 4" should be fixed; it arises because a column name containing a
    # hyphen is matched as a DIFF metric.
    assert len(figs_and_plots) == 2 * 2 * 1 * 2 + 4

    # plot with column names
    figs_and_plots = plot_results(results, "Accuracy",
                                  "prob_pos_sensitive-attr_0")
    assert len(figs_and_plots) == 1 * 2 * 1 * 1

    with pytest.raises(
            ValueError,
            match='No matching columns found for Metric "NMI preds and s".'):
        plot_results(results, Accuracy(), NMI())

    with pytest.raises(ValueError, match='No column named "unknown metric".'):
        plot_results(results, "unknown metric", Accuracy())
Example #10
def test_run_alg_suite_scaler():
    """Test run alg suite."""
    dataset = em.adult(split="Race-Binary")
    datasets: List[em.Dataset] = [dataset, em.toy()]
    preprocess_models: List[em.PreAlgorithm] = [em.Upsampler()]
    inprocess_models: List[em.InAlgorithm] = [em.LR(), em.SVM(kernel="linear")]
    postprocess_models: List[em.PostAlgorithm] = []
    metrics: List[em.Metric] = [em.Accuracy(), em.CV()]
    per_sens_metrics: List[em.Metric] = [em.Accuracy(), em.TPR()]
    results_no_scaler = em.evaluate_models(
        datasets,
        preprocess_models,
        inprocess_models,
        postprocess_models,
        metrics,
        per_sens_metrics,
        repeats=1,
        test_mode=True,
        delete_prev=True,
        topic="pytest",
    )
    results_scaler = em.evaluate_models(
        datasets,
        preprocess_models,
        inprocess_models,
        postprocess_models,
        metrics,
        per_sens_metrics,
        scaler=StandardScaler(),
        repeats=1,
        test_mode=True,
        delete_prev=True,
        topic="pytest",
    )
    with pytest.raises(AssertionError):
        pd.testing.assert_frame_equal(results_scaler,
                                      results_no_scaler,
                                      check_like=True)
Example #11
def test_prop_train_test_split():
    """Test prop train test split."""
    data: DataTuple = em.load_data(em.toy())
    train: DataTuple
    test: DataTuple
    train, test, _ = ProportionalSplit(train_percentage=0.8)(data, split_id=0)
    assert train is not None
    assert test is not None
    assert train.x.shape[0] > test.x.shape[0]
    assert train.x["a1"].values[0] == -0.7135614558562237
    assert train.x["a2"].values[0] == 1.1211390799513148

    assert train.x.shape[0] == train.s.shape[0]
    assert train.s.shape[0] == train.y.shape[0]

    NUM_SAMPLES = len(data)
    len_default = math.floor((NUM_SAMPLES / 100) * 80)
    assert train.s.shape[0] == len_default
    assert test.s.shape[0] == NUM_SAMPLES - len_default

    # assert that the proportion of s=0 to s=1 has remained the same (and also test for y=0/y=1)
    assert np.count_nonzero(train.s.to_numpy() == 0) == round(
        0.8 * np.count_nonzero(data.s.to_numpy() == 0))
    assert np.count_nonzero(train.y.to_numpy() == 0) == round(
        0.8 * np.count_nonzero(data.y.to_numpy() == 0))

    len_0_9 = math.floor((NUM_SAMPLES / 100) * 90)
    train, test, _ = ProportionalSplit(train_percentage=0.9)(data, split_id=0)
    assert train.s.shape[0] == len_0_9
    assert test.s.shape[0] == NUM_SAMPLES - len_0_9
    assert np.count_nonzero(train.s.to_numpy() == 0) == approx(
        round(0.9 * np.count_nonzero(data.s.to_numpy() == 0)), abs=1)
    assert np.count_nonzero(train.y.to_numpy() == 0) == approx(
        round(0.9 * np.count_nonzero(data.y.to_numpy() == 0)), abs=1)

    len_0_7 = math.floor((NUM_SAMPLES / 100) * 70)
    train, test, _ = ProportionalSplit(train_percentage=0.7)(data, split_id=0)
    assert train.s.shape[0] == len_0_7
    assert test.s.shape[0] == NUM_SAMPLES - len_0_7
    assert np.count_nonzero(train.s.to_numpy() == 0) == approx(
        round(0.7 * np.count_nonzero(data.s.to_numpy() == 0)), abs=1)
    assert np.count_nonzero(train.y.to_numpy() == 0) == approx(
        round(0.7 * np.count_nonzero(data.y.to_numpy() == 0)), abs=1)

    len_0_5 = math.floor((NUM_SAMPLES / 100) * 50)
    train, test, _ = ProportionalSplit(train_percentage=0.5)(data, split_id=0)
    assert train.s.shape[0] == len_0_5
    assert test.s.shape[0] == NUM_SAMPLES - len_0_5

    len_0_3 = math.floor((NUM_SAMPLES / 100) * 30)
    train, test, _ = ProportionalSplit(train_percentage=0.3)(data, split_id=0)
    assert train.s.shape[0] == len_0_3
    assert test.s.shape[0] == NUM_SAMPLES - len_0_3

    len_0_1 = math.floor((NUM_SAMPLES / 100) * 10)
    train, test, _ = ProportionalSplit(train_percentage=0.1)(data, split_id=0)
    assert train.s.shape[0] == len_0_1
    assert test.s.shape[0] == NUM_SAMPLES - len_0_1

    len_0_0 = math.floor((NUM_SAMPLES / 100) * 0)
    train, test, _ = ProportionalSplit(train_percentage=0.0)(data, split_id=0)
    assert train.s.shape[0] == len_0_0
    assert train.name == "Toy - Train"
    assert test.s.shape[0] == NUM_SAMPLES - len_0_0
    assert test.name == "Toy - Test"
Example #12
    s = pd.DataFrame(np.random.randn(100), columns=["s"])
    y = pd.DataFrame(np.random.randint(0, 5, 100), columns=["y"])
    data = DataTuple(x=x, s=s, y=y)
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test
    model: InAlgorithm = LR()
    predictions: Prediction = model.run(train, test)
    acc_per_sens = metric_per_sensitive_attribute(
        predictions, test, TPR(pos_class=1,
                               labels=list(range(y.nunique()[0]))))
    print(acc_per_sens)


PER_SENS = [
    PerSensMetricTest(
        dataset=toy(),
        classifier=SVM(),
        metric=Accuracy(),
        expected_values={
            "sensitive-attr_0": 0.921,
            "sensitive-attr_1": 0.929
        },
    ),
    PerSensMetricTest(
        dataset=toy(),
        classifier=SVM(),
        metric=ProbPos(),
        expected_values={
            "sensitive-attr_0": 0.368,
            "sensitive-attr_1": 0.738
        },
Example #13
 ),
 DT(
     dataset=em.sqf(split="Race-Sex"),
     samples=12_347,
     x_features=144,
     discrete_features=138,
     s_features=1,
     num_sens=4,
     y_features=1,
     num_labels=2,
     name="SQF Race-Sex",
     sum_s=24_336,
     sum_y=1_289,
 ),
 DT(
     dataset=em.toy(),
     samples=400,
     x_features=10,
     discrete_features=8,
     s_features=1,
     num_sens=2,
     y_features=1,
     num_labels=2,
     name="Toy",
     sum_s=200,
     sum_y=231,
 ),
 DT(
     dataset=em.acs_income(
         root=Path("~/Data"), year="2018", horizon=1, states=["AL"]),
     samples=22_268,
Example #14
def test_run_alg_suite_no_pipeline():
    """Run alg suite while avoiding the 'fair pipeline'."""
    datasets: List[em.Dataset] = [em.toy(), em.adult()]
    preprocess_models: List[em.PreAlgorithm] = [em.Upsampler()]
    inprocess_models: List[em.InAlgorithm] = [
        em.Kamiran(classifier="LR"), em.LR()
    ]
    postprocess_models: List[em.PostAlgorithm] = []
    metrics: List[em.Metric] = [em.Accuracy(), em.CV()]
    per_sens_metrics: List[em.Metric] = [em.Accuracy(), em.TPR()]

    parallel_results = em.evaluate_models_async(
        datasets=datasets,
        preprocess_models=preprocess_models,
        inprocess_models=inprocess_models,
        postprocess_models=postprocess_models,
        metrics=metrics,
        per_sens_metrics=per_sens_metrics,
        repeats=1,
        test_mode=True,
        topic="pytest",
        fair_pipeline=False,
    )
    results = em.evaluate_models(
        datasets,
        preprocess_models,
        inprocess_models,
        postprocess_models,
        metrics,
        per_sens_metrics,
        repeats=1,
        test_mode=True,
        topic="pytest",
        fair_pipeline=False,
        delete_prev=True,
    )
    pd.testing.assert_frame_equal(parallel_results, results, check_like=True)

    num_datasets = 2
    num_preprocess = 1
    num_fair_inprocess = 1
    num_unfair_inprocess = 1
    expected_num = num_datasets * (num_fair_inprocess +
                                   (num_preprocess + 1) * num_unfair_inprocess)
    assert len(results) == expected_num

    kc_name = "Kamiran & Calders LR"

    assert len(em.filter_results(results, [kc_name])) == 2  # result for Toy and Adult
    # Kamiran, LR and Upsampler
    assert len(em.filter_results(results, ["Toy"], index="dataset")) == 3
    different_name = em.filter_and_map_results(results, {kc_name: "Kamiran & Calders"})
    assert len(em.filter_results(different_name, [kc_name])) == 0
    assert len(em.filter_results(different_name, ["Kamiran & Calders"])) == 2

    pd.testing.assert_frame_equal(
        em.filter_results(results, [kc_name]),
        results.query(f"model == '{kc_name}'"),
    )