Example #1
def test_load_adult_drop_native():
    """Test load adult drop native."""
    adult_data = em.adult("Sex", binarize_nationality=True)
    assert adult_data.name == "Adult Sex, binary nationality"
    assert "native-country_United-States" in adult_data.discrete_features
    assert "native-country_Canada" not in adult_data.discrete_features

    # with dummies
    data = adult_data.load(ordered=True)
    assert (45222, 62) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert (45222, 1) == data.y.shape
    assert "native-country_United-States" in data.x.columns
    # the dummy feature *is* in the actual dataframe:
    assert "native-country_not_United-States" in data.x.columns
    assert "native-country_Canada" not in data.x.columns
    native_cols = data.x[[
        "native-country_United-States", "native-country_not_United-States"
    ]]
    assert (native_cols.sum(axis="columns") == 1).all()

    # with dummies, not ordered
    data = adult_data.load(ordered=False)
    assert (45222, 62) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert (45222, 1) == data.y.shape
    assert "native-country_United-States" in data.x.columns
    # the dummy feature *is* in the actual dataframe:
    assert "native-country_not_United-States" in data.x.columns
    assert "native-country_Canada" not in data.x.columns
    native_cols = data.x[[
        "native-country_United-States", "native-country_not_United-States"
    ]]
    assert (native_cols.sum(axis="columns") == 1).all()
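
A minimal follow-up sketch (assuming EthicML is imported as em, as in the snippets) contrasting the default load with the binarized-nationality variant used above; the expected shapes are the ones asserted in these examples:

import ethicml as em

# the default split keeps every native-country dummy column: 101 features in x
full = em.adult("Sex").load()
# binarize_nationality=True collapses them to US / not-US: 62 features in x
compact = em.adult("Sex", binarize_nationality=True).load()
print(full.x.shape, compact.x.shape)  # (45222, 101) (45222, 62)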
Example #2
    def __init__(
        self,
        val_split: Union[float, int] = 0.2,
        test_split: Union[float, int] = 0.2,
        num_workers: int = 0,
        batch_size: int = 32,
        seed: int = 0,
        scaler: Optional[ScalerType] = None,
        persist_workers: bool = False,
        stratified_sampling: bool = False,
        sample_with_replacement: bool = False,
    ):
        super().__init__(
            batch_size=batch_size,
            num_workers=num_workers,
            scaler=scaler,
            seed=seed,
            test_split=test_split,
            val_split=val_split,
            persist_workers=persist_workers,
            stratified_sampling=stratified_sampling,
            sample_with_replacement=sample_with_replacement,
        )
        self._em_dataset = em.adult(split="Sex", binarize_nationality=True)
        self.num_classes = 2
        self.num_sens = 2
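
A usage sketch for the data module above. The class name AdultDataModule is a placeholder (the snippet does not show the surrounding class), so treat it as hypothetical:

# AdultDataModule is a hypothetical name for the class whose __init__ is shown above
dm = AdultDataModule(batch_size=64, val_split=0.1, test_split=0.2, seed=0)
assert dm.num_classes == 2 and dm.num_sens == 2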
Example #3
def test_label_plot():
    """Test label plot."""
    data: DataTuple = load_data(adult())
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, _ = train_test

    save_label_plot(train, "./plots/labels.png")
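
A small variation on the call above that creates the output directory first, since the snippet does not show whether save_label_plot does so itself:

from pathlib import Path

plot_dir = Path("./plots")
plot_dir.mkdir(parents=True, exist_ok=True)  # make sure the target directory exists
save_label_plot(train, str(plot_dir / "labels.png"))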
Example #4
def test_run_alg_suite():
    """Test run alg suite."""
    dataset = em.adult(split="Race-Binary")
    datasets: List[em.Dataset] = [dataset, em.toy()]
    preprocess_models: List[em.PreAlgorithm] = [em.Upsampler()]
    inprocess_models: List[em.InAlgorithm] = [em.LR(), em.SVM(kernel="linear")]
    postprocess_models: List[em.PostAlgorithm] = []
    metrics: List[em.Metric] = [em.Accuracy(), em.CV()]
    per_sens_metrics: List[em.Metric] = [em.Accuracy(), em.TPR()]
    parallel_results = em.evaluate_models_async(
        datasets=datasets,
        preprocess_models=preprocess_models,
        inprocess_models=inprocess_models,
        postprocess_models=postprocess_models,
        metrics=metrics,
        per_sens_metrics=per_sens_metrics,
        repeats=1,
        test_mode=True,
        topic="pytest",
    )
    results = em.evaluate_models(
        datasets,
        preprocess_models,
        inprocess_models,
        postprocess_models,
        metrics,
        per_sens_metrics,
        repeats=1,
        test_mode=True,
        delete_prev=True,
        topic="pytest",
    )
    pd.testing.assert_frame_equal(parallel_results, results, check_like=True)

    files = os.listdir(Path(".") / "results")
    file_names = [
        "pytest_Adult Race-Binary_Upsample uniform.csv",
        "pytest_Adult Race-Binary_no_transform.csv",
        "pytest_Toy_Upsample uniform.csv",
        "pytest_Toy_no_transform.csv",
    ]
    assert len(files) == 4
    assert sorted(files) == file_names

    for file in file_names:
        written_file = pd.read_csv(Path(f"./results/{file}"))
        assert (written_file["seed"][0], written_file["seed"][1]) == (0, 0)
        assert written_file.shape == (2, 16)

    reloaded = em.load_results("Adult Race-Binary", "Upsample uniform",
                               "pytest")
    assert reloaded is not None
    read = pd.read_csv(
        Path(".") / "results" /
        "pytest_Adult Race-Binary_Upsample uniform.csv")
    read = read.set_index(
        ["dataset", "scaler", "transform", "model", "split_id"])
    pd.testing.assert_frame_equal(reloaded, read)
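
A short follow-up sketch that re-loads both result files written above, assuming (as the file names suggest) that the second argument of load_results is the transform name:

import ethicml as em

upsampled = em.load_results("Adult Race-Binary", "Upsample uniform", "pytest")
untransformed = em.load_results("Adult Race-Binary", "no_transform", "pytest")
assert upsampled is not None and untransformed is not None
print(len(upsampled), len(untransformed))  # two rows each, one per in-process model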
Example #5
def test_binning():
    """Test binning."""
    data: DataTuple = em.load_data(em.adult())

    binned: DataTuple = em.bin_cont_feats(data)

    assert len([col for col in binned.x.columns
                if col not in data.x.columns]) == 25
    assert "age" not in binned.x.columns
Example #6
    def em_dataset(self) -> em.Dataset:
        return em.adult(
            split=self.sens_feat.value,
            binarize_nationality=self.bin_nationality,
            discrete_only=self.disc_feats_only,
            binarize_race=self.bin_race,
            invert_s=self.invert_s,
        )
Example #7
def test_domain_adapt_adult():
    """Test domain adapt adult."""
    data: DataTuple = em.adult().load()
    train, test = em.domain_split(
        datatup=data,
        tr_cond="education_Masters == 0. & education_Doctorate == 0.",
        te_cond="education_Masters == 1. | education_Doctorate == 1.",
    )
    assert (39106, 101) == train.x.shape
    assert (39106, 1) == train.s.shape
    assert (39106, 1) == train.y.shape

    assert (6116, 101) == test.x.shape
    assert (6116, 1) == test.s.shape
    assert (6116, 1) == test.y.shape

    data = em.adult().load()
    train, test = em.domain_split(datatup=data,
                                  tr_cond="education_Masters == 0.",
                                  te_cond="education_Masters == 1.")
    assert (40194, 101) == train.x.shape
    assert (40194, 1) == train.s.shape
    assert (40194, 1) == train.y.shape

    assert (5028, 101) == test.x.shape
    assert (5028, 1) == test.s.shape
    assert (5028, 1) == test.y.shape

    data = em.adult().load()
    train, test = em.domain_split(
        datatup=data,
        tr_cond=
        "education_Masters == 0. & education_Doctorate == 0. & education_Bachelors == 0.",
        te_cond=
        "education_Masters == 1. | education_Doctorate == 1. | education_Bachelors == 1.",
    )
    assert (23966, 101) == train.x.shape
    assert (23966, 1) == train.s.shape
    assert (23966, 1) == train.y.shape

    assert (21256, 101) == test.x.shape
    assert (21256, 1) == test.s.shape
    assert (21256, 1) == test.y.shape
Example #8
def test_load_adult_explicitly_sex():
    """Test load adult explicitly sex."""
    adult_sex = em.adult("Sex")
    data: DataTuple = adult_sex.load()
    assert (45222, 101) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert (45222, 1) == data.y.shape
    assert adult_sex.disc_feature_groups is not None
    assert "sex" not in adult_sex.disc_feature_groups
    assert "salary" not in adult_sex.disc_feature_groups
Example #9
def test_load_adult_race():
    """Test load adult race."""
    adult_race = em.adult("Race")
    data: DataTuple = adult_race.load()
    assert (45222, 98) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert data.s.nunique()[0] == 5
    assert (45222, 1) == data.y.shape
    assert adult_race.disc_feature_groups is not None
    assert "race" not in adult_race.disc_feature_groups
    assert "salary" not in adult_race.disc_feature_groups
Example #10
def test_race_feature_split():
    """Test race feature split."""
    adult_data: em.Dataset = em.adult(split="Custom")
    adult_data._sens_attr_spec = "race_White"
    adult_data._s_prefix = ["race"]
    adult_data._class_label_spec = "salary_>50K"
    adult_data._class_label_prefix = ["salary"]

    data: DataTuple = adult_data.load()

    assert (45222, 98) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert (45222, 1) == data.y.shape
Example #11
def load_adult_data(
    cfg: BaseArgs
) -> Tuple[DataTupleDataset, DataTupleDataset, DataTupleDataset]:
    global ADULT_DATASET
    ADULT_DATASET = em.adult(split=cfg.data.adult_split.name,
                             binarize_nationality=cfg.data.drop_native)
    data = ADULT_DATASET.load(ordered=True)
    global SENS_ATTRS
    SENS_ATTRS = data.s.columns

    disc_feature_groups = ADULT_DATASET.disc_feature_groups
    assert disc_feature_groups is not None
    cont_feats = ADULT_DATASET.continuous_features

    tuples: DataTupleTriplet = biased_split(cfg, data)
    context, test, train = tuples.context, tuples.test, tuples.train

    scaler = StandardScaler()

    train_x = train.x
    train_x[cont_feats] = scaler.fit_transform(train.x[cont_feats].to_numpy(
        np.float32))
    test_x = test.x
    test_x[cont_feats] = scaler.transform(test.x[cont_feats].to_numpy(
        np.float32))
    context_x = context.x
    context_x[cont_feats] = scaler.transform(context.x[cont_feats].to_numpy(
        np.float32))

    if cfg.data.drop_discrete:
        context_x = context_x[cont_feats]
        train_x = train_x[cont_feats]
        test_x = test_x[cont_feats]
        disc_feature_groups = {}

    train = train.replace(x=train_x)
    test = test.replace(x=test_x)
    context = context.replace(x=context_x)

    cont_features = ADULT_DATASET.continuous_features
    context_dataset = DataTupleDataset(context,
                                       disc_feature_groups=disc_feature_groups,
                                       cont_features=cont_features)
    train_dataset = DataTupleDataset(train,
                                     disc_feature_groups=disc_feature_groups,
                                     cont_features=cont_features)
    test_dataset = DataTupleDataset(test,
                                    disc_feature_groups=disc_feature_groups,
                                    cont_features=cont_features)
    return context_dataset, train_dataset, test_dataset
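
A sketch of consuming the three returned datasets, assuming they behave like standard PyTorch datasets and that cfg is a populated BaseArgs instance (neither is shown in the snippet):

from torch.utils.data import DataLoader

context_ds, train_ds, test_ds = load_adult_data(cfg)
train_loader = DataLoader(train_ds, batch_size=256, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=256, shuffle=False)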
Example #12
def test_run_alg_suite_wrong_metrics():
    """Test run alg suite wrong metrics."""
    datasets: List[em.Dataset] = [em.toy(), em.adult()]
    preprocess_models: List[em.PreAlgorithm] = [em.Upsampler()]
    inprocess_models: List[em.InAlgorithm] = [em.SVM(kernel="linear"), em.LR()]
    postprocess_models: List[em.PostAlgorithm] = []
    metrics: List[em.Metric] = [em.Accuracy(), em.CV()]
    per_sens_metrics: List[em.Metric] = [em.Accuracy(), em.TPR(), em.CV()]
    with pytest.raises(em.MetricNotApplicable):
        em.evaluate_models(
            datasets,
            preprocess_models,
            inprocess_models,
            postprocess_models,
            metrics,
            per_sens_metrics,
            repeats=1,
            test_mode=True,
        )
Example #13
def test_dependence_measures_adult() -> None:
    """Test dependence measures."""
    data = load_data(em.adult(split="Sex"))
    train_percentage = 0.75
    unbalanced, balanced, _ = BalancedTestSplit(
        train_percentage=train_percentage)(data)

    fair_prediction = Prediction(
        hard=balanced.y["salary_>50K"])  # predict the balanced label
    unfair_prediction = Prediction(
        hard=unbalanced.y["salary_>50K"])  # predict the normal label
    extremely_unfair_prediction = Prediction(
        hard=unbalanced.s["sex_Male"])  # predict s

    # measure the dependence between s and the prediction in several ways
    assert _compute_di(fair_prediction, balanced) == approx(1, abs=1e-15)
    assert _compute_di(unfair_prediction, unbalanced) == approx(0.364,
                                                                abs=3e-3)
    assert _compute_di(extremely_unfair_prediction,
                       unbalanced) == approx(0, abs=3e-3)
    assert _compute_inv_cv(fair_prediction, balanced) == approx(0, abs=1e-15)
    assert _compute_inv_cv(unfair_prediction, unbalanced) == approx(0.199,
                                                                    abs=3e-3)
    assert _compute_inv_cv(extremely_unfair_prediction,
                           unbalanced) == approx(1, abs=3e-3)
    nmi = NMI()
    assert nmi.score(fair_prediction, balanced) == approx(0, abs=1e-15)
    assert nmi.score(unfair_prediction, unbalanced) == approx(0.0432, abs=3e-4)
    assert nmi.score(extremely_unfair_prediction,
                     unbalanced) == approx(1, abs=3e-4)
    yanovich = Yanovich()
    assert yanovich.score(fair_prediction, balanced) == approx(0, abs=1e-15)
    assert yanovich.score(unfair_prediction, unbalanced) == approx(0.0396,
                                                                   abs=3e-4)
    assert yanovich.score(extremely_unfair_prediction,
                          unbalanced) == approx(1, abs=3e-4)
    renyi = RenyiCorrelation()
    assert renyi.score(fair_prediction, balanced) == approx(0, abs=1e-15)
    assert renyi.score(unfair_prediction, unbalanced) == approx(0.216,
                                                                abs=3e-4)
    assert renyi.score(extremely_unfair_prediction,
                       unbalanced) == approx(1, abs=3e-4)
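
The NMI metric above scores the dependence between the hard predictions and s (see the comment in the test). A rough cross-check sketch with scikit-learn's normalized mutual information; the normalisation may differ from EthicML's, so only the direction of the result is expected to match:

from sklearn.metrics import normalized_mutual_info_score

# the "extremely unfair" prediction is s itself, so the dependence should be maximal
print(normalized_mutual_info_score(unbalanced.s["sex_Male"], unbalanced.s["sex_Male"]))  # ~1.0
# the "unfair" prediction is the observed label, which is only weakly dependent on s
print(normalized_mutual_info_score(unbalanced.s["sex_Male"], unbalanced.y["salary_>50K"]))  # small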
Example #14
def test_plot_evals():
    """Test plot evals."""
    results: Results = evaluate_models(
        datasets=[adult(), toy()],
        preprocess_models=[Upsampler(strategy="preferential")],
        inprocess_models=[LR(), SVM(kernel="linear"),
                          Kamiran()],
        metrics=[Accuracy(), CV()],
        per_sens_metrics=[TPR(), ProbPos()],
        repeats=3,
        test_mode=True,
        delete_prev=True,
    )
    assert results["seed"][0] == results["seed"][1] == results["seed"][2] == 0
    assert results["seed"][3] == results["seed"][4] == results["seed"][
        5] == 2410
    assert results["seed"][6] == results["seed"][7] == results["seed"][
        8] == 4820

    figs_and_plots: List[Tuple[plt.Figure,
                               plt.Axes]]  # type: ignore[name-defined]

    # plot with metrics
    figs_and_plots = plot_results(results, Accuracy(), ProbPos())
    # num(datasets) * num(preprocess) * num(accuracy combinations) * num(prop_pos combinations)
    assert len(figs_and_plots) == 2 * 2 * 1 * 2 + 4
    # TODO: the "+4" should be fixed; it comes from column names containing a hyphen
    # being matched as DIFF metrics.

    # plot with column names
    figs_and_plots = plot_results(results, "Accuracy",
                                  "prob_pos_sensitive-attr_0")
    assert len(figs_and_plots) == 1 * 2 * 1 * 1

    with pytest.raises(
            ValueError,
            match='No matching columns found for Metric "NMI preds and s".'):
        plot_results(results, Accuracy(), NMI())

    with pytest.raises(ValueError, match='No column named "unknown metric".'):
        plot_results(results, "unknown metric", Accuracy())
Example #15
def test_run_alg_suite_scaler():
    """Test run alg suite."""
    dataset = em.adult(split="Race-Binary")
    datasets: List[em.Dataset] = [dataset, em.toy()]
    preprocess_models: List[em.PreAlgorithm] = [em.Upsampler()]
    inprocess_models: List[em.InAlgorithm] = [em.LR(), em.SVM(kernel="linear")]
    postprocess_models: List[em.PostAlgorithm] = []
    metrics: List[em.Metric] = [em.Accuracy(), em.CV()]
    per_sens_metrics: List[em.Metric] = [em.Accuracy(), em.TPR()]
    results_no_scaler = em.evaluate_models(
        datasets,
        preprocess_models,
        inprocess_models,
        postprocess_models,
        metrics,
        per_sens_metrics,
        repeats=1,
        test_mode=True,
        delete_prev=True,
        topic="pytest",
    )
    results_scaler = em.evaluate_models(
        datasets,
        preprocess_models,
        inprocess_models,
        postprocess_models,
        metrics,
        per_sens_metrics,
        scaler=StandardScaler(),
        repeats=1,
        test_mode=True,
        delete_prev=True,
        topic="pytest",
    )
    with pytest.raises(AssertionError):
        pd.testing.assert_frame_equal(results_scaler,
                                      results_no_scaler,
                                      check_like=True)
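
The scaler argument is given a StandardScaler above; a sketch that swaps in MinMaxScaler instead, on the assumption that any scaler with the same fit_transform / transform interface is accepted:

from sklearn.preprocessing import MinMaxScaler

results_minmax = em.evaluate_models(
    datasets,
    preprocess_models,
    inprocess_models,
    postprocess_models,
    metrics,
    per_sens_metrics,
    scaler=MinMaxScaler(),  # assumption: any fit_transform/transform scaler works here
    repeats=1,
    test_mode=True,
    delete_prev=True,
    topic="pytest",
)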
Example #16
def test_load_adult_education_drop():
    """Test load adult education."""
    adult_data = em.adult("Education", binarize_nationality=True)
    assert adult_data.name == "Adult Education, binary nationality"
    assert "education_HS-grad" in adult_data.sens_attrs
    assert "education_other" in adult_data.sens_attrs
    assert "education_Masters" not in adult_data.sens_attrs

    # ordered
    data = adult_data.load(ordered=True)
    assert (45222, 47) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert data.s.nunique()[0] == 3
    assert (45222, 1) == data.y.shape
    assert "education" in data.s.columns

    # not ordered
    data = adult_data.load()
    assert (45222, 47) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert data.s.nunique()[0] == 3
    assert (45222, 1) == data.y.shape
    assert "education" in data.s.columns
Example #17
def test_tpr_ratio_non_binary_race():
    """Test tpr ratio non binary race."""
    data: DataTuple = load_data(em.adult("Race"))
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test
    model: InAlgorithm = SVM()
    predictions: Prediction = model.run_test(train, test)
    tprs = em.metric_per_sensitive_attribute(predictions, test, TPR())
    assert TPR().name == "TPR"
    test_dict = {
        "race_0": approx(0.37, abs=0.01),
        "race_1": approx(0.12, abs=0.01),
        "race_2": approx(0.14, abs=0.01),
        "race_3": approx(0.12, abs=0.01),
        "race_4": approx(0.16, abs=0.01),
    }

    for key, val in tprs.items():
        assert val == test_dict[key]

    tpr_diff = em.ratio_per_sensitive_attribute(tprs)
    test_dict = {
        "race_0/race_1": approx(0.32, abs=0.1),
        "race_0/race_2": approx(0.37, abs=0.1),
        "race_0/race_3": approx(0.33, abs=0.1),
        "race_0/race_4": approx(0.44, abs=0.1),
        "race_1/race_2": approx(0.88, abs=0.1),
        "race_1/race_3": approx(0.97, abs=0.1),
        "race_1/race_4": approx(0.72, abs=0.1),
        "race_2/race_3": approx(0.91, abs=0.1),
        "race_2/race_4": approx(0.74, abs=0.1),
        "race_3/race_4": approx(0.74, abs=0.1),
    }

    for key, val in tpr_diff.items():
        assert val == test_dict[key]
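
The pairwise ratios above can be condensed into a single worst-case number using only the tprs dictionary already computed:

# smallest-over-largest group TPR: 1.0 means equal TPRs, lower means more disparity
worst_case_tpr_ratio = min(tprs.values()) / max(tprs.values())
print(worst_case_tpr_ratio)  # roughly 0.12 / 0.37 ≈ 0.32, given the values asserted above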
Example #18
 ),
 DT(
     dataset=em.admissions(split="Gender", invert_s=True),
     samples=43_303,
     x_features=9,
     discrete_features=0,
     s_features=1,
     num_sens=2,
     y_features=1,
     num_labels=2,
     name="Admissions Gender",
     sum_s=43_303 - 22_335,
     sum_y=20_263,
 ),
 DT(
     dataset=em.adult(),
     samples=45_222,
     x_features=101,
     discrete_features=96,
     s_features=1,
     num_sens=2,
     y_features=1,
     num_labels=2,
     name="Adult Sex",
     sum_s=30_527,
     sum_y=11_208,
 ),
 DT(
     dataset=em.adult("Sex", binarize_nationality=True),
     samples=45_222,
     x_features=62,
Example #19
def test_run_alg_suite_no_pipeline():
    """Run alg suite while avoiding the 'fair pipeline'."""
    datasets: List[em.Dataset] = [em.toy(), em.adult()]
    preprocess_models: List[em.PreAlgorithm] = [em.Upsampler()]
    inprocess_models: List[em.InAlgorithm] = [
        em.Kamiran(classifier="LR"), em.LR()
    ]
    postprocess_models: List[em.PostAlgorithm] = []
    metrics: List[em.Metric] = [em.Accuracy(), em.CV()]
    per_sens_metrics: List[em.Metric] = [em.Accuracy(), em.TPR()]

    parallel_results = em.evaluate_models_async(
        datasets=datasets,
        preprocess_models=preprocess_models,
        inprocess_models=inprocess_models,
        postprocess_models=postprocess_models,
        metrics=metrics,
        per_sens_metrics=per_sens_metrics,
        repeats=1,
        test_mode=True,
        topic="pytest",
        fair_pipeline=False,
    )
    results = em.evaluate_models(
        datasets,
        preprocess_models,
        inprocess_models,
        postprocess_models,
        metrics,
        per_sens_metrics,
        repeats=1,
        test_mode=True,
        topic="pytest",
        fair_pipeline=False,
        delete_prev=True,
    )
    pd.testing.assert_frame_equal(parallel_results, results, check_like=True)

    num_datasets = 2
    num_preprocess = 1
    num_fair_inprocess = 1
    num_unfair_inprocess = 1
    expected_num = num_datasets * (num_fair_inprocess +
                                   (num_preprocess + 1) * num_unfair_inprocess)
    assert len(results) == expected_num

    kc_name = "Kamiran & Calders LR"

    # one result each for Toy and Adult
    assert len(em.filter_results(results, [kc_name])) == 2
    # Kamiran, LR and Upsampler
    assert len(em.filter_results(results, ["Toy"], index="dataset")) == 3
    different_name = em.filter_and_map_results(results,
                                               {kc_name: "Kamiran & Calders"})
    assert len(em.filter_results(different_name, [kc_name])) == 0
    assert len(em.filter_results(different_name, ["Kamiran & Calders"])) == 2

    pd.testing.assert_frame_equal(
        em.filter_results(results, [kc_name]),
        results.query(f"model == '{kc_name}'"),
    )
Example #20
     expected_values={
         "sensitive-attr_0": 0.632,
         "sensitive-attr_1": 0.262
     },
 ),
 PerSensMetricTest(
     dataset=toy(),
     classifier=SVM(),
     metric=Accuracy(),
     expected_values={
         "sensitive-attr_0": 0.921,
         "sensitive-attr_1": 0.928
     },
 ),
 PerSensMetricTest(
     dataset=adult("Nationality"),
     classifier=SVM(kernel="linear"),
     metric=Accuracy(),
     expected_values={
         "native-country_1": 0.649,
         "native-country_2": 0.850,
         "native-country_3": 0.933,
         "native-country_4": 0.913,
         "native-country_5": 0.867,
         "native-country_6": 0.867,
         "native-country_7": 0.950,
         "native-country_8": 0.950,
         "native-country_9": 0.750,
         "native-country_10": 0.755,
         "native-country_11": 0.636,
         "native-country_12": 0.952,