def test_load_adult_drop_native():
    """Test load adult drop native."""
    adult_data = em.adult("Sex", binarize_nationality=True)
    assert adult_data.name == "Adult Sex, binary nationality"
    assert "native-country_United-States" in adult_data.discrete_features
    assert "native-country_Canada" not in adult_data.discrete_features

    # with dummies
    data = adult_data.load(ordered=True)
    assert (45222, 62) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert (45222, 1) == data.y.shape
    assert "native-country_United-States" in data.x.columns
    # the dummy feature *is* in the actual dataframe:
    assert "native-country_not_United-States" in data.x.columns
    assert "native-country_Canada" not in data.x.columns
    native_cols = data.x[["native-country_United-States", "native-country_not_United-States"]]
    assert (native_cols.sum(axis="columns") == 1).all()

    # with dummies, not ordered
    data = adult_data.load(ordered=False)
    assert (45222, 62) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert (45222, 1) == data.y.shape
    assert "native-country_United-States" in data.x.columns
    # the dummy feature *is* in the actual dataframe:
    assert "native-country_not_United-States" in data.x.columns
    assert "native-country_Canada" not in data.x.columns
    native_cols = data.x[["native-country_United-States", "native-country_not_United-States"]]
    assert (native_cols.sum(axis="columns") == 1).all()
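# A minimal, self-contained sketch (an illustration added here for reference,
# not part of the original test suite; the helper name is hypothetical) of
# what ``binarize_nationality=True`` does, using only API and numbers that the
# test above exercises.
import ethicml as em

def demo_binarized_nationality() -> None:
    # With binarize_nationality=True, the many native-country dummies collapse
    # into a complementary US / not-US pair, shrinking x from 101 to 62 columns.
    data = em.adult("Sex", binarize_nationality=True).load(ordered=True)
    us = data.x["native-country_United-States"]
    not_us = data.x["native-country_not_United-States"]
    assert ((us + not_us) == 1).all()  # exactly one of the two is set per row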
def __init__(
    self,
    val_split: Union[float, int] = 0.2,
    test_split: Union[float, int] = 0.2,
    num_workers: int = 0,
    batch_size: int = 32,
    seed: int = 0,
    scaler: Optional[ScalerType] = None,
    persist_workers: bool = False,
    stratified_sampling: bool = False,
    sample_with_replacement: bool = False,
):
    super().__init__(
        batch_size=batch_size,
        num_workers=num_workers,
        scaler=scaler,
        seed=seed,
        test_split=test_split,
        val_split=val_split,
        persist_workers=persist_workers,
        stratified_sampling=stratified_sampling,
        sample_with_replacement=sample_with_replacement,
    )
    self._em_dataset = em.adult(split="Sex", binarize_nationality=True)
    self.num_classes = 2
    self.num_sens = 2
def test_label_plot():
    """Test label plot."""
    data: DataTuple = load_data(adult())
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, _ = train_test
    save_label_plot(train, "./plots/labels.png")
def test_run_alg_suite():
    """Test run alg suite."""
    dataset = em.adult(split="Race-Binary")
    datasets: List[em.Dataset] = [dataset, em.toy()]
    preprocess_models: List[em.PreAlgorithm] = [em.Upsampler()]
    inprocess_models: List[em.InAlgorithm] = [em.LR(), em.SVM(kernel="linear")]
    postprocess_models: List[em.PostAlgorithm] = []
    metrics: List[em.Metric] = [em.Accuracy(), em.CV()]
    per_sens_metrics: List[em.Metric] = [em.Accuracy(), em.TPR()]

    parallel_results = em.evaluate_models_async(
        datasets=datasets,
        preprocess_models=preprocess_models,
        inprocess_models=inprocess_models,
        postprocess_models=postprocess_models,
        metrics=metrics,
        per_sens_metrics=per_sens_metrics,
        repeats=1,
        test_mode=True,
        topic="pytest",
    )
    results = em.evaluate_models(
        datasets,
        preprocess_models,
        inprocess_models,
        postprocess_models,
        metrics,
        per_sens_metrics,
        repeats=1,
        test_mode=True,
        delete_prev=True,
        topic="pytest",
    )
    pd.testing.assert_frame_equal(parallel_results, results, check_like=True)

    files = os.listdir(Path(".") / "results")
    file_names = [
        "pytest_Adult Race-Binary_Upsample uniform.csv",
        "pytest_Adult Race-Binary_no_transform.csv",
        "pytest_Toy_Upsample uniform.csv",
        "pytest_Toy_no_transform.csv",
    ]
    assert len(files) == 4
    assert sorted(files) == file_names

    for file in file_names:
        written_file = pd.read_csv(Path(f"./results/{file}"))
        assert (written_file["seed"][0], written_file["seed"][1]) == (0, 0)
        assert written_file.shape == (2, 16)

    reloaded = em.load_results("Adult Race-Binary", "Upsample uniform", "pytest")
    assert reloaded is not None
    read = pd.read_csv(Path(".") / "results" / "pytest_Adult Race-Binary_Upsample uniform.csv")
    read = read.set_index(["dataset", "scaler", "transform", "model", "split_id"])
    pd.testing.assert_frame_equal(reloaded, read)
def test_binning():
    """Test binning."""
    data: DataTuple = em.load_data(em.adult())
    binned: DataTuple = em.bin_cont_feats(data)
    # binning the continuous features creates 25 new (discretized) columns
    assert len([col for col in binned.x.columns if col not in data.x.columns]) == 25
    assert "age" not in binned.x.columns  # the raw continuous column is gone
def em_dataset(self) -> em.Dataset:
    return em.adult(
        split=self.sens_feat.value,
        binarize_nationality=self.bin_nationality,
        discrete_only=self.disc_feats_only,
        binarize_race=self.bin_race,
        invert_s=self.invert_s,
    )
def test_domain_adapt_adult():
    """Test domain adapt adult."""
    data: DataTuple = em.adult().load()
    train, test = em.domain_split(
        datatup=data,
        tr_cond="education_Masters == 0. & education_Doctorate == 0.",
        te_cond="education_Masters == 1. | education_Doctorate == 1.",
    )
    assert (39106, 101) == train.x.shape
    assert (39106, 1) == train.s.shape
    assert (39106, 1) == train.y.shape
    assert (6116, 101) == test.x.shape
    assert (6116, 1) == test.s.shape
    assert (6116, 1) == test.y.shape

    data = em.adult().load()
    train, test = em.domain_split(
        datatup=data,
        tr_cond="education_Masters == 0.",
        te_cond="education_Masters == 1.",
    )
    assert (40194, 101) == train.x.shape
    assert (40194, 1) == train.s.shape
    assert (40194, 1) == train.y.shape
    assert (5028, 101) == test.x.shape
    assert (5028, 1) == test.s.shape
    assert (5028, 1) == test.y.shape

    data = em.adult().load()
    train, test = em.domain_split(
        datatup=data,
        tr_cond="education_Masters == 0. & education_Doctorate == 0. & education_Bachelors == 0.",
        te_cond="education_Masters == 1. | education_Doctorate == 1. | education_Bachelors == 1.",
    )
    assert (23966, 101) == train.x.shape
    assert (23966, 1) == train.s.shape
    assert (23966, 1) == train.y.shape
    assert (21256, 101) == test.x.shape
    assert (21256, 1) == test.s.shape
    assert (21256, 1) == test.y.shape
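# An illustrative sketch (added for reference, not part of the original tests;
# the helper name is hypothetical) showing the ``em.domain_split`` call pattern
# in isolation: the train/test conditions are pandas-style query strings over
# the one-hot feature columns, and the row counts come from the test above.
import ethicml as em

def demo_domain_split() -> None:
    train, test = em.domain_split(
        datatup=em.adult().load(),
        tr_cond="education_Masters == 0.",  # training domain: no Masters degree
        te_cond="education_Masters == 1.",  # test domain: Masters holders
    )
    # every row lands in exactly one domain: 40194 + 5028 == 45222
    assert len(train.x) + len(test.x) == 45222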
def test_load_adult_explicitly_sex():
    """Test load adult explicitly sex."""
    adult_sex = em.adult("Sex")
    data: DataTuple = adult_sex.load()
    assert (45222, 101) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert (45222, 1) == data.y.shape
    assert adult_sex.disc_feature_groups is not None
    assert "sex" not in adult_sex.disc_feature_groups
    assert "salary" not in adult_sex.disc_feature_groups
def test_load_adult_race():
    """Test load adult race."""
    adult_race = em.adult("Race")
    data: DataTuple = adult_race.load()
    assert (45222, 98) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert data.s.nunique()[0] == 5
    assert (45222, 1) == data.y.shape
    assert adult_race.disc_feature_groups is not None
    assert "race" not in adult_race.disc_feature_groups
    assert "salary" not in adult_race.disc_feature_groups
def test_race_feature_split():
    """Test race feature split."""
    adult_data: em.Dataset = em.adult(split="Custom")
    # configure the "Custom" split by setting the private spec attributes directly
    adult_data._sens_attr_spec = "race_White"
    adult_data._s_prefix = ["race"]
    adult_data._class_label_spec = "salary_>50K"
    adult_data._class_label_prefix = ["salary"]
    data: DataTuple = adult_data.load()
    assert (45222, 98) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert (45222, 1) == data.y.shape
def load_adult_data(
    cfg: BaseArgs,
) -> Tuple[DataTupleDataset, DataTupleDataset, DataTupleDataset]:
    global ADULT_DATASET
    ADULT_DATASET = em.adult(
        split=cfg.data.adult_split.name, binarize_nationality=cfg.data.drop_native
    )
    data = ADULT_DATASET.load(ordered=True)

    global SENS_ATTRS
    SENS_ATTRS = data.s.columns

    disc_feature_groups = ADULT_DATASET.disc_feature_groups
    assert disc_feature_groups is not None
    cont_feats = ADULT_DATASET.continuous_features

    tuples: DataTupleTriplet = biased_split(cfg, data)
    context, test, train = tuples.context, tuples.test, tuples.train

    # fit the scaler on the training set only; reuse it for test and context
    scaler = StandardScaler()
    train_x = train.x
    train_x[cont_feats] = scaler.fit_transform(train.x[cont_feats].to_numpy(np.float32))
    test_x = test.x
    test_x[cont_feats] = scaler.transform(test.x[cont_feats].to_numpy(np.float32))
    context_x = context.x
    context_x[cont_feats] = scaler.transform(context.x[cont_feats].to_numpy(np.float32))

    if cfg.data.drop_discrete:
        context_x = context_x[cont_feats]
        train_x = train_x[cont_feats]
        test_x = test_x[cont_feats]
        disc_feature_groups = {}

    train = train.replace(x=train_x)
    test = test.replace(x=test_x)
    context = context.replace(x=context_x)

    cont_features = ADULT_DATASET.continuous_features
    context_dataset = DataTupleDataset(
        context, disc_feature_groups=disc_feature_groups, cont_features=cont_features
    )
    train_dataset = DataTupleDataset(
        train, disc_feature_groups=disc_feature_groups, cont_features=cont_features
    )
    test_dataset = DataTupleDataset(
        test, disc_feature_groups=disc_feature_groups, cont_features=cont_features
    )
    return context_dataset, train_dataset, test_dataset
def test_run_alg_suite_wrong_metrics():
    """Test run alg suite wrong metrics."""
    datasets: List[em.Dataset] = [em.toy(), em.adult()]
    preprocess_models: List[em.PreAlgorithm] = [em.Upsampler()]
    inprocess_models: List[em.InAlgorithm] = [em.SVM(kernel="linear"), em.LR()]
    postprocess_models: List[em.PostAlgorithm] = []
    metrics: List[em.Metric] = [em.Accuracy(), em.CV()]
    # CV is not applicable per sensitive attribute, so evaluation must raise
    per_sens_metrics: List[em.Metric] = [em.Accuracy(), em.TPR(), em.CV()]
    with pytest.raises(em.MetricNotApplicable):
        em.evaluate_models(
            datasets,
            preprocess_models,
            inprocess_models,
            postprocess_models,
            metrics,
            per_sens_metrics,
            repeats=1,
            test_mode=True,
        )
def test_dependence_measures_adult() -> None:
    """Test dependence measures."""
    data = load_data(em.adult(split="Sex"))
    train_percentage = 0.75
    unbalanced, balanced, _ = BalancedTestSplit(train_percentage=train_percentage)(data)

    fair_prediction = Prediction(hard=balanced.y["salary_>50K"])  # predict the balanced label
    unfair_prediction = Prediction(hard=unbalanced.y["salary_>50K"])  # predict the normal label
    extremely_unfair_prediction = Prediction(hard=unbalanced.s["sex_Male"])  # predict s

    # measure the dependence between s and the prediction in several ways
    assert _compute_di(fair_prediction, balanced) == approx(1, abs=1e-15)
    assert _compute_di(unfair_prediction, unbalanced) == approx(0.364, abs=3e-3)
    assert _compute_di(extremely_unfair_prediction, unbalanced) == approx(0, abs=3e-3)
    assert _compute_inv_cv(fair_prediction, balanced) == approx(0, abs=1e-15)
    assert _compute_inv_cv(unfair_prediction, unbalanced) == approx(0.199, abs=3e-3)
    assert _compute_inv_cv(extremely_unfair_prediction, unbalanced) == approx(1, abs=3e-3)

    nmi = NMI()
    assert nmi.score(fair_prediction, balanced) == approx(0, abs=1e-15)
    assert nmi.score(unfair_prediction, unbalanced) == approx(0.0432, abs=3e-4)
    assert nmi.score(extremely_unfair_prediction, unbalanced) == approx(1, abs=3e-4)

    yanovich = Yanovich()
    assert yanovich.score(fair_prediction, balanced) == approx(0, abs=1e-15)
    assert yanovich.score(unfair_prediction, unbalanced) == approx(0.0396, abs=3e-4)
    assert yanovich.score(extremely_unfair_prediction, unbalanced) == approx(1, abs=3e-4)

    renyi = RenyiCorrelation()
    assert renyi.score(fair_prediction, balanced) == approx(0, abs=1e-15)
    assert renyi.score(unfair_prediction, unbalanced) == approx(0.216, abs=3e-4)
    assert renyi.score(extremely_unfair_prediction, unbalanced) == approx(1, abs=3e-4)
def test_plot_evals():
    """Test plot evals."""
    results: Results = evaluate_models(
        datasets=[adult(), toy()],
        preprocess_models=[Upsampler(strategy="preferential")],
        inprocess_models=[LR(), SVM(kernel="linear"), Kamiran()],
        metrics=[Accuracy(), CV()],
        per_sens_metrics=[TPR(), ProbPos()],
        repeats=3,
        test_mode=True,
        delete_prev=True,
    )
    assert results["seed"][0] == results["seed"][1] == results["seed"][2] == 0
    assert results["seed"][3] == results["seed"][4] == results["seed"][5] == 2410
    assert results["seed"][6] == results["seed"][7] == results["seed"][8] == 4820

    figs_and_plots: List[Tuple[plt.Figure, plt.Axes]]  # type: ignore[name-defined]

    # plot with metrics
    figs_and_plots = plot_results(results, Accuracy(), ProbPos())
    # num(datasets) * num(preprocess) * num(accuracy combinations) * num(prob_pos combinations)
    assert len(figs_and_plots) == 2 * 2 * 1 * 2 + 4
    # TODO: this +4 should be FIXED; it matches the column name containing
    # a hyphen as a DIFF metric.

    # plot with column names
    figs_and_plots = plot_results(results, "Accuracy", "prob_pos_sensitive-attr_0")
    assert len(figs_and_plots) == 1 * 2 * 1 * 1

    with pytest.raises(
        ValueError, match='No matching columns found for Metric "NMI preds and s".'
    ):
        plot_results(results, Accuracy(), NMI())

    with pytest.raises(ValueError, match='No column named "unknown metric".'):
        plot_results(results, "unknown metric", Accuracy())
def test_run_alg_suite_scaler():
    """Test run alg suite with a scaler."""
    dataset = em.adult(split="Race-Binary")
    datasets: List[em.Dataset] = [dataset, em.toy()]
    preprocess_models: List[em.PreAlgorithm] = [em.Upsampler()]
    inprocess_models: List[em.InAlgorithm] = [em.LR(), em.SVM(kernel="linear")]
    postprocess_models: List[em.PostAlgorithm] = []
    metrics: List[em.Metric] = [em.Accuracy(), em.CV()]
    per_sens_metrics: List[em.Metric] = [em.Accuracy(), em.TPR()]

    results_no_scaler = em.evaluate_models(
        datasets,
        preprocess_models,
        inprocess_models,
        postprocess_models,
        metrics,
        per_sens_metrics,
        repeats=1,
        test_mode=True,
        delete_prev=True,
        topic="pytest",
    )
    results_scaler = em.evaluate_models(
        datasets,
        preprocess_models,
        inprocess_models,
        postprocess_models,
        metrics,
        per_sens_metrics,
        scaler=StandardScaler(),
        repeats=1,
        test_mode=True,
        delete_prev=True,
        topic="pytest",
    )
    # scaling the features must change the results
    with pytest.raises(AssertionError):
        pd.testing.assert_frame_equal(results_scaler, results_no_scaler, check_like=True)
def test_load_adult_education_drop():
    """Test load adult education with binarized nationality."""
    adult_data = em.adult("Education", binarize_nationality=True)
    assert adult_data.name == "Adult Education, binary nationality"
    assert "education_HS-grad" in adult_data.sens_attrs
    assert "education_other" in adult_data.sens_attrs
    assert "education_Masters" not in adult_data.sens_attrs

    # ordered
    data = adult_data.load(ordered=True)
    assert (45222, 47) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert data.s.nunique()[0] == 3
    assert (45222, 1) == data.y.shape
    assert "education" in data.s.columns

    # not ordered
    data = adult_data.load()
    assert (45222, 47) == data.x.shape
    assert (45222, 1) == data.s.shape
    assert data.s.nunique()[0] == 3
    assert (45222, 1) == data.y.shape
    assert "education" in data.s.columns
def test_tpr_ratio_non_binary_race():
    """Test tpr ratio non binary race."""
    data: DataTuple = load_data(em.adult("Race"))
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test
    model: InAlgorithm = SVM()
    predictions: Prediction = model.run_test(train, test)
    tprs = em.metric_per_sensitive_attribute(predictions, test, TPR())
    assert TPR().name == "TPR"
    test_dict = {
        "race_0": approx(0.37, abs=0.01),
        "race_1": approx(0.12, abs=0.01),
        "race_2": approx(0.14, abs=0.01),
        "race_3": approx(0.12, abs=0.01),
        "race_4": approx(0.16, abs=0.01),
    }
    for key, val in tprs.items():
        assert val == test_dict[key]

    tpr_diff = em.ratio_per_sensitive_attribute(tprs)
    test_dict = {
        "race_0/race_1": approx(0.32, abs=0.1),
        "race_0/race_2": approx(0.37, abs=0.1),
        "race_0/race_3": approx(0.33, abs=0.1),
        "race_0/race_4": approx(0.44, abs=0.1),
        "race_1/race_2": approx(0.88, abs=0.1),
        "race_1/race_3": approx(0.97, abs=0.1),
        "race_1/race_4": approx(0.72, abs=0.1),
        "race_2/race_3": approx(0.91, abs=0.1),
        "race_2/race_4": approx(0.74, abs=0.1),
        "race_3/race_4": approx(0.74, abs=0.1),
    }
    for key, val in tpr_diff.items():
        assert val == test_dict[key]
),
DT(
    dataset=em.admissions(split="Gender", invert_s=True),
    samples=43_303,
    x_features=9,
    discrete_features=0,
    s_features=1,
    num_sens=2,
    y_features=1,
    num_labels=2,
    name="Admissions Gender",
    sum_s=43_303 - 22_335,
    sum_y=20_263,
),
DT(
    dataset=em.adult(),
    samples=45_222,
    x_features=101,
    discrete_features=96,
    s_features=1,
    num_sens=2,
    y_features=1,
    num_labels=2,
    name="Adult Sex",
    sum_s=30_527,
    sum_y=11_208,
),
DT(
    dataset=em.adult("Sex", binarize_nationality=True),
    samples=45_222,
    x_features=62,
def test_run_alg_suite_no_pipeline():
    """Run alg suite while avoiding the 'fair pipeline'."""
    datasets: List[em.Dataset] = [em.toy(), em.adult()]
    preprocess_models: List[em.PreAlgorithm] = [em.Upsampler()]
    inprocess_models: List[em.InAlgorithm] = [em.Kamiran(classifier="LR"), em.LR()]
    postprocess_models: List[em.PostAlgorithm] = []
    metrics: List[em.Metric] = [em.Accuracy(), em.CV()]
    per_sens_metrics: List[em.Metric] = [em.Accuracy(), em.TPR()]

    parallel_results = em.evaluate_models_async(
        datasets=datasets,
        preprocess_models=preprocess_models,
        inprocess_models=inprocess_models,
        postprocess_models=postprocess_models,
        metrics=metrics,
        per_sens_metrics=per_sens_metrics,
        repeats=1,
        test_mode=True,
        topic="pytest",
        fair_pipeline=False,
    )
    results = em.evaluate_models(
        datasets,
        preprocess_models,
        inprocess_models,
        postprocess_models,
        metrics,
        per_sens_metrics,
        repeats=1,
        test_mode=True,
        topic="pytest",
        fair_pipeline=False,
        delete_prev=True,
    )
    pd.testing.assert_frame_equal(parallel_results, results, check_like=True)

    num_datasets = 2
    num_preprocess = 1
    num_fair_inprocess = 1
    num_unfair_inprocess = 1
    # 2 datasets * (1 fair in-process + (1 preprocess + 1 "no transform") * 1 plain in-process) = 6
    expected_num = num_datasets * (num_fair_inprocess + (num_preprocess + 1) * num_unfair_inprocess)
    assert len(results) == expected_num

    kc_name = "Kamiran & Calders LR"
    assert len(em.filter_results(results, [kc_name])) == 2  # result for Toy and Adult
    # Kamiran, LR and Upsampler
    assert len(em.filter_results(results, ["Toy"], index="dataset")) == 3

    different_name = em.filter_and_map_results(results, {kc_name: "Kamiran & Calders"})
    assert len(em.filter_results(different_name, [kc_name])) == 0
    assert len(em.filter_results(different_name, ["Kamiran & Calders"])) == 2
    pd.testing.assert_frame_equal(
        em.filter_results(results, [kc_name]),
        results.query(f"model == '{kc_name}'"),
    )
    expected_values={
        "sensitive-attr_0": 0.632,
        "sensitive-attr_1": 0.262,
    },
),
PerSensMetricTest(
    dataset=toy(),
    classifier=SVM(),
    metric=Accuracy(),
    expected_values={
        "sensitive-attr_0": 0.921,
        "sensitive-attr_1": 0.928,
    },
),
PerSensMetricTest(
    dataset=adult("Nationality"),
    classifier=SVM(kernel="linear"),
    metric=Accuracy(),
    expected_values={
        "native-country_1": 0.649,
        "native-country_2": 0.850,
        "native-country_3": 0.933,
        "native-country_4": 0.913,
        "native-country_5": 0.867,
        "native-country_6": 0.867,
        "native-country_7": 0.950,
        "native-country_8": 0.950,
        "native-country_9": 0.750,
        "native-country_10": 0.755,
        "native-country_11": 0.636,
        "native-country_12": 0.952,