def test_scaling_separate_test(dataset_name, scaler):
    """Test that scaling works."""
    scaler = scaler()
    dataset = get_dataset_obj_by_name(dataset_name)()
    datatuple = dataset.load()

    # Speed up the tests by making some data smaller
    if dataset_name == "health":
        datatuple, _ = train_test_split(datatuple, train_percentage=0.05)

    train, test = train_test_split(datatuple)
    train_scaled, scaler2 = scale_continuous(dataset, train, scaler)
    test_scaled, _ = scale_continuous(dataset, test, scaler2, fit=False)

    if dataset_name == "crime" and str(scaler) == "MinMaxScaler()":
        # Crime dataset is minmax scaled by the data providers,
        # so can't confirm that train contains the full range.
        pass
    else:
        with pytest.raises(AssertionError):
            pandas.testing.assert_frame_equal(train.x, train_scaled.x, check_dtype=False)  # type: ignore[call-arg]
        with pytest.raises(AssertionError):
            pandas.testing.assert_frame_equal(test.x, test_scaled.x, check_dtype=False)  # type: ignore[call-arg]

    train_post, _ = scale_continuous(dataset, train_scaled, scaler2, inverse=True)
    test_post, _ = scale_continuous(dataset, test_scaled, scaler2, inverse=True)

    pandas.testing.assert_frame_equal(train.x, train_post.x, check_dtype=False)  # type: ignore[call-arg]
    pandas.testing.assert_frame_equal(test.x, test_post.x, check_dtype=False)  # type: ignore[call-arg]
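# Both this test and `test_scaling` further down take `dataset_name` and `scaler`
# arguments, which suggests they are driven by `pytest.mark.parametrize` (the
# decorators are not shown in this excerpt). A minimal sketch of such a
# parametrization, applied to a small stand-in round-trip check -- the dataset
# names and scaler classes listed here are assumptions, not the suite's actual
# test matrix; the other names come from this module's imports.
from sklearn.preprocessing import MinMaxScaler, StandardScaler


@pytest.mark.parametrize("scaler", [MinMaxScaler, StandardScaler])
@pytest.mark.parametrize("dataset_name", ["crime", "health"])
def test_scaling_roundtrip_sketch(dataset_name, scaler):
    """Hypothetical stand-in showing the parametrization pattern only."""
    dataset = get_dataset_obj_by_name(dataset_name)()
    datatuple = dataset.load()
    # Scale the continuous features, then invert the transform and check
    # that the original frame is recovered.
    scaled, fitted = scale_continuous(dataset, datatuple, scaler())
    restored, _ = scale_continuous(dataset, scaled, fitted, inverse=True)
    pandas.testing.assert_frame_equal(datatuple.x, restored.x, check_dtype=False)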
def _get_splits(self) -> TrainValTestSplit[DataTupleDataset]:
    """Load the dataset, split it into train/val/test, scale it, and wrap the splits."""
    self._datatuple = self.em_dataset.load(ordered=True)

    data_len = int(self._datatuple.x.shape[0])
    num_train_val, num_test = self._get_split_sizes(data_len, test_prop=self.test_prop)
    train_val, test_data = em.train_test_split(
        data=self._datatuple,
        train_percentage=(1 - (num_test / data_len)),
        random_seed=self.seed,
    )
    _, num_val = self._get_split_sizes(num_train_val, test_prop=self.val_prop)
    train_data, val_data = em.train_test_split(
        data=train_val,
        train_percentage=(1 - (num_val / num_train_val)),
        random_seed=self.seed,
    )

    # Fit the scaler on the training split only; val and test reuse it with fit=False.
    self._train_datatuple, self.scaler = em.scale_continuous(
        self.em_dataset, datatuple=train_data, scaler=self.scaler  # type: ignore
    )
    self._val_datatuple, _ = em.scale_continuous(
        self.em_dataset, datatuple=val_data, scaler=self.scaler, fit=False
    )
    self._test_datatuple, _ = em.scale_continuous(
        self.em_dataset, datatuple=test_data, scaler=self.scaler, fit=False
    )

    train_data = DataTupleDataset(
        dataset=self._train_datatuple,
        disc_features=self.em_dataset.discrete_features,
        cont_features=self.em_dataset.continuous_features,
    )
    val_data = DataTupleDataset(
        dataset=self._val_datatuple,
        disc_features=self.em_dataset.discrete_features,
        cont_features=self.em_dataset.continuous_features,
    )
    test_data = DataTupleDataset(
        dataset=self._test_datatuple,
        disc_features=self.em_dataset.discrete_features,
        cont_features=self.em_dataset.continuous_features,
    )
    return TrainValTestSplit(train=train_data, val=val_data, test=test_data)
def test_random_seed():
    """Test random seed."""
    data: DataTuple = em.load_data(em.toy())

    train_test_0: Tuple[DataTuple, DataTuple] = em.train_test_split(data)
    train_0, test_0 = train_test_0
    assert train_0 is not None
    assert test_0 is not None
    assert train_0.x.shape[0] > test_0.x.shape[0]
    assert train_0.x["a1"].values[0] == 0.2365572108691669
    assert train_0.x["a2"].values[0] == approx(0.008603090240657633, abs=1e-6)
    assert train_0.x.shape[0] == train_0.s.shape[0]
    assert train_0.s.shape[0] == train_0.y.shape[0]

    train_test_1: Tuple[DataTuple, DataTuple] = em.train_test_split(data, random_seed=1)
    train_1, test_1 = train_test_1
    assert train_1 is not None
    assert test_1 is not None
    assert train_1.x.shape[0] > test_1.x.shape[0]
    assert train_1.x["a1"].values[0] == 1.3736566330173798
    assert train_1.x["a2"].values[0] == approx(0.21742296144957174, abs=1e-6)
    assert train_1.x.shape[0] == train_1.s.shape[0]
    assert train_1.s.shape[0] == train_1.y.shape[0]

    train_test_2: Tuple[DataTuple, DataTuple] = em.train_test_split(data, random_seed=2)
    train_2, test_2 = train_test_2
    assert train_2 is not None
    assert test_2 is not None
    assert train_2.x.shape[0] > test_2.x.shape[0]
    assert train_2.x["a1"].values[0] == 1.2255705960148289
    assert train_2.x["a2"].values[0] == -1.208089015454192
    assert train_2.x.shape[0] == train_2.s.shape[0]
    assert train_2.s.shape[0] == train_2.y.shape[0]

    train_test_3: Tuple[DataTuple, DataTuple] = em.train_test_split(data, random_seed=3)
    train_3, test_3 = train_test_3
    assert train_3 is not None
    assert test_3 is not None
    assert train_3.x.shape[0] > test_3.x.shape[0]
    assert train_3.x["a1"].values[0] == approx(0.21165963748018515, abs=1e-6)
    assert train_3.x["a2"].values[0] == -2.425137404779957
    assert train_3.x.shape[0] == train_3.s.shape[0]
    assert train_3.s.shape[0] == train_3.y.shape[0]
def test_label_plot():
    """Test label plot."""
    data: DataTuple = load_data(adult())
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, _ = train_test

    save_label_plot(train, "./plots/labels.png")
def test_calders():
    """Test calders."""
    data = DataTuple(
        x=pd.DataFrame(np.linspace(0, 1, 100), columns=["x"]),
        s=pd.DataFrame([1] * 75 + [0] * 25, columns=["s"]),
        y=pd.DataFrame([1] * 50 + [0] * 25 + [1] * 10 + [0] * 15, columns=["y"]),
        name="TestData",
    )
    data, _ = em.train_test_split(data, train_percentage=1.0)
    assert len(em.query_dt(data, "s == 0 & y == 0")) == 15
    assert len(em.query_dt(data, "s == 0 & y == 1")) == 10
    assert len(em.query_dt(data, "s == 1 & y == 0")) == 25
    assert len(em.query_dt(data, "s == 1 & y == 1")) == 50
    assert em.query_dt(data, "s == 1 & y == 0").x.min().min() == approx(0.50, abs=0.01)

    calders: PreAlgorithm = Calders(preferable_class=1, disadvantaged_group=0)
    new_train, new_test = calders.run(data, data.remove_y())

    pd.testing.assert_frame_equal(new_test.x, data.x)
    pd.testing.assert_frame_equal(new_test.s, data.s)

    assert len(em.query_dt(new_train, "s == 0 & y == 0")) == 10
    assert len(em.query_dt(new_train, "s == 0 & y == 1")) == 15
    assert len(em.query_dt(new_train, "s == 1 & y == 0")) == 30
    assert len(em.query_dt(new_train, "s == 1 & y == 1")) == 45
    assert len(data) == len(new_train)

    assert em.query_dt(new_train, "s == 1 & y == 1").x.min().min() == 0
    assert em.query_dt(new_train, "s == 1 & y == 0").x.min().min() == approx(0.45, abs=0.01)
def test_metric_per_sens_attr(
    dataset: Dataset, classifier: InAlgorithm, metric: Metric, expected_values: Dict[str, float]
):
    """Test accuracy per sens attr."""
    data: DataTuple = load_data(dataset)
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test

    model: InAlgorithm = classifier
    predictions: Prediction = model.run(train, test)

    acc_per_sens = metric_per_sensitive_attribute(predictions, test, metric)
    try:
        for key, value in acc_per_sens.items():
            assert value == approx(expected_values[key], abs=0.001)
    except AssertionError:
        print({key: round(value, 3) for key, value in acc_per_sens.items()})
        raise AssertionError

    acc_per_sens = metric_per_sensitive_attribute(predictions, test, metric, use_sens_name=False)
    try:
        for key, value in expected_values.items():
            # Check that the sensitive attribute name is now just 'S'.
            assert acc_per_sens[f"S_{''.join(key.split('_')[1:])}"] == approx(value, abs=0.001)
    except AssertionError:
        print({key: round(value, 3) for key, value in acc_per_sens.items()})
        raise AssertionError
def toy_train_val() -> TrainTestPair:
    """By making this a fixture, pytest can cache the result."""
    data: DataTuple = em.toy().load()
    train: DataTuple
    test: DataTuple
    train, test = em.train_test_split(data)
    return TrainTestPair(train, test)
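# The docstring above implies that this function is registered as a pytest
# fixture; the decorator is not visible in this excerpt. A minimal sketch of
# how such a fixture would typically be registered and consumed -- the `scope`
# choice, the fixture name, and the consuming test below are assumptions, not
# part of the original suite; the remaining names come from this module's imports.
@pytest.fixture(scope="session")
def toy_train_val_cached() -> TrainTestPair:
    """Hypothetical session-scoped variant, split once and reused across tests."""
    train, test = em.train_test_split(em.toy().load())
    return TrainTestPair(train, test)


def test_uses_cached_split(toy_train_val_cached: TrainTestPair) -> None:
    """Hypothetical consumer: pytest injects the cached split by argument name."""
    train, test = toy_train_val_cached
    assert len(train) > len(test)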
def setup(self, stage: Optional[str] = None) -> None:
    """Load the dataset, split it into train/val/test, scale it, and wrap the splits."""
    self.datatuple = self.em_dataset.load(ordered=True)

    data_len = int(self.datatuple.x.shape[0])
    num_train_val, num_test = self._get_splits(data_len, self.test_split)
    train_val, test = em.train_test_split(
        data=self.datatuple,
        train_percentage=(1 - (num_test / data_len)),
        random_seed=self.seed,
    )
    num_train, num_val = self._get_splits(num_train_val, self.val_split)
    train, val = em.train_test_split(
        data=train_val,
        train_percentage=(1 - (num_val / num_train_val)),
        random_seed=self.seed,
    )

    # Fit the scaler on the training split only; val and test reuse it with fit=False.
    train, self.scaler = em.scale_continuous(self.em_dataset, datatuple=train, scaler=self.scaler)
    val, _ = em.scale_continuous(self.em_dataset, datatuple=val, scaler=self.scaler, fit=False)
    test, _ = em.scale_continuous(self.em_dataset, datatuple=test, scaler=self.scaler, fit=False)

    self._train_data = DataTupleDataset(
        train,
        disc_features=self.em_dataset.discrete_features,
        cont_features=self.em_dataset.continuous_features,
    )
    self._val_data = DataTupleDataset(
        val,
        disc_features=self.em_dataset.discrete_features,
        cont_features=self.em_dataset.continuous_features,
    )
    self._test_data = DataTupleDataset(
        test,
        disc_features=self.em_dataset.discrete_features,
        cont_features=self.em_dataset.continuous_features,
    )
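# `setup` above only constructs the wrapped datasets; in a typical data module
# they are then exposed through dataloader hooks on the same class. A hedged
# sketch of such a hook -- the `batch_size` and `num_workers` attributes are
# assumptions about the surrounding class and are not shown in this excerpt:
from torch.utils.data import DataLoader


def train_dataloader(self) -> DataLoader:
    """Hypothetical hook returning shuffled batches of the scaled training split."""
    return DataLoader(
        self._train_data,
        batch_size=self.batch_size,
        shuffle=True,
        num_workers=self.num_workers,
    )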
def test_nb_acc():
    """Test nb acc."""
    data: DataTuple = load_data(nonbinary_toy())
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test

    model: InAlgorithm = SVM()
    predictions: Prediction = model.run_test(train, test)
    acc_score = Accuracy().score(predictions, test)
    assert acc_score == 0.1
def test_nb_tnr():
    """Test nb tnr."""
    data: DataTuple = load_data(nonbinary_toy())
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test

    model: InAlgorithm = SVM()
    predictions: Prediction = model.run_test(train, test)

    tnr_score = TNR(pos_class=1).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=2).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=3).score(predictions, test)
    assert tnr_score == 0.0
    tnr_score = TNR(pos_class=4).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=5).score(predictions, test)
    assert tnr_score == 1.0

    with pytest.raises(LabelOutOfBounds):
        _ = TNR(pos_class=0).score(predictions, test)

    accs = em.metric_per_sensitive_attribute(predictions, test, TNR())
    assert accs == {"sens_0": approx(1.0, abs=0.1), "sens_1": approx(1.0, abs=0.1)}

    model = LR()
    predictions = model.run_test(train, test)

    print([(k, z) for k, z in zip(predictions.hard.values, test.y.values) if k != z])

    tnr_score = TNR(pos_class=1).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=2).score(predictions, test)
    assert tnr_score == 1.0
    tnr_score = TNR(pos_class=3).score(predictions, test)
    assert tnr_score == approx(0.7, abs=0.1)
    tnr_score = TNR(pos_class=4).score(predictions, test)
    assert tnr_score == approx(0.85, abs=0.1)
    tnr_score = TNR(pos_class=5).score(predictions, test)
    assert tnr_score == 1.0

    with pytest.raises(LabelOutOfBounds):
        _ = TNR(pos_class=0).score(predictions, test)

    tnrs = em.metric_per_sensitive_attribute(predictions, test, TNR())
    assert tnrs == {"sens_0": approx(1.0, abs=0.1), "sens_1": approx(1.0, abs=0.1)}
def test_issue_431():
    """This issue highlighted that an error would be raised because not all label values exist in subsets of the data."""
    x = pd.DataFrame(np.random.randn(100), columns=["x"])
    s = pd.DataFrame(np.random.randn(100), columns=["s"])
    y = pd.DataFrame(np.random.randint(0, 5, 100), columns=["y"])
    data = DataTuple(x=x, s=s, y=y)
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test

    model: InAlgorithm = LR()
    predictions: Prediction = model.run(train, test)
    acc_per_sens = metric_per_sensitive_attribute(
        predictions, test, TPR(pos_class=1, labels=list(range(y.nunique()[0])))
    )
    print(acc_per_sens)
def test_train_test_split():
    """Test train test split."""
    data: DataTuple = em.load_data(em.toy())
    train_test: Tuple[DataTuple, DataTuple] = em.train_test_split(data)
    train, test = train_test
    assert train is not None
    assert test is not None
    assert train.x.shape[0] > test.x.shape[0]
    assert train.x["a1"].values[0] == 0.2365572108691669
    assert train.x["a2"].values[0] == approx(0.008603090240657633, abs=1e-6)
    assert train.x.shape[0] == train.s.shape[0]
    assert train.s.shape[0] == train.y.shape[0]

    num_samples = len(data)

    len_default = math.floor((num_samples / 100) * 80)
    assert train.s.shape[0] == len_default
    assert test.s.shape[0] == num_samples - len_default

    len_0_9 = math.floor((num_samples / 100) * 90)
    train, test = em.train_test_split(data, train_percentage=0.9)
    assert train.s.shape[0] == len_0_9
    assert test.s.shape[0] == num_samples - len_0_9

    len_0_7 = math.floor((num_samples / 100) * 70)
    train, test = em.train_test_split(data, train_percentage=0.7)
    assert train.s.shape[0] == len_0_7
    assert test.s.shape[0] == num_samples - len_0_7

    len_0_5 = math.floor((num_samples / 100) * 50)
    train, test = em.train_test_split(data, train_percentage=0.5)
    assert train.s.shape[0] == len_0_5
    assert test.s.shape[0] == num_samples - len_0_5

    len_0_3 = math.floor((num_samples / 100) * 30)
    train, test = em.train_test_split(data, train_percentage=0.3)
    assert train.s.shape[0] == len_0_3
    assert test.s.shape[0] == num_samples - len_0_3

    len_0_1 = math.floor((num_samples / 100) * 10)
    train, test = em.train_test_split(data, train_percentage=0.1)
    assert train.s.shape[0] == len_0_1
    assert test.s.shape[0] == num_samples - len_0_1

    len_0_0 = math.floor((num_samples / 100) * 0)
    train, test = em.train_test_split(data, train_percentage=0.0)
    assert train.s.shape[0] == len_0_0
    assert train.name == "Toy - Train"
    assert test.s.shape[0] == num_samples - len_0_0
    assert test.name == "Toy - Test"
def test_corels(toy_train_test: TrainTestPair) -> None:
    """Test corels."""
    model: InAlgorithm = Corels()
    assert model is not None
    assert model.name == "CORELS"

    train_toy, test_toy = toy_train_test
    with pytest.raises(RuntimeError):
        model.run(train_toy, test_toy)

    data: DataTuple = load_data(compas())
    train, test = train_test_split(data)

    predictions: Prediction = model.run(train, test)
    expected_num_pos = 428
    assert predictions.hard.values[predictions.hard.values == 1].shape[0] == expected_num_pos
    num_neg = predictions.hard.values[predictions.hard.values == 0].shape[0]
    assert num_neg == len(predictions) - expected_num_pos
def test_scaling(dataset_name, scaler):
    """Test that scaling works."""
    scaler = scaler()
    dataset = get_dataset_obj_by_name(dataset_name)()
    datatuple = dataset.load()

    # Speed up the tests by making some data smaller
    if dataset_name == "health":
        datatuple, _ = train_test_split(datatuple, train_percentage=0.05)

    datatuple_scaled, scaler2 = scale_continuous(dataset, datatuple, scaler)
    if dataset_name == "crime" and str(scaler) == "MinMaxScaler()":
        # Crime dataset is minmax scaled by the data providers.
        pandas.testing.assert_frame_equal(datatuple.x, datatuple_scaled.x, check_dtype=False)  # type: ignore[call-arg]
    else:
        with pytest.raises(AssertionError):
            pandas.testing.assert_frame_equal(datatuple.x, datatuple_scaled.x, check_dtype=False)  # type: ignore[call-arg]

    datatuple_post, _ = scale_continuous(dataset, datatuple_scaled, scaler2, inverse=True)

    pandas.testing.assert_frame_equal(datatuple.x, datatuple_post.x, check_dtype=False)  # type: ignore[call-arg]
def test_tpr_ratio_non_binary_race():
    """Test tpr ratio non binary race."""
    data: DataTuple = load_data(em.adult("Race"))
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test

    model: InAlgorithm = SVM()
    predictions: Prediction = model.run_test(train, test)

    tprs = em.metric_per_sensitive_attribute(predictions, test, TPR())
    assert TPR().name == "TPR"
    test_dict = {
        "race_0": approx(0.37, abs=0.01),
        "race_1": approx(0.12, abs=0.01),
        "race_2": approx(0.14, abs=0.01),
        "race_3": approx(0.12, abs=0.01),
        "race_4": approx(0.16, abs=0.01),
    }
    for key, val in tprs.items():
        assert val == test_dict[key]

    tpr_diff = em.ratio_per_sensitive_attribute(tprs)
    test_dict = {
        "race_0/race_1": approx(0.32, abs=0.1),
        "race_0/race_2": approx(0.37, abs=0.1),
        "race_0/race_3": approx(0.33, abs=0.1),
        "race_0/race_4": approx(0.44, abs=0.1),
        "race_1/race_2": approx(0.88, abs=0.1),
        "race_1/race_3": approx(0.97, abs=0.1),
        "race_1/race_4": approx(0.72, abs=0.1),
        "race_2/race_3": approx(0.91, abs=0.1),
        "race_2/race_4": approx(0.74, abs=0.1),
        "race_3/race_4": approx(0.74, abs=0.1),
    }
    for key, val in tpr_diff.items():
        assert val == test_dict[key]