def test_tpr_diff(toy_train_val: TrainValPair):
    """Test tpr diff."""
    train, test = toy_train_val
    model: InAlgorithm = SVM()
    predictions: Prediction = model.run(train, test)
    tprs = em.metric_per_sensitive_attribute(predictions, test, TPR())
    assert TPR().name == "TPR"
    assert tprs == {
        "sensitive-attr_0": approx(0.923, abs=0.001),
        "sensitive-attr_1": approx(1.0, abs=0.001),
    }
    tpr_diff = em.diff_per_sensitive_attribute(tprs)
    print(tpr_diff)
    assert tpr_diff["sensitive-attr_0-sensitive-attr_1"] == approx(0.077, abs=0.001)
def test_issue_431():
    """This issue highlighted that an error would be raised because not all values exist in subsets of the data."""
    x = pd.DataFrame(np.random.randn(100), columns=["x"])
    s = pd.DataFrame(np.random.randn(100), columns=["s"])
    y = pd.DataFrame(np.random.randint(0, 5, 100), columns=["y"])
    data = DataTuple(x=x, s=s, y=y)
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test
    model: InAlgorithm = LR()
    predictions: Prediction = model.run(train, test)
    acc_per_sens = metric_per_sensitive_attribute(
        predictions, test, TPR(pos_class=1, labels=list(range(y.nunique()[0])))
    )
    print(acc_per_sens)
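# A minimal sketch of the pattern exercised by test_issue_431 above: when a
# per-sensitive-attribute subset happens to be missing a target class, a metric that
# infers its label set from that subset alone can fail, so the full label set is
# passed to the metric explicitly:
#
#     n_classes = y.nunique()[0]
#     metric = TPR(pos_class=1, labels=list(range(n_classes)))
#     per_sens = metric_per_sensitive_attribute(predictions, test, metric)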
def test_run_metrics(toy_train_val: TrainValPair):
    """Test run metrics."""
    train, test = toy_train_val
    model: InAlgorithm = SVM()
    predictions: Prediction = model.run(train, test)
    results = em.run_metrics(predictions, test, [CV()], [TPR()])
    # Five entries: the two per-group TPRs, their diff, their ratio, and CV.
    assert len(results) == 5
    assert results["TPR_sensitive-attr_0"] == approx(0.923, abs=0.001)
    assert results["TPR_sensitive-attr_1"] == approx(1.0, abs=0.001)
    assert results["TPR_sensitive-attr_0-sensitive-attr_1"] == approx(0.077, abs=0.001)
    assert results["TPR_sensitive-attr_0/sensitive-attr_1"] == approx(0.923, abs=0.001)
    assert results["CV"] == approx(0.630, abs=0.001)
def test_tpr_ratio_non_binary_race():
    """Test the TPR ratio with a non-binary race attribute."""
    data: DataTuple = load_data(em.adult("Race"))
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test
    model: InAlgorithm = SVM()
    predictions: Prediction = model.run_test(train, test)
    tprs = em.metric_per_sensitive_attribute(predictions, test, TPR())
    assert TPR().name == "TPR"
    test_dict = {
        "race_0": approx(0.37, abs=0.01),
        "race_1": approx(0.12, abs=0.01),
        "race_2": approx(0.14, abs=0.01),
        "race_3": approx(0.12, abs=0.01),
        "race_4": approx(0.16, abs=0.01),
    }
    for key, val in tprs.items():
        assert val == test_dict[key]

    tpr_ratio = em.ratio_per_sensitive_attribute(tprs)
    test_dict = {
        "race_0/race_1": approx(0.32, abs=0.1),
        "race_0/race_2": approx(0.37, abs=0.1),
        "race_0/race_3": approx(0.33, abs=0.1),
        "race_0/race_4": approx(0.44, abs=0.1),
        "race_1/race_2": approx(0.88, abs=0.1),
        "race_1/race_3": approx(0.97, abs=0.1),
        "race_1/race_4": approx(0.72, abs=0.1),
        "race_2/race_3": approx(0.91, abs=0.1),
        "race_2/race_4": approx(0.74, abs=0.1),
        "race_3/race_4": approx(0.74, abs=0.1),
    }
    for key, val in tpr_ratio.items():
        assert val == test_dict[key]
def test_plot_evals():
    """Test plot evals."""
    results: Results = evaluate_models(
        datasets=[adult(), toy()],
        preprocess_models=[Upsampler(strategy="preferential")],
        inprocess_models=[LR(), SVM(kernel="linear"), Kamiran()],
        metrics=[Accuracy(), CV()],
        per_sens_metrics=[TPR(), ProbPos()],
        repeats=3,
        test_mode=True,
        delete_prev=True,
    )
    assert results["seed"][0] == results["seed"][1] == results["seed"][2] == 0
    assert results["seed"][3] == results["seed"][4] == results["seed"][5] == 2410
    assert results["seed"][6] == results["seed"][7] == results["seed"][8] == 4820

    figs_and_plots: List[Tuple[plt.Figure, plt.Axes]]  # type: ignore[name-defined]

    # plot with metrics
    figs_and_plots = plot_results(results, Accuracy(), ProbPos())
    # num(datasets) * num(preprocess) * num(accuracy combinations) * num(prob_pos combinations)
    assert len(figs_and_plots) == 2 * 2 * 1 * 2 + 4  # TODO: this +4 should be FIXED,
    # it matches the column name containing a hyphen as a DIFF metric.

    # plot with column names
    figs_and_plots = plot_results(results, "Accuracy", "prob_pos_sensitive-attr_0")
    assert len(figs_and_plots) == 1 * 2 * 1 * 1

    with pytest.raises(
        ValueError, match='No matching columns found for Metric "NMI preds and s".'
    ):
        plot_results(results, Accuracy(), NMI())

    with pytest.raises(ValueError, match='No column named "unknown metric".'):
        plot_results(results, "unknown metric", Accuracy())
def test_nb_tpr():
    """Test TPR scores on the non-binary toy dataset."""
    data: DataTuple = load_data(nonbinary_toy())
    train_test: Tuple[DataTuple, DataTuple] = train_test_split(data)
    train, test = train_test

    model: InAlgorithm = SVM()
    predictions: Prediction = model.run_test(train, test)
    tpr_score = TPR(pos_class=1).score(predictions, test)
    assert tpr_score == 0.0
    tpr_score = TPR(pos_class=2).score(predictions, test)
    assert tpr_score == 0.0
    tpr_score = TPR(pos_class=3).score(predictions, test)
    assert tpr_score == 1.0
    tpr_score = TPR(pos_class=4).score(predictions, test)
    assert tpr_score == 0.0
    tpr_score = TPR(pos_class=5).score(predictions, test)
    assert tpr_score == 0.0

    with pytest.raises(LabelOutOfBounds):
        _ = TPR(pos_class=0).score(predictions, test)

    accs = em.metric_per_sensitive_attribute(predictions, test, TPR())
    assert accs == {"sens_0": approx(0.0, abs=0.1), "sens_1": approx(0.0, abs=0.1)}

    model = LR()
    predictions = model.run_test(train, test)
    print([(k, z) for k, z in zip(predictions.hard.values, test.y.values) if k != z])
    tpr_score = TPR(pos_class=1).score(predictions, test)
    assert tpr_score == 1.0
    tpr_score = TPR(pos_class=2).score(predictions, test)
    assert tpr_score == 0.0
    tpr_score = TPR(pos_class=3).score(predictions, test)
    assert tpr_score == 0.0
    tpr_score = TPR(pos_class=4).score(predictions, test)
    assert tpr_score == 1.0
    tpr_score = TPR(pos_class=5).score(predictions, test)
    assert tpr_score == 1.0

    with pytest.raises(LabelOutOfBounds):
        _ = TPR(pos_class=0).score(predictions, test)

    tprs = em.metric_per_sensitive_attribute(predictions, test, TPR())
    assert tprs == {"sens_0": approx(1.0, abs=0.1), "sens_1": approx(1.0, abs=0.1)}