def test_smoke_extra_arg(self):
    y_t = [0, 0, 1, 1, 0, 1, 1, 1]
    y_p = [0, 1, 1, 1, 1, 0, 0, 1]
    gid = [0, 0, 0, 0, 1, 1, 1, 1]

    metric_group_summary = metrics.make_metric_group_summary(mock_func_extra_arg)
    metric_group_min = metrics.make_derived_metric(
        metrics.group_min_from_summary, metric_group_summary)
    metric_group_max = metrics.make_derived_metric(
        metrics.group_max_from_summary, metric_group_summary)
    metric_difference = metrics.make_derived_metric(
        metrics.difference_from_summary, metric_group_summary)
    metric_ratio = metrics.make_derived_metric(
        metrics.ratio_from_summary, metric_group_summary)

    # Run with the extra argument defaulted
    assert metric_group_min(y_t, y_p, sensitive_features=gid) == 2
    assert metric_group_max(y_t, y_p, sensitive_features=gid) == 3
    assert metric_difference(y_t, y_p, sensitive_features=gid) == 1
    assert metric_ratio(y_t, y_p, sensitive_features=gid) == pytest.approx(0.66666666667)

    # Run with the extra argument set to something
    assert metric_group_min(y_t, y_p, sensitive_features=gid, my_arg=2) == 4
    assert metric_group_max(y_t, y_p, sensitive_features=gid, my_arg=2) == 6
    assert metric_difference(y_t, y_p, sensitive_features=gid, my_arg=2) == 2
    assert metric_ratio(y_t, y_p, sensitive_features=gid, my_arg=2) == pytest.approx(0.66666666667)

def test_keys_and_weights(self, transform_y_t, transform_y_p, transform_gid, transform_s_w):
    a = "ABC"
    b = "DEF"
    c = "GHI"
    z = "something_longer"
    y_t = transform_y_t([0, 1, 1, 1, 0, 1, 1, 1])
    y_p = transform_y_p([0, 1, 1, 1, 1, 0, 0, 1])
    gid = transform_gid([a, z, a, b, b, c, c, c])
    s_w = transform_s_w([1, 1, 1, 5, 5, 7, 7, 7])

    metric_group_summary = metrics.make_metric_group_summary(mock_func_weight)
    metric_group_min = metrics.make_derived_metric(
        metrics.group_min_from_summary, metric_group_summary)
    metric_group_max = metrics.make_derived_metric(
        metrics.group_max_from_summary, metric_group_summary)
    metric_difference = metrics.make_derived_metric(
        metrics.difference_from_summary, metric_group_summary)
    metric_ratio = metrics.make_derived_metric(
        metrics.ratio_from_summary, metric_group_summary)

    assert metric_group_min(y_t, y_p, sensitive_features=gid, sample_weight=s_w) == 1
    assert metric_group_max(y_t, y_p, sensitive_features=gid, sample_weight=s_w) == 21
    assert metric_difference(y_t, y_p, sensitive_features=gid, sample_weight=s_w) == 20
    assert metric_ratio(y_t, y_p, sensitive_features=gid, sample_weight=s_w) == pytest.approx(1.0 / 21.0)

def test_derived_difference_both_arg_types():
    my_beta = 0.5
    my_fn = metrics.make_derived_metric(
        metric=skm.fbeta_score,
        transform="difference",
        sample_param_names=["sample_weight"],
    )

    my_fbeta = functools.partial(skm.fbeta_score, beta=my_beta)
    my_fbeta.__name__ = "my_fbeta"
    grouped = metrics.MetricFrame(
        metrics=my_fbeta,
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=gid,
        sample_params={"sample_weight": wgt},
    )

    actual = my_fn(
        y_t,
        y_p,
        sensitive_features=gid,
        beta=my_beta,
        sample_weight=wgt,
        method="between_groups",
    )
    assert actual == grouped.difference(method="between_groups")

def test_bad_transform_rejected():
    expected = "Transform must be one of ['difference', 'group_min', 'group_max', 'ratio']"
    with pytest.raises(ValueError) as context:
        _ = metrics.make_derived_metric(metric=skm.accuracy_score,
                                        transform="something rotten")
    assert context.value.args[0] == expected

def test_smoke(self):
    y_t = [0, 0, 1, 1, 0, 1, 1, 1]
    y_p = [0, 1, 1, 1, 1, 0, 0, 1]
    gid = [0, 0, 0, 0, 1, 1, 1, 1]

    metric_group_summary = metrics.make_metric_group_summary(mock_func)
    metric_group_min = metrics.make_derived_metric(
        metrics.group_min_from_summary, metric_group_summary)
    metric_group_max = metrics.make_derived_metric(
        metrics.group_max_from_summary, metric_group_summary)
    metric_difference = metrics.make_derived_metric(
        metrics.difference_from_summary, metric_group_summary)
    metric_ratio = metrics.make_derived_metric(
        metrics.ratio_from_summary, metric_group_summary)

    assert metric_group_min(y_t, y_p, sensitive_features=gid) == 2
    assert metric_group_max(y_t, y_p, sensitive_features=gid) == 3
    assert metric_difference(y_t, y_p, sensitive_features=gid) == 1
    assert metric_ratio(y_t, y_p, sensitive_features=gid) == pytest.approx(0.66666666667)

def test_group_max():
    my_fn = metrics.make_derived_metric(metric=skm.precision_score,
                                        transform='group_max',
                                        sample_param_names=['sample_weight'])
    grouped = metrics.MetricFrame(metrics=skm.precision_score,
                                  y_true=y_t, y_pred=y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid)
    assert actual == grouped.group_max()

def test_derived_ratio_default_is_between_groups():
    my_fn = metrics.make_derived_metric(metric=skm.precision_score,
                                        transform='ratio',
                                        sample_param_names=['sample_weight'])
    grouped = metrics.MetricFrame(metrics=skm.precision_score,
                                  y_true=y_t, y_pred=y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid)
    assert actual == grouped.ratio()

def test_derived_difference_default_is_between_groups():
    my_fn = metrics.make_derived_metric(metric=skm.accuracy_score,
                                        transform='difference',
                                        sample_param_names=['sample_weight'])
    grouped = metrics.MetricFrame(metrics=skm.accuracy_score,
                                  y_true=y_t, y_pred=y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid)
    assert actual == grouped.difference()

def test_function_method_arg_rejected():
    def bad_fn(y_p, y_t, method):
        print(method)
        return skm.accuracy_score(y_p, y_t)

    expected = ("Callables which accept a 'method' argument"
                " may not be passed to make_derived_metric()."
                " Please use functools.partial()")
    with pytest.raises(ValueError) as context:
        _ = metrics.make_derived_metric(metric=bad_fn, transform='group_max')
    assert context.value.args[0] == expected

def test_group_min():
    my_fn = metrics.make_derived_metric(
        metric=skm.precision_score,
        transform="group_min",
        sample_param_names=["sample_weight"],
    )
    grouped = metrics.MetricFrame(metrics=skm.precision_score,
                                  y_true=y_t, y_pred=y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid)
    assert actual == grouped.group_min()

def test_derived_ratio_to_overall():
    my_fn = metrics.make_derived_metric(
        metric=skm.precision_score,
        transform="ratio",
        sample_param_names=["sample_weight"],
    )
    grouped = metrics.MetricFrame(metrics=skm.precision_score,
                                  y_true=y_t, y_pred=y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid, method="to_overall")
    assert actual == grouped.ratio(method="to_overall")

def test_derived_difference_sample_arg():
    my_fbeta = functools.partial(skm.fbeta_score, beta=0.6)
    my_fbeta.__name__ = "my_fbeta"
    my_fn = metrics.make_derived_metric(metric=my_fbeta,
                                        transform='difference',
                                        sample_param_names=['sample_weight'])
    grouped = metrics.MetricFrame(metrics=my_fbeta,
                                  y_true=y_t, y_pred=y_p,
                                  sensitive_features=gid,
                                  sample_params={'sample_weight': wgt})
    actual = my_fn(y_t, y_p, sensitive_features=gid,
                   sample_weight=wgt, method='between_groups')
    assert actual == grouped.difference(method='between_groups')

def test_derived_difference_to_overall():
    my_fn = metrics.make_derived_metric(
        metric=skm.accuracy_score,
        transform="difference",
        sample_param_names=["sample_weight"],
    )
    grouped = metrics.MetricFrame(metrics=skm.accuracy_score,
                                  y_true=y_t, y_pred=y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid, method="to_overall")
    assert actual == grouped.difference(method="to_overall")

def test_derived_difference_both_arg_types_default_sample_param_names():
    my_beta = 0.5
    my_fn = metrics.make_derived_metric(metric=skm.fbeta_score,
                                        transform='difference')
    my_fbeta = functools.partial(skm.fbeta_score, beta=my_beta)
    my_fbeta.__name__ = "my_fbeta"
    grouped = metrics.MetricFrame(metrics=my_fbeta,
                                  y_true=y_t, y_pred=y_p,
                                  sensitive_features=gid,
                                  sample_params={'sample_weight': wgt})
    actual = my_fn(y_t, y_p, sensitive_features=gid,
                   beta=my_beta, sample_weight=wgt)
    assert actual == grouped.difference()

def test_derived_difference_broadcast_arg():
    my_beta = 0.6
    my_fn = metrics.make_derived_metric(metric=skm.fbeta_score,
                                        transform='difference',
                                        sample_param_names=['sample_weight'])
    my_fbeta = functools.partial(skm.fbeta_score, beta=my_beta)
    my_fbeta.__name__ = "my_fbeta"
    grouped = metrics.MetricFrame(metrics=my_fbeta,
                                  y_true=y_t, y_pred=y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid,
                   beta=my_beta, method='between_groups')
    assert actual == grouped.difference(method='between_groups')

# metrics in `scikit-learn`.
#
# The result is a new function with the same signature as the
# base metric, which accepts two extra arguments:
#
# - :code:`sensitive_features=` to specify the sensitive features
#   which define the subgroups
# - :code:`method=` to adjust how the aggregation transformation
#   operates. This corresponds to the same argument in
#   :meth:`fairlearn.metrics.MetricFrame.difference` and
#   :meth:`fairlearn.metrics.MetricFrame.ratio`
#
# For the current case, we do not need the :code:`method=`
# argument, since we are taking the minimum value.

my_acc = make_derived_metric(metric=skm.accuracy_score, transform="group_min")
my_acc_min = my_acc(y_test, y_pred, sensitive_features=A_test["sex"])
print("Minimum accuracy_score: ", my_acc_min)

# %%
# To show that the returned function also works with sample weights:
random_weights = np.random.rand(len(y_test))

acc_frame_sw = MetricFrame(
    metrics=skm.accuracy_score,
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=A_test["sex"],
    sample_params={"sample_weight": random_weights},
)
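
# %%
# A sketch of the comparison (assuming, per the description above, that
# the derived function forwards the base metric's ``sample_weight=``
# argument; :code:`my_acc` and :code:`acc_frame_sw` are defined above):
my_acc_min_sw = my_acc(
    y_test,
    y_pred,
    sensitive_features=A_test["sex"],
    sample_weight=random_weights,
)
print("Minimum accuracy_score (sample_weight): ", my_acc_min_sw)
assert my_acc_min_sw == acc_frame_sw.group_min()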
    if task_type in REGRESSION_TASKS:
        # TODO put this into the regression metric itself
        cprediction = sanitize_array(prediction)
        score = metric(solution, cprediction)
    else:
        score = metric(solution, prediction)

    return score


# Must be at bottom so all metrics are defined
default_metric_for_task: Dict[int, Scorer] = {
    BINARY_CLASSIFICATION: CLASSIFICATION_METRICS['accuracy'],
    MULTICLASS_CLASSIFICATION: CLASSIFICATION_METRICS['accuracy'],
    MULTILABEL_CLASSIFICATION: CLASSIFICATION_METRICS['f1_macro'],
    REGRESSION: REGRESSION_METRICS['r2'],
    MULTIOUTPUT_REGRESSION: REGRESSION_METRICS['r2'],
}

from fairlearn.metrics import make_derived_metric
import sklearn.metrics as skm

# The scorer's __call__() needs a 'sensitive_features' array as input.
# A smaller accuracy difference between groups is better, and the worst
# possible difference is 1, hence greater_is_better=False.
acc_diff = make_scorer(
    'acc_diff',
    make_derived_metric(metric=skm.accuracy_score, transform='difference'),
    worst_possible_result=1,
    greater_is_better=False,
    needs_proba=False,
    needs_prot=True,
)
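
# For reference, a minimal sketch of how the wrapped derived metric behaves
# on its own; the toy arrays below are invented for illustration. The derived
# function has the base metric's signature plus a 'sensitive_features='
# argument, which is how the scorer above forwards the group labels.
_acc_diff_fn = make_derived_metric(metric=skm.accuracy_score, transform='difference')
_y_true = [0, 1, 1, 0, 1, 0]
_y_pred = [0, 1, 1, 0, 0, 1]
_groups = ['a', 'a', 'a', 'b', 'b', 'b']
# Accuracy is 3/3 for group 'a' and 1/3 for group 'b', so the
# between-group difference is 2/3.
print(_acc_diff_fn(_y_true, _y_pred, sensitive_features=_groups))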
def test_noncallable_rejected():
    expected = "Supplied metric object must be callable"
    with pytest.raises(ValueError) as context:
        _ = metrics.make_derived_metric(metric="random", transform='group_max')
    assert context.value.args[0] == expected