Example #1
    def test_smoke_extra_arg(self):
        y_t = [0, 0, 1, 1, 0, 1, 1, 1]
        y_p = [0, 1, 1, 1, 1, 0, 0, 1]
        gid = [0, 0, 0, 0, 1, 1, 1, 1]

        metric_group_summary = metrics.make_metric_group_summary(mock_func_extra_arg)
        metric_group_min = metrics.make_derived_metric(
            metrics.group_min_from_summary, metric_group_summary)
        metric_group_max = metrics.make_derived_metric(
            metrics.group_max_from_summary, metric_group_summary)
        metric_difference = metrics.make_derived_metric(
            metrics.difference_from_summary, metric_group_summary)
        metric_ratio = metrics.make_derived_metric(
            metrics.ratio_from_summary, metric_group_summary)

        # Run with the extra argument defaulted
        assert metric_group_min(y_t, y_p, sensitive_features=gid) == 2
        assert metric_group_max(y_t, y_p, sensitive_features=gid) == 3
        assert metric_difference(y_t, y_p, sensitive_features=gid) == 1
        assert metric_ratio(y_t, y_p, sensitive_features=gid) == pytest.approx(0.66666666667)

        # Run with the extra argument set to something
        assert metric_group_min(y_t, y_p, sensitive_features=gid, my_arg=2) == 4
        assert metric_group_max(y_t, y_p, sensitive_features=gid, my_arg=2) == 6
        assert metric_difference(y_t, y_p, sensitive_features=gid, my_arg=2) == 2
        assert metric_ratio(y_t, y_p, sensitive_features=gid,
                            my_arg=2) == pytest.approx(0.66666666667)
Example #2
    def test_keys_and_weights(self, transform_y_t, transform_y_p, transform_gid, transform_s_w):
        a = "ABC"
        b = "DEF"
        c = "GHI"
        z = "something_longer"
        y_t = transform_y_t([0, 1, 1, 1, 0, 1, 1, 1])
        y_p = transform_y_p([0, 1, 1, 1, 1, 0, 0, 1])
        gid = transform_gid([a, z, a, b, b, c, c, c])
        s_w = transform_s_w([1, 1, 1, 5, 5, 7, 7, 7])

        metric_group_summary = metrics.make_metric_group_summary(mock_func_weight)
        metric_group_min = metrics.make_derived_metric(
            metrics.group_min_from_summary, metric_group_summary)
        metric_group_max = metrics.make_derived_metric(
            metrics.group_max_from_summary, metric_group_summary)
        metric_difference = metrics.make_derived_metric(
            metrics.difference_from_summary, metric_group_summary)
        metric_ratio = metrics.make_derived_metric(
            metrics.ratio_from_summary, metric_group_summary)

        assert metric_group_min(y_t, y_p, sensitive_features=gid, sample_weight=s_w) == 1
        assert metric_group_max(y_t, y_p, sensitive_features=gid, sample_weight=s_w) == 21
        assert metric_difference(y_t, y_p, sensitive_features=gid, sample_weight=s_w) == 20
        assert metric_ratio(y_t, y_p,
                            sensitive_features=gid, sample_weight=s_w) == pytest.approx(1.0/21.0)
Example #3
def test_derived_difference_both_arg_types():
    my_beta = 0.5
    my_fn = metrics.make_derived_metric(
        metric=skm.fbeta_score,
        transform="difference",
        sample_param_names=["sample_weight"],
    )

    my_fbeta = functools.partial(skm.fbeta_score, beta=my_beta)
    my_fbeta.__name__ = "my_fbeta"
    grouped = metrics.MetricFrame(
        metrics=my_fbeta,
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=gid,
        sample_params={"sample_weight": wgt},
    )

    actual = my_fn(
        y_t,
        y_p,
        sensitive_features=gid,
        beta=my_beta,
        sample_weight=wgt,
        method="between_groups",
    )
    assert actual == grouped.difference(method="between_groups")
def test_bad_transform_rejected():
    expected = "Transform must be one of ['difference', 'group_min', 'group_max', 'ratio']"

    with pytest.raises(ValueError) as context:
        _ = metrics.make_derived_metric(metric=skm.accuracy_score,
                                        transform="something rotten")
    assert context.value.args[0] == expected
Example #5
    def test_smoke(self):
        y_t = [0, 0, 1, 1, 0, 1, 1, 1]
        y_p = [0, 1, 1, 1, 1, 0, 0, 1]
        gid = [0, 0, 0, 0, 1, 1, 1, 1]

        metric_group_summary = metrics.make_metric_group_summary(mock_func)
        metric_group_min = metrics.make_derived_metric(
            metrics.group_min_from_summary, metric_group_summary)
        metric_group_max = metrics.make_derived_metric(
            metrics.group_max_from_summary, metric_group_summary)
        metric_difference = metrics.make_derived_metric(
            metrics.difference_from_summary, metric_group_summary)
        metric_ratio = metrics.make_derived_metric(
            metrics.ratio_from_summary, metric_group_summary)

        assert metric_group_min(y_t, y_p, sensitive_features=gid) == 2
        assert metric_group_max(y_t, y_p, sensitive_features=gid) == 3
        assert metric_difference(y_t, y_p, sensitive_features=gid) == 1
        assert metric_ratio(y_t, y_p, sensitive_features=gid) == pytest.approx(0.66666666667)
def test_group_max():
    my_fn = metrics.make_derived_metric(metric=skm.precision_score,
                                        transform='group_max',
                                        sample_param_names=['sample_weight'])

    grouped = metrics.MetricFrame(skm.precision_score,
                                  y_t, y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid)
    assert actual == grouped.group_max()
def test_derived_ratio_to_overall():
    my_fn = metrics.make_derived_metric(metric=skm.precision_score,
                                        transform='ratio',
                                        sample_param_names=['sample_weight'])

    grouped = metrics.MetricFrame(skm.precision_score,
                                  y_t, y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid, method='to_overall')
    assert actual == grouped.ratio(method='to_overall')
def test_derived_ratio_default_is_between_groups():
    my_fn = metrics.make_derived_metric(metric=skm.precision_score,
                                        transform='ratio',
                                        sample_param_names=['sample_weight'])

    grouped = metrics.MetricFrame(skm.precision_score,
                                  y_t, y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid)
    assert actual == grouped.ratio()
def test_derived_difference_to_overall():
    my_fn = metrics.make_derived_metric(metric=skm.accuracy_score,
                                        transform='difference',
                                        sample_param_names=['sample_weight'])

    grouped = metrics.MetricFrame(skm.accuracy_score,
                                  y_t, y_p,
                                  sensitive_features=gid)

    actual = my_fn(y_t, y_p, sensitive_features=gid, method='to_overall')
    assert actual == grouped.difference(method='to_overall')
def test_derived_difference_default_is_between_groups():
    my_fn = metrics.make_derived_metric(metric=skm.accuracy_score,
                                        transform='difference',
                                        sample_param_names=['sample_weight'])

    grouped = metrics.MetricFrame(skm.accuracy_score,
                                  y_t, y_p,
                                  sensitive_features=gid)

    actual = my_fn(y_t, y_p, sensitive_features=gid)
    assert actual == grouped.difference()
def test_function_method_arg_rejected():
    def bad_fn(y_p, y_t, method):
        print(method)
        return skm.accuracy_score(y_p, y_t)

    expected = "Callables which accept a 'method' argument" \
        " may not be passed to make_derived_metric()." \
        " Please use functools.partial()"
    with pytest.raises(ValueError) as context:
        _ = metrics.make_derived_metric(metric=bad_fn, transform='group_max')
    assert context.value.args[0] == expected
Example #12
def test_group_min():
    my_fn = metrics.make_derived_metric(
        metric=skm.precision_score,
        transform="group_min",
        sample_param_names=["sample_weight"],
    )

    grouped = metrics.MetricFrame(metrics=skm.precision_score,
                                  y_true=y_t,
                                  y_pred=y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid)
    assert actual == grouped.group_min()
Example #13
def test_derived_ratio_to_overall():
    my_fn = metrics.make_derived_metric(
        metric=skm.precision_score,
        transform="ratio",
        sample_param_names=["sample_weight"],
    )

    grouped = metrics.MetricFrame(metrics=skm.precision_score,
                                  y_true=y_t,
                                  y_pred=y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid, method="to_overall")
    assert actual == grouped.ratio(method="to_overall")
def test_derived_difference_sample_arg():
    my_fbeta = functools.partial(skm.fbeta_score, beta=0.6)
    my_fbeta.__name__ = "my_fbeta"
    my_fn = metrics.make_derived_metric(metric=my_fbeta,
                                        transform='difference',
                                        sample_param_names=['sample_weight'])

    grouped = metrics.MetricFrame(my_fbeta,
                                  y_t, y_p,
                                  sensitive_features=gid,
                                  sample_params={'sample_weight': wgt})
    actual = my_fn(y_t, y_p, sensitive_features=gid,
                   sample_weight=wgt, method='between_groups')
    assert actual == grouped.difference(method='between_groups')
Example #15
def test_derived_difference_to_overall():
    my_fn = metrics.make_derived_metric(
        metric=skm.accuracy_score,
        transform="difference",
        sample_param_names=["sample_weight"],
    )

    grouped = metrics.MetricFrame(metrics=skm.accuracy_score,
                                  y_true=y_t,
                                  y_pred=y_p,
                                  sensitive_features=gid)

    actual = my_fn(y_t, y_p, sensitive_features=gid, method="to_overall")
    assert actual == grouped.difference(method="to_overall")
def test_derived_difference_both_arg_types_default_sample_param_names():
    my_beta = 0.5
    my_fn = metrics.make_derived_metric(metric=skm.fbeta_score,
                                        transform='difference')

    my_fbeta = functools.partial(skm.fbeta_score, beta=my_beta)
    my_fbeta.__name__ = "my_fbeta"
    grouped = metrics.MetricFrame(my_fbeta,
                                  y_t, y_p,
                                  sensitive_features=gid,
                                  sample_params={'sample_weight': wgt})

    actual = my_fn(y_t, y_p,
                   sensitive_features=gid,
                   beta=my_beta,
                   sample_weight=wgt)
    assert actual == grouped.difference()
def test_derived_difference_broadcast_arg():
    my_beta = 0.6
    my_fn = metrics.make_derived_metric(metric=skm.fbeta_score,
                                        transform='difference',
                                        sample_param_names=['sample_weight'])

    my_fbeta = functools.partial(skm.fbeta_score, beta=my_beta)
    my_fbeta.__name__ = "my_fbeta"
    grouped = metrics.MetricFrame(metrics=my_fbeta,
                                  y_true=y_t,
                                  y_pred=y_p,
                                  sensitive_features=gid)

    actual = my_fn(y_t,
                   y_p,
                   sensitive_features=gid,
                   beta=my_beta,
                   method='between_groups')
    assert actual == grouped.difference(method='between_groups')
Example #18
#   metrics in `scikit-learn`.
#
# The result is a new function with the same signature as the
# base metric, which accepts two extra arguments:
#
#  - :code:`sensitive_features=` to specify the sensitive features
#    which define the subgroups
#  - :code:`method=` to adjust how the aggregation transformation
#    operates. This corresponds to the same argument in
#    :meth:`fairlearn.metrics.MetricFrame.difference` and
#    :meth:`fairlearn.metrics.MetricFrame.ratio`
#
# For the current case, we do not need the :code:`method=`
# argument, since we are taking the minimum value.

my_acc = make_derived_metric(metric=skm.accuracy_score, transform="group_min")
my_acc_min = my_acc(y_test, y_pred, sensitive_features=A_test["sex"])
print("Minimum accuracy_score: ", my_acc_min)

# %%
# To show that the returned function also works with sample weights:
random_weights = np.random.rand(len(y_test))

acc_frame_sw = MetricFrame(
    metrics=skm.accuracy_score,
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=A_test["sex"],
    sample_params={"sample_weight": random_weights},
)
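
# %%
# A minimal sketch of the comparison (assuming the :code:`my_acc` function and
# the :code:`acc_frame_sw` frame defined above): passing :code:`sample_weight=`
# to the derived metric should reproduce the weighted group minimum reported
# by the :class:`~fairlearn.metrics.MetricFrame`.
from_frame = acc_frame_sw.group_min()
from_func = my_acc(
    y_test,
    y_pred,
    sensitive_features=A_test["sex"],
    sample_weight=random_weights,
)
print("Weighted group_min from MetricFrame: ", from_frame)
print("Weighted group_min from derived metric: ", from_func)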
Example #19
    if task_type in REGRESSION_TASKS:
        # TODO put this into the regression metric itself
        cprediction = sanitize_array(prediction)
        score = metric(solution, cprediction)
    else:
        score = metric(solution, prediction)
    return score


# Must be at bottom so all metrics are defined
default_metric_for_task: Dict[int, Scorer] = {
    BINARY_CLASSIFICATION: CLASSIFICATION_METRICS['accuracy'],
    MULTICLASS_CLASSIFICATION: CLASSIFICATION_METRICS['accuracy'],
    MULTILABEL_CLASSIFICATION: CLASSIFICATION_METRICS['f1_macro'],
    REGRESSION: REGRESSION_METRICS['r2'],
    MULTIOUTPUT_REGRESSION: REGRESSION_METRICS['r2'],
}

from fairlearn.metrics import make_derived_metric
import sklearn.metrics as skm

# The scorer's __call__() needs a 'sensitive_features' array as input.
acc_diff = make_scorer(
    'acc_diff',
    make_derived_metric(metric=skm.accuracy_score, transform='group_min'),
    worst_possible_result=1,
    greater_is_better=False,
    needs_proba=False,
    needs_prot=True
)
def test_noncallable_rejected():
    expected = "Supplied metric object must be callable"
    with pytest.raises(ValueError) as context:
        _ = metrics.make_derived_metric(metric="random", transform='group_max')
    assert context.value.args[0] == expected