def _prepare(self, metric_fn):
        self.mfn = "Some name"
        self.target = metrics.MetricFrame(
            {self.mfn: metric_fn},
            y_t,
            y_p,
            sensitive_features=pd.Series(data=g_2, name='sf0'),
            control_features=pd.Series(data=g_3, name='cf0'))

        assert isinstance(self.target.control_levels, list)
        assert (self.target.control_levels == ['cf0'])
        assert isinstance(self.target.sensitive_levels, list)
        assert (self.target.sensitive_levels == ['sf0'])

        mask_f = (g_2 == 'f')
        mask_g = (g_2 == 'g')
        mask_k = (g_3 == 'kk')
        mask_m = (g_3 == 'm')

        mask_k_f = np.logical_and(mask_k, mask_f)
        mask_k_g = np.logical_and(mask_k, mask_g)
        mask_m_f = np.logical_and(mask_m, mask_f)
        mask_m_g = np.logical_and(mask_m, mask_g)
        self.metric_k = metric_fn(y_t[mask_k], y_p[mask_k])
        self.metric_m = metric_fn(y_t[mask_m], y_p[mask_m])
        self.metric_k_f = metric_fn(y_t[mask_k_f], y_p[mask_k_f])
        self.metric_m_f = metric_fn(y_t[mask_m_f], y_p[mask_m_f])
        self.metric_k_g = metric_fn(y_t[mask_k_g], y_p[mask_k_g])
        self.metric_m_g = metric_fn(y_t[mask_m_g], y_p[mask_m_g])
        self.metric_k_arr = [self.metric_k_f, self.metric_k_g]
        self.metric_m_arr = [self.metric_m_f, self.metric_m_g]
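The fragments in this listing come from test modules that share module-level fixtures. The sketch below is a hypothetical stand-in (the data shapes and values are assumptions, not the library's real fixtures) that makes the identifiers used throughout readable; a few examples additionally rely on y_true, y_pred, sf, s_f, version and accuracy_score_difference from other test modules, which are left as-is.

import functools

import numpy as np
import pandas as pd
import pytest
import sklearn.metrics as skm

from fairlearn import metrics

# Hypothetical stand-ins for the shared fixtures referenced below; the real
# test modules define their own versions of these arrays.
rng = np.random.default_rng(96132)
n = 64
y_t = rng.integers(0, 2, size=n)        # ground-truth labels
y_p = rng.integers(0, 2, size=n)        # predicted labels
s_w = rng.random(n)                     # sample weights
wgt = rng.random(n)                     # alternative sample weights
gid = rng.integers(0, 2, size=n)        # simple numeric group ids
g_1 = rng.choice(['aa', 'ba'], size=n)  # categorical sensitive/control features
g_2 = rng.choice(['f', 'g'], size=n)
g_3 = rng.choice(['kk', 'm'], size=n)
g_4 = rng.choice(['pp', 'q'], size=n)
# g_A / g_B are built so that the ('bb', 'x') combination never occurs, as
# test_missing_sensitive_feature_combinations below requires.
g_A = rng.choice(['aa', 'bb'], size=n)
g_B = np.where(g_A == 'bb',
               rng.choice(['y', 'z'], size=n),
               rng.choice(['x', 'y', 'z'], size=n))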
def test_multid_input_output():
    # In this test, both y_t and y_p are 2d arrays,
    # and the metric results are also arrays
    metric_fn = functools.partial(skm.r2_score, multioutput="raw_values")

    y_t_2 = np.random.rand(len(g_1), 2)
    y_p_2 = np.random.rand(len(g_1), 2)

    target = metrics.MetricFrame(metrics=metric_fn,
                                 y_true=y_t_2,
                                 y_pred=y_p_2,
                                 sensitive_features=g_1)

    expected_overall = skm.r2_score(y_t_2, y_p_2, multioutput="raw_values")
    # Have to use allclose rather than equal since we don't know how
    # groupby will do its slicing
    assert np.allclose(target.overall,
                       expected_overall,
                       rtol=1e-12,
                       atol=1e-10)
    for g in np.unique(g_1):
        mask = g_1 == g

        expected = skm.r2_score(y_t_2[mask],
                                y_p_2[mask],
                                multioutput="raw_values")
        actual = target.by_group[g]
        assert np.allclose(actual, expected, rtol=1e-12, atol=1e-10)
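    # Hypothetical follow-on (plain pandas, nothing fairlearn-specific): the
    # array-valued by_group cells can be expanded into per-output columns.
    per_output = pd.DataFrame(target.by_group.tolist(),
                              index=target.by_group.index,
                              columns=['output_0', 'output_1'])
    assert per_output.shape == (len(np.unique(g_1)), 2)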
Example #3
def test_missing_sensitive_feature_combinations(metric_fn):

    target = metrics.MetricFrame(metrics=metric_fn,
                                 y_true=y_t,
                                 y_pred=y_p,
                                 sensitive_features=np.stack([g_A, g_B],
                                                             axis=1))

    # Make sure our missing combination is in an expected place
    overall = metric_fn(y_t, y_p)
    direct_eval = []
    for idx in target.by_group.index:
        mask_A = g_A == idx[0]
        mask_B = g_B == idx[1]
        mask = np.logical_and(mask_A, mask_B)
        if idx == ('bb', 'x'):
            assert sum(mask) == 0, 'idx={0}'.format(idx)
        else:
            assert sum(mask) != 0, 'idx={0}'.format(idx)
            nxt = metric_fn(y_t[mask], y_p[mask])
            direct_eval.append(nxt)
    assert len(direct_eval) == 5

    # Check we have expected values
    assert np.isnan(target.by_group[('bb', 'x')])
    assert target.group_min() == min(direct_eval)
    assert target.group_max() == max(direct_eval)
    assert target.difference(method='between_groups') == \
        max(direct_eval)-min(direct_eval)
    assert target.difference(method='to_overall') == \
        max([abs(x-overall) for x in direct_eval])
    assert target.ratio(method='between_groups') == \
        min(direct_eval) / max(direct_eval)
    assert target.ratio(method='to_overall') == \
        min([x/overall for x in direct_eval] + [overall/x for x in direct_eval])
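    # Hypothetical follow-on: the NaN cell for the missing combination can be
    # dropped with plain pandas before any further manual processing.
    observed = target.by_group.dropna()
    assert len(observed) == len(direct_eval)
    assert ('bb', 'x') not in observed.index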
Example #4
def test_derived_difference_both_arg_types():
    my_beta = 0.5
    my_fn = metrics.make_derived_metric(
        metric=skm.fbeta_score,
        transform="difference",
        sample_param_names=["sample_weight"],
    )

    my_fbeta = functools.partial(skm.fbeta_score, beta=my_beta)
    my_fbeta.__name__ = "my_fbeta"
    grouped = metrics.MetricFrame(
        metrics=my_fbeta,
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=gid,
        sample_params={"sample_weight": wgt},
    )

    actual = my_fn(
        y_t,
        y_p,
        sensitive_features=gid,
        beta=my_beta,
        sample_weight=wgt,
        method="between_groups",
    )
    assert actual == grouped.difference(method="between_groups")
Example #5
def test_1m_1sf_1cf_metric_dict(transform_y_t, transform_y_p):
    # If there are failures here, other, more specific tests should also fail
    target = metrics.MetricFrame({'recall': skm.recall_score},
                                 transform_y_t(y_t),
                                 transform_y_p(y_p),
                                 sensitive_features=g_2,
                                 control_features=g_3)

    # Check on the indices properties
    assert isinstance(target.control_levels, list)
    assert (target.control_levels == ['control_feature_0'])
    assert isinstance(target.sensitive_levels, list)
    assert (target.sensitive_levels == ['sensitive_feature_0'])

    # Check we have correct return types
    assert isinstance(target.overall, pd.DataFrame)
    assert isinstance(target.by_group, pd.DataFrame)

    mask_f = (g_2 == 'f')
    mask_g = (g_2 == 'g')
    mask_k = (g_3 == 'kk')
    mask_m = (g_3 == 'm')

    # Check we have expected number of elements
    assert target.overall.shape == (2, 1)
    assert target.by_group.shape == (4, 1)

    recall_k = skm.recall_score(y_t[mask_k], y_p[mask_k])
    recall_m = skm.recall_score(y_t[mask_m], y_p[mask_m])
    assert target.overall['recall']['kk'] == recall_k
    assert target.overall['recall']['m'] == recall_m

    mask_k_f = np.logical_and(mask_k, mask_f)
    mask_k_g = np.logical_and(mask_k, mask_g)
    mask_m_f = np.logical_and(mask_m, mask_f)
    mask_m_g = np.logical_and(mask_m, mask_g)
    recall_k_f = skm.recall_score(y_t[mask_k_f], y_p[mask_k_f])
    recall_m_f = skm.recall_score(y_t[mask_m_f], y_p[mask_m_f])
    recall_k_g = skm.recall_score(y_t[mask_k_g], y_p[mask_k_g])
    recall_m_g = skm.recall_score(y_t[mask_m_g], y_p[mask_m_g])
    assert target.by_group['recall'][('kk', 'f')] == recall_k_f
    assert target.by_group['recall'][('kk', 'g')] == recall_k_g
    assert target.by_group['recall'][('m', 'f')] == recall_m_f
    assert target.by_group['recall'][('m', 'g')] == recall_m_g

    recall_k_arr = [recall_k_f, recall_k_g]
    recall_m_arr = [recall_m_f, recall_m_g]

    target_mins = target.group_min()
    assert isinstance(target_mins, pd.DataFrame)
    assert target_mins.shape == (2, 1)
    assert target_mins['recall']['kk'] == min(recall_k_arr)
    assert target_mins['recall']['m'] == min(recall_m_arr)

    target_maxs = target.group_max()
    assert isinstance(target_maxs, pd.DataFrame)
    assert target_maxs.shape == (2, 1)
    assert target_maxs['recall']['kk'] == max(recall_k_arr)
    assert target_maxs['recall']['m'] == max(recall_m_arr)
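    # Hypothetical follow-on, assuming the documented behaviour that with
    # control features present the aggregations are evaluated within each
    # control-feature group: difference() should then give one row per
    # control-feature value.
    target_diffs = target.difference(method='between_groups')
    assert isinstance(target_diffs, pd.DataFrame)
    assert target_diffs.shape == (2, 1)
    assert target_diffs['recall']['kk'] == max(recall_k_arr) - min(recall_k_arr)
    assert target_diffs['recall']['m'] == max(recall_m_arr) - min(recall_m_arr)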
Example #6
def test_single_element_lists():
    mf = metrics.MetricFrame(
        metrics=skm.balanced_accuracy_score,
        y_true=[1],
        y_pred=[1],
        sensitive_features=[0],
    )
    assert mf.overall == 1
    def _prepare(self, metric_fn):
        self.target = metrics.MetricFrame(metric_fn,
                                          y_t,
                                          y_p,
                                          sensitive_features=list(g_2),
                                          control_features=np.stack([g_3, g_1],
                                                                    axis=1))

        assert isinstance(self.target.control_levels, list)
        assert (self.target.control_levels == [
            'control_feature_0', 'control_feature_1'
        ])
        assert isinstance(self.target.sensitive_levels, list)
        assert (self.target.sensitive_levels == ['sensitive_feature_0'])

        # Check we have correct return types
        assert isinstance(self.target.overall, pd.Series)
        assert isinstance(self.target.by_group, pd.Series)

        mask_a = (g_1 == 'aa')
        mask_b = (g_1 == 'ba')
        mask_f = (g_2 == 'f')
        mask_g = (g_2 == 'g')
        mask_k = (g_3 == 'kk')
        mask_m = (g_3 == 'm')

        mask_k_a = np.logical_and(mask_k, mask_a)
        mask_k_b = np.logical_and(mask_k, mask_b)
        mask_m_a = np.logical_and(mask_m, mask_a)
        mask_m_b = np.logical_and(mask_m, mask_b)
        mask_k_a_f = np.logical_and(mask_k_a, mask_f)
        mask_k_a_g = np.logical_and(mask_k_a, mask_g)
        mask_k_b_f = np.logical_and(mask_k_b, mask_f)
        mask_k_b_g = np.logical_and(mask_k_b, mask_g)
        mask_m_a_f = np.logical_and(mask_m_a, mask_f)
        mask_m_a_g = np.logical_and(mask_m_a, mask_g)
        mask_m_b_f = np.logical_and(mask_m_b, mask_f)
        mask_m_b_g = np.logical_and(mask_m_b, mask_g)

        self.metric_k_a = metric_fn(y_t[mask_k_a], y_p[mask_k_a])
        self.metric_k_b = metric_fn(y_t[mask_k_b], y_p[mask_k_b])
        self.metric_m_a = metric_fn(y_t[mask_m_a], y_p[mask_m_a])
        self.metric_m_b = metric_fn(y_t[mask_m_b], y_p[mask_m_b])
        self.metric_k_a_f = metric_fn(y_t[mask_k_a_f], y_p[mask_k_a_f])
        self.metric_k_a_g = metric_fn(y_t[mask_k_a_g], y_p[mask_k_a_g])
        self.metric_k_b_f = metric_fn(y_t[mask_k_b_f], y_p[mask_k_b_f])
        self.metric_k_b_g = metric_fn(y_t[mask_k_b_g], y_p[mask_k_b_g])
        self.metric_m_a_f = metric_fn(y_t[mask_m_a_f], y_p[mask_m_a_f])
        self.metric_m_a_g = metric_fn(y_t[mask_m_a_g], y_p[mask_m_a_g])
        self.metric_m_b_f = metric_fn(y_t[mask_m_b_f], y_p[mask_m_b_f])
        self.metric_m_b_g = metric_fn(y_t[mask_m_b_g], y_p[mask_m_b_g])

        self.metric_k_a_arr = [self.metric_k_a_f, self.metric_k_a_g]
        self.metric_k_b_arr = [self.metric_k_b_f, self.metric_k_b_g]
        self.metric_m_a_arr = [self.metric_m_a_f, self.metric_m_a_g]
        self.metric_m_b_arr = [self.metric_m_b_f, self.metric_m_b_g]

        self.mfn = metric_fn.__name__
Example #8
def test_four_positional_arguments():
    # The first formal positional argument to the constructor is "self"
    # so the error message says that five arguments were given
    msg = "__init__() takes 1 positional argument but 5 positional arguments were given"

    with pytest.raises(TypeError) as execInfo:
        _ = metrics.MetricFrame(skm.accuracy_score, y_true, y_pred, sf)

    assert execInfo.value.args[0] == msg
Example #9
def test_duplicate_sf_names():
    groups = pd.DataFrame(np.stack([g_2, g_3], axis=1), columns=["A", "A"])
    msg = "Detected duplicate feature name: 'A'"
    with pytest.raises(ValueError) as execInfo:
        _ = metrics.MetricFrame(skm.recall_score,
                                y_t,
                                y_p,
                                sensitive_features=groups)
    assert execInfo.value.args[0] == msg
Example #10
def test_no_warnings():
    with pytest.warns(None) as record:
        mf = metrics.MetricFrame(
            metrics=skm.accuracy_score,
            y_true=y_true,
            y_pred=y_pred,
            sensitive_features=sf)

    assert len(record) == 0
    assert mf.difference() == pytest.approx(accuracy_score_difference)
def test_group_max():
    my_fn = metrics.make_derived_metric(metric=skm.precision_score,
                                        transform='group_max',
                                        sample_param_names=['sample_weight'])

    grouped = metrics.MetricFrame(skm.precision_score,
                                  y_t, y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid)
    assert actual == grouped.group_max()
def test_derived_ratio_to_overall():
    my_fn = metrics.make_derived_metric(metric=skm.precision_score,
                                        transform='ratio',
                                        sample_param_names=['sample_weight'])

    grouped = metrics.MetricFrame(skm.precision_score,
                                  y_t, y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid, method='to_overall')
    assert actual == grouped.ratio(method='to_overall')
def test_derived_ratio_default_is_between_groups():
    my_fn = metrics.make_derived_metric(metric=skm.precision_score,
                                        transform='ratio',
                                        sample_param_names=['sample_weight'])

    grouped = metrics.MetricFrame(skm.precision_score,
                                  y_t, y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid)
    assert actual == grouped.ratio()
def test_derived_difference_to_overall():
    my_fn = metrics.make_derived_metric(metric=skm.accuracy_score,
                                        transform='difference',
                                        sample_param_names=['sample_weight'])

    grouped = metrics.MetricFrame(skm.accuracy_score,
                                  y_t, y_p,
                                  sensitive_features=gid)

    actual = my_fn(y_t, y_p, sensitive_features=gid, method='to_overall')
    assert actual == grouped.difference(method='to_overall')
def test_derived_difference_default_is_between_groups():
    my_fn = metrics.make_derived_metric(metric=skm.accuracy_score,
                                        transform='difference',
                                        sample_param_names=['sample_weight'])

    grouped = metrics.MetricFrame(skm.accuracy_score,
                                  y_t, y_p,
                                  sensitive_features=gid)

    actual = my_fn(y_t, y_p, sensitive_features=gid)
    assert actual == grouped.difference()
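The derived-metric tests above all follow the same pattern; for orientation, a minimal self-contained sketch with hypothetical toy data (assuming, as elsewhere in this listing, that metrics refers to fairlearn.metrics) is shown below.

def example_make_derived_metric_usage():
    # Hypothetical toy data, not part of the original fixtures.
    y_true_toy = np.array([0, 1, 1, 0, 1, 1, 0, 1])
    y_pred_toy = np.array([0, 1, 0, 0, 1, 1, 1, 1])
    group_toy = np.array(['a', 'a', 'a', 'b', 'b', 'b', 'b', 'a'])

    # Build a derived scalar metric and check it against the equivalent
    # MetricFrame aggregation.
    acc_diff = metrics.make_derived_metric(metric=skm.accuracy_score,
                                           transform='difference')
    mf = metrics.MetricFrame(metrics=skm.accuracy_score,
                             y_true=y_true_toy,
                             y_pred=y_pred_toy,
                             sensitive_features=group_toy)
    assert acc_diff(y_true_toy, y_pred_toy,
                    sensitive_features=group_toy) == mf.difference()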
Example #16
def test_duplicate_cf_sf_names():
    cf = pd.DataFrame(np.stack([g_2, g_3], axis=1), columns=["A", "B"])
    sf = {"B": g_1, "C": g_4}
    msg = "Detected duplicate feature name: 'B'"
    with pytest.raises(ValueError) as execInfo:
        _ = metrics.MetricFrame(skm.recall_score,
                                y_t,
                                y_p,
                                sensitive_features=sf,
                                control_features=cf)
    assert execInfo.value.args[0] == msg
Example #17
def test_duplicate_cf_names():
    groups = pd.DataFrame(np.stack([g_2, g_3], axis=1), columns=["B", "B"])
    msg = "Detected duplicate feature name: 'B'"
    with pytest.raises(ValueError) as execInfo:
        _ = metrics.MetricFrame(
            metrics=skm.recall_score,
            y_true=y_t,
            y_pred=y_p,
            sensitive_features=g_4,
            control_features=groups,
        )
    assert execInfo.value.args[0] == msg
Example #18
def test_group_min():
    my_fn = metrics.make_derived_metric(
        metric=skm.precision_score,
        transform="group_min",
        sample_param_names=["sample_weight"],
    )

    grouped = metrics.MetricFrame(metrics=skm.precision_score,
                                  y_true=y_t,
                                  y_pred=y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid)
    assert actual == grouped.group_min()
Example #19
def test_derived_ratio_to_overall():
    my_fn = metrics.make_derived_metric(
        metric=skm.precision_score,
        transform="ratio",
        sample_param_names=["sample_weight"],
    )

    grouped = metrics.MetricFrame(metrics=skm.precision_score,
                                  y_true=y_t,
                                  y_pred=y_p,
                                  sensitive_features=gid)
    actual = my_fn(y_t, y_p, sensitive_features=gid, method="to_overall")
    assert actual == grouped.ratio(method="to_overall")
def test_derived_difference_sample_arg():
    my_fbeta = functools.partial(skm.fbeta_score, beta=0.6)
    my_fbeta.__name__ = "my_fbeta"
    my_fn = metrics.make_derived_metric(metric=my_fbeta,
                                        transform='difference',
                                        sample_param_names=['sample_weight'])

    grouped = metrics.MetricFrame(my_fbeta,
                                  y_t, y_p,
                                  sensitive_features=gid,
                                  sample_params={'sample_weight': wgt})
    actual = my_fn(y_t, y_p, sensitive_features=gid,
                   sample_weight=wgt, method='between_groups')
    assert actual == grouped.difference(method='between_groups')
Example #21
def test_derived_difference_to_overall():
    my_fn = metrics.make_derived_metric(
        metric=skm.accuracy_score,
        transform="difference",
        sample_param_names=["sample_weight"],
    )

    grouped = metrics.MetricFrame(metrics=skm.accuracy_score,
                                  y_true=y_t,
                                  y_pred=y_p,
                                  sensitive_features=gid)

    actual = my_fn(y_t, y_p, sensitive_features=gid, method="to_overall")
    assert actual == grouped.difference(method="to_overall")
def test_1m_1sf_0cf():
    target = metrics.MetricFrame({'confusion_matrix': skm.confusion_matrix},
                                 y_t,
                                 y_p,
                                 sensitive_features=g_1)

    overall = skm.confusion_matrix(y_t, y_p)
    assert np.array_equal(target.overall['confusion_matrix'], overall)

    for g in np.unique(g_1):
        mask = g_1 == g
        expected = skm.confusion_matrix(y_t[mask], y_p[mask])
        actual = target.by_group['confusion_matrix'][g]
        assert np.array_equal(actual, expected)
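        # Hypothetical follow-on: a per-group scalar (here, accuracy) can be
        # recovered from each confusion matrix with plain numpy.
        group_accuracy = np.trace(actual) / actual.sum()
        assert group_accuracy == pytest.approx(
            skm.accuracy_score(y_t[mask], y_p[mask]))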
    def _prepare(self, metric_fn):
        self.mfn = "Random name"
        self.target = metrics.MetricFrame(metrics={self.mfn: metric_fn}, y_true=y_t, y_pred=y_p,
                                          sensitive_features=g_4)

        assert self.target.control_levels is None
        assert isinstance(self.target.sensitive_levels, list)
        assert (self.target.sensitive_levels == ['sensitive_feature_0'])

        self.overall = metric_fn(y_t, y_p)
        mask_p = (g_4 == 'pp')
        mask_q = (g_4 == 'q')
        self.metric_p = metric_fn(y_t[mask_p], y_p[mask_p])
        self.metric_q = metric_fn(y_t[mask_q], y_p[mask_q])
Example #24
def test_one_positional_argument():
    msg = (
        "You have provided 'metrics' as positional arguments. Please pass them as"
        f" keyword arguments. From version {version} passing them as positional"
        " arguments will result in an error.")

    with pytest.warns(FutureWarning) as record:
        mf = metrics.MetricFrame(skm.accuracy_score,
                                 y_true=y_true,
                                 y_pred=y_pred,
                                 sensitive_features=sf)

    assert len(record) == 1
    assert str(record[0].message) == msg
    assert mf.difference() == pytest.approx(accuracy_score_difference)
Example #25
def test_keyword_metric():
    msg = ("The positional argument 'metric' has been replaced by "
           f"a keyword argument 'metrics'. From version {version} passing "
           "it as a positional argument or as a keyword argument "
           "'metric' will result in an error")

    with pytest.warns(FutureWarning) as record:
        mf = metrics.MetricFrame(
            metric=skm.accuracy_score,
            y_true=y_true,
            y_pred=y_pred,
            sensitive_features=sf)

    assert len(record) == 1
    assert str(record[0].message) == msg
    assert mf.difference() == pytest.approx(accuracy_score_difference)
def test_roc_auc():
    ras = functools.partial(skm.roc_auc_score,
                            multi_class='ovr',
                            labels=[0, 1, 2])
    target = metrics.MetricFrame(ras,
                                 y_true, y_pred,
                                 sensitive_features=s_f)

    overall = ras(y_true, y_pred)
    assert target.overall == overall

    for g in np.unique(s_f):
        mask = s_f == g
        expected = ras(y_true[mask], y_pred[mask])
        actual = target.by_group[g]
        assert expected == actual
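    # Assumption about these fixtures (not stated in the snippet): with
    # multi_class='ovr', skm.roc_auc_score expects y_pred to hold per-class
    # probability scores of shape (n_samples, 3) rather than hard labels,
    # and y_true to hold the integer labels {0, 1, 2}.
    assert np.asarray(y_pred).ndim == 2
    assert np.asarray(y_pred).shape[1] == len(np.unique(y_true))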
def test_derived_difference_both_arg_types_default_sample_param_names():
    my_beta = 0.5
    my_fn = metrics.make_derived_metric(metric=skm.fbeta_score,
                                        transform='difference')

    my_fbeta = functools.partial(skm.fbeta_score, beta=my_beta)
    my_fbeta.__name__ = "my_fbeta"
    grouped = metrics.MetricFrame(my_fbeta,
                                  y_t, y_p,
                                  sensitive_features=gid,
                                  sample_params={'sample_weight': wgt})

    actual = my_fn(y_t, y_p,
                   sensitive_features=gid,
                   beta=my_beta,
                   sample_weight=wgt)
    assert actual == grouped.difference()
    def _prepare(self):
        fns = {'recall': skm.recall_score, 'prec': skm.precision_score}
        self.target = metrics.MetricFrame(
            fns, y_t, y_p, sensitive_features=pd.Series(data=g_4))

        assert self.target.control_levels is None
        assert isinstance(self.target.sensitive_levels, list)
        assert (self.target.sensitive_levels == ['sensitive_feature_0'])

        self.recall = skm.recall_score(y_t, y_p)
        self.prec = skm.precision_score(y_t, y_p)
        mask_p = (g_4 == 'pp')
        mask_q = (g_4 == 'q')
        self.recall_p = skm.recall_score(y_t[mask_p], y_p[mask_p])
        self.recall_q = skm.recall_score(y_t[mask_q], y_p[mask_q])
        self.prec_p = skm.precision_score(y_t[mask_p], y_p[mask_p])
        self.prec_q = skm.precision_score(y_t[mask_q], y_p[mask_q])
Example #29
def test_1m_1_sf_sample_weights():
    """Check that sample weights are passed correctly to a single metric."""
    def multi_sp(y_t, y_p, p1, p2):
        """Metric to check passing of sample parameters.

        Verifies that p2 == y_t + y_p + p1 for all elements
        """
        assert len(y_t) == len(y_p)
        assert len(y_t) == len(p1)
        assert len(y_t) == len(p2)
        assert np.array_equal(
            p2,
            np.asarray(y_t) + np.asarray(y_p) + np.asarray(p1))
        return sum(p2)

    # Generate some random input data
    rng = np.random.default_rng(seed=42)
    param1 = rng.random(len(y_t))
    # Compute the expected sum
    param2 = s_w + y_p + param1

    # Note that we pass in the s_w array for y_true, to get
    # a little more variety in the results
    target = metrics.MetricFrame(
        metrics=multi_sp,
        y_true=s_w,
        y_pred=y_p,
        sensitive_features=g_1,
        sample_params={
            "p1": param1,
            "p2": param2
        },
    )

    # Sanity check types
    assert isinstance(target.overall, float)
    assert isinstance(target.by_group, pd.Series)
    assert target.by_group.shape == (2, )

    # Check the overall value
    assert target.overall == sum(param2)

    # Look at the by_group values for each subgroup identified by g_1
    for g in np.unique(g_1):
        mask = g_1 == g
        assert target.by_group[g] == sum(param2[mask])
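    # Hypothetical follow-on: in everyday use, sample_params most often routes
    # sample_weight to a real metric in exactly the same way.
    weighted = metrics.MetricFrame(metrics=skm.recall_score,
                                   y_true=y_t,
                                   y_pred=y_p,
                                   sensitive_features=g_1,
                                   sample_params={'sample_weight': s_w})
    assert isinstance(weighted.overall, float)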
def test_multid_input_output():
    # In this test, both y_t and y_p are 2d arrays,
    # and the metric results are also arrays
    metric_fn = functools.partial(skm.r2_score, multioutput='raw_values')

    y_t_2 = np.random.rand(len(g_1), 2)
    y_p_2 = np.random.rand(len(g_1), 2)

    target = metrics.MetricFrame(metric_fn, y_t_2, y_p_2, sensitive_features=g_1)

    expected_overall = skm.r2_score(y_t_2, y_p_2, multioutput='raw_values')
    assert np.array_equal(target.overall, expected_overall)
    for g in np.unique(g_1):
        mask = g_1 == g

        expected = skm.r2_score(y_t_2[mask], y_p_2[mask], multioutput='raw_values')
        actual = target.by_group[g]
        assert np.array_equal(actual, expected)