def test_equalized_odds():
    """Compare equalized-odds disparities with and without control features.

    Three predictors are evaluated on the same generated data: a plain
    LogisticRegression, an ExponentiatedGradient fitted with sensitive
    features only, and an ExponentiatedGradient fitted with sensitive and
    control features. The test asserts that every "to_overall" difference of
    the control-aware predictor is <= the matching difference of the other two.
    """
    # Written longhand because equalized odds combines tpr and fpr
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=632753)
    X_dummy = pd.get_dummies(X)
    sens = X["sens"]
    ctrl = X["ctrl"]
    rate_metrics = {"tpr": true_positive_rate, "fpr": false_positive_rate}

    def build_frame(predictions):
        # Every frame is evaluated with both sensitive and control features.
        return MetricFrame(
            metrics=rate_metrics,
            y_true=y,
            y_pred=predictions,
            sensitive_features=sens,
            control_features=ctrl,
        )

    def make_mitigator():
        return ExponentiatedGradient(
            LogisticRegression(),
            constraints=EqualizedOdds(difference_bound=0.01),
            eps=0.01,
        )

    def gap(frame):
        return frame.difference(method="to_overall")

    # Baseline: no mitigation at all.
    baseline = LogisticRegression()
    baseline.fit(X_dummy, y)
    mf_unmitigated = build_frame(baseline.predict(X_dummy))

    # Mitigation that only knows about the sensitive feature.
    expgrad_basic = make_mitigator()
    expgrad_basic.fit(X_dummy, y, sensitive_features=sens)
    mf_basic = build_frame(expgrad_basic.predict(X_dummy, random_state=9235))

    # Mitigation that also receives the control feature.
    expgrad_control = make_mitigator()
    expgrad_control.fit(X_dummy, y, sensitive_features=sens, control_features=ctrl)
    mf_control = build_frame(expgrad_control.predict(X_dummy, random_state=8152))

    compare_unmitigated = gap(mf_control) <= gap(mf_unmitigated)
    print(compare_unmitigated)
    compare_basic = gap(mf_control) <= gap(mf_basic)
    print(compare_basic)

    assert compare_basic.values.reshape(6).all()
    assert compare_unmitigated.values.reshape(6).all()
def test_demographic_parity_ratio(agg_method):
    """demographic_parity_ratio must equal MetricFrame.ratio of selection_rate.

    ``agg_method`` is forwarded unchanged as ``method`` to both computations.
    """
    observed = demographic_parity_ratio(
        y_t,
        y_p,
        sensitive_features=g_1,
        method=agg_method,
    )

    # Reference value computed directly from the per-group selection rates.
    frame = MetricFrame(selection_rate, y_t, y_p, sensitive_features=g_1)
    reference = frame.ratio(method=agg_method)

    assert observed == reference
def test_equalized_odds_ratio(agg_method):
    """equalized_odds_ratio must equal the smaller of the tpr and fpr ratios.

    ``agg_method`` is forwarded unchanged as ``method`` to both computations.
    """
    observed = equalized_odds_ratio(
        y_t,
        y_p,
        method=agg_method,
        sensitive_features=g_1,
    )

    rate_fns = {'tpr': true_positive_rate, 'fpr': false_positive_rate}
    frame = MetricFrame(rate_fns, y_t, y_p, sensitive_features=g_1)
    # The reference is the minimum over the two per-metric ratios.
    smallest_ratio = frame.ratio(method=agg_method).min()

    assert observed == smallest_ratio
def test_equalized_odds_difference(agg_method):
    """equalized_odds_difference must equal the larger of the tpr/fpr gaps.

    ``agg_method`` is forwarded unchanged as ``method`` to both computations.
    """
    observed = equalized_odds_difference(
        y_t,
        y_p,
        sensitive_features=g_1,
        method=agg_method,
    )

    rate_fns = {'tpr': true_positive_rate, 'fpr': false_positive_rate}
    frame = MetricFrame(rate_fns, y_t, y_p, sensitive_features=g_1)
    # The reference is the maximum over the two per-metric differences.
    largest_gap = frame.difference(method=agg_method).max()

    assert observed == largest_gap
def test_demographic_parity_difference(agg_method):
    """demographic_parity_difference must equal MetricFrame.difference.

    The reference frame is built from ``selection_rate``; ``agg_method`` is
    forwarded unchanged as ``method`` to both computations.
    """
    observed = demographic_parity_difference(
        y_t,
        y_p,
        sensitive_features=g_1,
        method=agg_method,
    )

    frame = MetricFrame(
        metrics=selection_rate,
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=g_1,
    )
    reference = frame.difference(method=agg_method)

    assert observed == reference
def test_demographic_parity_difference_weighted(agg_method):
    """Weighted demographic_parity_difference must match MetricFrame.difference.

    The same ``s_w`` weights are given to the function under test (as
    ``sample_weight``) and to the reference frame (through ``sample_params``).
    """
    observed = demographic_parity_difference(
        y_t,
        y_p,
        sensitive_features=g_1,
        sample_weight=s_w,
        method=agg_method,
    )

    weight_params = {'sample_weight': s_w}
    frame = MetricFrame(
        metrics=selection_rate,
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=g_1,
        sample_params=weight_params,
    )
    reference = frame.difference(method=agg_method)

    assert observed == reference
def test_demographic_parity_ratio_weighted(agg_method):
    """Weighted demographic_parity_ratio must match MetricFrame.ratio.

    The same ``s_w`` weights are given to the function under test (as
    ``sample_weight``) and to the reference frame (through ``sample_params``).
    """
    observed = demographic_parity_ratio(
        y_t,
        y_p,
        sensitive_features=g_1,
        sample_weight=s_w,
        method=agg_method,
    )

    weight_params = {'sample_weight': s_w}
    frame = MetricFrame(
        selection_rate,
        y_t,
        y_p,
        sensitive_features=g_1,
        sample_params=weight_params,
    )
    reference = frame.ratio(method=agg_method)

    assert observed == reference
def test_1m_2cf_metric_dict():
    """One metric passed as a dict, two control features: check ``overall``.

    ``overall`` is expected to be a (4, 1) DataFrame indexed by the two
    control features, with each cell equal to recall_score on the matching
    subset of the data.
    """
    stacked_controls = np.stack((g_1, g_2), axis=1)
    target = MetricFrame(
        metrics={"recall_score": skm.recall_score},
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=g_3,
        control_features=stacked_controls,
    )

    assert target._user_supplied_callable is False
    assert isinstance(target.overall, pd.DataFrame)
    assert target.overall.shape == (4, 1)
    expected_names = ["control_feature_0", "control_feature_1"]
    assert np.array_equal(target.overall.index.names, expected_names)

    # Build every mask first, then every expected score, then compare.
    cells = [("aa", "f"), ("aa", "g"), ("ba", "f"), ("ba", "g")]
    masks = {
        (c0, c1): np.logical_and((g_1 == c0), (g_2 == c1)) for c0, c1 in cells
    }
    expected = {
        cell: skm.recall_score(y_t[mask], y_p[mask])
        for cell, mask in masks.items()
    }
    for cell in cells:
        assert target.overall["recall_score"][cell] == expected[cell]
def test_1m_2cf():
    """One bare metric callable, two control features: check ``overall``.

    ``overall`` is expected to be a Series of length 4 indexed by the two
    control features, with each entry equal to recall_score on the matching
    subset of the data.
    """
    stacked_controls = np.stack((g_1, g_2), axis=1)
    target = MetricFrame(
        metrics=skm.recall_score,
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=g_3,
        control_features=stacked_controls,
    )

    assert target._user_supplied_callable is True
    assert isinstance(target.overall, pd.Series)
    assert len(target.overall) == 4
    expected_names = ["control_feature_0", "control_feature_1"]
    assert np.array_equal(target.overall.index.names, expected_names)

    # Build every mask first, then every expected score, then compare.
    cells = [("aa", "f"), ("aa", "g"), ("ba", "f"), ("ba", "g")]
    masks = {
        (c0, c1): np.logical_and((g_1 == c0), (g_2 == c1)) for c0, c1 in cells
    }
    expected = {
        cell: skm.recall_score(y_t[mask], y_p[mask])
        for cell, mask in masks.items()
    }
    for cell in cells:
        assert target.overall[cell] == expected[cell]
def test_1m_2cf_metric_dict():
    """One metric passed as a dict, two control features: check ``overall``.

    ``overall`` is expected to be a (4, 1) DataFrame indexed by the two
    control features, with each cell equal to recall_score on the matching
    subset of the data.
    """
    stacked_controls = np.stack((g_1, g_2), axis=1)
    target = MetricFrame(
        {'recall_score': skm.recall_score},
        y_t,
        y_p,
        sensitive_features=g_3,
        control_features=stacked_controls,
    )

    assert target._user_supplied_callable is False
    assert isinstance(target.overall, pd.DataFrame)
    assert target.overall.shape == (4, 1)
    expected_names = ['control_feature_0', 'control_feature_1']
    assert np.array_equal(target.overall.index.names, expected_names)

    # Build every mask first, then every expected score, then compare.
    cells = [('aa', 'f'), ('aa', 'g'), ('ba', 'f'), ('ba', 'g')]
    masks = {
        (c0, c1): np.logical_and((g_1 == c0), (g_2 == c1)) for c0, c1 in cells
    }
    expected = {
        cell: skm.recall_score(y_t[mask], y_p[mask])
        for cell, mask in masks.items()
    }
    for cell in cells:
        assert target.overall['recall_score'][cell] == expected[cell]
def test_1m_2cf():
    """One bare metric callable, two control features: check ``overall``.

    ``overall`` is expected to be a Series of length 4 indexed by the two
    control features, with each entry equal to recall_score on the matching
    subset of the data.
    """
    stacked_controls = np.stack((g_1, g_2), axis=1)
    target = MetricFrame(
        skm.recall_score,
        y_t,
        y_p,
        sensitive_features=g_3,
        control_features=stacked_controls,
    )

    assert target._user_supplied_callable is True
    assert isinstance(target.overall, pd.Series)
    assert len(target.overall) == 4
    expected_names = ['control_feature_0', 'control_feature_1']
    assert np.array_equal(target.overall.index.names, expected_names)

    # Build every mask first, then every expected score, then compare.
    cells = [('aa', 'f'), ('aa', 'g'), ('ba', 'f'), ('ba', 'g')]
    masks = {
        (c0, c1): np.logical_and((g_1 == c0), (g_2 == c1)) for c0, c1 in cells
    }
    expected = {
        cell: skm.recall_score(y_t[mask], y_p[mask])
        for cell, mask in masks.items()
    }
    for cell in cells:
        assert target.overall[cell] == expected[cell]
def test_1m_1cf_wgt_metric_dict():
    """One weighted metric in a dict, one control feature: check ``overall``.

    ``overall`` is expected to be a (2, 1) DataFrame indexed by the control
    feature, with each cell equal to the sample-weighted recall_score on the
    matching subset of the data.
    """
    per_metric_params = {'recall': {'sample_weight': s_w}}
    target = MetricFrame(
        {'recall': skm.recall_score},
        y_t,
        y_p,
        sensitive_features=g_2,  # Not examined by the assertions below
        control_features=g_1,
        sample_params=per_metric_params,
    )

    assert target._user_supplied_callable is False
    assert isinstance(target.overall, pd.DataFrame)
    assert target.overall.shape == (2, 1)
    assert np.array_equal(target.overall.index.names, ['control_feature_0'])

    # Masks first, then weighted expectations, then the comparisons.
    groups = ('aa', 'ba')
    masks = {grp: (g_1 == grp) for grp in groups}
    expected = {
        grp: skm.recall_score(y_t[mask], y_p[mask], sample_weight=s_w[mask])
        for grp, mask in masks.items()
    }
    for grp in groups:
        assert target.overall['recall'][grp] == expected[grp]
def test_1m_1sf_1cf(metric_fn):
    """One bare metric, one sensitive and one control feature: ``by_group``.

    ``by_group`` is expected to be a Series of length 4 indexed by
    (control, sensitive) pairs, each entry equal to ``metric_fn`` evaluated on
    the matching subset of the data.
    """
    target = MetricFrame(
        metrics=metric_fn,
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=g_1,
        control_features=g_2,
    )

    assert target._user_supplied_callable is True
    assert isinstance(target.by_group, pd.Series)
    assert len(target.by_group) == 4
    expected_names = ["control_feature_0", "sensitive_feature_0"]
    assert np.array_equal(target.by_group.index.names, expected_names)

    # Masks and expectations are keyed (sensitive, control) and built in the
    # aa/f, aa/g, ba/f, ba/g order.
    pairs = [("aa", "f"), ("aa", "g"), ("ba", "f"), ("ba", "g")]
    masks = {
        (sf, cf): np.logical_and((g_1 == sf), (g_2 == cf)) for sf, cf in pairs
    }
    expected = {
        pair: metric_fn(y_t[mask], y_p[mask]) for pair, mask in masks.items()
    }

    # by_group is indexed control-first, so the lookup key is flipped.
    for cf, sf in [("f", "aa"), ("f", "ba"), ("g", "aa"), ("g", "ba")]:
        assert target.by_group[(cf, sf)] == expected[(sf, cf)]
def test_2m_2cf():
    """Two metrics in a dict, two named control features: check ``overall``.

    The control features come from a DataFrame with columns "g_1" and "g_2".
    ``overall`` is expected to be a (4, 2) DataFrame carrying those names as
    its index names, with each cell equal to the metric evaluated on the
    matching subset of the data.
    """
    two_group = pd.DataFrame(
        data=np.stack((g_1, g_2), axis=1),
        columns=["g_1", "g_2"],
    )
    metric_fns = {"recall": skm.recall_score, "prec": skm.precision_score}
    target = MetricFrame(
        metrics=metric_fns,
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=g_3,  # Not examined by the assertions below
        control_features=two_group,
    )

    assert isinstance(target.overall, pd.DataFrame)
    assert target.overall.shape == (4, 2)
    assert target.overall.index.names == ["g_1", "g_2"]

    cells = [("aa", "f"), ("aa", "g"), ("ba", "f"), ("ba", "g")]
    masks = {
        (c0, c1): np.logical_and((g_1 == c0), (g_2 == c1)) for c0, c1 in cells
    }

    # Recall is computed and checked in full before precision is touched.
    for column, scorer in (
        ("recall", skm.recall_score),
        ("prec", skm.precision_score),
    ):
        expected = {
            cell: scorer(y_t[mask], y_p[mask]) for cell, mask in masks.items()
        }
        for cell in cells:
            assert target.overall[column][cell] == expected[cell]
def _selected_label_compare(moment, metric, selected_label):
    """Check a moment's gamma against MetricFrame differences for one label.

    Counterpart of _simple_compare for moments whose event names include the
    y label.

    Args:
        moment: zero-argument callable returning the moment object under test.
        metric: metric passed to MetricFrame as ``metrics``.
        selected_label: label value inserted into the event name that is used
            to look up entries of the gamma result.
    """
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=7132752)
    X_dummy = pd.get_dummies(X)
    sens = X['sens']
    ctrl = X['ctrl']

    est = LogisticRegression()
    est.fit(X_dummy, y)
    y_pred = est.predict(X_dummy)

    target = moment()
    target.load_data(
        np.asarray(X_dummy),
        np.asarray(y),
        sensitive_features=sens,
        control_features=ctrl,
    )

    # gamma measures the constraint violation relative to the overall value
    results = target.gamma(est.predict)

    # The same violation, recomputed from the metric frame
    mf_pred = MetricFrame(
        metrics=metric,
        y_true=y,
        y_pred=y_pred,
        sensitive_features=sens,
        control_features=ctrl,
    )
    diffs = mf_pred.by_group - mf_pred.overall

    # Compare, allowing only a tiny numerical tolerance
    tolerance = {'rel': 1e-10, 'abs': 1e-12}
    for ib in ibs:
        # Format defined within utility_parity._combine_event_and_control
        label = "control={0},label={1}".format(ib, selected_label)
        for sf in sfs:
            upper = results[('+', label, sf)]
            assert diffs[(ib, sf)] == pytest.approx(upper, **tolerance)
            lower = results[('-', label, sf)]
            assert diffs[(ib, sf)] == pytest.approx(-lower, **tolerance)
def _simple_compare(moment, metric):
    """Check a moment's gamma against MetricFrame differences.

    Args:
        moment: zero-argument callable returning the moment object under test.
        metric: metric passed to MetricFrame as ``metrics``.
    """
    X, y = loan_scenario_generator(n, f, sfs, ibs, seed=7632752)
    X_dummy = pd.get_dummies(X)
    sens = X['sens']
    ctrl = X['ctrl']

    est = LogisticRegression()
    est.fit(X_dummy, y)
    y_pred = est.predict(X_dummy)

    target = moment()
    target.load_data(
        np.asarray(X_dummy),
        np.asarray(y),
        sensitive_features=sens,
        control_features=ctrl,
    )

    # gamma measures the constraint violation relative to the overall value
    results = target.gamma(est.predict)

    # The same violation, recomputed from the metric frame
    mf_pred = MetricFrame(
        metrics=metric,
        y_true=y,
        y_pred=y_pred,
        sensitive_features=sens,
        control_features=ctrl,
    )
    diffs = mf_pred.by_group - mf_pred.overall

    # Compare, allowing only a tiny numerical tolerance
    tolerance = {'rel': 1e-10, 'abs': 1e-12}
    for ib in ibs:
        event = "control={0},all".format(ib)
        for sf in sfs:
            upper = results[('+', event, sf)]
            assert diffs[(ib, sf)] == pytest.approx(upper, **tolerance)
            lower = results[('-', event, sf)]
            assert diffs[(ib, sf)] == pytest.approx(-lower, **tolerance)
def test_1m_1sf_1cf_metric_dict(metric_fn):
    """One metric in a dict, one sensitive and one control feature.

    ``by_group`` is expected to be a (4, 1) DataFrame indexed by
    (control, sensitive) pairs, with the single column named after
    ``metric_fn.__name__`` and each cell equal to ``metric_fn`` evaluated on
    the matching subset of the data.
    """
    column = metric_fn.__name__
    target = MetricFrame(
        metrics={column: metric_fn},
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=g_1,
        control_features=g_2,
    )

    assert target._user_supplied_callable is False
    assert isinstance(target.by_group, pd.DataFrame)
    assert target.by_group.shape == (4, 1)
    expected_names = ['control_feature_0', 'sensitive_feature_0']
    assert np.array_equal(target.by_group.index.names, expected_names)

    # Masks and expectations are keyed (sensitive, control) and built in the
    # aa/f, aa/g, ba/f, ba/g order.
    pairs = [('aa', 'f'), ('aa', 'g'), ('ba', 'f'), ('ba', 'g')]
    masks = {
        (sf, cf): np.logical_and((g_1 == sf), (g_2 == cf)) for sf, cf in pairs
    }
    expected = {
        pair: metric_fn(y_t[mask], y_p[mask]) for pair, mask in masks.items()
    }

    # by_group is indexed control-first, so the lookup key is flipped.
    for cf, sf in [('f', 'aa'), ('f', 'ba'), ('g', 'aa'), ('g', 'ba')]:
        assert target.by_group[metric_fn.__name__][(cf, sf)] == expected[(sf, cf)]
def test_1m_1sf_1cf(metric_fn):
    """One bare metric, one sensitive and one control feature: ``by_group``.

    ``by_group`` is expected to be a Series of length 4 indexed by
    (control, sensitive) pairs, each entry equal to ``metric_fn`` evaluated on
    the matching subset of the data.
    """
    target = MetricFrame(
        metrics=metric_fn,
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=g_1,
        control_features=g_2,
    )

    assert target._user_supplied_callable is True
    assert isinstance(target.by_group, pd.Series)
    assert len(target.by_group) == 4
    expected_names = ['control_feature_0', 'sensitive_feature_0']
    assert np.array_equal(target.by_group.index.names, expected_names)

    # Masks and expectations are keyed (sensitive, control) and built in the
    # aa/f, aa/g, ba/f, ba/g order.
    pairs = [('aa', 'f'), ('aa', 'g'), ('ba', 'f'), ('ba', 'g')]
    masks = {
        (sf, cf): np.logical_and((g_1 == sf), (g_2 == cf)) for sf, cf in pairs
    }
    expected = {
        pair: metric_fn(y_t[mask], y_p[mask]) for pair, mask in masks.items()
    }

    # by_group is indexed control-first, so the lookup key is flipped.
    for cf, sf in [('f', 'aa'), ('f', 'ba'), ('g', 'aa'), ('g', 'ba')]:
        assert target.by_group[(cf, sf)] == expected[(sf, cf)]
def _on_request(self, change):
    """Compute metric results for newly requested entries of a change event.

    A deep copy of ``self._widget_instance.response`` is extended with one
    entry per key of ``change.new`` that it does not already contain. Each new
    entry is computed with a ``MetricFrame`` and stored as
    ``{"global": overall, "bins": by_group.to_dict()}``. If computing a single
    entry fails, that entry is stored as
    ``{"error": <exception>, "global": 0, "bins": []}`` and processing
    continues with the next key. The extended copy is finally assigned back to
    ``self._widget_instance.response``.

    Args:
        change: object exposing a ``new`` attribute that maps request ids to
            dicts with "metricKey", "modelIndex" and "binVector" entries.

    Raises:
        ValueError: if anything outside the per-request handling fails. The
            original exception is attached as ``__cause__``.
    """
    try:
        new = change.new
        response = copy.deepcopy(self._widget_instance.response)
        for request_id in new:
            try:
                if request_id not in response:
                    data = new[request_id]
                    method = self._metric_methods.get(data["metricKey"]).get("function")
                    prediction = MetricFrame(
                        method,
                        self._y_true,
                        self._y_pred[data["modelIndex"]],
                        sensitive_features=data["binVector"],
                    )
                    response[request_id] = {
                        "global": prediction.overall,
                        "bins": prediction.by_group.to_dict(),
                    }
            except Exception as ed:  # noqa: B902
                # A failing request is reported in-band so the remaining
                # requests are still processed.
                response[request_id] = {
                    "error": ed,
                    "global": 0,
                    "bins": [],
                }
        self._widget_instance.response = response
    except Exception as err:  # noqa: B902
        # Callers still only see ValueError, but the underlying failure is
        # chained so it is not lost from the traceback.
        raise ValueError("Error while making request") from err
def test_equalized_odds_difference_weighted(agg_method):
    """Weighted equalized_odds_difference must equal the larger tpr/fpr gap.

    The same ``s_w`` weights are given to the function under test (as
    ``sample_weight``) and to both metrics of the reference frame.
    """
    observed = equalized_odds_difference(
        y_t,
        y_p,
        sensitive_features=g_1,
        method=agg_method,
        sample_weight=s_w,
    )

    rate_fns = {'tpr': true_positive_rate, 'fpr': false_positive_rate}
    # Both metrics share one weight-parameter dict.
    weight_params = {'sample_weight': s_w}
    per_metric_params = {'tpr': weight_params, 'fpr': weight_params}
    frame = MetricFrame(
        rate_fns,
        y_t,
        y_p,
        sensitive_features=g_1,
        sample_params=per_metric_params,
    )
    largest_gap = frame.difference(method=agg_method).max()

    assert observed == largest_gap
def test_equalized_odds_ratio_weighted(agg_method):
    """Weighted equalized_odds_ratio must equal the smaller tpr/fpr ratio.

    The same ``s_w`` weights are given to the function under test (as
    ``sample_weight``) and to both metrics of the reference frame.
    """
    observed = equalized_odds_ratio(
        y_t,
        y_p,
        method=agg_method,
        sensitive_features=g_1,
        sample_weight=s_w,
    )

    rate_fns = {'tpr': true_positive_rate, 'fpr': false_positive_rate}
    # Both metrics share one weight-parameter dict.
    weight_params = {'sample_weight': s_w}
    per_metric_params = {'tpr': weight_params, 'fpr': weight_params}
    frame = MetricFrame(
        rate_fns,
        y_t,
        y_p,
        sensitive_features=g_1,
        sample_params=per_metric_params,
    )
    smallest_ratio = frame.ratio(method=agg_method).min()

    assert observed == smallest_ratio
def test_1m_0cf(metric_fn):
    """One bare metric, no control features: ``overall`` is a plain float.

    The value must equal ``metric_fn`` applied to the full data set.
    """
    # Sensitive features are supplied but not examined by the assertions below.
    target = MetricFrame(
        metrics=metric_fn,
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=g_1,
    )
    assert target._user_supplied_callable is True

    # User-visible result
    reference = metric_fn(y_t, y_p)
    assert isinstance(target.overall, float)
    assert target.overall == reference
def test_2m_0cf():
    """Two metrics in a dict, no control features: ``overall`` is a Series.

    The Series must hold one entry per metric, each equal to that metric
    applied to the full data set.
    """
    metric_fns = {'recall': skm.recall_score, 'prec': skm.precision_score}
    # Sensitive features are supplied but not examined by the assertions below.
    target = MetricFrame(metric_fns, y_t, y_p, sensitive_features=g_1)

    assert target._user_supplied_callable is False
    assert isinstance(target.overall, pd.Series)
    assert len(target.overall) == 2

    # Both expectations are computed before either comparison is made.
    expected = {
        'recall': skm.recall_score(y_t, y_p),
        'prec': skm.precision_score(y_t, y_p),
    }
    for column, want in expected.items():
        assert target.overall[column] == want
def test_1m_0cf_metric_dict(metric_fn):
    """One metric in a dict, no control features: ``overall`` is a Series.

    The Series must hold a single entry, keyed by ``metric_fn.__name__`` and
    equal to ``metric_fn`` applied to the full data set.
    """
    column = metric_fn.__name__
    # Sensitive features are supplied but not examined by the assertions below.
    target = MetricFrame({column: metric_fn}, y_t, y_p, sensitive_features=g_1)
    assert target._user_supplied_callable is False

    # User-visible result
    reference = metric_fn(y_t, y_p)
    assert isinstance(target.overall, pd.Series)
    assert len(target.overall) == 1
    assert target.overall[metric_fn.__name__] == reference
def test_1m_1sf_0cf(metric_fn):
    """One bare metric, one sensitive feature, no control features.

    ``by_group`` is expected to be a Series of length 2 indexed by the
    sensitive feature, each entry equal to ``metric_fn`` evaluated on the
    matching subset of the data.
    """
    target = MetricFrame(metric_fn, y_t, y_p, sensitive_features=g_1)

    assert target._user_supplied_callable is True
    assert isinstance(target.by_group, pd.Series)
    assert len(target.by_group) == 2
    assert np.array_equal(target.by_group.index.names, ['sensitive_feature_0'])

    # Masks first, then expectations, then the comparisons.
    groups = ('aa', 'ba')
    masks = {grp: (g_1 == grp) for grp in groups}
    expected = {
        grp: metric_fn(y_t[mask], y_p[mask]) for grp, mask in masks.items()
    }
    for grp in groups:
        assert target.by_group[grp] == expected[grp]
def test_1m_0cf_wgt(metric_fn):
    """One weighted bare metric, no control features: ``overall`` is a float.

    The value must equal ``metric_fn`` applied to the full data set with
    ``sample_weight=s_w``.
    """
    weight_params = {'sample_weight': s_w}
    target = MetricFrame(
        metric_fn,
        y_t,
        y_p,
        sensitive_features=g_1,  # Not examined by the assertions below
        sample_params=weight_params,
    )
    assert target._user_supplied_callable is True

    # User-visible result
    reference = metric_fn(y_t, y_p, sample_weight=s_w)
    assert isinstance(target.overall, float)
    assert target.overall == reference
def test_1m_0cf_wgt_metric_dict(metric_fn):
    """One weighted metric in a dict, no control features.

    ``overall`` is expected to be a Series with a single entry, keyed by
    ``metric_fn.__name__`` and equal to ``metric_fn`` applied to the full data
    set with ``sample_weight=s_w``.
    """
    column = metric_fn.__name__
    target = MetricFrame(
        metrics={column: metric_fn},
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=g_1,  # Not examined by the assertions below
        sample_params={column: {"sample_weight": s_w}},
    )
    assert target._user_supplied_callable is False

    # User-visible result
    reference = metric_fn(y_t, y_p, sample_weight=s_w)
    assert isinstance(target.overall, pd.Series)
    assert len(target.overall) == 1
    assert target.overall[metric_fn.__name__] == reference
def test_1m_1sf_0cf_metric_dict(metric_fn):
    """One metric in a dict, one sensitive feature, no control features.

    ``by_group`` is expected to be a (2, 1) DataFrame indexed by the sensitive
    feature, with the single column named after ``metric_fn.__name__`` and
    each cell equal to ``metric_fn`` evaluated on the matching subset.
    """
    column = metric_fn.__name__
    target = MetricFrame(
        metrics={column: metric_fn},
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=g_1,
    )

    assert target._user_supplied_callable is False
    assert isinstance(target.by_group, pd.DataFrame)
    assert target.by_group.shape == (2, 1)
    assert np.array_equal(target.by_group.index.names, ['sensitive_feature_0'])

    # Masks first, then expectations, then the comparisons.
    groups = ('aa', 'ba')
    masks = {grp: (g_1 == grp) for grp in groups}
    expected = {
        grp: metric_fn(y_t[mask], y_p[mask]) for grp, mask in masks.items()
    }
    for grp in groups:
        assert target.by_group[metric_fn.__name__][grp] == expected[grp]
def test_1m_1sf_0cf(metric_fn):
    """One bare metric, one sensitive feature, no control features.

    ``by_group`` is expected to be a Series of length 2 indexed by the
    sensitive feature, each entry equal to ``metric_fn`` evaluated on the
    matching subset of the data.
    """
    target = MetricFrame(
        metrics=metric_fn,
        y_true=y_t,
        y_pred=y_p,
        sensitive_features=g_1,
    )

    assert target._user_supplied_callable is True
    assert isinstance(target.by_group, pd.Series)
    assert len(target.by_group) == 2
    assert np.array_equal(target.by_group.index.names, ["sensitive_feature_0"])

    # Masks first, then expectations, then the comparisons.
    groups = ("aa", "ba")
    masks = {grp: g_1 == grp for grp in groups}
    expected = {
        grp: metric_fn(y_t[mask], y_p[mask]) for grp, mask in masks.items()
    }
    for grp in groups:
        assert target.by_group[grp] == expected[grp]
def test_1m_1cf_metric_dict(metric_fn):
    """One metric in a dict, one control feature: check ``overall``.

    ``overall`` is expected to be a (2, 1) DataFrame indexed by the control
    feature, with the single column named after ``metric_fn.__name__`` and
    each cell equal to ``metric_fn`` evaluated on the matching subset.
    """
    column = metric_fn.__name__
    target = MetricFrame(
        {column: metric_fn},
        y_t,
        y_p,
        sensitive_features=g_2,
        control_features=g_1,
    )

    assert target._user_supplied_callable is False
    assert isinstance(target.overall, pd.DataFrame)
    assert target.overall.shape == (2, 1)
    assert np.array_equal(target.overall.index.names, ['control_feature_0'])

    # Masks first, then expectations, then the comparisons.
    groups = ('aa', 'ba')
    masks = {grp: (g_1 == grp) for grp in groups}
    expected = {
        grp: metric_fn(y_t[mask], y_p[mask]) for grp, mask in masks.items()
    }
    for grp in groups:
        assert target.overall[metric_fn.__name__][grp] == expected[grp]