import numpy as np
from fairlearn.metrics import (
    selection_rate,
    demographic_parity_difference,
    demographic_parity_ratio,
    equalized_odds_difference,
)


def fair_metrics(bst, data, column, thresh):
    """Compute fairness metrics for an XGBoost booster on a DMatrix."""
    tr = list(data.get_label())
    # Note: best_ntree_limit / ntree_limit are deprecated in newer XGBoost
    # releases (use iteration_range instead); kept here for compatibility.
    best_iteration = bst.best_ntree_limit
    pred = bst.predict(data, ntree_limit=best_iteration)
    # Binarize predicted probabilities at the given threshold.
    pred = [1 if p > thresh else 0 for p in pred]

    # Count predictions within each sensitive-attribute group
    # ('a': c == 1, 'd': c == 0); the trailing digit is the predicted label.
    na0 = 0
    na1 = 0
    nd0 = 0
    nd1 = 0
    for p, c in zip(pred, column):
        if p == 1 and c == 0:
            nd1 += 1
        if p == 1 and c == 1:
            na1 += 1
        if p == 0 and c == 0:
            nd0 += 1
        if p == 0 and c == 1:
            na0 += 1

    # Per-group selection probabilities (Pa0 and Pd0 are computed but unused).
    Pa1 = na1 / (na1 + na0)
    Pd1 = nd1 / (nd1 + nd0)
    Pa0 = na0 / (na1 + na0)
    Pd0 = nd0 / (nd1 + nd0)

    # Absolute statistical (demographic) parity gap between the two groups.
    dsp_metric = np.abs(Pd1 - Pa1)
    # dsp_metric = np.abs((first - second) / (first + second))

    sr_metric = selection_rate(tr, pred, pos_label=1)
    dpd_metric = demographic_parity_difference(tr, pred, sensitive_features=column)
    dpr_metric = demographic_parity_ratio(tr, pred, sensitive_features=column)
    eod_metric = equalized_odds_difference(tr, pred, sensitive_features=column)
    return dsp_metric, sr_metric, dpd_metric, dpr_metric, eod_metric
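# Hypothetical usage sketch -- the names below are assumptions, not defined
# in this file: `bst` is an xgboost Booster trained with early stopping
# (so `best_ntree_limit` is set), `X_test`/`y_test` are held-out features
# and labels, and `sex` is a binary (0/1) sensitive column.
import xgboost as xgb

dtest = xgb.DMatrix(X_test, label=y_test)
dsp, sr, dpd, dpr, eod = fair_metrics(bst, dtest, sex, thresh=0.5)
print(f"parity gap={dsp:.3f}  selection rate={sr:.3f}  "
      f"DP diff={dpd:.3f}  DP ratio={dpr:.3f}  EO diff={eod:.3f}")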
import pytest

from fairlearn import metrics

# _EMPTY_INPUT_PREDICTIONS_ERROR_MESSAGE comes from fairlearn's internal
# selection-rate module; its exact import path is version-dependent.


def test_selection_rate_unweighted():
    y_true = [0, 0, 0, 0, 0, 0, 0, 0]
    y_pred = [0, 0, 0, 1, 1, 1, 1, 1]

    result = metrics.selection_rate(y_true, y_pred)
    assert result == 0.625
def test_selection_rate_weighted():
    y_true = [0, 0, 0, 0, 0, 0, 0, 0]
    y_pred = [0, 1, 1, 0, 0, 0, 0, 0]
    weight = [1, 2, 3, 4, 1, 2, 1, 2]

    result = metrics.selection_rate(y_true, y_pred, sample_weight=weight)
    assert result == 0.3125


def test_selection_rate_non_numeric():
    a = "a"
    b = "b"
    y_true = [a, b, a, b, a, b, a, b]
    y_pred = [a, a, a, b, b, b, a, a]

    result = metrics.selection_rate(y_true, y_pred, pos_label=b)
    assert result == 0.375


def test_selection_rate_empty():
    with pytest.raises(ValueError) as exc:
        _ = metrics.selection_rate([], [])
    assert _EMPTY_INPUT_PREDICTIONS_ERROR_MESSAGE == exc.value.args[0]


def test_selection_rate_single_element():
    assert 1 == metrics.selection_rate([1], [1])
    assert 1 == metrics.selection_rate([0], [1])
    assert 0 == metrics.selection_rate([1], [0])
    assert 0 == metrics.selection_rate([0], [0])
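# For illustration, a minimal sketch of the behavior the tests above
# exercise (a naive re-implementation, not fairlearn's actual code): the
# selection rate is simply the (weighted) fraction of predictions equal to
# ``pos_label``; ``y_true`` is accepted but plays no role in the result.
import numpy as np


def naive_selection_rate(y_true, y_pred, pos_label=1, sample_weight=None):
    y_pred = np.asarray(y_pred)
    if y_pred.size == 0:
        raise ValueError("Empty y_pred passed to selection_rate")
    selected = (y_pred == pos_label).astype(float)
    return float(np.average(selected, weights=sample_weight))


assert naive_selection_rate([0] * 8, [0, 0, 0, 1, 1, 1, 1, 1]) == 0.625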
#
# After our data manipulations and model training, we have the following
# from our test set:
#
# - A vector of true values called ``y_test``
# - A vector of model predictions called ``y_pred``
# - A DataFrame of categorical features relevant to fairness called ``A_test``
#
# In a traditional model analysis, we would now look at some metrics
# evaluated on the entire dataset. Suppose in this case, the relevant
# metrics are :func:`fairlearn.metrics.selection_rate` and
# :func:`sklearn.metrics.fbeta_score` (with ``beta=0.6``).
# We can evaluate these metrics directly:

print("Selection Rate:", selection_rate(y_test, y_pred))
print("fbeta:", skm.fbeta_score(y_test, y_pred, beta=0.6))

# %%
# We know that there are sensitive features in our data, and we want to
# ensure that we're not harming individuals due to membership in any of
# these groups. For this purpose, Fairlearn provides the
# :class:`fairlearn.metrics.MetricFrame`
# class. Let us construct an instance of this class, and then look at
# its capabilities:

fbeta_06 = functools.partial(skm.fbeta_score, beta=0.6)

metric_fns = {
    'selection_rate': selection_rate,
    'fbeta_06': fbeta_06,
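# %%
# A sketch (not part of the original example) of how the ``MetricFrame``
# might then be constructed, assuming the ``metric_fns`` dictionary above
# is completed: each metric is evaluated on the whole dataset and also
# disaggregated across the groups defined by ``A_test``.

from fairlearn.metrics import MetricFrame

mf = MetricFrame(metrics=metric_fns,
                 y_true=y_test,
                 y_pred=y_pred,
                 sensitive_features=A_test)
print(mf.overall)
print(mf.by_group)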