Example #1
    def test_regression_prediction_type(self):
        y_t = [0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1]
        y_p = [1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0]
        s_f = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]

        predictions = {"some model": y_p}
        sensitive_feature = {"my sf": s_f}

        # Using the `regression` prediction type should not crash
        _create_group_metric_set(y_t, predictions, sensitive_feature,
                                 'regression')
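
These examples call _create_group_metric_set without showing its import. A minimal sketch, assuming the private fairlearn module path (the path is not shown in any of the snippets):

# Assumed import path for the private helper used throughout these examples.
from fairlearn.metrics._group_metric_set import _create_group_metric_set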
Example #2
    def test_specific_metrics(self):
        y_t = [0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1]
        y_p = [1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0]
        s_f = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]

        exp_acc = accuracy_score_group_summary(y_t,
                                               y_p,
                                               sensitive_features=s_f)
        exp_roc = roc_auc_score_group_summary(y_t, y_p, sensitive_features=s_f)

        predictions = {"some model": y_p}
        sensitive_feature = {"my sf": s_f}

        actual = _create_group_metric_set(y_t, predictions, sensitive_feature,
                                          'binary_classification')

        # Do some sanity checks
        validate_dashboard_dictionary(actual)
        assert actual['trueY'] == y_t
        assert actual['predictedY'][0] == y_p
        assert actual['precomputedFeatureBins'][0]['binVector'] == s_f
        assert len(actual['precomputedMetrics'][0][0]) == 11

        # Cross check the two metrics we computed
        # Comparisons simplified because s_f was already {0,1}
        actual_acc = actual['precomputedMetrics'][0][0]['accuracy_score']
        assert actual_acc['global'] == exp_acc.overall
        assert actual_acc['bins'] == list(exp_acc.by_group.values())

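        # The ROC AUC values are stored under the dashboard key
        # 'balanced_accuracy_score', so that is the entry cross-checked here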
        actual_roc = actual['precomputedMetrics'][0][0][
            'balanced_accuracy_score']
        assert actual_roc['global'] == exp_roc.overall
        assert actual_roc['bins'] == list(exp_roc.by_group.values())
Example #3
    def __get_dashboard_dict(self, A_test, Y_test, dominant_all_ids):
        sf = {'diabetic': A_test.diabetic,
              'asthmatic': A_test.asthmatic,
              'smoker': A_test.smoker}
        return _create_group_metric_set(y_true=Y_test,
                                        predictions=dominant_all_ids,
                                        sensitive_features=sf,
                                        prediction_type='binary_classification')
Example #4
    def test_roc_auc_single_class(self, recwarn):
        # Note that y_t and s_f are identical, so subgroup evaluation will fail for
        # roc_auc_score
        y_p = [0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1]
        y_t = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]
        s_f = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]

        predictions = {"some model": y_p}
        sensitive_feature = {"my sf": s_f}

        actual = _create_group_metric_set(y_t, predictions, sensitive_feature,
                                          "binary_classification")

        # Check that the error case was intercepted for roc_auc_score
        validate_dashboard_dictionary(actual)
        actual_roc = actual["precomputedMetrics"][0][0][
            "balanced_accuracy_score"]
        expected_all_roc = skm.roc_auc_score(y_t, y_p)
        assert actual_roc["global"] == expected_all_roc
        assert actual_roc["bins"] == [0, 0]  # We substituted zero
        # Check that the right warnings were issued
        assert len(recwarn) == 3
        msgs = sorted([str(x.message) for x in recwarn])
        # We get the message from roc_auc_score once for each subgroup
        assert msgs[0] == "Evaluation of roc_auc_score failed. Substituting 0"
        assert msgs[1] == "Evaluation of roc_auc_score failed. Substituting 0"
        assert msgs[2].startswith("Recall is ill-defined and being set to 0.0")
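
As a standalone illustration of the failure being intercepted (not part of the test above): scikit-learn's roc_auc_score raises when y_true contains a single class, which is the situation inside each sensitive-feature bin once y_t == s_f.

# Minimal repro of the per-subgroup failure mode that the dashboard replaces with 0.
import sklearn.metrics as skm

try:
    skm.roc_auc_score([1, 1, 1], [0.2, 0.7, 0.9])
except ValueError as exc:
    print(exc)  # e.g. "Only one class present in y_true..." (wording may vary by version)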
Example #5
def fairness_regression(model, model_id, sensitive_feat, X_test, y_test):
    #  Create a dictionary of model(s) you want to assess for fairness
    ys_pred = {model_id: model.predict(X_test).reshape(-1, 1)}
    dash_dict = _create_group_metric_set(y_true=y_test,
                                         predictions=ys_pred,
                                         sensitive_features=sensitive_feat,
                                         prediction_type='regression')

    return dash_dict
Example #6
def fairness_binary(model, model_id, sensitive_feat, X_test, y_test):
    #  Create a dictionary of model(s) you want to assess for fairness
    ys_pred = {model_id: model.predict(X_test)}
    dash_dict = _create_group_metric_set(
        y_true=y_test,
        predictions=ys_pred,
        sensitive_features=validate_for_fairness(sensitive_feat, y_test),
        prediction_type='binary_classification')

    return dash_dict
Example #7
    def test_round_trip_1p_1f(self, t_y_t, t_y_p, t_sf):
        expected = load_sample_dashboard(_BC_1P_1F)

        y_true = t_y_t(expected['trueY'])
        y_pred = {expected['modelNames'][0]: t_y_p(expected['predictedY'][0])}

        sf_file = expected['precomputedFeatureBins'][0]
        sf = [sf_file['binLabels'][x] for x in sf_file['binVector']]
        sensitive_feature = {sf_file['featureBinName']: t_sf(sf)}

        actual = _create_group_metric_set(y_true, y_pred, sensitive_feature,
                                          'binary_classification')
        validate_dashboard_dictionary(actual)
        assert expected == actual
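
In the round-trip tests, t_y_t, t_y_p and t_sf are conversion callables injected through pytest parametrization, so each input is also exercised as other container types. A sketch of what such a parametrization could look like; the names and the conversion list here are assumptions, not taken from the original suite:

import numpy as np
import pandas as pd
import pytest

# Hypothetical conversion callables: each maps a plain list onto another
# 1-D container type accepted by _create_group_metric_set.
conversions_for_1d = [
    lambda x: x,             # plain list
    np.asarray,              # numpy array
    lambda x: pd.Series(x),  # pandas Series
]


@pytest.mark.parametrize("t_sf", conversions_for_1d)
@pytest.mark.parametrize("t_y_p", conversions_for_1d)
@pytest.mark.parametrize("t_y_t", conversions_for_1d)
class TestRoundTrips:
    """Round-trip tests such as test_round_trip_1p_1f would live here."""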
Example #8
    def test_regression_prediction_type(self):
        # For regression, both y_t and y_p can have floating point values
        y_t = [0, 1, 1, 0, 1, 1, 1.5, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1]
        y_p = [1, 1, 1, 0, 1, 1, 1.5, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0]
        s_f = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]

        predictions = {"some model": y_p}
        sensitive_feature = {"my sf": s_f}

        # Using the `regression` prediction type should not crash
        result = _create_group_metric_set(y_t, predictions, sensitive_feature,
                                          'regression')
        assert result['predictionType'] == 'regression'
        assert len(result['precomputedMetrics'][0][0]) == 6
Example #9
    def test_json_serializable(self):
        y_t = [0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1]
        y_p = [1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0]
        s_f = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]

        predictions = {"some model": y_p}
        sensitive_feature = {"my sf": s_f}

        actual = _create_group_metric_set(y_t, predictions, sensitive_feature,
                                          "binary_classification")

        # Check that we can turn the dictionary into JSON
        # Sometimes, you need to listen carefully to the quack
        result = json.dumps(actual)
        assert isinstance(result, str)
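
Because the dictionary survives json.dumps, it can also be written straight to disk. A small self-contained sketch, assuming the import path noted above and some tiny made-up inputs:

import json

# Assumed private import path, as noted earlier.
from fairlearn.metrics._group_metric_set import _create_group_metric_set

y_t = [0, 1, 1, 0, 1, 0]
predictions = {"some model": [1, 1, 1, 0, 0, 0]}
sensitive_feature = {"my sf": [0, 1, 0, 1, 0, 1]}

dashboard = _create_group_metric_set(y_t, predictions, sensitive_feature,
                                     "binary_classification")

# The dictionary contains only JSON-native types, so it round-trips cleanly.
with open("dashboard.json", "w") as fp:
    json.dump(dashboard, fp)

with open("dashboard.json") as fp:
    reloaded = json.load(fp)

print(reloaded["predictionType"])  # binary_classification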
Example #10
    def test_probability_prediction_type(self):
        # For probability, y_p can have real values [0, 1]
        y_t = [0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1]
        y_p = [
            0.9, 1, 1, 0.1, 1, 1, 0.8, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0
        ]
        s_f = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]

        predictions = {"some model": y_p}
        sensitive_feature = {"my sf": s_f}

        # Using the `probability` prediction type should not crash
        result = _create_group_metric_set(y_t, predictions, sensitive_feature,
                                          'probability')
        assert result['predictionType'] == 'probability'
        assert len(result['precomputedMetrics'][0][0]) == 10
Example #11
    def test_round_trip_2p_3f(self, t_y_t, t_y_p, t_sf):
        expected = load_sample_dashboard(_BC_2P_3F)

        y_true = t_y_t(expected['trueY'])

        y_pred = {}
        y_p_ts = [t_y_p, lambda x: x]  # Only transform one y_p
        for i, name in enumerate(expected['modelNames']):
            y_pred[name] = y_p_ts[i](expected['predictedY'][i])

        sensitive_features = {}
        t_sfs = [lambda x: x, t_sf, lambda x: x]  # Only transform one sf
        for i, sf_file in enumerate(expected['precomputedFeatureBins']):
            sf = [sf_file['binLabels'][x] for x in sf_file['binVector']]
            sensitive_features[sf_file['featureBinName']] = t_sfs[i](sf)

        actual = _create_group_metric_set(y_true, y_pred, sensitive_features,
                                          'binary_classification')
        validate_dashboard_dictionary(actual)
        assert expected == actual
Example #12
    def test_specific_metrics(self):
        y_t = [0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1]
        y_p = [1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0]
        s_f = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1]

        expected = MetricFrame(
            metrics={
                "accuracy_score": skm.accuracy_score,
                "roc_auc_score": skm.roc_auc_score,
            },
            y_true=y_t,
            y_pred=y_p,
            sensitive_features=s_f,
        )

        predictions = {"some model": y_p}
        sensitive_feature = {"my sf": s_f}

        actual = _create_group_metric_set(y_t, predictions, sensitive_feature,
                                          "binary_classification")

        # Do some sanity checks
        validate_dashboard_dictionary(actual)
        assert actual["trueY"] == y_t
        assert actual["predictedY"][0] == y_p
        assert actual["precomputedFeatureBins"][0]["binVector"] == s_f
        assert len(actual["precomputedMetrics"][0][0]) == 12

        # Cross check the two metrics we computed
        # Comparisons simplified because s_f was already {0,1}
        actual_acc = actual["precomputedMetrics"][0][0]["accuracy_score"]
        assert actual_acc["global"] == expected.overall["accuracy_score"]
        assert actual_acc["bins"] == list(expected.by_group["accuracy_score"])

        actual_roc = actual["precomputedMetrics"][0][0][
            "balanced_accuracy_score"]
        assert actual_roc["global"] == expected.overall["roc_auc_score"]
        assert actual_roc["bins"] == list(expected.by_group["roc_auc_score"])
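
The entry checked here is named 'balanced_accuracy_score' although it is compared against roc_auc_score; for hard 0/1 predictions the two metrics coincide, as this short standalone check (not part of the original test) shows:

import pytest
import sklearn.metrics as skm

y_t = [0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1]
y_p = [1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0]

# With binary predictions the ROC curve has a single interior point, so its
# area reduces to (TPR + TNR) / 2, which is exactly the balanced accuracy.
assert skm.balanced_accuracy_score(y_t, y_p) == pytest.approx(
    skm.roc_auc_score(y_t, y_p))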