Exemple #1
0
def test_calculate_roc_points():
    """Check the ROC points computed for group "A" and the hull filtered from them."""
    frame = pd.DataFrame({
        SENSITIVE_FEATURE_KEY: sensitive_features_ex1.squeeze(),
        SCORE_KEY: scores_ex.squeeze(),
        LABEL_KEY: labels_ex.squeeze(),
    })
    group_a = frame.groupby(SENSITIVE_FEATURE_KEY).get_group("A")
    # The points are computed from rows ordered by descending score.
    group_a_by_score = group_a.sort_values(by=SCORE_KEY, ascending=False)

    actual_points = _calculate_roc_points(group_a_by_score, "A")

    xs = [0, 0.25, 0.5, 0.5, 1]
    ys = [0, 1/3, 2/3, 1, 1]
    operations = [
        ThresholdOperation('>', np.inf),
        ThresholdOperation('<', 0.5),
        ThresholdOperation('<', 1.5),
        ThresholdOperation('<', 2.5),
        ThresholdOperation('>', -np.inf),
    ]
    expected_points = pd.DataFrame({"x": xs, "y": ys, "operation": operations})

    _assert_equal_points(expected_points, actual_points)

    # Filtering down to the convex hull should drop the second and third
    # point, so those two indices are excluded from the comparison.
    hull_frame = pd.DataFrame(_filter_points_to_get_convex_hull(actual_points))
    hull_points = hull_frame[['x', 'y', 'operation']]
    _assert_equal_points(expected_points, hull_points, ignore_indices=[1, 2])
def test_predict_from_operation_less():
    """Check that a '<' operation with threshold 0.5 yields 1 below 0.5 and 0 otherwise."""
    # NOTE(review): this test calls the ThresholdOperation instance directly,
    # whereas test_predict_from_operation_more goes through
    # get_predictor_from_operation() - confirm both entry points exist.
    predict = ThresholdOperation('<', 0.5)
    cases = [
        (-10000, 1),
        (0, 1),
        (0.5, 0),  # the threshold itself is not "less than" the threshold
        (1, 0),
        (10000, 0),
    ]
    for score, expected in cases:
        assert predict(score) == expected
def test_predict_from_operation_more():
    """Check that a '>' operation with threshold 0.5 yields 1 above 0.5 and 0 otherwise."""
    # NOTE(review): the predictor is obtained via get_predictor_from_operation()
    # here, whereas test_predict_from_operation_less calls the operation
    # object directly - confirm both entry points exist.
    operation = ThresholdOperation('>', 0.5)
    predict = operation.get_predictor_from_operation()
    cases = [
        (-10000, 0),
        (0, 0),
        (0.5, 0),  # the threshold itself is not "greater than" the threshold
        (1, 1),
        (10000, 1),
    ]
    for score, expected in cases:
        assert predict(score) == expected
def test_predict_from_operation_invalid_operator():
    """Check that constructing a ThresholdOperation with '=' raises ValueError."""
    expected_message = "Unrecognized operator: ="
    with pytest.raises(ValueError, match=expected_message):
        ThresholdOperation('=', 0.5)
Exemple #5
0
def _get_grouped_data_and_base_points(sensitive_feature_value):
    """Build the score-sorted example data for one group and its expected ROC points.

    Parameters
    ----------
    sensitive_feature_value
        Group label to select from the example data. Expected points are
        defined for "A", "B" and "C".

    Returns
    -------
    tuple
        ``(grouped_data, expected_roc_points, ignore_for_base_points, x_grid)``
        where ``grouped_data`` holds the rows of the selected group sorted by
        descending score, ``expected_roc_points`` is a DataFrame with columns
        "x", "y" and "operation", ``ignore_for_base_points`` is a list of
        indices (presumably the rows of ``expected_roc_points`` to skip when
        comparing base points - confirm against callers), and ``x_grid`` is
        100 evenly spaced values covering [0, 1].

    Raises
    ------
    ValueError
        If the group is present in the data but no expected points are
        defined for it.
    """
    data = pd.DataFrame({
        SENSITIVE_FEATURE_KEY: sensitive_features_ex1.squeeze(),
        SCORE_KEY: scores_ex.squeeze(),
        LABEL_KEY: labels_ex.squeeze()
    })
    # A label that is absent from the data fails inside get_group, before the
    # expectations below are consulted.
    grouped_data = data.groupby(SENSITIVE_FEATURE_KEY).get_group(sensitive_feature_value) \
        .sort_values(by=SCORE_KEY, ascending=False)
    x_grid = np.linspace(0, 1, 100)

    if sensitive_feature_value == "A":
        expected_roc_points = pd.DataFrame({
            "x": [0, 0.25, 0.5, 0.5, 1],
            "y": [0, 1 / 3, 2 / 3, 1, 1],
            "operation": [
                ThresholdOperation('>', np.inf),
                ThresholdOperation('<', 0.5),
                ThresholdOperation('<', 1.5),
                ThresholdOperation('<', 2.5),
                ThresholdOperation('>', -np.inf)
            ]
        })
        ignore_for_base_points = [1, 2]
    elif sensitive_feature_value == "B":
        expected_roc_points = pd.DataFrame({
            "x": [0, 1 / 3, 1],
            "y": [0, 3 / 4, 1],
            "operation": [
                ThresholdOperation('>', np.inf),
                ThresholdOperation('<', 0.5),
                ThresholdOperation('>', -np.inf)
            ]
        })
        ignore_for_base_points = []
    elif sensitive_feature_value == "C":
        expected_roc_points = pd.DataFrame({
            "x": [0, 0, 2 / 3, 1],
            "y": [0, 1 / 3, 1, 1],
            "operation": [
                ThresholdOperation('>', np.inf),
                ThresholdOperation('<', 0.5),
                ThresholdOperation('<', 1.5),
                ThresholdOperation('>', -np.inf)
            ]
        })
        ignore_for_base_points = [0]
    else:
        # Without this branch the return statement below would fail with an
        # opaque UnboundLocalError for any group lacking expectations.
        raise ValueError(
            "No expected ROC points defined for sensitive feature value: {!r}"
            .format(sensitive_feature_value))

    return grouped_data, expected_roc_points, ignore_for_base_points, x_grid
def test_calculate_tradeoff_points():
    """Check the flipped tradeoff points for group "A" and the hull filtered from them."""
    frame = pd.DataFrame({
        SENSITIVE_FEATURE_KEY: sensitive_features_ex1.squeeze(),
        SCORE_KEY: scores_ex.squeeze(),
        LABEL_KEY: labels_ex.squeeze(),
    })
    group_a = frame.groupby(SENSITIVE_FEATURE_KEY).get_group("A")
    # The points are computed from rows ordered by descending score.
    group_a_by_score = group_a.sort_values(by=SCORE_KEY, ascending=False)

    actual_points = _calculate_tradeoff_points(group_a_by_score, "A", flip=True)

    # With flip=True the expectation lists both ">" and "<" operations.
    all_xs = [0, 0.0, 0.25, 0.5, 0.5, 0.5, 0.5, 0.75, 1.0, 1]
    all_ys = [0, 0.0, 1 / 3, 0.0, 0.1 / 0.3, 2 / 3, 1, 0.2 / 0.3, 1.0, 1]
    all_operations = [
        ThresholdOperation(">", np.inf),
        ThresholdOperation("<", -np.inf),
        ThresholdOperation("<", 0.5),
        ThresholdOperation(">", 2.5),
        ThresholdOperation(">", 1.5),
        ThresholdOperation("<", 1.5),
        ThresholdOperation("<", 2.5),
        ThresholdOperation(">", 0.5),
        ThresholdOperation("<", np.inf),
        ThresholdOperation(">", -np.inf),
    ]
    expected_points = pd.DataFrame(
        {"x": all_xs, "y": all_ys, "operation": all_operations}
    )
    _assert_equal_points(expected_points, actual_points)

    # Only three of the points are expected to remain after filtering to the
    # convex hull.
    hull_operations = [
        ThresholdOperation(">", np.inf),
        ThresholdOperation("<", 2.5),
        ThresholdOperation(">", -np.inf),
    ]
    expected_hull = pd.DataFrame(
        {"x": [0, 0.5, 1], "y": [0, 1, 1], "operation": hull_operations}
    )
    hull_frame = pd.DataFrame(_filter_points_to_get_convex_hull(actual_points))
    actual_hull = hull_frame[["x", "y", "operation"]]
    _assert_equal_points(expected_hull, actual_hull)