def test_calculate_roc_points():
    """Check the ROC points computed for group "A" and their convex hull."""
    frame = pd.DataFrame({
        SENSITIVE_FEATURE_KEY: sensitive_features_ex1.squeeze(),
        SCORE_KEY: scores_ex.squeeze(),
        LABEL_KEY: labels_ex.squeeze(),
    })

    # Isolate group "A" and order its rows from highest to lowest score.
    group_a = frame.groupby(SENSITIVE_FEATURE_KEY).get_group("A")
    group_a_sorted = group_a.sort_values(by=SCORE_KEY, ascending=False)

    actual_points = _calculate_roc_points(group_a_sorted, "A")

    expected_operations = [
        ThresholdOperation('>', np.inf),
        ThresholdOperation('<', 0.5),
        ThresholdOperation('<', 1.5),
        ThresholdOperation('<', 2.5),
        ThresholdOperation('>', -np.inf),
    ]
    expected_points = pd.DataFrame({
        "x": [0, 0.25, 0.5, 0.5, 1],
        "y": [0, 1/3, 2/3, 1, 1],
        "operation": expected_operations,
    })
    _assert_equal_points(expected_points, actual_points)

    # Reduce the ROC points to their convex hull. The hull should drop the
    # second and third point, hence ignore_indices=[1, 2] in the comparison.
    hull_records = _filter_points_to_get_convex_hull(actual_points)
    hull_points = pd.DataFrame(hull_records)[['x', 'y', 'operation']]
    _assert_equal_points(expected_points, hull_points, ignore_indices=[1, 2])
def test_predict_from_operation_less():
    """Check the predictions expected from a '<' operation with threshold 0.5.

    Scores strictly below the threshold are expected to yield 1; the threshold
    itself and every score above it are expected to yield 0.
    """
    # NOTE(review): the ThresholdOperation instance is called directly here,
    # whereas test_predict_from_operation_more obtains its predictor through
    # get_predictor_from_operation() - confirm both entry points are supported.
    classifier = ThresholdOperation('<', 0.5)

    # (score, expected prediction) pairs, checked in order.
    expected_by_score = [
        (-10000, 1),
        (0, 1),
        (0.5, 0),
        (1, 0),
        (10000, 0),
    ]
    for score, expected in expected_by_score:
        assert classifier(score) == expected
def test_predict_from_operation_more():
    """Check the predictions expected from a '>' operation with threshold 0.5.

    Scores strictly above the threshold are expected to yield 1; the threshold
    itself and every score below it are expected to yield 0.
    """
    operation = ThresholdOperation('>', 0.5)
    classifier = operation.get_predictor_from_operation()

    # (score, expected prediction) pairs, checked in order.
    expected_by_score = [
        (-10000, 0),
        (0, 0),
        (0.5, 0),
        (1, 1),
        (10000, 1),
    ]
    for score, expected in expected_by_score:
        assert classifier(score) == expected
def test_predict_from_operation_invalid_operator():
    """Check that constructing a ThresholdOperation with '=' raises ValueError."""
    # pytest.raises treats `match` as a regular expression searched in the
    # exception text; this pattern contains no regex metacharacters.
    expected_message = "Unrecognized operator: ="
    with pytest.raises(ValueError, match=expected_message):
        ThresholdOperation('=', 0.5)
def _get_grouped_data_and_base_points(sensitive_feature_value):
    """Build the score-sorted data for one group and its expected ROC points.

    :param sensitive_feature_value: group to select; expectations are defined
        here for "A", "B" and "C" only.
    :return: tuple of (rows of the selected group sorted by descending score,
        DataFrame of expected ROC points with columns "x", "y" and
        "operation", list of indices to ignore for the base points,
        grid of 100 evenly spaced values from 0 to 1).
    """
    frame = pd.DataFrame({
        SENSITIVE_FEATURE_KEY: sensitive_features_ex1.squeeze(),
        SCORE_KEY: scores_ex.squeeze(),
        LABEL_KEY: labels_ex.squeeze(),
    })
    selected_group = frame.groupby(SENSITIVE_FEATURE_KEY).get_group(sensitive_feature_value)
    sorted_group = selected_group.sort_values(by=SCORE_KEY, ascending=False)

    grid = np.linspace(0, 1, 100)

    # Hard-coded expectations per group. No fallback branch exists: a value
    # other than "A", "B" or "C" leaves the names below unbound.
    if sensitive_feature_value == "A":
        xs = [0, 0.25, 0.5, 0.5, 1]
        ys = [0, 1 / 3, 2 / 3, 1, 1]
        operations = [
            ThresholdOperation('>', np.inf),
            ThresholdOperation('<', 0.5),
            ThresholdOperation('<', 1.5),
            ThresholdOperation('<', 2.5),
            ThresholdOperation('>', -np.inf),
        ]
        ignored_indices = [1, 2]
    elif sensitive_feature_value == "B":
        xs = [0, 1 / 3, 1]
        ys = [0, 3 / 4, 1]
        operations = [
            ThresholdOperation('>', np.inf),
            ThresholdOperation('<', 0.5),
            ThresholdOperation('>', -np.inf),
        ]
        ignored_indices = []
    elif sensitive_feature_value == "C":
        xs = [0, 0, 2 / 3, 1]
        ys = [0, 1 / 3, 1, 1]
        operations = [
            ThresholdOperation('>', np.inf),
            ThresholdOperation('<', 0.5),
            ThresholdOperation('<', 1.5),
            ThresholdOperation('>', -np.inf),
        ]
        ignored_indices = [0]

    expected_points = pd.DataFrame({"x": xs, "y": ys, "operation": operations})
    return sorted_group, expected_points, ignored_indices, grid
def test_calculate_tradeoff_points():
    """Check the tradeoff points for group "A" with flip=True and their convex hull."""
    frame = pd.DataFrame({
        SENSITIVE_FEATURE_KEY: sensitive_features_ex1.squeeze(),
        SCORE_KEY: scores_ex.squeeze(),
        LABEL_KEY: labels_ex.squeeze(),
    })

    # Isolate group "A" and order its rows from highest to lowest score.
    group_a = frame.groupby(SENSITIVE_FEATURE_KEY).get_group("A")
    group_a_sorted = group_a.sort_values(by=SCORE_KEY, ascending=False)

    actual_points = _calculate_tradeoff_points(group_a_sorted, "A", flip=True)

    all_operations = [
        ThresholdOperation(">", np.inf),
        ThresholdOperation("<", -np.inf),
        ThresholdOperation("<", 0.5),
        ThresholdOperation(">", 2.5),
        ThresholdOperation(">", 1.5),
        ThresholdOperation("<", 1.5),
        ThresholdOperation("<", 2.5),
        ThresholdOperation(">", 0.5),
        ThresholdOperation("<", np.inf),
        ThresholdOperation(">", -np.inf),
    ]
    expected_points = pd.DataFrame({
        "x": [0, 0.0, 0.25, 0.5, 0.5, 0.5, 0.5, 0.75, 1.0, 1],
        # The float expressions are kept exactly as written: 0.1 / 0.3 is not
        # guaranteed to be bit-identical to 1 / 3.
        "y": [0, 0.0, 1 / 3, 0.0, 0.1 / 0.3, 2 / 3, 1, 0.2 / 0.3, 1.0, 1],
        "operation": all_operations,
    })
    _assert_equal_points(expected_points, actual_points)

    hull_operations = [
        ThresholdOperation(">", np.inf),
        ThresholdOperation("<", 2.5),
        ThresholdOperation(">", -np.inf),
    ]
    expected_hull = pd.DataFrame({
        "x": [0, 0.5, 1],
        "y": [0, 1, 1],
        "operation": hull_operations,
    })

    # Try filtering to get the convex hull of the ROC points.
    hull_records = _filter_points_to_get_convex_hull(actual_points)
    hull_points = pd.DataFrame(hull_records)[["x", "y", "operation"]]
    _assert_equal_points(expected_hull, hull_points)