def test_predicted_label_values():
    """
    Tests whether an exception is raised when the predicted label values differ from the positive label values
    """
    df = pd.DataFrame(
        [["a", "p", 1, "p"], ["b", "q", 1, "p"], ["b", "r", 1, "q"],
         ["c", "p", 0, "p"], ["c", "q", 0, "p"]],
        columns=["x", "y", "z", "yhat"],
    )
    # when explicit label values are given for categorical data
    # Pre training bias metrics
    with pytest.raises(
            ValueError,
            match="Positive predicted label values or threshold should"
            " be empty or same as label values or thresholds",
    ):
        pretraining_report = bias_report(
            df,
            FacetColumn("x"),
            LabelColumn("y", df["y"], ["p", "q"]),
            StageType.PRE_TRAINING,
            LabelColumn("yhat", df["yhat"], ["q"]),
            metrics=["DPL", "CDDL"],
            group_variable=df["z"],
        )
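These snippets are shown without their imports. A minimal setup they appear to assume is sketched below; the exact smclarify module path is an assumption and may differ between library versions. Examples #2 and #4 additionally call local helpers (fetch_input_data, get_predicted_labels, get_expected_results, get_pretraining_bias_metrics, get_posttraining_bias_metrics, approximate) that are defined in their own source files and are not shown here.

# Assumed common imports for the snippets on this page. The module path
# smclarify.bias.report is an assumption and may vary between versions;
# bias_basic_stats (used in the last snippet) is assumed to live alongside it.
import pandas as pd
import pytest

from smclarify.bias.report import (
    FacetColumn,
    LabelColumn,
    StageType,
    bias_report,
)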
Example #2
def test_bias_metrics():
    dataframe = fetch_input_data()
    label_data = dataframe.pop("Class1Good2Bad")
    label_column = LabelColumn("Class1Good2Bad", label_data, [1])
    facet_column = FacetColumn("ForeignWorker", [1])
    group_variable = dataframe["A151"]

    # pre_training_bias metrics
    pre_training_metrics = get_pretraining_bias_metrics(
        dataframe, facet_column, label_column, group_variable)

    # post training bias metrics
    predicted_labels = get_predicted_labels()
    pred_label_column = LabelColumn("_predicted_labels", predicted_labels, [1])

    post_training_metrics = get_posttraining_bias_metrics(
        dataframe, facet_column, label_column, pred_label_column,
        group_variable)

    expected_results = get_expected_results()
    pre_training_expected_result = expected_results.get(
        "pre_training_bias_metrics")
    post_training_expected_result = expected_results.get(
        "post_training_bias_metrics")

    if not (pre_training_metrics == approximate(pre_training_expected_result)):
        raise AssertionError(
            "Pre_training Bias Metrics values differ from expected Metrics")
    if not (post_training_metrics
            == approximate(post_training_expected_result)):
        raise AssertionError(
            "Post_training Bias Metrics values differ from expected Metrics")
    print("Test SMClarify Bias Metrics succeeded!")
Example #3
def test_invalid_input():
    df_cat = pd.DataFrame(
        [["a", 0, 0, "n"], ["b", 0, 1, "y"], ["c", 1, 0, "n"]],
        columns=["x", "y", "label", "predicted_label"],
    )
    for staging_type in StageType:
        # facet not in dataset
        with pytest.raises(ValueError):
            bias_report(
                df_cat,
                FacetColumn("z"),
                LabelColumn("Label", df_cat["label"]),
                staging_type,
            )
        # no positive label value
        with pytest.raises(ValueError):
            bias_report(
                df_cat,
                FacetColumn("x"),
                LabelColumn("Label", df_cat["label"]),
                staging_type,
            )
    # incorrect stage type
    with pytest.raises(ValueError):
        # noinspection PyTypeChecker
        bias_report(
            df_cat,
            FacetColumn("x"),
            LabelColumn("Label", df_cat["label"], [1]),
            "pre_training",
        )
    # post-training but no predicted label column
    with pytest.raises(ValueError):
        bias_report(
            df_cat,
            FacetColumn("x"),
            LabelColumn("Label", df_cat["label"], [1]),
            StageType.POST_TRAINING,
        )
    # positive label value of label and predicted label not the same
    with pytest.raises(ValueError):
        bias_report(
            df_cat,
            FacetColumn("x"),
            LabelColumn("Label", df_cat["label"], [1]),
            StageType.POST_TRAINING,
            LabelColumn("Prediction", df_cat["predicted_label"], [1]),
        )
    # label and positive label have different data types.
    with pytest.raises(ValueError):
        bias_report(
            df_cat,
            FacetColumn("x"),
            LabelColumn("Label", df_cat["label"], [1]),
            StageType.POST_TRAINING,
            LabelColumn("Prediction", df_cat["predicted_label"], [1]),
        )
Example #4
def test_bias_metrics():
    dataframe = fetch_input_data()
    label_data = dataframe.pop("Class1Good2Bad")
    label_column = LabelColumn("Class1Good2Bad", label_data, [1])
    facet_column = FacetColumn("ForeignWorker", [1])
    group_variable = dataframe["A151"]

    # pre_training_bias metrics
    pre_training_metrics = get_pretraining_bias_metrics(
        dataframe, facet_column, label_column, group_variable)

    # post training bias metrics
    predicted_labels = get_predicted_labels()
    pred_label_column = LabelColumn("_predicted_labels", predicted_labels, [1])

    post_training_metrics = get_posttraining_bias_metrics(
        dataframe, facet_column, label_column, pred_label_column,
        group_variable)

    pre_training_expected_result = [{
        "value_or_threshold":
        "1",
        "metrics": [
            {
                "name": "CDDL",
                "description":
                "Conditional Demographic Disparity in Labels (CDDL)",
                "value": 0.029771892530848814,
            },
            {
                "name": "CI",
                "description": "Class Imbalance (CI)",
                "value": -0.9288888888888889
            },
            {
                "name": "DPL",
                "description":
                "Difference in Positive Proportions in Labels (DPL)",
                "value": 0.17453917050691248,
            },
            {
                "name": "JS",
                "description": "Jensen-Shannon Divergence (JS)",
                "value": 0.04021236938805562
            },
            {
                "name": "KL",
                "description": "Kullback-Liebler Divergence (KL)",
                "value": 0.08543332780657628
            },
            {
                "name": "KS",
                "description": "Kolmogorov-Smirnov Distance (KS)",
                "value": 0.17453917050691248
            },
            {
                "name": "LP",
                "description": "L-p Norm (LP)",
                "value": 0.2468356620962257
            },
            {
                "name": "TVD",
                "description": "Total Variation Distance (TVD)",
                "value": 0.17453917050691245
            },
        ],
    }]

    post_training_expected_result = [{
        "value_or_threshold":
        "1",
        "metrics": [
            {
                "name": "AD",
                "description": "Accuracy Difference (AD)",
                "value": 0.03312211981566815
            },
            {
                "name": "CDDPL",
                "description":
                "Conditional Demographic Disparity in Predicted Labels (CDDPL)",
                "value": 0.032647137172999274,
            },
            {
                "name": "DAR",
                "description": "Difference in Acceptance Rates (DAR)",
                "value": 0.017096617181796114
            },
            {
                "name": "DCA",
                "description": "Difference in Conditional Acceptance (DCA)",
                "value": -0.035775127768313375,
            },
            {
                "name": "DCR",
                "description": "Difference in Conditional Rejection (DCR)",
                "value": -0.07473309608540923,
            },
            {
                "name": "DI",
                "description": "Disparate Impact (DI)",
                "value": 0.7728768926925609
            },
            {
                "name": "DPPL",
                "description":
                "Difference in Positive Proportions in Predicted Labels (DPPL)",
                "value": 0.19873271889400923,
            },
            {
                "name": "DRR",
                "description": "Difference in Rejection Rates (DRR)",
                "value": 0.06494661921708189
            },
            {
                "name": "FT",
                "description": "Flip Test (FT)",
                "value": -0.32373271889400923
            },
            {
                "name": "RD",
                "description": "Recall Difference (RD)",
                "value": 0.049812030075187974
            },
            {
                "name": "TE",
                "description": "Treatment Equality (TE)",
                "value": 0.6774193548387097
            },
        ],
    }]

    assert pre_training_metrics == pre_training_expected_result
    assert post_training_metrics == post_training_expected_result
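For reference, the two simplest pre-training metrics asserted above can be recomputed by hand: Class Imbalance is CI = (n_a - n_d) / (n_a + n_d), where n_d counts rows in the sensitive facet and n_a counts rows outside it, and DPL is the difference in positive-label proportions between the two groups, q_a - q_d. The sketch below only illustrates the formulas on a toy frame with hypothetical column names; it is not the library's implementation.

# Toy illustration of CI and DPL; "facet" and "label" are hypothetical columns.
import pandas as pd

toy = pd.DataFrame({"facet": [1, 1, 1, 0], "label": [1, 0, 1, 1]})

in_facet = toy["facet"] == 1                     # sensitive facet d
n_d, n_a = in_facet.sum(), (~in_facet).sum()
ci = (n_a - n_d) / (n_a + n_d)                   # Class Imbalance (CI) = -0.5 here

q_a = (toy.loc[~in_facet, "label"] == 1).mean()  # positive proportion outside the facet
q_d = (toy.loc[in_facet, "label"] == 1).mean()   # positive proportion inside the facet
dpl = q_a - q_d                                  # Difference in Positive Proportions in Labels (DPL)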
Example #5
def test_report_category_data():
    # test the bias_report function on the category data
    #
    # pre training bias metrics
    df_cat = pd.DataFrame(
        [["a", 1, 1, 1, "1"], ["b", 1, 1, 0, "0"], ["b", 0, 1, 0, "0"], ["b", 0, 0, 1, "1"]],
        columns=["x", "y", "z", "yhat", "yhat_cat"],
    )
    pretraining_report = bias_report(
        df_cat,
        FacetColumn("x"),
        LabelColumn("y", df_cat["y"], [0]),
        StageType.PRE_TRAINING,
        LabelColumn("yhat", df_cat["yhat"]),
        group_variable=df_cat["z"],
    )

    pretraining_report_cat = bias_report(
        df_cat,
        FacetColumn("x"),
        LabelColumn("y", df_cat["y"], [0]),
        StageType.PRE_TRAINING,
        LabelColumn("yhat", df_cat["yhat_cat"]),
        group_variable=df_cat["z"],
    )

    assert isinstance(pretraining_report, list)
    assert len(pretraining_report) > 0
    assert pretraining_report == pretraining_report_cat

    result = [
        {
            "metrics": [
                {
                    "description": "Conditional Demographic Disparity in Labels " "(CDDL)",
                    "name": "CDDL",
                    "value": pytest.approx(-0.375),
                },
                {"description": "Class Imbalance (CI)", "name": "CI", "value": pytest.approx(0.5)},
                {
                    "description": "Difference in Positive Proportions in Labels (DPL)",
                    "name": "DPL",
                    "value": pytest.approx(-0.6666666666666667),
                },
                {
                    "description": "Jensen-Shannon Divergence (JS)",
                    "name": "JS",
                    "value": pytest.approx(0.08720802396075798),
                },
                {
                    "description": "Kullback-Liebler Divergence (KL)",
                    "name": "KL",
                    "value": pytest.approx(-0.3662040962227032),
                },
                {
                    "description": "Kolmogorov-Smirnov Distance (KS)",
                    "name": "KS",
                    "value": pytest.approx(0.6666666666666667),
                },
                {"description": "L-p Norm (LP)", "name": "LP", "value": pytest.approx(0.6666666666666667)},
                {
                    "description": "Total Variation Distance (TVD)",
                    "name": "TVD",
                    "value": pytest.approx(0.33333333333333337),
                },
            ],
            "value_or_threshold": "a",
        },
        {
            "metrics": [
                {
                    "description": "Conditional Demographic Disparity in Labels " "(CDDL)",
                    "name": "CDDL",
                    "value": pytest.approx(0.625),
                },
                {"description": "Class Imbalance (CI)", "name": "CI", "value": pytest.approx(-0.5)},
                {
                    "description": "Difference in Positive Proportions in Labels (DPL)",
                    "name": "DPL",
                    "value": pytest.approx(0.6666666666666667),
                },
                {
                    "description": "Jensen-Shannon Divergence (JS)",
                    "name": "JS",
                    "value": pytest.approx(0.08720802396075798),
                },
                {
                    "description": "Kullback-Liebler Divergence (KL)",
                    "name": "KL",
                    "value": pytest.approx(1.0986122886681098),
                },
                {
                    "description": "Kolmogorov-Smirnov Distance (KS)",
                    "name": "KS",
                    "value": pytest.approx(0.6666666666666667),
                },
                {"description": "L-p Norm (LP)", "name": "LP", "value": pytest.approx(0.6666666666666667)},
                {
                    "description": "Total Variation Distance (TVD)",
                    "name": "TVD",
                    "value": pytest.approx(0.33333333333333337),
                },
            ],
            "value_or_threshold": "b",
        },
    ]
    assert pretraining_report == result

    # post training bias metrics
    posttraining_report = bias_report(
        df_cat,
        FacetColumn("x"),
        LabelColumn("y", df_cat["y"], [0]),
        StageType.POST_TRAINING,
        LabelColumn("yhat", df_cat["yhat"]),
        metrics=["AD", "DI", "DPPL", "RD"],
        group_variable=df_cat["z"],
    )

    posttraining_report_cat = bias_report(
        df_cat,
        FacetColumn("x"),
        LabelColumn("y", df_cat["y"], [0]),
        StageType.POST_TRAINING,
        LabelColumn("yhat", df_cat["yhat_cat"]),
        metrics=["AD", "DI", "DPPL", "RD"],
        group_variable=df_cat["z"],
    )

    assert isinstance(posttraining_report, list)
    assert len(posttraining_report) > 0
    assert posttraining_report == posttraining_report_cat

    expected_result_1 = [
        {
            "metrics": [
                {"description": "Accuracy Difference (AD)", "name": "AD", "value": pytest.approx(-0.6666666666666667)},
                {"description": "Disparate Impact (DI)", "name": "DI", "value": pytest.approx(3.0)},
                {
                    "description": "Difference in Positive Proportions in Predicted " "Labels (DPPL)",
                    "name": "DPPL",
                    "value": pytest.approx(-0.6666666666666667),
                },
                {"description": "Recall Difference (RD)", "name": "RD", "value": pytest.approx(-1.0)},
            ],
            "value_or_threshold": "a",
        },
        {
            "metrics": [
                {"description": "Accuracy Difference (AD)", "name": "AD", "value": pytest.approx(0.6666666666666667)},
                {"description": "Disparate Impact (DI)", "name": "DI", "value": pytest.approx(0.3333333333333333)},
                {
                    "description": "Difference in Positive Proportions in Predicted " "Labels (DPPL)",
                    "name": "DPPL",
                    "value": pytest.approx(0.6666666666666667),
                },
                {"description": "Recall Difference (RD)", "name": "RD", "value": pytest.approx(1.0)},
            ],
            "value_or_threshold": "b",
        },
    ]
    assert posttraining_report == expected_result_1
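The value returned by bias_report is a plain list of dicts, one entry per facet value (or threshold), each holding a "metrics" list of {"name", "description", "value"} items, as the expected results above show. The helper below is hypothetical (not part of smclarify) and just shows one way to pull a single metric value out of that structure.

# Hypothetical helper, not part of the library: look up one metric value for
# one facet entry in the list-of-dicts report structure shown above.
def get_metric_value(report, value_or_threshold, metric_name):
    for entry in report:
        if entry["value_or_threshold"] == value_or_threshold:
            for metric in entry["metrics"]:
                if metric["name"] == metric_name:
                    return metric.get("value")
    return None

# e.g. get_metric_value(pretraining_report, "a", "DPL") would return roughly
# -0.6667 for the category-data report asserted above.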
Example #6
def test_partial_bias_report():
    """
    Test that a bias report with partial metrics is generated when errors occur while computing some metrics
    """
    df = pd.DataFrame(
        [[1, 1, 1, 1], [2, 1, 1, 0], [3, 0, 0, 0], [2, 0, 1, 1], [0, 0, 1, 1]], columns=["x", "y", "z", "yhat"]
    )
    # pre training bias metrics
    pretraining_report = bias_report(
        df,
        FacetColumn("x", [2]),
        LabelColumn("y", df["y"], [0]),
        StageType.PRE_TRAINING,
        LabelColumn("yhat", df["yhat"]),
        metrics=["CI", "CDDL", "DPL", "KL"],
    )
    assert isinstance(pretraining_report, list)
    expected_result_1 = [
        {
            "metrics": [
                {
                    "description": "Conditional Demographic Disparity in Labels (CDDL)",
                    "error": "Group variable is empty or not provided",
                    "name": "CDDL",
                    "value": None,
                },
                {"description": "Class Imbalance (CI)", "name": "CI", "value": pytest.approx(0.6)},
                {
                    "description": "Difference in Positive Proportions in Labels " "(DPL)",
                    "name": "DPL",
                    "value": pytest.approx(0.5),
                },
                {
                    "description": "Kullback-Liebler Divergence (KL)",
                    "name": "KL",
                    "value": pytest.approx(-0.34657359027997264),
                },
            ],
            "value_or_threshold": "(2, 3]",
        }
    ]
    assert pretraining_report == expected_result_1

    # post training bias metrics
    posttraining_report = bias_report(
        df,
        FacetColumn("x", [2]),
        LabelColumn("y", df["y"], [0]),
        StageType.POST_TRAINING,
        LabelColumn("yhat", df["yhat"]),
        metrics=["AD", "CDDPL", "DCA", "DI", "DPPL", "FT"],
    )
    assert isinstance(posttraining_report, list)
    expected_result_2 = [
        {
            "metrics": [
                {"description": "Accuracy Difference (AD)", "name": "AD", "value": pytest.approx(-0.75)},
                {
                    "description": "Conditional Demographic Disparity in Predicted " "Labels (CDDPL)",
                    "error": "Group variable is empty or not provided",
                    "name": "CDDPL",
                    "value": None,
                },
                {
                    "description": "Difference in Conditional Acceptance (DCA)",
                    "name": "DCA",
                    "value": pytest.approx(0.6666666666666666),
                },
                {"description": "Disparate Impact (DI)", "name": "DI", "value": pytest.approx(0.0)},
                {
                    "description": "Difference in Positive Proportions in Predicted " "Labels (DPPL)",
                    "name": "DPPL",
                    "value": pytest.approx(0.75),
                },
                {"description": "Flip Test (FT)", "name": "FT", "value": pytest.approx(-1.0)},
            ],
            "value_or_threshold": "(2, 3]",
        }
    ]
    assert posttraining_report == expected_result_2
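As the expected results above show, a metric that could not be computed (here CDDL and CDDPL, because no group variable was passed) still appears in the report, but with "value": None and an "error" message. A small sketch for separating failed metrics from computed ones, relying only on those keys:

# Sketch only: split a report entry's metrics into computed and failed ones,
# using the "value"/"error" keys visible in the expected results above.
def split_failed_metrics(report_entry):
    computed = [m for m in report_entry["metrics"] if m.get("value") is not None]
    failed = [m for m in report_entry["metrics"] if "error" in m]
    return computed, failed

# With pretraining_report[0] from the test above, failed would contain only CDDL.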
Example #7
def test_label_values():
    """
    Test bias metrics for multiple label values
    """
    df = pd.DataFrame(
        [["a", "p", 1, "p"], ["b", "q", 1, "p"], ["b", "r", 1, "q"], ["c", "p", 0, "p"], ["c", "q", 0, "p"]],
        columns=["x", "y", "z", "yhat"],
    )
    # when explicit label values are given for categorical data
    # Pre training bias metrics
    pretraining_report = bias_report(
        df,
        FacetColumn("x"),
        LabelColumn("y", df["y"], ["p", "q"]),
        StageType.PRE_TRAINING,
        LabelColumn("yhat", df["yhat"]),
        metrics=["DPL", "CDDL"],
        group_variable=df["z"],
    )

    assert isinstance(pretraining_report[0], dict)
    expected_result_1 = [
        {
            "metrics": [
                {
                    "description": "Conditional Demographic Disparity in Labels " "(CDDL)",
                    "name": "CDDL",
                    "value": pytest.approx(-0.3),
                },
                {
                    "description": "Difference in Positive Proportions in Labels " "(DPL)",
                    "name": "DPL",
                    "value": pytest.approx(-0.25),
                },
            ],
            "value_or_threshold": "a",
        },
        {
            "metrics": [
                {
                    "description": "Conditional Demographic Disparity in Labels " "(CDDL)",
                    "name": "CDDL",
                    "value": pytest.approx(0.3),
                },
                {
                    "description": "Difference in Positive Proportions in Labels " "(DPL)",
                    "name": "DPL",
                    "value": pytest.approx(0.5),
                },
            ],
            "value_or_threshold": "b",
        },
        {
            "metrics": [
                {
                    "description": "Conditional Demographic Disparity in Labels " "(CDDL)",
                    "name": "CDDL",
                    "value": pytest.approx(-0.4),
                },
                {
                    "description": "Difference in Positive Proportions in Labels (DPL)",
                    "name": "DPL",
                    "value": pytest.approx(-0.33333333333333337),
                },
            ],
            "value_or_threshold": "c",
        },
    ]
    assert pretraining_report == expected_result_1

    # post training bias metrics
    posttraining_report = bias_report(
        df,
        FacetColumn("x"),
        LabelColumn("y", df["y"], ["p", "q"]),
        StageType.POST_TRAINING,
        LabelColumn("yhat", df["yhat"]),
        metrics=["AD", "DI", "DPPL", "RD", "DAR", "DRR"],
        group_variable=df["z"],
    )

    assert isinstance(posttraining_report[0], dict)
    expected_result_2 = [
        {
            "metrics": [
                {"description": "Accuracy Difference (AD)", "name": "AD", "value": pytest.approx(-0.25)},
                {"description": "Difference in Acceptance Rates (DAR)", "name": "DAR", "value": pytest.approx(-0.25)},
                {"description": "Disparate Impact (DI)", "name": "DI", "value": pytest.approx(1.0)},
                {
                    "description": "Difference in Positive Proportions in Predicted " "Labels (DPPL)",
                    "name": "DPPL",
                    "value": pytest.approx(0.0),
                },
                {"description": "Difference in Rejection Rates (DRR)", "name": "DRR", "value": pytest.approx(0)},
                {"description": "Recall Difference (RD)", "name": "RD", "value": pytest.approx(0.0)},
            ],
            "value_or_threshold": "a",
        },
        {
            "metrics": [
                {"description": "Accuracy Difference (AD)", "name": "AD", "value": pytest.approx(0.5)},
                {"description": "Difference in Acceptance Rates (DAR)", "name": "DAR", "value": pytest.approx(0.5)},
                {"description": "Disparate Impact (DI)", "name": "DI", "value": pytest.approx(1.0)},
                {
                    "description": "Difference in Positive Proportions in Predicted " "Labels (DPPL)",
                    "name": "DPPL",
                    "value": pytest.approx(0.0),
                },
                {"description": "Difference in Rejection Rates (DRR)", "name": "DRR", "value": pytest.approx(0)},
                {"description": "Recall Difference (RD)", "name": "RD", "value": pytest.approx(0.0)},
            ],
            "value_or_threshold": "b",
        },
        {
            "metrics": [
                {"description": "Accuracy Difference (AD)", "name": "AD", "value": pytest.approx(-0.33333333333333337)},
                {
                    "description": "Difference in Acceptance Rates (DAR)",
                    "name": "DAR",
                    "value": pytest.approx(-0.33333333333333337),
                },
                {"description": "Disparate Impact (DI)", "name": "DI", "value": pytest.approx(1.0)},
                {
                    "description": "Difference in Positive Proportions in Predicted " "Labels (DPPL)",
                    "name": "DPPL",
                    "value": pytest.approx(0.0),
                },
                {"description": "Difference in Rejection Rates (DRR)", "name": "DRR", "value": pytest.approx(0)},
                {"description": "Recall Difference (RD)", "name": "RD", "value": pytest.approx(0.0)},
            ],
            "value_or_threshold": "c",
        },
    ]
    assert posttraining_report == expected_result_2
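Passing several positive label values, as LabelColumn("y", df["y"], ["p", "q"]) does here, effectively collapses the label into a binary indicator: a row counts as positive when its label is any of the listed values. A one-line pandas illustration of that collapse (illustrative only, not the library's internals):

# Illustrative only: a multi-valued positive label set reduces to a binary
# indicator before the binary bias metrics are computed.
import pandas as pd

y = pd.Series(["p", "q", "r", "p", "q"])
positive = y.isin(["p", "q"])   # True for "p"/"q", False for "r"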
Example #8
def test_report_continuous_data_regression():
    #   test that we correctly apply thresholds for regression tasks.
    #
    df_cont_old = pd.DataFrame(
        [
            [0, 0, 0, 0, True, 1, 1, 1],
            [3, 0, 0, 0, True, 0, 1, 1],
            [3, 0, 1, 0, True, 0, 1, 1],
            [0, 0, 0, 0, False, 1, 1, 0],
            [4, 0, 0, 1, True, 0, 1, 1],
            [0, 0, 1, 0, True, 1, 1, 1],
            [3, 0, 0, 0, True, 1, 1, 1],
            [3, 1, 0, 0, True, 1, 1, 1],
            [0, 0, 1, 0, True, 1, 1, 1],
            [3, 0, 1, 1, True, 1, 0, 1],
            [4, 0, 0, 0, True, 1, 0, 1],
            [3, 0, 1, 0, True, 1, 1, 1],
            [3, 0, 0, 0, False, 1, 1, 0],
            [0, 0, 0, 0, True, 1, 1, 1],
            [0, 0, 1, 0, True, 0, 1, 1],
            [0, 0, 1, 0, True, 1, 1, 1],
            [0, 1, 0, 1, False, 0, 1, 0],
            [3, 0, 0, 0, False, 1, 1, 0],
            [0, 0, 1, 0, False, 1, 1, 1],
            [3, 0, 0, 0, True, 1, 0, 1],
            [3, 0, 1, 0, False, 1, 1, 0],
            [0, 1, 0, 0, False, 1, 1, 0],
            [3, 0, 1, 0, True, 0, 1, 1],
            [0, 0, 0, 1, True, 1, 0, 1],
        ],
        columns=["x", "y", "z", "a", "b", "c", "d", "yhat"],
    )

    df_cont = pd.DataFrame(
        [
            [0, 0.0, 0, 0, True, 1, 1, 11],  # 11 is the highest among y and yhat
            [3, 0.5, 0, 0, True, 0, 1, 6],
            [3, 2, 1, 0, True, 0, 1, 6.6],
            [0, 3, 0, 0, False, 1, 1, 0.3],
            [4, 2.2, 0, 1, True, 0, 1, 6],
            [0, 0.1, 1, 0, True, 1, 1, 6],
            [3, 0, 0, 0, True, 1, 1, 6],
            [3, 6, 0, 0, True, 1, 1, 6],
            [0, 0, 1, 0, True, 1, 1, 6],
            [3, 0, 1, 1, True, 1, 0, 6],
            [4, 0, 0, 0, True, 1, 0, 6],
            [3, 0, 1, 0, True, 1, 1, 6],
            [3, 0, 0, 0, False, 1, 1, 0],
            [0, 0, 0, 0, True, 1, 1, 6.2],
            [0, 0, 1, 0, True, 0, 1, 6.6],
            [0, 0, 1, 0, True, 1, 1, 6.6],
            [0, 7, 0, 1, False, 0, 1, 0.1],
            [3, 0, 0, 0, False, 1, 1, 2],
            [0, 0, 1, 0, False, 1, 1, 8],
            [3, 0, 0, 0, True, 1, 0, 9],
            [3, 0, 1, 0, False, 1, 1, 0.1],
            [0, 8, 0, 0, False, 1, 1, 2.2],
            [3, 0, 1, 0, True, 0, 1, 10],
            [0, 0, 0, 1, True, 1, 0, 9],
        ],
        columns=["x", "y", "z", "a", "b", "c", "d", "yhat"],
    )
    # Old and new df should yield the same results if we use threshold 5 for the latter.

    threshold_old = 0.5
    threshold_new = 5
    assert ((df_cont_old["y"] > threshold_old) == (df_cont["y"] > threshold_new)).all()
    assert ((df_cont_old["yhat"] > threshold_old) == (df_cont["yhat"] > threshold_new)).all()

    posttraining_report = bias_report(
        df_cont,
        FacetColumn("x", [2]),
        LabelColumn("y", df_cont["y"], positive_label_values=[threshold_new]),
        StageType.POST_TRAINING,
        LabelColumn("yhat", df_cont["yhat"], positive_label_values=[threshold_new]),
        group_variable=df_cont["z"],
    )
    posttraining_report_old = bias_report(
        df_cont_old,
        FacetColumn("x", [2]),
        LabelColumn("y", df_cont_old["y"], positive_label_values=[threshold_old]),
        StageType.POST_TRAINING,
        LabelColumn("yhat", df_cont_old["yhat"], positive_label_values=[threshold_old]),
        group_variable=df_cont_old["z"],
    )
    assert posttraining_report == posttraining_report_old
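The equivalence this test relies on can also be written out directly: applying a positive-label threshold to a continuous column is the same as binarizing that column first. The sketch below mirrors the test's own strict greater-than comparison; whether the library uses > or >= at the exact boundary is not determined by this data.

# Sketch: binarizing a continuous label with a threshold, mirroring the
# comparisons asserted in the test above (strict ">" at the threshold).
import pandas as pd

y_cont = pd.Series([0.0, 0.5, 2, 3, 6, 7, 8])
threshold = 5
y_binary = (y_cont > threshold).astype(int)   # 1 where the value exceeds the threshold
assert list(y_binary) == [0, 0, 0, 0, 1, 1, 1]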
Example #9
def test_report_continuous_data():
    #   test the bias_report function on the continuous data
    #
    # pre training bias metrics
    df_cont = pd.DataFrame(
        [
            [0, 0, 0, 0, True, 1, 1, 1],
            [3, 0, 0, 0, True, 0, 1, 1],
            [3, 0, 1, 0, True, 0, 1, 1],
            [0, 0, 0, 0, False, 1, 1, 0],
            [4, 0, 0, 1, True, 0, 1, 1],
            [0, 0, 1, 0, True, 1, 1, 1],
            [3, 0, 0, 0, True, 1, 1, 1],
            [3, 1, 0, 0, True, 1, 1, 1],
            [0, 0, 1, 0, True, 1, 1, 1],
            [3, 0, 1, 1, True, 1, 0, 1],
            [4, 0, 0, 0, True, 1, 0, 1],
            [3, 0, 1, 0, True, 1, 1, 1],
            [3, 0, 0, 0, False, 1, 1, 0],
            [0, 0, 0, 0, True, 1, 1, 1],
            [0, 0, 1, 0, True, 0, 1, 1],
            [0, 0, 1, 0, True, 1, 1, 1],
            [0, 1, 0, 1, False, 0, 1, 0],
            [3, 0, 0, 0, False, 1, 1, 0],
            [0, 0, 1, 0, False, 1, 1, 1],
            [3, 0, 0, 0, True, 1, 0, 1],
            [3, 0, 1, 0, False, 1, 1, 0],
            [0, 1, 0, 0, False, 1, 1, 0],
            [3, 0, 1, 0, True, 0, 1, 1],
            [0, 0, 0, 1, True, 1, 0, 1],
        ],
        columns=["x", "y", "z", "a", "b", "c", "d", "yhat"],
    )

    pretraining_report = bias_report(
        df_cont,
        FacetColumn("x", [2]),
        LabelColumn("y", df_cont["y"], [0]),
        StageType.PRE_TRAINING,
        LabelColumn("yhat", df_cont["yhat"]),
        group_variable=df_cont["z"],
    )
    assert isinstance(pretraining_report, list)
    assert len(pretraining_report) > 0
    result = [
        {
            "metrics": [
                {
                    "description": "Conditional Demographic Disparity in Labels " "(CDDL)",
                    "name": "CDDL",
                    "value": pytest.approx(0.3851010101010101),
                },
                {"description": "Class Imbalance (CI)", "name": "CI", "value": pytest.approx(-0.08333333333333333)},
                {
                    "description": "Difference in Positive Proportions in Labels (DPL)",
                    "name": "DPL",
                    "value": pytest.approx(0.1048951048951049),
                },
                {
                    "description": "Jensen-Shannon Divergence (JS)",
                    "name": "JS",
                    "value": pytest.approx(0.01252420207928287),
                },
                {
                    "description": "Kullback-Liebler Divergence (KL)",
                    "name": "KL",
                    "value": pytest.approx(0.057704603668062765),
                },
                {
                    "description": "Kolmogorov-Smirnov Distance (KS)",
                    "name": "KS",
                    "value": pytest.approx(0.1048951048951049),
                },
                {"description": "L-p Norm (LP)", "name": "LP", "value": pytest.approx(0.14834407996920576)},
                {
                    "description": "Total Variation Distance (TVD)",
                    "name": "TVD",
                    "value": pytest.approx(0.1048951048951049),
                },
            ],
            "value_or_threshold": "(2, 4]",
        }
    ]

    assert pretraining_report == result

    posttraining_report = bias_report(
        df_cont,
        FacetColumn("x", [2]),
        LabelColumn("y", df_cont["y"], [0]),
        StageType.POST_TRAINING,
        LabelColumn("yhat", df_cont["yhat"]),
        group_variable=df_cont["z"],
    )
    assert isinstance(posttraining_report, list)
    assert len(posttraining_report) > 0
    expected_result_1 = [
        {
            "metrics": [
                {"description": "Accuracy Difference (AD)", "name": "AD", "value": pytest.approx(-0.2167832167832168)},
                {
                    "description": "Conditional Demographic Disparity in Predicted " "Labels (CDDPL)",
                    "name": "CDDPL",
                    "value": pytest.approx(0.07592592592592595),
                },
                {"description": "Difference in Acceptance Rates (DAR)", "name": "DAR", "value": pytest.approx(-0.1)},
                {
                    "description": "Difference in Conditional Acceptance (DCA)",
                    "name": "DCA",
                    "value": pytest.approx(0.15),
                },
                {
                    "description": "Difference in Conditional Rejection (DCR)",
                    "name": "DCR",
                    "value": pytest.approx(1.0),
                },
                {"description": "Disparate Impact (DI)", "name": "DI", "value": pytest.approx(1.0576923076923077)},
                {
                    "description": "Difference in Positive Proportions in Predicted " "Labels (DPPL)",
                    "name": "DPPL",
                    "value": pytest.approx(-0.04195804195804198),
                },
                {
                    "description": "Difference in Rejection Rates (DRR)",
                    "name": "DRR",
                    "value": pytest.approx(0.6666666666666667),
                },
                {"description": "Flip Test (FT)", "name": "FT", "value": pytest.approx(-0.23076923076923078)},
                {"description": "Recall Difference (RD)", "name": "RD", "value": pytest.approx(-1.0)},
                {"description": "Treatment Equality (TE)", "name": "TE", "value": pytest.approx(-0.25)},
            ],
            "value_or_threshold": "(2, 4]",
        }
    ]
    assert posttraining_report == expected_result_1
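Note that the facet column x is continuous in this test, so FacetColumn("x", [2]) selects rows with x above the threshold 2, and the report labels that group with the interval "(2, 4]" (4 being the maximum of x). A short illustration of the selection; this is an inference from the reported interval, not the library's internals:

# Illustrative: the sensitive facet for FacetColumn("x", [2]) on a continuous
# column corresponds to values above the threshold, reported here as "(2, 4]".
import pandas as pd

x = pd.Series([0, 3, 3, 0, 4, 0, 3])
sensitive_facet = x > 2   # rows with x in (2, 4]; 4 is the maximum of x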
def test_bias_basic_stats():
    df_cat = pd.DataFrame(
        [["a", 1, 1, 1, "1"], ["b", 1, 1, 0, "0"], ["b", 0, 1, 0, "0"],
         ["b", 0, 0, 1, "1"]],
        columns=["x", "y", "z", "yhat", "yhat_cat"],
    )

    # Proportion
    results = bias_basic_stats(
        df_cat,
        FacetColumn("x"),
        LabelColumn("y", df_cat["y"], [0]),
        StageType.PRE_TRAINING,
        LabelColumn("yhat", df_cat["yhat"]),
    )
    expected_results = [
        {
            "value_or_threshold":
            "a",
            "metrics": [{
                "name": "proportion",
                "description": "Proportion of examples in sensitive facet.",
                "value": pytest.approx(0.25),
            }],
        },
        {
            "value_or_threshold":
            "b",
            "metrics": [{
                "name": "proportion",
                "description": "Proportion of examples in sensitive facet.",
                "value": pytest.approx(0.75),
            }],
        },
    ]
    assert expected_results == results

    # Confusion matrix
    results = bias_basic_stats(
        df_cat,
        FacetColumn("x"),
        LabelColumn("y", df_cat["y"], [0]),
        StageType.POST_TRAINING,
        LabelColumn("yhat", df_cat["yhat"]),
    )

    expected_results = [
        {
            "value_or_threshold":
            "a",
            "metrics": [
                {
                    "name": "proportion",
                    "description":
                    "Proportion of examples in sensitive facet.",
                    "value": pytest.approx(0.25),
                },
                {
                    "name":
                    "confusion_matrix",
                    "description":
                    "Fractions of TP, FP, FN, TN.",
                    "value": [
                        pytest.approx(1.0),
                        pytest.approx(0.0),
                        pytest.approx(0.0),
                        pytest.approx(0.0)
                    ],
                },
            ],
        },
        {
            "value_or_threshold":
            "b",
            "metrics": [
                {
                    "name": "proportion",
                    "description":
                    "Proportion of examples in sensitive facet.",
                    "value": pytest.approx(0.75),
                },
                {
                    "name":
                    "confusion_matrix",
                    "description":
                    "Fractions of TP, FP, FN, TN.",
                    "value": [
                        pytest.approx(0.0),
                        pytest.approx(1 / 3.0),
                        pytest.approx(1 / 3.0),
                        pytest.approx(1 / 3.0),
                    ],
                },
            ],
        },
    ]
    assert expected_results == results