Example #1
0
def test_inspector_adult_easy_str_pipeline():
    """
    Tests whether the str version of the inspector works
    """
    with open(ADULT_SIMPLE_PY) as file:
        code = file.read()

        inspector_result = PipelineInspector\
            .on_pipeline_from_string(code)\
            .add_required_inspection(MaterializeFirstOutputRows(5)) \
            .add_check(NoBiasIntroducedFor(['race'])) \
            .add_check(NoIllegalFeatures()) \
            .execute()
        extracted_dag = inspector_result.dag
        expected_dag = get_expected_dag_adult_easy("<string-source>")
        compare(networkx.to_dict_of_dicts(extracted_dag),
                networkx.to_dict_of_dicts(expected_dag))

        assert HistogramForColumns(['race']) in list(
            inspector_result.dag_node_to_inspection_results.values())[0]
        check_to_check_results = inspector_result.check_to_check_results
        assert check_to_check_results[NoBiasIntroducedFor(
            ['race'])].status == CheckStatus.SUCCESS
        assert check_to_check_results[
            NoIllegalFeatures()].status == CheckStatus.FAILURE
Example #2
0
def get_expected_check_result_merge():
    """ Expected result for the code snippet in test_no_bias_introduced_for_merge"""
    failing_dag_node = DagNode(
        2, BasicCodeLocation('<string-source>', 5),
        OperatorContext(OperatorType.JOIN,
                        FunctionInfo('pandas.core.frame', 'merge')),
        DagNodeDetails("on 'B'", ['A', 'B', 'C']),
        OptionalCodeInfo(CodeReference(5, 12, 5, 36),
                         "df_a.merge(df_b, on='B')"))

    change_df = DataFrame({
        'sensitive_column_value': ['cat_a', 'cat_b', 'cat_c'],
        'count_before': [2, 2, 1],
        'count_after': [2, 1, 1],
        'ratio_before': [0.4, 0.4, 0.2],
        'ratio_after': [0.5, 0.25, 0.25],
        'relative_ratio_change': [(0.5 - 0.4) / 0.4, (.25 - 0.4) / 0.4,
                                  (0.25 - 0.2) / 0.2]
    })
    expected_distribution_change = BiasDistributionChange(
        failing_dag_node, False, (.25 - 0.4) / 0.4, change_df)
    expected_dag_node_to_change = {
        failing_dag_node: {
            'A': expected_distribution_change
        }
    }
    failure_message = 'A Join causes a min_relative_ratio_change of \'A\' by -0.37500000000000006, a value below the ' \
                      'configured minimum threshold -0.3!'
    expected_result = NoBiasIntroducedForResult(NoBiasIntroducedFor(['A']),
                                                CheckStatus.FAILURE,
                                                failure_message,
                                                expected_dag_node_to_change)
    return expected_result
Example #3
0
def get_expected_check_result_simple_imputer():
    """ Expected result for the code snippet in test_no_bias_introduced_for_simple_imputer"""
    imputer_dag_node = DagNode(
        1, BasicCodeLocation('<string-source>', 6),
        OperatorContext(OperatorType.TRANSFORMER,
                        FunctionInfo('sklearn.impute._base', 'SimpleImputer')),
        DagNodeDetails('Simple Imputer', ['A']),
        OptionalCodeInfo(
            CodeReference(6, 10, 6, 72),
            "SimpleImputer(missing_values=np.nan, strategy='most_frequent')"))

    change_df = DataFrame({
        'sensitive_column_value': ['cat_a', 'cat_c', math.nan],
        'count_before': [2, 1, 1],
        'count_after': [3, 1, 0],
        'ratio_before': [0.5, 0.25, 0.25],
        'ratio_after': [0.75, 0.25, 0.],
        'relative_ratio_change': [0.5, 0., -1.]
    })
    expected_distribution_change = BiasDistributionChange(
        imputer_dag_node, True, 0., change_df)
    expected_dag_node_to_change = {
        imputer_dag_node: {
            'A': expected_distribution_change
        }
    }
    expected_result = NoBiasIntroducedForResult(NoBiasIntroducedFor(['A']),
                                                CheckStatus.SUCCESS, None,
                                                expected_dag_node_to_change)
    return expected_result
Example #4
0
def test_no_bias_introduced_for_merge():
    """
    Tests whether RowLineage works for joins
    """
    test_code = cleandoc("""
            import pandas as pd

            df_a = pd.DataFrame({'A': ['cat_a', 'cat_b', 'cat_a', 'cat_c', 'cat_b'], 'B': [1, 2, 4, 5, 7]})
            df_b = pd.DataFrame({'B': [1, 2, 3, 4, 5], 'C': [1, 5, 4, 11, None]})
            df_merged = df_a.merge(df_b, on='B')
            """)

    inspector_result = PipelineInspector \
        .on_pipeline_from_string(test_code) \
        .add_check(NoBiasIntroducedFor(['A'])) \
        .execute()

    check_result = inspector_result.check_to_check_results[NoBiasIntroducedFor(['A'])]
    expected_result = get_expected_check_result_merge()
    compare(check_result, expected_result)
Example #5
0
def test_no_bias_introduced_simple_imputer():
    """
    Tests whether RowLineage works for joins
    """
    test_code = cleandoc("""
            import pandas as pd
            from sklearn.impute import SimpleImputer
            import numpy as np

            df = pd.DataFrame({'A': ['cat_a', np.nan, 'cat_a', 'cat_c']})
            imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
            imputed_data = imputer.fit_transform(df)
            """)

    inspector_result = PipelineInspector \
        .on_pipeline_from_string(test_code) \
        .add_check(NoBiasIntroducedFor(['A'])) \
        .execute()

    check_result = inspector_result.check_to_check_results[NoBiasIntroducedFor(['A'])]
    expected_result = get_expected_check_result_simple_imputer()
    compare(check_result, expected_result)
Example #6
0
def test_inspector_adult_easy_ipynb_pipeline():
    """
    Tests whether the .ipynb version of the inspector works
    """
    inspector_result = PipelineInspector\
        .on_pipeline_from_ipynb_file(ADULT_SIMPLE_IPYNB)\
        .add_required_inspection(MaterializeFirstOutputRows(5)) \
        .add_check(NoBiasIntroducedFor(['race'])) \
        .add_check(NoIllegalFeatures()) \
        .execute()
    extracted_dag = inspector_result.dag
    expected_dag = get_expected_dag_adult_easy_ipynb()
    compare(networkx.to_dict_of_dicts(extracted_dag),
            networkx.to_dict_of_dicts(expected_dag))

    assert HistogramForColumns(
        ['race']) in inspector_result.inspection_to_annotations
    check_to_check_results = inspector_result.check_to_check_results
    assert check_to_check_results[NoBiasIntroducedFor(
        ['race'])].status == CheckStatus.SUCCESS
    assert check_to_check_results[
        NoIllegalFeatures()].status == CheckStatus.FAILURE