Example #1
0
def test_inspector_adult_easy_str_pipeline():
    """
    Tests whether the str version of the inspector works

    The pipeline source is read from ADULT_SIMPLE_PY and handed to the inspector
    as a plain string. The test then verifies that
    - the extracted DAG equals the expected adult-easy DAG for "<string-source>",
    - a HistogramForColumns(['race']) entry is present in the inspection results
      of the first DAG node,
    - NoBiasIntroducedFor(['race']) succeeds and NoIllegalFeatures() fails.
    """
    # Only the read needs the file handle; close it before the inspection runs
    # instead of keeping it open across the whole test body.
    with open(ADULT_SIMPLE_PY) as file:
        code = file.read()

    inspector_result = PipelineInspector\
        .on_pipeline_from_string(code)\
        .add_required_inspection(MaterializeFirstOutputRows(5)) \
        .add_check(NoBiasIntroducedFor(['race'])) \
        .add_check(NoIllegalFeatures()) \
        .execute()

    # Compare the DAGs as nested dicts so that a mismatch produces a readable diff.
    extracted_dag = inspector_result.dag
    expected_dag = get_expected_dag_adult_easy("<string-source>")
    compare(networkx.to_dict_of_dicts(extracted_dag),
            networkx.to_dict_of_dicts(expected_dag))

    # NOTE(review): HistogramForColumns is not added explicitly above; presumably
    # it is registered by the NoBiasIntroducedFor check - confirm.
    first_node_results = list(
        inspector_result.dag_node_to_inspection_results.values())[0]
    assert HistogramForColumns(['race']) in first_node_results

    check_to_check_results = inspector_result.check_to_check_results
    assert check_to_check_results[NoBiasIntroducedFor(
        ['race'])].status == CheckStatus.SUCCESS
    assert check_to_check_results[
        NoIllegalFeatures()].status == CheckStatus.FAILURE
def test_no_illegal_features():
    """
    Tests whether NoIllegalFeatures reports illegal columns that are used as features

    A small in-memory pipeline feeds the columns 'age', 'B' and 'C' into a
    ColumnTransformer. The check is configured with the additional illegal
    column 'C' and is expected to fail, reporting 'C' and 'age' (in any order).
    """
    test_code = cleandoc("""
            import pandas as pd
            from sklearn.preprocessing import label_binarize, StandardScaler, OneHotEncoder
            from sklearn.compose import ColumnTransformer
            from sklearn.pipeline import Pipeline
            from sklearn.tree import DecisionTreeClassifier

            data = pd.DataFrame({'age': [1, 2, 10, 5], 'B': ['cat_a', 'cat_b', 'cat_a', 'cat_c'], 
                'C': ['cat_a', 'cat_b', 'cat_a', 'cat_c'], 'target': ['no', 'no', 'yes', 'yes']})
                
            column_transformer = ColumnTransformer(transformers=[
                ('numeric', StandardScaler(), ['age']),
                ('categorical', OneHotEncoder(sparse=False), ['B', 'C'])
            ])
            
            income_pipeline = Pipeline([
                ('features', column_transformer),
                ('classifier', DecisionTreeClassifier())])
            
            labels = label_binarize(data['target'], classes=['no', 'yes'])
            income_pipeline.fit(data, labels)
            """)

    inspector_result = PipelineInspector \
        .on_pipeline_from_string(test_code) \
        .add_check(NoIllegalFeatures(['C'])) \
        .execute()

    check_result = inspector_result.check_to_check_results[NoIllegalFeatures(
        ['C'])]
    # Raw string: "\:" is not a valid escape sequence in a normal string literal
    # and triggers a SyntaxWarning on recent Python versions. The pattern itself
    # is unchanged.
    # NOTE(review): 'age' is not passed to the check here; presumably it is part
    # of the check's built-in list of illegal features - confirm.
    expected_result = NoIllegalFeaturesResult(
        NoIllegalFeatures(['C']), CheckStatus.FAILURE,
        StringComparison(r"Used illegal columns\: .*"),
        SequenceComparison('C', 'age', ordered=False))
    compare(check_result, expected_result)
Example #3
0
def test_inspector_adult_easy_ipynb_pipeline():
    """
    Tests whether the inspector works when the pipeline comes from an .ipynb file

    Runs the inspector on ADULT_SIMPLE_IPYNB and verifies the extracted DAG,
    the presence of the race histogram inspection and the status of both checks.
    """
    # Configure the inspector step by step, then run it.
    builder = PipelineInspector.on_pipeline_from_ipynb_file(ADULT_SIMPLE_IPYNB)
    builder = builder.add_required_inspection(MaterializeFirstOutputRows(5))
    builder = builder.add_check(NoBiasIntroducedFor(['race']))
    builder = builder.add_check(NoIllegalFeatures())
    result = builder.execute()

    # The DAGs are compared in their dict-of-dicts representation.
    actual_dag = result.dag
    wanted_dag = get_expected_dag_adult_easy_ipynb()
    actual_as_dict = networkx.to_dict_of_dicts(actual_dag)
    wanted_as_dict = networkx.to_dict_of_dicts(wanted_dag)
    compare(actual_as_dict, wanted_as_dict)

    annotations = result.inspection_to_annotations
    assert HistogramForColumns(['race']) in annotations

    # Look the results up with freshly constructed, equal check objects.
    results_by_check = result.check_to_check_results
    bias_status = results_by_check[NoBiasIntroducedFor(['race'])].status
    assert bias_status == CheckStatus.SUCCESS
    illegal_features_status = results_by_check[NoIllegalFeatures()].status
    assert illegal_features_status == CheckStatus.FAILURE