def test_inspector_adult_easy_str_pipeline():
    """
    Tests whether the str version of the inspector works
    """
    # Read the pipeline source with an explicit encoding so the test does not
    # depend on the platform's default locale encoding.
    with open(ADULT_SIMPLE_PY, encoding="utf-8") as source_file:
        code = source_file.read()

    inspector_result = (
        PipelineInspector
        .on_pipeline_from_string(code)
        .add_required_inspection(MaterializeFirstOutputRows(5))
        .add_check(NoBiasIntroducedFor(['race']))
        .add_check(NoIllegalFeatures())
        .execute()
    )

    # The extracted DAG must match the reference DAG built for string input.
    extracted_dag = inspector_result.dag
    expected_dag = get_expected_dag_adult_easy("<string-source>")
    compare(networkx.to_dict_of_dicts(extracted_dag),
            networkx.to_dict_of_dicts(expected_dag))

    # HistogramForColumns is never added explicitly above; the assertion
    # expects it among the inspection results stored for the first entry of
    # dag_node_to_inspection_results.
    # NOTE(review): presumably it is registered by NoBiasIntroducedFor - confirm.
    first_node_results = list(
        inspector_result.dag_node_to_inspection_results.values())[0]
    assert HistogramForColumns(['race']) in first_node_results

    check_to_check_results = inspector_result.check_to_check_results
    bias_check_result = check_to_check_results[NoBiasIntroducedFor(['race'])]
    assert bias_check_result.status == CheckStatus.SUCCESS
    illegal_features_result = check_to_check_results[NoIllegalFeatures()]
    assert illegal_features_result.status == CheckStatus.FAILURE
def test_no_illegal_features():
    """
    Tests whether NoIllegalFeatures reports a user-specified illegal column
    ('C') for a small ColumnTransformer-based sklearn pipeline
    """
    test_code = cleandoc("""
            import pandas as pd
            from sklearn.preprocessing import label_binarize, StandardScaler, OneHotEncoder
            from sklearn.compose import ColumnTransformer
            from sklearn.pipeline import Pipeline
            from sklearn.tree import DecisionTreeClassifier

            data = pd.DataFrame({'age': [1, 2, 10, 5], 'B': ['cat_a', 'cat_b', 'cat_a', 'cat_c'],
                'C': ['cat_a', 'cat_b', 'cat_a', 'cat_c'], 'target': ['no', 'no', 'yes', 'yes']})

            column_transformer = ColumnTransformer(transformers=[
                ('numeric', StandardScaler(), ['age']),
                ('categorical', OneHotEncoder(sparse=False), ['B', 'C'])
            ])

            income_pipeline = Pipeline([
                ('features', column_transformer),
                ('classifier', DecisionTreeClassifier())])

            labels = label_binarize(data['target'], classes=['no', 'yes'])
            income_pipeline.fit(data, labels)
            """)

    inspector_result = (
        PipelineInspector
        .on_pipeline_from_string(test_code)
        .add_check(NoIllegalFeatures(['C']))
        .execute()
    )

    check_result = inspector_result.check_to_check_results[
        NoIllegalFeatures(['C'])]
    # The expected result fails and lists both 'C' and 'age' (in any order).
    # NOTE(review): 'age' is presumably one of the check's default illegal
    # features - confirm against NoIllegalFeatures.
    # The pattern is a raw string so that the regex escape `\:` is not
    # treated as an (invalid) Python string escape sequence.
    expected_result = NoIllegalFeaturesResult(
        NoIllegalFeatures(['C']),
        CheckStatus.FAILURE,
        StringComparison(r"Used illegal columns\: .*"),
        SequenceComparison('C', 'age', ordered=False))
    compare(check_result, expected_result)
def test_inspector_adult_easy_ipynb_pipeline():
    """
    Tests whether the .ipynb version of the inspector works
    """
    result = (
        PipelineInspector
        .on_pipeline_from_ipynb_file(ADULT_SIMPLE_IPYNB)
        .add_required_inspection(MaterializeFirstOutputRows(5))
        .add_check(NoBiasIntroducedFor(['race']))
        .add_check(NoIllegalFeatures())
        .execute()
    )

    # Compare the extracted DAG with the notebook reference DAG as plain dicts.
    actual_dag = result.dag
    reference_dag = get_expected_dag_adult_easy_ipynb()
    actual_as_dicts = networkx.to_dict_of_dicts(actual_dag)
    reference_as_dicts = networkx.to_dict_of_dicts(reference_dag)
    compare(actual_as_dicts, reference_as_dicts)

    # NOTE(review): the string-based sibling test reads
    # dag_node_to_inspection_results instead - confirm that
    # inspection_to_annotations is still the right attribute here.
    histogram_inspection = HistogramForColumns(['race'])
    assert histogram_inspection in result.inspection_to_annotations

    results_by_check = result.check_to_check_results
    bias_outcome = results_by_check[NoBiasIntroducedFor(['race'])]
    assert bias_outcome.status == CheckStatus.SUCCESS
    illegal_features_outcome = results_by_check[NoIllegalFeatures()]
    assert illegal_features_outcome.status == CheckStatus.FAILURE