コード例 #1
0
def create_rai_insights_object_classification():
    """Build and compute a classification RAIInsights on the adult dataset.

    Loads ``shap.datasets.adult()``, resamples it down to 1000 stratified
    rows, fits a k-nearest-neighbours classifier on the training split and
    registers the explainer, counterfactual, error analysis and causal
    managers before calling ``compute()``.

    :return: The computed insights object.
    :rtype: RAIInsights
    """
    target_name = 'Income'
    features, labels = shap.datasets.adult()
    # Map every label to a plain 0/1 integer based on its truthiness.
    labels = [int(bool(flag)) for flag in labels]

    features, labels = sklearn.utils.resample(
        features, labels, n_samples=1000, random_state=7, stratify=labels)

    train_features, test_features, train_labels, test_labels = \
        train_test_split(features, labels, test_size=0.01,
                         random_state=7, stratify=labels)

    classifier = sklearn.neighbors.KNeighborsClassifier()
    classifier.fit(train_features, train_labels)

    # Note: the full resampled frame (not only the training split) is what
    # gets passed to RAIInsights as the train dataset below.
    features[target_name] = labels
    test_features[target_name] = test_labels

    categorical = ['Workclass', 'Education-Num',
                   'Marital Status',
                   'Occupation', 'Relationship',
                   'Race',
                   'Sex', 'Country']
    insights = RAIInsights(classifier, features, test_features, target_name,
                           'classification',
                           categorical_features=categorical)

    insights.explainer.add()
    insights.counterfactual.add(10, desired_class='opposite')
    insights.error_analysis.add()
    insights.causal.add(
        treatment_features=['Hours per week', 'Occupation'],
        heterogeneity_features=None,
        upper_bound_on_cat_expansion=42,
        skip_cat_limit_checks=True)
    insights.compute()
    return insights
コード例 #2
0
def create_rai_insights_object_regression():
    """Build and compute a regression RAIInsights on California housing.

    Splits the housing data, fits a small random forest regressor on the
    training frame and registers the explainer, counterfactual, error
    analysis and causal managers before calling ``compute()``.

    :return: The computed insights object.
    :rtype: RAIInsights
    """
    target_name = 'target'
    dataset = fetch_california_housing()
    columns = dataset.feature_names

    train_array, test_array, train_target, test_target = train_test_split(
        dataset.data, dataset.target, test_size=0.005, random_state=7)
    train_frame = pd.DataFrame(train_array, columns=columns)
    test_frame = pd.DataFrame(test_array, columns=columns)

    forest = RandomForestRegressor(n_estimators=10, max_depth=4,
                                   random_state=777)
    fitted_model = forest.fit(train_frame, train_target)

    # The target column is attached only after fitting, so the model is
    # trained on the feature columns alone.
    train_frame[target_name] = train_target
    test_frame[target_name] = test_target

    insights = RAIInsights(fitted_model, train_frame, test_frame,
                           target_name, 'regression')
    insights.explainer.add()
    insights.counterfactual.add(10, desired_range=[5, 10])
    insights.error_analysis.add()
    insights.causal.add(
        treatment_features=['AveRooms'],
        heterogeneity_features=None,
        upper_bound_on_cat_expansion=42,
        skip_cat_limit_checks=True)
    insights.compute()
    return insights
    def test_loading_rai_insights_without_model_file(self):
        """Loading a save whose model.pkl was deleted warns and drops model.

        Saves an insights object, removes ``model.pkl`` from the saved
        directory and checks that ``RAIInsights.load`` emits the expected
        ``UserWarning`` and returns an object whose ``model`` is ``None``.
        """
        train_frame, test_frame, train_labels, test_labels, _, _ = \
            create_iris_data()
        classifier = create_lightgbm_classifier(train_frame, train_labels)
        train_frame['target'] = train_labels
        test_frame['target'] = test_labels

        rai_insights = RAIInsights(
            model=classifier,
            train=train_frame,
            test=test_frame,
            target_column='target',
            task_type='classification')

        expected_warning = ('ERROR-LOADING-USER-MODEL: '
                            'There was an error loading the user model. '
                            'Some of RAI dashboard features may not work.')

        with TemporaryDirectory() as tmpdir:
            assert rai_insights.model is not None
            save_path = Path(tmpdir) / "rai_insights"
            rai_insights.save(save_path)

            # Deleting model.pkl forces the model load to fail on the
            # subsequent RAIInsights.load call.
            os.remove(save_path / "model.pkl")

            with pytest.warns(UserWarning, match=expected_warning):
                reloaded = RAIInsights.load(save_path)
                assert reloaded.model is None
    def test_rai_insights_save_load_add_save(self, manager_type):
        """Save, load, add one manager, compute, validate and save again.

        :param manager_type: Which single manager to configure on the
            reloaded object; compared against the ManagerNames constants.
        :raises ValueError: If ``manager_type`` matches none of the
            handled ManagerNames constants.
        """
        train_df, test_df, train_labels, _, cat_features, \
            cont_features, target, _ = \
            create_adult_income_dataset()
        train_features = train_df.drop([target], axis=1)

        pipeline = create_complex_classification_pipeline(
            train_features, train_labels, cont_features, cat_features)

        # Counterfactual generation is slow, so keep a single test row.
        if manager_type == ManagerNames.COUNTERFACTUAL:
            test_df = test_df[0:1]

        original = RAIInsights(pipeline,
                               train_df,
                               test_df,
                               target,
                               categorical_features=cat_features,
                               task_type=ModelTask.CLASSIFICATION)

        with TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            first_dir = root / "first_save"
            second_dir = root / "second_save"

            # Round-trip the untouched object through disk.
            original.save(first_dir)
            reloaded = RAIInsights.load(first_dir)

            # Configure exactly one manager on the reloaded object.
            if manager_type == ManagerNames.EXPLAINER:
                reloaded.explainer.add()
            elif manager_type == ManagerNames.ERROR_ANALYSIS:
                reloaded.error_analysis.add()
            elif manager_type == ManagerNames.COUNTERFACTUAL:
                reloaded.counterfactual.add(total_CFs=10,
                                            desired_class='opposite',
                                            feature_importance=False)
            elif manager_type == ManagerNames.CAUSAL:
                reloaded.causal.add(
                    treatment_features=['age', 'hours_per_week'])
            else:
                raise ValueError("Bad manager_type: {0}".format(manager_type))

            reloaded.compute()

            # Sanity validation only; the second save is the real check.
            validate_rai_insights(reloaded,
                                  train_df,
                                  test_df,
                                  target,
                                  ModelTask.CLASSIFICATION,
                                  categorical_features=cat_features)

            # Saving a loaded object is where Issue #1046 manifested.
            reloaded.save(second_dir)
コード例 #5
0
    def test_eval_data_having_new_categories(self):
        """Adding counterfactuals fails when test has unseen categories.

        Column ``c2`` is categorical and takes the value 1 only in the test
        frame, so ``counterfactual.add`` is expected to raise
        ``UserConfigValidationException`` naming that category.
        """
        column_names = ['c1', 'c2', TARGET]
        train_data = pd.DataFrame(data=[[1, 2, 0], [2, 3, 1], [3, 3, 0]],
                                  columns=column_names)
        test_data = pd.DataFrame(data=[[1, 1, 0]], columns=column_names)

        train_labels = train_data[TARGET]
        train_features = train_data.drop([TARGET], axis=1)
        classifier = create_lightgbm_classifier(train_features, train_labels)

        rai_insights = RAIInsights(model=classifier,
                                   train=train_data,
                                   test=test_data,
                                   target_column=TARGET,
                                   task_type='classification',
                                   categorical_features=['c2'])

        # The pattern is a regex, hence the escaped square brackets.
        expected_error = ("Counterfactual example generation requires "
                          "that every category of "
                          "categorical features present in the test data be "
                          "also present in the train data. "
                          "Categories missing from train data: "
                          "{'c2': \\[1\\]}")
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            rai_insights.counterfactual.add(total_CFs=10,
                                            method='random',
                                            desired_class='opposite')
コード例 #6
0
    def test_weird_predict_proba_function(self):
        """A predict_proba that mutates its input is rejected.

        Wraps a classifier so that ``predict_proba`` adds the target column
        to the frame it receives, then checks that constructing
        ``RAIInsights`` raises ``UserConfigValidationException``.
        """
        train_frame, test_frame, train_labels, test_labels, _, _ = \
            create_iris_data()

        class WeirdModelPredictProbaWrapper():
            """Delegating wrapper whose predict_proba mutates its input."""

            def __init__(self, model):
                self.model = model

            def predict(self, test_data_pandas):
                return self.model.predict(test_data_pandas)

            def predict_proba(self, test_data_pandas):
                # Deliberately write the target column into the caller's
                # frame before predicting on a copy without it.
                if TARGET not in test_data_pandas.columns:
                    test_data_pandas[TARGET] = 0
                without_target = test_data_pandas.drop(columns=TARGET)
                return self.model.predict_proba(without_target)

        wrapped_model = WeirdModelPredictProbaWrapper(
            create_lightgbm_classifier(train_frame, train_labels))

        train_frame = train_frame.copy()
        test_frame = test_frame.copy()
        train_frame[TARGET] = train_labels
        test_frame[TARGET] = test_labels

        expected_error = ('Calling model predict_proba function modifies '
                          'input dataset features. Please check if '
                          'predict function is defined correctly.')
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            RAIInsights(model=wrapped_model,
                        train=train_frame,
                        test=test_frame,
                        target_column=TARGET,
                        task_type='classification')
    def __init__(self,
                 analysis: RAIInsights,
                 cohort_list: Optional[List[Cohort]] = None):
        """Build the dashboard input from a RAIInsights analysis.

        :param analysis:
            The RAIInsights object whose model and data feed the dashboard.
        :type analysis: RAIInsights
        :param cohort_list:
            Optional user-defined cohorts for the dashboard. When omitted,
            an empty list is stored as the cohort data.
        :type cohort_list: List[Cohort]
        """
        self._analysis = analysis
        self._is_classifier = is_classifier(analysis.model)
        self.dashboard_input = analysis.get_data()

        # Validation runs before the cohorts are attached to the input.
        self._validate_cohort_list(cohort_list)
        self.dashboard_input.cohortData = \
            [] if cohort_list is None else cohort_list

        feature_names = self.dashboard_input.dataset.feature_names
        self._feature_length = len(feature_names)

        # _error_analyzer is only assigned when the analysis exposes an
        # error analysis manager attribute.
        if hasattr(analysis, ManagerNames.ERROR_ANALYSIS):
            self._error_analyzer = analysis.error_analysis._analyzer
    def test_counterfactual_vary_features(self, vary_all_features,
                                          feature_importance):
        """Counterfactuals compute with all or a single varying feature.

        :param vary_all_features: When truthy, ``'all'`` is passed as
            ``features_to_vary``; otherwise only the first feature name.
        :param feature_importance: Forwarded to ``counterfactual.add``.
        """
        train_frame, test_frame, train_labels, test_labels, \
            feature_names, _ = create_iris_data()

        classifier = create_lightgbm_classifier(train_frame, train_labels)
        train_frame['target'] = train_labels
        test_frame['target'] = test_labels

        # Only the first ten test rows are used.
        rai_insights = RAIInsights(model=classifier,
                                   train=train_frame,
                                   test=test_frame.iloc[0:10],
                                   target_column='target',
                                   task_type='classification')

        features_to_vary = \
            'all' if vary_all_features else [feature_names[0]]

        counterfactual = rai_insights.counterfactual
        counterfactual.add(total_CFs=10,
                           desired_class=2,
                           features_to_vary=features_to_vary,
                           feature_importance=feature_importance)
        counterfactual.compute()

        first_result = counterfactual.get()[0]
        assert first_result is not None
コード例 #9
0
    def test_classes_exceptions(self):
        """Mismatched ``classes`` arguments raise a validation error.

        Covers three cases in sequence: an extra class that is absent from
        the labels, a train label missing from ``classes``, and a test
        label missing from ``classes``.
        """
        X_train, X_test, y_train, y_test, _, _ = \
            create_cancer_data()
        model = create_lightgbm_classifier(X_train, y_train)

        def refresh_targets():
            # Re-attach the (possibly modified) labels to both frames.
            X_train[TARGET] = y_train
            X_test[TARGET] = y_test

        def expect_validation_error(class_list, expected_fragment):
            # Construct RAIInsights and check the raised message.
            with pytest.raises(UserConfigValidationException) as ucve:
                RAIInsights(model=model,
                            train=X_train,
                            test=X_test,
                            target_column=TARGET,
                            task_type='classification',
                            classes=class_list)
            assert expected_fragment in str(ucve.value)

        # Case 1: classes lists a label that the data never contains.
        refresh_targets()
        expect_validation_error(
            [0, 1, 2],
            'The train labels and distinct values in '
            'target (train data) do not match')

        # Case 2: train labels contain a value missing from classes.
        y_train[0] = 2
        refresh_targets()
        expect_validation_error(
            [0, 1],
            'The train labels and distinct values in target '
            '(train data) do not match')

        # Case 3: train labels restored, test labels contain the extra one.
        y_train[0] = 0
        y_test[0] = 2
        refresh_targets()
        expect_validation_error(
            [0, 1],
            'The train labels and distinct values in target '
            '(test data) do not match')
コード例 #10
0
    def test_causal_save_and_load(self, housing_data, tmpdir):
        """Causal results survive save/load, even without pickled models.

        :param housing_data: Fixture yielding the train frame, test frame
            and target feature name.
        :param tmpdir: Temporary directory fixture used for saving.
        """
        train_df, test_df, target_feature = housing_data
        save_dir = tmpdir.mkdir('save-dir')

        insights = RAIInsights(None, train_df, test_df, target_feature,
                               ModelTask.REGRESSION)
        insights.causal.add(['AveRooms'])
        insights.compute()

        original = insights.causal.get()[0]

        # First round trip: everything, including the models, is on disk.
        insights.causal._save(save_dir)
        loaded_manager = insights.causal._load(save_dir, insights)
        restored = loaded_manager.get()[0]
        assert restored.id == original.id
        assert restored.causal_analysis is not None
        assert restored.global_effects is not None
        assert restored.local_effects is not None
        assert restored.policies is not None

        # Delete every pickled causal analysis so that the next load has
        # to cope with an error while loading the causal models.
        for sub_dir in DirectoryManager.list_sub_directories(save_dir):
            directory_manager = DirectoryManager(
                parent_directory_path=save_dir,
                sub_directory_name=sub_dir)
            pickle_path = \
                directory_manager.get_data_directory() / "causal_analysis.pkl"
            os.remove(pickle_path)

        expected_warning = ('ERROR-LOADING-EXPLAINER: '
                            'There was an error loading the explainer. '
                            'Some of RAI dashboard features may not work.')
        with pytest.warns(UserWarning, match=expected_warning):
            loaded_manager = insights.causal._load(save_dir, insights)

        # Second round trip: only the model-backed attribute is missing.
        restored = loaded_manager.get()[0]
        assert restored.id == original.id
        assert restored.causal_analysis is None
        assert restored.global_effects is not None
        assert restored.local_effects is not None
        assert restored.policies is not None
    def test_causal_with_object_types(self, get_adult_shap_dataset):
        """Causal analysis computes on the fixture data as provided.

        :param get_adult_shap_dataset: Fixture yielding train data, test
            data, treatment features, heterogeneity features, categorical
            columns and the target feature name.
        """
        train_df, test_df, treatments, heterogeneity, \
            categorical_columns, target = get_adult_shap_dataset

        insights = RAIInsights(
            model=None,
            train=train_df,
            test=test_df,
            target_column=target,
            task_type='classification',
            categorical_features=categorical_columns,
        )

        insights.causal.add(treatment_features=treatments,
                            heterogeneity_features=heterogeneity)
        insights.compute()
    def test_rai_insights_empty_save_load_save(self):
        """An insights object with no managers configured round-trips.

        Saves, loads, validates and saves again, checking after each save
        that every manager subdirectory exists and is empty.
        """
        train_frame, train_labels, test_frame, test_labels, _ = \
            create_binary_classification_dataset()

        classifier = create_lightgbm_classifier(train_frame, train_labels)
        train_frame[LABELS] = train_labels
        test_frame[LABELS] = test_labels

        original = RAIInsights(
            classifier, train_frame, test_frame,
            LABELS,
            categorical_features=None,
            task_type=ModelTask.CLASSIFICATION)

        def assert_manager_dirs_empty(save_root):
            # Each manager directory must be listable and contain nothing.
            assert len(os.listdir(save_root / ManagerNames.CAUSAL)) == 0
            assert len(os.listdir(
                save_root / ManagerNames.COUNTERFACTUAL)) == 0
            assert len(os.listdir(save_root / ManagerNames.DATA_BALANCE)) == 0
            assert len(os.listdir(
                save_root / ManagerNames.ERROR_ANALYSIS)) == 0
            assert len(os.listdir(save_root / ManagerNames.EXPLAINER)) == 0

        with TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            first_dir = root / "first_save"
            second_dir = root / "second_save"

            original.save(first_dir)
            assert_manager_dirs_empty(first_dir)

            reloaded = RAIInsights.load(first_dir)

            # Sanity validation only; the second save is the real check.
            validate_rai_insights(
                reloaded, train_frame, test_frame,
                LABELS, ModelTask.CLASSIFICATION)

            # Saving a loaded object is where Issue #1046 manifested.
            reloaded.save(second_dir)
            assert_manager_dirs_empty(second_dir)
コード例 #13
0
def test_causal_classification_scikitlearn_issue():
    """Run a fully parameterised causal analysis on the adult dataset.

    Per the original note, this test gets stuck on SciKit-Learn v1.1.0
    (see PR #1429).
    """
    train_df, test_df, _, _, cat_features, \
        _, target, class_names, _, _ = \
        create_adult_income_dataset()

    insights = RAIInsights(model=None,
                           train=train_df,
                           test=test_df,
                           task_type='classification',
                           target_column=target,
                           categorical_features=cat_features,
                           classes=class_names)
    assert insights is not None

    causal_settings = dict(
        treatment_features=["age", "gender"],
        heterogeneity_features=["marital_status"],
        nuisance_model="automl",
        heterogeneity_model="forest",
        alpha=0.06,
        upper_bound_on_cat_expansion=49,
        treatment_cost=[0.1, 0.2],
        min_tree_leaf_samples=2,
        skip_cat_limit_checks=False,
        categories="auto",
        n_jobs=1,
        verbose=1,
        random_state=100,
    )
    insights.causal.add(**causal_settings)
    insights.compute()

    causal_results = insights.causal.get()
    assert causal_results is not None
    assert isinstance(causal_results, list)
    assert len(causal_results) == 1
    _check_causal_result(causal_results[0])
コード例 #14
0
    def test_dirty_train_test_data(self):
        """A NaN mixed into a categorical column is reported per dataset.

        The frame containing ``np.nan`` in ``c2`` is passed first as train
        and then as test; each time the error message must name the
        offending dataset.
        """
        feature_columns = ['c1', 'c2']
        dirty_frame = pd.DataFrame(data=[['1', np.nan], ['2', '3']],
                                   columns=feature_columns)
        clean_frame = pd.DataFrame(data=[['1', '2'], ['2', '3']],
                                   columns=feature_columns)
        dirty_labels = np.array([1, 0])
        clean_labels = np.array([1, 0])

        # The model is never fitted here; it is only handed to RAIInsights.
        model = LGBMClassifier(boosting_type='gbdt',
                               learning_rate=0.1,
                               max_depth=5,
                               n_estimators=200,
                               n_jobs=1,
                               random_state=777)

        dirty_frame[TARGET] = dirty_labels
        clean_frame[TARGET] = clean_labels

        scenarios = (
            (dirty_frame, clean_frame,
             'Error finding unique values in column c2. '
             'Please check your train data.'),
            (clean_frame, dirty_frame,
             'Error finding unique values in column c2. '
             'Please check your test data.'),
        )
        for train_frame, test_frame, expected_fragment in scenarios:
            with pytest.raises(UserConfigValidationException) as ucve:
                RAIInsights(model=model,
                            train=train_frame,
                            test=test_frame,
                            target_column=TARGET,
                            categorical_features=['c2'],
                            task_type='classification')

            assert expected_fragment in str(ucve.value)
コード例 #15
0
def create_rai_insights_object_multiclass_classification():
    """Build and compute a multiclass RAIInsights on the Iris dataset.

    Fits a k-nearest-neighbours classifier on an 80/20 split and registers
    the explainer, counterfactual and error analysis managers before
    calling ``compute()``.

    :return: The computed insights object.
    :rtype: RAIInsights
    """
    target_name = 'target'
    dataset = load_iris()
    train_array, test_array, train_labels, test_labels = train_test_split(
        dataset.data, dataset.target, test_size=0.2, random_state=0)

    # Strip the ' (cm)' unit suffix from the column names.
    columns = [raw_name.replace(' (cm)', '')
               for raw_name in dataset.feature_names]
    train_frame = pd.DataFrame(train_array, columns=columns)
    test_frame = pd.DataFrame(test_array, columns=columns)

    classifier = sklearn.neighbors.KNeighborsClassifier()
    classifier.fit(train_frame, train_labels)

    train_frame[target_name] = train_labels
    test_frame[target_name] = test_labels

    insights = RAIInsights(classifier, train_frame, test_frame, target_name,
                           'classification')
    insights.explainer.add()
    insights.counterfactual.add(10, desired_class=2)
    insights.error_analysis.add()
    insights.compute()
    return insights
    def test_causal_with_categorical_types(self, get_adult_shap_dataset):
        """Causal analysis with categorical columns cast to 'category'.

        :param get_adult_shap_dataset: Fixture yielding train data, test
            data, treatment features, heterogeneity features, categorical
            columns and the target feature name.
        """
        train_df, test_df, treatments, heterogeneity, \
            categorical_columns, target = get_adult_shap_dataset

        # Cast each categorical column in both frames to pandas 'category'.
        for column in categorical_columns:
            train_df[column] = train_df[column].astype("category")
            test_df[column] = test_df[column].astype("category")

        insights = RAIInsights(
            model=None,
            train=train_df,
            test=test_df,
            target_column=target,
            task_type='classification',
            categorical_features=categorical_columns,
        )

        insights.causal.add(treatment_features=treatments,
                            heterogeneity_features=heterogeneity)

        # Original author's note: "This one fails".
        insights.compute()
    def test_counterfactual_manager_save_load(self, tmpdir):
        """Counterfactual configs survive save/load without explainer.pkl.

        :param tmpdir: Temporary directory fixture used for saving.
        """
        train_frame, test_frame, train_labels, test_labels, \
            feature_names, _ = create_iris_data()

        classifier = create_lightgbm_classifier(train_frame, train_labels)
        train_frame['target'] = train_labels
        test_frame['target'] = test_labels

        rai_insights = RAIInsights(
            model=classifier,
            train=train_frame,
            test=test_frame.iloc[0:10],
            target_column='target',
            task_type='classification')

        # Two configurations that differ only in the desired class.
        for desired in (2, 1):
            rai_insights.counterfactual.add(
                total_CFs=10, desired_class=desired,
                features_to_vary=[feature_names[0]],
                permitted_range={feature_names[0]: [2.0, 5.0]})
        rai_insights.counterfactual.compute()

        computed = rai_insights.counterfactual.get()
        assert len(computed) == 2
        assert rai_insights.counterfactual.get()[0] is not None

        save_dir = tmpdir.mkdir('save-dir')
        rai_insights.save(save_dir)
        first_reload = RAIInsights.load(save_dir)

        assert len(first_reload.counterfactual.get()) == 2
        assert first_reload.counterfactual.get()[0] is not None

        # Remove each pickled dice-ml explainer so the next load cannot
        # read it back from disk and has to re-train it instead.
        counterfactual_root = save_dir / "counterfactual"
        for config_dir in DirectoryManager.list_sub_directories(
                counterfactual_root):
            config_manager = DirectoryManager(
                parent_directory_path=counterfactual_root,
                sub_directory_name=config_dir)
            pickle_path = \
                config_manager.get_generators_directory() / "explainer.pkl"
            os.remove(pickle_path)

        second_reload = RAIInsights.load(save_dir)
        config_list = \
            second_reload.counterfactual._counterfactual_config_list
        assert len(config_list) == 2
        for config in config_list:
            assert config.explainer is not None
    def test_load_missing_dirs(self, target_dir):
        """Loading succeeds when an empty manager directory is missing.

        Background from the original notes: when an object is saved to
        Azure, directories exist only implicitly, so a downloaded copy
        lacks the subdirectory of any manager that produced no outputs.
        The Explainer is the exception because it always writes a file
        into its subdirectory.

        :param target_dir: Name of the manager subdirectory to delete
            before loading.
        """
        train_df, test_df, train_labels, _, cat_features, \
            cont_features, target, _ = \
            create_adult_income_dataset()
        train_features = train_df.drop([target], axis=1)

        pipeline = create_complex_classification_pipeline(
            train_features, train_labels, cont_features, cat_features)
        original = RAIInsights(pipeline,
                               train_df,
                               test_df,
                               target,
                               categorical_features=cat_features,
                               task_type=ModelTask.CLASSIFICATION)

        with TemporaryDirectory() as tmpdir:
            save_root = Path(tmpdir) / "first_save"
            original.save(save_root)

            # The directory must be empty before it is removed.
            doomed_dir = save_root / target_dir
            assert list(doomed_dir.iterdir()) == []
            os.rmdir(doomed_dir)
            assert not doomed_dir.exists()

            reloaded = RAIInsights.load(save_root)
            assert reloaded is not None
コード例 #19
0
    def test_validate_unsupported_task_type(self):
        """An unknown task_type string is rejected with a clear message."""
        train_frame, test_frame, train_labels, test_labels, _, _ = \
            create_iris_data()

        classifier = create_lightgbm_classifier(train_frame, train_labels)
        train_frame[TARGET] = train_labels
        test_frame[TARGET] = test_labels

        # The pattern is a regex, hence the escaped square brackets.
        expected_error = (
            "Unsupported task type 'regre'. "
            "Should be one of \\['classification', 'regression'\\]")
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            RAIInsights(model=classifier,
                        train=train_frame,
                        test=test_frame,
                        target_column=TARGET,
                        task_type='regre')
コード例 #20
0
    def test_mismatch_train_test_features(self):
        """Train and test frames with different columns are rejected."""
        train_frame, test_frame, train_labels, test_labels, _, _ = \
            create_cancer_data()
        classifier = create_lightgbm_classifier(train_frame, train_labels)

        # The test frame gets a differently named label column on purpose.
        train_frame[TARGET] = train_labels
        test_frame['bad_target'] = test_labels

        expected_fragment = \
            'The features in train and test data do not match'
        with pytest.raises(UserConfigValidationException) as ucve:
            RAIInsights(model=classifier,
                        train=train_frame,
                        test=test_frame,
                        target_column=TARGET,
                        task_type='classification')
        assert expected_fragment in str(ucve.value)
コード例 #21
0
    def test_validate_bad_target_name(self):
        """A target column absent from the data is rejected."""
        train_frame, test_frame, train_labels, test_labels, _, _ = \
            create_iris_data()

        classifier = create_lightgbm_classifier(train_frame, train_labels)
        train_frame[TARGET] = train_labels
        test_frame[TARGET] = test_labels

        expected_fragment = \
            "Target name bad_target not present in train/test data"
        with pytest.raises(UserConfigValidationException) as ucve:
            RAIInsights(model=classifier,
                        train=train_frame,
                        test=test_frame,
                        target_column='bad_target',
                        task_type='classification')
        assert expected_fragment in str(ucve.value)
コード例 #22
0
    def test_classes_passes(self):
        """Classes derived without an explicit ``classes`` are sorted."""
        train_frame, test_frame, train_labels, test_labels, _, _ = \
            create_cancer_data()
        classifier = create_lightgbm_classifier(train_frame, train_labels)

        train_frame[TARGET] = train_labels
        test_frame[TARGET] = test_labels

        insights = RAIInsights(model=classifier,
                               train=train_frame,
                               test=test_frame,
                               target_column=TARGET,
                               task_type='classification')

        # Each entry must be <= its successor, i.e. non-decreasing order.
        # NOTE(review): presumably _classes is a numpy array so that the
        # comparison is element-wise - confirm.
        derived_classes = insights._classes
        leading = derived_classes[:-1]
        trailing = derived_classes[1:]
        assert np.all(leading <= trailing)
コード例 #23
0
    def test_no_model_but_serializer_provided(self):
        """Passing a serializer without a model is rejected."""
        train_frame, test_frame, train_labels, test_labels, _, _ = \
            create_cancer_data()

        train_frame[TARGET] = train_labels
        test_frame[TARGET] = test_labels

        expected_fragment = \
            'No valid model is specified but model serializer provided.'
        with pytest.raises(UserConfigValidationException) as ucve:
            RAIInsights(model=None,
                        train=train_frame,
                        test=test_frame,
                        target_column=TARGET,
                        task_type='classification',
                        serializer={})
        assert expected_fragment in str(ucve.value)
コード例 #24
0
    def test_model_analysis_incorrect_task_type(self):
        """A classifier passed with task_type='regression' is rejected."""
        train_frame, test_frame, train_labels, test_labels, _, _ = \
            create_cancer_data()
        classifier = create_lightgbm_classifier(train_frame, train_labels)

        train_frame[TARGET] = train_labels
        test_frame[TARGET] = test_labels

        # NOTE(review): the literals join as "modelprovided" with no space;
        # presumably this mirrors the library's own message - confirm
        # before "fixing" it here.
        expected_error = ('The regression model'
                          'provided has a predict_proba function. '
                          'Please check the task_type.')
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            RAIInsights(model=classifier,
                        train=train_frame,
                        test=test_frame,
                        target_column=TARGET,
                        task_type='regression')
コード例 #25
0
    def test_unsupported_train_test_types(self):
        """Passing ``.values`` instead of DataFrames is rejected."""
        train_frame, test_frame, train_labels, test_labels, _, _ = \
            create_cancer_data()
        classifier = create_lightgbm_classifier(train_frame, train_labels)

        train_frame[TARGET] = train_labels
        test_frame[TARGET] = test_labels

        expected_fragment = (
            "Unsupported data type for either train or test. "
            "Expecting pandas DataFrame for train and test.")
        with pytest.raises(UserConfigValidationException) as ucve:
            # .values hands over the underlying data rather than the frame.
            RAIInsights(model=classifier,
                        train=train_frame.values,
                        test=test_frame.values,
                        target_column=TARGET,
                        task_type='classification')

        assert expected_fragment in str(ucve.value)
コード例 #26
0
    def test_validate_categorical_features_not_having_train_features(self):
        """Categorical feature names missing from train data are rejected."""
        train_frame, test_frame, train_labels, test_labels, _, _ = \
            create_iris_data()

        classifier = create_lightgbm_classifier(train_frame, train_labels)
        train_frame[TARGET] = train_labels
        test_frame[TARGET] = test_labels

        # The pattern is a regex, hence the escaped square brackets.
        expected_error = (
            "Feature names in categorical_features "
            "do not exist in train data: \\['not_a_feature'\\]")
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            RAIInsights(model=classifier,
                        train=train_frame,
                        test=test_frame,
                        target_column=TARGET,
                        task_type='classification',
                        categorical_features=['not_a_feature'])
コード例 #27
0
    def test_validate_categorical_features_having_target(self):
        """Listing the target as a categorical feature is rejected."""
        train_frame, test_frame, train_labels, test_labels, _, _ = \
            create_iris_data()

        classifier = create_lightgbm_classifier(train_frame, train_labels)
        train_frame[TARGET] = train_labels
        test_frame[TARGET] = test_labels

        expected_fragment = \
            'Found target name target in categorical feature list'
        with pytest.raises(UserConfigValidationException) as ucve:
            RAIInsights(model=classifier,
                        train=train_frame,
                        test=test_frame,
                        target_column=TARGET,
                        task_type='classification',
                        categorical_features=[TARGET])
        assert expected_fragment in str(ucve.value)
コード例 #28
0
    def test_treatment_features_list_not_having_train_features(self):
        """Treatment feature names missing from train data are rejected."""
        train_frame, train_labels, test_frame, test_labels, _ = \
            create_binary_classification_dataset()

        classifier = create_lightgbm_classifier(train_frame, train_labels)
        train_frame[TARGET] = train_labels
        test_frame[TARGET] = test_labels

        insights = RAIInsights(model=classifier,
                               train=train_frame,
                               test=test_frame,
                               target_column=TARGET,
                               task_type='classification')

        # The pattern is a regex, hence the escaped square brackets.
        expected_error = (
            "Feature names in treatment_features "
            "do not exist in train data: \\['not_a_feature'\\]")
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            insights.causal.add(treatment_features=['not_a_feature'])
コード例 #29
0
    def test_model_predictions_predict(self):
        """A model whose predict() raises is rejected at construction."""
        train_frame, test_frame, train_labels, test_labels, _, _ = \
            create_cancer_data()

        train_frame[TARGET] = train_labels
        test_frame[TARGET] = test_labels

        # Mock model whose predict() raises on every call.
        failing_model = MagicMock()
        failing_model.predict.side_effect = Exception()

        expected_fragment = (
            'The model passed cannot be used for getting predictions '
            'via predict()')
        with pytest.raises(UserConfigValidationException) as ucve:
            RAIInsights(model=failing_model,
                        train=train_frame,
                        test=test_frame,
                        target_column=TARGET,
                        task_type='classification')

        assert expected_fragment in str(ucve.value)
コード例 #30
0
    def test_desired_range_not_set(self):
        """Regression counterfactuals require an explicit desired_range."""
        train_data, test_data, train_target, test_target, feature_names = \
            create_housing_data()

        # The regressor is fitted on the data as returned, before it is
        # wrapped into DataFrames.
        regressor = create_sklearn_random_forest_regressor(
            train_data, train_target)

        train_frame = pd.DataFrame(train_data, columns=feature_names)
        test_frame = pd.DataFrame(test_data, columns=feature_names)
        train_frame[TARGET] = train_target
        test_frame[TARGET] = test_target

        insights = RAIInsights(model=regressor,
                               train=train_frame,
                               test=test_frame,
                               target_column=TARGET,
                               task_type='regression')

        expected_error = ('The desired_range should not be None'
                          ' for regression scenarios.')
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            insights.counterfactual.add(total_CFs=10, method='random')