Esempio n. 1
0
def create_iris_pandas():
    """Return the iris split with both feature sets as DataFrames.

    The six values produced by ``create_iris_data`` are unpacked, the
    train and test features are each wrapped in a ``pd.DataFrame`` whose
    columns are the feature names, and the sixth value is discarded.

    :return: Tuple ``(X_train, X_test, y_train, y_test, feature_names)``.
    """
    train_x, test_x, train_y, test_y, columns, _ = create_iris_data()
    train_frame, test_frame = (
        pd.DataFrame(features, columns=columns)
        for features in (train_x, test_x)
    )
    return train_frame, test_frame, train_y, test_y, columns
    def test_weird_predict_proba_function(self):
        """Expect a validation error when predict_proba mutates its input.

        The wrapped model adds the target column to the dataset it is
        given, and constructing ``RAIInsights`` with it is expected to
        raise ``UserConfigValidationException``.
        """
        class WeirdModelPredictProbaWrapper():
            """Model wrapper whose predict_proba adds the target column."""

            def __init__(self, model):
                self.model = model

            def predict(self, test_data_pandas):
                return self.model.predict(test_data_pandas)

            def predict_proba(self, test_data_pandas):
                # Deliberate side effect: the caller's dataset gains the
                # target column if it does not already have one.
                if TARGET not in test_data_pandas.columns:
                    test_data_pandas[TARGET] = 0
                features_only = test_data_pandas.drop(columns=TARGET)
                return self.model.predict_proba(features_only)

        train_features, test_features, train_labels, test_labels, _, _ = (
            create_iris_data())

        # The classifier is fitted on the features before the target
        # column is attached to the copies below.
        wrapped_model = WeirdModelPredictProbaWrapper(
            create_lightgbm_classifier(train_features, train_labels))

        train_df = train_features.copy()
        test_df = test_features.copy()
        train_df[TARGET] = train_labels
        test_df[TARGET] = test_labels

        expected_error = ('Calling model predict_proba function modifies '
                          'input dataset features. Please check if '
                          'predict function is defined correctly.')
        insights_kwargs = dict(model=wrapped_model,
                               train=train_df,
                               test=test_df,
                               target_column=TARGET,
                               task_type='classification')
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            RAIInsights(**insights_kwargs)
    def test_rai_insights_iris(self, manager_type):
        """Run ``run_rai_insights`` on iris for every classification model.

        :param manager_type: Manager under test; passed through unchanged
            to ``run_rai_insights``.
        """
        (train_df, test_df, train_labels, test_labels,
         feature_names, classes) = create_iris_data()

        # Models are built before the label column is attached.
        candidate_models = create_models_classification(
            train_df, train_labels)
        train_df[LABELS] = train_labels
        test_df[LABELS] = test_labels

        first_feature = feature_names[0]
        manager_args = {
            ManagerParams.TREATMENT_FEATURES: [first_feature],
            ManagerParams.DESIRED_CLASS: 0,
            ManagerParams.FEATURE_IMPORTANCE: True
        }

        for candidate in candidate_models:
            run_rai_insights(candidate, train_df, test_df, LABELS, None,
                             manager_type, manager_args, classes)
    def test_validate_bad_target_name(self):
        """Expect a validation error for a target column not in the data."""
        train_df, test_df, train_labels, test_labels, _, _ = (
            create_iris_data())

        classifier = create_lightgbm_classifier(train_df, train_labels)
        train_df[TARGET] = train_labels
        test_df[TARGET] = test_labels

        insights_kwargs = dict(model=classifier,
                               train=train_df,
                               test=test_df,
                               target_column='bad_target',
                               task_type='classification')
        with pytest.raises(UserConfigValidationException) as raised:
            RAIInsights(**insights_kwargs)

        error_text = str(raised.value)
        expected_fragment = (
            "Target name bad_target not present in train/test data")
        assert expected_fragment in error_text
    def test_validate_unsupported_task_type(self):
        """Expect a validation error for the unknown task type 'regre'."""
        train_df, test_df, train_labels, test_labels, _, _ = (
            create_iris_data())

        classifier = create_lightgbm_classifier(train_df, train_labels)
        train_df[TARGET] = train_labels
        test_df[TARGET] = test_labels

        # The brackets are escaped because pytest treats ``match`` as a
        # regular expression.
        expected_error = (
            "Unsupported task type 'regre'. "
            "Should be one of \\['classification', 'regression'\\]")
        insights_kwargs = dict(model=classifier,
                               train=train_df,
                               test=test_df,
                               target_column=TARGET,
                               task_type='regre')
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            RAIInsights(**insights_kwargs)
    def test_validate_categorical_features_not_having_train_features(self):
        """Expect a validation error for an unknown categorical feature."""
        train_df, test_df, train_labels, test_labels, _, _ = (
            create_iris_data())

        classifier = create_lightgbm_classifier(train_df, train_labels)
        train_df[TARGET] = train_labels
        test_df[TARGET] = test_labels

        # The brackets are escaped because pytest treats ``match`` as a
        # regular expression.
        expected_error = (
            "Feature names in categorical_features "
            "do not exist in train data: \\['not_a_feature'\\]")
        insights_kwargs = dict(model=classifier,
                               train=train_df,
                               test=test_df,
                               target_column=TARGET,
                               task_type='classification',
                               categorical_features=['not_a_feature'])
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            RAIInsights(**insights_kwargs)
    def test_validate_categorical_features_having_target(self):
        """Expect a validation error when the target is listed as categorical.
        """
        train_df, test_df, train_labels, test_labels, _, _ = (
            create_iris_data())

        classifier = create_lightgbm_classifier(train_df, train_labels)
        train_df[TARGET] = train_labels
        test_df[TARGET] = test_labels

        insights_kwargs = dict(model=classifier,
                               train=train_df,
                               test=test_df,
                               target_column=TARGET,
                               task_type='classification',
                               categorical_features=[TARGET])
        with pytest.raises(UserConfigValidationException) as raised:
            RAIInsights(**insights_kwargs)

        error_text = str(raised.value)
        expected_fragment = (
            'Found target name target in categorical feature list')
        assert expected_fragment in error_text
    def test_validate_test_data_size(self):
        """Expect a validation error when the test set exceeds the row limit.

        ``maximum_rows_for_test`` is set to one less than the number of
        test rows, so constructing ``RAIInsights`` is expected to raise
        ``UserConfigValidationException`` with a message that names both
        row counts and suggests resampling or raising the limit.
        """
        X_train, X_test, y_train, y_test, _, _ = \
            create_iris_data()

        model = create_lightgbm_classifier(X_train, y_train)
        X_train[TARGET] = y_train
        X_test[TARGET] = y_test

        # Derive both counts from the data rather than hard-coding them,
        # so the assertion keeps tracking the actual split size.
        test_row_count = len(y_test)
        row_limit = test_row_count - 1

        with pytest.raises(UserConfigValidationException) as ucve:
            # Pass the real target column so the row limit is the only
            # invalid setting; the test then no longer depends on the
            # order in which RAIInsights runs its validations.
            RAIInsights(model=model,
                        train=X_train,
                        test=X_test,
                        target_column=TARGET,
                        task_type='classification',
                        maximum_rows_for_test=row_limit)

        error_text = str(ucve.value)
        expected_counts = (
            "The test data has {0} rows, but limit is set to {1} rows"
        ).format(test_row_count, row_limit)
        assert expected_counts in error_text
        assert ("Please resample the test data or "
                "adjust maximum_rows_for_test") in error_text
    def test_desired_class_opposite_multi_classification(self):
        """Expect a validation error for desired_class='opposite' on iris.

        Adding a counterfactual configuration with
        ``desired_class='opposite'`` is expected to raise
        ``UserConfigValidationException``.
        """
        train_df, test_df, train_labels, test_labels, _, _ = (
            create_iris_data())
        classifier = create_lightgbm_classifier(train_df, train_labels)
        train_df[TARGET] = train_labels
        test_df[TARGET] = test_labels

        insights = RAIInsights(model=classifier,
                               train=train_df,
                               test=test_df,
                               target_column=TARGET,
                               task_type='classification')

        expected_error = (
            'The desired_class attribute should not be \'opposite\''
            ' It should be the class value for multiclass'
            ' classification scenario.')
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            insights.counterfactual.add(total_CFs=10,
                                        method='random',
                                        desired_class='opposite')
    def test_feature_importance_with_less_counterfactuals(self):
        """Expect a validation error when total_CFs is 5.

        The expected message states that a ``total_CFs`` of at least 10 is
        required to use counterfactual feature importances.
        """
        train_df, test_df, train_labels, test_labels, _, _ = (
            create_iris_data())
        classifier = create_lightgbm_classifier(train_df, train_labels)
        train_df[TARGET] = train_labels
        test_df[TARGET] = test_labels

        insights = RAIInsights(model=classifier,
                               train=train_df,
                               test=test_df,
                               target_column=TARGET,
                               task_type='classification')

        expected_error = (
            "A total_CFs value of at least 10 is required to "
            "use counterfactual feature importances. "
            "Either increase total_CFs to at least 10 or "
            "set feature_importance to False.")
        with pytest.raises(UserConfigValidationException,
                           match=expected_error):
            insights.counterfactual.add(total_CFs=5,
                                        method='random',
                                        desired_class=2)