def create_iris_pandas():
    """Return the iris data with both feature splits as pandas DataFrames.

    The train and test features returned by ``create_iris_data()`` are
    wrapped in DataFrames whose columns are the feature names. The sixth
    value returned by ``create_iris_data()`` is discarded.

    :return: Tuple of (X_train, X_test, y_train, y_test, feature_names).
    """
    train_values, test_values, y_train, y_test, feature_names, _ = \
        create_iris_data()

    # Train frame is built first, then the test frame.
    train_frame, test_frame = (
        pd.DataFrame(values, columns=feature_names)
        for values in (train_values, test_values)
    )

    return train_frame, test_frame, y_train, y_test, feature_names
    def test_surrogate_error_tree_iris(self, analyzer_type):
        """Run the error analyzer on every iris classification model.

        Models come from ``create_models_classification`` called with the
        training split; each is passed to ``run_error_analyzer`` with the
        test split, the feature names and ``analyzer_type``.
        """
        iris_data = create_iris_data()
        X_train, X_test, y_train, y_test, feature_names, _ = iris_data

        for classifier in create_models_classification(X_train, y_train):
            run_error_analyzer(
                classifier, X_test, y_test, feature_names, analyzer_type)
    def test_importances_iris(self):
        """Run the error analyzer on every iris classification model.

        Each model is analyzed on the test split with an empty list of
        categorical features.
        """
        iris_data = create_iris_data()
        X_train, X_test, y_train, y_test, feature_names, _ = iris_data

        for classifier in create_models_classification(X_train, y_train):
            # A fresh empty categorical-features list is built per call.
            run_error_analyzer(classifier, X_test, y_test, feature_names, [])
Esempio n. 4
0
    def test_matrix_filter_iris(self):
        """Run the error analyzer on every model built from the iris data.

        Models come from ``create_models`` called with the training split;
        each is analyzed on the test split with an empty list of
        categorical features.
        """
        iris_data = create_iris_data()
        x_train, x_test, y_train, y_test, feature_names, _ = iris_data

        for fitted_model in create_models(x_train, y_train):
            # A fresh empty categorical-features list is built per call.
            run_error_analyzer(fitted_model, x_test, y_test, feature_names, [])
Esempio n. 5
0
def iris():
    """Yield the iris dataset as a dict keyed by ``DatasetConstants``.

    The six values returned by ``create_iris_data()`` are mapped, in
    order, to the X_TRAIN, X_TEST, Y_TRAIN, Y_TEST, FEATURES and CLASSES
    keys. The generator yields exactly once.
    """
    x_train, x_test, y_train, y_test, features, classes = create_iris_data()
    keys = (
        DatasetConstants.X_TRAIN,
        DatasetConstants.X_TEST,
        DatasetConstants.Y_TRAIN,
        DatasetConstants.Y_TEST,
        DatasetConstants.FEATURES,
        DatasetConstants.CLASSES,
    )
    values = (x_train, x_test, y_train, y_test, features, classes)
    yield dict(zip(keys, values))
Esempio n. 6
0
    def test_error_report_iris(self, alter_feature_names):
        """Run the error analyzer on iris models, optionally altering names.

        ``alter_feature_names`` is forwarded to ``create_iris_data`` as
        ``append_special_characters`` and to ``run_error_analyzer`` as
        ``expect_user_warnings``.
        """
        iris_data = create_iris_data(
            append_special_characters=alter_feature_names)
        X_train, X_test, y_train, y_test, feature_names, _ = iris_data

        for classifier in create_models_classification(X_train, y_train):
            # A fresh empty categorical-features list is built per call.
            run_error_analyzer(
                classifier, X_test, y_test, feature_names, [],
                expect_user_warnings=alter_feature_names)
Esempio n. 7
0
    def test_matrix_filter_iris_quantile_binning(self):
        """Run the error analyzer on iris models with quantile binning on.

        Calls ``run_error_analyzer_on_models`` for the classification task
        with ``quantile_binning=True``.
        """
        iris_data = create_iris_data()
        X_train, X_test, y_train, y_test, feature_names, _ = iris_data

        task = ModelTask.CLASSIFICATION
        run_error_analyzer_on_models(
            X_train, y_train, X_test, y_test, feature_names, task,
            quantile_binning=True)
Esempio n. 8
0
    def test_error_report_iris_numpy_int64_features(self):
        """Run the error analyzer with ``np.int64`` feature identifiers.

        The feature names returned by ``create_iris_data()`` are ignored
        and replaced with one ``np.int64`` index per training column.
        """
        X_train, X_test, y_train, y_test, _, _ = create_iris_data()
        # Test with numpy feature indexes instead of string feature names
        column_count = X_train.shape[1]
        feature_names = [np.int64(index) for index in range(column_count)]

        for classifier in create_models_classification(X_train, y_train):
            # A fresh empty categorical-features list is built per call.
            run_error_analyzer(classifier, X_test, y_test, feature_names, [])
    def test_raianalyzer_iris(self):
        """Run ``run_raianalyzer`` on every model built from the iris data.

        Both feature splits are converted to DataFrames and the targets
        are stored in the ``LABELS`` column before the analyzer runs.
        """
        iris_data = create_iris_data()
        x_train, x_test, y_train, y_test, feature_names, classes = iris_data
        train_df = pd.DataFrame(x_train, columns=feature_names)
        test_df = pd.DataFrame(x_test, columns=feature_names)

        # Models are created before the label column is added to the frames.
        models = create_models(train_df, y_train)
        train_df[LABELS] = y_train
        test_df[LABELS] = y_test

        for fitted_model in models:
            run_raianalyzer(fitted_model, train_df, test_df, LABELS, classes)
Esempio n. 10
0
    def test_matrix_filter_iris_num_bins(self):
        """Run the error analyzer on iris models for several bin counts.

        ``run_error_analyzer_on_models`` is called once for each of the
        ``num_bins`` values 2, 4, 10 and 12, in that order.
        """
        iris_data = create_iris_data()
        X_train, X_test, y_train, y_test, feature_names, _ = iris_data

        task = ModelTask.CLASSIFICATION
        for bin_count in (2, 4, 10, 12):
            run_error_analyzer_on_models(
                X_train, y_train, X_test, y_test, feature_names, task,
                num_bins=bin_count)
    def test_explain_model_multiclass_classification_with_different_format_predictions(
            self, mimic_explainer, if_predictions_as_dataframe, explainable_model):
        """Check a global explanation is produced for wrapped predictions.

        A logistic regression is fitted on the iris training split and
        wrapped in ``PredictAsDataFrameClassificationTestModel``. The
        wrapper receives ``if_predictions_as_dataframe`` as its
        ``return_predictions_as_dataframe`` argument. The test asserts
        that ``explain_global`` on the test split returns a non-None
        explanation.

        :param mimic_explainer: Callable building the explainer from
            (model, initialization data, explainable model).
        :param if_predictions_as_dataframe: Forwarded to the wrapper model.
        :param explainable_model: Passed through to ``mimic_explainer``.
        """
        x_train, x_test, y_train, _, _, _ = create_iris_data()
        # fit() returns the fitted estimator, so one call is enough; the
        # original fitted the same model a second time for no effect.
        model = LogisticRegression(random_state=42).fit(x_train, y_train)

        model = PredictAsDataFrameClassificationTestModel(
            model, return_predictions_as_dataframe=if_predictions_as_dataframe)

        explainer = mimic_explainer(model, x_train, explainable_model)
        global_explanation = explainer.explain_global(evaluation_examples=x_test)
        assert global_explanation is not None
    def test_model_analysis_iris(self, manager_type):
        """Run ``run_model_analysis`` on every iris classification model.

        Both feature splits are converted to DataFrames and the targets
        are stored in the ``LABELS`` column. Each model is analyzed with
        the given ``manager_type`` and ``{DESIRED_CLASS: 0}`` as manager
        arguments.
        """
        iris_data = create_iris_data()
        x_train, x_test, y_train, y_test, feature_names, classes = iris_data
        train_df = pd.DataFrame(x_train, columns=feature_names)
        test_df = pd.DataFrame(x_test, columns=feature_names)

        # Models are created before the label column is added to the frames.
        models = create_models_classification(train_df, y_train)
        train_df[LABELS] = y_train
        test_df[LABELS] = y_test
        manager_args = {DESIRED_CLASS: 0}

        for classifier in models:
            run_model_analysis(
                classifier, train_df, test_df, LABELS, [],
                manager_type, manager_args, classes)
    def test_parameters(self, metric, min_child_samples, max_depth, num_leaves,
                        analyzer_type):
        """Run the error analyzer with explicit tree parameters and metric.

        A k-neighbors classifier is created from the iris training split
        and passed to ``run_error_analyzer`` along with the supplied
        ``max_depth``, ``num_leaves``, ``min_child_samples`` and ``metric``.
        """
        iris_data = create_iris_data()
        X_train, X_test, y_train, y_test, feature_names, _ = iris_data

        knn_model = create_kneighbors_classifier(X_train, y_train)
        run_error_analyzer(
            knn_model, X_test, y_test, feature_names, analyzer_type,
            max_depth=max_depth,
            num_leaves=num_leaves,
            min_child_samples=min_child_samples,
            metric=metric)
Esempio n. 14
0
    def test_matrix_filter_iris_invalid_num_bins(self):
        """Check that non-positive ``num_bins`` values raise ``ValueError``.

        For each of -10, -1 and 0, ``run_error_analyzer_on_models`` must
        raise a ``ValueError`` whose message matches the expected text.
        """
        iris_data = create_iris_data()
        X_train, X_test, y_train, y_test, feature_names, _ = iris_data

        task = ModelTask.CLASSIFICATION
        expected_message = (
            'Number of bins parameter must be greater than 0 for the heatmap')
        for bad_bin_count in (-10, -1, 0):
            with pytest.raises(ValueError, match=expected_message):
                run_error_analyzer_on_models(
                    X_train, y_train, X_test, y_test, feature_names, task,
                    num_bins=bad_bin_count)
Esempio n. 15
0
    def test_matrix_filter_iris_int64(self):
        """Run the error analyzer with the first feature cast to int64.

        Both splits are converted to DataFrames and their first feature
        column is cast to ``np.int64``. That column is then passed as the
        only entry of ``matrix_features``.
        """
        iris_data = create_iris_data()
        train_values, test_values, y_train, y_test, feature_names, _ = \
            iris_data

        train_df = pd.DataFrame(train_values, columns=feature_names)
        test_df = pd.DataFrame(test_values, columns=feature_names)

        first_feature = feature_names[0]
        # Cast the train frame first, then the test frame.
        for frame in (train_df, test_df):
            frame[first_feature] = frame[first_feature].astype(np.int64)

        task = ModelTask.CLASSIFICATION
        run_error_analyzer_on_models(
            train_df, y_train, test_df, y_test, feature_names, task,
            matrix_features=[first_feature])
Esempio n. 16
0
    def test_matrix_filter_iris_filters(self):
        """Run the error analyzer on iris models with a single filter.

        The filter applies the 'less and equal' method with argument 2.85
        to the second feature column.
        """
        # Validate the shift cohort functionality where base
        # cohort was chosen in tree view
        iris_data = create_iris_data()
        X_train, X_test, y_train, y_test, feature_names, _ = iris_data

        cohort_filter = {
            'arg': [2.85],
            'column': feature_names[1],
            'method': 'less and equal'
        }

        task = ModelTask.CLASSIFICATION
        run_error_analyzer_on_models(
            X_train, y_train, X_test, y_test, feature_names, task,
            filters=[cohort_filter])
 def test_explain_model_serialization_multiclass(self, mimic_explainer):
     """Validate model serialization for an SVM built from the iris data.

     Only the train/test features and the training targets returned by
     ``create_iris_data()`` are used; the remaining values are discarded.
     """
     iris_data = create_iris_data()
     x_train, x_test, y_train, _, _, _ = iris_data
     # Fit an SVM model
     svm_model = create_sklearn_svm_classifier(x_train, y_train)
     self._validate_model_serialization(
         svm_model, x_train, x_test, mimic_explainer)