Ejemplo n.º 1
0
    def test_matrix_filter_housing(self):
        """Run the error analyzer on housing data for two regression metrics.

        For each of mean squared error and mean absolute error, three runs
        are made: one without explicit matrix features, one restricted to
        the first feature, and one restricted to the feature at index 3.
        """
        X_train, X_test, y_train, y_test, feature_names = create_housing_data()

        def analyze(**options):
            # All runs share the data and task; only the options differ.
            run_error_analyzer_on_models(X_train, y_train, X_test, y_test,
                                         feature_names, ModelTask.REGRESSION,
                                         **options)

        regression_metrics = (Metrics.MEAN_SQUARED_ERROR,
                              Metrics.MEAN_ABSOLUTE_ERROR)
        for metric in regression_metrics:
            # Baseline run: matrix features are left to the helper's default.
            analyze(metric=metric)

            # Single-feature runs instead of two features.
            # Index 0: the first feature.
            # Index 3: NOTE(review) the original comment described this as
            # the "third feature" with few unique values, meant to exercise
            # the code path without binning; index 3 is the fourth entry of
            # feature_names -- confirm which column is intended.
            for feature_index in (0, 3):
                analyze(matrix_features=[feature_names[feature_index]],
                        metric=metric)
Ejemplo n.º 2
0
    def test_error_report_housing(self):
        """Run the error analyzer for every regression model on housing data.

        Models are built from the training split and each one is analyzed
        against the test split with an empty categorical feature list.
        """
        X_train, X_test, y_train, y_test, feature_names = create_housing_data()

        for regressor in create_models_regression(X_train, y_train):
            # No categorical features are declared; a fresh empty list is
            # passed on every call.
            run_error_analyzer(regressor, X_test, y_test, feature_names, [])
Ejemplo n.º 3
0
    def test_error_report_housing_pandas(self, filter_features):
        """Run the error analyzer on housing data wrapped by create_dataframe.

        Both the training and test features are converted with
        ``create_dataframe`` before the regression models are built, and
        ``filter_features`` is forwarded unchanged to ``run_error_analyzer``.

        :param filter_features: Value passed through as the
            ``filter_features`` keyword of ``run_error_analyzer``.
        """
        X_train, X_test, y_train, y_test, feature_names = create_housing_data()

        # Convert the training split first, then the test split.
        train_frame = create_dataframe(X_train, feature_names)
        test_frame = create_dataframe(X_test, feature_names)

        for regressor in create_models_regression(train_frame, y_train):
            # No categorical features are declared; a fresh empty list is
            # passed on every call.
            run_error_analyzer(regressor, test_frame, y_test, feature_names,
                               [], filter_features=filter_features)
Ejemplo n.º 4
0
    def test_matrix_filter_housing_quantile_binning(self):
        """Run the matrix filter with quantile binning on 'Population'.

        The housing data is created with ``test_size=0.5`` and analyzed as a
        regression task with a single matrix feature.
        """
        # Regression test for quantile binning: per the original note, this
        # scenario previously errored out because the first category did not
        # fit into the bins.
        # NOTE(review): the original note named a "CRIM" feature, but the
        # feature passed here is 'Population' -- confirm the note's history.
        X_train, X_test, y_train, y_test, feature_names = create_housing_data(
            test_size=0.5)

        run_error_analyzer_on_models(X_train, y_train, X_test, y_test,
                                     feature_names, ModelTask.REGRESSION,
                                     matrix_features=['Population'],
                                     quantile_binning=True)
Ejemplo n.º 5
0
    def test_matrix_filter_housing_filters(self):
        """Run the error analyzer on housing data with two filters applied.

        One filter targets the 'Population' column and the other the
        'AveRooms' column; both are passed together via ``filters``.
        """
        X_train, X_test, y_train, y_test, feature_names = create_housing_data()

        # 'less and equal' with argument 600 on the 'Population' column.
        population_filter = {
            'arg': [600],
            'column': 'Population',
            'method': 'less and equal'
        }
        # 'greater' with argument 6 on the 'AveRooms' column.
        rooms_filter = {
            'arg': [6],
            'column': 'AveRooms',
            'method': 'greater'
        }

        run_error_analyzer_on_models(X_train, y_train, X_test, y_test,
                                     feature_names, ModelTask.REGRESSION,
                                     filters=[population_filter, rooms_filter])