def test_predict_batched_sparse(self):
        dataset_properties = {'sparse': True}
        include = {'regressor': ['decision_tree']}

        cs = SimpleRegressionPipeline(
            dataset_properties=dataset_properties,
            include=include).get_hyperparameter_search_space()

        default = cs.get_default_configuration()
        regressor = SimpleRegressionPipeline(
            config=default,
            random_state=1,
            dataset_properties=dataset_properties,
            include=include)

        X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston',
                                                       make_sparse=True)
        regressor.fit(X_train, Y_train)
        X_test_ = X_test.copy()
        prediction_ = regressor.predict(X_test_)
        mock_predict = unittest.mock.Mock(
            wraps=regressor.steps[-1][-1].predict)
        regressor.steps[-1][-1].predict = mock_predict
        prediction = regressor.predict(X_test, batch_size=20)
        self.assertEqual((356, ), prediction.shape)
        self.assertEqual(18, mock_predict.call_count)
        np.testing.assert_array_almost_equal(prediction_, prediction)
Exemple #2
0
    def test_configurations(self):
        # Use a limit of ~4GiB
        limit = 4000 * 1024 * 1024
        resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

        cs = SimpleRegressionPipeline.get_hyperparameter_search_space()

        print(cs)
        cs.seed(1)

        for i in range(10):
            config = cs.sample_configuration()
            config._populate_values()
            if config['regressor:sgd:n_iter'] is not None:
                config._values['regressor:sgd:n_iter'] = 5

            X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
            cls = SimpleRegressionPipeline(config, random_state=1)
            print(config)
            try:
                cls.fit(X_train, Y_train)
                X_test_ = X_test.copy()
                predictions = cls.predict(X_test)
                self.assertIsInstance(predictions, np.ndarray)
                predicted_probabiliets = cls.predict(X_test_)
                self.assertIsInstance(predicted_probabiliets, np.ndarray)
            except ValueError as e:
                if "Floating-point under-/overflow occurred at epoch" in \
                        e.args[0] or \
                                "removed all features" in e.args[0] or \
                                "all features are discarded" in e.args[0]:
                    continue
                else:
                    print(config)
                    print(traceback.format_exc())
                    raise e
            except RuntimeWarning as e:
                if "invalid value encountered in sqrt" in e.args[0]:
                    continue
                elif "divide by zero encountered in" in e.args[0]:
                    continue
                elif "invalid value encountered in divide" in e.args[0]:
                    continue
                elif "invalid value encountered in true_divide" in e.args[0]:
                    continue
                else:
                    print(config)
                    print(traceback.format_exc())
                    raise e
            except UserWarning as e:
                if "FastICA did not converge" in e.args[0]:
                    continue
                else:
                    print(config)
                    print(traceback.format_exc())
                    raise e
            except MemoryError as e:
                continue
    def test_predict_batched(self):
        cs = SimpleRegressionPipeline.get_hyperparameter_search_space()
        default = cs.get_default_configuration()
        cls = SimpleRegressionPipeline(default)

        X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
        cls.fit(X_train, Y_train)
        X_test_ = X_test.copy()
        prediction_ = cls.predict(X_test_)
        cls_predict = mock.Mock(wraps=cls.pipeline_)
        cls.pipeline_ = cls_predict
        prediction = cls.predict(X_test, batch_size=20)
        self.assertEqual((356,), prediction.shape)
        self.assertEqual(18, cls_predict.predict.call_count)
        assert_array_almost_equal(prediction_, prediction)
    def test_predict_batched(self):
        cs = SimpleRegressionPipeline.get_hyperparameter_search_space()
        default = cs.get_default_configuration()
        cls = SimpleRegressionPipeline(default)

        X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
        cls.fit(X_train, Y_train)
        X_test_ = X_test.copy()
        prediction_ = cls.predict(X_test_)
        cls_predict = mock.Mock(wraps=cls.pipeline_)
        cls.pipeline_ = cls_predict
        prediction = cls.predict(X_test, batch_size=20)
        self.assertEqual((356,), prediction.shape)
        self.assertEqual(18, cls_predict.predict.call_count)
        assert_array_almost_equal(prediction_, prediction)
Exemple #5
0
    def test_predict_batched(self):
        include = {'regressor': ['decision_tree']}
        cs = SimpleRegressionPipeline(include=include).get_hyperparameter_search_space()
        default = cs.get_default_configuration()
        regressor = SimpleRegressionPipeline(default, include=include)

        X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
        regressor.fit(X_train, Y_train)
        X_test_ = X_test.copy()
        prediction_ = regressor.predict(X_test_)
        mock_predict = unittest.mock.Mock(wraps=regressor.steps[-1][-1].predict)
        regressor.steps[-1][-1].predict = mock_predict
        prediction = regressor.predict(X_test, batch_size=20)
        self.assertEqual((356,), prediction.shape)
        self.assertEqual(18, mock_predict.call_count)
        assert_array_almost_equal(prediction_, prediction)
    def test_predict_batched(self):
        include = {'regressor': ['decision_tree']}
        cs = SimpleRegressionPipeline(include=include).get_hyperparameter_search_space()
        default = cs.get_default_configuration()
        regressor = SimpleRegressionPipeline(default, include=include)

        X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
        regressor.fit(X_train, Y_train)
        X_test_ = X_test.copy()
        prediction_ = regressor.predict(X_test_)
        mock_predict = unittest.mock.Mock(wraps=regressor.steps[-1][-1].predict)
        regressor.steps[-1][-1].predict = mock_predict
        prediction = regressor.predict(X_test, batch_size=20)
        self.assertEqual((356,), prediction.shape)
        self.assertEqual(18, mock_predict.call_count)
        assert_array_almost_equal(prediction_, prediction)
    def test_predict_batched(self):
        cs = SimpleRegressionPipeline.get_hyperparameter_search_space(
            include={'regressor': ['decision_tree']})
        default = cs.get_default_configuration()
        cls = SimpleRegressionPipeline(default)

        X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')
        cls.fit(X_train, Y_train)
        X_test_ = X_test.copy()
        prediction_ = cls.predict(X_test_)
        cls_predict = unittest.mock.Mock(wraps=cls.pipeline_)
        cls.pipeline_ = cls_predict
        prediction = cls.predict(X_test, batch_size=20)
        self.assertEqual((292, ), prediction.shape)
        self.assertEqual(15, cls_predict.predict.call_count)
        assert_array_almost_equal(prediction_, prediction)
    def test_configurations_sparse(self):
        # Use a limit of ~4GiB
        limit = 4000 * 1024 * 1024
        resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

        cs = SimpleRegressionPipeline.get_hyperparameter_search_space(
            dataset_properties={'sparse': True})
        print(cs)
        for i in range(10):
            config = cs.sample_configuration()
            config._populate_values()
            if 'classifier:passive_aggressive:n_iter' in config and \
                            config[
                                'classifier:passive_aggressive:n_iter'] is not None:
                config._values['classifier:passive_aggressive:n_iter'] = 5
            if 'classifier:sgd:n_iter' in config and \
                            config['classifier:sgd:n_iter'] is not None:
                config._values['classifier:sgd:n_iter'] = 5

            print(config)
            X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston',
                                                           make_sparse=True)
            cls = SimpleRegressionPipeline(config, random_state=1)
            try:
                cls.fit(X_train, Y_train)
                predictions = cls.predict(X_test)
            except ValueError as e:
                if "Floating-point under-/overflow occurred at epoch" in \
                        e.args[0] or \
                                "removed all features" in e.args[0] or \
                                "all features are discarded" in e.args[0]:
                    continue
                else:
                    print(config)
                    traceback.print_tb(sys.exc_info()[2])
                    raise e
            except RuntimeWarning as e:
                if "invalid value encountered in sqrt" in e.args[0]:
                    continue
                elif "divide by zero encountered in" in e.args[0]:
                    continue
                elif "invalid value encountered in divide" in e.args[0]:
                    continue
                elif "invalid value encountered in true_divide" in e.args[0]:
                    continue
                else:
                    print(config)
                    raise e
            except UserWarning as e:
                if "FastICA did not converge" in e.args[0]:
                    continue
                else:
                    print(config)
                    raise e
Exemple #9
0
 def test_default_configuration(self):
     for i in range(2):
         X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')
         auto = SimpleRegressionPipeline()
         auto = auto.fit(X_train, Y_train)
         predictions = auto.predict(copy.deepcopy(X_test))
         # The lower the worse
         r2_score = sklearn.metrics.r2_score(Y_test, predictions)
         self.assertAlmostEqual(0.417, r2_score, places=3)
         model_score = auto.score(copy.deepcopy(X_test), Y_test)
         self.assertAlmostEqual(model_score, r2_score, places=5)
Exemple #10
0
    def test_configurations_sparse(self):
        # Use a limit of ~4GiB
        limit = 4000 * 1024 * 1024
        resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

        cs = SimpleRegressionPipeline.get_hyperparameter_search_space(
            dataset_properties={'sparse': True})
        print(cs)
        for i in range(10):
            config = cs.sample_configuration()
            config._populate_values()
            if 'classifier:passive_aggressive:n_iter' in config and \
                            config[
                                'classifier:passive_aggressive:n_iter'] is not None:
                config._values['classifier:passive_aggressive:n_iter'] = 5
            if 'classifier:sgd:n_iter' in config and \
                            config['classifier:sgd:n_iter'] is not None:
                config._values['classifier:sgd:n_iter'] = 5

            print(config)
            X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston',
                                                           make_sparse=True)
            cls = SimpleRegressionPipeline(config, random_state=1)
            try:
                cls.fit(X_train, Y_train)
                predictions = cls.predict(X_test)
            except ValueError as e:
                if "Floating-point under-/overflow occurred at epoch" in \
                        e.args[0] or \
                                "removed all features" in e.args[0] or \
                                "all features are discarded" in e.args[0]:
                    continue
                else:
                    print(config)
                    traceback.print_tb(sys.exc_info()[2])
                    raise e
            except RuntimeWarning as e:
                if "invalid value encountered in sqrt" in e.args[0]:
                    continue
                elif "divide by zero encountered in" in e.args[0]:
                    continue
                elif "invalid value encountered in divide" in e.args[0]:
                    continue
                elif "invalid value encountered in true_divide" in e.args[0]:
                    continue
                else:
                    print(config)
                    raise e
            except UserWarning as e:
                if "FastICA did not converge" in e.args[0]:
                    continue
                else:
                    print(config)
                    raise e
 def test_default_configuration(self):
     for i in range(2):
         X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')
         auto = SimpleRegressionPipeline()
         auto = auto.fit(X_train, Y_train)
         predictions = auto.predict(copy.deepcopy(X_test))
         # The lower the worse
         r2_score = sklearn.metrics.r2_score(Y_test, predictions)
         self.assertAlmostEqual(0.339, r2_score, places=3)
         model_score = auto.score(copy.deepcopy(X_test), Y_test)
         self.assertAlmostEqual(model_score, r2_score, places=5)
 def test_default_configuration(self):
     for i in range(2):
         cs = SimpleRegressionPipeline.get_hyperparameter_search_space()
         default = cs.get_default_configuration()
         X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')
         auto = SimpleRegressionPipeline(default)
         auto = auto.fit(X_train, Y_train)
         predictions = auto.predict(copy.deepcopy(X_test))
         # The lower the worse
         r2_score = sklearn.metrics.r2_score(Y_test, predictions)
         self.assertAlmostEqual(0.41732302035060087, r2_score)
         model_score = auto.score(copy.deepcopy(X_test), Y_test)
         self.assertEqual(model_score, r2_score)
Exemple #13
0
 def test_default_configuration(self):
     for i in range(2):
         cs = SimpleRegressionPipeline.get_hyperparameter_search_space()
         default = cs.get_default_configuration()
         X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')
         auto = SimpleRegressionPipeline(default)
         auto = auto.fit(X_train, Y_train)
         predictions = auto.predict(copy.deepcopy(X_test))
         # The lower the worse
         r2_score = sklearn.metrics.r2_score(Y_test, predictions)
         self.assertAlmostEqual(0.41732302035060087, r2_score)
         model_score = auto.score(copy.deepcopy(X_test), Y_test)
         self.assertEqual(model_score, r2_score)
Exemple #14
0
    def _test_configurations(self, configurations_space, make_sparse=False,
                             data=None, dataset_properties=None):
        # Use a limit of ~4GiB
        limit = 3072 * 1024 * 1024
        resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

        configurations_space.seed(1)

        for i in range(10):
            config = configurations_space.sample_configuration()
            config._populate_values()

            # Restrict configurations which could take too long on travis-ci
            restrictions = {'regressor:passive_aggressive:n_iter': 5,
                            'regressor:sgd:n_iter': 5,
                            'regressor:adaboost:n_estimators': 50,
                            'regressor:adaboost:max_depth': 1,
                            'preprocessor:kernel_pca:n_components': 10,
                            'preprocessor:kitchen_sinks:n_components': 50,
                            'regressor:libsvm_svc:degree': 2,
                            'regressor:libsvm_svr:degree': 2,
                            'preprocessor:truncatedSVD:target_dim': 10,
                            'preprocessor:polynomial:degree': 2,
                            'regressor:lda:n_components': 10}

            for restrict_parameter in restrictions:
                restrict_to = restrictions[restrict_parameter]
                if restrict_parameter in config and \
                                config[restrict_parameter] is not None:
                    config._values[restrict_parameter] = restrict_to

            if data is None:
                X_train, Y_train, X_test, Y_test = get_dataset(
                    dataset='boston', make_sparse=make_sparse, add_NaNs=True)
            else:
                X_train = data['X_train'].copy()
                Y_train = data['Y_train'].copy()
                X_test = data['X_test'].copy()
                Y_test = data['Y_test'].copy()

            cls = SimpleRegressionPipeline(random_state=1,
                                           dataset_properties=dataset_properties)
            cls.set_hyperparameters(config)
            try:
                cls.fit(X_train, Y_train)
                predictions = cls.predict(X_test)
            except MemoryError as e:
                continue
            except ValueError as e:
                if "Floating-point under-/overflow occurred at epoch" in \
                        e.args[0]:
                    continue
                elif "removed all features" in e.args[0]:
                    continue
                elif "all features are discarded" in e.args[0]:
                    continue
                elif "Numerical problems in QDA" in e.args[0]:
                    continue
                elif 'Bug in scikit-learn' in e.args[0]:
                    continue
                else:
                    print(config)
                    print(traceback.format_exc())
                    raise e
            except RuntimeWarning as e:
                if "invalid value encountered in sqrt" in e.args[0]:
                    continue
                elif "divide by zero encountered in" in e.args[0]:
                    continue
                elif "invalid value encountered in divide" in e.args[0]:
                    continue
                elif "invalid value encountered in true_divide" in e.args[0]:
                    continue
                else:
                    print(config)
                    traceback.print_tb(sys.exc_info()[2])
                    raise e
            except UserWarning as e:
                if "FastICA did not converge" in e.args[0]:
                    continue
                else:
                    print(config)
                    traceback.print_tb(sys.exc_info()[2])
                    raise e
            except Exception as e:
                if "Multiple input features cannot have the same target value" in e.args[0]:
                    continue
                else:
                    print(config)
                    traceback.print_tb(sys.exc_info()[2])
                    raise e
    def _test_configurations(self,
                             configurations_space,
                             make_sparse=False,
                             data=None,
                             dataset_properties=None):
        # Use a limit of ~4GiB
        limit = 3072 * 1024 * 1024
        resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

        configurations_space.seed(1)

        for i in range(10):
            config = configurations_space.sample_configuration()
            config._populate_values()

            # Restrict configurations which could take too long on travis-ci
            restrictions = {
                'regressor:adaboost:n_estimators': 50,
                'regressor:adaboost:max_depth': 1,
                'feature_preprocessor:kernel_pca:n_components': 10,
                'feature_preprocessor:kitchen_sinks:n_components': 50,
                'regressor:libsvm_svc:degree': 2,
                'regressor:libsvm_svr:degree': 2,
                'regressor:libsvm_svr:C': 1.,
                'feature_preprocessor:truncatedSVD:target_dim': 10,
                'feature_preprocessor:polynomial:degree': 2,
                'regressor:lda:n_components': 10
            }

            for restrict_parameter in restrictions:
                restrict_to = restrictions[restrict_parameter]
                if restrict_parameter in config and config[
                        restrict_parameter] is not None:
                    config._values[restrict_parameter] = restrict_to

            if data is None:
                X_train, Y_train, X_test, Y_test = get_dataset(
                    dataset='boston', make_sparse=make_sparse, add_NaNs=True)
            else:
                X_train = data['X_train'].copy()
                Y_train = data['Y_train'].copy()
                X_test = data['X_test'].copy()
                data['Y_test'].copy()

            cls = SimpleRegressionPipeline(
                random_state=1, dataset_properties=dataset_properties)
            cls.set_hyperparameters(config)

            # First make sure that for this configuration, setting the parameters
            # does not mistakenly set the estimator as fitted
            for name, step in cls.named_steps.items():
                with self.assertRaisesRegex(sklearn.exceptions.NotFittedError,
                                            "instance is not fitted yet"):
                    check_is_fitted(step)

            try:
                cls.fit(X_train, Y_train)
                # After fit, all components should be tagged as fitted
                # by sklearn. Check is fitted raises an exception if that
                # is not the case
                try:
                    for name, step in cls.named_steps.items():
                        check_is_fitted(step)
                except sklearn.exceptions.NotFittedError:
                    self.fail(
                        "config={} raised NotFittedError unexpectedly!".format(
                            config))

                cls.predict(X_test)
            except MemoryError:
                continue
            except np.linalg.LinAlgError:
                continue
            except ValueError as e:
                if "Floating-point under-/overflow occurred at epoch" in \
                        e.args[0]:
                    continue
                elif "removed all features" in e.args[0]:
                    continue
                elif "all features are discarded" in e.args[0]:
                    continue
                elif "Numerical problems in QDA" in e.args[0]:
                    continue
                elif 'Bug in scikit-learn' in e.args[0]:
                    continue
                elif 'The condensed distance matrix must contain only finite ' \
                     'values.' in e.args[0]:
                    continue
                else:
                    print(config)
                    print(traceback.format_exc())
                    raise e
            except RuntimeWarning as e:
                if "invalid value encountered in sqrt" in e.args[0]:
                    continue
                elif "divide by zero encountered in" in e.args[0]:
                    continue
                elif "invalid value encountered in divide" in e.args[0]:
                    continue
                elif "invalid value encountered in true_divide" in e.args[0]:
                    continue
                elif "invalid value encountered in multiply" in e.args[0]:
                    continue
                else:
                    print(config)
                    traceback.print_tb(sys.exc_info()[2])
                    raise e
            except UserWarning as e:
                if "FastICA did not converge" in e.args[0]:
                    continue
                else:
                    print(config)
                    traceback.print_tb(sys.exc_info()[2])
                    raise e
            except Exception as e:
                if "Multiple input features cannot have the same target value" in e.args[
                        0]:
                    continue
                else:
                    print(config)
                    traceback.print_tb(sys.exc_info()[2])
                    raise e
    def _test_configurations(self, configurations_space, make_sparse=False,
                             data=None, dataset_properties=None):
        # Use a limit of ~4GiB
        limit = 3072 * 1024 * 1024
        resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

        configurations_space.seed(1)

        for i in range(10):
            config = configurations_space.sample_configuration()
            config._populate_values()

            # Restrict configurations which could take too long on travis-ci
            restrictions = {'regressor:adaboost:n_estimators': 50,
                            'regressor:adaboost:max_depth': 1,
                            'preprocessor:kernel_pca:n_components': 10,
                            'preprocessor:kitchen_sinks:n_components': 50,
                            'regressor:libsvm_svc:degree': 2,
                            'regressor:libsvm_svr:degree': 2,
                            'preprocessor:truncatedSVD:target_dim': 10,
                            'preprocessor:polynomial:degree': 2,
                            'regressor:lda:n_components': 10}

            for restrict_parameter in restrictions:
                restrict_to = restrictions[restrict_parameter]
                if restrict_parameter in config and \
                                config[restrict_parameter] is not None:
                    config._values[restrict_parameter] = restrict_to

            if data is None:
                X_train, Y_train, X_test, Y_test = get_dataset(
                    dataset='boston', make_sparse=make_sparse, add_NaNs=True)
            else:
                X_train = data['X_train'].copy()
                Y_train = data['Y_train'].copy()
                X_test = data['X_test'].copy()
                Y_test = data['Y_test'].copy()

            cls = SimpleRegressionPipeline(random_state=1,
                                           dataset_properties=dataset_properties)
            cls.set_hyperparameters(config)
            try:
                cls.fit(X_train, Y_train)
                predictions = cls.predict(X_test)
            except MemoryError as e:
                continue
            except ValueError as e:
                if "Floating-point under-/overflow occurred at epoch" in \
                        e.args[0]:
                    continue
                elif "removed all features" in e.args[0]:
                    continue
                elif "all features are discarded" in e.args[0]:
                    continue
                elif "Numerical problems in QDA" in e.args[0]:
                    continue
                elif 'Bug in scikit-learn' in e.args[0]:
                    continue
                elif 'The condensed distance matrix must contain only finite ' \
                     'values.' in e.args[0]:
                    continue
                else:
                    print(config)
                    print(traceback.format_exc())
                    raise e
            except RuntimeWarning as e:
                if "invalid value encountered in sqrt" in e.args[0]:
                    continue
                elif "divide by zero encountered in" in e.args[0]:
                    continue
                elif "invalid value encountered in divide" in e.args[0]:
                    continue
                elif "invalid value encountered in true_divide" in e.args[0]:
                    continue
                else:
                    print(config)
                    traceback.print_tb(sys.exc_info()[2])
                    raise e
            except UserWarning as e:
                if "FastICA did not converge" in e.args[0]:
                    continue
                else:
                    print(config)
                    traceback.print_tb(sys.exc_info()[2])
                    raise e
            except Exception as e:
                if "Multiple input features cannot have the same target value" in e.args[0]:
                    continue
                else:
                    print(config)
                    traceback.print_tb(sys.exc_info()[2])
                    raise e