def test_get_hyperparameter_search_space_preprocessor_contradicts_default_classifier(
        self):
    """Check that forcing a preprocessor flips the default regressor.

    A sparse densifier should make gradient boosting the default choice,
    while the nystroem sampler should make sgd the default.
    """
    space = ParamSklearnRegressor.get_hyperparameter_search_space(
        include={'preprocessor': ['densifier']},
        dataset_properties={'sparse': True})
    self.assertEqual(
        space.get_hyperparameter('regressor:__choice__').default,
        'gradient_boosting')

    space = ParamSklearnRegressor.get_hyperparameter_search_space(
        include={'preprocessor': ['nystroem_sampler']})
    self.assertEqual(
        space.get_hyperparameter('regressor:__choice__').default,
        'sgd')
def test_get_hyperparameter_search_space(self):
    """The full search space has the expected size and structure."""
    space = ParamSklearnRegressor.get_hyperparameter_search_space()
    self.assertIsInstance(space, ConfigurationSpace)
    params = space.get_hyperparameters()
    conds = space.get_conditions()
    self.assertEqual(114, len(params))
    # Everything but the five top-level hyperparameters is conditional.
    self.assertEqual(len(params) - 5, len(conds))
def test_configurations(self):
    """Sample ten random configurations and smoke-test fit/predict.

    Known-benign numerical failures (float overflow, all features
    removed, non-convergence, memory exhaustion under the rlimit) are
    skipped instead of failing the test; anything else is re-raised
    with its original traceback after printing the offending
    configuration.
    """
    # Cap the address space at ~4GiB so memory-hungry configurations
    # fail fast with MemoryError instead of thrashing the machine.
    limit = 4000 * 1024 * 1024
    resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

    cs = ParamSklearnRegressor.get_hyperparameter_search_space()
    print(cs)
    cs.seed(1)
    for i in range(10):
        config = cs.sample_configuration()
        config._populate_values()
        # Keep SGD cheap: five iterations suffice for a smoke test.
        if config['regressor:sgd:n_iter'] is not None:
            config._values['regressor:sgd:n_iter'] = 5

        X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
        cls = ParamSklearnRegressor(config, random_state=1)
        print(config)
        try:
            cls.fit(X_train, Y_train)
            X_test_ = X_test.copy()
            predictions = cls.predict(X_test)
            self.assertIsInstance(predictions, np.ndarray)
            # Predicting on a copy must also work and return an array.
            predictions_copy = cls.predict(X_test_)
            self.assertIsInstance(predictions_copy, np.ndarray)
        except ValueError as e:
            ignorable = (
                "Floating-point under-/overflow occurred at epoch",
                "removed all features",
                "all features are discarded",
            )
            if any(msg in e.args[0] for msg in ignorable):
                continue
            print(config)
            print(traceback.format_exc())
            # Bare raise preserves the original traceback.
            raise
        except RuntimeWarning as e:
            ignorable = (
                "invalid value encountered in sqrt",
                "divide by zero encountered in",
                "invalid value encountered in divide",
                "invalid value encountered in true_divide",
            )
            if any(msg in e.args[0] for msg in ignorable):
                continue
            print(config)
            print(traceback.format_exc())
            raise
        except UserWarning as e:
            if "FastICA did not converge" in e.args[0]:
                continue
            print(config)
            print(traceback.format_exc())
            raise
        except MemoryError:
            # Expected under the rlimit set above; not a test failure.
            continue
def test_get_hyperparameter_search_space_include_exclude_models(self):
    """include/exclude filters restrict regressor and preprocessor choices."""
    space = ParamSklearnRegressor.get_hyperparameter_search_space(
        include={'regressor': ['random_forest']})
    self.assertEqual(
        space.get_hyperparameter('regressor:__choice__'),
        CategoricalHyperparameter('regressor:__choice__', ['random_forest']))

    # TODO add this test when more than one regressor is present
    space = ParamSklearnRegressor.get_hyperparameter_search_space(
        exclude={'regressor': ['random_forest']})
    self.assertNotIn('random_forest', str(space))

    space = ParamSklearnRegressor.get_hyperparameter_search_space(
        include={'preprocessor': ['pca']})
    self.assertEqual(
        space.get_hyperparameter('preprocessor:__choice__'),
        CategoricalHyperparameter('preprocessor:__choice__', ['pca']))

    space = ParamSklearnRegressor.get_hyperparameter_search_space(
        exclude={'preprocessor': ['no_preprocessing']})
    self.assertNotIn('no_preprocessing', str(space))
def test_predict_batched(self):
    """Batched prediction matches unbatched output and batches correctly.

    With 356 boston test rows and batch_size=20 the underlying pipeline
    must be invoked ceil(356/20) = 18 times.
    """
    search_space = ParamSklearnRegressor.get_hyperparameter_search_space()
    model = ParamSklearnRegressor(search_space.get_default_configuration())
    X_train, Y_train, X_test, Y_test = get_dataset(dataset='boston')
    model.fit(X_train, Y_train)

    reference = model.predict(X_test.copy())

    # Wrap the fitted pipeline so the number of predict calls is counted.
    spy = mock.Mock(wraps=model.pipeline_)
    model.pipeline_ = spy
    batched = model.predict(X_test, batch_size=20)
    self.assertEqual((356,), batched.shape)
    self.assertEqual(18, spy.predict.call_count)
    assert_array_almost_equal(reference, batched)
def test_default_configuration(self):
    """Fitting the default configuration is reproducible across runs."""
    for _ in range(2):
        space = ParamSklearnRegressor.get_hyperparameter_search_space()
        default = space.get_default_configuration()
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')
        auto = ParamSklearnRegressor(default).fit(X_train, Y_train)
        predictions = auto.predict(copy.deepcopy(X_test))
        # The lower the worse.
        r2_score = sklearn.metrics.r2_score(Y_test, predictions)
        self.assertAlmostEqual(0.41626416529791199, r2_score)
        # score() must agree with the externally computed R^2.
        model_score = auto.score(copy.deepcopy(X_test), Y_test)
        self.assertEqual(model_score, r2_score)
def get_model(configuration, seed):
    """Build the ParamSklearn wrapper matching *configuration*.

    Returns a classifier wrapper when the configuration contains a
    'classifier' entry, a regressor wrapper when it contains a
    'regressor' entry, and None when it contains neither.
    """
    if 'classifier' in configuration:
        return ParamSklearnClassifier(configuration, seed)
    if 'regressor' in configuration:
        return ParamSklearnRegressor(configuration, seed)
    return None
def test_repr(self):
    """repr() of a regressor must eval back to an instance of the class."""
    space = ParamSklearnRegressor.get_hyperparameter_search_space()
    text = repr(ParamSklearnRegressor(space.get_default_configuration()))
    # eval on our own repr output is safe here (trusted, self-produced);
    # the representation is expected to be a valid constructor expression.
    rebuilt = eval(text)
    self.assertIsInstance(rebuilt, ParamSklearnRegressor)