def test_get_hyperparameter_search_space(self):
    """Check the size of the default regression search space.

    The space must be a ``ConfigurationSpace`` with 158 hyperparameters,
    and the number of conditions must be five fewer than that.
    """
    space = SimpleRegressionPipeline().get_hyperparameter_search_space()
    self.assertIsInstance(space, ConfigurationSpace)

    n_conditions = len(space.get_conditions())
    n_hyperparameters = len(space.get_hyperparameters())
    self.assertEqual(158, n_hyperparameters)
    self.assertEqual(n_hyperparameters - 5, n_conditions)
def test_get_hyperparameter_search_space(self):
    """Check the size of the default regression search space.

    The space must be a ``ConfigurationSpace`` with 207 hyperparameters,
    and the number of conditions must be five fewer than that.
    """
    space = SimpleRegressionPipeline().get_hyperparameter_search_space()
    self.assertIsInstance(space, ConfigurationSpace)

    n_conditions = len(space.get_conditions())
    n_hyperparameters = len(space.get_hyperparameters())
    self.assertEqual(207, n_hyperparameters)
    self.assertEqual(n_hyperparameters - 5, n_conditions)
def test_default_configuration_iterative_fit(self):
    """After the i-th ``iterative_fit`` call the forest must report i estimators."""
    pipeline = SimpleRegressionPipeline(
        include={
            'regressor': ['random_forest'],
            'preprocessor': ['no_preprocessing'],
        })
    X_train, Y_train, _, _ = get_dataset(dataset='boston')

    # The transformed data is not used below; only the call itself is kept.
    pipeline.fit_transformer(X_train, Y_train)

    for expected_estimators in range(1, 11):
        pipeline.iterative_fit(X_train, Y_train)
        forest = pipeline.steps[-1][-1].choice.estimator
        self.assertEqual(forest.n_estimators, expected_estimators)
def test_default_configuration_iterative_fit(self):
    """After the i-th ``iterative_fit`` call the forest must report i estimators."""
    pipeline = SimpleRegressionPipeline(
        include={
            'regressor': ['random_forest'],
            'preprocessor': ['no_preprocessing'],
        })
    X_train, Y_train, _, _ = get_dataset(dataset='boston')

    # The transformed data is not used below; only the call itself is kept.
    pipeline.pre_transform(X_train, Y_train)

    for expected_estimators in range(1, 11):
        pipeline.iterative_fit(X_train, Y_train)
        forest = pipeline.steps[-1][-1].choice.estimator
        self.assertEqual(forest.n_estimators, expected_estimators)
def test_predict_batched_sparse(self):
    """Batched prediction on sparse data must match unbatched prediction.

    Also checks that predicting with ``batch_size=20`` calls the final
    step's ``predict`` 18 times and returns 356 values.
    """
    properties = {'sparse': True}
    allowed = {'regressor': ['decision_tree']}

    space = SimpleRegressionPipeline(
        dataset_properties=properties,
        include=allowed,
    ).get_hyperparameter_search_space()
    pipeline = SimpleRegressionPipeline(
        config=space.get_default_configuration(),
        random_state=1,
        dataset_properties=properties,
        include=allowed,
    )

    X_train, Y_train, X_test, _ = get_dataset(dataset='boston',
                                              make_sparse=True)
    pipeline.fit(X_train, Y_train)
    reference = pipeline.predict(X_test.copy())

    # Wrap the final step's predict so the number of batch calls is recorded.
    final_step = pipeline.steps[-1][-1]
    spy = unittest.mock.Mock(wraps=final_step.predict)
    final_step.predict = spy

    batched = pipeline.predict(X_test, batch_size=20)
    self.assertEqual((356, ), batched.shape)
    self.assertEqual(18, spy.call_count)
    np.testing.assert_array_almost_equal(reference, batched)
def test_configurations_sparse(self):
    """Fit and predict ten sampled configurations on sparse Boston data.

    Failures whose message matches one of the tolerated fragments below
    are skipped; any other ``ValueError``, ``RuntimeWarning`` or
    ``UserWarning`` is re-raised after printing the configuration.
    """
    # Use a limit of ~4GiB
    limit = 4000 * 1024 * 1024
    resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

    cs = SimpleRegressionPipeline.get_hyperparameter_search_space(
        dataset_properties={'sparse': True})
    print(cs)

    # Cap iteration counts. The keys use the ``regressor:`` prefix because
    # this is a regression search space; ``classifier:`` keys can never be
    # present in it, so the cap would silently never apply.
    restrictions = {
        'regressor:passive_aggressive:n_iter': 5,
        'regressor:sgd:n_iter': 5,
    }
    tolerated_value_errors = (
        "Floating-point under-/overflow occurred at epoch",
        "removed all features",
        "all features are discarded",
    )
    tolerated_runtime_warnings = (
        "invalid value encountered in sqrt",
        "divide by zero encountered in",
        "invalid value encountered in divide",
        "invalid value encountered in true_divide",
    )
    tolerated_user_warnings = ("FastICA did not converge",)

    for _ in range(10):
        config = cs.sample_configuration()
        config._populate_values()
        for key, value in restrictions.items():
            if key in config and config[key] is not None:
                config._values[key] = value
        print(config)

        X_train, Y_train, X_test, _ = get_dataset(dataset='boston',
                                                  make_sparse=True)
        cls = SimpleRegressionPipeline(config, random_state=1)
        try:
            cls.fit(X_train, Y_train)
            cls.predict(X_test)
        except ValueError as e:
            # str()/empty fallback: an exception without a string first
            # argument must not raise a second error inside the handler.
            message = str(e.args[0]) if e.args else ''
            if any(text in message for text in tolerated_value_errors):
                continue
            print(config)
            traceback.print_tb(sys.exc_info()[2])
            raise
        except RuntimeWarning as e:
            message = str(e.args[0]) if e.args else ''
            if any(text in message for text in tolerated_runtime_warnings):
                continue
            print(config)
            raise
        except UserWarning as e:
            message = str(e.args[0]) if e.args else ''
            if any(text in message for text in tolerated_user_warnings):
                continue
            print(config)
            raise
def test_default_configuration(self):
    """Fit the default pipeline on diabetes twice and check its R^2 score.

    The score must be 0.339 (three places) and must agree with the
    pipeline's own ``score`` method (five places).
    """
    for _ in range(2):
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')
        pipeline = SimpleRegressionPipeline().fit(X_train, Y_train)
        predicted = pipeline.predict(copy.deepcopy(X_test))

        # The lower the worse
        expected_r2 = sklearn.metrics.r2_score(Y_test, predicted)
        self.assertAlmostEqual(0.339, expected_r2, places=3)

        reported = pipeline.score(copy.deepcopy(X_test), Y_test)
        self.assertAlmostEqual(reported, expected_r2, places=5)
def test_default_configuration(self):
    """Fit the default pipeline on diabetes twice and check its R^2 score.

    The score must be 0.417 (three places) and must agree with the
    pipeline's own ``score`` method (five places).
    """
    for _ in range(2):
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')
        pipeline = SimpleRegressionPipeline().fit(X_train, Y_train)
        predicted = pipeline.predict(copy.deepcopy(X_test))

        # The lower the worse
        expected_r2 = sklearn.metrics.r2_score(Y_test, predicted)
        self.assertAlmostEqual(0.417, expected_r2, places=3)

        reported = pipeline.score(copy.deepcopy(X_test), Y_test)
        self.assertAlmostEqual(reported, expected_r2, places=5)
def test_get_hyperparameter_search_space_preprocessor_contradicts_default_classifier(
        self):
    """Check which regressor becomes the default when the preprocessor is restricted."""
    cases = (
        (
            {
                'include': {'preprocessor': ['densifier']},
                'dataset_properties': {'sparse': True},
            },
            'gradient_boosting',
        ),
        (
            {'include': {'preprocessor': ['nystroem_sampler']}},
            'sgd',
        ),
    )
    for space_kwargs, expected_default in cases:
        space = SimpleRegressionPipeline.get_hyperparameter_search_space(
            **space_kwargs)
        choice = space.get_hyperparameter('regressor:__choice__')
        self.assertEqual(choice.default, expected_default)
def test_default_configuration(self):
    """Fit the default configuration on diabetes twice and check its R^2 score.

    The score computed with ``sklearn.metrics.r2_score`` must match the
    reference value and must agree with the pipeline's own ``score``
    method.
    """
    for _ in range(2):
        cs = SimpleRegressionPipeline.get_hyperparameter_search_space()
        default = cs.get_default_configuration()
        X_train, Y_train, X_test, Y_test = get_dataset(dataset='diabetes')
        auto = SimpleRegressionPipeline(default)
        auto = auto.fit(X_train, Y_train)
        predictions = auto.predict(copy.deepcopy(X_test))
        # The lower the worse
        r2_score = sklearn.metrics.r2_score(Y_test, predictions)
        self.assertAlmostEqual(0.41732302035060087, r2_score)
        model_score = auto.score(copy.deepcopy(X_test), Y_test)
        # Both values are floats produced by separate prediction passes;
        # compare with a tolerance instead of exact equality.
        self.assertAlmostEqual(model_score, r2_score, places=5)
def test_configurations_signed_data(self):
    """Run the shared configuration checks with the ``signed`` property set."""
    properties = {'signed': True}
    pipeline = SimpleRegressionPipeline(dataset_properties=properties)
    self._test_configurations(
        configurations_space=pipeline.get_hyperparameter_search_space(),
        dataset_properties=properties)
def test_multioutput(self):
    """Run the shared configuration checks on a synthetic four-target task."""
    # Wrap the generator with a cache located in the system temp directory.
    memory = Memory(location=tempfile.gettempdir())
    make_regression = memory.cache(sklearn.datasets.make_regression)

    X, Y = make_regression(
        n_samples=250,
        n_features=20,
        n_informative=9,
        n_targets=4,
        bias=0.5,
        effective_rank=10,
        tail_strength=0.4,
        noise=0.3,
        shuffle=True,
        coef=False,
        random_state=1,
    )

    # The first 200 rows are used for training, the remainder for testing.
    n_train = 200
    data = {
        'X_train': X[:n_train, :],
        'Y_train': Y[:n_train, :],
        'X_test': X[n_train:, :],
        'Y_test': Y[n_train:, :],
    }

    properties = {'multioutput': True}
    pipeline = SimpleRegressionPipeline(dataset_properties=properties)
    space = pipeline.get_hyperparameter_search_space()
    self._test_configurations(space, data=data,
                              dataset_properties=properties)
def _get_regression_configuration_space(info, include, exclude):
    """Build the regression pipeline search space for a dataset.

    ``info`` must contain the key ``'is_sparse'`` and is passed unchanged
    as the pipeline's ``dataset_properties``; ``include`` and ``exclude``
    are forwarded to ``SimpleRegressionPipeline``. Returns the result of
    ``get_hyperparameter_search_space()``.
    """
    # NOTE(review): this flag is derived but never forwarded to the
    # pipeline -- confirm whether sparsity is meant to be read from
    # ``info`` itself.
    sparse = True if info['is_sparse'] == 1 else False  # noqa: F841
    pipeline = SimpleRegressionPipeline(
        dataset_properties=info,
        include=include,
        exclude=exclude)
    return pipeline.get_hyperparameter_search_space()
def test_configurations_sparse(self):
    """Run the shared configuration checks on sparse data."""
    properties = {'sparse': True}
    pipeline = SimpleRegressionPipeline(dataset_properties=properties)
    self._test_configurations(
        pipeline.get_hyperparameter_search_space(),
        make_sparse=True,
        dataset_properties=properties)
def test_configurations_sparse(self):
    """Run the shared configuration checks on sparse data.

    The ``gaussian_process`` regressor is excluded from the search space.
    """
    dataset_properties = {'sparse': True}
    cs = SimpleRegressionPipeline(
        # TODO remove in sklearn 0.18
        dataset_properties=dataset_properties,
        # Component names are passed as a list, like every other
        # include/exclude argument; a bare string is a container of
        # characters, so membership tests against it are substring checks.
        exclude={'regressor': ['gaussian_process']},
    ).get_hyperparameter_search_space()
    self._test_configurations(cs, make_sparse=True,
                              dataset_properties=dataset_properties)
def test_get_hyperparameter_search_space_include_exclude_models(self):
    """Check that include/exclude restrict regressor and preprocessor choices."""
    build_space = SimpleRegressionPipeline.get_hyperparameter_search_space

    space = build_space(include={'regressor': ['random_forest']})
    self.assertEqual(
        space.get_hyperparameter('regressor:__choice__'),
        CategoricalHyperparameter('regressor:__choice__',
                                  ['random_forest']))

    # TODO add this test when more than one regressor is present
    space = build_space(exclude={'regressor': ['random_forest']})
    self.assertNotIn('random_forest', str(space))

    space = build_space(include={'preprocessor': ['pca']})
    self.assertEqual(
        space.get_hyperparameter('preprocessor:__choice__'),
        CategoricalHyperparameter('preprocessor:__choice__', ['pca']))

    space = build_space(exclude={'preprocessor': ['no_preprocessing']})
    self.assertNotIn('no_preprocessing', str(space))
def test_configurations(self):
    """Fit and predict ten sampled configurations on the Boston data.

    Both predictions must be numpy arrays. Failures whose message matches
    one of the tolerated fragments are skipped, as are memory errors; any
    other ``ValueError``, ``RuntimeWarning`` or ``UserWarning`` is printed
    and re-raised.
    """
    # Use a limit of ~4GiB
    memory_limit = 4000 * 1024 * 1024
    resource.setrlimit(resource.RLIMIT_AS, (memory_limit, memory_limit))

    space = SimpleRegressionPipeline.get_hyperparameter_search_space()
    print(space)
    space.seed(1)

    tolerated_value_errors = (
        "Floating-point under-/overflow occurred at epoch",
        "removed all features",
        "all features are discarded",
    )
    tolerated_runtime_warnings = (
        "invalid value encountered in sqrt",
        "divide by zero encountered in",
        "invalid value encountered in divide",
        "invalid value encountered in true_divide",
    )

    for _ in range(10):
        config = space.sample_configuration()
        config._populate_values()
        if config['regressor:sgd:n_iter'] is not None:
            config._values['regressor:sgd:n_iter'] = 5

        X_train, Y_train, X_test, _ = get_dataset(dataset='boston')
        pipeline = SimpleRegressionPipeline(config, random_state=1)
        print(config)
        try:
            pipeline.fit(X_train, Y_train)
            X_test_copy = X_test.copy()
            first = pipeline.predict(X_test)
            self.assertIsInstance(first, np.ndarray)
            second = pipeline.predict(X_test_copy)
            self.assertIsInstance(second, np.ndarray)
        except ValueError as e:
            if any(text in e.args[0] for text in tolerated_value_errors):
                continue
            print(config)
            print(traceback.format_exc())
            raise e
        except RuntimeWarning as e:
            if any(text in e.args[0] for text in tolerated_runtime_warnings):
                continue
            print(config)
            print(traceback.format_exc())
            raise e
        except UserWarning as e:
            if "FastICA did not converge" in e.args[0]:
                continue
            print(config)
            print(traceback.format_exc())
            raise e
        except MemoryError:
            continue
def test_predict_batched(self):
    """Batched prediction must match unbatched prediction.

    Also checks that predicting with ``batch_size=20`` calls the final
    step's ``predict`` 18 times and returns 356 values.
    """
    allowed = {'regressor': ['decision_tree']}
    space = SimpleRegressionPipeline(
        include=allowed).get_hyperparameter_search_space()
    pipeline = SimpleRegressionPipeline(
        space.get_default_configuration(), include=allowed)

    X_train, Y_train, X_test, _ = get_dataset(dataset='boston')
    pipeline.fit(X_train, Y_train)
    reference = pipeline.predict(X_test.copy())

    # Wrap the final step's predict so the number of batch calls is recorded.
    final_step = pipeline.steps[-1][-1]
    spy = unittest.mock.Mock(wraps=final_step.predict)
    final_step.predict = spy

    batched = pipeline.predict(X_test, batch_size=20)
    self.assertEqual((356,), batched.shape)
    self.assertEqual(18, spy.call_count)
    assert_array_almost_equal(reference, batched)
def _get_regression_configuration_space(info, include, exclude, incremental_learning):
    """Build the regression pipeline search space for a dataset.

    ``info['is_sparse'] == 1`` marks the data as sparse. That flag and
    ``incremental_learning`` become the pipeline's dataset properties;
    ``include`` and ``exclude`` are forwarded to
    ``SimpleRegressionPipeline``. Returns the result of
    ``get_hyperparameter_search_space()``.
    """
    properties = {
        'sparse': True if info['is_sparse'] == 1 else False,
        'incremental_learning': incremental_learning,
    }
    pipeline = SimpleRegressionPipeline(
        dataset_properties=properties,
        include=include,
        exclude=exclude)
    return pipeline.get_hyperparameter_search_space()
def test_predict_batched(self):
    """Batched prediction must match unbatched prediction.

    Also checks that predicting with ``batch_size=20`` calls the wrapped
    ``pipeline_.predict`` 18 times and returns 356 values.
    """
    space = SimpleRegressionPipeline.get_hyperparameter_search_space()
    pipeline = SimpleRegressionPipeline(space.get_default_configuration())

    X_train, Y_train, X_test, _ = get_dataset(dataset='boston')
    pipeline.fit(X_train, Y_train)
    reference = pipeline.predict(X_test.copy())

    # Replace the fitted inner pipeline with a recording wrapper.
    spy = mock.Mock(wraps=pipeline.pipeline_)
    pipeline.pipeline_ = spy

    batched = pipeline.predict(X_test, batch_size=20)
    self.assertEqual((356,), batched.shape)
    self.assertEqual(18, spy.predict.call_count)
    assert_array_almost_equal(reference, batched)
def test_pipeline_clonability(self):
    """A fitted pipeline must be clonable with all parameter keys preserved.

    Also rebuilds the pipeline from cloned shallow parameters and checks
    that ``get_params(deep=False)`` returns those same values.
    """
    X_train, Y_train, _, _ = get_dataset(dataset='boston')
    fitted = SimpleRegressionPipeline(random_state=1).fit(X_train, Y_train)
    cloned_params = clone(fitted).get_params()

    # Make sure all keys are copied properly
    for key in fitted.get_params():
        self.assertIn(key, cloned_params)

    # Make sure the params getter of estimator are honored
    pipeline_class = fitted.__class__
    shallow_params = {
        name: clone(value, safe=False)
        for name, value in fitted.get_params(deep=False).items()
    }
    rebuilt_params = pipeline_class(**shallow_params).get_params(deep=False)
    for name, expected in shallow_params.items():
        self.assertEqual(expected, rebuilt_params[name])
def test_predict_batched(self):
    """Batched prediction must match unbatched prediction.

    Also checks that predicting with ``batch_size=20`` calls the wrapped
    ``pipeline_.predict`` 15 times and returns 292 values.
    """
    space = SimpleRegressionPipeline.get_hyperparameter_search_space(
        include={'regressor': ['decision_tree']})
    pipeline = SimpleRegressionPipeline(space.get_default_configuration())

    X_train, Y_train, X_test, _ = get_dataset(dataset='diabetes')
    pipeline.fit(X_train, Y_train)
    reference = pipeline.predict(X_test.copy())

    # Replace the fitted inner pipeline with a recording wrapper.
    spy = unittest.mock.Mock(wraps=pipeline.pipeline_)
    pipeline.pipeline_ = spy

    batched = pipeline.predict(X_test, batch_size=20)
    self.assertEqual((292, ), batched.shape)
    self.assertEqual(15, spy.predict.call_count)
    assert_array_almost_equal(reference, batched)
def _get_regression_configuration_space(
        info: Dict[str, Any],
        include: Dict[str, List[str]],
        exclude: Dict[str, List[str]]) -> ConfigurationSpace:
    """Build the regression pipeline search space for a dataset.

    Parameters
    ----------
    info : dict
        Dataset description. Must provide ``'task'`` (compared against
        ``MULTIOUTPUT_REGRESSION``) and ``'is_sparse'`` (``1`` marks
        sparse data).
    include, exclude : dict
        Forwarded unchanged to ``SimpleRegressionPipeline``.

    Returns
    -------
    ConfigurationSpace
        The result of ``get_hyperparameter_search_space()`` for a pipeline
        built with the derived dataset properties.

    Raises
    ------
    KeyError
        If ``info`` lacks ``'task'`` or ``'is_sparse'``.
    """
    multioutput = bool(info['task'] == MULTIOUTPUT_REGRESSION)
    # Both flags must be final before the properties dict is built;
    # otherwise 'sparse' is always reported as False.
    sparse = bool(info['is_sparse'] == 1)

    dataset_properties = {
        'multioutput': multioutput,
        'sparse': sparse,
    }

    configuration_space = SimpleRegressionPipeline(
        dataset_properties=dataset_properties,
        include=include,
        exclude=exclude).get_hyperparameter_search_space()
    return configuration_space
def _test_configurations(self, configurations_space, make_sparse=False,
                         data=None, dataset_properties=None):
    """Fit and predict with ten configurations sampled from a search space.

    Parameters
    ----------
    configurations_space
        Search space to sample from; it is seeded with 1 before sampling.
    make_sparse : bool
        Forwarded to ``get_dataset`` when ``data`` is None.
    data : dict, optional
        Pre-built dataset with the keys ``'X_train'``, ``'Y_train'`` and
        ``'X_test'``; copies of the values are used. When None, the
        Boston dataset is loaded with NaNs added.
    dataset_properties : dict, optional
        Forwarded to ``SimpleRegressionPipeline``.

    Memory errors and failures whose message contains one of the
    tolerated fragments are skipped. Every other exception is re-raised
    after printing the configuration and the traceback.
    """
    # Use a limit of ~3GiB
    limit = 3072 * 1024 * 1024
    resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

    # Restrict configurations which could take too long on travis-ci
    restrictions = {
        'regressor:passive_aggressive:n_iter': 5,
        'regressor:sgd:n_iter': 5,
        'regressor:adaboost:n_estimators': 50,
        'regressor:adaboost:max_depth': 1,
        'preprocessor:kernel_pca:n_components': 10,
        'preprocessor:kitchen_sinks:n_components': 50,
        'regressor:libsvm_svc:degree': 2,
        'regressor:libsvm_svr:degree': 2,
        'preprocessor:truncatedSVD:target_dim': 10,
        'preprocessor:polynomial:degree': 2,
        'regressor:lda:n_components': 10,
    }

    # Tolerated message fragments per exception type. The first matching
    # type decides which fragments apply, so the order is significant.
    tolerated = (
        (ValueError, (
            "Floating-point under-/overflow occurred at epoch",
            "removed all features",
            "all features are discarded",
            "Numerical problems in QDA",
            'Bug in scikit-learn',
        )),
        (RuntimeWarning, (
            "invalid value encountered in sqrt",
            "divide by zero encountered in",
            "invalid value encountered in divide",
            "invalid value encountered in true_divide",
        )),
        (UserWarning, (
            "FastICA did not converge",
        )),
        (Exception, (
            "Multiple input features cannot have the same target value",
        )),
    )

    configurations_space.seed(1)

    for _ in range(10):
        config = configurations_space.sample_configuration()
        config._populate_values()

        for parameter, value in restrictions.items():
            if parameter in config and config[parameter] is not None:
                config._values[parameter] = value

        if data is None:
            X_train, Y_train, X_test, _ = get_dataset(
                dataset='boston', make_sparse=make_sparse, add_NaNs=True)
        else:
            X_train = data['X_train'].copy()
            Y_train = data['Y_train'].copy()
            X_test = data['X_test'].copy()

        cls = SimpleRegressionPipeline(random_state=1,
                                       dataset_properties=dataset_properties)
        cls.set_hyperparameters(config)

        try:
            cls.fit(X_train, Y_train)
            cls.predict(X_test)
        except MemoryError:
            continue
        except Exception as e:
            # An exception may carry no arguments or a non-string first
            # argument; normalise so the check itself cannot raise and
            # hide the real failure.
            message = str(e.args[0]) if e.args else ''
            fragments = next(
                texts for exc_type, texts in tolerated
                if isinstance(e, exc_type))
            if any(text in message for text in fragments):
                continue
            print(config)
            print(traceback.format_exc())
            raise
def test_repr(self):
    """The repr of a default-configured pipeline must evaluate to a pipeline."""
    space = SimpleRegressionPipeline.get_hyperparameter_search_space()
    pipeline = SimpleRegressionPipeline(space.get_default_configuration())
    # eval is only applied to the repr string produced on this line.
    rebuilt = eval(repr(pipeline))
    self.assertIsInstance(rebuilt, SimpleRegressionPipeline)
def test_repr(self):
    """The repr of a default pipeline must evaluate back to a pipeline."""
    pipeline = SimpleRegressionPipeline()
    # eval is only applied to the repr string produced on this line.
    rebuilt = eval(repr(pipeline))
    self.assertIsInstance(rebuilt, SimpleRegressionPipeline)
def test_set_hyperparameters_honors_configuration(self):
    """Check that ``set_hyperparameters`` really applies a configuration.

    For every combination of the four boolean dataset properties a
    configuration is sampled and set on a pipeline. Each pipeline step is
    then checked against the configuration, and finally the set of checked
    keys must equal the set of keys in the configuration, so that no
    hyperparameter goes unverified.
    """
    for sparse, multilabel, signed, multiclass in itertools.product(
            [True, False], repeat=4):
        auto = SimpleRegressionPipeline(
            random_state=1,
            dataset_properties={
                'sparse': sparse,
                'multilabel': multilabel,
                'multiclass': multiclass,
                'signed': signed,
            },
        )
        config = auto.get_hyperparameter_search_space().sample_configuration()

        # Translate the sampled configuration into actual components.
        auto.set_hyperparameters(config)
        config_dict = config.get_dictionary()

        # Collects every configuration key that has been verified.
        keys_checked = []

        for name, step in auto.named_steps.items():
            if name == 'data_preprocessing':
                # Both the numerical and the categorical branch are checked.
                branches = (
                    ('numerical_transformer', step.numer_ppl.named_steps),
                    ('categorical_transformer', step.categ_ppl.named_steps),
                )
                for data_type, sub_steps in branches:
                    for sub_name, sub_step in sub_steps.items():
                        prefix = "data_preprocessing:{}:{}".format(
                            data_type, sub_name)
                        if isinstance(sub_step, AutoSklearnChoice):
                            # A choice must have selected the right option.
                            keys_checked.extend(
                                self._test_set_hyperparameter_choice(
                                    prefix + ":__choice__",
                                    sub_step, config_dict))
                        elif isinstance(sub_step, AutoSklearnComponent):
                            # A component must carry the right hyperparameters.
                            keys_checked.extend(
                                self._test_set_hyperparameter_component(
                                    prefix, sub_step, config_dict))
                        else:
                            raise ValueError(
                                "New type of pipeline component!")
            elif name == 'balancing':
                keys_checked.extend(
                    self._test_set_hyperparameter_component(
                        'balancing', step, config_dict))
            elif name in ('feature_preprocessor', 'regressor'):
                keys_checked.extend(
                    self._test_set_hyperparameter_choice(
                        name + ':__choice__', step, config_dict))
            else:
                raise ValueError(
                    "Found another type of step! Need to update this check"
                    " {}. ".format(name))

        # Make sure we checked the whole configuration
        self.assertSetEqual(set(config_dict.keys()), set(keys_checked))
def _test_configurations(self, configurations_space, make_sparse=False,
                         data=None, dataset_properties=None):
    """Fit and predict with ten configurations sampled from a search space.

    Parameters
    ----------
    configurations_space
        Search space to sample from; it is seeded with 1 before sampling.
    make_sparse : bool
        Forwarded to ``get_dataset`` when ``data`` is None.
    data : dict, optional
        Pre-built dataset with the keys ``'X_train'``, ``'Y_train'`` and
        ``'X_test'``; copies of the values are used. When None, the
        Boston dataset is loaded with NaNs added.
    dataset_properties : dict, optional
        Forwarded to ``SimpleRegressionPipeline``.

    Before fitting, every step must be reported as not fitted; after
    fitting, every step must be reported as fitted. Memory errors, linear
    algebra errors and failures whose message contains one of the
    tolerated fragments are skipped. Every other exception is re-raised
    after printing the configuration and the traceback.
    """
    # Use a limit of ~3GiB
    limit = 3072 * 1024 * 1024
    resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

    # Restrict configurations which could take too long on travis-ci
    restrictions = {
        'regressor:adaboost:n_estimators': 50,
        'regressor:adaboost:max_depth': 1,
        'feature_preprocessor:kernel_pca:n_components': 10,
        'feature_preprocessor:kitchen_sinks:n_components': 50,
        'regressor:libsvm_svc:degree': 2,
        'regressor:libsvm_svr:degree': 2,
        'regressor:libsvm_svr:C': 1.,
        'feature_preprocessor:truncatedSVD:target_dim': 10,
        'feature_preprocessor:polynomial:degree': 2,
        'regressor:lda:n_components': 10,
    }

    # Tolerated message fragments per exception type. The first matching
    # type decides which fragments apply, so the order is significant.
    tolerated = (
        (ValueError, (
            "Floating-point under-/overflow occurred at epoch",
            "removed all features",
            "all features are discarded",
            "Numerical problems in QDA",
            'Bug in scikit-learn',
            'The condensed distance matrix must contain only finite '
            'values.',
        )),
        (RuntimeWarning, (
            "invalid value encountered in sqrt",
            "divide by zero encountered in",
            "invalid value encountered in divide",
            "invalid value encountered in true_divide",
            "invalid value encountered in multiply",
        )),
        (UserWarning, (
            "FastICA did not converge",
        )),
        (Exception, (
            "Multiple input features cannot have the same target value",
        )),
    )

    configurations_space.seed(1)

    for _ in range(10):
        config = configurations_space.sample_configuration()
        config._populate_values()

        for parameter, value in restrictions.items():
            if parameter in config and config[parameter] is not None:
                config._values[parameter] = value

        if data is None:
            X_train, Y_train, X_test, _ = get_dataset(
                dataset='boston', make_sparse=make_sparse, add_NaNs=True)
        else:
            X_train = data['X_train'].copy()
            Y_train = data['Y_train'].copy()
            X_test = data['X_test'].copy()

        cls = SimpleRegressionPipeline(
            random_state=1, dataset_properties=dataset_properties)
        cls.set_hyperparameters(config)

        # First make sure that for this configuration, setting the
        # parameters does not mistakenly set the estimator as fitted
        for step in cls.named_steps.values():
            with self.assertRaisesRegex(sklearn.exceptions.NotFittedError,
                                        "instance is not fitted yet"):
                check_is_fitted(step)

        try:
            cls.fit(X_train, Y_train)

            # After fit, all components should be tagged as fitted by
            # sklearn. check_is_fitted raises an exception if that is
            # not the case.
            try:
                for step in cls.named_steps.values():
                    check_is_fitted(step)
            except sklearn.exceptions.NotFittedError:
                self.fail(
                    "config={} raised NotFittedError unexpectedly!".format(
                        config))

            cls.predict(X_test)
        except (MemoryError, np.linalg.LinAlgError):
            continue
        except Exception as e:
            # An exception may carry no arguments or a non-string first
            # argument; normalise so the check itself cannot raise and
            # hide the real failure.
            message = str(e.args[0]) if e.args else ''
            fragments = next(
                texts for exc_type, texts in tolerated
                if isinstance(e, exc_type))
            if any(text in message for text in fragments):
                continue
            print(config)
            print(traceback.format_exc())
            raise
def test_set_hyperparameters_honors_configuration(self):
    """Check that ``set_hyperparameters`` really applies a configuration.

    For every combination of the four boolean dataset properties a
    configuration is sampled and set on a pipeline. Each pipeline step is
    then checked against the configuration and must carry the random
    state given to the pipeline. Finally the set of checked keys must
    equal the set of keys in the configuration, so that no hyperparameter
    goes unverified.
    """
    random_state = 1
    known_steps = ('data_preprocessor', 'feature_preprocessor', 'regressor')

    for sparse, multilabel, signed, multiclass in itertools.product(
            [True, False], repeat=4):
        auto = SimpleRegressionPipeline(
            random_state=random_state,
            dataset_properties={
                'sparse': sparse,
                'multilabel': multilabel,
                'multiclass': multiclass,
                'signed': signed,
            },
        )
        config = auto.get_hyperparameter_search_space().sample_configuration()

        # Translate the sampled configuration into actual components.
        auto.set_hyperparameters(config)
        config_dict = config.get_dictionary()

        # Collects every configuration key that has been verified.
        keys_checked = []

        for name, step in auto.named_steps.items():
            if name not in known_steps:
                raise ValueError(
                    "Found another type of step! Need to update this check"
                    " {}. ".format(name))
            keys_checked.extend(
                self._test_set_hyperparameter_choice(
                    name + ':__choice__', step, config_dict))
            self.assertEqual(step.random_state, random_state)

        # Make sure we checked the whole configuration
        self.assertSetEqual(set(config_dict.keys()), set(keys_checked))
def _test_configurations(self, configurations_space, make_sparse=False,
                         data=None, dataset_properties=None):
    """Fit and predict with ten configurations sampled from a search space.

    Parameters
    ----------
    configurations_space
        Search space to sample from; it is seeded with 1 before sampling.
    make_sparse : bool
        Forwarded to ``get_dataset`` when ``data`` is None.
    data : dict, optional
        Pre-built dataset with the keys ``'X_train'``, ``'Y_train'`` and
        ``'X_test'``; copies of the values are used. When None, the
        Boston dataset is loaded with NaNs added.
    dataset_properties : dict, optional
        Forwarded to ``SimpleRegressionPipeline``.

    Memory errors and failures whose message contains one of the
    tolerated fragments are skipped. Every other exception is re-raised
    after printing the configuration and the traceback.
    """
    # Use a limit of ~3GiB
    limit = 3072 * 1024 * 1024
    resource.setrlimit(resource.RLIMIT_AS, (limit, limit))

    # Restrict configurations which could take too long on travis-ci
    restrictions = {
        'regressor:adaboost:n_estimators': 50,
        'regressor:adaboost:max_depth': 1,
        'preprocessor:kernel_pca:n_components': 10,
        'preprocessor:kitchen_sinks:n_components': 50,
        'regressor:libsvm_svc:degree': 2,
        'regressor:libsvm_svr:degree': 2,
        'preprocessor:truncatedSVD:target_dim': 10,
        'preprocessor:polynomial:degree': 2,
        'regressor:lda:n_components': 10,
    }

    # Tolerated message fragments per exception type. The first matching
    # type decides which fragments apply, so the order is significant.
    tolerated = (
        (ValueError, (
            "Floating-point under-/overflow occurred at epoch",
            "removed all features",
            "all features are discarded",
            "Numerical problems in QDA",
            'Bug in scikit-learn',
            'The condensed distance matrix must contain only finite '
            'values.',
        )),
        (RuntimeWarning, (
            "invalid value encountered in sqrt",
            "divide by zero encountered in",
            "invalid value encountered in divide",
            "invalid value encountered in true_divide",
        )),
        (UserWarning, (
            "FastICA did not converge",
        )),
        (Exception, (
            "Multiple input features cannot have the same target value",
        )),
    )

    configurations_space.seed(1)

    for _ in range(10):
        config = configurations_space.sample_configuration()
        config._populate_values()

        for parameter, value in restrictions.items():
            if parameter in config and config[parameter] is not None:
                config._values[parameter] = value

        if data is None:
            X_train, Y_train, X_test, _ = get_dataset(
                dataset='boston', make_sparse=make_sparse, add_NaNs=True)
        else:
            X_train = data['X_train'].copy()
            Y_train = data['Y_train'].copy()
            X_test = data['X_test'].copy()

        cls = SimpleRegressionPipeline(random_state=1,
                                       dataset_properties=dataset_properties)
        cls.set_hyperparameters(config)

        try:
            cls.fit(X_train, Y_train)
            cls.predict(X_test)
        except MemoryError:
            continue
        except Exception as e:
            # An exception may carry no arguments or a non-string first
            # argument; normalise so the check itself cannot raise and
            # hide the real failure.
            message = str(e.args[0]) if e.args else ''
            fragments = next(
                texts for exc_type, texts in tolerated
                if isinstance(e, exc_type))
            if any(text in message for text in fragments):
                continue
            print(config)
            print(traceback.format_exc())
            raise
y = D.data['Y_train'] X_valid = D.data['X_valid'] X_test = D.data['X_test'] # Replace the following array by a new ensemble choices = \ [(0.220000, SimpleRegressionPipeline(configuration={ 'imputation:strategy': 'most_frequent', 'one_hot_encoding:use_minimum_fraction': 'False', 'preprocessor:__choice__': 'no_preprocessing', 'regressor:__choice__': 'xgradient_boosting', 'regressor:xgradient_boosting:base_score': 0.5, 'regressor:xgradient_boosting:colsample_bylevel': 1, 'regressor:xgradient_boosting:colsample_bytree': 1, 'regressor:xgradient_boosting:gamma': 0, 'regressor:xgradient_boosting:learning_rate': 0.056838908807173093, 'regressor:xgradient_boosting:max_delta_step': 0, 'regressor:xgradient_boosting:max_depth': 8, 'regressor:xgradient_boosting:min_child_weight': 16, 'regressor:xgradient_boosting:n_estimators': 178, 'regressor:xgradient_boosting:reg_alpha': 0, 'regressor:xgradient_boosting:reg_lambda': 1, 'regressor:xgradient_boosting:scale_pos_weight': 1, 'regressor:xgradient_boosting:subsample': 0.70026686345272005, 'rescaling:__choice__': 'none'})), (0.160000, SimpleRegressionPipeline(configuration={ 'imputation:strategy': 'mean', 'one_hot_encoding:minimum_fraction': 0.028721299365033225, 'one_hot_encoding:use_minimum_fraction': 'True', 'preprocessor:__choice__': 'no_preprocessing', 'regressor:__choice__': 'xgradient_boosting',
def test_configurations(self):
    """Run the shared configuration checks on the default search space."""
    pipeline = SimpleRegressionPipeline()
    self._test_configurations(pipeline.get_hyperparameter_search_space())
configuration_space = get_configuration_space(new_info_object) try: config = ConfigSpace.Configuration(configuration_space, configuration) except Exception as inst: execution_success = False logger.critical(inst) continue logger.info("Running the following configuration:") logger.info(str(config)) if 'classifier:__choice__' in configuration: M = SimpleClassificationPipeline(config, 1) elif 'regressor:__choice__' in configuration: M = SimpleRegressionPipeline(config, 1) else: execution_success = False logger.critical('Invalid hyperparameter configuration, does neither ' 'contain hyperparameter classifier:__choice__ nor ' 'regressor:__choice__!') continue evaluate_model = pynisher.enforce_limits( mem_in_mb=memlimit, wall_time_in_s=overall_time_budget)( pynish_me_aka_evaluate_model) rval = evaluate_model(D, M) if rval is not None: if isinstance(rval, ValueError) and rval.message == "KernelPCA " \ "removed all features!": logger.error("KernelPCA removed all features. Please try a "
# https://github.com/Lasagne/Lasagne/commit/24c9ed2ffc25504c3b0df4598afb1e63fdd59eee # Copy the file RegDeepNet into autosklearn.pipeline.components.regression # Copy the file FeedForwardNet into autosklearn.pipeline.implementations choices = \ [(0.360000, SimpleRegressionPipeline(configuration={ 'imputation:strategy': 'mean', 'one_hot_encoding:minimum_fraction': 0.049682918006307676, 'one_hot_encoding:use_minimum_fraction': 'True', 'preprocessor:__choice__': 'no_preprocessing', 'regressor:RegDeepNet:activation': 'tanh', 'regressor:RegDeepNet:batch_size': 1865, 'regressor:RegDeepNet:dropout_layer_1': 0.017462492577406473, 'regressor:RegDeepNet:dropout_layer_2': 0.048354205627225436, 'regressor:RegDeepNet:dropout_output': 0.00962149073006804, 'regressor:RegDeepNet:lambda2': 1.0282444549550921e-05, 'regressor:RegDeepNet:learning_rate': 0.001, 'regressor:RegDeepNet:num_layers': 'd', 'regressor:RegDeepNet:num_units_layer_1': 2615, 'regressor:RegDeepNet:num_units_layer_2': 252, 'regressor:RegDeepNet:number_updates': 3225, 'regressor:RegDeepNet:solver': 'smorm3s', 'regressor:RegDeepNet:std_layer_1': 0.006861129306844183, 'regressor:RegDeepNet:std_layer_2': 0.002395977520245193, 'regressor:__choice__': 'RegDeepNet', 'rescaling:__choice__': 'standardize'})), (0.320000, SimpleRegressionPipeline(configuration={ 'imputation:strategy': 'mean', 'one_hot_encoding:minimum_fraction': 0.05112532429613385, 'one_hot_encoding:use_minimum_fraction': 'True', 'preprocessor:__choice__': 'no_preprocessing',