def test_constant(self):
    # Test construction
    c1 = Constant("value", 1)
    c2 = Constant("value", 1)
    c3 = Constant("value", 2)
    c4 = Constant("valuee", 1)
    c5 = Constant("valueee", 2)

    # Test the representation
    self.assertEqual("value, Type: Constant, Value: 1", c1.__repr__())

    # Test the equals operator (and the ne operator in the last line)
    self.assertFalse(c1 == 1)
    self.assertEqual(c1, c2)
    self.assertFalse(c1 == c3)
    self.assertFalse(c1 == c4)
    self.assertTrue(c1 != c5)

    # Test that only strings, integers and floats are allowed as values
    self.assertRaises(TypeError, Constant, "value", dict())
    self.assertRaises(TypeError, Constant, "value", None)
    self.assertRaises(TypeError, Constant, "value", True)

    # Test that only string names are allowed
    self.assertRaises(TypeError, Constant, 1, "value")
    self.assertRaises(TypeError, Constant, dict(), "value")
    self.assertRaises(TypeError, Constant, None, "value")
    self.assertRaises(TypeError, Constant, True, "value")
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    loss = Constant("loss", "deviance")
    learning_rate = UniformFloatHyperparameter(
        name="learning_rate", lower=0.01, upper=1, default_value=0.1, log=True)
    n_estimators = UniformIntegerHyperparameter(
        "n_estimators", 50, 500, default_value=100)
    max_depth = UniformIntegerHyperparameter(
        name="max_depth", lower=1, upper=10, default_value=3)
    criterion = CategoricalHyperparameter(
        # criterion is fixed to 'mse'
        'criterion', ['mse'],  # ['friedman_mse', 'mse', 'mae'],
        default_value='mse')
    min_samples_split = UniformIntegerHyperparameter(
        name="min_samples_split", lower=2, upper=20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=20, default_value=1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        "min_weight_fraction_leaf", 0.)
    subsample = UniformFloatHyperparameter(
        name="subsample", lower=0.01, upper=1.0, default_value=1.0)
    max_features = UniformFloatHyperparameter(
        "max_features", 0.1, 1.0, default_value=1)
    max_leaf_nodes = UnParametrizedHyperparameter(
        name="max_leaf_nodes", value="None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        name='min_impurity_decrease', value=0.0)
    cs.add_hyperparameters([
        loss, learning_rate, n_estimators, max_depth, criterion,
        min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
        subsample, max_features, max_leaf_nodes, min_impurity_decrease
    ])
    return cs
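For orientation, a minimal sketch of how a space returned by such a function is typically consumed, assuming the standard ConfigSpace package (the hyperparameters below are illustrative, not tied to any specific estimator):

# Minimal usage sketch; assumes the ConfigSpace package is installed.
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import Constant, UniformFloatHyperparameter

cs = ConfigurationSpace(seed=1)
cs.add_hyperparameters([
    Constant("loss", "deviance"),  # fixed value, but still present in every config
    UniformFloatHyperparameter("learning_rate", 0.01, 1.0,
                               default_value=0.1, log=True),
])
config = cs.sample_configuration()  # draw one random configuration
print(config["loss"], config["learning_rate"])  # "deviance" plus a float in [0.01, 1]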
def get_hyperparameter_search_space(dataset_properties=None):
    percentile = UniformFloatHyperparameter(
        name="percentile", lower=1, upper=99, default=50)
    score_func = CategoricalHyperparameter(
        name="score_func", choices=["chi2", "f_classif"], default="chi2")
    if dataset_properties is not None:
        # Chi2 can handle sparse data, so we respect this
        if 'is_sparse' in dataset_properties and dataset_properties['is_sparse']:
            score_func = Constant(name="score_func", value="chi2")
    cs = ConfigurationSpace()
    cs.add_hyperparameter(percentile)
    cs.add_hyperparameter(score_func)
    return cs
def get_configspace_from_metalearning(metalearning_entry):
    cs = ConfigurationSpace()
    categorical_and_none_hyperparams = []
    for hyperparam in metalearning_entry:
        if len(metalearning_entry[hyperparam]['choices']) == 1 \
                and metalearning_entry[hyperparam]['default'] is None:
            categorical_and_none_hyperparams.append(
                Constant(hyperparam, 'None'))
        else:
            categorical_and_none_hyperparams.append(
                CategoricalHyperparameter(
                    name=hyperparam,
                    choices=metalearning_entry[hyperparam]['choices'],
                    default_value=metalearning_entry[hyperparam]['default']))
    cs.add_hyperparameters(categorical_and_none_hyperparams)
    return cs
def test_greater_and_less_condition(self):
    child = Constant('child', 'child')
    hp1 = UniformFloatHyperparameter("float", 0, 5)
    hp2 = UniformIntegerHyperparameter("int", 0, 5)
    hp3 = OrdinalHyperparameter("ord", list(range(6)))

    for hp in [hp1, hp2, hp3]:
        gt = GreaterThanCondition(child, hp, 1)
        self.assertFalse(gt.evaluate({hp.name: 0}))
        self.assertTrue(gt.evaluate({hp.name: 2}))
        self.assertFalse(gt.evaluate({hp.name: None}))

        lt = LessThanCondition(child, hp, 1)
        self.assertTrue(lt.evaluate({hp.name: 0}))
        self.assertFalse(lt.evaluate({hp.name: 2}))
        self.assertFalse(lt.evaluate({hp.name: None}))

    hp4 = CategoricalHyperparameter("cat", list(range(6)))
    self.assertRaisesRegexp(
        ValueError,
        "Parent hyperparameter in a > or < condition must be a subclass of "
        "NumericalHyperparameter or OrdinalHyperparameter, but is "
        "<class 'ConfigSpace.hyperparameters.CategoricalHyperparameter'>",
        GreaterThanCondition, child, hp4, 1)
    self.assertRaisesRegexp(
        ValueError,
        "Parent hyperparameter in a > or < condition must be a subclass of "
        "NumericalHyperparameter or OrdinalHyperparameter, but is "
        "<class 'ConfigSpace.hyperparameters.CategoricalHyperparameter'>",
        LessThanCondition, child, hp4, 1)

    hp5 = OrdinalHyperparameter("ord", ['cold', 'luke warm', 'warm', 'hot'])
    gt = GreaterThanCondition(child, hp5, 'warm')
    self.assertTrue(gt.evaluate({hp5.name: 'hot'}))
    self.assertFalse(gt.evaluate({hp5.name: 'cold'}))

    lt = LessThanCondition(child, hp5, 'warm')
    self.assertTrue(lt.evaluate({hp5.name: 'luke warm'}))
    self.assertFalse(lt.evaluate({hp5.name: 'warm'}))
def test_get_types(self):
    cs = ConfigurationSpace()
    cs.add_hyperparameter(CategoricalHyperparameter('a', ['a', 'b']))
    cs.add_hyperparameter(UniformFloatHyperparameter('b', 1, 5))
    cs.add_hyperparameter(UniformIntegerHyperparameter('c', 3, 7))
    cs.add_hyperparameter(Constant('d', -5))
    cs.add_hyperparameter(OrdinalHyperparameter('e', ['cold', 'hot']))
    cs.add_hyperparameter(CategoricalHyperparameter('f', ['x', 'y']))
    types, bounds = get_types(cs, None)
    np.testing.assert_array_equal(types, [2, 0, 0, 0, 0, 2])
    self.assertEqual(bounds[0][0], 2)
    self.assertFalse(np.isfinite(bounds[0][1]))
    np.testing.assert_array_equal(bounds[1], [0, 1])
    np.testing.assert_array_equal(bounds[2], [0, 1])
    self.assertEqual(bounds[3][0], 0)
    self.assertFalse(np.isfinite(bounds[3][1]))
    np.testing.assert_array_equal(bounds[4], [0, 1])
    self.assertEqual(bounds[5][0], 2)
    self.assertFalse(np.isfinite(bounds[5][1]))
def test_all_components_have_the_same_child(self):
    hp1 = CategoricalHyperparameter("input1", [0, 1])
    hp2 = CategoricalHyperparameter("input2", [0, 1])
    hp3 = CategoricalHyperparameter("input3", [0, 1])
    hp4 = CategoricalHyperparameter("input4", [0, 1])
    hp5 = CategoricalHyperparameter("input5", [0, 1])
    hp6 = Constant("AND", "True")

    cond1 = EqualsCondition(hp1, hp2, 1)
    cond2 = EqualsCondition(hp1, hp3, 1)
    cond3 = EqualsCondition(hp1, hp4, 1)
    cond4 = EqualsCondition(hp6, hp4, 1)
    cond5 = EqualsCondition(hp6, hp5, 1)

    AndConjunction(cond1, cond2, cond3)
    AndConjunction(cond4, cond5)
    self.assertRaisesRegexp(
        ValueError,
        "All Conjunctions and Conditions must have the same child.",
        AndConjunction, cond1, cond4)
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        n_estimators = Constant("n_estimators", 100)
        criterion = CategoricalHyperparameter(
            "criterion", ["gini", "entropy"], default_value="gini")
        max_features = UniformFloatHyperparameter(
            "max_features", 0, 1, default_value=0.5, q=0.05)
        max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            'min_weight_fraction_leaf', 0.)
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.)
        bootstrap = CategoricalHyperparameter(
            "bootstrap", ["True", "False"], default_value="False")
        cs.add_hyperparameters([n_estimators, criterion, max_features,
                                max_depth, max_leaf_nodes, min_samples_split,
                                min_samples_leaf, min_weight_fraction_leaf,
                                min_impurity_decrease, bootstrap])
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        space = {'n_estimators': 100,
                 'criterion': hp.choice('ets_criterion', ['gini', 'entropy']),
                 'max_features': hp.uniform('ets_max_features', 0, 1),
                 'max_depth': "None",
                 'max_leaf_nodes': "None",
                 'min_samples_leaf': hp.randint('ets_samples_leaf', 20) + 1,
                 'min_samples_split': hp.randint('ets_samples_split', 19) + 2,
                 'min_weight_fraction_leaf': 0.,
                 'min_impurity_decrease': 0.,
                 'bootstrap': hp.choice('ets_bootstrap', ['True', 'False'])}
        return space
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    n_estimators = Constant("n_estimators", 100)
    criterion = CategoricalHyperparameter(
        "criterion", ["gini", "entropy"], default_value="gini")

    # The maximum number of features used in the forest is calculated as
    # m^max_features, where m is the total number of features and
    # max_features is the hyperparameter specified below. The default is 0.5,
    # which yields sqrt(m) features as max_features in the estimator. This
    # corresponds with Geurts' heuristic.
    max_features = UniformFloatHyperparameter(
        "max_features", 0., 1., default_value=0.5)

    max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        'min_weight_fraction_leaf', 0.)
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        'min_impurity_decrease', 0.0)
    bootstrap = CategoricalHyperparameter(
        "bootstrap", ["True", "False"], default_value="False")
    cs.add_hyperparameters([
        n_estimators, criterion, max_features, max_depth, min_samples_split,
        min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes,
        min_impurity_decrease, bootstrap
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    alpha = UniformFloatHyperparameter(
        name="alpha", lower=0.01, upper=0.5, default=0.1)
    score_func = CategoricalHyperparameter(
        name="score_func", choices=["chi2", "f_classif"], default="chi2")
    if dataset_properties is not None:
        # Chi2 can handle sparse data, so we respect this
        if 'sparse' in dataset_properties and dataset_properties['sparse']:
            score_func = Constant(name="score_func", value="chi2")
    mode = CategoricalHyperparameter('mode', ['fpr', 'fdr', 'fwe'], 'fpr')
    cs = ConfigurationSpace()
    cs.add_hyperparameter(alpha)
    cs.add_hyperparameter(score_func)
    cs.add_hyperparameter(mode)
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    loss = CategoricalHyperparameter(
        "loss", ["least_squares"], default_value="least_squares")
    learning_rate = UniformFloatHyperparameter(
        name="learning_rate", lower=0.01, upper=1, default_value=0.1, log=True)
    max_iter = UniformIntegerHyperparameter(
        "max_iter", 32, 512, default_value=100)
    min_samples_leaf = UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=200, default_value=20, log=True)
    max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
    max_leaf_nodes = UniformIntegerHyperparameter(
        name="max_leaf_nodes", lower=3, upper=2047, default_value=31, log=True)
    max_bins = Constant("max_bins", 255)
    l2_regularization = UniformFloatHyperparameter(
        name="l2_regularization", lower=1E-10, upper=1, default_value=1E-10,
        log=True)
    early_stop = CategoricalHyperparameter(
        name="early_stop", choices=["off", "train", "valid"],
        default_value="off")
    tol = UnParametrizedHyperparameter(name="tol", value=1e-7)
    scoring = UnParametrizedHyperparameter(name="scoring", value="loss")
    n_iter_no_change = UniformIntegerHyperparameter(
        name="n_iter_no_change", lower=1, upper=20, default_value=10)
    validation_fraction = UniformFloatHyperparameter(
        name="validation_fraction", lower=0.01, upper=0.4, default_value=0.1)

    cs.add_hyperparameters([loss, learning_rate, max_iter, min_samples_leaf,
                            max_depth, max_leaf_nodes, max_bins,
                            l2_regularization, early_stop, tol, scoring,
                            n_iter_no_change, validation_fraction])

    n_iter_no_change_cond = InCondition(
        n_iter_no_change, early_stop, ["valid", "train"])
    validation_fraction_cond = EqualsCondition(
        validation_fraction, early_stop, "valid")
    cs.add_conditions([n_iter_no_change_cond, validation_fraction_cond])
    return cs
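To illustrate the conditions used above, here is a small sketch assuming standard ConfigSpace semantics (not part of the original search space): validation_fraction only becomes active when early_stop == "valid", and inactive hyperparameters are simply absent from a sampled configuration.

# Sketch of conditional activation, assuming standard ConfigSpace behavior.
from ConfigSpace import ConfigurationSpace
from ConfigSpace.conditions import EqualsCondition
from ConfigSpace.hyperparameters import (CategoricalHyperparameter,
                                         UniformFloatHyperparameter)

cs = ConfigurationSpace(seed=42)
early_stop = CategoricalHyperparameter(
    "early_stop", ["off", "train", "valid"], default_value="off")
validation_fraction = UniformFloatHyperparameter(
    "validation_fraction", 0.01, 0.4, default_value=0.1)
cs.add_hyperparameters([early_stop, validation_fraction])
cs.add_condition(EqualsCondition(validation_fraction, early_stop, "valid"))

for config in cs.sample_configuration(5):
    # validation_fraction is only present when early_stop == "valid"
    print(config.get("early_stop"), config.get("validation_fraction"))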
def test_and_conjunction(self):
    self.assertRaises(TypeError, AndConjunction, "String1", "String2")

    hp1 = CategoricalHyperparameter("input1", [0, 1])
    hp2 = CategoricalHyperparameter("input2", [0, 1])
    hp3 = CategoricalHyperparameter("input3", [0, 1])
    hp4 = Constant("And", "True")
    cond1 = EqualsCondition(hp4, hp1, 1)

    # Only one condition in an AndConjunction!
    self.assertRaises(ValueError, AndConjunction, cond1)

    cond2 = EqualsCondition(hp4, hp2, 1)
    cond3 = EqualsCondition(hp4, hp3, 1)

    andconj1 = AndConjunction(cond1, cond2)
    andconj1_ = AndConjunction(cond1, cond2)
    self.assertEqual(andconj1, andconj1_)

    # Test setting the vector idx
    hyperparameter_idx = {
        hp1.name: 0,
        hp2.name: 1,
        hp3.name: 2,
        hp4.name: 3
    }
    andconj1.set_vector_idx(hyperparameter_idx)
    self.assertEqual(andconj1.get_parents_vector(), [0, 1])
    self.assertEqual(andconj1.get_children_vector(), [3, 3])

    andconj2 = AndConjunction(cond2, cond3)
    self.assertNotEqual(andconj1, andconj2)

    andconj3 = AndConjunction(cond1, cond2, cond3)
    self.assertEqual(
        "(And | input1 == 1 && And | input2 == 1 && And | input3 == 1)",
        str(andconj3))

    # Test __eq__
    self.assertNotEqual(andconj1, andconj3)
    self.assertNotEqual(andconj1, "String")
def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() n_estimators = Constant("n_estimators", 100) criterion = UnParametrizedHyperparameter("criterion", "gini") max_features = UnParametrizedHyperparameter("max_features", "0.5") max_depth = UnParametrizedHyperparameter("max_depth", "None") min_samples_split = UnParametrizedHyperparameter("min_samples_split", 2) min_samples_leaf = UnParametrizedHyperparameter("min_samples_leaf", 1) min_weight_fraction_leaf = UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.) max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None") min_impurity_decrease = UnParametrizedHyperparameter('min_impurity_decrease', 0.0) bootstrap = UnParametrizedHyperparameter("bootstrap", "True") cs.add_hyperparameters([n_estimators, criterion, max_features, max_depth, min_samples_split, min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes, bootstrap, min_impurity_decrease]) return cs
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        criterion = CategoricalHyperparameter(
            "criterion", ["gini", "entropy"], default_value="gini")
        max_depth_factor = UniformFloatHyperparameter(
            'max_depth_factor', 0., 2., default_value=0.5)
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0)
        max_features = UnParametrizedHyperparameter('max_features', 1.0)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)
        cs.add_hyperparameters([criterion, max_features, max_depth_factor,
                                min_samples_split, min_samples_leaf,
                                min_weight_fraction_leaf, max_leaf_nodes,
                                min_impurity_decrease])
        return cs
def choice(label: str, options: List, default=None):
    if len(options) == 1:
        return Constant(label, _encode(options[0]))
    # fixme: if declare probability in here?
    # fixme: copy from hyperflow/hdl2shps/hdl2shps.py:354
    choice2proba = {}
    not_specific_proba_choices = []
    sum_proba = 0
    choices = []
    raw_choices = []
    for option in options:
        if isinstance(option, (tuple, list)) and len(option) == 2:
            # An (option, probability) pair; order within the pair is free.
            choice = None
            proba = None
            for item in option:
                if isinstance(item, (float, int)) and 0 <= item <= 1:
                    proba = item
                else:
                    choice = item
            assert choice is not None and proba is not None
            choice2proba[choice] = proba
            sum_proba += proba
        else:
            choice = option
            not_specific_proba_choices.append(choice)
        choices.append(_encode(choice))
        raw_choices.append(choice)
    if sum_proba <= 1:
        if len(not_specific_proba_choices) > 0:
            # Split the remaining probability mass evenly among the options
            # without an explicit probability.
            p_rest = (1 - sum_proba) / len(not_specific_proba_choices)
            for not_specific_proba_choice in not_specific_proba_choices:
                choice2proba[not_specific_proba_choice] = p_rest
    else:
        # Fall back to a uniform distribution; key by the raw choices so the
        # lookup below succeeds.
        choice2proba = {k: 1 / len(options) for k in raw_choices}
    proba_list = [choice2proba[k] for k in raw_choices]
    kwargs = {}
    if default:
        kwargs.update({'default_value': _encode(default)})
    hp = CategoricalHyperparameter(label, choices, weights=proba_list, **kwargs)
    hp.probabilities = proba_list  # fixme: don't make sense
    return hp
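A hedged usage sketch for the choice helper above; the _encode helper comes from the surrounding project, so the exact encoded values are an assumption here:

# Hypothetical calls to the choice() helper defined above.
hp_fixed = choice("loss", ["deviance"])  # a single option collapses to a Constant

# Mixed form: (option, probability) pairs plus plain options; the leftover
# probability mass is split evenly among the options without an explicit one.
hp_weighted = choice("kernel", [("rbf", 0.5), "linear", "poly"], default="rbf")
print(hp_weighted.probabilities)  # [0.5, 0.25, 0.25]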
def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() loss = Constant("loss", "deviance") learning_rate = UniformFloatHyperparameter(name="learning_rate", lower=0.01, upper=1, default=0.1, log=True) n_estimators = UniformIntegerHyperparameter("n_estimators", 50, 500, default=100) max_depth = UniformIntegerHyperparameter(name="max_depth", lower=1, upper=10, default=3) min_samples_split = UniformIntegerHyperparameter( name="min_samples_split", lower=2, upper=20, default=2, log=False) min_samples_leaf = UniformIntegerHyperparameter( name="min_samples_leaf", lower=1, upper=20, default=1, log=False) min_weight_fraction_leaf = UnParametrizedHyperparameter( "min_weight_fraction_leaf", 0.) subsample = UniformFloatHyperparameter(name="subsample", lower=0.01, upper=1.0, default=1.0, log=False) max_features = UniformFloatHyperparameter("max_features", 0.5, 5, default=1) max_leaf_nodes = UnParametrizedHyperparameter(name="max_leaf_nodes", value="None") cs.add_hyperparameters([ loss, learning_rate, n_estimators, max_depth, min_samples_split, min_samples_leaf, min_weight_fraction_leaf, subsample, max_features, max_leaf_nodes ]) return cs
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        alpha = UniformFloatHyperparameter(
            name="alpha", lower=0.01, upper=0.5, default_value=0.1)
        score_func = CategoricalHyperparameter(
            name="score_func", choices=["chi2", "f_classif"],
            default_value="chi2")
        if dataset_properties is not None:
            # Chi2 can handle sparse data, so we respect this
            if 'sparse' in dataset_properties and dataset_properties['sparse']:
                score_func = Constant(name="score_func", value="chi2")
        mode = CategoricalHyperparameter('mode', ['fpr', 'fdr', 'fwe'], 'fpr')
        cs = ConfigurationSpace()
        cs.add_hyperparameter(alpha)
        cs.add_hyperparameter(score_func)
        cs.add_hyperparameter(mode)
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        space = {
            'alpha': hp.uniform('gus_alpha', 0.01, 0.5),
            'score_func': hp.choice('gus_score_func',
                                    ['chi2', 'f_classif', 'mutual_info']),
            'mode': hp.choice('gus_mode', ['fpr', 'fdr', 'fwe'])
        }
        return space
def get_hyperparameter_search_space(dataset_properties=None):
    n_estimators = UniformIntegerHyperparameter(
        name="n_estimators", lower=10, upper=100, default=10)
    max_depth = UniformIntegerHyperparameter(
        name="max_depth", lower=2, upper=10, default=5)
    min_samples_split = UniformIntegerHyperparameter(
        name="min_samples_split", lower=2, upper=20, default=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=20, default=1)
    min_weight_fraction_leaf = Constant('min_weight_fraction_leaf', 1.0)
    max_leaf_nodes = UnParametrizedHyperparameter(
        name="max_leaf_nodes", value="None")
    cs = ConfigurationSpace()
    cs.add_hyperparameters([
        n_estimators, max_depth, min_samples_split, min_samples_leaf,
        min_weight_fraction_leaf, max_leaf_nodes
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None): cs = ConfigurationSpace() n_estimators = Constant("n_estimators", 100) criterion = CategoricalHyperparameter("criterion", ["gini", "entropy"], default_value="gini") max_features = UniformFloatHyperparameter("max_features", 0, 1, default_value=0.5, q=0.05) max_depth = UnParametrizedHyperparameter(name="max_depth", value="None") max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None") min_samples_split = UniformIntegerHyperparameter("min_samples_split", 2, 20, default_value=2) min_samples_leaf = UniformIntegerHyperparameter("min_samples_leaf", 1, 20, default_value=1) min_weight_fraction_leaf = UnParametrizedHyperparameter( 'min_weight_fraction_leaf', 0.) min_impurity_decrease = UnParametrizedHyperparameter( 'min_impurity_decrease', 0.) bootstrap = CategoricalHyperparameter("bootstrap", ["True", "False"], default_value="False") cs.add_hyperparameters([ n_estimators, criterion, max_features, max_depth, max_leaf_nodes, min_samples_split, min_samples_leaf, min_weight_fraction_leaf, min_impurity_decrease, bootstrap ]) return cs
def test_constant(self):
    # Test construction
    c1 = Constant("value", 1)
    c2 = Constant("value", 1)
    c3 = Constant("value", 2)
    c4 = Constant("valuee", 1)
    c5 = Constant("valueee", 2)

    # Test that the attributes are accessible
    self.assertEqual(c5.name, "valueee")
    self.assertEqual(c5.value, 2)

    # Test the representation
    self.assertEqual("value, Type: Constant, Value: 1", c1.__repr__())

    # Test the equals operator (and the ne operator in the last line)
    self.assertFalse(c1 == 1)
    self.assertEqual(c1, c2)
    self.assertFalse(c1 == c3)
    self.assertFalse(c1 == c4)
    self.assertTrue(c1 != c5)

    # Test that only strings, integers and floats are allowed as values
    self.assertRaises(TypeError, Constant, "value", dict())
    self.assertRaises(TypeError, Constant, "value", None)
    self.assertRaises(TypeError, Constant, "value", True)

    # Test that only string names are allowed
    self.assertRaises(TypeError, Constant, 1, "value")
    self.assertRaises(TypeError, Constant, dict(), "value")
    self.assertRaises(TypeError, Constant, None, "value")
    self.assertRaises(TypeError, Constant, True, "value")

    # Test that meta-data is stored correctly
    c1_meta = Constant("value", 1, dict(self.meta_data))
    self.assertEqual(c1_meta.meta, self.meta_data)
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'): if optimizer == 'smac': cs = ConfigurationSpace() n_estimators = Constant("n_estimators", 100) criterion = CategoricalHyperparameter( "criterion", ["mse", "mae"], default_value="mse") # The maximum number of features used in the forest is calculated as m^max_features, where # m is the total number of features, and max_features is the hyperparameter specified below. # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This # corresponds with Geurts' heuristic. max_features = UniformFloatHyperparameter( "max_features", 0., 1., default_value=0.5) min_samples_split = UniformIntegerHyperparameter( "min_samples_split", 2, 20, default_value=2) min_samples_leaf = UniformIntegerHyperparameter( "min_samples_leaf", 1, 20, default_value=1) bootstrap = CategoricalHyperparameter( "bootstrap", ["True", "False"], default_value="False") cs.add_hyperparameters([n_estimators, criterion, max_features, min_samples_split, min_samples_leaf, bootstrap]) return cs elif optimizer == 'tpe': from hyperopt import hp space = {'n_estimators': hp.choice('et_n_estimators', [100]), 'criterion': hp.choice('et_criterion', ["mse", "mae"]), 'max_features': hp.uniform('et_max_features', 0, 1), 'min_samples_split': hp.randint('et_min_samples_split', 19) + 2, 'min_samples_leaf': hp.randint('et_min_samples_leaf,', 20) + 1, 'bootstrap': hp.choice('et_bootstrap', ["True", "False"])} init_trial = {'n_estimators': 100, 'criterion': "mse", 'max_features': 0.5, 'min_samples_split': 2, 'min_samples_leaf': 1, 'bootstrap': "False"} return space
def get_random_forest_default_search_space(seed):
    cs = ConfigurationSpace('sklearn.ensemble.RandomForestClassifier', seed)
    imputation = CategoricalHyperparameter(
        'imputation__strategy', ['mean', 'median', 'most_frequent'])
    n_estimators = Constant("classifier__n_estimators", 100)
    criterion = CategoricalHyperparameter(
        "classifier__criterion", ["gini", "entropy"], default_value="gini")

    # The maximum number of features used in the forest is calculated as
    # m^max_features, where m is the total number of features and
    # max_features is the hyperparameter specified below. The default is 0.5,
    # which yields sqrt(m) features as max_features in the estimator. This
    # corresponds with Geurts' heuristic.
    max_features = UniformFloatHyperparameter(
        "classifier__max_features", 0., 1., default_value=0.5)

    # max_depth = UnParametrizedHyperparameter("classifier__max_depth", None)
    min_samples_split = UniformIntegerHyperparameter(
        "classifier__min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "classifier__min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        "classifier__min_weight_fraction_leaf", 0.)
    # max_leaf_nodes = UnParametrizedHyperparameter("classifier__max_leaf_nodes", "None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        'classifier__min_impurity_decrease', 0.0)
    bootstrap = CategoricalHyperparameter(
        "classifier__bootstrap", ["True", "False"], default_value="True")

    cs.add_hyperparameters([
        imputation, n_estimators, criterion, max_features, min_samples_split,
        min_samples_leaf, min_weight_fraction_leaf, bootstrap,
        min_impurity_decrease
    ])
    return cs
def test_nested_conjunctions(self):
    hp1 = CategoricalHyperparameter("input1", [0, 1])
    hp2 = CategoricalHyperparameter("input2", [0, 1])
    hp3 = CategoricalHyperparameter("input3", [0, 1])
    hp4 = CategoricalHyperparameter("input4", [0, 1])
    hp5 = CategoricalHyperparameter("input5", [0, 1])
    hp6 = Constant("AND", "True")

    cond1 = EqualsCondition(hp6, hp1, 1)
    cond2 = EqualsCondition(hp6, hp2, 1)
    cond3 = EqualsCondition(hp6, hp3, 1)
    cond4 = EqualsCondition(hp6, hp4, 1)
    cond5 = EqualsCondition(hp6, hp5, 1)

    conj1 = AndConjunction(cond1, cond2)
    conj2 = OrConjunction(conj1, cond3)
    conj3 = AndConjunction(conj2, cond4, cond5)

    # TODO: this does not look nice; AND should depend on one large
    # conjunction, there should not be many ANDs inside this string!
    self.assertEqual("(((AND | input1 == 1 && AND | input2 == 1) || AND | "
                     "input3 == 1) && AND | input4 == 1 && AND | input5 "
                     "== 1)", str(conj3))
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    cs.add_hyperparameter(Constant("n_estimators", 100))
    cs.add_hyperparameter(CategoricalHyperparameter(
        "criterion", ["gini", "entropy"], default="gini"))
    cs.add_hyperparameter(UniformFloatHyperparameter(
        "max_features", 0.5, 5, default=1))
    cs.add_hyperparameter(UnParametrizedHyperparameter("max_depth", "None"))
    cs.add_hyperparameter(UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default=2))
    cs.add_hyperparameter(UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default=1))
    cs.add_hyperparameter(
        UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.))
    cs.add_hyperparameter(
        UnParametrizedHyperparameter("max_leaf_nodes", "None"))
    cs.add_hyperparameter(CategoricalHyperparameter(
        "bootstrap", ["True", "False"], default="True"))
    return cs
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    cs = ConfigurationSpace()
    n_estimators = UniformFloatHyperparameter(
        "n_estimators", 100, 1000, default_value=500, q=50)
    num_leaves = UniformIntegerHyperparameter(
        "num_leaves", 31, 2047, default_value=128)
    max_depth = Constant('max_depth', 15)
    learning_rate = UniformFloatHyperparameter(
        "learning_rate", 1e-3, 0.3, default_value=0.1, log=True)
    min_child_samples = UniformIntegerHyperparameter(
        "min_child_samples", 5, 30, default_value=20)
    subsample = UniformFloatHyperparameter(
        "subsample", 0.7, 1, default_value=1, q=0.1)
    colsample_bytree = UniformFloatHyperparameter(
        "colsample_bytree", 0.7, 1, default_value=1, q=0.1)
    cs.add_hyperparameters([
        n_estimators, num_leaves, max_depth, learning_rate,
        min_child_samples, subsample, colsample_bytree
    ])
    return cs
def test_or_conjunction(self):
    self.assertRaises(TypeError, AndConjunction, "String1", "String2")

    hp1 = CategoricalHyperparameter("input1", [0, 1])
    hp2 = CategoricalHyperparameter("input2", [0, 1])
    hp3 = CategoricalHyperparameter("input3", [0, 1])
    hp4 = Constant("Or", "True")
    cond1 = EqualsCondition(hp4, hp1, 1)

    # Only one condition in an OrConjunction!
    self.assertRaises(ValueError, OrConjunction, cond1)

    cond2 = EqualsCondition(hp4, hp2, 1)
    cond3 = EqualsCondition(hp4, hp3, 1)

    orconj1 = OrConjunction(cond1, cond2)
    orconj1_ = OrConjunction(cond1, cond2)
    self.assertEqual(orconj1, orconj1_)

    orconj2 = OrConjunction(cond2, cond3)
    self.assertNotEqual(orconj1, orconj2)

    orconj3 = OrConjunction(cond1, cond2, cond3)
    self.assertEqual("(Or | input1 == 1 || Or | input2 == 1 || Or | "
                     "input3 == 1)", str(orconj3))
def test_get_types_with_inactive(self):
    cs = ConfigurationSpace()
    a = cs.add_hyperparameter(CategoricalHyperparameter('a', ['a', 'b']))
    b = cs.add_hyperparameter(UniformFloatHyperparameter('b', 1, 5))
    c = cs.add_hyperparameter(UniformIntegerHyperparameter('c', 3, 7))
    d = cs.add_hyperparameter(Constant('d', -5))
    e = cs.add_hyperparameter(OrdinalHyperparameter('e', ['cold', 'hot']))
    f = cs.add_hyperparameter(CategoricalHyperparameter('f', ['x', 'y']))
    cs.add_condition(EqualsCondition(b, a, 'a'))
    cs.add_condition(EqualsCondition(c, a, 'a'))
    cs.add_condition(EqualsCondition(d, a, 'a'))
    cs.add_condition(EqualsCondition(e, a, 'a'))
    cs.add_condition(EqualsCondition(f, a, 'a'))
    types, bounds = get_types(cs, None)
    np.testing.assert_array_equal(types, [2, 0, 0, 2, 0, 3])
    self.assertEqual(bounds[0][0], 2)
    self.assertFalse(np.isfinite(bounds[0][1]))
    np.testing.assert_array_equal(bounds[1], [-1, 1])
    np.testing.assert_array_equal(bounds[2], [-1, 1])
    self.assertEqual(bounds[3][0], 2)
    self.assertFalse(np.isfinite(bounds[3][1]))
    np.testing.assert_array_equal(bounds[4], [0, 2])
    self.assertEqual(bounds[5][0], 3)
    self.assertFalse(np.isfinite(bounds[5][1]))
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    n_estimators = UniformIntegerHyperparameter(
        name="n_estimators", lower=10, upper=100, default_value=10)
    max_depth = UniformIntegerHyperparameter(
        name="max_depth", lower=2, upper=10, default_value=5)
    min_samples_split = UniformIntegerHyperparameter(
        name="min_samples_split", lower=2, upper=20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=20, default_value=1)
    min_weight_fraction_leaf = Constant('min_weight_fraction_leaf', 1.0)
    max_leaf_nodes = UnParametrizedHyperparameter(
        name="max_leaf_nodes", value="None")
    bootstrap = CategoricalHyperparameter('bootstrap', ['True', 'False'])
    cs = ConfigurationSpace()
    cs.add_hyperparameters([
        n_estimators, max_depth, min_samples_split, min_samples_leaf,
        min_weight_fraction_leaf, max_leaf_nodes, bootstrap
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        epsilon = CategoricalHyperparameter(
            "epsilon", [1e-4, 1e-3, 1e-2, 1e-1, 1], default_value=1e-4)
        loss = CategoricalHyperparameter(
            "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
            default_value="epsilon_insensitive")
        dual = CategoricalHyperparameter(
            "dual", ['True', 'False'], default_value='True')
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
        C = UniformFloatHyperparameter(
            "C", 0.03125, 32768, log=True, default_value=1.0)
        fit_intercept = Constant("fit_intercept", "True")
        intercept_scaling = Constant("intercept_scaling", 1)
        cs.add_hyperparameters([
            epsilon, loss, dual, tol, C, fit_intercept, intercept_scaling
        ])

        dual_and_loss = ForbiddenAndConjunction(
            ForbiddenEqualsClause(dual, "False"),
            ForbiddenEqualsClause(loss, "epsilon_insensitive"))
        cs.add_forbidden_clause(dual_and_loss)
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        space = {
            'loss': hp.choice('liblinear_combination', [
                {'loss': "epsilon_insensitive", 'dual': "True"},
                {'loss': "squared_epsilon_insensitive", 'dual': "True"},
                {'loss': "squared_epsilon_insensitive", 'dual': "False"},
            ]),
            'dual': None,
            'tol': hp.loguniform('liblinear_tol', np.log(1e-5), np.log(1e-1)),
            'C': hp.loguniform('liblinear_C', np.log(0.03125), np.log(32768)),
            'fit_intercept': hp.choice('liblinear_fit_intercept', ["True"]),
            'intercept_scaling': hp.choice('liblinear_intercept_scaling', [1])
        }
        init_trial = {
            'loss': {'loss': "epsilon_insensitive", 'dual': "True"},
            'tol': 1e-4,
            'C': 1,
            'fit_intercept': "True",
            'intercept_scaling': 1
        }
        return space
from ConfigSpace.conditions import InCondition
from automl.utl import json_utils

cs = ConfigurationSpace()

loss = CategoricalHyperparameter(
    "loss", ["ls", "lad", "huber", "quantile"], default_value="ls")
learning_rate = UniformFloatHyperparameter(
    name="learning_rate", lower=0.01, upper=1, default_value=0.1, log=True)
n_estimators = UniformIntegerHyperparameter(
    "n_estimators", 50, 500, default_value=100)
max_depth = Constant("max_depth_none", "None")
min_samples_split = UniformFloatHyperparameter(
    "min_samples_split", 0., 1., default_value=0.5)
min_samples_leaf = UniformFloatHyperparameter(
    "min_samples_leaf", 0., 0.5, default_value=0.0001)
min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.)
subsample = UniformFloatHyperparameter(
    name="subsample", lower=0.01, upper=1.0, default_value=1.0, log=False)
max_features = UniformFloatHyperparameter("max_features",
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        penalty = CategoricalHyperparameter(
            "penalty", ["l1", "l2"], default_value="l2")
        loss = CategoricalHyperparameter(
            "loss", ["hinge", "squared_hinge"], default_value="squared_hinge")
        dual = CategoricalHyperparameter(
            "dual", ['True', 'False'], default_value='True')
        # This is set ad-hoc
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
        C = UniformFloatHyperparameter(
            "C", 0.03125, 32768, log=True, default_value=1.0)
        multi_class = Constant("multi_class", "ovr")
        # These are set ad-hoc
        fit_intercept = Constant("fit_intercept", "True")
        intercept_scaling = Constant("intercept_scaling", 1)
        cs.add_hyperparameters([
            penalty, loss, dual, tol, C, multi_class, fit_intercept,
            intercept_scaling
        ])

        penalty_and_loss = ForbiddenAndConjunction(
            ForbiddenEqualsClause(penalty, "l1"),
            ForbiddenEqualsClause(loss, "hinge"))
        constant_penalty_and_loss = ForbiddenAndConjunction(
            ForbiddenEqualsClause(dual, "False"),
            ForbiddenEqualsClause(penalty, "l2"),
            ForbiddenEqualsClause(loss, "hinge"))
        penalty_and_dual = ForbiddenAndConjunction(
            ForbiddenEqualsClause(dual, "True"),
            ForbiddenEqualsClause(penalty, "l1"))
        cs.add_forbidden_clause(penalty_and_loss)
        cs.add_forbidden_clause(constant_penalty_and_loss)
        cs.add_forbidden_clause(penalty_and_dual)
        return cs
    elif optimizer == 'tpe':
        space = {
            'penalty': hp.choice('liblinear_combination', [
                {'penalty': "l1", 'loss': "squared_hinge", 'dual': "False"},
                {'penalty': "l2", 'loss': "hinge", 'dual': "True"},
                {'penalty': "l2", 'loss': "squared_hinge", 'dual': "True"},
                {'penalty': "l2", 'loss': "squared_hinge", 'dual': "False"},
            ]),
            'loss': None,
            'dual': None,
            'tol': hp.loguniform('liblinear_tol', np.log(1e-5), np.log(1e-1)),
            'C': hp.loguniform('liblinear_C', np.log(0.03125), np.log(32768)),
            'multi_class': hp.choice('liblinear_multi_class', ["ovr"]),
            'fit_intercept': hp.choice('liblinear_fit_intercept', ["True"]),
            'intercept_scaling': hp.choice('liblinear_intercept_scaling', [1])
        }
        init_trial = {
            'penalty': {'penalty': "l2", 'loss': "squared_hinge",
                        'dual': "True"},
            'tol': 1e-4,
            'C': 1,
            'multi_class': "ovr",
            'fit_intercept': "True",
            'intercept_scaling': 1
        }
        return space
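A brief sketch of what the forbidden clauses above do at sampling time, assuming standard ConfigSpace behavior (the defaults are chosen so the default configuration itself is not forbidden, as in the space above):

# Sketch: forbidden combinations never appear in sampled configurations.
from ConfigSpace import ConfigurationSpace
from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause
from ConfigSpace.hyperparameters import CategoricalHyperparameter

cs = ConfigurationSpace(seed=0)
penalty = CategoricalHyperparameter("penalty", ["l1", "l2"], default_value="l2")
loss = CategoricalHyperparameter("loss", ["hinge", "squared_hinge"],
                                 default_value="squared_hinge")
cs.add_hyperparameters([penalty, loss])
cs.add_forbidden_clause(ForbiddenAndConjunction(
    ForbiddenEqualsClause(penalty, "l1"),
    ForbiddenEqualsClause(loss, "hinge")))

for config in cs.sample_configuration(10):
    # penalty == "l1" never co-occurs with loss == "hinge"
    assert not (config["penalty"] == "l1" and config["loss"] == "hinge")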