def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace search space for this model.

    The space covers the shrinkage strategy (with a conditional manual
    factor), the number of components, and a log-scaled tolerance.
    """
    cs = ConfigurationSpace()

    shrinkage = CategoricalHyperparameter(
        "shrinkage", ["None", "auto", "manual"], default_value="None")
    shrinkage_factor = UniformFloatHyperparameter(
        "shrinkage_factor", 0., 1., 0.5)
    n_components = UniformIntegerHyperparameter(
        'n_components', 1, 250, default_value=10)
    tol = UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default_value=1e-4, log=True)

    cs.add_hyperparameters([shrinkage, shrinkage_factor, n_components, tol])
    # The factor is only meaningful when shrinkage is set to "manual".
    cs.add_condition(EqualsCondition(shrinkage_factor, shrinkage, "manual"))
    return cs
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    """Return the LDA hyperparameter space for the requested optimizer.

    `optimizer='smac'` yields a ConfigSpace object; `optimizer='tpe'`
    yields a hyperopt search-space dict. Any other value returns None.
    """
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        shrinkage = CategoricalHyperparameter(
            "shrinkage", ["None", "auto", "manual"], default_value="None")
        shrinkage_factor = UniformFloatHyperparameter(
            "shrinkage_factor", 0., 1., 0.5)
        n_components = UniformIntegerHyperparameter(
            'n_components', 1, 250, default_value=10)
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
        cs.add_hyperparameters(
            [shrinkage, shrinkage_factor, n_components, tol])
        # Manual factor only applies when shrinkage == "manual".
        cs.add_condition(
            EqualsCondition(shrinkage_factor, shrinkage, "manual"))
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        # randint is 0-based, so shift by one to cover [1, 250].
        space = {
            'n_components': hp.randint('lda_n_components', 250) + 1,
            'tol': hp.loguniform('lda_tol', np.log(1e-5), np.log(1e-1)),
            'shrinkage': hp.choice(
                'lda_shrinkage',
                ["None", "auto",
                 ("manual",
                  {'shrinkage_factor':
                       hp.uniform('lda_shrinkage_factor', 0, 1)})]),
        }
        # NOTE(review): init_trial is never used in this function; other
        # callers may rely on the convention -- confirm before removing.
        init_trial = {'n_components': 10, 'tol': 1e-4, 'shrinkage': "None"}
        return space
def get_cs():
    """Build the default ConfigSpace for this tree-ensemble regressor."""
    cs = ConfigurationSpace()

    criterion = CategoricalHyperparameter(
        "criterion", ["mse", "mae"], default_value="mse")
    # max_features is a fraction: the estimator uses m^max_features features
    # out of m total. The default of 0.5 reproduces sqrt(m), matching
    # Geurts' heuristic.
    max_features = UniformFloatHyperparameter(
        "max_features", 0., 1., default_value=0.5)
    max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        'min_weight_fraction_leaf', 0.)
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        'min_impurity_decrease', 0.0)
    bootstrap = CategoricalHyperparameter(
        "bootstrap", ["True", "False"], default_value="False")

    cs.add_hyperparameters([
        criterion, max_features, max_depth, min_samples_split,
        min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes,
        min_impurity_decrease, bootstrap
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for a histogram gradient-boosting model."""
    cs = ConfigurationSpace()

    loss = Constant("loss", "auto")
    learning_rate = UniformFloatHyperparameter(
        name="learning_rate", lower=0.01, upper=1, default_value=0.1,
        log=True)
    min_samples_leaf = UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=200, default_value=20,
        log=True)
    max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
    max_leaf_nodes = UniformIntegerHyperparameter(
        name="max_leaf_nodes", lower=3, upper=2047, default_value=31,
        log=True)
    max_bins = Constant("max_bins", 255)
    l2_regularization = UniformFloatHyperparameter(
        name="l2_regularization", lower=1E-10, upper=1,
        default_value=1E-10, log=True)
    scoring = UnParametrizedHyperparameter(name="scoring", value="loss")

    cs.add_hyperparameters([
        loss, learning_rate, min_samples_leaf, max_depth, max_leaf_nodes,
        max_bins, l2_regularization, scoring,
    ])
    return cs
def get_hyperparameter_search_space(self, dataset_properties=None,
                                    default=None, include=None,
                                    exclude=None):
    """Build the configuration space for choosing a minority coalescer.

    Adds a ``__choice__`` hyperparameter over all available coalescer
    components and nests each component's own space under it.

    Parameters
    ----------
    dataset_properties : dict, optional
        Properties forwarded to each component's space builder.
    default : str, optional
        Name of the component to use as the default choice; when None,
        the first available of ['minority_coalescer', 'no_coalescense']
        is used.
    include, exclude : optional
        Filters forwarded to ``get_available_components``.

    Raises
    ------
    ValueError
        If no coalescer component is available.
    """
    cs = ConfigurationSpace()
    if dataset_properties is None:
        dataset_properties = {}

    # Compile a list of legal preprocessors for this problem.
    available_preprocessors = self.get_available_components(
        dataset_properties=dataset_properties,
        include=include, exclude=exclude)

    if len(available_preprocessors) == 0:
        # BUG FIX: the implicitly-concatenated literals were missing a
        # separating space ("...coalescercomponent.").
        raise ValueError(
            "No minority coalescers found, please add any one minority "
            "coalescer component.")

    if default is None:
        defaults = ['minority_coalescer', 'no_coalescense']
        for default_ in defaults:
            if default_ in available_preprocessors:
                default = default_
                break

    preprocessor = CategoricalHyperparameter(
        '__choice__', list(available_preprocessors.keys()),
        default_value=default)
    cs.add_hyperparameter(preprocessor)

    # Nest each component's space, conditional on it being selected.
    for name in available_preprocessors:
        preprocessor_configuration_space = available_preprocessors[name]. \
            get_hyperparameter_search_space(dataset_properties)
        parent_hyperparameter = {'parent': preprocessor, 'value': name}
        cs.add_configuration_space(
            name, preprocessor_configuration_space,
            parent_hyperparameter=parent_hyperparameter)

    self.configuration_space_ = cs
    self.dataset_properties_ = dataset_properties
    return cs
def get_cs():
    """Build the default ConfigSpace for a LightGBM-style model."""
    cs = ConfigurationSpace()

    # NOTE(review): n_estimators is modelled as a quantized float
    # (q=50) even though it is a count; downstream code presumably
    # casts it -- confirm before changing the type.
    n_estimators = UniformFloatHyperparameter(
        "n_estimators", 100, 1000, default_value=500, q=50)
    num_leaves = UniformIntegerHyperparameter(
        "num_leaves", 31, 2047, default_value=128)
    # Previously fixed: Constant('max_depth', 15).
    max_depth = UniformIntegerHyperparameter(
        "max_depth", 5, 15, default_value=10)
    learning_rate = UniformFloatHyperparameter(
        "learning_rate", 1e-3, 0.3, default_value=0.1, log=True)
    min_child_samples = UniformIntegerHyperparameter(
        "min_child_samples", 5, 30, default_value=20)
    subsample = UniformFloatHyperparameter(
        "subsample", 0.7, 1, default_value=1, q=0.1)
    colsample_bytree = UniformFloatHyperparameter(
        "colsample_bytree", 0.7, 1, default_value=1)  # q=0.1

    cs.add_hyperparameters([
        n_estimators, num_leaves, max_depth, learning_rate,
        min_child_samples, subsample, colsample_bytree
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for this forest classifier."""
    cs = ConfigurationSpace()

    n_estimators = Constant("n_estimators", 100)
    criterion = CategoricalHyperparameter(
        "criterion", ["gini", "entropy"], default_value="gini")
    max_features = UniformFloatHyperparameter(
        "max_features", 0, 1, default_value=0.5, q=0.05)
    max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        'min_weight_fraction_leaf', 0.)
    min_impurity_decrease = UnParametrizedHyperparameter(
        'min_impurity_decrease', 0.)
    bootstrap = CategoricalHyperparameter(
        "bootstrap", ["True", "False"], default_value="False")

    cs.add_hyperparameters([
        n_estimators, criterion, max_features, max_depth, max_leaf_nodes,
        min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
        min_impurity_decrease, bootstrap
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for this regressor.

    Uses the legacy ConfigSpace API (`default=` keyword); each
    hyperparameter is registered directly on the space.
    """
    cs = ConfigurationSpace()

    # The original bound each add_hyperparameter() return value to an
    # unused local; those bindings are dropped here -- behavior is the same.
    cs.add_hyperparameter(Constant("n_estimators", 100))
    cs.add_hyperparameter(Constant("criterion", "mse"))
    cs.add_hyperparameter(UniformFloatHyperparameter(
        "max_features", 0.5, 5, default=1))
    cs.add_hyperparameter(
        UnParametrizedHyperparameter(name="max_depth", value="None"))
    cs.add_hyperparameter(UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default=2))
    cs.add_hyperparameter(UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default=1))
    cs.add_hyperparameter(Constant('min_weight_fraction_leaf', 0.))
    cs.add_hyperparameter(CategoricalHyperparameter(
        "bootstrap", ["True", "False"], default="False"))
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for univariate feature selection."""
    alpha = UniformFloatHyperparameter(
        name="alpha", lower=0.01, upper=0.5, default_value=0.1)
    score_func = CategoricalHyperparameter(
        name="score_func", choices=["chi2", "f_classif"],
        default_value="chi2")
    if dataset_properties is not None:
        # chi2 is the only scorer here that handles sparse input, so pin
        # it when the data is sparse.
        if 'sparse' in dataset_properties and dataset_properties['sparse']:
            score_func = Constant(name="score_func", value="chi2")
    mode = CategoricalHyperparameter('mode', ['fpr', 'fdr', 'fwe'], 'fpr')

    cs = ConfigurationSpace()
    cs.add_hyperparameter(alpha)
    cs.add_hyperparameter(score_func)
    cs.add_hyperparameter(mode)
    return cs
def get_hyperparameter_search_space(dataset_properties=None,
                                    optimizer='smac'):
    """Build the ConfigSpace for a kernel-approximation transformer.

    The chi2 kernel is only offered when the data is neither sparse nor
    unsigned, since it requires dense non-negative input.
    """
    if dataset_properties is not None and \
            (dataset_properties.get("sparse") is True or
             dataset_properties.get("signed") is False):
        allow_chi2 = False
    else:
        allow_chi2 = True

    possible_kernels = ['poly', 'rbf', 'sigmoid', 'cosine']
    if allow_chi2:
        possible_kernels.append("chi2")

    kernel = CategoricalHyperparameter('kernel', possible_kernels, 'rbf')
    n_components = UniformIntegerHyperparameter(
        "n_components", 50, 5000, default_value=100, log=True)
    gamma = UniformFloatHyperparameter(
        "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1)
    degree = UniformIntegerHyperparameter('degree', 2, 5, 3)
    coef0 = UniformFloatHyperparameter("coef0", -1, 1, default_value=0)

    cs = ConfigurationSpace()
    cs.add_hyperparameters([kernel, degree, gamma, coef0, n_components])

    # degree only applies to poly; coef0 to poly/sigmoid; gamma to the
    # kernels below (plus chi2 when it is allowed).
    degree_depends_on_poly = EqualsCondition(degree, kernel, "poly")
    coef0_condition = InCondition(coef0, kernel, ["poly", "sigmoid"])
    gamma_kernels = ["poly", "rbf", "sigmoid"]
    if allow_chi2:
        gamma_kernels.append("chi2")
    gamma_condition = InCondition(gamma, kernel, gamma_kernels)
    cs.add_conditions(
        [degree_depends_on_poly, coef0_condition, gamma_condition])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for an RBM-style component."""
    n_components = UniformIntegerHyperparameter(
        "n_components", 1, 512, default_value=256)
    learning_rate = UniformFloatHyperparameter(
        "learning_rate", 1e-5, 1., default_value=0.1)
    batch_size = UniformIntegerHyperparameter(
        "batch_size", 1, 100, default_value=10)
    n_iter = UniformIntegerHyperparameter(
        "n_iter", 2, 200, default_value=10)

    cs = ConfigurationSpace()
    cs.add_hyperparameters([n_components, n_iter, learning_rate, batch_size])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for a kernel SVM."""
    C = UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default_value=1.0)
    # No linear kernel here, because we have liblinear for that case.
    kernel = CategoricalHyperparameter(
        name="kernel", choices=["rbf", "poly", "sigmoid"],
        default_value="rbf")
    degree = UniformIntegerHyperparameter("degree", 2, 5, default_value=3)
    gamma = UniformFloatHyperparameter(
        "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1)
    # TODO this is totally ad-hoc
    coef0 = UniformFloatHyperparameter("coef0", -1, 1, default_value=0)
    # probability is no hyperparameter, but an argument to the SVM algo
    shrinking = CategoricalHyperparameter(
        "shrinking", ["True", "False"], default_value="True")
    tol = UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default_value=1e-3, log=True)
    # cache size is not a hyperparameter, but an argument to the program!
    max_iter = UnParametrizedHyperparameter("max_iter", -1)

    cs = ConfigurationSpace()
    cs.add_hyperparameters(
        [C, kernel, degree, gamma, coef0, shrinking, tol, max_iter])

    # degree applies only to poly; coef0 to poly and sigmoid.
    cs.add_condition(EqualsCondition(degree, kernel, "poly"))
    cs.add_condition(InCondition(coef0, kernel, ["poly", "sigmoid"]))
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for a decision tree.

    max_depth is searched as a float factor (0..2) rather than an
    absolute depth; the wrapper presumably scales it -- see caller.
    """
    cs = ConfigurationSpace()

    criterion = CategoricalHyperparameter(
        "criterion", ["gini", "entropy"], default_value="gini")
    max_depth = UniformFloatHyperparameter(
        'max_depth', 0., 2., default_value=0.5)
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0)
    max_features = UnParametrizedHyperparameter('max_features', 1.0)
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        'min_impurity_decrease', 0.0)

    cs.add_hyperparameters([
        criterion, max_features, max_depth, min_samples_split,
        min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes,
        min_impurity_decrease
    ])
    return cs
def get_hyperparameter_search_space(**kwargs):
    """Build the ConfigSpace for a Gaussian-process classifier."""
    cs = ConfigurationSpace()

    kernel = CategoricalHyperparameter(
        "kernel",
        ["constant", "rbf", "matern", "rational_quadratic",
         "exp_sine_squared", "white", "dot"],
        default_value="rbf")
    n_restarts_optimizer = UniformIntegerHyperparameter(
        "n_restarts_optimizer", 0, 500, default_value=0)
    max_iter_predict = UniformIntegerHyperparameter(
        "max_iter_predict", 1, 1000, default_value=100)
    multi_class = CategoricalHyperparameter(
        "multi_class", ["one_vs_rest", "one_vs_one"],
        default_value="one_vs_rest")

    cs.add_hyperparameters(
        [n_restarts_optimizer, max_iter_predict, multi_class, kernel])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for a (legacy) Gaussian process model.

    Uses the legacy ConfigSpace `default=` keyword on purpose to match
    the ConfigSpace version this file targets.
    """
    nugget = UniformFloatHyperparameter(
        name="nugget", lower=0.0001, upper=10, default=0.1, log=True)
    thetaL = UniformFloatHyperparameter(
        name="thetaL", lower=1e-6, upper=1e-3, default=1e-4, log=True)
    thetaU = UniformFloatHyperparameter(
        name="thetaU", lower=0.2, upper=10, default=1.0, log=True)

    cs = ConfigurationSpace()
    cs.add_hyperparameter(nugget)
    cs.add_hyperparameter(thetaL)
    cs.add_hyperparameter(thetaU)
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for this tree-ensemble model.

    Uses the legacy ConfigSpace `default=` keyword to match the
    ConfigSpace version this file targets.
    """
    n_estimators = UniformIntegerHyperparameter(
        name="n_estimators", lower=10, upper=100, default=10)
    max_depth = UniformIntegerHyperparameter(
        name="max_depth", lower=2, upper=10, default=5)
    min_samples_split = UniformIntegerHyperparameter(
        name="min_samples_split", lower=2, upper=20, default=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=20, default=1)
    # BUG FIX: was Constant(..., 1.0); sklearn requires
    # min_weight_fraction_leaf in [0, 0.5], and every sibling search
    # space in this file fixes it at 0.
    min_weight_fraction_leaf = Constant('min_weight_fraction_leaf', 0.0)
    max_leaf_nodes = UnParametrizedHyperparameter(
        name="max_leaf_nodes", value="None")

    cs = ConfigurationSpace()
    cs.add_hyperparameters([
        n_estimators, max_depth, min_samples_split, min_samples_leaf,
        min_weight_fraction_leaf, max_leaf_nodes
    ])
    return cs
def test_write_OrConjunction_condition(self):
    """irace.write must render an OrConjunction as '||' between clauses."""
    import numpy as np

    expected = (
        "lp '--lp ' c {mi,bo}\ntemp '--temp ' r (2.000000, 5.000000)\nls "
        "'--ls ' c {sa,ca,ny}| temp==3.0 || lp %in% c(bo)\n")

    # A float, a categorical, and a second categorical parameter.
    temp = UniformFloatHyperparameter(
        "temp", np.exp(2), np.exp(5), log=True)
    ls = CategoricalHyperparameter("ls", ["sa", "ca", "ny"], "sa")
    lp = CategoricalHyperparameter("lp", ["mi", "bo"], "bo")

    cs = ConfigurationSpace()
    cs.add_hyperparameter(temp)
    cs.add_hyperparameter(lp)
    cs.add_hyperparameter(ls)

    # ls is active when temp == e^3 OR lp is in {bo}.
    equals_clause = EqualsCondition(ls, temp, np.exp(3))
    in_clause = InCondition(ls, lp, ['bo'])
    cs.add_condition(OrConjunction(equals_clause, in_clause))

    self.assertEqual(expected, irace.write(cs))
def get_hyperparameter_search_space(dataset_properties=None,
                                    optimizer='smac'):
    """Return the feature-selection space for the requested optimizer.

    NOTE(review): the tpe branch also offers 'mutual_info' while the
    smac branch does not -- confirm whether this asymmetry is intended.
    """
    if optimizer == 'smac':
        alpha = UniformFloatHyperparameter(
            name="alpha", lower=0.01, upper=0.5, default_value=0.1)
        score_func = CategoricalHyperparameter(
            name="score_func", choices=["chi2", "f_classif"],
            default_value="chi2")
        if dataset_properties is not None:
            # chi2 can handle sparse data, so we respect this.
            if 'sparse' in dataset_properties and dataset_properties[
                    'sparse']:
                score_func = Constant(name="score_func", value="chi2")
        mode = CategoricalHyperparameter('mode', ['fpr', 'fdr', 'fwe'],
                                         'fpr')
        cs = ConfigurationSpace()
        cs.add_hyperparameter(alpha)
        cs.add_hyperparameter(score_func)
        cs.add_hyperparameter(mode)
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        space = {
            'alpha': hp.uniform('gus_alpha', 0.01, 0.5),
            'score_func': hp.choice(
                'gus_score_func', ['chi2', 'f_classif', 'mutual_info']),
            'mode': hp.choice('gus_mode', ['fpr', 'fdr', 'fwe'])
        }
        return space
def get_random_forest_default_search_space(seed):
    """Build the default search space for a random-forest pipeline.

    Parameters
    ----------
    seed : int
        Random seed passed to the ConfigurationSpace.
    """
    cs = ConfigurationSpace('sklearn.ensemble.RandomForestClassifier', seed)

    imputation = CategoricalHyperparameter(
        'imputation__strategy', ['mean', 'median', 'most_frequent'])
    n_estimators = Constant("classifier__n_estimators", 100)
    criterion = CategoricalHyperparameter(
        "classifier__criterion", ["gini", "entropy"], default_value="gini")
    # max_features is a fraction: the estimator uses m^max_features out of
    # m total features; 0.5 reproduces sqrt(m) (Geurts' heuristic).
    max_features = UniformFloatHyperparameter(
        "classifier__max_features", 0., 1., default_value=0.5)
    # max_depth is intentionally left unparameterized:
    # max_depth = UnParametrizedHyperparameter("classifier__max_depth", None)
    min_samples_split = UniformIntegerHyperparameter(
        "classifier__min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "classifier__min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        "classifier__min_weight_fraction_leaf", 0.)
    # max_leaf_nodes is intentionally left unparameterized:
    # max_leaf_nodes = UnParametrizedHyperparameter("classifier__max_leaf_nodes", "None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        'classifier__min_impurity_decrease', 0.0)
    bootstrap = CategoricalHyperparameter(
        "classifier__bootstrap", ["True", "False"], default_value="True")

    cs.add_hyperparameters([
        imputation, n_estimators, criterion, max_features,
        min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
        bootstrap, min_impurity_decrease
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for an XGBoost-style model."""
    cs = ConfigurationSpace()

    # Parameterized hyperparameters.
    max_depth = UniformIntegerHyperparameter(
        name="max_depth", lower=1, upper=10, default_value=3)
    learning_rate = UniformFloatHyperparameter(
        name="learning_rate", lower=0.01, upper=1, default_value=0.1,
        log=True)
    n_estimators = UniformIntegerHyperparameter(
        "n_estimators", 50, 500, default_value=100)
    subsample = UniformFloatHyperparameter(
        name="subsample", lower=0.01, upper=1.0, default_value=1.0,
        log=False)
    min_child_weight = UniformIntegerHyperparameter(
        name="min_child_weight", lower=1, upper=20, default_value=1,
        log=False)

    # Unparameterized hyperparameters, pinned at library defaults.
    max_delta_step = UnParametrizedHyperparameter(
        name="max_delta_step", value=0)
    colsample_bytree = UnParametrizedHyperparameter(
        name="colsample_bytree", value=1)
    gamma = UnParametrizedHyperparameter(name="gamma", value=0)
    colsample_bylevel = UnParametrizedHyperparameter(
        name="colsample_bylevel", value=1)
    reg_alpha = UnParametrizedHyperparameter(name="reg_alpha", value=0)
    reg_lambda = UnParametrizedHyperparameter(name="reg_lambda", value=1)
    base_score = UnParametrizedHyperparameter(
        name="base_score", value=0.5)
    scale_pos_weight = UnParametrizedHyperparameter(
        name="scale_pos_weight", value=1)

    cs.add_hyperparameters([
        max_depth, learning_rate, n_estimators, subsample,
        min_child_weight, max_delta_step, colsample_bytree, gamma,
        colsample_bylevel, reg_alpha, reg_lambda, base_score,
        scale_pos_weight
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for a liblinear classifier (legacy API).

    Forbidden clauses encode liblinear's unsupported penalty/loss
    combinations, with `dual` fixed ad-hoc (see commented-out lines).
    """
    cs = ConfigurationSpace()

    penalty = cs.add_hyperparameter(CategoricalHyperparameter(
        "penalty", ["l1", "l2"], default="l2"))
    loss = cs.add_hyperparameter(CategoricalHyperparameter(
        "loss", ["hinge", "squared_hinge"], default="squared_hinge"))
    # dual = cs.add_hyperparameter(Constant("dual", "False"))
    # This is set ad-hoc.
    tol = cs.add_hyperparameter(UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default=1e-4, log=True))
    C = cs.add_hyperparameter(UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default=1.0))
    # multi_class = cs.add_hyperparameter(Constant("multi_class", "ovr"))
    # These are set ad-hoc:
    # fit_intercept = cs.add_hyperparameter(Constant("fit_intercept", "True"))
    # intercept_scaling = cs.add_hyperparameter(Constant(
    #     "intercept_scaling", 1))

    penalty_and_loss = ForbiddenAndConjunction(
        ForbiddenEqualsClause(penalty, "l1"),
        ForbiddenEqualsClause(loss, "hinge")
    )
    constant_penalty_and_loss = ForbiddenAndConjunction(
        # ForbiddenEqualsClause(dual, "False"),
        ForbiddenEqualsClause(penalty, "l2"),
        ForbiddenEqualsClause(loss, "hinge")
    )
    # NOTE(review): with dual fixed ad-hoc this forbids penalty == "l1"
    # outright -- confirm this matches the intended dual setting.
    penalty_and_dual = ForbiddenEqualsClause(penalty, "l1")
    # penalty_and_dual = ForbiddenAndConjunction(
    #     ForbiddenEqualsClause(dual, "False"),
    #     ForbiddenEqualsClause(penalty, "l1")
    # )
    cs.add_forbidden_clause(penalty_and_loss)
    cs.add_forbidden_clause(constant_penalty_and_loss)
    cs.add_forbidden_clause(penalty_and_dual)
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for histogram gradient boosting with
    optional early stopping.

    `n_iter_no_change` and `validation_fraction` are only active when
    early stopping is enabled.
    """
    cs = ConfigurationSpace()

    loss = Constant("loss", "auto")
    learning_rate = UniformFloatHyperparameter(
        name="learning_rate", lower=0.01, upper=1, default_value=0.1,
        log=True)
    min_samples_leaf = UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=200, default_value=20,
        log=True)
    max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
    max_leaf_nodes = UniformIntegerHyperparameter(
        name="max_leaf_nodes", lower=3, upper=2047, default_value=31,
        log=True)
    max_bins = Constant("max_bins", 255)
    l2_regularization = UniformFloatHyperparameter(
        name="l2_regularization", lower=1E-10, upper=1,
        default_value=1E-10, log=True)
    early_stop = CategoricalHyperparameter(
        name="early_stop", choices=["off", "train", "valid"],
        default_value="off")
    tol = UnParametrizedHyperparameter(name="tol", value=1e-7)
    scoring = UnParametrizedHyperparameter(name="scoring", value="loss")
    n_iter_no_change = UniformIntegerHyperparameter(
        name="n_iter_no_change", lower=1, upper=20, default_value=10)
    validation_fraction = UniformFloatHyperparameter(
        name="validation_fraction", lower=0.01, upper=0.4,
        default_value=0.1)

    cs.add_hyperparameters([
        loss, learning_rate, min_samples_leaf, max_depth, max_leaf_nodes,
        max_bins, l2_regularization, early_stop, tol, scoring,
        n_iter_no_change, validation_fraction
    ])

    # Patience applies whenever early stopping runs; the validation split
    # only exists in "valid" mode.
    cs.add_conditions([
        InCondition(n_iter_no_change, early_stop, ["valid", "train"]),
        EqualsCondition(validation_fraction, early_stop, "valid"),
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None,
                                    optimizer='smac'):
    """Return the extra-trees regressor space for the given optimizer.

    `optimizer='smac'` yields a ConfigSpace object; `optimizer='tpe'`
    yields a hyperopt search-space dict. Any other value returns None.
    """
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        n_estimators = Constant("n_estimators", 100)
        criterion = CategoricalHyperparameter(
            "criterion", ["mse", "mae"], default_value="mse")
        # max_features is a fraction: the estimator uses m^max_features
        # out of m total features; the default of 0.5 reproduces sqrt(m),
        # matching Geurts' heuristic.
        max_features = UniformFloatHyperparameter(
            "max_features", 0., 1., default_value=0.5)
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        bootstrap = CategoricalHyperparameter(
            "bootstrap", ["True", "False"], default_value="False")
        cs.add_hyperparameters([n_estimators, criterion, max_features,
                                min_samples_split, min_samples_leaf,
                                bootstrap])
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        # randint is 0-based, so shift to cover [2, 20] / [1, 20].
        space = {'n_estimators': hp.choice('et_n_estimators', [100]),
                 'criterion': hp.choice('et_criterion', ["mse", "mae"]),
                 'max_features': hp.uniform('et_max_features', 0, 1),
                 'min_samples_split':
                     hp.randint('et_min_samples_split', 19) + 2,
                 # BUG FIX: the label was 'et_min_samples_leaf,' -- the
                 # stray trailing comma inside the string made the
                 # hyperopt label inconsistent with its siblings.
                 'min_samples_leaf':
                     hp.randint('et_min_samples_leaf', 20) + 1,
                 'bootstrap': hp.choice('et_bootstrap', ["True", "False"])}
        # NOTE(review): init_trial is never used in this function; other
        # callers may rely on the convention -- confirm before removing.
        init_trial = {'n_estimators': 100, 'criterion': "mse",
                      'max_features': 0.5, 'min_samples_split': 2,
                      'min_samples_leaf': 1, 'bootstrap': "False"}
        return space
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for a liblinear SVR (legacy API).

    A forbidden clause excludes the dual=False / epsilon_insensitive
    combination, which liblinear does not support.
    """
    cs = ConfigurationSpace()

    C = cs.add_hyperparameter(UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default=1.0))
    loss = cs.add_hyperparameter(CategoricalHyperparameter(
        "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
        default="squared_epsilon_insensitive"))
    # Random guess for the epsilon range.
    epsilon = cs.add_hyperparameter(UniformFloatHyperparameter(
        name="epsilon", lower=0.001, upper=1, default=0.1, log=True))
    dual = cs.add_hyperparameter(Constant("dual", "False"))
    # These are set ad-hoc.
    tol = cs.add_hyperparameter(UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default=1e-4, log=True))
    fit_intercept = cs.add_hyperparameter(Constant("fit_intercept", "True"))
    intercept_scaling = cs.add_hyperparameter(
        Constant("intercept_scaling", 1))

    dual_and_loss = ForbiddenAndConjunction(
        ForbiddenEqualsClause(dual, "False"),
        ForbiddenEqualsClause(loss, "epsilon_insensitive"))
    cs.add_forbidden_clause(dual_and_loss)
    return cs
def _get_hyperparameter_search_space(
    self,
    include: Optional[Dict[str, str]] = None,
    exclude: Optional[Dict[str, str]] = None,
    dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None,
) -> ConfigurationSpace:
    """Create the hyperparameter configuration space.

    Delegates to ``_get_base_search_space`` over this pipeline's steps.

    Returns
    -------
    cs : ConfigSpace.configuration_space.Configuration
        The configuration space describing the SimpleRegressionClassifier.
    """
    cs = ConfigurationSpace()

    # Normalize dataset_properties to a dict before delegating.
    if dataset_properties is None or not isinstance(dataset_properties,
                                                    dict):
        dataset_properties = dict()

    cs = self._get_base_search_space(
        cs=cs, dataset_properties=dataset_properties,
        exclude=exclude, include=include, pipeline=self.steps)
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for gradient boosting (legacy API)."""
    cs = ConfigurationSpace()

    loss = cs.add_hyperparameter(Constant("loss", "deviance"))
    learning_rate = cs.add_hyperparameter(UniformFloatHyperparameter(
        name="learning_rate", lower=0.01, upper=1, default=0.1, log=True))
    n_estimators = cs.add_hyperparameter(UniformIntegerHyperparameter(
        "n_estimators", 50, 500, default=100))
    max_depth = cs.add_hyperparameter(UniformIntegerHyperparameter(
        name="max_depth", lower=1, upper=10, default=3))
    min_samples_split = cs.add_hyperparameter(UniformIntegerHyperparameter(
        name="min_samples_split", lower=2, upper=20, default=2, log=False))
    min_samples_leaf = cs.add_hyperparameter(UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=20, default=1, log=False))
    min_weight_fraction_leaf = cs.add_hyperparameter(
        UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.))
    subsample = cs.add_hyperparameter(UniformFloatHyperparameter(
        name="subsample", lower=0.01, upper=1.0, default=1.0, log=False))
    max_features = cs.add_hyperparameter(UniformFloatHyperparameter(
        "max_features", 0.5, 5, default=1))
    max_leaf_nodes = cs.add_hyperparameter(UnParametrizedHyperparameter(
        name="max_leaf_nodes", value="None"))
    return cs
def get_hyperparameter_search_space(**kwargs):
    """Build the ConfigSpace for a factor-parameterized forest.

    Split/leaf sizes are expressed as fractions (factors) rather than
    absolute counts.
    """
    cs = ConfigurationSpace()

    n_estimators = UniformIntegerHyperparameter(
        name="n_estimators", lower=10, upper=50, default_value=25)
    max_depth_factor = UniformFloatHyperparameter(
        "max_depth_factor", 1e-5, 2.5, default_value=1.)
    min_samples_split_factor = UniformFloatHyperparameter(
        "min_samples_split_factor", 0.0001, 0.5, default_value=0.0001)
    min_samples_leaf_factor = UniformFloatHyperparameter(
        "min_samples_leaf_factor", 0.0001, 0.5, default_value=0.0001)
    min_weight_fraction_leaf = UniformFloatHyperparameter(
        "min_weight_fraction_leaf", 0., 0.5, default_value=0.)

    cs.add_hyperparameters([
        n_estimators, max_depth_factor, min_samples_split_factor,
        min_samples_leaf_factor, min_weight_fraction_leaf
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Get the configuration space used for hyperparameter searching."""
    cs = ConfigurationSpace()

    solver = CategoricalHyperparameter(
        name="solver", choices=["svd", "lsqr", "eigen"],
        default_value="svd")
    shrinkage = UniformFloatHyperparameter(
        name="shrinkage", lower=0.0, upper=1.0, default_value=0.5)
    n_components = UniformIntegerHyperparameter(
        name="n_components", lower=1, upper=30, default_value=10)
    tol = UniformFloatHyperparameter(
        name="tol", lower=0.0001, upper=1, default_value=0.0001)

    cs.add_hyperparameters([solver, shrinkage, n_components, tol])
    # Shrinkage is only supported by the lsqr and eigen solvers.
    cs.add_condition(InCondition(shrinkage, solver, ["lsqr", "eigen"]))
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for a kernel-PCA-style transformer."""
    n_components = UniformIntegerHyperparameter(
        "n_components", 10, 2000, default_value=100)
    kernel = CategoricalHyperparameter(
        'kernel', ['poly', 'rbf', 'sigmoid', 'cosine'], 'rbf')
    gamma = UniformFloatHyperparameter(
        "gamma", 3.0517578125e-05, 8, log=True, default_value=1.0)
    degree = UniformIntegerHyperparameter('degree', 2, 5, 3)
    coef0 = UniformFloatHyperparameter("coef0", -1, 1, default_value=0)

    cs = ConfigurationSpace()
    cs.add_hyperparameters([n_components, kernel, degree, gamma, coef0])

    # degree only applies to poly; coef0 to poly/sigmoid.
    # NOTE(review): gamma is activated only for poly/rbf here even though
    # sigmoid also takes gamma -- confirm this restriction is intended.
    degree_depends_on_poly = EqualsCondition(degree, kernel, "poly")
    coef0_condition = InCondition(coef0, kernel, ["poly", "sigmoid"])
    gamma_condition = InCondition(gamma, kernel, ["poly", "rbf"])
    cs.add_conditions(
        [degree_depends_on_poly, coef0_condition, gamma_condition])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    """Build the ConfigSpace for a liblinear SVR.

    A forbidden clause excludes dual=False with epsilon_insensitive
    loss, which liblinear does not support.
    """
    cs = ConfigurationSpace()

    loss = CategoricalHyperparameter(
        "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
        default_value="epsilon_insensitive")
    dual = CategoricalHyperparameter(
        "dual", ['True', 'False'], default_value='True')
    # This is set ad-hoc.
    tol = UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
    C = UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default_value=1.0)
    # These are set ad-hoc.
    fit_intercept = Constant("fit_intercept", "True")
    intercept_scaling = Constant("intercept_scaling", 1)

    cs.add_hyperparameters(
        [loss, dual, tol, C, fit_intercept, intercept_scaling])

    dual_and_loss = ForbiddenAndConjunction(
        ForbiddenEqualsClause(dual, "False"),
        ForbiddenEqualsClause(loss, "epsilon_insensitive")
    )
    cs.add_forbidden_clause(dual_and_loss)
    return cs