def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    loss = CategoricalHyperparameter(
        "loss", ["ls", "lad", "huber", "quantile"], default_value="ls")
    learning_rate = UniformFloatHyperparameter(
        name="learning_rate", lower=0.01, upper=1, default_value=0.1, log=True)
    n_estimators = UniformIntegerHyperparameter(
        "n_estimators", 50, 500, default_value=100)
    max_depth = UniformIntegerHyperparameter(
        name="max_depth", lower=1, upper=10, default_value=3)
    min_samples_split = UniformIntegerHyperparameter(
        name="min_samples_split", lower=2, upper=20, default_value=2, log=False)
    min_samples_leaf = UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=20, default_value=1, log=False)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        "min_weight_fraction_leaf", 0.)
    subsample = UniformFloatHyperparameter(
        name="subsample", lower=0.01, upper=1.0, default_value=1.0, log=False)
    max_features = UniformFloatHyperparameter(
        "max_features", 0.1, 1.0, default_value=1)
    max_leaf_nodes = UnParametrizedHyperparameter(
        name="max_leaf_nodes", value="None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        name='min_impurity_decrease', value=0.0)
    alpha = UniformFloatHyperparameter(
        "alpha", lower=0.75, upper=0.99, default_value=0.9)
    cs.add_hyperparameters([loss, learning_rate, n_estimators, max_depth,
                            min_samples_split, min_samples_leaf,
                            min_weight_fraction_leaf, subsample, max_features,
                            max_leaf_nodes, min_impurity_decrease, alpha])
    cs.add_condition(InCondition(alpha, loss, ['huber', 'quantile']))
    return cs
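# Hedged usage sketch (not part of the original code): sample a few
# configurations from the gradient-boosting space above and verify that the
# conditional "alpha" is only active for the "huber"/"quantile" losses.
# Inactive hyperparameters are simply absent from the configuration dict.
def _demo_sample_gb_space(n=5):
    cs = get_hyperparameter_search_space()
    for config in cs.sample_configuration(n):  # returns a list for n > 1
        params = config.get_dictionary()
        assert ("alpha" in params) == (params["loss"] in ("huber", "quantile"))
        print(params)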
def get_cs():
    cs = ConfigurationSpace()
    criterion = CategoricalHyperparameter(
        "criterion", ["gini", "entropy"], default_value="gini")
    # The maximum number of features used in the forest is calculated as m^max_features, where
    # m is the total number of features, and max_features is the hyperparameter specified below.
    # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This
    # corresponds with Geurts' heuristic.
    max_features = UniformFloatHyperparameter(
        "max_features", 0., 1., default_value=0.5)
    max_depth = UnParametrizedHyperparameter("max_depth", "None")
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        "min_weight_fraction_leaf", 0.)
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        'min_impurity_decrease', 0.0)
    bootstrap = CategoricalHyperparameter(
        "bootstrap", ["True", "False"], default_value="True")
    cs.add_hyperparameters([criterion, max_features, max_depth,
                            min_samples_split, min_samples_leaf,
                            min_weight_fraction_leaf, max_leaf_nodes,
                            bootstrap, min_impurity_decrease])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    n_estimators = Constant("n_estimators", 100)
    criterion = CategoricalHyperparameter(
        "criterion", ["gini", "entropy"], default_value="gini")
    max_features = UniformFloatHyperparameter(
        "max_features", 0, 1, default_value=0.5, q=0.05)
    max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        'min_weight_fraction_leaf', 0.)
    min_impurity_decrease = UnParametrizedHyperparameter(
        'min_impurity_decrease', 0.)
    bootstrap = CategoricalHyperparameter(
        "bootstrap", ["True", "False"], default_value="False")
    cs.add_hyperparameters([n_estimators, criterion, max_features, max_depth,
                            max_leaf_nodes, min_samples_split,
                            min_samples_leaf, min_weight_fraction_leaf,
                            min_impurity_decrease, bootstrap])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    loss = Constant("loss", "auto")
    learning_rate = UniformFloatHyperparameter(
        name="learning_rate", lower=0.01, upper=1, default_value=0.1, log=True)
    max_iter = UniformIntegerHyperparameter(
        "max_iter", 32, 512, default_value=100)
    min_samples_leaf = UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=200, default_value=20, log=True)
    max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
    max_leaf_nodes = UniformIntegerHyperparameter(
        name="max_leaf_nodes", lower=3, upper=2047, default_value=31, log=True)
    max_bins = Constant("max_bins", 256)
    l2_regularization = UniformFloatHyperparameter(
        name="l2_regularization", lower=1E-10, upper=1, default_value=1E-10,
        log=True)
    early_stop = CategoricalHyperparameter(
        name="early_stop", choices=["off", "train", "valid"],
        default_value="off")
    tol = UnParametrizedHyperparameter(name="tol", value=1e-7)
    scoring = UnParametrizedHyperparameter(name="scoring", value="loss")
    n_iter_no_change = UniformIntegerHyperparameter(
        name="n_iter_no_change", lower=1, upper=20, default_value=10)
    validation_fraction = UniformFloatHyperparameter(
        name="validation_fraction", lower=0.01, upper=0.4, default_value=0.1)
    cs.add_hyperparameters([
        loss, learning_rate, max_iter, min_samples_leaf, max_depth,
        max_leaf_nodes, max_bins, l2_regularization, early_stop, tol,
        scoring, n_iter_no_change, validation_fraction
    ])
    n_iter_no_change_cond = InCondition(
        n_iter_no_change, early_stop, ["valid", "train"])
    validation_fraction_cond = EqualsCondition(
        validation_fraction, early_stop, "valid")
    cs.add_conditions([n_iter_no_change_cond, validation_fraction_cond])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    criterion = CategoricalHyperparameter(
        "criterion", ['mse', 'friedman_mse', 'mae'])
    max_features = UniformFloatHyperparameter(
        "max_features", 0.1, 1.0, default_value=1)
    max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_impurity_decrease = UnParametrizedHyperparameter(
        'min_impurity_decrease', 0.0)
    bootstrap = CategoricalHyperparameter(
        "bootstrap", ["True", "False"], default_value="False")
    cs.add_hyperparameters([
        criterion, max_features, max_depth, max_leaf_nodes,
        min_samples_split, min_samples_leaf, min_impurity_decrease, bootstrap
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    loss = Constant("loss", "deviance")
    learning_rate = UniformFloatHyperparameter(
        name="learning_rate", lower=0.01, upper=1, default_value=0.1, log=True)
    # n_estimators = UniformIntegerHyperparameter(
    #     "n_estimators", 50, 500, default_value=100)
    n_estimators = Constant("n_estimators", 100)
    max_depth = UniformIntegerHyperparameter(
        name="max_depth", lower=1, upper=8, default_value=3)
    criterion = CategoricalHyperparameter(
        'criterion', ['friedman_mse', 'mse'], default_value='mse')
    min_samples_split = UniformIntegerHyperparameter(
        name="min_samples_split", lower=2, upper=20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=20, default_value=1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        "min_weight_fraction_leaf", 0.)
    subsample = UniformFloatHyperparameter(
        name="subsample", lower=0.01, upper=1.0, default_value=1.0)
    max_features = UniformFloatHyperparameter(
        "max_features", 0.1, 1.0, default_value=1)
    max_leaf_nodes = UnParametrizedHyperparameter(
        name="max_leaf_nodes", value="None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        name='min_impurity_decrease', value=0.0)
    cs.add_hyperparameters([loss, learning_rate, n_estimators, max_depth,
                            criterion, min_samples_split, min_samples_leaf,
                            min_weight_fraction_leaf, subsample, max_features,
                            max_leaf_nodes, min_impurity_decrease])
    return cs
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        criterion = CategoricalHyperparameter(
            "criterion", ["gini", "entropy"], default_value="gini")
        max_depth_factor = UniformFloatHyperparameter(
            'max_depth_factor', 0., 2., default_value=0.5)
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0)
        max_features = UnParametrizedHyperparameter('max_features', 1.0)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)
        cs.add_hyperparameters([
            criterion, max_features, max_depth_factor, min_samples_split,
            min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes,
            min_impurity_decrease
        ])
        return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    criterion = CategoricalHyperparameter(
        "criterion", ["gini"], default_value="gini")  # was: ["gini", "entropy"]
    # Increase the maximum depth of the decision tree to 10; the previous
    # default upper bound was too low.
    max_depth = UniformFloatHyperparameter(
        'max_depth', 0., 10., default_value=0.5)  # was: upper bound 2.
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0)
    max_features = UnParametrizedHyperparameter('max_features', 1.0)
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        'min_impurity_decrease', 0.0)
    cs.add_hyperparameters([
        criterion, max_features, max_depth, min_samples_split,
        min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes,
        min_impurity_decrease
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    Window_size = UniformIntegerHyperparameter(
        name="Window_size", lower=5, upper=50, default_value=20)
    Difference = CategoricalHyperparameter(
        name="Difference", choices=["True", "False"], default_value="True")
    tsfresh_feature = CategoricalHyperparameter(
        name="tsfresh_feature", choices=["True", "False"], default_value="True")
    n_iter = UnParametrizedHyperparameter("n_iter", value=50)
    tol = UniformFloatHyperparameter(
        "tol", 10**-5, 10**-1, default_value=10**-3, log=True)
    alpha_1 = UniformFloatHyperparameter(
        name="alpha_1", lower=10**-10, upper=10**-3, default_value=10**-6)
    alpha_2 = UniformFloatHyperparameter(
        name="alpha_2", log=True, lower=10**-10, upper=10**-3,
        default_value=10**-6)
    lambda_1 = UniformFloatHyperparameter(
        name="lambda_1", log=True, lower=10**-10, upper=10**-3,
        default_value=10**-6)
    lambda_2 = UniformFloatHyperparameter(
        name="lambda_2", log=True, lower=10**-10, upper=10**-3,
        default_value=10**-6)
    threshold_lambda = UniformFloatHyperparameter(
        name="threshold_lambda", log=True, lower=10**3, upper=10**5,
        default_value=10**4)
    fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
    cs.add_hyperparameters([
        n_iter, tol, alpha_1, alpha_2, lambda_1, Difference, lambda_2,
        threshold_lambda, fit_intercept, Window_size, tsfresh_feature
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    criterion = CategoricalHyperparameter(
        'criterion', ['mse', 'friedman_mse', 'mae'])
    max_features = Constant('max_features', 1.0)
    max_depth = UniformFloatHyperparameter(
        'max_depth', 0., 2., default_value=0.5)
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0)
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        'min_impurity_decrease', 0.0)
    cs.add_hyperparameters([
        criterion, max_features, max_depth, min_samples_split,
        min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes,
        min_impurity_decrease
    ])
    return cs
def get_configspace():
    # `benchmark`, `algo_name` and `metric` are expected to be defined in the
    # enclosing scope.
    if benchmark == 'hpo':
        cs = _classifiers[algo_name].get_hyperparameter_search_space()
        model = UnParametrizedHyperparameter("estimator", algo_name)
        cs.add_hyperparameter(model)
        return cs

    train_data, test_data = load_train_test_data('splice', task_type=MULTICLASS_CLS)
    cs = _classifiers[algo_name].get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", algo_name)
    cs.add_hyperparameter(model)
    default_hpo_config = cs.get_default_configuration()
    fe_evaluator = ClassificationEvaluator(default_hpo_config, scorer=metric,
                                           name='fe',
                                           resampling_strategy='holdout',
                                           seed=1)
    fe_optimizer = BayesianOptimizationOptimizer(task_type=CLASSIFICATION,
                                                 input_data=train_data,
                                                 evaluator=fe_evaluator,
                                                 model_id=algo_name,
                                                 time_limit_per_trans=600,
                                                 mem_limit_per_trans=5120,
                                                 number_of_unit_resource=10,
                                                 seed=1)
    hyper_space = fe_optimizer.hyperparameter_space
    return hyper_space
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    n_estimators = Constant("n_estimators", 100)
    criterion = Constant("criterion", "mse")
    max_features = UniformFloatHyperparameter(
        "max_features", 0.5, 5, default_value=1)
    max_depth = UnParametrizedHyperparameter("max_depth", "None")
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        "min_weight_fraction_leaf", 0.)
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    bootstrap = CategoricalHyperparameter(
        "bootstrap", ["True", "False"], default_value="True")
    cs.add_hyperparameters([
        n_estimators, criterion, max_features, max_depth, min_samples_split,
        min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes, bootstrap
    ])
    return cs
def evaluate(_config):
    _config = _config.get_dictionary()
    # print(_config)
    arm = None
    cs = ConfigurationSpace()
    for key in _config:
        key_str = key.split(":")
        if key_str[0] == 'classifier':
            if key_str[1] == '__choice__':
                arm = _config[key]
                cs.add_hyperparameter(
                    UnParametrizedHyperparameter("estimator", _config[key]))
            else:
                cs.add_hyperparameter(
                    UnParametrizedHyperparameter(key_str[2], _config[key]))

    if arm in first_bandit.arms:
        transformed_node = apply_metalearning_fe(
            first_bandit.sub_bandits[arm].optimizer['fe'], _config)
        default_config = cs.sample_configuration(1)
        hpo_evaluator = ClassificationEvaluator(
            None, data_node=transformed_node, name='hpo',
            resampling_strategy=first_bandit.eval_type,
            seed=first_bandit.seed)

        start_time = time.time()
        score1 = 1 - hpo_evaluator(default_config)
        time_cost1 = time.time() - start_time

        # Evaluate the default config
        start_time = time.time()
        score2 = 1 - hpo_evaluator(first_bandit.sub_bandits[arm].default_config)
        time_cost2 = time.time() - start_time

        transformed_node.score2 = max(score1, score2)
        return (arm, score1, default_config, transformed_node, time_cost1), (
            arm, score2, first_bandit.sub_bandits[arm].default_config,
            transformed_node, time_cost2)
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        n_estimators = Constant("n_estimators", 100)
        criterion = CategoricalHyperparameter(
            "criterion", ["gini", "entropy"], default_value="gini")
        # The maximum number of features used in the forest is calculated as m^max_features, where
        # m is the total number of features, and max_features is the hyperparameter specified below.
        # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This
        # corresponds with Geurts' heuristic.
        max_features = UniformFloatHyperparameter(
            "max_features", 0., 1., default_value=0.5)
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            "min_weight_fraction_leaf", 0.)
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)
        bootstrap = CategoricalHyperparameter(
            "bootstrap", ["True", "False"], default_value="True")
        sampling_strategy = CategoricalHyperparameter(
            name="sampling_strategy",
            choices=["majority", "not minority", "not majority", "all"],
            default_value="not minority")
        replacement = CategoricalHyperparameter(
            "replacement", ["True", "False"], default_value="False")
        cs.add_hyperparameters([n_estimators, criterion, max_features,
                                min_samples_split, min_samples_leaf,
                                min_weight_fraction_leaf, bootstrap,
                                min_impurity_decrease, sampling_strategy,
                                replacement])
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        space = {'n_estimators': hp.choice('bal_rf_n_estimators', [100]),
                 'criterion': hp.choice('bal_rf_criterion', ["gini", "entropy"]),
                 'max_features': hp.uniform('bal_rf_max_features', 0, 1),
                 'min_samples_split': hp.randint('bal_rf_min_samples_split', 19) + 2,
                 'min_samples_leaf': hp.randint('bal_rf_min_samples_leaf', 20) + 1,
                 'min_weight_fraction_leaf': hp.choice('bal_rf_min_weight_fraction_leaf', [0]),
                 'min_impurity_decrease': hp.choice('bal_rf_min_impurity_decrease', [0]),
                 'bootstrap': hp.choice('bal_rf_bootstrap', ["True", "False"]),
                 'sampling_strategy': hp.choice('bal_rf_sampling_strategy',
                                                ["majority", "not minority",
                                                 "not majority", "all"]),
                 'replacement': hp.choice('bal_rf_replacement', ["True", "False"])}

        init_trial = {'n_estimators': 100,
                      'criterion': "gini",
                      'max_features': 0.5,
                      'min_samples_split': 2,
                      'min_samples_leaf': 1,
                      'min_weight_fraction_leaf': 0,
                      'min_impurity_decrease': 0,
                      'bootstrap': "False",
                      'sampling_strategy': "not minority",
                      'replacement': "False"}

        return space
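# Hedged usage sketch (not part of the original code): draw one random point
# from the 'tpe' branch above with hyperopt's stochastic sampler. Requires
# hyperopt to be installed.
from hyperopt.pyll.stochastic import sample

space = get_hyperparameter_search_space(optimizer='tpe')
print(sample(space))  # e.g. {'bootstrap': 'True', 'criterion': 'gini', ...}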
def get_hyperparameter_search_space(dataset_properties=None):
    degree = UnParametrizedHyperparameter(name="degree", value=2)
    interaction_only = UnParametrizedHyperparameter("interaction_only", "True")
    include_bias = UnParametrizedHyperparameter("include_bias", "False")
    cs = ConfigurationSpace()
    cs.add_hyperparameters([degree, interaction_only, include_bias])
    return cs
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        n_estimators = Constant("n_estimators", 100)
        criterion = CategoricalHyperparameter(
            "criterion", ["gini", "entropy"], default_value="gini")
        max_features = UniformFloatHyperparameter(
            "max_features", 0, 1, default_value=0.5, q=0.05)
        max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            'min_weight_fraction_leaf', 0.)
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.)
        bootstrap = CategoricalHyperparameter(
            "bootstrap", ["True", "False"], default_value="False")
        cs.add_hyperparameters([
            n_estimators, criterion, max_features, max_depth, max_leaf_nodes,
            min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
            min_impurity_decrease, bootstrap
        ])
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        space = {
            'n_estimators': 100,
            'criterion': hp.choice('ets_criterion', ['gini', 'entropy']),
            'max_features': hp.uniform('ets_max_features', 0, 1),
            'max_depth': "None",
            'max_leaf_nodes': "None",
            'min_samples_leaf': hp.randint('ets_samples_leaf', 20) + 1,
            'min_samples_split': hp.randint('ets_samples_split', 19) + 2,
            'min_weight_fraction_leaf': 0.,
            'min_impurity_decrease': 0.,
            'bootstrap': hp.choice('ets_bootstrap', ['True', 'False'])
        }
        return space
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    Window_size = UniformIntegerHyperparameter(
        name="Window_size", lower=5, upper=99, default_value=20)
    Difference = CategoricalHyperparameter(
        name="Difference", choices=["True", "False"], default_value="True")
    tsfresh_feature = CategoricalHyperparameter(
        name="tsfresh_feature", choices=["True", "False"], default_value="True")
    n_estimators = UniformIntegerHyperparameter(
        "n_estimators", 10, 200, default_value=100)
    criterion = CategoricalHyperparameter(
        "criterion", ['mse', 'friedman_mse', 'mae'], default_value='mse')
    max_features = UniformFloatHyperparameter(
        "max_features", 0.1, 1.0, default_value=1)
    max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_impurity_decrease = UnParametrizedHyperparameter(
        'min_impurity_decrease', 0.0)
    bootstrap = CategoricalHyperparameter(
        "bootstrap", ["True", "False"], default_value="False")
    cs.add_hyperparameters([
        n_estimators, criterion, max_features, Difference, max_depth,
        max_leaf_nodes, min_samples_split, min_samples_leaf,
        min_impurity_decrease, bootstrap, Window_size, tsfresh_feature
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    n_estimators = Constant("n_estimators", 100)
    criterion = UnParametrizedHyperparameter("criterion", "gini")
    max_features = UnParametrizedHyperparameter("max_features", "auto")
    max_depth = UnParametrizedHyperparameter("max_depth", "None")
    min_samples_split = UnParametrizedHyperparameter("min_samples_split", 2)
    min_samples_leaf = UnParametrizedHyperparameter("min_samples_leaf", 1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        "min_weight_fraction_leaf", 0.)
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        'min_impurity_decrease', 0.0)
    bootstrap = UnParametrizedHyperparameter("bootstrap", "True")
    cs.add_hyperparameters([
        n_estimators, criterion, max_features, max_depth, min_samples_split,
        min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes,
        bootstrap, min_impurity_decrease
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    loss = CategoricalHyperparameter(
        "loss", ["ls", "lad", "huber", "quantile"], default_value="ls")
    learning_rate = UniformFloatHyperparameter(
        name="learning_rate", lower=0.01, upper=1, default_value=0.1, log=True)
    n_estimators = UniformIntegerHyperparameter(
        name="n_estimators", lower=50, upper=500, default_value=100)
    max_depth = UniformIntegerHyperparameter(
        name="max_depth", lower=1, upper=10, default_value=3)
    min_samples_split = UniformIntegerHyperparameter(
        name="min_samples_split", lower=2, upper=20, default_value=2, log=False)
    min_samples_leaf = UniformIntegerHyperparameter(
        name="min_samples_leaf", lower=1, upper=20, default_value=1, log=False)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        "min_weight_fraction_leaf", 0.)
    subsample = UniformFloatHyperparameter(
        name="subsample", lower=0.01, upper=1.0, default_value=1.0, log=False)
    max_features = UniformFloatHyperparameter(
        "max_features", 0.5, 5, default_value=1)
    max_leaf_nodes = UnParametrizedHyperparameter(
        name="max_leaf_nodes", value="None")
    alpha = UniformFloatHyperparameter(
        "alpha", lower=0.75, upper=0.99, default_value=0.9)
    cs.add_hyperparameters([loss, learning_rate, n_estimators, max_depth,
                            min_samples_split, min_samples_leaf,
                            min_weight_fraction_leaf, subsample, max_features,
                            max_leaf_nodes, alpha])
    cs.add_condition(InCondition(alpha, loss, ['huber', 'quantile']))
    return cs
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        loss = CategoricalHyperparameter(
            "loss", ['ls', 'lad'], default_value='ls')
        learning_rate = UniformFloatHyperparameter(
            name="learning_rate", lower=0.01, upper=1, default_value=0.1,
            log=True)
        n_estimators = UniformIntegerHyperparameter(
            "n_estimators", 50, 500, default_value=200)
        max_depth = UniformIntegerHyperparameter(
            name="max_depth", lower=1, upper=10, default_value=3)
        criterion = CategoricalHyperparameter(
            'criterion', ['friedman_mse', 'mse', 'mae'],
            default_value='friedman_mse')
        min_samples_split = UniformIntegerHyperparameter(
            name="min_samples_split", lower=2, upper=20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            name="min_samples_leaf", lower=1, upper=20, default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            "min_weight_fraction_leaf", 0.)
        subsample = UniformFloatHyperparameter(
            name="subsample", lower=0.1, upper=1.0, default_value=1.0)
        max_features = UniformFloatHyperparameter(
            "max_features", 0.1, 1.0, default_value=1)
        max_leaf_nodes = UnParametrizedHyperparameter(
            name="max_leaf_nodes", value="None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            name='min_impurity_decrease', value=0.0)
        cs.add_hyperparameters([loss, learning_rate, n_estimators, max_depth,
                                criterion, min_samples_split,
                                min_samples_leaf, min_weight_fraction_leaf,
                                subsample, max_features, max_leaf_nodes,
                                min_impurity_decrease])
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        space = {'loss': hp.choice('gb_loss', ["ls", "lad"]),
                 'learning_rate': hp.loguniform('gb_learning_rate', np.log(0.01), np.log(1)),
                 # 'n_estimators': hp.randint('gb_n_estimators', 451) + 50,
                 'n_estimators': hp.choice('gb_n_estimators', [100]),
                 'max_depth': hp.randint('gb_max_depth', 8) + 1,
                 'criterion': hp.choice('gb_criterion', ['friedman_mse', 'mse', 'mae']),
                 'min_samples_split': hp.randint('gb_min_samples_split', 19) + 2,
                 'min_samples_leaf': hp.randint('gb_min_samples_leaf', 20) + 1,
                 'min_weight_fraction_leaf': hp.choice('gb_min_weight_fraction_leaf', [0]),
                 'subsample': hp.uniform('gb_subsample', 0.1, 1),
                 'max_features': hp.uniform('gb_max_features', 0.1, 1),
                 'max_leaf_nodes': hp.choice('gb_max_leaf_nodes', [None]),
                 'min_impurity_decrease': hp.choice('gb_min_impurity_decrease', [0])}

        init_trial = {'loss': "ls",
                      'learning_rate': 0.1,
                      'n_estimators': 100,
                      'max_depth': 3,
                      'criterion': "friedman_mse",
                      'min_samples_split': 2,
                      'min_samples_leaf': 1,
                      'min_weight_fraction_leaf': 0,
                      'subsample': 1,
                      'max_features': 1,
                      'max_leaf_nodes': None,
                      'min_impurity_decrease': 0}

        return space
def get_random_forest_default_search_space():
    classif_prefix = 'classifier:random_forest:'
    cs = ConfigurationSpace()
    model_type = CategoricalHyperparameter(
        'classifier:__choice__', ['random_forest'])
    imputation = CategoricalHyperparameter(
        'imputation:strategy', ['mean', 'median', 'most_frequent'])
    n_estimators = Constant(classif_prefix + "n_estimators", 100)
    criterion = CategoricalHyperparameter(
        classif_prefix + "criterion", ["gini", "entropy"],
        default_value="gini")
    # The maximum number of features used in the forest is calculated as m^max_features, where
    # m is the total number of features, and max_features is the hyperparameter specified below.
    # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This
    # corresponds with Geurts' heuristic.
    max_features = UniformFloatHyperparameter(
        classif_prefix + "max_features", 0., 1., default_value=0.5)
    max_depth = UnParametrizedHyperparameter(
        classif_prefix + "max_depth", "None")
    min_samples_split = UniformIntegerHyperparameter(
        classif_prefix + "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        classif_prefix + "min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = UnParametrizedHyperparameter(
        classif_prefix + "min_weight_fraction_leaf", 0.)
    max_leaf_nodes = UnParametrizedHyperparameter(
        classif_prefix + "max_leaf_nodes", "None")
    min_impurity_decrease = UnParametrizedHyperparameter(
        classif_prefix + 'min_impurity_decrease', 0.0)
    bootstrap = CategoricalHyperparameter(
        classif_prefix + "bootstrap", ["True", "False"], default_value="True")
    cs.add_hyperparameters([
        model_type, imputation, n_estimators, criterion, max_features,
        max_depth, min_samples_split, min_samples_leaf,
        min_weight_fraction_leaf, max_leaf_nodes, bootstrap,
        min_impurity_decrease
    ])
    return cs
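# Illustrative helper, not part of the original code: strip the
# 'classifier:random_forest:' prefix from a sampled configuration so the
# values can be passed on as plain estimator kwargs. String-encoded values
# such as "None" or "True" would still need casting before reaching sklearn.
def rf_kwargs_from_config(config, prefix='classifier:random_forest:'):
    return {key[len(prefix):]: value
            for key, value in config.get_dictionary().items()
            if key.startswith(prefix)}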
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        criterion = CategoricalHyperparameter(
            "criterion", ["mse", "mae"], default_value="mse")
        # The maximum number of features used in the forest is calculated as m^max_features, where
        # m is the total number of features, and max_features is the hyperparameter specified below.
        # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This
        # corresponds with Geurts' heuristic.
        max_features = UniformFloatHyperparameter(
            "max_features", 0., 1., default_value=0.5)
        max_depth = UnParametrizedHyperparameter("max_depth", "None")
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            "min_weight_fraction_leaf", 0.)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)
        bootstrap = CategoricalHyperparameter(
            "bootstrap", ["True", "False"], default_value="True")
        cs.add_hyperparameters([criterion, max_features, max_depth,
                                min_samples_split, min_samples_leaf,
                                min_weight_fraction_leaf, max_leaf_nodes,
                                bootstrap, min_impurity_decrease])
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        space = {'criterion': hp.choice('rf_criterion', ["mse", "mae"]),
                 'max_features': hp.uniform('rf_max_features', 0, 1),
                 'max_depth': hp.choice('rf_max_depth', [None]),
                 'min_samples_split': hp.randint('rf_min_samples_split', 19) + 2,
                 'min_samples_leaf': hp.randint('rf_min_samples_leaf', 20) + 1,
                 'min_weight_fraction_leaf': hp.choice('rf_min_weight_fraction_leaf', [0]),
                 'max_leaf_nodes': hp.choice('rf_max_leaf_nodes', [None]),
                 'min_impurity_decrease': hp.choice('rf_min_impurity_decrease', [0]),
                 'bootstrap': hp.choice('rf_bootstrap', ["True", "False"])}

        init_trial = {'criterion': "mse",
                      'max_features': 0.5,
                      'max_depth': None,
                      'min_samples_split': 2,
                      'min_samples_leaf': 1,
                      'min_weight_fraction_leaf': 0,
                      'max_leaf_nodes': None,
                      'min_impurity_decrease': 0,
                      'bootstrap': "False"}

        return space
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    criterion = Constant('criterion', 'mse')
    splitter = Constant("splitter", "best")
    max_features = Constant('max_features', 1.0)
    max_depth = UniformFloatHyperparameter(
        'max_depth', 0., 2., default_value=0.5)
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.0)
    max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
    cs.add_hyperparameters([
        criterion, splitter, max_features, max_depth, min_samples_split,
        min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes
    ])
    return cs
def _construct_hyperparameter(hyperparameter: Dict) -> Hyperparameter:
    hp_type = hyperparameter['type']
    name = hyperparameter['name']
    if hp_type == 'constant':
        return Constant(
            name=name,
            value=hyperparameter['value'],
        )
    elif hp_type == 'unparametrized':
        return UnParametrizedHyperparameter(
            name=name,
            value=hyperparameter['value'],
        )
    elif hp_type == 'uniform_float':
        return UniformFloatHyperparameter(
            name=name,
            log=hyperparameter['log'],
            lower=hyperparameter['lower'],
            upper=hyperparameter['upper'],
            default_value=hyperparameter['default'],
        )
    elif hp_type == 'normal_float':
        return NormalFloatHyperparameter(
            name=name,
            log=hyperparameter['log'],
            mu=hyperparameter['mu'],
            sigma=hyperparameter['sigma'],
            default_value=hyperparameter['default'],
        )
    elif hp_type == 'uniform_int':
        return UniformIntegerHyperparameter(
            name=name,
            log=hyperparameter['log'],
            lower=hyperparameter['lower'],
            upper=hyperparameter['upper'],
            default_value=hyperparameter['default'],
        )
    elif hp_type == 'normal_int':
        # Like the normal float case, a normal integer is parametrized by
        # mu/sigma rather than lower/upper bounds.
        return NormalIntegerHyperparameter(
            name=name,
            log=hyperparameter['log'],
            mu=hyperparameter['mu'],
            sigma=hyperparameter['sigma'],
            default_value=hyperparameter['default'],
        )
    elif hp_type == 'categorical':
        return CategoricalHyperparameter(
            name=name,
            choices=hyperparameter['choices'],
            default_value=hyperparameter['default'],
            weights=hyperparameter.get('probabilities'),
        )
    elif hp_type == 'ordinal':
        return OrdinalHyperparameter(
            name=name,
            sequence=hyperparameter['sequence'],
            default_value=hyperparameter['default'],
        )
    else:
        raise ValueError(hp_type)
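# Minimal usage sketch (not from the original code): the dictionary layout
# mirrors the keys read above; the concrete values are made up.
lr_hp = _construct_hyperparameter({
    'type': 'uniform_float',
    'name': 'learning_rate',
    'log': True,
    'lower': 0.01,
    'upper': 1.0,
    'default': 0.1,
})
# lr_hp is a UniformFloatHyperparameter on a log scale with default 0.1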
def create_configspace():
    cs = ConfigurationSpace()
    batch_size = UniformIntegerHyperparameter(
        "train_batch_size", 32, 256, default_value=64, q=8)
    init_lr = UniformFloatHyperparameter(
        'init_lr', lower=1e-3, upper=0.3, default_value=0.1, log=True)
    lr_decay_factor = UnParametrizedHyperparameter('lr_decay_factor', 0.1)
    weight_decay = UniformFloatHyperparameter(
        'weight_decay', lower=1e-5, upper=1e-2, default_value=0.0002, log=True)
    momentum = UniformFloatHyperparameter(
        "momentum", 0.5, 0.99, default_value=0.9)
    nesterov = CategoricalHyperparameter(
        'nesterov', ['True', 'False'], default_value='True')
    cs.add_hyperparameters([
        batch_size, init_lr, lr_decay_factor, weight_decay, momentum, nesterov
    ])
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    Window_size = UniformIntegerHyperparameter(
        name="Window_size", lower=5, upper=50, default_value=20)
    tsfresh_feature = CategoricalHyperparameter(
        name="tsfresh_feature", choices=["True", "False"], default_value="True")
    Difference = CategoricalHyperparameter(
        name="Difference", choices=["True", "False"], default_value="True")
    alpha = UniformFloatHyperparameter(
        "alpha", 10**-5, 10., log=True, q=0.00001, default_value=1.)
    fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
    tol = UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default_value=1e-3, q=0.00001, log=True)
    cs.add_hyperparameters([
        alpha, fit_intercept, tol, Window_size, Difference, tsfresh_feature
    ])
    return cs
def get_hyperparameter_search_space(seed):
    imputation = CategoricalHyperparameter(
        'imputation__strategy', ['mean', 'median', 'most_frequent'])
    C = UniformFloatHyperparameter(
        "classifier__C", 0.03125, 32768, log=True, default_value=1.0)
    # No linear kernel here, because we have liblinear
    kernel = CategoricalHyperparameter(
        name="classifier__kernel", choices=["rbf", "poly", "sigmoid"],
        default_value="rbf")
    degree = UniformIntegerHyperparameter(
        "classifier__degree", 2, 5, default_value=3)
    gamma = UniformFloatHyperparameter(
        "classifier__gamma", 3.0517578125e-05, 8, log=True, default_value=0.1)
    # TODO this is totally ad-hoc
    coef0 = UniformFloatHyperparameter(
        "classifier__coef0", -1, 1, default_value=0)
    # probability is no hyperparameter, but an argument to the SVM algo
    shrinking = CategoricalHyperparameter(
        "classifier__shrinking", [True, False], default_value=True)
    tol = UniformFloatHyperparameter(
        "classifier__tol", 1e-5, 1e-1, default_value=1e-3, log=True)
    # cache size is not a hyperparameter, but an argument to the program!
    max_iter = UnParametrizedHyperparameter("classifier__max_iter", -1)

    cs = ConfigurationSpace('sklearn.svm.SVC', seed)
    cs.add_hyperparameters([imputation, C, kernel, degree, gamma, coef0,
                            shrinking, tol, max_iter])

    degree_depends_on_poly = EqualsCondition(degree, kernel, "poly")
    coef0_condition = InCondition(coef0, kernel, ["poly", "sigmoid"])
    cs.add_condition(degree_depends_on_poly)
    cs.add_condition(coef0_condition)
    return cs
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    n_estimators = Constant("n_estimators", 100)
    criterion = CategoricalHyperparameter(
        "criterion", ["gini", "entropy"], default_value="gini")
    max_features = UniformFloatHyperparameter(
        "max_features", 0.5, 5, default_value=1)
    max_depth = UnParametrizedHyperparameter(name="max_depth", value="None")
    min_samples_split = UniformIntegerHyperparameter(
        "min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter(
        "min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = Constant('min_weight_fraction_leaf', 0.)
    bootstrap = CategoricalHyperparameter(
        "bootstrap", ["True", "False"], default_value="False")
    cs.add_hyperparameters([n_estimators, criterion, max_features, max_depth,
                            min_samples_split, min_samples_leaf,
                            min_weight_fraction_leaf, bootstrap])
    return cs
def evaluate_ml_algorithm(dataset, algo, obj_metric, seed=1, task_type=None):
    print('EVALUATE-%s-%s-%s' % (dataset, algo, obj_metric))
    train_data = load_data(dataset, task_type=task_type, datanode_returned=True)
    print(set(train_data.data[1]))
    metric = get_metric(obj_metric)

    cs = _classifiers[algo].get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", algo)
    cs.add_hyperparameter(model)
    default_hpo_config = cs.get_default_configuration()

    hpo_evaluator = ClassificationEvaluator(default_hpo_config, scorer=metric,
                                            data_node=train_data, name='hpo',
                                            resampling_strategy='holdout',
                                            seed=seed)
    hpo_optimizer = SMACOptimizer(evaluator=hpo_evaluator,
                                  config_space=cs,
                                  per_run_time_limit=600,
                                  per_run_mem_limit=5120,
                                  output_dir='./logs',
                                  trials_per_iter=args.iter)
    hpo_optimizer.iterate()

    hpo_eval_dict = dict()
    for key, value in hpo_optimizer.eval_dict.items():
        hpo_eval_dict[key[1]] = value

    save_path = save_dir + '%s-%s-%s-hpo.pkl' % (dataset, algo, obj_metric)
    with open(save_path, 'wb') as f:
        pickle.dump(hpo_eval_dict, f)
def get_hyperparameter_search_space(dataset_properties=None):
    C = UniformFloatHyperparameter(
        "C", 0.03125, 32768, log=True, default_value=1.0)
    # No linear kernel here, because we have liblinear
    kernel = CategoricalHyperparameter(
        name="kernel", choices=["rbf", "poly", "sigmoid"], default_value="rbf")
    degree = UniformIntegerHyperparameter("degree", 2, 5, default_value=3)
    gamma = UniformFloatHyperparameter(
        "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1)
    # TODO this is totally ad-hoc
    coef0 = UniformFloatHyperparameter("coef0", -1, 1, default_value=0)
    # probability is no hyperparameter, but an argument to the SVM algo
    shrinking = CategoricalHyperparameter(
        "shrinking", ["True", "False"], default_value="True")
    tol = UniformFloatHyperparameter(
        "tol", 1e-5, 1e-1, default_value=1e-3, log=True)
    # cache size is not a hyperparameter, but an argument to the program!
    max_iter = UnParametrizedHyperparameter("max_iter", -1)

    cs = ConfigurationSpace()
    cs.add_hyperparameters([C, kernel, degree, gamma, coef0, shrinking, tol,
                            max_iter])

    degree_depends_on_poly = EqualsCondition(degree, kernel, "poly")
    coef0_condition = InCondition(coef0, kernel, ["poly", "sigmoid"])
    cs.add_condition(degree_depends_on_poly)
    cs.add_condition(coef0_condition)
    return cs
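# Hedged usage sketch (not part of the original code): map the default
# configuration of the SVM space above onto sklearn's SVC. The categorical
# "shrinking" value is stored as the string "True"/"False" and must be cast
# back to bool; "degree" and "coef0" are inactive under the default rbf
# kernel and therefore absent from the dictionary.
from sklearn.svm import SVC

params = get_hyperparameter_search_space().get_default_configuration().get_dictionary()
params["shrinking"] = params["shrinking"] == "True"
clf = SVC(probability=False, cache_size=200, **params)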