def Adaboost_range(hyperparameters):
    return {
        'n_estimators': UniformIntegerHyperparameter(
            'n_estimators',
            int(0.5 * hyperparameters['n_estimators']),
            int(2 * hyperparameters['n_estimators']),
            default=int(hyperparameters['n_estimators'])),
        'learning_rate': UniformFloatHyperparameter(
            'learning_rate',
            0.5 * hyperparameters['learning_rate'],
            2 * hyperparameters['learning_rate'],
            default=hyperparameters['learning_rate']),
    }
def get_hyperparameter_search_space(dataset_properties=None):
    cs = ConfigurationSpace()
    minimum_fraction = UniformFloatHyperparameter(
        "minimum_fraction", lower=.0001, upper=0.5, default_value=0.01, log=True)
    cs.add_hyperparameter(minimum_fraction)
    return cs
cs = ConfigurationSpace()

# We can add single hyperparameters:
do_bootstrapping = CategoricalHyperparameter(
    "do_bootstrapping", ["true", "false"], default="true")
cs.add_hyperparameter(do_bootstrapping)

# Or we can add multiple hyperparameters at once:
num_trees = UniformIntegerHyperparameter("num_trees", 10, 50, default=10)
max_features = UniformIntegerHyperparameter(
    "max_features", 1, boston.data.shape[1], default=1)
min_weight_frac_leaf = UniformFloatHyperparameter(
    "min_weight_frac_leaf", 0.0, 0.5, default=0.0)
criterion = CategoricalHyperparameter("criterion", ["mse", "mae"], default="mse")
min_samples_to_split = UniformIntegerHyperparameter(
    "min_samples_to_split", 2, 20, default=2)
min_samples_in_leaf = UniformIntegerHyperparameter(
    "min_samples_in_leaf", 1, 20, default=1)
max_leaf_nodes = UniformIntegerHyperparameter("max_leaf_nodes", 10, 1000, default=100)
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
    if optimizer == 'smac':
        cs = ConfigurationSpace()
        n_estimators = Constant("n_estimators", 100)
        criterion = CategoricalHyperparameter(
            "criterion", ["mse", "mae"], default_value="mse")

        # The maximum number of features used in the forest is calculated as
        # m^max_features, where m is the total number of features, and
        # max_features is the hyperparameter specified below. The default is
        # 0.5, which yields sqrt(m) features as max_features in the estimator.
        # This corresponds with Geurts' heuristic.
        max_features = UniformFloatHyperparameter(
            "max_features", 0., 1., default_value=0.5)

        max_depth = UnParametrizedHyperparameter("max_depth", "None")
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            "min_weight_fraction_leaf", 0.)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)
        bootstrap = CategoricalHyperparameter(
            "bootstrap", ["True", "False"], default_value="True")

        cs.add_hyperparameters([
            n_estimators, criterion, max_features, max_depth,
            min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
            max_leaf_nodes, bootstrap, min_impurity_decrease
        ])
        return cs
    elif optimizer == 'tpe':
        from hyperopt import hp
        space = {
            'n_estimators': hp.choice('rf_n_estimators', [100]),
            'criterion': hp.choice('rf_criterion', ["mse", "mae"]),
            'max_features': hp.uniform('rf_max_features', 0, 1),
            'max_depth': hp.choice('rf_max_depth', [None]),
            'min_samples_split': hp.randint('rf_min_samples_split', 19) + 2,
            'min_samples_leaf': hp.randint('rf_min_samples_leaf', 20) + 1,
            'min_weight_fraction_leaf': hp.choice('rf_min_weight_fraction_leaf', [0]),
            'max_leaf_nodes': hp.choice('rf_max_leaf_nodes', [None]),
            'min_impurity_decrease': hp.choice('rf_min_impurity_decrease', [0]),
            'bootstrap': hp.choice('rf_bootstrap', ["True", "False"])
        }

        init_trial = {
            'n_estimators': 100,
            'criterion': "mse",
            'max_features': 0.5,
            'max_depth': None,
            'min_samples_split': 2,
            'min_samples_leaf': 1,
            'min_weight_fraction_leaf': 0,
            'max_leaf_nodes': None,
            'min_impurity_decrease': 0,
            'bootstrap': "False"
        }
        return space
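The comment on max_features above describes a power transform; a tiny illustrative check (the numbers below are made up, not taken from the snippet) makes the arithmetic concrete:

# Illustrative only: with m = 64 total features and max_features = 0.5,
# the estimator receives m ** max_features = 64 ** 0.5 = 8 features,
# i.e. sqrt(m), matching Geurts' heuristic mentioned in the comment above.
m = 64
max_features_fraction = 0.5
print(int(m ** max_features_fraction))  # 8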
    dimensions. The minimum is always at x_i = 1 with a function value of zero.
    All input parameters are continuous. The search domain for all x's is the
    interval [-5, 10].
    """
    x1 = x["x0"]
    x2 = x["x1"]
    val = 100. * (x2 - x1**2.)**2. + (1 - x1)**2.
    return val


logging.basicConfig(level=logging.INFO)  # logging.DEBUG for debug output

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()
x0 = UniformFloatHyperparameter("x0", -5, 10, default_value=-3)
x1 = UniformFloatHyperparameter("x1", -5, 10, default_value=-4)
cs.add_hyperparameters([x0, x1])

# Scenario object
scenario = Scenario({
    "run_obj": "quality",    # we optimize quality (alternatively runtime)
    "runcount-limit": 10,    # max. number of function evaluations; for this example set to a low number
    "cs": cs,                # configuration space
    "deterministic": "true"
})

# Example call of the function
# It returns: Status, Cost, Runtime, Additional Infos
def_value = rosenbrock_2d(cs.get_default_configuration())
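A quick sanity check of the stated minimum (a sketch; it assumes rosenbrock_2d also accepts a plain dict, since the body above only indexes x by key):

# The docstring states the minimum is at x_i = 1 with value 0:
# 100 * (1 - 1**2)**2 + (1 - 1)**2 = 0
assert rosenbrock_2d({"x0": 1.0, "x1": 1.0}) == 0.0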
def get_hyperparameter_search_space(dataset_properties=None):
    reg_param = UniformFloatHyperparameter('reg_param', 0.0, 1.0, default_value=0.0)
    cs = ConfigurationSpace()
    cs.add_hyperparameter(reg_param)
    return cs
def test_normalfloat_to_uniformfloat(self):
    f1 = NormalFloatHyperparameter("param", 0, 10, q=0.1)
    f1_expected = UniformFloatHyperparameter("param", -30, 30, q=0.1)
    f1_actual = f1.to_uniform()
    self.assertEqual(f1_expected, f1_actual)
    return 1 - np.mean(scores)  # Minimize!


# logger = logging.getLogger("SVMExample")
logging.basicConfig(level=logging.INFO)  # logging.DEBUG for debug output

# Build Configuration Space which defines all parameters and their ranges
cs = ConfigurationSpace()

# Define the hyperparameters of the SGD-based linear model and add them to our cs
penalty = CategoricalHyperparameter(
    "penalty", ["l1", "l2", "elasticnet", "none"], default_value="l2")
alpha = UniformFloatHyperparameter("alpha", 0.00001, 0.1, default_value=0.0001)
learning_rate = CategoricalHyperparameter(
    "learning_rate", ["constant", "optimal", "invscaling", "adaptive"],
    default_value="optimal")
eta0 = UniformFloatHyperparameter("eta0", 0.00001, 0.1, default_value=0.0001)
power_t = UniformFloatHyperparameter("power_t", 0.3, 0.7, default_value=0.5)
warm_start = CategoricalHyperparameter("warm_start", [True, False], default_value=False)
l1_ratio = UniformFloatHyperparameter("l1_ratio", 0.0, 1.0, default_value=0.15)
fit_intercept = CategoricalHyperparameter("fit_intercept", [True, False],
def setups_to_configspace(setups,
                          default_params,
                          keyfield='parameter_name',
                          logscale_parameters=None,
                          ignore_parameters=None,
                          ignore_constants=True):
    # setups is the result from an openml.setups.list_setups call.
    # Note that this config space is not equal to the one obtained from
    # auto-sklearn, but it is useful for creating the pcs file.
    parameter_values = {}
    flow_id = None
    for setup_id in setups:
        current = setups[setup_id]
        if flow_id is None:
            flow_id = current.flow_id
        else:
            if current.flow_id != flow_id:
                raise ValueError(
                    'flow ids are expected to be equal. Expected %d, saw %s' %
                    (flow_id, current.flow_id))

        for param_id in current.parameters.keys():
            name = getattr(current.parameters[param_id], keyfield)
            value = current.parameters[param_id].value
            if name not in parameter_values.keys():
                parameter_values[name] = set()
            parameter_values[name].add(value)

    uncovered = set(parameter_values.keys()) - set(default_params.keys())
    if len(uncovered) > 0:
        raise ValueError(
            'Mismatch between keys default_params and parameter_values. '
            'Missing: %s' % str(uncovered))

    def is_castable_to(value, type):
        try:
            type(value)
            return True
        except ValueError:
            return False

    cs = ConfigurationSpace()
    if logscale_parameters is None:
        logscale_parameters = set()
    # for parameter in logscale_parameters:
    #     if parameter not in parameter_values.keys():
    #         raise ValueError('(Logscale) Parameter not recognized: %s' % parameter)

    constants = set()
    for name in parameter_values.keys():
        if ignore_parameters is not None and name in ignore_parameters:
            continue

        all_values = parameter_values[name]
        if len(all_values) <= 1:
            constants.add(name)
            if ignore_constants:
                continue

        if all(is_castable_to(item, int) for item in all_values):
            all_values = [int(item) for item in all_values]
            lower = min(all_values)
            upper = max(all_values)
            default = default_params[name]
            if not is_castable_to(default, int):
                sys.stderr.write(
                    'Illegal default for parameter %s (expected int): %s' %
                    (name, str(default)))
                default = int((lower + upper) / 2)  # fall back to the midpoint of the range
            hyper = UniformIntegerHyperparameter(name=name,
                                                 lower=lower,
                                                 upper=upper,
                                                 default=default,
                                                 log=name in logscale_parameters)
            cs.add_hyperparameter(hyper)
        elif all(is_castable_to(item, float) for item in all_values):
            all_values = [float(item) for item in all_values]
            lower = min(all_values)
            upper = max(all_values)
            default = default_params[name]
            if not is_castable_to(default, float):
                sys.stderr.write(
                    'Illegal default for parameter %s (expected float): %s' %
                    (name, str(default)))
                default = (lower + upper) / 2  # fall back to the midpoint of the range
            hyper = UniformFloatHyperparameter(name=name,
                                               lower=lower,
                                               upper=upper,
                                               default=default,
                                               log=name in logscale_parameters)
            cs.add_hyperparameter(hyper)
        else:
            values = [flow_to_sklearn(item) for item in all_values]
            hyper = CategoricalHyperparameter(name=name,
                                              choices=values,
                                              default=default_params[name])
            cs.add_hyperparameter(hyper)

    return cs, constants
def get_branin_config_space() -> ConfigurationSpace:
    cs = ConfigurationSpace()
    cs.add_hyperparameter(UniformFloatHyperparameter('x', -5, 10))
    cs.add_hyperparameter(UniformFloatHyperparameter('y', 0, 15))
    return cs
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
    UniformIntegerHyperparameter, CategoricalHyperparameter, \
    UnParametrizedHyperparameter, Constant

from automl.utl import json_utils

cs = ConfigurationSpace()

# base_estimator = Constant(name="base_estimator", value="None")
n_estimators = UniformIntegerHyperparameter(
    name="n_estimators", lower=50, upper=500, default_value=50, log=False)
learning_rate = UniformFloatHyperparameter(
    name="learning_rate", lower=0.01, upper=2, default_value=0.1, log=True)
loss = CategoricalHyperparameter(
    name="loss", choices=["linear", "square", "exponential"], default_value="linear")
max_depth = UniformIntegerHyperparameter(
    name="max_depth", lower=1, upper=10, default_value=1, log=False)

cs.add_hyperparameters([n_estimators, learning_rate, loss, max_depth])

json_utils.write_cs_to_json_file(cs, "AdaBoostRegressor")
                                          70, default_value=60)
cs.add_hyperparameter(train_window)

batch_size = CategoricalHyperparameter(
    "batch_size", [64, 128, 256, 1024], default_value=64)
cs.add_hyperparameter(batch_size)

rnn_depth = UniformIntegerHyperparameter("rnn_depth", 100, 500, default_value=400)
cs.add_hyperparameter(rnn_depth)

encoder_dropout = UniformFloatHyperparameter(
    "encoder_dropout", 0.0, 0.05, default_value=0.03)
cs.add_hyperparameter(encoder_dropout)

gate_dropout = UniformFloatHyperparameter("gate_dropout", 0.95, 1.0, default_value=0.997)
cs.add_hyperparameter(gate_dropout)

decoder_input_dropout = UniformFloatHyperparameter(
    "decoder_input_dropout", 0.95, 1.0, default_value=1.0)
cs.add_hyperparameter(decoder_input_dropout)
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'): if optimizer == 'smac': cs = ConfigurationSpace() hidden_size = UniformIntegerHyperparameter("hidden_size", 100, 500, default_value=200) activation = CategoricalHyperparameter( "activation", ["identity", "logistic", "tanh", "relu"], default_value="relu") solver = CategoricalHyperparameter("solver", ["sgd", "adam"], default_value="adam") alpha = UniformFloatHyperparameter("alpha", 1e-7, 1., log=True, default_value=0.0001) learning_rate = CategoricalHyperparameter( "learning_rate", ["adaptive", "invscaling", "constant"], default_value="constant") learning_rate_init = UniformFloatHyperparameter( "learning_rate_init", 1e-4, 3e-1, default_value=0.001, log=True) tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, log=True, default_value=1e-4) momentum = UniformFloatHyperparameter("momentum", 0.6, 1, q=0.05, default_value=0.9) nesterovs_momentum = CategoricalHyperparameter( "nesterovs_momentum", [True, False], default_value=True) beta1 = UniformFloatHyperparameter("beta1", 0.6, 1, default_value=0.9) power_t = UniformFloatHyperparameter("power_t", 1e-5, 1, log=True, default_value=0.5) cs.add_hyperparameters([ hidden_size, activation, solver, alpha, learning_rate, learning_rate_init, tol, momentum, nesterovs_momentum, beta1, power_t ]) learning_rate_condition = EqualsCondition(learning_rate, solver, "sgd") momentum_condition = EqualsCondition(momentum, solver, "sgd") nesterovs_momentum_condition = EqualsCondition( nesterovs_momentum, solver, "sgd") beta1_condition = EqualsCondition(beta1, solver, "adam") power_t_condition = EqualsCondition(power_t, learning_rate, "invscaling") cs.add_conditions([ learning_rate_condition, momentum_condition, nesterovs_momentum_condition, beta1_condition, power_t_condition ]) return cs elif optimizer == 'tpe': space = { 'hidden_size': hp.randint("mlp_hidden_size", 450) + 50, 'activation': hp.choice('mlp_activation', ["identity", "logistic", "tanh", "relu"]), 'solver': hp.choice('mlp_solver', [("sgd", { 'learning_rate': hp.choice('mlp_learning_rate', [ ("adaptive", {}), ("constant", {}), ("invscaling", { 'power_t': hp.uniform('mlp_power_t', 1e-5, 1) }) ]), 'momentum': hp.uniform('mlp_momentum', 0.6, 1), 'nesterovs_momentum': hp.choice('mlp_nesterovs_momentum', [True, False]) }), ("adam", { 'beta1': hp.uniform('mlp_beta1', 0.6, 1) })]), 'alpha': hp.loguniform('mlp_alpha', np.log(1e-7), np.log(1e-1)), 'learning_rate_init': hp.loguniform('mlp_learning_rate_init', np.log(1e-6), np.log(1e-1)), 'tol': hp.loguniform('mlp_tol', np.log(1e-5), np.log(1e-1)) } return space
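In the SMAC branch above, parameters such as momentum are only active when solver == "sgd" (via EqualsCondition). A minimal sketch of what that means when sampling, assuming the function above is importable under the same name:

cs = get_hyperparameter_search_space(optimizer='smac')
config = cs.sample_configuration()
# Inactive conditional hyperparameters (e.g. 'momentum' when solver == 'adam')
# simply do not appear in the sampled configuration's dictionary.
print(config.get_dictionary())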
def branin(x):
    xs = x.get_dictionary()
    x1 = xs['x1']
    x2 = xs['x2']
    a = 1.
    b = 5.1 / (4. * np.pi**2)
    c = 5. / np.pi
    r = 6.
    s = 10.
    t = 1. / (8. * np.pi)
    ret = a * (x2 - b * x1**2 + c * x1 - r)**2 + s * (1 - t) * np.cos(x1) + s
    return ret


cs = ConfigurationSpace()
x1 = UniformFloatHyperparameter("x1", -5, 10, default_value=0)
x2 = UniformFloatHyperparameter("x2", 0, 15, default_value=0)
cs.add_hyperparameters([x1, x2])

bo = SMBO(branin, cs, max_runs=30, time_limit_per_trial=30,
          logging_dir='logs', model_type='gp')
bo.run()
inc_value = bo.get_incumbent()
print(bo.get_history().data)
print('=' * 30)
print(inc_value)
def fmin_smac(func: callable, x0: list, bounds: list, maxfun: int = -1, maxtime: int = -1, rng: np.random.RandomState = None): """ Minimize a function func using the SMAC algorithm. This function is a convenience wrapper for the SMAC class. Parameters ---------- func : callable f(x) Function to minimize. x0 : list Initial guess/default configuration. bounds : list ``(min, max)`` pairs for each element in ``x``, defining the bound on that parameters. maxtime : int, optional Maximum runtime in seconds. maxfun : int, optional Maximum number of function evaluations. rng : np.random.RandomState, optional Random number generator used by SMAC. Returns ------- x : list Estimated position of the minimum. f : float Value of `func` at the minimum. s : :class:`smac.facade.smac_facade.SMAC` SMAC objects which enables the user to get e.g., the trajectory and runhistory. """ # create configuration space cs = ConfigurationSpace() for idx, (lower_bound, upper_bound) in enumerate(bounds): parameter = UniformFloatHyperparameter(name="x%02d" % (idx + 1), lower=lower_bound, upper=upper_bound, default_value=x0[idx]) cs.add_hyperparameter(parameter) # Create target algorithm runner ta = ExecuteTAFuncArray(ta=func) # create scenario scenario_dict = { "run_obj": "quality", "cs": cs, "deterministic": "true", "initial_incumbent": "DEFAULT" } if maxfun > 0: scenario_dict["runcount_limit"] = maxfun if maxtime > 0: scenario_dict["wallclock_limit"] = maxtime scenario = Scenario(scenario_dict) smac = SMAC(scenario=scenario, tae_runner=ta, rng=rng) smac.logger = logging.getLogger(smac.__module__ + "." + smac.__class__.__name__) incumbent = smac.optimize() config_id = smac.solver.runhistory.config_ids[incumbent] run_key = RunKey(config_id, None, 0) incumbent_performance = smac.solver.runhistory.data[run_key] incumbent = np.array( [incumbent['x%d' % (idx + 1)] for idx in range(len(bounds))], dtype=np.float) return incumbent, incumbent_performance.cost, \ smac
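A minimal usage sketch for the wrapper above, with a made-up quadratic objective; it assumes, as in SMAC's own fmin_smac examples, that the objective receives its parameters as a positional array:

def quadratic(x):
    # Simple convex objective; minimum at x = (1, 1).
    return (x[0] - 1) ** 2 + (x[1] - 1) ** 2

# Returns the estimated minimizer, the cost at that point, and the SMAC object,
# as documented in the docstring above.
x_min, cost, smac_obj = fmin_smac(func=quadratic,
                                  x0=[0.0, 0.0],
                                  bounds=[(-5, 5), (-5, 5)],
                                  maxfun=25)
print(x_min, cost)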
    CategoricalHyperparameter,
    UniformFloatHyperparameter,
    UniformIntegerHyperparameter,
    Constant,
)
from ConfigSpace.conditions import InCondition

# Import SMAC-utilities
from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario

# Define the Configuration space dimensions and limits
# N.B.
random_state = Constant("random_state", 42)
solver = Constant("solver", "liblinear")
C = UniformFloatHyperparameter("C", 0.03, 10, default_value=1, log=True)

cs1 = ConfigurationSpace()
cs1.add_hyperparameter(random_state)
cs1.add_hyperparameter(solver)
cs1.add_hyperparameter(C)

# # Optimisation

# In[ ]:

from smac.facade.smac_facade import SMAC
def get_cs(agentlist=["Flood"]): """Return full ConfigurationSpace for given list of agents.""" cs = ConfigurationSpace() agent = CategoricalHyperparameter( "agent", agentlist, default_value="Flood" if "Flood" in agentlist else agentlist[0]) cs.add_hyperparameter(agent) if "Tree" in agentlist: tree_hps = { "mobility": 0.7, "mobility_decay": 0.0, "health": 0.012, "health_decay": 0.0, "len_advantage": 1.0, "len_advantage_decay": 0.0, "food_ownership": 0.65, "food_ownership_decay": 0.0, "centrality": 0.1, "centrality_decay": 0.0, } for hp in tree_hps: tree_hps[hp] = UniformFloatHyperparameter( PREFIXES["Tree"] + hp, lower=0, upper=1, default_value=tree_hps[hp]) cs.add_hyperparameter(tree_hps[hp]) cs.add_condition(InCondition(tree_hps[hp], agent, ["Tree"])) if "Mobility" in agentlist: mobility_hps = { "health_threshold": UniformIntegerHyperparameter(PREFIXES["Mobility"] + "health_threshold", lower=0, upper=100, default_value=35), "min_len": UniformIntegerHyperparameter(PREFIXES["Mobility"] + "min_len", lower=0, upper=121, default_value=8), "first_move_cost": UniformFloatHyperparameter(PREFIXES["Mobility"] + "first_move_cost", lower=0, upper=3, default_value=1.0), } for hp in mobility_hps: cs.add_hyperparameter(mobility_hps[hp]) cs.add_condition(InCondition(mobility_hps[hp], agent, ["Mobility"])) if "Flood" in agentlist: flood_hps = { "health": { "min": -1.5, "max": 1.5, "default": 0.045, "log": False }, "food_distance": { "min": -2, "max": 2, "default": 0.415, "log": False }, "space": { "min": -6, "max": 6, "default": 0.9, "log": False }, "space_adv": { "min": -6, "max": 6, "default": 0.9, "log": False }, "size_adv": { "min": -15, "max": 15, "default": 6.4, "log": False }, "size_adv_decay": { "min": 1e-10, "max": 0.1, "default": 1e-10, "log": True }, } for hp in flood_hps: flood_hps[hp] = UniformFloatHyperparameter( PREFIXES["Flood"] + hp, lower=flood_hps[hp]["min"], upper=flood_hps[hp]["max"], default_value=flood_hps[hp]["default"], log=flood_hps[hp]["log"]) cs.add_hyperparameter(flood_hps[hp]) cs.add_condition(InCondition(flood_hps[hp], agent, ["Flood"])) return cs
                  scale_method=6, init_weight=weight)
    hoist.run()
    method_name = "HOIST-vae-%d" % id
    hoist.plot_statistics(method=method_name)
    print(hoist.get_incumbent(5))
    return hoist.get_incumbent(5)


def test_mbhb(cs, id):
    method_name = "MBHB-vae-%d" % id
    mbhb = MBHB(cs, train, maximal_iter, num_iter=iter_num, n_workers=n_work)
    mbhb.set_method_name(method_name)
    mbhb.run()
    mbhb.plot_statistics(method=method_name)
    print(mbhb.get_incumbent(5))
    return mbhb.get_incumbent(5)


if __name__ == "__main__":
    cs = ConfigurationSpace()
    learning_rate = UniformFloatHyperparameter(
        "learning_rate", 1e-4, 5e-2, default_value=1e-3, q=2e-4, log=True)
    batch_size = UniformIntegerHyperparameter(
        "batch_size", 32, 256, default_value=64, q=16)
    n_layer1 = UniformIntegerHyperparameter(
        "hidden_units", 256, 768, default_value=512, q=16)
    n_layer2 = UniformIntegerHyperparameter(
        "latent_units", 2, 20, default_value=5, q=1)
    cs.add_hyperparameters([learning_rate, n_layer1, n_layer2, batch_size])

    # train_vae(cs)
    # test_hb(cs, 1)
    test_hoist(cs, 1)
def fmin_smac(func: typing.Callable, x0: typing.List[float], bounds: typing.List[typing.Iterable[float]], maxfun: int = -1, rng: typing.Union[np.random.RandomState, int] = None, scenario_args: typing.Mapping[str, typing.Any] = None, **kwargs): """ Minimize a function func using the SMAC4HPO facade (i.e., a modified version of SMAC). This function is a convenience wrapper for the SMAC4HPO class. Parameters ---------- func : typing.Callable Function to minimize. x0 : typing.List[float] Initial guess/default configuration. bounds : typing.List[typing.List[float]] ``(min, max)`` pairs for each element in ``x``, defining the bound on that parameters. maxfun : int, optional Maximum number of function evaluations. rng : np.random.RandomState, optional Random number generator used by SMAC. scenario_args: typing.Mapping[str,typing.Any] Arguments passed to the scenario See dsmac.scenario.scenario.Scenario **kwargs: Arguments passed to the optimizer class See ~dsmac.facade.smac_facade.SMAC Returns ------- x : list Estimated position of the minimum. f : float Value of `func` at the minimum. s : :class:`smac.facade.smac_hpo_facade.SMAC4HPO` SMAC objects which enables the user to get e.g., the trajectory and runhistory. """ # create configuration space cs = ConfigurationSpace() # Adjust zero padding tmplt = 'x{0:0' + str(len(str(len(bounds)))) + 'd}' for idx, (lower_bound, upper_bound) in enumerate(bounds): parameter = UniformFloatHyperparameter(name=tmplt.format(idx + 1), lower=lower_bound, upper=upper_bound, default_value=x0[idx]) cs.add_hyperparameter(parameter) # create scenario scenario_dict = { "run_obj": "quality", "cs": cs, "deterministic": "true", "initial_incumbent": "DEFAULT", } if scenario_args is not None: scenario_dict.update(scenario_args) if maxfun > 0: scenario_dict["runcount_limit"] = maxfun scenario = Scenario(scenario_dict) smac = SMAC4HPO(scenario=scenario, tae_runner=ExecuteTAFuncArray, tae_runner_kwargs={'ta': func}, rng=rng, **kwargs) smac.logger = logging.getLogger(smac.__module__ + "." + smac.__class__.__name__) incumbent = smac.optimize() config_id = smac.solver.runhistory.config_ids[incumbent] run_key = RunKey(config_id, None, 0) incumbent_performance = smac.solver.runhistory.data[run_key] incumbent = np.array( [incumbent[tmplt.format(idx + 1)] for idx in range(len(bounds))], dtype=np.float) return incumbent, incumbent_performance.cost, smac
def get_cs_dimensions(self, api_config: typing.Dict) -> ConfigurationSpace: """ Help routine to setup ConfigurationSpace search space in constructor. Take api_config as argument so this can be static. Parameters ---------- api_config: Dict api dictionary to construct Returns ------- cs: ConfigurationSpace ConfigurationSpace that contains the same hyperparameter as api_config """ # TODO 2 options to transform the real and int hyperaparameters in different scales # option 1: similar to example_submission.skopt.optimizer, merge 'logit' into 'log' and 'bilog' into 'linear' # option 2: use the api bayesmark.space.space to warp and unwarp the samples cs = ConfigurationSpace() param_list = sorted(api_config.keys()) hp_list = [] for param_name in param_list: param_config = api_config[param_name] param_type = param_config["type"] param_space = param_config.get("space", None) param_values = param_config.get("values", None) param_range = param_config.get("range", None) if param_type == "cat": assert param_space is None assert param_range is None hp = CategoricalHyperparameter(name=param_name, choices=param_values) elif param_type == "bool": assert param_space is None assert param_values is None assert param_range is None hp = CategoricalHyperparameter(name=param_name, choices=[True, False]) elif param_type == "ordinal": # appear in example_submission.skopt.optimizer but not in README assert param_space is None assert param_range is None hp = OrdinalHyperparameter(name=param_name, sequence=param_values) elif param_type in ("int", "real"): if param_values is not None: # TODO: decide whether we treat these parameters as discrete values # or step function (example see example_submission.skopt.optimizer, line 71-77) # sort the values to store them in OrdinalHyperparameter param_values_sorted = np.sort(param_values) hp = OrdinalHyperparameter(name=param_name, sequence=param_values_sorted) else: log = True if param_space in ("log", "logit") else False if param_type == "int": hp = UniformIntegerHyperparameter( name=param_name, lower=param_range[0], upper=param_range[-1], log=log) else: hp = UniformFloatHyperparameter(name=param_name, lower=param_range[0], upper=param_range[-1], log=log) else: assert False, "type %s not handled in API" % param_type hp_list.append(hp) cs.add_hyperparameters(hp_list) return cs
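For reference, a small example of the api_config dictionary the routine above consumes. The parameter names and ranges are invented for illustration; only the type/space/range/values structure follows the code above:

example_api_config = {
    # "cat"  -> CategoricalHyperparameter over the listed values
    "kernel": {"type": "cat", "values": ["linear", "rbf", "poly"]},
    # "bool" -> CategoricalHyperparameter over [True, False]
    "shrinking": {"type": "bool"},
    # "int" with a range -> UniformIntegerHyperparameter (log-scaled if space is "log"/"logit")
    "max_depth": {"type": "int", "space": "linear", "range": (1, 15)},
    # "real" with explicit values -> OrdinalHyperparameter over the sorted values
    "gamma": {"type": "real", "values": [1e-3, 1e-2, 1e-1]},
    # "real" with a range and log space -> UniformFloatHyperparameter(log=True)
    "C": {"type": "real", "space": "log", "range": (1e-3, 1e3)},
}

Passed to get_cs_dimensions, such a dictionary would yield a ConfigurationSpace with one hyperparameter per key.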
def test_uniformfloat(self): # TODO test non-equality # TODO test sampling from a log-distribution which has a negative # lower value! f1 = UniformFloatHyperparameter("param", 0, 10) f1_ = UniformFloatHyperparameter("param", 0, 10) self.assertEqual(f1, f1_) self.assertEqual( "param, Type: UniformFloat, Range: [0.0, 10.0], " "Default: 5.0", str(f1)) f2 = UniformFloatHyperparameter("param", 0, 10, q=0.1) f2_ = UniformFloatHyperparameter("param", 0, 10, q=0.1) self.assertEqual(f2, f2_) self.assertEqual( "param, Type: UniformFloat, Range: [0.0, 10.0], " "Default: 5.0, Q: 0.1", str(f2)) f3 = UniformFloatHyperparameter("param", 0.00001, 10, log=True) f3_ = UniformFloatHyperparameter("param", 0.00001, 10, log=True) self.assertEqual(f3, f3_) self.assertEqual( "param, Type: UniformFloat, Range: [1e-05, 10.0], Default: 0.01, " "on log-scale", str(f3)) f4 = UniformFloatHyperparameter("param", 0, 10, default_value=1.0) f4_ = UniformFloatHyperparameter("param", 0, 10, default_value=1.0) # Test that a int default is converted to float f4__ = UniformFloatHyperparameter("param", 0, 10, default_value=1) self.assertEqual(f4, f4_) self.assertEqual(type(f4.default_value), type(f4__.default_value)) self.assertEqual( "param, Type: UniformFloat, Range: [0.0, 10.0], Default: 1.0", str(f4)) f5 = UniformFloatHyperparameter("param", 0.1, 10, q=0.1, log=True, default_value=1.0) f5_ = UniformFloatHyperparameter("param", 0.1, 10, q=0.1, log=True, default_value=1.0) self.assertEqual(f5, f5_) self.assertEqual( "param, Type: UniformFloat, Range: [0.1, 10.0], Default: 1.0, " "on log-scale, Q: 0.1", str(f5)) self.assertNotEqual(f1, f2) self.assertNotEqual(f1, "UniformFloat") # test that meta-data is stored correctly f_meta = UniformFloatHyperparameter("param", 0.1, 10, q=0.1, log=True, default_value=1.0, meta=dict(self.meta_data)) self.assertEqual(f_meta.meta, self.meta_data)
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'): if optimizer == 'smac': cs = ConfigurationSpace() n_estimators = UniformIntegerHyperparameter(name="n_estimators", lower=50, upper=500, default_value=50, log=False) sampling_strategy = CategoricalHyperparameter( name="sampling_strategy", choices=["majority", "not minority", "not majority", "all"], default_value="not minority") replacement = CategoricalHyperparameter("replacement", ["True", "False"], default_value="False") ab_n_estimators = UniformIntegerHyperparameter( name="ab_n_estimators", lower=50, upper=500, default_value=50, log=False) ab_learning_rate = UniformFloatHyperparameter( name="ab_learning_rate", lower=0.01, upper=2, default_value=0.1, log=True) ab_algorithm = CategoricalHyperparameter( name="ab_algorithm", choices=["SAMME.R", "SAMME"], default_value="SAMME.R") ab_max_depth = UniformIntegerHyperparameter(name="ab_max_depth", lower=1, upper=10, default_value=1, log=False) cs.add_hyperparameters([ n_estimators, sampling_strategy, replacement, ab_n_estimators, ab_learning_rate, ab_algorithm, ab_max_depth ]) return cs elif optimizer == 'tpe': from hyperopt import hp space = { 'n_estimators': hp.randint('easy_ensemble_n_estimators', 451) + 50, 'sampling_strategy': hp.choice('easy_ensemble_sampling_strategy', ["majority", "not minority", "not majority", "all"]), 'replacement': hp.choice('easy_ensemble_replacement', ["True", "False"]), 'ab_n_estimators': hp.randint('ab_n_estimators', 451) + 50, 'ab_learning_rate': hp.loguniform('ab_learning_rate', np.log(0.01), np.log(2)), 'ab_algorithm': hp.choice('ab_algorithm', ["SAMME.R", "SAMME"]), 'ab_max_depth': hp.randint('ab_max_depth', 10) + 1 } init_trial = { 'n_estimators': 10, 'sampling_strategy': "not minority", 'replacement': "False", 'ab_n_estimators': 50, 'ab_learning_rate': 0.1, 'ab_algorithm': "SAMME.R", 'ab_max_depth': 1 } return space
def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'): if optimizer == 'smac': cs = ConfigurationSpace() n_estimators = UniformIntegerHyperparameter("n_estimators", 100, 1000, default_value=500) num_leaves = UniformIntegerHyperparameter("num_leaves", 31, 1023, default_value=31) learning_rate = UniformFloatHyperparameter("learning_rate", 0.025, 0.3, default_value=0.1, log=True) min_child_weight = UniformIntegerHyperparameter("min_child_weight", 1, 10, default_value=1) subsample = UniformFloatHyperparameter("subsample", 0.5, 1, default_value=1) colsample_bytree = UniformFloatHyperparameter("colsample_bytree", 0.5, 1, default_value=1) reg_alpha = UniformFloatHyperparameter('reg_alpha', 1e-10, 10, log=True, default_value=1e-10) reg_lambda = UniformFloatHyperparameter("reg_lambda", 1e-10, 10, log=True, default_value=1e-10) cs.add_hyperparameters([ n_estimators, num_leaves, learning_rate, min_child_weight, subsample, colsample_bytree, reg_alpha, reg_lambda ]) return cs elif optimizer == 'tpe': from hyperopt import hp space = { 'n_estimators': hp.randint('lgb_n_estimators', 901) + 100, 'num_leaves': hp.randint('lgb_num_leaves', 993) + 31, 'learning_rate': hp.loguniform('lgb_learning_rate', np.log(0.025), np.log(0.3)), 'min_child_weight': hp.randint('lgb_min_child_weight', 10) + 1, 'subsample': hp.uniform('lgb_subsample', 0.5, 1), 'colsample_bytree': hp.uniform('lgb_colsample_bytree', 0.5, 1), 'reg_alpha': hp.loguniform('lgb_reg_alpha', np.log(1e-10), np.log(10)), 'reg_lambda': hp.loguniform('lgb_reg_lambda', np.log(1e-10), np.log(10)) } init_trial = { 'n_estimators': 500, 'num_leaves': 31, 'learning_rate': 0.1, 'min_child_weight': 1, 'subsample': 1, 'colsample_bytree': 1, 'reg_alpha': 1e-10, 'reg_lambda': 1e-10 } return space
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
    UniformIntegerHyperparameter, CategoricalHyperparameter, \
    UnParametrizedHyperparameter, Constant
from ConfigSpace.conditions import EqualsCondition, InCondition

from automl.utl import json_utils

cs = ConfigurationSpace()

loss = CategoricalHyperparameter(
    "loss", ["hinge", "log", "modified_huber", "squared_hinge", "perceptron"],
    default_value="log")
penalty = CategoricalHyperparameter(
    "penalty", ["l1", "l2", "elasticnet"], default_value="l2")
alpha = UniformFloatHyperparameter("alpha", 1e-7, 1e-1, log=True, default_value=0.0001)
l1_ratio = UniformFloatHyperparameter("l1_ratio", 1e-9, 1, log=True, default_value=0.15)
fit_intercept = Constant("fit_intercept", "True")
tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, log=True, default_value=1e-4)
epsilon = UniformFloatHyperparameter("epsilon", 1e-5,
def quant_post_hpo( executor, place, model_dir, quantize_model_path, train_sample_generator=None, eval_sample_generator=None, train_dataloader=None, eval_dataloader=None, eval_function=None, model_filename=None, params_filename=None, save_model_filename='__model__', save_params_filename='__params__', scope=None, quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"], is_full_quantize=False, weight_bits=8, activation_bits=8, weight_quantize_type=['channel_wise_abs_max'], algo=["KL", "hist", "avg", "mse"], bias_correct=[True, False], hist_percent=[0.98, 0.999], ### uniform sample in list. batch_size=[10, 30], ### uniform sample in list. batch_num=[10, 30], ### uniform sample in list. optimize_model=False, is_use_cache_file=False, cache_dir="./temp_post_training", runcount_limit=30): """ The function utilizes static post training quantization method to quantize the fp32 model. It uses calibrate data to calculate the scale factor of quantized variables, and inserts fake quantization and dequantization operators to obtain the quantized model. Args: executor(paddle.static.Executor): The executor to load, run and save the quantized model. place(paddle.CPUPlace or paddle.CUDAPlace): This parameter represents the executor run on which device. model_dir(str): The path of fp32 model that will be quantized, and the model and params that saved by ``paddle.static.io.save_inference_model`` are under the path. quantize_model_path(str): The path to save quantized model using api ``paddle.static.io.save_inference_model``. train_sample_generator(Python Generator): The sample generator provides calibrate data for DataLoader, and it only returns a sample every time. eval_sample_generator(Python Generator): The sample generator provides evalution data for DataLoader, and it only returns a sample every time. model_filename(str, optional): The name of model file. If parameters are saved in separate files, set it as 'None'. Default: 'None'. params_filename(str, optional): The name of params file. When all parameters are saved in a single file, set it as filename. If parameters are saved in separate files, set it as 'None'. Default : 'None'. save_model_filename(str): The name of model file to save the quantized inference program. Default: '__model__'. save_params_filename(str): The name of file to save all related parameters. If it is set None, parameters will be saved in separate files. Default: '__params__'. scope(paddle.static.Scope, optional): The scope to run program, use it to load and save variables. If scope is None, will use paddle.static.global_scope(). quantizable_op_type(list[str], optional): The list of op types that will be quantized. Default: ["conv2d", "depthwise_conv2d", "mul"]. is_full_quantize(bool): if True, apply quantization to all supported quantizable op type. If False, only apply quantization to the input quantizable_op_type. Default is False. weight_bits(int, optional): quantization bit number for weights. activation_bits(int): quantization bit number for activation. weight_quantize_type(str): quantization type for weights, support 'abs_max' and 'channel_wise_abs_max'. Compared to 'abs_max', the model accuracy is usually higher when using 'channel_wise_abs_max'. optimize_model(bool, optional): If set optimize_model as True, it applies some passes to optimize the model before quantization. So far, the place of executor must be cpu it supports fusing batch_norm into convs. is_use_cache_file(bool): This param is deprecated. cache_dir(str): This param is deprecated. 
runcount_limit(int): max. number of model quantization. Returns: None """ global g_quant_config g_quant_config = QuantConfig( executor, place, model_dir, quantize_model_path, algo, hist_percent, bias_correct, batch_size, batch_num, train_sample_generator, eval_sample_generator, train_dataloader, eval_dataloader, eval_function, model_filename, params_filename, save_model_filename, save_params_filename, scope, quantizable_op_type, is_full_quantize, weight_bits, activation_bits, weight_quantize_type, optimize_model, is_use_cache_file, cache_dir) cs = ConfigurationSpace() hyper_params = [] if 'hist' in algo: hist_percent = UniformFloatHyperparameter( "hist_percent", hist_percent[0], hist_percent[1], default_value=hist_percent[0]) hyper_params.append(hist_percent) if len(algo) > 1: algo = CategoricalHyperparameter("algo", algo, default_value=algo[0]) hyper_params.append(algo) else: algo = algo[0] if len(bias_correct) > 1: bias_correct = CategoricalHyperparameter("bias_correct", bias_correct, default_value=bias_correct[0]) hyper_params.append(bias_correct) else: bias_correct = bias_correct[0] if len(weight_quantize_type) > 1: weight_quantize_type = CategoricalHyperparameter("weight_quantize_type", \ weight_quantize_type, default_value=weight_quantize_type[0]) hyper_params.append(weight_quantize_type) else: weight_quantize_type = weight_quantize_type[0] if len(batch_size) > 1: batch_size = UniformIntegerHyperparameter("batch_size", batch_size[0], batch_size[1], default_value=batch_size[0]) hyper_params.append(batch_size) else: batch_size = batch_size[0] if len(batch_num) > 1: batch_num = UniformIntegerHyperparameter("batch_num", batch_num[0], batch_num[1], default_value=batch_num[0]) hyper_params.append(batch_num) else: batch_num = batch_num[0] if len(hyper_params) == 0: quant_post( \ executor=g_quant_config.executor, \ scope=g_quant_config.scope, \ model_dir=g_quant_config.float_infer_model_path, \ quantize_model_path=g_quant_model_cache_path, \ sample_generator=g_quant_config.train_sample_generator, \ data_loader=g_quant_config.train_dataloader, model_filename=g_quant_config.model_filename, \ params_filename=g_quant_config.params_filename, \ save_model_filename=g_quant_config.save_model_filename, \ save_params_filename=g_quant_config.save_params_filename, \ quantizable_op_type=g_quant_config.quantizable_op_type, \ activation_quantize_type="moving_average_abs_max", \ weight_quantize_type=weight_quantize_type, \ algo=algo, \ hist_percent=hist_percent, \ bias_correction=bias_correct, \ batch_size=batch_size, \ batch_nums=batch_num) return cs.add_hyperparameters(hyper_params) scenario = Scenario({ "run_obj": "quality", # we optimize quality (alternative runtime) "runcount-limit": runcount_limit, # max. 
number of function evaluations; for this example set to a low number "cs": cs, # configuration space "deterministic": "True", "limit_resources": "False", "memory_limit": 4096 # adapt this to reasonable value for your hardware }) # To optimize, we pass the function to the SMAC-object smac = SMAC4HPO(scenario=scenario, rng=np.random.RandomState(42), tae_runner=quantize) # Example call of the function with default values # It returns: Status, Cost, Runtime, Additional Infos def_value = smac.get_tae_runner().run(cs.get_default_configuration(), 1)[1] print("Value for default configuration: %.8f" % def_value) # Start optimization try: incumbent = smac.optimize() finally: incumbent = smac.solver.incumbent inc_value = smac.get_tae_runner().run(incumbent, 1)[1] print("Optimized Value: %.8f" % inc_value) print("quantize completed")
import os

path = os.path.dirname(os.path.realpath(__file__))

# directory in which you can find all plots
plot_dir = path + '/example_data/test_plots'

# artificial dataset (here: features)
features = np.loadtxt(path + '/example_data/diabetes_features.csv', delimiter=",")
responses = np.loadtxt(path + '/example_data/diabetes_responses.csv', delimiter=",")

# config space
pcs = list(zip(np.min(features, axis=0), np.max(features, axis=0)))
cs = ConfigSpace.ConfigurationSpace()
for i in range(len(pcs)):
    cs.add_hyperparameter(UniformFloatHyperparameter("%i" % i, pcs[i][0], pcs[i][1]))

# create an instance of fanova with trained forest and ConfigSpace
f = fANOVA(X=features, Y=responses, config_space=cs)

# marginal of particular parameter:
dims = (1, )
res = f.quantify_importance(dims)
print(res)

# getting the 10 most important pairwise marginals sorted by importance
best_margs = f.get_most_important_pairwise_marginals(n=10)
print(best_margs)

# visualizations:
kf = KFold(X.shape[0], n_folds=4)

# build Configuration Space which defines all parameters and their ranges
# to illustrate different parameter types,
# we use continuous, integer and categorical parameters
cs = ConfigurationSpace()

do_bootstrapping = CategoricalHyperparameter(
    "do_bootstrapping", ["true", "false"], default="true")
cs.add_hyperparameter(do_bootstrapping)

num_trees = UniformIntegerHyperparameter("num_trees", 10, 50, default=10)
cs.add_hyperparameter(num_trees)

frac_points_per_tree = UniformFloatHyperparameter(
    "frac_points_per_tree", 0.001, 1, default=1)
cs.add_hyperparameter(frac_points_per_tree)

ratio_features = UniformFloatHyperparameter("ratio_features", 0.001, 1, default=1)
cs.add_hyperparameter(ratio_features)

min_samples_to_split = UniformIntegerHyperparameter(
    "min_samples_to_split", 2, 20, default=2)
cs.add_hyperparameter(min_samples_to_split)
# get sample data from online lda
X = np.loadtxt(path + '/example_data/online_lda/online_lda_features.csv', delimiter=",")
Y = np.loadtxt(path + '/example_data/online_lda/online_lda_responses.csv', delimiter=",")

# setting up config space:
param_file = path + '/example_data/online_lda/param-file.txt'
f = open(param_file, 'rb')

cs = ConfigurationSpace()
for row in f:
    cs.add_hyperparameter(
        UniformFloatHyperparameter("%s" % row[0:4].decode('utf-8'),
                                   np.float(row[6:9]), np.float(row[10:13]),
                                   np.float(row[18:21])))
param = cs.get_hyperparameters()

# create an instance of fanova with data for the random forest and the configSpace
f = fANOVA(X=X, Y=Y, config_space=cs)

# marginal for first parameter
p_list = (0, )
res = f.quantify_importance(p_list)
print(res)

p2_list = ('Col1', 'Col2')
res2 = f.quantify_importance(p2_list)
print(res2)

p2_list = ('Col0', 'Col2')
n_layer = UniformIntegerHyperparameter("n_layer", 1, 5, default_value=1)
n_neurons = UniformIntegerHyperparameter("n_neurons", 8, 1024, log=True, default_value=10)
activation = CategoricalHyperparameter(
    "activation", ['logistic', 'tanh', 'relu'], default_value='tanh')
batch_size = UniformIntegerHyperparameter('batch_size', 30, 300, default_value=200)
learning_rate_init = UniformFloatHyperparameter(
    'learning_rate_init', 0.0001, 1.0, default_value=0.001, log=True)
cs.add_hyperparameters(
    [n_layer, n_neurons, activation, batch_size, learning_rate_init])

# SMAC scenario object
scenario = Scenario({
    "run_obj": "quality",       # we optimize quality (alternative to runtime)
    "wallclock-limit": 100,     # max duration to run the optimization (in seconds)
    "cs": cs,                   # configuration space
    "deterministic": "true",
    "limit_resources": True,    # Uses pynisher to limit memory and runtime
                                # Alternatively, you can also disable this.
                                # Then you should handle runtime and memory yourself in the TA
def Logit_range(hyperparameters):
    return {
        'C': UniformFloatHyperparameter('C',
                                        0.5 * hyperparameters['C'],
                                        2 * hyperparameters['C'],
                                        default=hyperparameters['C'])
    }
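These *_range helpers return plain dicts of hyperparameter objects centered on an incumbent value. A minimal sketch of how such a dict could be assembled into a narrowed ConfigurationSpace (the incumbent value below is invented):

from ConfigSpace.configuration_space import ConfigurationSpace

incumbent = {'C': 1.0}             # hypothetical incumbent value
narrowed = Logit_range(incumbent)  # C in [0.5, 2.0] with default 1.0, per the function above
cs = ConfigurationSpace()
cs.add_hyperparameters(list(narrowed.values()))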