def test_opt():
    space = DesignSpace().parse([{'name': 'x1', 'type': 'num', 'lb': -3.0, 'ub': 3.0}])
    acq = ToyExample()
    opt = EvolutionOpt(space, acq, pop=10)
    rec = opt.optimize(initial_suggest=space.sample(3))
    x, xe = space.transform(rec)
    assert approx(1.0, 1e-3) == acq(x, xe)[:, 0].squeeze().item()
def hebo_cube_factory(objective, n_trials, n_dim, with_count, n_suggestions=5):
    global feval_count
    feval_count = 0
    variables = [{'name': 'u' + str(i), 'type': 'num', 'lb': 0., 'ub': 1.} for i in range(n_dim)]
    space = DesignSpace().parse(variables)
    opt = HEBO(space)

    def _objective(params) -> np.ndarray:
        global feval_count
        feval_count += len(params.index)
        return np.array([objective(ui) for ui in params.values])

    n_batches = int(math.floor(n_trials / n_suggestions))
    n_remainder = n_trials - n_suggestions * n_batches
    for i in range(n_batches):
        rec = opt.suggest(n_suggestions=n_suggestions)  # <-- don't change this
        opt.observe(rec, _objective(rec))
    for i in range(n_remainder):
        rec = opt.suggest(n_suggestions=1)  # <-- don't change this
        opt.observe(rec, _objective(rec))
    best_val = opt.y.min()
    best_ndx = np.argmin([y[0] for y in opt.y])  # I mean seriously, why make the user do this?
    best_x = list(opt.X.values[best_ndx])
    return (best_val, best_x, feval_count) if with_count else (best_val, best_x)
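# A minimal usage sketch for hebo_cube_factory above. The sphere objective is an
# assumption for illustration, not part of the source; any callable mapping a
# point in [0, 1]^n_dim to a float would do.
import numpy as np

def sphere(u) -> float:
    # distance to the centre of the unit cube; minimised at u = (0.5, ..., 0.5)
    return float(np.sum((np.asarray(u) - 0.5) ** 2))

best_val, best_x, n_evals = hebo_cube_factory(sphere, n_trials=16, n_dim=3, with_count=True)
print(best_val, best_x, n_evals)  # n_evals should equal n_trials, here 16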
def _to_hebo_search_space(self, search_space):
    space = []
    for hp in search_space["hyperparameters"]:
        if hp["type"] == "CATEGORY":
            hebo_hp = {
                "name": hp["key"],
                "type": "cat",
                "categories": hp["range"],
            }
        elif hp["type"] in ["FLOAT", "FLOAT_EXP"]:
            hebo_hp = {
                "name": hp["key"],
                "type": "num",
                "lb": hp["range"][0],
                "ub": hp["range"][1],
            }
        elif hp["type"] in ["INT", "INT_EXP"]:
            hebo_hp = {
                "name": hp["key"],
                "type": "int",
                "lb": hp["range"][0],
                "ub": hp["range"][1],
            }
        elif hp["type"] == "BOOL":
            hebo_hp = {
                "name": hp["key"],
                "type": "bool",
            }
        else:
            raise Exception(f"HEBO does not support parameter type: {hp}")
        space.append(hebo_hp)
    space = DesignSpace().parse(space)
    return space
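# An illustrative input for _to_hebo_search_space. The dict layout mirrors the
# branches above; the keys and type names are inferred from the code, not from
# a documented schema.
search_space = {
    "hyperparameters": [
        {"key": "lr",         "type": "FLOAT_EXP", "range": [1e-5, 1e-1]},
        {"key": "num_layers", "type": "INT",       "range": [1, 8]},
        {"key": "activation", "type": "CATEGORY",  "range": ["relu", "tanh"]},
        {"key": "dropout",    "type": "BOOL"},
    ]
}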
def test_opt_int():
    space = DesignSpace().parse([
        {'name': 'x1', 'type': 'num', 'lb': -3.0, 'ub': 3.0},
        {'name': 'x2', 'type': 'int', 'lb': -3.0, 'ub': 3.0},
    ])
    acq = ToyExample()
    opt = EvolutionOpt(space, acq, pop=10)
    rec = opt.optimize()
    assert approx(1.0, 1e-3) == acq(*space.transform(rec))[:, 0].squeeze().item()
def gen_emb_space(eff_dim: int, scale: float) -> DesignSpace:
    scale = abs(scale)  # bounds are symmetric around zero, so force a non-negative scale
    space = DesignSpace().parse([
        {'name': f'y{i}', 'type': 'num', 'lb': -scale, 'ub': scale}
        for i in range(eff_dim)
    ])
    return space
def test_contextual_opt(opt_cls):
    space = DesignSpace().parse([
        {'name' : 'x0', 'type' : 'int', 'lb' : 0, 'ub' : 7},
        {'name' : 'x1', 'type' : 'int', 'lb' : 0, 'ub' : 7},
    ])
    opt = opt_cls(space, rand_sample = 2, model_name = 'rf')
    for i in range(2):
        n_suggestions = 8 if opt.support_parallel_opt else 1
        rec = opt.suggest(n_suggestions = n_suggestions, fix_input = {'x0' : 3})
        y = (rec[['x0', 'x1']].values ** 2).sum(axis = 1, keepdims = True)
        assert (rec['x0'] == 3).all()
        opt.observe(rec, y)
def parse_space_from_bayesmark(api_config) -> DesignSpace:
    """
    Parse design space of bayesmark (https://github.com/uber/bayesmark)
    """
    space = DesignSpace()
    params = []
    for param_name in api_config:
        param_conf = api_config[param_name]
        param_type = param_conf['type']
        param_space = param_conf.get('space', None)
        param_range = param_conf.get('range', None)
        param_values = param_conf.get('values', None)

        bo_param_conf = {'name': param_name}
        if param_type == 'int':
            # ignore 'log' space
            # TODO: support log-scale int
            bo_param_conf['type'] = 'int'
            bo_param_conf['lb'] = param_range[0]
            bo_param_conf['ub'] = param_range[1]
        elif param_type == 'bool':
            bo_param_conf['type'] = 'bool'
        elif param_type in ('cat', 'ordinal'):
            bo_param_conf['type'] = 'cat'
            bo_param_conf['categories'] = list(set(param_values))
        elif param_type == 'real':
            if param_space in ('log', 'logit'):
                bo_param_conf['type'] = 'pow'
                bo_param_conf['base'] = 10
                bo_param_conf['lb'] = param_range[0]
                bo_param_conf['ub'] = param_range[1]
            else:
                bo_param_conf['type'] = 'num'
                bo_param_conf['lb'] = param_range[0]
                bo_param_conf['ub'] = param_range[1]
        else:
            assert False, "type %s not handled in API" % param_type
        params.append(bo_param_conf)
    space.parse(params)
    return space
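# A hedged usage sketch for parse_space_from_bayesmark. The api_config below is
# a made-up bayesmark-style configuration chosen to exercise each branch handled
# above; the parameter names are illustrative only.
api_config = {
    'max_depth':     {'type': 'int',  'space': 'linear', 'range': (1, 15)},
    'learning_rate': {'type': 'real', 'space': 'log',    'range': (1e-4, 1e-1)},
    'criterion':     {'type': 'cat',  'values': ['gini', 'entropy']},
    'bootstrap':     {'type': 'bool'},
}
space = parse_space_from_bayesmark(api_config)
print(space.sample(3))  # a DataFrame with one column per parameter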
def test_opt(model_name, opt_cls):
    space = DesignSpace().parse([
        {'name' : 'x0', 'type' : 'num', 'lb' : -3, 'ub' : 7},
        {'name' : 'x1', 'type' : 'cat', 'categories' : ['a', 'b', 'c']},
    ])
    opt = opt_cls(space, rand_sample = 10, model_name = model_name)
    for i in range(11):
        num_suggest = 8 if opt.support_parallel_opt else 1
        rec = opt.suggest(n_suggestions = num_suggest)
        y = obj(rec)
        if y.shape[0] > 1 and i > 0:
            # replace the worst observation with inf to check robustness to invalid values
            y[np.argmax(y.reshape(-1))] = np.inf
        opt.observe(rec, y)
        if opt.y.shape[0] > 11:
            break
def test_mo():
    space = DesignSpace().parse([
        {'name': 'x1', 'type': 'num', 'lb': -3.0, 'ub': 3.0},
        {'name': 'x2', 'type': 'int', 'lb': -3.0, 'ub': 3.0},
    ])
    acq = ToyExampleMO()
    opt = EvolutionOpt(space, acq, pop=10)
    rec = opt.optimize()
    assert rec.shape[0] == 10
def test_opt_fix():
    space = DesignSpace().parse([
        {'name': 'x1', 'type': 'num', 'lb': -3.0, 'ub': 3.0},
        {'name': 'x2', 'type': 'num', 'lb': -3.0, 'ub': 3.0},
    ])
    acq = ToyExample()
    opt = EvolutionOpt(space, acq, pop=10)
    rec = opt.optimize(fix_input={'x1': 1.0})
    print(rec)
    assert rec['x1'].values == approx(1.0, 1e-3)
def test_design_space():
    space = DesignSpace().parse([
        {'name': 'x0', 'type': 'num', 'lb': 0, 'ub': 7},
        {'name': 'x1', 'type': 'int', 'lb': 0, 'ub': 7},
        {'name': 'x2', 'type': 'pow', 'lb': 1e-4, 'ub': 1e-2, 'base': 10},
        {'name': 'x3', 'type': 'cat', 'categories': ['a', 'b', 'c']},
        {'name': 'x4', 'type': 'bool'},
    ])
    assert space.numeric_names == ['x0', 'x1', 'x2', 'x4']
    assert space.enum_names == ['x3']
    assert space.num_paras == 5
    assert space.num_numeric == 4
    assert space.num_categorical == 1

    samp = space.sample(10)
    x, xe = space.transform(samp)
    x_, xe_ = space.transform(space.inverse_transform(x, xe))
    assert (x - x_).abs().max() < 1e-4
    assert (space.opt_lb <= space.opt_ub).all()
    assert not space.paras['x0'].is_discrete
    assert space.paras['x1'].is_discrete
    assert not space.paras['x2'].is_discrete
    assert space.paras['x3'].is_discrete
    assert space.paras['x4'].is_discrete
def testConvertHEBO(self):
    from ray.tune.suggest.hebo import HEBOSearch
    from hebo.design_space.design_space import DesignSpace
    import torch

    # Grid search not supported, should raise ValueError
    with self.assertRaises(ValueError):
        HEBOSearch.convert_search_space({"grid": tune.grid_search([0, 1])})

    config = {
        "a": tune.sample.Categorical([2, 3, 4]).uniform(),
        "b": {
            "x": tune.sample.Integer(0, 5),
            "y": 4,
            "z": tune.sample.Float(1e-4, 1e-2).loguniform(),
        },
    }
    converted_config = HEBOSearch.convert_search_space(config)
    hebo_space_config = [
        {"name": "a", "type": "cat", "categories": [2, 3, 4]},
        {"name": "b/x", "type": "int", "lb": 0, "ub": 5},
        {"name": "b/z", "type": "pow", "lb": 1e-4, "ub": 1e-2},
    ]
    hebo_space = DesignSpace().parse(hebo_space_config)

    searcher1 = HEBOSearch(space=converted_config, metric="a", mode="max", random_state_seed=123)
    searcher2 = HEBOSearch(space=hebo_space, metric="a", mode="max", random_state_seed=123)

    np.random.seed(1234)
    torch.manual_seed(1234)
    config1 = searcher1.suggest("0")
    np.random.seed(1234)
    torch.manual_seed(1234)
    config2 = searcher2.suggest("0")

    self.assertEqual(config1, config2)
    self.assertIn(config1["a"], [2, 3, 4])
    self.assertIn(config1["b"]["x"], list(range(5)))
    self.assertLess(1e-4, config1["b"]["z"])
    self.assertLess(config1["b"]["z"], 1e-2)

    searcher = HEBOSearch(metric="a", mode="max", random_state_seed=123)
    analysis = tune.run(_mock_objective, config=config, search_alg=searcher, num_samples=1)
    trial = analysis.trials[0]
    self.assertIn(trial.config["a"], [2, 3, 4])
    self.assertEqual(trial.config["b"]["y"], 4)
def sklearn_tuner(
        model_class,
        space_config: List[dict],
        X: np.ndarray,
        y: np.ndarray,
        metric: Callable,
        greater_is_better: bool = True,
        n_splits: int = 5,
        max_iter: int = 16,
        report: bool = False,
) -> Union[dict, Tuple[dict, pd.DataFrame]]:
    """Tune a sklearn estimator with HEBO

    Parameters:
    -------------------
    model_class: class of sklearn estimator
    space_config: list of dicts specifying the search space
    X, y: data used for cross-validation
    metric: metric function from sklearn.metrics
    greater_is_better: whether a larger metric value is better
    n_splits: split data into `n_splits` parts for cross-validation
    max_iter: number of trials
    report: if True, also return a DataFrame of all visited configurations

    Returns:
    -------------------
    The best hyper-parameters, plus a report of all visited data if `report` is True

    Example:
    -------------------
    from sklearn.datasets import load_boston
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import r2_score, mean_squared_error
    from hebo.sklearn_tuner import sklearn_tuner

    space_cfg = [
        {'name' : 'max_depth',             'type' : 'int', 'lb' : 1, 'ub' : 20},
        {'name' : 'min_samples_leaf',      'type' : 'num', 'lb' : 1e-4, 'ub' : 0.5},
        {'name' : 'max_features',          'type' : 'cat', 'categories' : ['auto', 'sqrt', 'log2']},
        {'name' : 'bootstrap',             'type' : 'bool'},
        {'name' : 'min_impurity_decrease', 'type' : 'pow', 'lb' : 1e-4, 'ub' : 1.0},
    ]
    X, y = load_boston(return_X_y = True)
    result = sklearn_tuner(RandomForestRegressor, space_cfg, X, y, metric = r2_score, max_iter = 16)
    """
    space = DesignSpace().parse(space_config)
    opt = HEBO(space)
    for i in range(max_iter):
        rec = opt.suggest()
        model = model_class(**rec.iloc[0].to_dict())
        pred = cross_val_predict(model, X, y, cv=KFold(n_splits=n_splits, shuffle=True))
        score_v = metric(y, pred)
        sign = -1. if greater_is_better else 1.0  # HEBO minimises, so flip the sign if needed
        opt.observe(rec, np.array([sign * score_v]))
        print('Iter %d, best metric: %g' % (i, sign * opt.y.min()))
    best_id = np.argmin(opt.y.reshape(-1))
    best_hyp = opt.X.iloc[best_id]
    df_report = opt.X.copy()
    df_report['metric'] = sign * opt.y
    if report:
        return best_hyp.to_dict(), df_report
    else:
        return best_hyp.to_dict()
def __init__(self, dim):
    self.dim = dim
    self.space = DesignSpace().parse([
        {'name' : f'x{i}', 'type' : 'num', 'lb' : -1, 'ub' : 1}
        for i in range(dim)
    ])