def __init__(self, api_config, **kwargs):
    """Build wrapper class to use an optimizer in benchmark.

    Parameters
    ----------
    api_config : dict-like of dict-like
        Configuration of the optimization variables. See API description.
    """
    AbstractOptimizer.__init__(self, api_config)
    print('api_config:', api_config)

    self.api_config = api_config
    self.space_x = JointSpace(api_config)
    self.bounds = self.space_x.get_bounds()
    self.lb, self.ub = self.bounds[:, 0], self.bounds[:, 1]
    self.dim = len(self.bounds)

    self.X = np.zeros((0, self.dim))
    self.y = np.zeros((0, 1))
    self.X_init = None
    self.batch_size = None
    self.turbo = None
    self.split_used = 0
    self.node = None
    self.best_values = []

    self.config = self._read_config()
    print('config:', self.config)
    optimizer_seed = self.config.get('optimizer_seed')
    fix_optimizer_seed(optimizer_seed)
    self.sampler_seed = self.config.get('sampler_seed')
    sampler.fix_sampler_seed(self.sampler_seed)

    self.is_init_batch = False
    self.init_batches = []
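# A minimal sketch of the api_config format these wrappers consume. The dict
# below is hypothetical (parameter names and ranges are made up for
# illustration), and JointSpace is assumed to be bayesmark.space.JointSpace,
# as in the wrappers throughout this section.
from bayesmark.space import JointSpace

api_config = {
    "lr": {"type": "real", "space": "log", "range": (1e-4, 1e-1)},
    "n_layers": {"type": "int", "space": "linear", "range": (1, 4)},
    "activation": {"type": "cat", "values": ["relu", "tanh"]},
}
space = JointSpace(api_config)
bounds = space.get_bounds()  # (dim, 2) array of bounds in the warped space
lb, ub = bounds[:, 0], bounds[:, 1]  # categorical values expand to one-hot dims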
def __init__(self, goal: str, parameters: Dict[str, Any], num_samples=10, **kwargs) -> None:
    HyperoptSampler.__init__(self, goal, parameters)
    params_for_join_space = copy.deepcopy(parameters)

    cat_params_values_types = {}
    for param_name, param_values in params_for_join_space.items():
        if param_values[TYPE] == CATEGORY:
            param_values[TYPE] = 'cat'
            values_str = []
            values_types = {}
            for value in param_values['values']:
                value_str = str(value)
                values_str.append(value_str)
                value_type = type(value)
                if value_type == bool:
                    value_type = str2bool
                values_types[value_str] = value_type
            param_values['values'] = values_str
            cat_params_values_types[param_name] = values_types

        if param_values[TYPE] == FLOAT:
            param_values[TYPE] = 'real'

        if param_values[TYPE] == INT or param_values[TYPE] == 'real':
            if SPACE not in param_values:
                param_values[SPACE] = 'linear'
            param_values['range'] = (param_values['low'], param_values['high'])
            del param_values['low']
            del param_values['high']

    self.cat_params_values_types = cat_params_values_types
    self.space = JointSpace(params_for_join_space)
    self.num_samples = num_samples
    self.samples = self._determine_samples()
    self.sampled_so_far = 0
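# Worked example of the conversion above, assuming the Ludwig-style constants
# TYPE, SPACE, CATEGORY, FLOAT, INT are the strings 'type', 'space',
# 'category', 'float', 'int' (the parameter names are hypothetical):
before = {
    "learning_rate": {"type": "float", "space": "log", "low": 1e-4, "high": 1e-1},
    "num_layers": {"type": "int", "low": 1, "high": 4},
    "early_stop": {"type": "category", "values": [True, False]},
}
# After the loop rewrites it into JointSpace form:
after = {
    "learning_rate": {"type": "real", "space": "log", "range": (1e-4, 1e-1)},
    "num_layers": {"type": "int", "space": "linear", "range": (1, 4)},
    "early_stop": {"type": "cat", "values": ["True", "False"]},
}
# and cat_params_values_types records how to undo the stringification,
# e.g. {"early_stop": {"True": str2bool, "False": str2bool}}.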
def __init__(self, api_config, **kwargs): """Build wrapper class to use an optimizer in benchmark. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. """ AbstractOptimizer.__init__(self, api_config) self.space_x = JointSpace(api_config) self.bounds = self.space_x.get_bounds() self.lb, self.ub = self.bounds[:, 0], self.bounds[:, 1] self.dim = len(self.bounds) self.max_evals = np.iinfo(np.int32).max # NOTE: Largest possible int self.batch_size = None self.history = [] self.turbo = Turbo1( f=None, lb=self.bounds[:, 0], ub=self.bounds[:, 1], n_init=2 * self.dim + 1, max_evals=self.max_evals, batch_size=1, # We need to update this later verbose=False, )
def __init__(self, api_config): """Build wrapper class to use an optimizer in benchmark. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. """ AbstractOptimizer.__init__(self, api_config) self.search_space = JointSpace(api_config) self.bounds = self.search_space.get_bounds() self.iter = 0 # Sets up the optimization problem (needs self.bounds) self.create_opt_prob() self.max_evals = np.iinfo(np.int32).max # NOTE: Largest possible int self.batch_size = None self.history = [] self.proposals = [] # Population-based parameters in DE self.population = [] self.fitness = [] self.F = 0.7 self.Cr = 0.7 # For bayes opt self.dim = len(self.search_space.param_list) self.torch_bounds = torch.from_numpy(self.search_space.get_bounds().T) self.min_max_bounds = torch.from_numpy( np.stack([np.zeros(self.dim), np.ones(self.dim)])) self.archive = [] self.arc_fitness = []
def __init__(self, api_config): """Build wrapper class to use optimizer in benchmark. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. """ AbstractOptimizer.__init__(self, api_config) api_space = BoEI.api_manipulator(api_config) # used for GPyOpt initialization self.space_x = JointSpace(api_config) # used for warping & unwarping of new suggestions & observations self.hasCat, self.cat_vec = BoEI.is_cat(api_config) self.dim = len(self.space_x.get_bounds()) self.objective = GPyOpt.core.task.SingleObjective(None) self.space = GPyOpt.Design_space(api_space) self.model = GPyOpt.models.GPModel(optimize_restarts=5,verbose=False) self.aquisition_optimizer = GPyOpt.optimization.AcquisitionOptimizer(self.space) self.aquisition = AcquisitionEI(self.model, self.space, optimizer=self.aquisition_optimizer, cost_withGradients=None) self.batch_size = None
def __init__(self, api_config): """Build wrapper class to use an optimizer in benchmark. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. """ AbstractOptimizer.__init__(self, api_config) self.space_x = JointSpace(api_config) self.bounds = self.space_x.get_bounds() self.create_opt_prob() # Sets up the optimization problem (needs self.bounds) self.max_evals = np.iinfo(np.int32).max # NOTE: Largest possible int self.batch_size = None self.history = [] self.proposals = []
def __init__(self, api_config): """Build wrapper class to use an optimizer in benchmark. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. """ AbstractOptimizer.__init__(self, api_config) self.space_x = JointSpace(api_config) self.bounds = self.space_x.get_bounds() self.create_opt_prob( ) # Sets up the optimization problem (needs self.bounds) self.max_evals = np.iinfo(np.int32).max # NOTE: Largest possible int self.turbo_batch_size = None self.pysot_batch_size = None self.history = [] self.proposals = [] self.lb, self.ub = self.bounds[:, 0], self.bounds[:, 1] self.dim = len(self.bounds) self.turbo = Turbo1( f=None, lb=self.bounds[:, 0], ub=self.bounds[:, 1], n_init=2 * self.dim + 1, max_evals=self.max_evals, batch_size=4, # We need to update this later verbose=False, ) # hyperopt self.random = np_random space, self.round_to_values = tuSOTOptimizer.get_hyperopt_dimensions( api_config) self.domain = Domain(dummy_f, space, pass_expr_memo_ctrl=None) self.trials = Trials() # Some book keeping like opentuner wrapper self.trial_id_lookup = {} # Store just for data validation self.param_set_chk = frozenset(api_config.keys())
def __init__(self, goal: str, parameters: Dict[str, Any], num_samples=10, **kwargs) -> None:
    HyperoptSampler.__init__(self, goal, parameters)
    params_for_join_space = copy.deepcopy(parameters)

    cat_params_values_types = {}
    for param_name, param_values in params_for_join_space.items():
        if param_values[TYPE] == CATEGORY:
            param_values[TYPE] = "cat"
            values_str = []
            values_types = {}
            for value in param_values["values"]:
                value_type = type(value)
                if value_type == bool:
                    value_str = str(value)
                    value_type = str2bool
                elif value_type == str or value_type == int or value_type == float:
                    value_str = str(value)
                else:
                    value_str = json.dumps(value)
                    value_type = json.loads
                values_str.append(value_str)
                values_types[value_str] = value_type
            param_values["values"] = values_str
            cat_params_values_types[param_name] = values_types

        if param_values[TYPE] == FLOAT:
            param_values[TYPE] = "real"

        if param_values[TYPE] == INT or param_values[TYPE] == "real":
            if SPACE not in param_values:
                param_values[SPACE] = "linear"
            param_values["range"] = (param_values["low"], param_values["high"])
            del param_values["low"]
            del param_values["high"]

    self.cat_params_values_types = cat_params_values_types
    self.space = JointSpace(params_for_join_space)
    self.num_samples = num_samples
    self.samples = self._determine_samples()
    self.sampled_so_far = 0
    self.default_batch_size = self.num_samples
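# The json.dumps/json.loads pairing above lets non-primitive categorical
# values (lists, dicts) survive the round trip through their string form:
import json

value = {"fc_size": [64, 32]}  # a non-primitive categorical choice
value_str = json.dumps(value)  # stored in the space as its JSON string
assert json.loads(value_str) == value  # the recorded value_type restores it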
def _grid_suggestion(self, n_suggestions):
    space = JointSpace(self.api_config)
    grid = space.grid(n_suggestions)

    # Make sure each grid axis has enough items
    for key in grid:
        while 0 < len(grid[key]) < n_suggestions:
            grid[key] += grid[key]
        self._random_state.shuffle(grid[key])
        grid[key] = grid[key][:n_suggestions]

    # Select from the grid
    suggestions = []
    for i in range(n_suggestions):
        guess = {key: grid[key][i] for key in grid}
        suggestions.append(guess)
    return suggestions
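# Worked example of the padding logic above: with n_suggestions=5 and a grid
# axis holding 2 values, the axis is doubled until it reaches length >= 5,
# then shuffled and truncated (the values here are hypothetical):
import numpy as np

axis = ["relu", "tanh"]
n_suggestions = 5
while 0 < len(axis) < n_suggestions:
    axis += axis  # 2 -> 4 -> 8
rs_state = np.random.RandomState(0)
rs_state.shuffle(axis)
axis = axis[:n_suggestions]
assert len(axis) == n_suggestions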
def __init__(self, model, dataset, scorer, path):
    """Build class that wraps sklearn classifier/regressor CV score for use as
    an objective function surrogate.

    Parameters
    ----------
    model : str
        Which classifier to use, must be key in `MODELS_CLF` or `MODELS_REG`
        dict depending on if dataset is classification or regression.
    dataset : str
        Which data set to use, must be key in `DATA_LOADERS` dict, or name of
        custom csv file.
    scorer : str
        Which sklearn scoring metric to use, in `SCORERS_CLF` list or
        `SCORERS_REG` dict depending on if dataset is classification or
        regression.
    path : str
        Root directory to look for all pickle files.
    """
    TestFunction.__init__(self)

    # Find the space class, we could consider putting this in pkl too
    problem_type = get_problem_type(dataset)
    assert problem_type in (ProblemType.clf, ProblemType.reg)
    _, _, self.api_config = (
        MODELS_CLF[model] if problem_type == ProblemType.clf else MODELS_REG[model])
    self.space = JointSpace(self.api_config)

    # Load the pre-trained model
    fname = SklearnModel.test_case_str(model, dataset, scorer) + ".pkl"
    if isinstance(path, bytes):
        # This is for test-ability, we could use mock instead.
        self.model = pkl.loads(path)
    else:
        path = os.path.join(path, fname)  # pragma: io
        assert os.path.isfile(path), "Model file not found: %s" % path
        with absopen(path, "rb") as f:  # pragma: io
            self.model = pkl.load(f)  # pragma: io
    assert callable(getattr(self.model, "predict", None))
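# A hedged sketch of how such a surrogate is typically queried (the actual
# evaluate method is not shown in this section): warp one suggestion into the
# model's feature space, then ask the pickled regressor for the objectives.
import numpy as np

def surrogate_predict(space, model, params):
    """Warp a single suggestion dict and return predicted objectives."""
    x = space.warp([params])  # shape (1, n_warped_dims)
    y = np.asarray(model.predict(x)).ravel()
    return tuple(float(v) for v in y)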
def test_sklearn_model_surr(model, dataset, metric, model_seed, rs_seed):
    prob_type = data.get_problem_type(dataset)
    assume(metric in data.METRICS_LOOKUP[prob_type])

    test_prob = skf.SklearnModel(model, dataset, metric, shuffle_seed=0)
    api_config = test_prob.get_api_config()
    space = JointSpace(api_config)

    n_obj = len(test_prob.objective_names)
    n_suggestions = 20
    x_guess = suggest_dict([], [], api_config, n_suggestions=n_suggestions,
                           random=np.random.RandomState(rs_seed))
    x_guess_w = space.warp(x_guess)

    random = np.random.RandomState(model_seed)
    y = random.randn(n_suggestions, n_obj)

    reg = LinearRegression()
    reg.fit(x_guess_w, y)
    loss0 = reg.predict(x_guess_w)

    path = pkl.dumps(reg)
    del reg
    assert isinstance(path, bytes)

    test_prob_surr = skf.SklearnSurrogate(model, dataset, metric, path)
    loss = test_prob_surr.evaluate(x_guess[0])

    assert isinstance(loss, tuple)
    assert all(isinstance(xx, float) for xx in loss)
    assert np.shape(loss) == np.shape(test_prob.objective_names)
    assert np.allclose(loss0[0], np.array(loss))
def suggest_dict(X, y, meta, n_suggestions=1, random=np_util.random):
    """Stateless function to create suggestions for next query point in random
    search optimization.

    This implements the API for general structures of different data types.

    Parameters
    ----------
    X : list(dict)
        Places where the objective function has already been evaluated.
        Not actually used in random search.
    y : :class:`numpy:numpy.ndarray`, shape (n,)
        Corresponding values where objective has been evaluated.
        Not actually used in random search.
    meta : dict(str, dict)
        Configuration of the optimization variables. See API description.
    n_suggestions : int
        Desired number of parallel suggestions in the output
    random : :class:`numpy:numpy.random.RandomState`
        Optionally pass in random stream for reproducibility.

    Returns
    -------
    next_guess : list(dict)
        List of `n_suggestions` suggestions to evaluate the objective
        function. Each suggestion is a dictionary where each key corresponds
        to a parameter being optimized.
    """
    # Warp and get bounds
    space_x = JointSpace(meta)
    X_warped = space_x.warp(X)
    bounds = space_x.get_bounds()
    _, n_params = _check_x_y(X_warped, y, allow_impute=True)
    lb, ub = _check_bounds(bounds, n_params)

    # Get the suggestion
    suggest_x = random.uniform(lb, ub, size=(n_suggestions, n_params))

    # Unwarp
    next_guess = space_x.unwarp(suggest_x)
    return next_guess
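# Example use of suggest_dict as a stateless random-search step; the meta
# dict here is hypothetical, and empty X/y are allowed (as in the test above):
import numpy as np

meta = {"x": {"type": "real", "space": "linear", "range": (-1.0, 1.0)}}
guesses = suggest_dict([], [], meta, n_suggestions=3,
                       random=np.random.RandomState(42))
# -> e.g. [{'x': ...}, {'x': ...}, {'x': ...}], each within the range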
def __init__(self, goal: str, parameters: Dict[str, Any], num_samples=10, **kwargs) -> None:
    HyperoptSampler.__init__(self, goal, parameters)
    params_for_join_space = copy.deepcopy(parameters)

    for param_values in params_for_join_space.values():
        if param_values[TYPE] == CATEGORY:
            param_values[TYPE] = 'cat'
        if param_values[TYPE] == FLOAT:
            param_values[TYPE] = 'real'
        if param_values[TYPE] == INT or param_values[TYPE] == 'real':
            if SPACE not in param_values:
                param_values[SPACE] = 'linear'
            param_values['range'] = (param_values['low'], param_values['high'])
            del param_values['low']
            del param_values['high']

    self.space = JointSpace(params_for_join_space)
    self.num_samples = num_samples
    self.samples = self._determine_samples()
    self.sampled_so_far = 0
def __init__(self, api_config, **kwargs): """Build wrapper class to use an optimizer in benchmark. Parameters ---------- api_config : dict-like of dict-like Configuration of the optimization variables. See API description. """ AbstractOptimizer.__init__(self, api_config) self.dimensions, self.vars_types, self.param_list = TurboOptimizer.get_sk_dimensions( api_config) print("dimensions: {}".format(self.dimensions)) print("vars_types: {}".format(self.vars_types)) # names of variables print("param_list: {}".format(self.param_list)) self.space_x = JointSpace(api_config) self.bounds = self.space_x.get_bounds() self.lb, self.ub = self.bounds[:, 0], self.bounds[:, 1] self.dim = len(self.bounds) print("lb: {}".format(self.lb)) print("ub: {}".format(self.ub)) print("dim: {}".format(self.dim)) if "max_depth" in self.param_list: print("DT or RF") # max_depth att = "max_depth" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = 10 self.ub[att_idx] = 15 print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) # max_features att = "max_features" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = logit(0.9) self.ub[att_idx] = logit(0.99) print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) # min_impurity_decrease att = "min_impurity_decrease" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = 1e-5 self.ub[att_idx] = 1e-4 print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) if "beta_1" in self.param_list and "hidden_layer_sizes" in self.param_list: print("MLP-adam") # batch_size att = "batch_size" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = 16 self.ub[att_idx] = 128 print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) # hidden_layer_sizes att = "hidden_layer_sizes" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = 64 self.ub[att_idx] = 200 print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) # validation_fraction att = "validation_fraction" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = logit(0.1) self.ub[att_idx] = logit(0.2) print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) if "momentum" in self.param_list and "hidden_layer_sizes" in self.param_list: print("MLP-sgd") # batch_size att = "batch_size" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = 16 self.ub[att_idx] = 128 print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) # hidden_layer_sizes att = "hidden_layer_sizes" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = 64 self.ub[att_idx] = 200 print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) # validation_fraction att = "validation_fraction" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: 
{}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = logit(0.1) self.ub[att_idx] = logit(0.2) print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) if "C" in self.param_list and "gamma" in self.param_list: print("SVM") # C att = "C" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = np.log(1e0) self.ub[att_idx] = np.log(1e3) print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) # tol att = "tol" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = np.log(1e-3) self.ub[att_idx] = np.log(1e-1) print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) if "learning_rate" in self.param_list and "n_estimators" in self.param_list: print("ada") # n_estimators att = "n_estimators" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = 30 self.ub[att_idx] = 100 print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) if "n_neighbors" in self.param_list: print("kNN") # n_neighbors att = "n_neighbors" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = 1 self.ub[att_idx] = 15 print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) # p att = "p" print("att: {}".format(att)) att_idx = self.param_list.index(att) print("old lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) self.lb[att_idx] = 1 self.ub[att_idx] = 2 print("new lb: {}, ub: {}".format(self.lb[att_idx], self.ub[att_idx])) print("new_lb: {}".format(self.lb)) print("new_ub: {}".format(self.ub)) self.max_evals = np.iinfo(np.int32).max # NOTE: Largest possible int self.batch_size = None self.history = [] self.turbo = Turbo1( f=None, lb=self.lb, ub=self.ub, n_init=2 * self.dim + 1, max_evals=self.max_evals, batch_size=1, # We need to update this later verbose=False, ) # count restart self.cnt_restart = 0 # use smaller length_min self.turbo.length_min = 0.5**4 # use distance between batch elements self.turbo.ele_distance = 1e-2
def run_study(optimizer, test_problem, n_calls, n_suggestions, n_obj=1, callback=None):
    """Run a study for a single optimizer on a single test problem.

    This function can be used for benchmarking on general stateless objectives
    (not just `sklearn`).

    Parameters
    ----------
    optimizer : :class:`.abstract_optimizer.AbstractOptimizer`
        Instance of one of the wrapper optimizers.
    test_problem : :class:`.sklearn_funcs.TestFunction`
        Instance of test function to attempt to minimize.
    n_calls : int
        How many iterations of minimization to run.
    n_suggestions : int
        How many parallel evaluations we run each iteration. Must be ``>= 1``.
    n_obj : int
        Number of different objectives measured, only objective 0 is seen by
        optimizer. Must be ``>= 1``.
    callback : callable
        Optional callback taking the current best function evaluation, and the
        number of iterations finished. Takes array of shape `(n_obj,)`.

    Returns
    -------
    function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions, n_obj)
        Value of objective for each evaluation.
    timing_evals : (:class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`)
        Tuple of 3 timing results: ``(suggest_time, eval_time, observe_time)``
        with shapes ``(n_calls,)``, ``(n_calls, n_suggestions)``, and
        ``(n_calls,)``. These are the time to make each suggestion, the time
        for each evaluation of the objective function, and the time to make an
        observe call.
    suggest_log : list(list(dict(str, object)))
        Log of the suggestions corresponding to the `function_evals`.
    """
    assert n_suggestions >= 1, "batch size must be at least 1"
    assert n_obj >= 1, "Must be at least one objective"

    space_for_validate = JointSpace(test_problem.get_api_config())

    if callback is not None:
        # First do initial log at inf score, in case we don't even get to
        # first eval before crash/job timeout
        callback(np.full((n_obj,), np.inf, dtype=float), 0)

    suggest_time = np.zeros(n_calls)
    observe_time = np.zeros(n_calls)
    eval_time = np.zeros((n_calls, n_suggestions))
    function_evals = np.zeros((n_calls, n_suggestions, n_obj))
    suggest_log = [None] * n_calls
    for ii in range(n_calls):
        tt = time()
        try:
            next_points = optimizer.suggest(n_suggestions)
        except Exception as e:
            logger.warning("Failure in optimizer suggest. Falling back to random search.")
            logger.exception(e, exc_info=True)
            print(json.dumps({"optimizer_suggest_exception": {ITER: ii}}))
            api_config = test_problem.get_api_config()
            next_points = rs.suggest_dict([], [], api_config, n_suggestions=n_suggestions)
        suggest_time[ii] = time() - tt

        logger.info("suggestion time taken %f iter %d next_points %s"
                    % (suggest_time[ii], ii, str(next_points)))
        assert len(next_points) == n_suggestions, "invalid number of suggestions provided by the optimizer"

        # We could put this inside the TestProblem class, but ok here for now.
        try:
            space_for_validate.validate(next_points)  # Fails if suggestions outside allowed range
        except Exception:
            raise ValueError("Optimizer suggestion is out of range.")

        for jj, next_point in enumerate(next_points):
            tt = time()
            try:
                f_current_eval = test_problem.evaluate(next_point)
            except Exception as e:
                logger.warning("Failure in function eval. Setting to inf.")
                logger.exception(e, exc_info=True)
                f_current_eval = np.full((n_obj,), np.inf, dtype=float)
            eval_time[ii, jj] = time() - tt
            assert np.shape(f_current_eval) == (n_obj,)

            suggest_log[ii] = next_points
            function_evals[ii, jj, :] = f_current_eval
            logger.info("function_evaluation time %f value %f suggestion %s"
                        % (eval_time[ii, jj], f_current_eval[0], str(next_point)))

        # Note: this could be inf in the event of a crash in f evaluation, the
        # optimizer must be able to handle that. Only objective 0 is seen by
        # the optimizer.
        eval_list = function_evals[ii, :, 0].tolist()

        if callback is not None:
            idx_ii, idx_jj = argmin_2d(function_evals[:ii + 1, :, 0])
            callback(function_evals[idx_ii, idx_jj, :], ii + 1)

        tt = time()
        try:
            optimizer.observe(next_points, eval_list)
        except Exception as e:
            logger.warning("Failure in optimizer observe. Ignoring these observations.")
            logger.exception(e, exc_info=True)
            print(json.dumps({"optimizer_observe_exception": {ITER: ii}}))
        observe_time[ii] = time() - tt

        logger.info("observation time %f, current best %f at iter %d"
                    % (observe_time[ii], np.min(function_evals[:ii + 1, :, 0]), ii))

    return function_evals, (suggest_time, eval_time, observe_time), suggest_log