def suggest(self, n_suggestions=1): """Get suggestion. Parameters ---------- n_suggestions : int Desired number of parallel suggestions in the output Returns ------- next_guess : list of dict List of `n_suggestions` suggestions to evaluate the objective function. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. """ if self.random.rand() <= 0.5 or self.mode == "normal": x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random) elif self.mode == "delay": sleep(15 * 60) # 15 minutes x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random) elif self.mode == "crash": assert False, "Crashing for testing purposes" else: assert False, "Crashing, not for testing purposes" return x_guess
def test_random_search_suggest_diff(api_args, n_suggest, seed):
    # Hard to know how many iters are needed for an arbitrary space to ensure
    # we don't get dupes by chance. So, for now, let's just stick with this
    # simple space.
    dim = {"space": "linear", "type": "real", "range": [1.0, 5.0]}
    # Use at least 10 for n_suggest to make sure we don't get the same answer
    # by chance.

    X_w, y = api_args
    D = X_w.shape[1]
    param_names = ["x%d" % ii for ii in range(D)]
    meta = dict(zip(param_names, [dim] * D))

    # Get the unwarped X
    S = sp.JointSpace(meta)
    lower, upper = S.get_bounds().T
    X_w = linear_rescale(X_w, lb0=0.0, ub0=1.0, lb1=lower, ub1=upper)
    X = S.unwarp(X_w)
    S.validate(X)

    seed = seed // 2  # Keep in bounds even after we add 7
    x_guess = suggest_dict(X, y, meta, n_suggest, random=np.random.RandomState(seed))
    # Use a different seed to intentionally get a different result
    x_guess2 = suggest_dict(X, y, meta, n_suggest, random=np.random.RandomState(seed + 7))

    # Check types too
    assert len(x_guess) == n_suggest
    assert len(x_guess2) == n_suggest
    assert not np.all(x_guess == x_guess2)

    # Make sure validated
    S.validate(x_guess)
    S.validate(x_guess2)

    # Test sanity of output
    D, = lower.shape
    x_guess_w = S.warp(x_guess)
    assert type(x_guess_w) == np.ndarray
    assert x_guess_w.dtype.kind == "f"
    assert x_guess_w.shape == (n_suggest, D)
    assert np.all(lower <= x_guess_w)
    assert np.all(x_guess_w <= upper)

    x_guess_w = S.warp(x_guess2)
    assert type(x_guess_w) == np.ndarray
    assert x_guess_w.dtype.kind == "f"
    assert x_guess_w.shape == (n_suggest, D)
    assert np.all(lower <= x_guess_w)
    assert np.all(x_guess_w <= upper)
def get_func_signature(f, api_config):
    """Get the function signature for an objective function in an experiment.

    Parameters
    ----------
    f : typing.Callable
        The objective function we want to compute the signature of. This
        function must take inputs in the form of ``dict(str, object)`` with
        one dictionary key per variable, and provide `float` as the output.
    api_config : dict(str, dict)
        Configuration of the optimization variables. See API description.

    Returns
    -------
    signature_x : list(dict(str, object)) of shape (n_suggest,)
        The input locations probed on signature call.
    signature_y : list(float) of shape (n_suggest,)
        The objective function values at the input points. This is the real
        signature.
    """
    # Make sure we get the same sequence on every call so it can serve as a signature
    random = np.random.RandomState(0)
    signature_x = rs.suggest_dict([], [], api_config, n_suggestions=N_SUGGESTIONS, random=random)

    # For now, we only take the first output as the signature. We can generalize this later.
    signature_y = [f(xx)[0] for xx in signature_x]
    assert np.all(np.isfinite(signature_y)), "non-finite values found in signature for function"

    return signature_x, signature_y
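# A usage sketch for `get_func_signature` on a toy objective. The objective
# returns a tuple so that `f(xx)[0]` in the function above is well defined;
# the api_config here is illustrative only.
import numpy as np

toy_api_config = {"x": {"type": "real", "space": "linear", "range": [-1.0, 1.0]}}


def toy_objective(params):
    return (params["x"] ** 2,)


signature_x, signature_y = get_func_signature(toy_objective, toy_api_config)
assert len(signature_x) == len(signature_y)
assert np.all(np.isfinite(signature_y))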
def suggest(self, n_suggestions=1):
    # Flaky test optimizer: fail (via assert) about half the time before suggesting
    if self.flaky:
        assert self.random.rand() <= 0.5
    x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)
    return x_guess
def _random_suggestion_rs(self, n_suggestions):
    # Pure random search: ignore history and sample straight from the API config
    return rs.suggest_dict(
        [],
        [],
        self.api_config,
        n_suggestions=n_suggestions,
        random=self._random_state,
    )
def test_random_search_suggest_sanity(api_args, n_suggest, seed):
    meta, X, y, _ = api_args

    # Get the unwarped X
    S = sp.JointSpace(meta)
    lower, upper = S.get_bounds().T
    S.validate(X)
    N = len(X)

    # Split the history and call twice with different histories but the same
    # seed; random search should ignore the history entirely.
    M = N // 2
    X1, X2 = X[:M], X[M:]
    y1, y2 = y[:M], y[M:]
    x_guess = suggest_dict(X1, y1, meta, n_suggest, random=np.random.RandomState(seed))
    x_guess2 = suggest_dict(X2, y2, meta, n_suggest, random=np.random.RandomState(seed))

    # Check types too
    assert len(x_guess) == n_suggest
    assert all(
        all(close_enough(x_guess[nn][k], x_guess2[nn][k]) for k in x_guess[nn])
        for nn in range(len(x_guess))
    )
    assert np.all(x_guess == x_guess2)

    # Make sure validated
    S.validate(x_guess)
    S.validate(x_guess2)

    # Test sanity of output
    D, = lower.shape
    x_guess_w = S.warp(x_guess)
    assert type(x_guess_w) == np.ndarray
    assert x_guess_w.dtype.kind == "f"
    assert x_guess_w.shape == (n_suggest, D)
    assert np.all(lower <= x_guess_w)
    assert np.all(x_guess_w <= upper)
def test_sklearn_model(model, dataset, metric, shuffle_seed, rs_seed):
    prob_type = data.get_problem_type(dataset)
    assume(metric in data.METRICS_LOOKUP[prob_type])

    test_prob = skf.SklearnModel(model, dataset, metric, shuffle_seed=shuffle_seed)
    api_config = test_prob.get_api_config()

    x_guess, = suggest_dict([], [], api_config, n_suggestions=1, random=np.random.RandomState(rs_seed))

    loss = test_prob.evaluate(x_guess)
    assert np.isscalar(loss)
def suggest(self, n_suggestions=1): """Get suggestion. Parameters ---------- n_suggestions : int Desired number of parallel suggestions in the output Returns ------- next_guess : list of dict List of `n_suggestions` suggestions to evaluate the objective function. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. """ x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random) return x_guess
def suggest(self, n_suggestions=1):
    x_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=n_suggestions, random=self.random)

    # Corrupt one random parameter of one random suggestion so it falls
    # outside the allowed range (for testing out-of-bounds handling).
    ii = self.random.randint(0, n_suggestions)
    pp = self.random.choice(self.param_list)

    if self.api_config[pp]["type"] == "real":
        eps = self.random.rand()
    else:
        eps = self.random.randint(1, 10)

    if self.random.rand() <= 0.5:
        x_guess[ii][pp] = self.api_config[pp]["range"][0] - eps
    else:
        x_guess[ii][pp] = self.api_config[pp]["range"][1] + eps
    return x_guess
def test_sklearn_model(model, dataset, metric, shuffle_seed, rs_seed):
    prob_type = data.get_problem_type(dataset)
    assume(metric in data.METRICS_LOOKUP[prob_type])

    test_prob = skf.SklearnModel(model, dataset, metric, shuffle_seed=shuffle_seed)
    api_config = test_prob.get_api_config()

    x_guess, = suggest_dict([], [], api_config, n_suggestions=1, random=np.random.RandomState(rs_seed))

    loss = test_prob.evaluate(x_guess)
    assert isinstance(loss, tuple)
    assert all(isinstance(xx, float) for xx in loss)
    assert np.shape(loss) == np.shape(test_prob.objective_names)
def test_sklearn_model_surr(model, dataset, metric, model_seed, rs_seed):
    prob_type = data.get_problem_type(dataset)
    assume(metric in data.METRICS_LOOKUP[prob_type])

    test_prob = skf.SklearnModel(model, dataset, metric, shuffle_seed=0)
    api_config = test_prob.get_api_config()
    space = JointSpace(api_config)

    n_obj = len(test_prob.objective_names)
    n_suggestions = 20

    x_guess = suggest_dict([], [], api_config, n_suggestions=n_suggestions, random=np.random.RandomState(rs_seed))
    x_guess_w = space.warp(x_guess)

    # Fit a throwaway linear surrogate on random targets
    random = np.random.RandomState(model_seed)
    y = random.randn(n_suggestions, n_obj)
    reg = LinearRegression()
    reg.fit(x_guess_w, y)
    loss0 = reg.predict(x_guess_w)

    # Round-trip the model through pickle, as the surrogate loader would
    path = pkl.dumps(reg)
    del reg
    assert isinstance(path, bytes)

    test_prob_surr = skf.SklearnSurrogate(model, dataset, metric, path)
    loss = test_prob_surr.evaluate(x_guess[0])

    assert isinstance(loss, tuple)
    assert all(isinstance(xx, float) for xx in loss)
    assert np.shape(loss) == np.shape(test_prob.objective_names)
    assert np.allclose(loss0[0], np.array(loss))
def suggest(self, n_suggestions=1): """Make `n_suggestions` suggestions for what to evaluate next. This requires the user observe all previous suggestions before calling again. Parameters ---------- n_suggestions : int The number of suggestions to return. Returns ------- next_guess : list of dict List of `n_suggestions` suggestions to evaluate the objective function. Each suggestion is a dictionary where each key corresponds to a parameter being optimized. """ # clear candidates before a new suggestion self.stored_candidates.clear() # wait until Nomad gives candidates while self.inputs_queue.empty(): continue # collect candidates candidates = self.inputs_queue.get() assert len(candidates) >= 1, "No candidates given: error !" assert len( candidates ) <= 8, "Too many candidates, n_suggestions must not be superior to 8 !" # put them in the framework model param_list = sorted(self.api_config.keys()) next_guess = list() for candidate in candidates: guess = dict() for (param_name, val) in zip(param_list, candidate): param_config = self.api_config[param_name] param_type = param_config["type"] param_space = param_config.get("space", None) param_range = param_config.get("range", None) param_values = param_config.get("values", None) if param_type == "int": if param_space in ("log", "logit"): guess[param_name] = np.round(np.exp(val)) else: guess[param_name] = val elif param_type == "bool": guess[param_name] = val elif param_type in ("cat", "ordinal"): guess[param_name] = param_values[val] elif param_type == "real": if param_space in ("log", "logit"): guess[param_name] = np.exp(val) else: guess[param_name] = val # # make correspondance between periodic variables and categorical # if param_type in ("cat", "ordinal"): # guess[param_name] = param_values[val] # else: # guess[param_name] = val # round problematic variables for param_name, round_f in self.round_to_values.items(): guess[param_name] = round_f(guess[param_name]) # Also ensure this is correct dtype so sklearn is happy (according to hyperopt) guess = { k: DTYPE_MAP[self.api_config[k]["type"]](guess[k]) for k in guess } next_guess.append(guess) self.stored_candidates.append(guess) # complete task self.inputs_queue.task_done() # sometimes, the block is not filled: we have to complete it # In this case, via random points if 8 - len(candidates) > 0: random_guess = rs.suggest_dict([], [], self.api_config, n_suggestions=8 - len(candidates), random=self.random) for guess in random_guess: next_guess.append(guess) return next_guess
def run_study(optimizer, test_problem, n_calls, n_suggestions, n_obj=1, callback=None):
    """Run a study for a single optimizer on a single test problem.

    This function can be used for benchmarking on general stateless
    objectives (not just `sklearn`).

    Parameters
    ----------
    optimizer : :class:`.abstract_optimizer.AbstractOptimizer`
        Instance of one of the wrapper optimizers.
    test_problem : :class:`.sklearn_funcs.TestFunction`
        Instance of test function to attempt to minimize.
    n_calls : int
        How many iterations of minimization to run.
    n_suggestions : int
        How many parallel evaluations we run each iteration. Must be ``>= 1``.
    n_obj : int
        Number of different objectives measured, only objective 0 is seen by
        optimizer. Must be ``>= 1``.
    callback : callable
        Optional callback taking the current best function evaluation, and
        the number of iterations finished. Takes array of shape `(n_obj,)`.

    Returns
    -------
    function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions, n_obj)
        Value of objective for each evaluation.
    timing_evals : (:class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`)
        Tuple of 3 timing results: ``(suggest_time, eval_time, observe_time)``
        with shapes ``(n_calls,)``, ``(n_calls, n_suggestions)``, and
        ``(n_calls,)``. These are the time to make each suggestion, the time
        for each evaluation of the objective function, and the time to make
        an observe call.
    suggest_log : list(list(dict(str, object)))
        Log of the suggestions corresponding to the `function_evals`.
    """
    assert n_suggestions >= 1, "batch size must be at least 1"
    assert n_obj >= 1, "Must be at least one objective"

    space_for_validate = JointSpace(test_problem.get_api_config())

    if callback is not None:
        # First do initial log at inf score, in case we don't even get to the
        # first eval before a crash/job timeout
        callback(np.full((n_obj,), np.inf, dtype=float), 0)

    suggest_time = np.zeros(n_calls)
    observe_time = np.zeros(n_calls)
    eval_time = np.zeros((n_calls, n_suggestions))
    function_evals = np.zeros((n_calls, n_suggestions, n_obj))
    suggest_log = [None] * n_calls
    for ii in range(n_calls):
        tt = time()
        try:
            next_points = optimizer.suggest(n_suggestions)
        except Exception as e:
            logger.warning("Failure in optimizer suggest. Falling back to random search.")
            logger.exception(e, exc_info=True)
            print(json.dumps({"optimizer_suggest_exception": {ITER: ii}}))
            api_config = test_problem.get_api_config()
            next_points = rs.suggest_dict([], [], api_config, n_suggestions=n_suggestions)
        suggest_time[ii] = time() - tt

        logger.info("suggestion time taken %f iter %d next_points %s" % (suggest_time[ii], ii, str(next_points)))
        assert len(next_points) == n_suggestions, "invalid number of suggestions provided by the optimizer"

        # We could put this inside the TestProblem class, but ok here for now.
        try:
            space_for_validate.validate(next_points)  # Fails if suggestions outside allowed range
        except Exception:
            raise ValueError("Optimizer suggestion is out of range.")
        suggest_log[ii] = next_points

        for jj, next_point in enumerate(next_points):
            tt = time()
            try:
                f_current_eval = test_problem.evaluate(next_point)
            except Exception as e:
                logger.warning("Failure in function eval. Setting to inf.")
                logger.exception(e, exc_info=True)
                f_current_eval = np.full((n_obj,), np.inf, dtype=float)
            eval_time[ii, jj] = time() - tt

            assert np.shape(f_current_eval) == (n_obj,)
            function_evals[ii, jj, :] = f_current_eval
            logger.info("function_evaluation time %f value %f suggestion %s" % (eval_time[ii, jj], f_current_eval[0], str(next_point)))

        # Note: this could be inf in the event of a crash in f evaluation;
        # the optimizer must be able to handle that.
        # Only objective 0 is seen by the optimizer.
        eval_list = function_evals[ii, :, 0].tolist()

        if callback is not None:
            idx_ii, idx_jj = argmin_2d(function_evals[:ii + 1, :, 0])
            callback(function_evals[idx_ii, idx_jj, :], ii + 1)

        tt = time()
        try:
            optimizer.observe(next_points, eval_list)
        except Exception as e:
            logger.warning("Failure in optimizer observe. Ignoring these observations.")
            logger.exception(e, exc_info=True)
            print(json.dumps({"optimizer_observe_exception": {ITER: ii}}))
        observe_time[ii] = time() - tt

        logger.info("observation time %f, current best %f at iter %d" % (observe_time[ii], np.min(function_evals[:ii + 1, :, 0]), ii))

    return function_evals, (suggest_time, eval_time, observe_time), suggest_log
def get_guess():
    # Draw a batch of random suggestions, then evaluate the cost function on
    # the whole batch at once (params transposed from list-of-dicts to
    # dict-of-lists) and pair each suggestion with its cost.
    x = rs.suggest_dict([], [], self.api_config, n_suggestions=N_SUGGESTIONS, random=self.random)
    _p = {k: [dic[k] for dic in x] for k in x[0]}
    y = self.cost_function(**_p)
    return [(_x, _y) for _x, _y in zip(x, y)]
def run_study(optimizer, test_problem, n_calls, n_suggestions):
    """Run a study for a single optimizer on a single test problem.

    This function can be used for benchmarking on general stateless
    objectives (not just `sklearn`).

    Parameters
    ----------
    optimizer : :class:`.abstract_optimizer.AbstractOptimizer`
        Instance of one of the wrapper optimizers.
    test_problem : :class:`.sklearn_funcs.TestFunction`
        Instance of test function to attempt to minimize.
    n_calls : int
        How many iterations of minimization to run.
    n_suggestions : int
        How many parallel evaluations we run each iteration. Must be ``>= 1``.

    Returns
    -------
    function_evals : :class:`numpy:numpy.ndarray` of shape (n_calls, n_suggestions)
        Value of objective for each evaluation.
    timing_evals : (:class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`, :class:`numpy:numpy.ndarray`)
        Tuple of 3 timing results: ``(suggest_time, eval_time, observe_time)``
        with shapes ``(n_calls,)``, ``(n_calls, n_suggestions)``, and
        ``(n_calls,)``. These are the time to make each suggestion, the time
        for each evaluation of the objective function, and the time to make
        an observe call.
    """
    assert n_suggestions >= 1, "batch size must be at least 1"

    suggest_time = np.zeros(n_calls)
    observe_time = np.zeros(n_calls)
    eval_time = np.zeros((n_calls, n_suggestions))
    function_evals = np.zeros((n_calls, n_suggestions))
    for ii in range(n_calls):
        tt = time()
        try:
            next_points = optimizer.suggest(n_suggestions)
        except Exception as e:
            logger.warning("Failure in optimizer suggest. Falling back to random search.")
            logger.exception(e, exc_info=True)
            api_config = test_problem.get_api_config()
            next_points = rs.suggest_dict([], [], api_config, n_suggestions=n_suggestions)
        suggest_time[ii] = time() - tt

        logger.info("suggestion time taken %f iter %d next_points %s" % (suggest_time[ii], ii, str(next_points)))
        assert len(next_points) == n_suggestions, "invalid number of suggestions provided by the optimizer"

        for jj, next_point in enumerate(next_points):
            tt = time()
            try:
                f_current_eval = test_problem.evaluate(next_point)
            except Exception as e:
                logger.warning("Failure in function eval. Setting to inf.")
                logger.exception(e, exc_info=True)
                f_current_eval = np.inf  # or maybe nan would be better
            eval_time[ii, jj] = time() - tt

            function_evals[ii, jj] = f_current_eval
            logger.info("function_evaluation time %f value %f suggestion %s" % (eval_time[ii, jj], f_current_eval, str(next_point)))

        # Note: this could be inf in the event of a crash in f evaluation;
        # the optimizer must be able to handle that.
        eval_list = function_evals[ii, :].tolist()

        tt = time()
        try:
            optimizer.observe(next_points, eval_list)
        except Exception as e:
            logger.warning("Failure in optimizer observe. Ignoring these observations.")
            logger.exception(e, exc_info=True)
        observe_time[ii] = time() - tt

        logger.info("observation time %f, current best %f at iter %d" % (observe_time[ii], np.min(function_evals[:ii + 1]), ii))

    return function_evals, (suggest_time, eval_time, observe_time)