def _tree_minimize(func, dimensions, base_estimator, n_calls,
                   n_points, n_random_starts, random_state=None):
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Initialize with random points
    if n_random_starts <= 0:
        raise ValueError(
            "Expected n_random_starts > 0, got %d" % n_random_starts)

    if n_calls <= 0:
        raise ValueError(
            "Expected n_calls > 0, got %d" % n_calls)

    if n_calls < n_random_starts:
        raise ValueError(
            "Expected n_calls >= %d, got %d" % (n_random_starts, n_calls))

    Xi = space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = [func(x) for x in Xi]
    if np.ndim(yi) != 1:
        raise ValueError("The function to be optimized should return a scalar")

    # Tree-based optimization loop
    models = []

    n_model_iter = n_calls - n_random_starts
    for i in range(n_model_iter):
        rgr = clone(base_estimator)
        rgr.fit(space.transform(Xi), yi)
        models.append(rgr)

        # `rgr` predicts constants for each leaf, which means the EI has zero
        # gradient over large distances. As a result we cannot use
        # gradient-based optimizers like BFGS, so we use random sampling
        # for the moment.
        X = space.transform(space.rvs(n_samples=n_points, random_state=rng))
        values = -gaussian_ei(X, rgr, np.min(yi))
        next_x = X[np.argmin(values)]

        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi = np.vstack((Xi, next_x))
        yi.append(next_y)

    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space

    return res
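# A minimal usage sketch for _tree_minimize above. The surrogate choice, the
# objective, and every budget value are illustrative assumptions, not part of
# the original code. The surrogate is assumed to support
# predict(X, return_std=True) as skopt-style tree regressors do, so we use
# skopt.learning.ExtraTreesRegressor here; the call also relies on the
# module-level helpers the function itself imports (Space, gaussian_ei, ...).
import numpy as np
from skopt.learning import ExtraTreesRegressor

def _toy_objective(x):
    # simple 2-D bowl with its minimum at (0.3, -0.5)
    return (x[0] - 0.3) ** 2 + (x[1] + 0.5) ** 2

res = _tree_minimize(_toy_objective,
                     dimensions=[(-2.0, 2.0), (-2.0, 2.0)],
                     base_estimator=ExtraTreesRegressor(n_estimators=30),
                     n_calls=30, n_points=1000, n_random_starts=10,
                     random_state=0)
print(res.x, res.fun)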
def create_result(Xi, yi, space=None, rng=None, specs=None, models=None,
                  model_mu=None, model_std=None, gurobi_mipgap=None):
    """
    Initialize an `OptimizeResult` object.

    Parameters
    ----------
    Xi : list of lists, shape (n_iters, n_features)
        Location of the minimum at every iteration.

    yi : array-like, shape (n_iters,)
        Minimum value obtained at every iteration.

    space : Space instance, optional
        Search space.

    rng : RandomState instance, optional
        State of the random state.

    specs : dict, optional
        Call specifications.

    models : list, optional
        List of fit surrogate models.

    model_mu : array-like, optional
        Surrogate model means, stored on the result as-is.

    model_std : array-like, optional
        Surrogate model standard deviations, stored on the result as-is.

    gurobi_mipgap : float, optional
        Gurobi MIP gap value, stored on the result as-is.

    Returns
    -------
    res : `OptimizeResult`, scipy object
        OptimizeResult instance with the required information.
    """
    res = OptimizeResult()
    yi = np.asarray(yi)
    if np.ndim(yi) == 2:
        res.log_time = np.ravel(yi[:, 1])
        yi = np.ravel(yi[:, 0])
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = yi
    res.x_iters = Xi
    res.models = models
    res.model_mu = model_mu
    res.model_std = model_std
    res.gurobi_mipgap = gurobi_mipgap
    res.space = space
    res.random_state = rng
    res.specs = specs
    return res
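# A short illustration of how create_result packs a raw evaluation history into
# an OptimizeResult; the Xi/yi values below are made up for this example.
Xi = [[0.1, 2], [0.4, 5], [0.9, 1]]
yi = [3.2, 1.7, 2.9]

res = create_result(Xi, yi)
assert res.x == [0.4, 5]   # location of the smallest yi
assert res.fun == 1.7      # objective value at that location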
def dummy_minimize(func, bounds, maxiter=1000, random_state=None):
    """Random search by uniform sampling within the given bounds.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take an array of parameters and
        return the function value.

    * `bounds` [array-like, shape=(n_parameters, 2)]:
        - ``bounds[i][0]`` should give the lower bound of each parameter and
        - ``bounds[i][1]`` should give the upper bound of each parameter.

    * `maxiter` [int, default=1000]:
        Number of iterations to find the minimum. In other words, the
        number of function evaluations.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as an OptimizeResult object.
        Important attributes are:
        - `x` [float]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html
    """
    rng = check_random_state(random_state)

    n_params = len(bounds)
    lb, ub = extract_bounds(bounds)

    X = lb + (ub - lb) * rng.rand(maxiter, n_params)
    init_y = func(X[0])
    if not np.isscalar(init_y):
        raise ValueError("The function to be optimized should return a scalar")

    # X[0] has already been evaluated above; evaluate each remaining point once
    # (the original `range(maxiter - 1)` re-evaluated X[0] and skipped X[-1],
    # misaligning X and y).
    y = np.asarray([init_y] + [func(X[i]) for i in range(1, maxiter)])

    res = OptimizeResult()
    best = np.argmin(y)
    res.x = X[best]
    res.fun = y[best]
    res.func_vals = y
    res.x_iters = X

    return res
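# Illustrative call to the bounds-based dummy_minimize above; the objective is
# a toy quadratic and the iteration budget is arbitrary.
res = dummy_minimize(lambda x: (x[0] - 0.3) ** 2 + (x[1] + 0.5) ** 2,
                     bounds=[(-2.0, 2.0), (-2.0, 2.0)],
                     maxiter=200, random_state=0)
print(res.x, res.fun)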
def df2result(df, metric_col, param_cols, param_types=None):
    """Converts a dataframe with metrics and hyperparameters to the OptimizeResult format.

    It is a helper function that lets you use tools which expect an
    OptimizeResult object, for example the scikit-optimize plot_evaluations
    function.

    Args:
        df(`pandas.DataFrame`): Dataframe containing metric and hyperparameters.
        metric_col(str): Name of the metric column.
        param_cols(list): Names of the hyperparameter columns.
        param_types(list or None): Optional list of hyperparameter column types.
            By default it will treat all the columns as float but you can also
            pass str for categorical channels.
            E.g. param_types=[float, str, float, float]

    Returns:
        `scipy.optimize.OptimizeResult`: Results object that contains the
        hyperparameter and metric information.

    Examples:
        Instantiate a session::

            from neptune.sessions import Session
            session = Session()

        Fetch a project and a list of experiments::

            project = session.get_projects('neptune-ai')['neptune-ai/Home-Credit-Default-Risk']
            leaderboard = project.get_leaderboard(state=['succeeded'], owner=['czakon'])

        Convert the leaderboard dataframe to the `OptimizeResult` instance,
        taking only the parameters and metric that you care about::

            result = df2result(leaderboard,
                               metric_col='channel_ROC_AUC',
                               param_cols=['parameter_lgbm__max_depth',
                                           'parameter_lgbm__num_leaves',
                                           'parameter_lgbm__min_child_samples'])

    """
    if not param_types:
        param_types = [float for _ in param_cols]

    df = _prep_df(df, param_cols, param_types)
    param_space = _convert_to_param_space(df, param_cols, param_types)

    results = OptimizeResult()
    results.x_iters = df[param_cols].values
    results.func_vals = df[metric_col].to_list()
    results.x = results.x_iters[np.argmin(results.func_vals)]
    results.fun = np.min(results.func_vals)
    results.space = param_space
    return results
def hyperopt2skopt(trials, space): """Converts hyperopt trials to scipy OptimizeResult. Helper function that converts the hyperopt Trials instance into scipy OptimizeResult format. Args: trials(`hyperopt.base.Trials`): hyperopt trials object which stores training information from the fmin() optimization function. space(`collections.OrderedDict`): Hyper parameter space over which hyperopt will search. It is important to have this as OrderedDict rather than a simple dictionary because otherwise the parameter names will be shuffled. Returns: `scipy.optimize.optimize.OptimizeResult`: Converted OptimizeResult. Examples: Prepare the space of hyperparameters to search over:: from hyperopt import hp, tpe, fmin, Trials space = OrderedDict(num_leaves=hp.choice('num_leaves', range(10, 60, 1)), max_depth=hp.choice('max_depth', range(2, 30, 1)), feature_fraction=hp.uniform('feature_fraction', 0.1, 0.9) ) Create an objective and run your hyperopt training:: trials = Trials() _ = fmin(objective, space, trials=trials, algo=tpe.suggest, max_evals=100) Convert trials object to the OptimizeResult object:: import neptunecontrib.hpo.utils as hp_utils results = hp_utils.hyperopt2skopt(trials, space) """ param_names = list(space.keys()) skopt_space = _convert_space_hop_skopt(space) results_ = {} for trial in trials.trials: trial_params = [trial['misc']['vals'][name][0] for name in param_names] results_.setdefault('x_iters', []).append(trial_params) results_.setdefault('func_vals', []).append(trial['result']['loss']) optimize_results = OptimizeResult() optimize_results.x = [trials.argmin[name] for name in param_names] optimize_results.x_iters = results_['x_iters'] optimize_results.fun = trials.best_trial['result']['loss'] optimize_results.func_vals = results_['func_vals'] optimize_results.space = skopt_space return optimize_results
def df2result(df, metric_col, param_cols, param_types=None):
    """Converts dataframe with metrics and hyperparameters to the OptimizeResult format."""
    if not param_types:
        param_types = [float for _ in param_cols]

    df = _prep_df(df, param_cols, param_types)
    param_space = _convert_to_param_space(df, param_cols, param_types)

    results = OptimizeResult()
    results.x_iters = df[param_cols].values
    results.func_vals = df[metric_col].to_list()
    results.x = results.x_iters[np.argmin(results.func_vals)]
    results.fun = np.min(results.func_vals)
    results.space = param_space
    return results
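# Hypothetical df2result call on a hand-made frame; the column names and values
# are placeholders, and the call relies on the module helpers _prep_df and
# _convert_to_param_space referenced above.
import pandas as pd

df = pd.DataFrame({
    "metric_loss": [0.31, 0.27, 0.35],
    "param_max_depth": [4, 7, 5],
    "param_learning_rate": [0.10, 0.05, 0.20],
})

result = df2result(df, metric_col="metric_loss",
                   param_cols=["param_max_depth", "param_learning_rate"])
print(result.x, result.fun)   # row with the lowest metric and its value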
def create_result(Xi, yi, space=None, rng=None, specs=None, models=None): """ Initialize an `OptimizeResult` object. Parameters ---------- * `Xi` [list of lists, shape=(n_iters, n_features)]: Location of the minimum at every iteration. * `yi` [array-like, shape=(n_iters,)]: Minimum value obtained at every iteration. * `space` [Space instance, optional]: Search space. * `rng` [RandomState instance, optional]: State of the random state. * `specs` [dict, optional]: Call specifications. * `models` [list, optional]: List of fit surrogate models. Returns ------- * `res` [`OptimizeResult`, scipy object]: OptimizeResult instance with the required information. """ res = OptimizeResult() yi = np.asarray(yi) if np.ndim(yi) == 2: res.log_time = np.ravel(yi[:, 1]) yi = np.ravel(yi[:, 0]) best = np.argmin(yi) res.x = Xi[best] res.fun = yi[best] res.func_vals = yi res.x_iters = Xi res.models = models res.space = space res.random_state = rng res.specs = specs return res
def create_result(xi, yi, space=None, rs=None, specs=None, models=None):
    res = OptimizeResult()
    yi = np.asarray(yi)
    if np.ndim(yi) == 2:
        res.log_time = np.ravel(yi[:, 1])
        yi = np.ravel(yi[:, 0])
    best = np.argmin(yi)
    res.x = xi[best]
    res.fun = yi[best]
    res.func_vals = yi
    res.x_iters = xi
    res.models = models
    res.space = space
    res.random_state = rs
    res.specs = specs
    return res
def dummy_minimize(func, dimensions, n_calls=100, random_state=None): """Random search by uniform sampling within the given bounds. Parameters ---------- * `func` [callable]: Function to minimize. Should take a array of parameters and return the function values. * `dimensions` [list, shape=(n_dims,)]: List of search space dimensions. Each search dimension can be defined either as - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer` dimensions), - a `(upper_bound, lower_bound, "prior")` tuple (for `Real` dimensions), - as a list of categories (for `Categorical` dimensions), or - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). * `n_calls` [int, default=100]: Number of calls to `func` to find the minimum. * `random_state` [int, RandomState instance, or None (default)]: Set random state to something other than None for reproducible results. Returns ------- * `res` [`OptimizeResult`, scipy object]: The optimization result returned as a OptimizeResult object. Important attributes are: - `x` [float]: location of the minimum. - `fun` [float]: function value at the minimum. - `x_iters` [array]: location of function evaluation for each iteration. - `func_vals` [array]: function value for each iteration. - `space` [Space]: the optimisation space. For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html """ rng = check_random_state(random_state) space = Space(dimensions) X = space.rvs(n_samples=n_calls, random_state=rng) init_y = func(X[0]) if not np.isscalar(init_y): raise ValueError( "The function to be optimized should return a scalar") y = np.asarray([init_y] + [func(X[i]) for i in range(1, n_calls)]) res = OptimizeResult() best = np.argmin(y) res.x = X[best] res.fun = y[best] res.func_vals = y res.x_iters = X res.space = space return res
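# Minimal sketch of the dimensions-based dummy_minimize above: one real and one
# categorical dimension; the objective is illustrative only.
res = dummy_minimize(lambda x: (x[0] - 0.2) ** 2 + (0.0 if x[1] == "relu" else 1.0),
                     dimensions=[(-1.0, 1.0), ["relu", "tanh"]],
                     n_calls=50, random_state=0)
print(res.x, res.fun)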
def gp_minimize(func, dimensions, base_estimator=None, alpha=10e-10, acq="EI", xi=0.01, kappa=1.96, search="auto", n_calls=100, n_points=500, n_random_starts=10, n_restarts_optimizer=5, x0=None, y0=None, random_state=None): """Bayesian optimization using Gaussian Processes. If every function evaluation is expensive, for instance when the parameters are the hyperparameters of a neural network and the function evaluation is the mean cross-validation score across ten folds, optimizing the hyperparameters by standard optimization routines would take for ever! The idea is to approximate the function using a Gaussian process. In other words the function values are assumed to follow a multivariate gaussian. The covariance of the function values are given by a GP kernel between the parameters. Then a smart choice to choose the next parameter to evaluate can be made by the acquisition function over the Gaussian prior which is much quicker to evaluate. The total number of evaluations, `n_calls`, are performed like the following. If `x0` is provided but not `y0`, then the elements of `x0` are first evaluated, followed by `n_random_starts` evaluations. Finally, `n_calls - len(x0) - n_random_starts` evaluations are made guided by the surrogate model. If `x0` and `y0` are both provided then `n_random_starts` evaluations are first made then `n_calls - n_random_starts` subsequent evaluations are made guided by the surrogate model. Parameters ---------- * `func` [callable]: Function to minimize. Should take a array of parameters and return the function values. * `dimensions` [list, shape=(n_dims,)]: List of search space dimensions. Each search dimension can be defined either as - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer` dimensions), - a `(upper_bound, lower_bound, "prior")` tuple (for `Real` dimensions), - as a list of categories (for `Categorical` dimensions), or - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). * `base_estimator` [a Gaussian process estimator]: The Gaussian process estimator to use for optimization. * `alpha` [float, default=1e-10]: Value added to the diagonal of the kernel matrix during fitting. Larger values correspond to an increased noise level in the observations and reduce potential numerical issues during fitting. * `acq` [string, default=`"EI"`]: Function to minimize over the gaussian prior. Can be either - `"LCB"` for lower confidence bound, - `"EI"` for expected improvement, - `"PI"` for probability of improvement. * `xi` [float, default=0.01]: Controls how much improvement one wants over the previous best values. Used when the acquisition is either `"EI"` or `"PI"`. * `kappa` [float, default=1.96]: Controls how much of the variance in the predicted values should be taken into account. If set to be very high, then we are favouring exploration over exploitation and vice versa. Used when the acquisition is `"LCB"`. * `search` [string, `"auto"`, `"sampling"` or `"lbfgs"`, default=`"auto"`]: Searching for the next possible candidate to update the Gaussian prior with. If search is set to `"auto"`, then it is set to `"lbfgs"`` if all the search dimensions are Real(continuous). It defaults to `"sampling"` for all other cases. If search is set to `"sampling"`, `n_points` are sampled randomly and the Gaussian Process prior is updated with the point that gives the best acquisition value over the Gaussian prior. 
If search is set to `"lbfgs"`, then a point is sampled randomly, and lbfgs is run for 10 iterations optimizing the acquisition function over the Gaussian prior. * `n_calls` [int, default=100]: Number of calls to `func`. * `n_points` [int, default=500]: Number of points to sample to determine the next "best" point. Useless if search is set to `"lbfgs"`. * `n_random_starts` [int, default=10]: Number of evaluations of `func` with random initialization points before approximating the `func` with `base_estimator`. * `n_restarts_optimizer` [int, default=10]: The number of restarts of the optimizer when `search` is `"lbfgs"`. * `x0` [list, list of lists or `None`]: Initial input points. - If it is a list of lists, use it as a list of input points. - If it is a list, use it as a single initial input point. - If it is `None`, no initial input points are used. * `y0` [list, scalar or `None`] Evaluation of initial input points. - If it is a list, then it corresponds to evaluations of the function at each element of `x0` : the i-th element of `y0` corresponds to the function evaluated at the i-th element of `x0`. - If it is a scalar, then it corresponds to the evaluation of the function at `x0`. - If it is None and `x0` is provided, then the function is evaluated at each element of `x0`. * `random_state` [int, RandomState instance, or None (default)]: Set random state to something other than None for reproducible results. Returns ------- * `res` [`OptimizeResult`, scipy object]: The optimization result returned as a OptimizeResult object. Important attributes are: - `x` [list]: location of the minimum. - `fun` [float]: function value at the minimum. - `models`: surrogate models used for each iteration. - `x_iters` [list of lists]: location of function evaluation for each iteration. - `func_vals` [array]: function value for each iteration. - `space` [Space]: the optimization space. - `specs` [dict]`: the call specifications. - `rng` [RandomState instance]: State of the random state at the end of minimization. For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html """ # Save call args specs = { "args": copy.copy(inspect.currentframe().f_locals), "function": inspect.currentframe().f_code.co_name } # Check params rng = check_random_state(random_state) space = Space(dimensions) # Default GP if base_estimator is None: base_estimator = GaussianProcessRegressor( kernel=(ConstantKernel(1.0, (0.01, 1000.0)) * Matern( length_scale=np.ones(space.transformed_n_dims), length_scale_bounds=[(0.01, 100)] * space.transformed_n_dims, nu=2.5)), normalize_y=True, alpha=alpha, random_state=random_state) # Initialize with provided points (x0 and y0) and/or random points if x0 is None: x0 = [] elif not isinstance(x0[0], list): x0 = [x0] if not isinstance(x0, list): raise ValueError("`x0` should be a list, but got %s" % type(x0)) n_init_func_calls = len(x0) if y0 is not None else 0 n_total_init_calls = n_random_starts + n_init_func_calls if n_total_init_calls <= 0: # if x0 is not provided and n_random_starts is 0 then # it will ask for n_random_starts to be > 0. 
raise ValueError("Expected `n_random_starts` > 0, got %d" % n_random_starts) if n_calls < n_total_init_calls: raise ValueError("Expected `n_calls` >= %d, got %d" % (n_total_init_calls, n_calls)) if y0 is None and x0: y0 = [func(x) for x in x0] elif x0: if isinstance(y0, Iterable): y0 = list(y0) elif isinstance(y0, numbers.Number): y0 = [y0] else: raise ValueError("`y0` should be an iterable or a scalar, got %s" % type(y0)) if len(x0) != len(y0): raise ValueError("`x0` and `y0` should have the same length") if not all(map(np.isscalar, y0)): raise ValueError("`y0` elements should be scalars") else: y0 = [] Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng) yi = y0 + [func(x) for x in Xi[len(x0):]] if np.ndim(yi) != 1: raise ValueError("`func` should return a scalar") if search == "auto": if space.is_real: search = "lbfgs" else: search = "sampling" elif search not in ["lbfgs", "sampling"]: raise ValueError( "Expected search to be 'lbfgs', 'sampling' or 'auto', " "got %s" % search) # Bayesian optimization loop models = [] n_model_iter = n_calls - n_total_init_calls for i in range(n_model_iter): gp = clone(base_estimator) with warnings.catch_warnings(): warnings.simplefilter("ignore") gp.fit(space.transform(Xi), yi) models.append(gp) if search == "sampling": X = space.transform(space.rvs(n_samples=n_points, random_state=rng)) values = _gaussian_acquisition(X=X, model=gp, y_opt=np.min(yi), method=acq, xi=xi, kappa=kappa) next_x = X[np.argmin(values)] elif search == "lbfgs": best = np.inf for j in range(n_restarts_optimizer): x0 = space.transform(space.rvs(n_samples=1, random_state=rng))[0] with warnings.catch_warnings(): warnings.simplefilter("ignore") x, a, _ = fmin_l_bfgs_b(_acquisition, x0, args=(gp, np.min(yi), acq, xi, kappa), bounds=space.transformed_bounds, approx_grad=True, maxiter=20) if a < best: next_x, best = x, a next_x = space.inverse_transform(next_x.reshape((1, -1)))[0] next_y = func(next_x) Xi.append(next_x) yi.append(next_y) # Pack results res = OptimizeResult() best = np.argmin(yi) res.x = Xi[best] res.fun = yi[best] res.func_vals = np.array(yi) res.x_iters = Xi res.models = models res.space = space res.random_state = rng res.specs = specs return res
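# Illustrative gp_minimize call on a 1-D toy objective; every setting here is
# an assumption, and the Matern GP / EI defaults come from the function above.
res = gp_minimize(lambda x: (x[0] - 0.2) ** 2,
                  dimensions=[(-1.0, 1.0)],
                  n_calls=20, n_random_starts=5,
                  acq="EI", search="auto", random_state=0)
print(res.x, res.fun)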
def gbrt_minimize(func, bounds, base_estimator=None, maxiter=100, n_points=20, n_start=10, random_state=None): """Sequential optimisation using gradient boosted trees. Gradient boosted regression trees are used to model the (very) expensive to evaluate function `func`. The model is improved by sequentially evaluating the expensive function at the next best point. Thereby finding the minimum of `func` with as few evaluations as possible. Parameters ---------- * `func` [callable]: Function to minimize. Should take a array of parameters and return the function values. * `bounds` [array-like, shape=(n_parameters, 2)]: - ``bounds[i][0]`` should give the lower bound of each parameter and - ``bounds[i][1]`` should give the upper bound of each parameter. * `base_estimator` [`GradientBoostingQuantileRegressor`]: The regressor to use as surrogate model * `maxiter` [int, default=100]: Number of iterations used to find the minimum. This corresponds to the total number of evaluations of `func`. If `n_start` > 0 only `maxiter - n_start` iterations are used. * `n_start` [int, default=10]: Number of random points to draw before fitting `base_estimator` for the first time. If `n_start > maxiter` this degrades to a random search for the minimum. * `n_points` [int, default=20]: Number of points to sample when minimizing the acquisition function. * `random_state` [int, RandomState instance, or None (default)]: Set random state to something other than None for reproducible results. Returns ------- * `res` [`OptimizeResult`, scipy object]: The optimization result returned as a OptimizeResult object. Important attributes are: - `x` [float]: location of the minimum. - `fun` [float]: function value at the minimum. - `models`: surrogate models used for each iteration. - `x_iters` [array]: location of function evaluation for each iteration. - `func_vals` [array]: function value for each iteration. For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html """ rng = check_random_state(random_state) # Bounds num_params = len(bounds) lower_bounds, upper_bounds = extract_bounds(bounds) # Default estimator if base_estimator is None: base_estimator = GradientBoostingQuantileRegressor(random_state=rng) # Record the points and function values evaluated as part of # the minimization Xi = np.zeros((maxiter, num_params)) yi = np.zeros(maxiter) # Initialize with random points if n_start == 0: raise ValueError("Need at least one starting point.") if maxiter == 0: raise ValueError("Need to perform at least one iteration.") n_start = min(n_start, maxiter) Xi[:n_start] = _random_points(lower_bounds, upper_bounds, n_points=n_start, random_state=rng) best_x = Xi[:n_start].ravel() yi[:n_start] = [func(xi) for xi in Xi[:n_start]] best_y = np.min(yi[:n_start]) models = [] for i in range(n_start, maxiter): rgr = clone(base_estimator) # only the first i points are meaningful rgr.fit(Xi[:i, :], yi[:i]) models.append(rgr) # `rgr` predicts constants for each leaf which means that the EI # has zero gradient over large distances. As a result we can not # use gradient based optimisers like BFGS, use random sampling # for the moment. 
x0 = _random_points(lower_bounds, upper_bounds, n_points=n_points, random_state=rng) aq = _expected_improvement(x0, rgr, best_y) best = np.argmin(aq) Xi[i] = x0[best].ravel() yi[i] = func(x0[best]) if yi[i] < best_y: best_y = yi[i] best_x = Xi[i] res = OptimizeResult() res.x = best_x res.fun = best_y res.func_vals = yi res.x_iters = Xi res.models = models return res
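# Illustrative gbrt_minimize call; the objective and budgets are placeholders
# and the default GradientBoostingQuantileRegressor surrogate is used.
res = gbrt_minimize(lambda x: (x[0] - 1.0) ** 2,
                    bounds=[(-3.0, 3.0)],
                    maxiter=30, n_start=5, n_points=50, random_state=1)
print(res.x, res.fun)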
def gp_minimize(func, dimensions, base_estimator=None, alpha=10e-10, acq="EI", xi=0.01, kappa=1.96, search="auto", n_calls=100, n_points=500, n_random_starts=10, n_restarts_optimizer=5, x0=None, y0=None, random_state=None): """Bayesian optimization using Gaussian Processes. If every function evaluation is expensive, for instance when the parameters are the hyperparameters of a neural network and the function evaluation is the mean cross-validation score across ten folds, optimizing the hyperparameters by standard optimization routines would take for ever! The idea is to approximate the function using a Gaussian process. In other words the function values are assumed to follow a multivariate gaussian. The covariance of the function values are given by a GP kernel between the parameters. Then a smart choice to choose the next parameter to evaluate can be made by the acquisition function over the Gaussian prior which is much quicker to evaluate. The total number of evaluations, `n_calls`, are performed like the following. If `x0` is provided but not `y0`, then the elements of `x0` are first evaluated, followed by `n_random_starts` evaluations. Finally, `n_calls - len(x0) - n_random_starts` evaluations are made guided by the surrogate model. If `x0` and `y0` are both provided then `n_random_starts` evaluations are first made then `n_calls - n_random_starts` subsequent evaluations are made guided by the surrogate model. Parameters ---------- * `func` [callable]: Function to minimize. Should take a array of parameters and return the function values. * `dimensions` [list, shape=(n_dims,)]: List of search space dimensions. Each search dimension can be defined either as - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer` dimensions), - a `(upper_bound, lower_bound, "prior")` tuple (for `Real` dimensions), - as a list of categories (for `Categorical` dimensions), or - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). * `base_estimator` [a Gaussian process estimator]: The Gaussian process estimator to use for optimization. * `alpha` [float, default=1e-10]: Value added to the diagonal of the kernel matrix during fitting. Larger values correspond to an increased noise level in the observations and reduce potential numerical issues during fitting. * `acq` [string, default=`"EI"`]: Function to minimize over the gaussian prior. Can be either - `"LCB"` for lower confidence bound, - `"EI"` for expected improvement, - `"PI"` for probability of improvement. * `xi` [float, default=0.01]: Controls how much improvement one wants over the previous best values. Used when the acquisition is either `"EI"` or `"PI"`. * `kappa` [float, default=1.96]: Controls how much of the variance in the predicted values should be taken into account. If set to be very high, then we are favouring exploration over exploitation and vice versa. Used when the acquisition is `"LCB"`. * `search` [string, `"auto"`, `"sampling"` or `"lbfgs"`, default=`"auto"`]: Searching for the next possible candidate to update the Gaussian prior with. If search is set to `"auto"`, then it is set to `"lbfgs"`` if all the search dimensions are Real(continuous). It defaults to `"sampling"` for all other cases. If search is set to `"sampling"`, `n_points` are sampled randomly and the Gaussian Process prior is updated with the point that gives the best acquisition value over the Gaussian prior. 
If search is set to `"lbfgs"`, then a point is sampled randomly, and lbfgs is run for 10 iterations optimizing the acquisition function over the Gaussian prior. * `n_calls` [int, default=100]: Number of calls to `func`. * `n_points` [int, default=500]: Number of points to sample to determine the next "best" point. Useless if search is set to `"lbfgs"`. * `n_random_starts` [int, default=10]: Number of evaluations of `func` with random initialization points before approximating the `func` with `base_estimator`. * `n_restarts_optimizer` [int, default=10]: The number of restarts of the optimizer when `search` is `"lbfgs"`. * `x0` [list, list of lists or `None`]: Initial input points. - If it is a list of lists, use it as a list of input points. - If it is a list, use it as a single initial input point. - If it is `None`, no initial input points are used. * `y0` [list, scalar or `None`] Evaluation of initial input points. - If it is a list, then it corresponds to evaluations of the function at each element of `x0` : the i-th element of `y0` corresponds to the function evaluated at the i-th element of `x0`. - If it is a scalar, then it corresponds to the evaluation of the function at `x0`. - If it is None and `x0` is provided, then the function is evaluated at each element of `x0`. * `random_state` [int, RandomState instance, or None (default)]: Set random state to something other than None for reproducible results. Returns ------- * `res` [`OptimizeResult`, scipy object]: The optimization result returned as a OptimizeResult object. Important attributes are: - `x` [list]: location of the minimum. - `fun` [float]: function value at the minimum. - `models`: surrogate models used for each iteration. - `x_iters` [list of lists]: location of function evaluation for each iteration. - `func_vals` [array]: function value for each iteration. - `space` [Space]: the optimization space. - `specs` [dict]`: the call specifications. - `rng` [RandomState instance]: State of the random state at the end of minimization. For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html """ # Save call args specs = {"args": copy.copy(inspect.currentframe().f_locals), "function": inspect.currentframe().f_code.co_name} # Check params rng = check_random_state(random_state) space = Space(dimensions) # Default GP if base_estimator is None: base_estimator = GaussianProcessRegressor( kernel=(ConstantKernel(1.0, (0.01, 1000.0)) * Matern(length_scale=np.ones(space.transformed_n_dims), length_scale_bounds=[(0.01, 100)] * space.transformed_n_dims, nu=2.5)), normalize_y=True, alpha=alpha, random_state=random_state) # Initialize with provided points (x0 and y0) and/or random points if x0 is None: x0 = [] elif not isinstance(x0[0], list): x0 = [x0] if not isinstance(x0, list): raise ValueError("`x0` should be a list, but got %s" % type(x0)) n_init_func_calls = len(x0) if y0 is not None else 0 n_total_init_calls = n_random_starts + n_init_func_calls if n_total_init_calls <= 0: # if x0 is not provided and n_random_starts is 0 then # it will ask for n_random_starts to be > 0. 
raise ValueError( "Expected `n_random_starts` > 0, got %d" % n_random_starts) if n_calls < n_total_init_calls: raise ValueError( "Expected `n_calls` >= %d, got %d" % (n_total_init_calls, n_calls)) if y0 is None and x0: y0 = [func(x) for x in x0] elif x0: if isinstance(y0, Iterable): y0 = list(y0) elif isinstance(y0, numbers.Number): y0 = [y0] else: raise ValueError( "`y0` should be an iterable or a scalar, got %s" % type(y0)) if len(x0) != len(y0): raise ValueError("`x0` and `y0` should have the same length") if not all(map(np.isscalar, y0)): raise ValueError( "`y0` elements should be scalars") else: y0 = [] Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng) yi = y0 + [func(x) for x in Xi[len(x0):]] if np.ndim(yi) != 1: raise ValueError("`func` should return a scalar") if search == "auto": if space.is_real: search = "lbfgs" else: search = "sampling" elif search not in ["lbfgs", "sampling"]: raise ValueError( "Expected search to be 'lbfgs', 'sampling' or 'auto', " "got %s" % search) # Bayesian optimization loop models = [] n_model_iter = n_calls - n_total_init_calls for i in range(n_model_iter): gp = clone(base_estimator) with warnings.catch_warnings(): warnings.simplefilter("ignore") gp.fit(space.transform(Xi), yi) models.append(gp) if search == "sampling": X = space.transform(space.rvs(n_samples=n_points, random_state=rng)) values = _gaussian_acquisition( X=X, model=gp, y_opt=np.min(yi), method=acq, xi=xi, kappa=kappa) next_x = X[np.argmin(values)] elif search == "lbfgs": best = np.inf for j in range(n_restarts_optimizer): x0 = space.transform(space.rvs(n_samples=1, random_state=rng))[0] with warnings.catch_warnings(): warnings.simplefilter("ignore") x, a, _ = fmin_l_bfgs_b( _acquisition, x0, args=(gp, np.min(yi), acq, xi, kappa), bounds=space.transformed_bounds, approx_grad=True, maxiter=20) if a < best: next_x, best = x, a next_x = space.inverse_transform(next_x.reshape((1, -1)))[0] next_y = func(next_x) Xi.append(next_x) yi.append(next_y) # Pack results res = OptimizeResult() best = np.argmin(yi) res.x = Xi[best] res.fun = yi[best] res.func_vals = np.array(yi) res.x_iters = Xi res.models = models res.space = space res.random_state = rng res.specs = specs return res
def gp_minimize(func, bounds=None, search="sampling", random_state=None, maxiter=1000, acq="UCB", num_points=500): """ Black-box optimization using Gaussian Processes. If every function evaluation is expensive, for instance when the parameters are the hyperparameters of a neural network and the function evaluation is the mean cross-validation score across ten folds, optimizing the hyperparameters by standared optimization routines would take for ever! The idea is to approximate the function using a Gaussian process. In other words the function values are assumed to follow a multivariate gaussian. The covariance of the function values are given by a GP kernel between the parameters. Then a smart choice to choose the next parameter to evaluate can be made by the acquistion function over the Gaussian posterior which is much quicker to evaluate. Parameters ---------- func: callable Function to minimize. Should take a array of parameters and return the function value. bounds: array-like, shape (n_parameters, 2) ``bounds[i][0]`` should give the lower bound of each parameter and ``bounds[i][1]`` should give the upper bound of each parameter. search: string, "sampling" or "lbfgs" Searching for the next possible candidate to update the Gaussian prior with. If search is set to "sampling", ``num_points`` are sampled randomly and the Gaussian Process prior is updated with that point that gives the best acquision value over the Gaussian posterior. If search is set to "lbfgs", then a point is sampled randomly, and lbfgs is run for 10 iterations optimizing the acquistion function over the Gaussian posterior. random_state: int, RandomState instance, or None (default) Set random state to something other than None for reproducible results. maxiter: int, default 1000 Number of iterations to find the minimum. In other words, the number of function evaluations. acq: string, default "UCB" Function to minimize over the gaussian posterior. Can be either the "UCB" which refers to the UpperConfidenceBound or "EI" which is the Expected Improvement. num_points: int, default 500 Number of points to sample to determine the next "best" point. Useless if search is set to "lbfgs". Returns ------- res: OptimizeResult, scipy object The optimization result returned as a OptimizeResult object. Important attributes are ``x`` - float, the optimization solution, ``fun`` - float, the value of the function at the optimum, ``models``- gp_models[i]. the prior on the function fit at iteration[i]. ``func_vals`` - the function value at the ith iteration. ``x_iters`` - the value of ``x`` corresponding to the function value at the ith iteration. 
For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html """ rng = np.random.RandomState(random_state) num_params = len(bounds) lower_bounds, upper_bounds = zip(*bounds) upper_bounds = np.asarray(upper_bounds) lower_bounds = np.asarray(lower_bounds) x0 = rng.rand(num_params) func_val = [func(lower_bounds + (upper_bounds - lower_bounds) * x0)] length_scale = np.ones(num_params) gp_params = { 'kernel': Matern(length_scale=length_scale, nu=2.5), 'normalize_y': True, 'random_state': random_state } lbfgs_bounds = np.tile((0, 1), (num_params, 1)) gp_models = [] x = np.reshape(x0, (1, -1)) for i in range(maxiter): gpr = GaussianProcessRegressor(**gp_params) gpr.fit(x, func_val) if search == "sampling": sampling = rng.rand(num_points, num_params) acquis = acquisition_func(sampling, gpr, np.min(func_val), acq) best_arg = np.argmin(acquis) best_x = sampling[best_arg] elif search == "lbfgs": init = rng.rand(num_params) best_x, _, _ = fmin_l_bfgs_b(acquisition_func, np.asfortranarray(init), args=(gpr, np.min(func_val), acq), bounds=lbfgs_bounds, approx_grad=True, maxiter=10) gp_models.append(gpr) best_f = func(lower_bounds + (upper_bounds - lower_bounds) * best_x) x_list = x.tolist() x_list.append(best_x) x = np.asarray(x_list) func_val.append(best_f) x = lower_bounds + (upper_bounds - lower_bounds) * x func_ind = np.argmin(func_val) x_val = x[func_ind] best_func_val = func_val[func_ind] res = OptimizeResult() res.models = gp_models res.x = x_val res.fun = best_func_val res.func_vals = func_val res.x_iters = x return res
def gp_minimize(func, dimensions, base_estimator=None, acq="LCB", xi=0.01, kappa=1.96, search="sampling", maxiter=1000, n_points=500, n_start=10, n_restarts_optimizer=5, random_state=None): """Bayesian optimization using Gaussian Processes. If every function evaluation is expensive, for instance when the parameters are the hyperparameters of a neural network and the function evaluation is the mean cross-validation score across ten folds, optimizing the hyperparameters by standared optimization routines would take for ever! The idea is to approximate the function using a Gaussian process. In other words the function values are assumed to follow a multivariate gaussian. The covariance of the function values are given by a GP kernel between the parameters. Then a smart choice to choose the next parameter to evaluate can be made by the acquisition function over the Gaussian prior which is much quicker to evaluate. Parameters ---------- * `func` [callable]: Function to minimize. Should take a array of parameters and return the function values. * `dimensions` [list, shape=(n_dims,)]: List of search space dimensions. Each search dimension can be defined either as - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer` dimensions), - a `(upper_bound, lower_bound, "prior")` tuple (for `Real` dimensions), - as a list of categories (for `Categorical` dimensions), or - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). * `base_estimator` [a Gaussian process estimator]: The Gaussian process estimator to use for optimization. * `acq` [string, default=`"LCB"`]: Function to minimize over the gaussian prior. Can be either - `"LCB"` for lower confidence bound, - `"EI"` for expected improvement, - `"PI"` for probability of improvement. * `xi` [float, default=0.01]: Controls how much improvement one wants over the previous best values. Used when the acquisition is either `"EI"` or `"PI"`. * `kappa` [float, default=1.96]: Controls how much of the variance in the predicted values should be taken into account. If set to be very high, then we are favouring exploration over exploitation and vice versa. Used when the acquisition is `"LCB"`. * `search` [string, `"sampling"` or `"lbfgs"`]: Searching for the next possible candidate to update the Gaussian prior with. If search is set to `"sampling"`, `n_points` are sampled randomly and the Gaussian Process prior is updated with the point that gives the best acquisition value over the Gaussian prior. If search is set to `"lbfgs"`, then a point is sampled randomly, and lbfgs is run for 10 iterations optimizing the acquisition function over the Gaussian prior. * `maxiter` [int, default=1000]: Number of iterations to find the minimum. Note that `n_start` iterations are effectively discounted, such that total number of function evaluations is at most `maxiter`. * `n_points` [int, default=500]: Number of points to sample to determine the next "best" point. Useless if search is set to `"lbfgs"`. * `n_start` [int, default=10]: Number of random initialization points. * `n_restarts_optimizer` [int, default=10]: The number of restarts of the optimizer when `search` is `"lbfgs"`. * `random_state` [int, RandomState instance, or None (default)]: Set random state to something other than None for reproducible results. Returns ------- * `res` [`OptimizeResult`, scipy object]: The optimization result returned as a OptimizeResult object. Important attributes are: - `x` [float]: location of the minimum. - `fun` [float]: function value at the minimum. 
- `models`: surrogate models used for each iteration. - `x_iters` [array]: location of function evaluation for each iteration. - `func_vals` [array]: function value for each iteration. - `space` [Space]: the optimisation space. For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html """ rng = check_random_state(random_state) space = Space(dimensions) # Default GP if base_estimator is None: base_estimator = GaussianProcessRegressor( kernel=(ConstantKernel(1.0, (0.01, 1000.0)) * Matern(length_scale=np.ones(space.transformed_n_dims), length_scale_bounds=[(0.01, 100)] * space.transformed_n_dims, nu=2.5)), normalize_y=True, alpha=10e-6, random_state=random_state) # First points Xi = space.rvs(n_samples=n_start, random_state=rng) yi = [func(x) for x in Xi] if np.ndim(yi) != 1: raise ValueError( "The function to be optimized should return a scalar") # Bayesian optimization loop models = [] for i in range(maxiter - n_start): gp = clone(base_estimator) with warnings.catch_warnings(): warnings.simplefilter("ignore") gp.fit(space.transform(Xi), yi) models.append(gp) if search == "sampling": X = space.transform(space.rvs(n_samples=n_points, random_state=rng)) values = _gaussian_acquisition( X=X, model=gp, y_opt=np.min(yi), method=acq, xi=xi, kappa=kappa) next_x = X[np.argmin(values)] elif search == "lbfgs": best = np.inf for j in range(n_restarts_optimizer): x0 = space.transform(space.rvs(n_samples=1, random_state=rng))[0] with warnings.catch_warnings(): warnings.simplefilter("ignore") x, a, _ = fmin_l_bfgs_b( _acquisition, x0, args=(gp, np.min(yi), acq, xi, kappa), bounds=space.transformed_bounds, approx_grad=True, maxiter=10) if a < best: next_x, best = x, a next_x = space.inverse_transform(next_x.reshape((1, -1)))[0] next_y = func(next_x) Xi = np.vstack((Xi, next_x)) yi.append(next_y) # Pack results res = OptimizeResult() best = np.argmin(yi) res.x = Xi[best] res.fun = yi[best] res.func_vals = np.array(yi) res.x_iters = Xi res.models = models res.space = space return res
def gp_minimize(func, bounds, base_estimator=None, acq="LCB", xi=0.01, kappa=1.96, search="sampling", maxiter=1000, n_points=500, n_start=10, n_restarts_optimizer=5, random_state=None): """Bayesian optimization using Gaussian Processes. If every function evaluation is expensive, for instance when the parameters are the hyperparameters of a neural network and the function evaluation is the mean cross-validation score across ten folds, optimizing the hyperparameters by standared optimization routines would take for ever! The idea is to approximate the function using a Gaussian process. In other words the function values are assumed to follow a multivariate gaussian. The covariance of the function values are given by a GP kernel between the parameters. Then a smart choice to choose the next parameter to evaluate can be made by the acquisition function over the Gaussian prior which is much quicker to evaluate. Parameters ---------- * `func` [callable]: Function to minimize. Should take a array of parameters and return the function values. * `bounds` [array-like, shape=(n_parameters, 2)]: - ``bounds[i][0]`` should give the lower bound of each parameter and - ``bounds[i][1]`` should give the upper bound of each parameter. * `base_estimator` [a Gaussian process estimator]: The Gaussian process estimator to use for optimization. * `acq` [string, default=`"LCB"`]: Function to minimize over the gaussian prior. Can be either - `"LCB"` for lower confidence bound, - `"EI"` for expected improvement, - `"PI"` for probability of improvement. * `xi` [float, default=0.01]: Controls how much improvement one wants over the previous best values. Used when the acquisition is either `"EI"` or `"PI"`. * `kappa` [float, default=1.96]: Controls how much of the variance in the predicted values should be taken into account. If set to be very high, then we are favouring exploration over exploitation and vice versa. Used when the acquisition is `"LCB"`. * `search` [string, `"sampling"` or `"lbfgs"`]: Searching for the next possible candidate to update the Gaussian prior with. If search is set to `"sampling"`, `n_points` are sampled randomly and the Gaussian Process prior is updated with the point that gives the best acquisition value over the Gaussian prior. If search is set to `"lbfgs"`, then a point is sampled randomly, and lbfgs is run for 10 iterations optimizing the acquisition function over the Gaussian prior. * `maxiter` [int, default=1000]: Number of iterations to find the minimum. Note that `n_start` iterations are effectively discounted, such that total number of function evaluations is at most `maxiter`. * `n_points` [int, default=500]: Number of points to sample to determine the next "best" point. Useless if search is set to `"lbfgs"`. * `n_start` [int, default=10]: Number of random initialization points. * `n_restarts_optimizer` [int, default=10]: The number of restarts of the optimizer when `search` is `"lbfgs"`. * `random_state` [int, RandomState instance, or None (default)]: Set random state to something other than None for reproducible results. Returns ------- * `res` [`OptimizeResult`, scipy object]: The optimization result returned as a OptimizeResult object. Important attributes are: - `x` [float]: location of the minimum. - `fun` [float]: function value at the minimum. - `models`: surrogate models used for each iteration. - `x_iters` [array]: location of function evaluation for each iteration. - `func_vals` [array]: function value for each iteration. 
For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html """ rng = check_random_state(random_state) # Bounds n_params = len(bounds) lb, ub = extract_bounds(bounds) # Default GP if base_estimator is None: base_estimator = GaussianProcessRegressor( kernel=(ConstantKernel(1.0, (0.01, 1000.0)) * Matern(length_scale=np.ones(n_params), length_scale_bounds=[(0.01, 100)] * n_params, nu=2.5)), normalize_y=True, alpha=10e-6, random_state=random_state) # First points Xi = lb + (ub - lb) * rng.rand(n_start, n_params) yi = [func(x) for x in Xi] if np.ndim(yi) != 1: raise ValueError("The function to be optimized should return a scalar") # Bayesian optimization loop models = [] for i in range(maxiter - n_start): gp = clone(base_estimator) with warnings.catch_warnings(): warnings.simplefilter("ignore") gp.fit(Xi, yi) models.append(gp) if search == "sampling": X = lb + (ub - lb) * rng.rand(n_points, n_params) values = acquisition(X=X, model=gp, y_opt=np.min(yi), method=acq, xi=xi, kappa=kappa) next_x = X[np.argmin(values)] elif search == "lbfgs": best = np.inf for j in range(n_restarts_optimizer): x0 = lb + (ub - lb) * rng.rand(n_params) with warnings.catch_warnings(): warnings.simplefilter("ignore") x, a, _ = fmin_l_bfgs_b(_acquisition, x0, args=(gp, np.min(yi), acq, xi, kappa), bounds=bounds, approx_grad=True, maxiter=10) if a < best: next_x, best = x, a next_y = func(next_x) Xi = np.vstack((Xi, next_x)) yi.append(next_y) # Pack results res = OptimizeResult() best = np.argmin(yi) res.x = Xi[best] res.fun = yi[best] res.func_vals = np.array(yi) res.x_iters = Xi res.models = models return res
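# Illustrative call to this bounds-based gp_minimize variant using the sampling
# search strategy; the objective and budgets are assumptions.
res = gp_minimize(lambda x: (x[0] - 0.2) ** 2 + (x[1] + 0.1) ** 2,
                  bounds=[(-1.0, 1.0), (-1.0, 1.0)],
                  acq="LCB", search="sampling",
                  maxiter=25, n_start=10, random_state=0)
print(res.x, res.fun)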
def dummy_minimize(func, dimensions, n_calls=100, x0=None, y0=None, random_state=None): """Random search by uniform sampling within the given bounds. Parameters ---------- * `func` [callable]: Function to minimize. Should take a array of parameters and return the function values. * `dimensions` [list, shape=(n_dims,)]: List of search space dimensions. Each search dimension can be defined either as - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer` dimensions), - a `(upper_bound, lower_bound, "prior")` tuple (for `Real` dimensions), - as a list of categories (for `Categorical` dimensions), or - an instance of a `Dimension` object (`Real`, `Integer` or `Categorical`). * `n_calls` [int, default=100]: Number of calls to `func` to find the minimum. * `x0` [list, list of lists or `None`]: Initial input points. - If it is a list of lists, use it as a list of input points. - If it is a list, use it as a single initial input point. - If it is `None`, no initial input points are used. * `y0` [list, scalar or `None`] Evaluation of initial input points. - If it is a list, then it corresponds to evaluations of the function at each element of `x0` : the i-th element of `y0` corresponds to the function evaluated at the i-th element of `x0`. - If it is a scalar, then it corresponds to the evaluation of the function at `x0`. - If it is None and `x0` is provided, then the function is evaluated at each element of `x0`. * `random_state` [int, RandomState instance, or None (default)]: Set random state to something other than None for reproducible results. Returns ------- * `res` [`OptimizeResult`, scipy object]: The optimization result returned as a OptimizeResult object. Important attributes are: - `x` [list]: location of the minimum. - `fun` [float]: function value at the minimum. - `x_iters` [list of lists]: location of function evaluation for each iteration. - `func_vals` [array]: function value for each iteration. - `space` [Space]: the optimisation space. - `specs` [dict]: the call specifications. - `rng` [RandomState instance]: State of the random state at the end of minimization. 
For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html """ # Save call args specs = {"args": copy.copy(inspect.currentframe().f_locals), "function": inspect.currentframe().f_code.co_name} # Check params rng = check_random_state(random_state) space = Space(dimensions) if x0 is None: x0 = [] elif not isinstance(x0[0], list): x0 = [x0] if not isinstance(x0, list): raise ValueError("`x0` should be a list, got %s" % type(x0)) if len(x0) > 0 and y0 is not None: if isinstance(y0, Iterable): y0 = list(y0) elif isinstance(y0, numbers.Number): y0 = [y0] else: raise ValueError("`y0` should be an iterable or a scalar, got %s" % type(y0)) if len(x0) != len(y0): raise ValueError("`x0` and `y0` should have the same length") if not all(map(np.isscalar, y0)): raise ValueError("`y0` elements should be scalars") elif len(x0) > 0 and y0 is None: y0 = [] n_calls -= len(x0) elif len(x0) == 0 and y0 is not None: raise ValueError("`x0`cannot be `None` when `y0` is provided") else: # len(x0) == 0 and y0 is None y0 = [] X = x0 y = y0 # Random search X = X + space.rvs(n_samples=n_calls, random_state=rng) first = True for i in range(len(y0), len(X)): y_i = func(X[i]) if first: first = False if not np.isscalar(y_i): raise ValueError("`func` should return a scalar") y.append(y_i) y = np.array(y) # Pack results res = OptimizeResult() best = np.argmin(y) res.x = X[best] res.fun = y[best] res.func_vals = y res.x_iters = X res.models = [] # Create attribute even though it is empty res.space = space res.random_state = rng res.specs = specs return res
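# Sketch of warm-starting the random search above with previously evaluated
# points; the x0/y0 values are made up for illustration.
x0 = [[0.0, "relu"], [0.5, "tanh"]]
y0 = [0.8, 1.3]
res = dummy_minimize(lambda x: (x[0] - 0.2) ** 2 + (0.0 if x[1] == "relu" else 1.0),
                     dimensions=[(-1.0, 1.0), ["relu", "tanh"]],
                     n_calls=30, x0=x0, y0=y0, random_state=0)
print(res.x, res.fun)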
def moving_frame_augmented_hessian_optimizer( rhf_objective: RestrictedHartreeFockObjective, initial_parameters: np.ndarray, opdm_aa_measurement_func: Callable, max_iter: Optional[int] = 15, rtol: Optional[float] = 0.2E-2, delta: Optional[float] = 0.03, verbose: Optional[bool] = True, hessian_update: Optional[bool] = 'diagonal'): # testpragma: no cover # coverage: ignore """ The moving frame optimizer Determine an optimal basis rotation by continuously updating the coordinate system and asking if stationarity is achieved. :param rhf_objective: openfermioncirq.experiments.hfvqe.RestrictedHartreeFockObjective :param initial_parameters: parameters to start the optimization :param opdm_aa_measurement_func: callable functioon that takes the parameter vector and returns the opdm :param max_iter: maximum number of iterations to take :param rtol: Terminate the optimization with the norm of the update angles falls below this threshold :param verbose: Allow printing of intermediate optimization information :param hessian_update: Optional argument if diagonal or full Hessian is used :return: """ if delta > 1 or delta < 0: raise ValueError("Delta must be in the domain [0, 1]") if hessian_update not in ['diagonal', 'energy']: raise ValueError("hessian_update parameter not valid.") res = OptimizeResult() res.fr_vals = [] res.opdms = [] res.x_iters = [] res.func_vals = [] res.f = None res.iter_times = [] fr_vals = initial_parameters current_unitary = np.eye(rhf_objective.nocc + rhf_objective.nvirt) break_at_count = max_iter current_count = 0 energies = [] fval_norms = [] # for debugging opdm_initial = np.diag([1] * rhf_objective.nocc + [0] * rhf_objective.nvirt) start_time = time.time() while current_count < break_at_count: # Iterate of algorithm has a unitary and parameters # first step is to generate new unitary u_new = group_action(old_unitary=current_unitary, new_parameters=fr_vals, occ=rhf_objective.occ, virt=rhf_objective.virt) # get initial opdm from starting parameters opdm = opdm_aa_measurement_func(u_new.copy()) # opdm = u_new @ opdm_initial @ u_new.conj().T # Calculate energy, residual, and hessian terms rdms: InteractionRDM = rhf_objective.rdms_from_opdm_aa(opdm) current_energy: float = rdms.expectation( rhf_objective.hamiltonian).real energies.append(current_energy) res.x_iters.append(u_new) res.func_vals.append(current_energy) res.fr_vals.append(fr_vals) res.opdms.append(opdm) res.iter_times.append(time.time() - start_time) rot_gens = non_redundant_rotation_generators(rhf_objective) dvec, hmat = get_dvec_hmat( rotation_generators=rot_gens, rhf_objective=rhf_objective, rdms=rdms, diagonal_hessian=True if hessian_update == 'diagonal' else False) # talk if talking is allowed if verbose: print("\nITERATION NUMBER : ", current_count) print("\n unitary") print(current_unitary) test_opdm_aa = u_new @ opdm_initial @ u_new.conj().T true_energy = rhf_objective.energy_from_opdm(test_opdm_aa) print("Current Energy: ", current_energy) print("true energy ", true_energy) print("dvec") print(list(zip(dvec, rot_gens))) # build augmented Hessian dvec = dvec.reshape((-1, 1)) aug_hess = np.hstack((np.array([[0]]), dvec.conj().T)) aug_hess = np.vstack((aug_hess, np.hstack((dvec, hmat)))) w, v = np.linalg.eig(aug_hess) sort_idx = np.argsort(w) w = w[sort_idx] v = v[:, sort_idx] new_fr_vals = v[1:, [0]].flatten() / v[0, 0] assert new_fr_vals.shape[0] == initial_parameters.shape[0] assert np.isclose(w[0], dvec.T @ new_fr_vals) # Qiming's algorithm for no learning rate rescaling if np.max(abs(new_fr_vals)) >= 
delta: new_fr_vals = delta * new_fr_vals / np.max(abs(new_fr_vals)) # keep track of the norm fval_norms.append(np.linalg.norm(new_fr_vals)) # allow a stopping condition if verbose: print("New fr values norm") print(np.linalg.norm(new_fr_vals)) if np.linalg.norm(new_fr_vals) < rtol: if verbose: print("Finished Optimization") break # assign new values to the things being evaluated next iteration fr_vals = new_fr_vals.copy() current_unitary = u_new.copy() current_count += 1 return res
def model_policy_gradient( f: Callable[..., float], x0: np.ndarray, *, args=(), learning_rate: float = 1e-2, decay_rate: float = 0.96, decay_steps: int = 5, log_sigma_init: float = -5.0, max_iterations: int = 1000, batch_size: int = 10, radius_coeff: float = 3.0, warmup_steps: int = 10, batch_size_model: int = 65536, save_func_vals: bool = False, random_state: "cirq.RANDOM_STATE_OR_SEED_LIKE" = None, known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None, max_evaluations: Optional[int] = None ) -> scipy.optimize.OptimizeResult: """Model policy gradient algorithm for black-box optimization. The idea of this algorithm is to perform policy gradient, but estimate the function values using a surrogate model. The surrogate model is a least-squared quadratic fit to points sampled from the vicinity of the current iterate. Args: f: The function to minimize. x0: An initial guess. args: Additional arguments to pass to the function. learning_rate: The learning rate for the policy gradient. decay_rate: the learning decay rate for the Adam optimizer. decay_steps: the learning decay steps for the Adam optimizer. log_sigma_init: the intial value for the sigma of the policy in the log scale. max_iterations: The maximum number of iterations to allow before termination. batch_size: The number of points to sample in each iteration. The cost of evaluation of these samples are computed through the quantum computer cost model. radius_coeff: The ratio determining the size of the radius around the current iterate to sample points from to build the quadratic model. The ratio is with respect to the maximal ratio of the samples from the current policy. warmup_steps: The number of steps before the model policy gradient is performed. before these steps, we use the policy gradient without the model. batch_size_model: The model sample batch size. After we fit the quadratic model, we use the model to evaluate on big enough batch of samples. save_func_vals: whether to compute and save the function values for the current value of parameter. random_state: A seed (int) or `np.random.RandomState` class to use when generating random values. If not set, defaults to using the module methods in `np.random`. known_values: Any prior known values of the objective function. This is given as a tuple where the first element is a list of points and the second element is a list of the function values at those points. max_evaluations: The maximum number of function evaluations to allow before termination. 
Returns: Scipy OptimizeResult """ random_state = value.parse_random_state(random_state) if known_values is not None: known_xs, known_ys = known_values known_xs = [np.copy(x) for x in known_xs] known_ys = [np.copy(y) for y in known_ys] else: known_xs, known_ys = [], [] if max_evaluations is None: max_evaluations = np.inf n = len(x0) log_sigma = np.ones(n) * log_sigma_init sigma = np.exp(log_sigma) # set up the first and second moment estimate m_mean = np.zeros(n) v_mean = np.zeros(n) m_log_sigma = np.zeros(n) v_log_sigma = np.zeros(n) # set up lr schedule and optimizer lr_schedule1 = _ExponentialSchedule(learning_rate, decay_steps=decay_steps, decay_rate=decay_rate, staircase=True) lr_schedule2 = _ExponentialSchedule(learning_rate, decay_steps=decay_steps, decay_rate=decay_rate, staircase=True) _, f = wrap_function(f, args) res = OptimizeResult() current_x = np.copy(x0) res.x_iters = [] # initializes as lists res.xs_iters = [] res.ys_iters = [] res.func_vals = [] res.fun = 0 total_evals = 0 num_iter = 0 message = None # stats history_max = -np.inf while num_iter < max_iterations: # get samples from the current policy to evaluate z = random_state.randn(batch_size, n) new_xs = sigma * z + current_x if total_evals + batch_size > max_evaluations: message = "Reached maximum number of evaluations." break # Evaluate points res.xs_iters.append(new_xs) new_ys = [f(x) for x in new_xs] res.ys_iters.append(new_ys) total_evals += batch_size known_xs.extend(new_xs) known_ys.extend(new_ys) # Save function value if save_func_vals: res.func_vals.append(f(current_x)) res.x_iters.append(np.copy(current_x)) res.fun = res.func_vals[-1] # current sampling radius (maximal) max_radius = 0 for x in new_xs: if np.linalg.norm(x - current_x) > max_radius: max_radius = np.linalg.norm(x - current_x) reward = [-y for y in new_ys] # warmup steps control whether to use the model to estimate the f if num_iter >= warmup_steps: # Determine points to use to build model model_xs = [] model_ys = [] for x, y in zip(known_xs, known_ys): if np.linalg.norm(x - current_x) < radius_coeff * max_radius: model_xs.append(x) model_ys.append(y) # safer way without the `SVD` not converging try: model = _get_quadratic_model(model_xs, model_ys, x) use_model = True except ValueError: use_model = False if use_model: # get samples (from model) z = random_state.randn(batch_size_model, n) new_xs = sigma * z + current_x # use the model for prediction new_ys = model.predict(new_xs - current_x) reward = [-y for y in new_ys] reward = np.array(reward) # stats reward_mean = np.mean(reward) reward_max = np.max(reward) if reward_max > history_max: history_max = reward_max # subtract baseline reward = reward - reward_mean # analytic derivatives (natural gradient policy gradient) delta_mean = np.dot(z.T, reward) * sigma delta_log_sigma = np.dot(z.T**2, reward) / np.sqrt(2) delta_mean_norm = np.linalg.norm(np.dot(z.T, reward)) delta_log_sigma_norm = np.linalg.norm(np.dot(z.T**2, reward)) delta_mean = delta_mean / delta_mean_norm delta_log_sigma = delta_log_sigma / delta_log_sigma_norm # gradient ascend to update the parameters current_x, m_mean, v_mean = _adam_update(delta_mean, current_x, num_iter, m_mean, v_mean, lr_schedule=lr_schedule1) log_sigma, m_log_sigma, v_log_sigma = _adam_update( delta_log_sigma, log_sigma, num_iter, m_log_sigma, v_log_sigma, lr_schedule=lr_schedule2, ) log_sigma = np.clip(log_sigma, -20.0, 2.0) sigma = np.exp(log_sigma) num_iter += 1 final_val = f(current_x) res.func_vals.append(final_val) if message is None: message = "Reached 
maximum number of iterations." res.x_iters.append(current_x) total_evals += 1 res.x = current_x res.fun = final_val res.nit = num_iter res.nfev = total_evals res.message = message return res
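
# Minimal sketch of an Adam-style update of the kind `_adam_update` above is
# assumed to perform. The real helper is defined elsewhere in the module; the
# signature, bias-correction details, and default hyperparameters here are
# assumptions for illustration only.
import numpy as np

def _adam_update_sketch(grad, x, step, m, v, lr=1e-2,
                        beta1=0.9, beta2=0.999, eps=1e-8):
    """One gradient-ascent step using first/second moment estimates."""
    m = beta1 * m + (1 - beta1) * grad             # first moment estimate
    v = beta2 * v + (1 - beta2) * grad ** 2        # second moment estimate
    m_hat = m / (1 - beta1 ** (step + 1))          # bias correction
    v_hat = v / (1 - beta2 ** (step + 1))
    x = x + lr * m_hat / (np.sqrt(v_hat) + eps)    # ascent, as in the caller
    return x, m, v
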
def _tree_minimize(func, dimensions, base_estimator, n_calls,
                   n_points, n_random_starts, x0=None, y0=None,
                   random_state=None, acq="EI", xi=0.01, kappa=1.96):
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Initialize with provided points (x0 and y0) and/or random points
    if n_calls <= 0:
        raise ValueError(
            "Expected `n_calls` > 0, got %d" % n_calls)

    if x0 is None:
        x0 = []
    elif not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))
    elif not isinstance(x0[0], list):
        x0 = [x0]

    n_init_func_calls = len(x0) if y0 is not None else 0
    n_total_init_calls = n_random_starts + n_init_func_calls

    if n_total_init_calls <= 0:
        # if x0 is not provided and n_random_starts is 0 then
        # it will ask for n_random_starts to be > 0.
        raise ValueError(
            "Expected `n_random_starts` > 0, got %d" % n_random_starts)

    if n_calls < n_total_init_calls:
        raise ValueError(
            "Expected `n_calls` >= %d, got %d" % (n_total_init_calls, n_calls))

    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError(
                "`y0` should be an iterable or a scalar, got %s" % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")
    else:
        y0 = []

    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    # Tree-based optimization loop
    models = []

    n_model_iter = n_calls - n_total_init_calls
    for i in range(n_model_iter):
        rgr = clone(base_estimator)
        rgr.fit(space.transform(Xi), yi)
        models.append(rgr)

        # `rgr` predicts constants for each leaf which means that the EI
        # has zero gradient over large distances. As a result we can not
        # use gradient based optimizers like BFGS, so using random sampling
        # for the moment.
        X = space.transform(space.rvs(n_samples=n_points, random_state=rng))
        values = _gaussian_acquisition(
            X=X, model=rgr, y_opt=np.min(yi), method=acq,
            xi=xi, kappa=kappa)
        next_x = X[np.argmin(values)]
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]

        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng

    return res

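
# Usage sketch for `_tree_minimize` above. Hedged: it is a private helper, the
# toy objective is made up, and the base estimator is assumed to expose the
# uncertainty interface (`predict(..., return_std=True)`) that the Gaussian
# acquisition typically needs -- e.g. the ExtraTreesRegressor wrapper shipped
# with scikit-optimize rather than the plain scikit-learn estimator.
from skopt.learning import ExtraTreesRegressor  # assumed available

def _toy_objective_2d(x):
    # simple bowl-shaped 2-D objective, chosen only for demonstration
    return (x[0] - 0.3) ** 2 + (x[1] + 0.2) ** 2

_tree_res = _tree_minimize(
    _toy_objective_2d,
    dimensions=[(-2.0, 2.0), (-2.0, 2.0)],       # two Real dimensions
    base_estimator=ExtraTreesRegressor(n_estimators=30),
    n_calls=30,
    n_points=1000,
    n_random_starts=10,
    acq="EI",
    random_state=0)
print(_tree_res.x, _tree_res.fun)
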
def gp_minimize(func, bounds=None, search="sampling", random_state=None,
                maxiter=1000, acq="UCB", num_points=500):
    """
    Black-box optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance when the
    parameters are the hyperparameters of a neural network and the function
    evaluation is the mean cross-validation score across ten folds,
    optimizing the hyperparameters by standard optimization routines would
    take forever!

    The idea is to approximate the function using a Gaussian process. In
    other words the function values are assumed to follow a multivariate
    Gaussian. The covariance of the function values is given by a GP kernel
    between the parameters. Then a smart choice of the next parameter to
    evaluate can be made by the acquisition function over the Gaussian
    posterior, which is much quicker to evaluate.

    Parameters
    ----------
    func: callable
        Function to minimize. Should take an array of parameters and
        return the function value.

    bounds: array-like, shape (n_parameters, 2)
        ``bounds[i][0]`` should give the lower bound of each parameter and
        ``bounds[i][1]`` should give the upper bound of each parameter.

    search: string, "sampling" or "lbfgs"
        Searching for the next possible candidate to update the Gaussian
        prior with.

        If search is set to "sampling", ``num_points`` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian posterior.

        If search is set to "lbfgs", then a point is sampled randomly, and
        lbfgs is run for 10 iterations optimizing the acquisition function
        over the Gaussian posterior.

    random_state: int, RandomState instance, or None (default)
        Set random state to something other than None for reproducible
        results.

    maxiter: int, default 1000
        Number of iterations to find the minimum. In other words, the
        number of function evaluations.

    acq: string, default "UCB"
        Function to minimize over the Gaussian posterior. Can be either
        "UCB", which refers to the Upper Confidence Bound, or "EI", which
        is the Expected Improvement.

    num_points: int, default 500
        Number of points to sample to determine the next "best" point.
        Useless if search is set to "lbfgs".

    Returns
    -------
    res: OptimizeResult, scipy object
        The optimization result returned as a OptimizeResult object.
        Important attributes are
        ``x`` - float, the optimization solution,
        ``fun`` - float, the value of the function at the optimum,
        ``models`` - gp_models[i], the prior on the function fit at
        iteration[i],
        ``func_vals`` - the function value at the ith iteration,
        ``x_iters`` - the value of ``x`` corresponding to the function
        value at the ith iteration.
For more details related to the OptimizeResult object, refer http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html """ rng = np.random.RandomState(random_state) num_params = len(bounds) lower_bounds, upper_bounds = zip(*bounds) upper_bounds = np.asarray(upper_bounds) lower_bounds = np.asarray(lower_bounds) x0 = rng.rand(num_params) func_val = [func(lower_bounds + (upper_bounds - lower_bounds) * x0)] length_scale = np.ones(num_params) gp_params = { 'kernel': Matern(length_scale=length_scale, nu=2.5), 'normalize_y': True, 'random_state': random_state } lbfgs_bounds = np.tile((0, 1), (num_params, 1)) gp_models = [] x = np.reshape(x0, (1, -1)) for i in range(maxiter): gpr = GaussianProcessRegressor(**gp_params) gpr.fit(x, func_val) if search == "sampling": sampling = rng.rand(num_points, num_params) acquis = acquisition_func(sampling, gpr, np.min(func_val), acq) best_arg = np.argmin(acquis) best_x = sampling[best_arg] elif search == "lbfgs": init = rng.rand(num_params) best_x, _, _ = fmin_l_bfgs_b( acquisition_func, np.asfortranarray(init), args=(gpr, np.min(func_val), acq), bounds=lbfgs_bounds, approx_grad=True, maxiter=10) gp_models.append(gpr) best_f = func(lower_bounds + (upper_bounds - lower_bounds) * best_x) x_list = x.tolist() x_list.append(best_x) x = np.asarray(x_list) func_val.append(best_f) x = lower_bounds + (upper_bounds - lower_bounds) * x func_ind = np.argmin(func_val) x_val = x[func_ind] best_func_val = func_val[func_ind] res = OptimizeResult() res.models = gp_models res.x = x_val res.fun = best_func_val res.func_vals = func_val res.x_iters = x return res
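
# Usage sketch for `gp_minimize` above on a toy 1-D objective. The objective,
# bounds, and settings are illustrative only, not recommendations.
def _toy_objective_1d(params):
    return (params[0] - 0.5) ** 2

_gp_res = gp_minimize(_toy_objective_1d,
                      bounds=[(0.0, 1.0)],
                      search="sampling",
                      acq="EI",
                      maxiter=20,
                      num_points=200,
                      random_state=0)
print(_gp_res.x, _gp_res.fun)
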
def create_result(Xi, yi, n_evaluations=None, space=None, rng=None,
                  specs=None, models=None, maximize=False):
    """
    Initialize an `OptimizeResult` object.

    Parameters
    ----------
    * `Xi` [list of lists, shape=(n_iters, n_features)]:
        Location of the minimum at every iteration.

    * `yi` [array-like, shape=(n_iters,)]:
        Minimum value obtained at every iteration.

    * `n_evaluations` [int, optional]:
        If given, store only the `n_evaluations` best function values and
        their locations in `func_vals` and `x_iters`; the full history is
        kept in `all_func_vals` and `all_x_iters`.

    * `space` [Space instance, optional]:
        Search space.

    * `rng` [RandomState instance, optional]:
        State of the random state.

    * `specs` [dict, optional]:
        Call specifications.

    * `models` [list, optional]:
        List of fit surrogate models.

    * `maximize` [bool, default=False]:
        If True, the best point is the one with the largest function value;
        otherwise the one with the smallest.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        OptimizeResult instance with the required information.
    """
    res = OptimizeResult()

    try:
        # Hyperband returns evaluations as lists of lists.
        # We want to store the results as a single array.
        yi = list(itertools.chain.from_iterable(yi))
        Xi = list(itertools.chain.from_iterable(Xi))
    except TypeError:
        # All algorithms other than Hyperband already return a single list.
        pass

    yi = np.asarray(yi)
    if np.ndim(yi) == 2:
        res.log_time = np.ravel(yi[:, 1])
        yi = np.ravel(yi[:, 0])

    if maximize:
        best = np.argmax(yi)
    else:
        best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]

    if n_evaluations:
        unique, sort_indices = np.unique(yi, return_index=True)
        if len(unique) < n_evaluations:
            func_sort_idx = np.argsort(yi)
            func_vals = sorted(yi)
            res.func_vals = np.asarray(func_vals[:n_evaluations])
            x_iter_sort = []
            for idx in func_sort_idx:
                x_iter_sort.append(Xi[idx])
            res.x_iters = np.asarray(x_iter_sort[:n_evaluations])
            res.all_func_vals = np.asarray(yi)
            res.all_x_iters = np.asarray(Xi)
        else:
            func_vals = sorted(unique)
            res.func_vals = np.asarray(func_vals[:n_evaluations])
            x_iter_sort = []
            for idx in sort_indices:
                x_iter_sort.append(Xi[idx])
            res.x_iters = np.asarray(x_iter_sort[:n_evaluations])
            res.all_func_vals = np.asarray(yi)
            res.all_x_iters = np.asarray(Xi)
    else:
        res.func_vals = np.asarray(yi)
        res.x_iters = np.asarray(Xi)

    res.models = models
    res.space = space
    res.random_state = rng
    res.specs = specs
    return res

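
# Small illustration of the Hyperband flattening in `create_result`: nested
# per-bracket evaluations are chained into flat lists before packing. The
# values below are made up for demonstration.
import itertools

_nested_yi = [[0.9, 0.5], [0.7]]
_nested_Xi = [[[1.0], [2.0]], [[3.0]]]
print(list(itertools.chain.from_iterable(_nested_yi)))   # [0.9, 0.5, 0.7]
print(list(itertools.chain.from_iterable(_nested_Xi)))   # [[1.0], [2.0], [3.0]]
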
def dummy_minimize(func, dimensions, n_calls=100, x0=None, y0=None,
                   random_state=None):
    """Random search by uniform sampling within the given bounds.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take an array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `n_calls` [int, default=100]:
        Number of calls to `func` to find the minimum.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None`, no initial input points are used.

    * `y0` [list, scalar or `None`]:
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the
          function at each element of `x0` : the i-th element of `y0`
          corresponds to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [list of lists]: location of function evaluation for
          each iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimisation space.
        - `specs` [dict]: the call specifications.
        - `rng` [RandomState instance]: State of the random state at the
          end of minimization.

    For more details related to the OptimizeResult object, refer
    http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html
    """
    # Save call args
    specs = {"args": copy.copy(inspect.currentframe().f_locals),
             "function": inspect.currentframe().f_code.co_name}

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    if x0 is None:
        x0 = []
    elif not isinstance(x0, list):
        raise ValueError("`x0` should be a list, got %s" % type(x0))
    elif not isinstance(x0[0], list):
        x0 = [x0]

    if len(x0) > 0 and y0 is not None:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError(
                "`y0` should be an iterable or a scalar, got %s" % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")

    elif len(x0) > 0 and y0 is None:
        y0 = []
        n_calls -= len(x0)

    elif len(x0) == 0 and y0 is not None:
        raise ValueError("`x0` cannot be `None` when `y0` is provided")

    else:  # len(x0) == 0 and y0 is None
        y0 = []

    X = x0
    y = y0

    # Random search
    X = X + space.rvs(n_samples=n_calls, random_state=rng)
    first = True

    for i in range(len(y0), len(X)):
        y_i = func(X[i])

        if first:
            first = False
            if not np.isscalar(y_i):
                raise ValueError("`func` should return a scalar")

        y.append(y_i)

    y = np.array(y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(y)
    res.x = X[best]
    res.fun = y[best]
    res.func_vals = y
    res.x_iters = X
    res.models = []  # Create attribute even though it is empty
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res

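
# Usage sketch for `dummy_minimize` above with mixed dimension types. The toy
# objective and search space are chosen only for demonstration.
def _toy_mixed_objective(params):
    x, n = params
    return (x - 0.3) ** 2 + 0.1 * n

_dummy_res = dummy_minimize(_toy_mixed_objective,
                            dimensions=[(-1.0, 1.0), (0, 5)],  # Real, Integer
                            n_calls=25,
                            random_state=0)
print(_dummy_res.x, _dummy_res.fun)
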
def model_gradient_descent( f: Callable[..., float], x0: np.ndarray, *, args=(), rate: float = 1e-1, sample_radius: float = 1e-1, n_sample_points: int = 100, n_sample_points_ratio: Optional[float] = None, rate_decay_exponent: float = 0.0, stability_constant: float = 0.0, sample_radius_decay_exponent: float = 0.0, tol: float = 1e-8, known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None, max_iterations: Optional[int] = None, max_evaluations: Optional[int] = None) -> scipy.optimize.OptimizeResult: """Model gradient descent algorithm for black-box optimization. The idea of this algorithm is to perform gradient descent, but estimate the gradient using a surrogate model instead of, say, by finite-differencing. The surrogate model is a least-squared quadratic fit to points sampled from the vicinity of the current iterate. This algorithm works well when you have an initial guess which is in the convex neighborhood of a local optimum and you want to converge to that local optimum. It's meant to be used when the function is stochastic. Args: f: The function to minimize. x0: An initial guess. args: Additional arguments to pass to the function. rate: The learning rate for the gradient descent. sample_radius: The radius around the current iterate to sample points from to build the quadratic model. n_sample_points: The number of points to sample in each iteration. n_sample_points_ratio: This specifies the number of points to sample in each iteration as a coefficient of the number of points required to exactly determine a quadratic model. The number of sample points will be this coefficient times (n+1)(n+2)/2, rounded up, where n is the number of parameters. Setting this overrides n_sample_points. rate_decay_exponent: Controls decay of learning rate. In each iteration, the learning rate is changed to the base learning rate divided by (i + 1 + S)**a, where S is the stability constant and a is the rate decay exponent (this parameter). stability_constant: Affects decay of learning rate. In each iteration, the learning rate is changed to the base learning rate divided by (i + 1 + S)**a, where S is the stability constant (this parameter) and a is the rate decay exponent. sample_radius_decay_exponent: Controls decay of sample radius. tol: The algorithm terminates when the difference between the current iterate and the next suggested iterate is smaller than this value. known_values: Any prior known values of the objective function. This is given as a tuple where the first element is a list of points and the second element is a list of the function values at those points. max_iterations: The maximum number of iterations to allow before termination. max_evaluations: The maximum number of function evaluations to allow before termination. 
Returns: Scipy OptimizeResult """ if known_values is not None: known_xs, known_ys = known_values known_xs = [np.copy(x) for x in known_xs] known_ys = [np.copy(y) for y in known_ys] else: known_xs, known_ys = [], [] if max_iterations is None: max_iterations = np.inf if max_evaluations is None: max_evaluations = np.inf n = len(x0) if n_sample_points_ratio is not None: n_sample_points = int( np.ceil(n_sample_points_ratio * (n + 1) * (n + 2) / 2)) _, f = wrap_function(f, args) res = OptimizeResult() current_x = np.copy(x0) res.x_iters = [] # initializes as lists res.xs_iters = [] res.ys_iters = [] res.func_vals = [] res.model_vals = [None] res.fun = 0 total_evals = 0 num_iter = 0 converged = False message = None while num_iter < max_iterations: current_sample_radius = (sample_radius / (num_iter + 1)**sample_radius_decay_exponent) # Determine points to evaluate # in ball around current point new_xs = [np.copy(current_x)] + [ current_x + _random_point_in_ball(n, current_sample_radius) for _ in range(n_sample_points) ] if total_evals + len(new_xs) > max_evaluations: message = 'Reached maximum number of evaluations.' break # Evaluate points res.xs_iters.append(new_xs) new_ys = [f(x) for x in new_xs] res.ys_iters.append(new_ys) total_evals += len(new_ys) known_xs.extend(new_xs) known_ys.extend(new_ys) # Save function value res.func_vals.append(new_ys[0]) res.x_iters.append(np.copy(current_x)) res.fun = res.func_vals[-1] # Determine points to use to build model model_xs = [] model_ys = [] for x, y in zip(known_xs, known_ys): if np.linalg.norm(x - current_x) < current_sample_radius: model_xs.append(x) model_ys.append(y) # Build and solve model model_gradient, model = _get_least_squares_model_gradient( model_xs, model_ys, current_x) # calculate the gradient and update the current point gradient_norm = np.linalg.norm(model_gradient) decayed_rate = ( rate / (num_iter + 1 + stability_constant)**rate_decay_exponent) # Convergence criteria if decayed_rate * gradient_norm < tol: converged = True message = 'Optimization converged successfully.' break # Update current_x -= decayed_rate * model_gradient res.model_vals.append( model.predict([-decayed_rate * model_gradient])[0]) num_iter += 1 if converged: final_val = res.func_vals[-1] else: final_val = f(current_x) res.func_vals.append(final_val) if message is None: message = 'Reached maximum number of iterations.' res.x_iters.append(current_x) total_evals += 1 res.x = current_x res.fun = final_val res.nit = num_iter res.nfev = total_evals res.message = message return res
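
# Illustrative sketch of the surrogate-gradient idea behind
# `model_gradient_descent`. Hedged: `_get_least_squares_model_gradient` is
# defined elsewhere in the module; this standalone version only demonstrates
# the technique -- fit a quadratic y ~ c + g.x + x^T Q x by least squares to
# points near the current iterate and read off the gradient g at that point.
import numpy as np

def _quadratic_model_gradient(xs, ys, center):
    xs = np.asarray(xs) - np.asarray(center)      # shift so center is origin
    n = xs.shape[1]
    # design matrix: [1, x_i, all quadratic monomials x_i * x_j (i <= j)]
    quad_cols = [xs[:, i] * xs[:, j] for i in range(n) for j in range(i, n)]
    A = np.column_stack([np.ones(len(xs)), xs] + quad_cols)
    coeffs, *_ = np.linalg.lstsq(A, np.asarray(ys), rcond=None)
    return coeffs[1:n + 1]                        # gradient of the fit at center

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    center = np.zeros(2)
    pts = [center + 0.1 * rng.standard_normal(2) for _ in range(20)]
    vals = [float((p[0] - 1) ** 2 + (p[1] + 2) ** 2) for p in pts]
    print(_quadratic_model_gradient(pts, vals, center))  # close to (-2, 4)
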