def result(self):
    """Return the current minimum packaged as an ``OptimizeResult``.

    The result carries ``x``, ``fun``, ``message`` and ``nit``, read from
    ``_xmin``, ``_fvalue``, ``_message`` and ``_step_record`` respectively.
    """
    return OptimizeResult(
        x=self._xmin,
        fun=self._fvalue,
        message=self._message,
        nit=self._step_record,
    )
def result(self):
    """Return the best state found so far as an ``OptimizeResult``.

    ``x`` and ``fun`` come from ``self.es`` (``xbest`` / ``ebest``), ``nit``
    from ``self._iter`` and ``ncall`` from ``self.owf.nb_fun_call``.
    """
    return OptimizeResult(
        x=self.es.xbest,
        fun=self.es.ebest,
        nit=self._iter,
        ncall=self.owf.nb_fun_call,
    )
def scipy_nlopt_cobyla(*args, **kwargs):
    """Wraps nlopt library cobyla function to be compatible with scipy optimize

    parameters:
        args[0]: target, function to be minimized
        args[1]: x0, starting point for minimization
        bounds: list of bounds for the movement
                [[min, max], [min, max], ...] (required keyword)
        ftol_rel: same as in nlopt
        xtol_rel: same as in nlopt
        one of the tol_rel should be specified

    returns:
        OptimizeResult() object with properly set x, fun, success.
        success is True only when nlopt stopped with result code 3 or 4.
        When nlopt.RoundoffLimited is raised, the starting point is returned
        with success=False and a message; status is not set in that case.
    """
    answ = OptimizeResult()
    bounds = kwargs['bounds']

    opt = nlopt.opt(nlopt.LN_COBYLA, len(args[1]))
    opt.set_lower_bounds([i[0] for i in bounds])
    opt.set_upper_bounds([i[1] for i in bounds])
    if 'ftol_rel' in kwargs:
        opt.set_ftol_rel(kwargs['ftol_rel'])
    if 'xtol_rel' in kwargs:
        # This used to call set_ftol_rel, which overwrote the function
        # tolerance and left the parameter tolerance unset.
        opt.set_xtol_rel(kwargs['xtol_rel'])
    opt.set_min_objective(args[0])

    x0 = list(args[1])
    try:
        x1 = opt.optimize(x0)
    except nlopt.RoundoffLimited:
        # Fall back to the starting point and report the failure.
        answ.x = x0
        answ.fun = args[0](x0)
        answ.success = False
        answ.message = 'nlopt.RoundoffLimited'
        return answ

    status = opt.last_optimize_result()
    answ.x = x1
    answ.fun = args[0](x1)
    # NOTE(review): 3 / 4 are presumably nlopt's FTOL_REACHED / XTOL_REACHED
    # return codes -- confirm against the nlopt reference.
    answ.success = status in (3, 4)
    answ.status = status
    if answ.fun != opt.last_optimum_value():
        # Single-argument print keeps the same output on Python 2 and 3.
        print("Something's wrong,  %s %s"
              % (answ.fun, opt.last_optimum_value()))
    return answ
def setup_method(self):
    """Seed the storage under test with a known minimum (x = 1, f = 0)."""
    self.x0, self.f0 = np.array(1), 0
    self.storage = Storage(OptimizeResult(x=self.x0, fun=self.f0))
def _tree_minimize(func, dimensions, base_estimator, n_calls, n_points, n_random_starts, random_state=None): rng = check_random_state(random_state) space = Space(dimensions) # Initialize with random points if n_random_starts <= 0: raise ValueError( "Expected n_random_starts > 0, got %d" % n_random_starts) if n_calls <= 0: raise ValueError( "Expected n_calls > 0, got %d" % n_random_starts) if n_calls < n_random_starts: raise ValueError( "Expected n_calls >= %d, got %d" % (n_random_starts, n_calls)) Xi = space.rvs(n_samples=n_random_starts, random_state=rng) yi = [func(x) for x in Xi] if np.ndim(yi) != 1: raise ValueError( "The function to be optimized should return a scalar") # Tree-based optimization loop models = [] n_model_iter = n_calls - n_random_starts for i in range(n_model_iter): rgr = clone(base_estimator) rgr.fit(space.transform(Xi), yi) models.append(rgr) # `rgr` predicts constants for each leaf which means that the EI # has zero gradient over large distances. As a result we can not # use gradient based optimizers like BFGS, so using random sampling # for the moment. X = space.transform(space.rvs(n_samples=n_points, random_state=rng)) values = -gaussian_ei(X, rgr, np.min(yi)) next_x = X[np.argmin(values)] next_x = space.inverse_transform(next_x.reshape((1, -1)))[0] next_y = func(next_x) Xi = np.vstack((Xi, next_x)) yi.append(next_y) res = OptimizeResult() best = np.argmin(yi) res.x = Xi[best] res.fun = yi[best] res.func_vals = np.array(yi) res.x_iters = Xi res.models = models res.space = space return res
def test_higher_f_rejected(self):
    """A candidate with a larger function value must not replace the minimum."""
    worse = OptimizeResult(x=self.x0 + 1, fun=self.f0 + 1)

    accepted = self.storage.update(worse)
    lowest = self.storage.get_lowest()

    # The stored minimum is still the one the fixture was seeded with.
    assert_equal(self.x0, lowest.x)
    assert_equal(self.f0, lowest.fun)
    assert_(not accepted)
def test_lower_f_accepted(self):
    """A candidate with a smaller function value must replace the minimum."""
    better = OptimizeResult(x=self.x0 + 1, fun=self.f0 - 1)

    accepted = self.storage.update(better)
    lowest = self.storage.get_lowest()

    # The stored minimum no longer matches the values the fixture started with.
    assert_(self.x0 != lowest.x)
    assert_(self.f0 != lowest.fun)
    assert_(accepted)
def scipy_graduate_walk(*args, **kwargs):
    """Scipy-compatible graduate_walk function wrapper.

    parameters:
        args[0]: target, function to be minimized
        args[1]: x0, starting point for minimization
        dx=1e-8: step in change of the point
        dx_start=0.1: starting value for dx step, forwarded to graduate_walk
        dx_step=0.1: change of dx on each iteration, forwarded to
                     graduate_walk
        diagonal=False: when truthy, directions come from
                        generate_all_directions, otherwise from
                        generate_nondiagonal_directions
        bounds=None: list of bounds for the movement
                     [[min, max], [min, max], ...]
                     if missing or None, bounds are ignored
        ytol_rel=1e-8: relative tolerance for search stop.
                       See graduate_walk for more info.

    returns:
        OptimizeResult() object with x, fun, nfev taken from the
        graduate_walk result. success is always set to True, status to 1
    """
    target, x0 = args[0], args[1]

    dx = kwargs.get('dx', 1e-8)
    dx_start = kwargs.get('dx_start', 0.1)
    dx_step = kwargs.get('dx_step', 0.1)
    ytol_rel = kwargs.get('ytol_rel', 1e-8)

    if kwargs.get('diagonal'):
        directions = generate_all_directions(len(x0))
    else:
        directions = generate_nondiagonal_directions(len(x0))

    raw_bounds = kwargs.get('bounds')
    bounds = None if raw_bounds is None else Bounds(raw_bounds)

    outcome = graduate_walk(target, x0, dx, directions, dx_start, dx_step,
                            bounds=bounds, ytol_rel=ytol_rel)

    return OptimizeResult(
        x=outcome['x0'],
        fun=outcome['fval'],
        success=True,
        status=1,
        nfev=outcome['fnval'],
    )
def create_result(Xi, yi, space=None, rng=None, specs=None, models=None):
    """
    Build an `OptimizeResult` from the recorded evaluations.

    Parameters
    ----------
    * `Xi` [list of lists, shape=(n_iters, n_features)]:
        Point evaluated at every iteration.

    * `yi` [array-like, shape=(n_iters,) or (n_iters, 2)]:
        Value obtained at every iteration. When two-dimensional, column 0
        is used as the objective value and column 1 is stored as `log_time`.

    * `space` [Space instance, optional]:
        Search space.

    * `rng` [RandomState instance, optional]:
        State of the random state; stored as `random_state`.

    * `specs` [dict, optional]:
        Call specifications.

    * `models` [list, optional]:
        List of fit surrogate models.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        OptimizeResult instance holding `x`, `fun`, `func_vals`, `x_iters`,
        `models`, `space`, `random_state` and `specs` (plus `log_time` for
        two-dimensional `yi`).
    """
    res = OptimizeResult()
    values = np.asarray(yi)

    if values.ndim == 2:
        # Split the two columns before searching for the minimum.
        res.log_time = values[:, 1].ravel()
        values = values[:, 0].ravel()

    idx_best = np.argmin(values)
    res.update(
        x=Xi[idx_best],
        fun=values[idx_best],
        func_vals=values,
        x_iters=Xi,
        models=models,
        space=space,
        random_state=rng,
        specs=specs,
    )
    return res
def get_optimization_results(t, population, factorial_cost, scalar_fitness,
                             skill_factor, message):
    """Build one ``OptimizeResult`` per distinct value in ``skill_factor``.

    For every index ``k`` in ``range(len(set(skill_factor)))`` the result
    holds the best individual (``x``, ``fun``) from ``get_best_individual``,
    the ``mean`` / ``std`` from ``get_statistics``, the given ``message``,
    ``nit = t`` and ``nfev = (t + 1) * (len(population) // 2)``.

    Returns the results as a list ordered by ``k``.
    """
    n_tasks = len(set(skill_factor))
    half_population = len(population) // 2

    def _summarise(task):
        # Collect everything reported for a single task index.
        best_x, best_fun = get_best_individual(
            population, factorial_cost, scalar_fitness, skill_factor, task)
        summary = OptimizeResult(
            x=best_x,
            fun=best_fun,
            message=message,
            nit=t,
            nfev=(t + 1) * half_population,
        )
        summary.mean, summary.std = get_statistics(
            factorial_cost, skill_factor, task)
        return summary

    return [_summarise(task) for task in range(n_tasks)]
def scipy_walk(*args, **kwargs):
    """Scipy-compatible walk function wrapper.

    parameters:
        args[0]: target, function to be minimized
        args[1]: x0, starting point for minimization
        dx=1e-8: step in change of the point
        diagonal=False: when truthy, directions come from
                        generate_all_directions, otherwise from
                        generate_nondiagonal_directions
        bounds=None: list of bounds for the movement
                     [[min, max], [min, max], ...]
                     if missing or None, bounds are ignored
        ytol_rel=1e-8: relative tolerance for search stop.
                       See walk for more info.

    returns:
        OptimizeResult() object with x, fun, nfev taken from the walk
        result. success is always set to True, status to 1
    """
    target, x0 = args[0], args[1]

    dx = kwargs.get('dx', 1e-8)
    ytol_rel = kwargs.get('ytol_rel', 1e-8)

    if kwargs.get('diagonal'):
        directions = generate_all_directions(len(x0))
    else:
        directions = generate_nondiagonal_directions(len(x0))

    raw_bounds = kwargs.get('bounds')
    bounds = None if raw_bounds is None else Bounds(raw_bounds)

    outcome = walk(target, x0, dx, directions,
                   bounds=bounds, ytol_rel=ytol_rel)

    return OptimizeResult(
        x=outcome['x0'],
        fun=outcome['fval'],
        success=True,
        status=1,
        nfev=outcome['fnval'],
    )
def steepest_decent(fun, x0, fprime, args, tol=1.0e-4, maxiter=1000,
                    callback=None):
    """Steepest descent method with the step size chosen by ``armijo_stepsize``.

    Every iteration moves along ``-fprime(x, *args)``. The loop stops early
    when the norm of the difference between the two objective values returned
    by ``armijo_stepsize`` falls below ``tol``.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x, *args)``.
    x0 : array-like
        Starting point.
    fprime : callable
        Gradient, called as ``fprime(x, *args)``.
    args : tuple
        Extra arguments passed to ``fun`` and ``fprime``.
    tol : float
        Stopping threshold on the objective change.
    maxiter : int
        Maximum number of iterations.
    callback : callable, optional
        Called as ``callback(x)`` after every update of ``x``.

    Returns
    -------
    result : OptimizeResult
        ``x`` is the final point, ``fun`` is ``fun(x, *args)`` and ``nit`` is
        the number of updates applied to ``x``.
    """
    x = numpy.array(x0)
    # Count applied updates explicitly: the loop index is unbound when
    # maxiter == 0 and is one short when the loop runs to exhaustion.
    nit = 0
    for _ in range(maxiter):
        direction = -1 * fprime(x, *args)
        alpha, obj_current, obj_next = armijo_stepsize(
            fun, x, fprime, direction, args=args)
        if numpy.linalg.norm(obj_current - obj_next) < tol:
            break
        x = x + alpha * direction
        nit += 1
        if callback is not None:
            callback(x)

    result = OptimizeResult()
    result.x = x
    result.fun = fun(x, *args)
    result.nit = nit
    return result
def newton_method(fun, x0, fprime, args, tol=1.0e-4, maxiter=1000,
                  callback=None):
    """Newton's method with the step size chosen by ``armijo_stepsize``.

    ``args`` must unpack to ``(A, b)``. The search direction at every
    iteration is ``-solve(A, fprime(x, *args))``. The loop stops early when
    the norm of the difference between the two objective values returned by
    ``armijo_stepsize`` falls below ``tol``.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x, *args)``.
    x0 : array-like
        Starting point.
    fprime : callable
        Gradient, called as ``fprime(x, *args)``.
    args : tuple
        Two-element tuple ``(A, b)``; ``A`` is the matrix of the linear
        system solved for the direction.
    tol : float
        Stopping threshold on the objective change.
    maxiter : int
        Maximum number of iterations.
    callback : callable, optional
        Called as ``callback(x)`` after every update of ``x``.

    Returns
    -------
    result : OptimizeResult
        ``x`` is the final point, ``fun`` is ``fun(x, *args)`` and ``nit`` is
        the number of updates applied to ``x``.
    """
    x = numpy.array(x0)
    # Only A is used here; unpacking still enforces the (A, b) shape.
    A, _ = args
    # Count applied updates explicitly: the loop index is unbound when
    # maxiter == 0 and is one short when the loop runs to exhaustion.
    nit = 0
    for _ in range(maxiter):
        direction = -1 * numpy.linalg.solve(A, fprime(x, *args))
        alpha, obj_current, obj_next = armijo_stepsize(
            fun, x, fprime, direction, args=args)
        if numpy.linalg.norm(obj_current - obj_next) < tol:
            break
        x = x + alpha * direction
        nit += 1
        if callback is not None:
            callback(x)

    result = OptimizeResult()
    result.x = x
    result.fun = fun(x, *args)
    result.nit = nit
    return result
def create_result(Xi, yi, space=None, rng=None, specs=None, models=None):
    """
    Build an `OptimizeResult` from the recorded evaluations.

    Parameters
    ----------
    Xi : list of lists, shape (n_iters, n_features)
        Point evaluated at every iteration.

    yi : array-like, shape (n_iters,) or (n_iters, 2)
        Value obtained at every iteration. When two-dimensional, column 0
        is used as the objective value and column 1 is stored as
        ``log_time``.

    space : Space instance, optional
        Search space.

    rng : RandomState instance, optional
        State of the random state; stored as ``random_state``.

    specs : dict, optional
        Call specifications.

    models : list, optional
        List of fit surrogate models.

    Returns
    -------
    res : `OptimizeResult`, scipy object
        OptimizeResult instance holding ``x``, ``fun``, ``func_vals``,
        ``x_iters``, ``models``, ``space``, ``random_state`` and ``specs``
        (plus ``log_time`` for two-dimensional ``yi``).
    """
    res = OptimizeResult()
    observed = np.asarray(yi)

    if observed.ndim == 2:
        objective_column, extra_column = observed[:, 0], observed[:, 1]
        res.log_time = np.ravel(extra_column)
        observed = np.ravel(objective_column)

    winner = np.argmin(observed)
    fields = (
        ("x", Xi[winner]),
        ("fun", observed[winner]),
        ("func_vals", observed),
        ("x_iters", Xi),
        ("models", models),
        ("space", space),
        ("random_state", rng),
        ("specs", specs),
    )
    for key, value in fields:
        res[key] = value
    return res
def solve(self):
    """
    Runs the DifferentialEvolutionSolver.

    Each generation advances the population with ``next(self)`` and then
    calls ``self.migration()``. The callback hook and the tolerance-based
    convergence test are commented out below, so the loop only ends when
    ``next(self)`` raises ``StopIteration`` or after ``self.maxiter``
    generations. Progress messages are printed on every generation
    regardless of ``self.disp``.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a ``OptimizeResult`` object.
        Important attributes are: ``x`` the solution array, ``success`` a
        Boolean flag indicating if the optimizer exited successfully and
        ``message`` which describes the cause of the termination. See
        `OptimizeResult` for a description of other attributes. If `polish`
        was employed, and a lower minimum was obtained by the polishing,
        then OptimizeResult also contains the ``jac`` attribute.
    """
    nit, warning_flag = 0, False
    # start from the standard "success" message; it is overwritten below
    # when the run stops on StopIteration or exhausts maxiter
    status_message = _status_message['success']

    # The population may have just been initialized (all entries are
    # np.inf). If it has you have to calculate the initial energies.
    # Although this is also done in the evolve generator it's possible
    # that someone can set maxiter=0, at which point we still want the
    # initial energies to be calculated (the following loop isn't run).
    # np.all(np.isinf(...)) is true only when every energy is still inf.
    if self.maxiter == 0:
        if np.all(np.isinf(self.population_energies)):
            if self.disp:
                print("Calculating initial energies when maxiter = 0")
            self._calculate_population_energies()
    # for i in range(self.num_population_members):
    #     print(self.population[i,:])

    # do the optimisation.
    for nit in xrange(1, self.maxiter + 1):
        if self.disp:
            print("iter: ", nit)
        # evolve the population by a generation
        try:
            next(self)
        except StopIteration:
            # reported with the standard 'maxfev' status message
            warning_flag = True
            status_message = _status_message['maxfev']
            break

        print("differential_evolution step %d: f(x)= %g"
              % (nit,
                 self.population_energies[0]))
        # save populations at each iter and rank to analyze after
        # np.save("before_rank"+str(self.rank)+"iter"+str(nit), self.population)
        # migrate
        self.migration()
        # np.save("after_rank"+str(self.rank)+"iter"+str(nit), self.population)

        # should the solver terminate?
        # NOTE: the callback and convergence checks below are disabled.
        # print("Checking if should converge")
        # convergence = self.convergence
        #
        # if (self.callback and
        #         self.callback(self._scale_parameters(self.population[0]),
        #                       convergence=self.tol / convergence) is True):
        #
        #     warning_flag = True
        #     status_message = ('callback function requested stop early '
        #                       'by returning True')
        #     break
        # print("checking if tolerance level reached")
        ## intol = (np.std(self.population_energies) <=
        ##          self.atol +
        ##          self.tol * np.abs(np.mean(self.population_energies)))
        #
        # intol = self.population_energies[0] <= self.mse_thresh
        # if warning_flag or intol:
        #     print("stopping iterations")
        #     break
        print("Starting next iter")

    else:
        # the loop finished without a break: maxiter generations were run
        status_message = _status_message['maxiter']
        warning_flag = True

    # NOTE(review): every path above sets warning_flag to True (the only
    # break is StopIteration, otherwise the for/else runs), so `success`
    # is always False here -- confirm this is intended.
    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=self._nfev,
        nit=nit,
        message=status_message,
        success=(warning_flag is not True))
    print("done iters")
    if self.polish:
        print("performing final polishing")
        # refine the best member with L-BFGS-B inside the same limits
        result = minimize(self.func,
                          np.copy(DE_result.x),
                          method='L-BFGS-B',
                          bounds=self.limits.T,
                          args=self.args)

        self._nfev += result.nfev
        DE_result.nfev = self._nfev

        # only adopt the polished point when it is strictly better
        if result.fun < DE_result.fun:
            DE_result.fun = result.fun
            DE_result.x = result.x
            DE_result.jac = result.jac
            # to keep internal state consistent
            self.population_energies[0] = result.fun
            self.population[0] = self._unscale_parameters(result.x)

    return DE_result
def dummy_minimize(func, dimensions, n_calls=100, random_state=None):
    """Random search: evaluate `func` at `n_calls` points drawn from the space.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(upper_bound, lower_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `n_calls` [int, default=100]:
        Number of calls to `func` to find the minimum.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [float]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimisation space.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html
    """
    rng = check_random_state(random_state)
    space = Space(dimensions)
    points = space.rvs(n_samples=n_calls, random_state=rng)

    # The first evaluation doubles as the scalar-return check.
    first_value = func(points[0])
    if not np.isscalar(first_value):
        raise ValueError(
            "The function to be optimized should return a scalar")

    values = [first_value]
    values.extend(func(points[i]) for i in range(1, n_calls))
    values = np.asarray(values)

    winner = np.argmin(values)
    return OptimizeResult(
        x=points[winner],
        fun=values[winner],
        func_vals=values,
        x_iters=points,
        space=space,
    )
def gp_minimize(func, dimensions, base_estimator=None, alpha=10e-10,
                acq="EI", xi=0.01, kappa=1.96, search="auto", n_calls=100,
                n_points=500, n_random_starts=10, n_restarts_optimizer=5,
                x0=None, y0=None, random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take for ever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian prior which is much quicker to evaluate.

    The total number of evaluations, `n_calls`, are performed like the
    following. If `x0` is provided but not `y0`, then the elements of `x0`
    are first evaluated, followed by `n_random_starts` evaluations.
    Finally, `n_calls - len(x0) - n_random_starts` evaluations are
    made guided by the surrogate model. If `x0` and `y0` are both
    provided then `n_random_starts` evaluations are first made then
    `n_calls - n_random_starts` subsequent evaluations are made
    guided by the surrogate model.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(upper_bound, lower_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization. When
        `None`, a `GaussianProcessRegressor` with a
        `ConstantKernel * Matern(nu=2.5)` kernel is built.

    * `alpha` [float, default=1e-9]:
        Value added to the diagonal of the kernel matrix during fitting.
        Larger values correspond to an increased noise level in the
        observations and reduce potential numerical issues during fitting.
        Only used when the default estimator is built.

    * `acq` [string, default=`"EI"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"auto"`, `"sampling"` or `"lbfgs"`, default=`"auto"`]:
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to `"auto"`, then it is set to `"lbfgs"` if
        all the search dimensions are Real(continuous). It defaults to
        `"sampling"` for all other cases.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then `n_restarts_optimizer` points
        are sampled randomly, lbfgs is run from each of them for at most 20
        iterations optimizing the acquisition function over the Gaussian
        prior, and the best outcome is kept.

    * `n_calls` [int, default=100]:
        Number of calls to `func`.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_random_starts` [int, default=10]:
        Number of evaluations of `func` with random initialization points
        before approximating the `func` with `base_estimator`.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None`, no initial input points are used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimization space.
        - `specs` [dict]: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`:
        If `x0` / `y0` are malformed, if no initial point is available, if
        `n_calls` is smaller than the number of initial evaluations, if
        `search` is not a known mode, or if `func` does not return a scalar.
    """
    # Save call args. This must stay the first statement so that f_locals
    # holds only the call arguments.
    specs = {"args": copy.copy(inspect.currentframe().f_locals),
             "function": inspect.currentframe().f_code.co_name}

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Default GP
    if base_estimator is None:
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) *
                    Matern(length_scale=np.ones(space.transformed_n_dims),
                           length_scale_bounds=[(0.01, 100)] *
                           space.transformed_n_dims,
                           nu=2.5)),
            normalize_y=True, alpha=alpha, random_state=random_state)

    # Initialize with provided points (x0 and y0) and/or random points
    if x0 is None:
        x0 = []
    elif not isinstance(x0[0], list):
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))

    # Evaluating x0 consumes function calls only when y0 is NOT supplied.
    # The condition used to be inverted, which contradicted the call budget
    # described in the docstring.
    n_init_func_calls = len(x0) if y0 is None else 0
    n_total_init_calls = n_random_starts + n_init_func_calls

    if n_random_starts + len(x0) <= 0:
        # if x0 is not provided and n_random_starts is 0 then
        # it will ask for n_random_starts to be > 0.
        raise ValueError(
            "Expected `n_random_starts` > 0, got %d" % n_random_starts)

    if n_calls < n_total_init_calls:
        raise ValueError(
            "Expected `n_calls` >= %d, got %d" % (n_total_init_calls, n_calls))

    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError(
                "`y0` should be an iterable or a scalar, got %s" % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError(
                "`y0` elements should be scalars")
    else:
        y0 = []

    # `x0 + ...` builds a new list, so the caller's x0 is never mutated.
    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    if search == "auto":
        if space.is_real:
            search = "lbfgs"
        else:
            search = "sampling"
    elif search not in ["lbfgs", "sampling"]:
        raise ValueError(
            "Expected search to be 'lbfgs', 'sampling' or 'auto', "
            "got %s" % search)

    # Bayesian optimization loop
    models = []
    n_model_iter = n_calls - n_total_init_calls
    for _ in range(n_model_iter):
        gp = clone(base_estimator)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(space.transform(Xi), yi)

        models.append(gp)

        if search == "sampling":
            X = space.transform(space.rvs(n_samples=n_points,
                                          random_state=rng))
            values = _gaussian_acquisition(
                X=X, model=gp, y_opt=np.min(yi), method=acq,
                xi=xi, kappa=kappa)
            next_x = X[np.argmin(values)]

        elif search == "lbfgs":
            best = np.inf

            for _restart in range(n_restarts_optimizer):
                # A separate name avoids shadowing the `x0` parameter.
                x_start = space.transform(space.rvs(n_samples=1,
                                                    random_state=rng))[0]

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _ = fmin_l_bfgs_b(
                        _acquisition, x_start,
                        args=(gp, np.min(yi), acq, xi, kappa),
                        bounds=space.transformed_bounds,
                        approx_grad=True, maxiter=20)

                if a < best:
                    next_x, best = x, a

        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
def solve(self):
    """
    Runs the DifferentialEvolutionSolver.

    In every generation all trial candidates are created first and then
    evaluated with a single ``self.evaluate_func`` call. The loop stops
    when ``convergence`` drops below ``self.tol``, when ``self.earlystop``
    returns ``True``, or after ``self.maxiter`` generations.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a ``OptimizeResult`` object.
        Important attributes are: ``x`` the solution array, ``success`` a
        Boolean flag indicating if the optimizer exited successfully and
        ``message`` which describes the cause of the termination. See
        `OptimizeResult` for a description of other attributes. If `polish`
        was employed, and a lower minimum was obtained by the polishing,
        then OptimizeResult also contains the ``jac`` attribute.
    """
    nfev, nit, warning_flag = 0, 0, False
    status_message = _status_message['success']

    # calculate energies to start with
    parameters = np.zeros_like(self.population, order='F')
    for index, candidate in enumerate(self.population):
        parameters[index, :] = self._scale_parameters(candidate)

    # one batched evaluation; counted as one call per population member
    self.population_energies[:] = self.evaluate_func(parameters)
    nfev += self.num_population_members

    # put the lowest energy into the best solution position.
    minval = np.argmin(self.population_energies)
    self._swap_best(minval)

    # NOTE(review): nothing assigns warning_flag between its initialisation
    # to False and this check, so this branch looks unreachable -- confirm.
    if warning_flag:
        return OptimizeResult(
            x=self.x,
            fun=self.population_energies[0],
            nfev=nfev,
            nit=nit,
            message=status_message,
            success=(warning_flag is not True))

    # do the optimisation.
    trials = np.zeros_like(self.population, order='F')
    for nit in range(1, self.maxiter + 1):
        # with dithering, draw a new scale inside [dither[0], dither[1])
        # for this generation
        if self.dither is not None:
            self.scale = self.random_number_generator.rand() * (
                self.dither[1] - self.dither[0]) + self.dither[0]

        # Unlike the standard DE, all the trials are created first and later
        # evaluated simultaneously.
        for index in range(self.num_population_members):
            # create a trial solution
            trials[index][:] = self._mutate(index)

            # ensuring that it's in the range [0, 1)
            self._ensure_constraint(trials[index])

            # scale from [0, 1) to the actual parameter value
            parameters[index][:] = self._scale_parameters(trials[index])

        # determine the energy of the objective function
        energies = self.evaluate_func(parameters)
        nfev += self.num_population_members

        # if the energy of the trial candidate is lower than the
        # original population member then replace it
        for index in range(self.num_population_members):
            if energies[index] < self.population_energies[index]:
                self.population[index] = trials[index]
                self.population_energies[index] = energies[index]

        # if the trial candidate also has a lower energy than the
        # best solution then replace that as well
        minval = np.argmin(self.population_energies)
        self._swap_best(minval)

        # stop when the fractional s.d. of the population is less than tol
        # of the mean energy
        convergence = (
            np.std(self.population_energies) /
            np.abs(np.mean(self.population_energies) + _MACHEPS))

        if self.disp:
            print("differential_evolution step %d: f(x)= %g"
                  % (nit,
                     self.population_energies[0]))

        # notify every registered callback with the current best solution
        if self.callbacks:
            for callback in self.callbacks:
                callback(step=nit, parameter=self.x,
                         cost=self.population_energies[0])

        # earlystop must return exactly True to stop the run
        if (self.earlystop and
                self.earlystop(
                    self.x, convergence=self.tol / convergence) is True):
            warning_flag = True
            status_message = ('earlystop function requested stop early '
                              'by returning True')
            break

        if convergence < self.tol or warning_flag:
            break

    else:
        # the loop finished without a break: maxiter generations were run
        status_message = _status_message['maxiter']
        warning_flag = True

    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=nfev,
        nit=nit,
        message=status_message,
        success=(warning_flag is not True))

    if self.polish:
        # refine the best member with L-BFGS-B inside the same limits
        result = minimize(self.func,
                          np.copy(DE_result.x),
                          method='L-BFGS-B',
                          bounds=self.limits.T,
                          args=self.args)

        nfev += result.nfev
        DE_result.nfev = nfev

        # only adopt the polished point when it is strictly better
        if result.fun < DE_result.fun:
            DE_result.fun = result.fun
            DE_result.x = result.x
            DE_result.jac = result.jac
            # to keep internal state consistent
            self.population_energies[0] = result.fun
            self.population[0] = self._unscale_parameters(result.x)

    return DE_result
def optimize_stiefel(func, X0, args=(), tau_max=.5, max_it=1, tol=1e-6,
                     disp=False, tau_find_freq=100):
    """
    Optimize a function over a Stiefel manifold.

    :param func: Function to be optimized; called as ``func(X, *args)`` and
                 expected to return the pair ``(F, G)``
    :param X0: Initial point for line search
    :param tau_max: Maximum step size
    :param max_it: Maximum number of iterations
    :param tol: Tolerance criteria to terminate line search
    :param disp: Choose whether to display output
    :param args: Extra arguments passed to the function
    :param tau_find_freq: A new step-size search is run whenever the
                          iteration number is a multiple of this value (and
                          also on the first iteration and after a
                          backtracking step)
    :returns: ``OptimizeResult`` with attributes ``tau_max``, ``X`` (capital
              X), ``nfev``, ``nit``, ``fun`` and ``success``
    """
    tol = float(tol)
    assert tol > 0, 'Tolerance must be positive'
    max_it = int(max_it)
    assert max_it > 0, 'The maximum number of iterations must be a positive '\
                       + 'integer'
    tau_max = float(tau_max)
    assert tau_max > 0, 'The parameter `tau_max` must be positive.'
    # NOTE(review): `k` is never read in this function.
    k = 0
    X = X0.copy()
    nit = 0
    nfev = 0
    success = False
    if disp:
        print 'Stiefel Optimization'.center(80)
        print '{0:4s} {1:11s} {2:5s}'.format('It', 'F', '(F - F_old) / F_old')
        print '-' * 30

    # line-search objective; its fields are refreshed on every iteration
    ls_func = LSFunc()
    ls_func.func = func
    decrease_tau = False
    # NOTE(review): `tau_max0` is never read in this function.
    tau_max0 = tau_max
    # NOTE(review): nit starts at 0 and is incremented at the top of the
    # body, so `nit <= max_it` allows max_it + 1 passes -- confirm intended.
    while nit <= max_it:
        nit += 1
        F, G = func(X, *args)
        F_old = F
        nfev += 1
        A = compute_A(G, X)
        ls_func.A = A
        ls_func.X = X
        ls_func.func_args = args
        ls_func.tau_max = tau_max
        # NOTE(review): `increased_tau` is assigned but never read.
        increased_tau = False
        if nit == 1 or decrease_tau or nit % tau_find_freq == 0:
            # Need to minimize ls_func with respect to each argument
            # The search variable is log(tau / tau_max): candidates lie in
            # [-10, 0] and are mapped back with exp(...) * tau_max below.
            tau_init = np.linspace(-10, 0., 3)[:, None]
            tau_d = np.linspace(-10, 0., 50)[:, None]
            tau_all, F_all = pybgo.minimize(ls_func, tau_init, tau_d,
                                            fixed_noise=1e-16,
                                            add_at_least=1, tol=1e-2,
                                            scale=True,
                                            train_every=1)[:2]
            # every row of tau_all is counted as one function evaluation
            nfev += tau_all.shape[0]
            idx = np.argmin(F_all)
            tau = np.exp(tau_all[idx, 0]) * tau_max
            # the best step sits at the upper limit: enlarge the limit
            if tau_max - tau <= 1e-6:
                tau_max = 1.2 * tau_max
                if disp:
                    print 'increasing tau_max to {0:1.5e}'.format(tau_max)
                increased_tau = True
            # the previous pass backtracked: shrink the limit
            if decrease_tau:
                tau_max = .8 * tau_max
                if disp:
                    print 'decreasing max_tau to {0:1.5e}'.format(tau_max)
                decrease_tau = False
            F = F_all[idx, 0]
        else:
            # reuse the step size found by the last search
            F = ls_func([np.log(tau / tau_max)])
        # relative decrease of the objective; negative means F went up
        delta_F = (F_old - F) / np.abs(F_old)
        if delta_F < 0:
            if disp:
                print '*** backtracking'
            # do not count this pass and force a new search on the next one
            nit -= 1
            decrease_tau = True
            continue
        # NOTE(review): `X_old` is assigned but never read.
        X_old = X
        X = Y_func(tau, X, A)
        if disp:
            print '{0:4s} {1:1.5e} {2:5e} tau = {3:1.3e}, tau_max = {4:1.3e}'.format(
             str(nit).zfill(4), F, delta_F, tau, tau_max)
        if delta_F <= tol:
            if disp:
                print '*** Converged ***'
            success = True
            break
    res = OptimizeResult()
    res.tau_max = tau_max
    res.X = X
    res.nfev = nfev
    res.nit = nit
    res.fun = F
    res.success = success
    return res
def optimize_minimize_mhmcmc_cluster(objective, bounds, args=(), x0=None, T=1, N=3, burnin=100000,
                                     maxiter=1000000, target_ar=0.4, ar_tolerance=0.05,
                                     cluster_eps=DEFAULT_CLUSTER_EPS, rnd_seed=None,
                                     collect_samples=None, logger=None):
    r"""
    Minimize objective function and return up to N local minima solutions.

    The domain is explored with a Metropolis-Hastings random walk (burn-in
    phase followed by a main phase). The lowest-valued accepted points of the
    main phase are clustered with DBSCAN in normalized bounds coordinates and
    the objective is re-evaluated at the mean location of each cluster.

    Note: the global numpy random state is reseeded with ``rnd_seed``.

    :param objective: Objective function to minimize. Takes unpacked args as function call arguments and returns
        a float.
    :type objective: Callable(\*args) -> float
    :param bounds: Bounds of the parameter space.
    :type bounds: scipy.optimize.Bounds
    :param args: Any additional fixed parameters needed to completely specify the objective function.
    :type args: tuple or list
    :param x0: Initial guess. If None, will be selected randomly and uniformly within the parameter bounds.
    :type x0: numpy.array with same shape as elements of bounds
    :param T: The "temperature" parameter for the accept or reject criterion. To sample the domain well,
        should be in the order of the typical difference in local minima objective valuations.
    :type T: float
    :param N: Maximum number of minima to return
    :type N: int
    :param burnin: Number of random steps to discard before starting to accumulate statistics. May be zero.
    :type burnin: int
    :param maxiter: Maximum number of steps to take (including burnin). Must be at least ``2 * burnin``.
    :type maxiter: int
    :param target_ar: Target acceptance rate of point samples generated by stepping.
    :type target_ar: float between 0 and 1
    :param ar_tolerance: Tolerance on the acceptance rate before actively adapting the step size.
    :type ar_tolerance: float
    :param cluster_eps: Point proximity tolerance for DBSCAN clustering, in normalized bounds coordinates.
    :type cluster_eps: float
    :param rnd_seed: Random seed to force deterministic behaviour. If None, a seed is derived from the clock.
    :type rnd_seed: int
    :param collect_samples: If not None and integral type, collect collect_samples at regular intervals
        and return as part of solution.
    :type collect_samples: int or NoneType
    :param logger: Logger instance for outputting log messages.
    :return: OptimizeResult containing solution(s) and solver data.
    :rtype: scipy.optimize.OptimizeResult with additional attributes
    """
    @call_counter
    def obj_counted(*args):
        return objective(*args)
    # end func

    assert maxiter >= 2 * burnin, "maxiter {} should be at least twice burnin steps {}".format(
        maxiter, burnin)
    main_iter = maxiter - burnin

    if collect_samples is not None:
        assert isinstance(collect_samples, int), "collect_samples expected to be integral type"
        assert collect_samples > 0, "collect_samples expected to be positive"
    # end if

    beta = 1.0 / T

    if rnd_seed is None:
        rnd_seed = int(time.time() * 1000) % (1 << 31)
    # end if
    np.random.seed(rnd_seed)
    if logger:
        logger.info('Using random seed {}'.format(rnd_seed))
    # end

    if x0 is None:
        x0 = np.random.uniform(bounds.lb, bounds.ub)
    # end if
    assert np.all((x0 >= bounds.lb) & (x0 <= bounds.ub))
    x = x0.copy()
    funval = obj_counted(x, *args)

    # Set up stepper with adaptive acceptance rate
    stepper = BoundedRandNStepper(bounds)
    stepper = AdaptiveStepsize(stepper, accept_rate=target_ar, ar_tolerance=ar_tolerance, interval=50)

    # -------------------------------
    # DO BURN-IN
    rejected_randomly = 0
    accepted_burnin = 0
    tracked_range = tqdm(range(burnin), total=burnin, desc='BURN-IN')
    if logger:
        stepper.logger = lambda msg: tracked_range.write(logger.name + ':' + msg)
    else:
        stepper.logger = tracked_range.write
    # end if
    for _ in tracked_range:
        x_new = stepper(x)
        funval_new = obj_counted(x_new, *args)
        # Metropolis criterion, evaluated in log space.
        log_alpha = -(funval_new - funval) * beta
        if log_alpha > 0 or np.log(np.random.rand()) <= log_alpha:
            x = x_new
            funval = funval_new
            stepper.notify_accept()
            accepted_burnin += 1
        elif log_alpha <= 0:
            rejected_randomly += 1
        # end if
    # end for
    # burnin == 0 satisfies the maxiter assertion above, so guard the division.
    ar = float(accepted_burnin) / burnin if burnin > 0 else 0.0
    if logger:
        logger.info("Burn-in acceptance rate: {}".format(ar))
    # end if

    # -------------------------------
    # DO MAIN LOOP
    if collect_samples is not None:
        nsamples = min(collect_samples, main_iter)
        sample_cadence = main_iter / nsamples
        samples = np.zeros((nsamples, len(x)))
        samples_fval = np.zeros(nsamples)
    # end if
    accepted = 0
    rejected_randomly = 0
    minima_sorted = SortedList(key=lambda rec: rec[1])  # Sort by objective function value
    hist = HistogramIncremental(bounds, nbins=100)
    # Cached a lot of potential minimum values, as these need to be clustered before return N results
    N_cached = int(np.ceil(N * main_iter / 500))
    next_sample = 0.0
    sample_count = 0
    tracked_range = tqdm(range(main_iter), total=main_iter, desc='MAIN')
    if logger:
        stepper.logger = lambda msg: tracked_range.write(logger.name + ':' + msg)
    else:
        stepper.logger = tracked_range.write
    # end if
    for i in tracked_range:
        if collect_samples and i >= next_sample:
            assert sample_count < collect_samples
            samples[sample_count] = x
            samples_fval[sample_count] = funval
            sample_count += 1
            next_sample += sample_cadence
        # end if
        x_new = stepper(x)
        funval_new = obj_counted(x_new, *args)
        log_alpha = -(funval_new - funval) * beta
        if log_alpha > 0 or np.log(np.random.rand()) <= log_alpha:
            x = x_new
            funval = funval_new
            minima_sorted.add((x, funval))
            # Keep only the N_cached lowest-valued points; pop() drops the largest.
            if len(minima_sorted) > N_cached:
                minima_sorted.pop()
            # end if
            stepper.notify_accept()
            hist += x
            accepted += 1
        elif log_alpha <= 0:
            rejected_randomly += 1
        # end if
    # end for
    stepper.logger = None
    ar = float(accepted) / main_iter
    if logger:
        logger.info("Acceptance rate: {}".format(ar))
        logger.info("Best minima (before clustering):\n{}".format(
            np.array([_mx[0] for _mx in minima_sorted[:10]])))
    # end if

    # -------------------------------
    # Cluster minima and associate each cluster with a local minimum.
    # Using a normalized coordinate space for cluster detection.
    x_range = bounds.ub - bounds.lb
    pts = np.array([rec[0] for rec in minima_sorted])
    fvals = np.array([rec[1] for rec in minima_sorted])
    pts_norm = (pts - bounds.lb) / x_range
    _, labels = dbscan(pts_norm, eps=cluster_eps, min_samples=21, n_jobs=-1)

    # Compute mean of each cluster and evaluate objective function at cluster mean locations.
    # Iterating range(max(labels) + 1) skips negative labels, so only points
    # assigned to a cluster contribute.
    minima_candidates = []
    for grp in range(max(labels) + 1):
        mask = (labels == grp)
        mean_loc = np.mean(pts[mask, :], axis=0)
        # Evaluate objective function precisely at the mean location of each cluster
        fval = obj_counted(mean_loc, *args)
        minima_candidates.append((mean_loc, grp, fval))
    # end for

    # Rank minima locations by objective function.
    minima_candidates.sort(key=lambda c: c[2])

    # Pick up to N solutions
    solutions = minima_candidates[:N]

    # Put results into OptimizeResult container.
    # Add histograms to output result (in form of scipy.stats.rv_histogram)
    solution = OptimizeResult()
    solution.x = np.array([s[0] for s in solutions])
    solution.clusters = [pts[(labels == s[1])] for s in solutions]
    solution.cluster_funvals = [fvals[(labels == s[1])] for s in solutions]
    solution.bins = hist.bins
    solution.distribution = hist.histograms
    solution.acceptance_rate = ar
    solution.success = True
    solution.status = 0
    if len(solutions) > 0:
        solution.message = 'SUCCESS: Found {} local minima'.format(len(solutions))
    else:
        solution.message = 'WARNING: Found no clusters within tolerance {}'.format(cluster_eps)
    # end if
    solution.fun = np.array([s[2] for s in solutions])
    solution.jac = None
    solution.nfev = obj_counted.counter
    solution.njev = 0
    solution.nit = main_iter
    solution.maxcv = None
    solution.samples = samples if collect_samples else None
    solution.sample_funvals = samples_fval if collect_samples else None
    solution.bounds = bounds
    solution.version = 's0.3'  # Solution version for future traceability
    solution.rnd_seed = rnd_seed

    return solution
def solve(self):
    """
    Run the differential evolution loop and report the best member found.

    The energies of the initial population are evaluated first, the best
    member is moved to position 0, and then up to ``self.maxiter``
    generations of mutate / evaluate / select are performed.

    Returns
    -------
    res : OptimizeResult
        Result object carrying ``x`` (the solution array), ``fun``,
        ``nfev``, ``nit``, ``message`` (the reason for termination) and
        ``success`` (False when an evaluation or iteration limit was hit or
        the callback requested a stop). When ``self.polish`` is set and the
        L-BFGS-B refinement improves the objective, ``x`` and ``fun`` are
        replaced by the polished values and ``jac`` is added.
    """
    status_message = _status_message['success']
    warning_flag = False
    nfev = 0
    nit = 0

    # Score every member of the starting population.
    for member_idx, member in enumerate(self.population):
        scaled = self._scale_parameters(member)
        self.population_energies[member_idx] = self.func(scaled, *self.args)
        nfev += 1

        if nfev > self.maxfun:
            warning_flag = True
            status_message = _status_message['maxfev']
            break

    # Swap the lowest-energy member into slot 0, the "best solution" slot.
    best_idx = np.argmin(self.population_energies)
    best_energy = self.population_energies[best_idx]
    self.population_energies[best_idx] = self.population_energies[0]
    self.population_energies[0] = best_energy
    self.population[[0, best_idx], :] = self.population[[best_idx, 0], :]

    if warning_flag:
        # The evaluation budget ran out during initialisation.
        return OptimizeResult(x=self.x,
                              fun=self.population_energies[0],
                              nfev=nfev,
                              nit=nit,
                              message=status_message,
                              success=not warning_flag)

    # Main evolution loop; the for/else reports 'maxiter' when it runs dry.
    for nit in range(1, self.maxiter + 1):
        if self.dither is not None:
            dither_low = self.dither[0]
            dither_high = self.dither[1]
            self.scale = (self.random_number_generator.rand()
                          * (dither_high - dither_low) + dither_low)

        for member_idx in range(len(self.population)):
            if nfev > self.maxfun:
                warning_flag = True
                status_message = _status_message['maxfev']
                break

            trial = self._mutate(member_idx)
            self._ensure_constraint(trial)
            trial_energy = self.func(self._scale_parameters(trial), *self.args)
            nfev += 1

            if trial_energy < self.population_energies[member_idx]:
                self.population[member_idx] = trial
                self.population_energies[member_idx] = trial_energy

                if trial_energy < self.population_energies[0]:
                    self.population_energies[0] = trial_energy
                    self.population[0] = trial

        # Converged once the fractional s.d. of the population energies
        # drops below tol of their mean.
        energy_spread = np.std(self.population_energies)
        energy_scale = np.abs(np.mean(self.population_energies) + _MACHEPS)
        convergence = energy_spread / energy_scale

        if self.disp:
            print("differential_evolution step %d: f(x)= %g"
                  % (nit, self.population_energies[0]))

        if self.callback:
            stop_requested = self.callback(
                self._scale_parameters(self.population[0]),
                convergence=self.tol / convergence)
            if stop_requested is True:
                warning_flag = True
                status_message = ('callback function requested stop early '
                                  'by returning True')
                break

        if warning_flag or convergence < self.tol:
            break

    else:
        status_message = _status_message['maxiter']
        warning_flag = True

    DE_result = OptimizeResult(x=self.x,
                               fun=self.population_energies[0],
                               nfev=nfev,
                               nit=nit,
                               message=status_message,
                               success=not warning_flag)

    if not self.polish:
        return DE_result

    # Optional local refinement of the best member.
    polished = minimize(self.func,
                        np.copy(DE_result.x),
                        method='L-BFGS-B',
                        bounds=self.limits.T,
                        args=self.args)

    nfev += polished.nfev
    DE_result.nfev = nfev

    if polished.fun < DE_result.fun:
        DE_result.fun = polished.fun
        DE_result.x = polished.x
        DE_result.jac = polished.jac
        # Mirror the improvement in the solver state.
        self.population_energies[0] = polished.fun
        self.population[0] = self._unscale_parameters(polished.x)

    return DE_result
def solve(self):
    """
    Runs the DifferentialEvolutionSolver, evaluating each generation of
    trial vectors in parallel with a ``multiprocessing.Pool`` of
    ``self.ncore`` workers.

    Iteration starts at ``self.niter`` and ends after ``self.maxiter``.
    Population members whose stored energy is NaN are removed from the
    population (together with their trial vectors) before selection.

    Returns
    -------
    res : OptimizeResult
        On a normal exit the result carries ``x`` (the solution array),
        ``fun``, ``nit``, ``message`` (the cause of the termination) and
        ``success``. If polish was employed and improved the objective,
        ``x`` and ``fun`` are replaced and ``jac`` is added.

        If the wall-clock limit ``self.maxtime`` (measured from
        ``self.time``) is exceeded, the result instead carries
        ``population``, ``population_energies``, ``nit``, ``message`` and
        ``success=False``. ``self.maxtime`` may be None to disable the
        limit.
    """
    start_time = self.time
    warning_flag = False

    # Test for None first: comparing a float with None raises TypeError on
    # Python 3.
    if (self.maxtime is not None
            and time.time() - start_time > self.maxtime):
        return OptimizeResult(population=self.population,
                              population_energies=self.population_energies,
                              nit=self.niter,
                              message='Maximum time has been exceeded.',
                              success=False)

    status_message = _status_message['success']

    # Keeps `nit` bound for the final result when the range below is empty.
    nit = self.niter

    # do the optimisation.
    for nit in range(self.niter, self.maxiter + 1):
        population_count = np.size(self.population, 0)
        if self.dither is not None:
            self.scale = self.random_number_generator.rand(
            ) * (self.dither[1] - self.dither[0]) + self.dither[0]

        # Build the trial vector of every member, then evaluate the whole
        # generation in parallel.
        Parameters = []
        Trials = []
        for candidate in range(population_count):
            trial = self._mutate(candidate)
            self._ensure_constraint(trial)
            Trials.append(trial)
            Parameters.append(self._scale_parameters(trial))

        pool = multiprocessing.Pool(self.ncore)
        try:
            Energies = pool.map(self.func, Parameters)
        finally:
            # Release the workers even when an evaluation raises.
            pool.close()
            pool.join()

        # Drop members whose stored energy is NaN (NaN != NaN).
        current_energies = np.asarray(self.population_energies)
        iNan = np.flatnonzero(current_energies != current_energies)
        if iNan.size:
            self.population_energies = np.delete(self.population_energies, iNan)
            self.population = np.delete(self.population, iNan, 0)
            # Drop the matching trials as well, so that index i still
            # pairs member i with the trial generated from it.
            dropped = set(iNan.tolist())
            Trials = [t for i, t in enumerate(Trials) if i not in dropped]
            Energies = [e for i, e in enumerate(Energies) if i not in dropped]
        population_count = np.size(self.population, 0)

        for candidate in range(population_count):
            if Energies[candidate] < self.population_energies[candidate]:
                self.population[candidate] = Trials[candidate]
                self.population_energies[candidate] = Energies[candidate]

                if Energies[candidate] < self.population_energies[0]:
                    self.population_energies[0] = Energies[candidate]
                    self.population[0] = Trials[candidate]

        # stop when the fractional s.d. of the population is less than tol
        # of the mean energy
        convergence = (np.std(self.population_energies) /
                       np.abs(np.mean(self.population_energies) +
                              _MACHEPS))

        if self.disp:
            print("differential_evolution step %d: f(x)= %g"
                  % (nit, self.population_energies[0]))
            print("total population at step %d is %d" % (nit, population_count))

        if (self.callback and
                self.callback(self._scale_parameters(self.population[0]),
                              convergence=self.tol / convergence) is True):
            warning_flag = True
            status_message = ('callback function requested stop early '
                              'by returning True')
            break

        if convergence < self.tol or warning_flag:
            break

        if (self.maxtime is not None
                and time.time() - start_time > self.maxtime):
            # NOTE(review): reports self.niter (the starting iteration), not
            # the current `nit` -- kept as in the original; confirm intent.
            return OptimizeResult(population=self.population,
                                  population_energies=self.population_energies,
                                  nit=self.niter,
                                  message='Maximum time has been exceeded.',
                                  success=False)

    else:
        status_message = _status_message['maxiter']
        warning_flag = True

    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nit=nit,
        message=status_message,
        success=not warning_flag)

    if self.polish:
        result = minimize(self.func,
                          np.copy(DE_result.x),
                          method='L-BFGS-B',
                          bounds=self.limits.T)

        if result.fun < DE_result.fun:
            DE_result.fun = result.fun
            DE_result.x = result.x
            DE_result.jac = result.jac
            # to keep internal state consistent
            self.population_energies[0] = result.fun
            self.population[0] = self._unscale_parameters(result.x)

    return DE_result
def _gensa_modified(func, x0, bounds, maxiter=500, initial_temp=5230., visit=2.62, accept=-5.0, maxfun=1e7, args=(), seed=None, pure_sa=False): """Extension of the gensa function available the pygensa package at https://github.com/sgubianpm/pygensa This function is an extension of the function gensa defined in the package pygensa at https://github.com/sgubianpm/pygensa. The only difference with the existing version and _gensa_modified is that it allows the user to pass lower bounds and upper bounds with equal values. Though this is a trivial scenario in which case the optimal solution should be lower = upper, the current version of gensa crashes. :param fun : callable The objective function :param x0 : ndarray The starting coordinates. :param bounds : sequence Bounds for variables. ``(min, max)`` pairs for each element in ``x``, defining the lower and upper bounds for the optimizing argument of `func`. It is required to have ``len(bounds) == len(x)``. ``len(bounds)`` is used to determine the number of parameters in ``x``. :param args : tuple, optional Any additional fixed parameters needed to completely specify the objective function. :param seed : int or `np.random.RandomState`, optional If `seed` is not specified the `np.RandomState` singleton is used. If `seed` is an int, a new `np.random.RandomState` instance is used, seeded with seed. If `seed` is already a `np.random.RandomState instance`, then that `np.random.RandomState` instance is used. Specify `seed` for repeatable minimizations. The random numbers generated with this seed only affect the visiting distribution function and new coordinates generation. :param temp_start : float, optional The initial temperature, use higher values to facilitates a wider search of the energy landscape, allowing gensa to escape local minima that it is trapped in. :param qv : float, optional Parameter for visiting distribution. 
Higher values give the visiting distribution a heavier tail, this makes the algorithm jump to a more distant region. The value range is (0, 3] :param qa : float, optional Parameter for acceptance distribution. It is used to control the probability of acceptance. The lower the acceptance parameter, the smaller the probability of acceptance. It has to be any negative value. :param maxfun : int, optional Soft limit for the number of objective function calls. If the algorithm is in the middle of a local search, this number will be exceeded, the algorithm will stop just after the local search is done. :param maxsteps: int, optional The maximum number of gensa iterations will perform. :return: :Example: from robust_tail.gensa_modified import _gensa_modified from pygensa.gensa import gensa # Test gensa_modified in the univariate case def f1(x): return x output = _gensa_modified(func = f1,x0 = None, bounds = [[1,1]]) output gensa(func = f1,x0 = None, bounds = [[1,1]]) # This crashes # Test gensa_modified in the bivariate case with one lower bound equal to the upper bound def f2(x): return x[0] + x[1] output = _gensa_modified(func = f2,x0 = None,bounds = [[1,1],[1,3]]) output gensa(func = f2,x0 = None,bounds = [[1,1],[1,3]]) # This crashes # Test gensa_modified in the bivariate case with both lower bound are equal to the upper bounds output = _gensa_modified(func = f2,x0 = None,bounds = [[1,1],[1,1]]) output gensa(func = f2,x0 = None,bounds = [[1,1],[1,1]]) # This crashes # Let's check that when the lower bounds are strictly smaller than the upper bounds, # all goes well) output_gensa_modified = _gensa_modified(func = f2,x0 = None,bounds=[[1,2],[2,3]]) output_gensa = gensa(func = f2,x0 = None,bounds=[[1,2],[2,3]]) # Check that they have the same optimal solution all(output_gensa_modified.x == output_gensa.x) # Check that they have the same optimal objective value output_gensa_modified.fun == output_gensa.fun """ # Check necessary conditions to run gensa_modified if not 
isfunction(func) or func is None: print("func has to be function.") return None for bound in bounds: if len(bound) != 2: print("Each parameter needs a lower and upper bounds") return None # If all lower bounds are different from the upper bounds, # run the usual gensa algorithm samebound = [x[0] == x[1] for x in bounds] lower_bound = [x[0] for x in bounds] if not any(samebound): output = gensa(func, x0, bounds, maxiter, initial_temp, visit, accept, maxfun, args, seed, pure_sa) else: index = np.where(samebound) def _new_func(new_x=None): if new_x is not None: new_x_copy = list(new_x) x = np.array([ bound[0] if bound[0] == bound[1] else new_x_copy.pop(0) for bound in bounds ]) else: x = np.array([bound[0] for bound in bounds]) output = func(x) return output if all(samebound): output = OptimizeResult() output.x = lower_bound output.fun = _new_func() return output new_bounds = [bound for bound in bounds if bound[0] != bound[1]] if x0 is not None: new_x0 = np.array([ x0[i] for i in range(0, len(bounds)) if bounds[i][0] != bounds[i][1] ]) else: new_x0 = None output = gensa(_new_func, new_x0, new_bounds, maxiter, initial_temp, visit, accept, maxfun, args, seed, pure_sa) # The output vector par of gensa will be of the same dimension as the new_lower bound # we need to include the value of lower_bound that was discarded in "par" if isinstance(OptimizeResult(), OptimizeResult): output_x_copy = list(output.x[:]) output.x = [ bound[0] if bound[0] == bound[1] else output_x_copy.pop(0) for bound in bounds ] return output
def gbrt_minimize(func, bounds, base_estimator=None, maxiter=100,
                  n_points=20, n_start=10, random_state=None):
    """Sequential optimisation using gradient boosted trees.

    Gradient boosted regression trees are used to model the (very)
    expensive to evaluate function `func`. The model is improved
    by sequentially evaluating the expensive function at the next
    best point. Thereby finding the minimum of `func` with as
    few evaluations as possible.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `bounds` [array-like, shape=(n_parameters, 2)]:
        - ``bounds[i][0]`` should give the lower bound of each parameter and
        - ``bounds[i][1]`` should give the upper bound of each parameter.

    * `base_estimator` [`GradientBoostingQuantileRegressor`]:
        The regressor to use as surrogate model. A fresh clone is fitted
        in every iteration.

    * `maxiter` [int, default=100]:
        Number of iterations used to find the minimum. This corresponds to
        the total number of evaluations of `func`. If `n_start` > 0
        only `maxiter - n_start` iterations are used. Must be positive.

    * `n_start` [int, default=10]:
        Number of random points to draw before fitting `base_estimator`
        for the first time. Must be positive. If `n_start > maxiter` this
        degrades to a random search for the minimum.

    * `n_points` [int, default=20]:
        Number of points to sample when minimizing the acquisition function.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [array, shape=(n_parameters,)]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError` if `n_start` or `maxiter` is not positive.
    """
    # Validate before allocating anything that depends on these values.
    if n_start <= 0:
        raise ValueError("Need at least one starting point.")
    if maxiter <= 0:
        raise ValueError("Need to perform at least one iteration.")

    rng = check_random_state(random_state)

    # Bounds
    num_params = len(bounds)
    lower_bounds, upper_bounds = extract_bounds(bounds)

    # Default estimator
    if base_estimator is None:
        base_estimator = GradientBoostingQuantileRegressor(random_state=rng)

    # Record the points and function values evaluated as part of
    # the minimization
    Xi = np.zeros((maxiter, num_params))
    yi = np.zeros(maxiter)

    # Initialize with random points
    n_start = min(n_start, maxiter)

    Xi[:n_start] = _random_points(
        lower_bounds, upper_bounds, n_points=n_start, random_state=rng)
    yi[:n_start] = [func(xi) for xi in Xi[:n_start]]

    # The incumbent is the single best starting point, not the whole
    # flattened block of starting points.
    best_start = np.argmin(yi[:n_start])
    best_x = Xi[best_start].copy()
    best_y = yi[best_start]

    models = []

    for i in range(n_start, maxiter):
        rgr = clone(base_estimator)
        # only the first i points are meaningful
        rgr.fit(Xi[:i, :], yi[:i])
        models.append(rgr)

        # `rgr` predicts constants for each leaf which means that the EI
        # has zero gradient over large distances. As a result we can not
        # use gradient based optimisers like BFGS, use random sampling
        # for the moment.
        x0 = _random_points(lower_bounds, upper_bounds,
                            n_points=n_points,
                            random_state=rng)
        aq = _expected_improvement(x0, rgr, best_y)
        best = np.argmin(aq)

        Xi[i] = x0[best].ravel()
        yi[i] = func(x0[best])

        if yi[i] < best_y:
            best_y = yi[i]
            best_x = Xi[i].copy()

    res = OptimizeResult()
    res.x = best_x
    res.fun = best_y
    res.func_vals = yi
    res.x_iters = Xi
    res.models = models

    return res
def _sequential_random_embeddings(f,
                                  x0,
                                  bounds,
                                  n_reduced_dims_eff=3,
                                  n_embeddings=10,
                                  verbosity=1,
                                  **optimizer_kwargs):
    """
    Sequential Random Embeddings, following

    +++++
    H. Qian, Y.-Q. Hu, and Y. Yu, Derivative-Free Optimization of High-Dimensional Non-Convex
    Functions by Sequential Random Embeddings, Proceedings of the Twenty-Fifth International Joint
    Conference on Artificial Intelligence, AAAI Press (2016).
    +++++

    A high-dimensional problem in ℝ^h is attacked through a sequence of low-dimensional ones.
    Each embedding draws a random matrix A ∈ N(0, 1)^(h×l), rescales its rows, and minimizes
    f(αx + A•y) w.r.t. (α, y), with α ∈ ℝ and y ∈ ℝ^l. The minimizer defines the next iterate

        x(n+1) = α(n+1)x(n) + A•y(n+1),

    projected back through ``bounds``. The first iterate is the zero vector.

    :param f: [callable] Objective function. Must accept its argument x as numpy array
    :param x0: [np.array] Initial values for the bacteria population in the original,
           high-dimensional space ℝ^h; indexed as (n_bacteria, h)
    :param bounds: [callable] Bounds projection, see description of parameter
           ``projection_callback`` in :func:`local_search.bfgs_b`. It is used here as
           ``x_projected, bounds_hit = bounds(x)``
    :param n_reduced_dims_eff: [int] Effective dimension of the embedded problem, ℝ^(l+1)
    :param n_embeddings: [int] Number of embedding iterations
    :param verbosity: [int] Output verbosity. Must be 0, 1, or 2
    :param optimizer_kwargs: Keyword arguments passed on to the actual optimization routine;
           the key ``verbosity`` is overwritten with the ``verbosity`` argument
    :return: Best minimum of f found [scipy.optimize.OptimizeResult], with ``nfev`` and
             ``nit`` accumulated over all embeddings and ``trace`` set to None
    """
    assert verbosity in [0, 1, 2], 'verbosity must be 0, 1, or 2.'

    n_bacteria, n_orig_dims = x0.shape[0], x0.shape[1]
    n_embedded_dims = n_reduced_dims_eff - 1

    x = np.zeros(n_orig_dims)
    best_x = x.copy()
    best_f = np.inf
    best_success = False
    total_nfev = 0
    total_nit = 0

    for embedding_idx in range(n_embeddings):
        # Random embedding matrix; every row is divided by its sum
        # (rows summing to exactly zero are left unscaled).
        A = np.random.normal(size=(n_orig_dims, n_embedded_dims))
        row_sums = A.sum(axis=1)
        row_sums = np.where(row_sums == 0, 1, row_sums)
        A = A / row_sums[:, np.newaxis]

        def lift(arg):
            # Map (α, y) from the embedded space back to ℝ^h.
            return arg[0] * x + A.dot(arg[1:])

        def f_embedded(arg):
            projected = bounds(lift(arg))[0]
            return f(projected)

        def bounds_embedded(arg):
            # Bounds callback for the embedded problem.
            hit_embedded = np.zeros(len(arg), dtype=bool)
            x_proj, hit_orig = bounds(lift(arg))
            if not hit_orig.any():
                return arg, hit_embedded
            # Boundary hit in original, non-embedded variable: pull y back
            # onto the projected point in the least-squares sense.
            arg[1:] = np.linalg.lstsq(A, x_proj - arg[0] * x, rcond=None)[0]
            hit_embedded[1:] = (A[hit_orig] != 0).any(axis=0)
            return arg, hit_embedded

        # Starting points in the embedded space: α = 1 and the
        # least-squares pre-image of each original starting point.
        y_start = np.zeros((n_bacteria, n_reduced_dims_eff))
        y_start[:, 0] = 1
        y_start[:, 1:] = np.array([
            np.linalg.lstsq(A, x_orig - x, rcond=None)[0] for x_orig in x0
        ])

        if verbosity > 0:
            infoMsg = f'\nEmbedding iteration {embedding_idx}'
            print(infoMsg)
            print('-' * len(infoMsg))

        optimizer_kwargs['verbosity'] = verbosity
        with warnings.catch_warnings():
            warnings.filterwarnings(
                'ignore',
                message='Found initial conditions outside the defined search domain.')
            res_embedded = optimize(f_embedded,
                                    x0=y_start,
                                    bounds=bounds_embedded,
                                    **optimizer_kwargs)

        y_opt = res_embedded.x
        f_opt = res_embedded.fun
        total_nfev += res_embedded.nfev
        total_nit += res_embedded.nit

        # Next iterate in the original space, projected onto the domain.
        x = bounds(lift(y_opt))[0]

        if verbosity > 0:
            print(f'Random embedding gave x = {x}.')

        if f_opt < best_f:
            best_f = f_opt
            best_x = x.copy()
            best_success = res_embedded.success

    result = OptimizeResult()
    result.success = best_success
    result.x = best_x
    result.fun = best_f
    result.nfev = total_nfev
    result.nit = total_nit
    result.trace = None
    return result
def optimize(f,
             x0=None,
             bounds=None,
             domain_scale=None,
             init='uniform',
             stepsize_start=None,
             stepsize_decay_fac=1e-3,
             base_tumble_rate=0.1,
             niter_rt=400,
             n_bacteria_per_dim=3,
             stationarity_window=20,
             eps_stat=1e-3,
             attraction=False,
             attraction_window=10,
             attraction_sigma=None,
             attraction_strength=0.5,
             bounds_reflection=False,
             n_best_selection=3,
             c_gd=1e-6,
             a_gd=None,
             n_linesearch_gd=20,
             alpha_linesearch_gd=0.5,
             beta_linesearch_gd=0.33,
             eps_abs_gd=1e-9,
             eps_rel_gd=1e-6,
             niter_gd=100,
             n_embeddings=5,
             max_dims=3,
             n_reduced_dims=2,
             verbosity=0):
    """
    Metaheuristic global optimization algorithm combining a bacterial
    run-and-tumble chemotactic search with a local, gradient-based search
    around the best minimum candidate points.

    The algorithm's goal is to find

        min f(x), x ∈ Ω,

    where f: Ω ⊂ ℝ^n → ℝ.

    Since the chemotactic search becomes more and more ineffective with
    increasing problem dimensionality, Sequential Random Embeddings are used
    to solve the optimization problem once its dimensionality exceeds a
    given threshold.

    :param f: [callable] Objective function. Must accept its argument x as
        numpy array
    :param x0: [array-like object] Optional initial conditions object. Must
        have the shape (n_bacteria, n_dims) or (n_dims,). If x0 == None,
        initial conditions are sampled randomly or uniformly-spaced from Ω.
        Note that this only works if Ω is a rectangular box, i.e., if no or
        non-rectangular bounds are imposed, x0 must not be None
    :param bounds: [callable or array-like object] Defines the bounded domain
        Ω. If provided, must be one of the following:
        - Bounds projection callback, as defined in description of parameter
          ``projection_callback`` in :func:`local_search.bfgs_b`
        - Rectangular box constraints (list or numpy array). For each
          component x_i of x, bounds[i, 0] <= x_i <= bounds[i, 1], that is,
          bounds must have shape (n_dims, 2)
    :param domain_scale: [float] Scale of the optimization problem. If not
        provided, the algorithm tries to guess the scale from any provided
        rectangular box constraints. Used for auto-scaling algorithm
        stepsizes
    :param init: [string] Determines how initial bacteria positions are
        sampled from Ω if x0 == None, see description of parameter ``x0``.
        Currently supported: 'random' and 'uniform'
    :param stepsize_start: [float] See description of parameter
        ``stepsize_start`` in :func:`global_search.run_and_tumble`. If not
        provided, the algorithm tries to auto-scale this length to the
        problem's scale
    :param stepsize_decay_fac: [float] Factor by which the run-and-tumble
        stepsize has decayed in the last run-and-tumble iteration compared to
        its initial value
    :param base_tumble_rate: [float] See description of parameter
        ``base_tumble_rate`` in :func:`global_search.run_and_tumble`
    :param niter_rt: [int] Maximum number of run-and-tumble iterations
    :param n_bacteria_per_dim: [int] How many bacteria to spawn in each
        dimension. Note that the total number of bacteria is
        i)  n_bacteria = n_bacteria_per_dim ** n_dims if n_dims <= max_dims or
        ii) n_bacteria = n_bacteria_per_dim ** (n_reduced_dims + 1) if
            n_dims > max_dims.
        If x0 is provided with shape (n_bacteria, n_dims), n_bacteria should
        agree with this relation.
    :param stationarity_window: [int] See description of parameter
        ``stationarity_window`` in :func:`global_search.run_and_tumble`
    :param eps_stat: [float] See description of parameter ``eps_stat`` in
        :func:`global_search.run_and_tumble`
    :param attraction: [bool] See description of parameter ``attraction`` in
        :func:`global_search.run_and_tumble`
    :param attraction_window: [int] See description of parameter
        ``attraction_window`` in :func:`global_search.run_and_tumble`
    :param attraction_sigma: [float] See description of parameter
        ``attraction_sigma`` in :func:`global_search.run_and_tumble`. If not
        provided, the algorithm tries to auto-scale this length to the
        problem's scale
    :param attraction_strength: [float] See description of parameter
        ``attraction_strength`` in :func:`global_search.run_and_tumble`
    :param bounds_reflection: [bool] See description of parameter
        ``bounds_reflection`` in :func:`global_search.run_and_tumble`
    :param n_best_selection: [int] At the end of the run-and-tumble
        exploration stage, a local gradient-based search is performed,
        starting from the best positions found thus far by the
        n_best_selection best bacteria
    :param c_gd: [float] See description of parameter ``c`` in
        :func:`local_search.bfgs_b`
    :param a_gd: [float] See description of parameter ``a`` in
        :func:`local_search.bfgs_b`. If not provided, the algorithm tries to
        auto-scale this length to the problem's scale
    :param n_linesearch_gd: [int] See description of parameter
        ``n_linesearch`` in :func:`local_search.bfgs_b`
    :param alpha_linesearch_gd: [float] See description of parameter
        ``alpha_linesearch`` in :func:`local_search.bfgs_b`
    :param beta_linesearch_gd: [float] See description of parameter
        ``beta_linesearch`` in :func:`local_search.bfgs_b`
    :param eps_abs_gd: [float] See description of parameter ``eps_abs`` in
        :func:`local_search.bfgs_b`
    :param eps_rel_gd: [float] See description of parameter ``eps_rel`` in
        :func:`local_search.bfgs_b`
    :param niter_gd: [int] Maximum number of local, gradient-based search
        iterations
    :param n_embeddings: [int] Number of embedding iterations when using
        Sequential Random Embeddings. Only has an effect if n_dims > max_dims
    :param max_dims: [int] Maximum dimension of problems to be solved without
        using Sequential Random Embeddings
    :param n_reduced_dims: [int] Dimension of the embedded problem. Only has
        an effect if n_dims > max_dims
    :param verbosity: [int] Output verbosity. Must be 0, 1, or 2
    :raises ValueError: If ``bounds`` is neither None, a callable, a list nor
        a numpy array, or if ``init`` is neither 'random' nor 'uniform' when
        initial positions have to be sampled
    :return: Best minimum of f found [scipy.optimize.OptimizeResult] with the
        fields ``success``, ``x``, ``fun``, ``nfev``, ``nit`` and ``trace``
    """
    assert verbosity in [0, 1, 2], 'verbosity must be 0, 1, or 2.'
    assert n_reduced_dims >= 2, 'n_reduced_dims must not be less than 2.'
    n_reduced_dims_eff = n_reduced_dims + 1

    # ------------------------------------------------------------------
    # Initial population and projection callbacks
    # ------------------------------------------------------------------
    if bounds is None or callable(bounds):
        assert x0 is not None, (
            'If no box constraints are provided for bounds, x0 must not be ' +
            'None.')
        x0_population = _prepare_x0(x0, n_bacteria_per_dim, max_dims,
                                    n_reduced_dims_eff)
        n_bacteria, n_dims = x0_population.shape

        if bounds is None:
            bound_lower, bound_upper = _prepare_bounds(bounds, n_dims)

            def projection_callback(x):
                x = np.clip(x, bound_lower, bound_upper)
                bounds_hit = np.where(
                    ((x == bound_lower) | (x == bound_upper)), True, False)
                return x, bounds_hit

            def projection_callback_population(x):
                return projection_callback(x)
        else:

            def projection_callback(x):
                return bounds(x)

            def projection_callback_population(x):
                # Collect positions and hit masks separately. Packing the
                # (x, bounds_hit) pairs into one array would promote the
                # boolean mask to the dtype of x, which disagrees with the
                # boolean mask produced for box constraints.
                projected = [projection_callback(x_single) for x_single in x]
                x_projected = np.array([pair[0] for pair in projected])
                bounds_hit = np.array([pair[1] for pair in projected],
                                      dtype=bool)
                return x_projected, bounds_hit

    elif isinstance(bounds, (list, np.ndarray)):
        if x0 is not None:
            x0_population = _prepare_x0(x0, n_bacteria_per_dim, max_dims,
                                        n_reduced_dims_eff)
            n_bacteria, n_dims = x0_population.shape
            bound_lower, bound_upper = _prepare_bounds(bounds, n_dims)
        else:
            bound_lower, bound_upper = _prepare_bounds(bounds, None)
            n_dims = len(bound_lower)
            n_bacteria = (n_bacteria_per_dim**n_dims if n_dims <= max_dims
                          else n_bacteria_per_dim**n_reduced_dims_eff)

            if init == 'uniform' and n_dims > max_dims:
                # A full grid would not match n_bacteria in the embedded case
                init = 'random'
                if verbosity > 0:
                    warnings.warn(
                        'The option init="uniform" is only available for problems with '
                        +
                        'dimensionality less than or equal to max_dims, which was '
                        + f'set to {max_dims}. Since the current problem has '
                        +
                        f'dimensionality {n_dims}, init was automatically set to '
                        + f'"random".')

            if init == 'random':
                x0_population = np.random.uniform(bound_lower,
                                                  bound_upper,
                                                  size=(n_bacteria, n_dims))
            elif init == 'uniform':
                init_points = [
                    np.linspace(bound_lower[i], bound_upper[i],
                                n_bacteria_per_dim) for i in range(n_dims)
                ]
                x0_population = np.array(
                    np.meshgrid(*init_points)).reshape(n_dims, -1).T
            else:
                raise ValueError('init must either be "random" or "uniform".')

        def projection_callback(x):
            x = np.clip(x, bound_lower, bound_upper)
            bounds_hit = np.where(((x == bound_lower) | (x == bound_upper)),
                                  True, False)
            return x, bounds_hit

        def projection_callback_population(x):
            return projection_callback(x)
    else:
        raise ValueError(
            'bounds must either be None, an array or corresponding nested list of '
            +
            'shape (n_dims, 2), or a custom callback function. See the docstring '
            + 'for details.')

    assert niter_rt > stationarity_window, 'niter_rt must be larger than stationarity_window.'
    assert n_best_selection <= n_bacteria, 'n_best_selection must not be larger than n_bacteria.'

    # ------------------------------------------------------------------
    # Length scales: user-supplied values are used verbatim, defaults are
    # multiplied by the problem scale further below
    # ------------------------------------------------------------------
    auto_scale_stepsize = stepsize_start is None
    if auto_scale_stepsize:
        stepsize_start = 1e-1
    stepsize_end = stepsize_decay_fac * stepsize_start

    auto_scale_attraction_sigma = attraction_sigma is None
    if auto_scale_attraction_sigma:
        attraction_sigma = 1

    auto_scale_a_gd = a_gd is None
    if auto_scale_a_gd:
        a_gd = 1e-2

    x0_population_orig = x0_population.copy()
    x0_population, _ = projection_callback_population(x0_population)
    if not np.array_equal(x0_population, x0_population_orig):
        warnings.warn(
            'Found initial conditions outside the defined search domain.')

    max_scale = None
    if domain_scale is not None:
        max_scale = domain_scale
    elif isinstance(bounds, (list, np.ndarray)):
        # Largest finite edge length of the box
        # noinspection PyUnboundLocalVariable
        domain_range = bound_upper - bound_lower
        max_scale = np.max(np.where(np.isinf(domain_range), 0, domain_range))

    if max_scale is not None and max_scale > 0:
        if auto_scale_stepsize:
            stepsize_start = stepsize_start * max_scale
            stepsize_end = stepsize_end * max_scale
        if auto_scale_attraction_sigma:
            attraction_sigma = attraction_sigma * max_scale
        if auto_scale_a_gd:
            a_gd = a_gd * max_scale

    # ------------------------------------------------------------------
    # High-dimensional problems are delegated to random embeddings
    # ------------------------------------------------------------------
    if n_dims > max_dims:
        if verbosity > 0:
            print(
                f'Using sequential random embeddings in {n_reduced_dims} + 1 dimensions.'
            )
        return _sequential_random_embeddings(
            f,
            x0_population,
            projection_callback,
            n_reduced_dims_eff=n_reduced_dims_eff,
            n_embeddings=n_embeddings,
            verbosity=verbosity,
            domain_scale=max_scale,
            init=init,
            stepsize_start=stepsize_start,
            stepsize_decay_fac=stepsize_decay_fac,
            base_tumble_rate=base_tumble_rate,
            niter_rt=niter_rt,
            n_bacteria_per_dim=n_bacteria_per_dim,
            stationarity_window=stationarity_window,
            eps_stat=eps_stat,
            attraction=attraction,
            attraction_window=attraction_window,
            attraction_sigma=attraction_sigma,
            attraction_strength=attraction_strength,
            bounds_reflection=bounds_reflection,
            n_best_selection=n_best_selection,
            c_gd=c_gd,
            a_gd=a_gd,
            n_linesearch_gd=n_linesearch_gd,
            alpha_linesearch_gd=alpha_linesearch_gd,
            beta_linesearch_gd=beta_linesearch_gd,
            eps_abs_gd=eps_abs_gd,
            eps_rel_gd=eps_rel_gd,
            niter_gd=niter_gd,
            max_dims=n_reduced_dims_eff)

    # ------------------------------------------------------------------
    # Global stage: run-and-tumble exploration
    # ------------------------------------------------------------------
    x_best, f_best, nfev, nit, trace = run_and_tumble(
        f,
        x0_population,
        projection_callback_population,
        niter_rt,
        stepsize_start,
        stepsize_end,
        base_tumble_rate=base_tumble_rate,
        stationarity_window=stationarity_window,
        eps_stat=eps_stat,
        attraction=attraction,
        attraction_window=attraction_window,
        attraction_sigma=attraction_sigma,
        attraction_strength=attraction_strength,
        bounds_reflection=bounds_reflection,
        verbosity=verbosity)

    if verbosity == 2:
        print(
            '==============================================================================='
        )
    if verbosity > 0:
        print(
            f'Best result after run-and-tumble stage is x = {x_best[np.argmin(f_best)]}, '
            +
            f'f(x) = {np.min(f_best)}. Starting local, gradient-based optimization for the '
            + f'{n_best_selection} best bacteria.')

    # ------------------------------------------------------------------
    # Local stage: gradient-based refinement of the best bacteria
    # ------------------------------------------------------------------
    sortIdx = f_best.argsort()
    x_best_selection = x_best[sortIdx[:n_best_selection]]
    x_best_gd = np.empty(x_best_selection.shape)
    f_min_gd = np.empty(n_best_selection)
    nfev_gd = 0
    nit_gd = 0
    success_gd = np.empty(n_best_selection)
    nit_gd_arr = np.empty(n_best_selection)

    # Bacteria that are not refined stay at their last run-and-tumble
    # position for the whole local-search part of the trace
    trace_gd = np.empty((niter_gd, n_bacteria, n_dims))
    trace_gd[:, sortIdx[n_best_selection:], :] = trace[
        -1, sortIdx[n_best_selection:], :]

    # The following quantities do not depend on the start point, so they
    # are computed once instead of once per refined bacterium
    visited_points = trace.reshape(-1, n_dims)
    # noinspection PyArgumentList,PyUnresolvedReferences
    visited_tree = spatial.cKDTree(visited_points)
    num_sampling_points = 2 * int(special.binom(n_dims + 2, 2))
    # Exponent tuples of all monomials of total degree <= 2
    polynomial_powers = [
        powers for powers in itertools.product((0, 1, 2), repeat=n_dims)
        if sum(powers) <= 2
    ]

    for n, x_start in enumerate(x_best_selection):
        if verbosity == 2:
            print(f'Performing gradient descent for bacterium {n}.')

        # Calculate quadratic function approximation around x_start from
        # the nearest points visited during the run-and-tumble stage
        sampling_points = visited_points[visited_tree.query(
            x_start, num_sampling_points)[1]]
        func_values = np.array([f(point) for point in sampling_points])
        nfev += num_sampling_points

        sampling_matrix = np.stack([
            np.prod(sampling_points**d, axis=1) for d in polynomial_powers
        ],
                                   axis=-1)
        # rcond=None selects NumPy's machine-precision based cut-off, as in
        # the other least-squares solves of this module
        coeffs = np.linalg.lstsq(sampling_matrix, func_values,
                                 rcond=None)[0]

        # Calculate Hessian matrix from the quadratic approximation
        H = np.ones((n_dims, n_dims))
        square_powers = [(powers, coeff)
                         for powers, coeff in zip(polynomial_powers, coeffs)
                         if sum(powers) == 2]
        for square_power, coeff in square_powers:
            idcs_to_consider = np.argwhere(np.array(square_power) != 0)
            if len(idcs_to_consider) == 1:  # Diagonal
                # NOTE(review): the second derivative of coeff * x_i**2 is
                # 2 * coeff; confirm that the factor 0.5 is intended
                H[idcs_to_consider[0], idcs_to_consider[0]] = 0.5 * coeff
            elif len(idcs_to_consider) == 2:  # Mixed derivatives
                H[idcs_to_consider[0], idcs_to_consider[1]] = coeff
                H[idcs_to_consider[1], idcs_to_consider[0]] = coeff
            else:
                raise RuntimeError(
                    "Polynomial function approximation seems to be of higher "
                    "order than two. This shouldn't happen.")

        local_optimization_result = bfgs_b(
            f,
            x_start,
            projection_callback,
            H_start=H,
            a=a_gd,
            c=c_gd,
            niter=niter_gd,
            n_linesearch=n_linesearch_gd,
            alpha_linesearch=alpha_linesearch_gd,
            beta_linesearch=beta_linesearch_gd,
            eps_abs=eps_abs_gd,
            eps_rel=eps_rel_gd,
            verbosity=verbosity)

        x_best_gd[n] = local_optimization_result.x
        f_min_gd[n] = local_optimization_result.f
        nfev_gd += local_optimization_result.nfev
        nit_gd += local_optimization_result.nit
        nit_gd_arr[n] = local_optimization_result.nit
        success_gd[n] = local_optimization_result.success
        trace_gd[:, sortIdx[n], :] = _pad_trace(
            local_optimization_result.trace, niter_gd)

    result = OptimizeResult()
    result.success = success_gd.any()
    result.x = x_best_gd[np.argmin(f_min_gd)]
    result.fun = np.min(f_min_gd)
    result.nfev = nfev + nfev_gd
    result.nit = nit + nit_gd
    # Keep only as many local-search steps as the longest refinement used
    trace_gd = trace_gd[:np.max(nit_gd_arr).astype(int)]
    result.trace = np.concatenate((trace, trace_gd))
    return result
def dual_annealing(func, x0, bounds, args=(), maxiter=1000,
                   local_search_options={}, initial_temp=5230.,
                   restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
                   maxfun=1e7, seed=None, no_local_search=False,
                   callback=None):
    """
    Find the global minimum of a function using Dual Annealing.

    Parameters
    ----------
    func : callable
        The objective function to be minimized.  Must be in the form
        ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
        and ``args`` is a  tuple of any additional fixed parameters needed to
        completely specify the function.
    x0 : ndarray, shape(n,)
        A single initial starting point coordinates. If ``None`` is provided,
        initial coordinates are automatically generated (using the ``reset``
        method from the internal ``EnergyState`` class).
    bounds : sequence, shape (n, 2)
        Bounds for variables.  ``(min, max)`` pairs for each element in ``x``,
        defining bounds for the objective function parameter.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the
        objective function.
    maxiter : int, optional
        The maximum number of global search iterations. Default value is 1000.
        For values smaller than 1 no search iteration is performed and the
        initial state is returned.
    local_search_options : dict, optional
        Extra keyword arguments to be passed to the local minimizer
        (`minimize`). Some important options could be:
        ``method`` for the minimizer method to use and ``args`` for
        objective function additional arguments.
        The dictionary is only unpacked, never modified.
    initial_temp : float, optional
        The initial temperature, use higher values to facilitate a wider
        search of the energy landscape, allowing dual_annealing to escape
        local minima that it is trapped in. Default value is 5230. Range is
        (0.01, 5.e4].
    restart_temp_ratio : float, optional
        During the annealing process, temperature is decreasing, when it
        reaches ``initial_temp * restart_temp_ratio``, the reannealing process
        is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
    visit : float, optional
        Parameter for visiting distribution. Default value is 2.62. Higher
        values give the visiting distribution a heavier tail, this makes
        the algorithm jump to a more distant region. The value range is (0, 3].
    accept : float, optional
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    maxfun : int, optional
        Soft limit for the number of objective function calls. If the
        algorithm is in the middle of a local search, this number will be
        exceeded, the algorithm will stop just after the local search is
        done. Default value is 1e7.
    seed : {int or `numpy.random.RandomState` instance}, optional
        If `seed` is not specified the `numpy.random.RandomState` singleton is
        used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``RandomState`` instance, then that
        instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the visiting distribution
        function and new coordinates generation.
    no_local_search : bool, optional
        If `no_local_search` is set to True, a traditional Generalized
        Simulated Annealing will be performed with no local search
        strategy applied.
    callback : callable, optional
        A callback function with signature ``callback(x, f, context)``,
        which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and ``context`` has value in [0, 1, 2], with the
        following meaning:

            - 0: minimum detected in the annealing process.
            - 1: detection occurred in the local search process.
            - 2: detection done in the dual annealing process.

        If the callback implementation returns True, the algorithm will stop.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``fun`` the value
        of the function at the solution, and ``message`` which describes the
        cause of the termination.
        See `OptimizeResult` for a description of other attributes.

    Raises
    ------
    ValueError
        If the length of `x0` does not match `bounds`, if
        `restart_temp_ratio` is outside (0, 1), or if the bounds contain
        inf/nan values or do not satisfy ``min < max``.

    Notes
    -----
    This function implements the Dual Annealing optimization. This stochastic
    approach derived from [3]_ combines the generalization of CSA (Classical
    Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
    to a strategy for applying a local search on accepted locations [4]_.
    An alternative implementation of this same algorithm is described in [5]_
    and benchmarks are presented in [6]_. This approach introduces an advanced
    method to refine the solution found by the generalized annealing
    process. This algorithm uses a distorted Cauchy-Lorentz visiting
    distribution, with its shape controlled by the parameter :math:`q_{v}`

    .. math::

        g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
        \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
        \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
        \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
        \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}

    Where :math:`t` is the artificial time. This visiting distribution is used
    to generate a trial jump distance :math:`\\Delta x(t)` of variable
    :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.

    From the starting point, after calling the visiting distribution
    function, the acceptance probability is computed as follows:

    .. math::

        p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
        \\frac{1}{1-q_{a}}}\\}}

    Where :math:`q_{a}` is an acceptance parameter. For :math:`q_{a}<1`, zero
    acceptance probability is assigned to the cases where

    .. math::

        [1-(1-q_{a}) \\beta \\Delta E] < 0

    The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to

    .. math::

        T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
        1 + t\\right)^{q_{v}-1}-1}

    Where :math:`q_{v}` is the visiting parameter.

    .. versionadded:: 1.2.0

    References
    ----------
    .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
        statistics. Journal of Statistical Physics, 52, 479-487 (1988).
    .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
        Physica A, 233, 395-406 (1996).
    .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
        Annealing Algorithm and Its Application to the Thomson Model.
        Physics Letters A, 233, 216-220 (1997).
    .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
        Annealing. Physical Review E, 62, 4473 (2000).
    .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
        Simulated Annealing for Efficient Global Optimization: the GenSA
        Package for R. The R Journal, Volume 5/1 (2013).
    .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
        Statistical Software, 60(6), 1 - 45, (2014). DOI:10.18637/jss.v060.i06

    Examples
    --------
    The following example is a 10-dimensional problem, with many local minima.
    The function involved is called Rastrigin
    (https://en.wikipedia.org/wiki/Rastrigin_function)

    >>> from scipy.optimize import dual_annealing
    >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
    >>> lw = [-5.12] * 10
    >>> up = [5.12] * 10
    >>> ret = dual_annealing(func, None, bounds=list(zip(lw, up)), seed=1234)
    >>> print("global minimum: xmin = {0}, f(xmin) = {1:.6f}".format(
    ...       ret.x, ret.fun))
    global minimum: xmin = [-4.26437714e-09 -3.91699361e-09 -1.86149218e-09 -3.97165720e-09
     -6.29151648e-09 -6.53145322e-09 -3.93616815e-09 -6.55623025e-09
    -6.05775280e-09 -5.00668935e-09], f(xmin) = 0.000000

    """
    if x0 is not None and not len(x0) == len(bounds):
        raise ValueError('Bounds size does not match x0')

    lu = list(zip(*bounds))
    lower = np.array(lu[0])
    upper = np.array(lu[1])
    # Check that restart temperature ratio is correct
    if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
        raise ValueError('Restart temperature ratio has to be in range (0, 1)')
    # Checking bounds are valid
    if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
            np.isnan(lower)) or np.any(np.isnan(upper))):
        raise ValueError('Some bounds values are inf values or nan values')
    # Checking that bounds are consistent
    if not np.all(lower < upper):
        raise ValueError('Bounds are not consistent min < max')

    # Wrapper for the objective function
    func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
    # Wrapper for the minimizer
    minimizer_wrapper = LocalSearchWrapper(
        bounds, func_wrapper, **local_search_options)
    # Initialization of RandomState for reproducible runs if seed provided
    rand_state = check_random_state(seed)
    # Initialization of the energy state
    energy_state = EnergyState(lower, upper, callback)
    energy_state.reset(func_wrapper, rand_state, x0)
    # Minimum value of annealing temperature reached to perform
    # re-annealing
    temperature_restart = initial_temp * restart_temp_ratio
    # VisitingDistribution instance
    visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
    # Strategy chain instance
    strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
                                   minimizer_wrapper, rand_state, energy_state)

    # Run the search loop
    need_to_stop = False
    iteration = 0
    message = []
    if maxiter < 1:
        # range(maxiter) would be empty, so the inner loop could never set
        # need_to_stop and the outer loop would spin forever. Report the
        # initial state instead, exactly as maxiter == 1 does.
        message.append("Maximum number of iteration reached")
        need_to_stop = True
    t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
    while not need_to_stop:
        for i in range(maxiter):
            # Compute temperature for this step
            s = float(i) + 2.0
            t2 = np.exp((visit - 1) * np.log(s)) - 1.0
            temperature = initial_temp * t1 / t2
            iteration += 1
            if iteration >= maxiter:
                message.append("Maximum number of iteration reached")
                need_to_stop = True
                break
            # Need a re-annealing process?
            if temperature < temperature_restart:
                energy_state.reset(func_wrapper, rand_state)
                break
            # starting strategy chain
            val = strategy_chain.run(i, temperature)
            if val is not None:
                message.append(val)
                need_to_stop = True
                break
            # Possible local search at the end of the strategy chain
            if not no_local_search:
                val = strategy_chain.local_search()
                if val is not None:
                    message.append(val)
                    need_to_stop = True
                    break

    # Return the OptimizeResult
    res = OptimizeResult()
    res.x = energy_state.xbest
    res.fun = energy_state.ebest
    res.nit = iteration
    res.nfev = func_wrapper.nfev
    res.njev = func_wrapper.ngev
    res.message = message
    return res
def dummy_minimize(func, dimensions, n_calls=100, x0=None, y0=None,
                   random_state=None):
    """Random search by uniform sampling within the given bounds.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `n_calls` [int, default=100]:
        Number of calls to `func` to find the minimum. Initial points from
        `x0` that still have to be evaluated count towards this budget.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None` or an empty list, no initial input points are
          used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `models` [list]: always empty for random search.
        - `space` [Space]: the optimisation space.
        - `specs` [dict]: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `x0` or `y0` have an unsupported type or inconsistent lengths, if
        `y0` is given without `x0`, or if `func` does not return a scalar.
    """
    # Save call args. This must stay the first statement: f_locals is
    # snapshotted while it contains nothing but the call arguments.
    specs = {"args": copy.copy(inspect.currentframe().f_locals),
             "function": inspect.currentframe().f_code.co_name}

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        # A single point was given; an empty x0 is left alone so that it is
        # not indexed (x0[0] on an empty list raised IndexError before)
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, got %s" % type(x0))

    if len(x0) > 0 and y0 is not None:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError("`y0` should be an iterable or a scalar, got %s"
                             % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")
    elif len(x0) > 0 and y0 is None:
        # The initial points still have to be evaluated, which uses up
        # part of the call budget
        y0 = []
        n_calls -= len(x0)
    elif len(x0) == 0 and y0 is not None:
        raise ValueError("`x0`cannot be `None` when `y0` is provided")
    else:  # len(x0) == 0 and y0 is None
        y0 = []

    # Random search
    X = x0 + space.rvs(n_samples=n_calls, random_state=rng)
    y = y0

    # Only points without a known function value are evaluated
    for n_evaluated, point in enumerate(X[len(y0):]):
        y_i = func(point)
        # The return type is validated on the first evaluation only
        if n_evaluated == 0 and not np.isscalar(y_i):
            raise ValueError("`func` should return a scalar")
        y.append(y_i)

    y = np.array(y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(y)
    res.x = X[best]
    res.fun = y[best]
    res.func_vals = y
    res.x_iters = X
    res.models = []  # Create attribute even though it is empty
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
def minimize(D,
             F,
             x0_generator=None,
             descents_count=1,
             maxiter=None,
             tol=1e-5,
             verbose=0):
    """
    Minimize f(P) = <F, PDP^T> over the set of permutation matrices, where
    <., .> is the Frobenius inner product.

    The relaxed problem is solved with the Frank–Wolfe algorithm (via
    ``minimize_relaxed``) and its solution is then projected onto the
    permutations.

    Parameters
    ----------
    D, F : square numpy matrices of the same size. By convention D is the
        distance and F is the flow in the factory assignment problem. If
        torch is available and D is a ``torch.Tensor``, the torch-aware
        generator and projector wrappers are used.

    x0_generator : generator for initial search points. It is a callable,
        that returns doubly stochastic matrices. The default generator
        picks random points (C + R)/2, where C is the center of the Birkhoff
        polytope and R is random matrix from it.

    descents_count : number of searches to perform from an initial point.

    maxiter : The maximum number of descent steps to perform. If None, there
        is no limit.

    tol : tolerance for the decrease of the objective. If the objective
        decreases with less than tol in one descent step, this local search
        is terminated.

    verbose : verbosity level. With verbose >= 1 the final objective is
        printed; the value is also forwarded to the relaxed minimization.

    Returns
    -------
    scipy.optimize.OptimizeResult object with members fun and x.
    x is the argument that minimizes f and fun is f(x).
    the permutation x is returned in line notation.
    """
    n = len(D)
    is_torch = has_torch and isinstance(D, torch.Tensor)

    # dtype used by the numpy-based default generator
    numpy_dtype = torch_dtype_to_numpy[D.dtype] if is_torch else D.dtype

    if x0_generator is None:
        x0_generator = SearchOriginGenerator(n, numpy_dtype)
    if is_torch:
        x0_generator = TorchifiedSearchOriginGenerator(x0_generator,
                                                       device=D.device)

    projector = TaylorExpansionMinimizer()
    if is_torch:
        projector = TorchifiedProjector(projector)

    relaxed_sol = minimize_relaxed(
        D,
        F,
        projector=projector,
        x0_generator=x0_generator,
        count=descents_count,
        maxiter=maxiter,
        tol=tol,
        verbose=verbose,
    )

    # The projection onto permutations works on host memory
    relaxed_x = relaxed_sol.x.cpu() if is_torch else relaxed_sol.x

    res = OptimizeResult()
    res.x = project_doubly_stochastic_matrix_onto_permutations(relaxed_x)
    res.fun = objective(D, F, res.x)

    if verbose >= 1:
        print("Frak-Wolfe QP objective = %.3f." % (res.fun))
    return res
def fmin_bfgs_f(f_g, x0, B0=None, M=2, gtol=1e-5, Delta=10.0, maxiter=None,
                callback=None, norm_ord=np.inf, **_kwargs):
    """Minimize a function with BFGS and a nonmonotone line search.

    Parameters
    ----------
    f_g : callable object
        ``f_g(x)`` must return the pair ``(f(x), grad f(x))``. The object
        must also provide the attributes ``fun`` and ``grad`` (passed to the
        line search) and ``ncall`` (reported as evaluation count).
    x0 : array-like
        Starting point.
    B0 : ndarray, optional
        Initial Hessian approximation. Defaults to the identity matrix.
    M : int, optional
        Size argument handed to ``Seq``; the line search receives
        ``Seq.get_max()`` as reference function value.
    gtol : float, optional
        The iteration stops successfully once the gradient norm (order
        ``norm_ord``) is not larger than ``gtol``.
    Delta : float, optional
        Unused; accepted for interface compatibility.
    maxiter : int, optional
        Maximum number of iterations. Defaults to ``200 * len(x0)``.
    callback : callable, optional
        Called as ``callback(xk)`` after every successful BFGS step.
    norm_ord : optional
        ``ord`` argument of ``np.linalg.norm`` used for the stopping test.
    **_kwargs
        Ignored.

    Returns
    -------
    OptimizeResult
        With the fields ``message``, ``fun``, ``nit``, ``nfev``, ``njev``,
        ``status``, ``x``, ``jac``, ``hess`` and ``success``. ``status`` is
        0 if the gradient tolerance was reached, 1 if the line search failed
        more than twice in total, and 2 if ``maxiter`` was exhausted. The
        misspelled field ``messgae`` is still set for backward
        compatibility.
    """
    fk, gk = f_g(x0)
    Bk = np.eye(len(x0)) if B0 is None else B0
    Hk = np.linalg.inv(Bk)
    maxiter = 200 * len(x0) if maxiter is None else maxiter
    xk = x0

    def norm(x):
        return np.linalg.norm(x, ord=norm_ord)

    k = 0
    old_old_fval = fk + np.linalg.norm(gk) / 2
    old_fval = fk
    f_s = Seq(M)
    f_s.add(fk)
    flag = 0
    re_search = 0

    for k in range(maxiter):
        if norm(gk) <= gtol:
            break

        # Quasi-Newton search direction
        dki = -np.dot(Hk, gk)

        line_search_ok = True
        try:
            old_fval = fk
            (
                alpha_k,
                _fc,
                _gc,
                old_fval,
                old_old_fval,
                gfkp1,
            ) = line_search_wolfe2(f_g.fun, f_g.grad, xk, dki, gk,
                                   f_s.get_max(), old_fval, old_old_fval)
        except Exception as e:
            # Best effort: any line-search failure falls back to a full step
            print(e)
            line_search_ok = False
        else:
            if alpha_k is None:
                print("alpha is None")
                line_search_ok = False

        if not line_search_ok:
            # Take the full step and retry; give up after the third failure
            xk = xk + dki
            fk, gk = f_g(xk)
            old_fval, old_old_fval = fk, old_fval
            f_s.add(fk)
            re_search += 1
            if re_search > 2:
                flag = 1
                break
            continue

        # Accepted step; the line search already evaluated f and the
        # gradient at the new point
        dki = alpha_k * dki
        fki, gki = old_fval, gfkp1
        xk = xk + dki
        fk = fki
        yk = gki - gk
        gk = gki

        # BFGS update of the Hessian approximation
        bs = np.dot(Bk, dki)
        Bk = (Bk + np.outer(yk, yk) / np.dot(yk, dki) -
              np.outer(bs, bs) / np.dot(bs, dki))
        try:
            Hk = np.linalg.inv(Bk)
        except np.linalg.LinAlgError:
            # Singular update: keep the previous inverse for the next step
            pass

        f_s.add(fk)
        if callback is not None:
            callback(xk)
    else:
        # Loop ran to completion without meeting the gradient tolerance
        flag = 2

    s = OptimizeResult()
    s.message = message_dict[flag]
    # Misspelled alias kept so that existing readers of the old field work
    s.messgae = s.message
    s.fun = float(fk)
    s.nit = k
    s.nfev = f_g.ncall
    s.njev = f_g.ncall
    s.status = flag
    s.x = np.array(xk)
    s.jac = np.array(gk)
    s.hess = np.array(Bk)
    s.success = flag == 0
    return s
def gp_minimize(func, bounds=None, search="sampling", random_state=None,
                maxiter=1000, acq="UCB", num_points=500):
    """
    Black-box optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance when the
    parameters are the hyperparameters of a neural network and the function
    evaluation is the mean cross-validation score across ten folds,
    optimizing the hyperparameters by standard optimization routines would
    take forever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values is given by a GP kernel
    between the parameters. The next parameter to evaluate is then chosen by
    an acquisition function over the Gaussian posterior, which is much
    quicker to evaluate.

    Parameters
    ----------
    func: callable
        Function to minimize. Should take an array of parameters and return
        the function value.

    bounds: array-like, shape (n_parameters, 2)
        ``bounds[i][0]`` should give the lower bound of each parameter and
        ``bounds[i][1]`` should give the upper bound of each parameter.
        Required despite the ``None`` default.

    search: string, "sampling" or "lbfgs"
        How the next candidate is searched for.
        If search is set to "sampling", ``num_points`` points are sampled
        uniformly and the one with the lowest acquisition value is used.
        If search is set to "lbfgs", a point is sampled randomly and
        L-BFGS-B is run for 10 iterations on the acquisition function.

    random_state: int, RandomState instance, or None (default)
        Set random state to something other than None for reproducible
        results.

    maxiter: int, default 1000
        Number of model-guided iterations. ``func`` is called
        ``maxiter + 1`` times in total: once at a random starting point and
        once per iteration.

    acq: string, default "UCB"
        Acquisition function name forwarded to ``acquisition_func``
        ("UCB" for the upper confidence bound or "EI" for the expected
        improvement).

    num_points: int, default 500
        Number of points to sample to determine the next "best" point.
        Unused if search is set to "lbfgs".

    Returns
    -------
    res: OptimizeResult, scipy object
        The optimization result returned as a OptimizeResult object.
        Important attributes are
        ``x`` - array, the best point found,
        ``fun`` - float, the value of the function at ``x``,
        ``models`` - the Gaussian process fitted at each iteration,
        ``func_vals`` - the function value at each evaluation,
        ``x_iters`` - the evaluated points, in the original (unscaled)
        coordinates, in evaluation order.

    Raises
    ------
    ValueError
        If ``bounds`` is missing or ``search`` is not a supported value.
    """
    # Fail early with a clear message instead of an opaque TypeError from
    # len(None) or a NameError on `best_x` deep inside the loop.
    if bounds is None:
        raise ValueError("`bounds` must be provided, got None")
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "Expected `search` to be 'sampling' or 'lbfgs', got %r"
            % (search,))

    rng = np.random.RandomState(random_state)
    num_params = len(bounds)
    lower_bounds, upper_bounds = zip(*bounds)
    upper_bounds = np.asarray(upper_bounds)
    lower_bounds = np.asarray(lower_bounds)
    span = upper_bounds - lower_bounds

    # All modelling happens in the unit cube; points are rescaled to the
    # user's bounds only when calling `func` and when reporting results.
    x0 = rng.rand(num_params)
    func_val = [func(lower_bounds + span * x0)]

    gp_params = {
        'kernel': Matern(length_scale=np.ones(num_params), nu=2.5),
        'normalize_y': True,
        'random_state': random_state
    }
    lbfgs_bounds = np.tile((0, 1), (num_params, 1))

    gp_models = []
    x = np.reshape(x0, (1, -1))

    for _ in range(maxiter):
        gpr = GaussianProcessRegressor(**gp_params)
        gpr.fit(x, func_val)
        incumbent = np.min(func_val)

        if search == "sampling":
            sampling = rng.rand(num_points, num_params)
            acquis = acquisition_func(sampling, gpr, incumbent, acq)
            best_x = sampling[np.argmin(acquis)]
        else:  # "lbfgs"
            init = rng.rand(num_params)
            best_x, _, _ = fmin_l_bfgs_b(
                acquisition_func,
                np.asfortranarray(init),
                args=(gpr, incumbent, acq),
                bounds=lbfgs_bounds, approx_grad=True, maxiter=10)

        gp_models.append(gpr)

        best_f = func(lower_bounds + span * best_x)
        x = np.vstack((x, best_x))
        func_val.append(best_f)

    x = lower_bounds + span * x
    func_ind = np.argmin(func_val)

    res = OptimizeResult()
    res.models = gp_models
    res.x = x[func_ind]
    res.fun = func_val[func_ind]
    res.func_vals = func_val
    res.x_iters = x
    return res
def model_policy_gradient(
    f: Callable[..., float],
    x0: np.ndarray,
    *,
    args=(),
    learning_rate: float = 1e-2,
    decay_rate: float = 0.96,
    decay_steps: int = 5,
    log_sigma_init: float = -5.0,
    max_iterations: int = 1000,
    batch_size: int = 10,
    radius_coeff: float = 3.0,
    warmup_steps: int = 10,
    batch_size_model: int = 65536,
    save_func_vals: bool = False,
    random_state: "cirq.RANDOM_STATE_OR_SEED_LIKE" = None,
    known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None,
    max_evaluations: Optional[int] = None
) -> scipy.optimize.OptimizeResult:
    """Model policy gradient algorithm for black-box optimization.

    The idea of this algorithm is to perform policy gradient, but estimate
    the function values using a surrogate model. The surrogate model is a
    least-squares quadratic fit to points sampled from the vicinity of the
    current iterate.

    Args:
        f: The function to minimize.
        x0: An initial guess.
        args: Additional arguments to pass to the function.
        learning_rate: The initial learning rate of both update schedules.
        decay_rate: The decay rate of the learning-rate schedules.
        decay_steps: The decay steps of the learning-rate schedules.
        log_sigma_init: The initial value for the sigma of the policy in
            the log scale.
        max_iterations: The maximum number of iterations to allow before
            termination.
        batch_size: The number of points to sample (and evaluate with `f`)
            in each iteration.
        radius_coeff: The ratio determining the size of the radius around
            the current iterate to sample points from to build the
            quadratic model. The ratio is with respect to the largest
            distance of the current batch from the current iterate.
        warmup_steps: The number of iterations before the surrogate model
            is used. Before that, plain policy gradient on the evaluated
            batch is performed.
        batch_size_model: The number of samples scored with the surrogate
            model once it has been fitted.
        save_func_vals: Whether to evaluate and record `f` at the current
            iterate in every iteration. These extra evaluations are not
            included in the reported `nfev` nor in the `max_evaluations`
            budget.
        random_state: A seed (int) or `np.random.RandomState` class to use
            when generating random values. If not set, defaults to using
            the module methods in `np.random`.
        known_values: Any prior known values of the objective function.
            This is given as a tuple where the first element is a list
            of points and the second element is a list of the function
            values at those points.
        max_evaluations: The maximum number of batch evaluations of `f` to
            allow before termination. The single evaluation at the final
            iterate is performed regardless.

    Returns:
        Scipy OptimizeResult with `x`, `fun`, `nit`, `nfev`, `message`,
        and the histories `x_iters`, `func_vals`, `xs_iters`, `ys_iters`.
    """
    random_state = value.parse_random_state(random_state)

    if known_values is not None:
        known_xs, known_ys = known_values
        # Copy so the caller's lists are not extended in place below.
        known_xs = [np.copy(x) for x in known_xs]
        known_ys = [np.copy(y) for y in known_ys]
    else:
        known_xs, known_ys = [], []

    if max_evaluations is None:
        max_evaluations = np.inf

    n = len(x0)
    log_sigma = np.ones(n) * log_sigma_init
    sigma = np.exp(log_sigma)

    # First and second moment estimates for the two parameter groups.
    m_mean = np.zeros(n)
    v_mean = np.zeros(n)
    m_log_sigma = np.zeros(n)
    v_log_sigma = np.zeros(n)

    # One learning-rate schedule per parameter group.
    lr_schedule1 = _ExponentialSchedule(learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=decay_rate,
                                        staircase=True)
    lr_schedule2 = _ExponentialSchedule(learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=decay_rate,
                                        staircase=True)

    _, f = wrap_function(f, args)
    res = OptimizeResult()
    current_x = np.copy(x0)
    res.x_iters = []  # initializes as lists
    res.xs_iters = []
    res.ys_iters = []
    res.func_vals = []
    res.fun = 0
    total_evals = 0
    num_iter = 0
    message = None

    while num_iter < max_iterations:
        # Sample a batch from the current Gaussian policy.
        z = random_state.randn(batch_size, n)
        new_xs = sigma * z + current_x

        if total_evals + batch_size > max_evaluations:
            message = "Reached maximum number of evaluations."
            break

        # Evaluate points
        res.xs_iters.append(new_xs)
        new_ys = [f(x) for x in new_xs]
        res.ys_iters.append(new_ys)
        total_evals += batch_size
        known_xs.extend(new_xs)
        known_ys.extend(new_ys)

        # Save function value
        if save_func_vals:
            res.func_vals.append(f(current_x))
            res.x_iters.append(np.copy(current_x))
            res.fun = res.func_vals[-1]

        # Largest distance of the batch from the current iterate.
        max_radius = 0
        for x in new_xs:
            radius = np.linalg.norm(x - current_x)
            if radius > max_radius:
                max_radius = radius

        reward = [-y for y in new_ys]

        # After the warm-up phase, score a large batch with the surrogate
        # model instead of using the evaluated batch directly.
        if num_iter >= warmup_steps:
            # Collect every known point close enough to the iterate.
            model_xs = []
            model_ys = []
            for x, y in zip(known_xs, known_ys):
                if np.linalg.norm(x - current_x) < radius_coeff * max_radius:
                    model_xs.append(x)
                    model_ys.append(y)

            # The fit may fail (e.g. SVD not converging); fall back to the
            # model-free update in that case.
            try:
                # The model is queried below at `new_xs - current_x`, so it
                # is built around `current_x` as well.  The original passed
                # the leftover loop variable `x` here.  NOTE(review):
                # assumes the third argument is the expansion centre --
                # confirm against `_get_quadratic_model`.
                model = _get_quadratic_model(model_xs, model_ys, current_x)
                use_model = True
            except ValueError:
                use_model = False

            if use_model:
                # get samples (from model)
                z = random_state.randn(batch_size_model, n)
                new_xs = sigma * z + current_x

                # use the model for prediction
                new_ys = model.predict(new_xs - current_x)
                reward = [-y for y in new_ys]

        reward = np.array(reward)

        # subtract baseline
        reward = reward - np.mean(reward)

        # Analytic derivatives, normalised to unit length before the update.
        grad_mean = np.dot(z.T, reward)
        grad_log_sigma = np.dot(z.T**2, reward)

        delta_mean = grad_mean * sigma
        delta_log_sigma = grad_log_sigma / np.sqrt(2)

        delta_mean = delta_mean / np.linalg.norm(grad_mean)
        delta_log_sigma = delta_log_sigma / np.linalg.norm(grad_log_sigma)

        # gradient ascent to update the parameters
        current_x, m_mean, v_mean = _adam_update(delta_mean,
                                                 current_x,
                                                 num_iter,
                                                 m_mean,
                                                 v_mean,
                                                 lr_schedule=lr_schedule1)
        log_sigma, m_log_sigma, v_log_sigma = _adam_update(
            delta_log_sigma,
            log_sigma,
            num_iter,
            m_log_sigma,
            v_log_sigma,
            lr_schedule=lr_schedule2,
        )

        # Keep sigma in a numerically safe range.
        log_sigma = np.clip(log_sigma, -20.0, 2.0)
        sigma = np.exp(log_sigma)

        num_iter += 1

    final_val = f(current_x)
    res.func_vals.append(final_val)

    if message is None:
        message = "Reached maximum number of iterations."

    res.x_iters.append(current_x)
    total_evals += 1
    res.x = current_x
    res.fun = final_val
    res.nit = num_iter
    res.nfev = total_evals
    res.message = message
    return res
def solve(self):
    """Run differential evolution with an additional wall-clock budget.

    The initial population is evaluated first, then at most ``self.maxiter``
    generations are evolved.  Evolution stops early when the population has
    converged, when more than ``self.maxfun`` function evaluations were
    made, when ``self.max_execution_time`` seconds have elapsed since the
    first generation started, or when the callback returns ``True``.

    Returns
    -------
    OptimizeResult
        With ``x``, ``fun``, ``nfev``, ``nit`` (number of generations that
        were started), ``message`` and ``success``.  ``success`` is False
        whenever a limit or the callback ended the run.  When
        ``self.polish`` is set the best member is refined with L-BFGS-B;
        if that improves the result, ``jac`` is added as well.
    """
    nfev, nit, warning_flag = 0, 0, False
    status_message = _status_message['success']

    # calculate energies to start with
    for index, candidate in enumerate(self.population):
        parameters = self._scale_parameters(candidate)
        self.population_energies[index] = self.func(parameters, *self.args)
        nfev += 1

        if nfev > self.maxfun:
            warning_flag = True
            status_message = _status_message['maxfev']
            break

    # put the lowest energy into the best solution position.
    minval = np.argmin(self.population_energies)
    lowest_energy = self.population_energies[minval]
    self.population_energies[minval] = self.population_energies[0]
    self.population_energies[0] = lowest_energy
    self.population[[0, minval], :] = self.population[[minval, 0], :]

    if warning_flag:
        return OptimizeResult(
            x=self.x,
            fun=self.population_energies[0],
            nfev=nfev,
            nit=nit,
            message=status_message,
            success=(warning_flag is not True))

    # do the optimisation.
    start_time = time.time()
    nit = 0
    # Run exactly `maxiter` generations.  The previous bound,
    # `nit < self.maxiter + 1` with a pre-increment from 0, evolved one
    # generation too many.
    while nit < self.maxiter:
        # Check the time budget before counting the generation so that
        # `nit` only reports generations that actually started.
        if start_time + self.max_execution_time < time.time():
            warning_flag = True
            status_message = 'Max execution time reached'
            break
        nit += 1

        if self.dither is not None:
            # Draw a fresh mutation scale from the dither interval.
            self.scale = (self.random_number_generator.rand()
                          * (self.dither[1] - self.dither[0])
                          + self.dither[0])

        for candidate in range(np.size(self.population, 0)):
            if nfev > self.maxfun:
                warning_flag = True
                status_message = _status_message['maxfev']
                break

            trial = self._mutate(candidate)
            self._ensure_constraint(trial)
            parameters = self._scale_parameters(trial)

            energy = self.func(parameters, *self.args)
            nfev += 1

            if energy < self.population_energies[candidate]:
                self.population[candidate] = trial
                self.population_energies[candidate] = energy

                # Slot 0 always holds the best member found so far.
                if energy < self.population_energies[0]:
                    self.population_energies[0] = energy
                    self.population[0] = trial

        # stop when the fractional s.d. of the population is less than tol
        # of the mean energy
        convergence = (np.std(self.population_energies) /
                       np.abs(np.mean(self.population_energies) +
                              _MACHEPS))

        if self.disp:
            print("differential_evolution step %d: f(x)= %g"
                  % (nit, self.population_energies[0]))

        if (self.callback and
                self.callback(self._scale_parameters(self.population[0]),
                              convergence=self.tol / convergence) is True):
            warning_flag = True
            status_message = ('callback function requested stop early '
                              'by returning True')
            break

        if convergence < self.tol or warning_flag:
            break
    else:
        # All generations were used without converging.
        status_message = _status_message['maxiter']
        warning_flag = True

    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=nfev,
        nit=nit,
        message=status_message,
        success=(warning_flag is not True))

    if self.polish:
        result = minimize(self.func,
                          np.copy(DE_result.x),
                          method='L-BFGS-B',
                          bounds=self.limits.T,
                          args=self.args)

        nfev += result.nfev
        DE_result.nfev = nfev

        if result.fun < DE_result.fun:
            DE_result.fun = result.fun
            DE_result.x = result.x
            DE_result.jac = result.jac
            # to keep internal state consistent
            self.population_energies[0] = result.fun
            self.population[0] = self._unscale_parameters(result.x)

    return DE_result
def solve(self):
    """
    Runs the DifferentialEvolutionSolver as one island of an island model.

    The solver performs ``self.number_of_migrations`` rounds.  From the
    second round on, the best solutions published by the neighbouring
    islands are fetched from the shared cache (``client``) and compete with
    the population members in slots ``1..k`` (``k`` being the number of
    neighbours that answered).  Each round then evolves the population for
    up to ``self.maxiter`` generations and publishes the island's best
    solution under ``self.island_marker``.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a ``OptimizeResult`` object.
        Important attributes are: ``x`` the solution array, ``success`` a
        Boolean flag indicating if the optimizer exited successfully and
        ``message`` which describes the cause of the termination. See
        `OptimizeResult` for a description of other attributes.  If `polish`
        was employed, and a lower minimum was obtained by the polishing,
        then OptimizeResult also contains the ``jac`` attribute.
    """
    nit, warning_flag = 0, False
    status_message = _status_message['success']

    # The population may have just been initialized (all entries are
    # np.inf).  If it has you have to calculate the initial energies.
    # Although this is also done in the evolve generator it's possible
    # that someone can set maxiter=0, at which point we still want the
    # initial energies to be calculated (the following loop isn't run).
    if np.all(np.isinf(self.population_energies)):
        self._calculate_population_energies()

    for nmig in xrange(1, self.number_of_migrations + 1):
        if nmig != 1:
            # The island id is the last character of the marker.
            host = int(self.island_marker[-1])
            neighbors = self.topology.neighbors(host)

            # neighbour id -> (vector in population coordinates, energy)
            immigrants = {}
            for each_neighbor in neighbors:
                neighbor_key = self.key + str(each_neighbor)
                replacement = client.get(neighbor_key)
                if replacement is None:
                    # Poll until the neighbour has published or the wait
                    # budget is used up.
                    for _ in range(int(self.wait_time / self.poll_time)):
                        replacement = client.get(neighbor_key)
                        if replacement is not None:
                            break
                        print("POLLING!!!")
                        time.sleep(self.poll_time)

                if replacement is not None:
                    params = np.array(
                        [float(item) for item in replacement.split(",")])
                    energy = self.func(params, *self.args)
                    # Islands publish `self.x`, which is in the objective's
                    # coordinates, while `self.population` is stored
                    # unscaled (see the polishing step below).  Convert
                    # before inserting.  NOTE(review): assumes all islands
                    # share the same limits -- confirm.
                    immigrants[each_neighbor] = (
                        self._unscale_parameters(params), energy)

            total_computed_neighbors = len(immigrants)

            candidates = list(immigrants.values())
            for pop_index in range(1, total_computed_neighbors + 1):
                # Copy the row: a bare index is a view for ndarray
                # populations, and the write-back below would otherwise
                # read rows that have already been overwritten.
                candidates.append((np.copy(self.population[pop_index]),
                                   self.population_energies[pop_index]))

            # Keep the best `k` of the 2k competitors in slots 1..k.
            candidates.sort(key=lambda pair: pair[-1])
            survivors = candidates[:total_computed_neighbors]
            for pop_index, (vector, energy) in enumerate(survivors, 1):
                self.population[pop_index] = vector
                self.population_energies[pop_index] = energy

        # do the optimisation.
        for nit in xrange(1, self.maxiter + 1):
            # evolve the population by a generation
            try:
                next(self)
            except StopIteration:
                warning_flag = True
                status_message = _status_message['maxfev']
                break

            if self.disp:
                print("differential_evolution step %d: f(x)= %g"
                      % (nit, self.population_energies[0]))

            # should the solver terminate?
            convergence = self.convergence

            if (self.callback and
                    self.callback(self._scale_parameters(self.population[0]),
                                  convergence=self.tol / convergence) is True):
                warning_flag = True
                status_message = ('callback function requested stop early '
                                  'by returning True')
                break

            intol = (np.std(self.population_energies) <=
                     self.atol +
                     self.tol * np.abs(np.mean(self.population_energies)))
            if warning_flag or intol:
                break
        else:
            status_message = _status_message['maxiter']
            warning_flag = True

        # Publish this island's best solution for its neighbours.
        payload = ",".join([str(items) for items in self.x])
        client.set(self.island_marker, payload)
        print("MARKED IN MEMCACHE")
        print(self.island_marker, payload)

        # The original guarded this with a completion flag that was never
        # set to True, so the message is emitted after every round.
        print("Exited due to some break condition above!!", status_message)

    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=self._nfev,
        nit=nit,
        message=status_message,
        success=(warning_flag is not True))

    if self.polish:
        result = minimize(self.func,
                          np.copy(DE_result.x),
                          method='L-BFGS-B',
                          bounds=self.limits.T,
                          args=self.args)

        self._nfev += result.nfev
        DE_result.nfev = self._nfev

        if result.fun < DE_result.fun:
            DE_result.fun = result.fun
            DE_result.x = result.x
            DE_result.jac = result.jac
            # to keep internal state consistent
            self.population_energies[0] = result.fun
            self.population[0] = self._unscale_parameters(result.x)

    return DE_result
def gp_minimize(func, dimensions, base_estimator=None, alpha=10e-10,
                acq="EI", xi=0.01, kappa=1.96, search="auto", n_calls=100,
                n_points=500, n_random_starts=10, n_restarts_optimizer=5,
                x0=None, y0=None, random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance when the
    parameters are the hyperparameters of a neural network and the function
    evaluation is the mean cross-validation score across ten folds,
    optimizing the hyperparameters by standard optimization routines would
    take forever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values is given by a GP kernel
    between the parameters. The next parameter to evaluate is then chosen
    by the acquisition function over the Gaussian prior, which is much
    quicker to evaluate.

    The total number of evaluations, `n_calls`, is spent as follows.
    If `x0` is provided but not `y0`, then the elements of `x0` are first
    evaluated, followed by `n_random_starts` evaluations. Finally,
    `n_calls - len(x0) - n_random_starts` evaluations are made guided by
    the surrogate model. If `x0` and `y0` are both provided then
    `n_random_starts` evaluations are first made, then
    `n_calls - n_random_starts` subsequent evaluations are made guided by
    the surrogate model.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take an array of parameters and
        return the function value (a scalar).

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions, forwarded to `Space`.
        Each search dimension can be defined either as

        - a 2-tuple of bounds (for `Real` or `Integer` dimensions),
        - a 2-tuple of bounds followed by a `"prior"` string (for `Real`
          dimensions),
        - a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization. By default
        a `ConstantKernel * Matern(nu=2.5)` regressor is built.

    * `alpha` [float, default=10e-10]:
        Value added to the diagonal of the kernel matrix during fitting.
        Larger values correspond to an increased noise level in the
        observations and reduce potential numerical issues during fitting.
        Only used when `base_estimator` is None.

    * `acq` [string, default=`"EI"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"auto"`, `"sampling"` or `"lbfgs"`, default=`"auto"`]:
        Searching for the next possible candidate to update the Gaussian
        prior with.

        If search is set to `"auto"`, then it is set to `"lbfgs"` if
        all the search dimensions are Real (continuous). It defaults to
        `"sampling"` for all other cases.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then `n_restarts_optimizer` points
        are sampled randomly, and from each of them lbfgs is run for
        20 iterations optimizing the acquisition function.

    * `n_calls` [int, default=100]:
        Number of calls to `func`.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Unused if search is set to `"lbfgs"`.

    * `n_random_starts` [int, default=10]:
        Number of evaluations of `func` with random initialization points
        before approximating the `func` with `base_estimator`.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None` or empty, no initial input points are used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the
          function at each element of `x0` : the i-th element of `y0`
          corresponds to the function evaluated at the i-th element of
          `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is
          evaluated at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [list of lists]: location of function evaluation for
           each iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimization space.
        - `specs` [dict]: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`: on inconsistent `x0`/`y0`, an evaluation budget that
      is too small, an unknown `search` value, or a non-scalar `func`
      result.
    """
    # Save call args.  This must stay the first statement so that only the
    # call arguments are captured.
    specs = {"args": copy.copy(inspect.currentframe().f_locals),
             "function": inspect.currentframe().f_code.co_name}

    # Normalise `x0` to a list of points.  The length guard keeps an empty
    # sequence from failing with an IndexError.
    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))

    # Points in `x0` only cost evaluations when their values are unknown.
    n_init_func_calls = len(x0) if y0 is None else 0
    n_total_init_calls = n_random_starts + n_init_func_calls

    if len(x0) + n_random_starts <= 0:
        # Without x0 and without random starts there is nothing to fit the
        # first surrogate model on.
        raise ValueError(
            "Expected `n_random_starts` > 0, got %d" % n_random_starts)

    if n_calls < n_total_init_calls:
        raise ValueError(
            "Expected `n_calls` >= %d, got %d" % (n_total_init_calls, n_calls))

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Default GP
    if base_estimator is None:
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) *
                    Matern(length_scale=np.ones(space.transformed_n_dims),
                           length_scale_bounds=[(0.01, 100)] *
                           space.transformed_n_dims,
                           nu=2.5)),
            normalize_y=True, alpha=alpha, random_state=random_state)

    # Initialize with provided points (x0 and y0) and/or random points
    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError(
                "`y0` should be an iterable or a scalar, got %s" % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")
    else:
        y0 = []

    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    if search == "auto":
        search = "lbfgs" if space.is_real else "sampling"
    elif search not in ["lbfgs", "sampling"]:
        raise ValueError(
            "Expected search to be 'lbfgs', 'sampling' or 'auto', "
            "got %s" % search)

    n_model_iter = n_calls - n_total_init_calls
    if search == "lbfgs" and n_model_iter > 0 and n_restarts_optimizer <= 0:
        raise ValueError(
            "Expected `n_restarts_optimizer` > 0, got %d"
            % n_restarts_optimizer)

    # Bayesian optimization loop
    models = []
    for _ in range(n_model_iter):
        gp = clone(base_estimator)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(space.transform(Xi), yi)

        models.append(gp)
        y_opt = np.min(yi)

        if search == "sampling":
            X = space.transform(space.rvs(n_samples=n_points,
                                          random_state=rng))
            values = _gaussian_acquisition(
                X=X, model=gp, y_opt=y_opt, method=acq,
                xi=xi, kappa=kappa)
            next_x = X[np.argmin(values)]
        else:  # "lbfgs"
            best = np.inf
            next_x = None
            for _restart in range(n_restarts_optimizer):
                start = space.transform(
                    space.rvs(n_samples=1, random_state=rng))[0]

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    candidate, value, _ = fmin_l_bfgs_b(
                        _acquisition, start,
                        args=(gp, y_opt, acq, xi, kappa),
                        bounds=space.transformed_bounds,
                        approx_grad=True, maxiter=20)

                # Always accept the first restart so that `next_x` is
                # defined even when no acquisition value is finite.
                if next_x is None or value < best:
                    next_x, best = candidate, value

        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng
    res.specs = specs
    return res
def solve(self):
    """
    Run the differential evolution solver until a stopping rule fires.

    Returns
    -------
    res : OptimizeResult
        The outcome of the optimization.  ``x`` holds the solution array,
        ``success`` tells whether the solver terminated because the
        population converged, and ``message`` names the reason for
        termination; ``fun``, ``nfev`` and ``nit`` are filled in as well.
        When `polish` is enabled and the polishing step finds a lower
        minimum, the result additionally carries the ``jac`` attribute.
    """
    nit = 0
    warning_flag = False
    status_message = _status_message['success']

    # A freshly initialised population has np.inf for every energy.  The
    # evolve generator would compute them too, but with maxiter=0 the loop
    # below never runs and the energies are still needed for the result.
    if np.all(np.isinf(self.population_energies)):
        self.population_energies[:] = self._calculate_population_energies(
            self.population)
        self._promote_lowest_energy()

    for nit in xrange(1, self.maxiter + 1):
        # Advance the population by one generation.
        try:
            next(self)
        except StopIteration:
            warning_flag = True
            if self._nfev > self.maxfun:
                status_message = _status_message['maxfev']
            elif self._nfev == self.maxfun:
                status_message = ('Maximum number of function evaluations'
                                  ' has been reached.')
            break

        if self.disp:
            print("differential_evolution step %d: f(x)= %g"
                  % (nit, self.population_energies[0]))

        # Give the callback a chance to stop the run.
        convergence = self.convergence
        if self.callback:
            stop_requested = self.callback(
                self._scale_parameters(self.population[0]),
                convergence=self.tol / convergence)
            if stop_requested is True:
                warning_flag = True
                status_message = ('callback function requested stop early '
                                  'by returning True')
                break

        # Convergence can only be judged once every energy is finite.
        if np.any(np.isinf(self.population_energies)):
            continue

        spread = np.std(self.population_energies)
        threshold = (self.atol +
                     self.tol * np.abs(np.mean(self.population_energies)))
        if spread <= threshold:
            break
    else:
        # Every generation was used without meeting the tolerance.
        status_message = _status_message['maxiter']
        warning_flag = True

    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=self._nfev,
        nit=nit,
        message=status_message,
        success=not warning_flag)

    if not self.polish:
        return DE_result

    polished = minimize(self.func,
                        np.copy(DE_result.x),
                        method='L-BFGS-B',
                        bounds=self.limits.T)

    self._nfev += polished.nfev
    DE_result.nfev = self._nfev

    if polished.fun < DE_result.fun:
        DE_result.fun = polished.fun
        DE_result.x = polished.x
        DE_result.jac = polished.jac
        # Mirror the improvement in the solver's own state.
        self.population_energies[0] = polished.fun
        self.population[0] = self._unscale_parameters(polished.x)

    return DE_result
def solve(self):
    """
    Run the differential evolution solver until a stopping rule fires.

    Returns
    -------
    res : OptimizeResult
        The outcome of the optimization.  ``x`` holds the solution array,
        ``success`` tells whether the solver terminated because the
        population converged, and ``message`` names the reason for
        termination; ``fun``, ``nfev`` and ``nit`` are filled in as well.
        When `polish` is enabled and the polishing step finds a lower
        minimum, the result additionally carries the ``jac`` attribute.
    """
    nit = 0
    warning_flag = False
    status_message = _status_message['success']

    # A freshly initialised population has np.inf for every energy.  The
    # evolve generator would compute them too, but with maxiter=0 the loop
    # below never runs and the energies are still needed for the result.
    if np.all(np.isinf(self.population_energies)):
        self._calculate_population_energies()

    for nit in xrange(1, self.maxiter + 1):
        # Advance the population by one generation; the generator is
        # exhausted once the evaluation budget is spent.
        try:
            next(self)
        except StopIteration:
            warning_flag = True
            status_message = _status_message['maxfev']
            break

        if self.disp:
            print("differential_evolution step %d: f(x)= %g"
                  % (nit, self.population_energies[0]))

        # Give the callback a chance to stop the run.
        convergence = self.convergence
        if self.callback:
            stop_requested = self.callback(
                self._scale_parameters(self.population[0]),
                convergence=self.tol / convergence)
            if stop_requested is True:
                warning_flag = True
                status_message = ('callback function requested stop early '
                                  'by returning True')
                break

        # Convergence can only be judged once every energy is finite.
        if np.any(np.isinf(self.population_energies)):
            continue

        spread = np.std(self.population_energies)
        threshold = (self.atol +
                     self.tol * np.abs(np.mean(self.population_energies)))
        if spread <= threshold:
            break
    else:
        # Every generation was used without meeting the tolerance.
        status_message = _status_message['maxiter']
        warning_flag = True

    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=self._nfev,
        nit=nit,
        message=status_message,
        success=not warning_flag)

    if not self.polish:
        return DE_result

    polished = minimize(self.func,
                        np.copy(DE_result.x),
                        method='L-BFGS-B',
                        bounds=self.limits.T,
                        args=self.args)

    self._nfev += polished.nfev
    DE_result.nfev = self._nfev

    if polished.fun < DE_result.fun:
        DE_result.fun = polished.fun
        DE_result.x = polished.x
        DE_result.jac = polished.jac
        # Mirror the improvement in the solver's own state.
        self.population_energies[0] = polished.fun
        self.population[0] = self._unscale_parameters(polished.x)

    return DE_result
def _tree_minimize(func, dimensions, base_estimator, n_calls, n_points, n_random_starts, x0=None, y0=None, random_state=None, acq="EI", xi=0.01, kappa=1.96): rng = check_random_state(random_state) space = Space(dimensions) # Initialize with provided points (x0 and y0) and/or random points if n_calls <= 0: raise ValueError("Expected `n_calls` > 0, got %d" % n_random_starts) if x0 is None: x0 = [] elif not isinstance(x0[0], list): x0 = [x0] if not isinstance(x0, list): raise ValueError("`x0` should be a list, but got %s" % type(x0)) n_init_func_calls = len(x0) if y0 is not None else 0 n_total_init_calls = n_random_starts + n_init_func_calls if n_total_init_calls <= 0: # if x0 is not provided and n_random_starts is 0 then # it will ask for n_random_starts to be > 0. raise ValueError("Expected `n_random_starts` > 0, got %d" % n_random_starts) if n_calls < n_total_init_calls: raise ValueError("Expected `n_calls` >= %d, got %d" % (n_total_init_calls, n_calls)) if y0 is None and x0: y0 = [func(x) for x in x0] elif x0: if isinstance(y0, Iterable): y0 = list(y0) elif isinstance(y0, numbers.Number): y0 = [y0] else: raise ValueError("`y0` should be an iterable or a scalar, got %s" % type(y0)) if len(x0) != len(y0): raise ValueError("`x0` and `y0` should have the same length") if not all(map(np.isscalar, y0)): raise ValueError("`y0` elements should be scalars") else: y0 = [] Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng) yi = y0 + [func(x) for x in Xi[len(x0):]] if np.ndim(yi) != 1: raise ValueError("`func` should return a scalar") # Tree-based optimization loop models = [] n_model_iter = n_calls - n_total_init_calls for i in range(n_model_iter): rgr = clone(base_estimator) rgr.fit(space.transform(Xi), yi) models.append(rgr) # `rgr` predicts constants for each leaf which means that the EI # has zero gradient over large distances. As a result we can not # use gradient based optimizers like BFGS, so using random sampling # for the moment. 
X = space.transform(space.rvs(n_samples=n_points, random_state=rng)) values = _gaussian_acquisition(X=X, model=rgr, y_opt=np.min(yi), method=acq, xi=xi, kappa=kappa) next_x = X[np.argmin(values)] next_x = space.inverse_transform(next_x.reshape((1, -1)))[0] next_y = func(next_x) Xi.append(next_x) yi.append(next_y) res = OptimizeResult() best = np.argmin(yi) res.x = Xi[best] res.fun = yi[best] res.func_vals = np.array(yi) res.x_iters = Xi res.models = models res.space = space res.random_state = rng return res
def _tree_minimize(func, dimensions, base_estimator, n_calls, n_points, n_random_starts, x0=None, y0=None, random_state=None, acq="EI", xi=0.01, kappa=1.96): rng = check_random_state(random_state) space = Space(dimensions) # Initialize with provided points (x0 and y0) and/or random points if n_calls <= 0: raise ValueError( "Expected `n_calls` > 0, got %d" % n_random_starts) if x0 is None: x0 = [] elif not isinstance(x0[0], list): x0 = [x0] if not isinstance(x0, list): raise ValueError("`x0` should be a list, but got %s" % type(x0)) n_init_func_calls = len(x0) if y0 is not None else 0 n_total_init_calls = n_random_starts + n_init_func_calls if n_total_init_calls <= 0: # if x0 is not provided and n_random_starts is 0 then # it will ask for n_random_starts to be > 0. raise ValueError( "Expected `n_random_starts` > 0, got %d" % n_random_starts) if n_calls < n_total_init_calls: raise ValueError( "Expected `n_calls` >= %d, got %d" % (n_total_init_calls, n_calls)) if y0 is None and x0: y0 = [func(x) for x in x0] elif x0: if isinstance(y0, Iterable): y0 = list(y0) elif isinstance(y0, numbers.Number): y0 = [y0] else: raise ValueError( "`y0` should be an iterable or a scalar, got %s" % type(y0)) if len(x0) != len(y0): raise ValueError("`x0` and `y0` should have the same length") if not all(map(np.isscalar, y0)): raise ValueError("`y0` elements should be scalars") else: y0 = [] Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng) yi = y0 + [func(x) for x in Xi[len(x0):]] if np.ndim(yi) != 1: raise ValueError("`func` should return a scalar") # Tree-based optimization loop models = [] n_model_iter = n_calls - n_total_init_calls for i in range(n_model_iter): rgr = clone(base_estimator) rgr.fit(space.transform(Xi), yi) models.append(rgr) # `rgr` predicts constants for each leaf which means that the EI # has zero gradient over large distances. As a result we can not # use gradient based optimizers like BFGS, so using random sampling # for the moment. 
X = space.transform(space.rvs(n_samples=n_points, random_state=rng)) values = _gaussian_acquisition( X=X, model=rgr, y_opt=np.min(yi), method=acq, xi=xi, kappa=kappa) next_x = X[np.argmin(values)] next_x = space.inverse_transform(next_x.reshape((1, -1)))[0] next_y = func(next_x) Xi.append(next_x) yi.append(next_y) res = OptimizeResult() best = np.argmin(yi) res.x = Xi[best] res.fun = yi[best] res.func_vals = np.array(yi) res.x_iters = Xi res.models = models res.space = space res.random_state = rng return res
def solve(self):
    """
    Runs the DifferentialEvolutionSolver.

    The objective is first evaluated for every population member and the
    lowest-energy member is moved to index 0. The population is then
    evolved for at most ``self.maxiter`` generations, stopping early when
    the evaluation budget ``self.maxfun`` is exceeded, when the callback
    returns ``True``, or when ``std(energies) / abs(mean(energies) +
    _MACHEPS)`` falls below ``self.tol``.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a ``OptimizeResult`` object.
        Important attributes are: ``x`` the solution array, ``success`` a
        Boolean flag indicating if the optimizer exited successfully and
        ``message`` which describes the cause of the termination. See
        `OptimizeResult` for a description of other attributes. If polish
        was employed and it found a lower minimum, the result also
        contains the ``jac`` attribute.
    """
    evaluations = 0
    generation = 0
    aborted = False
    message = _status_message['success']

    # calculate energies to start with
    for index, member in enumerate(self.population):
        scaled = self._scale_parameters(member)
        self.population_energies[index] = self.func(scaled, *self.args)
        evaluations += 1

        if evaluations > self.maxfun:
            aborted = True
            message = _status_message['maxfev']
            break

    # put the lowest energy into the best solution position.
    best_index = np.argmin(self.population_energies)
    energies = self.population_energies
    energies[best_index], energies[0] = energies[0], energies[best_index]
    self.population[[0, best_index], :] = self.population[[best_index, 0], :]

    if aborted:
        # The budget ran out while scoring the initial population.
        return OptimizeResult(x=self.x,
                              fun=self.population_energies[0],
                              nfev=evaluations,
                              nit=generation,
                              message=message,
                              success=not aborted)

    # do the optimisation.
    for generation in range(1, self.maxiter + 1):
        if self.dither is not None:
            # Draw a fresh mutation scale inside the dither interval.
            span = self.dither[1] - self.dither[0]
            self.scale = (self.random_number_generator.rand() * span
                          + self.dither[0])

        for candidate in range(np.size(self.population, 0)):
            if evaluations > self.maxfun:
                aborted = True
                message = _status_message['maxfev']
                break

            trial = self._mutate(candidate)
            self._ensure_constraint(trial)
            scaled = self._scale_parameters(trial)

            energy = self.func(scaled, *self.args)
            evaluations += 1

            if energy < self.population_energies[candidate]:
                self.population[candidate] = trial
                self.population_energies[candidate] = energy

                # A trial that also beats the current best replaces it.
                if energy < self.population_energies[0]:
                    self.population_energies[0] = energy
                    self.population[0] = trial

        # stop when the fractional s.d. of the population is less than tol
        # of the mean energy
        spread = np.std(self.population_energies)
        centre = np.mean(self.population_energies)
        convergence = spread / np.abs(centre + _MACHEPS)

        if self.disp:
            print("differential_evolution step %d: f(x)= %g"
                  % (generation, self.population_energies[0]))

        if self.callback:
            best_parameters = self._scale_parameters(self.population[0])
            verdict = self.callback(best_parameters,
                                    convergence=self.tol / convergence)
            # Only a literal True requests a stop.
            if verdict is True:
                aborted = True
                message = ('callback function requested stop early '
                           'by returning True')
                break

        if convergence < self.tol or aborted:
            break
    else:
        # The loop ran out of generations without meeting any stop criterion.
        message = _status_message['maxiter']
        aborted = True

    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=evaluations,
        nit=generation,
        message=message,
        success=not aborted)

    if not self.polish:
        return DE_result

    # Refine the best member with L-BFGS-B inside the same bounds.
    polished = minimize(self.func,
                        np.copy(DE_result.x),
                        method='L-BFGS-B',
                        bounds=self.limits.T,
                        args=self.args)

    evaluations += polished.nfev
    DE_result.nfev = evaluations

    if polished.fun < DE_result.fun:
        DE_result.fun = polished.fun
        DE_result.x = polished.x
        DE_result.jac = polished.jac
        # to keep internal state consistent
        self.population_energies[0] = polished.fun
        self.population[0] = self._unscale_parameters(polished.x)

    return DE_result
def model_gradient_descent(
        f: Callable[..., float],
        x0: np.ndarray,
        *,
        args=(),
        rate: float = 1e-1,
        sample_radius: float = 1e-1,
        n_sample_points: int = 100,
        n_sample_points_ratio: Optional[float] = None,
        rate_decay_exponent: float = 0.0,
        stability_constant: float = 0.0,
        sample_radius_decay_exponent: float = 0.0,
        tol: float = 1e-8,
        known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None,
        max_iterations: Optional[int] = None,
        max_evaluations: Optional[int] = None) -> scipy.optimize.OptimizeResult:
    """Model gradient descent algorithm for black-box optimization.

    The idea of this algorithm is to perform gradient descent, but estimate
    the gradient using a surrogate model instead of, say, by
    finite-differencing. The surrogate model is a least-squares quadratic
    fit to points sampled from the vicinity of the current iterate.

    This algorithm works well when you have an initial guess which is in the
    convex neighborhood of a local optimum and you want to converge to that
    local optimum. It's meant to be used when the function is stochastic.

    Args:
        f: The function to minimize.
        x0: An initial guess.
        args: Additional arguments to pass to the function.
        rate: The learning rate for the gradient descent.
        sample_radius: The radius around the current iterate to sample
            points from to build the quadratic model.
        n_sample_points: The number of points to sample in each iteration.
        n_sample_points_ratio: This specifies the number of points to sample
            in each iteration as a coefficient of the number of points
            required to exactly determine a quadratic model. The number
            of sample points will be this coefficient times (n+1)(n+2)/2,
            rounded up, where n is the number of parameters.
            Setting this overrides n_sample_points.
        rate_decay_exponent: Controls decay of learning rate.
            In each iteration, the learning rate is changed to the
            base learning rate divided by (i + 1 + S)**a, where S
            is the stability constant and a is the rate decay exponent
            (this parameter).
        stability_constant: Affects decay of learning rate.
            In each iteration, the learning rate is changed to the
            base learning rate divided by (i + 1 + S)**a, where S
            is the stability constant (this parameter) and a is the rate
            decay exponent.
        sample_radius_decay_exponent: Controls decay of sample radius.
            In each iteration the radius is the base radius divided by
            (i + 1)**b, where b is this parameter.
        tol: The algorithm terminates when the difference between the
            current iterate and the next suggested iterate is smaller than
            this value.
        known_values: Any prior known values of the objective function.
            This is given as a tuple where the first element is a list
            of points and the second element is a list of the function
            values at those points. The inputs are copied, not modified.
        max_iterations: The maximum number of iterations to allow before
            termination.
        max_evaluations: The maximum number of function evaluations to allow
            before termination.

    Returns:
        Scipy OptimizeResult with ``x``, ``fun``, ``nit``, ``nfev`` and
        ``message``, plus the per-iteration histories ``x_iters``,
        ``xs_iters``, ``ys_iters``, ``func_vals`` and ``model_vals``.
        ``nfev`` counts only evaluations that were actually performed.
    """

    if known_values is not None:
        known_xs, known_ys = known_values
        # Copy so the caller's lists are never extended in place.
        known_xs = [np.copy(x) for x in known_xs]
        known_ys = [np.copy(y) for y in known_ys]
    else:
        known_xs, known_ys = [], []

    if max_iterations is None:
        max_iterations = np.inf
    if max_evaluations is None:
        max_evaluations = np.inf

    n = len(x0)
    if n_sample_points_ratio is not None:
        n_sample_points = int(
            np.ceil(n_sample_points_ratio * (n + 1) * (n + 2) / 2))

    _, f = wrap_function(f, args)
    res = OptimizeResult()
    current_x = np.copy(x0)
    res.x_iters = []  # initializes as lists
    res.xs_iters = []
    res.ys_iters = []
    res.func_vals = []
    res.model_vals = [None]
    res.fun = 0
    total_evals = 0
    num_iter = 0
    converged = False
    message = None

    while num_iter < max_iterations:
        current_sample_radius = (sample_radius /
                                 (num_iter + 1)**sample_radius_decay_exponent)

        # Determine points to evaluate
        # in ball around current point
        new_xs = [np.copy(current_x)] + [
            current_x + _random_point_in_ball(n, current_sample_radius)
            for _ in range(n_sample_points)
        ]

        if total_evals + len(new_xs) > max_evaluations:
            message = 'Reached maximum number of evaluations.'
            break

        # Evaluate points
        res.xs_iters.append(new_xs)
        new_ys = [f(x) for x in new_xs]
        res.ys_iters.append(new_ys)
        total_evals += len(new_ys)
        known_xs.extend(new_xs)
        known_ys.extend(new_ys)

        # Save function value
        res.func_vals.append(new_ys[0])
        res.x_iters.append(np.copy(current_x))
        res.fun = res.func_vals[-1]

        # Determine points to use to build model
        model_xs = []
        model_ys = []
        for x, y in zip(known_xs, known_ys):
            if np.linalg.norm(x - current_x) < current_sample_radius:
                model_xs.append(x)
                model_ys.append(y)
        # Build and solve model
        model_gradient, model = _get_least_squares_model_gradient(
            model_xs, model_ys, current_x)

        # calculate the gradient and update the current point
        gradient_norm = np.linalg.norm(model_gradient)
        decayed_rate = (
            rate / (num_iter + 1 + stability_constant)**rate_decay_exponent)
        # Convergence criteria
        if decayed_rate * gradient_norm < tol:
            converged = True
            message = 'Optimization converged successfully.'
            break
        # Update
        current_x -= decayed_rate * model_gradient
        res.model_vals.append(
            model.predict([-decayed_rate * model_gradient])[0])

        num_iter += 1

    if converged:
        # The iterate did not move after its last evaluation, so both its
        # value and its position are already recorded: no new evaluation.
        final_val = res.func_vals[-1]
    else:
        # The iterate is ahead of the last recorded point; evaluate it once
        # and record it so `x_iters` and `func_vals` stay the same length.
        final_val = f(current_x)
        res.func_vals.append(final_val)
        res.x_iters.append(current_x)
        total_evals += 1

    if message is None:
        message = 'Reached maximum number of iterations.'

    res.x = current_x
    res.fun = final_val
    res.nit = num_iter
    res.nfev = total_evals
    res.message = message
    return res
def create_result(Xi, yi, n_evaluations=None, space=None, rng=None,
                  specs=None, models=None, maximize=False):
    """
    Initialize an `OptimizeResult` object.

    Parameters
    ----------
    * `Xi` [list of lists, shape=(n_iters, n_features)]:
        Evaluated points, one per iteration. A list of such lists is
        flattened by one level.

    * `yi` [array-like, shape=(n_iters,)]:
        Objective value at every evaluated point. A list of lists is
        flattened by one level. If the resulting array is 2-D, column 0
        is used as the objective and column 1 is stored as `log_time`.

    * `n_evaluations` [int, optional]:
        When truthy, `func_vals` and `x_iters` hold only the first
        `n_evaluations` entries in ascending objective order (duplicate
        values are kept only when there are fewer unique values than
        `n_evaluations`), and the complete history is stored in
        `all_func_vals` and `all_x_iters`.

    * `space` [Space instance, optional]:
        Search space.

    * `rng` [RandomState instance, optional]:
        State of the random state.

    * `specs` [dict, optional]:
        Call specifications.

    * `models` [list, optional]:
        List of fit surrogate models.

    * `maximize` [bool, default=False]:
        If True, `x` and `fun` are taken at the largest objective value
        instead of the smallest. The ordering used with `n_evaluations`
        stays ascending.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        OptimizeResult instance with the required information.
    """
    result = OptimizeResult()

    try:
        # Hyperband returns evaluations as lists of lists.
        # We want to store the results as a single array.
        yi = list(itertools.chain.from_iterable(yi))
        Xi = list(itertools.chain.from_iterable(Xi))
    except TypeError:
        # All algorithms other than Hyperband already return a single list.
        pass

    values = np.asarray(yi)
    if np.ndim(values) == 2:
        result.log_time = np.ravel(values[:, 1])
        values = np.ravel(values[:, 0])

    best = np.argmax(values) if maximize else np.argmin(values)
    result.x = Xi[best]
    result.fun = values[best]

    if not n_evaluations:
        result.func_vals = np.asarray(values)
        result.x_iters = np.asarray(Xi)
    else:
        unique_vals, first_seen = np.unique(values, return_index=True)
        if len(unique_vals) < n_evaluations:
            # Too few distinct values: rank every evaluation, ties included.
            order = np.argsort(values)
            ranked = sorted(values)
        else:
            # Enough distinct values: keep one evaluation per value.
            order = first_seen
            ranked = sorted(unique_vals)

        result.func_vals = np.asarray(ranked[:n_evaluations])
        ordered_points = [Xi[idx] for idx in order]
        result.x_iters = np.asarray(ordered_points[:n_evaluations])
        result.all_func_vals = np.asarray(values)
        result.all_x_iters = np.asarray(Xi)

    result.models = models
    result.space = space
    result.random_state = rng
    result.specs = specs
    return result
def gp_minimize(func, bounds, base_estimator=None, acq="LCB", xi=0.01,
                kappa=1.96, search="sampling", maxiter=1000, n_points=500,
                n_start=10, n_restarts_optimizer=5, random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take forever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian prior which is much quicker to evaluate.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `bounds` [array-like, shape=(n_parameters, 2)]:
        - ``bounds[i][0]`` should give the lower bound of each parameter and
        - ``bounds[i][1]`` should give the upper bound of each parameter.

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization. When None,
        a `GaussianProcessRegressor` with a ConstantKernel * Matern(nu=2.5)
        kernel is used.

    * `acq` [string, default=`"LCB"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"sampling"` or `"lbfgs"`]:
        Searching for the next possible candidate to update the Gaussian
        prior with.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then a point is sampled randomly, and
        lbfgs is run for 10 iterations optimizing the acquisition function
        over the Gaussian prior.

    * `maxiter` [int, default=1000]:
        Number of iterations to find the minimum. Note that `n_start`
        iterations are effectively discounted, such that total number of
        function evaluations is at most `maxiter`.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_start` [int, default=10]:
        Number of random initialization points.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [float]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `search` is not `"sampling"` or `"lbfgs"`, or if `func` does not
        return a scalar.
    """
    # Reject an unknown search strategy before any expensive evaluation.
    # Without this check the loop below would never assign `next_x`.
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "Expected `search` to be 'sampling' or 'lbfgs', got %r"
            % (search,))

    rng = check_random_state(random_state)

    # Bounds
    n_params = len(bounds)
    lb, ub = extract_bounds(bounds)

    # Default GP
    if base_estimator is None:
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) *
                    Matern(length_scale=np.ones(n_params),
                           length_scale_bounds=[(0.01, 100)] * n_params,
                           nu=2.5)),
            normalize_y=True, alpha=10e-6, random_state=random_state)

    # First points
    Xi = lb + (ub - lb) * rng.rand(n_start, n_params)
    yi = [func(x) for x in Xi]
    if np.ndim(yi) != 1:
        raise ValueError(
            "The function to be optimized should return a scalar")

    # Bayesian optimization loop
    models = []

    for i in range(maxiter - n_start):
        gp = clone(base_estimator)

        # Fitting warnings are silenced on purpose.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(Xi, yi)

        models.append(gp)

        if search == "sampling":
            X = lb + (ub - lb) * rng.rand(n_points, n_params)
            values = acquisition(
                X=X, model=gp, y_opt=np.min(yi), method=acq,
                xi=xi, kappa=kappa)
            next_x = X[np.argmin(values)]

        elif search == "lbfgs":
            best = np.inf

            # Keep the restart with the lowest acquisition value.
            for j in range(n_restarts_optimizer):
                x0 = lb + (ub - lb) * rng.rand(n_params)

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _ = fmin_l_bfgs_b(
                        _acquisition, x0,
                        args=(gp, np.min(yi), acq, xi, kappa),
                        bounds=bounds, approx_grad=True, maxiter=10)

                if a < best:
                    next_x, best = x, a

        next_y = func(next_x)
        Xi = np.vstack((Xi, next_x))
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models

    return res
def glpk( c, A_ub=None, b_ub=None, A_eq=None, b_eq=None, bounds=None, solver='simplex', sense=GLPK.GLP_MIN, scale=True, maxit=GLPK.INT_MAX, timeout=GLPK.INT_MAX, basis_fac='luf+ft', message_level=GLPK.GLP_MSG_ERR, disp=False, simplex_options=None, ip_options=None, mip_options=None, ): '''GLPK ctypes interface. Parameters ---------- c : 1-D array (n,) Array of objective coefficients. A_ub : 2-D array (m, n) scipy.sparse.coo_matrix b_ub : 1-D array (m,) A_eq : 2-D array (k, n) scipy.sparse.coo_matrix b_eq : 1-D array (k,) bounds : None or list (n,) of tuple (2,) or tuple (2,) The jth entry in the list corresponds to the jth objective coefficient. Each entry is made up of a tuple describing the bounds. Use None to indicate that there is no bound. By default, bounds are (0, None) (all decision variables are non-negative). If a single tuple (min, max) is provided, then min and max will serve as bounds for all decision variables. solver : { 'simplex', 'interior', 'mip' } Use simplex (LP/MIP) or interior point method (LP only). Default is ``simplex``. sense : { 'GLP_MIN', 'GLP_MAX' } Minimization or maximization problem. Default is ``GLP_MIN``. scale : bool Scale the problem. Default is ``True``. maxit : int Maximum number of iterations. Default is ``INT_MAX``. timout : int Limit solution time to ``timeout`` seconds. Default is ``INT_MAX``. basis_fac : { 'luf+ft', 'luf+cbg', 'luf+cgr', 'btf+cbg', 'btf+cgr' } LP basis factorization strategy. Default is ``luf+ft``. These are combinations of the following strategies: - ``luf`` : plain LU-factorization - ``btf`` : block triangular LU-factorization - ``ft`` : Forrest-Tomlin update - ``cbg`` : Schur complement + Bartels-Golub update - ``cgr`` : Schur complement + Givens rotation update message_level : { GLP_MSG_OFF, GLP_MSG_ERR, GLP_MSG_ON, GLP_MSG_ON, GLP_MSG_ALL, GLP_MSG_DBG } Verbosity level of logging to stdout. Only applied when ``disp=True``. Default is ``GLP_MSG_ERR``. 
One of the following: ``GLP_MSG_OFF`` : no output ``GLP_MSG_ERR`` : warning and error messages only ``GLP_MSG_ON`` : normal output ``GLP_MSG_ALL`` : full output ``GLP_MSG_DBG`` : debug output disp : bool Display output to stdout. Default is ``False``. simplex_options : dict Options specific to simplex solver. The dictionary consists of the following fields: - primal : { 'primal', 'dual', 'dualp' } Primal or two-phase dual simplex. Default is ``primal``. One of the following: - ``primal`` : use two-phase primal simplex - ``dual`` : use two-phase dual simplex - ``dualp`` : use two-phase dual simplex, and if it fails, switch to the primal simplex - init_basis : { 'std', 'adv', 'bib' } Choice of initial basis. Default is 'adv'. One of the following: - ``std`` : standard initial basis of all slacks - ``adv`` : advanced initial basis - ``bib`` : Bixby's initial basis - steep : bool Use steepest edge technique or standard "textbook" pricing. Default is ``True`` (steepest edge). - ratio : { 'relax', 'norelax', 'flip' } Ratio test strategy. Default is ``relax``. One of the following: - ``relax`` : Harris' two-pass ratio test - ``norelax`` : standard "textbook" ratio test - ``flip`` : long-step ratio test - tol_bnd : double Tolerance used to check if the basic solution is primal feasible. (Default: 1e-7). - tol_dj : double Tolerance used to check if the basic solution is dual feasible. (Default: 1e-7). - tol_piv : double Tolerance used to choose eligble pivotal elements of the simplex table. (Default: 1e-10). - obj_ll : double Lower limit of the objective function. If the objective function reaches this limit and continues decreasing, the solver terminates the search. Used in the dual simplex only. (Default: -DBL_MAX -- the largest finite float64). - obj_ul : double Upper limit of the objective function. If the objective function reaches this limit and continues increasing, the solver terminates the search. Used in the dual simplex only. 
(Default: +DBL_MAX -- the largest finite float64). - presolve : bool Use presolver (assumes ``scale=True`` and ``init_basis='adv'``. Default is ``True``. - exact : bool Use simplex method based on exact arithmetic. Default is ``False``. If ``True``, all other ``simplex_option`` fields are ignored. ip_options : dict Options specific to interior-pooint solver. The dictionary consists of the following fields: - ordering : { 'nord', 'qmd', 'amd', 'symamd' } Ordering algorithm used before Cholesky factorizaiton. Default is ``amd``. One of the following: - ``nord`` : natural (original) ordering - ``qmd`` : quotient minimum degree ordering - ``amd`` : approximate minimum degree ordering - ``symamd`` : approximate minimum degree ordering algorithm for Cholesky factorization of symmetric matrices. mip_options : dict Options specific to MIP solver. The dictionary consists of the following fields: - intcon : 1-D array Array of integer contraints, specified as the 0-based indices of the solution. Default is an empty array. - bincon : 1-D array Array of binary constraints, specified as the 0-based indices of the solution. If any indices are duplicated between ``bincon`` and ``intcon``, they will be considered as binary constraints. Default is an empty array. - nomip : bool consider all integer variables as continuous (allows solving MIP as pure LP). Default is ``False``. - branch : { 'first', 'last', 'mostf', 'drtom', 'pcost' } Branching rule. Default is ``drtom``. One of the following: - ``first`` : branch on first integer variable - ``last`` : branch on last integer variable - ``mostf`` : branch on most fractional variable - ``drtom`` : branch using heuristic by Driebeck and Tomlin - ``pcost`` : branch using hybrid pseudocost heuristic (may be useful for hard instances) - backtrack : { 'dfs', 'bfs', 'bestp', 'bestb' } Backtracking rule. Default is ``bestb``. 
One of the following: - ``dfs`` : backtrack using depth first search - ``bfs`` : backtrack using breadth first search - ``bestp`` : backtrack using the best projection heuristic - ``bestb`` : backtrack using node with best local bound - preprocess : { 'none', 'root', 'all' } Preprocessing technique. Default is ``GLP_PP_ALL``. One of the following: - ``none`` : disable preprocessing - ``root`` : perform preprocessing only on the root level - ``all`` : perform preprocessing on all levels - round : bool Simple rounding heuristic. Default is ``True``. - presolve : bool Use MIP presolver. Default is ``True``. - binarize : bool replace general integer variables by binary ones (only used if ``presolve=True``). Default is ``False``. - fpump : bool Apply feasibility pump heuristic. Default is ``False``. - proxy : int Apply proximity search heuristic (in seconds). Default is 60. - cuts : list of { 'gomory', 'mir', 'cover', 'clique', 'all' } Cuts to generate. Default is no cuts. List of the following: - ``gomory`` : Gomory's mixed integer cuts - ``mir`` : MIR (mixed integer rounding) cuts - ``cover`` : mixed cover cuts - ``clique`` : clique cuts - ``all`` : generate all cuts above - tol_int : float Absolute tolerance used to check if optimal solution to the current LP relaxation is integer feasible. (Default: 1e-5). - tol_obj : float Relative tolerance used to check if the objective value in optimal solution to the current LP relaxation is not better than in the best known integer feasible solution. (Default: 1e-7). - mip_gap : float Relative mip gap tolerance. If the relative mip gap for currently known best integer feasiblesolution falls below this tolerance, the solver terminates the search. This allows obtaining suboptimal integer feasible solutions if solving the problem to optimality takes too long time. (Default: 0.0). - bound : float add inequality obj <= bound (minimization) or obj >= bound (maximization) to integer feasibility problem (assumes ``minisat=True``). 
Notes ----- In general, don't change tolerances without a detailed understanding of their purposes. ''' # Housekeeping if simplex_options is None: simplex_options = {} if ip_options is None: ip_options = {} if mip_options is None: mip_options = {} # Create and fill the GLPK problem struct prob, lp = _fill_prob(c, A_ub, b_ub, A_eq, b_eq, bounds, sense, 'problem-name') c, A_ub, b_ub, A_eq, b_eq, bounds, _x0 = lp # Get the library _lib = GLPK()._lib # Scale the problem no_need_explict_scale = (solver == "simplex" and simplex_options.get("presolve")) if not no_need_explict_scale and scale: _lib.glp_scale_prob(prob, GLPK.GLP_SF_AUTO) # do auto scaling for now # Select basis factorization method bfcp = glp_bfcp() _lib.glp_get_bfcp(prob, ctypes.byref(bfcp)) bfcp.type = { 'luf+ft': GLPK.GLP_BF_LUF + GLPK.GLP_BF_FT, 'luf+cbg': GLPK.GLP_BF_LUF + GLPK.GLP_BF_BG, 'luf+cgr': GLPK.GLP_BF_LUF + GLPK.GLP_BF_GR, 'btf+cbg': GLPK.GLP_BF_BTF + GLPK.GLP_BF_BG, 'btf+cgr': GLPK.GLP_BF_BTF + GLPK.GLP_BF_GR, }[basis_fac] _lib.glp_set_bfcp(prob, ctypes.byref(bfcp)) # Run the solver if solver == 'simplex': # Construct an initial basis basis = simplex_options.get('init_basis', 'adv') basis_fun = { 'std': _lib.glp_std_basis, 'adv': _lib.glp_adv_basis, 'bib': _lib.glp_cpx_basis, }[basis] basis_args = [prob] if basis == 'adv': # adv must have 0 as flags argument basis_args.append(0) basis_fun(*basis_args) # Make control structure smcp = glp_smcp() _lib.glp_init_smcp(ctypes.byref(smcp)) # Set options smcp.msg_lev = message_level*disp smcp.meth = { 'primal': GLPK.GLP_PRIMAL, 'dual': GLPK.GLP_DUAL, 'dualp': GLPK.GLP_DUALP, }[simplex_options.get('method', 'primal')] smcp.pricing = { True: GLPK.GLP_PT_PSE, False: GLPK.GLP_PT_STD, }[simplex_options.get('steep', True)] smcp.r_test = { 'relax': GLPK.GLP_RT_HAR, 'norelax': GLPK.GLP_RT_STD, 'flip': GLPK.GLP_RT_FLIP, }[simplex_options.get('ratio', 'relax')] smcp.tol_bnd = simplex_options.get('tol_bnd', 1e-7) smcp.tol_dj = simplex_options.get('tol_dj', 
1e-7) smcp.tol_piv = simplex_options.get('tol_piv', 1e-10) if simplex_options.get('obj_ll', False): smcp.obj_ll = simplex_options['obj_ll'] if simplex_options.get('obj_ul', False): smcp.obj_ul = simplex_options['obj_ul'] smcp.it_lim = maxit smcp.tm_lim = timeout smcp.presolve = { True: GLPK.GLP_ON, False: GLPK.GLP_OFF, }[simplex_options.get('presolve', True)] # Simplex driver if simplex_options.get('exact', False): ret_code = _lib.glp_exact(prob, ctypes.byref(smcp)) else: ret_code = _lib.glp_simplex(prob, ctypes.byref(smcp)) if ret_code != GLPK.SUCCESS: warn('GLPK simplex not successful!', OptimizeWarning) return OptimizeResult({ 'message': GLPK.RET_CODES[ret_code], }) # Figure out what happened status = _lib.glp_get_status(prob) message = GLPK.STATUS_CODES[status] res = OptimizeResult({ 'status': status, 'message': message, 'success': status == GLPK.GLP_OPT, }) # We can read a solution: if status == GLPK.GLP_OPT: res.fun = _lib.glp_get_obj_val(prob) res.x = np.array([_lib.glp_get_col_prim(prob, ii) for ii in range(1, _lib.glp_get_num_cols(prob)+1)]) res.dual = np.array([_lib.glp_get_row_dual(prob, ii) for ii in range(1, _lib.glp_get_num_rows(prob)+1)]) # We don't get slack without doing sensitivity analysis since GLPK # uses auxiliary variables instead of slack! res.slack = b_ub - A_ub @ res.x res.con = b_eq - A_eq @ res.x # We shouldn't be reading this field... 
But we will anyways res.nit = prob.contents.it_cnt elif solver == 'interior': # Make a control structure iptcp = glp_iptcp() _lib.glp_init_iptcp(ctypes.byref(iptcp)) # Set options iptcp.msg_lev = message_level*disp iptcp.ord_alg = { 'nord': GLPK.GLP_ORD_NONE, 'qmd': GLPK.GLP_ORD_QMD, 'amd': GLPK.GLP_ORD_AMD, 'symamd': GLPK.GLP_ORD_SYMAMD, }[ip_options.get('ordering', 'amd')] # Run the solver ret_code = _lib.glp_interior(prob, ctypes.byref(iptcp)) if ret_code != GLPK.SUCCESS: warn('GLPK interior-point not successful!', OptimizeWarning) return OptimizeResult({ 'message': GLPK.RET_CODES[ret_code], }) # Figure out what happened status = _lib.glp_ipt_status(prob) message = GLPK.STATUS_CODES[status] res = OptimizeResult({ 'status': status, 'message': message, 'success': status == GLPK.GLP_OPT, }) # We can read a solution: if status == GLPK.GLP_OPT: res.fun = _lib.glp_ipt_obj_val(prob) res.x = np.array([_lib.glp_ipt_col_prim(prob, ii) for ii in range(1, _lib.glp_get_num_cols(prob)+1)]) res.dual = np.array([_lib.glp_ipt_row_dual(prob, ii) for ii in range(1, _lib.gpl_get_num_rows(prob)+1)]) # We don't get slack without doing sensitivity analysis since GLPK uses # auxiliary variables instead of slack! res.slack = b_ub - A_ub @ res.x res.con = b_eq - A_eq @ res.x # We shouldn't be reading this field... 
But we will anyways res.nit = prob.contents.it_cnt elif solver == 'mip': # Make a control structure iocp = glp_iocp() _lib.glp_init_iocp(ctypes.byref(iocp)) # Make variables integer- and binary-valued if not mip_options.get('nomip', False): intcon = mip_options.get('intcon', []) for jj in intcon: _lib.glp_set_col_kind(prob, jj+1, GLPK.GLP_IV) bincon = mip_options.get('bincon', []) for jj in bincon: _lib.glp_set_col_kind(prob, jj+1, GLPK.GLP_BV) # Set options iocp.msg_lev = message_level*disp iocp.br_tech = { 'first': GLPK.GLP_BR_FFV, 'last': GLPK.GLP_BR_LFV, 'mostf': GLPK.GLP_BR_MFV, 'drtom': GLPK.GLP_BR_DTH, 'pcost': GLPK.GLP_BR_PCH, }[mip_options.get('branch', 'drtom')] iocp.bt_tech = { 'dfs': GLPK.GLP_BT_DFS, 'bfs': GLPK.GLP_BT_BFS, 'bestp': GLPK.GLP_BT_BPH, 'bestb': GLPK.GLP_BT_BLB, }[mip_options.get('backtrack', 'bestb')] iocp.pp_teck = { 'none': GLPK.GLP_PP_NONE, 'root': GLPK.GLP_PP_ROOT, 'all': GLPK.GLP_PP_ALL, }[mip_options.get('preprocess', 'all')] iocp.sr_heur = { True: GLPK.GLP_ON, False: GLPK.GLP_OFF, }[mip_options.get('round', True)] iocp.fp_heur = { True: GLPK.GLP_ON, False: GLPK.GLP_OFF, }[mip_options.get('fpump', False)] ps_tm_lim = mip_options.get('proxy', 60) if ps_tm_lim: iocp.ps_heur = GLPK.GLP_ON iocp.ps_tm_lim = ps_tm_lim*1000 else: iocp.ps_heur = GLPK.GLP_OFF iocp.ps_tm_lim = 0 cuts = set(list(mip_options.get('cuts', []))) if 'all' in cuts: cuts = {'gomory', 'mir', 'cover', 'clique'} if 'gomory' in cuts: iocp.gmi_cuts = GLPK.GLP_ON if 'mir' in cuts: iocp.mir_cuts = GLPK.GLP_ON if 'cover' in cuts: iocp.cov_cuts = GLPK.GLP_ON if 'clique' in cuts: iocp.clq_cuts = GLPK.GLP_ON iocp.tol_int = mip_options.get('tol_int', 1e-5) iocp.tol_obj = mip_options.get('tol_obj', 1e-7) iocp.mip_gap = mip_options.get('mip_gap', 0.0) iocp.tm_lim = timeout iocp.presolve = { True: GLPK.GLP_ON, False: GLPK.GLP_OFF, }[mip_options.get('presolve', True)] iocp.binarize = { True: GLPK.GLP_ON, False: GLPK.GLP_OFF, }[mip_options.get('binarize', False)] # Run the solver 
ret_code = _lib.glp_intopt(prob, ctypes.byref(iocp)) if ret_code != GLPK.SUCCESS: warn('GLPK interior-point not successful!', OptimizeWarning) return OptimizeResult({ 'message': GLPK.RET_CODES[ret_code], }) # Figure out what happened status = _lib.glp_mip_status(prob) message = GLPK.STATUS_CODES[status] res = OptimizeResult({ 'status': status, 'message': message, 'success': status in [GLPK.GLP_OPT, GLPK.GLP_FEAS], }) # We can read a solution: if res.success: res.fun = _lib.glp_mip_obj_val(prob) res.x = np.array([_lib.glp_mip_col_val(prob, ii) for ii in range(1, len(c)+1)]) else: raise ValueError('"%s" is not a recognized solver.' % solver) # We're done, cleanup! _lib.glp_delete_prob(prob) # Map status codes to scipy: # res.status = { # GLPK.GLP_OPT: 0, # }[res.status] return res
def gp_minimize(func, dimensions, base_estimator=None, acq="LCB", xi=0.01,
                kappa=1.96, search="sampling", maxiter=1000, n_points=500,
                n_start=10, n_restarts_optimizer=5, random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance when the
    parameters are the hyperparameters of a neural network and the function
    evaluation is the mean cross-validation score across ten folds,
    optimizing the hyperparameters by standard optimization routines would
    take for ever!

    The idea is to approximate the function using a Gaussian process. In
    other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a GP kernel
    between the parameters. Then a smart choice to choose the next parameter
    to evaluate can be made by the acquisition function over the Gaussian
    prior which is much quicker to evaluate.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(upper_bound, lower_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization. When
        `None`, a `GaussianProcessRegressor` with a
        `ConstantKernel * Matern(nu=2.5)` kernel is built.

    * `acq` [string, default=`"LCB"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"sampling"` or `"lbfgs"`]:
        Searching for the next possible candidate to update the Gaussian
        prior with.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then `n_restarts_optimizer` points
        are sampled randomly, lbfgs is run for 10 iterations from each of
        them optimizing the acquisition function over the Gaussian prior,
        and the best end point is kept.

    * `maxiter` [int, default=1000]:
        Number of iterations to find the minimum. Note that `n_start`
        iterations are effectively discounted, such that total number of
        function evaluations is at most `maxiter` (when
        `maxiter <= n_start` only the `n_start` random points are
        evaluated).

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_start` [int, default=10]:
        Number of random initialization points.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [float]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [array]: location of function evaluation for each
          iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimisation space.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`:
        If `search` is not `"sampling"` or `"lbfgs"`, if
        `n_restarts_optimizer` is not positive while `search` is `"lbfgs"`,
        or if `func` does not return a scalar.
    """
    # Fail fast on options that would otherwise leave `next_x` unassigned
    # inside the loop -- and only after `func` had already been evaluated
    # `n_start` times.
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "Expected `search` to be 'sampling' or 'lbfgs', got %r"
            % (search,))
    if search == "lbfgs" and n_restarts_optimizer <= 0:
        raise ValueError(
            "Expected n_restarts_optimizer > 0, got %r"
            % (n_restarts_optimizer,))

    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Default GP
    if base_estimator is None:
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) *
                    Matern(length_scale=np.ones(space.transformed_n_dims),
                           length_scale_bounds=[(0.01, 100)] *
                           space.transformed_n_dims,
                           nu=2.5)),
            normalize_y=True, alpha=10e-6, random_state=random_state)

    # First points
    Xi = space.rvs(n_samples=n_start, random_state=rng)
    yi = [func(x) for x in Xi]
    if np.ndim(yi) != 1:
        raise ValueError(
            "The function to be optimized should return a scalar")

    # Bayesian optimization loop
    models = []

    for i in range(maxiter - n_start):
        # Refit a fresh copy of the surrogate on everything seen so far.
        gp = clone(base_estimator)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(space.transform(Xi), yi)

        models.append(gp)

        if search == "sampling":
            X = space.transform(space.rvs(n_samples=n_points,
                                          random_state=rng))
            values = _gaussian_acquisition(
                X=X, model=gp, y_opt=np.min(yi), method=acq,
                xi=xi, kappa=kappa)
            next_x = X[np.argmin(values)]

        else:  # search == "lbfgs", guaranteed by the check above
            best_acq = np.inf

            for j in range(n_restarts_optimizer):
                x0 = space.transform(space.rvs(n_samples=1,
                                               random_state=rng))[0]

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _ = fmin_l_bfgs_b(
                        _acquisition, x0,
                        args=(gp, np.min(yi), acq, xi, kappa),
                        bounds=space.transformed_bounds,
                        approx_grad=True, maxiter=10)

                if a < best_acq:
                    next_x, best_acq = x, a

        # The candidate lives in the transformed space; map it back before
        # handing it to the user's function.
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi = np.vstack((Xi, next_x))
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space

    return res
def dual_annealing(func, bounds, args=(), maxiter=1000,
                   local_search_options={}, initial_temp=5230.,
                   restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
                   maxfun=1e7, seed=None, no_local_search=False,
                   callback=None, x0=None):
    """
    Find the global minimum of a function using Dual Annealing.

    Parameters
    ----------
    func : callable
        The objective function to be minimized. Must be in the form
        ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D
        array and ``args`` is a tuple of any additional fixed parameters
        needed to completely specify the function.
    bounds : sequence, shape (n, 2)
        Bounds for variables. ``(min, max)`` pairs for each element in
        ``x``, defining bounds for the objective function parameter.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the
        objective function.
    maxiter : int, optional
        The maximum number of global search iterations. Must be positive.
        Default value is 1000.
    local_search_options : dict, optional
        Extra keyword arguments to be passed to the local minimizer
        (`minimize`). Some important options could be:
        ``method`` for the minimizer method to use and ``args`` for
        objective function additional arguments.
    initial_temp : float, optional
        The initial temperature, use higher values to facilitates a wider
        search of the energy landscape, allowing dual_annealing to escape
        local minima that it is trapped in. Default value is 5230. Range is
        (0.01, 5.e4].
    restart_temp_ratio : float, optional
        During the annealing process, temperature is decreasing, when it
        reaches ``initial_temp * restart_temp_ratio``, the reannealing
        process is triggered. Default value of the ratio is 2e-5. Range is
        (0, 1).
    visit : float, optional
        Parameter for visiting distribution. Default value is 2.62. Higher
        values give the visiting distribution a heavier tail, this makes
        the algorithm jump to a more distant region. The value range is
        (0, 3].
    accept : float, optional
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    maxfun : int, optional
        Soft limit for the number of objective function calls. If the
        algorithm is in the middle of a local search, this number will be
        exceeded, the algorithm will stop just after the local search is
        done. Default value is 1e7.
    seed : {int or `~numpy.random.mtrand.RandomState` instance}, optional
        If `seed` is not specified the `~numpy.random.mtrand.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``RandomState`` instance, then that
        instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the visiting distribution
        function and new coordinates generation.
    no_local_search : bool, optional
        If `no_local_search` is set to True, a traditional Generalized
        Simulated Annealing will be performed with no local search
        strategy applied.
    callback : callable, optional
        A callback function with signature ``callback(x, f, context)``,
        which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and ``context`` has value in [0, 1, 2], with
        the following meaning:

            - 0: minimum detected in the annealing process.
            - 1: detection occurred in the local search process.
            - 2: detection done in the dual annealing process.

        If the callback implementation returns True, the algorithm will
        stop.
    x0 : ndarray, shape(n,), optional
        Coordinates of a single N-D starting point.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``fun`` the
        value of the function at the solution, and ``message`` which
        describes the cause of the termination.
        See `OptimizeResult` for a description of other attributes.

    Raises
    ------
    ValueError
        If ``x0`` and ``bounds`` differ in length, if
        ``restart_temp_ratio`` is outside (0, 1), if a bound is infinite or
        NaN, if any lower bound is not strictly below its upper bound, or
        if ``maxiter`` is not positive.

    Notes
    -----
    This function implements the Dual Annealing optimization. This
    stochastic approach derived from [3]_ combines the generalization of
    CSA (Classical Simulated Annealing) and FSA (Fast Simulated Annealing)
    [1]_ [2]_ coupled to a strategy for applying a local search on accepted
    locations [4]_.
    An alternative implementation of this same algorithm is described in
    [5]_ and benchmarks are presented in [6]_. This approach introduces an
    advanced method to refine the solution found by the generalized
    annealing process. This algorithm uses a distorted Cauchy-Lorentz
    visiting distribution, with its shape controlled by the parameter
    :math:`q_{v}`

    .. math::

        g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
        \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
        \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
        \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
        \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}

    Where :math:`t` is the artificial time. This visiting distribution is
    used to generate a trial jump distance :math:`\\Delta x(t)` of variable
    :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.

    From the starting point, after calling the visiting distribution
    function, the acceptance probability is computed as follows:

    .. math::

        p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
        \\frac{1}{1-q_{a}}}\\}}

    Where :math:`q_{a}` is a acceptance parameter. For :math:`q_{a}<1`,
    zero acceptance probability is assigned to the cases where

    .. math::

        [1-(1-q_{a}) \\beta \\Delta E] < 0

    The artificial temperature :math:`T_{q_{v}}(t)` is decreased according
    to

    .. math::

        T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
        1 + t\\right)^{q_{v}-1}-1}

    Where :math:`q_{v}` is the visiting parameter.

    .. versionadded:: 1.2.0

    References
    ----------
    .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
        statistics. Journal of Statistical Physics, 52, 479-487 (1988).
    .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
        Physica A, 233, 395-406 (1996).
    .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
        Annealing Algorithm and Its Application to the Thomson Model.
        Physics Letters A, 233, 216-220 (1997).
    .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
        Annealing. Physical Review E, 62, 4473 (2000).
    .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
        Simulated Annealing for Efficient Global Optimization: the GenSA
        Package for R. The R Journal, Volume 5/1 (2013).
    .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
        Statistical Software, 60(6), 1 - 45, (2014). DOI:10.18637/jss.v060.i06

    Examples
    --------
    The following example is a 10-D problem, with many local minima.
    The function involved is called Rastrigin
    (https://en.wikipedia.org/wiki/Rastrigin_function)

    >>> from scipy.optimize import dual_annealing
    >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
    >>> lw = [-5.12] * 10
    >>> up = [5.12] * 10
    >>> ret = dual_annealing(func, bounds=list(zip(lw, up)), seed=1234)
    >>> print("global minimum: xmin = {0}, f(xmin) = {1:.6f}".format(
    ...       ret.x, ret.fun))
    global minimum: xmin = [-4.26437714e-09 -3.91699361e-09 -1.86149218e-09 -3.97165720e-09
     -6.29151648e-09 -6.53145322e-09 -3.93616815e-09 -6.55623025e-09
     -6.05775280e-09 -5.00668935e-09], f(xmin) = 0.000000

    """  # noqa: E501
    # NOTE: `local_search_options` keeps its historical `{}` default for
    # interface compatibility; it is only unpacked below, never mutated.
    if x0 is not None and not len(x0) == len(bounds):
        raise ValueError('Bounds size does not match x0')

    # Transpose the (min, max) pairs into a row of minima and a row of maxima.
    lu = list(zip(*bounds))
    lower = np.array(lu[0])
    upper = np.array(lu[1])
    # Check that restart temperature ratio is correct
    if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
        raise ValueError('Restart temperature ratio has to be in range (0, 1)')
    # Checking bounds are valid
    if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
            np.isnan(lower)) or np.any(np.isnan(upper))):
        raise ValueError('Some bounds values are inf values or nan values')
    # Checking that bounds are consistent
    if not np.all(lower < upper):
        raise ValueError('Bounds are not consistent min < max')
    # Checking that bounds are the same length
    # NOTE: `lower` and `upper` both come out of the same `zip`, so they
    # always have equal length here; the check is kept for parity only.
    if not len(lower) == len(upper):
        raise ValueError('Bounds do not have the same dimensions')
    # With no global iteration allowed, the inner `for` below would never
    # execute and the surrounding `while` would spin forever.
    if maxiter <= 0:
        raise ValueError('maxiter has to be greater than 0')

    # Wrapper for the objective function
    func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
    # Wrapper for the minimizer
    minimizer_wrapper = LocalSearchWrapper(
        bounds, func_wrapper, **local_search_options)
    # Initialization of RandomState for reproducible runs if seed provided
    rand_state = check_random_state(seed)
    # Initialization of the energy state
    energy_state = EnergyState(lower, upper, callback)
    energy_state.reset(func_wrapper, rand_state, x0)
    # Minimum value of annealing temperature reached to perform
    # re-annealing
    temperature_restart = initial_temp * restart_temp_ratio
    # VisitingDistribution instance
    visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
    # Strategy chain instance
    strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
                                   minimizer_wrapper, rand_state, energy_state)
    need_to_stop = False
    iteration = 0
    message = []
    # OptimizeResult object to be returned
    optimize_res = OptimizeResult()
    optimize_res.success = True
    optimize_res.status = 0

    # Numerator of the cooling schedule; it does not depend on the step.
    t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
    # Run the search loop. Each pass of the `for` is one annealing cycle;
    # leaving it without setting `need_to_stop` (re-annealing) starts a new
    # cycle from the initial temperature.
    while (not need_to_stop):
        for i in range(maxiter):
            # Compute temperature for this step
            s = float(i) + 2.0
            t2 = np.exp((visit - 1) * np.log(s)) - 1.0
            temperature = initial_temp * t1 / t2
            if iteration >= maxiter:
                message.append("Maximum number of iteration reached")
                need_to_stop = True
                break
            # Need a re-annealing process?
            if temperature < temperature_restart:
                energy_state.reset(func_wrapper, rand_state)
                break
            # starting strategy chain
            val = strategy_chain.run(i, temperature)
            if val is not None:
                message.append(val)
                need_to_stop = True
                optimize_res.success = False
                break
            # Possible local search at the end of the strategy chain
            if not no_local_search:
                val = strategy_chain.local_search()
                if val is not None:
                    message.append(val)
                    need_to_stop = True
                    optimize_res.success = False
                    break
            iteration += 1

    # Setting the OptimizeResult values
    optimize_res.x = energy_state.xbest
    optimize_res.fun = energy_state.ebest
    optimize_res.nit = iteration
    optimize_res.nfev = func_wrapper.nfev
    optimize_res.njev = func_wrapper.ngev
    optimize_res.nhev = func_wrapper.nhev
    optimize_res.message = message
    return optimize_res
def gp_minimize(func, bounds=None, search="sampling", random_state=None,
                maxiter=1000, acq="UCB", num_points=500):
    """
    Black-box optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take for ever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian posterior which is much quicker to evaluate.

    Parameters
    ----------
    func: callable
        Function to minimize. Should take a array of parameters and
        return the function value.

    bounds: array-like, shape (n_parameters, 2)
        ``bounds[i][0]`` should give the lower bound of each parameter and
        ``bounds[i][1]`` should give the upper bound of each parameter.
        Required, despite the ``None`` default in the signature.

    search: string, "sampling" or "lbfgs"
        Searching for the next possible candidate to update the Gaussian
        prior with.

        If search is set to "sampling", ``num_points`` are sampled randomly
        and the Gaussian Process prior is updated with that point that gives
        the best acquisition value over the Gaussian posterior.

        If search is set to "lbfgs", then a point is sampled randomly, and
        lbfgs is run for 10 iterations optimizing the acquisition function
        over the Gaussian posterior.

    random_state: int, RandomState instance, or None (default)
        Set random state to something other than None for reproducible
        results.

    maxiter: int, default 1000
        Number of model-guided iterations. One random point is evaluated
        before the loop, so ``func`` is called ``maxiter + 1`` times in
        total.

    acq: string, default "UCB"
        Function to minimize over the gaussian posterior. Can be either
        the "UCB" which refers to the UpperConfidenceBound or "EI" which
        is the Expected Improvement.

    num_points: int, default 500
        Number of points to sample to determine the next "best" point.
        Useless if search is set to "lbfgs".

    Returns
    -------
    res: OptimizeResult, scipy object
        The optimization result returned as a OptimizeResult object.
        Important attributes are
        ``x`` - float, the optimization solution,
        ``fun`` - float, the value of the function at the optimum,
        ``models``- gp_models[i]. the prior on the function fit at
                       iteration[i].
        ``func_vals`` - the function value at the ith iteration.
        ``x_iters`` - the value of ``x`` corresponding to the function value
                      at the ith iteration.
        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If ``bounds`` is not given or ``search`` is not one of
        "sampling" / "lbfgs".
    """
    # Reject unusable arguments before spending any function evaluation:
    # `len(None)` used to fail with an opaque TypeError and an unknown
    # `search` left `best_x` undefined inside the loop.
    if bounds is None:
        raise ValueError(
            "`bounds` must be provided as a sequence of (lower, upper) pairs")
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "Expected `search` to be 'sampling' or 'lbfgs', got %r"
            % (search,))

    rng = np.random.RandomState(random_state)

    num_params = len(bounds)
    lower_bounds, upper_bounds = zip(*bounds)
    upper_bounds = np.asarray(upper_bounds)
    lower_bounds = np.asarray(lower_bounds)

    # All modelling happens in the unit cube; points are mapped to the
    # user's box only when `func` is called and when results are packed.
    x0 = rng.rand(num_params)
    func_val = [func(lower_bounds + (upper_bounds - lower_bounds) * x0)]

    length_scale = np.ones(num_params)
    gp_params = {
        'kernel': Matern(length_scale=length_scale, nu=2.5),
        'normalize_y': True,
        'random_state': random_state
    }
    lbfgs_bounds = np.tile((0, 1), (num_params, 1))

    gp_models = []
    x = np.reshape(x0, (1, -1))

    for i in range(maxiter):
        gpr = GaussianProcessRegressor(**gp_params)
        gpr.fit(x, func_val)

        if search == "sampling":
            sampling = rng.rand(num_points, num_params)
            acquis = acquisition_func(sampling, gpr, np.min(func_val), acq)
            best_arg = np.argmin(acquis)
            best_x = sampling[best_arg]
        else:  # search == "lbfgs", guaranteed by the check above
            init = rng.rand(num_params)
            best_x, _, _ = fmin_l_bfgs_b(
                acquisition_func,
                np.asfortranarray(init),
                args=(gpr, np.min(func_val), acq),
                bounds=lbfgs_bounds, approx_grad=True, maxiter=10)

        gp_models.append(gpr)

        best_f = func(lower_bounds + (upper_bounds - lower_bounds) * best_x)
        # Append the new unit-cube point as one more row.
        x = np.vstack((x, best_x))
        func_val.append(best_f)

    # Map every visited point back from the unit cube to the original box.
    x = lower_bounds + (upper_bounds - lower_bounds) * x
    func_ind = np.argmin(func_val)
    x_val = x[func_ind]
    best_func_val = func_val[func_ind]

    res = OptimizeResult()
    res.models = gp_models
    res.x = x_val
    res.fun = best_func_val
    res.func_vals = func_val
    res.x_iters = x
    return res
def dummy_minimize(func, dimensions, n_calls=100, x0=None, y0=None,
                   random_state=None):
    """Random search by uniform sampling within the given bounds.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(upper_bound, lower_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `n_calls` [int, default=100]:
        Number of calls to `func` to find the minimum. Evaluating the
        points of `x0` counts towards this budget when `y0` is not given.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None` or an empty list, no initial input points are
          used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `models` [list]: always empty, no surrogate model is fitted.
        - `space` [Space]: the optimisation space.
        - `specs` [dict]: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`:
        If `x0` is not a list, if `y0` is neither an iterable nor a scalar,
        if `x0` and `y0` differ in length, if `y0` contains non-scalars, if
        `y0` is given without any `x0`, or if `func` does not return a
        scalar on its first evaluation.
    """
    # Save call args. This has to stay the first statement: at this point
    # the frame's locals are exactly the arguments. `dict(...)` takes the
    # shallow snapshot without relying on `f_locals` being a plain dict.
    specs = {"args": dict(inspect.currentframe().f_locals),
             "function": inspect.currentframe().f_code.co_name}

    # Check the cheap arguments first, before building the space and RNG.
    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        # A single point was given; an empty `x0` is left alone (it used to
        # crash on `x0[0]`).
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, got %s" % type(x0))

    if len(x0) > 0 and y0 is not None:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError("`y0` should be an iterable or a scalar, got %s"
                             % type(y0))

        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")

        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")

    elif len(x0) > 0 and y0 is None:
        # The initial points still have to be evaluated, which eats into
        # the call budget.
        y0 = []
        n_calls -= len(x0)

    elif len(x0) == 0 and y0 is not None:
        raise ValueError("`x0`cannot be `None` when `y0` is provided")

    else:  # len(x0) == 0 and y0 is None
        y0 = []

    rng = check_random_state(random_state)
    space = Space(dimensions)

    X = x0
    y = y0

    # Random search
    X = X + space.rvs(n_samples=n_calls, random_state=rng)
    first = True

    # Points with a known value in `y0` are skipped; everything else is
    # evaluated in order.
    for i in range(len(y0), len(X)):
        y_i = func(X[i])

        if first:
            # Only the first return value is validated.
            first = False
            if not np.isscalar(y_i):
                raise ValueError("`func` should return a scalar")

        y.append(y_i)

    y = np.array(y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(y)
    res.x = X[best]
    res.fun = y[best]
    res.func_vals = y
    res.x_iters = X
    res.models = []  # Create attribute even though it is empty
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
def solve(self):
    """Run the differential evolution minimisation.

    Evaluates the initial population, then evolves it for at most
    ``self.maxiter`` generations. The run stops early when the
    population has converged (``std / |mean|`` of the energies below
    ``self.tol``), when more than ``self.maxfun`` function evaluations
    were made, when ``self.max_execution_time`` seconds have elapsed, or
    when ``self.callback`` returns True. If ``self.polish`` is set, the
    best member is finally refined with L-BFGS-B.

    Returns
    -------
    result : OptimizeResult
        Carries ``x``, ``fun``, ``nfev``, ``nit``, ``message`` and
        ``success``; ``jac`` is added when polishing improved the result.
        ``success`` is False whenever the run ended for any reason other
        than convergence.
    """
    nfev, nit, warning_flag = 0, 0, False
    status_message = _status_message['success']

    # calculate energies to start with
    for index, candidate in enumerate(self.population):
        parameters = self._scale_parameters(candidate)
        self.population_energies[index] = self.func(parameters,
                                                    *self.args)
        nfev += 1

        if nfev > self.maxfun:
            warning_flag = True
            status_message = _status_message['maxfev']
            break

    minval = np.argmin(self.population_energies)

    # put the lowest energy into the best solution position.
    lowest_energy = self.population_energies[minval]
    self.population_energies[minval] = self.population_energies[0]
    self.population_energies[0] = lowest_energy

    self.population[[0, minval], :] = self.population[[minval, 0], :]

    if warning_flag:
        # The evaluation budget ran out before the first generation.
        return OptimizeResult(
            x=self.x,
            fun=self.population_energies[0],
            nfev=nfev,
            nit=nit,
            message=status_message,
            success=(warning_flag is not True))

    # do the optimisation.
    # A monotonic clock keeps the time budget immune to wall-clock jumps.
    start_time = time.monotonic()
    nit = 0
    # Exactly `maxiter` generations at most; the `else` branch only runs
    # when the loop ends without a `break`, i.e. the limit was hit.
    while nit < self.maxiter:
        nit += 1
        if start_time + self.max_execution_time < time.monotonic():
            warning_flag = True
            status_message = 'Max execution time reached'
            break

        if self.dither is not None:
            # Draw a fresh mutation scale inside the dither interval.
            self.scale = self.random_number_generator.rand(
            ) * (self.dither[1] - self.dither[0]) + self.dither[0]

        for candidate in range(np.size(self.population, 0)):
            if nfev > self.maxfun:
                warning_flag = True
                status_message = _status_message['maxfev']
                break

            trial = self._mutate(candidate)
            self._ensure_constraint(trial)
            parameters = self._scale_parameters(trial)

            energy = self.func(parameters, *self.args)
            nfev += 1

            # Greedy selection: the trial replaces its parent only when it
            # is strictly better.
            if energy < self.population_energies[candidate]:
                self.population[candidate] = trial
                self.population_energies[candidate] = energy

                if energy < self.population_energies[0]:
                    self.population_energies[0] = energy
                    self.population[0] = trial

        # stop when the fractional s.d. of the population is less than tol
        # of the mean energy
        convergence = (np.std(self.population_energies) /
                       np.abs(np.mean(self.population_energies) +
                              _MACHEPS))

        if self.disp:
            print("differential_evolution step %d: f(x)= %g"
                  % (nit,
                     self.population_energies[0]))

        if (self.callback and
                self.callback(self._scale_parameters(self.population[0]),
                              convergence=self.tol / convergence) is True):

            warning_flag = True
            status_message = ('callback function requested stop early '
                              'by returning True')
            break

        if convergence < self.tol or warning_flag:
            break

    else:
        status_message = _status_message['maxiter']
        warning_flag = True

    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=nfev,
        nit=nit,
        message=status_message,
        success=(warning_flag is not True))

    if self.polish:
        result = minimize(self.func,
                          np.copy(DE_result.x),
                          method='L-BFGS-B',
                          bounds=self.limits.T,
                          args=self.args)

        nfev += result.nfev
        DE_result.nfev = nfev

        # Keep the polished point only when it actually improves.
        if result.fun < DE_result.fun:
            DE_result.fun = result.fun
            DE_result.x = result.x
            DE_result.jac = result.jac
            # to keep internal state consistent
            self.population_energies[0] = result.fun
            self.population[0] = self._unscale_parameters(result.x)

    return DE_result