def result(self):
    """Return the optimizer's current state as an ``OptimizeResult``.

    Returns
    -------
    OptimizeResult
        ``x`` is ``self._xmin``, ``fun`` is ``self._fvalue``, ``message`` is
        ``self._message`` and ``nit`` is ``self._step_record``.
    """
    return OptimizeResult(
        x=self._xmin,
        fun=self._fvalue,
        message=self._message,
        nit=self._step_record,
    )
def result(self):
    """Return the best state found so far as an ``OptimizeResult``.

    Returns
    -------
    OptimizeResult
        ``x`` is ``self.es.xbest``, ``fun`` is ``self.es.ebest``, ``nit`` is
        ``self._iter`` and ``ncall`` is ``self.owf.nb_fun_call``.
    """
    best_state = self.es
    return OptimizeResult(
        x=best_state.xbest,
        fun=best_state.ebest,
        nit=self._iter,
        ncall=self.owf.nb_fun_call,
    )
def setup_method(self):
    """Create the fixture: a ``Storage`` seeded with one reference result.

    The reference point is the 0-d array ``np.array(1)`` with function
    value ``0``; both are kept on ``self`` so tests can compare against them.
    """
    self.x0 = np.array(1)
    self.f0 = 0
    self.storage = Storage(OptimizeResult(x=self.x0, fun=self.f0))
def test_higher_f_rejected(self):
    """A candidate with a higher function value must not replace the stored one."""
    candidate = OptimizeResult(x=self.x0 + 1, fun=self.f0 + 1)

    accepted = self.storage.update(candidate)
    lowest = self.storage.get_lowest()

    # The stored minimum is still the one installed by the fixture.
    assert_equal(self.x0, lowest.x)
    assert_equal(self.f0, lowest.fun)
    assert_(not accepted)
def test_lower_f_accepted(self):
    """A candidate with a lower function value must replace the stored one."""
    candidate = OptimizeResult(x=self.x0 + 1, fun=self.f0 - 1)

    accepted = self.storage.update(candidate)
    lowest = self.storage.get_lowest()

    # The stored minimum no longer matches the values set by the fixture.
    assert_(self.x0 != lowest.x)
    assert_(self.f0 != lowest.fun)
    assert_(accepted)
def create_result(Xi, yi, space=None, rng=None, specs=None, models=None):
    """Build an ``OptimizeResult`` from the points evaluated so far.

    Parameters
    ----------
    Xi : list of lists, shape (n_iters, n_features)
        Evaluated locations, one per iteration.
    yi : array-like, shape (n_iters,) or (n_iters, 2)
        Objective values, one per iteration. When 2-D, column 0 holds the
        objective values and column 1 is stored separately as ``log_time``.
    space : Space instance, optional
        Search space, stored as ``space``.
    rng : RandomState instance, optional
        Random state, stored as ``random_state``.
    specs : dict, optional
        Call specifications, stored as ``specs``.
    models : list, optional
        Fitted surrogate models, stored as ``models``.

    Returns
    -------
    res : OptimizeResult
        ``x`` / ``fun`` are the location and value of the smallest entry of
        the objective values; ``func_vals`` and ``x_iters`` hold the full
        history.
    """
    values = np.asarray(yi)
    fields = {}
    if values.ndim == 2:
        # Two columns: objective value and the quantity kept as ``log_time``.
        fields["log_time"] = values[:, 1].ravel()
        values = values[:, 0].ravel()

    idx = np.argmin(values)
    fields.update(
        x=Xi[idx],
        fun=values[idx],
        func_vals=values,
        x_iters=Xi,
        models=models,
        space=space,
        random_state=rng,
        specs=specs,
    )
    return OptimizeResult(fields)
def _tree_minimize(func, dimensions, base_estimator, n_calls,
                   n_points, n_random_starts, random_state=None):
    """Minimize ``func`` with a tree-based surrogate model.

    The first ``n_random_starts`` points are drawn at random from the search
    space. Each of the remaining ``n_calls - n_random_starts`` evaluations
    fits a fresh clone of ``base_estimator`` to all points seen so far,
    scores ``n_points`` random candidates with ``gaussian_ei`` and evaluates
    ``func`` at the best-scoring candidate.

    Parameters
    ----------
    func : callable
        Objective; called with one point and must return a scalar.
    dimensions : object
        Search-space description passed to ``Space``.
    base_estimator : estimator
        Regressor that is cloned and refitted on every model iteration.
    n_calls : int
        Total number of evaluations of ``func`` (must be > 0 and
        >= ``n_random_starts``).
    n_points : int
        Number of random candidates scored per model iteration.
    n_random_starts : int
        Number of initial random evaluations (must be > 0).
    random_state : optional
        Seed or random state passed to ``check_random_state``.

    Returns
    -------
    res : OptimizeResult
        ``x`` / ``fun`` are the best point and value found, ``func_vals`` and
        ``x_iters`` the full history, ``models`` the fitted regressors and
        ``space`` the search space.

    Raises
    ------
    ValueError
        If the call budget is invalid or ``func`` does not return scalars.
    """
    # Validate the call budget before building anything, so bad arguments
    # fail fast and independently of the space / RNG construction.
    if n_random_starts <= 0:
        raise ValueError(
            "Expected n_random_starts > 0, got %d" % n_random_starts)
    if n_calls <= 0:
        # Report the offending argument itself (this message previously
        # formatted n_random_starts by mistake).
        raise ValueError(
            "Expected n_calls > 0, got %d" % n_calls)
    if n_calls < n_random_starts:
        raise ValueError(
            "Expected n_calls >= %d, got %d" % (n_random_starts, n_calls))

    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Initialize with random points
    Xi = space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = [func(x) for x in Xi]
    if np.ndim(yi) != 1:
        raise ValueError(
            "The function to be optimized should return a scalar")

    # Tree-based optimization loop
    models = []
    n_model_iter = n_calls - n_random_starts
    for i in range(n_model_iter):
        rgr = clone(base_estimator)
        rgr.fit(space.transform(Xi), yi)
        models.append(rgr)

        # `rgr` predicts constants for each leaf which means that the EI
        # has zero gradient over large distances. As a result we can not
        # use gradient based optimizers like BFGS, so using random sampling
        # for the moment.
        X = space.transform(space.rvs(n_samples=n_points,
                                      random_state=rng))
        # Negate the score so that the best candidate is the argmin.
        values = -gaussian_ei(X, rgr, np.min(yi))
        next_x = X[np.argmin(values)]
        # Map the chosen candidate back to the original space before
        # evaluating the objective.
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi = np.vstack((Xi, next_x))
        yi.append(next_y)

    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    return res
def scipy_nlopt_cobyla(*args, **kwargs):
    """Wrap nlopt's COBYLA optimizer behind a scipy-optimize-like call.

    Parameters
    ----------
    args[0] : callable
        target, function to be minimized
    args[1] : sequence
        x0, starting point for minimization
    bounds : list
        Required keyword. Bounds for the movement,
        ``[[min, max], [min, max], ...]``.
    ftol_rel : float, optional
        Same as in nlopt (relative tolerance on the function value).
    xtol_rel : float, optional
        Same as in nlopt (relative tolerance on the parameters).
        One of the ``*tol_rel`` keywords should be specified.

    Returns
    -------
    OptimizeResult
        With ``x``, ``fun`` and ``success`` set. ``status`` holds nlopt's
        last result code; it is not set when ``nlopt.RoundoffLimited`` is
        raised, in which case ``x`` is the starting point, ``success`` is
        False and ``message`` is ``'nlopt.RoundoffLimited'``.
    """
    answ = OptimizeResult()
    bounds = kwargs['bounds']
    opt = nlopt.opt(nlopt.LN_COBYLA, len(args[1]))
    opt.set_lower_bounds([i[0] for i in bounds])
    opt.set_upper_bounds([i[1] for i in bounds])
    if 'ftol_rel' in kwargs:
        opt.set_ftol_rel(kwargs['ftol_rel'])
    if 'xtol_rel' in kwargs:
        # Was set_ftol_rel: that silently overwrote the function tolerance
        # and never configured the parameter tolerance.
        opt.set_xtol_rel(kwargs['xtol_rel'])
    opt.set_min_objective(args[0])
    x0 = list(args[1])
    try:
        x1 = opt.optimize(x0)
    except nlopt.RoundoffLimited:
        answ.x = x0
        answ.fun = args[0](x0)
        answ.success = False
        answ.message = 'nlopt.RoundoffLimited'
        return answ
    answ.x = x1
    answ.fun = args[0](x1)
    status = opt.last_optimize_result()
    # Only result codes 3 and 4 are treated as success.
    answ.success = status in (3, 4)
    answ.status = status
    optimum = opt.last_optimum_value()
    if answ.fun != optimum:
        # Single pre-formatted argument: prints the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print("Something's wrong,  %s %s" % (answ.fun, optimum))
    return answ
def steepest_decent(fun, x0, fprime, args, tol=1.0e-4, maxiter=1000, callback=None):
    """Steepest descent method.

    Each iteration moves along the negative gradient with a step size
    returned by ``armijo_stepsize``.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x, *args)``.
    x0 : array-like
        Starting point.
    fprime : callable
        Gradient of ``fun``, called as ``fprime(x, *args)``.
    args : tuple
        Extra arguments forwarded to ``fun`` and ``fprime``.
    tol : float, optional
        Stop when the objective change reported by the line search for the
        current step is smaller than ``tol``.
    maxiter : int, optional
        Maximum number of iterations.
    callback : callable, optional
        Called as ``callback(x)`` after every accepted step.

    Returns
    -------
    OptimizeResult
        ``x`` is the final point, ``fun`` its objective value and ``nit``
        the zero-based index of the last iteration entered (0 when
        ``maxiter`` is 0 and no iteration ran).
    """
    x = numpy.array(x0)
    # Pre-bind the counter so ``nit`` is defined even if the loop body never
    # runs (``maxiter == 0`` used to raise a NameError below).
    itr = 0
    for itr in range(maxiter):
        direction = -1 * fprime(x, *args)
        alpha, obj_current, obj_next = armijo_stepsize(fun, x, fprime, direction, args=args)
        if numpy.linalg.norm(obj_current - obj_next) < tol:
            break
        x = x + alpha * direction
        if callback is not None:
            callback(x)
    result = OptimizeResult()
    result.x = x
    result.fun = fun(x, *args)
    result.nit = itr
    return result
def newton_method(fun, x0, fprime, args, tol=1.0e-4, maxiter=1000, callback=None):
    """Newton's method, with the step size chosen by the Armijo condition.

    The search direction is ``-solve(A, fprime(x))`` where ``A`` is the
    first element of ``args``; the step size comes from ``armijo_stepsize``.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x, *args)``.
    x0 : array-like
        Starting point.
    fprime : callable
        Gradient of ``fun``, called as ``fprime(x, *args)``.
    args : tuple
        Exactly two items ``(A, b)``; ``A`` is the matrix used to solve for
        the Newton direction. Both are forwarded to ``fun`` and ``fprime``.
    tol : float, optional
        Stop when the objective change reported by the line search for the
        current step is smaller than ``tol``.
    maxiter : int, optional
        Maximum number of iterations.
    callback : callable, optional
        Called as ``callback(x)`` after every accepted step.

    Returns
    -------
    OptimizeResult
        ``x`` is the final point, ``fun`` its objective value and ``nit``
        the zero-based index of the last iteration entered (0 when
        ``maxiter`` is 0 and no iteration ran).
    """
    x = numpy.array(x0)
    # Two-item unpacking is kept so a malformed ``args`` still fails here.
    A, _ = args
    # Pre-bind the counter so ``nit`` is defined even if the loop body never
    # runs (``maxiter == 0`` used to raise a NameError below).
    itr = 0
    for itr in range(maxiter):
        direction = -1 * numpy.linalg.solve(A, fprime(x, *args))
        alpha, obj_current, obj_next = armijo_stepsize(fun, x, fprime, direction, args=args)
        if numpy.linalg.norm(obj_current - obj_next) < tol:
            break
        x = x + alpha * direction
        if callback is not None:
            callback(x)
    result = OptimizeResult()
    result.x = x
    result.fun = fun(x, *args)
    result.nit = itr
    return result
def average_results(self):
    """Group ``self.results`` by minimizer name and average over the runs.

    Returns
    -------
    dict values view of OptimizeResult
        One entry per minimizer name, in first-seen order, carrying
        ``name``, ``mean_nfev``, ``mean_njev``, ``mean_nhev``, ``mean_time``,
        ``ntrials``, ``nfail`` (runs whose ``success`` is falsy) and ``ndim``
        (length of the first run's ``x``, or 1 if ``x`` has no length).
    """
    runs_by_name = {}
    for run in self.results:
        runs_by_name.setdefault(run.name, []).append(run)

    summary = {}
    for name, runs in runs_by_name.items():
        entry = OptimizeResult(
            name=name,
            mean_nfev=np.mean([r.nfev for r in runs]),
            mean_njev=np.mean([r.njev for r in runs]),
            mean_nhev=np.mean([r.nhev for r in runs]),
            mean_time=np.mean([r.time for r in runs]),
            ntrials=len(runs),
            nfail=sum(1 for r in runs if not r.success),
        )
        try:
            entry.ndim = len(runs[0].x)
        except TypeError:
            # Scalar solution: ``len`` is not defined for it.
            entry.ndim = 1
        summary[name] = entry
    return summary.values()
def scipy_graduate_walk(*args, **kwargs):
    """Scipy-compatible wrapper around ``graduate_walk``.

    Parameters
    ----------
    args[0] : callable
        target, function to be minimized
    args[1] : sequence
        x0, starting point for minimization
    dx : float, default 1e-8
        step in change of the point
    dx_start : float, default 0.1
        starting value for the dx step. Must be bigger than ``dx``.
    dx_step : float, default 0.1
        change of dx on each iteration. Should be less than 1.
    diagonal : bool, default False
        Selects the directions for point movements: truthy uses
        ``generate_all_directions``, otherwise
        ``generate_nondiagonal_directions``.
    bounds : list or None, default None
        Bounds for the movement ``[[min, max], [min, max], ...]``; ``None``
        means bounds are ignored.
    ytol_rel : float, default 1e-8
        relative tolerance for search stop. See ``graduate_walk``.

    Returns
    -------
    OptimizeResult
        With ``x``, ``fun`` and ``nfev`` taken from the walk result.
        ``success`` is always True and ``status`` is always 1.
    """
    target, x0 = args[0], args[1]
    dx = kwargs.get('dx', 1e-8)
    dx_start = kwargs.get('dx_start', 0.1)
    dx_step = kwargs.get('dx_step', 0.1)

    make_directions = (generate_all_directions if kwargs.get('diagonal')
                       else generate_nondiagonal_directions)
    directions = make_directions(len(x0))

    raw_bounds = kwargs.get('bounds')
    bounds = None if raw_bounds is None else Bounds(raw_bounds)
    ytol_rel = kwargs.get('ytol_rel', 1e-8)

    res = graduate_walk(target, x0, dx, directions, dx_start, dx_step,
                        bounds=bounds, ytol_rel=ytol_rel)
    return OptimizeResult(
        x=res['x0'],
        fun=res['fval'],
        success=True,
        status=1,
        nfev=res['fnval'],
    )
def minimize(fun,
             bounds=None,
             x0=None,
             input_sigma=0.3,
             popsize=31,
             max_evaluations=100000,
             max_iterations=100000,
             workers=1,
             accuracy=1.0,
             stop_fitness=np.nan,
             is_terminate=None,
             rg=None,
             runid=0,
             normalize=True,
             update_gap=None,
             logger=None):
    """Minimization of a scalar function of one or more variables using CMA-ES.

    Parameters
    ----------
    fun : callable
        The objective function to be minimized.
            ``fun(x, *args) -> float``
        where ``x`` is an 1-D array with shape (n,) and ``args``
        is a tuple of the fixed parameters needed to completely
        specify the function.
    bounds : sequence or `Bounds`, optional
        Bounds on variables. There are two ways to specify the bounds:
            1. Instance of the `scipy.Bounds` class.
            2. Sequence of ``(min, max)`` pairs for each element in `x`. None
               is used to specify no bound.
    x0 : ndarray, shape (n,)
        Initial guess. Array of real elements of size (n,),
        where 'n' is the number of independent variables.
    input_sigma : ndarray, shape (n,) or scalar
        Initial step size for each dimension.
    popsize : int, optional
        CMA-ES population size.
    max_evaluations : int, optional
        Forced termination after ``max_evaluations`` function evaluations.
    max_iterations : int, optional
        Forced termination after ``max_iterations`` iterations.
    workers : int or None, optional
        If workers > 1, function evaluation is performed in parallel for the
        whole population. Useful for costly objective functions but is
        deactivated for parallel retry.
    accuracy : float, optional
        values > 1.0 reduce the accuracy.
    stop_fitness : float, optional
        Limit for fitness value. If reached minimize terminates.
    is_terminate : callable, optional
        Callback to be used if the caller of minimize wants to
        decide when to terminate.
    rg : numpy.random.Generator, optional
        Random generator for creating random guesses. If None (the
        default), a new ``Generator(MT19937())`` is created for this call,
        so separate calls never share generator state.
    runid : int, optional
        id used by the is_terminate callback to identify the CMA-ES run.
    normalize : boolean, optional
        pheno -> if true geno transformation maps arguments to interval [-1,1]
    update_gap : int, optional
        number of iterations without distribution update
    logger : logger, optional
        logger for log output for tell_one. If None (the default), logging
        is switched off.

    Returns
    -------
    res : scipy.OptimizeResult
        The optimization result is represented as an ``OptimizeResult`` object.
        Important attributes are: ``x`` the solution array,
        ``fun`` the best function value,
        ``nfev`` the number of function evaluations,
        ``nit`` the number of CMA-ES iterations,
        ``status`` the stopping criteria and
        ``success`` a Boolean flag, which this function always sets to True.
    """
    if rg is None:
        # Build the generator per call. A default created in the signature
        # is evaluated once at definition time, so every call would share
        # (and advance) one hidden generator.
        rg = Generator(MT19937())
    if workers is None or workers <= 1:
        fun = serial(fun)
    cmaes = Cmaes(bounds, x0,
                  input_sigma, popsize,
                  max_evaluations, max_iterations,
                  accuracy, stop_fitness,
                  is_terminate, rg, np.random.randn, runid, normalize,
                  update_gap, fun, logger)
    if workers and workers > 1:
        x, val, evals, iterations, stop = cmaes.do_optimize_delayed_update(
            fun, workers=workers)
    else:
        x, val, evals, iterations, stop = cmaes.doOptimize()
    return OptimizeResult(x=x, fun=val, nfev=evals, nit=iterations,
                          status=stop, success=True)
def dual_annealing(func, x0, bounds, args=(), maxiter=1000,
                   local_search_options={}, initial_temp=5230.,
                   restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
                   maxfun=1e7, seed=None, no_local_search=False,
                   callback=None):
    """
    Find the global minimum of a function using Dual Annealing.

    Parameters
    ----------
    func : callable
        The objective function to be minimized.  Must be in the form
        ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
        and ``args`` is a  tuple of any additional fixed parameters needed to
        completely specify the function.
    x0 : ndarray, shape(n,)
        A single initial starting point coordinates. If ``None`` is provided,
        initial coordinates are automatically generated (using the ``reset``
        method from the internal ``EnergyState`` class).
    bounds : sequence, shape (n, 2)
        Bounds for variables.  ``(min, max)`` pairs for each element in ``x``,
        defining bounds for the objective function parameter.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the
        objective function.
    maxiter : int, optional
        The maximum number of global search iterations. Default value is 1000.
        For values of 1 or less no strategy-chain step is run and the state
        produced by the initial reset is returned.
    local_search_options : dict, optional
        Extra keyword arguments to be passed to the local minimizer
        (`minimize`). Some important options could be:
        ``method`` for the minimizer method to use and ``args`` for
        objective function additional arguments.
    initial_temp : float, optional
        The initial temperature, use higher values to facilitates a wider
        search of the energy landscape, allowing dual_annealing to escape
        local minima that it is trapped in. Default value is 5230. Range is
        (0.01, 5.e4].
    restart_temp_ratio : float, optional
        During the annealing process, temperature is decreasing, when it
        reaches ``initial_temp * restart_temp_ratio``, the reannealing process
        is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
    visit : float, optional
        Parameter for visiting distribution. Default value is 2.62. Higher
        values give the visiting distribution a heavier tail, this makes
        the algorithm jump to a more distant region. The value range is (0, 3].
    accept : float, optional
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    maxfun : int, optional
        Soft limit for the number of objective function calls. If the
        algorithm is in the middle of a local search, this number will be
        exceeded, the algorithm will stop just after the local search is
        done. Default value is 1e7.
    seed : {int or `numpy.random.RandomState` instance}, optional
        If `seed` is not specified the `numpy.random.RandomState` singleton is
        used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``RandomState`` instance, then that
        instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the visiting distribution
        function and new coordinates generation.
    no_local_search : bool, optional
        If `no_local_search` is set to True, a traditional Generalized
        Simulated Annealing will be performed with no local search
        strategy applied.
    callback : callable, optional
        A callback function with signature ``callback(x, f, context)``,
        which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and ``context`` has value in [0, 1, 2], with the
        following meaning:

            - 0: minimum detected in the annealing process.
            - 1: detection occurred in the local search process.
            - 2: detection done in the dual annealing process.

        If the callback implementation returns True, the algorithm will stop.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``fun`` the value
        of the function at the solution, and ``message`` which describes the
        cause of the termination.
        See `OptimizeResult` for a description of other attributes.

    Raises
    ------
    ValueError
        If ``x0`` and ``bounds`` differ in length, ``restart_temp_ratio`` is
        outside (0, 1), a bound is inf or nan, or a lower bound is not
        strictly below its upper bound.

    Notes
    -----
    This function implements the Dual Annealing optimization. This stochastic
    approach derived from [3]_ combines the generalization of CSA (Classical
    Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
    to a strategy for applying a local search on accepted locations [4]_.
    An alternative implementation of this same algorithm is described in [5]_
    and benchmarks are presented in [6]_. This approach introduces an advanced
    method to refine the solution found by the generalized annealing
    process. This algorithm uses a distorted Cauchy-Lorentz visiting
    distribution, with its shape controlled by the parameter :math:`q_{v}`

    .. math::

        g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
        \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
        \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
        \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
        \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}

    Where :math:`t` is the artificial time. This visiting distribution is used
    to generate a trial jump distance :math:`\\Delta x(t)` of variable
    :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.

    From the starting point, after calling the visiting distribution
    function, the acceptance probability is computed as follows:

    .. math::

        p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
        \\frac{1}{1-q_{a}}}\\}}

    Where :math:`q_{a}` is a acceptance parameter. For :math:`q_{a}<1`, zero
    acceptance probability is assigned to the cases where

    .. math::

        [1-(1-q_{a}) \\beta \\Delta E] < 0

    The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to

    .. math::

        T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
        1 + t\\right)^{q_{v}-1}-1}

    Where :math:`q_{v}` is the visiting parameter.

    .. versionadded:: 1.2.0

    References
    ----------
    .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
        statistics. Journal of Statistical Physics, 52, 479-487 (1998).
    .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
        Physica A, 233, 395-406 (1996).
    .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
        Annealing Algorithm and Its Application to the Thomson Model.
        Physics Letters A, 233, 216-220 (1997).
    .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
        Annealing. Physical Review E, 62, 4473 (2000).
    .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
        Simulated Annealing for Efficient Global Optimization: the GenSA
        Package for R. The R Journal, Volume 5/1 (2013).
    .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
        Statistical Software, 60(6), 1 - 45, (2014). DOI:10.18637/jss.v060.i06

    Examples
    --------
    The following example is a 10-dimensional problem, with many local minima.
    The function involved is called Rastrigin
    (https://en.wikipedia.org/wiki/Rastrigin_function)

    >>> from scipy.optimize import dual_annealing
    >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
    >>> lw = [-5.12] * 10
    >>> up = [5.12] * 10
    >>> ret = dual_annealing(func, None, bounds=list(zip(lw, up)), seed=1234)
    >>> print("global minimum: xmin = {0}, f(xmin) = {1:.6f}".format(
    ...       ret.x, ret.fun))
    global minimum: xmin = [-4.26437714e-09 -3.91699361e-09 -1.86149218e-09 -3.97165720e-09
     -6.29151648e-09 -6.53145322e-09 -3.93616815e-09 -6.55623025e-09
    -6.05775280e-09 -5.00668935e-09], f(xmin) = 0.000000

    """
    if x0 is not None and not len(x0) == len(bounds):
        raise ValueError('Bounds size does not match x0')

    lu = list(zip(*bounds))
    lower = np.array(lu[0])
    upper = np.array(lu[1])
    # Check that restart temperature ratio is correct
    if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
        raise ValueError('Restart temperature ratio has to be in range (0, 1)')
    # Checking bounds are valid
    if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
            np.isnan(lower)) or np.any(np.isnan(upper))):
        raise ValueError('Some bounds values are inf values or nan values')
    # Checking that bounds are consistent
    if not np.all(lower < upper):
        raise ValueError('Bounds are not consistent min < max')
    # Wrapper for the objective function
    func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
    # Wrapper for the minimizer. The shared default dict is only unpacked
    # here, never mutated, so reusing it across calls is safe.
    minimizer_wrapper = LocalSearchWrapper(
        bounds, func_wrapper, **local_search_options)
    # Initialization of RandomState for reproducible runs if seed provided
    rand_state = check_random_state(seed)
    # Initialization of the energy state
    energy_state = EnergyState(lower, upper, callback)
    energy_state.reset(func_wrapper, rand_state, x0)
    # Minimum value of annealing temperature reached to perform
    # re-annealing
    temperature_restart = initial_temp * restart_temp_ratio
    # VisitingDistribution instance
    visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
    # Strategy chain instance
    strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
                                   minimizer_wrapper, rand_state, energy_state)
    # Run the search loop
    need_to_stop = False
    iteration = 0
    message = []
    t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
    if maxiter <= 0:
        # With no iterations allowed the inner ``for`` body never runs, so
        # nothing could ever set ``need_to_stop`` and the outer loop would
        # spin forever. Stop up front with the usual message instead.
        message.append("Maximum number of iteration reached")
        need_to_stop = True
    while not need_to_stop:
        for i in range(maxiter):
            # Compute temperature for this step
            s = float(i) + 2.0
            t2 = np.exp((visit - 1) * np.log(s)) - 1.0
            temperature = initial_temp * t1 / t2
            # ``iteration`` counts across re-annealing restarts, unlike ``i``.
            iteration += 1
            if iteration >= maxiter:
                message.append("Maximum number of iteration reached")
                need_to_stop = True
                break
            # Need a re-annealing process?
            if temperature < temperature_restart:
                energy_state.reset(func_wrapper, rand_state)
                break
            # starting strategy chain
            val = strategy_chain.run(i, temperature)
            if val is not None:
                message.append(val)
                need_to_stop = True
                break
            # Possible local search at the end of the strategy chain
            if not no_local_search:
                val = strategy_chain.local_search()
                if val is not None:
                    message.append(val)
                    need_to_stop = True
                    break

    # Return the OptimizeResult
    res = OptimizeResult()
    res.x = energy_state.xbest
    res.fun = energy_state.ebest
    res.nit = iteration
    res.nfev = func_wrapper.nfev
    res.njev = func_wrapper.ngev
    res.message = message
    return res
def solve(self):
    """Run the differential-evolution loop and return the best solution.

    The initial population is evaluated first, then up to ``self.maxiter``
    generations are evolved. The loop stops early when the wall-clock budget
    ``self.max_execution_time`` is exceeded, the evaluation budget
    ``self.maxfun`` is exceeded, the callback returns True, or the relative
    spread of the population energies drops below ``self.tol``. If
    ``self.polish`` is set, the best member is refined with L-BFGS-B.

    Returns
    -------
    OptimizeResult
        ``x`` and ``fun`` describe the best member found, ``nfev`` and
        ``nit`` count evaluations and generations, ``message`` gives the
        termination reason and ``success`` is False whenever a limit or the
        callback stopped the run. ``jac`` is added when polishing improved
        the solution.
    """
    nfev, nit, warning_flag = 0, 0, False
    status_message = _status_message['success']

    # calculate energies to start with
    for index, candidate in enumerate(self.population):
        parameters = self._scale_parameters(candidate)
        self.population_energies[index] = self.func(parameters, *self.args)
        nfev += 1

        if nfev > self.maxfun:
            warning_flag = True
            status_message = _status_message['maxfev']
            break

    minval = np.argmin(self.population_energies)

    # put the lowest energy into the best solution position.
    lowest_energy = self.population_energies[minval]
    self.population_energies[minval] = self.population_energies[0]
    self.population_energies[0] = lowest_energy

    self.population[[0, minval], :] = self.population[[minval, 0], :]

    if warning_flag:
        # The evaluation budget ran out while scoring the initial population.
        return OptimizeResult(
            x=self.x,
            fun=self.population_energies[0],
            nfev=nfev,
            nit=nit,
            message=status_message,
            success=(warning_flag is not True))

    # do the optimisation.
    start_time = time.time()
    nit = 0
    # ``nit`` is incremented at the top of the body, so the bound must be
    # ``maxiter`` itself: ``maxiter + 1`` ran one generation too many and
    # reported ``nit == maxiter + 1``.
    while nit < self.maxiter:
        nit += 1
        if start_time + self.max_execution_time < time.time():
            warning_flag = True
            status_message = 'Max execution time reached'
            break

        if self.dither is not None:
            # Draw a fresh scale uniformly from the dither interval.
            self.scale = self.random_number_generator.rand(
            ) * (self.dither[1] - self.dither[0]) + self.dither[0]
        for candidate in range(np.size(self.population, 0)):
            if nfev > self.maxfun:
                warning_flag = True
                status_message = _status_message['maxfev']
                break

            trial = self._mutate(candidate)
            self._ensure_constraint(trial)
            parameters = self._scale_parameters(trial)

            energy = self.func(parameters, *self.args)
            nfev += 1

            if energy < self.population_energies[candidate]:
                self.population[candidate] = trial
                self.population_energies[candidate] = energy

                if energy < self.population_energies[0]:
                    self.population_energies[0] = energy
                    self.population[0] = trial

        # stop when the fractional s.d. of the population is less than tol
        # of the mean energy
        convergence = (np.std(self.population_energies) /
                       np.abs(np.mean(self.population_energies) +
                              _MACHEPS))

        if self.disp:
            print("differential_evolution step %d: f(x)= %g"
                  % (nit,
                     self.population_energies[0]))

        if (self.callback and
                self.callback(self._scale_parameters(self.population[0]),
                              convergence=self.tol / convergence) is True):
            warning_flag = True
            status_message = ('callback function requested stop early '
                              'by returning True')
            break

        if convergence < self.tol or warning_flag:
            break

    else:
        # Loop condition failed without a ``break``: iteration budget used up.
        status_message = _status_message['maxiter']
        warning_flag = True

    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=nfev,
        nit=nit,
        message=status_message,
        success=(warning_flag is not True))

    if self.polish:
        result = minimize(self.func,
                          np.copy(DE_result.x),
                          method='L-BFGS-B',
                          bounds=self.limits.T,
                          args=self.args)

        nfev += result.nfev
        DE_result.nfev = nfev

        # Only adopt the polished point when it actually improves the result.
        if result.fun < DE_result.fun:
            DE_result.fun = result.fun
            DE_result.x = result.x
            DE_result.jac = result.jac
            # to keep internal state consistent
            self.population_energies[0] = result.fun
            self.population[0] = self._unscale_parameters(result.x)

    return DE_result
def minimize(
    fun,
    x0,
    args=(),
    method="migrad",
    jac=None,
    hess=None,
    hessp=None,
    bounds=None,
    constraints=None,
    tol=None,
    callback=None,
    options=None,
):
    """
    Interface to MIGRAD using the ``scipy.optimize.minimize`` API.

    For a general description of the arguments, see ``scipy.optimize.minimize``.

    Allowed values for ``method`` are "migrad" or "simplex". Default: "migrad".

    The ``options`` argument can be used to pass special settings to Minuit.
    All are optional.

    **Options:**

      - *disp* (bool): Set to true to print convergence messages. Default: False.
      - *stra* (int): Minuit strategy (0: fast/inaccurate, 1: balanced,
        2: slow/accurate). Default: 1.
      - *maxfun* (int): Call limit handed to Minuit as ``ncall``.
        Default: 0, passed through unchanged.
      - *maxfev* (int): Deprecated alias for *maxfun*.
      - *maxiter* (int): Not supported as such; acts like *maxfun* (a warning
        is issued).
      - *eps* (sequence): Initial step size to numerical compute derivative.
        Minuit automatically refines this in subsequent iterations and is very
        insensitive to the initial choice. Default: 1.

    **Returns: OptimizeResult** (dict with attribute access)

      - *x* (ndarray): Solution of optimization.
      - *success* (bool): Whether Minuit reports the minimum as valid.
      - *fun* (float): Value of objective function at minimum.
      - *message* (str): Description of cause of termination.
      - *hess_inv* (ndarray): Inverse of Hesse matrix at minimum (may not be
        exact); an all-ones matrix when Minuit provides no covariance.
      - nfev (int): Number of function evaluations.
      - njev (int): Number of jacobian evaluations.
      - minuit (object): Minuit object internally used to do the minimization.
        Use this to extract more information about the parameter errors.

    **Raises: ValueError** if ``constraints`` is given, ``jac`` is True or
    otherwise truthy but not callable, or ``method`` is not recognised.
    """
    from scipy.optimize import OptimizeResult, Bounds

    x0 = np.atleast_1d(x0)

    if constraints is not None:
        raise ValueError(
            "Constraints are not supported by Minuit, only bounds")

    if hess or hessp:
        warnings.warn(
            "hess and hessp arguments cannot be handled and are ignored")

    def wrapped(func, args, callback=None):
        # Bind the extra ``args`` and, if requested, call ``callback`` with
        # the point before every evaluation.
        if callback is None:
            return lambda x: func(x, *args)

        def f(x):
            callback(x)
            return func(x, *args)

        return f

    wrapped_fun = wrapped(fun, args, callback)
    wrapped_fun.errordef = 0.5  # so that hesse is really second derivative

    if jac:
        if jac is True:
            raise ValueError("jac=True is not supported, only jac=callable")
        # Explicit check rather than ``assert``: asserts vanish under ``-O``
        # and a non-callable would then only fail deep inside Minuit.
        if not hasattr(jac, "__call__"):
            raise ValueError(f"jac must be callable, got {jac!r}")
        wrapped_grad = wrapped(jac, args)
    else:
        wrapped_grad = None

    m = Minuit(wrapped_fun, x0, grad=wrapped_grad)
    if bounds is not None:
        if isinstance(bounds, Bounds):
            m.limits = [(a, b) for a, b in zip(bounds.lb, bounds.ub)]
        else:
            m.limits = bounds
    if tol:
        m.tol = tol

    ncall = 0
    if options:
        m.print_level = 2 if options.get("disp", False) else 0
        if "maxiter" in options:
            warnings.warn("maxiter not supported, acts like maxfun instead")
        if "maxfev" in options:
            warnings.warn("maxfev is deprecated, use maxfun instead",
                          DeprecationWarning)
        # Precedence: maxfun, then maxfev, then maxiter.
        ncall = options.get("maxfun",
                            options.get("maxfev", options.get("maxiter", 0)))
        errors = options.get("eps", None)
        if errors is not None:
            m.errors = errors
        m.strategy = options.get("stra", 1)

    if method == "migrad":
        m.migrad(ncall=ncall)
    elif method == "simplex":
        m.simplex(ncall=ncall)
    else:
        raise ValueError(f"keyword method={method} not understood")

    if m.valid:
        message = "Optimization terminated successfully"
        if m.accurate:
            message += "."
        else:
            message += ", but uncertainties are unreliable."
    else:
        message = "Optimization failed."
        fmin = m.fmin
        if fmin.has_reached_call_limit:
            message += " Call limit was reached."
        if fmin.is_above_max_edm:
            message += " Estimated distance to minimum too large."

    n = len(x0)
    return OptimizeResult(
        x=np.array(m.values),
        success=m.valid,
        fun=m.fval,
        hess_inv=m.covariance if m.covariance is not None else np.ones((n, n)),
        message=message,
        nfev=m.nfcn,
        njev=m.ngrad,
        minuit=m,
    )
def direct( func: Callable[[npt.ArrayLike, Tuple[Any]], float], bounds: Union[Iterable, Bounds], *, args: tuple = (), eps: float = 1e-4, maxfun: Union[int, None] = None, maxiter: int = 1000, locally_biased: bool = True, f_min: float = -np.inf, f_min_rtol: float = 1e-4, vol_tol: float = 1e-16, len_tol: float = 1e-6, callback: Optional[Callable[[npt.ArrayLike], NoneType]] = None ) -> OptimizeResult: """ Finds the global minimum of a function using the DIRECT algorithm. Parameters ---------- func : callable The objective function to be minimized. ``func(x, *args) -> float`` where ``x`` is an 1-D array with shape (n,) and ``args`` is a tuple of the fixed parameters needed to completely specify the function. bounds : sequence or `Bounds` Bounds for variables. There are two ways to specify the bounds: 1. Instance of `Bounds` class. 2. ``(min, max)`` pairs for each element in ``x``. args : tuple, optional Any additional fixed parameters needed to completely specify the objective function. eps : float, optional Minimal required difference of the objective function values between the current best hyperrectangle and the next potentially optimal hyperrectangle to be divided. In consequence, `eps` serves as a tradeoff between local and global search: the smaller, the more local the search becomes. Default is 1e-4. maxfun : int or None, optional Approximate upper bound on objective function evaluations. If `None`, will be automatically set to ``1000 * N`` where ``N`` represents the number of dimensions. Will be capped if necessary to limit DIRECT's RAM usage to app. 1GiB. This will only occur for very high dimensional problems and excessive `max_fun`. Default is `None`. maxiter : int, optional Maximum number of iterations. Default is 1000. locally_biased : bool, optional If `True` (default), use the locally biased variant of the algorithm known as DIRECT_L. If `False`, use the original unbiased DIRECT algorithm. For hard problems with many local minima, `False` is recommended. 
f_min : float, optional Function value of the global optimum. Set this value only if the global optimum is known. Default is ``-np.inf``, so that this termination criterion is deactivated. f_min_rtol : float, optional Terminate the optimization once the relative error between the current best minimum `f` and the supplied global minimum `f_min` is smaller than `f_min_rtol`. This parameter is only used if `f_min` is also set. Default is 1e-4. vol_tol : float, optional Terminate the optimization once the volume of the hyperrectangle containing the lowest function value is smaller than `vol_tol` of the complete search space. Must lie between 0 and 1. Default is 1e-16. len_tol : float, optional If `locally_biased=True`, terminate the optimization once half of the normalized maximal side length of the hyperrectangle containing the lowest function value is smaller than `len_tol`. If `locally_biased=False`, terminate the optimization once half of the normalized diagonal of the hyperrectangle containing the lowest function value is smaller than `len_tol`. Must lie between 0 and 1. Default is 1e-6. callback : callable, optional A callback function with signature ``callback(xk)`` where ``xk`` represents the best function value found so far. Returns ------- res : OptimizeResult The optimization result represented as a ``OptimizeResult`` object. Important attributes are: ``x`` the solution array, ``success`` a Boolean flag indicating if the optimizer exited successfully and ``message`` which describes the cause of the termination. See `OptimizeResult` for a description of other attributes. Notes ----- DIviding RECTangles (DIRECT) is a deterministic global optimization algorithm capable of minimizing a black box function with its variables subject to lower and upper bound constraints by sampling potential solutions in the search space [1]_. The algorithm starts by normalising the search space to an n-dimensional unit hypercube. 
It samples the function at the center of this hypercube and at 2n (n is the number of variables) more points, 2 in each coordinate direction. Using these function values, DIRECT then divides the domain into hyperrectangles, each having exactly one of the sampling points as its center. In each iteration, DIRECT chooses, using the `eps` parameter which defaults to 1e-4, some of the existing hyperrectangles to be further divided. This division process continues until either the maximum number of iterations or maximum function evaluations allowed are exceeded, or the hyperrectangle containing the minimal value found so far becomes small enough. If `f_min` is specified, the optimization will stop once this function value is reached within a relative tolerance. The locally biased variant of DIRECT (originally called DIRECT_L) [2]_ is used by default. It makes the search more locally biased and more efficient for cases with only a few local minima. A note about termination criteria: `vol_tol` refers to the volume of the hyperrectangle containing the lowest function value found so far. This volume decreases exponentially with increasing dimensionality of the problem. Therefore `vol_tol` should be decreased to avoid premature termination of the algorithm for higher dimensions. This does not hold for `len_tol`: it refers either to half of the maximal side length (for ``locally_biased=True``) or half of the diagonal of the hyperrectangle (for ``locally_biased=False``). This code is based on the DIRECT 2.0.4 Fortran code by Gablonsky et al. at https://ctk.math.ncsu.edu/SOFTWARE/DIRECTv204.tar.gz . This original version was initially converted via f2c and then cleaned up and reorganized by Steven G. Johnson, August 2007, for the NLopt project. The `direct` function wraps the C implementation. .. versionadded:: 1.9.0 References ---------- .. [1] Jones, D.R., Perttunen, C.D. & Stuckman, B.E. Lipschitzian optimization without the Lipschitz constant. 
J Optim Theory Appl 79, 157-181 (1993). .. [2] Gablonsky, J., Kelley, C. A Locally-Biased form of the DIRECT Algorithm. Journal of Global Optimization 21, 27-37 (2001). Examples -------- The following example is a 2-D problem with four local minima: minimizing the Styblinski-Tang function (https://en.wikipedia.org/wiki/Test_functions_for_optimization). >>> from scipy.optimize import direct, Bounds >>> def styblinski_tang(pos): ... x, y = pos ... return 0.5 * (x**4 - 16*x**2 + 5*x + y**4 - 16*y**2 + 5*y) >>> bounds = Bounds([-4., -4.], [4., 4.]) >>> result = direct(styblinski_tang, bounds) >>> result.x, result.fun, result.nfev array([-2.90321597, -2.90321597]), -78.3323279095383, 2011 The correct global minimum was found but with a huge number of function evaluations (2011). Loosening the termination tolerances `vol_tol` and `len_tol` can be used to stop DIRECT earlier. >>> result = direct(styblinski_tang, bounds, len_tol=1e-3) >>> result.x, result.fun, result.nfev array([-2.9044353, -2.9044353]), -78.33230330754142, 207 """ # convert bounds to new Bounds class if necessary if not isinstance(bounds, Bounds): if isinstance(bounds, list) or isinstance(bounds, tuple): lb, ub = old_bound_to_new(bounds) bounds = Bounds(lb, ub) else: message = ("bounds must be a sequence or " "instance of Bounds class") raise ValueError(message) lb = np.ascontiguousarray(bounds.lb, dtype=np.float64) ub = np.ascontiguousarray(bounds.ub, dtype=np.float64) # validate bounds if not np.all(lb < ub): raise ValueError('Bounds are not consistent min < max') if not len(lb) == len(ub): raise ValueError('Bounds do not have the same dimensions') # check for infs and nans if (np.any(np.isinf(lb)) or np.any(np.isinf(ub))): raise ValueError("Bounds must not be inf.") if (np.any(np.isnan(lb)) or np.any(np.isnan(ub))): raise ValueError("Bounds must not be NaN.") # validate tolerances if (vol_tol < 0 or vol_tol > 1): raise ValueError("vol_tol must be between 0 and 1.") if (len_tol < 0 or len_tol > 1): 
raise ValueError("len_tol must be between 0 and 1.") if (f_min_rtol < 0 or f_min_rtol > 1): raise ValueError("f_min_rtol must be between 0 and 1.") # validate maxfun and maxiter if maxfun is None: maxfun = 1000 * lb.shape[0] if not isinstance(maxfun, int): raise ValueError("maxfun must be of type int.") if maxfun < 0: raise ValueError("maxfun must be > 0.") if not isinstance(maxiter, int): raise ValueError("maxiter must be of type int.") if maxiter < 0: raise ValueError("maxiter must be > 0.") # validate boolean parameters if not isinstance(locally_biased, bool): raise ValueError("locally_biased must be True or False.") def _func_wrap(x, args=None): x = np.asarray(x) if args is None: f = func(x) else: f = func(x, *args) return f # TODO: fix disp argument x, fun, ret_code, nfev, nit = _direct(_func_wrap, np.asarray(lb), np.asarray(ub), args, False, eps, maxfun, maxiter, locally_biased, f_min, f_min_rtol, vol_tol, len_tol, callback) format_val = (maxfun, maxiter, f_min_rtol, vol_tol, len_tol) if ret_code > 2: message = SUCCESS_MESSAGES[ret_code - 3].format(format_val[ret_code - 1]) elif 0 < ret_code <= 2: message = ERROR_MESSAGES[ret_code - 1].format(format_val[ret_code - 1]) elif 0 > ret_code > -100: message = ERROR_MESSAGES[abs(ret_code) + 1] else: message = ERROR_MESSAGES[ret_code + 99] return OptimizeResult(x=np.asarray(x), fun=fun, status=ret_code, success=ret_code > 2, message=message, nfev=nfev, nit=nit)
def find_pcrosstalk(Q: np.ndarray, PDE: float, N: int,
                    mtype: str = 'binomial', n_cells: int = 0,
                    Ns: int = 100, min_pct: float = 0,
                    max_pct: float = 0.1):
    """
    Brute-force search for the crosstalk probability.

    The search minimizes `optctp` (per the model of [1]: the g2 difference
    between the noised data and noised poisson photocounting statistics,
    using 4 neighbors with saturation) over a two-dimensional grid handed to
    ``scipy.optimize.brute``.

    Parameters
    ----------
    Q : iterable
        Experimental photocounting statistics of a laser source.
    PDE : float
        PDE of the detector.
    N : int
        Size of poisson photon-number statistics.
    mtype : {'binomial', 'subbinomial'}, optional
        Type of the detector: ideal is binomial, realistic is subbinomial,
        but in most applications the detector can be considered binomial.
        The default is 'binomial'.
    n_cells : int, optional
        Number of photocounting cells in the subbinomial case.
        The default is 0.
    Ns : int, optional
        Size of the calculation grid of the brute function.
        The default is 100.
    min_pct : float, optional
        Lower end of the first search range (the crosstalk probability).
        The default is 0.
    max_pct : float, optional
        Upper end of the first search range (the crosstalk probability).
        The default is 0.1.

    Returns
    -------
    p_ct : float
        The probability of a single crosstalk event (first coordinate of
        the optimum found by the grid search).
    res : OptimizeResult
        Full output of ``scipy.optimize.brute`` repackaged with the fields
        ``x`` (optimal point, ``x[0]`` is the crosstalk probability),
        ``fval``, ``grid`` and ``Jout``.

    References
    ----------
    .. [1] Gallego, L., et al. "Modeling crosstalk in silicon
       photomultipliers." Journal of instrumentation 8.05 (2013): P05010.
       https://iopscience.iop.org/article/10.1088/1748-0221/8/05/P05010/pdf
    """
    # The second search coordinate is scanned within +/-10% of mean(Q) / PDE.
    centre = mean(Q) / PDE
    search_ranges = (
        [min_pct, max_pct],
        [centre * 0.9, centre * 1.1],
    )
    extra_args = (Q, PDE, N, mtype, n_cells)

    # workers=-1 lets brute use every available core.
    res = brute(optctp, search_ranges, args=extra_args, Ns=Ns,
                full_output=True, workers=-1)
    info("P_ct = {r[0][0]}, Δg(2) = {r[1]}".format(r=res))

    optimum = res[0]
    packed = OptimizeResult(x=optimum, fval=res[1], grid=res[2], Jout=res[3])
    return optimum[0], packed
def dummy_minimize(func, dimensions, n_calls=100, random_state=None):
    """Random search by uniform sampling within the given bounds.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `n_calls` [int, default=100]:
        Number of calls to `func` to find the minimum. Must be positive.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [list of lists]: location of function evaluation for
          each iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimisation space.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `n_calls` is not positive, or if `func` does not return a scalar
        for the first sampled point.
    """
    # Fail early with a clear message instead of an IndexError on X[0].
    if n_calls <= 0:
        raise ValueError("Expected `n_calls` > 0, got %d" % n_calls)

    rng = check_random_state(random_state)
    space = Space(dimensions)
    X = space.rvs(n_samples=n_calls, random_state=rng)

    # The first point is evaluated on its own so that a non-scalar objective
    # is rejected before the remaining evaluations are spent.
    init_y = func(X[0])
    if not np.isscalar(init_y):
        raise ValueError(
            "The function to be optimized should return a scalar")
    y = np.asarray([init_y] + [func(x) for x in X[1:]])

    res = OptimizeResult()
    best = np.argmin(y)
    res.x = X[best]
    res.fun = y[best]
    res.func_vals = y
    res.x_iters = X
    res.space = space

    return res
def dual_annealing(func, bounds, args=(), maxiter=1000,
                   local_search_options={}, initial_temp=5230.,
                   restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
                   maxfun=1e7, seed=None, no_local_search=False,
                   callback=None, x0=None, minima=0):
    """
    Find the global minimum of a function using Dual Annealing.

    Parameters
    ----------
    func : callable
        The objective function to be minimized. Must be in the form
        ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D
        array and ``args`` is a tuple of any additional fixed parameters
        needed to completely specify the function.
    bounds : sequence, shape (n, 2)
        Bounds for variables. ``(min, max)`` pairs for each element in
        ``x``, defining bounds for the objective function parameter.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the
        objective function.
    maxiter : int, optional
        The maximum number of global search iterations. Default value is
        1000.
    local_search_options : dict, optional
        Extra keyword arguments to be passed to the local minimizer
        (`minimize`). Some important options could be:
        ``method`` for the minimizer method to use and ``args`` for
        objective function additional arguments.
        The dictionary is only unpacked, never modified.
    initial_temp : float, optional
        The initial temperature, use higher values to facilitates a wider
        search of the energy landscape, allowing dual_annealing to escape
        local minima that it is trapped in. Default value is 5230. Range is
        (0.01, 5.e4].
    restart_temp_ratio : float, optional
        During the annealing process, temperature is decreasing, when it
        reaches ``initial_temp * restart_temp_ratio``, the reannealing
        process is triggered. Default value of the ratio is 2e-5. Range is
        (0, 1).
    visit : float, optional
        Parameter for visiting distribution. Default value is 2.62. Higher
        values give the visiting distribution a heavier tail, this makes
        the algorithm jump to a more distant region. The value range is
        (0, 3].
    accept : float, optional
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    maxfun : int, optional
        Soft limit for the number of objective function calls. If the
        algorithm is in the middle of a local search, this number will be
        exceeded, the algorithm will stop just after the local search is
        done. Default value is 1e7.
    seed : {int or `~numpy.random.mtrand.RandomState` instance}, optional
        If `seed` is not specified the `~numpy.random.mtrand.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``RandomState`` instance, then that
        instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the visiting distribution
        function and new coordinates generation.
    no_local_search : bool, optional
        If `no_local_search` is set to True, a traditional Generalized
        Simulated Annealing will be performed with no local search
        strategy applied.
    callback : callable, optional
        A callback function with signature ``callback(x, f, context)``,
        which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and ``context`` has value in [0, 1, 2], with
        the following meaning:

            - 0: minimum detected in the annealing process.
            - 1: detection occurred in the local search process.
            - 2: detection done in the dual annealing process.

        If the callback implementation returns True, the algorithm will
        stop.
    x0 : ndarray, shape(n,), optional
        Coordinates of a single n-dimensional starting point.
    minima : float, optional
        Reference value of the objective used to decide whether the run
        "succeeded". At the start of every iteration the best energy found
        so far is compared with `minima`; the first time the absolute
        difference is below ``1e-3`` the run is flagged as successful and
        the current number of function evaluations is recorded. The search
        is *not* stopped by this check. Default value is 0.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``fun`` the
        value of the function at the solution, and ``message`` which
        describes the cause of the termination.
        In addition this implementation sets:

            - ``success``: True only if the best energy came within
              ``1e-3`` of `minima` (see `minima`); it does not report
              whether the search terminated cleanly.
            - ``nfevsuccess``: number of function evaluations recorded
              when that happened, or the string ``"N/A"`` otherwise.
            - ``hist``: list with the best energy known at the start of
              every iteration.

        See `OptimizeResult` for a description of other attributes.

    Notes
    -----
    This function implements the Dual Annealing optimization. This
    stochastic approach derived from [3]_ combines the generalization of
    CSA (Classical Simulated Annealing) and FSA (Fast Simulated Annealing)
    [1]_ [2]_ coupled to a strategy for applying a local search on
    accepted locations [4]_.
    An alternative implementation of this same algorithm is described in
    [5]_ and benchmarks are presented in [6]_. This approach introduces an
    advanced method to refine the solution found by the generalized
    annealing process. This algorithm uses a distorted Cauchy-Lorentz
    visiting distribution, with its shape controlled by the parameter
    :math:`q_{v}`

    .. math::

        g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
        \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
        \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
        \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
        \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}

    Where :math:`t` is the artificial time. This visiting distribution is
    used to generate a trial jump distance :math:`\\Delta x(t)` of variable
    :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.

    From the starting point, after calling the visiting distribution
    function, the acceptance probability is computed as follows:

    .. math::

        p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
        \\frac{1}{1-q_{a}}}\\}}

    Where :math:`q_{a}` is an acceptance parameter. For :math:`q_{a}<1`,
    zero acceptance probability is assigned to the cases where

    .. math::

        [1-(1-q_{a}) \\beta \\Delta E] < 0

    The artificial temperature :math:`T_{q_{v}}(t)` is decreased according
    to

    .. math::

        T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
        1 + t\\right)^{q_{v}-1}-1}

    Where :math:`q_{v}` is the visiting parameter.

    .. versionadded:: 1.2.0

    References
    ----------
    .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
        statistics. Journal of Statistical Physics, 52, 479-487 (1998).
    .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
        Physica A, 233, 395-406 (1996).
    .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
        Annealing Algorithm and Its Application to the Thomson Model.
        Physics Letters A, 233, 216-220 (1997).
    .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
        Annealing. Physical Review E, 62, 4473 (2000).
    .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
        Simulated Annealing for Efficient Global Optimization: the GenSA
        Package for R. The R Journal, Volume 5/1 (2013).
    .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
        Statistical Software, 60(6), 1 - 45, (2014). DOI:10.18637/jss.v060.i06

    Examples
    --------
    The following example is a 10-dimensional problem, with many local
    minima. The function involved is called Rastrigin
    (https://en.wikipedia.org/wiki/Rastrigin_function)

    >>> from scipy.optimize import dual_annealing
    >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
    >>> lw = [-5.12] * 10
    >>> up = [5.12] * 10
    >>> ret = dual_annealing(func, bounds=list(zip(lw, up)), seed=1234)
    >>> print("global minimum: xmin = {0}, f(xmin) = {1:.6f}".format(
    ...       ret.x, ret.fun))
    global minimum: xmin = [-4.26437714e-09 -3.91699361e-09 -1.86149218e-09 -3.97165720e-09
     -6.29151648e-09 -6.53145322e-09 -3.93616815e-09 -6.55623025e-09
    -6.05775280e-09 -5.00668935e-09], f(xmin) = 0.000000

    """  # noqa: E501
    if x0 is not None and not len(x0) == len(bounds):
        raise ValueError('Bounds size does not match x0')

    lu = list(zip(*bounds))
    lower = np.array(lu[0])
    upper = np.array(lu[1])
    # Check that restart temperature ratio is correct
    if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
        raise ValueError('Restart temperature ratio has to be in range (0, 1)')
    # Checking bounds are valid
    if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
            np.isnan(lower)) or np.any(np.isnan(upper))):
        raise ValueError('Some bounds values are inf values or nan values')
    # Checking that bounds are consistent
    if not np.all(lower < upper):
        raise ValueError('Bounds are not consistent min < max')
    # Checking that bounds are the same length
    if not len(lower) == len(upper):
        raise ValueError('Bounds do not have the same dimensions')

    # Wrapper for the objective function
    func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
    # Wrapper for the minimizer
    minimizer_wrapper = LocalSearchWrapper(
        bounds, func_wrapper, **local_search_options)
    # Initialization of RandomState for reproducible runs if seed provided
    rand_state = check_random_state(seed)
    # Initialization of the energy state
    energy_state = EnergyState(lower, upper, callback)
    energy_state.reset(func_wrapper, rand_state, x0)
    # Minimum value of annealing temperature reached to perform
    # re-annealing
    temperature_restart = initial_temp * restart_temp_ratio
    # VisitingDistribution instance
    visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
    # Strategy chain instance
    strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
                                   minimizer_wrapper, rand_state, energy_state)
    need_to_stop = False
    iteration = 0
    message = []
    # OptimizeResult object to be returned
    optimize_res = OptimizeResult()
    # NOTE: this flag (and the `False` stored on early stops below) is
    # overwritten by the `minima`-based `success` before returning.
    optimize_res.success = True
    optimize_res.status = 0

    t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0

    # Bookkeeping for the `minima` criterion and the energy history.
    success = False
    nfevsuccess = "N/A"
    hist = []

    # Run the search loop
    while (not need_to_stop):
        for i in range(maxiter):
            hist.append(energy_state.ebest)
            # Compute temperature for this step
            s = float(i) + 2.0
            t2 = np.exp((visit - 1) * np.log(s)) - 1.0
            temperature = initial_temp * t1 / t2
            # Record the evaluation count the first time the best energy
            # gets within 1e-3 of the reference value.
            if abs(energy_state.ebest - minima) < 1e-3 and not success:
                nfevsuccess = func_wrapper.nfev
                success = True
            if iteration >= maxiter:
                message.append("Maximum number of iteration reached")
                need_to_stop = True
                break
            # Need a re-annealing process?
            if temperature < temperature_restart:
                energy_state.reset(func_wrapper, rand_state)
                break
            # starting strategy chain
            val = strategy_chain.run(i, temperature)
            if val is not None:
                message.append(val)
                need_to_stop = True
                optimize_res.success = False
                break
            # Possible local search at the end of the strategy chain
            if not no_local_search:
                val = strategy_chain.local_search()
                if val is not None:
                    message.append(val)
                    need_to_stop = True
                    optimize_res.success = False
                    break
            iteration += 1

    # Setting the OptimizeResult values
    optimize_res.x = energy_state.xbest
    optimize_res.fun = energy_state.ebest
    optimize_res.nit = iteration
    optimize_res.nfev = func_wrapper.nfev
    optimize_res.njev = func_wrapper.ngev
    optimize_res.nhev = func_wrapper.nhev
    optimize_res.message = message
    optimize_res.nfevsuccess = nfevsuccess
    optimize_res.success = success
    optimize_res.hist = hist
    return optimize_res
def gp_minimize(func, bounds=None, search="sampling", random_state=None,
                maxiter=1000, acq="UCB", num_points=500):
    """
    Black-box optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take forever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian posterior which is much quicker to evaluate.

    Parameters
    ----------
    func: callable
        Function to minimize. Should take a array of parameters and
        return the function value.

    bounds: array-like, shape (n_parameters, 2)
        ``bounds[i][0]`` should give the lower bound of each parameter and
        ``bounds[i][1]`` should give the upper bound of each parameter.
        Effectively required: the default ``None`` cannot be used because
        the bounds are unpacked unconditionally.

    search: string, "sampling" or "lbfgs"
        Searching for the next possible candidate to update the Gaussian
        prior with.

        If search is set to "sampling", ``num_points`` are sampled randomly
        and the Gaussian Process prior is updated with that point that
        gives the best acquisition value over the Gaussian posterior.

        If search is set to "lbfgs", then a point is sampled randomly, and
        lbfgs is run for 10 iterations optimizing the acquisition function
        over the Gaussian posterior.

    random_state: int, RandomState instance, or None (default)
        Set random state to something other than None for reproducible
        results.

    maxiter: int, default 1000
        Number of model-guided iterations. `func` is evaluated once at a
        random starting point and once per iteration, i.e. ``maxiter + 1``
        times in total.

    acq: string, default "UCB"
        Function to minimize over the gaussian posterior. Can be either
        the "UCB" which refers to the UpperConfidenceBound or "EI" which
        is the Expected Improvement.

    num_points: int, default 500
        Number of points to sample to determine the next "best" point.
        Useless if search is set to "lbfgs".

    Returns
    -------
    res: OptimizeResult, scipy object
        The optimization result returned as a OptimizeResult object.
        Important attributes are
        ``x`` - array, the optimization solution,
        ``fun`` - float, the value of the function at the optimum,
        ``models``- gp_models[i]. the prior on the function fit at
                       iteration[i].
        ``func_vals`` - the function value at the ith iteration.
        ``x_iters`` - the value of ``x`` corresponding to the function value
                      at the ith iteration.
        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `search` is neither "sampling" nor "lbfgs".
    """
    # Reject unknown strategies up front; otherwise the loop below would
    # fit a model and then fail on an unbound `best_x`.
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "Expected `search` to be 'sampling' or 'lbfgs', got %r"
            % (search,))

    rng = np.random.RandomState(random_state)

    num_params = len(bounds)
    lower_bounds, upper_bounds = zip(*bounds)
    upper_bounds = np.asarray(upper_bounds)
    lower_bounds = np.asarray(lower_bounds)

    # All model fitting happens in the unit hypercube; points are mapped to
    # the original bounds only when calling `func` and when reporting.
    x0 = rng.rand(num_params)
    func_val = [func(lower_bounds + (upper_bounds - lower_bounds) * x0)]

    length_scale = np.ones(num_params)
    gp_params = {
        'kernel': Matern(length_scale=length_scale, nu=2.5),
        'normalize_y': True,
        'random_state': random_state
    }
    lbfgs_bounds = np.tile((0, 1), (num_params, 1))

    gp_models = []
    x = np.reshape(x0, (1, -1))

    for i in range(maxiter):
        gpr = GaussianProcessRegressor(**gp_params)
        gpr.fit(x, func_val)

        if search == "sampling":
            sampling = rng.rand(num_points, num_params)
            acquis = acquisition_func(sampling, gpr, np.min(func_val), acq)
            best_arg = np.argmin(acquis)
            best_x = sampling[best_arg]
        elif search == "lbfgs":
            init = rng.rand(num_params)
            best_x, _, _ = fmin_l_bfgs_b(
                acquisition_func,
                np.asfortranarray(init),
                args=(gpr, np.min(func_val), acq),
                bounds=lbfgs_bounds, approx_grad=True, maxiter=10)

        gp_models.append(gpr)

        best_f = func(lower_bounds + (upper_bounds - lower_bounds) * best_x)
        # Append the new (unit-cube) point as an extra row.
        x = np.vstack((x, best_x))
        func_val.append(best_f)

    # Map every visited point back to the caller's coordinates.
    x = lower_bounds + (upper_bounds - lower_bounds) * x
    func_ind = np.argmin(func_val)
    x_val = x[func_ind]
    best_func_val = func_val[func_ind]

    res = OptimizeResult()
    res.models = gp_models
    res.x = x_val
    res.fun = best_func_val
    res.func_vals = func_val
    res.x_iters = x
    return res
def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev,
           scaling, loss_function, tr_solver, tr_options, verbose):
    """Minimize a sum of squared residuals within bounds via dogleg steps.

    Each outer iteration splits the variables into an active set (sitting on
    a bound with the gradient pointing outside) and a free set, computes a
    (Gauss-)Newton step for the free variables and asks `dogleg_step` for a
    step limited by the trust region and the bounds. The inner loop shrinks
    the trust region until the cost decreases or the evaluation budget is
    exhausted.

    Parameters
    ----------
    fun : callable
        Called as ``fun(x)``; returns the residual vector.
    jac : callable
        Called as ``jac(x, f)``; returns the Jacobian at ``x``.
    x0 : ndarray
        Starting point.
    f0, J0 : ndarray
        Residuals and Jacobian used for the first iteration (presumably
        evaluated at `x0` by the caller; the counters start at 1).
    lb, ub : ndarray
        Lower and upper bounds, indexed like `x0`.
    ftol, xtol : float
        Tolerances forwarded to `check_termination`.
    gtol : float
        Iteration stops when the infinity norm of the gradient (with the
        active components zeroed) drops below this value.
    max_nfev : int or None
        Maximum number of `fun` evaluations; ``None`` means
        ``x0.size * 100``.
    scaling : ndarray or 'jac'
        Variable scaling. With ``'jac'`` the scale is derived from the
        Jacobian via `compute_jac_scaling` and refreshed after every
        accepted step.
    loss_function : callable or None
        Robust loss. When given, ``loss_function(f)`` returns ``rho`` (its
        first row is summed for the cost) and
        ``loss_function(f, cost_only=True)`` returns the cost directly.
    tr_solver : {'exact', 'lsmr'}
        How the (Gauss-)Newton step is computed.
    tr_options : dict
        Keyword arguments forwarded to `lsmr` (used with
        ``tr_solver='lsmr'`` only).
    verbose : int
        ``2`` prints a header and one progress line per iteration.

    Returns
    -------
    OptimizeResult
        With fields ``x``, ``cost``, ``fun`` (unscaled residuals), ``jac``,
        ``grad`` (gradient before zeroing active components),
        ``optimality`` (infinity norm of the projected gradient),
        ``active_mask`` (-1 / 0 / 1 per variable: on lower bound / free /
        on upper bound), ``nfev``, ``njev`` and ``status``.
        ``status`` is 1 when the `gtol` criterion fired, 0 when the
        evaluation budget ran out, otherwise the value returned by
        `check_termination`.
    """
    f = f0
    # Residuals before any robust-loss rescaling; this is what is reported.
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scaling = isinstance(scaling, string_types) and scaling == 'jac'
    if jac_scaling:
        scale, scale_inv = compute_jac_scaling(J)
    else:
        scale, scale_inv = scaling, 1 / scaling

    # Initial trust-region radius, measured in scaled variables.
    Delta = norm(x0 * scale, ord=np.inf)
    if Delta == 0:
        Delta = 1.0

    # -1: variable sits on its lower bound, 1: on its upper bound, 0: free.
    on_bound = np.zeros_like(x0, dtype=int)
    on_bound[np.equal(x0, lb)] = -1
    on_bound[np.equal(x0, ub)] = 1

    x = x0
    step = np.empty_like(x0)

    if max_nfev is None:
        max_nfev = x0.size * 100

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        # A variable is active when it is on a bound and the anti-gradient
        # points outside the feasible region.
        active_set = on_bound * g < 0
        free_set = ~active_set

        g_free = g[free_set]
        # Keep the untouched gradient for the result before zeroing.
        g_full = g.copy()
        g[active_set] = 0

        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        x_free = x[free_set]
        lb_free = lb[free_set]
        ub_free = ub[free_set]
        scale_inv_free = scale_inv[free_set]

        # Compute (Gauss-)Newton and build quadratic model for Cauchy step.
        if tr_solver == 'exact':
            J_free = J[:, free_set]
            newton_step = lstsq(J_free, -f)[0]

            # Coefficients for the quadratic model along the anti-gradient.
            a, b = build_quadratic_1d(J_free, g_free, -g_free)
        elif tr_solver == 'lsmr':
            Jop = aslinearoperator(J)

            # We compute lsmr step in scaled variables and then
            # transform back to normal variables, if lsmr would give exact lsq
            # solution this would be equivalent to not doing any
            # transformations, but from experience it's better this way.

            # We pass active_set to make computations as if we selected
            # the free subset of J columns, but without actually doing any
            # slicing, which is expensive for sparse matrices and impossible
            # for LinearOperator.

            lsmr_op = lsmr_operator(Jop, scale_inv, active_set)
            newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
            newton_step *= scale_inv_free

            # Components of g for active variables were zeroed, so this call
            # is correct and equivalent to using J_free and g_free.
            a, b = build_quadratic_1d(Jop, g, -g)

        # Sentinel that forces at least one pass through the inner loop.
        actual_reduction = -1.0
        while actual_reduction <= 0 and nfev < max_nfev:
            tr_bounds = Delta * scale_inv_free

            step_free, on_bound_free, tr_hit = dogleg_step(
                x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)

            # Active variables do not move.
            step.fill(0.0)
            step[free_set] = step_free

            if tr_solver == 'exact':
                predicted_reduction = -evaluate_quadratic(
                    J_free, g_free, step_free)
            elif tr_solver == 'lsmr':
                predicted_reduction = -evaluate_quadratic(Jop, g, step)

            x_new = x + step
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step * scale, ord=np.inf)

            # Non-finite residuals: shrink the trust region and retry.
            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta, ratio = update_tr_radius(Delta, actual_reduction,
                                            predicted_reduction,
                                            step_h_norm, tr_hit)

            step_norm = norm(step)
            termination_status = check_termination(actual_reduction, cost,
                                                   step_norm, norm(x), ratio,
                                                   ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            # The step is accepted: adopt the new point and refresh the model.
            on_bound[free_set] = on_bound_free

            x = x_new
            # Set variables exactly at the boundary.
            mask = on_bound == -1
            x[mask] = lb[mask]
            mask = on_bound == 1
            x[mask] = ub[mask]

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scaling:
                scale, scale_inv = compute_jac_scaling(J, scale)
        else:
            # No progress was made; report a zero step for this iteration.
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    # Leaving the loop without a criterion means the budget was exhausted.
    if termination_status is None:
        termination_status = 0

    return OptimizeResult(x=x, cost=cost, fun=f_true, jac=J, grad=g_full,
                          optimality=g_norm, active_mask=on_bound,
                          nfev=nfev, njev=njev, status=termination_status)
def solve(self):
    """
    Runs the DifferentialEvolutionSolver.

    The initial population is evaluated first (either in one vectorized
    call when ``self.parallel`` is set, or candidate by candidate), then
    the population is evolved for at most ``self.maxiter`` generations.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a ``OptimizeResult`` object.
        Important attributes are: ``x`` the solution array, ``success`` a
        Boolean flag indicating if the optimizer exited successfully and
        ``message`` which describes the cause of the termination. See
        `OptimizeResult` for a description of other attributes. If polish
        was employed and it lowered the function value, then
        OptimizeResult also contains the ``jac`` attribute taken from the
        polishing step.
    """

    nfev, nit, warning_flag = 0, 0, False
    status_message = _status_message['success']

    # calculate energies to start with
    if self.parallel:
        # vectorized candidate evaluation: collect every scaled candidate
        # and call func once with the whole batch
        params = []
        for index, candidate in enumerate(self.population):
            params.append(np.array(self._scale_parameters(candidate)))
            nfev += 1

            if nfev > self.maxfun:
                warning_flag = True
                status_message = _status_message['maxfev']
                break
        energies = self.func(np.array(params), *self.args)
        # population_energies is rebound to whatever func returned rather
        # than being filled element by element.
        self.population_energies = energies
    else:
        # or serial candidate evaluation needed for possible
        # aggressive strategies
        for index, candidate in enumerate(self.population):
            parameters = self._scale_parameters(candidate)
            self.population_energies[index] = self.func(
                parameters, *self.args)
            nfev += 1

            if nfev > self.maxfun:
                warning_flag = True
                status_message = _status_message['maxfev']
                break

    minval = np.argmin(self.population_energies)

    # put the lowest energy into the best solution position.
    lowest_energy = self.population_energies[minval]
    self.population_energies[minval] = self.population_energies[0]
    self.population_energies[0] = lowest_energy

    self.population[[0, minval], :] = self.population[[minval, 0], :]

    # The evaluation budget was already exceeded while scoring the initial
    # population: report the best member found so far as a failure.
    if warning_flag:
        return OptimizeResult(x=self.x,
                              fun=self.population_energies[0],
                              nfev=nfev,
                              nit=nit,
                              message=status_message,
                              success=(warning_flag is not True))

    # do the optimisation.
    for nit in range(1, self.maxiter + 1):
        if self.dither is not None:
            # draw a fresh scale uniformly from [dither[0], dither[1])
            self.scale = self.random_number_generator.rand() * (
                self.dither[1] - self.dither[0]) + self.dither[0]
        if self.parallel:
            # vectorized evaluation of the func for parallel case
            params = []
            for candidate in range(np.size(self.population, 0)):
                if nfev > self.maxfun:
                    warning_flag = True
                    status_message = _status_message['maxfev']
                    break

                trial = self._mutate(candidate)
                self._ensure_constraint(trial)
                params.append(np.array(self._scale_parameters(trial)))
                nfev += 1
            energies = self.func(np.array(params), *self.args)
            # energies[it] belongs to the trial built for population
            # member `it` (trials are generated in population order).
            for it in range(len(energies)):
                energy = energies[it]
                if energy < self.population_energies[it]:
                    self.population[it] =\
                        self._unscale_parameters(params[it])
                    self.population_energies[it] = energy

                    if energy < self.population_energies[0]:
                        self.population_energies[0] = energy
                        self.population[0] =\
                            self._unscale_parameters(params[it])
        else:
            # make serial evaluation of the func
            # serial evaluation is required for aggressive strategies
            for candidate in range(np.size(self.population, 0)):
                if nfev > self.maxfun:
                    warning_flag = True
                    status_message = _status_message['maxfev']
                    break

                trial = self._mutate(candidate)
                self._ensure_constraint(trial)
                parameters = self._scale_parameters(trial)

                energy = self.func(parameters, *self.args)
                nfev += 1

                if energy < self.population_energies[candidate]:
                    self.population[candidate] = trial
                    self.population_energies[candidate] = energy

                    if energy < self.population_energies[0]:
                        self.population_energies[0] = energy
                        self.population[0] = trial

        # stop when the fractional s.d. of the population is less than tol
        # of the mean energy
        convergence = (
            np.std(self.population_energies) /
            np.abs(np.mean(self.population_energies) + _MACHEPS))

        if self.disp:
            print("differential_evolution step %d: f(x)= %g"
                  % (nit, self.population_energies[0]))

        # NOTE(review): self.tol / convergence divides by zero when every
        # energy in the population is identical - confirm this is intended.
        if (self.callback and
                self.callback(self._scale_parameters(self.population[0]),
                              convergence=self.tol / convergence) is True):
            warning_flag = True
            status_message = ('callback function requested stop early '
                              'by returning True')
            break

        if convergence < self.tol or warning_flag:
            break

    else:
        # The loop ran to completion without a `break`: maxiter was reached.
        status_message = _status_message['maxiter']
        warning_flag = True

    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=nfev,
        nit=nit,
        message=status_message,
        success=(warning_flag is not True))

    if self.polish:
        # Refine the best member with a bounded local search.
        result = minimize(self.func,
                          np.copy(DE_result.x),
                          method='L-BFGS-B',
                          bounds=self.limits.T,
                          args=self.args)

        nfev += result.nfev
        DE_result.nfev = nfev

        # Only adopt the polished point if it is actually better.
        if result.fun < DE_result.fun:
            DE_result.fun = result.fun
            DE_result.x = result.x
            DE_result.jac = result.jac
            # to keep internal state consistent
            self.population_energies[0] = result.fun
            self.population[0] = self._unscale_parameters(result.x)

    return DE_result
def _tree_minimize(func, dimensions, base_estimator, n_calls, n_points, n_random_starts, x0=None, y0=None, random_state=None, acq="EI", xi=0.01, kappa=1.96): rng = check_random_state(random_state) space = Space(dimensions) # Initialize with provided points (x0 and y0) and/or random points if n_calls <= 0: raise ValueError( "Expected `n_calls` > 0, got %d" % n_random_starts) if x0 is None: x0 = [] elif not isinstance(x0[0], list): x0 = [x0] if not isinstance(x0, list): raise ValueError("`x0` should be a list, but got %s" % type(x0)) n_init_func_calls = len(x0) if y0 is not None else 0 n_total_init_calls = n_random_starts + n_init_func_calls if n_total_init_calls <= 0: # if x0 is not provided and n_random_starts is 0 then # it will ask for n_random_starts to be > 0. raise ValueError( "Expected `n_random_starts` > 0, got %d" % n_random_starts) if n_calls < n_total_init_calls: raise ValueError( "Expected `n_calls` >= %d, got %d" % (n_total_init_calls, n_calls)) if y0 is None and x0: y0 = [func(x) for x in x0] elif x0: if isinstance(y0, Iterable): y0 = list(y0) elif isinstance(y0, numbers.Number): y0 = [y0] else: raise ValueError( "`y0` should be an iterable or a scalar, got %s" % type(y0)) if len(x0) != len(y0): raise ValueError("`x0` and `y0` should have the same length") if not all(map(np.isscalar, y0)): raise ValueError("`y0` elements should be scalars") else: y0 = [] Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng) yi = y0 + [func(x) for x in Xi[len(x0):]] if np.ndim(yi) != 1: raise ValueError("`func` should return a scalar") # Tree-based optimization loop models = [] n_model_iter = n_calls - n_total_init_calls for i in range(n_model_iter): rgr = clone(base_estimator) rgr.fit(space.transform(Xi), yi) models.append(rgr) # `rgr` predicts constants for each leaf which means that the EI # has zero gradient over large distances. As a result we can not # use gradient based optimizers like BFGS, so using random sampling # for the moment. 
X = space.transform(space.rvs(n_samples=n_points, random_state=rng)) values = _gaussian_acquisition( X=X, model=rgr, y_opt=np.min(yi), method=acq, xi=xi, kappa=kappa) next_x = X[np.argmin(values)] next_x = space.inverse_transform(next_x.reshape((1, -1)))[0] next_y = func(next_x) Xi.append(next_x) yi.append(next_y) res = OptimizeResult() best = np.argmin(yi) res.x = Xi[best] res.fun = yi[best] res.func_vals = np.array(yi) res.x_iters = Xi res.models = models res.space = space res.random_state = rng return res
def _quadratic_assignment_faq_ot(
    A,
    B,
    maximize=False,
    partial_match=None,
    rng=None,
    P0="barycenter",
    shuffle_input=False,
    maxiter=30,
    tol=0.03,
    reg=100,
    thr=5e-2,
    ot=False,
):
    """Iteratively match the vertices of `A` and `B`, optionally seeded.

    Starting from an initial matrix `P` over the unseeded vertices, each
    iteration computes a gradient, picks a direction `Q` (via
    `linear_sum_assignment`, or via `alap` when `ot` is true), chooses a
    step size along the segment between `P` and `Q` and updates `P`. The
    final `P` is projected onto a permutation with `linear_sum_assignment`.
    Timing information for the main stages is printed to stdout.

    Parameters
    ----------
    A, B : array_like
        Inputs to match; validated by `_common_input_validation`.
    maximize : bool
        Maximize the objective instead of minimizing it.
    partial_match : array_like, optional
        Seed pairs; column 0 indexes `A`, column 1 indexes `B`.
    rng : seed or random state
        Passed to `check_random_state`.
    P0 : {"barycenter", "randomized"} or array_like
        Initial position over the unseeded vertices.
    shuffle_input : bool
        Randomly permute the unseeded vertices of `B` first.
    maxiter : int
        Maximum number of iterations.
    tol : float
        Stop once the normalized change of `P` falls below this value.
    reg, thr :
        Forwarded to `alap`; only used when `ot` is true.
    ot : bool
        Use `alap` instead of `linear_sum_assignment` for the direction.

    Returns
    -------
    OptimizeResult
        With keys ``col_ind`` (permutation), ``fun`` (score from
        `_calc_score`) and ``nit`` (iterations performed).

    Raises
    ------
    ValueError
        If `P0` is a string other than "barycenter" or "randomized".
    """
    maxiter = operator.index(maxiter)
    rng = check_random_state(rng)

    A, B, partial_match = _common_input_validation(A, B, partial_match)

    n = A.shape[0]  # number of vertices in graphs
    n_seeds = partial_match.shape[0]  # number of seeds
    n_unseed = n - n_seeds

    obj_func_scalar = 1
    if maximize:
        obj_func_scalar = -1

    nonseed_B = np.setdiff1d(range(n), partial_match[:, 1])
    if shuffle_input:
        nonseed_B = rng.permutation(nonseed_B)
        # shuffle_input to avoid results from inputs that were already matched

    nonseed_A = np.setdiff1d(range(n), partial_match[:, 0])
    perm_A = np.concatenate([partial_match[:, 0], nonseed_A])
    perm_B = np.concatenate([partial_match[:, 1], nonseed_B])

    # definitions according to Seeded Graph Matching [2].
    A11, A12, A21, A22 = _split_matrix(A[perm_A][:, perm_A], n_seeds)
    B11, B12, B21, B22 = _split_matrix(B[perm_B][:, perm_B], n_seeds)
    # TODO also split contralaterals

    # [1] Algorithm 1 Line 1 - choose initialization
    if isinstance(P0, str):
        # initialize J, a doubly stochastic barycenter
        J = np.ones((n_unseed, n_unseed)) / n_unseed
        if P0 == "barycenter":
            P = J
        elif P0 == "randomized":
            # generate a nxn matrix where each entry is a random number [0, 1]
            # would use rand, but Generators don't have it
            # would use random, but old mtrand.RandomStates don't have it
            K = rng.uniform(size=(n_unseed, n_unseed))
            # Sinkhorn balancing
            K = _doubly_stochastic(K)
            P = J * 0.5 + K * 0.5
        else:
            # Previously `P` was silently left unbound for unknown strings.
            raise ValueError(
                f"'P0' must be 'barycenter', 'randomized' or an array, "
                f"got {P0!r}"
            )
    else:
        P0 = np.atleast_2d(P0)
        _check_init_input(P0, n_unseed)
        P = P0

    currtime = time.time()
    const_sum = _layered_product(A21, _transpose(B21)) + _layered_product(
        _transpose(A12), B12
    )
    print(f"{time.time() - currtime:.3f} seconds elapsed for const_sum.")

    # Defined up front so the result is well-formed even when maxiter == 0.
    n_iter = 0
    # [1] Algorithm 1 Line 2 - loop while stopping criteria not met
    for n_iter in range(1, maxiter + 1):
        # [1] Algorithm 1 Line 3 - compute the gradient of f(P) = -tr(APB^tP^t)
        # TODO einsum
        currtime = time.time()
        # Build a NEW array here: `grad_fp = const_sum` followed by `+=`
        # would alias and mutate `const_sum`, making the constant term grow
        # with every iteration.
        grad_fp = const_sum + _layered_product(
            _layered_product(A22, P), _transpose(B22)
        )
        grad_fp += _layered_product(_layered_product(_transpose(A22), P), B22)
        grad_fp = grad_fp.sum(axis=-1)
        print(f"{time.time() - currtime:.3f} seconds elapsed for grad_fp.")

        # [1] Algorithm 1 Line 4 - get direction Q by solving Eq. 8
        currtime = time.time()
        if ot:
            Q = alap(grad_fp, n_unseed, maximize, reg, thr)
        else:
            _, cols = linear_sum_assignment(grad_fp, maximize=maximize)
            Q = np.eye(n_unseed)[cols]
        print(f"{time.time() - currtime:.3f} seconds elapsed.")

        # [1] Algorithm 1 Line 5 - compute the step size
        # Noting that e.g. trace(Ax) = trace(A)*x, expand and re-collect
        # terms as ax**2 + bx + c. c does not affect location of minimum
        # and can be ignored. Also, note that trace(A@B) = (A.T*B).sum();
        # apply where possible for efficiency.
        # TODO all einsums?
        currtime = time.time()
        R = P - Q
        b21 = (_layered_product(R.T[..., None], A21) * B21).sum()
        b12 = (
            _layered_product(R.T[..., None], _transpose(A12)) * _transpose(B12)
        ).sum()
        AR22 = _layered_product(_transpose(A22), R[..., None])
        BR22 = _layered_product(B22, R.T[..., None])
        b22a = (AR22 * (_layered_product(Q[..., None], _transpose(B22)))).sum()
        b22b = (A22 * _layered_product(Q[..., None], BR22)).sum()
        a = (_transpose(AR22) * BR22).sum()
        b = b21 + b12 + b22a + b22b
        print(f"{time.time() - currtime:.3f} seconds elapsed for quadradic terms.")

        # critical point of ax^2 + bx + c is at x = -b/(2*a)
        # if a * obj_func_scalar > 0, it is a minimum
        # if minimum is not in [0, 1], only endpoints need to be considered
        if a * obj_func_scalar > 0 and 0 <= -b / (2 * a) <= 1:
            alpha = -b / (2 * a)
        else:
            alpha = np.argmin([0, (b + a) * obj_func_scalar])

        # [1] Algorithm 1 Line 6 - Update P
        P_i1 = alpha * P + (1 - alpha) * Q
        if np.linalg.norm(P - P_i1) / np.sqrt(n_unseed) < tol:
            P = P_i1
            break
        P = P_i1
    # [1] Algorithm 1 Line 7 - end main loop

    # [1] Algorithm 1 Line 8 - project onto the set of permutation matrices
    _, col = linear_sum_assignment(-P)
    perm = np.concatenate((np.arange(n_seeds), col + n_seeds))

    # Undo the seed-first reordering so the permutation refers to the
    # original vertex indices.
    unshuffled_perm = np.zeros(n, dtype=int)
    unshuffled_perm[perm_A] = perm_B[perm]

    score = _calc_score(A, B, unshuffled_perm)

    res = {"col_ind": unshuffled_perm, "fun": score, "nit": n_iter}
    return OptimizeResult(res)
def minimize_constrained(fun, x0, grad, hess='2-point', constraints=(),
                         method=None, xtol=1e-8, gtol=1e-8,
                         sparse_jacobian=None, options=None, callback=None,
                         max_iter=1000, verbose=0):
    """Minimize `fun` by delegating to `equality_constrained_sqp`.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x)``.
    x0 : array_like
        Initial guess; converted to a 1-D float array.
    grad : callable
        Gradient of `fun`, called as ``grad(x)``.
    hess : callable
        Hessian of `fun`, called as ``hess(x)``.
        NOTE(review): the default ``'2-point'`` is a string and is called
        directly below, so callers currently have to pass a callable.
    constraints : sequence
        Constraints; each one is deep-copied.
        NOTE(review): only the empty case builds the canonical constraint
        in this function -- confirm how non-empty input is meant to be
        handled.
    method : object
        Stored unchanged on the result as ``result.method``.
    xtol, gtol : float
        Tolerances used by the stopping criteria (trust radius, and
        optimality / constraint violation respectively).
    sparse_jacobian :
        Forwarded to `empty_canonical_constraint`.
    options : dict, optional
        Extra keyword arguments forwarded to `equality_constrained_sqp`;
        ``options["return_all"]`` additionally enables ``allvecs`` /
        ``allmult`` on the state. ``None`` means no options.
    callback : callable, optional
        Called with the current state; a truthy return stops the run.
    max_iter : int
        Iteration limit.
    verbose : int
        Not used by this function.

    Returns
    -------
    OptimizeResult
        The solver result with ``execution_time``, ``method`` and
        ``message`` filled in.

    Raises
    ------
    ValueError
        If the canonical constraint has inequality constraints.
    """
    # A None sentinel replaces the former mutable `{}` default.
    if options is None:
        options = {}

    # Initial value
    x0 = np.atleast_1d(x0).astype(float)
    n_vars = np.size(x0)
    f0 = fun(x0)
    g0 = np.atleast_1d(grad(x0))

    def grad_wrapped(x):
        return np.atleast_1d(grad(x))

    # Kept from the original: evaluates the Hessian once at x0 even though
    # the value itself is not used further down.
    H0 = hess(x0)
    H0 = np.atleast_2d(np.asarray(H0))

    def hess_wrapped(x):
        return np.atleast_2d(np.asarray(hess(x)))

    copied_constraints = [deepcopy(constr) for constr in constraints]
    if len(copied_constraints) == 0:
        constr = empty_canonical_constraint(x0, n_vars, sparse_jacobian)
    lagr_hess = lagrangian_hessian(constr, hess_wrapped)

    state = OptimizeResult(niter=0, nfev=1, ngev=1,
                           ncev=1, njev=1, nhev=0,
                           cg_niter=0, cg_info={})
    # Store values
    return_all = options.get("return_all", False)
    if return_all:
        state.allvecs = []
        state.allmult = []

    def stop_criteria(state):
        # Status codes: 3 = callback requested stop, 1 = optimality and
        # constraint violation below gtol, 2 = trust radius below xtol,
        # 0 = iteration limit exceeded.
        state.status = None
        if (callback is not None) and callback(state):
            state.status = 3
        elif state.optimality < gtol and state.constr_violation < gtol:
            state.status = 1
        elif state.trust_radius < xtol:
            state.status = 2
        elif state.niter > max_iter:
            state.status = 0
        return state.status in (0, 1, 2, 3)

    start_time = time.time()
    # Call inferior function to do the optimization
    if constr.n_ineq > 0:
        raise ValueError("'equality_constrained_sqp' does not "
                         "support inequality constraints.")

    def fun_and_constr(x):
        f = fun(x)
        _, c_eq = constr.constr(x)
        return f, c_eq

    def grad_and_jac(x):
        g = grad_wrapped(x)
        _, J_eq = constr.jac(x)
        return g, J_eq

    result = equality_constrained_sqp(
        fun_and_constr, grad_and_jac, lagr_hess,
        x0, f0, g0, constr.c_eq0, constr.J_eq0,
        stop_criteria, state, **options)

    result.execution_time = time.time() - start_time
    result.method = method
    result.message = TERMINATION_MESSAGES[result.status]

    return result
def gp_minimize(func, dimensions, base_estimator=None, acq="LCB", xi=0.01,
                kappa=1.96, search="sampling", maxiter=1000, n_points=500,
                n_start=10, n_restarts_optimizer=5, random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take forever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian prior which is much quicker to evaluate.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take an array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(upper_bound, lower_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization.

    * `acq` [string, default=`"LCB"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"sampling"` or `"lbfgs"`]:
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then a point is sampled randomly, and
        lbfgs is run for 10 iterations optimizing the acquisition function
        over the Gaussian prior.

    * `maxiter` [int, default=1000]:
        Number of iterations to find the minimum. Note that `n_start`
        iterations are effectively discounted, such that total number of
        function evaluations is at most `maxiter`.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_start` [int, default=10]:
        Number of random initialization points.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [float]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimisation space.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `search` is neither `"sampling"` nor `"lbfgs"`, or if `func`
        does not return a scalar.
    """
    # Reject unknown search strategies before spending any evaluations;
    # otherwise the loop below would only fail later on an unbound `next_x`.
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "Expected `search` to be 'sampling' or 'lbfgs', got %r"
            % (search,))

    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Default GP
    if base_estimator is None:
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) *
                    Matern(length_scale=np.ones(space.transformed_n_dims),
                           length_scale_bounds=[(0.01, 100)] * space.transformed_n_dims,
                           nu=2.5)),
            normalize_y=True, alpha=10e-6, random_state=random_state)

    # First points
    Xi = space.rvs(n_samples=n_start, random_state=rng)
    yi = [func(x) for x in Xi]
    if np.ndim(yi) != 1:
        raise ValueError(
            "The function to be optimized should return a scalar")

    # Bayesian optimization loop
    models = []

    for i in range(maxiter - n_start):
        gp = clone(base_estimator)

        # Fitting warnings are deliberately silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(space.transform(Xi), yi)

        models.append(gp)

        if search == "sampling":
            X = space.transform(space.rvs(n_samples=n_points,
                                          random_state=rng))
            values = _gaussian_acquisition(
                X=X, model=gp, y_opt=np.min(yi), method=acq,
                xi=xi, kappa=kappa)
            next_x = X[np.argmin(values)]

        elif search == "lbfgs":
            best = np.inf

            # Keep the best acquisition value over several random restarts.
            for j in range(n_restarts_optimizer):
                x0 = space.transform(space.rvs(n_samples=1,
                                               random_state=rng))[0]

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _ = fmin_l_bfgs_b(
                        _acquisition, x0,
                        args=(gp, np.min(yi), acq, xi, kappa),
                        bounds=space.transformed_bounds,
                        approx_grad=True, maxiter=10)

                if a < best:
                    next_x, best = x, a

        # Map the candidate back to the original space before evaluating.
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi = np.vstack((Xi, next_x))
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space

    return res
def basicipm(
        func,
        x,
        args,
        reduced=False,
        maxiter=50,
        stepsize=1e-7,  # for Hesse Matrix/Gradient
        gradtol=1,
        **kwargs):
    '''
    Basic interior point method for nonconvex functions

    Runs three outer iterations; each performs 22 rounds of inertia
    correction followed by a Newton solve and then updates the barrier
    parameter ``my``. Progress is printed to stdout.

    Parameters
    ----------
    func : callable
        Objective, called as ``func(x)``.
    x : array_like
        Starting point.
    args :
        Not used by this function.
    reduced : bool
        Select the reduced system (``setup_F_ipm_reduced`` /
        ``setup_Jac_ipm_reduced``) for the Newton solve.
    maxiter, gradtol :
        Not used by this function (iteration counts are fixed).
    stepsize : float
        Step passed to ``grad`` for the final gradient report.
    **kwargs :
        Must contain ``bounds`` with ``lb`` and ``ub`` sequences.

    Returns
    -------
    OptimizeResult
        With ``x`` (final point), ``fun`` (objective value at ``x``) and
        ``nit`` (outer iterations).
    '''
    m = 2 * len(kwargs['bounds'].lb)
    # Interleave bounds as [lb_0, ub_0, lb_1, ub_1, ...]. Floor division
    # keeps range() valid on Python 3 (m / 2 is a float there).
    bdry = numpy.zeros(m)
    for i in range(m // 2):
        bdry[2 * i] = kwargs['bounds'].lb[i]
        bdry[2 * i + 1] = kwargs['bounds'].ub[i]
    print(bdry)
    niter = 0
    n = len(x)
    # starting parameter
    z = numpy.ones(m)
    s = numpy.ones(m)
    my = 20.0
    delta_old = 0

    # setup A: rows 2i / 2i+1 hold +1 / -1 for variable i
    A = numpy.zeros((m, n))
    for i in range(n):
        A[2 * i, i] = 1.0
        A[2 * i + 1, i] = -1.0

    # NOTE(review): the gradient-norm / maxiter stopping rule is disabled;
    # the outer loop is hard-wired to three iterations.
    while niter < 3:
        niter = niter + 1
        testit = 0
        # Fixed number of Newton rounds per barrier parameter value.
        while (testit < 22):
            testit = testit + 1
            print('INERTIA CORRECTION....')
            delta, gamma = inertia_correction_ldl(setup_Jac_ipm,
                                                  func,
                                                  A,
                                                  x,
                                                  z,
                                                  s,
                                                  my,
                                                  delta_old,
                                                  a=1,
                                                  b=0.5)
            print('INERTIA CORRETION Done')
            print('DELTA=')
            print(delta)
            print('WAITING FOR NEWTON...')
            if reduced:
                x, s, z = solve_ipm_newton(setup_F_ipm_reduced,
                                           setup_Jac_ipm_reduced,
                                           func,
                                           A,
                                           x,
                                           z,
                                           s,
                                           my,
                                           bdry,
                                           delta,
                                           delta_old,
                                           reduced=reduced,
                                           variant='gmres')
            else:
                x, s, z = solve_ipm_newton(setup_F_ipm,
                                           setup_Jac_ipm,
                                           func,
                                           A,
                                           x,
                                           z,
                                           s,
                                           my,
                                           bdry,
                                           delta,
                                           delta_old,
                                           reduced=reduced,
                                           variant='gmres')
            delta_old = delta
        # Objective value at the current point.
        wertwert = func(x)
        my = update_my(s, z)
        print(
            '#################################################################'
        )
        print('NEW MY IS')
        print(my)

    # `fun` carries the objective VALUE (it used to hold the callable).
    result = OptimizeResult(fun=wertwert, x=x, nit=niter)
    print('gradient=')
    print(numpy.linalg.norm(grad(func, x, stepsize)))
    print('X=')
    print(x)
    print('S=')
    print(s)
    print('Z=')
    print(z)
    return result
def _linprog_simplex(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None,
                     bounds=None, maxiter=1000, disp=False, callback=None,
                     tol=1.0E-12, bland=False, **unknown_options):
    """
    Solve the following linear programming problem via a two-phase
    simplex algorithm.

    maximize:     c^T * x

    subject to:   A_ub * x <= b_ub
                  A_eq * x == b_eq

    NOTE(review): the summary above says "maximize" while the example
    further down minimizes; which one holds depends on `_solve_simplex`,
    which is not shown here -- confirm.

    Parameters
    ----------
    c : array_like
        Coefficients of the linear objective function to be maximized.
    A_ub : array_like
        2-D array which, when matrix-multiplied by x, gives the values of the
        upper-bound inequality constraints at x.
    b_ub : array_like
        1-D array of values representing the upper-bound of each inequality
        constraint (row) in A_ub.
    A_eq : array_like
        2-D array which, when matrix-multiplied by x, gives the values of the
        equality constraints at x.
    b_eq : array_like
        1-D array of values representing the RHS of each equality constraint
        (row) in A_eq.
    bounds : array_like
        The bounds for each independent variable in the solution, which can
        take one of three forms::

        None : The default bounds, all variables are non-negative.
        (lb, ub) : If a 2-element sequence is provided, the same
                  lower bound (lb) and upper bound (ub) will be applied
                  to all variables.
        [(lb_0, ub_0), (lb_1, ub_1), ...] : If an n x 2 sequence is provided,
                  each variable x_i will be bounded by lb[i] and ub[i].
        Infinite bounds are specified using -np.inf (negative)
        or np.inf (positive).

    callback : callable
        If a callback function is provided, it will be called within each
        iteration of the simplex algorithm. The callback must have the
        signature `callback(xk, **kwargs)` where xk is the current solution
        vector and kwargs is a dictionary containing the following::

        "tableau" : The current Simplex algorithm tableau
        "nit" : The current iteration.
        "pivot" : The pivot (row, column) used for the next iteration.
        "phase" : Whether the algorithm is in Phase 1 or Phase 2.
        "bv" : A structured array containing a string representation of each
               basic variable and its current value.

    Options
    -------
    maxiter : int
       The maximum number of iterations to perform.
    disp : bool
        If True, print exit status message to sys.stdout
    tol : float
        The tolerance which determines when a solution is "close enough" to
        zero in Phase 1 to be considered a basic feasible solution or close
        enough to positive to serve as an optimal solution.
    bland : bool
        If True, use Bland's anti-cycling rule [3] to choose pivots to
        prevent cycling.  If False, choose pivots which should lead to a
        converged solution more quickly.  The latter method is subject to
        cycling (non-convergence) in rare instances.
    unknown_options : dict
        Accepted and ignored (the option check is commented out).

    Returns
    -------
    On the normal path a 4-tuple ``(result, nit2, obj, sol)`` is returned,
    where ``nit2``, ``sol`` come from the Phase 2 `_solve_simplex` call and
    ``obj`` equals ``result.fun``. If Phase 1 does not end with status 0,
    only the bare result object is returned (with ``x=np.nan`` and no
    ``slack``). ``result`` is a scipy.optimize.OptimizeResult consisting of
    the following fields::

        x : ndarray
            The independent variable vector which optimizes the linear
            programming problem.
        slack : ndarray
            The values of the slack variables.  Each slack variable
            corresponds to an inequality constraint.  If the slack is zero,
            then the corresponding constraint is active.
        success : bool
            Returns True if the algorithm succeeded in finding an optimal
            solution.
        status : int
            An integer representing the exit status of the optimization::

             0 : Optimization terminated successfully
             1 : Iteration limit reached
             2 : Problem appears to be infeasible
             3 : Problem appears to be unbounded

        nit : int
            The number of iterations performed.
        message : str
            A string descriptor of the exit status of the optimization.

    Raises
    ------
    ValueError
        If the bounds or the constraint shapes are invalid.

    Examples
    --------
    Consider the following problem:

    Minimize: f = -1*x[0] + 4*x[1]

    Subject to: -3*x[0] + 1*x[1] <= 6
                 1*x[0] + 2*x[1] <= 4
                            x[1] >= -3

    where:  -inf <= x[0] <= inf

    This problem deviates from the standard linear programming problem.  In
    standard form, linear programming problems assume the variables x are
    non-negative.  Since the variables don't have standard bounds where
    0 <= x <= inf, the bounds of the variables must be explicitly set.

    There are two upper-bound constraints, which can be expressed as

    dot(A_ub, x) <= b_ub

    The input for this problem is as follows:

    >>> from scipy.optimize import linprog
    >>> c = [-1, 4]
    >>> A = [[-3, 1], [1, 2]]
    >>> b = [6, 4]
    >>> x0_bnds = (None, None)
    >>> x1_bnds = (-3, None)
    >>> res = linprog(c, A, b, bounds=(x0_bnds, x1_bnds))
    >>> print(res)
         fun: -22.0
     message: 'Optimization terminated successfully.'
         nit: 1
       slack: array([ 39.,   0.])
      status: 0
     success: True
           x: array([ 10.,  -3.])

    References
    ----------
    .. [1] Dantzig, George B., Linear programming and extensions. Rand
           Corporation Research Study Princeton Univ. Press, Princeton, NJ,
           1963
    .. [2] Hillier, S.H. and Lieberman, G.J. (1995), "Introduction to
           Mathematical Programming", McGraw-Hill, Chapter 4.
    .. [3] Bland, Robert G. New finite pivoting rules for the simplex method.
           Mathematics of Operations Research (2), 1977: pp. 103-107.
    """
    #_check_unknown_options(unknown_options)

    status = 0
    messages = {
        0: "Optimization terminated successfully.",
        1: "Iteration limit reached.",
        2: "Optimization failed. Unable to find a feasible"
        " starting point.",
        3: "Optimization failed. The problem appears to be unbounded.",
        4: "Optimization failed. Singular matrix encountered."
    }
    have_floor_variable = False

    cc = np.asarray(c)

    # The initial value of the objective function element in the tableau
    f0 = 0

    # The number of variables as given by c
    n = len(c)

    # Convert the input arguments to arrays (sized to zero if not provided)
    Aeq = np.asarray(A_eq) if A_eq is not None else np.empty([0, len(cc)])
    Aub = np.asarray(A_ub) if A_ub is not None else np.empty([0, len(cc)])
    beq = np.ravel(np.asarray(b_eq)) if b_eq is not None else np.empty([0])
    bub = np.ravel(np.asarray(b_ub)) if b_ub is not None else np.empty([0])

    # Analyze the bounds and determine what modifications to be made to
    # the constraints in order to accommodate them.
    L = np.zeros(n, dtype=np.float64)
    U = np.ones(n, dtype=np.float64) * np.inf
    if bounds is None or len(bounds) == 0:
        pass
    elif len(bounds) == 2 and not hasattr(bounds[0], '__len__'):
        # All bounds are the same
        a = bounds[0] if bounds[0] is not None else -np.inf
        b = bounds[1] if bounds[1] is not None else np.inf
        L = np.asarray(n * [a], dtype=np.float64)
        U = np.asarray(n * [b], dtype=np.float64)
    else:
        if len(bounds) != n:
            status = -1
            message = ("Invalid input for linprog with method = 'simplex'. "
                       "Length of bounds is inconsistent with the length of c")
        else:
            try:
                for i in range(n):
                    if len(bounds[i]) != 2:
                        raise IndexError()
                    L[i] = bounds[i][0] if bounds[i][0] is not None else -np.inf
                    U[i] = bounds[i][1] if bounds[i][1] is not None else np.inf
            except IndexError:
                status = -1
                message = ("Invalid input for linprog with "
                           "method = 'simplex'. bounds must be a n x 2 "
                           "sequence/array where n = len(c).")

    if np.any(L == -np.inf):
        # If any lower-bound constraint is a free variable
        # add the first column variable as the "floor" variable which
        # accommodates the most negative variable in the problem.
        n = n + 1
        L = np.concatenate([np.array([0]), L])
        U = np.concatenate([np.array([np.inf]), U])
        cc = np.concatenate([np.array([0]), cc])
        Aeq = np.hstack([np.zeros([Aeq.shape[0], 1]), Aeq])
        Aub = np.hstack([np.zeros([Aub.shape[0], 1]), Aub])
        have_floor_variable = True

    # Now before we deal with any variables with lower bounds < 0,
    # deal with finite bounds which can be simply added as new constraints.
    # Also validate bounds inputs here.
    for i in range(n):
        if (L[i] > U[i]):
            status = -1
            message = ("Invalid input for linprog with method = 'simplex'. "
                       "Lower bound %d is greater than upper bound %d" % (i, i))

        if np.isinf(L[i]) and L[i] > 0:
            status = -1
            message = ("Invalid input for linprog with method = 'simplex'. "
                       "Lower bound may not be +infinity")

        if np.isinf(U[i]) and U[i] < 0:
            status = -1
            message = ("Invalid input for linprog with method = 'simplex'. "
                       "Upper bound may not be -infinity")

        if np.isfinite(L[i]) and L[i] > 0:
            # Add a new lower-bound (negative upper-bound) constraint
            Aub = np.vstack([Aub, np.zeros(n)])
            Aub[-1, i] = -1
            bub = np.concatenate([bub, np.array([-L[i]])])
            L[i] = 0

        if np.isfinite(U[i]):
            # Add a new upper-bound constraint
            Aub = np.vstack([Aub, np.zeros(n)])
            Aub[-1, i] = 1
            bub = np.concatenate([bub, np.array([U[i]])])
            U[i] = np.inf

    # Now find negative lower bounds (finite or infinite) which require a
    # change of variables or free variables and handle them appropriately
    for i in range(0, n):
        if L[i] < 0:
            if np.isfinite(L[i]) and L[i] < 0:
                # Add a change of variables for x[i]
                # For each row in the constraint matrices, we take the
                # coefficient from column i in A,
                # and subtract the product of that and L[i] to the RHS b
                beq = beq - Aeq[:, i] * L[i]
                bub = bub - Aub[:, i] * L[i]
                # We now have a nonzero initial value for the objective
                # function as well.
                f0 = f0 - cc[i] * L[i]
            else:
                # This is an unrestricted variable, let x[i] = u[i] - v[0]
                # where v is the first column in all matrices.
                Aeq[:, 0] = Aeq[:, 0] - Aeq[:, i]
                Aub[:, 0] = Aub[:, 0] - Aub[:, i]
                cc[0] = cc[0] - cc[i]

        if np.isinf(U[i]):
            if U[i] < 0:
                status = -1
                message = ("Invalid input for linprog with "
                           "method = 'simplex'. Upper bound may not be -inf.")

    # The number of upper bound constraints (rows in A_ub and elements in b_ub)
    mub = len(bub)

    # The number of equality constraints (rows in A_eq and elements in b_eq)
    meq = len(beq)

    # The total number of constraints
    m = mub + meq

    # The number of slack variables (one for each of the upper-bound constraints)
    n_slack = mub

    # The number of artificial variables (one for each lower-bound and equality
    # constraint)
    n_artificial = meq + np.count_nonzero(bub < 0)

    try:
        Aub_rows, Aub_cols = Aub.shape
    except ValueError:
        raise ValueError("Invalid input. A_ub must be two-dimensional")

    try:
        Aeq_rows, Aeq_cols = Aeq.shape
    except ValueError:
        raise ValueError("Invalid input. A_eq must be two-dimensional")

    if Aeq_rows != meq:
        status = -1
        message = ("Invalid input for linprog with method = 'simplex'. "
                   "The number of rows in A_eq must be equal "
                   "to the number of values in b_eq")

    if Aub_rows != mub:
        status = -1
        message = ("Invalid input for linprog with method = 'simplex'. "
                   "The number of rows in A_ub must be equal "
                   "to the number of values in b_ub")

    if Aeq_cols > 0 and Aeq_cols != n:
        status = -1
        message = ("Invalid input for linprog with method = 'simplex'. "
                   "Number of columns in A_eq must be equal "
                   "to the size of c")

    if Aub_cols > 0 and Aub_cols != n:
        status = -1
        message = ("Invalid input for linprog with method = 'simplex'. "
                   "Number of columns in A_ub must be equal to the size of c")

    # Only the most recently recorded validation message is reported.
    if status != 0:
        # Invalid inputs provided
        raise ValueError(message)

    # Create the tableau: m constraint rows, then the objective row and the
    # Phase 1 pseudo-objective row; the last column holds the RHS.
    T = np.zeros([m + 2, n + n_slack + n_artificial + 1])

    # Insert objective into tableau
    T[-2, :n] = cc
    T[-2, -1] = f0

    # `b` is a view on the RHS column, so writes below land in T.
    b = T[:-2, -1]

    if meq > 0:
        # Add Aeq to the tableau
        T[:meq, :n] = Aeq
        # Add beq to the tableau
        b[:meq] = beq
    if mub > 0:
        # Add Aub to the tableau
        T[meq:meq + mub, :n] = Aub
        # Add bub to the tableau
        b[meq:meq + mub] = bub
        # Add the slack variables to the tableau
        np.fill_diagonal(T[meq:m, n:n + n_slack], 1)

    # Further set up the tableau.
    # If a row corresponds to an equality constraint or a negative b (a lower
    # bound constraint), then an artificial variable is added for that row.
    # Also, if b is negative, first flip the signs in that constraint.
    slcount = 0
    avcount = 0
    basis = np.zeros(m, dtype=int)
    r_artificial = np.zeros(n_artificial, dtype=int)
    for i in range(m):
        if i < meq or b[i] < 0:
            # basic variable i is in column n+n_slack+avcount
            basis[i] = n + n_slack + avcount
            r_artificial[avcount] = i
            avcount += 1
            if b[i] < 0:
                b[i] *= -1
                T[i, :-1] *= -1
            T[i, basis[i]] = 1
            T[-1, basis[i]] = 1
        else:
            # basic variable i is in column n+slcount
            basis[i] = n + slcount
            slcount += 1

    # Make the artificial variables basic feasible variables by subtracting
    # each row with an artificial variable from the Phase 1 objective
    for r in r_artificial:
        T[-1, :] = T[-1, :] - T[r, :]

    # Phase 1; the tableau and solution returned here are discarded and the
    # local `T` is inspected afterwards.
    nit1, status, _, _ = _solve_simplex(T, n, basis, phase=1, callback=callback,
                                        maxiter=maxiter, tol=tol, bland=bland)

    # if pseudo objective is zero, remove the last row from the tableau and
    # proceed to phase 2
    if abs(T[-1, -1]) < tol:
        # Remove the pseudo-objective row from the tableau
        T = T[:-1, :]
        # Remove the artificial variable columns from the tableau
        T = np.delete(T, np.s_[n + n_slack:n + n_slack + n_artificial], 1)
    else:
        # Failure to find a feasible starting point
        status = 2

    if status != 0:
        message = messages[status]
        if disp:
            print(message)
        # NOTE: this path returns a bare result, not the 4-tuple returned
        # at the end of the function.
        return OptimizeResult(x=np.nan, fun=-T[-1, -1], nit=nit1, status=status,
                              message=message, success=False)

    # Phase 2
    nit2, status, T, sol = _solve_simplex(T, n, basis, maxiter=maxiter - nit1,
                                          phase=2, callback=callback, tol=tol,
                                          nit0=nit1, bland=bland)

    # Read the values of the basic variables off the RHS column.
    solution = np.zeros(n + n_slack + n_artificial)
    solution[basis[:m]] = T[:m, -1]
    x = solution[:n]
    slack = solution[n:n + n_slack]

    # For those variables with finite negative lower bounds,
    # reverse the change of variables
    masked_L = np.ma.array(L, mask=np.isinf(L), fill_value=0.0).filled()
    x = x + masked_L

    # For those variables with infinite negative lower bounds,
    # take x[i] as the difference between x[i] and the floor variable.
    if have_floor_variable:
        for i in range(1, n):
            if np.isinf(L[i]):
                x[i] -= x[0]
        x = x[1:]

    # Optimization complete at this point
    obj = -T[-1, -1]

    if status in (0, 1):
        if disp:
            print(messages[status])
            print(" Current function value: {0: <12.6f}".format(obj))
            print(" Iterations: {0:d}".format(nit2))
    else:
        if disp:
            print(messages[status])
            print(" Iterations: {0:d}".format(nit2))

    return OptimizeResult(x=x, fun=obj, nit=int(nit2), status=status,
                          slack=slack, message=messages[status],
                          success=(status == 0)), nit2, obj, sol
def trf_no_bounds(fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev, x_scale,
                  loss_function, tr_solver, tr_options, verbose):
    """Run a trust-region least-squares iteration without bound constraints.

    Parameters
    ----------
    fun : callable
        Residual function, called as ``fun(x)``.
    jac : callable
        Jacobian function, called as ``jac(x, f)``.
    x0, f0, J0 :
        Starting point and the residuals / Jacobian already evaluated there.
    ftol, xtol, gtol : float
        Tolerances; `gtol` is compared with the infinity norm of the
        gradient, `ftol` / `xtol` are passed to `check_termination`.
    max_nfev : int or None
        Limit on evaluations of `fun`; ``None`` means ``100 * x0.size``.
    x_scale : array_like or 'jac'
        Variable scaling; ``'jac'`` derives it from the Jacobian.
    loss_function : callable or None
        Optional robust loss; when given, J and f are rescaled with
        `scale_for_robust_loss_function`.
    tr_solver : {'exact', 'lsmr'}
        Sub-problem solver.
    tr_options : dict
        Solver options. The keys ``damp`` and ``regularize`` (lsmr only),
        ``maxiters`` and ``iterCb`` are popped from it -- the caller's dict
        is modified -- and the remainder is forwarded to `lsmr`.
    verbose : int
        ``2`` prints a header and one line per iteration.

    Returns
    -------
    OptimizeResult
        With ``x``, ``cost``, ``fun``, ``jac``, ``grad``, ``optimality``,
        ``active_mask`` (all zeros), ``nfev``, ``njev`` and ``status``.
        ``status`` is 1 when the gradient norm dropped below `gtol`, 0 when
        no termination criterion fired, otherwise whatever
        `check_termination` returned.
    """
    x = x0.copy()

    f = f0
    # Keep the unscaled residuals; `f` may be rescaled for the robust loss.
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1
    m, n = J.shape

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, string_types) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    # Initial trust-region radius, measured in the scaled variables.
    Delta = norm(x0 * scale_inv)
    if Delta == 0:
        Delta = 1.0

    if tr_solver == 'lsmr':
        reg_term = 0
        damp = tr_options.pop('damp', 0.0)
        regularize = tr_options.pop('regularize', True)

    # Popped for every solver so they never reach `lsmr(**tr_options)`.
    maxiters = tr_options.pop('maxiters', 1e6)
    iterCb = tr_options.pop('iterCb', None)

    if max_nfev is None:
        max_nfev = x0.size * 100

    alpha = 0.0  # "Levenberg-Marquardt" parameter

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while iteration < maxiters:
        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        # Quantities with the `_h` suffix live in the scaled variables.
        d = scale
        g_h = d * g

        if tr_solver == 'exact':
            J_h = J * d
            U, s, V = svd(J_h, full_matrices=False)
            V = V.T
            uf = U.T.dot(f)
        elif tr_solver == 'lsmr':
            J_h = right_multiplied_operator(J, d)

            if regularize:
                a, b = build_quadratic_1d(J_h, g_h, -g_h)
                to_tr = Delta / norm(g_h)
                ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1]
                reg_term = -ag_value / Delta**2

            damp_full = (damp**2 + reg_term)**0.5
            gn_h = lsmr(J_h, f, damp=damp_full, **tr_options)[0]
            # Orthonormal basis of the 2-D subspace spanned by the scaled
            # gradient and the lsmr solution.
            S = np.vstack((g_h, gn_h)).T
            S, _ = qr(S, mode='economic')
            JS = J_h.dot(S)
            B_S = np.dot(JS.T, JS)
            g_S = S.T.dot(g_h)

        # Retry with an updated radius until the cost decreases or the
        # evaluation budget is exhausted.
        actual_reduction = -1
        while actual_reduction <= 0 and nfev < max_nfev:
            if tr_solver == 'exact':
                step_h, alpha, n_iter = solve_lsq_trust_region(
                    n, m, uf, s, V, Delta, initial_alpha=alpha)
            elif tr_solver == 'lsmr':
                p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
                step_h = S.dot(p_S)

            predicted_reduction = -evaluate_quadratic(J_h, g_h, step_h)
            step = d * step_h
            x_new = x + step
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step_h)

            # Non-finite residuals: shrink the radius and try again.
            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta_new, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, step_h_norm > 0.95 * Delta)
            alpha *= Delta / Delta_new
            Delta = Delta_new

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            # Accept the step and refresh everything that depends on x.
            x = x_new

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)

            # IterCallback
            if iterCb is not None:
                iterCb(x_new)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    # No bounds, so no variable is ever marked active.
    active_mask = np.zeros_like(x)
    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev,
        status=termination_status)
def minimize(objective, p0, args=(), nboundupdate=100, reltol=1e-4,
             abstol=0.0, maxiters=1e7, method='normal', jac=True,
             disp=False, callback=None, mask=None):
    """
    minimize     objective(P_r)
    subject to   0.0 <= P_r
                 sum(P_r) = 1

    parameters
    ----------
    objective : function returning (cost, gradient); when `jac` is false it
        returns the cost only and the gradient is approximated with
        `approx_fprime`
    p0 : starting guess
    args : extra positional arguments passed on to `objective`
    nboundupdate : number of iterations between lower bound updates
    reltol, abstol, maxiters : numerical parameters (relative / absolute
        tolerance on the gap to the lower bound, iteration cap)
    method : 'fast' (accelerated) or 'normal' algorithm
    jac : whether `objective` also returns the gradient
    disp : print status information during the run
    callback : called as callback(f, p) at every lower bound update
    mask : Boolean array with directions along which not to optimize

    output
    ------
    OptimizeResult with the optimal solution `x`, its cost `fun`, the
    number of iterations `nit` and the final lower bound `low`
    """
    if not jac:
        def jobjective(x, *fargs):
            return (objective(x, *fargs),
                    approx_fprime(x, objective, 1e-8, *fargs))
    else:
        jobjective = objective

    if mask is not None:
        mask = np.asarray(mask)

        # Accepts the extra arguments explicitly: every call site below
        # passes `*args`, which a one-parameter wrapper would reject as soon
        # as `args` is non-empty.
        def mobjective(x, *fargs):
            f, grad = jobjective(x, *fargs)
            if grad is not None:
                grad[mask] = 0.0
            return f, grad

        def mproject(p):
            return project(p, mask)
    else:
        mobjective = jobjective
        mproject = project

    # initialize p from function input
    p = mproject(np.asarray(p0))
    # counting variable for number of iterations
    k = 0
    # lower bound for the cost function
    low = 0.0

    # setup for accelerated algorithm
    if method == 'fast':
        y = p
        f, grad = mobjective(p, *args)
        # starting guess for gradient scaling parameter 1 / | nabla f |
        s = 1.0 / np.linalg.norm(grad)
        # refine by backtracking search
        while True:
            y_new = mproject(y - s * grad)
            # abs important as values close to machine precision
            # might become negative in fft convolution screwing
            # up cost calculations
            f_new, grad_new = mobjective(y_new, *args)
            if f_new < f + np.dot(y_new - y, grad.T) + \
                    0.5 * np.linalg.norm(y_new - y)**2 / s:
                break
            s *= 0.8
        # reduce s by some factor as optimal s might become smaller during
        # the course of optimization
        s /= 3.0

    told = time.time()
    while k < maxiters:
        k += 1
        f, grad = mobjective(p, *args)

        # update lower bound on cost function
        # initialize at beginning (k=1) and then every nboundupdateth
        # iteration
        if (k % nboundupdate == 0) or (k == 1):
            if mask is not None:
                i = np.argmin(grad[~mask])
                low = max((low, f - np.sum(p * grad) + grad[~mask][i]))
            else:
                i = np.argmin(grad)
                low = max((low, f - np.sum(p * grad) + grad[i]))
            gap = f - low
            if callback:
                callback(f, p)
            if disp:
                # single-argument print(): same output as a Python 2 print
                # statement, and valid Python 3
                print('%g: f %e, gap %e, relgap %e'
                      % (k, f, gap, gap / low if low != 0 else np.inf))
            if ((low != 0 and gap / low < reltol) or gap < abstol):
                if disp:
                    print('stopping criterion reached')
                break

        if method == 'fast':
            f, grad = mobjective(y, *args)
            p, pold = mproject(y - s * grad), p
            y = p + k / (k + 3.0) * (p - pold)
        else:
            # generate feasible direction by projection
            s = 0.1
            d = mproject(p - s * grad) - p
            # Backtracking line search
            deriv = np.dot(grad.T, d)
            alpha = 0.1
            # in (0, 0.5)
            p1 = 0.2
            # in (0, 1)
            p2 = 0.25
            fnew, grad = mobjective(p + alpha * d, *args)
            while fnew > f + p1 * alpha * deriv:
                alpha *= p2
                fnew, grad = mobjective(p + alpha * d, *args)
            # rebinding instead of `+=`: never writes into an array that
            # may alias p0 or that was already handed to `callback`
            p = p + alpha * d
    else:
        print('warning: maxiters reached before convergence')

    if disp:
        print('niters %e, t per iteration %e'
              % (k, (time.time() - told) / k))
        print('cost %e, low %e, gap %e, relgap %e'
              % (f, low, gap, gap / low if low != 0 else np.inf))

    return OptimizeResult(x=p, fun=f, nit=k, low=low)
def _minimize_pso(fun, x0, confunc=None, friction=.8, max_velocity=5.,
                  g_rate=.8, l_rate=.5, max_iter=1000, stable_iter=20,
                  ptol=1e-6, ctol=1e-6, callback=None, verbose=False,
                  savefile=None):
    """Internal implementation for ``psopy.minimize``.

    See Also
    --------
    psopy.minimize : The SciPy compatible interface to this function. Refer to
        its documentation for an explanation of the parameters.
    psopy.gen_confunc : Utility function to convert SciPy style constraints
        to the form required by this function.

    Parameters
    ----------
    x0 : array_like of shape (N, D)
        Initial position to begin PSO from, where ``N`` is the number of
        points and ``D`` the dimensionality of each point. For the
        constrained case these points should satisfy all constraints.
    fun : callable
        The objective function to be minimized. Must be in the form
        ``fun(pos)``, where ``pos`` is a 2-D array in which each row
        specifies the position of a different particle; it returns one value
        per row.
    confunc : callable
        The function that describes constraints. Must be of the form
        ``confunc(pos)`` that returns the constraint matrix.

    Returns
    -------
    result : OptimizeResult
        ``x`` and ``fun`` describe the best position found, ``nit`` is the
        index of the last iteration executed and ``nsit`` the number of
        consecutive stable iterations at exit. ``status`` is ``2`` when the
        summed constraint values at ``x`` exceed ``ctol``, ``1`` when
        ``max_iter`` iterations ran without meeting the stability criterion
        and ``0`` otherwise; ``success`` is true only for status ``0``.
        With constraints, ``maxcv`` and ``cvec`` are also set.

    Notes
    -----
    Using this function directly allows for a slightly faster implementation
    that does away with the need for the additional recursive calls needed to
    wrap the constraint and objective functions for compatibility with Scipy.

    Examples
    --------
    These examples are identical to those laid out in ``psopy.minimize_pso``
    and serve to illustrate the additional overhead in ensuring
    compatibility.

    >>> import numpy as np
    >>> from psopy import _minimize_pso

    Consider the problem of minimizing the Rosenbrock function implemented as
    ``scipy.optimize.rosen``.

    >>> from scipy.optimize import rosen
    >>> fun = lambda x: np.apply_along_axis(rosen, 1, x)

    Initialize 1000 particles and run the minimization function.

    >>> x0 = np.random.uniform(0, 2, (1000, 5))
    >>> res = _minimize_pso(fun, x0, stable_iter=50)
    >>> res.x
    array([1.00000003, 1.00000017, 1.00000034, 1.0000006 , 1.00000135])

    Consider the constrained optimization problem with the objective function
    defined as:

    >>> fun = lambda x: (x[0] - 1)**2 + (x[1] - 2.5)**2
    >>> fun_ = lambda x: np.apply_along_axis(fun, 1, x)

    and constraints defined as:

    >>> cons = ({'type': 'ineq', 'fun': lambda x: x[0] - 2 * x[1] + 2},
    ...         {'type': 'ineq', 'fun': lambda x: -x[0] - 2 * x[1] + 6},
    ...         {'type': 'ineq', 'fun': lambda x: -x[0] + 2 * x[1] + 2},
    ...         {'type': 'ineq', 'fun': lambda x: x[0]},
    ...         {'type': 'ineq', 'fun': lambda x: x[1]})

    Initializing the constraint function and feasible solutions:

    >>> from psopy import init_feasible, gen_confunc
    >>> x0 = init_feasible(cons, low=0., high=2., shape=(1000, 2))
    >>> confunc = gen_confunc(cons)

    Running the constrained version of the function:

    >>> res = _minimize_pso(fun_, x0, confunc=confunc, g_rate=1., l_rate=1.,
    ...                     max_velocity=4., stable_iter=50)
    >>> res.x
    array([ 1.39985398, 1.69992748])

    """
    if verbose:
        message = setup_print(x0.shape[1], max_iter, confunc is not None)
    if savefile:
        iterinfo = []

    position = np.copy(x0)
    velocity = np.random.uniform(-max_velocity, max_velocity, position.shape)
    pbest = np.copy(position)
    gbest = pbest[np.argmin(fun(pbest))]
    oldfit = fun(gbest[None])[0]
    stable_count = 0

    # `ii` must exist for the result even when the loop body never runs.
    ii = 0
    # Set only when the loop runs out of iterations without a `break`.
    exhausted = False

    for ii in range(max_iter):

        # Determine global and local gradient.
        dv_g = g_rate * uniform(0, 1) * (gbest - position)
        if confunc is not None:
            leaders = np.argmin(
                distance.cdist(position, pbest, 'sqeuclidean'), axis=1)
            dv_l = l_rate * uniform(0, 1) * (pbest[leaders] - position)
        else:
            dv_l = l_rate * uniform(0, 1) * (pbest - position)

        # Update velocity and position of particles.
        velocity *= friction
        velocity += (dv_g + dv_l)
        np.clip(velocity, -max_velocity, max_velocity, out=velocity)

        position += velocity
        to_update = (fun(position) < fun(pbest))
        if confunc is not None:
            to_update &= (confunc(position).sum(axis=1) < ctol)

        if to_update.any():
            pbest[to_update] = position[to_update]
            gbest = pbest[np.argmin(fun(pbest))]

        # Termination criteria.
        fval = fun(gbest[None])[0]
        if np.abs(oldfit - fval) < ptol:
            stable_count += 1
            if stable_count == stable_iter:
                break
        else:
            stable_count = 0
        oldfit = fval

        if verbose or savefile:
            info = [ii, gbest, fval]
            if confunc is not None:
                cv = np.max(confunc(gbest[None]))
                info.append(cv)

            if verbose:
                print(message.format(*info))

            if savefile:
                iterinfo.append(info)

        # Final callback.
        if callback is not None:
            position = callback(position)
    else:
        # `ii` only ever reaches max_iter - 1, so comparing it with max_iter
        # can never detect exhaustion; the for/else does, and it is not
        # fooled by a `break` that happens on the very last iteration.
        exhausted = True

    if savefile:
        save_info(savefile, iterinfo, constraints=confunc is not None)

    result = OptimizeResult(x=gbest, fun=fun(gbest[None])[0], nit=ii,
                            nsit=stable_count)

    violation = False
    if confunc is not None:
        convec = confunc(gbest[None])
        result.maxcv = np.max(convec)
        result.cvec = convec
        if convec.sum() > ctol:
            violation = True

    if violation:
        result.status = 2
    elif exhausted:
        result.status = 1
    else:
        result.status = 0

    result.success = not result.status
    return result
def optimize_minimize_mhmcmc_cluster(objective, bounds, args=(), x0=None, T=1, N=3, burnin=100000,
                                     maxiter=1000000, target_ar=0.4, ar_tolerance=0.05,
                                     cluster_eps=DEFAULT_CLUSTER_EPS, rnd_seed=None,
                                     collect_samples=None, logger=None):
    r"""
    Minimize objective function and return up to N local minima solutions.

    The parameter space is explored with a Metropolis-Hastings random walk
    (burn-in followed by a main phase). The lowest-valued accepted points are
    cached, clustered with DBSCAN in normalized coordinates, and the objective
    is re-evaluated at each cluster mean to rank the candidate minima.

    :param objective: Objective function to minimize. Takes unpacked args as function call
        arguments and returns a float.
    :type objective: Callable(\*args) -> float
    :param bounds: Bounds of the parameter space.
    :type bounds: scipy.optimize.Bounds
    :param args: Any additional fixed parameters needed to completely specify the objective
        function.
    :type args: tuple or list
    :param x0: Initial guess. If None, will be selected randomly and uniformly within the
        parameter bounds.
    :type x0: numpy.array with same shape as elements of bounds
    :param T: The "temperature" parameter for the accept or reject criterion. To sample the
        domain well, should be in the order of the typical difference in local minima objective
        valuations.
    :type T: float
    :param N: Maximum number of minima to return
    :type N: int
    :param burnin: Number of random steps to discard before starting to accumulate statistics.
    :type burnin: int
    :param maxiter: Maximum number of steps to take (including burnin).
    :type maxiter: int
    :param target_ar: Target acceptance rate of point samples generated by stepping.
    :type target_ar: float between 0 and 1
    :param ar_tolerance: Tolerance on the acceptance rate before actively adapting the step size.
    :type ar_tolerance: float
    :param cluster_eps: Point proximity tolerance for DBSCAN clustering, in normalized bounds
        coordinates.
    :type cluster_eps: float
    :param rnd_seed: Random seed to force deterministic behaviour
    :type rnd_seed: int
    :param collect_samples: If not None and integral type, collect collect_samples at regular
        intervals and return as part of solution.
    :type collect_samples: int or NoneType
    :param logger: Logger instance for outputting log messages.
    :return: OptimizeResult containing solution(s) and solver data.
    :rtype: scipy.optimize.OptimizeResult with additional attributes
    """
    @call_counter
    def obj_counted(*args):
        return objective(*args)
    # end func

    assert maxiter >= 2 * burnin, "maxiter {} should be at least twice burnin steps {}".format(
        maxiter, burnin)
    main_iter = maxiter - burnin

    if collect_samples is not None:
        assert isinstance(collect_samples, int), "collect_samples expected to be integral type"
        assert collect_samples > 0, "collect_samples expected to be positive"
    # end if

    beta = 1.0 / T

    if rnd_seed is None:
        rnd_seed = int(time.time() * 1000) % (1 << 31)
    # end if
    np.random.seed(rnd_seed)
    if logger:
        logger.info('Using random seed {}'.format(rnd_seed))
    # end if

    if x0 is None:
        x0 = np.random.uniform(bounds.lb, bounds.ub)
    # end if
    assert np.all((x0 >= bounds.lb) & (x0 <= bounds.ub))
    x = x0.copy()
    funval = obj_counted(x, *args)

    # Set up stepper with adaptive acceptance rate
    stepper = BoundedRandNStepper(bounds)
    stepper = AdaptiveStepsize(stepper, accept_rate=target_ar, ar_tolerance=ar_tolerance,
                               interval=50)

    # -------------------------------
    # DO BURN-IN
    rejected_randomly = 0
    accepted_burnin = 0
    tracked_range = tqdm(range(burnin), total=burnin, desc='BURN-IN')
    if logger:
        stepper.logger = lambda msg: tracked_range.write(logger.name + ':' + msg)
    else:
        stepper.logger = tracked_range.write
    # end if
    for _ in tracked_range:
        x_new = stepper(x)
        funval_new = obj_counted(x_new, *args)
        log_alpha = -(funval_new - funval) * beta
        if log_alpha > 0 or np.log(np.random.rand()) <= log_alpha:
            x = x_new
            funval = funval_new
            stepper.notify_accept()
            accepted_burnin += 1
        elif log_alpha <= 0:
            rejected_randomly += 1
        # end if
    # end for
    # burnin == 0 is a legitimate request (no burn-in); avoid dividing by it.
    ar = float(accepted_burnin) / burnin if burnin > 0 else 0.0
    if logger:
        logger.info("Burn-in acceptance rate: {}".format(ar))
    # end if

    # -------------------------------
    # DO MAIN LOOP
    if collect_samples is not None:
        nsamples = min(collect_samples, main_iter)
        # float() keeps the cadence fractional under integer division too
        sample_cadence = float(main_iter) / nsamples
        samples = np.zeros((nsamples, len(x)))
        samples_fval = np.zeros(nsamples)
    # end if
    accepted = 0
    rejected_randomly = 0
    minima_sorted = SortedList(key=lambda rec: rec[1])  # Sort by objective function value
    hist = HistogramIncremental(bounds, nbins=100)
    # Cached a lot of potential minimum values, as these need to be clustered before return N
    # results
    N_cached = int(np.ceil(N * main_iter / 500))
    next_sample = 0.0
    sample_count = 0
    tracked_range = tqdm(range(main_iter), total=main_iter, desc='MAIN')
    if logger:
        stepper.logger = lambda msg: tracked_range.write(logger.name + ':' + msg)
    else:
        stepper.logger = tracked_range.write
    # end if
    for i in tracked_range:
        if collect_samples and i >= next_sample:
            assert sample_count < collect_samples
            samples[sample_count] = x
            samples_fval[sample_count] = funval
            sample_count += 1
            next_sample += sample_cadence
        # end if
        x_new = stepper(x)
        funval_new = obj_counted(x_new, *args)
        log_alpha = -(funval_new - funval) * beta
        if log_alpha > 0 or np.log(np.random.rand()) <= log_alpha:
            x = x_new
            funval = funval_new
            minima_sorted.add((x, funval))
            if len(minima_sorted) > N_cached:
                minima_sorted.pop()
            # end if
            stepper.notify_accept()
            hist += x
            accepted += 1
        elif log_alpha <= 0:
            rejected_randomly += 1
        # end if
    # end for
    stepper.logger = None
    ar = float(accepted) / main_iter
    if logger:
        logger.info("Acceptance rate: {}".format(ar))
        logger.info("Best minima (before clustering):\n{}".format(
            np.array([_mx[0] for _mx in minima_sorted[:10]])))
    # end if

    # -------------------------------
    # Cluster minima and associate each cluster with a local minimum.
    # Using a normalized coordinate space for cluster detection.
    if len(minima_sorted) > 0:
        x_range = bounds.ub - bounds.lb
        pts = np.array([rec[0] for rec in minima_sorted])
        fvals = np.array([rec[1] for rec in minima_sorted])
        pts_norm = (pts - bounds.lb) / x_range
        _, labels = dbscan(pts_norm, eps=cluster_eps, min_samples=21, n_jobs=-1)
        n_clusters = max(labels) + 1
    else:
        # No step was accepted in the main loop: there is nothing to cluster,
        # so report zero clusters instead of failing inside the clustering.
        pts = np.empty((0, len(x)))
        fvals = np.empty(0)
        labels = np.empty(0, dtype=int)
        n_clusters = 0
    # end if

    # Compute mean of each cluster and evaluate objective function at cluster mean locations.
    minima_candidates = []
    for grp in range(n_clusters):
        mask = (labels == grp)
        mean_loc = np.mean(pts[mask, :], axis=0)
        # Evaluate objective function precisely at the mean location of each cluster
        fval = obj_counted(mean_loc, *args)
        minima_candidates.append((mean_loc, grp, fval))
    # end for

    # Rank minima locations by objective function.
    minima_candidates.sort(key=lambda c: c[2])

    # Pick up to N solutions
    solutions = minima_candidates[:N]

    # Put results into OptimizeResult container.
    # Add histograms to output result (in form of scipy.stats.rv_histogram)
    solution = OptimizeResult()
    solution.x = np.array([s[0] for s in solutions])
    solution.clusters = [pts[(labels == s[1])] for s in solutions]
    solution.cluster_funvals = [fvals[(labels == s[1])] for s in solutions]
    solution.bins = hist.bins
    solution.distribution = hist.histograms
    solution.acceptance_rate = ar
    solution.success = True
    solution.status = 0
    if len(solutions) > 0:
        solution.message = 'SUCCESS: Found {} local minima'.format(len(solutions))
    else:
        solution.message = 'WARNING: Found no clusters within tolerance {}'.format(cluster_eps)
    # end if
    solution.fun = np.array([s[2] for s in solutions])
    solution.jac = None
    solution.nfev = obj_counted.counter
    solution.njev = 0
    solution.nit = main_iter
    solution.maxcv = None
    solution.samples = samples if collect_samples else None
    solution.sample_funvals = samples_fval if collect_samples else None
    solution.bounds = bounds
    solution.version = 's0.3'  # Solution version for future traceability
    solution.rnd_seed = rnd_seed

    return solution
def solve(self):
    """
    Run the solver generation by generation and report the outcome.

    Returns
    -------
    res : OptimizeResult
        The optimization result. ``x`` is the solution array, ``fun`` the
        energy of the best population member, ``nfev`` / ``nit`` the number
        of function evaluations and generations, ``message`` the cause of
        termination and ``success`` a Boolean flag that is false whenever a
        warning condition (evaluation budget, callback stop, iteration
        limit) ended the run. If `polish` was employed and the polishing
        found a lower minimum, the result also carries the ``jac``
        attribute.
    """
    # A freshly initialised population has every energy set to np.inf.
    # Evaluate it here as well, so that the energies exist even when
    # maxiter=0 keeps the loop below from running at all.
    if np.all(np.isinf(self.population_energies)):
        self._calculate_population_energies()

    nit = 0
    warning_flag = False
    status_message = _status_message['success']

    # do the optimisation.
    for nit in xrange(1, self.maxiter + 1):
        # evolve the population by a generation
        try:
            next(self)
        except StopIteration:
            status_message = _status_message['maxfev']
            warning_flag = True
            break

        if self.disp:
            best_energy = self.population_energies[0]
            print("differential_evolution step %d: f(x)= %g"
                  % (nit, best_energy))

        # should the solver terminate?
        convergence = self.convergence

        if self.callback:
            best_member = self._scale_parameters(self.population[0])
            stop_requested = self.callback(
                best_member, convergence=self.tol / convergence)
            if stop_requested is True:
                status_message = ('callback function requested stop early '
                                  'by returning True')
                warning_flag = True
                break

        energies = self.population_energies
        spread = np.std(energies)
        threshold = self.atol + self.tol * np.abs(np.mean(energies))
        if warning_flag or spread <= threshold:
            break
    else:
        # the loop ran through every generation without converging
        warning_flag = True
        status_message = _status_message['maxiter']

    succeeded = warning_flag is not True
    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=self._nfev,
        nit=nit,
        message=status_message,
        success=succeeded)

    if not self.polish:
        return DE_result

    polished = minimize(self.func,
                        np.copy(DE_result.x),
                        method='L-BFGS-B',
                        bounds=self.limits.T,
                        args=self.args)

    self._nfev += polished.nfev
    DE_result.nfev = self._nfev

    if polished.fun < DE_result.fun:
        DE_result.fun = polished.fun
        DE_result.x = polished.x
        DE_result.jac = polished.jac
        # to keep internal state consistent
        self.population_energies[0] = polished.fun
        self.population[0] = self._unscale_parameters(polished.x)

    return DE_result
def amoeba(objective, x0, xmin, xmax, simplex_scale=.1, xtol=1e-7,
           ftol=1e-7, maxevals=int(1e3), initial_simplex=None):
    '''
    Nelder-Mead optimization adapted from scipy.optimize.fmin

    Arguments
    ---------
    objective : callable
        Scalar objective function to be minimized
    x0 : np.ndarray [N]
        Initial guess for solution in space of N parameters
    xmin : np.ndarray [N]
        Lower bounds for parameters. These should be far lower than the
        values the simplex explores and is only meant to catch the simplex
        if it runs far off from the solution
    xmax : np.ndarray [N]
        Upper bounds for parameters. See xmin documentation for usage

    Keywords
    --------
    simplex_scale : np.ndarray or float [N]
        Scale factor for each parameter in generating an initial simplex.
        A scalar is applied to every parameter.
    xtol : np.ndarray or float [N]
        Tolerance in each parameter for convergence. The algorithm stops
        when all values in the simplex are within xtol of each other
    ftol : float
        Tolerance in objective function for convergence. The algorithm
        stops when all function values in simplex are within ftol of each
        other.
    maxevals : int
        Max number of function evaluations before function quits
    initial_simplex : np.ndarray [N+1, N]
        Initial simplex. If None, simplex_scale is used to generate an
        initial simplex. The array is copied, never modified.

    Returns
    -------
    result : scipy.optimize.OptimizeResult
        Contains information about solution, best function value, number
        of function evaluations and iterations, reason for termination,
        and success of the fit. See scipy's documentation.

    Raises
    ------
    ValueError
        If initial_simplex does not have shape (N+1, N).
    '''
    x0 = np.asarray(x0, dtype=float)
    xmin = np.asarray(xmin, dtype=float)
    xmax = np.asarray(xmax, dtype=float)
    xtol = np.asarray(xtol)
    N = len(x0)

    def clamp(point):
        # keep a trial point inside the [xmin, xmax] box
        return np.maximum(xmin, np.minimum(point, xmax))

    # Initialize simplex
    if initial_simplex is None:
        simplex_scale = np.asarray(simplex_scale, dtype=float)
        # A scalar scale arrives here as a 0-d array, which np.diag
        # rejects; expand it to one entry per parameter first.
        if simplex_scale.ndim == 0:
            simplex_scale = np.full(N, float(simplex_scale))
        simplex = np.vstack([x0, np.diag(simplex_scale) + x0])
    else:
        # Work on a float copy: the simplex is clipped and reordered in
        # place below and the caller's array must not change.
        simplex = np.array(initial_simplex, dtype=float)
        if simplex.shape != (N+1, N):
            raise ValueError("Initial simplex must be dimension (N+1, N)")

    # Initialize algorithm
    neval = 0  # number of objective calls made so far
    niter = 1
    evals = np.zeros(N+1, float)
    for idx in range(N+1):
        simplex[idx] = clamp(simplex[idx])
        evals[idx] = objective(simplex[idx])
        neval += 1
    order = np.argsort(evals)
    evals = np.take(evals, order, 0)
    simplex = np.take(simplex, order, 0)

    # reflection, expansion, contraction and shrink coefficients
    rho = 1
    chi = 2
    psi = 0.5
    sigma = 0.5

    # START FITTING
    message = 'failure (hit max evals)'
    while neval < maxevals:
        # Test if simplex is small
        if np.all(np.amax(np.abs(simplex[1:] - simplex[0]), axis=0) <= xtol):
            message = 'convergence (simplex small)'
            break

        # Test if function values are similar
        if np.max(np.abs(evals[0] - evals[1:])) <= ftol:
            message = 'convergence (fvals similar)'
            break

        # Test if simplex hits edge of parameter space (a vertex coordinate
        # exactly equal to its bound, i.e. it has been clipped)
        if np.any(simplex == xmax) or np.any(simplex == xmin):
            message = 'failure (stuck to boundary)'
            break

        # Reflect
        xbar = np.add.reduce(simplex[:-1], 0) / N
        xr = clamp((1 + rho) * xbar - rho * simplex[-1])
        fxr = objective(xr)
        neval += 1
        doshrink = False

        # Check if reflection is better than best estimate
        if fxr < evals[0]:
            # If so, reflect double and see if that's even better
            xe = clamp((1 + rho * chi) * xbar - rho * chi * simplex[-1])
            fxe = objective(xe)
            neval += 1
            if fxe < fxr:
                simplex[-1] = xe
                evals[-1] = fxe
            else:
                simplex[-1] = xr
                evals[-1] = fxr
        elif fxr < evals[-2]:
            simplex[-1] = xr
            evals[-1] = fxr
        elif fxr < evals[-1]:
            # If reflection is not better, contract.
            xc = clamp((1 + psi * rho) * xbar - psi * rho * simplex[-1])
            fxc = objective(xc)
            neval += 1
            if fxc <= fxr:
                simplex[-1] = xc
                evals[-1] = fxc
            else:
                doshrink = True
        else:
            # Do 'inside' contraction
            xcc = clamp((1 - psi) * xbar + psi * simplex[-1])
            fxcc = objective(xcc)
            neval += 1
            if fxcc < evals[-1]:
                simplex[-1] = xcc
                evals[-1] = fxcc
            else:
                doshrink = True

        if doshrink:
            # pull every other vertex towards the best one
            for j in range(1, N + 1):
                simplex[j] = clamp(
                    simplex[0] + sigma * (simplex[j] - simplex[0]))
                evals[j] = objective(simplex[j])
                neval += 1

        order = np.argsort(evals)
        simplex = np.take(simplex, order, 0)
        evals = np.take(evals, order, 0)
        niter += 1

    success = 'failure' not in message
    return OptimizeResult(x=simplex[0], success=success, message=message,
                          nit=niter, nfev=neval, fun=evals[0])
def trf_bounds(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev,
               x_scale, loss_function, tr_solver, tr_options, verbose):
    """Run a trust-region least-squares iteration with bounds ``lb``/``ub``.

    Parameters
    ----------
    fun : callable
        Called as ``fun(x)``; returns the residual vector at ``x``.
    jac : callable
        Called as ``jac(x, f)``; returns the Jacobian at ``x``.
    x0 : ndarray
        Starting point. It is copied, never modified.
    f0 : ndarray
        Residuals at ``x0``.
    J0 : matrix-like
        Jacobian at ``x0``; must expose ``shape``.
    lb, ub : ndarray
        Lower and upper bounds, passed to ``CL_scaling_vector``,
        ``select_step``, ``make_strictly_feasible`` and
        ``find_active_constraints``.
    ftol, xtol : float
        Tolerances forwarded to ``check_termination``; ``xtol`` is also the
        ``rtol`` used to detect active constraints at the end.
    gtol : float
        Status 1 is reported once the infinity norm of the scaled gradient
        ``g * v`` is below this value.
    max_nfev : int or None
        Budget of ``fun`` evaluations; ``None`` means ``100 * x0.size``.
    x_scale : ndarray or 'jac'
        Variable scaling; ``'jac'`` derives it from the Jacobian via
        ``compute_jac_scale``.
    loss_function : callable or None
        When given it is called as ``loss_function(f)`` and
        ``loss_function(f, cost_only=True)``.
    tr_solver : {'exact', 'lsmr'}
        How the trust-region subproblem is solved.
    tr_options : dict
        For ``'lsmr'`` the key ``'regularize'`` (default True) is popped and
        the remaining entries are forwarded to ``lsmr``.
    verbose : int
        ``2`` prints a progress line per iteration.

    Returns
    -------
    OptimizeResult
        With fields ``x``, ``cost``, ``fun``, ``jac``, ``grad``,
        ``optimality``, ``active_mask``, ``nfev``, ``njev`` and ``status``
        (0 when no termination criterion fired).
    """
    x = x0.copy()

    f = f0
    f_true = f.copy()
    # starts at 1: presumably accounts for the f0 the caller already evaluated
    nfev = 1

    J = J0
    # starts at 1: presumably accounts for the J0 the caller already evaluated
    njev = 1
    m, n = J.shape

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    v, dv = CL_scaling_vector(x, g, lb, ub)
    v[dv != 0] *= scale_inv[dv != 0]
    Delta = norm(x0 * scale_inv / v**0.5)
    # a zero radius would allow no step at all; fall back to 1.0
    if Delta == 0:
        Delta = 1.0

    g_norm = norm(g * v, ord=np.inf)

    # Work arrays with m + n entries/rows; the first m hold f and the scaled
    # Jacobian, the remaining n rows of J_augmented hold diag(diag_h**0.5).
    f_augmented = np.zeros((m + n))
    if tr_solver == 'exact':
        J_augmented = np.empty((m + n, n))
    elif tr_solver == 'lsmr':
        reg_term = 0.0
        regularize = tr_options.pop('regularize', True)

    if max_nfev is None:
        max_nfev = x0.size * 100

    alpha = 0.0  # "Levenberg-Marquardt" parameter

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        v, dv = CL_scaling_vector(x, g, lb, ub)

        g_norm = norm(g * v, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        # Leave once a criterion fired or the evaluation budget is used up.
        if termination_status is not None or nfev == max_nfev:
            break

        # Now compute variables in "hat" space. Here, we also account for
        # scaling introduced by `x_scale` parameter. This part is a bit tricky,
        # you have to write down the formulas and see how the trust-region
        # problem is formulated when the two types of scaling are applied.
        # The idea is that first we apply `x_scale` and then apply Coleman-Li
        # approach in the new variables.

        # v is recomputed in the variables after applying `x_scale`, note that
        # components which were identically 1 not affected.
        v[dv != 0] *= scale_inv[dv != 0]

        # Here, we apply two types of scaling.
        d = v**0.5 * scale

        # C = diag(g * scale) Jv
        diag_h = g * dv * scale

        # After all this has been done, we continue normally.

        # "hat" gradient.
        g_h = d * g

        f_augmented[:m] = f
        if tr_solver == 'exact':
            J_augmented[:m] = J * d
            J_h = J_augmented[:m]  # Memory view.
            J_augmented[m:] = np.diag(diag_h**0.5)
            U, s, V = svd(J_augmented, full_matrices=False)
            V = V.T
            uf = U.T.dot(f_augmented)
        elif tr_solver == 'lsmr':
            J_h = right_multiplied_operator(J, d)

            if regularize:
                a, b = build_quadratic_1d(J_h, g_h, -g_h, diag=diag_h)
                to_tr = Delta / norm(g_h)
                ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1]
                reg_term = -ag_value / Delta**2

            lsmr_op = regularized_lsq_operator(J_h, (diag_h + reg_term)**0.5)
            gn_h = lsmr(lsmr_op, f_augmented, **tr_options)[0]
            # S: orthonormal basis (via QR) of the span of g_h and gn_h; the
            # subproblem is then solved in that two-dimensional subspace.
            S = np.vstack((g_h, gn_h)).T
            S, _ = qr(S, mode='economic')
            JS = J_h.dot(S)  # LinearOperator does dot too.
            B_S = np.dot(JS.T, JS) + np.dot(S.T * diag_h, S)
            g_S = S.T.dot(g_h)

        # theta controls step back step ratio from the bounds.
        theta = max(0.995, 1 - g_norm)

        # Retry with an updated radius until a step reduces the cost or the
        # evaluation budget is exhausted.
        actual_reduction = -1
        while actual_reduction <= 0 and nfev < max_nfev:
            if tr_solver == 'exact':
                p_h, alpha, n_iter = solve_lsq_trust_region(
                    n, m, uf, s, V, Delta, initial_alpha=alpha)
            elif tr_solver == 'lsmr':
                p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
                p_h = S.dot(p_S)

            p = d * p_h  # Trust-region solution in the original space.
            step, step_h, predicted_reduction = select_step(
                x, J_h, diag_h, g_h, p, p_h, d, Delta, lb, ub, theta)

            x_new = make_strictly_feasible(x + step, lb, ub, rstep=0)
            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step_h)

            # Non-finite residuals: retry with a much smaller region.
            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new
            Delta_new, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, step_h_norm > 0.95 * Delta)

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)
            if termination_status is not None:
                break

            # Rescale alpha by the ratio of the old to the new radius.
            alpha *= Delta / Delta_new
            Delta = Delta_new

        if actual_reduction > 0:
            # Step accepted: move and refresh everything derived from x.
            x = x_new

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            # No improving step was found: report a zero step.
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    active_mask = find_active_constraints(x, lb, ub, rtol=xtol)
    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm,
        active_mask=active_mask, nfev=nfev, njev=njev,
        status=termination_status)
def minimizeCompass(func, x0, args=(), bounds=None, scaling=None,
                    redfactor=2.0, deltainit=1.0, deltatol=1e-3, feps=1e-15,
                    errorcontrol=True, funcNinit=30, funcmultfactor=2.0,
                    paired=True, alpha=0.05, disp=False, callback=None,
                    **kwargs):
    """
    Minimization of an objective function by a pattern search.

    The algorithm does a compass search along coordinate directions.
    If `errorcontrol=True` then the function is called repeatedly to average
    over the stochasticity in the function evaluation. The number of
    evaluations over which to average is adapted dynamically to ensure
    convergence.

    The algorithm terminates when the current iterate is locally optimal
    at the target pattern size deltatol or when the function value differs by
    less than the tolerance feps along all directions.

    Parameters
    ----------
    func: callable
        objective function to be minimized:
        called as `func(x, *args)`,
        if `paired=True`, then called with keyword argument `seed` additionally
    x0: array-like
        starting point (not modified, even when it has to be clipped to the
        bounds)
    args: tuple
        extra arguments to be supplied to func
    bounds: array-like
        bounds on the variables, one (lower, upper) row per variable
    scaling: array-like
        scaling by which to multiply step size and tolerances along different
        dimensions
    redfactor: float
        reduction factor by which to reduce delta if no reduction direction
        found
    deltainit: float
        initial pattern size
    deltatol: float
        target pattern size,
        function differences at this scale need to be larger than
        stochasticity in evaluations to ensure convergence if
        `errorcontrol=False`
    feps: float
        smallest difference in function value to resolve
    errorcontrol: boolean
        whether to control error of simulation by repeated sampling
    funcNinit: int, only for errorcontrol=True
        initial number of iterations to use for the function, do not set much
        lower than 30 as otherwise there is no sufficient statistics for
        function comparisons
    funcmultfactor: float, only for errorcontrol=True
        multiplication factor by which to increase number of iterations of
        function
    paired: boolean, only for errorcontrol=True
        compare for same random seeds
    alpha: float, only for errorcontrol=True
        significance level of tests, the higher this value the more statistics
        is acquired, which decreases the risk of taking a step in a
        non-descent direction at the expense of higher computational cost per
        iteration
    disp: boolean
        whether to output status updates during the optimization
    callback: callable
        called after each iteration, as callback(xk), where xk is the current
        parameter vector.
    **kwargs:
        accepted but not used by this function

    Returns
    -------
    scipy.optimize.OptimizeResult object
        special entry: free
        Boolean array indicating parameters that are unconstrained at the
        optimum (within feps)
    """
    #TODO: implement variable deltas for different directions (might speed up things, see review)
    if disp:
        print('minimization starting')
        print('args', args)
        print('errorcontrol', errorcontrol)
        print('paired', paired)

    # absolute tolerance for float comparisons
    floatcompatol = 1e-14

    x0 = np.asarray(x0)
    if scaling is None:
        scaling = np.ones(x0.shape)
    else:
        scaling = np.asarray(scaling)

    # ensure initial point lies within bounds
    if bounds is not None:
        bounds = np.asarray(bounds)
        # Clip into a new array: writing the result back with `out=x0` would
        # silently change the caller's array and fails outright for an
        # integer starting point combined with float bounds.
        x0 = np.clip(x0, bounds[:, 0], bounds[:, 1])

    def clip(x, d):
        """clip x+d to respect bounds

        returns clipped result and effective distance"""
        xnew = x + d
        if bounds is not None:
            # if test point goes beyond the bounds set to boundary instead
            xclipped = np.clip(xnew, bounds[:, 0], bounds[:, 1])
            deltaeff = np.abs(x - xclipped).sum()
            return xclipped, deltaeff
        else:
            # `delta` is the current pattern size of the enclosing function,
            # looked up at call time
            return xnew, delta

    # generate set of search directions (+- s_i e_i | i = 1, ..., N)
    def unit(i, N):
        "return ith unit vector in R^N"
        arr = np.zeros(N)
        arr[i] = 1.0
        return arr
    N = len(x0)
    generatingset = [unit(i, N) * direction * scaling[i]
                     for i in np.arange(N) for direction in [+1, -1]]

    # memoize function
    if errorcontrol:
        funcm = AveragedFunction(func, fargs=args, paired=paired, N=funcNinit)
        # apply Bonferroni correction to confidence level
        # (need more statistics in higher dimensions)
        alpha = alpha / len(generatingset)
    else:
        # freeze function arguments
        def funcf(x, **kwargs):
            return func(x, *args, **kwargs)
        funcm = _memoized(funcf)

    x = x0
    delta = deltainit
    # number of iterations
    nit = 0
    # continue as long as delta is larger than tolerance
    # or if there was an update during the last iteration
    found = False
    while delta >= deltatol - floatcompatol or found:
        nit += 1
        # if delta gets close to deltatol, do iteration with step size
        # deltatol instead
        if delta / redfactor < deltatol:
            delta = deltatol
        if disp:
            print('nit %i, Delta %g' % (nit, delta))
        found = False
        np.random.shuffle(generatingset)
        for d in generatingset:
            xtest, deltaeff = clip(x, delta * d)
            if deltaeff < floatcompatol:
                continue
            # Does xtest improve upon previous function value?
            if ((not errorcontrol and (funcm(xtest) < funcm(x) - feps))
                    or (errorcontrol
                        and funcm.test(xtest, x, type_='smaller', alpha=alpha))):
                x = xtest
                found = True
                if disp:
                    print(x)
            # Is non-improvement due to too large step size or missing
            # statistics?
            # (first condition: no refinement for boundary steps smaller
            # than tolerance)
            elif ((deltaeff >= deltatol * np.sum(np.abs(d)))
                    and ((not errorcontrol and (funcm(xtest) < funcm(x) + feps))
                         or (errorcontrol
                             and funcm.test(xtest, x, type_='equality', alpha=alpha)
                             and (funcm.diffse(xtest, x) > feps)))):
                # If there is no significant difference the step size might
                # correspond to taking a step to the other side of the minimum.
                # Therefore test if middle point is better
                xmid = 0.5 * (x + xtest)
                if ((not errorcontrol and funcm(xmid) < funcm(x) - feps)
                        or (errorcontrol
                            and funcm.test(xmid, x, type_='smaller', alpha=alpha))):
                    x = xmid
                    delta /= redfactor
                    found = True
                    if disp:
                        print('mid', x)
                # otherwise increase accuracy of simulation to try to get to
                # significance
                elif errorcontrol:
                    funcm.N *= funcmultfactor
                    if disp:
                        print('new N %i' % funcm.N)
                    found = True
        if callback is not None:
            callback(x)
        if not found:
            delta /= redfactor

    message = 'convergence within deltatol'
    # check if any of the directions are free at the optimum
    delta = deltatol
    free = np.zeros(x.shape, dtype=bool)
    for d in generatingset:
        dim = np.argmax(np.abs(d))
        xtest, deltaeff = clip(x, delta * d)
        # do not consider as free for boundary steps
        if deltaeff < deltatol * np.sum(np.abs(d)) - floatcompatol:
            continue
        if not free[dim] and (
                ((not errorcontrol and funcm(xtest) - feps < funcm(x))
                 or (errorcontrol
                     and funcm.test(xtest, x, type_='equality', alpha=alpha)
                     and (funcm.diffse(xtest, x) < feps)))):
            free[dim] = True
            message += '. dim %i is free at optimum' % dim

    reskwargs = dict(x=x, nit=nit, nfev=funcm.nev, message=message, free=free,
                     success=True)
    if errorcontrol:
        f, funse = funcm(x)
        res = OptimizeResult(fun=f, funse=funse, **reskwargs)
    else:
        f = funcm(x)
        res = OptimizeResult(fun=f, **reskwargs)
    if disp:
        print(res)
    return res
def optimize_stiefel(func, X0, args=(), tau_max=.5, max_it=1, tol=1e-6,
                     disp=False, tau_find_freq=100):
    """
    Optimize a function over a Stiefel manifold.

    :param func: Function to be optimized. It is called as
                 ``func(X, *args)`` and must return a pair ``(F, G)``
                 (presumably the objective value and its gradient --
                 confirm against ``compute_A``).
    :param X0: Initial point for line search (copied, never modified)
    :param args: Extra arguments passed to the function
    :param tau_max: Maximum step size
    :param max_it: Maximum number of iterations
    :param tol: Tolerance criteria to terminate line search
    :param disp: Choose whether to display output
    :param tau_find_freq: A full search for the step size is repeated every
                          ``tau_find_freq`` iterations (and on the first
                          iteration and after every backtracking step)
    :returns: An ``OptimizeResult`` with the attributes ``tau_max``, ``X``,
              ``nfev``, ``nit``, ``fun`` and ``success``.
    """
    # NOTE: the asserts are kept (rather than raising ValueError) so that
    # callers catching AssertionError keep working.
    tol = float(tol)
    assert tol > 0, 'Tolerance must be positive'
    max_it = int(max_it)
    assert max_it > 0, 'The maximum number of iterations must be a positive '\
        + 'integer'
    tau_max = float(tau_max)
    assert tau_max > 0, 'The parameter `tau_max` must be positive.'
    X = X0.copy()
    nit = 0
    nfev = 0
    success = False
    if disp:
        print('Stiefel Optimization'.center(80))
        print('{0:4s} {1:11s} {2:5s}'.format('It', 'F', '(F - F_old) / F_old'))
        print('-' * 30)
    ls_func = LSFunc()
    ls_func.func = func
    decrease_tau = False
    # `nit` is incremented at the top of the body, so the bound has to be
    # strict: `nit <= max_it` would run max_it + 1 iterations.
    while nit < max_it:
        nit += 1
        F, G = func(X, *args)
        F_old = F
        nfev += 1
        A = compute_A(G, X)
        ls_func.A = A
        ls_func.X = X
        ls_func.func_args = args
        ls_func.tau_max = tau_max
        if nit == 1 or decrease_tau or nit % tau_find_freq == 0:
            # Need to minimize ls_func with respect to each argument.
            # The search variable is log(tau / tau_max), hence the exp below.
            tau_init = np.linspace(-10, 0., 3)[:, None]
            tau_d = np.linspace(-10, 0., 50)[:, None]
            tau_all, F_all = pybgo.minimize(ls_func, tau_init, tau_d,
                                            fixed_noise=1e-16,
                                            add_at_least=1, tol=1e-2,
                                            scale=True,
                                            train_every=1)[:2]
            nfev += tau_all.shape[0]
            idx = np.argmin(F_all)
            tau = np.exp(tau_all[idx, 0]) * tau_max
            if tau_max - tau <= 1e-6:
                # The best step sits on the upper limit: widen the range.
                tau_max = 1.2 * tau_max
                if disp:
                    print('increasing tau_max to {0:1.5e}'.format(tau_max))
            if decrease_tau:
                tau_max = .8 * tau_max
                if disp:
                    print('decreasing max_tau to {0:1.5e}'.format(tau_max))
                decrease_tau = False
            F = F_all[idx, 0]
        else:
            # Re-use the step size found by the last full search.
            F = ls_func([np.log(tau / tau_max)])
        delta_F = (F_old - F) / np.abs(F_old)
        if delta_F < 0:
            # The step increased the objective: redo this iteration with a
            # fresh step-size search on a reduced range.
            # NOTE(review): nothing bounds the number of consecutive
            # backtracking steps -- confirm this always terminates.
            if disp:
                print('*** backtracking')
            nit -= 1
            decrease_tau = True
            continue
        X = Y_func(tau, X, A)
        if disp:
            print('{0:4s} {1:1.5e} {2:5e} tau = {3:1.3e}, tau_max = {4:1.3e}'.format(
                str(nit).zfill(4), F, delta_F, tau, tau_max))
        if delta_F <= tol:
            if disp:
                print('*** Converged ***')
            success = True
            break
    res = OptimizeResult()
    res.tau_max = tau_max
    res.X = X
    res.nfev = nfev
    res.nit = nit
    res.fun = F
    res.success = success
    return res
def dummy_minimize(func, dimensions, n_calls=100, x0=None, y0=None,
                   random_state=None):
    """Random search by uniform sampling within the given bounds.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `n_calls` [int, default=100]:
        Number of calls to `func` to find the minimum. When `x0` is given
        without `y0`, the evaluations of `x0` count towards `n_calls`;
        when `y0` is given as well, `n_calls` random points are evaluated
        in addition to the provided ones.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None` (or an empty list), no initial input points are
          used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `models` [list]: always empty for random search.
        - `space` [Space]: the optimisation space.
        - `specs` [dict]: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `x0` is not a list, if `y0` is neither iterable nor a number, if
        `x0` and `y0` differ in length, if `y0` contains non-scalars, if
        `y0` is given without `x0`, or if `func` does not return a scalar.
    """
    # Save call args. This must stay the first statement so that the frame
    # locals contain nothing but the call arguments.
    specs = {"args": copy.copy(inspect.currentframe().f_locals),
             "function": inspect.currentframe().f_code.co_name}

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        # A single point was given; an empty list is left untouched instead
        # of failing on `x0[0]`.
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, got %s" % type(x0))

    if len(x0) > 0 and y0 is not None:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError("`y0` should be an iterable or a scalar, got %s"
                             % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")
    elif len(x0) > 0 and y0 is None:
        # The initial points still have to be evaluated, which uses up part
        # of the evaluation budget.
        y0 = []
        n_calls -= len(x0)
    elif len(x0) == 0 and y0 is not None:
        raise ValueError("`x0`cannot be `None` when `y0` is provided")
    else:  # len(x0) == 0 and y0 is None
        y0 = []

    # Random search
    X = x0 + space.rvs(n_samples=n_calls, random_state=rng)
    y = y0
    n_known = len(y)
    for offset, point in enumerate(X[n_known:]):
        y_i = func(point)
        # Only the first evaluation is validated.
        if offset == 0 and not np.isscalar(y_i):
            raise ValueError("`func` should return a scalar")
        y.append(y_i)

    y = np.array(y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(y)
    res.x = X[best]
    res.fun = y[best]
    res.func_vals = y
    res.x_iters = X
    res.models = []  # Create attribute even though it is empty
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
def minimizeSPSA(func, x0, args=(), bounds=None, niter=100, paired=True,
                 a=1.0, alpha=0.602, c=1.0, gamma=0.101,
                 disp=False, callback=None):
    """
    Minimization of an objective function by a simultaneous perturbation
    stochastic approximation algorithm.

    This algorithm approximates the gradient of the function by finite
    differences along stochastic directions Deltak. The elements of Deltak
    are drawn from +- 1 with probability one half. The gradient is
    approximated from the symmetric difference
    f(xk + ck*Deltak) - f(xk - ck*Deltak), where the evaluation step size ck
    is scaled according ck = c/(k+1)**gamma.
    The algorithm takes a step of size ak = a/(0.01*niter+k+1)**alpha along
    the negative gradient.

    See Spall, IEEE, 1998, 34, 817-823 for guidelines about how to choose the
    algorithm's parameters (a, alpha, c, gamma).

    Parameters
    ----------
    func: callable
        objective function to be minimized:
        called as `func(x, *args)`,
        if `paired=True`, then called with keyword argument `seed` additionally
    x0: array-like
        initial guess for parameters (copied, never modified in place)
    args: tuple
        extra arguments to be supplied to func (`None` is treated as `()`)
    bounds: array-like
        bounds on the variables, indexed as `bounds[:, 0]` (lower) and
        `bounds[:, 1]` (upper)
    niter: int
        number of iterations after which to terminate the algorithm
    paired: boolean
        calculate gradient for same random seeds
    a: float
       scaling parameter for step size
    alpha: float
        scaling exponent for step size
    c: float
       scaling parameter for evaluation step size
    gamma: float
        scaling exponent for evaluation step size
    disp: boolean
        whether to output status updates during the optimization
    callback: callable
        called after each iteration, as callback(xk), where xk are the
        current parameters

    Returns
    -------
    `scipy.optimize.OptimizeResult` object with the fields `fun`, `x`,
    `nit`, `nfev`, `message` and `success`
    """
    A = 0.01 * niter

    if bounds is not None:
        bounds = np.asarray(bounds)
        project = lambda x: np.clip(x, bounds[:, 0], bounds[:, 1])

    if args is None:
        args = ()

    # freeze function arguments
    def funcf(x, **kwargs):
        return func(x, *args, **kwargs)

    # Work on a float copy: the update below is in place and must neither
    # modify the caller's array nor fail on integer input.
    x = np.array(x0, dtype=float)
    N = len(x)
    # Aim for about 100 status updates; the max() keeps the modulus
    # non-zero when niter < 100.
    disp_every = max(1, niter // 100)
    for k in range(niter):
        ak = a / (k + 1.0 + A)**alpha
        ck = c / (k + 1.0)**gamma
        Deltak = np.random.choice([-1, 1], size=N)
        fkwargs = dict()
        if paired:
            # upper bound needs to be set to signed 32-bit integer
            # see https://github.com/numpy/numpy/issues/4085#issuecomment-29570567
            fkwargs['seed'] = np.random.randint(0, np.iinfo(np.int32).max)
        if bounds is None:
            grad = (funcf(x + ck * Deltak, **fkwargs)
                    - funcf(x - ck * Deltak, **fkwargs)) / (2 * ck * Deltak)
            x -= ak * grad
        else:
            # ensure evaluation points are feasible
            xplus = project(x + ck * Deltak)
            xminus = project(x - ck * Deltak)
            # NOTE(review): a component of xplus - xminus is zero when both
            # points are clipped onto the same bound.
            grad = (funcf(xplus, **fkwargs)
                    - funcf(xminus, **fkwargs)) / (xplus - xminus)
            x = project(x - ak * grad)
        if disp and (k % disp_every) == 0:
            print(x)
        if callback is not None:
            callback(x)
    message = 'terminated after reaching max number of iterations'
    return OptimizeResult(fun=funcf(x), x=x, nit=niter, nfev=2 * niter,
                          message=message, success=True)
def _optimize(self, objective): print('hello world!') return OptimizeResult(x=np.array([0.5]))
def trf_linear(A, b, x_lsq, lb, ub, tol, lsq_solver, lsmr_tol, max_iter,
               verbose):
    """Iteratively reduce ``0.5 * ||A x - b||**2`` for ``lb <= x <= ub``.

    Parameters
    ----------
    A : matrix or operator exposing ``shape`` and ``dot``
        Design matrix of shape (m, n).
    b : ndarray
        Target vector.
    x_lsq : ndarray
        Starting guess; it is passed through ``reflective_transformation``
        and ``make_strictly_feasible`` before the first iteration.
    lb, ub : ndarray
        Lower and upper bounds on ``x``.
    tol : float
        Threshold used both for the scaled gradient norm and for the
        relative cost change.
    lsq_solver : {'exact', 'lsmr'}
        'exact' uses a pivoted QR factorisation of ``A`` computed once up
        front; 'lsmr' solves every sub-problem with ``lsmr``.
    lsmr_tol : None, float or 'auto'
        ``atol``/``btol`` for ``lsmr``. None means ``1e-2 * tol``; 'auto'
        recomputes the tolerance from the gradient norm on every iteration.
    max_iter : None or int
        Iteration limit; None means 100.
    verbose : int
        A value of 2 prints a header and one line per iteration.

    Returns
    -------
    OptimizeResult
        With fields ``x``, ``fun`` (the residual ``A x - b``), ``cost``,
        ``optimality``, ``active_mask``, ``nit``, ``status`` and
        ``initial_cost``. ``status`` is 1 when the scaled gradient norm
        dropped below ``tol``, 2 when the cost change dropped below
        ``tol * cost``, -1 when the computed direction had a positive inner
        product with the gradient, and 0 when the iteration limit was
        reached without any of these.
    """
    m, n = A.shape
    x, _ = reflective_transformation(x_lsq, lb, ub)
    x = make_strictly_feasible(x, lb, ub, rstep=0.1)

    if lsq_solver == 'exact':
        # Factorise A once; the loop only rescales R by the current d.
        QT, R, perm = qr(A, mode='economic', pivoting=True)
        QT = QT.T

        if m < n:
            # Pad R with zero rows so that it has n rows.
            R = np.vstack((R, np.zeros((n - m, n))))

        QTr = np.zeros(n)
        k = min(m, n)
    elif lsq_solver == 'lsmr':
        # Right-hand side buffer for the augmented system; only the first m
        # entries are ever written.
        r_aug = np.zeros(m + n)
        auto_lsmr_tol = False
        if lsmr_tol is None:
            lsmr_tol = 1e-2 * tol
        elif lsmr_tol == 'auto':
            auto_lsmr_tol = True

    r = A.dot(x) - b
    g = compute_grad(A, r)
    cost = 0.5 * np.dot(r, r)
    initial_cost = cost

    termination_status = None
    step_norm = None
    cost_change = None

    if max_iter is None:
        max_iter = 100

    if verbose == 2:
        print_header_linear()

    # NOTE(review): `iteration` and `g_norm` are only bound inside the loop,
    # so max_iter == 0 would fail at the return statement -- confirm callers
    # never pass 0.
    for iteration in range(max_iter):
        v, dv = CL_scaling_vector(x, g, lb, ub)
        g_scaled = g * v
        g_norm = norm(g_scaled, ord=np.inf)
        if g_norm < tol:
            termination_status = 1

        if verbose == 2:
            print_iteration_linear(iteration, cost, cost_change,
                                   step_norm, g_norm)

        # A status set here or during the previous iteration (-1 or 2) ends
        # the loop only now, after the progress line has been printed.
        if termination_status is not None:
            break

        diag_h = g * dv
        diag_root_h = diag_h**0.5
        d = v**0.5
        g_h = d * g

        A_h = right_multiplied_operator(A, d)
        if lsq_solver == 'exact':
            QTr[:k] = QT.dot(r)
            p_h = -regularized_lsq_with_qr(
                m, n, R * d[perm], QTr, perm, diag_root_h, copy_R=False)
        elif lsq_solver == 'lsmr':
            lsmr_op = regularized_lsq_operator(A_h, diag_root_h)
            r_aug[:m] = r
            if auto_lsmr_tol:
                eta = 1e-2 * min(0.5, g_norm)
                lsmr_tol = max(EPS, min(0.1, eta * g_norm))
            p_h = -lsmr(lsmr_op, r_aug, atol=lsmr_tol, btol=lsmr_tol)[0]

        # Map the step from the scaled variables back to x.
        p = d * p_h

        p_dot_g = np.dot(p, g)
        if p_dot_g > 0:
            # Only flagged here; the step below is still taken and the loop
            # stops at the top of the next iteration.
            termination_status = -1

        theta = 1 - min(0.005, g_norm)
        step = select_step(x, A_h, g_h, diag_h, p, p_h, d, lb, ub, theta)
        cost_change = -evaluate_quadratic(A, g, step)

        # Perhaps almost never executed, the idea is that `p` is descent
        # direction thus we must find acceptable cost decrease using simple
        # "backtracking", otherwise algorithm's logic would break.
        if cost_change < 0:
            x, step, cost_change = backtracking(
                A, g, x, p, theta, p_dot_g, lb, ub)
        else:
            x = make_strictly_feasible(x + step, lb, ub, rstep=0)

        step_norm = norm(step)
        r = A.dot(x) - b
        g = compute_grad(A, r)

        # Compared against the cost from before this step; `cost` is only
        # refreshed afterwards. This may overwrite a -1 set above.
        if cost_change < tol * cost:
            termination_status = 2

        cost = 0.5 * np.dot(r, r)

    if termination_status is None:
        termination_status = 0

    active_mask = find_active_constraints(x, lb, ub, rtol=tol)

    # `optimality` is the g_norm computed at the top of the last executed
    # iteration.
    return OptimizeResult(
        x=x, fun=r, cost=cost, optimality=g_norm, active_mask=active_mask,
        nit=iteration + 1, status=termination_status,
        initial_cost=initial_cost)
def create_result(Xi, yi, space=None, rng=None, specs=None, models=None):
    """
    Bundle the outcome of an optimization run into an `OptimizeResult`.

    Parameters
    ----------
    Xi : list of lists, shape (n_iters, n_features)
        Point evaluated at every iteration.
    yi : array-like, shape (n_iters,) or (n_iters, 2)
        Objective value at every iteration. A two-dimensional input is
        split: column 0 holds the objective values, column 1 is stored
        as `log_time`.
    space : Space instance, optional
        Search space.
    rng : RandomState instance, optional
        State of the random state.
    specs : dict, optional
        Call specifications.
    models : list, optional
        List of fit surrogate models.

    Returns
    -------
    res : `OptimizeResult`, scipy object
        Carries `x` and `fun` (the entry with the smallest objective
        value), `func_vals`, `x_iters`, `models`, `space`, `random_state`
        and `specs`, plus `log_time` for two-dimensional `yi`.
    """
    result = OptimizeResult()
    values = np.asarray(yi)
    if values.ndim == 2:
        # The second column is kept separately, the first is the objective.
        result.log_time = values[:, 1].ravel()
        values = values[:, 0].ravel()
    idx_min = np.argmin(values)
    result.x = Xi[idx_min]
    result.fun = values[idx_min]
    result.func_vals = values
    result.x_iters = Xi
    result.models = models
    result.space = space
    result.random_state = rng
    result.specs = specs
    return result
def gp_minimize(func, dimensions, base_estimator=None, alpha=10e-10,
                acq="EI", xi=0.01, kappa=1.96, search="auto", n_calls=100,
                n_points=500, n_random_starts=10, n_restarts_optimizer=5,
                x0=None, y0=None, random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take for ever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian prior which is much quicker to evaluate.

    The total number of evaluations, `n_calls`, are performed like the
    following. If `x0` is provided but not `y0`, then the elements of `x0`
    are first evaluated, followed by `n_random_starts` evaluations.
    Finally, `n_calls - len(x0) - n_random_starts` evaluations are
    made guided by the surrogate model. If `x0` and `y0` are both
    provided then `n_random_starts` evaluations are first made then
    `n_calls - n_random_starts` subsequent evaluations are made
    guided by the surrogate model.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization. When
        `None`, a `GaussianProcessRegressor` with a
        `ConstantKernel * Matern(nu=2.5)` kernel is created.

    * `alpha` [float, default=10e-10 (that is, 1e-9)]:
        Value added to the diagonal of the kernel matrix during fitting.
        Larger values correspond to an increased noise level in the
        observations and reduce potential numerical issues during fitting.
        Only used when `base_estimator` is `None`.

    * `acq` [string, default=`"EI"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"auto"`, `"sampling"` or `"lbfgs"`, default=`"auto"`]:
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to `"auto"`, then it is set to `"lbfgs"` if
        all the search dimensions are Real(continuous). It defaults to
        `"sampling"` for all other cases.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then `n_restarts_optimizer` points
        are sampled randomly, and from each of them lbfgs is run for at
        most 20 iterations optimizing the acquisition function over the
        Gaussian prior; the best outcome is used.

    * `n_calls` [int, default=100]:
        Number of calls to `func`.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_random_starts` [int, default=10]:
        Number of evaluations of `func` with random initialization points
        before approximating the `func` with `base_estimator`.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None` (or an empty list), no initial input points are
          used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimization space.
        - `specs` [dict]: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `x0` is not a list, if there is no initial point at all
        (no `x0` and `n_random_starts` <= 0), if `n_calls` is smaller than
        the number of initial evaluations, if `y0` is malformed, if `func`
        does not return scalars, or if `search` is not recognised.
    """
    # Save call args. This must stay the first statement so that the frame
    # locals contain nothing but the call arguments.
    specs = {"args": copy.copy(inspect.currentframe().f_locals),
             "function": inspect.currentframe().f_code.co_name}

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Default GP
    if base_estimator is None:
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) *
                    Matern(length_scale=np.ones(space.transformed_n_dims),
                           length_scale_bounds=[(0.01, 100)] *
                           space.transformed_n_dims,
                           nu=2.5)),
            normalize_y=True, alpha=alpha, random_state=random_state)

    # Initialize with provided points (x0 and y0) and/or random points
    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        # A single point was given; an empty list is left untouched instead
        # of failing on `x0[0]`.
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))

    # The surrogate needs at least one initial point, provided or random.
    if len(x0) + n_random_starts <= 0:
        # if x0 is not provided and n_random_starts is 0 then
        # it will ask for n_random_starts to be > 0.
        raise ValueError(
            "Expected `n_random_starts` > 0, got %d" % n_random_starts)

    # Provided points cost an evaluation only when their values are missing,
    # i.e. when `y0` is None.
    n_init_func_calls = len(x0) if y0 is None else 0
    n_total_init_calls = n_random_starts + n_init_func_calls

    if n_calls < n_total_init_calls:
        raise ValueError(
            "Expected `n_calls` >= %d, got %d" % (n_total_init_calls, n_calls))

    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError(
                "`y0` should be an iterable or a scalar, got %s" % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError(
                "`y0` elements should be scalars")
    else:
        # NOTE(review): a `y0` passed without `x0` is silently dropped here.
        y0 = []

    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    if search == "auto":
        if space.is_real:
            search = "lbfgs"
        else:
            search = "sampling"
    elif search not in ["lbfgs", "sampling"]:
        raise ValueError(
            "Expected search to be 'lbfgs', 'sampling' or 'auto', "
            "got %s" % search)

    # Bayesian optimization loop
    models = []
    n_model_iter = n_calls - n_total_init_calls
    for i in range(n_model_iter):
        gp = clone(base_estimator)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(space.transform(Xi), yi)

        models.append(gp)

        if search == "sampling":
            X = space.transform(space.rvs(n_samples=n_points,
                                          random_state=rng))
            values = _gaussian_acquisition(
                X=X, model=gp, y_opt=np.min(yi), method=acq,
                xi=xi, kappa=kappa)
            next_x = X[np.argmin(values)]

        elif search == "lbfgs":
            best = np.inf

            for j in range(n_restarts_optimizer):
                # Named `start` so the `x0` argument is not overwritten.
                start = space.transform(space.rvs(n_samples=1,
                                                  random_state=rng))[0]

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _ = fmin_l_bfgs_b(
                        _acquisition, start,
                        args=(gp, np.min(yi), acq, xi, kappa),
                        bounds=space.transformed_bounds,
                        approx_grad=True, maxiter=20)

                if a < best:
                    next_x, best = x, a

        # Both searches work in the transformed space; map the candidate
        # back before evaluating `func`.
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
def minimize(fun, bounds=None, x0=None, input_sigma=0.3, popsize=31,
             max_evaluations=100000, max_iterations=100000,
             is_parallel=False, accuracy=1.0, stop_fittness=np.nan,
             is_terminate=None, rg=None, runid=0):
    """Minimization of a scalar function of one or more variables using CMA-ES.

    Parameters
    ----------
    fun : callable
        The objective function to be minimized.
            ``fun(x) -> float``
        where ``x`` is an 1-D array with shape (n,). This function takes
        no ``args`` parameter, so any fixed parameters have to be bound
        into ``fun`` beforehand.
    bounds : sequence or `Bounds`, optional
        Bounds on variables. There are two ways to specify the bounds:
            1. Instance of the `scipy.Bounds` class.
            2. Sequence of ``(min, max)`` pairs for each element in `x`. None
               is used to specify no bound.
    x0 : ndarray, shape (n,)
        Initial guess. Array of real elements of size (n,),
        where 'n' is the number of independent variables.
    input_sigma : ndarray, shape (n,) or scalar
        Initial step size for each dimension.
    popsize : int, optional
        CMA-ES population size.
    max_evaluations : int, optional
        Forced termination after ``max_evaluations`` function evaluations.
    max_iterations : int, optional
        Forced termination after ``max_iterations`` iterations.
    is_parallel : bool, optional
        If True, function evaluation is performed in parallel for the whole
        population (the objective is wrapped with ``parallel``), otherwise
        it is wrapped with ``serial``.
    accuracy : float, optional
        values > 1.0 reduce the accuracy.
    stop_fittness : float, optional
         Limit for fitness value. If reached minimize terminates.
    is_terminate : callable, optional
        Callback to be used if the caller of minimize wants to
        decide when to terminate.
    rg : numpy.random.Generator, optional
        Random generator for creating random guesses. When None (default),
        a new ``Generator(MT19937())`` is created for this call.
    runid : int, optional
        id used by the is_terminate callback to identify the CMA-ES run.

    Returns
    -------
    res : scipy.OptimizeResult
        The optimization result is represented as an ``OptimizeResult`` object.
        Important attributes are: ``x`` the solution array,
        ``fun`` the best function value,
        ``nfev`` the number of function evaluations,
        ``nit`` the number of CMA-ES iterations,
        ``status`` the stopping criteria and
        ``success`` a Boolean flag, which this function always sets to True.
    """
    # A generator used as a default argument would be created once at
    # definition time and shared by every call (and copied into forked
    # worker processes), so it is created per call instead.
    if rg is None:
        rg = Generator(MT19937())
    fun = parallel(fun) if is_parallel else serial(fun)
    cmaes = Cmaes(bounds, x0, input_sigma, popsize,
                  max_evaluations, max_iterations,
                  accuracy, stop_fittness, is_terminate,
                  rg, np.random.randn, runid, fun)
    x, val, evals, iterations, stop = cmaes.doOptimize()
    return OptimizeResult(x=x, fun=val, nfev=evals, nit=iterations,
                          status=stop, success=True)
def predint(x: np.ndarray, xd: np.ndarray, yd: np.ndarray,
            func: Callable[[np.ndarray, np.ndarray], np.ndarray],
            res: OptimizeResult, **kwargs):
    """
    This function estimates the prediction bands for the fit
    (see: https://www.mathworks.com/help/curvefit/confidence-and-prediction-bounds.html)

    Parameters
    ----------
    x: np.ndarray
        The requested x points for the bands
    xd: np.ndarray
        The x datapoints
    yd: np.ndarray
        The y datapoints
    func: Callable[[np.ndarray, np.ndarray], np.ndarray]
        The fitted function, called as ``func(x, params)``
    res: OptimizeResult
        The optimized result from least_squares minimization. Its fields
        ``x`` (parameters), ``jac`` (Jacobian at the data points) and
        ``fun`` (residuals) are used.
    **kwargs
        confidence: float
            The confidence level (default 0.95)
        simultaneous: bool
            True if the bound type is simultaneous, false otherwise
            (default True)
        mode: [functional, observation]
            Default observation

    Returns
    -------
    ypred: np.ndarray
        The prediction ``func(x, res.x)``
    lpb: np.ndarray
        The lower bound of the band
    upb: np.ndarray
        The upper bound of the band

    Raises
    ------
    ValueError
        If ``xd`` and ``yd`` differ in length, if there are fewer than two
        datapoints, or if ``res`` is not a ``scipy.optimize.OptimizeResult``.
    """
    if len(yd) != len(xd):
        raise ValueError('The length of the observations should be the same ' +
                         'as the length of the predictions.')
    if len(yd) <= 1:
        raise ValueError('Too few datapoints')
    # Use the public import path; the `scipy.optimize.optimize` module is a
    # deprecated private namespace.
    from scipy.optimize import OptimizeResult as _ScipyOptimizeResult
    if not isinstance(res, _ScipyOptimizeResult):
        raise ValueError('Argument \'res\' should be an instance of \'scipy.optimize.OptimizeResult\'')

    simultaneous = kwargs.get('simultaneous', True)
    mode = kwargs.get('mode', 'observation')
    confidence = kwargs.get('confidence', 0.95)

    p = len(res.x)

    # Needs to estimate the jacobian at the predictor point!!!
    # From MATLAB toolbox/stats/stats/nlpredci
    ypred = func(x, res.x)
    if callable(res.jac):
        # NOTE(review): the QR factorisation below needs `res.jac` as an
        # array, so a callable Jacobian cannot get past that point.
        delta = res.jac(x)
    else:
        # Forward differences with a relative step per parameter.
        delta = np.zeros((len(ypred), p))
        fdiffstep = np.spacing(np.abs(res.x)) ** (1 / 3)
        for i in range(p):
            change = np.zeros(p)
            if res.x[i] == 0:
                nb = np.sqrt(LA.norm(res.x))
                change[i] = fdiffstep[i] * (nb + (nb == 0))
            else:
                change[i] = fdiffstep[i] * res.x[i]
            predplus = func(x, res.x + change)
            delta[:, i] = (predplus - ypred) / change[i]

    # Find R to get the variance
    _, R = LA.qr(res.jac)
    # The number of columns of jac is used as its rank (full rank assumed)
    rankJ = res.jac.shape[1]
    Rinv = LA.pinv(R)
    pinvJTJ = np.dot(Rinv, Rinv.T)

    # The residual
    resid = res.fun
    n = len(resid)
    # Get MSE. The degrees of freedom when J is full rank is v = n-p and
    # n-rank(J) otherwise
    mse = (LA.norm(resid)) ** 2 / (n - rankJ)
    # Calculate Sigma if usingJ
    Sigma = mse * pinvJTJ

    # Compute varpred
    varpred = np.sum(np.dot(delta, Sigma) * delta, axis=1)
    alpha = 1.0 - confidence
    if mode == 'observation':
        # Assume a constant variance model if errorModelInfo and weights are
        # not supplied.
        errorVar = mse * np.ones(delta.shape[0])
        varpred += errorVar

    # The significance
    if simultaneous:
        from scipy.stats import f as f_dist
        # Scheffe critical value sqrt(sch * F), as in MATLAB's nlpredci. A
        # new observation adds one dimension on top of the parameters.
        sch = rankJ + 1 if mode == 'observation' else rankJ
        crit = np.sqrt(sch * f_dist.ppf(1.0 - alpha, sch, n - rankJ))
    else:
        from scipy.stats import t as t_dist
        crit = t_dist.ppf(1.0 - alpha / 2.0, n - rankJ)

    delta = np.sqrt(varpred) * crit

    lpb = ypred - delta
    upb = ypred + delta

    return ypred, lpb, upb
def lsq_linear(A, b, bounds=(-np.inf, np.inf), method='trf', tol=1e-10,
               lsq_solver=None, lsmr_tol=None, max_iter=None, verbose=0):
    r"""Solve a linear least-squares problem with bounds on the variables.

    Given a m-by-n design matrix A and a target vector b with m elements,
    `lsq_linear` solves the following optimization problem::

        minimize 0.5 * ||A x - b||**2
        subject to lb <= x <= ub

    This optimization problem is convex, hence a found minimum (if iterations
    have converged) is guaranteed to be global.

    Parameters
    ----------
    A : array_like, sparse matrix or LinearOperator, shape (m, n)
        Design matrix. Can be `scipy.sparse.linalg.LinearOperator`.
    b : array_like, shape (m,)
        Target vector.
    bounds : 2-tuple of array_like, optional
        Lower and upper bounds on independent variables. Defaults to no bounds.
        Each array must have shape (n,) or be a scalar, in the latter
        case a bound will be the same for all variables. Use ``np.inf`` with
        an appropriate sign to disable bounds on all or some variables.
    method : 'trf' or 'bvls', optional
        Method to perform minimization.

            * 'trf' : Trust Region Reflective algorithm adapted for a linear
              least-squares problem. This is an interior-point-like method
              and the required number of iterations is weakly correlated with
              the number of variables.
            * 'bvls' : Bounded-Variable Least-Squares algorithm. This is
              an active set method, which requires the number of iterations
              comparable to the number of variables. Can't be used when `A` is
              sparse or LinearOperator.

        Default is 'trf'.
    tol : float, optional
        Tolerance parameter. The algorithm terminates if a relative change
        of the cost function is less than `tol` on the last iteration.
        Additionally the first-order optimality measure is considered:

            * ``method='trf'`` terminates if the uniform norm of the gradient,
              scaled to account for the presence of the bounds, is less than
              `tol`.
            * ``method='bvls'`` terminates if Karush-Kuhn-Tucker conditions
              are satisfied within `tol` tolerance.

    lsq_solver : {None, 'exact', 'lsmr'}, optional
        Method of solving unbounded least-squares problems throughout
        iterations:

            * 'exact' : Use dense QR or SVD decomposition approach. Can't be
              used when `A` is sparse or LinearOperator.
            * 'lsmr' : Use `scipy.sparse.linalg.lsmr` iterative procedure
              which requires only matrix-vector product evaluations. Can't
              be used with ``method='bvls'``.

        If None (default) the solver is chosen based on type of `A`.
    lsmr_tol : None, float or 'auto', optional
        Tolerance parameters 'atol' and 'btol' for `scipy.sparse.linalg.lsmr`
        If None (default), it is set to ``1e-2 * tol``. If 'auto', the
        tolerance will be adjusted based on the optimality of the current
        iterate, which can speed up the optimization process, but is not always
        reliable.
    max_iter : None or int, optional
        Maximum number of iterations before termination. If None (default), it
        is set to 100 for ``method='trf'`` or to the number of variables for
        ``method='bvls'`` (not counting iterations for 'bvls' initialization).
    verbose : {0, 1, 2}, optional
        Level of algorithm's verbosity:

            * 0 : work silently (default).
            * 1 : display a termination report.
            * 2 : display progress during iterations.

    Returns
    -------
    OptimizeResult with the following fields defined:
    x : ndarray, shape (n,)
        Solution found.
    cost : float
        Value of the cost function at the solution.
    fun : ndarray, shape (m,)
        Vector of residuals at the solution.
    optimality : float
        First-order optimality measure. The exact meaning depends on `method`,
        refer to the description of `tol` parameter.
    active_mask : ndarray of int, shape (n,)
        Each component shows whether a corresponding constraint is active
        (that is, whether a variable is at the bound):

            *  0 : a constraint is not active.
            * -1 : a lower bound is active.
            *  1 : an upper bound is active.

        Might be somewhat arbitrary for the `trf` method as it generates a
        sequence of strictly feasible iterates and active_mask is determined
        within a tolerance threshold.
    nit : int
        Number of iterations. Zero if the unconstrained solution is optimal.
    status : int
        Reason for algorithm termination:

            * -1 : the algorithm was not able to make progress on the last
              iteration.
            *  0 : the maximum number of iterations is exceeded.
            *  1 : the first-order optimality measure is less than `tol`.
            *  2 : the relative change of the cost function is less than `tol`.
            *  3 : the unconstrained solution is optimal.

    message : str
        Verbal description of the termination reason.
    success : bool
        True if one of the convergence criteria is satisfied (`status` > 0).

    Raises
    ------
    ValueError
        If any argument is invalid or the arguments are mutually
        inconsistent (unknown `method`, `lsq_solver` or `verbose`, a solver
        that does not support the type of `A`, wrong shapes of `A`, `b` or
        `bounds`, non-positive `max_iter`, or a lower bound that is not
        strictly less than its upper bound).

    See Also
    --------
    nnls : Linear least squares with non-negativity constraint.
    least_squares : Nonlinear least squares with bounds on the variables.

    Notes
    -----
    The algorithm first computes the unconstrained least-squares solution by
    `numpy.linalg.lstsq` or `scipy.sparse.linalg.lsmr` depending on
    `lsq_solver`. This solution is returned as optimal if it lies within the
    bounds.

    Method 'trf' runs the adaptation of the algorithm described in [STIR]_ for
    a linear least-squares problem. The iterations are essentially the same as
    in the nonlinear least-squares algorithm, but as the quadratic function
    model is always accurate, we don't need to track or modify the radius of
    a trust region. The line search (backtracking) is used as a safety net
    when a selected step does not decrease the cost function. Read more
    detailed description of the algorithm in `scipy.optimize.least_squares`.

    Method 'bvls' runs a Python implementation of the algorithm described in
    [BVLS]_. The algorithm maintains active and free sets of variables, on
    each iteration chooses a new variable to move from the active set to the
    free set and then solves the unconstrained least-squares problem on free
    variables. This algorithm is guaranteed to give an accurate solution
    eventually, but may require up to n iterations for a problem with n
    variables. Additionally, an ad-hoc initialization procedure is
    implemented, that determines which variables to set free or active
    initially. It takes some number of iterations before actual BVLS starts,
    but can significantly reduce the number of further iterations.

    References
    ----------
    .. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
              and Conjugate Gradient Method for Large-Scale Bound-Constrained
              Minimization Problems," SIAM Journal on Scientific Computing,
              Vol. 21, Number 1, pp 1-23, 1999.
    .. [BVLS] P. B. Stark and R. L. Parker, "Bounded-Variable Least-Squares:
              an Algorithm and Applications", Computational Statistics, 10,
              129-141, 1995.

    Examples
    --------
    In this example a problem with a large sparse matrix and bounds on the
    variables is solved.

    >>> from scipy.sparse import rand
    >>> from scipy.optimize import lsq_linear
    ...
    >>> np.random.seed(0)
    ...
    >>> m = 20000
    >>> n = 10000
    ...
    >>> A = rand(m, n, density=1e-4)
    >>> b = np.random.randn(m)
    ...
    >>> lb = np.random.randn(n)
    >>> ub = lb + 1
    ...
    >>> res = lsq_linear(A, b, bounds=(lb, ub), lsmr_tol='auto', verbose=1)
    # may vary
    The relative change of the cost function is less than `tol`.
    Number of iterations 16, initial cost 1.5039e+04, final cost 1.1112e+04,
    first-order optimality 4.66e-08.
    """
    if method not in ['trf', 'bvls']:
        raise ValueError("`method` must be 'trf' or 'bvls'")

    if lsq_solver not in [None, 'exact', 'lsmr']:
        raise ValueError("`lsq_solver` must be None, 'exact' or 'lsmr'.")

    if verbose not in [0, 1, 2]:
        raise ValueError("`verbose` must be in [0, 1, 2].")

    if issparse(A):
        A = csr_matrix(A)
    elif not isinstance(A, LinearOperator):
        A = np.atleast_2d(A)

    if method == 'bvls':
        if lsq_solver == 'lsmr':
            raise ValueError("method='bvls' can't be used with "
                             "lsq_solver='lsmr'")

        if not isinstance(A, np.ndarray):
            raise ValueError("method='bvls' can't be used with `A` being "
                             "sparse or LinearOperator.")

    if lsq_solver is None:
        if isinstance(A, np.ndarray):
            lsq_solver = 'exact'
        else:
            lsq_solver = 'lsmr'
    elif lsq_solver == 'exact' and not isinstance(A, np.ndarray):
        raise ValueError("`exact` solver can't be used when `A` is "
                         "sparse or LinearOperator.")

    if len(A.shape) != 2:  # No ndim for LinearOperator.
        raise ValueError("`A` must have at most 2 dimensions.")

    if len(bounds) != 2:
        raise ValueError("`bounds` must contain 2 elements.")

    if max_iter is not None and max_iter <= 0:
        raise ValueError("`max_iter` must be None or positive integer.")

    m, n = A.shape

    b = np.atleast_1d(b)
    if b.ndim != 1:
        raise ValueError("`b` must have at most 1 dimension.")

    if b.size != m:
        raise ValueError("Inconsistent shapes between `A` and `b`.")

    lb, ub = prepare_bounds(bounds, n)

    # Either bound having the wrong shape is an error, not only both.
    if lb.shape != (n,) or ub.shape != (n,):
        raise ValueError("Bounds have wrong shape.")

    if np.any(lb >= ub):
        raise ValueError("Each lower bound must be strictly less than each "
                         "upper bound.")

    if lsq_solver == 'exact':
        # rcond=-1 pins the historical machine-precision cutoff, so the
        # result does not depend on the NumPy version's default.
        x_lsq = np.linalg.lstsq(A, b, rcond=-1)[0]
    elif lsq_solver == 'lsmr':
        x_lsq = lsmr(A, b, atol=tol, btol=tol)[0]

    # The unconstrained solution is optimal as soon as it is feasible.
    if in_bounds(x_lsq, lb, ub):
        r = A.dot(x_lsq) - b
        cost = 0.5 * np.dot(r, r)
        termination_status = 3
        termination_message = TERMINATION_MESSAGES[termination_status]
        g = compute_grad(A, r)
        g_norm = norm(g, ord=np.inf)

        if verbose > 0:
            print(termination_message)
            print("Final cost {0:.4e}, first-order optimality {1:.2e}"
                  .format(cost, g_norm))

        return OptimizeResult(
            x=x_lsq, fun=r, cost=cost, optimality=g_norm,
            active_mask=np.zeros(n, dtype=int), nit=0,
            status=termination_status,
            message=termination_message, success=True)

    if method == 'trf':
        res = trf_linear(A, b, x_lsq, lb, ub, tol, lsq_solver, lsmr_tol,
                         max_iter, verbose)
    elif method == 'bvls':
        res = bvls(A, b, x_lsq, lb, ub, tol, max_iter, verbose)

    res.message = TERMINATION_MESSAGES[res.status]
    res.success = res.status > 0

    if verbose > 0:
        print(res.message)
        print("Number of iterations {0}, initial cost {1:.4e}, "
              "final cost {2:.4e}, first-order optimality {3:.2e}."
              .format(res.nit, res.initial_cost, res.cost, res.optimality))

    # `initial_cost` is only needed for the report above.
    del res.initial_cost

    return res
def solve(self):
    """
    Run the differential evolution solver to completion.

    Returns
    -------
    res : OptimizeResult
        The optimization result. It carries ``x`` (the solution array),
        ``fun`` (its energy), ``nfev``, ``nit``, ``message`` (the cause
        of the termination) and ``success`` (False whenever the solver
        stopped for any reason other than convergence). If polishing was
        requested and found a lower minimum, ``x`` and ``fun`` are
        replaced by the polished values and ``jac`` is added.
    """
    evaluations = 0
    generation = 0
    stopped = False
    message = _status_message['success']

    # Energy of every member of the starting population.
    for slot, member in enumerate(self.population):
        self.population_energies[slot] = self.func(
            self._scale_parameters(member), *self.args)
        evaluations += 1

        if evaluations > self.maxfun:
            stopped = True
            message = _status_message['maxfev']
            break

    # Swap the best member (and its energy) into slot 0.
    best = np.argmin(self.population_energies)
    best_energy = self.population_energies[best]
    self.population_energies[best] = self.population_energies[0]
    self.population_energies[0] = best_energy
    self.population[[0, best], :] = self.population[[best, 0], :]

    if stopped:
        # The evaluation budget ran out before the first generation.
        return OptimizeResult(
            x=self.x,
            fun=self.population_energies[0],
            nfev=evaluations,
            nit=generation,
            message=message,
            success=not stopped)

    # Evolve generation by generation.
    for generation in range(1, self.maxiter + 1):
        if self.dither is not None:
            # Draw a fresh scale inside the dither interval.
            self.scale = self.random_number_generator.rand(
            ) * (self.dither[1] - self.dither[0]) + self.dither[0]

        for member in range(len(self.population)):
            if evaluations > self.maxfun:
                stopped = True
                message = _status_message['maxfev']
                break

            trial = self._mutate(member)
            self._ensure_constraint(trial)
            trial_energy = self.func(self._scale_parameters(trial),
                                     *self.args)
            evaluations += 1

            if trial_energy < self.population_energies[member]:
                self.population[member] = trial
                self.population_energies[member] = trial_energy

                if trial_energy < self.population_energies[0]:
                    self.population_energies[0] = trial_energy
                    self.population[0] = trial

        # Fractional standard deviation of the population energies; the
        # solver stops once it drops below tol.
        spread = np.std(self.population_energies)
        level = np.abs(np.mean(self.population_energies) + _MACHEPS)
        convergence = spread / level

        if self.disp:
            print("differential_evolution step %d: f(x)= %g"
                  % (generation, self.population_energies[0]))

        if self.callback:
            halt = self.callback(
                self._scale_parameters(self.population[0]),
                convergence=self.tol / convergence)
            if halt is True:
                stopped = True
                message = ('callback function requested stop early '
                           'by returning True')
                break

        if convergence < self.tol or stopped:
            break
    else:
        # Every generation was used without meeting a stop criterion.
        message = _status_message['maxiter']
        stopped = True

    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=evaluations,
        nit=generation,
        message=message,
        success=not stopped)

    if self.polish:
        polished = minimize(self.func,
                            np.copy(DE_result.x),
                            method='L-BFGS-B',
                            bounds=self.limits.T,
                            args=self.args)

        evaluations += polished.nfev
        DE_result.nfev = evaluations

        if polished.fun < DE_result.fun:
            DE_result.fun = polished.fun
            DE_result.x = polished.x
            DE_result.jac = polished.jac
            # Keep the stored population in step with the result.
            self.population_energies[0] = polished.fun
            self.population[0] = self._unscale_parameters(polished.x)

    return DE_result
def _minimize_qpso(fun, x0, confunc=None, g=.96, max_iter=1000, stable_iter=40, ptol=1e-6, ctol=1e-6, levy_rate=0, decay_rate=0, reduction_rate=0.5, callback=None, verbose=False, savefile=None): """Internal implementation for ``psopy.minimize_qpso``. See Also -------- psopy.minimize_qpso : The SciPy compatible interface to this function. Refer to its documentation for an explanation of the parameters. psopy.gen_confunc : Utility function to convert SciPy style constraints to the form required by this function. Parameters ---------- x0 : array_like of shape (N, D) Initial position to begin QPSO from, where ``N`` is the number of points and ``D`` the dimensionality of each point. For the constrained case these points should satisfy all constraints. fun : callable The objective function to be minimized. Must be in the form ``fun(pos, *args)``. The argument ``pos``, is a 2-D array for initial positions, where each row specifies the position of a different particle, and ``args`` is a tuple of any additional fixed parameters needed to completely specify the function. confunc : callable The function that describes constraints. Must be of the form ``confunc(pos)`` that returns the constraint matrix. levy_rate: float Whether to run the levy decay qpso or not. > 0 value turns on levy walk decay_rate: float Whether to turn on the decay function or not. > 0 value turns on the decay rate Notes ----- Chaotic Quantum PSO Using this function directly allows for a slightly faster implementation that does away with the need for the additional recursive calls needed to wrap the constraint and objective functions for compatibility with Scipy. """ if verbose: message = setup_print(x0.shape[1], max_iter, confunc is not None) if savefile: iterinfo = [] position = np.copy(x0) nparam = len(position) pbest = np.copy(position) gbest = pbest[np.argmin(fun(pbest))] oldfit = fun(gbest[None])[0] stable_count = 0 dimension = len(position[0]) #simple levy walk. 
Make a decay function which will push particles around using stable_iter,max_iter is reached, pushing them away from pbest beta = 3 / 2 sigma = (gamma(1 + beta) * sin(pi * beta / 2) / (gamma( (1 + beta) / 2) * beta * 2**((beta - 1) / 2)))**(1 / beta) decay = 1 stepsize = 1.0 for ii in range(max_iter): mbest = np.sum(pbest, axis=0) / pbest.shape[0] u = np.random.normal(0, 1, size=dimension) * sigma v = np.random.normal(0, 1, size=dimension) step = u / abs(v)**(1 / beta) psi_1 = uniform(0, 1) psi_2 = uniform(0, 1) dv_g = psi_1 * gbest if confunc is not None: leaders = np.argmin(distance.cdist(position, pbest, 'sqeuclidean'), axis=1) dv_l = psi_2 * pbest[leaders] else: dv_l = psi_2 * pbest P = (dv_g + dv_l) / (psi_1 + psi_2) u = uniform(0, 1, nparam) stepsize = 1.0 for i in range(0, nparam): if levy_rate > 0: stepsize = 0.01 * step * (1 / (0.0000001 + position[i] - gbest[i])) if decay_rate > 0: decay = stepsize * 5 * (0.001)**(ii / (max_iter * 0.05)) + 1 if uniform(0, 1) > 0.5: position[i] = P[i] - mbest * np.log(1 / u[i]) * decay else: position[i] = P[i] + mbest * np.log(1 / u[i]) * decay to_update = (fun(position) < fun(pbest)) if confunc is not None: to_update &= (confunc(position).sum(axis=1) < ctol) if to_update.any(): pbest[to_update] = position[to_update] gbest = pbest[np.argmin(fun(pbest))] # Termination criteria. fval = fun(gbest[None])[0] if np.abs(oldfit - fval) < ptol: stable_count += 1 if stable_count == stable_iter: break else: stable_count = 0 oldfit = fval if verbose or savefile: info = [ii, gbest, fval] if confunc is not None: cv = np.max(confunc(gbest[None])) info.append(cv) if verbose: print(message.format(*info)) if savefile: iterinfo.append(info) # Final callback. 
if callback is not None: position = callback(position) if savefile: save_info(savefile, iterinfo, constraints=confunc is not None) result = OptimizeResult(x=gbest, fun=fun(gbest[None])[0], nit=ii, nsit=stable_count) violation = False if confunc is not None: convec = confunc(gbest[None]) result.maxcv = np.max(convec) result.cvec = convec if convec.sum() > ctol: violation = True if violation: result.status = 2 elif ii == max_iter: result.status = 1 else: result.status = 0 result.success = not result.status return result
def minimize(fun, bounds=None, x0=None, input_sigma=0.3, popsize=None,
             max_evaluations=100000, stop_fitness=None, pbest=0.7,
             f0=0.0, cr0=0.0, rg=Generator(MT19937()), runid=0,
             workers=None):
    """Minimization of a scalar function of one or more variables using a
    C++ LCL Differential Evolution implementation called via ctypes.

    Parameters
    ----------
    fun : callable
        The objective function to be minimized.
            ``fun(x, *args) -> float``
        where ``x`` is an 1-D array with shape (dim,) and ``args``
        is a tuple of the fixed parameters needed to completely
        specify the function.
    bounds : sequence or `Bounds`
        Bounds on variables. There are two ways to specify the bounds:
            1. Instance of the `scipy.Bounds` class.
            2. Sequence of ``(min, max)`` pairs for each element in `x`.
    x0 : ndarray, shape (dim,)
        Initial guess. Array of real elements of size (dim,),
        where 'dim' is the number of independent variables.
    input_sigma : ndarray, shape (dim,) or scalar
        Initial step size for each dimension. A callable is also accepted;
        it is called without arguments to obtain the value.
    popsize : int, optional
        Population size. Defaults to ``int(dim * 8.5 + 150)``.
    max_evaluations : int, optional
        Forced termination after ``max_evaluations`` function evaluations.
    stop_fitness : float, optional
        Limit for fitness value. If reached minimize terminates.
    pbest : float, optional
        use low value 0 < pbest <= 1 to narrow search.
    f0 : float, optional
        The initial mutation constant. In the literature this is also known
        as differential weight, being denoted by F. Should be in the
        range [0, 2].
    cr0 : float, optional
        The initial recombination constant. Should be in the range [0, 1].
        In the literature this is also known as the crossover probability.
    rg : numpy.random.Generator, optional
        Random generator for creating random guesses.
    runid : int, optional
        id used to identify the run for debugging / logging.
    workers : int or None, optional
        If not workers is None, function evaluation is performed in parallel
        for the whole population. Useful for costly objective functions but
        is deactivated for parallel retry.

    Returns
    -------
    res : scipy.OptimizeResult
        The optimization result is represented as an ``OptimizeResult``
        object. Important attributes are: ``x`` the solution array,
        ``fun`` the best function value, ``nfev`` the number of function
        evaluations, ``nit`` the number of iterations, ``success`` a Boolean
        flag indicating if the optimizer exited successfully. When the
        native call raises, ``success`` is False, ``status`` is -1,
        ``x`` is None and ``message`` holds the text of the exception.
    """
    lower, upper, guess = _check_bounds(bounds, x0, rg)
    dim = guess.size
    if popsize is None:
        popsize = int(dim * 8.5 + 150)
    if lower is None:
        # No bounds given: both limits are passed to the C side as zeros.
        lower = [0] * dim
        upper = [0] * dim
    if callable(input_sigma):
        input_sigma = input_sigma()
    if np.ndim(input_sigma) == 0:
        input_sigma = [input_sigma] * dim
    if stop_fitness is None:
        stop_fitness = math.inf
    parfun = None if workers is None else parallel(fun, workers)
    array_type = ct.c_double * dim
    c_callback_par = call_back_par(callback_par(fun, parfun))
    seed = int(rg.uniform(0, 2**32 - 1))
    # Output buffer filled by the C side: dim solution values followed by
    # best value, evaluation count, iteration count and stop code.
    res = np.empty(dim + 4)
    res_p = res.ctypes.data_as(ct.POINTER(ct.c_double))
    try:
        optimizeLCLDE_C(runid, c_callback_par, dim,
                        array_type(*guess), array_type(*input_sigma), seed,
                        array_type(*lower), array_type(*upper),
                        max_evaluations, pbest, stop_fitness,
                        popsize, f0, cr0, res_p)
        x = res[:dim]
        val = res[dim]
        evals = int(res[dim + 1])
        iterations = int(res[dim + 2])
        stop = int(res[dim + 3])
        return OptimizeResult(x=x, fun=val, nfev=evals, nit=iterations,
                              status=stop, success=True)
    except Exception as ex:
        # Best effort: report the failure through the result object rather
        # than raising, but keep the reason visible to the caller.
        return OptimizeResult(x=None, fun=sys.float_info.max, nfev=0,
                              nit=0, status=-1, success=False,
                              message=str(ex))
    finally:
        # The parallel evaluator (not the user's objective) owns the worker
        # processes; stop it on both the success and the failure path.
        if parfun is not None:
            parfun.stop()
def solve(self):
    """
    Runs the DifferentialEvolutionSolver.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a ``OptimizeResult`` object.
        Important attributes are: ``x`` the solution array, ``success`` a
        Boolean flag indicating if the optimizer exited successfully and
        ``message`` which describes the cause of the termination. See
        `OptimizeResult` for a description of other attributes. If `polish`
        was employed, and a lower minimum was obtained by the polishing,
        then OptimizeResult also contains the ``jac`` attribute.
    """
    nit, warning_flag = 0, False
    status_message = _status_message['success']

    # The population may have just been initialized (all entries are
    # np.inf). If it has you have to calculate the initial energies.
    # Although this is also done in the evolve generator it's possible
    # that someone can set maxiter=0, at which point we still want the
    # initial energies to be calculated (the following loop isn't run).
    if np.all(np.isinf(self.population_energies)):
        self.population_energies[:] = self._calculate_population_energies(
            self.population)
        self._promote_lowest_energy()

    # do the optimisation.
    # `range` (not the Python 2 only `xrange`) keeps this runnable on
    # Python 3 without a compatibility shim.
    for nit in range(1, self.maxiter + 1):
        # evolve the population by a generation
        try:
            next(self)
        except StopIteration:
            warning_flag = True
            if self._nfev > self.maxfun:
                status_message = _status_message['maxfev']
            elif self._nfev == self.maxfun:
                status_message = ('Maximum number of function evaluations'
                                  ' has been reached.')
            break

        if self.disp:
            print("differential_evolution step %d: f(x)= %g"
                  % (nit, self.population_energies[0]))

        # should the solver terminate?
        convergence = self.convergence

        if (self.callback and
                self.callback(self._scale_parameters(self.population[0]),
                              convergence=self.tol / convergence) is True):
            warning_flag = True
            status_message = ('callback function requested stop early '
                              'by returning True')
            break

        # An infinite energy in the population rules out convergence.
        if np.any(np.isinf(self.population_energies)):
            intol = False
        else:
            intol = (np.std(self.population_energies) <=
                     self.atol +
                     self.tol * np.abs(np.mean(self.population_energies)))
        if warning_flag or intol:
            break

    else:
        # The loop ran through every generation without a break.
        status_message = _status_message['maxiter']
        warning_flag = True

    DE_result = OptimizeResult(
        x=self.x,
        fun=self.population_energies[0],
        nfev=self._nfev,
        nit=nit,
        message=status_message,
        success=not warning_flag)

    if self.polish:
        result = minimize(self.func,
                          np.copy(DE_result.x),
                          method='L-BFGS-B',
                          bounds=self.limits.T)

        self._nfev += result.nfev
        DE_result.nfev = self._nfev

        if result.fun < DE_result.fun:
            DE_result.fun = result.fun
            DE_result.x = result.x
            DE_result.jac = result.jac
            # to keep internal state consistent
            self.population_energies[0] = result.fun
            self.population[0] = self._unscale_parameters(result.x)

    return DE_result
def _fit_no_arch_normal_errors(self, cov_type='robust'):
    """
    Estimate the model in closed form by least squares.

    Parameters
    ----------
    cov_type : str, optional
        Covariance estimator to use when estimating parameter variances and
        covariances. 'robust' (the default) builds the covariance from the
        outer product of the scores; 'mle' uses the classic OLS estimator.

    Returns
    -------
    result : ARCHModelResult
        Results class containing parameter estimates, estimated parameter
        covariance and related estimates

    Raises
    ------
    ValueError
        If there are fewer observations than parameters, or if `cov_type`
        is neither 'robust' nor 'mle'.

    Notes
    -----
    See :class:`ARCHModelResult` for details on computed results
    """
    nobs = self._fit_y.shape[0]

    if nobs < self.num_params:
        raise ValueError(
            'Insufficient data, {0} regressors, {1} data points available'
            .format(self.num_params, nobs))

    x, y = self._fit_regressors, self._fit_y

    # No optimizer is run, so a placeholder convergence record is passed
    # on instead, see GH #87
    opt = OptimizeResult(status=0, message='')

    has_regressors = x.shape[1] > 0
    if has_regressors:
        regression_params = np.linalg.pinv(x).dot(y)
        xpxi = np.linalg.inv(x.T.dot(x) / nobs)
        fitted = x.dot(regression_params)
    else:
        regression_params = np.empty(0)
        xpxi = np.empty((0, 0))
        fitted = 0.0

    e = y - fitted
    sigma2 = e.T.dot(e) / nobs

    params = np.hstack((regression_params, sigma2))

    # Block-diagonal matrix: regression block plus one entry for sigma2.
    k = self.num_params
    hessian = np.zeros((k + 1, k + 1))
    hessian[:k, :k] = -xpxi
    hessian[-1, -1] = -1

    if cov_type == 'mle':
        param_cov = sigma2 * -hessian
        param_cov[k, k] = 2 * sigma2**2.0
        param_cov /= nobs
        cov_type = COV_TYPES['classic_ols']
    elif cov_type == 'robust':
        scores = np.zeros((nobs, k + 1))
        scores[:, :k] = x * e[:, None]
        scores[:, -1] = e**2.0 - sigma2
        score_cov = scores.T.dot(scores) / nobs
        param_cov = hessian.dot(score_cov).dot(hessian) / nobs
        cov_type = COV_TYPES['white']
    else:
        raise ValueError('Unknown cov_type')

    r2 = self._r2(regression_params)

    # Residuals and volatility are NaN outside the estimation sample.
    fit_start, fit_stop = self._fit_indices
    resids = np.full_like(self._y, np.nan, dtype=np.float64)
    resids[fit_start:fit_stop] = e
    vol = np.full_like(resids, np.nan)
    vol[fit_start:fit_stop] = np.sqrt(sigma2)

    names = self._all_parameter_names()
    loglikelihood = self._static_gaussian_loglikelihood(e)

    # Fall back to generic names when the count does not match
    num_params = params.shape[0]
    if len(names) != num_params:
        names = ['p' + str(i) for i in range(num_params)]

    return ARCHModelResult(params, param_cov, r2, resids, vol, cov_type,
                           self._y_series, names, loglikelihood,
                           self._is_pandas, opt, fit_start, fit_stop,
                           copy.deepcopy(self))