def fminLooped(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf, epsilon=numpy.sqrt(numpy.finfo(float).eps), maxiter=None, full_output=0, disp=1, retall=0, callback=None): testVar = 0 x0 = asarray(x0).squeeze() if x0.ndim == 0: x0.shape = (1, ) if maxiter is None: maxiter = len(x0) * 200 func_calls, f = wrap_function(f, args) if fprime is None: grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon)) else: grad_calls, myfprime = wrap_function(fprime, args) gfk = myfprime(x0) k = 0 N = len(x0) I = numpy.eye(N, dtype=int) Hk = I old_fval = f(x0) old_old_fval = old_fval + 5000 xk = x0 if retall: allvecs = [x0] sk = [2 * gtol] warnflag = 0 gnorm = vecnorm(gfk, ord=norm) newInputParams = locals() pickleBles, NonPickles = ParamsManager.filterUnpickles(newInputParams) # import dill # pickle.dump(pickleBles, open('inputParams.dat', 'w')) # pickle.dump(loopThing, open('loopFunc.dat', 'w')) for loopI in range(300): newInputParams = loopThing(newInputParams)
def fminLooped(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf, epsilon= numpy.sqrt(numpy.finfo(float).eps), maxiter=None, full_output=0, disp=1, retall=0, callback=None): testVar = 0 x0 = asarray(x0).squeeze() if x0.ndim == 0: x0.shape = (1,) if maxiter is None: maxiter = len(x0)*200 func_calls, f = wrap_function(f, args) if fprime is None: grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon)) else: grad_calls, myfprime = wrap_function(fprime, args) gfk = myfprime(x0) k = 0 N = len(x0) I = numpy.eye(N,dtype=int) Hk = I old_fval = f(x0) old_old_fval = old_fval + 5000 xk = x0 if retall: allvecs = [x0] sk = [2*gtol] warnflag = 0 gnorm = vecnorm(gfk,ord=norm) newInputParams = locals() pickleBles, NonPickles = ParamsManager.filterUnpickles(newInputParams) import dill dill.dump(pickleBles, open('inputParams.dat', 'wb')) dill.dump(loopThing, open('loopFunc.dat', 'wb')) for loopI in range(10): newInputParams = loopThing(newInputParams)
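# All of the routines collected here call a `wrap_function(f, args)` helper that is
# not shown in this section.  A minimal sketch of what it is assumed to do, in the
# SciPy style these snippets follow (return a one-element call counter plus a
# wrapper that binds the extra args); the real helper may differ in detail:
def wrap_function(function, args):
    ncalls = [0]  # mutable counter, shared with the caller via the returned list

    def function_wrapper(*wrapper_args):
        ncalls[0] += 1
        return function(*(wrapper_args + args))

    return ncalls, function_wrapper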
def _minimize_neldermead(func, x0, args=(), callback=None, xtol=1e-4, ftol=1e-4, maxiter=None, maxfev=None, disp=False, return_all=False, **unknown_options): """ Minimization of scalar function of one or more variables using the Nelder-Mead algorithm. Options ------- disp : bool Set to True to print convergence messages. xtol : float Relative error in solution `xopt` acceptable for convergence. ftol : float Relative error in ``fun(xopt)`` acceptable for convergence. maxiter : int Maximum number of iterations to perform. maxfev : int Maximum number of function evaluations to make. """ # _check_unknown_options(unknown_options) maxfun = maxfev retall = return_all fcalls, func = wrap_function(func, args) x0 = asfarray(x0).flatten() N = len(x0) if maxiter is None: maxiter = N * 200 if maxfun is None: maxfun = N * 200 rho = 1 chi = 2 psi = 0.5 sigma = 0.5 one2np1 = list(range(1, N + 1)) sim = numpy.zeros((N + 1, N), dtype=x0.dtype) fsim = numpy.zeros((N + 1, ), float) sim[0] = x0 if retall: allvecs = [sim[0]] fsim[0] = func(x0) nonzdelt = 0.05 zdelt = 0.00025 for k in range(0, N): y = numpy.array(x0, copy=True) if y[k] != 0: y[k] = (1 + nonzdelt) * y[k] else: y[k] = zdelt sim[k + 1] = y f = func(y) fsim[k + 1] = f ind = numpy.argsort(fsim) fsim = numpy.take(fsim, ind, 0) # sort so sim[0,:] has the lowest function value sim = numpy.take(sim, ind, 0) iterations = 1 while (fcalls[0] < maxfun and iterations < maxiter): if (numpy.max(numpy.ravel(numpy.abs(sim[1:] - sim[0]))) <= xtol and numpy.max(numpy.abs(fsim[0] - fsim[1:])) <= ftol): break xbar = numpy.add.reduce(sim[:-1], 0) / N xr = (1 + rho) * xbar - rho * sim[-1] fxr = func(xr) doshrink = 0 if fxr < fsim[0]: xe = (1 + rho * chi) * xbar - rho * chi * sim[-1] fxe = func(xe) if fxe < fxr: sim[-1] = xe fsim[-1] = fxe else: sim[-1] = xr fsim[-1] = fxr else: # fsim[0] <= fxr if fxr < fsim[-2]: sim[-1] = xr fsim[-1] = fxr else: # fxr >= fsim[-2] # Perform contraction if fxr < fsim[-1]: xc = (1 + psi * rho) * xbar - psi * rho * sim[-1] fxc = func(xc) if fxc <= fxr: sim[-1] = xc fsim[-1] = fxc else: doshrink = 1 else: # Perform an inside contraction xcc = (1 - psi) * xbar + psi * sim[-1] fxcc = func(xcc) if fxcc < fsim[-1]: sim[-1] = xcc fsim[-1] = fxcc else: doshrink = 1 if doshrink: for j in one2np1: sim[j] = sim[0] + sigma * (sim[j] - sim[0]) fsim[j] = func(sim[j]) ind = numpy.argsort(fsim) sim = numpy.take(sim, ind, 0) fsim = numpy.take(fsim, ind, 0) if callback is not None: callback(sim[0]) iterations += 1 if retall: allvecs.append(sim[0]) x = sim[0] fval = numpy.min(fsim) warnflag = 0 if fcalls[0] >= maxfun: warnflag = 1 msg = _status_message['maxfev'] if disp: print('Warning: ' + msg) elif iterations >= maxiter: warnflag = 2 msg = _status_message['maxiter'] if disp: print('Warning: ' + msg) else: msg = _status_message['success'] if disp: print(msg) print(" Current function value: %f" % fval) print(" Iterations: %d" % iterations) print(" Function evaluations: %d" % fcalls[0]) result = OptimizeResult(fun=fval, nit=iterations, nfev=fcalls[0], status=warnflag, success=(warnflag == 0), message=msg, x=x, final_simplex=(sim, fsim)) if retall: result['allvecs'] = allvecs return result
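# Trial points generated by _minimize_neldermead above, written out for the
# coefficients it hard-codes (rho = 1, chi = 2, psi = 0.5, sigma = 0.5), where
# xbar is the centroid of every vertex except the worst one sim[-1]:
#   reflection:           xr  = (1 + rho) * xbar - rho * sim[-1]
#   expansion:            xe  = (1 + rho * chi) * xbar - rho * chi * sim[-1]
#   outside contraction:  xc  = (1 + psi * rho) * xbar - psi * rho * sim[-1]
#   inside contraction:   xcc = (1 - psi) * xbar + psi * sim[-1]
#   shrink:               sim[j] = sim[0] + sigma * (sim[j] - sim[0])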
def minimize_bfgs(func: Callable, init_param_vec: Sequence, grad: Callable = None, grad_tol: float = 1e-5, return_all: bool = True, last_record: 'OptimizeRecord' = None, notes: dict = None): if notes is None: notes = {} notes["grad_tol"] = grad_tol notes["method"] = "BFGS" f = func fprime = grad epsilon = np.finfo(float).eps**0.5 gtol = grad_tol norm = np.Inf x0 = init_param_vec if x0.ndim == 0: x0.shape = (1, ) maxiter = len(x0) * 200 func_calls, f = wrap_function(f, ()) if fprime is None: grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon)) notes["grad_approx"] = True else: grad_calls, myfprime = wrap_function(fprime, ()) notes["grad_approx"] = False k = 0 N = len(x0) I = np.eye(N, dtype=int) if last_record: old_fval = last_record.final_func gfk = last_record.final_grad old_old_fval = last_record.last_vars["old_old_fval"] Hk = last_record.last_vars["Hk"] else: # Sets the initial step guess to dx ~ 1 old_fval = f(x0) gfk = myfprime(x0) old_old_fval = old_fval + np.linalg.norm(gfk) / 2 Hk = I all_param_vec = [x0] all_func = [old_fval] all_grad = [gfk] xk = x0 warnflag = 0 gnorm = vecnorm(gfk, ord=norm) while (gnorm > gtol) and (k < maxiter): pk = -np.dot(Hk, gfk) try: alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \ _line_search_wolfe12(f, myfprime, xk, pk, gfk, old_fval, old_old_fval, amin=1e-100, amax=1e100) except _LineSearchError: # Line search failed to find a better solution. warnflag = 2 break xkp1 = xk + alpha_k * pk sk = xkp1 - xk xk = xkp1 if gfkp1 is None: gfkp1 = myfprime(xkp1) yk = gfkp1 - gfk gfk = gfkp1 k += 1 if return_all: all_param_vec.append(xk) all_func.append(old_fval) all_grad.append(gfk) gnorm = vecnorm(gfk, ord=norm) if (gnorm <= gtol): break if not np.isfinite(old_fval): # We correctly found +-Inf as optimal value, or something went # wrong. warnflag = 2 break try: # this was handled in numeric, let it remaines for more safety rhok = 1.0 / (np.dot(yk, sk)) except ZeroDivisionError: rhok = 1000.0 print("Divide-by-zero encountered: rhok assumed large") if np.isinf(rhok): # this is patch for numpy rhok = 1000.0 print("Divide-by-zero encountered: rhok assumed large") A1 = I - sk[:, np.newaxis] * yk[np.newaxis, :] * rhok A2 = I - yk[:, np.newaxis] * sk[np.newaxis, :] * rhok Hk = np.dot(A1, np.dot( Hk, A2)) + (rhok * sk[:, np.newaxis] * sk[np.newaxis, :]) fval = old_fval if np.isnan(fval): # This can happen if the first call to f returned NaN; # the loop is then never entered. warnflag = 2 if warnflag == 2: msg = _status_message['pr_loss'] elif k >= maxiter: warnflag = 1 msg = _status_message['maxiter'] else: msg = _status_message['success'] history = {"func": all_func, "grad": all_grad, "param_vec": all_param_vec} final_status = { "msg": msg, "warnflag": warnflag, "num_func_call": func_calls[0], "num_grad_call": grad_calls[0], "num_iter": k } last_vars = {"Hk": Hk, "old_old_fval": old_old_fval} record = OptimizeRecord(history, final_status, last_vars, notes) return record
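# The inverse-Hessian update at the end of the loop in minimize_bfgs above is the
# standard BFGS formula; with s_k = x_{k+1} - x_k, y_k = g_{k+1} - g_k and
# rho_k = 1 / (y_k^T s_k), the factors A1 and A2 in the code are the two
# rank-one-corrected identities in
#   H_{k+1} = (I - rho_k s_k y_k^T) H_k (I - rho_k y_k s_k^T) + rho_k s_k s_k^T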
def model_policy_gradient( f: Callable[..., float], x0: np.ndarray, *, args=(), learning_rate: float = 1e-2, decay_rate: float = 0.96, decay_steps: int = 5, log_sigma_init: float = -5.0, max_iterations: int = 1000, batch_size: int = 10, radius_coeff: float = 3.0, warmup_steps: int = 10, batch_size_model: int = 65536, save_func_vals: bool = False, random_state: "cirq.RANDOM_STATE_OR_SEED_LIKE" = None, known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None, max_evaluations: Optional[int] = None ) -> scipy.optimize.OptimizeResult: """Model policy gradient algorithm for black-box optimization. The idea of this algorithm is to perform policy gradient, but estimate the function values using a surrogate model. The surrogate model is a least-squared quadratic fit to points sampled from the vicinity of the current iterate. Args: f: The function to minimize. x0: An initial guess. args: Additional arguments to pass to the function. learning_rate: The learning rate for the policy gradient. decay_rate: the learning decay rate for the Adam optimizer. decay_steps: the learning decay steps for the Adam optimizer. log_sigma_init: the intial value for the sigma of the policy in the log scale. max_iterations: The maximum number of iterations to allow before termination. batch_size: The number of points to sample in each iteration. The cost of evaluation of these samples are computed through the quantum computer cost model. radius_coeff: The ratio determining the size of the radius around the current iterate to sample points from to build the quadratic model. The ratio is with respect to the maximal ratio of the samples from the current policy. warmup_steps: The number of steps before the model policy gradient is performed. before these steps, we use the policy gradient without the model. batch_size_model: The model sample batch size. After we fit the quadratic model, we use the model to evaluate on big enough batch of samples. save_func_vals: whether to compute and save the function values for the current value of parameter. random_state: A seed (int) or `np.random.RandomState` class to use when generating random values. If not set, defaults to using the module methods in `np.random`. known_values: Any prior known values of the objective function. This is given as a tuple where the first element is a list of points and the second element is a list of the function values at those points. max_evaluations: The maximum number of function evaluations to allow before termination. 
Returns: Scipy OptimizeResult """ random_state = value.parse_random_state(random_state) if known_values is not None: known_xs, known_ys = known_values known_xs = [np.copy(x) for x in known_xs] known_ys = [np.copy(y) for y in known_ys] else: known_xs, known_ys = [], [] if max_evaluations is None: max_evaluations = np.inf n = len(x0) log_sigma = np.ones(n) * log_sigma_init sigma = np.exp(log_sigma) # set up the first and second moment estimate m_mean = np.zeros(n) v_mean = np.zeros(n) m_log_sigma = np.zeros(n) v_log_sigma = np.zeros(n) # set up lr schedule and optimizer lr_schedule1 = _ExponentialSchedule(learning_rate, decay_steps=decay_steps, decay_rate=decay_rate, staircase=True) lr_schedule2 = _ExponentialSchedule(learning_rate, decay_steps=decay_steps, decay_rate=decay_rate, staircase=True) _, f = wrap_function(f, args) res = OptimizeResult() current_x = np.copy(x0) res.x_iters = [] # initializes as lists res.xs_iters = [] res.ys_iters = [] res.func_vals = [] res.fun = 0 total_evals = 0 num_iter = 0 message = None # stats history_max = -np.inf while num_iter < max_iterations: # get samples from the current policy to evaluate z = random_state.randn(batch_size, n) new_xs = sigma * z + current_x if total_evals + batch_size > max_evaluations: message = "Reached maximum number of evaluations." break # Evaluate points res.xs_iters.append(new_xs) new_ys = [f(x) for x in new_xs] res.ys_iters.append(new_ys) total_evals += batch_size known_xs.extend(new_xs) known_ys.extend(new_ys) # Save function value if save_func_vals: res.func_vals.append(f(current_x)) res.x_iters.append(np.copy(current_x)) res.fun = res.func_vals[-1] # current sampling radius (maximal) max_radius = 0 for x in new_xs: if np.linalg.norm(x - current_x) > max_radius: max_radius = np.linalg.norm(x - current_x) reward = [-y for y in new_ys] # warmup steps control whether to use the model to estimate the f if num_iter >= warmup_steps: # Determine points to use to build model model_xs = [] model_ys = [] for x, y in zip(known_xs, known_ys): if np.linalg.norm(x - current_x) < radius_coeff * max_radius: model_xs.append(x) model_ys.append(y) # safer way without the `SVD` not converging try: model = _get_quadratic_model(model_xs, model_ys, x) use_model = True except ValueError: use_model = False if use_model: # get samples (from model) z = random_state.randn(batch_size_model, n) new_xs = sigma * z + current_x # use the model for prediction new_ys = model.predict(new_xs - current_x) reward = [-y for y in new_ys] reward = np.array(reward) # stats reward_mean = np.mean(reward) reward_max = np.max(reward) if reward_max > history_max: history_max = reward_max # subtract baseline reward = reward - reward_mean # analytic derivatives (natural gradient policy gradient) delta_mean = np.dot(z.T, reward) * sigma delta_log_sigma = np.dot(z.T**2, reward) / np.sqrt(2) delta_mean_norm = np.linalg.norm(np.dot(z.T, reward)) delta_log_sigma_norm = np.linalg.norm(np.dot(z.T**2, reward)) delta_mean = delta_mean / delta_mean_norm delta_log_sigma = delta_log_sigma / delta_log_sigma_norm # gradient ascend to update the parameters current_x, m_mean, v_mean = _adam_update(delta_mean, current_x, num_iter, m_mean, v_mean, lr_schedule=lr_schedule1) log_sigma, m_log_sigma, v_log_sigma = _adam_update( delta_log_sigma, log_sigma, num_iter, m_log_sigma, v_log_sigma, lr_schedule=lr_schedule2, ) log_sigma = np.clip(log_sigma, -20.0, 2.0) sigma = np.exp(log_sigma) num_iter += 1 final_val = f(current_x) res.func_vals.append(final_val) if message is None: message = "Reached 
maximum number of iterations." res.x_iters.append(current_x) total_evals += 1 res.x = current_x res.fun = final_val res.nit = num_iter res.nfev = total_evals res.message = message return res
def _minimize_slsqp(func, x0, args=(), jac=None, bounds=None, constraints=(), maxiter=100, ftol=1.0E-6, iprint=1, disp=False, eps=_epsilon, callback=None, **unknown_options): """ Minimize a scalar function of one or more variables using Sequential Least SQuares Programming (SLSQP). Options ------- ftol : float Precision goal for the value of f in the stopping criterion. eps : float Step size used for numerical approximation of the jacobian. disp : bool Set to True to print convergence messages. If False, `verbosity` is ignored and set to 0. maxiter : int Maximum number of iterations. """ _check_unknown_options(unknown_options) fprime = jac iter = maxiter acc = ftol epsilon = eps if not disp: iprint = 0 # Constraints are triaged per type into a dictionnary of tuples if isinstance(constraints, dict): constraints = (constraints, ) cons = {'eq': (), 'ineq': ()} for ic, con in enumerate(constraints): # check type try: ctype = con['type'].lower() except KeyError: raise KeyError('Constraint %d has no type defined.' % ic) except TypeError: raise TypeError('Constraints must be defined using a ' 'dictionary.') except AttributeError: raise TypeError("Constraint's type must be a string.") else: if ctype not in ['eq', 'ineq']: raise ValueError("Unknown constraint type '%s'." % con['type']) # check function if 'fun' not in con: raise ValueError('Constraint %d has no function defined.' % ic) # check jacobian cjac = con.get('jac') if cjac is None: # approximate jacobian function. The factory function is needed # to keep a reference to `fun`, see gh-4240. def cjac_factory(fun): def cjac(x, *args): return approx_jacobian(x, fun, epsilon, *args) return cjac cjac = cjac_factory(con['fun']) # update constraints' dictionary cons[ctype] += ({ 'fun': con['fun'], 'jac': cjac, 'args': con.get('args', ()) }, ) exit_modes = { -1: "Gradient evaluation required (g & a)", 0: "Optimization terminated successfully.", 1: "Function evaluation required (f & c)", 2: "More equality constraints than independent variables", 3: "More than 3*n iterations in LSQ subproblem", 4: "Inequality constraints incompatible", 5: "Singular matrix E in LSQ subproblem", 6: "Singular matrix C in LSQ subproblem", 7: "Rank-deficient equality constraint subproblem HFTI", 8: "Positive directional derivative for linesearch", 9: "Iteration limit exceeded" } # Wrap func feval, func = wrap_function(func, args) # Wrap fprime, if provided, or approx_jacobian if not if fprime: geval, fprime = wrap_function(fprime, args) else: geval, fprime = wrap_function(approx_jacobian, (func, epsilon)) # Transform x0 into an array. 
x = asfarray(x0).flatten() # Set the parameters that SLSQP will need # meq, mieq: number of equality and inequality constraints meq = sum( map(len, [atleast_1d(c['fun'](x, *c['args'])) for c in cons['eq']])) mieq = sum( map(len, [atleast_1d(c['fun'](x, *c['args'])) for c in cons['ineq']])) # m = The total number of constraints m = meq + mieq # la = The number of constraints, or 1 if there are no constraints la = array([1, m]).max() # n = The number of independent variables n = len(x) # Define the workspaces for SLSQP n1 = n + 1 mineq = m - meq + n1 + n1 len_w = (3*n1+m)*(n1+1)+(n1-meq+1)*(mineq+2) + 2*mineq+(n1+mineq)*(n1-meq) \ + 2*meq + n1 + ((n+1)*n)//2 + 2*m + 3*n + 3*n1 + 1 len_jw = mineq w = zeros(len_w) jw = zeros(len_jw) # Decompose bounds into xl and xu if bounds is None or len(bounds) == 0: xl = np.empty(n, dtype=float) xu = np.empty(n, dtype=float) xl.fill(np.nan) xu.fill(np.nan) else: bnds = array(bounds, float) if bnds.shape[0] != n: raise IndexError('SLSQP Error: the length of bounds is not ' 'compatible with that of x0.') with np.errstate(invalid='ignore'): bnderr = bnds[:, 0] > bnds[:, 1] if bnderr.any(): raise ValueError('SLSQP Error: lb > ub in bounds %s.' % ', '.join(str(b) for b in bnderr)) xl, xu = bnds[:, 0], bnds[:, 1] # Mark infinite bounds with nans; the Fortran code understands this infbnd = ~isfinite(bnds) xl[infbnd[:, 0]] = np.nan xu[infbnd[:, 1]] = np.nan # Clip initial guess to bounds (SLSQP may fail with bounds-infeasible initial point) have_bound = np.isfinite(xl) x[have_bound] = np.clip(x[have_bound], xl[have_bound], np.inf) have_bound = np.isfinite(xu) x[have_bound] = np.clip(x[have_bound], -np.inf, xu[have_bound]) # Initialize the iteration counter and the mode value mode = array(0, int) acc = array(acc, float) majiter = array(iter, int) majiter_prev = 0 # Print the header if iprint >= 2 if iprint >= 2: print("%5s %5s %16s %16s" % ("NIT", "FC", "OBJFUN", "GNORM")) while 1: if mode == 0 or mode == 1: # objective and constraint evaluation requird # Compute objective function fx = func(x) try: fx = float(np.asarray(fx)) except (TypeError, ValueError): raise ValueError("Objective function must return a scalar") # Compute the constraints if cons['eq']: c_eq = concatenate([ atleast_1d(con['fun'](x, *con['args'])) for con in cons['eq'] ]) else: c_eq = zeros(0) if cons['ineq']: c_ieq = concatenate([ atleast_1d(con['fun'](x, *con['args'])) for con in cons['ineq'] ]) else: c_ieq = zeros(0) # Now combine c_eq and c_ieq into a single matrix c = concatenate((c_eq, c_ieq)) if mode == 0 or mode == -1: # gradient evaluation required # Compute the derivatives of the objective function # For some reason SLSQP wants g dimensioned to n+1 g = append(fprime(x), 0.0) # Compute the normals of the constraints if cons['eq']: a_eq = vstack( [con['jac'](x, *con['args']) for con in cons['eq']]) else: # no equality constraint a_eq = zeros((meq, n)) if cons['ineq']: a_ieq = vstack( [con['jac'](x, *con['args']) for con in cons['ineq']]) else: # no inequality constraint a_ieq = zeros((mieq, n)) # Now combine a_eq and a_ieq into a single a matrix if m == 0: # no constraints a = zeros((la, n)) else: a = vstack((a_eq, a_ieq)) a = concatenate((a, zeros([la, 1])), 1) # Call SLSQP slsqp(m, meq, x, xl, xu, fx, c, g, a, acc, majiter, mode, w, jw) # call callback if major iteration has incremented if callback is not None and majiter > majiter_prev: callback(x) # Print the status of the current iterate if iprint > 2 and the # major iteration has incremented if iprint >= 2 and majiter > 
majiter_prev: print("%5i %5i % 16.6E % 16.6E" % (majiter, feval[0], fx, linalg.norm(g))) # If exit mode is not -1 or 1, slsqp has completed if abs(mode) != 1: break majiter_prev = int(majiter) # Optimization loop complete. Print status if requested if iprint >= 1: print(exit_modes[int(mode)] + " (Exit mode " + str(mode) + ')') print(" Current function value:", fx) print(" Iterations:", majiter) print(" Function evaluations:", feval[0]) print(" Gradient evaluations:", geval[0]) return OptimizeResult(x=x, fun=fx, jac=g[:-1], nit=int(majiter), nfev=feval[0], njev=geval[0], status=int(mode), message=exit_modes[int(mode)], success=(mode == 0))
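# Example of the constraint dictionaries the triage loop in _minimize_slsqp above
# expects: each entry needs a 'type' of 'eq' or 'ineq' and a 'fun'; 'jac' and
# 'args' are optional (a missing 'jac' is finite-differenced via approx_jacobian).
# If this module mirrors SciPy's solver, it is normally reached through
# scipy.optimize.minimize with method='SLSQP', as in this toy problem
# (minimize x0**2 + x1**2 subject to x0 + x1 = 1 and x0 >= 0.2):
import numpy as np
from scipy.optimize import minimize

constraints = (
    {'type': 'eq', 'fun': lambda x: x[0] + x[1] - 1.0},
    {'type': 'ineq', 'fun': lambda x: x[0] - 0.2},
)
res = minimize(lambda x: x[0] ** 2 + x[1] ** 2, np.array([0.5, 0.5]),
               method='SLSQP', constraints=constraints)
print(res.x)  # approximately [0.5, 0.5]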
def _minimize_neldermead(func, x0, args=(), callback=None, xtol=1e-4, ftol=1e-4, maxiter=None, maxfev=None, disp=False, return_all=False, return_simplex=False, **unknown_options): """ Minimization of scalar function of one or more variables using the Nelder-Mead algorithm. Options ------- disp : bool Set to True to print convergence messages. xtol : float Relative error in solution `xopt` acceptable for convergence. ftol : float Relative error in ``fun(xopt)`` acceptable for convergence. maxiter : int Maximum number of iterations to perform. maxfev : int Maximum number of function evaluations to make. return_simplex : bool Set to True to return all nodes of final simplex and their function values. """ _check_unknown_options(unknown_options) maxfun = maxfev retall = return_all fcalls, func = wrap_function(func, args) x0 = asfarray(x0).flatten() N = len(x0) if maxiter is None: maxiter = N * 200 if maxfun is None: maxfun = N * 200 rho = 1 chi = 2 psi = 0.5 sigma = 0.5 one2np1 = list(range(1, N + 1)) sim = numpy.zeros((N + 1, N), dtype=x0.dtype) fsim = numpy.zeros((N + 1,), float) sim[0] = x0 if retall: allvecs = [sim[0]] fsim[0] = func(x0) nonzdelt = 0.05 zdelt = 0.00025 for k in range(0, N): y = numpy.array(x0, copy=True) if y[k] != 0: y[k] = (1 + nonzdelt) * y[k] else: y[k] = zdelt sim[k + 1] = y f = func(y) fsim[k + 1] = f ind = numpy.argsort(fsim) fsim = numpy.take(fsim, ind, 0) # sort so sim[0,:] has the lowest function value sim = numpy.take(sim, ind, 0) iterations = 1 while (fcalls[0] < maxfun and iterations < maxiter): if (numpy.max(numpy.ravel(numpy.abs(sim[1:] - sim[0]))) <= xtol and numpy.max(numpy.abs(fsim[0] - fsim[1:])) <= ftol): break xbar = numpy.add.reduce(sim[:-1], 0) / N xr = (1 + rho) * xbar - rho * sim[-1] fxr = func(xr) doshrink = 0 if fxr < fsim[0]: xe = (1 + rho * chi) * xbar - rho * chi * sim[-1] fxe = func(xe) if fxe < fxr: sim[-1] = xe fsim[-1] = fxe else: sim[-1] = xr fsim[-1] = fxr else: # fsim[0] <= fxr if fxr < fsim[-2]: sim[-1] = xr fsim[-1] = fxr else: # fxr >= fsim[-2] # Perform contraction if fxr < fsim[-1]: xc = (1 + psi * rho) * xbar - psi * rho * sim[-1] fxc = func(xc) if fxc <= fxr: sim[-1] = xc fsim[-1] = fxc else: doshrink = 1 else: # Perform an inside contraction xcc = (1 - psi) * xbar + psi * sim[-1] fxcc = func(xcc) if fxcc < fsim[-1]: sim[-1] = xcc fsim[-1] = fxcc else: doshrink = 1 if doshrink: for j in one2np1: sim[j] = sim[0] + sigma * (sim[j] - sim[0]) fsim[j] = func(sim[j]) ind = numpy.argsort(fsim) sim = numpy.take(sim, ind, 0) fsim = numpy.take(fsim, ind, 0) if callback is not None: callback(sim[0]) iterations += 1 if retall: allvecs.append(sim[0]) x = sim[0] fval = numpy.min(fsim) warnflag = 0 if fcalls[0] >= maxfun: warnflag = 1 msg = _status_message['maxfev'] if disp: print('Warning: ' + msg) elif iterations >= maxiter: warnflag = 2 msg = _status_message['maxiter'] if disp: print('Warning: ' + msg) else: msg = _status_message['success'] if disp: print(msg) print(" Current function value: %f" % fval) print(" Iterations: %d" % iterations) print(" Function evaluations: %d" % fcalls[0]) result = OptimizeResult(fun=fval, nit=iterations, nfev=fcalls[0], status=warnflag, success=(warnflag == 0), message=msg, x=x) if retall: result['allvecs'] = allvecs if return_simplex: result['sim'] = sim result['fsim'] = fsim return result
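# A quick usage sketch for the Nelder-Mead variant above, assuming the surrounding
# module provides the helpers it calls (wrap_function, _check_unknown_options,
# _status_message, OptimizeResult); with return_simplex=True the result also
# carries the final simplex and its function values:
import numpy

def rosen(x):
    # 2-D Rosenbrock test function, minimum at (1, 1)
    return sum(100.0 * (x[1:] - x[:-1] ** 2) ** 2 + (1 - x[:-1]) ** 2)

res = _minimize_neldermead(rosen, numpy.array([1.3, 0.7]), return_simplex=True)
print(res.x, res.fun)    # close to [1, 1] and 0 (within the default xtol/ftol)
print(res['sim'].shape)  # (3, 2): the N+1 vertices of the final 2-D simplex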
def _minimize_lbfgsb_timeup( fun, x0, args=(), jac=None, bounds=None, disp=None, maxcor=10, ftol=2.2204460492503131e-09, gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000, iprint=-1, callback=None, maxls=20, t0=None, timeup=float("inf"), **unknown_options): # JFF: added time-up check """ Minimize a scalar function of one or more variables using the L-BFGS-B algorithm. Options ------- disp : bool Set to True to print convergence messages. maxcor : int The maximum number of variable metric corrections used to define the limited memory matrix. (The limited memory BFGS method does not store the full hessian but uses this many terms in an approximation to it.) factr : float The iteration stops when ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``, where ``eps`` is the machine precision, which is automatically generated by the code. Typical values for `factr` are: 1e12 for low accuracy; 1e7 for moderate accuracy; 10.0 for extremely high accuracy. ftol : float The iteration stops when ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``. gtol : float The iteration will stop when ``max{|proj g_i | i = 1, ..., n} <= gtol`` where ``pg_i`` is the i-th component of the projected gradient. eps : float Step size used for numerical approximation of the jacobian. disp : int Set to True to print convergence messages. maxfun : int Maximum number of function evaluations. maxiter : int Maximum number of iterations. maxls : int, optional Maximum number of line search steps (per iteration). Default is 20. """ _check_unknown_options(unknown_options) m = maxcor epsilon = eps pgtol = gtol factr = ftol / np.finfo(float).eps x0 = asarray(x0).ravel() n, = x0.shape if bounds is None: bounds = [(None, None)] * n if len(bounds) != n: raise ValueError('length of x0 != length of bounds') # unbounded variables must use None, not +-inf, for optimizer to work properly bounds = [(None if l == -np.inf else l, None if u == np.inf else u) for l, u in bounds] if disp is not None: if disp == 0: iprint = -1 else: iprint = disp n_function_evals, fun = wrap_function(fun, ()) if jac is None: def func_and_grad(x): f = fun(x, *args) g = _approx_fprime_helper(x, fun, epsilon, args=args, f0=f) return f, g else: def func_and_grad(x): f = fun(x, *args) g = jac(x, *args) return f, g nbd = zeros(n, int32) low_bnd = zeros(n, float64) upper_bnd = zeros(n, float64) bounds_map = {(None, None): 0, (1, None): 1, (1, 1): 2, (None, 1): 3} for i in range(0, n): l, u = bounds[i] if l is not None: low_bnd[i] = l l = 1 if u is not None: upper_bnd[i] = u u = 1 nbd[i] = bounds_map[l, u] if not maxls > 0: raise ValueError('maxls must be positive.') x = array(x0, float64) f = array(0.0, float64) g = zeros((n,), float64) wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64) iwa = zeros(3*n, int32) task = zeros(1, 'S60') csave = zeros(1, 'S60') lsave = zeros(4, int32) isave = zeros(44, int32) dsave = zeros(29, float64) task[:] = 'START' n_iterations = 0 if t0 is None: t0 = time.time() time_profile.predicted_inner_loop_func2_duration = 0.0 while 1: # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \ _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr, pgtol, wa, iwa, task, iprint, csave, lsave, isave, dsave, maxls) task_str = task.tostring() # begin EB curr_time = time.time() predicted_inner_loop_func2_duration = (curr_time + time_profile.maximize_inner_time_profile.mean + time_profile.func2_time_profile.mean - t0) if predicted_inner_loop_func2_duration > timeup: # JFF: added time-up check task[:] = ('STOP: PREDICTED COMPUTATION TIME EXCEEDS LIMIT') 
break # end EB if task_str.startswith(b'FG'): # The minimization routine wants f and g at the current x. # Note that interruptions due to maxfun are postponed # until the completion of the current minimization iteration. # Overwrite f and g: f, g = func_and_grad(x) elif task_str.startswith(b'NEW_X'): # new iteration if n_iterations > maxiter: task[:] = 'STOP: TOTAL NO. of ITERATIONS EXCEEDS LIMIT' elif n_function_evals[0] > maxfun: task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS ' 'EXCEEDS LIMIT') else: n_iterations += 1 if callback is not None: callback(x) else: break time_profile.predicted_inner_loop_func2_duration = predicted_inner_loop_func2_duration task_str = task.tostring().strip(b'\x00').strip() if task_str.startswith(b'CONV'): warnflag = 0 elif n_function_evals[0] > maxfun: warnflag = 1 elif n_iterations > maxiter: warnflag = 1 else: warnflag = 2 # These two portions of the workspace are described in the mainlb # subroutine in lbfgsb.f. See line 363. s = wa[0: m*n].reshape(m, n) y = wa[m*n: 2*m*n].reshape(m, n) # See lbfgsb.f line 160 for this portion of the workspace. # isave(31) = the total number of BFGS updates prior the current iteration; n_bfgs_updates = isave[30] n_corrs = min(n_bfgs_updates, maxcor) hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs]) return OptimizeResult(fun=f, jac=g, nfev=n_function_evals[0], nit=n_iterations, status=warnflag, message=task_str, x=x, success=(warnflag == 0), hess_inv=hess_inv)
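# Note on the ftol/factr conversion in _minimize_lbfgsb_timeup above: the Fortran
# routine works with factr, and the wrapper sets factr = ftol / machine_eps, so the
# default ftol = 2.2204460492503131e-09 divided by machine_eps ~ 2.22e-16 gives
# factr = 1e7, i.e. the "moderate accuracy" setting mentioned in the docstring.
# The time-up check compares the predicted duration of the next inner iteration
# (elapsed time since t0 plus the recorded mean iteration and function times from
# the module-level time_profile object) against the `timeup` budget and stops early.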
def model_gradient_descent( f: Callable[..., float], x0: np.ndarray, *, args=(), rate: float = 1e-1, sample_radius: float = 1e-1, n_sample_points: int = 100, n_sample_points_ratio: Optional[float] = None, rate_decay_exponent: float = 0.0, stability_constant: float = 0.0, sample_radius_decay_exponent: float = 0.0, tol: float = 1e-8, known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None, max_iterations: Optional[int] = None, max_evaluations: Optional[int] = None) -> scipy.optimize.OptimizeResult: """Model gradient descent algorithm for black-box optimization. The idea of this algorithm is to perform gradient descent, but estimate the gradient using a surrogate model instead of, say, by finite-differencing. The surrogate model is a least-squared quadratic fit to points sampled from the vicinity of the current iterate. This algorithm works well when you have an initial guess which is in the convex neighborhood of a local optimum and you want to converge to that local optimum. It's meant to be used when the function is stochastic. Args: f: The function to minimize. x0: An initial guess. args: Additional arguments to pass to the function. rate: The learning rate for the gradient descent. sample_radius: The radius around the current iterate to sample points from to build the quadratic model. n_sample_points: The number of points to sample in each iteration. n_sample_points_ratio: This specifies the number of points to sample in each iteration as a coefficient of the number of points required to exactly determine a quadratic model. The number of sample points will be this coefficient times (n+1)(n+2)/2, rounded up, where n is the number of parameters. Setting this overrides n_sample_points. rate_decay_exponent: Controls decay of learning rate. In each iteration, the learning rate is changed to the base learning rate divided by (i + 1 + S)**a, where S is the stability constant and a is the rate decay exponent (this parameter). stability_constant: Affects decay of learning rate. In each iteration, the learning rate is changed to the base learning rate divided by (i + 1 + S)**a, where S is the stability constant (this parameter) and a is the rate decay exponent. sample_radius_decay_exponent: Controls decay of sample radius. tol: The algorithm terminates when the difference between the current iterate and the next suggested iterate is smaller than this value. known_values: Any prior known values of the objective function. This is given as a tuple where the first element is a list of points and the second element is a list of the function values at those points. max_iterations: The maximum number of iterations to allow before termination. max_evaluations: The maximum number of function evaluations to allow before termination. 
Returns: Scipy OptimizeResult """ if known_values is not None: known_xs, known_ys = known_values known_xs = [np.copy(x) for x in known_xs] known_ys = [np.copy(y) for y in known_ys] else: known_xs, known_ys = [], [] if max_iterations is None: max_iterations = np.inf if max_evaluations is None: max_evaluations = np.inf n = len(x0) if n_sample_points_ratio is not None: n_sample_points = int( np.ceil(n_sample_points_ratio * (n + 1) * (n + 2) / 2)) _, f = wrap_function(f, args) res = OptimizeResult() current_x = np.copy(x0) res.x_iters = [] # initializes as lists res.xs_iters = [] res.ys_iters = [] res.func_vals = [] res.model_vals = [None] res.fun = 0 total_evals = 0 num_iter = 0 converged = False message = None while num_iter < max_iterations: current_sample_radius = (sample_radius / (num_iter + 1)**sample_radius_decay_exponent) # Determine points to evaluate # in ball around current point new_xs = [np.copy(current_x)] + [ current_x + _random_point_in_ball(n, current_sample_radius) for _ in range(n_sample_points) ] if total_evals + len(new_xs) > max_evaluations: message = 'Reached maximum number of evaluations.' break # Evaluate points res.xs_iters.append(new_xs) new_ys = [f(x) for x in new_xs] res.ys_iters.append(new_ys) total_evals += len(new_ys) known_xs.extend(new_xs) known_ys.extend(new_ys) # Save function value res.func_vals.append(new_ys[0]) res.x_iters.append(np.copy(current_x)) res.fun = res.func_vals[-1] # Determine points to use to build model model_xs = [] model_ys = [] for x, y in zip(known_xs, known_ys): if np.linalg.norm(x - current_x) < current_sample_radius: model_xs.append(x) model_ys.append(y) # Build and solve model model_gradient, model = _get_least_squares_model_gradient( model_xs, model_ys, current_x) # calculate the gradient and update the current point gradient_norm = np.linalg.norm(model_gradient) decayed_rate = ( rate / (num_iter + 1 + stability_constant)**rate_decay_exponent) # Convergence criteria if decayed_rate * gradient_norm < tol: converged = True message = 'Optimization converged successfully.' break # Update current_x -= decayed_rate * model_gradient res.model_vals.append( model.predict([-decayed_rate * model_gradient])[0]) num_iter += 1 if converged: final_val = res.func_vals[-1] else: final_val = f(current_x) res.func_vals.append(final_val) if message is None: message = 'Reached maximum number of iterations.' res.x_iters.append(current_x) total_evals += 1 res.x = current_x res.fun = final_val res.nit = num_iter res.nfev = total_evals res.message = message return res
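# A small usage sketch for model_gradient_descent above on a noisy quadratic, the
# stochastic setting the docstring describes; sample counts and iteration limits
# are kept tiny so it runs quickly (assumes the module's private helpers
# _random_point_in_ball and _get_least_squares_model_gradient are available):
import numpy as np

rng = np.random.default_rng(0)

def noisy_quadratic(x):
    # quadratic bowl plus a little evaluation noise
    return float(np.sum(x ** 2) + 0.01 * rng.standard_normal())

result = model_gradient_descent(noisy_quadratic, np.array([0.4, -0.3]),
                                rate=0.3, sample_radius=0.2,
                                n_sample_points=20, max_iterations=30)
print(result.x, result.fun, result.nfev)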
def fmin_slsqp( func, x0 , eqcons=[], f_eqcons=None, ieqcons=[], f_ieqcons=None, bounds = [], fprime = None, fprime_eqcons=None, fprime_ieqcons=None, args = (), iter = 100, acc = 1.0E-6, iprint = 1, full_output = 0, epsilon = _epsilon ): """ Minimize a function using Sequential Least SQuares Programming Python interface function for the SLSQP Optimization subroutine originally implemented by Dieter Kraft. *Parameters*: func : callable f(x,*args) Objective function. x0 : ndarray of float Initial guess for the independent variable(s). eqcons : list A list of functions of length n such that eqcons[j](x0,*args) == 0.0 in a successfully optimized problem. f_eqcons : callable f(x,*args) Returns an array in which each element must equal 0.0 in a successfully optimized problem. If f_eqcons is specified, eqcons is ignored. ieqcons : list A list of functions of length n such that ieqcons[j](x0,*args) >= 0.0 in a successfully optimized problem. f_ieqcons : callable f(x0,*args) Returns an array in which each element must be greater or equal to 0.0 in a successfully optimized problem. If f_ieqcons is specified, ieqcons is ignored. bounds : list A list of tuples specifying the lower and upper bound for each independent variable [(xl0, xu0),(xl1, xu1),...] fprime : callable f(x,*args) A function that evaluates the partial derivatives of func. fprime_eqcons : callable f(x,*args) A function of the form f(x, *args) that returns the m by n array of equality constraint normals. If not provided, the normals will be approximated. The array returned by fprime_eqcons should be sized as ( len(eqcons), len(x0) ). fprime_ieqcons : callable f(x,*args) A function of the form f(x, *args) that returns the m by n array of inequality constraint normals. If not provided, the normals will be approximated. The array returned by fprime_ieqcons should be sized as ( len(ieqcons), len(x0) ). args : sequence Additional arguments passed to func and fprime. iter : int The maximum number of iterations. acc : float Requested accuracy. iprint : int The verbosity of fmin_slsqp: iprint <= 0 : Silent operation iprint == 1 : Print summary upon completion (default) iprint >= 2 : Print status of each iterate and summary full_output : bool If False, return only the minimizer of func (default). Otherwise, output final objective function and summary information. epsilon : float The step size for finite-difference derivative estimates. *Returns*: ( x, { fx, its, imode, smode }) x : ndarray of float The final minimizer of func. fx : ndarray of float The final value of the objective function. its : int The number of iterations. imode : int The exit mode from the optimizer (see below). smode : string Message describing the exit mode from the optimizer. *Notes* Exit modes are defined as follows: -1 : Gradient evaluation required (g & a) 0 : Optimization terminated successfully. 
1 : Function evaluation required (f & c) 2 : More equality constraints than independent variables 3 : More than 3*n iterations in LSQ subproblem 4 : Inequality constraints incompatible 5 : Singular matrix E in LSQ subproblem 6 : Singular matrix C in LSQ subproblem 7 : Rank-deficient equality constraint subproblem HFTI 8 : Positive directional derivative for linesearch 9 : Iteration limit exceeded """ exit_modes = { -1 : "Gradient evaluation required (g & a)", 0 : "Optimization terminated successfully.", 1 : "Function evaluation required (f & c)", 2 : "More equality constraints than independent variables", 3 : "More than 3*n iterations in LSQ subproblem", 4 : "Inequality constraints incompatible", 5 : "Singular matrix E in LSQ subproblem", 6 : "Singular matrix C in LSQ subproblem", 7 : "Rank-deficient equality constraint subproblem HFTI", 8 : "Positive directional derivative for linesearch", 9 : "Iteration limit exceeded" } # Now do a lot of function wrapping # Wrap func feval, func = wrap_function(func, args) # Wrap fprime, if provided, or approx_fprime if not if fprime: geval, fprime = wrap_function(fprime,args) else: geval, fprime = wrap_function(approx_fprime,(func,epsilon)) if f_eqcons: # Equality constraints provided via f_eqcons ceval, f_eqcons = wrap_function(f_eqcons,args) if fprime_eqcons: # Wrap fprime_eqcons geval, fprime_eqcons = wrap_function(fprime_eqcons,args) else: # Wrap approx_jacobian geval, fprime_eqcons = wrap_function(approx_jacobian, (f_eqcons,epsilon)) else: # Equality constraints provided via eqcons[] eqcons_prime = [] for i in range(len(eqcons)): eqcons_prime.append(None) if eqcons[i]: # Wrap eqcons and eqcons_prime ceval, eqcons[i] = wrap_function(eqcons[i],args) geval, eqcons_prime[i] = wrap_function(approx_fprime, (eqcons[i],epsilon)) if f_ieqcons: # Inequality constraints provided via f_ieqcons ceval, f_ieqcons = wrap_function(f_ieqcons,args) if fprime_ieqcons: # Wrap fprime_ieqcons geval, fprime_ieqcons = wrap_function(fprime_ieqcons,args) else: # Wrap approx_jacobian geval, fprime_ieqcons = wrap_function(approx_jacobian, (f_ieqcons,epsilon)) else: # Inequality constraints provided via ieqcons[] ieqcons_prime = [] for i in range(len(ieqcons)): ieqcons_prime.append(None) if ieqcons[i]: # Wrap ieqcons and ieqcons_prime ceval, ieqcons[i] = wrap_function(ieqcons[i],args) geval, ieqcons_prime[i] = wrap_function(approx_fprime, (ieqcons[i],epsilon)) # Transform x0 into an array. 
x = asfarray(x0).flatten() # Set the parameters that SLSQP will need # meq = The number of equality constraints if f_eqcons: meq = len(f_eqcons(x)) else: meq = len(eqcons) if f_ieqcons: mieq = len(f_ieqcons(x)) else: mieq = len(ieqcons) # m = The total number of constraints m = meq + mieq #+ len(bounds) # la = The number of constraints, or 1 if there are no constraints la = array([1,m]).max() # n = The number of independent variables n = len(x) # Define the workspaces for SLSQP n1 = n+1 mineq = m + len(bounds) - meq + n1 + n1 # mineq = m - meq + n1 + n1 len_w = (3*n1+m)*(n1+1)+(n1-meq+1)*(mineq+2) + 2*mineq+(n1+mineq)*(n1-meq) \ + 2*meq + n1 +(n+1)*n/2 + 2*m + 3*n + 3*n1 + 1 len_jw = mineq w = zeros(len_w) jw = zeros(len_jw) # Decompose bounds into xl and xu if len(bounds) == 0: bounds = [(-1.0E12, 1.0E12) for i in range(n)] elif len(bounds) != n: raise IndexError, \ 'SLSQP Error: If bounds is specified, len(bounds) == len(x0)' else: for i in range(len(bounds)): if bounds[i][0] > bounds[i][1]: raise ValueError, \ 'SLSQP Error: lb > ub in bounds[' + str(i) +'] ' + str(bounds[4]) xl = array( [ b[0] for b in bounds ] ) xu = array( [ b[1] for b in bounds ] ) # Initialize the iteration counter and the mode value mode = array(0,int) acc = array(acc,float) majiter = array(iter,int) majiter_prev = 0 # Print the header if iprint >= 2 if iprint >= 2: print "%5s %5s %16s %16s" % ("NIT","FC","OBJFUN","GNORM") while 1: if mode == 0 or mode == 1: # objective and constraint evaluation requird # Compute objective function fx = func(x) # Compute the constraints if f_eqcons: c_eq = f_eqcons(x) else: c_eq = array([ eqcons[i](x) for i in range(meq) ]) if f_ieqcons: c_ieq = f_ieqcons(x) else: c_ieq = array([ ieqcons[i](x) for i in range(len(ieqcons)) ]) # Now combine c_eq and c_ieq into a single matrix if m == 0: # no constraints c = zeros([la]) else: # constraints exist if meq > 0 and mieq == 0: # only equality constraints c = c_eq if meq == 0 and mieq > 0: # only inequality constraints c = c_ieq if meq > 0 and mieq > 0: # both equality and inequality constraints exist c = append(c_eq, c_ieq) if mode == 0 or mode == -1: # gradient evaluation required # Compute the derivatives of the objective function # For some reason SLSQP wants g dimensioned to n+1 g = append(fprime(x),0.0) # Compute the normals of the constraints if fprime_eqcons: a_eq = fprime_eqcons(x) else: a_eq = zeros([meq,n]) for i in range(meq): a_eq[i] = eqcons_prime[i](x) if fprime_ieqcons: a_ieq = fprime_ieqcons(x) else: a_ieq = zeros([mieq,n]) for i in range(mieq): a_ieq[i] = ieqcons_prime[i](x) # Now combine a_eq and a_ieq into a single a matrix if m == 0: # no constraints a = zeros([la,n]) elif meq > 0 and mieq == 0: # only equality constraints a = a_eq elif meq == 0 and mieq > 0: # only inequality constraints a = a_ieq elif meq > 0 and mieq > 0: # both equality and inequality constraints exist a = vstack((a_eq,a_ieq)) a = concatenate((a,zeros([la,1])),1) # Call SLSQP slsqp(m, meq, x, xl, xu, fx, c, g, a, acc, majiter, mode, w, jw) # Print the status of the current iterate if iprint > 2 and the # major iteration has incremented if iprint >= 2 and majiter > majiter_prev: print "%5i %5i % 16.6E % 16.6E" % (majiter,feval[0], fx,linalg.norm(g)) # If exit mode is not -1 or 1, slsqp has completed if abs(mode) != 1: break majiter_prev = int(majiter) # Optimization loop complete. 
Print status if requested if iprint >= 1: print exit_modes[int(mode)] + " (Exit mode " + str(mode) + ')' print " Current function value:", fx print " Iterations:", majiter print " Function evaluations:", feval[0] print " Gradient evaluations:", geval[0] if not full_output: return x else: return [list(x), float(fx), int(majiter), int(mode), exit_modes[int(mode)] ]
def Customfmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf, epsilon= numpy.sqrt(numpy.finfo(float).eps), maxiter=None, full_output=0, disp=1, retall=0, callback=None): testVar = 0 x0 = asarray(x0).squeeze() if x0.ndim == 0: x0.shape = (1,) if maxiter is None: maxiter = len(x0)*200 func_calls, f = wrap_function(f, args) if fprime is None: grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon)) else: grad_calls, myfprime = wrap_function(fprime, args) gfk = myfprime(x0) k = 0 N = len(x0) I = numpy.eye(N,dtype=int) Hk = I old_fval = f(x0) old_old_fval = old_fval + 5000 xk = x0 if retall: allvecs = [x0] sk = [2*gtol] warnflag = 0 gnorm = vecnorm(gfk,ord=norm) while (gnorm > gtol) and (k < maxiter): pk = -numpy.dot(Hk,gfk) alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \ linesearch.line_search(f,myfprime,xk,pk,gfk, old_fval,old_old_fval) if alpha_k is None: # line search failed try different one. alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \ line_search(f,myfprime,xk,pk,gfk, old_fval,old_old_fval) if alpha_k is None: # This line search also failed to find a better solution. warnflag = 2 break xkp1 = xk + alpha_k * pk if retall: allvecs.append(xkp1) sk = xkp1 - xk xk = xkp1 if gfkp1 is None: gfkp1 = myfprime(xkp1) yk = gfkp1 - gfk gfk = gfkp1 if callback is not None: callback(xk) k += 1 gnorm = vecnorm(gfk,ord=norm) if (gnorm <= gtol): break try: # this was handled in numeric, let it remaines for more safety rhok = 1.0 / (numpy.dot(yk,sk)) except ZeroDivisionError: rhok = 1000.0 print "Divide-by-zero encountered: rhok assumed large" if numpy.isinf(rhok): # this is patch for numpy rhok = 1000.0 print "Divide-by-zero encountered: rhok assumed large" A1 = I - sk[:,numpy.newaxis] * yk[numpy.newaxis,:] * rhok A2 = I - yk[:,numpy.newaxis] * sk[numpy.newaxis,:] * rhok Hk = numpy.dot(A1,numpy.dot(Hk,A2)) + rhok * sk[:,numpy.newaxis] \ * sk[numpy.newaxis,:] if disp or full_output: fval = old_fval if warnflag == 2: if disp: print "Warning: Desired error not necessarily achieved" \ "due to precision loss" print " Current function value: %f" % fval print " Iterations: %d" % k print " Function evaluations: %d" % func_calls[0] print " Gradient evaluations: %d" % grad_calls[0] elif k >= maxiter: warnflag = 1 if disp: print "Warning: Maximum number of iterations has been exceeded" print " Current function value: %f" % fval print " Iterations: %d" % k print " Function evaluations: %d" % func_calls[0] print " Gradient evaluations: %d" % grad_calls[0] else: if disp: print "Optimization terminated successfully." print " Current function value: %f" % fval print " Iterations: %d" % k print " Function evaluations: %d" % func_calls[0] print " Gradient evaluations: %d" % grad_calls[0] if full_output: retlist = xk, fval, gfk, Hk, func_calls[0], grad_calls[0], warnflag if retall: retlist += (allvecs,) else: retlist = xk if retall: retlist = (xk, allvecs) return retlist
def _minimize(fun, x0, args=(), jac=None, callback=None, gtol=1e-5, fxtol=1e-09, xtol=1e-09, norm=Inf, eps=_epsilon, maxiter=None, disp=False, return_all=False, **unknown_options): _check_unknown_options(unknown_options) f = fun fprime = jac epsilon = eps retall = return_all x0 = asarray(x0).flatten() if x0.ndim == 0: x0.shape = (1,) if maxiter is None: maxiter = len(x0) * 200 func_calls, f = wrap_function(f, args) grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon)) gfk = myfprime(x0) k = 0 N = len(x0) I = numpy.eye(N, dtype=int) Hk = I old_fval = f(x0) old_old_fval = None xk = x0 if retall: allvecs = [x0] sk = [2 * gtol] warnflag = 0 gnorm = vecnorm(gfk, ord=norm) xnorm = np.Inf fx = np.Inf print_lst = [] while (gnorm > gtol) and (xnorm > xtol) and (fx > fxtol) and (k < maxiter): pk = -numpy.dot(Hk, gfk) try: alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \ _line_search_wolfe12(f, myfprime, xk, pk, gfk, old_fval, old_old_fval) except _LineSearchError: # search failed to find a better solution. print_lst.append("Line search failed or could not reach a better solution") warnflag = 2 break xkp1 = xk + alpha_k * pk fx = np.absolute(old_old_fval - old_fval) xnorm = vecnorm(xkp1 - xk) if retall: allvecs.append(xkp1) sk = xkp1 - xk xk = xkp1 if gfkp1 is None: gfkp1 = myfprime(xkp1) yk = gfkp1 - gfk gfk = gfkp1 if callback is not None: callback(xk) k += 1 if disp: print_ = ('Iter: ' + str(k) + '\n') print_ += ('x: ' + str(xk) + '\n') print_ += ('f(x): ' + str(f(xk)) + '\n') # changed to fx print_ +=('gtol: ' + str(gnorm) + '\n') print_ +=('xtol: ' + str(xnorm) + '\n') print_ +=('fxtol: ' + str(fx) + '\n') print_lst.append(print_) gnorm = vecnorm(gfk, ord=norm) if (gnorm <= gtol): break if not numpy.isfinite(old_fval): # We correctly found +-Inf as optimal value, or something went # wrong. print_lst.append("Found +-Inf as the optimal value... or something went wrong.") warnflag = 2 break try: # this was handled in numeric, let it remain for more safety rhok = 1.0 / (numpy.dot(yk, sk)) except ZeroDivisionError: rhok = 1000.0 if disp: print_lst.append("Division by zero!!") if isinf(rhok): # this is patch for numpy rhok = 1000.0 if disp: print_lst.append("Division by zero!!") A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + (rhok * sk[:, numpy.newaxis] * sk[numpy.newaxis, :]) fval = old_fval if np.isnan(fval): # This can happen if the first call to f returned NaN; # the loop is then never entered.
print_lst.append("Osiagnieto Nan w pierwszym wywolaniem algorytmu.") warnflag = 2 if warnflag == 2: msg = _status_message['pr_loss'] if disp: print_ = ("Ostrzezenie: " + msg) print_ += (" Wartosc funkcji celu: %f" % fval) print_ += (" Iteracje: %d" % k) print_ += (" Wywolania funkcji: %d" % func_calls[0]) print_ += (" Wywolania gradientu: %d" % grad_calls[0]) elif k >= maxiter: warnflag = 1 msg = _status_message['maxiter'] if disp: print_ = ("Ostrzerzenie: " + msg) print_ += (" Wartosc funkcji celu: %f" % fval) print_ += (" Iteracje: %d" % k) print_ += (" Wywolania funkcji: %d" % func_calls[0]) print_ += (" Wywolania gradientu: %d" % grad_calls[0]) print_lst.append(print_) else: msg = _status_message['success'] if disp: print_ = (msg + '\n') print_ += (" Wartosc funkcji celu: %f" % fval) print_ += (" Iteracje: %d" % k) print_ += (" Wywolania funkcji: %d" % func_calls[0]) print_ += (" Wywolania gradientu: %d" % grad_calls[0]) print_lst.append(print_) [print(line) for line in print_lst] result = OptimizeResult(fun=fval,lst=print_lst, jac=gfk, hess_inv=Hk, nfev=func_calls[0], njev=grad_calls[0], status=warnflag, success=(warnflag == 0), message=msg, x=xk, nit=k) if retall: result['allvecs'] = allvecs return result
def _minimize_cg(fun, x0, args=(), jac=None, callback=None, gtol=1e-5, norm=Inf, eps=_epsilon, maxiter=None, disp=False, return_all=False, xtol= 1e-6, **unknown_options): """ Minimization of scalar function of one or more variables using the conjugate gradient algorithm. Options for the conjugate gradient algorithm are: disp : bool Set to True to print convergence messages. maxiter : int Maximum number of iterations to perform. gtol : float Gradient norm must be less than `gtol` before successful termination. norm : float Order of norm (Inf is max, -Inf is min). eps : float or ndarray If `jac` is approximated, use this value for the step size. This function is called by the `minimize` function with `method=CG`. It is not supposed to be called directly. """ _check_unknown_options(unknown_options) f = fun fprime = jac epsilon = eps retall = return_all x0 = asarray(x0).flatten() if maxiter is None: maxiter = len(x0) * 200 func_calls, f = wrap_function(f, args) if fprime is None: grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon)) else: grad_calls, myfprime = wrap_function(fprime, args) gfk = myfprime(x0) k = 0 xk = x0 old_fval = f(xk) old_old_fval = None if retall: allvecs = [xk] warnflag = 0 pk = -gfk gnorm = vecnorm(gfk, ord=norm) while (gnorm > gtol) and (k < maxiter): deltak = numpy.dot(gfk, gfk) try: alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \ _line_search_wolfe12(f, myfprime, xk, pk, gfk, old_fval, old_old_fval, c2=0.4, xtol=xtol) except _LineSearchError: # Line search failed to find a better solution. warnflag = 2 break xk = xk + alpha_k * pk if retall: allvecs.append(xk) if gfkp1 is None: gfkp1 = myfprime(xk) yk = gfkp1 - gfk beta_k = max(0, numpy.dot(yk, gfkp1) / deltak) pk = -gfkp1 + beta_k * pk gfk = gfkp1 gnorm = vecnorm(gfk, ord=norm) if callback is not None: callback(xk) k += 1 fval = old_fval if warnflag == 2: msg = _status_message['pr_loss'] if disp: print("Warning: " + msg) print(" Current function value: %f" % fval) print(" Iterations: %d" % k) print(" Function evaluations: %d" % func_calls[0]) print(" Gradient evaluations: %d" % grad_calls[0]) elif k >= maxiter: warnflag = 1 msg = _status_message['maxiter'] if disp: print("Warning: " + msg) print(" Current function value: %f" % fval) print(" Iterations: %d" % k) print(" Function evaluations: %d" % func_calls[0]) print(" Gradient evaluations: %d" % grad_calls[0]) else: msg = _status_message['success'] if disp: print(msg) print(" Current function value: %f" % fval) print(" Iterations: %d" % k) print(" Function evaluations: %d" % func_calls[0]) print(" Gradient evaluations: %d" % grad_calls[0]) result = OptimizeResult(fun=fval, jac=gfk, nfev=func_calls[0], njev=grad_calls[0], status=warnflag, success=(warnflag == 0), message=msg, x=xk) if retall: result['allvecs'] = allvecs return result
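# The direction update in _minimize_cg above is the non-negative ("PR+")
# Polak-Ribiere rule: with g_k the current gradient and y_k = g_{k+1} - g_k,
#   beta_k = max(0, (y_k . g_{k+1}) / (g_k . g_k)),   p_{k+1} = -g_{k+1} + beta_k * p_k.
# Clipping beta_k at zero makes the step fall back to steepest descent (an
# automatic restart) whenever the conjugacy estimate turns negative.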
def Customfmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf, epsilon=numpy.sqrt(numpy.finfo(float).eps), maxiter=None, full_output=0, disp=1, retall=0, callback=None): testVar = 0 x0 = asarray(x0).squeeze() if x0.ndim == 0: x0.shape = (1, ) if maxiter is None: maxiter = len(x0) * 200 func_calls, f = wrap_function(f, args) if fprime is None: grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon)) else: grad_calls, myfprime = wrap_function(fprime, args) gfk = myfprime(x0) k = 0 N = len(x0) I = numpy.eye(N, dtype=int) Hk = I old_fval = f(x0) old_old_fval = old_fval + 5000 xk = x0 if retall: allvecs = [x0] sk = [2 * gtol] warnflag = 0 gnorm = vecnorm(gfk, ord=norm) while (gnorm > gtol) and (k < maxiter): pk = -numpy.dot(Hk, gfk) alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \ linesearch.line_search(f,myfprime,xk,pk,gfk, old_fval,old_old_fval) if alpha_k is None: # line search failed try different one. alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \ line_search(f,myfprime,xk,pk,gfk, old_fval,old_old_fval) if alpha_k is None: # This line search also failed to find a better solution. warnflag = 2 break xkp1 = xk + alpha_k * pk if retall: allvecs.append(xkp1) sk = xkp1 - xk xk = xkp1 if gfkp1 is None: gfkp1 = myfprime(xkp1) yk = gfkp1 - gfk gfk = gfkp1 if callback is not None: callback(xk) k += 1 gnorm = vecnorm(gfk, ord=norm) if (gnorm <= gtol): break try: # this was handled in numeric, let it remaines for more safety rhok = 1.0 / (numpy.dot(yk, sk)) except ZeroDivisionError: rhok = 1000.0 print("Divide-by-zero encountered: rhok assumed large") if numpy.isinf(rhok): # this is patch for numpy rhok = 1000.0 print("Divide-by-zero encountered: rhok assumed large") A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok Hk = numpy.dot(A1,numpy.dot(Hk,A2)) + rhok * sk[:,numpy.newaxis] \ * sk[numpy.newaxis,:] if disp or full_output: fval = old_fval if warnflag == 2: if disp: print("Warning: Desired error not necessarily achieved" \ "due to precision loss") print(" Current function value: %f" % fval) print(" Iterations: %d" % k) print(" Function evaluations: %d" % func_calls[0]) print(" Gradient evaluations: %d" % grad_calls[0]) elif k >= maxiter: warnflag = 1 if disp: print("Warning: Maximum number of iterations has been exceeded") print(" Current function value: %f" % fval) print(" Iterations: %d" % k) print(" Function evaluations: %d" % func_calls[0]) print(" Gradient evaluations: %d" % grad_calls[0]) else: if disp: print("Optimization terminated successfully.") print(" Current function value: %f" % fval) print(" Iterations: %d" % k) print(" Function evaluations: %d" % func_calls[0]) print(" Gradient evaluations: %d" % grad_calls[0]) if full_output: retlist = xk, fval, gfk, Hk, func_calls[0], grad_calls[0], warnflag if retall: retlist += (allvecs, ) else: retlist = xk if retall: retlist = (xk, allvecs) return retlist
def leon_ncg_python(make_f, w_0, make_fprime=None, gtol=1e-5, norm=numpy.Inf, maxiter=None, full_output=0, disp=1, retall=0, callback=None, direction='hestenes-stiefel', minibatch_size=None, minibatch_offset=None, restart_every=0, normalize=False, constrain_lambda=True, ): """Minimize a function using a nonlinear conjugate gradient algorithm. Parameters ---------- make_f : callable make_f(k0, k1) When called with (k0, k1) as arguments, return a function f such that f(w) is the objective to be minimize at parameter w, on minibatch x_k0 to x_k1. If k1 is None then the minibatch should contain all the remaining data. w_0 : ndarray Initial guess. make_fprime : callable make_f'(k0, k1) Same as `make_f`, but to compute the derivative of f on a minibatch. gtol : float Stop when norm of gradient is less than gtol. norm : float Order of vector norm to use. -Inf is min, Inf is max. size (can be scalar or vector). callback : callable An optional user-supplied function, called after each iteration. Called as callback(w_t, lambda_t), where w_t is the current parameter vector and lambda_t the coefficient for the new direction. direction : string Formula used to computed the new direction, among: - polak-ribiere - hestenes-stiefel minibatch_size : int Size of each minibatch. Use None for batch learning. minibatch_offset : int Shift of the minibatch. Use None to use the minibatch size (i.e. no overlap at all). restart_every : int Force restart every this number of iterations. If <= 0, then never force a restart. normalize : bool If True, then use the normalized gradient instead of the gradient itself to find the next search direction, and always normalize the search direction. constrain_lambda : bool If True, then the `lambda_t` factor used to compute conjugate directions is constrained to be non-negative (it is thus set to zero if the formula given by `direction` computes a negative value). Returns ------- xopt : ndarray Parameters which minimize f, i.e. f(xopt) == fopt. fopt : float Minimum value found, f(xopt). func_calls : int The number of function_calls made. grad_calls : int The number of gradient calls made. warnflag : int 1 : Maximum number of iterations exceeded. 2 : Gradient and/or function calls not changing. allvecs : ndarray If retall is True (see other parameters below), then this vector containing the result at each iteration is returned. Other Parameters ---------------- maxiter : int Maximum number of iterations to perform. full_output : bool If True then return fopt, func_calls, grad_calls, and warnflag in addition to xopt. disp : bool Print convergence message if True. retall : bool Return a list of results at each iteration if True. Notes ----- Optimize the function, f, whose gradient is given by fprime using the nonlinear conjugate gradient algorithm of Polak and Ribiere. See Wright & Nocedal, 'Numerical Optimization', 1999, pg. 120-122. """ if minibatch_offset is None: if minibatch_size is None: # Batch learning: no offset is needed. minibatch_offset = 0 else: # Use the same offset as the minibatch size. 
            minibatch_offset = minibatch_size

    w_0 = numpy.asarray(w_0).flatten()
    if maxiter is None:
        maxiter = len(w_0) * 200
    k0 = 0
    k1 = minibatch_size
    assert make_fprime is not None
    f = make_f(k0, k1)
    fprime = make_fprime(k0, k1)
    func_calls = [0]
    grad_calls = [0]
    tmp_func_calls, f = wrap_function(f, ())
    tmp_grad_calls, myfprime = wrap_function(fprime, ())
    g_t = myfprime(w_0)
    t = 0
    N = len(w_0)
    w_t = w_0
    if retall:
        allvecs = [w_t]
    warnflag = 0
    if normalize:
        d_t = -g_t / numpy.linalg.norm(g_t)
    else:
        d_t = -g_t
    gnorm = vecnorm(g_t, ord=norm)
    w_t_previous = None

    while (gnorm > gtol) and (t < maxiter):
        # print('||g_t|| = %s' % numpy.linalg.norm(g_t))
        # Since the function changes at each iteration, we cannot re-use
        # previous function values.
        old_fval = f(w_t)
        if w_t_previous is None:
            old_old_fval = old_fval + 5000
        else:
            old_old_fval = f(w_t_previous)
        # These values are modified by the line search, even if it fails.
        old_fval_backup = old_fval
        old_old_fval_backup = old_old_fval
        alpha_t, fc, gc, old_fval, old_old_fval, h_t = \
            line_search_wolfe1(f, myfprime, w_t, d_t, g_t, old_fval,
                               old_old_fval, c2=0.4)
        if alpha_t is None:
            # Line search failed -- use a different one.
            print('*********************************** LINE SEARCH FAILURE *********************************')
            alpha_t, fc, gc, old_fval, old_old_fval, h_t = \
                line_search_wolfe2(f, myfprime, w_t, d_t, g_t,
                                   old_fval_backup, old_old_fval_backup)
            print('*********************************** %s *********************************' % alpha_t)
            if alpha_t is None or alpha_t == 0:
                # This line search also failed to find a better solution.
                raise AssertionError()
                warnflag = 2
                break
        print('alpha_t = %s' % alpha_t)
        # Update weights.
        w_tp1 = w_t + alpha_t * d_t
        # Compute derivative after the weight update, if not done already.
        if h_t is None:
            h_t = myfprime(w_tp1)
        else:
            assert (h_t == myfprime(w_tp1)).all()  # Sanity check.
        # Switch to next minibatch.
        func_calls[0] += tmp_func_calls[0]
        grad_calls[0] += tmp_grad_calls[0]
        k0 += minibatch_offset
        if minibatch_size is None:
            k1 = None
        else:
            k1 = k0 + minibatch_size
        tmp_func_calls, f = wrap_function(make_f(k0, k1), ())
        tmp_grad_calls, myfprime = wrap_function(make_fprime(k0, k1), ())
        # Compute derivative on new minibatch.
        g_tp1 = myfprime(w_tp1)
        if normalize:
            g_tp1_for_dt = g_tp1 / numpy.linalg.norm(g_tp1)
        else:
            g_tp1_for_dt = g_tp1
        if retall:
            allvecs.append(w_tp1)
        h_t_minus_g_t = h_t - g_t
        if direction == 'polak-ribiere':
            # Polak-Ribiere.
            delta_t = numpy.dot(g_t, g_t)
            lambda_t = numpy.dot(h_t_minus_g_t, g_tp1_for_dt) / delta_t
        elif direction == 'hestenes-stiefel':
            # Hestenes-Stiefel.
            lambda_t = numpy.dot(h_t_minus_g_t, g_tp1_for_dt) / \
                numpy.dot(h_t_minus_g_t, d_t)
        else:
            raise NotImplementedError(direction)
        if constrain_lambda and lambda_t < 0:
            lambda_t = 0
        if restart_every > 0 and (t + 1) % restart_every == 0:
            lambda_t = 0
        if lambda_t == 0:
            print('*** RESTART ***')
        else:
            print('lambda_t = %s' % lambda_t)
        d_t = -g_tp1_for_dt + lambda_t * d_t
        if normalize:
            d_t /= numpy.linalg.norm(d_t)
        g_t = g_tp1
        w_t_previous = w_t
        w_t = w_tp1
        gnorm = vecnorm(g_t, ord=norm)
        if callback is not None:
            callback(w_t, lambda_t)
        t += 1

    if disp or full_output:
        fval = old_fval
    if warnflag == 2:
        if disp:
            print("Warning: Desired error not necessarily achieved "
                  "due to precision loss")
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % t)
            print("         Function evaluations: %d" % func_calls[0])
            print("         Gradient evaluations: %d" % grad_calls[0])
    elif t >= maxiter:
        warnflag = 1
        if disp:
            print("Warning: Maximum number of iterations has been exceeded")
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % t)
            print("         Function evaluations: %d" % func_calls[0])
            print("         Gradient evaluations: %d" % grad_calls[0])
    else:
        if disp:
            print("Optimization terminated successfully.")
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % t)
            print("         Function evaluations: %d" % func_calls[0])
            print("         Gradient evaluations: %d" % grad_calls[0])

    if full_output:
        retlist = w_t, fval, func_calls[0], grad_calls[0], warnflag
        if retall:
            retlist += (allvecs,)
    else:
        retlist = w_t
        if retall:
            retlist = (w_t, allvecs)

    return retlist
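
# Hedged usage sketch for leon_ncg_python above. Not part of the original
# module: it assumes numpy is imported as `numpy` and illustrates how the
# make_f / make_fprime factories are expected to close over a dataset and
# return a minibatch objective f(w) and its gradient. The least-squares data
# below are made up for the example.
def _example_leon_ncg():
    import numpy

    rng = numpy.random.RandomState(0)
    X = rng.randn(200, 5)
    w_true = numpy.arange(1.0, 6.0)
    y = numpy.dot(X, w_true)

    def make_f(k0, k1):
        Xb, yb = X[k0:k1], y[k0:k1]   # k1 is None -> use all remaining data

        def f(w):
            r = numpy.dot(Xb, w) - yb
            return 0.5 * numpy.dot(r, r)
        return f

    def make_fprime(k0, k1):
        Xb, yb = X[k0:k1], y[k0:k1]

        def fprime(w):
            return numpy.dot(Xb.T, numpy.dot(Xb, w) - yb)
        return fprime

    w_0 = numpy.zeros(5)
    # Batch mode (minibatch_size=None) with the Hestenes-Stiefel direction.
    w_opt = leon_ncg_python(make_f, w_0, make_fprime=make_fprime,
                            direction='hestenes-stiefel',
                            minibatch_size=None, disp=0)
    return w_opt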
def _minimize_bhhh(fun, x0, bounds=None, args=(), jac=None, callback=None,
                   tol={"abs": 1e-05, "rel": 1e-08}, norm=np.Inf,
                   maxiter=None, disp=False, return_all=False,
                   **unknown_options):
    """
    Minimization of scalar function of one or more variables using the
    BHHH algorithm.

    Options
    -------
    disp : bool
        Set to True to print convergence messages.
    maxiter : int
        Maximum number of iterations to perform.
    tol : dict
        Absolute and relative tolerance values.
    norm : float
        Order of norm (Inf is max, -Inf is min).
    """
    _check_unknown_options(unknown_options)

    f = fun
    fprime = jac
    retall = return_all
    k = 0
    N = len(x0)

    x0 = np.asarray(x0).flatten()
    if x0.ndim == 0:
        x0.shape = (1,)

    if bounds is None:
        bounds = np.array([np.inf] * N * 2).reshape((2, N))
        bounds[0, :] = -bounds[0, :]
    if bounds.shape[1] != N:
        raise ValueError("length of x0 != length of bounds")

    low = bounds[0, :]
    up = bounds[1, :]
    x0 = np.clip(x0, low, up)

    if maxiter is None:
        maxiter = len(x0) * 200

    # Need the aggregate functions to take only x0 as an argument
    func_calls, agg_fun = wrap_function_agg(f, args)
    if not callable(fprime):
        grad_calls, myfprime = wrap_function_num_dev(f, args)
    else:
        grad_calls, myfprime = wrap_function(fprime, args)

    def agg_fprime(x0):
        return myfprime(x0).sum(axis=0)

    # Setup for iteration
    old_fval = agg_fun(x0)

    gf0 = agg_fprime(x0)
    norm_pg0 = vecnorm(x0 - np.clip(x0 - gf0, low, up), ord=norm)

    xk = x0
    norm_pgk = norm_pg0

    if retall:
        allvecs = [x0]
    warnflag = 0

    for _ in range(maxiter):  # for loop instead.

        # Individual
        gfk_obs = myfprime(xk)

        # Aggregate fprime. Might replace by simply summing up gfk_obs
        gfk = gfk_obs.sum(axis=0)
        norm_pgk = vecnorm(xk - np.clip(xk - gfk, low, up), ord=norm)

        # Check tolerance of gradient norm
        if norm_pgk <= tol["abs"] + tol["rel"] * norm_pg0:
            break

        # Sets the initial step guess to dx ~ 1
        old_old_fval = old_fval + np.linalg.norm(gfk) / 2

        # Calculate BHHH hessian and step
        Hk = np.dot(gfk_obs.T, gfk_obs)
        Bk = np.linalg.inv(Hk)
        pk = np.empty(N)
        pk = -np.dot(Bk, gfk)

        try:
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                _line_search_wolfe12(agg_fun, agg_fprime, xk, pk, gfk,
                                     old_fval, old_old_fval,
                                     amin=1e-100, amax=1e100)
        except _LineSearchError:
            # Line search failed to find a better solution.
            warnflag = 2
            break

        xkp1 = np.clip(xk + alpha_k * pk, low, up)
        if retall:
            allvecs.append(xkp1)
        xk = xkp1
        if callback is not None:
            callback(xk)
        k += 1

        if np.isinf(old_fval):
            # We correctly found +-Inf as optimal value, or something went
            # wrong.
            warnflag = 2
            break

    fval = old_fval

    if warnflag == 2:
        msg = _status_message["pr_loss"]
    elif k >= maxiter:
        warnflag = 1
        msg = _status_message["maxiter"]
    elif np.isnan(fval) or np.isnan(xk).any():
        warnflag = 3
        msg = _status_message["nan"]
    else:
        msg = _status_message["success"]

    if disp:
        print("{}{}".format("Warning: " if warnflag != 0 else "", msg))
        print("         Current function value: %f" % fval)
        print("         Iterations: %d" % k)

    result = OptimizeResult(fun=fval, jac=gfk, hess_inv=Bk,
                            nfev=func_calls[0], njev=grad_calls[0],
                            status=warnflag, success=(warnflag == 0),
                            message=msg, x=xk, nit=k)
    if retall:
        result["allvecs"] = allvecs
    return result
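
# Hedged usage sketch for _minimize_bhhh above. Not part of the original
# module: it assumes numpy is imported as `np` (as in the function body),
# that `jac` returns one gradient row per observation (the BHHH step builds
# the Hessian approximation from the sum of outer products of those rows,
# Hk = G.T @ G), and that the helper wrap_function_agg sums per-observation
# objective values into a scalar. The tiny Poisson-style negative
# log-likelihood below is made up for the example.
def _example_bhhh():
    import numpy as np

    rng = np.random.RandomState(1)
    X = rng.randn(300, 2)
    beta_true = np.array([0.5, -0.25])
    y = rng.poisson(np.exp(np.dot(X, beta_true)))

    def negloglike_obs(beta):
        # One negative log-likelihood contribution per observation
        # (constant log(y!) term dropped).
        eta = np.dot(X, beta)
        return np.exp(eta) - y * eta

    def score_obs(beta):
        # One gradient row per observation (shape: n_obs x n_params).
        mu = np.exp(np.dot(X, beta))
        return (mu - y)[:, None] * X

    result = _minimize_bhhh(negloglike_obs, np.zeros(2), jac=score_obs,
                            disp=False)
    return result.x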