def test_line_search_wolfe2_bounds(self):
    # See gh-7475

    # For this f and p, starting at a point on axis 0, the strong Wolfe
    # condition 2 is met if and only if the step length s satisfies
    # |x + s| <= c2 * |x|
    f = lambda x: np.dot(x, x)
    fp = lambda x: 2 * x
    p = np.array([1, 0])

    # Smallest s satisfying strong Wolfe conditions for these arguments is 30
    x = -60 * p
    c2 = 0.5

    s, _, _, _, _, _ = ls.line_search_wolfe2(f, fp, x, p, amax=30, c2=c2)
    assert_line_wolfe(x, p, s, f, fp)

    s, _, _, _, _, _ = assert_warns(LineSearchWarning,
                                    ls.line_search_wolfe2,
                                    f, fp, x, p, amax=29, c2=c2)
    assert_(s is None)

    # s=30 will only be tried on the 6th iteration, so this won't converge
    assert_warns(LineSearchWarning, ls.line_search_wolfe2,
                 f, fp, x, p, c2=c2, maxiter=5)

def test_line_search_wolfe2(self):
    c = 0
    smax = 100
    for name, f, fprime, x, p, old_f in self.line_iter():
        f0 = f(x)
        g0 = fprime(x)
        self.fcount = 0
        with suppress_warnings() as sup:
            sup.filter(LineSearchWarning,
                       "The line search algorithm did not converge")
            s, fc, gc, fv, ofv, gv = ls.line_search_wolfe2(f, fprime, x, p,
                                                           g0, f0, old_f,
                                                           amax=smax)
        assert_equal(self.fcount, fc + gc)
        assert_fp_equal(ofv, f(x))
        assert_fp_equal(fv, f(x + s * p))
        if gv is not None:
            assert_array_almost_equal(gv, fprime(x + s * p), decimal=14)
        if s < smax:
            c += 1
            assert_line_wolfe(x, p, s, f, fprime, err_msg=name)

    assert_(c > 3)  # check that the iterator really works...

def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval,
                         **kwargs):
    """
    Same as line_search_wolfe1, but fall back to line_search_wolfe2 if
    suitable step length is not found, and raise an exception if a
    suitable step length is not found.

    Raises
    ------
    _LineSearchError
        If no suitable step size is found
    """
    ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
                             old_fval, old_old_fval,
                             **kwargs)

    if ret[0] is None:
        # line search failed: try different one.
        ret = line_search_wolfe2(f, fprime, xk, pk, gfk,
                                 old_fval, old_old_fval, **kwargs)

    if ret[0] is None:
        raise _LineSearchError()

    return ret

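The private helper above is how SciPy's optimizers chain the two Wolfe searches. For experimenting outside SciPy's internals, the same strong-Wolfe search is exposed publicly as `scipy.optimize.line_search`, which wraps `line_search_wolfe2`. A minimal sketch on an illustrative quadratic (the objective, starting point, and direction are my own choices, not from any snippet here):

import numpy as np
from scipy.optimize import line_search  # public wrapper around line_search_wolfe2

def f(x):
    return float(np.dot(x, x))   # illustrative quadratic objective

def fprime(x):
    return 2.0 * x

xk = np.array([1.5, -0.5])
pk = -fprime(xk)                 # steepest-descent direction

alpha, fc, gc, new_fval, old_fval, new_slope = line_search(f, fprime, xk, pk)
if alpha is None:
    # A full implementation would fall back to another search here,
    # the way _line_search_wolfe12 above falls back between wolfe1 and wolfe2.
    raise RuntimeError("line search failed")
print("step length:", alpha, "new f:", new_fval)
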
def test_line_search_wolfe2(self):
    c = 0
    smax = 100
    for name, f, fprime, x, p, old_f in self.line_iter():
        f0 = f(x)
        g0 = fprime(x)
        self.fcount = 0
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', LineSearchWarning)
            s, fc, gc, fv, ofv, gv = ls.line_search_wolfe2(f, fprime, x, p,
                                                           g0, f0, old_f,
                                                           amax=smax)
        assert_equal(self.fcount, fc + gc)
        assert_fp_equal(ofv, f(x))
        assert_fp_equal(fv, f(x + s * p))
        if gv is not None:
            assert_array_almost_equal(gv, fprime(x + s * p), decimal=14)
        if s < smax:
            c += 1
            assert_line_wolfe(x, p, s, f, fprime, err_msg=name)

    assert_(c > 3)  # check that the iterator really works...

def ls_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval):
    """
    Same as line_search_wolfe1, but fall back to line_search_wolfe2,
    then line_search_armijo, and finally a plain backtracking search
    if no suitable step length is found.

    Returns
    -------
    alpha : float or None
        Step length chosen by the first search in the cascade that succeeds.
    """
    ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
                             old_fval, old_old_fval)
    alpha = ret[0]

    if alpha is None or alpha < 1e-12:
        # print('A')
        # line search failed: try different one.
        ret = line_search_wolfe2(f, fprime, xk, pk, gfk,
                                 old_fval, old_old_fval)
        alpha = ret[0]

        if alpha is None or alpha < 1e-12:
            # print('B')
            ret = line_search_armijo(f, xk, pk, gfk, old_fval)
            alpha = ret[0]

            if alpha is None or alpha < 1e-12:
                # print('C')
                alpha = backtracking_line_search(f, gfk, xk, pk)

    return alpha

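The final fallback above calls a `backtracking_line_search` helper that is not defined in this excerpt and is not part of SciPy. A minimal sketch of what such a helper might look like, matching the `(f, gfk, xk, pk)` call order used above; the initial step, shrink factor, and Armijo constant are assumptions:

import numpy as np

def backtracking_line_search(f, gfk, xk, pk, alpha0=1.0, rho=0.5, c1=1e-4,
                             max_halvings=50):
    """Plain backtracking (Armijo) search; hypothetical stand-in for the
    undefined helper used in the final fallback above."""
    alpha = alpha0
    f0 = f(xk)
    slope = np.dot(gfk, pk)  # directional derivative of f at xk along pk
    for _ in range(max_halvings):
        # Accept the first step satisfying the sufficient-decrease condition
        if f(xk + alpha * pk) <= f0 + c1 * alpha * slope:
            return alpha
        alpha *= rho
    return alpha  # return the last (tiny) step if the condition never held
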
def ncg(func, x0, tol=1e-4, max_iter=500, max_n_evals=1000, c1=1e-4, c2=0.1,
        disp=False, trace=False):
    time_start = time.clock()
    f = lambda x: func(x)[0]
    myfprime = lambda x: func(x)[1].T
    iter = 0
    n_evals = 1
    loss, g = func(x0)
    norm = linalg.norm(g, inf)
    x = x0
    d = -g
    if trace:
        hist = dict(f=[loss], norm_g=[norm], n_evals=[0], elaps_t=[0])
    while iter < max_iter and n_evals < max_n_evals:
        res = line_search_wolfe2(f, myfprime, x, d, g.T, loss, c1=c1, c2=c2)
        alpha = res[0]
        fc = res[1]
        gc = res[2]
        x = x + alpha * d
        loss, g_next = func(x)
        norm = linalg.norm(g, inf)
        n_evals = n_evals + fc + gc + 1
        iter = iter + 1
        if trace:
            hist['f'].append(loss)
            hist['norm_g'].append(norm)
            hist['n_evals'].append(n_evals)
            hist['elaps_t'].append(time.clock() - time_start)
        if disp:
            print(iter, ') ', loss[0, 0], ' ', n_evals, ' ', norm)
        if norm < tol:
            result = [x, loss[0, 0], 0]
            if trace:
                result.append(hist)
            return result
        else:
            betta = g_next.T.dot(g_next - g) / (g.T.dot(g))
            d = -g_next + betta * d
            g = g_next
    result = [x, loss[0, 0], 1]
    if trace:
        result.append(hist)
    return result

def newton(func, x0, tol=1e-4, max_iter=500, max_n_evals=1000, c1=1e-4, c2=0.9,
           disp=False, trace=False):
    time_start = time.clock()
    f = lambda x: func(x)[0]
    myfprime = lambda x: func(x)[1].T
    iter = 0
    n_evals = 1
    loss, grad, hess = func(x0)
    choDec = sp.linalg.cho_factor(hess, True)
    gk = sp.linalg.cho_solve(choDec, -grad)
    norm = linalg.norm(grad, inf)
    x = x0
    if trace:
        hist = dict(f=[loss], norm_g=[norm], n_evals=[0], elaps_t=[0])
    while iter < max_iter and n_evals < max_n_evals:
        res = line_search_wolfe2(f, myfprime, x, gk, grad.T, loss, c1=c1, c2=c2)
        alpha = res[0]
        fc = res[1]
        gc = res[2]
        x = x + alpha * gk
        loss, grad, hess = func(x)
        choDec = sp.linalg.cho_factor(hess, True)
        gk = sp.linalg.cho_solve(choDec, -grad)
        norm = linalg.norm(grad, inf)
        n_evals = n_evals + fc + gc + 1
        iter = iter + 1
        if trace:
            hist['f'].append(loss)
            hist['norm_g'].append(norm)
            hist['n_evals'].append(n_evals)
            hist['elaps_t'].append(time.clock() - time_start)
        if disp:
            print(iter, ') ', loss[0, 0], ' ', n_evals, ' ', norm)
        if norm < tol:
            result = [x, loss[0, 0], 0]
            if trace:
                result.append(hist)
            return result
    result = [x, loss[0, 0], 1]
    if trace:
        result.append(hist)
    return result

def test_line_search(self):
    def f(x):
        return jnp.cos(jnp.sum(jnp.exp(-x)) ** 2)

    # assert not line_search(jax.value_and_grad(f), np.ones(2),
    #                        np.array([-0.5, -0.25])).failed
    xk = jnp.ones(2)
    pk = jnp.array([-0.5, -0.25])
    res = line_search(f, xk, pk, maxiter=100)
    scipy_res = line_search_wolfe2(f, grad(f), xk, pk)
    self.assertAllClose(scipy_res[0], res.a_k, atol=1e-5, check_dtypes=False)
    self.assertAllClose(scipy_res[3], res.f_k, atol=1e-5, check_dtypes=False)

def line_search(self, oracle, x_k, d_k, previous_alpha=None):
    """
    Finds the step size alpha for a given starting point x_k
    and for a given search direction d_k that satisfies necessary
    conditions for phi(alpha) = oracle.func(x_k + alpha * d_k).

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func_directional() and .grad_directional() methods
        implemented for computing function values and its directional
        derivatives.
    x_k : np.array
        Starting point
    d_k : np.array
        Search direction
    previous_alpha : float or None
        Starting point to use instead of self.alpha_0 to keep the progress
        from previous steps. If None, self.alpha_0 is used as a starting
        point.

    Returns
    -------
    alpha : float or None if failure
        Chosen step size
    """
    def backtracking(alpha):
        while oracle.func_directional(x_k, d_k, alpha) > \
                oracle.func(x_k) + self.c1 * alpha * oracle.grad(x_k) @ d_k:
            alpha /= 2
        return alpha

    if self._method == 'Constant':
        return self.c
    if self._method == 'Armijo':
        alpha = self.alpha_0 if not previous_alpha else previous_alpha
        return backtracking(alpha)
    if self._method == 'Wolfe':
        alpha = linesearch.line_search_wolfe2(oracle.func, oracle.grad,
                                              x_k, d_k,
                                              c1=self.c1, c2=self.c2)[0]
        if not alpha:
            alpha = backtracking(self.alpha_0)
        return alpha
    return None

def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval,
                         **kwargs):
    """
    Same as line_search_wolfe1, but fall back to line_search_wolfe2 if
    suitable step length is not found, and raise an exception if a
    suitable step length is not found.

    Raises
    ------
    _LineSearchError
        If no suitable step size is found
    """
    extra_condition = kwargs.pop('extra_condition', None)

    ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
                             old_fval, old_old_fval,
                             **kwargs)

    if ret[0] is not None and extra_condition is not None:
        xp1 = xk + ret[0] * pk
        if not extra_condition(ret[0], xp1, ret[3], ret[5]):
            # Reject step if extra_condition fails
            ret = (None,)

    if ret[0] is None:
        # line search failed: try different one.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', LineSearchWarning)
            kwargs2 = {}
            for key in ('c1', 'c2', 'amax'):
                if key in kwargs:
                    kwargs2[key] = kwargs[key]
            ret = line_search_wolfe2(f, fprime, xk, pk, gfk,
                                     old_fval, old_old_fval,
                                     extra_condition=extra_condition,
                                     **kwargs2)

    if ret[0] is None:
        raise _LineSearchError()

    return ret

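This variant also forwards an `extra_condition` callback, which `line_search_wolfe2` (and the public `scipy.optimize.line_search`) accepts with the signature `extra_condition(alpha, x, f, g) -> bool`: a proposed step must satisfy it in addition to the strong Wolfe conditions. A minimal sketch, where the quadratic objective and the norm bound are illustrative assumptions:

import numpy as np
from scipy.optimize import line_search  # public wrapper around line_search_wolfe2

def f(x):
    return float(np.dot(x, x))       # simple quadratic objective (illustrative)

def fprime(x):
    return 2.0 * x

xk = np.array([3.0, 4.0])
pk = -fprime(xk)                     # steepest-descent direction

# Accept a step only if the new point also stays inside a ball of radius 10
# (an arbitrary extra requirement, just to show the callback signature).
def extra_condition(alpha, x, fval, g):
    return np.linalg.norm(x) <= 10.0

alpha, fc, gc, new_fval, old_fval, new_slope = line_search(
    f, fprime, xk, pk, extra_condition=extra_condition)
print(alpha, new_fval)
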
def test_line_search_wolfe2(self):
    c = 0
    smax = 100
    for name, f, fprime, x, p, old_f in self.line_iter():
        f0 = f(x)
        g0 = fprime(x)
        self.fcount = 0
        s, fc, gc, fv, ofv, gv = ls.line_search_wolfe2(f, fprime, x, p,
                                                       g0, f0, old_f,
                                                       amax=smax)
        assert_equal(self.fcount, fc + gc)
        assert_equal(ofv, f(x))
        assert_equal(fv, f(x + s * p))
        if gv is not None:
            assert_equal(gv, fprime(x + s * p))
        if s < smax:
            c += 1
            assert_line_wolfe(x, p, s, f, fprime, err_msg=name)

    assert_(c > 3)  # check that the iterator really works...

def test_line_search(self):
    import jax
    import jax.numpy as np

    def f(x):
        return np.cos(np.sum(np.exp(-x)) ** 2)

    # assert not line_search(jax.value_and_grad(f), np.ones(2),
    #                        np.array([-0.5, -0.25])).failed
    xk = np.ones(2)
    pk = np.array([-0.5, -0.25])

    res = line_search(jax.value_and_grad(f), xk, pk, maxiter=100)

    from scipy.optimize.linesearch import line_search_wolfe2
    scipy_res = line_search_wolfe2(f, jax.grad(f), xk, pk)

    # print(scipy_res[0], res.a_k)
    # print(scipy_res[3], res.f_k)
    assert np.isclose(scipy_res[0], res.a_k)
    assert np.isclose(scipy_res[3], res.f_k)

def test_line_search_wolfe2(self):
    c = 0
    smax = 512
    for name, f, fprime, x, p, old_f in self.line_iter():
        f0 = f(x)
        g0 = fprime(x)
        self.fcount = 0
        with suppress_warnings() as sup:
            sup.filter(LineSearchWarning,
                       "The line search algorithm could not find a solution")
            sup.filter(LineSearchWarning,
                       "The line search algorithm did not converge")
            s, fc, gc, fv, ofv, gv = ls.line_search_wolfe2(f, fprime, x, p,
                                                           g0, f0, old_f,
                                                           amax=smax)
        assert_equal(self.fcount, fc + gc)
        assert_fp_equal(ofv, f(x))
        assert_fp_equal(fv, f(x + s * p))
        if gv is not None:
            assert_array_almost_equal(gv, fprime(x + s * p), decimal=14)
        if s < smax:
            c += 1
            assert_line_wolfe(x, p, s, f, fprime, err_msg=name)

    assert_(c > 3)  # check that the iterator really works...

def ncg(func, x0, tol=1e-4, max_iter=500, c1=1e-4, c2=0.1, disp=False,
        trace=False):
    t_start = time.time()
    func_wrapper = FuncWrapper(func)
    func_f = lambda x: func_wrapper(x)[0]
    func_g = lambda x: func_wrapper(x)[1]

    x_min = x0
    f_min, g_min = func_f(x_min), func_g(x_min)
    direction = -g_min
    norm_g = la.norm(g_min, np.inf)
    n_iter = 0

    list_f = list()
    list_norm_g = list()
    list_n_evals = list()
    list_elaps_t = list()

    while n_iter < max_iter and norm_g > tol:
        wolfe_answer = line_search_wolfe2(func_f, func_g, x_min, direction,
                                          c1=c1, c2=c2)
        alpha = wolfe_answer[0]
        x_min = x_min + alpha * direction

        pre_g_min = g_min
        f_min, g_min = func_f(x_min), func_g(x_min)
        norm_g = la.norm(g_min, np.inf)
        elaps_t = time.time() - t_start

        beta = np.dot(g_min, g_min) / np.dot(direction, g_min - pre_g_min)
        direction = -g_min + beta * direction

        n_iter = n_iter + 1
        list_f.append(f_min)
        list_norm_g.append(norm_g)
        list_n_evals.append(func_wrapper.n_counter)
        list_elaps_t.append(elaps_t)

        if disp:
            print('%s %3d %s %5f %s %5f %s %3d %s %10f' %
                  ('#:', n_iter, ' f:', f_min, ' norm_g:', norm_g,
                   ' n_evals:', func_wrapper.n_counter, ' elaps_t:', elaps_t))

    status = 0 if norm_g < tol else 1

    if trace:
        hist = {
            'f': np.array(list_f),
            'norm_g': np.array(list_norm_g),
            'n_evals': np.array(list_n_evals),
            'elaps_t': np.array(list_elaps_t)
        }
        return x_min, f_min, status, hist
    else:
        return x_min, f_min, status

def fmin_barrier_bfgs(f, x0, fprime=None, gtol=1e-6, norm=Inf,
                      epsilon=_epsilon, maxiter=None, full_output=0,
                      disp=1, retall=0, callback=None, barrier=None):
    """Minimize a function using the BFGS algorithm without jumping a barrier.

    Parameters
    ----------
    f : callable f(x,*args)
        Objective function to be minimized.
    x0 : ndarray
        Initial guess.
    fprime : callable f'(x,*args)
        Gradient of f.
    args : tuple
        Extra arguments passed to f and fprime.
    gtol : float
        Gradient norm must be less than gtol before successful termination.
    norm : float
        Order of norm (Inf is max, -Inf is min)
    epsilon : int or ndarray
        If fprime is approximated, use this value for the step size.
    callback : callable
        An optional user-supplied function to call after each iteration.
        Called as callback(xk), where xk is the current parameter vector.
    barrier : callable
        barrier(x) returns true iff a barrier has been jumped.

    Returns
    -------
    xopt : ndarray
        Parameters which minimize f, i.e. f(xopt) == fopt.
    fopt : float
        Minimum value.
    gopt : ndarray
        Value of gradient at minimum, f'(xopt), which should be near 0.
    Bopt : ndarray
        Value of 1/f''(xopt), i.e. the inverse hessian matrix.
    func_calls : int
        Number of function_calls made.
    grad_calls : int
        Number of gradient calls made.
    warnflag : integer
        1 : Maximum number of iterations exceeded.
        2 : Gradient and/or function calls not changing.
    allvecs : list
        Results at each iteration. Only returned if retall is True.

    Other Parameters
    ----------------
    maxiter : int
        Maximum number of iterations to perform.
    full_output : bool
        If True, return fopt, func_calls, grad_calls, and warnflag in
        addition to xopt.
    disp : bool
        Print convergence message if True.
    retall : bool
        Return a list of results at each iteration if True.

    Notes
    -----
    Optimize the function, f, whose gradient is given by fprime using the
    quasi-Newton method of Broyden, Fletcher, Goldfarb, and Shanno (BFGS)
    See Wright, and Nocedal 'Numerical Optimization', 1999, pg. 198.
    """
    x0 = asarray(x0).squeeze()
    if x0.ndim == 0:
        x0.shape = (1,)
    if maxiter is None:
        maxiter = len(x0) * 200
    func_calls, f = wrap_function(f)
    if barrier is None:
        barr_calls, barr = wrap_function(lambda _: 0)
    else:
        barr_calls, barr = wrap_function(barrier)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime)
    # debug_here()
    if barr(x0):
        print "Optimization started with value violating constraints!"
        sys.stdout.flush()
    gfk = myfprime(x0)
    k = 0
    N = len(x0)
    Hk = numpy.eye(N)
    old_fval = f(x0)
    old_old_fval = old_fval + 5000
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2 * gtol]
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)

    best_x = xk
    best_f = old_fval
    best_k = 0
    best_g = gfk

    while (gnorm > gtol) and (k < maxiter):
        pk = -numpy.dot(Hk, gfk)

        amax, bamax = backtrack(xk, pk, barr)  # scipy.optimize.fmin_bfgs
                                               # modified here
                                               # and line_searches below!
        # amax = 50.
        famax = f(xk + amax * pk)

        if disp:
            print "Iter:%d f:%14.10g #b:%d #f:%d" % (k, old_fval,
                                                     barr_calls[0],
                                                     func_calls[0]),
            if barrier is not None:
                print "barrier%d:%3g f(amax):%3g" % (bamax, amax, famax),

        method = ""
        if (bamax == 0) and (famax < old_fval):
            alpha_k = amax
            old_fval2 = famax
        else:
            alpha_k = None
            try:
                alpha_k, fc, gc, old_fval2, old_old_fval2, gfkp1 = \
                    line_search_wolfe2(f, myfprime, xk, pk, gfk,
                                       old_fval, old_old_fval, amax=amax)
            except:
                if disp:
                    print "Warning: error in line_search_wolfe2.."

            if alpha_k is not None:
                method = "wolfe2"
            else:
                # line search failed: try different one.
                alpha_k, fc, gc, old_fval2, old_old_fval2, gfkp1 = \
                    line_search_wolfe1(f, myfprime, xk, pk, gfk,
                                       old_fval, old_old_fval, amax=amax)

            if alpha_k is None:
                alpha_k, old_fval2 = simple_search(f, xk, pk, amax)
                if alpha_k is not None:
                    method = "simple"

            if alpha_k is None:
                pk = -pk
                alpha_k, old_fval2 = simple_search(f, xk, pk, amax)
                if alpha_k is not None:
                    method = "simple2"
            ## debug_here()
            # if old_fval > famax and isfinite(famax):
            #     alpha_k = amax
            #     old_fval = famax
            #     gfkp1 = myfprime(xk + amax*pk)
            # else:
            #     alpha_k = minimum(alpha_k, amax)

        print

        if alpha_k is not None:
            bval = barr(xk + alpha_k * pk)
        else:
            bval = 1

        if bval:
            if bamax:
                warnflag = 2
                break
            if famax < old_fval:
                alpha_k = amax
            else:
                alpha_k, old_fval = simple_search(f, xk, pk, amax)
                method = "simple3"
            # if alpha_k is not None:
            #     old_fval = f(xk + alpha_k*pk)
            #     gfkp1 = myfprime(xk + alpha_k*pk)

        if alpha_k is None:
            old_fval = f(xk)
            warnflag = 2
            break

        old_old_fval = old_fval
        old_fval = old_fval2
        xkp1 = xk + alpha_k * pk
        gfkp1 = myfprime(xk + alpha_k * pk)
        gnorm = vecnorm(gfkp1, ord=norm)
        print "gnorm:%4e %s" % (gnorm, method)

        if callback is not None:
            callback(xk)
        if retall:
            allvecs.append(xkp1)

        sk = xkp1 - xk
        xk = xkp1
        old_fval = f(xk)
        if not isfinite(old_fval):
            pass  # debug_here()
        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)

        yk = gfkp1 - gfk
        gfk = gfkp1
        k += 1

        if old_fval < best_f:
            best_x = xk
            best_f = old_fval
            best_k = k
            best_g = gfk

        if (gnorm <= gtol) or (k > best_k + 10):
            break

        try:  # this was handled in numeric, let it remain for more safety
            rhok = 1.0 / (numpy.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
            print "Divide-by-zero encountered: rhok assumed large"
        if isinf(rhok):  # this is patch for numpy
            rhok = 1000.0
            print "Divide-by-zero encountered: rhok assumed large"

        # I = numpy.eye(N, dtype=int)
        # A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok
        # A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok
        # Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + rhok * sk[:, numpy.newaxis] \
        #      * sk[numpy.newaxis, :]

        # Same as above with inplace operations
        Hkyk = numpy.dot(Hk, yk) * rhok
        numpy.add(Hk, -Hkyk[:, numpy.newaxis] * sk[numpy.newaxis, :], Hk)
        Hkyk = numpy.dot(Hk.T, yk) * rhok
        numpy.add(Hk, -sk[:, numpy.newaxis] * Hkyk[numpy.newaxis, :], Hk)
        numpy.add(Hk, rhok * sk[:, numpy.newaxis] * sk[numpy.newaxis, :], Hk)

    if disp or full_output:
        fval = best_f
    if warnflag == 2:
        if disp:
            print "Warning: Desired error not necessarily achieved " \
                  "due to precision loss"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Barrier evaluations: %d" % barr_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]
    elif k >= maxiter:
        warnflag = 1
        if disp:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Barrier evaluations: %d" % barr_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]
    else:
        if disp:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Barrier evaluations: %d" % barr_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]

    if full_output:
        retlist = best_x, fval, best_g, func_calls[0], grad_calls[0], warnflag
        if retall:
            retlist += (allvecs,)
    else:
        retlist = best_x
        if retall:
            retlist = (best_x, allvecs)

    return retlist

def lbfgs(func, x0, tol=1e-4, max_iter=500, m=10, c1=1e-4, c2=0.9, disp=False,
          trace=False):
    t_start = time.time()
    func_wrapper = FuncWrapper(func)
    func_f = lambda x: func_wrapper(x)[0]
    func_g = lambda x: func_wrapper(x)[1]

    x_min = x0
    f_min, g_min = func_f(x_min), func_g(x_min)
    sy_hist = deque(maxlen=m)
    n_iter = 0
    norm_g = la.norm(g_min, np.inf)

    list_f = list()
    list_norm_g = list()
    list_n_evals = list()
    list_elaps_t = list()

    while n_iter < max_iter and norm_g > tol:
        pre_x = x_min
        pre_g = g_min
        direction = lbfgs_compute_dir(sy_hist, g_min)
        wolfe_answer = line_search_wolfe2(func_f, func_g, x_min, direction,
                                          g_min, c1=c1, c2=c2)
        alpha = wolfe_answer[0]
        x_min = x_min + alpha * direction

        f_min, g_min = func_f(x_min), func_g(x_min)
        norm_g = la.norm(g_min, np.inf)
        elaps_t = time.time() - t_start

        s = x_min - pre_x
        y = g_min - pre_g
        sy_hist.append(np.array([s, y]))

        n_iter = n_iter + 1
        list_f.append(f_min)
        list_norm_g.append(norm_g)
        list_n_evals.append(func_wrapper.n_counter)
        list_elaps_t.append(elaps_t)

        if disp:
            print('%s %3d %s %5f %s %5f %s %3d %s %10f' %
                  ('#:', n_iter, ' f:', f_min, ' norm_g:', norm_g,
                   ' n_evals:', func_wrapper.n_counter, ' elaps_t:', elaps_t))

    status = 0 if la.norm(g_min, np.inf) < tol else 1

    if trace:
        hist = {
            'f': np.array(list_f),
            'norm_g': np.array(list_norm_g),
            'n_evals': np.array(list_n_evals),
            'elaps_t': np.array(list_elaps_t)
        }
        return x_min, f_min, status, hist
    else:
        return x_min, f_min, status

def hfn(func, x0, hess_vec, tol=1e-4, max_iter=500, c1=1e-4, c2=0.9,
        disp=False, trace=False):
    t_start = time.time()
    func_wrapper = FuncWrapper(func)
    func_f = lambda x: func_wrapper(x)[0]
    func_g = lambda x: func_wrapper(x)[1]

    x_min = x0
    f_min, g_min = func_f(x_min), func_g(x_min)
    n_iter = 0
    norm_g = la.norm(g_min, np.inf)

    list_f = list()
    list_norm_g = list()
    list_n_evals = list()
    list_elaps_t = list()

    while n_iter < max_iter and norm_g >= tol:
        hess_vec_c = lambda v: hess_vec(x_min, v)
        forcing = min(0.5, norm_g**(0.5))
        cg_answer = cg(hess_vec_c, -g_min, x_min, tol=forcing * norm_g,
                       trace=True)
        direction = cg_answer[0]
        pre_direction = direction

        not_direction = np.dot(direction, -g_min) <= 0
        while not_direction:
            forcing = 0.1 * forcing
            cg_answer = cg(hess_vec_c, -g_min, pre_direction,
                           tol=forcing * norm_g, trace=True)
            direction = cg_answer[0]
            not_direction = np.dot(direction, -g_min) <= 0

        wolfe_answer = line_search_wolfe2(func_f, func_g, x_min, direction,
                                          func_g(x_min), c1=c1, c2=c2)
        alpha = wolfe_answer[0]
        x_min = x_min + alpha * direction

        f_min, g_min = func_f(x_min), func_g(x_min)
        norm_g = la.norm(g_min, np.inf)
        elaps_t = time.time() - t_start

        n_iter = n_iter + 1
        list_f.append(f_min)
        list_norm_g.append(norm_g)
        list_n_evals.append(func_wrapper.n_counter)
        list_elaps_t.append(elaps_t)

        if disp:
            print('%s %3d %s %5f %s %5f %s %3d %s %8f' %
                  ('#:', n_iter, ' f:', f_min, ' norm_g:', norm_g,
                   ' n_evals:', func_wrapper.n_counter, ' elaps_t:', elaps_t))

    status = 0 if norm_g < tol else 1

    if trace:
        hist = {
            'f': np.array(list_f),
            'norm_g': np.array(list_norm_g),
            'n_evals': np.array(list_n_evals),
            'elaps_t': np.array(list_elaps_t)
        }
        return x_min, f_min, status, hist
    else:
        return x_min, f_min, status

def run(self, *a, **kw):
    status = RUNNING
    fi = self.f(self.x0)
    fi_old = fi + 5000
    gi, ur, si = self.reset(self.x0, *a, **kw)
    xi = self.x0
    xi_old = numpy.nan
    it = 0

    while it < self.maxiter:
        if not self.runsignal.is_set():
            break
        if self.f_call.value > self.max_f_eval:
            status = MAX_F_EVAL

        gi = -self.df(xi, *a, **kw)
        if numpy.dot(gi.T, gi) <= self.gtol:
            status = CONVERGED
            break
        if numpy.isnan(numpy.dot(gi.T, gi)):
            if numpy.any(numpy.isnan(xi_old)):
                status = CONVERGED
                break
            self.reset(xi_old)

        gammai = ur(gi)
        if gammai < 1e-6 or it % xi.shape[0] == 0:
            gi, ur, si = self.reset(xi, *a, **kw)
        si = gi + gammai * si

        alphai, _, _, fi2, fi_old2, gfi = line_search_wolfe1(
            self.f, self.df, xi, si, gi, fi, fi_old)

        if alphai is None:
            alphai, _, _, fi2, fi_old2, gfi = \
                line_search_wolfe2(self.f, self.df, xi, si, gi, fi, fi_old)
            if alphai is None:
                # This line search also failed to find a better solution.
                status = LINE_SEARCH
                break

        if fi2 < fi:
            fi, fi_old = fi2, fi_old2
        if gfi is not None:
            gi = gfi

        if numpy.isnan(fi) or fi_old < fi:
            gi, ur, si = self.reset(xi, *a, **kw)
        else:
            xi += numpy.dot(alphai, si)

        if self.messages:
            sys.stdout.write("\r")
            sys.stdout.flush()
            sys.stdout.write(
                "iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(
                    it, fi, numpy.dot(gi.T, gi)))

        if it % self.report_every == 0:
            self.callback(xi, fi, gi, it, self.f_call.value,
                          self.df_call.value, status)
        it += 1
    else:
        status = MAXITER

    self.callback_return(xi, fi, gi, it, self.f_call.value,
                         self.df_call.value, status)
    self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value,
                   status]

def _minimize_bfgs(fun, x0, args=(), jac=None, callback=None,
                   tol=1e-5, norm=Inf, eps=_epsilon, maxiter=None,
                   disp=False, return_all=False,
                   **unknown_options):
    """
    Minimization of scalar function of one or more variables using the
    BFGS algorithm.

    Options for the BFGS algorithm are:
        disp : bool
            Set to True to print convergence messages.
        maxiter : int
            Maximum number of iterations to perform.
        tol : float
            Cost change must be less than `tol` before successful
            termination.
        norm : float
            Order of norm (Inf is max, -Inf is min).
        eps : float or ndarray
            If `jac` is approximated, use this value for the step size.

    This function is called by the `minimize` function with `method=BFGS`.
    It is not supposed to be called directly.
    """
    _check_unknown_options(unknown_options)
    f = fun
    fprime = jac
    epsilon = eps
    retall = return_all

    x0 = asarray(x0).flatten()
    if x0.ndim == 0:
        x0.shape = (1,)
    if maxiter is None:
        maxiter = len(x0) * 200
    func_calls, f = wrap_function(f, args)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime, args)
    gfk = myfprime(x0)
    k = 0
    N = len(x0)
    I = numpy.eye(N, dtype=int)
    Hk = I
    old_fval = f(x0)
    old_old_fval = old_fval + 5000
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2 * 0.1]
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)

    while (fabs(old_fval - old_old_fval) > tol) and (k < maxiter):
        pk = -numpy.dot(Hk, gfk)
        alpha_k, fc, gc, old_fval2, old_old_fval2, gfkp1 = \
            line_search_wolfe1(f, myfprime, xk, pk, gfk,
                               old_fval, old_old_fval)
        if alpha_k is not None:
            old_fval = old_fval2
            old_old_fval = old_old_fval2
        else:
            # line search failed: try different one.
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                line_search_wolfe2(f, myfprime, xk, pk, gfk,
                                   old_fval, old_old_fval)
            if alpha_k is None:
                # This line search also failed to find a better solution.
                warnflag = 2
                break
        xkp1 = xk + alpha_k * pk
        if retall:
            allvecs.append(xkp1)
        sk = xkp1 - xk
        xk = xkp1
        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)

        yk = gfkp1 - gfk
        gfk = gfkp1
        if callback is not None:
            callback(xk)
        k += 1
        gnorm = vecnorm(gfk, ord=norm)
        # if (gnorm <= gtol):
        #     break

        if not numpy.isfinite(old_fval):
            # We correctly found +-Inf as optimal value, or something went
            # wrong.
            warnflag = 2
            break

        try:  # this was handled in numeric, let it remain for more safety
            rhok = 1.0 / (numpy.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
            print("Divide-by-zero encountered: rhok assumed large")
        if isinf(rhok):  # this is patch for numpy
            rhok = 1000.0
            print("Divide-by-zero encountered: rhok assumed large")
        A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok
        A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok
        Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + rhok * sk[:, numpy.newaxis] \
             * sk[numpy.newaxis, :]

    fval = old_fval
    if warnflag == 2:
        msg = _status_message['pr_loss']
        if disp:
            print("Warning:", msg)
            print(" Current function value:", fval)
            print(" Iterations:", k)
            print(" Function evaluations:", func_calls[0])
            print(" Gradient evaluations:", grad_calls[0])
    elif k >= maxiter:
        warnflag = 1
        msg = _status_message['maxiter']
        if disp:
            print("Warning:", msg)
            print(" Current function value:", fval)
            print(" Iterations:", k)
            print(" Function evaluations:", func_calls[0])
            print(" Gradient evaluations:", grad_calls[0])
    else:
        msg = _status_message['success']
        if disp:
            print(msg)
            print(" Current function value:", fval)
            print(" Iterations:", k)
            print(" Function evaluations:", func_calls[0])
            print(" Gradient evaluations:", grad_calls[0])

    result = Result(fun=fval, jac=gfk, hess=Hk, nfev=func_calls[0],
                    njev=grad_calls[0], status=warnflag,
                    success=(warnflag == 0), message=msg, x=xk)

    if retall:
        result['allvecs'] = allvecs
    return result

def hfn(func, x0, hess_vec, tol=1e-5, max_iter=500, c1=1e-4, c2=0.9,
        disp=False, trace=False):
    if trace:
        hist = {}
        hist['f'] = []
        hist['norm_g'] = []
        hist['elaps_t'] = []
    start_time = time.clock()

    f = lambda x: func(x)[0]
    df = lambda x: func(x)[1]
    x = x0
    [loss, grad, extra] = func(x)
    grad_norm = linalg.norm(grad, inf)
    eps = min(1 / 2, sqrt(grad_norm)) * grad_norm

    for i in range(0, max_iter):
        # Start CG
        z = zeros(shape(x))
        g = grad
        d = -g
        u = hess_vec(x, d, extra)
        for j in range(0, 1000):
            gamma = g.transpose().dot(g) / (d.transpose().dot(u))
            z = z + gamma * d
            g1 = g + gamma * u
            b = True
            if linalg.norm(g1, inf) < eps:
                b = False
                break
            else:
                betta = g1.transpose().dot(g1) / (g.transpose().dot(g))
                d = -g1 + betta * d
                u = hess_vec(x, d, extra)
                g = g1
        if b:
            print('CG did not converge')

        # One-dimensional line search
        alpha = line_search_wolfe2(f=f, myfprime=df, xk=x, pk=z, gfk=grad,
                                   old_fval=loss, c1=c1, c2=c2)
        if alpha[0] is None:
            # scipy's line_search_armijo takes no gradient-function argument
            alpha = line_search_armijo(f=f, xk=x, pk=z, gfk=grad,
                                       old_fval=loss, c1=c1, alpha0=1)
        x = x + alpha[0] * z

        [loss, grad, extra] = func(x)
        grad_norm = linalg.norm(grad, inf)
        eps = min(1 / 2, sqrt(grad_norm)) * grad_norm

        if disp:
            print(str(1 + i) + ')', loss, grad_norm)
        if trace:
            hist['f'].append(loss)
            hist['norm_g'].append(grad_norm)
            current_time = time.clock() - start_time
            hist['elaps_t'].append(current_time)
        if grad_norm < tol:
            return x, loss, 0
    return x, loss, 1
