def loopThing_BFGS(inputParams):
    for key in inputParams:
        exec(key + " = inputParams['" + key + "']")
    pk = -numpy.dot(Hk, gfk)
    alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
        linesearch.line_search(f, myfprime, xk, pk, gfk,
                               old_fval, old_old_fval)
    if alpha_k is None:  # line search failed -- try a different one.
        alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
            line_search(f, myfprime, xk, pk, gfk,
                        old_fval, old_old_fval)
        if alpha_k is None:
            # This line search also failed to find a better solution.
            warnflag = 2
            outputParams = getOutputParams(inputParams, list(locals().items()))
            return outputParams
    xkp1 = xk + alpha_k * pk
    if retall:
        allvecs.append(xkp1)
    sk = xkp1 - xk
    xk = xkp1
    if gfkp1 is None:
        gfkp1 = myfprime(xkp1)
    yk = gfkp1 - gfk
    gfk = gfkp1
    if callback is not None:
        callback(xk)
    k += 1
    gnorm = vecnorm(gfk, ord=norm)
    if (gnorm <= gtol):
        outputParams = getOutputParams(inputParams, list(locals().items()))
        return outputParams
    try:  # this was handled in numeric, let it remain for more safety
        rhok = 1.0 / (numpy.dot(yk, sk))
    except ZeroDivisionError:
        rhok = 1000.0
        print("Divide-by-zero encountered: rhok assumed large")
    if numpy.isinf(rhok):  # this is a patch for numpy
        rhok = 1000.0
        print("Divide-by-zero encountered: rhok assumed large")
    A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok
    A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok
    Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + rhok * sk[:, numpy.newaxis] \
        * sk[numpy.newaxis, :]
    outputParams = getOutputParams(inputParams, list(locals().items()))
    return outputParams
def loopThing(inputParams):
    for key in inputParams:
        exec(key + " = inputParams['" + key + "']")
    pk = -numpy.dot(Hk, gfk)
    alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
        linesearch.line_search(f, myfprime, xk, pk, gfk,
                               old_fval, old_old_fval)
    if alpha_k is None:  # line search failed -- try a different one.
        alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
            line_search(f, myfprime, xk, pk, gfk,
                        old_fval, old_old_fval)
        if alpha_k is None:
            # This line search also failed to find a better solution.
            warnflag = 2
            outputParams = getOutputParams(inputParams, locals().items())
            return outputParams
    xkp1 = xk + alpha_k * pk
    if retall:
        allvecs.append(xkp1)
    sk = xkp1 - xk
    xk = xkp1
    if gfkp1 is None:
        gfkp1 = myfprime(xkp1)
    yk = gfkp1 - gfk
    gfk = gfkp1
    if callback is not None:
        callback(xk)
    k += 1
    gnorm = vecnorm(gfk, ord=norm)
    if (gnorm <= gtol):
        outputParams = getOutputParams(inputParams, locals().items())
        return outputParams
    try:  # this was handled in numeric, let it remain for more safety
        rhok = 1.0 / (numpy.dot(yk, sk))
    except ZeroDivisionError:
        rhok = 1000.0
        print "Divide-by-zero encountered: rhok assumed large"
    if numpy.isinf(rhok):  # this is a patch for numpy
        rhok = 1000.0
        print "Divide-by-zero encountered: rhok assumed large"
    A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok
    A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok
    Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + rhok * sk[:, numpy.newaxis] \
        * sk[numpy.newaxis, :]
    outputParams = getOutputParams(inputParams, locals().items())
    return outputParams
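# Note (editorial): the two loop bodies above unpack their state with
# exec(key + " = inputParams['" + key + "']"), which only injects new local
# variables reliably under Python 2; under Python 3, exec() cannot create
# function locals this way.  Reading the bodies, inputParams is expected to
# carry at least the iteration state used below -- roughly:
#   Hk, gfk, xk, old_fval, old_old_fval   (inverse Hessian, gradient, iterate)
#   f, myfprime                           (objective and gradient callables)
#   k, gtol, norm, I, warnflag            (counter, tolerances, identity, flag)
#   retall, allvecs, callback             (bookkeeping)
# This list is inferred from the code itself, not from any accompanying
# documentation.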
def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval,
                args=(), c1=1e-4, c2=0.9, amax=50):
    try:  # XXX: break dependency on scipy.optimize.linesearch
        from scipy.optimize import linesearch
        alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
            linesearch.line_search(f, myfprime, xk, pk, gfk,
                                   old_fval, old_old_fval, args, c1, c2, amax)
    except ImportError:
        alpha_k = None
        fc = 0
        gc = 0
        gfkp1 = gfk  # XXX: or None ?
    return alpha_k, fc, gc, old_fval, old_old_fval, gfkp1
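# A minimal sketch of how the fallback wrapper above is meant to be called,
# using a simple quadratic objective.  It assumes an older SciPy that still
# provides scipy.optimize.linesearch; on newer releases the import inside the
# wrapper fails and alpha_k comes back as None, which callers treat as "line
# search failed".  The names quad and quad_grad are illustrative only and are
# not part of the original module.
if __name__ == "__main__":
    import numpy

    def quad(x):
        return 0.5 * numpy.dot(x, x)

    def quad_grad(x):
        return x

    xk = numpy.array([1.0, -2.0])
    gfk = quad_grad(xk)
    pk = -gfk                       # steepest-descent direction
    old_fval = quad(xk)
    old_old_fval = old_fval + 5000

    alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
        line_search(quad, quad_grad, xk, pk, gfk, old_fval, old_old_fval)
    # alpha_k is a Wolfe-condition step length, or None if the SciPy line
    # search is unavailable or did not find an acceptable step.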
def my_fmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf,
                 epsilon=_epsilon, maxiter=None, full_output=0, disp=1,
                 retall=0, callback=None):
    """Minimize a function using the BFGS algorithm.

    :Parameters:

      f : callable f(x,*args)
          Objective function to be minimized.
      x0 : ndarray
          Initial guess.
      fprime : callable f'(x,*args)
          Gradient of f.
      args : tuple
          Extra arguments passed to f and fprime.
      gtol : float
          Gradient norm must be less than gtol before successful termination.
      norm : float
          Order of norm (Inf is max, -Inf is min).
      epsilon : int or ndarray
          If fprime is approximated, use this value for the step size.
      callback : callable
          An optional user-supplied function to call after each iteration.
          Called as callback(xk), where xk is the current parameter vector.

    :Returns: (xopt, {fopt, gopt, Hopt, func_calls, grad_calls, warnflag}, <allvecs>)

      xopt : ndarray
          Parameters which minimize f, i.e. f(xopt) == fopt.
      fopt : float
          Minimum value.
      gopt : ndarray
          Value of gradient at minimum, f'(xopt), which should be near 0.
      Bopt : ndarray
          Value of 1/f''(xopt), i.e. the inverse Hessian matrix.
      func_calls : int
          Number of function calls made.
      grad_calls : int
          Number of gradient calls made.
      warnflag : integer
          1 : Maximum number of iterations exceeded.
          2 : Gradient and/or function calls not changing.
      allvecs : list
          Results at each iteration.  Only returned if retall is True.

    *Other Parameters*:

      maxiter : int
          Maximum number of iterations to perform.
      full_output : bool
          If True, return fopt, func_calls, grad_calls, and warnflag
          in addition to xopt.
      disp : bool
          Print convergence message if True.
      retall : bool
          Return a list of results at each iteration if True.

    :Notes:

      Optimize the function, f, whose gradient is given by fprime
      using the quasi-Newton method of Broyden, Fletcher, Goldfarb,
      and Shanno (BFGS).  See Wright, and Nocedal 'Numerical
      Optimization', 1999, pg. 198.

    *See Also*:

      scikits.openopt : SciKit which offers a unified syntax to call
      this and other solvers.

    """
    x0 = asarray(x0).squeeze()
    if x0.ndim == 0:
        x0.shape = (1,)
    if maxiter is None:
        maxiter = len(x0) * 200
    func_calls, f = wrap_function(f, args)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime, args)
    print "Evaluating initial gradient ..."
    gfk = myfprime(x0)
    k = 0
    N = len(x0)
    I = numpy.eye(N, dtype=int)
    Hk = I
    print "Evaluating initial function value ..."
    fval = f(x0)
    old_fval = fval + 5000
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2 * gtol]
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)
    print "gtol = %g" % gtol
    print "gnorm = %g" % gnorm
    while (gnorm > gtol) and (k < maxiter):
        pk = -numpy.dot(Hk, gfk)
        print "xk =", xk
        print "pk =", pk
        print "Begin iteration %d line search..." % (k + 1)
        # print " gfk =", gfk
        # print " Hk = \n", Hk

        # do line search for alpha_k
        old_old_fval = old_fval
        old_fval = fval
        alpha_k, fc, gc, fval, old_fval, gfkp1 = \
            linesearch.line_search(f, myfprime, xk, pk, gfk,
                                   old_fval, old_old_fval)
        if alpha_k is None:  # line search failed -- try a different one.
            print "Begin line search (method 2) ..."
            alpha_k, fc, gc, fval, old_fval, gfkp1 = \
                line_search(f, myfprime, xk, pk, gfk,
                            old_fval, old_old_fval)
            if alpha_k is None:
                # This line search also failed to find a better solution.
                print "Line search failed!"
                warnflag = 2
                break
        print "End line search, alpha = %g ..." % alpha_k
        xkp1 = xk + alpha_k * pk
        if retall:
            allvecs.append(xkp1)
        sk = xkp1 - xk
        xk = xkp1
        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)
        yk = gfkp1 - gfk
        gfk = gfkp1
        if callback is not None:
            callback(xk)
        k += 1
        gnorm = vecnorm(gfk, ord=norm)
        print "gnorm = %g" % gnorm
        if (k >= maxiter or gnorm <= gtol):
            break

        # Reset the initial quasi-Newton matrix to a scaled identity aimed
        # at reflecting the size of the inverse true Hessian.
        deltaXDeltaGrad = numpy.dot(sk, yk)
        updateOk = deltaXDeltaGrad >= _epsilon * max(_epsilonSq, \
            vecnorm(sk, ord=2) * vecnorm(yk, ord=2))
        if k == 1 and updateOk:
            Hk = deltaXDeltaGrad / numpy.dot(yk, yk) * numpy.eye(N)
            print "Hscaled =\n", Hk

        try:  # this was handled in numeric, let it remain for more safety
            rhok = 1.0 / (numpy.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
            print "Divide-by-zero encountered: rhok assumed large"
        if isinf(rhok):  # this is a patch for numpy
            rhok = 1000.0
            print "Divide-by-zero encountered: rhok assumed large"
        A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok
        A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok
        Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + rhok * sk[:, numpy.newaxis] \
            * sk[numpy.newaxis, :]

    if gnorm > gtol:
        warnflag = 1

    if disp:
        if warnflag == 1:
            print "Warning: Maximum number of iterations has been exceeded"
        elif warnflag == 2:
            print "Warning: Desired error not necessarily achieved " \
                  "due to precision loss"
        else:
            print "Optimization terminated successfully."
        print "         Current function value: %g" % fval
        print "         Current gradient norm : %g" % gnorm
        print "         Gradient tolerance    : %g" % gtol
        print "         Iterations: %d" % k
        print "         Function evaluations: %d" % func_calls[0]
        print "         Gradient evaluations: %d" % grad_calls[0]

    if full_output:
        retlist = xk, fval, gfk, Hk, func_calls[0], grad_calls[0], warnflag
        if retall:
            retlist += (allvecs,)
    else:
        retlist = xk
        if retall:
            retlist = (xk, allvecs)

    return retlist
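# A usage sketch for my_fmin_bfgs above, minimizing a two-dimensional
# quadratic.  It assumes the module-level helpers the function relies on
# (numpy, Inf, _epsilon, _epsilonSq, vecnorm, wrap_function, approx_fprime and
# the linesearch module) are available, as in the SciPy optimize code this
# routine is derived from.  The objective and helper name are illustrative
# only, not part of the original module.
def _example_my_fmin_bfgs():
    import numpy

    def f(x):
        return (x[0] - 1.0) ** 2 + 10.0 * (x[1] + 2.0) ** 2

    def fprime(x):
        return numpy.array([2.0 * (x[0] - 1.0), 20.0 * (x[1] + 2.0)])

    x0 = numpy.zeros(2)
    # full_output=1 returns the tuple documented in the docstring above.
    xopt, fopt, gopt, Hopt, func_calls, grad_calls, warnflag = \
        my_fmin_bfgs(f, x0, fprime=fprime, gtol=1e-6, full_output=1, disp=0)
    return xopt  # should be close to [1.0, -2.0]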
def __call__( self, x0, conf = None, obj_fun = None, obj_fun_grad = None,
              status = None, obj_args = None ):
    # def fmin_sd( conf, x0, fn_of, fn_ofg, args = () ):
    conf = get_default( conf, self.conf )
    obj_fun = get_default( obj_fun, self.obj_fun )
    obj_fun_grad = get_default( obj_fun_grad, self.obj_fun_grad )
    status = get_default( status, self.status )
    obj_args = get_default( obj_args, self.obj_args )

    if conf.output:
        globals()['output'] = conf.output

    output( 'entering optimization loop...' )

    nc_of, tt_of, fn_of = wrap_function( obj_fun, obj_args )
    nc_ofg, tt_ofg, fn_ofg = wrap_function( obj_fun_grad, obj_args )

    time_stats = {'of' : tt_of, 'ofg': tt_ofg, 'check' : []}

    ofg = None
    it = 0
    xit = x0.copy()
    while 1:
        of = fn_of( xit )

        if it == 0:
            of0 = ofit0 = of_prev = of
            of_prev_prev = of + 5000.0

        if ofg is None:
            ofg = fn_ofg( xit )

        if conf.check:
            tt = time.clock()
            check_gradient( xit, ofg, fn_of, conf.delta, conf.check )
            time_stats['check'].append( time.clock() - tt )

        ofg_norm = nla.norm( ofg, conf.norm )

        ret = conv_test( conf, it, of, ofit0, ofg_norm )
        if ret >= 0:
            break
        ofit0 = of

        ##
        # Backtrack (on errors).
        alpha = conf.ls0
        can_ls = True
        while 1:
            xit2 = xit - alpha * ofg
            aux = fn_of( xit2 )

            if self.log is not None:
                self.log(of, ofg_norm, alpha, it)

            if aux is None:
                alpha *= conf.ls_red_warp
                can_ls = False
                output( 'warp: reducing step (%f)' % alpha )
            elif conf.ls and conf.ls_method == 'backtracking':
                if aux < of * conf.ls_on:
                    break
                alpha *= conf.ls_red
                output( 'backtracking: reducing step (%f)' % alpha )
            else:
                of_prev_prev = of_prev
                of_prev = aux
                break

            if alpha < conf.ls_min:
                if aux is None:
                    raise RuntimeError, 'giving up...'
                output( 'linesearch failed, continuing anyway' )
                break

        # These values are modified by the line search, even if it fails
        of_prev_bak = of_prev
        of_prev_prev_bak = of_prev_prev

        if conf.ls and can_ls and conf.ls_method == 'full':
            output( 'full linesearch...' )
            alpha, fc, gc, of_prev, of_prev_prev, ofg1 = \
                linesearch.line_search(fn_of,fn_ofg,xit,
                                       -ofg,ofg,of_prev,of_prev_prev,
                                       c2=0.4)
            if alpha is None:  # line search failed -- use different one.
                alpha, fc, gc, of_prev, of_prev_prev, ofg1 = \
                    sopt.line_search(fn_of,fn_ofg,xit,
                                     -ofg,ofg,of_prev_bak,
                                     of_prev_prev_bak)
                if alpha is None or alpha == 0:
                    # This line search also failed to find a better solution.
                    ret = 3
                    break
            output( ' -> alpha: %.8e' % alpha )
        else:
            if conf.ls_method == 'full':
                output( 'full linesearch off (%s and %s)' % (conf.ls, can_ls) )
            ofg1 = None

        if self.log is not None:
            self.log.plot_vlines(color='g', linewidth=0.5)

        xit = xit - alpha * ofg
        if ofg1 is None:
            ofg = None
        else:
            ofg = ofg1.copy()

        for key, val in time_stats.iteritems():
            if len( val ):
                output( '%10s: %7.2f [s]' % (key, val[-1]) )

        it = it + 1

    output( 'status: %d' % ret )
    output( 'initial value: %.8e' % of0 )
    output( 'current value: %.8e' % of )
    output( 'iterations: %d' % it )
    output( 'function evaluations: %d in %.2f [s]' \
            % (nc_of[0], nm.sum( time_stats['of'] ) ) )
    output( 'gradient evaluations: %d in %.2f [s]' \
            % (nc_ofg[0], nm.sum( time_stats['ofg'] ) ) )

    if self.log is not None:
        self.log(of, ofg_norm, alpha, it)

        if conf.log.plot is not None:
            self.log(save_figure=conf.log.plot, finished=True)
        else:
            self.log(finished=True)

    if status is not None:
        status['log'] = self.log
        status['status'] = status
        status['of0'] = of0
        status['of'] = of
        status['it'] = it
        status['nc_of'] = nc_of[0]
        status['nc_ofg'] = nc_ofg[0]
        status['time_stats'] = time_stats

    return xit
def my_fmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf,
                 epsilon=_epsilon, maxiter=None, full_output=0, disp=1,
                 retall=0, callback=None):
    """Minimize a function using the BFGS algorithm.

    :Parameters:

      f : the Python function or method to be minimized.
      x0 : ndarray
        the initial guess for the minimizer.
      fprime : a function to compute the gradient of f.
      args : extra arguments to f and fprime.
      gtol : number
        gradient norm must be less than gtol before successful termination
      norm : number
        order of norm (Inf is max, -Inf is min)
      epsilon : number
        if fprime is approximated use this value for the step size
        (can be scalar or vector)
      callback : an optional user-supplied function to call after each
        iteration.  It is called as callback(xk), where xk is the current
        parameter vector.

    :Returns: (xopt, {fopt, gopt, Hopt, func_calls, grad_calls, warnflag}, <allvecs>)

      xopt : ndarray
        the minimizer of f.
      fopt : number
        the value of f(xopt).
      gopt : ndarray
        the value of f'(xopt).  (Should be near 0)
      Bopt : ndarray
        the value of 1/f''(xopt).  (inverse Hessian matrix)
      func_calls : number
        the number of function_calls.
      grad_calls : number
        the number of gradient calls.
      warnflag : integer
        1 : 'Maximum number of iterations exceeded.'
        2 : 'Gradient and/or function calls not changing'
      allvecs : a list of all iterates (only returned if retall==1)

    :OtherParameters:

      maxiter : number
        the maximum number of iterations.
      full_output : number
        if non-zero then return fopt, func_calls, grad_calls, and warnflag
        in addition to xopt.
      disp : number
        print convergence message if non-zero.
      retall : number
        return a list of results at each iteration if non-zero

    :SeeAlso:

      fmin, fmin_powell, fmin_cg, fmin_bfgs, fmin_ncg --
        multivariate local optimizers
      leastsq -- nonlinear least squares minimizer
      fmin_l_bfgs_b, fmin_tnc, fmin_cobyla -- constrained
        multivariate optimizers
      anneal, brute -- global optimizers
      fminbound, brent, golden, bracket -- local scalar minimizers
      fsolve -- n-dimensional root-finding
      brentq, brenth, ridder, bisect, newton -- one-dimensional root-finding
      fixed_point -- scalar fixed-point finder

    Notes
    ----------------------------------
      Optimize the function, f, whose gradient is given by fprime using the
      quasi-Newton method of Broyden, Fletcher, Goldfarb, and Shanno (BFGS).
      See Wright, and Nocedal 'Numerical Optimization', 1999, pg. 198.

    """
    import numpy
    import scipy.optimize.linesearch as linesearch

    x0 = asarray(x0).squeeze()
    if x0.ndim == 0:
        x0.shape = (1,)
    if maxiter is None:
        maxiter = len(x0) * 200
    func_calls, f = wrap_function(f, args)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime, args)
    gfk = myfprime(x0)
    k = 0
    N = len(x0)
    I = numpy.eye(N, dtype=int)
    Hk = I
    old_fval = f(x0)
    old_old_fval = old_fval + 5000
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2 * gtol]
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)
    while (gnorm > gtol) and (k < maxiter):
        pk = -numpy.dot(Hk, gfk)
        if False:
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                linesearch.line_search(f, myfprime, xk, pk, gfk,
                                       old_fval, old_old_fval)
            if alpha_k is None:  # line search failed -- try a different one.
                alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                    line_search(f, myfprime, xk, pk, gfk,
                                old_fval, old_old_fval)
                if alpha_k is None:
                    # This line search also failed to find a better solution.
                    warnflag = 2
                    break
        else:
            alpha_k = 0.1
        lg.debug("alpha = {0}".format(alpha_k))
        xkp1 = xk + alpha_k * pk  # 0.3 added by hcm
        print "--------------------------------\npk =", pk
        dump_mat(Hk)
        if retall:
            allvecs.append(xkp1)
        sk = xkp1 - xk
        xk = xkp1
        # if gfkp1 is None:
        gfkp1 = myfprime(xkp1)
        yk = gfkp1 - gfk
        gfk = gfkp1
        if callback is not None:
            # callback(xk)
            xk = callback(xk)  # changed to the following line by hcm
        k += 1
        gnorm = vecnorm(gfk, ord=norm)
        if (gnorm <= gtol):
            break
        try:  # this was handled in numeric, let it remain for more safety
            rhok = 1.0 / (numpy.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
            lg.debug("Divide-by-zero encountered: rhok assumed large")
        if isinf(rhok):  # this is a patch for numpy
            rhok = 1000.0
            lg.debug("Divide-by-zero encountered: rhok assumed large")
        A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok
        A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok
        Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + rhok * sk[:, numpy.newaxis] \
            * sk[numpy.newaxis, :]

    if disp or full_output:
        fval = old_fval
    if warnflag == 2:
        if disp:
            print "Warning: Desired error not necessarily achieved due to precision loss"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]
    elif k >= maxiter:
        warnflag = 1
        if disp:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]
    else:
        if disp:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]

    if full_output:
        retlist = xk, fval, gfk, Hk, func_calls[0], grad_calls[0], warnflag
        if retall:
            retlist += (allvecs,)
    else:
        retlist = xk
        if retall:
            retlist = (xk, allvecs)

    return retlist
def Customfmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf,
                    epsilon=numpy.sqrt(numpy.finfo(float).eps),
                    maxiter=None, full_output=0, disp=1, retall=0,
                    callback=None):
    testVar = 0
    x0 = asarray(x0).squeeze()
    if x0.ndim == 0:
        x0.shape = (1,)
    if maxiter is None:
        maxiter = len(x0) * 200
    func_calls, f = wrap_function(f, args)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime, args)
    gfk = myfprime(x0)
    k = 0
    N = len(x0)
    I = numpy.eye(N, dtype=int)
    Hk = I
    old_fval = f(x0)
    old_old_fval = old_fval + 5000
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2 * gtol]
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)
    while (gnorm > gtol) and (k < maxiter):
        pk = -numpy.dot(Hk, gfk)
        alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
            linesearch.line_search(f, myfprime, xk, pk, gfk,
                                   old_fval, old_old_fval)
        if alpha_k is None:  # line search failed -- try a different one.
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                line_search(f, myfprime, xk, pk, gfk,
                            old_fval, old_old_fval)
            if alpha_k is None:
                # This line search also failed to find a better solution.
                warnflag = 2
                break
        xkp1 = xk + alpha_k * pk
        if retall:
            allvecs.append(xkp1)
        sk = xkp1 - xk
        xk = xkp1
        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)
        yk = gfkp1 - gfk
        gfk = gfkp1
        if callback is not None:
            callback(xk)
        k += 1
        gnorm = vecnorm(gfk, ord=norm)
        if (gnorm <= gtol):
            break
        try:  # this was handled in numeric, let it remain for more safety
            rhok = 1.0 / (numpy.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
            print "Divide-by-zero encountered: rhok assumed large"
        if numpy.isinf(rhok):  # this is a patch for numpy
            rhok = 1000.0
            print "Divide-by-zero encountered: rhok assumed large"
        A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok
        A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok
        Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + rhok * sk[:, numpy.newaxis] \
            * sk[numpy.newaxis, :]

    if disp or full_output:
        fval = old_fval
    if warnflag == 2:
        if disp:
            print "Warning: Desired error not necessarily achieved " \
                  "due to precision loss"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]
    elif k >= maxiter:
        warnflag = 1
        if disp:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]
    else:
        if disp:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]

    if full_output:
        retlist = xk, fval, gfk, Hk, func_calls[0], grad_calls[0], warnflag
        if retall:
            retlist += (allvecs,)
    else:
        retlist = xk
        if retall:
            retlist = (xk, allvecs)

    return retlist
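# The inverse-Hessian update used by every BFGS variant in this file is the
# standard rank-two formula
#     H_{k+1} = (I - rho s y^T) H_k (I - rho y s^T) + rho s s^T,
#     rho = 1 / (y^T s),
# with s = x_{k+1} - x_k and y = grad_{k+1} - grad_k.  The helper below is a
# self-contained restatement of that update for reference; it is not called
# by any of the routines above.
def bfgs_inverse_hessian_update(Hk, sk, yk):
    import numpy
    rhok = 1.0 / numpy.dot(yk, sk)
    I = numpy.eye(len(sk))
    A1 = I - rhok * sk[:, numpy.newaxis] * yk[numpy.newaxis, :]
    A2 = I - rhok * yk[:, numpy.newaxis] * sk[numpy.newaxis, :]
    return numpy.dot(A1, numpy.dot(Hk, A2)) \
        + rhok * sk[:, numpy.newaxis] * sk[numpy.newaxis, :]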
def __call__(self, x0, conf=None, obj_fun=None, obj_fun_grad=None,
             status=None, obj_args=None):
    # def fmin_sd( conf, x0, fn_of, fn_ofg, args = () ):
    conf = get_default(conf, self.conf)
    obj_fun = get_default(obj_fun, self.obj_fun)
    obj_fun_grad = get_default(obj_fun_grad, self.obj_fun_grad)
    status = get_default(status, self.status)
    obj_args = get_default(obj_args, self.obj_args)

    if conf.output:
        globals()["output"] = conf.output

    output("entering optimization loop...")

    nc_of, tt_of, fn_of = wrap_function(obj_fun, obj_args)
    nc_ofg, tt_ofg, fn_ofg = wrap_function(obj_fun_grad, obj_args)

    time_stats = {"of": tt_of, "ofg": tt_ofg, "check": []}

    if conf.log:
        log = Log.from_conf(conf, ([r"of"], [r"$||$ofg$||$"], [r"alpha"]))
    else:
        log = None

    ofg = None
    it = 0
    xit = x0.copy()
    while 1:
        of = fn_of(xit)

        if it == 0:
            of0 = ofit0 = of_prev = of
            of_prev_prev = of + 5000.0

        if ofg is None:
            # ofg = 1
            ofg = fn_ofg(xit)

        if conf.check:
            tt = time.clock()
            check_gradient(xit, ofg, fn_of, conf.delta, conf.check)
            time_stats["check"].append(time.clock() - tt)

        ofg_norm = nla.norm(ofg, conf.norm)

        ret = conv_test(conf, it, of, ofit0, ofg_norm)
        if ret >= 0:
            break
        ofit0 = of

        ##
        # Backtrack (on errors).
        alpha = conf.ls0
        can_ls = True
        while 1:
            xit2 = xit - alpha * ofg
            aux = fn_of(xit2)

            if aux is None:
                alpha *= conf.ls_red_warp
                can_ls = False
                output("warp: reducing step (%f)" % alpha)
            elif conf.ls and conf.ls_method == "backtracking":
                if aux < of * conf.ls_on:
                    break
                alpha *= conf.ls_red
                output("backtracking: reducing step (%f)" % alpha)
            else:
                of_prev_prev = of_prev
                of_prev = aux
                break

            if alpha < conf.ls_min:
                if aux is None:
                    raise RuntimeError, "giving up..."
                output("linesearch failed, continuing anyway")
                break

        # These values are modified by the line search, even if it fails
        of_prev_bak = of_prev
        of_prev_prev_bak = of_prev_prev

        if conf.ls and can_ls and conf.ls_method == "full":
            output("full linesearch...")
            alpha, fc, gc, of_prev, of_prev_prev, ofg1 = linesearch.line_search(
                fn_of, fn_ofg, xit, -ofg, ofg, of_prev, of_prev_prev, c2=0.4
            )
            if alpha is None:  # line search failed -- use different one.
                alpha, fc, gc, of_prev, of_prev_prev, ofg1 = sopt.line_search(
                    fn_of, fn_ofg, xit, -ofg, ofg, of_prev_bak, of_prev_prev_bak
                )
                if alpha is None or alpha == 0:
                    # This line search also failed to find a better solution.
                    ret = 3
                    break
            output(" -> alpha: %.8e" % alpha)
        else:
            if conf.ls_method == "full":
                output("full linesearch off (%s and %s)" % (conf.ls, can_ls))
            ofg1 = None

        if conf.log:
            log(of, ofg_norm, alpha)

        xit = xit - alpha * ofg
        if ofg1 is None:
            ofg = None
        else:
            ofg = ofg1.copy()

        for key, val in time_stats.iteritems():
            if len(val):
                output("%10s: %7.2f [s]" % (key, val[-1]))

        it = it + 1

    output("status: %d" % ret)
    output("initial value: %.8e" % of0)
    output("current value: %.8e" % of)
    output("iterations: %d" % it)
    output("function evaluations: %d in %.2f [s]"
           % (nc_of[0], nm.sum(time_stats["of"])))
    output("gradient evaluations: %d in %.2f [s]"
           % (nc_ofg[0], nm.sum(time_stats["ofg"])))

    if conf.log:
        log(of, ofg_norm, alpha, finished=True)

    if status is not None:
        status["log"] = log
        status["status"] = status
        status["of0"] = of0
        status["of"] = of
        status["it"] = it
        status["nc_of"] = nc_of[0]
        status["nc_ofg"] = nc_ofg[0]
        status["time_stats"] = time_stats

    return xit
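# The inner "backtracking" loop of the steepest-descent solvers above keeps
# shrinking the step by conf.ls_red until the new objective value drops below
# of * conf.ls_on, giving up once the step falls under conf.ls_min.  The
# function below is a self-contained restatement of that idea on a plain
# callable, for reference only: the parameter names mirror the conf attributes
# used above, the default values are illustrative rather than taken from any
# solver configuration, and a None objective value is simply treated as a
# failed decrease instead of triggering the ls_red_warp branch.
def backtracking_step(fn_of, xit, ofg, of, ls0=1.0, ls_red=0.5,
                      ls_on=0.99999, ls_min=1e-5):
    alpha = ls0
    while alpha >= ls_min:
        aux = fn_of(xit - alpha * ofg)
        if aux is not None and aux < of * ls_on:
            return alpha, aux  # accepted step and its objective value
        alpha *= ls_red
    return None, None  # line search failed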