def CGM(x, f, g, eps, kmax, iCG, iRC, nu=None, precision=6):
    gradient_norm = round(np.linalg.norm(g(x)), precision)
    Xk = [[np.nan, f(x), gradient_norm]]
    rk = [np.nan]
    Mk = [np.nan]
    # ============== #
    d = -g(x)
    k = 0
    while np.linalg.norm(g(x)) > eps and k < kmax:
        if k > 0:
            alpha, *_ = line_search(f, g, x, d, old_old_fval=f(x_prev), c1=0.01, c2=0.45)
        else:
            alpha, *_ = line_search(f, g, x, d, c1=0.01, c2=0.45)
        if alpha is None:
            break
        x, x_prev = x + alpha * d, x
        # =========== #
        # CGM variants
        if iCG == "FR":
            beta = (g(x).T @ g(x)) / (g(x_prev).T @ g(x_prev))
        elif iCG == "PR":
            beta = max(0, g(x).T @ (g(x) - g(x_prev)) / (g(x_prev).T @ g(x_prev)))
        else:
            raise TypeError(
                "iCG should be FR (Fletcher-Reeves) or PR (Polak-Ribière)")
        # Restart conditions
        if iRC > 0 and nu is None:
            raise TypeError(
                f"nu is a necessary parameter with iRC equal to {iRC}")
        if (iRC == 1 and k % nu == 0
                or iRC == 2 and g(x).T @ g(x_prev) / np.linalg.norm(g(x))**2 > nu
                or k == 0):
            d = -g(x)
        else:
            d = -g(x) + beta * d
        k += 1
        # =========== #
        gradient_norm = np.round(np.linalg.norm(g(x)), precision)
        Xk.append([alpha, f(x), gradient_norm])
        rk.append(np.linalg.norm(g(x)) / np.linalg.norm(g(x_prev)))
        Mk.append(np.linalg.norm(g(x)) / (np.linalg.norm(g(x_prev))**2))
    # =========== #
    data = pd.DataFrame(Xk, columns=["alpha", "f(x)", "||g(x)||"], dtype=float)
    data['r'] = rk
    data['M'] = Mk
    return x, data
def otimizar(self, p_inicial):
    # Set the initial values
    self.ponto_inicial = np.array(p_inicial)
    self.tolerancia = 1000000
    self.num_iteracoes = 0
    self.chamadas_func_obj = 0
    self.chamadas_gradiente = 0
    ponto_anterior = None
    ponto = np.array(p_inicial)
    self.iniciar_tempo()
    while self.tolerancia >= 1e-6:
        self.num_iteracoes += 1
        direcao = -self.gradiente_himmelblau(ponto)
        resp = line_search(f=self.func_himmelblau,
                           myfprime=self.gradiente_himmelblau,
                           xk=ponto, pk=direcao)
        ponto_anterior = ponto
        ponto = ponto + resp[0] * direcao
        self.tolerancia = np.linalg.norm(
            ponto - ponto_anterior) / np.linalg.norm(ponto_anterior)
        self.chamadas_func_obj += resp[1]
        self.chamadas_gradiente += resp[2]
    self.finalizar_tempo()
    self.ponto_final = ponto
    self.valor_final = self.func_himmelblau(self.ponto_final)
def optimize(self, start_point, verbose=False): xk = start_point iter = 0 self.obj_f.reset_count() while True: dk = np.linalg.solve(self.G(xk), -self.g(xk)) alpha, fc, gc, new_fval, old_fval, new_slope = line_search( self.f, self.g, xk, dk) if alpha is None: alpha = ALPHA_BK xk_plus_1 = xk + alpha * dk iter += 1 if verbose: print('----------') print("alpha", alpha) print("dk", dk) print("x_k+1", xk_plus_1) print("f_k+1", self.f(xk_plus_1)) if should_break(xk, xk_plus_1): break xk = xk_plus_1 print(" final point", xk_plus_1) print(" final_fval", self.f(xk_plus_1)) print(" iter times", iter) print(" function calls", self.obj_f.get_count()[0]) print(" derivate calls", self.obj_f.get_count()[1]) print(" hessian calls", self.obj_f.get_count()[2]) return xk_plus_1, self.f(xk_plus_1)
def gradient_Wolfe(f, f_grad, x0, PREC, ITE_MAX): x = np.copy(x0) stop = PREC * np.linalg.norm(f_grad(x0)) x_tab = np.copy(x) print( "------------------------------------\n Gradient with Wolfe line search\n------------------------------------\nSTART" ) t_s = timeit.default_timer() for k in range(ITE_MAX): g = f_grad(x) res = line_search(f, f_grad, x, -g, gfk=None, old_fval=None, old_old_fval=None, args=(), c1=0.0001, c2=0.9, amax=50) x = x - res[0] * g x_tab = np.vstack((x_tab, x)) if np.linalg.norm(g) < stop: break t_e = timeit.default_timer() print("FINISHED -- {:d} iterations / {:.6f}s -- final value: {:f}\n\n". format(k, t_e - t_s, f(x))) return x, x_tab
def gaussNewton(f, Df, Jac, r, x, niter=10, backtrack=True):
    '''
    Solve a nonlinear least squares problem with Gauss-Newton method.

    Inputs:
        f -- the objective function
        Df -- gradient of f
        Jac -- jacobian of residual vector
        r -- the residual vector
        x -- initial point
        niter -- integer giving the number of iterations

    Returns:
        the minimizer
    '''
    a = 0
    for i in range(niter):
        J = Jac(x)
        g = J.T.dot(r(x))
        # Gauss-Newton direction solves (J^T J) p = -g
        p = la.solve(J.T.dot(J), -g)
        slope = (g * p).sum()
        if backtrack:
            a = backtracking(f, slope, x, p)
        else:
            a = opt.line_search(f, Df, x, p)[0]
        x += a * p
        print(x, f(x), a)
    return x
def conjugate_gradient(self, w, J=10, gtol=1e-5): d, g = [], [] gnorm = gtol + 1 j = 0 while (gnorm > gtol) and (j < J): if j == 0: g.append(self.g_cols(w)) d.append(-g[j]) res = optimize.line_search(self.f_cols, self.g_cols, w, d[j], g[j], self.f_cols(w)) if res[0] is None: return w, j else: alpha = res[0] w = w + alpha * d[j] g.append(self.g_cols(w)) gnorm = vecnorm(g[j + 1], ord=np.Inf) beta_j = max(0, np.dot(g[j + 1].T, g[j + 1] - g[j]) / np.dot(g[j], g[j])) # eq. 7.74 Polak-Ribiere d.append(-g[j + 1] + beta_j * d[j]) # eq.7.67 j += 1 return w, j
def conj_grad(function, gradient, starting_point, iterations, error, results):
    i = 0
    k = 0
    r = np.asarray(-gradient(starting_point))
    d = r
    x = starting_point
    sigma_new = np.dot(r.transpose(), r)
    sigma_0 = sigma_new
    while (i < iterations and sigma_new > error ** 2 * sigma_0):
        # step length along the current conjugate direction d, using the
        # function and gradient passed in as arguments
        alfa = optimize.line_search(function, gradient, x, d)[0]
        x = x + alfa * d
        r = -gradient(x)
        sigma_old = sigma_new
        sigma_new = np.dot(r.transpose(), r)
        beta = sigma_new / sigma_old
        d = r + np.dot(beta, d)
        k += 1
        results.append(x)
        # restart with steepest descent when d stops being a descent direction
        if k == iterations or np.dot(r.transpose(), d) <= 0:
            d = r
            k = 0
        i = i + 1
def gradient_descent(x0, f, f_prime, hessian=None, adaptative=False): x_i, y_i = x0 all_x_i = list() all_y_i = list() all_f_i = list() for i in range(1, 100): all_x_i.append(x_i) all_y_i.append(y_i) all_f_i.append(f([x_i, y_i])) dx_i, dy_i = f_prime(np.asarray([x_i, y_i])) if adaptative: # Compute a step size using a line_search to satisfy the Wolf # conditions step = optimize.line_search(f, f_prime, np.r_[x_i, y_i], -np.r_[dx_i, dy_i], np.r_[dx_i, dy_i], c2=.05) step = step[0] if step is None: step = 0 else: step = 1 x_i += - step*dx_i y_i += - step*dy_i if np.abs(all_f_i[-1]) < 1e-16: break return all_x_i, all_y_i, all_f_i
def BFGS_algorithm(obj_fun, theta0, max_iter=2e04, epsilon=0):
    print("Starting BFGS algorithm.")
    # Initialization of object: bfgs
    bfgs = BFGS()
    bfgs.initialize(6, "inv_hess")
    # Lists to store results for theta (th) and cost (c)
    th, c = [], []
    th.append(theta0)
    c.append(obj_fun(theta0))
    niter = int(max_iter)
    success = (False, "max_iter reached.")
    # Iteration (cast max_iter to int so a float default such as 2e04 works with range)
    for n in range(int(max_iter)):
        th_0 = th[n]
        g0 = gradient(th_0)
        # If loss < epsilon, converged
        # If epsilon = 0, no check for convergence
        if (epsilon > 0) and (obj_fun(th_0) < epsilon):
            niter = n
            success = (True, "Loss = {}".format(obj_fun(th_0)))
            break
        # Compute search direction
        d = bfgs.dot(g0)
        # Compute step size through line search along the actual step direction -d
        alpha = line_search(obj_fun, gradient, th_0, -d)[0]
        # Update theta and gradient
        th_1 = th_0 - alpha * d
        g1 = gradient(th_1)
        # Update theta history and cost history
        th.append(th_1)
        c.append(obj_fun(th_1))
        # Update inverse hessian
        bfgs.update(th_1 - th_0, g1 - g0)
    print("Exiting.")
    return th, c, niter, success
def damped_newton(s):
    sol = la.solve(H(s), -df(s))
    a = opt.line_search(f, df, s, sol)[0]
    if a is None:
        a = 1
    s_n = s + a * sol
    return s_n
def quasi_newtonian(f, f1, x0=np.array([1, 1]), maxiter=0, epsi=0.001): if not maxiter: maxiter = len(x0) * 200 k = 0 gfk = f1(x0) N = len(x0) I = np.eye(N, dtype=int) Hk = I xk = x0 while ln.norm(gfk) > epsi and k < maxiter: pk = -np.dot(Hk, gfk) alpha = line_search(f, f1, xk, pk)[0] xkp1 = xk + alpha * pk sk = xkp1 - xk xk = xkp1 gfkp1 = f1(xkp1) yk = gfkp1 - gfk gfk = gfkp1 k += 1 ro = 1.0 / (np.dot(yk, sk)) A1 = I - ro * sk[:, np.newaxis] * yk[np.newaxis, :] A2 = I - ro * yk[:, np.newaxis] * sk[np.newaxis, :] Hk = np.dot(A1, np.dot( Hk, A2)) + (ro * sk[:, np.newaxis] * sk[np.newaxis, :]) return tuple(round(i, 2) for i in xk)
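A minimal usage sketch for the BFGS routine above (my addition, not part of the original snippet): it assumes numpy, numpy.linalg as ln, and scipy.optimize.line_search are imported as the function expects, and uses SciPy's Rosenbrock helpers rosen and rosen_der purely for illustration.

import numpy as np
from numpy import linalg as ln
from scipy.optimize import line_search, rosen, rosen_der

# Hypothetical demo: minimize the 2-D Rosenbrock function starting near (1.3, 0.7).
# The routine rounds its answer to two decimals, so roughly (1.0, 1.0) is expected.
x_opt = quasi_newtonian(rosen, rosen_der, x0=np.array([1.3, 0.7]))
print(x_opt)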
def _line_search_update(self): """ Proceed line search with x & d """ with warnings.catch_warnings(): warnings.filterwarnings("error") try: if self._search is not None: alpha, feva, success_flag, self._loss_cache, self._grad_cache = self._search.step( self._x, self.get_d()) self.success += success_flag elif not scipy_flag: feva, alpha = 0, 1 else: def f(x): self._func.refresh_cache(x, dtype="loss") return self._func.loss(x) def g(x): self._func.refresh_cache(x) return self._func.grad(x) alpha, feva, _, self._loss_cache, old_f, self._grad_cache = optimize.line_search( f, g, self._x, self.get_d() ) except RuntimeWarning: feva = 0 if self._search is not None: alpha = self._search._params["floor"] else: alpha = 0.01 self._x += alpha * self._d self.feva += feva self._d = None
def gradient_descent(x0, f, f_prime, hessian, stepsize=None, nsteps=50): """ Steepest-Descent algorithm with option for line search """ x_i, y_i = x0 all_x_i = list() all_y_i = list() all_f_i = list() for i in range(1, nsteps): all_x_i.append(x_i) all_y_i.append(y_i) x = np.array([x_i, y_i]) all_f_i.append(f(x)) dx_i, dy_i = f_prime(x) if stepsize is None: # Compute a step size using a line_search to satisfy the Wolf # conditions step = line_search(f, f_prime, np.r_[x_i, y_i], -np.r_[dx_i, dy_i], np.r_[dx_i, dy_i], c2=.05) step = step[0] if step is None: step = 0 else: step = stepsize x_i += -step * dx_i y_i += -step * dy_i if np.abs(all_f_i[-1]) < 1e-5: break return all_x_i, all_y_i, all_f_i
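A hedged usage sketch for the steepest-descent helper above: the quadratic quad and its gradient quad_grad are my own illustrative test functions (not part of the source), chosen so the Wolfe line-search branch is exercised.

import numpy as np
from scipy.optimize import line_search

def quad(v):
    # simple convex test function f(x, y) = x^2 + 10 y^2
    return v[0] ** 2 + 10.0 * v[1] ** 2

def quad_grad(v):
    return np.array([2.0 * v[0], 20.0 * v[1]])

# stepsize=None triggers the line search inside gradient_descent
xs, ys, fs = gradient_descent((1.5, 1.0), quad, quad_grad, hessian=None)
print(len(fs), fs[-1])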
def conjugate_gradient(x0, obj_func, grd_func, args=()):
    f0 = obj_func(x0, *args)
    g0 = grd_func(x0, *args)
    p = -g0
    x = x0
    g = g0
    f_cur = f0
    epoque = 0
    while np.linalg.norm(g) >= 0.000005:
        alpha, fc, gc, new_loss, old_loss, new_slope = line_search(
            f=obj_func, myfprime=grd_func, xk=x, pk=p, gfk=g,
            old_fval=f_cur, args=args)
        if alpha is None:
            # the Wolfe line search failed; stop instead of stepping with None
            break
        x = x + alpha * p
        f_cur = new_loss
        h = grd_func(x, *args)
        dgg = np.dot(g, g)
        ngg = np.dot(h, h)
        # Fletcher-Reeves beta (Eq 2.53)
        # beta = ngg / dgg
        # Polak-Ribière beta (clipped at zero)
        delta = np.dot(h, (h - g))
        beta = max(0, delta / dgg)
        g = h
        p = -g + beta * p
        print(f"Epoque {epoque} and loss is: {new_loss}")
        epoque += 1
    return x
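Below is an illustrative call (my addition, not from the source) that runs the nonlinear CG routine on a small strictly convex quadratic, where the Polak-Ribière update and the Wolfe line search are well behaved; the matrix A and vector b are made-up test data.

import numpy as np
from scipy.optimize import line_search

A = np.array([[3.0, 1.0],
              [1.0, 2.0]])      # symmetric positive definite test matrix
b = np.array([1.0, 1.0])

def quad_obj(x, A, b):
    return 0.5 * x @ A @ x - b @ x

def quad_grad(x, A, b):
    return A @ x - b

x_star = conjugate_gradient(np.zeros(2), quad_obj, quad_grad, args=(A, b))
print(x_star, np.linalg.solve(A, b))   # the two should agree closely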
def line_search_init_param(self, func): x = self.renderer.get_param() jac = _get_jac(func=func, delta=0.005, x0=x) search_direction = -func(x) / jac(x) res = optimize.line_search(f=func, myfprime=jac, xk=x, pk=search_direction) alpha = res[0] x_new = x + alpha * search_direction self.renderer.set_param(x_new)
def get_alpha(x):
    # pass the callables f and gradient themselves, not their values at x
    alpha, _, _, _, _, _ = opt.line_search(
        f,
        gradient,
        x,
        -gradient(x),
    )
    return alpha
def get_alpha(fun, current_point): def grad(x): return nd.Gradient(fun)([x[0], x[1]]) x = np.ravel(current_point) p = -grad(x) #current search direc a = line_search(fun, grad, x, p)[0] print(a)
def line_search_rank0_scipy_scalar_search_wolfe1( F, Fp, c1, c2, old_F_val=None, old_Fp_val=None, **kwargs): from scipy.optimize.linesearch import scalar_search_wolfe1 as line_search alpha, phi, phi0 = line_search( F, Fp, phi0=old_F_val, derphi0=old_Fp_val, c1=c1, c2=c2, **kwargs) if alpha is None: phi = None return alpha, phi
def get_alpha(fun, current_point): def grad(x): return nd.Gradient(fun)([x[0], x[1]]) x = np.ravel(current_point) p = -grad(x) #current search direc a = line_search(fun, grad, x, p)[0] return a # line_search(fun,np.array([1.],[3.]),)
def BFGS(x, f, g, eps, kmax, precision=6):
    gradient_norm = np.round(np.linalg.norm(g(x)), precision)
    Xk = [[np.nan, f(x), gradient_norm]]
    rk = [np.nan]
    Mk = [np.nan]
    # =========== #
    H = I = np.identity(len(g(x)))
    k = 0
    while np.linalg.norm(g(x)) > eps and k < kmax:
        d = -H @ g(x)
        if k > 0:
            alpha, *_ = line_search(f, g, x, d, old_old_fval=f(x_prev), c1=0.01, c2=0.45)
        else:
            alpha, *_ = line_search(f, g, x, d, c1=0.01, c2=0.45)
        if alpha is None:
            break
        x, x_prev = x + alpha * d, x
        s = x - x_prev
        y = g(x) - g(x_prev)
        # BFGS update of the inverse Hessian approximation; outer products keep
        # the shapes consistent for the 1-D vectors s and y
        rho = 1 / (y @ s)
        H = (I - rho * np.outer(s, y)) @ H @ (I - rho * np.outer(y, s)) + rho * np.outer(s, s)
        k += 1
        # =========== #
        gradient_norm = np.round(np.linalg.norm(g(x)), precision)
        Xk.append([alpha, f(x), gradient_norm])
        rk.append(np.linalg.norm(g(x)) / np.linalg.norm(g(x_prev)))
        Mk.append(np.linalg.norm(g(x)) / (np.linalg.norm(g(x_prev))**2))
    # =========== #
    data = pd.DataFrame(Xk, columns=["alpha", "f(x)", "||g(x)||"], dtype=float)
    data['r'] = rk
    data['M'] = Mk
    return x, data
def line_search_rank0_scipy_scalar_search_wolfe2( F, Fp, c1, c2, old_F_val=None, old_Fp_val=None, **kwargs): from scipy.optimize.linesearch import scalar_search_wolfe2 as line_search alpha_star, phi_star, phi0, derphi_star = line_search( F, Fp, phi0=old_F_val, derphi0=old_Fp_val, c1=c1, c2=c2, **kwargs) if derphi_star is None: alpha_star = None if alpha_star is None: phi_star = None return alpha_star, phi_star
def find_alpha(self, y_true, curr_pred, tree_pred): def alpha_obj(x): return self.logistic_loss(y_true, x) def alpha_grad(x): return self.logistic_grad(y_true, x) alpha = line_search(alpha_obj, alpha_grad, xk=curr_pred, pk=tree_pred) if not alpha[0]: return 1.0 return alpha[0]
def quasi_newton_bfgs(self, init_x, eps=1e-6, store=False): self.check_dimension(init_x) size = init_x.size init_f = self.f(init_x) current_x, current_f, current_g, current_H = np.copy(init_x), \ init_f, self.grad_f(init_x), np.eye(size) hist_x = [init_x] hist_f = [init_f] m = 10 previous_s = [None] * m previous_y = [None] * m iteration = 0 lag = 100 while current_f / init_f > eps: current_p = -np.dot(current_H, current_g) alpha = line_search(self.f, self.grad_f, current_x, current_p)[0] # alpha = self.line_search_wolfe(current_x, current_p) next_x = current_x + alpha * current_p next_f = self.f(next_x) next_g = self.grad_f(next_x) s = alpha * current_p y = next_g - current_g rho = np.dot(s, y) Hy = current_H.dot(y) next_H = current_H \ + (rho + Hy.dot(y)) * np.outer(s, s) / rho**2 \ - (np.outer(Hy, s) + np.outer(s, Hy)) / rho if iteration % lag == 0: if store: hist_x.append(current_x) hist_f.append(current_f) # print "iteration {}: {}".format(iteration, current_f) if iteration < m: previous_s[iteration] = s previous_y[iteration] = y iteration += 1 current_x = next_x current_f = next_f current_g = next_g current_H = next_H return (current_x, current_f, iteration, previous_s, previous_y)
def steepest_descent(x, f: fl.Fluxion, tol: float = 1e-8): w = FluxionWrapper(f) xs = np.zeros([2001, 2]) xs[0] = x for i in range(0, 2000): x = xs[i] grad = -w.diff(x) alpha = op.line_search(w.val, w.diff, x, grad) xs[i + 1] = x + alpha[0] * grad stepsize = np.linalg.norm(xs[i + 1] - xs[i]) if stepsize < tol: break return (xs[0:i + 1, :], i + 1, stepsize, xs[i])
def ls_subopt(x, g, i):
    lam_mag = np.sqrt(np.mean(g[:-1]**2))
    rho_mag = np.sqrt(g[-1]**2)
    if rho_mag > 10 * lam_mag:
        print(" ----- VBoost doing rho Line Search ------ ")
        ff = lambda x: mixture_obj(x, i)
        gg = lambda x: mixture_obj_grad(x, i)
        alpha0, fc, gc, _, _, _ = \
            optimize.line_search(ff, gg, xk=x, pk=ls_dir)
        if alpha0 is not None:
            print("new rho =", (x + alpha0 * ls_dir)[-1])
            x = x + alpha0 * ls_dir
    return x
def lbfgs(f,fgrad,x0,maxiter=100,max_corr=25,grad_norm_tol=1e-9, ihp=None,ls_criteria="armijo"): """ LBFGS algorithm as described by Nocedal & Wright In fact it gives numerically identical answers to L-BFGS-B on some test problems. """ x = x0.copy() yield x if ihp is None: ihp = InverseHessianPairs(max_corr) oldg = fgrad(x) if ls_criteria=="armijo": fval = f(x) p = -oldg/np.linalg.norm(oldg) log = logging.getLogger("lbfgs") iter_count = 0 while True: # TODO compare line searches g=None if ls_criteria == "strong_wolfe": alpha_star, _, _, fval, _, g = opt.line_search(f,fgrad,x,p,oldg) elif ls_criteria == "armijo": import scipy.optimize.linesearch alpha_star,_,fval=scipy.optimize.linesearch.line_search_armijo(f,x,p,oldg,fval) else: raise NotImplementedError if alpha_star is None: log.error("lbfgs line search failed!") break s = alpha_star * p x += s yield x iter_count += 1 if iter_count >= maxiter: break if g is None: log.debug("line search didn't give us a gradient. calculating") g = fgrad(x) if np.linalg.norm(g) < grad_norm_tol: break y = g - oldg ihp.add( s,y ) p = ihp.mvp(-g) oldg = g log.info("lbfgs iter %i %8.3e",iter_count, fval)
def quasi_newton_l_bfgs(self, init_x, previous_s, previous_y, eps=1e-6, m=10, store=False): self.check_dimension(init_x) size = init_x.size init_f = self.f(init_x) current_x, current_f, current_g = np.copy(init_x), init_f, self.grad_f( init_x) hist_x = [init_x] hist_f = [init_f] iteration = m lag = 100 while current_f / init_f > eps: current_p = -self.l_bfgs_two_loop(current_g, previous_s, previous_y) alpha = line_search(self.f, self.grad_f, current_x, current_p)[0] next_x = current_x + alpha * current_p next_f = self.f(next_x) next_g = self.grad_f(next_x) s = alpha * current_p y = next_g - current_g del previous_s[0] previous_s.append(s) del previous_y[0] previous_y.append(y) current_x = next_x current_f = next_f current_g = next_g if iteration % lag == 0: if store: hist_x.append(current_x) hist_f.append(current_f) # print "iteration {}: {}".format(iteration, current_f) iteration += 1 return (current_x, current_f, iteration, hist_x, hist_f, lag)
def conjugate_gradient_step(x, z, w, gradf_old, p, version):
    '''
    One step of Conjugate gradient method with strong Wolfe conditions
    '''
    my_tuple = line_search(f, gradf, x, p, c1=c1, c2=c2, args=(z, w, version))
    alpha = my_tuple[0]
    if alpha is None:
        alpha = step_length(x, z, w, p, version)
    x += alpha * p
    gradf_new = gradf(x, z, w, version)
    beta = (gradf_new.T @ gradf_new) / (gradf_old.T @ gradf_old)
    p = -gradf_new + beta * p
    gradf_old = gradf_new
    return x, gradf_old, p
def _get_s(self, H, grad, estimate, t): p = np.dot(-H, grad) oofv = None if t == 0 else self.objectives[-2] results = line_search(self.get_objective, lambda x: self.get_gradient(x)[:, 0], estimate, p, gfk=grad[:, 0], old_fval=self.objectives[-1], old_old_fval=oofv) eta = results[0] return eta * p
def optimize(H, x, y, maxiter, index, xRef, lambdaL2=0.5): print 'Doing super-resolution optimization' t = time() miny = np.min(y[0]) maxy = np.max(y[0]) print 'bounds of y : ' + str(miny) + ', ' + str(maxy) iteration = 0 maxdiff = np.ones(len(y)) * (maxy - miny) threshold = 0.01 * (maxy - miny) while iteration < maxiter and np.max(maxdiff) > threshold: gradL2 = lossL2prime(x, H, y) #Find alpha that satisfies strong Wolfe conditions. #http://scipy.github.io/devdocs/generated/scipy.optimize.line_search.html#scipy.optimize.line_search res = line_search(lossL2, lossL2prime, x, -gradL2, args=(H, y)) alphaL2 = res[0] if alphaL2 is None: alphaL2 = computeAlpha(x, gradL2) update = alphaL2 * gradL2 if xRef is not None: gradDenoising = 2.0 * (x - xRef) alphaDenoising = computeAlpha(x, gradDenoising) update = ( 1 - lambdaL2 ) * alphaDenoising * gradDenoising + lambdaL2 * alphaL2 * gradL2 #Update high resolution image x = x - update #Threshold on Maxdiff or update magnitude ? for i in range(len(y)): maxdiff[i] = np.max(H[i].dot(x) - y[i]) #Use bounds to limit intensity range of x x[x < miny] = miny x[x > maxy] = maxy iteration += 1 if iteration == maxiter: print 'Maximum number of iterations is reached' print 'Optimization done in ' + str(time() - t) + ' s, in ' + str( iteration) + ' iterations' return x
def optimize(self, start_point, verbose=False): xk = start_point gk = self.g(xk) Hk = np.eye(gk.shape[0], gk.shape[0]) iter = 0 self.obj_f.reset_count() while True: dk = -Hk.dot(gk) alpha, fc, gc, new_fval, old_fval, new_slope = line_search( self.f, self.g, xk, dk) if alpha is None: alpha = ALPHA_BK sk = alpha * dk xk_plus_1 = xk + sk iter += 1 gk_plus_1 = self.g(xk_plus_1) yk = gk_plus_1 - gk # change to matrix, as column vector sk = np.array([sk]).T yk = np.array([yk]).T Hk_puls_1 = Hk + (1+yk.T.dot(Hk).dot(yk)/yk.T.dot(sk))*(sk.dot(sk.T)/yk.T.dot(sk))-\ (sk.dot(yk.T).dot(Hk)+Hk.dot(yk).dot(sk.T))/yk.T.dot(sk) if verbose: print('----------') print("alpha", alpha) print("dk", dk) print("x_k+1", xk_plus_1) print("f_k+1", self.f(xk_plus_1)) # print("Hk", Hk_puls_1) if should_break(xk, xk_plus_1): break xk = xk_plus_1 Hk = Hk_puls_1 gk = gk_plus_1 print(" final point", xk_plus_1) print(" final_fval", self.f(xk_plus_1)) print(" iter_times", iter) print(" function calls", self.obj_f.get_count()[0]) print(" derivate calls", self.obj_f.get_count()[1]) print(" hessian calls", self.obj_f.get_count()[2]) return xk_plus_1, self.f(xk_plus_1)
def update_overdispersion(self): node = self.nodes['overdispersion'] mu = node.expected_x() var = node.expected_var_x() tau = node.prior_prec.expected_x() mm = node.prior_mean.expected_x() nn = self.Nframe['count'] # make an adjusted F that does not include our pars of interest F_adj = self.F() / node.expected_exp_x() def objfun(x): mu = x[:self.M] kap = x[self.M:] var = np.exp(kap) bar_exp_eta = np.exp(mu + 0.5 * var) * F_adj elbo = -0.5 * np.sum(tau * (var + (mu - mm)**2)) elbo += 0.5 * np.sum(np.log(var)) elbo += np.sum(nn * mu) elbo += -np.sum(bar_exp_eta) return -elbo def gradfun(x): jac = np.empty_like(x) mu = x[:self.M] kap = x[self.M:] var = np.exp(kap) bar_exp_eta = np.exp(mu + 0.5 * var) * F_adj jac[:self.M] = -tau * (mu - mm) jac[:self.M] += (nn - bar_exp_eta) jac[self.M:] = -0.5 * tau * var + 0.5 jac[self.M:] += -0.5 * var * bar_exp_eta return -jac # parameter of vectors to optimize over starts = np.concatenate((mu, np.log(var))) start_g = gradfun(starts) alpha = line_search(objfun, gradfun, starts, -start_g, gfk=start_g) if alpha[0] is not None: xnew = starts - alpha[0] * start_g node.post_mean = xnew[:self.M] node.post_prec = np.exp(-xnew[self.M:]) self.F(update=True)
def GM(x, f, g, eps, kmax, precision=6):
    gradient_norm = np.round(np.linalg.norm(g(x)), precision)
    Xk = [[np.nan, f(x), gradient_norm]]
    rk = [np.nan]
    Mk = [np.nan]
    # ============== #
    k = 0
    while np.linalg.norm(g(x)) > eps and k < kmax:
        d = -g(x)
        if k > 0:
            alpha, *_ = line_search(f, g, x, d, old_old_fval=f(x_prev), c1=0.01, c2=0.45)
        else:
            alpha, *_ = line_search(f, g, x, d, c1=0.01, c2=0.45)
        if alpha is None:
            print("alpha not found (!)")
            break
        x, x_prev = x + alpha * d, x
        k += 1
        # =========== #
        gradient_norm = np.round(np.linalg.norm(g(x)), precision)
        Xk.append([alpha, f(x), gradient_norm])
        rk.append(np.linalg.norm(g(x)) / np.linalg.norm(g(x_prev)))
        Mk.append(np.linalg.norm(g(x)) / (np.linalg.norm(g(x_prev))**2))
    # =========== #
    data = pd.DataFrame(Xk, columns=["alpha", "f(x)", "||g(x)||"], dtype=float)
    data['r'] = rk
    data['M'] = Mk
    return x, data
def gradient_descent(fn, fn_grad, x0, gtol=1e-5, maxiter=100):
    i = 0
    x = x0.copy()
    while i < maxiter:
        i += 1
        dx = -fn_grad(x)
        if abs(dx).max() <= gtol:
            print('Terminated since |g| <= %f' % gtol)
            break
        t = sio.line_search(fn, fn_grad, x, dx, -dx)[0]
        x += t * dx
        print('Step %d: y=%f, |g|=%f, t=%f' % (i, fn(x), np.linalg.norm(dx), t))
    if i >= maxiter:
        print('Terminated due to iteration limit')
    return x
def optimize(H,x,y,maxiter,index,xRef,lambdaL2=0.5): print('Doing super-resolution optimization') t = time() miny = np.min(y[0]) maxy = np.max(y[0]) print('bounds of y : '+str(miny)+', '+str(maxy)) iteration = 0 maxdiff = np.ones(len(y)) * (maxy-miny) threshold = 0.01 * (maxy-miny) while iteration<maxiter and np.max(maxdiff) > threshold: gradL2 = lossL2prime(x,H,y) #Find alpha that satisfies strong Wolfe conditions. #http://scipy.github.io/devdocs/generated/scipy.optimize.line_search.html#scipy.optimize.line_search res = line_search(lossL2, lossL2prime, x, -gradL2, args=(H,y)) alphaL2 = res[0] if alphaL2 is None: alphaL2 = computeAlpha(x,gradL2) update = alphaL2*gradL2 if xRef is not None: gradDenoising =2.0* (x-xRef) alphaDenoising = computeAlpha(x,gradDenoising) update = (1-lambdaL2)*alphaDenoising*gradDenoising + lambdaL2*alphaL2*gradL2 #Update high resolution image x = x - update #Threshold on Maxdiff or update magnitude ? for i in range(len(y)): maxdiff[i] = np.max(H[i].dot(x) - y[i]) #Use bounds to limit intensity range of x x[x<miny] = miny x[x>maxy] = maxy iteration+=1 if iteration==maxiter: print('Maximum number of iterations is reached') print('Optimization done in '+str(time()-t)+' s, in '+str(iteration)+' iterations') return x
def gaussNewton(f, df, jac, r, x, niter=10):
    """Solve a nonlinear least squares problem with Gauss-Newton method.

    Parameters:
        f (function): The objective function.
        df (function): The gradient of f.
        jac (function): The jacobian of the residual vector.
        r (function): The residual vector.
        x (ndarray of shape (n,)): The initial point.
        niter (int): The number of iterations.

    Returns:
        (ndarray of shape (n,)) The minimizer.
    """
    for _ in range(niter):
        # Gauss-Newton direction solves (J^T J) p = -J^T r
        p = la.solve(np.dot(jac(x).T, jac(x)), -np.dot(jac(x).T, r(x)))
        alpha = line_search(f, df, x, p)[0]
        x = x + alpha * p
    return x
def steepest_descent(x0):
    x = []
    x.append(x0)
    k = 0
    tol = 1e-5
    tol1 = 1
    while tol1 > tol:
        alpha = spo.line_search(f, df, x[k], -df(x[k]))
        c = alpha[0]
        if alpha[0] is None:
            c = 1
        xnew = x[k] - c * df(x[k])
        x.append(xnew)
        tol1 = la.norm(x[k + 1] - x[k])
        a = x[k + 1]
        k += 1
    # print(x)
    return a, x
def damped_newton(x0):
    x = []
    x.append(x0)
    k = 0
    tol = 1e-12
    tol1 = 1
    while tol1 > tol:
        s = la.solve(hessian(x[k]), -df(x[k]))
        # line search along the Newton step s rather than the raw gradient
        alpha = spo.line_search(f, df, x[k], s)
        c = alpha[0]
        if alpha[0] is None:
            c = 1
        xnew = x[k] + s * c
        x.append(xnew)
        tol1 = la.norm(x[k + 1] - x[k])
        a = x[k + 1]
        k += 1
    return a, x
def update_weights(self, weight_deltas): real_weights = [layer.weight for layer in self.train_layers] weights_vector = matrix_list_in_one_vector(real_weights) gradients_vetor = matrix_list_in_one_vector(self.gradients) res = line_search(self.check_updates, self.get_gradient_by_weights, xk=weights_vector, pk=matrix_list_in_one_vector(weight_deltas), gfk=gradients_vetor, amax=self.maxstep, c1=self.c1, c2=self.c2) step = (res[0] if res[0] is not None else self.step) # SciPy some times ignore `amax` argument and return # bigger result self.step = min(self.maxstep, step) self.set_weights(real_weights) return super(WolfeSearch, self).update_weights(weight_deltas)
def _FindCenter(self, u): """ linesearch algorithm to find optimal alpha """ qr,pr = self.shape u = params[:pr] U = sparse.dia_matrix( (u,0),shape=(pr,pr) ) U2 = U.dot(U) U_inv = sparse.dia_matrix( (1./np.array(u),0), shape=(pr,pr)) z = np.linalg.inv( self.X.dot(U2.dot(X.T)) ).dot(self.a) d = u - U2.dot(self.X.T).dot(z) f = lambda x: -np.sum(np.log(x) ) gradf = lambda x: - 1./x if np.linalg.norm(U_inv.dot(d) )<.25: alpha=[1.] else: alpha = optimize.line_search(f,gradf,u, d) return u+ alpha[0]*d
def __call__(self, x0, conf=None, obj_fun=None, obj_fun_grad=None, status=None, obj_args=None): # def fmin_sd( conf, x0, fn_of, fn_ofg, args = () ): conf = get_default(conf, self.conf) obj_fun = get_default(obj_fun, self.obj_fun) obj_fun_grad = get_default(obj_fun_grad, self.obj_fun_grad) status = get_default(status, self.status) obj_args = get_default(obj_args, self.obj_args) if conf.output: globals()["output"] = conf.output output("entering optimization loop...") nc_of, tt_of, fn_of = wrap_function(obj_fun, obj_args) nc_ofg, tt_ofg, fn_ofg = wrap_function(obj_fun_grad, obj_args) time_stats = {"of": tt_of, "ofg": tt_ofg, "check": []} if conf.log: log = Log.from_conf(conf, ([r"of"], [r"$||$ofg$||$"], [r"alpha"])) else: log = None ofg = None it = 0 xit = x0.copy() while 1: of = fn_of(xit) if it == 0: of0 = ofit0 = of_prev = of of_prev_prev = of + 5000.0 if ofg is None: # ofg = 1 ofg = fn_ofg(xit) if conf.check: tt = time.clock() check_gradient(xit, ofg, fn_of, conf.delta, conf.check) time_stats["check"].append(time.clock() - tt) ofg_norm = nla.norm(ofg, conf.norm) ret = conv_test(conf, it, of, ofit0, ofg_norm) if ret >= 0: break ofit0 = of ## # Backtrack (on errors). alpha = conf.ls0 can_ls = True while 1: xit2 = xit - alpha * ofg aux = fn_of(xit2) if aux is None: alpha *= conf.ls_red_warp can_ls = False output("warp: reducing step (%f)" % alpha) elif conf.ls and conf.ls_method == "backtracking": if aux < of * conf.ls_on: break alpha *= conf.ls_red output("backtracking: reducing step (%f)" % alpha) else: of_prev_prev = of_prev of_prev = aux break if alpha < conf.ls_min: if aux is None: raise RuntimeError, "giving up..." output("linesearch failed, continuing anyway") break # These values are modified by the line search, even if it fails of_prev_bak = of_prev of_prev_prev_bak = of_prev_prev if conf.ls and can_ls and conf.ls_method == "full": output("full linesearch...") alpha, fc, gc, of_prev, of_prev_prev, ofg1 = linesearch.line_search( fn_of, fn_ofg, xit, -ofg, ofg, of_prev, of_prev_prev, c2=0.4 ) if alpha is None: # line search failed -- use different one. alpha, fc, gc, of_prev, of_prev_prev, ofg1 = sopt.line_search( fn_of, fn_ofg, xit, -ofg, ofg, of_prev_bak, of_prev_prev_bak ) if alpha is None or alpha == 0: # This line search also failed to find a better solution. ret = 3 break output(" -> alpha: %.8e" % alpha) else: if conf.ls_method == "full": output("full linesearch off (%s and %s)" % (conf.ls, can_ls)) ofg1 = None if conf.log: log(of, ofg_norm, alpha) xit = xit - alpha * ofg if ofg1 is None: ofg = None else: ofg = ofg1.copy() for key, val in time_stats.iteritems(): if len(val): output("%10s: %7.2f [s]" % (key, val[-1])) it = it + 1 output("status: %d" % ret) output("initial value: %.8e" % of0) output("current value: %.8e" % of) output("iterations: %d" % it) output("function evaluations: %d in %.2f [s]" % (nc_of[0], nm.sum(time_stats["of"]))) output("gradient evaluations: %d in %.2f [s]" % (nc_ofg[0], nm.sum(time_stats["ofg"]))) if conf.log: log(of, ofg_norm, alpha, finished=True) if status is not None: status["log"] = log status["status"] = status status["of0"] = of0 status["of"] = of status["it"] = it status["nc_of"] = nc_of[0] status["nc_ofg"] = nc_ofg[0] status["time_stats"] = time_stats return xit
def my_fmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf, epsilon=_epsilon, maxiter=None, full_output=0, disp=1, retall=0, callback=None): """Minimize a function using the BFGS algorithm. :Parameters: f : callable f(x,*args) Objective function to be minimized. x0 : ndarray Initial guess. fprime : callable f'(x,*args) Gradient of f. args : tuple Extra arguments passed to f and fprime. gtol : float Gradient norm must be less than gtol before succesful termination. norm : float Order of norm (Inf is max, -Inf is min) epsilon : int or ndarray If fprime is approximated, use this value for the step size. callback : callable An optional user-supplied function to call after each iteration. Called as callback(xk), where xk is the current parameter vector. :Returns: (xopt, {fopt, gopt, Hopt, func_calls, grad_calls, warnflag}, <allvecs>) xopt : ndarray Parameters which minimize f, i.e. f(xopt) == fopt. fopt : float Minimum value. gopt : ndarray Value of gradient at minimum, f'(xopt), which should be near 0. Bopt : ndarray Value of 1/f''(xopt), i.e. the inverse hessian matrix. func_calls : int Number of function_calls made. grad_calls : int Number of gradient calls made. warnflag : integer 1 : Maximum number of iterations exceeded. 2 : Gradient and/or function calls not changing. allvecs : list Results at each iteration. Only returned if retall is True. *Other Parameters*: maxiter : int Maximum number of iterations to perform. full_output : bool If True,return fopt, func_calls, grad_calls, and warnflag in addition to xopt. disp : bool Print convergence message if True. retall : bool Return a list of results at each iteration if True. :Notes: Optimize the function, f, whose gradient is given by fprime using the quasi-Newton method of Broyden, Fletcher, Goldfarb, and Shanno (BFGS) See Wright, and Nocedal 'Numerical Optimization', 1999, pg. 198. *See Also*: scikits.openopt : SciKit which offers a unified syntax to call this and other solvers. """ x0 = asarray(x0).squeeze() if x0.ndim == 0: x0.shape = (1,) if maxiter is None: maxiter = len(x0)*200 func_calls, f = wrap_function(f, args) if fprime is None: grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon)) else: grad_calls, myfprime = wrap_function(fprime, args) print "Evaluating initial gradient ..." gfk = myfprime(x0) k = 0 N = len(x0) I = numpy.eye(N,dtype=int) Hk = I print "Evaluating initial function value ..." fval = f(x0) old_fval = fval + 5000 xk = x0 if retall: allvecs = [x0] sk = [2*gtol] warnflag = 0 gnorm = vecnorm(gfk,ord=norm) print "gtol = %g" % gtol print "gnorm = %g" % gnorm while (gnorm > gtol) and (k < maxiter): pk = -numpy.dot(Hk,gfk) print "xk =", xk print "pk =", pk print "Begin iteration %d line search..." % (k + 1) # print " gfk =", gfk # print " Hk = \n", Hk # do line search for alpha_k old_old_fval = old_fval old_fval = fval alpha_k, fc, gc, fval, old_fval, gfkp1 = \ linesearch.line_search(f,myfprime,xk,pk,gfk, old_fval,old_old_fval) if alpha_k is None: # line search failed try different one. print "Begin line search (method 2) ..." alpha_k, fc, gc, fval, old_fval, gfkp1 = \ line_search(f,myfprime,xk,pk,gfk, old_fval,old_old_fval) if alpha_k is None: # This line search also failed to find a better solution. print "Line search failed!" warnflag = 2 break print "End line search, alpha = %g ..." 
% alpha_k xkp1 = xk + alpha_k * pk if retall: allvecs.append(xkp1) sk = xkp1 - xk xk = xkp1 if gfkp1 is None: gfkp1 = myfprime(xkp1) yk = gfkp1 - gfk gfk = gfkp1 if callback is not None: callback(xk) k += 1 gnorm = vecnorm(gfk,ord=norm) print "gnorm = %g" % gnorm if (k >= maxiter or gnorm <= gtol): break # Reset the initial quasi-Newton matrix to a scaled identity aimed # at reflecting the size of the inverse true Hessian deltaXDeltaGrad = numpy.dot(sk, yk); updateOk = deltaXDeltaGrad >= _epsilon * max(_epsilonSq, \ vecnorm(sk,ord=2) * vecnorm(yk, ord=2)) if k == 1 and updateOk: Hk = deltaXDeltaGrad / numpy.dot(yk,yk) * numpy.eye(N); print "Hscaled =\n", Hk try: # this was handled in numeric, let it remain for more safety rhok = 1.0 / (numpy.dot(yk,sk)) except ZeroDivisionError: rhok = 1000.0 print "Divide-by-zero encountered: rhok assumed large" if isinf(rhok): # this is patch for numpy rhok = 1000.0 print "Divide-by-zero encountered: rhok assumed large" A1 = I - sk[:,numpy.newaxis] * yk[numpy.newaxis,:] * rhok A2 = I - yk[:,numpy.newaxis] * sk[numpy.newaxis,:] * rhok Hk = numpy.dot(A1,numpy.dot(Hk,A2)) + rhok * sk[:,numpy.newaxis] \ * sk[numpy.newaxis,:] if gnorm > gtol: warnflag = 1 if disp: if warnflag == 1: print "Warning: Maximum number of iterations has been exceeded" elif warnflag == 2: print "Warning: Desired error not necessarily achieved" \ "due to precision loss" else: print "Optimization terminated successfully." print " Current function value: %g" % fval print " Current gradient norm : %g" % gnorm print " Gradient tolerance : %g" % gtol print " Iterations: %d" % k print " Function evaluations: %d" % func_calls[0] print " Gradient evaluations: %d" % grad_calls[0] if full_output: retlist = xk, fval, gfk, Hk, func_calls[0], grad_calls[0], warnflag if retall: retlist += (allvecs,) else: retlist = xk if retall: retlist = (xk, allvecs) return retlist
def steepest_desc(s): sol = -df(s) a = opt.line_search(f, df, s, sol)[0] s_n = s + a * sol return s_n
def stepSize(x): res = optimize.line_search(lambda z: F(z[0], z[1]), lambda t: gradient(t[0], t[1]), np.array(x), np.array(sd), gr) alpha = res[0] return alpha
def __call__( self, x0, conf = None, obj_fun = None, obj_fun_grad = None, status = None, obj_args = None ): # def fmin_sd( conf, x0, fn_of, fn_ofg, args = () ): conf = get_default( conf, self.conf ) obj_fun = get_default( obj_fun, self.obj_fun ) obj_fun_grad = get_default( obj_fun_grad, self.obj_fun_grad ) status = get_default( status, self.status ) obj_args = get_default( obj_args, self.obj_args ) if conf.output: globals()['output'] = conf.output output( 'entering optimization loop...' ) nc_of, tt_of, fn_of = wrap_function( obj_fun, obj_args ) nc_ofg, tt_ofg, fn_ofg = wrap_function( obj_fun_grad, obj_args ) time_stats = {'of' : tt_of, 'ofg': tt_ofg, 'check' : []} ofg = None it = 0 xit = x0.copy() while 1: of = fn_of( xit ) if it == 0: of0 = ofit0 = of_prev = of of_prev_prev = of + 5000.0 if ofg is None: ofg = fn_ofg( xit ) if conf.check: tt = time.clock() check_gradient( xit, ofg, fn_of, conf.delta, conf.check ) time_stats['check'].append( time.clock() - tt ) ofg_norm = nla.norm( ofg, conf.norm ) ret = conv_test( conf, it, of, ofit0, ofg_norm ) if ret >= 0: break ofit0 = of ## # Backtrack (on errors). alpha = conf.ls0 can_ls = True while 1: xit2 = xit - alpha * ofg aux = fn_of( xit2 ) if self.log is not None: self.log(of, ofg_norm, alpha, it) if aux is None: alpha *= conf.ls_red_warp can_ls = False output( 'warp: reducing step (%f)' % alpha ) elif conf.ls and conf.ls_method == 'backtracking': if aux < of * conf.ls_on: break alpha *= conf.ls_red output( 'backtracking: reducing step (%f)' % alpha ) else: of_prev_prev = of_prev of_prev = aux break if alpha < conf.ls_min: if aux is None: raise RuntimeError, 'giving up...' output( 'linesearch failed, continuing anyway' ) break # These values are modified by the line search, even if it fails of_prev_bak = of_prev of_prev_prev_bak = of_prev_prev if conf.ls and can_ls and conf.ls_method == 'full': output( 'full linesearch...' ) alpha, fc, gc, of_prev, of_prev_prev, ofg1 = \ linesearch.line_search(fn_of,fn_ofg,xit, -ofg,ofg,of_prev,of_prev_prev, c2=0.4) if alpha is None: # line search failed -- use different one. alpha, fc, gc, of_prev, of_prev_prev, ofg1 = \ sopt.line_search(fn_of,fn_ofg,xit, -ofg,ofg,of_prev_bak, of_prev_prev_bak) if alpha is None or alpha == 0: # This line search also failed to find a better solution. ret = 3 break output( ' -> alpha: %.8e' % alpha ) else: if conf.ls_method == 'full': output( 'full linesearch off (%s and %s)' % (conf.ls, can_ls) ) ofg1 = None if self.log is not None: self.log.plot_vlines(color='g', linewidth=0.5) xit = xit - alpha * ofg if ofg1 is None: ofg = None else: ofg = ofg1.copy() for key, val in time_stats.iteritems(): if len( val ): output( '%10s: %7.2f [s]' % (key, val[-1]) ) it = it + 1 output( 'status: %d' % ret ) output( 'initial value: %.8e' % of0 ) output( 'current value: %.8e' % of ) output( 'iterations: %d' % it ) output( 'function evaluations: %d in %.2f [s]' \ % (nc_of[0], nm.sum( time_stats['of'] ) ) ) output( 'gradient evaluations: %d in %.2f [s]' \ % (nc_ofg[0], nm.sum( time_stats['ofg'] ) ) ) if self.log is not None: self.log(of, ofg_norm, alpha, it) if conf.log.plot is not None: self.log(save_figure=conf.log.plot, finished=True) else: self.log(finished=True) if status is not None: status['log'] = self.log status['status'] = status status['of0'] = of0 status['of'] = of status['it'] = it status['nc_of'] = nc_of[0] status['nc_ofg'] = nc_ofg[0] status['time_stats'] = time_stats return xit
#optimisation in theta, psi xpath = [0.001*np.random.randn(2)] f_spherical = lambda x: objective(*spherical_to_cart(x[0],x[1],1)) def g_spherical(x): g_cart = grad(*spherical_to_cart(x[0],x[1],1)) theta, psi = x J = np.array([[np.cos(psi)*np.cos(theta),np.sin(psi)*np.cos(theta),-np.sin(theta)], [-np.sin(psi)*np.sin(theta),np.cos(psi)*np.sin(theta),0.]]) return np.dot(J,g_cart) iteration=0 while True: search_dir = -g_spherical(xpath[-1]) grad_norm = np.sum(np.square(search_dir)) if grad_norm<1e-6: break alpha, fc,gc,foo,bar,baz = optimize.line_search(f_spherical,g_spherical,xpath[-1],search_dir,-search_dir) xnew = xpath[-1] + 0.01*alpha*search_dir xpath.append(xnew) iteration += 1 print iteration,grad_norm,'\r', sys.stdout.flush() print '' xx,yy,zz = np.vstack(map(lambda x: spherical_to_cart(x[0],x[1],1),xpath)).T ax.plot(xx,yy,zz,'mo',linewidth=2,mew=0) #natural optimisation in theta, psi xpath = [0.001*np.random.randn(2)] iteration=0 while True: g = g_spherical(xpath[-1])