def test_chlambda1(self):
    c1, c2, c3 = ch.Ch(1), ch.Ch(2), ch.Ch(3)
    adder = ch.ChLambda(lambda x, y: x + y)
    adder.x = c1
    adder.y = c2
    self.assertTrue(adder.r == 3)
    adder.x = c2
    self.assertTrue(adder.r == 4)
    adder.x = c1
    self.assertTrue(adder.r == 3)
def test_serialization(self):
    # The main challenge with serialization is the "_parents"
    # attribute, which is a nonserializable WeakKeyDictionary.
    # So we pickle/unpickle, change a child and verify the value
    # at root, and verify that both children have parentage.
    from six.moves import cPickle as pickle
    tmp = ch.Ch(10) + ch.Ch(20)
    tmp = pickle.loads(pickle.dumps(tmp))
    tmp.b.x = 30
    self.assertTrue(tmp.r[0] == 40)
    self.assertTrue(list(tmp.a._parents.keys())[0] == tmp)
    self.assertTrue(list(tmp.a._parents.keys())[0] == list(tmp.b._parents.keys())[0])
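# A minimal sketch of the usual pattern for making a WeakKeyDictionary-holding
# object picklable (hypothetical class, not chumpy's actual implementation):
# drop the weak dict in __getstate__ and rebuild the parent links in
# __setstate__.
import weakref

class _PickleSketchNode(object):
    """Hypothetical two-child node illustrating the _parents workaround."""

    def __init__(self, a=None, b=None):
        self.a, self.b = a, b
        self._parents = weakref.WeakKeyDictionary()

    def __getstate__(self):
        state = self.__dict__.copy()
        del state['_parents']  # a WeakKeyDictionary cannot be pickled
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        self._parents = weakref.WeakKeyDictionary()
        for child in (self.a, self.b):
            if child is not None:
                child._parents[self] = None  # restore child -> parent links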
def test_redundancy_removal(self):
    for MT in [False, True]:
        x1, x2 = ch.Ch(10), ch.Ch(20)
        x1_plus_x2_1 = x1 + x2
        x1_plus_x2_2 = x1 + x2
        redundant_sum = (x1_plus_x2_1 + x1_plus_x2_2) * 2
        redundant_sum.MT = MT
        self.assertTrue(redundant_sum.a.a is not redundant_sum.a.b)
        redundant_sum.remove_redundancy()
        self.assertTrue(redundant_sum.a.a is redundant_sum.a.b)
def test_caching(self):
    vals = [10, 20, 30, 40, 50]
    f = lambda a, b, c, d, e: a + (b * c) - d**e

    # Set up our objects
    Cs = [ch.Ch(v) for v in vals]
    C_result = f(*Cs)

    # Sometimes residuals should be cached
    r1 = C_result.r
    r2 = C_result.r
    self.assertTrue(r1 is r2)

    # Other times residuals need refreshing
    Cs[0].set(x=5)
    r3 = C_result.r
    self.assertTrue(r3 is not r2)

    # Sometimes derivatives should be cached
    dr1 = C_result.dr_wrt(Cs[1])
    dr2 = C_result.dr_wrt(Cs[1])
    self.assertTrue(dr1 is dr2)

    # Other times derivatives need refreshing
    Cs[2].set(x=5)
    dr3 = C_result.dr_wrt(Cs[1])
    self.assertTrue(dr3 is not dr2)
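# A minimal sketch of the caching contract exercised above (hypothetical
# helper, not chumpy's internals): memoize the computed value and dirty the
# memo whenever an input is set, so `r` returns the same object until then.
class _CacheSketch(object):
    def __init__(self, compute, **inputs):
        self._compute = compute  # function of the inputs dict
        self._inputs = inputs
        self._cache = None

    def set(self, **kwargs):
        self._inputs.update(kwargs)
        self._cache = None  # any change invalidates the cached result

    @property
    def r(self):
        if self._cache is None:
            self._cache = self._compute(self._inputs)
        return self._cache  # identical object on repeated reads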
def test_ndim(self):
    vs = [ch.Ch(np.random.randn(6).reshape(2, 3)) for i in range(6)]
    res = vs[0] + vs[1] - vs[2] * vs[3] / (vs[4] ** 2) ** vs[5]
    self.assertTrue(res.shape[0] == 2 and res.shape[1] == 3)
    res = (vs[0] + 1) + (vs[1] - 2) - (vs[2] * 3) * (vs[3] / 4) / (vs[4] ** 2) ** vs[5]
    self.assertTrue(res.shape[0] == 2 and res.shape[1] == 3)
    drs = [res.dr_wrt(v) for v in vs]
def test_scalars(self):
    try:
        import theano.tensor as T
        from theano import function
    except ImportError:
        return

    # Set up variables and function
    vals = [1, 2, 3, 4, 5]
    f = lambda a, b, c, d, e: a + (b * c) - d**e

    # Set up our objects
    Cs = [ch.Ch(v) for v in vals]
    C_result = f(*Cs)

    # Set up Theano's equivalents
    Ts = T.dscalars('T1', 'T2', 'T3', 'T4', 'T5')
    TF = f(*Ts)
    T_result = function(Ts, TF)

    # Make sure values and derivatives are equal
    self.assertEqual(C_result.r, T_result(*vals))
    for k in range(len(vals)):
        theano_derivative = function(Ts, T.grad(TF, Ts[k]))(*vals)
        our_derivative = C_result.dr_wrt(Cs[k])[0, 0]
        self.assertEqual(theano_derivative, our_derivative)
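# Worked check for one of the comparisons above: with
# f(a, b, c, d, e) = a + b*c - d**e and (a, b, c, d, e) = (1, 2, 3, 4, 5),
# df/db = c = 3 and df/dd = -e * d**(e-1) = -5 * 4**4 = -1280; both Theano's
# grad and dr_wrt should report these values.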
def test_indexing(self):
    big = ch.Ch(np.arange(60).reshape((10, 6)))
    little = big[1:3, 3:6]
    self.assertTrue(np.max(np.abs(little.r - np.array([[9, 10, 11], [15, 16, 17]]))) == 0)

    little = big[5]
    self.assertTrue(np.max(np.abs(little.r - np.arange(30, 36))) == 0)
    self.assertTrue(np.max(np.abs(sp.coo_matrix(little.dr_wrt(big)).col - np.arange(30, 36))) == 0)

    little = big[2, 3]
    self.assertTrue(little.r[0] == 15.0)

    little = big[2, 3:5]
    self.assertTrue(np.max(np.abs(little.r - np.array([15, 16]))) == 0.)
    _ = little.dr_wrt(big)

    # Tests assignment through reorderings
    aa = ch.arange(4 * 4 * 4).reshape((4, 4, 4))[:3, :3, :3]
    aa[0, 1, 2] = 100
    self.assertTrue(aa[0, 1, 2].r[0] == 100)

    # Tests assignment through reorderings (NaN's are a special case)
    aa = ch.arange(9).reshape((3, 3))
    aa[1, 1] = np.nan
    self.assertTrue(np.isnan(aa.r[1, 1]))
    self.assertFalse(np.isnan(aa.r[0, 0]))
def test_unary(self):
    fns = [ch.exp, ch.log, ch.sin, ch.arcsin, ch.cos, ch.arccos,
           ch.tan, ch.arctan, ch.negative, ch.square, ch.sqrt,
           ch.abs, ch.reciprocal]

    eps = 1e-8
    for f in fns:
        x0 = ch.Ch(.25)
        x1 = ch.Ch(x0.r + eps)
        pred = f(x0).dr_wrt(x0)
        empr = (f(x1).r - f(x0).r) / eps
        if f is ch.reciprocal:
            self.assertTrue(1e-6 > np.abs(pred.ravel()[0] - empr.ravel()[0]))
        else:
            self.assertTrue(1e-7 > np.abs(pred.ravel()[0] - empr.ravel()[0]))
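# A note on the loosened tolerance for ch.reciprocal above: at x0 = 0.25 the
# exact derivative is -1/x0**2 = -16, and a forward difference with eps = 1e-8
# carries a truncation error of roughly (eps/2) * |f''(x0)|, where
# f''(x) = 2/x**3 = 128 here, i.e. about 6.4e-7, so 1e-7 would be too strict.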
def test_reorder_caching(self):
    a = ch.Ch(np.zeros(8).reshape((4, 2)))
    b = a.T
    dr0 = b.dr_wrt(a)
    a.x = a.x + 1.
    dr1 = b.dr_wrt(a)
    self.assertTrue(dr0 is dr1)
    a.x = np.zeros(4).reshape((2, 2))
    dr2 = b.dr_wrt(a)
    self.assertTrue(dr2 is not dr1)
def test_vectors(self):
    try:
        import theano.tensor as T
        from theano import function
    except ImportError:
        return

    for MT in [False, True]:
        # Set up variables and function
        vals = [np.random.randn(20) for i in range(5)]
        f = lambda a, b, c, d, e: a + (b * c) - d**e

        # Set up our objects
        Cs = [ch.Ch(v) for v in vals]
        C_result = f(*Cs)
        C_result.MT = MT

        # Set up Theano equivalents
        Ts = T.dvectors('T1', 'T2', 'T3', 'T4', 'T5')
        TF = f(*Ts)
        T_result = function(Ts, TF)

        # Make sure values and derivatives are equal
        np.testing.assert_array_equal(C_result.r, T_result(*vals))
        for k in range(len(vals)):
            theano_derivative = function(Ts, T.jacobian(TF, Ts[k]))(*vals)
            our_derivative = np.array(C_result.dr_wrt(Cs[k]).todense())

            # Theano produces more NaN's than we do during exponentiation,
            # so we test only on entries where Theano is without NaN's
            without_nans = np.nonzero(np.logical_not(np.isnan(theano_derivative.flatten())))[0]
            np.testing.assert_array_equal(theano_derivative.flatten()[without_nans],
                                          our_derivative.flatten()[without_nans])
def test_matmatmult(self):
    from ch import dot
    mtx1 = ch.Ch(np.arange(6).reshape((3, 2)))
    mtx2 = ch.Ch(np.arange(8).reshape((2, 4)) * 10)
    mtx3 = dot(mtx1, mtx2)

    for mtx in [mtx1, mtx2]:
        oldval = mtx3.r.copy()
        mtxd = mtx3.dr_wrt(mtx).copy()
        mtx_diff = np.random.rand(mtx.r.size).reshape(mtx.r.shape)
        mtx.x = mtx.r + mtx_diff
        mtx_emp = mtx3.r - oldval
        mtx_pred = mtxd.dot(mtx_diff.ravel()).reshape(mtx_emp.shape)
        self.assertTrue(np.max(np.abs(mtx_emp - mtx_pred)) < 1e-11)
def test_shared(self):
    chs = [ch.Ch(i) for i in range(10)]
    vrs = [float(i) for i in range(10)]
    func = lambda a: a[0] * a[1] + (a[2] * a[3]) / a[4]

    chained_result = func(chs).r
    regular_result = func(vrs)
    self.assertTrue(chained_result == regular_result)

    chained_func = func(chs)
    chained_func.replace(chs[0], ch.Ch(50))
    vrs[0] = 50
    chained_result = chained_func.r
    regular_result = func(vrs)
    self.assertTrue(chained_result == regular_result)
def test_stacking(self):
    a1 = ch.Ch(np.arange(10).reshape(2, 5))
    b1 = ch.Ch(np.arange(20).reshape(4, 5))
    c1 = ch.vstack((a1, b1))
    c1_check = np.vstack((a1.r, b1.r))
    residuals1 = (c1_check - c1.r).ravel()

    a2 = ch.Ch(np.arange(10).reshape(5, 2))
    b2 = ch.Ch(np.arange(20).reshape(5, 4))
    c2 = ch.hstack((a2, b2))
    c2_check = np.hstack((a2.r, b2.r))
    residuals2 = (c2_check - c2.r).ravel()

    self.assertFalse(np.any(residuals1))
    self.assertFalse(np.any(residuals2))

    d0 = ch.array(np.arange(60).reshape((10, 6)))
    d1 = ch.vstack((d0[:4], d0[4:]))
    d2 = ch.hstack((d1[:, :3], d1[:, 3:]))
    tmp = d2.dr_wrt(d0).todense()
    diff = tmp - np.eye(tmp.shape[0])
    self.assertFalse(np.any(diff.ravel()))
def test_iteration_cache(self):
    """Each time you set an attribute, the cache (of r's and dr's)
    of ancestors is cleared. Because children share ancestors, this
    means these can be cleared multiple times unnecessarily; in some
    cases, where lots of objects exist, this cache clearing can
    actually be a bottleneck.

    Therefore, the concept of an iteration was added; intended to be
    used in an optimization setting (see optimization.py) and in the
    set() method, it avoids such redundant clearing of cache."""

    a, b, c = ch.Ch(1), ch.Ch(2), ch.Ch(3)
    x = a + b
    y = x + c
    self.assertTrue(y.r[0] == 6)

    a.__setattr__('x', 10, 1)
    self.assertTrue(y.r == 15)

    a.__setattr__('x', 100, 1)
    self.assertTrue(y.r == 15)

    a.__setattr__('x', 100, 2)
    self.assertTrue(y.r == 105)

    a, b, c = ch.array([1]), ch.array([2]), ch.array([3])
    x = a + b
    y = x + c
    self.assertTrue(y.r[0] == 6)

    a.__setattr__('x', np.array([10]), 1)
    self.assertTrue(y.r[0] == 15)

    a.__setattr__('x', np.array(100), 1)
    self.assertTrue(y.r[0] == 15)

    a.__setattr__('x', np.array(100), 2)
    self.assertTrue(y.r[0] == 105)

    a.__setitem__(range(0, 1), np.array(200), 2)
    self.assertTrue(y.r[0] == 105)

    a.__setitem__(range(0, 1), np.array(200), 3)
    self.assertTrue(y.r[0] == 205)
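# A minimal sketch of the iteration mechanism described in the docstring
# above (hypothetical, not chumpy's internals): remember which iteration
# last cleared this node's cache and skip the (recursive) clearing when the
# same iteration asks again.
class _IterationCacheSketch(object):
    def __init__(self):
        self._cache = {}
        self._itr_of_last_clear = None

    def clear_cache(self, itr=None):
        if itr is not None and itr == self._itr_of_last_clear:
            return  # this iteration already cleared us; skip the ancestors
        self._cache = {}
        self._itr_of_last_clear = itr
        # a real node would now recurse into its parents here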
def test_transpose(self):
    from utils import row, col
    from copy import deepcopy
    for which in ('C', 'F'):  # test in contiguous and fortran mode
        a = ch.Ch(np.require(np.zeros(8).reshape((4, 2)), requirements=which))
        b = a.T
        b1 = b.r.copy()
        dr = deepcopy(b.dr_wrt(a))
        diff = np.arange(a.size).reshape(a.shape)
        a.x = np.require(a.r + diff, requirements=which)
        b2 = b.r.copy()

        diff_pred = dr.dot(col(diff)).ravel()
        diff_emp = (b2 - b1).ravel()
        np.testing.assert_array_equal(diff_pred, diff_emp)
def test_maximum(self):
    from utils import row, col
    from ch import maximum

    # Make sure that when we compare the max of two *identical* numbers,
    # we get the right derivatives wrt both
    the_max = maximum(ch.Ch(1), ch.Ch(1))
    self.assertTrue(the_max.r.ravel()[0] == 1.)
    self.assertTrue(the_max.dr_wrt(the_max.a)[0, 0] == 1.)
    self.assertTrue(the_max.dr_wrt(the_max.b)[0, 0] == 1.)

    # Now test given that all numbers are different, by allocating from
    # a pool of randomly permuted numbers.
    # We test combinations of scalars and 2d arrays.
    rnd = np.asarray(np.random.permutation(np.arange(20)), np.float64)
    c1 = ch.Ch(rnd[:6].reshape((2, 3)))
    c2 = ch.Ch(rnd[6:12].reshape((2, 3)))
    s1 = ch.Ch(rnd[12])
    s2 = ch.Ch(rnd[13])

    eps = .1
    for first in [c1, s1]:
        for second in [c2, s2]:
            the_max = maximum(first, second)

            for which_to_change in [first, second]:
                max_r0 = the_max.r.copy()
                max_r_diff = np.max(np.abs(max_r0 - np.maximum(first.r, second.r)))
                self.assertTrue(max_r_diff == 0)

                max_dr = the_max.dr_wrt(which_to_change).copy()
                which_to_change.x = which_to_change.x + eps
                max_r1 = the_max.r.copy()

                emp_diff = (the_max.r - max_r0).ravel()
                pred_diff = max_dr.dot(col(eps * np.ones(max_dr.shape[1]))).ravel()

                max_dr_diff = np.max(np.abs(emp_diff - pred_diff))
                self.assertTrue(max_dr_diff < 1e-14)
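# Note on the tie case at the top of test_maximum: when a == b, the
# assertions pin down the convention that d(max)/da == d(max)/db == 1,
# i.e. each argument receives the full (sub)gradient rather than a split.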
def test_chlambda2(self):
    passthrough = ch.ChLambda(lambda x: x)
    self.assertTrue(passthrough.dr_wrt(passthrough.x) is not None)
    passthrough.x = ch.Ch(123)
    self.assertTrue(passthrough.dr_wrt(passthrough.x) is not None)
def ravel(a, order='C'):
    assert(order == 'C')
    if isinstance(a, np.ndarray):
        a = ch.Ch(a)
    return reshape(a=a, newshape=(-1,))
def _minimize_dogleg(obj, free_variables, on_step=None,
                     maxiter=200, max_fevals=np.inf, sparse_solver='spsolve',
                     disp=False, show_residuals=None,
                     e_1=1e-15, e_2=1e-15, e_3=0., delta_0=None):
    """Nonlinear optimization using Powell's dogleg method.

    See Lourakis et al, 2005, ICCV '05, "Is Levenberg-Marquardt the Most
    Efficient Optimization for Implementing Bundle Adjustment?":
    http://www.ics.forth.gr/cvrl/publications/conferences/0201-P0401-lourakis-levenberg.pdf
    """

    import warnings
    if show_residuals is not None:
        warnings.warn('minimize_dogleg: show_residuals parm is deprecated, pass a dict instead.')

    labels = {}
    if isinstance(obj, list) or isinstance(obj, tuple):
        obj = ch.concatenate([f.ravel() for f in obj])
    elif isinstance(obj, dict):
        labels = obj
        obj = ch.concatenate([f.ravel() for f in obj.values()])

    niters = maxiter
    verbose = disp

    num_unique_ids = len(np.unique(np.array([id(freevar) for freevar in free_variables])))
    if num_unique_ids != len(free_variables):
        raise Exception('The "free_variables" param contains duplicate variables.')

    obj = ChInputsStacked(obj=obj, free_variables=free_variables,
                          x=np.concatenate([freevar.r.ravel() for freevar in free_variables]))

    def call_cb():
        if on_step is not None:
            on_step(obj)

        report_line = ""
        if len(labels) > 0:
            report_line += '%.2e | ' % (np.sum(obj.r**2),)
        for label in sorted(labels.keys()):
            objective = labels[label]
            report_line += '%s: %.2e | ' % (label, np.sum(objective.r**2))
        if len(labels) > 0:
            report_line += '\n'
        sys.stderr.write(report_line)

    call_cb()

    # pif = print-if-verbose.
    # can't use "print" because it's a statement, not a fn
    pif = lambda x: sys.stdout.write(x + '\n') if verbose else 0

    if callable(sparse_solver):
        solve = sparse_solver
    elif isinstance(sparse_solver, str) and sparse_solver in _solver_fns.keys():
        solve = _solver_fns[sparse_solver]
    else:
        raise Exception('sparse_solver argument must be either a string in the set (%s) '
                        'or have the api of scipy.sparse.linalg.spsolve.'
                        % ', '.join(_solver_fns.keys()))

    # optimization parms
    k_max = niters
    fevals = 0

    k = 0
    delta = delta_0
    p = col(obj.x.r)

    fevals += 1

    tm = time.time()
    pif('computing Jacobian...')
    J = obj.J
    if sp.issparse(J):
        assert(J.nnz > 0)
    pif('Jacobian (%dx%d) computed in %.2fs' % (J.shape[0], J.shape[1], time.time() - tm))

    assert(J.shape[1] == p.size)

    tm = time.time()
    pif('updating A and g...')
    A = J.T.dot(J)
    r = col(obj.r.copy())
    g = col(J.T.dot(-r))
    pif('A and g updated in %.2fs' % (time.time() - tm))

    stop = norm(g, np.inf) < e_1

    while (not stop) and (k < k_max) and (fevals < max_fevals):
        k += 1
        pif('beginning iteration %d' % (k,))
        d_sd = col((sqnorm(g)) / (sqnorm(J.dot(g))) * g)
        GNcomputed = False

        while True:
            # if the Cauchy point is outside the trust region,
            # take that direction but only to the edge of the trust region
            if delta is not None and norm(d_sd) >= delta:
                pif('PROGRESS: Using stunted cauchy')
                d_dl = np.array(col(delta / norm(d_sd) * d_sd))
            else:
                if not GNcomputed:
                    tm = time.time()
                    if scipy.sparse.issparse(A):
                        A.eliminate_zeros()
                        pif('sparse solve...sparsity infill is %.3f%% (hessian %dx%d), J infill %.3f%%' % (
                            100. * A.nnz / (A.shape[0] * A.shape[1]),
                            A.shape[0], A.shape[1],
                            100. * J.nnz / (J.shape[0] * J.shape[1])))
                        if g.size > 1:
                            d_gn = col(solve(A, g))
                            if np.any(np.isnan(d_gn)) or np.any(np.isinf(d_gn)):
                                from scipy.sparse.linalg import lsqr
                                d_gn = col(lsqr(A, g)[0])
                        else:
                            d_gn = np.atleast_1d(g.ravel()[0] / A[0, 0])
                        pif('sparse solve...done in %.2fs' % (time.time() - tm))
                    else:
                        pif('dense solve...')
                        try:
                            d_gn = col(np.linalg.solve(A, g))
                        except Exception:
                            d_gn = col(np.linalg.lstsq(A, g)[0])
                        pif('dense solve...done in %.2fs' % (time.time() - tm))
                    GNcomputed = True

                # if the gauss-newton solution is within the trust region, use it
                if delta is None or norm(d_gn) <= delta:
                    pif('PROGRESS: Using gauss-newton solution')
                    d_dl = np.array(d_gn)
                    if delta is None:
                        delta = norm(d_gn)
                else:  # between cauchy step and gauss-newton step
                    pif('PROGRESS: between cauchy and gauss-newton')

                    # compute beta multiplier
                    delta_sq = delta**2
                    pnow = ((d_gn - d_sd).T.dot(d_gn - d_sd) * delta_sq
                            + d_gn.T.dot(d_sd)**2
                            - sqnorm(d_gn) * (sqnorm(d_sd)))
                    B = delta_sq - sqnorm(d_sd)
                    B /= ((d_gn - d_sd).T.dot(d_sd) + math.sqrt(pnow))

                    # apply step
                    d_dl = np.array(d_sd + float(B) * (d_gn - d_sd))
                    #assert(math.fabs(norm(d_dl) - delta) < 1e-12)

            if norm(d_dl) <= e_2 * norm(p):
                pif('stopping because of small step size (norm_dl < %.2e)' % (e_2 * norm(p)))
                stop = True
            else:
                p_new = p + d_dl

                tm_residuals = time.time()
                obj.x = p_new
                fevals += 1
                r_trial = obj.r.copy()
                tm_residuals = time.time() - tm_residuals

                # rho is the ratio of...
                # (improvement in SSE) / (predicted improvement in SSE)

                # slower:
                #rho = norm(e_p)**2 - norm(e_p_trial)**2
                #rho = rho / (L(d_dl*0, e_p, J) - L(d_dl, e_p, J))

                # faster:
                sqnorm_ep = sqnorm(r)
                rho = sqnorm_ep - norm(r_trial)**2
                with warnings.catch_warnings():
                    warnings.filterwarnings('ignore', category=RuntimeWarning)
                    if rho > 0:
                        rho /= predicted_improvement(d_dl, -r, J, sqnorm_ep, A, g)

                improvement_occurred = rho > 0

                # if the objective function improved, update input parameter estimate.
                # Note that the obj.x already has the new parms,
                # and we should not set them again to the same (or we'll bust the cache)
                if improvement_occurred:
                    p = col(p_new)
                    call_cb()

                    if (sqnorm_ep - norm(r_trial)**2) / sqnorm_ep < e_3:
                        stop = True
                        pif('stopping because improvement < %.1e%%' % (100 * e_3,))
                else:  # Put the old parms back
                    obj.x = ch.Ch(p)
                    obj.on_changed('x')  # copies from flat vector to free variables

                # if the objective function improved and we're not done,
                # get ready for the next iteration
                if improvement_occurred and not stop:
                    tm_jac = time.time()
                    pif('computing Jacobian...')
                    J = obj.J.copy()
                    tm_jac = time.time() - tm_jac
                    pif('Jacobian (%dx%d) computed in %.2fs' % (J.shape[0], J.shape[1], tm_jac))
                    pif('Residuals+Jac computed in %.2fs' % (tm_jac + tm_residuals,))

                    tm = time.time()
                    pif('updating A and g...')
                    A = J.T.dot(J)
                    r = col(r_trial)
                    g = col(J.T.dot(-r))
                    pif('A and g updated in %.2fs' % (time.time() - tm))

                    if norm(g, np.inf) < e_1:
                        stop = True
                        pif('stopping because norm(g, np.inf) < %.2e' % (e_1,))

                # update our trust region
                delta = updateRadius(rho, delta, d_dl)

                if delta <= e_2 * norm(p):
                    stop = True
                    pif('stopping because trust region is too small')

            # the following "collect" is very expensive.
            # please contact matt if you find situations where it actually helps things.
            #import gc; gc.collect()

            if stop or improvement_occurred or (fevals >= max_fevals):
                break

    if k >= k_max:
        pif('stopping because max number of user-specified iterations (%d) has been met' % (k_max,))
    elif fevals >= max_fevals:
        pif('stopping because max number of user-specified func evals (%d) has been met' % (max_fevals,))

    return obj.free_variables
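# A small self-contained sketch of the dogleg interpolation used between the
# Cauchy and Gauss-Newton steps above (dense 1-d numpy arrays assumed; the
# production code uses an algebraically equivalent form that avoids
# cancellation): choose beta so that ||d_sd + beta*(d_gn - d_sd)|| == delta.
import numpy as np

def _dogleg_interpolate(d_sd, d_gn, delta):
    v = d_gn - d_sd
    # solve the quadratic ||d_sd + beta*v||^2 == delta**2 for the positive root
    a = v.dot(v)
    b = 2. * d_sd.dot(v)
    c = d_sd.dot(d_sd) - delta**2
    beta = (-b + np.sqrt(b**2 - 4. * a * c)) / (2. * a)
    return d_sd + beta * v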
def asarray(a, dtype=None, order=None):
    assert (dtype is None or dtype is np.float64)
    assert (order == 'C' or order is None)
    if hasattr(a, 'dterms'):
        return a
    return ch.Ch(np.asarray(a, dtype, order))
def minimize_dogleg(obj, free_variables, on_step=None,
                    maxiter=200, max_fevals=np.inf, sparse_solver='spsolve',
                    disp=True, e_1=1e-15, e_2=1e-15, e_3=0., delta_0=None,
                    treat_as_dense=False):
    """Nonlinear optimization using Powell's dogleg method.

    See Lourakis et al, 2005, ICCV '05, "Is Levenberg-Marquardt the Most
    Efficient Optimization for Implementing Bundle Adjustment?":
    http://www.ics.forth.gr/cvrl/publications/conferences/0201-P0401-lourakis-levenberg.pdf

    e_N are stopping conditions:
    e_1 is gradient magnitude threshold
    e_2 is step size magnitude threshold
    e_3 is improvement threshold (as a ratio; 0.1 means it must improve by 10% at each step)

    maxiter and max_fevals are also stopping conditions. Note that they're
    not quite the same, as an iteration may evaluate the function more than once.

    sparse_solver is the solver to use to calculate the Gauss-Newton step in
    the common case that the Jacobian is sparse. It can be 'spsolve' (in which
    case scipy.sparse.linalg.spsolve will be used), 'cg' (in which case
    scipy.sparse.linalg.cg will be used), or any callable that matches the api
    of scipy.sparse.linalg.spsolve to solve `A x = b` for x where A is sparse.
    cg uses a conjugate gradient method and will be faster if A is sparse but
    x is dense. spsolve will be faster if x is also sparse.

    delta_0 defines the initial trust region. Generally speaking, if this is
    set too low then the optimization will never really go anywhere (too
    small a trust region to make any real progress before running out of
    iterations) and if it's set too high then the optimization will diverge
    immediately and go wild (such a large trust region that the initial step
    so far overshoots that it can't recover). If it's left as None, it will
    be automatically estimated on the first iteration; it's always updated
    at each iteration, so this is treated only as an initialization.

    treat_as_dense explicitly converts all Jacobians of obj to dense matrices
    """

    solve = setup_sparse_solver(sparse_solver)
    obj, callback = setup_objective(obj, free_variables, on_step=on_step, disp=disp,
                                    make_dense=treat_as_dense)

    state = DoglegState(delta=delta_0, solve=solve)
    state.p = obj.x.r

    # inject profiler if in DEBUG mode
    if ch.DEBUG:
        from .monitor import DrWrtProfiler
        obj.profiler = DrWrtProfiler(obj)

    callback()
    state.updateJ(obj)
    state.r = obj.r

    def stop(msg):
        if not state.done:
            pif(msg)
        state.done = True

    if np.linalg.norm(state.g, np.inf) < e_1:
        stop('stopping because norm(g, np.inf) < %.2e' % e_1)

    while not state.done:
        state.start_iteration()
        while True:
            state.update_step()
            if state.step_size <= e_2 * np.linalg.norm(state.p):
                stop('stopping because of small step size (norm_dl < %.2e)'
                     % (e_2 * np.linalg.norm(state.p)))
            else:
                tm = timer()
                obj.x = state.p + state.step
                trial = state.trial_r(obj.r)
                pif('Residuals computed in %.2fs' % tm())

                # if the objective function improved, update input parameter estimate.
                # Note that the obj.x already has the new parms,
                # and we should not set them again to the same (or we'll bust the cache)
                if trial.is_improvement:
                    state.p = state.p + state.step
                    callback()
                    if e_3 > 0. and trial.improvement < e_3:
                        stop('stopping because improvement < %.1e%%' % (100 * e_3))
                    else:
                        state.updateJ(obj)
                        state.r = trial.r
                        if np.linalg.norm(state.g, np.inf) < e_1:
                            stop('stopping because norm(g, np.inf) < %.2e' % e_1)
                else:  # Put the old parms back
                    obj.x = ch.Ch(state.p)
                    obj.on_changed('x')  # copies from flat vector to free variables

                # update our trust region
                state.updateRadius(trial.rho)
                if state.delta <= e_2 * np.linalg.norm(state.p):
                    stop('stopping because trust region is too small')

            if state.done or trial.is_improvement or (obj.fevals >= max_fevals):
                break

        if state.iteration >= maxiter:
            stop('stopping because max number of user-specified iterations (%d) has been met'
                 % maxiter)
        elif obj.fevals >= max_fevals:
            stop('stopping because max number of user-specified func evals (%d) has been met'
                 % max_fevals)

    return obj.free_variables
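# A hedged usage sketch (hypothetical data; assumes the Ch arithmetic and the
# minimize_dogleg signature defined in this module): fit y = m*x + b to data
# by driving the stacked residual vector toward zero over m and b.
def _example_fit_line():
    x_data = np.arange(10.)
    y_data = 3. * x_data + 2.
    m, b = ch.Ch(1.), ch.Ch(0.)
    residuals = m * x_data + b - y_data
    minimize_dogleg(residuals, free_variables=[m, b], disp=False)
    return m.r, b.r  # should approach (3., 2.)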