def add_new_sample(self, k, rvec_extra):
    """Fold one more residual sample taken at interpolation point ``k`` into the stored data.

    The residual vector held in ``fval_v[k, :]`` is replaced by the running
    mean over all samples taken at that point so far, ``fval[k]`` is
    recomputed from that averaged vector with ``sumsq``, the sample counter
    for the point is incremented, and ``kopt`` is reset to the index of the
    smallest ``fval`` among the first ``npt()`` entries.

    Args:
        k: index of the point that was resampled; must satisfy
            ``0 <= k < self.npt()``.
        rvec_extra: the newly evaluated residual vector at that point.
    """
    assert 0 <= k < self.npt(), "Invalid index %g" % k

    # Weight of the existing average: (old count) / (old count + 1)
    old_weight = float(self.nsamples[k]) / float(self.nsamples[k] + 1)
    new_weight = 1 - old_weight

    self.fval_v[k, :] = old_weight * self.fval_v[k, :] + new_weight * rvec_extra
    self.fval[k] = sumsq(self.fval_v[k, :])
    self.nsamples[k] += 1

    # The averaged value may have moved in either direction, so re-select
    # the best point over everything currently available.
    available_fvals = self.fval[:self.npt()]
    self.kopt = np.argmin(available_fvals)
    return
def save_point(self, x, rvec, nsamples, x_in_abs_coords=True):
    """Remember ``(x, rvec)`` as the saved point unless a strictly better one is already held.

    The candidate's objective is ``sumsq(rvec)``. It replaces the saved
    point when nothing has been saved yet, or when its objective is less
    than or equal to the saved objective ``fsave``.

    Args:
        x: the point to save.
        rvec: residual vector at ``x``.
        nsamples: sample count to store alongside the point.
        x_in_abs_coords: if True, ``x`` is copied as given; otherwise it is
            first passed through ``self.as_absolute_coordinates``.

    Returns:
        True if the point was stored (together with a copy of the current
        ``model_jac``), False if the existing saved point was kept.
    """
    candidate_f = sumsq(rvec)

    # Written as "not (a <= b)" rather than "a > b" so that the outcome is
    # the same as the plain "<=" acceptance test for every input.
    if self.fsave is not None and not (candidate_f <= self.fsave):
        return False  # this value is worse than what we have already - didn't save

    if x_in_abs_coords:
        self.xsave = x.copy()
    else:
        self.xsave = self.as_absolute_coordinates(x)
    self.rsave = rvec.copy()
    self.fsave = candidate_f
    self.jacsave = self.model_jac.copy()
    self.nsamples_save = nsamples
    return True
def change_point(self, k, x, rvec, allow_kopt_update=True):
    """Overwrite interpolation point ``k`` with ``x`` (w.r.t. xbase) and residuals ``rvec``.

    While fewer than ``num_pts`` points have been added, an index at or
    beyond ``npt_so_far`` is treated as appending the next point and must
    equal ``npt_so_far`` exactly; the counter is then advanced. Any other
    call must use an index in ``[0, self.npt())``.

    The sample count for the point is reset to 1 and the stored
    factorisation is flagged as out of date.

    Args:
        k: index of the point to set.
        x: new point.
        rvec: residual vector at ``x``.
        allow_kopt_update: if True, ``kopt`` is moved to ``k`` when the new
            objective value is strictly below ``self.fopt()``.
    """
    appending = k >= self.npt_so_far and self.npt_so_far < self.num_pts
    if not appending:
        assert 0 <= k < self.npt(), "Invalid index %g" % k
    else:
        assert k == self.npt_so_far, "Growing: updating wrong point"
        self.npt_so_far += 1

    self.points[k, :] = x.copy()
    self.fval_v[k, :] = rvec.copy()
    self.fval[k] = sumsq(rvec)
    self.nsamples[k] = 1
    self.factorisation_current = False

    if not allow_kopt_update:
        return
    if self.fval[k] < self.fopt():
        self.kopt = k
    return
def alt_trust_step(n, xopt, H, sl, su, d, xbdi, nact, gnew, qred):
    """Run the "alternative iteration" that follows the conjugate-gradient phase of trsbox.

    The structure mirrors a goto-based Fortran routine: the outer ``for`` loop
    stands in for "label 100" and the inner ``for`` loop for "label 120".
    Each inner pass builds a direction ``s`` from the free components of ``d``
    and ``gnew``, bounds the admissible rotation (``angbd``) using the simple
    bounds, samples the model reduction on an equally spaced grid of
    half-angle tangents, and then updates ``d``, ``gnew`` and ``hred``.

    Args:
        n: number of variables.
        xopt: current point (indexed per variable).
        H: matrix of second derivatives of Q; only ``H.dot`` is used.
        sl, su: lower and upper bounds, indexed like ``xopt``.
        d: current step; its free components are overwritten in place.
        xbdi: per-variable status (0 = free, -1 / 1 = fixed at lower / upper
            bound); entries are set in place when a variable becomes fixed.
        nact: number of fixed variables on entry.
        gnew: gradient vector, updated with ``+=``
            (in place for NumPy arrays - NOTE(review): confirm callers pass arrays).
        qred: reduction in Q achieved so far.

    Returns:
        A pair ``(d_within_bounds(d, xopt, sl, su, xbdi), gnew)``.
        NOTE(review): ``d_within_bounds`` is defined elsewhere; presumably it
        clips the step to the bounds - confirm.
    """
    MAX_LOOP_ITERS = 100 * n**2  # avoid infinite loops

    # while True:  # label 100 here
    for ii in range(MAX_LOOP_ITERS):
        # Nothing to do once at most one variable is left free
        if nact >= n - 1:
            return d_within_bounds(d, xopt, sl, su, xbdi), gnew

        # Prepare for the alternative iteration by calculating some scalars
        # and by multiplying the reduced D by the second derivative matrix of
        # Q, where S holds the reduced D in the call of GGMULT.
        s = np.zeros((n, ))
        s[xbdi == 0] = d[xbdi == 0]
        dredsq = sumsq(d[xbdi == 0])
        dredg = np.dot(d[xbdi == 0], gnew[xbdi == 0])
        gredsq = sumsq(gnew[xbdi == 0])
        # Label 210 (crvmin = 0, itcsav = iterc)
        hs = H.dot(s)
        hred = hs.copy()
        # quit 210 by goto 120

        # Let the search direction S be a linear combination of the reduced D
        # and the reduced G that is orthogonal to the reduced D.
        restart_alt_loop = False  # once the below loop finishes, quit unless need to go again
        # while True:  # label 120
        for jj in range(MAX_LOOP_ITERS):
            temp = gredsq * dredsq - dredg**2
            if temp <= 1.0e-4 * qred**2:
                restart_alt_loop = False
                break  # quit inner label 120 loop and return results
            temp = sqrt(temp)
            s = np.zeros((n, ))
            s[xbdi == 0] = (dredg * d[xbdi == 0] - dredsq * gnew[xbdi == 0]) / temp
            sredg = -temp

            # By considering the simple bounds on the variables, calculate an upper
            # bound on the tangent of half the angle of the alternative iteration,
            # namely ANGBD, except that, if already a free variable has reached a
            # bound, there is a branch back to label 100 after fixing that variable.
            free_variable_reached_bound = False
            angbd = 1.0
            iact = None  # index of the variable whose bound limits angbd (if any)
            for i in range(n):
                if xbdi[i] == 0:
                    tempa = xopt[i] + d[i] - sl[i]  # distance of xopt+d above the lower bound
                    tempb = su[i] - xopt[i] - d[i]  # distance of xopt+d below the upper bound
                    if tempa <= 0.0:
                        nact += 1
                        xbdi[i] = -1
                        free_variable_reached_bound = True
                        break  # skip the rest of this for loop
                    elif tempb <= 0.0:
                        nact += 1
                        xbdi[i] = 1
                        free_variable_reached_bound = True
                        break  # skip the rest of this for loop

                    ssq = d[i]**2 + s[i]**2
                    temp = ssq - (xopt[i] - sl[i])**2
                    if temp > 0.0:
                        temp = sqrt(temp) - s[i]
                        if angbd * temp > tempa:
                            angbd = tempa / temp
                            iact = i
                            xsav = -1  # variable iact would be fixed at its lower bound
                    temp = ssq - (su[i] - xopt[i])**2
                    if temp > 0.0:
                        temp = sqrt(temp) + s[i]
                        if angbd * temp > tempb:
                            angbd = tempb / temp
                            iact = i
                            xsav = 1  # variable iact would be fixed at its upper bound
            # End for loop
            if free_variable_reached_bound:  # deal with break conditions above
                restart_alt_loop = True
                break  # quit inner label 120 loop and restart alt iteration loop (label 100)

            # Label 210 (crvmin = 0, itcsav < iterc since iterc+=1 earlier)
            hs = H.dot(s)
            # Label 150
            # Calculate HHD and some curvatures for the alternative iteration.
            shs = np.sum(s[xbdi == 0] * hs[xbdi == 0])
            dhs = np.sum(d[xbdi == 0] * hs[xbdi == 0])
            dhd = np.sum(d[xbdi == 0] * hred[xbdi == 0])

            # Seek the greatest reduction in Q for a range of equally spaced values
            # of ANGT in [0,ANGBD], where ANGT is the tangent of half the angle of
            # the alternative iteration.
            redmax = 0.0
            isav = -1  # grid index giving the best reduction (-1 = none found)
            redsav = 0.0  # reduction at the previous grid point
            temp = 0.0  # force scope outside i loop below since needed later
            iu = int(17 * angbd + 3.1)
            for i in range(iu):  # i = 0, ..., iu-1
                angt = angbd * float(i + 1) / float(iu)
                sth = 2.0 * angt / (1.0 + angt**2)
                temp = shs + angt * (angt * dhd - 2.0 * dhs)
                rednew = sth * (angt * dredg - sredg - 0.5 * sth * temp)
                if rednew > redmax:
                    redmax = rednew
                    isav = i
                    rdprev = redsav  # reduction at the grid point just before the best one
                elif i == isav + 1:
                    rdnext = rednew  # reduction at the grid point just after the best one
                redsav = rednew

            # Return if the reduction is zero. Otherwise, set the sine and cosine
            # of the angle of the alternative iteration, and calculate SDEC.
            if isav == -1:
                restart_alt_loop = False
                break  # quit inner label 120 loop and return results
            if isav < iu - 1:
                # Best grid point is interior: refine angt using the reductions at
                # the two neighbouring grid points. (Otherwise angt keeps its value
                # from the last pass of the loop above.)
                temp = (rdnext - rdprev) / (2.0 * redmax - rdprev - rdnext)
                angt = angbd * (float(isav + 1) + 0.5 * temp) / float(iu)
            cth = (1.0 - angt**2) / (1.0 + angt**2)
            sth = 2.0 * angt / (1.0 + angt**2)
            temp = shs + angt * (angt * dhd - 2.0 * dhs)
            sdec = sth * (angt * dredg - sredg - 0.5 * sth * temp)
            if sdec <= 0.0:
                restart_alt_loop = False
                break  # quit inner label 120 loop and return results

            # Update GNEW, D and HRED. If the angle of the alternative iteration
            # is restricted by a bound on a free variable, that variable is fixed
            # at the bound.
            gnew += (cth - 1.0) * hred + sth * hs
            d[xbdi == 0] = cth * d[xbdi == 0] + sth * s[xbdi == 0]
            dredg = np.dot(d[xbdi == 0], gnew[xbdi == 0])
            gredsq = sumsq(gnew[xbdi == 0])
            hred = cth * hred + sth * hs
            qred += sdec
            if iact is not None and isav == iu - 1:
                nact += 1
                xbdi[iact] = xsav
                restart_alt_loop = True
                break  # quit inner label 120 loop and restart alt iteration loop (label 100)

            # Stop once the latest decrease is small relative to the total reduction
            if (sdec <= 0.01 * qred):
                restart_alt_loop = False
                break  # quit inner label 120 loop and return results
            continue  # back to inner label 120 loop
        # End inner label 120 loop
        if restart_alt_loop:
            continue
        else:
            break  # end outer loop and quit
    # End while True (label 100)
    # Also reached if the outer loop exhausts MAX_LOOP_ITERS
    return d_within_bounds(d, xopt, sl, su, xbdi), gnew
def trsbox(xopt, g, H, sl, su, delta, use_fortran=USE_FORTRAN):
    """Compute a step ``d`` from ``xopt`` that reduces the quadratic Q given by ``g`` and ``H``.

    A conjugate-gradient loop over the free variables is run, subject to the
    trust-region radius ``delta`` and the simple bounds ``sl``/``su``. If the
    loop stops because the trust-region boundary is reached (or the radius
    budget for the free variables is used up), the step is passed on to
    ``alt_trust_step`` for further improvement.

    Args:
        xopt: current point, shape ``(n,)``; must satisfy ``sl <= xopt <= su``.
        g: gradient of Q at ``xopt``, shape ``(n,)``.
        H: symmetric ``(n, n)`` matrix of second derivatives of Q.
        sl, su: lower and upper bounds, shape ``(n,)``.
        delta: trust-region radius, strictly positive.
        use_fortran: if True, delegate to ``trustregion.solve`` instead.
            NOTE(review): the return value of that branch is whatever
            ``trustregion.solve(..., verbose_output=True)`` returns - confirm
            it matches the tuple below.

    Returns:
        ``(d, gnew, crvmin)``: the step, the gradient vector after the updates
        applied during the iterations, and the curvature value ``crvmin``
        (-1.0 if never set by the CG loop, 0.0 if the alternative step was
        taken).

    Raises:
        AssertionError: if any of the shape, symmetry, bound or ``delta``
            checks fail.
    """
    if use_fortran:
        return trustregion.solve(g, H, delta,
                                 sl=np.minimum(sl - xopt, -ZERO_THRESH),
                                 su=np.maximum(su - xopt, ZERO_THRESH),
                                 verbose_output=True)

    n = xopt.size
    assert xopt.shape == (n, ), "xopt has wrong shape (should be vector)"
    assert g.shape == (n, ), "g and xopt have incompatible sizes"
    assert len(H.shape) == 2, "H must be a matrix"
    assert H.shape == (n, n), "H and xopt have incompatible sizes"
    assert np.allclose(H, H.T), "H must be symmetric"
    assert sl.shape == (n, ), "sl and xopt have incompatible sizes"
    assert su.shape == (n, ), "su and xopt have incompatible sizes"
    assert np.all(sl <= xopt), "xopt violates lower bound sl"
    assert np.all(xopt <= su), "xopt violates upper bound su"
    assert delta > 0.0, "delta must be strictly positive"
    # Assume g and H have full quadratic model for objective
    # i.e. skip straight to label 8 in DFBOLS version

    # The sign of G(I) gives the sign of the change to the I-th variable
    # that will reduce Q from its value at XOPT. Thus XBDI(I) shows whether
    # or not to fix the I-th variable at one of its bounds initially, with
    # NACT being set to the number of fixed variables. D and GNEW are also
    # set for the first iteration. DELSQ is the upper bound on the sum of
    # squares of the free variables. QRED is the reduction in Q so far.
    iterc = 0

    xbdi = np.zeros((n, ), dtype=int)  # fix x_i at bounds? [values -1, 0, 1]
    xbdi[(xopt <= sl) & (g >= 0.0)] = -1
    xbdi[(xopt >= su) & (g <= 0.0)] = 1

    # Number of fixed variables. This must count the variables fixed just
    # above: it feeds itermax (CG iterations allowed = number of free
    # variables) and the "at most one free variable" exit in alt_trust_step.
    nact = np.count_nonzero(xbdi)

    d = np.zeros((n, ))
    s = np.zeros((n, ))
    gnew = g.copy()
    qred = 0.0
    delsq = delta**2
    crvmin = -1.0
    beta = 0.0  # label 20

    need_alt_trust_step = False  # will either quit main CG loop to finish, or do alternative step
    MAX_LOOP_ITERS = 100 * n**2  # avoid infinite loops

    # while True:  # main CG loop [label 30]
    for ii in range(MAX_LOOP_ITERS):
        # Search direction: steepest descent after a (re)start (beta == 0),
        # otherwise the usual CG combination; fixed variables never move.
        s[xbdi != 0] = 0.0
        if beta == 0.0:
            s[xbdi == 0] = -gnew[xbdi == 0]
        else:
            s[xbdi == 0] = beta * s[xbdi == 0] - gnew[xbdi == 0]
        stepsq = sumsq(s)
        if stepsq == 0.0:
            need_alt_trust_step = False
            break  # break and quit

        if beta == 0.0:
            gredsq = stepsq
            itermax = iterc + n - nact
        if iterc == 0:
            gredsq0 = gredsq

        # Exit conditions
        if gredsq <= min(1.0e-6 * gredsq0, 1.0e-18) or gredsq * delsq <= min(
                1.0e-6 * qred**2, 1.0e-18):  # DFBOLS
            need_alt_trust_step = False
            break  # break and quit

        # Multiply the search direction by the second derivative matrix of Q and
        # calculate some scalars for the choice of steplength. Then set BLEN to
        # the length of the the step to the trust region boundary and STPLEN to
        # the steplength, ignoring the simple bounds.
        hs = H.dot(s)  # label 50
        ds = np.dot(s[xbdi == 0], d[xbdi == 0])
        shs = np.dot(s[xbdi == 0], hs[xbdi == 0])
        resid = delsq - sumsq(d[xbdi == 0])
        if resid <= 0.0:
            need_alt_trust_step = True
            break  # break and calculate alt step instead
        temp = sqrt(stepsq * resid + ds**2)
        blen = (resid / (temp + ds) if ds >= 0.0 else (temp - ds) / stepsq)
        stplen = (blen if shs <= 0.0 else min(blen, gredsq / shs))

        # Exit condition
        if stplen <= 1.0e-30:  # DFBOLS
            need_alt_trust_step = False
            break  # break and quit

        # Reduce STPLEN if necessary in order to preserve the simple bounds,
        # letting IACT be the index of the new constrained variable.
        iact = None
        for i in range(n):
            if s[i] != 0.0:
                temp = (su[i] - xopt[i] - d[i] if s[i] > 0.0 else
                        sl[i] - xopt[i] - d[i]) / s[i]
                if temp < stplen:
                    stplen = temp
                    iact = i

        # Update CRVMIN, GNEW and D. Set SDEC to the decrease that occurs in Q.
        sdec = 0.0
        if stplen > 0.0:
            iterc += 1
            temp = shs / stepsq
            if iact is None and temp > 0.0:
                # -1.0 is the "not yet set" marker for crvmin
                crvmin = min(crvmin, temp) if crvmin != -1.0 else temp
            ggsav = gredsq
            gnew += stplen * hs
            d += stplen * s
            gredsq = sumsq(gnew[xbdi == 0])
            sdec = max(stplen * (ggsav - 0.5 * stplen * shs), 0.0)
            qred += sdec

        # Restart the conjugate gradient method if it has hit a new bound.
        if iact is not None:
            nact += 1
            xbdi[iact] = (1 if s[iact] >= 0.0 else -1)
            delsq = delsq - d[iact]**2
            if delsq <= 0.0:
                need_alt_trust_step = True
                break  # break and calculate alt step instead
            beta = 0.0  # label 20
            continue  # restart loop (new CG iteration)

        # If STPLEN is less than BLEN, then either apply another conjugate
        # gradient iteration or RETURN.
        if stplen >= blen:
            need_alt_trust_step = True
            break  # break and calculate alt step instead

        # Exit condition
        if iterc == itermax or sdec <= 1.0e-6 * qred:  # DFBOLS
            need_alt_trust_step = False
            break  # break and quit

        beta = gredsq / ggsav
        continue  # new CG iteration
    # end of CG loop

    # Either done or need to take an alternative step
    if need_alt_trust_step:
        crvmin = 0.0
        d, gnew = alt_trust_step(n, xopt, H, sl, su, d, xbdi, nact, gnew, qred)
        return d, gnew, crvmin
    else:
        return d_within_bounds(d, xopt, sl, su, xbdi), gnew, crvmin
def interpolate_mini_models_svd(self, verbose=False, make_full_rank=False, min_sing_val=1e-6,
                                sing_val_frac=1.0, max_jac_cond=1e8, get_chg_J=False):
    """Rebuild ``model_const`` and ``model_jac`` from the current interpolation data.

    The geometry system is factorised and solved with the stored residuals
    ``fval_v`` as right-hand side; the first row of the solution gives the
    constant term (shifted using ``xopt``) and the remaining rows, transposed,
    give the Jacobian.

    Args:
        verbose: also compute the condition number of the interpolation
            matrix and the sample-weighted interpolation error.
        make_full_rank: before solving, multiply ``model_jac`` on the right
            by ``Qhat Qhat^T`` (``Qhat`` from a QR factorisation of the
            point directions); after solving, raise the singular values of
            the new Jacobian to at least
            ``max(s[0] / max_jac_cond, sing_val_frac * s[r - 1], min_sing_val)``.
        min_sing_val, sing_val_frac, max_jac_cond: parameters of that floor.
        get_chg_J: compute the Jacobian-change and linear-system residual
            diagnostics even when ``verbose`` is False.

    Returns:
        ``(True, interp_error, sqrt(norm_J_error), linalg_resid, cond_num)``
        on success (diagnostics that were not requested are 0.0), or
        ``(False, None, None, None, None)`` if the linear solve or the SVD
        raised.
    """
    failure = (False, None, None, None, None)  # flag error

    interp_matrix, _left_scaling, _right_scaling = self.interpolation_matrix()
    self.factorise_geom_system()
    # scipy.linalg does not have condition number!
    cond_num = np.linalg.cond(interp_matrix) if verbose else 0.0

    base_pt = self.xopt()
    self.ropt()  # return value is not used below; call retained so behaviour is unchanged
    all_rows = np.arange(self.npt())  # indices of all rows
    jac_change_sq = 0.0
    system_resid = 0.0

    if make_full_rank:
        directions = self.xpt_directions(include_kopt=False).T
        q_full, _r_full = LA.qr(directions, mode='full')
        q_hat = q_full[:, :directions.shape[1]]
        projector = np.dot(q_hat, q_hat.T)
        self.model_jac = np.dot(self.model_jac, projector)

    rhs = self.fval_v[all_rows, :]  # size npt * m
    try:
        coeffs = self.solve_geom_system(rhs)  # size (n+1)*m
    except (LA.LinAlgError, ValueError):
        # ValueError covers e.g. inf or NaN encountered
        return failure

    previous_jac = self.model_jac.copy()
    self.model_jac = coeffs[1:, :].T
    self.model_const = coeffs[0, :] - np.dot(self.model_jac, base_pt)  # shift base to xbase

    if verbose or get_chg_J:
        jac_change_sq = np.linalg.norm(self.model_jac - previous_jac, ord='fro')**2
        system_resid = np.linalg.norm(interp_matrix.dot(coeffs) - rhs)**2

    if make_full_rank:
        try:
            # left is (m,k), sing_vals has length k, right_t is (k,n), where k=min(m,n)
            left, sing_vals, right_t = LA.svd(self.model_jac, full_matrices=False)
        except LA.LinAlgError:
            return failure
        n_sing = min(self.n(), self.m())
        # current number of directions (i.e. rank of J)
        n_dirs = min(self.npt_so_far - 1, self.n(), self.m())
        floor_val = max(sing_vals[0] / max_jac_cond,
                        sing_val_frac * sing_vals[n_dirs - 1],
                        min_sing_val)
        sing_vals = np.maximum(sing_vals, floor_val)
        sigma = LA.diagsvd(sing_vals, n_sing, n_sing)  # vector to matrix of correct shape
        self.model_jac = np.dot(left, np.dot(sigma, right_t))  # reconstruct J from new svd

    interp_error = 0.0
    if verbose:
        interp_error = sum(
            (self.nsamples[idx] * sumsq(
                self.fval_v[idx, :] - self.model_value(
                    self.xpt(idx), d_based_at_xopt=False, with_const_term=True))
             for idx in range(self.npt())),
            0.0)

    return True, interp_error, sqrt(jac_change_sq), system_resid, cond_num  # flag ok
def distances_to_xopt(self):
    """Return the squared distance (via ``sumsq``) from each interpolation point to ``xopt``.

    Returns:
        A float array of length ``self.npt()`` whose ``k``-th entry is
        ``sumsq(self.points[k, :] - self.xopt())``.
    """
    num_points = self.npt()
    centre = self.xopt()
    return np.array(
        [sumsq(self.points[idx, :] - centre) for idx in range(num_points)],
        dtype=float)
def __init__(self, npt, x0, r0, xl, xu, r0_nsamples, n=None, m=None, abs_tol=1e-12,
             rel_tol=1e-20, precondition=True, do_logging=True):
    """Set up storage for ``npt`` interpolation points, seeded with the starting point.

    Args:
        npt: total number of interpolation points; must be at least ``n + 1``.
        x0: starting point, shape ``(n,)``; becomes ``xbase``.
        r0: residual vector at ``x0``, shape ``(m,)``.
        xl, xu: lower and upper bounds, shape ``(n,)``; stored relative to
            ``xbase`` as ``sl`` and ``su``.
        r0_nsamples: number of samples behind ``r0``.
        n: number of variables (defaults to ``len(x0)``).
        m: number of residuals (defaults to ``len(r0)``).
        abs_tol, rel_tol: termination tolerances, stored as given.
        precondition: stored as given.
        do_logging: stored as given.

    Raises:
        AssertionError: if ``npt < n + 1`` or any input has the wrong shape.
    """
    n = len(x0) if n is None else n
    m = len(r0) if m is None else m

    assert npt >= n + 1, "Require npt >= n+1 for linear models"
    assert x0.shape == (n, ), \
        "x0 has wrong shape (got %s, expect (%g,))" % (str(x0.shape), n)
    assert xl.shape == (n, ), \
        "xl has wrong shape (got %s, expect (%g,))" % (str(xl.shape), n)
    assert xu.shape == (n, ), \
        "xu has wrong shape (got %s, expect (%g,))" % (str(xu.shape), n)
    assert r0.shape == (m, ), \
        "r0 has wrong shape (got %s, expect (%g,))" % (str(r0.shape), m)

    # Problem sizes and options
    self.do_logging = do_logging
    self.dim = n
    self.resid_dim = m
    self.num_pts = npt
    self.npt_so_far = 1  # number of points added so far (with function values)

    # Interpolation points, all stored relative to xbase
    self.xbase = x0.copy()
    self.sl = xl - self.xbase  # lower bound w.r.t. xbase (require xpt >= sl)
    self.su = xu - self.xbase  # upper bound w.r.t. xbase (require xpt <= su)
    self.points = np.zeros((npt, n))

    # Function values: unset slots are +inf, slot 0 holds the starting point
    self.fval_v = np.full((npt, m), np.inf)  # residuals for each xpt
    self.fval_v[0, :] = r0.copy()
    self.fval = np.full((npt, ), np.inf)  # overall objective value for each xpt
    self.fval[0] = sumsq(r0)
    self.kopt = 0  # index of current iterate (should be best value so far)
    # number of samples used to evaluate objective at each point
    self.nsamples = np.zeros((npt, ), dtype=int)
    self.nsamples[0] = r0_nsamples
    self.fbeg = self.fval[0]  # f(x0), saved to check for sufficient reduction

    # Termination criteria
    self.abs_tol = abs_tol
    self.rel_tol = rel_tol

    # Model m(s) = c + J*s
    self.model_const = np.zeros((m, ))
    self.model_jac = np.zeros((m, n))

    # Saved point (in absolute coordinates) - always check this value before quitting solver
    self.xsave = self.rsave = self.fsave = self.jacsave = self.nsamples_save = None

    # Factorisation of interpolation matrix
    self.factorisation_current = False
    self.Q = self.R = None
    self.qr_of_transpose = False  # is QR for W (finished growing) or W.T (growing)?
    self.precondition = precondition
    self.left_scaling = self.right_scaling = None  # preconditioning