def plot_single(result, x_opt, f_opt, method):
    # Unpack result
    t_hist = result['t_hist']
    x_hist = result['x_hist']
    f_hist = result['f_hist']
    g_hist = result['g_hist']
    h_hist = result['h_hist']

    # Plot
    fig, ax = plt.subplots(nrows=4, sharex=True, figsize=(6, 10))
    ax[0].semilogy(t_hist, la.norm(x_hist - x_opt, axis=1))
    ax[0].set_ylabel('Iterate error norm')
    ax[1].semilogy(t_hist, f_hist - f_opt)
    ax[1].set_ylabel('Objective error')
    ax[2].semilogy(t_hist, la.norm(g_hist, axis=1))
    # Termination threshold belongs on the gradient-norm panel (cf. plot_multi)
    ax[2].axhline(MIN_GRAD_NORM, linestyle='--', color='k', alpha=0.5)
    ax[2].set_ylabel('Gradient norm')
    ax[3].plot(t_hist, np.array([la.eigh(h)[0] for h in h_hist]))
    ax[3].axhline(0, linestyle='--', color='k', alpha=0.5)
    ax[3].set_yscale('symlog')
    ax[3].set_ylabel('Hessian eigenvalues')
    ax[-1].set_xlabel('Iteration')
    ax[0].set_title(method)
    return fig, ax
def update_hessian_inverse(self, x, obj, p, state_aux):
    method = self.setting.step_method
    a, Hinv = state_aux
    n = x.size
    s = a * p
    y = obj.gradient(x + a * p) - obj.gradient(x)
    if method == 'bfgs':
        # BFGS: Hinv_new = (I - s y^T / y^T s) Hinv (I - y s^T / y^T s) + s s^T / y^T s
        ssT = np.outer(s, s)
        ysT = np.outer(y, s)
        yTs = np.dot(y, s)
        C = np.eye(n) - ysT / yTs
        Hinv_new = np.dot(C.T, np.dot(Hinv, C)) + ssT / yTs
    elif method == 'dfp':
        # DFP: Hinv_new = Hinv - (Hinv y y^T Hinv) / (y^T Hinv y) + s s^T / y^T s
        Hinv_y = np.dot(Hinv, y)
        y_Hinv_y = np.dot(y, Hinv_y)
        ssT = np.outer(s, s)
        yTs = np.dot(y, s)
        Hinv_new = Hinv - np.outer(Hinv_y, Hinv_y) / y_Hinv_y + ssT / yTs
    elif method == 'sr1':
        # SR1: symmetric rank-one update
        Hinv_y = np.dot(Hinv, y)
        s_minus_Hinv_y = s - Hinv_y
        denominator = np.dot(s_minus_Hinv_y, y)
        if np.abs(denominator) > self.setting.sr1_skip_tol * la.norm(y) * la.norm(s_minus_Hinv_y):
            Hinv_new = Hinv + np.outer(s_minus_Hinv_y, s_minus_Hinv_y) / denominator
        else:
            # Skipping rule to avoid huge search directions when the denominator collapses
            Hinv_new = np.copy(Hinv)
    else:
        raise ValueError('Invalid step method!')
    return Hinv_new
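# A minimal sketch (hypothetical, not part of the original module) checking
# the defining secant condition Hinv_new @ y == s for the BFGS branch above,
# on the quadratic f(x) = 0.5 x^T Q x whose gradient is Q x.
def _demo_bfgs_secant_condition():
    Q = np.diag([1.0, 10.0])             # Hessian of f(x) = 0.5 x^T Q x
    s = np.array([-0.5, -0.1])           # some step
    y = np.dot(Q, s)                     # gradient difference for a quadratic
    yTs = np.dot(y, s)
    C = np.eye(2) - np.outer(y, s) / yTs
    Hinv_new = np.dot(C.T, np.dot(np.eye(2), C)) + np.outer(s, s) / yTs
    assert np.allclose(np.dot(Hinv_new, y), s)  # secant condition holds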
def plot_multi(results_dict, x_opt, f_opt, category):
    fig, ax = plt.subplots(nrows=3, sharex=True, figsize=(8, 10))
    for method, result_dict in results_dict.items():
        result = result_dict['result']
        # Unpack result
        t_hist = result['t_hist']
        x_hist = result['x_hist']
        f_hist = result['f_hist']
        g_hist = result['g_hist']
        h_hist = result['h_hist']
        elapsed_time = result['elapsed_time']
        label = method + ' (%.3f sec)' % elapsed_time
        ax[0].semilogy(t_hist, la.norm(x_hist - x_opt, axis=1), label=label)
        ax[1].semilogy(t_hist, f_hist - f_opt, label=label)
        ax[2].semilogy(t_hist, la.norm(g_hist, axis=1), label=label)
    ax[2].axhline(MIN_GRAD_NORM, color='k', linestyle='--', alpha=0.5)
    # xlim = (-1.0, ax[0].get_xlim()[1] * 1.7)
    xlim = (-1.0, 71)
    for i in range(3):
        ax[i].legend(ncol=1, loc='upper right')
        ax[i].set_xlim(xlim)
    ax[0].set_title(category + ' methods')
    ax[-1].set_xlabel('Iteration')
    ax[0].set_ylabel('Iterate error norm')
    ax[1].set_ylabel('Objective error')
    ax[2].set_ylabel('Gradient norm')
    fig.tight_layout()
    return fig, ax
def get_neighs(vertex, r):
    verts = list()

    # level_0
    verts.append(vertex)
    v = vertex

    # find closest point in level 1
    if sparse.issparse(adj_mtx):
        nz = adj_mtx.tolil().rows
        ix_list = nz[v]
    else:
        row = adj_mtx[v]
        ix_list = np.nonzero(row)
        ix_list = ix_list[0]
    dists = []
    for j in ix_list:
        d = get_dist(coords, v, j)
        dists.append(d)
    ix_min = ix_list[dists.index(min(dists))]
    closest_ix = ix_min

    # levels >= 1
    for i in range(1, r + 1):
        # closest_ix is the closest vertex of the new level
        # find the ordering of the level
        arr = get_order(adj_mtx, coords, ix_list, closest_ix, verts)
        verts = verts + arr
        # get next level: for each j in ix_list, get neighbors that are not
        # already in verts, then add them to the new list
        next_list = []
        for j in ix_list:
            if sparse.issparse(adj_mtx):
                new_row = nz[j]
            else:
                new_row = adj_mtx[j]
                new_row = np.nonzero(new_row)
                new_row = new_row[0]
            for k in new_row:
                if k not in verts:
                    next_list.append(k)
        next_list = list(set(next_list))
        # find starting point of next level using the point-to-line distance
        c1 = coords[vertex]
        c2 = coords[closest_ix]
        line_dists = []
        for j in next_list:
            c3 = coords[j]
            # distance from c3 to the line through c1 and c2
            line_dist = LA.norm(np.cross(c2 - c1, c1 - c3)) / LA.norm(c2 - c1)
            line_dists.append(line_dist)
        ix_list = next_list
        closest_ix = next_list[line_dists.index(min(line_dists))]
    return verts
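# get_dist is used above but not defined in this section; a minimal sketch,
# assuming it returns the Euclidean distance between two vertices:
def get_dist(coords, i, j):
    return LA.norm(coords[i] - coords[j])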
def mesh_convolve(filters, adj_mtx, vals_list, coords, faces, center, r, stride):
    """
    Strides the mesh and applies a convolution to the patches.
    For testing purposes only; see the versions below for efficient implementations.
    :param filters: list of filters
    :param adj_mtx: adjacency matrix
    :param vals_list: per-vertex signal values, one set of channels per example
    :param coords: coordinates of each vertex
    :param faces: triplets of vertices for each triangle
    :param center: center vertex
    :param r: patch radius
    :param stride: stride between patch centers
    :return: result of the convolution operation
    """
    f_count = vals_list.shape[1]
    conv_arr = []
    for vals in vals_list:
        depth_arr = []
        for c in range(f_count):
            strided_mesh = mesh_strider(adj_mtx, vals[c], coords, faces, center, r, stride)
            filter_arr = []
            for f in filters[c]:
                row = []
                for p in strided_mesh:
                    p = np.array(p)
                    # Normalize the patch; autograd ArrayBox entries must be
                    # unwrapped via ._value before numpy can operate on them.
                    try:
                        p = p / LA.norm(p)
                    except Exception:
                        x = [i._value for i in p]
                        try:
                            p = x / LA.norm(x)
                        except Exception:
                            y = [i._value for i in x]
                            try:
                                p = y / LA.norm(y)
                            except Exception:
                                print("Convolution error.")
                    # Apply the filter; truncate it if the patch is shorter.
                    try:
                        temp = np.dot(f, p)
                        row.append(temp)
                    except Exception:
                        temp = np.dot(f[:len(p)], p)
                        row.append(temp)
                if len(filter_arr) == 0:
                    filter_arr = np.array([row])
                else:
                    filter_arr = np.vstack((filter_arr, [row]))
            if len(depth_arr) == 0:
                depth_arr = np.array([filter_arr])
            else:
                depth_arr = np.vstack((depth_arr, [filter_arr]))
        if len(conv_arr) == 0:
            conv_arr = np.array([depth_arr])
        else:
            conv_arr = np.vstack((conv_arr, [depth_arr]))
    conv_arr = np.sum(conv_arr, axis=1)
    return conv_arr
def gan_objective(prior_params, d_params, n_data, n_samples, bnn_layer_sizes, act, d_act='tanh'):
    '''estimates V(G, D) = E_pgp[D(f)] - E_pbnn[D(f)]'''
    x = sample_inputs('uniform', n_data, (-10, 10))
    fbnns = sample_bnn(prior_params, x, n_samples, bnn_layer_sizes, act)  # [nf, nd]
    fgps = sample_gpp(x, n_samples, 'rbf')  # sample f ~ P_gp(f)

    D_fbnns = nn_predict(d_params, fbnns, d_act)
    D_fgps = nn_predict(d_params, fgps, d_act)

    # Gradient penalty evaluated at a random interpolant of the two samples
    eps = np.random.uniform()
    f = eps * fgps + (1 - eps) * fbnns

    def D(function):
        return nn_predict(d_params, function, d_act)

    g = elementwise_grad(D)(f)
    pen = 10 * (norm(g, ord=2, axis=1) - 1)**2

    return np.mean(D_fgps - D_fbnns + pen)
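# Note: `pen` above is a WGAN-GP-style gradient penalty,
#     pen = 10 * (||dD/df (f_hat)|| - 1)^2,  f_hat = eps * f_gp + (1 - eps) * f_bnn,
# which pushes the discriminator toward being 1-Lipschitz along random
# interpolants between the two function samples.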
def calc_update(self, x, p, trust_radius, trust_radius_max, obj,
                quality_required=0.2, quality_low=0.25, quality_high=0.75):
    # Parameter checks
    if not quality_required < quality_low < quality_high:
        raise ValueError('Invalid quality parameters, must be: quality_required < quality_low < quality_high')

    # Ratio of actual to predicted objective reduction
    df = obj.function(x) - obj.function(x + p)
    dm = self.model(x, np.zeros_like(x), obj) - self.model(x, p, obj)
    quality = df / dm

    # Update the trust radius
    if quality < quality_low:
        trust_radius_new = quality_low * trust_radius
    else:
        if quality > quality_high and np.isclose(la.norm(p), trust_radius):
            trust_radius_new = min(2 * trust_radius, trust_radius_max)
        else:
            trust_radius_new = np.copy(trust_radius)

    # Accept the step only if the quality is sufficient
    if quality > quality_required:
        x_new = x + p
    else:
        x_new = np.copy(x)
    return x_new, trust_radius_new
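# A minimal sketch (hypothetical) of the quality ratio used above: with an
# exact quadratic model of a quadratic objective, actual and predicted
# reductions agree, so quality == 1 and the step would be accepted.
def _demo_quality_ratio():
    f = lambda z: float(np.dot(z, z))
    model = lambda z, q: f(z) + 2 * np.dot(z, q) + np.dot(q, q)  # equals f(z + q)
    x = np.array([1.0, 1.0])
    p = np.array([-0.5, -0.5])
    df = f(x) - f(x + p)
    dm = model(x, np.zeros_like(x)) - model(x, p)
    assert np.isclose(df / dm, 1.0)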
def cost(X):
    U = X[0]
    cst = 0
    for n in range(N):
        cst = cst + huber(U[n, :])
    Mat = np.matmul(np.matmul(X[0], np.diag(X[1])), X[2])
    fidelity = LA.norm(np.subtract(np.matmul(A, Mat), YT))
    return cst + lambd * fidelity**2
def print_line(i, f, g, h):
    # Print per-iteration diagnostic info
    gi = la.norm(g)
    hi = np.sort(la.eig(h)[0])
    hi_min = hi[0]
    hi_max = hi[-1]
    current_cols = ['%d' % i, '%.3e' % f, '%.3e' % gi, '%.3e' % hi_min, '%.3e' % hi_max]
    line = join_strings(current_cols)
    if tags:
        line = line + ' ' + ' '.join(tags)
    print(line)
    return line
def value_iteration(K0, A, B, Q, X0, min_grad_norm=None, max_iters=100):
    n = K0.size
    f, g, h = make_lqr_objective(A, B, Q, X0)

    # Initialize
    P = policy_evaluation(K0, A, B, Q)
    K = np.copy(K0)

    # Pre-allocate history arrays
    t_hist = np.arange(max_iters)
    x_hist = np.zeros([max_iters, n])
    f_hist = np.zeros(max_iters)
    g_hist = np.zeros([max_iters, n])
    h_hist = np.zeros([max_iters, n, n])

    # Iterate
    for i in range(max_iters - 1):
        # Record history
        vK = vec(K)
        x_hist[i] = vK
        f_hist[i] = f(vK)
        g_hist[i] = g(vK)
        h_hist[i] = h(vK)

        K = gain(P, A, B, Q)
        if min_grad_norm is not None:
            if la.norm(g(vec(K))) < min_grad_norm:
                # Trim off unused part of history matrices
                t_hist = t_hist[0:i + 1]
                x_hist = x_hist[0:i + 1]
                f_hist = f_hist[0:i + 1]
                g_hist = g_hist[0:i + 1]
                h_hist = h_hist[0:i + 1]
                break
        P = ricc(P, A, B, Q)

    # Final iterate
    K = gain(P, A, B, Q)
    vK = vec(K)
    x_hist[-1] = vK
    f_hist[-1] = f(vK)
    g_hist[-1] = g(vK)
    h_hist[-1] = h(vK)
    return t_hist, x_hist, f_hist, g_hist, h_hist
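# vec and mat are used in this module but not defined in this section. A
# minimal sketch under the common column-stacking convention (the actual
# helpers may use a different ordering); mat defaults to a square matrix,
# matching the single-argument calls elsewhere in this module:
def vec(X):
    return X.flatten(order='F')


def mat(v, shape=None):
    if shape is None:
        n = int(np.sqrt(v.size))
        shape = (n, n)
    return v.reshape(shape, order='F')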
def sanity_check(K, A, B, Q, X0, f, g, h, tol=1e-6, verbose=True):
    # Sanity check - compare cost, gradient, and Hessian quadratic form
    # against hand-calculated expressions
    n, m = B.shape

    # Cost
    C0 = calc_cost_manual(K, A, B, Q, X0)
    C0_true = f(vec(K))

    # Gradient
    G0 = vec(calc_grad_manual(K, A, B, Q, X0))
    G0_true = g(vec(K))

    # Hessian
    # Technically the Hessian quadratic form should be checked at every
    # possible direction E; a single random E serves as a spot check.
    E = npr.randn(m, n)
    H0_EE = calc_hess_manual(K, E, A, B, Q, X0)
    H0_EE_true = np.dot(vec(E), np.dot(h(vec(K)), vec(E)))

    if np.abs(C0_true - C0) > tol:
        raise ValueError('Sanity check failed! Cost does not match true')
    if la.norm(G0 - G0_true) > tol:
        raise ValueError('Sanity check failed! Gradient does not match true')
    if np.abs(H0_EE - H0_EE_true) > tol:
        raise ValueError('Sanity check failed! Hessian quadform does not match true')

    if verbose:
        print('SANITY CHECK')
        print('cost')
        print(C0_true)
        print(C0)
        print('gradient')
        print(G0_true)
        print(G0)
        print('hessian quadform')
        print(H0_EE)
        print(H0_EE_true)
        print('')
    return
def check_are(K, A, B, Q, verbose=True):
    n, m = B.shape
    AB = np.hstack([A, B])
    PK = mat(calc_vPK(K, A, B, Q))
    H = np.dot(AB.T, np.dot(PK, AB)) + Q
    Hxx = H[0:n, 0:n]
    Huu = H[n:n + m, n:n + m]
    Hux = H[n:n + m, 0:n]
    LHS = PK
    RHS = Hxx - np.dot(Hux.T, la.solve(Huu, Hux))
    diff = la.norm(LHS - RHS)
    if verbose:
        print(' Left-hand side of the ARE: Positive definite = %s' % is_pos_def(LHS))
        print(LHS)
        print('')
        print('Right-hand side of the ARE: Positive definite = %s' % is_pos_def(RHS))
        print(RHS)
        print('')
        print('Difference')
        print(LHS - RHS)
        print('\n')
    return diff
def huber(u):
    if LA.norm(u) < delta:
        val = LA.norm(u)**2 / (2 * delta)
    else:
        val = LA.norm(u) - delta / 2
    return val
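# Quick check of the Huber-style penalty above: it is quadratic inside the
# delta-ball and linear outside, and the two branches agree at the boundary,
# since ||u|| == delta gives delta**2 / (2 * delta) == delta - delta / 2.
def _check_huber_continuity():
    assert np.isclose(huber(np.array([delta, 0.0])), delta / 2)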
# Hyperparameters
delta = 0.03
lambd = 8

# The unknown matrix with row-sparsity s
X0 = np.random.normal(0, 1, [N, K])
arr = np.arange(N)
np.random.shuffle(arr)
supp_comp = arr[0:N - s]
for ind in supp_comp:
    X0[ind, :] = 0

# The measurement matrix (normalized)
A = np.random.normal(0, 1, [M, N])
A = np.matmul(A, LA.inv(np.diag(LA.norm(A, axis=0))))

# The data matrix
Y0 = np.matmul(A, X0)
uu, vv, dd = LA.svd(Y0)
UY = dd[0:r, :]
YT = np.dot(uu[:, 0:r], np.diag(vv[0:r]))


# Solving the manifold optimization problem
def fixedrank(A, YT, r):
    """Solves the AX=YT problem on the manifold of rank-r matrices."""
    # Instantiate a manifold
    manifold = FixedRankEmbedded(N, r, r)
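    # The body of fixedrank is truncated in this section. A minimal sketch of
    # how it could conclude, based on the pymanopt usage elsewhere in this
    # module (assuming the huber-plus-fidelity cost(X) defined above):
    solver = ConjugateGradient()
    problem = pymanopt.Problem(manifold, cost, verbosity=0)
    X_opt = solver.solve(problem)
    return X_opt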
def cp_mds(D, X, max_iter=20, v=1):
    """Projective multi-dimensional scaling algorithm.

    Detailed description in career grant, pages 6-7 (method 1).

    Parameters
    ----------
    X : ndarray (2n+2, k)
        Initial guess of `k` points in CP^n. Result will lie on CP^n for
        the same `n` as the initial guess. (Each column is a data point.)
    D : ndarray (k, k)
        Square distance matrix determining cost.
    max_iter : int, optional
        Number of times to iterate the loop. Will eventually be updated
        to a better convergence criterion. Default is 20.
    v : int, optional
        Verbosity. If positive, print output relating to convergence
        conditions at each iteration.

    Returns
    -------
    X : ndarray (2n+2, k)
        Optimal configuration of points in CP^n.
    C : list
        List of costs at each iteration.

    """
    dim = X.shape[0]
    num_points = X.shape[1]
    start_cost_list = []
    end_cost_list = []
    loop_diff = np.inf
    percent_cost_diff = 100
    # rank = LA.matrix_rank(X)
    vprint('Finding optimal configuration in CP^%i.' % ((dim - 2) // 2), 1, v)
    W = distance_to_weights(D)
    Sreal, Simag = norm_rotations(X)
    manifold = Oblique(dim, num_points)
    # The Oblique manifold consists of dim*num_points matrices with unit-norm columns.
    solver = ConjugateGradient()
    for i in range(0, max_iter):
        # AUTOGRAD VERSION
        cost = setup_CPn_autograd_cost(D, Sreal, Simag, int(dim / 2))
        # ANALYTIC VERSION:
        # cost, egrad, ehess = setup_CPn_cost(D, Sreal, Simag)
        start_cost_list.append(cost(X))
        # AUTOGRAD VERSION:
        problem = pymanopt.Problem(manifold, cost, verbosity=v)
        # ANALYTIC VERSION:
        # problem = pymanopt.Problem(manifold, cost, egrad=egrad, ehess=ehess,
        #     verbosity=v)
        X_new = solver.solve(problem, x=X)
        end_cost_list.append(cost(X_new))
        Sreal_new, Simag_new = norm_rotations(X_new)
        S_diff = LA.norm(Sreal_new - Sreal)**2 + LA.norm(Simag_new - Simag)**2
        iter_diff = start_cost_list[i] - end_cost_list[i]
        if i > 0:
            loop_diff = end_cost_list[i - 1] - end_cost_list[i]
            percent_cost_diff = 100 * loop_diff / end_cost_list[i - 1]
        vprint('Through %i iterations:' % (i + 1), 1, v)
        vprint('\tCost at start: %2.4f' % start_cost_list[i], 1, v)
        vprint('\tCost at end: %2.4f' % end_cost_list[i], 1, v)
        vprint('\tCost reduction from optimization: %2.4f' % iter_diff, 1, v)
        vprint('\tCost reduction over previous loop: %2.4f' % loop_diff, 1, v)
        vprint('\tPercent cost difference: % 2.4f' % percent_cost_diff, 1, v)
        vprint('\tDifference in S: % 2.2f' % S_diff, 1, v)
        if S_diff < .0001:
            vprint('No change in S matrix. Stopping iterations', 0, v)
            break
        if percent_cost_diff < .0001:
            vprint('No significant cost improvement. Stopping iterations.', 0, v)
            break
        if i == max_iter - 1:
            vprint('Maximum iterations reached.', 0, v)
        # Update variables:
        X = X_new
        Sreal = Sreal_new
        Simag = Simag_new
    return X
def F(Y):
    return 0.5 * sum([
        LA.norm(M[i] * W * (Y.T @ mp(omega, i) @ Y - np.cos(D)))**2
        for i in range(p)
    ])
def rp_mds(D, X, max_iter=20, verbosity=1):
    """Projective multi-dimensional scaling algorithm.

    Detailed description in career grant, pages 6-7 (method 1).

    Parameters
    ----------
    X : ndarray
        Initial guess of points in RP^k. Result will lie on RP^k for
        the same k as the initial guess.
    D : ndarray
        Square distance matrix determining cost.
    max_iter : int, optional
        Number of times to iterate the loop. Will eventually be updated
        to a better convergence criterion. Default is 20.
    verbosity : int, optional
        If positive, print output relating to convergence conditions at
        each iteration.

    Returns
    -------
    X : ndarray
        Optimal configuration of points in RP^k.
    C : list
        List of costs at each iteration.

    """
    num_points = X.shape[0]
    start_cost_list = []
    end_cost_list = []
    loop_cost_diff = np.inf
    percent_cost_diff = 100
    rank = LA.matrix_rank(X)
    vprint('Finding projection onto RP^%i.' % (rank - 1), 1, verbosity)
    W = distance_to_weights(D)
    S = np.sign(X @ X.T)
    C = S * np.cos(D)
    if np.sum(S == 0) > 0:
        print('Warning: Some initial guess vectors are orthogonal, this may '
              + 'cause issues with convergence.')
    manifold = Oblique(rank, num_points)  # Short, wide matrices.
    solver = ConjugateGradient()
    for i in range(0, max_iter):
        # cost, egrad, ehess = setup_RPn_cost(D, S)
        cost = setup_square_cost(D)
        start_cost_list.append(cost(X.T))
        # problem = pymanopt.Problem(manifold, cost, egrad=egrad, ehess=ehess,
        #     verbosity=verbosity)
        problem = pymanopt.Problem(manifold, cost, verbosity=verbosity)
        X_new = solver.solve(problem, x=X.T)
        X_new = X_new.T  # X should be tall-skinny
        end_cost_list.append(cost(X_new.T))
        S_new = np.sign(X_new @ X_new.T)
        C_new = S_new * np.cos(D)
        S_diff = ((LA.norm(S_new - S))**2) / 4
        percent_S_diff = 100 * S_diff / S_new.size
        iteration_cost_diff = start_cost_list[i] - end_cost_list[i]
        if i > 0:
            loop_cost_diff = end_cost_list[i - 1] - end_cost_list[i]
            percent_cost_diff = 100 * loop_cost_diff / end_cost_list[i - 1]
        vprint('Through %i iterations:' % (i + 1), 1, verbosity)
        vprint('\tCost at start: %2.4f' % start_cost_list[i], 1, verbosity)
        vprint('\tCost at end: %2.4f' % end_cost_list[i], 1, verbosity)
        vprint('\tCost reduction from optimization: %2.4f' % iteration_cost_diff, 1, verbosity)
        vprint('\tCost reduction over previous loop: %2.4f' % loop_cost_diff, 1, verbosity)
        vprint('\tPercent cost difference: % 2.4f' % percent_cost_diff, 1, verbosity)
        vprint('\tPercent Difference in S: % 2.2f' % percent_S_diff, 1, verbosity)
        vprint('\tDifference in cost matrix: %2.2f' % (LA.norm(C - C_new)), 1, verbosity)
        if S_diff < 1:
            vprint('No change in S matrix. Stopping iterations', 0, verbosity)
            break
        if percent_cost_diff < .0001:
            vprint('No significant cost improvement. Stopping iterations.', 0, verbosity)
            break
        if i == max_iter - 1:
            vprint('Maximum iterations reached.', 0, verbosity)
        # Update variables:
        X = X_new
        C = C_new
        S = S_new
    return X
    else:
        cost, egrad, ehess = setup_cost(D, S, return_derivatives=True)
        problem = pymanopt.Problem(manifold, cost, egrad=egrad, ehess=ehess,
            verbosity=verbosity)
    if pmo_solve == 'cg' or pmo_solve == 'sd' or pmo_solve == 'tr':
        # Use initial condition with gradient-based solvers.
        X_new = solver.solve(problem, x=X.T)
    else:
        X_new = solver.solve(problem)
    X_new = X_new.T  # X should be tall-skinny
    cost_oldS = cost(X_new.T)
    cost_list.append(cost_oldS)
    S_new = np.sign(X_new @ X_new.T)
    C_new = S_new * np.cos(D)
    cost_new = setup_cost(D, S_new)
    cost_newS = cost_new(X_new.T)
    S_diff = ((LA.norm(S_new - S))**2) / 4
    percent_S_diff = 100 * S_diff / S_new.size
    percent_cost_diff = 100 * (cost_list[i] - cost_list[i + 1]) / cost_list[i]
    true_cost = setup_cost(projective_distance_matrix(X), S)
    true_cost_list.append(true_cost(X_new.T))
    if verbosity > 0:
        print('Through %i iterations:' % (i + 1))
        print('\tTrue cost: %2.2f' % true_cost(X_new.T))
        print('\tComputed cost: %2.2f' % cost_list[i + 1])
        print('\tPercent cost difference: % 2.2f' % percent_cost_diff)
        print('\tPercent Difference in S: % 2.2f' % percent_S_diff)
        print('\tComputed cost with new S: %2.2f' % cost_newS)
        print('\tDifference in cost matrix: %2.2f' % (LA.norm(C - C_new)))
    if S_diff < 1:
        print('No change in S matrix. Stopping iterations')
        break
def cost(X):
    """Weighted Frobenius norm cost function."""
    return 0.5 * (LA.norm(W * (Creal - X.T @ X))**2
                  + LA.norm(W * (Cimag - X.T @ times_i(X)))**2)
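# times_i is used above but not defined in this section. A minimal sketch,
# assuming points are stored with real parts stacked above imaginary parts,
# so multiplication by i maps (re, im) -> (-im, re):
def times_i(X):
    n2 = X.shape[0] // 2
    return np.vstack((-X[n2:], X[:n2]))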
def calc_step(self, x, trust_radius, obj):
    tags = []
    method = self.setting.step_method
    if method == 'dogleg':
        n = x.size
        g = obj.gradient(x)
        H = obj.hessian(x)
        B = posdefify(H, self.setting.pos_hess_eps)
        # Find the minimizing tau along the dogleg path
        pU = -(np.dot(g, g) / np.dot(g, np.dot(B, g))) * g
        pB = -la.solve(B, g)
        dp = pB - pU
        if la.norm(pB) <= trust_radius:
            # Minimum of model lies inside the trust region
            p = np.copy(pB)
        else:
            # Minimum of model lies outside the trust region
            tau_U = trust_radius / la.norm(pU)
            if tau_U <= 1:
                # First dogleg segment intersects trust region boundary
                p = tau_U * pU
            else:
                # Second dogleg segment intersects trust region boundary
                aa = np.dot(dp, dp)
                ab = 2 * np.dot(dp, pU)
                ac = np.dot(pU, pU) - trust_radius**2
                alphas = quadratic_formula(aa, ab, ac)
                alpha = np.max(alphas)
                p = pU + alpha * dp
        return p, tags
    elif method == '2d_subspace':
        g = obj.gradient(x)
        H = obj.hessian(x)
        B = posdefify(H, self.setting.pos_hess_eps)
        # Project g and B onto the 2D subspace spanned by (normalized
        # versions of) -g and -B^-1 g
        s1 = -g
        s2 = -la.solve(B, g)
        Sorig = np.vstack([s1, s2]).T
        # Orthonormalizing is necessary so the same trust_radius is valid
        # before and after the transformation
        S, Rtran = la.qr(Sorig)
        g2 = np.dot(S.T, g)
        B2 = np.dot(S.T, np.dot(B, S))
        # Solve the 2D trust-region subproblem
        try:
            R, lower = cho_factor(B2)
            p2 = -cho_solve((R, lower), g2)
            p22 = np.dot(p2, p2)
            if p22 <= trust_radius**2:
                p = np.dot(S, p2)
                return p, tags
        except LinAlgError:
            pass
        # Unconstrained minimizer is infeasible; solve on the boundary by
        # parameterizing the circle as
        # p2 = trust_radius * (2t/(1+t^2), (1-t^2)/(1+t^2))
        a = B2[0, 0] * trust_radius**2
        b = B2[0, 1] * trust_radius**2
        c = B2[1, 1] * trust_radius**2
        d = g2[0] * trust_radius
        f = g2[1] * trust_radius
        coeffs = np.array([-b + d, 2 * (a - c + f), 6 * b, 2 * (-a + c + f), -b - d])
        t = np.roots(coeffs)  # Can handle leading zeros
        t = np.real(t[np.isreal(t)])
        p2 = trust_radius * np.vstack((2 * t / (1 + t**2), (1 - t**2) / (1 + t**2)))
        value = 0.5 * np.sum(p2 * np.dot(B2, p2), axis=0) + np.dot(g2, p2)
        i = np.argmin(value)
        p2 = p2[:, i]
        # Project back into the original n-dim space
        p = np.dot(S, p2)
        return p, tags
    elif method == 'cg_steihaug':
        # Settings
        max_iters = 100000  # TODO put in settings
        # Init
        n = x.size
        g = obj.gradient(x)
        B = obj.hessian(x)
        z = np.zeros(n)
        r = np.copy(g)
        d = -np.copy(g)
        # Choose eps according to Algorithm 7.1
        grad_norm = la.norm(g)
        eps = min(0.5, grad_norm**0.5) * grad_norm
        if la.norm(r) < eps:
            p = np.zeros(n)
            tags.append('Stopping tolerance reached!')
            return p, tags
        j = 0
        while j + 1 < max_iters:
            # Check if 'd' is a direction of non-positive curvature
            dBd = np.dot(d, np.dot(B, d))
            rr = np.dot(r, r)
            if dBd <= 0:
                ta = np.dot(d, d)
                tb = 2 * np.dot(d, z)
                tc = np.dot(z, z) - trust_radius**2
                taus = quadratic_formula(ta, tb, tc)
                tau = np.max(taus)
                p = z + tau * d
                tags.append('Negative curvature encountered!')
                return p, tags
            alpha = rr / dBd
            z_new = z + alpha * d
            # Check if trust region bound violated
            if la.norm(z_new) >= trust_radius:
                ta = np.dot(d, d)
                tb = 2 * np.dot(d, z)
                tc = np.dot(z, z) - trust_radius**2
                taus = quadratic_formula(ta, tb, tc)
                tau = np.max(taus)
                p = z + tau * d
                tags.append('Trust region boundary reached!')
                return p, tags
            z = np.copy(z_new)
            r = r + alpha * np.dot(B, d)
            rr_new = np.dot(r, r)
            if la.norm(r) < eps:
                p = np.copy(z)
                tags.append('Stopping tolerance reached!')
                return p, tags
            beta = rr_new / rr
            d = -r + beta * d
            j += 1
        p = np.zeros(n)
        tags.append('ALERT! CG-Steihaug failed to solve trust-region subproblem within max_iters')
        return p, tags
    else:
        raise ValueError('Invalid step method!')
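# quadratic_formula is used by the dogleg and CG-Steihaug branches above but
# is not defined in this section; a minimal sketch returning both roots of
# a*t**2 + b*t + c = 0 (assuming a real discriminant):
def quadratic_formula(a, b, c):
    disc = np.sqrt(b**2 - 4 * a * c)
    return np.array([(-b - disc) / (2 * a), (-b + disc) / (2 * a)])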
def traverse_mesh(coords, faces, center, stride=1, verbose=False, is_sparse=True):
    """
    Calculates the traversal list of all vertices in the mesh
    :param coords: coordinates of the vertices
    :param faces: triplets of vertices for each triangle
    :param center: center vertex
    :param stride: the stride to be covered
    :param verbose: whether to print time after each iteration
    :param is_sparse: whether a sparse implementation is desired
    :return: list of all vertices in the mesh, starting from the center and in order of traversal
    """
    adj_mtx, coords, faces = create_adj_mtx(coords, faces, is_sparse)

    verbose_ctr = 1
    start = time.time()

    if stride == 1:
        vertex = center
        verts = list()

        # level_0
        verts.append(vertex)
        v = vertex

        # find closest point in level 1
        dists = []
        if sparse.issparse(adj_mtx):
            nz = adj_mtx.tolil().rows
            ix_list = nz[v]
        else:
            row = adj_mtx[v]
            ix_list = np.nonzero(row)
            ix_list = ix_list[0]
        for j in ix_list:
            d = get_dist(coords, v, j)
            dists.append(d)
        ix_min = ix_list[dists.index(min(dists))]
        closest_ix = ix_min

        # levels >= 1
        if sparse.issparse(adj_mtx):
            l = adj_mtx.shape[0]
        else:
            l = len(adj_mtx[0])

        while len(verts) <= 0.95 * l:  # until 95% of the vertices are seen
            # closest_ix is the closest vertex of the new level
            # find the ordering of the level
            if verbose:
                print("Iteration {}: {}".format(verbose_ctr, time.time() - start))
                verbose_ctr = verbose_ctr + 1
            arr = get_order(adj_mtx, coords, ix_list, closest_ix, verts)
            for i in arr:
                if i not in verts:
                    verts.append(i)
            # get next level: for each j in ix_list, get neighbors that are
            # not already in verts, then add them to the new list
            next_list = []
            for j in ix_list:
                if sparse.issparse(adj_mtx):
                    new_row = nz[j]
                else:
                    new_row = adj_mtx[j]
                    new_row = np.nonzero(new_row)
                    new_row = new_row[0]
                for k in new_row:
                    if k not in verts:
                        next_list.append(k)
            next_list = list(set(next_list))
            if len(next_list) == 0:
                continue
            # find starting point of next level using the point-to-line distance
            c1 = coords[vertex]
            c2 = coords[closest_ix]
            line_dists = []
            for j in next_list:
                c3 = coords[j]
                # distance from c3 to the line through c1 and c2
                line_dist = LA.norm(np.cross(c2 - c1, c1 - c3)) / LA.norm(c2 - c1)
                line_dists.append(line_dist)
            ix_list = next_list
            closest_ix = next_list[line_dists.index(min(line_dists))]
        return verts

    else:  # multiple stride case
        vertex = center
        verts = list()

        # level_0
        verts.append(vertex)
        v = vertex
        seen = list()
        seen.append(v)

        if sparse.issparse(adj_mtx):
            nz = adj_mtx.tolil().rows
            ix_list = nz[v]
        else:
            row = adj_mtx[v]
            ix_list = np.nonzero(row)
            ix_list = ix_list[0]

        dists = []
        for j in ix_list:
            d = get_dist(coords, v, j)
            dists.append(d)
        ix_min = ix_list[dists.index(min(dists))]
        closest_ix = ix_min
        add_to_verts = False
        ctr = 1

        # levels >= 1
        if sparse.issparse(adj_mtx):
            l = adj_mtx.shape[0]
        else:
            l = len(adj_mtx[0])

        while len(seen) != l:  # until all vertices are seen
            # closest_ix is the closest vertex of the new level
            # find the ordering of the level
            arr = get_order(adj_mtx, coords, ix_list, closest_ix, seen)
            seen = seen + arr
            if add_to_verts:
                # add only every stride-th level to the traversal list
                temp_arr = arr[::stride]
                verts = verts + temp_arr
            ctr = ctr + 1
            if ctr % stride == 0:
                add_to_verts = True
            else:
                add_to_verts = False
            # get next level: for each j in ix_list, get neighbors that are
            # not already seen, then add them to the new list
            next_list = []
            for j in ix_list:
                if sparse.issparse(adj_mtx):
                    nz = adj_mtx.tolil().rows
                    new_row = nz[j]
                else:
                    new_row = adj_mtx[j]
                    new_row = np.nonzero(new_row)
                    new_row = new_row[0]
                for k in new_row:
                    if k not in seen:
                        next_list.append(k)
            next_list = list(set(next_list))
            if len(next_list) == 0:
                continue
            # find starting point of next level using the point-to-line distance
            c1 = coords[vertex]
            c2 = coords[closest_ix]
            line_dists = []
            for j in next_list:
                c3 = coords[j]
                # distance from c3 to the line through c1 and c2
                line_dist = LA.norm(np.cross(c2 - c1, c1 - c3)) / LA.norm(c2 - c1)
                line_dists.append(line_dist)
            ix_list = next_list
            closest_ix = next_list[line_dists.index(min(line_dists))]
        return verts
def optimize(self, obj, hidden_data=None):
    if hidden_data is not None:
        A, B, Q, X0 = hidden_data
        # n, m = B.shape

    def join_strings(word_list, display_width=16, spacer=' '):
        new_list = [f'{word:>{display_width}}' for word in word_list]
        return spacer.join(new_list)

    if self.setting.verbose:
        tags = []
        print_cols = ['iteration', 'objective_value', 'gradient_norm', 'hess_min', 'hess_max']
        header = join_strings(print_cols)
        print(header)

    def print_line(i, f, g, h):
        # Print per-iteration diagnostic info
        gi = la.norm(g)
        hi = np.sort(la.eig(h)[0])
        hi_min = hi[0]
        hi_max = hi[-1]
        current_cols = ['%d' % i, '%.3e' % f, '%.3e' % gi, '%.3e' % hi_min, '%.3e' % hi_max]
        line = join_strings(current_cols)
        if tags:
            line = line + ' ' + ' '.join(tags)
        print(line)
        return line

    # Initialize dimension, iterate, step length, state_aux quantities
    n = self.setting.x0.size
    x = np.copy(self.setting.x0)
    state_aux = self.init_state_aux()
    converged = False

    # Pre-allocate history arrays
    t_hist = np.arange(self.setting.max_iters)
    x_hist = np.zeros([self.setting.max_iters, n])
    f_hist = np.zeros(self.setting.max_iters)
    g_hist = np.zeros([self.setting.max_iters, n])
    h_hist = np.zeros([self.setting.max_iters, n, n])

    # Perform iterative optimization
    for i in range(self.setting.max_iters):
        # if hidden_data is not None:
        #     K = mat(x, (m, n))
        #     rho = specrad(A + np.dot(B, K))
        #     print(rho)

        # Record history
        f = obj.function(x)
        g = obj.gradient(x)
        h = obj.hessian(x)
        x_hist[i] = np.copy(x)
        f_hist[i] = np.copy(f)
        g_hist[i] = np.copy(g)
        h_hist[i] = np.copy(h)

        if self.setting.verbose:
            if (i <= self.setting.verbose_start) or (i % self.setting.verbose_stride == 0):
                print_line(i, f, g, h)

        # Check if gradient has fallen below termination limit
        if la.norm(g) < self.setting.min_grad_norm:
            # Trim off unused part of history matrices
            t_hist = t_hist[0:i + 1]
            x_hist = x_hist[0:i + 1]
            f_hist = f_hist[0:i + 1]
            g_hist = g_hist[0:i + 1]
            h_hist = h_hist[0:i + 1]
            converged = True
            break

        # Take a step to get the next iterate
        x, state_aux, tags = self.update(x, obj, state_aux)

    if not converged:
        # Record history
        f = obj.function(x)
        g = obj.gradient(x)
        h = obj.hessian(x)
        x_hist[-1] = np.copy(x)
        f_hist[-1] = np.copy(f)
        g_hist[-1] = np.copy(g)
        h_hist[-1] = np.copy(h)
        if self.setting.verbose:
            print_line(i + 1, f, g, h)

    if converged:
        print(f"{PrintColors.OKGREEN}Optimization converged successfully!{PrintColors.ENDC}")
    else:
        print(f"{PrintColors.FAIL}Optimization failed to converge, stopping early!{PrintColors.ENDC}")
    print('')
    return t_hist, x_hist, f_hist, g_hist, h_hist
def normalize(v):
    return v / norm(v)
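# A guarded variant (illustrative only): normalize divides by zero for the
# zero vector, so callers handling near-zero inputs could use:
def normalize_safe(v, eps=1e-12):
    n = norm(v)
    return v / n if n > eps else np.zeros_like(v)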
def cost_abs(H, A, S, E_np_masked):
    # Masked reconstruction RMSE plus L2 regularization on the factors
    pred = np.einsum('Hr, Ar, Sr -> HAS', H, A, S)
    mask = ~np.isnan(E_np_masked)
    error_1 = (pred - E_np_masked)[mask].flatten()
    error_2 = 0.01 * LA.norm(H) + 0.01 * LA.norm(A) + 0.01 * LA.norm(S)
    return np.sqrt((error_1**2).mean()) + error_2
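# A minimal usage sketch (hypothetical shapes): when the masked tensor is an
# exact rank-r reconstruction with missing entries, only the regularization
# term contributes.
def _demo_cost_abs(n_h=4, n_a=5, n_s=6, r=2):
    H = np.random.rand(n_h, r)
    A = np.random.rand(n_a, r)
    S = np.random.rand(n_s, r)
    E = np.einsum('Hr, Ar, Sr -> HAS', H, A, S)
    E[0, 0, 0] = np.nan  # missing entry, ignored by the mask
    print(cost_abs(H, A, S, E))  # ~= 0.01 * (||H|| + ||A|| + ||S||)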
def run(self, x, *args, **kwargs):
    return norm(np.asarray(self.A) @ x + self.b)**2 + self.c * norm(x)**2
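# Example: run evaluates ||A x + b||^2 + c ||x||^2; with A = I and b = 0 it
# reduces to (1 + c) * ||x||^2, which is minimized at the origin.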