def generic_cost(mu, sigma, u, step_cost, terminal_cost, state_trafo=None, lambd=1.0): """ Generic cost function for multi-step ahead predictions """ if state_trafo is None: state_trafo = lambda mu, sigma: mu, sigma T, n_s = np.shape(mu) _, n_u = np.shape(u) c = 0 for i in range(T - 1): mu_i = cas_reshape(mu[i, :], (n_s, 1)) v_i = cas_reshape(sigma[i, :], (n_s, n_s)) u_i = cas_reshape(u[i, :], (n_u, 1)) mu_i, v_i = state_trafo(mu_i, v_i) c += step_cost(mu_i, v_i, u_i) mu_T = cas_reshape(mu[-1, :], (n_s, 1)) v_T = cas_reshape(sigma[-1, :], (n_s, n_s)) mu_T, v_T = state_trafo(mu_T, v_T) c += terminal_cost(mu_T, v_T) return c
def eval_safety_constraints(self, p_all, q_all, ubg_term=0., lbg_term=-np.inf, ubg_interm=0., lbg_interm=-np.inf, terminal_only=False, eps_constraints=1e-5): """ Evaluate the safety constraints """ g_term_val = self.g_term_cas(p_all[-1, :, None], cas_reshape(q_all[-1, :], (self.n_s, self.n_s))) feasible_term = np.all(lbg_term - eps_constraints < g_term_val) and np.all( g_term_val < ubg_term + eps_constraints) feasible = feasible_term if terminal_only or self.h_mat_obs is None: if self.verbosity > 1: print(( "\n===== Evaluated terminal constraint values: FEASIBLE = {} =====".format( feasible))) print(g_term_val) print("\n===== ===== ===== ===== ===== ===== ===== =====") return feasible, g_term_val g_interm_val = self.g_interm_cas(p_all[-1, :, None], cas_reshape(q_all[-1, :], ( self.n_s, self.n_s))) feasible_interm = np.all( lbg_interm - eps_constraints < g_interm_val) and np.all( g_interm_val < ubg_interm + eps_constraints) feasible = feasible_term and feasible_interm return feasible, np.vstack((g_term_val, g_interm_val))
def _k_lin_rbf(x, hyp, y=None, diag_only=False): """ Evaluate the prdocut of linear and rbf kernel function symbolically using Casadi """ prod_rbf_lengthscale = hyp["prod.rbf.lengthscale"] prod_rbf_variance = hyp["prod.rbf.variance"] prod_linear_variances = hyp["prod.linear.variances"] linear_variances = hyp["linear.variances"] x_rbf = cas_reshape(x[:, 1], (-1, 1)) y_rbf = y if not y is None: y_rbf = cas_reshape(y[:, 1], (-1, 1)) k_prod_rbf = _k_rbf(x_rbf, y_rbf, prod_rbf_variance, prod_rbf_lengthscale, diag_only) x_prod_lin = cas_reshape(x[:, 1], (-1, 1)) y_prod_lin = y if not y is None: y_prod_lin = cas_reshape(y[:, 1], (-1, 1)) k_prod_lin = _k_lin(x_prod_lin, y_prod_lin, prod_linear_variances, diag_only) k_linear = _k_lin(x, y, linear_variances, diag_only) return k_prod_lin * k_prod_rbf + k_linear
def _k_lin_mat52(x, hyp, y=None, diag_only=False): """ Evaluate the custom kernel composed of linear and matern kernels Evaluate the kernel k_lin*k_mat52 + k_lin """ prod_mat52_lengthscale = hyp["prod.mat52.lengthscale"] prod_mat52_variance = hyp["prod.mat52.variance"] prod_linear_variances = hyp["prod.linear.variances"] linear_variances = hyp["linear.variances"] x_m52 = cas_reshape(x[:, 1], (-1, 1)) y_m52 = y if not y is None: y_m52 = cas_reshape(y[:, 1], (-1, 1)) k_prod_mat52 = _k_mat52(x_m52, y_m52, prod_mat52_variance, prod_mat52_lengthscale, diag_only) x_prod_lin = cas_reshape(x[:, 1], (-1, 1)) y_prod_lin = y if not y is None: y_prod_lin = cas_reshape(y[:, 1], (-1, 1)) k_prod_lin = _k_lin(x_prod_lin, y_prod_lin, prod_linear_variances, diag_only) k_linear = _k_lin(x, y, linear_variances, diag_only) return k_prod_lin * k_prod_mat52 + k_linear
def _get_solution(self, sol, crash, p_0, k_fb_0, verbose=False, feas_tol=1e-6): if crash: self.n_fail += 1 exit_code = 2 u_apply, _ = self._get_old_solution(p_0) return u_apply, exit_code x_opt = sol["x"] if self.opt_x0: p_0 = cas_reshape(x_opt[:self.n_s], (self.n_s, 1)) k_ff = cas_reshape(x_opt[self.n_s:], (-1, self.n_u)) else: k_ff = cas_reshape(x_opt, (-1, self.n_u)) mu_all, sigma_all, sigma_g = self.f_multistep_eval(p_0, k_ff, k_fb_0) # Evaluate the constraints g_res = sol["g"] feasible = True if np.any(np.array(self.lbg) - feas_tol > g_res) or np.any( np.array(self.ubg) + feas_tol < g_res): feasible = False if feasible: self.k_ff_old = k_ff exit_code = 0 self.n_fail = 0 if verbose: return np.array(k_ff[0, :]).reshape( self.n_u, ), exit_code, np.vstack( (p_0.T, mu_all)), sigma_all, k_ff, k_fb_0 return np.array(k_ff[0, :]).reshape(self.n_u, ), exit_code else: self.n_fail += 1 u_apply, exit_code = self._get_old_solution(p_0) if verbose: return u_apply, exit_code, None, None, None return u_apply, exit_code
def array_of_vec_to_array_of_mat(array_of_vec, n, m): """ Convert multiple vectorized matrices to 3-dim numpy array Parameters ---------- array_of_vec: T x n*m array_like array of vectorized matrices n: int The first dimension of the vectorized matrices m: int The second dimension of the vectorized matrices Returns ------- array_of_mat: T x n x m ndarray The 3D-array containing the matrices """ return np.reshape(array_of_vec, (-1, n, m)) T, size_vec = np.shape(array_of_vec) assert size_vec == n * m, "Are the shapes of the vectorized and output matrix the same?" array_of_mat = np.empty((T, n, m)) for i in range(T): array_of_mat[i, :, :] = cas_reshape(array_of_mat[i, :], (n, m)) return array_of_mat
def vec_to_mat(v, n, tril_vec=True): """ Reshape vector into square matrix Inputs: v: vector containing matrix entries (either of length n^2 or n*(n+1)/2) n: the dimensionality of the new matrix Optionals: tril_vec: If tril_vec is True we assume that the resulting matrix is symmetric and that the """ n_vec = len(v) if tril_vec: A = np.empty((n, n)) c = 0 for i in range(n): for j in range(i, n): A[i, j] = v[c] A[j, i] = A[i, j] c += 1 else: A = cas_reshape(v, (n, n)) return A
def plot_ellipsoid_trajectory(self, p, q, vis_safety_bounds=True, ax=None, color="r"): """ Plot the reachability ellipsoids given in observation space TODO: Need more principled way to transform ellipsoid to internal states Parameters ---------- p: n x n_s array[float] The ellipsoid centers of the trajectory q: n x n_s x n_s ndarray[float] The shape matrices of the trajectory vis_safety_bounds: bool, optional Visualize the safety bounds of the system """ new_ax = False if ax is None: fig = plt.figure() ax = fig.add_subplot(111) new_ax = True plt.sca(ax) n, n_s = np.shape(p) handles = [None] * n for i in range(n): p_i = cas_reshape(p[i, :], (n_s, 1)) + self.p_origin.reshape( (n_s, 1)) q_i = cas_reshape(q[i, :], (self.n_s, self.n_s)) ax, handles[i] = plot_ellipsoid_2D(p_i, q_i, ax, color=color) # ax = plot_ellipsoid_2D(p_i,q_i,ax,color = color) if vis_safety_bounds: ax = self.plot_safety_bounds(ax) if new_ax: plt.show() return ax, handles
def test_multistep_reachability(before_test_onestep_reachability): """ """ p, _, gp, k_fb, _, L_mu, L_sigm, c_safety, a, b = before_test_onestep_reachability T = 3 n_u, n_s = np.shape(k_fb) u_0 = .2 * np.random.randn(n_u, 1) k_fb_0 = np.random.randn( T - 1, n_s * n_u) # np.zeros((T-1,n_s*n_u))# np.random.randn(T-1,n_s*n_u) k_ff = np.random.randn(T - 1, n_u) # k_fb_ctrl = np.zeros((n_u,n_s))#np.random.randn(n_u,n_s) u_0_cas = MX.sym("u_0", (n_u, 1)) k_fb_cas_0 = MX.sym("k_fb", (T - 1, n_u * n_s)) k_ff_cas = MX.sym("k_ff", (T - 1, n_u)) p_new_cas, q_new_cas, _ = reach_cas.multi_step_reachability( p, u_0, k_fb_cas_0, k_ff_cas, gp, L_mu, L_sigm, c_safety, a, b) f = Function("f", [u_0_cas, k_fb_cas_0, k_ff_cas], [p_new_cas, q_new_cas]) k_fb_0_cas = np.copy(k_fb_0) # np.copy(k_fb_0) for i in range(T - 1): k_fb_0_cas[i, None, :] = k_fb_0_cas[i, None, :] + cas_reshape( k_fb, (1, n_u * n_s)) p_all_cas, q_all_cas = f(u_0, k_fb_0_cas, k_ff) k_ff_all = np.vstack((u_0.T, k_ff)) k_fb_apply = array_of_vec_to_array_of_mat(k_fb_0, n_u, n_s) for i in range(T - 1): k_fb_apply[i, :, :] += k_fb _, _, p_all_num, q_all_num = reach_num.multistep_reachability( p, gp, k_fb_apply, k_ff_all, L_mu, L_sigm, None, c_safety, 0, a, b, None) assert np.allclose(p_all_cas, p_all_num, r_tol, a_tol), "Are the centers of the ellipsoids same?" assert np.allclose(q_all_cas[0, :], q_all_num[0, :, :].reshape( (-1, n_s * n_s)), r_tol, a_tol), "Are the first shape matrices the same?" assert np.allclose(q_all_cas[1, :], q_all_num[1, :, :].reshape( (-1, n_s * n_s)), r_tol, a_tol), "Are the second shape matrices the same?" # assert np.allclose(q_all_cas[1,:],q_all_num[1,:].reshape((-1,n_s*n_s))), "Are the second shape matrices the same?" assert np.allclose(q_all_cas[-1, :], q_all_num[-1, :, :].reshape( (-1, n_s * n_s)), r_tol, a_tol), "Are the last shape matrices the same?" assert np.allclose(q_all_cas, q_all_num.reshape((T, n_s * n_s)), r_tol, a_tol), "Are the shape matrices the same?"
def get_action(self, x0_mu, verbose=False): """ Wrapper around the solve Function Parameters ---------- x0_mu: n_x x 0 np.array[float] The current state of the system Returns ------- u_apply: n_ux0 np.array[float] The action to be applied to the system exit_code: int An exit code that indicates what kind of action is applied. The following values are possible: 0: feasible solution found, optimization succeeded. 1: Optimization failed to find feasible solution. Old solution applied. 2: Optimization crashed. 3: No old feasible solution found, apply k_fb """ assert self.solver_initialized, "Need to initialize the solver first!" if self.opt_x0: k_ff_0 = np.random.randn(self.T, self.n_u) k_fb_0 = self.k_fb params = vertcat(cas_reshape(k_fb_0, (-1, 1))) opt_vars_init = vertcat(cas_reshape(x0_mu, (-1, 1)), cas_reshape(k_ff_0, (-1, 1))) else: k_ff_0, k_fb_0 = self._get_init_controls() params = vertcat(cas_reshape(x0_mu, (-1, 1)), cas_reshape(k_fb_0, (-1, 1))) opt_vars_init = vertcat(cas_reshape(k_ff_0, (-1, 1))) crash = False # sol = self.solver(x0=opt_vars_init,lbg=self.lbg,ubg=self.ubg,p=params) try: # pass sol = self.solver(x0=opt_vars_init, lbg=self.lbg, ubg=self.ubg, p=params) except: exit_code = 2 crash = True warnings.warn("NLP solver crashed, solution infeasible") sol = None return self._get_solution(sol, crash, x0_mu, k_fb_0, verbose)
def test_mpc_casadi_same_constraint_values_as_numeric_eval( before_test_safempc): """check if the returned open loop (numerical) ellipsoids are the same as in internal planning""" env, safe_mpc, k_ff_perf_traj, k_fb_perf_traj, k_fb_apply, k_ff_apply, p_all, q_all, sol, q_0, k_fb_0 = before_test_safempc n_s = env.n_s n_u = env.n_u g_0 = lin_ellipsoid_safety_distance(p_all[0, :].reshape(n_s, 1), q_all[0, :].reshape(n_s, n_s), safe_mpc.h_mat_obs, safe_mpc.h_obs) g_1 = lin_ellipsoid_safety_distance(p_all[1, :].reshape(n_s, 1), cas_reshape(q_all[1, :], (n_s, n_s)), safe_mpc.h_mat_obs, safe_mpc.h_obs) g_safe = lin_ellipsoid_safety_distance(p_all[-1, :].reshape(n_s, 1), q_all[-1, :].reshape(n_s, n_s), safe_mpc.h_mat_safe, safe_mpc.h_safe) idx_state_constraints = env.n_u * safe_mpc.n_safe * 2 - 1 if q_0 is not None: idx_state_constraints += 1 ## not sure why this is constr_values = sol["g"] assert_allclose( g_0, constr_values[idx_state_constraints:idx_state_constraints + safe_mpc.m_obs], r_tol, a_tol) # Are the distances to the obstacle the same after two steps? assert_allclose( g_1, constr_values[idx_state_constraints + safe_mpc.m_obs:idx_state_constraints + 2 * safe_mpc.m_obs], r_tol, a_tol) # Are the distances to the safe set the same after the last step? assert_allclose( g_safe, constr_values[idx_state_constraints + 2 * safe_mpc.m_obs:idx_state_constraints + 2 * safe_mpc.m_obs + safe_mpc.m_safe], r_tol, a_tol)
def _get_solution(self, x_0, sol, k_fb, k_fb_perf_0, sol_verbose=False, crashed=False, feas_tol=1e-6, q_0=None, k_fb_0=None): """ Process the solution dict of the casadi solver Processes the solution dictionary of the casadi solver and (depending on the chosen mode) saves the solution for reuse in the next time step. Depending on the chosen verbosity level, it also prints some statistics. Parameters ---------- sol: dict The solution dictionary returned by the casadi solver sol_verbose: boolean, optional Return additional solver results such as the constraint values Returns ------- k_fb_apply: n_u x n_s array[float] The feedback control term to be applied to the system k_ff_apply: n_u x 1 array[float] The feed-forward control term to be applied to the system k_fb_all: n_safe x n_u x n_s The feedback control terms for all time steps k_ff_all: n_safe x n_u x 1 h_values: (m_obs*n_safe + m_safe) x 0 array[float], optional The values of the constraint evaluation (distance to obstacle) """ success = True feasible = True if crashed: feasible = False if self.verbosity > 1: print("Optimization crashed, infeasible soluion!") else: g_res = np.array(sol["g"]).squeeze() # This is not sufficient, since casadi gives out wrong feasibility values if np.any(np.array(self.lbg) - feas_tol > g_res) or np.any( np.array(self.ubg) + feas_tol < g_res): feasible = False x_opt = sol["x"] self.has_openloop = True if self.opt_x0: x_0 = x_opt[:self.n_s] x_opt = x_opt[self.n_s:, :] # get indices of the respective variables n_u_0 = self.n_u n_u_perf = 0 if self.n_perf > 1: n_u_perf = (self.n_perf - self.r) * self.n_u n_k_ff = (self.n_safe - 1) * self.n_u c = 0 idx_u_0 = np.arange(n_u_0) c += n_u_0 idx_u_perf = np.arange(c, c + n_u_perf) c += n_u_perf idx_k_ff = np.arange(c, c + n_k_ff) c += n_k_ff u_apply = np.array(cas_reshape(x_opt[idx_u_0], (1, self.n_u))) k_ff_perf = np.array( cas_reshape(x_opt[idx_u_perf], (self.n_perf - self.r, self.n_u))) k_ff_safe = np.array( cas_reshape(x_opt[idx_k_ff], (self.n_safe - 1, self.n_u))) k_ff_safe_all = np.vstack((u_apply, k_ff_safe)) k_fb_safe_output = array_of_vec_to_array_of_mat(np.copy(k_fb), self.n_u, self.n_s) p_safe, q_safe, gp_sigma_pred_safe_all = self.get_safety_trajectory_openloop(x_0, u_apply, np.copy(k_fb), k_ff_safe, q_0, k_fb_0) p_safe = np.array(p_safe) q_safe = np.array(q_safe) if self.verbosity > 1: print("=== Safe Trajectory: ===") print("Centers:") print(p_safe) print("Shape matrices:") print(q_safe) print("Safety controls:") print(u_apply) print(k_ff_safe) k_fb_perf_traj_eval = np.empty((0, self.n_s * self.n_u)) k_ff_perf_traj_eval = np.empty((0, self.n_u)) if self.n_safe > 1: k_fb_perf_traj_eval = np.vstack( (k_fb_perf_traj_eval, k_fb[:self.r - 1, :])) k_ff_perf_traj_eval = np.vstack( (k_ff_perf_traj_eval, k_ff_safe[:self.r - 1, :])) if self.n_perf > self.r: k_fb_perf_traj_eval = np.vstack((k_fb_perf_traj_eval, np.matlib.repmat(k_fb_perf_0, self.n_perf - self.r, 1))) k_ff_perf_traj_eval = np.vstack((k_ff_perf_traj_eval, k_ff_perf)) if self.n_perf > 1: mu_perf, sigma_perf = self._f_multistep_perf_eval(x_0.squeeze(), u_apply, k_fb_perf_traj_eval, k_ff_perf_traj_eval) if self.verbosity > 1: print("=== Performance Trajectory: ===") print("Mu perf:") print(mu_perf) print("Peformance controls:") print(k_ff_perf_traj_eval) feasible, _ = self.eval_safety_constraints(p_safe, q_safe) if self.rhc and feasible: self.k_ff_safe = k_ff_safe self.k_ff_perf = k_ff_perf self.p_safe = p_safe self.k_fb_safe_all = np.copy(k_fb) self.u_apply = u_apply self.k_fb_perf_0 = k_fb_perf_0 if feasible: self.n_fail = 0 if not feasible: self.n_fail += 1 q_all = None k_fb_safe_output = None k_ff_all = None p_safe = None q_safe = None g_res = None if self.n_fail >= self.n_safe: # Too many infeasible solutions -> switch to safe controller if self.verbosity > 1: print( "Infeasible solution. Too many infeasible solutions, switching to safe controller") u_apply = self.safe_policy(x_0) k_ff_safe_all = u_apply else: # can apply previous solution if self.verbosity > 1: print(( "Infeasible solution. Switching to previous solution, n_fail = {}, n_safe = {}".format( self.n_fail, self.n_safe))) if sol_verbose: u_apply, k_fb_safe_output, k_ff_safe_all, p_safe = self.get_old_solution( x_0, get_ctrl_traj=True) else: u_apply = self.get_old_solution(x_0) k_ff_safe_all = u_apply if sol_verbose: return x_0, u_apply, feasible, success, k_fb_safe_output, k_ff_safe_all, p_safe, q_safe, sol, gp_sigma_pred_safe_all return x_0, u_apply, success
def solve(self, p_0, u_0=None, k_ff_all_0=None, k_fb_safe=None, u_perf_0=None, k_fb_perf_0=None, sol_verbose=False, q_0=None, k_fb_0=None): """ Solve the MPC problem for a given set of input parameters Parameters ---------- p_0: n_s x 1 array[float] The initial (current) state k_ff_all_0: n_safe x n_u array[float], optional The initialization of the feed-forward controls k_fb_all_0: n_safe x (n_s * n_u) array[float], optional The initialization of the feedback controls Returns ------- k_fb_apply: n_u x n_s array[float] The feedback control term to be applied to the system k_ff_apply: n_u x 1 array[float] The feed-forward control term to be applied to the system k_fb_all: n_safe x n_u x n_s The feedback control terms for all time steps k_ff_all: n_safe x n_u x 1 """ assert self.solver_initialized, "Need to initialize the solver first!" u_0_init, k_ff_all_0_init, k_fb_safe_init, u_perf_0_init, k_fb_perf_0_init = self._get_init_controls() if u_0 is None: u_0 = u_0_init if k_ff_all_0 is None: k_ff_all_0 = k_ff_all_0_init if k_fb_safe is None: k_fb_safe = k_fb_safe_init if u_perf_0 is None: u_perf_0 = u_perf_0_init if k_fb_perf_0 is None: k_fb_perf_0 = k_fb_perf_0_init if q_0 is not None: if k_fb_0 is None: k_fb_0 = self.get_lqr_feedback() if self.opt_x0: params = np.vstack( (cas_reshape(k_fb_safe, (-1, 1)), cas_reshape(k_fb_perf_0, (-1, 1)))) opt_vars_init = vertcat(cas_reshape(p_0, (-1, 1)), cas_reshape(u_0, (-1, 1)), u_perf_0, \ cas_reshape(k_ff_all_0, (-1, 1))) else: params = np.vstack( (p_0, cas_reshape(k_fb_safe, (-1, 1)), cas_reshape(k_fb_perf_0, (-1, 1)))) opt_vars_init = vertcat(cas_reshape(u_0, (-1, 1)), u_perf_0, \ cas_reshape(k_ff_all_0, (-1, 1))) if self.init_uncertainty: params = vertcat(params, cas_reshape(q_0, (-1, 1)), cas_reshape(k_fb_0, (-1, 1))) crash = False sol = self.solver(x0=opt_vars_init, lbg=self.lbg, ubg=self.ubg, p=params) try: # pass sol = self.solver(x0=opt_vars_init, lbg=self.lbg, ubg=self.ubg, p=params) except: crash = True warnings.warn("NLP solver crashed, solution infeasible") sol = None return self._get_solution(p_0, sol, k_fb_safe, k_fb_perf_0, sol_verbose, crash, q_0=q_0, k_fb_0=k_fb_0)
def _get_init_controls(self): """ Initialize the controls for the MPC step Returns ------- u_0: n_u x 0 np.array[float] Initialization of the first (shared) input k_ff_safe_new: (n_safe-1) x n_u np.ndarray[float] Initialization of the safety feed-forward control inputs k_fb_new: n_u x n_x np.ndarray[float] Initialization of the safety feed-back control inputs k_ff_perf_new: (n_perf - r_1) x n_u np.ndarray[float] Initialization of the performance feed-forward control inputs. Not including the shared controls (if r > 1) k_fb_perf_0: n_u x n_x np.ndarray[float] Initialization of the safety feed-back control inputs """ u_perf_0 = None k_fb_perf_0 = None k_fb_lqr = self.get_lqr_feedback() if self.do_shift_solution and self.n_fail == 0: if self.n_safe > 1: k_fb_safe = np.copy(self.k_fb_safe_all) # Shift the safe controls k_ff_safe = np.copy(self.k_ff_safe) u_0 = k_ff_safe[0, :] if self.n_safe > self.r and self.n_perf > self.n_safe: # the first control after the shared controls k_ff_perf = np.copy(self.k_ff_perf) k_ff_r_last = (k_ff_perf[0, :] + k_ff_safe[self.r - 1, :]) / 2 # mean of first perf ctrl and safe ctrl after shared else: k_ff_r_last = k_ff_safe[-1, :] # just the last safe control k_ff_safe_new = np.vstack((k_ff_safe[1:self.r, :], k_ff_r_last)) if self.n_safe > self.r + 1: k_ff_safe_new = np.vstack((k_ff_safe_new, k_ff_safe[self.r:, :])) else: u_0 = self.u_apply k_ff_safe_new = np.array([]) if self.n_perf - self.r > 0: k_ff_perf = np.copy(self.k_ff_perf) k_ff_perf_new = np.vstack((k_ff_perf[1:, :], k_ff_perf[-1, :])) if self.perf_has_fb: k_fb_perf_0 = np.copy(self.k_fb_perf_0) else: k_fb_perf_0 = np.array([]) else: k_ff_perf_new = np.array([]) k_fb_perf_0 = np.array([]) else: k_fb_safe = np.empty((self.n_safe - 1, self.n_s * self.n_u)) for i in range(self.n_safe - 1): k_fb_safe[i] = cas_reshape(k_fb_lqr, (1, -1)) k_ff_safe_new = np.zeros((self.n_safe - 1, self.n_u)) u_0 = np.zeros((self.n_u, 1)) k_ff_perf_new = np.array([]) if self.n_perf > 1: k_ff_perf_new = np.zeros((self.n_perf - self.r, self.n_u)) if self.perf_has_fb: k_fb_perf_0 = k_fb_lqr else: k_fb_perf_0 = np.array([]) if self.n_safe > 1: k_fb_safe_new = np.vstack((k_fb_safe[1:, :], k_fb_safe[-1, :])) else: k_fb_safe_new = np.array([]) return u_0, k_ff_safe_new, k_fb_safe, k_ff_perf_new, k_fb_perf_0
def test_multistep_trajectory(before_test_onestep_reachability): """ Compare multi-steps 'by hand' with the function """ mu_0, _, gp, k_fb, k_ff, L_mu, L_sigm, c_safety, a, b = before_test_onestep_reachability T = 3 n_u, n_s = np.shape(k_fb) k_fb_cas_single = SX.sym("k_fb_single", (n_u, n_s)) k_ff_cas_single = SX.sym("k_ff_single", (n_u, 1)) k_ff_cas_all = SX.sym("k_ff_single", (T, n_u)) k_fb_cas_all = SX.sym("k_fb_all", (T - 1, n_s * n_u)) k_fb_cas_all_inp = [ k_fb_cas_all[i, :].reshape((n_u, n_s)) for i in range(T - 1) ] mu_0_cas = SX.sym("mu_0", (n_s, 1)) sigma_0_cas = SX.sym("sigma_0", (n_s, n_s)) mu_onestep_no_var_in, sigm_onestep_no_var_in, _ = prop_casadi.one_step_taylor( mu_0_cas, gp, k_ff_cas_single, a=a, b=b) mu_one_step, sigm_onestep, _ = prop_casadi.one_step_taylor( mu_0_cas, gp, k_ff_cas_single, k_fb=k_fb_cas_single, sigma_x=sigma_0_cas, a=a, b=b) mu_multistep, sigma_multistep, _ = prop_casadi.multi_step_taylor_symbolic( mu_0_cas, gp, k_ff_cas_all, k_fb_cas_all_inp, a=a, b=b) on_step_no_var_in = Function( "on_step_no_var_in", [mu_0_cas, k_ff_cas_single], [mu_onestep_no_var_in, sigm_onestep_no_var_in]) one_step = Function( "one_step", [mu_0_cas, sigma_0_cas, k_ff_cas_single, k_fb_cas_single], [mu_one_step, sigm_onestep]) multi_step = Function("multi_step", [mu_0_cas, k_ff_cas_all, k_fb_cas_all], [mu_multistep, sigma_multistep]) # TODO: Need mu, sigma as input aswell mu_1, sigma_1 = on_step_no_var_in(mu_0, k_ff) mu_2, sigma_2 = one_step(mu_1, sigma_1, k_ff, k_fb) mu_3, sigma_3 = one_step(mu_2, sigma_2, k_ff, k_fb) # TODO: stack k_ff and k_fb k_fb_mult = np.array(cas_reshape(k_fb, (1, n_u * n_s))) k_fb_mult = np.array(vertcat(*[k_fb_mult] * (T - 1))) k_ff_mult = vertcat(*[k_ff] * T) mu_all, sigma_all = multi_step(mu_0, k_ff_mult, k_fb_mult) assert np.allclose( np.array(mu_all[0, :]), np.array(mu_1).T), "Are the centers of the first prediction the same?" assert np.allclose( np.array(mu_all[-1, :]), np.array(mu_3).T), "Are the centers of the final prediction the same?" assert np.allclose(cas_reshape(sigma_all[-1, :], (n_s, n_s)), sigma_3), "Are the last covariance matrices the same?"
def find_max_variance(self, x0, sol_verbose=False): """ Find the most informative sample in the space constrained by the mpc structure Parameters ---------- n_restarts: int, optional The number of random initializations of the optimization problem ilqr_init: bool, optional initialize the state feedback terms with the ilqr feedback law sample_mean: n_s x 1 np.ndarray[float], optional The mean of the gaussian initial state-action distribution sample_var: n_s x n_s np.ndarray[float], optional The variance of the gaussian initial state-action distribution Returns ------- x_opt: u_opt: sigm_opt: """ sigma_best = 0 x_best = None u_best = None for i in range(self.n_restarts_optimizer): x_0 = self.env._sample_start_state(self.sample_mean, self.sample_std)[:, None] # sample initial state if self.T > 1: u_0 = self.env.random_action()[:, None] k_fb_lqr = self.safempc.get_lqr_feedback() k_ff_0 = np.zeros((self.T - 1, self.n_u)) for j in range(self.T - 1): k_ff_0[j, :] = self.env.random_action() params_0 = cas_reshape(k_fb_lqr, (-1, 1)) vars_0 = np.vstack((x_0, u_0, cas_reshape(k_ff_0, (-1, 1)))) else: u_0 = self.env.random_action()[:, None] params_0 = [] vars_0 = np.vstack((x_0, u_0)) sol = self.solver(x0=vars_0, p=params_0, lbg=self.lbg, ubg=self.ubg) f_opt = sol["f"] sigm_i = -float(f_opt) if sigm_i > sigma_best: # check if solution would improve upon current best g_sol = np.array(sol["g"]).squeeze() if self._is_feasible(g_sol, np.array(self.lbg), np.array( self.ubg)): # check if solution is feasible w_sol = sol["x"] x_best = np.array(w_sol[:self.n_s]) u_best = np.array(w_sol[self.n_s:self.n_s + self.n_u]) sigma_best = sigm_i z_i = np.vstack((x_best, u_best)).T if self.verbosity > 0: print(("New optimal sigma found at iteration {}".format(i))) if self.verbosity > 1: print(( "New feasible solution with sigma sum {} found".format( str(sigm_i)))) return x_best, u_best
def multi_step_reachability(p_0, u_0, k_fb, k_ff, gp, l_mu, l_sigm, c_safety=1., a=None, b=None, t_z_gp=None, q_0=None, k_fb_0=None): """Generate trajectory reachset by iteratively computing the one-step reachability. Parameters ---------- p_0: n_s x 1 ndarray[float | casadi.sym] Initial state u_0: n_u x 1 ndarray[casadi.sym] The initial action k_fb: n_fb x (n_s * n_u) ndarray[casadi.SX] The initial guess of the feedback controls k_ff: n_fb x n_u ndarray[casadi.sym] The feed forward terms to optimize over gp: SimpleGPModel The gp representing the dynamics l_mu: 1d_array of size n_s Set of Lipschitz constants on the Gradients of the mean function (per state dimension) l_sigma: 1d_array of size n_s Set of Lipschitz constants of the predictive variance (per state dimension) c_safety: float, optional The scaling of the semi-axes of the uncertainty matrix corresponding to a level-set of the gaussian pdf. a: n_s x n_s ndarray[float], optional The A matrix of the linear model Ax + Bu b: n_s x n_u ndarray[float], optional The B matrix of the linear model Ax + Bu q_0: n_s x n_s ndarray[float], optional The n_s x n_s s.p.d. shape matrix of the initial state k_fb_0: n_u x n_s ndarray[float], optional Initial state feedback control matrix only required when q_0 is specified Returns ------- p_all q_all """ n_s = np.shape(p_0)[0] n_u = np.shape(u_0)[0] n_fb = np.shape(k_fb)[0] p_new, q_new, gp_pred_sigma = onestep_reachability(p_0, gp, u_0, l_mu, l_sigm, q_0, k_fb_0, c_safety, a, b, t_z_gp) p_all = p_new.T q_all = q_new.reshape((1, n_s * n_s)) gp_pred_sigma_all = gp_pred_sigma for i in range(n_fb): p_old = p_new q_old = q_new k_ff_i = cas_reshape(k_ff[i, :], (n_u, 1)) k_fb_i = cas_reshape(k_fb[i, :], (n_u, n_s)) p_new, q_new, gp_pred_sigma = onestep_reachability( p_old, gp, k_ff_i, l_mu, l_sigm, q_old, k_fb_i, c_safety, a, b, t_z_gp) p_all = vertcat(p_all, p_new.T) q_all = vertcat(q_all, cas_reshape(q_new, (1, n_s * n_s))) gp_pred_sigma_all = vertcat(gp_pred_sigma_all, gp_pred_sigma) return p_all, q_all, gp_pred_sigma_all
def _generate_perf_trajectory_casadi(self, mu_0, u_0, k_ff_ctrl, k_fb_safe, a=None, b=None, lin_trafo_gp_input=None, safety_constr=False): """ Generate the performance trajectory variables for the casadi solver Parameters: mu_0: n_x x 1 casadi.SX Initial state u_0: n_u x 1 casadi.SX Initial control k_ff_ctrl: (n_safe-1) x n_u casadi.SX Safe feed-forward controls k_fb_safe: (n_safe-1) x (n_x * n_u) casadi.SX Safe feedback control matrices a: n_x x n_x np.ndarray[float], optional The A-matrix of the prior linear model b: n_x x n_u np.ndarray[float], optional The B-matrix of the prior linear model lin_trafo_gp_input: n_x_gp_in x n_x np.ndarray[float], optional Allows for a linear transformation of the gp input (e.g. removing an input) safety_constr: boolean, optional True, if we want to put a constraint after (n_safe+1)th performance trajectory state to be inside the terminal safe set. Can potentially help with (recursive) feasibility """ if self.r > 1: warnings.warn( "Coupling performance and safety trajectory for more than one step is UNTESTED") # we don't have a performance trajectory, so nothing to do here. Might wanna catch this even before if self.n_perf <= 1: return np.array([]), np.array([]), np.array([]), np.array( []), None, np.array([]), [], [], [], [], [] else: k_ff_perf = MX.sym("k_ff_perf", (self.n_perf - self.r, self.n_u)) k_ff_perf_traj = vertcat(k_ff_ctrl[:self.r - 1, :], k_ff_perf) k_fb_perf_traj = np.array([]) for i in range(self.r - 1): k_fb_perf_traj = np.append(k_fb_perf_traj, [k_fb_safe[i, :].reshape((self.n_u, self.n_s))]) if self.perf_has_fb and self.n_perf - self.r > 0: k_fb_perf = MX.sym("k_fb_perf", (self.n_u, self.n_s)) for i in range(self.n_perf - self.r): k_fb_perf_traj = np.append(k_fb_perf_traj, [k_fb_perf]) mu_perf_all, sigma_perf_all, gp_sigma_pred_perf_all = self.perf_trajectory( mu_0, self.ssm_forward, vertcat(u_0, k_ff_perf_traj), k_fb_perf_traj, None, a, b, lin_trafo_gp_input) # evaluation trajectory (mainly for verbosity) mu_0_eval = MX.sym("mu_0", (self.n_s, 1)) u_0_eval = MX.sym("u_0", (self.n_u, 1)) k_fb_perf_all_eval = MX.sym("k_fb_perf", (self.n_perf - 1, self.n_u * self.n_s)) k_ff_perf_all_eval = MX.sym("k_ff_perf", (self.n_perf - 1, self.n_u)) list_kfb_perf = [cas_reshape(k_fb_perf_all_eval[i, :], (self.n_u, self.n_s)) for i in range(self.n_perf - 1)] mu_perf_eval_all, sigma_perf_eval_all, gp_sigma_pred_perf_all_eval = self.perf_trajectory( mu_0_eval, self.ssm_forward, vertcat(u_0_eval, k_ff_perf_all_eval), list_kfb_perf, None, a, b, lin_trafo_gp_input) self._f_multistep_perf_eval = cas.Function("f_multistep_perf_eval", [mu_0_eval, u_0_eval, k_fb_perf_all_eval, k_ff_perf_all_eval], [mu_perf_eval_all, gp_sigma_pred_perf_all_eval]) # generate (approxiamte) constraints for the performance trajectory g_name = [] g = [] lbg = [] ubg = [] if safety_constr and self.n_perf > self.n_safe: g_name += ["Terminal safety performance"] g_term = lin_ellipsoid_safety_distance( cas_reshape(mu_perf_all[self.n_safe + 1, :], (self.n_s, 1)), cas_reshape(sigma_perf_all[self.n_safe + 1, :], (self.n_s, self.n_s)), self.h_mat_safe, self.h_safe) g = vertcat(g, g_term) lbg += [-np.inf] * self.m_safe ubg += [0.] * self.m_safe if self.has_ctrl_bounds: for i in range(self.n_perf - self.r): g_u_i, lbu_i, ubu_i = self._generate_control_constraint( k_ff_perf[i, :].T) g = vertcat(g, g_u_i) lbg += lbu_i ubg += ubu_i g_name += ["ctrl_constr_performance_{}".format(i)] return k_ff_perf, k_fb_perf, k_ff_perf_traj, k_fb_perf_traj, mu_perf_all, sigma_perf_all, gp_sigma_pred_perf_all, g, lbg, ubg, g_name