Esempio n. 1
0
def generic_cost(mu,
                 sigma,
                 u,
                 step_cost,
                 terminal_cost,
                 state_trafo=None,
                 lambd=1.0):
    """ Generic cost function for multi-step ahead predictions



    """
    if state_trafo is None:
        state_trafo = lambda mu, sigma: mu, sigma

    T, n_s = np.shape(mu)
    _, n_u = np.shape(u)

    c = 0
    for i in range(T - 1):
        mu_i = cas_reshape(mu[i, :], (n_s, 1))
        v_i = cas_reshape(sigma[i, :], (n_s, n_s))
        u_i = cas_reshape(u[i, :], (n_u, 1))

        mu_i, v_i = state_trafo(mu_i, v_i)
        c += step_cost(mu_i, v_i, u_i)

    mu_T = cas_reshape(mu[-1, :], (n_s, 1))
    v_T = cas_reshape(sigma[-1, :], (n_s, n_s))
    mu_T, v_T = state_trafo(mu_T, v_T)
    c += terminal_cost(mu_T, v_T)

    return c
Esempio n. 2
0
    def eval_safety_constraints(self, p_all, q_all, ubg_term=0., lbg_term=-np.inf,
                                ubg_interm=0., lbg_interm=-np.inf, terminal_only=False,
                                eps_constraints=1e-5):
        """ Evaluate the safety constraints """
        g_term_val = self.g_term_cas(p_all[-1, :, None],
                                     cas_reshape(q_all[-1, :], (self.n_s, self.n_s)))

        feasible_term = np.all(lbg_term - eps_constraints < g_term_val) and np.all(
            g_term_val < ubg_term + eps_constraints)

        feasible = feasible_term
        if terminal_only or self.h_mat_obs is None:

            if self.verbosity > 1:
                print((
                    "\n===== Evaluated terminal constraint values: FEASIBLE = {} =====".format(
                        feasible)))
                print(g_term_val)
                print("\n===== ===== ===== ===== ===== ===== ===== =====")

            return feasible, g_term_val

        g_interm_val = self.g_interm_cas(p_all[-1, :, None], cas_reshape(q_all[-1, :], (
            self.n_s, self.n_s)))

        feasible_interm = np.all(
            lbg_interm - eps_constraints < g_interm_val) and np.all(
            g_interm_val < ubg_interm + eps_constraints)

        feasible = feasible_term and feasible_interm

        return feasible, np.vstack((g_term_val, g_interm_val))
Esempio n. 3
0
def _k_lin_rbf(x, hyp, y=None, diag_only=False):
    """ Evaluate the prdocut of linear and rbf kernel function symbolically using Casadi

    """
    prod_rbf_lengthscale = hyp["prod.rbf.lengthscale"]
    prod_rbf_variance = hyp["prod.rbf.variance"]
    prod_linear_variances = hyp["prod.linear.variances"]
    linear_variances = hyp["linear.variances"]

    x_rbf = cas_reshape(x[:, 1], (-1, 1))
    y_rbf = y
    if not y is None:
        y_rbf = cas_reshape(y[:, 1], (-1, 1))

    k_prod_rbf = _k_rbf(x_rbf, y_rbf, prod_rbf_variance, prod_rbf_lengthscale,
                        diag_only)

    x_prod_lin = cas_reshape(x[:, 1], (-1, 1))
    y_prod_lin = y
    if not y is None:
        y_prod_lin = cas_reshape(y[:, 1], (-1, 1))

    k_prod_lin = _k_lin(x_prod_lin, y_prod_lin, prod_linear_variances,
                        diag_only)

    k_linear = _k_lin(x, y, linear_variances, diag_only)

    return k_prod_lin * k_prod_rbf + k_linear
Esempio n. 4
0
def _k_lin_mat52(x, hyp, y=None, diag_only=False):
    """ Evaluate the custom kernel composed of linear and matern kernels

    Evaluate the kernel
        k_lin*k_mat52 + k_lin
    """

    prod_mat52_lengthscale = hyp["prod.mat52.lengthscale"]
    prod_mat52_variance = hyp["prod.mat52.variance"]
    prod_linear_variances = hyp["prod.linear.variances"]
    linear_variances = hyp["linear.variances"]

    x_m52 = cas_reshape(x[:, 1], (-1, 1))
    y_m52 = y
    if not y is None:
        y_m52 = cas_reshape(y[:, 1], (-1, 1))

    k_prod_mat52 = _k_mat52(x_m52, y_m52, prod_mat52_variance,
                            prod_mat52_lengthscale, diag_only)

    x_prod_lin = cas_reshape(x[:, 1], (-1, 1))
    y_prod_lin = y
    if not y is None:
        y_prod_lin = cas_reshape(y[:, 1], (-1, 1))

    k_prod_lin = _k_lin(x_prod_lin, y_prod_lin, prod_linear_variances,
                        diag_only)

    k_linear = _k_lin(x, y, linear_variances, diag_only)

    return k_prod_lin * k_prod_mat52 + k_linear
Esempio n. 5
0
    def _get_solution(self,
                      sol,
                      crash,
                      p_0,
                      k_fb_0,
                      verbose=False,
                      feas_tol=1e-6):
        if crash:
            self.n_fail += 1
            exit_code = 2
            u_apply, _ = self._get_old_solution(p_0)
            return u_apply, exit_code

        x_opt = sol["x"]
        if self.opt_x0:
            p_0 = cas_reshape(x_opt[:self.n_s], (self.n_s, 1))
            k_ff = cas_reshape(x_opt[self.n_s:], (-1, self.n_u))
        else:
            k_ff = cas_reshape(x_opt, (-1, self.n_u))

        mu_all, sigma_all, sigma_g = self.f_multistep_eval(p_0, k_ff, k_fb_0)

        # Evaluate the constraints
        g_res = sol["g"]
        feasible = True
        if np.any(np.array(self.lbg) - feas_tol > g_res) or np.any(
                np.array(self.ubg) + feas_tol < g_res):
            feasible = False

        if feasible:
            self.k_ff_old = k_ff
            exit_code = 0
            self.n_fail = 0

            if verbose:
                return np.array(k_ff[0, :]).reshape(
                    self.n_u, ), exit_code, np.vstack(
                        (p_0.T, mu_all)), sigma_all, k_ff, k_fb_0

            return np.array(k_ff[0, :]).reshape(self.n_u, ), exit_code
        else:
            self.n_fail += 1

            u_apply, exit_code = self._get_old_solution(p_0)

            if verbose:
                return u_apply, exit_code, None, None, None
            return u_apply, exit_code
Esempio n. 6
0
def array_of_vec_to_array_of_mat(array_of_vec, n, m):
    """ Convert multiple vectorized matrices to 3-dim numpy array


    Parameters
    ----------
    array_of_vec: T x n*m array_like
        array of vectorized matrices
    n: int
        The first dimension of the vectorized matrices
    m: int
        The second dimension of the vectorized matrices

    Returns
    -------
    array_of_mat: T x n x m ndarray
        The 3D-array containing the matrices
    """

    return np.reshape(array_of_vec, (-1, n, m))

    T, size_vec = np.shape(array_of_vec)

    assert size_vec == n * m, "Are the shapes of the vectorized and output matrix the same?"

    array_of_mat = np.empty((T, n, m))
    for i in range(T):
        array_of_mat[i, :, :] = cas_reshape(array_of_mat[i, :], (n, m))

    return array_of_mat
Esempio n. 7
0
def vec_to_mat(v, n, tril_vec=True):
    """ Reshape vector into square matrix

    Inputs:
        v: vector containing matrix entries (either of length n^2 or n*(n+1)/2)
        n: the dimensionality of the new matrix

    Optionals:
        tril_vec:   If tril_vec is True we assume that the resulting matrix is symmetric
                    and that the

    """
    n_vec = len(v)

    if tril_vec:
        A = np.empty((n, n))
        c = 0
        for i in range(n):
            for j in range(i, n):
                A[i, j] = v[c]
                A[j, i] = A[i, j]
                c += 1
    else:
        A = cas_reshape(v, (n, n))

    return A
    def plot_ellipsoid_trajectory(self,
                                  p,
                                  q,
                                  vis_safety_bounds=True,
                                  ax=None,
                                  color="r"):
        """ Plot the reachability ellipsoids given in observation space

        TODO: Need more principled way to transform ellipsoid to internal states

        Parameters
        ----------
        p: n x n_s array[float]
            The ellipsoid centers of the trajectory
        q: n x n_s x n_s  ndarray[float]
            The shape matrices of the trajectory
        vis_safety_bounds: bool, optional
            Visualize the safety bounds of the system

        """
        new_ax = False

        if ax is None:
            fig = plt.figure()
            ax = fig.add_subplot(111)
            new_ax = True

        plt.sca(ax)
        n, n_s = np.shape(p)
        handles = [None] * n
        for i in range(n):
            p_i = cas_reshape(p[i, :], (n_s, 1)) + self.p_origin.reshape(
                (n_s, 1))
            q_i = cas_reshape(q[i, :], (self.n_s, self.n_s))
            ax, handles[i] = plot_ellipsoid_2D(p_i, q_i, ax, color=color)
            # ax = plot_ellipsoid_2D(p_i,q_i,ax,color = color)

        if vis_safety_bounds:
            ax = self.plot_safety_bounds(ax)

        if new_ax:
            plt.show()

        return ax, handles
def test_multistep_reachability(before_test_onestep_reachability):
    """ """
    p, _, gp, k_fb, _, L_mu, L_sigm, c_safety, a, b = before_test_onestep_reachability
    T = 3

    n_u, n_s = np.shape(k_fb)

    u_0 = .2 * np.random.randn(n_u, 1)
    k_fb_0 = np.random.randn(
        T - 1,
        n_s * n_u)  # np.zeros((T-1,n_s*n_u))# np.random.randn(T-1,n_s*n_u)
    k_ff = np.random.randn(T - 1, n_u)
    # k_fb_ctrl = np.zeros((n_u,n_s))#np.random.randn(n_u,n_s)

    u_0_cas = MX.sym("u_0", (n_u, 1))
    k_fb_cas_0 = MX.sym("k_fb", (T - 1, n_u * n_s))
    k_ff_cas = MX.sym("k_ff", (T - 1, n_u))

    p_new_cas, q_new_cas, _ = reach_cas.multi_step_reachability(
        p, u_0, k_fb_cas_0, k_ff_cas, gp, L_mu, L_sigm, c_safety, a, b)
    f = Function("f", [u_0_cas, k_fb_cas_0, k_ff_cas], [p_new_cas, q_new_cas])

    k_fb_0_cas = np.copy(k_fb_0)  # np.copy(k_fb_0)

    for i in range(T - 1):
        k_fb_0_cas[i, None, :] = k_fb_0_cas[i, None, :] + cas_reshape(
            k_fb, (1, n_u * n_s))
    p_all_cas, q_all_cas = f(u_0, k_fb_0_cas, k_ff)

    k_ff_all = np.vstack((u_0.T, k_ff))

    k_fb_apply = array_of_vec_to_array_of_mat(k_fb_0, n_u, n_s)

    for i in range(T - 1):
        k_fb_apply[i, :, :] += k_fb

    _, _, p_all_num, q_all_num = reach_num.multistep_reachability(
        p, gp, k_fb_apply, k_ff_all, L_mu, L_sigm, None, c_safety, 0, a, b,
        None)

    assert np.allclose(p_all_cas, p_all_num, r_tol,
                       a_tol), "Are the centers of the ellipsoids same?"

    assert np.allclose(q_all_cas[0, :], q_all_num[0, :, :].reshape(
        (-1, n_s * n_s)), r_tol,
                       a_tol), "Are the first shape matrices the same?"
    assert np.allclose(q_all_cas[1, :], q_all_num[1, :, :].reshape(
        (-1, n_s * n_s)), r_tol,
                       a_tol), "Are the second shape matrices the same?"
    # assert np.allclose(q_all_cas[1,:],q_all_num[1,:].reshape((-1,n_s*n_s))), "Are the second shape matrices the same?"
    assert np.allclose(q_all_cas[-1, :], q_all_num[-1, :, :].reshape(
        (-1, n_s * n_s)), r_tol,
                       a_tol), "Are the last shape matrices the same?"
    assert np.allclose(q_all_cas, q_all_num.reshape((T, n_s * n_s)), r_tol,
                       a_tol), "Are the shape matrices the same?"
Esempio n. 10
0
    def get_action(self, x0_mu, verbose=False):
        """ Wrapper around the solve Function

        Parameters
        ----------
        x0_mu: n_x x 0 np.array[float]
            The current state of the system

        Returns
        -------
        u_apply: n_ux0 np.array[float]
            The action to be applied to the system
        exit_code: int
            An exit code that indicates what kind of action is applied. The following
            values are possible:
                0: feasible solution found, optimization succeeded.
                1: Optimization failed to find feasible solution. Old solution applied.
                2: Optimization crashed.
                3: No old feasible solution found, apply k_fb

        """

        assert self.solver_initialized, "Need to initialize the solver first!"

        if self.opt_x0:
            k_ff_0 = np.random.randn(self.T, self.n_u)
            k_fb_0 = self.k_fb
            params = vertcat(cas_reshape(k_fb_0, (-1, 1)))
            opt_vars_init = vertcat(cas_reshape(x0_mu, (-1, 1)),
                                    cas_reshape(k_ff_0, (-1, 1)))
        else:
            k_ff_0, k_fb_0 = self._get_init_controls()
            params = vertcat(cas_reshape(x0_mu, (-1, 1)),
                             cas_reshape(k_fb_0, (-1, 1)))
            opt_vars_init = vertcat(cas_reshape(k_ff_0, (-1, 1)))

        crash = False
        # sol = self.solver(x0=opt_vars_init,lbg=self.lbg,ubg=self.ubg,p=params)
        try:
            # pass
            sol = self.solver(x0=opt_vars_init,
                              lbg=self.lbg,
                              ubg=self.ubg,
                              p=params)
        except:
            exit_code = 2
            crash = True
            warnings.warn("NLP solver crashed, solution infeasible")
            sol = None

        return self._get_solution(sol, crash, x0_mu, k_fb_0, verbose)
Esempio n. 11
0
def test_mpc_casadi_same_constraint_values_as_numeric_eval(
        before_test_safempc):
    """check if the returned open loop (numerical) ellipsoids are the same as in internal planning"""

    env, safe_mpc, k_ff_perf_traj, k_fb_perf_traj, k_fb_apply, k_ff_apply, p_all, q_all, sol, q_0, k_fb_0 = before_test_safempc

    n_s = env.n_s
    n_u = env.n_u

    g_0 = lin_ellipsoid_safety_distance(p_all[0, :].reshape(n_s, 1),
                                        q_all[0, :].reshape(n_s, n_s),
                                        safe_mpc.h_mat_obs, safe_mpc.h_obs)
    g_1 = lin_ellipsoid_safety_distance(p_all[1, :].reshape(n_s, 1),
                                        cas_reshape(q_all[1, :], (n_s, n_s)),
                                        safe_mpc.h_mat_obs, safe_mpc.h_obs)
    g_safe = lin_ellipsoid_safety_distance(p_all[-1, :].reshape(n_s, 1),
                                           q_all[-1, :].reshape(n_s, n_s),
                                           safe_mpc.h_mat_safe,
                                           safe_mpc.h_safe)

    idx_state_constraints = env.n_u * safe_mpc.n_safe * 2 - 1
    if q_0 is not None:
        idx_state_constraints += 1  ## not sure why this is

    constr_values = sol["g"]

    assert_allclose(
        g_0, constr_values[idx_state_constraints:idx_state_constraints +
                           safe_mpc.m_obs], r_tol, a_tol)
    # Are the distances to the obstacle the same after two steps?
    assert_allclose(
        g_1, constr_values[idx_state_constraints +
                           safe_mpc.m_obs:idx_state_constraints +
                           2 * safe_mpc.m_obs], r_tol, a_tol)
    # Are the distances to the safe set the same after the last step?
    assert_allclose(
        g_safe, constr_values[idx_state_constraints +
                              2 * safe_mpc.m_obs:idx_state_constraints +
                              2 * safe_mpc.m_obs + safe_mpc.m_safe], r_tol,
        a_tol)
Esempio n. 12
0
    def _get_solution(self, x_0, sol, k_fb, k_fb_perf_0, sol_verbose=False,
                      crashed=False, feas_tol=1e-6, q_0=None, k_fb_0=None):
        """ Process the solution dict of the casadi solver

        Processes the solution dictionary of the casadi solver and
        (depending on the chosen mode) saves the solution for reuse in the next
        time step. Depending on the chosen verbosity level, it also prints
        some statistics.

        Parameters
        ----------
        sol: dict
            The solution dictionary returned by the casadi solver
        sol_verbose: boolean, optional
            Return additional solver results such as the constraint values

        Returns
        -------
        k_fb_apply: n_u x n_s array[float]
            The feedback control term to be applied to the system
        k_ff_apply: n_u x 1 array[float]
            The feed-forward control term to be applied to the system
        k_fb_all: n_safe x n_u x n_s
            The feedback control terms for all time steps
        k_ff_all: n_safe x n_u x 1

        h_values: (m_obs*n_safe + m_safe) x 0 array[float], optional
            The values of the constraint evaluation (distance to obstacle)
        """

        success = True
        feasible = True
        if crashed:
            feasible = False

            if self.verbosity > 1:
                print("Optimization crashed, infeasible soluion!")
        else:
            g_res = np.array(sol["g"]).squeeze()

            # This is not sufficient, since casadi gives out wrong feasibility values
            if np.any(np.array(self.lbg) - feas_tol > g_res) or np.any(
                    np.array(self.ubg) + feas_tol < g_res):
                feasible = False

            x_opt = sol["x"]
            self.has_openloop = True

            if self.opt_x0:
                x_0 = x_opt[:self.n_s]
                x_opt = x_opt[self.n_s:, :]

            # get indices of the respective variables
            n_u_0 = self.n_u
            n_u_perf = 0
            if self.n_perf > 1:
                n_u_perf = (self.n_perf - self.r) * self.n_u
            n_k_ff = (self.n_safe - 1) * self.n_u

            c = 0
            idx_u_0 = np.arange(n_u_0)
            c += n_u_0
            idx_u_perf = np.arange(c, c + n_u_perf)
            c += n_u_perf
            idx_k_ff = np.arange(c, c + n_k_ff)
            c += n_k_ff

            u_apply = np.array(cas_reshape(x_opt[idx_u_0], (1, self.n_u)))
            k_ff_perf = np.array(
                cas_reshape(x_opt[idx_u_perf], (self.n_perf - self.r, self.n_u)))

            k_ff_safe = np.array(
                cas_reshape(x_opt[idx_k_ff], (self.n_safe - 1, self.n_u)))
            k_ff_safe_all = np.vstack((u_apply, k_ff_safe))

            k_fb_safe_output = array_of_vec_to_array_of_mat(np.copy(k_fb), self.n_u,
                                                            self.n_s)

            p_safe, q_safe, gp_sigma_pred_safe_all = self.get_safety_trajectory_openloop(x_0, u_apply,
                                                                 np.copy(k_fb),
                                                                 k_ff_safe, q_0, k_fb_0)

            p_safe = np.array(p_safe)
            q_safe = np.array(q_safe)

            if self.verbosity > 1:
                print("=== Safe Trajectory: ===")
                print("Centers:")
                print(p_safe)
                print("Shape matrices:")
                print(q_safe)
                print("Safety controls:")
                print(u_apply)
                print(k_ff_safe)

            k_fb_perf_traj_eval = np.empty((0, self.n_s * self.n_u))
            k_ff_perf_traj_eval = np.empty((0, self.n_u))
            if self.n_safe > 1:
                k_fb_perf_traj_eval = np.vstack(
                    (k_fb_perf_traj_eval, k_fb[:self.r - 1, :]))
                k_ff_perf_traj_eval = np.vstack(
                    (k_ff_perf_traj_eval, k_ff_safe[:self.r - 1, :]))
            if self.n_perf > self.r:
                k_fb_perf_traj_eval = np.vstack((k_fb_perf_traj_eval,
                                                 np.matlib.repmat(k_fb_perf_0,
                                                                  self.n_perf - self.r,
                                                                  1)))
            k_ff_perf_traj_eval = np.vstack((k_ff_perf_traj_eval, k_ff_perf))

            if self.n_perf > 1:
                mu_perf, sigma_perf = self._f_multistep_perf_eval(x_0.squeeze(),
                                                                  u_apply,
                                                                  k_fb_perf_traj_eval,
                                                                  k_ff_perf_traj_eval)

                if self.verbosity > 1:
                    print("=== Performance Trajectory: ===")
                    print("Mu perf:")
                    print(mu_perf)
                    print("Peformance controls:")
                    print(k_ff_perf_traj_eval)

            feasible, _ = self.eval_safety_constraints(p_safe, q_safe)

            if self.rhc and feasible:
                self.k_ff_safe = k_ff_safe
                self.k_ff_perf = k_ff_perf
                self.p_safe = p_safe
                self.k_fb_safe_all = np.copy(k_fb)
                self.u_apply = u_apply
                self.k_fb_perf_0 = k_fb_perf_0

        if feasible:
            self.n_fail = 0

        if not feasible:
            self.n_fail += 1
            q_all = None
            k_fb_safe_output = None
            k_ff_all = None
            p_safe = None
            q_safe = None
            g_res = None

            if self.n_fail >= self.n_safe:
                # Too many infeasible solutions -> switch to safe controller
                if self.verbosity > 1:
                    print(
                        "Infeasible solution. Too many infeasible solutions, switching to safe controller")
                u_apply = self.safe_policy(x_0)
                k_ff_safe_all = u_apply
            else:
                # can apply previous solution
                if self.verbosity > 1:
                    print((
                        "Infeasible solution. Switching to previous solution, n_fail = {}, n_safe = {}".format(
                            self.n_fail, self.n_safe)))
                if sol_verbose:
                    u_apply, k_fb_safe_output, k_ff_safe_all, p_safe = self.get_old_solution(
                        x_0, get_ctrl_traj=True)
                else:
                    u_apply = self.get_old_solution(x_0)
                    k_ff_safe_all = u_apply

        if sol_verbose:
            return x_0, u_apply, feasible, success, k_fb_safe_output, k_ff_safe_all, p_safe, q_safe, sol, gp_sigma_pred_safe_all

        return x_0, u_apply, success
Esempio n. 13
0
    def solve(self, p_0, u_0=None, k_ff_all_0=None, k_fb_safe=None, u_perf_0=None,
              k_fb_perf_0=None, sol_verbose=False, q_0=None, k_fb_0=None):
        """ Solve the MPC problem for a given set of input parameters


        Parameters
        ----------
        p_0: n_s x 1 array[float]
            The initial (current) state
        k_ff_all_0: n_safe x n_u  array[float], optional
            The initialization of the feed-forward controls
        k_fb_all_0: n_safe x (n_s * n_u) array[float], optional
            The initialization of the feedback controls

        Returns
        -------
        k_fb_apply: n_u x n_s array[float]
            The feedback control term to be applied to the system
        k_ff_apply: n_u x 1 array[float]
            The feed-forward control term to be applied to the system
        k_fb_all: n_safe x n_u x n_s
            The feedback control terms for all time steps
        k_ff_all: n_safe x n_u x 1
        """
        assert self.solver_initialized, "Need to initialize the solver first!"

        u_0_init, k_ff_all_0_init, k_fb_safe_init, u_perf_0_init, k_fb_perf_0_init = self._get_init_controls()

        if u_0 is None:
            u_0 = u_0_init
        if k_ff_all_0 is None:
            k_ff_all_0 = k_ff_all_0_init
        if k_fb_safe is None:
            k_fb_safe = k_fb_safe_init
        if u_perf_0 is None:
            u_perf_0 = u_perf_0_init
        if k_fb_perf_0 is None:
            k_fb_perf_0 = k_fb_perf_0_init
        if q_0 is not None:
            if k_fb_0 is None:
                k_fb_0 = self.get_lqr_feedback()

        if self.opt_x0:
            params = np.vstack(
                (cas_reshape(k_fb_safe, (-1, 1)), cas_reshape(k_fb_perf_0, (-1, 1))))

            opt_vars_init = vertcat(cas_reshape(p_0, (-1, 1)), cas_reshape(u_0, (-1, 1)), u_perf_0, \
                               cas_reshape(k_ff_all_0, (-1, 1)))
        else:
            params = np.vstack(
                (p_0, cas_reshape(k_fb_safe, (-1, 1)), cas_reshape(k_fb_perf_0, (-1, 1))))

            opt_vars_init = vertcat(cas_reshape(u_0, (-1, 1)), u_perf_0, \
                             cas_reshape(k_ff_all_0, (-1, 1)))

        if self.init_uncertainty:
            params = vertcat(params, cas_reshape(q_0, (-1, 1)), cas_reshape(k_fb_0, (-1, 1)))

        crash = False 
        sol = self.solver(x0=opt_vars_init, lbg=self.lbg, ubg=self.ubg, p=params)
        try:
            # pass
            sol = self.solver(x0=opt_vars_init, lbg=self.lbg, ubg=self.ubg, p=params)
        except:
            crash = True
            warnings.warn("NLP solver crashed, solution infeasible")
            sol = None

        return self._get_solution(p_0, sol, k_fb_safe, k_fb_perf_0, sol_verbose, crash, q_0=q_0, k_fb_0=k_fb_0)
Esempio n. 14
0
    def _get_init_controls(self):
        """ Initialize the controls for the MPC step


        Returns
        -------
        u_0: n_u x 0 np.array[float]
            Initialization of the first (shared) input
        k_ff_safe_new:  (n_safe-1) x n_u np.ndarray[float]
            Initialization of the safety feed-forward control inputs
        k_fb_new: n_u x n_x np.ndarray[float]
            Initialization of the safety feed-back control inputs
        k_ff_perf_new: (n_perf - r_1) x n_u np.ndarray[float]
            Initialization of the performance feed-forward control inputs.
            Not including the shared controls (if r > 1)
        k_fb_perf_0: n_u x n_x np.ndarray[float]
            Initialization of the safety feed-back control inputs

        """

        u_perf_0 = None
        k_fb_perf_0 = None
        k_fb_lqr = self.get_lqr_feedback()

        if self.do_shift_solution and self.n_fail == 0:
            if self.n_safe > 1:
                k_fb_safe = np.copy(self.k_fb_safe_all)

                # Shift the safe controls
                k_ff_safe = np.copy(self.k_ff_safe)

                u_0 = k_ff_safe[0, :]

                if self.n_safe > self.r and self.n_perf > self.n_safe:  # the first control after the shared controls
                    k_ff_perf = np.copy(self.k_ff_perf)
                    k_ff_r_last = (k_ff_perf[0, :] + k_ff_safe[self.r - 1,
                                                     :]) / 2  # mean of first perf ctrl and safe ctrl after shared
                else:
                    k_ff_r_last = k_ff_safe[-1, :]  # just the last safe control

                k_ff_safe_new = np.vstack((k_ff_safe[1:self.r, :], k_ff_r_last))

                if self.n_safe > self.r + 1:
                    k_ff_safe_new = np.vstack((k_ff_safe_new, k_ff_safe[self.r:, :]))
            else:
                u_0 = self.u_apply
                k_ff_safe_new = np.array([])

            if self.n_perf - self.r > 0:
                k_ff_perf = np.copy(self.k_ff_perf)
                k_ff_perf_new = np.vstack((k_ff_perf[1:, :], k_ff_perf[-1, :]))

                if self.perf_has_fb:
                    k_fb_perf_0 = np.copy(self.k_fb_perf_0)
                else:
                    k_fb_perf_0 = np.array([])
            else:
                k_ff_perf_new = np.array([])
                k_fb_perf_0 = np.array([])
        else:
            k_fb_safe = np.empty((self.n_safe - 1, self.n_s * self.n_u))
            for i in range(self.n_safe - 1):
                k_fb_safe[i] = cas_reshape(k_fb_lqr, (1, -1))

            k_ff_safe_new = np.zeros((self.n_safe - 1, self.n_u))
            u_0 = np.zeros((self.n_u, 1))

            k_ff_perf_new = np.array([])
            if self.n_perf > 1:
                k_ff_perf_new = np.zeros((self.n_perf - self.r, self.n_u))

                if self.perf_has_fb:
                    k_fb_perf_0 = k_fb_lqr
            else:
                k_fb_perf_0 = np.array([])

        if self.n_safe > 1:
            k_fb_safe_new = np.vstack((k_fb_safe[1:, :], k_fb_safe[-1, :]))

        else:
            k_fb_safe_new = np.array([])

        return u_0, k_ff_safe_new, k_fb_safe, k_ff_perf_new, k_fb_perf_0
Esempio n. 15
0
def test_multistep_trajectory(before_test_onestep_reachability):
    """ Compare multi-steps 'by hand' with the function """

    mu_0, _, gp, k_fb, k_ff, L_mu, L_sigm, c_safety, a, b = before_test_onestep_reachability
    T = 3
    n_u, n_s = np.shape(k_fb)

    k_fb_cas_single = SX.sym("k_fb_single", (n_u, n_s))
    k_ff_cas_single = SX.sym("k_ff_single", (n_u, 1))
    k_ff_cas_all = SX.sym("k_ff_single", (T, n_u))

    k_fb_cas_all = SX.sym("k_fb_all", (T - 1, n_s * n_u))
    k_fb_cas_all_inp = [
        k_fb_cas_all[i, :].reshape((n_u, n_s)) for i in range(T - 1)
    ]
    mu_0_cas = SX.sym("mu_0", (n_s, 1))
    sigma_0_cas = SX.sym("sigma_0", (n_s, n_s))

    mu_onestep_no_var_in, sigm_onestep_no_var_in, _ = prop_casadi.one_step_taylor(
        mu_0_cas, gp, k_ff_cas_single, a=a, b=b)

    mu_one_step, sigm_onestep, _ = prop_casadi.one_step_taylor(
        mu_0_cas,
        gp,
        k_ff_cas_single,
        k_fb=k_fb_cas_single,
        sigma_x=sigma_0_cas,
        a=a,
        b=b)

    mu_multistep, sigma_multistep, _ = prop_casadi.multi_step_taylor_symbolic(
        mu_0_cas, gp, k_ff_cas_all, k_fb_cas_all_inp, a=a, b=b)

    on_step_no_var_in = Function(
        "on_step_no_var_in", [mu_0_cas, k_ff_cas_single],
        [mu_onestep_no_var_in, sigm_onestep_no_var_in])
    one_step = Function(
        "one_step", [mu_0_cas, sigma_0_cas, k_ff_cas_single, k_fb_cas_single],
        [mu_one_step, sigm_onestep])
    multi_step = Function("multi_step", [mu_0_cas, k_ff_cas_all, k_fb_cas_all],
                          [mu_multistep, sigma_multistep])

    # TODO: Need mu, sigma as input aswell
    mu_1, sigma_1 = on_step_no_var_in(mu_0, k_ff)

    mu_2, sigma_2 = one_step(mu_1, sigma_1, k_ff, k_fb)
    mu_3, sigma_3 = one_step(mu_2, sigma_2, k_ff, k_fb)

    # TODO: stack k_ff and k_fb
    k_fb_mult = np.array(cas_reshape(k_fb, (1, n_u * n_s)))
    k_fb_mult = np.array(vertcat(*[k_fb_mult] * (T - 1)))

    k_ff_mult = vertcat(*[k_ff] * T)
    mu_all, sigma_all = multi_step(mu_0, k_ff_mult, k_fb_mult)

    assert np.allclose(
        np.array(mu_all[0, :]),
        np.array(mu_1).T), "Are the centers of the first prediction the same?"
    assert np.allclose(
        np.array(mu_all[-1, :]),
        np.array(mu_3).T), "Are the centers of the final prediction the same?"
    assert np.allclose(cas_reshape(sigma_all[-1, :], (n_s, n_s)),
                       sigma_3), "Are the last covariance matrices the same?"
    def find_max_variance(self, x0,
                          sol_verbose=False):
        """ Find the most informative sample in the space constrained by the mpc structure

        Parameters
        ----------
        n_restarts: int, optional
            The number of random initializations of the optimization problem
        ilqr_init: bool, optional
            initialize the state feedback terms with the ilqr feedback law
        sample_mean: n_s x 1 np.ndarray[float], optional
            The mean of the gaussian initial state-action distribution
        sample_var: n_s x n_s np.ndarray[float], optional
            The variance of the gaussian initial state-action distribution

        Returns
        -------
        x_opt:
        u_opt:
        sigm_opt:

        """

        sigma_best = 0
        x_best = None
        u_best = None

        for i in range(self.n_restarts_optimizer):

            x_0 = self.env._sample_start_state(self.sample_mean, self.sample_std)[:,
                  None]  # sample initial state

            if self.T > 1:
                u_0 = self.env.random_action()[:, None]

                k_fb_lqr = self.safempc.get_lqr_feedback()
                k_ff_0 = np.zeros((self.T - 1, self.n_u))
                for j in range(self.T - 1):
                    k_ff_0[j, :] = self.env.random_action()

                params_0 = cas_reshape(k_fb_lqr, (-1, 1))
                vars_0 = np.vstack((x_0, u_0, cas_reshape(k_ff_0, (-1, 1))))
            else:
                u_0 = self.env.random_action()[:, None]
                params_0 = []
                vars_0 = np.vstack((x_0, u_0))

            sol = self.solver(x0=vars_0, p=params_0, lbg=self.lbg, ubg=self.ubg)

            f_opt = sol["f"]
            sigm_i = -float(f_opt)

            if sigm_i > sigma_best:  # check if solution would improve upon current best
                g_sol = np.array(sol["g"]).squeeze()

                if self._is_feasible(g_sol, np.array(self.lbg), np.array(
                        self.ubg)):  # check if solution is feasible
                    w_sol = sol["x"]
                    x_best = np.array(w_sol[:self.n_s])
                    u_best = np.array(w_sol[self.n_s:self.n_s + self.n_u])
                    sigma_best = sigm_i

                    z_i = np.vstack((x_best, u_best)).T
                    if self.verbosity > 0:
                        print(("New optimal sigma found at iteration {}".format(i)))
                        if self.verbosity > 1:
                            print((
                                "New feasible solution with sigma sum {} found".format(
                                    str(sigm_i))))

        return x_best, u_best
def multi_step_reachability(p_0,
                            u_0,
                            k_fb,
                            k_ff,
                            gp,
                            l_mu,
                            l_sigm,
                            c_safety=1.,
                            a=None,
                            b=None,
                            t_z_gp=None,
                            q_0=None,
                            k_fb_0=None):
    """Generate trajectory reachset by iteratively computing the one-step reachability.

    Parameters
    ----------
    p_0: n_s x 1 ndarray[float | casadi.sym]
        Initial state
    u_0: n_u x 1 ndarray[casadi.sym]
        The initial action
    k_fb: n_fb x (n_s * n_u) ndarray[casadi.SX]
        The initial guess of the feedback controls
    k_ff: n_fb x n_u ndarray[casadi.sym]
        The feed forward terms to optimize over
    gp: SimpleGPModel
        The gp representing the dynamics
    l_mu: 1d_array of size n_s
        Set of Lipschitz constants on the Gradients of the mean function (per state
        dimension)
    l_sigma: 1d_array of size n_s
        Set of Lipschitz constants of the predictive variance (per state dimension)
    c_safety: float, optional
        The scaling of the semi-axes of the uncertainty matrix
        corresponding to a level-set of the gaussian pdf.
    a: n_s x n_s ndarray[float], optional
        The A matrix of the linear model Ax + Bu
    b: n_s x n_u ndarray[float], optional
        The B matrix of the linear model Ax + Bu
    q_0: n_s x n_s ndarray[float], optional
        The n_s x n_s s.p.d. shape matrix of the initial state
    k_fb_0: n_u x n_s ndarray[float], optional
        Initial state feedback control matrix only required
        when q_0 is specified
    Returns
    -------
    p_all
    q_all
    """
    n_s = np.shape(p_0)[0]
    n_u = np.shape(u_0)[0]

    n_fb = np.shape(k_fb)[0]

    p_new, q_new, gp_pred_sigma = onestep_reachability(p_0, gp, u_0, l_mu,
                                                       l_sigm, q_0, k_fb_0,
                                                       c_safety, a, b, t_z_gp)

    p_all = p_new.T
    q_all = q_new.reshape((1, n_s * n_s))
    gp_pred_sigma_all = gp_pred_sigma

    for i in range(n_fb):
        p_old = p_new
        q_old = q_new
        k_ff_i = cas_reshape(k_ff[i, :], (n_u, 1))
        k_fb_i = cas_reshape(k_fb[i, :], (n_u, n_s))

        p_new, q_new, gp_pred_sigma = onestep_reachability(
            p_old, gp, k_ff_i, l_mu, l_sigm, q_old, k_fb_i, c_safety, a, b,
            t_z_gp)

        p_all = vertcat(p_all, p_new.T)
        q_all = vertcat(q_all, cas_reshape(q_new, (1, n_s * n_s)))
        gp_pred_sigma_all = vertcat(gp_pred_sigma_all, gp_pred_sigma)

    return p_all, q_all, gp_pred_sigma_all
Esempio n. 18
0
    def _generate_perf_trajectory_casadi(self, mu_0, u_0, k_ff_ctrl, k_fb_safe, a=None,
                                         b=None, lin_trafo_gp_input=None,
                                         safety_constr=False):
        """ Generate the performance trajectory variables for the casadi solver

        Parameters:
        mu_0: n_x x 1 casadi.SX
            Initial state
        u_0: n_u x 1 casadi.SX
            Initial control
        k_ff_ctrl: (n_safe-1) x n_u casadi.SX
            Safe feed-forward controls
        k_fb_safe: (n_safe-1) x (n_x * n_u) casadi.SX
            Safe feedback control matrices
        a: n_x x n_x np.ndarray[float], optional
            The A-matrix of the prior linear model
        b: n_x x n_u np.ndarray[float], optional
            The B-matrix of the prior linear model
        lin_trafo_gp_input: n_x_gp_in x n_x np.ndarray[float], optional
            Allows for a linear transformation of the gp input (e.g. removing an input)
        safety_constr: boolean, optional
            True, if we want to put a constraint after (n_safe+1)th performance trajectory state to
            be inside the terminal safe set. Can potentially help with (recursive) feasibility
        """
        if self.r > 1:
            warnings.warn(
                "Coupling performance and safety trajectory for more than one step is UNTESTED")

        # we don't have a performance trajectory, so nothing to do here. Might wanna catch this even before
        if self.n_perf <= 1:
            return np.array([]), np.array([]), np.array([]), np.array(
                []), None, np.array([]), [], [], [], [], []
        else:
            k_ff_perf = MX.sym("k_ff_perf", (self.n_perf - self.r, self.n_u))
            k_ff_perf_traj = vertcat(k_ff_ctrl[:self.r - 1, :], k_ff_perf)

            k_fb_perf_traj = np.array([])
            for i in range(self.r - 1):
                k_fb_perf_traj = np.append(k_fb_perf_traj,
                                           [k_fb_safe[i, :].reshape((self.n_u, self.n_s))])
            if self.perf_has_fb and self.n_perf - self.r > 0:
                k_fb_perf = MX.sym("k_fb_perf", (self.n_u, self.n_s))
                for i in range(self.n_perf - self.r):
                    k_fb_perf_traj = np.append(k_fb_perf_traj, [k_fb_perf])

            mu_perf_all, sigma_perf_all, gp_sigma_pred_perf_all = self.perf_trajectory(
                mu_0, self.ssm_forward, vertcat(u_0, k_ff_perf_traj), k_fb_perf_traj, None, a, b,
                lin_trafo_gp_input)

            # evaluation trajectory (mainly for verbosity)
            mu_0_eval = MX.sym("mu_0", (self.n_s, 1))
            u_0_eval = MX.sym("u_0", (self.n_u, 1))
            k_fb_perf_all_eval = MX.sym("k_fb_perf",
                                        (self.n_perf - 1, self.n_u * self.n_s))
            k_ff_perf_all_eval = MX.sym("k_ff_perf", (self.n_perf - 1, self.n_u))

            list_kfb_perf = [cas_reshape(k_fb_perf_all_eval[i, :], (self.n_u, self.n_s))
                             for i in range(self.n_perf - 1)]
            mu_perf_eval_all, sigma_perf_eval_all, gp_sigma_pred_perf_all_eval = self.perf_trajectory(
                mu_0_eval, self.ssm_forward, vertcat(u_0_eval, k_ff_perf_all_eval),
                list_kfb_perf, None, a, b, lin_trafo_gp_input)
            self._f_multistep_perf_eval = cas.Function("f_multistep_perf_eval",
                                                       [mu_0_eval, u_0_eval,
                                                        k_fb_perf_all_eval,
                                                        k_ff_perf_all_eval],
                                                       [mu_perf_eval_all,
                                                        gp_sigma_pred_perf_all_eval])

        # generate (approxiamte) constraints for the performance trajectory
        g_name = []
        g = []
        lbg = []
        ubg = []
        if safety_constr and self.n_perf > self.n_safe:
            g_name += ["Terminal safety performance"]
            g_term = lin_ellipsoid_safety_distance(
                cas_reshape(mu_perf_all[self.n_safe + 1, :], (self.n_s, 1)),
                cas_reshape(sigma_perf_all[self.n_safe + 1, :], (self.n_s, self.n_s)),
                self.h_mat_safe, self.h_safe)
            g = vertcat(g, g_term)
            lbg += [-np.inf] * self.m_safe
            ubg += [0.] * self.m_safe

        if self.has_ctrl_bounds:
            for i in range(self.n_perf - self.r):
                g_u_i, lbu_i, ubu_i = self._generate_control_constraint(
                    k_ff_perf[i, :].T)
                g = vertcat(g, g_u_i)
                lbg += lbu_i
                ubg += ubu_i
                g_name += ["ctrl_constr_performance_{}".format(i)]

        return k_ff_perf, k_fb_perf, k_ff_perf_traj, k_fb_perf_traj, mu_perf_all, sigma_perf_all, gp_sigma_pred_perf_all, g, lbg, ubg, g_name