Esempio n. 1
0
 def __init__(self, i2c, n_iter, sig_u, z_traj=None):
     """Initialise the state estimate and the quadrature inference helpers.

     All arguments are forwarded unchanged to the parent constructor.
     The belief over the state starts from ``i2c.sys.x0`` /
     ``i2c.sys.sig_x0``; ``mus`` and ``covars`` start as empty lists.

     NOTE(review): ``self.dim_x`` is read here but not assigned in this
     method -- presumably the parent constructor sets it; confirm.
     """
     super().__init__(i2c, n_iter, sig_u, z_traj)

     # Current belief over the state and its (initially empty) history.
     system = i2c.sys
     self.mu, self.covar = system.x0, system.sig_x0
     self.mus, self.covars = [], []

     # One quadrature rule object is shared by both inference wrappers.
     cubature = CubatureQuadrature(1, 0, 0)
     self.dyn_inf = QuadratureInference(cubature, self.dim_x)
     self.meas_inf = QuadratureInference(cubature, self.dim_x)
Esempio n. 2
0
    def __init__(self, ilqr, n_iter, z_traj=None):
        """Wrap an iLQR solver and initialise the state estimate.

        Args:
            ilqr: Solver object; its ``env`` attribute is used as the model
                and, when ``z_traj`` is given, its ``weighting`` attribute
                is overwritten.
            n_iter: Stored as ``self.n_iter``.
            z_traj: Optional reference trajectory. When provided, its first
                ``ilqr.nb_steps + 1`` rows (with a trailing singleton axis
                added) become ``ilqr.weighting``.
        """
        env_model = ilqr.env
        self.model = env_model
        self.dim_u = env_model.dim_u
        self.dim_x = env_model.dim_x

        self.ilqr = ilqr
        self.n_iter = n_iter
        self.z_traj = z_traj
        if z_traj is not None:
            n_rows = self.ilqr.nb_steps + 1
            self.ilqr.weighting = z_traj[:n_rows, :, None]

        # Histories start out empty.
        self.xu_history, self.z_history = [], []

        # Current belief over the state and its (initially empty) history.
        self.mu, self.covar = env_model.x0, env_model.sig_x0
        self.mus, self.covars = [], []

        # One quadrature rule object is shared by both inference wrappers.
        cubature = CubatureQuadrature(1, 0, 0)
        self.dyn_inf = QuadratureInference(cubature, self.dim_x)
        self.meas_inf = QuadratureInference(cubature, self.dim_x)
Esempio n. 3
0
# Experiment configuration.
# NOTE(review): N_DURATION is used below but is not defined in this part of
# the file -- it is expected to be defined earlier in the module.
N_INFERENCE = 200
N_AUG = 1
N_STARTING = 0
N_PLOTS = 4
# Integer number of inference iterations between plots.
N_ITERS_PER_PLOT = N_INFERENCE // N_PLOTS

N_BUFFER = 0

# 1x1 all-zero policy covariance.
POLICY_COVAR = 0.0 * np.eye(1)

# model learning
MODEL = None

sf = 1e-3  # make numbers nice
# Diagonal cost weights, all scaled by sf: state (8), control (1) and
# terminal state (8).
Q = sf * np.diag([1.0, 1.0, 100.0, 1.0, 100.0, 10.0, 1.0, 1.0])
R = sf * np.diag([0.1])
Qf = sf * np.diag([1.0, 1.0, 100.0, 1.0, 100.0, 10.0, 1.0, 1.0])
# input inference
INFERENCE = GaussianI2c(
    inference=CubatureQuadrature(1, 0, 0),
    Q=Q,
    R=R,
    # Pass the dedicated terminal weight (previously Q was passed and Qf was
    # unused); the two currently hold identical values.
    Qf=Qf,
    alpha=0.05,
    alpha_update_tol=0.99,
    # Small random initial control mean, one row per time step.
    mu_u=1e-2 * np.random.randn(N_DURATION, 1),
    sig_u=1.0 * np.eye(1),
    mu_x_term=None,
    sig_x_term=None,
)
              markersize=1)
    a[1].plot(y_samp[:n_plot, 0],
              y_samp[:n_plot, 1],
              "c.",
              alpha=1,
              markersize=1)

    plot_ellipse(a[0], mean, cov, facecolor="k")

    ex_mean = func(mean.reshape((1, -1)))[0, :]
    J = dfunc(mean)
    ex_cov = J @ cov @ J.T
    a[1].plot(ex_mean[0], ex_mean[1], "m+", label="Linearize")
    plot_ellipse(a[1], ex_mean, ex_cov, facecolor="m")

    cub_inf = QuadratureInference(CubatureQuadrature(1, 0, 0), 2)
    quad_mean, quad_cov = cub_inf.forward(func, mean[:, None], cov)
    pts, y_pts = cub_inf.x_pts, cub_inf.y_pts
    a[0].plot(pts[1:, 0], pts[1:, 1], "bx", label="Cubature Points")
    a[1].plot(y_pts[1:, 0], y_pts[1:, 1], "bx")

    a[1].plot(quad_mean[0], quad_mean[1], "b+", label="Cubature")
    plot_ellipse(a[1], quad_mean, quad_cov, facecolor="b")

    cub_inf = QuadratureInference(GaussHermiteQuadrature(4), 2)
    quad_mean, quad_cov = cub_inf.forward(func, mean[:, None], cov)
    pts, y_pts = cub_inf.x_pts, cub_inf.y_pts
    a[0].plot(pts[:, 0], pts[:, 1], "yx", label="Gauss-Hermite Points")
    a[1].plot(y_pts[:, 0], y_pts[:, 1], "yx")

    a[1].plot(quad_mean[0], quad_mean[1], "y+", label="Gauss-Hermite")
Esempio n. 5
0
def single_experiment(use_i2c, feedforward, low_noise, seed, name):
    """Run one closed-loop quadrotor tracking experiment and store the results.

    A reference trajectory of ``T = 100`` steps is tracked with a planning
    horizon of ``T_plan = 10`` steps, either with an i2c-based policy
    (``PartiallyObservedMpcPolicy``) or with an iLQR-based one (``IlqrMpc``).
    The accumulated tracking cost, the visited states/controls and the
    observations are saved as ``.npy`` files in a ``_results`` directory next
    to this file, together with two summary figures (and a GIF when the
    module-level ``RENDER`` flag is set).

    If ``_results/{name}.npy`` already exists the experiment is skipped.

    Relies on the module-level names ``W``, ``H``, ``RENDER`` and ``FS``.

    Args:
        use_i2c: If true, control with i2c; otherwise with iLQR.
        feedforward: Forwarded to ``policy.set_control(feedforward=...)``.
        low_noise: If true, use ``1e-6`` on all 8 diagonal entries of the
            ``sig_zeta`` noise covariance; otherwise use the larger values.
        seed: Seed passed to ``np.random.seed``.
        name: Identifier used in the names of all files written.

    Returns:
        None.
    """
    np.random.seed(seed)

    res_dir = join(dirname(realpath(__file__)), "_results")

    # exist_ok avoids the check-then-create race when several experiments
    # are started at the same time.
    os.makedirs(res_dir, exist_ok=True)

    if exists(join(res_dir, f"{name}.npy")):  # have result
        print(f"{name} already done")
        return

    env = Quadrotor()
    model = QuadrotorKnown()
    if low_noise:
        sig_zeta = np.diag([1e-6] * 8)
    else:
        sig_zeta = np.diag([1e-6] * 2 + [5e-5] * 2 + [1] * 4)
    # The same noise covariance is set on the environment and on the model.
    env.env.sig_zeta = sig_zeta
    model.sig_zeta = sig_zeta

    T = 100
    T_plan = 10
    mpc_iter = 2

    # trajectory to follow -> sine path, with a 2*pi step in the third
    # component halfway through
    z_traj = np.zeros((T, model.dim_z))
    z_traj[:, 0] = np.linspace(W / 4, 3 * W / 4, T)
    z_traj[:, 1] = H / 2 + (H / 4) * np.sin(np.linspace(0, 2 * np.pi, T))
    z_traj[:, 2] = 2 * np.pi * np.heaviside(np.linspace(-1, 1, T), 1)

    # tracking controller
    Q = np.diag([1e3, 1e3, 1e3, 1, 1, 1])
    R = np.diag([1e-3, 1e-3])
    QR = la.block_diag(Q, R) / 1e3
    Qf = Q / 1e3

    u_init = 0.5 * model.gravity * np.ones((T_plan, model.dim_u))
    if use_i2c:
        sig_u = 1e-2 * np.eye(model.dim_u)
        _i2c = I2cGraph(
            sys=model,
            horizon=T_plan,
            Q=Q,
            R=R,
            Qf=Qf,
            alpha=1.0,
            alpha_update_tol=1.0,
            mu_u=u_init,
            sig_u=sig_u,
            mu_x_terminal=None,
            sig_x_terminal=None,
            inference=CubatureQuadrature(1, 0, 0),
            res_dir=res_dir,
        )
        _i2c._propagate = True  # used for alpha calibration

        policy = PartiallyObservedMpcPolicy(_i2c, mpc_iter, sig_u,
                                            np.copy(z_traj))
    else:

        def cost(x, u, a):
            # Quadratic cost on the stacked state-control vector relative to
            # the first column of the target a.
            tau = np.hstack((x, u))
            a = a[:, 0]
            return (tau - a).T @ QR @ (tau - a)

        _ilqr = IterativeLqr(
            env=model,
            cost=cost,
            horizon=T_plan,
            u_lim=np.array([[0.0, 0.0], [30.0, 30.0]]),
        )
        # init with gravity comp.
        _ilqr.uref = u_init.T

        # nd.Jacobian only takes one argument in order to work!!
        def dyn(tau):
            return model.step(tau[:6], tau[6:])

        _ilqr.dyn = AnalyticalLinearDynamics(dyn, _ilqr.dm_state, _ilqr.dm_act,
                                             _ilqr.nb_steps)

        policy = IlqrMpc(_ilqr, mpc_iter, np.copy(z_traj))

    policy.set_control(feedforward=feedforward)
    x, y = env.reset()

    warm_start_iter = 25  # 100
    if use_i2c:
        policy.i2c.calibrate_alpha()
        print(f"calibrated alpha: {policy.i2c.alpha:.2f}")
        policy.optimize(warm_start_iter, model.x0, model.sig_x0)
        policy.i2c.calibrate_alpha()
        print(f"recalibrated alpha: {policy.i2c.alpha:.2f}")
    else:
        print("ilqr warm start start")
        policy.ilqr.run(warm_start_iter)
        print("ilqr warm start done")
        policy.ilqr.dir_name = res_dir
        policy.ilqr.plot_trajectory("ilqr_warm_start")

    u = np.zeros((model.dim_u, 1))

    # Each row of `states` holds the 6 state entries followed by the controls.
    states = np.zeros((T, model.dim_s))
    obs = np.zeros((T, model.dim_y))
    stream = []
    for t in range(T):
        u = policy(t, y, u)
        u = model.clip_u(u.T).T
        states[t, :6] = x[:, 0]
        states[t, 6:] = u[:, 0]
        obs[t, :] = y[:, 0]
        # Builtin float replaces the np.float alias, which was removed in
        # NumPy 1.24 and was only ever an alias of float.
        x, y = env.step(np.asarray(u.flatten(), dtype=float))

        if RENDER:
            _, img = env.render(
                i2c=policy.i2c if use_i2c else None,
                ilqr=policy.ilqr if not use_i2c else None,
                z_traj=z_traj,
            )
            stream.append(img)

    # Total quadratic tracking cost, summed over all time steps.
    err = states - z_traj
    total_cost = np.einsum("bi,ij,bi->", err, QR, err)
    print(total_cost)
    np.save(join(res_dir, f"{name}"), total_cost)
    np.save(join(res_dir, f"state_{name}"), states)
    np.save(join(res_dir, f"obs_{name}"), obs)

    if RENDER:
        gif_name = join(res_dir, f"{name}_render.gif")
        imageio.mimsave(gif_name, stream, fps=FS)
        optimize(gif_name)

    mus = np.asarray(policy.mus).reshape((T, model.dim_x))
    covars = np.asarray(policy.covars).reshape((T, model.dim_x, model.dim_x))

    # Figure 1: per-dimension state vs. estimated mean (left column) and
    # estimated standard deviation (right column).
    f, ax = plt.subplots(model.dim_x, 2)
    for i, a in enumerate(ax[:, 0]):
        a.plot(states[:, i], "b-")
        a.plot(mus[:, i], "c--")

    for i, a in enumerate(ax[:, 1]):
        a.plot(np.sqrt(covars[:, i, i]), "c--")
    plt.savefig(join(res_dir, f"{name}_state_estimation.png"),
                bbox_inches="tight",
                format="png")
    plt.close(f)

    # Figure 2: planar path, third state component over time, and controls.
    f, ax = plt.subplots(1, 3)
    a = ax[0]
    a.plot(z_traj[:, 0], z_traj[:, 1], "m")
    a.plot(states[:, 0], states[:, 1], "b-")
    a.plot(mus[:, 0], mus[:, 1], "c--")
    for o in obs:
        # Each observation is drawn as a segment between (o[0], o[1]) and
        # (o[2], o[3]).
        a.plot(o[[0, 2]], o[[1, 3]], "y")
    a.set_ylim(0, H)
    a.set_xlim(0, W)
    a.set_ylabel("$y$")
    a.set_xlabel("$x$")

    a = ax[1]
    a.plot(z_traj[:, 2], "m")
    a.plot(states[:, 2], "b-")
    a.plot(mus[:, 2], "c--")
    a.set_xlabel("Timesteps")
    # Raw string: "\p" is not a valid escape sequence in a normal literal.
    a.set_ylabel(r"$\psi$")

    a = ax[2]
    a.plot(states[:, 6], "c--", label="$u_1$")
    a.plot(states[:, 7], "b--", label="$u_2$")
    a.set_xlabel("Timesteps")
    a.set_ylabel("$u$")

    plt.savefig(join(res_dir, f"{name}_mpc_summary.png"),
                bbox_inches="tight",
                format="png")
    plt.close(f)
# Experiment configuration: module-level constants followed by the
# construction of the GaussianI2c inference object.
ENVIRONMENT = "LinearKnown"  # environment to control

# top level training parameters
N_DURATION = 60  # also the number of rows of the initial control mean below
N_EPISODE = 1
N_INFERENCE = 10
N_AUG = 0
N_STARTING = 0
N_ITERS_PER_PLOT = 1  # N_INFERENCE + 1
POLICY_COVAR = 0 * np.eye(1)  # 1x1 all-zero matrix
N_PLOTS = 1

# model learning
MODEL = None

# input inference
# NOTE(review): the meaning of the three CubatureQuadrature arguments is not
# visible in this part of the file -- check its definition.
quad = CubatureQuadrature(1, 0, 0)
INFERENCE = GaussianI2c(
    inference=quad,
    # Diagonal cost weights: 2-dimensional state, 1-dimensional control.
    Q=np.diag([10.0, 10.0]),
    R=np.diag([1.0]),
    Qf=np.diag([10.0, 10.0]),
    alpha=800.0,
    alpha_update_tol=0.0,
    # Zero initial control mean, one row per time step.
    mu_u=np.zeros((N_DURATION, 1)),
    sig_u=1.0 * np.eye(1),
    mu_x_term=None,
    sig_x_term=None,
)