def __init__(self, i2c, n_iter, sig_u, z_traj=None):
    """Set up the policy and the state-estimation machinery around it.

    All four arguments are handed unchanged to the parent constructor.
    The running state estimate starts from the initial mean and
    covariance stored on ``i2c.sys``.
    """
    super().__init__(i2c, n_iter, sig_u, z_traj)

    # Current belief over the state, seeded from the system's prior.
    system = i2c.sys
    self.mu = system.x0
    self.covar = system.sig_x0

    # Per-step record of the belief (filled elsewhere).
    self.mus, self.covars = [], []

    # One cubature rule shared by both inference objects.
    cubature_rule = CubatureQuadrature(1, 0, 0)
    self.dyn_inf = QuadratureInference(cubature_rule, self.dim_x)
    self.meas_inf = QuadratureInference(cubature_rule, self.dim_x)
def __init__(self, ilqr, n_iter, z_traj=None):
    """Wrap an iLQR solver as an MPC policy with a quadrature state estimate.

    ``ilqr.env`` is used as the model; its dimensions and its initial
    mean/covariance are copied onto this object. When ``z_traj`` is given,
    its leading ``ilqr.nb_steps + 1`` rows (with a trailing axis added)
    are installed as the solver's ``weighting``.
    """
    model = ilqr.env
    self.dim_u = model.dim_u
    self.dim_x = model.dim_x
    self.model = model

    self.n_iter = n_iter
    self.z_traj = z_traj
    self.ilqr = ilqr
    if z_traj is not None:
        window = self.ilqr.nb_steps + 1
        self.ilqr.weighting = z_traj[:window, :, None]

    # Histories start empty (filled elsewhere).
    self.xu_history = []
    self.z_history = []

    # Current belief over the state, seeded from the model's prior.
    self.mu = model.x0
    self.covar = model.sig_x0
    self.mus, self.covars = [], []

    # One cubature rule shared by both inference objects.
    cubature_rule = CubatureQuadrature(1, 0, 0)
    self.dyn_inf = QuadratureInference(cubature_rule, self.dim_x)
    self.meas_inf = QuadratureInference(cubature_rule, self.dim_x)
# Iteration counts and plotting cadence.
N_INFERENCE = 200
N_AUG = 1
N_STARTING = 0
N_PLOTS = 4
N_ITERS_PER_PLOT = N_INFERENCE // N_PLOTS
N_BUFFER = 0
POLICY_COVAR = 0.0 * np.eye(1)

# model learning
MODEL = None

# Cost weights; Q, R and Qf all share the scale factor sf.
sf = 1e-3  # make numbers nice
Q = sf * np.diag([1.0, 1.0, 100.0, 1.0, 100.0, 10.0, 1.0, 1.0])
R = sf * np.diag([0.1])
Qf = sf * np.diag([1.0, 1.0, 100.0, 1.0, 100.0, 10.0, 1.0, 1.0])

# input inference
INFERENCE = GaussianI2c(
    inference=CubatureQuadrature(1, 0, 0),
    Q=Q,
    R=R,
    # Pass the dedicated terminal weight. It currently equals Q, so the
    # value handed over is unchanged, but edits to Qf now take effect.
    Qf=Qf,
    alpha=0.05,
    alpha_update_tol=0.99,
    mu_u=1e-2 * np.random.randn(N_DURATION, 1),
    sig_u=1.0 * np.eye(1),
    mu_x_term=None,
    sig_x_term=None,
)
markersize=1) a[1].plot(y_samp[:n_plot, 0], y_samp[:n_plot, 1], "c.", alpha=1, markersize=1) plot_ellipse(a[0], mean, cov, facecolor="k") ex_mean = func(mean.reshape((1, -1)))[0, :] J = dfunc(mean) ex_cov = J @ cov @ J.T a[1].plot(ex_mean[0], ex_mean[1], "m+", label="Linearize") plot_ellipse(a[1], ex_mean, ex_cov, facecolor="m") cub_inf = QuadratureInference(CubatureQuadrature(1, 0, 0), 2) quad_mean, quad_cov = cub_inf.forward(func, mean[:, None], cov) pts, y_pts = cub_inf.x_pts, cub_inf.y_pts a[0].plot(pts[1:, 0], pts[1:, 1], "bx", label="Cubature Points") a[1].plot(y_pts[1:, 0], y_pts[1:, 1], "bx") a[1].plot(quad_mean[0], quad_mean[1], "b+", label="Cubature") plot_ellipse(a[1], quad_mean, quad_cov, facecolor="b") cub_inf = QuadratureInference(GaussHermiteQuadrature(4), 2) quad_mean, quad_cov = cub_inf.forward(func, mean[:, None], cov) pts, y_pts = cub_inf.x_pts, cub_inf.y_pts a[0].plot(pts[:, 0], pts[:, 1], "yx", label="Gauss-Hermite Points") a[1].plot(y_pts[:, 0], y_pts[:, 1], "yx") a[1].plot(quad_mean[0], quad_mean[1], "y+", label="Gauss-Hermite")
def single_experiment(use_i2c, feedforward, low_noise, seed, name):
    """Run one closed-loop MPC tracking experiment and save its results.

    Results are written to a ``_results`` directory next to this file. If
    ``<name>.npy`` already exists there, the experiment is skipped.

    Args:
        use_i2c: If true, plan with ``I2cGraph`` wrapped in
            ``PartiallyObservedMpcPolicy``; otherwise plan with
            ``IterativeLqr`` wrapped in ``IlqrMpc``.
        feedforward: Forwarded to ``policy.set_control(feedforward=...)``.
        low_noise: Selects which of the two ``sig_zeta`` matrices is
            assigned to the environment and the model.
        seed: Seed passed to ``np.random.seed``.
        name: Stem used for every output file of this run.

    Returns:
        None. Outputs are the saved cost, state and observation arrays,
        two PNG figures and, when ``RENDER`` is set, a GIF.
    """
    np.random.seed(seed)

    res_dir = join(dirname(realpath(__file__)), "_results")
    # exist_ok tolerates the directory having been created in the meantime.
    os.makedirs(res_dir, exist_ok=True)
    if exists(join(res_dir, f"{name}.npy")):
        # have result
        print(f"{name} already done")
        return

    env = Quadrotor()
    model = QuadrotorKnown()
    sig_zeta = (np.diag([1e-6] * 8) if low_noise else
                np.diag([1e-6] * 2 + [5e-5] * 2 + [1] * 4))
    env.env.sig_zeta = sig_zeta
    model.sig_zeta = sig_zeta

    T = 100
    T_plan = 10
    mpc_iter = 2

    # Trajectory to follow: a sine path in the first two coordinates, with
    # the third coordinate stepping from 0 to 2*pi halfway through.
    z_traj = np.zeros((T, model.dim_z))
    z_traj[:, 0] = np.linspace(W / 4, 3 * W / 4, T)
    z_traj[:, 1] = H / 2 + (H / 4) * np.sin(np.linspace(0, 2 * np.pi, T))
    z_traj[:, 2] = 2 * np.pi * np.heaviside(np.linspace(-1, 1, T), 1)

    # tracking controller
    Q = np.diag([1e3, 1e3, 1e3, 1, 1, 1])
    R = np.diag([1e-3, 1e-3])
    QR = la.block_diag(Q, R) / 1e3
    Qf = Q / 1e3
    u_init = 0.5 * model.gravity * np.ones((T_plan, model.dim_u))

    if use_i2c:
        sig_u = 1e-2 * np.eye(model.dim_u)
        _i2c = I2cGraph(
            sys=model,
            horizon=T_plan,
            Q=Q,
            R=R,
            Qf=Qf,
            alpha=1.0,
            alpha_update_tol=1.0,
            mu_u=u_init,
            sig_u=sig_u,
            mu_x_terminal=None,
            sig_x_terminal=None,
            inference=CubatureQuadrature(1, 0, 0),
            res_dir=res_dir,
        )
        _i2c._propagate = True  # used for alpha calibration
        policy = PartiallyObservedMpcPolicy(_i2c, mpc_iter, sig_u,
                                            np.copy(z_traj))
    else:

        def cost(x, u, a):
            tau = np.hstack((x, u))
            a = a[:, 0]
            return (tau - a).T @ QR @ (tau - a)

        _ilqr = IterativeLqr(
            env=model,
            cost=cost,
            horizon=T_plan,
            u_lim=np.array([[0.0, 0.0], [30.0, 30.0]]),
        )
        # init with gravity comp.
        _ilqr.uref = u_init.T

        # nd.Jacobian only takes one argument in order to work!!
        def dyn(tau):
            return model.step(tau[:6], tau[6:])

        _ilqr.dyn = AnalyticalLinearDynamics(dyn, _ilqr.dm_state,
                                             _ilqr.dm_act, _ilqr.nb_steps)
        policy = IlqrMpc(_ilqr, mpc_iter, np.copy(z_traj))

    # NOTE(review): the original indentation was lost; this call is assumed
    # to apply to both policy types. Confirm against the original layout.
    policy.set_control(feedforward=feedforward)

    x, y = env.reset()

    warm_start_iter = 25  # 100
    if use_i2c:
        policy.i2c.calibrate_alpha()
        print(f"calibrated alpha: {policy.i2c.alpha:.2f}")
        policy.optimize(warm_start_iter, model.x0, model.sig_x0)
        policy.i2c.calibrate_alpha()
        print(f"recalibrated alpha: {policy.i2c.alpha:.2f}")
    else:
        print("ilqr warm start start")
        policy.ilqr.run(warm_start_iter)
        print("ilqr warm start done")
        policy.ilqr.dir_name = res_dir
        policy.ilqr.plot_trajectory("ilqr_warm_start")

    u = np.zeros((model.dim_u, 1))
    states = np.zeros((T, model.dim_s))
    obs = np.zeros((T, model.dim_y))
    stream = []
    for t in range(T):
        u = policy(t, y, u)
        u = model.clip_u(u.T).T
        # Each row of `states` holds the state followed by the applied input.
        states[t, :6] = x[:, 0]
        states[t, 6:] = u[:, 0]
        obs[t, :] = y[:, 0]
        # Builtin float: the np.float alias was removed in NumPy 1.24.
        x, y = env.step(np.asarray(u.flatten(), dtype=float))
        if RENDER:
            still_open, img = env.render(
                i2c=policy.i2c if use_i2c else None,
                ilqr=policy.ilqr if not use_i2c else None,
                z_traj=z_traj,
            )
            stream.append(img)

    # Quadratic tracking cost summed over all timesteps.
    err = states - z_traj
    total_cost = np.einsum("bi,ij,bi->", err, QR, err)
    print(total_cost)

    np.save(join(res_dir, f"{name}"), total_cost)
    np.save(join(res_dir, f"state_{name}"), states)
    np.save(join(res_dir, f"obs_{name}"), obs)

    if RENDER:
        gif_name = join(res_dir, f"{name}_render.gif")
        imageio.mimsave(gif_name, stream, fps=FS)
        optimize(gif_name)

    mus = np.asarray(policy.mus).reshape((T, model.dim_x))
    covars = np.asarray(policy.covars).reshape((T, model.dim_x, model.dim_x))

    # Figure 1: state vs. estimated mean (left column) and the square root
    # of the covariance diagonal (right column), one row per state dimension.
    f, ax = plt.subplots(model.dim_x, 2)
    for i, a in enumerate(ax[:, 0]):
        a.plot(states[:, i], "b-")
        a.plot(mus[:, i], "c--")
    for i, a in enumerate(ax[:, 1]):
        a.plot(np.sqrt(covars[:, i, i]), "c--")
    plt.savefig(join(res_dir, f"{name}_state_estimation.png"),
                bbox_inches="tight", format="png")
    plt.close(f)

    # Figure 2: planar path, third coordinate over time, and the inputs.
    f, ax = plt.subplots(1, 3)
    a = ax[0]
    a.plot(z_traj[:, 0], z_traj[:, 1], "m")
    a.plot(states[:, 0], states[:, 1], "b-")
    a.plot(mus[:, 0], mus[:, 1], "c--")
    for t in range(obs.shape[0]):
        a.plot(obs[t, [0, 2]], obs[t, [1, 3]], "y")
    a.set_ylim(0, H)
    a.set_xlim(0, W)
    a.set_ylabel("$y$")
    a.set_xlabel("$x$")

    a = ax[1]
    a.plot(z_traj[:, 2], "m")
    a.plot(states[:, 2], "b-")
    a.plot(mus[:, 2], "c--")
    a.set_xlabel("Timesteps")
    # Raw string: same runtime text, without the invalid "\p" escape.
    a.set_ylabel(r"$\psi$")

    a = ax[2]
    a.plot(states[:, 6], "c--", label="$u_1$")
    a.plot(states[:, 7], "b--", label="$u_2$")
    a.set_xlabel("Timesteps")
    a.set_ylabel("$u$")
    plt.savefig(join(res_dir, f"{name}_mpc_summary.png"),
                bbox_inches="tight", format="png")
    plt.close(f)
# environment to control
ENVIRONMENT = "LinearKnown"

# top level training parameters
N_DURATION = 60
N_EPISODE = 1
N_INFERENCE = 10
N_AUG = 0
N_STARTING = 0
N_ITERS_PER_PLOT = 1  # N_INFERENCE + 1
POLICY_COVAR = 0 * np.eye(1)
N_PLOTS = 1

# model learning
MODEL = None

# input inference: a cubature rule handed to the Gaussian i2c solver
quad = CubatureQuadrature(1, 0, 0)
INFERENCE = GaussianI2c(
    inference=quad,
    Q=np.diag([10.0, 10.0]),
    R=np.diag([1.0]),
    Qf=np.diag([10.0, 10.0]),
    alpha=800.0,
    alpha_update_tol=0.0,
    mu_u=np.zeros((N_DURATION, 1)),
    sig_u=1.0 * np.eye(1),
    mu_x_term=None,
    sig_x_term=None,
)