def main():
    """Compare one i2c pass on a linear system against finite-horizon LQR.

    The linear model is shifted to a goal of 10 in every state dimension, the
    problem is solved with ``finite_horizon_lqr`` and with a single i2c
    forward/backward message pass, and the trajectory, controller and
    value-function plots are written to the results folder.
    """
    out_dir = make_results_folder("i2c_lqr_equivalence", 0, "", release=True)
    env = make_env(experiment)
    model = make_env_model(experiment.ENVIRONMENT, experiment.MODEL)
    experiment.N_INFERENCE = 1

    # Redefine the linear system. The two goal arrays are built separately so
    # that they never alias each other.
    goal_shape = (env.dim_x, 1)
    model.xag = np.full(goal_shape, 10.0)
    model.zg_term = np.full(goal_shape, 10.0)
    # Offset chosen such that A @ xag + a == xag.
    model.a = model.xag - model.A @ model.xag
    env.a = model.a
    env.sig_eta = np.zeros((env.dim_x, env.dim_x))

    zero_action = np.zeros((env.dim_u,))
    lqr_solution = finite_horizon_lqr(
        experiment.N_DURATION,
        model.A,
        model.a[:, 0],
        model.B,
        experiment.INFERENCE.Q,
        experiment.INFERENCE.R,
        model.x0[:, 0],
        model.xag[:, 0],
        zero_action,
        model.dim_x,
        model.dim_u,
    )
    x_lqr, u_lqr, K_lqr, k_lqr, _cost_lqr, P, p = lqr_solution

    # Function-scope import kept exactly where the original had it.
    from i2c.exp_types import CubatureQuadrature

    graph_settings = {
        "sys": model,
        "horizon": experiment.N_DURATION,
        "Q": experiment.INFERENCE.Q,
        "R": experiment.INFERENCE.R,
        "Qf": experiment.INFERENCE.Qf,
        "alpha": 1e-5,  # 1e-6,
        "alpha_update_tol": experiment.INFERENCE.alpha_update_tol,
        "mu_u": np.zeros((experiment.N_DURATION, 1)),
        "sig_u": 1e2 * np.eye(1),
        "mu_x_terminal": None,
        "sig_x_terminal": None,
        "inference": experiment.INFERENCE.inference,
        "res_dir": None,
    }
    graph = I2cGraph(**graph_settings)
    graph.use_expert_controller = False
    for cell in graph.cells:
        cell.state_action_independence = True

    # EM iteration
    graph._forward_backward_msgs()
    graph.plot_traj(0, dir_name=out_dir, filename="lqr")

    # compute riccati terms
    graph._backward_ricatti_msgs()

    plot_trajectory(graph, x_lqr, u_lqr, dir_name=out_dir)
    plot_controller(graph, u_lqr, K_lqr, k_lqr, dir_name=out_dir)
    plot_value_function(graph, P, p, dir_name=out_dir)
def main():
    """Run the nonlinear covariance-control experiment and plot the outcome.

    Builds the i2c graph from the ``experiment`` settings, runs
    ``experiment.N_INFERENCE`` message-passing iterations, writes the
    resulting local linear policy into a time-indexed linear Gaussian policy,
    evaluates that policy on the environment and saves the plots in the
    results folder.
    """
    configure_plots()
    out_dir = make_results_folder(
        "i2c_nonlinear_covariance_control", 0, "", release=True
    )
    env = make_env(experiment)
    model = make_env_model(experiment.ENVIRONMENT, experiment.MODEL)

    inference_cfg = experiment.INFERENCE
    graph = I2cGraph(
        sys=model,
        horizon=experiment.N_DURATION,
        Q=inference_cfg.Q,
        R=inference_cfg.R,
        Qf=inference_cfg.Qf,
        alpha=inference_cfg.alpha,
        alpha_update_tol=inference_cfg.alpha_update_tol,
        mu_u=inference_cfg.mu_u,
        sig_u=inference_cfg.sig_u,
        mu_x_terminal=inference_cfg.mu_x_term,
        sig_x_terminal=inference_cfg.sig_x_term,
        inference=inference_cfg.inference,
        res_dir=out_dir,
    )
    for cell in graph.cells:
        cell.use_expert_controller = False
    graph._propagate = True

    policy = TimeIndexedLinearGaussianPolicy(
        experiment.POLICY_COVAR,
        experiment.N_DURATION,
        graph.sys.dim_u,
        graph.sys.dim_x,
    )

    graph.propagate()
    for _ in tqdm(range(experiment.N_INFERENCE)):
        graph.learn_msgs()

    graph.plot_metrics(0, 0, dir_name=out_dir, filename="nonlinear_cc")

    policy.write(*graph.get_local_linear_policy())
    rollouts = env.batch_eval(policy=policy, n_eval=50, deterministic=True)
    xs, _, _, _ = rollouts
    env.plot_sim(xs, None, "final", out_dir)
    plot_covariance_control(graph, xs, filename="nonlinear_cc", dir_name=out_dir)
def make_dcp_trajopt_gif():
    """Render the double-cartpole trajectory optimization as animated GIFs.

    Runs ``experiment.N_INFERENCE`` i2c iterations. After each iteration one
    frame is drawn: the marginal mean and confidence band of the two angles
    and the input on the left, and the pole configuration over the horizon on
    the right. The collected frames are then written to
    ``<DIR_NAME>/../assets/dcp_<T>s.gif`` for ``T`` in 1, 2, 3, 4, 5 and 10,
    with the frame rate chosen so that each file plays for about ``T``
    seconds, and every file is passed to ``optimize``.
    """
    from experiments import double_cartpole_known_cq as experiment

    gif_filename = os.path.join(DIR_NAME, "..", "assets", "dcp_%ds.gif")
    stream = []

    model = make_env_model(experiment.ENVIRONMENT, experiment.MODEL)
    i2c = I2cGraph(
        model,
        experiment.N_DURATION,
        experiment.INFERENCE.Q,
        experiment.INFERENCE.R,
        experiment.INFERENCE.Qf,
        experiment.INFERENCE.alpha,
        experiment.INFERENCE.alpha_update_tol,
        # Small random input prior mean instead of experiment.INFERENCE.mu_u.
        1e-8 * np.random.randn(experiment.N_DURATION, 1),
        # Fixed input prior covariance instead of experiment.INFERENCE.sig_u.
        0.75 * np.eye(1),
        experiment.INFERENCE.mu_x_term,
        experiment.INFERENCE.sig_x_term,
        experiment.INFERENCE.inference,
        res_dir=None,
    )

    steps = range(experiment.N_DURATION)
    opacity = 0.3
    L = 0.3365  # link length used to draw the two poles

    for i in tqdm(range(experiment.N_INFERENCE)):
        i2c.learn_msgs()

        fig = plt.figure()
        gs = fig.add_gridspec(3, 2)
        ax_x1 = fig.add_subplot(gs[0, 0])
        ax_x2 = fig.add_subplot(gs[1, 0])
        ax_u = fig.add_subplot(gs[2, 0])
        ax_xy = fig.add_subplot(gs[:, 1])
        ax_x1.set_title("Double Cartpole\nTrajectory Optimization")
        ax_xy.set_title(f"Iteration {i:03d}")
        ax_x1.set_ylabel("$\\theta_0$")
        ax_x2.set_ylabel("$\\theta_1$")
        ax_xy.set_ylabel("$y$")
        ax_xy.set_xlabel("$x$")
        ax_u.set_ylabel("$u$")
        ax_u.set_xlabel("$n$")

        mu_xu, sig_xu = i2c.get_marginal_state_action_distribution()
        # Columns 1 and 2 are plotted as the angles, the last column as the input.
        for d, ax in zip([1, 2, -1], [ax_x1, ax_x2, ax_u]):
            xp_u, xp_l = i2c.indexed_confidence_bound(mu_xu, sig_xu, d)
            ax.fill_between(
                steps, xp_l, xp_u, where=xp_u >= xp_l, facecolor="c", alpha=opacity
            )
            ax.plot(steps, mu_xu[:, d], "c")
            ax.plot(steps, np.zeros((experiment.N_DURATION,)), "k--")

        x_tip = mu_xu[:, 0] + L * np.sin(mu_xu[:, 1]) + L * np.sin(mu_xu[:, 2])
        y_tip = L * np.cos(mu_xu[:, 1]) + L * np.cos(mu_xu[:, 2])
        n_steps = mu_xu.shape[0]
        for t in range(n_steps):
            x0 = mu_xu[t, 0]
            x1 = x0 + L * np.sin(mu_xu[t, 1])
            x2 = x1 + L * np.sin(mu_xu[t, 2])
            y0 = 0
            y1 = y0 + L * np.cos(mu_xu[t, 1])
            y2 = y1 + L * np.cos(mu_xu[t, 2])
            # Later time steps are drawn more opaque.
            fade = 0.2 * t / n_steps
            ax_xy.plot([x0, x1], [y0, y1], color="b", alpha=fade)
            ax_xy.plot([x1, x2], [y1, y2], color="b", alpha=fade)

        ax_xy.plot(x_tip, y_tip, color="b", alpha=0.5)
        ax_xy.plot(0, -2 * L, "kx", markersize=10)
        ax_xy.plot(np.linspace(-1.5, 1.5, 100), 2 * L * np.ones(100), "k--")

        ax_x1.set_ylim(-np.pi, 3 * np.pi)
        ax_x2.set_ylim(-np.pi, 3 * np.pi)
        ax_u.set_ylim(-10, 10)
        ax_x1.set_xticks([])
        ax_x2.set_xticks([])
        ax_xy.set_xticks([])
        ax_xy.set_yticks([])
        ax_x1.yaxis.set_major_formatter(plt.FuncFormatter(pi_format))
        ax_x2.yaxis.set_major_formatter(plt.FuncFormatter(pi_format))
        for a in [ax_x1, ax_x2, ax_u]:
            a.set_xlim(0, experiment.N_DURATION)

        fig.canvas.draw()
        image = np.frombuffer(fig.canvas.tostring_rgb(), dtype="uint8")
        stream.append(image.reshape(fig.canvas.get_width_height()[::-1] + (3,)))
        # The pixels are already copied into `stream`; close the figure so
        # that one figure per iteration does not stay alive in pyplot.
        plt.close(fig)

    for duration in [1, 2, 3, 4, 5, 10]:
        name = gif_filename % duration
        fps = len(stream) / duration
        imageio.mimsave(name, stream, fps=fps)
        optimize(name)
def make_pendulum_cov_control_gif():
    """Animate pendulum covariance control over 200 i2c iterations.

    After every iteration the current local linear policy is rolled out 500
    times on the environment and the rollouts are drawn in the plane of the
    first two state dimensions, together with the initial and terminal
    distributions. The frames are saved as
    ``<DIR_NAME>/../assets/p_cc_<T>s.gif`` for ``T`` in 1, 2, 3, 4, 5 and 10
    and each file is passed to ``optimize``.
    """
    import experiments.pendulum_known_act_reg_quad as experiment
    from i2c.policy.linear import TimeIndexedLinearGaussianPolicy
    from i2c.utils import covariance_2d

    model = make_env_model(experiment.ENVIRONMENT, experiment.MODEL)
    env = make_env(experiment)

    inference_cfg = experiment.INFERENCE
    graph = I2cGraph(
        sys=model,
        horizon=experiment.N_DURATION,
        Q=inference_cfg.Q,
        R=inference_cfg.R,
        Qf=inference_cfg.Qf,
        alpha=inference_cfg.alpha,
        alpha_update_tol=inference_cfg.alpha_update_tol,
        mu_u=inference_cfg.mu_u,
        # sig_u=experiment.INFERENCE.sig_u,
        sig_u=1.0 * np.eye(1),
        mu_x_terminal=inference_cfg.mu_x_term,
        sig_x_terminal=inference_cfg.sig_x_term,
        inference=inference_cfg.inference,
        res_dir=None,
    )
    for cell in graph.cells:
        cell.use_expert_controller = False

    policy = TimeIndexedLinearGaussianPolicy(
        experiment.POLICY_COVAR,
        experiment.N_DURATION,
        graph.sys.dim_u,
        graph.sys.dim_x,
    )
    graph._propagate = False

    experiment.N_INFERENCE = 200
    iterations = range(experiment.N_INFERENCE)
    gif_filename = os.path.join(DIR_NAME, "..", "assets", "p_cc_%ds.gif")
    frames = []

    for step in tqdm(iterations):
        graph.learn_msgs()
        policy.write(*graph.get_local_linear_policy())
        rollouts, _, _, _ = env.batch_eval(
            policy=policy, n_eval=500, deterministic=False
        )

        fig, axis = plt.subplots(1, 1)
        axis.set_title(f"Pendulum Covariance Control\nIteration {step:03d}")

        for idx, rollout in enumerate(rollouts):
            axis.plot(rollout[:, 0], rollout[:, 1], ".c", alpha=0.1, markersize=1)
            # Only the first rollout carries the legend entry.
            end_label = "rollouts" if idx == 0 else None
            axis.plot(
                rollout[-1, 0],
                rollout[-1, 1],
                ".c",
                alpha=1.0,
                label=end_label,
                markersize=1,
            )

        # Initial state distribution.
        covariance_2d(graph.sys.sig_x0, graph.sys.x0, axis, facecolor="k")
        axis.plot(
            graph.sys.x0[0],
            graph.sys.x0[1],
            "xk",
            label="$\\mathbf{x}_0$",
            markersize=3,
        )
        # Terminal state distribution.
        covariance_2d(graph.sig_x_terminal, graph.mu_x_terminal, axis, facecolor="r")
        axis.plot(
            graph.mu_x_terminal[0],
            graph.mu_x_terminal[1],
            "xr",
            label="$\\mathbf{x}_g$",
            markersize=3,
        )

        axis.set_xlabel(graph.sys.key[0])
        axis.set_ylabel(graph.sys.key[1])
        axis.set_xlim(-np.pi / 4, 3 * np.pi / 2)
        axis.set_ylim(-5, 5)
        axis.legend(loc="lower left")

        fig.canvas.draw()
        pixels = np.frombuffer(fig.canvas.tostring_rgb(), dtype="uint8")
        frame_shape = fig.canvas.get_width_height()[::-1] + (3,)
        frames.append(pixels.reshape(frame_shape))
        plt.close(fig)

    for seconds in [1, 2, 3, 4, 5, 10]:
        name = gif_filename % seconds
        fps = len(frames) / seconds
        imageio.mimsave(name, frames, fps=fps)
        optimize(name)
def single_experiment(use_i2c, feedforward, low_noise, seed, name):
    """Run one quadrotor tracking-MPC experiment and store its results.

    Results go to the ``_results`` folder next to this file. If
    ``<name>.npy`` already exists there, the experiment is skipped.

    Args:
        use_i2c: If true, plan with ``PartiallyObservedMpcPolicy`` around an
            ``I2cGraph``; otherwise with ``IlqrMpc`` around ``IterativeLqr``.
        feedforward: Forwarded to ``policy.set_control``.
        low_noise: Selects the small ``sig_zeta`` matrix instead of the
            larger one for both the environment and the model.
        seed: Seed passed to ``np.random.seed``.
        name: Stem used for every file written by this run.

    Writes ``<name>.npy`` (tracking cost), ``state_<name>.npy``,
    ``obs_<name>.npy``, ``<name>_state_estimation.png`` and
    ``<name>_mpc_summary.png``, plus ``<name>_render.gif`` when ``RENDER`` is
    set.
    """
    # NOTE(review): block nesting was rebuilt from a whitespace-collapsed
    # source; confirm the placement of statements that follow an if/else.
    np.random.seed(seed)
    res_dir = join(dirname(realpath(__file__)), "_results")
    # exist_ok avoids the check-then-create race if several runs start at once.
    os.makedirs(res_dir, exist_ok=True)
    if exists(join(res_dir, f"{name}.npy")):  # have result
        print(f"{name} already done")
        return

    env = Quadrotor()
    model = QuadrotorKnown()
    if low_noise:
        sig_zeta = np.diag([1e-6] * 8)
    else:
        sig_zeta = np.diag([1e-6] * 2 + [5e-5] * 2 + [1] * 4)
    env.env.sig_zeta = sig_zeta
    model.sig_zeta = sig_zeta

    T = 100
    T_plan = 10
    mpc_iter = 2

    # Trajectory to follow: a sine path whose third component steps from 0
    # to 2*pi halfway through the horizon.
    z_traj = np.zeros((T, model.dim_z))
    z_traj[:, 0] = np.linspace(W / 4, 3 * W / 4, T)
    z_traj[:, 1] = H / 2 + (H / 4) * np.sin(np.linspace(0, 2 * np.pi, T))
    z_traj[:, 2] = 2 * np.pi * np.heaviside(np.linspace(-1, 1, T), 1)

    # tracking controller
    Q = np.diag([1e3, 1e3, 1e3, 1, 1, 1])
    R = np.diag([1e-3, 1e-3])
    QR = la.block_diag(Q, R) / 1e3
    Qf = Q / 1e3
    u_init = 0.5 * model.gravity * np.ones((T_plan, model.dim_u))

    if use_i2c:
        sig_u = 1e-2 * np.eye(model.dim_u)
        _i2c = I2cGraph(
            sys=model,
            horizon=T_plan,
            Q=Q,
            R=R,
            Qf=Qf,
            alpha=1.0,
            alpha_update_tol=1.0,
            mu_u=u_init,
            sig_u=sig_u,
            mu_x_terminal=None,
            sig_x_terminal=None,
            inference=CubatureQuadrature(1, 0, 0),
            res_dir=res_dir,
        )
        _i2c._propagate = True  # used for alpha calibration
        policy = PartiallyObservedMpcPolicy(_i2c, mpc_iter, sig_u, np.copy(z_traj))
    else:

        def cost(x, u, a):
            tau = np.hstack((x, u))
            a = a[:, 0]
            return (tau - a).T @ QR @ (tau - a)

        _ilqr = IterativeLqr(
            env=model,
            cost=cost,
            horizon=T_plan,
            u_lim=np.array([[0.0, 0.0], [30.0, 30.0]]),
        )
        # init with gravity comp.
        _ilqr.uref = u_init.T

        # nd.Jacobian only takes one argument in order to work!!
        def dyn(tau):
            return model.step(tau[:6], tau[6:])

        _ilqr.dyn = AnalyticalLinearDynamics(
            dyn, _ilqr.dm_state, _ilqr.dm_act, _ilqr.nb_steps
        )
        policy = IlqrMpc(_ilqr, mpc_iter, np.copy(z_traj))

    policy.set_control(feedforward=feedforward)

    x, y = env.reset()
    warm_start_iter = 25  # 100
    if use_i2c:
        policy.i2c.calibrate_alpha()
        print(f"calibrated alpha: {policy.i2c.alpha:.2f}")
        policy.optimize(warm_start_iter, model.x0, model.sig_x0)
        policy.i2c.calibrate_alpha()
        print(f"recalibrated alpha: {policy.i2c.alpha:.2f}")
    else:
        # NOTE(review): no optimisation call is visible between these two
        # messages in the source; confirm whether a warm-start call is missing.
        print("ilqr warm start start")
        print("ilqr warm start done")
        policy.ilqr.dir_name = res_dir
        policy.ilqr.plot_trajectory("ilqr_warm_start")

    u = np.zeros((model.dim_u, 1))
    states = np.zeros((T, model.dim_s))
    obs = np.zeros((T, model.dim_y))
    stream = []
    for t in range(T):
        u = policy(t, y, u)
        u = model.clip_u(u.T).T
        states[t, :6] = x[:, 0]
        states[t, 6:] = u[:, 0]
        obs[t, :] = y[:, 0]
        # The `np.float` alias no longer exists in current NumPy; the builtin
        # `float` selects the same float64 dtype.
        x, y = env.step(np.asarray(u.flatten(), dtype=float))
        if RENDER:
            _still_open, img = env.render(
                i2c=policy.i2c if use_i2c else None,
                ilqr=policy.ilqr if not use_i2c else None,
                z_traj=z_traj,
            )
            stream.append(img)

    err = states - z_traj
    cost = np.einsum("bi,ij,bi->", err, QR, err)
    print(cost)
    # NOTE(review): the callee of these three statements was truncated in the
    # source. `np.save(join(res_dir, ...))` is restored because the skip check
    # at the top of this function looks for `<name>.npy` in `res_dir`.
    np.save(join(res_dir, f"{name}"), cost)
    np.save(join(res_dir, f"state_{name}"), states)
    np.save(join(res_dir, f"obs_{name}"), obs)

    if RENDER:
        gif_name = join(res_dir, f"{name}_render.gif")
        imageio.mimsave(gif_name, stream, fps=FS)
        optimize(gif_name)

    mus = np.asarray(policy.mus).reshape((T, model.dim_x))
    covars = np.asarray(policy.covars).reshape((T, model.dim_x, model.dim_x))

    # Left column: true state against the estimated mean.
    # Right column: estimated standard deviation.
    f, ax = plt.subplots(model.dim_x, 2)
    for i, a in enumerate(ax[:, 0]):
        a.plot(states[:, i], "b-")
        a.plot(mus[:, i], "c--")
    for i, a in enumerate(ax[:, 1]):
        a.plot(np.sqrt(covars[:, i, i]), "c--")
    plt.savefig(
        join(res_dir, f"{name}_state_estimation.png"),
        bbox_inches="tight",
        format="png",
    )
    plt.close(f)

    f, ax = plt.subplots(1, 3)
    a = ax[0]
    a.plot(z_traj[:, 0], z_traj[:, 1], "m")
    a.plot(states[:, 0], states[:, 1], "b-")
    a.plot(mus[:, 0], mus[:, 1], "c--")
    for t in range(obs.shape[0]):
        a.plot(obs[t, [0, 2]], obs[t, [1, 3]], "y")
    a.set_ylim(0, H)
    a.set_xlim(0, W)
    a.set_ylabel("$y$")
    a.set_xlabel("$x$")

    a = ax[1]
    a.plot(z_traj[:, 2], "m")
    a.plot(states[:, 2], "b-")
    a.plot(mus[:, 2], "c--")
    a.set_xlabel("Timesteps")
    # Raw string: "\p" is not a valid escape sequence in a normal literal.
    a.set_ylabel(r"$\psi$")

    a = ax[2]
    a.plot(states[:, 6], "c--", label="$u_1$")
    a.plot(states[:, 7], "b--", label="$u_2$")
    a.set_xlabel("Timesteps")
    a.set_ylabel("$u$")
    plt.savefig(
        join(res_dir, f"{name}_mpc_summary.png"),
        bbox_inches="tight",
        format="png",
    )
    plt.close(f)
def run(experiment, res_dir, weight_path):
    """Run i2c inference for ``experiment`` and write plots and data.

    Args:
        experiment: Experiment module/object providing ``ENVIRONMENT``,
            ``MODEL``, ``INFERENCE``, ``N_DURATION``, ``N_INFERENCE``,
            ``N_ITERS_PER_PLOT`` and ``POLICY_COVAR``.
        res_dir: Directory handed to every plotting and saving call.
        weight_path: If not ``None``, passed to ``i2c.sys.model.load`` before
            inference starts.

    Raises:
        Exception: Any error raised during the inference loop is logged, the
            metrics are plotted with the ``"esc"`` tag, and the error is
            re-raised.
    """
    # NOTE(review): this function was recovered from a whitespace-collapsed,
    # partially truncated source. The block nesting and every statement
    # tagged NOTE(review) below are reconstructions; confirm them against the
    # original file before relying on them.
    env = make_env(experiment)
    model = make_env_model(experiment.ENVIRONMENT, experiment.MODEL)

    i2c = I2cGraph(
        model,
        experiment.N_DURATION,
        experiment.INFERENCE.Q,
        experiment.INFERENCE.R,
        experiment.INFERENCE.Qf,
        experiment.INFERENCE.alpha,
        experiment.INFERENCE.alpha_update_tol,
        experiment.INFERENCE.mu_u,
        experiment.INFERENCE.sig_u,
        experiment.INFERENCE.mu_x_term,
        experiment.INFERENCE.sig_x_term,
        experiment.INFERENCE.inference,
        res_dir=res_dir,
    )

    policy_class = ExpertTimeIndexedLinearGaussianPolicy
    policy_linear = TimeIndexedLinearGaussianPolicy(
        experiment.POLICY_COVAR,
        experiment.N_DURATION,
        i2c.sys.dim_u,
        i2c.sys.dim_x,
    )
    policy = policy_class(
        experiment.POLICY_COVAR,
        experiment.N_DURATION,
        i2c.sys.dim_u,
        i2c.sys.dim_x,
        soft=False,
    )

    if weight_path is not None:
        print("Loading i2c model with {}".format(weight_path))
        i2c.sys.model.load(weight_path)

    # initial marginal traj
    s_est = np.zeros((experiment.N_DURATION, model.dim_s))

    dim_terminal = i2c.Qf.shape[0]
    traj_eval = StochasticTrajectoryEvaluator(
        i2c.QR, i2c.Qf, i2c.z, i2c.z_term, dim_terminal
    )
    traj_eval_iter = StochasticTrajectoryEvaluator(
        i2c.QR, i2c.Qf, i2c.z, i2c.z_term, dim_terminal
    )
    traj_eval_safe_iter = StochasticTrajectoryEvaluator(
        i2c.QR, i2c.Qf, i2c.z, i2c.z_term, dim_terminal
    )

    i2c.reset_metrics()

    if env.simulated:
        xs, ys, zs, z_term = env.batch_eval(policy, N_EVAL)
        env.plot_sim(xs, s_est, "initial", res_dir)
        traj_eval.eval(zs, z_term, zs[0], z_term[0])

    # inference
    try:
        for i in tqdm(range(experiment.N_INFERENCE)):
            plot = (i % experiment.N_ITERS_PER_PLOT == 0) or (
                i == experiment.N_INFERENCE - 1
            )
            i2c.learn_msgs()

            if env.simulated:
                # eval policy
                policy_linear.write(*i2c.get_local_linear_policy())
                xs, ys, zs, zs_term = env.batch_eval(policy_linear, N_EVAL)
                z_est, z_term_est = i2c.get_marginal_observed_trajectory()
                traj_eval_iter.eval(zs, zs_term, z_est, z_term_est)

                policy.write(*i2c.get_local_expert_linear_policy())
                xs, ys, zs, zs_term = env.batch_eval(policy, N_EVAL)
                traj_eval_safe_iter.eval(zs, zs_term, z_est, z_term_est)

                # NOTE(review): the callee of this message was truncated in
                # the source; `logging.info` is assumed because this function
                # already reports through `logging`.
                logging.info(
                    f"{i:02d} Cost | Plan: {i2c.costs_m[-1]}, "
                    f"Predict: {i2c.costs_pf[-1]}, "
                    f"Sim: [{traj_eval_iter.actual_cost_10[-1]}, "
                    f"{traj_eval_iter.actual_cost_90[-1]}] "
                    f"alpha: {i2c.alphas[-1], i2c.alphas_desired[-1]}"
                )

                if i == 0:
                    # see how well inference works at the start
                    xs, ys, zs, zs_term = env.batch_eval(
                        policy, N_EVAL, deterministic=False
                    )
                    env.plot_sim(xs, s_est, f"{i}_stochastic", res_dir)

                if plot:
                    i2c.plot_metrics(0, i, res_dir, "msg")
                    s_est = i2c.get_marginal_trajectory()
                    env.plot_sim(xs, s_est, f"{i}_stochastic", res_dir)

        i2c.plot_metrics(0, i, res_dir, "msg")
    except Exception:
        logging.exception("Inference failed")
        i2c.plot_metrics(0, i, res_dir, "esc")
        raise

    # update policy
    if env.simulated:
        # policy.write(*i2c.get_local_linear_policy())
        policy_linear.write(*i2c.get_local_linear_policy())
        z_est, z_term_est = i2c.get_marginal_observed_trajectory()

        xs, ys, zs, zs_term = env.batch_eval(policy_linear, N_EVAL)
        s_est = i2c.get_marginal_trajectory()
        env.plot_sim(xs, s_est, "evaluation stochastic", res_dir)

        # NOTE(review): both evaluation calls are identical in the source even
        # though the plot labels differ; check for a lost `deterministic=`
        # argument.
        xs, ys, zs, zs_term = env.batch_eval(policy_linear, N_EVAL)
        env.plot_sim(xs, s_est, "evaluation deterministic", res_dir)

        z_est, z_term_est = i2c.get_marginal_observed_trajectory()
        traj_eval_iter.eval(zs, zs_term, z_est, z_term_est)
        traj_eval.eval(zs, zs_term, z_est, z_term_est)
        traj_eval_iter.plot("over_iterations", res_dir)
        traj_eval.plot("over_episodes", res_dir)

    i2c.plot_alphas(res_dir, "final")
    i2c.plot_cost(res_dir, "cost_final")

    policy_linear.write(*i2c.get_local_linear_policy())
    # NOTE(review): the right-hand side of this four-way unpack was truncated.
    # `env.batch_eval(policy_linear, N_EVAL)` is assumed because it is the
    # only call in this function that is unpacked into four values.
    x_final, y_final, _, _ = env.batch_eval(policy_linear, N_EVAL)
    s_est = i2c.get_marginal_trajectory()
    env.plot_sim(x_final, s_est, "Final", res_dir)
    # generate gif for mujoco envs
    env.run_render(policy_linear, res_dir)

    # Replace the policy offsets with the marginal inputs before rolling out.
    policy_linear.k = i2c.get_marginal_input().reshape(policy_linear.k.shape)
    # NOTE(review): right-hand side truncated here as well; same assumption.
    x_ff, _, _, _ = env.batch_eval(policy_linear, N_EVAL)
    env.plot_sim(x_ff, s_est, "Final Feedforward", res_dir)

    # save model and data
    save_trajectories(x_final, y_final, i2c, res_dir)
    # TODO(review): three further calls follow in the source but lost their
    # callee; only the argument tails survive and must be restored by hand:
    #     <?>("episodic", res_dir)
    #     <?>("iter", res_dir)
    #     <?>(..., f"{i}")

    i2c.close()
    env.close()