def main():
    res_dir = make_results_folder("i2c_lqr_equivalence", 0, "", release=True)

    env = make_env(experiment)
    model = make_env_model(experiment.ENVIRONMENT, experiment.MODEL)

    experiment.N_INFERENCE = 1

    # redefine linear system
    model.xag = 10 * np.ones((env.dim_x, 1))
    model.zg_term = 10 * np.ones((env.dim_x, 1))
    model.a = model.xag - model.A @ model.xag
    env.a = model.a
    env.sig_eta = 0.0 * np.eye(env.dim_x)
    ug = np.zeros((env.dim_u, ))

    x_lqr, u_lqr, K_lqr, k_lqr, cost_lqr, P, p = finite_horizon_lqr(
        experiment.N_DURATION,
        model.A,
        model.a[:, 0],
        model.B,
        experiment.INFERENCE.Q,
        experiment.INFERENCE.R,
        model.x0[:, 0],
        model.xag[:, 0],
        ug,
        model.dim_x,
        model.dim_u,
    )
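    # finite_horizon_lqr (as used here) appears to return the optimal state and
    # input trajectories, the time-indexed feedback gains K_lqr and feedforward
    # terms k_lqr, the optimal cost, and the quadratic/linear value-function
    # terms P and p that are compared against the Riccati messages below.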
    from i2c.exp_types import CubatureQuadrature

    i2c = I2cGraph(
        sys=model,
        horizon=experiment.N_DURATION,
        Q=experiment.INFERENCE.Q,
        R=experiment.INFERENCE.R,
        Qf=experiment.INFERENCE.Qf,
        alpha=1e-5,  # 1e-6,
        alpha_update_tol=experiment.INFERENCE.alpha_update_tol,
        mu_u=np.zeros((experiment.N_DURATION, 1)),
        sig_u=1e2 * np.eye(1),
        mu_x_terminal=None,
        sig_x_terminal=None,
        inference=experiment.INFERENCE.inference,
        res_dir=None,
    )
    i2c.use_expert_controller = False
    for c in i2c.cells:
        c.state_action_independence = True

    # EM iteration
    i2c._forward_backward_msgs()
    i2c.plot_traj(0, dir_name=res_dir, filename="lqr")

    # compute riccati terms
    i2c._backward_ricatti_msgs()

    plot_trajectory(i2c, x_lqr, u_lqr, dir_name=res_dir)
    plot_controller(i2c, u_lqr, K_lqr, k_lqr, dir_name=res_dir)
    plot_value_function(i2c, P, p, dir_name=res_dir)
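
# The example above checks i2c message passing against a finite-horizon LQR
# baseline. For reference, a minimal sketch of the backward Riccati recursion
# that a helper like finite_horizon_lqr is assumed to implement (the name,
# signature, and return values here are illustrative, not the library's API):
def lqr_backward_pass_sketch(N, A, B, Q, R, Qf):
    """Time-indexed LQR gains for x' = A x + B u with cost x'Qx + u'Ru."""
    import numpy as np

    dim_x, dim_u = B.shape
    P = Qf.copy()  # terminal value-function matrix
    K = np.zeros((N, dim_u, dim_x))
    for t in reversed(range(N)):
        S = R + B.T @ P @ B  # input-space curvature
        K[t] = np.linalg.solve(S, B.T @ P @ A)  # feedback gain, u_t = -K_t x_t
        P = Q + A.T @ P @ (A - B @ K[t])  # Riccati update
    return K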
Example #2
def main():
    configure_plots()
    res_dir = make_results_folder(
        "i2c_nonlinear_covariance_control", 0, "", release=True
    )

    env = make_env(experiment)
    model = make_env_model(experiment.ENVIRONMENT, experiment.MODEL)

    i2c = I2cGraph(
        sys=model,
        horizon=experiment.N_DURATION,
        Q=experiment.INFERENCE.Q,
        R=experiment.INFERENCE.R,
        Qf=experiment.INFERENCE.Qf,
        alpha=experiment.INFERENCE.alpha,
        alpha_update_tol=experiment.INFERENCE.alpha_update_tol,
        mu_u=experiment.INFERENCE.mu_u,
        sig_u=experiment.INFERENCE.sig_u,
        mu_x_terminal=experiment.INFERENCE.mu_x_term,
        sig_x_terminal=experiment.INFERENCE.sig_x_term,
        inference=experiment.INFERENCE.inference,
        res_dir=res_dir,
    )
    for c in i2c.cells:
        c.use_expert_controller = False
    i2c._propagate = True

    policy = TimeIndexedLinearGaussianPolicy(
        experiment.POLICY_COVAR, experiment.N_DURATION, i2c.sys.dim_u, i2c.sys.dim_x
    )

    i2c.propagate()
    for i in tqdm(range(experiment.N_INFERENCE)):
        i2c.learn_msgs()

    i2c.plot_metrics(0, 0, dir_name=res_dir, filename="nonlinear_cc")

    policy.write(*i2c.get_local_linear_policy())

    xs, _, _, _ = env.batch_eval(policy=policy, n_eval=50, deterministic=True)
    env.plot_sim(xs, None, "final", res_dir)

    plot_covariance_control(i2c, xs, filename="nonlinear_cc", dir_name=res_dir)
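
# policy.write(*i2c.get_local_linear_policy()) above loads time-indexed gains
# into the policy before the rollouts. A minimal sketch of how such a
# time-indexed linear Gaussian policy is assumed to act at run time
# (illustrative only; TimeIndexedLinearGaussianPolicy's internals may differ):
def linear_gaussian_action_sketch(t, x, K, k, sig_u, rng=None):
    """u_t = K_t x_t + k_t, plus Gaussian exploration noise if rng is given."""
    import numpy as np

    u = K[t] @ x + k[t]
    if rng is not None:
        u = u + rng.multivariate_normal(np.zeros(u.shape[0]), sig_u[t])
    return u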
def make_dcp_trajopt_gif():
    from experiments import double_cartpole_known_cq as experiment

    gif_filename = os.path.join(DIR_NAME, "..", "assets", "dcp_%ds.gif")

    stream = []
    model = make_env_model(experiment.ENVIRONMENT, experiment.MODEL)
    i2c = I2cGraph(
        model,
        experiment.N_DURATION,
        experiment.INFERENCE.Q,
        experiment.INFERENCE.R,
        experiment.INFERENCE.Qf,
        experiment.INFERENCE.alpha,
        experiment.INFERENCE.alpha_update_tol,
        # experiment.INFERENCE.mu_u,
        1e-8 * np.random.randn(experiment.N_DURATION, 1),
        0.75 * np.eye(1),
        # experiment.INFERENCE.sig_u,
        experiment.INFERENCE.mu_x_term,
        experiment.INFERENCE.sig_x_term,
        experiment.INFERENCE.inference,
        res_dir=None,
    )
    time = range(experiment.N_DURATION)
    opacity = 0.3
    L = 0.3365
    for i in tqdm(range(experiment.N_INFERENCE)):

        i2c.learn_msgs()

        if i % 1 == 0:
            fig = plt.figure()

            gs = fig.add_gridspec(3, 2)
            ax_x1 = fig.add_subplot(gs[0, 0])
            ax_x2 = fig.add_subplot(gs[1, 0])
            ax_u = fig.add_subplot(gs[2, 0])
            ax_xy = fig.add_subplot(gs[:, 1])

            ax_x1.set_title("Double Cartpole\nTrajectory Optimizaiton")
            ax_xy.set_title(f"Iteration {i:03d}")
            ax_x1.set_ylabel("$\\theta_0$")
            ax_x2.set_ylabel("$\\theta_1$")
            ax_xy.set_ylabel("$y$")
            ax_xy.set_xlabel("$x$")
            ax_u.set_ylabel("$u$")
            ax_u.set_xlabel("$n$")

            mu_xu, sig_xu = i2c.get_marginal_state_action_distribution()
            for d, ax in zip([1, 2, -1], [ax_x1, ax_x2, ax_u]):
                xp_u, xp_l = i2c.indexed_confidence_bound(mu_xu, sig_xu, d)
                ax.fill_between(
                    time, xp_l, xp_u, where=xp_u >= xp_l, facecolor="c", alpha=opacity
                )
                ax.plot(time, mu_xu[:, d], "c")
                ax.plot(time, np.zeros((experiment.N_DURATION,)), "k--")

            x_tip = mu_xu[:, 0] + L * np.sin(mu_xu[:, 1]) + L * np.sin(mu_xu[:, 2])
            y_tip = L * np.cos(mu_xu[:, 1]) + L * np.cos(mu_xu[:, 2])
            T = mu_xu.shape[0]
            for t in range(T):
                x0 = mu_xu[t, 0]
                x1 = x0 + L * np.sin(mu_xu[t, 1])
                x2 = x1 + L * np.sin(mu_xu[t, 2])
                y0 = 0
                y1 = y0 + L * np.cos(mu_xu[t, 1])
                y2 = y1 + L * np.cos(mu_xu[t, 2])
                ax_xy.plot([x0, x1], [y0, y1], color="b", alpha=0.2 * t / T)
                ax_xy.plot([x1, x2], [y1, y2], color="b", alpha=0.2 * t / T)
            ax_xy.plot(x_tip, y_tip, color="b", alpha=0.5)
            ax_xy.plot(0, -2 * L, "kx", markersize=10)
            ax_xy.plot(
                np.linspace(-1.5, 1.5, 100),
                2
                * L
                * np.ones(
                    100,
                ),
                "k--",
            )

            ax_x1.set_ylim(-np.pi, 3 * np.pi)
            ax_x2.set_ylim(-np.pi, 3 * np.pi)
            ax_u.set_ylim(-10, 10)
            ax_x1.set_xticks([])
            ax_x2.set_xticks([])
            ax_xy.set_xticks([])
            ax_xy.set_yticks([])

            ax_x1.yaxis.set_major_formatter(plt.FuncFormatter(pi_format))
            ax_x2.yaxis.set_major_formatter(plt.FuncFormatter(pi_format))
            for a in [ax_x1, ax_x2, ax_u]:
                a.set_xlim(0, experiment.N_DURATION)
            fig.canvas.draw()
            image = np.frombuffer(fig.canvas.tostring_rgb(), dtype="uint8")
            stream.append(image.reshape(fig.canvas.get_width_height()[::-1] + (3,)))
            # plt.show()
            plt.close(fig)

    # write GIFs of several durations once all frames have been collected
    for T in [1, 2, 3, 4, 5, 10]:
        name = gif_filename % T
        fps = len(stream) / T
        imageio.mimsave(name, stream, fps=fps)
        optimize(name)
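
# The shaded regions above come from i2c.indexed_confidence_bound(...). A
# minimal sketch of such a bound, assuming it is a symmetric interval taken
# from the diagonal of the time-indexed marginal covariances (the library
# implementation may differ):
def confidence_bound_sketch(mu_xu, sig_xu, d, n_std=2.0):
    """Upper and lower bounds of dimension d over time from marginal moments."""
    import numpy as np

    std = np.sqrt(sig_xu[:, d, d])  # per-timestep standard deviation
    return mu_xu[:, d] + n_std * std, mu_xu[:, d] - n_std * std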
def make_pendulum_cov_control_gif():
    import experiments.pendulum_known_act_reg_quad as experiment
    from i2c.policy.linear import TimeIndexedLinearGaussianPolicy
    from i2c.utils import covariance_2d

    model = make_env_model(experiment.ENVIRONMENT, experiment.MODEL)
    env = make_env(experiment)

    i2c = I2cGraph(
        sys=model,
        horizon=experiment.N_DURATION,
        Q=experiment.INFERENCE.Q,
        R=experiment.INFERENCE.R,
        Qf=experiment.INFERENCE.Qf,
        alpha=experiment.INFERENCE.alpha,
        alpha_update_tol=experiment.INFERENCE.alpha_update_tol,
        mu_u=experiment.INFERENCE.mu_u,
        # sig_u=experiment.INFERENCE.sig_u,
        sig_u=1.0 * np.eye(1),
        mu_x_terminal=experiment.INFERENCE.mu_x_term,
        sig_x_terminal=experiment.INFERENCE.sig_x_term,
        inference=experiment.INFERENCE.inference,
        res_dir=None,
    )
    for c in i2c.cells:
        c.use_expert_controller = False

    policy = TimeIndexedLinearGaussianPolicy(
        experiment.POLICY_COVAR, experiment.N_DURATION, i2c.sys.dim_u, i2c.sys.dim_x
    )

    i2c._propagate = False
    experiment.N_INFERENCE = 200
    iters = range(experiment.N_INFERENCE)
    gif_filename = os.path.join(DIR_NAME, "..", "assets", "p_cc_%ds.gif")
    stream = []
    for iter in tqdm(iters):
        i2c.learn_msgs()
        policy.write(*i2c.get_local_linear_policy())
        xs, _, _, _ = env.batch_eval(policy=policy, n_eval=500, deterministic=False)
        fig, ax = plt.subplots(1, 1)
        a = ax
        a.set_title(f"Pendulum Covariance Control\nIteration {iter:03d}")

        for i, x in enumerate(xs):
            a.plot(x[:, 0], x[:, 1], ".c", alpha=0.1, markersize=1)
            a.plot(
                x[-1, 0],
                x[-1, 1],
                ".c",
                alpha=1.0,
                label="rollouts" if i == 0 else None,
                markersize=1,
            )

        covariance_2d(i2c.sys.sig_x0, i2c.sys.x0, a, facecolor="k")
        a.plot(
            i2c.sys.x0[0], i2c.sys.x0[1], "xk", label="$\\mathbf{x}_0$", markersize=3
        )
        covariance_2d(i2c.sig_x_terminal, i2c.mu_x_terminal, a, facecolor="r")
        a.plot(
            i2c.mu_x_terminal[0],
            i2c.mu_x_terminal[1],
            "xr",
            label="$\\mathbf{x}_g$",
            markersize=3,
        )

        a.set_xlabel(i2c.sys.key[0])
        a.set_ylabel(i2c.sys.key[1])
        a.set_xlim(-np.pi / 4, 3 * np.pi / 2)
        a.set_ylim(-5, 5)
        a.legend(loc="lower left")
        fig.canvas.draw()
        image = np.frombuffer(fig.canvas.tostring_rgb(), dtype="uint8")
        stream.append(image.reshape(fig.canvas.get_width_height()[::-1] + (3,)))

        plt.close(fig)
    for T in [1, 2, 3, 4, 5, 10]:
        name = gif_filename % T
        fps = len(stream) / T
        imageio.mimsave(name, stream, fps=fps)
        optimize(name)
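
# covariance_2d(...) above draws the initial and terminal state distributions.
# A minimal sketch of such a helper, assuming a fixed-sigma contour obtained
# from the eigendecomposition of the 2D covariance (the i2c.utils
# implementation may differ):
def covariance_ellipse_sketch(sig, mu, ax, n_std=2.0, **kwargs):
    import numpy as np
    from matplotlib.patches import Ellipse

    m = np.ravel(mu)
    vals, vecs = np.linalg.eigh(np.asarray(sig)[:2, :2])
    angle = np.degrees(np.arctan2(vecs[1, -1], vecs[0, -1]))  # major-axis angle
    width, height = 2.0 * n_std * np.sqrt(vals[::-1])  # full axis lengths
    ax.add_patch(
        Ellipse(xy=(m[0], m[1]), width=width, height=height, angle=angle,
                alpha=0.3, **kwargs)
    )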
Example #5
def single_experiment(use_i2c, feedforward, low_noise, seed, name):
    np.random.seed(seed)

    res_dir = join(dirname(realpath(__file__)), "_results")

    if not os.path.exists(res_dir):
        os.makedirs(res_dir)

    if exists(join(res_dir, f"{name}.npy")):  # have result
        print(f"{name} already done")
        return

    env = Quadrotor()
    model = QuadrotorKnown()
    if low_noise:
        sig_zeta = np.diag([1e-6] * 8)
    else:
        sig_zeta = np.diag([1e-6] * 2 + [5e-5] * 2 + [1] * 4)
    env.env.sig_zeta = sig_zeta
    model.sig_zeta = sig_zeta

    T = 100
    T_plan = 10
    mpc_iter = 2

    # trajectory to follow -> sine wave with a 360 degree spin halfway along
    z_traj = np.zeros((T, model.dim_z))
    z_traj[:, 0] = np.linspace(W / 4, 3 * W / 4, T)
    z_traj[:, 1] = H / 2 + (H / 4) * np.sin(np.linspace(0, 2 * np.pi, T))
    z_traj[:, 2] = 2 * np.pi * np.heaviside(np.linspace(-1, 1, T), 1)

    # tracking controller
    Q = np.diag([1e3, 1e3, 1e3, 1, 1, 1])
    R = np.diag([1e-3, 1e-3])
    QR = la.block_diag(Q, R) / 1e3
    Qf = Q / 1e3

    u_init = 0.5 * model.gravity * np.ones((T_plan, model.dim_u))
    if use_i2c:
        sig_u = 1e-2 * np.eye(model.dim_u)
        _i2c = I2cGraph(
            sys=model,
            horizon=T_plan,
            Q=Q,
            R=R,
            Qf=Qf,
            alpha=1.0,
            alpha_update_tol=1.0,
            mu_u=u_init,
            sig_u=sig_u,
            mu_x_terminal=None,
            sig_x_terminal=None,
            inference=CubatureQuadrature(1, 0, 0),
            res_dir=res_dir,
        )
        _i2c._propagate = True  # used for alpha calibration

        policy = PartiallyObservedMpcPolicy(_i2c, mpc_iter, sig_u,
                                            np.copy(z_traj))
    else:

        def cost(x, u, a):
            tau = np.hstack((x, u))
            a = a[:, 0]
            return (tau - a).T @ QR @ (tau - a)

        _ilqr = IterativeLqr(
            env=model,
            cost=cost,
            horizon=T_plan,
            u_lim=np.array([[0.0, 0.0], [30.0, 30.0]]),
        )
        # init with gravity comp.
        _ilqr.uref = u_init.T

        # nd.Jacobian differentiates functions of a single argument, so wrap
        # the dynamics into one that takes the concatenated state-action tau
        def dyn(tau):
            return model.step(tau[:6], tau[6:])

        _ilqr.dyn = AnalyticalLinearDynamics(dyn, _ilqr.dm_state, _ilqr.dm_act,
                                             _ilqr.nb_steps)

        policy = IlqrMpc(_ilqr, mpc_iter, np.copy(z_traj))

    policy.set_control(feedforward=feedforward)
    x, y = env.reset()

    warm_start_iter = 25  # 100
    if use_i2c:
        policy.i2c.calibrate_alpha()
        print(f"calibrated alpha: {policy.i2c.alpha:.2f}")
        policy.optimize(warm_start_iter, model.x0, model.sig_x0)
        policy.i2c.calibrate_alpha()
        print(f"recalibrated alpha: {policy.i2c.alpha:.2f}")
    else:
        print("ilqr warm start start")
        policy.ilqr.run(warm_start_iter)
        print("ilqr warm start done")
        policy.ilqr.dir_name = res_dir
        policy.ilqr.plot_trajectory("ilqr_warm_start")

    u = np.zeros((model.dim_u, 1))

    states = np.zeros((T, model.dim_s))
    obs = np.zeros((T, model.dim_y))
    stream = []
    for t in range(T):
        u = policy(t, y, u)
        u = model.clip_u(u.T).T
        states[t, :6] = x[:, 0]
        states[t, 6:] = u[:, 0]
        obs[t, :] = y[:, 0]
        x, y = env.step(np.asarray(u.flatten(), dtype=float))

        if RENDER:
            still_open, img = env.render(
                i2c=policy.i2c if use_i2c else None,
                ilqr=policy.ilqr if not use_i2c else None,
                z_traj=z_traj,
            )
            stream.append(img)

    err = states - z_traj
    # accumulated quadratic tracking cost: sum_t err_t^T QR err_t
    cost = np.einsum("bi,ij,bj->", err, QR, err)
    print(cost)
    np.save(join(res_dir, f"{name}"), cost)
    np.save(join(res_dir, f"state_{name}"), states)
    np.save(join(res_dir, f"obs_{name}"), obs)

    if RENDER:
        gif_name = join(res_dir, f"{name}_render.gif")
        imageio.mimsave(gif_name, stream, fps=FS)
        optimize(gif_name)

    mus = np.asarray(policy.mus).reshape((T, model.dim_x))
    covars = np.asarray(policy.covars).reshape((T, model.dim_x, model.dim_x))

    f, ax = plt.subplots(model.dim_x, 2)
    for i, a in enumerate(ax[:, 0]):
        a.plot(states[:, i], "b-")
        a.plot(mus[:, i], "c--")

    for i, a in enumerate(ax[:, 1]):
        a.plot(np.sqrt(covars[:, i, i]), "c--")
    plt.savefig(join(res_dir, f"{name}_state_estimation.png"),
                bbox_inches="tight",
                format="png")
    plt.close(f)

    f, ax = plt.subplots(1, 3)
    a = ax[0]
    a.plot(z_traj[:, 0], z_traj[:, 1], "m")
    a.plot(states[:, 0], states[:, 1], "b-")
    a.plot(mus[:, 0], mus[:, 1], "c--")
    for t in range(obs.shape[0]):
        a.plot(obs[t, [0, 2]], obs[t, [1, 3]], "y")
    a.set_ylim(0, H)
    a.set_xlim(0, W)
    a.set_ylabel("$y$")
    a.set_xlabel("$x$")

    a = ax[1]
    a.plot(z_traj[:, 2], "m")
    a.plot(states[:, 2], "b-")
    a.plot(mus[:, 2], "c--")
    a.set_xlabel("Timesteps")
    a.set_ylabel("$\psi$")

    a = ax[2]
    a.plot(states[:, 6], "c--", label="$u_1$")
    a.plot(states[:, 7], "b--", label="$u_2$")
    a.set_xlabel("Timesteps")
    a.set_ylabel("$u$")

    plt.savefig(join(res_dir, f"{name}_mpc_summary.png"),
                bbox_inches="tight",
                format="png")
    plt.close(f)
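
# The tracking cost in single_experiment, np.einsum("bi,ij,bj->", err, QR, err),
# accumulates the quadratic form err_t^T QR err_t over all timesteps. A small
# self-contained check of that identity against an explicit loop (illustrative
# only):
def quadratic_cost_check():
    import numpy as np

    rng = np.random.default_rng(0)
    err = rng.standard_normal((100, 8))
    QR = np.diag(rng.random(8))
    vectorized = np.einsum("bi,ij,bj->", err, QR, err)
    looped = sum(float(e @ QR @ e) for e in err)
    assert np.isclose(vectorized, looped)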
def run(experiment, res_dir, weight_path):
    env = make_env(experiment)
    model = make_env_model(experiment.ENVIRONMENT, experiment.MODEL)

    i2c = I2cGraph(
        model,
        experiment.N_DURATION,
        experiment.INFERENCE.Q,
        experiment.INFERENCE.R,
        experiment.INFERENCE.Qf,
        experiment.INFERENCE.alpha,
        experiment.INFERENCE.alpha_update_tol,
        experiment.INFERENCE.mu_u,
        experiment.INFERENCE.sig_u,
        experiment.INFERENCE.mu_x_term,
        experiment.INFERENCE.sig_x_term,
        experiment.INFERENCE.inference,
        res_dir=res_dir,
    )

    policy_class = ExpertTimeIndexedLinearGaussianPolicy
    policy_linear = TimeIndexedLinearGaussianPolicy(experiment.POLICY_COVAR,
                                                    experiment.N_DURATION,
                                                    i2c.sys.dim_u,
                                                    i2c.sys.dim_x)
    policy = policy_class(
        experiment.POLICY_COVAR,
        experiment.N_DURATION,
        i2c.sys.dim_u,
        i2c.sys.dim_x,
        soft=False,
    )

    if weight_path is not None:
        print("Loading i2c model with {}".format(weight_path))
        i2c.sys.model.load(weight_path)

    # initial marginal traj
    s_est = np.zeros((experiment.N_DURATION, model.dim_s))

    dim_terminal = i2c.Qf.shape[0]
    traj_eval = StochasticTrajectoryEvaluator(i2c.QR, i2c.Qf, i2c.z,
                                              i2c.z_term, dim_terminal)
    traj_eval_iter = StochasticTrajectoryEvaluator(i2c.QR, i2c.Qf, i2c.z,
                                                   i2c.z_term, dim_terminal)
    traj_eval_safe_iter = StochasticTrajectoryEvaluator(
        i2c.QR, i2c.Qf, i2c.z, i2c.z_term, dim_terminal)

    i2c.reset_metrics()

    if env.simulated:
        policy.zero()
        xs, ys, zs, z_term = env.batch_eval(policy, N_EVAL)
        env.plot_sim(xs, s_est, "initial", res_dir)
        traj_eval.eval(zs, z_term, zs[0], z_term[0])

    # inference
    try:
        for i in tqdm(range(experiment.N_INFERENCE)):
            plot = (i % experiment.N_ITERS_PER_PLOT
                    == 0) or (i == experiment.N_INFERENCE - 1)

            i2c.learn_msgs()

            if env.simulated:
                # eval policy
                policy_linear.write(*i2c.get_local_linear_policy())

                xs, ys, zs, zs_term = env.batch_eval(policy_linear, N_EVAL)
                z_est, z_term_est = i2c.get_marginal_observed_trajectory()
                traj_eval_iter.eval(zs, zs_term, z_est, z_term_est)

                policy.write(*i2c.get_local_expert_linear_policy())
                xs, ys, zs, zs_term = env.batch_eval(policy, N_EVAL)
                traj_eval_safe_iter.eval(zs, zs_term, z_est, z_term_est)

                logging.info(
                    f"{i:02d} Cost | Plan: {i2c.costs_m[-1]}, "
                    f"Predict: {i2c.costs_pf[-1]}, "
                    f"Sim: [{traj_eval_iter.actual_cost_10[-1]}, "
                    f"{traj_eval_iter.actual_cost_90[-1]}] "
                    f"alpha: {i2c.alphas[-1], i2c.alphas_desired[-1]}")

            if i == 0:  # see how well inference works at the start
                xs, ys, zs, zs_term = env.batch_eval(policy,
                                                     N_EVAL,
                                                     deterministic=False)
                env.plot_sim(xs, s_est, f"{i}_stochastic", res_dir)

            if plot:
                i2c.plot_metrics(0, i, res_dir, "msg")
                s_est = i2c.get_marginal_trajectory()
                env.plot_sim(xs, s_est, f"{i}_stochastic", res_dir)

        i2c.plot_metrics(0, i, res_dir, "msg")
    except Exception as ex:
        logging.exception("Inference failed")
        i2c.plot_metrics(0, i, res_dir, "esc")
        raise

    # update policy
    if env.simulated:
        # policy.write(*i2c.get_local_linear_policy())
        policy_linear.write(*i2c.get_local_linear_policy())
        z_est, z_term_est = i2c.get_marginal_observed_trajectory()
        xs, ys, zs, zs_term = env.batch_eval(policy_linear, N_EVAL,
                                             deterministic=False)
        s_est = i2c.get_marginal_trajectory()
        env.plot_sim(xs, s_est, "evaluation stochastic", res_dir)

        xs, ys, zs, zs_term = env.batch_eval(policy_linear, N_EVAL,
                                             deterministic=True)
        env.plot_sim(xs, s_est, "evaluation deterministic", res_dir)

        z_est, z_term_est = i2c.get_marginal_observed_trajectory()
        traj_eval_iter.eval(zs, zs_term, z_est, z_term_est)
        traj_eval.eval(zs, zs_term, z_est, z_term_est)
        traj_eval_iter.plot("over_iterations", res_dir)
        traj_eval.plot("over_episodes", res_dir)

    i2c.plot_alphas(res_dir, "final")
    i2c.plot_cost(res_dir, "cost_final")

    policy_linear.write(*i2c.get_local_linear_policy())
    x_final, y_final, _, _ = env.run(policy_linear)
    s_est = i2c.get_marginal_trajectory()
    env.plot_sim(x_final, s_est, "Final", res_dir)
    # generate gif for mujoco envs
    env.run_render(policy_linear, res_dir)

    policy_linear.zero()
    policy_linear.k = i2c.get_marginal_input().reshape(policy_linear.k.shape)
    x_ff, _, _, _ = env.run(policy_linear)
    env.plot_sim(x_ff, s_est, "Final Feedforward", res_dir)

    # save model and data
    save_trajectories(x_final, y_final, i2c, res_dir)
    traj_eval.save("episodic", res_dir)
    traj_eval_iter.save("iter", res_dir)
    i2c.save(res_dir, f"{i}")

    i2c.close()
    env.close()
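
# A hypothetical entry point for run(), assuming an experiment config module
# analogous to the ones imported in the examples above (the actual driver
# script is not shown here):
if __name__ == "__main__":
    from experiments import pendulum_known_act_reg_quad as experiment

    res_dir = make_results_folder("i2c_run_example", 0, "", release=True)
    run(experiment, res_dir, weight_path=None)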