Esempio n. 1
0
def rollout_dummy_rbf_policy_4dof():
    # Environment
    env = WAMBallInCupSim(
        num_dof=4,
        max_steps=3000,
        # Note, when tuning the task args: the `R` matrices are now 4x4 for the 4 dof WAM
        task_args=dict(R=np.zeros((4, 4)), R_dev=np.diag([0.2, 0.2, 1e-2, 1e-2])),
    )

    # Stabilize ball and print out the stable state
    env.reset()
    act = np.zeros(env.spec.act_space.flat_dim)
    for i in range(1500):
        env.step(act)
        env.render(mode=RenderMode(video=True))

    # Printing out actual positions for 4-dof (..just needed to setup the hard-coded values in the class)
    print("Ball pos:", env.sim.data.get_body_xpos("ball"))
    print("Cup goal:", env.sim.data.get_site_xpos("cup_goal"))
    print("Cup bottom:", env.sim.data.get_site_xpos("cup_bottom"))
    print("Joint pos (incl. first rope angle):", env.sim.data.qpos[:5])

    # Apply DualRBFLinearPolicy and plot the joint states over the desired ones
    rbf_hparam = dict(num_feat_per_dim=7, bounds=(np.array([0.0]), np.array([1.0])))
    policy = DualRBFLinearPolicy(env.spec, rbf_hparam, dim_mask=2)
    done, param = False, None
    while not done:
        ro = rollout(env, policy, render_mode=RenderMode(video=True), eval=True, reset_kwargs=dict(domain_param=param))
        print_cbt(f"Return: {ro.undiscounted_return()}", "g", bright=True)
        done, _, param = after_rollout_query(env, policy, ro)
Esempio n. 2
0
def eval_dryfriction():
    """Plot joint trajectories for different joint stiction parameters"""
    # Environment
    env = WAMBallInCupSim(num_dof=7, max_steps=1500)

    # Policy (random init)
    policy_hparam = dict(num_feat_per_dim=12, bounds=(np.array([0.0]), np.array([1.0])))
    policy = DualRBFLinearPolicy(env.spec, policy_hparam, dim_mask=2)

    # Do the rolllouts
    t_all = []
    qpos_all = []
    dp_vals = [0.0, 0.3, 0.6, 0.9, 1.2]
    print_cbt(f"Run policy for stiction coefficients: {dp_vals}")
    for dpv in dp_vals:
        env.reset(
            domain_param=dict(
                joint_1_dryfriction=dpv,
                joint_2_dryfriction=dpv,
                joint_3_dryfriction=dpv,
                joint_4_dryfriction=dpv,
                joint_5_dryfriction=dpv,
                joint_6_dryfriction=dpv,
                joint_7_dryfriction=dpv,
            )
        )
        ro = rollout(env, policy, render_mode=RenderMode(video=False), eval=True)
        t_all.append(ro.time[:-1])
        qpos_all.append(ro.env_infos["qpos"])

    # Plot
    fig, ax = plt.subplots(nrows=env.num_dof, sharex="all", figsize=(16, 7))
    for i, idx_joint in enumerate([dof for dof in range(env.num_dof)]):
        ax[i].set_prop_cycle(color=plt.get_cmap("cividis")(np.linspace(0, 1, env.num_dof)))
        ax[i].set_ylabel(f"joint {idx_joint+1} pos [rad]")
        for j in range(len(dp_vals)):
            ax[i].plot(t_all[j], qpos_all[j][:, idx_joint], ls="--", label=f"s = {dp_vals[j]}")
            if i == 0:
                ax[i].legend(ncol=len(dp_vals))
    ax[-1].set_xlabel("time [s]")
    plt.suptitle("Evaluation of joint stiction coefficients")
    plt.show()
Esempio n. 3
0
def rollout_dummy_rbf_policy_7dof():
    # Environment
    env = WAMBallInCupSim(num_dof=7, max_steps=1750, task_args=dict(sparse_rew_fcn=True))

    # Stabilize around initial position
    env.reset(domain_param=dict(cup_scale=1.0, rope_length=0.3103, ball_mass=0.021))
    act = np.zeros((6,))  # desired deltas from the initial pose
    for i in range(500):
        env.step(act)
        env.render(mode=RenderMode(video=True))

    # Apply DualRBFLinearPolicy
    policy_hparam = dict(num_feat_per_dim=7, bounds=(np.array([0.0]), np.array([1.0])))
    policy = DualRBFLinearPolicy(env.spec, policy_hparam, dim_mask=1)
    done, param = False, None
    while not done:
        ro = rollout(env, policy, render_mode=RenderMode(video=True), eval=True, reset_kwargs=dict(domain_param=param))
        print_cbt(f"Return: {ro.undiscounted_return()}", "g", bright=True)
        done, _, param = after_rollout_query(env, policy, ro)

    # Retrieve infos from rollout
    t = ro.time
    des_pos_traj = ro.env_infos["qpos_des"]
    pos_traj = ro.env_infos["qpos"]
    des_vel_traj = ro.env_infos["qvel_des"]
    vel_traj = ro.env_infos["qvel"]
    ball_pos = ro.env_infos["ball_pos"]
    cup_pos = ro.env_infos["cup_pos"]

    # Plot trajectories of the directly controlled joints and their corresponding desired trajectories
    fig, ax = plt.subplots(3, sharex="all")
    for i, idx in enumerate([1, 3, 5]):
        ax[i].plot(t, des_pos_traj[:, idx], label=f"qpos_des {idx}")
        ax[i].plot(t, pos_traj[:, idx], label=f"qpos {idx}")
        ax[i].legend()

    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.plot(xs=ball_pos[:, 0], ys=ball_pos[:, 1], zs=ball_pos[:, 2], color="blue", label="Ball")
    ax.scatter(xs=ball_pos[-1, 0], ys=ball_pos[-1, 1], zs=ball_pos[-1, 2], color="blue", label="Ball final")
    ax.plot(xs=cup_pos[:, 0], ys=cup_pos[:, 1], zs=cup_pos[:, 2], color="red", label="Cup")
    ax.scatter(xs=cup_pos[-1, 0], ys=cup_pos[-1, 1], zs=cup_pos[-1, 2], color="red", label="Cup final")
    ax.legend()
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.set_zlabel("z")
    ax.view_init(elev=16.0, azim=-7.0)
    plt.show()
    # Environment
    env_hparams = dict(
        num_dof=4,
        max_steps=1500,
        fixed_init_state=False,
        observe_ball=True,
        task_args=dict(
            final_factor=500,
            success_bonus=250,
            Q=np.diag([0.5, 1e-4, 4e1]),
            R=np.diag([0, 0, 1e-1, 2e-1]),
            Q_dev=np.diag([0.0, 0.0, 5]),
            # R_dev=np.diag([0., 0., 1e-3, 1e-3])
        ),
    )
    env = WAMBallInCupSim(**env_hparams)

    # Randomizer
    randomizer = DomainRandomizer(
        UniformDomainParam(name="cup_scale", mean=1.0, halfspan=0.2),
        NormalDomainParam(name="rope_length", mean=0.3, std=0.005),
        NormalDomainParam(name="ball_mass", mean=0.021, std=0.001),
        UniformDomainParam(name="joint_2_damping", mean=0.05, halfspan=0.05),
        UniformDomainParam(name="joint_2_dryfriction", mean=0.1, halfspan=0.1),
    )
    env = DomainRandWrapperLive(env, randomizer)

    # Policy
    bounds = ([0.0, 0.25, 0.5], [1.0, 1.5, 2.5])
    policy_hparam = dict(rbf_hparam=dict(num_feat_per_dim=9, bounds=bounds, scale=None), dim_mask=2)
    policy = DualRBFLinearPolicy(env.spec, **policy_hparam)
Esempio n. 5
0
 def default_wambic():
     return WAMBallInCupSim(num_dof=7, max_steps=1750)
Esempio n. 6
0
    ref_ex_dir = osp.join(pyrado.EXP_DIR, WAMBallInCupSim.name,
                          f"{BayRn.name}-{PoWER.name}", ref_ex_name)

    # Experiment (set seed before creating the modules)
    ex_dir = setup_experiment(
        WAMBallInCupSim.name,
        PoWER.name + "_" + DualRBFLinearPolicy.name,
        f"ref_{ref_ex_name}_argmax_seed-{args.seed}",
    )

    # Hyperparameters of reference experiment
    hparams = load_dict_from_yaml(osp.join(ref_ex_dir, "hyperparams.yaml"))

    # Environment
    env_hparams = hparams["env"]
    env = WAMBallInCupSim(**env_hparams)

    # Randomizer
    dp_nom = WAMBallInCupSim.get_nominal_domain_param()
    randomizer = DomainRandomizer(
        # UniformDomainParam(name='cup_scale', mean=0.95, halfspan=0.05),
        # UniformDomainParam(name='ball_mass', mean=2.1000e-02, halfspan=3.1500e-03, clip_lo=0),
        # UniformDomainParam(name='rope_length', mean=3.0000e-01, halfspan=1.5000e-02, clip_lo=0.27, clip_up=0.33),
        # UniformDomainParam(name='rope_damping', mean=1.0000e-04, halfspan=1.0000e-04, clip_lo=1e-2),
        # UniformDomainParam(name='joint_2_damping', mean=5.0000e-02, halfspan=5.0000e-02, clip_lo=1e-6),
        # UniformDomainParam(name='joint_2_dryfriction', mean=2.0000e-01, halfspan=2.0000e-01, clip_lo=0),
        #
        NormalDomainParam(name="rope_length",
                          mean=2.9941e-01,
                          std=1.0823e-02,
                          clip_lo=0.27,
Esempio n. 7
0
            "it been specified explicitly! Please provide the time step size using --dt."
        )

    if env_name == QBallBalancerSim.name:
        env = QBallBalancerSim(dt=dt)

    elif env_name == QCartPoleSwingUpSim.name:
        env = QCartPoleSwingUpSim(dt=dt)

    elif env_name == QQubeSwingUpSim.name:
        env = QQubeSwingUpSim(dt=dt)

    elif env_name == "wam-bic":  # avoid loading mujoco
        from pyrado.environments.mujoco.wam_bic import WAMBallInCupSim

        env = WAMBallInCupSim(num_dof=4)
        env.init_space = BoxSpace(-pyrado.inf,
                                  pyrado.inf,
                                  shape=env.init_space.shape)

    elif env_name == "wam-jsc":  # avoid loading mujoco
        from pyrado.environments.mujoco.wam_jsc import WAMJointSpaceCtrlSim

        env = WAMJointSpaceCtrlSim(num_dof=7)
        env.init_space = BoxSpace(-pyrado.inf,
                                  pyrado.inf,
                                  shape=env.init_space.shape)

    elif env_name == "mg-ik":  # avoid loading _rcsenv
        from pyrado.environments.rcspysim.mini_golf import MiniGolfIKSim
Esempio n. 8
0
def create_default_randomizer_wambic() -> DomainRandomizer:
    from pyrado.environments.mujoco.wam_bic import WAMBallInCupSim

    dp_nom = WAMBallInCupSim.get_nominal_domain_param()
    return DomainRandomizer(
        NormalDomainParam(name="ball_mass",
                          mean=dp_nom["ball_mass"],
                          std=dp_nom["ball_mass"] / 10,
                          clip_lo=1e-2),
        # Ball needs to fit into the cup
        NormalDomainParam(name="cup_scale",
                          mean=dp_nom["cup_scale"],
                          std=dp_nom["cup_scale"] / 5,
                          clip_lo=0.65),
        # Rope won't be more than 3cm off
        NormalDomainParam(name="rope_length",
                          mean=dp_nom["rope_length"],
                          std=dp_nom["rope_length"] / 30,
                          clip_lo=0.27,
                          clip_up=0.33),
        UniformDomainParam(name="rope_damping",
                           mean=dp_nom["rope_damping"],
                           halfspan=dp_nom["rope_damping"] / 2,
                           clip_lo=1e-6),
        UniformDomainParam(name="joint_1_damping",
                           mean=dp_nom["joint_1_damping"],
                           halfspan=dp_nom["joint_1_damping"] / 2,
                           clip_lo=0.0),
        UniformDomainParam(name="joint_2_damping",
                           mean=dp_nom["joint_2_damping"],
                           halfspan=dp_nom["joint_2_damping"] / 2,
                           clip_lo=0.0),
        UniformDomainParam(name="joint_3_damping",
                           mean=dp_nom["joint_3_damping"],
                           halfspan=dp_nom["joint_3_damping"] / 2,
                           clip_lo=0.0),
        UniformDomainParam(name="joint_4_damping",
                           mean=dp_nom["joint_4_damping"],
                           halfspan=dp_nom["joint_4_damping"] / 2,
                           clip_lo=0.0),
        UniformDomainParam(name="joint_5_damping",
                           mean=dp_nom["joint_5_damping"],
                           halfspan=dp_nom["joint_5_damping"] / 2,
                           clip_lo=0.0),
        UniformDomainParam(name="joint_6_damping",
                           mean=dp_nom["joint_6_damping"],
                           halfspan=dp_nom["joint_6_damping"] / 2,
                           clip_lo=0.0),
        UniformDomainParam(name="joint_7_damping",
                           mean=dp_nom["joint_7_damping"],
                           halfspan=dp_nom["joint_7_damping"] / 2,
                           clip_lo=0.0),
        UniformDomainParam(
            name="joint_1_dryfriction",
            mean=dp_nom["joint_1_dryfriction"],
            halfspan=dp_nom["joint_1_dryfriction"] / 2,
            clip_lo=0.0,
        ),
        UniformDomainParam(
            name="joint_2_dryfriction",
            mean=dp_nom["joint_2_dryfriction"],
            halfspan=dp_nom["joint_2_dryfriction"] / 2,
            clip_lo=0.0,
        ),
        UniformDomainParam(
            name="joint_3_dryfriction",
            mean=dp_nom["joint_3_dryfriction"],
            halfspan=dp_nom["joint_3_dryfriction"] / 2,
            clip_lo=0.0,
        ),
        UniformDomainParam(
            name="joint_4_dryfriction",
            mean=dp_nom["joint_4_dryfriction"],
            halfspan=dp_nom["joint_4_dryfriction"] / 2,
            clip_lo=0.0,
        ),
        UniformDomainParam(
            name="joint_5_dryfriction",
            mean=dp_nom["joint_5_dryfriction"],
            halfspan=dp_nom["joint_5_dryfriction"] / 2,
            clip_lo=0.0,
        ),
        UniformDomainParam(
            name="joint_6_dryfriction",
            mean=dp_nom["joint_6_dryfriction"],
            halfspan=dp_nom["joint_6_dryfriction"] / 2,
            clip_lo=0.0,
        ),
        UniformDomainParam(
            name="joint_7_dryfriction",
            mean=dp_nom["joint_7_dryfriction"],
            halfspan=dp_nom["joint_7_dryfriction"] / 2,
            clip_lo=0.0,
        ),
    )