Exemple #1
0
def rollout_dummy_rbf_policy_4dof():
    """
    Roll out a freshly created `DualRBFLinearPolicy` on the 4-dof WAM ball-in-cup simulation.

    The environment is first stepped 1500 times with an all-zero action while rendering. Afterwards the ball
    position, the cup goal position, and the first five entries of `qpos` are printed. Finally, rollouts are
    repeated until `after_rollout_query` reports that we are done.
    """
    # Environment
    # Note, when tuning the task args: the `R` matrices are now 4x4 for the 4 dof WAM
    task_args = dict(R=np.zeros((4, 4)), R_dev=np.diag([0.2, 0.2, 1e-2, 1e-2]))
    env = WAMBallInCupSim(num_dof=4, max_steps=3000, task_args=task_args)

    # Stabilize ball and print out the stable state
    env.reset()
    zero_act = np.zeros(env.spec.act_space.flat_dim)
    for _ in range(1500):
        env.step(zero_act)
        env.render(mode=RenderMode(video=True))

    # Printing out actual positions for 4-dof (..just needed to setup the hard-coded values in the class)
    print('Ball pos:', env.sim.data.get_body_xpos('ball'))
    print('Cup goal:', env.sim.data.get_site_xpos('cup_goal'))
    print('Joint pos (incl. first rope angle):', env.sim.data.qpos[:5])

    # Apply DualRBFLinearPolicy and plot the joint states over the desired ones
    rbf_hparam = dict(num_feat_per_dim=7, bounds=(np.array([0.]), np.array([1.])))
    policy = DualRBFLinearPolicy(env.spec, rbf_hparam, dim_mask=2)

    # The first rollout uses no explicit domain parameters; later ones use whatever the query returned
    next_param = None
    while True:
        ro = rollout(
            env, policy, render_mode=RenderMode(video=True), eval=True, reset_kwargs=dict(domain_param=next_param)
        )
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        stop, _, next_param = after_rollout_query(env, policy, ro)
        if stop:
            break
Exemple #2
0
def eval_damping():
    """
    Plot joint trajectories for different joint damping parameters.

    An experiment chosen via `ask_for_experiment` is loaded, the environment is unwrapped with `inner_env`, and its
    domain parameters are set to the nominal ones. The loaded policy is then rolled out once per damping
    coefficient. The `qpos` columns 1, 3, and 5 are plotted over time, using one subplot per column and one line per
    damping coefficient.
    """
    # Load experiment and remove possible randomization wrappers
    ex_dir = ask_for_experiment()
    env, policy, _ = load_experiment(ex_dir)
    env = inner_env(env)
    env.domain_param = WAMBallInCupSim.get_nominal_domain_param()

    dampings = [0., 1e-2, 1e-1, 1e0]
    print_cbt(f'Run policy for damping coefficients: {dampings}')

    # Collect one time vector and one joint position trajectory per damping coefficient
    times, qpos_trajs = [], []
    for d in dampings:
        env.reset(domain_param=dict(joint_damping=d))
        ro = rollout(env, policy, render_mode=RenderMode(video=False), eval=True)
        times.append(ro.env_infos['t'])
        qpos_trajs.append(ro.env_infos['qpos'])

    _, axs = plt.subplots(3, sharex='all')
    line_styles = ['k-', 'b--', 'g-.', 'r:']  # line style setting for better visibility
    for ax, idx in zip(axs, [1, 3, 5]):
        for d, t, qpos, ls in zip(dampings, times, qpos_trajs, line_styles):
            ax.plot(t, qpos[:, idx], ls, label=f'damping: {d}')
        ax.set_ylabel(f'joint {idx} pos [rad]')

    # One legend is enough since the line styles are shared; create it once after all lines have been added
    axs[0].legend()
    axs[2].set_xlabel('time [s]')
    plt.suptitle('Evaluation of joint damping coefficient')
    plt.show()
Exemple #3
0
def get_default_randomizer_wambic() -> DomainRandomizer:
    """
    Create the default domain randomizer for the WAM ball-in-cup simulation.

    The cup scale and the rope length are drawn from normal distributions centered at their nominal values, each
    with a lower clipping bound.

    :return: randomizer holding the two domain parameters
    """
    from pyrado.environments.mujoco.wam import WAMBallInCupSim

    nominal = WAMBallInCupSim.get_nominal_domain_param()
    cup_scale_nom = nominal['cup_scale']
    rope_length_nom = nominal['rope_length']

    # Ball needs to fit into the cup, hence the lower bound on the scale
    cup_scale = NormalDomainParam(name='cup_scale', mean=cup_scale_nom, std=cup_scale_nom / 5, clip_lo=0.6)

    # Lower bound on the rope length; the original note suggests this is about 10cm below nominal -- TODO confirm
    rope_length = NormalDomainParam(name='rope_length', mean=rope_length_nom, std=rope_length_nom / 10, clip_lo=0.2)

    return DomainRandomizer(cup_scale, rope_length)
Exemple #4
0
def rollout_dummy_rbf_policy():
    """
    Roll out a freshly created `DualRBFLinearPolicy` on the WAM ball-in-cup simulation and plot the last rollout.

    The environment is first stepped 500 times with an all-zero action while rendering. Rollouts are then repeated
    until `after_rollout_query` reports that we are done. From the last rollout, two figures are created: the
    actual and desired `qpos` columns 1, 3, and 5 over time, and a 3D view of the `ball_pos` and `state_des`
    trajectories (labelled 'Ball' and 'Cup').
    """
    # Environment
    env = WAMBallInCupSim(max_steps=1750, task_args=dict(sparse_rew_fcn=True))

    # Stabilize around initial position
    env.reset(domain_param=dict(cup_scale=1., rope_length=0.3103, ball_mass=0.021))
    act = np.zeros((6,))  # desired deltas from the initial pose
    for _ in range(500):
        env.step(act)
        env.render(mode=RenderMode(video=True))

    # Apply DualRBFLinearPolicy
    rbf_hparam = dict(num_feat_per_dim=7, bounds=(np.array([0.]), np.array([1.])))
    policy = DualRBFLinearPolicy(env.spec, rbf_hparam, dim_mask=1)

    # The first rollout uses no explicit domain parameters; later ones use whatever the query returned
    param = None
    while True:
        ro = rollout(env, policy, render_mode=RenderMode(video=True), eval=True, reset_kwargs=dict(domain_param=param))
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, _, param = after_rollout_query(env, policy, ro)
        if done:
            break

    # Retrieve infos from the last rollout (only the quantities that are actually plotted)
    t = ro.env_infos['t']
    des_pos_traj = ro.env_infos['qpos_des']  # (max_steps,7) ndarray
    pos_traj = ro.env_infos['qpos']
    ball_pos = ro.env_infos['ball_pos']
    state_des = ro.env_infos['state_des']

    # Plot trajectories of the directly controlled joints and their corresponding desired trajectories
    _, axs = plt.subplots(3, sharex='all')
    for ax, idx in zip(axs, [1, 3, 5]):
        ax.plot(t, des_pos_traj[:, idx], label=f'qpos_des {idx}')
        ax.plot(t, pos_traj[:, idx], label=f'qpos {idx}')
        ax.legend()

    # Plot the ball and cup trajectories in 3D, marking the final position of each with a dot
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot(xs=ball_pos[:, 0], ys=ball_pos[:, 1], zs=ball_pos[:, 2], color='blue', label='Ball')
    ax.scatter(xs=ball_pos[-1, 0], ys=ball_pos[-1, 1], zs=ball_pos[-1, 2], color='blue', label='Ball final')
    ax.plot(xs=state_des[:, 0], ys=state_des[:, 1], zs=state_des[:, 2], color='red', label='Cup')
    ax.scatter(xs=state_des[-1, 0], ys=state_des[-1, 1], zs=state_des[-1, 2], color='red', label='Cup final')
    ax.legend()
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    ax.view_init(elev=16., azim=-7.)
    plt.show()
Exemple #5
0
def create_default_randomizer_wambic() -> DomainRandomizer:
    """
    Create the default domain randomizer for the WAM ball-in-cup simulation.

    Cup scale, rope length, and ball mass are normally distributed, while joint damping, joint stiction, and rope
    damping are uniformly distributed. All distributions are centered at the nominal domain parameter values.

    :return: randomizer holding the six domain parameters
    """
    from pyrado.environments.mujoco.wam import WAMBallInCupSim

    nom = WAMBallInCupSim.get_nominal_domain_param()

    # Ball needs to fit into the cup
    cup_scale = NormalDomainParam(name='cup_scale', mean=nom['cup_scale'], std=nom['cup_scale']/5, clip_lo=0.65)

    # Rope won't be more than 3cm off
    rope_length = NormalDomainParam(
        name='rope_length', mean=nom['rope_length'], std=nom['rope_length']/30, clip_lo=0.27, clip_up=0.33
    )

    ball_mass = NormalDomainParam(name='ball_mass', mean=nom['ball_mass'], std=nom['ball_mass']/10, clip_lo=1e-2)

    joint_damping = UniformDomainParam(
        name='joint_damping', mean=nom['joint_damping'], halfspan=nom['joint_damping']/2, clip_lo=0.
    )

    joint_stiction = UniformDomainParam(
        name='joint_stiction', mean=nom['joint_stiction'], halfspan=nom['joint_stiction']/2, clip_lo=0.
    )

    rope_damping = UniformDomainParam(
        name='rope_damping', mean=nom['rope_damping'], halfspan=nom['rope_damping']/2, clip_lo=1e-6
    )

    return DomainRandomizer(cup_scale, rope_length, ball_mass, joint_damping, joint_stiction, rope_damping)
Exemple #6
0
def default_wambic():
    """ Create a `WAMBallInCupSim` with `max_steps=1750`, leaving all other arguments at their defaults """
    env = WAMBallInCupSim(max_steps=1750)
    return env
    # Experiment (set seed before creating the modules)
    ex_dir = setup_experiment(WAMBallInCupSim.name, f'{BayRn.name}-{PoWER.name}_{DualRBFLinearPolicy.name}',
                              'rand-rl-rd-bm-js-jd')

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environments
    env_sim_hparams = dict(
        num_dof=4,
        max_steps=1750,
        fixed_init_state=True,
        stop_on_collision=True,
        task_args=dict(final_factor=0.2)
    )
    env_sim = WAMBallInCupSim(**env_sim_hparams)
    env_sim = DomainRandWrapperLive(env_sim, create_zero_var_randomizer(env_sim))
    dp_map = {
        0: ('rope_length', 'mean'),
        1: ('rope_length', 'std'),
        2: ('rope_damping', 'mean'),
        3: ('rope_damping', 'halfspan'),
        4: ('ball_mass', 'mean'),
        5: ('ball_mass', 'std'),
        6: ('joint_stiction', 'mean'),
        7: ('joint_stiction', 'halfspan'),
        8: ('joint_damping', 'mean'),
        9: ('joint_damping', 'halfspan'),
    }
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)
    # Experiment
    ex_dir = setup_experiment(
        WAMBallInCupSim.name,
        f'{UDR.name}-{PoWER.name}_{DualRBFLinearPolicy.name}',
        'rand-cs-rl-bm-jd-js')

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environment
    env_hparams = dict(num_dof=4,
                       max_steps=1750,
                       task_args=dict(final_factor=0.5),
                       fixed_init_state=False)
    env = WAMBallInCupSim(**env_hparams)

    # Randomizer
    randomizer = DomainRandomizer(
        UniformDomainParam(name='cup_scale', mean=0.95, halfspan=0.05),
        NormalDomainParam(name='rope_length', mean=0.3, std=0.005),
        NormalDomainParam(name='ball_mass', mean=0.021, std=0.001),
        UniformDomainParam(name='joint_damping', mean=0.05, halfspan=0.05),
        UniformDomainParam(name='joint_stiction', mean=0.1, halfspan=0.1),
    )
    env = DomainRandWrapperLive(env, randomizer)

    # Policy
    policy_hparam = dict(rbf_hparam=dict(num_feat_per_dim=10,
                                         bounds=(0., 1.),
                                         scale=None),
Exemple #9
0
 def default_wambic():
     """ Create a `WAMBallInCupSim` with `num_dof=7` and `max_steps=1750` """
     env = WAMBallInCupSim(num_dof=7, max_steps=1750)
     return env