def rollout_dummy_rbf_policy_4dof():
    """
    Roll out a freshly created `DualRBFLinearPolicy` on the 4-dof WAM ball-in-cup simulation.

    First, zero actions are applied for 1500 rendered steps and the resulting ball position, cup goal position,
    and first five generalized positions are printed. Afterwards, the policy is rolled out repeatedly (printing
    the undiscounted return each time) until `after_rollout_query` signals that we are done.
    """
    # Environment
    sim_kwargs = dict(
        num_dof=4,
        max_steps=3000,
        # Note, when tuning the task args: the `R` matrices are now 4x4 for the 4 dof WAM
        task_args=dict(R=np.zeros((4, 4)), R_dev=np.diag([0.2, 0.2, 1e-2, 1e-2])),
    )
    env = WAMBallInCupSim(**sim_kwargs)

    # Stabilize ball and print out the stable state
    env.reset()
    zero_act = np.zeros(env.spec.act_space.flat_dim)
    for _ in range(1500):
        env.step(zero_act)
        env.render(mode=RenderMode(video=True))

    # Printing out actual positions for 4-dof (..just needed to setup the hard-coded values in the class)
    sim_data = env.sim.data
    print('Ball pos:', sim_data.get_body_xpos('ball'))
    print('Cup goal:', sim_data.get_site_xpos('cup_goal'))
    print('Joint pos (incl. first rope angle):', sim_data.qpos[:5])

    # Apply DualRBFLinearPolicy and plot the joint states over the desired ones
    rbf_hparam = dict(num_feat_per_dim=7, bounds=(np.array([0.]), np.array([1.])))
    policy = DualRBFLinearPolicy(env.spec, rbf_hparam, dim_mask=2)

    # The first rollout uses no explicit domain parameters; later ones use whatever the query returned
    param = None
    while True:
        ro = rollout(
            env,
            policy,
            render_mode=RenderMode(video=True),
            eval=True,
            reset_kwargs=dict(domain_param=param),
        )
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, _, param = after_rollout_query(env, policy, ro)
        if done:
            break
def eval_damping():
    """
    Compare the joint trajectories obtained with different joint damping coefficients.

    An experiment is selected and loaded, the innermost environment is extracted and set to the nominal domain
    parameters. The loaded policy is then rolled out once per damping value, and the positions of the joints
    with index 1, 3, and 5 are plotted over time in three stacked subplots.
    """
    # Load experiment and remove possible randomization wrappers
    ex_dir = ask_for_experiment()
    env, policy, _ = load_experiment(ex_dir)
    env = inner_env(env)
    env.domain_param = WAMBallInCupSim.get_nominal_domain_param()

    dampings = [0., 1e-2, 1e-1, 1e0]
    print_cbt(f'Run policy for damping coefficients: {dampings}')

    # Collect one time vector and one joint position trajectory per damping value
    times, qpos_trajs = [], []
    for damping in dampings:
        env.reset(domain_param=dict(joint_damping=damping))
        ro = rollout(env, policy, render_mode=RenderMode(video=False), eval=True)
        times.append(ro.env_infos['t'])
        qpos_trajs.append(ro.env_infos['qpos'])

    fig, axes = plt.subplots(3, sharex='all')
    line_styles = ['k-', 'b--', 'g-.', 'r:']  # line style setting for better visibility
    for row, joint_idx in enumerate([1, 3, 5]):
        for time, qpos, style, damping in zip(times, qpos_trajs, line_styles, dampings):
            axes[row].plot(time, qpos[:, joint_idx], style, label=f'damping: {damping}')
        if row == 0:
            # Only the top subplot carries the legend
            axes[row].legend()
        axes[row].set_ylabel(f'joint {joint_idx} pos [rad]')

    axes[2].set_xlabel('time [s]')
    plt.suptitle('Evaluation of joint damping coefficient')
    plt.show()
def get_default_randomizer_wambic() -> DomainRandomizer:
    """
    Build the default domain randomizer for the WAM ball-in-cup simulation.

    Both randomized parameters are normally distributed around the nominal values reported by
    `WAMBallInCupSim.get_nominal_domain_param()` and are clipped from below.

    :return: randomizer for the `cup_scale` and the `rope_length` domain parameters
    """
    from pyrado.environments.mujoco.wam import WAMBallInCupSim

    nominal = WAMBallInCupSim.get_nominal_domain_param()
    cup_scale_nom = nominal['cup_scale']
    rope_length_nom = nominal['rope_length']

    # Ball needs to fit into the cup
    cup_scale = NormalDomainParam(name='cup_scale', mean=cup_scale_nom, std=cup_scale_nom / 5, clip_lo=0.6)

    # Rope won't be more than 10cm shorter (per the original author's note)
    rope_length = NormalDomainParam(name='rope_length', mean=rope_length_nom, std=rope_length_nom / 10, clip_lo=0.2)

    return DomainRandomizer(cup_scale, rope_length)
def rollout_dummy_rbf_policy():
    """
    Roll out a freshly created `DualRBFLinearPolicy` on the WAM ball-in-cup simulation and plot the result.

    The environment is reset with fixed domain parameters and held with zero actions for 500 rendered steps.
    Then the policy is rolled out repeatedly (printing the undiscounted return each time) until
    `after_rollout_query` signals that we are done. From the last rollout, the actual and desired positions of
    the joints with index 1, 3, and 5 are plotted over time, followed by a 3-dim plot of the `ball_pos` and
    `state_des` trajectories.
    """
    # Environment
    env = WAMBallInCupSim(max_steps=1750, task_args=dict(sparse_rew_fcn=True))

    # Stabilize around initial position
    env.reset(domain_param=dict(cup_scale=1., rope_length=0.3103, ball_mass=0.021))
    # NOTE(review): the action size is hard-coded to 6 here; confirm that it matches env.spec.act_space
    act = np.zeros((6,))  # desired deltas from the initial pose
    for _ in range(500):
        env.step(act)
        env.render(mode=RenderMode(video=True))

    # Apply DualRBFLinearPolicy
    rbf_hparam = dict(num_feat_per_dim=7, bounds=(np.array([0.]), np.array([1.])))
    policy = DualRBFLinearPolicy(env.spec, rbf_hparam, dim_mask=1)
    done, param = False, None
    while not done:
        ro = rollout(env, policy, render_mode=RenderMode(video=True), eval=True,
                     reset_kwargs=dict(domain_param=param))
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, _, param = after_rollout_query(env, policy, ro)

    # Retrieve infos from the last rollout (only the quantities that are actually plotted)
    t = ro.env_infos['t']
    des_pos_traj = ro.env_infos['qpos_des']  # (max_steps,7) ndarray according to the original note
    pos_traj = ro.env_infos['qpos']
    ball_pos = ro.env_infos['ball_pos']
    state_des = ro.env_infos['state_des']

    # Plot trajectories of the directly controlled joints and their corresponding desired trajectories
    _, axes = plt.subplots(3, sharex='all')
    for row, idx in enumerate([1, 3, 5]):
        axes[row].plot(t, des_pos_traj[:, idx], label=f'qpos_des {idx}')
        axes[row].plot(t, pos_traj[:, idx], label=f'qpos {idx}')
        axes[row].legend()

    # Plot the ball and the cup (desired state) trajectories in 3-dim space, marking the final points
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot(xs=ball_pos[:, 0], ys=ball_pos[:, 1], zs=ball_pos[:, 2], color='blue', label='Ball')
    ax.scatter(xs=ball_pos[-1, 0], ys=ball_pos[-1, 1], zs=ball_pos[-1, 2], color='blue', label='Ball final')
    ax.plot(xs=state_des[:, 0], ys=state_des[:, 1], zs=state_des[:, 2], color='red', label='Cup')
    ax.scatter(xs=state_des[-1, 0], ys=state_des[-1, 1], zs=state_des[-1, 2], color='red', label='Cup final')
    ax.legend()
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    ax.view_init(elev=16., azim=-7.)
    plt.show()
def create_default_randomizer_wambic() -> DomainRandomizer:
    """
    Build the default domain randomizer for the WAM ball-in-cup simulation.

    Every randomized parameter is centered at the nominal value reported by
    `WAMBallInCupSim.get_nominal_domain_param()`. Cup scale, rope length, and ball mass are normally
    distributed; joint damping, joint stiction, and rope damping are uniformly distributed.

    :return: randomizer for the six domain parameters listed above
    """
    from pyrado.environments.mujoco.wam import WAMBallInCupSim

    nominal = WAMBallInCupSim.get_nominal_domain_param()

    domain_params = [
        # Ball needs to fit into the cup
        NormalDomainParam(
            name='cup_scale', mean=nominal['cup_scale'], std=nominal['cup_scale']/5, clip_lo=0.65
        ),
        # Rope won't be more than 3cm off
        NormalDomainParam(
            name='rope_length',
            mean=nominal['rope_length'],
            std=nominal['rope_length']/30,
            clip_lo=0.27,
            clip_up=0.33,
        ),
        NormalDomainParam(
            name='ball_mass', mean=nominal['ball_mass'], std=nominal['ball_mass']/10, clip_lo=1e-2
        ),
        UniformDomainParam(
            name='joint_damping',
            mean=nominal['joint_damping'],
            halfspan=nominal['joint_damping']/2,
            clip_lo=0.,
        ),
        UniformDomainParam(
            name='joint_stiction',
            mean=nominal['joint_stiction'],
            halfspan=nominal['joint_stiction']/2,
            clip_lo=0.,
        ),
        UniformDomainParam(
            name='rope_damping',
            mean=nominal['rope_damping'],
            halfspan=nominal['rope_damping']/2,
            clip_lo=1e-6,
        ),
    ]
    return DomainRandomizer(*domain_params)
def default_wambic():
    """
    Create a WAM ball-in-cup simulation that only overrides the maximum number of steps.

    :return: `WAMBallInCupSim` instance constructed with `max_steps=1750`
    """
    env = WAMBallInCupSim(max_steps=1750)
    return env
# Experiment (set seed before creating the modules)
ex_dir = setup_experiment(
    WAMBallInCupSim.name,
    f'{BayRn.name}-{PoWER.name}_{DualRBFLinearPolicy.name}',
    'rand-rl-rd-bm-js-jd',
)

# Set seed if desired
pyrado.set_seed(args.seed, verbose=True)

# Environments
env_sim_hparams = {
    'num_dof': 4,
    'max_steps': 1750,
    'fixed_init_state': True,
    'stop_on_collision': True,
    'task_args': {'final_factor': 0.2},
}
env_sim = WAMBallInCupSim(**env_sim_hparams)
# The zero-variance randomizer is created from the bare simulation before it gets wrapped
env_sim = DomainRandWrapperLive(env_sim, create_zero_var_randomizer(env_sim))

# Mapping from an integer index to a (domain parameter name, distribution parameter name) pair.
# The indices are assigned consecutively from 0 in the order listed below.
dp_map = dict(enumerate([
    ('rope_length', 'mean'),
    ('rope_length', 'std'),
    ('rope_damping', 'mean'),
    ('rope_damping', 'halfspan'),
    ('ball_mass', 'mean'),
    ('ball_mass', 'std'),
    ('joint_stiction', 'mean'),
    ('joint_stiction', 'halfspan'),
    ('joint_damping', 'mean'),
    ('joint_damping', 'halfspan'),
]))
env_sim = MetaDomainRandWrapper(env_sim, dp_map)
# Experiment ex_dir = setup_experiment( WAMBallInCupSim.name, f'{UDR.name}-{PoWER.name}_{DualRBFLinearPolicy.name}', 'rand-cs-rl-bm-jd-js') # Set seed if desired pyrado.set_seed(args.seed, verbose=True) # Environment env_hparams = dict(num_dof=4, max_steps=1750, task_args=dict(final_factor=0.5), fixed_init_state=False) env = WAMBallInCupSim(**env_hparams) # Randomizer randomizer = DomainRandomizer( UniformDomainParam(name='cup_scale', mean=0.95, halfspan=0.05), NormalDomainParam(name='rope_length', mean=0.3, std=0.005), NormalDomainParam(name='ball_mass', mean=0.021, std=0.001), UniformDomainParam(name='joint_damping', mean=0.05, halfspan=0.05), UniformDomainParam(name='joint_stiction', mean=0.1, halfspan=0.1), ) env = DomainRandWrapperLive(env, randomizer) # Policy policy_hparam = dict(rbf_hparam=dict(num_feat_per_dim=10, bounds=(0., 1.), scale=None),
def default_wambic():
    """
    Create a 7-dof WAM ball-in-cup simulation with a fixed maximum number of steps.

    :return: `WAMBallInCupSim` instance constructed with `num_dof=7` and `max_steps=1750`
    """
    env = WAMBallInCupSim(num_dof=7, max_steps=1750)
    return env