def rollout_dummy_rbf_policy_4dof(): # Environment env = WAMBallInCupSim( num_dof=4, max_steps=3000, # Note, when tuning the task args: the `R` matrices are now 4x4 for the 4 dof WAM task_args=dict(R=np.zeros((4, 4)), R_dev=np.diag([0.2, 0.2, 1e-2, 1e-2])), ) # Stabilize ball and print out the stable state env.reset() act = np.zeros(env.spec.act_space.flat_dim) for i in range(1500): env.step(act) env.render(mode=RenderMode(video=True)) # Printing out actual positions for 4-dof (..just needed to setup the hard-coded values in the class) print('Ball pos:', env.sim.data.get_body_xpos('ball')) print('Cup goal:', env.sim.data.get_site_xpos('cup_goal')) print('Joint pos (incl. first rope angle):', env.sim.data.qpos[:5]) # Apply DualRBFLinearPolicy and plot the joint states over the desired ones rbf_hparam = dict(num_feat_per_dim=7, bounds=(np.array([0.]), np.array([1.]))) policy = DualRBFLinearPolicy(env.spec, rbf_hparam, dim_mask=2) done, param = False, None while not done: ro = rollout(env, policy, render_mode=RenderMode(video=True), eval=True, reset_kwargs=dict(domain_param=param)) print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True) done, _, param = after_rollout_query(env, policy, ro)
def rollout_dummy_rbf_policy_7dof(): # Environment env = WAMBallInCupSim(num_dof=7, max_steps=1750, task_args=dict(sparse_rew_fcn=True)) # Stabilize around initial position env.reset(domain_param=dict(cup_scale=1.0, rope_length=0.3103, ball_mass=0.021)) act = np.zeros((6,)) # desired deltas from the initial pose for i in range(500): env.step(act) env.render(mode=RenderMode(video=True)) # Apply DualRBFLinearPolicy policy_hparam = dict(num_feat_per_dim=7, bounds=(np.array([0.0]), np.array([1.0]))) policy = DualRBFLinearPolicy(env.spec, policy_hparam, dim_mask=1) done, param = False, None while not done: ro = rollout(env, policy, render_mode=RenderMode(video=True), eval=True, reset_kwargs=dict(domain_param=param)) print_cbt(f"Return: {ro.undiscounted_return()}", "g", bright=True) done, _, param = after_rollout_query(env, policy, ro) # Retrieve infos from rollout t = ro.time des_pos_traj = ro.env_infos["qpos_des"] pos_traj = ro.env_infos["qpos"] des_vel_traj = ro.env_infos["qvel_des"] vel_traj = ro.env_infos["qvel"] ball_pos = ro.env_infos["ball_pos"] cup_pos = ro.env_infos["cup_pos"] # Plot trajectories of the directly controlled joints and their corresponding desired trajectories fig, ax = plt.subplots(3, sharex="all") for i, idx in enumerate([1, 3, 5]): ax[i].plot(t, des_pos_traj[:, idx], label=f"qpos_des {idx}") ax[i].plot(t, pos_traj[:, idx], label=f"qpos {idx}") ax[i].legend() fig = plt.figure() ax = fig.add_subplot(111, projection="3d") ax.plot(xs=ball_pos[:, 0], ys=ball_pos[:, 1], zs=ball_pos[:, 2], color="blue", label="Ball") ax.scatter(xs=ball_pos[-1, 0], ys=ball_pos[-1, 1], zs=ball_pos[-1, 2], color="blue", label="Ball final") ax.plot(xs=cup_pos[:, 0], ys=cup_pos[:, 1], zs=cup_pos[:, 2], color="red", label="Cup") ax.scatter(xs=cup_pos[-1, 0], ys=cup_pos[-1, 1], zs=cup_pos[-1, 2], color="red", label="Cup final") ax.legend() ax.set_xlabel("x") ax.set_ylabel("y") ax.set_zlabel("z") ax.view_init(elev=16.0, azim=-7.0) plt.show()
def rollout_dummy_rbf_policy(): # Environment env = WAMBallInCupSim(max_steps=1750, task_args=dict(sparse_rew_fcn=True)) # Stabilize around initial position env.reset(domain_param=dict(cup_scale=1., rope_length=0.3103, ball_mass=0.021)) act = np.zeros((6,)) # desired deltas from the initial pose for i in range(500): env.step(act) env.render(mode=RenderMode(video=True)) # Apply DualRBFLinearPolicy rbf_hparam = dict(num_feat_per_dim=7, bounds=(np.array([0.]), np.array([1.]))) policy = DualRBFLinearPolicy(env.spec, rbf_hparam, dim_mask=1) done, param = False, None while not done: ro = rollout(env, policy, render_mode=RenderMode(video=True), eval=True, reset_kwargs=dict(domain_param=param)) print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True) done, _, param = after_rollout_query(env, policy, ro) # Retrieve infos from rollout t = ro.env_infos['t'] des_pos_traj = ro.env_infos['qpos_des'] # (max_steps,7) ndarray pos_traj = ro.env_infos['qpos'] des_vel_traj = ro.env_infos['qvel_des'] # (max_steps,7) ndarray vel_traj = ro.env_infos['qvel'] ball_pos = ro.env_infos['ball_pos'] state_des = ro.env_infos['state_des'] # Plot trajectories of the directly controlled joints and their corresponding desired trajectories fig, ax = plt.subplots(3, sharex='all') for i, idx in enumerate([1, 3, 5]): ax[i].plot(t, des_pos_traj[:, idx], label=f'qpos_des {idx}') ax[i].plot(t, pos_traj[:, idx], label=f'qpos {idx}') ax[i].legend() fig = plt.figure() ax = fig.add_subplot(111, projection='3d') ax.plot(xs=ball_pos[:, 0], ys=ball_pos[:, 1], zs=ball_pos[:, 2], color='blue', label='Ball') ax.scatter(xs=ball_pos[-1, 0], ys=ball_pos[-1, 1], zs=ball_pos[-1, 2], color='blue', label='Ball final') ax.plot(xs=state_des[:, 0], ys=state_des[:, 1], zs=state_des[:, 2], color='red', label='Cup') ax.scatter(xs=state_des[-1, 0], ys=state_des[-1, 1], zs=state_des[-1, 2], color='red', label='Cup final') ax.legend() ax.set_xlabel('x') ax.set_ylabel('y') ax.set_zlabel('z') ax.view_init(elev=16., azim=-7.) plt.show()
def __init__(self): ShowBase.__init__(self) self.done = False self.state = None self.param = None print("a") self.ro = rollout( env, policy, render_mode=RenderMode(text=args.verbose, video=args.animation), eval=True, max_steps=max_steps, stop_on_done=not args.relentless, reset_kwargs=dict(domain_param=self.param, init_state=self.state), ) print("hoi") print_domain_params(env.domain_param) print_cbt(f"Return: {self.ro.undiscounted_return()}", "g", bright=True) self.done, self.state, self.param = after_rollout_query( env, policy, self.ro) print("1") self.bob = BallOnBeamSim(2) print("2") self.pos, self.r_ball, self.a, self.l_beam, self.d_beam = self.bob._init_anim( ) print("3") self.ball = self.loader.loadModel("my_models/ball") self.ball.reparentTo(self.render) self.ball.setPos(self.pos) self.box = self.loader.loadModel("my_models/box") self.box.reparentTo(self.render) self.box.setPos(0, 0, 0) self.box.setScale(self.l_beam, self.d_beam, 2 * self.d_beam) self.camera.setPos(0, -10, 0)
def sim_policy_fixed_env(env: SimEnv, policy: Policy, domain_param: [dict, list]): """ Simulate (with animation) a rollout in a environment with fixed domain parameters. :param env: environment stack as it was used during training :param policy: policy to simulate :param domain_param: domain parameter set or a list of sets that specify the environment """ # Remove wrappers that make the rollouts stochastic env = remove_env(env, GaussianObsNoiseWrapper) env = remove_env(env, DomainRandWrapperBuffer) env = remove_env(env, DomainRandWrapperLive) # Initialize done, state, i = False, None, 0 if isinstance(domain_param, dict): param = domain_param elif isinstance(domain_param, list): param = domain_param[i] else: raise pyrado.TypeErr(given=domain_param, expected_type=[dict, list]) while not done: ro = rollout(env, policy, reset_kwargs=dict(domain_param=param, init_state=state), render_mode=RenderMode(video=True), eval=True) print_domain_params(env.domain_param) print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True) done, state, _ = after_rollout_query(env, policy, ro) if isinstance(domain_param, list): # Iterate over the list of domain parameter sets i = (i + 1) % len(domain_param) param = domain_param[i]
cand = to.load(osp.join(ex_dir, found_cands[i])).numpy() ax.scatter(np.arange(cand.size), cand, label='$\phi_{' + str(i) + '}$', c=f'C{i%10}', s=16) ax.xaxis.set_major_locator(MaxNLocator(integer=True)) ax.set_ylabel('parameter value') ax.set_xlabel('parameter index') plt.legend() plt.show() # Simulate for i in range(len(found_policies)): # Load current policy = to.load(osp.join(ex_dir, found_policies[i])) cand = to.load(osp.join(ex_dir, found_cands[i])) # Set the domain randomizer given the hyper-parameters if isinstance(env_sim, MetaDomainRandWrapper): env_sim.adapt_randomizer(cand) print_cbt(f'Set the domain randomizer to\n{env_sim.randomizer}', 'c') else: raise pyrado.TypeErr(given=env_sim, expected_type=MetaDomainRandWrapper) done, state, param = False, None, None while not done: print_cbt(f'Simulating {found_policies[i]} with associated domain parameter distribution.', 'g') ro = rollout(env_sim, policy, render_mode=RenderMode(video=True), eval=True, reset_kwargs=dict(domain_param=param, init_state=state)) # calls env.reset() print_domain_params(env_sim.domain_param) print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True) done, state, param = after_rollout_query(env_sim, policy, ro) pyrado.close_vpython()
from pyrado.utils.argparser import get_argparser if __name__ == '__main__': # Parse command line arguments args = get_argparser().parse_args() # Get the experiment's directory to load from ex_dir = ask_for_experiment() # Load the policy (trained in simulation) and the environment (for constructing the real-world counterpart) env_sim, policy, _ = load_experiment(ex_dir) # Detect the correct real-world counterpart and create it if isinstance(inner_env(env_sim), WAMBallInCupSim): # If `max_steps` (or `dt`) are not explicitly set using `args`, use the same as in the simulation max_steps = args.max_steps if args.max_steps < pyrado.inf else env_sim.max_steps dt = args.dt if args.dt is not None else env_sim.dt env_real = WAMBallInCupReal(dt=dt, max_steps=max_steps) else: raise pyrado.TypeErr(given=env_sim, expected_type=WAMBallInCupSim) # Finally wrap the env in the same as done during training env_real = wrap_like_other_env(env_real, env_sim) # Run on device done = False while not done: ro = rollout(env_real, policy, eval=True) print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True) done, _, _ = after_rollout_query(env_real, policy, ro)