def __init__(self, max_steps: int, example_config: bool):
    """
    Constructor

    :param max_steps: maximum number of simulation steps
    :param example_config: configuration for the 'illustrative example' in the journal
    """
    Serializable._init(self, locals())

    super().__init__(dt=None, max_steps=max_steps)

    self.example_config = example_config
    self._planet = -1

    # Initialize the domain parameters (Earth)
    self._g = 9.81  # gravity constant [m/s**2]
    self._k = 2e3  # catapult spring's stiffness constant [N/m]
    self._x = 1.  # catapult spring's pre-elongation [m]

    # Domain independent parameter
    self._m = 70.  # victim's mass [kg]

    # Set the bounds for the system's states and actions
    max_state = np.array([1000.])  # [m], arbitrary but >> self._x
    max_act = max_state
    self._curr_act = np.zeros_like(max_act)  # just for usage in render function

    self._state_space = BoxSpace(-max_state, max_state, labels=['$h$'])
    self._init_space = SingularStateSpace(np.zeros(self._state_space.shape), labels=['$h_0$'])
    self._act_space = BoxSpace(-max_act, max_act, labels=[r'$\theta$'])

    # Define the task including the reward function
    self._task = self._create_task(task_args=dict())
def __init__(self): """ Constructor """ Serializable._init(self, locals()) # Initialize basic variables super().__init__(dt=None, max_steps=1) # Set the bounds for the system's states adn actions max_state = np.array([100., 100.]) max_act = max_state self._curr_act = np.zeros_like( max_act) # just for usage in render function self._state_space = BoxSpace(-max_state, max_state, labels=['x_1', 'x_2']) self._init_space = SingularStateSpace(np.zeros( self._state_space.shape), labels=['x_1_init', 'x_2_init']) self._act_space = BoxSpace(-max_act, max_act, labels=['x_1_next', 'x_2_next']) # Define the task including the reward function self._task = self._create_task() # Animation with pyplot self._anim = dict(fig=None, trace_x=[], trace_y=[], trace_z=[])
def eval_domain_params(pool: SamplerPool,
                       env: SimEnv,
                       policy: Policy,
                       params: List[Dict],
                       init_state: Optional[np.ndarray] = None) -> List[StepSequence]:
    """
    Evaluate a policy on a multidimensional grid of domain parameters.

    :param pool: parallel sampler
    :param env: environment to evaluate in
    :param policy: policy to evaluate
    :param params: multidimensional grid of domain parameters
    :param init_state: initial state of the environment which will be fixed if not set to `None`
    :return: list of rollouts
    """
    # Strip all domain randomization wrappers from the environment
    env = remove_all_dr_wrappers(env, verbose=True)
    if init_state is not None:
        env.init_space = SingularStateSpace(fixed_state=init_state)

    pool.invoke_all(_ps_init, pickle.dumps(env), pickle.dumps(policy))

    # Run with progress bar
    with tqdm(leave=False, file=sys.stdout, unit="rollouts", desc="Sampling") as pb:
        return pool.run_map(functools.partial(_ps_run_one_domain_param, eval=True), params, pb)
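# Hypothetical usage sketch for eval_domain_params (not part of the original snippet):
# the grid values and the parameter names 'mass' and 'length' are made-up placeholders
# and must match the domain parameters of the concrete environment; `pool`, `env`, and
# `policy` are assumed to be created elsewhere (a SamplerPool, SimEnv, and Policy as in
# the signature above).
from itertools import product

masses = [0.8, 1.0, 1.2]
lengths = [0.4, 0.5]
params = [dict(mass=m, length=l) for m, l in product(masses, lengths)]  # list of domain parameter dicts

rollouts = eval_domain_params(pool, env, policy, params, init_state=None)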
def _create_spaces(self):
    # Initial state space
    # Set the actual stable initial position. This position would be reached after some time using the internal
    # PD controller to stabilize at self.qpos_des_init
    if self.num_dof == 7:
        self.init_qpos[:7] = np.array([0., 0.65, 0., 1.41, 0., -0.28, -1.57])
        self.init_qpos[7] = -0.21  # angle of the first rope segment relative to the cup bottom plate
        init_ball_pos = np.array([0.828, 0., 1.131])
        init_cup_goal = np.array([0.82521, 0, 1.4469])
    elif self.num_dof == 4:
        self.init_qpos[:4] = np.array([0., 0.63, 0., 1.27])
        self.init_qpos[4] = -0.34  # angle of the first rope segment relative to the cup bottom plate
        init_ball_pos = np.array([0.723, 0., 1.168])
        init_cup_goal = np.array([0.758, 0, 1.5])

    # The initial position of the ball in cartesian coordinates
    init_state = np.concatenate([self.init_qpos, self.init_qvel, init_ball_pos, init_cup_goal])
    if self.fixed_init_state:
        self._init_space = SingularStateSpace(init_state)
    else:
        # Add plus/minus one degree to each motor joint and the first rope segment joint
        init_state_up = init_state.copy()
        init_state_up[:self.num_dof] += np.pi / 180 * np.array([0.1, 1, 0.5, 1., 0.1, 1., 1.])[:self.num_dof]
        init_state_lo = init_state.copy()
        init_state_lo[:self.num_dof] -= np.pi / 180 * np.array([0.1, 1, 0.5, 1., 0.1, 1., 1.])[:self.num_dof]
        self._init_space = BoxSpace(init_state_lo, init_state_up)

    # State space
    state_shape = init_state.shape
    state_up, state_lo = np.full(state_shape, pyrado.inf), np.full(state_shape, -pyrado.inf)
    # Ensure that joint limits of the arm are not reached (5 deg safety margin)
    state_up[:self.num_dof] = np.array([2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7])[:self.num_dof] - 5 * np.pi / 180
    state_lo[:self.num_dof] = np.array([-2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7])[:self.num_dof] + 5 * np.pi / 180
    self._state_space = BoxSpace(state_lo, state_up)

    # Torque space
    max_torque = np.array([150.0, 125.0, 40.0, 60.0, 5.0, 5.0, 2.0])[:self.num_dof]
    self._torque_space = BoxSpace(-max_torque, max_torque)

    # Action space (PD controller on joint positions and velocities)
    if self.num_dof == 4:
        self._act_space = act_space_wam_4dof
    elif self.num_dof == 7:
        self._act_space = act_space_wam_7dof

    # Observation space (normalized time)
    self._obs_space = BoxSpace(np.array([0.]), np.array([1.]), labels=['t'])
def __init__(self,
             task_args: dict,
             ref_frame: str,
             position_mps: bool,
             mps_left: [Sequence[dict], None],
             fixed_init_state: bool = False,
             **kwargs):
    """
    Constructor

    .. note::
        This constructor should only be called via the subclasses.

    :param task_args: arguments for the task construction
    :param ref_frame: reference frame for the MPs, e.g. 'world', 'box', or 'upperGoal'
    :param mps_left: left arm's movement primitives holding the dynamical systems and the goal states
    :param position_mps: `True` if the MPs are defined on position level, `False` if defined on velocity level
    :param fixed_init_state: use an init state space with only one state (e.g. for debugging)
    :param kwargs: keyword arguments which are available for all task-based `RcsSim`
                   taskCombinationMethod: str = 'mean',  # 'sum', 'mean', 'product', or 'softmax'
                   checkJointLimits: bool = False,
                   collisionAvoidanceIK: bool = True,
                   observeVelocities: bool = True,
                   observeCollisionCost: bool = True,
                   observePredictedCollisionCost: bool = False,
                   observeManipulabilityIndex: bool = False,
                   observeCurrentManipulability: bool = True,
                   observeDynamicalSystemDiscrepancy: bool = False,
                   observeTaskSpaceDiscrepancy: bool = True,
                   observeForceTorque: bool = True
    """
    Serializable._init(self, locals())

    # Forward to the RcsSim's constructor
    RcsSim.__init__(
        self,
        envType='BoxShelving',
        extraConfigDir=osp.join(rcsenv.RCSPYSIM_CONFIG_PATH, 'BoxShelving'),
        hudColor='BLACK_RUBBER',
        task_args=task_args,
        refFrame=ref_frame,
        positionTasks=position_mps,
        tasksLeft=mps_left,
        **kwargs
    )

    # Initial state space definition
    if fixed_init_state:
        default_init_state = np.array([0., 0., 0., 0.8, 30.*np.pi/180, 90.*np.pi/180])  # [m, m, rad, m, rad, rad]
        self._init_space = SingularStateSpace(default_init_state,
                                              labels=['$x$', '$y$', '$th$', '$z$', '$q_2$', '$q_4$'])
    else:
        min_init_state = np.array([-0.02, -0.02, -3.*np.pi/180., 0.78, 27.*np.pi/180, 77.*np.pi/180])
        max_init_state = np.array([0.02, 0.02, 3.*np.pi/180., 0.82, 33.*np.pi/180, 83.*np.pi/180])
        self._init_space = BoxSpace(min_init_state, max_init_state,  # [m, m, rad, m, rad, rad]
                                    labels=['$x$', '$y$', '$th$', '$z$', '$q_2$', '$q_4$'])
def __init__(self, task_args: dict, max_dist_force: float = None, **kwargs):
    """
    Constructor

    .. note::
        This constructor should only be called via the subclasses.

    :param task_args: arguments for the task construction
    :param max_dist_force: maximum disturbance force, set to None (default) for no disturbance
    :param kwargs: keyword arguments forwarded to `RcsSim`
                   collisionConfig: specification of the Rcs CollisionModel
    """
    Serializable._init(self, locals())

    if kwargs.get('collisionConfig', None) is None:
        collision_config = {
            'pairs': [
                {'body1': 'Effector', 'body2': 'Link2'},
                {'body1': 'Effector', 'body2': 'Link1'},
            ],
            'threshold': 0.15,
            'predCollHorizon': 20
        }
    else:
        collision_config = kwargs.get('collisionConfig')

    # Forward to the RcsSim's constructor, nothing more needs to be done here
    RcsSim.__init__(self,
                    envType='Planar3Link',
                    graphFileName='gPlanar3Link.xml',
                    task_args=task_args,
                    collisionConfig=collision_config,
                    **kwargs)

    # Initial state space definition
    upright_init_state = np.array([0.1, 0.1, 0.1])  # [rad, rad, rad]
    self._init_space = SingularStateSpace(upright_init_state, labels=['$q_1$', '$q_2$', '$q_3$'])

    # Setup disturbance
    self._max_dist_force = max_dist_force
def _create_spaces(self):
    # Define the spaces
    max_state = np.array([4 * np.pi, 4 * np.pi])  # [rad, rad/s]
    max_obs = np.array([1.0, 1.0, np.inf])  # [-, -, rad/s]
    tau_max = self.domain_param["torque_thold"]

    self._state_space = BoxSpace(-max_state, max_state, labels=["theta", "theta_dot"])
    self._obs_space = BoxSpace(-max_obs, max_obs, labels=["sin_theta", "cos_theta", "theta_dot"])
    self._init_space = SingularStateSpace(self._init_state, labels=["theta", "theta_dot"])
    self._act_space = BoxSpace(-tau_max, tau_max, labels=["tau"])
def _create_spaces(self):
    # Torque space
    max_torque = np.array([150.0, 125.0, 40.0, 60.0, 5.0, 5.0, 2.0])
    self._torque_space = BoxSpace(-max_torque, max_torque)

    # Initial state space
    # Set the actual stable initial position. This position would be reached after some time using the internal
    # PD controller to stabilize at self.init_pose_des
    # An initial qpos measured on the real Barrett WAM:
    # [-3.6523e-05  6.4910e-01  4.4244e-03  1.4211e+00  8.8864e-03 -2.7763e-01 -1.5309e+00]
    self.init_qpos[:7] = np.array([0., 0.65, 0., 1.41, 0., -0.28, -1.57])
    # Set the angle of the first rope segment relative to the cup bottom plate
    self.init_qpos[7] = -0.21
    # The initial position of the ball in cartesian coordinates
    init_ball_pos = np.array([0., -0.8566, 0.85391])
    init_state = np.concatenate([self.init_qpos, self.init_qvel, init_ball_pos])
    if self.fixed_initial_state:
        self._init_space = SingularStateSpace(init_state)
    else:
        # Add plus/minus one degree to each motor joint and the first rope segment joint
        init_state_up = init_state.copy()
        init_state_up[:8] += 1 * np.pi / 180
        init_state_lo = init_state.copy()
        init_state_lo[:8] -= 1 * np.pi / 180
        self._init_space = BoxSpace(init_state_lo, init_state_up)

    # State space
    state_shape = init_state.shape
    max_state = np.full(state_shape, pyrado.inf)
    self._state_space = BoxSpace(-max_state, max_state)

    # Action space (PD controller on 3 joint positions and velocities)
    act_up = np.array([1.985, np.pi, np.pi / 2, 10 * np.pi, 10 * np.pi, 10 * np.pi])
    act_lo = np.array([-1.985, -0.9, -np.pi / 2, -10 * np.pi, -10 * np.pi, -10 * np.pi])
    self._act_space = BoxSpace(act_lo, act_up,  # [rad, rad, rad, rad/s, rad/s, rad/s]
                               labels=[r'$q_{1,des}$', r'$q_{3,des}$', r'$q_{5,des}$',
                                       r'$\dot{q}_{1,des}$', r'$\dot{q}_{3,des}$', r'$\dot{q}_{5,des}$'])

    # Observation space (normalized time)
    self._obs_space = BoxSpace(np.array([0.]), np.array([1.]), labels=['$t$'])
def __init__(self): """Constructor""" Serializable._init(self, locals()) # Call SimEnv's constructor super().__init__(dt=1.0, max_steps=1) # Initialize the domain parameters and the derived constants self._domain_param = self.get_nominal_domain_param() self._mean, self._covariance_matrix = self.calc_constants(self.domain_param) self._init_space = SingularStateSpace(np.zeros(self.state_space.shape)) # Define a dummy task including the reward function self._task = self._create_task()
def test_velocity_filter(plot: bool):
    # Set up environment
    env_gt = QQubeSwingUpSim(dt=1 / 500.0, max_steps=350)
    env_gt.init_space = SingularStateSpace(np.array([0.1, np.pi / 2, 3.0, 0]))
    env_filt = ObsVelFiltWrapper(env_gt, idcs_pos=["theta", "alpha"], idcs_vel=["theta_dot", "alpha_dot"])

    # Set up policy
    policy = IdlePolicy(env_gt.spec)

    # Simulate
    ro_gt = rollout(env_gt, policy)
    ro_filt = rollout(env_filt, policy)

    # Filter the observations of the last rollout
    theta_dot_gt = ro_gt.observations[:, 4]
    alpha_dot_gt = ro_gt.observations[:, 5]
    theta_dot_filt = ro_filt.observations[:, 4]
    alpha_dot_filt = ro_filt.observations[:, 5]

    assert theta_dot_filt[0] != pytest.approx(theta_dot_gt[0])  # can't be equal since we set an init vel of 3 rad/s
    assert alpha_dot_filt[0] == pytest.approx(alpha_dot_gt[0], abs=1e-4)

    # Compute the error
    rmse_theta = rmse(theta_dot_gt, theta_dot_filt)
    rmse_alpha = rmse(alpha_dot_gt, alpha_dot_filt)

    if plot:
        from matplotlib import pyplot as plt

        # Plot the filtered signals versus the original observations
        plt.rc("text", usetex=True)
        fig, axs = plt.subplots(2, figsize=(16, 9))
        axs[0].plot(theta_dot_gt, label=r"$\dot{\theta}_{true}$")
        axs[0].plot(theta_dot_filt, label=r"$\dot{\theta}_{filt}$")
        axs[1].plot(alpha_dot_gt, label=r"$\dot{\alpha}_{true}$")
        axs[1].plot(alpha_dot_filt, label=r"$\dot{\alpha}_{filt}$")
        axs[0].set_title(rf"RMSE($\theta$): {rmse_theta}")
        axs[0].set_ylabel(r"$\dot{\theta}$ [rad/s]")
        axs[0].legend()
        axs[1].set_title(rf"RMSE($\alpha$): {rmse_alpha}")
        axs[1].set_xlabel("time steps")
        axs[1].set_ylabel(r"$\dot{\alpha}$ [rad/s]")
        axs[1].legend()
        plt.show()
def _create_spaces(self):
    tau_max = self.domain_param['tau_max']

    # Define the spaces
    max_state = np.array([4 * np.pi, 4 * np.pi])  # [rad, rad/s]
    max_obs = np.array([1., 1., np.inf])  # [-, -, rad/s]
    init_state = np.zeros(2)  # [rad, rad/s]
    self._state_space = BoxSpace(-max_state, max_state, labels=['theta', 'theta_dot'])
    self._obs_space = BoxSpace(-max_obs, max_obs, labels=['sin_theta', 'cos_theta', 'theta_dot'])
    self._init_space = SingularStateSpace(init_state, labels=['theta', 'theta_dot'])
    self._act_space = BoxSpace(-tau_max, tau_max, labels=['tau'])
def __init__(
    self,
    num_dof: int,
    frame_skip: int = 4,
    dt: Optional[float] = None,
    max_steps: int = pyrado.inf,
    task_args: Optional[dict] = None,
):
    """
    Constructor

    :param num_dof: number of degrees of freedom (4 or 7), depending on which Barrett WAM setup is being used
    :param frame_skip: number of simulation frames for which the same action is held, results in a multiplier of
                       the time step size `dt`
    :param dt: by default the time step size is the one from the mujoco config file multiplied by the number of
               frame skips (legacy from OpenAI environments). By passing an explicit `dt` value, this can be
               overwritten. Possible use case: if you know that you recorded a trajectory with a specific `dt`.
    :param max_steps: max number of simulation time steps
    :param task_args: arguments for the task construction
    """
    Serializable._init(self, locals())

    # Initialize num DoF specific variables
    if num_dof == 4:
        graph_file_name = "wam_4dof_base.xml"
        self.qpos_des_init = INIT_QPOS_DES_4DOF
        self.p_gains = WAM_PGAINS_4DOF
        self.d_gains = WAM_DGAINS_4DOF
    elif num_dof == 7:
        graph_file_name = "wam_7dof_base.xml"
        self.qpos_des_init = INIT_QPOS_DES_7DOF
        self.p_gains = WAM_PGAINS_7DOF
        self.d_gains = WAM_DGAINS_7DOF
    else:
        raise pyrado.ValueErr(given=num_dof, eq_constraint="4 or 7")

    model_path = osp.join(pyrado.MUJOCO_ASSETS_DIR, graph_file_name)
    super().__init__(num_dof, model_path, frame_skip, dt, max_steps, task_args)

    # Fixed initial state space
    init_state = np.concatenate([self.init_qpos, self.init_qvel])
    self._init_space = SingularStateSpace(init_state)
def __init__(self,
             mps: Sequence[dict] = None,
             task_args: dict = None,
             max_dist_force: float = None,
             position_mps: bool = True,
             **kwargs):
    """
    Constructor

    :param mps: movement primitives holding the dynamical systems and the goal states
    :param task_args: arguments for the task construction
    :param max_dist_force: maximum disturbance force, set to None (default) for no disturbance
    :param position_mps: `True` if the MPs are defined on position level, `False` if defined on velocity level,
                         only matters if `actionModelType='activation'`
    :param kwargs: keyword arguments forwarded to `RcsSim`
                   taskCombinationMethod='sum'  # or 'mean' or 'product'
    """
    Serializable._init(self, locals())

    # Forward to the RcsSim's constructor, nothing more needs to be done here
    RcsSim.__init__(
        self,
        envType='MPBlending',
        task_args=task_args,
        actionModelType='activation',
        tasks=mps,
        positionTasks=position_mps,
        **kwargs
    )

    # Store Planar3Link specific vars
    center_init_state = np.array([0., 0.])  # [m]
    self._init_space = SingularStateSpace(center_init_state, labels=['$x$', '$y$'])

    # Setup disturbance
    self._max_dist_force = max_dist_force
def __init__(self, task_args: dict, collision_config: dict = None, max_dist_force: float = None, **kwargs):
    """
    Constructor

    .. note::
        This constructor should only be called via the subclasses.

    :param task_args: arguments for the task construction
    :param max_dist_force: maximum disturbance force, set to None (default) for no disturbance
    :param kwargs: keyword arguments forwarded to `RcsSim`
                   collisionConfig: specification of the Rcs CollisionModel
    """
    Serializable._init(self, locals())

    # Build the default collision configuration before forwarding it to the constructor
    if kwargs.get('collisionConfig', None) is None:
        collision_config = {
            'pairs': [
                {'body1': 'Effector', 'body2': 'Link3'},
                {'body1': 'Effector', 'body2': 'Link2'},
                {'body1': 'UpperWall', 'body2': 'Link4'},
                {'body1': 'LowerWall', 'body2': 'Link4'},
                {'body1': 'LowerWall', 'body2': 'Link3'},
                {'body1': 'LowerWall', 'body2': 'Link2'},
            ],
            'threshold': 0.05
        }
    else:
        collision_config = kwargs.get('collisionConfig')

    # Forward to the RcsSim's constructor, nothing more needs to be done here
    RcsSim.__init__(self,
                    envType='PlanarInsert',
                    physicsConfigFile='pPlanarInsert.xml',
                    task_args=task_args,
                    collisionConfig=collision_config,
                    **kwargs)

    # Initial state space definition
    init_state = np.array([-40, 30, 30, 30, -30]) / 180 * np.pi  # [rad, rad, rad, rad, rad]
    self._init_space = SingularStateSpace(init_state, labels=['$q_1$', '$q_2$', '$q_3$', '$q_4$', '$q_5$'])

    # Setup disturbance
    self._max_dist_force = max_dist_force
class RosenSim(SimEnv, Serializable):
    """ This environment wraps the Rosenbrock function to use it as a test case for Pyrado algorithms. """

    name: str = 'rosen'

    def __init__(self):
        """ Constructor """
        Serializable._init(self, locals())

        # Initialize basic variables
        super().__init__(dt=None, max_steps=1)

        # Set the bounds for the system's states and actions
        max_state = np.array([100., 100.])
        max_act = max_state
        self._curr_act = np.zeros_like(max_act)  # just for usage in render function

        self._state_space = BoxSpace(-max_state, max_state, labels=['x_1', 'x_2'])
        self._init_space = SingularStateSpace(np.zeros(self._state_space.shape), labels=['x_1_init', 'x_2_init'])
        self._act_space = BoxSpace(-max_act, max_act, labels=['x_1_next', 'x_2_next'])

        # Define the task including the reward function
        self._task = self._create_task()

        # Animation with pyplot
        self._anim = dict(fig=None, trace_x=[], trace_y=[], trace_z=[])

    @property
    def state_space(self):
        return self._state_space

    @property
    def obs_space(self):
        return self._state_space

    @property
    def init_space(self):
        return self._init_space

    @property
    def act_space(self):
        return self._act_space

    def _create_task(self, task_args: dict = None) -> OptimProxyTask:
        return OptimProxyTask(self.spec, StateBasedRewFcn(rosenbrock, flip_sign=True))

    @property
    def task(self) -> OptimProxyTask:
        return self._task

    @property
    def domain_param(self):
        return {}

    @domain_param.setter
    def domain_param(self, param):
        pass

    @classmethod
    def get_nominal_domain_param(cls) -> dict:
        return {}

    def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np.ndarray:
        # Reset time
        self._curr_step = 0

        # Reset the domain parameters
        if domain_param is not None:
            self.domain_param = domain_param

        # Reset the state
        if init_state is None:
            self.state = self._init_space.sample_uniform()  # zero
        else:
            if not init_state.shape == self.obs_space.shape:
                raise pyrado.ShapeErr(given=init_state, expected_match=self.obs_space)
            if isinstance(init_state, np.ndarray):
                self.state = init_state.copy()
            else:
                try:
                    self.state = np.array(init_state)
                except Exception:
                    raise pyrado.TypeErr(given=init_state, expected_type=[np.ndarray, list])

        # No need to reset the task

        # Return perfect observation
        return self.observe(self.state)

    def step(self, act):
        # Apply actuator limits
        act = self.limit_act(act)

        # Selecting an action equals selecting a new state, i.e. a candidate solution of the optimization problem
        self.state = act

        # Current reward depending on the state after the step (since there is only one step)
        self._curr_rew = self.task.step_rew(self.state)

        self._curr_step += 1

        # Check if the task or the environment is done
        done = self.task.is_done(self.state)
        if self._curr_step >= self._max_steps:
            done = True

        return self.observe(self.state), self._curr_rew, done, {}

    def render(self, mode: RenderMode, render_step: int = 1):
        # Call base class
        super().render(mode)

        # Print to console
        if mode.text:
            if self._curr_step % render_step == 0 and self._curr_step > 0:  # skip the render before the first step
                print("step: {:3} | r_t: {: 1.3f} | a_t: {}\t | s_t+1: {}".format(
                    self._curr_step, self._curr_rew, self._curr_act, self.state))

        # Render using pyplot
        if mode.video:
            from matplotlib import pyplot as plt
            from pyrado.plotting.surface import draw_surface

            plt.ion()

            if self._anim['fig'] is None:
                # Plot Rosenbrock function once if not already plotted
                x = np.linspace(-2, 2, 20, True)
                y = np.linspace(-1, 3, 20, True)
                self._anim['fig'] = draw_surface(x, y, rosenbrock, 'x', 'y', 'z')

            self._anim['trace_x'].append(self.state[0])
            self._anim['trace_y'].append(self.state[1])
            self._anim['trace_z'].append(rosenbrock(self.state))

            ax = self._anim['fig'].gca()
            ax.scatter(self._anim['trace_x'], self._anim['trace_y'], self._anim['trace_z'], s=8, c='w')

            plt.draw()
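# Minimal usage sketch for RosenSim (not part of the original snippet), assuming the imports
# used by the class above. Since the task wraps `rosenbrock` with `flip_sign=True`, the reward
# is the negative function value, so the best single action is the optimum [1, 1].
env = RosenSim()
env.reset()

# Acting corresponds to proposing a solution of the optimization problem
obs, rew, done, _ = env.step(np.array([1.0, 1.0]))  # global optimum of the Rosenbrock function
print(rew, done)  # reward should be 0 (negative Rosenbrock value at the optimum); done after the single step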
class CatapultSim(SimEnv, Serializable):
    """
    In this special environment, the action is equal to the policy parameter. Therefore, it makes only sense to
    use it in combination with a linear policy that has only one constant feature.
    """

    name: str = "cata"

    def __init__(self, max_steps: int, example_config: bool):
        """
        Constructor

        :param max_steps: maximum number of simulation steps
        :param example_config: configuration for the 'illustrative example' in the journal
        """
        Serializable._init(self, locals())

        super().__init__(dt=1, max_steps=max_steps)

        self.example_config = example_config
        self._planet = -1

        # Initialize the domain parameters (Earth)
        self._g = CatapultSim.get_nominal_domain_param()["gravity_const"]  # planet's gravity constant [m/s**2]
        self._k = CatapultSim.get_nominal_domain_param()["stiffness"]  # catapult spring's stiffness constant [N/m]
        self._x = CatapultSim.get_nominal_domain_param()["elongation"]  # catapult spring's pre-elongation [m]

        # Domain independent parameter
        self._m = 70.0  # victim's mass [kg]

        # Set the bounds for the system's states and actions
        max_state = np.array([1000.0])  # [m], arbitrary but >> self._x
        max_act = max_state
        self._curr_act = np.zeros_like(max_act)  # just for usage in render function

        self._state_space = BoxSpace(-max_state, max_state, labels=["h"])
        self._init_space = SingularStateSpace(np.zeros(self._state_space.shape), labels=["h_0"])
        self._act_space = BoxSpace(-max_act, max_act, labels=["theta"])

        # Define the task including the reward function
        self._task = self._create_task(task_args=dict())

    @property
    def state_space(self):
        return self._state_space

    @property
    def obs_space(self):
        return self._state_space

    @property
    def init_space(self):
        return self._init_space

    @property
    def act_space(self):
        return self._act_space

    def _create_task(self, task_args: dict) -> DesStateTask:
        # Define the task including the reward function
        state_des = task_args.get("state_des", np.zeros(self._state_space.shape))
        return DesStateTask(self.spec, state_des, rew_fcn=AbsErrRewFcn(q=np.array([1.0]), r=np.array([0.0])))

    @property
    def task(self):
        return self._task

    @property
    def domain_param(self) -> dict:
        if self.example_config:
            return dict(planet=self._planet)
        else:
            return dict(gravity_const=self._g, k=self._k, x=self._x)

    @domain_param.setter
    def domain_param(self, domain_param: dict):
        assert isinstance(domain_param, dict)
        # Set the new domain params if given, else the default value
        if self.example_config:
            if domain_param["planet"] == 0:
                # Mars
                self._g = 3.71
                self._k = 1e3
                self._x = 0.5
            elif domain_param["planet"] == 1:
                # Venus
                self._g = 8.87
                self._k = 3e3
                self._x = 1.5
            elif domain_param["planet"] == -1:
                # Default value which should make the computation invalid
                self._g = None
                self._k = None
                self._x = None
            else:
                raise pyrado.ValueErr(given=domain_param["planet"], eq_constraint="0 or 1")
        else:
            assert self._g > 0 and self._k > 0 and self._x > 0
            self._g = domain_param.get("gravity_const", self._g)
            self._k = domain_param.get("stiffness", self._k)
            self._x = domain_param.get("elongation", self._x)

    @classmethod
    def get_nominal_domain_param(cls) -> dict:
        return dict(gravity_const=9.81, stiffness=2000.0, elongation=1.0)

    def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np.ndarray:
        # Reset time
        self._curr_step = 0

        # Reset the domain parameters
        if domain_param is not None:
            self.domain_param = domain_param

        # Reset the state
        if init_state is None:
            # Sample from the init state space
            self.state = self._init_space.sample_uniform()
        else:
            if not init_state.shape == self.obs_space.shape:
                raise pyrado.ShapeErr(given=init_state, expected_match=self.obs_space)
            if isinstance(init_state, np.ndarray):
                self.state = init_state.copy()
            else:
                try:
                    self.state = np.asarray(init_state)
                except Exception:
                    raise pyrado.TypeErr(given=init_state, expected_type=np.ndarray)

        # Reset the task
        self._task.reset(env_spec=self.spec)

        # Return perfect observation
        return self.observe(self.state)

    def step(self, act: np.ndarray) -> tuple:
        # Apply actuator limits
        act = self.limit_act(act)  # dummy for CatapultSim
        self._curr_act = act  # just for the render function

        # Calculate the maximum height of the flight trajectory ("one step dynamics")
        self.state = self._k / (2.0 * self._m * self._g) * (act - self._x) ** 2  # h(theta, xi)

        # Current reward depending on the state after the step (since there is only one step) and the (unlimited) action
        self._curr_rew = self.task.step_rew(self.state, act, self._curr_step)

        self._curr_step += 1

        # Check if the task or the environment is done
        done = self._task.is_done(self.state)
        if self._curr_step >= self._max_steps:
            done = True

        if done:
            # Add final reward if done
            remaining_steps = self._max_steps - (self._curr_step + 1) if self._max_steps is not pyrado.inf else 0
            self._curr_rew += self._task.final_rew(self.state, remaining_steps)

        return self.observe(self.state), self._curr_rew, done, {}

    def render(self, mode: RenderMode, render_step: int = 1):
        # Call base class
        super().render(mode)

        # Print to console
        if mode.text:
            if self._curr_step % render_step == 0 and self._curr_step > 0:  # skip the render before the first step
                print(
                    f"step: {self._curr_step:4d} | r_t: {self._curr_rew: 1.3f} | a_t: {self._curr_act} | s_t+1: {self.state}"
                )
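# Minimal usage sketch for CatapultSim (not part of the original snippet), assuming the imports
# used by the class above. The domain parameter names follow get_nominal_domain_param(); the
# concrete values below are only illustrative.
env = CatapultSim(max_steps=1, example_config=False)

# Illustrative domain parameter values, e.g. a weaker spring on a lower-gravity planet
env.reset(domain_param=dict(gravity_const=3.71, stiffness=1000.0, elongation=0.5))

obs, rew, done, _ = env.step(np.array([1.5]))  # the 1-dim action theta determines the launch
print(obs, rew, done)  # obs is the reached height h(theta, xi); done is True after the single step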
def test_pair_plot(
    env: SimEnv,
    policy: Policy,
    layout: str,
    labels: Optional[str],
    prob_labels: Optional[str],
    use_prior: bool,
    use_trafo: bool,
):
    def _simulator(dp: to.Tensor) -> to.Tensor:
        """The most simple interface of a simulation to sbi, using `env` and `policy` from outer scope"""
        ro = rollout(env, policy, eval=True, reset_kwargs=dict(domain_param=dict(m=dp[0], k=dp[1], d=dp[2])))
        observation_sim = to.from_numpy(ro.observations[-1]).to(dtype=to.float32)
        return to.atleast_2d(observation_sim)

    # Fix the init state
    env.init_space = SingularStateSpace(env.init_space.sample_uniform())
    env_real = deepcopy(env)
    env_real.domain_param = {"mass": 0.8, "stiffness": 35, "d": 0.7}

    # Optionally transformed domain parameters for inference
    if use_trafo:
        env = SqrtDomainParamTransform(env, mask=["stiffness"])

    # Domain parameter mapping and prior
    dp_mapping = {0: "mass", 1: "stiffness", 2: "d"}
    prior = sbiutils.BoxUniform(low=to.tensor([0.5, 20, 0.2]), high=to.tensor([1.5, 40, 0.8]))

    # Learn a likelihood from the simulator
    density_estimator = sbiutils.posterior_nn(model="maf", hidden_features=10, num_transforms=3)
    snpe = SNPE(prior, density_estimator)
    simulator, prior = prepare_for_sbi(_simulator, prior)
    domain_param, data_sim = simulate_for_sbi(
        simulator=simulator, proposal=prior, num_simulations=50, num_workers=1
    )
    snpe.append_simulations(domain_param, data_sim)
    density_estimator = snpe.train(max_num_epochs=5)
    posterior = snpe.build_posterior(density_estimator)

    # Create a fake (random) true domain parameter
    domain_param_gt = to.tensor([env_real.domain_param[key] for _, key in dp_mapping.items()])
    domain_param_gt += domain_param_gt * to.randn(len(dp_mapping)) / 5
    domain_param_gt = domain_param_gt.unsqueeze(0)
    data_real = simulator(domain_param_gt)

    # Get a (random) condition
    condition = Embedding.pack(domain_param_gt.clone())

    if layout == "inside":
        num_rows, num_cols = len(dp_mapping), len(dp_mapping)
    else:
        num_rows, num_cols = len(dp_mapping) + 1, len(dp_mapping) + 1

    if use_prior:
        grid_bounds = None
    else:
        prior = None
        grid_bounds = to.cat([to.zeros((len(dp_mapping), 1)), to.ones((len(dp_mapping), 1))], dim=1)

    _, axs = plt.subplots(num_rows, num_cols, figsize=(14, 14), tight_layout=True)
    fig = draw_posterior_pairwise_heatmap(
        axs,
        posterior,
        data_real,
        dp_mapping,
        condition,
        prior=prior,
        env_real=env_real,
        marginal_layout=layout,
        grid_bounds=grid_bounds,
        grid_res=100,
        normalize_posterior=False,
        rescale_posterior=True,
        labels=None if labels is None else [""] * len(dp_mapping),
        prob_labels=prob_labels,
    )
    assert fig is not None
def test_pair_plot_scatter(
    env: SimEnv,
    policy: Policy,
    layout: str,
    labels: Optional[str],
    legend_labels: Optional[str],
    axis_limits: Optional[str],
    use_kde: bool,
    use_trafo: bool,
):
    def _simulator(dp: to.Tensor) -> to.Tensor:
        """The most simple interface of a simulation to sbi, using `env` and `policy` from outer scope"""
        ro = rollout(env, policy, eval=True, reset_kwargs=dict(domain_param=dict(m=dp[0], k=dp[1], d=dp[2])))
        observation_sim = to.from_numpy(ro.observations[-1]).to(dtype=to.float32)
        return to.atleast_2d(observation_sim)

    # Fix the init state
    env.init_space = SingularStateSpace(env.init_space.sample_uniform())
    env_real = deepcopy(env)
    env_real.domain_param = {"mass": 0.8, "stiffness": 15, "d": 0.7}

    # Optionally transformed domain parameters for inference
    if use_trafo:
        env = LogDomainParamTransform(env, mask=["stiffness"])

    # Domain parameter mapping and prior
    dp_mapping = {0: "mass", 1: "stiffness", 2: "d"}
    k_low = np.log(10) if use_trafo else 10
    k_up = np.log(20) if use_trafo else 20
    prior = sbiutils.BoxUniform(low=to.tensor([0.5, k_low, 0.2]), high=to.tensor([1.5, k_up, 0.8]))

    # Learn a likelihood from the simulator
    density_estimator = sbiutils.posterior_nn(model="maf", hidden_features=10, num_transforms=3)
    snpe = SNPE(prior, density_estimator)
    simulator, prior = prepare_for_sbi(_simulator, prior)
    domain_param, data_sim = simulate_for_sbi(
        simulator=simulator, proposal=prior, num_simulations=50, num_workers=1
    )
    snpe.append_simulations(domain_param, data_sim)
    density_estimator = snpe.train(max_num_epochs=5)
    posterior = snpe.build_posterior(density_estimator)

    # Create a fake (random) true domain parameter
    domain_param_gt = to.tensor([env_real.domain_param[dp_mapping[key]] for key in sorted(dp_mapping.keys())])
    domain_param_gt += domain_param_gt * to.randn(len(dp_mapping)) / 10
    domain_param_gt = domain_param_gt.unsqueeze(0)
    data_real = simulator(domain_param_gt)

    domain_params, log_probs = SBIBase.eval_posterior(
        posterior,
        data_real,
        num_samples=6,
        normalize_posterior=False,
        subrtn_sbi_sampling_hparam=dict(sample_with_mcmc=False),
    )
    dp_samples = [domain_params.reshape(1, -1, domain_params.shape[-1]).squeeze()]

    if layout == "inside":
        num_rows, num_cols = len(dp_mapping), len(dp_mapping)
    else:
        num_rows, num_cols = len(dp_mapping) + 1, len(dp_mapping) + 1

    _, axs = plt.subplots(num_rows, num_cols, figsize=(8, 8), tight_layout=True)
    fig = draw_posterior_pairwise_scatter(
        axs=axs,
        dp_samples=dp_samples,
        dp_mapping=dp_mapping,
        prior=prior if axis_limits == "use_prior" else None,
        env_sim=env,
        env_real=env_real,
        axis_limits=axis_limits,
        marginal_layout=layout,
        labels=labels,
        legend_labels=legend_labels,
        use_kde=use_kde,
    )
    assert fig is not None
def act_space(self):
    return SingularStateSpace(np.zeros(1))
def __init__(
    self,
    num_dof: int,
    frame_skip: int = 4,
    dt: Optional[float] = None,
    max_steps: int = pyrado.inf,
    fixed_init_state: bool = True,
    stop_on_collision: bool = True,
    observe_ball: bool = False,
    observe_cup: bool = False,
    task_args: Optional[dict] = None,
):
    """
    Constructor

    :param num_dof: number of degrees of freedom (4 or 7), depending on which Barrett WAM setup is being used
    :param frame_skip: number of simulation frames for which the same action is held, results in a multiplier of
                       the time step size `dt`
    :param dt: by default the time step size is the one from the mujoco config file multiplied by the number of
               frame skips (legacy from OpenAI environments). By passing an explicit `dt` value, this can be
               overwritten. Possible use case: if you know that you recorded a trajectory with a specific `dt`.
    :param max_steps: max number of simulation time steps
    :param fixed_init_state: enables/disables deterministic, fixed initial state
    :param stop_on_collision: set the `failed` flag in the dict returned by `_mujoco_step()` to true, if the ball
                              collides with something else than the desired parts of the cup. This causes the
                              episode to end. Keep in mind that in case of a negative step reward and no final
                              cost on failing, this might result in undesired behavior.
    :param observe_ball: if `True`, include the 2-dim (x-z plane) cartesian ball position into the observation
    :param observe_cup: if `True`, include the 2-dim (x-z plane) cartesian cup position into the observation
    :param task_args: arguments for the task construction
    """
    Serializable._init(self, locals())

    self.fixed_init_state = fixed_init_state
    self.observe_ball = observe_ball
    self.observe_cup = observe_cup

    # Initialize num DoF specific variables
    if num_dof == 4:
        graph_file_name = "wam_4dof_bic.xml"
        self.qpos_des_init = INIT_QPOS_DES_4DOF
        self.p_gains = WAM_PGAINS_4DOF
        self.d_gains = WAM_DGAINS_4DOF
        init_ball_pos = np.array([0.723, 0.0, 1.168])
        init_cup_goal = GOAL_POS_INIT_SIM_4DOF
    elif num_dof == 7:
        graph_file_name = "wam_7dof_bic.xml"
        self.qpos_des_init = INIT_QPOS_DES_7DOF
        self.p_gains = WAM_PGAINS_7DOF
        self.d_gains = WAM_DGAINS_7DOF
        init_ball_pos = np.array([0.828, 0.0, 1.131])
        init_cup_goal = GOAL_POS_INIT_SIM_7DOF
    else:
        raise pyrado.ValueErr(given=num_dof, eq_constraint="4 or 7")

    model_path = osp.join(pyrado.MUJOCO_ASSETS_DIR, graph_file_name)
    super().__init__(num_dof, model_path, frame_skip, dt, max_steps, task_args)

    # Actual initial joint position (when the WAM moved to the home position)
    if num_dof == 4:
        self.init_qpos[:4] = np.array([0.0, 0.63, 0.0, 1.27])
        self.init_qpos[4] = -0.34  # angle of the first rope segment relative to the cup bottom plate
    else:
        self.init_qpos[:7] = np.array([0.0, 0.65, 0.0, 1.41, 0.0, -0.28, -1.57])
        self.init_qpos[7] = -0.21  # angle of the first rope segment relative to the cup bottom plate

    # Set the actual stable initial position. This position would be reached after some time using the internal
    # PD controller to stabilize at self._qpos_des_init.
    # The initial position of the ball in cartesian coordinates
    self._init_state = np.concatenate([self.init_qpos, self.init_qvel, init_ball_pos, init_cup_goal])
    if self.fixed_init_state:
        self._init_space = SingularStateSpace(self._init_state)
    else:
        # Add plus/minus one degree to each motor joint and the first rope segment joint
        init_state_up = self._init_state.copy()
        init_state_up[:self._num_dof] += np.pi / 180 * np.array([0.1, 1, 0.5, 1.0, 0.1, 1.0, 1.0])[:self._num_dof]
        init_state_lo = self._init_state.copy()
        init_state_lo[:self._num_dof] -= np.pi / 180 * np.array([0.1, 1, 0.5, 1.0, 0.1, 1.0, 1.0])[:self._num_dof]
        self._init_space = BoxSpace(init_state_lo, init_state_up)

    # Bodies to check for collision
    self._collision_bodies = [
        "wam/base_link",
        "wam/shoulder_yaw_link",
        "wam/shoulder_pitch_link",
        "wam/upper_arm_link",
        "wam/forearm_link",
        "wrist_palm_link",
        "wam/wrist_pitch_link",
        "wam/wrist_yaw_link",
    ]
    if self._num_dof == 4:
        self._collision_bodies = self._collision_bodies[:6]

    # We access a private attribute since a method like 'model.geom_names[geom_id]' cannot be used because
    # not every geom has a name
    self._collision_geom_ids = [self.model._geom_name2id[name] for name in ["cup_geom1", "cup_geom2"]]
    self.stop_on_collision = stop_on_collision

    self.camera_config.update(dict(distance=2.7))