def create_box_flip_task(env_spec: EnvSpec, continuous_rew_fcn): # Define the indices for selection. This needs to match the observations' names in RcsPySim. idcs = ['Box_A'] # Get the masked environment specification spec = EnvSpec( env_spec.obs_space, env_spec.act_space, env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs))) # Create a desired state task # state_des = np.array([0.3]) # box position is measured relative to the table state_des = np.array([-np.pi / 2 ]) # box position is measured world coordinates if continuous_rew_fcn: q = np.array([0. / np.pi]) r = 1e-6 * np.ones(spec.act_space.flat_dim) rew_fcn_act = AbsErrRewFcn(q, r) rew_fcn_box = CosOfOneEleRewFcn(idx=0) rew_fcn = CompoundRewFcn([rew_fcn_act, rew_fcn_box]) else: rew_fcn = MinusOnePerStepRewFcn() ef_task = EndlessFlippingTask(spec, rew_fcn, init_angle=0.) # Return the masked tasks return MaskedTask(env_spec, ef_task, idcs)
def create_collision_task(env_spec: EnvSpec, factor: float) -> MaskedTask: """ Create a task which punishes collision costs given a collision model with pairs of bodies. This task only looks at the instantaneous collision cost. .. note:: This task was designed with an RcsPySim environment in mind, but is not restricted to these environments. :param env_spec: environment specification :param factor: cost / reward function scaling factor :return: masked task that only considers a subspace of all observations """ if not factor >= 0: raise pyrado.ValueErr(given=factor, ge_constraint="0") # Define the indices for selection. This needs to match the observations' names in RcsPySim. obs_labels = ["CollCost"] # Get the masked environment specification spec = EnvSpec( env_spec.obs_space, env_spec.act_space, env_spec.state_space.subspace( env_spec.state_space.create_mask(obs_labels)), ) rew_fcn = AbsErrRewFcn(q=np.array([factor]), r=np.zeros(spec.act_space.shape)) # Create an endlessly running desired state task (no collision is desired) task = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn, never_succeeded) # Mask selected collision cost observation return MaskedTask(env_spec, task, obs_labels)
def create_collision_task(env_spec: EnvSpec, factor: float) -> MaskedTask: """ Create a task which punishes collision costs given a collision model with pairs of bodies. This task only looks at the instantaneous collision cost. :param env_spec: environment specification :param factor: cost / reward function scaling factor :return: masked task """ if not factor >= 0: raise pyrado.ValueErr(given=factor, ge_constraint='0') # Define the indices for selection. This needs to match the observations' names in RcsPySim. idcs = ['CollCost'] # Get the masked environment specification spec = EnvSpec( env_spec.obs_space, env_spec.act_space, env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs))) rew_fcn = AbsErrRewFcn(q=np.array([factor]), r=np.zeros(spec.act_space.shape)) # Create a desired state task (no collision is desired and the task never stops because of success) dst = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn, never_succeeded) # Mask selected collision cost observation return MaskedTask(env_spec, dst, idcs)
def _create_task(self, task_args: dict) -> DesStateTask: # Define the task including the reward function state_des = task_args.get("state_des", np.zeros(self._state_space.shape)) return DesStateTask(self.spec, state_des, rew_fcn=AbsErrRewFcn(q=np.array([1.0]), r=np.array([0.0])))
def _create_task(self, task_args: dict) -> Task: # Create the tasks continuous_rew_fcn = task_args.get('continuous_rew_fcn', True) task_box = create_box_lift_task(self.spec, continuous_rew_fcn, succ_thold=0.03) task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) task_collision = create_collision_task(self.spec, factor=1.) task_ts_discrepancy = create_task_space_discrepancy_task( self.spec, AbsErrRewFcn(q=0.5 * np.ones(3), r=np.zeros(self.act_space.shape))) return ParallelTasks( [task_box, task_check_bounds, task_collision, task_ts_discrepancy], hold_rew_when_done=False)
def _create_task(self, task_args: dict) -> Task: # Create the tasks continuous_rew_fcn = task_args.get('continuous_rew_fcn', True) task_box = create_box_flip_task(self.spec, continuous_rew_fcn) task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) # task_collision = create_collision_task(self.spec, factor=5e-2) from pyrado.environments.rcspysim.box_flipping import create_task_space_discrepancy_task task_ts_discrepancy = create_task_space_discrepancy_task( self.spec, AbsErrRewFcn(q=5e-2 * np.ones(2), r=np.zeros(self.act_space.shape))) return ParallelTasks( [ task_box, task_check_bounds, # task_collision, task_ts_discrepancy ], hold_rew_when_done=False)
def _create_task(self, task_args: dict) -> Task: # Create the tasks des_state1 = self.get_body_position( 'Goal', '', '')[:2] # x and y coordinates in world frame task_box = create_extract_ball_task(self.spec, task_args, des_state1) des_state2 = np.array( self.get_body_position('Slider', '', '')[1] - 0.5) # y coordinate in world frame task_slider = create_extract_slider_task(self.spec, task_args, des_state2) task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) task_collision = create_collision_task(self.spec, factor=5e-2) task_ts_discrepancy = create_task_space_discrepancy_task( self.spec, AbsErrRewFcn(q=0.5 * np.ones(6), r=np.zeros(self.act_space.flat_dim))) return ParallelTasks([ task_box, task_slider, task_check_bounds, task_collision, task_ts_discrepancy ], hold_rew_when_done=False)
def create_mini_golf_task(env_spec: EnvSpec, hole_pos: np.ndarray, succ_thold: float): """ Create a task for putting the ball into a whole. .. note:: This task was designed with an RcsPySim environment in mind, but is not restricted to these environments. :param env_spec: environment specification :param hole_pos: planar x and yy position of the goal's center :param succ_thold: once the object of interest is closer than this threshold, the task is considered successfully :return: masked task that only considers a subspace of all observations """ if not hole_pos.size == 2: raise pyrado.ShapeErr(given=hole_pos, expected_match=(2, )) # Define the indices for selection. This needs to match the observations' names in RcsPySim. idcs = ["Ball_X", "Ball_Y"] # Get the masked environment specification spec = EnvSpec( env_spec.obs_space, env_spec.act_space, env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs))) # Create a desired state task dst = DesStateTask( spec, state_des=hole_pos, rew_fcn=AbsErrRewFcn(q=np.ones(2), r=1e-4 * np.ones(spec.act_space.shape)), success_fcn=functools.partial(proximity_succeeded, thold_dist=succ_thold), ) frt = FinalRewTask(dst, FinalRewMode(always_positive=True)) # Return the masked tasks return MaskedTask(env_spec, frt, idcs)
def _create_task(self, task_args: dict) -> Task: # Define the task including the reward function state_des = task_args.get("state_des", None) if state_des is None: # Get the goal position in world coordinates p = self.get_body_position("Goal", "", "") state_des = np.array([p[0], p[2], 0, 0, 0, 0]) # X, Z, B, Xd, Zd, Bd # Create the individual subtasks task_reach_goal = create_insert_task( self.spec, state_des, rew_fcn=ExpQuadrErrRewFcn( Q=np.diag([2e1, 2e1, 1e-1, 1e-2, 1e-2, 1e-2]), R=2e-2 * np.eye(self.act_space.flat_dim)), success_fcn=partial(proximity_succeeded, thold_dist=0.07, dims=[0, 1, 2]), # position and angle ) task_ts_discrepancy = create_task_space_discrepancy_task( self.spec, AbsErrRewFcn(q=0.1 * np.ones(2), r=np.zeros(self.act_space.shape))) return ParallelTasks([task_reach_goal, task_ts_discrepancy])
def _create_task(self, task_args: dict) -> Task: # Define the indices for selection. This needs to match the observations' names in RcsPySim. if task_args.get("consider_velocities", False): idcs = ["Effector_X", "Effector_Z", "Effector_Xd", "Effector_Zd"] else: idcs = ["Effector_X", "Effector_Z"] # Get the masked environment specification spec = EnvSpec( self.spec.obs_space, self.spec.act_space, self.spec.state_space.subspace( self.spec.state_space.create_mask(idcs)), ) # Get the goal position in world coordinates for all three sub-goals p1 = self.get_body_position("Goal1", "", "") p2 = self.get_body_position("Goal2", "", "") p3 = self.get_body_position("Goal3", "", "") state_des1 = np.array([p1[0], p1[2], 0, 0]) state_des2 = np.array([p2[0], p2[2], 0, 0]) state_des3 = np.array([p3[0], p3[2], 0, 0]) if task_args.get("consider_velocities", False): Q = np.diag([5e-1, 5e-1, 5e-3, 5e-3]) else: Q = np.diag([1e0, 1e0]) state_des1 = state_des1[:2] state_des2 = state_des2[:2] state_des3 = state_des3[:2] success_fcn = partial(proximity_succeeded, thold_dist=7.5e-2, dims=[0, 1]) # min distance = 7cm R = np.zeros((spec.act_space.flat_dim, spec.act_space.flat_dim)) # Create the tasks subtask_1 = FinalRewTask(DesStateTask(spec, state_des1, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_2 = FinalRewTask(DesStateTask(spec, state_des2, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_3 = FinalRewTask(DesStateTask(spec, state_des3, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_4 = FinalRewTask(DesStateTask(spec, state_des1, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_5 = FinalRewTask(DesStateTask(spec, state_des2, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) subtask_6 = FinalRewTask(DesStateTask(spec, state_des3, ExpQuadrErrRewFcn(Q, R), success_fcn), mode=FinalRewMode(time_dependent=True)) task = FinalRewTask( SequentialTasks( [ subtask_1, subtask_2, subtask_3, subtask_4, subtask_5, subtask_6 ], hold_rew_when_done=True, verbose=True, ), mode=FinalRewMode(always_positive=True), factor=5e3, ) masked_task = MaskedTask(self.spec, task, idcs) # Additional tasks task_check_bounds = create_check_all_boundaries_task(self.spec, penalty=1e3) if isinstance(self, Planar3LinkJointCtrlSim): # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound return ParallelTasks([masked_task, task_check_bounds], easily_satisfied=True) else: task_ts_discrepancy = create_task_space_discrepancy_task( self.spec, AbsErrRewFcn(q=0.5 * np.ones(2), r=np.zeros(self.act_space.shape))) # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound return ParallelTasks( [masked_task, task_check_bounds, task_ts_discrepancy], easily_satisfied=True)