def _create_main_task(self, task_args: dict) -> Task:
    # Create a DesStateTask that masks everything but the ball position
    idcs = list(range(self.state_space.flat_dim - 6, self.state_space.flat_dim - 3))  # Cartesian ball position
    spec = EnvSpec(
        self.spec.obs_space,
        self.spec.act_space,
        self.spec.state_space.subspace(self.spec.state_space.create_mask(idcs)),
    )

    # If we do not use copy(), state_des coming from MuJoCo is a reference and updates automatically at each step.
    # Note: sim.forward() + get_body_xpos() results in wrong output for state_des, as sim has not been updated to
    # init_space.sample(), which is first called in reset()

    if task_args.get('sparse_rew_fcn', False):
        factor = task_args.get('success_bonus', 1)

        # Binary final reward task
        main_task = FinalRewTask(
            ConditionOnlyTask(spec, condition_fcn=self.check_ball_in_cup, is_success_condition=True),
            mode=FinalRewMode(always_positive=True),
            factor=factor,
        )

        # Yield -1 on failure after the main task is done (successfully or not)
        dont_fail_after_succ_task = FinalRewTask(
            GoallessTask(spec, ZeroPerStepRewFcn()),
            mode=FinalRewMode(always_negative=True),
            factor=factor,
        )

        # Augment the binary task with an endless dummy task to avoid early stopping
        task = SequentialTasks((main_task, dont_fail_after_succ_task))
        return MaskedTask(self.spec, task, idcs)

    else:
        state_des = self.sim.data.get_site_xpos('cup_goal')  # this is a reference
        R_default = np.diag([0, 0, 1, 1e-2, 1e-2, 1e-1]) if self.num_dof == 7 else np.diag([0, 0, 1e-2, 1e-2])
        rew_fcn = ExpQuadrErrRewFcn(
            Q=task_args.get('Q', np.diag([2e1, 1e-4, 2e1])),  # distance ball - cup; shouldn't move in y-direction
            R=task_args.get('R', R_default),  # the last joint is particularly unreliable
        )
        task = DesStateTask(spec, state_des, rew_fcn)

        # Wrap the masked DesStateTask to add a bonus for the best state in the rollout
        return BestStateFinalRewTask(
            MaskedTask(self.spec, task, idcs),
            max_steps=self.max_steps,
            factor=task_args.get('final_factor', 0.05),
        )
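# Hedged usage sketch for the factory above: task_args switches between the sparse variant
# (binary success bonus followed by an endless dummy task) and the dense variant
# (exponentiated quadratic error plus a best-state bonus). The environment class name
# WAMBallInCupSim is an assumption inferred from check_ball_in_cup and the 'cup_goal' site;
# it is not confirmed by this snippet.
env = WAMBallInCupSim(num_dof=7, max_steps=1750)  # hypothetical instantiation
sparse_task = env._create_main_task(dict(sparse_rew_fcn=True, success_bonus=10))
dense_task = env._create_main_task(dict(Q=np.diag([2e1, 1e-4, 2e1]), final_factor=0.05))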
def create_check_all_boundaries_task(env_spec: EnvSpec, penalty: float) -> FinalRewTask:
    """
    Create a task that checks if any of the state space bounds is violated.
    This checks every limit, and not just a subspace of the state space as could happen when using a `MaskedTask`.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param penalty: scalar cost (positive value) for violating the bounds
    :return: task that yields a negative final reward of magnitude `penalty` if any bound is violated
    """
    return FinalRewTask(
        DesStateTask(env_spec, np.zeros(env_spec.state_space.shape), ZeroPerStepRewFcn(), never_succeeded),
        FinalRewMode(always_negative=True),
        factor=penalty,
    )
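# Hedged usage sketch: the boundary check above is typically run in parallel with an
# environment's main task, so that leaving the state space immediately triggers the negative
# final reward. Written as if inside an env's _create_task(); main_task and the penalty value
# are placeholders, not taken from this snippet.
return ParallelTasks([
    main_task,
    create_check_all_boundaries_task(self.spec, penalty=1e3),
])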
def _create_task(self, task_args: dict) -> Task:
    # Dummy task
    return GoallessTask(self.spec, ZeroPerStepRewFcn())
def _create_task(self, task_args: dict) -> Task:
    # The wrapped task acts as a dummy and carries the FinalRewTask
    return FinalRewTask(GoallessTask(self.spec, ZeroPerStepRewFcn()), mode=FinalRewMode(user_input=True))
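# Hedged note: FinalRewMode(user_input=True) defers the final reward to a human operator,
# which is useful on real hardware where success cannot be measured automatically. Assuming
# Pyrado's FinalRewTask interface (an assumption, not confirmed by this snippet), the prompt
# is issued when the final reward is computed at the end of a rollout:
final_rew = task.compute_final_rew(state, remaining_steps=0)  # asks the user for a reward value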
def rew_fcn(self) -> RewFcn:
    # To expose that this task yields zero reward per step
    return ZeroPerStepRewFcn()
def _create_task(self, task_args: dict) -> DesStateTask:
    # Dummy task
    return DesStateTask(self.spec, np.zeros(self.state_space.shape), ZeroPerStepRewFcn())
def _create_task(self, task_args: Optional[dict] = None) -> Task:
    # The desired state is the initial state, i.e. the task is to hold the initial configuration
    state_des = np.concatenate([self.init_qpos.copy(), self.init_qvel.copy()])
    return DesStateTask(self.spec, state_des, ZeroPerStepRewFcn())
def _create_task(self, task_args: Optional[dict] = None) -> Task:
    # As above, the desired state is the initial state; np.concatenate already copies its
    # inputs, so the explicit copy() used in the previous variant is not required here
    state_des = np.concatenate([self.init_qpos, self.init_qvel])
    return DesStateTask(self.spec, state_des, ZeroPerStepRewFcn())
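# Hedged sketch of the zero-reward dummy task at runtime, assuming a
# Task.step_rew(state, act, remaining_steps) signature (an assumption, not confirmed here):
rew = task.step_rew(state_des, act=np.zeros(self.spec.act_space.shape), remaining_steps=100)
assert rew == 0.0  # ZeroPerStepRewFcn returns zero for every step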