Example #1
0
def create_check_all_boundaries_task(env_spec: EnvSpec,
                                     penalty: float) -> FinalRewTask:
    """
    Build a final-reward task that spans the complete state space of `env_spec`.

    The inner `DesStateTask` receives the whole `env_spec`, so every state limit is covered and not just the
    limits of a subspace of the state space (as could happen when using a `MaskedTask`).

    :param env_spec: environment specification handed to the inner task
    :param penalty: value passed as `factor` to the `FinalRewTask`
    :return: `FinalRewTask` with `FinalRewMode(always_negative=True)` wrapping the inner `DesStateTask`
    """
    # All-zero desired state with the same shape as the state space
    state_des = np.zeros(env_spec.state_space.shape)

    # Inner task: zero reward per step, and `never_succeeded` as the fourth argument
    inner_task = DesStateTask(env_spec, state_des, ZeroPerStepRewFcn(),
                              never_succeeded)

    final_mode = FinalRewMode(always_negative=True)
    return FinalRewTask(inner_task, final_mode, factor=penalty)
Example #2
0
    def _create_main_task(self, task_args: dict) -> Task:
        """
        Assemble the main task, which masks everything but the Cartesian ball position.

        If `task_args['sparse_rew_fcn']` is truthy, two final-reward tasks are chained and masked. Otherwise a
        `DesStateTask` with an `ExpQuadrErrRewFcn` is masked and wrapped in a `BestStateFinalRewTask`.

        :param task_args: options read via `get()`: `sparse_rew_fcn` (default `False`), `success_bonus`
                          (default 1), `Q`, `R`, and `final_factor` (default 0.05)
        :return: `MaskedTask` around the sequential tasks (sparse case), or `BestStateFinalRewTask` around the
                 masked `DesStateTask` (otherwise)
        """
        # Cartesian ball position: the three entries just before the last three of the flat state
        flat_dim = self.state_space.flat_dim
        idcs = list(range(flat_dim - 6, flat_dim - 3))

        # Specification whose state space is restricted to the selected indices
        obs_space = self.spec.obs_space
        act_space = self.spec.act_space
        full_state_space = self.spec.state_space
        ball_space = full_state_space.subspace(
            full_state_space.create_mask(idcs))
        spec = EnvSpec(obs_space, act_space, ball_space)

        if task_args.get('sparse_rew_fcn', False):
            bonus = task_args.get('success_bonus', 1)

            # Binary final reward task, success is decided by check_ball_in_cup
            catch_condition = ConditionOnlyTask(
                spec,
                condition_fcn=self.check_ball_in_cup,
                is_success_condition=True)
            catch_task = FinalRewTask(catch_condition,
                                      mode=FinalRewMode(always_positive=True),
                                      factor=bonus)

            # Yields a negative final reward on failure after the main task is done (successfully or not)
            idle_task = GoallessTask(spec, ZeroPerStepRewFcn())
            dont_fail_task = FinalRewTask(
                idle_task,
                mode=FinalRewMode(always_negative=True),
                factor=bonus)

            # Append the endless dummy task to the binary task, to avoid early stopping
            sequence = SequentialTasks((catch_task, dont_fail_task))
            return MaskedTask(self.spec, sequence, idcs)

        # Author's notes: without copy(), the desired state coming from MuJoCo is a reference and updates
        # automatically at each step. sim.forward() + get_body_xpos() would give a wrong desired state, as sim
        # has not yet been updated to init_space.sample(), which is first called in reset().
        goal_pos = self.sim.data.get_site_xpos('cup_goal')  # deliberately not copied

        # Default action cost matrix, chosen by the number of degrees of freedom
        if self.num_dof == 7:
            R_default = np.diag([0, 0, 1, 1e-2, 1e-2, 1e-1])
        else:
            R_default = np.diag([0, 0, 1e-2, 1e-2])

        # Q: distance ball - cup; shouldn't move in y-direction
        Q = task_args.get('Q', np.diag([2e1, 1e-4, 2e1]))
        # R: last joint is really unreliable
        R = task_args.get('R', R_default)
        rew_fcn = ExpQuadrErrRewFcn(Q=Q, R=R)

        dense_task = DesStateTask(spec, goal_pos, rew_fcn)
        masked_task = MaskedTask(self.spec, dense_task, idcs)

        # Wrap the masked DesStateTask to add a bonus for the best state in the rollout
        return BestStateFinalRewTask(
            masked_task,
            max_steps=self.max_steps,
            factor=task_args.get('final_factor', 0.05))
Example #3
0
def create_check_all_boundaries_task(env_spec: EnvSpec,
                                     penalty: float) -> FinalRewTask:
    """
    Create a task for checking whether any bound of the state space is violated.
    Since the complete `env_spec` is used, every limit is covered, not only the limits of a subspace of the state
    space, which is what can happen when using a `MaskedTask`.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param penalty: scalar cost (positive values) for violating the bounds, passed on as `factor`
    :return: `FinalRewTask` (always-negative mode) wrapping a `DesStateTask` defined on the full state space
    """
    # The desired state is all zeros; the inner task uses a zero per-step reward and `never_succeeded`
    zero_state = np.zeros(env_spec.state_space.shape)
    bounds_task = DesStateTask(env_spec, zero_state, ZeroPerStepRewFcn(),
                               never_succeeded)

    negative_mode = FinalRewMode(always_negative=True)
    return FinalRewTask(bounds_task, negative_mode, factor=penalty)
Example #4
0
 def _create_task(self, task_args: dict) -> Task:
     """
     Create a dummy task: a `GoallessTask` with a zero per-step reward function.

     :param task_args: not used by this method
     :return: `GoallessTask` built from `self.spec`
     """
     spec = self.spec
     zero_rew_fcn = ZeroPerStepRewFcn()
     return GoallessTask(spec, zero_rew_fcn)
Example #5
0
 def _create_task(self, task_args: dict) -> Task:
     """
     Create a `FinalRewTask` in `user_input` mode around a dummy task.

     :param task_args: not used by this method
     :return: `FinalRewTask` wrapping a `GoallessTask` with zero per-step reward
     """
     # The wrapped task only acts as a dummy that carries the FinalRewTask
     dummy_task = GoallessTask(self.spec, ZeroPerStepRewFcn())
     return FinalRewTask(dummy_task, mode=FinalRewMode(user_input=True))
Example #6
0
 def rew_fcn(self) -> RewFcn:
     """
     Expose that this task yields zero reward per step.

     :return: a newly created `ZeroPerStepRewFcn`
     """
     zero_rew_fcn = ZeroPerStepRewFcn()
     return zero_rew_fcn
Example #7
0
 def _create_task(self, task_args: dict) -> DesStateTask:
     """
     Create a dummy task with an all-zero desired state and a zero per-step reward function.

     :param task_args: not used by this method
     :return: `DesStateTask` built from `self.spec`
     """
     spec = self.spec
     # Desired state of zeros, shaped like the state space
     state_des = np.zeros(self.state_space.shape)
     return DesStateTask(spec, state_des, ZeroPerStepRewFcn())
Example #8
0
 def _create_task(self, task_args: dict = None) -> Task:
     """
     Create a `DesStateTask` whose desired state is `init_qpos` followed by `init_qvel`.

     :param task_args: not used by this method
     :return: `DesStateTask` with a zero per-step reward function
     """
     # Copies of the stored initial values, joined into one desired state
     qpos = self.init_qpos.copy()
     qvel = self.init_qvel.copy()
     state_des = np.concatenate((qpos, qvel))
     return DesStateTask(self.spec, state_des, ZeroPerStepRewFcn())
Example #9
0
 def _create_task(self, task_args: Optional[dict] = None) -> Task:
     """
     Create a `DesStateTask` whose desired state is `init_qpos` followed by `init_qvel`.

     :param task_args: not used by this method
     :return: `DesStateTask` with a zero per-step reward function
     """
     initial_parts = (self.init_qpos, self.init_qvel)
     state_des = np.concatenate(initial_parts)
     zero_rew_fcn = ZeroPerStepRewFcn()
     return DesStateTask(self.spec, state_des, zero_rew_fcn)