Exemple #1
0
def create_box_flip_task(env_spec: EnvSpec, continuous_rew_fcn):
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['Box_A']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task
    # state_des = np.array([0.3])  # box position is measured relative to the table
    state_des = np.array([-np.pi / 2
                          ])  # box position is measured world coordinates
    if continuous_rew_fcn:
        q = np.array([0. / np.pi])
        r = 1e-6 * np.ones(spec.act_space.flat_dim)
        rew_fcn_act = AbsErrRewFcn(q, r)
        rew_fcn_box = CosOfOneEleRewFcn(idx=0)
        rew_fcn = CompoundRewFcn([rew_fcn_act, rew_fcn_box])
    else:
        rew_fcn = MinusOnePerStepRewFcn()
    ef_task = EndlessFlippingTask(spec, rew_fcn, init_angle=0.)

    # Return the masked tasks
    return MaskedTask(env_spec, ef_task, idcs)
Exemple #2
0
def create_collision_task(env_spec: EnvSpec, factor: float) -> MaskedTask:
    """
    Create a task which punishes collision costs given a collision model with pairs of bodies.
    This task only looks at the instantaneous collision cost.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param factor: cost / reward function scaling factor
    :return: masked task that only considers a subspace of all observations
    """
    if not factor >= 0:
        raise pyrado.ValueErr(given=factor, ge_constraint="0")

    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    obs_labels = ["CollCost"]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    rew_fcn = AbsErrRewFcn(q=np.array([factor]),
                           r=np.zeros(spec.act_space.shape))

    # Create an endlessly running desired state task (no collision is desired)
    task = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                        never_succeeded)

    # Mask selected collision cost observation
    return MaskedTask(env_spec, task, obs_labels)
Exemple #3
0
def create_collision_task(env_spec: EnvSpec, factor: float) -> MaskedTask:
    """
    Create a task which punishes collision costs given a collision model with pairs of bodies.
    This task only looks at the instantaneous collision cost.

    :param env_spec: environment specification
    :param factor: cost / reward function scaling factor
    :return: masked task
    """
    if not factor >= 0:
        raise pyrado.ValueErr(given=factor, ge_constraint='0')

    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['CollCost']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    rew_fcn = AbsErrRewFcn(q=np.array([factor]),
                           r=np.zeros(spec.act_space.shape))

    # Create a desired state task (no collision is desired and the task never stops because of success)
    dst = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                       never_succeeded)

    # Mask selected collision cost observation
    return MaskedTask(env_spec, dst, idcs)
Exemple #4
0
 def _create_task(self, task_args: dict) -> DesStateTask:
     # Define the task including the reward function
     state_des = task_args.get("state_des",
                               np.zeros(self._state_space.shape))
     return DesStateTask(self.spec,
                         state_des,
                         rew_fcn=AbsErrRewFcn(q=np.array([1.0]),
                                              r=np.array([0.0])))
Exemple #5
0
    def _create_task(self, task_args: dict) -> Task:
        # Create the tasks
        continuous_rew_fcn = task_args.get('continuous_rew_fcn', True)
        task_box = create_box_lift_task(self.spec,
                                        continuous_rew_fcn,
                                        succ_thold=0.03)
        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)
        task_collision = create_collision_task(self.spec, factor=1.)
        task_ts_discrepancy = create_task_space_discrepancy_task(
            self.spec,
            AbsErrRewFcn(q=0.5 * np.ones(3), r=np.zeros(self.act_space.shape)))

        return ParallelTasks(
            [task_box, task_check_bounds, task_collision, task_ts_discrepancy],
            hold_rew_when_done=False)
Exemple #6
0
    def _create_task(self, task_args: dict) -> Task:
        # Create the tasks
        continuous_rew_fcn = task_args.get('continuous_rew_fcn', True)
        task_box = create_box_flip_task(self.spec, continuous_rew_fcn)
        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)
        # task_collision = create_collision_task(self.spec, factor=5e-2)
        from pyrado.environments.rcspysim.box_flipping import create_task_space_discrepancy_task
        task_ts_discrepancy = create_task_space_discrepancy_task(
            self.spec,
            AbsErrRewFcn(q=5e-2 * np.ones(2),
                         r=np.zeros(self.act_space.shape)))

        return ParallelTasks(
            [
                task_box,
                task_check_bounds,
                # task_collision,
                task_ts_discrepancy
            ],
            hold_rew_when_done=False)
Exemple #7
0
    def _create_task(self, task_args: dict) -> Task:
        # Create the tasks
        des_state1 = self.get_body_position(
            'Goal', '', '')[:2]  # x and y coordinates in world frame
        task_box = create_extract_ball_task(self.spec, task_args, des_state1)
        des_state2 = np.array(
            self.get_body_position('Slider', '', '')[1] -
            0.5)  # y coordinate in world frame
        task_slider = create_extract_slider_task(self.spec, task_args,
                                                 des_state2)
        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)
        task_collision = create_collision_task(self.spec, factor=5e-2)
        task_ts_discrepancy = create_task_space_discrepancy_task(
            self.spec,
            AbsErrRewFcn(q=0.5 * np.ones(6),
                         r=np.zeros(self.act_space.flat_dim)))

        return ParallelTasks([
            task_box, task_slider, task_check_bounds, task_collision,
            task_ts_discrepancy
        ],
                             hold_rew_when_done=False)
Exemple #8
0
def create_mini_golf_task(env_spec: EnvSpec, hole_pos: np.ndarray,
                          succ_thold: float):
    """
    Create a task for putting the ball into a whole.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param hole_pos: planar x and yy  position of the goal's center
    :param succ_thold: once the object of interest is closer than this threshold, the task is considered successfully
    :return: masked task that only considers a subspace of all observations
    """
    if not hole_pos.size == 2:
        raise pyrado.ShapeErr(given=hole_pos, expected_match=(2, ))

    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ["Ball_X", "Ball_Y"]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task
    dst = DesStateTask(
        spec,
        state_des=hole_pos,
        rew_fcn=AbsErrRewFcn(q=np.ones(2),
                             r=1e-4 * np.ones(spec.act_space.shape)),
        success_fcn=functools.partial(proximity_succeeded,
                                      thold_dist=succ_thold),
    )
    frt = FinalRewTask(dst, FinalRewMode(always_positive=True))

    # Return the masked tasks
    return MaskedTask(env_spec, frt, idcs)
Exemple #9
0
    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get("state_des", None)
        if state_des is None:
            # Get the goal position in world coordinates
            p = self.get_body_position("Goal", "", "")
            state_des = np.array([p[0], p[2], 0, 0, 0,
                                  0])  # X, Z, B, Xd, Zd, Bd

        # Create the individual subtasks
        task_reach_goal = create_insert_task(
            self.spec,
            state_des,
            rew_fcn=ExpQuadrErrRewFcn(
                Q=np.diag([2e1, 2e1, 1e-1, 1e-2, 1e-2, 1e-2]),
                R=2e-2 * np.eye(self.act_space.flat_dim)),
            success_fcn=partial(proximity_succeeded,
                                thold_dist=0.07,
                                dims=[0, 1, 2]),  # position and angle
        )
        task_ts_discrepancy = create_task_space_discrepancy_task(
            self.spec,
            AbsErrRewFcn(q=0.1 * np.ones(2), r=np.zeros(self.act_space.shape)))
        return ParallelTasks([task_reach_goal, task_ts_discrepancy])
Exemple #10
0
    def _create_task(self, task_args: dict) -> Task:
        # Define the indices for selection. This needs to match the observations' names in RcsPySim.
        if task_args.get("consider_velocities", False):
            idcs = ["Effector_X", "Effector_Z", "Effector_Xd", "Effector_Zd"]
        else:
            idcs = ["Effector_X", "Effector_Z"]

        # Get the masked environment specification
        spec = EnvSpec(
            self.spec.obs_space,
            self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)),
        )

        # Get the goal position in world coordinates for all three sub-goals
        p1 = self.get_body_position("Goal1", "", "")
        p2 = self.get_body_position("Goal2", "", "")
        p3 = self.get_body_position("Goal3", "", "")
        state_des1 = np.array([p1[0], p1[2], 0, 0])
        state_des2 = np.array([p2[0], p2[2], 0, 0])
        state_des3 = np.array([p3[0], p3[2], 0, 0])
        if task_args.get("consider_velocities", False):
            Q = np.diag([5e-1, 5e-1, 5e-3, 5e-3])
        else:
            Q = np.diag([1e0, 1e0])
            state_des1 = state_des1[:2]
            state_des2 = state_des2[:2]
            state_des3 = state_des3[:2]

        success_fcn = partial(proximity_succeeded,
                              thold_dist=7.5e-2,
                              dims=[0, 1])  # min distance = 7cm
        R = np.zeros((spec.act_space.flat_dim, spec.act_space.flat_dim))

        # Create the tasks
        subtask_1 = FinalRewTask(DesStateTask(spec, state_des1,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_2 = FinalRewTask(DesStateTask(spec, state_des2,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_3 = FinalRewTask(DesStateTask(spec, state_des3,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_4 = FinalRewTask(DesStateTask(spec, state_des1,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_5 = FinalRewTask(DesStateTask(spec, state_des2,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_6 = FinalRewTask(DesStateTask(spec, state_des3,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        task = FinalRewTask(
            SequentialTasks(
                [
                    subtask_1, subtask_2, subtask_3, subtask_4, subtask_5,
                    subtask_6
                ],
                hold_rew_when_done=True,
                verbose=True,
            ),
            mode=FinalRewMode(always_positive=True),
            factor=5e3,
        )
        masked_task = MaskedTask(self.spec, task, idcs)

        # Additional tasks
        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)
        if isinstance(self, Planar3LinkJointCtrlSim):
            # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound
            return ParallelTasks([masked_task, task_check_bounds],
                                 easily_satisfied=True)
        else:
            task_ts_discrepancy = create_task_space_discrepancy_task(
                self.spec,
                AbsErrRewFcn(q=0.5 * np.ones(2),
                             r=np.zeros(self.act_space.shape)))
            # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound
            return ParallelTasks(
                [masked_task, task_check_bounds, task_ts_discrepancy],
                easily_satisfied=True)