Esempio n. 1
0
def test_composite_task_structure(envspec_432, task_type):
    state_des1 = np.zeros(3)
    state_des2 = -.5*np.ones(3)
    state_des3 = +.5*np.ones(3)
    rew_fcn = MinusOnePerStepRewFcn()
    t1 = FinalRewTask(DesStateTask(envspec_432, state_des1, rew_fcn), mode=FinalRewMode(always_positive=True),
                      factor=10)
    t2 = FinalRewTask(DesStateTask(envspec_432, state_des2, rew_fcn), mode=FinalRewMode(always_positive=True),
                      factor=10)
    t3 = FinalRewTask(DesStateTask(envspec_432, state_des3, rew_fcn), mode=FinalRewMode(always_positive=True),
                      factor=10)

    if task_type == 'ParallelTasks':
        ct = ParallelTasks([t1, t2, t3])
    elif task_type == 'SequentialTasks':
        ct = SequentialTasks([t1, t2, t3])
    else:
        raise NotImplementedError
    ct.reset(env_spec=envspec_432)

    assert len(ct) == 3
    assert ct.env_spec.obs_space == envspec_432.obs_space
    assert ct.env_spec.act_space == envspec_432.act_space
    assert ct.env_spec.state_space == envspec_432.state_space
    assert isinstance(ct.tasks[0].rew_fcn, RewFcn)
    assert isinstance(ct.tasks[1].rew_fcn, RewFcn)
    assert isinstance(ct.tasks[2].rew_fcn, RewFcn)

    if type == 'ParallelTasks':
        assert np.all(ct.state_des[0] == state_des1)
        assert np.all(ct.state_des[1] == state_des2)
        assert np.all(ct.state_des[2] == state_des3)
    elif type == 'SequentialTasks':
        assert np.all(ct.state_des == state_des1)
Esempio n. 2
0
def create_home_pos_task(env_spec: EnvSpec, obs_labels: Sequence[str],
                         state_des: np.ndarray) -> MaskedTask:
    """
    Create a task for moving the robot to safe position.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param obs_labels: labels for selection, e.g. ['PowerGrasp_R_Y', 'PowerGrasp_R_Z']. This needs to match the
                       observations' names in RcsPySim
    :param state_des: desired state (depends of the coordinate system). If reached, the task is over.
    :return: masked task that only considers a subspace of all observations
    """
    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    # Create a desired state task
    Q = 1e1 * np.eye(len(state_des))
    R = 1e-1 * np.eye(spec.act_space.flat_dim)
    rew_fcn = ExpQuadrErrRewFcn(Q, R)
    task = DesStateTask(spec, state_des, rew_fcn)

    # Return the masked tasks
    return MaskedTask(env_spec, task, obs_labels)
Esempio n. 3
0
def create_forcemin_task(env_spec: EnvSpec, obs_labels: Sequence[str],
                         Q: np.ndarray) -> MaskedTask:
    """
    Create a task which punishes the amount of used force.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param obs_labels: labels for selection, e.g. ['WristLoadCellLBR_R_Fy']. This needs to match the observations'
                       names in RcsPySim
    :param Q: weight matrix of dim NxN with N=num_forces for the quadratic force costs
    :return: masked task that only considers a subspace of all observations
    """
    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    # Create an endlessly running desired state task
    rew_fcn = QuadrErrRewFcn(Q=Q,
                             R=np.zeros((spec.act_space.flat_dim,
                                         spec.act_space.flat_dim)))
    task = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                        never_succeeded)

    # Mask selected collision cost observation
    return MaskedTask(env_spec, task, obs_labels)
Esempio n. 4
0
def create_goal_dist_task(env_spec: EnvSpec,
                          ds_label: int,
                          rew_fcn: RewFcn,
                          succ_thold: float = 0.01) -> MaskedTask:
    """
    Create a task that rewards minimizing the `GoalDistance` of dynamical system movements primitives (see RcsPySim).

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param ds_label: label of the dynamical system (see RcsPySim)
    :param rew_fcn: reward function
    :param succ_thold: once the object of interest is closer than this threshold, the task is considered successfully
    :return: masked task that only considers a subspace of all observations
    """
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    obs_labels = [f"GD_DS{ds_label}"]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels))
        if env_spec.state_space is not EmptySpace else EmptySpace,
    )

    # Create a desired state task with the goal [0, 0]
    task = DesStateTask(
        spec, np.zeros(2), rew_fcn,
        functools.partial(proximity_succeeded, thold_dist=succ_thold))

    # Mask selected goal distance
    return MaskedTask(env_spec, task, obs_labels)
Esempio n. 5
0
def create_collision_task(env_spec: EnvSpec, factor: float) -> MaskedTask:
    """
    Create a task which punishes collision costs given a collision model with pairs of bodies.
    This task only looks at the instantaneous collision cost.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param factor: cost / reward function scaling factor
    :return: masked task that only considers a subspace of all observations
    """
    if not factor >= 0:
        raise pyrado.ValueErr(given=factor, ge_constraint="0")

    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    obs_labels = ["CollCost"]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    rew_fcn = AbsErrRewFcn(q=np.array([factor]),
                           r=np.zeros(spec.act_space.shape))

    # Create an endlessly running desired state task (no collision is desired)
    task = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                        never_succeeded)

    # Mask selected collision cost observation
    return MaskedTask(env_spec, task, obs_labels)
Esempio n. 6
0
def create_box_lift_task(env_spec: EnvSpec, continuous_rew_fcn: bool,
                         succ_thold: float):
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['Box_Z']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task
    # state_des = np.array([0.3])  # box position is measured relative to the table
    state_des = np.array([1.1])  # box position is measured world coordinates
    if continuous_rew_fcn:
        Q = np.diag([3e1])
        R = 1e0 * np.eye(spec.act_space.flat_dim)
        rew_fcn = ExpQuadrErrRewFcn(Q, R)
    else:
        rew_fcn = MinusOnePerStepRewFcn()
    dst = DesStateTask(
        spec, state_des, rew_fcn,
        functools.partial(proximity_succeeded, thold_dist=succ_thold))

    # Return the masked tasks
    return MaskedTask(env_spec, dst, idcs)
Esempio n. 7
0
def create_task_space_discrepancy_task(env_spec: EnvSpec,
                                       rew_fcn: RewFcn) -> MaskedTask:
    """
    Create a task which punishes the discrepancy between the actual and the commanded state of the observed body.
    The observed body is specified in in the associated experiment configuration file in RcsPySim.
    This task only looks at the X and Z coordinates.

    :param env_spec: environment specification
    :param rew_fcn: reward function
    :return: masked task
    """
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = [idx for idx in env_spec.state_space.labels if 'DiscrepTS' in idx]
    if not idcs:
        raise pyrado.ValueErr(
            msg="No state space labels found that contain 'DiscrepTS'")

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task (no task space discrepancy is desired and the task never stops because of success)
    dst = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                       never_succeeded)

    # Mask selected discrepancy observation
    return MaskedTask(env_spec, dst, idcs)
Esempio n. 8
0
def create_collision_task(env_spec: EnvSpec, factor: float) -> MaskedTask:
    """
    Create a task which punishes collision costs given a collision model with pairs of bodies.
    This task only looks at the instantaneous collision cost.

    :param env_spec: environment specification
    :param factor: cost / reward function scaling factor
    :return: masked task
    """
    if not factor >= 0:
        raise pyrado.ValueErr(given=factor, ge_constraint='0')

    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['CollCost']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    rew_fcn = AbsErrRewFcn(q=np.array([factor]),
                           r=np.zeros(spec.act_space.shape))

    # Create a desired state task (no collision is desired and the task never stops because of success)
    dst = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                       never_succeeded)

    # Mask selected collision cost observation
    return MaskedTask(env_spec, dst, idcs)
Esempio n. 9
0
 def _create_task(self, task_args: dict) -> Task:
     # Define the task including the reward function
     state_des = task_args.get('state_des', None)
     if state_des is None:
         state_des = np.zeros(self.obs_space.flat_dim)
     Q = np.diag([
         1e-0,
         1e-0,
         1e-0,
         1e-0,
         1e-0,
         1e+3,
         1e+3,
         1e+3,  # Px, Py, Pz, Pa, Pb, Bx, By, Bz,
         1e-2,
         1e-2,
         1e-2,
         1e-2,
         1e-2,
         1e-0,
         1e-0,
         1e-0
     ])  # Pxd, Pyd, Pzd, Pad, Pbd, Bxd, Byd, Bzd
     R = np.diag([1e-2, 1e-2, 1e-2, 1e-3,
                  1e-3])  # Pxdd, Pydd, Pzdd, Padd, Pbdd
     return DesStateTask(
         self.spec, state_des,
         ScaledExpQuadrErrRewFcn(Q,
                                 R,
                                 self.state_space,
                                 self.act_space,
                                 min_rew=1e-4))
Esempio n. 10
0
def create_box_upper_shelve_task(env_spec: EnvSpec, continuous_rew_fcn: bool,
                                 succ_thold: float):
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['Box_X', 'Box_Y', 'Box_Z', 'Box_A', 'Box_B', 'Box_C']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task
    state_des = np.zeros(
        6)  # zeros since we observe the box position relative to the goal
    if continuous_rew_fcn:
        Q = np.diag([5e0, 5e0, 5e0, 1e-1, 1e-1, 1e-1])
        R = 5e-2 * np.eye(spec.act_space.flat_dim)
        rew_fcn = ExpQuadrErrRewFcn(Q, R)
    else:
        rew_fcn = MinusOnePerStepRewFcn
    dst = DesStateTask(
        spec, state_des, rew_fcn,
        functools.partial(proximity_succeeded, thold_dist=succ_thold))

    # Return the masked tasks
    return MaskedTask(env_spec, dst, idcs)
Esempio n. 11
0
 def _create_task(self, task_args: dict) -> DesStateTask:
     # Define the task including the reward function
     state_des = task_args.get("state_des",
                               np.zeros(self._state_space.shape))
     return DesStateTask(self.spec,
                         state_des,
                         rew_fcn=AbsErrRewFcn(q=np.array([1.0]),
                                              r=np.array([0.0])))
Esempio n. 12
0
def create_check_all_boundaries_task(env_spec: EnvSpec,
                                     penalty: float) -> FinalRewTask:
    # Check every limit (nut just of a subspace of the state state as it could happen when using a MaskedTask)
    return FinalRewTask(DesStateTask(env_spec,
                                     np.zeros(env_spec.state_space.shape),
                                     ZeroPerStepRewFcn(), never_succeeded),
                        FinalRewMode(always_negative=True),
                        factor=penalty)
Esempio n. 13
0
    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get('state_des', np.zeros(4))
        Q = task_args.get('Q', np.diag([1e5, 1e3, 1e3, 1e2]))
        R = task_args.get('R', np.eye(1))

        return DesStateTask(
            self.spec, state_des, ScaledExpQuadrErrRewFcn(Q, R, self.state_space, self.act_space, min_rew=1e-4)
        )
    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get('state_des', np.zeros(2))
        Q = task_args.get('Q', np.diag([1e1, 1e-2]))
        R = task_args.get('R', np.diag([1e-6]))

        return FinalRewTask(
            DesStateTask(self.spec, state_des, QuadrErrRewFcn(Q, R)), factor=1e3,
            mode=FinalRewMode(always_negative=True)
        )
Esempio n. 15
0
    def _create_main_task(self, task_args: dict) -> Task:
        # Create a DesStateTask that masks everything but the ball position
        idcs = list(
            range(self.state_space.flat_dim - 6,
                  self.state_space.flat_dim - 3))  # Cartesian ball position
        spec = EnvSpec(
            self.spec.obs_space, self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)))

        # If we do not use copy(), state_des coming from MuJoCo is a reference and updates automatically at each step.
        # Note: sim.forward() + get_body_xpos() results in wrong output for state_des, as sim has not been updated to
        # init_space.sample(), which is first called in reset()

        if task_args.get('sparse_rew_fcn', False):
            factor = task_args.get('success_bonus', 1)
            # Binary final reward task
            main_task = FinalRewTask(ConditionOnlyTask(
                spec,
                condition_fcn=self.check_ball_in_cup,
                is_success_condition=True),
                                     mode=FinalRewMode(always_positive=True),
                                     factor=factor)
            # Yield -1 on fail after the main task ist done (successfully or not)
            dont_fail_after_succ_task = FinalRewTask(
                GoallessTask(spec, ZeroPerStepRewFcn()),
                mode=FinalRewMode(always_negative=True),
                factor=factor)

            # Augment the binary task with an endless dummy task, to avoid early stopping
            task = SequentialTasks((main_task, dont_fail_after_succ_task))

            return MaskedTask(self.spec, task, idcs)

        else:
            state_des = self.sim.data.get_site_xpos(
                'cup_goal')  # this is a reference
            R_default = np.diag([
                0, 0, 1, 1e-2, 1e-2, 1e-1
            ]) if self.num_dof == 7 else np.diag([0, 0, 1e-2, 1e-2])
            rew_fcn = ExpQuadrErrRewFcn(
                Q=task_args.get('Q', np.diag([
                    2e1, 1e-4, 2e1
                ])),  # distance ball - cup; shouldn't move in y-direction
                R=task_args.get('R',
                                R_default)  # last joint is really unreliable
            )
            task = DesStateTask(spec, state_des, rew_fcn)

            # Wrap the masked DesStateTask to add a bonus for the best state in the rollout
            return BestStateFinalRewTask(MaskedTask(self.spec, task, idcs),
                                         max_steps=self.max_steps,
                                         factor=task_args.get(
                                             'final_factor', 0.05))
Esempio n. 16
0
def test_tracking_task(envspec_432, rew_fcn):
    # Create env spec and sub-tasks (state_space is necessary for the has_failed function)
    num_steps = 5
    state_init = envspec_432.state_space.bound_lo.copy()
    state_des = envspec_432.state_space.bound_lo.copy()
    task = DesStateTask(envspec_432, state_des, rew_fcn)
    task.reset(env_spec=envspec_432)

    # Create artificial dynamics by hard-coding a sequence of states
    fixed_traj = np.linspace(-.5, +.5, num_steps, endpoint=True)
    r = [-pyrado.inf]*num_steps

    for i in range(num_steps):
        # Advance the desired state, but keep the system state
        old_state_des_task = task.state_des.copy()
        state_des[:] = fixed_traj[i]*np.ones(3)
        r[i] = task.step_rew(state_init, act=np.zeros(2), remaining_steps=num_steps - (i + 1))

        if i > 0:
            assert all(task.state_des >= old_state_des_task)  # desired state is moving away
            assert r[i] <= r[i - 1]  # reward goes down
Esempio n. 17
0
    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get('state_des', np.zeros(8))
        Q = task_args.get(
            'Q', np.diag([1e0, 1e0, 5e3, 5e3, 1e-2, 1e-2, 5e-1, 5e-1]))
        R = task_args.get('R', np.diag([1e-2, 1e-2]))
        # Q = np.diag([1e2, 1e2, 5e2, 5e2, 1e-2, 1e-2, 1e+1, 1e+1])  # for LQR
        # R = np.diag([1e-2, 1e-2])  # for LQR

        return DesStateTask(
            self.spec, state_des,
            ScaledExpQuadrErrRewFcn(Q,
                                    R,
                                    self.state_space,
                                    self.act_space,
                                    min_rew=1e-4))
Esempio n. 18
0
def create_task_space_discrepancy_task(env_spec: EnvSpec, rew_fcn: RewFcn) -> MaskedTask:
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['DiscrepTS_Y', 'DiscrepTS_Z']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs))
    )

    # Create a desired state task (no task space discrepancy is desired and the task never stops because of success)
    dst = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn, never_succeeded)

    # Mask selected discrepancy observation
    return MaskedTask(env_spec, dst, idcs)
Esempio n. 19
0
    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get("state_des",
                                  np.zeros(self.obs_space.flat_dim))

        Q = np.diag([1e-1, 1e-1, 1e1, 1e1, 0, 1e-3, 1e-3, 1e-2, 1e-2,
                     0])  # Pa, Pb, Bx, By, Bz, Pad, Pbd, Bxd, Byd, Bzd
        R = np.diag([1e-3, 1e-3])  # Padd, Pbdd

        return DesStateTask(
            self.spec, state_des,
            ScaledExpQuadrErrRewFcn(Q,
                                    R,
                                    self.state_space,
                                    self.act_space,
                                    min_rew=1e-4))
Esempio n. 20
0
    def _create_task(self, task_args: dict) -> Task:
        # Create a DesStateTask that masks everything but the ball position
        idcs = list(
            range(self.state_space.flat_dim - 3,
                  self.state_space.flat_dim))  # Cartesian ball position
        spec = EnvSpec(
            self.spec.obs_space, self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)))

        # If we do not use copy(), state_des coming from MuJoCo is a reference and updates automatically at each step.
        # Note: sim.forward() + get_body_xpos() results in wrong output for state_des, as sim has not been updated to
        # init_space.sample(), which is first called in reset()

        if task_args.get('sparse_rew_fcn', False):
            # Binary final reward task
            task = FinalRewTask(ConditionOnlyTask(
                spec,
                condition_fcn=self.check_ball_in_cup,
                is_success_condition=True),
                                mode=FinalRewMode(always_positive=True),
                                factor=1)

            return MaskedTask(self.spec, task, idcs)

        else:
            # If we do not use copy(), state_des is a reference to passed body and updates automatically at each step
            state_des = self.sim.data.get_site_xpos(
                'cup_goal')  # this is a reference
            rew_fcn = ExpQuadrErrRewFcn(
                Q=task_args.get('Q', np.diag([
                    1e1, 1e5, 2e1
                ])),  # distance ball - cup; shouldn't move in y-direction
                R=task_args.get('R',
                                np.diag([
                                    1e-1, 1e-1, 1e-1, 1e-2, 1e-2, 1e-2
                                ]))  # desired joint angles and velocities
            )
            task = DesStateTask(spec, state_des, rew_fcn)

            # Wrap the masked DesStateTask to add a bonus for the best state in the rollout
            return BestStateFinalRewTask(MaskedTask(self.spec, task, idcs),
                                         max_steps=self.max_steps,
                                         factor=task_args.get(
                                             'final_factor', 1.))
Esempio n. 21
0
def create_extract_slider_task(env_spec: EnvSpec, task_args: dict,
                               des_state: np.ndarray):
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['Slider_Y']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task
    Q = task_args.get('Q_slider', np.array([[4e1]]))
    R = task_args.get('R_slider', 1e-6 * np.eye(spec.act_space.flat_dim))
    rew_fcn = ExpQuadrErrRewFcn(Q, R)
    dst_task = DesStateTask(spec, des_state, rew_fcn)

    # Return the masked tasks
    return MaskedTask(env_spec, dst_task, idcs)
Esempio n. 22
0
def create_check_all_boundaries_task(env_spec: EnvSpec,
                                     penalty: float) -> FinalRewTask:
    """
    Create a task that is checking if any of the state space bounds is violated.
    This checks every limit and not just of a subspace of the state state as it could happen when using a `MaskedTask`.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param penalty: scalar cost (positive values) for violating the bounds
    :return: masked task that only considers a subspace of all observations
    """
    return FinalRewTask(
        DesStateTask(env_spec, np.zeros(env_spec.state_space.shape),
                     ZeroPerStepRewFcn(), never_succeeded),
        FinalRewMode(always_negative=True),
        factor=penalty,
    )
Esempio n. 23
0
def create_insert_task(env_spec: EnvSpec, state_des: np.ndarray,
                       rew_fcn: RewFcn, success_fcn: Callable):
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = [
        'Effector_X', 'Effector_Z', 'Effector_B', 'Effector_Xd', 'Effector_Zd',
        'Effector_Bd'
    ]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a wrapped desired state task with the goal behind the wall
    fdst = FinalRewTask(DesStateTask(spec, state_des, rew_fcn, success_fcn),
                        mode=FinalRewMode(state_dependent=True,
                                          time_dependent=True))

    # Mask selected states
    return MaskedTask(env_spec, fdst, idcs)
Esempio n. 24
0
def create_goal_dist_distvel_task(env_spec: EnvSpec,
                                  ds_index: int,
                                  rew_fcn: RewFcn,
                                  succ_thold: float = 0.01) -> MaskedTask:
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = [f'GD_DS{ds_index}', f'GD_DS{ds_index}d']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs))
        if env_spec.state_space is not EmptySpace else EmptySpace)

    # Create a desired state task with the goal [0, 0]
    dst = DesStateTask(
        spec, np.zeros(2), rew_fcn,
        functools.partial(proximity_succeeded, thold_dist=succ_thold))

    # Mask selected goal distance velocities
    return MaskedTask(env_spec, dst, idcs)
Esempio n. 25
0
def test_best_state_final_rew_task(envspec_432, rew_fcn, factor):
    # Create env spec and sub-tasks (state_space is necessary for the has_failed function)
    num_steps = 5
    state_des = np.array([0.05, 0.05, 0.05])
    task = BestStateFinalRewTask(DesStateTask(envspec_432, state_des, rew_fcn), max_steps=num_steps, factor=factor)
    task.reset(env_spec=envspec_432)

    # Create artificial dynamics by hard-coding a sequence of states
    fixed_traj = np.linspace(-.5, +.5, num_steps, endpoint=True)
    r = [-pyrado.inf]*num_steps

    for i in range(num_steps):
        # Advance the artificial state
        state = fixed_traj[i]*np.ones(3)
        r[i] = task.step_rew(state, act=np.zeros(2), remaining_steps=num_steps - (i + 1))

    last_state = fixed_traj[-1]*np.ones(3)
    final_rew = task.final_rew(last_state, remaining_steps=0)
    assert final_rew == pytest.approx(max(r)*num_steps*factor)
    assert task.best_rew == pytest.approx(max(r))
Esempio n. 26
0
    def _create_deviation_task(self, task_args: dict) -> Task:
        idcs = list(
            range(self.state_space.flat_dim - 3,
                  self.state_space.flat_dim))  # Cartesian cup goal position
        spec = EnvSpec(
            self.spec.obs_space, self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)))
        # init cup goal position
        state_des = np.array([
            0.82521, 0, 1.4469
        ]) if self.num_dof == 7 else np.array([0.758, 0, 1.5])
        rew_fcn = QuadrErrRewFcn(
            Q=task_args.get('Q_dev', np.diag(
                [2e-1, 1e-4, 5e0])),  # Cartesian distance from init position
            R=task_args.get(
                'R_dev',
                np.zeros(
                    (self._act_space.shape[0], self._act_space.shape[0]))))
        task = DesStateTask(spec, state_des, rew_fcn)

        return MaskedTask(self.spec, task, idcs)
Esempio n. 27
0
def create_mini_golf_task(env_spec: EnvSpec, hole_pos: np.ndarray,
                          succ_thold: float):
    """
    Create a task for putting the ball into a whole.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param hole_pos: planar x and yy  position of the goal's center
    :param succ_thold: once the object of interest is closer than this threshold, the task is considered successfully
    :return: masked task that only considers a subspace of all observations
    """
    if not hole_pos.size == 2:
        raise pyrado.ShapeErr(given=hole_pos, expected_match=(2, ))

    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ["Ball_X", "Ball_Y"]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task
    dst = DesStateTask(
        spec,
        state_des=hole_pos,
        rew_fcn=AbsErrRewFcn(q=np.ones(2),
                             r=1e-4 * np.ones(spec.act_space.shape)),
        success_fcn=functools.partial(proximity_succeeded,
                                      thold_dist=succ_thold),
    )
    frt = FinalRewTask(dst, FinalRewMode(always_positive=True))

    # Return the masked tasks
    return MaskedTask(env_spec, frt, idcs)
Esempio n. 28
0
def create_task_space_discrepancy_task(env_spec: EnvSpec,
                                       rew_fcn: RewFcn) -> MaskedTask:
    """
    Create a task which punishes the discrepancy between the actual and the commanded state of the observed body.
    The observed body is specified in in the associated experiment configuration file in RcsPySim.
    This task only looks at the X and Z coordinates.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param rew_fcn: reward function
    :return: masked task that only considers a subspace of all observations
    """
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    obs_labels = [
        obs_label for obs_label in env_spec.state_space.labels
        if "DiscrepTS" in obs_label
    ]
    if not obs_labels:
        raise pyrado.ValueErr(
            msg="No state space labels found that contain 'DiscrepTS'")

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    # Create an endlessly running desired state task (no task space discrepancy is desired)
    task = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                        never_succeeded)

    # Mask selected discrepancy observation
    return MaskedTask(env_spec, task, obs_labels)
Esempio n. 29
0
def create_lifting_task(
    env_spec: EnvSpec,
    obs_labels: Sequence[str],
    des_height: Union[float, np.ndarray],
    succ_thold: float = 0.01,
) -> MaskedTask:
    """
    Create a task for lifting an object.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param obs_labels: labels for selection, e.g. ['Box_Z']. This needs to match the observations' names in RcsPySim
    :param des_height: desired height of the object (depends of the coordinate system). If reached, the task is over.
    :param succ_thold: once the object of interest is closer than this threshold, the task is considered successfully
    :return: masked task that only considers a subspace of all observations
    """
    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    # Create a desired state task
    state_des = np.asarray(des_height)
    Q = np.diag([6e2])
    R = 1e-1 * np.eye(spec.act_space.flat_dim)
    rew_fcn = ExpQuadrErrRewFcn(Q, R)
    task = DesStateTask(
        spec, state_des, rew_fcn,
        functools.partial(proximity_succeeded, thold_dist=succ_thold))

    # Return the masked tasks
    return MaskedTask(env_spec, task, obs_labels)
Esempio n. 30
0
    def _create_deviation_task(self, task_args: dict) -> Task:
        idcs = list(
            range(self.state_space.flat_dim - 3,
                  self.state_space.flat_dim))  # Cartesian cup goal position
        spec = EnvSpec(
            self.spec.obs_space,
            self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)),
        )
        # init cup goal position
        state_des = GOAL_POS_INIT_SIM_7DOF if self._num_dof == 7 else GOAL_POS_INIT_SIM_4DOF
        rew_fcn = QuadrErrRewFcn(
            Q=task_args.get("Q_dev", np.diag(
                [2e-1, 1e-6,
                 5e0])),  # Cartesian distance from init cup position
            R=task_args.get(
                "R_dev",
                np.zeros((self.act_space.shape[0], self.act_space.shape[0]))
            ),  # joint space distance from init pose, interferes with R_default from _create_main_task
        )
        task = DesStateTask(spec, state_des, rew_fcn)

        return MaskedTask(self.spec, task, idcs)