예제 #1
0
    def _create_main_task(self, task_args: dict) -> Task:
        # Create a DesStateTask that masks everything but the ball position
        idcs = list(
            range(self.state_space.flat_dim - 6,
                  self.state_space.flat_dim - 3))  # Cartesian ball position
        spec = EnvSpec(
            self.spec.obs_space, self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)))

        # If we do not use copy(), state_des coming from MuJoCo is a reference and updates automatically at each step.
        # Note: sim.forward() + get_body_xpos() results in wrong output for state_des, as sim has not been updated to
        # init_space.sample(), which is first called in reset()

        if task_args.get('sparse_rew_fcn', False):
            factor = task_args.get('success_bonus', 1)
            # Binary final reward task
            main_task = FinalRewTask(ConditionOnlyTask(
                spec,
                condition_fcn=self.check_ball_in_cup,
                is_success_condition=True),
                                     mode=FinalRewMode(always_positive=True),
                                     factor=factor)
            # Yield -1 on fail after the main task ist done (successfully or not)
            dont_fail_after_succ_task = FinalRewTask(
                GoallessTask(spec, ZeroPerStepRewFcn()),
                mode=FinalRewMode(always_negative=True),
                factor=factor)

            # Augment the binary task with an endless dummy task, to avoid early stopping
            task = SequentialTasks((main_task, dont_fail_after_succ_task))

            return MaskedTask(self.spec, task, idcs)

        else:
            state_des = self.sim.data.get_site_xpos(
                'cup_goal')  # this is a reference
            R_default = np.diag([
                0, 0, 1, 1e-2, 1e-2, 1e-1
            ]) if self.num_dof == 7 else np.diag([0, 0, 1e-2, 1e-2])
            rew_fcn = ExpQuadrErrRewFcn(
                Q=task_args.get('Q', np.diag([
                    2e1, 1e-4, 2e1
                ])),  # distance ball - cup; shouldn't move in y-direction
                R=task_args.get('R',
                                R_default)  # last joint is really unreliable
            )
            task = DesStateTask(spec, state_des, rew_fcn)

            # Wrap the masked DesStateTask to add a bonus for the best state in the rollout
            return BestStateFinalRewTask(MaskedTask(self.spec, task, idcs),
                                         max_steps=self.max_steps,
                                         factor=task_args.get(
                                             'final_factor', 0.05))
예제 #2
0
def create_box_flip_task(env_spec: EnvSpec, continuous_rew_fcn):
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['Box_A']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs))
    )

    # Create a desired state task
    # state_des = np.array([0.3])  # box position is measured relative to the table
    state_des = np.array([-np.pi/2])  # box position is measured world coordinates
    if continuous_rew_fcn:
        # q = np.array([0./np.pi])
        # r = 1e-6*np.ones(spec.act_space.flat_dim)
        # rew_fcn_act = AbsErrRewFcn(q, r)
        rew_fcn = CosOfOneEleRewFcn(idx=0)
        # rew_fcn = CompoundRewFcn([rew_fcn_act, rew_fcn_box])
    else:
        rew_fcn = MinusOnePerStepRewFcn()
    ef_task = EndlessFlippingTask(spec, rew_fcn, init_angle=0.)

    # Return the masked tasks
    return MaskedTask(env_spec, ef_task, idcs)
예제 #3
0
def create_collision_task(env_spec: EnvSpec, factor: float) -> MaskedTask:
    """
    Create a task which punishes collision costs given a collision model with pairs of bodies.
    This task only looks at the instantaneous collision cost.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param factor: cost / reward function scaling factor
    :return: masked task that only considers a subspace of all observations
    """
    if not factor >= 0:
        raise pyrado.ValueErr(given=factor, ge_constraint="0")

    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    obs_labels = ["CollCost"]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    rew_fcn = AbsErrRewFcn(q=np.array([factor]),
                           r=np.zeros(spec.act_space.shape))

    # Create an endlessly running desired state task (no collision is desired)
    task = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                        never_succeeded)

    # Mask selected collision cost observation
    return MaskedTask(env_spec, task, obs_labels)
예제 #4
0
def create_task_space_discrepancy_task(env_spec: EnvSpec,
                                       rew_fcn: RewFcn) -> MaskedTask:
    """
    Create a task which punishes the discrepancy between the actual and the commanded state of the observed body.
    The observed body is specified in in the associated experiment configuration file in RcsPySim.
    This task only looks at the X and Z coordinates.

    :param env_spec: environment specification
    :param rew_fcn: reward function
    :return: masked task
    """
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = [idx for idx in env_spec.state_space.labels if 'DiscrepTS' in idx]
    if not idcs:
        raise pyrado.ValueErr(
            msg="No state space labels found that contain 'DiscrepTS'")

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task (no task space discrepancy is desired and the task never stops because of success)
    dst = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                       never_succeeded)

    # Mask selected discrepancy observation
    return MaskedTask(env_spec, dst, idcs)
예제 #5
0
def create_forcemin_task(env_spec: EnvSpec, obs_labels: Sequence[str],
                         Q: np.ndarray) -> MaskedTask:
    """
    Create a task which punishes the amount of used force.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param obs_labels: labels for selection, e.g. ['WristLoadCellLBR_R_Fy']. This needs to match the observations'
                       names in RcsPySim
    :param Q: weight matrix of dim NxN with N=num_forces for the quadratic force costs
    :return: masked task that only considers a subspace of all observations
    """
    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    # Create an endlessly running desired state task
    rew_fcn = QuadrErrRewFcn(Q=Q,
                             R=np.zeros((spec.act_space.flat_dim,
                                         spec.act_space.flat_dim)))
    task = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                        never_succeeded)

    # Mask selected collision cost observation
    return MaskedTask(env_spec, task, obs_labels)
예제 #6
0
def create_collision_task(env_spec: EnvSpec, factor: float) -> MaskedTask:
    """
    Create a task which punishes collision costs given a collision model with pairs of bodies.
    This task only looks at the instantaneous collision cost.

    :param env_spec: environment specification
    :param factor: cost / reward function scaling factor
    :return: masked task
    """
    if not factor >= 0:
        raise pyrado.ValueErr(given=factor, ge_constraint='0')

    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['CollCost']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    rew_fcn = AbsErrRewFcn(q=np.array([factor]),
                           r=np.zeros(spec.act_space.shape))

    # Create a desired state task (no collision is desired and the task never stops because of success)
    dst = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                       never_succeeded)

    # Mask selected collision cost observation
    return MaskedTask(env_spec, dst, idcs)
예제 #7
0
def create_flipping_task(
    env_spec: EnvSpec,
    obs_labels: Sequence[str],
    des_angle_delta: float = np.pi / 2.0,
    endless: bool = True,
) -> MaskedTask:
    """
    Create a task for rotating an object.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param obs_labels: labels for selection, e.g. ['Box_A']. This needs to match the observations' names in RcsPySim
    :param des_angle_delta: desired angle to rotate. If reached, the task is reset, and rotating continues.
    :param endless: if `True`, the task will promote endlessly repeated flipping about the desired angle, else only one
                    flip is desired
    :return: masked task that only considers a subspace of all observations
    """
    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    # Create a desired state task
    rew_fcn = CosOfOneEleRewFcn(idx=0)
    task = FlippingTask(spec, des_angle_delta, rew_fcn, endless=endless)

    # Return the masked tasks
    return MaskedTask(env_spec, task, obs_labels)
예제 #8
0
def create_box_upper_shelve_task(env_spec: EnvSpec, continuous_rew_fcn: bool,
                                 succ_thold: float):
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['Box_X', 'Box_Y', 'Box_Z', 'Box_A', 'Box_B', 'Box_C']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task
    state_des = np.zeros(
        6)  # zeros since we observe the box position relative to the goal
    if continuous_rew_fcn:
        Q = np.diag([5e0, 5e0, 5e0, 1e-1, 1e-1, 1e-1])
        R = 5e-2 * np.eye(spec.act_space.flat_dim)
        rew_fcn = ExpQuadrErrRewFcn(Q, R)
    else:
        rew_fcn = MinusOnePerStepRewFcn
    dst = DesStateTask(
        spec, state_des, rew_fcn,
        functools.partial(proximity_succeeded, thold_dist=succ_thold))

    # Return the masked tasks
    return MaskedTask(env_spec, dst, idcs)
예제 #9
0
def create_home_pos_task(env_spec: EnvSpec, obs_labels: Sequence[str],
                         state_des: np.ndarray) -> MaskedTask:
    """
    Create a task for moving the robot to safe position.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param obs_labels: labels for selection, e.g. ['PowerGrasp_R_Y', 'PowerGrasp_R_Z']. This needs to match the
                       observations' names in RcsPySim
    :param state_des: desired state (depends of the coordinate system). If reached, the task is over.
    :return: masked task that only considers a subspace of all observations
    """
    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    # Create a desired state task
    Q = 1e1 * np.eye(len(state_des))
    R = 1e-1 * np.eye(spec.act_space.flat_dim)
    rew_fcn = ExpQuadrErrRewFcn(Q, R)
    task = DesStateTask(spec, state_des, rew_fcn)

    # Return the masked tasks
    return MaskedTask(env_spec, task, obs_labels)
예제 #10
0
def create_goal_dist_task(env_spec: EnvSpec,
                          ds_label: int,
                          rew_fcn: RewFcn,
                          succ_thold: float = 0.01) -> MaskedTask:
    """
    Create a task that rewards minimizing the `GoalDistance` of dynamical system movements primitives (see RcsPySim).

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param ds_label: label of the dynamical system (see RcsPySim)
    :param rew_fcn: reward function
    :param succ_thold: once the object of interest is closer than this threshold, the task is considered successfully
    :return: masked task that only considers a subspace of all observations
    """
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    obs_labels = [f"GD_DS{ds_label}"]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels))
        if env_spec.state_space is not EmptySpace else EmptySpace,
    )

    # Create a desired state task with the goal [0, 0]
    task = DesStateTask(
        spec, np.zeros(2), rew_fcn,
        functools.partial(proximity_succeeded, thold_dist=succ_thold))

    # Mask selected goal distance
    return MaskedTask(env_spec, task, obs_labels)
예제 #11
0
def create_box_lift_task(env_spec: EnvSpec, continuous_rew_fcn: bool,
                         succ_thold: float):
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['Box_Z']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task
    # state_des = np.array([0.3])  # box position is measured relative to the table
    state_des = np.array([1.1])  # box position is measured world coordinates
    if continuous_rew_fcn:
        Q = np.diag([3e1])
        R = 1e0 * np.eye(spec.act_space.flat_dim)
        rew_fcn = ExpQuadrErrRewFcn(Q, R)
    else:
        rew_fcn = MinusOnePerStepRewFcn()
    dst = DesStateTask(
        spec, state_des, rew_fcn,
        functools.partial(proximity_succeeded, thold_dist=succ_thold))

    # Return the masked tasks
    return MaskedTask(env_spec, dst, idcs)
예제 #12
0
    def _create_task(self, task_args: dict) -> Task:
        # Create a DesStateTask that masks everything but the ball position
        idcs = list(
            range(self.state_space.flat_dim - 3,
                  self.state_space.flat_dim))  # Cartesian ball position
        spec = EnvSpec(
            self.spec.obs_space, self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)))

        # If we do not use copy(), state_des coming from MuJoCo is a reference and updates automatically at each step.
        # Note: sim.forward() + get_body_xpos() results in wrong output for state_des, as sim has not been updated to
        # init_space.sample(), which is first called in reset()

        if task_args.get('sparse_rew_fcn', False):
            # Binary final reward task
            task = FinalRewTask(ConditionOnlyTask(
                spec,
                condition_fcn=self.check_ball_in_cup,
                is_success_condition=True),
                                mode=FinalRewMode(always_positive=True),
                                factor=1)

            return MaskedTask(self.spec, task, idcs)

        else:
            # If we do not use copy(), state_des is a reference to passed body and updates automatically at each step
            state_des = self.sim.data.get_site_xpos(
                'cup_goal')  # this is a reference
            rew_fcn = ExpQuadrErrRewFcn(
                Q=task_args.get('Q', np.diag([
                    1e1, 1e5, 2e1
                ])),  # distance ball - cup; shouldn't move in y-direction
                R=task_args.get('R',
                                np.diag([
                                    1e-1, 1e-1, 1e-1, 1e-2, 1e-2, 1e-2
                                ]))  # desired joint angles and velocities
            )
            task = DesStateTask(spec, state_des, rew_fcn)

            # Wrap the masked DesStateTask to add a bonus for the best state in the rollout
            return BestStateFinalRewTask(MaskedTask(self.spec, task, idcs),
                                         max_steps=self.max_steps,
                                         factor=task_args.get(
                                             'final_factor', 1.))
예제 #13
0
def create_task_space_discrepancy_task(env_spec: EnvSpec, rew_fcn: RewFcn) -> MaskedTask:
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['DiscrepTS_Y', 'DiscrepTS_Z']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs))
    )

    # Create a desired state task (no task space discrepancy is desired and the task never stops because of success)
    dst = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn, never_succeeded)

    # Mask selected discrepancy observation
    return MaskedTask(env_spec, dst, idcs)
예제 #14
0
def create_extract_slider_task(env_spec: EnvSpec, task_args: dict,
                               des_state: np.ndarray):
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['Slider_Y']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task
    Q = task_args.get('Q_slider', np.array([[4e1]]))
    R = task_args.get('R_slider', 1e-6 * np.eye(spec.act_space.flat_dim))
    rew_fcn = ExpQuadrErrRewFcn(Q, R)
    dst_task = DesStateTask(spec, des_state, rew_fcn)

    # Return the masked tasks
    return MaskedTask(env_spec, dst_task, idcs)
예제 #15
0
def create_insert_task(env_spec: EnvSpec, state_des: np.ndarray,
                       rew_fcn: RewFcn, success_fcn: Callable):
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = [
        'Effector_X', 'Effector_Z', 'Effector_B', 'Effector_Xd', 'Effector_Zd',
        'Effector_Bd'
    ]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a wrapped desired state task with the goal behind the wall
    fdst = FinalRewTask(DesStateTask(spec, state_des, rew_fcn, success_fcn),
                        mode=FinalRewMode(state_dependent=True,
                                          time_dependent=True))

    # Mask selected states
    return MaskedTask(env_spec, fdst, idcs)
예제 #16
0
def create_goal_dist_distvel_task(env_spec: EnvSpec,
                                  ds_index: int,
                                  rew_fcn: RewFcn,
                                  succ_thold: float = 0.01) -> MaskedTask:
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = [f'GD_DS{ds_index}', f'GD_DS{ds_index}d']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs))
        if env_spec.state_space is not EmptySpace else EmptySpace)

    # Create a desired state task with the goal [0, 0]
    dst = DesStateTask(
        spec, np.zeros(2), rew_fcn,
        functools.partial(proximity_succeeded, thold_dist=succ_thold))

    # Mask selected goal distance velocities
    return MaskedTask(env_spec, dst, idcs)
예제 #17
0
    def _create_deviation_task(self, task_args: dict) -> Task:
        idcs = list(
            range(self.state_space.flat_dim - 3,
                  self.state_space.flat_dim))  # Cartesian cup goal position
        spec = EnvSpec(
            self.spec.obs_space, self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)))
        # init cup goal position
        state_des = np.array([
            0.82521, 0, 1.4469
        ]) if self.num_dof == 7 else np.array([0.758, 0, 1.5])
        rew_fcn = QuadrErrRewFcn(
            Q=task_args.get('Q_dev', np.diag(
                [2e-1, 1e-4, 5e0])),  # Cartesian distance from init position
            R=task_args.get(
                'R_dev',
                np.zeros(
                    (self._act_space.shape[0], self._act_space.shape[0]))))
        task = DesStateTask(spec, state_des, rew_fcn)

        return MaskedTask(self.spec, task, idcs)
예제 #18
0
def create_mini_golf_task(env_spec: EnvSpec, hole_pos: np.ndarray,
                          succ_thold: float):
    """
    Create a task for putting the ball into a whole.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param hole_pos: planar x and yy  position of the goal's center
    :param succ_thold: once the object of interest is closer than this threshold, the task is considered successfully
    :return: masked task that only considers a subspace of all observations
    """
    if not hole_pos.size == 2:
        raise pyrado.ShapeErr(given=hole_pos, expected_match=(2, ))

    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ["Ball_X", "Ball_Y"]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task
    dst = DesStateTask(
        spec,
        state_des=hole_pos,
        rew_fcn=AbsErrRewFcn(q=np.ones(2),
                             r=1e-4 * np.ones(spec.act_space.shape)),
        success_fcn=functools.partial(proximity_succeeded,
                                      thold_dist=succ_thold),
    )
    frt = FinalRewTask(dst, FinalRewMode(always_positive=True))

    # Return the masked tasks
    return MaskedTask(env_spec, frt, idcs)
예제 #19
0
def create_task_space_discrepancy_task(env_spec: EnvSpec,
                                       rew_fcn: RewFcn) -> MaskedTask:
    """
    Create a task which punishes the discrepancy between the actual and the commanded state of the observed body.
    The observed body is specified in in the associated experiment configuration file in RcsPySim.
    This task only looks at the X and Z coordinates.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param rew_fcn: reward function
    :return: masked task that only considers a subspace of all observations
    """
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    obs_labels = [
        obs_label for obs_label in env_spec.state_space.labels
        if "DiscrepTS" in obs_label
    ]
    if not obs_labels:
        raise pyrado.ValueErr(
            msg="No state space labels found that contain 'DiscrepTS'")

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    # Create an endlessly running desired state task (no task space discrepancy is desired)
    task = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                        never_succeeded)

    # Mask selected discrepancy observation
    return MaskedTask(env_spec, task, obs_labels)
예제 #20
0
def create_lifting_task(
    env_spec: EnvSpec,
    obs_labels: Sequence[str],
    des_height: Union[float, np.ndarray],
    succ_thold: float = 0.01,
) -> MaskedTask:
    """
    Create a task for lifting an object.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param obs_labels: labels for selection, e.g. ['Box_Z']. This needs to match the observations' names in RcsPySim
    :param des_height: desired height of the object (depends of the coordinate system). If reached, the task is over.
    :param succ_thold: once the object of interest is closer than this threshold, the task is considered successfully
    :return: masked task that only considers a subspace of all observations
    """
    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    # Create a desired state task
    state_des = np.asarray(des_height)
    Q = np.diag([6e2])
    R = 1e-1 * np.eye(spec.act_space.flat_dim)
    rew_fcn = ExpQuadrErrRewFcn(Q, R)
    task = DesStateTask(
        spec, state_des, rew_fcn,
        functools.partial(proximity_succeeded, thold_dist=succ_thold))

    # Return the masked tasks
    return MaskedTask(env_spec, task, obs_labels)
예제 #21
0
    def _create_deviation_task(self, task_args: dict) -> Task:
        idcs = list(
            range(self.state_space.flat_dim - 3,
                  self.state_space.flat_dim))  # Cartesian cup goal position
        spec = EnvSpec(
            self.spec.obs_space,
            self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)),
        )
        # init cup goal position
        state_des = GOAL_POS_INIT_SIM_7DOF if self._num_dof == 7 else GOAL_POS_INIT_SIM_4DOF
        rew_fcn = QuadrErrRewFcn(
            Q=task_args.get("Q_dev", np.diag(
                [2e-1, 1e-6,
                 5e0])),  # Cartesian distance from init cup position
            R=task_args.get(
                "R_dev",
                np.zeros((self.act_space.shape[0], self.act_space.shape[0]))
            ),  # joint space distance from init pose, interferes with R_default from _create_main_task
        )
        task = DesStateTask(spec, state_des, rew_fcn)

        return MaskedTask(self.spec, task, idcs)
예제 #22
0
    def _create_task(self, task_args: dict) -> Task:
        # Define the indices for selection. This needs to match the observations' names in RcsPySim.
        if task_args.get("consider_velocities", False):
            idcs = ["Effector_X", "Effector_Z", "Effector_Xd", "Effector_Zd"]
        else:
            idcs = ["Effector_X", "Effector_Z"]

        # Get the masked environment specification
        spec = EnvSpec(
            self.spec.obs_space,
            self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)),
        )

        # Get the goal position in world coordinates for all three sub-goals
        p1 = self.get_body_position("Goal1", "", "")
        p2 = self.get_body_position("Goal2", "", "")
        p3 = self.get_body_position("Goal3", "", "")
        state_des1 = np.array([p1[0], p1[2], 0, 0])
        state_des2 = np.array([p2[0], p2[2], 0, 0])
        state_des3 = np.array([p3[0], p3[2], 0, 0])
        if task_args.get("consider_velocities", False):
            Q = np.diag([5e-1, 5e-1, 5e-3, 5e-3])
        else:
            Q = np.diag([1e0, 1e0])
            state_des1 = state_des1[:2]
            state_des2 = state_des2[:2]
            state_des3 = state_des3[:2]

        success_fcn = partial(proximity_succeeded,
                              thold_dist=7.5e-2,
                              dims=[0, 1])  # min distance = 7cm
        R = np.zeros((spec.act_space.flat_dim, spec.act_space.flat_dim))

        # Create the tasks
        subtask_1 = FinalRewTask(DesStateTask(spec, state_des1,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_2 = FinalRewTask(DesStateTask(spec, state_des2,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_3 = FinalRewTask(DesStateTask(spec, state_des3,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_4 = FinalRewTask(DesStateTask(spec, state_des1,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_5 = FinalRewTask(DesStateTask(spec, state_des2,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_6 = FinalRewTask(DesStateTask(spec, state_des3,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        task = FinalRewTask(
            SequentialTasks(
                [
                    subtask_1, subtask_2, subtask_3, subtask_4, subtask_5,
                    subtask_6
                ],
                hold_rew_when_done=True,
                verbose=True,
            ),
            mode=FinalRewMode(always_positive=True),
            factor=5e3,
        )
        masked_task = MaskedTask(self.spec, task, idcs)

        # Additional tasks
        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)
        if isinstance(self, Planar3LinkJointCtrlSim):
            # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound
            return ParallelTasks([masked_task, task_check_bounds],
                                 easily_satisfied=True)
        else:
            task_ts_discrepancy = create_task_space_discrepancy_task(
                self.spec,
                AbsErrRewFcn(q=0.5 * np.ones(2),
                             r=np.zeros(self.act_space.shape)))
            # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound
            return ParallelTasks(
                [masked_task, task_check_bounds, task_ts_discrepancy],
                easily_satisfied=True)
예제 #23
0
    def _create_task(self, task_args: dict) -> Task:
        # Define the indices for selection. This needs to match the observations' names in RcsPySim.
        if task_args.get('consider_velocities', False):
            idcs = ['Effector_X', 'Effector_Z', 'Effector_Xd', 'Effector_Zd']
        else:
            idcs = ['Effector_X', 'Effector_Z']

        # Get the masked environment specification
        spec = EnvSpec(
            self.spec.obs_space, self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)))

        # Get and set goal position in world coordinates for all three sub-goals
        p1 = self.get_body_position('Goal1', '', '')
        p2 = self.get_body_position('Goal2', '', '')
        p3 = self.get_body_position('Goal3', '', '')
        if task_args.get('consider_velocities', False):
            Q = np.diag([1e0, 1e0, 1e-1, 1e-1])
            state_des1 = np.array([p1[0], p1[2], 0, 0])
            state_des2 = np.array([p2[0], p2[2], 0, 0])
            state_des3 = np.array([p3[0], p3[2], 0, 0])
        else:
            Q = np.diag([1e0, 1e0])
            state_des1 = np.array([p1[0], p1[2]])
            state_des2 = np.array([p2[0], p2[2]])
            state_des3 = np.array([p3[0], p3[2]])

        success_fcn = functools.partial(proximity_succeeded,
                                        thold_dist=7.5e-2,
                                        dims=[0, 1])  # min distance = 7cm
        R = np.zeros((spec.act_space.flat_dim, spec.act_space.flat_dim))

        # Create the tasks
        subtask_11 = FinalRewTask(DesStateTask(spec, state_des1,
                                               ExpQuadrErrRewFcn(Q, R),
                                               success_fcn),
                                  mode=FinalRewMode(time_dependent=True))
        subtask_21 = FinalRewTask(DesStateTask(spec, state_des2,
                                               ExpQuadrErrRewFcn(Q, R),
                                               success_fcn),
                                  mode=FinalRewMode(time_dependent=True))
        subtask_1p = ParallelTasks([subtask_11, subtask_21],
                                   hold_rew_when_done=True,
                                   verbose=False)
        subtask_3 = FinalRewTask(DesStateTask(spec, state_des3,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_12 = FinalRewTask(DesStateTask(spec, state_des1,
                                               ExpQuadrErrRewFcn(Q, R),
                                               success_fcn),
                                  mode=FinalRewMode(time_dependent=True))
        subtask_22 = FinalRewTask(DesStateTask(spec, state_des2,
                                               ExpQuadrErrRewFcn(Q, R),
                                               success_fcn),
                                  mode=FinalRewMode(time_dependent=True))
        subtask_2p = ParallelTasks([subtask_12, subtask_22],
                                   hold_rew_when_done=True,
                                   verbose=False)
        task = FinalRewTask(SequentialTasks(
            [subtask_1p, subtask_3, subtask_2p],
            hold_rew_when_done=True,
            verbose=True),
                            mode=FinalRewMode(always_positive=True),
                            factor=2e3)
        masked_task = MaskedTask(self.spec, task, idcs)

        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)

        # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound
        return ParallelTasks([masked_task, task_check_bounds])