Ejemplos de FinalRewMode en Python, ejemplos de pyrado.tasks.final_reward.FinalRewMode en Python

Ejemplo n.º 1

0

Mostrar archivo

def test_composite_task_structure(envspec_432, task_type):
    state_des1 = np.zeros(3)
    state_des2 = -.5*np.ones(3)
    state_des3 = +.5*np.ones(3)
    rew_fcn = MinusOnePerStepRewFcn()
    t1 = FinalRewTask(DesStateTask(envspec_432, state_des1, rew_fcn), mode=FinalRewMode(always_positive=True),
                      factor=10)
    t2 = FinalRewTask(DesStateTask(envspec_432, state_des2, rew_fcn), mode=FinalRewMode(always_positive=True),
                      factor=10)
    t3 = FinalRewTask(DesStateTask(envspec_432, state_des3, rew_fcn), mode=FinalRewMode(always_positive=True),
                      factor=10)

    if task_type == 'ParallelTasks':
        ct = ParallelTasks([t1, t2, t3])
    elif task_type == 'SequentialTasks':
        ct = SequentialTasks([t1, t2, t3])
    else:
        raise NotImplementedError
    ct.reset(env_spec=envspec_432)

    assert len(ct) == 3
    assert ct.env_spec.obs_space == envspec_432.obs_space
    assert ct.env_spec.act_space == envspec_432.act_space
    assert ct.env_spec.state_space == envspec_432.state_space
    assert isinstance(ct.tasks[0].rew_fcn, RewFcn)
    assert isinstance(ct.tasks[1].rew_fcn, RewFcn)
    assert isinstance(ct.tasks[2].rew_fcn, RewFcn)

    if type == 'ParallelTasks':
        assert np.all(ct.state_des[0] == state_des1)
        assert np.all(ct.state_des[1] == state_des2)
        assert np.all(ct.state_des[2] == state_des3)
    elif type == 'SequentialTasks':
        assert np.all(ct.state_des == state_des1)

Ejemplo n.º 2

0

Mostrar archivo

    def _create_main_task(self, task_args: dict) -> Task:
        # Create a DesStateTask that masks everything but the ball position
        idcs = list(
            range(self.state_space.flat_dim - 6,
                  self.state_space.flat_dim - 3))  # Cartesian ball position
        spec = EnvSpec(
            self.spec.obs_space, self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)))

        # If we do not use copy(), state_des coming from MuJoCo is a reference and updates automatically at each step.
        # Note: sim.forward() + get_body_xpos() results in wrong output for state_des, as sim has not been updated to
        # init_space.sample(), which is first called in reset()

        if task_args.get('sparse_rew_fcn', False):
            factor = task_args.get('success_bonus', 1)
            # Binary final reward task
            main_task = FinalRewTask(ConditionOnlyTask(
                spec,
                condition_fcn=self.check_ball_in_cup,
                is_success_condition=True),
                                     mode=FinalRewMode(always_positive=True),
                                     factor=factor)
            # Yield -1 on fail after the main task ist done (successfully or not)
            dont_fail_after_succ_task = FinalRewTask(
                GoallessTask(spec, ZeroPerStepRewFcn()),
                mode=FinalRewMode(always_negative=True),
                factor=factor)

            # Augment the binary task with an endless dummy task, to avoid early stopping
            task = SequentialTasks((main_task, dont_fail_after_succ_task))

            return MaskedTask(self.spec, task, idcs)

        else:
            state_des = self.sim.data.get_site_xpos(
                'cup_goal')  # this is a reference
            R_default = np.diag([
                0, 0, 1, 1e-2, 1e-2, 1e-1
            ]) if self.num_dof == 7 else np.diag([0, 0, 1e-2, 1e-2])
            rew_fcn = ExpQuadrErrRewFcn(
                Q=task_args.get('Q', np.diag([
                    2e1, 1e-4, 2e1
                ])),  # distance ball - cup; shouldn't move in y-direction
                R=task_args.get('R',
                                R_default)  # last joint is really unreliable
            )
            task = DesStateTask(spec, state_des, rew_fcn)

            # Wrap the masked DesStateTask to add a bonus for the best state in the rollout
            return BestStateFinalRewTask(MaskedTask(self.spec, task, idcs),
                                         max_steps=self.max_steps,
                                         factor=task_args.get(
                                             'final_factor', 0.05))

Ejemplo n.º 3

0

Mostrar archivo

def create_check_all_boundaries_task(env_spec: EnvSpec,
                                     penalty: float) -> FinalRewTask:
    # Check every limit (nut just of a subspace of the state state as it could happen when using a MaskedTask)
    return FinalRewTask(DesStateTask(env_spec,
                                     np.zeros(env_spec.state_space.shape),
                                     ZeroPerStepRewFcn(), never_succeeded),
                        FinalRewMode(always_negative=True),
                        factor=penalty)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: quanser_cartpole.py Proyecto: fdamken/SimuRLacra

    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get("state_des", np.array([0.0, np.pi, 0.0, 0.0]))

        return FinalRewTask(
            RadiallySymmDesStateTask(self.spec, state_des, UnderActuatedSwingUpRewFcn(), idcs=[1]),
            mode=FinalRewMode(always_negative=True),
        )

Ejemplo n.º 5

0

Mostrar archivo

Archivo: one_mass_oscillator.py Proyecto: arlene-kuehn/SimuRLacra

    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get('state_des', np.zeros(2))
        Q = task_args.get('Q', np.diag([1e1, 1e-2]))
        R = task_args.get('R', np.diag([1e-6]))

        return FinalRewTask(
            DesStateTask(self.spec, state_des, QuadrErrRewFcn(Q, R)), factor=1e3,
            mode=FinalRewMode(always_negative=True)
        )

Ejemplo n.º 6

0

Mostrar archivo

Archivo: quanser_cartpole.py Proyecto: fdamken/SimuRLacra

    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get("state_des", np.array([0.0, np.pi, 0.0, 0.0]))
        Q = task_args.get("Q", np.diag([5e-0, 1e1, 1e-2, 1e-2]))
        R = task_args.get("R", np.diag([1e-3]))

        return FinalRewTask(
            RadiallySymmDesStateTask(self.spec, state_des, QuadrErrRewFcn(Q, R), idcs=[1]),
            mode=FinalRewMode(state_dependent=True, time_dependent=True),
        )

Ejemplo n.º 7

0

Mostrar archivo

Archivo: quanser_cartpole.py Proyecto: arlene-kuehn/SimuRLacra

    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get('state_des', None)
        if state_des is None:
            state_des = np.array([0., np.pi, 0., 0.])

        return FinalRewTask(RadiallySymmDesStateTask(
            self.spec,
            state_des,
            UnderActuatedSwingUpRewFcn(c_act=1e-2),
            idcs=[1]),
                            mode=FinalRewMode(always_negative=True))

Ejemplo n.º 8

0

Mostrar archivo

Archivo: quanser_cartpole.py Proyecto: fdamken/SimuRLacra

    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get("state_des", np.array([0.0, np.pi, 0.0,
                                                         0.0]))
        Q = task_args.get("Q", np.diag([3e-1, 5e-1, 5e-3, 1e-3]))
        R = task_args.get("R", np.diag([1e-3]))
        rew_fcn = QuadrErrRewFcn(Q, R)

        return FinalRewTask(
            RadiallySymmDesStateTask(self.spec, state_des, rew_fcn, idcs=[1]),
            mode=FinalRewMode(always_negative=True),
            factor=1e4,
        )

Ejemplo n.º 9

0

Mostrar archivo

    def _create_task(self, task_args: dict) -> Task:
        # Create a DesStateTask that masks everything but the ball position
        idcs = list(
            range(self.state_space.flat_dim - 3,
                  self.state_space.flat_dim))  # Cartesian ball position
        spec = EnvSpec(
            self.spec.obs_space, self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)))

        # If we do not use copy(), state_des coming from MuJoCo is a reference and updates automatically at each step.
        # Note: sim.forward() + get_body_xpos() results in wrong output for state_des, as sim has not been updated to
        # init_space.sample(), which is first called in reset()

        if task_args.get('sparse_rew_fcn', False):
            # Binary final reward task
            task = FinalRewTask(ConditionOnlyTask(
                spec,
                condition_fcn=self.check_ball_in_cup,
                is_success_condition=True),
                                mode=FinalRewMode(always_positive=True),
                                factor=1)

            return MaskedTask(self.spec, task, idcs)

        else:
            # If we do not use copy(), state_des is a reference to passed body and updates automatically at each step
            state_des = self.sim.data.get_site_xpos(
                'cup_goal')  # this is a reference
            rew_fcn = ExpQuadrErrRewFcn(
                Q=task_args.get('Q', np.diag([
                    1e1, 1e5, 2e1
                ])),  # distance ball - cup; shouldn't move in y-direction
                R=task_args.get('R',
                                np.diag([
                                    1e-1, 1e-1, 1e-1, 1e-2, 1e-2, 1e-2
                                ]))  # desired joint angles and velocities
            )
            task = DesStateTask(spec, state_des, rew_fcn)

            # Wrap the masked DesStateTask to add a bonus for the best state in the rollout
            return BestStateFinalRewTask(MaskedTask(self.spec, task, idcs),
                                         max_steps=self.max_steps,
                                         factor=task_args.get(
                                             'final_factor', 1.))

Ejemplo n.º 10

0

Mostrar archivo

Archivo: predefined.py Proyecto: fdamken/SimuRLacra

def create_check_all_boundaries_task(env_spec: EnvSpec,
                                     penalty: float) -> FinalRewTask:
    """
    Create a task that is checking if any of the state space bounds is violated.
    This checks every limit and not just of a subspace of the state state as it could happen when using a `MaskedTask`.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param penalty: scalar cost (positive values) for violating the bounds
    :return: masked task that only considers a subspace of all observations
    """
    return FinalRewTask(
        DesStateTask(env_spec, np.zeros(env_spec.state_space.shape),
                     ZeroPerStepRewFcn(), never_succeeded),
        FinalRewMode(always_negative=True),
        factor=penalty,
    )

Ejemplo n.º 11

0

Mostrar archivo

def create_insert_task(env_spec: EnvSpec, state_des: np.ndarray,
                       rew_fcn: RewFcn, success_fcn: Callable):
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = [
        'Effector_X', 'Effector_Z', 'Effector_B', 'Effector_Xd', 'Effector_Zd',
        'Effector_Bd'
    ]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a wrapped desired state task with the goal behind the wall
    fdst = FinalRewTask(DesStateTask(spec, state_des, rew_fcn, success_fcn),
                        mode=FinalRewMode(state_dependent=True,
                                          time_dependent=True))

    # Mask selected states
    return MaskedTask(env_spec, fdst, idcs)

Ejemplo n.º 12

0

Mostrar archivo

def create_mini_golf_task(env_spec: EnvSpec, hole_pos: np.ndarray,
                          succ_thold: float):
    """
    Create a task for putting the ball into a whole.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param hole_pos: planar x and yy  position of the goal's center
    :param succ_thold: once the object of interest is closer than this threshold, the task is considered successfully
    :return: masked task that only considers a subspace of all observations
    """
    if not hole_pos.size == 2:
        raise pyrado.ShapeErr(given=hole_pos, expected_match=(2, ))

    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ["Ball_X", "Ball_Y"]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space, env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)))

    # Create a desired state task
    dst = DesStateTask(
        spec,
        state_des=hole_pos,
        rew_fcn=AbsErrRewFcn(q=np.ones(2),
                             r=1e-4 * np.ones(spec.act_space.shape)),
        success_fcn=functools.partial(proximity_succeeded,
                                      thold_dist=succ_thold),
    )
    frt = FinalRewTask(dst, FinalRewMode(always_positive=True))

    # Return the masked tasks
    return MaskedTask(env_spec, frt, idcs)

Ejemplo n.º 13

0

Mostrar archivo

def test_sequential_task_function(envspec_432, hold_rew_when_done):
    # Create env spec and sub-tasks (state_space is necessary for the has_failed function)
    state_des1 = -.5*np.ones(3)
    state_des2 = np.zeros(3)
    state_des3 = +.5*np.ones(3)
    rew_fcn = MinusOnePerStepRewFcn()
    succ_fcn = functools.partial(proximity_succeeded, thold_dist=1e-6)  # necessary to stop a sub-task on success
    t1 = FinalRewTask(DesStateTask(envspec_432, state_des1, rew_fcn, succ_fcn),
                      mode=FinalRewMode(always_positive=True), factor=10)
    t2 = FinalRewTask(DesStateTask(envspec_432, state_des2, rew_fcn, succ_fcn),
                      mode=FinalRewMode(always_positive=True), factor=10)
    t3 = FinalRewTask(DesStateTask(envspec_432, state_des3, rew_fcn, succ_fcn),
                      mode=FinalRewMode(always_positive=True), factor=10)

    st = FinalRewTask(SequentialTasks([t1, t2, t3], 0, hold_rew_when_done),
                      mode=FinalRewMode(always_positive=True), factor=100)

    # Create artificial dynamics by hard-coding a sequence of states
    num_steps = 12
    fixed_traj = np.linspace(-.5, +.6, num_steps, endpoint=True)  # for the final step, all sub-tasks would be true
    r = [-pyrado.inf]*num_steps

    for i in range(num_steps):
        # Advance the artificial state
        state = fixed_traj[i]*np.ones(3)

        # Get all sub-tasks step rew, check if they are done, and if so
        r[i] = st.step_rew(state, act=np.zeros(2), remaining_steps=num_steps - (i + 1))

        # Check if reaching the sub-goals is recognized
        if np.all(state == state_des1):
            assert st.wrapped_task.tasks[0].has_succeeded(state)
            if hold_rew_when_done:
                assert r[i] == 9  # only true for this specific setup
            else:
                assert r[i] == 9  # only true for this specific setup
        if np.all(state == state_des2):
            assert st.wrapped_task.tasks[1].has_succeeded(state)
            if hold_rew_when_done:
                assert r[i] == 8  # only true for this specific setup
            else:
                assert r[i] == 9  # only true for this specific setup
        if np.all(state == state_des3):
            assert st.wrapped_task.tasks[2].has_succeeded(state)
            if hold_rew_when_done:
                assert r[i] == 7  # only true for this specific setup
            else:
                assert r[i] == 9  # only true for this specific setup

        if i < 10:
            # The combined task is not successful until all sub-tasks are successful
            assert not st.has_succeeded(state)
        elif i == 10:
            # Should succeed on the second to last
            assert st.has_succeeded(state)
            assert st.final_rew(state, 0) == pytest.approx(100.)
        elif i == 11:
            # The very last step reward
            if hold_rew_when_done:
                assert r[i] == -3.
            else:
                assert r[i] == 0.
            assert st.final_rew(state, 0) == pytest.approx(0.)  # only yield once

Ejemplo n.º 14

0

Mostrar archivo

    def _create_task(self, task_args: dict) -> Task:
        # Define the indices for selection. This needs to match the observations' names in RcsPySim.
        if task_args.get('consider_velocities', False):
            idcs = ['Effector_X', 'Effector_Z', 'Effector_Xd', 'Effector_Zd']
        else:
            idcs = ['Effector_X', 'Effector_Z']

        # Get the masked environment specification
        spec = EnvSpec(
            self.spec.obs_space, self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)))

        # Get and set goal position in world coordinates for all three sub-goals
        p1 = self.get_body_position('Goal1', '', '')
        p2 = self.get_body_position('Goal2', '', '')
        p3 = self.get_body_position('Goal3', '', '')
        if task_args.get('consider_velocities', False):
            Q = np.diag([1e0, 1e0, 1e-1, 1e-1])
            state_des1 = np.array([p1[0], p1[2], 0, 0])
            state_des2 = np.array([p2[0], p2[2], 0, 0])
            state_des3 = np.array([p3[0], p3[2], 0, 0])
        else:
            Q = np.diag([1e0, 1e0])
            state_des1 = np.array([p1[0], p1[2]])
            state_des2 = np.array([p2[0], p2[2]])
            state_des3 = np.array([p3[0], p3[2]])

        success_fcn = functools.partial(proximity_succeeded,
                                        thold_dist=7.5e-2,
                                        dims=[0, 1])  # min distance = 7cm
        R = np.zeros((spec.act_space.flat_dim, spec.act_space.flat_dim))

        # Create the tasks
        subtask_11 = FinalRewTask(DesStateTask(spec, state_des1,
                                               ExpQuadrErrRewFcn(Q, R),
                                               success_fcn),
                                  mode=FinalRewMode(time_dependent=True))
        subtask_21 = FinalRewTask(DesStateTask(spec, state_des2,
                                               ExpQuadrErrRewFcn(Q, R),
                                               success_fcn),
                                  mode=FinalRewMode(time_dependent=True))
        subtask_1p = ParallelTasks([subtask_11, subtask_21],
                                   hold_rew_when_done=True,
                                   verbose=False)
        subtask_3 = FinalRewTask(DesStateTask(spec, state_des3,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_12 = FinalRewTask(DesStateTask(spec, state_des1,
                                               ExpQuadrErrRewFcn(Q, R),
                                               success_fcn),
                                  mode=FinalRewMode(time_dependent=True))
        subtask_22 = FinalRewTask(DesStateTask(spec, state_des2,
                                               ExpQuadrErrRewFcn(Q, R),
                                               success_fcn),
                                  mode=FinalRewMode(time_dependent=True))
        subtask_2p = ParallelTasks([subtask_12, subtask_22],
                                   hold_rew_when_done=True,
                                   verbose=False)
        task = FinalRewTask(SequentialTasks(
            [subtask_1p, subtask_3, subtask_2p],
            hold_rew_when_done=True,
            verbose=True),
                            mode=FinalRewMode(always_positive=True),
                            factor=2e3)
        masked_task = MaskedTask(self.spec, task, idcs)

        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)

        # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound
        return ParallelTasks([masked_task, task_check_bounds])

Ejemplo n.º 15

0

Mostrar archivo

Archivo: wam.py Proyecto: arlene-kuehn/SimuRLacra

 def _create_task(self, task_args: dict) -> Task:
     # The wrapped task acts as a dummy and carries the FinalRewTask
     return FinalRewTask(GoallessTask(self.spec, ZeroPerStepRewFcn()),
                         mode=FinalRewMode(user_input=True))

Ejemplo n.º 16

0

Mostrar archivo

    def _create_task(self, task_args: dict) -> Task:
        # Define the indices for selection. This needs to match the observations' names in RcsPySim.
        if task_args.get("consider_velocities", False):
            idcs = ["Effector_X", "Effector_Z", "Effector_Xd", "Effector_Zd"]
        else:
            idcs = ["Effector_X", "Effector_Z"]

        # Get the masked environment specification
        spec = EnvSpec(
            self.spec.obs_space,
            self.spec.act_space,
            self.spec.state_space.subspace(
                self.spec.state_space.create_mask(idcs)),
        )

        # Get the goal position in world coordinates for all three sub-goals
        p1 = self.get_body_position("Goal1", "", "")
        p2 = self.get_body_position("Goal2", "", "")
        p3 = self.get_body_position("Goal3", "", "")
        state_des1 = np.array([p1[0], p1[2], 0, 0])
        state_des2 = np.array([p2[0], p2[2], 0, 0])
        state_des3 = np.array([p3[0], p3[2], 0, 0])
        if task_args.get("consider_velocities", False):
            Q = np.diag([5e-1, 5e-1, 5e-3, 5e-3])
        else:
            Q = np.diag([1e0, 1e0])
            state_des1 = state_des1[:2]
            state_des2 = state_des2[:2]
            state_des3 = state_des3[:2]

        success_fcn = partial(proximity_succeeded,
                              thold_dist=7.5e-2,
                              dims=[0, 1])  # min distance = 7cm
        R = np.zeros((spec.act_space.flat_dim, spec.act_space.flat_dim))

        # Create the tasks
        subtask_1 = FinalRewTask(DesStateTask(spec, state_des1,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_2 = FinalRewTask(DesStateTask(spec, state_des2,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_3 = FinalRewTask(DesStateTask(spec, state_des3,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_4 = FinalRewTask(DesStateTask(spec, state_des1,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_5 = FinalRewTask(DesStateTask(spec, state_des2,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        subtask_6 = FinalRewTask(DesStateTask(spec, state_des3,
                                              ExpQuadrErrRewFcn(Q, R),
                                              success_fcn),
                                 mode=FinalRewMode(time_dependent=True))
        task = FinalRewTask(
            SequentialTasks(
                [
                    subtask_1, subtask_2, subtask_3, subtask_4, subtask_5,
                    subtask_6
                ],
                hold_rew_when_done=True,
                verbose=True,
            ),
            mode=FinalRewMode(always_positive=True),
            factor=5e3,
        )
        masked_task = MaskedTask(self.spec, task, idcs)

        # Additional tasks
        task_check_bounds = create_check_all_boundaries_task(self.spec,
                                                             penalty=1e3)
        if isinstance(self, Planar3LinkJointCtrlSim):
            # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound
            return ParallelTasks([masked_task, task_check_bounds],
                                 easily_satisfied=True)
        else:
            task_ts_discrepancy = create_task_space_discrepancy_task(
                self.spec,
                AbsErrRewFcn(q=0.5 * np.ones(2),
                             r=np.zeros(self.act_space.shape)))
            # Return the masked task and and additional task that ends the episode if the unmasked state is out of bound
            return ParallelTasks(
                [masked_task, task_check_bounds, task_ts_discrepancy],
                easily_satisfied=True)