Ejemplo n.º 1
0
def create_flipping_task(
    env_spec: EnvSpec,
    obs_labels: Sequence[str],
    des_angle_delta: float = np.pi / 2.0,
    endless: bool = True,
) -> MaskedTask:
    """
    Build a masked task that rewards rotating (flipping) an object.

    .. note::
        The task was written with RcsPySim environments in mind, but it can be used with other environments, too.

    :param env_spec: environment specification
    :param obs_labels: labels used to select the relevant state entries, e.g. ['Box_A']. These have to match the
                       observations' names in RcsPySim
    :param des_angle_delta: desired angle to rotate. If reached, the task is reset, and rotating continues.
    :param endless: if `True`, the task promotes endlessly repeated flipping about the desired angle, otherwise only
                    a single flip is desired
    :return: masked task that only considers a subspace of all observations
    """
    # Reduce the state space to the entries selected by the given labels
    state_mask = env_spec.state_space.create_mask(obs_labels)
    masked_state_space = env_spec.state_space.subspace(state_mask)

    # The observation and action spaces are passed on unchanged, only the state space is masked
    masked_spec = EnvSpec(env_spec.obs_space, env_spec.act_space, masked_state_space)

    # Set up the flipping task on the masked specification, rewarded via the element at index 0
    flipping_task = FlippingTask(masked_spec, des_angle_delta, CosOfOneEleRewFcn(idx=0), endless=endless)

    # Wrap the task such that it is applied to the selected entries of the full specification
    return MaskedTask(env_spec, flipping_task, obs_labels)
Ejemplo n.º 2
0
def create_box_flip_task(env_spec: EnvSpec, continuous_rew_fcn: bool) -> MaskedTask:
    """
    Create a masked endless flipping task for the entries labeled 'Box_A'.

    :param env_spec: environment specification
    :param continuous_rew_fcn: if truthy, the task is rewarded by `CosOfOneEleRewFcn(idx=0)`, otherwise by
                               `MinusOnePerStepRewFcn()`
    :return: masked task that wraps an `EndlessFlippingTask` (created with `init_angle=0.`) and is restricted to the
             entries labeled 'Box_A'
    """
    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['Box_A']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs))
    )

    # Select the reward function. No desired state is defined here, since the flipping task is not given one.
    if continuous_rew_fcn:
        rew_fcn = CosOfOneEleRewFcn(idx=0)
    else:
        rew_fcn = MinusOnePerStepRewFcn()
    ef_task = EndlessFlippingTask(spec, rew_fcn, init_angle=0.)

    # Return the masked tasks
    return MaskedTask(env_spec, ef_task, idcs)
Ejemplo n.º 3
0
from pyrado.tasks.utils import proximity_succeeded
from pyrado.tasks.desired_state import DesStateTask, RadiallySymmDesStateTask
from pyrado.tasks.parallel import ParallelTasks
from pyrado.tasks.reward_functions import CompoundRewFcn, CosOfOneEleRewFcn, MinusOnePerStepRewFcn, QuadrErrRewFcn, \
    ScaledExpQuadrErrRewFcn, RewFcn, PlusOnePerStepRewFcn


@pytest.fixture(scope='function')
def envspec_432():
    """
    Provide an environment specification for the tests, recreated for every test function.

    :return: `EnvSpec` whose observation, action, and state spaces are `BoxSpace(-1, 1, n)` with n = 4, 2, and 3
    """
    obs_space = BoxSpace(-1, 1, 4)
    act_space = BoxSpace(-1, 1, 2)
    state_space = BoxSpace(-1, 1, 3)
    return EnvSpec(obs_space=obs_space, act_space=act_space, state_space=state_space)


@pytest.mark.parametrize(
    'fcn_list, reset_args, reset_kwargs',
    [
        ([MinusOnePerStepRewFcn()], [None], [None]),
        ([CosOfOneEleRewFcn(0)], [None], [None]),
        ([QuadrErrRewFcn(np.eye(2), np.eye(1))], [None], [None]),
        (
            [MinusOnePerStepRewFcn(), QuadrErrRewFcn(Q=np.eye(2), R=np.eye(1))],
            [None, None],
            [None, None],
        ),
    ],
    ids=['wo_args-wo_kwargs', 'w_args-wo_kwargs', 'w_args2-wo_kwargs', 'wo_args-w_kwargs'],
)
def test_combined_reward_function_step(fcn_list, reset_args, reset_kwargs):
    """
    Check that a compound reward function can be evaluated once and reset afterwards.

    :param fcn_list: reward functions to combine into one `CompoundRewFcn`
    :param reset_args: first positional argument forwarded to the compound reward function's `reset()`
    :param reset_kwargs: second positional argument forwarded to the compound reward function's `reset()`
    """
    # Example state error (2-dim) and action error (1-dim)
    state_err = np.array([1., 2.])
    act_err = np.array([3.])

    # Combine the individual reward functions and evaluate them on the example errors
    compound_fcn = CompoundRewFcn(fcn_list)
    reward = compound_fcn(state_err, act_err)
    assert isinstance(reward, float)

    # Resetting must be possible after a reward has been computed
    compound_fcn.reset(reset_args, reset_kwargs)