def create_flipping_task(
    env_spec: EnvSpec,
    obs_labels: Sequence[str],
    des_angle_delta: float = np.pi / 2.0,
    endless: bool = True,
) -> MaskedTask:
    """
    Build a masked task whose goal is to rotate (flip) an object.

    .. note::
        The task was written with RcsPySim environments in mind, but it can be used with other environments, too.

    :param env_spec: specification of the complete environment
    :param obs_labels: labels used for the selection, e.g. ['Box_A']. They have to match the observations' names
                       in RcsPySim
    :param des_angle_delta: desired rotation angle. Once it is reached, the task is reset, and rotating continues.
    :param endless: if `True`, the task promotes endlessly repeated flipping about the desired angle, otherwise
                    only a single flip is desired
    :return: masked task which only takes a subspace of all observations into account
    """
    # Narrow the state space down to the entries selected by the labels
    label_mask = env_spec.state_space.create_mask(obs_labels)
    masked_state_space = env_spec.state_space.subspace(label_mask)
    masked_spec = EnvSpec(env_spec.obs_space, env_spec.act_space, masked_state_space)

    # Set up the flipping task on the masked specification, rewarded via the element at index 0
    flipping_task = FlippingTask(masked_spec, des_angle_delta, CosOfOneEleRewFcn(idx=0), endless=endless)

    # Wrap the task such that it is fed with the selected entries only
    return MaskedTask(env_spec, flipping_task, obs_labels)
def create_box_flip_task(env_spec: EnvSpec, continuous_rew_fcn: bool) -> MaskedTask:
    """
    Create a masked task for endlessly flipping the box.

    :param env_spec: environment specification
    :param continuous_rew_fcn: if `True`, use a `CosOfOneEleRewFcn` with `idx=0` as reward function,
                               else use a `MinusOnePerStepRewFcn`
    :return: `MaskedTask` wrapping an `EndlessFlippingTask` that is defined on the entries labeled 'Box_A'
    """
    # Define the labels for selection. This needs to match the observations' names in RcsPySim.
    idcs = ['Box_A']

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(env_spec.state_space.create_mask(idcs)),
    )

    # Select the reward function
    rew_fcn = CosOfOneEleRewFcn(idx=0) if continuous_rew_fcn else MinusOnePerStepRewFcn()

    # Create the flipping task, starting at an angle of zero
    ef_task = EndlessFlippingTask(spec, rew_fcn, init_angle=0.)

    # Return the masked task
    return MaskedTask(env_spec, ef_task, idcs)
from pyrado.tasks.utils import proximity_succeeded
from pyrado.tasks.desired_state import DesStateTask, RadiallySymmDesStateTask
from pyrado.tasks.parallel import ParallelTasks
from pyrado.tasks.reward_functions import CompoundRewFcn, CosOfOneEleRewFcn, MinusOnePerStepRewFcn, QuadrErrRewFcn, \
    ScaledExpQuadrErrRewFcn, RewFcn, PlusOnePerStepRewFcn


@pytest.fixture(scope='function')
def envspec_432():
    """
    Provide a fresh environment specification for every test function.

    The specification is assembled from three `BoxSpace(-1, 1, n)` instances with n = 4 for the observation space,
    n = 2 for the action space, and n = 3 for the state space.
    """
    obs_space = BoxSpace(-1, 1, 4)
    act_space = BoxSpace(-1, 1, 2)
    state_space = BoxSpace(-1, 1, 3)
    return EnvSpec(obs_space=obs_space, act_space=act_space, state_space=state_space)


@pytest.mark.parametrize(
    'fcn_list, reset_args, reset_kwargs',
    [
        ([MinusOnePerStepRewFcn()], [None], [None]),
        ([CosOfOneEleRewFcn(0)], [None], [None]),
        ([QuadrErrRewFcn(np.eye(2), np.eye(1))], [None], [None]),
        ([MinusOnePerStepRewFcn(), QuadrErrRewFcn(Q=np.eye(2), R=np.eye(1))], [None, None], [None, None]),
    ],
    ids=['wo_args-wo_kwargs', 'w_args-wo_kwargs', 'w_args2-wo_kwargs', 'wo_args-w_kwargs'],
)
def test_combined_reward_function_step(fcn_list, reset_args, reset_kwargs):
    """
    Evaluate a `CompoundRewFcn` once on a fixed state and action error, then reset it.

    :param fcn_list: reward functions to combine
    :param reset_args: first argument forwarded to the compound reward function's `reset()`
    :param reset_kwargs: second argument forwarded to the compound reward function's `reset()`
    """
    # Combine the given reward functions
    compound_rew_fcn = CompoundRewFcn(fcn_list)

    # Example errors: two state dimensions and one action dimension
    state_err = np.array([1., 2.])
    action_err = np.array([3.])

    # The combined reward has to be a plain float
    reward = compound_rew_fcn(state_err, action_err)
    assert isinstance(reward, float)

    # Resetting with the given arguments must not raise
    compound_rew_fcn.reset(reset_args, reset_kwargs)