def test_rew_fcn_constructor(state_space, act_space):
    """Smoke test: the reward function classes can be instantiated without raising."""
    minus_one = MinusOnePerStepRewFcn()
    quadratic = QuadrErrRewFcn(Q=5 * np.eye(4), R=2 * np.eye(1))
    scaled_exp = ScaledExpQuadrErrRewFcn(Q=np.eye(7), R=np.eye(3), state_space=state_space, act_space=act_space)

    for rew_fcn in (minus_one, quadratic, scaled_exp):
        assert rew_fcn is not None
def create_forcemin_task(env_spec: EnvSpec, obs_labels: Sequence[str], Q: np.ndarray) -> MaskedTask:
    """
    Create a task which punishes the amount of used force.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param obs_labels: labels for selection, e.g. ['WristLoadCellLBR_R_Fy']. This needs to match the observations'
                       names in RcsPySim
    :param Q: weight matrix of dim NxN with N=num_forces for the quadratic force costs
    :return: masked task that only considers a subspace of all observations
    """
    # Restrict the state space to the selected (force) observations
    state_mask = env_spec.state_space.create_mask(obs_labels)
    masked_spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(state_mask),
    )

    # Endlessly running desired state task: zero target state, no action penalty
    act_dim = masked_spec.act_space.flat_dim
    rew_fcn = QuadrErrRewFcn(Q=Q, R=np.zeros((act_dim, act_dim)))
    inner_task = DesStateTask(masked_spec, np.zeros(masked_spec.state_space.shape), rew_fcn, never_succeeded)

    # Wrap such that only the selected observations are considered
    return MaskedTask(env_spec, inner_task, obs_labels)
def _create_task(self, task_args: dict) -> Task:
    # Build the quadratic-error reward around the desired state
    state_des = task_args.get('state_des', np.zeros(2))
    Q = task_args.get('Q', np.diag([1e1, 1e-2]))
    R = task_args.get('R', np.diag([1e-6]))
    rew_fcn = QuadrErrRewFcn(Q, R)

    inner_task = DesStateTask(self.spec, state_des, rew_fcn)
    return FinalRewTask(inner_task, factor=1e3, mode=FinalRewMode(always_negative=True))
def _create_task(self, task_args: dict) -> Task:
    # Define the task including the reward function
    state_des = task_args.get("state_des", np.array([0.0, np.pi, 0.0, 0.0]))
    rew_fcn = QuadrErrRewFcn(
        task_args.get("Q", np.diag([5e-0, 1e1, 1e-2, 1e-2])),
        task_args.get("R", np.diag([1e-3])),
    )

    # Wrap the radially symmetric desired state task with a final reward
    inner_task = RadiallySymmDesStateTask(self.spec, state_des, rew_fcn, idcs=[1])
    return FinalRewTask(inner_task, mode=FinalRewMode(state_dependent=True, time_dependent=True))
def _create_task(self, task_args: dict) -> Task:
    # Define the task including the reward function
    state_des = task_args.get("state_des", np.array([0.0, np.pi, 0.0, 0.0]))
    Q = task_args.get("Q", np.diag([3e-1, 5e-1, 5e-3, 1e-3]))
    R = task_args.get("R", np.diag([1e-3]))

    inner_task = RadiallySymmDesStateTask(self.spec, state_des, QuadrErrRewFcn(Q, R), idcs=[1])
    return FinalRewTask(inner_task, mode=FinalRewMode(always_negative=True), factor=1e4)
def test_modulated_rew_fcn():
    """Check the step reward of a radially symmetric desired state task for two modulation settings."""
    Q = np.eye(4)
    R = np.eye(2)
    state = np.array([1, 2, 3, 4])
    act = np.array([0, 0])
    rew_fcn = QuadrErrRewFcn(Q, R)

    # Modulo 2 applied to all selected states
    task = RadiallySymmDesStateTask(EnvSpec(None, None, None), np.zeros(4), rew_fcn, [0, 1, 3], 2)
    assert task.step_rew(state, act) == -(1**2 + 3**2)

    # Different modulo factor per selected state
    task = RadiallySymmDesStateTask(EnvSpec(None, None, None), np.zeros(4), rew_fcn, [1, 3], np.array([2, 3]))
    assert task.step_rew(state, act) == -(1**2 + 3**2 + 1**2)
def _create_deviation_task(self, task_args: dict) -> Task:
    # The last three state entries hold the Cartesian cup goal position
    idcs = list(range(self.state_space.flat_dim - 3, self.state_space.flat_dim))
    spec = EnvSpec(
        self.spec.obs_space,
        self.spec.act_space,
        self.spec.state_space.subspace(self.spec.state_space.create_mask(idcs)),
    )

    # Initial cup goal position, depending on the number of DoFs
    if self.num_dof == 7:
        state_des = np.array([0.82521, 0, 1.4469])
    else:
        state_des = np.array([0.758, 0, 1.5])

    act_dim = self._act_space.shape[0]
    rew_fcn = QuadrErrRewFcn(
        Q=task_args.get('Q_dev', np.diag([2e-1, 1e-4, 5e0])),  # Cartesian distance from init position
        R=task_args.get('R_dev', np.zeros((act_dim, act_dim))),
    )
    inner_task = DesStateTask(spec, state_des, rew_fcn)

    return MaskedTask(self.spec, inner_task, idcs)
def _create_deviation_task(self, task_args: dict) -> Task:
    # The last three state entries hold the Cartesian cup goal position
    idcs = list(range(self.state_space.flat_dim - 3, self.state_space.flat_dim))
    spec = EnvSpec(
        self.spec.obs_space,
        self.spec.act_space,
        self.spec.state_space.subspace(self.spec.state_space.create_mask(idcs)),
    )

    # Initial cup goal position, depending on the number of DoFs
    state_des = GOAL_POS_INIT_SIM_7DOF if self._num_dof == 7 else GOAL_POS_INIT_SIM_4DOF

    act_dim = self.act_space.shape[0]
    rew_fcn = QuadrErrRewFcn(
        Q=task_args.get("Q_dev", np.diag([2e-1, 1e-6, 5e0])),  # Cartesian distance from init cup position
        # joint space distance from init pose, interferes with R_default from _create_main_task
        R=task_args.get("R_dev", np.zeros((act_dim, act_dim))),
    )

    return MaskedTask(self.spec, DesStateTask(spec, state_des, rew_fcn), idcs)
from pyrado.tasks.desired_state import DesStateTask, RadiallySymmDesStateTask from pyrado.tasks.parallel import ParallelTasks from pyrado.tasks.reward_functions import CompoundRewFcn, CosOfOneEleRewFcn, MinusOnePerStepRewFcn, QuadrErrRewFcn, \ ScaledExpQuadrErrRewFcn, RewFcn, PlusOnePerStepRewFcn @pytest.fixture(scope='function') def envspec_432(): return EnvSpec(obs_space=BoxSpace(-1, 1, 4), act_space=BoxSpace(-1, 1, 2), state_space=BoxSpace(-1, 1, 3)) @pytest.mark.parametrize( 'fcn_list, reset_args, reset_kwargs', [ ([MinusOnePerStepRewFcn()], [None], [None]), ([CosOfOneEleRewFcn(0)], [None], [None]), ([QuadrErrRewFcn(np.eye(2), np.eye(1))], [None], [None]), ([MinusOnePerStepRewFcn(), QuadrErrRewFcn(Q=np.eye(2), R=np.eye(1))], [None, None], [None, None]), ], ids=['wo_args-wo_kwargs', 'w_args-wo_kwargs', 'w_args2-wo_kwargs', 'wo_args-w_kwargs']) def test_combined_reward_function_step(fcn_list, reset_args, reset_kwargs): # Create combined reward function c = CompoundRewFcn(fcn_list) # Create example state and action error err_s, err_a = np.array([1., 2.]), np.array([3.]) # Calculate combined reward rew = c(err_s, err_a) assert isinstance(rew, float) # Reset the reward functions c.reset(reset_args, reset_kwargs) def test_modulated_rew_fcn():
from pyrado.tasks.parallel import ParallelTasks
from pyrado.tasks.reward_functions import CompoundRewFcn, CosOfOneEleRewFcn, MinusOnePerStepRewFcn, QuadrErrRewFcn, \
    ScaledExpQuadrErrRewFcn, RewFcn, PlusOnePerStepRewFcn


@pytest.fixture(scope='function')
def envspec_432():
    # Environment specification with observation dim 4, action dim 2, state dim 3
    return EnvSpec(obs_space=BoxSpace(-1, 1, 4), act_space=BoxSpace(-1, 1, 2), state_space=BoxSpace(-1, 1, 3))


@pytest.mark.parametrize('fcn_list, reset_args, reset_kwargs', [
    ([MinusOnePerStepRewFcn()], [None], [None]),
    ([CosOfOneEleRewFcn(0)], [None], [None]),
    ([QuadrErrRewFcn(np.eye(2), np.eye(1))], [None], [None]),
    ([MinusOnePerStepRewFcn(), QuadrErrRewFcn(Q=np.eye(2), R=np.eye(1))], [None, None], [None, None]),
], ids=[
    'wo_args-wo_kwargs', 'w_args-wo_kwargs', 'w_args2-wo_kwargs', 'wo_args-w_kwargs'
])
def test_combined_reward_function_step(fcn_list, reset_args, reset_kwargs):
    # Create combined reward function
    c = CompoundRewFcn(fcn_list)

    # Create example state and action error
    err_s, err_a = np.array([1., 2.]), np.array([3.])

    # Calculate combined reward
    rew = c(err_s, err_a)
    assert isinstance(rew, float)

    # Reset the reward functions. This was missing before, leaving the parametrized
    # reset_args/reset_kwargs unused; added for consistency with the sibling tests.
    c.reset(reset_args, reset_kwargs)
from pyrado.tasks.sequential import SequentialTasks
from pyrado.tasks.utils import proximity_succeeded
from pyrado.utils.data_types import EnvSpec


@pytest.fixture(scope="function")
def envspec_432():
    return EnvSpec(obs_space=BoxSpace(-1, 1, 4), act_space=BoxSpace(-1, 1, 2), state_space=BoxSpace(-1, 1, 3))


@pytest.mark.parametrize(
    "fcn_list, reset_args, reset_kwargs",
    [
        ([MinusOnePerStepRewFcn()], [None], [None]),
        ([CosOfOneEleRewFcn(0)], [None], [None]),
        ([QuadrErrRewFcn(np.eye(2), np.eye(1))], [None], [None]),
        ([MinusOnePerStepRewFcn(), QuadrErrRewFcn(Q=np.eye(2), R=np.eye(1))], [None, None], [None, None]),
    ],
    ids=["wo_args-wo_kwargs", "w_args-wo_kwargs", "w_args2-wo_kwargs", "wo_args-w_kwargs"],
)
def test_combined_reward_function_step(fcn_list, reset_args, reset_kwargs):
    # Aggregate the given reward functions into one compound reward function
    compound = CompoundRewFcn(fcn_list)

    # Evaluate it on an example state and action error
    err_s, err_a = np.array([1.0, 2.0]), np.array([3.0])
    rew = compound(err_s, err_a)
    assert isinstance(rew, float)

    # Reset the underlying reward functions
    compound.reset(reset_args, reset_kwargs)