def test_rew_fcn_constructor(state_space, act_space):
    """Check that each reward function class can be instantiated.

    :param state_space: state space handed to `ScaledExpQuadrErrRewFcn`
    :param act_space: action space handed to `ScaledExpQuadrErrRewFcn`
    """
    minus_one_rew = MinusOnePerStepRewFcn()
    quadratic_rew = QuadrErrRewFcn(Q=5*np.eye(4), R=2*np.eye(1))
    scaled_exp_rew = ScaledExpQuadrErrRewFcn(
        Q=np.eye(7),
        R=np.eye(3),
        state_space=state_space,
        act_space=act_space,
    )

    # Every constructor call must have produced an actual object
    for rew_fcn in (minus_one_rew, quadratic_rew, scaled_exp_rew):
        assert rew_fcn is not None
def _create_task(self, task_args: dict) -> Task:
    """Build the desired-state task together with its reward function.

    :param task_args: task options; `'state_des'` may hold the desired state. If the key is
                      missing or maps to `None`, a zero vector of length
                      `self.obs_space.flat_dim` is used instead
    :return: `DesStateTask` created from `self.spec`, the desired state, and a
             `ScaledExpQuadrErrRewFcn` with `min_rew=1e-4`
    """
    target_state = task_args.get('state_des')
    if target_state is None:
        target_state = np.zeros(self.obs_space.flat_dim)

    # State error weights, split into the first and the second half of the diagonal
    pos_weights = [1e-0, 1e-0, 1e-0, 1e-0, 1e-0, 1e+3, 1e+3, 1e+3]  # Px, Py, Pz, Pa, Pb, Bx, By, Bz
    vel_weights = [1e-2, 1e-2, 1e-2, 1e-2, 1e-2, 1e-0, 1e-0, 1e-0]  # Pxd, Pyd, Pzd, Pad, Pbd, Bxd, Byd, Bzd
    state_weight_mat = np.diag(pos_weights + vel_weights)

    # Action weights
    act_weight_mat = np.diag([1e-2, 1e-2, 1e-2, 1e-3, 1e-3])  # Pxdd, Pydd, Pzdd, Padd, Pbdd

    rew_fcn = ScaledExpQuadrErrRewFcn(
        state_weight_mat, act_weight_mat, self.state_space, self.act_space, min_rew=1e-4
    )
    return DesStateTask(self.spec, target_state, rew_fcn)
def _create_task(self, task_args: dict) -> Task:
    """Build the desired-state task together with its reward function.

    :param task_args: task options; the keys `'state_des'`, `'Q'`, and `'R'` override the
                      defaults defined below when present
    :return: `DesStateTask` created from `self.spec`, the desired state, and a
             `ScaledExpQuadrErrRewFcn` with `min_rew=1e-4`
    """
    # Fallback values, used whenever the corresponding key is absent from task_args
    fallback_state_des = np.zeros(4)
    fallback_state_weights = np.diag([1e5, 1e3, 1e3, 1e2])
    fallback_act_weights = np.eye(1)

    target_state = task_args.get('state_des', fallback_state_des)
    state_weight_mat = task_args.get('Q', fallback_state_weights)
    act_weight_mat = task_args.get('R', fallback_act_weights)

    rew_fcn = ScaledExpQuadrErrRewFcn(
        state_weight_mat, act_weight_mat, self.state_space, self.act_space, min_rew=1e-4
    )
    return DesStateTask(self.spec, target_state, rew_fcn)
def _create_task(self, task_args: dict) -> Task:
    """Build the desired-state task together with its reward function.

    :param task_args: task options; the keys `'state_des'`, `'Q'`, and `'R'` override the
                      defaults defined below when present
    :return: `DesStateTask` created from `self.spec`, the desired state, and a
             `ScaledExpQuadrErrRewFcn` with `min_rew=1e-4`
    """
    # Fallback values, used whenever the corresponding key is absent from task_args
    # NOTE: alternative weights recorded for LQR:
    #       Q = diag([1e2, 1e2, 5e2, 5e2, 1e-2, 1e-2, 1e+1, 1e+1]), R = diag([1e-2, 1e-2])
    fallback_state_des = np.zeros(8)
    fallback_state_weights = np.diag([1e0, 1e0, 5e3, 5e3, 1e-2, 1e-2, 5e-1, 5e-1])
    fallback_act_weights = np.diag([1e-2, 1e-2])

    target_state = task_args.get('state_des', fallback_state_des)
    state_weight_mat = task_args.get('Q', fallback_state_weights)
    act_weight_mat = task_args.get('R', fallback_act_weights)

    rew_fcn = ScaledExpQuadrErrRewFcn(
        state_weight_mat, act_weight_mat, self.state_space, self.act_space, min_rew=1e-4
    )
    return DesStateTask(self.spec, target_state, rew_fcn)
def _create_task(self, task_args: dict) -> Task:
    """Build the desired-state task together with its reward function.

    :param task_args: task options; `"state_des"` may hold the desired state. If the key is
                      missing, a zero vector of length `self.obs_space.flat_dim` is used
    :return: `DesStateTask` created from `self.spec`, the desired state, and a
             `ScaledExpQuadrErrRewFcn` with `min_rew=1e-4`
    """
    # The zero vector is created up front and only used if no desired state was passed
    fallback_state_des = np.zeros(self.obs_space.flat_dim)
    target_state = task_args.get("state_des", fallback_state_des)

    # State error weights, split into the first and the second half of the diagonal
    pos_weights = [1e-1, 1e-1, 1e1, 1e1, 0]  # Pa, Pb, Bx, By, Bz
    vel_weights = [1e-3, 1e-3, 1e-2, 1e-2, 0]  # Pad, Pbd, Bxd, Byd, Bzd
    state_weight_mat = np.diag(pos_weights + vel_weights)

    # Action weights
    act_weight_mat = np.diag([1e-3, 1e-3])  # Padd, Pbdd

    rew_fcn = ScaledExpQuadrErrRewFcn(
        state_weight_mat, act_weight_mat, self.state_space, self.act_space, min_rew=1e-4
    )
    return DesStateTask(self.spec, target_state, rew_fcn)