예제 #1
0
def test_rew_fcn_constructor(state_space, act_space):
    """Smoke test: every reward function class used here can be instantiated.

    :param state_space: forwarded to the `ScaledExpQuadrErrRewFcn` constructor
    :param act_space: forwarded to the `ScaledExpQuadrErrRewFcn` constructor
    """
    # Build all three reward functions first, so a failing constructor surfaces before any check runs
    minus_one_rew = MinusOnePerStepRewFcn()
    quadratic_rew = QuadrErrRewFcn(Q=5 * np.eye(4), R=2 * np.eye(1))
    exp_quadratic_rew = ScaledExpQuadrErrRewFcn(
        Q=np.eye(7),
        R=np.eye(3),
        state_space=state_space,
        act_space=act_space,
    )

    # Each constructor must have produced an object
    for rew_fcn in (minus_one_rew, quadratic_rew, exp_quadratic_rew):
        assert rew_fcn is not None
예제 #2
0
 def _create_task(self, task_args: dict) -> Task:
     """Assemble the desired-state task together with its reward function.

     :param task_args: task options; only the key 'state_des' is read here
     :return: `DesStateTask` built from `self.spec`, the desired state, and a `ScaledExpQuadrErrRewFcn`
              constructed with `min_rew=1e-4`
     """
     # Desired state: take the caller's value; a missing key or an explicit None falls back to zeros
     state_des = task_args.get('state_des')
     if state_des is None:
         state_des = np.zeros(self.obs_space.flat_dim)

     # Diagonal entries of Q, grouped according to the labels of the original listing
     q_diag = (
         [1e-0] * 5  # Px, Py, Pz, Pa, Pb
         + [1e+3] * 3  # Bx, By, Bz
         + [1e-2] * 5  # Pxd, Pyd, Pzd, Pad, Pbd
         + [1e-0] * 3  # Bxd, Byd, Bzd
     )
     Q = np.diag(q_diag)

     # Diagonal entries of R
     r_diag = [1e-2] * 3 + [1e-3] * 2  # Pxdd, Pydd, Pzdd, Padd, Pbdd
     R = np.diag(r_diag)

     rew_fcn = ScaledExpQuadrErrRewFcn(Q, R, self.state_space, self.act_space, min_rew=1e-4)
     return DesStateTask(self.spec, state_des, rew_fcn)
예제 #3
0
    def _create_task(self, task_args: dict) -> Task:
        """Create the desired-state task, including its reward function.

        :param task_args: task options; the keys 'state_des', 'Q', and 'R' are read, each of them optional
        :return: `DesStateTask` built from `self.spec`, the desired state, and a `ScaledExpQuadrErrRewFcn`
                 constructed with `min_rew=1e-4`
        """
        # `dict.get(key, default)` only falls back when the key is absent, so an explicit
        # `state_des=None` would otherwise be handed to the task as None. Treat it like a missing key.
        state_des = task_args.get('state_des')
        if state_des is None:
            state_des = np.zeros(4)

        # Q and R are passed on to the reward function; both can be overridden via task_args
        Q = task_args.get('Q', np.diag([1e5, 1e3, 1e3, 1e2]))
        R = task_args.get('R', np.eye(1))

        rew_fcn = ScaledExpQuadrErrRewFcn(Q, R, self.state_space, self.act_space, min_rew=1e-4)
        return DesStateTask(self.spec, state_des, rew_fcn)
예제 #4
0
    def _create_task(self, task_args: dict) -> Task:
        """Create the desired-state task, including its reward function.

        :param task_args: task options; the keys 'state_des', 'Q', and 'R' are read, each of them optional
        :return: `DesStateTask` built from `self.spec`, the desired state, and a `ScaledExpQuadrErrRewFcn`
                 constructed with `min_rew=1e-4`
        """
        # `dict.get(key, default)` only falls back when the key is absent, so an explicit
        # `state_des=None` would otherwise be handed to the task as None. Treat it like a missing key.
        state_des = task_args.get('state_des')
        if state_des is None:
            state_des = np.zeros(8)

        # Q and R are passed on to the reward function; both can be overridden via task_args
        Q = task_args.get('Q', np.diag([1e0, 1e0, 5e3, 5e3, 1e-2, 1e-2, 5e-1, 5e-1]))
        R = task_args.get('R', np.diag([1e-2, 1e-2]))
        # Alternative values noted by the original author "for LQR":
        #   Q = np.diag([1e2, 1e2, 5e2, 5e2, 1e-2, 1e-2, 1e+1, 1e+1])
        #   R = np.diag([1e-2, 1e-2])

        rew_fcn = ScaledExpQuadrErrRewFcn(Q, R, self.state_space, self.act_space, min_rew=1e-4)
        return DesStateTask(self.spec, state_des, rew_fcn)
예제 #5
0
    def _create_task(self, task_args: dict) -> Task:
        """Create the desired-state task, including its reward function.

        :param task_args: task options; only the key "state_des" is read here
        :return: `DesStateTask` built from `self.spec`, the desired state, and a `ScaledExpQuadrErrRewFcn`
                 constructed with `min_rew=1e-4`
        """
        # `dict.get(key, default)` only falls back when the key is absent, so an explicit
        # `state_des=None` would otherwise be handed to the task as None. Treat it like a missing key.
        # The zeros default is now only built when it is actually needed.
        state_des = task_args.get("state_des")
        if state_des is None:
            state_des = np.zeros(self.obs_space.flat_dim)

        # Diagonal entries of Q, labelled as in the original listing
        Q = np.diag([
            1e-1, 1e-1,  # Pa, Pb
            1e1, 1e1, 0,  # Bx, By, Bz
            1e-3, 1e-3,  # Pad, Pbd
            1e-2, 1e-2, 0,  # Bxd, Byd, Bzd
        ])
        R = np.diag([1e-3, 1e-3])  # Padd, Pbdd

        rew_fcn = ScaledExpQuadrErrRewFcn(Q, R, self.state_space, self.act_space, min_rew=1e-4)
        return DesStateTask(self.spec, state_des, rew_fcn)