예제 #1
0
    def _create_spaces(self):
        # Action
        act_bounds = self.model.actuator_ctrlrange.copy().T
        self._act_space = BoxSpace(
            *act_bounds,
            labels=['bthigh', 'bshin', 'bfoot', 'fthigh', 'fshin', 'ffoot'])

        # State
        state_shape = np.concatenate([self.sim.data.qpos,
                                      self.sim.data.qvel]).shape
        max_state = np.full(state_shape, pyrado.inf)
        self._state_space = BoxSpace(-max_state, max_state)

        # Initial state
        noise_halfspan = self.domain_param['reset_noise_halfspan']
        min_init_qpos = self.init_qpos - np.full_like(self.init_qpos,
                                                      noise_halfspan)
        max_init_qpos = self.init_qpos + np.full_like(self.init_qpos,
                                                      noise_halfspan)
        min_init_qvel = self.init_qvel - np.full_like(self.init_qpos,
                                                      noise_halfspan)
        max_init_qvel = self.init_qvel + np.full_like(self.init_qpos,
                                                      noise_halfspan)
        min_init_state = np.concatenate([min_init_qpos, min_init_qvel]).ravel()
        max_init_state = np.concatenate([max_init_qpos, max_init_qvel]).ravel()
        self._init_space = BoxSpace(min_init_state, max_init_state)

        # Observation
        obs_shape = self.observe(max_state).shape
        max_obs = np.full(obs_shape, pyrado.inf)
        self._obs_space = BoxSpace(-max_obs, max_obs)
예제 #2
0
    def _create_spaces(self):
        super()._create_spaces()
        l_rail = self.domain_param["rail_length"]

        min_state = np.array([
            -l_rail / 2.0 + self._x_buffer, np.pi - self.stab_thold, -l_rail,
            -2 * np.pi
        ])  # [m, rad, m/s, rad/s]
        max_state = np.array([
            +l_rail / 2.0 - self._x_buffer, np.pi + self.stab_thold, +l_rail,
            +2 * np.pi
        ])  # [m, rad, m/s, rad/s]

        max_init_state = np.array(
            [+0.02, np.pi + self.max_init_th_offset, +0.02,
             +5 / 180 * np.pi])  # [m, rad, m/s, rad/s]
        min_init_state = np.array(
            [-0.02, np.pi - self.max_init_th_offset, -0.02,
             -5 / 180 * np.pi])  # [m, rad, m/s, rad/s]

        self._state_space = BoxSpace(
            min_state, max_state, labels=["x", "theta", "x_dot", "theta_dot"])
        self._init_space = BoxSpace(
            min_init_state,
            max_init_state,
            labels=["x", "theta", "x_dot", "theta_dot"])
예제 #3
0
    def __init__(self,
                 dt: float = 1/500.,
                 max_steps: int = pyrado.inf,
                 ip: str = '192.168.2.17',
                 task_args: [dict, None] = None):
        """
        Constructor

        :param dt: time step size on the Quanser device [s]
        :param max_steps: maximum number of steps executed on the device
        :param ip: IP address of the Cart-pole platform
        :param task_args: arguments for the task construction
        """
        super().__init__(dt, max_steps, ip, task_args)

        self._sleep_before_start = True

        # Define the task-specific state space
        stab_thold = 15/180.*np.pi  # threshold angle for the stabilization task to be a failure [rad]
        min_state_1 = np.array([-self._l_rail/2. + self._x_buffer, np.pi - stab_thold, -np.inf, -np.inf])
        max_state_1 = np.array([+self._l_rail/2. - self._x_buffer, np.pi + stab_thold, np.inf, np.inf])
        min_state_2 = np.array([-self._l_rail/2. + self._x_buffer, -np.pi - stab_thold, -np.inf, -np.inf])
        max_state_2 = np.array([+self._l_rail/2. - self._x_buffer, -np.pi + stab_thold, np.inf, np.inf])

        bs_1 = BoxSpace(min_state_1, max_state_1, labels=['$x$', r'$\theta$', r'$\dot{x}$', r'$\dot{\theta}$'])
        bs_2 = BoxSpace(min_state_2, max_state_2, labels=['$x$', r'$\theta$', r'$\dot{x}$', r'$\dot{\theta}$'])

        self._state_space = CompoundSpace([bs_1, bs_2])
예제 #4
0
    def _create_spaces(self):
        l_beam = self.domain_param['l_beam']
        g = self.domain_param['g']

        # Set the bounds for the system's states and actions
        max_state = np.array([l_beam/2., np.pi/4., 10., np.pi])
        max_act = np.array([l_beam/2.*g*3.])  # max torque [Nm]; the factor 3.0 is arbitrary
        self._curr_act = np.zeros_like(max_act)  # just for usage in render function

        self._state_space = BoxSpace(-max_state, max_state,
                                     labels=['$x$', r'$\alpha$', r'$\dot{x}$', r'$\dot{\alpha}$'])
        self._obs_space = self._state_space
        self._init_space = CompoundSpace([
            BoxSpace(
                np.array([-0.8*l_beam/2., -5/180.*np.pi, -0.02*max_state[2], -0.02*max_state[3]]),
                np.array([-0.7*l_beam/2., +5/180.*np.pi, +0.02*max_state[2], +0.02*max_state[3]]),
                labels=['$x$', r'$\alpha$', r'$\dot{x}$', r'$\dot{\alpha}$']
            ),
            BoxSpace(
                np.array([0.7*l_beam/2., -5/180.*np.pi, -0.02*max_state[2], -0.02*max_state[3]]),
                np.array([0.8*l_beam/2., +5/180.*np.pi, +0.02*max_state[2], +0.02*max_state[3]]),
                labels=['$x$', r'$\alpha$', r'$\dot{x}$', r'$\dot{\alpha}$']
            )
        ])
        self._act_space = BoxSpace(-max_act, max_act, labels=[r'$\tau$'])
예제 #5
0
def test_domain_param():
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2, )),
                      obs_space=BoxSpace(-1, 1, shape=(2, )))
    wenv = DownsamplingWrapper(mockenv, factor=2)

    # Reset to initialize buffer
    wenv.reset()

    # Perform some actions
    wenv.step(np.array([0, 1]))
    assert mockenv.last_act == [0, 1]
    wenv.step(np.array([2, 4]))
    assert mockenv.last_act == [0, 1]
    wenv.step(np.array([4, 4]))
    assert mockenv.last_act == [4, 4]

    # change the downsampling and reset
    wenv.domain_param = {'downsampling': 1}
    wenv.reset()

    wenv.step(np.array([1, 2]))
    assert mockenv.last_act == [1, 2]
    wenv.step(np.array([2, 3]))
    assert mockenv.last_act == [2, 3]
    wenv.step(np.array([8, 9]))
    assert mockenv.last_act == [8, 9]
예제 #6
0
    def __init__(self, max_steps: int, example_config: bool):
        """
        Constructor

        :param max_steps: maximum number of simulation steps
        :param example_config: configuration for the 'illustrative example' in the journal
        """
        Serializable._init(self, locals())

        super().__init__(dt=None, max_steps=max_steps)

        self.example_config = example_config
        self._planet = -1

        # Initialize the domain parameters (Earth)
        self._g = 9.81  # gravity constant [m/s**2]
        self._k = 2e3  # catapult spring's stiffness constant [N/m]
        self._x = 1.  # catapult spring's pre-elongation [m]

        # Domain independent parameter
        self._m = 70.  # victim's mass [kg]

        # Set the bounds for the system's states adn actions
        max_state = np.array([1000.])  # [m], arbitrary but >> self._x
        max_act = max_state
        self._curr_act = np.zeros_like(max_act)  # just for usage in render function

        self._state_space = BoxSpace(-max_state, max_state, labels=['$h$'])
        self._init_space = SingularStateSpace(np.zeros(self._state_space.shape), labels=['$h_0$'])
        self._act_space = BoxSpace(-max_act, max_act, labels=[r'$\theta$'])

        # Define the task including the reward function
        self._task = self._create_task(task_args=dict())
예제 #7
0
def test_combination_delay_downsampling():
    """ After delay number of actions, the actions are downsampled by the factor """
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2, )),
                      obs_space=BoxSpace(-1, 1, shape=(2, )))
    wenv_dl_ds = ActDelayWrapper(mockenv, delay=3)
    wenv_dl_ds = DownsamplingWrapper(wenv_dl_ds, factor=2)

    # Reset to initialize buffer
    wenv_dl_ds.reset()

    # The first ones are 0 because the ActDelayWrapper's queue is initialized with 0
    wenv_dl_ds.step(np.array([0, 1]))
    assert mockenv.last_act == [0, 0]
    wenv_dl_ds.step(np.array([0, 2]))
    assert mockenv.last_act == [0, 0]
    wenv_dl_ds.step(np.array([0, 3]))
    assert mockenv.last_act == [0, 0]
    # One time step earlier than the other order of wrappers
    wenv_dl_ds.step(np.array([0, 4]))
    assert mockenv.last_act == [0, 1]
    wenv_dl_ds.step(np.array([0, 5]))
    assert mockenv.last_act == [0, 1]
    wenv_dl_ds.step(np.array([0, 6]))
    assert mockenv.last_act == [0, 3]
    wenv_dl_ds.step(np.array([0, 7]))
    assert mockenv.last_act == [0, 3]
    wenv_dl_ds.step(np.array([0, 8]))
    assert mockenv.last_act == [0, 5]
    wenv_dl_ds.step(np.array([0, 9]))
    assert mockenv.last_act == [0, 5]
    wenv_dl_ds.step(np.array([1, 0]))
    assert mockenv.last_act == [0, 7]
    wenv_dl_ds.step(np.array([1, 1]))
    assert mockenv.last_act == [0, 7]
예제 #8
0
    def __init__(self,
                 env_spec: EnvSpec,
                 batch_size: int,
                 reward_multiplier: float,
                 lr: float = 3e-3,
                 logger: StepLogger = None,
                 device: str = 'cuda' if to.cuda.is_available() else 'cpu'):
        """
        Constructor

        :param env_spec: environment specification
        :param batch_size: batch size for each update step
        :param reward_multiplier: factor for the predicted probability
        :param lr: learning rate
        :param logger: logger for every step of the algorithm, if `None` the default logger will be created
        """
        self.device = device
        self.batch_size = batch_size
        self.reward_multiplier = reward_multiplier
        self.lr = lr
        spec = EnvSpec(obs_space=BoxSpace.cat(
            [env_spec.obs_space, env_spec.obs_space, env_spec.act_space]),
                       act_space=BoxSpace(bound_lo=[0], bound_up=[1]))
        self.discriminator = FNNPolicy(spec=spec,
                                       hidden_nonlin=to.tanh,
                                       hidden_sizes=[62],
                                       output_nonlin=to.sigmoid)
        self.loss_fcn = nn.BCELoss()
        self.optimizer = to.optim.Adam(self.discriminator.parameters(),
                                       lr=lr,
                                       eps=1e-5)
        self.logger = logger
예제 #9
0
    def _create_spaces(self):
        # Action
        act_bounds = self.model.actuator_ctrlrange.copy().T
        self._act_space = BoxSpace(*act_bounds,
                                   labels=['thigh', 'leg', 'foot'])

        # State
        n = self.init_qpos.size + self.init_qvel.size
        min_state = -self.domain_param['state_bound'] * np.ones(n)
        min_state[0] = -pyrado.inf  # ignore forward position
        min_state[1] = self.domain_param['z_lower_bound']
        min_state[2] = -self.domain_param['angle_bound']
        max_state = self.domain_param['state_bound'] * np.ones(n)
        max_state[0] = +pyrado.inf  # ignore forward position
        max_state[2] = self.domain_param['angle_bound']
        self._state_space = BoxSpace(min_state, max_state)

        # Initial state
        noise_halfspan = self.domain_param['reset_noise_halfspan']
        min_init_qpos = self.init_qpos - np.full_like(self.init_qpos,
                                                      noise_halfspan)
        max_init_qpos = self.init_qpos + np.full_like(self.init_qpos,
                                                      noise_halfspan)
        min_init_qvel = self.init_qvel - np.full_like(self.init_qpos,
                                                      noise_halfspan)
        max_init_qvel = self.init_qvel + np.full_like(self.init_qpos,
                                                      noise_halfspan)
        min_init_state = np.concatenate([min_init_qpos, min_init_qvel]).ravel()
        max_init_state = np.concatenate([max_init_qpos, max_init_qvel]).ravel()
        self._init_space = BoxSpace(min_init_state, max_init_state)

        # Observation
        obs_shape = self.observe(max_state).shape
        max_obs = np.full(obs_shape, pyrado.inf)
        self._obs_space = BoxSpace(-max_obs, max_obs)
예제 #10
0
    def _create_spaces(self):
        # Define the spaces
        max_state = np.array(
            [115. / 180 * np.pi, 4 * np.pi, 20 * np.pi,
             20 * np.pi])  # [rad, rad, rad/s, rad/s]
        max_obs = np.array([1., 1., 1., 1., np.inf,
                            np.inf])  # [-, -, -, -, rad/s, rad/s]
        max_init_state = np.array([2., 1., 0.5, 0.5
                                   ]) / 180 * np.pi  # [rad, rad, rad/s, rad/s]

        self._state_space = BoxSpace(
            -max_state,
            max_state,
            labels=['theta', 'alpha', 'theta_dot', 'alpha_dot'])
        self._obs_space = BoxSpace(-max_obs,
                                   max_obs,
                                   labels=[
                                       'sin_theta', 'cos_theta', 'sin_alpha',
                                       'cos_alpha', 'theta_dot', 'alpha_dot'
                                   ])
        self._init_space = BoxSpace(
            -max_init_state,
            max_init_state,
            labels=['theta', 'alpha', 'theta_dot', 'alpha_dot'])
        self._act_space = BoxSpace(-max_act_qq, max_act_qq, labels=['V'])
예제 #11
0
    def _create_spaces(self):
        super()._create_spaces()

        # Define the spaces
        l_rail = self.domain_param["rail_length"]
        max_state = np.array([
            +l_rail / 2.0 - self._x_buffer, +4 * np.pi, 2 * l_rail, 20 * np.pi
        ])  # [m, rad, m/s, rad/s]
        min_state = np.array([
            -l_rail / 2.0 + self._x_buffer, -4 * np.pi, -2 * l_rail,
            -20 * np.pi
        ])  # [m, rad, m/s, rad/s]
        if self._wild_init:
            max_init_state = np.array([0.05, np.pi, 0.02, 2 / 180.0 * np.pi
                                       ])  # [m, rad, m/s, rad/s]
        else:
            max_init_state = np.array(
                [0.02, 2 / 180.0 * np.pi, 0.0,
                 1 / 180.0 * np.pi])  # [m, rad, m/s, rad/s]

        self._state_space = BoxSpace(
            min_state, max_state, labels=["x", "theta", "x_dot", "theta_dot"])
        self._init_space = BoxSpace(
            -max_init_state,
            max_init_state,
            labels=["x", "theta", "x_dot", "theta_dot"])
예제 #12
0
    def __init__(
        self,
        dt: float = 1 / 500.0,
        max_steps: int = pyrado.inf,
        task_args: Optional[dict] = None,
        ip: Optional[str] = "192.168.2.38",
    ):
        """
        Constructor

        :param dt: time step size on the Quanser device [s]
        :param max_steps: maximum number of steps executed on the device
        :param task_args: arguments for the task construction
        :param ip: IP address of the Cart-pole platform
        """
        super().__init__(dt, max_steps, task_args, ip)

        # Define the task-specific state space
        stab_thold = 15 / 180.0 * np.pi  # threshold angle for the stabilization task to be a failure [rad]
        min_state_1 = np.array([-self._l_rail / 2.0 + self._x_buffer, np.pi - stab_thold, -np.inf, -np.inf])
        max_state_1 = np.array([+self._l_rail / 2.0 - self._x_buffer, np.pi + stab_thold, np.inf, np.inf])
        min_state_2 = np.array([-self._l_rail / 2.0 + self._x_buffer, -np.pi - stab_thold, -np.inf, -np.inf])
        max_state_2 = np.array([+self._l_rail / 2.0 - self._x_buffer, -np.pi + stab_thold, np.inf, np.inf])

        bs_1 = BoxSpace(min_state_1, max_state_1, labels=["x", "theta", "x_dot", "theta_dot"])
        bs_2 = BoxSpace(min_state_2, max_state_2, labels=["x", "theta", "x_dot", "theta_dot"])

        self._state_space = CompoundSpace([bs_1, bs_2])
예제 #13
0
    def _create_spaces(self):
        # Initial state space
        # Set the actual stable initial position. This position would be reached after some time using the internal
        # PD controller to stabilize at self.qpos_des_init
        if self.num_dof == 7:
            self.init_qpos[:7] = np.array(
                [0., 0.65, 0., 1.41, 0., -0.28, -1.57])
            self.init_qpos[
                7] = -0.21  # angle of the first rope segment relative to the cup bottom plate
            init_ball_pos = np.array([0.828, 0., 1.131])
            init_cup_goal = np.array([0.82521, 0, 1.4469])
        elif self.num_dof == 4:
            self.init_qpos[:4] = np.array([0., 0.63, 0., 1.27])
            self.init_qpos[
                4] = -0.34  # angle of the first rope segment relative to the cup bottom plate
            init_ball_pos = np.array([0.723, 0., 1.168])
            init_cup_goal = np.array([0.758, 0, 1.5])
        # The initial position of the ball in cartesian coordinates
        init_state = np.concatenate(
            [self.init_qpos, self.init_qvel, init_ball_pos, init_cup_goal])
        if self.fixed_init_state:
            self._init_space = SingularStateSpace(init_state)
        else:
            # Add plus/minus one degree to each motor joint and the first rope segment joint
            init_state_up = init_state.copy()
            init_state_up[:self.num_dof] += np.pi / 180 * np.array(
                [0.1, 1, 0.5, 1., 0.1, 1., 1.])[:self.num_dof]
            init_state_lo = init_state.copy()
            init_state_lo[:self.num_dof] -= np.pi / 180 * np.array(
                [0.1, 1, 0.5, 1., 0.1, 1., 1.])[:self.num_dof]
            self._init_space = BoxSpace(init_state_lo, init_state_up)

        # State space
        state_shape = init_state.shape
        state_up, state_lo = np.full(state_shape, pyrado.inf), np.full(
            state_shape, -pyrado.inf)
        # Ensure that joint limits of the arm are not reached (5 deg safety margin)
        state_up[:self.num_dof] = np.array([
            2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7
        ])[:self.num_dof] - 5 * np.pi / 180
        state_lo[:self.num_dof] = np.array([
            -2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7
        ])[:self.num_dof] + 5 * np.pi / 180
        self._state_space = BoxSpace(state_lo, state_up)

        # Torque space
        max_torque = np.array([150.0, 125.0, 40.0, 60.0, 5.0, 5.0,
                               2.0])[:self.num_dof]
        self._torque_space = BoxSpace(-max_torque, max_torque)

        # Action space (PD controller on joint positions and velocities)
        if self.num_dof == 4:
            self._act_space = act_space_wam_4dof
        elif self.num_dof == 7:
            self._act_space = act_space_wam_7dof

        # Observation space (normalized time)
        self._obs_space = BoxSpace(np.array([0.]),
                                   np.array([1.]),
                                   labels=['t'])
예제 #14
0
    def _create_spaces(self):
        super()._create_spaces()
        l_rail = self.domain_param['l_rail']

        min_state = np.array([
            -l_rail / 2. + self.x_buffer, np.pi - self.stab_thold, -l_rail,
            -2 * np.pi
        ])  # [m, rad, m/s, rad/s]
        max_state = np.array([
            +l_rail / 2. - self.x_buffer, np.pi + self.stab_thold, +l_rail,
            +2 * np.pi
        ])  # [m, rad, m/s, rad/s]

        max_init_state = np.array(
            [+0.05, np.pi + self.max_init_th_offset, +0.05,
             +8 / 180 * np.pi])  # [m, rad, m/s, rad/s]
        min_init_state = np.array(
            [-0.05, np.pi - self.max_init_th_offset, -0.05,
             -8 / 180 * np.pi])  # [m, rad, m/s, rad/s]

        self._state_space = BoxSpace(
            min_state, max_state, labels=['x', 'theta', 'x_dot', 'theta_dot'])
        self._init_space = BoxSpace(
            min_init_state,
            max_init_state,
            labels=['x', 'theta', 'x_dot', 'theta_dot'])
예제 #15
0
def test_combination_downsampling_delay():
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2, )),
                      obs_space=BoxSpace(-1, 1, shape=(2, )))
    wenv_ds_dl = DownsamplingWrapper(mockenv, factor=2)
    wenv_ds_dl = ActDelayWrapper(wenv_ds_dl, delay=3)

    # Reset to initialize buffer
    wenv_ds_dl.reset()

    # The first ones are 0 because the ActDelayWrapper's queue is initialized with 0
    wenv_ds_dl.step(np.array([0, 1]))
    assert mockenv.last_act == [0, 0]
    wenv_ds_dl.step(np.array([0, 2]))
    assert mockenv.last_act == [0, 0]
    wenv_ds_dl.step(np.array([0, 3]))
    assert mockenv.last_act == [0, 0]
    wenv_ds_dl.step(np.array([0, 4]))
    # Intuitively one would think last_act would be [0, 1] here, but this is the effect of the wrappers' combination
    assert mockenv.last_act == [0, 0]
    wenv_ds_dl.step(np.array([0, 5]))
    assert mockenv.last_act == [0, 2]
    wenv_ds_dl.step(np.array([0, 6]))
    assert mockenv.last_act == [0, 2]
    wenv_ds_dl.step(np.array([0, 7]))
    assert mockenv.last_act == [0, 4]
    wenv_ds_dl.step(np.array([0, 8]))
    assert mockenv.last_act == [0, 4]
    wenv_ds_dl.step(np.array([0, 9]))
    assert mockenv.last_act == [0, 6]
    wenv_ds_dl.step(np.array([1, 0]))
    assert mockenv.last_act == [0, 6]
예제 #16
0
    def __init__(self):
        """ Constructor """
        Serializable._init(self, locals())

        # Initialize basic variables
        super().__init__(dt=None, max_steps=1)

        # Set the bounds for the system's states adn actions
        max_state = np.array([100., 100.])
        max_act = max_state
        self._curr_act = np.zeros_like(
            max_act)  # just for usage in render function

        self._state_space = BoxSpace(-max_state,
                                     max_state,
                                     labels=['x_1', 'x_2'])
        self._init_space = SingularStateSpace(np.zeros(
            self._state_space.shape),
                                              labels=['x_1_init', 'x_2_init'])
        self._act_space = BoxSpace(-max_act,
                                   max_act,
                                   labels=['x_1_next', 'x_2_next'])

        # Define the task including the reward function
        self._task = self._create_task()

        # Animation with pyplot
        self._anim = dict(fig=None, trace_x=[], trace_y=[], trace_z=[])
def test_mask_from_indices():
    # Test the create_mask helper separately
    space = BoxSpace(-1, 1, shape=5)
    indices = [1, 4]

    mask = space.create_mask(indices)
    assert list(mask) == [0, 1, 0, 0, 1]
예제 #18
0
 def _create_spaces(self):
     # Define the spaces
     max_state = np.array([
         np.pi / 4.,
         np.pi / 4.,
         0.275 / 2.,
         0.275 / 2.,  # [rad, rad, m, m, rad/s, ...
         5 * np.pi,
         5 * np.pi,
         0.5,
         0.5
     ])  # ... rad/s, rad/s, m/s, m/s]
     max_act = np.array([3., 3.])  # [V]
     self._state_space = BoxSpace(-max_state,
                                  max_state,
                                  labels=[
                                      r'$\theta_{x}$', r'$\theta_{y}$',
                                      '$x$', '$y$', r'$\dot{\theta}_{x}$',
                                      r'$\dot{\theta}_{y}$', r'$\dot{x}$',
                                      r'$\dot{y}$'
                                  ])
     self._obs_space = self._state_space
     self._act_space = BoxSpace(-max_act,
                                max_act,
                                labels=['V_{x}', 'V_{y}'])
def test_mask_from_labels():
    # Test the create_mask helper separately
    space = BoxSpace(-1, 1, shape=5, labels=['w', 'o', 'r', 'l', 'd'])
    indices = ['w', 'o']

    mask = space.create_mask(indices)
    assert list(mask) == [1, 1, 0, 0, 0]
예제 #20
0
def create_nonrecurrent_policy():
    return LinearPolicy(
        EnvSpec(
            BoxSpace(-1, 1, 4),
            BoxSpace(-1, 1, 3),
        ),
        FeatureStack(const_feat, identity_feat, squared_feat),
    )
예제 #21
0
 def _create_spaces(self):
     # Define the spaces
     self._state_space = None  # needs to be set in subclasses
     max_obs = np.array([0.814/2., 1., 1., pyrado.inf, pyrado.inf])
     max_act = np.array([8.])  # [V], original: 24
     self._obs_space = BoxSpace(-max_obs, max_obs,
                                labels=['$x$', r'$\sin\theta$', r'$\cos\theta$', r'$\dot{x}$', r'$\dot{\theta}$'])
     self._act_space = BoxSpace(-max_act, max_act, labels=['$V$'])
예제 #22
0
def create_recurrent_policy():
    return RNNPolicy(
        EnvSpec(
            BoxSpace(-1, 1, 4),
            BoxSpace(-1, 1, 3),
        ),
        hidden_size=32, num_recurrent_layers=1, hidden_nonlin='tanh'
    )
예제 #23
0
def test_no_downsampling():
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)), obs_space=BoxSpace(-1, 1, shape=(2,)))
    wenv = DownsamplingWrapper(mockenv, factor=1)

    # Perform some actions
    wenv.step(np.array([4, 1]))
    assert mockenv.last_act == [4, 1]
    wenv.step(np.array([7, 5]))
    assert mockenv.last_act == [7, 5]
예제 #24
0
 def _create_spaces(self):
     # Define the spaces
     self._state_space = None  # needs to be set in subclasses
     max_obs = np.array([0.814 / 2., 1., 1., pyrado.inf, pyrado.inf])
     self._obs_space = BoxSpace(
         -max_obs,
         max_obs,
         labels=['x', 'sin_theta', 'cos_theta', 'x_dot', 'theta_dot'])
     self._act_space = BoxSpace(-max_act_qcp, max_act_qcp, labels=['V'])
예제 #25
0
    def _create_spaces(self):
        l_rail = self.domain_param["rail_length"]
        max_obs = np.array([l_rail / 2.0, 1.0, 1.0, np.inf, np.inf])

        self._state_space = None
        self._obs_space = BoxSpace(
            -max_obs,
            max_obs,
            labels=["x", "sin_theta", "cos_theta", "x_dot", "theta_dot"])
        self._init_space = None
        self._act_space = BoxSpace(-MAX_ACT_QCP, MAX_ACT_QCP, labels=["V"])
예제 #26
0
    def _create_spaces(self):
        l_rail = self.domain_param['l_rail']
        max_obs = np.array([l_rail / 2., 1., 1., np.inf, np.inf])

        self._state_space = None
        self._obs_space = BoxSpace(
            -max_obs,
            max_obs,
            labels=['x', 'sin_theta', 'cos_theta', 'x_dot', 'theta_dot'])
        self._init_space = None
        self._act_space = BoxSpace(-max_act_qcp, max_act_qcp, labels=['V'])
예제 #27
0
    def _create_spaces(self):
        super()._create_spaces()
        l_rail = self.domain_param['l_rail']

        # Define the spaces
        max_state = np.array([+l_rail/2. - self.x_buffer, +4*np.pi, np.inf, np.inf])  # [m, rad, m/s, rad/s]
        min_state = np.array([-l_rail/2. + self.x_buffer, -4*np.pi, -np.inf, -np.inf])  # [m, rad, m/s, rad/s]
        max_init_state = np.array([0.03, 1/180.*np.pi, 0.005, 2/180.*np.pi])  # [m, rad, m/s, rad/s]

        self._state_space = BoxSpace(min_state, max_state, labels=['$x$', r'$\theta$', r'$\dot{x}$', r'$\dot{\theta}$'])
        self._init_space = BoxSpace(-max_init_state, max_init_state,
                                    labels=['$x$', r'$\theta$', r'$\dot{x}$', r'$\dot{\theta}$'])
예제 #28
0
    def _create_spaces(self):
        l_rail = self.domain_param['l_rail']

        max_act = np.array([8.])  # [V], original: 24, energy-based swing up controller needs at about +-6.5V
        max_obs = np.array([l_rail/2., 1., 1., np.inf, np.inf])

        self._state_space = None
        self._obs_space = BoxSpace(-max_obs, max_obs,
                                   labels=['$x$', r'$\sin(\theta)$', r'$\cos(\theta)$',
                                           r'$\dot{x}$', r'$\dot{\theta}$'])
        self._init_space = None
        self._act_space = BoxSpace(-max_act, max_act, labels=['$V$'])
예제 #29
0
def test_act_downsampling():
    mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)), obs_space=BoxSpace(-1, 1, shape=(2,)))
    wenv = DownsamplingWrapper(mockenv, factor=2)

    # Perform some actions
    wenv.step(np.array([0, 1]))
    assert mockenv.last_act == [0, 1]
    wenv.step(np.array([2, 4]))  # should be ignored
    assert mockenv.last_act == [0, 1]
    wenv.step(np.array([1, 2]))
    assert mockenv.last_act == [1, 2]
    wenv.step(np.array([2, 3]))  # should be ignored
    assert mockenv.last_act == [1, 2]
예제 #30
0
    def _create_spaces(self):
        # Define the spaces
        max_state = np.array([4 * np.pi, 4 * np.pi])  # [rad, rad/s]
        max_obs = np.array([1.0, 1.0, np.inf])  # [-, -, rad/s]
        tau_max = self.domain_param["torque_thold"]

        self._state_space = BoxSpace(-max_state,
                                     max_state,
                                     labels=["theta", "theta_dot"])
        self._obs_space = BoxSpace(
            -max_obs, max_obs, labels=["sin_theta", "cos_theta", "theta_dot"])
        self._init_space = SingularStateSpace(self._init_state,
                                              labels=["theta", "theta_dot"])
        self._act_space = BoxSpace(-tau_max, tau_max, labels=["tau"])