def _create_spaces(self): # Action act_bounds = self.model.actuator_ctrlrange.copy().T self._act_space = BoxSpace( *act_bounds, labels=['bthigh', 'bshin', 'bfoot', 'fthigh', 'fshin', 'ffoot']) # State state_shape = np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).shape max_state = np.full(state_shape, pyrado.inf) self._state_space = BoxSpace(-max_state, max_state) # Initial state noise_halfspan = self.domain_param['reset_noise_halfspan'] min_init_qpos = self.init_qpos - np.full_like(self.init_qpos, noise_halfspan) max_init_qpos = self.init_qpos + np.full_like(self.init_qpos, noise_halfspan) min_init_qvel = self.init_qvel - np.full_like(self.init_qpos, noise_halfspan) max_init_qvel = self.init_qvel + np.full_like(self.init_qpos, noise_halfspan) min_init_state = np.concatenate([min_init_qpos, min_init_qvel]).ravel() max_init_state = np.concatenate([max_init_qpos, max_init_qvel]).ravel() self._init_space = BoxSpace(min_init_state, max_init_state) # Observation obs_shape = self.observe(max_state).shape max_obs = np.full(obs_shape, pyrado.inf) self._obs_space = BoxSpace(-max_obs, max_obs)
def _create_spaces(self): super()._create_spaces() l_rail = self.domain_param["rail_length"] min_state = np.array([ -l_rail / 2.0 + self._x_buffer, np.pi - self.stab_thold, -l_rail, -2 * np.pi ]) # [m, rad, m/s, rad/s] max_state = np.array([ +l_rail / 2.0 - self._x_buffer, np.pi + self.stab_thold, +l_rail, +2 * np.pi ]) # [m, rad, m/s, rad/s] max_init_state = np.array( [+0.02, np.pi + self.max_init_th_offset, +0.02, +5 / 180 * np.pi]) # [m, rad, m/s, rad/s] min_init_state = np.array( [-0.02, np.pi - self.max_init_th_offset, -0.02, -5 / 180 * np.pi]) # [m, rad, m/s, rad/s] self._state_space = BoxSpace( min_state, max_state, labels=["x", "theta", "x_dot", "theta_dot"]) self._init_space = BoxSpace( min_init_state, max_init_state, labels=["x", "theta", "x_dot", "theta_dot"])
def __init__(self, dt: float = 1/500., max_steps: int = pyrado.inf, ip: str = '192.168.2.17', task_args: [dict, None] = None): """ Constructor :param dt: time step size on the Quanser device [s] :param max_steps: maximum number of steps executed on the device :param ip: IP address of the Cart-pole platform :param task_args: arguments for the task construction """ super().__init__(dt, max_steps, ip, task_args) self._sleep_before_start = True # Define the task-specific state space stab_thold = 15/180.*np.pi # threshold angle for the stabilization task to be a failure [rad] min_state_1 = np.array([-self._l_rail/2. + self._x_buffer, np.pi - stab_thold, -np.inf, -np.inf]) max_state_1 = np.array([+self._l_rail/2. - self._x_buffer, np.pi + stab_thold, np.inf, np.inf]) min_state_2 = np.array([-self._l_rail/2. + self._x_buffer, -np.pi - stab_thold, -np.inf, -np.inf]) max_state_2 = np.array([+self._l_rail/2. - self._x_buffer, -np.pi + stab_thold, np.inf, np.inf]) bs_1 = BoxSpace(min_state_1, max_state_1, labels=['$x$', r'$\theta$', r'$\dot{x}$', r'$\dot{\theta}$']) bs_2 = BoxSpace(min_state_2, max_state_2, labels=['$x$', r'$\theta$', r'$\dot{x}$', r'$\dot{\theta}$']) self._state_space = CompoundSpace([bs_1, bs_2])
def _create_spaces(self): l_beam = self.domain_param['l_beam'] g = self.domain_param['g'] # Set the bounds for the system's states and actions max_state = np.array([l_beam/2., np.pi/4., 10., np.pi]) max_act = np.array([l_beam/2.*g*3.]) # max torque [Nm]; the factor 3.0 is arbitrary self._curr_act = np.zeros_like(max_act) # just for usage in render function self._state_space = BoxSpace(-max_state, max_state, labels=['$x$', r'$\alpha$', r'$\dot{x}$', r'$\dot{\alpha}$']) self._obs_space = self._state_space self._init_space = CompoundSpace([ BoxSpace( np.array([-0.8*l_beam/2., -5/180.*np.pi, -0.02*max_state[2], -0.02*max_state[3]]), np.array([-0.7*l_beam/2., +5/180.*np.pi, +0.02*max_state[2], +0.02*max_state[3]]), labels=['$x$', r'$\alpha$', r'$\dot{x}$', r'$\dot{\alpha}$'] ), BoxSpace( np.array([0.7*l_beam/2., -5/180.*np.pi, -0.02*max_state[2], -0.02*max_state[3]]), np.array([0.8*l_beam/2., +5/180.*np.pi, +0.02*max_state[2], +0.02*max_state[3]]), labels=['$x$', r'$\alpha$', r'$\dot{x}$', r'$\dot{\alpha}$'] ) ]) self._act_space = BoxSpace(-max_act, max_act, labels=[r'$\tau$'])
def test_domain_param(): mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2, )), obs_space=BoxSpace(-1, 1, shape=(2, ))) wenv = DownsamplingWrapper(mockenv, factor=2) # Reset to initialize buffer wenv.reset() # Perform some actions wenv.step(np.array([0, 1])) assert mockenv.last_act == [0, 1] wenv.step(np.array([2, 4])) assert mockenv.last_act == [0, 1] wenv.step(np.array([4, 4])) assert mockenv.last_act == [4, 4] # change the downsampling and reset wenv.domain_param = {'downsampling': 1} wenv.reset() wenv.step(np.array([1, 2])) assert mockenv.last_act == [1, 2] wenv.step(np.array([2, 3])) assert mockenv.last_act == [2, 3] wenv.step(np.array([8, 9])) assert mockenv.last_act == [8, 9]
def __init__(self, max_steps: int, example_config: bool): """ Constructor :param max_steps: maximum number of simulation steps :param example_config: configuration for the 'illustrative example' in the journal """ Serializable._init(self, locals()) super().__init__(dt=None, max_steps=max_steps) self.example_config = example_config self._planet = -1 # Initialize the domain parameters (Earth) self._g = 9.81 # gravity constant [m/s**2] self._k = 2e3 # catapult spring's stiffness constant [N/m] self._x = 1. # catapult spring's pre-elongation [m] # Domain independent parameter self._m = 70. # victim's mass [kg] # Set the bounds for the system's states adn actions max_state = np.array([1000.]) # [m], arbitrary but >> self._x max_act = max_state self._curr_act = np.zeros_like(max_act) # just for usage in render function self._state_space = BoxSpace(-max_state, max_state, labels=['$h$']) self._init_space = SingularStateSpace(np.zeros(self._state_space.shape), labels=['$h_0$']) self._act_space = BoxSpace(-max_act, max_act, labels=[r'$\theta$']) # Define the task including the reward function self._task = self._create_task(task_args=dict())
def test_combination_delay_downsampling(): """ After delay number of actions, the actions are downsampled by the factor """ mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2, )), obs_space=BoxSpace(-1, 1, shape=(2, ))) wenv_dl_ds = ActDelayWrapper(mockenv, delay=3) wenv_dl_ds = DownsamplingWrapper(wenv_dl_ds, factor=2) # Reset to initialize buffer wenv_dl_ds.reset() # The first ones are 0 because the ActDelayWrapper's queue is initialized with 0 wenv_dl_ds.step(np.array([0, 1])) assert mockenv.last_act == [0, 0] wenv_dl_ds.step(np.array([0, 2])) assert mockenv.last_act == [0, 0] wenv_dl_ds.step(np.array([0, 3])) assert mockenv.last_act == [0, 0] # One time step earlier than the other order of wrappers wenv_dl_ds.step(np.array([0, 4])) assert mockenv.last_act == [0, 1] wenv_dl_ds.step(np.array([0, 5])) assert mockenv.last_act == [0, 1] wenv_dl_ds.step(np.array([0, 6])) assert mockenv.last_act == [0, 3] wenv_dl_ds.step(np.array([0, 7])) assert mockenv.last_act == [0, 3] wenv_dl_ds.step(np.array([0, 8])) assert mockenv.last_act == [0, 5] wenv_dl_ds.step(np.array([0, 9])) assert mockenv.last_act == [0, 5] wenv_dl_ds.step(np.array([1, 0])) assert mockenv.last_act == [0, 7] wenv_dl_ds.step(np.array([1, 1])) assert mockenv.last_act == [0, 7]
def __init__(self, env_spec: EnvSpec, batch_size: int, reward_multiplier: float, lr: float = 3e-3, logger: StepLogger = None, device: str = 'cuda' if to.cuda.is_available() else 'cpu'): """ Constructor :param env_spec: environment specification :param batch_size: batch size for each update step :param reward_multiplier: factor for the predicted probability :param lr: learning rate :param logger: logger for every step of the algorithm, if `None` the default logger will be created """ self.device = device self.batch_size = batch_size self.reward_multiplier = reward_multiplier self.lr = lr spec = EnvSpec(obs_space=BoxSpace.cat( [env_spec.obs_space, env_spec.obs_space, env_spec.act_space]), act_space=BoxSpace(bound_lo=[0], bound_up=[1])) self.discriminator = FNNPolicy(spec=spec, hidden_nonlin=to.tanh, hidden_sizes=[62], output_nonlin=to.sigmoid) self.loss_fcn = nn.BCELoss() self.optimizer = to.optim.Adam(self.discriminator.parameters(), lr=lr, eps=1e-5) self.logger = logger
def _create_spaces(self): # Action act_bounds = self.model.actuator_ctrlrange.copy().T self._act_space = BoxSpace(*act_bounds, labels=['thigh', 'leg', 'foot']) # State n = self.init_qpos.size + self.init_qvel.size min_state = -self.domain_param['state_bound'] * np.ones(n) min_state[0] = -pyrado.inf # ignore forward position min_state[1] = self.domain_param['z_lower_bound'] min_state[2] = -self.domain_param['angle_bound'] max_state = self.domain_param['state_bound'] * np.ones(n) max_state[0] = +pyrado.inf # ignore forward position max_state[2] = self.domain_param['angle_bound'] self._state_space = BoxSpace(min_state, max_state) # Initial state noise_halfspan = self.domain_param['reset_noise_halfspan'] min_init_qpos = self.init_qpos - np.full_like(self.init_qpos, noise_halfspan) max_init_qpos = self.init_qpos + np.full_like(self.init_qpos, noise_halfspan) min_init_qvel = self.init_qvel - np.full_like(self.init_qpos, noise_halfspan) max_init_qvel = self.init_qvel + np.full_like(self.init_qpos, noise_halfspan) min_init_state = np.concatenate([min_init_qpos, min_init_qvel]).ravel() max_init_state = np.concatenate([max_init_qpos, max_init_qvel]).ravel() self._init_space = BoxSpace(min_init_state, max_init_state) # Observation obs_shape = self.observe(max_state).shape max_obs = np.full(obs_shape, pyrado.inf) self._obs_space = BoxSpace(-max_obs, max_obs)
def _create_spaces(self): # Define the spaces max_state = np.array( [115. / 180 * np.pi, 4 * np.pi, 20 * np.pi, 20 * np.pi]) # [rad, rad, rad/s, rad/s] max_obs = np.array([1., 1., 1., 1., np.inf, np.inf]) # [-, -, -, -, rad/s, rad/s] max_init_state = np.array([2., 1., 0.5, 0.5 ]) / 180 * np.pi # [rad, rad, rad/s, rad/s] self._state_space = BoxSpace( -max_state, max_state, labels=['theta', 'alpha', 'theta_dot', 'alpha_dot']) self._obs_space = BoxSpace(-max_obs, max_obs, labels=[ 'sin_theta', 'cos_theta', 'sin_alpha', 'cos_alpha', 'theta_dot', 'alpha_dot' ]) self._init_space = BoxSpace( -max_init_state, max_init_state, labels=['theta', 'alpha', 'theta_dot', 'alpha_dot']) self._act_space = BoxSpace(-max_act_qq, max_act_qq, labels=['V'])
def _create_spaces(self): super()._create_spaces() # Define the spaces l_rail = self.domain_param["rail_length"] max_state = np.array([ +l_rail / 2.0 - self._x_buffer, +4 * np.pi, 2 * l_rail, 20 * np.pi ]) # [m, rad, m/s, rad/s] min_state = np.array([ -l_rail / 2.0 + self._x_buffer, -4 * np.pi, -2 * l_rail, -20 * np.pi ]) # [m, rad, m/s, rad/s] if self._wild_init: max_init_state = np.array([0.05, np.pi, 0.02, 2 / 180.0 * np.pi ]) # [m, rad, m/s, rad/s] else: max_init_state = np.array( [0.02, 2 / 180.0 * np.pi, 0.0, 1 / 180.0 * np.pi]) # [m, rad, m/s, rad/s] self._state_space = BoxSpace( min_state, max_state, labels=["x", "theta", "x_dot", "theta_dot"]) self._init_space = BoxSpace( -max_init_state, max_init_state, labels=["x", "theta", "x_dot", "theta_dot"])
def __init__( self, dt: float = 1 / 500.0, max_steps: int = pyrado.inf, task_args: Optional[dict] = None, ip: Optional[str] = "192.168.2.38", ): """ Constructor :param dt: time step size on the Quanser device [s] :param max_steps: maximum number of steps executed on the device :param task_args: arguments for the task construction :param ip: IP address of the Cart-pole platform """ super().__init__(dt, max_steps, task_args, ip) # Define the task-specific state space stab_thold = 15 / 180.0 * np.pi # threshold angle for the stabilization task to be a failure [rad] min_state_1 = np.array([-self._l_rail / 2.0 + self._x_buffer, np.pi - stab_thold, -np.inf, -np.inf]) max_state_1 = np.array([+self._l_rail / 2.0 - self._x_buffer, np.pi + stab_thold, np.inf, np.inf]) min_state_2 = np.array([-self._l_rail / 2.0 + self._x_buffer, -np.pi - stab_thold, -np.inf, -np.inf]) max_state_2 = np.array([+self._l_rail / 2.0 - self._x_buffer, -np.pi + stab_thold, np.inf, np.inf]) bs_1 = BoxSpace(min_state_1, max_state_1, labels=["x", "theta", "x_dot", "theta_dot"]) bs_2 = BoxSpace(min_state_2, max_state_2, labels=["x", "theta", "x_dot", "theta_dot"]) self._state_space = CompoundSpace([bs_1, bs_2])
def _create_spaces(self): # Initial state space # Set the actual stable initial position. This position would be reached after some time using the internal # PD controller to stabilize at self.qpos_des_init if self.num_dof == 7: self.init_qpos[:7] = np.array( [0., 0.65, 0., 1.41, 0., -0.28, -1.57]) self.init_qpos[ 7] = -0.21 # angle of the first rope segment relative to the cup bottom plate init_ball_pos = np.array([0.828, 0., 1.131]) init_cup_goal = np.array([0.82521, 0, 1.4469]) elif self.num_dof == 4: self.init_qpos[:4] = np.array([0., 0.63, 0., 1.27]) self.init_qpos[ 4] = -0.34 # angle of the first rope segment relative to the cup bottom plate init_ball_pos = np.array([0.723, 0., 1.168]) init_cup_goal = np.array([0.758, 0, 1.5]) # The initial position of the ball in cartesian coordinates init_state = np.concatenate( [self.init_qpos, self.init_qvel, init_ball_pos, init_cup_goal]) if self.fixed_init_state: self._init_space = SingularStateSpace(init_state) else: # Add plus/minus one degree to each motor joint and the first rope segment joint init_state_up = init_state.copy() init_state_up[:self.num_dof] += np.pi / 180 * np.array( [0.1, 1, 0.5, 1., 0.1, 1., 1.])[:self.num_dof] init_state_lo = init_state.copy() init_state_lo[:self.num_dof] -= np.pi / 180 * np.array( [0.1, 1, 0.5, 1., 0.1, 1., 1.])[:self.num_dof] self._init_space = BoxSpace(init_state_lo, init_state_up) # State space state_shape = init_state.shape state_up, state_lo = np.full(state_shape, pyrado.inf), np.full( state_shape, -pyrado.inf) # Ensure that joint limits of the arm are not reached (5 deg safety margin) state_up[:self.num_dof] = np.array([ 2.6, 1.985, 2.8, 3.14159, 1.25, 1.5707, 2.7 ])[:self.num_dof] - 5 * np.pi / 180 state_lo[:self.num_dof] = np.array([ -2.6, -1.985, -2.8, -0.9, -4.55, -1.5707, -2.7 ])[:self.num_dof] + 5 * np.pi / 180 self._state_space = BoxSpace(state_lo, state_up) # Torque space max_torque = np.array([150.0, 125.0, 40.0, 60.0, 5.0, 5.0, 2.0])[:self.num_dof] self._torque_space = BoxSpace(-max_torque, max_torque) # Action space (PD controller on joint positions and velocities) if self.num_dof == 4: self._act_space = act_space_wam_4dof elif self.num_dof == 7: self._act_space = act_space_wam_7dof # Observation space (normalized time) self._obs_space = BoxSpace(np.array([0.]), np.array([1.]), labels=['t'])
def _create_spaces(self): super()._create_spaces() l_rail = self.domain_param['l_rail'] min_state = np.array([ -l_rail / 2. + self.x_buffer, np.pi - self.stab_thold, -l_rail, -2 * np.pi ]) # [m, rad, m/s, rad/s] max_state = np.array([ +l_rail / 2. - self.x_buffer, np.pi + self.stab_thold, +l_rail, +2 * np.pi ]) # [m, rad, m/s, rad/s] max_init_state = np.array( [+0.05, np.pi + self.max_init_th_offset, +0.05, +8 / 180 * np.pi]) # [m, rad, m/s, rad/s] min_init_state = np.array( [-0.05, np.pi - self.max_init_th_offset, -0.05, -8 / 180 * np.pi]) # [m, rad, m/s, rad/s] self._state_space = BoxSpace( min_state, max_state, labels=['x', 'theta', 'x_dot', 'theta_dot']) self._init_space = BoxSpace( min_init_state, max_init_state, labels=['x', 'theta', 'x_dot', 'theta_dot'])
def test_combination_downsampling_delay(): mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2, )), obs_space=BoxSpace(-1, 1, shape=(2, ))) wenv_ds_dl = DownsamplingWrapper(mockenv, factor=2) wenv_ds_dl = ActDelayWrapper(wenv_ds_dl, delay=3) # Reset to initialize buffer wenv_ds_dl.reset() # The first ones are 0 because the ActDelayWrapper's queue is initialized with 0 wenv_ds_dl.step(np.array([0, 1])) assert mockenv.last_act == [0, 0] wenv_ds_dl.step(np.array([0, 2])) assert mockenv.last_act == [0, 0] wenv_ds_dl.step(np.array([0, 3])) assert mockenv.last_act == [0, 0] wenv_ds_dl.step(np.array([0, 4])) # Intuitively one would think last_act would be [0, 1] here, but this is the effect of the wrappers' combination assert mockenv.last_act == [0, 0] wenv_ds_dl.step(np.array([0, 5])) assert mockenv.last_act == [0, 2] wenv_ds_dl.step(np.array([0, 6])) assert mockenv.last_act == [0, 2] wenv_ds_dl.step(np.array([0, 7])) assert mockenv.last_act == [0, 4] wenv_ds_dl.step(np.array([0, 8])) assert mockenv.last_act == [0, 4] wenv_ds_dl.step(np.array([0, 9])) assert mockenv.last_act == [0, 6] wenv_ds_dl.step(np.array([1, 0])) assert mockenv.last_act == [0, 6]
def __init__(self): """ Constructor """ Serializable._init(self, locals()) # Initialize basic variables super().__init__(dt=None, max_steps=1) # Set the bounds for the system's states adn actions max_state = np.array([100., 100.]) max_act = max_state self._curr_act = np.zeros_like( max_act) # just for usage in render function self._state_space = BoxSpace(-max_state, max_state, labels=['x_1', 'x_2']) self._init_space = SingularStateSpace(np.zeros( self._state_space.shape), labels=['x_1_init', 'x_2_init']) self._act_space = BoxSpace(-max_act, max_act, labels=['x_1_next', 'x_2_next']) # Define the task including the reward function self._task = self._create_task() # Animation with pyplot self._anim = dict(fig=None, trace_x=[], trace_y=[], trace_z=[])
def test_mask_from_indices(): # Test the create_mask helper separately space = BoxSpace(-1, 1, shape=5) indices = [1, 4] mask = space.create_mask(indices) assert list(mask) == [0, 1, 0, 0, 1]
def _create_spaces(self): # Define the spaces max_state = np.array([ np.pi / 4., np.pi / 4., 0.275 / 2., 0.275 / 2., # [rad, rad, m, m, rad/s, ... 5 * np.pi, 5 * np.pi, 0.5, 0.5 ]) # ... rad/s, rad/s, m/s, m/s] max_act = np.array([3., 3.]) # [V] self._state_space = BoxSpace(-max_state, max_state, labels=[ r'$\theta_{x}$', r'$\theta_{y}$', '$x$', '$y$', r'$\dot{\theta}_{x}$', r'$\dot{\theta}_{y}$', r'$\dot{x}$', r'$\dot{y}$' ]) self._obs_space = self._state_space self._act_space = BoxSpace(-max_act, max_act, labels=['V_{x}', 'V_{y}'])
def test_mask_from_labels(): # Test the create_mask helper separately space = BoxSpace(-1, 1, shape=5, labels=['w', 'o', 'r', 'l', 'd']) indices = ['w', 'o'] mask = space.create_mask(indices) assert list(mask) == [1, 1, 0, 0, 0]
def create_nonrecurrent_policy(): return LinearPolicy( EnvSpec( BoxSpace(-1, 1, 4), BoxSpace(-1, 1, 3), ), FeatureStack(const_feat, identity_feat, squared_feat), )
def _create_spaces(self): # Define the spaces self._state_space = None # needs to be set in subclasses max_obs = np.array([0.814/2., 1., 1., pyrado.inf, pyrado.inf]) max_act = np.array([8.]) # [V], original: 24 self._obs_space = BoxSpace(-max_obs, max_obs, labels=['$x$', r'$\sin\theta$', r'$\cos\theta$', r'$\dot{x}$', r'$\dot{\theta}$']) self._act_space = BoxSpace(-max_act, max_act, labels=['$V$'])
def create_recurrent_policy(): return RNNPolicy( EnvSpec( BoxSpace(-1, 1, 4), BoxSpace(-1, 1, 3), ), hidden_size=32, num_recurrent_layers=1, hidden_nonlin='tanh' )
def test_no_downsampling(): mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)), obs_space=BoxSpace(-1, 1, shape=(2,))) wenv = DownsamplingWrapper(mockenv, factor=1) # Perform some actions wenv.step(np.array([4, 1])) assert mockenv.last_act == [4, 1] wenv.step(np.array([7, 5])) assert mockenv.last_act == [7, 5]
def _create_spaces(self): # Define the spaces self._state_space = None # needs to be set in subclasses max_obs = np.array([0.814 / 2., 1., 1., pyrado.inf, pyrado.inf]) self._obs_space = BoxSpace( -max_obs, max_obs, labels=['x', 'sin_theta', 'cos_theta', 'x_dot', 'theta_dot']) self._act_space = BoxSpace(-max_act_qcp, max_act_qcp, labels=['V'])
def _create_spaces(self): l_rail = self.domain_param["rail_length"] max_obs = np.array([l_rail / 2.0, 1.0, 1.0, np.inf, np.inf]) self._state_space = None self._obs_space = BoxSpace( -max_obs, max_obs, labels=["x", "sin_theta", "cos_theta", "x_dot", "theta_dot"]) self._init_space = None self._act_space = BoxSpace(-MAX_ACT_QCP, MAX_ACT_QCP, labels=["V"])
def _create_spaces(self): l_rail = self.domain_param['l_rail'] max_obs = np.array([l_rail / 2., 1., 1., np.inf, np.inf]) self._state_space = None self._obs_space = BoxSpace( -max_obs, max_obs, labels=['x', 'sin_theta', 'cos_theta', 'x_dot', 'theta_dot']) self._init_space = None self._act_space = BoxSpace(-max_act_qcp, max_act_qcp, labels=['V'])
def _create_spaces(self): super()._create_spaces() l_rail = self.domain_param['l_rail'] # Define the spaces max_state = np.array([+l_rail/2. - self.x_buffer, +4*np.pi, np.inf, np.inf]) # [m, rad, m/s, rad/s] min_state = np.array([-l_rail/2. + self.x_buffer, -4*np.pi, -np.inf, -np.inf]) # [m, rad, m/s, rad/s] max_init_state = np.array([0.03, 1/180.*np.pi, 0.005, 2/180.*np.pi]) # [m, rad, m/s, rad/s] self._state_space = BoxSpace(min_state, max_state, labels=['$x$', r'$\theta$', r'$\dot{x}$', r'$\dot{\theta}$']) self._init_space = BoxSpace(-max_init_state, max_init_state, labels=['$x$', r'$\theta$', r'$\dot{x}$', r'$\dot{\theta}$'])
def _create_spaces(self): l_rail = self.domain_param['l_rail'] max_act = np.array([8.]) # [V], original: 24, energy-based swing up controller needs at about +-6.5V max_obs = np.array([l_rail/2., 1., 1., np.inf, np.inf]) self._state_space = None self._obs_space = BoxSpace(-max_obs, max_obs, labels=['$x$', r'$\sin(\theta)$', r'$\cos(\theta)$', r'$\dot{x}$', r'$\dot{\theta}$']) self._init_space = None self._act_space = BoxSpace(-max_act, max_act, labels=['$V$'])
def test_act_downsampling(): mockenv = MockEnv(act_space=BoxSpace(-1, 1, shape=(2,)), obs_space=BoxSpace(-1, 1, shape=(2,))) wenv = DownsamplingWrapper(mockenv, factor=2) # Perform some actions wenv.step(np.array([0, 1])) assert mockenv.last_act == [0, 1] wenv.step(np.array([2, 4])) # should be ignored assert mockenv.last_act == [0, 1] wenv.step(np.array([1, 2])) assert mockenv.last_act == [1, 2] wenv.step(np.array([2, 3])) # should be ignored assert mockenv.last_act == [1, 2]
def _create_spaces(self): # Define the spaces max_state = np.array([4 * np.pi, 4 * np.pi]) # [rad, rad/s] max_obs = np.array([1.0, 1.0, np.inf]) # [-, -, rad/s] tau_max = self.domain_param["torque_thold"] self._state_space = BoxSpace(-max_state, max_state, labels=["theta", "theta_dot"]) self._obs_space = BoxSpace( -max_obs, max_obs, labels=["sin_theta", "cos_theta", "theta_dot"]) self._init_space = SingularStateSpace(self._init_state, labels=["theta", "theta_dot"]) self._act_space = BoxSpace(-tau_max, tau_max, labels=["tau"])