def __init__(
    self,
    dt: float,
    max_steps: int,
    task_args: Optional[dict],
    long: bool,
    simple_dynamics: bool,
    wild_init: bool,
):
    r"""
    Constructor

    :param dt: simulation step size [s]
    :param max_steps: maximum number of simulation steps
    :param task_args: arguments for the task construction
    :param long: set to `True` if using the long pole, else `False`
    :param simple_dynamics: if `True`, use the simpler dynamics model from Quanser. If `False`, use a dynamics
                            model which includes friction
    :param wild_init: if `True` the init state space is increased drastically, e.g. the initial pendulum angle
                      can be in $[-\pi, +\pi]$. Only applicable to `QCartPoleSwingUpSim`.
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    # Configuration flags, stored before the parent constructor runs
    self._long = long
    self._wild_init = wild_init
    self._simple_dynamics = simple_dynamics

    # Internal state
    self._obs_space = None
    self._th_ddot = None  # internal memory necessary for computing the friction force
    self._x_buffer = 0.05  # [m]

    # Call SimPyEnv's constructor
    super().__init__(dt, max_steps, task_args)

    # Update the class-specific domain parameters
    self.domain_param = self.get_nominal_domain_param(long=long)
def __init__(
    self,
    wrapped_env: SimEnv,
    noise_mean: Optional[Union[list, np.ndarray]] = None,
    noise_std: Optional[Union[list, np.ndarray]] = None,
):
    """
    Constructor

    :param wrapped_env: environment to wrap
    :param noise_mean: list or ndarray for the mean of the noise, by default all zeros
    :param noise_std: list or ndarray for the standard deviation of the noise, by default all zeros
    :raises pyrado.ShapeErr: if a given noise specification does not have the shape of the observation space
    """
    Serializable._init(self, locals())

    super().__init__(wrapped_env)

    # Parse noise specification. The shapes are validated with explicit exceptions instead of `assert`
    # statements, since the latter are removed when Python runs with the `-O` flag.
    if noise_mean is not None:
        self._mean = np.array(noise_mean)
        if self._mean.shape != self.obs_space.shape:
            raise pyrado.ShapeErr(given=self._mean, expected_match=self.obs_space)
    else:
        self._mean = np.zeros(self.obs_space.shape)

    if noise_std is not None:
        self._std = np.array(noise_std)
        if self._std.shape != self.obs_space.shape:
            raise pyrado.ShapeErr(given=self._std, expected_match=self.obs_space)
    else:
        self._std = np.zeros(self.obs_space.shape)
def __init__(
    self,
    dt: float,
    max_steps: int = pyrado.inf,
    task_args: Optional[dict] = None,
    long: bool = True,
    simple_dynamics: bool = True,
):
    """
    Constructor

    :param dt: simulation step size [s]
    :param max_steps: maximum number of simulation steps
    :param task_args: arguments for the task construction
    :param long: set to `True` if using the long pole, else `False`
    :param simple_dynamics: if `True`, use the simpler dynamics model from Quanser. If `False`, use a dynamics
                            model which includes friction
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    # Both angles are set before the parent constructor is called
    self.max_init_th_offset = 8 / 180.0 * np.pi  # [rad]
    self.stab_thold = 15 / 180.0 * np.pi  # threshold angle for the stabilization task to be a failure [rad]

    # The enlarged init state space is always disabled here
    super().__init__(dt, max_steps, task_args, long, simple_dynamics, wild_init=False)
def __init__(
    self,
    dt: float,
    max_steps: int = pyrado.inf,
    task_args: Optional[dict] = None,
    simplified_dyn: bool = False,
    load_experimental_tholds: bool = True,
):
    """
    Constructor

    :param dt: simulation step size [s]
    :param max_steps: maximum number of simulation steps
    :param task_args: arguments for the task construction
    :param simplified_dyn: use a dynamics model without Coriolis forces and without friction
    :param load_experimental_tholds: use the voltage thresholds determined from experiments.
                                     NOTE(review): this flag is not read anywhere in this constructor, it is only
                                     contained in the `locals()` passed to `Serializable._init` - confirm where
                                     it is supposed to take effect.
    """
    Serializable._init(self, locals())

    self._simplified_dyn = simplified_dyn
    self.plate_angs = np.zeros(2)  # plate's angles alpha and beta [rad] (unused for simplified_dyn = True)

    # Call SimPyEnv's constructor
    super().__init__(dt, max_steps, task_args)

    # The kinematics helper is only created for the full dynamics model, i.e. `_kin` does not exist otherwise
    if not simplified_dyn:
        self._kin = QBallBalancerKin(self)
def __init__(self, qbb, num_opt_iter=100, render_mode=RenderMode()):
    """
    Constructor

    :param qbb: QBallBalancerSim object
    :param num_opt_iter: number of optimizer iterations for the IK
    :param render_mode: the render mode; the pyplot figure is only created if `render_mode.video` is set
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    self._qbb = qbb
    self.num_opt_iter = num_opt_iter
    self.render_mode = render_mode

    # Geometry, partly read from the domain parameters of the given simulation
    self.r = float(self._qbb.domain_param['r_arm'])
    self.l = float(self._qbb.domain_param['l_plate'] / 2.)
    self.d = 0.10  # [m] roughly measured

    # Nothing else to do if no visualization is requested
    if not render_mode.video:
        return

    # Visualization: one square figure with three line objects that all start at the origin
    self.fig, self.ax = plt.subplots(figsize=(5, 5))
    self.ax.set_xlim(-0.5 * self.r, 1.2 * (self.r + self.l))
    self.ax.set_ylim(-1.0 * self.d, 2 * self.d)
    self.ax.set_aspect('equal')
    (self.line1,) = self.ax.plot([0, 0], [0, 0], marker='o')
    (self.line2,) = self.ax.plot([0, 0], [0, 0], marker='o')
    (self.line3,) = self.ax.plot([0, 0], [0, 0], marker='o')
def __init__(
    self,
    wrapped_env: Union[SimEnv, EnvWrapper],
    noise_std: Union[list, np.ndarray],
    noise_mean: Optional[Union[list, np.ndarray]] = None,
):
    """
    Constructor

    :param wrapped_env: environment to wrap
    :param noise_std: list or numpy array for the standard deviation of the noise
    :param noise_mean: list or numpy array for the mean of the noise, by default all zeros, i.e. no bias
    :raises pyrado.ShapeErr: if `noise_std` or `noise_mean` does not have the shape of the observation space
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    super().__init__(wrapped_env)

    # The standard deviation is mandatory and checked first
    self._std = np.array(noise_std)
    if self._std.shape != self.obs_space.shape:
        raise pyrado.ShapeErr(given=self._std, expected_match=self.obs_space)

    # The mean is optional and falls back to zeros
    if noise_mean is None:
        self._mean = np.zeros(self.obs_space.shape)
    else:
        self._mean = np.array(noise_mean)
        if self._mean.shape != self.obs_space.shape:
            raise pyrado.ShapeErr(given=self._mean, expected_match=self.obs_space)
def __init__(self):
    """
    Constructor

    Creates two-dimensional state and action spaces which are both bounded by +/-100 per dimension, an init space
    that only contains the all-zeros state, the task, and an empty container for the pyplot animation.
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    # Initialize basic variables
    super().__init__(dt=None, max_steps=1)

    # Set the bounds for the system's states and actions (the actions share the bounds of the states)
    max_state = np.array([100., 100.])
    self._curr_act = np.zeros_like(max_state)  # just for usage in render function

    self._state_space = BoxSpace(-max_state, max_state, labels=['x_1', 'x_2'])
    self._init_space = SingularStateSpace(
        np.zeros(self._state_space.shape), labels=['x_1_init', 'x_2_init']
    )
    self._act_space = BoxSpace(-max_state, max_state, labels=['x_1_next', 'x_2_next'])

    # Define the task including the reward function
    self._task = self._create_task()

    # Animation with pyplot
    self._anim = {'fig': None, 'trace_x': [], 'trace_y': [], 'trace_z': []}
def __init__(
    self,
    dt: float,
    max_steps: int = pyrado.inf,
    task_args: Optional[dict] = None,
    long: bool = False,
    simple_dynamics: bool = False,
    wild_init: bool = True,
):
    r"""
    Constructor

    :param dt: simulation step size [s]
    :param max_steps: maximum number of simulation steps
    :param task_args: arguments for the task construction
    :param long: set to `True` if using the long pole, else `False`
    :param simple_dynamics: if `True`, use the simpler dynamics model from Quanser. If `False`, use a dynamics
                            model which includes friction
    :param wild_init: if `True` the init state space is increased drastically, e.g. the initial pendulum angle
                      can be in $[-\pi, +\pi]$
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    # All arguments are handed over to the parent constructor unchanged
    super().__init__(dt, max_steps, task_args, long, simple_dynamics, wild_init)
def __init__(self, max_steps: int, example_config: bool):
    """
    Constructor

    :param max_steps: maximum number of simulation steps
    :param example_config: configuration for the 'illustrative example' in the journal
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    super().__init__(dt=None, max_steps=max_steps)

    self.example_config = example_config
    self._planet = -1

    # Initialize the domain parameters (Earth)
    self._g = 9.81  # gravity constant [m/s**2]
    self._k = 2e3  # catapult spring's stiffness constant [N/m]
    self._x = 1.  # catapult spring's pre-elongation [m]

    # Domain independent parameter
    self._m = 70.  # victim's mass [kg]

    # Set the bounds for the system's states and actions (the actions share the bounds of the states)
    max_state = np.array([1000.])  # [m], arbitrary but >> self._x
    self._curr_act = np.zeros_like(max_state)  # just for usage in render function

    self._state_space = BoxSpace(-max_state, max_state, labels=['$h$'])
    self._init_space = SingularStateSpace(np.zeros(self._state_space.shape), labels=['$h_0$'])
    self._act_space = BoxSpace(-max_state, max_state, labels=[r'$\theta$'])

    # Define the task including the reward function
    self._task = self._create_task(task_args={})
def __init__(
    self,
    wrapped_env: Env,
    mask: Optional[list] = None,
    idcs: Optional[list] = None,
    keep_selected: bool = False,
):
    """
    Constructor

    :param wrapped_env: environment to wrap
    :param mask: mask out array, entries with 1 are dropped (behavior can be inverted by keep_selected=True)
    :param idcs: indices to drop, ignored if mask is specified. If the observation space is labeled, the labels
                 can be used as indices.
    :param keep_selected: set to true to keep the mask entries with 1/the specified indices and drop the others
    :raises pyrado.ShapeErr: if `mask` does not have the shape of the wrapped environment's observation space
    :raises pyrado.ValueErr: if neither `mask` nor `idcs` is given
    """
    Serializable._init(self, locals())

    super().__init__(wrapped_env)

    # Parse selection
    if mask is not None:
        # Use explicit mask
        mask = np.array(mask, dtype=bool)
        if mask.shape != wrapped_env.obs_space.shape:
            raise pyrado.ShapeErr(given=mask, expected_match=wrapped_env.obs_space)
    else:
        # Parse indices. This is raised explicitly since an `assert` would be removed when running with `-O`,
        # which would pass `None` on to `create_mask`.
        if idcs is None:
            raise pyrado.ValueErr(msg="Either mask or indices must be specified")
        mask = wrapped_env.obs_space.create_mask(idcs)

    # Invert if needed, since `mask` marks the entries to drop unless `keep_selected` is set
    if keep_selected:
        self.keep_mask = mask
    else:
        self.keep_mask = np.logical_not(mask)
def __init__(self, *args, **kwargs):
    """
    Constructor

    :param args: positional arguments, forwarded to BallOnBeamSim's constructor
    :param kwargs: keyword arguments, forwarded to BallOnBeamSim's constructor
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    # Nothing is added here, everything is handed over unchanged
    super().__init__(*args, **kwargs)
def __init__(self, dt: float):
    """
    Constructor

    :param dt: simulation step size [s]
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    self._dt = dt

    # Placeholders which are not filled by the constructor
    self.omega = self.zeta = None
    self.A = self.B = None
def __init__(
    self,
    dt: float,
    max_steps: int = pyrado.inf,
    task_args: Optional[dict] = None,
    long: bool = False,
):
    """
    Constructor

    :param dt: simulation step size [s]
    :param max_steps: maximum number of simulation steps
    :param task_args: arguments for the task construction
    :param long: long (`True`) or short (`False`) pole
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    # All arguments are handed over to the parent constructor unchanged
    super().__init__(dt, max_steps, task_args, long)
def __init__(
    self,
    dt: float,
    max_steps: int = pyrado.inf,
    task_args: Optional[dict] = None,
    long: bool = False,
):
    """
    Constructor

    :param dt: simulation step size [s]
    :param max_steps: maximum number of simulation steps
    :param task_args: arguments for the task construction
    :param long: long (`True`) or short (`False`) pole
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    # Both angles are set before the parent constructor is called
    self.stab_thold = 15 / 180. * np.pi  # threshold angle for the stabilization task to be a failure [rad]
    self.max_init_th_offset = 8 / 180. * np.pi  # [rad]

    super().__init__(dt, max_steps, task_args, long)
def __init__(
    self,
    dt: float,
    max_steps: int = pyrado.inf,
    task_args: Optional[dict] = None,
    init_state: Optional[np.ndarray] = None,
):
    """
    Constructor

    :param dt: simulation step size [s]
    :param max_steps: maximum number of simulation steps
    :param task_args: arguments for the task construction
    :param init_state: set a pole angle and pole angular velocity for the `SingularStateSpace`, by default both
                       are zero
    :raises pyrado.ShapeErr: if `init_state` does not contain exactly 2 elements
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    # Initial state as [rad, rad/s]
    if init_state is None:
        self._init_state = np.zeros(2)
    else:
        self._init_state = np.asarray(init_state)

    # Only the number of elements is checked, not the exact shape
    if self._init_state.size != 2:
        raise pyrado.ShapeErr(given=self._init_state, expected_match=(2, ))

    super().__init__(dt, max_steps, task_args)
def __init__(
    self,
    dt: float,
    max_steps: int = pyrado.inf,
    task_args: Optional[dict] = None,
    long: bool = False,
):
    """
    Constructor

    :param dt: simulation step size [s]
    :param max_steps: maximum number of simulation steps
    :param task_args: arguments for the task construction
    :param long: long (`True`) or short (`False`) pole
    """
    # Must stay the first statement so that `locals()` only holds the constructor arguments
    Serializable._init(self, locals())

    # Set before the parent constructor is called
    self._obs_space = None
    self._long = long
    self.x_buffer = 0.05  # [m]

    # Call SimPyEnv's constructor
    super().__init__(dt, max_steps, task_args)

    # Update the class-specific domain parameters
    self.domain_param = self.get_nominal_domain_param(long=long)
def __init__(
    self,
    wrapped_env: SimEnv,
    mask_pos: Optional[List] = None,
    idcs_pos: Optional[List] = None,
    mask_vel: Optional[List] = None,
    idcs_vel: Optional[List] = None,
    num: Optional[Tuple] = (50, 0),
    den: Optional[Tuple] = (1, 50),
):
    """
    Constructor

    :param wrapped_env: environment to wrap, can only be used on `SimEnv` since access to the state is needed, and
                        we don't want to assume that all `RealEnv` can reconstruct this state from the
                        observations. It wouldn't make much sense to wrap a `RealEnv` with this wrapper anyway,
                        since the goal is to mimic the behavior of the real environment's velocity filter.
    :param mask_pos: state mask array to select the position quantities in the state space, entries with 1 are
                     kept
    :param idcs_pos: state indices to select, ignored if mask is specified. If the state space is labeled, these
                     labels can be used as indices.
    :param mask_vel: observation mask array to select the velocity quantities in the observation space, entries
                     with 1 are kept
    :param idcs_vel: velocity observation indices to select, ignored if mask is specified. If the observation
                     space is labeled, these labels can be used as indices.
    :param num: continuous-time filter numerator
    :param den: continuous-time filter denominator
    :raises pyrado.TypeErr: if the innermost environment is not a `SimEnv`
    :raises pyrado.ShapeErr: if a given mask does not have the shape of the corresponding space
    :raises pyrado.ValueErr: if neither a mask nor indices are given for the positions or the velocities
    """
    # No local variable is created here, since `locals()` is passed to `Serializable._init` right after
    if not isinstance(inner_env(wrapped_env), SimEnv):
        raise pyrado.TypeErr(given=inner_env(wrapped_env), expected_type=SimEnv)

    Serializable._init(self, locals())

    # Call EnvWrapperObs's constructor
    super().__init__(wrapped_env)

    # Parse selections for the positions to be filtered
    if mask_pos is None:
        # Parse indices
        if idcs_pos is None:
            raise pyrado.ValueErr(msg="Either mask or indices must be specified!")
        self.mask_pos = wrapped_env.state_space.create_mask(idcs_pos)
    else:
        # Use explicit mask
        self.mask_pos = np.array(mask_pos, dtype=bool)
        if self.mask_pos.shape != wrapped_env.state_space.shape:
            raise pyrado.ShapeErr(given=mask_pos, expected_match=wrapped_env.state_space)

    # Parse selections for the velocities to be replaced by the filtered positions
    if mask_vel is None:
        # Parse indices
        if idcs_vel is None:
            raise pyrado.ValueErr(msg="Either mask or indices must be specified!")
        self.mask_vel = wrapped_env.obs_space.create_mask(idcs_vel)
    else:
        # Use explicit mask
        self.mask_vel = np.array(mask_vel, dtype=bool)
        if self.mask_vel.shape != wrapped_env.obs_space.shape:
            raise pyrado.ShapeErr(given=mask_vel, expected_match=wrapped_env.obs_space)

    # Create the filter by discretizing the continuous-time transfer function with the environment's step size
    discrete_tf = signal.cont2discrete((num, den), dt=wrapped_env.dt)

    # Initialize discrete filter coefficients and state
    self.b = discrete_tf[0].ravel().astype(np.float32)
    self.a = discrete_tf[1].astype(np.float32)

    # One column of filter state per selected position quantity
    num_filter_states = max(len(self.a), len(self.b)) - 1
    self.z = np.zeros((num_filter_states, sum(self.mask_pos)), dtype=np.float32)