def adapt(self, mean: to.Tensor = None, halfspan: Union[to.Tensor, float] = None):
    """
    Adapt the mean and the half interval span of the noise on the action or parameters.
    Use `None` to leave one of the parameters at its current value.

    :param mean: exploration strategy's new mean
    :param halfspan: exploration strategy's new half interval span
    """
    if not (isinstance(mean, to.Tensor) or mean is None):
        raise pyrado.TypeErr(given=mean, expected_type=to.Tensor)
    if not (isinstance(halfspan, to.Tensor) and (halfspan >= 0).all() or halfspan is None):
        raise pyrado.TypeErr(msg='The halfspan must be a Tensor with all elements >= 0 or None!')

    if mean is not None:
        assert self.mean is not None, 'Can not change fixed zero mean!'
        if not mean.shape == self.mean.shape:
            raise pyrado.ShapeErr(given=mean, expected_match=self.mean)
        self.mean.data = mean
    if halfspan is not None:
        if not halfspan.shape == self.log_halfspan.shape:
            raise pyrado.ShapeErr(given=halfspan, expected_match=self.halfspan)
        self.halfspan = halfspan
def __init__(
    self,
    wrapped_env: Union[SimEnv, EnvWrapper],
    noise_std: Union[list, np.ndarray],
    noise_mean: Optional[Union[list, np.ndarray]] = None,
):
    """
    :param wrapped_env: environment to wrap
    :param noise_std: list or numpy array for the standard deviation of the noise
    :param noise_mean: list or numpy array for the mean of the noise, by default all zeros, i.e. no bias
    """
    Serializable._init(self, locals())

    super().__init__(wrapped_env)

    # Parse the noise specification
    self._std = np.array(noise_std)
    if not self._std.shape == self.obs_space.shape:
        raise pyrado.ShapeErr(given=self._std, expected_match=self.obs_space)
    if noise_mean is not None:
        self._mean = np.array(noise_mean)
        if not self._mean.shape == self.obs_space.shape:
            raise pyrado.ShapeErr(given=self._mean, expected_match=self.obs_space)
    else:
        self._mean = np.zeros(self.obs_space.shape)
def unpack(data: to.Tensor, dim_data_orig: int) -> to.Tensor:
    """
    Reshape the data such that the shape is [batch_dim, num_rollouts, len_time_series, dim_data].

    :param data: packed a.k.a. flattened data
    :param dim_data_orig: dimension of the original data
    :return: unpacked a.k.a. un-flattened data
    """
    if data.ndim != 3:
        raise pyrado.ShapeErr(
            msg=f"The data must have exactly 3 dimensions, but is of shape {data.shape}! Check if the data was "
            f"packed before unpacking. This error can also occur if the simulator is not batched. Either enable it "
            f"to process batches of domain parameters or implement a 2-dim case of pack() and unpack()."
        )

    batch_size, num_rollouts = data.shape[:2]  # packing is designed to ensure this
    data = data.view(batch_size, num_rollouts, -1, dim_data_orig)
    if data.ndim != 4:
        raise pyrado.ShapeErr(msg="The data tensor must have exactly 4 dimensions after unpacking!")

    return data
def adapt(self, mean: to.Tensor = None, std: Union[to.Tensor, float] = None):
    """
    Adapt the mean and the standard deviation of the noise on the action or parameters.
    Use `None` to leave one of the parameters at its current value.

    :param mean: exploration strategy's new mean
    :param std: exploration strategy's new standard deviation
    """
    if not (isinstance(mean, to.Tensor) or mean is None):
        raise pyrado.TypeErr(given=mean, expected_type=to.Tensor)
    if not (isinstance(std, to.Tensor) and (std >= 0).all() or std is None):
        raise pyrado.TypeErr(msg='The std must be a Tensor with all elements >= 0 or None!')

    if mean is not None:
        assert self.mean is not None, 'Can not change fixed zero mean!'
        if not mean.shape == self.mean.shape:
            raise pyrado.ShapeErr(given=mean, expected_match=self.mean)
        self.mean.data = mean
    if std is not None:
        if not std.shape == self.log_std.shape:
            raise pyrado.ShapeErr(given=std, expected_match=self.std)
        self.std = std
def __init__(
    self,
    wrapped_env: Env,
    noise_mean: Union[float, np.ndarray] = None,
    noise_std: Union[float, np.ndarray] = None,
):
    """
    Constructor

    :param wrapped_env: environment to wrap around (only makes sense for simulations)
    :param noise_mean: mean of the noise distribution
    :param noise_std: standard deviation of the noise distribution
    """
    Serializable._init(self, locals())

    # Invoke the base constructor
    super().__init__(wrapped_env)

    # Parse the noise specification
    if noise_mean is not None:
        self._mean = np.array(noise_mean)
        if not self._mean.shape == self.act_space.shape:
            raise pyrado.ShapeErr(given=self._mean, expected_match=self.act_space)
    else:
        self._mean = np.zeros(self.act_space.shape)
    if noise_std is not None:
        self._std = np.array(noise_std)
        if not self._std.shape == self.act_space.shape:
            raise pyrado.ShapeErr(given=self._std, expected_match=self.act_space)
    else:
        self._std = np.zeros(self.act_space.shape)
def __call__(self, data: [np.ndarray, to.Tensor]):
    """
    Update the internal variables and normalize the input.

    :param data: input data to be normalized
    :return: normalized data in [-1, 1]
    """
    if isinstance(data, np.ndarray):
        data_2d = np.atleast_2d(data)
        data_min = np.min(data_2d, axis=0)
        data_max = np.max(data_2d, axis=0)
        self._iter += 1

        # Handle the first iteration separately
        if self._iter <= 1:
            self._bound_lo = data_min
            self._bound_up = data_max
        else:
            if not self._bound_lo.shape == data_min.shape:
                raise pyrado.ShapeErr(given=data_min, expected_match=self._bound_lo)
            # Update the bounds element-wise
            self._bound_lo = np.fmin(self._bound_lo, data_min)
            self._bound_up = np.fmax(self._bound_up, data_max)

        # Make sure that the bounds do not collapse (e.g. for one sample)
        if np.linalg.norm(self._bound_up - self._bound_lo, ord=1) < self.eps:
            self._bound_lo -= self.eps / 2
            self._bound_up += self.eps / 2

    elif isinstance(data, to.Tensor):
        # Treat 1-dim input as a single sample, mirroring np.atleast_2d in the numpy branch
        data_2d = data.view(1, -1) if data.ndim < 2 else data
        data_min, _ = to.min(data_2d, dim=0)
        data_max, _ = to.max(data_2d, dim=0)
        self._iter += 1

        # Handle the first iteration separately
        if self._iter <= 1:
            self._bound_lo = data_min
            self._bound_up = data_max
        else:
            if not self._bound_lo.shape == data_min.shape:
                raise pyrado.ShapeErr(given=data_min, expected_match=self._bound_lo)
            # Update the bounds element-wise
            self._bound_lo = to.min(self._bound_lo, data_min)
            self._bound_up = to.max(self._bound_up, data_max)

        # Make sure that the bounds do not collapse (e.g. for one sample)
        if to.norm(self._bound_up - self._bound_lo, p=1) < self.eps:
            self._bound_lo -= self.eps / 2
            self._bound_up += self.eps / 2

    else:
        raise pyrado.TypeErr(given=data, expected_type=[np.ndarray, to.Tensor])

    # Return the normalized data
    return (data - self._bound_lo) / (self._bound_up - self._bound_lo) * 2 - 1
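# Usage sketch for the running normalizer above. The surrounding class name and its constructor
# are assumptions (only the __call__ shown above is taken from the source): repeated calls widen
# the running bounds element-wise and map the data into [-1, 1].
normalizer = RunningNormalizer()  # hypothetical constructor of the class owning __call__
batch_1 = np.array([[1.0, 10.0], [3.0, 30.0]])
batch_2 = np.array([[5.0, 20.0], [2.0, 40.0]])
out_1 = normalizer(batch_1)  # bounds are initialized from the first batch
out_2 = normalizer(batch_2)  # bounds now cover both batches, so the output lies in [-1, 1]
assert np.all(-1 <= out_2) and np.all(out_2 <= 1)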
def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np.ndarray:
    # Reset the time
    self._curr_step = 0

    # Reset the domain parameters
    if domain_param is not None:
        self.domain_param = domain_param

    # Sample or set the initial simulation state
    if init_state is None:
        # Sample the initial state from the init state space
        init_state = self.init_space.sample_uniform()
    elif not isinstance(init_state, np.ndarray):
        # Make sure the initial state is a numpy array
        try:
            init_state = np.asarray(init_state)
        except Exception:
            raise pyrado.TypeErr(given=init_state, expected_type=np.ndarray)
    if not self.init_space.contains(init_state, verbose=True):
        raise pyrado.ValueErr(msg="The init state must be within the init state space!")

    # Update the state attribute
    self.state = init_state.copy()

    # Reset the task which also resets the reward function if necessary
    self._task.reset(env_spec=self.spec, init_state=init_state.copy())

    # Reset the MuJoCo simulation model (only resets the joint configuration)
    self.sim.reset()
    old_state = self.sim.get_state()
    nq = self.init_qpos.size
    # Exclude everything that is appended to the state (at the end), e.g. the ball position for WAMBallInCupSim
    if not init_state[:nq].shape == old_state.qpos.shape:  # check the joint positions' dimension
        raise pyrado.ShapeErr(given=init_state[:nq], expected_match=old_state.qpos)
    if not init_state[nq:2 * nq].shape == old_state.qvel.shape:  # check the joint velocities' dimension
        raise pyrado.ShapeErr(given=init_state[nq:2 * nq], expected_match=old_state.qvel)
    new_state = mujoco_py.MjSimState(
        old_state.time,
        init_state[:nq],
        init_state[nq:2 * nq],
        old_state.act,
        old_state.udd_state,
    )
    self.sim.set_state(new_state)
    self.sim.forward()

    # Return an observation
    return self.observe(self.state)
def _get_wrapper_domain_param(self, domain_param: dict):
    """
    Load the action noise parameters from the domain parameter dict.

    :param domain_param: domain parameter dict
    """
    if "act_noise_mean" in domain_param:
        self._noise_mean = np.array(domain_param["act_noise_mean"])
        if not self._noise_mean.shape == self.act_space.shape:
            raise pyrado.ShapeErr(given=self._noise_mean, expected_match=self.act_space)
    if "act_noise_std" in domain_param:
        self._noise_std = np.array(domain_param["act_noise_std"])
        if not self._noise_std.shape == self.act_space.shape:
            raise pyrado.ShapeErr(given=self._noise_std, expected_match=self.act_space)
def skyline(
    dt: Union[int, float, np.ndarray],
    t_end: Union[int, float, np.ndarray],
    t_intvl_space: BoxSpace,
    val_space: BoxSpace,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Step function that randomly samples a value from the given range, and then holds this value for a time interval
    which is also randomly sampled given a range of time intervals. This procedure is repeated until the sequence is
    long enough, i.e. approximately `t_end / dt` samples.

    :param dt: time step size
    :param t_end: final time
    :param t_intvl_space: 1-dim `BoxSpace` determining the range of time intervals that can be sampled
    :param val_space: 1-dim `BoxSpace` determining the range of values that can be sampled
    :return: array of time steps together with the associated array of values
    """
    if dt <= 0:
        raise pyrado.ValueErr(given=dt, g_constraint="0")
    if t_end < dt:
        raise pyrado.ValueErr(given=t_end, ge_constraint=f"{dt}")
    if not isinstance(t_intvl_space, BoxSpace):
        raise pyrado.TypeErr(given=t_intvl_space, expected_type=BoxSpace)
    if not isinstance(val_space, BoxSpace):
        raise pyrado.TypeErr(given=val_space, expected_type=BoxSpace)
    if not t_intvl_space.flat_dim == 1:
        raise pyrado.ShapeErr(given=t_intvl_space, expected_match=(1,))
    if not val_space.flat_dim == 1:
        raise pyrado.ShapeErr(given=val_space, expected_match=(1,))

    dt = np.asarray(dt, dtype=np.float32)
    t_end = np.asarray(t_end, dtype=np.float32)

    # First iteration
    t_intvl = t_intvl_space.sample_uniform()
    t_intvl = np.clip(t_intvl, dt, t_end + dt)
    t = np.arange(start=0.0, stop=t_intvl, step=dt)
    vals = val_space.sample_uniform() * np.ones_like(t)

    # Iterate until the time is up
    while t[-1] < t_end:
        t_intvl = t_intvl_space.sample_uniform()
        t_intvl = np.clip(t_intvl, dt, t_end - t[-1] + dt)
        t_new = np.arange(start=t[-1] + dt, stop=t[-1] + t_intvl, step=dt)
        t = np.concatenate([t, t_new])
        val_new = val_space.sample_uniform() * np.ones_like(t_new)
        vals = np.concatenate([vals, val_new])

    return t, vals
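# Usage sketch for skyline(). The BoxSpace constructor arguments below are an assumption
# (scalar lower/upper bounds with an explicit shape); only skyline() itself is taken from above.
# Hold intervals between 0.5 s and 2 s, values between -1 and 1, for 10 s at dt = 0.01 s.
t_intvl_space = BoxSpace(0.5, 2.0, shape=(1,))
val_space = BoxSpace(-1.0, 1.0, shape=(1,))
t, vals = skyline(dt=0.01, t_end=10.0, t_intvl_space=t_intvl_space, val_space=val_space)
assert t.shape == vals.shape  # one held value per time step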
def __init__(
    self,
    num_feat_per_dim: int,
    bounds: [Sequence[np.ndarray], Sequence[to.Tensor], Sequence[float]],
    scale: float = None,
    state_wise_norm: bool = True,
):
    """
    Constructor

    :param num_feat_per_dim: number of radial basis functions, identical for every dimension of the input
    :param bounds: lower and upper bound for the Gaussians' centers, the input dimension is inferred from them
    :param scale: scaling factor for the squared distance, if `None` the factor is determined such that two
                  neighboring RBFs have a value of 0.2 at the other center
    :param state_wise_norm: `True` to apply the normalization across input state dimensions separately (every
                            dimension sums to one), or `False` to jointly normalize them
    """
    if not num_feat_per_dim > 1:
        raise pyrado.ValueErr(given=num_feat_per_dim, g_constraint='1')
    if not len(bounds) == 2:
        raise pyrado.ShapeErr(given=bounds, expected_match=np.empty(2))

    # Get the bounds, e.g. from the observation space, and clip them in case they are infinite
    bounds_to = [None, None]
    for i, b in enumerate(bounds):
        if isinstance(b, np.ndarray):
            bounds_to[i] = to.from_numpy(b)
        elif isinstance(b, to.Tensor):
            bounds_to[i] = b.clone()
        elif isinstance(b, (int, float)):
            bounds_to[i] = to.tensor(b, dtype=to.get_default_dtype()).view(1, )
        else:
            raise pyrado.TypeErr(given=b, expected_type=[np.ndarray, to.Tensor, int, float])
    if any([any(np.isinf(b)) for b in bounds_to]):
        bound_lo, bound_up = [to.clamp(b, min=-1e6, max=1e6) for b in bounds_to]
        print_cbt('Clipped the bounds of the RBF centers to [-1e6, 1e6].', 'y')
    else:
        bound_lo, bound_up = bounds_to

    # Create a matrix with the center locations of the Gaussians
    num_dim = len(bound_lo)
    self.num_feat = num_feat_per_dim * num_dim
    self.centers = to.empty(num_feat_per_dim, num_dim)
    for i in range(num_dim):
        # Features along the columns
        self.centers[:, i] = to.linspace(bound_lo[i], bound_up[i], num_feat_per_dim)

    if scale is None:
        delta_center = self.centers[1, :] - self.centers[0, :]
        self.scale = -to.log(to.tensor(0.2)) / to.pow(delta_center, 2)
    else:
        self.scale = scale

    self._state_wise_norm = state_wise_norm
def state_des(self, state_des: np.ndarray):
    if not isinstance(state_des, np.ndarray):
        raise pyrado.TypeErr(given=state_des, expected_type=np.ndarray)
    if not state_des.shape == self.state_des.shape:
        raise pyrado.ShapeErr(given=state_des, expected_match=self.state_des)
    self._state_des = state_des
def pack(data: to.Tensor) -> to.Tensor:
    """
    Reshape the data such that the shape is [batch_dim, num_rollouts, data_points_flattened].

    :param data: un-packed a.k.a. un-flattened data
    :return: packed a.k.a. flattened data
    """
    if data.ndim == 2:
        # The data is not batched, and we have one target domain rollout which is un-flattened
        return data.view(1, 1, -1)
    elif data.ndim == 3:
        # The data is not batched, but we have multiple target domain rollouts which are un-flattened
        num_rollouts = data.shape[0]
        return data.view(1, num_rollouts, -1)
    elif data.ndim == 4:
        # The data is batched, and we have multiple target domain rollouts
        batch_size, num_rollouts = data.shape[:2]
        return data.view(batch_size, num_rollouts, -1)
    else:
        raise pyrado.ShapeErr(msg=f"The data must have either 2, 3, or 4 dimensions, not {data.ndim}!")
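# Round-trip sketch for pack() and unpack() (defined earlier in this section). Calling them as
# free functions is an assumption; in the library they are static methods of the embedding class,
# so the call site may read Embedding.pack(...) / Embedding.unpack(...).
batch_size, num_rollouts, len_time_series, dim_data = 8, 2, 50, 4
data = to.randn(batch_size, num_rollouts, len_time_series, dim_data)
packed = pack(data)                  # shape [8, 2, 50 * 4]
restored = unpack(packed, dim_data)  # shape [8, 2, 50, 4]
assert to.equal(restored, data)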
def __init__(self, in_features: int, nonlin: [Callable, Sequence[Callable]], bias: bool, weight: bool = True):
    """
    Constructor

    :param in_features: size of each input sample
    :param nonlin: nonlinearity
    :param bias: if `True`, a learnable bias is subtracted, else no bias is used
    :param weight: if `True` (default), the input is multiplied with a learnable scaling factor
    """
    if not callable(nonlin):
        if not len(nonlin) == in_features:
            raise pyrado.ShapeErr(given=nonlin, expected_match=in_features)

    super().__init__()

    self.nonlin = deepcopy(nonlin) if is_iterable(nonlin) else nonlin
    if weight:
        self.weight = nn.Parameter(to.randn(in_features, dtype=to.get_default_dtype()), requires_grad=True)
    else:
        self.weight = None
    if bias:
        self.bias = nn.Parameter(to.randn(in_features, dtype=to.get_default_dtype()), requires_grad=True)
    else:
        self.bias = None
def loss_fcn(self, rollout_real: StepSequence, rollout_sim: StepSequence) -> float:
    """
    Compute the discrepancy between two time sequences of observations given the metric.
    Be sure to align and truncate the rollouts beforehand.

    :param rollout_real: (concatenated) real-world rollout containing the observations
    :param rollout_sim: (concatenated) simulated rollout containing the observations
    :return: discrepancy cost summed over the observation dimensions
    """
    if len(rollout_real) != len(rollout_sim):
        raise pyrado.ShapeErr(given=rollout_real, expected_match=rollout_sim)

    # Extract the observations
    real_obs = rollout_real.get_data_values("observations", truncate_last=True)
    sim_obs = rollout_sim.get_data_values("observations", truncate_last=True)

    # Filter the observations
    real_obs = gaussian_filter1d(real_obs, self.std_obs_filt, axis=0)
    sim_obs = gaussian_filter1d(sim_obs, self.std_obs_filt, axis=0)

    # Normalize the signals
    real_obs_norm = self.obs_normalizer.project_to(real_obs)
    sim_obs_norm = self.obs_normalizer.project_to(sim_obs)

    # Compute the loss based on the error
    loss_per_obs_dim = self.metric(real_obs_norm - sim_obs_norm)
    assert len(loss_per_obs_dim) == real_obs.shape[1]
    assert all(loss_per_obs_dim >= 0)
    return sum(loss_per_obs_dim)
def __init__(self, wrapped_env: Env, mask: list = None, idcs: list = None, keep_selected: bool = False):
    """
    Constructor

    :param wrapped_env: environment to wrap
    :param mask: mask out array, entries with 1 are dropped (behavior can be inverted by `keep_selected=True`)
    :param idcs: indices to drop, ignored if mask is specified. If the observation space is labeled,
                 the labels can be used as indices.
    :param keep_selected: set to `True` to keep the mask entries with 1 / the specified indices and drop the others
    """
    Serializable._init(self, locals())

    super(ObsPartialWrapper, self).__init__(wrapped_env)

    # Parse the selection
    if mask is not None:
        # Use the explicit mask
        mask = np.array(mask, dtype=bool)
        if not mask.shape == wrapped_env.obs_space.shape:
            raise pyrado.ShapeErr(given=mask, expected_match=wrapped_env.obs_space)
    else:
        # Parse the indices
        assert idcs is not None, "Either a mask or indices must be specified"
        mask = wrapped_env.obs_space.create_mask(idcs)

    # Invert if needed
    if keep_selected:
        self.keep_mask = mask
    else:
        self.keep_mask = np.logical_not(mask)
def __init__(
    self,
    spec: EnvSpec,
    act_recordings: List[Union[to.Tensor, np.ndarray]],
    no_reset: bool = False,
    use_cuda: bool = False,
):
    """
    Constructor

    :param spec: environment specification
    :param act_recordings: pre-recorded sequence of actions to be played back later
    :param no_reset: `True` to turn `reset()` into a dummy function
    :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU
    """
    if not isinstance(act_recordings, list):
        raise pyrado.TypeErr(given=act_recordings, expected_type=list)

    super().__init__(spec, use_cuda)

    self._curr_rec = -1  # is increased before the first use
    self._curr_step = 0
    self._no_reset = no_reset
    self._num_rec = len(act_recordings)
    self._act_rec_buffer = [to.atleast_2d(to.as_tensor(ar)) for ar in act_recordings]

    if not all(b.shape[1] == self.env_spec.act_space.flat_dim for b in self._act_rec_buffer):
        raise pyrado.ShapeErr(
            given=(-1, self._act_rec_buffer[0].shape[1]),
            expected_match=(-1, self.env_spec.act_space.flat_dim),
        )
def _unpack_hidden(self, hidden: to.Tensor, batch_size: int = None):
    # Special case - need to split the hidden state into the hidden and the cell term memory.
    # Assume it is a flattened view of hid/cell x nrl x batch x hs.
    if len(hidden.shape) == 1:
        assert hidden.shape[0] == self.hidden_size, \
            "Passed hidden variable's size doesn't match the one required by the network."
        # We could handle this case, but for now it's not necessary
        assert batch_size is None, \
            "Cannot use batched observations with an unbatched hidden state"

        # Reshape to hid/cell x nrl x batch x hs
        hd = hidden.view(2, self._num_recurrent_layers, 1, self._hidden_size)

        # Split the hidden and the cell state
        return hd[0, ...], hd[1, ...]

    elif len(hidden.shape) == 2:
        assert hidden.shape[1] == self.hidden_size, \
            "Passed hidden variable's size doesn't match the one required by the network."
        assert hidden.shape[0] == batch_size, \
            f"Batch size of the hidden state ({hidden.shape[0]}) must match the batch size of the observations " \
            f"({batch_size})"

        # Reshape to hid/cell x nrl x batch x hs
        hd = hidden.view(batch_size, 2, self._num_recurrent_layers, self._hidden_size).permute(1, 2, 0, 3)

        # Split the hidden and the cell state
        return hd[0, ...], hd[1, ...]

    else:
        raise pyrado.ShapeErr(
            msg=f"Improper shape of 'hidden'. The policy received {hidden.shape}, "
            f"but the shape should be 1- or 2-dim."
        )
def forward(self, data: to.Tensor) -> to.Tensor:
    """
    Transform rollouts into the observations used for likelihood-free inference.
    Currently, a state representation as well as state-action summary statistics are available.

    :param data: packed data of shape [batch_size, num_rollouts, len_time_series, dim_data]
    :return: features of the data extracted from the embedding of shape [batch_size, num_rollouts * dim_feat]
    """
    data = data.to(device=self.device, dtype=to.get_default_dtype())

    # Bring the data back into the un-flattened form of shape [batch_size, num_rollouts, len_time_series, dim_data]
    data = Embedding.unpack(data, self._dim_data_orig)

    if self.downsampling_factor > 1:
        data = data[:, :, ::self.downsampling_factor, :]

    # Iterate over all data batches, computing the features from the data
    x = to.stack([self.forward_one_batch(batch) for batch in data], dim=0)

    # Check the shape
    if x.shape != (data.shape[0], data.shape[1] * self.dim_output):
        raise pyrado.ShapeErr(given=x, expected_match=(data.shape[0], data.shape[1] * self.dim_output))

    return x
def derivative(self, inp: to.Tensor) -> to.Tensor:
    """
    Compute the derivative of the features w.r.t. the inputs.

    .. note::
        Only 1-dim inputs are processed (e.g., no images)! The input can be batched along the first dimension.

    :param inp: input, i.e. observations in the RL setting
    :return: value of all feature derivatives given the observations
    """
    if inp.ndimension() > 2:
        raise pyrado.ShapeErr(msg='RBF class can only handle 1-dim or 2-dim input!')
    inp = atleast_2D(inp)  # first dim is the batch size, the second dim is the actual input dimension
    inp = inp.reshape(inp.shape[0], 1, inp.shape[1]).repeat(1, self.centers.shape[0], 1)  # reshape explicitly

    exp_sq_dist = to.exp(-self.scale * to.pow(inp - self.centers, 2))
    exp_sq_dist_d = -2 * self.scale * (inp - self.centers)

    feat_val = to.empty(inp.shape[0], self.num_feat)
    feat_val_dot = to.empty(inp.shape[0], self.num_feat)

    for i, (sample, sample_d) in enumerate(zip(exp_sq_dist, exp_sq_dist_d)):
        if self._state_wise_norm:
            # Normalize the features such that the activation for every state dimension sums up to one
            feat_val[i, :] = normalize(sample, axis=0, order=1).reshape(-1, )
        else:
            # Turn the features into a vector and normalize over all of them
            feat_val[i, :] = normalize(sample.t().reshape(-1, ), axis=-1, order=1)
        feat_val_dot[i, :] = sample_d.squeeze() * feat_val[i, :] - \
            feat_val[i, :] * sum(sample_d.squeeze() * feat_val[i, :])

    return feat_val_dot
def transform_to_ddp_space(self, params: to.Tensor) -> to.Tensor:
    """
    Get the transformed domain distribution parameters. The policy's parameters are in log space.

    :param params: policy parameters (can be the log of the actual domain distribution parameter value)
    :return: policy parameters transformed according to the mask
    """
    ddp = params.clone()
    if ddp.ndimension() == 1:
        # Only one set of domain distribution parameters
        if self._scale_params:
            ddp.data = self.param_scaler.scale_back(ddp.data)
        ddp.data[self.mask] = to.exp(ddp.data[self.mask])
    elif ddp.ndimension() == 2:
        # Multiple sets of domain distribution parameters along the first axis
        if self._scale_params:
            for i in range(ddp.shape[0]):
                ddp[i].data = self.param_scaler.scale_back(ddp[i].data)
        ddp.data[:, self.mask] = to.exp(ddp.data[:, self.mask])
    else:
        raise pyrado.ShapeErr(msg='Inputs must not have more than 2 dimensions!')
    return ddp
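# Minimal standalone sketch (not the class above, which additionally applies parameter scaling)
# of the masked log-space transform that transform_to_ddp_space() performs on one parameter set.
params = to.tensor([0.0, -1.0, 2.0])   # policy parameters, the first two live in log space
mask = to.tensor([True, True, False])  # marks which entries are stored in log space
ddp = params.clone()
ddp[mask] = to.exp(ddp[mask])          # yields [1.0, exp(-1) ~ 0.368, 2.0]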
def contains(self, cand: np.ndarray, verbose: bool = False) -> bool:
    # Check the candidate's validity (shape and NaN values)
    if not cand.shape == self.shape:
        raise pyrado.ShapeErr(given=cand, expected_match=self)
    if np.isnan(cand).any():
        raise pyrado.ValueErr(
            msg='At least one value is NaN!' + tabulate(
                [list(self.labels), [*color_validity(cand, np.invert(np.isnan(cand)))]],
                headers='firstrow',
            )
        )

    # Check the upper and the lower bound separately
    check_lo = (cand >= self.bound_lo).astype(int)
    check_up = (cand <= self.bound_up).astype(int)
    idcs_valid = np.bitwise_and(check_lo, check_up)

    if np.all(idcs_valid):
        return True
    else:
        if verbose:
            print(
                tabulate(
                    [
                        ['lower bound ', *color_validity(self.bound_lo, check_lo)],
                        ['candidate ', *color_validity(cand, idcs_valid)],
                        ['upper bound ', *color_validity(self.bound_up, check_up)],
                    ],
                    headers=[''] + list(self.labels),
                )
            )
        return False
def add_value(self, key: str, value):
    """
    Add a column value to the current step.

    :param key: data key
    :param value: value to record, pass '' to print nothing
    """
    # Compute the full prefixed key
    key = self._prefix_str + key

    if self._first_step:
        # Record the new key during the first step
        self._value_keys.append(key)
    elif key not in self._value_keys:
        # Make sure the key was used during the first step
        raise KeyError('New value keys may only be added before the first step is finished')

    # Pre-process non-scalar values
    if isinstance(value, to.Tensor):
        # Only scalar tensors are supported for now
        if value.ndimension() <= 1:
            value = value.item()
        else:
            raise pyrado.ShapeErr(
                msg='The logger only supports scalar PyTorch tensors, otherwise the progress.csv file'
                ' gets messed up.'
            )

    # Record the value
    self._current_values[key] = value
    self._values_changed = True
def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np.ndarray:
    # Reset the time
    self._curr_step = 0

    # Reset the domain parameters
    if domain_param is not None:
        self.domain_param = domain_param

    # Reset the state
    if init_state is None:
        self.state = self._init_space.sample_uniform()  # zero
    else:
        if not isinstance(init_state, np.ndarray):
            # Convert to a numpy array before checking the shape
            try:
                init_state = np.array(init_state)
            except Exception:
                raise pyrado.TypeErr(given=init_state, expected_type=[np.ndarray, list])
        if not init_state.shape == self.obs_space.shape:
            raise pyrado.ShapeErr(given=init_state, expected_match=self.obs_space)
        self.state = init_state.copy()

    # No need to reset the task

    # Return the perfect observation
    return self.observe(self.state)
def space_des(self, space_des: Space):
    if not isinstance(space_des, Space):
        raise pyrado.TypeErr(given=space_des, expected_type=Space)
    if not space_des.shape == self.space_des.shape:
        raise pyrado.ShapeErr(given=space_des, expected_match=self.space_des)
    self._space_des = space_des
def __init__(self, eles: [np.ndarray, list], labels: Sequence[str] = None):
    """
    Constructor

    :param eles: N x D array of all actions, where N is the number of actions and D is the dimension of each action
    :param labels: labels for the elements of the space. This is useful for giving the states and actions names to
                   later identify them (e.g. for plotting).
    """
    if isinstance(eles, np.ndarray):
        # Make sure the dimension of the state is along the first array dimension
        self.eles = eles if eles.ndim == 2 else eles.reshape(-1, 1)
    elif isinstance(eles, list):
        self.eles = np.array(eles, dtype=int)
        # Make sure the dimension of the state is along the first array dimension
        self.eles = self.eles if self.eles.ndim == 2 else self.eles.reshape(-1, 1)
    else:
        raise pyrado.TypeErr(given=eles, expected_type=[np.ndarray, list])
    self.eles = np.atleast_2d(self.eles)

    self.bound_lo = np.min(self.eles, axis=0)
    self.bound_up = np.max(self.eles, axis=0)

    # Process the labels
    if labels is not None:
        labels = np.array(labels, dtype=object)
        if not labels.shape == self.shape:
            raise pyrado.ShapeErr(given=labels, expected_match=self)
        self._labels = labels
    else:
        self._labels = np.empty(self.shape, dtype=object)
        self._labels.fill(None)
def reset(self, init_state: np.ndarray = None, domain_param: dict = None) -> np.ndarray:
    # Reset the time
    self._curr_step = 0

    # Reset the state
    if init_state is None:
        # Sample from the init state space
        init_state = self._init_space.sample_uniform()
    else:
        if not init_state.shape == self._init_space.shape:
            raise pyrado.ShapeErr(given=init_state, expected_match=self._init_space)

    # Reset the task
    self._task.reset(env_spec=self.spec)

    # Use the stored domain parameters if not overwritten
    if domain_param is None:
        domain_param = self._domain_param

    # Forward to the C++ implementation
    obs = self._sim.reset(domainParam=self._adapt_domain_param(domain_param), initState=init_state)
    self.state = self._state_from_obs(obs)
    return obs
def fill_domain_param_buffer(env: DomainRandWrapper, dp_mapping: Mapping[int, str], domain_params: to.Tensor):
    """
    Fill the environment's domain parameter buffer according to the domain parameter mapping, and reset the
    ring index.

    :param env: environment in which the domain parameters are inserted
    :param dp_mapping: mapping from subsequent integers (starting at 0) to domain parameter names (e.g. mass)
    :param domain_params: tensor of domain parameters [num_samples x dim domain param]
    """
    if not isinstance(env, DomainRandWrapperBuffer):
        raise pyrado.TypeErr(given=env, expected_type=DomainRandWrapperBuffer)
    if domain_params.ndim != 2 or domain_params.shape[1] != len(dp_mapping):
        raise pyrado.ShapeErr(
            msg=f"The domain parameters must be a 2-dim PyTorch tensor, where the second dimension matches the "
            f"domain parameter mapping, but it has the shape {domain_params.shape}!"
        )

    domain_params = domain_params.detach().cpu().numpy()
    env.buffer = [dict(zip(dp_mapping.values(), dp)) for dp in domain_params]
    env.ring_idx = 0
    print_cbt(f"Filled the environment's buffer with {len(env.buffer)} domain parameter sets.", "g")
def cov(x: to.Tensor, data_along_rows: bool = False):
    """
    Compute the covariance matrix given the data.

    .. note::
        Only real valued matrices are supported.

    :param x: matrix containing multiple observations of multiple variables
    :param data_along_rows: if `True` the variables are stacked along the columns, else they are along the rows
    :return: covariance matrix given the data
    """
    if x.dim() > 2:
        raise ValueError('x has more than 2 dimensions')
    if x.dim() < 2:
        x = x.view(1, -1)
    if data_along_rows and x.size(0) != 1:
        # Transpose if necessary
        x = x.t()

    num_samples = x.size(1)
    if num_samples < 2:
        raise pyrado.ShapeErr(msg='Need at least 2 samples to compute the covariance!')

    # Center the data and compute the (unbiased) covariance
    x -= to.mean(x, dim=1, keepdim=True)
    return x.matmul(x.t()).squeeze() / (num_samples - 1)
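# Sanity-check sketch for cov(): compare against numpy's covariance. Note that cov() centers x
# in place, so a clone is passed to keep the original data untouched.
x = to.randn(3, 100)  # 3 variables along the rows, 100 samples along the columns
cov_torch = cov(x.clone())
cov_numpy = np.cov(x.numpy())  # rowvar=True by default, matching data_along_rows=False
assert np.allclose(cov_torch.numpy(), cov_numpy, atol=1e-5)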
def transform_to_ddp_space(self, params: to.Tensor) -> to.Tensor:
    """
    Get the transformed domain distribution parameters. Wherever the mask is `True`, the corresponding policy
    parameter is learned in sqrt space. Moreover, the policy parameters can be scaled.

    :param params: policy parameters (can be the sqrt of the actual domain distribution parameter value)
    :return: policy parameters transformed according to the mask
    """
    ddp = params.clone()
    if ddp.ndimension() == 1:
        # Only one set of domain distribution parameters
        if self._scale_params:
            ddp.data = self.param_scaler.scale_back(ddp.data)
        ddp.data[self.mask] = to.pow(ddp.data[self.mask], 2)
    elif ddp.ndimension() == 2:
        # Multiple sets of domain distribution parameters along the first axis
        if self._scale_params:
            for i in range(ddp.shape[0]):
                ddp[i].data = self.param_scaler.scale_back(ddp[i].data)
        ddp.data[:, self.mask] = to.pow(ddp.data[:, self.mask], 2)
    else:
        raise pyrado.ShapeErr(msg="The input must not have more than 2 dimensions!")
    return ddp
def __call__(self, inp: to.Tensor) -> to.Tensor:
    """
    Evaluate the features and normalize them.

    .. note::
        Only 1-dim inputs are processed (e.g., no images)! The input can be batched along the first dimension.

    :param inp: input, i.e. observations in the RL setting
    :return: 1-dim vector of all feature values given the observations
    """
    inp = inp.to(device=self._device, dtype=to.get_default_dtype())

    if inp.ndimension() > 2:
        raise pyrado.ShapeErr(msg="RBF class can only handle 1-dim or 2-dim input!")
    inp = to.atleast_2d(inp)  # first dim is the batch size, the second dim is the actual input dimension
    inp = inp.reshape(inp.shape[0], 1, inp.shape[1]).repeat(1, self.centers.shape[0], 1)  # reshape explicitly

    # Exponentiate the squared distances
    exp_sq_dist = to.exp(-self.scale * to.pow(inp - self.centers, 2))

    # Normalize, reshape, and return the feature values
    return to.stack([self._normalize_and_reshape(esd) for esd in exp_sq_dist])
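# Usage sketch connecting the RBF constructor (shown earlier in this section) to the __call__ above.
# The class name RBFFeat is an assumption inferred from the error messages; treat it as hypothetical.
rbf = RBFFeat(num_feat_per_dim=5, bounds=(np.array([-1.0, -2.0]), np.array([1.0, 2.0])))
obs = to.tensor([[0.0, 0.5], [0.3, -1.0]])  # batch of two 2-dim observations
feats = rbf(obs)  # shape [2, 5 * 2]; with state_wise_norm=True each input dimension's features sum to one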