Example #1
0
    def adapt(self, mean: to.Tensor = None, halfspan: Union[to.Tensor, float] = None):
        """
        Adapt the mean and the half interval span of the noise on the action or parameters.
        Use `None` to leave one of the parameters at its current value.

        :param mean: exploration strategy's new mean
        :param halfspan: exploration strategy's new half interval span
        """
        if not (isinstance(mean, to.Tensor) or mean is None):
            raise pyrado.TypeErr(given=mean, expected_type=to.Tensor)
        if isinstance(halfspan, float):
            # The type hint admits a float, so promote it to a tensor before the check below
            halfspan = to.tensor(halfspan)
        if not (halfspan is None or (isinstance(halfspan, to.Tensor) and (halfspan >= 0).all())):
            raise pyrado.TypeErr(msg='The halfspan must be a Tensor with all elements >= 0, or None!')
        if mean is not None:
            assert self.mean is not None, 'Can not change fixed zero mean!'
            if not mean.shape == self.mean.shape:
                raise pyrado.ShapeErr(given=mean, expected_match=self.mean)
            self.mean.data = mean
        if halfspan is not None:
            if not halfspan.shape == self.log_halfspan.shape:
                raise pyrado.ShapeErr(given=halfspan, expected_match=self.halfspan)
            self.halfspan = halfspan
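# Usage sketch (assumption: `strat` is an instance of the exploration strategy
# owning this adapt(), with a 2-dim noise vector; names and values are illustrative):
import torch as to

strat.adapt(halfspan=to.tensor([0.1, 0.1]))  # adapt only the halfspan, keep the mean
strat.adapt(mean=to.zeros(2), halfspan=to.tensor([0.05, 0.05]))  # adapt both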
Example #2
0
    def __init__(
        self,
        wrapped_env: Union[SimEnv, EnvWrapper],
        noise_std: Union[list, np.ndarray],
        noise_mean: Optional[Union[list, np.ndarray]] = None,
    ):
        """
        :param wrapped_env: environment to wrap
        :param noise_std: list or numpy array for the standard deviation of the noise
        :param noise_mean: list or numpy array for the mean of the noise, by default all zeros, i.e. no bias
        """
        Serializable._init(self, locals())

        super().__init__(wrapped_env)

        # Parse noise specification
        self._std = np.array(noise_std)
        if not self._std.shape == self.obs_space.shape:
            raise pyrado.ShapeErr(given=self._std,
                                  expected_match=self.obs_space)
        if noise_mean is not None:
            self._mean = np.array(noise_mean)
            if not self._mean.shape == self.obs_space.shape:
                raise pyrado.ShapeErr(given=self._mean,
                                      expected_match=self.obs_space)
        else:
            self._mean = np.zeros(self.obs_space.shape)
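# Usage sketch (assumption: the constructor above belongs to an observation noise
# wrapper, here called GaussianObsNoiseWrapper, and `inner_env` has a 3-dim
# observation space; all names and values are illustrative):
env = GaussianObsNoiseWrapper(inner_env, noise_std=[0.01, 0.01, 0.05])  # zero-mean noise
env = GaussianObsNoiseWrapper(inner_env, noise_std=[0.01] * 3, noise_mean=[0.1] * 3)  # biased noise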
Example #3
0
    def unpack(data: to.Tensor, dim_data_orig: int) -> to.Tensor:
        """
        Reshape the data such that the shape is [batch_dim, num_rollouts, len_time_series, dim_data].

        :param data: packed a.k.a. flattened data
        :param dim_data_orig: dimension of the original data
        :return: un-packed a.k.a. un-flattened data
        """
        if data.ndim != 3:
            raise pyrado.ShapeErr(
                msg=f"The data must have exactly 3 dimensions, but is of shape {data.shape}! Check if packed before "
                f"unpacking. This error can also occur if the simulator is not batched. Either enable it to process "
                f"batches of domain parameters or implement a 2-dim case of pack() and unpack()."
            )

        batch_size, num_rollouts = data.shape[:2]  # packing is designed to ensure this
        data = data.view(batch_size, num_rollouts, -1, dim_data_orig)

        if data.ndim != 4:
            raise pyrado.ShapeErr(msg="The data tensor must have exactly 4 dimensions after unpacking!")

        return data
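# Standalone illustration of the reshape that unpack() performs (plain PyTorch,
# toy sizes: 5 batches, 3 rollouts, 20 time steps, 4 data dimensions):
import torch as to

packed = to.randn(5, 3, 20 * 4)  # [batch_dim, num_rollouts, flattened]
unpacked = packed.view(5, 3, -1, 4)  # same view() call as above, dim_data_orig = 4
assert unpacked.shape == (5, 3, 20, 4)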
Example #4
0
    def adapt(self, mean: to.Tensor = None, std: Union[to.Tensor, float] = None):
        """
        Adapt the mean and the standard deviation of the noise on the action or parameters.
        Use `None` to leave one of the parameters at its current value.

        :param mean: exploration strategy's new mean
        :param std: exploration strategy's new standard deviation
        """
        if not (isinstance(mean, to.Tensor) or mean is None):
            raise pyrado.TypeErr(given=mean, expected_type=to.Tensor)
        if isinstance(std, float):
            # The type hint admits a float, so promote it to a tensor before the check below
            std = to.tensor(std)
        if not (std is None or (isinstance(std, to.Tensor) and (std >= 0).all())):
            raise pyrado.TypeErr(msg='The std must be a Tensor with all elements >= 0, or None!')

        if mean is not None:
            assert self.mean is not None, 'Can not change fixed zero mean!'
            if not mean.shape == self.mean.shape:
                raise pyrado.ShapeErr(given=mean, expected_match=self.mean)
            self.mean.data = mean
        if std is not None:
            if not std.shape == self.log_std.shape:
                raise pyrado.ShapeErr(given=std, expected_match=self.std)
            self.std = std
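# Usage sketch: annealing the exploration noise over training iterations
# (assumption: `expl_strat` owns this adapt() and exposes its current noise via
# `expl_strat.std`; the iteration count and the 0.99 decay are illustrative):
import torch as to

expl_strat.adapt(mean=to.zeros(2), std=to.tensor([0.5, 0.5]))  # initial noise, 2-dim actions
for _ in range(100):
    # ... collect rollouts and update the policy here ...
    expl_strat.adapt(std=expl_strat.std * 0.99)  # geometric decay of the noise scale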
Example #5
0
    def __init__(
        self, wrapped_env: Env, noise_mean: Union[float, np.ndarray] = None, noise_std: Union[float, np.ndarray] = None
    ):
        """
        Constructor

        :param wrapped_env: environment to wrap around (only makes sense for simulations)
        :param noise_mean: mean of the noise distribution
        :param noise_std: standard deviation of the noise distribution
        """
        Serializable._init(self, locals())

        # Invoke base constructor
        super().__init__(wrapped_env)

        # Parse noise specification
        if noise_mean is not None:
            self._mean = np.array(noise_mean)
            if not self._mean.shape == self.act_space.shape:
                raise pyrado.ShapeErr(given=self._mean, expected_match=self.act_space)
        else:
            self._mean = np.zeros(self.act_space.shape)
        if noise_std is not None:
            self._std = np.array(noise_std)
            if not self._std.shape == self.act_space.shape:
                raise pyrado.ShapeErr(given=self._std, expected_match=self.act_space)
        else:
            self._std = np.zeros(self.act_space.shape)
Example #6
0
    def __call__(self, data: [np.ndarray, to.Tensor]):
        """
        Update the internal variables and normalize the input.

        :param data: input data to be normalized
        :return: normalized data in [-1, 1]
        """
        if isinstance(data, np.ndarray):
            data_2d = np.atleast_2d(data)
            data_min = np.min(data_2d, axis=0)
            data_max = np.max(data_2d, axis=0)
            self._iter += 1

            # Handle first iteration separately
            if self._iter <= 1:
                self._bound_lo = data_min
                self._bound_up = data_max
            else:
                if not self._bound_lo.shape == data_min.shape:
                    raise pyrado.ShapeErr(given=data_min, expected_match=self._bound_lo)

                # Update the bounds element-wise
                self._bound_lo = np.fmin(self._bound_lo, data_min)
                self._bound_up = np.fmax(self._bound_up, data_max)

            # Make sure that the bounds do not collapse (e.g. for one sample)
            if np.linalg.norm(self._bound_up - self._bound_lo, ord=1) < self.eps:
                self._bound_lo -= self.eps / 2
                self._bound_up += self.eps / 2

        elif isinstance(data, to.Tensor):
            data_2d = to.atleast_2d(data)  # mirror the numpy branch: treat 1-dim input as a single sample
            data_min, _ = to.min(data_2d, dim=0)
            data_max, _ = to.max(data_2d, dim=0)
            self._iter += 1

            # Handle first iteration separately
            if self._iter <= 1:
                self._bound_lo = data_min
                self._bound_up = data_max
            else:
                if not self._bound_lo.shape == data_min.shape:
                    raise pyrado.ShapeErr(given=data_min, expected_match=self._bound_lo)

                # Update the bounds element-wise
                self._bound_lo = to.min(self._bound_lo, data_min)
                self._bound_up = to.max(self._bound_up, data_max)

            # Make sure that the bounds do not collapse (e.g. for one sample)
            if to.norm(self._bound_up - self._bound_lo, p=1) < self.eps:
                self._bound_lo -= self.eps / 2
                self._bound_up += self.eps / 2

        else:
            raise pyrado.TypeErr(given=data, expected_type=[np.ndarray, to.Tensor])

        # Return the data normalized to [-1, 1]
        return (data - self._bound_lo) / (self._bound_up - self._bound_lo) * 2 - 1
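# Standalone illustration of the running min/max update in the numpy branch
# (plain numpy; the instance state self._bound_lo / self._bound_up is emulated
# by local variables):
import numpy as np

bound_lo, bound_up = None, None
for batch in (np.array([[0.0, 2.0]]), np.array([[-1.0, 1.0], [0.5, 3.0]])):
    data_min, data_max = batch.min(axis=0), batch.max(axis=0)
    bound_lo = data_min if bound_lo is None else np.fmin(bound_lo, data_min)
    bound_up = data_max if bound_up is None else np.fmax(bound_up, data_max)
norm = (batch - bound_lo) / (bound_up - bound_lo) * 2 - 1  # as in the return statement
assert norm.min() >= -1 and norm.max() <= 1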
Example #7
0
    def reset(self,
              init_state: np.ndarray = None,
              domain_param: dict = None) -> np.ndarray:
        # Reset time
        self._curr_step = 0

        # Reset the domain parameters
        if domain_param is not None:
            self.domain_param = domain_param

        # Sample or set the initial simulation state
        if init_state is None:
            # Sample init state from init state space
            init_state = self.init_space.sample_uniform()
        elif not isinstance(init_state, np.ndarray):
            # Make sure init state is a numpy array
            try:
                init_state = np.asarray(init_state)
            except Exception:
                raise pyrado.TypeErr(given=init_state,
                                     expected_type=np.ndarray)
        if not self.init_space.contains(init_state, verbose=True):
            raise pyrado.ValueErr(
                msg="The init state must be within init state space!")

        # Update the state attribute
        self.state = init_state.copy()

        # Reset the task which also resets the reward function if necessary
        self._task.reset(env_spec=self.spec, init_state=init_state.copy())

        # Reset the MuJoCo simulation model (only resets the joint configuration)
        self.sim.reset()
        old_state = self.sim.get_state()
        nq = self.init_qpos.size
        # Check the joint positions' dimension
        if not init_state[:nq].shape == old_state.qpos.shape:
            raise pyrado.ShapeErr(given=init_state[:nq], expected_match=old_state.qpos)
        # Check the joint velocities' dimension, excluding everything that is appended to the state (at the end),
        # e.g. the ball position for WAMBallInCupSim
        if not init_state[nq:2 * nq].shape == old_state.qvel.shape:
            raise pyrado.ShapeErr(given=init_state[nq:2 * nq], expected_match=old_state.qvel)
        new_state = mujoco_py.MjSimState(
            old_state.time,
            init_state[:nq],
            init_state[nq:2 * nq],
            old_state.act,
            old_state.udd_state,
        )
        self.sim.set_state(new_state)
        self.sim.forward()

        # Return an observation
        return self.observe(self.state)
Example #8
0
    def _get_wrapper_domain_param(self, domain_param: dict):
        """
        Load the action noise parameters from the domain parameter dict

        :param domain_param: domain parameter dict
        """
        if "act_noise_mean" in domain_param:
            self._noise_mean = np.array(domain_param["act_noise_mean"])
            if not self._noise_mean.shape == self.act_space.shape:
                raise pyrado.ShapeErr(given=self._noise_mean, expected_match=self.act_space)
        if "act_noise_std" in domain_param:
            self._noise_std = np.array(domain_param["act_noise_std"])
            if not self._noise_std.shape == self.act_space.shape:
                raise pyrado.ShapeErr(given=self._noise_std, expected_match=self.act_space)
Example #9
0
def skyline(
    dt: Union[int, float, np.ndarray],
    t_end: Union[int, float, np.ndarray],
    t_intvl_space: BoxSpace,
    val_space: BoxSpace,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Step function that randomly samples a value from the given range, and then holds this value for a time interval
    which is also randomly sampled given a range of time intervals. This procedure is repeated until the sequence is
    long enough, i.e. `t_end / dt` samples.

    :param dt: time step size
    :param t_end: final time
    :param t_intvl_space: 1-dim `BoxSpace` determining the range of time intervals that can be sampled
    :param val_space: 1-dim `BoxSpace` determining the range of values that can be sampled
    :return: array of time steps together with the associated array of values
    """
    if dt <= 0:
        raise pyrado.ValueErr(given=dt, g_constraint="0")
    if t_end < dt:
        raise pyrado.ValueErr(given=t_end, ge_constraint=f"{dt}")
    if not isinstance(t_intvl_space, BoxSpace):
        raise pyrado.TypeErr(given=t_intvl_space, expected_type=BoxSpace)
    if not isinstance(val_space, BoxSpace):
        raise pyrado.TypeErr(given=val_space, expected_type=BoxSpace)
    if not t_intvl_space.flat_dim == 1:
        raise pyrado.ShapeErr(given=t_intvl_space, expected_match=(1, ))
    if not val_space.flat_dim == 1:
        raise pyrado.ShapeErr(given=val_space, expected_match=(1, ))

    dt = np.asarray(dt, dtype=np.float32)
    t_end = np.asarray(t_end, dtype=np.float32)

    # First iter
    t_intvl = t_intvl_space.sample_uniform()
    t_intvl = np.clip(t_intvl, dt, t_end + dt)
    t = np.arange(start=0.0, stop=t_intvl, step=dt)
    vals = val_space.sample_uniform() * np.ones_like(t)

    # Iterate until the time is up
    while t[-1] < t_end:
        t_intvl = t_intvl_space.sample_uniform()
        t_intvl = np.clip(t_intvl, dt, t_end - t[-1] + dt)
        t_new = np.arange(start=t[-1] + dt, stop=t[-1] + t_intvl, step=dt)
        t = np.concatenate([t, t_new])
        val_new = val_space.sample_uniform() * np.ones_like(t_new)
        vals = np.concatenate([vals, val_new])

    return t, vals
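# Usage sketch (assumption: BoxSpace is constructed from lower and upper bounds
# as in pyrado.spaces.box; the concrete ranges are illustrative):
t_intvl_space = BoxSpace(0.5, 2.0, shape=(1,))  # hold each value for 0.5 s to 2 s
val_space = BoxSpace(-1.0, 1.0, shape=(1,))  # sampled values lie in [-1, 1]
t, vals = skyline(dt=0.01, t_end=10.0, t_intvl_space=t_intvl_space, val_space=val_space)
assert t.shape == vals.shape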
Example #10
0
    def __init__(self,
                 num_feat_per_dim: int,
                 bounds: [Sequence[np.ndarray], Sequence[to.Tensor], Sequence[float]],
                 scale: float = None,
                 state_wise_norm: bool = True):
        """
        Constructor

        :param num_feat_per_dim: number of radial basis functions, identical for every dimension of the input
        :param bounds: lower and upper bound for the Gaussians' centers, the input dimension is inferred from them
        :param scale: scaling factor for the squared distance, if `None` the factor is determined such that two
                      neighboring RBFs have a value of 0.2 at the other center
        :param state_wise_norm: `True` to apply the normalization across input state dimensions separately (every
                                 dimension sums to one), or `False` to jointly normalize them
        """
        if not num_feat_per_dim > 1:
            raise pyrado.ValueErr(given=num_feat_per_dim, g_constraint='1')
        if not len(bounds) == 2:
            raise pyrado.ShapeErr(given=bounds, expected_match=np.empty(2))

        # Get the bounds, e.g. from the observation space, and then clip them in case they are infinite
        bounds_to = [None, None]
        for i, b in enumerate(bounds):
            if isinstance(b, np.ndarray):
                bounds_to[i] = to.from_numpy(b)
            elif isinstance(b, to.Tensor):
                bounds_to[i] = b.clone()
            elif isinstance(b, (int, float)):
                bounds_to[i] = to.tensor(b, dtype=to.get_default_dtype()).view(1)
            else:
                raise pyrado.TypeErr(given=b, expected_type=[np.ndarray, to.Tensor, int, float])
        if any(any(np.isinf(b)) for b in bounds_to):
            bound_lo, bound_up = [to.clamp(b, min=-1e6, max=1e6) for b in bounds_to]
            print_cbt('Clipped the bounds of the RBF centers to [-1e6, 1e6].', 'y')
        else:
            bound_lo, bound_up = bounds_to

        # Create a matrix with center locations for the Gaussians
        num_dim = len(bound_lo)
        self.num_feat = num_feat_per_dim * num_dim
        self.centers = to.empty(num_feat_per_dim, num_dim)
        for i in range(num_dim):
            # Features along columns
            self.centers[:, i] = to.linspace(bound_lo[i], bound_up[i], num_feat_per_dim)

        if scale is None:
            delta_center = self.centers[1, :] - self.centers[0, :]
            self.scale = -to.log(to.tensor(0.2)) / to.pow(delta_center, 2)
        else:
            self.scale = scale

        self._state_wise_norm = state_wise_norm
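# Why the default scale is -log(0.2) / delta^2 (see the `scale is None` branch):
# with that factor, an RBF evaluated at the neighboring center, i.e. at distance
# delta_center, yields exp(-scale * delta^2) = 0.2, as stated in the docstring.
import torch as to

delta = to.tensor(0.5)  # illustrative distance between two adjacent centers
scale = -to.log(to.tensor(0.2)) / to.pow(delta, 2)
assert to.isclose(to.exp(-scale * delta ** 2), to.tensor(0.2))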
Example #11
0
    def state_des(self, state_des: np.ndarray):
        if not isinstance(state_des, np.ndarray):
            raise pyrado.TypeErr(given=state_des, expected_type=np.ndarray)
        if not state_des.shape == self.state_des.shape:
            raise pyrado.ShapeErr(given=state_des, expected_match=self.state_des)
        self._state_des = state_des
Example #12
0
    def pack(data: to.Tensor) -> to.Tensor:
        """
        Reshape the data such that the shape is [batch_dim, num_rollouts, data_points_flattened].

        :param data: un-packed a.k.a. un-flattened data
        :return: packed a.k.a. flattened data
        """
        if data.ndim == 2:
            # The data is not batched, and we have one target domain rollout which is un-flattened
            return data.view(1, 1, -1)

        elif data.ndim == 3:
            # The data is not batched, but we have multiple target domain rollouts which are un-flattened
            num_rollouts = data.shape[0]
            return data.view(1, num_rollouts, -1)

        elif data.ndim == 4:
            # The data is batched, and we have multiple target domain rollouts
            batch_size, num_rollouts = data.shape[:2]
            return data.view(batch_size, num_rollouts, -1)

        else:
            raise pyrado.ShapeErr(msg=f"The data must have either 2, 3, or 4 dimensions, not {data.ndim}!")
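# The three accepted input shapes and the resulting packed shapes, spelled out
# with plain PyTorch (toy sizes: 20 time steps, 4 data dimensions):
import torch as to

assert to.randn(20, 4).view(1, 1, -1).shape == (1, 1, 80)  # single un-batched rollout
assert to.randn(3, 20, 4).view(1, 3, -1).shape == (1, 3, 80)  # 3 un-batched rollouts
assert to.randn(5, 3, 20, 4).view(5, 3, -1).shape == (5, 3, 80)  # batch of 5, 3 rollouts each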
Example #13
0
    def __init__(self,
                 in_features: int,
                 nonlin: [Callable, Sequence[Callable]],
                 bias: bool,
                 weight: bool = True):
        """
        Constructor

        :param in_features: size of each input sample
        :param nonlin: nonlinearity
        :param bias: if `True`, a learnable bias is subtracted, else no bias is used
        :param weight: if `True` (default), the input is multiplied with a learnable scaling factor
        """
        if not callable(nonlin):
            if not len(nonlin) == in_features:
                raise pyrado.ShapeErr(given=nonlin, expected_match=in_features)

        super().__init__()

        self.nonlin = deepcopy(nonlin) if is_iterable(nonlin) else nonlin
        if weight:
            self.weight = nn.Parameter(to.randn(in_features, dtype=to.get_default_dtype()), requires_grad=True)
        else:
            self.weight = None
        if bias:
            self.bias = nn.Parameter(to.randn(in_features, dtype=to.get_default_dtype()), requires_grad=True)
        else:
            self.bias = None
Example #14
0
    def loss_fcn(self, rollout_real: StepSequence,
                 rollout_sim: StepSequence) -> float:
        """
        Compute the discrepancy between two time sequences of observations given a metric.
        Be sure to align and truncate the rollouts beforehand.

        :param rollout_real: (concatenated) real-world rollout containing the observations
        :param rollout_sim: (concatenated) simulated rollout containing the observations
        :return: discrepancy cost summed over the observation dimensions
        """
        if len(rollout_real) != len(rollout_sim):
            raise pyrado.ShapeErr(given=rollout_real,
                                  expected_match=rollout_sim)

        # Extract the observations
        real_obs = rollout_real.get_data_values("observations",
                                                truncate_last=True)
        sim_obs = rollout_sim.get_data_values("observations",
                                              truncate_last=True)

        # Filter the observations
        real_obs = gaussian_filter1d(real_obs, self.std_obs_filt, axis=0)
        sim_obs = gaussian_filter1d(sim_obs, self.std_obs_filt, axis=0)

        # Normalize the signals
        real_obs_norm = self.obs_normalizer.project_to(real_obs)
        sim_obs_norm = self.obs_normalizer.project_to(sim_obs)

        # Compute loss based on the error
        loss_per_obs_dim = self.metric(real_obs_norm - sim_obs_norm)
        assert len(loss_per_obs_dim) == real_obs.shape[1]
        assert all(loss_per_obs_dim >= 0)
        return sum(loss_per_obs_dim)
Example #15
0
    def __init__(self, wrapped_env: Env, mask: list = None, idcs: list = None, keep_selected: bool = False):
        """
        Constructor

        :param wrapped_env: environment to wrap
        :param mask: mask out array, entries with 1 are dropped (behavior can be inverted by keep_selected=True)
        :param idcs: indices to drop, ignored if mask is specified. If the observation space is labeled,
                     the labels can be used as indices.
        :param keep_selected: set to true to keep the mask entries with 1/the specified indices and drop the others
        """
        Serializable._init(self, locals())

        super(ObsPartialWrapper, self).__init__(wrapped_env)

        # Parse selection
        if mask is not None:
            # Use explicit mask
            mask = np.array(mask, dtype=bool)
            if not mask.shape == wrapped_env.obs_space.shape:
                raise pyrado.ShapeErr(given=mask, expected_match=wrapped_env.obs_space)
        else:
            # Parse indices
            assert idcs is not None, "Either mask or indices must be specified"
            mask = wrapped_env.obs_space.create_mask(idcs)
        # Invert if needed
        if keep_selected:
            self.keep_mask = mask
        else:
            self.keep_mask = np.logical_not(mask)
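# Usage sketch (the class name is taken from the super() call above; the 3-dim
# observation space and the labels are illustrative assumptions):
env = ObsPartialWrapper(inner_env, mask=[0, 0, 1])  # drop the third observation entry
env = ObsPartialWrapper(inner_env, idcs=['x', 'x_dot'], keep_selected=True)  # keep only these two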
Example #16
0
    def __init__(
        self,
        spec: EnvSpec,
        act_recordings: List[Union[to.Tensor, np.ndarray]],
        no_reset: bool = False,
        use_cuda: bool = False,
    ):
        """
        Constructor

        :param spec: environment specification
        :param act_recordings: pre-recorded sequence of actions to be played back later
        :param no_reset: `True` to turn `reset()` into a dummy function
        :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU
        """
        if not isinstance(act_recordings, list):
            raise pyrado.TypeErr(given=act_recordings, expected_type=list)

        super().__init__(spec, use_cuda)

        self._curr_rec = -1  # is increased before the first use
        self._curr_step = 0
        self._no_reset = no_reset
        self._num_rec = len(act_recordings)
        self._act_rec_buffer = [to.atleast_2d(to.as_tensor(ar)) for ar in act_recordings]
        if not all(b.shape[1] == self.env_spec.act_space.flat_dim for b in self._act_rec_buffer):
            raise pyrado.ShapeErr(
                given=(-1, self._act_rec_buffer[0].shape[1]), expected_match=(-1, self.env_spec.act_space.flat_dim)
            )
Example #17
0
    def _unpack_hidden(self, hidden: to.Tensor, batch_size: int = None):
        # Special case - need to split into hidden and cell term memory
        # Assume it's a flattened view of hid/cell x nrl x batch x hs
        if len(hidden.shape) == 1:
            assert hidden.shape[0] == self.hidden_size, \
                "Passed hidden variable's size doesn't match the one required by the network."
            # We could handle this case, but for now it's not necessary
            assert batch_size is None, \
                'Cannot use batched observations with unbatched hidden state'

            # Reshape to hid/cell x nrl x batch x hs
            hd = hidden.view(2, self._num_recurrent_layers, 1,
                             self._hidden_size)
            # Split hidden and cell state
            return hd[0, ...], hd[1, ...]

        elif len(hidden.shape) == 2:
            assert hidden.shape[1] == self.hidden_size, \
                "Passed hidden variable's size doesn't match the one required by the network."
            assert hidden.shape[0] == batch_size, \
                f'Batch size of hidden state ({hidden.shape[0]}) must match batch size of observations ({batch_size})'

            # Reshape to hid/cell x nrl x batch x hs
            hd = hidden.view(batch_size, 2, self._num_recurrent_layers,
                             self._hidden_size).permute(1, 2, 0, 3)
            # Split hidden and cell state
            return hd[0, ...], hd[1, ...]

        else:
            raise pyrado.ShapeErr(
                msg=f"Improper shape of 'hidden'. Policy received {hidden.shape}, "
                f"but the shape should be 1- or 2-dim")
Example #18
0
    def forward(self, data: to.Tensor) -> to.Tensor:
        """
        Transforms rollouts into the observations used for likelihood-free inference.
        Currently a state-representation as well as state-action summary-statistics are available.

        :param data: packed data of shape [batch_size, num_rollouts, len_time_series, dim_data]
        :return: features of the data extracted from the embedding, of shape [batch_size, num_rollouts * dim_feat]
        """
        data = data.to(device=self.device, dtype=to.get_default_dtype())

        # Bring the data back into the un-flattened form of shape [batch_size, num_rollouts, len_time_series, dim_data]
        data = Embedding.unpack(data, self._dim_data_orig)

        if self.downsampling_factor > 1:
            data = data[:, :, ::self.downsampling_factor, :]

        # Iterate over all data batches computing the features from the data
        x = to.stack([self.forward_one_batch(batch) for batch in data], dim=0)

        # Check the shape
        if x.shape != (data.shape[0], data.shape[1] * self.dim_output):
            raise pyrado.ShapeErr(given=x, expected_match=(data.shape[0], data.shape[1] * self.dim_output))

        return x
Example #19
0
    def derivative(self, inp: to.Tensor) -> to.Tensor:
        """
        Compute the derivative of the features w.r.t. the inputs.

        .. note::
            Only processing of 1-dim input (e.g., no images)! The input can be batched along the first dimension.

        :param inp: input i.e. observations in the RL setting
        :return: value of all features derivatives given the observations
        """

        if inp.ndimension() > 2:
            raise pyrado.ShapeErr(msg='RBF class can only handle 1-dim or 2-dim input!')
        inp = atleast_2D(inp)  # first dim is the batch size, the second dim is the actual input dimension
        inp = inp.reshape(inp.shape[0], 1, inp.shape[1]).repeat(1, self.centers.shape[0], 1)  # reshape explicitly

        exp_sq_dist = to.exp(-self.scale*to.pow(inp - self.centers, 2))
        exp_sq_dist_d = -2*self.scale * (inp - self.centers)

        feat_val = to.empty(inp.shape[0], self.num_feat)
        feat_val_dot = to.empty(inp.shape[0], self.num_feat)

        for i, (sample, sample_d) in enumerate(zip(exp_sq_dist, exp_sq_dist_d)):
            if self._state_wise_norm:
                # Normalize the features such that the activation for every state dimension sums up to one
                feat_val[i, :] = normalize(sample, axis=0, order=1).reshape(-1, )
            else:
                # Turn the features into a vector and normalize over all of them
                feat_val[i, :] = normalize(sample.t().reshape(-1, ), axis=-1, order=1)

            feat_val_dot[i, :] = sample_d.squeeze() * feat_val[i, :] - feat_val[i, :] * sum(sample_d.squeeze() * feat_val[i, :])

        return feat_val_dot
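# Numeric sanity-check sketch for derivative(), via central finite differences
# (assumption: `rbf` is an instance of this feature class with 1-dim input, and
# calling it evaluates the normalized features as in __call__ of Example #30):
import torch as to

x = to.tensor([[0.3]])
eps = 1e-4
fin_diff = (rbf(x + eps) - rbf(x - eps)) / (2 * eps)  # shape [1, num_feat]
print(to.allclose(rbf.derivative(x), fin_diff, atol=1e-3))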
Example #20
0
    def transform_to_ddp_space(self, params: to.Tensor) -> to.Tensor:
        """
        Get the transformed domain distribution parameters. Wherever the mask is `True`, the corresponding policy
        parameter is learned in log space. Moreover, the policy parameters can be scaled.

        :param params: policy parameters (can be the log of the actual domain distribution parameter value)
        :return: policy parameters transformed according to the mask
        """
        ddp = params.clone()

        if ddp.ndimension() == 1:
            # Only one set of domain distribution parameters
            if self._scale_params:
                ddp.data = self.param_scaler.scale_back(ddp.data)
            ddp.data[self.mask] = to.exp(ddp.data[self.mask])

        elif ddp.ndimension() == 2:
            # Multiple sets of domain distribution parameters along the first axis
            if self._scale_params:
                for i in range(ddp.shape[0]):
                    ddp[i].data = self.param_scaler.scale_back(ddp[i].data)
            ddp.data[:, self.mask] = to.exp(ddp.data[:, self.mask])

        else:
            raise pyrado.ShapeErr(msg='Inputs must not have more than 2 dimensions!')

        return ddp
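# Standalone illustration of the masked log-space trick used above: a parameter
# that must stay positive (e.g. a standard deviation) is learned as its log and
# mapped back with exp, so the optimizer can work in an unconstrained space.
import torch as to

params = to.tensor([0.5, -2.3])  # illustrative layout: [mean, log(std)]
mask = to.tensor([False, True])  # only the second entry lives in log space
ddp = params.clone()
ddp[mask] = to.exp(ddp[mask])  # exp(-2.3) is approximately 0.1003
print(ddp)  # tensor([0.5000, 0.1003])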
Example #21
0
    def contains(self, cand: np.ndarray, verbose: bool = False) -> bool:
        # Check the candidate validity (shape and NaN values)
        if not cand.shape == self.shape:
            raise pyrado.ShapeErr(given=cand, expected_match=self)
        if np.isnan(cand).any():
            raise pyrado.ValueErr(
                msg='At least one value is NaN!' +
                tabulate([list(self.labels), [*color_validity(cand, np.invert(np.isnan(cand)))]],
                         headers='firstrow'))

        # Check upper and lower bound separately
        check_lo = (cand >= self.bound_lo).astype(int)
        check_up = (cand <= self.bound_up).astype(int)
        idcs_valid = np.bitwise_and(check_lo, check_up)

        if np.all(idcs_valid):
            return True
        else:
            if verbose:
                print(tabulate(
                    [['lower bound ', *color_validity(self.bound_lo, check_lo)],
                     ['candidate ', *color_validity(cand, idcs_valid)],
                     ['upper bound ', *color_validity(self.bound_up, check_up)]],
                    headers=[''] + list(self.labels)))
            return False
Example #22
0
    def add_value(self, key: str, value):
        """
        Add a column value to the current step.

        :param key: data key
        :param value: value to record, pass '' to print nothing
        """
        # Compute full prefixed key
        key = self._prefix_str + key

        if self._first_step:
            # Record new key during first step
            self._value_keys.append(key)
        elif key not in self._value_keys:
            # Make sure the key was used during first step
            raise KeyError('New value keys may only be added before the first step is finished')

        # Pre-process non-scalar values
        if isinstance(value, to.Tensor):
            # Only support scalar tensors for now
            if value.numel() == 1:
                value = value.item()
            else:
                raise pyrado.ShapeErr(
                    msg='The logger only supports scalar PyTorch tensors, otherwise the progress.csv file '
                    'gets messed up.')

        # Record value
        self._current_values[key] = value
        self._values_changed = True
Example #23
0
    def reset(self,
              init_state: np.ndarray = None,
              domain_param: dict = None) -> np.ndarray:
        # Reset time
        self._curr_step = 0

        # Reset the domain parameters
        if domain_param is not None:
            self.domain_param = domain_param

        # Reset the state
        if init_state is None:
            self.state = self._init_space.sample_uniform()  # zero
        else:
            if not isinstance(init_state, np.ndarray):
                # Make sure the init state is a numpy array before checking its shape
                try:
                    init_state = np.array(init_state)
                except Exception:
                    raise pyrado.TypeErr(given=init_state, expected_type=[np.ndarray, list])
            if not init_state.shape == self.obs_space.shape:
                raise pyrado.ShapeErr(given=init_state, expected_match=self.obs_space)
            self.state = init_state.copy()

        # No need to reset the task

        # Return perfect observation
        return self.observe(self.state)
Example #24
0
    def space_des(self, space_des: Space):
        if not isinstance(space_des, Space):
            raise pyrado.TypeErr(given=space_des, expected_type=Space)
        if not space_des.shape == self.space_des.shape:
            raise pyrado.ShapeErr(given=space_des, expected_match=self.space_des)
        self._space_des = space_des
Example #25
0
    def __init__(self, eles: [np.ndarray, list], labels: Sequence[str] = None):
        """
        Constructor

        :param eles: N x D array of all actions, where N is the number of actions and D is the dimension of each action
        :param labels: label element of the space. This is useful for giving the states and actions names to later
                       identify them (e.g. for plotting).
        """
        if isinstance(eles, np.ndarray):
            # Make sure the dimension of the state is along the first array dimension
            self.eles = eles if eles.ndim == 2 else eles.reshape(-1, 1)
        elif isinstance(eles, list):
            self.eles = np.array(eles, dtype=int)
            # Make sure the dimension of the state is along the first array dimension
            self.eles = self.eles if self.eles.ndim == 2 else self.eles.reshape(-1, 1)
        else:
            raise pyrado.TypeErr(given=eles, expected_type=[np.ndarray, list])

        self.eles = np.atleast_2d(self.eles)
        self.bound_lo = np.min(self.eles, axis=0)
        self.bound_up = np.max(self.eles, axis=0)

        # Process the labels
        if labels is not None:
            labels = np.array(labels, dtype=object)
            if not labels.shape == self.shape:
                raise pyrado.ShapeErr(given=labels, expected_match=self)
            self._labels = labels
        else:
            self._labels = np.empty(self.shape, dtype=object)
            self._labels.fill(None)
Example #26
0
    def reset(self,
              init_state: np.ndarray = None,
              domain_param: dict = None) -> np.ndarray:
        # Reset time
        self._curr_step = 0

        # Reset the state
        if init_state is None:
            # Sample from the init state space
            init_state = self._init_space.sample_uniform()
        else:
            if not init_state.shape == self._init_space.shape:
                raise pyrado.ShapeErr(given=init_state,
                                      expected_match=self._init_space)

        # Reset the task
        self._task.reset(env_spec=self.spec)

        # Use stored domain parameters if not overwritten
        if domain_param is None:
            domain_param = self._domain_param

        # Forward to C++ implementation
        obs = self._sim.reset(
            domainParam=self._adapt_domain_param(domain_param),
            initState=init_state)
        self.state = self._state_from_obs(obs)

        return obs
Example #27
0
    def fill_domain_param_buffer(env: DomainRandWrapper,
                                 dp_mapping: Mapping[int, str],
                                 domain_params: to.Tensor):
        """
        Fill the environment's domain parameter buffer according to the domain parameter map, and reset the ring index.

        :param env: environment in which the domain parameters are inserted
        :param dp_mapping: mapping from subsequent integers (starting at 0) to domain parameter names (e.g. mass)
        :param domain_params: tensor of domain parameters [num_samples x dim domain param]
        """
        if not isinstance(env, DomainRandWrapperBuffer):
            raise pyrado.TypeErr(given=env, expected_type=DomainRandWrapperBuffer)
        if domain_params.ndim != 2 or domain_params.shape[1] != len(dp_mapping):
            raise pyrado.ShapeErr(
                msg=f"The domain parameters must be a 2-dim PyTorch tensor, where the second dimension matches the "
                f"domain parameter mapping, but it has the shape {domain_params.shape}!"
            )

        domain_params = domain_params.detach().cpu().numpy()
        env.buffer = [dict(zip(dp_mapping.values(), dp)) for dp in domain_params]
        env.ring_idx = 0
        print_cbt(f"Filled the environment's buffer with {len(env.buffer)} domain parameter sets.", "g")
Example #28
0
def cov(x: to.Tensor, data_along_rows: bool = False):
    """
    Compute the covariance matrix given data.

    .. note::
        Only real valued matrices are supported

    :param x: matrix containing multiple observations of multiple variables
    :param data_along_rows: if `True` the variables are stacked along the columns, else they are along the rows
    :return: covariance matrix given the data
    """
    if x.dim() > 2:
        raise ValueError('x has more than 2 dimensions')
    if x.dim() < 2:
        x = x.view(1, -1)
    if data_along_rows and x.size(0) != 1:
        # Transpose such that the variables are along the rows
        x = x.t()

    num_samples = x.size(1)
    if num_samples < 2:
        raise pyrado.ShapeErr(msg='Need at least 2 samples to compute the covariance!')

    x = x - to.mean(x, dim=1, keepdim=True)  # avoid mutating the caller's tensor in-place
    return x.matmul(x.t()).squeeze() / (num_samples - 1)
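# Cross-check against numpy's covariance (standalone; both functions place the
# variables along the rows by default and normalize by num_samples - 1):
import numpy as np
import torch as to

x = to.randn(3, 100)  # 3 variables, 100 samples each
assert np.allclose(cov(x).numpy(), np.cov(x.numpy()), atol=1e-4)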
Example #29
0
    def transform_to_ddp_space(self, params: to.Tensor) -> to.Tensor:
        """
        Get the transformed domain distribution parameters. Wherever the mask is `True`, the corresponding policy
        parameter is learned in sqrt space. Moreover, the policy parameters can be scaled.

        :param params: policy parameters (can be the sqrt of the actual domain distribution parameter value)
        :return: policy parameters transformed according to the mask
        """
        ddp = params.clone()

        if ddp.ndimension() == 1:
            # Only one set of domain distribution parameters
            if self._scale_params:
                ddp.data = self.param_scaler.scale_back(ddp.data)
            ddp.data[self.mask] = to.pow(ddp.data[self.mask], 2)

        elif ddp.ndimension() == 2:
            # Multiple sets of domain distribution parameters along the first axis
            if self._scale_params:
                for i in range(ddp.shape[0]):
                    ddp[i].data = self.param_scaler.scale_back(ddp[i].data)
            ddp.data[:, self.mask] = to.pow(ddp.data[:, self.mask], 2)

        else:
            raise pyrado.ShapeErr(
                msg="The input must not have more than 2 dimensions!")

        return ddp
Example #30
0
    def __call__(self, inp: to.Tensor) -> to.Tensor:
        """
        Evaluate the features and normalize them.

        .. note::
            Only processing of 1-dim input (e.g., no images)! The input can be batched along the first dimension.

        :param inp: input i.e. observations in the RL setting
        :return: 1-dim vector of all feature values given the observations
        """
        inp = inp.to(device=self._device, dtype=to.get_default_dtype())

        if inp.ndimension() > 2:
            raise pyrado.ShapeErr(msg="RBF class can only handle 1-dim or 2-dim input!")
        inp = to.atleast_2d(inp)  # first dim is the batch size, the second dim is the actual input dimension
        inp = inp.reshape(inp.shape[0], 1, inp.shape[1]).repeat(1, self.centers.shape[0], 1)  # reshape explicitly

        # Exponentiate the squared distances
        exp_sq_dist = to.exp(-self.scale * to.pow(inp - self.centers, 2))

        # Normalize, reshape, and return the feature values
        return to.stack([self._normalize_and_reshape(esd) for esd in exp_sq_dist])
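# Usage sketch tying this __call__ together with the constructor from Example #10
# (assumption: the class is pyrado's RBFFeat; bounds and sizes are illustrative):
import torch as to

rbf = RBFFeat(num_feat_per_dim=5, bounds=(to.tensor([-1.0]), to.tensor([1.0])))
feats = rbf(to.tensor([[0.0], [0.5]]))  # batch of two 1-dim observations
print(feats.shape)  # expected: torch.Size([2, 5]), one feature vector per sample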