Example no. 1
    def create_mask(self, *idcs):
        """
        Create a mask selecting the given indices from this space.
        Every index should be a number or a name in labels.

        :param idcs: index list, which can either be varargs or a single iterable
        :return: boolean mask array with `True` at each selected index
        """
        mask = np.zeros(self.shape, dtype=np.bool_)

        if len(idcs) == 1 and isinstance(
                idcs[0], Iterable) and not isinstance(idcs[0], str):
            # Unwrap single iterable argument
            idcs = idcs[0]

        labels = self.labels
        # Set selected values to 1
        for idx in idcs:
            if isinstance(idx, str):
                # Handle labels
                assert labels is not None, 'The space must be labeled to use label-based indexing'
                for idx_label, label in np.ndenumerate(labels):
                    if label == idx:
                        idx = idx_label
                        break
                else:
                    raise pyrado.ValueErr(
                        msg=f'Label {idx} not found in {self}')
            if np.all(mask[idx] == 1):
                label_desc = f' ({labels[idx]})' if labels is not None else ""
                raise pyrado.ValueErr(msg=f'Duplicate index {idx}{label_desc}')
            mask[idx] = 1

        return mask
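A minimal usage sketch (assuming `BoxSpace` accepts lower/upper bounds and a `labels` argument, as it does elsewhere in these examples, and that `pyrado.spaces` is the import path): both the varargs form and the single-iterable form select the same entries, and labels are resolved via `np.ndenumerate`.

import numpy as np
from pyrado.spaces import BoxSpace  # assumed import path

space = BoxSpace(-np.ones(3), np.ones(3), labels=['x', 'y', 'theta'])
mask_a = space.create_mask(0, 2)            # varargs of integer indices
mask_b = space.create_mask(['x', 'theta'])  # a single iterable, here with labels
assert np.all(mask_a == mask_b)             # both select the first and last entry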
Example no. 2
    def create_mask(self, *idcs) -> np.ndarray:
        """
        Create a mask selecting the given indices from this space.
        Every index should be a number or a name in the space's labels.

        :param idcs: index list, which can either be varargs or a single iterable
        :return: boolean mask array with `1` at each index specified by the indices or labels
        """
        mask = np.zeros(self.shape, dtype=np.bool_)

        if len(idcs) == 1 and isinstance(idcs[0], Iterable) and not isinstance(idcs[0], str):
            # Unwrap single iterable argument
            idcs = idcs[0]

        # Set selected values to 1
        for idx in idcs:
            if isinstance(idx, str):
                # Handle labels
                if self.labels is None:
                    raise pyrado.TypeErr(msg="The space must be labeled to use label-based indexing!")
                for idx_label, label in np.ndenumerate(self.labels):
                    if label == idx:
                        idx = idx_label
                        break
                else:
                    raise pyrado.ValueErr(msg=f"Label {idx} not found in {self}")

            if np.all(mask[idx] == 1):
                label_desc = f" ({self.labels[idx]})" if self.labels is not None else ""
                raise pyrado.ValueErr(msg=f"Duplicate index {idx}{label_desc}")

            mask[idx] = 1

        return mask
Example no. 3
    def __init__(self,
                 wrapped_env: Env,
                 explicit_lb: Mapping[str, float] = None,
                 explicit_ub: Mapping[str, float] = None):
        """
        Constructor

        :param wrapped_env: environment to wrap
        :param explicit_lb: dict to override the environment's lower bound; by default (`None`) this is ignored;
                            the keys are space labels, the values the new bound for that labeled entry
        :param explicit_ub: dict to override the environment's upper bound; by default (`None`) this is ignored;
                            the keys are space labels, the values the new bound for that labeled entry
        """
        Serializable._init(self, locals())
        super().__init__(wrapped_env)

        # Explicitly override the bounds if desired
        self.explicit_lb = explicit_lb
        self.explicit_ub = explicit_ub

        # Get the bounds of the inner observation space
        wos = self.wrapped_env.obs_space
        lb, ub = wos.bounds

        # Override the bounds if desired and store the result for usage in _process_obs
        self.ov_lb = ObsNormWrapper.override_bounds(lb, self.explicit_lb, 'lower', wos.labels)
        self.ov_ub = ObsNormWrapper.override_bounds(ub, self.explicit_ub, 'upper', wos.labels)

        # Check if the new bounds are valid
        if any(self.ov_lb == -pyrado.inf):
            raise pyrado.ValueErr(msg=f'At least one element of the lower bounds is (negative) infinite:\n'
                                      f'(overwritten) bound: {self.ov_lb}\nnames: {wos.labels}')
        if any(self.ov_ub == pyrado.inf):
            raise pyrado.ValueErr(msg=f'At least one element of the upper bound is (positive) infinite:\n'
                                      f'(overwritten) bound: {self.ov_ub}\nnames: {wos.labels}')
Example no. 4
    def eval_init_policies(self):
        """
        Execute the trained initial policies on the target device and store the estimated return per candidate.
        The number of initial policies to evaluate is the number of found policies.
        """
        # Crawl through the experiment's directory
        for root, dirs, files in os.walk(self.save_dir):
            dirs.clear()  # prevents walk() from going into subdirectories
            found_policies = [p for p in files if p.startswith('init_') and p.endswith('_policy.pt')]
            found_cands = [c for c in files if c.startswith('init_') and c.endswith('_candidate.pt')]
        if not len(found_policies) == len(found_cands):
            raise pyrado.ValueErr(msg='Found a different number of initial policies than candidates!')
        elif len(found_policies) == 0:
            raise pyrado.ValueErr(msg='No policies or candidates found!')

        num_init_cand = len(found_cands)
        cands_values = to.empty(num_init_cand)

        # Load all found candidates to save them into a single tensor
        found_cands = natural_sort(found_cands)  # the order is important since it determines the rows of the tensor
        cands = to.stack([to.load(osp.join(self.save_dir, c)) for c in found_cands])

        # Evaluate learned policies from random candidates on the target environment (real-world) system
        for i in range(num_init_cand):
            policy = pyrado.load(self.policy, 'policy', 'pt', self.save_dir, meta_info=dict(prefix=f'init_{i}'))
            cands_values[i] = self.eval_policy(self.save_dir, self._env_real, policy, self.mc_estimator,
                                               prefix=f'init_{i}', num_rollouts=self.num_eval_rollouts_real)

        # Save the candidates and their returns into tensors (policy is saved during training or exists already)
        # pyrado.save(cands, 'candidates', 'pt', self._save_dir, meta_info)
        pyrado.save(cands_values, 'candidates_values', 'pt', self.save_dir, meta_info=None)
        self.cands, self.cands_values = cands, cands_values
Example no. 5
    def from_stacked(dim: int,
                     stacked: np.ndarray) -> "MultivariateNormalWrapper":
        r"""
        Creates an instance of this class from the given stacked numpy array as generated e.g. by
        `MultivariateNormalWrapper.get_stacked(self)`.

        :param dim: dimensionality `k` of the random variable
        :param stacked: array containing the mean and standard deviations of shape `(2 * k,)`, where the first `k`
                        entries are the mean and the last `k` entries are the standard deviations
        :return: a `MultivariateNormalWrapper` with the given mean/cov.
        """
        if not (len(stacked.shape) == 1):
            raise pyrado.ValueErr(
                msg="Stacked has invalid shape! Must be 1-dimensional.")
        if not (stacked.shape[0] == 2 * dim):
            raise pyrado.ValueErr(
                msg="Stacked has invalid size! "
                "Must be 2*dim (once for the mean, once for the diagonal of the covariance Cholesky factor)."
            )

        mean = stacked[:dim]
        cov_chol_flat = stacked[dim:]

        return MultivariateNormalWrapper(
            to.tensor(mean).double(),
            to.tensor(cov_chol_flat).double())
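A short usage sketch (assuming `MultivariateNormalWrapper` from this example is in scope): the first `k` entries of the stacked array are the mean, the last `k` are the standard deviations.

import numpy as np

stacked = np.array([0.0, 1.0,   # mean (k = 2 entries)
                    0.5, 0.5])  # standard deviations (k = 2 entries)
distr = MultivariateNormalWrapper.from_stacked(dim=2, stacked=stacked)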
Example no. 6
    def make_snapshot(self,
                      snapshot_mode: str,
                      curr_avg_ret: float = None,
                      meta_info: dict = None):
        """
        Make a snapshot of the training progress.
        This method is called from the subclasses and delegates to the custom method `save_snapshot()`.

        :param snapshot_mode: determines when the snapshots are stored (e.g. on every iteration or on new highscore)
        :param curr_avg_ret: current average return used for the snapshot_mode 'best' to trigger `save_snapshot()`
        :param meta_info: is not `None` if this algorithm is run as a subroutine of a meta-algorithm,
                          contains a dict of information about the current iteration of the meta-algorithm
        """
        if snapshot_mode == "latest":
            self.save_snapshot(meta_info)
        elif snapshot_mode == "best":
            if curr_avg_ret is None:
                raise pyrado.ValueErr(
                    msg=
                    "curr_avg_ret must not be None when snapshot_mode = 'best'!"
                )
            if curr_avg_ret > self._highest_avg_ret:
                self._highest_avg_ret = curr_avg_ret
                self.save_snapshot(meta_info)
        elif snapshot_mode in {"no", "None"}:
            pass  # don't save anything
        else:
            raise pyrado.ValueErr(given=snapshot_mode,
                                  eq_constraint="'latest', 'best', or 'no'")
Example no. 7
    def _process_obs_space(self, space: BoxSpace) -> BoxSpace:
        if not isinstance(space, BoxSpace):
            raise NotImplementedError(
                'Only implemented ObsNormWrapper._process_obs_space() for BoxSpace!'
            )
        # Get the bounds of the inner observation space
        lb, ub = space.bounds

        # Override the bounds if desired
        lb_ov = ObsNormWrapper.override_bounds(lb, self.explicit_lb, 'lower',
                                               space.labels)
        ub_ov = ObsNormWrapper.override_bounds(ub, self.explicit_ub, 'upper',
                                               space.labels)

        if any(lb_ov == -pyrado.inf):
            raise pyrado.ValueErr(
                msg=
                f'At least one element of the lower bounds is (negative) infinite:\n'
                f'(overwritten) bound: {lb_ov}\nnames: {space.labels}')
        if any(ub_ov == pyrado.inf):
            raise pyrado.ValueErr(
                msg=
                f'At least one element of the upper bound is (positive) infinite:\n'
                f'(overwritten) bound: {ub_ov}\nnames: {space.labels}')

        # Report actual bounds, which are not +-1 for overridden fields
        lb_norm = (lb - lb_ov) / (ub_ov - lb_ov) * 2 - 1
        ub_norm = (ub - lb_ov) / (ub_ov - lb_ov) * 2 - 1
        return BoxSpace(lb_norm, ub_norm, labels=space.labels)
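A quick numeric check of the normalization used above (plain NumPy, independent of the wrapper): the affine map sends the (overridden) lower bound to -1 and the upper bound to +1.

import numpy as np

lb_ov, ub_ov = np.array([-2.0]), np.array([4.0])
norm = lambda x: (x - lb_ov) / (ub_ov - lb_ov) * 2 - 1
assert norm(lb_ov) == -1 and norm(ub_ov) == +1 and norm(np.array([1.0])) == 0.0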
Example no. 8
    def init_param(self, init_values: to.Tensor = None, **kwargs):
        if init_values is not None:
            # First check if there are some specific values to set
            self.param_values = init_values

        elif kwargs.get('prior', None) is not None:
            # Prior information is expected to be in form of a DomainRandomizer since it holds the distributions
            if not isinstance(kwargs['prior'], DomainRandomizer):
                raise pyrado.TypeErr(given=kwargs['prior'], expected_type=DomainRandomizer)

            # For every domain distribution parameter in the mapping, check if there is prior information
            for idx, ddp in self.mapping.items():
                for dp in kwargs['prior'].domain_params:
                    if ddp[0] == dp.name and ddp[1] in dp.get_field_names():
                        # The domain parameter exists in the prior and in the mapping
                        val = getattr(dp, f'{ddp[1]}')
                        if self.mask[idx]:
                            # Log-transform since it will later be exp-transformed
                            self.params[idx].data.fill_(to.log(to.tensor(val)))
                        else:
                            self.params[idx].data.fill_(to.tensor(val))
                        if to.any(to.isnan(self.params[idx].data)):
                            raise pyrado.ValueErr(msg='DomainDistrParamPolicy parameter became NaN during '
                                                      'initialization! Check the mask and negative mean values.')

        else:
            raise pyrado.ValueErr(msg='DomainDistrParamPolicy needs to be initialized! Either with a set of policy '
                                      'parameters, or with a prior in form of a DomainRandomizer!')

        if self._scale_params:
            # After initializing, we have an estimate on the magnitude of the policy parameters. Usually, the
            # non-transformed means are a magnitude smaller than e.g. the transformed stds. Thus, we will approximately
            # project them to [-0.5, 0.5]
            self.param_scaler = MinMaxScaler(bound_lo=-0.5, bound_up=0.5)
            self.params.data = self.param_scaler.scale_to(self.params.data)  # params now in [-0.5, 0.5]
Example no. 9
    def __init__(self,
                 vfcn: [nn.Module, Policy],
                 gamma: float = 0.99,
                 lamda: float = 0.95,
                 num_epoch: int = 10,
                 batch_size: int = 64,
                 standardize_adv: bool = True,
                 standardizer: [None, RunningStandardizer] = None,
                 max_grad_norm: float = None,
                 lr: float = 5e-4,
                 lr_scheduler=None,
                 lr_scheduler_hparam: [dict, None] = None):
        r"""
        Constructor

        :param vfcn: value function, which can be a `FNN` or a `Policy`
        :param gamma: temporal discount factor
        :param lamda: regulates the trade-off between bias (max for 0) and variance (max for 1), see [1]
        :param num_epoch: number of iterations over all gathered samples during one estimator update
        :param batch_size: number of samples per estimator update batch
        :param standardize_adv: if `True`, the advantages are standardized to be $\sim N(0, 1)$
        :param standardizer: pass `None` to use stateless standardization, alternatively pass `RunningStandardizer()`
                             to use a standardizer which keeps track of past values
        :param max_grad_norm: maximum L2 norm of the gradients for clipping, set to `None` to disable gradient clipping
        :param lr: (initial) learning rate for the optimizer, which can be modified by the scheduler.
                   By default, the learning rate is constant.
        :param lr_scheduler: learning rate scheduler that does one step per epoch (pass through the whole data set)
        :param lr_scheduler_hparam: hyper-parameters for the learning rate scheduler
        """
        if not isinstance(vfcn, (nn.Module, Policy)):
            raise pyrado.TypeErr(given=vfcn, expected_type=[nn.Module, Policy])
        if isinstance(vfcn, Policy):
            if not vfcn.env_spec.act_space == ValueFunctionSpace:
                raise pyrado.ShapeErr(msg='The given act_space held by the vfcn should be a ValueFunctionSpace.')
        if not 0 <= gamma <= 1:
            raise pyrado.ValueErr(given=gamma, ge_constraint='0', le_constraint='1')
        if not 0 <= lamda <= 1:
            raise pyrado.ValueErr(given=lamda, ge_constraint='0', le_constraint='1')

        # Call Module's constructor
        super().__init__()

        # Store the inputs
        self._vfcn = vfcn
        self.gamma = gamma
        self.lamda = lamda
        self.num_epoch = num_epoch
        self.batch_size = batch_size
        self.max_grad_norm = max_grad_norm
        self.standardize_adv = standardize_adv
        self.standardizer = standardizer

        # Initialize
        self.loss_fcn = nn.MSELoss()
        self.optim = to.optim.Adam(self._vfcn.parameters(), lr=lr, eps=1e-5)
        self._lr_scheduler = lr_scheduler
        self._lr_scheduler_hparam = lr_scheduler_hparam
        if lr_scheduler is not None:
            self._lr_scheduler = lr_scheduler(self.optim, **lr_scheduler_hparam)
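To illustrate the gamma/lamda trade-off this estimator is built around, here is a standalone sketch of the standard GAE recursion (not the class's own code; the toy rewards and values are made up):

import torch as to

# adv_t = sum_l (gamma * lamda)^l * delta_{t+l}, with delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
gamma, lamda = 0.99, 0.95
rewards = to.tensor([1.0, 1.0, 1.0])
values = to.tensor([0.5, 0.5, 0.5, 0.0])  # V(s_0) ... V(s_3), terminal value 0
deltas = rewards + gamma * values[1:] - values[:-1]
adv, running = to.zeros(3), 0.0
for t in reversed(range(3)):
    running = deltas[t] + gamma * lamda * running  # lamda=0 keeps only delta_t (max bias, min variance)
    adv[t] = running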
Example no. 10
    def __init__(self,
                 noise_dim: [int, tuple],
                 std_init: [float, to.Tensor],
                 std_min: [float, to.Tensor] = 0.01,
                 train_mean: bool = False,
                 learnable: bool = True):
        """
        Constructor

        :param noise_dim: number of dimensions
        :param std_init: initial standard deviation for the exploration noise
        :param std_min: minimal standard deviation for the exploration noise
        :param train_mean: `True` if the noise should have an adaptive nonzero mean, `False` otherwise
        :param learnable: `True` if the parameters should be tunable (default), `False` for shallow use (just sampling)
        """
        if not isinstance(std_init, (float, to.Tensor)):
            raise pyrado.TypeErr(given=std_init,
                                 expected_type=(float, to.Tensor))
        if isinstance(std_init,
                      to.Tensor) and not std_init.size() == noise_dim:
            raise pyrado.ShapeErr(given=std_init,
                                  expected_match=to.empty(noise_dim))
        if not (isinstance(std_init, float) and std_init > 0
                or isinstance(std_init, to.Tensor) and all(std_init > 0)):
            raise pyrado.ValueErr(given=std_init, g_constraint='0')
        if not isinstance(std_min, (float, to.Tensor)):
            raise pyrado.TypeErr(given=std_min,
                                 expected_type=(float, to.Tensor))
        if not (isinstance(std_min, float) and std_min > 0
                or isinstance(std_min, to.Tensor) and all(std_min > 0)):
            raise pyrado.ValueErr(given=std_min, g_constraint='0')

        super().__init__()

        # Register parameters
        if learnable:
            self.cov = nn.Parameter(to.Tensor(noise_dim, noise_dim),
                                    requires_grad=True)
            self.mean = nn.Parameter(
                to.Tensor(noise_dim),
                requires_grad=True) if train_mean else None
        else:
            self.cov = to.empty(noise_dim, noise_dim)
            self.mean = None

        # Initialize parameters
        if isinstance(std_init, float):
            self.cov_init = std_init**2 * to.eye(noise_dim)
        else:
            self.cov_init = to.diag(to.pow(std_init, 2))
        self.std_min = to.tensor(std_min) if isinstance(std_min, float) else std_min
        if not isinstance(self.cov_init, to.Tensor):
            raise pyrado.TypeErr(given=self.cov_init, expected_type=to.Tensor)
        if not isinstance(self.std_min, to.Tensor):
            raise pyrado.TypeErr(given=self.std_min, expected_type=to.Tensor)

        self.reset_expl_params()
Example no. 11
    def __init__(
        self,
        num_dof: int,
        max_steps: int,
        dt: float = 1 / 500.0,
        ip: Optional[str] = "192.168.2.2",
    ):
        """
        Constructor

        :param num_dof: number of degrees of freedom (4 or 7), depending on which Barrett WAM setup being used
        :param max_steps: maximum number of time steps
        :param dt: sampling time interval, changing this value is highly discouraged
        :param ip: IP address of the PC controlling the Barrett WAM, pass `None` to skip connecting
        """
        # Make sure max_steps is reachable
        if not max_steps < pyrado.inf:
            raise pyrado.ValueErr(given=max_steps, given_name="max_steps", l_constraint=pyrado.inf)

        # Call the base class constructor to initialize fundamental members
        super().__init__(dt, max_steps)

        # Create the robcom client and connect to it. Use a Process to timeout if connection cannot be established.
        self._connected = False
        self._client = robcom.Client()
        self._robot_group_name = "RIGHT_ARM"
        try:
            self._client.start(ip, 2013, 1000)  # ip address, port, timeout in ms
            self._connected = True
            print_cbt("Connected to the Barret WAM client.", "c", bright=True)
        except RuntimeError:
            print_cbt("Connection to the Barret WAM client failed!", "r", bright=True)
        self._jg = self._client.robot.get_group([self._robot_group_name])
        self._dc = None  # direct-control process
        self._t = None  # only needed for WAMBallInCupRealStepBased

        # Desired joint position for the initial state and indices of the joints the policy operates on
        self._num_dof = num_dof
        if self._num_dof == 4:
            self._qpos_des_init = INIT_QPOS_DES_4DOF
            self._idcs_act = [0, 1, 2, 3]  # use all joints by default
        elif self._num_dof == 7:
            self._qpos_des_init = INIT_QPOS_DES_7DOF
            self._idcs_act = [0, 1, 2, 3, 4, 5, 6]  # use all joints by default
        else:
            raise pyrado.ValueErr(given=self._num_dof, eq_constraint="4 or 7")

        # Initialize task
        self._task = self._create_task(task_args=dict())

        # Trajectory containers (are set in reset())
        self.qpos_real = None
        self.qvel_real = None
        self.qpos_des = None
        self.qvel_des = None
Example no. 12
    def from_stacked(
            self, stacked: np.ndarray
    ) -> "ParameterAgnosticMultivariateNormalWrapper":
        """
        Builds a new `ParameterAgnosticMultivariateNormalWrapper` from the given stacked values. In contrast to
        `MultivariateNormalWrapper.from_stacked(dim, stacked)`, this does not require a dimensionality as it is an
        instance method rather than a static method. Also, the stacked representation has to contain the mean, the
        standard deviations, or both, according to the values originally passed to the constructor. If one of them is
        not treated as a parameter, the current value is copied instead.

        :param stacked: the stacked representation of the parameters according to the documentation above; can have
                        either shape `(0,)`, `(k,)`, or `(2 * k,)`
        :return: a `ParameterAgnosticMultivariateNormalWrapper` with the new values for the parameters
        """
        if not (len(stacked.shape) == 1):
            raise pyrado.ValueErr(
                msg="Stacked has invalid shape! Must be 1-dimensional.")

        expected_dim_multiplier = 0
        if self._mean_is_parameter:
            expected_dim_multiplier += 1
        if self._cov_is_parameter:
            expected_dim_multiplier += 1
        if not (stacked.shape[0] == expected_dim_multiplier * self.dim):
            raise pyrado.ValueErr(
                msg=
                f"Stacked has invalid size! Must be {expected_dim_multiplier}*dim."
            )

        if self._mean_is_parameter and self._cov_is_parameter:
            mean = stacked[:self.dim]
            cov_chol_flat = stacked[self.dim:]
        elif self._mean_is_parameter and not self._cov_is_parameter:
            mean = stacked[:self.dim]
            cov_chol_flat = self.cov_chol_flat
        elif not self._mean_is_parameter and self._cov_is_parameter:
            mean = self.mean
            cov_chol_flat = stacked
        else:
            mean = self.mean
            cov_chol_flat = self.cov_chol_flat

        if isinstance(mean, np.ndarray):
            mean = to.tensor(mean).double()

        if isinstance(cov_chol_flat, np.ndarray):
            cov_chol_flat = to.tensor(cov_chol_flat).double()

        return ParameterAgnosticMultivariateNormalWrapper(
            mean=mean,
            cov_chol_flat=cov_chol_flat,
            mean_is_parameter=self._mean_is_parameter,
            cov_is_parameter=self._cov_is_parameter,
        )
Example no. 13
def skyline(
    dt: Union[int, float, np.ndarray],
    t_end: Union[int, float, np.ndarray],
    t_intvl_space: BoxSpace,
    val_space: BoxSpace,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Step function that randomly samples a value from the given range, and then holds this value for a time interval
    which is also randomly sampled given a range of time intervals. This procedure is repeated until the sequence is
    long enough, i.e. `t_end / dt` samples.

    :param dt: time step size
    :param t_end: final time
    :param t_intvl_space: 1-dim `BoxSpace` determining the range of time intervals that can be sampled
    :param val_space: 1-dim `BoxSpace` determining the range of values that can be sampled
    :return: array of time steps together with the associated array of values
    """
    if dt <= 0:
        raise pyrado.ValueErr(given=dt, g_constraint="0")
    if t_end < dt:
        raise pyrado.ValueErr(given=t_end, ge_constraint=f"{dt}")
    if not isinstance(t_intvl_space, BoxSpace):
        raise pyrado.TypeErr(given=t_intvl_space, expected_type=BoxSpace)
    if not isinstance(val_space, BoxSpace):
        raise pyrado.TypeErr(given=val_space, expected_type=BoxSpace)
    if not t_intvl_space.flat_dim == 1:
        raise pyrado.ShapeErr(given=t_intvl_space, expected_match=(1, ))
    if not val_space.flat_dim == 1:
        raise pyrado.ShapeErr(given=val_space, expected_match=(1, ))

    dt = np.asarray(dt, dtype=np.float32)
    t_end = np.asarray(t_end, dtype=np.float32)

    # First iter
    t_intvl = t_intvl_space.sample_uniform()
    t_intvl = np.clip(t_intvl, dt, t_end + dt)
    t = np.arange(start=0.0, stop=t_intvl, step=dt)
    vals = val_space.sample_uniform() * np.ones_like(t)

    # Iterate until the time is up
    while t[-1] < t_end:
        t_intvl = t_intvl_space.sample_uniform()
        t_intvl = np.clip(t_intvl, dt, t_end - t[-1] + dt)
        t_new = np.arange(start=t[-1] + dt, stop=t[-1] + t_intvl, step=dt)
        t = np.concatenate([t, t_new])
        val_new = val_space.sample_uniform() * np.ones_like(t_new)
        vals = np.concatenate([vals, val_new])

    return t, vals
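Usage sketch (assuming `BoxSpace` takes lower and upper bounds positionally, as used elsewhere in these examples, and that `pyrado.spaces` is the import path): sample a piecewise-constant signal with hold times in [0.5, 2] s and values in [-1, 1].

import numpy as np
from pyrado.spaces import BoxSpace  # assumed import path

t, vals = skyline(
    dt=0.01,
    t_end=10.0,
    t_intvl_space=BoxSpace(np.array([0.5]), np.array([2.0])),
    val_space=BoxSpace(np.array([-1.0]), np.array([1.0])),
)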
Example no. 14
    def unstandardize(
            self, data: Union[np.ndarray,
                              to.Tensor]) -> Union[np.ndarray, to.Tensor]:
        r"""
        Revert the previous standardization of the input data to make it $\sim N(\mu, \sigma)$.

        :param data: input ndarray or Tensor
        :return: un-standardized ndarray or Tensor
        """
        if self.mean is None or self.std is None:
            raise pyrado.ValueErr(msg="Use standardize before unstandardize!")

        # Input type must match stored type
        if isinstance(data, np.ndarray) and isinstance(self.mean, np.ndarray):
            pass
        elif isinstance(data, to.Tensor) and isinstance(self.mean, to.Tensor):
            pass
        elif isinstance(data, np.ndarray) and isinstance(self.mean, to.Tensor):
            self.mean = self.mean.numpy()
            self.std = self.std.numpy()
        elif isinstance(data, to.Tensor) and isinstance(self.mean, np.ndarray):
            self.mean = to.from_numpy(self.mean).to(to.get_default_dtype())
            self.std = to.from_numpy(self.std).to(to.get_default_dtype())

        x_unstd = data * self.std + self.mean
        return x_unstd
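A round-trip sketch, assuming the object holding this method (called `Standardizer` here; the name is hypothetical) also offers a matching `standardize()` that stores `self.mean` and `self.std`:

import numpy as np

data = np.array([1.0, 2.0, 3.0])
standardizer = Standardizer()       # hypothetical name for the class holding this method
z = standardizer.standardize(data)  # approximately N(0, 1)
assert np.allclose(standardizer.unstandardize(z), data)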
Example no. 15
    def __init__(self, *args, **kwargs):
        """
        Constructor

        :param expl_r_init: initial radius of the hyper sphere for the exploration strategy
        :param args: forwarded to the superclass constructor
        :param kwargs: forwarded to the superclass constructor
        """
        # Preprocess inputs and call HC's constructor
        expl_r_init = kwargs.pop('expl_r_init')
        if expl_r_init <= 0:
            raise pyrado.ValueErr(given=expl_r_init, g_constraint='0')

        if 'expl_std_init' in kwargs:
            # This is just for the ability to create one common hyper-param list for HCNormal and HCHyper
            kwargs.pop('expl_std_init')

        # Get from kwargs with default values
        self.expl_r_min = kwargs.pop('expl_r_min', 0.01)
        self.expl_r_max = max(expl_r_init, kwargs.pop('expl_r_max', 10.))

        # Call HC's constructor
        super().__init__(*args, **kwargs)

        self._expl_strat = HyperSphereParamNoise(
            param_dim=self._policy.num_param,
            expl_r_init=expl_r_init,
        )
Example no. 16
    def __init__(self,
                 save_dir: str,
                 max_iter: int,
                 policy: Optional[Policy],
                 logger: Optional[StepLogger] = None,
                 save_name: str = 'algo'):
        """
        Constructor

        :param save_dir: directory to save the snapshots i.e. the results in
        :param max_iter: maximum number of iterations
        :param policy: Pyrado policy (subclass of PyTorch's Module) to train
        :param logger: logger for every step of the algorithm, if `None` the default logger will be created
        :param save_name: name of the algorithm's pickle file without the ending, this becomes important if the
                          algorithm is run as a subroutine
        """
        if not (isinstance(max_iter, int) and max_iter > 0):
            raise pyrado.ValueErr(given=max_iter, g_constraint='0')
        if not isinstance(policy, Policy) and policy is not None:
            raise pyrado.TypeErr(
                msg='If a policy is given, it needs to be of type Policy!')
        if not isinstance(logger, StepLogger) and logger is not None:
            raise pyrado.TypeErr(
                msg='If a logger is given, it needs to be of type StepLogger!')
        if not isinstance(save_name, str):
            raise pyrado.TypeErr(given=save_name, expected_type=str)

        self._save_dir = save_dir
        self._save_name = save_name
        self._max_iter = max_iter
        self._curr_iter = 0
        self._policy = policy
        self._logger = logger
        self._cnt_samples = 0
        self._highest_avg_ret = -pyrado.inf  # for snapshot_mode = 'best'
Example no. 17
    def __init__(self, wrapped_env: Union[SimEnv, EnvWrapper],
                 mask: Union[List[str], Tuple[str]]):
        """
        Constructor

        :param wrapped_env: environment to wrap
        :param mask: every domain parameter whose name is in this mask will be transformed. Capitalisation matters.
        """
        if not isinstance(wrapped_env, (SimEnv, EnvWrapper)):
            raise pyrado.TypeErr(given=wrapped_env,
                                 expected_type=(SimEnv, EnvWrapper))
        if not isinstance(mask, (list, tuple)):
            raise pyrado.TypeErr(given=mask,
                                 expected_type=(list, tuple))

        Serializable._init(self, locals())

        # Call EnvWrapper's constructor
        super().__init__(wrapped_env)

        if any(item not in wrapped_env.supported_domain_param
               for item in mask):
            raise pyrado.ValueErr(
                msg=
                f"The specified mask {mask} contains domain parameters that are not supported by the wrapped "
                f"environment! Here are the supported domain parameters {wrapped_env.supported_domain_param}."
            )
        self._mask = mask
Example no. 18
def generate_oscillation_data(dt, t_end, excitation):
    """
    Use the `OneMassOscillatorSim` environment to generate a 1-dim damped oscillation signal.

    :param dt: time step size [s]
    :param t_end: time duration [s]
    :param excitation: type of excitation, either (initial) 'position' or 'force' (function of time)
    :return: 1-dim oscillation trajectory
    """
    env = OneMassOscillatorSim(dt, int(np.ceil(t_end / dt)))
    env.domain_param = dict(m=1., k=10., d=2.0)
    if excitation == 'force':
        policy = TimePolicy(
            env.spec,
            functools.partial(_dirac_impulse, env_spec=env.spec, amp=0.5), dt)
        reset_kwargs = dict(init_state=np.array([0, 0]))
    elif excitation == 'position':
        policy = IdlePolicy(env.spec)
        reset_kwargs = dict(init_state=np.array([0.5, 0]))
    else:
        raise pyrado.ValueErr(given=excitation,
                              eq_constraint="'force' or 'position'")

    # Generate the data
    ro = rollout(env, policy, reset_kwargs=reset_kwargs, record_dts=False)
    return ro.observations[:, 0]
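Usage sketch: a free oscillation from a 0.5 m initial displacement, sampled at 100 Hz for 5 s.

signal = generate_oscillation_data(dt=0.01, t_end=5.0, excitation='position')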
Example no. 19
    def get_rollout(self, index):
        """
        Get an indexed sub-rollout.

        :param index: generic index of sub-rollout, negative values, slices and iterables are allowed
        :return: selected subset
        """
        if not self.continuous:
            raise pyrado.ValueErr(
                msg='Sub-rollouts are only supported on continuous data.')
        if isinstance(index, slice):
            # Analyze slice
            start, end, step = index.indices(self.rollout_count)
            if step == 1:
                # A simple, continuous slice
                bounds = self._rollout_bounds
                start_step = bounds[start]
                end_step = bounds[end]
                return self[start_step:end_step]

            # Convert nonstandard slice to range
            index = range(start, end, step)
        if isinstance(index, Iterable):
            # Nontrivial non-continuous slice, need to slice each element and concat them.
            return StepSequence.concat([self.get_rollout(i) for i in index],
                                       self.data_format)

        # Decode index
        index = _index_to_int(index, self.rollout_count)
        bounds = self._rollout_bounds
        start_step = bounds[index]
        end_step = bounds[index + 1]
        return self[start_step:end_step]
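A usage sketch of the supported index types (assuming `ro` is a continuous `StepSequence` concatenated from several rollouts):

first = ro.get_rollout(0)           # plain integer index
last = ro.get_rollout(-1)           # negative indices are decoded like list indices
pair = ro.get_rollout(slice(0, 2))  # continuous slice, resolved via the rollout bounds
some = ro.get_rollout([0, 2])       # iterable, sliced element-wise and concatenated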
Example no. 20
    def __init__(self,
                 num_checkpoints: int,
                 init_checkpoint: int = 0,
                 *args,
                 **kwargs):
        """
        Constructor

        :param num_checkpoints: total number of checkpoints
        :param init_checkpoint: initial value of the cyclic counter, defaults to 0; negative values can be used to mark
                                sections that should only be executed once
        :param args: positional arguments forwarded to Algorithm's constructor
        :param kwargs: keyword arguments forwarded to Algorithm's constructor
        """
        if not isinstance(num_checkpoints, int):
            raise pyrado.TypeErr(given=num_checkpoints, expected_type=int)
        if num_checkpoints < 1:
            raise pyrado.ValueErr(given=num_checkpoints, ge_constraint="1")
        if not isinstance(init_checkpoint, int):
            raise pyrado.TypeErr(given=init_checkpoint, expected_type=int)

        self._num_checkpoints = num_checkpoints
        self._curr_checkpoint = init_checkpoint

        # Call Algorithm's constructor
        super().__init__(*args, **kwargs)
Example no. 21
def print_cbt(msg: str, color: str = "", bright: bool = False, tag: str = "", end="\n"):
    """
    Print a colored (and bright) message with a tag in the beginning.

    :param msg: string to print
    :param color: color to print in, default `''` is the IDE's/system's default
    :param bright: flag if the message should be printed bright
    :param tag: tag to be printed in brackets in front of the message
    :param end: endline symbol forwarded to `print()`
    """
    brgt = Style.BRIGHT if bright else ""

    if not isinstance(tag, str):
        raise pyrado.TypeErr(given=tag, expected_type=str)
    else:
        if tag != "":
            tag = f"[{tag}] "

    color = color.lower()
    if color in ["", "w", "white"]:
        print(brgt + tag + msg + Style.RESET_ALL, end=end)
    elif color in ["y", "yellow"]:
        print(Fore.YELLOW + brgt + tag + msg + Style.RESET_ALL, end=end)
    elif color in ["b", "blue"]:
        print(Fore.BLUE + brgt + tag + msg + Style.RESET_ALL, end=end)
    elif color in ["g", "green"]:
        print(Fore.GREEN + brgt + tag + msg + Style.RESET_ALL, end=end)
    elif color in ["r", "red"]:
        print(Fore.RED + brgt + tag + msg + Style.RESET_ALL, end=end)
    elif color in ["c", "cyan"]:
        print(Fore.CYAN + brgt + tag + msg + Style.RESET_ALL, end=end)
    else:
        raise pyrado.ValueErr(given=color, eq_constraint="'w', 'y', 'b', 'g', 'r', or 'c'")
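For example, the following call prints a bright cyan message prefixed with the tag in brackets:

print_cbt('Connected to the Barrett WAM client.', color='c', bright=True, tag='robcom')
# output: [robcom] Connected to the Barrett WAM client.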
Example no. 22
    def __init__(self,
                 num_feat_per_dim: int,
                 bounds: [
                     Sequence[np.ndarray], Sequence[to.Tensor], Sequence[float]
                 ],
                 scale: float = None,
                 state_wise_norm: bool = True):
        """
        Constructor

        :param num_feat_per_dim: number of radial basis functions, identical for every dimension of the input
        :param bounds: lower and upper bound for the Gaussians' centers, the input dimension is inferred from them
        :param scale: scaling factor for the squared distance, if `None` the factor is determined such that two
                      neighboring RBFs have a value of 0.2 at the other center
        :param state_wise_norm: `True` to apply the normalization across input state dimensions separately (every
                                 dimension sums to one), or `False` to jointly normalize them
        """
        if not num_feat_per_dim > 1:
            raise pyrado.ValueErr(given=num_feat_per_dim, g_constraint='1')
        if not len(bounds) == 2:
            raise pyrado.ShapeErr(given=bounds, expected_match=np.empty(2))

        # Get the bounds, e.g. from the observation space, and clip them in case they are infinite
        bounds_to = [None, None]
        for i, b in enumerate(bounds):
            if isinstance(b, np.ndarray):
                bounds_to[i] = to.from_numpy(b)
            elif isinstance(b, to.Tensor):
                bounds_to[i] = b.clone()
            elif isinstance(b, (int, float)):
                bounds_to[i] = to.tensor(b, dtype=to.get_default_dtype()).view(
                    1, )
            else:
                raise pyrado.TypeErr(
                    given=b, expected_type=[np.ndarray, to.Tensor, int, float])
        if any([any(np.isinf(b)) for b in bounds_to]):
            bound_lo, bound_up = [
                to.clamp(b, min=-1e6, max=1e6) for b in bounds_to
            ]
            print_cbt('Clipped the bounds of the RBF centers to [-1e6, 1e6].',
                      'y')
        else:
            bound_lo, bound_up = bounds_to

        # Create a matrix with center locations for the Gaussians
        num_dim = len(bound_lo)
        self.num_feat = num_feat_per_dim * num_dim
        self.centers = to.empty(num_feat_per_dim, num_dim)
        for i in range(num_dim):
            # Features along columns
            self.centers[:, i] = to.linspace(bound_lo[i], bound_up[i],
                                             num_feat_per_dim)

        if scale is None:
            delta_center = self.centers[1, :] - self.centers[0, :]
            self.scale = -to.log(to.tensor(0.2)) / to.pow(delta_center, 2)
        else:
            self.scale = scale

        self._state_wise_norm = state_wise_norm
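A quick check of the default scale (assuming the features have the form exp(-scale * d^2), where d is the distance to a center): two neighboring RBFs evaluate to 0.2 at each other's center, as stated in the docstring.

import torch as to

delta = to.tensor(0.5)  # assumed distance between neighboring centers
scale = -to.log(to.tensor(0.2)) / delta**2
assert to.isclose(to.exp(-scale * delta**2), to.tensor(0.2))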
Example no. 23
def pd_capacity_32_abs(p: to.Tensor, s: to.Tensor, h: to.Tensor, tau: to.Tensor, **kwargs) -> to.Tensor:
    r"""
    Capacity-based dynamics with 3 stable ($p=-C$, $p=0$, $p=C$) and 2 unstable fixed points ($p=-C/2$, $p=C/2$) for $s=0$

    $\tau \dot{p} =  \left( s + (h - p) (1 - \frac{\left| (h - p) \right|}{C})
    (1 - \frac{2 \left| (h - p) \right|}{C}) \right)$

    The "absolute version" of `pd_capacity_32` is less skewed due to a lower oder of the resulting polynomial.

    .. note::
        Intended to be used with tanh activation function, e.g. for the velocity tasks in RcsPySim.

    :param p: potential, higher values lead to higher activations
    :param s: stimulus, higher values lead to larger changes of the potentials (depends on the dynamics function)
    :param h: resting level, a.k.a. constant offset
    :param tau: time scaling factor, higher values lead to slower changes of the potentials (linear dependency)
    :param kwargs: additional parameters to the potential dynamics
    """
    if not all(tau > 0):
        raise pyrado.ValueErr(given=tau, g_constraint="0")
    return (
        s
        + (h - p)
        * (to.ones_like(p) - to.abs(h - p) / kwargs["capacity"])
        * (to.ones_like(p) - 2 * to.abs(h - p) / kwargs["capacity"])
    ) / tau
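A numeric check of the stated fixed points for s = 0, h = 0, and C = 1: the dynamics vanish at p in {-C, -C/2, 0, C/2, C}.

import torch as to

p = to.tensor([-1.0, -0.5, 0.0, 0.5, 1.0])
zero = to.zeros_like(p)
dp = pd_capacity_32_abs(p, s=zero, h=zero, tau=to.ones_like(p), capacity=to.tensor(1.0))
assert to.allclose(dp, zero)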
Example no. 24
    def dt(self, dt: Union[int, float]):
        """ Set the time step size. """
        if not isinstance(dt, (float, int)):
            raise pyrado.TypeErr(given=dt, expected_type=[float, int])
        if not dt > 0:
            raise pyrado.ValueErr(given=dt, g_constraint='0')
        self._dt = float(dt)
Example no. 25
    def rollout_lengths(self):
        """ Lengths of sub-rollouts. """
        if not self.continuous:
            raise pyrado.ValueErr(
                msg='Sub-rollouts are only supported on continuous data.')
        bounds = self._rollout_bounds
        return bounds[1:] - bounds[:-1]
Example no. 26
    def scale_to(
            self, data: Union[np.ndarray,
                              to.Tensor]) -> Union[np.ndarray, to.Tensor]:
        r"""
        Transform the input data to be in $[a, b]$, where $a$ and $b$ are defined during construction.

        :param data: unscaled input ndarray or Tensor
        :return: ndarray or Tensor scaled to be in $[a, b]$
        """
        # Convert to the right type if necessary
        bound_lo, bound_up = self._convert_bounds(data)

        if not (bound_lo < bound_up).all():
            raise pyrado.ValueErr(given_name="lower bound",
                                  l_constraint="upper bound")

        if isinstance(data, np.ndarray):
            self._data_min = np.min(data)
            self._data_span = np.max(data) - np.min(data)
        elif isinstance(data, to.Tensor):
            self._data_min = to.min(data)
            self._data_span = to.max(data) - to.min(data)
        else:
            raise pyrado.TypeErr(given=data,
                                 expected_type=[np.ndarray, to.Tensor])

        data_ = (data - self._data_min) / self._data_span
        return data_ * (bound_up - bound_lo) + bound_lo
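Usage sketch (assuming `MinMaxScaler` is the class holding this method, as referenced by `DomainDistrParamPolicy.init_param()` above): the minimum of the data maps to the lower bound and the maximum to the upper bound.

import numpy as np

scaler = MinMaxScaler(bound_lo=-0.5, bound_up=0.5)
scaled = scaler.scale_to(np.array([0.0, 5.0, 10.0]))
# -> [-0.5, 0.0, 0.5]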
Example no. 27
    def curr_step(self, curr_step: int):
        """Set the number of the current replay step (0 for the initial step)."""
        if not isinstance(curr_step, int) or not 0 <= curr_step < len(self._act_rec_buffer[self._curr_rec]):
            raise pyrado.ValueErr(
                given=curr_step, ge_constraint="0 (int)", l_constraint=len(self._act_rec_buffer[self._curr_rec])
            )
        self._curr_step = curr_step
Example no. 28
def create_collision_task(env_spec: EnvSpec, factor: float) -> MaskedTask:
    """
    Create a task which punishes collision costs given a collision model with pairs of bodies.
    This task only looks at the instantaneous collision cost.

    .. note::
        This task was designed with an RcsPySim environment in mind, but is not restricted to these environments.

    :param env_spec: environment specification
    :param factor: cost / reward function scaling factor
    :return: masked task that only considers a subspace of all observations
    """
    if not factor >= 0:
        raise pyrado.ValueErr(given=factor, ge_constraint="0")

    # Define the indices for selection. This needs to match the observations' names in RcsPySim.
    obs_labels = ["CollCost"]

    # Get the masked environment specification
    spec = EnvSpec(
        env_spec.obs_space,
        env_spec.act_space,
        env_spec.state_space.subspace(
            env_spec.state_space.create_mask(obs_labels)),
    )

    rew_fcn = AbsErrRewFcn(q=np.array([factor]),
                           r=np.zeros(spec.act_space.shape))

    # Create an endlessly running desired state task (no collision is desired)
    task = DesStateTask(spec, np.zeros(spec.state_space.shape), rew_fcn,
                        never_succeeded)

    # Mask selected collision cost observation
    return MaskedTask(env_spec, task, obs_labels)
Example no. 29
def ensure_math_mode(inp: [str, Sequence[str]]) -> [str, list]:
    """
    Naive way to ensure that a string is compatible with LaTeX math mode for printing.

    :param inp: input string
    :return: string in math mode
    """
    if isinstance(inp, str):
        if inp.count("$") == 0:
            # There are no $ symbols yet
            if not inp[0] == "$":
                inp = "$" + inp
            if not inp[-1] == "$":
                inp = inp + "$"
        elif inp.count("$") % 2 == 0:
            # There is an even number of $ symbols, so we assume they are correct and do nothing
            pass
        else:
            raise pyrado.ValueErr(msg=f"The string {inp} must contain an even number of '$' symbols!")

    elif inp is None:
        return None  # in case a Space has one dimension but no labels

    elif isinstance(inp, Iterable):
        # Do it recursively
        return [ensure_math_mode(s) if s is not None else None for s in inp]  # skip None entries

    else:
        raise pyrado.TypeErr(given=inp, expected_type=[str, list])

    return inp
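For example (note that an even number of existing '$' symbols leaves the string untouched):

ensure_math_mode('x_1')                # -> '$x_1$'
ensure_math_mode(['$\\alpha$', 'b'])   # -> ['$\\alpha$', '$b$']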
Example no. 30
    def override_bounds(bounds: np.ndarray, override: Optional[Mapping[str, float]],
                        bound_label: str, names: np.ndarray) -> np.ndarray:
        """
        Override a given bound. This function is useful if some entries of the observation space have an infinite bound
        and/or you want to specify a certain bound.

        :param bounds: bound to override
        :param override: mapping from space labels to the new bound values for those entries
        :param bound_label: label of the bound to override, e.g. 'lower' or 'upper'
        :param names: labels of the space entries
        :return: new bound created from a copy of the old bound
        """
        if not override:
            return bounds
        # Override in copy of bounds
        bc = bounds.copy()
        for idx, name in np.ndenumerate(names):
            ov = override.get(name)
            if ov is not None:
                # Apply override
                bc[idx] = ov
            elif np.isinf(bc[idx]):
                # Report unbounded entry
                raise pyrado.ValueErr(
                    msg=f'{name} entry of the {bound_label} bound is infinite and not overridden. '
                        f'Cannot apply normalization.')
            else:
                # Do nothing if ov is None
                pass
        return bc
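Usage sketch: replace an infinite lower bound for the entry labeled 'theta' while leaving finite entries untouched.

import numpy as np

bounds = np.array([-1.0, -np.inf])
names = np.array(['x', 'theta'])
new_lb = ObsNormWrapper.override_bounds(bounds, {'theta': -np.pi}, 'lower', names)
# -> array([-1., -3.14159265...]); an infinite entry without an override raises a ValueErr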