def __init__(self,
                 save_dir: str,
                 dataset: TimeSeriesDataSet,
                 policy: Policy,
                 max_iter: int,
                 windowed: bool = False,
                 cascaded: bool = False,
                 optim_class=optim.Adam,
                 optim_hparam: dict = None,
                 loss_fcn=nn.MSELoss(),
                 lr_scheduler=None,
                 lr_scheduler_hparam: Optional[dict] = None,
                 num_workers: int = 1,
                 logger: StepLogger = None):
        """
        Constructor

        :param save_dir: directory to save the snapshots i.e. the results in
        :param dataset: complete data set, where the samples are along the first dimension
        :param policy: Pyrado policy (subclass of PyTorch's Module) to train
        :param max_iter: maximum number of iterations
        :param windowed: if `True`, one fixed-length (short) input sequence is provided to the policy which then
                         predicts one sample, else the complete (long) input sequence is fed to the policy which then
                         predicts a sequence of samples of equal length
        :param cascaded: if `True`, the predictions are made based on previous predictions instead of the current input
        :param optim_class: PyTorch optimizer class
        :param optim_hparam: hyper-parameters for the PyTorch optimizer
        :param loss_fcn: loss function for training, by default `torch.nn.MSELoss()`
        :param lr_scheduler: learning rate scheduler that does one step per epoch (pass through the whole data set)
        :param lr_scheduler_hparam: hyper-parameters for the learning rate scheduler
        :param num_workers: number of environments for parallel sampling
        :param logger: logger for every step of the algorithm, if `None` the default logger will be created
        """
        if not isinstance(dataset, TimeSeriesDataSet):
            raise pyrado.TypeErr(given=dataset,
                                 expected_type=TimeSeriesDataSet)
        if not policy.is_recurrent:
            raise pyrado.TypeErr(
                msg='TSPred algorithm only supports recurrent policies!')

        # Call Algorithm's constructor
        super().__init__(save_dir, max_iter, policy, logger)

        # Store the inputs
        self.dataset = dataset
        self.cascaded = cascaded
        self.windowed = windowed
        self.loss_fcn = loss_fcn

        optim_hparam = dict(
            lr=1e-1, eps=1e-8,
            weight_decay=1e-4) if optim_hparam is None else optim_hparam
        self.optim = optim_class([{
            'params': self._policy.parameters()
        }], **optim_hparam)
        self._lr_scheduler = lr_scheduler
        self._lr_scheduler_hparam = lr_scheduler_hparam
        if lr_scheduler is not None and lr_scheduler_hparam is not None:
            self._lr_scheduler = lr_scheduler(self.optim,
                                              **lr_scheduler_hparam)
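
A minimal sketch of the optimizer-plus-scheduler construction pattern used above, with plain PyTorch and assumed hyper-parameter values (the GRU stand-in is hypothetical, any nn.Module works):

import torch.nn as nn
import torch.optim as optim

net = nn.GRU(input_size=4, hidden_size=8)  # hypothetical stand-in for the recurrent policy

optim_hparam = dict(lr=1e-1, eps=1e-8, weight_decay=1e-4)  # the defaults used above
optimizer = optim.Adam([{'params': net.parameters()}], **optim_hparam)

# One scheduler step per epoch, i.e. per pass through the whole data set
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
for epoch in range(3):
    # ... forward pass, loss computation, and loss.backward() would go here ...
    optimizer.step()
    scheduler.step()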
Example #2
    def __init__(self,
                 spec: EnvSpec,
                 net: nn.Module,
                 init_param_kwargs: dict = None,
                 use_cuda: bool = False):
        """
        Constructor

        :param spec: environment specification
        :param net: module that approximates the Q-values given the observations and possible (discrete) actions.
                    Make sure to create this object with the correct input and output sizes by using
                    `DiscreteActQValPolicy.get_qfcn_input_size()` and `DiscreteActQValPolicy.get_qfcn_output_size()`.
        :param init_param_kwargs: additional keyword arguments for the policy parameter initialization
        :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU
        """
        if not isinstance(spec.act_space, DiscreteSpace):
            raise pyrado.TypeErr(given=spec.act_space,
                                 expected_type=DiscreteSpace)
        if not isinstance(net, nn.Module):
            raise pyrado.TypeErr(given=net, expected_type=nn.Module)

        # Call Policy's constructor
        super().__init__(spec, use_cuda)

        # Make sure the net runs on the correct device
        self.net = net.to(self.device)

        # Call custom initialization function after PyTorch network parameter initialization
        init_param_kwargs = init_param_kwargs if init_param_kwargs is not None else dict()
        self.init_param(None, **init_param_kwargs)
    def adapt(self,
              mean: to.Tensor = None,
              std: Union[to.Tensor, float] = None):
        """
        Adapt the mean and the standard deviation of the noise on the action or parameters.
        Use `None` to leave one of the parameters at its current value.

        :param mean: exploration strategy's new mean
        :param std: exploration strategy's new standard deviation
        """
        if not (isinstance(mean, to.Tensor) or mean is None):
            raise pyrado.TypeErr(given=mean, expected_type=to.Tensor)
        if not (isinstance(std, to.Tensor) and
                (std >= 0).all() or std is None):
            raise pyrado.TypeErr(
                msg='The std must be a Tensor with all elements >= 0, or None!')

        if mean is not None:
            assert self.mean is not None, 'Can not change fixed zero mean!'
            if not mean.shape == self.mean.shape:
                raise pyrado.ShapeErr(given=mean, expected_match=self.mean)
            self.mean.data = mean
        if std is not None:
            if not std.shape == self.log_std.shape:
                raise pyrado.ShapeErr(given=std, expected_match=self.log_std)
            self.std = std
Example #4
    def __init__(self, spec: EnvSpec, use_cuda: bool):
        """
        Constructor

        :param spec: environment specification
        :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU
        """
        if not isinstance(spec, EnvSpec):
            raise pyrado.TypeErr(given=spec, expected_type=EnvSpec)
        if not isinstance(use_cuda, bool):
            raise pyrado.TypeErr(given=use_cuda, expected_type=bool)

        # Call torch.nn.Module's constructor
        super().__init__()

        self._env_spec = spec
        if not use_cuda:
            self._device = 'cpu'
        elif use_cuda and to.cuda.is_available():
            self._device = 'cuda'
        elif use_cuda and not to.cuda.is_available():
            warn(
                'Tried to run on CUDA, but it is not available. Falling back to CPU.'
            )
            self._device = 'cpu'
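
The CPU/CUDA fallback above can be reproduced in isolation with plain PyTorch; a small sketch:

from warnings import warn
import torch as to

def select_device(use_cuda: bool) -> str:
    """Return 'cuda' if requested and available, otherwise fall back to the CPU."""
    if use_cuda and to.cuda.is_available():
        return 'cuda'
    if use_cuda:
        warn('Tried to run on CUDA, but it is not available. Falling back to CPU.')
    return 'cpu'

device = select_device(use_cuda=True)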
Example #5
    def __init__(self,
                 env_spec: EnvSpec,
                 space_des: Space,
                 rew_fcn: RewFcn = ZeroPerStepRewFcn):
        """
        Constructor

        :param env_spec: environment specification of a simulated or real environment
        :param space_des: desired state a.k.a. goal state
        :param rew_fcn: reward function, an instance of a subclass of RewFcn
        """
        if not isinstance(env_spec, EnvSpec):
            raise pyrado.TypeErr(given=env_spec, expected_type=EnvSpec)
        if not isinstance(space_des, Space):
            raise pyrado.TypeErr(given=space_des, expected_type=Space)
        if not isinstance(
                rew_fcn,
            (ZeroPerStepRewFcn, PlusOnePerStepRewFcn, MinusOnePerStepRewFcn)):
            raise pyrado.TypeErr(given=rew_fcn,
                                 expected_type=[
                                     ZeroPerStepRewFcn, PlusOnePerStepRewFcn,
                                     MinusOnePerStepRewFcn
                                 ])

        self._env_spec = env_spec
        self._space_des = space_des
        self._rew_fcn = rew_fcn
Example #6
    def __init__(self,
                 env_spec: EnvSpec,
                 rew_fcn: RewFcn,
                 init_angle: float,
                 des_angle_delta: float = np.pi / 2.,
                 angle_tol: float = 1 / 180. * np.pi):
        """
        Constructor

        :param env_spec: environment specification of a simulated or real environment
        :param rew_fcn: reward function, an instance of a subclass of RewFcn
        :param init_angle: initial angle
        :param des_angle_delta: desired angle that counts as a flip
        :param angle_tol: tolerance
        """
        if not isinstance(env_spec, EnvSpec):
            raise pyrado.TypeErr(given=env_spec, expected_type=EnvSpec)
        if not isinstance(rew_fcn, RewFcn):
            raise pyrado.TypeErr(given=rew_fcn, expected_type=RewFcn)

        self._env_spec = env_spec
        self._rew_fcn = rew_fcn
        self._init_angle = init_angle
        self._last_angle = init_angle
        self.des_angle_delta = des_angle_delta
        self.angle_tol = angle_tol
        self._held_rew = 0.
Example #7
def print_domain_params(domain_params: [dict, Sequence[dict]]):
    """
    Print a list of (domain parameter) dicts / a dict (of domain parameters) prettily.

    :param domain_params: a list of dicts or a single dict containing the domain parameters
    """
    if domain_params:  # do nothing if the domain_param list/dict is empty

        if isinstance(domain_params, list):
            # Check the first element
            if isinstance(domain_params[0], dict):
                # Assuming all dicts have identical keys
                print(tabulate([dp.values() for dp in domain_params],
                               headers=domain_params[0].keys(), tablefmt='simple'))
            else:
                raise pyrado.TypeErr(given=domain_params, expected_type=dict)

        elif isinstance(domain_params, dict):
            dp = deepcopy(domain_params)
            for k, v in dp.items():
                # Check if the values of the dict are iterable
                if isinstance(v, (int, float, bool)):
                    dp[k] = [v]  # cast scalar to a list of one element to make it iterable for tabulate
                elif isinstance(v, np.ndarray) and v.size == 1:
                    dp[k] = [v.item()]  # cast scalar array to a list of one element to make it iterable for tabulate
                elif isinstance(v, list):
                    pass
                else:
                    raise pyrado.TypeErr(given=v, expected_type=[int, float, bool, list])
            print(tabulate(dp, headers="keys", tablefmt='simple'))

        else:
            raise pyrado.TypeErr(given=domain_params, expected_type=[dict, list])
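
Assuming the `tabulate` package is installed, the same pretty-printing idea can be tried on plain dicts (the parameter names below are made up):

from tabulate import tabulate

# A list of domain-parameter dicts, assumed to share identical keys
domain_params = [dict(mass=1.0, length=0.50), dict(mass=1.2, length=0.45)]
print(tabulate([dp.values() for dp in domain_params],
               headers=domain_params[0].keys(), tablefmt='simple'))

# A single dict: wrap scalar values in one-element lists so tabulate can iterate over them
single = {k: [v] for k, v in dict(mass=1.0, length=0.50).items()}
print(tabulate(single, headers='keys', tablefmt='simple'))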
    def __call__(self, dp_values: to.Tensor = None) -> Tuple[to.Tensor, StepSequence]:
        """
        Yield one rollout from the pre-recorded buffer of rollouts, and compute the features of the data used for sbi.

        :param dp_values: ignored, just here for the interface compatibility
        :return: features computed from the time series data, and the complete rollout
        """
        print_cbt_once(f"Using pre-recorded target domain rollouts to from {self.rollouts_dir}", "g")

        # Get the pre-recorded rollout and advance the index
        if not isinstance(self.rollouts_rec, list):
            raise pyrado.TypeErr(given=self.rollouts_rec, expected_type=list)
        if not isinstance(self.rollouts_rec[0], StepSequence):
            raise pyrado.TypeErr(given=self.rollouts_rec[0], expected_type=StepSequence)

        ro = self.rollouts_rec[self._ring_idx]
        self._ring_idx = (self._ring_idx + 1) % self.num_rollouts

        # Pre-processing
        ro.torch()

        # Assemble the data
        data_real = to.cat([ro.states[:-1, :], ro.get_data_values(self._action_field)], dim=1)
        if self._embedding.requires_target_domain_data:
            data_real = to.cat([data_real, data_real], dim=1)

        # Compute the features
        data_real = data_real.unsqueeze(0)  # only one target domain rollout
        data_real = self._embedding(Embedding.pack(data_real))  # shape [1, dim_feat]

        # Check shape (here no batching and always one rollout)
        if data_real.shape[0] != 1 or data_real.ndim != 2:
            raise pyrado.ShapeErr(given=data_real, expected_match=(1, -1))

        return data_real, ro
Example #9
    def argmax_posterior_mean(cands: to.Tensor, cands_values: to.Tensor,
                              ddp_space: BoxSpace, num_restarts: int,
                              num_samples: int) -> to.Tensor:
        """
        Compute the GP input with the maximal posterior mean.

        :param cands: candidates a.k.a. x
        :param cands_values: observed values a.k.a. y
        :param ddp_space: space of the domain distribution parameters, indicates the lower and upper bound
        :param num_restarts: number of restarts for the optimization of the acquisition function
        :param num_samples: number of samples for the optimization of the acquisition function
        :return: un-normalized candidate with maximum posterior value a.k.a. x
        """
        if not isinstance(cands, to.Tensor):
            raise pyrado.TypeErr(given=cands, expected_type=to.Tensor)
        if not isinstance(cands_values, to.Tensor):
            raise pyrado.TypeErr(given=cands_values, expected_type=to.Tensor)
        if not isinstance(ddp_space, BoxSpace):
            raise pyrado.TypeErr(given=ddp_space, expected_type=BoxSpace)

        # Normalize the input data and standardize the output data
        uc_projector = UnitCubeProjector(
            to.from_numpy(ddp_space.bound_lo).to(dtype=to.get_default_dtype()),
            to.from_numpy(ddp_space.bound_up).to(dtype=to.get_default_dtype()),
        )
        cands_norm = uc_projector.project_to(cands)
        cands_values_stdized = standardize(cands_values)

        if cands_norm.shape[0] > cands_values.shape[0]:
            print_cbt(
                f"There are {cands.shape[0]} candidates but only {cands_values.shape[0]} evaluations. Ignoring "
                f"the candidates without evaluation for computing the argmax.",
                "y",
            )
            cands_norm = cands_norm[:cands_values.shape[0], :]

        # Create and fit the GP model
        gp = SingleTaskGP(cands_norm, cands_values_stdized)
        gp.likelihood.noise_covar.register_constraint("raw_noise",
                                                      GreaterThan(1e-5))
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        fit_gpytorch_model(mll)

        # Find position with maximal posterior mean
        cand_norm, _ = optimize_acqf(
            acq_function=PosteriorMean(gp),
            bounds=to.stack(
                [to.zeros(ddp_space.flat_dim),
                 to.ones(ddp_space.flat_dim)]).to(dtype=to.float32),
            q=1,
            num_restarts=num_restarts,
            raw_samples=num_samples,
        )

        cand_norm = cand_norm.to(dtype=to.get_default_dtype())
        cand = uc_projector.project_back(cand_norm.detach())
        print_cbt(f"Converged to argmax of the posterior mean: {cand.numpy()}",
                  "g",
                  bright=True)
        return cand
Example #10
    def minimize(
        self, loss_fcn: Callable, rets: to.Tensor = None, param_samples: to.Tensor = None, w: to.Tensor = None
    ):
        """
        Minimize the given dual function. This function can be called for the dual evaluation loss or the dual
        improvement loss.

        :param loss_fcn: function to minimize, different for `wml()` and `wmap()`
        :param rets: return values per policy sample after averaging over multiple rollouts using the same policy
        :param param_samples: all sampled policy parameters
        :param w: weights of the policy parameter samples
        """
        if self.optim_mode == "scipy":
            # Use scipy optimizers
            if loss_fcn == self.dual_evaluation:
                res = optimize.minimize(
                    partial(self.dual_evaluation, rets=rets.numpy()),
                    jac=partial(get_grad_via_torch, fcn_to=partial(self.dual_evaluation, rets=rets)),
                    x0=np.array([1.0]),
                    method="SLSQP",
                    bounds=((1e-8, 1e8),),
                )
            elif loss_fcn == self.dual_improvement:
                res = optimize.minimize(
                    partial(self.dual_improvement, param_samples=param_samples, w=w),
                    jac=partial(
                        get_grad_via_torch, fcn_to=partial(self.dual_improvement, param_samples=param_samples, w=w)
                    ),
                    x0=np.array([1.0]),
                    method="SLSQP",
                    bounds=((1e-8, 1e8),),
                )
            else:
                raise pyrado.TypeErr(msg="Received an improper loss function in REPS.minimize()!")

            eta = to.from_numpy(res["x"]).to(to.get_default_dtype())
            self._log_eta = to.log(eta)

        else:
            for _ in tqdm(
                range(self.num_epoch_dual),
                total=self.num_epoch_dual,
                desc=f"Minimizing dual",
                unit="epochs",
                file=sys.stdout,
                leave=False,
            ):
                # Use PyTorch optimizers
                self.optim_dual.zero_grad()
                if loss_fcn == self.dual_evaluation:
                    loss = self.dual_evaluation(self.eta, rets)
                elif loss_fcn == self.dual_improvement:
                    loss = self.dual_improvement(self.eta, param_samples, w)
                else:
                    raise pyrado.TypeErr(msg="Received an improper loss function in REPS.minimize()!")
                loss.backward()
                self.optim_dual.step()

        if to.isnan(self._log_eta):
            raise RuntimeError(f"The dual's optimization parameter _log_eta became NaN!")
Example #11
    def __init__(self, wrapped_env: Union[SimEnv, EnvWrapper],
                 mask: Union[List[str], Tuple[str]]):
        """
        Constructor

        :param wrapped_env: environment to wrap
        :param mask: every domain parameter whose name is in this mask will be transformed. Capitalisation matters.
        """
        if not isinstance(wrapped_env, (SimEnv, EnvWrapper)):
            raise pyrado.TypeErr(given=wrapped_env,
                                 expected_type=(SimEnv, EnvWrapper))
        if not isinstance(mask, (list, tuple)):
            raise pyrado.TypeErr(given=mask,
                                 expected_type=(list, tuple))

        Serializable._init(self, locals())

        # Call EnvWrapper's constructor
        super().__init__(wrapped_env)

        if any(item not in wrapped_env.supported_domain_param
               for item in mask):
            raise pyrado.ValueErr(
                msg=
                f"The specified mask {mask} contains domain parameters that are not supported by the wrapped "
                f"environment! Here are the supported domain parameters {wrapped_env.supported_domain_param}."
            )
        self._mask = mask
Example #12
    def __init__(self, env_spec: EnvSpec, condition_fcn: Callable,
                 is_success_condition: bool):
        """
        Constructor

        :usage:
        .. code-block:: python

            task = FinalRewTask(
                       ConditionOnlyTask(<some EnvSpec>, <some Callable>, <True or False>),
                       mode=FinalRewMode(time_dependent=True)
            )

        :param env_spec: environment specification of a simulated or real environment
        :param condition_fcn: function to determine if the task was solved (or failed, see `is_success_condition`)
        :param is_success_condition: if `True` the `condition_fcn` returns `True` for a success,
                                     if `False` the `condition_fcn` returns `True` for a failure
        """
        if not isinstance(env_spec, EnvSpec):
            raise pyrado.TypeErr(given=env_spec, expected_type=EnvSpec)
        if not callable(condition_fcn):
            raise pyrado.TypeErr(given=condition_fcn, expected_type=Callable)

        self._env_spec = env_spec
        self.condition_fcn = condition_fcn
        self.is_success_condition = is_success_condition
    def adapt(self,
              mean: to.Tensor = None,
              halfspan: Union[to.Tensor, float] = None):
        """
        Adapt the mean and the half interval span of the noise on the action or parameters.
        Use `None` to leave one of the parameters at its current value.

        :param mean: exploration strategy's new mean
        :param halfspan: exploration strategy's new half interval span
        """
        if not (isinstance(mean, to.Tensor) or mean is None):
            raise pyrado.TypeErr(given=mean, expected_type=to.Tensor)
        if not (isinstance(halfspan, to.Tensor) and
                (halfspan >= 0).all() or halfspan is None):
            raise pyrado.TypeErr(
                msg='The halfspan must be a Tensor with all elements >= 0, or None!')
        if mean is not None:
            assert self.mean is not None, 'Can not change fixed zero mean!'
            if not mean.shape == self.mean.shape:
                raise pyrado.ShapeErr(given=mean, expected_match=self.mean)
            self.mean.data = mean
        if halfspan is not None:
            if not halfspan.shape == self.log_halfspan.shape:
                raise pyrado.ShapeErr(given=halfspan,
                                      expected_match=self.log_halfspan)
            self.halfspan = halfspan
Example #14
    def __init__(self,
                 save_dir: str,
                 max_iter: int,
                 policy: Optional[Policy],
                 logger: Optional[StepLogger] = None,
                 save_name: str = 'algo'):
        """
        Constructor

        :param save_dir: directory to save the snapshots i.e. the results in
        :param max_iter: maximum number of iterations
        :param policy: Pyrado policy (subclass of PyTorch's Module) to train
        :param logger: logger for every step of the algorithm, if `None` the default logger will be created
        :param save_name: name of the algorithm's pickle file without the ending, this becomes important if the
                          algorithm is run as a subroutine
        """
        if not (isinstance(max_iter, int) and max_iter > 0):
            raise pyrado.ValueErr(given=max_iter, g_constraint='0')
        if not isinstance(policy, Policy) and policy is not None:
            raise pyrado.TypeErr(
                msg='If a policy is given, it needs to be of type Policy!')
        if not isinstance(logger, StepLogger) and logger is not None:
            raise pyrado.TypeErr(
                msg='If a logger is given, it needs to be of type StepLogger!')
        if not isinstance(save_name, str):
            raise pyrado.TypeErr(given=save_name, expected_type=str)

        self._save_dir = save_dir
        self._save_name = save_name
        self._max_iter = max_iter
        self._curr_iter = 0
        self._policy = policy
        self._logger = logger
        self._cnt_samples = 0
        self._highest_avg_ret = -pyrado.inf  # for snapshot_mode = 'best'
Example #15
    def __init__(self,
                 num_checkpoints: int,
                 init_checkpoint: int = 0,
                 *args,
                 **kwargs):
        """
        Constructor

        :param num_checkpoints: total number of checkpoints
        :param init_checkpoint: initial value of the cyclic counter, defaults to 0; negative values can be used to mark
                                sections that should only be executed once
        :param args: positional arguments forwarded to Algorithm's constructor
        :param kwargs: keyword arguments forwarded to Algorithm's constructor
        """
        if not isinstance(num_checkpoints, int):
            raise pyrado.TypeErr(given=num_checkpoints, expected_type=int)
        if num_checkpoints < 1:
            raise pyrado.ValueErr(given=num_checkpoints, ge_constraint="1")
        if not isinstance(init_checkpoint, int):
            raise pyrado.TypeErr(given=init_checkpoint, expected_type=int)

        self._num_checkpoints = num_checkpoints
        self._curr_checkpoint = init_checkpoint

        # Call Algorithm's constructor
        super().__init__(*args, **kwargs)
Example #16
    def __init__(self,
                 env: Env,
                 actor: Policy,
                 critic: GAE,
                 save_dir: str,
                 max_iter: int,
                 logger: StepLogger = None):
        """
        Constructor

        :param env: environment in which the policy operates
        :param actor: policy taking the actions in the environment
        :param critic: estimates the value of states (e.g. advantage or return)
        :param save_dir: directory to save the snapshots i.e. the results in
        :param max_iter: maximum number of iterations
        :param logger: logger for every step of the algorithm
        """
        if not isinstance(env, Env):
            raise pyrado.TypeErr(given=env, expected_type=Env)
        if not isinstance(critic, GAE):
            raise pyrado.TypeErr(given=critic, expected_type=GAE)

        # Call Algorithm's constructor
        super().__init__(save_dir, max_iter, actor, logger)

        # Store the inputs
        self._env = env
        self._critic = critic

        # Initialize
        self._expl_strat = None
        self.sampler = None
        self._lr_scheduler = None
        self._lr_scheduler_hparam = None
    def __init__(self,
                 save_dir: str,
                 env: Env,
                 policy: Policy,
                 max_iter: int,
                 num_rollouts: int,
                 pop_size: [int, None] = None,
                 num_workers: int = 4,
                 logger: StepLogger = None):
        """
        Constructor

        :param save_dir: directory to save the snapshots i.e. the results in
        :param env: the environment in which the policy operates
        :param policy: policy to be updated
        :param max_iter: maximum number of iterations (i.e. policy updates) that this algorithm runs
        :param num_rollouts: number of rollouts per policy parameter set
        :param pop_size: number of solutions in the population, pass `None` to use a default that scales logarithmically
                         with the number of policy parameters
        :param num_workers: number of environments for parallel sampling
        :param logger: logger for every step of the algorithm, if `None` the default logger will be created
        """
        if not isinstance(env, Env):
            raise pyrado.TypeErr(given=env, expected_type=Env)
        if not (isinstance(pop_size, int) or pop_size is None):
            raise pyrado.TypeErr(given=pop_size, expected_type=int)
        if isinstance(pop_size, int) and pop_size <= 0:
            raise pyrado.ValueErr(given=pop_size, g_constraint='0')

        # Call Algorithm's constructor
        super().__init__(save_dir, max_iter, policy, logger)

        # Store the inputs
        self._env = env
        self.num_rollouts = num_rollouts

        # Auto-select population size if needed
        if pop_size is None:
            pop_size = 4 + int(3 * np.log(policy.num_param))
            print_cbt(f'Initialized population size to {pop_size}.', 'y')
        self.pop_size = pop_size

        # Create sampler
        self.sampler = ParameterExplorationSampler(
            env,
            policy,
            num_workers=num_workers,
            num_rollouts_per_param=num_rollouts,
        )

        # Stopping criterion
        self.ret_avg_stack = 1e3 * np.random.randn(20)  # stack size = 20
        self.thold_ret_std = 1e-1  # algorithm terminates if below for multiple iterations

        # Saving the best policy (this is not the mean for policy parameter exploration)
        self.best_policy_param = policy.param_values.clone()

        # Set this in subclasses
        self._expl_strat = None
Example #18
def cpp_export(
    save_dir: pyrado.PathLike,
    policy: Policy,
    env: Optional[SimEnv] = None,
    policy_export_name: str = "policy_export",
    write_policy_node: bool = True,
    policy_node_name: str = "policy",
):
    """
    Convenience function to export the policy using PyTorch's scripting or tracing, and the experiment's XML
    configuration if the environment is from RcsPySim.

    :param save_dir: directory to save in
    :param policy: (trained) policy
    :param env: environment the policy was trained in
    :param policy_export_name: name of the exported policy file without the file type ending
    :param write_policy_node: if `True`, write the PyTorch-based control policy into the experiment's XML configuration.
                              This requires the experiment's XML configuration to be exported beforehand.
    :param policy_node_name: name of the control policies node in the XML file, e.g. 'policy' or 'preStrikePolicy'
    """
    if not osp.isdir(save_dir):
        raise pyrado.PathErr(given=save_dir)
    if not isinstance(policy, Policy):
        raise pyrado.TypeErr(given=policy, expected_type=Policy)
    if not isinstance(policy_export_name, str):
        raise pyrado.TypeErr(given=policy_export_name, expected_type=str)

    # Use torch.jit.trace / torch.jit.script (the latter if recurrent) to generate a torch.jit.ScriptModule
    ts_module = policy.double().script()  # can be evaluated like a regular PyTorch module

    # Serialize the script module to a file and save it in the same directory we loaded the policy from
    policy_export_file = osp.join(save_dir, f"{policy_export_name}.pt")
    ts_module.save(policy_export_file)  # former .zip, and before that .pth
    print_cbt(f"Exported the loaded policy to {policy_export_file}",
              "g",
              bright=True)

    # Export the experiment config for C++
    exp_export_file = osp.join(save_dir, "ex_config_export.xml")
    if env is not None and isinstance(inner_env(env), RcsSim):
        inner_env(env).save_config_xml(exp_export_file)
        print_cbt(f"Exported experiment configuration to {exp_export_file}",
                  "g",
                  bright=True)

    # Open the XML file again to add the policy node
    if write_policy_node and osp.isfile(exp_export_file):
        tree = et.parse(exp_export_file)
        root = tree.getroot()
        policy_node = et.Element(policy_node_name)
        policy_node.set("type", "torch")
        policy_node.set("file", f"{policy_export_name}.pt")
        root.append(policy_node)
        tree.write(exp_export_file)
        print_cbt(
            f"Added {policy_export_name}.pt to the experiment configuration.",
            "g")
Example #19
def plot_observations_actions_rewards(ro: StepSequence):
    """
    Plot all observation, action, and reward trajectories of the given rollout.

    :param ro: input rollout
    """
    if hasattr(ro, "observations") and hasattr(ro, "actions") and hasattr(
            ro, "env_infos"):
        if not isinstance(ro.observations, np.ndarray):
            raise pyrado.TypeErr(given=ro.observations,
                                 expected_type=np.ndarray)
        if not isinstance(ro.actions, np.ndarray):
            raise pyrado.TypeErr(given=ro.actions, expected_type=np.ndarray)

        dim_obs = ro.observations.shape[1]
        dim_act = ro.actions.shape[1]

        # Use recorded time stamps if possible
        t = getattr(ro, "time", np.arange(0, ro.length + 1))

        num_rows, num_cols = num_rows_cols_from_length(dim_obs + dim_act + 1,
                                                       transposed=True)
        fig, axs = plt.subplots(num_rows,
                                num_cols,
                                figsize=(14, 10),
                                tight_layout=True)
        axs = np.atleast_2d(axs)
        axs = correct_atleast_2d(axs)
        fig.canvas.manager.set_window_title(
            "Observations, Actions, and Reward over Time")
        colors = plt.get_cmap("tab20")(np.linspace(
            0, 1, dim_obs if dim_obs > dim_act else dim_act))

        # Observations (without the last time step)
        for idx_o in range(dim_obs):
            ax = axs[idx_o // num_cols,
                     idx_o % num_cols] if isinstance(axs, np.ndarray) else axs
            ax.plot(t, ro.observations[:, idx_o], c=colors[idx_o])
            ax.set_ylabel(_get_obs_label(ro, idx_o))

        # Actions
        for idx_a in range(dim_obs, dim_obs + dim_act):
            ax = axs[idx_a // num_cols,
                     idx_a % num_cols] if isinstance(axs, np.ndarray) else axs
            ax.plot(t[:len(ro.actions[:, idx_a - dim_obs])],
                    ro.actions[:, idx_a - dim_obs],
                    c=colors[idx_a - dim_obs])
            ax.set_ylabel(_get_act_label(ro, idx_a - dim_obs))
        # action_labels = env.unwrapped.action_space.labels; label=action_labels[0]

        # Rewards
        ax = axs[num_rows - 1,
                 num_cols - 1] if isinstance(axs, np.ndarray) else axs
        ax.plot(t[:len(ro.rewards)], ro.rewards, c="k")
        ax.set_ylabel("reward")
        ax.set_xlabel("time")
        plt.subplots_adjust(hspace=0.5)
Example #20
    def __init__(self,
                 noise_dim: [int, tuple],
                 std_init: [float, to.Tensor],
                 std_min: [float, to.Tensor] = 0.01,
                 train_mean: bool = False,
                 learnable: bool = True):
        """
        Constructor

        :param noise_dim: number of dimensions of the noise
        :param std_init: initial standard deviation for the exploration noise
        :param std_min: minimal standard deviation for the exploration noise
        :param train_mean: `True` if the noise should have an adaptive nonzero mean, `False` otherwise
        :param learnable: `True` if the parameters should be tuneable (default), `False` for shallow use (just sampling)
        """
        if not isinstance(std_init, (float, to.Tensor)):
            raise pyrado.TypeErr(given=std_init,
                                 expected_type=(float, to.Tensor))
        if isinstance(std_init,
                      to.Tensor) and not std_init.size() == noise_dim:
            raise pyrado.ShapeErr(given=std_init,
                                  expected_match=to.empty(noise_dim))
        if not (isinstance(std_init, float) and std_init > 0
                or isinstance(std_init, to.Tensor) and all(std_init > 0)):
            raise pyrado.ValueErr(given=std_init, g_constraint='0')
        if not isinstance(std_min, (float, to.Tensor)):
            raise pyrado.TypeErr(given=std_min,
                                 expected_type=(float, to.Tensor))
        if not (isinstance(std_min, float) and std_min > 0
                or isinstance(std_min, to.Tensor) and all(std_min > 0)):
            raise pyrado.ValueErr(given=std_min, g_constraint='0')

        super().__init__()

        # Register parameters
        if learnable:
            self.cov = nn.Parameter(to.Tensor(noise_dim, noise_dim),
                                    requires_grad=True)
            self.mean = nn.Parameter(
                to.Tensor(noise_dim),
                requires_grad=True) if train_mean else None
        else:
            self.cov = to.empty(noise_dim, noise_dim)
            self.mean = None

        # Initialize parameters
        self.cov_init = std_init**2 * to.eye(noise_dim) if isinstance(
            std_init, float) else to.diag(to.pow(std_init, 2))
        self.std_min = to.tensor(std_min) if isinstance(std_min,
                                                        float) else std_min
        if not isinstance(self.cov_init, to.Tensor):
            raise pyrado.TypeErr(given=self.cov_init, expected_type=to.Tensor)
        if not isinstance(self.std_min, to.Tensor):
            raise pyrado.TypeErr(given=self.std_min, expected_type=to.Tensor)

        self.reset_expl_params()
    def __init__(
        self,
        env: Union[SimEnv, EnvWrapper],
        policy: Policy,
        dp_mapping: Mapping[int, str],
        embedding: Embedding,
        num_segments: int = None,
        len_segments: int = None,
        stop_on_done: bool = True,
        rollouts_real: Optional[List[StepSequence]] = None,
        use_rec_act: bool = True,
    ):
        """
        Constructor

        :param env: environment in which the policy operates; it must not be a randomized environment since we want to
                    randomize it manually via the domain parameters coming from the sbi package
        :param policy: policy used for sampling the rollout
        :param dp_mapping: mapping from subsequent integers (starting at 0) to domain parameter names (e.g. mass)
        :param embedding: embedding used for pre-processing the data before (later) passing it to the posterior
        :param num_segments: number of segments into which the rollouts are split. For every segment, the initial
                             state of the simulation is reset, and thus for every segment the features of the
                             trajectories are computed separately. Either specify `num_segments` or `len_segments`.
        :param len_segments: length of the segments into which the rollouts are split. For every segment, the initial
                             state of the simulation is reset, and thus for every segment the features of the
                             trajectories are computed separately. Either specify `num_segments` or `len_segments`.
        :param stop_on_done: if `True`, the rollouts are stopped as soon as they hit the state or observation space
                             boundaries. This behavior is safe, but can lead to short trajectories which are eventually
                             padded with zeroes. Choose `False` to ignore the boundaries (dangerous on the real system).
        :param rollouts_real: list of rollouts recorded from the target domain, which are used to sync the simulations'
                              initial states
        :param use_rec_act: if `True` the recorded actions from the target domain are used to generate the rollouts
                            during simulation (feed-forward). If `False`, the policy is used to generate (potentially)
                            state-dependent actions (feed-back).
        """
        if typed_env(env, DomainRandWrapper):
            raise pyrado.TypeErr(
                msg="The environment passed to sbi as simulator must not be wrapped with a subclass of"
                "DomainRandWrapper since sbi has be able to set the domain parameters explicitly!"
            )
        if rollouts_real is not None:
            if not isinstance(rollouts_real, list):
                raise pyrado.TypeErr(given=rollouts_real, expected_type=list)
            if not isinstance(rollouts_real[0], StepSequence):  # only check 1st element
                raise pyrado.TypeErr(given=rollouts_real[0], expected_type=StepSequence)

        Serializable._init(self, locals())

        super().__init__(env, policy, embedding, num_segments, len_segments, stop_on_done)

        self.dp_names = dp_mapping.values()
        self.rollouts_real = rollouts_real
        self.use_rec_act = use_rec_act
        if self.rollouts_real is not None:
            self._set_action_field(self.rollouts_real)
Example #22
    def add_value(self, key: str, value, round_digits: Optional[int] = None):
        """
        Add a column value to the current step.

        :param key: data key
        :param value: value to record, pass '' to print nothing
        :param round_digits: digits to rounds to, pass `None` (default) for no rounding
        """
        if not isinstance(key, str):
            raise pyrado.TypeErr(given=key, expected_type=str)
        if round_digits is not None and not isinstance(round_digits, int):
            raise pyrado.TypeErr(given=round_digits, expected_type=int)

        # Compute full prefixed key
        key = self._prefix_str + key

        if self._first_step:
            # Record new key during first step
            self._value_keys.append(key)
        elif key not in self._value_keys:
            # Make sure the key was used during first step
            raise pyrado.KeyErr(
                msg=
                "New value keys may only be added before the first step is finished"
            )

        # Pre-process lists
        if isinstance(value, list):
            if len(value) == 1:
                value = value[0]
        # Pre-process PyTorch tensors and numpy arrays (the same way)
        if isinstance(value, to.Tensor):
            value = value.detach().cpu().numpy()
        if isinstance(value, np.ndarray):
            if round_digits is not None:
                value = np.round(value, round_digits)
            if value.ndim == 0 or value.size == 1:  # scalar
                value = value.item()
            else:
                value = value.flatten()
                if value.ndim == 1:  # vector
                    value = value.tolist()
                else:
                    raise pyrado.ShapeErr(
                        msg="Logging 2-dim arrays or tensors is not supported."
                    )
        # Pre-process floats
        elif isinstance(value, float):
            if round_digits is not None:
                value = round(value, round_digits)

        # Record value
        self._current_values[key] = value
        self._values_changed = True
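
The value pre-processing cascade above (tensor to numpy, scalars to Python numbers, vectors to flat lists) can be illustrated standalone; a sketch, not the logger itself:

import numpy as np
import torch as to

def preprocess(value, round_digits=None):
    """Mimic the logger's pre-processing: tensors become numpy arrays, scalars become Python
    numbers, and vectors become flat lists."""
    if isinstance(value, to.Tensor):
        value = value.detach().cpu().numpy()
    if isinstance(value, np.ndarray):
        if round_digits is not None:
            value = np.round(value, round_digits)
        if value.ndim == 0 or value.size == 1:  # scalar
            return value.item()
        return value.flatten().tolist()  # vector (or flattened higher-dim array)
    if isinstance(value, float) and round_digits is not None:
        return round(value, round_digits)
    return value

print(preprocess(to.tensor([1.23456, 2.0], dtype=to.double), round_digits=2))  # approx. [1.23, 2.0]
print(preprocess(np.array(3.14159), round_digits=3))                           # 3.142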
    def __call__(self,
                 err_s: np.ndarray,
                 err_a: np.ndarray,
                 remaining_steps: int = None) -> float:
        if not isinstance(err_s, np.ndarray):
            raise pyrado.TypeErr(given=err_s, expected_type=np.ndarray)
        if not isinstance(err_a, np.ndarray):
            raise pyrado.TypeErr(given=err_a, expected_type=np.ndarray)

        # Reward should be roughly within [0, 1]
        return float(self.factor * (1 - np.abs(err_s[1] / np.pi)**2) +
                     (1 - self.factor) * (np.abs(err_s[0])))
Example #24
    def __init__(self, idx_sin: int, idx_cos: int):
        """
        Constructor

        :param idx_sin: index of the sin-transformed observation dimension, i.e. the numerator
        :param idx_cos: index of the cos-transformed observation dimension, i.e. the denominator
        """
        if not isinstance(idx_sin, int):
            raise pyrado.TypeErr(given=idx_sin, expected_type=int)
        if not isinstance(idx_cos, int):
            raise pyrado.TypeErr(given=idx_cos, expected_type=int)
        self._idx_sin = idx_sin
        self._idx_cos = idx_cos
Example #25
    def __init__(self, params, param_min: to.Tensor, param_max: to.Tensor):
        # assert all(group['params'].size() == 1 for group in params)  # only for scalar params
        if not isinstance(param_min, to.Tensor):
            raise pyrado.TypeErr(given=param_min, expected_type=to.Tensor)
        if not isinstance(param_max, to.Tensor):
            raise pyrado.TypeErr(given=param_max, expected_type=to.Tensor)
        if not param_min.shape == param_max.shape:
            raise pyrado.ShapeErr(given=param_min, expected_match=param_max)
        if not all(param_min < param_max):
            raise pyrado.ValueErr(given=param_min, l_constraint=param_max)

        defaults = dict(param_min=param_min, param_max=param_max)
        super().__init__(params, defaults)
        self.gr = (math.sqrt(5) + 1) / 2
Example #26
def skyline(
    dt: Union[int, float, np.ndarray],
    t_end: Union[int, float, np.ndarray],
    t_intvl_space: BoxSpace,
    val_space: BoxSpace,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Step function that randomly samples a value from the given range, and then holds this value for a time interval
    which is also randomly sampled given a range of time intervals. This procedure is repeated until the sequence is
    long enough, i.e. roughly `t_end / dt` samples.

    :param dt: time step size
    :param t_end: final time
    :param t_intvl_space: 1-dim `BoxSpace` determining the range of time intervals that can be sampled
    :param val_space: 1-dim `BoxSpace` determining the range of values that can be sampled
    :return: array of time steps together with the associated array of values
    """
    if dt <= 0:
        raise pyrado.ValueErr(given=dt, g_constraint="0")
    if t_end < dt:
        raise pyrado.ValueErr(given=t_end, ge_constraint=f"{dt}")
    if not isinstance(t_intvl_space, BoxSpace):
        raise pyrado.TypeErr(given=t_intvl_space, expected_type=BoxSpace)
    if not isinstance(val_space, BoxSpace):
        raise pyrado.TypeErr(given=val_space, expected_type=BoxSpace)
    if not t_intvl_space.flat_dim == 1:
        raise pyrado.ShapeErr(given=t_intvl_space, expected_match=(1, ))
    if not val_space.flat_dim == 1:
        raise pyrado.ShapeErr(given=val_space, expected_match=(1, ))

    dt = np.asarray(dt, dtype=np.float32)
    t_end = np.asarray(t_end, dtype=np.float32)

    # First iter
    t_intvl = t_intvl_space.sample_uniform()
    t_intvl = np.clip(t_intvl, dt, t_end + dt)
    t = np.arange(start=0.0, stop=t_intvl, step=dt)
    vals = val_space.sample_uniform() * np.ones_like(t)

    # Iterate until the time is up
    while t[-1] < t_end:
        t_intvl = t_intvl_space.sample_uniform()
        t_intvl = np.clip(t_intvl, dt, t_end - t[-1] + dt)
        t_new = np.arange(start=t[-1] + dt, stop=t[-1] + t_intvl, step=dt)
        t = np.concatenate([t, t_new])
        val_new = val_space.sample_uniform() * np.ones_like(t_new)
        vals = np.concatenate([vals, val_new])

    return t, vals
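
A hedged usage sketch; the `BoxSpace` import path and constructor arguments below are assumptions about Pyrado's API and may differ in detail:

from pyrado.spaces import BoxSpace  # assumed import path

t_intvl_space = BoxSpace(0.5, 2.0, shape=(1,))  # hold times between 0.5 s and 2 s (assumed signature)
val_space = BoxSpace(-1.0, 1.0, shape=(1,))     # values between -1 and 1 (assumed signature)

t, vals = skyline(dt=0.01, t_end=10.0, t_intvl_space=t_intvl_space, val_space=val_space)
assert t.shape == vals.shape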
Example #27
    def truncate_rollouts(
        rollouts_real: Sequence[StepSequence],
        rollouts_sim: Sequence[StepSequence],
        replicate: bool = True
    ) -> Tuple[Sequence[StepSequence], Sequence[StepSequence]]:
        """
        In case (some of the) rollouts failed or succeeded in one domain but not in the other, we truncate the longer
        observation sequence. When truncating, we compare each of the M real rollouts to each of the N simulated
        rollouts, thus replicating the real rollouts N times and the simulated rollouts M times.

        :param rollouts_real: M real-world rollouts of different length if `replicate = True`, else K real-world
                              rollouts of different length
        :param rollouts_sim: N simulated rollouts of different length if `replicate = True`, else K simulated
                              rollouts of different length
        :param replicate: if `False` the i-th rollout from `rollouts_real` is (only) compared with the i-th rollout from
                          `rollouts_sim`, in this case the number of rollouts and the initial states have to match
        :return: MxN real-world rollouts and MxN simulated rollouts of equal length if `replicate = True`, else
                 K real-world rollouts and K simulated rollouts of equal length
        """
        if not isinstance(rollouts_real[0], Iterable):
            raise pyrado.TypeErr(given=rollouts_real[0],
                                 expected_type=Iterable)
        if not isinstance(rollouts_sim[0], Iterable):
            raise pyrado.TypeErr(given=rollouts_sim[0], expected_type=Iterable)
        if not replicate and len(rollouts_real) != len(rollouts_sim):
            raise pyrado.ShapeErr(
                msg=
                "In case of a one on one comparison, the number of rollouts needs to be equal!"
            )

        # Choose the function for pairing up the rollouts for the comparison
        comp_fcn = product if replicate else zip

        # Go over all rollout combinations individually
        rollouts_real_tr = []
        rollouts_sim_tr = []
        for ro_r, ro_s in comp_fcn(rollouts_real, rollouts_sim):
            # Handle rollouts of different length, assuming that they are starting at the same state
            if ro_r.length < ro_s.length:
                rollouts_real_tr.append(ro_r)
                rollouts_sim_tr.append(ro_s[:ro_r.length])
            elif ro_r.length > ro_s.length:
                rollouts_real_tr.append(ro_r[:ro_s.length])
                rollouts_sim_tr.append(ro_s)
            else:
                rollouts_real_tr.append(ro_r)
                rollouts_sim_tr.append(ro_s)

        return rollouts_real_tr, rollouts_sim_tr
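
The pairing-and-truncation logic can be seen on plain lists, where `itertools.product` replicates every real/simulated combination and `zip` pairs them one-to-one; a simplified sketch:

from itertools import product

rollouts_real = [[1, 2, 3], [1, 2, 3, 4, 5]]  # two "real" rollouts of different length
rollouts_sim = [[9, 9], [9, 9, 9, 9]]         # two "simulated" rollouts of different length

def truncate(pairs):
    out_real, out_sim = [], []
    for ro_r, ro_s in pairs:
        n = min(len(ro_r), len(ro_s))  # truncate the longer rollout of each pair
        out_real.append(ro_r[:n])
        out_sim.append(ro_s[:n])
    return out_real, out_sim

real_rep, sim_rep = truncate(product(rollouts_real, rollouts_sim))  # replicate: 2x2 = 4 pairs
real_one, sim_one = truncate(zip(rollouts_real, rollouts_sim))      # one-to-one: 2 pairs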
Example #28
def normalize(x: Union[np.ndarray, to.Tensor],
              axis: int = -1,
              order: int = 1,
              eps: float = 1e-8) -> Union[np.ndarray, to.Tensor]:
    """
    Normalize a numpy array or a PyTorch Tensor without changing the input.
    Choosing `axis=1` and `order=1` makes all columns sum to 1.

    :param x: input to normalize
    :param axis: axis of the array to normalize along
    :param order: order of the norm (e.g., L1 norm: absolute values, L2 norm: quadratic values)
    :param eps: lower bound on the norm, to avoid division by zero
    :return: normalized array
    """
    if isinstance(x, np.ndarray):
        norm_x = np.atleast_1d(np.linalg.norm(
            x, ord=order, axis=axis))  # calculate norm over axis
        norm_x = np.where(norm_x > eps, norm_x,
                          np.ones_like(norm_x))  # avoid division by 0
        return x / np.expand_dims(norm_x, axis)  # element wise division
    elif isinstance(x, to.Tensor):
        norm_x = to.norm(x, p=order, dim=axis)  # calculate norm over axis
        norm_x = to.where(norm_x > eps, norm_x,
                          to.ones_like(norm_x))  # avoid division by 0
        return x / norm_x.unsqueeze(axis)  # element wise division
    else:
        raise pyrado.TypeErr(given=x, expected_type=[np.ndarray, to.Tensor])
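
Calling `normalize` as defined above on a small array shows the effect of the L1 norm along the last axis:

import numpy as np

x = np.array([[1.0, 3.0], [2.0, 2.0]])
x_n = normalize(x, axis=-1, order=1)
print(x_n)  # each row's absolute values sum to 1, e.g. [0.25, 0.75]
print(x)    # the input array is left unchanged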
Example #29
    def create_mask(self, *idcs) -> np.ndarray:
        """
        Create a mask selecting the given indices from this space.
        Every index should be a number or a name in the space's labels.

        :param idcs: index list, which can either be varargs or a single iterable
        :return: boolean mask array with `1` at each index specified by the indices or labels
        """
        mask = np.zeros(self.shape, dtype=np.bool_)

        if len(idcs) == 1 and isinstance(idcs[0], Iterable) and not isinstance(idcs[0], str):
            # Unwrap single iterable argument
            idcs = idcs[0]

        # Set selected values to 1
        for idx in idcs:
            if isinstance(idx, str):
                # Handle labels
                if self.labels is None:
                    raise pyrado.TypeErr(msg="The space must be labeled to use label-based indexing!")
                for idx_label, label in np.ndenumerate(self.labels):
                    if label == idx:
                        idx = idx_label
                        break
                else:
                    raise pyrado.ValueErr(msg=f"Label {idx} not found in {self}")

            if np.all(mask[idx] == 1):
                label_desc = f" ({self.labels[idx]})" if self.labels is not None else ""
                raise pyrado.ValueErr(msg=f"Duplicate index {idx}{label_desc}")

            mask[idx] = 1

        return mask
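
A standalone numpy sketch of the label-based mask construction, with made-up labels (not the Space class itself):

import numpy as np

labels = np.array(["x", "x_dot", "theta", "theta_dot"])  # hypothetical labels of a 4-dim space

def create_mask_from_labels(labels: np.ndarray, *idcs) -> np.ndarray:
    mask = np.zeros(labels.shape, dtype=np.bool_)
    for idx in idcs:
        if isinstance(idx, str):  # resolve a label to its index
            matches = np.flatnonzero(labels == idx)
            if matches.size == 0:
                raise ValueError(f"Label {idx} not found")
            idx = int(matches[0])
        if mask[idx]:
            raise ValueError(f"Duplicate index {idx}")
        mask[idx] = True
    return mask

print(create_mask_from_labels(labels, "theta", 0))  # [ True False  True False]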
Example #30
    def scale_to(self, data: Union[np.ndarray, to.Tensor]) -> Union[np.ndarray, to.Tensor]:
        r"""
        Transform the input data to be in $[a, b]$, where $a$ and $b$ are defined during construction.

        :param data: unscaled input ndarray or Tensor
        :return: ndarray or Tensor scaled to be in $[a, b]$
        """
        # Convert to the right type if necessary
        bound_lo, bound_up = self._convert_bounds(data)

        if not (bound_lo < bound_up).all():
            raise pyrado.ValueErr(given_name="lower bound",
                                  l_constraint="upper bound")

        if isinstance(data, np.ndarray):
            self._data_min = np.min(data)
            self._data_span = np.max(data) - np.min(data)
        elif isinstance(data, to.Tensor):
            self._data_min = to.min(data)
            self._data_span = to.max(data) - to.min(data)
        else:
            raise pyrado.TypeErr(given=data,
                                 expected_type=[np.ndarray, to.Tensor])

        data_ = (data - self._data_min) / self._data_span
        return data_ * (bound_up - bound_lo) + bound_lo
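
At its core this is a min-max scaling to `[a, b]`; a numpy-only sketch with assumed bounds, independent of the class above:

import numpy as np

def minmax_scale_to(data: np.ndarray, bound_lo: float, bound_up: float) -> np.ndarray:
    """Min-max scale the data so that it lies in [bound_lo, bound_up]."""
    data_min = np.min(data)
    data_span = np.max(data) - np.min(data)
    data_unit = (data - data_min) / data_span  # now in [0, 1]
    return data_unit * (bound_up - bound_lo) + bound_lo

x = np.array([2.0, 5.0, 11.0])
print(minmax_scale_to(x, -1.0, 1.0))  # approximately [-1., -0.33, 1.]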