Ejemplo n.º 1
0
def test_cov(x, data_along_rows):
    """
    Compare the `cov` function under test against `np.cov` in shape and in value.

    :param x: 2-dim NumPy array holding the data
    :param data_along_rows: forwarded to `cov`; if `True`, the covariance matrix is expected to have one
                            row/column per column of `x`, otherwise one per row of `x`
    """
    # NumPy's `rowvar` flag is the logical opposite of `data_along_rows`
    expected = np.cov(x, rowvar=not data_along_rows)
    actual = cov(to.from_numpy(x), data_along_rows=data_along_rows).numpy()

    # A covariance matrix has to be square
    assert actual.shape[0] == actual.shape[1]

    # Its size has to match the number of variables, which depends on the data layout
    num_vars = x.shape[1] if data_along_rows else x.shape[0]
    assert expected.shape[0] == num_vars
    assert actual.shape[0] == num_vars

    # Finally, both implementations have to agree numerically
    assert np.allclose(expected, actual)
Ejemplo n.º 2
0
    def update(self,
               param_results: ParameterSamplingResult,
               ret_avg_curr: float = None):
        """
        Update the policy parameters and the exploration noise from the best-performing parameter samples.

        The samples are ranked by their mean return and the top `num_is_samples` of them (the importance samples
        a.k.a. elites) are kept. The policy parameters are set to the mean of the elites, and the exploration noise
        is adapted to the elites' empirical spread plus an extra exploration term which decays linearly to zero
        over `extra_expl_decay_iter` iterations.

        :param param_results: sampling result whose `mean_returns` and `parameters` (one row per sample) are read
        :param ret_avg_curr: not used by this method
        :raises NotImplementedError: if the exploration noise is neither a `DiagNormalNoise` nor a `FullNormalNoise`
        """
        # Convert the per-sample mean returns to a tensor
        rets_avg_ros = to.tensor(param_results.mean_returns)

        # Descending sort according to return values and the importance samples a.k.a. elites (see [1, p.12])
        idcs_dcs = to.argsort(rets_avg_ros, descending=True)
        idcs_dcs = idcs_dcs[:self.num_is_samples]
        rets_avg_is = rets_avg_ros[idcs_dcs]
        params_is = param_results.parameters[idcs_dcs, :]

        # Update the policy parameters from the mean importance samples
        self._policy.param_values = to.mean(params_is, dim=0)

        # Update the exploration covariance from the empirical variance of the importance samples
        if isinstance(self._expl_strat.noise, DiagNormalNoise):
            # Diagonal case: per-dimension standard deviation of the elites plus the decaying extra std
            std_is = to.std(params_is, dim=0)
            extra_expl_std = self.extra_expl_std_init * max(
                1. - self._curr_iter / self.extra_expl_decay_iter,
                0  # see [2, p.4]
            )
            self._expl_strat.noise.adapt(std=std_is + extra_expl_std)
        elif isinstance(self._expl_strat.noise, FullNormalNoise):
            # Full case: covariance matrix of the elites plus the decaying extra variance (squared extra std)
            cov_is = cov(params_is, data_along_rows=True)
            extra_expl_cov = to.pow(self.extra_expl_std_init, 2) * max(
                1. - self._curr_iter / self.extra_expl_decay_iter,
                0  # see [2, p.4]
            )
            self._expl_strat.noise.adapt(cov=cov_is + extra_expl_cov)
        else:
            raise NotImplementedError  # CEM could also sample using different distributions

        # Logging
        self.logger.add_value('median imp samp return', to.median(rets_avg_is))
        self.logger.add_value('min imp samp return', to.min(rets_avg_is))
        self.logger.add_value('min expl strat std',
                              to.min(self._expl_strat.std))
        self.logger.add_value(
            'avg expl strat std',
            to.mean(self._expl_strat.std.data).detach().numpy())
        self.logger.add_value('max expl strat std',
                              to.max(self._expl_strat.std))
        self.logger.add_value('expl strat entropy',
                              self._expl_strat.get_entropy().item())
Ejemplo n.º 3
0
    def update(self,
               param_results: ParameterSamplingResult,
               ret_avg_curr: float = None):
        """
        Refit the policy parameters and the exploration noise to the highest-return parameter samples.

        The `num_is_samples` samples with the highest mean return (the importance samples a.k.a. elites) are
        selected. Their mean becomes the new policy parameter vector, and a `DiagNormalNoise` or `FullNormalNoise`
        exploration noise is adapted to their empirical spread plus an extra exploration term which decays
        linearly to zero over `extra_expl_decay_iter` iterations. Any other noise type is left as it is.

        :param param_results: sampling result whose `mean_returns` (NumPy array) and `parameters` (one row per
                              sample) are read
        :param ret_avg_curr: not used by this method
        """
        # Per-sample mean returns, cast to PyTorch's default floating point type
        sample_returns = to.from_numpy(param_results.mean_returns).to(to.get_default_dtype())

        # Rank the samples from best to worst and keep the importance samples a.k.a. elites (see [1, p.12])
        elite_idcs = to.argsort(sample_returns, descending=True)[:self.num_is_samples]
        elite_returns = sample_returns[elite_idcs]
        elite_params = param_results.parameters[elite_idcs, :]

        # The mean of the elites becomes the new policy parameter vector
        self._policy.param_values = to.mean(elite_params, dim=0)

        # Fit the exploration noise to the spread of the elites and add the decaying extra exploration on top
        noise = self._expl_strat.noise
        if isinstance(noise, DiagNormalNoise):
            elite_std = to.std(elite_params, dim=0)
            decay = max(1.0 - self._curr_iter / self.extra_expl_decay_iter, 0)  # see [2, p.4]
            noise.adapt(std=elite_std + self.extra_expl_std_init * decay)
        elif isinstance(noise, FullNormalNoise):
            elite_cov = cov(elite_params, data_along_rows=True)
            decay = max(1.0 - self._curr_iter / self.extra_expl_decay_iter, 0)  # see [2, p.4]
            noise.adapt(cov=elite_cov + to.pow(self.extra_expl_std_init, 2) * decay)

        # Logging (the trailing 4 is passed through to the logger unchanged)
        log_value = self.logger.add_value
        log_value("median imp samp return", to.median(elite_returns), 4)
        log_value("min imp samp return", to.min(elite_returns), 4)

        expl_std = self._expl_strat.std
        log_value("min expl strat std", to.min(expl_std), 4)
        log_value("avg expl strat std", to.mean(expl_std), 4)
        log_value("max expl strat std", to.max(expl_std), 4)
        log_value("expl strat entropy", self._expl_strat.get_entropy(), 4)