Example #1
    def update_states(self,
                      env_states: StatesEnv = None,
                      model_states: StatesModel = None,
                      **kwargs):
        """
        Update the States variables that do not contain internal data and \
        accumulate the rewards in the internal states if applicable.

        Args:
            env_states: States containing the data associated with the Environment.
            model_states: States containing the data associated with the Model.
            **kwargs: Internal states will be updated via keyword arguments.

        """
        if kwargs:
            if kwargs.get("rewards") is not None:
                self._accumulate_and_update_rewards(kwargs["rewards"])
                del kwargs["rewards"]
            self.states.update(**kwargs)
        if isinstance(env_states, StatesEnv):
            dt = model_states.get("dt", 1.0) if model_states is not None else 1.0
            times = self._env_states.get("times") + dt
            self._env_states.update(env_states)
            self._env_states.update(times=times)
            if hasattr(env_states, "rewards"):
                self._accumulate_and_update_rewards(env_states.rewards)
        if isinstance(model_states, StatesModel):
            self._model_states.update(model_states)
        self.update_ids()
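
A minimal usage sketch for this method (assuming an already-constructed `walkers` instance that exposes `update_states`; the `new_env_states`, `new_model_states`, and `step_rewards` names are illustrative placeholders, not part of the library):

    # Hypothetical call site: after stepping the environment, push the new data
    # into the walkers. The "rewards" keyword is accumulated internally instead
    # of being overwritten, as handled by _accumulate_and_update_rewards above.
    walkers.update_states(
        env_states=new_env_states,      # StatesEnv returned by the environment step
        model_states=new_model_states,  # StatesModel returned by the model
        rewards=step_rewards,           # per-step rewards to accumulate
    )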
Example #2
    def sample(
        self,
        batch_size: int,
        model_states: StatesModel = None,
        env_states: StatesEnv = None,
        walkers_states: StatesWalkers = None,
        **kwargs,
    ) -> StatesModel:
        """
        Calculate the corresponding data to interact with the Environment and \
        store it in model states.

        Args:
            batch_size: Number of new points to be sampled.
            model_states: States corresponding to the model data.
            env_states: States corresponding to the environment data.
            walkers_states: States corresponding to the walkers data.
            kwargs: Passed to the :class:`Critic` if any.

        Returns:
            Updated model states containing the newly sampled actions.

        """
        if model_states is None or walkers_states is None:
            return super(CMAES, self).sample(
                batch_size=batch_size,
                model_states=model_states,
                env_states=env_states,
                walkers_states=walkers_states,
                **kwargs
            )
        actions = (
            env_states.get("observs")
            if self._count_eval > self.pop_size * 2
            else model_states.get("actions")
        )
        fitness = (
            walkers_states.get("virtual_rewards")
            if self.virtual_reward_fitness
            else walkers_states.get("cum_rewards")
        )
        sorted_fitness = numpy.argsort(fitness)[: self.mu_const]
        selected_actions = actions[sorted_fitness].T
        self._update_evolution_paths(selected_actions)
        self._adapt_covariance_matrix(selected_actions)
        self._adapt_sigma()
        self._cov_matrix_diagonalization()

        actions = self._sample_actions()
        return self.update_states_with_critic(
            actions=actions, batch_size=batch_size, model_states=model_states, **kwargs
        )
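
A minimal usage sketch for this method (assuming an already-constructed `cmaes` model instance and the state objects produced by a previous iteration; the variable names and the batch size are illustrative placeholders):

    # Hypothetical call site: draw a new batch of actions from the CMA-ES model.
    # When model_states and walkers_states are provided, the evolution paths,
    # covariance matrix, and step size are adapted before sampling.
    new_model_states = cmaes.sample(
        batch_size=32,
        model_states=model_states,
        env_states=env_states,
        walkers_states=walkers_states,
    )
    actions = new_model_states.get("actions")  # sampled actions stored in the returned StatesModel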