Example 1
    def update_states_with_critic(
        self, actions: numpy.ndarray, batch_size: int, model_states: StatesModel, **kwargs
    ) -> StatesModel:
        """
        Compute the time steps generated by the critic and add them to \
        `model_states`. If there is no Critic, the default value of `dt` will be \
        a vector of ones.

        Args:
            actions: Numpy array representing the actions calculated by the model.
            batch_size: Same batch size used when calling `sample`.
            model_states: Same model_states used when calling `sample`.
            **kwargs: Kwargs for `critic.calculate`.

        Returns:
            model_states updated with the actions and the dt calculated by the Critic.

        """
        if self.critic is not None:
            critic_states = self.critic.calculate(
                batch_size=batch_size, model_states=model_states, **kwargs
            )

            dt = (
                critic_states.critic_score.astype(int)
                if isinstance(critic_states.critic_score, numpy.ndarray)
                else critic_states.critic_score
            )
            model_states.update(actions=actions, other=critic_states, dt=dt)
        else:
            dt = numpy.ones(batch_size, dtype=int)
            model_states.update(actions=actions, critic_score=dt, dt=dt)
        return model_states
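For reference, the no-Critic branch above can be exercised on its own. The sketch below reuses the ``StatesModel`` constructor and ``update`` call exactly as they appear in the other examples of this section; the import path and the concrete ``state_dict`` are assumptions made only for illustration.

    import numpy

    from fragile.core.states import StatesModel  # assumed import path, for illustration only

    batch_size = 4
    state_dict = {"actions": {"dtype": int}, "critic_score": {"dtype": int}, "dt": {"dtype": int}}
    model_states = StatesModel(batch_size=batch_size, state_dict=state_dict)

    # Mirror the no-Critic branch: dt defaults to a vector of ones.
    actions = numpy.zeros(batch_size, dtype=int)  # made-up actions sampled by a model
    dt = numpy.ones(batch_size, dtype=int)
    model_states.update(actions=actions, critic_score=dt, dt=dt)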
Example 2
    def reset(
        self,
        walkers_states: StatesWalkers = None,
        model_states: StatesModel = None,
        env_states: StatesEnv = None,
    ):
        """
        Reset the :class:`fragile.Walkers`, the :class:`Environment`, the \
        :class:`Model` and clear the internal data to start a new search process.

        Args:
            model_states: :class:`StatesModel` that define the initial state of \
                          the :class:`Model`.
            env_states: :class:`StatesEnv` that define the initial state of \
                        the :class:`Environment`.
            walkers_states: :class:`StatesWalkers` that define the internal \
                            states of the :class:`Walkers`.
        """
        env_states = self.env.reset(
            batch_size=self.walkers.n) if env_states is None else env_states
        model_states = (self.model.reset(batch_size=self.walkers.n,
                                         env_states=env_states)
                        if model_states is None else model_states)
        model_states.update(init_actions=model_states.actions)
        self.walkers.reset(env_states=env_states, model_states=model_states)
        if self._use_tree:
            root_ids = numpy.array([self.tree.ROOT_HASH] * self.walkers.n)
            self.walkers.states.id_walkers = root_ids
            self.tree.reset(
                env_states=self.walkers.env_states,
                model_states=self.walkers.model_states,
                walkers_states=walkers_states,
            )
            self.update_tree(root_ids.tolist())
Example 3
    def update_states_with_critic(
        self, actions: numpy.ndarray, batch_size: int, model_states: StatesModel, **kwargs
    ) -> StatesModel:
        """
        Compute the data generated by the critic and add it to \
        `model_states`. If there is no Critic, only the sampled actions will be \
        added to `model_states`.

        Args:
            actions: Numpy array representing the actions calculated by the model.
            batch_size: Same batch size used when calling `sample`.
            model_states: Same model_states used when calling `sample`.
            **kwargs: Kwargs for `critic.calculate`.

        Returns:
            model_states updated with the actions and the data calculated by the Critic.

        """
        if self.critic is None:
            model_states.update(actions=actions)
        else:
            critic_state = self.critic.calculate(
                batch_size=batch_size, model_states=model_states, **kwargs
            )
            model_states.update(other=critic_state, actions=actions)
        return model_states
Example 4
    def predict(self, root_env_states: StatesEnv, walkers: StepWalkers,) -> StatesModel:
        """
        Select the ``init_action`` and ``init_dt`` of the best walker found \
        during the internal swarm run.

        Args:
            root_env_states: :env-st:`StatesEnv` class containing the data \
                            corresponding to the root walker of a :class:`StepSwarm`.
            walkers: :walkers:`StepWalkers` used by the internal swarm of a \
                     :class:`StepSwarm`.

        Returns:
            :class:`StatesModel` containing the ``actions`` and ``dt`` that the root walkers
            will use to step the :env:`Environment`.

        """
        init_actions = walkers.states.init_actions.flatten().astype(int)
        best_ix = walkers.get_best_index()
        root_model_states = StatesModel(
            batch_size=1, state_dict={"actions": {"dtype": int}, "dt": {"dtype": int}}
        )
        root_model_states.actions[:] = init_actions[best_ix]
        if hasattr(root_model_states, "dt"):
            target_dt = walkers.states.init_dt.flatten().astype(int)[best_ix]
            root_model_states.dt[:] = target_dt
        return root_model_states
Example 5
    def predict(self, root_env_states: StatesEnv, walkers: StepWalkers,) -> StatesModel:
        """
        Select the most frequent ``init_action`` assigned to the internal swarm's walkers.

        The selected ``dt`` will be equal to the minimum ``init_dts`` among all \
        the walkers that sampled the selected ``init_action``.

        Args:
            root_env_states: :env-st:`StatesEnv` class containing the data \
                            corresponding to the root walker of a :class:`StepSwarm`.
            walkers: :walkers:`StepWalkers` used by the internal swarm of a \
                     :class:`StepSwarm`.

        Returns:
            :class:`StatesModel` containing the ``actions`` and ``dt`` that the root walkers
            will use to step the :env:`Environment`.

        """
        init_actions = walkers.states.init_actions.flatten().astype(int)
        y = numpy.bincount(init_actions)
        most_used_action = numpy.argmax(y)  # index with the highest count, i.e. the most frequent action
        root_model_states = StatesModel(
            batch_size=1, state_dict={"actions": {"dtype": int}, "dt": {"dtype": int}}
        )
        root_model_states.actions[:] = most_used_action
        if hasattr(root_model_states, "dt"):
            init_dts = walkers.states.init_dts.flatten().astype(int)
            index_dt = init_actions == most_used_action
            target_dt = init_dts[index_dt].min()
            root_model_states.dt[:] = target_dt
        return root_model_states
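The action-selection logic above can be reproduced with plain NumPy. The standalone sketch below uses made-up walker data and shows that taking ``numpy.argmax`` of the ``bincount`` picks the most frequent action, and that the selected ``dt`` is the minimum ``init_dt`` among the walkers that voted for it.

    import numpy

    # Made-up per-walker data standing in for walkers.states.init_actions / init_dts.
    init_actions = numpy.array([2, 0, 2, 1, 2, 0])
    init_dts = numpy.array([3, 1, 2, 5, 4, 1])

    counts = numpy.bincount(init_actions)         # votes received by each discrete action
    most_used_action = int(numpy.argmax(counts))  # most frequent action -> 2
    target_dt = init_dts[init_actions == most_used_action].min()  # smallest dt among its voters -> 2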
Example 6
    def reset(self,
              batch_size: int = 1,
              model_states: StatesModel = None,
              env_states: StatesEnv = None,
              *args,
              **kwargs) -> StatesModel:
        """
        Return a new blank State for a `CMAES` instance, and a valid \
        prediction based on that new state.

        Args:
            batch_size: Number of walkers of the new model `State`.
            model_states: :class:`StatesModel` corresponding to the model data.
            env_states: :class:`StatesEnv` containing the environment data.
            *args: Passed to `predict`.
            **kwargs: Passed to `predict`.

        Returns:
            New model states containing sampled data.

        """
        if batch_size is None and env_states is None:
            raise ValueError("env_states and batch_size cannot be both None.")
        batch_size = batch_size or env_states.n
        self.pop_size = batch_size
        self._count_eval = 0
        self._init_algorithm_params(batch_size)
        model_states = model_states or self.create_new_states(
            batch_size=batch_size)
        # Take the first sample from a standard normal distribution
        init_actions = self.random_state.randn(self.mu_const)
        self.x_mean = numpy.matmul(init_actions.T, self.weights_const)
        actions = self._sample_actions()
        model_states.update(actions=actions)
        return model_states
Example 7
    def reset(
        self,
        root_walker: OneWalker = None,
        walkers_states: StatesWalkers = None,
        model_states: StatesModel = None,
        env_states: StatesEnv = None,
    ):
        """
        Reset the :class:`fragile.Walkers`, the :class:`Environment`, the \
        :class:`Model` and clear the internal data to start a new search process.

        Args:
            root_walker: Walker representing the initial state of the search. \
                         The walkers will be reset to this walker, and it will \
                         be added to the root of the :class:`StateTree` if any.
            model_states: :class:`StatesModel` that define the initial state of \
                          the :class:`Model`.
            env_states: :class:`StatesEnv` that define the initial state of \
                        the :class:`Environment`.
            walkers_states: :class:`StatesWalkers` that define the internal \
                            states of the :class:`Walkers`.

        """
        self._epoch = 0
        env_states = (
            self.env.reset(batch_size=self.walkers.n) if env_states is None else env_states
        )
        # Add corresponding root_walkers data to env_states
        if root_walker is not None:
            if not isinstance(root_walker, OneWalker):
                raise ValueError(
                    "Root walker needs to be an "
                    "instance of OneWalker, got %s instead." % type(root_walker)
                )
            env_states = self._update_env_with_root(root_walker=root_walker, env_states=env_states)

        model_states = (
            self.model.reset(batch_size=self.walkers.n, env_states=env_states)
            if model_states is None
            else model_states
        )
        model_states.update(init_actions=model_states.actions)
        self.walkers.reset(env_states=env_states, model_states=model_states)
        if self._use_tree:
            if root_walker is not None:
                self.tree.reset(root_hash=int(root_walker.id_walkers))
            root_ids = numpy.array([self.tree.root_hash] * self.walkers.n)
            self.tree.reset(
                root_hash=int(self.tree.root_hash),
                env_states=self.walkers.env_states,
                model_states=self.walkers.model_states,
                walkers_states=walkers_states,
            )
            ids: List[int] = root_ids.tolist()
            self.update_tree(states_ids=ids)
Example 8
    async def reset(
        self,
        root_walker: OneWalker = None,
        walkers_states: StatesWalkers = None,
        model_states: StatesModel = None,
        env_states: StatesEnv = None,
    ):
        """
        Reset the :class:`fragile.Walkers`, the :class:`Environment`, the \
        :class:`Model` and clear the internal data to start a new search process.

        Args:
            root_walker: Walker representing the initial state of the search. \
                         The walkers will be reset to this walker, and it will \
                         be added to the root of the :class:`StateTree` if any.
            model_states: :class:`StatesModel` that define the initial state of \
                          the :class:`Model`.
            env_states: :class:`StatesEnv` that define the initial state of \
                        the :class:`Environment`.
            walkers_states: :class:`StatesWalkers` that define the internal \
                            states of the :class:`Walkers`.

        """
        self._epoch = 0
        n_walkers = self.walkers.get("n_walkers")
        env_states = (await self.env.reset.remote(batch_size=n_walkers)
                      if env_states is None else env_states)
        # Add corresponding root_walkers data to env_states
        if root_walker is not None:
            if not isinstance(root_walker, OneWalker):
                raise ValueError("Root walker needs to be an "
                                 "instance of OneWalker, got %s instead." %
                                 type(root_walker))
            env_states = self._update_env_with_root(root_walker=root_walker,
                                                    env_states=env_states)

        model_states = (self.model.reset(batch_size=n_walkers,
                                         env_states=env_states)
                        if model_states is None else model_states)
        model_states.update(init_actions=model_states.actions)
        self.walkers.reset(env_states=env_states, model_states=model_states)
        if self.tree is not None:
            id_walkers = self.walkers.get("id_walkers")
            root_id = id_walkers[0] if root_walker is None else copy.copy(
                root_walker.id_walkers)
            self.tree.reset(
                root_id=root_id,
                env_states=self.walkers.env_states,
                model_states=self.walkers.model_states,
                walkers_states=self.walkers.states,
            )
Example 9
 def _classic_control_env():
     env = classic_control_env()
     params = {
         "actions": {
             "dtype": dtype.int64
         },
         "dt": {
             "dtype": dtype.float32
         }
     }
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     states.update(actions=judo.ones(N_WALKERS), dt=judo.ones(N_WALKERS))
     return env, states
Example 10
 def _parallel_environment():
     env = parallel_environment()
     params = {
         "actions": {
             "dtype": numpy.int64
         },
         "critic": {
             "dtype": numpy.float32
         }
     }
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     states.update(actions=numpy.ones(N_WALKERS),
                   critic=numpy.ones(N_WALKERS))
     return env, states
Example 11
 def _atari_env():
     env = discrete_atari_env()
     params = {
         "actions": {
             "dtype": dtype.int64
         },
         "critic": {
             "dtype": dtype.float32
         }
     }
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     states.update(actions=judo.ones(N_WALKERS),
                   critic=judo.ones(N_WALKERS))
     return env, states
Example 12
    def step(self, model_states: StatesModel,
             env_states: StatesEnv) -> StatesEnv:
        """
        Set the environment to the target states by applying the specified \
        actions an arbitrary number of time steps.

        The state transitions will be calculated in parallel.

        Args:
            model_states: :class:`StatesModel` representing the data to be used \
                         to act on the environment.
            env_states: :class:`StatesEnv` representing the data to be set in \
                        the environment.

        Returns:
            :class:`StatesEnv` containing the information that describes the \
            new state of the Environment.

        """
        split_env_states = [
            env.step.remote(model_states=ms, env_states=es)
            for env, ms, es in zip(
                self.envs,
                model_states.split_states(self.n_workers),
                env_states.split_states(self.n_workers),
            )
        ]
        env_states = ray.get(split_env_states)
        new_env_states: StatesEnv = StatesEnv.merge_states(env_states)
        return new_env_states
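The fan-out/fan-in pattern used by this parallel ``step`` can be sketched without ray or the ``split_states``/``merge_states`` helpers. The worker function and arrays below are made up for illustration; ``numpy.array_split`` plays the role of ``split_states`` and ``numpy.concatenate`` the role of ``merge_states``.

    import numpy

    def fake_worker_step(actions: numpy.ndarray) -> numpy.ndarray:
        # Stand-in for env.step.remote: each worker transforms its chunk of the batch.
        return actions * 2

    n_workers = 3
    batch_actions = numpy.arange(10)

    chunks = numpy.array_split(batch_actions, n_workers)     # fan out: one chunk per worker
    results = [fake_worker_step(chunk) for chunk in chunks]  # ray.get would gather these remotely
    merged = numpy.concatenate(results)                      # fan in: back to a single batch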
Example 13
 def test_minimizer_step(self):
     minim = local_minimizer()
     params = {"actions": {"dtype": numpy.float64, "size": (2,)}}
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     assert minim.shape == minim.shape
     states = minim.step(model_states=states, env_states=minim.reset(N_WALKERS))
     assert numpy.allclose(states.rewards.min(), 0)
Example 14
    def update_states(self,
                      env_states: StatesEnv = None,
                      model_states: StatesModel = None,
                      **kwargs):
        """
        Update the States variables that do not contain internal data and \
        accumulate the rewards in the internal states if applicable.

        Args:
            env_states: States containing the data associated with the Environment.
            model_states: States containing data associated with the Model.
            **kwargs: Internal states will be updated via keyword arguments.

        """
        if kwargs:
            if kwargs.get("rewards") is not None:
                self._accumulate_and_update_rewards(kwargs["rewards"])
                del kwargs["rewards"]
            self.states.update(**kwargs)
        if model_states is not None and "dt" in model_states.keys():
            times = self.model_states.get("dt") + self.states.get("times")
            self.states.update(times=times)
        if isinstance(env_states, StatesEnv):
            self._env_states.update(env_states)
            if hasattr(env_states, "rewards"):
                self._accumulate_and_update_rewards(env_states.rewards)
        if isinstance(model_states, StatesModel):
            self._model_states.update(model_states)
        self.update_ids()
Example 15
 def test_step(self, dummy_env):
     states = dummy_env.reset()
     actions = StatesModel(actions=numpy.ones((1, 2)) * 2,
                           batch_size=1,
                           dt=numpy.ones((1, 2)))
     new_states: StatesEnv = dummy_env.step(actions, states)
     assert isinstance(new_states, StatesEnv)
     assert new_states.rewards[0].item() == 1
Example 16
 def _make_transitions(self, model_states: StatesModel,
                       env_states: StatesEnv) -> List[StatesEnv]:
     n_chunks = len(self._envs)
     results = [
         env.step(self._blocking, env_states=es, model_states=ms)
         for env, es, ms in zip(self._envs, env_states.split_states(
             n_chunks), model_states.split_states(n_chunks))
     ]
     states = [result if self._blocking else result() for result in results]
     return states
Example 17
 def test_step(self, function_env, batch_size):
     states = function_env.reset(batch_size=batch_size)
     actions = StatesModel(
         actions=judo.zeros(states.observs.shape),
         batch_size=batch_size,
         dt=judo.ones((1, 2)),
     )
     new_states: StatesEnv = function_env.step(actions, states)
     assert isinstance(new_states, StatesEnv)
     assert new_states.oobs[0].item() == 0
Example 18
    def sample(
        self,
        batch_size: int,
        model_states: StatesModel = None,
        env_states: StatesEnv = None,
        walkers_states: StatesWalkers = None,
        **kwargs,
    ) -> StatesModel:
        """
        Calculate the corresponding data to interact with the Environment and \
        store it in model states.

        Args:
            batch_size: Number of new points to be sampled.
            model_states: States corresponding to the model data.
            env_states: States corresponding to the environment data.
            walkers_states: States corresponding to the walkers data.
            kwargs: Passed to the :class:`Critic` if any.

        Returns:
            :class:`StatesModel` containing the sampled actions and the updated model data.

        """
        if model_states is None or walkers_states is None:
            return super(CMAES, self).sample(
                batch_size=batch_size,
                model_states=model_states,
                env_states=env_states,
                walkers_states=walkers_states,
                **kwargs
            )
        actions = (
            env_states.get("observs")
            if self._count_eval > self.pop_size * 2
            else model_states.get("actions")
        )
        fitness = (
            walkers_states.get("virtual_rewards")
            if self.virtual_reward_fitness
            else walkers_states.get("cum_rewards")
        )
        sorted_fitness = numpy.argsort(fitness)[: self.mu_const]
        selected_actions = actions[sorted_fitness].T
        self._update_evolution_paths(selected_actions)
        self._adapt_covariance_matrix(selected_actions)
        self._adapt_sigma()
        self._cov_matrix_diagonalization()

        actions = self._sample_actions()
        return self.update_states_with_critic(
            actions=actions, batch_size=batch_size, model_states=model_states, **kwargs
        )
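The selection step of ``sample`` boils down to an ``argsort`` over the fitness vector followed by a transpose. A standalone sketch with made-up numbers, where ``mu`` stands for ``self.mu_const``:

    import numpy

    mu = 3
    fitness = numpy.array([0.7, 0.1, 0.9, 0.3, 0.5])   # made-up fitness, one value per walker
    actions = numpy.random.randn(5, 2)                 # made-up action vectors, one row per walker

    best_ix = numpy.argsort(fitness)[:mu]              # first mu walkers after sorting by fitness
    selected_actions = actions[best_ix].T              # shape (n_dims, mu), fed to the CMA-ES updates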
Example 19
 def _ray_function():
     init_ray()
     env = ray_function()
     params = {
         "actions": {
             "dtype": numpy.int64
         },
         "critic": {
             "dtype": numpy.float32
         }
     }
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     return env, states
Example 20
 def _parallel_function():
     env = parallel_function()
     params = {
         "actions": {
             "dtype": numpy.float32,
             "size": (2, )
         },
         "critic": {
             "dtype": numpy.float32
         },
     }
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     return env, states
Example 21
    def __init__(self,
                 n_walkers: int,
                 env_state_params: StateDict,
                 model_state_params: StateDict,
                 reward_scale: float = 1.0,
                 distance_scale: float = 1.0,
                 accumulate_rewards: bool = True,
                 max_epochs: int = None,
                 distance_function: Optional[Callable[
                     [numpy.ndarray, numpy.ndarray], numpy.ndarray]] = None,
                 ignore_clone: Optional[Dict[str, Set[str]]] = None,
                 **kwargs):
        """
        Initialize a new `Walkers` instance.

        Args:
            n_walkers: Number of walkers of the instance.
            env_state_params: Dictionary to instantiate the States of an :class:`Environment`.
            model_state_params: Dictionary to instantiate the States of a :class:`Model`.
            reward_scale: Regulates the importance of the reward. Recommended to \
                          keep in the [0, 5] range. Higher values correspond to \
                          higher importance.
            distance_scale: Regulates the importance of the distance. Recommended to \
                            keep in the [0, 5] range. Higher values correspond to \
                            higher importance.
            accumulate_rewards: If ``True`` the rewards obtained after transitioning \
                                to a new state will accumulate. If ``False`` only the last \
                                reward will be taken into account.
            distance_function: Function to compute the distances between two \
                               groups of walkers. It will be applied row-wise \
                               to the walkers observations and it will return a \
                               vector of scalars. Defaults to l2 norm.
            ignore_clone: Dictionary containing the attribute values that will \
                          not be cloned. Its keys can be either "env" or \
                          "model", to reference the `env_states` and the \
                          `model_states`. Its values are a set of strings with \
                          the names of the attributes that will not be cloned.
            max_epochs: Maximum number of iterations that the walkers are allowed \
                       to perform.
            kwargs: Additional attributes stored in the :class:`StatesWalkers`.

        """
        super(SimpleWalkers, self).__init__(
            n_walkers=n_walkers,
            env_state_params=env_state_params,
            model_state_params=model_state_params,
            accumulate_rewards=accumulate_rewards,
            max_epochs=max_epochs,
        )

        def l2_norm(x: numpy.ndarray, y: numpy.ndarray) -> numpy.ndarray:
            return numpy.linalg.norm(x - y, axis=1)

        self._model_states = StatesModel(state_dict=model_state_params,
                                         batch_size=n_walkers)
        self._env_states = StatesEnv(state_dict=env_state_params,
                                     batch_size=n_walkers)
        self._states = self.STATE_CLASS(batch_size=n_walkers, **kwargs)
        self.distance_function = distance_function if distance_function is not None else l2_norm
        self.reward_scale = reward_scale
        self.distance_scale = distance_scale
        self._id_counter = 0
        self.ignore_clone = ignore_clone if ignore_clone is not None else {}
Example 22
class SimpleWalkers(BaseWalkers):
    """
    This class is in charge of performing all the mathematical operations involved in evolving a \
    cloud of walkers.

    """

    STATE_CLASS = StatesWalkers

    def __init__(self,
                 n_walkers: int,
                 env_state_params: StateDict,
                 model_state_params: StateDict,
                 reward_scale: float = 1.0,
                 distance_scale: float = 1.0,
                 accumulate_rewards: bool = True,
                 max_epochs: int = None,
                 distance_function: Optional[Callable[
                     [numpy.ndarray, numpy.ndarray], numpy.ndarray]] = None,
                 ignore_clone: Optional[Dict[str, Set[str]]] = None,
                 **kwargs):
        """
        Initialize a new `Walkers` instance.

        Args:
            n_walkers: Number of walkers of the instance.
            env_state_params: Dictionary to instantiate the States of an :class:`Environment`.
            model_state_params: Dictionary to instantiate the States of a :class:`Model`.
            reward_scale: Regulates the importance of the reward. Recommended to \
                          keep in the [0, 5] range. Higher values correspond to \
                          higher importance.
            distance_scale: Regulates the importance of the distance. Recommended to \
                            keep in the [0, 5] range. Higher values correspond to \
                            higher importance.
            accumulate_rewards: If ``True`` the rewards obtained after transitioning \
                                to a new state will accumulate. If ``False`` only the last \
                                reward will be taken into account.
            distance_function: Function to compute the distances between two \
                               groups of walkers. It will be applied row-wise \
                               to the walkers observations and it will return a \
                               vector of scalars. Defaults to l2 norm.
            ignore_clone: Dictionary containing the attribute values that will \
                          not be cloned. Its keys can be either "env" or \
                          "model", to reference the `env_states` and the \
                          `model_states`. Its values are a set of strings with \
                          the names of the attributes that will not be cloned.
            max_epochs: Maximum number of iterations that the walkers are allowed \
                       to perform.
            kwargs: Additional attributes stored in the :class:`StatesWalkers`.

        """
        super(SimpleWalkers, self).__init__(
            n_walkers=n_walkers,
            env_state_params=env_state_params,
            model_state_params=model_state_params,
            accumulate_rewards=accumulate_rewards,
            max_epochs=max_epochs,
        )

        def l2_norm(x: numpy.ndarray, y: numpy.ndarray) -> numpy.ndarray:
            return numpy.linalg.norm(x - y, axis=1)

        self._model_states = StatesModel(state_dict=model_state_params,
                                         batch_size=n_walkers)
        self._env_states = StatesEnv(state_dict=env_state_params,
                                     batch_size=n_walkers)
        self._states = self.STATE_CLASS(batch_size=n_walkers, **kwargs)
        self.distance_function = distance_function if distance_function is not None else l2_norm
        self.reward_scale = reward_scale
        self.distance_scale = distance_scale
        self._id_counter = 0
        self.ignore_clone = ignore_clone if ignore_clone is not None else {}

    def __repr__(self) -> str:
        """Print all the data involved in the current run of the algorithm."""
        with numpy.printoptions(linewidth=100, threshold=200, edgeitems=9):
            try:
                text = self._print_stats()
                text += "Walkers States: {}\n".format(
                    self._repr_state(self._states))
                text += "Environment States: {}\n".format(
                    self._repr_state(self._env_states))
                text += "Model States: {}\n".format(
                    self._repr_state(self._model_states))
                return text
            except Exception:
                return super(SimpleWalkers, self).__repr__()

    def _print_stats(self) -> str:
        """Print several statistics of the current state of the swarm."""
        text = "{} iteration {} Out of bounds walkers: {:.2f}% Cloned: {:.2f}%\n\n".format(
            self.__class__.__name__,
            self.epoch,
            100 * self.env_states.oobs.sum() / self.n,
            100 * self.states.will_clone.sum() / self.n,
        )
        return text

    def get(self, name: str, default: Any = None) -> Any:
        """Access attributes of the :class:`Swarm` and its children."""
        if hasattr(self.states, name):
            return getattr(self.states, name)
        elif hasattr(self.env_states, name):
            return getattr(self.env_states, name)
        elif hasattr(self.model_states, name):
            return getattr(self.model_states, name)
        elif hasattr(self, name):
            return getattr(self, name)
        return default

    def ids(self) -> List[int]:
        """
        Return a list of unique ids for each walker state.

        The returned ids are integers representing the hash of the different states.
        """
        return self.env_states.hash_values("states")

    def update_ids(self):
        """Update the unique id of each walker and store it in the :class:`StatesWalkers`."""
        self.states.update(id_walkers=self.ids().copy())

    @property
    def states(self) -> StatesWalkers:
        """Return the `StatesWalkers` class that contains the data used by the instance."""
        return self._states

    @property
    def env_states(self) -> StatesEnv:
        """Return the `States` class that contains the data used by the :class:`Environment`."""
        return self._env_states

    @property
    def model_states(self) -> StatesModel:
        """Return the `States` class that contains the data used by a Model."""
        return self._model_states

    @property
    def best_state(self) -> numpy.ndarray:
        """Return the state of the best walker found in the current algorithm run."""
        return self.states.best_state

    @property
    def best_reward(self) -> Scalar:
        """Return the reward of the best walker found in the current algorithm run."""
        return self.states.best_reward

    @property
    def best_id(self) -> int:
        """
        Return the id (hash value of the state) of the best walker found in the \
        current algorithm run.
        """
        return self.states.best_id

    @property
    def best_obs(self) -> numpy.ndarray:
        """
        Return the observation corresponding to the best walker found in the \
        current algorithm run.
        """
        return self.states.best_obs

    def calculate_end_condition(self) -> bool:
        """
        Process data from the current state to decide if the iteration process should stop.

        Returns:
            Boolean indicating if the iteration process should be finished. ``True`` means \
            it should be stopped, and ``False`` means it should continue.

        """
        non_terminal_states = numpy.logical_not(self.env_states.terminals)
        all_non_terminal_out_of_bounds = self.env_states.oobs[
            non_terminal_states].all()
        max_epochs_reached = self.epoch >= self.max_epochs
        all_in_bounds_are_terminal = self.env_states.terminals[
            self.states.in_bounds].all()
        return max_epochs_reached or all_non_terminal_out_of_bounds or all_in_bounds_are_terminal

    def calculate_distances(self) -> None:
        """Calculate the corresponding distance function for each observation with \
        respect to another observation chosen at random.

        The internal :class:`StatesWalkers` is updated with the relativized distance values.
        """
        # TODO(guillemdb): Check if self.get_in_bounds_compas() works better.
        compas_ix = numpy.random.permutation(numpy.arange(self.n))
        obs = self.env_states.observs.reshape(self.n, -1)
        distances = self.distance_function(obs, obs[compas_ix])
        distances = relativize(distances.flatten())
        self.update_states(distances=distances, compas_dist=compas_ix)

    def calculate_virtual_reward(self) -> None:
        """
        Calculate the virtual reward and update the internal state.

        The cumulative_reward is transformed with the relativize function. \
        The distances stored in the :class:`StatesWalkers` are already transformed.
        """
        processed_rewards = relativize(self.states.cum_rewards)
        virt_rw = (processed_rewards**self.reward_scale *
                   self.states.distances**self.distance_scale)
        self.update_states(virtual_rewards=virt_rw,
                           processed_rewards=processed_rewards)

    def get_in_bounds_compas(self) -> numpy.ndarray:
        """
        Return the indexes of walkers inside bounds chosen at random.

        Returns:
            Numpy array containing the int indexes of in bounds walkers chosen at \
            random with replacement. Its length is equal to the number of walkers.

        """
        if not self.states.in_bounds.any():  # No need to sample if all walkers are dead.
            return numpy.arange(self.n)
        alive_indexes = numpy.arange(self.n, dtype=int)[self.states.in_bounds]
        compas_ix = self.random_state.permutation(alive_indexes)
        compas = self.random_state.choice(compas_ix, self.n, replace=True)
        compas[:len(compas_ix)] = compas_ix
        return compas

    def update_clone_probs(self) -> None:
        """
        Calculate the new probability of cloning for each walker.

        Updates the :class:`StatesWalkers` with both the probability of cloning \
        and the index of the randomly chosen companions that were selected to \
        compare the virtual rewards.
        """
        all_virtual_rewards_are_equal = (self.states.virtual_rewards ==
                                         self.states.virtual_rewards[0]).all()
        if all_virtual_rewards_are_equal:
            clone_probs = numpy.zeros(self.n, dtype=float_type)
            compas_ix = numpy.arange(self.n)
        else:
            compas_ix = self.get_in_bounds_compas()
            companions = self.states.virtual_rewards[compas_ix]
            # This value can be negative!!
            clone_probs = (companions - self.states.virtual_rewards
                           ) / self.states.virtual_rewards
        self.update_states(clone_probs=clone_probs, compas_clone=compas_ix)

    def balance(self) -> Tuple[set, set]:
        """
        Perform an iteration of the FractalAI algorithm for balancing the \
        walkers distribution.

        It performs the necessary calculations to determine which walkers will clone, \
        and performs the cloning process.

        Returns:
            A tuple containing two sets: The first one represent the unique ids \
            of the states for each walker at the start of the iteration. The second \
            one contains the ids of the states after the cloning process.

        """
        old_ids = set(self.states.id_walkers.copy())
        self.states.in_bounds = numpy.logical_not(self.env_states.oobs)
        self.calculate_distances()
        self.calculate_virtual_reward()
        self.update_clone_probs()
        self.clone_walkers()
        new_ids = set(self.states.id_walkers.copy())
        return old_ids, new_ids

    def clone_walkers(self) -> None:
        """
        Sample the clone probability distribution and clone the walkers accordingly.

        This function will update the internal :class:`StatesWalkers`, \
        :class:`StatesEnv`, and :class:`StatesModel`.
        """
        will_clone = self.states.clone_probs > self.random_state.random_sample(self.n)
        will_clone[self.env_states.oobs] = True  # Out of bounds walkers always clone
        self.update_states(will_clone=will_clone)
        clone, compas = self.states.clone()
        self._env_states.clone(will_clone=clone,
                               compas_ix=compas,
                               ignore=self.ignore_clone.get("env"))
        self._model_states.clone(will_clone=clone,
                                 compas_ix=compas,
                                 ignore=self.ignore_clone.get("model"))

    def reset(
        self,
        env_states: StatesEnv = None,
        model_states: StatesModel = None,
        walkers_states: StatesWalkers = None,
    ) -> None:
        """
        Restart all the internal states involved in the algorithm iteration.

        After reset a new run of the algorithm will be ready to be launched.
        """
        if walkers_states is not None:
            self.states.update(walkers_states)
        else:
            self.states.reset()
        self.update_states(env_states=env_states, model_states=model_states)
        self._epoch = 0

    def update_states(self,
                      env_states: StatesEnv = None,
                      model_states: StatesModel = None,
                      **kwargs):
        """
        Update the States variables that do not contain internal data and \
        accumulate the rewards in the internal states if applicable.

        Args:
            env_states: States containing the data associated with the Environment.
            model_states: States containing data associated with the Model.
            **kwargs: Internal states will be updated via keyword arguments.

        """
        if kwargs:
            if kwargs.get("rewards") is not None:
                self._accumulate_and_update_rewards(kwargs["rewards"])
                del kwargs["rewards"]
            self.states.update(**kwargs)
        if isinstance(env_states, StatesEnv):
            self._env_states.update(env_states)
            if hasattr(env_states, "rewards"):
                self._accumulate_and_update_rewards(env_states.rewards)
        if isinstance(model_states, StatesModel):
            self._model_states.update(model_states)
        self.update_ids()

    def _accumulate_and_update_rewards(self, rewards: numpy.ndarray):
        """
        Use as reward either the sum of all the rewards received during the \
        current run, or use the last reward value received as reward.

        Args:
            rewards: Array containing the last rewards received by every walker.
        """
        if self._accumulate_rewards:
            if not isinstance(self.states.get("cum_rewards"), numpy.ndarray):
                cum_rewards = numpy.zeros(self.n)
            else:
                cum_rewards = self.states.cum_rewards
            cum_rewards = cum_rewards + rewards
        else:
            cum_rewards = rewards
        self.update_states(cum_rewards=cum_rewards)

    @staticmethod
    def _repr_state(state):
        string = "\n"
        for k, v in state.items():
            if k in ["observs", "states", "id_walkers", "best_id"]:
                continue
            shape = v.shape if hasattr(v, "shape") else None
            new_str = (
                "{}: shape {} Mean: {:.3f}, Std: {:.3f}, Max: {:.3f} Min: {:.3f}\n"
                .format(k, shape, *statistics_from_array(v))
                if isinstance(v, numpy.ndarray) and "best" not in k else
                ("%s %s\n" %
                 (k, v if not isinstance(v, numpy.ndarray) else v.flatten())))
            string += new_str
        return string

    def fix_best(self):
        """Ensure the best state found is assigned to the last walker of the \
        swarm, so walkers can always choose to clone to the best state."""
        pass
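The cloning mechanics implemented by ``update_clone_probs`` and ``clone_walkers`` reduce to a handful of array operations. The sketch below replays them on made-up, already-relativized virtual rewards; the clone-probability formula and the out-of-bounds rule are the same ones used in the class above.

    import numpy

    random_state = numpy.random.RandomState(0)
    n = 5
    virtual_rewards = numpy.array([0.2, 1.0, 0.5, 0.1, 0.8])  # made-up, already relativized
    oobs = numpy.array([False, False, False, True, False])    # out-of-bounds flags

    compas_ix = random_state.permutation(numpy.arange(n))     # a random companion for each walker
    companions = virtual_rewards[compas_ix]
    clone_probs = (companions - virtual_rewards) / virtual_rewards  # can be negative, as noted above
    will_clone = clone_probs > random_state.random_sample(n)
    will_clone[oobs] = True                                   # out-of-bounds walkers always clone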
Example 23
def create_model_states(model: BaseModel, batch_size: int = 10):
    return StatesModel(batch_size=batch_size,
                       state_dict=model.get_params_dict())
Example 24
 def _custom_domain_function():
     env = custom_domain_function()
     params = {"actions": {"dtype": numpy.float64, "size": (2,)}}
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     return env, states
Example 25
 def _local_minimizer():
     env = local_minimizer()
     params = {"actions": {"dtype": numpy.float64, "size": (2,)}}
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     return env, states
Example 26
 def create_model_states(self, model: BaseModel, batch_size: int = None):
     batch_size = self.BATCH_SIZE if batch_size is None else batch_size
     return StatesModel(batch_size=batch_size,
                        state_dict=model.get_params_dict())
Example 27
 def create_model_states(self, model, batch_size: int = None):
     return StatesModel(batch_size=batch_size,
                        state_dict=model.get_params_dict())
Example 28
 def _function():
     env = function()
     params = {"actions": {"dtype": judo.float64, "size": (2, )}}
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     return env, states
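These fixture helpers are consumed the same way as in the ``test_minimizer_step`` example above: reset the returned environment and step it with the prepared ``StatesModel``. A hedged usage sketch, assuming ``_local_minimizer`` and ``N_WALKERS`` come from the surrounding test module:

    env, model_states = _local_minimizer()
    env_states = env.reset(batch_size=N_WALKERS)                      # blank environment states
    new_env_states = env.step(model_states=model_states, env_states=env_states)
    assert new_env_states.rewards.shape[0] == N_WALKERS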