Example #1
    def import_best(self, walkers: ExportedWalkers):
        """
        Import the best walker from the target :class:`ExportedWalkers` if it \
        improves the best value present in the :class:`Swarm`'s walkers.

        Args:
            walkers: Walkers containing the best walker that will be imported \
                    if it improves the current best value found.

        Returns:
            None.

        """

        if self._imported_best_is_better(walkers):
            best_ix = walkers.get_best_index(self.swarm.walkers.minimize)
            best_reward = judo.copy(walkers.rewards[best_ix])
            best_state = judo.copy(walkers.states[best_ix])
            best_obs = judo.copy(walkers.observs[best_ix])
            best_id = judo.copy(walkers.id_walkers[best_ix])

            self.swarm.walkers.states.update(
                best_reward=best_reward, best_state=best_state, best_obs=best_obs, best_id=best_id
            )
            self.swarm.walkers.fix_best()
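
The helper ``_imported_best_is_better`` is not part of this listing. A minimal sketch, assuming it mirrors the minimize-aware comparison of Example #10 below and that the swarm tracks its best value under ``best_reward`` (both assumptions, not confirmed here):

    def _imported_best_is_better(self, walkers: ExportedWalkers) -> bool:
        # Hypothetical sketch: True when the imported best beats the swarm's
        # current best, honoring the minimization flag.
        minimize = self.swarm.walkers.minimize
        curr_best = self.swarm.walkers.states.best_reward
        other_best = walkers.get_best_reward(minimize)
        return curr_best > other_best if minimize else curr_best < other_best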
Example #2
    def update_states(self, env_states, model_states, best_ix):
        """Update the data of the root state."""
        self.root_env_states.update(other=env_states)
        self.root_model_states.update(other=model_states)
        if self.accumulate_rewards:
            # Add the latest environment reward to the accumulated total
            cum_rewards = self.root_walkers_states.cum_rewards + self.root_env_states.rewards
        else:
            cum_rewards = self.root_env_states.rewards
        # Fall back to a unit time step when the model does not define dt
        dt = getattr(self.root_model_states, "dt", 1.0)
        times = dt + self.root_walker.times
        root_id = tensor(self.walkers.states.id_walkers[best_ix])
        self.root_walkers_states.update(
            cum_rewards=cum_rewards,
            times=times,
            id_walkers=tensor([root_id]),
        )

        self.root_walker = OneWalker(
            reward=judo.copy(cum_rewards[0]),
            observ=judo.copy(self.root_env_states.observs[0]),
            state=judo.copy(self.root_env_states.states[0]),
            time=judo.copy(times[0]),
            id_walker=root_id.squeeze(),
        )
Example #3
    def copy(self):
        """Return a copy of the current instance."""
        new_walkers = ExportedWalkers(batch_size=len(self))
        new_walkers.update(
            id_walkers=judo.copy(self.id_walkers),
            rewards=judo.copy(self.rewards),
            states=judo.copy(self.states),
            observs=judo.copy(self.observs),
        )
        return new_walkers
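
A minimal usage sketch (names and batch size are illustrative): because every field goes through ``judo.copy``, mutating the clone leaves the original untouched.

    walkers = ExportedWalkers(batch_size=4)
    clone = walkers.copy()
    clone.rewards[:] = 0.0  # in-place edit on the clone; walkers.rewards is unaffected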
Example #4
    def minimize_batch(self, x: typing.Tensor) -> Tuple[typing.Tensor, typing.Tensor]:
        """
        Minimize a batch of points.

        Args:
            x: Array representing a batch of points to be optimized, stacked \
               across the first dimension.

        Returns:
            Tuple of arrays containing the local optimum found for each point, \
            and an array with the values assigned to each of the points found.

        """
        x = judo.to_numpy(judo.copy(x))
        with Backend.use_backend("numpy"):
            result = judo.zeros_like(x)
            rewards = judo.zeros((x.shape[0], 1))
            for i in range(x.shape[0]):
                new_x, reward = self.minimize_point(x[i, :])
                result[i, :] = new_x
                rewards[i, :] = float(reward)
        # Cast the bounds and results back to tensors of the active backend
        self.bounds.high = tensor(self.bounds.high)
        self.bounds.low = tensor(self.bounds.low)
        result, rewards = tensor(result), tensor(rewards)
        return result, rewards
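
A hedged usage sketch, assuming ``minimizer`` is an instance of a class exposing this method:

    x0 = judo.zeros((10, 2))  # batch of 10 two-dimensional points
    optima, values = minimizer.minimize_batch(x0)
    assert optima.shape == x0.shape  # one local optimum per input point
    assert values.shape == (10, 1)   # one value per optimum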
Example #5
    def copy(self) -> "States":
        """Create a copy of the current instance."""
        param_dict = {
            str(name): judo.copy(val) if judo.is_tensor(val) else copy.deepcopy(val)
            for name, val in self.items()
        }
        return States(batch_size=self.n, **param_dict)
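
The branch on ``judo.is_tensor`` is the key design choice: backend tensors go through the backend-aware ``judo.copy``, while any other Python object falls back to ``copy.deepcopy``. A minimal sketch of the resulting independence (field names are hypothetical):

    states = States(batch_size=2, rewards=judo.zeros((2,)), infos=[{}, {}])
    clone = states.copy()
    clone.rewards[:] = 1.0         # tensor path: states.rewards keeps its zeros
    clone.infos[0]["done"] = True  # deepcopy path: states.infos[0] stays empty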
Example #6
    def reset(self, batch_size: int = 1, **kwargs) -> StatesEnv:
        """Reset the environment, sampling random initial states and observations."""
        states = super(LennardJones, self).reset(batch_size=batch_size, **kwargs)
        # Sample new states from a standard normal distribution
        new_states = random_state.normal(0, scale=1.0, size=states.states.shape)
        states.update(observs=new_states, states=judo.copy(new_states))
        return states
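
Note the asymmetry above: ``observs`` receives ``new_states`` itself while ``states`` receives a copy, so later in-place edits to one field should not leak into the other. A quick check (the ``env`` instance is hypothetical, and the identity assumption is not confirmed by this listing):

    env_states = env.reset(batch_size=2)
    assert env_states.states is not env_states.observs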
Example #7
    def update_states(self, best_ix):
        """Update the data of the root walker after an internal Swarm iteration has finished."""
        # The accumulation of rewards is already done in the internal Swarm
        cum_rewards = self.root_walkers_states.cum_rewards
        times = self.root_walkers_states.times + self.root_walker.times
        root_id = tensor(self.walkers.states.id_walkers[best_ix])
        self.root_walkers_states.update(
            cum_rewards=cum_rewards,
            id_walkers=tensor([root_id]),
            times=times,
        )
        self.root_walker = OneWalker(
            reward=judo.copy(cum_rewards[0]),
            observ=judo.copy(self.root_env_states.observs[0]),
            state=judo.copy(self.root_env_states.states[0]),
            time=judo.copy(times[0]),
            id_walker=root_id,
        )
Example #8
    def reset(
        self,
        env_states: StatesEnv = None,
        model_states: StatesModel = None,
        walkers_states: StatesWalkers = None,
    ) -> None:
        """
        Restart all the internal states involved in the algorithm iteration.

        After reset a new run of the algorithm will be ready to be launched.
        """
        if walkers_states is not None:
            self.states.update(walkers_states)
        else:
            self.states.reset()
        # Copy to avoid aliasing, then restart the clock of the environment states
        self.env_states.times = judo.copy(self.env_states.times)
        self.env_states.times[:] = -1.0
        # Preserve the walker ids across the state update
        old_ids = judo.copy(self.states.id_walkers)
        self.update_states(env_states=env_states, model_states=model_states)
        self.states.id_walkers = old_ids
        self._epoch = 0
Example #9
    def reset(
        self,
        root_walker: OneWalker = None,
        walkers_states: StatesWalkers = None,
        model_states: StatesModel = None,
        env_states: StatesEnv = None,
    ):
        """
        Reset the :class:`fragile.Walkers`, the :class:`Environment`, the \
        :class:`Model` and clear the internal data to start a new search process.

        Args:
            root_walker: Walker representing the initial state of the search. \
                         The walkers will be reset to this walker, and it will \
                         be added to the root of the :class:`StateTree` if any.
            model_states: :class:`StatesModel` that define the initial state of \
                          the :class:`Model`.
            env_states: :class:`StatesEnv` that define the initial state of \
                        the :class:`Environment`.
            walkers_states: :class:`StatesWalkers` that define the internal \
                            states of the :class:`Walkers`.

        """
        self._epoch = 0
        env_states = self.env.reset(batch_size=self.walkers.n) if env_states is None else env_states
        # Add corresponding root_walkers data to env_states
        if root_walker is not None:
            if not isinstance(root_walker, OneWalker):
                raise ValueError(
                    "Root walker needs to be an instance of OneWalker, "
                    "got %s instead." % type(root_walker)
                )
            env_states = self._update_env_with_root(root_walker=root_walker, env_states=env_states)

        model_states = (
            self.model.reset(batch_size=len(self.walkers), env_states=env_states)
            if model_states is None
            else model_states
        )
        model_states.update(init_actions=model_states.actions)
        self.walkers.reset(env_states=env_states, model_states=model_states)
        root_id = (
            self.walkers.get("id_walkers")[0]
            if root_walker is None
            else judo.copy(root_walker.id_walkers[0])
        )
        self.walkers.states.id_walkers[:] = root_id
        self.walkers.states.best_id = root_id
        if self.tree is not None:
            self.tree.reset(
                root_id=root_id,
                env_states=self.walkers.env_states,
                model_states=self.walkers.model_states,
                walkers_states=self.walkers.states,
            )
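
A hedged usage sketch (the ``swarm`` and ``seed_walker`` names are assumptions): a reset can start a fresh search or seed it from a known walker.

    swarm.reset()                         # start a new search from scratch
    swarm.reset(root_walker=seed_walker)  # resume from a known OneWalker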
Example #10
    def update_best(self, walkers: ExportedWalkers):
        """
        Update the values tracked by the walker if the passed \
        :class:`ExportedWalkers` contain a better value.

        Args:
            walkers: The current best values will be compared against the \
                     walkers of this instance of :class:`ExportedWalkers`.

        Returns:
            None

        """
        curr_best = self.get_best_reward(self.minimize)
        other_best = walkers.get_best_reward(self.minimize)
        other_improves = curr_best > other_best if self.minimize else curr_best < other_best
        if other_improves:
            ix = walkers.get_best_index(self.minimize)
            self.states = judo.copy(walkers.states[ix])
            self.observs = judo.copy(walkers.observs[ix])
            self.rewards = judo.copy(walkers.rewards[ix])
            self.id_walkers = judo.copy(walkers.id_walkers[ix])
Example #11
    def step_walkers(self) -> None:
        """
        Make the walkers evolve to their next state sampling an action from the \
        :class:`Model` and applying it to the :class:`Environment`.
        """
        model_states = self.walkers.model_states
        env_states = self.walkers.env_states

        # Keep the current ids so the tree can link parents to their children
        parent_ids = judo.copy(self.walkers.states.id_walkers) if self.tree is not None else None
        model_states = self.model.predict(
            env_states=env_states, model_states=model_states, walkers_states=self.walkers.states
        )
        env_states = self.env.step(model_states=model_states, env_states=env_states)
        self.walkers.update_states(
            env_states=env_states,
            model_states=model_states,
        )
        self.update_tree(parent_ids)
Example #12
    async def step_walkers(self) -> None:
        """
        Make the walkers evolve to their next state sampling an action from the \
        :class:`Model` and applying it to the :class:`Environment`.
        """
        model_states = self.walkers.get("model_states")
        env_states = self.walkers.get("env_states")
        walkers_states = self.walkers.get("states")
        parent_ids = judo.copy(self.walkers.get("id_walkers")) if self.tree is not None else None

        model_states = self.model.predict(
            env_states=env_states, model_states=model_states, walkers_states=walkers_states
        )
        # The environment step runs remotely, so it is awaited instead of called directly
        env_states = await self.env.step.remote(model_states=model_states, env_states=env_states)
        self.walkers.update_states(
            env_states=env_states,
            model_states=model_states,
        )
        self.update_tree(parent_ids)
Example #13
    def __init__(
        self,
        state: Tensor,
        observ: Tensor,
        reward: Scalar,
        id_walker=None,
        time=0.0,
        state_dict: StateDict = None,
        **kwargs,
    ):
        """
        Initialize a :class:`OneWalker`.

        Args:
            state: Non batched numpy array defining the state of the walker.
            observ: Non batched numpy array defining the observation of the walker.
            reward: typing.Scalar value representing the reward of the walker.
            id_walker: Hash of the provided state. If None, it will be calculated \
                       when the :class:`OneWalker` is initialized.
            time: Time step of the current walker. Measures the length of the path \
                  followed by the walker.
            state_dict: External :class:`typing.StateDict` that overrides the default values.
            **kwargs: Additional data needed to define the walker. Its structure \
                      needs to be defined in the provided ``state_dict``. These attributes \
                      will be assigned to the :class:`EnvStates` of the :class:`Swarm`.

        """
        self.id_walkers = None
        self.rewards = None
        self.observs = None
        self.states = None
        self.times = None
        self._observs_size = observ.shape
        self._observs_dtype = observ.dtype
        self._states_size = state.shape
        self._states_dtype = state.dtype
        self._rewards_dtype = tensor(reward).dtype
        # Accept external definition of param_dict values
        walkers_dict = self.get_params_dict()
        if state_dict is not None:
            for k, v in state_dict.items():
                # "observs" and "states" are parsed from the provided arguments
                if k in ["observs", "states"]:
                    continue
                if k in walkers_dict:
                    walkers_dict[k] = v
        super(OneWalker, self).__init__(batch_size=1, state_dict=walkers_dict)
        # Keyword arguments must be defined in state_dict
        if state_dict is not None:
            for k in kwargs.keys():
                if k not in state_dict:
                    raise ValueError(
                        "The provided attributes must be defined in state_dict.\n"
                        "state_dict: %s\nkwargs: %s" % (state_dict, kwargs)
                    )
        self.observs[:] = judo.copy(observ)
        self.states[:] = judo.copy(state)
        self.rewards[:] = judo.copy(reward) if judo.is_tensor(reward) else copy.deepcopy(reward)
        self.times[:] = judo.copy(time) if judo.is_tensor(time) else copy.deepcopy(time)
        self.id_walkers[:] = (
            judo.copy(id_walker.squeeze()) if id_walker is not None else hasher.hash_tensor(state)
        )
        self.update(**kwargs)
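
A hedged construction sketch, assuming a NumPy backend (the arrays and shapes are illustrative only):

    import numpy

    walker = OneWalker(
        state=numpy.zeros(3, dtype=numpy.float32),   # non-batched state
        observ=numpy.zeros(4, dtype=numpy.float32),  # non-batched observation
        reward=0.0,
    )
    # The data is stored internally with a batch dimension of one
    assert walker.states.shape[0] == 1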