Example #1
 def __hash__(self) -> int:
     _hash = hash(
         tuple([
             hash_numpy(x) if isinstance(x, numpy.ndarray) else hash(x)
             for x in self.vals()
         ]))
     return _hash
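
The helper ``hash_numpy`` used throughout these examples is not shown on this page. A minimal sketch of a compatible helper, assuming the id is derived from the array's raw bytes (the use of ``hashlib`` here is an assumption, not the library's actual implementation):

    import hashlib

    import numpy

    def hash_numpy(x: numpy.ndarray) -> int:
        """Return a deterministic integer id for the contents of a numpy array."""
        # Hash the raw bytes of a contiguous copy so arrays with equal contents get equal ids.
        return int(hashlib.sha256(numpy.ascontiguousarray(x).tobytes()).hexdigest(), 16)
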
Example #2
 def hash_values(self, name: str) -> List[int]:
     """Return a unique id for each walker attribute."""
     values = getattr(self, name)
     hashes = [
         hash_numpy(val) if isinstance(val, numpy.ndarray) else hash(val) for val in values
     ]
     return hashes
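
Both ``hash_values`` above and ``group_hash`` in Example #8 below dispatch on the value type: numpy arrays are hashed by content with ``hash_numpy``, while every other value falls back to the built-in ``hash``. A small self-contained illustration of that pattern, reusing the ``hash_numpy`` sketch from Example #1:

    import numpy

    values = [numpy.arange(3), "some-string", 42]
    # Arrays are hashed by content; strings and scalars use the built-in hash.
    hashes = [hash_numpy(v) if isinstance(v, numpy.ndarray) else hash(v) for v in values]
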
Example #3
    def reset(
        self,
        env_states: StatesEnv = None,
        model_states: StatesModel = None,
        walkers_states: StatesWalkers = None,
    ):
        """
        Reset a :class:`Walkers` and clear the internal data to start a \
        new search process.

        Restart all the variables needed to perform the fractal evolution process.

        Args:
            model_states: :class:`StatesModel` that defines the initial state of the model.
            env_states: :class:`StatesEnv` that defines the initial state of the environment.
            walkers_states: :class:`StatesWalkers` that defines the internal states of the walkers.

        """
        super(Walkers, self).reset(env_states=env_states,
                                   model_states=model_states,
                                   walkers_states=walkers_states)
        best_ix = (self.env_states.rewards.argmin()
                   if self.minimize else self.env_states.rewards.argmax())
        self.states.update(
            best_reward=copy.deepcopy(self.env_states.rewards[best_ix]),
            best_obs=copy.deepcopy(self.env_states.observs[best_ix]),
            best_state=copy.deepcopy(self.env_states.states[best_ix]),
            best_id=hash_numpy(self.env_states.states[best_ix]),
        )
        if self.critic is not None:
            critic_score = self.critic.reset(env_states=self.env_states,
                                             model_states=model_states,
                                             walker_states=walkers_states)
            self.states.update(critic_score=critic_score)
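
The best walker in ``reset`` is selected with ``argmin`` when minimizing and ``argmax`` when maximizing. The selection step in isolation:

    import numpy

    rewards = numpy.array([1.5, -0.3, 2.7])
    minimize = False
    # Pick the index of the best reward according to the optimization mode.
    best_ix = rewards.argmin() if minimize else rewards.argmax()
    print(best_ix)  # 2 -> the walker with the highest reward when maximizing
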
Example #4
    def __init__(
        self,
        state: numpy.ndarray,
        observ: numpy.ndarray,
        reward: Scalar,
        id_walker: int = None,
        state_dict: StateDict = None,
        **kwargs
    ):
        """
        Initialize a :class:`OneWalker`.

        Args:
            state: Non batched numpy array defining the state of the walker.
            observ: Non batched numpy array defining the observation of the walker.
            reward: Scalar value representing the reward of the walker.
            id_walker: Hash of the provided State. If None it will be calculated when
                       the :class:`OneWalker` is initialized.
            state_dict: External :class:`StateDict` that overrides the default values.
            **kwargs: Additional data needed to define the walker. Its structure \
                      needs to be defined in the provided ``state_dict``. These attributes
                      will be assigned to the :class:`EnvStates` of the :class:`Swarm`.

        """
        self.id_walkers = None
        self.rewards = None
        self.observs = None
        self.states = None
        self._observs_size = observ.shape
        self._observs_dtype = observ.dtype
        self._states_size = state.shape
        self._states_dtype = state.dtype
        self._rewards_dtype = type(reward)
        # Accept external definition of param_dict values
        walkers_dict = self.get_params_dict()
        if state_dict is not None:
            for k, v in state_dict.items():
                if k in ["observs", "states"]:  # These two are parsed from the provided opts
                    continue
                if k in walkers_dict:
                    walkers_dict[k] = v
        super(OneWalker, self).__init__(batch_size=1, state_dict=walkers_dict)
        # Keyword arguments must be defined in state_dict
        if state_dict is not None:
            for k in kwargs.keys():
                if k not in state_dict:
                    raise ValueError(
                        "The provided attributes must be defined in state_dict.\n"
                        "state_dict: %s\n kwargs: %s" % (state_dict, kwargs)
                    )
        self.observs[:] = copy.deepcopy(observ)
        self.states[:] = copy.deepcopy(state)
        self.rewards[:] = copy.deepcopy(reward)
        self.id_walkers[:] = (
            copy.deepcopy(id_walker) if id_walker is not None else hash_numpy(state)
        )
        self.update(**kwargs)
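
A hypothetical construction of a ``OneWalker`` following the signature above; the import path is an assumption made for illustration:

    import numpy

    from fragile.core.states import OneWalker  # import path is an assumption

    state = numpy.zeros(4, dtype=numpy.float32)   # non-batched state
    observ = numpy.ones(2, dtype=numpy.float32)   # non-batched observation
    walker = OneWalker(state=state, observ=observ, reward=0.0)
    # id_walker was not passed, so it is derived from hash_numpy(state) as shown above.
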
Example #5
 def update_states(self):
     """Update the data of the root walker after an internal Swarm iteration has finished."""
     # The accumulation of rewards is already done in the internal Swarm
     cum_rewards = self.root_walkers_states.cum_rewards
     self.root_walkers_states.update(
         cum_rewards=cum_rewards,
         id_walkers=numpy.array([hash_numpy(self.root_env_states.states[0])]),
     )
     self.root_walker = OneWalker(
         reward=copy.deepcopy(cum_rewards[0]),
         observ=copy.deepcopy(self.root_env_states.observs[0]),
         state=copy.deepcopy(self.root_env_states.states[0]),
     )
Example #6
    def update_states(self):
        """Update the data of the root state."""
        if self.accumulate_rewards:
            cum_rewards = self.root_walkers_states.cum_rewards
            cum_rewards = cum_rewards + self.root_env_states.rewards
        else:
            cum_rewards = self.root_env_states.rewards
        self.root_walkers_states.update(
            cum_rewards=cum_rewards,
            id_walkers=numpy.array(
                [hash_numpy(self.root_env_states.states[0])]),
        )

        self.root_walker = OneWalker(
            reward=copy.deepcopy(cum_rewards[0]),
            observ=copy.deepcopy(self.root_env_states.observs[0]),
            state=copy.deepcopy(self.root_env_states.states[0]),
        )
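
The ``accumulate_rewards`` branch either adds the latest environment reward to the running total or keeps only the most recent reward. The same logic in isolation:

    import numpy

    accumulate_rewards = True
    cum_rewards = numpy.array([1.0])   # reward accumulated so far by the root walker
    env_rewards = numpy.array([0.5])   # reward returned by the last environment step
    # Mirror the branch above: accumulate, or keep only the most recent reward.
    cum_rewards = cum_rewards + env_rewards if accumulate_rewards else env_rewards
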
Example #7
    def update_states(self, env_states, model_states):
        """Update the data of the root state."""
        self.root_env_states.update(other=env_states)
        self.root_model_states.update(other=model_states)
        if self.accumulate_rewards:
            cum_rewards = self.root_walkers_states.cum_rewards
            cum_rewards = cum_rewards + self.root_env_states.rewards
        else:
            cum_rewards = self.root_env_states.rewards

        times = self.root_walkers_states.times + self.root_walker.times
        self.root_walkers_states.update(
            cum_rewards=cum_rewards,
            id_walkers=numpy.array(
                [hash_numpy(self.root_env_states.states[0])]),
            times=times,
        )

        self.root_walker = OneWalker(
            reward=copy.deepcopy(cum_rewards[0]),
            observ=copy.deepcopy(self.root_env_states.observs[0]),
            state=copy.deepcopy(self.root_env_states.states[0]),
            time=copy.deepcopy(times[0]),
        )
Example #8
 def group_hash(self, name: str) -> int:
     """Return a unique id for a given attribute."""
     val = getattr(self, name)
     return hash_numpy(val) if isinstance(val, numpy.ndarray) else hash(val)
Example #9
    def __init__(self,
                 n_walkers: int,
                 env_state_params: StateDict,
                 model_state_params: StateDict,
                 reward_scale: float = 1.0,
                 distance_scale: float = 1.0,
                 max_epochs: int = None,
                 accumulate_rewards: bool = True,
                 distance_function: Optional[DistanceFunction] = None,
                 ignore_clone: Optional[Dict[str, Set[str]]] = None,
                 critic: Optional[BaseCritic] = None,
                 minimize: bool = False,
                 best_walker: Optional[Tuple[numpy.ndarray, numpy.ndarray,
                                             Scalar]] = None,
                 reward_limit: float = None,
                 **kwargs):
        """
        Initialize a :class:`Walkers`.

        Args:
            n_walkers: Number of walkers of the instance.
            env_state_params: Dictionary to instantiate the States of an :class:`Environment`.
            model_state_params: Dictionary to instantiate the States of a :class:`Model`.
            reward_scale: Regulates the importance of the reward. Recommended to \
                          keep in the [0, 5] range. Higher values correspond to \
                          higher importance.
            distance_scale: Regulates the importance of the distance. Recommended to \
                            keep in the [0, 5] range. Higher values correspond to \
                            higher importance.
            max_epochs: Maximum number of iterations that the walkers are allowed \
                       to perform.
            accumulate_rewards: If ``True`` the rewards obtained after transitioning \
                                to a new state will accumulate. If ``False`` only the last \
                                reward will be taken into account.
            distance_function: Function to compute the distances between two \
                               groups of walkers. It will be applied row-wise \
                               to the walkers observations and it will return a \
                               vector of scalars. Defaults to l2 norm.
            ignore_clone: Dictionary containing the attribute values that will \
                          not be cloned. Its keys can be either "env" or \
                          "model", to reference the `env_states` and the \
                          `model_states`. Its values are a set of strings with \
                          the names of the attributes that will not be cloned.
            critic: :class:`Critic` that will be used to calculate custom rewards.
            minimize: If ``True`` the algorithm will perform a minimization \
                      process. If ``False`` it will be a maximization process.
            best_walker: Tuple containing the best state, observation and reward \
                        that will be used as the initial best values found.
            reward_limit: The algorithm run will stop after reaching this \
                          reward value. If you are running a minimization process \
                          it will be considered the minimum reward possible, and \
                          if you are maximizing a reward it will be the maximum \
                          value.
            kwargs: Additional attributes stored in the :class:`StatesWalkers`.

        """
        # Add data specific to the child class in the StatesWalkers class as new attributes.
        if critic is not None:
            kwargs["critic_score"] = kwargs.get("critic_score",
                                                numpy.zeros(n_walkers))
        self.dtype = float_type
        if best_walker is not None:
            best_state, best_obs, best_reward = best_walker
            best_id = hash_numpy(best_state)
        else:
            best_state, best_obs, best_reward, best_id = (None, None,
                                                          -numpy.inf, None)
        super(Walkers, self).__init__(n_walkers=n_walkers,
                                      env_state_params=env_state_params,
                                      model_state_params=model_state_params,
                                      reward_scale=reward_scale,
                                      distance_scale=distance_scale,
                                      max_epochs=max_epochs,
                                      accumulate_rewards=accumulate_rewards,
                                      distance_function=distance_function,
                                      ignore_clone=ignore_clone,
                                      best_reward=best_reward,
                                      best_obs=best_obs,
                                      best_state=best_state,
                                      best_id=best_id,
                                      **kwargs)
        self.critic = critic
        self.minimize = minimize
        self.efficiency = 0
        self._min_entropy = 0
        if reward_limit is None:
            reward_limit = -numpy.inf if self.minimize else numpy.inf
        self.reward_limit = reward_limit
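
A hypothetical ``Walkers`` instantiation exercising the arguments documented above. The import path and the ``StateDict`` layouts below are assumptions made only to illustrate the signature:

    import numpy

    from fragile.core.walkers import Walkers  # import path is an assumption

    # Assumed StateDict layout: attribute name -> {"size": ..., "dtype": ...}.
    env_params = {
        "states": {"size": (4,), "dtype": numpy.float32},
        "observs": {"size": (2,), "dtype": numpy.float32},
        "rewards": {"dtype": numpy.float32},
    }
    model_params = {"actions": {"size": (1,), "dtype": numpy.float32}}
    walkers = Walkers(
        n_walkers=32,
        env_state_params=env_params,
        model_state_params=model_params,
        reward_scale=2.0,
        distance_scale=1.0,
        minimize=False,
    )
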