def __hash__(self) -> int:
    _hash = hash(
        tuple(
            hash_numpy(x) if isinstance(x, numpy.ndarray) else hash(x)
            for x in self.vals()
        )
    )
    return _hash
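# ``hash_numpy`` is imported from elsewhere in the package. A minimal sketch of
# the behavior assumed here (a deterministic integer fingerprint of the array
# contents; the real helper may also take shape and dtype into account):
#
#     def hash_numpy(x: numpy.ndarray) -> int:
#         return hash(x.tobytes())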
def hash_values(self, name: str) -> List[int]:
    """Return a unique id for each walker's value of the target attribute."""
    values = getattr(self, name)
    hashes = [
        hash_numpy(val) if isinstance(val, numpy.ndarray) else hash(val)
        for val in values
    ]
    return hashes
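# Example usage (hypothetical instance): ``hash_values`` returns one id per
# walker for the target attribute, e.g. to detect duplicated observations:
#
#     ids = walkers_states.hash_values("observs")
#     n_unique = len(set(ids))  # number of distinct observations in the batch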
def reset(
    self,
    env_states: StatesEnv = None,
    model_states: StatesModel = None,
    walkers_states: StatesWalkers = None,
):
    """
    Reset a :class:`Walkers` and clear the internal data to start a \
    new search process.

    Restart all the variables needed to perform the fractal evolution process.

    Args:
        env_states: :class:`StatesEnv` that defines the initial state of \
                    the environment.
        model_states: :class:`StatesModel` that defines the initial state of \
                      the model.
        walkers_states: :class:`StatesWalkers` that defines the internal states \
                        of the walkers.

    """
    super(Walkers, self).reset(
        env_states=env_states, model_states=model_states, walkers_states=walkers_states
    )
    best_ix = (
        self.env_states.rewards.argmin()
        if self.minimize
        else self.env_states.rewards.argmax()
    )
    self.states.update(
        best_reward=copy.deepcopy(self.env_states.rewards[best_ix]),
        best_obs=copy.deepcopy(self.env_states.observs[best_ix]),
        best_state=copy.deepcopy(self.env_states.states[best_ix]),
        best_id=hash_numpy(self.env_states.states[best_ix]),
    )
    if self.critic is not None:
        critic_score = self.critic.reset(
            env_states=self.env_states,
            model_states=model_states,
            walker_states=walkers_states,
        )
        self.states.update(critic_score=critic_score)
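# The best-walker bookkeeping above reduces to an argmin/argmax selection over
# the rewards vector. A standalone sketch with made-up values:
#
#     rewards = numpy.array([1.0, 3.0, 2.0])
#     best_ix = rewards.argmin() if minimize else rewards.argmax()
#     best_reward = rewards[best_ix]  # 1.0 when minimizing, 3.0 otherwise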
def __init__(
    self,
    state: numpy.ndarray,
    observ: numpy.ndarray,
    reward: Scalar,
    id_walker: int = None,
    state_dict: StateDict = None,
    **kwargs,
):
    """
    Initialize a :class:`OneWalker`.

    Args:
        state: Non batched numpy array defining the state of the walker.
        observ: Non batched numpy array defining the observation of the walker.
        reward: Scalar value representing the reward of the walker.
        id_walker: Hash of the provided state. If ``None`` it will be \
                   calculated when the :class:`OneWalker` is initialized.
        state_dict: External :class:`StateDict` that overrides the default values.
        **kwargs: Additional data needed to define the walker. Its structure \
                  needs to be defined in the provided ``state_dict``. These \
                  attributes will be assigned to the :class:`EnvStates` of \
                  the :class:`Swarm`.

    """
    self.id_walkers = None
    self.rewards = None
    self.observs = None
    self.states = None
    self._observs_size = observ.shape
    self._observs_dtype = observ.dtype
    self._states_size = state.shape
    self._states_dtype = state.dtype
    self._rewards_dtype = type(reward)
    # Accept external definition of param_dict values
    walkers_dict = self.get_params_dict()
    if state_dict is not None:
        for k, v in state_dict.items():
            if k in ["observs", "states"]:  # These two are parsed from the provided arrays
                continue
            if k in walkers_dict:
                walkers_dict[k] = v
    super(OneWalker, self).__init__(batch_size=1, state_dict=walkers_dict)
    # Keyword arguments must be defined in state_dict
    if state_dict is not None:
        for k in kwargs.keys():
            if k not in state_dict:
                raise ValueError(
                    "The provided attributes must be defined in state_dict.\n"
                    "state_dict: %s\n kwargs: %s" % (state_dict, kwargs)
                )
    self.observs[:] = copy.deepcopy(observ)
    self.states[:] = copy.deepcopy(state)
    self.rewards[:] = copy.deepcopy(reward)
    self.id_walkers[:] = (
        copy.deepcopy(id_walker) if id_walker is not None else hash_numpy(state)
    )
    self.update(**kwargs)
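# Example construction (the import path is an assumption about the package
# layout; adjust as needed):
#
#     import numpy
#     from fragile.core import OneWalker
#
#     walker = OneWalker(
#         state=numpy.zeros(4, dtype=numpy.float32),
#         observ=numpy.zeros(2, dtype=numpy.float32),
#         reward=0.0,
#     )
#     # id_walker was omitted, so it defaults to hash_numpy(state).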
def update_states(self):
    """Update the data of the root walker after an internal Swarm iteration has finished."""
    # The accumulation of rewards is already done in the internal Swarm
    cum_rewards = self.root_walkers_states.cum_rewards
    self.root_walkers_states.update(
        cum_rewards=cum_rewards,
        id_walkers=numpy.array([hash_numpy(self.root_env_states.states[0])]),
    )
    self.root_walker = OneWalker(
        reward=copy.deepcopy(cum_rewards[0]),
        observ=copy.deepcopy(self.root_env_states.observs[0]),
        state=copy.deepcopy(self.root_env_states.states[0]),
    )
def update_states(self):
    """Update the data of the root state."""
    if self.accumulate_rewards:
        cum_rewards = self.root_walkers_states.cum_rewards
        cum_rewards = cum_rewards + self.root_env_states.rewards
    else:
        cum_rewards = self.root_env_states.rewards
    self.root_walkers_states.update(
        cum_rewards=cum_rewards,
        id_walkers=numpy.array([hash_numpy(self.root_env_states.states[0])]),
    )
    self.root_walker = OneWalker(
        reward=copy.deepcopy(cum_rewards[0]),
        observ=copy.deepcopy(self.root_env_states.observs[0]),
        state=copy.deepcopy(self.root_env_states.states[0]),
    )
def update_states(self, env_states, model_states):
    """Update the data of the root state."""
    self.root_env_states.update(other=env_states)
    self.root_model_states.update(other=model_states)
    if self.accumulate_rewards:
        cum_rewards = self.root_walkers_states.cum_rewards
        cum_rewards = cum_rewards + self.root_env_states.rewards
    else:
        cum_rewards = self.root_env_states.rewards
    times = self.root_walkers_states.times + self.root_walker.times
    self.root_walkers_states.update(
        cum_rewards=cum_rewards,
        id_walkers=numpy.array([hash_numpy(self.root_env_states.states[0])]),
        times=times,
    )
    self.root_walker = OneWalker(
        reward=copy.deepcopy(cum_rewards[0]),
        observ=copy.deepcopy(self.root_env_states.observs[0]),
        state=copy.deepcopy(self.root_env_states.states[0]),
        time=copy.deepcopy(times[0]),
    )
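# The reward bookkeeping shared by the ``update_states`` variants above
# reduces to a single rule, shown standalone with made-up values:
#
#     def next_cum_rewards(cum_rewards, step_rewards, accumulate_rewards):
#         # Running total when accumulating, otherwise just the latest reward.
#         return cum_rewards + step_rewards if accumulate_rewards else step_rewards
#
#     next_cum_rewards(numpy.array([1.0]), numpy.array([0.5]), True)   # array([1.5])
#     next_cum_rewards(numpy.array([1.0]), numpy.array([0.5]), False)  # array([0.5])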
def group_hash(self, name: str) -> int:
    """Return a unique id for a given attribute."""
    val = getattr(self, name)
    return hash_numpy(val) if isinstance(val, numpy.ndarray) else hash(val)
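# Example usage (hypothetical instance): unlike ``hash_values``, which returns
# one id per walker, ``group_hash`` summarizes the whole attribute in a single
# id, e.g. to cheaply check whether a batch of states has changed:
#
#     states_changed = walkers_states.group_hash("states") != previous_group_id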
def __init__(
    self,
    n_walkers: int,
    env_state_params: StateDict,
    model_state_params: StateDict,
    reward_scale: float = 1.0,
    distance_scale: float = 1.0,
    max_epochs: int = None,
    accumulate_rewards: bool = True,
    distance_function: Optional[DistanceFunction] = None,
    ignore_clone: Optional[Dict[str, Set[str]]] = None,
    critic: Optional[BaseCritic] = None,
    minimize: bool = False,
    best_walker: Optional[Tuple[numpy.ndarray, numpy.ndarray, Scalar]] = None,
    reward_limit: float = None,
    **kwargs,
):
    """
    Initialize a :class:`Walkers`.

    Args:
        n_walkers: Number of walkers of the instance.
        env_state_params: Dictionary to instantiate the States of an \
                          :class:`Environment`.
        model_state_params: Dictionary to instantiate the States of a \
                            :class:`Model`.
        reward_scale: Regulates the importance of the reward. Recommended to \
                      keep in the [0, 5] range. Higher values correspond to \
                      higher importance.
        distance_scale: Regulates the importance of the distance. Recommended to \
                        keep in the [0, 5] range. Higher values correspond to \
                        higher importance.
        max_epochs: Maximum number of iterations that the walkers are allowed \
                    to perform.
        accumulate_rewards: If ``True`` the rewards obtained after transitioning \
                            to a new state will accumulate. If ``False`` only the \
                            last reward will be taken into account.
        distance_function: Function to compute the distances between two \
                           groups of walkers. It will be applied row-wise \
                           to the walkers observations and it will return a \
                           vector of scalars. Defaults to l2 norm.
        ignore_clone: Dictionary containing the attribute values that will \
                      not be cloned. Its keys can be either "env" or \
                      "model", to reference the `env_states` and the \
                      `model_states`. Its values are a set of strings with \
                      the names of the attributes that will not be cloned.
        critic: :class:`Critic` that will be used to calculate custom rewards.
        minimize: If ``True`` the algorithm will perform a minimization \
                  process. If ``False`` it will be a maximization process.
        best_walker: Tuple containing the best state, observation and reward \
                     that will be used as the initial best values found.
        reward_limit: The algorithm run will stop after reaching this \
                      reward value. If you are running a minimization process \
                      it will be considered the minimum reward possible, and \
                      if you are maximizing a reward it will be the maximum \
                      value.
        kwargs: Additional attributes stored in the :class:`StatesWalkers`.

    """
    # Add data specific to the child class in the StatesWalkers class as new attributes.
    if critic is not None:
        kwargs["critic_score"] = kwargs.get("critic_score", numpy.zeros(n_walkers))
    self.dtype = float_type
    if best_walker is not None:
        best_state, best_obs, best_reward = best_walker
        best_id = hash_numpy(best_state)
    else:
        best_state, best_obs, best_reward, best_id = (None, None, -numpy.inf, None)
    super(Walkers, self).__init__(
        n_walkers=n_walkers,
        env_state_params=env_state_params,
        model_state_params=model_state_params,
        reward_scale=reward_scale,
        distance_scale=distance_scale,
        max_epochs=max_epochs,
        accumulate_rewards=accumulate_rewards,
        distance_function=distance_function,
        ignore_clone=ignore_clone,
        best_reward=best_reward,
        best_obs=best_obs,
        best_state=best_state,
        best_id=best_id,
        **kwargs,
    )
    self.critic = critic
    self.minimize = minimize
    self.efficiency = 0
    self._min_entropy = 0
    if reward_limit is None:
        reward_limit = -numpy.inf if self.minimize else numpy.inf
    self.reward_limit = reward_limit
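# Example instantiation (hedged: the parameter dictionaries below are
# assumptions about the Environment and Model being used):
#
#     env_params = {
#         "states": {"size": (4,), "dtype": numpy.float32},
#         "observs": {"size": (2,), "dtype": numpy.float32},
#         "rewards": {"dtype": numpy.float32},
#     }
#     model_params = {"actions": {"size": (1,), "dtype": numpy.float32}}
#     walkers = Walkers(
#         n_walkers=32,
#         env_state_params=env_params,
#         model_state_params=model_params,
#         minimize=True,  # reward_limit will default to -numpy.inf
#     )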