def update_states_with_critic(
    self, actions: numpy.ndarray, batch_size: int, model_states: StatesModel, **kwargs
) -> StatesModel:
    """
    Compute the time steps generated by the critic and add them to \
    `model_states`. If there is no Critic the default value of dt will be a \
    vector of 1.

    Args:
        actions: Numpy array representing the actions calculated by the model.
        batch_size: Same batch size used when calling `sample`.
        model_states: Same model_states used when calling `sample`.
        **kwargs: Kwargs for `critic.calculate`.

    Returns:
        model_states updated with the actions and the dt calculated by the Critic.

    """
    if self.critic is not None:
        critic_states = self.critic.calculate(
            batch_size=batch_size, model_states=model_states, **kwargs
        )
        dt = (
            critic_states.critic_score.astype(int)
            if isinstance(critic_states.critic_score, numpy.ndarray)
            else critic_states.critic_score
        )
        model_states.update(actions=actions, other=critic_states, dt=dt)
    else:
        dt = numpy.ones(batch_size, dtype=int)
        model_states.update(actions=actions, critic_score=dt, dt=dt)
    return model_states

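# Illustrative sketch, not library code: a minimal look at the no-Critic branch of
# `update_states_with_critic` above, where `dt` defaults to a vector of ones that is also
# stored as `critic_score`. The import path and the 4-walker batch are assumptions made
# only for this example; the `StatesModel` constructor and `update` call mirror the usage
# shown elsewhere in this file.
import numpy

from fragile.core.states import StatesModel  # assumed import path

_batch_size = 4
_params = {"actions": {"dtype": int}, "critic_score": {"dtype": int}, "dt": {"dtype": int}}
_states = StatesModel(state_dict=_params, batch_size=_batch_size)
_default_dt = numpy.ones(_batch_size, dtype=int)
_states.update(
    actions=numpy.zeros(_batch_size, dtype=int), critic_score=_default_dt, dt=_default_dt
)
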
def reset(
    self,
    walkers_states: StatesWalkers = None,
    model_states: StatesModel = None,
    env_states: StatesEnv = None,
):
    """
    Reset the :class:`fragile.Walkers`, the :class:`Environment`, the \
    :class:`Model` and clear the internal data to start a new search process.

    Args:
        model_states: :class:`StatesModel` that defines the initial state of \
                      the :class:`Model`.
        env_states: :class:`StatesEnv` that defines the initial state of \
                    the :class:`Environment`.
        walkers_states: :class:`StatesWalkers` that define the internal \
                        states of the :class:`Walkers`.

    """
    env_states = (
        self.env.reset(batch_size=self.walkers.n) if env_states is None else env_states
    )
    model_states = (
        self.model.reset(batch_size=self.walkers.n, env_states=env_states)
        if model_states is None
        else model_states
    )
    model_states.update(init_actions=model_states.actions)
    self.walkers.reset(env_states=env_states, model_states=model_states)
    if self._use_tree:
        root_ids = numpy.array([self.tree.ROOT_HASH] * self.walkers.n)
        self.walkers.states.id_walkers = root_ids
        self.tree.reset(
            env_states=self.walkers.env_states,
            model_states=self.walkers.model_states,
            walkers_states=walkers_states,
        )
        self.update_tree(root_ids.tolist())

def update_states_with_critic(
    self, actions: numpy.ndarray, batch_size: int, model_states: StatesModel, **kwargs
) -> StatesModel:
    """
    Compute the time steps generated by the critic and add them to \
    `model_states`. If there is no Critic the default value of dt will be a \
    vector of 1.

    Args:
        actions: Numpy array representing the actions calculated by the model.
        batch_size: Same batch size used when calling `sample`.
        model_states: Same model_states used when calling `sample`.
        **kwargs: Kwargs for `critic.calculate`.

    Returns:
        model_states updated with the actions and the dt calculated by the Critic.

    """
    if self.critic is None:
        model_states.update(actions=actions)
    else:
        critic_state = self.critic.calculate(
            batch_size=batch_size, model_states=model_states, **kwargs
        )
        model_states.update(other=critic_state, actions=actions)
    return model_states

def predict(self, root_env_states: StatesEnv, walkers: StepWalkers) -> StatesModel:
    """
    Select the ``init_action`` and ``init_dt`` of the best walker found \
    during the internal swarm run.

    Args:
        root_env_states: :env-st:`StatesEnv` class containing the data \
                         corresponding to the root walker of a :class:`StepSwarm`.
        walkers: :walkers:`StepWalkers` used by the internal swarm of a \
                 :class:`StepSwarm`.

    Returns:
        :class:`StatesModel` containing the ``actions`` and ``dt`` that the root walkers
        will use to step the :env:`Environment`.

    """
    init_actions = walkers.states.init_actions.flatten().astype(int)
    best_ix = walkers.get_best_index()
    root_model_states = StatesModel(
        batch_size=1, state_dict={"actions": {"dtype": int}, "dt": {"dtype": int}}
    )
    root_model_states.actions[:] = init_actions[best_ix]
    if hasattr(root_model_states, "dt"):
        target_dt = walkers.states.init_dt.flatten().astype(int)[best_ix]
        root_model_states.dt[:] = target_dt
    return root_model_states

def predict(self, root_env_states: StatesEnv, walkers: StepWalkers) -> StatesModel:
    """
    Select the most frequent ``init_action`` assigned to the internal swarm's walkers.

    The selected ``dt`` will be equal to the minimum ``init_dts`` among all \
    the walkers that sampled the selected ``init_action``.

    Args:
        root_env_states: :env-st:`StatesEnv` class containing the data \
                         corresponding to the root walker of a :class:`StepSwarm`.
        walkers: :walkers:`StepWalkers` used by the internal swarm of a \
                 :class:`StepSwarm`.

    Returns:
        :class:`StatesModel` containing the ``actions`` and ``dt`` that the root walkers
        will use to step the :env:`Environment`.

    """
    init_actions = walkers.states.init_actions.flatten().astype(int)
    # Count how many walkers sampled each action and keep the most frequent one.
    action_counts = numpy.bincount(init_actions)
    most_used_action = numpy.argmax(action_counts)
    root_model_states = StatesModel(
        batch_size=1, state_dict={"actions": {"dtype": int}, "dt": {"dtype": int}}
    )
    root_model_states.actions[:] = most_used_action
    if hasattr(root_model_states, "dt"):
        init_dts = walkers.states.init_dts.flatten().astype(int)
        index_dt = init_actions == most_used_action
        target_dt = init_dts[index_dt].min()
        root_model_states.dt[:] = target_dt
    return root_model_states

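# Worked example with made-up numbers (plain numpy, independent of the library) of the
# majority-vote rule documented above: the most frequent init_action is selected, and dt
# is the minimum init_dt among the walkers that sampled that action.
import numpy

init_actions = numpy.array([2, 0, 2, 1, 2, 0])
init_dts = numpy.array([3, 1, 5, 2, 4, 1])
counts = numpy.bincount(init_actions)                         # -> [2, 1, 3]
most_used_action = numpy.argmax(counts)                       # -> 2
target_dt = init_dts[init_actions == most_used_action].min()  # -> 3
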
def reset(
    self,
    batch_size: int = 1,
    model_states: StatesModel = None,
    env_states: StatesEnv = None,
    *args,
    **kwargs
) -> StatesModel:
    """
    Return a new blank State for a `DiscreteUniform` instance, and a valid \
    prediction based on that new state.

    Args:
        batch_size: Number of walkers of the new model `State`.
        model_states: :class:`StatesModel` corresponding to the model data.
        env_states: :class:`StatesEnv` containing the environment data.
        *args: Passed to `predict`.
        **kwargs: Passed to `predict`.

    Returns:
        New model states containing sampled data.

    """
    if batch_size is None and env_states is None:
        raise ValueError("env_states and batch_size cannot be both None.")
    batch_size = batch_size or env_states.n
    self.pop_size = batch_size
    self._count_eval = 0
    self._init_algorithm_params(batch_size)
    model_states = model_states or self.create_new_states(batch_size=batch_size)
    # Take the first sample from a standard normal distribution
    init_actions = self.random_state.randn(self.mu_const)
    self.x_mean = numpy.matmul(init_actions.T, self.weights_const)
    actions = self._sample_actions()
    model_states.update(actions=actions)
    return model_states

def reset(
    self,
    root_walker: OneWalker = None,
    walkers_states: StatesWalkers = None,
    model_states: StatesModel = None,
    env_states: StatesEnv = None,
):
    """
    Reset the :class:`fragile.Walkers`, the :class:`Environment`, the \
    :class:`Model` and clear the internal data to start a new search process.

    Args:
        root_walker: Walker representing the initial state of the search. \
                     The walkers will be reset to this walker, and it will \
                     be added to the root of the :class:`StateTree` if any.
        model_states: :class:`StatesModel` that define the initial state of \
                      the :class:`Model`.
        env_states: :class:`StatesEnv` that define the initial state of \
                    the :class:`Environment`.
        walkers_states: :class:`StatesWalkers` that define the internal \
                        states of the :class:`Walkers`.

    """
    self._epoch = 0
    env_states = (
        self.env.reset(batch_size=self.walkers.n) if env_states is None else env_states
    )
    # Add corresponding root_walkers data to env_states
    if root_walker is not None:
        if not isinstance(root_walker, OneWalker):
            raise ValueError(
                "Root walker needs to be an "
                "instance of OneWalker, got %s instead." % type(root_walker)
            )
        env_states = self._update_env_with_root(root_walker=root_walker, env_states=env_states)
    model_states = (
        self.model.reset(batch_size=self.walkers.n, env_states=env_states)
        if model_states is None
        else model_states
    )
    model_states.update(init_actions=model_states.actions)
    self.walkers.reset(env_states=env_states, model_states=model_states)
    if self._use_tree:
        if root_walker is not None:
            self.tree.reset(root_hash=int(root_walker.id_walkers))
        root_ids = numpy.array([self.tree.root_hash] * self.walkers.n)
        self.tree.reset(
            root_hash=int(self.tree.root_hash),
            env_states=self.walkers.env_states,
            model_states=self.walkers.model_states,
            walkers_states=walkers_states,
        )
        ids: List[int] = root_ids.tolist()
        self.update_tree(states_ids=ids)

async def reset(
    self,
    root_walker: OneWalker = None,
    walkers_states: StatesWalkers = None,
    model_states: StatesModel = None,
    env_states: StatesEnv = None,
):
    """
    Reset the :class:`fragile.Walkers`, the :class:`Environment`, the \
    :class:`Model` and clear the internal data to start a new search process.

    Args:
        root_walker: Walker representing the initial state of the search. \
                     The walkers will be reset to this walker, and it will \
                     be added to the root of the :class:`StateTree` if any.
        model_states: :class:`StatesModel` that define the initial state of \
                      the :class:`Model`.
        env_states: :class:`StatesEnv` that define the initial state of \
                    the :class:`Environment`.
        walkers_states: :class:`StatesWalkers` that define the internal \
                        states of the :class:`Walkers`.

    """
    self._epoch = 0
    n_walkers = self.walkers.get("n_walkers")
    # Only await the remote reset; a user-provided env_states is used as-is.
    env_states = (
        await self.env.reset.remote(batch_size=n_walkers)
        if env_states is None
        else env_states
    )
    # Add corresponding root_walkers data to env_states
    if root_walker is not None:
        if not isinstance(root_walker, OneWalker):
            raise ValueError(
                "Root walker needs to be an "
                "instance of OneWalker, got %s instead." % type(root_walker)
            )
        env_states = self._update_env_with_root(root_walker=root_walker, env_states=env_states)
    model_states = (
        self.model.reset(batch_size=n_walkers, env_states=env_states)
        if model_states is None
        else model_states
    )
    model_states.update(init_actions=model_states.actions)
    self.walkers.reset(env_states=env_states, model_states=model_states)
    if self.tree is not None:
        id_walkers = self.walkers.get("id_walkers")
        root_id = id_walkers[0] if root_walker is None else copy.copy(root_walker.id_walkers)
        self.tree.reset(
            root_id=root_id,
            env_states=self.walkers.env_states,
            model_states=self.walkers.model_states,
            walkers_states=self.walkers.states,
        )

def _classic_control_env():
    env = classic_control_env()
    params = {"actions": {"dtype": dtype.int64}, "dt": {"dtype": dtype.float32}}
    states = StatesModel(state_dict=params, batch_size=N_WALKERS)
    states.update(actions=judo.ones(N_WALKERS), dt=judo.ones(N_WALKERS))
    return env, states

def _parallel_environment():
    env = parallel_environment()
    params = {"actions": {"dtype": numpy.int64}, "critic": {"dtype": numpy.float32}}
    states = StatesModel(state_dict=params, batch_size=N_WALKERS)
    states.update(actions=numpy.ones(N_WALKERS), critic=numpy.ones(N_WALKERS))
    return env, states

def _atari_env():
    env = discrete_atari_env()
    params = {"actions": {"dtype": dtype.int64}, "critic": {"dtype": dtype.float32}}
    states = StatesModel(state_dict=params, batch_size=N_WALKERS)
    states.update(actions=judo.ones(N_WALKERS), critic=judo.ones(N_WALKERS))
    return env, states

def step(self, model_states: StatesModel, env_states: StatesEnv) -> StatesEnv:
    """
    Set the environment to the target states by applying the specified \
    actions for an arbitrary number of time steps.

    The state transitions will be calculated in parallel.

    Args:
        model_states: :class:`StatesModel` representing the data to be used \
                      to act on the environment.
        env_states: :class:`StatesEnv` representing the data to be set in \
                    the environment.

    Returns:
        :class:`StatesEnv` containing the information that describes the \
        new state of the Environment.

    """
    split_env_states = [
        env.step.remote(model_states=ms, env_states=es)
        for env, ms, es in zip(
            self.envs,
            model_states.split_states(self.n_workers),
            env_states.split_states(self.n_workers),
        )
    ]
    env_states = ray.get(split_env_states)
    new_env_states: StatesEnv = StatesEnv.merge_states(env_states)
    return new_env_states

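# Illustrative sketch (plain numpy, no ray): the split/step/merge pattern used above sends
# one chunk of walkers to each worker and then concatenates the results back in order.
# `numpy.array_split` and `_fake_remote_step` are stand-ins invented for this example;
# the real code relies on `split_states`, `env.step.remote` and `merge_states`.
import numpy

def _fake_remote_step(chunk: numpy.ndarray) -> numpy.ndarray:
    # Stand-in for one worker stepping its slice of the batch.
    return chunk + 1

n_workers = 3
batch = numpy.arange(8)
chunks = numpy.array_split(batch, n_workers)
stepped = [_fake_remote_step(chunk) for chunk in chunks]
merged = numpy.concatenate(stepped)  # preserves the original walker ordering
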
def test_minimizer_step(self):
    minim = local_minimizer()
    params = {"actions": {"dtype": numpy.float64, "size": (2,)}}
    states = StatesModel(state_dict=params, batch_size=N_WALKERS)
    assert minim.shape == minim.shape
    states = minim.step(model_states=states, env_states=minim.reset(N_WALKERS))
    assert numpy.allclose(states.rewards.min(), 0)

def update_states(
    self, env_states: StatesEnv = None, model_states: StatesModel = None, **kwargs
):
    """
    Update the States variables that do not contain internal data and \
    accumulate the rewards in the internal states if applicable.

    Args:
        env_states: States containing the data associated with the Environment.
        model_states: States containing the data associated with the Model.
        **kwargs: Internal states will be updated via keyword arguments.

    """
    if kwargs:
        if kwargs.get("rewards") is not None:
            self._accumulate_and_update_rewards(kwargs["rewards"])
            del kwargs["rewards"]
        self.states.update(**kwargs)
    if model_states is not None and "dt" in model_states.keys():
        times = self.model_states.get("dt") + self.states.get("times")
        self.states.update(times=times)
    if isinstance(env_states, StatesEnv):
        self._env_states.update(env_states)
        if hasattr(env_states, "rewards"):
            self._accumulate_and_update_rewards(env_states.rewards)
    if isinstance(model_states, StatesModel):
        self._model_states.update(model_states)
    self.update_ids()

def test_step(self, dummy_env):
    states = dummy_env.reset()
    actions = StatesModel(
        actions=numpy.ones((1, 2)) * 2, batch_size=1, dt=numpy.ones((1, 2))
    )
    new_states: StatesEnv = dummy_env.step(actions, states)
    assert isinstance(new_states, StatesEnv)
    assert new_states.rewards[0].item() == 1

def _make_transitions(
    self, model_states: StatesModel, env_states: StatesEnv
) -> List[StatesEnv]:
    n_chunks = len(self._envs)
    results = [
        env.step(self._blocking, env_states=es, model_states=ms)
        for env, es, ms in zip(
            self._envs,
            env_states.split_states(n_chunks),
            model_states.split_states(n_chunks),
        )
    ]
    states = [result if self._blocking else result() for result in results]
    return states

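# Small sketch (hypothetical worker, not library code) of the blocking/non-blocking idiom
# used above: in blocking mode each worker returns its value directly, otherwise it returns
# a zero-argument callable that is collected later with `result()`.
def _fake_worker_step(blocking: bool, value: int):
    return value if blocking else (lambda: value)

blocking = False
results = [_fake_worker_step(blocking, value=i) for i in range(4)]
collected = [result if blocking else result() for result in results]  # -> [0, 1, 2, 3]
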
def test_step(self, function_env, batch_size):
    states = function_env.reset(batch_size=batch_size)
    actions = StatesModel(
        actions=judo.zeros(states.observs.shape),
        batch_size=batch_size,
        dt=judo.ones((1, 2)),
    )
    new_states: StatesEnv = function_env.step(actions, states)
    assert isinstance(new_states, StatesEnv)
    assert new_states.oobs[0].item() == 0

def sample(
    self,
    batch_size: int,
    model_states: StatesModel = None,
    env_states: StatesEnv = None,
    walkers_states: StatesWalkers = None,
    **kwargs,
) -> StatesModel:
    """
    Calculate the corresponding data to interact with the Environment and \
    store it in model states.

    Args:
        batch_size: Number of new points to be sampled.
        model_states: States corresponding to the model data.
        env_states: States corresponding to the environment data.
        walkers_states: States corresponding to the walkers data.
        kwargs: Passed to the :class:`Critic` if any.

    Returns:
        Tuple containing a tensor with the sampled actions and the new model states
        variable.

    """
    if model_states is None or walkers_states is None:
        return super(CMAES, self).sample(
            batch_size=batch_size,
            model_states=model_states,
            env_states=env_states,
            walkers_states=walkers_states,
            **kwargs
        )
    actions = (
        env_states.get("observs")
        if self._count_eval > self.pop_size * 2
        else model_states.get("actions")
    )
    fitness = (
        walkers_states.get("virtual_rewards")
        if self.virtual_reward_fitness
        else walkers_states.get("cum_rewards")
    )
    sorted_fitness = numpy.argsort(fitness)[: self.mu_const]
    selected_actions = actions[sorted_fitness].T
    self._update_evolution_paths(selected_actions)
    self._adapt_covariance_matrix(selected_actions)
    self._adapt_sigma()
    self._cov_matrix_diagonalization()
    actions = self._sample_actions()
    return self.update_states_with_critic(
        actions=actions, batch_size=batch_size, model_states=model_states, **kwargs
    )

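# Worked example with made-up numbers (plain numpy) of the selection step above:
# `argsort` keeps the indices of the `mu_const` smallest fitness values, and the transpose
# arranges the selected actions so that each column corresponds to one selected walker.
import numpy

mu_const = 2
fitness = numpy.array([5.0, 1.0, 3.0, 4.0])
actions = numpy.array([[0.1, 0.2], [0.9, 0.8], [0.5, 0.4], [0.3, 0.6]])
sorted_fitness = numpy.argsort(fitness)[:mu_const]  # -> [1, 2]
selected_actions = actions[sorted_fitness].T        # shape (2, mu_const): one walker per column
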
def _ray_function():
    init_ray()
    env = ray_function()
    params = {"actions": {"dtype": numpy.int64}, "critic": {"dtype": numpy.float32}}
    states = StatesModel(state_dict=params, batch_size=N_WALKERS)
    return env, states

def _parallel_function():
    env = parallel_function()
    params = {
        "actions": {"dtype": numpy.float32, "size": (2,)},
        "critic": {"dtype": numpy.float32},
    }
    states = StatesModel(state_dict=params, batch_size=N_WALKERS)
    return env, states

def __init__(
    self,
    n_walkers: int,
    env_state_params: StateDict,
    model_state_params: StateDict,
    reward_scale: float = 1.0,
    distance_scale: float = 1.0,
    accumulate_rewards: bool = True,
    max_epochs: int = None,
    distance_function: Optional[
        Callable[[numpy.ndarray, numpy.ndarray], numpy.ndarray]
    ] = None,
    ignore_clone: Optional[Dict[str, Set[str]]] = None,
    **kwargs
):
    """
    Initialize a new `Walkers` instance.

    Args:
        n_walkers: Number of walkers of the instance.
        env_state_params: Dictionary to instantiate the States of an :class:`Environment`.
        model_state_params: Dictionary to instantiate the States of a :class:`Model`.
        reward_scale: Regulates the importance of the reward. Recommended to \
                      keep in the [0, 5] range. Higher values correspond to \
                      higher importance.
        distance_scale: Regulates the importance of the distance. Recommended to \
                        keep in the [0, 5] range. Higher values correspond to \
                        higher importance.
        accumulate_rewards: If ``True`` the rewards obtained after transitioning \
                            to a new state will accumulate. If ``False`` only the last \
                            reward will be taken into account.
        distance_function: Function to compute the distances between two \
                           groups of walkers. It will be applied row-wise \
                           to the walkers observations and it will return a \
                           vector of scalars. Defaults to l2 norm.
        ignore_clone: Dictionary containing the attribute values that will \
                      not be cloned. Its keys can be either "env" or \
                      "model", to reference the `env_states` and the \
                      `model_states`. Its values are a set of strings with \
                      the names of the attributes that will not be cloned.
        max_epochs: Maximum number of iterations that the walkers are allowed \
                    to perform.
        kwargs: Additional attributes stored in the :class:`StatesWalkers`.

    """
    super(SimpleWalkers, self).__init__(
        n_walkers=n_walkers,
        env_state_params=env_state_params,
        model_state_params=model_state_params,
        accumulate_rewards=accumulate_rewards,
        max_epochs=max_epochs,
    )

    def l2_norm(x: numpy.ndarray, y: numpy.ndarray) -> numpy.ndarray:
        return numpy.linalg.norm(x - y, axis=1)

    self._model_states = StatesModel(state_dict=model_state_params, batch_size=n_walkers)
    self._env_states = StatesEnv(state_dict=env_state_params, batch_size=n_walkers)
    self._states = self.STATE_CLASS(batch_size=n_walkers, **kwargs)
    self.distance_function = distance_function if distance_function is not None else l2_norm
    self.reward_scale = reward_scale
    self.distance_scale = distance_scale
    self._id_counter = 0
    self.ignore_clone = ignore_clone if ignore_clone is not None else {}

class SimpleWalkers(BaseWalkers):
    """
    This class is in charge of performing all the mathematical operations involved in evolving a \
    cloud of walkers.
    """

    STATE_CLASS = StatesWalkers

    def __init__(
        self,
        n_walkers: int,
        env_state_params: StateDict,
        model_state_params: StateDict,
        reward_scale: float = 1.0,
        distance_scale: float = 1.0,
        accumulate_rewards: bool = True,
        max_epochs: int = None,
        distance_function: Optional[
            Callable[[numpy.ndarray, numpy.ndarray], numpy.ndarray]
        ] = None,
        ignore_clone: Optional[Dict[str, Set[str]]] = None,
        **kwargs
    ):
        """
        Initialize a new `Walkers` instance.

        Args:
            n_walkers: Number of walkers of the instance.
            env_state_params: Dictionary to instantiate the States of an :class:`Environment`.
            model_state_params: Dictionary to instantiate the States of a :class:`Model`.
            reward_scale: Regulates the importance of the reward. Recommended to \
                          keep in the [0, 5] range. Higher values correspond to \
                          higher importance.
            distance_scale: Regulates the importance of the distance. Recommended to \
                            keep in the [0, 5] range. Higher values correspond to \
                            higher importance.
            accumulate_rewards: If ``True`` the rewards obtained after transitioning \
                                to a new state will accumulate. If ``False`` only the last \
                                reward will be taken into account.
            distance_function: Function to compute the distances between two \
                               groups of walkers. It will be applied row-wise \
                               to the walkers observations and it will return a \
                               vector of scalars. Defaults to l2 norm.
            ignore_clone: Dictionary containing the attribute values that will \
                          not be cloned. Its keys can be either "env" or \
                          "model", to reference the `env_states` and the \
                          `model_states`. Its values are a set of strings with \
                          the names of the attributes that will not be cloned.
            max_epochs: Maximum number of iterations that the walkers are allowed \
                        to perform.
            kwargs: Additional attributes stored in the :class:`StatesWalkers`.
""" super(SimpleWalkers, self).__init__( n_walkers=n_walkers, env_state_params=env_state_params, model_state_params=model_state_params, accumulate_rewards=accumulate_rewards, max_epochs=max_epochs, ) def l2_norm(x: numpy.ndarray, y: numpy.ndarray) -> numpy.ndarray: return numpy.linalg.norm(x - y, axis=1) self._model_states = StatesModel(state_dict=model_state_params, batch_size=n_walkers) self._env_states = StatesEnv(state_dict=env_state_params, batch_size=n_walkers) self._states = self.STATE_CLASS(batch_size=n_walkers, **kwargs) self.distance_function = distance_function if distance_function is not None else l2_norm self.reward_scale = reward_scale self.distance_scale = distance_scale self._id_counter = 0 self.ignore_clone = ignore_clone if ignore_clone is not None else {} def __repr__(self) -> str: """Print all the data involved in the current run of the algorithm.""" with numpy.printoptions(linewidth=100, threshold=200, edgeitems=9): try: text = self._print_stats() text += "Walkers States: {}\n".format( self._repr_state(self._states)) text += "Environment States: {}\n".format( self._repr_state(self._env_states)) text += "Model States: {}\n".format( self._repr_state(self._model_states)) return text except Exception: return super(SimpleWalkers, self).__repr__() def _print_stats(self) -> str: """Print several statistics of the current state of the swarm.""" text = "{} iteration {} Out of bounds walkers: {:.2f}% Cloned: {:.2f}%\n\n".format( self.__class__.__name__, self.epoch, 100 * self.env_states.oobs.sum() / self.n, 100 * self.states.will_clone.sum() / self.n, ) return text def get(self, name: str, default: Any = None) -> Any: """Access attributes of the :class:`Swarm` and its children.""" if hasattr(self.states, name): return getattr(self.states, name) elif hasattr(self.env_states, name): return getattr(self.env_states, name) elif hasattr(self.model_states, name): return getattr(self.model_states, name) elif hasattr(self, name): return getattr(self, name) return default def ids(self) -> List[int]: """ Return a list of unique ids for each walker state. The returned ids are integers representing the hash of the different states. """ return self.env_states.hash_values("states") def update_ids(self): """Update the unique id of each walker and store it in the :class:`StatesWalkers`.""" self.states.update(id_walkers=self.ids().copy()) @property def states(self) -> StatesWalkers: """Return the `StatesWalkers` class that contains the data used by the instance.""" return self._states @property def env_states(self) -> StatesEnv: """Return the `States` class that contains the data used by the :class:`Environment`.""" return self._env_states @property def model_states(self) -> StatesModel: """Return the `States` class that contains the data used by a Model.""" return self._model_states @property def best_state(self) -> numpy.ndarray: """Return the state of the best walker found in the current algorithm run.""" return self.states.best_state @property def best_reward(self) -> Scalar: """Return the reward of the best walker found in the current algorithm run.""" return self.states.best_reward @property def best_id(self) -> int: """ Return the id (hash value of the state) of the best walker found in the \ current algorithm run. """ return self.states.best_id @property def best_obs(self) -> numpy.ndarray: """ Return the observation corresponding to the best walker found in the \ current algorithm run. 
""" return self.states.best_obs def calculate_end_condition(self) -> bool: """ Process data from the current state to decide if the iteration process should stop. Returns: Boolean indicating if the iteration process should be finished. ``True`` means \ it should be stopped, and ``False`` means it should continue. """ non_terminal_states = numpy.logical_not(self.env_states.terminals) all_non_terminal_out_of_bounds = self.env_states.oobs[ non_terminal_states].all() max_epochs_reached = self.epoch >= self.max_epochs all_in_bounds_are_terminal = self.env_states.terminals[ self.states.in_bounds].all() return max_epochs_reached or all_non_terminal_out_of_bounds or all_in_bounds_are_terminal def calculate_distances(self) -> None: """Calculate the corresponding distance function for each observation with \ respect to another observation chosen at random. The internal :class:`StateWalkers` is updated with the relativized distance values. """ # TODO(guillemdb): Check if self.get_in_bounds_compas() works better. compas_ix = numpy.random.permutation(numpy.arange(self.n)) obs = self.env_states.observs.reshape(self.n, -1) distances = self.distance_function(obs, obs[compas_ix]) distances = relativize(distances.flatten()) self.update_states(distances=distances, compas_dist=compas_ix) def calculate_virtual_reward(self) -> None: """ Calculate the virtual reward and update the internal state. The cumulative_reward is transformed with the relativize function. \ The distances stored in the :class:`StatesWalkers` are already transformed. """ processed_rewards = relativize(self.states.cum_rewards) virt_rw = (processed_rewards**self.reward_scale * self.states.distances**self.distance_scale) self.update_states(virtual_rewards=virt_rw, processed_rewards=processed_rewards) def get_in_bounds_compas(self) -> numpy.ndarray: """ Return the indexes of walkers inside bounds chosen at random. Returns: Numpy array containing the int indexes of in bounds walkers chosen at \ random with replacement. Its length is equal to the number of walkers. """ if not self.states.in_bounds.any( ): # No need to sample if all walkers are dead. return numpy.arange(self.n) alive_indexes = numpy.arange(self.n, dtype=int)[self.states.in_bounds] compas_ix = self.random_state.permutation(alive_indexes) compas = self.random_state.choice(compas_ix, self.n, replace=True) compas[:len(compas_ix)] = compas_ix return compas def update_clone_probs(self) -> None: """ Calculate the new probability of cloning for each walker. Updates the :class:`StatesWalkers` with both the probability of cloning \ and the index of the randomly chosen companions that were selected to \ compare the virtual rewards. """ all_virtual_rewards_are_equal = (self.states.virtual_rewards == self.states.virtual_rewards[0]).all() if all_virtual_rewards_are_equal: clone_probs = numpy.zeros(self.n, dtype=float_type) compas_ix = numpy.arange(self.n) else: compas_ix = self.get_in_bounds_compas() companions = self.states.virtual_rewards[compas_ix] # This value can be negative!! clone_probs = (companions - self.states.virtual_rewards ) / self.states.virtual_rewards self.update_states(clone_probs=clone_probs, compas_clone=compas_ix) def balance(self) -> Tuple[set, set]: """ Perform an iteration of the FractalAI algorithm for balancing the \ walkers distribution. It performs the necessary calculations to determine which walkers will clone, \ and performs the cloning process. 
        Returns:
            A tuple containing two sets: The first one represents the unique ids \
            of the states for each walker at the start of the iteration. The second \
            one contains the ids of the states after the cloning process.

        """
        old_ids = set(self.states.id_walkers.copy())
        self.states.in_bounds = numpy.logical_not(self.env_states.oobs)
        self.calculate_distances()
        self.calculate_virtual_reward()
        self.update_clone_probs()
        self.clone_walkers()
        new_ids = set(self.states.id_walkers.copy())
        return old_ids, new_ids

    def clone_walkers(self) -> None:
        """
        Sample the clone probability distribution and clone the walkers accordingly.

        This function will update the internal :class:`StatesWalkers`, \
        :class:`StatesEnv`, and :class:`StatesModel`.
        """
        will_clone = self.states.clone_probs > self.random_state.random_sample(self.n)
        will_clone[self.env_states.oobs] = True  # Out of bounds walkers always clone
        self.update_states(will_clone=will_clone)
        clone, compas = self.states.clone()
        self._env_states.clone(
            will_clone=clone, compas_ix=compas, ignore=self.ignore_clone.get("env")
        )
        self._model_states.clone(
            will_clone=clone, compas_ix=compas, ignore=self.ignore_clone.get("model")
        )

    def reset(
        self,
        env_states: StatesEnv = None,
        model_states: StatesModel = None,
        walkers_states: StatesWalkers = None,
    ) -> None:
        """
        Restart all the internal states involved in the algorithm iteration.

        After reset a new run of the algorithm will be ready to be launched.
        """
        if walkers_states is not None:
            self.states.update(walkers_states)
        else:
            self.states.reset()
        self.update_states(env_states=env_states, model_states=model_states)
        self._epoch = 0

    def update_states(
        self, env_states: StatesEnv = None, model_states: StatesModel = None, **kwargs
    ):
        """
        Update the States variables that do not contain internal data and \
        accumulate the rewards in the internal states if applicable.

        Args:
            env_states: States containing the data associated with the Environment.
            model_states: States containing the data associated with the Model.
            **kwargs: Internal states will be updated via keyword arguments.

        """
        if kwargs:
            if kwargs.get("rewards") is not None:
                self._accumulate_and_update_rewards(kwargs["rewards"])
                del kwargs["rewards"]
            self.states.update(**kwargs)
        if isinstance(env_states, StatesEnv):
            self._env_states.update(env_states)
            if hasattr(env_states, "rewards"):
                self._accumulate_and_update_rewards(env_states.rewards)
        if isinstance(model_states, StatesModel):
            self._model_states.update(model_states)
        self.update_ids()

    def _accumulate_and_update_rewards(self, rewards: numpy.ndarray):
        """
        Use as reward either the sum of all the rewards received during the \
        current run, or use the last reward value received as reward.

        Args:
            rewards: Array containing the last rewards received by every walker.

""" if self._accumulate_rewards: if not isinstance(self.states.get("cum_rewards"), numpy.ndarray): cum_rewards = numpy.zeros(self.n) else: cum_rewards = self.states.cum_rewards cum_rewards = cum_rewards + rewards else: cum_rewards = rewards self.update_states(cum_rewards=cum_rewards) @staticmethod def _repr_state(state): string = "\n" for k, v in state.items(): if k in ["observs", "states", "id_walkers", "best_id"]: continue shape = v.shape if hasattr(v, "shape") else None new_str = ( "{}: shape {} Mean: {:.3f}, Std: {:.3f}, Max: {:.3f} Min: {:.3f}\n" .format(k, shape, *statistics_from_array(v)) if isinstance(v, numpy.ndarray) and "best" not in k else ("%s %s\n" % (k, v if not isinstance(v, numpy.ndarray) else v.flatten()))) string += new_str return string def fix_best(self): """Ensure the best state found is assigned to the last walker of the \ swarm, so walkers can always choose to clone to the best state.""" pass
def create_model_states(model: BaseModel, batch_size: int = 10):
    return StatesModel(batch_size=batch_size, state_dict=model.get_params_dict())

def _custom_domain_function():
    env = custom_domain_function()
    params = {"actions": {"dtype": numpy.float64, "size": (2,)}}
    states = StatesModel(state_dict=params, batch_size=N_WALKERS)
    return env, states

def _local_minimizer():
    env = local_minimizer()
    params = {"actions": {"dtype": numpy.float64, "size": (2,)}}
    states = StatesModel(state_dict=params, batch_size=N_WALKERS)
    return env, states

def create_model_states(self, model: BaseModel, batch_size: int = None):
    batch_size = self.BATCH_SIZE if batch_size is None else batch_size
    return StatesModel(batch_size=batch_size, state_dict=model.get_params_dict())

def create_model_states(self, model, batch_size: int = None):
    return StatesModel(batch_size=batch_size, state_dict=model.get_params_dict())

def _function():
    env = function()
    params = {"actions": {"dtype": judo.float64, "size": (2,)}}
    states = StatesModel(state_dict=params, batch_size=N_WALKERS)
    return env, states