Example 1
    def predict(self, root_env_states: StatesEnv, walkers: StepWalkers) -> StatesModel:
        """
        Select the ``init_action`` and ``init_dt`` of the best walker found \
        during the internal swarm run.

        Args:
            root_env_states: :env-st:`StatesEnv` class containing the data \
                            corresponding to the root walker of a :class:`StepSwarm`.
            walkers: :walkers:`StepWalkers` used by the internal swarm of a \
                     :class:`StepSwarm`.

        Returns:
            :class:`StatesModel` containing the ``actions`` and ``dt`` that the root walker
            will use to step the :env:`Environment`.

        """
        init_actions = walkers.states.init_actions.flatten().astype(int)
        best_ix = walkers.get_best_index()
        root_model_states = StatesModel(
            batch_size=1, state_dict={"actions": {"dtype": int}, "dt": {"dtype": int}}
        )
        root_model_states.actions[:] = init_actions[best_ix]
        if hasattr(root_model_states, "dt"):
            target_dt = walkers.states.init_dt.flatten().astype(int)[best_ix]
            root_model_states.dt[:] = target_dt
        return root_model_states
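
The selection above reduces to plain array indexing once the per-walker buffers are flattened: the index of the best walker picks one action and one dt. A minimal pure-numpy sketch of that step, using made-up data in place of walkers.states.init_actions / init_dt and a hard-coded index in place of walkers.get_best_index():

import numpy

# Hypothetical per-walker samples; in the real code these come from the internal swarm.
init_actions = numpy.array([2, 0, 3, 1])
init_dt = numpy.array([4, 1, 2, 3])
best_ix = 2  # stand-in for walkers.get_best_index()

best_action = init_actions[best_ix]  # -> 3
best_dt = init_dt[best_ix]           # -> 2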
Example 2
 def test_minimizer_step(self):
     minim = local_minimizer()
     params = {"actions": {"dtype": numpy.float64, "size": (2,)}}
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     assert minim.shape == minim.shape
     states = minim.step(model_states=states, env_states=minim.reset(N_WALKERS))
     assert numpy.allclose(states.rewards.min(), 0)
Example 3
    def predict(self, root_env_states: StatesEnv, walkers: StepWalkers) -> StatesModel:
        """
        Select the most frequent ``init_action`` assigned to the internal swarm's walkers.

        The selected ``dt`` will be equal to the minimum ``init_dts`` among all \
        the walkers that sampled the selected ``init_action``.

        Args:
            root_env_states: :env-st:`StatesEnv` class containing the data \
                            corresponding to the root walker of a :class:`StepSwarm`.
            walkers: :walkers:`StepWalkers` used by the internal swarm of a \
                     :class:`StepSwarm`.

        Returns:
            :class:`StatesModel` containing the ``actions`` and ``dt`` that the root walker
            will use to step the :env:`Environment`.

        """
        init_actions = walkers.states.init_actions.flatten().astype(int)
        y = numpy.bincount(init_actions)
        most_used_action = numpy.argmax(y)  # action with the highest sample count
        root_model_states = StatesModel(
            batch_size=1, state_dict={"actions": {"dtype": int}, "dt": {"dtype": int}}
        )
        root_model_states.actions[:] = most_used_action
        if hasattr(root_model_states, "dt"):
            init_dts = walkers.states.init_dts.flatten().astype(int)
            index_dt = init_actions == most_used_action
            target_dt = init_dts[index_dt].min()
            root_model_states.dt[:] = target_dt
        return root_model_states
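
The voting logic itself is plain numpy: count how often each discrete action was sampled, pick the action with the highest count, and take the minimum dt among the walkers that sampled it. A small standalone sketch with made-up samples:

import numpy

# Hypothetical samples standing in for walkers.states.init_actions / init_dts.
init_actions = numpy.array([1, 3, 1, 0, 1, 3])
init_dts = numpy.array([5, 2, 4, 1, 3, 6])

counts = numpy.bincount(init_actions)
most_used_action = int(numpy.argmax(counts))                   # -> 1
target_dt = init_dts[init_actions == most_used_action].min()   # -> 3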
Example 4
 def test_step(self, dummy_env):
     states = dummy_env.reset()
     actions = StatesModel(actions=numpy.ones((1, 2)) * 2,
                           batch_size=1,
                           dt=numpy.ones((1, 2)))
     new_states: StatesEnv = dummy_env.step(actions, states)
     assert isinstance(new_states, StatesEnv)
     assert new_states.rewards[0].item() == 1
Example 5
 def test_step(self, function_env, batch_size):
     states = function_env.reset(batch_size=batch_size)
     actions = StatesModel(
         actions=judo.zeros(states.observs.shape),
         batch_size=batch_size,
         dt=judo.ones((1, 2)),
     )
     new_states: StatesEnv = function_env.step(actions, states)
     assert isinstance(new_states, StatesEnv)
     assert new_states.oobs[0].item() == 0
Example 6
 def _parallel_function():
     env = parallel_function()
     params = {
         "actions": {
             "dtype": numpy.float32,
             "size": (2, )
         },
         "critic": {
             "dtype": numpy.float32
         },
     }
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     return env, states
Example 7
 def _ray_function():
     init_ray()
     env = ray_function()
     params = {
         "actions": {
             "dtype": numpy.int64
         },
         "critic": {
             "dtype": numpy.float32
         }
     }
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     return env, states
Example 8
 def _classic_control_env():
     env = classic_control_env()
     params = {
         "actions": {
             "dtype": dtype.int64
         },
         "dt": {
             "dtype": dtype.float32
         }
     }
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     states.update(actions=judo.ones(N_WALKERS), dt=judo.ones(N_WALKERS))
     return env, states
Example 9
 def _parallel_environment():
     env = parallel_environment()
     params = {
         "actions": {
             "dtype": numpy.int64
         },
         "critic": {
             "dtype": numpy.float32
         }
     }
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     states.update(actions=numpy.ones(N_WALKERS),
                   critic=numpy.ones(N_WALKERS))
     return env, states
Example 10
 def _atari_env():
     env = discrete_atari_env()
     params = {
         "actions": {
             "dtype": dtype.int64
         },
         "critic": {
             "dtype": dtype.float32
         }
     }
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     states.update(actions=judo.ones(N_WALKERS),
                   critic=judo.ones(N_WALKERS))
     return env, states
Example 11
 def _custom_domain_function():
     env = custom_domain_function()
     params = {"actions": {"dtype": numpy.float64, "size": (2,)}}
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     return env, states
Example 12
 def _local_minimizer():
     env = local_minimizer()
     params = {"actions": {"dtype": numpy.float64, "size": (2,)}}
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     return env, states
Example 13
 def create_model_states(self, model: BaseModel, batch_size: int = None):
     batch_size = self.BATCH_SIZE if batch_size is None else batch_size
     return StatesModel(batch_size=batch_size,
                        state_dict=model.get_params_dict())
Example 14
def create_model_states(model: BaseModel, batch_size: int = 10):
    return StatesModel(batch_size=batch_size,
                       state_dict=model.get_params_dict())
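
The state_dict handed to StatesModel in these helpers always follows the same layout seen in the fixtures above: one entry per attribute, each with a dtype and an optional size. A hedged illustration of such a dictionary (the keys are illustrative, not taken from any particular model's get_params_dict()):

import numpy

# Illustrative params dict; a real model defines its own keys in get_params_dict().
params = {
    "actions": {"dtype": numpy.float64, "size": (2,)},
    "dt": {"dtype": numpy.int64},
}
# StatesModel(batch_size=10, state_dict=params) would then expose one array per key
# (e.g. states.actions and states.dt), presumably with a leading batch dimension of 10.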
Example 15
 def _function():
     env = function()
     params = {"actions": {"dtype": judo.float64, "size": (2, )}}
     states = StatesModel(state_dict=params, batch_size=N_WALKERS)
     return env, states
Example 16
    def __init__(self,
                 n_walkers: int,
                 env_state_params: StateDict,
                 model_state_params: StateDict,
                 reward_scale: float = 1.0,
                 distance_scale: float = 1.0,
                 accumulate_rewards: bool = True,
                 max_epochs: int = None,
                 distance_function: Optional[Callable[
                     [numpy.ndarray, numpy.ndarray], numpy.ndarray]] = None,
                 ignore_clone: Optional[Dict[str, Set[str]]] = None,
                 **kwargs):
        """
        Initialize a new `Walkers` instance.

        Args:
            n_walkers: Number of walkers of the instance.
            env_state_params: Dictionary to instantiate the States of an :class:`Environment`.
            model_state_params: Dictionary to instantiate the States of a :class:`Model`.
            reward_scale: Regulates the importance of the reward. Recommended to \
                          keep in the [0, 5] range. Higher values correspond to \
                          higher importance.
            distance_scale: Regulates the importance of the distance. Recommended to \
                            keep in the [0, 5] range. Higher values correspond to \
                            higher importance.
            accumulate_rewards: If ``True`` the rewards obtained after transitioning \
                                to a new state will accumulate. If ``False`` only the last \
                                reward will be taken into account.
            distance_function: Function to compute the distances between two \
                               groups of walkers. It will be applied row-wise \
                               to the walkers observations and it will return a \
                               vector of scalars. Defaults to l2 norm.
            ignore_clone: Dictionary containing the attribute values that will \
                          not be cloned. Its keys can be either "env" or \
                          "model", to reference the `env_states` and the \
                          `model_states`. Its values are a set of strings with \
                          the names of the attributes that will not be cloned.
            max_epochs: Maximum number of iterations that the walkers are allowed \
                       to perform.
            kwargs: Additional attributes stored in the :class:`StatesWalkers`.

        """
        super(SimpleWalkers, self).__init__(
            n_walkers=n_walkers,
            env_state_params=env_state_params,
            model_state_params=model_state_params,
            accumulate_rewards=accumulate_rewards,
            max_epochs=max_epochs,
        )

        def l2_norm(x: numpy.ndarray, y: numpy.ndarray) -> numpy.ndarray:
            return numpy.linalg.norm(x - y, axis=1)

        self._model_states = StatesModel(state_dict=model_state_params,
                                         batch_size=n_walkers)
        self._env_states = StatesEnv(state_dict=env_state_params,
                                     batch_size=n_walkers)
        self._states = self.STATE_CLASS(batch_size=n_walkers, **kwargs)
        self.distance_function = distance_function if distance_function is not None else l2_norm
        self.reward_scale = reward_scale
        self.distance_scale = distance_scale
        self._id_counter = 0
        self.ignore_clone = ignore_clone if ignore_clone is not None else {}
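
The default l2_norm defined above is the row-wise Euclidean distance the docstring refers to: given two equally shaped batches of observations it returns one scalar per walker. A quick standalone check with made-up observations:

import numpy

def l2_norm(x: numpy.ndarray, y: numpy.ndarray) -> numpy.ndarray:
    return numpy.linalg.norm(x - y, axis=1)

# Two hypothetical batches of 3 walkers with 2-dimensional observations.
x = numpy.array([[0.0, 0.0], [1.0, 1.0], [2.0, 0.0]])
y = numpy.array([[3.0, 4.0], [1.0, 1.0], [0.0, 0.0]])
print(l2_norm(x, y))  # -> [5. 0. 2.]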
Example 17
 def create_model_states(self, model, batch_size: int = None):
     return StatesModel(batch_size=batch_size,
                        state_dict=model.get_params_dict())