def reset(
    self,
    walkers_states: StatesWalkers = None,
    model_states: StatesModel = None,
    env_states: StatesEnv = None,
):
    """
    Reset the :class:`fragile.Walkers`, the :class:`Environment`, the \
    :class:`Model` and clear the internal data to start a new search process.

    Args:
        model_states: :class:`StatesModel` that define the initial state of \
                      the :class:`Model`.
        env_states: :class:`StatesEnv` that define the initial state of \
                    the :class:`Environment`.
        walkers_states: :class:`StatesWalkers` that define the internal \
                        states of the :class:`Walkers`.

    """
    env_states = self.env.reset(batch_size=self.walkers.n) if env_states is None else env_states
    model_states = (
        self.model.reset(batch_size=self.walkers.n, env_states=env_states)
        if model_states is None
        else model_states
    )
    model_states.update(init_actions=model_states.actions)
    self.walkers.reset(env_states=env_states, model_states=model_states)
    if self._use_tree:
        root_ids = numpy.array([self.tree.ROOT_HASH] * self.walkers.n)
        self.walkers.states.id_walkers = root_ids
        self.tree.reset(
            env_states=self.walkers.env_states,
            model_states=self.walkers.model_states,
            walkers_states=walkers_states,
        )
        self.update_tree(root_ids.tolist())
def update_states_with_critic(
    self, actions: numpy.ndarray, batch_size: int, model_states: StatesModel, **kwargs
) -> StatesModel:
    """
    Compute the time steps generated by the critic and add them to \
    `model_states`. If there is no Critic the default value of dt will be a \
    vector of 1.

    Args:
        actions: Numpy array representing the actions calculated by the model.
        batch_size: Same batch size used when calling `sample`.
        model_states: Same model_states used when calling `sample`.
        **kwargs: Kwargs for `critic.calculate`.

    Returns:
        model_states updated with the actions and the dt calculated by the Critic.

    """
    if self.critic is not None:
        critic_states = self.critic.calculate(
            batch_size=batch_size, model_states=model_states, **kwargs
        )
        dt = (
            critic_states.critic_score.astype(int)
            if isinstance(critic_states.critic_score, numpy.ndarray)
            else critic_states.critic_score
        )
        model_states.update(actions=actions, other=critic_states, dt=dt)
    else:
        dt = numpy.ones(batch_size, dtype=int)
        model_states.update(actions=actions, critic_score=dt, dt=dt)
    return model_states
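# Illustrative sketch (not library code): the dt handling above reduces to the following
# plain-numpy rule, assuming `critic_score` is either None or a float array of per-walker scores.
import numpy


def _dt_from_critic_score(critic_score, batch_size: int):
    """Return integer time steps: the critic score cast to int, or a vector of ones."""
    if critic_score is None:
        return numpy.ones(batch_size, dtype=int)
    return critic_score.astype(int) if isinstance(critic_score, numpy.ndarray) else critic_score


# Example: a score of [1.7, 3.2] becomes dt = [1, 3]; with no critic, dt = [1, 1, ..., 1].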
def update_states_with_critic(
    self, actions: numpy.ndarray, batch_size: int, model_states: StatesModel, **kwargs
) -> StatesModel:
    """
    Compute the values generated by the critic and add them, together with \
    the actions, to `model_states`. If there is no Critic, only the actions \
    are added.

    Args:
        actions: Numpy array representing the actions calculated by the model.
        batch_size: Same batch size used when calling `sample`.
        model_states: Same model_states used when calling `sample`.
        **kwargs: Kwargs for `critic.calculate`.

    Returns:
        model_states updated with the actions and, when a Critic is present, \
        the data it calculated.

    """
    if self.critic is None:
        model_states.update(actions=actions)
    else:
        critic_state = self.critic.calculate(
            batch_size=batch_size, model_states=model_states, **kwargs
        )
        model_states.update(other=critic_state, actions=actions)
    return model_states
def reset(
    self,
    batch_size: int = 1,
    model_states: StatesModel = None,
    env_states: StatesEnv = None,
    *args,
    **kwargs
) -> StatesModel:
    """
    Return a new blank State for the model instance, and a valid \
    prediction based on that new state.

    Args:
        batch_size: Number of walkers of the new model `States`.
        model_states: :class:`StatesModel` corresponding to the model data.
        env_states: :class:`StatesEnv` containing the environment data.
        *args: Passed to `predict`.
        **kwargs: Passed to `predict`.

    Returns:
        New model states containing sampled data.

    """
    if batch_size is None and env_states is None:
        raise ValueError("env_states and batch_size cannot be both None.")
    batch_size = batch_size or env_states.n
    self.pop_size = batch_size
    self._count_eval = 0
    self._init_algorithm_params(batch_size)
    model_states = model_states or self.create_new_states(batch_size=batch_size)
    # Take the first sample from a random normal distribution
    init_actions = self.random_state.randn(self.mu_const)
    self.x_mean = numpy.matmul(init_actions.T, self.weights_const)
    actions = self._sample_actions()
    model_states.update(actions=actions)
    return model_states
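# Illustrative sketch (hypothetical shapes, not taken from the library): the recombination
# `numpy.matmul(init_actions.T, self.weights_const)` above computes a weighted mean of sampled
# candidates. Assuming `samples` has shape (mu, n_dims) and `weights` has shape (mu,) and sums
# to one, the weighted mean has shape (n_dims,):
import numpy

mu, n_dims = 5, 3
samples = numpy.random.randn(mu, n_dims)   # one candidate solution per row
weights = numpy.full(mu, 1.0 / mu)         # recombination weights summing to one
x_mean = numpy.matmul(samples.T, weights)  # weighted mean candidate, shape (n_dims,)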
def reset(
    self,
    root_walker: OneWalker = None,
    walkers_states: StatesWalkers = None,
    model_states: StatesModel = None,
    env_states: StatesEnv = None,
):
    """
    Reset the :class:`fragile.Walkers`, the :class:`Environment`, the \
    :class:`Model` and clear the internal data to start a new search process.

    Args:
        root_walker: Walker representing the initial state of the search. \
                     The walkers will be reset to this walker, and it will \
                     be added to the root of the :class:`StateTree` if any.
        model_states: :class:`StatesModel` that define the initial state of \
                      the :class:`Model`.
        env_states: :class:`StatesEnv` that define the initial state of \
                    the :class:`Environment`.
        walkers_states: :class:`StatesWalkers` that define the internal \
                        states of the :class:`Walkers`.

    """
    self._epoch = 0
    env_states = (
        self.env.reset(batch_size=self.walkers.n) if env_states is None else env_states
    )
    # Add corresponding root_walkers data to env_states
    if root_walker is not None:
        if not isinstance(root_walker, OneWalker):
            raise ValueError(
                "Root walker needs to be an "
                "instance of OneWalker, got %s instead." % type(root_walker)
            )
        env_states = self._update_env_with_root(root_walker=root_walker, env_states=env_states)
    model_states = (
        self.model.reset(batch_size=self.walkers.n, env_states=env_states)
        if model_states is None
        else model_states
    )
    model_states.update(init_actions=model_states.actions)
    self.walkers.reset(env_states=env_states, model_states=model_states)
    if self._use_tree:
        if root_walker is not None:
            self.tree.reset(root_hash=int(root_walker.id_walkers))
        root_ids = numpy.array([self.tree.root_hash] * self.walkers.n)
        self.tree.reset(
            root_hash=int(self.tree.root_hash),
            env_states=self.walkers.env_states,
            model_states=self.walkers.model_states,
            walkers_states=walkers_states,
        )
        ids: List[int] = root_ids.tolist()
        self.update_tree(states_ids=ids)
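# Usage sketch (illustrative; assumes `swarm` is an already-constructed Swarm and `walker` is a
# OneWalker holding a known starting state; both names are placeholders):
def restart_search(swarm, walker=None):
    # Reset from a freshly reset Environment, or from a specific root walker that will also be
    # registered as the root of the StateTree, if the swarm keeps one.
    if walker is None:
        swarm.reset()
    else:
        swarm.reset(root_walker=walker)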
async def reset(
    self,
    root_walker: OneWalker = None,
    walkers_states: StatesWalkers = None,
    model_states: StatesModel = None,
    env_states: StatesEnv = None,
):
    """
    Reset the :class:`fragile.Walkers`, the :class:`Environment`, the \
    :class:`Model` and clear the internal data to start a new search process.

    Args:
        root_walker: Walker representing the initial state of the search. \
                     The walkers will be reset to this walker, and it will \
                     be added to the root of the :class:`StateTree` if any.
        model_states: :class:`StatesModel` that define the initial state of \
                      the :class:`Model`.
        env_states: :class:`StatesEnv` that define the initial state of \
                    the :class:`Environment`.
        walkers_states: :class:`StatesWalkers` that define the internal \
                        states of the :class:`Walkers`.

    """
    self._epoch = 0
    n_walkers = self.walkers.get("n_walkers")
    env_states = (
        await self.env.reset.remote(batch_size=n_walkers)
        if env_states is None
        else env_states
    )
    # Add corresponding root_walkers data to env_states
    if root_walker is not None:
        if not isinstance(root_walker, OneWalker):
            raise ValueError(
                "Root walker needs to be an "
                "instance of OneWalker, got %s instead." % type(root_walker)
            )
        env_states = self._update_env_with_root(root_walker=root_walker, env_states=env_states)
    model_states = (
        self.model.reset(batch_size=n_walkers, env_states=env_states)
        if model_states is None
        else model_states
    )
    model_states.update(init_actions=model_states.actions)
    self.walkers.reset(env_states=env_states, model_states=model_states)
    if self.tree is not None:
        id_walkers = self.walkers.get("id_walkers")
        root_id = id_walkers[0] if root_walker is None else copy.copy(root_walker.id_walkers)
        self.tree.reset(
            root_id=root_id,
            env_states=self.walkers.env_states,
            model_states=self.walkers.model_states,
            walkers_states=self.walkers.states,
        )
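# Usage sketch (illustrative): because this reset is a coroutine it has to be awaited, for
# example from a small asyncio driver. `swarm` stands for an instance of the distributed swarm
# that owns this method.
import asyncio


def run_reset(swarm):
    # Drive the asynchronous reset to completion from synchronous code.
    asyncio.run(swarm.reset())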
def _classic_control_env():
    env = classic_control_env()
    params = {"actions": {"dtype": dtype.int64}, "dt": {"dtype": dtype.float32}}
    states = StatesModel(state_dict=params, batch_size=N_WALKERS)
    states.update(actions=judo.ones(N_WALKERS), dt=judo.ones(N_WALKERS))
    return env, states
def _parallel_environment():
    env = parallel_environment()
    params = {"actions": {"dtype": numpy.int64}, "critic": {"dtype": numpy.float32}}
    states = StatesModel(state_dict=params, batch_size=N_WALKERS)
    states.update(actions=numpy.ones(N_WALKERS), critic=numpy.ones(N_WALKERS))
    return env, states
def _atari_env():
    env = discrete_atari_env()
    params = {"actions": {"dtype": dtype.int64}, "critic": {"dtype": dtype.float32}}
    states = StatesModel(state_dict=params, batch_size=N_WALKERS)
    states.update(actions=judo.ones(N_WALKERS), critic=judo.ones(N_WALKERS))
    return env, states
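# Illustrative sketch (hypothetical test helper, not part of the suite): each builder above
# returns an (env, states) pair sized for N_WALKERS walkers, so tests can be parametrized
# over all of them with a single fixture.
import pytest


@pytest.fixture(params=[_classic_control_env, _parallel_environment, _atari_env])
def env_and_states(request):
    # Build the environment together with a matching StatesModel of batch_size N_WALKERS.
    return request.param()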
class SimpleWalkers(BaseWalkers):
    """
    This class is in charge of performing all the mathematical operations involved in evolving a \
    cloud of walkers.
    """

    STATE_CLASS = StatesWalkers

    def __init__(
        self,
        n_walkers: int,
        env_state_params: StateDict,
        model_state_params: StateDict,
        reward_scale: float = 1.0,
        distance_scale: float = 1.0,
        accumulate_rewards: bool = True,
        max_epochs: int = None,
        distance_function: Optional[
            Callable[[numpy.ndarray, numpy.ndarray], numpy.ndarray]
        ] = None,
        ignore_clone: Optional[Dict[str, Set[str]]] = None,
        **kwargs
    ):
        """
        Initialize a new `Walkers` instance.

        Args:
            n_walkers: Number of walkers of the instance.
            env_state_params: Dictionary to instantiate the States of an :class:`Environment`.
            model_state_params: Dictionary to instantiate the States of a :class:`Model`.
            reward_scale: Regulates the importance of the reward. Recommended to \
                          keep in the [0, 5] range. Higher values correspond to \
                          higher importance.
            distance_scale: Regulates the importance of the distance. Recommended to \
                            keep in the [0, 5] range. Higher values correspond to \
                            higher importance.
            accumulate_rewards: If ``True`` the rewards obtained after transitioning \
                                to a new state will accumulate. If ``False`` only the last \
                                reward will be taken into account.
            distance_function: Function to compute the distances between two \
                               groups of walkers. It will be applied row-wise \
                               to the walkers observations and it will return a \
                               vector of scalars. Defaults to l2 norm.
            ignore_clone: Dictionary containing the attribute values that will \
                          not be cloned. Its keys can be either "env" or \
                          "model", to reference the `env_states` and the \
                          `model_states`. Its values are a set of strings with \
                          the names of the attributes that will not be cloned.
            max_epochs: Maximum number of iterations that the walkers are allowed \
                        to perform.
            kwargs: Additional attributes stored in the :class:`StatesWalkers`.
""" super(SimpleWalkers, self).__init__( n_walkers=n_walkers, env_state_params=env_state_params, model_state_params=model_state_params, accumulate_rewards=accumulate_rewards, max_epochs=max_epochs, ) def l2_norm(x: numpy.ndarray, y: numpy.ndarray) -> numpy.ndarray: return numpy.linalg.norm(x - y, axis=1) self._model_states = StatesModel(state_dict=model_state_params, batch_size=n_walkers) self._env_states = StatesEnv(state_dict=env_state_params, batch_size=n_walkers) self._states = self.STATE_CLASS(batch_size=n_walkers, **kwargs) self.distance_function = distance_function if distance_function is not None else l2_norm self.reward_scale = reward_scale self.distance_scale = distance_scale self._id_counter = 0 self.ignore_clone = ignore_clone if ignore_clone is not None else {} def __repr__(self) -> str: """Print all the data involved in the current run of the algorithm.""" with numpy.printoptions(linewidth=100, threshold=200, edgeitems=9): try: text = self._print_stats() text += "Walkers States: {}\n".format( self._repr_state(self._states)) text += "Environment States: {}\n".format( self._repr_state(self._env_states)) text += "Model States: {}\n".format( self._repr_state(self._model_states)) return text except Exception: return super(SimpleWalkers, self).__repr__() def _print_stats(self) -> str: """Print several statistics of the current state of the swarm.""" text = "{} iteration {} Out of bounds walkers: {:.2f}% Cloned: {:.2f}%\n\n".format( self.__class__.__name__, self.epoch, 100 * self.env_states.oobs.sum() / self.n, 100 * self.states.will_clone.sum() / self.n, ) return text def get(self, name: str, default: Any = None) -> Any: """Access attributes of the :class:`Swarm` and its children.""" if hasattr(self.states, name): return getattr(self.states, name) elif hasattr(self.env_states, name): return getattr(self.env_states, name) elif hasattr(self.model_states, name): return getattr(self.model_states, name) elif hasattr(self, name): return getattr(self, name) return default def ids(self) -> List[int]: """ Return a list of unique ids for each walker state. The returned ids are integers representing the hash of the different states. """ return self.env_states.hash_values("states") def update_ids(self): """Update the unique id of each walker and store it in the :class:`StatesWalkers`.""" self.states.update(id_walkers=self.ids().copy()) @property def states(self) -> StatesWalkers: """Return the `StatesWalkers` class that contains the data used by the instance.""" return self._states @property def env_states(self) -> StatesEnv: """Return the `States` class that contains the data used by the :class:`Environment`.""" return self._env_states @property def model_states(self) -> StatesModel: """Return the `States` class that contains the data used by a Model.""" return self._model_states @property def best_state(self) -> numpy.ndarray: """Return the state of the best walker found in the current algorithm run.""" return self.states.best_state @property def best_reward(self) -> Scalar: """Return the reward of the best walker found in the current algorithm run.""" return self.states.best_reward @property def best_id(self) -> int: """ Return the id (hash value of the state) of the best walker found in the \ current algorithm run. """ return self.states.best_id @property def best_obs(self) -> numpy.ndarray: """ Return the observation corresponding to the best walker found in the \ current algorithm run. 
""" return self.states.best_obs def calculate_end_condition(self) -> bool: """ Process data from the current state to decide if the iteration process should stop. Returns: Boolean indicating if the iteration process should be finished. ``True`` means \ it should be stopped, and ``False`` means it should continue. """ non_terminal_states = numpy.logical_not(self.env_states.terminals) all_non_terminal_out_of_bounds = self.env_states.oobs[ non_terminal_states].all() max_epochs_reached = self.epoch >= self.max_epochs all_in_bounds_are_terminal = self.env_states.terminals[ self.states.in_bounds].all() return max_epochs_reached or all_non_terminal_out_of_bounds or all_in_bounds_are_terminal def calculate_distances(self) -> None: """Calculate the corresponding distance function for each observation with \ respect to another observation chosen at random. The internal :class:`StateWalkers` is updated with the relativized distance values. """ # TODO(guillemdb): Check if self.get_in_bounds_compas() works better. compas_ix = numpy.random.permutation(numpy.arange(self.n)) obs = self.env_states.observs.reshape(self.n, -1) distances = self.distance_function(obs, obs[compas_ix]) distances = relativize(distances.flatten()) self.update_states(distances=distances, compas_dist=compas_ix) def calculate_virtual_reward(self) -> None: """ Calculate the virtual reward and update the internal state. The cumulative_reward is transformed with the relativize function. \ The distances stored in the :class:`StatesWalkers` are already transformed. """ processed_rewards = relativize(self.states.cum_rewards) virt_rw = (processed_rewards**self.reward_scale * self.states.distances**self.distance_scale) self.update_states(virtual_rewards=virt_rw, processed_rewards=processed_rewards) def get_in_bounds_compas(self) -> numpy.ndarray: """ Return the indexes of walkers inside bounds chosen at random. Returns: Numpy array containing the int indexes of in bounds walkers chosen at \ random with replacement. Its length is equal to the number of walkers. """ if not self.states.in_bounds.any( ): # No need to sample if all walkers are dead. return numpy.arange(self.n) alive_indexes = numpy.arange(self.n, dtype=int)[self.states.in_bounds] compas_ix = self.random_state.permutation(alive_indexes) compas = self.random_state.choice(compas_ix, self.n, replace=True) compas[:len(compas_ix)] = compas_ix return compas def update_clone_probs(self) -> None: """ Calculate the new probability of cloning for each walker. Updates the :class:`StatesWalkers` with both the probability of cloning \ and the index of the randomly chosen companions that were selected to \ compare the virtual rewards. """ all_virtual_rewards_are_equal = (self.states.virtual_rewards == self.states.virtual_rewards[0]).all() if all_virtual_rewards_are_equal: clone_probs = numpy.zeros(self.n, dtype=float_type) compas_ix = numpy.arange(self.n) else: compas_ix = self.get_in_bounds_compas() companions = self.states.virtual_rewards[compas_ix] # This value can be negative!! clone_probs = (companions - self.states.virtual_rewards ) / self.states.virtual_rewards self.update_states(clone_probs=clone_probs, compas_clone=compas_ix) def balance(self) -> Tuple[set, set]: """ Perform an iteration of the FractalAI algorithm for balancing the \ walkers distribution. It performs the necessary calculations to determine which walkers will clone, \ and performs the cloning process. 
        Returns:
            A tuple containing two sets: The first one represents the unique ids \
            of the states for each walker at the start of the iteration. The second \
            one contains the ids of the states after the cloning process.

        """
        old_ids = set(self.states.id_walkers.copy())
        self.states.in_bounds = numpy.logical_not(self.env_states.oobs)
        self.calculate_distances()
        self.calculate_virtual_reward()
        self.update_clone_probs()
        self.clone_walkers()
        new_ids = set(self.states.id_walkers.copy())
        return old_ids, new_ids

    def clone_walkers(self) -> None:
        """
        Sample the clone probability distribution and clone the walkers accordingly.

        This function will update the internal :class:`StatesWalkers`, \
        :class:`StatesEnv`, and :class:`StatesModel`.
        """
        will_clone = self.states.clone_probs > self.random_state.random_sample(self.n)
        will_clone[self.env_states.oobs] = True  # Out of bounds walkers always clone
        self.update_states(will_clone=will_clone)
        clone, compas = self.states.clone()
        self._env_states.clone(
            will_clone=clone, compas_ix=compas, ignore=self.ignore_clone.get("env")
        )
        self._model_states.clone(
            will_clone=clone, compas_ix=compas, ignore=self.ignore_clone.get("model")
        )

    def reset(
        self,
        env_states: StatesEnv = None,
        model_states: StatesModel = None,
        walkers_states: StatesWalkers = None,
    ) -> None:
        """
        Restart all the internal states involved in the algorithm iteration.

        After reset a new run of the algorithm will be ready to be launched.
        """
        if walkers_states is not None:
            self.states.update(walkers_states)
        else:
            self.states.reset()
        self.update_states(env_states=env_states, model_states=model_states)
        self._epoch = 0

    def update_states(
        self, env_states: StatesEnv = None, model_states: StatesModel = None, **kwargs
    ):
        """
        Update the States variables that do not contain internal data and \
        accumulate the rewards in the internal states if applicable.

        Args:
            env_states: States containing the data associated with the Environment.
            model_states: States containing the data associated with the Model.
            **kwargs: Internal states will be updated via keyword arguments.

        """
        if kwargs:
            if kwargs.get("rewards") is not None:
                self._accumulate_and_update_rewards(kwargs["rewards"])
                del kwargs["rewards"]
            self.states.update(**kwargs)
        if isinstance(env_states, StatesEnv):
            self._env_states.update(env_states)
            if hasattr(env_states, "rewards"):
                self._accumulate_and_update_rewards(env_states.rewards)
        if isinstance(model_states, StatesModel):
            self._model_states.update(model_states)
        self.update_ids()

    def _accumulate_and_update_rewards(self, rewards: numpy.ndarray):
        """
        Use as reward either the sum of all the rewards received during the \
        current run, or use the last reward value received as reward.

        Args:
            rewards: Array containing the last rewards received by every walker.
""" if self._accumulate_rewards: if not isinstance(self.states.get("cum_rewards"), numpy.ndarray): cum_rewards = numpy.zeros(self.n) else: cum_rewards = self.states.cum_rewards cum_rewards = cum_rewards + rewards else: cum_rewards = rewards self.update_states(cum_rewards=cum_rewards) @staticmethod def _repr_state(state): string = "\n" for k, v in state.items(): if k in ["observs", "states", "id_walkers", "best_id"]: continue shape = v.shape if hasattr(v, "shape") else None new_str = ( "{}: shape {} Mean: {:.3f}, Std: {:.3f}, Max: {:.3f} Min: {:.3f}\n" .format(k, shape, *statistics_from_array(v)) if isinstance(v, numpy.ndarray) and "best" not in k else ("%s %s\n" % (k, v if not isinstance(v, numpy.ndarray) else v.flatten()))) string += new_str return string def fix_best(self): """Ensure the best state found is assigned to the last walker of the \ swarm, so walkers can always choose to clone to the best state.""" pass