Beispiel #1
0
 def test_reset_with_root_walker(self, swarm):
     """Reset the swarm from a random root walker and check its data is used everywhere."""
     swarm.reset()
     env_params = swarm.walkers.env_states.get_params_dict()
     observs_spec, states_spec = env_params["observs"], env_params["states"]
     # Use the explicit "size" when present, otherwise "shape" without its first entry.
     observ_size = observs_spec.get("size", observs_spec["shape"][1:])
     walker_state_size = states_spec.get("size", states_spec["shape"][1:])
     root_observ = judo.astype(
         random_state.random(observ_size), observs_spec["dtype"]
     )
     root_state = judo.astype(
         random_state.random(walker_state_size), states_spec["dtype"]
     )
     root_reward = 160290
     root_walker = OneWalker(
         observ=root_observ, reward=root_reward, state=root_state
     )

     swarm.reset(root_walker=root_walker)

     best_id_after_reset = swarm.best_id
     root_ids = root_walker.id_walkers
     # The best values tracked by the swarm must be the ones of the root walker.
     assert (swarm.best_state == root_state).all()
     assert (swarm.best_obs == root_observ).all(), (
         root_observ,
         tensor(swarm.best_obs),
     )
     assert swarm.best_reward == root_reward
     # Every walker must carry the root walker data.
     env_states = swarm.walkers.env_states
     assert (env_states.observs == root_observ).all()
     assert (env_states.states == root_state).all()
     assert (env_states.rewards == root_reward).all()
     # The walker ids are only compared when running on the numpy backend.
     if Backend.is_numpy():
         ids_match = swarm.walkers.states.id_walkers == root_walker.id_walkers
         assert ids_match.all()
         assert best_id_after_reset == root_ids[0]
Beispiel #2
0
def cross_clone(
    host_virtual_rewards: Tensor,
    ext_virtual_rewards: Tensor,
    host_oobs: Tensor = None,
    eps=1e-3,
):
    """
    Perform a clone operation between two different groups of points.

    Args:
        host_virtual_rewards: Virtual rewards of the host points.
        ext_virtual_rewards: Virtual rewards of the external points.
        host_oobs: Optional index/mask of host points that are always marked \
                   to clone.
        eps: Lower threshold applied to the values used as denominator of the \
             clone probability.

    Returns:
        Tuple ``(compas_ix, will_clone)`` containing the random permutation of \
        external indexes and the boolean clone decision for each point.

    """
    n_external = len(ext_virtual_rewards)
    compas_ix = random_state.permutation(judo.arange(n_external))
    host_vr = judo.astype(host_virtual_rewards.flatten(), dtype=dtype.float32)
    ext_vr = judo.astype(ext_virtual_rewards.flatten(), dtype=dtype.float32)
    # Values not greater than eps are replaced by eps before dividing.
    # NOTE(review): the denominator uses the un-permuted external rewards,
    # not the host rewards -- confirm this is the intended normalization.
    denominator = judo.where(
        ext_vr > eps, ext_vr, tensor(eps, dtype=dtype.float32)
    )
    clone_probs = (ext_vr[compas_ix] - host_vr) / denominator
    thresholds = random_state.random(len(clone_probs))
    will_clone = clone_probs.flatten() > thresholds
    if host_oobs is not None:
        will_clone[host_oobs] = True
    return compas_ix, will_clone
Beispiel #3
0
    def calculate(
        self,
        batch_size: Optional[int] = None,
        model_states: Optional[StatesModel] = None,
        env_states: Optional[StatesEnv] = None,
        walkers_states: Optional[StatesWalkers] = None,
    ) -> States:
        """
        Sample the time step values that will be applied to each point.

        The values are drawn from a normal distribution parametrized by \
        ``mean_dt`` and ``std_dt``, clipped to ``[min_dt, max_dt]`` and cast \
        to the configured dtype.

        Args:
            batch_size: Number of new points to be sampled. When it is falsy \
                        ``env_states.n`` is used instead.
            model_states: States corresponding to the model data. Not used here.
            env_states: States corresponding to the environment data. Only \
                        read to obtain the number of points.
            walkers_states: States corresponding to the walkers data. Not used here.

        Returns:
            States built with ``states_from_data`` where both ``critic_score`` \
            and ``dt`` hold the sampled time step values.

        Raises:
            ValueError: If both ``batch_size`` and ``env_states`` are ``None``.

        """
        if batch_size is None and env_states is None:
            raise ValueError("env_states and batch_size cannot be both None.")
        n_samples = batch_size if batch_size else env_states.n
        sampled = self.random_state.normal(
            loc=self.mean_dt, scale=self.std_dt, size=n_samples
        )
        clipped = judo.clip(sampled, self.min_dt, self.max_dt)
        dt = judo.astype(clipped, self._dtype)
        return self.states_from_data(
            batch_size=n_samples, critic_score=dt, dt=dt
        )
Beispiel #4
0
    def predict(
        self,
        root_env_states: StatesEnv,
        walkers: StepWalkers,
    ) -> StatesModel:
        """
        Select the most frequent ``init_action`` assigned to the internal swarm's walkers.

        The selected ``dt`` will be equal to the minimum ``init_dts`` among all \
        the walkers that sampled the selected ``init_action``. When several \
        actions share the highest count, the one with the lowest value is chosen.

        Args:
            root_env_states: :env-st:`StatesEnv` class containing the data \
                            corresponding to the root walker of a :class:`StepSwarm`.
            walkers: :walkers:`StepWalkers` used by the internal swarm of a \
                     :class:`StepSwarm`.

        Returns:
            :class:`StatesModel` containing the ``actions`` and ``dt`` that the root walkers
            will use to step the :env:`Environment`.

        """
        init_actions = judo.astype(walkers.states.init_actions.flatten(),
                                   judo.int)
        init_actions = judo.to_numpy(init_actions)
        with Backend.use_backend("numpy"):
            # action_counts[a] is the number of walkers whose initial action is a.
            action_counts = numpy.bincount(init_actions)
            # argmax returns the action with the highest count. The previous
            # ``numpy.nonzero(counts)[0][0]`` returned the smallest action that
            # appeared at least once, regardless of how often it was sampled.
            most_used_action = numpy.argmax(action_counts)
        most_used_action = tensor(most_used_action)
        root_model_states = StatesModel(
            batch_size=1,
            state_dict={
                "actions": {
                    "dtype": judo.int64
                },
                "dt": {
                    "dtype": judo.int64
                }
            },
        )
        root_model_states.actions[:] = most_used_action
        if hasattr(root_model_states, "dt"):
            init_dts = judo.astype(walkers.states.init_dts.flatten(), judo.int)
            # Only the walkers that started with the selected action are considered.
            index_dt = init_actions == most_used_action
            target_dt = init_dts[index_dt].min()
            root_model_states.dt[:] = target_dt
        return root_model_states
Beispiel #5
0
 def test_alive_compas(self, walkers):
     """With a single walker in bounds, every companion index must point to it."""
     in_bounds_mask = judo.astype(
         judo.zeros_like(walkers.env_states.oobs), dtype.bool
     )
     # Only the walker at index 3 is flagged as in bounds.
     in_bounds_mask[3] = True
     walkers.states.in_bounds = in_bounds_mask
     compas = walkers.get_in_bounds_compas()
     all_point_to_three = judo.all(compas == 3)
     assert all_point_to_three, (
         "Type of end_cond: {} end_cond: {}: alive ix: {}".format(
             type(in_bounds_mask), in_bounds_mask, walkers.states.in_bounds
         )
     )
     n_dims = len(compas.shape)
     assert n_dims == 1
Beispiel #6
0
def relativize(x: Tensor) -> Tensor:
    """
    Normalize the data using a custom smoothing technique.

    The input is standardized (zero mean, unit standard deviation) and then \
    mapped with ``log(1 + z) + 1`` for positive values and ``exp(z)`` otherwise. \
    If the standard deviation is zero a vector of ones with the dtype of the \
    input is returned.

    Args:
        x: Values to be normalized.

    Returns:
        Tensor containing the normalized values.

    """
    source = x
    values = judo.astype(x, dtype.float)
    spread = values.std()
    if float(spread) == 0:
        # All the values are equal: there is nothing to standardize.
        return judo.ones(len(values), dtype=source.dtype)
    z_scores = (values - values.mean()) / spread
    # Both branches are evaluated for every element, so the logarithm may see
    # non-positive arguments; the resulting numpy warnings are silenced.
    with numpy.errstate(invalid="ignore", divide="ignore"):
        above_mean = judo.log(1.0 + z_scores) + 1.0
        below_mean = judo.exp(z_scores)
        return judo.where(z_scores > 0.0, above_mean, below_mean)
Beispiel #7
0
    def clip(self, x: Tensor) -> Tensor:
        """
        Limit the values of the target tensor to the closed interval ``[low, high]``.

        Args:
            x: Tensor to be clipped. It is cast to float before clipping.

        Returns:
            Float tensor with all its values inside the defined bounds.

        """
        as_float = judo.astype(x, dtype.float)
        return API.clip(as_float, self.low, self.high)
Beispiel #8
0
    def predict(
        self,
        root_env_states: StatesEnv,
        walkers: StepWalkers,
    ) -> StatesModel:
        """
        Return the ``init_action`` and ``init_dt`` of the best walker found \
        during the internal swarm run.

        Args:
            root_env_states: :env-st:`StatesEnv` class containing the data \
                            corresponding to the root walker of a :class:`StepSwarm`.
            walkers: :walkers:`StepWalkers` used by the internal swarm of a \
                     :class:`StepSwarm`.

        Returns:
            :class:`StatesModel` with batch size one containing the ``actions`` and ``dt``
            that the root walkers will use to step the :env:`Environment`.

        """
        flat_actions = judo.astype(
            walkers.states.init_actions.flatten(), judo.int
        )
        best_ix = walkers.get_best_index()
        model_spec = {
            "actions": {"dtype": judo.int64},
            "dt": {"dtype": judo.int},
        }
        root_model_states = StatesModel(batch_size=1, state_dict=model_spec)
        root_model_states.actions[:] = flat_actions[best_ix]
        if hasattr(root_model_states, "dt"):
            # NOTE(review): this reads ``init_dt`` (singular) from the walkers
            # states -- confirm the attribute name against the walkers class.
            flat_dts = judo.astype(walkers.states.init_dt.flatten(), judo.int)
            root_model_states.dt[:] = flat_dts[best_ix]
        return root_model_states
Beispiel #9
0
 def get_best_index(self) -> int:
     """
     Return the index of the best state present in the :class:`Walkers` \
     that is considered alive (inside the boundary conditions of the problem). \
     If no walker is alive it will return the index of the last walker, which \
     corresponds with the best state found.

     The best state is the one with the minimum cumulative reward when \
     ``minimize`` is set, and the maximum otherwise. If several alive walkers \
     share the best value, the lowest index is returned.
     """
     cum_rewards = self.states.cum_rewards
     in_bounds = self.states.in_bounds
     alive_rewards = cum_rewards[in_bounds]
     if len(alive_rewards) == 0:
         return self.n - 1
     best = alive_rewards.min() if self.minimize else alive_rewards.max()
     # Match only alive walkers: an out-of-bounds walker holding the same
     # cumulative reward at a lower index must not be selected.
     # in_bounds is the same boolean mask used to index cum_rewards above.
     is_best_alive = (cum_rewards == best) & in_bounds
     # Cast to int before argmax, which then returns the first matching index.
     ix = judo.astype(is_best_alive, dtype.int).argmax()
     return int(ix)
Beispiel #10
0
    def sample_bounds(self, batch_size: int) -> typing.Tensor:
        """
        Sample points uniformly between the lower and upper bounds of the \
        :class:`Function` domain.

        Args:
            batch_size: Number of points that will be sampled.

        Returns:
            Tensor of shape ``(batch_size,) + self.shape`` where each row is \
            one sampled point.

        """
        output_shape = (batch_size,) + self.shape
        samples = judo.zeros(output_shape, dtype=judo.float32)
        for row in range(batch_size):
            # NOTE(review): low is cast with judo.float and high with
            # judo.float32 -- confirm whether the difference is intentional.
            lower = judo.astype(self.bounds.low, judo.float)
            upper = judo.astype(self.bounds.high, judo.float32)
            drawn = self.random_state.uniform(low=lower, high=upper)
            # The draw is cast to the dtype of the lower bound before storing it.
            samples[row, :] = judo.astype(drawn, self.bounds.low.dtype)

        return samples
Beispiel #11
0
    def points_in_bounds(self, x: Tensor) -> Union[Tensor, bool]:
        """
        Check which rows of the target tensor have all their coordinates \
        inside the bounds.

        A coordinate is inside the bounds when clipping leaves it unchanged. \
        For inputs with more than one dimension a flat vector with one boolean \
        per row is returned, otherwise a single value.

        Args:
            x: Tensor to be checked against the bounds.

        Returns:
            Booleans indicating if each row lies inside the bounds.

        """
        clipped = self.clip(x)
        unchanged = clipped == judo.astype(x, dtype.float)
        if len(unchanged.shape) > 1:
            return unchanged.all(1).flatten()
        return unchanged.all()
Beispiel #12
0
    def test_sample_with_critic(self, n_actions):
        """Check the sampled actions and the critic score when a critic is attached."""
        model = DiscreteUniform(n_actions=n_actions, critic=DummyCritic())

        # Sampling through ``predict`` using only a batch size.
        predicted = model.predict(batch_size=1000)
        predicted_actions = predicted.actions
        assert len(predicted_actions.shape) == 1
        assert len(judo.unique(predicted_actions)) <= n_actions
        assert all(predicted_actions >= 0)
        assert all(predicted_actions <= n_actions)
        assert "critic_score" in predicted.keys()
        assert (predicted.critic_score == 5).all()

        # Sampling through ``sample`` passing already existing model states.
        initial_states = create_model_states(batch_size=100, model=model)
        sampled = model.sample(
            batch_size=initial_states.n, model_states=initial_states
        )
        sampled_actions = sampled.actions
        assert len(sampled_actions.shape) == 1
        assert len(judo.unique(sampled_actions)) <= n_actions
        assert all(sampled_actions >= 0)
        assert all(sampled_actions <= n_actions)
        # The sampled actions must hold integer values.
        as_integers = judo.astype(sampled_actions, dtype.int)
        assert judo.allclose(sampled_actions, as_integers)
        assert "critic_score" in sampled.keys()
        assert (sampled.critic_score == 5).all()
Beispiel #13
0
    def get_scaled_intervals(
        low: Union[Tensor, float, int],
        high: Union[Tensor, float, int],
        scale: float,
    ) -> Tuple[Union[Tensor, float], Union[Tensor, float]]:
        """
        Scale the high and low vectors by an scale factor.

        The value of the high and low will be proportional to the maximum and minimum values of \
        the array. Scale defines the proportion to make the bounds bigger and smaller. For \
        example, if scale is 1.1 the higher bound will be 10% higher, and the lower bounds 10% \
        smaller. If scale is 0.9 the higher bound will be 10% lower, and the lower bound 10% \
        higher. If scale is one, `high` and `low` will be equal to the maximum and minimum values \
        of the array.

        Args:
            low: Lower bound to be scaled.
            high: Higher bound to be scaled.
            scale: Value representing the tolerance in percentage from the current maximum and \
            minimum values of the array.

        Returns:
            Tuple ``(xmin_scaled, xmax_scaled)`` containing the scaled lower and higher bounds.

        """
        pct = tensor(scale - 1)
        magnitude = API.abs(pct)
        big_scale = 1 + magnitude
        small_scale = 1 - magnitude
        zero = judo.astype(tensor(0.0), low.dtype)
        if pct > 0:
            # Widen the interval: move low down and high up, whatever their sign.
            xmin_scaled = API.where(low < zero, low * big_scale,
                                    low * small_scale)
            xmax_scaled = API.where(high < zero, high * small_scale,
                                    high * big_scale)
        else:
            # Narrow the interval: move low up and high down. A non-negative low
            # has to grow, so it is multiplied by big_scale (both branches used
            # small_scale before, which lowered it instead of raising it).
            xmin_scaled = API.where(low < zero, low * small_scale,
                                    low * big_scale)
            xmax_scaled = API.where(high < zero, high * big_scale,
                                    high * small_scale)
        return xmin_scaled, xmax_scaled
Beispiel #14
0
    def test_get_best_index(self, walkers):
        """Check ``get_best_index`` with no, some and all the walkers in bounds."""
        # Case 1: nobody is in bounds.
        # Rewards = [1,1,...] InBounds = [0,0,...]
        all_out = judo.zeros(walkers.n, dtype=dtype.bool)
        walkers.states.update(cum_rewards=judo.ones(walkers.n),
                              in_bounds=all_out)
        # Without in-bounds rewards the last walker is the one returned.
        assert walkers.get_best_index() == walkers.n - 1

        # Case 2: a single walker is in bounds and it also holds the top reward.
        # Rewards = [0,1,0,...] InBounds = [0,1,...]
        expected_ix = 1
        rewards = judo.zeros(walkers.n)
        rewards[expected_ix] = 1
        in_bounds_flags = judo.zeros(walkers.n)
        in_bounds_flags[expected_ix] = 1
        walkers.states.update(
            cum_rewards=rewards,
            in_bounds=judo.astype(in_bounds_flags, dtype.bool),
        )
        assert walkers.get_best_index() == expected_ix

        # Case 3: everybody is in bounds and exactly one walker is the best.
        # When minimizing, every reward but one is 1.0.
        # When maximizing, every reward but one is 0.0.
        if walkers.minimize:
            best_value, other_value = 0.0, 1.0
        else:
            best_value, other_value = 1.0, 0.0
        # Rewards = [-,+,-,-,-,...] InBounds = [1,...]
        rewards_one_best = judo.full(
            (walkers.n, ), fill_value=other_value, dtype=dtype.float
        )
        best_position = 1  # any other index would work as well
        rewards_one_best[best_position] = best_value
        walkers.states.update(
            cum_rewards=rewards_one_best,
            in_bounds=judo.ones(walkers.n, dtype=dtype.bool),
        )
        assert walkers.get_best_index() == best_position
Beispiel #15
0
    def __init__(
        self,
        high: Union[Tensor, Scalar] = numpy.inf,
        low: Union[Tensor, Scalar] = -numpy.inf,
        shape: Optional[tuple] = None,
        dtype: Optional[type] = None,
    ):
        """
        Initialize a :class:`Bounds`.

        Args:
            high: Higher value for the bound interval. If it is a typing_.Scalar \
                  it will be applied to all the coordinates of a target vector. \
                  If it is a vector, the bounds will be checked coordinate-wise. \
                  It defines a closed interval.
            low: Lower value for the bound interval. If it is a typing_.Scalar it \
                 will be applied to all the coordinates of a target vector. \
                 If it is a vector, the bounds will be checked coordinate-wise. \
                 It defines a closed interval.
            shape: Shape of the array that will be bounded. Only needed if neither `high` nor \
                   `low` has a ``shape`` attribute from which it can be inferred.
            dtype:  Data type of the array that will be bounded. It can be inferred from `high` \
                    or `low` (the type of `high` takes priority).

        Raises:
            TypeError: If `shape` is None and neither `high` nor `low` has a \
                       ``shape`` attribute.

        Examples:
            Initializing :class:`Bounds` using  numpy arrays:

            >>> import numpy
            >>> high, low = numpy.ones(3, dtype=float), -1 * numpy.ones(3, dtype=int)
            >>> bounds = Bounds(high=high, low=low)
            >>> print(bounds)
            Bounds shape float64 dtype (3,) low [-1 -1 -1] high [1. 1. 1.]

            Initializing :class:`Bounds` using  typing_.Scalars:

            >>> import numpy
            >>> high, low, shape = 4, 2.1, (5,)
            >>> bounds = Bounds(high=high, low=low, shape=shape)
            >>> print(bounds)
            Bounds shape float64 dtype (5,) low [2.1 2.1 2.1 2.1 2.1] high [4. 4. 4. 4. 4.]

        """
        # NOTE: the default of ``low`` is written as ``-numpy.inf`` because the
        # ``numpy.NINF`` alias does not exist in NumPy >= 2.0.
        # Infer shape if not specified
        if shape is None and hasattr(high, "shape"):
            shape = high.shape
        elif shape is None and hasattr(low, "shape"):
            shape = low.shape
        elif shape is None:
            raise TypeError(
                "If shape is None high or low need to have .shape attribute.")
        # High and low will be arrays of target shape: iterables are converted
        # as they are, scalars are broadcast to ``shape``.
        if not judo.is_tensor(high):
            high = tensor(high) if isinstance(
                high, _Iterable) else API.ones(shape) * high
        if not judo.is_tensor(low):
            low = tensor(low) if isinstance(
                low, _Iterable) else API.ones(shape) * low
        # NOTE(review): ``dtype`` may still be None here, so the cast relies on
        # how ``judo.astype`` handles a None dtype -- confirm.
        self.high = judo.astype(high, dtype)
        self.low = judo.astype(low, dtype)
        # An explicit dtype wins, then the dtype of high, then the dtype of low.
        if dtype is not None:
            self.dtype = dtype
        elif hasattr(high, "dtype"):
            self.dtype = high.dtype
        elif hasattr(low, "dtype"):
            self.dtype = low.dtype
        else:
            self.dtype = type(high) if high is not None else type(low)