Example #1
 def test_states_from_data(self, env_data, batch_size, states_dim):
     env, model_states = env_data
     states = judo.zeros((batch_size, states_dim))
     observs = judo.ones((batch_size, states_dim))
     rewards = judo.arange(batch_size)
     oobs = judo.zeros(batch_size, dtype=dtype.bool)
     state = env.states_from_data(batch_size=batch_size,
                                  states=states,
                                  observs=observs,
                                  rewards=rewards,
                                  oobs=oobs)
     assert isinstance(state, StatesEnv)
     for val in state.vals():
         assert dtype.is_tensor(val)
         assert len(val) == batch_size
Example #2
    def params_to_arrays(param_dict: StateDict, n_walkers: int) -> Dict[str, Tensor]:
        """
        Create a dictionary containing the arrays specified by param_dict.

        Args:
            param_dict: Dictionary defining the attributes of the tensors.
            n_walkers: Number of items in the first dimension of the data tensors.

        Returns:
              Dictionary with the same keys as param_dict, containing arrays specified \
              by `param_dict` values.

        """
        tensor_dict = {}
        for key, val in param_dict.items():
            # Shape already includes the number of walkers. Remove walkers axis to create size.
            shape = val.get("shape")
            if shape is None:
                val_size = val.get("size")
            elif len(shape) > 1:
                val_size = shape[1:]
            else:
                val_size = val.get("size")
            # Create appropriate shapes with current state's number of walkers.
            sizes = n_walkers if val_size is None else tuple([n_walkers]) + val_size
            if "size" in val:
                del val["size"]
            if "shape" in val:
                del val["shape"]
            tensor_dict[key] = judo.zeros(sizes, **val)
        return tensor_dict
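
A quick way to see the contract is to run the same logic with plain NumPy. The sketch below is self-contained and illustrative only; the example param_dict keys ("observs", "reward") are made up.

    import numpy

    # Hypothetical StateDict: each entry describes one tensor's per-walker size and dtype.
    param_dict = {
        "observs": {"size": (3,), "dtype": numpy.float32},
        "reward": {"dtype": numpy.float32},  # no size/shape -> one scalar per walker
    }
    n_walkers = 5

    tensor_dict = {}
    for key, val in param_dict.items():
        val = dict(val)  # avoid mutating the caller's dict
        shape = val.pop("shape", None)
        size = val.pop("size", None)
        # Per-walker size comes from shape[1:] when a full shape is given, otherwise from "size".
        val_size = shape[1:] if shape is not None and len(shape) > 1 else size
        sizes = n_walkers if val_size is None else (n_walkers,) + tuple(val_size)
        tensor_dict[key] = numpy.zeros(sizes, **val)

    assert tensor_dict["observs"].shape == (5, 3)
    assert tensor_dict["reward"].shape == (5,)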
Example #3
    def reset(self, batch_size: int = 1, **kwargs) -> StatesEnv:
        """
        Reset the :class:`Function` to the start of a new episode and return \
        a :class:`StatesEnv` instance describing its internal state.

        Args:
            batch_size: Number of walkers that the returned state will have.
            **kwargs: Ignored. This environment resets without using any external data.

        Returns:
            :class:`StatesEnv` instance describing the state of the :class:`Function`. \
            The first dimension of the data tensors (number of walkers) will be \
            equal to ``batch_size``.

        """
        oobs = judo.zeros(batch_size, dtype=judo.bool)
        new_points = self.sample_bounds(batch_size=batch_size)
        rewards = self.function(new_points).flatten()
        new_states = self.states_from_data(
            states=new_points,
            observs=new_points,
            rewards=rewards,
            oobs=oobs,
            batch_size=batch_size,
        )
        return new_states
Example #4
    def minimize_batch(
            self, x: typing.Tensor) -> Tuple[typing.Tensor, typing.Tensor]:
        """
        Minimize a batch of points.

        Args:
            x: Array representing a batch of points to be optimized, stacked \
               across the first dimension.

        Returns:
            Tuple of arrays containing the local optimum found for each point, \
            and an array with the values assigned to each of the points found.

        """
        x = judo.to_numpy(judo.copy(x))
        with Backend.use_backend("numpy"):
            result = judo.zeros_like(x)
            rewards = judo.zeros((x.shape[0], 1))
            for i in range(x.shape[0]):
                new_x, reward = self.minimize_point(x[i, :])
                result[i, :] = new_x
                rewards[i, :] = float(reward)
        self.bounds.high = tensor(self.bounds.high)
        self.bounds.low = tensor(self.bounds.low)
        result, rewards = tensor(result), tensor(rewards)
        return result, rewards
Example #5
 def test_points_in_bounds(self, bounds_fixture):
     zeros = judo.zeros((3, 3))
     assert all(bounds_fixture.points_in_bounds(zeros))
     tens = judo.ones((3, 3)) * 10.0
     res = bounds_fixture.points_in_bounds(tens)
     assert not res.any(), (res, tens)
     tens = tensor([[-10, 0, 1], [0, 0, 0], [10, 10, 10]])
     assert sum(bounds_fixture.points_in_bounds(tens)) == 1
Example #6
 def test_step(self, function_env, batch_size):
     states = function_env.reset(batch_size=batch_size)
     actions = StatesModel(
         actions=judo.zeros(states.observs.shape),
         batch_size=batch_size,
         dt=judo.ones((1, 2)),
     )
     new_states: StatesEnv = function_env.step(actions, states)
     assert isinstance(new_states, StatesEnv)
     assert new_states.oobs[0].item() == 0
Example #7
def small_tree():
    """Build a small directed tree whose nodes and edges carry dummy tensor payloads."""
    node_data = {"a": judo.arange(10), "b": judo.zeros(10)}
    edge_data = {"c": judo.ones(10)}
    g = networkx.DiGraph()
    for i in range(8):
        g.add_node(to_node_id(i), **node_data)
    pairs = [(0, 1), (1, 2), (2, 3), (2, 4), (2, 5), (3, 6), (3, 7)]
    for a, b in pairs:
        g.add_edge(to_node_id(a), to_node_id(b), **edge_data)
    return g
Example #8
 def test_calculate_end_condition(self, walkers):
     walkers.reset()
     walkers.env_states.update(oobs=judo.ones(walkers.n, dtype=dtype.bool))
     assert walkers.calculate_end_condition()
     walkers.env_states.update(oobs=judo.zeros(walkers.n, dtype=dtype.bool))
     assert not walkers.calculate_end_condition()
     walkers.max_epochs = 10
     walkers._epoch = 8
     assert not walkers.calculate_end_condition()
     walkers._epoch = 11
     assert walkers.calculate_end_condition()
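
The condition this test exercises can be summarized in a standalone sketch: the run ends when every walker is out of bounds or when the epoch counter exceeds max_epochs. The real method may check additional criteria (such as a reward limit), so treat this as an illustration, not the library's implementation.

    import numpy

    def end_condition(oobs: numpy.ndarray, epoch: int, max_epochs: int) -> bool:
        # Illustrative only: stop when all walkers are out of bounds or the epoch budget is spent.
        return bool(oobs.all()) or epoch > max_epochs

    assert end_condition(numpy.ones(4, dtype=bool), epoch=0, max_epochs=10)
    assert not end_condition(numpy.zeros(4, dtype=bool), epoch=8, max_epochs=10)
    assert end_condition(numpy.zeros(4, dtype=bool), epoch=11, max_epochs=10)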
Example #9
    def test_get_best_index(self, walkers):
        # Rewards = [1,1,...] InBounds = [0,0,...]
        walkers.states.update(cum_rewards=judo.ones(walkers.n),
                              in_bounds=judo.zeros(walkers.n,
                                                   dtype=dtype.bool))
        best_idx = walkers.get_best_index()
        # If there are no in_bound rewards, the last walker is returned
        assert best_idx == walkers.n - 1

        # Some OOB rewards
        #
        # Rewards = [0,1,0,...] InBounds = [0,1,...]
        oobs_best_idx = 1
        oobs_rewards = judo.zeros(walkers.n)
        oobs_rewards[oobs_best_idx] = 1
        some_oobs = judo.zeros(walkers.n)
        some_oobs[oobs_best_idx] = 1
        walkers.states.update(cum_rewards=oobs_rewards,
                              in_bounds=judo.astype(some_oobs, dtype.bool))
        best_idx = walkers.get_best_index()
        assert best_idx == oobs_best_idx

        # If the walkers are minimizing, set all but one reward to 1.0
        # If the walkers are maximizing, set all but one reward to 0.0
        positive_val = 0.0 if walkers.minimize else 1.0
        negative_val = 1.0 if walkers.minimize else 0.0
        # Rewards = [-,+,-,-,-,...] InBounds = [1,...]
        mixed_rewards = judo.full((walkers.n, ),
                                  fill_value=negative_val,
                                  dtype=dtype.float)
        mixed_best = 1  # could be any index
        mixed_rewards[mixed_best] = positive_val
        walkers.states.update(cum_rewards=mixed_rewards,
                              in_bounds=judo.ones(walkers.n, dtype=dtype.bool))
        best_idx = walkers.get_best_index()
        assert best_idx == mixed_best
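
The selection rule this test implies can be sketched in plain NumPy: mask the cumulative rewards with in_bounds, take the argmin or argmax depending on minimize, and fall back to the last walker when nothing is in bounds. This is a reading of the test, not the library's implementation.

    import numpy

    def get_best_index_sketch(cum_rewards, in_bounds, minimize=False):
        # Prefer in-bounds walkers; if none are in bounds, return the last index.
        in_bounds = numpy.asarray(in_bounds, dtype=bool)
        if not in_bounds.any():
            return len(cum_rewards) - 1
        masked = numpy.where(in_bounds, cum_rewards, numpy.inf if minimize else -numpy.inf)
        return int(masked.argmin() if minimize else masked.argmax())

    assert get_best_index_sketch(numpy.ones(4), numpy.zeros(4)) == 3      # nothing in bounds
    assert get_best_index_sketch(numpy.array([0.0, 1.0, 0.0, 0.0]),
                                 numpy.array([0, 1, 0, 0])) == 1          # in-bounds walker wins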
Example #10
    def test_clone(self, states_class):
        batch_size = 10
        states = states_class(batch_size=batch_size)
        states.miau = judo.arange(states.n)
        states.miau_2 = judo.arange(states.n)

        will_clone = judo.zeros(states.n, dtype=judo.bool)
        will_clone[3:6] = True
        compas_ix = tensor(list(range(states.n))[::-1])

        states.clone(will_clone=will_clone, compas_ix=compas_ix)
        target_1 = judo.arange(10)

        assert bool(
            judo.all(target_1 == states.miau)), (target_1 - states.miau,
                                                 states_class)
Example #11
    def _accumulate_and_update_rewards(self, rewards: Tensor):
        """
        Use as the reward either the sum of all rewards received during the \
        current run, or only the last reward value received.

        Args:
            rewards: Array containing the last rewards received by every walker.
        """
        if self._accumulate_rewards:
            if self.states.get("cum_rewards") is None:
                cum_rewards = judo.zeros(rewards.shape[0])
            else:
                cum_rewards = self.states.cum_rewards
            cum_rewards = cum_rewards + rewards
        else:
            cum_rewards = rewards
        self.update_states(cum_rewards=cum_rewards)
Example #12
 def test_accumulate_rewards(self, walkers):
     walkers.reset()
     walkers._accumulate_rewards = True
     walkers.states.update(
          cum_rewards=[0, 0])  # Override the float array, then set it to None below
     walkers.states.update(cum_rewards=None)
     rewards = judo.arange(len(walkers))
     walkers._accumulate_and_update_rewards(rewards)
     assert (walkers.states.cum_rewards == rewards).all()
     walkers._accumulate_rewards = False
     walkers.states.update(cum_rewards=judo.zeros(len(walkers)))
     rewards = judo.arange(len(walkers))
     walkers._accumulate_and_update_rewards(rewards)
     assert (walkers.states.cum_rewards == rewards).all()
     walkers._accumulate_rewards = True
     walkers.states.update(cum_rewards=judo.ones(len(walkers)))
     rewards = judo.arange(len(walkers))
     walkers._accumulate_and_update_rewards(rewards)
     assert (walkers.states.cum_rewards == rewards + 1).all()
Example #13
    def update_clone_probs(self) -> None:
        """
        Calculate the new probability of cloning for each walker.

        Updates the :class:`StatesWalkers` with both the probability of cloning \
        and the index of the randomly chosen companions that were selected to \
        compare the virtual rewards.
        """
        all_virtual_rewards_are_equal = (self.states.virtual_rewards ==
                                         self.states.virtual_rewards[0]).all()
        if all_virtual_rewards_are_equal:
            clone_probs = judo.zeros(self.n, dtype=dtype.float)
            compas_ix = judo.arange(self.n)
        else:
            compas_ix = self.get_in_bounds_compas()
            companions = self.states.virtual_rewards[compas_ix]
            # This value can be negative!!
            clone_probs = (companions - self.states.virtual_rewards
                           ) / self.states.virtual_rewards
        self.update_states(clone_probs=clone_probs, compas_clone=compas_ix)
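
As a quick numeric check of the formula above, here is the clone probability worked in plain NumPy for three made-up virtual rewards and companion indices; the value is negative whenever the companion scores lower.

    import numpy

    virtual_rewards = numpy.array([1.0, 2.0, 4.0])
    compas_ix = numpy.array([1, 2, 0])           # made-up companion indices
    companions = virtual_rewards[compas_ix]      # [2.0, 4.0, 1.0]

    clone_probs = (companions - virtual_rewards) / virtual_rewards
    # walker 0: (2 - 1) / 1 =  1.00
    # walker 1: (4 - 2) / 2 =  1.00
    # walker 2: (1 - 4) / 4 = -0.75  -> negative, so this walker never clones
    print(clone_probs)  # [ 1.    1.   -0.75]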
Example #14
 def reset(self):
     """Clear the internal data of the class."""
     params = self.get_params_dict()
     other_attrs = [name for name in self.keys() if name not in params]
     for attr in other_attrs:
         setattr(self, attr, None)
     self.update(
         id_walkers=judo.zeros(self.n, dtype=judo.hash_type),
         compas_dist=judo.arange(self.n),
         compas_clone=judo.arange(self.n),
         processed_rewards=judo.zeros(self.n, dtype=judo.float),
         cum_rewards=judo.zeros(self.n, dtype=judo.float),
         virtual_rewards=judo.ones(self.n, dtype=judo.float),
         distances=judo.zeros(self.n, dtype=judo.float),
         clone_probs=judo.zeros(self.n, dtype=judo.float),
         will_clone=judo.zeros(self.n, dtype=judo.bool),
         in_bounds=judo.ones(self.n, dtype=judo.bool),
     )
Example #15
    def sample_bounds(self, batch_size: int) -> typing.Tensor:
        """
        Return a matrix of points sampled uniformly from the :class:`Function` \
        domain.

        Args:
            batch_size: Number of points that will be sampled.

        Returns:
            Array containing ``batch_size`` points that lie inside the \
            :class:`Function` domain, stacked across the first dimension.

        """
        new_points = judo.zeros(tuple([batch_size]) + self.shape,
                                dtype=judo.float32)
        for i in range(batch_size):
            values = self.random_state.uniform(
                low=judo.astype(self.bounds.low, judo.float),
                high=judo.astype(self.bounds.high, judo.float32),
            )
            values = judo.astype(values, self.bounds.low.dtype)
            new_points[i, :] = values

        return new_points
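
The per-point loop above can also be written as one vectorized draw. The NumPy sketch below is illustrative and assumes bounds given as plain low/high arrays rather than the library's Bounds object.

    import numpy

    def sample_bounds_vectorized(low, high, batch_size, rng=None):
        # Draw batch_size points uniformly inside [low, high) in a single call.
        rng = rng or numpy.random.default_rng()
        low = numpy.asarray(low, dtype=numpy.float32)
        high = numpy.asarray(high, dtype=numpy.float32)
        return rng.uniform(low=low, high=high, size=(batch_size,) + low.shape).astype(low.dtype)

    points = sample_bounds_vectorized(low=[-1.0, -1.0], high=[1.0, 1.0], batch_size=4)
    assert points.shape == (4, 2)
    assert (points >= -1.0).all() and (points <= 1.0).all()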
Example #16
def fai_iteration(
    observs: Tensor,
    rewards: Tensor,
    oobs: Tensor = None,
    dist_coef: float = 1.0,
    reward_coef: float = 1.0,
    eps=1e-8,
    other_reward: Tensor = 1.0,
):
    """Perform a FAI iteration."""
    oobs = oobs if oobs is not None else judo.zeros(rewards.shape,
                                                    dtype=dtype.bool)
    virtual_reward = calculate_virtual_reward(
        observs,
        rewards,
        oobs,
        dist_coef=dist_coef,
        reward_coef=reward_coef,
        other_reward=other_reward,
    )
    compas_ix, will_clone = calculate_clone(virtual_rewards=virtual_reward,
                                            oobs=oobs,
                                            eps=eps)
    return compas_ix, will_clone
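
For intuition, the sketch below strings together the steps the snippets in this section suggest: a virtual reward built from each walker's reward and its distance to a random companion, followed by the clone-probability rule shown in Example #13. The normalization and the exact distance term are assumptions; the library's calculate_virtual_reward and calculate_clone may differ.

    import numpy

    def fai_iteration_sketch(observs, rewards, dist_coef=1.0, reward_coef=1.0, eps=1e-8, rng=None):
        # Simplified, self-contained FAI-style iteration (illustrative, not the library code).
        rng = rng or numpy.random.default_rng()
        n = len(rewards)

        def normalize(x):
            return (x - x.min()) / (x.max() - x.min() + eps)

        # Distance of every walker to a randomly chosen companion.
        compas_dist = rng.integers(0, n, size=n)
        distances = numpy.linalg.norm(observs - observs[compas_dist], axis=1)
        # Combine normalized distance and reward into a virtual reward (assumed normalization).
        virtual_reward = normalize(distances) ** dist_coef * normalize(rewards) ** reward_coef + eps
        # Clone rule from Example #13: compare against another random companion.
        compas_clone = rng.integers(0, n, size=n)
        clone_probs = (virtual_reward[compas_clone] - virtual_reward) / virtual_reward
        will_clone = clone_probs > rng.random(n)
        return compas_clone, will_clone

    observs = numpy.random.default_rng(0).normal(size=(8, 2))
    rewards = numpy.arange(8, dtype=float)
    compas_ix, will_clone = fai_iteration_sketch(observs, rewards)
    assert compas_ix.shape == (8,) and will_clone.shape == (8,)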
Example #17
 def test_from_judo(self, backend):
     x = judo.zeros((10, 10))
     assert judo.sqrt(x).sum() == 0
Example #18
 def best_state(self):
     return judo.zeros(self.shape)
Example #19
 def reset(self):
     """Reset the data of the :class:`StepStatesWalkers`."""
     super(StepStatesWalkers, self).reset()
     self.update(init_actions=judo.zeros((len(self), 1)),
                 init_dt=judo.ones((len(self), 1)))
Example #20
    def __init__(self,
                 n_walkers: int,
                 env_state_params: StateDict,
                 model_state_params: StateDict,
                 reward_scale: float = 1.0,
                 distance_scale: float = 1.0,
                 max_epochs: int = None,
                 accumulate_rewards: bool = True,
                 distance_function: Optional[DistanceFunction] = None,
                 ignore_clone: Optional[Dict[str, Set[str]]] = None,
                 critic: Optional[BaseCritic] = None,
                 minimize: bool = False,
                 reward_limit: float = None,
                 fix_best: bool = True,
                 **kwargs):
        """
        Initialize a :class:`Walkers`.

        Args:
            n_walkers: Number of walkers of the instance.
            env_state_params: Dictionary to instantiate the States of an :class:`Environment`.
            model_state_params: Dictionary to instantiate the States of a :class:`Model`.
            reward_scale: Regulates the importance of the reward. Recommended to \
                          keep in the [0, 5] range. Higher values correspond to \
                          higher importance.
            distance_scale: Regulates the importance of the distance. Recommended to \
                            keep in the [0, 5] range. Higher values correspond to \
                            higher importance.
            max_epochs: Maximum number of iterations that the walkers are allowed \
                       to perform.
            accumulate_rewards: If ``True`` the rewards obtained after transitioning \
                                to a new state will accumulate. If ``False`` only the last \
                                reward will be taken into account.
            distance_function: Function to compute the distances between two \
                               groups of walkers. It will be applied row-wise \
                               to the walkers' observations and will return a \
                               vector of typing_.Scalars. Defaults to the L2 norm.
            ignore_clone: Dictionary containing the attribute values that will \
                          not be cloned. Its keys can be either "env" or \
                          "model", referencing the `env_states` and the \
                          `model_states`. Its values are sets of strings with \
                          the names of the attributes that will not be cloned.
            critic: :class:`Critic` that will be used to calculate custom rewards.
            minimize: If ``True`` the algorithm will perform a minimization \
                      process. If ``False`` it will be a maximization process.
            reward_limit: The algorithm run will stop after reaching this \
                          reward value. If you are running a minimization process \
                          it will be considered the minimum reward possible, and \
                          if you are maximizing a reward it will be the maximum \
                          value.
            fix_best: If ``True``, override the last walker of the Swarm with the \
                      best walker at the beginning of each epoch.
            kwargs: Additional attributes stored in the :class:`StatesWalkers`.

        """
        # Add data specific to the child class in the StatesWalkers class as new attributes.
        if critic is not None:
            kwargs["critic_score"] = kwargs.get("critic_score",
                                                judo.zeros(n_walkers))
        self.dtype = dtype.float
        best_state, best_obs, best_reward, best_id = (None, None, numpy.NINF,
                                                      None)
        super(Walkers, self).__init__(n_walkers=n_walkers,
                                      env_state_params=env_state_params,
                                      model_state_params=model_state_params,
                                      reward_scale=reward_scale,
                                      distance_scale=distance_scale,
                                      max_epochs=max_epochs,
                                      accumulate_rewards=accumulate_rewards,
                                      distance_function=distance_function,
                                      ignore_clone=ignore_clone,
                                      best_reward=best_reward,
                                      best_obs=best_obs,
                                      best_state=best_state,
                                      best_id=best_id,
                                      **kwargs)
        self.critic = critic
        self.minimize = minimize
        self.efficiency = 0
        self._min_entropy = 0
        if reward_limit is None:
            reward_limit = numpy.NINF if self.minimize else numpy.inf
        self.reward_limit = reward_limit
        self.clone_to_best = fix_best
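
To show how these arguments fit together, here is a minimal construction sketch. The import path and the StateDict entries are assumptions made for illustration (the dictionary format follows params_to_arrays in Example #2); only the keyword arguments documented above come from the signature.

    import judo
    from fragile.core.walkers import Walkers  # assumed import path, adjust to your version

    # Hypothetical state dictionaries in the {"name": {"size": ..., "dtype": ...}} format.
    env_params = {
        "states": {"size": (2,), "dtype": judo.float32},
        "observs": {"size": (2,), "dtype": judo.float32},
        "rewards": {"dtype": judo.float32},
        "oobs": {"dtype": judo.bool},
    }
    model_params = {
        "actions": {"size": (2,), "dtype": judo.float32},
        "dt": {"dtype": judo.float32},
    }

    walkers = Walkers(
        n_walkers=16,
        env_state_params=env_params,
        model_state_params=model_params,
        reward_scale=1.0,
        distance_scale=1.0,
        max_epochs=100,
        minimize=True,
    )
    walkers.reset()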