Ejemplo n.º 1
0
 def _classic_control_env():
     """Create a classic-control environment together with model states.

     Returns:
         Tuple ``(env, states)`` where ``states`` is a :class:`StatesModel` of
         batch size ``N_WALKERS`` whose ``actions`` and ``dt`` are set to ones.

     """
     environment = classic_control_env()
     # Declare the attributes (and their dtypes) tracked by the model states.
     state_spec = dict(
         actions=dict(dtype=dtype.int64),
         dt=dict(dtype=dtype.float32),
     )
     model_states = StatesModel(state_dict=state_spec, batch_size=N_WALKERS)
     model_states.update(
         actions=judo.ones(N_WALKERS),
         dt=judo.ones(N_WALKERS),
     )
     return environment, model_states
Ejemplo n.º 2
0
 def _parallel_environment():
     """Create a parallel environment together with model states.

     Returns:
         Tuple ``(env, states)`` where ``states`` is a :class:`StatesModel` of
         batch size ``N_WALKERS`` whose ``actions`` and ``critic`` are set to ones.

     """
     environment = parallel_environment()
     # Declare the attributes (and their dtypes) tracked by the model states.
     state_spec = dict(
         actions=dict(dtype=dtype.int64),
         critic=dict(dtype=dtype.float32),
     )
     model_states = StatesModel(state_dict=state_spec, batch_size=N_WALKERS)
     model_states.update(
         actions=judo.ones(N_WALKERS),
         critic=judo.ones(N_WALKERS),
     )
     return environment, model_states
Ejemplo n.º 3
0
    def calculate(
        self,
        batch_size: Optional[int] = None,
        model_states: Optional[StatesModel] = None,
        env_states: Optional[StatesEnv] = None,
        walkers_states: Optional[StatesWalkers] = None,
    ) -> States:
        """
        Return a constant time step for every element of the batch.

        Args:
            batch_size: Number of time steps to generate. When it is falsy, \
                        ``env_states.n`` is used instead.
            model_states: States corresponding to the model data. Not used here.
            env_states: States corresponding to the environment data. Only its \
                        ``n`` attribute is read, as a fallback for `batch_size`.
            walkers_states: States corresponding to the walkers data. Not used here.

        Returns:
            States built with ``states_from_data`` that hold the same array, \
            filled with ``self.dt``, as both ``critic_score`` and ``dt``.

        Raises:
            ValueError: If `batch_size` and `env_states` are both None.

        """
        if batch_size is None and env_states is None:
            raise ValueError("env_states and batch_size cannot be both None.")
        if not batch_size:
            batch_size = env_states.n
        unit_steps = judo.ones(batch_size, dtype=self._dtype)
        time_steps = unit_steps * self.dt
        return self.states_from_data(
            batch_size=batch_size,
            critic_score=time_steps,
            dt=time_steps,
        )
Ejemplo n.º 4
0
def function_env() -> Function:
    """Build a two-dimensional :class:`Function` that returns one for every point.

    The domain is bounded by ``[-10, 10]`` on the first coordinate and by
    ``[-5, 5]`` on the second one.
    """
    lower_bound = tensor([-10, -5])
    upper_bound = tensor([10, 5])
    return Function.from_bounds_params(
        function=lambda x: judo.ones(len(x)),
        shape=(2, ),
        low=lower_bound,
        high=upper_bound,
    )
Ejemplo n.º 5
0
    def test_clone_to_imported(self, export_swarm):
        """Check that ``_clone_to_imported`` copies imported data into walker 0.

        Only the first local walker is flagged to clone, so its cumulative
        reward, state and observation must match the first imported walker.
        """
        values = [999, 777, 333]
        imported = ExportedWalkers(3)
        imported.rewards = tensor(values, dtype=dtype.float)
        imported.states = tensor(values, dtype=dtype.float)
        imported.id_walkers = tensor(values, dtype=dtype.float)
        # One row per imported walker, each repeating that walker's value.
        imported.observs = tensor([[value] * 4 for value in values],
                                  dtype=dtype.float)

        export_swarm._clone_to_imported(
            compas_ix=tensor([0, 1]),
            will_clone=tensor([True, False]),
            local_ix=tensor([0, 1]),
            import_ix=tensor([0, 1]),
            walkers=imported,
        )

        local_walkers = export_swarm.walkers
        assert local_walkers.states.cum_rewards[0] == 999.0
        assert local_walkers.env_states.states[0] == 999.0
        expected_observ = judo.ones(4) * 999
        assert (local_walkers.env_states.observs[0] == expected_observ).all()
Ejemplo n.º 6
0
 def test_points_in_bounds(self, bounds_fixture):
     """Check ``points_in_bounds`` on batches of three points.

     All-zero points must all be reported as inside, points filled with 10
     must all be reported as outside, and the mixed batch must contain
     exactly one point inside the bounds.
     """
     origin_points = judo.zeros((3, 3))
     assert all(bounds_fixture.points_in_bounds(origin_points))

     far_points = judo.ones((3, 3)) * 10.0
     far_mask = bounds_fixture.points_in_bounds(far_points)
     assert not far_mask.any(), (far_mask, far_points)

     mixed_points = tensor([[-10, 0, 1], [0, 0, 0], [10, 10, 10]])
     assert sum(bounds_fixture.points_in_bounds(mixed_points)) == 1
Ejemplo n.º 7
0
 def reset(self):
     """Clear the internal data of the class.

     Every attribute listed by ``keys()`` that is not part of
     ``get_params_dict()`` is set to None. The walkers attributes are then
     re-initialized with arrays of length ``self.n``.
     """
     declared = self.get_params_dict()
     # Collect the names first, then overwrite the attributes.
     undeclared = [key for key in self.keys() if key not in declared]
     for key in undeclared:
         setattr(self, key, None)

     size = self.n
     initial_values = dict(
         id_walkers=judo.zeros(size, dtype=judo.hash_type),
         compas_dist=judo.arange(size),
         compas_clone=judo.arange(size),
         processed_rewards=judo.zeros(size, dtype=judo.float),
         cum_rewards=judo.zeros(size, dtype=judo.float),
         virtual_rewards=judo.ones(size, dtype=judo.float),
         distances=judo.zeros(size, dtype=judo.float),
         clone_probs=judo.zeros(size, dtype=judo.float),
         will_clone=judo.zeros(size, dtype=judo.bool),
         in_bounds=judo.ones(size, dtype=judo.bool),
     )
     self.update(**initial_values)
Ejemplo n.º 8
0
def small_tree():
    """Build a small directed graph with 8 nodes and 7 edges.

    Every node stores the attributes ``a`` and ``b`` and every edge stores
    the attribute ``c``. The same arrays are shared by all nodes / edges.
    """
    tree = networkx.DiGraph()
    node_attrs = dict(a=judo.arange(10), b=judo.zeros(10))
    edge_attrs = dict(c=judo.ones(10))

    for index in range(8):
        tree.add_node(to_node_id(index), **node_attrs)

    # Each pair is a directed edge (source, target).
    edges = ((0, 1), (1, 2), (2, 3), (2, 4), (2, 5), (3, 6), (3, 7))
    for source, target in edges:
        tree.add_edge(to_node_id(source), to_node_id(target), **edge_attrs)
    return tree
Ejemplo n.º 9
0
 def test_step(self, function_env, batch_size):
     """Check that stepping with zero actions returns valid :class:`StatesEnv`.

     The first resulting walker must not be flagged as out of bounds.
     """
     env_states = function_env.reset(batch_size=batch_size)
     zero_actions = judo.zeros(env_states.observs.shape)
     model_states = StatesModel(
         actions=zero_actions,
         batch_size=batch_size,
         dt=judo.ones((1, 2)),
     )
     stepped: StatesEnv = function_env.step(model_states, env_states)
     assert isinstance(stepped, StatesEnv)
     assert stepped.oobs[0].item() == 0
Ejemplo n.º 10
0
 def calculate(
     self,
     batch_size: int = None,
     model_states: StatesModel = None,
     env_states: StatesEnv = None,
     walkers_states: StatesWalkers = None,
 ) -> States:
     """
     Return a constant critic score of 5 for every element of the batch.

     Args:
         batch_size: Number of scores to generate. When it is falsy,
             ``env_states.n`` is used instead.
         model_states: Not used here.
         env_states: Only its ``n`` attribute is read, as a fallback for
             `batch_size`.
         walkers_states: Not used here.

     Returns:
         States of size `batch_size` whose ``critic_score`` is filled with 5.

     Raises:
         ValueError: If `batch_size` and `env_states` are both None.

     """
     # Fail with an explicit message instead of dereferencing None below.
     if batch_size is None and env_states is None:
         raise ValueError("env_states and batch_size cannot be both None.")
     batch_size = batch_size or env_states.n
     return States(batch_size=batch_size,
                   critic_score=5 * judo.ones(batch_size))
Ejemplo n.º 11
0
 def test_calculate_end_condition(self, walkers):
     """Check the end condition against out-of-bounds flags and the epoch limit.

     The condition must hold when every walker is out of bounds or when the
     epoch is past ``max_epochs``, and must not hold otherwise.
     """
     walkers.reset()
     n_walkers = walkers.n

     # Every walker out of bounds: the search must stop.
     all_out = judo.ones(n_walkers, dtype=dtype.bool)
     walkers.env_states.update(oobs=all_out)
     assert walkers.calculate_end_condition()

     # No walker out of bounds: the search goes on.
     none_out = judo.zeros(n_walkers, dtype=dtype.bool)
     walkers.env_states.update(oobs=none_out)
     assert not walkers.calculate_end_condition()

     # Epoch below the limit keeps running; epoch above the limit stops.
     walkers.max_epochs = 10
     walkers._epoch = 8
     assert not walkers.calculate_end_condition()
     walkers._epoch = 11
     assert walkers.calculate_end_condition()
Ejemplo n.º 12
0
def relativize(x: Tensor) -> Tensor:
    """Normalize the data using a custom smoothing technique.

    The values are standardized to zero mean and unit standard deviation.
    Positive standardized values are mapped to ``log(1 + z) + 1`` and the
    remaining ones to ``exp(z)``. When the standard deviation is zero, an
    array of ones with the dtype of the input is returned instead.
    """
    values = judo.astype(x, dtype.float)
    spread = values.std()
    if float(spread) == 0:
        # Constant input: there is nothing to standardize.
        return judo.ones(len(values), dtype=x.dtype)
    z_scores = (values - values.mean()) / spread
    # Both branches are computed for every element before `where` selects
    # one, so the log of non-positive values is silenced on purpose.
    with numpy.errstate(invalid="ignore", divide="ignore"):
        smoothed_positive = judo.log(1.0 + z_scores) + 1.0
        smoothed_rest = judo.exp(z_scores)
        result = judo.where(z_scores > 0.0, smoothed_positive, smoothed_rest)
    return result
Ejemplo n.º 13
0
    def test_get_best_index(self, walkers):
        """Check ``get_best_index`` for three reward / in-bounds layouts."""
        n_walkers = walkers.n

        # Case 1: equal rewards and no walker inside the bounds.
        # Rewards = [1,1,...] InBounds = [0,0,...]
        walkers.states.update(
            cum_rewards=judo.ones(n_walkers),
            in_bounds=judo.zeros(n_walkers, dtype=dtype.bool),
        )
        # If there are no in_bound rewards, the last walker is returned
        assert walkers.get_best_index() == n_walkers - 1

        # Case 2: a single walker is inside the bounds and has the reward.
        # Rewards = [0,1,0,...] InBounds = [0,1,...]
        only_valid = 1
        sparse_rewards = judo.zeros(n_walkers)
        sparse_rewards[only_valid] = 1
        valid_mask = judo.zeros(n_walkers)
        valid_mask[only_valid] = 1
        walkers.states.update(
            cum_rewards=sparse_rewards,
            in_bounds=judo.astype(valid_mask, dtype.bool),
        )
        assert walkers.get_best_index() == only_valid

        # Case 3: every walker is inside the bounds and one has the best reward.
        # When minimizing the best reward is 0.0 among 1.0 values; when
        # maximizing it is 1.0 among 0.0 values.
        # Rewards = [-,+,-,-,-,...] InBounds = [1,...]
        if walkers.minimize:
            best_value, other_value = 0.0, 1.0
        else:
            best_value, other_value = 1.0, 0.0
        target = 1  # could be any index
        rewards = judo.full((n_walkers, ),
                            fill_value=other_value,
                            dtype=dtype.float)
        rewards[target] = best_value
        walkers.states.update(
            cum_rewards=rewards,
            in_bounds=judo.ones(n_walkers, dtype=dtype.bool),
        )
        assert walkers.get_best_index() == target
Ejemplo n.º 14
0
 def test_states_from_data(self, env_data, batch_size, states_dim):
     """Check that ``states_from_data`` returns a well-formed :class:`StatesEnv`.

     Every stored value must be a tensor with one entry per batch element.
     """
     env, _ = env_data
     data = dict(
         states=judo.zeros((batch_size, states_dim)),
         observs=judo.ones((batch_size, states_dim)),
         rewards=judo.arange(batch_size),
         oobs=judo.zeros(batch_size, dtype=dtype.bool),
     )
     new_states = env.states_from_data(batch_size=batch_size, **data)
     assert isinstance(new_states, StatesEnv)
     for value in new_states.vals():
         assert dtype.is_tensor(value)
         assert len(value) == batch_size
Ejemplo n.º 15
0
 def test_accumulate_rewards(self, walkers):
     """Check ``_accumulate_and_update_rewards`` with and without accumulation."""
     walkers.reset()
     n_walkers = len(walkers)

     # Accumulating on top of missing cumulative rewards yields the rewards.
     walkers._accumulate_rewards = True
     # Override array of Floats and set to None
     walkers.states.update(cum_rewards=[0, 0])
     walkers.states.update(cum_rewards=None)
     new_rewards = judo.arange(n_walkers)
     walkers._accumulate_and_update_rewards(new_rewards)
     assert (walkers.states.cum_rewards == new_rewards).all()

     # Without accumulation the cumulative rewards are the new rewards.
     walkers._accumulate_rewards = False
     walkers.states.update(cum_rewards=judo.zeros(n_walkers))
     new_rewards = judo.arange(n_walkers)
     walkers._accumulate_and_update_rewards(new_rewards)
     assert (walkers.states.cum_rewards == new_rewards).all()

     # With accumulation the new rewards are added to the stored ones.
     walkers._accumulate_rewards = True
     walkers.states.update(cum_rewards=judo.ones(n_walkers))
     new_rewards = judo.arange(n_walkers)
     walkers._accumulate_and_update_rewards(new_rewards)
     assert (walkers.states.cum_rewards == new_rewards + 1).all()
Ejemplo n.º 16
0
    def __init__(self, batch_size: int, state_dict: StateDict = None):
        """
        Initialize a :class:`ExportedWalkers`.

        Args:
            batch_size: Number of walkers that will be exported.
            state_dict: External :class:`typing.StateDict` that overrides the default values. \
                        Keys that are not in the default parameters are ignored.

        """
        self.id_walkers = None
        self.rewards = None
        self.observs = None
        self.states = None

        # Start from the default parameters and apply the external overrides
        walkers_dict = self.get_params_dict()
        if state_dict is not None:
            overrides = {
                name: spec
                for name, spec in state_dict.items() if name in walkers_dict
            }
            walkers_dict.update(overrides)
        super(ExportedWalkers, self).__init__(batch_size=batch_size, state_dict=walkers_dict)
        # Set to ones to avoid empty sequences that may cause errors
        placeholder_ids = judo.ones(self.n, dtype=dtype.hash_type)
        self.update(id_walkers=placeholder_ids)
Ejemplo n.º 17
0
 def best_state(self):
     """Return an array of ones with shape ``self.shape``."""
     shape = self.shape
     return judo.ones(shape)
Ejemplo n.º 18
0
 def reset(self):
     """Reset the data of the :class:`StepStatesWalkers`.

     After the parent reset, ``init_actions`` is set to a column of zeros
     and ``init_dt`` to a column of ones, with one row per walker.
     """
     super(StepStatesWalkers, self).reset()
     column_shape = (len(self), 1)
     self.update(
         init_actions=judo.zeros(column_shape),
         init_dt=judo.ones(column_shape),
     )
Ejemplo n.º 19
0
    def __init__(
        self,
        high: Union[Tensor, Scalar] = numpy.inf,
        low: Union[Tensor, Scalar] = -numpy.inf,
        shape: Optional[tuple] = None,
        dtype: Optional[type] = None,
    ):
        """
        Initialize a :class:`Bounds`.

        Args:
            high: Higher value for the bound interval. If it is a typing_.Scalar \
                  it will be applied to all the coordinates of a target vector. \
                  If it is a vector, the bounds will be checked coordinate-wise. \
                  It defines a closed interval.
            low: Lower value for the bound interval. If it is a typing_.Scalar it \
                 will be applied to all the coordinates of a target vector. \
                 If it is a vector, the bounds will be checked coordinate-wise. \
                 It defines a closed interval.
            shape: Shape of the array that will be bounded. Only needed if neither `high` nor \
                   `low` has a ``shape`` attribute from which it can be inferred.
            dtype:  Data type of the array that will be bounded. It can be inferred from `high` \
                    or `low` (the type of `high` takes priority).

        Raises:
            TypeError: If `shape` is None and neither `high` nor `low` has a ``shape`` attribute.

        Examples:
            Initializing :class:`Bounds` using  numpy arrays:

            >>> import numpy
            >>> high, low = numpy.ones(3, dtype=float), -1 * numpy.ones(3, dtype=int)
            >>> bounds = Bounds(high=high, low=low)
            >>> print(bounds)
            Bounds shape float64 dtype (3,) low [-1 -1 -1] high [1. 1. 1.]

            Initializing :class:`Bounds` using  typing_.Scalars:

            >>> import numpy
            >>> high, low, shape = 4, 2.1, (5,)
            >>> bounds = Bounds(high=high, low=low, shape=shape)
            >>> print(bounds)
            Bounds shape float64 dtype (5,) low [2.1 2.1 2.1 2.1 2.1] high [4. 4. 4. 4. 4.]

        """
        # NOTE: the default for `low` is spelled ``-numpy.inf`` because the
        # ``numpy.NINF`` alias was removed in NumPy 2.0; the value is the same.
        # Infer shape if not specified
        if shape is None and hasattr(high, "shape"):
            shape = high.shape
        elif shape is None and hasattr(low, "shape"):
            shape = low.shape
        elif shape is None:
            raise TypeError(
                "If shape is None high or low need to have .shape attribute.")
        # High and low will be arrays of target shape: iterables are converted
        # as they are, scalars are broadcast to `shape`.
        if not judo.is_tensor(high):
            high = tensor(high) if isinstance(
                high, _Iterable) else judo.ones(shape) * high
        if not judo.is_tensor(low):
            low = tensor(low) if isinstance(
                low, _Iterable) else judo.ones(shape) * low
        # The limits are always stored as floats; `self.dtype` keeps track of
        # the data type of the bounded array.
        self.high = judo.astype(high, judo.float)
        self.low = judo.astype(low, judo.float)
        if dtype is not None:
            self.dtype = dtype
        elif hasattr(high, "dtype"):
            self.dtype = high.dtype
        elif hasattr(low, "dtype"):
            self.dtype = low.dtype
        else:
            self.dtype = type(high) if high is not None else type(low)