def test_reset_with_root_walker(self, swarm):
    """Resetting from a root walker must copy its data to the swarm and to every walker."""
    swarm.reset()
    params = swarm.walkers.env_states.get_params_dict()
    observs_params = params["observs"]
    states_params = params["states"]
    # When no explicit "size" is declared, use the shape without its first dimension.
    observs_size = observs_params.get("size", observs_params["shape"][1:])
    states_size = states_params.get("size", states_params["shape"][1:])
    root_obs = judo.astype(random_state.random(observs_size), observs_params["dtype"])
    root_state = judo.astype(random_state.random(states_size), states_params["dtype"])
    root_reward = 160290
    root_walker = OneWalker(observ=root_obs, reward=root_reward, state=root_state)

    swarm.reset(root_walker=root_walker)
    best_id_after_reset = swarm.best_id
    root_ids = root_walker.id_walkers

    # The best-state record of the swarm matches the root walker.
    assert (swarm.best_state == root_state).all()
    assert (swarm.best_obs == root_obs).all(), (root_obs, tensor(swarm.best_obs))
    assert swarm.best_reward == root_reward
    # Every walker carries the root walker data.
    assert (swarm.walkers.env_states.observs == root_obs).all()
    assert (swarm.walkers.env_states.states == root_state).all()
    assert (swarm.walkers.env_states.rewards == root_reward).all()
    # Id comparisons are only performed when the numpy backend is active.
    if Backend.is_numpy():
        assert (swarm.walkers.states.id_walkers == root_walker.id_walkers).all()
        assert best_id_after_reset == root_ids[0]
def cross_clone(
    host_virtual_rewards: Tensor,
    ext_virtual_rewards: Tensor,
    host_oobs: Tensor = None,
    eps=1e-3,
):
    """
    Perform a clone operation between two different groups of points.

    Args:
        host_virtual_rewards: Virtual rewards of the points that may clone.
        ext_virtual_rewards: Virtual rewards of the external points that act
            as clone targets.
        host_oobs: Optional index/mask of host points that are always marked
            to clone.
        eps: Lower floor applied to the denominator of the clone probability.

    Returns:
        Tuple ``(compas_ix, will_clone)`` with the random permutation of
        external indices and the boolean clone decision for each entry.
    """
    n_external = len(ext_virtual_rewards)
    compas_ix = random_state.permutation(judo.arange(n_external))
    host_vr = judo.astype(host_virtual_rewards.flatten(), dtype=dtype.float32)
    ext_vr = judo.astype(ext_virtual_rewards.flatten(), dtype=dtype.float32)
    # Floor the denominator at eps so tiny values cannot blow up the ratio.
    # NOTE(review): the denominator uses the unpermuted external rewards rather
    # than the host rewards -- confirm this is the intended normalisation.
    floor = tensor(eps, dtype=dtype.float32)
    denominator = judo.where(ext_vr > eps, ext_vr, floor)
    clone_probs = (ext_vr[compas_ix] - host_vr) / denominator
    thresholds = random_state.random(len(clone_probs))
    will_clone = clone_probs.flatten() > thresholds
    if host_oobs is not None:
        will_clone[host_oobs] = True
    return compas_ix, will_clone
def calculate(
    self,
    batch_size: Optional[int] = None,
    model_states: Optional[StatesModel] = None,
    env_states: Optional[StatesEnv] = None,
    walkers_states: Optional[StatesWalkers] = None,
) -> States:
    """
    Sample clipped, normally distributed time step values.

    Args:
        batch_size: Number of values to sample. When falsy, ``env_states.n``
            is used instead.
        model_states: Not used by this implementation.
        env_states: Only read for its ``n`` attribute when ``batch_size`` is
            not provided.
        walkers_states: Not used by this implementation.

    Returns:
        The object built by ``self.states_from_data`` with the sampled values
        stored both as ``critic_score`` and as ``dt``.

    Raises:
        ValueError: If both ``batch_size`` and ``env_states`` are ``None``.
    """
    if batch_size is None and env_states is None:
        raise ValueError("env_states and batch_size cannot be both None.")
    n_samples = batch_size if batch_size else env_states.n
    sampled = self.random_state.normal(loc=self.mean_dt, scale=self.std_dt, size=n_samples)
    # Keep the samples inside [min_dt, max_dt] before casting to the target dtype.
    bounded = judo.clip(sampled, self.min_dt, self.max_dt)
    dt = judo.astype(bounded, self._dtype)
    return self.states_from_data(batch_size=n_samples, critic_score=dt, dt=dt)
def predict(
    self,
    root_env_states: StatesEnv,
    walkers: StepWalkers,
) -> StatesModel:
    """
    Select the most frequent ``init_action`` assigned to the internal swarm's walkers.

    The selected ``dt`` will be equal to the minimum ``init_dts`` among all
    the walkers that sampled the selected ``init_action``. When several
    actions are tied for the highest count, the smallest action id is chosen.

    Args:
        root_env_states: :env-st:`StatesEnv` class containing the data
            corresponding to the root walker of a :class:`StepSwarm`.
            Not read by this implementation.
        walkers: :walkers:`StepWalkers` used by the internal swarm of a
            :class:`StepSwarm`.

    Returns:
        :class:`StatesModel` containing the ``actions`` and ``dt`` that the
        root walkers will use to step the :env:`Environment`.
    """
    init_actions = judo.astype(walkers.states.init_actions.flatten(), judo.int)
    init_actions = judo.to_numpy(init_actions)
    with Backend.use_backend("numpy"):
        # counts[a] is the number of walkers whose initial action is ``a``.
        counts = numpy.bincount(init_actions)
        # argmax picks the action with the highest count. The previous
        # ``numpy.nonzero(counts)[0][0]`` returned the smallest action that
        # appeared at all, regardless of how often it was sampled.
        most_used_action = numpy.argmax(counts)
    most_used_action = tensor(most_used_action)
    root_model_states = StatesModel(
        batch_size=1,
        state_dict={
            "actions": {"dtype": judo.int64},
            "dt": {"dtype": judo.int64},
        },
    )
    root_model_states.actions[:] = most_used_action
    if hasattr(root_model_states, "dt"):
        init_dts = judo.astype(walkers.states.init_dts.flatten(), judo.int)
        # Only the walkers that sampled the chosen action contribute their dt.
        index_dt = init_actions == most_used_action
        target_dt = init_dts[index_dt].min()
        root_model_states.dt[:] = target_dt
    return root_model_states
def test_alive_compas(self, walkers):
    """With a single in-bounds walker, every sampled companion must be that walker."""
    in_bounds_mask = judo.astype(judo.zeros_like(walkers.env_states.oobs), dtype.bool)
    # Only the walker at index 3 is marked as in bounds.
    in_bounds_mask[3] = True
    walkers.states.in_bounds = in_bounds_mask
    compas = walkers.get_in_bounds_compas()
    every_compa_is_three = judo.all(compas == 3)
    assert every_compa_is_three, "Type of end_cond: {} end_cond: {}: alive ix: {}".format(
        type(in_bounds_mask), in_bounds_mask, walkers.states.in_bounds
    )
    n_dims = len(compas.shape)
    assert n_dims == 1
def relativize(x: Tensor) -> Tensor:
    """
    Normalize the data using a custom smoothing technique.

    The input is standardized (zero mean, unit standard deviation). Positive
    standardized values are mapped to ``log(1 + z) + 1`` and the remaining
    ones to ``exp(z)``. If the standard deviation is zero, a vector of ones
    with the dtype of the input is returned instead.

    Args:
        x: Tensor containing the values to be normalized.

    Returns:
        Tensor with the smoothed, normalized values.
    """
    source = x
    values = judo.astype(x, dtype.float)
    spread = values.std()
    if float(spread) == 0:
        # Constant input: standardizing would divide by zero.
        return judo.ones(len(values), dtype=source.dtype)
    z_scores = (values - values.mean()) / spread
    # The log branch is evaluated for every entry, including those where
    # 1 + z <= 0, so floating point warnings are silenced while computing it.
    with numpy.errstate(invalid="ignore", divide="ignore"):
        is_positive = z_scores > 0.0
        log_branch = judo.log(1.0 + z_scores) + 1.0
        exp_branch = judo.exp(z_scores)
        smoothed = judo.where(is_positive, log_branch, exp_branch)
    return smoothed
def clip(self, x: Tensor) -> Tensor:
    """
    Clip the values of the target tensor so they fall inside the bounds (closed interval).

    Args:
        x: Tensor to be clipped. It is cast to float before clipping.

    Returns:
        Clipped tensor with all its values between ``self.low`` and ``self.high``.
    """
    as_float = judo.astype(x, dtype.float)
    clipped = API.clip(as_float, self.low, self.high)
    return clipped
def predict(
    self,
    root_env_states: StatesEnv,
    walkers: StepWalkers,
) -> StatesModel:
    """
    Select the ``init_action`` and ``init_dt`` of the best walker found
    during the internal swarm run.

    Args:
        root_env_states: :env-st:`StatesEnv` class containing the data
            corresponding to the root walker of a :class:`StepSwarm`.
            Not read by this implementation.
        walkers: :walkers:`StepWalkers` used by the internal swarm of a
            :class:`StepSwarm`.

    Returns:
        :class:`StatesModel` containing the ``actions`` and ``dt`` that the
        root walkers will use to step the :env:`Environment`.
    """
    init_actions = judo.astype(walkers.states.init_actions.flatten(), judo.int)
    best_ix = walkers.get_best_index()
    root_model_states = StatesModel(
        batch_size=1,
        state_dict={
            "actions": {"dtype": judo.int64},
            "dt": {"dtype": judo.int},
        },
    )
    root_model_states.actions[:] = init_actions[best_ix]
    if hasattr(root_model_states, "dt"):
        # The walkers' states expose the initial time steps as ``init_dts``
        # (plural), matching the attribute read by the other ensemble
        # ``predict`` in this file; the previous ``init_dt`` did not agree
        # with it.
        init_dts = judo.astype(walkers.states.init_dts.flatten(), judo.int)
        root_model_states.dt[:] = init_dts[best_ix]
    return root_model_states
def get_best_index(self) -> int:
    """
    Return the index of the best state present in the :class:`Walkers`
    that is considered alive (inside the boundary conditions of the problem).

    If no walker is alive it will return the index of the last walker, which
    corresponds with the best state found. When several alive walkers share
    the best cumulative reward, the first one is returned.

    Returns:
        Index of the best in-bounds walker, or ``self.n - 1`` if none is in bounds.
    """
    cum_rewards = self.states.cum_rewards
    in_bounds = self.states.in_bounds
    alive_rewards = cum_rewards[in_bounds]
    if len(alive_rewards) == 0:
        return self.n - 1
    best = alive_rewards.min() if self.minimize else alive_rewards.max()
    # Restrict the match to in-bounds walkers: an out-of-bounds walker whose
    # reward ties the best alive one must never be selected.
    is_best_alive = (cum_rewards == best) & in_bounds
    # Cast to int before argmax; argmax then returns the first matching index.
    ix = judo.astype(is_best_alive, dtype.int).argmax()
    return int(ix)
def sample_bounds(self, batch_size: int) -> typing.Tensor:
    """
    Return a matrix of points sampled uniformly from the :class:`Function` domain.

    Args:
        batch_size: Number of points that will be sampled.

    Returns:
        Tensor containing ``batch_size`` points drawn between
        ``self.bounds.low`` and ``self.bounds.high``, stacked across the
        first dimension.
    """
    sample_shape = (batch_size,) + self.shape
    new_points = judo.zeros(sample_shape, dtype=judo.float32)
    # One independent draw per row, each written into the preallocated tensor.
    for row in range(batch_size):
        lower = judo.astype(self.bounds.low, judo.float)
        upper = judo.astype(self.bounds.high, judo.float32)
        draw = self.random_state.uniform(low=lower, high=upper)
        new_points[row, :] = judo.astype(draw, self.bounds.low.dtype)
    return new_points
def points_in_bounds(self, x: Tensor) -> Union[Tensor, bool]:
    """
    Check if the rows of the target tensor have all their coordinates inside
    the specified bounds.

    A value is inside the bounds when clipping leaves it unchanged. If the
    tensor is one dimensional a single boolean is returned, otherwise a
    vector of booleans with one entry per row.

    Args:
        x: Tensor to be checked against the bounds.

    Returns:
        Boolean (or tensor of booleans) indicating if each row lies inside the bounds.
    """
    clipped = self.clip(x)
    unchanged = clipped == judo.astype(x, dtype.float)
    if len(unchanged.shape) > 1:
        # Reduce across axis 1 so each row yields one flag.
        return unchanged.all(1).flatten()
    return unchanged.all()
def test_sample_with_critic(self, n_actions):
    """Sampling with a critic attached yields valid actions plus the critic score."""

    def check_actions(sampled):
        # Shared sanity checks on a batch of sampled discrete actions.
        assert len(sampled.shape) == 1
        assert len(judo.unique(sampled)) <= n_actions
        assert all(sampled >= 0)
        assert all(sampled <= n_actions)

    model = DiscreteUniform(n_actions=n_actions, critic=DummyCritic())

    # Sampling through ``predict`` with only a batch size.
    predicted = model.predict(batch_size=1000)
    check_actions(predicted.actions)
    assert "critic_score" in predicted.keys()
    assert (predicted.critic_score == 5).all()

    # Sampling through ``sample`` using pre-built model states.
    states = create_model_states(batch_size=100, model=model)
    sampled_states = model.sample(batch_size=states.n, model_states=states)
    actions = sampled_states.actions
    check_actions(actions)
    # Actions must hold integer values.
    assert judo.allclose(actions, judo.astype(actions, dtype.int))
    assert "critic_score" in sampled_states.keys()
    assert (sampled_states.critic_score == 5).all()
def get_scaled_intervals(
    low: Union[Tensor, float, int],
    high: Union[Tensor, float, int],
    scale: float,
) -> Tuple[Union[Tensor, float], Union[Tensor, float]]:
    """
    Scale the high and low vectors by a scale factor.

    The value of the high and low will be proportional to the maximum and
    minimum values of the array. Scale defines the proportion to make the
    bounds bigger and smaller. For example, if scale is 1.1 the higher bound
    will be 10% higher, and the lower bound 10% smaller. If scale is 0.9 the
    higher bound will be 10% lower, and the lower bound 10% higher. If scale
    is one, `high` and `low` are returned unchanged.

    Args:
        low: Lower bound to be scaled. Its ``dtype`` attribute is read, so it
            must expose one.
        high: Higher bound to be scaled.
        scale: Value representing the tolerance in percentage from the
            current maximum and minimum values of the array.

    Returns:
        Tuple ``(xmin_scaled, xmax_scaled)`` with the scaled lower and higher bounds.
    """
    pct = tensor(scale - 1)
    magnitude = API.abs(pct)
    big_scale = 1 + magnitude
    small_scale = 1 - magnitude
    zero = judo.astype(tensor(0.0), low.dtype)
    # The factor that moves a bound outwards or inwards depends on its sign:
    # multiplying a negative value by a factor > 1 makes it smaller, while
    # doing so to a positive value makes it larger.
    if pct > 0:
        # Widen the interval: low moves down, high moves up.
        xmin_scaled = API.where(low < zero, low * big_scale, low * small_scale)
        xmax_scaled = API.where(high < zero, high * small_scale, high * big_scale)
    else:
        # Shrink the interval: low moves up, high moves down. A non-negative
        # low needs the big factor to move up (both arms used the small one
        # before, which moved it down instead).
        xmin_scaled = API.where(low < zero, low * small_scale, low * big_scale)
        xmax_scaled = API.where(high < zero, high * big_scale, high * small_scale)
    return xmin_scaled, xmax_scaled
def test_get_best_index(self, walkers):
    """Exercise ``get_best_index`` with no, one and all walkers in bounds."""
    n_walkers = walkers.n

    # Case 1: Rewards = [1,1,...] InBounds = [0,0,...]
    all_out = judo.zeros(n_walkers, dtype=dtype.bool)
    walkers.states.update(cum_rewards=judo.ones(n_walkers), in_bounds=all_out)
    # If there are no in_bound rewards, the last walker is returned
    assert walkers.get_best_index() == walkers.n - 1

    # Case 2: some OOB rewards
    # Rewards = [0,1,0,...] InBounds = [0,1,...]
    only_alive_ix = 1
    rewards_one_alive = judo.zeros(n_walkers)
    rewards_one_alive[only_alive_ix] = 1
    alive_flags = judo.zeros(n_walkers)
    alive_flags[only_alive_ix] = 1
    walkers.states.update(
        cum_rewards=rewards_one_alive,
        in_bounds=judo.astype(alive_flags, dtype.bool),
    )
    assert walkers.get_best_index() == only_alive_ix

    # Case 3: everyone in bounds, a single walker holds the best reward.
    # If the walkers are minimizing, set all but one reward to 1.0
    # If the walkers are maximizing, set all but one reward to 0.0
    if walkers.minimize:
        best_val, other_val = 0.0, 1.0
    else:
        best_val, other_val = 1.0, 0.0
    # Rewards = [-,+,-,-,-,...] InBounds = [1,...]
    mixed_rewards = judo.full((n_walkers,), fill_value=other_val, dtype=dtype.float)
    expected_ix = 1  # could be any index
    mixed_rewards[expected_ix] = best_val
    walkers.states.update(
        cum_rewards=mixed_rewards,
        in_bounds=judo.ones(n_walkers, dtype=dtype.bool),
    )
    assert walkers.get_best_index() == expected_ix
def __init__(
    self,
    high: Union[Tensor, Scalar] = numpy.inf,
    # ``-numpy.inf`` is the same value as the ``numpy.NINF`` alias, which was
    # removed in NumPy 2.0 and would fail when this signature is evaluated.
    low: Union[Tensor, Scalar] = -numpy.inf,
    shape: Optional[tuple] = None,
    dtype: Optional[type] = None,
):
    """
    Initialize a :class:`Bounds`.

    Args:
        high: Higher value for the bound interval. If it is a typing_.Scalar
            it will be applied to all the coordinates of a target vector.
            If it is a vector, the bounds will be checked coordinate-wise.
            It defines a closed interval.
        low: Lower value for the bound interval. If it is a typing_.Scalar it
            will be applied to all the coordinates of a target vector.
            If it is a vector, the bounds will be checked coordinate-wise.
            It defines a closed interval.
        shape: Shape of the array that will be bounded. Only needed if neither
            `high` nor `low` expose a ``shape`` attribute.
        dtype: Data type of the array that will be bounded. It can be inferred
            from `high` or `low` (the type of `high` takes priority).

    Raises:
        TypeError: If `shape` is None and neither `high` nor `low` have a
            ``shape`` attribute.

    Examples:
        Initializing :class:`Bounds` using numpy arrays:

        >>> import numpy
        >>> high, low = numpy.ones(3, dtype=float), -1 * numpy.ones(3, dtype=int)
        >>> bounds = Bounds(high=high, low=low)
        >>> print(bounds)
        Bounds shape float64 dtype (3,) low [-1 -1 -1] high [1. 1. 1.]

        Initializing :class:`Bounds` using typing_.Scalars:

        >>> import numpy
        >>> high, low, shape = 4, 2.1, (5,)
        >>> bounds = Bounds(high=high, low=low, shape=shape)
        >>> print(bounds)
        Bounds shape float64 dtype (5,) low [2.1 2.1 2.1 2.1 2.1] high [4. 4. 4. 4. 4.]

    """
    # Infer shape if not specified
    if shape is None and hasattr(high, "shape"):
        shape = high.shape
    elif shape is None and hasattr(low, "shape"):
        shape = low.shape
    elif shape is None:
        raise TypeError("If shape is None high or low need to have .shape attribute.")
    # High and low will be arrays of target shape: iterables are converted
    # directly, scalars are broadcast over a tensor of ones.
    if not judo.is_tensor(high):
        high = tensor(high) if isinstance(high, _Iterable) else API.ones(shape) * high
    if not judo.is_tensor(low):
        low = tensor(low) if isinstance(low, _Iterable) else API.ones(shape) * low
    self.high = judo.astype(high, dtype)
    self.low = judo.astype(low, dtype)
    # Resolve the dtype: an explicit value wins, then high, then low.
    if dtype is not None:
        self.dtype = dtype
    elif hasattr(high, "dtype"):
        self.dtype = high.dtype
    elif hasattr(low, "dtype"):
        self.dtype = low.dtype
    else:
        self.dtype = type(high) if high is not None else type(low)