def _classic_control_env():
    """Create a classic-control environment together with a model state.

    Returns:
        Tuple ``(env, states)``: the object returned by
        ``classic_control_env()`` and a :class:`StatesModel` with
        ``N_WALKERS`` entries whose ``actions`` and ``dt`` are set to ones.
    """
    env = classic_control_env()
    state_dict = {
        "actions": {"dtype": dtype.int64},
        "dt": {"dtype": dtype.float32},
    }
    model_states = StatesModel(state_dict=state_dict, batch_size=N_WALKERS)
    # Each attribute gets its own freshly allocated vector of ones.
    initial_values = {name: judo.ones(N_WALKERS) for name in ("actions", "dt")}
    model_states.update(**initial_values)
    return env, model_states
def _parallel_environment():
    """Create a parallel environment together with a model state.

    Returns:
        Tuple ``(env, states)``: the object returned by
        ``parallel_environment()`` and a :class:`StatesModel` with
        ``N_WALKERS`` entries whose ``actions`` and ``critic`` are set to ones.
    """
    env = parallel_environment()
    state_dict = {
        "actions": {"dtype": dtype.int64},
        "critic": {"dtype": dtype.float32},
    }
    model_states = StatesModel(state_dict=state_dict, batch_size=N_WALKERS)
    # Each attribute gets its own freshly allocated vector of ones.
    initial_values = {name: judo.ones(N_WALKERS) for name in ("actions", "critic")}
    model_states.update(**initial_values)
    return env, model_states
def calculate(
    self,
    batch_size: Optional[int] = None,
    model_states: Optional[StatesModel] = None,
    env_states: Optional[StatesEnv] = None,
    walkers_states: Optional[StatesWalkers] = None,
) -> States:
    """
    Build a batch of states filled with the configured time step.

    Args:
        batch_size: Number of points to generate. When it is falsy the value
            of ``env_states.n`` is used instead.
        model_states: States corresponding to the model data. Not read here.
        env_states: States corresponding to the environment data. Only its
            ``n`` attribute is read, and only when ``batch_size`` is falsy.
        walkers_states: States corresponding to the walkers data. Not read here.

    Returns:
        The object produced by ``self.states_from_data``, where both
        ``critic_score`` and ``dt`` are a vector of ``self.dt`` values.

    Raises:
        ValueError: If ``batch_size`` and ``env_states`` are both ``None``.
    """
    if batch_size is None and env_states is None:
        raise ValueError("env_states and batch_size cannot be both None.")
    n_points = batch_size if batch_size else env_states.n
    time_steps = judo.ones(n_points, dtype=self._dtype) * self.dt
    # The same tensor is handed over as critic score and as time step.
    return self.states_from_data(
        batch_size=n_points,
        critic_score=time_steps,
        dt=time_steps,
    )
def function_env() -> Function:
    """Create a two-dimensional :class:`Function` with a constant objective.

    Returns:
        The result of ``Function.from_bounds_params`` for shape ``(2,)``,
        lower limits ``[-10, -5]``, upper limits ``[10, 5]`` and an objective
        that returns one value of 1 per input row.
    """

    def _constant_objective(x):
        # One reward per point, always equal to one.
        return judo.ones(len(x))

    return Function.from_bounds_params(
        function=_constant_objective,
        shape=(2, ),
        low=tensor([-10, -5]),
        high=tensor([10, 5]),
    )
def test_clone_to_imported(self, export_swarm):
    """Check that ``_clone_to_imported`` copies imported data into walker 0.

    Three exported walkers are filled with recognisable marker values, and
    only the first of the two indexed walkers is flagged to clone.
    """
    markers = [999, 777, 333]
    walkers = ExportedWalkers(3)
    walkers.rewards = tensor(markers, dtype=dtype.float)
    walkers.states = tensor(markers, dtype=dtype.float)
    walkers.id_walkers = tensor(markers, dtype=dtype.float)
    # One row of four identical values per exported walker.
    walkers.observs = tensor([[marker] * 4 for marker in markers],
                             dtype=dtype.float)

    clone_arguments = {
        "compas_ix": tensor([0, 1]),
        "will_clone": tensor([True, False]),
        "local_ix": tensor([0, 1]),
        "import_ix": tensor([0, 1]),
        "walkers": walkers,
    }
    export_swarm._clone_to_imported(**clone_arguments)

    # The first local walker must now carry the first imported walker's data.
    assert export_swarm.walkers.states.cum_rewards[0] == 999.0
    assert export_swarm.walkers.env_states.states[0] == 999.0
    expected_observ = judo.ones(4) * 999
    assert (export_swarm.walkers.env_states.observs[0] == expected_observ).all()
def test_points_in_bounds(self, bounds_fixture):
    """Check ``points_in_bounds`` on all-inside, all-outside and mixed rows."""
    # Every all-zero row is expected to be inside the bounds.
    origin_rows = judo.zeros((3, 3))
    assert all(bounds_fixture.points_in_bounds(origin_rows))

    # No row filled with 10.0 is expected to be inside the bounds.
    far_rows = judo.ones((3, 3)) * 10.0
    inside_mask = bounds_fixture.points_in_bounds(far_rows)
    assert not inside_mask.any(), (inside_mask, far_rows)

    # Exactly one of these three rows is expected to be inside the bounds.
    mixed_rows = tensor([[-10, 0, 1], [0, 0, 0], [10, 10, 10]])
    n_inside = sum(bounds_fixture.points_in_bounds(mixed_rows))
    assert n_inside == 1
def reset(self):
    """Clear the internal data of the class.

    Attributes listed by ``self.keys()`` that are not part of
    ``self.get_params_dict()`` are set to ``None``; the remaining walker
    attributes are then overwritten with freshly allocated default vectors
    of length ``self.n``.
    """
    params = self.get_params_dict()
    # Collect the names before touching any attribute.
    extra_names = list(filter(lambda key: key not in params, self.keys()))
    for extra_name in extra_names:
        setattr(self, extra_name, None)

    n_walkers = self.n
    defaults = {
        "id_walkers": judo.zeros(n_walkers, dtype=judo.hash_type),
        "compas_dist": judo.arange(n_walkers),
        "compas_clone": judo.arange(n_walkers),
        "processed_rewards": judo.zeros(n_walkers, dtype=judo.float),
        "cum_rewards": judo.zeros(n_walkers, dtype=judo.float),
        "virtual_rewards": judo.ones(n_walkers, dtype=judo.float),
        "distances": judo.zeros(n_walkers, dtype=judo.float),
        "clone_probs": judo.zeros(n_walkers, dtype=judo.float),
        "will_clone": judo.zeros(n_walkers, dtype=judo.bool),
        "in_bounds": judo.ones(n_walkers, dtype=judo.bool),
    }
    self.update(**defaults)
def small_tree():
    """Build a small directed graph with eight nodes and seven edges.

    Every node carries the attributes ``a`` and ``b`` and every edge carries
    the attribute ``c``. Node identifiers are produced by ``to_node_id``.

    Returns:
        The populated ``networkx.DiGraph``.
    """
    node_attrs = {"a": judo.arange(10), "b": judo.zeros(10)}
    edge_attrs = {"c": judo.ones(10)}
    graph = networkx.DiGraph()

    for node_id in map(to_node_id, range(8)):
        graph.add_node(node_id, **node_attrs)

    # (parent, child) index pairs describing the edges.
    edges = [(0, 1), (1, 2), (2, 3), (2, 4), (2, 5), (3, 6), (3, 7)]
    for parent, child in edges:
        graph.add_edge(to_node_id(parent), to_node_id(child), **edge_attrs)
    return graph
def test_step(self, function_env, batch_size):
    """Check that stepping with zero actions returns a valid ``StatesEnv``."""
    env_states = function_env.reset(batch_size=batch_size)
    # Zero actions shaped like the observations of the reset state.
    zero_actions = judo.zeros(env_states.observs.shape)
    model_states = StatesModel(
        actions=zero_actions,
        batch_size=batch_size,
        dt=judo.ones((1, 2)),
    )
    stepped: StatesEnv = function_env.step(model_states, env_states)
    assert isinstance(stepped, StatesEnv)
    # The first walker must not be flagged as out of bounds.
    assert stepped.oobs[0].item() == 0
def calculate(
    self,
    batch_size: int = None,
    model_states: StatesModel = None,
    env_states: StatesEnv = None,
    walkers_states: StatesWalkers = None,
) -> States:
    """Return states whose critic score is 5 for every entry.

    Args:
        batch_size: Number of entries. When it is falsy ``env_states.n`` is used.
        model_states: Not read by this implementation.
        env_states: Only its ``n`` attribute is read, when ``batch_size`` is falsy.
        walkers_states: Not read by this implementation.

    Returns:
        A :class:`States` built with the resolved batch size and a
        ``critic_score`` vector filled with fives.
    """
    if not batch_size:
        batch_size = env_states.n
    constant_score = 5 * judo.ones(batch_size)
    return States(batch_size=batch_size, critic_score=constant_score)
def test_calculate_end_condition(self, walkers):
    """Check the end condition against ``oobs`` flags and the epoch counter."""
    walkers.reset()

    # All walkers flagged out of bounds: the end condition must hold.
    all_out = judo.ones(walkers.n, dtype=dtype.bool)
    walkers.env_states.update(oobs=all_out)
    assert walkers.calculate_end_condition()

    # No walker flagged out of bounds: the end condition must not hold.
    none_out = judo.zeros(walkers.n, dtype=dtype.bool)
    walkers.env_states.update(oobs=none_out)
    assert not walkers.calculate_end_condition()

    # Epoch counter below ``max_epochs``: still not finished.
    walkers.max_epochs = 10
    walkers._epoch = 8
    assert not walkers.calculate_end_condition()

    # Epoch counter above ``max_epochs``: finished.
    walkers._epoch = 11
    assert walkers.calculate_end_condition()
def relativize(x: Tensor) -> Tensor:
    """Normalize the data using a custom smoothing technique.

    The input is cast to float and standardized. Standardized values above
    zero are mapped to ``log(1 + value) + 1`` and the rest to ``exp(value)``.

    Args:
        x: Tensor to be normalized.

    Returns:
        The transformed tensor, or a vector of ones with the dtype of ``x``
        when the standard deviation of the data is zero.
    """
    values = judo.astype(x, dtype.float)
    spread = values.std()
    if float(spread) == 0:
        # Standardizing would divide by zero, so return ones instead.
        return judo.ones(len(values), dtype=x.dtype)
    standard = (values - values.mean()) / spread
    # Both branches are evaluated for every element, so numpy warnings about
    # invalid values and divisions are silenced while they are computed.
    with numpy.errstate(invalid="ignore", divide="ignore"):
        is_positive = standard > 0.0
        log_branch = judo.log(1.0 + standard) + 1.0
        exp_branch = judo.exp(standard)
        result = judo.where(is_positive, log_branch, exp_branch)
    return result
def test_get_best_index(self, walkers):
    """Check ``get_best_index`` with none, one and all walkers in bounds."""
    n_walkers = walkers.n

    # Case 1: rewards = [1, 1, ...], in_bounds = [0, 0, ...].
    walkers.states.update(
        cum_rewards=judo.ones(n_walkers),
        in_bounds=judo.zeros(n_walkers, dtype=dtype.bool),
    )
    # When no walker is in bounds the last walker is returned.
    assert walkers.get_best_index() == n_walkers - 1

    # Case 2: rewards = [0, 1, 0, ...], in_bounds = [0, 1, 0, ...].
    single_in_bounds = 1
    sparse_rewards = judo.zeros(n_walkers)
    sparse_rewards[single_in_bounds] = 1
    in_bounds_flags = judo.zeros(n_walkers)
    in_bounds_flags[single_in_bounds] = 1
    walkers.states.update(
        cum_rewards=sparse_rewards,
        in_bounds=judo.astype(in_bounds_flags, dtype.bool),
    )
    assert walkers.get_best_index() == single_in_bounds

    # Case 3: rewards = [-, +, -, -, ...], in_bounds = [1, 1, ...].
    # When minimizing, all rewards but one are 1.0 and the best one is 0.0;
    # when maximizing, all rewards but one are 0.0 and the best one is 1.0.
    if walkers.minimize:
        best_value, other_value = 0.0, 1.0
    else:
        best_value, other_value = 1.0, 0.0
    expected_best = 1  # could be any index
    contrast_rewards = judo.full((n_walkers, ),
                                 fill_value=other_value,
                                 dtype=dtype.float)
    contrast_rewards[expected_best] = best_value
    walkers.states.update(
        cum_rewards=contrast_rewards,
        in_bounds=judo.ones(n_walkers, dtype=dtype.bool),
    )
    assert walkers.get_best_index() == expected_best
def test_states_from_data(self, env_data, batch_size, states_dim):
    """Check that ``states_from_data`` returns a ``StatesEnv`` of tensors."""
    env, _model_states = env_data
    payload = {
        "states": judo.zeros((batch_size, states_dim)),
        "observs": judo.ones((batch_size, states_dim)),
        "rewards": judo.arange(batch_size),
        "oobs": judo.zeros(batch_size, dtype=dtype.bool),
    }
    new_state = env.states_from_data(batch_size=batch_size, **payload)
    assert isinstance(new_state, StatesEnv)
    # Every stored value must be a tensor with one entry per walker.
    for value in new_state.vals():
        assert dtype.is_tensor(value)
        assert len(value) == batch_size
def test_accumulate_rewards(self, walkers):
    """Check ``_accumulate_and_update_rewards`` with accumulation on and off."""
    walkers.reset()

    # Accumulation enabled, starting from ``cum_rewards`` set to None.
    walkers._accumulate_rewards = True
    walkers.states.update(cum_rewards=[0, 0])
    # Override the value stored above and set it to None.
    walkers.states.update(cum_rewards=None)
    new_rewards = judo.arange(len(walkers))
    walkers._accumulate_and_update_rewards(new_rewards)
    assert (walkers.states.cum_rewards == new_rewards).all()

    # Accumulation disabled, starting from zeros.
    walkers._accumulate_rewards = False
    walkers.states.update(cum_rewards=judo.zeros(len(walkers)))
    new_rewards = judo.arange(len(walkers))
    walkers._accumulate_and_update_rewards(new_rewards)
    assert (walkers.states.cum_rewards == new_rewards).all()

    # Accumulation enabled, starting from ones: the result is shifted by one.
    walkers._accumulate_rewards = True
    walkers.states.update(cum_rewards=judo.ones(len(walkers)))
    new_rewards = judo.arange(len(walkers))
    walkers._accumulate_and_update_rewards(new_rewards)
    assert (walkers.states.cum_rewards == new_rewards + 1).all()
def __init__(self, batch_size: int, state_dict: StateDict = None):
    """
    Initialize a :class:`ExportedWalkers`.

    Args:
        batch_size: Number of walkers that will be exported.
        state_dict: External :class:`typing.StateDict` that overrides the
            default values. Only keys already present in the default
            parameter dictionary are taken into account.

    """
    for attr_name in ("id_walkers", "rewards", "observs", "states"):
        setattr(self, attr_name, None)

    # Accept external definition of ExportedWalkers param_dict values,
    # ignoring keys that the default dictionary does not define.
    walkers_dict = self.get_params_dict()
    if state_dict is not None:
        overrides = {
            key: value
            for key, value in state_dict.items() if key in walkers_dict
        }
        walkers_dict.update(overrides)

    super(ExportedWalkers, self).__init__(batch_size=batch_size,
                                          state_dict=walkers_dict)
    # Set to ones to avoid empty sequences that may cause errors
    initial_ids = judo.ones(self.n, dtype=dtype.hash_type)
    self.update(id_walkers=initial_ids)
def best_state(self):
    """Return a tensor of ones with the shape given by ``self.shape``."""
    target_shape = self.shape
    return judo.ones(target_shape)
def reset(self):
    """Reset the data of the :class:`StepStatesWalkers`.

    After the parent reset, ``init_actions`` is set to zeros and ``init_dt``
    to ones, both as column tensors with one row per walker.
    """
    super(StepStatesWalkers, self).reset()
    column_shape = (len(self), 1)
    initial_values = {
        "init_actions": judo.zeros(column_shape),
        "init_dt": judo.ones(column_shape),
    }
    self.update(**initial_values)
def __init__(
    self,
    high: Union[Tensor, Scalar] = numpy.inf,
    # ``numpy.NINF`` was removed in NumPy 2.0; ``-numpy.inf`` is the same
    # value and is available in every NumPy version.
    low: Union[Tensor, Scalar] = -numpy.inf,
    shape: Optional[tuple] = None,
    dtype: Optional[type] = None,
):
    """
    Initialize a :class:`Bounds`.

    Args:
        high: Higher value for the bound interval. If it is a typing_.Scalar
              it will be applied to all the coordinates of a target vector.
              If it is a vector, the bounds will be checked coordinate-wise.
              It defines a closed interval.
        low: Lower value for the bound interval. If it is a typing_.Scalar it
             will be applied to all the coordinates of a target vector.
             If it is a vector, the bounds will be checked coordinate-wise.
             It defines a closed interval.
        shape: Shape of the array that will be bounded. Only needed if `high` and
               `low` are scalars; otherwise it is taken from the ``shape``
               attribute of `high`, or of `low` when `high` has none.
        dtype: Data type of the array that will be bounded. It can be inferred
               from `high` or `low` (the type of `high` takes priority).

    Raises:
        TypeError: If `shape` is None and neither `high` nor `low` has a
            ``shape`` attribute.

    Examples:
        Initializing :class:`Bounds` using numpy arrays:

        >>> import numpy
        >>> high, low = numpy.ones(3, dtype=float), -1 * numpy.ones(3, dtype=int)
        >>> bounds = Bounds(high=high, low=low)
        >>> print(bounds)
        Bounds shape float64 dtype (3,) low [-1 -1 -1] high [1. 1. 1.]

        Initializing :class:`Bounds` using typing_.Scalars:

        >>> import numpy
        >>> high, low, shape = 4, 2.1, (5,)
        >>> bounds = Bounds(high=high, low=low, shape=shape)
        >>> print(bounds)
        Bounds shape float64 dtype (5,) low [2.1 2.1 2.1 2.1 2.1] high [4. 4. 4. 4. 4.]

    """
    # Infer shape if not specified
    if shape is None and hasattr(high, "shape"):
        shape = high.shape
    elif shape is None and hasattr(low, "shape"):
        shape = low.shape
    elif shape is None:
        raise TypeError(
            "If shape is None high or low need to have .shape attribute.")
    # High and low will be arrays of target shape: iterables are converted
    # directly, scalars are broadcast over a tensor of ones.
    if not judo.is_tensor(high):
        high = tensor(high) if isinstance(
            high, _Iterable) else judo.ones(shape) * high
    if not judo.is_tensor(low):
        low = tensor(low) if isinstance(
            low, _Iterable) else judo.ones(shape) * low
    # The limits are always stored as float tensors.
    self.high = judo.astype(high, judo.float)
    self.low = judo.astype(low, judo.float)
    # An explicit dtype wins; otherwise use the dtype of the (possibly
    # converted) limits, with `high` taking priority over `low`.
    if dtype is not None:
        self.dtype = dtype
    elif hasattr(high, "dtype"):
        self.dtype = high.dtype
    elif hasattr(low, "dtype"):
        self.dtype = low.dtype
    else:
        self.dtype = type(high) if high is not None else type(low)