def minimize_batch(
    self,
    x: typing.Tensor,
) -> Tuple[typing.Tensor, typing.Tensor]:
    """
    Run the local optimizer independently on every point of a batch.

    The input is copied and converted to a numpy array, and each row is
    passed to :meth:`minimize_point` while the numpy backend is active.

    Args:
        x: Batch of starting points stacked across the first dimension.

    Returns:
        Tuple ``(result, rewards)``. ``result`` has the same shape as ``x``
        and holds the point returned for each row; ``rewards`` has shape
        ``(x.shape[0], 1)`` and holds the value returned for each row. Both
        are wrapped with ``tensor`` before being returned.
    """
    x = judo.to_numpy(judo.copy(x))
    n_points = x.shape[0]
    with Backend.use_backend("numpy"):
        result = judo.zeros_like(x)
        rewards = judo.zeros((n_points, 1))
        for row in range(n_points):
            optimum, value = self.minimize_point(x[row, :])
            result[row, :] = optimum
            rewards[row, :] = float(value)
    # Re-wrap the bounds with ``tensor`` once the numpy backend context is left.
    self.bounds.high = tensor(self.bounds.high)
    self.bounds.low = tensor(self.bounds.low)
    return tensor(result), tensor(rewards)
def function() -> Function:
    """Build a two dimensional ``sphere`` :class:`Function` bounded by [-10, 10] x [-5, 5]."""
    lower = tensor([-10, -5])
    upper = tensor([10, 5])
    return Function.from_bounds_params(
        function=sphere,
        shape=(2, ),
        low=lower,
        high=upper,
    )
def function_env() -> Function:
    """Build a two dimensional :class:`Function` that returns one ``1`` per input point."""
    lower = tensor([-10, -5])
    upper = tensor([10, 5])
    return Function.from_bounds_params(
        function=lambda x: judo.ones(len(x)),
        shape=(2, ),
        low=lower,
        high=upper,
    )
def update_states(self, env_states, model_states, best_ix):
    """
    Update the root states and rebuild the root walker.

    Args:
        env_states: States used to update ``self.root_env_states``.
        model_states: States used to update ``self.root_model_states``.
        best_ix: Index into ``self.walkers.states.id_walkers`` of the walker
            whose id becomes the new root id.
    """
    self.root_env_states.update(other=env_states)
    self.root_model_states.update(other=model_states)

    step_rewards = self.root_env_states.rewards
    if self.accumulate_rewards:
        cum_rewards = self.root_walkers_states.cum_rewards + step_rewards
    else:
        cum_rewards = step_rewards

    # Fall back to a unit time step when the model states do not define ``dt``.
    dt = getattr(self.root_model_states, "dt", 1.0)
    times = dt + self.root_walker.times

    root_id = tensor(self.walkers.states.id_walkers[best_ix])
    self.root_walkers_states.update(
        cum_rewards=cum_rewards,
        times=times,
        id_walkers=tensor([root_id]),
    )

    root_env = self.root_env_states
    self.root_walker = OneWalker(
        reward=judo.copy(cum_rewards[0]),
        observ=judo.copy(root_env.observs[0]),
        state=judo.copy(root_env.states[0]),
        time=judo.copy(times[0]),
        id_walker=root_id.squeeze(),
    )
def test_clip(self):
    """Check that ``Bounds.clip`` clamps every coordinate to its own interval."""
    intervals = ((-1, 10), (-3, 4), (2, 5))
    points = tensor(
        [[-10, 0, 0], [11, 0, 0], [0, 11, 0], [11, 11, 11]],
        dtype=dtype.float,
    )
    bounds = Bounds.from_tuples(intervals)
    clipped = bounds.clip(points)
    expected = tensor(
        [[-1.0, 0.0, 2.0], [10.0, 0.0, 2.0], [0.0, 4.0, 2], [10, 4, 5]],
        dtype=dtype.float,
    )
    assert API.allclose(clipped, expected), (clipped.dtype, expected.dtype)
def test_calculate_reward(self, observs, rewards, oobs):
    """Check that the virtual reward is a one dimensional tensor with one entry per reward."""
    with numpy.errstate(**NUMPY_IGNORE_WARNINGS_PARAMS):
        # The companions are requested but not inspected by this test.
        virtual_reward, _ = calculate_virtual_reward(
            observs=tensor(observs),
            rewards=tensor(rewards),
            oobs=tensor(oobs),
            return_compas=True,
        )
        assert dtype.is_tensor(virtual_reward)
        assert len(virtual_reward.shape) == 1
        assert len(virtual_reward) == len(rewards)
def test_import_best(self, export_swarm):
    """Check that ``import_best`` copies the data of the first imported walker as the best."""
    imported = ExportedWalkers(2)
    imported.rewards = tensor([999.0, 2.0])
    imported.states = tensor([0.0, 1.0])
    imported.id_walkers = tensor([10.0, 11.0])
    imported.observs = tensor([[0, 0, 0, 0], [2, 3, 1, 2]], dtype=dtype.float)

    export_swarm.import_best(imported)

    assert export_swarm.best_reward == 999
    best = export_swarm.walkers.states
    assert best.best_state == imported.states[0]
    assert (best.best_obs == imported.observs[0]).all()
    assert best.best_id == imported.id_walkers[0]
def test_imported_best_is_better(self, export_swarm):
    """
    Check that an unbeatable imported reward is reported as better than the local best.

    The check is run once with a reward of ``+inf`` and once, with
    ``export_swarm.walkers.minimize`` set to ``True``, with a reward of ``-inf``.
    """
    export_swarm.reset()
    export_swarm.run_step()

    walkers = ExportedWalkers(1)
    walkers.rewards = tensor([1]) * numpy.inf
    new_is_better = export_swarm._imported_best_is_better(walkers)
    assert new_is_better, export_swarm.best_reward

    walkers = ExportedWalkers(1)
    export_swarm.walkers.minimize = True
    try:
        # ``numpy.NINF`` was removed in NumPy 2.0; ``-numpy.inf`` works on every version.
        walkers.rewards = tensor([1]) * -numpy.inf
        new_is_better = export_swarm._imported_best_is_better(walkers)
        assert new_is_better, export_swarm.best_reward
    finally:
        # Reset the flag even when the assertion fails so that the fixture
        # is not left in minimize mode.
        export_swarm.walkers.minimize = False
def test_clone_to_imported(self, export_swarm):
    """Check that the local walker flagged to clone receives the imported walker data."""
    values = [999, 777, 333]
    imported = ExportedWalkers(3)
    imported.rewards = tensor(values, dtype=dtype.float)
    imported.states = tensor(values, dtype=dtype.float)
    imported.id_walkers = tensor(values, dtype=dtype.float)
    imported.observs = tensor(
        [[999, 999, 999, 999], [777, 777, 777, 777], [333, 333, 333, 333]],
        dtype=dtype.float,
    )

    # Only the first of the two local walkers is marked to clone.
    export_swarm._clone_to_imported(
        compas_ix=tensor([0, 1]),
        will_clone=tensor([True, False]),
        local_ix=tensor([0, 1]),
        import_ix=tensor([0, 1]),
        walkers=imported,
    )

    swarm_walkers = export_swarm.walkers
    assert swarm_walkers.states.cum_rewards[0] == 999.0
    assert swarm_walkers.env_states.states[0] == 999.0
    assert (swarm_walkers.env_states.observs[0] == judo.ones(4) * 999).all()
def create_bounds(name):
    """
    Return a zero-argument factory that builds the :class:`Bounds` variant called ``name``.

    Recognized names are ``"scalars"``, ``"high_array"``, ``"low_array"``,
    ``"both_array"`` and ``"high_list"``. Any other name falls through and
    ``None`` is returned.
    """
    if name == "scalars":
        return lambda: Bounds(high=5, low=-5, shape=(3, ))
    if name == "high_array":
        return lambda: Bounds(high=tensor([1, 2, 5], dtype=judo.float), low=-5)
    if name == "low_array":
        return lambda: Bounds(low=tensor([-1, -5, -3], dtype=judo.float), high=5)
    if name == "both_array":
        # The array is created once, when the factory is requested.
        array = tensor([1, 2, 5], dtype=judo.float)
        return lambda: Bounds(high=array, low=-array)
    if name == "high_list":
        return lambda: Bounds(
            low=tensor([-5, -2, -3], dtype=judo.float),
            high=[5, 5, 5],
            dtype=judo.float,
        )
def ids(self) -> Tensor:
    """
    Return the ids of the walkers as a tensor.

    The ids are the values returned by ``self.env_states.hash_walkers("states")``.
    """
    hashed_states = self.env_states.hash_walkers("states")
    return tensor(hashed_states)
def test_reset_with_root_walker(self, swarm):
    """Check that resetting with a root walker copies its data to the swarm and its best."""
    swarm.reset()
    params = swarm.walkers.env_states.get_params_dict()
    obs_params, state_params = params["observs"], params["states"]
    # When no explicit "size" is given, use "shape" without its first entry.
    obs_size = obs_params.get("size", obs_params["shape"][1:])
    state_size = state_params.get("size", state_params["shape"][1:])
    obs = judo.astype(random_state.random(obs_size), obs_params["dtype"])
    state = judo.astype(random_state.random(state_size), state_params["dtype"])
    reward = 160290
    root_walker = OneWalker(observ=obs, reward=reward, state=state)

    swarm.reset(root_walker=root_walker)

    swarm_best_id = swarm.best_id
    root_walker_id = root_walker.id_walkers
    assert (swarm.best_state == state).all()
    assert (swarm.best_obs == obs).all(), (obs, tensor(swarm.best_obs))
    assert swarm.best_reward == reward
    env_states = swarm.walkers.env_states
    assert (env_states.observs == obs).all()
    assert (env_states.states == state).all()
    assert (env_states.rewards == reward).all()
    # The ids are only compared when running on the numpy backend.
    if Backend.is_numpy():
        assert (swarm.walkers.states.id_walkers == root_walker.id_walkers).all()
        assert swarm_best_id == root_walker_id[0]
def test_points_in_bounds(self, bounds_fixture):
    """Check ``points_in_bounds`` with all-inside, all-outside and mixed batches."""
    origin_points = API.zeros((3, 3))
    assert all(bounds_fixture.points_in_bounds(origin_points))

    far_points = API.ones((3, 3)) * 10.0
    inside = bounds_fixture.points_in_bounds(far_points)
    assert not inside.any(), (inside, far_points)

    # Exactly one of these three points is expected to be inside the bounds.
    mixed_points = tensor([[-10, 0, 1], [0, 0, 0], [10, 10, 10]])
    assert sum(bounds_fixture.points_in_bounds(mixed_points)) == 1
def minimize_point(
    self,
    x: typing.Tensor,
) -> Tuple[typing.Tensor, typing.Scalar]:
    """
    Run the optimizer starting from a single point.

    Args:
        x: Array representing the starting point passed to ``self.minimize``.

    Returns:
        Tuple ``(point, reward)`` built from the ``"x"`` and ``"fun"`` entries
        of the optimization result, each wrapped with ``tensor``.
    """
    outcome = self.minimize(x)
    best_point = tensor(outcome["x"])
    best_value = tensor(float(outcome["fun"]))
    return best_point, best_value
def update_states(self, best_ix):
    """
    Update the root walker data after an internal Swarm iteration has finished.

    Args:
        best_ix: Index into ``self.walkers.states.id_walkers`` of the walker
            whose id becomes the new root id.
    """
    root_states = self.root_walkers_states
    # The rewards are used as they are: the internal Swarm already accumulated them.
    cum_rewards = root_states.cum_rewards
    times = root_states.times + self.root_walker.times
    root_id = tensor(self.walkers.states.id_walkers[best_ix])
    self.root_walkers_states.update(
        cum_rewards=cum_rewards,
        id_walkers=tensor([root_id]),
        times=times,
    )

    root_env = self.root_env_states
    self.root_walker = OneWalker(
        reward=judo.copy(cum_rewards[0]),
        observ=judo.copy(root_env.observs[0]),
        state=judo.copy(root_env.states[0]),
        time=judo.copy(times[0]),
        id_walker=root_id,
    )
def test_run_exchange_step(self, export_swarm):
    """Check an exchange step on a remote swarm: export count and imported best reward."""
    # An exchange step with empty walkers must run without raising.
    empty_walkers = ray.get(export_swarm.get_empty_export_walkers.remote())
    ray.get(export_swarm.run_exchange_step.remote(empty_walkers))

    rows = [[999, 999, 999, 999], [777, 777, 777, 777], [333, 333, 333, 333]]
    imported = ExportedWalkers(3)
    imported.rewards = tensor([999, 777, 333], dtype=dtype.float)
    imported.states = tensor(rows, dtype=dtype.float)
    imported.id_walkers = tensor([999, 777, 333], dtype=dtype.float)
    imported.observs = tensor(rows, dtype=dtype.float)

    ray.get(export_swarm.reset.remote())
    exported = ray.get(export_swarm.run_exchange_step.remote(imported))
    best_found = ray.get(export_swarm.get.remote("best_reward"))
    n_export = ray.get(export_swarm.get.remote("n_export"))
    assert len(exported) == n_export
    assert best_found == 999
def test_sample(self):
    """Check that the sampled actions respect the bounds and the requested ``loc`` and ``scale``."""
    # Model built with only the bounds: mean 0 and std 1 are expected.
    unit_bounds = Bounds(low=-5, high=5, shape=(3, ))
    unit_model = NormalContinuous(bounds=unit_bounds)
    actions = unit_model.predict(batch_size=10000).actions
    assert actions.min() >= -5
    assert actions.max() <= 5
    assert judo.allclose(actions.mean(), tensor(0.0), atol=0.05)
    assert judo.allclose(actions.std(), tensor(1.0), atol=0.05)

    # Explicit loc and scale.
    wide_bounds = Bounds(low=-10, high=30, shape=(3, 10))
    shifted_model = NormalContinuous(bounds=wide_bounds, loc=5, scale=2)
    actions = shifted_model.predict(batch_size=10000).actions
    assert actions.min() >= -10
    assert actions.max() <= 30
    assert judo.allclose(actions.mean(), tensor(5.0), atol=0.05), actions.mean()
    assert judo.allclose(actions.std(), tensor(2.0), atol=0.05), actions.std()
def test_fai_iteration(self, observs, rewards, oobs):
    """Check the type, rank and length of the values returned by ``fai_iteration``."""
    with numpy.errstate(**NUMPY_IGNORE_WARNINGS_PARAMS):
        compas_ix, will_clone = fai_iteration(
            observs=tensor(observs),
            rewards=tensor(rewards),
            oobs=tensor(oobs),
        )
        n_walkers = len(rewards)
        assert dtype.is_tensor(compas_ix)
        assert dtype.is_tensor(will_clone)
        assert len(compas_ix.shape) == 1
        assert len(will_clone.shape) == 1
        assert len(compas_ix) == n_walkers
        assert len(will_clone) == n_walkers
        # The element types are only checked on the numpy backend.
        if Backend.is_numpy():
            assert isinstance(compas_ix[0], dtype.int64), type(compas_ix[0])
            assert isinstance(will_clone[0], dtype.bool), type(will_clone[0])
def test_run_exchange_step(self, export_swarm):
    """Check an exchange step with no imported walkers and with three imported walkers."""
    export_swarm.reset()
    no_walkers = ExportedWalkers(0)
    exported = export_swarm.run_exchange_step(no_walkers)
    assert len(exported) == export_swarm.n_export

    rows = [[999, 999, 999, 999], [777, 777, 777, 777], [333, 333, 333, 333]]
    imported = ExportedWalkers(3)
    imported.rewards = tensor([999, 777, 333], dtype=dtype.float)
    imported.states = tensor(rows, dtype=dtype.float)
    imported.id_walkers = tensor([999, 777, 333], dtype=dtype.float)
    imported.observs = tensor(rows, dtype=dtype.float)

    export_swarm.reset()
    exported = export_swarm.run_exchange_step(imported)
    assert len(exported) == export_swarm.n_export
    assert export_swarm.best_reward == 999.0
def test_from_array_with_scale_positive(self):
    """Check ``Bounds.from_array`` with ``scale=1.1`` on non-negative, non-positive and positive data."""
    # Non-negative data: the zero minimum is unchanged and the maximum is scaled up.
    array = tensor(
        [[0, 0, 0], [10, 0, 0], [0, 10, 0], [10, 10, 10]],
        dtype=dtype.float,
    )
    bounds = Bounds.from_array(array, scale=1.1)
    expected_low = tensor([0, 0, 0], dtype=dtype.float)
    expected_high = tensor([11, 11, 11], dtype=dtype.float)
    assert (bounds.low == expected_low).all(), (bounds.low, array.min(axis=0))
    assert (bounds.high == expected_high).all(), (bounds.high, array.max(axis=0))
    assert bounds.shape == (3, )

    # Non-positive data: the zero maximum is unchanged and the minimum is scaled down.
    array = tensor(
        [[-10, 0, 0], [-10, 0, 0], [0, -10, 0], [-10, -10, -10]],
        dtype=dtype.float,
    )
    bounds = Bounds.from_array(array, scale=1.1)
    expected_high = tensor([0, 0, 0], dtype=dtype.float)
    expected_low = tensor([-11, -11, -11], dtype=dtype.float)
    assert (bounds.high == expected_high).all(), (bounds.high, array.max(axis=0))
    assert (bounds.low == expected_low).all(), (bounds.low, array.min(axis=0))
    assert bounds.shape == (3, )

    # Strictly positive data: both ends move away from the data.
    array = tensor(
        [[10, 10, 10], [100, 10, 10], [10, 100, 10], [100, 100, 100]],
        dtype=dtype.float,
    )
    bounds = Bounds.from_array(array, scale=1.1)
    expected_low = tensor([9.0, 9.0, 9], dtype=dtype.float)
    expected_high = tensor([110, 110, 110], dtype=dtype.float)
    assert API.allclose(bounds.low, expected_low), (bounds.low, array.min(axis=0))
    assert API.allclose(bounds.high, expected_high), (bounds.high, array.max(axis=0))
    assert bounds.shape == (3, )
def _clone_to_imported(
    self,
    compas_ix: Tensor,
    will_clone: Tensor,
    local_ix: Tensor,
    import_ix: Tensor,
    walkers: ExportedWalkers,
) -> None:
    """
    Overwrite the selected local walkers with data from the imported walkers.

    Args:
        compas_ix: Index applied to the imported walkers after ``import_ix``.
        will_clone: Boolean mask applied last; it selects the imported rows
            that are cloned and the entries of ``local_ix`` that are overwritten.
        local_ix: Indexes of the local walkers paired with ``will_clone``.
        import_ix: First index applied to the imported walkers data.
        walkers: Imported walkers providing ids, rewards, states and observations.
    """

    def pick(values):
        # Same triple indexing for every attribute of the imported walkers.
        return tensor(values[import_ix][compas_ix][will_clone])

    clone_ids = pick(walkers.id_walkers)
    clone_rewards = pick(walkers.rewards)
    clone_states = pick(walkers.states)
    clone_obs = pick(walkers.observs)

    # TODO: Find a better way to do this. Assignment does not work after double array indexing
    targets = [ix for ix, wc in zip(local_ix, will_clone) if wc]
    for source, ix in enumerate(targets):
        self.swarm.walkers.states.id_walkers[ix] = clone_ids[source]
        self.swarm.walkers.states.cum_rewards[ix] = clone_rewards[source]
        self.swarm.walkers.env_states.states[ix] = clone_states[source]
        self.swarm.walkers.env_states.observs[ix] = clone_obs[source]
def test_calculate_clone(self, virtual_rewards, oobs, eps):
    """Check the type, rank, length and element dtype of the ``calculate_clone`` outputs."""
    with numpy.errstate(**NUMPY_IGNORE_WARNINGS_PARAMS):
        compas_ix, will_clone = calculate_clone(
            virtual_rewards=tensor(virtual_rewards),
            oobs=tensor(oobs),
            eps=tensor(eps),
        )
        n_walkers = len(virtual_rewards)
        assert dtype.is_tensor(compas_ix)
        assert dtype.is_tensor(will_clone)
        assert len(compas_ix.shape) == 1
        assert len(will_clone.shape) == 1
        assert len(compas_ix) == n_walkers
        assert len(will_clone) == n_walkers
        # The first element is checked with isinstance on numpy and through
        # its ``dtype`` attribute on torch.
        if Backend.is_numpy():
            assert isinstance(compas_ix[0], dtype.int64), type(compas_ix[0])
            assert isinstance(will_clone[0], dtype.bool), type(will_clone[0])
        elif Backend.is_torch():
            assert compas_ix[0].dtype == dtype.int64, type(compas_ix[0])
            assert will_clone[0].dtype == dtype.bool, type(will_clone[0])
def test_clone(self, states_class):
    """Check the value of the ``miau`` attribute after calling ``clone`` on the states."""
    batch_size = 10
    states = states_class(batch_size=batch_size)
    states.miau = judo.arange(states.n)
    states.miau_2 = judo.arange(states.n)

    # Only the walkers at positions 3, 4 and 5 are flagged to clone.
    will_clone = judo.zeros(states.n, dtype=judo.bool)
    will_clone[3:6] = True
    # Companions are the walker indexes in reverse order.
    compas_ix = tensor(list(reversed(range(states.n))))
    states.clone(will_clone=will_clone, compas_ix=compas_ix)

    target_1 = judo.arange(10)
    unchanged = bool(judo.all(target_1 == states.miau))
    assert unchanged, (target_1 - states.miau, states_class)
def predict(
    self,
    root_env_states: StatesEnv,
    walkers: StepWalkers,
) -> StatesModel:
    """
    Select the most frequent ``init_action`` assigned to the internal swarm's walkers.

    The selected ``dt`` will be equal to the minimum ``init_dts`` among all
    the walkers that sampled the selected ``init_action``. When several
    actions are tied for the highest count, the smallest one is selected.

    Args:
        root_env_states: :env-st:`StatesEnv` class containing the data
            corresponding to the root walker of a :class:`StepSwarm`.
        walkers: :walkers:`StepWalkers` used by the internal swarm of a
            :class:`StepSwarm`.

    Returns:
        :class:`StatesModel` containing the ``actions`` and ``dt`` that the root
        walkers will use to step the :env:`Environment`.

    """
    init_actions = judo.astype(walkers.states.init_actions.flatten(), judo.int)
    init_actions = judo.to_numpy(init_actions)
    with Backend.use_backend("numpy"):
        action_counts = numpy.bincount(init_actions)
        # ``argmax`` returns the action with the highest count. The previous
        # ``numpy.nonzero(...)[0][0]`` returned the smallest action that was
        # sampled at least once, regardless of how often it was sampled.
        most_used_action = numpy.argmax(action_counts)
    most_used_action = tensor(most_used_action)
    root_model_states = StatesModel(
        batch_size=1,
        state_dict={
            "actions": {"dtype": judo.int64},
            "dt": {"dtype": judo.int64},
        },
    )
    root_model_states.actions[:] = most_used_action
    if hasattr(root_model_states, "dt"):
        init_dts = judo.astype(walkers.states.init_dts.flatten(), judo.int)
        # Mask of the walkers that sampled the selected action.
        index_dt = init_actions == most_used_action
        target_dt = init_dts[index_dt].min()
        root_model_states.dt[:] = target_dt
    return root_model_states
class Sphere(OptimBenchmark):
    """Optimization benchmark built on the ``sphere`` function."""

    # Reference value exposed by this benchmark.
    benchmark = tensor(0.0)

    def __init__(self, dims: int, *args, **kwargs):
        """Initialize the benchmark with ``dims`` dimensions; extra arguments go to the parent."""
        super().__init__(*args, dims=dims, function=sphere, **kwargs)

    @staticmethod
    def get_bounds(dims):
        """Return :class:`Bounds` spanning ``(-1000, 1000)`` on each of the ``dims`` dimensions."""
        limits = [(-1000, 1000)] * dims
        return Bounds.from_tuples(limits)

    @property
    def best_state(self):
        """Return a tensor of zeros with shape ``self.shape``."""
        return judo.zeros(self.shape)
class EggHolder(OptimBenchmark):
    """Two dimensional optimization benchmark built on the ``eggholder`` function."""

    # Reference value exposed by this benchmark.
    benchmark = tensor(-959.64066271)

    def __init__(self, dims: int = None, *args, **kwargs):
        """Initialize the benchmark. ``dims`` is ignored: the parent always receives ``dims=2``."""
        super().__init__(*args, dims=2, function=eggholder, **kwargs)

    @staticmethod
    def get_bounds(dims=None):
        """Return :class:`Bounds` spanning ``(-512, 512)`` on both dimensions; ``dims`` is ignored."""
        limits = [(-512.0, 512.0), (-512.0, 512.0)]
        return Bounds.from_tuples(limits)

    @property
    def best_state(self):
        """Return the tensor ``[512.0, 404.2319]``."""
        return tensor([512.0, 404.2319])
class Rastrigin(OptimBenchmark):
    """Optimization benchmark built on the ``rastrigin`` function."""

    # Reference value exposed by this benchmark.
    benchmark = tensor(0.0)

    def __init__(self, dims: int, *args, **kwargs):
        """Initialize the benchmark with ``dims`` dimensions; extra arguments go to the parent."""
        super().__init__(*args, dims=dims, function=rastrigin, **kwargs)

    @staticmethod
    def get_bounds(dims):
        """Return :class:`Bounds` spanning ``(-5.12, 5.12)`` on each of the ``dims`` dimensions."""
        limits = [(-5.12, 5.12)] * dims
        return Bounds.from_tuples(limits)

    @property
    def best_state(self):
        """Return a tensor of zeros with shape ``self.shape``."""
        return judo.zeros(self.shape)
def resize_image(frame: Tensor, width: int, height: int, mode: str = "RGB") -> Tensor:
    """
    Resize a frame to the specified width and height using PIL.

    Args:
        frame: Array representing the image that will be resized.
        width: Width of the resized image.
        height: Height of the resized image.
        mode: Passed to ``Image.convert`` before resizing.

    Returns:
        The converted and resized frame, wrapped with ``judo.tensor``.
    """
    from PIL import Image

    pixels = judo.to_numpy(frame)
    with judo.Backend.use_backend(name="numpy"):
        image = Image.fromarray(pixels)
        resized = image.convert(mode).resize(size=(width, height))
        pixels = judo.to_numpy(resized)
    return judo.tensor(pixels)
def test_benchmarks(self):
    """Evaluate ``LennardJones`` once for every atom count listed in ``LennardJones.minima``."""
    for key in LennardJones.minima.keys():
        n_atoms = int(key)
        lennard = LennardJones(n_atoms=n_atoms)
        # A single random point with three coordinates per atom.
        point = tensor(numpy.random.random((1, 3 * n_atoms)))
        lennard.function(point)
def benchmark(self):
    """Return the benchmark value, ``0.0`` wrapped with ``tensor``."""
    reference_value = tensor(0.0)
    return reference_value