def test_merge_states_with_atari(self):
    swarm = create_atari_swarm()
    for states in (swarm.walkers.states, swarm.walkers.env_states, swarm.walkers.model_states):
        split_states = tuple(states.split_states(states.n))
        merged = states.merge_states(split_states)
        assert len(merged) == states.n
        if Backend.is_numpy() and Backend.use_true_hash():  # Pytorch hashes are not real hashes
            assert hash(merged) == hash(states)
class TestESModel:
    @pytest.mark.skipif(not Backend.is_numpy(), reason="Only for numpy")
    def create_model_states(self, model, batch_size: int = None):
        return StatesModel(batch_size=batch_size, state_dict=model.get_params_dict())

    @pytest.mark.skipif(not Backend.is_numpy(), reason="Only for numpy")
    def create_env_states(self, model, batch_size: int = None):
        return StatesEnv(batch_size=batch_size, state_dict=model.get_params_dict())

    @pytest.mark.skipif(not Backend.is_numpy(), reason="Only for numpy")
    def test_run_for_1000_predictions(self, model, batch_size):
        for _ in range(100):
            TestModel.test_predict(self, model, batch_size)
def minimize_batch(self, x: typing.Tensor) -> Tuple[typing.Tensor, typing.Tensor]:
    """
    Minimize a batch of points.

    Args:
        x: Array representing a batch of points to be optimized, stacked \
           across the first dimension.

    Returns:
        Tuple of arrays containing the local optimum found for each point, \
        and an array with the values assigned to each of the points found.

    """
    x = judo.to_numpy(judo.copy(x))
    with Backend.use_backend("numpy"):
        result = judo.zeros_like(x)
        rewards = judo.zeros((x.shape[0], 1))
        for i in range(x.shape[0]):
            new_x, reward = self.minimize_point(x[i, :])
            result[i, :] = new_x
            rewards[i, :] = float(reward)
    self.bounds.high = tensor(self.bounds.high)
    self.bounds.low = tensor(self.bounds.low)
    result, rewards = tensor(result), tensor(rewards)
    return result, rewards
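# Hedged usage sketch (not part of the original module): it assumes the ``Function``,
# ``Bounds``, ``sphere`` and ``MinimizerWrapper`` names used in the tests below are in
# scope, and only illustrates the shape contract of ``minimize_batch``.
def _example_minimize_batch_usage():
    bounds = Bounds(shape=(2,), high=10, low=-5, dtype=float)
    minim = MinimizerWrapper(Function(function=sphere, bounds=bounds))
    points = judo.zeros((4, 2))  # batch of four 2-D points stacked along axis 0
    optima, values = minim.minimize_batch(points)
    # One local optimum per input point, and one value per point.
    assert optima.shape == points.shape
    assert values.shape == (4, 1)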
def test_reset_with_root_walker(self, swarm):
    swarm.reset()
    param_dict = swarm.walkers.env_states.get_params_dict()
    obs_dict = param_dict["observs"]
    state_dict = param_dict["states"]
    obs_size = obs_dict.get("size", obs_dict["shape"][1:])
    state_size = state_dict.get("size", state_dict["shape"][1:])
    obs = judo.astype(random_state.random(obs_size), obs_dict["dtype"])
    state = judo.astype(random_state.random(state_size), state_dict["dtype"])
    reward = 160290
    root_walker = OneWalker(observ=obs, reward=reward, state=state)
    swarm.reset(root_walker=root_walker)
    swarm_best_id = swarm.best_id
    root_walker_id = root_walker.id_walkers
    assert (swarm.best_state == state).all()
    assert (swarm.best_obs == obs).all(), (obs, tensor(swarm.best_obs))
    assert swarm.best_reward == reward
    assert (swarm.walkers.env_states.observs == obs).all()
    assert (swarm.walkers.env_states.states == state).all()
    assert (swarm.walkers.env_states.rewards == reward).all()
    if Backend.is_numpy():
        assert (swarm.walkers.states.id_walkers == root_walker.id_walkers).all()
        assert swarm_best_id == root_walker_id[0]
def __repr__(self):
    with numpy.printoptions(linewidth=100, threshold=200, edgeitems=9):
        init_actions = judo.to_numpy(self.internal_swarm.walkers.states.init_actions.flatten())
        with Backend.use_backend("numpy"):
            y = numpy.bincount(init_actions.astype(int))
            ii = numpy.nonzero(y)[0]
            string = str(self.root_walker)
            string += "\n Init actions [action, count]: \n%s" % numpy.vstack((ii, y[ii])).T
        return string
class TestFunction:
    def test_init_error(self):
        with pytest.raises(TypeError):
            Function(function=sphere, bounds=(True, False))

    def test_from_bounds_params_error(self):
        with pytest.raises(TypeError):
            Function.from_bounds_params(function=sphere)

    @pytest.mark.parametrize("batch_s", [1, 10])
    def test_reset_batch_size(self, function_env, batch_s):
        new_states: StatesEnv = function_env.reset(batch_size=batch_s)
        assert isinstance(new_states, StatesEnv)
        # assert not (new_states.observs == 0).all().item()
        # assert (new_states.rewards == 1).all().item(), (
        #     new_states.rewards,
        #     new_states.rewards.shape,
        # )
        assert (new_states.oobs == 0).all().item()
        assert len(new_states.rewards.shape) == 1
        assert new_states.rewards.shape[0] == batch_s
        assert new_states.oobs.shape[0] == batch_s
        assert new_states.observs.shape[0] == batch_s
        assert new_states.observs.shape[1] == 2

    def test_step(self, function_env, batch_size):
        states = function_env.reset(batch_size=batch_size)
        actions = StatesModel(
            actions=judo.zeros(states.observs.shape),
            batch_size=batch_size,
            dt=judo.ones((1, 2)),
        )
        new_states: StatesEnv = function_env.step(actions, states)
        assert isinstance(new_states, StatesEnv)
        assert new_states.oobs[0].item() == 0

    def test_minimizer_getattr(self):
        bounds = Bounds(shape=(2,), high=10, low=-5, dtype=float)
        env = Function(function=sphere, bounds=bounds)
        minim = MinimizerWrapper(env)
        assert minim.shape == env.shape

    @pytest.mark.skipif(not Backend.is_numpy(), reason="only in numpy for now")
    def test_minimizer_step(self):
        minim = local_minimizer()
        params = {"actions": {"dtype": judo.float64, "size": (2,)}}
        states = StatesModel(state_dict=params, batch_size=N_WALKERS)
        assert minim.shape == minim.shape
        states = minim.step(model_states=states, env_states=minim.reset(N_WALKERS))
        assert judo.allclose(states.rewards.min(), 0)
def test_calculate_clone(self, virtual_rewards, oobs, eps):
    with numpy.errstate(**NUMPY_IGNORE_WARNINGS_PARAMS):
        compas_ix, will_clone = calculate_clone(
            virtual_rewards=tensor(virtual_rewards), oobs=tensor(oobs), eps=tensor(eps)
        )
        assert dtype.is_tensor(compas_ix)
        assert dtype.is_tensor(will_clone)
        assert len(compas_ix.shape) == 1
        assert len(will_clone.shape) == 1
        assert len(compas_ix) == len(virtual_rewards)
        assert len(will_clone) == len(virtual_rewards)
        if Backend.is_numpy():
            assert isinstance(compas_ix[0], dtype.int64), type(compas_ix[0])
            assert isinstance(will_clone[0], dtype.bool), type(will_clone[0])
        elif Backend.is_torch():
            assert compas_ix[0].dtype == dtype.int64, type(compas_ix[0])
            assert will_clone[0].dtype == dtype.bool, type(will_clone[0])
def predict(
    self,
    root_env_states: StatesEnv,
    walkers: StepWalkers,
) -> StatesModel:
    """
    Select the most frequent ``init_action`` assigned to the internal swarm's walkers.

    The selected ``dt`` will be equal to the minimum ``init_dts`` among all \
    the walkers that sampled the selected ``init_action``.

    Args:
        root_env_states: :env-st:`StatesEnv` class containing the data \
                         corresponding to the root walker of a :class:`StepSwarm`.
        walkers: :walkers:`StepWalkers` used by the internal swarm of a \
                 :class:`StepSwarm`.

    Returns:
        :class:`StatesModel` containing the ``actions`` and ``dt`` that the root \
        walkers will use to step the :env:`Environment`.

    """
    init_actions = judo.astype(walkers.states.init_actions.flatten(), judo.int)
    init_actions = judo.to_numpy(init_actions)
    with Backend.use_backend("numpy"):
        y = numpy.bincount(init_actions)
        most_used_action = numpy.argmax(y)  # action with the highest count
    most_used_action = tensor(most_used_action)
    root_model_states = StatesModel(
        batch_size=1,
        state_dict={"actions": {"dtype": judo.int64}, "dt": {"dtype": judo.int64}},
    )
    root_model_states.actions[:] = most_used_action
    if hasattr(root_model_states, "dt"):
        init_dts = judo.astype(walkers.states.init_dts.flatten(), judo.int)
        index_dt = init_actions == most_used_action
        target_dt = init_dts[index_dt].min()
        root_model_states.dt[:] = target_dt
    return root_model_states
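# Hedged illustration (not part of the original class): a plain-numpy sketch of how the
# majority ``init_action`` and its minimum ``init_dt`` are picked. The arrays below are
# made-up example data.
def _example_majority_action_selection():
    import numpy

    init_actions = numpy.array([2, 0, 2, 1, 2, 0])
    init_dts = numpy.array([3, 1, 2, 5, 4, 1])
    counts = numpy.bincount(init_actions)  # occurrences of each discrete action
    most_used_action = numpy.argmax(counts)  # most frequent action -> 2
    target_dt = init_dts[init_actions == most_used_action].min()  # smallest dt among those walkers -> 2
    assert most_used_action == 2
    assert target_dt == 2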
def test_fai_iteration(self, observs, rewards, oobs):
    with numpy.errstate(**NUMPY_IGNORE_WARNINGS_PARAMS):
        compas_ix, will_clone = fai_iteration(
            observs=tensor(observs), rewards=tensor(rewards), oobs=tensor(oobs)
        )
        assert dtype.is_tensor(compas_ix)
        assert dtype.is_tensor(will_clone)
        assert len(compas_ix.shape) == 1
        assert len(will_clone.shape) == 1
        assert len(compas_ix) == len(rewards)
        assert len(will_clone) == len(rewards)
        if Backend.is_numpy():
            assert isinstance(compas_ix[0], dtype.int64), type(compas_ix[0])
            assert isinstance(will_clone[0], dtype.bool), type(will_clone[0])
def model(request):
    prev_backend = Backend.get_current_backend()
    Backend.set_backend("numpy")
    yield create_model(request.param)()
    Backend.set_backend(prev_backend)
class TestCMAES:
    @pytest.mark.skipif(not Backend.is_numpy(), reason="Only for numpy")
    def test_init_params(self, cmaes: CMAES):
        cmaes._init_algorithm_params(batch_size=8)
        # Constant params
        assert cmaes.n_dims == 5
        assert cmaes.mu_const == 4
        test_weights = np.array([0.529930, 0.285714, 0.142857, 0.041498])
        assert np.allclose(cmaes.weights_const.flatten(), test_weights)
        assert cmaes.weights_const.shape == (4, 1)
        assert np.allclose(cmaes.mu_eff_const, 2.6002)
        assert np.allclose(cmaes.cum_covm_const, 0.45020)
        assert np.allclose(cmaes.cum_sigma_const, 0.36509)
        assert np.allclose(cmaes.lr_covrank1_const, 0.047292)
        assert np.allclose(cmaes.damp_sigma_const, 1.3651)
        assert np.round(cmaes.chi_norm_const, 4) == 2.1285
        # Variable params
        assert (cmaes.invsqrtC == np.eye(cmaes.n_dims)).all()
        assert (cmaes.cov_matrix == np.eye(cmaes.n_dims)).all()
        assert (cmaes.paths_covm == 0).all()
        assert (cmaes.paths_sigma == 0).all()
        assert cmaes.paths_covm.shape == (cmaes.n_dims, 1)
        assert cmaes.paths_sigma.shape == (cmaes.n_dims, 1)
        assert (cmaes.scaling_diag == np.ones((cmaes.n_dims, 1))).all()

    @pytest.mark.skipif(not Backend.is_numpy(), reason="Only for numpy")
    def test_sample_actions(self, det_cmaes):
        batch_size = 8
        assert det_cmaes._count_eval == 0
        model_states = det_cmaes.reset(
            batch_size=batch_size, init_xmean=init_xmean, noise=noise_iter_1, model_states=None
        )
        assert det_cmaes.pop_size == batch_size
        assert det_cmaes._count_eval == batch_size
        assert np.allclose(det_cmaes.x_mean, init_xmean)
        actions = np.array(model_states.actions.T)
        assert np.allclose(actions, actions_iter_1, rtol=1e-5, atol=1e-5)

    @pytest.mark.skipif(not Backend.is_numpy(), reason="Only for numpy")
    def test_update_evolution_paths(self, det_cmaes):
        model_states = det_cmaes.reset(
            batch_size=8, init_xmean=init_xmean, noise=noise_iter_1, model_states=None
        )
        assert np.allclose(det_cmaes.x_mean, init_xmean)
        actions = np.array(model_states.actions.T)
        assert np.allclose(actions, actions_iter_1, rtol=1e-5, atol=1e-5)
        sorted_fitness = np.argsort(fitness_iter_1)[: det_cmaes.mu_const]
        selected_actions = model_states.actions[sorted_fitness].T
        det_cmaes._update_evolution_paths(selected_actions)
        assert np.allclose(det_cmaes.old_x_mean, init_xmean)
        assert np.allclose(
            det_cmaes.x_mean, xmean_iter_1, rtol=1e-5, atol=1e-5
        ), "dif: %s" % (det_cmaes.x_mean - xmean_iter_1)
        assert np.allclose(
            det_cmaes.paths_sigma, ps_iter_1, rtol=1e-5, atol=1e-5
        ), "dif: %s" % (det_cmaes.paths_sigma - ps_iter_1)
        assert det_cmaes.hsig == hsig_iter_1
        assert np.allclose(
            det_cmaes.paths_covm, pc_iter_1, rtol=1e-5, atol=1e-5
        ), "dif: %s" % (det_cmaes.paths_covm - pc_iter_1)

    @pytest.mark.skipif(not Backend.is_numpy(), reason="Only for numpy")
    def test_adapt_covariance_matrix(self, det_cmaes):
        model_states = det_cmaes.reset(
            batch_size=8, init_xmean=init_xmean, noise=noise_iter_1, model_states=None
        )
        assert np.allclose(det_cmaes.x_mean, init_xmean)
        actions = np.array(model_states.actions.T)
        assert np.allclose(actions, actions_iter_1, rtol=1e-5, atol=1e-5)
        sorted_fitness = np.argsort(fitness_iter_1)[: det_cmaes.mu_const]
        selected_actions = model_states.actions[sorted_fitness].T
        det_cmaes._update_evolution_paths(selected_actions)
        det_cmaes._adapt_covariance_matrix(selected_actions)
        assert np.allclose(det_cmaes.artmp, artmp_iter_1), "dif: %s" % (
            det_cmaes.artmp - artmp_iter_1
        )
        assert np.allclose(
            det_cmaes.cov_matrix, cov_matrix_iter_1, rtol=1e-6, atol=1e-6
        ), "dif: %s" % (det_cmaes.cov_matrix - cov_matrix_iter_1)

    @pytest.mark.skipif(not Backend.is_numpy(), reason="Only for numpy")
    def test_adapt_sigma(self, det_cmaes):
        test_sigma = 0.29992
        model_states = det_cmaes.reset(
            batch_size=8, init_xmean=init_xmean, noise=noise_iter_1, model_states=None
        )
        assert np.allclose(det_cmaes.x_mean, init_xmean)
        actions = np.array(model_states.actions.T)
        assert np.allclose(actions, actions_iter_1, rtol=1e-5, atol=1e-5)
        sorted_fitness = np.argsort(fitness_iter_1)[: det_cmaes.mu_const]
        selected_actions = model_states.actions[sorted_fitness].T
        det_cmaes._update_evolution_paths(selected_actions)
        det_cmaes._adapt_covariance_matrix(selected_actions)
        det_cmaes._adapt_sigma()
        assert np.allclose(det_cmaes.sigma, test_sigma)

    @pytest.mark.skipif(not Backend.is_numpy(), reason="Only for numpy")
    def test_covariance_matrix_diagonalization(self, det_cmaes):
        model_states = det_cmaes.reset(
            batch_size=8, init_xmean=init_xmean, noise=noise_iter_1, model_states=None
        )
        assert np.allclose(det_cmaes.x_mean, init_xmean)
        actions = np.array(model_states.actions.T)
        assert np.allclose(actions, actions_iter_1, rtol=1e-5, atol=1e-5)
        sorted_fitness = np.argsort(fitness_iter_1)[: det_cmaes.mu_const]
        selected_actions = model_states.actions[sorted_fitness].T
        det_cmaes._update_evolution_paths(selected_actions)
        det_cmaes._adapt_covariance_matrix(selected_actions)
        det_cmaes._adapt_sigma()
        det_cmaes._cov_matrix_diagonalization()
        assert np.allclose(
            det_cmaes.invsqrtC, invsqrt_cov_iter_1, rtol=1e-6, atol=1e-6
        ), "dif: %s" % (det_cmaes.invsqrtC - invsqrt_cov_iter_1)