def test_crash(self):
    # A NaN action must mark the env as crashed but still return clean values
    env = ObservationWrapper(self.env)
    env.reset()
    state, reward, done, _ = env.step(np.nan)
    self.assertTrue(env.crashed)
    self.assertFalse(np.any(np.isnan(state)))
    self.assertEqual(reward, env.crash_penalty)
def test_flatten(self):
    wrapped_env = ObservationWrapper(self.get_test_env())
    d = {"b": 0, "a": np.array([0, 1.4, 3])}
    flat = wrapped_env.flatten(d)
    expected = np.array([0, 1.4, 3, 0])
    np.testing.assert_array_almost_equal(flat, expected)
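# For reference: a minimal sketch of the flattening behavior the test above
# checks. This is an assumption inferred from the expected output, not the
# actual ObservationWrapper implementation, and flatten_sketch is a
# hypothetical helper: dict values appear concatenated in sorted key order,
# so {"b": 0, "a": [0, 1.4, 3]} becomes [0, 1.4, 3, 0].
import numpy as np

def flatten_sketch(obs_dict):
    # Flatten each value and join them in sorted key order
    parts = [np.asarray(obs_dict[key], dtype=float).ravel() for key in sorted(obs_dict)]
    return np.concatenate(parts)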
def test_stateless(self):
    env = ObservationWrapper(self.env)
    rng = np.random.default_rng(123)
    mems = []
    instance_idxs = []
    # Roll out a few short episodes with random actions and record each transition
    for _ in range(3):
        env.reset()
        instance_idxs.append(env.instance_index)
        done = False
        mem = []
        step = 0
        while not done and step < 5:
            action = np.exp(rng.integers(low=-10, high=1))
            state, reward, done, _ = env.step(action)
            mem.append(np.concatenate([state, [reward, int(done), action]]))
            step += 1
        mems.append(np.array(mem))
    # Replay the recorded actions on the same instances in reverse order;
    # if the env keeps no hidden state, the trajectories must match exactly
    for i, idx in enumerate(reversed(instance_idxs)):
        # reset() advances the instance pointer, so start one before the recorded index
        env.instance_index = idx - 1
        env.reset()
        self.assertEqual(env.instance_index, idx)
        done = False
        mem = []
        step = 0
        while not done and step < 5:
            action = mems[-(i + 1)][step][-1]
            state, reward, done, _ = env.step(action)
            mem.append(np.concatenate([state, [reward, int(done), action]]))
            step += 1
        np.testing.assert_allclose(mems[-(i + 1)], np.array(mem))
def make_benchmark(config):
    bench = getattr(benchmarks, config["benchmark"])()
    env = bench.get_benchmark(seed=config["seed"])
    if config["benchmark"] in ["SGDBenchmark", "CMAESBenchmark"]:
        env = ObservationWrapper(env)
    wrapped = PerformanceTrackingWrapper(env, logger=config["logger"])
    logger.set_env(wrapped)
    return wrapped
def test_step(self):
    benchmark = SGDBenchmark()
    benchmark.config = objdict(SGD_DEFAULTS.copy())
    benchmark.read_instance_set()
    # Step once for every available reward type
    for reward_type in Reward:
        benchmark.config.reward_type = reward_type
        env = SGDEnv(benchmark.config)
        env = ObservationWrapper(env)
        self.assertEqual(env.reward_range, reward_type.func.frange)
        env.reset()
        state, reward, done, meta = env.step(1.0)
        self.assertGreaterEqual(reward, env.reward_range[0])
        self.assertLessEqual(reward, env.reward_range[1])
        self.assertFalse(done)
        self.assertEqual(len(meta.keys()), 0)
def test_reproducibility(self):
    mems = []
    instances = []
    env = ObservationWrapper(self.env)
    # Two identically seeded runs must produce identical trajectories
    for _ in range(2):
        rng = np.random.default_rng(123)
        env.seed(123)
        env.instance_index = 0
        instances.append(env.get_instance_set())
        env.reset()
        done = False
        mem = []
        step = 0
        while not done and step < 5:
            action = np.exp(rng.integers(low=-10, high=1))
            state, reward, done, _ = env.step(action)
            mem.append(np.concatenate([state, [reward, int(done), action]]))
            step += 1
        mems.append(np.array(mem))
    self.assertEqual(mems[0].size, mems[1].size)
    self.assertEqual(instances[0], instances[1])
    np.testing.assert_allclose(mems[0], mems[1])
def test_conversion_wrapper(self):
    action = 0.2
    env = self.get_test_env()
    reset_state_env = env.reset()
    step_state_env, *rest_env = env.step(action)
    self.assertIsInstance(reset_state_env, dict)

    wrapped_env = ObservationWrapper(self.get_test_env())
    reset_state_wrapped = wrapped_env.reset()
    step_state_wrapped, *rest_wrapped = wrapped_env.step(action)
    self.assertIsInstance(reset_state_wrapped, np.ndarray)

    # Reward, done and info pass through unchanged; only observations are flattened
    self.assertListEqual(rest_env, rest_wrapped)
    np.testing.assert_array_equal(wrapped_env.flatten(reset_state_env), reset_state_wrapped)
    np.testing.assert_array_equal(wrapped_env.flatten(step_state_env), step_state_wrapped)
def make_benchmark(config):
    bench = getattr(benchmarks, config["benchmark"])()
    env = bench.get_benchmark(seed=config["seed"])
    if config["benchmark"] in ["SGDBenchmark", "CMAESBenchmark"]:
        env = ObservationWrapper(env)
    return env
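# A hedged usage sketch for make_benchmark above. Only the "benchmark" and
# "seed" keys are read by the function; the concrete values here are
# illustrative examples, not prescribed settings.
config = {"benchmark": "CMAESBenchmark", "seed": 42}
env = make_benchmark(config)
state = env.reset()  # a flattened np.ndarray, thanks to ObservationWrapper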
# Make logger object
logger = Logger(
    experiment_name="CMAESBenchmark",
    output_path=Path("../plotting/data"),
)

# Make CMA-ES environment
# We use the configuration from the "Learning to Optimize Step-size Adaption in CMA-ES" paper by Shala et al.
bench = CMAESBenchmark()
env = bench.get_benchmark()
logger.set_env(env)

# Wrap to track performance
performance_logger = logger.add_module(PerformanceTrackingWrapper)
env = PerformanceTrackingWrapper(env=env, logger=performance_logger)

# Also wrap to turn the dictionary observations into an easy-to-work-with array
env = ObservationWrapper(env)

# Make chainer agent
obs_size = env.observation_space.low.size
action_size = env.action_space.low.size
agent = make_chainer_a3c(obs_size, action_size)

# Training
num_episodes = 3
for i in range(num_episodes):
    # Reset environment to begin episode
    state = env.reset()

    # Initialize episode
    done = False
    r = 0