Example 1
    def test_crash(self):
        env = ObservationWrapper(self.env)
        env.reset()
        state, reward, done, _ = env.step(np.nan)
        self.assertTrue(env.crashed)
        self.assertFalse(any(np.isnan(state)))
        self.assertTrue(reward == env.crash_penalty)
Example 2
    def test_flatten(self):
        wrapped_env = ObservationWrapper(self.get_test_env())

        d = {"b": 0, "a": np.array([0, 1.4, 3])}
        flat = wrapped_env.flatten(d)

        expected = np.array([0, 1.4, 3, 0])

        np.testing.assert_array_almost_equal(flat, expected)
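The expected output above suggests that flattening visits the observation keys in sorted order and concatenates their values. A minimal, hypothetical sketch of that behaviour (the helper name and implementation are assumptions, not the wrapper's actual code):

import numpy as np

def flatten_observation(obs):
    # Hypothetical helper: walk keys alphabetically and concatenate
    # scalar or array values into one flat float array.
    parts = [np.atleast_1d(np.asarray(obs[k], dtype=float)).ravel()
             for k in sorted(obs)]
    return np.concatenate(parts)

# flatten_observation({"b": 0, "a": np.array([0, 1.4, 3])}) -> array([0., 1.4, 3., 0.])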
Example 3
    def test_stateless(self):
        env = ObservationWrapper(self.env)
        rng = np.random.default_rng(123)
        mems = []
        instance_idxs = []
        for _ in range(3):
            env.reset()
            instance_idxs.append(env.instance_index)

            done = False
            mem = []
            step = 0
            while not done and step < 5:
                action = np.exp(rng.integers(low=-10, high=1))
                state, reward, done, _ = env.step(action)
                mem.append(np.concatenate([state, [reward, int(done), action]]))
                step += 1
            mems.append(np.array(mem))

        rng = np.random.default_rng(123)
        for i, idx in enumerate(reversed(instance_idxs)):
            env.instance_index = idx - 1
            env.reset()
            self.assertTrue(env.instance_index == idx)

            done = False
            mem = []
            step = 0
            while not done and step < 5:
                action = mems[-(i + 1)][step][-1]
                state, reward, done, _ = env.step(action)
                mem.append(np.concatenate([state, [reward, int(done), action]]))
                step += 1
            np.testing.assert_allclose(mems[-(i + 1)], np.array(mem))
Example 4
def make_benchmark(config):
    bench = getattr(benchmarks, config["benchmark"])()
    env = bench.get_benchmark(seed=config["seed"])
    if config["benchmark"] in ["SGDBenchmark", "CMAESBenchmark"]:
        env = ObservationWrapper(env)
    wrapped = PerformanceTrackingWrapper(env, logger=config["logger"])
    logger.set_env(wrapped)
    return wrapped
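For context, a config for this factory might look like the sketch below; the keys mirror the ones the function reads, while the concrete values and the performance_logger name are assumptions, not from the source. Note that the function also relies on a module-level logger object for set_env.

# Illustrative only: benchmark name, seed, and a logger module for the
# PerformanceTrackingWrapper (e.g. obtained via logger.add_module, as in Example 9).
config = {
    "benchmark": "CMAESBenchmark",
    "seed": 42,
    "logger": performance_logger,
}
env = make_benchmark(config)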
Example 5
    def test_step(self):
        benchmark = SGDBenchmark()
        benchmark.config = objdict(SGD_DEFAULTS.copy())
        benchmark.read_instance_set()

        for reward_type in Reward:
            benchmark.config.reward_type = reward_type
            env = SGDEnv(benchmark.config)
            env = ObservationWrapper(env)
            self.assertTrue(env.reward_range == reward_type.func.frange)

            env.reset()
            state, reward, done, meta = env.step(1.0)
            self.assertTrue(reward >= env.reward_range[0])
            self.assertTrue(reward <= env.reward_range[1])
            self.assertFalse(done)
            self.assertTrue(len(meta.keys()) == 0)
Example 6
    def test_reproducibility(self):
        mems = []
        instances = []
        env = ObservationWrapper(self.env)
        for _ in range(2):
            rng = np.random.default_rng(123)
            env.seed(123)
            env.instance_index = 0
            instances.append(env.get_instance_set())

            env.reset()

            done = False
            mem = []
            step = 0
            while not done and step < 5:
                action = np.exp(rng.integers(low=-10, high=1))
                state, reward, done, _ = env.step(action)
                mem.append(np.concatenate([state, [reward, int(done), action]]))
                step += 1
            mems.append(np.array(mem))
        self.assertEqual(mems[0].size, mems[1].size)
        self.assertEqual(instances[0], instances[1])
        np.testing.assert_allclose(mems[0], mems[1])
Example 7
    def test_conversion_wrapper(self):
        action = 0.2

        env = self.get_test_env()
        reset_state_env = env.reset()
        step_state_env, *rest_env = env.step(action)
        self.assertIsInstance(reset_state_env, dict)

        wrapped_env = ObservationWrapper(self.get_test_env())
        reset_state_wrapped = wrapped_env.reset()
        step_state_wrapped, *rest_wrapped = wrapped_env.step(action)

        self.assertIsInstance(reset_state_wrapped, np.ndarray)

        self.assertListEqual(rest_env, rest_wrapped)

        np.testing.assert_array_equal(wrapped_env.flatten(reset_state_env),
                                      reset_state_wrapped)
        np.testing.assert_array_equal(wrapped_env.flatten(step_state_env),
                                      step_state_wrapped)
Example 8
def make_benchmark(config):
    bench = getattr(benchmarks, config["benchmark"])()
    env = bench.get_benchmark(seed=config["seed"])
    if config["benchmark"] in ["SGDBenchmark", "CMAESBenchmark"]:
        env = ObservationWrapper(env)
    return env
Example 9
# Make logger object
logger = Logger(experiment_name="CMAESBenchmark",
                output_path=Path("../plotting/data"))

# Make CMA-ES environment
# We use the configuration from the "Learning Step-Size Adaptation in CMA-ES" paper by Shala et al.
bench = CMAESBenchmark()
env = bench.get_benchmark()
logger.set_env(env)

# Wrap to track performance
performance_logger = logger.add_module(PerformanceTrackingWrapper)
env = PerformanceTrackingWrapper(env=env, logger=performance_logger)

# Also wrap to flatten the dictionary observations into an easy-to-use array
env = ObservationWrapper(env)

# Make chainer agent
obs_size = env.observation_space.low.size
action_size = env.action_space.low.size
agent = make_chainer_a3c(obs_size, action_size)

# Training
num_episodes = 3
for i in range(num_episodes):
    # Reset environment to begin episode
    state = env.reset()

    # Initialize episode
    done = False
    r = 0