Example #1
    def test_rendering(self):
        bench = CMAESBenchmark()
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env)
        wrapped.reset()
        with pytest.raises(NotImplementedError):
            wrapped.render_state_tracking()

        bench = CMAESBenchmark()

        def dummy():
            return [1, [2, 3]]

        bench.config.state_method = dummy
        bench.config.observation_space = gym.spaces.Tuple(
            (
                gym.spaces.Discrete(2),
                gym.spaces.Box(low=np.array([-1, 1]), high=np.array([5, 5])),
            )
        )
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env)
        wrapped.reset()
        with pytest.raises(NotImplementedError):
            wrapped.render_state_tracking()

        def dummy2():
            return [0.5]

        bench.config.state_method = dummy2
        bench.config.observation_space = gym.spaces.Box(
            low=np.array([0]), high=np.array([1])
        )
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env)
        wrapped.reset()
        wrapped.step(1)
        wrapped.step(1)
        img = wrapped.render_state_tracking()
        self.assertTrue(img.shape[-1] == 3)

        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env, 2)
        wrapped.reset()
        wrapped.step(1)
        wrapped.step(1)
        img = wrapped.render_state_tracking()
        self.assertTrue(img.shape[-1] == 3)

        class discrete_obs_env:
            def __init__(self):
                self.observation_space = gym.spaces.Discrete(2)
                self.action_space = gym.spaces.Discrete(2)
                self.reward_range = (1, 2)
                self.metadata = {}

            def reset(self):
                return 1

            def step(self, action):
                return 1, 1, 1, 1

        env = discrete_obs_env()
        wrapped = StateTrackingWrapper(env, 2)
        wrapped.reset()
        wrapped.step(1)
        img = wrapped.render_state_tracking()
        self.assertTrue(img.shape[-1] == 3)

        class multi_discrete_obs_env:
            def __init__(self):
                self.observation_space = gym.spaces.MultiDiscrete([2, 3])
                self.action_space = gym.spaces.Discrete(2)
                self.reward_range = (1, 2)
                self.metadata = {}

            def reset(self):
                return [1, 2]

            def step(self, action):
                return [1, 2], 1, 1, 1

        env = multi_discrete_obs_env()
        wrapped = StateTrackingWrapper(env)
        wrapped.reset()
        wrapped.step(1)
        img = wrapped.render_state_tracking()
        self.assertTrue(img.shape[-1] == 3)

        class multi_binary_obs_env:
            def __init__(self):
                self.observation_space = gym.spaces.MultiBinary(2)
                self.action_space = gym.spaces.Discrete(2)
                self.reward_range = (1, 2)
                self.metadata = {}

            def reset(self):
                return [1, 1]

            def step(self, action):
                return [1, 1], 1, 1, 1

        env = multi_binary_obs_env()
        wrapped = StateTrackingWrapper(env)
        wrapped.reset()
        wrapped.step(1)
        img = wrapped.render_state_tracking()
        self.assertTrue(img.shape[-1] == 3)
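
A minimal standalone sketch of the wrapper usage exercised by the test above. It assumes StateTrackingWrapper is exported from dacbench.wrappers (alongside EpisodeTimeWrapper used in Example #3) and that render_state_tracking() returns an RGB image array, as the assertions imply:

from dacbench.benchmarks import LubyBenchmark
from dacbench.wrappers import StateTrackingWrapper

bench = LubyBenchmark()
env = StateTrackingWrapper(bench.get_environment(), 2)  # same interval argument as in the test
env.reset()
for _ in range(4):
    env.step(env.action_space.sample())
img = env.render_state_tracking()
print(img.shape)  # last dimension is 3 (RGB), matching the assertion above
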
Example #2
    def test_get_env(self):
        bench = CMAESBenchmark()
        env = bench.get_environment()
        self.assertTrue(issubclass(type(env), CMAESEnv))
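
A hedged rollout sketch for the environment returned above, using the plain gym-style reset/step loop (the four-tuple step signature matches the dummy environments in Example #1, and the random actions come from the Box action space that Example #3 also relies on):

bench = CMAESBenchmark()
env = bench.get_environment()
state = env.reset()
done, total_reward = False, 0.0
while not done:
    action = env.action_space.sample()  # random step-size action
    state, reward, done, info = env.step(action)
    total_reward += reward
print("random-policy episode return:", total_reward)
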
Example #3
import numpy as np
from examples.example_utils import make_chainer_a3c, train_chainer
from dacbench.benchmarks import CMAESBenchmark
from dacbench.wrappers import EpisodeTimeWrapper

# Helper function to flatten observation space


def flatten(li):
    return [value for sublist in li for value in sublist]

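# Illustrative behaviour of the helper above (values are made up):
#   flatten([[-1.0, -1.0], [0.5]]) -> [-1.0, -1.0, 0.5]
# Every scalar from every sub-list ends up in one flat list, which is how
# obs_size is computed from the observation sub-spaces below.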

# Make CMA-ES environment
# We use the configuration from the "Learning Step-Size Adaptation in CMA-ES" paper by Shala et al.
bench = CMAESBenchmark()
env = bench.get_environment()

# Wrap environment to track time
# Here we also want the mean over each 2-step interval
env = EpisodeTimeWrapper(env, 2)

# Make chainer agent
space_array = [
    env.observation_space[k].low
    for k in list(env.observation_space.spaces.keys())
]
obs_size = np.array(flatten(space_array)).size
action_size = env.action_space.low.size
agent = make_chainer_a3c(obs_size, action_size)

# Train agent for 10 episodes
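# A hedged sketch of such a loop, for illustration only; the repo's own
# train_chainer helper imported above is the intended way to do this. The
# sketch relies only on the generic chainerrl agent interface
# (act_and_train / stop_episode_and_train); the episode count and the
# dict-observation flattening are assumptions made for illustration.
num_episodes = 10
for episode in range(num_episodes):
    obs = env.reset()
    state = np.concatenate(
        [np.asarray(obs[k], dtype=np.float32).ravel() for k in obs]
    )
    reward, done = 0.0, False
    while not done:
        action = agent.act_and_train(state, reward)
        obs, reward, done, _ = env.step(action)
        state = np.concatenate(
            [np.asarray(obs[k], dtype=np.float32).ravel() for k in obs]
        )
    agent.stop_episode_and_train(state, reward, done)
    print("finished episode", episode)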