Example #1
    def test_init(self):
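        # Without a state_interval argument, only the flat overall_states list is tracked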
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env)
        self.assertTrue(len(wrapped.overall_states) == 0)
        self.assertTrue(wrapped.state_interval is None)
        wrapped.instance = [0]
        self.assertTrue(wrapped.instance[0] == 0)

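        # With an interval (here 10), per-interval buffers are prepared as well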
        wrapped2 = StateTrackingWrapper(env, 10)
        self.assertTrue(len(wrapped2.overall_states) == 0)
        self.assertTrue(wrapped2.state_interval == 10)
        self.assertTrue(len(wrapped2.state_intervals) == 0)
        self.assertTrue(len(wrapped2.current_states) == 0)
Example #2
    def test_step_reset(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env, 2)

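        # reset() should record the initial state as the first tracked entry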
        state = wrapped.reset()
        self.assertTrue(len(state) > 1)
        self.assertTrue(len(wrapped.overall_states) == 1)

        state, reward, done, _ = wrapped.step(1)
        self.assertTrue(len(state) > 1)
        self.assertTrue(reward <= 0)
        self.assertFalse(done)

        self.assertTrue(len(wrapped.overall_states) == 2)
        self.assertTrue(len(wrapped.current_states) == 2)
        self.assertTrue(len(wrapped.state_intervals) == 0)

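        # A second reset records the new initial state and closes the filled two-state interval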
        state = wrapped.reset()
        self.assertTrue(len(wrapped.overall_states) == 3)
        self.assertTrue(len(wrapped.current_states) == 1)
        self.assertTrue(len(wrapped.state_intervals) == 1)
Example #3
    def test_dict_logging(self):
        temp_dir = tempfile.TemporaryDirectory()

        seed = 0
        episodes = 2
        logger = Logger(
            output_path=Path(temp_dir.name),
            experiment_name="test_dict_logging",
            step_write_frequency=None,
            episode_write_frequency=1,
        )

        bench = CMAESBenchmark()
        bench.set_seed(seed)
        env = bench.get_environment()
        state_logger = logger.add_module(StateTrackingWrapper)
        wrapped = StateTrackingWrapper(env, logger=state_logger)
        agent = StaticAgent(env, 3.5)
        logger.set_env(env)

        run_benchmark(wrapped, agent, episodes, logger)
        state_logger.close()

        logs = load_logs(state_logger.get_logfile())
        dataframe = log2dataframe(logs, wide=False)
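        # Expected number of logged dimensions for each component of the CMA-ES state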
        state_parts = {
            "Loc": 10,
            "Past Deltas": 40,
            "Population Size": 1,
            "Sigma": 1,
            "History Deltas": 80,
            "Past Sigma Deltas": 40,
        }

        names = dataframe.name.unique()

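        # Logged names look like "<state>_<component>_<index>"; extract the component part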
        def field(name: str):
            state, field_, *idx = name.split("_")
            return field_

        parts = groupby(sorted(names), key=field)

        for part, group_members in parts:
            expected_number = state_parts[part]
            actual_number = len(list(group_members))

            self.assertEqual(expected_number, actual_number)

        temp_dir.cleanup()
Example #4
    def test_box_logging(self):
        temp_dir = tempfile.TemporaryDirectory()

        seed = 0
        episodes = 10
        logger = Logger(
            output_path=Path(temp_dir.name),
            experiment_name="test_box_logging",
            step_write_frequency=None,
            episode_write_frequency=1,
        )

        bench = LubyBenchmark()
        bench.set_seed(seed)
        env = bench.get_environment()
        state_logger = logger.add_module(StateTrackingWrapper)
        wrapped = StateTrackingWrapper(env, logger=state_logger)
        agent = StaticAgent(env, 1)
        logger.set_env(env)

        run_benchmark(wrapped, agent, episodes, logger)
        state_logger.close()

        logs = load_logs(state_logger.get_logfile())
        dataframe = log2dataframe(logs, wide=True)

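        # In wide format, every state dimension gets its own column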
        state_columns = [
            "state_Action t (current)",
            "state_Step t (current)",
            "state_Action t-1",
            "state_Action t-2",
            "state_Step t-1",
            "state_Step t-2",
        ]

        for state_column in state_columns:
            self.assertTrue(state_column in dataframe.columns)
            self.assertTrue((~dataframe[state_column].isna()).all())

        temp_dir.cleanup()
Example #5
from pathlib import Path

from dacbench.agents import RandomAgent
from dacbench.logger import Logger
from dacbench.runner import run_benchmark
from dacbench.benchmarks import CMAESBenchmark
from dacbench.wrappers import StateTrackingWrapper

# Make CMAESBenchmark environment
bench = CMAESBenchmark()
env = bench.get_environment()

# Make Logger object to track state information
logger = Logger(experiment_name=type(bench).__name__,
                output_path=Path("../plotting/data"))
logger.set_env(env)

# Wrap env with StateTrackingWrapper
state_logger = logger.add_module(StateTrackingWrapper)
env = StateTrackingWrapper(env, logger=state_logger)

# Run random agent for 5 episodes and log state information to file
# You can plot these results with the plotting examples
agent = RandomAgent(env)
run_benchmark(env, agent, 5, logger=logger)
logger.close()
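
# A possible follow-up, sketched here as an addition: with the module logger
# handle (state_logger above), the logged states can be loaded back into a
# pandas DataFrame, mirroring the test examples earlier on this page.
from dacbench.logger import load_logs, log2dataframe

logs = load_logs(state_logger.get_logfile())
dataframe = log2dataframe(logs, wide=True)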
Example #6
    def test_rendering(self):
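        # Dict and Tuple observation spaces are not renderable, so both calls below should raise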
        bench = CMAESBenchmark()
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env)
        wrapped.reset()
        with pytest.raises(NotImplementedError):
            wrapped.render_state_tracking()

        bench = CMAESBenchmark()

        def dummy():
            return [1, [2, 3]]

        bench.config.state_method = dummy
        bench.config.observation_space = gym.spaces.Tuple(
            (
                gym.spaces.Discrete(2),
                gym.spaces.Box(low=np.array([-1, 1]), high=np.array([5, 5])),
            )
        )
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env)
        wrapped.reset()
        with pytest.raises(NotImplementedError):
            wrapped.render_state_tracking()

        def dummy2():
            return [0.5]

        bench.config.state_method = dummy2
        bench.config.observation_space = gym.spaces.Box(
            low=np.array([0]), high=np.array([1])
        )
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env)
        wrapped.reset()
        wrapped.step(1)
        wrapped.step(1)
        img = wrapped.render_state_tracking()
        self.assertTrue(img.shape[-1] == 3)

        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env, 2)
        wrapped.reset()
        wrapped.step(1)
        wrapped.step(1)
        img = wrapped.render_state_tracking()
        self.assertTrue(img.shape[-1] == 3)

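        # Minimal stub environments using the old gym step API (obs, reward, done, info)
        # to cover the Discrete, MultiDiscrete and MultiBinary observation space types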
        class discrete_obs_env:
            def __init__(self):
                self.observation_space = gym.spaces.Discrete(2)
                self.action_space = gym.spaces.Discrete(2)
                self.reward_range = (1, 2)
                self.metadata = {}

            def reset(self):
                return 1

            def step(self, action):
                return 1, 1, 1, 1

        env = discrete_obs_env()
        wrapped = StateTrackingWrapper(env, 2)
        wrapped.reset()
        wrapped.step(1)
        img = wrapped.render_state_tracking()
        self.assertTrue(img.shape[-1] == 3)

        class multi_discrete_obs_env:
            def __init__(self):
                self.observation_space = gym.spaces.MultiDiscrete([2, 3])
                self.action_space = gym.spaces.Discrete(2)
                self.reward_range = (1, 2)
                self.metadata = {}

            def reset(self):
                return [1, 2]

            def step(self, action):
                return [1, 2], 1, 1, 1

        env = multi_discrete_obs_env()
        wrapped = StateTrackingWrapper(env)
        wrapped.reset()
        wrapped.step(1)
        img = wrapped.render_state_tracking()
        self.assertTrue(img.shape[-1] == 3)

        class multi_binary_obs_env:
            def __init__(self):
                self.observation_space = gym.spaces.MultiBinary(2)
                self.action_space = gym.spaces.Discrete(2)
                self.reward_range = (1, 2)
                self.metadata = {}

            def reset(self):
                return [1, 1]

            def step(self, action):
                return [1, 1], 1, 1, 1

        env = multi_binary_obs_env()
        wrapped = StateTrackingWrapper(env)
        wrapped.reset()
        wrapped.step(1)
        img = wrapped.render_state_tracking()
        self.assertTrue(img.shape[-1] == 3)
Example #7
    def test_get_states(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = StateTrackingWrapper(env)
        wrapped.reset()
        for i in range(4):
            wrapped.step(i)
        wrapped2 = StateTrackingWrapper(env, 2)
        wrapped2.reset()
        for i in range(4):
            wrapped2.step(i)

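        # One reset plus four steps gives five tracked states; with interval 2 they
        # split into groups of 2, 2 and 1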
        overall_states_only = wrapped.get_states()
        overall_states, intervals = wrapped2.get_states()
        self.assertTrue(np.array_equal(overall_states, overall_states_only))
        self.assertTrue(len(overall_states_only) == 5)
        self.assertTrue(len(overall_states_only[4]) == 6)

        self.assertTrue(len(intervals) == 3)
        self.assertTrue(len(intervals[0]) == 2)
        self.assertTrue(len(intervals[1]) == 2)
        self.assertTrue(len(intervals[2]) == 1)
Example #8
    # The opening of this snippet is truncated; the lines below reconstruct the
    # elided setup (benchmark, seeds and episode count are assumptions inferred
    # from their later use and from the experiment name):
    bench = SigmoidBenchmark()
    seeds = range(10)
    num_episodes = 10
    logger = Logger(
        experiment_name="sigmoid_example",
        output_path=Path("plotting/data"),
        step_write_frequency=None,
        episode_write_frequency=None,
    )
    state_logger = logger.add_module(StateTrackingWrapper)
    performance_logger = logger.add_module(PerformanceTrackingWrapper)

    for s in seeds:
        # Log the seed
        logger.set_additional_info(seed=s)

        # Make & wrap benchmark environment
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(env, logger=performance_logger)
        env = StateTrackingWrapper(env, logger=state_logger)

        # Add env to logger
        logger.set_env(env)

        # Run random agent
        agent = RandomAgent(env)
        run_benchmark(env, agent, num_episodes, logger)

    # Close logger object
    logger.close()

    # Load performance of last seed into pandas DataFrame
    logs = load_logs(performance_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)
Example #9
from chainerrl import wrappers
import matplotlib.pyplot as plt
from examples.example_utils import train_chainer, make_chainer_dqn
from dacbench.benchmarks import FastDownwardBenchmark
from dacbench.wrappers import StateTrackingWrapper

# Get FastDownward Environment
bench = FastDownwardBenchmark()
env = bench.get_environment()

# Wrap environment to track state
# In this case we also want the mean of each 5-step interval
env = StateTrackingWrapper(env, 5)
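# (the second argument is the state_interval; see the tests above for its effect)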

# Chainer requires casting to float32
env = wrappers.CastObservationToFloat32(env)

# Make chainer agent
obs_size = env.observation_space.low.size
agent = make_chainer_dqn(obs_size, env.action_space)

# Train for 10 episodes
train_chainer(agent, env)

# Plot state values after training
env.render_state_tracking()
plt.show()