def test_init(self):
    bench = LubyBenchmark()
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env)
    self.assertTrue(len(wrapped.overall_states) == 0)
    self.assertTrue(wrapped.state_interval is None)
    wrapped.instance = [0]
    self.assertTrue(wrapped.instance[0] == 0)

    wrapped2 = StateTrackingWrapper(env, 10)
    self.assertTrue(len(wrapped2.overall_states) == 0)
    self.assertTrue(wrapped2.state_interval == 10)
    self.assertTrue(len(wrapped2.state_intervals) == 0)
    self.assertTrue(len(wrapped2.current_states) == 0)
def test_get_states(self):
    bench = LubyBenchmark()
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    for i in range(4):
        wrapped.step(i)

    wrapped2 = StateTrackingWrapper(env, 2)
    wrapped2.reset()
    for i in range(4):
        wrapped2.step(i)

    overall_states_only = wrapped.get_states()
    overall_states, intervals = wrapped2.get_states()
    self.assertTrue(np.array_equal(overall_states, overall_states_only))
    self.assertTrue(len(overall_states_only) == 5)
    self.assertTrue(len(overall_states_only[4]) == 6)
    self.assertTrue(len(intervals) == 3)
    self.assertTrue(len(intervals[0]) == 2)
    self.assertTrue(len(intervals[1]) == 2)
    self.assertTrue(len(intervals[2]) == 1)
def test_dict_logging(self):
    temp_dir = tempfile.TemporaryDirectory()
    seed = 0
    episodes = 2
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_dict_logging",
        step_write_frequency=None,
        episode_write_frequency=1,
    )

    bench = CMAESBenchmark()
    bench.set_seed(seed)
    env = bench.get_environment()
    state_logger = logger.add_module(StateTrackingWrapper)
    wrapped = StateTrackingWrapper(env, logger=state_logger)
    agent = StaticAgent(env, 3.5)
    logger.set_env(env)

    run_benchmark(wrapped, agent, episodes, logger)
    state_logger.close()

    logs = load_logs(state_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=False)

    state_parts = {
        "Loc": 10,
        "Past Deltas": 40,
        "Population Size": 1,
        "Sigma": 1,
        "History Deltas": 80,
        "Past Sigma Deltas": 40,
    }
    names = dataframe.name.unique()

    def field(name: str):
        state, field_, *idx = name.split("_")
        return field_

    parts = groupby(sorted(names), key=field)
    for part, group_members in parts:
        expected_number = state_parts[part]
        actual_number = len(list(group_members))
        self.assertEqual(expected_number, actual_number)

    temp_dir.cleanup()
def test_box_logging(self):
    temp_dir = tempfile.TemporaryDirectory()
    seed = 0
    episodes = 10
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_box_logging",
        step_write_frequency=None,
        episode_write_frequency=1,
    )

    bench = LubyBenchmark()
    bench.set_seed(seed)
    env = bench.get_environment()
    state_logger = logger.add_module(StateTrackingWrapper)
    wrapped = StateTrackingWrapper(env, logger=state_logger)
    agent = StaticAgent(env, 1)
    logger.set_env(env)

    run_benchmark(wrapped, agent, episodes, logger)
    state_logger.close()

    logs = load_logs(state_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)

    state_columns = [
        "state_Action t (current)",
        "state_Step t (current)",
        "state_Action t-1",
        "state_Action t-2",
        "state_Step t-1",
        "state_Step t-2",
    ]

    for state_column in state_columns:
        self.assertTrue(state_column in dataframe.columns)
        self.assertTrue((~dataframe[state_column].isna()).all())

    temp_dir.cleanup()
def test_step_reset(self):
    bench = LubyBenchmark()
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env, 2)

    state = wrapped.reset()
    self.assertTrue(len(state) > 1)
    self.assertTrue(len(wrapped.overall_states) == 1)

    state, reward, done, _ = wrapped.step(1)
    self.assertTrue(len(state) > 1)
    self.assertTrue(reward <= 0)
    self.assertFalse(done)
    self.assertTrue(len(wrapped.overall_states) == 2)
    self.assertTrue(len(wrapped.current_states) == 2)
    self.assertTrue(len(wrapped.state_intervals) == 0)

    state = wrapped.reset()
    self.assertTrue(len(wrapped.overall_states) == 3)
    self.assertTrue(len(wrapped.current_states) == 1)
    self.assertTrue(len(wrapped.state_intervals) == 1)
from pathlib import Path

from dacbench.agents import RandomAgent
from dacbench.logger import Logger
from dacbench.runner import run_benchmark
from dacbench.benchmarks import CMAESBenchmark
from dacbench.wrappers import StateTrackingWrapper

# Make CMAESBenchmark environment
bench = CMAESBenchmark()
env = bench.get_environment()

# Make Logger object to track state information
logger = Logger(
    experiment_name=type(bench).__name__,
    output_path=Path("../plotting/data"),
)
logger.set_env(env)

# Wrap env with StateTrackingWrapper
state_logger = logger.add_module(StateTrackingWrapper)
env = StateTrackingWrapper(env, logger=state_logger)

# Run random agent for 5 episodes and log state information to file
# You can plot these results with the plotting examples
agent = RandomAgent(env)
run_benchmark(env, agent, 5, logger=logger)
logger.close()
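# After the run, the logged states can be loaded back for analysis.
# A minimal sketch using the load_logs/log2dataframe helpers from
# dacbench.logger (state_logger is the module logger created above):
from dacbench.logger import load_logs, log2dataframe

logs = load_logs(state_logger.get_logfile())
dataframe = log2dataframe(logs, wide=True)
print(dataframe.head())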
def test_rendering(self):
    bench = CMAESBenchmark()
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    with pytest.raises(NotImplementedError):
        wrapped.render_state_tracking()

    bench = CMAESBenchmark()

    def dummy():
        return [1, [2, 3]]

    bench.config.state_method = dummy
    bench.config.observation_space = gym.spaces.Tuple(
        (
            gym.spaces.Discrete(2),
            gym.spaces.Box(low=np.array([-1, 1]), high=np.array([5, 5])),
        )
    )
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    with pytest.raises(NotImplementedError):
        wrapped.render_state_tracking()

    def dummy2():
        return [0.5]

    bench.config.state_method = dummy2
    bench.config.observation_space = gym.spaces.Box(
        low=np.array([0]), high=np.array([1])
    )
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    wrapped.step(1)
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)

    bench = LubyBenchmark()
    env = bench.get_environment()
    wrapped = StateTrackingWrapper(env, 2)
    wrapped.reset()
    wrapped.step(1)
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)

    class discrete_obs_env:
        def __init__(self):
            self.observation_space = gym.spaces.Discrete(2)
            self.action_space = gym.spaces.Discrete(2)
            self.reward_range = (1, 2)
            self.metadata = {}

        def reset(self):
            return 1

        def step(self, action):
            return 1, 1, 1, 1

    env = discrete_obs_env()
    wrapped = StateTrackingWrapper(env, 2)
    wrapped.reset()
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)

    class multi_discrete_obs_env:
        def __init__(self):
            self.observation_space = gym.spaces.MultiDiscrete([2, 3])
            self.action_space = gym.spaces.Discrete(2)
            self.reward_range = (1, 2)
            self.metadata = {}

        def reset(self):
            return [1, 2]

        def step(self, action):
            return [1, 2], 1, 1, 1

    env = multi_discrete_obs_env()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)

    class multi_binary_obs_env:
        def __init__(self):
            self.observation_space = gym.spaces.MultiBinary(2)
            self.action_space = gym.spaces.Discrete(2)
            self.reward_range = (1, 2)
            self.metadata = {}

        def reset(self):
            return [1, 1]

        def step(self, action):
            return [1, 1], 1, 1, 1

    env = multi_binary_obs_env()
    wrapped = StateTrackingWrapper(env)
    wrapped.reset()
    wrapped.step(1)
    img = wrapped.render_state_tracking()
    self.assertTrue(img.shape[-1] == 3)
experiment_name="sigmoid_example", output_path=Path("plotting/data"), step_write_frequency=None, episode_write_frequency=None, ) state_logger = logger.add_module(StateTrackingWrapper) performance_logger = logger.add_module(PerformanceTrackingWrapper) for s in seeds: # Log the seed logger.set_additional_info(seed=s) # Make & wrap benchmark environment env = bench.get_benchmark(seed=s) env = PerformanceTrackingWrapper(env, logger=performance_logger) env = StateTrackingWrapper(env, logger=state_logger) # Add env to logger logger.set_env(env) # Run random agent agent = RandomAgent(env) run_benchmark(env, agent, num_episodes, logger) # Close logger object logger.close() # Load performance of last seed into pandas DataFrame logs = load_logs(performance_logger.get_logfile()) dataframe = log2dataframe(logs, wide=True)
from chainerrl import wrappers
import matplotlib.pyplot as plt

from examples.example_utils import train_chainer, make_chainer_dqn
from dacbench.benchmarks import FastDownwardBenchmark
from dacbench.wrappers import StateTrackingWrapper

# Get FastDownward environment
bench = FastDownwardBenchmark()
env = bench.get_environment()

# Wrap environment to track state
# In this case we also want the mean of each 5-step interval
env = StateTrackingWrapper(env, 5)

# Chainer requires casting to float32
env = wrappers.CastObservationToFloat32(env)

# Make chainer agent
obs_size = env.observation_space.low.size
agent = make_chainer_dqn(obs_size, env.action_space)

# Train for 10 episodes
train_chainer(agent, env)

# Plot state values after training
env.render_state_tracking()
plt.show()
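# Besides rendering, tracked states can be retrieved programmatically.
# A minimal sketch: since the wrapper above was created with an interval
# of 5, get_states() returns both the full state history and the
# per-interval lists (matching the behavior exercised in test_get_states):
overall_states, intervals = env.get_states()
print(len(overall_states), "states tracked across", len(intervals), "intervals")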