def setUp(self) -> None:
    """Run a short random-agent experiment on the Sigmoid benchmark and log it.

    All artifacts are written into a TemporaryDirectory owned by this test;
    the resulting log file path is stored in ``self.log_file`` for later
    tests to read back.
    """
    self.temp_dir = tempfile.TemporaryDirectory()
    episodes = 80
    # NOTE(review): seed 2 is skipped — presumably intentional; confirm.
    seeds = [0, 1, 3, 4, 5]
    experiment_name = "test_env"
    # Write frequencies of None: entries are buffered and flushed on close.
    logger = Logger(
        output_path=Path(self.temp_dir.name),
        experiment_name=experiment_name,
        step_write_frequency=None,
        episode_write_frequency=None,
    )
    benchmark = SigmoidBenchmark()
    env = benchmark.get_benchmark()
    agent = RandomAgent(env)
    logger.set_env(env)
    env_logger = logger.add_module(env)
    for seed in seeds:
        env.seed(seed)
        # Tag every subsequent log entry with the active seed.
        logger.set_additional_info(seed=seed)
        logger.reset_episode()
        for episode in range(episodes):
            state = env.reset()
            done = False
            reward = 0
            step = 0
            while not done:
                action = agent.act(state, reward)
                env_logger.log(
                    "logged_step",
                    step,
                )
                env_logger.log(
                    "logged_episode",
                    episode,
                )
                next_state, reward, done, _ = env.step(action)
                env_logger.log(
                    "reward",
                    reward,
                )
                env_logger.log(
                    "done",
                    done,
                )
                agent.train(next_state, reward)
                state = next_state
                logger.next_step()
                step += 1
            agent.end_episode(state, reward)
            logger.next_episode()
    env.close()
    logger.close()
    # Remember where the module's log ended up so tests can parse it.
    self.log_file = env_logger.log_file.name
def test_save_conf(self):
    """Saving the benchmark config to JSON must round-trip every config key.

    Fix: the temporary file is now removed in a ``finally`` block, so a
    failing assertion no longer leaks ``test_conf.json`` into the working
    directory and breaks subsequent runs.
    """
    bench = SigmoidBenchmark()
    bench.save_config("test_conf.json")
    try:
        with open("test_conf.json", "r") as fp:
            recovered = json.load(fp)
        # Every configured key must survive serialization.
        for k in bench.config.keys():
            self.assertTrue(k in recovered.keys())
    finally:
        # Clean up unconditionally so no stray file pollutes the cwd.
        os.remove("test_conf.json")
def test_init(self):
    """A freshly wrapped environment must start with empty tracking state."""
    bench = SigmoidBenchmark()
    bench.set_action_values((3, ))
    wrapped = PolicyProgressWrapper(bench.get_environment(), compute_optimal_sigmoid)
    # No episodes run yet -> both trackers are empty.
    self.assertTrue(len(wrapped.policy_progress) == 0)
    self.assertTrue(len(wrapped.episode) == 0)
    # The optimum-computing callback must have been stored.
    self.assertFalse(wrapped.compute_optimal is None)
def test_benchmark_env(self):
    """get_benchmark must return a wrapped env with a single-instance set.

    Fix: the original unpacked ``env.step`` into ``s, r, d, i``, shadowing
    the loop variable ``d`` (the benchmark dimension); distinct names are
    used instead (behavior unchanged, since ``d`` was not read afterwards).
    """
    bench = SigmoidBenchmark()
    for dimension in [1, 2, 3, 5]:
        env = bench.get_benchmark(dimension)
        self.assertTrue(issubclass(type(env), InstanceSamplingWrapper))
        env.reset()
        state, reward, done, info = env.step(0)
        # The benchmark env samples from exactly one instance.
        self.assertTrue(env.inst_id == 0)
        self.assertTrue(len(env.instance_set) == 1)
def test_render(self, mock_plt):
    """Rendering policy progress should call matplotlib's show()."""
    bench = SigmoidBenchmark()
    bench.set_action_values((3, ))
    env = PolicyProgressWrapper(bench.get_environment(), compute_optimal_sigmoid)
    # Run two full episodes so the wrapper has progress data to plot.
    for _ in range(2):
        env.reset()
        done = False
        while not done:
            _, _, done, _ = env.step(1)
    env.render_policy_progress()
    self.assertTrue(mock_plt.show.called)
def test_step(self):
    """The episode buffer fills during an episode and flushes when it ends."""
    bench = SigmoidBenchmark()
    bench.set_action_values((3, ))
    bench.config.instance_set = [[0, 0], [1, 1], [3, 4], [5, 6]]
    wrapped = PolicyProgressWrapper(bench.get_environment(), compute_optimal_sigmoid)
    wrapped.reset()
    _, _, done, _ = wrapped.step(1)
    # One step taken -> one action recorded in the running episode.
    self.assertTrue(len(wrapped.episode) == 1)
    # Play the episode out to completion.
    while not done:
        _, _, done, _ = wrapped.step(1)
    # Episode finished: buffer cleared, one progress entry recorded.
    self.assertTrue(len(wrapped.episode) == 0)
    self.assertTrue(len(wrapped.policy_progress) == 1)
def test_scenarios(self):
    """Every shipped sigmoid scenario config must yield a working env."""
    for scenario in (
        "sigmoid_1D3M.json",
        "sigmoid_2D3M.json",
        "sigmoid_3D3M.json",
        "sigmoid_5D3M.json",
    ):
        path = os.path.join("dacbench/additional_configs/sigmoid", scenario)
        bench = SigmoidBenchmark(path)
        self.assertTrue(bench.config is not None)
        env = bench.get_environment()
        # Both reset and a first step must produce a state.
        state = env.reset()
        self.assertTrue(state is not None)
        state, _, _, _ = env.step(0)
        self.assertTrue(state is not None)
def make_sigmoid(config):
    """Build a Sigmoid environment from a plain config mapping.

    ``"action_values"`` goes through the dedicated setter (which also
    updates the dependent spaces); every other key is copied verbatim
    into the benchmark config.
    """
    bench = SigmoidBenchmark()
    for key, value in config.items():
        if key == "action_values":
            bench.set_action_values(value)
        else:
            bench.config[key] = value
    return bench.get_environment()
def test_read_instances(self):
    """Default set: 300 four-valued instances, identical across benchmarks."""
    bench = SigmoidBenchmark()
    bench.read_instance_set()
    instance_set = bench.config.instance_set
    self.assertTrue(len(instance_set.keys()) == 300)
    self.assertTrue(len(instance_set[0]) == 4)
    first_inst = instance_set[0]
    # A second, independent benchmark must load the identical set.
    env = SigmoidBenchmark().get_environment()
    self.assertTrue(len(env.instance_set[0]) == 4)
    self.assertTrue(env.instance_set[0] == first_inst)
    self.assertTrue(len(env.instance_set.keys()) == 300)
def test_read_instances(self):
    """Instance set: 100 two-valued instances with a known first entry."""
    expected_first = [2.0004403531465558, 7.903476325943215]
    bench = SigmoidBenchmark()
    bench.read_instance_set()
    self.assertTrue(len(bench.config.instance_set) == 100)
    self.assertTrue(len(bench.config.instance_set[0]) == 2)
    self.assertTrue(bench.config.instance_set[0] == expected_first)
    # A fresh benchmark's environment must expose the same set.
    env = SigmoidBenchmark().get_environment()
    self.assertTrue(len(env.instance_set[0]) == 2)
    self.assertTrue(env.instance_set[0] == expected_first)
    self.assertTrue(len(env.instance_set) == 100)
        # NOTE(review): tail of a sampling function whose header lies outside
        # this chunk — loop/indent structure reconstructed; confirm upstream.
        instances.append(sample_sigmoid())
    return instances


# Helper method to print current set
def print_instance_set(instance_set):
    # Instances are numbered 1-based for readability.
    c = 1
    for i in instance_set:
        print(f"Instance {c}: {i[0]}, {i[1]}")
        c += 1


# Make Sigmoid benchmark
bench = SigmoidBenchmark()
bench.set_action_values([3])

# First example: read instances from default instance set path
instances_from_file = bench.get_environment()
print("Instance set read from file")
print_instance_set(instances_from_file.instance_set)
print("\n")

# Second example: Sample instance set before training
instance_set = sample_instance(20)
bench.config.instance_set = instance_set
instances_sampled_beforehand = bench.get_environment()
print("Instance set sampled before env creation")
print_instance_set(instances_sampled_beforehand.instance_set)
print("\n")
def test_get_env(self):
    """get_environment must hand back a SigmoidEnv subclass."""
    env = SigmoidBenchmark().get_environment()
    self.assertTrue(issubclass(type(env), SigmoidEnv))
def test_action_value_setting(self):
    """set_action_values must update values, action space and observation space."""
    bench = SigmoidBenchmark()
    bench.set_action_values([1, 2, 3])
    self.assertTrue(bench.config.action_values == [1, 2, 3])
    # Three dimensions collapse into one discrete space of size 6 —
    # presumably the product of the action values; verify in the setter.
    self.assertTrue(bench.config.action_space_args == [6])
    self.assertTrue(len(bench.config.observation_space_args[0]) == 10)
from pathlib import Path
from dacbench.plotting import plot_performance, plot_performance_per_instance
from dacbench.logger import Logger, log2dataframe, load_logs
from dacbench.agents.simple_agents import RandomAgent
from dacbench.benchmarks import SigmoidBenchmark
from dacbench.runner import run_benchmark
from dacbench.wrappers import PerformanceTrackingWrapper, StateTrackingWrapper
import matplotlib.pyplot as plt


# Run an experiment and log the results
if __name__ == "__main__":
    # Make benchmark
    bench = SigmoidBenchmark()

    # Run for 10 episodes each on 10 seeds
    num_episodes = 10
    seeds = range(10)

    # Make logger object and add modules for performance & state logging.
    # Write frequencies of None buffer entries until the logger is closed.
    logger = Logger(
        experiment_name="sigmoid_example",
        output_path=Path("plotting/data"),
        step_write_frequency=None,
        episode_write_frequency=None,
    )
    # NOTE(review): modules registered by wrapper class — the wrappers are
    # presumably applied to the env later in this script (continues below
    # this chunk).
    state_logger = logger.add_module(StateTrackingWrapper)
    performance_logger = logger.add_module(PerformanceTrackingWrapper)
def get_agent(self, switching_interval):
    """Build a DynamicRandomAgent on a Sigmoid benchmark env.

    The env's action space is seeded for reproducible random actions;
    returns the (agent, env) pair.
    """
    env = SigmoidBenchmark().get_benchmark()
    env.seed_action_space()
    agent = DynamicRandomAgent(env, switching_interval=switching_interval)
    return agent, env