def run_optimal(results_path, benchmark_name, num_episodes, seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()
    if benchmark_name == "LubyBenchmark":
        policy = optimal_luby
    elif benchmark_name == "SigmoidBenchmark":
        policy = optimal_sigmoid
    elif benchmark_name == "FastDownwardBenchmark":
        policy = optimal_fd
    elif benchmark_name == "CMAESBenchmark":
        policy = csa
    else:
        print("No comparison policy found for this benchmark")
        return

    for s in seeds:
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(env)
        agent = GenericAgent(env, policy)
        run_benchmark(env, agent, num_episodes)
        performance = env.get_performance()[0]

        filedir = results_path + "/" + benchmark_name + "/optimal"
        filename = f"{filedir}/seed_{s}.json"
        # makedirs creates all intermediate directories in one call
        os.makedirs(filedir, exist_ok=True)
        with open(filename, "w+") as fp:
            json.dump(performance, fp)
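# Hypothetical call: save optimal-policy baseline results for the Luby
# benchmark under a "results" directory (the path name is an assumption).
run_optimal("results", "LubyBenchmark", num_episodes=10)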
def run_static(results_path, benchmark_name, action, num_episodes, seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(env)
        agent = StaticAgent(env, action)
        run_benchmark(env, agent, num_episodes)
        performance = env.get_performance()[0]

        filedir = results_path + "/" + benchmark_name + "/static_" + str(action)
        filename = f"{filedir}/seed_{s}.json"
        # makedirs creates all intermediate directories in one call
        os.makedirs(filedir, exist_ok=True)
        with open(filename, "w+") as fp:
            json.dump(performance, fp)
def run_dacbench(results_path, agent_method, num_episodes):
    """
    Run all benchmarks for 10 seeds for a given number of episodes
    with a given agent and save the results.

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    """
    os.makedirs(results_path, exist_ok=True)

    for b in map(benchmarks.__dict__.get, benchmarks.__all__):
        overall = []
        print(f"Evaluating {b.__name__}")
        for i in range(10):
            print(f"Seed {i}/10")
            bench = b()
            env = bench.get_benchmark(seed=i)
            env = PerformanceTrackingWrapper(env)
            agent = agent_method(env)
            run_benchmark(env, agent, num_episodes)
            performance = env.get_performance()[0]
            overall.append(performance)
        print("\n")
        file_name = results_path + "/" + b.__name__ + ".json"
        with open(file_name, "w+") as fp:
            json.dump(overall, fp)
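# A minimal driver for the run_dacbench above, assuming dacbench's built-in
# RandomAgent; the "results" output directory is a hypothetical choice.
from dacbench.agents import RandomAgent

def make_random_agent(env):
    return RandomAgent(env)

run_dacbench("results", make_random_agent, num_episodes=10)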
def test_init(self):
    bench = LubyBenchmark()
    env = bench.get_environment()

    wrapped = PerformanceTrackingWrapper(env)
    self.assertTrue(len(wrapped.overall_performance) == 0)
    self.assertTrue(wrapped.performance_interval is None)
    wrapped.instance = [0]
    self.assertTrue(wrapped.instance[0] == 0)

    wrapped2 = PerformanceTrackingWrapper(env, 10)
    self.assertTrue(len(wrapped2.overall_performance) == 0)
    self.assertTrue(wrapped2.performance_interval == 10)
    self.assertTrue(len(wrapped2.performance_intervals) == 0)
    self.assertTrue(len(wrapped2.current_performance) == 0)
def run_policy(results_path, benchmark_name, num_episodes, policy, seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        if benchmark_name == "CMAESBenchmark":
            experiment_name = f"csa_{s}"
        else:
            experiment_name = f"optimal_{s}"
        logger = Logger(
            experiment_name=experiment_name,
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper)
        )
        agent = GenericAgent(env, policy)
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        run_benchmark(env, agent, num_episodes, logger)
        logger.close()
def test_render(self, mock_plt):
    bench = LubyBenchmark()
    env = bench.get_environment()
    env = PerformanceTrackingWrapper(env)
    for _ in range(10):
        done = False
        env.reset()
        while not done:
            _, _, done, _ = env.step(1)
    env.render_performance()
    self.assertTrue(mock_plt.show.called)
    env.render_instance_performance()
    self.assertTrue(mock_plt.show.called)
def make_benchmark(config):
    bench = getattr(benchmarks, config["benchmark"])()
    env = bench.get_benchmark(seed=config["seed"])
    # SGD and CMA-ES return dictionary observations, so flatten them first
    if config["benchmark"] in ["SGDBenchmark", "CMAESBenchmark"]:
        env = ObservationWrapper(env)
    wrapped = PerformanceTrackingWrapper(env, logger=config["logger"])
    logger.set_env(wrapped)  # assumes a module-level Logger object
    return wrapped
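# Hypothetical usage of make_benchmark: the config keys mirror the ones the
# function reads, and "logger" is expected to be the ModuleLogger returned by
# Logger.add_module (the concrete values here are illustrative).
config = {
    "benchmark": "SigmoidBenchmark",
    "seed": 0,
    "logger": logger.add_module(PerformanceTrackingWrapper),
}
env = make_benchmark(config)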
def run_dacbench(results_path, agent_method, num_episodes, bench=None, seeds=None):
    """
    Run all benchmarks for 10 seeds for a given number of episodes
    with a given agent and save the results.

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    bench : list[str]
        List of benchmark names to run. If None (default) all benchmarks are run.
    seeds : list[int]
        List of seeds to run all benchmarks for. If None (default) seeds [0, ..., 9] are used.
    """
    if bench is None:
        bench = map(benchmarks.__dict__.get, benchmarks.__all__)
    else:
        bench = [getattr(benchmarks, b) for b in bench]
    seeds = seeds if seeds is not None else range(10)

    for b in bench:
        print(f"Evaluating {b.__name__}")
        for i in seeds:
            print(f"Seed {i}/10")
            benchmark = b()  # avoid shadowing the outer bench list
            try:
                env = benchmark.get_benchmark(seed=i)
            except Exception:
                continue
            logger = Logger(
                experiment_name=f"seed_{i}",
                output_path=Path(results_path) / f"{b.__name__}",
            )
            perf_logger = logger.add_module(PerformanceTrackingWrapper)
            logger.add_benchmark(benchmark)
            logger.set_env(env)
            env = PerformanceTrackingWrapper(env, logger=perf_logger)
            agent = agent_method(env)
            logger.add_agent(agent)
            run_benchmark(env, agent, num_episodes, logger)
            logger.close()
def run_random(results_path, benchmark_name, num_episodes, seeds, fixed):
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        if fixed > 1:
            experiment_name = f"random_fixed{fixed}_{s}"
        else:
            experiment_name = f"random_{s}"
        logger = Logger(
            experiment_name=experiment_name,
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper)
        )
        agent = DynamicRandomAgent(env, fixed)
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        run_benchmark(env, agent, num_episodes, logger)
        logger.close()
def run_optimal(results_path, benchmark_name, num_episodes, seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()
    if benchmark_name == "LubyBenchmark":
        policy = optimal_luby
    elif benchmark_name == "SigmoidBenchmark":
        policy = optimal_sigmoid
    elif benchmark_name == "FastDownwardBenchmark":
        policy = optimal_fd
    elif benchmark_name == "CMAESBenchmark":
        policy = csa
    else:
        print("No comparison policy found for this benchmark")
        return

    for s in seeds:
        if benchmark_name == "CMAESBenchmark":
            experiment_name = f"csa_{s}"
        else:
            experiment_name = f"optimal_{s}"
        logger = Logger(
            experiment_name=experiment_name,
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper)
        )
        agent = GenericAgent(env, policy)
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=s)
        run_benchmark(env, agent, num_episodes, logger)
        logger.close()
def run_dacbench(results_path, agent_method, num_episodes):
    """
    Run all benchmarks for 10 seeds for a given number of episodes
    with a given agent and save the results.

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    """
    for b in map(benchmarks.__dict__.get, benchmarks.__all__):
        print(f"Evaluating {b.__name__}")
        for i in range(10):
            print(f"Seed {i}/10")
            bench = b()
            env = bench.get_benchmark(seed=i)
            logger = Logger(
                experiment_name=f"seed_{i}",
                output_path=Path(results_path) / f"{b.__name__}",
            )
            perf_logger = logger.add_module(PerformanceTrackingWrapper)
            logger.add_benchmark(bench)
            logger.set_env(env)
            logger.set_additional_info(seed=i)
            env = PerformanceTrackingWrapper(env, logger=perf_logger)
            agent = agent_method(env)
            logger.add_agent(agent)
            run_benchmark(env, agent, num_episodes, logger)
            logger.close()
def run_static(results_path, benchmark_name, action, num_episodes, seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        logger = Logger(
            experiment_name=f"static_{action}_{s}",
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper)
        )
        agent = StaticAgent(env, action)
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=s, action=action)
        run_benchmark(env, agent, num_episodes, logger)
        logger.close()
def make_sigmoid(config):
    # NOTE: the opening of this function is reconstructed from its call below;
    # only the else-branch and return survived in the source. It is assumed to
    # build a SigmoidBenchmark from a config dict, with "action_values"
    # handled via set_action_values.
    bench = SigmoidBenchmark()
    for k in config.keys():
        if k == "action_values":
            bench.set_action_values(config[k])
        else:
            bench.config[k] = config[k]
    return bench.get_environment()


# Experiment configuration
# Play the 5D scenario
action_values = (3, 3, 3, 3, 3)
env_config = {
    "seed": 0,
    "action_values": action_values,
    "instance_set_path": "../instance_sets/sigmoid/sigmoid_5D3M_train.csv",
}

# Make environment
# To track rewards we use our wrapper (this is only for simplicity)
env = make_sigmoid(env_config)
env = PerformanceTrackingWrapper(env)

# Make simple PPO policy
model = PPO2("MlpPolicy", env)

# Train for 200 timesteps
model.learn(total_timesteps=200)

performance = env.get_performance()[0]
for i in range(len(performance)):
    print(
        f"Episode {i+1}/{len(performance)}...........................................Reward: {performance[i]}"
    )
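# Optional evaluation sketch (assumed, not part of the original script):
# roll out the trained PPO policy for one more episode.
obs = env.reset()
done = False
total_reward = 0
while not done:
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    total_reward += reward
print(f"Evaluation reward: {total_reward}")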
def test_get_performance(self):
    bench = LubyBenchmark()
    env = bench.get_environment()

    wrapped = PerformanceTrackingWrapper(env)
    wrapped.reset()
    done = False
    while not done:
        _, _, done, _ = wrapped.step(1)

    wrapped2 = PerformanceTrackingWrapper(env, 2)
    wrapped2.reset()
    done = False
    while not done:
        _, _, done, _ = wrapped2.step(1)

    wrapped3 = PerformanceTrackingWrapper(env, 2, track_instance_performance=False)
    wrapped3.reset()
    for i in range(5):
        wrapped3.step(i)

    wrapped4 = PerformanceTrackingWrapper(env, track_instance_performance=False)
    wrapped4.reset()
    for i in range(5):
        wrapped4.step(i)

    overall, instance_performance = wrapped.get_performance()
    overall_perf, interval_perf, instance_perf = wrapped2.get_performance()
    overall_performance_only = wrapped4.get_performance()
    overall_performance, intervals = wrapped3.get_performance()

    self.assertTrue(
        np.array_equal(
            np.round(overall_performance, decimals=2),
            np.round(overall_performance_only, decimals=2),
        )
    )
    self.assertTrue(
        np.array_equal(
            np.round(overall_perf, decimals=2), np.round(overall, decimals=2)
        )
    )
    self.assertTrue(len(instance_performance.keys()) == 1)
    self.assertTrue(len(list(instance_performance.values())[0]) == 1)
    self.assertTrue(len(instance_perf.keys()) == 1)
    self.assertTrue(len(list(instance_perf.values())[0]) == 1)
    self.assertTrue(len(intervals) == 1)
    self.assertTrue(len(intervals[0]) == 0)
    self.assertTrue(len(interval_perf) == 1)
    self.assertTrue(len(interval_perf[0]) == 1)
def test_step(self):
    bench = LubyBenchmark()
    bench.config.instance_set = {0: [0, 0], 1: [1, 1], 2: [3, 4], 3: [5, 6]}
    env = bench.get_environment()
    wrapped = PerformanceTrackingWrapper(env, 2)

    state = wrapped.reset()
    self.assertTrue(len(state) > 1)

    state, reward, done, _ = wrapped.step(1)
    self.assertTrue(len(state) > 1)
    self.assertTrue(reward <= 0)
    self.assertFalse(done)

    while not done:
        _, _, done, _ = wrapped.step(1)
    self.assertTrue(len(wrapped.overall_performance) == 1)
    self.assertTrue(len(wrapped.performance_intervals) == 0)
    self.assertTrue(len(wrapped.current_performance) == 1)
    self.assertTrue(len(wrapped.instance_performances.keys()) == 1)

    done = False
    while not done:
        _, _, done, _ = wrapped.step(1)
    done = False
    while not done:
        _, _, done, _ = wrapped.step(1)
    self.assertTrue(len(wrapped.performance_intervals) == 1)
    self.assertTrue(len(wrapped.current_performance) == 1)

    wrapped.reset()
    done = False
    while not done:
        _, _, done, _ = wrapped.step(1)
    wrapped.reset()
    done = False
    while not done:
        _, _, done, _ = wrapped.step(1)
    self.assertTrue(len(wrapped.instance_performances.keys()) == 3)

    wrapped.reset()
    done = False
    while not done:
        _, _, done, _ = wrapped.step(1)
    wrapped.reset()
    done = False
    while not done:
        _, _, done, _ = wrapped.step(1)
    self.assertTrue(len(wrapped.instance_performances.keys()) == 4)
logger = Logger(
    experiment_name="sigmoid_example",
    output_path=Path("plotting/data"),
    step_write_frequency=None,
    episode_write_frequency=None,
)
state_logger = logger.add_module(StateTrackingWrapper)
performance_logger = logger.add_module(PerformanceTrackingWrapper)

for s in seeds:
    # Log the seed
    logger.set_additional_info(seed=s)

    # Make & wrap benchmark environment
    env = bench.get_benchmark(seed=s)
    env = PerformanceTrackingWrapper(env, logger=performance_logger)
    env = StateTrackingWrapper(env, logger=state_logger)

    # Add env to logger
    logger.set_env(env)

    # Run random agent
    agent = RandomAgent(env)
    run_benchmark(env, agent, num_episodes, logger)

# Close logger object
logger.close()

# Load performance of last seed into pandas DataFrame
logs = load_logs(performance_logger.get_logfile())
dataframe = log2dataframe(logs, wide=True)
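# A possible next step (assumed, not in the original snippet): plot the
# wide-format DataFrame with dacbench's plotting helpers, if your version
# provides dacbench.plotting.plot_performance.
from dacbench.plotting import plot_performance
import matplotlib.pyplot as plt

plot_performance(dataframe)
plt.show()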
from pathlib import Path

from examples.example_utils import make_chainer_a3c
from dacbench.benchmarks import CMAESBenchmark
from dacbench.logger import Logger
from dacbench.wrappers import ObservationWrapper, PerformanceTrackingWrapper

# Make logger object
logger = Logger(
    experiment_name="CMAESBenchmark", output_path=Path("../plotting/data")
)

# Make CMA-ES environment
# We use the configuration from the "Learning to Optimize Step-size Adaption
# in CMA-ES" paper by Shala et al.
bench = CMAESBenchmark()
env = bench.get_benchmark()
logger.set_env(env)

# Wrap to track performance
performance_logger = logger.add_module(PerformanceTrackingWrapper)
env = PerformanceTrackingWrapper(env=env, logger=performance_logger)

# Also wrap to turn the dictionary observations into an easy-to-work-with list
env = ObservationWrapper(env)

# Make chainer agent
obs_size = env.observation_space.low.size
action_size = env.action_space.low.size
agent = make_chainer_a3c(obs_size, action_size)

# Training
num_episodes = 3
for i in range(num_episodes):
    # Reset environment to begin episode
    state = env.reset()
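    # The rest of the episode loop is a sketch (assumed, based on the
    # chainerrl act_and_train / stop_episode_and_train interface):
    done = False
    reward = 0
    episode_reward = 0
    while not done:
        # Agent picks an action and learns from the previous reward
        action = agent.act_and_train(state, reward)
        state, reward, done, _ = env.step(action)
        episode_reward += reward
    # End the episode and do a final training update
    agent.stop_episode_and_train(state, reward, done=done)
    print(f"Episode {i + 1}/{num_episodes}, reward: {episode_reward}")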