def train_chainer(
    agent, env, num_episodes=10, flatten_state=False, logger: Logger = None
):
    for i in range(num_episodes):
        state = env.reset()
        if flatten_state:
            state = np.array(flatten([state[k] for k in state.keys()]))
            state = state.astype(np.float32)
        done = False
        r = 0
        reward = 0
        while not done:
            action = agent.act_and_train(state, reward)
            next_state, reward, done, _ = env.step(action)
            r += reward
            if flatten_state:
                state = np.array(flatten([next_state[k] for k in next_state.keys()]))
                state = state.astype(np.float32)
            else:
                state = next_state
            if logger is not None:
                logger.next_step()
        agent.stop_episode_and_train(state, reward, done=done)
        if logger is not None:
            logger.next_episode()
        print(
            f"Episode {i}/{num_episodes}...........................................Reward: {r}"
        )
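# Usage sketch for train_chainer (not part of the function above). It mirrors the
# CMA-ES A3C example later in this collection: ObservationWrapper and make_chainer_a3c
# are taken from that example, while the episode count and output path are illustrative
# values, not library defaults.
from pathlib import Path

from dacbench.benchmarks import CMAESBenchmark
from dacbench.logger import Logger
from dacbench.wrappers import ObservationWrapper
from examples.example_utils import make_chainer_a3c

bench = CMAESBenchmark()
# ObservationWrapper flattens the dict observations into a vector,
# so flatten_state can stay False in train_chainer
env = ObservationWrapper(bench.get_environment())
logger = Logger(experiment_name="chainer_training", output_path=Path("output"))
logger.set_env(env)

obs_size = env.observation_space.low.size
action_size = env.action_space.low.size
agent = make_chainer_a3c(obs_size, action_size)

train_chainer(agent, env, num_episodes=5, logger=logger)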
def test_dict_logging(self):
    temp_dir = tempfile.TemporaryDirectory()
    seed = 0
    episodes = 2
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_dict_logging",
        step_write_frequency=None,
        episode_write_frequency=1,
    )
    bench = CMAESBenchmark()
    bench.set_seed(seed)
    env = bench.get_environment()
    state_logger = logger.add_module(StateTrackingWrapper)
    wrapped = StateTrackingWrapper(env, logger=state_logger)
    agent = StaticAgent(env, 3.5)
    logger.set_env(env)

    run_benchmark(wrapped, agent, episodes, logger)
    state_logger.close()

    logs = load_logs(state_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=False)

    state_parts = {
        "Loc": 10,
        "Past Deltas": 40,
        "Population Size": 1,
        "Sigma": 1,
        "History Deltas": 80,
        "Past Sigma Deltas": 40,
    }

    names = dataframe.name.unique()

    def field(name: str):
        state, field_, *idx = name.split("_")
        return field_

    parts = groupby(sorted(names), key=field)
    for part, group_members in parts:
        expected_number = state_parts[part]
        actual_number = len(list(group_members))
        self.assertEqual(expected_number, actual_number)

    temp_dir.cleanup()
def test_box_logging(self):
    temp_dir = tempfile.TemporaryDirectory()
    seed = 0
    episodes = 10
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_box_logging",
        step_write_frequency=None,
        episode_write_frequency=1,
    )
    bench = LubyBenchmark()
    bench.set_seed(seed)
    env = bench.get_environment()
    state_logger = logger.add_module(StateTrackingWrapper)
    wrapped = StateTrackingWrapper(env, logger=state_logger)
    agent = StaticAgent(env, 1)
    logger.set_env(env)

    run_benchmark(wrapped, agent, episodes, logger)
    state_logger.close()

    logs = load_logs(state_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)

    state_columns = [
        "state_Action t (current)",
        "state_Step t (current)",
        "state_Action t-1",
        "state_Action t-2",
        "state_Step t-1",
        "state_Step t-2",
    ]

    for state_column in state_columns:
        self.assertTrue(state_column in dataframe.columns)
        self.assertTrue((~dataframe[state_column].isna()).all())

    temp_dir.cleanup()
def test_logging(self):
    temp_dir = tempfile.TemporaryDirectory()
    episodes = 5
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_logging",
    )
    bench = LubyBenchmark()
    env = bench.get_environment()
    time_logger = logger.add_module(EpisodeTimeWrapper)
    wrapped = EpisodeTimeWrapper(env, logger=time_logger)
    agent = StaticAgent(env=env, action=1)

    run_benchmark(wrapped, agent, episodes, logger)
    logger.close()

    logs = load_logs(time_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)

    # all steps must have a logged time
    self.assertTrue((~dataframe.step_duration.isna()).all())

    # each episode has a recorded time
    episode_groups = dataframe.groupby("episode")
    last_steps_per_episode = dataframe.iloc[episode_groups.step.idxmax()]
    self.assertTrue((~last_steps_per_episode.episode_duration.isna()).all())

    # the episode time equals the sum of the step durations in that episode
    calculated_episode_times = episode_groups.step_duration.sum()
    recorded_episode_times = last_steps_per_episode.episode_duration
    self.assertListEqual(
        calculated_episode_times.tolist(), recorded_episode_times.tolist()
    )

    temp_dir.cleanup()
def run_optimal(results_path, benchmark_name, num_episodes, seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()
    if benchmark_name == "LubyBenchmark":
        policy = optimal_luby
    elif benchmark_name == "SigmoidBenchmark":
        policy = optimal_sigmoid
    elif benchmark_name == "FastDownwardBenchmark":
        policy = optimal_fd
    elif benchmark_name == "CMAESBenchmark":
        policy = csa
    else:
        print("No comparison policy found for this benchmark")
        return

    for s in seeds:
        if benchmark_name == "CMAESBenchmark":
            experiment_name = f"csa_{s}"
        else:
            experiment_name = f"optimal_{s}"
        logger = Logger(
            experiment_name=experiment_name,
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper)
        )
        agent = GenericAgent(env, policy)
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=s)
        run_benchmark(env, agent, num_episodes, logger)
        logger.close()
def run_static(results_path, benchmark_name, action, num_episodes, seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        logger = Logger(
            experiment_name=f"static_{action}_{s}",
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper)
        )
        agent = StaticAgent(env, action)
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=s, action=action)
        run_benchmark(env, agent, num_episodes, logger)
        logger.close()
def run_random(results_path, benchmark_name, num_episodes, seeds, fixed):
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        if fixed > 1:
            experiment_name = f"random_fixed{fixed}_{s}"
        else:
            experiment_name = f"random_{s}"
        logger = Logger(
            experiment_name=experiment_name,
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper)
        )
        agent = DynamicRandomAgent(env, fixed)
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=s)
        run_benchmark(env, agent, num_episodes, logger)
        logger.close()
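# Hypothetical driver for the three baseline runners above. The results directory,
# benchmark name, and episode counts are illustrative only; run_optimal requires one
# of the benchmarks it handles explicitly.
from pathlib import Path

import numpy as np

results_path = Path("output")
run_optimal(results_path, "LubyBenchmark", num_episodes=10)
run_static(results_path, "LubyBenchmark", action=1, num_episodes=10)
# fixed <= 1 keeps the random agent fully dynamic; larger values are assumed (based on
# the experiment-name logic above) to repeat each sampled action for `fixed` steps
run_random(results_path, "LubyBenchmark", num_episodes=10, seeds=np.arange(10), fixed=0)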
def test_logging_multi_discrete(self):
    temp_dir = tempfile.TemporaryDirectory()
    seed = 0
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_multi_discrete_logging",
        step_write_frequency=None,
        episode_write_frequency=1,
    )
    bench = ModeaBenchmark()
    bench.set_seed(seed)
    env = bench.get_environment()
    env.seed_action_space(seed)
    action_logger = logger.add_module(ActionFrequencyWrapper)
    wrapped = ActionFrequencyWrapper(env, logger=action_logger)
    agent = RandomAgent(env)
    logger.set_env(env)

    run_benchmark(wrapped, agent, 1, logger)
    action_logger.close()

    logs = load_logs(action_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)

    expected_actions = pd.DataFrame(
        {
            "action_0": {0: 0, 1: 1, 2: 0, 3: 1, 4: 1, 5: 0, 6: 1, 7: 1, 8: 0, 9: 0, 10: 0},
            "action_1": {0: 1, 1: 0, 2: 1, 3: 0, 4: 0, 5: 1, 6: 0, 7: 1, 8: 0, 9: 0, 10: 1},
            "action_10": {0: 0, 1: 0, 2: 1, 3: 0, 4: 0, 5: 0, 6: 0, 7: 2, 8: 1, 9: 2, 10: 1},
            "action_2": {0: 1, 1: 1, 2: 1, 3: 0, 4: 1, 5: 1, 6: 1, 7: 1, 8: 0, 9: 0, 10: 1},
            "action_3": {0: 0, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 0, 8: 0, 9: 1, 10: 1},
            "action_4": {0: 0, 1: 1, 2: 1, 3: 0, 4: 1, 5: 0, 6: 0, 7: 1, 8: 0, 9: 1, 10: 0},
            "action_5": {0: 1, 1: 0, 2: 0, 3: 0, 4: 1, 5: 1, 6: 1, 7: 0, 8: 0, 9: 0, 10: 1},
            "action_6": {0: 0, 1: 1, 2: 1, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 1, 9: 0, 10: 0},
            "action_7": {0: 1, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 1, 8: 1, 9: 1, 10: 0},
            "action_8": {0: 0, 1: 1, 2: 0, 3: 1, 4: 1, 5: 1, 6: 0, 7: 1, 8: 0, 9: 0, 10: 1},
            "action_9": {0: 1, 1: 2, 2: 1, 3: 0, 4: 0, 5: 1, 6: 1, 7: 1, 8: 2, 9: 0, 10: 2},
        }
    )

    for column in expected_actions.columns:
        # TODO: there seems to be a bug here. Every so often the last action is missing.
        # Double-checked: it is not a logging problem; it could be a seeding issue.
        self.assertListEqual(
            dataframe[column].to_list()[:10],
            expected_actions[column].to_list()[:10],
            f"Column {column}",
        )

    temp_dir.cleanup()
def test_logging_discrete(self):
    temp_dir = tempfile.TemporaryDirectory()
    seed = 0
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_discrete_logging",
        step_write_frequency=None,
        episode_write_frequency=1,
    )
    bench = LubyBenchmark()
    bench.set_seed(seed)
    env = bench.get_environment()
    env.seed_action_space(seed)
    action_logger = logger.add_module(ActionFrequencyWrapper)
    wrapped = ActionFrequencyWrapper(env, logger=action_logger)
    agent = RandomAgent(env)
    logger.set_env(env)

    run_benchmark(wrapped, agent, 10, logger)
    action_logger.close()

    logs = load_logs(action_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)

    expected_actions = [
        0, 3, 5, 4, 3, 5, 5, 5, 3, 3, 2, 1, 0, 1, 2, 0, 1, 1, 0, 1,
        2, 4, 3, 0, 1, 3, 0, 3, 3, 3, 4, 4, 4, 5, 4, 0, 4, 2, 1, 3,
        4, 2, 1, 3, 3, 2, 0, 5, 2, 5, 2, 1, 5, 3, 2, 5, 1, 0, 2, 3,
        1, 3, 2, 3, 2, 4, 3, 4, 0, 5, 5, 1, 5, 0, 1, 5, 5, 3, 3, 2,
    ]

    self.assertListEqual(dataframe.action.to_list(), expected_actions)

    temp_dir.cleanup()
parser.add_argument("--benchmarks", nargs="+", type=str, default=None, help="Benchmarks to run PPO for") parser.add_argument("--timesteps", type=int, default=1000000, help="Number of timesteps to run") parser.add_argument( "--seeds", nargs="+", type=int, default=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], help="Seeds for evaluation", ) args = parser.parse_args() for b in args.benchmarks: for s in args.seeds: logger = Logger(experiment_name=f"PPO_{b}_s{s}", output_path=Path(args.outdir)) perf_logger = logger.add_module(PerformanceTrackingWrapper) logger.set_additional_info(seed=s) config = {"seed": s, "logger": perf_logger, "benchmark": b} env = make_benchmark(config) model = PPO2("MlpPolicy", env) logging = LoggerCallback(logger) model.learn(total_timesteps=args.timesteps, callback=logging) logger.close()
def run_policy(results_path, benchmark_name, num_episodes, policy, seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        if benchmark_name == "CMAESBenchmark":
            experiment_name = f"csa_{s}"
        else:
            experiment_name = f"optimal_{s}"
        logger = Logger(
            experiment_name=experiment_name,
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper)
        )
        agent = GenericAgent(env, policy)
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        run_benchmark(env, agent, num_episodes, logger)
        logger.close()
from pathlib import Path

from dacbench.benchmarks import CMAESBenchmark
from dacbench.logger import Logger
from dacbench.wrappers import PerformanceTrackingWrapper, ObservationWrapper
from examples.example_utils import make_chainer_a3c

# Make logger object
logger = Logger(
    experiment_name="CMAESBenchmark", output_path=Path("../plotting/data")
)

# Make CMA-ES environment
# We use the configuration from the "Learning to Optimize Step-size Adaption in CMA-ES"
# paper by Shala et al.
bench = CMAESBenchmark()
env = bench.get_benchmark()
logger.set_env(env)

# Wrap to track performance
performance_logger = logger.add_module(PerformanceTrackingWrapper)
env = PerformanceTrackingWrapper(env=env, logger=performance_logger)
# Also wrap to make the dictionary observations into an easy to work with list
env = ObservationWrapper(env)

# Make chainer agent
obs_size = env.observation_space.low.size
action_size = env.action_space.low.size
agent = make_chainer_a3c(obs_size, action_size)

# Training
num_episodes = 3
for i in range(num_episodes):
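    # The original snippet ends at the loop header; the body below is a sketch that
    # follows the train_chainer loop shown earlier in this collection.
    state = env.reset()
    done = False
    r = 0
    reward = 0
    while not done:
        action = agent.act_and_train(state, reward)
        next_state, reward, done, _ = env.step(action)
        r += reward
        state = next_state
        logger.next_step()
    agent.stop_episode_and_train(state, reward, done=done)
    logger.next_episode()
    print(f"Episode {i}/{num_episodes} finished with reward {r}")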
from pathlib import Path

import matplotlib.pyplot as plt

from dacbench.benchmarks import SigmoidBenchmark
from dacbench.logger import Logger
from dacbench.wrappers import PerformanceTrackingWrapper, StateTrackingWrapper

# Run an experiment and log the results
if __name__ == "__main__":
    # Make benchmark
    bench = SigmoidBenchmark()
    # Run for 10 episodes each on 10 seeds
    num_episodes = 10
    seeds = range(10)

    # Make logger object and add modules for performance & state logging
    logger = Logger(
        experiment_name="sigmoid_example",
        output_path=Path("plotting/data"),
        step_write_frequency=None,
        episode_write_frequency=None,
    )
    state_logger = logger.add_module(StateTrackingWrapper)
    performance_logger = logger.add_module(PerformanceTrackingWrapper)

    for s in seeds:
        # Log the seed
        logger.set_additional_info(seed=s)

        # Make & wrap benchmark environment
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(env, logger=performance_logger)
        env = StateTrackingWrapper(env, logger=state_logger)

        # Add env to logger
        logger.set_env(env)
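        # Hedged continuation: the original snippet ends here. One plausible way to
        # finish the loop is to run a random agent through the wrapped environment,
        # as the other examples in this collection do. RandomAgent and run_benchmark
        # are imported here only to keep the sketch self-contained.
        from dacbench.agents import RandomAgent
        from dacbench.runner import run_benchmark

        agent = RandomAgent(env)
        run_benchmark(env, agent, num_episodes, logger)

    # Flush any remaining log entries
    logger.close()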
def setUp(self) -> None:
    self.temp_dir = tempfile.TemporaryDirectory()
    episodes = 80
    seeds = [0, 1, 3, 4, 5]
    experiment_name = "test_env"
    logger = Logger(
        output_path=Path(self.temp_dir.name),
        experiment_name=experiment_name,
        step_write_frequency=None,
        episode_write_frequency=None,
    )
    benchmark = SigmoidBenchmark()
    env = benchmark.get_benchmark()
    agent = RandomAgent(env)
    logger.set_env(env)
    env_logger = logger.add_module(env)

    for seed in seeds:
        env.seed(seed)
        logger.set_additional_info(seed=seed)
        logger.reset_episode()

        for episode in range(episodes):
            state = env.reset()
            done = False
            reward = 0
            step = 0
            while not done:
                action = agent.act(state, reward)
                env_logger.log("logged_step", step)
                env_logger.log("logged_episode", episode)
                next_state, reward, done, _ = env.step(action)
                env_logger.log("reward", reward)
                env_logger.log("done", done)
                agent.train(next_state, reward)
                state = next_state
                logger.next_step()
                step += 1
            agent.end_episode(state, reward)
            logger.next_episode()

    env.close()
    logger.close()

    self.log_file = env_logger.log_file.name
def run_dacbench(results_path, agent_method, num_episodes):
    """
    Run all benchmarks for 10 seeds for a given number of episodes
    with a given agent and save the results

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    """
    for b in map(benchmarks.__dict__.get, benchmarks.__all__):
        print(f"Evaluating {b.__name__}")
        for i in range(10):
            print(f"Seed {i}/10")
            bench = b()
            env = bench.get_benchmark(seed=i)
            logger = Logger(
                experiment_name=f"seed_{i}",
                output_path=Path(results_path) / f"{b.__name__}",
            )
            perf_logger = logger.add_module(PerformanceTrackingWrapper)
            logger.add_benchmark(bench)
            logger.set_env(env)
            logger.set_additional_info(seed=i)
            env = PerformanceTrackingWrapper(env, logger=perf_logger)
            agent = agent_method(env)
            logger.add_agent(agent)
            run_benchmark(env, agent, num_episodes, logger)
            logger.close()
from pathlib import Path

from dacbench.agents import RandomAgent
from dacbench.benchmarks.modcma_benchmark import ModCMABenchmark
from dacbench.logger import Logger
from dacbench.runner import run_benchmark
from dacbench.wrappers import ActionFrequencyWrapper

if __name__ == "__main__":
    bench = ModCMABenchmark()
    env = bench.get_environment()

    # Make logger object
    logger = Logger(
        experiment_name=type(bench).__name__, output_path=Path("../plotting/data")
    )
    logger.set_env(env)
    logger.add_benchmark(bench)

    # Wrap environment to track action frequency
    env = ActionFrequencyWrapper(env, logger=logger.add_module(ActionFrequencyWrapper))

    # Run random agent for 5 episodes and log actions to file
    agent = RandomAgent(env)
    run_benchmark(env, agent, 5, logger=logger)

    # Close the logger to make sure all logs are written to file
    logger.close()
from pathlib import Path

from dacbench.agents import RandomAgent
from dacbench.benchmarks import CMAESBenchmark
from dacbench.logger import Logger
from dacbench.runner import run_benchmark
from dacbench.wrappers import StateTrackingWrapper

# Make CMAESBenchmark environment
bench = CMAESBenchmark()
env = bench.get_environment()

# Make Logger object to track state information
logger = Logger(
    experiment_name=type(bench).__name__, output_path=Path("../plotting/data")
)
logger.set_env(env)

# Wrap env with StateTrackingWrapper
env = StateTrackingWrapper(env, logger=logger.add_module(StateTrackingWrapper))

# Run random agent for 5 episodes and log state information to file
# You can plot these results with the plotting examples
agent = RandomAgent(env)
run_benchmark(env, agent, 5, logger=logger)
logger.close()
def run_dacbench(results_path, agent_method, num_episodes, bench=None, seeds=None):
    """
    Run the given benchmarks (all benchmarks by default) for a given number of episodes
    with a given agent and save the results

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    bench : list[str]
        Names of the benchmarks to run. If None (default), all benchmarks are run.
    seeds : list[int]
        List of seeds to run all benchmarks for. If None (default), seeds [0, ..., 9] are used.
    """
    if bench is None:
        bench = map(benchmarks.__dict__.get, benchmarks.__all__)
    else:
        bench = [getattr(benchmarks, b) for b in bench]

    seeds = seeds if seeds is not None else range(10)

    for b in bench:
        print(f"Evaluating {b.__name__}")
        for i in seeds:
            print(f"Seed {i}")
            benchmark = b()
            try:
                env = benchmark.get_benchmark(seed=i)
            except Exception:
                # Skip benchmarks that cannot be instantiated with this seed
                continue

            logger = Logger(
                experiment_name=f"seed_{i}",
                output_path=Path(results_path) / f"{b.__name__}",
            )
            perf_logger = logger.add_module(PerformanceTrackingWrapper)
            logger.add_benchmark(benchmark)
            logger.set_env(env)
            env = PerformanceTrackingWrapper(env, logger=perf_logger)
            agent = agent_method(env)
            logger.add_agent(agent)

            run_benchmark(env, agent, num_episodes, logger)
            logger.close()
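# Usage sketch: agent_method must be a callable that builds an agent from an env.
# A RandomAgent factory, a single benchmark, and a single seed are used here purely
# for illustration; the output directory is a placeholder.
from dacbench.agents import RandomAgent

run_dacbench(
    "output",
    agent_method=lambda env: RandomAgent(env),
    num_episodes=2,
    bench=["LubyBenchmark"],
    seeds=[0],
)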