def performance_example():
    """
    Plot Sigmoid performance over time, divided by seed and with each seed in its own plot
    """
    file = Path("./data/sigmoid_example/PerformanceTrackingWrapper.jsonl")
    logs = load_logs(file)
    data = log2dataframe(logs, wide=True, drop_columns=["time"])

    Path("output").mkdir(exist_ok=True)

    # overall
    grid = plot_performance(data, title="Overall Performance")
    grid.savefig("output/sigmoid_overall_performance.pdf")
    plt.show()

    # per seed (hue)
    grid = plot_performance(data, title="Overall Performance", hue="seed")
    grid.savefig("output/sigmoid_overall_performance_per_seed_hue.pdf")
    plt.show()

    # per seed (col)
    with plotting_context("poster"):
        grid = plot_performance(
            data, title="Overall Performance", col="seed", col_wrap=3
        )
        grid.fig.subplots_adjust(top=0.92)
        grid.savefig("output/sigmoid_overall_performance_per_seed.pdf")
        plt.show()
def plot_state_CMAES():
    """
    Plot state information of a CMA-ES run over time
    """
    Path("output").mkdir(exist_ok=True)

    # Since converting the json logs to a data frame takes a couple of minutes,
    # we cache the logs in a pickled dataframe object while tuning the plot settings
    path = Path("output/cached_logs.pickle")
    if not path.exists():
        file = Path("./data/CMAESBenchmark/StateTrackingWrapper.jsonl")
        if not file.exists():
            print(
                "Please run 'examples/benchmarks/chainerrl_cma.py' to generate plotting data first"
            )
            return
        logs = load_logs(file)
        dataframe = log2dataframe(logs, wide=True)
        dataframe.to_pickle(path)
    else:
        dataframe = pd.read_pickle(path)

    # The CMA-ES observation space has over 170 dims, so we only plot a subset.
    # First, get all the different parts of the state
    columns = pd.DataFrame(
        (column.split("_") for column in dataframe.columns),
        columns=["part", "subpart", "i"],
    )
    state_parts = columns[columns["part"] == "state"]["subpart"].unique()
    print(f"State parts {state_parts}")

    # Since History Deltas (80), Past Deltas (40) and Past Sigma Deltas (40)
    # have too many dims to be plotted, we only show
    state_parts = ["Loc", "Population Size", "Sigma"]

    for state_part in state_parts:
        state_part_columns = [
            column
            for column in dataframe.columns
            if not column.startswith("state") or column.split("_")[1] == state_part
        ]
        grid = plot_state(dataframe[state_part_columns], interval=100, title=state_part)
        grid.savefig(f"output/cmaes_state_{state_part}.pdf")
        plt.show()

        # one can also show the global step (increasing across episodes) on the x-axis
        grid = plot_state(
            dataframe[state_part_columns],
            show_global_step=True,
            interval=100,
            title=state_part,
        )
        grid.savefig(f"output/cmaes_state_{state_part}_global_step.pdf")
        plt.show()
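# For reference, a minimal sketch of the wide-format column naming that the
# filtering above relies on. The "state_<part>_<i>" names and the values
# below are illustrative assumptions, not real CMA-ES data:
import pandas as pd

toy = pd.DataFrame(
    {
        "step": [0, 1],
        "state_Loc_0": [0.1, 0.2],
        "state_Loc_1": [0.3, 0.4],
        "state_Sigma_0": [0.5, 0.6],
    }
)

# Keep the non-state columns plus all columns of one state part,
# exactly as in plot_state_CMAES above
part = "Loc"
part_columns = [
    column
    for column in toy.columns
    if not column.startswith("state") or column.split("_")[1] == part
]
print(toy[part_columns])  # step, state_Loc_0, state_Loc_1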
def per_instance_example():
    """
    Plot CMA performance for each training instance
    """
    file = Path("./data/chainererrl_cma/PerformanceTrackingWrapper.jsonl")
    logs = load_logs(file)
    data = log2dataframe(logs, wide=True, drop_columns=["time"])

    Path("output").mkdir(exist_ok=True)

    grid = plot_performance_per_instance(
        data, title="CMA Mean Performance per Instance"
    )
    grid.savefig("output/cma_performance_per_instance.pdf")
    plt.show()
def test_dict_logging(self):
    temp_dir = tempfile.TemporaryDirectory()
    seed = 0
    episodes = 2
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_dict_logging",
        step_write_frequency=None,
        episode_write_frequency=1,
    )
    bench = CMAESBenchmark()
    bench.set_seed(seed)
    env = bench.get_environment()

    state_logger = logger.add_module(StateTrackingWrapper)
    wrapped = StateTrackingWrapper(env, logger=state_logger)
    agent = StaticAgent(env, 3.5)
    logger.set_env(env)

    run_benchmark(wrapped, agent, episodes, logger)
    state_logger.close()

    logs = load_logs(state_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=False)

    state_parts = {
        "Loc": 10,
        "Past Deltas": 40,
        "Population Size": 1,
        "Sigma": 1,
        "History Deltas": 80,
        "Past Sigma Deltas": 40,
    }

    names = dataframe.name.unique()

    def field(name: str):
        # names look like "state_<part>" or "state_<part>_<index>"
        state, field_, *idx = name.split("_")
        return field_

    parts = groupby(sorted(names), key=field)
    for part, group_members in parts:
        expected_number = state_parts[part]
        actual_number = len(list(group_members))
        self.assertEqual(expected_number, actual_number)

    temp_dir.cleanup()
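# Side note on the sorted() call above: itertools.groupby only groups
# *consecutive* items with equal keys, so unsorted names would split one
# state part into several groups. A self-contained sketch (names are
# illustrative):
from itertools import groupby

names = ["state_Loc_0", "state_Sigma", "state_Loc_1"]
key = lambda name: name.split("_")[1]

print([(k, len(list(g))) for k, g in groupby(names, key=key)])
# -> [('Loc', 1), ('Sigma', 1), ('Loc', 1)]
print([(k, len(list(g))) for k, g in groupby(sorted(names), key=key)])
# -> [('Loc', 2), ('Sigma', 1)]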
def plot_scalar_action():
    """
    Plot Sigmoid actions over time, per action component and as mean per component over intervals
    """
    file = Path("./data/sigmoid_example/ActionFrequencyWrapper.jsonl")
    logs = load_logs(file)
    dataframe = log2dataframe(logs, wide=True)

    Path("output").mkdir(exist_ok=True)

    grid = plot_action(dataframe, interval=18, title="Sigmoid", col="seed", col_wrap=3)
    grid.savefig("output/sigmoid_example_action_interval_18.pdf")
    plt.show()

    grid = plot_action(dataframe, title="Sigmoid", col="seed", col_wrap=3)
    grid.savefig("output/sigmoid_example_action.pdf")
    plt.show()
def plot_action_modea():
    """
    Plot ModEA actions over time and in intervals
    """
    file = Path("data/ModeaBenchmark/ActionFrequencyWrapper.jsonl")
    logs = load_logs(file)
    dataframe = log2dataframe(logs, wide=True)

    Path("output").mkdir(exist_ok=True)

    grid = plot_action(dataframe, interval=5)
    grid.savefig("output/modea_action_interval_5.pdf")
    plt.show()

    grid = plot_action(dataframe)
    grid.savefig("output/modea_action.pdf")
    plt.show()
def run_optimal_test_with_benchmark(self, benchmark):
    seeds = [42]
    num_episodes = 3
    with tempfile.TemporaryDirectory() as temp_dir:
        result_path = Path(temp_dir)
        run_optimal(result_path, benchmark, num_episodes, seeds)

        expected_experiment_path = result_path / benchmark / f"optimal_{seeds[0]}"
        self.assertTrue(expected_experiment_path.exists())

        performance_tracking_log = (
            expected_experiment_path / "PerformanceTrackingWrapper.jsonl"
        )
        self.assertTrue(performance_tracking_log.exists())

        logs = log2dataframe(load_logs(performance_tracking_log))
        self.assertEqual(len(logs), num_episodes)
        self.assertTrue((logs["seed"] == seeds[0]).all())
def test_box_logging(self):
    temp_dir = tempfile.TemporaryDirectory()
    seed = 0
    episodes = 10
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_box_logging",
        step_write_frequency=None,
        episode_write_frequency=1,
    )
    bench = LubyBenchmark()
    bench.set_seed(seed)
    env = bench.get_environment()

    state_logger = logger.add_module(StateTrackingWrapper)
    wrapped = StateTrackingWrapper(env, logger=state_logger)
    agent = StaticAgent(env, 1)
    logger.set_env(env)

    run_benchmark(wrapped, agent, episodes, logger)
    state_logger.close()

    logs = load_logs(state_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)

    state_columns = [
        "state_Action t (current)",
        "state_Step t (current)",
        "state_Action t-1",
        "state_Action t-2",
        "state_Step t-1",
        "state_Step t-2",
    ]

    for state_column in state_columns:
        self.assertTrue(state_column in dataframe.columns)
        self.assertTrue((~dataframe[state_column].isna()).all())

    temp_dir.cleanup()
def run_static_test_with_benchmark(self, benchmark):
    seeds = [42]
    num_episodes = 3
    action = DISCRETE_ACTIONS[benchmark][0]
    with tempfile.TemporaryDirectory() as temp_dir:
        result_path = Path(temp_dir)
        run_static(result_path, benchmark, action, num_episodes, seeds)

        expected_experiment_path = (
            result_path / benchmark / f"static_{action}_{seeds[0]}"
        )
        self.assertTrue(expected_experiment_path.exists())

        performance_tracking_log = (
            expected_experiment_path / "PerformanceTrackingWrapper.jsonl"
        )
        self.assertTrue(performance_tracking_log.exists())

        logs = log2dataframe(load_logs(performance_tracking_log))
        self.assertEqual(len(logs), num_episodes)
        self.assertTrue((logs["seed"] == seeds[0]).all())
def test_logging(self):
    temp_dir = tempfile.TemporaryDirectory()
    episodes = 5
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_logging",
    )
    bench = LubyBenchmark()
    env = bench.get_environment()
    time_logger = logger.add_module(EpisodeTimeWrapper)
    wrapped = EpisodeTimeWrapper(env, logger=time_logger)

    agent = StaticAgent(env=env, action=1)
    run_benchmark(wrapped, agent, episodes, logger)
    logger.close()

    logs = load_logs(time_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)

    # all steps must have a logged time
    self.assertTrue((~dataframe.step_duration.isna()).all())

    # each episode has a recorded time
    episodes = dataframe.groupby("episode")
    last_steps_per_episode = dataframe.iloc[episodes.step.idxmax()]
    self.assertTrue((~last_steps_per_episode.episode_duration.isna()).all())

    # the episode time equals the sum of the step times in the episode
    calculated_episode_times = episodes.step_duration.sum()
    recorded_episode_times = last_steps_per_episode.episode_duration
    self.assertListEqual(
        calculated_episode_times.tolist(), recorded_episode_times.tolist()
    )

    temp_dir.cleanup()
def test_logging_multi_discrete(self):
    temp_dir = tempfile.TemporaryDirectory()
    seed = 0
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_multi_discrete_logging",
        step_write_frequency=None,
        episode_write_frequency=1,
    )
    bench = ModeaBenchmark()
    bench.set_seed(seed)
    env = bench.get_environment()
    env.seed_action_space(seed)

    action_logger = logger.add_module(ActionFrequencyWrapper)
    wrapped = ActionFrequencyWrapper(env, logger=action_logger)
    agent = RandomAgent(env)
    logger.set_env(env)

    run_benchmark(wrapped, agent, 1, logger)
    action_logger.close()

    logs = load_logs(action_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)

    expected_actions = pd.DataFrame(
        {
            "action_0": {0: 0, 1: 1, 2: 0, 3: 1, 4: 1, 5: 0, 6: 1, 7: 1, 8: 0, 9: 0, 10: 0},
            "action_1": {0: 1, 1: 0, 2: 1, 3: 0, 4: 0, 5: 1, 6: 0, 7: 1, 8: 0, 9: 0, 10: 1},
            "action_10": {0: 0, 1: 0, 2: 1, 3: 0, 4: 0, 5: 0, 6: 0, 7: 2, 8: 1, 9: 2, 10: 1},
            "action_2": {0: 1, 1: 1, 2: 1, 3: 0, 4: 1, 5: 1, 6: 1, 7: 1, 8: 0, 9: 0, 10: 1},
            "action_3": {0: 0, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 0, 8: 0, 9: 1, 10: 1},
            "action_4": {0: 0, 1: 1, 2: 1, 3: 0, 4: 1, 5: 0, 6: 0, 7: 1, 8: 0, 9: 1, 10: 0},
            "action_5": {0: 1, 1: 0, 2: 0, 3: 0, 4: 1, 5: 1, 6: 1, 7: 0, 8: 0, 9: 0, 10: 1},
            "action_6": {0: 0, 1: 1, 2: 1, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 1, 9: 0, 10: 0},
            "action_7": {0: 1, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 1, 8: 1, 9: 1, 10: 0},
            "action_8": {0: 0, 1: 1, 2: 0, 3: 1, 4: 1, 5: 1, 6: 0, 7: 1, 8: 0, 9: 0, 10: 1},
            "action_9": {0: 1, 1: 2, 2: 1, 3: 0, 4: 0, 5: 1, 6: 1, 7: 1, 8: 2, 9: 0, 10: 2},
        }
    )

    for column in expected_actions.columns:
        # TODO: there seems to be a bug here: every so often the last action
        # is missing. Double-checked that it is not a logging problem; could
        # be a seeding issue.
        self.assertListEqual(
            dataframe[column].to_list()[:10],
            expected_actions[column].to_list()[:10],
            f"Column {column}",
        )

    temp_dir.cleanup()
def test_logging_discrete(self):
    temp_dir = tempfile.TemporaryDirectory()
    seed = 0
    logger = Logger(
        output_path=Path(temp_dir.name),
        experiment_name="test_discrete_logging",
        step_write_frequency=None,
        episode_write_frequency=1,
    )
    bench = LubyBenchmark()
    bench.set_seed(seed)
    env = bench.get_environment()
    env.seed_action_space(seed)

    action_logger = logger.add_module(ActionFrequencyWrapper)
    wrapped = ActionFrequencyWrapper(env, logger=action_logger)
    agent = RandomAgent(env)
    logger.set_env(env)

    run_benchmark(wrapped, agent, 10, logger)
    action_logger.close()

    logs = load_logs(action_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)

    expected_actions = [
        0, 3, 5, 4, 3, 5, 5, 5, 3, 3, 2, 1, 0, 1, 2, 0, 1, 1, 0, 1,
        2, 4, 3, 0, 1, 3, 0, 3, 3, 3, 4, 4, 4, 5, 4, 0, 4, 2, 1, 3,
        4, 2, 1, 3, 3, 2, 0, 5, 2, 5, 2, 1, 5, 3, 2, 5, 1, 0, 2, 3,
        1, 3, 2, 3, 2, 4, 3, 4, 0, 5, 5, 1, 5, 0, 1, 5, 5, 3, 3, 2,
    ]

    self.assertListEqual(dataframe.action.to_list(), expected_actions)

    temp_dir.cleanup()
def step_time_interval_example(data: pd.DataFrame, interval: int = 10):
    """
    Plot mean time spent on steps in a given interval

    Parameters
    ----------
    data : pd.DataFrame
        The wide data frame resulting from loading the EpisodeTimeWrapper
        logging results
    interval : int
        Number of steps to average over
    """
    grid = plot_step_time(data, interval, title="Mean Step Duration")
    grid.savefig("output/sigmoid_step_duration.pdf")
    plt.show()


if __name__ == "__main__":
    # Load data from file into a pandas DataFrame
    file = Path("data/sigmoid_example/EpisodeTimeWrapper.jsonl")
    logs = load_logs(file)
    data = log2dataframe(logs, wide=True, drop_columns=["time"])

    Path("output").mkdir(exist_ok=True)

    # Plot episode time
    episode_time_example(data)

    # Plot step time (overall & per seed)
    step_time_example(data)

    # Plot step time over intervals of 10 steps
    step_time_interval_example(data)
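# The loop below is an excerpt and assumes some prior setup. A minimal sketch
# of what that setup could look like; the benchmark choice, seed range,
# episode count and experiment name are illustrative assumptions, not from
# the original:
from pathlib import Path

import matplotlib.pyplot as plt

from dacbench.agents import RandomAgent
from dacbench.benchmarks import SigmoidBenchmark
from dacbench.logger import Logger, load_logs, log2dataframe
from dacbench.plotting import plot_performance, plot_performance_per_instance
from dacbench.runner import run_benchmark
from dacbench.wrappers import PerformanceTrackingWrapper, StateTrackingWrapper

bench = SigmoidBenchmark()
seeds = range(5)
num_episodes = 10

logger = Logger(
    output_path=Path("output"),
    experiment_name="sigmoid_random_agent",
)
performance_logger = logger.add_module(PerformanceTrackingWrapper)
state_logger = logger.add_module(StateTrackingWrapper)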
for s in seeds:
    # Log the seed
    logger.set_additional_info(seed=s)

    # Make & wrap benchmark environment
    env = bench.get_benchmark(seed=s)
    env = PerformanceTrackingWrapper(env, logger=performance_logger)
    env = StateTrackingWrapper(env, logger=state_logger)

    # Add env to logger
    logger.set_env(env)

    # Run random agent
    agent = RandomAgent(env)
    run_benchmark(env, agent, num_episodes, logger)

# Close logger object
logger.close()

# Load performance of last seed into pandas DataFrame
logs = load_logs(performance_logger.get_logfile())
dataframe = log2dataframe(logs, wide=True)

# Plot overall performance
plot_performance(dataframe)
plt.show()

# Plot performance per instance
plot_performance_per_instance(dataframe)
plt.show()