def check_save_load(agent, env="continuous_state", init_kwargs=None):
    """
    Check that the agent saves a non-empty file and can be loaded back.

    Parameters
    ----------
    agent: rlberry agent module
        Agent class to test.
    env: tuple (env_ctor, env_kwargs) or str in {"continuous_state", "discrete_state"}, default="continuous_state"
        if tuple, env is the constructor and keywords of the env on which to test.
        if str in {"continuous_state", "discrete_state"}, we use a default Benchmark environment.
    init_kwargs : dict
        Arguments required by the agent's constructor.

    Raises
    ------
    RuntimeError
        If the pickled agent file cannot be loaded back.
    """
    if init_kwargs is None:
        init_kwargs = {}

    train_env = _make_env(env)
    # Instantiate the env once; presumably this validates the constructor/kwargs
    # pair before handing the (ctor, kwargs) tuple to AgentManager -- TODO confirm.
    env = train_env[0](**train_env[1])
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Renamed from `agent` to `manager`: the original rebinding shadowed
        # the agent-class parameter, which made the code below misleading.
        manager = AgentManager(
            agent,
            train_env,
            fit_budget=5,
            n_fit=1,
            seed=SEED,
            init_kwargs=init_kwargs,
            output_dir=tmpdirname,
        )
        manager.fit(3)

        # Path where the single fitted instance (idx 0) was pickled.
        saved_path = os.path.join(
            str(manager.output_dir_), "agent_handlers", "idx_0.pickle"
        )
        assert os.path.getsize(saved_path) > 1, "The saved file is empty."
        try:
            manager.load(saved_path)
        except Exception as err:
            # Chain the original exception so the root cause stays visible.
            raise RuntimeError("Failed to load the agent file.") from err
# NOTE(review): this chunk begins mid-statement -- the opening
# `... = AgentManager(` call is outside this view.
    PPOAgent,  # agent class to train
    train_env,  # (constructor, kwargs) tuple for the training environment
    fit_budget=N_EPISODES,
    init_kwargs=params_ppo,
    eval_kwargs=eval_kwargs,
    n_fit=4,  # train 4 independent instances
    output_dir="dev/",
    parallelization="process",  # fit each instance in its own process
)
ppo_stats.fit()  # fit the 4 agents
ppo_stats_fname = ppo_stats.save()
del ppo_stats  # free memory; results reloaded from disk below

# -------------------------------
# Load and plot results
# --------------------------------
ppo_stats = AgentManager.load(ppo_stats_fname)

# learning curves
plot_writer_data(
    ppo_stats,
    tag="episode_rewards",
    preprocess_func=np.cumsum,
    title="Cumulative Rewards",
    show=False,
)

# compare final policies
output = evaluate_agents([ppo_stats], n_simulations=15)
print(output)
def execute_message(
    message: interface.Message, resources: interface.Resources
) -> interface.Message:
    """
    Dispatch a client message to the corresponding AgentManager operation.

    Parameters
    ----------
    message : interface.Message
        Incoming request; ``message.command`` selects the operation and
        ``message.params`` carries its arguments.
    resources : interface.Resources
        Mapping of resource name -> metadata exposing a "description" key.

    Returns
    -------
    interface.Message
        The operation's response; defaults to an ECHO reply when the
        command produces no data.
    """
    response = interface.Message.create(command=interface.Command.ECHO)
    # LIST_RESOURCES
    if message.command == interface.Command.LIST_RESOURCES:
        info = {}
        for rr in resources:
            info[rr] = resources[rr]["description"]
        response = interface.Message.create(info=info)
    # AGENT_MANAGER_CREATE_INSTANCE
    elif message.command == interface.Command.AGENT_MANAGER_CREATE_INSTANCE:
        params = message.params
        base_dir = pathlib.Path(metadata_utils.RLBERRY_DEFAULT_DATA_DIR)
        # Server-side data always lives under <base_dir>/server_data.
        if "output_dir" in params:
            params["output_dir"] = base_dir / "server_data" / params["output_dir"]
        else:
            params["output_dir"] = base_dir / "server_data/"
        agent_manager = AgentManager(**params)
        filename = str(agent_manager.save())
        response = interface.Message.create(
            info=dict(
                filename=filename,
                agent_name=agent_manager.agent_name,
                # Report the path as the client sees it.
                output_dir=str(agent_manager.output_dir).replace(
                    "server_data/", "client_data/"
                ),
            )
        )
        del agent_manager
    # AGENT_MANAGER_FIT
    elif message.command == interface.Command.AGENT_MANAGER_FIT:
        filename = message.params["filename"]
        budget = message.params["budget"]
        extra_params = message.params["extra_params"]
        agent_manager = AgentManager.load(filename)
        agent_manager.fit(budget, **extra_params)
        # Persist the fitted state so later commands see the updated manager.
        agent_manager.save()
        response = interface.Message.create(command=interface.Command.ECHO)
        del agent_manager
    # AGENT_MANAGER_EVAL
    elif message.command == interface.Command.AGENT_MANAGER_EVAL:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        eval_output = agent_manager.eval_agents(message.params["n_simulations"])
        response = interface.Message.create(data=dict(output=eval_output))
        del agent_manager
    # AGENT_MANAGER_CLEAR_OUTPUT_DIR
    elif message.command == interface.Command.AGENT_MANAGER_CLEAR_OUTPUT_DIR:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.clear_output_dir()
        response = interface.Message.create(
            message=f"Cleared output dir: {agent_manager.output_dir}"
        )
        del agent_manager
    # AGENT_MANAGER_CLEAR_HANDLERS
    elif message.command == interface.Command.AGENT_MANAGER_CLEAR_HANDLERS:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.clear_handlers()
        agent_manager.save()
        # NOTE(review): original text was garbled to "(unknown)" by extraction;
        # restored to report the cleared manager's filename -- confirm upstream.
        response = interface.Message.create(message=f"Cleared handlers: {filename}")
        del agent_manager
    # AGENT_MANAGER_SET_WRITER
    elif message.command == interface.Command.AGENT_MANAGER_SET_WRITER:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.set_writer(**message.params["kwargs"])
        agent_manager.save()
        del agent_manager
    # AGENT_MANAGER_OPTIMIZE_HYPERPARAMS
    elif message.command == interface.Command.AGENT_MANAGER_OPTIMIZE_HYPERPARAMS:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        best_params_dict = agent_manager.optimize_hyperparams(
            **message.params["kwargs"]
        )
        agent_manager.save()
        del agent_manager
        response = interface.Message.create(data=best_params_dict)
    # AGENT_MANAGER_GET_WRITER_DATA
    elif message.command == interface.Command.AGENT_MANAGER_GET_WRITER_DATA:
        # writer scalar data
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        writer_data = agent_manager.get_writer_data()
        writer_data = writer_data or dict()
        # Serialize each DataFrame to CSV so it travels inside the message.
        for idx in writer_data:
            writer_data[idx] = writer_data[idx].to_csv(index=False)
        # tensorboard data
        tensorboard_bin_data = None
        if agent_manager.tensorboard_dir is not None:
            tensorboard_zip_file = rlberry.utils.io.zipdir(
                agent_manager.tensorboard_dir,
                agent_manager.output_dir / "tensorboard_data.zip",
            )
            if tensorboard_zip_file is not None:
                # BUGFIX: use a context manager -- the original bare
                # open(...).read() leaked the file handle.
                with open(tensorboard_zip_file, "rb") as zip_fh:
                    tensorboard_bin_data = zip_fh.read()
                tensorboard_bin_data = base64.b64encode(tensorboard_bin_data).decode(
                    "ascii"
                )
        response = interface.Message.create(
            data=dict(
                writer_data=writer_data,
                tensorboard_bin_data=tensorboard_bin_data,
            )
        )
        del agent_manager
    # end
    return response
def test_agent_manager_1():
    """Fit AgentManager instances (with and without per-instance kwargs) and
    exercise plotting, evaluation, save/load and hyperparameter optimization."""
    # Define train and evaluation envs
    train_env = (GridWorld, {})

    # Parameters
    params = dict(hyperparameter1=-1, hyperparameter2=100)
    eval_kwargs = dict(eval_horizon=10)

    # Sanity check: DummyAgent works standalone before using it in a manager.
    agent = DummyAgent(train_env[0](**train_env[1]), **params)
    agent.fit(10)
    agent.policy(None)

    # Run AgentManager
    params_per_instance = [dict(hyperparameter2=ii) for ii in range(4)]
    stats_agent1 = AgentManager(
        DummyAgent,
        train_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
        init_kwargs_per_instance=params_per_instance,
    )
    stats_agent2 = AgentManager(
        DummyAgent,
        train_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
    )
    agent_manager_list = [stats_agent1, stats_agent2]
    for st in agent_manager_list:
        st.fit()

    # Per-instance kwargs must override the shared init_kwargs...
    for ii, instance in enumerate(stats_agent1.agent_handlers):
        assert instance.hyperparameter1 == -1
        assert instance.hyperparameter2 == ii
    # ...while without per-instance kwargs every handler keeps the shared value.
    # (plain iteration: the enumerate index was unused here)
    for instance in stats_agent2.agent_handlers:
        assert instance.hyperparameter1 == -1
        assert instance.hyperparameter2 == 100

    # learning curves
    plot_writer_data(agent_manager_list, tag="episode_rewards", show=False)

    # compare final policies
    evaluate_agents(agent_manager_list, show=False)

    # check if fitted
    for agent_manager in agent_manager_list:
        assert len(agent_manager.agent_handlers) == 4
        for agent in agent_manager.agent_handlers:
            assert agent.fitted

    # test saving/loading
    fname = stats_agent1.save()
    loaded_stats = AgentManager.load(fname)
    assert stats_agent1.unique_id == loaded_stats.unique_id

    # test hyperparameter optimization call
    loaded_stats.optimize_hyperparams(n_trials=5)
    loaded_stats.optimize_hyperparams(n_trials=5, continue_previous=True)

    for st in agent_manager_list:
        st.clear_output_dir()
def test_agent_manager_2():
    """Run two identical AgentManager instances with an explicit eval env and
    check fitting, repeated evaluation, plotting, save/load and writer removal."""
    # Train and evaluation environments
    train_env = (GridWorld, {})
    eval_env = (GridWorld, {})

    # Shared parameters
    params = {}
    eval_kwargs = dict(eval_horizon=10)

    # Build two managers with identical settings
    common_kwargs = dict(
        eval_env=eval_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
    )
    manager_a = AgentManager(DummyAgent, train_env, **common_kwargs)
    manager_b = AgentManager(DummyAgent, train_env, **common_kwargs)
    managers = [manager_a, manager_b]

    for mgr in managers:
        mgr.fit()

    # Compare final policies -- evaluated twice on purpose, to check that
    # repeated evaluation of the same managers works.
    evaluate_agents(managers, show=False)
    evaluate_agents(managers, show=False)

    # Learning curves
    plot_writer_data(managers, tag="episode_rewards", show=False)

    # Every handler of every manager must be fitted
    for mgr in managers:
        assert len(mgr.agent_handlers) == 4
        assert all(handler.fitted for handler in mgr.agent_handlers)

    # Saving/loading round-trip preserves the unique id
    fname = manager_a.save()
    loaded = AgentManager.load(fname)
    assert manager_a.unique_id == loaded.unique_id

    # Hyperparameter optimization on the loaded copy
    loaded.optimize_hyperparams(n_trials=5)

    # Delete some writers
    manager_a.set_writer(1, None)
    manager_a.set_writer(2, None)

    manager_a.clear_output_dir()
    manager_b.clear_output_dir()
# NOTE(review): `manager` is defined outside this view -- presumably an
# AgentManager built earlier in the script; confirm against the full file.
# First optimization round: threaded, no timeout.
manager.optimize_hyperparams(
    n_trials=5,
    timeout=None,
    n_fit=2,
    sampler_method="optuna_default",
    optuna_parallelization="thread",
)

# Remember how many trials the study holds before continuing.
initial_n_trials = len(manager.optuna_study.trials)

# save
manager_fname = manager.save()
del manager

# load
manager = AgentManager.load(manager_fname)

# continue previous optimization, now with 120s of timeout and multiprocessing
manager.optimize_hyperparams(
    n_trials=512,
    timeout=120,
    n_fit=8,
    continue_previous=True,
    optuna_parallelization="process",
    n_optuna_workers=4,
)

print("number of initial trials = ", initial_n_trials)
print("number of trials after continuing= ", len(manager.optuna_study.trials))
print("----")
def load_experiment_results(output_dir, experiment_name):
    """
    Load the results of previously run experiments.

    Parameters
    ----------
    output_dir : str or Path, or list
        directory (or list of directories) where experiment results are stored
        (command line argument --output_dir when running the experiment)
    experiment_name : str or Path, or list
        name of yaml file describing the experiment.

    Returns
    -------
    output_data: dict
        dictionary such that

        output_data['experiment_dirs'] = list of paths to experiment directory (output_dir/experiment_name)
        output_data['agent_list'] = list containing the names of the agents in the experiment
        output_data['manager'][agent_name] = fitted AgentManager for agent_name
        output_data['dataframes'][agent_name] = dict of pandas data frames from the last run of the experiment
        output_data['data_dir'][agent_name] = directory from which the results were loaded
    """
    output_data = {}
    output_data["agent_list"] = []
    output_data["manager"] = {}
    output_data["dataframes"] = {}
    output_data["data_dir"] = {}

    # Preprocess input: accept a scalar or a list for both arguments.
    if not isinstance(output_dir, list):
        output_dir = [output_dir]
    if not isinstance(experiment_name, list):
        experiment_name = [experiment_name]
    ndirs = len(output_dir)

    if ndirs > 1:
        assert (
            len(experiment_name) == ndirs
        ), "Number of experiment names must match the number of output_dirs "
    else:
        # Single output_dir: reuse it for every experiment name.
        output_dir = len(experiment_name) * output_dir

    results_dirs = []
    for dd, exper in zip(output_dir, experiment_name):
        results_dirs.append(Path(dd) / Path(exper).stem)
    output_data["experiment_dirs"] = results_dirs

    # Subdirectories with data for each agent
    subdirs = []
    for dd in results_dirs:
        subdirs.extend([f for f in dd.iterdir() if f.is_dir()])

    # Create dictionary dict[agent_name] = most recent result dir
    data_dirs = {}
    for dd in subdirs:
        data_dirs[dd.name] = _get_most_recent_path(
            [f for f in dd.iterdir() if f.is_dir()]
        )
        data_dirs[dd.name] = data_dirs[dd.name] / "manager_data"

    # Load data from each subdir
    for agent_name in data_dirs:
        output_data["agent_list"].append(agent_name)

        # store data_dir
        output_data["data_dir"][agent_name] = data_dirs[agent_name]

        # store AgentManager
        output_data["manager"][agent_name] = None
        fname = data_dirs[agent_name] / "manager_obj.pickle"
        try:
            output_data["manager"][agent_name] = AgentManager.load(fname)
        except Exception:
            logger.warning("Could not load AgentManager instance for %s.", agent_name)
        else:
            # BUGFIX: log success only when the load succeeded -- previously
            # this line ran unconditionally, even right after the warning.
            logger.info("... loaded %s", fname)

        # store data frames
        dataframes = {}
        csv_files = [
            f for f in data_dirs[agent_name].iterdir() if f.suffix == ".csv"
        ]
        for ff in csv_files:
            dataframes[ff.stem] = pd.read_csv(ff)
            logger.info("... loaded %s", ff)
        output_data["dataframes"][agent_name] = dataframes

    return output_data