def test_agent_stats_1():
    """Smoke-test AgentStats: fitting, plotting, policy comparison,
    pickle save/load round-trip and hyperparameter optimization."""
    # Training and evaluation environments.
    train_env = GridWorld()
    eval_env = GridWorld()

    # Agent parameters.
    params = {"n_episodes": 500}
    horizon = 20

    # Sanity check: a single DummyAgent can be fit and queried.
    agent = DummyAgent(train_env, **params)
    agent.fit()
    agent.policy(None)

    # Two identical AgentStats instances.
    stats = [
        AgentStats(DummyAgent, train_env, init_kwargs=params,
                   n_fit=4, eval_horizon=10)
        for _ in range(2)
    ]

    # Learning curves.
    plot_episode_rewards(stats, cumulative=True, show=False)

    # Compare final policies, stationary and non-stationary.
    compare_policies(stats, eval_env, eval_horizon=horizon,
                     n_sim=10, show=False)
    compare_policies(stats, eval_env, eval_horizon=horizon,
                     n_sim=10, show=False, stationary_policy=False)

    # Each stats object must end up with 4 fitted agents.
    for st in stats:
        assert len(st.fitted_agents) == 4
        assert all(a.fitted for a in st.fitted_agents)

    # Save/load round-trip through a pickle file.
    stats[0].save('test_agent_stats_file.pickle')
    loaded_stats = AgentStats.load('test_agent_stats_file.pickle')
    assert stats[0].identifier == loaded_stats.identifier

    # Clean up the pickle file.
    os.remove('test_agent_stats_file.pickle')

    # Hyperparameter optimization: fresh run, then continuation.
    loaded_stats.optimize_hyperparams()
    loaded_stats.optimize_hyperparams(continue_previous=True)
def test_agent_stats_2():
    """Smoke-test AgentStats with an explicit eval_env, writer overrides,
    and save/load via the default output directory."""
    # Training and evaluation environments.
    train_env = GridWorld()
    eval_env = GridWorld()

    # Agent parameters.
    params = {"n_episodes": 500}

    # Two identical AgentStats instances (single job each).
    stats = [
        AgentStats(DummyAgent, train_env, eval_env=eval_env,
                   init_kwargs=params, n_fit=4, eval_horizon=10, n_jobs=1)
        for _ in range(2)
    ]

    # Override writers for two fitted instances of the first stats object.
    stats[0].set_writer(1, None)
    stats[0].set_writer(2, None)

    # Compare final policies, stationary and non-stationary.
    compare_policies(stats, n_sim=10, show=False)
    compare_policies(stats, n_sim=10, show=False, stationary_policy=False)

    # Learning curves.
    plot_episode_rewards(stats, cumulative=True, show=False)

    # Each stats object must end up with 4 fitted agents.
    for st in stats:
        assert len(st.fitted_agents) == 4
        assert all(a.fitted for a in st.fitted_agents)

    # Save into the default output directory, then reload.
    dirname = stats[0].output_dir
    fname = dirname / 'stats'
    stats[0].save()
    loaded_stats = AgentStats.load(fname)
    assert stats[0].identifier == loaded_stats.identifier

    # Clean up the saved pickle and its directory.
    os.remove(fname.with_suffix('.pickle'))
    dirname.rmdir()

    # Hyperparameter optimization on the reloaded stats.
    loaded_stats.optimize_hyperparams()
# Initial hyperparameter-optimization run with the default Optuna sampler.
best_trial, data = ppo_stats.optimize_hyperparams(
    n_trials=10, timeout=None, n_sim=5, n_fit=2,
    n_jobs=2, sampler_method='optuna_default')

# Remember how many trials the study holds before continuing it.
initial_n_trials = len(ppo_stats.study.trials)

# save
ppo_stats.save('ppo_stats_backup')
del ppo_stats

# load
ppo_stats = AgentStats.load('ppo_stats_backup')

# continue previous optimization, now with 5s of timeout
best_trial, data = ppo_stats.optimize_hyperparams(
    n_trials=10, timeout=5, n_sim=5, n_fit=2,
    n_jobs=2, continue_previous=True)

print("number of initial trials = ", initial_n_trials)
print("number of trials after continuing= ", len(ppo_stats.study.trials))

print("----")
print("fitting agents after choosing hyperparams...")
ppo_stats.fit()  # fit the 4 agents
BONUS_SCALE_FACTOR = 0.1
MIN_DIST = 0.1

# PPO hyperparameters for this experiment; N_EPISODES, GAMMA and HORIZON
# are assumed to be defined earlier in the script — TODO confirm.
params_ppo = {"n_episodes": N_EPISODES,
              "gamma": GAMMA,
              "horizon": HORIZON,
              "learning_rate": 0.0003}

# -------------------------------
# Run AgentStats and save results
# --------------------------------
ppo_stats = AgentStats(PPOAgent, train_env, init_kwargs=params_ppo, n_fit=4)
ppo_stats.fit()  # fit the 4 agents
ppo_stats.save('ppo_stats')
del ppo_stats

# -------------------------------
# Load and plot results
# --------------------------------
ppo_stats = AgentStats.load('ppo_stats')
agent_stats_list = [ppo_stats]

# learning curves
plot_episode_rewards(agent_stats_list, cumulative=True, show=False)

# compare final policies
output = compare_policies(agent_stats_list, eval_env,
                          eval_horizon=HORIZON, n_sim=10)
print(output)
def load_experiment_results(output_dir, experiment_name):
    """
    Load the stats and data frames produced by an experiment run.

    Parameters
    ----------
    output_dir : str or Path, or list
        directory (or list of directories) where experiment results are
        stored (command line argument --output_dir when running the
        experiment)
    experiment_name : str or Path, or list
        name of yaml file describing the experiment.

    Returns
    -------
    output_data: dict
        dictionary such that

        output_data['experiment_dirs'] = list of paths to experiment
            directory (output_dir/experiment_name)
        output_data['agent_list'] = list containing the names of the
            agents in the experiment
        output_data['stats'][agent_name] = fitted AgentStats for
            agent_name (None if loading failed)
        output_data['dataframes'][agent_name] = dict of pandas data
            frames from the last run of the experiment
        output_data['data_dir'][agent_name] = directory from which the
            results were loaded
    """
    output_data = {}
    output_data['agent_list'] = []
    output_data['stats'] = {}
    output_data['dataframes'] = {}
    output_data['data_dir'] = {}

    # Preprocess input: accept single values or lists for both arguments.
    if not isinstance(output_dir, list):
        output_dir = [output_dir]
    if not isinstance(experiment_name, list):
        experiment_name = [experiment_name]
    ndirs = len(output_dir)

    if ndirs > 1:
        assert len(
            experiment_name
        ) == ndirs, "Number of experiment names must match the number of output_dirs "
    else:
        # One output_dir shared by all experiments: broadcast it.
        output_dir = len(experiment_name) * output_dir

    # Experiment directory = output_dir / <yaml file name without suffix>.
    results_dirs = []
    for dd, exper in zip(output_dir, experiment_name):
        results_dirs.append(Path(dd) / Path(exper).stem)
    output_data['experiment_dirs'] = results_dirs

    # Subdirectories with data for each agent
    subdirs = []
    for dd in results_dirs:
        subdirs.extend([f for f in dd.iterdir() if f.is_dir()])

    # Create dictionary dict[agent_name] = most recent result dir
    data_dirs = {}
    for dd in subdirs:
        data_dirs[dd.name] = _get_most_recent_path(
            [f for f in dd.iterdir() if f.is_dir()])

    # Load data from each subdir
    for agent_name in data_dirs:
        output_data['agent_list'].append(agent_name)

        # store data_dir
        output_data['data_dir'][agent_name] = data_dirs[agent_name]

        # store AgentStats
        output_data['stats'][agent_name] = None
        fname = data_dirs[agent_name] / 'stats.pickle'
        try:
            output_data['stats'][agent_name] = AgentStats.load(fname)
        except Exception:
            # BUGFIX: the success message used to be logged even when
            # loading raised; now failures are reported explicitly and
            # the stats entry is left as None (best-effort, as before).
            logger.warning("Could not load " + str(fname))
        else:
            logger.info("... loaded " + str(fname))

        # store data frames
        dataframes = {}
        csv_files = [
            f for f in data_dirs[agent_name].iterdir()
            if f.suffix == '.csv'
        ]
        for ff in csv_files:
            dataframes[ff.stem] = pd.read_csv(ff)
            logger.info("... loaded " + str(ff))
        output_data['dataframes'][agent_name] = dataframes

    return output_data