Exemple #1
0
def test_agent_stats_1():
    """Exercise AgentStats with the evaluation env passed to the helpers.

    Two AgentStats instances are built around DummyAgent (n_fit=4 each),
    then used for plotting, policy comparison, a save/load round trip and
    hyperparameter optimization. The evaluation environment is given
    explicitly to ``compare_policies`` rather than to the AgentStats
    constructor.
    """
    # Define train and evaluation envs
    train_env = GridWorld()
    eval_env = GridWorld()

    # Parameters
    params = {"n_episodes": 500}
    horizon = 20

    # Check that DummyAgent can be built, fitted and queried on its own
    agent = DummyAgent(train_env, **params)
    agent.fit()
    agent.policy(None)

    # Run AgentStats
    stats_agent1 = AgentStats(DummyAgent,
                              train_env,
                              init_kwargs=params,
                              n_fit=4,
                              eval_horizon=10)
    stats_agent2 = AgentStats(DummyAgent,
                              train_env,
                              init_kwargs=params,
                              n_fit=4,
                              eval_horizon=10)
    agent_stats_list = [stats_agent1, stats_agent2]

    # learning curves
    # NOTE(review): no explicit fit() is called on the AgentStats objects;
    # presumably the helpers below trigger fitting -- confirm.
    plot_episode_rewards(agent_stats_list, cumulative=True, show=False)

    # compare final policies
    compare_policies(agent_stats_list,
                     eval_env,
                     eval_horizon=horizon,
                     n_sim=10,
                     show=False)
    compare_policies(agent_stats_list,
                     eval_env,
                     eval_horizon=horizon,
                     n_sim=10,
                     show=False,
                     stationary_policy=False)

    # check if fitted: one fitted agent per requested fit (n_fit=4)
    for agent_stats in agent_stats_list:
        assert len(agent_stats.fitted_agents) == 4
        for agent in agent_stats.fitted_agents:
            assert agent.fitted

    # test saving/loading
    pickle_file = 'test_agent_stats_file.pickle'
    try:
        stats_agent1.save(pickle_file)
        loaded_stats = AgentStats.load(pickle_file)
        assert stats_agent1.identifier == loaded_stats.identifier
    finally:
        # delete file even when loading or the assertion above fails,
        # so a failed run does not leave a stray pickle behind
        if os.path.exists(pickle_file):
            os.remove(pickle_file)

    # test hyperparameter optimization (fresh run, then resumed run)
    loaded_stats.optimize_hyperparams()
    loaded_stats.optimize_hyperparams(continue_previous=True)
def test_agent_stats_2():
    """Exercise AgentStats with the evaluation env given at construction.

    Same scenario as a plain AgentStats smoke test (plots, policy
    comparison, save/load, hyperparameter optimization), but here
    ``eval_env`` and ``n_jobs=1`` are passed to the AgentStats constructor,
    writers are set on one instance, and ``save()`` is called without a
    filename so that the file lands in ``output_dir``.
    """
    # Define train and evaluation envs
    train_env = GridWorld()
    eval_env = GridWorld()

    # Parameters
    params = {"n_episodes": 500}

    # Run AgentStats
    stats_agent1 = AgentStats(DummyAgent,
                              train_env,
                              eval_env=eval_env,
                              init_kwargs=params,
                              n_fit=4,
                              eval_horizon=10,
                              n_jobs=1)
    stats_agent2 = AgentStats(DummyAgent,
                              train_env,
                              eval_env=eval_env,
                              init_kwargs=params,
                              n_fit=4,
                              eval_horizon=10,
                              n_jobs=1)
    agent_stats_list = [stats_agent1, stats_agent2]

    # set some writers
    stats_agent1.set_writer(1, None)
    stats_agent1.set_writer(2, None)

    # compare final policies (no eval env / horizon passed here: they were
    # given to the AgentStats constructor above)
    compare_policies(agent_stats_list, n_sim=10, show=False)
    compare_policies(agent_stats_list,
                     n_sim=10,
                     show=False,
                     stationary_policy=False)

    # learning curves
    plot_episode_rewards(agent_stats_list, cumulative=True, show=False)

    # check if fitted: one fitted agent per requested fit (n_fit=4)
    for agent_stats in agent_stats_list:
        assert len(agent_stats.fitted_agents) == 4
        for agent in agent_stats.fitted_agents:
            assert agent.fitted

    # test saving/loading
    dirname = stats_agent1.output_dir
    fname = dirname / 'stats'
    pickle_path = fname.with_suffix('.pickle')
    try:
        stats_agent1.save()
        loaded_stats = AgentStats.load(fname)
        assert stats_agent1.identifier == loaded_stats.identifier
    finally:
        # delete file and directory even when loading or the assertion
        # above fails, so a failed run does not leave artifacts behind
        if pickle_path.exists():
            os.remove(pickle_path)
        if dirname.exists():
            dirname.rmdir()

    # test hyperparameter optimization
    loaded_stats.optimize_hyperparams()
Exemple #3
0
# Options common to both hyperparameter searches below.
shared_search_options = dict(n_trials=10, n_sim=5, n_fit=2, n_jobs=2)

# First search: no time limit, explicit sampler.
best_trial, data = ppo_stats.optimize_hyperparams(
    timeout=None,
    sampler_method='optuna_default',
    **shared_search_options)

# Remember how many trials the study holds before it is resumed.
initial_n_trials = len(ppo_stats.study.trials)

# Round trip through disk: save, drop the in-memory object, load it back.
ppo_stats.save('ppo_stats_backup')
del ppo_stats
ppo_stats = AgentStats.load('ppo_stats_backup')

# Resume the previous optimization, this time with a timeout of 5.
best_trial, data = ppo_stats.optimize_hyperparams(
    timeout=5,
    continue_previous=True,
    **shared_search_options)

print("number of initial trials = ", initial_n_trials)
print("number of trials after continuing= ", len(ppo_stats.study.trials))

print("----")
print("fitting agents after choosing hyperparams...")
ppo_stats.fit()  # fit the agents held by ppo_stats
Exemple #4
0
BONUS_SCALE_FACTOR = 0.1
MIN_DIST = 0.1


# Keyword arguments forwarded to each PPOAgent instance.
params_ppo = dict(
    n_episodes=N_EPISODES,
    gamma=GAMMA,
    horizon=HORIZON,
    learning_rate=0.0003,
)

# ------------------------------------------------------------
# Step 1: fit 4 PPO agents through AgentStats and save to disk
# ------------------------------------------------------------
ppo_stats = AgentStats(
    PPOAgent,
    train_env,
    init_kwargs=params_ppo,
    n_fit=4,
)
ppo_stats.fit()
ppo_stats.save('ppo_stats')
del ppo_stats  # drop the in-memory object; it is reloaded from disk below

# ------------------------------------------------------------
# Step 2: reload the saved stats and plot / compare the results
# ------------------------------------------------------------
ppo_stats = AgentStats.load('ppo_stats')
agent_stats_list = [ppo_stats]

# Learning curves (cumulative rewards, not displayed).
plot_episode_rewards(agent_stats_list, cumulative=True, show=False)

# Compare the final policies on the evaluation environment.
output = compare_policies(
    agent_stats_list,
    eval_env,
    eval_horizon=HORIZON,
    n_sim=10,
)
print(output)
def load_experiment_results(output_dir, experiment_name):
    """
    Load the most recent results of one or several experiments.

    For every agent subdirectory found under ``output_dir/<experiment stem>``,
    the most recent result directory is selected (via
    ``_get_most_recent_path``), and its ``stats.pickle`` file and all of its
    ``.csv`` files are loaded.

    Parameters
    ----------
    output_dir : str or Path, or list
        directory (or list of directories) where experiment results are stored
        (command line argument --output_dir when running the experiment).
        A single directory is reused for every entry of experiment_name.
    experiment_name : str or Path, or list
        name of yaml file describing the experiment. Only the stem of the
        name is used to build the results directory.

    Returns
    -------
    output_data: dict
        dictionary such that

        output_data['experiment_dirs'] = list of paths to experiment directory (output_dir/experiment_name)
        output_data['agent_list'] = list containing the names of the agents in the experiment
        output_data['stats'][agent_name] = fitted AgentStats for agent_name,
            or None if stats.pickle could not be loaded
        output_data['dataframes'][agent_name] = dict of pandas data frames from the last run of the experiment
        output_data['data_dir'][agent_name] = directory from which the results were loaded

    Raises
    ------
    AssertionError
        If several output directories are given and their number differs
        from the number of experiment names.

    Notes
    -----
    Results are indexed by agent (sub-directory) name only, so if two
    experiments contain an agent with the same name, the one found last
    replaces the other.
    """
    output_data = {
        'agent_list': [],
        'stats': {},
        'dataframes': {},
        'data_dir': {},
    }

    # preprocess input: accept both a single value and a list
    if not isinstance(output_dir, list):
        output_dir = [output_dir]
    if not isinstance(experiment_name, list):
        experiment_name = [experiment_name]
    ndirs = len(output_dir)

    if ndirs > 1:
        assert len(
            experiment_name
        ) == ndirs, "Number of experiment names must match the number of output_dirs "
    else:
        # a single output directory is shared by all the experiments
        output_dir = len(experiment_name) * output_dir

    results_dirs = [
        Path(dd) / Path(exper).stem
        for dd, exper in zip(output_dir, experiment_name)
    ]
    output_data['experiment_dirs'] = results_dirs

    # Subdirectories with data for each agent
    subdirs = []
    for dd in results_dirs:
        subdirs.extend(f for f in dd.iterdir() if f.is_dir())

    # Create dictionary dict[agent_name] = most recent result dir
    data_dirs = {
        dd.name: _get_most_recent_path([f for f in dd.iterdir() if f.is_dir()])
        for dd in subdirs
    }

    # Load data from each subdir
    for agent_name, data_dir in data_dirs.items():
        output_data['agent_list'].append(agent_name)

        # store data_dir
        output_data['data_dir'][agent_name] = data_dir

        # store AgentStats; loading is best-effort, so a failure leaves None
        # for this agent instead of aborting the whole call
        output_data['stats'][agent_name] = None
        fname = data_dir / 'stats.pickle'
        try:
            output_data['stats'][agent_name] = AgentStats.load(fname)
        except Exception as err:
            # report the failure instead of claiming the file was loaded
            logger.warning("... could not load %s (%s)", fname, err)
        else:
            logger.info("... loaded " + str(fname))

        # store data frames, keyed by csv file name without extension
        dataframes = {}
        for ff in data_dir.iterdir():
            if ff.suffix != '.csv':
                continue
            dataframes[ff.stem] = pd.read_csv(ff)
            logger.info("... loaded " + str(ff))
        output_data['dataframes'][agent_name] = dataframes

    return output_data