Example #1
def test_agent_manager_partial_fit_and_tuple_env():
    # Define train and evaluation envs
    train_env = (
        GridWorld,
        None,
    )  # tuple (constructor, kwargs) must also work in AgentManager

    # Parameters
    params = {}
    eval_kwargs = dict(eval_horizon=10)

    # Run AgentManager
    stats = AgentManager(
        DummyAgent,
        train_env,
        init_kwargs=params,
        n_fit=4,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        seed=123,
    )
    stats2 = AgentManager(
        DummyAgent,
        train_env,
        init_kwargs=params,
        n_fit=4,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        seed=123,
    )

    # Run partial fit
    stats.fit(10)
    stats.fit(20)
    for agent in stats.agent_handlers:
        assert agent.total_budget == 30

    # Run fit
    stats2.fit()

    # learning curves
    plot_writer_data([stats],
                     tag="episode_rewards",
                     show=False,
                     preprocess_func=np.cumsum)

    # compare final policies
    evaluate_agents([stats], show=False)

    # delete some writers
    stats.set_writer(0, None)
    stats.set_writer(3, None)

    stats.clear_output_dir()
    stats2.clear_output_dir()
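
These tests assume rlberry's manager utilities, a GridWorld environment and a small DummyAgent test fixture. Below is a minimal sketch of the imports and of a DummyAgent consistent with how the tests use it (the actual fixture in rlberry's test suite may differ): fit accumulates total_budget and logs an "episode_rewards" tag for plot_writer_data, policy is trivial, and sample_parameters is what makes optimize_hyperparams work in Examples #6 and #7.

import numpy as np
from rlberry.agents import AgentWithSimplePolicy
from rlberry.envs import GridWorld
from rlberry.manager import AgentManager, evaluate_agents, plot_writer_data


class DummyAgent(AgentWithSimplePolicy):
    """Sketch of the test fixture; only the behaviour the tests rely on."""

    name = "DummyAgent"

    def __init__(self, env, hyperparameter1=0, hyperparameter2=0, **kwargs):
        AgentWithSimplePolicy.__init__(self, env, **kwargs)
        self.hyperparameter1 = hyperparameter1
        self.hyperparameter2 = hyperparameter2
        self.fitted = False
        self.total_budget = 0

    def fit(self, budget, **kwargs):
        del kwargs
        self.fitted = True
        self.total_budget += budget
        for ep in range(budget):
            # log a constant reward so plot_writer_data finds the tag
            self.writer.add_scalar("episode_rewards", 0.0, ep)

    def policy(self, observation):
        return 0  # always pick action 0

    @classmethod
    def sample_parameters(cls, trial):
        # Optuna search space used by optimize_hyperparams
        return dict(
            hyperparameter1=trial.suggest_categorical("hyperparameter1", [1, 2, 3]),
            hyperparameter2=trial.suggest_categorical("hyperparameter2", [1, 2, 3]),
        )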
Example #2

    ppo_stats = AgentManager(
        PPOAgent,
        train_env,
        fit_budget=N_EPISODES,
        init_kwargs=params_ppo,
        eval_kwargs=eval_kwargs,
        n_fit=4,
        output_dir="dev/",
        parallelization="process",
    )
    ppo_stats.fit()  # fit the 4 agents
    ppo_stats_fname = ppo_stats.save()
    del ppo_stats

    # -------------------------------
    # Load and plot results
    # --------------------------------
    ppo_stats = AgentManager.load(ppo_stats_fname)

    # learning curves
    plot_writer_data(
        ppo_stats,
        tag="episode_rewards",
        preprocess_func=np.cumsum,
        title="Cumulative Rewards",
        show=False,
    )

    # compare final policies
    output = evaluate_agents([ppo_stats], n_simulations=15)
    print(output)
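
Example #2 above starts inside the AgentManager call, so its imports and the definitions of train_env, N_EPISODES, params_ppo and eval_kwargs are missing from the source. A plausible prelude, with hypothetical names and values, could look like this:

import numpy as np
from rlberry.agents.torch import PPOAgent
from rlberry.envs import gym_make
from rlberry.manager import AgentManager, evaluate_agents, plot_writer_data

# Hypothetical choices; the original values are not in the source.
train_env = (gym_make, dict(id="CartPole-v1"))
N_EPISODES = 100
params_ppo = dict(gamma=0.99)  # PPOAgent keyword arguments
eval_kwargs = dict(eval_horizon=500, n_simulations=10)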
Example #3
    eval_kwargs = dict(eval_horizon=HORIZON, n_simulations=20)

    # -----------------------------
    # Run AgentManager
    # -----------------------------
    ppo_stats = AgentManager(
        PPOAgent,
        train_env,
        fit_budget=N_EPISODES,
        init_kwargs=params_ppo,
        eval_kwargs=eval_kwargs,
        n_fit=4,
    )

    ppo_stats.set_writer(0,
                         SummaryWriter,
                         writer_kwargs={"comment": "worker_0"})
    ppo_stats.set_writer(1,
                         SummaryWriter,
                         writer_kwargs={"comment": "worker_1"})

    agent_manager_list = [ppo_stats]

    agent_manager_list[0].fit()
    # after fit, writers are set to None to avoid pickle problems
    agent_manager_list[0].save()

    # compare final policies
    output = evaluate_agents(agent_manager_list)
    print(output)
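
Example #3 swaps the default writer of the first two workers for a TensorBoard SummaryWriter via set_writer. The import it assumes is shown below; with a comment argument, SummaryWriter writes under ./runs by default, so the per-worker logs can be browsed with TensorBoard afterwards.

from torch.utils.tensorboard import SummaryWriter

# After fitting, inspect the per-worker logs with:
#   tensorboard --logdir runs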
Example #4

    stats_alternative.optimize_hyperparams(
        timeout=600,
        n_optuna_workers=2,
        n_fit=2,
        optuna_parallelization="process",
        fit_fraction=1.0,
    )

    # Fit everything in parallel
    multimanagers = MultipleManagers()
    multimanagers.append(stats)
    multimanagers.append(stats_alternative)

    multimanagers.run()

    # Plot policy evaluation
    out = evaluate_agents(multimanagers.managers)
    print(out)

    # Visualize policy
    env = stats_alternative.build_eval_env()
    agent = stats_alternative.agent_handlers[0]
    obs = env.reset()
    for i in range(2500):
        action = agent.policy(obs)
        obs, reward, done, info = env.step(action)
        env.render()
        if done:
            break
    env.close()
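
Example #4 is also a fragment: stats and stats_alternative are two AgentManager instances built before the snippet, and optimize_hyperparams relies on the agent class implementing the sample_parameters classmethod (as in the DummyAgent sketch after Example #1). A hypothetical construction of the two managers, just to make the fragment self-contained:

from rlberry.agents.torch import A2CAgent, PPOAgent
from rlberry.envs import gym_make
from rlberry.manager import AgentManager, MultipleManagers, evaluate_agents

# Hypothetical setup; agents, environment and budgets are not in the source.
train_env = (gym_make, dict(id="CartPole-v1"))
eval_kwargs = dict(eval_horizon=500, n_simulations=10)

stats = AgentManager(
    A2CAgent, train_env, fit_budget=100, eval_kwargs=eval_kwargs, n_fit=2
)
stats_alternative = AgentManager(
    PPOAgent, train_env, fit_budget=100, eval_kwargs=eval_kwargs, n_fit=2
)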
Example #5
    # Oracle (optimal policy)
    oracle_stats = AgentManager(
        ValueIterationAgent,
        env,
        init_kwargs=params_oracle,
        fit_budget=n_episodes,
        eval_kwargs=eval_kwargs,
        n_fit=1,
    )

    # fit
    stats.fit()
    stats_ucbvi.fit()
    stats_random.fit()
    oracle_stats.fit()

    # visualize results
    plot_writer_data(
        [stats, stats_ucbvi, stats_random],
        tag="episode_rewards",
        preprocess_func=np.cumsum,
        title="Cumulative Rewards",
        show=False,
    )
    plot_writer_data([stats, stats_ucbvi, stats_random],
                     tag="dw_time_elapsed",
                     show=False)
    evaluate_agents([stats, stats_ucbvi, stats_random, oracle_stats],
                    n_simulations=20)
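
Example #5 compares three learning agents (stats, stats_ucbvi, stats_random) against a ValueIterationAgent oracle, but the fragment begins at the oracle's manager. The learner managers are AgentManager instances built the same way as oracle_stats, just with learning agents; a hedged sketch of the missing definitions (values are hypothetical):

import numpy as np
from rlberry.agents.dynprog import ValueIterationAgent
from rlberry.envs import GridWorld
from rlberry.manager import AgentManager, evaluate_agents, plot_writer_data

# Hypothetical values; the original prelude is not in the source.
env = (GridWorld, dict(nrows=5, ncols=5))
n_episodes = 100
horizon = 30
eval_kwargs = dict(eval_horizon=horizon, n_simulations=20)
params_oracle = dict(horizon=horizon)  # ValueIterationAgent keyword arguments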
Example #6
def test_agent_manager_1():
    # Define train and evaluation envs
    train_env = (GridWorld, {})

    # Parameters
    params = dict(hyperparameter1=-1, hyperparameter2=100)
    eval_kwargs = dict(eval_horizon=10)

    # Check DummyAgent
    agent = DummyAgent(train_env[0](**train_env[1]), **params)
    agent.fit(10)
    agent.policy(None)

    # Run AgentManager
    params_per_instance = [dict(hyperparameter2=ii) for ii in range(4)]
    stats_agent1 = AgentManager(
        DummyAgent,
        train_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
        init_kwargs_per_instance=params_per_instance,
    )
    stats_agent2 = AgentManager(
        DummyAgent,
        train_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
    )
    agent_manager_list = [stats_agent1, stats_agent2]
    for st in agent_manager_list:
        st.fit()

    for ii, instance in enumerate(stats_agent1.agent_handlers):
        assert instance.hyperparameter1 == -1
        assert instance.hyperparameter2 == ii

    for ii, instance in enumerate(stats_agent2.agent_handlers):
        assert instance.hyperparameter1 == -1
        assert instance.hyperparameter2 == 100

    # learning curves
    plot_writer_data(agent_manager_list, tag="episode_rewards", show=False)

    # compare final policies
    evaluate_agents(agent_manager_list, show=False)

    # check if fitted
    for agent_manager in agent_manager_list:
        assert len(agent_manager.agent_handlers) == 4
        for agent in agent_manager.agent_handlers:
            assert agent.fitted

    # test saving/loading
    fname = stats_agent1.save()
    loaded_stats = AgentManager.load(fname)
    assert stats_agent1.unique_id == loaded_stats.unique_id

    # test hyperparameter optimization call
    loaded_stats.optimize_hyperparams(n_trials=5)
    loaded_stats.optimize_hyperparams(n_trials=5, continue_previous=True)

    for st in agent_manager_list:
        st.clear_output_dir()
Example #7
def test_agent_manager_2():
    # Define train and evaluation envs
    train_env = (GridWorld, {})
    eval_env = (GridWorld, {})

    # Parameters
    params = {}
    eval_kwargs = dict(eval_horizon=10)

    # Run AgentManager
    stats_agent1 = AgentManager(
        DummyAgent,
        train_env,
        eval_env=eval_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
    )
    stats_agent2 = AgentManager(
        DummyAgent,
        train_env,
        eval_env=eval_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
    )
    agent_manager_list = [stats_agent1, stats_agent2]
    for st in agent_manager_list:
        st.fit()

    # compare final policies
    evaluate_agents(agent_manager_list, show=False)
    evaluate_agents(agent_manager_list, show=False)

    # learning curves
    plot_writer_data(agent_manager_list, tag="episode_rewards", show=False)

    # check if fitted
    for agent_manager in agent_manager_list:
        assert len(agent_manager.agent_handlers) == 4
        for agent in agent_manager.agent_handlers:
            assert agent.fitted

    # test saving/loading
    fname = stats_agent1.save()
    loaded_stats = AgentManager.load(fname)
    assert stats_agent1.unique_id == loaded_stats.unique_id

    # test hyperparameter optimization
    loaded_stats.optimize_hyperparams(n_trials=5)

    # delete some writers
    stats_agent1.set_writer(1, None)
    stats_agent1.set_writer(2, None)

    stats_agent1.clear_output_dir()
    stats_agent2.clear_output_dir()
Example #8
ppo_params["k_epochs"] = 4

eval_kwargs = dict(eval_horizon=horizon, n_simulations=20)

ppo_stats = AgentManager(
    PPOAgent,
    env,
    fit_budget=n_episodes,
    eval_kwargs=eval_kwargs,
    init_kwargs=ppo_params,
    n_fit=2,
)
ppo_stats.fit(n_episodes // 2)
plot_writer_data(
    ppo_stats,
    tag="episode_rewards",
    preprocess_func=np.cumsum,
    title="Cumulative Rewards",
    show=False,
)
evaluate_agents([ppo_stats], show=False)
ppo_stats.fit(n_episodes // 4)
plot_writer_data(
    ppo_stats,
    tag="episode_rewards",
    preprocess_func=np.cumsum,
    title="Cumulative Rewards",
    show=False,
)
evaluate_agents([ppo_stats], show=True)
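
Example #8 modifies a pre-existing ppo_params dict and uses env, n_episodes and horizon defined before the snippet; a plausible prelude with hypothetical values is sketched below. The pattern the example illustrates is the same as in Example #1: successive fit calls on the same AgentManager keep training the same agent instances, so the agents can be evaluated mid-training and then trained further.

import numpy as np
from rlberry.agents.torch import PPOAgent
from rlberry.envs import gym_make
from rlberry.manager import AgentManager, evaluate_agents, plot_writer_data

# Hypothetical values; the original prelude is not in the source.
env = (gym_make, dict(id="CartPole-v1"))
n_episodes = 400
horizon = 500
ppo_params = dict(gamma=0.99)  # base PPOAgent keyword arguments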