def test_single_agent_tasks_only_accept_one_agent(pendulum_task,
                                                  ppo_config_dict):
    agent = build_PPO_Agent(pendulum_task, ppo_config_dict, 'Test-PPO')
    with pytest.raises(NotImplementedError) as _:
        _ = benchmark_agents_on_tasks(tasks=[pendulum_task],
                                      agents=[agent, agent],
                                      num_episodes=1)

def test_all_tasks_must_be_single_agent_or_multiagent(RPSTask, pendulum_task,
                                                      ppo_config_dict):
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'Test-PPO')
    with pytest.raises(ValueError) as _:
        _ = benchmark_agents_on_tasks(tasks=[RPSTask, pendulum_task],
                                      agents=[agent],
                                      num_episodes=1)

def test_if_populate_all_agents_is_set_a_single_agent_must_be_provided(
        RPSTask, ppo_config_dict):
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'Test-PPO')
    with pytest.raises(ValueError) as _:
        _ = benchmark_agents_on_tasks(tasks=[RPSTask],
                                      agents=[agent, agent],
                                      num_episodes=1,
                                      populate_all_agents=True)
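
Taken together, these checks say a valid call pairs each single-agent task with exactly one agent, keeps all tasks of the same kind, and only uses populate_all_agents=True with a single agent on a multi-agent task. A hypothetical well-formed call, reusing the pendulum agent built in the first test above, would be:

benchmark_agents_on_tasks(tasks=[pendulum_task],
                          agents=[agent],
                          num_episodes=1)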
Example 4
def main():
    task = generate_task("Yaniv-v0", EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    # random_r1 = build_Random_Agent(task, {}, agent_name="random")

    ppo = build_PPO_Agent(task, hyperparams, "ppo")

    traj = task.run_episode(
        [ppo, ppo],
        training=True,
    )
    print(traj)
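
The `hyperparams` dict above is assumed to be defined elsewhere in the module. A minimal hypothetical sketch, reusing the PPO config keys shown in Example 8 below, could look like:

hyperparams = {
    'discount': 0.99, 'use_gae': True, 'use_cuda': False, 'gae_tau': 0.95,
    'entropy_weight': 0.01, 'gradient_clip': 5, 'optimization_epochs': 10,
    'mini_batch_size': 64, 'ppo_ratio_clip': 0.2, 'learning_rate': 3.0e-4,
    'adam_eps': 1.0e-5, 'horizon': 2048, 'phi_arch': 'MLP',
    'actor_arch': 'None', 'critic_arch': 'None',
}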
Example 5
def test_integration_ppo_rock_paper_scissors(ppo_config_dict, RPSTask):
    population = [
        build_PPO_Agent(RPSTask, ppo_config_dict, 'Test-1'),
        build_PPO_Agent(RPSTask, ppo_config_dict.copy(), 'Test-2')
    ]
    winrate_matrix_metagame = compute_winrate_matrix_metagame(
        population=population,
        episodes_per_matchup=5,
        task=RPSTask,
        num_workers=1)

    # Diagonal winrates are all 0.5
    np.testing.assert_allclose(
        winrate_matrix_metagame.diagonal(),
        np.full(winrate_matrix_metagame.diagonal().shape, 0.5))
    # a_i,j + a_j,i = 1 for all non-diagonal entries
    for i, j in zip(*np.triu_indices_from(winrate_matrix_metagame, k=1)):
        complementary_sum = winrate_matrix_metagame[
            i, j] + winrate_matrix_metagame[j, i]
        np.testing.assert_allclose(complementary_sum, 1.)

def test_zero_or_negative_episodes_raises_value_exception(
        RPSTask, ppo_config_dict):
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'Test-PPO')
    with pytest.raises(ValueError) as _:
        _ = benchmark_agents_on_tasks(tasks=[RPSTask],
                                      agents=[agent],
                                      num_episodes=-1)
    with pytest.raises(ValueError) as _:
        _ = benchmark_agents_on_tasks(tasks=[RPSTask],
                                      agents=[agent],
                                      num_episodes=0)

def test_if_populate_all_agents_is_not_set_having_fewer_or_more_agents_raises_value_error(
        RPSTask, ppo_config_dict):
    agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'Test-PPO')
    with pytest.raises(ValueError) as _:
        _ = benchmark_agents_on_tasks(tasks=[RPSTask],
                                      agents=[agent],
                                      num_episodes=1,
                                      populate_all_agents=False)
    with pytest.raises(ValueError) as _:
        _ = benchmark_agents_on_tasks(tasks=[RPSTask],
                                      agents=[agent, agent, agent],
                                      num_episodes=1,
                                      populate_all_agents=False)
Example 8
def initialize_agent(task):
    # Config defined in paper
    config = dict()
    config['discount'] = 0.99
    config['use_gae'] = True
    config['use_cuda'] = False
    config['gae_tau'] = 0.95
    config['entropy_weight'] = 0.01
    config['gradient_clip'] = 5
    config['optimization_epochs'] = 10
    config['mini_batch_size'] = 64
    config['ppo_ratio_clip'] = 0.2
    config['learning_rate'] = 3.0e-4
    config['adam_eps'] = 1.0e-5
    config['horizon'] = 2048
    config['phi_arch'] = 'MLP'
    config['actor_arch'] = 'None'
    config['critic_arch'] = 'None'
    config['state_preprocess'] = flatten_and_turn_into_single_element_batch

    agent = build_PPO_Agent(task, config, 'ppo_agent')
    return agent
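
A hypothetical usage sketch, combining this helper with the task construction and run_episode call from Example 4 (the environment id is reused from that example purely for illustration):

task = generate_task("Yaniv-v0", EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
ppo_agent = initialize_agent(task)
trajectory = task.run_episode([ppo_agent, ppo_agent], training=True)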
Example 9
def partial_match_build_function(agent_name, task, config):
    if agent_name.startswith('tabularqlearning'):
        return build_TabularQ_Agent(task, config, agent_name)
    if agent_name.startswith('deepqlearning'):
        return build_DQN_Agent(task, config, agent_name)
    if agent_name.startswith('ppo'):
        return build_PPO_Agent(task, config, agent_name)
    if agent_name.startswith('expert_iteration'):
        return build_ExpertIteration_Agent(task, config, agent_name)
    raise ValueError(f'Unknown agent name: {agent_name}')
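
A hypothetical usage sketch of the partial-match builder (the agent name is illustrative, and `task` and `config` are assumed to be constructed as in the earlier examples, e.g. via generate_task and a config dict like the one in Example 8):

ppo_agent = partial_match_build_function('ppo_self_play', task, config)
# An unrecognized prefix raises ValueError:
# partial_match_build_function('sarsa_agent', task, config)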