def test_all_tasks_must_be_single_agent_or_multiagent(RPSTask, pendulum_task, ppo_config_dict):
    """Benchmarking a mixed list of single-agent and multiagent tasks must raise ValueError."""
    ppo_agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'Test-PPO')
    mixed_tasks = [RPSTask, pendulum_task]
    with pytest.raises(ValueError):
        benchmark_agents_on_tasks(tasks=mixed_tasks,
                                  agents=[ppo_agent],
                                  num_episodes=1)
def test_if_populate_all_agents_is_set_a_single_agent_must_be_provided(
        RPSTask, ppo_config_dict):
    """With populate_all_agents=True, passing more than one agent must raise ValueError."""
    ppo_agent = build_PPO_Agent(RPSTask, ppo_config_dict, 'Test-PPO')
    with pytest.raises(ValueError):
        benchmark_agents_on_tasks(tasks=[RPSTask],
                                  agents=[ppo_agent, ppo_agent],
                                  num_episodes=1,
                                  populate_all_agents=True)
def test_single_agent_can_populate_all_agents(RPSTask):
    """A single agent copied into every slot of RPS self-plays to roughly a 50% winrate."""
    winrates = benchmark_agents_on_tasks(tasks=[RPSTask],
                                         agents=[rockAgent],
                                         num_episodes=200,
                                         populate_all_agents=True)
    # Mirror match: expect ~0.5 within a tolerance that absorbs sampling noise.
    np.testing.assert_allclose([0.5], winrates, atol=0.1)
def generate_evaluation_matrix(cool_game_params, logger):
    """Build a 3x3 antisymmetric evaluation matrix for CoolGame bot matchups.

    Bot indices: 0: SawBot, 1: TorchBot, 2: NailBot. Entry [i][j] holds the
    benchmarked winrate of bot i against bot j (as produced by an MCTS agent
    populating both player slots), with the lower triangle filled by negation.

    :param cool_game_params: Extra kwargs forwarded to generate_task for CoolGame-v0.
    :param logger: Logger used to record benchmark settings, winrates and params.
    :returns: np.ndarray of shape (3, 3) with zero diagonal.
    """
    benchmarking_episodes = 1
    mcts_budget = 1
    saw_vs_torch_task = generate_task('CoolGame-v0', EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                      botA_type=0, botB_type=1, **cool_game_params)
    saw_vs_nail_task = generate_task('CoolGame-v0', EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                     botA_type=0, botB_type=2, **cool_game_params)
    torch_vs_nail_task = generate_task('CoolGame-v0', EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                       botA_type=1, botB_type=2, **cool_game_params)

    mcts_config = {'budget': mcts_budget}
    mcts_agent = build_MCTS_Agent(saw_vs_torch_task, mcts_config, agent_name='MCTS agent')

    # saw_winrates[0]: SawBot vs TorchBot, saw_winrates[1]: SawBot vs NailBot.
    saw_winrates = benchmark_agents_on_tasks(
        tasks=[saw_vs_torch_task, saw_vs_nail_task],
        agents=[mcts_agent],
        populate_all_agents=True,
        num_episodes=benchmarking_episodes)
    # nail_winrate[0]: TorchBot vs NailBot (name kept for backward consistency).
    nail_winrate = benchmark_agents_on_tasks(
        tasks=[torch_vs_nail_task],
        agents=[mcts_agent],
        populate_all_agents=True,
        num_episodes=benchmarking_episodes)

    bench_msg = f'episodes={benchmarking_episodes} MCTS_budget={mcts_budget}'
    winrates_msg = f'winrates=saw:{saw_winrates} nail:{nail_winrate}'
    logger.info(bench_msg)
    logger.info(winrates_msg)
    logger.info(f'params={cool_game_params}')

    # NOTE(review): negation (rather than 1 - winrate) is the file's existing
    # convention for the lower triangle — confirm downstream consumers expect it.
    # Fixed: entry [2][0] (NailBot vs SawBot) previously negated saw_winrates[0]
    # (the saw-vs-torch result); it must negate saw_winrates[1] (saw-vs-nail).
    return np.array([[0., saw_winrates[0], saw_winrates[1]],
                     [-saw_winrates[0], 0., nail_winrate[0]],
                     [-saw_winrates[1], -nail_winrate[0], 0.]])
def test_can_compute_winrate_for_player1_multiagent_task(RPSTask):
    """Rock loses every episode to paper (winrate 0) and beats scissors every time (winrate 1)."""
    # Ugly, would be awesome to have it in a one line
    vs_paper, vs_scissors = deepcopy(RPSTask), deepcopy(RPSTask)
    vs_paper.extend_task(agents={1: paperAgent})
    vs_scissors.extend_task(agents={1: scissorsAgent})
    winrates = benchmark_agents_on_tasks(tasks=[vs_paper, vs_scissors],
                                         agents=[rockAgent],
                                         num_episodes=10)
    np.testing.assert_array_equal([0, 1], winrates)
def test_can_compute_cumulative_reward_for_agent_single_agent_task(
        RPSTask2Repetitions):
    """With keep_cumulative_rewards=True the benchmark also returns per-task cumulative rewards."""
    vs_paper, vs_scissors = deepcopy(RPSTask2Repetitions), deepcopy(RPSTask2Repetitions)
    vs_paper.extend_task(agents={1: paperAgent})
    vs_scissors.extend_task(agents={1: scissorsAgent})
    # Two repetitions per episode: rock loses both to paper (-2), wins both vs scissors (+2).
    expected_cumulative_rewards = [-2., 2]
    _, cumulative_rewards = benchmark_agents_on_tasks(
        tasks=[vs_paper, vs_scissors],
        agents=[rockAgent],
        keep_cumulative_rewards=True,
        num_episodes=10)
    np.testing.assert_array_equal(expected_cumulative_rewards, cumulative_rewards)