def test_mcts_can_take_actions_discrete_obvservation_discrete_action(
        Connect4Task, mcts_config_dict):
    """Smoke test: two MCTS agents can play an episode against each other.

    Both agents are built from the same ``mcts_config_dict``. The test
    passes as long as building them and running one non-training episode
    raises no exception; nothing is asserted about the outcome.
    """
    agents = [
        build_MCTS_Agent(Connect4Task, mcts_config_dict, agent_name=name)
        for name in ('MCTS1-test', 'MCTS2-test')
    ]
    Connect4Task.run_episode(agents, training=False)
def test_can_defeat_random_play_in_connect4_both_positions_single_env(
        Connect4Task, expert_iteration_config_dict):
    """Check an Expert Iteration agent beats a random agent from either seat.

    The MCTS budgets in the config are raised before the Expert Iteration
    agent is built. One non-training episode is then played per seating
    order, and the reported winner must be the index at which the Expert
    Iteration agent was seated.
    """
    expert_iteration_config_dict['mcts_budget'] = 100
    expert_iteration_config_dict['mcts_rollout_budget'] = 20

    strong_agent = build_ExpertIteration_Agent(
        Connect4Task, expert_iteration_config_dict, agent_name='MCTS1-test')
    weak_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    # Each entry: (seating order, index of the higher-budget agent).
    match_ups = (
        ([strong_agent, weak_agent], 0),
        ([weak_agent, strong_agent], 1),
    )
    for seating, expected_winner in match_ups:
        trajectory = Connect4Task.run_episode(seating, training=False)
        assert trajectory.winner == expected_winner
def test_can_defeat_random_play_in_connect4_both_positions(
        Connect4Task, mcts_config_dict):
    """Check the MCTS agent built with ``budget == 50`` wins from either seat.

    One agent is built from the config as provided by the fixture, the other
    after ``'budget'`` has been set to 50. A non-training episode is played
    for each seating order and ``extract_winner`` must report the index of
    the agent built with the budget of 50.
    """
    fixture_budget_agent = build_MCTS_Agent(
        Connect4Task, mcts_config_dict, agent_name='MCTS1-test')

    # The same dict is mutated between the two builds.
    # NOTE(review): assumes the fixture's default budget is well below 50 and
    # that the first agent does not observe this later mutation -- confirm.
    mcts_config_dict['budget'] = 50
    budget_50_agent = build_MCTS_Agent(
        Connect4Task, mcts_config_dict, agent_name='MCTS2-test')

    # Higher-budget agent seated second (index 1).
    trajectory = Connect4Task.run_episode(
        [fixture_budget_agent, budget_50_agent], training=False)
    assert extract_winner(trajectory) == 1

    # Higher-budget agent seated first (index 0).
    trajectory = Connect4Task.run_episode(
        [budget_50_agent, fixture_budget_agent], training=False)
    assert extract_winner(trajectory) == 0
def test_deterministic_agent_can_act_on_multiagent_sequential_environment(
        Connect4Task):
    """Check two deterministic agents alternate their fixed actions.

    Each agent is configured with one constant action. After a non-training
    episode, the action recorded at trajectory step ``i`` must equal the
    action configured for agent ``i % 2``.
    """
    expected_actions = [0, 1]
    first_agent = build_Deterministic_Agent(
        Connect4Task, {'action': expected_actions[0]}, 'DeterministicTest-1')
    second_agent = build_Deterministic_Agent(
        Connect4Task, {'action': expected_actions[1]}, 'DeterministicTest-2')

    trajectory = Connect4Task.run_episode(
        [first_agent, second_agent], training=False)

    # Each trajectory entry unpacks into five fields; only the second one
    # (the action) is checked here.
    for step_index, (_, action, _, _, _) in enumerate(trajectory):
        assert action == expected_actions[step_index % 2]
def test_sequential_trajectories_feature_agent_predictions_single_env(
        Connect4Task):
    """Check a trajectory's extra info against the agents' expected predictions.

    Two deterministic agents (one always playing action 0, the other always
    playing action 1) play one non-training episode. The first agent has
    ``requires_opponents_prediction`` switched on. The resulting trajectory
    is handed to ``compare_trajectory_extra_info_against_expected`` together
    with one expected prediction per agent.
    """
    agent_1 = build_Deterministic_Agent(
        Connect4Task, {'action': 0}, 'Col-0-DeterministicAgent')
    # NOTE(review): presumably this flag is what makes run_episode record
    # the opponent's predictions in the trajectory -- confirm.
    agent_1.requires_opponents_prediction = True  # Required!

    # The name reflects the action this agent always plays and keeps the
    # two agents' names distinct.
    agent_2 = build_Deterministic_Agent(
        Connect4Task, {'action': 1}, 'Col-1-DeterministicAgent')

    trajectory = Connect4Task.run_episode([agent_1, agent_2], training=False)

    # One expected prediction per agent: the fixed action plus a one-hot
    # row over 7 entries with the 1. at that action's index.
    expected_prediction_1 = {'a': 0, 'probs': [[1., 0., 0., 0., 0., 0., 0.]]}
    expected_prediction_2 = {'a': 1, 'probs': [[0., 1., 0., 0., 0., 0., 0.]]}
    expected_predictions = [expected_prediction_1, expected_prediction_2]

    compare_trajectory_extra_info_against_expected(trajectory,
                                                   expected_predictions)
def test_can_use_apprentice_in_expert_in_expansion_and_rollout_phase(
        Connect4Task, expert_iteration_config_dict):
    """Smoke test: Expert Iteration agents run with the apprentice enabled.

    ``'use_apprentice_in_expert'`` is switched on and ``'rollout_budget'``
    is set to 0 before two agents are built from the shared config. The
    test passes if one non-training episode runs without raising; nothing
    is asserted about the outcome.
    """
    expert_iteration_config_dict['use_apprentice_in_expert'] = True
    # NOTE(review): another test in this file sets 'mcts_rollout_budget' on
    # this same fixture -- confirm 'rollout_budget' is the key the agent
    # builder actually reads.
    expert_iteration_config_dict['rollout_budget'] = 0

    players = [
        build_ExpertIteration_Agent(
            Connect4Task, expert_iteration_config_dict, agent_name=name)
        for name in ('ExIt1-test', 'ExIt2-test')
    ]
    Connect4Task.run_episode(players, training=False)
def test_expert_iteration_can_take_actions_discrete_obvservation_discrete_action(
        Connect4Task, expert_iteration_config_dict):
    """Smoke test: two Expert Iteration agents can play an episode.

    Both agents are built from the unmodified fixture config. The test
    passes as long as building them and running one non-training episode
    raises no exception; nothing is asserted about the outcome.
    """
    first_player, second_player = (
        build_ExpertIteration_Agent(
            Connect4Task, expert_iteration_config_dict, agent_name=name)
        for name in ('ExIt1-test', 'ExIt2-test')
    )
    Connect4Task.run_episode([first_player, second_player], training=False)