def test_mcts_can_take_actions_discrete_obvservation_discrete_action(
        Connect4Task, mcts_config_dict):
    """Smoke test: two MCTS agents can play a full Connect4 episode."""
    players = [
        build_MCTS_Agent(Connect4Task, mcts_config_dict,
                         agent_name=f'MCTS{seat}-test')
        for seat in (1, 2)
    ]
    Connect4Task.run_episode(players, training=False)
def test_can_defeat_random_play_in_connect4_both_positions_single_env(Connect4Task, expert_iteration_config_dict):
    """An ExIt agent with a sizeable MCTS budget beats random play from either seat."""
    expert_iteration_config_dict['mcts_budget'] = 100
    expert_iteration_config_dict['mcts_rollout_budget'] = 20
    strong_agent = build_ExpertIteration_Agent(
        Connect4Task, expert_iteration_config_dict, agent_name='MCTS1-test')
    weak_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    # ExIt moves first: it should win as player 0 (much higher budget).
    trajectory_as_first = Connect4Task.run_episode(
        [strong_agent, weak_agent], training=False)
    assert trajectory_as_first.winner == 0

    # ExIt moves second: it should win as player 1 (much higher budget).
    trajectory_as_second = Connect4Task.run_episode(
        [weak_agent, strong_agent], training=False)
    assert trajectory_as_second.winner == 1
def test_can_defeat_random_play_in_connect4_both_positions_multi_env(Connect4Task, expert_iteration_config_dict):
    """Across vectorized envs, a budgeted ExIt agent beats random play from both seats."""
    expert_iteration_config_dict['mcts_budget'] = 100
    expert_iteration_config_dict['mcts_rollout_budget'] = 20
    strong_agent = build_ExpertIteration_Agent(
        Connect4Task, expert_iteration_config_dict, agent_name='MCTS1-test')
    weak_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    # ExIt as player 0 (much higher budget): every episode should be a win.
    trajectories = Connect4Task.run_episodes(
        [strong_agent, weak_agent], training=False, num_envs=4, num_episodes=4)
    assert all(trajectory.winner == 0 for trajectory in trajectories)

    # ExIt as player 1 (much higher budget): every episode should be a win.
    trajectories = Connect4Task.run_episodes(
        [weak_agent, strong_agent], training=False, num_envs=4, num_episodes=4)
    assert all(trajectory.winner == 1 for trajectory in trajectories)
def test_can_collect_one_hot_encoded_opponent_action_multi_env(Connect4Task, expert_iteration_config_dict):
    """With agent modelling and observed actions requested, the ExIt memory
    stores opponent actions as one-hot vectors (NaN where unobserved)."""
    expert_iteration_config_dict['use_agent_modelling'] = True
    expert_iteration_config_dict['request_observed_action'] = True
    modelling_agent = build_ExpertIteration_Agent(
        Connect4Task, expert_iteration_config_dict,
        agent_name='ExIt-opponent_modelling-test')
    assert modelling_agent.requires_opponents_prediction
    opponent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    # Training mode is required so the ExIt agent runs `handle_experience`.
    _ = Connect4Task.run_episodes(
        agent_vector=[modelling_agent, opponent],
        training=True,
        num_envs=2,
        num_episodes=2)

    memory = modelling_agent.algorithm.memory
    # We only check for existence of the keys, not their full contents.
    assert 'opponent_policy' in memory.keys
    assert 'opponent_s' in memory.keys
    assert len(memory.opponent_policy) == len(memory.s)
    assert len(memory.opponent_policy) == len(memory.opponent_s)

    for observed_action in memory.opponent_policy:
        # NaN entries mark steps with no observed opponent action: skip them.
        if torch.any(torch.isnan(observed_action)):
            continue
        # Otherwise the vector must be one-hot: a single 1, all other elements 0.
        values, counts = observed_action.unique(return_counts=True)
        assert torch.equal(torch.Tensor([0, 1]), values.float())
        assert torch.equal(
            torch.Tensor([Connect4Task.action_dim - 1, 1]), counts.float())
def test_can_defeat_random_play_in_connect4_both_positions(
        Connect4Task, mcts_config_dict):
    """An MCTS agent with a boosted budget beats a default-budget one from either seat."""
    low_budget_agent = build_MCTS_Agent(Connect4Task, mcts_config_dict,
                                        agent_name='MCTS1-test')
    # The config dict is mutated *after* the first agent is built, so only
    # the second agent is constructed with the boosted budget.
    mcts_config_dict['budget'] = 50
    high_budget_agent = build_MCTS_Agent(Connect4Task, mcts_config_dict,
                                        agent_name='MCTS2-test')

    # Second player (index 1) has a much higher budget.
    trajectory = Connect4Task.run_episode(
        [low_budget_agent, high_budget_agent], training=False)
    assert extract_winner(trajectory) == 1

    # First player (index 0) has a much higher budget.
    trajectory = Connect4Task.run_episode(
        [high_budget_agent, low_budget_agent], training=False)
    assert extract_winner(trajectory) == 0
def test_deterministic_agent_can_act_on_multiagent_sequential_environment(
        Connect4Task):
    """Two fixed-action agents alternate turns, so actions alternate 0, 1, 0, 1, ..."""
    expected_actions = [0, 1]
    players = [
        build_Deterministic_Agent(Connect4Task, {'action': action},
                                  f'DeterministicTest-{seat}')
        for seat, action in enumerate(expected_actions, start=1)
    ]
    trajectory = Connect4Task.run_episode(players, training=False)
    for step_index, (_, action, _, _, _) in enumerate(trajectory):
        assert action == expected_actions[step_index % 2]
def test_can_use_data_augmentation_to_double_experiences(Connect4Task, expert_iteration_config_dict):
    """WIP: with a horizontal-symmetry data augmentation configured, running an
    episode should double the experiences pushed into the agent's storage.

    Fix: removed a leftover `import ipdb; ipdb.set_trace()` debugger breakpoint
    that would hang any non-interactive test run.

    TODO: add the missing assertions:
      - number of datapoints in storage is twice the number of datapoints elsewhere
      - there is a single "done" flag in the storage (i.e. finished episodes == 1 in agent)
    """
    expert_iteration_config_dict['state_preprocessing_fn'] = 'turn_into_single_element_batch'
    # NOTE(review): the key spelling 'data_augmnentation_fn' looks like a typo
    # ('augmnentation') — confirm against the config schema before renaming it here.
    expert_iteration_config_dict['data_augmnentation_fn'] = {
        'name': 'generate_horizontal_symmetry',
        'flip_obs_on_dim': 1
    }
    ex_it = build_ExpertIteration_Agent(Connect4Task,
                                        expert_iteration_config_dict,
                                        agent_name='ExIt1-test')
    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')
    # Training mode so experiences are collected; result unused until the
    # TODO assertions above are written.
    _ = Connect4Task.run_episodes(agent_vector=[ex_it, random_agent],
                                  num_envs=2, num_episodes=1,
                                  training=True)
def test_sequential_trajectories_feature_agent_predictions_single_env(
        Connect4Task):
    """Single-env trajectories record each agent's prediction: the action taken
    and a one-hot probability vector (batched: note the nested list in 'probs')."""
    agent_1 = build_Deterministic_Agent(Connect4Task, {'action': 0},
                                        'Col-0-DeterministicAgent')
    agent_1.requires_opponents_prediction = True  # Required!
    # Fix: this agent plays column 1, so name it 'Col-1-...' (was a copy-paste
    # of agent_1's 'Col-0-...' label).
    agent_2 = build_Deterministic_Agent(Connect4Task, {'action': 1},
                                        'Col-1-DeterministicAgent')
    trajectory = Connect4Task.run_episode([agent_1, agent_2], training=False)

    expected_prediction_1 = {'a': 0, 'probs': [[1., 0., 0., 0., 0., 0., 0.]]}
    expected_prediction_2 = {'a': 1, 'probs': [[0., 1., 0., 0., 0., 0., 0.]]}
    expected_predictions = [expected_prediction_1, expected_prediction_2]
    compare_trajectory_extra_info_against_expected(trajectory, expected_predictions)
def test_deterministic_agent_can_act_on_async_single_agent(Connect4Task):
    """Fixed-action agents alternate correctly across vectorized (async) envs."""
    expected_actions = [0, 1]
    players = [
        build_Deterministic_Agent(Connect4Task, {'action': action},
                                  f'DeterministicTest-{seat}')
        for seat, action in enumerate(expected_actions, start=1)
    ]
    trajectories = Connect4Task.run_episodes(players, training=False,
                                             num_envs=2, num_episodes=2)
    # Every trajectory must alternate actions 0, 1, 0, 1, ...
    for trajectory in trajectories:
        for step_index, (_, action, _, _, _) in enumerate(trajectory):
            assert action == expected_actions[step_index % 2]
def test_can_collect_opponent_action_distributions_multi_env(Connect4Task, expert_iteration_config_dict):
    """With agent modelling enabled, the ExIt memory collects opponent policies
    and opponent states alongside the agent's own experience."""
    expert_iteration_config_dict['use_agent_modelling'] = True
    modelling_agent = build_ExpertIteration_Agent(
        Connect4Task, expert_iteration_config_dict,
        agent_name='ExIt-opponent_modelling-test')
    assert modelling_agent.requires_opponents_prediction
    opponent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    # Training mode is required so the ExIt agent runs `handle_experience`.
    _ = Connect4Task.run_episodes(
        agent_vector=[modelling_agent, opponent],
        training=True,
        num_envs=2,
        num_episodes=2)

    memory = modelling_agent.algorithm.memory
    # We only check for existence of the keys, not their full contents.
    assert 'opponent_policy' in memory.keys
    assert 'opponent_s' in memory.keys
    # Opponent entries must stay aligned one-to-one with the agent's own states.
    assert len(memory.opponent_policy) == len(memory.s)
    assert len(memory.opponent_policy) == len(memory.opponent_s)
def test_sequential_trajectories_feature_agent_predictions_multienv(
        Connect4Task):
    """Multi-env trajectories record each agent's prediction (action + one-hot probs).

    NOTE: in single-env runs there's a batch dimension in 'probs', but not in
    multiagent_loops. Does this matter?
    """
    agent_1 = build_Deterministic_Agent(Connect4Task, {'action': 0},
                                        'Col-0-DeterministicAgent')
    agent_1.requires_opponents_prediction = True  # Required!
    # Fix: this agent plays column 1, so name it 'Col-1-...' (was a copy-paste
    # of agent_1's 'Col-0-...' label).
    agent_2 = build_Deterministic_Agent(Connect4Task, {'action': 1},
                                        'Col-1-DeterministicAgent')
    trajectories = Connect4Task.run_episodes([agent_1, agent_2], training=False,
                                             num_envs=2, num_episodes=2)

    expected_prediction_1 = {'a': 0, 'probs': [1., 0., 0., 0., 0., 0., 0.]}
    expected_prediction_2 = {'a': 1, 'probs': [0., 1., 0., 0., 0., 0., 0.]}
    expected_predictions = [expected_prediction_1, expected_prediction_2]
    for trajectory in trajectories:
        compare_trajectory_extra_info_against_expected(trajectory, expected_predictions)
def test_can_use_apprentice_in_expert_in_expansion_and_rollout_phase(Connect4Task, expert_iteration_config_dict):
    """Smoke test: ExIt agents using the apprentice inside the expert
    (with no rollouts) can complete an episode against each other."""
    expert_iteration_config_dict['use_apprentice_in_expert'] = True
    # NOTE(review): other tests in this file set 'mcts_rollout_budget';
    # confirm 'rollout_budget' is the key the config schema actually reads.
    expert_iteration_config_dict['rollout_budget'] = 0
    players = [
        build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict,
                                    agent_name=f'ExIt{seat}-test')
        for seat in (1, 2)
    ]
    Connect4Task.run_episode(players, training=False)
def test_expert_iteration_can_take_actions_discrete_obvservation_discrete_action(Connect4Task, expert_iteration_config_dict):
    """Smoke test: two ExIt agents can play a full Connect4 episode."""
    players = [
        build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict,
                                    agent_name=f'ExIt{seat}-test')
        for seat in (1, 2)
    ]
    Connect4Task.run_episode(players, training=False)