def main():
    task = generate_task('CoolGame-v0', EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                         botA_type=1, botB_type=2)
    random_r1 = build_Random_Agent(task, {}, agent_name='random')
    random_r2 = deepcopy(random_r1)

    mcts_config = {
        'budget': 10,
        'rollout_budget': 1000,
        'selection_phase': 'ucb1',
        'exploration_factor_ucb1': 4  # Might need to tweak this?
    }
    mcts_r1 = build_MCTS_Agent(task, mcts_config, agent_name='P1: MCTS')
    mcts_r2 = build_MCTS_Agent(task, mcts_config, agent_name='P2: MCTS')

    human_r1 = HumanAgent(task.action_dim, name='P1')
    human_r2 = HumanAgent(task.action_dim, name='P2')

    # t = task.run_episode([mcts_r1, mcts_r2], training=False, render_mode='rgb', save_gif=True)
    t = task.run_episode([mcts_r1, mcts_r2], training=False)
    print(t)
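# Standard entry-point guard, added here only so the example above can be executed
# directly as a script.
if __name__ == '__main__':
    main()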
def test_train_apprentice_using_dagger_against_random_connect4(Connect4Task,
                                                               expert_iteration_config_dict,
                                                               mcts_config_dict):
    # Training-worthy parameters
    expert_iteration_config_dict['use_apprentice_in_expert'] = False
    expert_iteration_config_dict['games_per_iteration'] = 10
    expert_iteration_config_dict['mcts_budget'] = 500
    expert_iteration_config_dict['mcts_rollout_budget'] = 100
    expert_iteration_config_dict['initial_memory_size'] = 10000
    expert_iteration_config_dict['memory_size_increase_frequency'] = 5
    expert_iteration_config_dict['end_memory_size'] = 30000
    expert_iteration_config_dict['use_dirichlet'] = False
    expert_iteration_config_dict['learning_rate'] = 1.0e-2
    expert_iteration_config_dict['batch_size'] = 256
    expert_iteration_config_dict['num_epochs_per_iteration'] = 4
    expert_iteration_config_dict['residual_connections'] = [(1, 2), (2, 3), (3, 4)]

    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict,
                                        agent_name='ExIt-test')
    summary_writer = SummaryWriter('expert_iteration_test')
    ex_it.algorithm.summary_writer = summary_writer

    random_agent = build_Random_Agent(Connect4Task, mcts_config_dict, agent_name='Random')

    parallel_learn_against_fix_opponent(
        ex_it,
        fixed_opponent=random_agent,
        agent_position=0,
        task=Connect4Task,
        training_episodes=5000,
        test_episodes=100,
        benchmarking_episodes=20,
        benchmark_every_n_episodes=500,
        reward_tolerance=0.2,
        maximum_average_reward=1.0,
        evaluation_method='last',
        show_progress=True,
        summary_writer=summary_writer)
def test_can_collect_one_hot_encoded_opponent_action_multi_env(Connect4Task,
                                                               expert_iteration_config_dict):
    expert_iteration_config_dict['use_agent_modelling'] = True
    expert_iteration_config_dict['request_observed_action'] = True
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict,
                                        agent_name='ExIt-opponent_modelling-test')
    assert ex_it.requires_opponents_prediction

    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')
    _ = Connect4Task.run_episodes(
        agent_vector=[ex_it, random_agent],
        training=True,  # Required for the ExIt agent to handle experiences
        num_envs=2,
        num_episodes=2)

    # We only check for the existence of the keys, rather than their content
    assert 'opponent_policy' in ex_it.algorithm.memory.keys
    assert 'opponent_s' in ex_it.algorithm.memory.keys
    assert len(ex_it.algorithm.memory.opponent_policy) == len(ex_it.algorithm.memory.s)
    assert len(ex_it.algorithm.memory.opponent_policy) == len(ex_it.algorithm.memory.opponent_s)

    for opponent_action in ex_it.algorithm.memory.opponent_policy:
        # Each stored action is one-hot encoded: a single 1, all other elements are 0
        if torch.any(torch.isnan(opponent_action)):
            continue
        values, counts = opponent_action.unique(return_counts=True)
        assert torch.equal(torch.Tensor([0, 1]), values.float())
        assert torch.equal(torch.Tensor([Connect4Task.action_dim - 1, 1]), counts.float())
def test_random_agent_can_act_on_single_agent_env(CartPoleTask):
    action_space = CartPoleTask.env.action_space
    agent = build_Random_Agent(CartPoleTask, {}, 'RandomTest')
    trajectory = CartPoleTask.run_episode([agent], training=False)
    assert all(map(lambda a: action_space.contains(a),
                   extract_actions_from_trajectory(trajectory)))
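# If `extract_actions_from_trajectory` is not already available, a minimal sketch could
# look like the helper below. It assumes each timestep in the trajectory exposes an
# `.action` attribute, mirroring the `.done` / `.observation` attributes used by the
# parallel-run checks further down; treat it as an illustrative assumption, not the
# repository's actual helper.
def extract_actions_from_trajectory(trajectory):
    return [timestep.action for timestep in trajectory]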
def test_integration_random_agent_rock_paper_scissors(RPSTask):
    population = [
        build_Random_Agent(RPSTask, {}, 'Test-1'),
        build_Random_Agent(RPSTask, {}, 'Test-2')
    ]
    winrate_matrix_metagame = compute_winrate_matrix_metagame(
        population=population,
        episodes_per_matchup=5,
        task=RPSTask,
        num_envs=1)

    # Diagonal winrates are all 0.5
    np.testing.assert_allclose(
        winrate_matrix_metagame.diagonal(),
        np.full(winrate_matrix_metagame.diagonal().shape, 0.5))

    # a_i,j + a_j,i = 1 for all non-diagonal entries
    for i, j in zip(*np.triu_indices_from(winrate_matrix_metagame, k=1)):
        complementary_sum = winrate_matrix_metagame[i, j] + winrate_matrix_metagame[j, i]
        np.testing.assert_allclose(complementary_sum, 1.)
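# Hypothetical sketch of the task fixtures these tests rely on (not the repository's
# actual definitions): the environment ids 'RockPaperScissors-v0' and 'Connect4-v0'
# and the chosen EnvType values are assumptions made for illustration.
import pytest

@pytest.fixture
def RPSTask():
    return generate_task('RockPaperScissors-v0', EnvType.MULTIAGENT_SIMULTANEOUS_ACTION)

@pytest.fixture
def Connect4Task():
    return generate_task('Connect4-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION)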
def test_can_defeat_random_play_in_connect4_both_positions_single_env(Connect4Task,
                                                                      expert_iteration_config_dict):
    expert_iteration_config_dict['mcts_budget'] = 100
    expert_iteration_config_dict['mcts_rollout_budget'] = 20
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict,
                                        agent_name='MCTS1-test')
    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    trajectory = Connect4Task.run_episode([ex_it, random_agent], training=False)
    assert trajectory.winner == 0  # First player (index 0) has a much higher budget

    trajectory = Connect4Task.run_episode([random_agent, ex_it], training=False)
    assert trajectory.winner == 1  # Second player (index 1) has a much higher budget
def test_can_defeat_random_play_in_connect4_both_positions_multi_env(Connect4Task,
                                                                     expert_iteration_config_dict):
    expert_iteration_config_dict['mcts_budget'] = 100
    expert_iteration_config_dict['mcts_rollout_budget'] = 20
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict,
                                        agent_name='MCTS1-test')
    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    trajectories = Connect4Task.run_episodes(
        [ex_it, random_agent], training=False, num_envs=4, num_episodes=4)
    # First player (index 0) has a much higher budget
    assert all(map(lambda t: t.winner == 0, trajectories))

    trajectories = Connect4Task.run_episodes(
        [random_agent, ex_it], training=False, num_envs=4, num_episodes=4)
    # Second player (index 1) has a much higher budget
    assert all(map(lambda t: t.winner == 1, trajectories))
def test_can_use_data_augmentation_to_double_experiences(Connect4Task,
                                                         expert_iteration_config_dict):
    expert_iteration_config_dict['state_preprocessing_fn'] = 'turn_into_single_element_batch'
    expert_iteration_config_dict['data_augmnentation_fn'] = {
        'name': 'generate_horizontal_symmetry',
        'flip_obs_on_dim': 1
    }
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict,
                                        agent_name='ExIt1-test')
    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')

    trajectories = Connect4Task.run_episodes(agent_vector=[ex_it, random_agent],
                                             num_envs=2, num_episodes=1, training=True)

    # TODO: complete this test. After running an episode against a random opponent with
    # data augmentation set in expert_iteration_config_dict:
    # - Check that the number of datapoints in storage is twice the number of datapoints
    #   collected without augmentation
    # - Check that there is a single "done" flag in the storage (i.e. the agent has
    #   finished only one episode)
    pass
def test_can_collect_opponent_action_distributions_multi_env(Connect4Task,
                                                             expert_iteration_config_dict):
    expert_iteration_config_dict['use_agent_modelling'] = True
    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict,
                                        agent_name='ExIt-opponent_modelling-test')
    assert ex_it.requires_opponents_prediction

    random_agent = build_Random_Agent(Connect4Task, {}, agent_name='Random')
    _ = Connect4Task.run_episodes(
        agent_vector=[ex_it, random_agent],
        training=True,  # Required for the ExIt agent to handle experiences
        num_envs=2,
        num_episodes=2)

    # We only check for the existence of the keys, rather than their content
    assert 'opponent_policy' in ex_it.algorithm.memory.keys
    assert 'opponent_s' in ex_it.algorithm.memory.keys
    assert len(ex_it.algorithm.memory.opponent_policy) == len(ex_it.algorithm.memory.s)
    assert len(ex_it.algorithm.memory.opponent_policy) == len(ex_it.algorithm.memory.opponent_s)
def test_singleagent_tasks_run_faster_on_parallel(env_name):
    task = generate_task(env_name, EnvType.SINGLE_AGENT)
    random_agent = build_Random_Agent(task, {}, 'Test-Random')

    num_episodes = 50
    num_envs = 1
    start = time.time()
    _ = task.run_episodes([random_agent], num_episodes=num_episodes,
                          num_envs=num_envs, training=False)
    total_single = time.time() - start

    start = time.time()
    num_envs = multiprocessing.cpu_count()
    _ = task.run_episodes([random_agent], num_episodes=num_episodes,
                          num_envs=num_envs, training=False)
    total_multiple = time.time() - start

    assert total_multiple < total_single
def test_multiagent_sequential_tasks_run_faster_on_parallel(env_name):
    task = generate_task(env_name, EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    random_agent = build_Random_Agent(task, {}, 'Test-Random')

    num_episodes = 100
    num_envs = 1
    start = time.time()
    _ = task.run_episodes([random_agent, random_agent], num_episodes=num_episodes,
                          num_envs=num_envs, training=False)
    total_single = time.time() - start

    start = time.time()
    num_envs = multiprocessing.cpu_count()
    _ = task.run_episodes([random_agent, random_agent], num_episodes=num_episodes,
                          num_envs=num_envs, training=False)
    total_multiple = time.time() - start

    print('Parallel: ', total_multiple, 'Sequential: ', total_single,
          'Diff: ', total_single - total_multiple)
    assert total_multiple < total_single
def run_parallel_task_with_random_agent(env_name, env_type, num_envs, num_episodes,
                                        model_based_agents=False):
    task = generate_task(env_name, env_type)

    # Random agents: either MCTS-based (with a trivial budget) or purely random
    if model_based_agents:
        mcts_config = {
            'budget': 1,
            'rollout_budget': 0,
            'use_dirichlet': False,
            'dirichlet_alpha': 1,
            'selection_phase': 'ucb1',
            'exploration_factor_ucb1': 1,
            'expose_tree_in_predictions': True
        }
        agent_vector = [
            build_MCTS_Agent(task, mcts_config, 'Test-MCTS-Random')
            for _ in range(task.num_agents)
        ]
    else:
        agent_vector = [
            build_Random_Agent(task, {}, 'Test-Random')
            for _ in range(task.num_agents)
        ]

    # The number of environments may exceed the number of episodes because we want to
    # test that we can generate a specific number of trajectories regardless of the
    # number of environments used to generate them
    trajectories = task.run_episodes(agent_vector, num_episodes=num_episodes,
                                     num_envs=num_envs, training=True,
                                     store_extra_information=True)

    # The number of trajectories is lower-bounded by :param: num_episodes, but multiple
    # environments can finish at the same time, so a few extra trajectories are allowed
    assert (len(trajectories) >= num_episodes) \
        and (len(trajectories) <= (num_episodes + num_envs))

    # All trajectories finish with a "done" flag
    assert all([t[-1].done for t in trajectories])

    # All timesteps except the last one in each trajectory have "done" unset
    for t in trajectories:
        assert all([not timestep.done for timestep in t[:-1]])

    # ASSUMPTION: observation and succ_observation are numpy arrays
    if env_type == EnvType.SINGLE_AGENT:
        # Each timestep's succ_observation matches the next timestep's observation
        assert all([(ex_1.succ_observation == ex_2.observation).all()
                    for t in trajectories
                    for ex_1, ex_2 in zip(t, t[1:])])
    else:
        # Same check, carried out per agent
        assert all([
            (ex_1.succ_observation[a_i] == ex_2.observation[a_i]).all()
            for t in trajectories
            for ex_1, ex_2 in zip(t, t[1:])
            for a_i in range(task.num_agents)
        ])
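# Illustrative example of how the helper above might be driven from pytest; the
# environment id 'Connect4-v0' and the chosen parameter values are assumptions for
# demonstration, not the repository's actual test parametrisation.
@pytest.mark.parametrize('num_envs', [1, 2, multiprocessing.cpu_count()])
def test_sequential_task_trajectories_with_random_agents(num_envs):
    run_parallel_task_with_random_agent('Connect4-v0',
                                        EnvType.MULTIAGENT_SEQUENTIAL_ACTION,
                                        num_envs=num_envs, num_episodes=4)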