def main():
    task = generate_task('CoolGame-v0',
                         EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                         botA_type=1,
                         botB_type=2)

    random_r1 = build_Random_Agent(task, {}, agent_name='random')
    random_r2 = deepcopy(random_r1)

    mcts_config = {
        'budget': 10,
        'rollout_budget': 1000,
        'selection_phase': 'ucb1',
        'exploration_factor_ucb1': 4  # Might need to tweak this?
    }

    mcts_r1 = build_MCTS_Agent(task, mcts_config, agent_name='P1: MCTS')
    mcts_r2 = build_MCTS_Agent(task, mcts_config, agent_name='P2: MCTS')

    human_r1 = HumanAgent(task.action_dim, name='P1')
    human_r2 = HumanAgent(task.action_dim, name='P2')

    # t = task.run_episode([mcts_r1, mcts_r2], training=False, render_mode='rgb', save_gif=True)
    t = task.run_episode([mcts_r1, mcts_r2], training=False)
    print(t)
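
# Minimal entry-point sketch (assumption: this snippet is meant to be executed
# directly); swapping human_r1/human_r2 into run_episode lets a person play
# against the MCTS agents.
if __name__ == '__main__':
    main()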
def generate_evaluation_matrix(cool_game_params, benchmarking_episodes,
                               mcts_budget):
    # 0: SawBot 1: TorchBot 2: NailBot
    import gym_cool_game
    saw_vs_torch_task = generate_task('CoolGame-v0',
                                      EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                      botA_type=0,
                                      botB_type=1,
                                      **cool_game_params)
    saw_vs_nail_task = generate_task('CoolGame-v0',
                                     EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                     botA_type=0,
                                     botB_type=2,
                                     **cool_game_params)
    torch_vs_nail_task = generate_task('CoolGame-v0',
                                       EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                       botA_type=1,
                                       botB_type=2,
                                       **cool_game_params)

    mcts_config = {
        'budget': mcts_budget,
        'rollout_budget': 1000,
        'selection_phase': 'ucb1',
        'exploration_factor_ucb1': 4  # Might need to tweak this?
    }
    mcts_agent = build_MCTS_Agent(saw_vs_torch_task,
                                  mcts_config,
                                  agent_name='MCTS agent')

    saw_vs_torch = compute_matchup_winrates(mcts_agent, saw_vs_torch_task,
                                            'Saw vs Torch',
                                            benchmarking_episodes, mcts_budget)

    saw_vs_nail = compute_matchup_winrates(mcts_agent, saw_vs_nail_task,
                                           'Saw vs Nail',
                                           benchmarking_episodes, mcts_budget)

    torch_vs_nail = compute_matchup_winrates(mcts_agent, torch_vs_nail_task,
                                             'Torch vs Nail',
                                             benchmarking_episodes,
                                             mcts_budget)

    bench_msg = f'episodes={benchmarking_episodes} MCTS_budget={mcts_budget}'
    winrates_msg = f'winrates=saw:[{saw_vs_torch}, {saw_vs_nail}] torch:[{torch_vs_nail}]'
    logger.info(bench_msg)
    logger.info(winrates_msg)
    logger.info(f'params={cool_game_params}')
    wandb.log({
        'Winrate_Saw_vs_Torch': saw_vs_torch,
        'Winrate_Saw_vs_Nail': saw_vs_nail,
        'Winrate_Torch_vs_Nail': torch_vs_nail
    })
    return np.array([[0., saw_vs_torch, saw_vs_nail],
                     [1. - saw_vs_torch, 0., torch_vs_nail],
                     [1. - saw_vs_nail, 1. - torch_vs_nail, 0.]])
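
# Hypothetical consumer of the matrix returned above (not part of the original
# code): entry [i][j] holds bot i's winrate against bot j and the diagonal is 0,
# so a simple per-bot strength summary is the mean of each row's off-diagonal entries.
import numpy as np

def summarise_evaluation_matrix(evaluation_matrix: np.ndarray) -> np.ndarray:
    num_bots = evaluation_matrix.shape[0]
    # Diagonal entries are 0., so the row sum only counts off-diagonal winrates.
    return evaluation_matrix.sum(axis=1) / (num_bots - 1)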
def generate_evaluation_matrix(cool_game_params, benchmarking_episodes,
                               mcts_budget, logger: logging.Logger):
    # 0: SawBot 1: TorchBot 2: NailBot
    import gym_cool_game
    saw_vs_torch_task = generate_task('CoolGame-v0',
                                      EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                      botA_type=0,
                                      botB_type=1,
                                      **cool_game_params)
    saw_vs_nail_task = generate_task('CoolGame-v0',
                                     EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                     botA_type=0,
                                     botB_type=2,
                                     **cool_game_params)
    torch_vs_nail_task = generate_task('CoolGame-v0',
                                       EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                       botA_type=1,
                                       botB_type=2,
                                       **cool_game_params)

    mcts_config = {'budget': mcts_budget, 'rollout_budget': 10}
    mcts_agent = build_MCTS_Agent(saw_vs_torch_task,
                                  mcts_config,
                                  agent_name='MCTS agent')

    saw_vs_torch = compute_matchup_winrates(mcts_agent, saw_vs_torch_task,
                                            'Saw vs Torch',
                                            benchmarking_episodes, mcts_budget,
                                            logger)

    saw_vs_nail = compute_matchup_winrates(mcts_agent, saw_vs_nail_task,
                                           'Saw vs Nail',
                                           benchmarking_episodes, mcts_budget,
                                           logger)

    torch_vs_nail = compute_matchup_winrates(mcts_agent, torch_vs_nail_task,
                                             'Torch vs Nail',
                                             benchmarking_episodes,
                                             mcts_budget, logger)

    bench_msg = f'episodes={benchmarking_episodes} MCTS_budget={mcts_budget}'
    winrates_msg = f'winrates=saw:[{saw_vs_torch}, {saw_vs_nail}] torch:[{torch_vs_nail}]'
    logger.info(bench_msg)
    logger.info(winrates_msg)
    logger.info(f'params={cool_game_params}')
    return np.array([[0., saw_vs_torch, saw_vs_nail],
                     [1. - saw_vs_torch, 0., torch_vs_nail],
                     [1. - saw_vs_nail, 1. - torch_vs_nail, 0.]])
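
# Hypothetical call site for the variant above (parameter values are assumptions);
# unlike the earlier version, it expects a pre-configured logging.Logger and does
# not log to wandb.
import logging

logging.basicConfig(level=logging.INFO)
evaluation_matrix = generate_evaluation_matrix(
    cool_game_params={},
    benchmarking_episodes=10,
    mcts_budget=50,
    logger=logging.getLogger('cool_game_evaluation'))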
def generate_evaluation_matrix(cool_game_params, logger):
    # 0: SawBot 1: TorchBot 2: NailBot
    benchmarking_episodes = 1
    mcts_budget = 1

    saw_vs_torch_task = generate_task('CoolGame-v0',
                                      EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                      botA_type=0,
                                      botB_type=1,
                                      **cool_game_params)
    saw_vs_nail_task = generate_task('CoolGame-v0',
                                     EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                     botA_type=0,
                                     botB_type=2,
                                     **cool_game_params)
    torch_vs_nail_task = generate_task('CoolGame-v0',
                                       EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                       botA_type=1,
                                       botB_type=2,
                                       **cool_game_params)

    mcts_config = {'budget': mcts_budget}
    mcts_agent = build_MCTS_Agent(saw_vs_torch_task,
                                  mcts_config,
                                  agent_name='MCTS agent')

    saw_winrates = benchmark_agents_on_tasks(
        tasks=[saw_vs_torch_task, saw_vs_nail_task],
        agents=[mcts_agent],
        populate_all_agents=True,
        num_episodes=benchmarking_episodes)
    nail_winrate = benchmark_agents_on_tasks(
        tasks=[torch_vs_nail_task],
        agents=[mcts_agent],
        populate_all_agents=True,
        num_episodes=benchmarking_episodes)

    bench_msg = f'episodes={benchmarking_episodes} MCTS_budget={mcts_budget}'
    winrates_msg = f'winrates=saw:{saw_winrates} nail:{nail_winrate}'
    logger.info(bench_msg)
    logger.info(winrates_msg)
    logger.info(f'params={cool_game_params}')
    return np.array([[0., saw_winrates[0], saw_winrates[1]],
                     [-saw_winrates[0], 0., nail_winrate[0]],
                     [-saw_winrates[1], -nail_winrate[0], 0.]])
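
# Hypothetical sanity check (not from the original code): with the -winrate
# convention used in the matrix above, the evaluation matrix is antisymmetric.
import numpy as np

def is_antisymmetric(matrix: np.ndarray, tolerance: float = 1e-8) -> bool:
    # M[i][j] == -M[j][i] for every pair of bots, and the diagonal is 0.
    return bool(np.allclose(matrix, -matrix.T, atol=tolerance))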
def test_multiagent_sequential_tasks_with_model_based_agents_run_faster_on_parallel(
        env_name):
    task = generate_task(env_name, EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    mcts_config = {
        'budget': 10,
        'rollout_budget': 100,
        'use_dirichlet': False,
        'dirichlet_alpha': 1,
        'selection_phase': 'ucb1',
        'exploration_factor_ucb1': 1
    }
    agent_vector = [
        build_MCTS_Agent(task, mcts_config, 'Test-MCTS-Random')
        for _ in range(task.num_agents)
    ]

    start = time.time()
    num_episodes = 10
    num_envs = 1

    _ = task.run_episodes(agent_vector,
                          num_episodes=num_episodes,
                          num_envs=num_envs,
                          training=False)
    total_single = time.time() - start
    print('Sequential: ', total_single)

    start = time.time()
    num_envs = multiprocessing.cpu_count()
    _ = task.run_episodes(agent_vector,
                          num_episodes=num_episodes,
                          num_envs=num_envs,
                          training=False)
    total_multiple = time.time() - start
    print('Parallel: ', total_multiple, 'Sequential: ', total_single, 'Diff: ',
          total_single - total_multiple)
    assert total_multiple < total_single
def estimate_agent_strength(agent: regym.rl_algorithms.agents.Agent,
                            task: regym.environments.Task,
                            desired_winrate: float,
                            initial_mcts_config: Dict,
                            benchmarking_episodes: int = 200) -> int:
    '''
    Computes MCTS budget required to reach a :param: desired winrate against :param: agent
    '''
    config = initial_mcts_config.copy()
    for budget in range(initial_mcts_config['budget'], 2000, 1):
        logger.info(f'Starting benchmarking with BUDGET: {budget}')
        config['budget'] = budget

        mcts_agent = build_MCTS_Agent(task, config, f'MCTS-{budget}')
        traj_1 = task.run_episodes([agent, mcts_agent],
                                   num_episodes=(benchmarking_episodes // 2),
                                   num_envs=-1,
                                   training=False)
        logger.info('Half way through')
        traj_2 = task.run_episodes([mcts_agent, agent],
                                   num_episodes=(benchmarking_episodes // 2),
                                   num_envs=-1,
                                   training=False)
        traj_1_winners = [extract_winner(t)
                          for t in traj_1]  # Our agent is pos: 0
        traj_2_winners = [extract_winner(t)
                          for t in traj_2]  # Our agent is pos: 1
        pos_0_winrate = traj_1_winners.count(0) / len(traj_1)
        pos_1_winrate = traj_2_winners.count(1) / len(traj_2)
        avg_winrate = (pos_0_winrate + pos_1_winrate) / 2
        logger.info(
            f'WINRATES: Total = {avg_winrate}\tPos 0 = {pos_0_winrate}\t Pos 1 = {pos_1_winrate}'
        )
        logger.info('')

        if avg_winrate < desired_winrate:
            return budget
def test_train_vanilla_exit_against_random_connect4(Connect4Task, expert_iteration_config_dict, mcts_config_dict):
    # Train worthy params
    expert_iteration_config_dict['use_apprentice_in_expert'] = True
    expert_iteration_config_dict['games_per_iteration'] = 100

    expert_iteration_config_dict['mcts_budget'] = 200
    expert_iteration_config_dict['mcts_rollout_budget'] = 10000
    expert_iteration_config_dict['initial_memory_size'] = 6000
    expert_iteration_config_dict['memory_size_increase_frequency'] = 2
    expert_iteration_config_dict['end_memory_size'] = 30000
    expert_iteration_config_dict['dirichlet_alpha'] = 1

    expert_iteration_config_dict['batch_size'] = 256
    expert_iteration_config_dict['num_epochs_per_iteration'] = 4
    # expert_iteration_config_dict['residual_connections'] = [(2, 3), (3, 4)]

    ex_it = build_ExpertIteration_Agent(Connect4Task, expert_iteration_config_dict, agent_name=f"ExIt-test:{expert_iteration_config_dict['mcts_budget']}")
    summary_writer = SummaryWriter('expert_iteration_test')
    ex_it.algorithm.summary_writer = summary_writer

    mcts_config_dict['budget'] = 1
    mcts_agent = build_MCTS_Agent(Connect4Task, mcts_config_dict, agent_name=f"MCTS:{mcts_config_dict['budget']}")

    parallel_learn_against_fix_opponent(ex_it,
            fixed_opponent=mcts_agent,
            agent_position=0,
            task=Connect4Task,
            training_episodes=5000,
            test_episodes=100,
            benchmarking_episodes=20,
            benchmark_every_n_episodes=500,
            reward_tolerance=0.2,
            maximum_average_reward=1.0,
            evaluation_method='last',
            show_progress=True,
            num_envs=torch.multiprocessing.cpu_count(),
            summary_writer=summary_writer)
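
# Hypothetical sketch of the Connect4Task fixture consumed by the test above;
# the real fixture is defined elsewhere and may be configured differently.
import pytest

@pytest.fixture
def Connect4Task():
    import gym_connect4  # assumption: registers 'Connect4-v0' with gym
    return generate_task('Connect4-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION)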
def estimate_agent_strength(
        agent: regym.rl_algorithms.agents.Agent,
        task: regym.environments.Task,
        desired_winrate: float,
        initial_mcts_config: Dict,
        logger,
        benchmarking_episodes: int = 200) -> Tuple[int, pd.DataFrame]:
    '''
    Computes MCTS budget for an MCTS agent (with :param: initial_mcts_config)
    required to reach a :param: desired_winrate against :param: agent in
    :param: task.

    TODO: mention that we are talking about a non-symmetrical game

    :param agent: TODO
    :param task: TODO
    :param desired_winrate: TODO
    :param initial_mcts_config: TODO
    :param logger: TODO
    :param benchmarking_episodes: TODO
    :returns pd.DataFrame containing logs about winrates observed during
             strength estimation
    '''
    df = pd.DataFrame(columns=('test_agent_id', 'mcts_budget', 'winrate_pos_0',
                               'winrate_pos_1', 'avg_winrate'))

    config = initial_mcts_config.copy()
    for budget in range(initial_mcts_config['budget'], 2000, 1):
        logger.info(f'Starting benchmarking with BUDGET: {budget}')

        config['budget'] = budget
        mcts_agent = build_MCTS_Agent(task, config, f'MCTS-{budget}')

        traj_1 = task.run_episodes([agent, mcts_agent],
                                   num_episodes=(benchmarking_episodes // 2),
                                   num_envs=-1,
                                   training=False)
        traj_2 = task.run_episodes([mcts_agent, agent],
                                   num_episodes=(benchmarking_episodes // 2),
                                   num_envs=-1,
                                   training=False)

        winrates_1 = [
            len(list(filter(lambda t: t.winner == a_i, traj_1))) / len(traj_1)
            for a_i in range(2)
        ]
        winrates_2 = [
            len(list(filter(lambda t: t.winner == a_i, traj_2))) / len(traj_2)
            for a_i in range(2)
        ]

        avg_winrate = (winrates_1[0] + winrates_2[1]) / 2

        df = df.append(
            {
                'test_agent_id': agent.handled_experiences,
                'mcts_budget': budget,
                'winrate_pos_0': winrates_1[0],
                'winrate_pos_1': winrates_2[1],
                'avg_winrate': avg_winrate
            },
            ignore_index=True)

        logger.info(
            f'WINRATES: Total = {avg_winrate}\tPos 0 = {winrates_1[0]}\t Pos 1 = {winrates_2[1]}'
        )

        if avg_winrate < desired_winrate:
            return budget, df
    os.mkdir(args.name)

    ### To refactor at some point
    #sort_fn = lambda x: int(x.split('_')[-1][:-3])  # ExIt
    sort_fn = lambda x: int(x.split('/')[-1].split('_')[0])  # PPO test training
    sorted_population = load_population_from_path(path=args.path, sort_fn=sort_fn)
    sorted_population.sort(key=lambda agent: agent.finished_episodes)

    for agent in sorted_population:
        agent.requires_environment_model = False
        agent.training = False
    ###

    # Taken from MCTS equivalent strength benchmarking
    mcts_budgets = [29, 42, 42, 38, 45, 56, 48, 49, 51, 42, 53, 46, 35, 49, 49,
                    42, 45, 40, 45, 42, 47, 38, 42, 47, 45, 37, 42, 35, 39, 25,
                    38, 34, 33, 38, 40]
    mcts_population = []
    for budget in mcts_budgets:
        initial_mcts_config = {'budget': budget, 'rollout_budget': 100,
                               'selection_phase': 'ucb1',
                               'exploration_factor_ucb1': 1.41,
                               'use_dirichlet': False,
                               'dirichlet_alpha': None}
        mcts_population.append(
            build_MCTS_Agent(generate_task('Connect4-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION),
                             initial_mcts_config, agent_name=f'MCTS:{budget}')
        )

    main(population=sorted_population+mcts_population, name=args.name)
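
# A usage sketch for estimate_agent_strength above (environment id, baseline
# agent and budget values are assumptions, mirroring other snippets in this file).
import logging

connect4_task = generate_task('Connect4-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
baseline_agent = build_Random_Agent(connect4_task, {}, agent_name='random-baseline')
initial_config = {'budget': 1, 'rollout_budget': 100,
                  'selection_phase': 'ucb1', 'exploration_factor_ucb1': 1.41,
                  'use_dirichlet': False, 'dirichlet_alpha': None}
required_budget, winrate_log = estimate_agent_strength(
    baseline_agent, connect4_task,
    desired_winrate=0.5,
    initial_mcts_config=initial_config,
    logger=logging.getLogger('strength-estimation'),
    benchmarking_episodes=20)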
def run_parallel_task_with_random_agent(env_name,
                                        env_type,
                                        num_envs,
                                        num_episodes,
                                        model_based_agents=False):
    task = generate_task(env_name, env_type)
    # Random agents, either MCTS-based or purely random
    if model_based_agents:
        mcts_config = {
            'budget': 1,
            'rollout_budget': 0,
            'use_dirichlet': False,
            'dirichlet_alpha': 1,
            'selection_phase': 'ucb1',
            'exploration_factor_ucb1': 1,
            'expose_tree_in_predictions': True
        }
        agent_vector = [
            build_MCTS_Agent(task, mcts_config, 'Test-MCTS-Random')
            for _ in range(task.num_agents)
        ]
    else:
        agent_vector = [
            build_Random_Agent(task, {}, 'Test-Random')
            for _ in range(task.num_agents)
        ]

    # The number of environments is larger than the number of
    # episodes because we want to test whether we can generate
    # a specific number of trajectories regardless of the
    # number of environments used to generate them
    trajectories = task.run_episodes(agent_vector,
                                     num_episodes=num_episodes,
                                     num_envs=num_envs,
                                     training=True,
                                     store_extra_information=True)

    # The number of trajectories is lower-bounded by :param: num_episodes,
    # but multiple environments may finish at the same time, so a few extra
    # trajectories (at most num_envs more) may be collected
    assert (len(trajectories) >= num_episodes) and (len(trajectories) <=
                                                    (num_episodes + num_envs))

    # All trajectories finish with a "done" flag
    assert all([t[-1].done for t in trajectories])

    # All timesteps except the last one in each trajectory have "done" unset
    for t in trajectories:
        assert all([not timestep.done for timestep in t[:-1]])

    # ASSUMPTION: observation and succ_observation are numpy arrays
    if env_type == EnvType.SINGLE_AGENT:
        # Each timestep's succ_observation matches the next timestep's observation
        assert all([(ex_1.succ_observation == ex_2.observation).all()
                    for t in trajectories for ex_1, ex_2 in zip(t, t[1:])])
    else:
        # Each agent's succ_observation matches its observation at the next timestep
        assert all([
            (ex_1.succ_observation[a_i] == ex_2.observation[a_i]).all()
            for t in trajectories for ex_1, ex_2 in zip(t, t[1:])
            for a_i in range(task.num_agents)
        ])
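
# Hypothetical invocation of the helper above; the environment id, env type and
# episode/environment counts are illustrative assumptions.
run_parallel_task_with_random_agent('Connect4-v0',
                                    EnvType.MULTIAGENT_SEQUENTIAL_ACTION,
                                    num_envs=4,
                                    num_episodes=2,
                                    model_based_agents=True)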