Example No. 1
def test_can_stack_frames_singleagent_env():
    num_stack = 3
    frame_stack = partial(FrameStack, num_stack=num_stack)

    pendulum_task = generate_task('Pendulum-v0')
    stack_pendulum_task = generate_task('Pendulum-v0', wrappers=[frame_stack])

    assert stack_pendulum_task.observation_dim == (
        num_stack, *pendulum_task.observation_dim)
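
The assertion above only checks the resulting observation_dim. As a purely illustrative, self-contained sketch of the frame-stacking idea (this is not regym's FrameStack implementation), a wrapper of this kind keeps the last num_stack observations and prepends a stacking axis:

from collections import deque

import numpy as np


class MiniFrameStack:
    """Toy illustration: keep the last `num_stack` observations in a deque."""

    def __init__(self, num_stack: int, obs_shape: tuple):
        self.frames = deque([np.zeros(obs_shape)] * num_stack, maxlen=num_stack)

    def push(self, observation: np.ndarray) -> np.ndarray:
        self.frames.append(observation)
        return np.stack(self.frames)  # shape: (num_stack, *obs_shape)


stacked = MiniFrameStack(num_stack=3, obs_shape=(3,)).push(np.ones(3))
assert stacked.shape == (3, 3)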
Example No. 2
def test_can_stack_frames_sequential_multiagent_env():
    num_stack = 4
    frame_stack = partial(FrameStack, num_stack=num_stack)

    connect_4_task = generate_task('Connect4-v0',
                                   EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    stack_connect_4_task = generate_task('Connect4-v0',
                                         EnvType.MULTIAGENT_SEQUENTIAL_ACTION,
                                         wrappers=[frame_stack])
    assert stack_connect_4_task.observation_dim == (
        num_stack, *connect_4_task.observation_dim)

    num_envs = 3
    vector_env = RegymAsyncVectorEnv(stack_connect_4_task.name,
                                     num_envs=num_envs,
                                     wrappers=[frame_stack])

    actual_obs = vector_env.reset()

    # Standard Connect4 observation dimensions are (3, 7, 6)
    # NOTE: Think of board as being sideways (chips fall right-to-left)
    single_env_initial_observation = np.array([[[1., 1., 1., 1., 1., 1.],
                                                [1., 1., 1., 1., 1., 1.],
                                                [1., 1., 1., 1., 1., 1.],
                                                [1., 1., 1., 1., 1., 1.],
                                                [1., 1., 1., 1., 1., 1.],
                                                [1., 1., 1., 1., 1., 1.],
                                                [1., 1., 1., 1., 1., 1.]],
                                               [[0., 0., 0., 0., 0., 0.],
                                                [0., 0., 0., 0., 0., 0.],
                                                [0., 0., 0., 0., 0., 0.],
                                                [0., 0., 0., 0., 0., 0.],
                                                [0., 0., 0., 0., 0., 0.],
                                                [0., 0., 0., 0., 0., 0.],
                                                [0., 0., 0., 0., 0., 0.]],
                                               [[0., 0., 0., 0., 0., 0.],
                                                [0., 0., 0., 0., 0., 0.],
                                                [0., 0., 0., 0., 0., 0.],
                                                [0., 0., 0., 0., 0., 0.],
                                                [0., 0., 0., 0., 0., 0.],
                                                [0., 0., 0., 0., 0., 0.],
                                                [0., 0., 0., 0., 0., 0.]]])

    # We extend by the number of stacked frames
    # so that the per-environment observation shape is (num_stack, 3, 7, 6)
    stacked_single_env_initial_observation = np.array(
        [single_env_initial_observation for _ in range(num_stack)])

    # We extend by the number of environments
    # so that each agent receives an observation of shape (num_envs, num_stack, 3, 7, 6)
    expected_player_obs = np.array(
        [stacked_single_env_initial_observation for _ in range(num_envs)])

    num_agents = 2
    for i in range(num_agents):
        np.testing.assert_array_equal(expected_player_obs, actual_obs[i])
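
As a side note (not part of the original test), the same expected tensor could be built with numpy broadcasting instead of the two list comprehensions above:

# Equivalent construction of expected_player_obs via broadcasting,
# reusing single_env_initial_observation, num_stack and num_envs from above.
expected_player_obs = np.broadcast_to(
    single_env_initial_observation,
    (num_envs, num_stack, *single_env_initial_observation.shape)).copy()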
Example No. 3
def generate_evaluation_matrix(cool_game_params, benchmarking_episodes,
                               mcts_budget):
    # 0: SawBot 1: TorchBot 2: NailBot
    import gym_cool_game
    saw_vs_torch_task = generate_task('CoolGame-v0',
                                      EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                      botA_type=0,
                                      botB_type=1,
                                      **cool_game_params)
    saw_vs_nail_task = generate_task('CoolGame-v0',
                                     EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                     botA_type=0,
                                     botB_type=2,
                                     **cool_game_params)
    torch_vs_nail_task = generate_task('CoolGame-v0',
                                       EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                       botA_type=1,
                                       botB_type=2,
                                       **cool_game_params)

    mcts_config = {
        'budget': mcts_budget,
        'rollout_budget': 1000,
        'selection_phase': 'ucb1',
        'exploration_factor_ucb1': 4  # Might need to tweak this?
    }
    mcts_agent = build_MCTS_Agent(saw_vs_torch_task,
                                  mcts_config,
                                  agent_name='MCTS agent')

    saw_vs_torch = compute_matchup_winrates(mcts_agent, saw_vs_torch_task,
                                            'Saw vs Torch',
                                            benchmarking_episodes, mcts_budget)

    saw_vs_nail = compute_matchup_winrates(mcts_agent, saw_vs_nail_task,
                                           'Saw vs Nail',
                                           benchmarking_episodes, mcts_budget)

    torch_vs_nail = compute_matchup_winrates(mcts_agent, torch_vs_nail_task,
                                             'Torch vs Nail',
                                             benchmarking_episodes,
                                             mcts_budget)

    bench_msg = f'episodes={benchmarking_episodes} MCTS_budget={mcts_budget}'
    winrates_msg = f'winrates=saw:[{saw_vs_torch}, {saw_vs_nail}] nail:[{torch_vs_nail}]'
    logger.info(bench_msg)
    logger.info(winrates_msg)
    logger.info(f'params={cool_game_params}')
    wandb.log({
        'Winrate_Saw_vs_Torch': saw_vs_torch,
        'Winrate_Saw_vs_Nail': saw_vs_nail,
        'Winrate_Torch_vs_Nail': torch_vs_nail
    })
    return np.array([[0., saw_vs_torch, saw_vs_nail],
                     [1. - saw_vs_torch, 0., torch_vs_nail],
                     [1. - saw_vs_nail, 1. - torch_vs_nail, 0.]])
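
The matrix returned above is meant to be a winrate matrix in which opposite off-diagonal entries sum to one. A small hypothetical sanity check (not part of the original code) could verify that property:

import numpy as np


def check_winrate_matrix(m: np.ndarray, atol: float = 1e-8) -> bool:
    """True if m[i, j] + m[j, i] == 1 for every off-diagonal pair."""
    off_diagonal = ~np.eye(m.shape[0], dtype=bool)
    return np.allclose((m + m.T)[off_diagonal], 1.0, atol=atol)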
Example No. 4
def generate_evaluation_matrix(cool_game_params, benchmarking_episodes,
                               mcts_budget, logger: logging.Logger):
    # 0: SawBot 1: TorchBot 2: NailBot
    import gym_cool_game
    saw_vs_torch_task = generate_task('CoolGame-v0',
                                      EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                      botA_type=0,
                                      botB_type=1,
                                      **cool_game_params)
    saw_vs_nail_task = generate_task('CoolGame-v0',
                                     EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                     botA_type=0,
                                     botB_type=2,
                                     **cool_game_params)
    torch_vs_nail_task = generate_task('CoolGame-v0',
                                       EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                       botA_type=1,
                                       botB_type=2,
                                       **cool_game_params)

    mcts_config = {'budget': mcts_budget, 'rollout_budget': 10}
    mcts_agent = build_MCTS_Agent(saw_vs_torch_task,
                                  mcts_config,
                                  agent_name='MCTS agent')

    saw_vs_torch = compute_matchup_winrates(mcts_agent, saw_vs_torch_task,
                                            'Saw vs Torch',
                                            benchmarking_episodes, mcts_budget,
                                            logger)

    saw_vs_nail = compute_matchup_winrates(mcts_agent, saw_vs_nail_task,
                                           'Saw vs Nail',
                                           benchmarking_episodes, mcts_budget,
                                           logger)

    torch_vs_nail = compute_matchup_winrates(mcts_agent, torch_vs_nail_task,
                                             'Torch vs Nail',
                                             benchmarking_episodes,
                                             mcts_budget, logger)

    bench_msg = f'episodes={benchmarking_episodes} MCTS_budget={mcts_budget}'
    winrates_msg = f'winrates=saw:[{saw_vs_torch}, {saw_vs_nail}] nail:[{torch_vs_nail}]'
    logger.info(bench_msg)
    logger.info(winrates_msg)
    logger.info(f'params={cool_game_params}')
    return np.array([[0., saw_vs_torch, saw_vs_nail],
                     [1. - saw_vs_torch, 0., torch_vs_nail],
                     [1. - saw_vs_nail, 1. - torch_vs_nail, 0.]])
Example No. 5
def RandomWalkTask():
    from gym.envs.registration import register
    register(
        id='RandomWalk-v0',
        entry_point='regym.tests.rl_algorithms.random_walk_env:RandomWalkEnv')
    return generate_task('RandomWalk-v0',
                         EnvType.MULTIAGENT_SIMULTANEOUS_ACTION)
Example No. 6
def main(population: List['Agent'], logger, num_stack: int):
    initial_mcts_config = {
        'budget': 20,
        'rollout_budget': 100,
        'selection_phase': 'ucb1',
        'exploration_factor_ucb1': 1.41,
        'use_dirichlet': False,
        'dirichlet_alpha': None
    }
    task = generate_task('Connect4-v0',
                         EnvType.MULTIAGENT_SEQUENTIAL_ACTION,
                         wrappers=create_wrapper(num_stack=num_stack))

    strength_estimation_df = pd.DataFrame(
        columns=('test_agent_id', 'mcts_budget', 'winrate_pos_0',
                 'winrate_pos_1', 'avg_winrate'))

    for agent in population:
        logger.info(
            f'Benchmarking agent with {agent.algorithm.num_updates} updates and {agent.finished_episodes} finished episodes'
        )

        agent_strength, agent_specific_strength_estimation_df = estimate_agent_strength(
            agent, task, 0.5, initial_mcts_config, logger)
        strength_estimation_df = strength_estimation_df.append(
            agent_specific_strength_estimation_df, ignore_index=True)

        logger.info(f'Agent strength: {agent_strength} (MCTS budget)')
    strength_estimation_df.to_csv('mcts_equivalent_strenght_estimation_df.csv')
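
The create_wrapper helper used above is not shown on this page. A hypothetical definition, assuming it mirrors the FrameStack/partial pattern from Examples 1 and 2, could look like this:

from functools import partial
from typing import Callable, List

from gym.wrappers import FrameStack  # assumption: same wrapper as in Examples 1 and 2


def create_wrapper(num_stack: int) -> List[Callable]:
    # Hypothetical: return the list of wrapper factories that generate_task expects.
    return [partial(FrameStack, num_stack=num_stack)] if num_stack > 1 else []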
Example No. 7
def main(path: str, name: str):
    task = generate_task('Connect4-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION)

    #sort_fn = lambda x: int(x.split('_')[-1][:-3])  # ExIt
    sort_fn = lambda x: int(x.split('/')[-1].split('_')[0])  # PPO test training
    sorted_population = load_population_from_path(path=path, sort_fn=sort_fn)

    for agent in sorted_population:
        print(agent.algorithm.num_updates)
        agent.requires_environment_model = False
        agent.training = False

    winrate_matrix = compute_winrate_matrix_metagame(
        population=sorted_population, episodes_per_matchup=1000, task=task)
    maxent_nash, nash_averaging = compute_nash_averaging(
        winrate_matrix, perform_logodds_transformation=True)

    winrate_matrix = np.array(winrate_matrix)
    print(
        'Saving winrate_matrix, max-entropy Nash equilibrium for game defined by winrate matrix and Nash averaging'
    )
    np.savetxt(f'{name}_winrate_matrix.csv', winrate_matrix, delimiter=', ')
    np.savetxt(f'{name}_maxent_nash.csv', maxent_nash, delimiter=', ')
    np.savetxt(f'{name}_nash_averaging.csv', nash_averaging, delimiter=', ')

    ax = plot_winrate_matrix(winrate_matrix)

    plt.show()
Example No. 8
def main():
    task = generate_task('CoolGame-v0',
                         EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                         botA_type=1,
                         botB_type=2)

    random_r1 = build_Random_Agent(task, {}, agent_name='random')
    random_r2 = deepcopy(random_r1)

    mcts_config = {
        'budget': 10,
        'rollout_budget': 1000,
        'selection_phase': 'ucb1',
        'exploration_factor_ucb1': 4  # Might need to tweak this?
    }

    mcts_r1 = build_MCTS_Agent(task, mcts_config, agent_name='P1: MCTS')
    mcts_r2 = build_MCTS_Agent(task, mcts_config, agent_name='P2: MCTS')

    human_r1 = HumanAgent(task.action_dim, name='P1')
    human_r2 = HumanAgent(task.action_dim, name='P2')

    # t = task.run_episode([mcts_r1, mcts_r2], training=False, render_mode='rgb', save_gif=True)
    t = task.run_episode([mcts_r1, mcts_r2], training=False)
    print(t)
Example No. 9
def initialize_experiment(experiment_config, agents_config, self_play_configs):
    env_name, requested_env_type = experiment_config['environment']
    task = generate_task(env_name, EnvType(requested_env_type))
    sp_schemes = initialize_training_schemes(self_play_configs, task)
    agents = initialize_agents(task, agents_config)

    return task, sp_schemes, agents
Example No. 10
def main(population: List, name: str, num_stack: int):
    #task = generate_task('Connect4-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    task = generate_task('Connect4-v0',
                         EnvType.MULTIAGENT_SEQUENTIAL_ACTION,
                         wrappers=create_wrapper(num_stack=num_stack))

    winrate_matrix = compute_winrate_matrix_metagame(
        population=population,
        episodes_per_matchup=200,
        num_envs=-1,
        task=task,
        is_game_symmetrical=False,
        show_progress=True)
    maxent_nash, nash_averaging = compute_nash_averaging(
        winrate_matrix, perform_logodds_transformation=True)

    winrate_matrix = np.array(winrate_matrix)
    print(
        'Saving winrate_matrix, max-entropy Nash equilibrium for game defined by winrate matrix and Nash averaging'
    )
    np.savetxt(f'{name}/winrate_matrix.csv', winrate_matrix, delimiter=', ')
    np.savetxt(f'{name}/maxent_nash.csv', maxent_nash, delimiter=', ')
    np.savetxt(f'{name}/nash_averaging.csv', nash_averaging, delimiter=', ')

    ax = plot_winrate_matrix(winrate_matrix)

    plt.show()
Example No. 11
def generate_evaluation_matrix(cool_game_params, logger):
    # 0: SawBot 1: TorchBot 2: NailBot
    benchmarking_episodes = 1
    mcts_budget = 1

    saw_vs_torch_task = generate_task('CoolGame-v0',
                                      EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                      botA_type=0,
                                      botB_type=1,
                                      **cool_game_params)
    saw_vs_nail_task = generate_task('CoolGame-v0',
                                     EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                     botA_type=0,
                                     botB_type=2,
                                     **cool_game_params)
    torch_vs_nail_task = generate_task('CoolGame-v0',
                                       EnvType.MULTIAGENT_SIMULTANEOUS_ACTION,
                                       botA_type=1,
                                       botB_type=2,
                                       **cool_game_params)

    mcts_config = {'budget': mcts_budget}
    mcts_agent = build_MCTS_Agent(saw_vs_torch_task,
                                  mcts_config,
                                  agent_name='MCTS agent')

    saw_winrates = benchmark_agents_on_tasks(
        tasks=[saw_vs_torch_task, saw_vs_nail_task],
        agents=[mcts_agent],
        populate_all_agents=True,
        num_episodes=benchmarking_episodes)
    nail_winrate = benchmark_agents_on_tasks(
        tasks=[torch_vs_nail_task],
        agents=[mcts_agent],
        populate_all_agents=True,
        num_episodes=benchmarking_episodes)

    bench_msg = f'episodes={benchmarking_episodes} MCTS_budget={mcts_budget}'
    winrates_msg = f'winrates=saw:{saw_winrates} nail:{nail_winrate}'
    logger.info(bench_msg)
    logger.info(winrates_msg)
    logger.info(f'params={cool_game_params}')
    return np.array([[0., saw_winrates[0], saw_winrates[1]],
                     [-saw_winrates[0], 0., nail_winrate[0]],
                     [-saw_winrates[1], -nail_winrate[0], 0.]])
Example No. 12
def FixedLengthDummyTask():
    from regym.environments import generate_task, EnvType
    from gym.envs.registration import register
    register(
        id='FixedLengthDummy-v0',
        entry_point='regym.tests.test_utils.fixed_length_dummy_env:FixedLengthDummyEnv')
    return generate_task('FixedLengthDummy-v0',
                         EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
Example No. 13
def main():
    task = generate_task("Yaniv-v0", EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    # random_r1 = build_Random_Agent(task, {}, agent_name="random")

    ppo = build_PPO_Agent(task, hyperparams, "ppo")

    traj = task.run_episode(
        [ppo, ppo],
        training=True,
    )
    print(traj)
Example No. 14
def test_can_parse_connect4_task():
    import gym_connect4

    task = generate_task('Connect4-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION)

    expected_observation_dim = (3, 7, 6)
    expected_observation_size = 126
    expected_observation_type = 'Continuous'

    assert expected_observation_dim == task.observation_dim
    assert expected_observation_type == task.observation_type
    assert expected_observation_size == task.observation_size
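
Note that the expected values above are mutually consistent: 126 = 3 * 7 * 6, i.e. observation_size appears to be the flattened product of observation_dim.

import numpy as np

assert np.prod((3, 7, 6)) == 126  # matches expected_observation_size above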
Example No. 15
def test_can_pass_kwargs_to_env():
    from gym.envs.registration import register
    register(
        id='DummyEnv-v0',
        entry_point='regym.tests.environments.params_test_env:ParamsTestEnv')

    params = {'param1': 1, 'param2': 2, 'param3': 3}

    task = generate_task('DummyEnv-v0', **params)

    assert task.env.param1 == 1
    assert task.env.param2 == 2
    assert task.env.param3 == 3
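
The ParamsTestEnv referenced by the entry point above is not shown on this page. A minimal sketch of an environment that would satisfy this test (an assumption, not the project's actual class) simply stores the keyword arguments it receives:

import gym
from gym import spaces


class ParamsTestEnv(gym.Env):
    """Toy env that records the kwargs forwarded by generate_task."""

    def __init__(self, param1=None, param2=None, param3=None):
        super().__init__()
        self.param1, self.param2, self.param3 = param1, param2, param3
        self.observation_space = spaces.Discrete(1)
        self.action_space = spaces.Discrete(1)

    def reset(self):
        return 0

    def step(self, action):
        return 0, 0.0, True, {}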
Example No. 16
def initialize_experiment(experiment_config, agents_config, self_play_configs):
    env, env_type = experiment_config['environment']
    task = generate_task(env, EnvType(env_type))
    sp_schemes = initialize_training_schemes(self_play_configs, task)
    agents = initialize_agents(task, agents_config)

    seeds = list(map(int, experiment_config['seeds']))

    number_of_runs = experiment_config['number_of_runs']
    if len(seeds) < number_of_runs:
        print('Number of random seeds does not match "number of runs" config value. Generating new seeds')
        seeds = np.random.randint(0, 10000, number_of_runs).tolist()

    return task, sp_schemes, agents, seeds
Example No. 17
def test_singleagent_tasks_run_faster_on_parallel(env_name):
    task = generate_task(env_name, EnvType.SINGLE_AGENT)
    random_agent = build_Random_Agent(task, {}, 'Test-Random')

    num_episodes = 50
    num_envs = 1
    start = time.time()
    trajectories = task.run_episodes([random_agent],
                                     num_episodes=num_episodes,
                                     num_envs=num_envs,
                                     training=False)
    total_single = time.time() - start

    start = time.time()
    num_envs = multiprocessing.cpu_count()
    trajectories = task.run_episodes([random_agent],
                                     num_episodes=num_episodes,
                                     num_envs=num_envs,
                                     training=False)
    total_multiple = time.time() - start

    assert total_multiple < total_single
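
Wall-clock comparisons like the one above (and in Examples 20 and 23) repeat the same time.time() bookkeeping. A small hypothetical helper based on the monotonic time.perf_counter clock could factor it out (a sketch, not part of the original tests):

import time
from typing import Any, Callable, Tuple


def timed(fn: Callable[[], Any]) -> Tuple[Any, float]:
    """Run fn() and return (result, elapsed_seconds)."""
    start = time.perf_counter()
    result = fn()
    return result, time.perf_counter() - start

# Hypothetical usage inside the test above:
# trajectories, total_single = timed(lambda: task.run_episodes(
#     [random_agent], num_episodes=num_episodes, num_envs=1, training=False))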
Example No. 18
def main():
    task = generate_task('RockPaperScissors-v0',
                         EnvType.MULTIAGENT_SIMULTANEOUS_ACTION)
    print('Initializing agent')
    agent = initialize_agent(task)

    print('Computing SP-induced trajectories')
    training_trajectories = compute_sp_training_trajectories(
        task=task,
        agent=agent,
        sp_scheme=NaiveSelfPlay,
    )

    print('Computing basis trajectories')
    basis_trajectories = compute_basis_trajectories(task)

    print('Merging trajectories')
    all_trajectories = merge_basis_and_trained_trajectories(
        basis_trajectories, training_trajectories)
    print('Number of basis trajectories:', len(basis_trajectories['trajectory']))

    # Compute trajectories from training agent
    ts = copy.deepcopy(all_trajectories['trajectory'])
    print(
        f'Number of trajectories: {len(ts)} // Steps per trajectory: {len(ts[0])}'
    )

    actions = [
        # We get the last observation of the first agent,
        # This contains the last joint action by both agents.
        [
            step.observation[0][-1] for idx, step in enumerate(t)
            if idx < 10 and idx > 0
        ] for t in ts
    ]

    embeddings = generate_t_sne_embedding(actions, all_trajectories)
Example No. 19
def play_against_fixed_agent(agent,
                             fixed_agent_action,
                             agent_position,
                             max_reward,
                             total_episodes=2000):
    '''
    Test used to make sure that the agent is 'learning' by learning a best
    response against a fixed opponent that always plays the same action
    (analogous to learning to play mostly paper against an opponent that
    only plays rock in rock-paper-scissors).
    '''
    from play_against_fixed_opponent import learn_against_fix_opponent

    class FixedAgent(Agent):
        def __init__(self, action):
            super(FixedAgent, self).__init__(name=f'FixedAction: {action}')
            self.action = action

        def take_action(self, *args):
            return self.action

        def handle_experience(self, *args):
            pass

        def clone(self, *args):
            pass

    fixed_opponent = FixedAgent(fixed_agent_action)
    kuhn_task = generate_task('KuhnPoker-v0',
                              EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    assert agent.training
    learn_against_fix_opponent(agent,
                               fixed_opponent=fixed_opponent,
                               agent_position=agent_position,
                               task=kuhn_task,
                               total_episodes=total_episodes,
                               training_percentage=0.9,
                               reward_tolerance=1.,
                               maximum_average_reward=max_reward,
                               evaluation_method='last')
Example No. 20
def test_multiagent_sequential_tasks_run_faster_on_parallel(env_name):
    task = generate_task(env_name, EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    random_agent = build_Random_Agent(task, {}, 'Test-Random')

    start = time.time()
    num_episodes = 100
    num_envs = 1
    _ = task.run_episodes([random_agent, random_agent],
                          num_episodes=num_episodes,
                          num_envs=num_envs,
                          training=False)
    total_single = time.time() - start

    start = time.time()
    num_envs = multiprocessing.cpu_count()
    _ = task.run_episodes([random_agent, random_agent],
                          num_episodes=num_episodes,
                          num_envs=num_envs,
                          training=False)
    total_multiple = time.time() - start
    print('Parallel: ', total_multiple, 'Sequential: ', total_single, 'Diff: ',
          total_single - total_multiple)
    assert total_multiple < total_single
Example No. 21
def main(path: str):
    initial_mcts_config = {
        'budget': 10,
        'rollout_budget': 100,
        'selection_phase': 'ucb1',
        'exploration_factor_ucb1': 1.41,
        'use_dirichlet': False,
        'dirichlet_alpha': None
    }
    task = generate_task('Connect4-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    for agent in load_population(path):
        logger.info(
            f'Benchmarking agent with {agent.algorithm.num_updates} updates'
        )
        nn_agent = build_NeuralNet_Agent(
            task, {
                'neural_net': agent.algorithm.model,
                'pre_processing_fn': batch_vector_observation
            },
            agent_name='NeuralNet')
        agent_strength = estimate_agent_strength(nn_agent, task, 0.5,
                                                 initial_mcts_config)
        logger.info(f'Agent strength: {agent_strength} (MCTS budget)')
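
batch_vector_observation is not shown on this page; presumably it turns a single observation into a batched input for the neural net. A hypothetical sketch (an assumption, not regym's actual helper):

import numpy as np
import torch


def batch_vector_observation(observation) -> torch.Tensor:
    # Hypothetical: add a leading batch dimension and convert to a float tensor.
    return torch.from_numpy(np.asarray(observation, dtype=np.float32)).unsqueeze(0)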
Example No. 22
def play_against_fixed_agent(agent, fixed_agent_action, agent_position,
                             max_reward, total_episodes=2000):
    '''
    Test used to make sure that the agent is 'learning' by learning a best
    response against a fixed opponent that always plays the same action
    (analogous to learning to play mostly paper against an opponent that
    only plays rock in rock-paper-scissors).
    '''
    kuhn_task = generate_task('KuhnPoker-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    fixed_opponent = build_Deterministic_Agent(kuhn_task, {'action': fixed_agent_action},
                                               f'Fixed action: {fixed_agent_action}')
    assert agent.training
    parallel_learn_against_fix_opponent(
        agent,
        fixed_opponent,
        task=kuhn_task,
        agent_position=agent_position,
        training_episodes=total_episodes,
        test_episodes=100,
        reward_tolerance=max_reward*0.1,  # 10% off maximum
        benchmark_every_n_episodes=3000,  # has to be larger than total_episodes
        maximum_average_reward=max_reward,
        evaluation_method='last',
    )
Example No. 23
def test_multiagent_sequential_tasks_with_model_based_agents_run_faster_on_parallel(
        env_name):
    task = generate_task(env_name, EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
    mcts_config = {
        'budget': 10,
        'rollout_budget': 100,
        'use_dirichlet': False,
        'dirichlet_alpha': 1,
        'selection_phase': 'ucb1',
        'exploration_factor_ucb1': 1
    }
    agent_vector = [
        build_MCTS_Agent(task, mcts_config, 'Test-MCTS-Random')
        for _ in range(task.num_agents)
    ]

    start = time.time()
    num_episodes = 10
    num_envs = 1

    _ = task.run_episodes(agent_vector,
                          num_episodes=num_episodes,
                          num_envs=num_envs,
                          training=False)
    total_single = time.time() - start
    print('Sequential: ', total_single)

    start = time.time()
    num_envs = multiprocessing.cpu_count()
    _ = task.run_episodes(agent_vector,
                          num_episodes=num_episodes,
                          num_envs=num_envs,
                          training=False)
    total_multiple = time.time() - start
    print('Parallel: ', total_multiple, 'Sequential: ', total_single, 'Diff: ',
          total_single - total_multiple)
    assert total_multiple < total_single
Example No. 24
def initialize_experiment(experiment_config, agents_config):
    env_name, requested_env_type = experiment_config['environment']
    task = generate_task(env_name, EnvType(requested_env_type))
    agents = initialize_agents(task, agents_config)
    return task, agents
Example No. 25
    os.mkdir(args.name)

    ### To refactor at some point
    #sort_fn = lambda x: int(x.split('_')[-1][:-3])  # ExIt
    sort_fn = lambda x: int(x.split('/')[-1].split('_')[0])  # PPO test training
    sorted_population = load_population_from_path(path=args.path, sort_fn=sort_fn)
    sorted_population.sort(key=lambda agent: agent.finished_episodes)

    for agent in sorted_population:
        agent.requires_environment_model = False
        agent.training = False
    ###

    # Taken from MCTS equivalent strength benchmarking
    mcts_budgets = [29, 42, 42, 38, 45, 56, 48, 49, 51, 42, 53, 46, 35, 49, 49,
                    42, 45, 40, 45, 42, 47, 38, 42, 47, 45, 37, 42, 35, 39, 25,
                    38, 34, 33, 38, 40]
    mcts_population = []
    for budget in mcts_budgets:
        initial_mcts_config = {'budget': budget, 'rollout_budget': 100,
                               'selection_phase': 'ucb1',
                               'exploration_factor_ucb1': 1.41,
                               'use_dirichlet': False,
                               'dirichlet_alpha': None}
        mcts_population.append(
            build_MCTS_Agent(generate_task('Connect4-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION),
                             initial_mcts_config, agent_name=f'MCTS:{budget}')
        )

    main(population=sorted_population+mcts_population, name=args.name)
Example No. 26
def create_task_from_config(environment_config):
    wrappers = create_wrappers(environment_config)
    task = generate_task(environment_config['name'],
                         EnvType(environment_config['env_type']),
                         wrappers=wrappers)
    return task
Example No. 27
def pendulum_task():
    return generate_task('Pendulum-v0')
Example No. 28
def Connect4Task():
    import gym_connect4
    return generate_task('Connect4-v0', EnvType.MULTIAGENT_SEQUENTIAL_ACTION)
Example No. 29
def RPSTask():
    import gym_rock_paper_scissors
    return generate_task('RockPaperScissors-v0',
                         EnvType.MULTIAGENT_SIMULTANEOUS_ACTION)
Example No. 30
def run_parallel_task_with_random_agent(env_name,
                                        env_type,
                                        num_envs,
                                        num_episodes,
                                        model_based_agents=False):
    task = generate_task(env_name, env_type)
    # Random agents: either MCTS-based or uniformly random
    if model_based_agents:
        mcts_config = {
            'budget': 1,
            'rollout_budget': 0,
            'use_dirichlet': False,
            'dirichlet_alpha': 1,
            'selection_phase': 'ucb1',
            'exploration_factor_ucb1': 1,
            'expose_tree_in_predictions': True
        }
        agent_vector = [
            build_MCTS_Agent(task, mcts_config, 'Test-MCTS-Random')
            for _ in range(task.num_agents)
        ]
    else:
        agent_vector = [
            build_Random_Agent(task, {}, 'Test-Random')
            for _ in range(task.num_agents)
        ]

    # The number of environments is larger than the number of episodes
    # because we want to test that we can generate a specific number of
    # trajectories regardless of the number of environments used to
    # generate them
    trajectories = task.run_episodes(agent_vector,
                                     num_episodes=num_episodes,
                                     num_envs=num_envs,
                                     training=True,
                                     store_extra_information=True)

    # We have at least the number of trajectories we asked for:
    # the count is lower-bounded by :param: num_episodes, but multiple
    # environments may finish at the same time, yielding a few extra
    assert (len(trajectories) >= num_episodes) and (len(trajectories) <=
                                                    (num_episodes + num_envs))

    # All trajectories finish with a "done" flag
    assert all([t[-1].done for t in trajectories])

    # In every trajectory, only the last timestep has "done" set
    for t in trajectories:
        assert all([not timestep.done for timestep in t[:-1]])

    # ASSUMPTION: observation and succ_observation are numpy arrays
    if env_type == EnvType.SINGLE_AGENT:
        # Each step's succ_observation matches the next step's observation
        assert all([(ex_1.succ_observation == ex_2.observation).all()
                    for t in trajectories for ex_1, ex_2 in zip(t, t[1:])])
    else:
        # Each step's succ_observation matches the next step's observation,
        # for every agent
        assert all([
            (ex_1.succ_observation[a_i] == ex_2.observation[a_i]).all()
            for t in trajectories for ex_1, ex_2 in zip(t, t[1:])
            for a_i in range(task.num_agents)
        ])
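
A hypothetical invocation of the helper above (not part of the original test module), reusing the sequential Connect4 task that appears elsewhere on this page:

run_parallel_task_with_random_agent('Connect4-v0',
                                    EnvType.MULTIAGENT_SEQUENTIAL_ACTION,
                                    num_envs=4,
                                    num_episodes=10,
                                    model_based_agents=False)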