Example no. 1
0
def create_train_env(
    game_name,
    seed,
    eps,
    max_frame,
    num_thread,
    num_game_per_thread,
    actor_creator,
    *,
    terminal_on_life_loss=False,
    terminal_signal_on_life_loss=True,
):
    """Build the training setup: one VectorEnv + one actor per thread.

    Each thread gets `num_game_per_thread` games, each with a unique seed
    and its own exploration epsilon taken from `eps`.

    Returns:
        (context, games, actors)
    """
    context = rela.Context()
    games = []
    actors = []
    for t in range(num_thread):
        vec_env = rela.VectorEnv()
        base = t * num_game_per_thread
        for offset in range(num_game_per_thread):
            idx = base + offset
            # unique seed and per-game epsilon, indexed by global game slot
            game = create_game(
                game_name,
                seed + idx,
                eps[idx],
                max_frame,
                terminal_on_life_loss,
                terminal_signal_on_life_loss,
            )
            games.append(game)
            vec_env.append(game)

        actor = actor_creator(t)
        actors.append(actor)
        # False: this loop is a training loop, not eval
        context.push_env_thread(rela.BasicThreadLoop(actor, vec_env, False))
    print("Finished creating environments with %d games" % (len(games)))
    return context, games, actors
Example no. 2
0
def create_eval_env(
    game_name,
    num_thread,
    model_locker,
    actor_cls,
    seed,
    max_frame,
    *,
    eval_eps=0,
    terminal_on_life_loss=False,
    terminal_signal_on_life_loss=True,
):
    """Build evaluation envs: one thread per game, each with its own actor.

    Every thread wraps a single game (seeded with `seed + i`) in a
    VectorEnv and runs it in eval mode.

    Returns:
        (context, games)
    """
    context = rela.Context()
    games = []
    for thread_idx in range(num_thread):
        game = create_game(
            game_name,
            seed + thread_idx,
            eval_eps,
            max_frame,
            terminal_on_life_loss,
            terminal_signal_on_life_loss,
        )
        games.append(game)

        vec_env = rela.VectorEnv()
        vec_env.append(game)
        # True: run the loop in eval mode
        loop = rela.BasicThreadLoop(actor_cls(model_locker), vec_env, True)
        context.push_env_thread(loop)
    return context, games
Example no. 3
0
def evaluate(agents,
             num_game,
             seed,
             bomb,
             eps,
             sad,
             *,
             hand_size=5,
             runners=None,
             device="cuda:0"):
    """
    evaluate agents as long as they have a "act" function

    Exactly one of `agents` / `runners` must be given: agents are wrapped
    in BatchRunners here, otherwise the pre-built runners are used as-is.
    Runs `num_game` Hanabi games to completion and aggregates the scores.

    Returns: (mean score, fraction of perfect games, per-game scores,
    number of perfect games).
    """
    # Exactly one of the two ways of supplying players may be used.
    assert agents is None or runners is None
    if agents is not None:
        # Wrap each agent in a batched inference runner exposing "act".
        runners = [
            rela.BatchRunner(agent, device, 1000, ["act"]) for agent in agents
        ]
    num_player = len(runners)

    context = rela.Context()
    games = create_envs(
        num_game,
        seed,
        num_player,
        hand_size,
        bomb,
        [eps],
        -1,  # NOTE(review): presumably "no max game length" — confirm with create_envs
        sad,
        False,
        False,
    )

    # One thread loop per game; each player is an R2D2 actor driven by
    # its runner.
    for g in games:
        env = hanalearn.HanabiVecEnv()
        env.append(g)
        actors = []
        for i in range(num_player):
            actors.append(rela.R2D2Actor(runners[i], 1))
        thread = hanalearn.HanabiThreadLoop(actors, env, True)  # True: eval mode
        context.push_env_thread(thread)

    for runner in runners:
        runner.start()

    context.start()
    # Poll until all game threads finish, then terminate and wait again
    # so shutdown is fully complete before stopping the runners.
    while not context.terminated():
        time.sleep(0.5)
    context.terminate()
    while not context.terminated():
        time.sleep(0.5)

    for runner in runners:
        runner.stop()

    scores = [g.last_score() for g in games]
    # 25 is a perfect Hanabi score.
    num_perfect = np.sum([1 for s in scores if s == 25])
    return np.mean(scores), num_perfect / len(scores), scores, num_perfect
Example no. 4
0
def create_train_env(
    method,
    seed,
    num_thread,
    num_game_per_thread,
    actor_cons,
    max_len,
    num_player,
    bomb,
    greedy_extra,
):
    """Build Hanabi training envs and thread loops for "vdn" or "iql".

    VDN uses a single joint actor per thread; IQL uses one actor per
    player per thread.

    Returns:
        (context, games, actors, threads)
    """
    assert method in ["vdn", "iql"]
    context = rela.Context()
    games, actors, threads = [], [], []
    print("training with bomb: %d" % bomb)
    for t_idx in range(num_thread):
        vec_env = rela.VectorEnv()
        for g_idx in range(num_game_per_thread):
            # unique seed per game across all threads
            game_seed = seed + g_idx + t_idx * num_game_per_thread
            game = hanalearn.HanabiEnv(
                {
                    "players": str(num_player),
                    "seed": str(game_seed),
                    "bomb": str(bomb),
                },
                max_len,
                greedy_extra,
                False,
            )
            games.append(game)
            vec_env.append(game)

        assert max_len > 0
        if method == "vdn":
            # single joint actor drives all players in this thread
            actor = actor_cons(t_idx)
            actors.append(actor)
            loop = hanalearn.HanabiVDNThreadLoop(actor, vec_env, False)
        else:
            # independent learners: one constructor per player
            assert len(actor_cons) == num_player
            env_actors = [actor_cons[i](t_idx) for i in range(num_player)]
            actors.extend(env_actors)
            loop = hanalearn.HanabiIQLThreadLoop(env_actors, vec_env, False)

        threads.append(loop)
        context.push_env_thread(loop)
    print("Finished creating environments with %d games and %d actors" %
          (len(games), len(actors)))
    return context, games, actors, threads
Example no. 5
0
def create_threads(
    num_thread,
    num_game_per_thread,
    actors,
    games,
):
    """Partition pre-built games into per-thread vec-envs and thread loops.

    Thread `t` owns games[t * num_game_per_thread : (t + 1) * num_game_per_thread]
    and is driven by actors[t].

    Returns:
        (context, threads)
    """
    context = rela.Context()
    threads = []
    for t in range(num_thread):
        vec_env = hanalearn.HanabiVecEnv()
        start = t * num_game_per_thread
        for offset in range(num_game_per_thread):
            vec_env.append(games[start + offset])
        loop = hanalearn.HanabiThreadLoop(actors[t], vec_env, False)
        threads.append(loop)
        context.push_env_thread(loop)
    print("Finished creating %d threads with %d games and %d actors" %
          (len(threads), len(games), len(actors)))
    return context, threads
Example no. 6
0
def create_eval_env(
    seed,
    num_thread,
    model_lockers,
    eval_eps,
    num_player,
    bomb,
    greedy_extra,
    log_prefix=None,
):
    """Build Hanabi eval threads: one single-game IQL loop per thread.

    Each game gets seed `seed + i` and one R2D2 actor per player. When
    `log_prefix` is given, each game logs to `<log_prefix>/game<i>.txt`.

    Returns:
        (context, games)
    """
    context = rela.Context()
    games = []
    for game_idx in range(num_thread):
        game = hanalearn.HanabiEnv(
            {
                "players": str(num_player),
                "seed": str(seed + game_idx),
                "bomb": str(bomb),
            },
            -1,
            greedy_extra,
            False,
        )
        games.append(game)

        vec_env = rela.VectorEnv()
        vec_env.append(game)
        env_actors = [
            rela.R2D2Actor(model_lockers[j], 1, eval_eps)
            for j in range(num_player)
        ]
        if log_prefix is None:
            loop = hanalearn.HanabiIQLThreadLoop(env_actors, vec_env, True)
        else:
            log_file = os.path.join(log_prefix, "game%d.txt" % game_idx)
            loop = hanalearn.HanabiIQLThreadLoop(env_actors, vec_env, True,
                                                 log_file)
        context.push_env_thread(loop)
    return context, games