Example #1
def run_game_worker(args):
    # :matchup_info ~ [(bot_1_species, bot_1_gen), ...]
    env_name, matchup_info = args

    env_module = get_env_module(env_name)
    environment = env_module.Environment()

    bot_1_species, bot_1_gen = matchup_info[0]
    bot_2_species, bot_2_gen = matchup_info[1]

    sp = get_species(bot_1_species)
    Agent1 = sp.AgentClass
    agent_1_settings = sp.agent_settings(env_name,
                                         bot_1_gen,
                                         play_setting="evaluation")
    agent_1 = Agent1(environment=environment, **agent_1_settings)

    sp = get_species(bot_2_species)
    Agent2 = sp.AgentClass
    agent_2_settings = sp.agent_settings(env_name,
                                         bot_2_gen,
                                         play_setting="evaluation")
    agent_2 = Agent2(environment=environment, **agent_2_settings)

    environment.add_agent(agent_1)
    environment.add_agent(agent_2)

    environment.setup()
    outcomes = environment.run()

    return (matchup_info, outcomes)
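
run_game_worker takes a single packed args tuple and hands back (matchup_info, outcomes), which suggests it is meant to be fanned out with a process pool. A minimal sketch of such a driver, assuming hypothetical environment/species names and generations:

from multiprocessing import Pool

# Hypothetical driver: the environment name, species names, and generations
# below are placeholders, not values taken from the project above.
matchups = [
    ("connect_four", [("gbdt", 3), ("gbdt", 4)]),
    ("connect_four", [("gbdt", 4), ("gbdt", 3)]),
]
with Pool(processes=2) as pool:
    for matchup_info, outcomes in pool.map(run_game_worker, matchups):
        print(matchup_info, outcomes)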
Example #2
def run_game_worker(args):
    # :matchup_info ~ [(bot_1_species, bot_1_generation), ...]
    environment_name, matchup_info = args

    env_module = get_env_module(environment_name)
    environment = env_module.Environment()

    bot_1_species, bot_1_generation = matchup_info[0]
    bot_2_species, bot_2_generation = matchup_info[1]

    Agent1, agent_1_settings = configure_agent(
        environment_name,
        bot_1_species,
        bot_1_generation,
        play_setting="evaluation",
    )
    agent_1 = Agent1(environment=environment, **agent_1_settings)

    Agent2, agent_2_settings = configure_agent(
        environment_name,
        bot_2_species,
        bot_2_generation,
        play_setting="evaluation",
    )
    agent_2 = Agent2(environment=environment, **agent_2_settings)

    environment.add_agent(agent_1)
    environment.add_agent(agent_2)

    outcomes, _ = environment.run()

    return (matchup_info, outcomes)
Example #3
def batch_info_worker_task(args):
    (
        environment_name,
        species_name,
        batch_num,
        worker_num,
        num_workers,
    ) = args

    # Go through every replay and sum up stats
    env_class = get_env_module(environment_name)
    replay_directory = find_batch_directory(environment_name, species_name,
                                            batch_num)
    ws = WorkerStats()
    for agent_replay in iter_replay_data(
            replay_directory,
            env_class.State,
            worker_num,
            num_workers,
    ):
        ws.total_mcts_considerations += agent_replay.total_mcts_considerations()
        ws.num_games += 1
        ws.num_positions += len(agent_replay.positions)
    return astuple(ws)
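
Because the worker returns astuple(ws), WorkerStats is presumably a dataclass, so per-worker results can be summed field by field. A minimal sketch of that aggregation, with placeholder arguments and a hypothetical pool driver:

from multiprocessing import Pool

# Hypothetical aggregation: fan one batch out over four workers and add the
# returned stat tuples element-wise. The order of the summed fields follows
# the WorkerStats dataclass definition (not shown here).
args = [("connect_four", "gbdt", 7, worker_num, 4) for worker_num in range(4)]
with Pool(processes=4) as pool:
    per_worker = pool.map(batch_info_worker_task, args)
totals = [sum(field) for field in zip(*per_worker)]
print(totals)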
Example #4
def run_faceoff(
    environment_name,
    bot_species,
    bot_generation,
    num_rounds,
    num_workers=1,
):
    env_class = get_env_module(environment_name)

    # The bot you're testing and the current best bot
    bots = []
    for i in range(bot_generation - 1, bot_generation + 1):
        Agent, agent_settings = configure_agent(environment_name, bot_species,
                                                i, "evaluation")
        bots.append(Bot(
            f"{bot_species}-{i}",
            Agent,
            agent_settings,
        ))

    # Run the faceoff
    tournament = Tournament.setup(
        environment=env_class.Environment,
        bots=bots,
    )
    for i in range(num_rounds):
        tournament.ladder(num_rounds=1,
                          num_workers=num_workers)  # 2 x 3 games each round
        tournament.display_results()

    # Return contender matchup
    contender_entrant = tournament.entrants[bots[-1].name]
    contender_matchup_info = contender_entrant.matchup_histories[bots[0].name]
    return contender_matchup_info
Example #5
def default_models(environment_name):
    env_module = get_env_module(environment_name)
    value_model = intuition_model.UnopinionatedValue()
    if hasattr(env_module, "BootstrapValue"):
        value_model = env_module.BootstrapValue()
    policy_model = intuition_model.UniformPolicy()
    return value_model, policy_model
Example #6
def replay_video(replay_path, speed, first_n_moves=100_000_000):
    with open(replay_path, 'r') as f:
        replay = json.load(f)
    environment_name = replay["name"]
    moves = []
    for move_info in replay["replay"]:
        moves.append(move_info["move"])

    env_module = get_env_module(environment_name)
    environment = env_module.Environment()
    current_state = environment.initial_state()
    os.system("clear")
    for i, move in enumerate(moves):
        if i >= first_n_moves:
            break

        # Show state
        to_display = environment.text_display(current_state)
        os.system("clear")
        rprint(to_display)
        time.sleep(speed)

        if move is None:
            break

        current_state = environment.transition_state(current_state, move)
Example #7
def gbdt_configuration(
    environment_name,
    species,
    generation,
    play_setting="self_play",
):
    '''
    :play_setting ~ {"self_play", "evaluation"}
    '''
    env_module = get_env_module(environment_name)

    value_model, policy_model = default_models(environment_name)
    if generation > 1:
        value_model_path, policy_model_path = build_model_paths(
            environment_name,
            species,
            generation,
        )
        value_model = intuition_model.GBDTValue()
        value_model.load(value_model_path)

        policy_model = intuition_model.GBDTPolicy()
        policy_model.load(policy_model_path)

    move_consideration_time = 3.0
    temperature = 0.0
    full_search_proportion = 1.0
    full_search_steps = 800
    partial_search_steps = full_search_steps // 5

    # Play-setting dependent
    # - XXX Add Puct params
    lower_bound_time = 0.01
    if play_setting == "self_play":
        temperature = .3 # XXX Make a temp profile per move
        move_consideration_time = lower_bound_time
    elif play_setting == "evaluation":
        move_consideration_time = lower_bound_time

    return dict(
        species=species,
        generation=generation,
        game_tree=None,
        current_node=None,
        feature_extractor=env_module.generate_features,
        value_model=value_model,
        policy_model=policy_model,
        move_consideration_time=move_consideration_time, # at least N seconds
        puct_explore_factor=1.0,
        puct_noise_alpha=0.4,
        puct_noise_influence=0.25,
        full_search_proportion=full_search_proportion,
        full_search_steps=full_search_steps,
        partial_search_steps=partial_search_steps,
        temperature=temperature,
    )
Example #8
def narrate_surprise(surprise, agent_replay):
    '''
    - Display surprise info
    - Starting from initial position, display every game state up to final
      state.
    '''
    env_module = get_env_module(agent_replay.environment_name)
    env = env_module.Environment()

    final_expectation = agent_replay.positions[
        surprise.final_position_index].expectation()
    initial_position_index = surprise.initial_position_index

    surprise_positions = agent_replay.positions[
        initial_position_index:surprise.final_position_index + 1]

    # Make position, error table
    rprint(f"\n{'POSITION':<15}{'EXPECTATION':<15}{'ERROR':<15}")
    for position in surprise_positions:
        is_my_move = agent_replay.agent_settings.agent_num == position.state.whose_move
        if not is_my_move:
            continue
        expectation = round(position.expectation(), 2)
        error_from_here = round(abs(final_expectation - expectation), 2)
        rprint(f"{position.index:<15}{expectation:<15}{error_from_here:<15}")

    for position in surprise_positions:
        player_num = position.state.whose_move + 1
        is_my_move = agent_replay.agent_settings.agent_num == position.state.whose_move

        position_progress = f"{position.index} / {surprise.final_position_index}"
        rprint(f"\n\n===== POSITION {position_progress}, PLAYER", player_num,
               "MOVING =====")
        current_state = position.state
        rprint(env.text_display(current_state))

        rprint("State Value:", round(position.value, 3))
        if is_my_move:
            display_best_moves(env, list(position.actions_considered.values()))

        action_id = position.chosen_action_id
        human_move = env.action_name_by_id.get(action_id)
        rprint(
            f"\nPlayer {player_num} chose [bold green]{human_move}[/bold green] ({action_id})"
        )

        if is_my_move:
            expectation = position.expectation()
            expectation = round(expectation, 2)
            error_from_here = round(abs(final_expectation - expectation), 3)
            rprint(
                f"Player {player_num} expects {expectation}, error: {error_from_here}"
            )
        else:
            expectation = None
Example #9
    def agent_settings(self, environment, generation, play_setting):
        env_module = get_env_module(environment)

        # Setup value/policy models
        if generation == 1:
            value_model = intuition_model.UnopinionatedValue()
            if hasattr(env_module, "BootstrapValue"):
                value_model = env_module.BootstrapValue()
            policy_model = intuition_model.UniformPolicy()
        else:
            value_model_path, policy_model_path = build_model_paths(
                environment,
                self.name,
                generation,
            )
            value_model = intuition_model.GBDTValue()
            value_model.load(value_model_path)

            policy_model = intuition_model.GBDTPolicy()
            policy_model.load(policy_model_path)

        # Settings
        move_consideration_time = 3.0
        temperature = 0.0
        full_search_proportion = 1.0
        full_search_steps = 800
        partial_search_steps = full_search_steps // 5

        # Play-setting dependent
        # - XXX Add Puct params
        lower_bound_time = 0.01
        if play_setting == "self_play":
            temperature = .3  # XXX Make a temp profile per move
            move_consideration_time = lower_bound_time
        elif play_setting == "evaluation":
            move_consideration_time = lower_bound_time

        return dict(
            species=self.name,
            generation=generation,
            game_tree=None,
            current_node=None,
            feature_extractor=env_module.generate_features,
            value_model=value_model,
            policy_model=policy_model,
            move_consideration_time=move_consideration_time,  # at least N seconds
            puct_explore_factor=1.0,
            puct_noise_alpha=0.4,
            puct_noise_influence=0.25,
            full_search_proportion=full_search_proportion,
            full_search_steps=full_search_steps,
            partial_search_steps=partial_search_steps,
            temperature=temperature,
        )
Example #10
def self_play_cycle(
    environment_name,
    Agent,
    agent_settings,
    replay_directory,
):
    env_module = get_env_module(environment_name)

    # Play a full game
    agent_replays = play_game(
        env_module,
        Agent,
        agent_settings,
        replay_directory=replay_directory,
    )

    # Replay more games from certain positions (if enabled)
    if not agent_settings.get("revisit_violated_expectations", False):
        return

    # Setup revisit settings
    # XXX: Tune
    # agent_settings["full_search_proportion"] = 1.0
    # agent_settings["temperature"] = 1.0
    num_revisits = 10
    raw_error_range = [-2.0, -0.50]
    upstream_turns = 1

    # Run revisits
    for agent_replay in agent_replays:
        # Get the position with the highest expectation violation, above a
        # certain threshold.
        surprises = find_surprises(
            agent_replay=agent_replay,
            raw_error_range=raw_error_range,
        )
        if not surprises:
            continue

        # Play :num_revisits games from a few turns upstream of that position.
        initial_index = max(
            surprises[0].initial_position_index - upstream_turns, 0)
        reconstruction_info = (agent_replay,
                               agent_replay.positions[initial_index])
        for _ in range(num_revisits):
            play_game(
                env_module,
                Agent,
                agent_settings,
                replay_directory=replay_directory,
                reconstruction_info=reconstruction_info,
            )
Example #11
    def play_cli_video(
        self,
        initial_position_index,
        final_position_index,
        speed=0.3,
    ):
        env_module = get_env_module(self.environment_name)
        environment = env_module.Environment()
        current_state = environment.initial_state()
        current_position_idx = initial_position_index
        while current_position_idx <= final_position_index:
            current_state = self.positions[current_position_idx].state
            os.system("clear")
            print("Game", self.game_id)
            rprint(environment.text_display(current_state))
            time.sleep(speed)
            current_position_idx += 1
Example #12
def run_generation_ladder(
    environment_name,
    species_list,  # [(species, low_gen, high_gen), ...]
    num_workers=1,
    entrants_per_species=7,
):
    bots = []
    for species, lowest_generation, highest_generation in species_list:
        num_entrants = entrants_per_species
        generations = [
            int(round(x)) for x in numpy.linspace(
                lowest_generation, highest_generation, num_entrants)
        ]
        generations = list(set(generations))
        for i in generations:
            sp = get_species(species)
            Agent = sp.AgentClass
            agent_settings = sp.agent_settings(environment_name,
                                               i,
                                               play_setting="evaluation")
            print(f"Adding bot {species}-{i} to tourney")
            bots.append(Bot(
                f"{species}-{i}",
                Agent,
                agent_settings,
            ))

    species_str = []
    for species, lg, hg in species_list:
        species_str.append(f"{species}-{lg}-{hg}")
    species_str.sort()
    species_str = "__".join(species_str)
    tournament_key = f"{round(time.time())}-{species_str}"
    results_path = build_tournament_results_path(tournament_key)

    env_class = get_env_module(environment_name)
    tournament = Tournament.setup(
        environment=env_class.Environment,
        bots=bots,
    )
    for i in range(300):
        tournament.ladder(num_rounds=1, num_workers=num_workers)
        tournament.display_results()
        print(f"\nTournament id: {tournament_key}")
        tournament.save_results(results_path)
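
A hypothetical invocation, with placeholder environment and species names; each tuple follows the (species, low_gen, high_gen) format noted in the signature comment:

# Hypothetical call: ladder generations 1-10 of one species against
# generations 1-6 of another (all names and numbers are placeholders).
run_generation_ladder(
    "connect_four",
    [("gbdt", 1, 10), ("naive_mcts", 1, 6)],
    num_workers=4,
)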
Example #13
def play_game(
    environment_name,
    Agent,
    agent_settings,
    replay_directory=None,
):
    env_module = get_env_module(environment_name)
    environment = env_module.Environment()

    mcts_agent_1 = Agent(environment=environment, **agent_settings)
    mcts_agent_2 = Agent(environment=environment, **agent_settings)

    # Play
    environment.add_agent(mcts_agent_1)
    environment.add_agent(mcts_agent_2)
    _, was_early_stopped = environment.run()

    mcts_agent_1.record_replay(replay_directory, was_early_stopped)
    mcts_agent_2.record_replay(replay_directory, was_early_stopped)
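
A hypothetical pairing of this play_game with configure_agent from the earlier examples; the environment, species, generation, and directory values are placeholders:

# Hypothetical self-play driver built from the pieces above.
Agent, agent_settings = configure_agent("connect_four", "gbdt", 5, "self_play")
play_game(
    "connect_four",
    Agent,
    agent_settings,
    replay_directory="./replays",
)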
Example #14
def retrieve_move_lists(environment, species, batch_num):
    env_module = get_env_module(environment)
    replay_directory = find_batch_directory(environment, species, batch_num)

    print("\nGetting move lists")
    games = []
    i = 1
    seen_games = set()
    for replay in iter_replay_data(replay_directory, env_module.State):
        if i % 500 == 0:
            print(f"{i} replays parsed")
        i += 1

        if replay.game_id in seen_games:
            continue
        seen_games.add(replay.game_id)

        moves = []
        for position in replay.positions:
            moves.append(position.chosen_action_id)
        games.append(moves)
    return games
Example #15
def sample_surprise(
    environment,
    species,
    batch,
    raw_error_range=None,
    discounted_error_range=None,
    max_state_span=MAX_STATE_SPAN,
    only_terminal=False,
):
    env_module = get_env_module(environment)
    rep_paths = sample_batch_replay_files(environment, species, batch)
    for replay_path in rep_paths:
        agent_replay = AgentReplay.from_path(replay_path, env_module.State)
        surprises = find_surprises(
            agent_replay=agent_replay,
            raw_error_range=raw_error_range,
            discounted_error_range=discounted_error_range,
            max_state_span=max_state_span,
            only_terminal=only_terminal,
        )
        if not surprises:
            continue
        analyze_surprise(agent_replay, replay_path, surprises[0])
        break
Example #16
    species = get_species(species_name)
    agent_class = species.AgentClass
    agent_settings = species.agent_settings(environment,
                                            generation,
                                            play_setting="play")

    # Fix the amount of time per move for bots
    if "move_consideration_time" in agent_settings:
        agent_settings["move_consideration_time"] = consideration_time

    return agent_class, agent_settings


P1_agent_class, p1_agent_settings = setup_agent(p1_bot_name,
                                                consideration_time)
P2_agent_class, p2_agent_settings = setup_agent(p2_bot_name,
                                                consideration_time)

env_module = get_env_module(environment)

environment = env_module.Environment()

agent_1 = P1_agent_class(environment=environment, **p1_agent_settings)
agent_2 = P2_agent_class(environment=environment, **p2_agent_settings)

environment.add_agent(agent_1)
environment.add_agent(agent_2)

environment.setup()
environment.run()
Example #17
def run_worker(args):
    # What is this, Perl??
    (
        environment,
        bot_species,
        batch_num,
        max_positions,
        worker_num,
        num_workers,
    ) = args

    env_class = get_env_module(environment)
    env = env_class.Environment()

    replay_directory = find_batch_directory(environment, bot_species,
                                            batch_num)

    print("Collecting Samples", replay_directory)
    value_meta = []
    value_features = []
    value_labels = []
    policy_meta = []
    policy_features = []
    policy_labels = []
    for position_num, sample in enumerate(
            generate_training_samples(
                replay_directory,
                env_class.State,
                env_class.generate_features,
                env,
                worker_num=worker_num,
                num_workers=num_workers,
            )):
        if position_num >= (max_positions - 1):
            break

        # game_bucket, features, labels (or just label for value)
        meta_info = sample[1]  # [game_bucket, generation, ...]
        if sample[0] == "value":
            value_meta.append(meta_info)
            value_features.append(sample[2])  # [[float, ...]]
            value_labels.append(sample[3])  # [int, ...]
        else:
            policy_meta.append(meta_info)
            policy_features.append(sample[2])
            policy_labels.append(sample[3])  # [[float, ...], ...]

    datasets = [
        ("value_meta", value_meta),
        ("value_features", value_features),
        ("value_labels", value_labels),
        ("policy_meta", policy_meta),
        ("policy_features", policy_features),
        ("policy_labels", policy_labels),
    ]
    for sample_type, data in datasets:
        basename = f"{sample_type}_samples_{worker_num + 1:04d}of{num_workers:04d}.npy"
        parsed_samples_path = f"{replay_directory}/{basename}"
        numpy.save(parsed_samples_path, data)
        print(f"Saved: {parsed_samples_path}")

    return position_num
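
The worker writes one .npy shard per dataset per worker, so reading a dataset back presumably means globbing the shard pattern and concatenating. A minimal sketch, with the replay directory path as a placeholder:

import glob

import numpy

# Hypothetical reader for the shards saved above; the directory is a placeholder.
replay_directory = "./batches/connect_four/gbdt/batch_0007"
shard_paths = sorted(glob.glob(f"{replay_directory}/value_features_samples_*.npy"))
value_features = numpy.concatenate(
    [numpy.load(path, allow_pickle=True) for path in shard_paths])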
Example #18
    def replay_game_from_position(
        self,
        initial_position,
        environment,
        species,
        generation,
        num_turns_to_play=1_000_000,
        agent_setting_overrides=None,
    ):
        # Setup game
        # - inline import needed for circular dep... XXX: fix
        from agent_configuration import configure_agent
        env_module = get_env_module(self.environment_name)
        env = env_module.Environment()

        Agent, agent_settings = configure_agent(environment, species,
                                                generation, "self_play")
        if agent_setting_overrides:
            for k, v in agent_setting_overrides.items():
                agent_settings[k] = v
        agent_1 = Agent(environment=env, **agent_settings)
        agent_2 = Agent(environment=env, **agent_settings)

        env.add_agent(agent_1)
        env.add_agent(agent_2)

        game_state = env.initial_state()