Beispiel #1
0
def run_game_worker(args):
    """Play one evaluation game between two bots and return its outcome.

    :args is a tuple (env_name, matchup_info), where matchup_info is
    [(bot_1_species, bot_1_gen), (bot_2_species, bot_2_gen)].

    Returns (matchup_info, outcomes) so the caller can attribute the
    result back to the matchup that produced it.
    """
    env_name, matchup_info = args

    environment = get_env_module(env_name).Environment()

    # Build both agents from their (species, generation) specs.
    agents = []
    for species_name, generation in matchup_info[:2]:
        species = get_species(species_name)
        agent_settings = species.agent_settings(env_name,
                                                generation,
                                                play_setting="evaluation")
        agents.append(
            species.AgentClass(environment=environment, **agent_settings))

    for agent in agents:
        environment.add_agent(agent)

    environment.setup()
    outcomes = environment.run()

    return (matchup_info, outcomes)
Beispiel #2
0
def run_worker(args):
    """Play :num_games self-play games for one worker process.

    :args is a tuple (environment, species, generation, num_games, batch).
    Replays are written under a batch-specific replay directory. Individual
    game failures are logged and skipped so one bad game doesn't kill the
    whole batch.

    Returns (batch, num_games) so the parent can tally completed work.
    """
    environment, species, generation, num_games, batch = args

    species_obj = get_species(species)
    Agent = species_obj.AgentClass
    agent_settings = species_obj.agent_settings(environment,
                                                generation,
                                                play_setting="self_play")

    replay_directory = build_replay_directory(environment, species, generation,
                                              batch)
    print(
        f"Self playing, bot: {species}-{generation}, batch: {replay_directory}"
    )

    total_elapsed = 0.0
    for game_number in range(num_games):
        game_start = time.time()
        try:
            self_play_cycle(environment, Agent, agent_settings,
                            replay_directory)
        except Exception as e:
            # Best-effort: report the failure and keep playing.
            print("GAME FAILED:", e)
            traceback.print_exc()
        game_elapsed = time.time() - game_start
        total_elapsed += game_elapsed
        # Progress report every 10th game.
        if game_number % 10 == 0:
            average = total_elapsed / (game_number + 1)
            print(
                f"GAME {game_number:05d}: {round(game_elapsed, 2)} seconds, AVERAGE: {round(average, 2)} seconds"
            )

    return batch, num_games
Beispiel #3
0
def setup_agent(bot_name, consideration_time):
    """Resolve a bot name like "mcts-12" into (agent_class, agent_settings).

    :bot_name is "<species>-<generation>"; :consideration_time overrides the
    agent's per-move time budget when the setting exists.

    NOTE(review): reads a free variable ``environment`` that is not defined
    in this function — presumably a module-level global; verify at call site.
    """
    species_name, generation_text = bot_name.split("-")
    generation = int(generation_text)

    species = get_species(species_name)
    agent_class = species.AgentClass
    agent_settings = species.agent_settings(environment,
                                            generation,
                                            play_setting="play")

    # Fix the amount of time per move for bots
    if "move_consideration_time" in agent_settings:
        agent_settings["move_consideration_time"] = consideration_time

    return agent_class, agent_settings
def run_generation_ladder(
    environment_name,
    species_list,  # [(species, low_gen, high_gen), ...]
    num_workers=1,
    entrants_per_species=7,
    num_ladder_rounds=300,
):
    """Run a ladder tournament between generations sampled from each species.

    For each (species, low_gen, high_gen) entry, up to :entrants_per_species
    generations are sampled evenly across the range and entered as bots.
    The ladder is played for :num_ladder_rounds rounds (previously a
    hard-coded 300), saving results after every round.

    :environment_name - environment the bots play in
    :species_list - [(species, lowest_generation, highest_generation), ...]
    :num_workers - parallelism passed through to the tournament ladder
    :entrants_per_species - max generations sampled per species
    :num_ladder_rounds - number of single-round ladder iterations to run
    """
    bots = []
    for species, lowest_generation, highest_generation in species_list:
        # Evenly sample generations across the range. linspace can round two
        # samples to the same int, so dedupe; sort for deterministic entry
        # order (the old list(set(...)) order was implementation-defined).
        generations = sorted({
            int(round(x))
            for x in numpy.linspace(lowest_generation, highest_generation,
                                    entrants_per_species)
        })
        sp = get_species(species)  # invariant per species; hoisted from loop
        for i in generations:
            agent_settings = sp.agent_settings(environment_name,
                                               i,
                                               play_setting="evaluation")
            print(f"Adding bot {species}-{i} to tourney")
            bots.append(Bot(
                f"{species}-{i}",
                sp.AgentClass,
                agent_settings,
            ))

    # Tournament key encodes the participating species/ranges plus a
    # timestamp so repeated runs don't collide.
    species_str = []
    for species, lg, hg in species_list:
        species_str.append(f"{species}-{lg}-{hg}")
    species_str.sort()
    species_str = "__".join(species_str)
    tournament_key = f"{round(time.time())}-{species_str}"
    results_path = build_tournament_results_path(tournament_key)

    env_class = get_env_module(environment_name)
    tournament = Tournament.setup(
        environment=env_class.Environment,
        bots=bots,
    )
    for _ in range(num_ladder_rounds):
        tournament.ladder(num_rounds=1, num_workers=num_workers)
        tournament.display_results()
        print(f"\nTournament id: {tournament_key}")
        # Save after every round so partial results survive interruption.
        tournament.save_results(results_path)
def run_faceoff(
    environment_name,
    species,
    generation,
    num_rounds,
    num_workers=1,
):
    """Pit a candidate generation against its predecessor.

    Enters exactly two bots — generation - 1 (the incumbent) and
    :generation (the contender) — into a ladder tournament for
    :num_rounds rounds.

    Returns the contender's matchup history against the incumbent.
    """
    env_class = get_env_module(environment_name)

    # The bot you're testing and the current best bot.
    sp = get_species(species)
    bots = []
    for gen in (generation - 1, generation):
        agent_settings = sp.agent_settings(environment_name,
                                           gen,
                                           play_setting="evaluation")
        bots.append(Bot(
            f"{species}-{gen}",
            sp.AgentClass,
            agent_settings,
        ))

    # Run the faceoff
    tournament = Tournament.setup(
        environment=env_class.Environment,
        bots=bots,
    )
    for _ in range(num_rounds):
        # 2 x 3 games each round
        tournament.ladder(num_rounds=1, num_workers=num_workers)
        tournament.display_results()

    # Return contender matchup: last bot added is the contender, first is
    # the incumbent it was measured against.
    contender_entrant = tournament.entrants[bots[-1].name]
    return contender_entrant.matchup_histories[bots[0].name]
Beispiel #6
0
def run(
    environment,
    species_name,
    num_batches,
    num_workers=settings.SELF_PLAY_THREADS,
    adjusted_win_rate_threshold=0.50,
    num_assessment_games=200,
):
    """Run :num_batches iterations of the self-play / train / assess loop.

    Each batch:
      1. Self-plays games with the current best generation.
      2. Trains candidate models for the next generation.
      3. Faces the candidate off against the current generation; the
         candidate is promoted only if its draw-adjusted win rate meets
         :adjusted_win_rate_threshold.
    Batch timing and the promotion decision are recorded via
    ``training_info.finalize_batch``.

    :environment - environment name
    :species_name - species whose models are being trained
    :num_batches - number of batch cycles to run
    :num_workers - parallelism for self-play, training, and assessment
    :adjusted_win_rate_threshold - minimum win rate (draws count 0.5)
    :num_assessment_games - approximate number of assessment games
    """
    num_faceoff_rounds = math.ceil(
        num_assessment_games /
        num_workers)  # Will play at least num_workers per round

    training_info = TrainingInfo.load(environment, species_name)
    final_training_batch = len(training_info.batches) + num_batches
    for _ in range(num_batches):
        current_batch = len(training_info.batches) + 1
        generation_self_play = training_info.current_self_play_generation()
        generation_training = generation_self_play + 1

        species = get_species(species_name)

        print(f"\n\nBatch {current_batch} / {final_training_batch}")
        print(f"environment: {environment}, species: {species_name}")
        print(f"self-play generation: {generation_self_play}")

        # Ensure directories are made/etc.
        # - Not sure this actually depends on generation, but maybe it will later.
        setup_filesystem(
            environment,
            species_name,
            generation_self_play,
        )

        # Self play another batch
        games_per_batch = species.self_play_settings(
            environment, generation_self_play)["num_games"]
        print(f"\n\nSelf Play ({games_per_batch} cycles)")
        self_play_start_time = time.time()
        run_self_play(
            environment,
            species_name,
            generation_self_play,
            games_per_batch,
            current_batch,
            num_workers,
        )
        self_play_end_time = time.time()
        elapsed = round(self_play_end_time - self_play_start_time, 1)
        # BUGFIX: elapsed rounds to 0.0 for runs under ~0.05s (e.g. tiny
        # smoke tests), which previously raised ZeroDivisionError here.
        cycles_per_second = (
            round(games_per_batch / elapsed, 1) if elapsed else float("inf"))
        print(f"\nSelf play finished in {elapsed} seconds")
        print(
            f"Cycles ran: {games_per_batch}, cycles per sec: {cycles_per_second}"
        )

        # Train new model
        print("\n\nTraining")
        training_start_time = time.time()
        run_model_training(
            environment,
            species_name,
            generation_training,
            current_batch,
            num_workers,
        )
        training_end_time = time.time()
        elapsed = round(training_end_time - training_start_time, 1)
        print(f"\nTrained new models in {elapsed} seconds")

        # Assess new model
        print("\n\nAssessing")
        assessment_start_time = time.time()
        contender_matchup_info = run_faceoff(
            environment,
            species_name,
            generation_training,
            num_rounds=num_faceoff_rounds,
            num_workers=num_workers,
        )
        assessment_end_time = time.time()
        elapsed = round(assessment_end_time - assessment_start_time, 1)
        print(f"\nAssessed new model in {elapsed} seconds")

        # Promote the trained generation only if it beats the threshold;
        # generation_trained stays None otherwise.
        adjusted_win_rate = contender_matchup_info.win_rate(draw_weight=0.5)
        print("Adjusted Win Rate:", round(adjusted_win_rate, 3))
        generation_trained = None
        if adjusted_win_rate >= adjusted_win_rate_threshold:
            generation_trained = generation_training
            print("FOUND NEW BOT:", generation_trained)

        training_info.finalize_batch(
            self_play_start_time=self_play_start_time,
            self_play_end_time=self_play_end_time,
            training_start_time=training_start_time,
            training_end_time=training_end_time,
            assessment_start_time=assessment_start_time,
            assessment_end_time=assessment_end_time,
            generation_self_play=generation_self_play,
            generation_trained=generation_trained,
            assessed_awr=adjusted_win_rate,
        )
    def replay_game_from_position(
        self,
        initial_position,
        environment,
        species,
        generation,
        num_turns_to_play=1_000_000,
        agent_setting_overrides=None,
    ):
        # Setup game
        # - inline import needed for circular dep... XXX: fix
        from species import get_species
        env_module = get_env_module(self.environment_name)
        env = env_module.Environment()

        sp = get_species(species)
        Agent = sp.AgentClass
        agent_settings = sp.agent_settings(environment, generation, play_setting="self_play")
        if agent_setting_overrides:
            for k, v in agent_setting_overrides.items():
                agent_settings[k] = v
        agent_1 = Agent(environment=env, **agent_settings)
        agent_2 = Agent(environment=env, **agent_settings)

        env.add_agent(agent_1)
        env.add_agent(agent_2)

        game_state = env.initial_state()

        # Let agents do any setup
        for agent in env.agents:
Beispiel #8
0
            species,
            batch_num=batch_num,
            num_workers=num_workers,
            positions_per_batch=positions_per_batch,
        )

    ####################
    # Train Models
    ####################
    value_model_path, policy_model_path = build_model_paths(
        environment,
        species,
        generation,
    )

    ts = get_species(species).training_settings(environment, generation)
    value_model = ts["ValueModel"](**ts["value_model_settings"])
    policy_model = ts["PolicyModel"](**ts["policy_model_settings"])

    model_directory = build_model_directory(environment, species, generation)
    model_settings = [
        ("value", value_model, value_model_path),
        ("policy", policy_model, policy_model_path),
    ]
    for model_type, model, model_path in model_settings:
        game_samples = load_game_samples(
            environment,
            species,
            batches=batch_nums,
            model_type=model_type,
        )