def run_game_worker(args):
    """Play a single evaluation game between two bots and return its outcome.

    :args: tuple of (env_name, matchup_info), where matchup_info is
        [(bot_1_species, bot_1_gen), (bot_2_species, bot_2_gen)].
    :returns: (matchup_info, outcomes) so the parent process can attribute
        results back to the matchup it dispatched.
    """
    env_name, matchup_info = args
    environment = get_env_module(env_name).Environment()

    # Build one evaluation-mode agent per (species, generation) entry.
    # Agents are all constructed before any is registered, matching the
    # original setup order.
    agents = []
    for bot_species, bot_generation in matchup_info:
        species = get_species(bot_species)
        agent_settings = species.agent_settings(
            env_name, bot_generation, play_setting="evaluation")
        agents.append(species.AgentClass(environment=environment, **agent_settings))
    for agent in agents:
        environment.add_agent(agent)

    environment.setup()
    outcomes = environment.run()
    return (matchup_info, outcomes)
def run_worker(args):
    """Run a batch of self-play games for one bot, logging per-game timing.

    :args: tuple of (environment, species, generation, num_games, batch).
    :returns: (batch, num_games) so the caller can tally completed work.
    """
    environment, species, generation, num_games, batch = args

    species_info = get_species(species)
    Agent = species_info.AgentClass
    agent_settings = species_info.agent_settings(
        environment, generation, play_setting="self_play")

    replay_directory = build_replay_directory(environment, species, generation, batch)
    print(
        f"Self playing, bot: {species}-{generation}, batch: {replay_directory}"
    )

    total_elapsed = 0.0
    for game_number in range(num_games):
        game_started = time.time()
        try:
            self_play_cycle(environment, Agent, agent_settings, replay_directory)
        except Exception as e:
            # One failed game shouldn't abort the whole batch; log and move on.
            print("GAME FAILED:", e)
            traceback.print_exc()
        game_elapsed = time.time() - game_started
        total_elapsed += game_elapsed
        # Report timing every 10th game to keep output manageable.
        if game_number % 10 == 0:
            print(
                f"GAME {game_number:05d}: {round(game_elapsed, 2)} seconds,"
                f" AVERAGE: {round(total_elapsed / (game_number + 1), 2)} seconds"
            )
    return batch, num_games
def setup_agent(bot_name, consideration_time):
    """Resolve a bot name like "mcts-7" into (agent_class, agent_settings).

    :bot_name: "<species>-<generation>" string.
    :consideration_time: per-move time budget applied to the bot's settings.

    NOTE(review): this reads a module-level ``environment`` that is not a
    parameter — confirm it is defined before this function is called.
    """
    species_name, generation_str = bot_name.split("-")
    generation = int(generation_str)

    sp = get_species(species_name)
    settings = sp.agent_settings(environment, generation, play_setting="play")

    # Fix the amount of time per move for bots
    if "move_consideration_time" in settings:
        settings["move_consideration_time"] = consideration_time

    return sp.AgentClass, settings
def run_generation_ladder(
    environment_name,
    species_list,  # [(species, low_gen, high_gen), ...]
    num_workers=1,
    entrants_per_species=7,
    num_rounds=300,
):
    """Run a ladder tournament across sampled generations of each species.

    For each (species, low_gen, high_gen) entry, up to *entrants_per_species*
    evenly-spaced generations are entered as bots.  Results are displayed and
    saved after every ladder round under a timestamped tournament key.

    :num_rounds: how many ladder rounds to play (previously hard-coded to 300;
        the default preserves the old behavior).
    """
    bots = []
    for species, lowest_generation, highest_generation in species_list:
        # Evenly sample the generation range; rounding can produce duplicates,
        # so de-duplicate — and sort, because iterating a bare set gave a
        # nondeterministic entrant order from run to run.
        generations = sorted({
            int(round(x))
            for x in numpy.linspace(
                lowest_generation, highest_generation, entrants_per_species)
        })
        sp = get_species(species)  # invariant across generations; hoisted
        for generation in generations:
            agent_settings = sp.agent_settings(
                environment_name, generation, play_setting="evaluation")
            print(f"Adding bot {species}-{generation} to tourney")
            bots.append(Bot(
                f"{species}-{generation}",
                sp.AgentClass,
                agent_settings,
            ))

    # Build a stable, human-readable tournament key from the species ranges.
    species_str = []
    for species, lg, hg in species_list:
        species_str.append(f"{species}-{lg}-{hg}")
    species_str.sort()
    species_str = "__".join(species_str)
    tournament_key = f"{round(time.time())}-{species_str}"
    results_path = build_tournament_results_path(tournament_key)

    env_class = get_env_module(environment_name)
    tournament = Tournament.setup(
        environment=env_class.Environment,
        bots=bots,
    )
    for _ in range(num_rounds):
        tournament.ladder(num_rounds=1, num_workers=num_workers)
        tournament.display_results()
        print(f"\nTournament id: {tournament_key}")
        # Save after every round so partial results survive interruption.
        tournament.save_results(results_path)
def run_faceoff(
    environment_name,
    species,
    generation,
    num_rounds,
    num_workers=1,
):
    """Pit the newly trained generation against the current best generation.

    :returns: the contender's matchup history versus the incumbent, i.e. the
        record of ``<species>-<generation>`` against ``<species>-<generation-1>``.
    """
    env_class = get_env_module(environment_name)

    # Two entrants: the incumbent (generation - 1) and the contender (generation).
    bots = []
    for entrant_generation in range(generation - 1, generation + 1):
        sp = get_species(species)
        entrant_settings = sp.agent_settings(
            environment_name, entrant_generation, play_setting="evaluation")
        bots.append(Bot(
            f"{species}-{entrant_generation}",
            sp.AgentClass,
            entrant_settings,
        ))

    # Run the faceoff
    tournament = Tournament.setup(
        environment=env_class.Environment,
        bots=bots,
    )
    for _ in range(num_rounds):
        tournament.ladder(num_rounds=1, num_workers=num_workers)  # 2 x 3 games each round
        tournament.display_results()

    # Return contender matchup
    contender_entrant = tournament.entrants[bots[-1].name]
    contender_matchup_info = contender_entrant.matchup_histories[bots[0].name]
    return contender_matchup_info
def run(
    environment,
    species_name,
    num_batches,
    num_workers=settings.SELF_PLAY_THREADS,
    adjusted_win_rate_threshold=0.50,
    num_assessment_games=200,
):
    """Run *num_batches* iterations of the self-play / train / assess loop.

    Each batch: (1) self-plays games with the current best generation,
    (2) trains a candidate next-generation model on the data, and
    (3) faces the candidate off against the incumbent.  The candidate is
    promoted only if its draw-weighted win rate meets
    *adjusted_win_rate_threshold*.  Timing and outcome of every batch are
    recorded via ``training_info.finalize_batch``.
    """
    num_faceoff_rounds = math.ceil(
        num_assessment_games / num_workers)  # Will play at least num_workers per round

    training_info = TrainingInfo.load(environment, species_name)
    final_training_batch = len(training_info.batches) + num_batches
    for _ in range(num_batches):
        current_batch = len(training_info.batches) + 1
        generation_self_play = training_info.current_self_play_generation()
        # The candidate generation being trained this batch.
        generation_training = generation_self_play + 1
        species = get_species(species_name)

        print(f"\n\nBatch {current_batch} / {final_training_batch}")
        print(f"environment: {environment}, species: {species_name}")
        print(f"self-play generation: {generation_self_play}")

        # Ensure directories are made/etc.
        # - Not sure this actually depends on generation, but maybe it will later.
        setup_filesystem(
            environment,
            species_name,
            generation_self_play,
        )

        # Self play another batch
        games_per_batch = species.self_play_settings(
            environment, generation_self_play)["num_games"]
        print(f"\n\nSelf Play ({games_per_batch} cycles)")
        self_play_start_time = time.time()
        run_self_play(
            environment,
            species_name,
            generation_self_play,
            games_per_batch,
            current_batch,
            num_workers,
        )
        self_play_end_time = time.time()
        elapsed = round(self_play_end_time - self_play_start_time, 1)
        cycles_per_second = round(games_per_batch / elapsed, 1)
        print(f"\nSelf play finished in {elapsed} seconds")
        print(
            f"Cycles ran: {games_per_batch}, cycles per sec: {cycles_per_second}"
        )

        # Train new model
        print("\n\nTraining")
        training_start_time = time.time()
        run_model_training(
            environment,
            species_name,
            generation_training,
            current_batch,
            num_workers,
        )
        training_end_time = time.time()
        elapsed = round(training_end_time - training_start_time, 1)
        print(f"\nTrained new models in {elapsed} seconds")

        # Assess new model
        print("\n\nAssessing")
        assessment_start_time = time.time()
        contender_matchup_info = run_faceoff(
            environment,
            species_name,
            generation_training,
            num_rounds=num_faceoff_rounds,
            num_workers=num_workers,
        )
        assessment_end_time = time.time()
        elapsed = round(assessment_end_time - assessment_start_time, 1)
        print(f"\nAssessed new model in {elapsed} seconds")

        # Draws count as half a win when deciding promotion.
        adjusted_win_rate = contender_matchup_info.win_rate(draw_weight=0.5)
        print("Adjusted Win Rate:", round(adjusted_win_rate, 3))

        # Promote the candidate only if it cleared the threshold; otherwise
        # generation_trained stays None and the incumbent remains best.
        generation_trained = None
        if adjusted_win_rate >= adjusted_win_rate_threshold:
            generation_trained = generation_training
            print("FOUND NEW BOT:", generation_trained)

        training_info.finalize_batch(
            self_play_start_time=self_play_start_time,
            self_play_end_time=self_play_end_time,
            training_start_time=training_start_time,
            training_end_time=training_end_time,
            assessment_start_time=assessment_start_time,
            assessment_end_time=assessment_end_time,
            generation_self_play=generation_self_play,
            generation_trained=generation_trained,
            assessed_awr=adjusted_win_rate,
        )
def replay_game_from_position( self, initial_position, environment, species, generation, num_turns_to_play=1_000_000, agent_setting_overrides=None, ): # Setup game # - inline import needed for circular dep... XXX: fix from species import get_species env_module = get_env_module(self.environment_name) env = env_module.Environment() sp = get_species(species) Agent = sp.AgentClass agent_settings = sp.agent_settings(environment, generation, play_setting="self_play") if agent_setting_overrides: for k, v in agent_setting_overrides.items(): agent_settings[k] = v agent_1 = Agent(environment=env, **agent_settings) agent_2 = Agent(environment=env, **agent_settings) env.add_agent(agent_1) env.add_agent(agent_2) game_state = env.initial_state() # Let agents do any setup for agent in env.agents:
species, batch_num=batch_num, num_workers=num_workers, positions_per_batch=positions_per_batch, ) #################### # Train Models #################### value_model_path, policy_model_path = build_model_paths( environment, species, generation, ) ts = get_species(species).training_settings(environment, generation) value_model = ts["ValueModel"](**ts["value_model_settings"]) policy_model = ts["PolicyModel"](**ts["policy_model_settings"]) model_directory = build_model_directory(environment, species, generation) model_settings = [ ("value", value_model, value_model_path), ("policy", policy_model, policy_model_path), ] for model_type, model, model_path in model_settings: game_samples = load_game_samples( environment, species, batches=batch_nums, model_type=model_type, )