def run_game_worker(args):
    # :matchup_info ~ [(bot_1_species, bot_1_gen), ...]
    env_name, matchup_info = args
    env_module = get_env_module(env_name)
    environment = env_module.Environment()

    bot_1_species, bot_1_gen = matchup_info[0]
    bot_2_species, bot_2_gen = matchup_info[1]

    sp = get_species(bot_1_species)
    Agent1 = sp.AgentClass
    agent_1_settings = sp.agent_settings(env_name, bot_1_gen, play_setting="evaluation")
    agent_1 = Agent1(environment=environment, **agent_1_settings)

    sp = get_species(bot_2_species)
    Agent2 = sp.AgentClass
    agent_2_settings = sp.agent_settings(env_name, bot_2_gen, play_setting="evaluation")
    agent_2 = Agent2(environment=environment, **agent_2_settings)

    environment.add_agent(agent_1)
    environment.add_agent(agent_2)
    environment.setup()
    outcomes = environment.run()
    return (matchup_info, outcomes)
def run_game_worker(args):
    # :matchup_info ~ [(bot_1_species, bot_1_generation), ...]
    environment_name, matchup_info = args
    env_module = get_env_module(environment_name)
    environment = env_module.Environment()

    bot_1_species, bot_1_generation = matchup_info[0]
    bot_2_species, bot_2_generation = matchup_info[1]

    Agent1, agent_1_settings = configure_agent(
        environment_name,
        bot_1_species,
        bot_1_generation,
        play_setting="evaluation",
    )
    agent_1 = Agent1(environment=environment, **agent_1_settings)

    Agent2, agent_2_settings = configure_agent(
        environment_name,
        bot_2_species,
        bot_2_generation,
        play_setting="evaluation",
    )
    agent_2 = Agent2(environment=environment, **agent_2_settings)

    environment.add_agent(agent_1)
    environment.add_agent(agent_2)
    outcomes, _ = environment.run()
    return (matchup_info, outcomes)
def batch_info_worker_task(args):
    (
        environment_name,
        species_name,
        batch_num,
        worker_num,
        num_workers,
    ) = args

    # Go through every replay and sum up stats
    env_class = get_env_module(environment_name)
    replay_directory = find_batch_directory(environment_name, species_name, batch_num)
    ws = WorkerStats()
    for agent_replay in iter_replay_data(
        replay_directory,
        env_class.State,
        worker_num,
        num_workers,
    ):
        ws.total_mcts_considerations += agent_replay.total_mcts_considerations()
        ws.num_games += 1
        ws.num_positions += len(agent_replay.positions)
    return astuple(ws)
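# NOTE: batch_info_worker_task assumes a WorkerStats dataclass and
# dataclasses.astuple. A minimal sketch consistent with the three fields it
# touches (an assumption, not necessarily the repo's actual definition):
from dataclasses import astuple, dataclass


@dataclass
class WorkerStats:
    # Running totals accumulated per worker while scanning replays
    total_mcts_considerations: int = 0
    num_games: int = 0
    num_positions: int = 0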
def run_faceoff(
    environment_name,
    bot_species,
    bot_generation,
    num_rounds,
    num_workers=1,
):
    env_class = get_env_module(environment_name)

    # The bot you're testing and the current best bot
    bots = []
    for i in range(bot_generation - 1, bot_generation + 1):
        Agent, agent_settings = configure_agent(environment_name, bot_species, i, "evaluation")
        bots.append(Bot(
            f"{bot_species}-{i}",
            Agent,
            agent_settings,
        ))

    # Run the faceoff
    tournament = Tournament.setup(
        environment=env_class.Environment,
        bots=bots,
    )
    for i in range(num_rounds):
        tournament.ladder(num_rounds=1, num_workers=num_workers)  # 2 x 3 games each round
        tournament.display_results()

    # Return contender matchup
    contender_entrant = tournament.entrants[bots[-1].name]
    contender_matchup_info = contender_entrant.matchup_histories[bots[0].name]
    return contender_matchup_info
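# Hypothetical invocation of run_faceoff; the environment and species names
# ("connect_four", "gbdt") are illustrative, not taken from this repo.
# matchup_info = run_faceoff(
#     environment_name="connect_four",
#     bot_species="gbdt",
#     bot_generation=5,
#     num_rounds=3,
#     num_workers=4,
# )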
def default_models(environment_name):
    env_module = get_env_module(environment_name)

    value_model = intuition_model.UnopinionatedValue()
    if hasattr(env_module, "BootstrapValue"):
        value_model = env_module.BootstrapValue()
    policy_model = intuition_model.UniformPolicy()

    return value_model, policy_model
def replay_video(replay_path, speed, first_n_moves=100_000_000):
    with open(replay_path, 'r') as f:
        replay = json.load(f)
    environment_name = replay["name"]
    moves = []
    for move_info in replay["replay"]:
        moves.append(move_info["move"])

    env_module = get_env_module(environment_name)
    environment = env_module.Environment()
    current_state = environment.initial_state()

    os.system("clear")
    for i, move in enumerate(moves):
        if i >= first_n_moves:
            break

        # Show state
        to_display = environment.text_display(current_state)
        os.system("clear")
        rprint(to_display)
        time.sleep(speed)

        if move is None:
            break
        current_state = environment.transition_state(current_state, move)
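# Example (hypothetical path): replay a stored game at half-second pacing,
# showing only the first 20 moves.
# replay_video("replays/some_game.json", speed=0.5, first_n_moves=20)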
def gbdt_configuration(
    environment_name,
    species,
    generation,
    play_setting="self_play",
):
    '''
    :play_setting ~ {"self_play", "evaluation"}
    '''
    env_module = get_env_module(environment_name)

    value_model, policy_model = default_models(environment_name)
    if generation > 1:
        value_model_path, policy_model_path = build_model_paths(
            environment_name,
            species,
            generation,
        )
        value_model = intuition_model.GBDTValue()
        value_model.load(value_model_path)

        policy_model = intuition_model.GBDTPolicy()
        policy_model.load(policy_model_path)

    move_consideration_time = 3.0
    temperature = 0.0
    full_search_proportion = 1.0
    full_search_steps = 800
    partial_search_steps = full_search_steps // 5

    # Play-setting dependent
    # - XXX Add Puct params
    lower_bound_time = 0.01
    if play_setting == "self_play":
        temperature = .3  # XXX Make a temp profile per move
        move_consideration_time = lower_bound_time
    elif play_setting == "evaluation":
        move_consideration_time = lower_bound_time

    return dict(
        species=species,
        generation=generation,
        game_tree=None,
        current_node=None,
        feature_extractor=env_module.generate_features,
        value_model=value_model,
        policy_model=policy_model,
        move_consideration_time=move_consideration_time,  # at least N seconds
        puct_explore_factor=1.0,
        puct_noise_alpha=0.4,
        puct_noise_influence=0.25,
        full_search_proportion=full_search_proportion,
        full_search_steps=full_search_steps,
        partial_search_steps=partial_search_steps,
        temperature=temperature,
    )
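# The puct_* settings above parameterize MCTS move selection. For reference
# only -- a sketch of the standard AlphaZero-style PUCT score that
# puct_explore_factor would scale; this is not this repo's implementation.
import math


def puct_score(child_value, prior, parent_visits, child_visits, explore_factor=1.0):
    # Mean value (exploitation) plus a prior-weighted exploration bonus that
    # decays as the child accumulates visits.
    exploration = explore_factor * prior * math.sqrt(parent_visits) / (1 + child_visits)
    return child_value + exploration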
def narrate_surprise(surprise, agent_replay):
    '''
    - Display surprise info
    - Starting from the initial position, display every game state up to the
      final state.
    '''
    env_module = get_env_module(agent_replay.environment_name)
    env = env_module.Environment()

    final_expectation = agent_replay.positions[surprise.final_position_index].expectation()
    initial_position_index = surprise.initial_position_index
    surprise_positions = agent_replay.positions[
        initial_position_index:surprise.final_position_index + 1]

    # Make position, error table
    rprint(f"\n{'POSITION':<15}{'EXPECTATION':<15}{'ERROR':<15}")
    for position in surprise_positions:
        is_my_move = agent_replay.agent_settings.agent_num == position.state.whose_move
        if not is_my_move:
            continue
        expectation = round(position.expectation(), 2)
        error_from_here = round(abs(final_expectation - expectation), 2)
        rprint(f"{position.index:<15}{expectation:<15}{error_from_here:<15}")

    for position in surprise_positions:
        player_num = position.state.whose_move + 1
        is_my_move = agent_replay.agent_settings.agent_num == position.state.whose_move
        position_progress = f"{position.index} / {surprise.final_position_index}"
        rprint(f"\n\n===== POSITION {position_progress}, PLAYER", player_num, "MOVING =====")

        current_state = position.state
        rprint(env.text_display(current_state))
        rprint("State Value:", round(position.value, 3))

        if is_my_move:
            display_best_moves(env, list(position.actions_considered.values()))

        action_id = position.chosen_action_id
        human_move = env.action_name_by_id.get(action_id)
        rprint(f"\nPlayer {player_num} chose [bold green]{human_move}[/bold green] ({action_id})")

        if is_my_move:
            expectation = round(position.expectation(), 2)
            error_from_here = round(abs(final_expectation - expectation), 3)
            rprint(f"Player {player_num} expects {expectation}, error: {error_from_here}")
        else:
            expectation = None
def agent_settings(self, environment, generation, play_setting):
    env_module = get_env_module(environment)

    # Setup value/policy models
    if generation == 1:
        value_model = intuition_model.UnopinionatedValue()
        if hasattr(env_module, "BootstrapValue"):
            value_model = env_module.BootstrapValue()
        policy_model = intuition_model.UniformPolicy()
    else:
        value_model_path, policy_model_path = build_model_paths(
            environment,
            self.name,
            generation,
        )
        value_model = intuition_model.GBDTValue()
        value_model.load(value_model_path)

        policy_model = intuition_model.GBDTPolicy()
        policy_model.load(policy_model_path)

    # Settings
    move_consideration_time = 3.0
    temperature = 0.0
    full_search_proportion = 1.0
    full_search_steps = 800
    partial_search_steps = full_search_steps // 5

    # Play-setting dependent
    # - XXX Add Puct params
    lower_bound_time = 0.01
    if play_setting == "self_play":
        temperature = .3  # XXX Make a temp profile per move
        move_consideration_time = lower_bound_time
    elif play_setting == "evaluation":
        move_consideration_time = lower_bound_time

    return dict(
        species=self.name,
        generation=generation,
        game_tree=None,
        current_node=None,
        feature_extractor=env_module.generate_features,
        value_model=value_model,
        policy_model=policy_model,
        move_consideration_time=move_consideration_time,  # at least N seconds
        puct_explore_factor=1.0,
        puct_noise_alpha=0.4,
        puct_noise_influence=0.25,
        full_search_proportion=full_search_proportion,
        full_search_steps=full_search_steps,
        partial_search_steps=partial_search_steps,
        temperature=temperature,
    )
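# puct_noise_alpha / puct_noise_influence look like AlphaZero-style Dirichlet
# root noise. A sketch of that mixing rule, assuming that is how the agent
# uses them (not confirmed by this excerpt):
import numpy


def noisy_root_priors(priors, alpha=0.4, influence=0.25):
    # Blend policy priors with Dirichlet noise at the root so self-play
    # occasionally explores moves the policy model ranks poorly.
    noise = numpy.random.dirichlet([alpha] * len(priors))
    return (1 - influence) * numpy.asarray(priors) + influence * noise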
def self_play_cycle(
    environment_name,
    Agent,
    agent_settings,
    replay_directory,
):
    env_module = get_env_module(environment_name)

    # Play a full game
    agent_replays = play_game(
        env_module,
        Agent,
        agent_settings,
        replay_directory=replay_directory,
    )

    # Replay more games from certain positions (if enabled)
    if not agent_settings.get("revisit_violated_expectations", False):
        return

    # Setup revisit settings
    # XXX: Tune
    # agent_settings["full_search_proportion"] = 1.0
    # agent_settings["temperature"] = 1.0
    num_revisits = 10
    raw_error_range = [-2.0, -0.50]
    upstream_turns = 1

    # Run revisits
    for agent_replay in agent_replays:
        # Get the position with the highest expectation violation, above a
        # certain threshold.
        surprises = find_surprises(
            agent_replay=agent_replay,
            raw_error_range=raw_error_range,
        )
        if not surprises:
            continue

        # Play :num_revisits games from a few turns upstream of that position.
        initial_index = max(surprises[0].initial_position_index - upstream_turns, 0)
        reconstruction_info = (agent_replay, agent_replay.positions[initial_index])
        for _ in range(num_revisits):
            play_game(
                env_module,
                Agent,
                agent_settings,
                replay_directory=replay_directory,
                reconstruction_info=reconstruction_info,
            )
def play_cli_video(
    self,
    initial_position_index,
    final_position_index,
    speed=0.3,
):
    env_module = get_env_module(self.environment_name)
    environment = env_module.Environment()
    current_state = environment.initial_state()

    current_position_idx = initial_position_index
    while current_position_idx <= final_position_index:
        current_state = self.positions[current_position_idx].state
        os.system("clear")
        print("Game", self.game_id)
        rprint(environment.text_display(current_state))
        time.sleep(speed)
        current_position_idx += 1
def run_generation_ladder(
    environment_name,
    species_list,  # [(species, low_gen, high_gen), ...]
    num_workers=1,
    entrants_per_species=7,
):
    bots = []
    for species, lowest_generation, highest_generation in species_list:
        num_entrants = entrants_per_species
        generations = [
            int(round(x))
            for x in numpy.linspace(lowest_generation, highest_generation, num_entrants)
        ]
        generations = list(set(generations))
        for i in generations:
            sp = get_species(species)
            Agent = sp.AgentClass
            agent_settings = sp.agent_settings(environment_name, i, play_setting="evaluation")
            print(f"Adding bot {species}-{i} to tourney")
            bots.append(Bot(
                f"{species}-{i}",
                Agent,
                agent_settings,
            ))

    species_str = []
    for species, lg, hg in species_list:
        species_str.append(f"{species}-{lg}-{hg}")
    species_str.sort()
    species_str = "__".join(species_str)

    tournament_key = f"{round(time.time())}-{species_str}"
    results_path = build_tournament_results_path(tournament_key)

    env_class = get_env_module(environment_name)
    tournament = Tournament.setup(
        environment=env_class.Environment,
        bots=bots,
    )
    for i in range(300):
        tournament.ladder(num_rounds=1, num_workers=num_workers)
        tournament.display_results()
        print(f"\nTournament id: {tournament_key}")
        tournament.save_results(results_path)
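# Hypothetical invocation; species/environment names are illustrative.
# run_generation_ladder(
#     environment_name="connect_four",
#     species_list=[("gbdt", 1, 20), ("gbdt_pcr", 1, 20)],
#     num_workers=4,
# )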
def play_game(
    environment_name,
    Agent,
    agent_settings,
    replay_directory=None,
):
    env_module = get_env_module(environment_name)
    environment = env_module.Environment()

    mcts_agent_1 = Agent(environment=environment, **agent_settings)
    mcts_agent_2 = Agent(environment=environment, **agent_settings)

    # Play
    environment.add_agent(mcts_agent_1)
    environment.add_agent(mcts_agent_2)
    _, was_early_stopped = environment.run()

    mcts_agent_1.record_replay(replay_directory, was_early_stopped)
    mcts_agent_2.record_replay(replay_directory, was_early_stopped)
def retrieve_move_lists(environment, species, batch_num):
    env_module = get_env_module(environment)
    replay_directory = find_batch_directory(environment, species, batch_num)

    print("\nGetting move lists")
    games = []
    i = 1
    seen_games = set()
    for replay in iter_replay_data(replay_directory, env_module.State):
        if i % 500 == 0:
            print(f"{i} replays parsed")
        i += 1

        if replay.game_id in seen_games:
            continue
        seen_games.add(replay.game_id)

        moves = []
        for position in replay.positions:
            moves.append(position.chosen_action_id)
        games.append(moves)
    return games
def sample_surprise(
    environment,
    species,
    batch,
    raw_error_range=None,
    discounted_error_range=None,
    max_state_span=MAX_STATE_SPAN,
    only_terminal=False,
):
    env_module = get_env_module(environment)
    rep_paths = sample_batch_replay_files(environment, species, batch)
    for replay_path in rep_paths:
        agent_replay = AgentReplay.from_path(replay_path, env_module.State)
        surprises = find_surprises(
            agent_replay=agent_replay,
            raw_error_range=raw_error_range,
            discounted_error_range=discounted_error_range,
            max_state_span=max_state_span,
            only_terminal=only_terminal,
        )
        if not surprises:
            continue
        analyze_surprise(agent_replay, replay_path, surprises[0])
        break
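# Example (illustrative arguments), reusing the raw-error threshold from
# self_play_cycle; surfaces the first qualifying surprise it finds.
# sample_surprise("connect_four", "gbdt", 12, raw_error_range=[-2.0, -0.5])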
def setup_agent(bot_name, consideration_time):
    # NOTE: the parsing of :bot_name into :species_name and :generation is
    # elided in this excerpt.
    species = get_species(species_name)
    agent_class = species.AgentClass
    agent_settings = species.agent_settings(environment, generation, play_setting="play")

    # Fix the amount of time per move for bots
    if "move_consideration_time" in agent_settings:
        agent_settings["move_consideration_time"] = consideration_time

    return agent_class, agent_settings

P1_agent_class, p1_agent_settings = setup_agent(p1_bot_name, consideration_time)
P2_agent_class, p2_agent_settings = setup_agent(p2_bot_name, consideration_time)

env_module = get_env_module(environment)
environment = env_module.Environment()

agent_1 = P1_agent_class(environment=environment, **p1_agent_settings)
agent_2 = P2_agent_class(environment=environment, **p2_agent_settings)

environment.add_agent(agent_1)
environment.add_agent(agent_2)

environment.setup()
environment.run()
def run_worker(args):
    # What is this, Perl??
    (
        environment,
        bot_species,
        batch_num,
        max_positions,
        worker_num,
        num_workers,
    ) = args
    env_class = get_env_module(environment)
    env = env_class.Environment()

    replay_directory = find_batch_directory(environment, bot_species, batch_num)
    print("Collecting Samples", replay_directory)

    value_meta = []
    value_features = []
    value_labels = []

    policy_meta = []
    policy_features = []
    policy_labels = []

    position_num = 0  # guard against an empty replay directory
    for position_num, sample in enumerate(
        generate_training_samples(
            replay_directory,
            env_class.State,
            env_class.generate_features,
            env,
            worker_num=worker_num,
            num_workers=num_workers,
        )
    ):
        if position_num >= (max_positions - 1):
            break

        # game_bucket, features, labels (or just label for value)
        meta_info = sample[1]  # [game_bucket, generation, ...]
        if sample[0] == "value":
            value_meta.append(meta_info)
            value_features.append(sample[2])  # [[float, ...]]
            value_labels.append(sample[3])  # [int, ...]
        else:
            policy_meta.append(meta_info)
            policy_features.append(sample[2])
            policy_labels.append(sample[3])  # [[float, ...], ...]

    datasets = [
        ("value_meta", value_meta),
        ("value_features", value_features),
        ("value_labels", value_labels),
        ("policy_meta", policy_meta),
        ("policy_features", policy_features),
        ("policy_labels", policy_labels),
    ]
    for sample_type, data in datasets:
        basename = f"{sample_type}_samples_{worker_num + 1:04d}of{num_workers:04d}.npy"
        parsed_samples_path = f"{replay_directory}/{basename}"
        numpy.save(parsed_samples_path, data)
        print(f"Saved: {parsed_samples_path}")

    return position_num
def replay_game_from_position(
    self,
    initial_position,
    environment,
    species,
    generation,
    num_turns_to_play=1_000_000,
    agent_setting_overrides=None,
):
    # Setup game
    # - inline import needed for circular dep... XXX: fix
    from agent_configuration import configure_agent

    env_module = get_env_module(self.environment_name)
    env = env_module.Environment()

    Agent, agent_settings = configure_agent(environment, species, generation, "self_play")
    if agent_setting_overrides:
        for k, v in agent_setting_overrides.items():
            agent_settings[k] = v

    agent_1 = Agent(environment=env, **agent_settings)
    agent_2 = Agent(environment=env, **agent_settings)

    env.add_agent(agent_1)
    env.add_agent(agent_2)

    game_state = env.initial_state()