def batch_surprise_replays(environment, species, batch):
    rep_paths = sample_batch_replay_files(environment, species, batch)
    collected_surprises = []
    for rep in rep_paths:
        print("finding good surprise replays")
        ar = AgentReplay.from_path(rep)

        # Get best surprises for a game
        ar.annotate_surprises()
        ar.surprises.sort(key=lambda x: x.score, reverse=True)
        surprises = list(ar.iter_surprises(only_terminal=False))
        if not surprises:
            # Some have no terminal?
            print("No terminal?")
            continue

        best_surprise = surprises[0]
        if best_surprise.score < 0.4:
            continue

        collected_surprises.append(ar)
        if len(collected_surprises) >= 20:
            break

    # Play replays of surprise spans
    while True:
        for rep in collected_surprises:
            best_surprise = rep.surprises[0]
            rep.cli_play(
                best_surprise.initial_position_index,
                best_surprise.final_position_index,
            )
def hindsight_convergence(
    environment,
    species,
    batch,
):
    # Gather all the surprises
    rep_paths = sample_batch_replay_files(environment, species, batch)
    surprises = []
    orig_divs = []
    orig_maps = []
    hind_divs = []
    hind_maps = []
    for rep in rep_paths:
        ar = AgentReplay.from_path(rep)
        ar.annotate_surprises(min_score=0.2, max_state_span=MAX_STATE_SPAN)
        for surprise in ar.iter_surprises(
            error_range=ERROR_RANGE,
            only_terminal=False,
        ):
            surprises.append(surprise)
            policy_overrides = ar.surprise_policy_overrides(
                surprise, only_opponents=True)
            orig_div, orig_map, hind_div, hind_map = hindsight_comparison(
                ar,
                species,
                2,
                14,
                surprise.initial_position_index,
                policy_overrides,
            )
            orig_divs.append(orig_div)
            orig_maps.append(orig_map)
            hind_divs.append(hind_div)
            hind_maps.append(hind_map)

            # Only take highest surprise
            break

    rprint("\n\n[bold green]Original[/bold green]")
    rprint("\nDivergence:")
    describe_sample(orig_divs)
    rprint("\nMAP@1:")
    describe_sample(orig_maps)

    rprint("\n[bold green]Hindsight[/bold green]")
    rprint("\nDivergence:")
    describe_sample(hind_divs)
    rprint("\nMAP@1:")
    describe_sample(hind_maps)
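# `describe_sample` and `hindsight_comparison` are defined elsewhere in this
# repo. For context, a minimal sketch of the kind of summary describe_sample
# is assumed to print (an assumption, not the repo's implementation):
#
#   import statistics
#
#   def describe_sample_sketch(sample):
#       if not sample:
#           print("  (empty sample)")
#           return
#       print(f"  n={len(sample)}"
#             f"  mean={statistics.mean(sample):.3f}"
#             f"  stdev={statistics.pstdev(sample):.3f}"
#             f"  min={min(sample):.3f}"
#             f"  max={max(sample):.3f}")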
def iter_replay_data(
    replay_directory,
    StateClass,
    worker_num=0,
    num_workers=1,
):
    for file_name in os.listdir(replay_directory):
        if not file_name.endswith(".json"):
            continue

        file_path = os.path.join(replay_directory, file_name)
        if not is_my_task(file_path, worker_num, num_workers):
            continue

        try:
            agent_replay = AgentReplay.from_path(file_path, StateClass)
        except Exception as e:
            print(f"Exception JSON decoding Replay: {file_name}", e)
            continue

        yield agent_replay
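# Usage sketch for iter_replay_data (the directory path and process() callback
# are illustrative placeholders, not part of this repo): shard replay decoding
# across workers so each worker only parses the files is_my_task() assigns it.
#
#   for agent_replay in iter_replay_data(
#       "replays/connect_four",  # hypothetical replay directory
#       env_module.State,
#       worker_num=0,
#       num_workers=4,
#   ):
#       process(agent_replay)  # placeholder for downstream analysis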
def replay_from_surprise(environment, bot, replay_id, position_span):
    # NOTE: position_span is currently unused.
    replay_path = AgentReplay.find_path(environment, replay_id)
    ar = AgentReplay.from_path(replay_path)
    ar.annotate_surprises(min_score=MIN_SCORE, max_state_span=MAX_STATE_SPAN)

    # Take the first surprise that falls within the error range
    best_surprise = None
    for surprise in ar.iter_surprises(error_range=ERROR_RANGE):
        best_surprise = surprise
        break
    if best_surprise is None:
        print("No surprises found for replay:", replay_id)
        return

    species, generation = bot.split("-")
    generation = int(generation)

    initial_position = best_surprise.initial_position_index
    policy_overrides = ar.surprise_policy_overrides(best_surprise)
    pprint.pprint(policy_overrides)

    ar.replay_game_from_position(
        initial_position,
        species,
        generation,  # original bot
        3,  # num moves
        {
            # "move_consideration_time": 0.2,
            "policy_overrides": policy_overrides,
        })
def sample_surprise(
    environment,
    species,
    batch,
    raw_error_range=None,
    discounted_error_range=None,
    max_state_span=MAX_STATE_SPAN,
    only_terminal=False,
):
    env_module = get_env_module(environment)
    rep_paths = sample_batch_replay_files(environment, species, batch)
    for replay_path in rep_paths:
        agent_replay = AgentReplay.from_path(replay_path, env_module.State)
        surprises = find_surprises(
            agent_replay=agent_replay,
            raw_error_range=raw_error_range,
            discounted_error_range=discounted_error_range,
            max_state_span=max_state_span,
            only_terminal=only_terminal,
        )
        if not surprises:
            continue

        analyze_surprise(agent_replay, replay_path, surprises[0])
        break
elif sys.argv[1] == "replay_game":
    # Replay a game from a replay position
    # connect_four gbdt-11 aa8fd68f3668-0 26-28
    environment, bot, replay_id, position_span, hindsight, num_moves = sys.argv[2:]
    species, generation = bot.split("-")
    generation = int(generation)

    ipos, fpos = position_span.split("-")
    position_span = [int(ipos), int(fpos)]

    hindsight = (hindsight == "hind")
    num_moves = int(num_moves)

    replay_path = AgentReplay.find_path(environment, replay_id)
    ar = AgentReplay.from_path(replay_path)
    ar.annotate_surprises()

    # Default to no overrides; with hindsight, derive policy overrides from
    # the surprise that matches the requested position span.
    policy_overrides = [None] * len(ar.agent_nums)
    if hindsight:
        surprise = ar.find_surprise(position_span=position_span)
        policy_overrides = ar.surprise_policy_overrides(surprise)

    ar.replay_game_from_position(
        position_span[0],
        species,
        generation,  # original bot
        num_moves,  # num moves
        {
            "full_search_steps": 800,
            # "move_consideration_time": 0.2,
            "policy_overrides": policy_overrides,
        })
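    # Example invocation sketch (arguments follow the unpacking above; the
    # final two values are illustrative placeholders, not taken from the repo):
    #
    #   <cli entrypoint> replay_game connect_four gbdt-11 aa8fd68f3668-0 \
    #       26-28 hind 10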