Example #1
def batch_surprise_replays(environment, species, batch):
    rep_paths = sample_batch_replay_files(environment, species, batch)

    print("finding good surprise replays")
    collected_surprises = []
    for rep in rep_paths:
        ar = AgentReplay.from_path(rep)

        # Get best surprises for a game
        ar.annotate_surprises()
        ar.surprises.sort(key=lambda x: x.score, reverse=True)
        surprises = list(ar.iter_surprises(only_terminal=False))
        if not surprises:
            # Skip replays with no detected surprises
            print("No surprises found:", rep)
            continue

        best_surprise = surprises[0]
        if best_surprise.score < 0.4:
            continue
        collected_surprises.append(ar)
        if len(collected_surprises) >= 20:
            break

    # Loop forever, replaying the surprise span of each collected replay
    while True:
        for rep in collected_surprises:
            best_surprise = rep.surprises[0]
            rep.cli_play(
                best_surprise.initial_position_index,
                best_surprise.final_position_index,
            )
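
A hedged invocation sketch (not from the original source): the environment/species/batch arguments mirror the hindsight_convergence call in the final example and are illustrative here.

# Hedged sketch: review the best surprise spans from batch 2 of the gbdt runs.
# Note batch_surprise_replays loops forever in cli_play until interrupted.
batch_surprise_replays("connect_four", "gbdt", 2)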
Example #2
def hindsight_convergence(
    environment,
    species,
    batch,
):
    # Gather all the surprises
    rep_paths = sample_batch_replay_files(environment, species, batch)
    surprises = []
    orig_divs = []
    orig_maps = []

    hind_divs = []
    hind_maps = []
    for rep in rep_paths:
        ar = AgentReplay.from_path(rep)
        ar.annotate_surprises(min_score=0.2, max_state_span=MAX_STATE_SPAN)
        for surprise in ar.iter_surprises(
                error_range=ERROR_RANGE,
                only_terminal=False,
        ):
            surprises.append(surprise)
            policy_overrides = ar.surprise_policy_overrides(
                surprise, only_opponents=True)
            orig_div, orig_map, hind_div, hind_map = hindsight_comparison(
                ar,
                species,
                2,
                14,
                surprise.initial_position_index,
                policy_overrides,
            )
            orig_divs.append(orig_div)
            orig_maps.append(orig_map)

            hind_divs.append(hind_div)
            hind_maps.append(hind_map)

            # Only take the highest-scoring surprise from each replay
            break

    rprint("\n\n[bold green]Original[/bold green]")
    rprint("\nDivergence:")
    describe_sample(orig_divs)
    rprint("\nMAP@1:")
    describe_sample(orig_maps)

    rprint("\n[bold green]Hindsight[/bold green]")
    rprint("\nDivergence:")
    describe_sample(hind_divs)
    rprint("\nMAP@1:")
    describe_sample(hind_maps)
Example #3
    def record_replay(self, output_dir):
        replay = AgentReplay.from_agent(self)

        # Write the replay JSON, creating the output directory if needed
        game_id = self.environment.id
        agent_num = self.agent_num
        output_path = f"{output_dir}/{game_id}-{agent_num}.json"
        full_path_mkdir_p(output_path)
        with open(output_path, 'w') as fout:
            fout.write(json.dumps(replay.marshall()))
        if settings.VERBOSITY >= 2:
            print("Saved replay:", output_path)
        return output_path, replay
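
A minimal usage sketch, not part of the original: `agent` is assumed to be a finished agent instance and the output directory is illustrative; `record_replay` returns the written path and the in-memory replay, as above.

# Hedged sketch: `agent` and the directory below are assumptions for illustration.
output_path, replay = agent.record_replay("replays/connect_four")
print("replay saved at:", output_path)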
Example #4
def iter_replay_data(
    replay_directory,
    StateClass,
    worker_num=0,
    num_workers=1,
):
    for file_name in os.listdir(replay_directory):
        if not file_name.endswith(".json"):
            continue
        file_path = os.path.join(replay_directory, file_name)

        if not is_my_task(file_path, worker_num, num_workers):
            continue

        try:
            agent_replay = AgentReplay.from_path(file_path, StateClass)
        except Exception as e:
            print(f"Failed to load replay {file_name}:", e)
            continue
        yield agent_replay
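
A minimal consumption sketch (assumptions: the directory path and worker counts are illustrative; get_env_module and env_module.State follow the pattern shown in the sample_surprise example further down).

# Hedged sketch: each worker only parses the files is_my_task assigns to it,
# so a replay directory can be processed in parallel without coordination.
env_module = get_env_module("connect_four")
for agent_replay in iter_replay_data(
    "replays/connect_four",   # illustrative directory
    env_module.State,
    worker_num=0,
    num_workers=2,
):
    agent_replay.annotate_surprises()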
Example #5
def replay_from_surprise(environment, bot, replay_id, position_span):
    # Locate the replay file for this environment/replay id
    replay_path = AgentReplay.find_path(environment, replay_id)
    ar = AgentReplay.from_path(replay_path)
    ar.annotate_surprises(min_score=MIN_SCORE, max_state_span=MAX_STATE_SPAN)

    # Take the first surprise in iteration order
    best_surprise = None
    for surprise in ar.iter_surprises(error_range=ERROR_RANGE):
        best_surprise = surprise
        break

    species, generation = bot.split("-")
    generation = int(generation)

    initial_position = best_surprise.initial_position_index
    policy_overrides = ar.surprise_policy_overrides(best_surprise)
    pprint.pprint(policy_overrides)

    ar.replay_game_from_position(
        initial_position,
        species,
        generation,  # original bot
        3,  # num moves
        {
            # "move_consideration_time": 0.2,
            "policy_overrides": policy_overrides,
        })
Example #6
def sample_surprise(
    environment,
    species,
    batch,
    raw_error_range=None,
    discounted_error_range=None,
    max_state_span=MAX_STATE_SPAN,
    only_terminal=False,
):
    env_module = get_env_module(environment)
    rep_paths = sample_batch_replay_files(environment, species, batch)
    for replay_path in rep_paths:
        agent_replay = AgentReplay.from_path(replay_path, env_module.State)
        surprises = find_surprises(
            agent_replay=agent_replay,
            raw_error_range=raw_error_range,
            discounted_error_range=discounted_error_range,
            max_state_span=max_state_span,
            only_terminal=only_terminal,
        )
        if not surprises:
            continue
        analyze_surprise(agent_replay, replay_path, surprises[0])
        break
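
A hedged invocation sketch, not from the original: the environment/species/batch arguments mirror the hindsight_convergence call in the last example, and the error range is illustrative.

# Hedged sketch: inspect one terminal surprise from batch 2 of the gbdt runs.
sample_surprise(
    "connect_four",
    "gbdt",
    2,
    raw_error_range=(0.4, 1.0),   # illustrative range
    only_terminal=True,
)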
Example #7
        hindsight_convergence("connect_four", "gbdt", 2)
        sys.exit()

    elif sys.argv[1] == "replay_game":
        # Replay a game from a replay position
        # connect_four gbdt-11 aa8fd68f3668-0 26-28
        (environment, bot, replay_id, position_span,
         hindsight, num_moves) = sys.argv[2:]
        species, generation = bot.split("-")
        generation = int(generation)
        ipos, fpos = position_span.split("-")
        position_span = [int(ipos), int(fpos)]
        hindsight = (hindsight == "hind")
        num_moves = int(num_moves)

        replay_path = AgentReplay.find_path(environment, replay_id)

        ar = AgentReplay.from_path(replay_path)

        ar.annotate_surprises()
        policy_overrides = [None] * len(ar.agent_nums)
        if hindsight:
            surprise = ar.find_surprise(position_span=position_span)
            policy_overrides = ar.surprise_policy_overrides(surprise)

        ar.replay_game_from_position(
            position_span[0],
            species,
            generation,  # original bot
            num_moves,  # num moves
            {