import pickle

import gym
import numpy as np
import tqdm

from agents.models.tile_map import TileSet

# SparseReplay is provided by the surrounding project; its import path is not
# shown in this snippet.


class LocalForwardModelData:
    def __init__(self, mask, observation=None, initial_pattern_count=100,
                 max_size=100000, tile_set: TileSet = None,
                 initial_tile_size=10, max_tiles=100):
        self.base_mask = mask
        self.observation_shape, self.mask_array, self.span = None, None, None
        self.padded_array, self.flattened_padded_array, self.observed_patterns = None, None, None

        if observation is not None:
            self.initialize(observation)

        if tile_set is not None:
            if tile_set.nr_of_known_tiles() > initial_tile_size:
                initial_tile_size = tile_set.nr_of_known_tiles()

        # data storage
        self.patterns = np.zeros((initial_pattern_count, np.sum(mask)+1))  # mask + action
        self.pattern_results = np.zeros((initial_pattern_count, initial_tile_size))  # nr of observations per result
        # TODO (to avoid computing argmax() for every prediction): store the
        # most likely tile and its number of occurrences
        # self.most_likely_result = np.zeros(initial_pattern_count)
        # self.occurrences_of_most_likely_result = np.zeros(initial_tile_size)

        self.current_max_patterns = initial_pattern_count
        self.total_max_patterns = max_size

        self.tile_set = tile_set
        self.current_max_tiles = initial_tile_size
        self.total_max_tiles = max_tiles

        self.known_patterns = dict()
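
# Hedged usage sketch (mirrors the call near the bottom of this file):
#   mask = CrossNeighborhoodPattern(1).get_mask()
#   lfm_data = LocalForwardModelData(mask, game_state)
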
def evaluate_agent(agent,
                   game,
                   levels,
                   repetitions=20,
                   tile_set=None,
                   result_folder=None):
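    """Evaluate `agent` on every level of `game` for `repetitions` trials and
    return, per level and trial, [ticks, total_score, winner, replay]."""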
    results_per_level = {}

    for level in levels:
        result_per_trial = dict()
        env = gym.make("gvgai-" + game + '-' + level)
        #env = gym_gvgai.make("gvgai-" + game + '-' + level)

        for repetition in range(repetitions):
            state_observation = env.reset()

            if tile_set is None:
                tile_set = TileSet(state_observation, 10)
            else:
                tile_set.add_new_frame(state_observation)

            actions = env.unwrapped.get_action_meanings()
            total_score = 0
            replay = SparseReplay(
                game, level,
                tile_set.map_frame_to_lfm_state(state_observation))

            pbar = tqdm.trange(2000,
                               desc=f"evaluation of: {game}-{level}")
            for tick in range(2000):
                pbar.update(1)
                action_id = agent.act(state_observation, actions)
                state_observation, diff_score, done, debug = env.step(
                    action_id)
                replay.add_frame(
                    action_id,
                    tile_set.map_frame_to_lfm_state(state_observation),
                    diff_score, debug["winner"])

                total_score += diff_score
                if done:
                    break
            pbar.close()
            result_per_trial[repetition] = [
                tick, total_score, debug["winner"], replay
            ]

            if result_folder is not None:
                tile_set.write_to_file(f"{result_folder}\\tile_set.bin")
                replay.write_to_file(
                    f"{result_folder}\\replay_data\\{level}_{repetition}.sreplay"
                )
                with open(f"{result_folder}\\results.txt", "wb") as file:
                    pickle.dump(results_per_level, file)
        results_per_level[level] = result_per_trial
        if result_folder is not None:
            # dump again so results.txt includes this completed level's trials
            # (the dump inside the repetition loop runs before the line above)
            with open(f"{result_folder}\\results.txt", "wb") as file:
                pickle.dump(results_per_level, file)
        env.close()

    return results_per_level
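
# Hedged usage sketch (game and level names as they appear elsewhere in this
# file):
#   results = evaluate_agent(agent, "golddigger", ["lvl0-v0", "lvl1-v0"],
#                            repetitions=5, result_folder="..\\results")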


# Example #3

import imageio
import matplotlib.pyplot as plt
import numpy as np
from sklearn.tree import DecisionTreeClassifier

from agents.models.tile_map import TileSet

# LocalForwardModel, ScoreModel, SimpleBFS, LocalForwardModelData,
# reset_levels_for_game and THRESHOLDS are provided by the surrounding
# project; their import paths are not shown in this snippet.


class ScoreModelData:  # class name inferred from ScoreModelData.load_from_file() below
    def __init__(self, tile_set: TileSet = None, initial_tile_size=10, max_tiles=1000,
                 initial_pattern_count=1000, max_size=10000):
        self.tile_set = tile_set

        # data storage
        if tile_set is not None:
            if tile_set.nr_of_known_tiles() > initial_tile_size:
                initial_tile_size = tile_set.nr_of_known_tiles()

        self.patterns = np.zeros((initial_pattern_count, initial_tile_size*4))  # mask + action and target columns
        self.reward = np.zeros(initial_pattern_count)  # mask + action and target columns

        # set maximum for rows and columns, current_max values represent the current numpy limits which can be extended
        self.current_max_rows = initial_pattern_count
        self.max_rows = max_size
        self.current_max_tiles = initial_tile_size
        self.max_tile_columns = max_tiles*4

        self.known_patterns = set()
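
# Hedged usage sketch (set_tile_set() is used the same way later in this
# file):
#   sm_data = ScoreModelData()
#   sm_data.set_tile_set(tile_set)
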
def figure_different_tile_sizes():
    initial_frame = imageio.imread('example_game_state.png')

    def plot_tiling(tile_size, filename):
        # overlay a red grid of tile_size-by-tile_size cells on the frame,
        # clamped to the 100x100 image, and save the figure
        plt.imshow(initial_frame[::-1, :, :], origin='lower')
        for x in range(0, initial_frame.shape[0] // tile_size + 1):
            plt.axhline(min(x * tile_size, 99), c="r")
        for y in range(0, initial_frame.shape[1] // tile_size + 1):
            plt.axvline(min(y * tile_size, 99), c="r")
        plt.xticks([0, 20, 40, 60, 80, 99], [0, 20, 40, 60, 80, 100], fontsize=16)
        plt.yticks([0, 20, 40, 60, 80, 99], [0, 20, 40, 60, 80, 100], fontsize=16)
        plt.savefig(filename)
        plt.show()

    plot_tiling(10, "tiling_tile_size_10.pdf")
    tile_set_10 = TileSet(initial_frame, 10)
    tile_set_10.add_new_frame(initial_frame)
    tile_set_10.plot_tile_dict("tile_dict_tile_size_10.pdf")

    plot_tiling(20, "tiling_tile_size_20.pdf")
    tile_set_20 = TileSet(initial_frame, 20)
    tile_set_20.add_new_frame(initial_frame)
    tile_set_20.plot_tile_dict("tile_dict_tile_size_20.pdf")

        # NOTE: the function header and the `if` condition enclosing this
        # block are truncated in the source.
            evaluation_levels = ['lvl0-v0', 'lvl1-v0']
        else:
            evaluation_levels = [
                'lvl0-v0', 'lvl1-v0', 'lvl2-v0', 'lvl3-v0', 'lvl4-v0'
            ]
        print("evaluate", game, "on", evaluation_levels)

        reset_levels_for_game(
            "gvgai-" + game,
            f"..\\data\\additional_training_levels\\gvgai-{game}\\")

        success = False

        # load symmetric data
        model_folder = f"..\\data\\paper_training\\{game}\\symmetric_active_learning_optimized\\"
        tile_set = TileSet.load_from_file(model_folder + "tile_set.bin")
        tile_set.threshold = THRESHOLDS[game]
        lfm_data = LocalForwardModelData.load_from_file(model_folder +
                                                        "lfm_data.bin")
        sm_data = ScoreModelData.load_from_file(model_folder + "sm_data.bin",
                                                tile_set)

        # train Models
        lfm = LocalForwardModel(DecisionTreeClassifier(), lfm_data, True)
        lfm.train()
        sm = ScoreModel(DecisionTreeClassifier(), sm_data, True)
        sm.train()

        agent = SimpleBFS(100,
                          forward_model=lfm,
                          score_model=sm)
        # NOTE: the remaining SimpleBFS arguments and the rest of this snippet
        # are truncated in the source.


# Example #6

import pickle
import random
import time

import gym
import tqdm

from agents.models.tile_map import TileSet

# SparseReplay and MaxUnknownPatternsStateActionSelection are provided by the
# surrounding project; their import paths are not shown in this snippet.

def actively_train_agent_model(level_generator,
                               lfm_data,
                               sm_data,
                               tile_set_threshold,
                               tile_set=None,
                               target_folder=None,
                               max_ticks=1500,
                               reps_per_level=20):
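    """Actively collect training data for the local forward model (lfm_data)
    and score model (sm_data) on levels yielded by `level_generator`, writing
    model checkpoints to `target_folder`. Returns per-run results together
    with the updated data stores and tile set."""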
    results = dict()

    skip_level = None
    agent = MaxUnknownPatternsStateActionSelection(lfm_data, sm_data)
    for training_run, (env, game, level,
                       initial_frame) in enumerate(level_generator):
        if level == skip_level:
            if target_folder is not None and ((training_run + 1) %
                                              reps_per_level) == 0:
                tile_set.write_to_file(
                    f"{target_folder}\\checkpoints\\tile_set_{level.split('-')[0]}.bin"
                )
                lfm_data.write_to_file(
                    f"{target_folder}\\checkpoints\\lfm_data_{level.split('-')[0]}.bin"
                )
                sm_data.write_to_file(
                    f"{target_folder}\\checkpoints\\sm_data_{level.split('-')[0]}.bin"
                )
            continue
        else:
            skip_level = None

        if tile_set is None:
            tile_set = TileSet(initial_frame, 10, None, tile_set_threshold)
            sm_data.set_tile_set(tile_set)
            lfm_data.set_tile_set(tile_set)

        prev_lfm_state = tile_set.map_frame_to_lfm_state(initial_frame,
                                                         add_frame=True)
        lfm_data.initialize(prev_lfm_state)
        replay = SparseReplay(game, level, prev_lfm_state)
        actions = env.unwrapped.get_action_meanings()

        # agent.reset_patterns()
        agent.candidate_solution = [-1, ()]
        agent.set_state_shape(prev_lfm_state.shape)

        # play the level
        done = False
        unknown_patterns = -1

        total_score = 0
        ticks = 0
        pbar = tqdm.trange(
            max_ticks,
            desc=f"training run {training_run}: playing {game}-{level}")
        while not done and unknown_patterns != 0:
            # select and apply action
            #action_id, unknown_patterns = agent.select_action(prev_lfm_state, range(len(actions)))
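            # active selection is disabled above; fall back to random actions
            # and pin unknown_patterns to 1 so the loop keeps running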
            action_id, unknown_patterns = random.choice(range(len(actions))), 1
            if unknown_patterns == 0 and ticks == 0:
                # the level has nothing new to offer. skip until another level is given to the agent
                skip_level = level
                break

            frame, diff_score, done, debug = env.step(action_id)
            ticks += 1

            # update models
            lfm_state = tile_set.map_frame_to_lfm_state(frame, add_frame=True)
            lfm_patterns = lfm_data.add_observation(prev_lfm_state, action_id,
                                                    lfm_state)
            sm_pattern = sm_data.add_observation(prev_lfm_state, lfm_state,
                                                 diff_score)
            #agent.add_observation(prev_lfm_state, lfm_state, action_id, lfm_patterns, sm_pattern,
            #                      range(len(actions)), done)
            env.render()

            # update records
            pbar.update(1)
            prev_lfm_state = lfm_state
            total_score += diff_score
            replay.add_frame(action_id, lfm_state,
                             len(agent.known_lfm_patterns), debug["winner"])
            if ticks >= max_ticks:
                break

        pbar.close()

        if target_folder is not None and ticks > 0:
            # write temporary checkpoints and final versions of all models,
            # plus the replay of this training run
            tile_set.write_to_file(f"{target_folder}tile_set_tmp.bin")
            lfm_data.write_to_file(f"{target_folder}lfm_data_tmp.bin")
            sm_data.write_to_file(f"{target_folder}sm_data_tmp.bin")
            tile_set.write_to_file(f"{target_folder}tile_set.bin")
            lfm_data.write_to_file(f"{target_folder}lfm_data.bin")
            sm_data.write_to_file(f"{target_folder}sm_data.bin")
            replay.write_to_file(
                f"{target_folder}replay_data\\{training_run}_{game}_{level}.sreplay"
            )
            # replay.create_animation(tile_set, f'..\\replay\\replay_video\\{training_run}_{game}_{level}.mp4')

        results[training_run] = [replay.length, total_score, debug["winner"]]
    return results, lfm_data, sm_data, tile_set
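
# Hedged usage sketch (level_generator is expected to yield
# (env, game, level, initial_frame) tuples, as unpacked above; the threshold
# value is illustrative only):
#   results, lfm_data, sm_data, tile_set = actively_train_agent_model(
#       level_generator, lfm_data, sm_data, tile_set_threshold=0.1)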
def evaluate_lfm_agent(agent,
                       game,
                       levels,
                       repetitions=20,
                       tile_set=None,
                       result_folder=None,
                       max_ticks=2000):
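    """Evaluate an LFM-based agent on each level of `game`, measuring the
    average per-decision time; returns [ticks, total_score, winner, replay]
    per level and trial."""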
    results_per_level = {}

    for level in levels:
        result_per_trial = dict()
        #env = gym_gvgai.make("gvgai-" + game + '-' + level)
        env = gym.make("gvgai-" + game + '-' + level)

        for repetition in range(repetitions):
            state_observation = env.reset()
            agent_time = 0

            if tile_set is None:
                tile_set = TileSet(state_observation, 10)

            actions = env.unwrapped.get_action_meanings()
            #print(actions)
            total_score = 0
            lfm_state = tile_set.classify_frame_to_lfm_state(state_observation)
            replay = SparseReplay(game, level, lfm_state)
            agent.re_initialize(lfm_state, range(len(actions)))

            pbar = tqdm.trange(max_ticks,
                               desc=f"evaluation of: {game}-{level}")
            for tick in range(max_ticks):
                pbar.update(1)

                start = time.time()
                action_id = agent.get_next_action(lfm_state,
                                                  range(len(actions)))
                end = time.time()
                agent_time += end - start

                state_observation, diff_score, done, debug = env.step(
                    action_id)
                lfm_state = tile_set.classify_frame_to_lfm_state(
                    state_observation)
                replay.add_frame(action_id, lfm_state, diff_score,
                                 debug["winner"])
                env.render()
                total_score += diff_score

                if debug["winner"] == 'PLAYER_WINS' or debug["winner"] == 3:
                    diff_score += 1000
                if debug["winner"] == 'PLAYER_LOSES' or (
                        debug["winner"] == 2 and game != "waterpuzzle"):
                    diff_score -= 1000

                start = time.time()
                agent.add_observation(lfm_state, diff_score,
                                      range(len(actions)))
                end = time.time()
                agent_time += end - start

                if done:
                    break
            pbar.close()
            result_per_trial[repetition] = [
                tick, total_score, debug["winner"], replay
            ]
            print(
                debug["winner"],
                f"after {tick} ticks with an average decision time of {round(agent_time/max(1, tick),3)}s"
            )

            if result_folder is not None:
                tile_set.write_to_file(f"{result_folder}\\tile_set.bin")
                replay.write_to_file(
                    f"{result_folder}\\replay_data\\{level}_{repetition}.sreplay"
                )
                with open(f"{result_folder}\\results.txt", "wb") as file:
                    pickle.dump(results_per_level, file)
        results_per_level[level] = result_per_trial
        env.close()

        if result_folder is not None:
            # re-dump the results now that this level's trials are included;
            # the tile set and replays were already written inside the loop
            with open(f"{result_folder}\\results.txt", "wb") as file:
                pickle.dump(results_per_level, file)

    return results_per_level
from agents.models.tile_map import TileSet

if __name__ == "__main__":
    waterpuzzle = TileSet.load_from_file("tile_set_waterpuzzle.bin")
    waterpuzzle.plot_tile_dict()

    golddigger = TileSet.load_from_file("tile_set_golddigger.bin")
    golddigger.plot_tile_dict()

    treasurekeeper = TileSet.load_from_file("tile_set_treasurekeeper.bin")
    treasurekeeper.plot_tile_dict()


# Example #9

import matplotlib.pyplot as plt
import numpy as np

from agents.models.tile_map import TileSet

# prediction_errors() is defined elsewhere in the source file and is not
# shown in this snippet.

def plot_state(lfm_state, filename):
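    # relies on the module-level `tile_set` loaded in the __main__ block below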
    state = tile_set.map_lfm_state_to_frame(lfm_state)
    plt.imshow(state[::-1, :, :], origin='lower')
    for x in [1, 2, 3, 4]:
        plt.axhline(x * 10 - 0.5, c="k")
        plt.axvline(x * 10 - 0.5, c="k")
    plt.axis("off")
    plt.savefig(filename)
    plt.show()


if __name__ == "__main__":
    prediction_errors()

    tile_set = TileSet.load_from_file("tile_set.bin")
    tile_set.plot_tile_dict()

    # assemble a small 5x5 state out of tile ids and render it
    original = np.zeros((5, 5))
    original[:, :] = 5
    original[0, 0] = 0
    original[0, 1:4] = 1
    original[0, 4] = 2
    original[1:4, 0] = 3
    original[1:4, 4] = 3
    original[4, 0] = 7
    original[4, 1:4] = 1
    original[4, 4] = 8
    original[2, 2] = 18
    plot_state(original, "original_state.pdf")


# NOTE: a separate snippet begins here; its function header and loop setup
# (fig, ax, mask, patterns, i, x, y, filename) are truncated in the source.
        pattern_to_plot = np.zeros(mask.shape)
        pattern_to_plot[:] = -2
        pattern_to_plot[mask] = patterns[i, :-1]

        pattern = visualize_pattern(pattern_to_plot, tile_set)
        ax[x, y].imshow(pattern)
        ax[x, y].axis('off')
    if filename is not None:
        plt.savefig(filename)
    plt.show()

    return fig, ax


if __name__ == "__main__":
    tile_set = TileSet.load_from_file("tile_set_waterpuzzle.bin")
    tile_set.plot_tile_dict()

    # original game-state
    game_state = np.zeros((3, 4))
    game_state[0, 0] = 1
    game_state[0, 1:4] = 1
    game_state[2, 1:4] = 1
    game_state[1, 0] = 3
    game_state[0, 1] = 0
    game_state[1, 1] = 4

    # CrossNeighborhoodPattern is provided by the surrounding project and is
    # not shown in this snippet
    mask = CrossNeighborhoodPattern(1).get_mask()
    lfm_data = LocalForwardModelData(mask, game_state)
    tile_set.plot_lfm_state(game_state, "original_lfm_state.pdf")