def __init__(self, mask, observation=None, initial_pattern_count=100, max_size=100000,
             tile_set: TileSet = None, initial_tile_size=10, max_tiles=100):
    self.base_mask = mask
    self.observation_shape, self.mask_array, self.span = None, None, None
    self.padded_array, self.flattened_padded_array, self.observed_patterns = None, None, None
    if observation is not None:
        self.initialize(observation)
    if tile_set is not None:
        if tile_set.nr_of_known_tiles() > initial_tile_size:
            initial_tile_size = tile_set.nr_of_known_tiles()

    # data storage
    self.patterns = np.zeros((initial_pattern_count, np.sum(mask) + 1))  # mask + action
    self.pattern_results = np.zeros((initial_pattern_count, initial_tile_size))  # nr of observations per result
    # todo (to avoid computing arg_max() for every prediction): store the most likely tile and its number of occurrences
    # self.most_likely_result = np.zeros(initial_pattern_count)
    # self.occurrences_of_most_likely_result = np.zeros(initial_tile_size)

    self.current_max_patterns = initial_pattern_count
    self.total_max_patterns = max_size
    self.tile_set = tile_set
    self.current_max_tiles = initial_tile_size
    self.total_max_tiles = max_tiles
    self.known_patterns = dict()
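# Usage sketch (not part of the original file): how this pattern store is built
# elsewhere in the repository; the 3x4 observation is a hypothetical placeholder.
#
#   mask = CrossNeighborhoodPattern(1).get_mask()
#   lfm_data = LocalForwardModelData(mask, np.zeros((3, 4)))
#   # each pattern row stores the masked neighborhood plus one action column
#   assert lfm_data.patterns.shape[1] == np.sum(mask) + 1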
def evaluate_agent(agent, game, levels, repetitions=20, tile_set=None, result_folder=None):
    results_per_level = {}
    for level in levels:
        result_per_trial = dict()
        env = gym.make("gvgai-" + game + '-' + level)
        # env = gym_gvgai.make("gvgai-" + game + '-' + level)
        for repetition in range(repetitions):
            state_observation = env.reset()
            if tile_set is None:
                tile_set = TileSet(state_observation, 10)
            else:
                tile_set.add_new_frame(state_observation)
            actions = env.unwrapped.get_action_meanings()
            total_score = 0
            replay = SparseReplay(game, level, tile_set.map_frame_to_lfm_state(state_observation))

            pbar = tqdm.trange(2000, desc=f"evaluation of: {game}-{level}")
            for tick in range(2000):
                pbar.update(1)
                action_id = agent.act(state_observation, actions)
                state_observation, diff_score, done, debug = env.step(action_id)
                replay.add_frame(action_id, tile_set.map_frame_to_lfm_state(state_observation),
                                 diff_score, debug["winner"])
                total_score += diff_score
                if done:
                    break
            pbar.close()

            result_per_trial[repetition] = [tick, total_score, debug["winner"], replay]
            if result_folder is not None:
                tile_set.write_to_file(f"{result_folder}\\tile_set.bin")
                replay.write_to_file(f"{result_folder}\\replay_data\\{level}_{repetition}.sreplay")
                with open(f"{result_folder}\\results.txt", "wb") as file:
                    pickle.dump(results_per_level, file)
        results_per_level[level] = result_per_trial
        env.close()
    return results_per_level
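# Usage sketch (not part of the original file): evaluating a trained agent on the
# first two levels of a game; `bfs_agent` and the result folder are placeholders.
#
#   results = evaluate_agent(bfs_agent, "golddigger", ['lvl0-v0', 'lvl1-v0'],
#                            repetitions=20, result_folder="..\\data\\evaluation\\golddigger")
#   # results maps level -> {repetition: [ticks, total_score, winner, replay]}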
def __init__(self, tile_set: TileSet = None, initial_tile_size=10, max_tiles=1000,
             initial_pattern_count=1000, max_size=10000):
    self.tile_set = tile_set

    # data storage
    if tile_set is not None:
        if tile_set.nr_of_known_tiles() > initial_tile_size:
            initial_tile_size = tile_set.nr_of_known_tiles()
    self.patterns = np.zeros((initial_pattern_count, initial_tile_size * 4))  # mask + action and target columns
    self.reward = np.zeros(initial_pattern_count)  # observed reward per pattern row

    # set maximum for rows and columns; the current_max values represent the current numpy limits, which can be extended
    self.current_max_rows = initial_pattern_count
    self.max_rows = max_size
    self.current_max_tiles = initial_tile_size
    self.max_tile_columns = max_tiles * 4
    self.known_patterns = set()
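# Usage sketch (not part of the original file): judging by its use elsewhere in
# the repository, this is the constructor of ScoreModelData. The assertion merely
# restates the allocation above; `my_tile_set` is a hypothetical TileSet.
#
#   sm_data = ScoreModelData(tile_set=my_tile_set)
#   assert sm_data.patterns.shape == (1000, sm_data.current_max_tiles * 4)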
def figure_different_tile_sizes():
    initial_frame = imageio.imread('example_game_state.png')

    plt.imshow(initial_frame[::-1, :, :], origin='lower')
    for x in range(0, initial_frame.shape[0] // 10 + 1):
        plt.axhline(min(x * 10, 99), c="r")
    for y in range(0, initial_frame.shape[1] // 10 + 1):
        plt.axvline(min(y * 10, 99), c="r")
    plt.xticks([0, 20, 40, 60, 80, 99], [0, 20, 40, 60, 80, 100], fontsize=16)
    plt.yticks([0, 20, 40, 60, 80, 99], [0, 20, 40, 60, 80, 100], fontsize=16)
    plt.savefig("tiling_tile_size_10.pdf")
    plt.show()

    tile_set_10 = TileSet(initial_frame, 10)
    tile_set_10.add_new_frame(initial_frame)
    tile_set_10.plot_tile_dict("tile_dict_tile_size_10.pdf")

    plt.imshow(initial_frame[::-1, :, :], origin='lower')
    for x in range(0, initial_frame.shape[0] // 20 + 1):
        plt.axhline(min(x * 20, 99), c="r")
    for y in range(0, initial_frame.shape[1] // 20 + 1):
        plt.axvline(min(y * 20, 99), c="r")
    plt.xticks([0, 20, 40, 60, 80, 99], [0, 20, 40, 60, 80, 100], fontsize=16)
    plt.yticks([0, 20, 40, 60, 80, 99], [0, 20, 40, 60, 80, 100], fontsize=16)
    plt.savefig("tiling_tile_size_20.pdf")
    plt.show()

    tile_set_20 = TileSet(initial_frame, 20)
    tile_set_20.add_new_frame(initial_frame)
    tile_set_20.plot_tile_dict("tile_dict_tile_size_20.pdf")
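# The two grid-overlay passes above differ only in the tile size. A small helper
# (a sketch, not part of the original file) would remove the duplication; clamping
# to shape - 1 keeps the outermost line inside the axes, as the min(..., 99) calls
# do for the 100x100 example frame.
#
# def plot_tile_grid(frame, tile_size, filename):
#     plt.imshow(frame[::-1, :, :], origin='lower')
#     for x in range(0, frame.shape[0] // tile_size + 1):
#         plt.axhline(min(x * tile_size, frame.shape[0] - 1), c="r")
#     for y in range(0, frame.shape[1] // tile_size + 1):
#         plt.axvline(min(y * tile_size, frame.shape[1] - 1), c="r")
#     plt.savefig(filename)
#     plt.show()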
        evaluation_levels = ['lvl0-v0', 'lvl1-v0']
    else:
        evaluation_levels = ['lvl0-v0', 'lvl1-v0', 'lvl2-v0', 'lvl3-v0', 'lvl4-v0']
    print("evaluate", game, "on", evaluation_levels)
    reset_levels_for_game("gvgai-" + game,
                          f"..\\data\\additional_training_levels\\gvgai-{game}\\")
    success = False

    # load symmetric data
    model_folder = f"..\\data\\paper_training\\{game}\\symmetric_active_learning_optimized\\"
    tile_set = TileSet.load_from_file(model_folder + "tile_set.bin")
    tile_set.threshold = THRESHOLDS[game]
    lfm_data = LocalForwardModelData.load_from_file(model_folder + "lfm_data.bin")
    sm_data = ScoreModelData.load_from_file(model_folder + "sm_data.bin", tile_set)

    # train models
    lfm = LocalForwardModel(DecisionTreeClassifier(), lfm_data, True)
    lfm.train()
    sm = ScoreModel(DecisionTreeClassifier(), sm_data, True)
    sm.train()

    agent = SimpleBFS(100, forward_model=lfm, score_model=sm,
def actively_train_agent_model(level_generator, lfm_data, sm_data, tile_set_threshold,
                               tile_set=None, target_folder=None, max_ticks=1500, reps_per_level=20):
    results = dict()
    skip_level = None
    agent = MaxUnknownPatternsStateActionSelection(lfm_data, sm_data)
    for training_run, (env, game, level, initial_frame) in enumerate(level_generator):
        if level == skip_level:
            # still write checkpoints for skipped levels at the end of each block of repetitions
            if target_folder is not None and ((training_run + 1) % reps_per_level) == 0:
                tile_set.write_to_file(f"{target_folder}\\checkpoints\\tile_set_{level.split('-')[0]}.bin")
                lfm_data.write_to_file(f"{target_folder}\\checkpoints\\lfm_data_{level.split('-')[0]}.bin")
                sm_data.write_to_file(f"{target_folder}\\checkpoints\\sm_data_{level.split('-')[0]}.bin")
            continue
        else:
            skip_level = None

        if tile_set is None:
            tile_set = TileSet(initial_frame, 10, None, tile_set_threshold)
            sm_data.set_tile_set(tile_set)
            lfm_data.set_tile_set(tile_set)
        prev_lfm_state = tile_set.map_frame_to_lfm_state(initial_frame, add_frame=True)
        lfm_data.initialize(prev_lfm_state)
        replay = SparseReplay(game, level, prev_lfm_state)
        actions = env.unwrapped.get_action_meanings()
        # agent.reset_patterns()
        agent.candidate_solution = [-1, ()]
        agent.set_state_shape(prev_lfm_state.shape)

        # play the level
        done = False
        unknown_patterns = -1
        total_score = 0
        ticks = 0
        pbar = tqdm.trange(max_ticks, desc=f"training run {training_run}: playing {game}-{level}")
        while not done and unknown_patterns != 0:
            # select and apply action
            # action_id, unknown_patterns = agent.select_action(prev_lfm_state, range(len(actions)))
            action_id, unknown_patterns = random.choice(range(len(actions))), 1
            if unknown_patterns == 0 and ticks == 0:
                # the level has nothing new to offer; skip it until another level is given to the agent
                skip_level = level
                break
            frame, diff_score, done, debug = env.step(action_id)
            ticks += 1

            # update models
            lfm_state = tile_set.map_frame_to_lfm_state(frame, add_frame=True)
            lfm_patterns = lfm_data.add_observation(prev_lfm_state, action_id, lfm_state)
            sm_pattern = sm_data.add_observation(prev_lfm_state, lfm_state, diff_score)
            # agent.add_observation(prev_lfm_state, lfm_state, action_id, lfm_patterns, sm_pattern,
            #                       range(len(actions)), done)
            env.render()

            # update records
            pbar.update(1)
            prev_lfm_state = lfm_state
            total_score += diff_score
            replay.add_frame(action_id, lfm_state, len(agent.known_lfm_patterns), debug["winner"])
            if ticks >= max_ticks:
                break
        pbar.close()

        # checkpoint the current models (tmp copies plus the final names) and the replay
        if target_folder is not None and ticks > 0:
            tile_set.write_to_file(f"{target_folder}tile_set_tmp.bin")
            lfm_data.write_to_file(f"{target_folder}lfm_data_tmp.bin")
            sm_data.write_to_file(f"{target_folder}sm_data_tmp.bin")
            tile_set.write_to_file(f"{target_folder}tile_set.bin")
            lfm_data.write_to_file(f"{target_folder}lfm_data.bin")
            sm_data.write_to_file(f"{target_folder}sm_data.bin")
            replay.write_to_file(f"{target_folder}replay_data\\{training_run}_{game}_{level}.sreplay")
            # replay.create_animation(tile_set, f'..\\replay\\replay_video\\{training_run}_{game}_{level}.mp4')
        results[training_run] = [replay.length, total_score, debug["winner"]]
    return results, lfm_data, sm_data, tile_set
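# Usage sketch (not part of the original file): driving the active-learning loop.
# `make_level_generator` is a hypothetical helper yielding (env, game, level,
# initial_frame) tuples, matching the unpacking above; the threshold value is
# likewise only an example.
#
#   mask = CrossNeighborhoodPattern(1).get_mask()
#   results, lfm_data, sm_data, tile_set = actively_train_agent_model(
#       make_level_generator("golddigger"), LocalForwardModelData(mask), ScoreModelData(),
#       tile_set_threshold=0.1, target_folder="..\\data\\training\\golddigger\\")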
def evaluate_lfm_agent(agent, game, levels, repetitions=20, tile_set=None,
                       result_folder=None, max_ticks=2000):
    results_per_level = {}
    for level in levels:
        result_per_trial = dict()
        # env = gym_gvgai.make("gvgai-" + game + '-' + level)
        env = gym.make("gvgai-" + game + '-' + level)
        for repetition in range(repetitions):
            state_observation = env.reset()
            agent_time = 0
            if tile_set is None:
                tile_set = TileSet(state_observation, 10)
            actions = env.unwrapped.get_action_meanings()
            # print(actions)
            total_score = 0
            lfm_state = tile_set.classify_frame_to_lfm_state(state_observation)
            replay = SparseReplay(game, level, lfm_state)
            agent.re_initialize(lfm_state, range(len(actions)))

            pbar = tqdm.trange(max_ticks, desc=f"evaluation of: {game}-{level}")
            for tick in range(max_ticks):
                pbar.update(1)

                # time the agent's decision
                start = time.time()
                action_id = agent.get_next_action(lfm_state, range(len(actions)))
                end = time.time()
                agent_time += end - start

                state_observation, diff_score, done, debug = env.step(action_id)
                lfm_state = tile_set.classify_frame_to_lfm_state(state_observation)
                replay.add_frame(action_id, lfm_state, diff_score, debug["winner"])
                env.render()
                total_score += diff_score

                # shape the reward signal on win/loss before updating the agent
                if debug["winner"] == 'PLAYER_WINS' or debug["winner"] == 3:
                    diff_score += 1000
                if debug["winner"] == 'PLAYER_LOSES' or (debug["winner"] == 2 and game != "waterpuzzle"):
                    diff_score -= 1000

                start = time.time()
                agent.add_observation(lfm_state, diff_score, range(len(actions)))
                end = time.time()
                agent_time += end - start
                if done:
                    break
            pbar.close()

            result_per_trial[repetition] = [tick, total_score, debug["winner"], replay]
            print(debug["winner"],
                  f"after {tick} ticks with an average decision time of {round(agent_time / max(1, tick), 3)}s")
            if result_folder is not None:
                tile_set.write_to_file(f"{result_folder}\\tile_set.bin")
                replay.write_to_file(f"{result_folder}\\replay_data\\{level}_{repetition}.sreplay")
                with open(f"{result_folder}\\results.txt", "wb") as file:
                    pickle.dump(results_per_level, file)
        results_per_level[level] = result_per_trial
        env.close()

    if result_folder is not None:
        tile_set.write_to_file(f"{result_folder}\\tile_set.bin")
        replay.write_to_file(f"{result_folder}\\replay_data\\{level}_{repetition}.sreplay")
        with open(f"{result_folder}\\results.txt", "wb") as file:
            pickle.dump(results_per_level, file)
    return results_per_level
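# Usage sketch (not part of the original file): evaluating a model-based agent,
# e.g. the SimpleBFS agent built in the script above; `bfs_agent` is a placeholder.
#
#   results = evaluate_lfm_agent(bfs_agent, "waterpuzzle", ['lvl0-v0', 'lvl1-v0'],
#                                repetitions=20, tile_set=tile_set,
#                                result_folder="..\\data\\evaluation\\waterpuzzle")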
from agents.models.tile_map import TileSet

if __name__ == "__main__":
    waterpuzzle = TileSet.load_from_file("tile_set_waterpuzzle.bin")
    waterpuzzle.plot_tile_dict()

    golddigger = TileSet.load_from_file("tile_set_golddigger.bin")
    golddigger.plot_tile_dict()

    treasurekeeper = TileSet.load_from_file("tile_set_treasurekeeper.bin")
    treasurekeeper.plot_tile_dict()
def plot_state(lfm_state, filename):
    state = tile_set.map_lfm_state_to_frame(lfm_state)
    plt.imshow(state[::-1, :, :], origin='lower')
    for x in [1, 2, 3, 4]:
        plt.axhline(x * 10 - 0.5, c="k")
        plt.axvline(x * 10 - 0.5, c="k")
    plt.axis("off")
    plt.savefig(filename)
    plt.show()


if __name__ == "__main__":
    prediction_errors()

    tile_set = TileSet.load_from_file("tile_set.bin")
    tile_set.plot_tile_dict()

    # hand-crafted 5x5 example state (values are tile indices)
    original = np.zeros((5, 5))
    original[:, :] = 5
    original[0, 0] = 0
    original[0, 1:4] = 1
    original[0, 4] = 2
    original[1:4, 0] = 3
    original[1:4, 4] = 3
    original[4, 0] = 7
    original[4, 1:4] = 1
    original[4, 4] = 8
    original[2, 2] = 18
    plot_state(original, "original_state.pdf")
            pattern_to_plot = np.zeros(mask.shape)
            pattern_to_plot[:] = -2
            pattern_to_plot[mask] = patterns[i, :-1]
            pattern = visualize_pattern(pattern_to_plot, tile_set)
            ax[x, y].imshow(pattern)
            ax[x, y].axis('off')
    if filename is not None:
        plt.savefig(filename)
    plt.show()
    return fig, ax


if __name__ == "__main__":
    tile_set = TileSet.load_from_file("tile_set_waterpuzzle.bin")
    tile_set.plot_tile_dict()

    # original game-state
    game_state = np.zeros((3, 4))
    game_state[0, 0] = 1
    game_state[0, 1:4] = 1
    game_state[2, 1:4] = 1
    game_state[1, 0] = 3
    game_state[0, 1] = 0
    game_state[1, 1] = 4

    mask = CrossNeighborhoodPattern(1).get_mask()
    lfm_data = LocalForwardModelData(mask, game_state)
    tile_set.plot_lfm_state(game_state, "original_lfm_state.pdf")