def trial(robot: Agent) -> List[int]:
    """Train *robot* for 5000 maze episodes and record the episode lengths.

    Args:
        robot: learning agent exposing choose_action, update_state_history
            and learn.

    Returns:
        The number of steps the maze counted in each of the 5000 episodes.
    """
    environment = Maze()
    steps_per_episode = []
    for episode in range(5000):
        # Lightweight progress indicator every 1000 episodes.
        if episode % 1000 == 0:
            print(episode)
        while not environment.is_complete():
            position, _ = environment.get_state_and_reward()
            chosen = robot.choose_action(position, environment.allowed_states[position])
            environment.update_maze(chosen)
            position, reward = environment.get_state_and_reward()
            robot.update_state_history(position, reward)
            # Cap runaway episodes: past 1000 steps, teleport the robot to a
            # fixed cell (presumably at/near the goal — TODO confirm in Maze).
            if environment.steps > 1000:
                environment.robot_position = State(5, 5)
        robot.learn()
        steps_per_episode.append(environment.steps)
        environment.reset()
    return steps_per_episode
class Tester():
    # Drives a pretrained agent through the testing map while rendering the
    # run in the Game visualization.

    def __init__(self):
        # Build the testing scenario: coins and the hero placed on the
        # testing map, then a Game for visualization and a Maze environment.
        coins = generate_coins(testing_map.data)
        self.hero_position = generate_hero(testing_map.data)
        episodeSnapshot = EpisodeSnapshot('static/map/testing.json', coins, self.hero_position)
        self.game = Game(episodeSnapshot, True)
        # episode_threshold=None — presumably disables the per-episode step
        # cap during testing; confirm against Maze's constructor.
        self.env = Maze(episode_threshold=None)

    def on_coin_grabbed(self, maze_position):
        """Works as a callback when a coin is grabbed. A new one is generated
        based on this event.

        Args:
            maze_position (tuple): a position of the currently collected coin
        """
        # Replace the collected coin with a freshly generated one, show it in
        # the visualization, and refresh the environment's reward matrix.
        coin = generate_coins(testing_map.data, count=1, grabbed_coin_position=maze_position)[0]
        self.game.append_coin(coin)
        self.env.update_reward_matrix()

    def test(self):
        """Testing process of the agent differs a bit from the training one.
        There isn't an explicit way of how to end the episode unless the agent
        steps out of the road, so coins are generated automatically in an
        infinite loop.
        """
        agent.load_pretrained_model()
        obs = self.env.reset(testing_map.data, self.hero_position, self.on_coin_grabbed)
        done = False
        actions = []          # queue of actions awaiting visualization
        reward_sum = 0
        visualization_done = False
        while not visualization_done:
            # Step the agent ahead of the visualization until its episode ends.
            if not done:
                action = agent.choose_action(obs)
                next_obs, reward, done = self.env.step(action)
                reward_sum += reward
                obs = next_obs
                actions.append(action)
            # Replay queued actions one per frame; with an empty queue just
            # keep rendering.
            if actions:
                visualization_done, _ = self.game.play(Move(actions.pop(0)))
            else:
                visualization_done, _ = self.game.play()
            # Allow the user to close the window at any time.
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    visualization_done = True
                    done = True
        self.game.gameOver(reward_sum)
# Configure root logging with a timestamped, leveled format.
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(asctime)s: %(message)s", datefmt="%H:%M:%S")

maze = np.array([[0, 1, 0, 0, 0, 0, 0, 0],
                 [0, 1, 0, 1, 0, 1, 0, 0],
                 [0, 0, 0, 1, 1, 0, 1, 0],
                 [0, 1, 0, 1, 0, 0, 0, 0],
                 [1, 0, 0, 1, 0, 1, 0, 0],
                 [0, 0, 0, 1, 0, 1, 1, 1],
                 [0, 1, 1, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 1, 0, 0]])  # 0 = free, 1 = occupied

game = Maze(maze)

# The `if 0:` guards below toggle mutually exclusive experiments; flip one
# to 1 to run it.
if 0:  # only show the maze
    game.render("moves")
    game.reset()

if 0:  # play using random model
    model = RandomModel(game)
    model.train()

if 0:  # train using tabular Q-learning
    model = QTableModel(game, name="QTableModel")
    h, w, _, _ = model.train(discount=0.90, exploration_rate=0.10, learning_rate=0.10, episodes=200)

if 0:  # train using tabular Q-learning and an eligibility trace (aka TD-lamba)
    model = QTableTraceModel(game)
    # NOTE(review): this call is truncated in this chunk — the remaining
    # keyword arguments are not visible here.
    h, w, _, _ = model.train(discount=0.90,
def test_single(model, maze_width=11):
    """Run one game of *model* on a freshly generated Prim's maze.

    Args:
        model: agent handed through to play_game.
        maze_width: side length of the generated maze (default 11).

    Returns:
        A tuple of (play_game result, the Maze instance that was played).
    """
    grid = generate_prims_maze_matrix(maze_width)
    board = Maze(grid)
    # Start from a uniformly chosen free cell.
    start = random.choice(board.free_cells)
    board.reset(start)
    outcome = play_game(model, board, start)
    return outcome, board