def trial(
    robot: Agent,
    *,
    episodes: int = 5000,
    max_steps: int = 1000,
    report_every: int = 1000,
) -> List[int]:
    """Run ``robot`` through repeated maze episodes and record the step counts.

    One ``Maze`` is created up front and reset after every episode.  Within
    an episode the robot repeatedly picks an action for the current state,
    the maze applies it, and the resulting state and reward are appended to
    the robot's state history.  ``robot.learn()`` is called once per episode,
    after the maze reports completion.

    Args:
        robot: Agent that chooses actions and learns from its state history.
        episodes: Number of episodes to run.
        max_steps: Step budget per episode.  Once ``maze.steps`` exceeds it,
            the robot position is overwritten with ``State(5, 5)``.
        report_every: Print the episode index whenever it is a multiple of
            this value; ``0`` disables the progress output.

    Returns:
        The value of ``maze.steps`` at the end of each episode, in order.
    """
    maze = Maze()
    move_history: List[int] = []

    for episode in range(episodes):
        # Guard against report_every == 0 so "no progress output" does not
        # turn into a ZeroDivisionError.
        if report_every and episode % report_every == 0:
            print(episode)

        while not maze.is_complete():
            # Only the state is needed to choose an action; the reward that
            # comes with it is discarded.
            state, _ = maze.get_state_and_reward()
            action = robot.choose_action(state, maze.allowed_states[state])
            maze.update_maze(action)

            # Record the state reached by the move together with its reward.
            state, reward = maze.get_state_and_reward()
            robot.update_state_history(state, reward)

            if maze.steps > max_steps:
                # Step budget exhausted: relocate the robot so the loop can
                # terminate.  NOTE(review): State(5, 5) is presumably the
                # goal cell that makes is_complete() true -- confirm against
                # the Maze implementation.
                maze.robot_position = State(5, 5)

        robot.learn()
        move_history.append(maze.steps)
        maze.reset()

    return move_history
import numpy as np
from environment import Maze
from agent import Agent
from constants import maze_configuration


def _play_episode(maze, robot, step_limit):
    """Drive ``robot`` through ``maze`` until the maze reports game over.

    Every turn reads the current state, asks the robot for an action among
    ``maze.allowed_states[state]``, applies that action to the maze, and
    stores the resulting state and reward in the robot's state history.
    Once ``maze.steps`` exceeds ``step_limit`` the robot position is
    overwritten with ``(5, 5)``.
    """
    while not maze.isGameOver():
        # The reward paired with the pre-move state is not needed here.
        current_state, _ = maze.getStateAndReward()
        chosen_action = robot.chooseAction(
            current_state, maze.allowed_states[current_state]
        )
        maze.updateMaze(chosen_action)

        next_state, reward = maze.getStateAndReward()
        robot.updateStateHistory(next_state, reward)

        if maze.steps > step_limit:
            # NOTE(review): (5, 5) is presumably the goal cell, used to force
            # the episode to end -- confirm against maze_configuration.
            maze.robot_position = (5, 5)


if __name__ == '__main__':
    EPISODES = 5000
    REPORT_INTERVAL = 1000
    STEP_LIMIT = 1000

    maze = Maze(maze_configuration)
    robot = Agent(maze.allowed_states, alpha=0.1, exploration_factor=0.25)

    # Steps taken per episode; collected here but not consumed by the code
    # visible in this script.
    move_history = []

    # Show the reward map before any learning has happened.
    robot.printRewardMap()

    for episode in range(EPISODES):
        if episode % REPORT_INTERVAL == 0:
            print(episode)
            robot.printRewardMap()

        _play_episode(maze, robot, STEP_LIMIT)

        # Learn first, then read the step count, then start from a fresh maze.
        robot.learn()
        move_history.append(maze.steps)
        maze = Maze(maze_configuration)