def valueIterationSmall(self, state, reward, gamma, V1):
    """Run a fixed number of Bellman sweeps over V1 for small layouts.

    Variant of valueIteration that does not compute buffers around ghosts;
    per the original author it is meant for maps smaller than 10 x 10.

    Args:
        state: game state handed to the api helpers.
        reward: value added to every updated cell.
        gamma: discount factor; must satisfy 0 < gamma <= 1.
        V1: mapping keyed by (x, y) that supports .copy(); updated in place.

    Returns:
        V1, the same mapping that was passed in.

    Raises:
        ValueError: if gamma is outside (0, 1].
    """
    corners = api.corners(state)
    # Cells that are never updated: walls, food, ghosts and capsules.
    # One set replaces four list scans per cell per sweep.
    fixed_cells = (set(api.walls(state)) | set(api.food(state))
                   | set(api.ghosts(state)) | set(api.capsules(state)))
    maxWidth = self.getLayoutWidth(corners) - 1
    maxHeight = self.getLayoutHeight(corners) - 1

    if not (0 < gamma <= 1):
        raise ValueError("MDP must have a gamma between 0 and 1.")

    # 100 sweeps of the Bellman update.
    for _ in range(100):
        V = V1.copy()  # snapshot of the previous sweep's values
        for i in range(maxWidth):
            for j in range(maxHeight):
                if (i, j) not in fixed_cells:
                    V1[(i, j)] = reward + gamma * self.getTransition(i, j, V)
    return V1
def getAction(self, state):
    """Rebuild the internal maps, run value iteration and make the planned move.

    Layouts whose height and width (each minus one) are both below 8 get
    100 calls of valIterS; everything else gets 50 calls of valIterM.
    """
    # Refresh the agent's internal picture of the world.
    self.makeMap(state)
    self.addWallsToMap(state)
    self.addConsumablesToMap(state)
    self.updateGhosts(state)

    position = api.whereAmI(state)
    legal = api.legalActions(state)
    api.ghosts(state)  # result was never used; call kept as in the original
    layout_corners = api.corners(state)
    rows = self.getLayoutHeight(layout_corners)
    cols = self.getLayoutWidth(layout_corners)

    if rows - 1 < 8 and cols - 1 < 8:
        sweeps, sweep, discount = 100, self.valIterS, 0.68
    else:
        sweeps, sweep, discount = 50, self.valIterM, 0.8
    for _ in range(sweeps):
        sweep(state, discount, -0.1)

    chosen = self.plannedMove(position[0], position[1])

    # STOP is removed from the options before the move is submitted.
    if Directions.STOP in legal:
        legal.remove(Directions.STOP)
    return api.makeMove(chosen, legal)
def initialize(self, state):
    """Build the internal map on first use and flag the agent as initialised.

    The map is a list of columns indexed as self.map[x][y]. Unknown cells
    hold "?", walls "W", food and capsules "F", and Pacman's cell "0".
    Nothing is rebuilt when self.map is already set.
    """
    foods = api.food(state)
    corners = api.corners(state)
    capsules = api.capsules(state)
    walls = api.walls(state)
    pacman = api.whereAmI(state)
    pacman_x = pacman[0]
    pacman_y = pacman[1]

    if self.map is None:
        # Largest coordinates seen among the corners (never below 0).
        max_x = 0
        max_y = 0
        for corner in corners:
            if corner[0] > max_x:
                max_x = corner[0]
            if corner[1] > max_y:
                max_y = corner[1]

        # +1 so that the largest coordinate is itself a valid index.
        # Loop variables are deliberately not named x / y: in Python 2 a
        # list comprehension leaks them and would overwrite Pacman's position.
        self.map = [["?" for _row in range(max_y + 1)]
                    for _col in range(max_x + 1)]

        for wall in walls:
            self.map[wall[0]][wall[1]] = "W"
        for food in foods:
            self.map[food[0]][food[1]] = "F"
        for capsule in capsules:
            self.map[capsule[0]][capsule[1]] = "F"
        self.map[pacman_x][pacman_y] = "0"

    self.init = True
def valueIteration(self, state, reward, gamma, valueMap):
    """Run 50 Bellman sweeps over valueMap and return it.

    Args:
        state: game state handed to the api helpers.
        reward: value added to every updated cell; stored on self.reward.
        gamma: discount factor in (0, 1]; stored on self.gamma.
        valueMap: mapping keyed by (x, y) that supports .copy(); stored on
            self.V1 and updated in place.

    Returns:
        self.V1 (the same object as valueMap).

    Raises:
        ValueError: if gamma is outside (0, 1].
    """
    self.reward = reward
    self.gamma = gamma
    self.V1 = valueMap
    corners = api.corners(state)
    walls = set(api.walls(state))  # set: membership is tested for every cell
    maxWidth = self.getLayoutWidth(corners) - 1
    maxHeight = self.getLayoutHeight(corners) - 1

    if not (0 < self.gamma <= 1):
        raise ValueError("MDP must have a gamma between 0 and 1.")

    # 50 sweeps of the Bellman update.
    for _ in range(50):
        V = self.V1.copy()  # snapshot of the previous sweep's values
        for i in range(maxWidth):
            for j in range(maxHeight):
                # Cells whose value is exactly 5 are left untouched; the
                # original comment identifies these as food (terminal) cells.
                if (i, j) not in walls and self.V1[(i, j)] != 5:
                    self.V1[(i, j)] = self.reward + self.gamma * self.getTransition(i, j, V)
    return self.V1
def getAction(self,state):
    """Pick the next direction, update the internal map and return the direction.

    The map is created lazily on the first call. Ghost positions are
    memorised when any are visible, and while self.counter is negative the
    legal moves are filtered through checkForGhosts once per ghost.
    """
    walls = api.walls(state)
    max_x, max_y = api.corners(state)[-1]
    legal = api.legalActions(state)
    me = api.whereAmI(state)
    food = api.food(state)
    ghosts = api.ghosts(state)
    capsules = api.capsules(state)

    cols = max_x + 1
    rows = max_y + 1

    if not hasattr(self, 'map'):
        self.createMap(walls, cols, rows)

    self.checkForCapsules(capsules, legal, ghosts)
    legal = self.solveLoop(ghosts, legal)

    if ghosts:
        self.memorizeGhosts(ghosts)
    if self.counter < 0:
        for ghost in ghosts:
            legal = self.checkForGhosts(ghost, me, legal)

    chosen = self.pickMove(me, legal, cols, rows, food)
    self.updatePosition(me, 1, self.map)
    self.printMap(self.map)
    self.last = chosen
    return chosen
def makeMap(self,state):
    """Create two fresh Grid objects (self.map1, self.map2) sized to the layout."""
    layout_corners = api.corners(state)
    rows = self.getLayoutHeight(layout_corners)
    cols = self.getLayoutWidth(layout_corners)
    self.map1 = Grid(cols, rows)
    self.map2 = Grid(cols, rows)
def getAction(self, state):
    """Move towards the neighbouring cell with the highest generated value.

    The four neighbours are looked up in the order west, east, north,
    south; on a tie the first of these wins.
    """
    legal = api.legalActions(state)
    corner = api.corners(state)
    pacman = api.whereAmI(state)
    px = pacman[0]
    py = pacman[1]
    far_corner = corner[3]

    # Value map produced by the grid-world helper for the current state.
    map_effect = gridworld().map_valuegeneration(
        state, (far_corner[0], far_corner[1]))

    candidates = (
        (Directions.WEST, (px - 1, py)),
        (Directions.EAST, (px + 1, py)),
        (Directions.NORTH, (px, py + 1)),
        (Directions.SOUTH, (px, py - 1)),
    )
    values = [map_effect[cell] for _, cell in candidates]
    best = values.index(max(values))
    return api.makeMove(candidates[best][0], legal)
def getAction(self, state):
    """Print the layout corners and request a STOP move."""
    legal_moves = api.legalActions(state)
    layout_corners = api.corners(state)
    print(layout_corners)
    return api.makeMove(Directions.STOP, legal_moves)
def generateRewardGrid(self, state):
    """Return a reward grid indexed as grid[y][x] for the current state.

    Every cell starts at -5, food cells are set to 100, floodFill is
    applied around each ghost, and wall cells are finally set to 0.
    """
    # Negative default for non-terminal cells: an incentive to take short routes.
    step_penalty = -5

    (w, h) = api.corners(state)[3]
    grid = [[step_penalty for _x in range(w + 1)] for _y in range(h + 1)]

    ghosts = api.ghosts(state)
    foods = api.food(state)
    walls = api.walls(state)

    for (fx, fy) in foods:
        grid[fy][fx] = 100

    # The radius handed to floodFill shrinks once three or fewer foods are
    # left, so Pacman is less cautious when close to winning.
    if len(foods) > 3:
        radius = 5
    else:
        radius = 2
    for (gx, gy) in ghosts:
        self.floodFill(grid, int(gx), int(gy), radius)

    # Walls are written last so they override anything placed above.
    for (wx, wy) in walls:
        grid[wy][wx] = 0

    return grid
def registerInitialState(self, state):
    """Cache the food count, corners, map dimensions and walls at game start."""
    self.initial_num_food = len(api.food(state))
    self.corners = api.corners(state)
    # Dimensions are the largest coordinate on each axis plus one.
    self.width = max([corner[0] for corner in self.corners]) + 1
    self.height = max([corner[1] for corner in self.corners]) + 1
    self.walls = api.walls(state)
def getAction(self, state): """ The function to work out next intended action carried out. Parameters: None Returns: Directions: Intended action that Pacman will carry out. """ current_pos = api.whereAmI(state) corners = api.corners(state) food = api.food(state) ghosts = api.ghosts(state) ghost_scared_time = api.ghostStatesWithTimes(state)[0][1] walls = api.walls(state) legal = api.legalActions(state) capsules = api.capsules(state) protected_coords = walls + ghosts + [current_pos] width = max(corners)[0] + 1 height = max(corners, key=itemgetter(1))[1] + 1 board = self.create_board(width, height, -0.04) board.set_position_values(food, 1) board.set_position_values(walls, 'x') board.set_position_values(capsules, 2) if ghost_scared_time < 5: board.set_position_values(ghosts, -3) # for i in range(height): # for j in range(width): # print board[i, j], # print # print print "GHOST LIST: ", ghosts for x, y in ghosts: # set the surrounding area around the ghost to half the reward of the ghost # avoids changing the reward of the ghost itself, the pacman and the walls # print "GHOST Coordinates: " + str(x) + " " + str(y) x_coordinates = [x - 1, x, x + 1] y_coordinates = [y - 1, y, y + 1] # print "X/Y Coordinates: " + str(x_coordinates) + " " + str(y_coordinates) for x_coord in x_coordinates: for y_coord in y_coordinates: if (x_coord, y_coord) not in protected_coords: # print("index: " + str((board.convert_y(y_coord), x_coord))) converted_y = board.convert_y(y_coord) # print "VALUE: " + str(board[board.convert_y(y), x]) board[converted_y, x_coord] = board[board.convert_y(y), x] / 2 # print "VALUE PART 2: " + str(board[converted_y, x_coord]) board = self.value_iteration(state, board) expected_utility = self.calculate_expected_utility( state, board, abs(current_pos[1] - (height - 1)), current_pos[0]) return max([(utility, action) for utility, action in expected_utility if action in legal])[1]
def register_initial_state(state):
    """Set the state-dependent constants on Grid and MDPAgent.

    Writes Grid.HEIGHT, Grid.WIDTH, Grid.MAX_DISTANCE, Grid.WALLS and
    MDPAgent.ITERATION_LIMIT; Grid.GHOST_RADIUS is only overridden for
    layouts wider and taller than 7.

    Args:
        state: Current game state.
    """
    Grid.HEIGHT = 1 + max([y for _, y in api.corners(state)])
    Grid.WIDTH = 1 + max([x for x, _ in api.corners(state)])

    # mediumClassic or bigger
    # NOTE(review): original indentation was lost; only GHOST_RADIUS is
    # assumed to be conditional - confirm.
    is_large_layout = Grid.WIDTH > 7 and Grid.HEIGHT > 7
    if is_large_layout:
        Grid.GHOST_RADIUS = 3

    Grid.MAX_DISTANCE = Grid.HEIGHT + Grid.WIDTH - 4
    Grid.WALLS = set(api.walls(state))

    cell_count = Grid.HEIGHT * Grid.WIDTH
    MDPAgent.ITERATION_LIMIT = int(ceil(sqrt(cell_count)) * 2)
def final(self, state):
    """Reset the agent's per-game memory and rebuild its map."""
    walls = api.walls(state)
    max_x, max_y = api.corners(state)[-1]

    self.last = None
    self.createMap(walls, max_x + 1, max_y + 1)

    # Placeholder positions.
    origin = (0, 0)
    self.pos = origin
    self.capsule = origin
    self.ghosts = [origin]

    api.food(state)  # result was never used; call kept as in the original
def getAction(self, state): # Demonstrates the information that Pacman can access about the state # of the game. print "-" * 30 #divider # What are the current moves available legal = api.legalActions(state) print "Legal moves: ", legal # Where is Pacman? pacman = api.whereAmI(state) print "Pacman position: ", pacman # Where are the ghosts? print "Ghost positions:" theGhosts = api.ghosts(state) for i in range(len(theGhosts)): print theGhosts[i] # How far away are the ghosts? print "Distance to ghosts:" for i in range(len(theGhosts)): print util.manhattanDistance(pacman,theGhosts[i]) # Where are the capsules? print "Capsule locations:" print api.capsules(state) # Where is the food? print "Food locations: " print api.food(state) print len(api.food(state)) # Where are the walls? print "Wall locations: " print api.walls(state) print "Corners: " print api.corners(state) # getAction has to return a move. Here we pass "STOP" to the # API to ask Pacman to stay where they are. return api.makeMove(random.choice(legal), legal)
def map_size(self,state):
    """Return the largest first and second coordinate among the corners.

    Parameter:
        state: the state of pacman.

    Returns:
        A tuple (max of corner[0], max of corner[1]).
    """
    # zip(*corners) transposes the corner list into one tuple per axis.
    # It is materialised with list() because zip returns a
    # non-subscriptable iterator on Python 3.
    axes = list(zip(*api.corners(state)))
    return max(axes[0]), max(axes[1])
def getGridSize(self, state):
    """Store the grid dimensions on self.gridWidth / self.gridHeight.

    Width is the x-span between corners[0] and corners[1]; height is the
    y-span between corners[0] and corners[2]; both are inclusive (+1).
    """
    corners = api.corners(state)
    first, second, third = corners[0], corners[1], corners[2]
    span_x = second[0] - first[0] + 1
    span_y = third[1] - first[1] + 1
    self.gridWidth = span_x
    self.gridHeight = span_y
def initialize(self, state):
    """Reset and rebuild the per-cell reward and utility tables.

    Both tables are lists of columns indexed as table[x][y]. Rewards start
    at self.baseReward, utilities at random() values; food and capsule
    cells get their configured rewards and wall cells hold "W" in both
    tables.
    """
    # Both tables are discarded on every call.
    self.reward = None
    self.utility = None

    foods = api.food(state)
    corners = api.corners(state)
    capsules = api.capsules(state)
    walls = api.walls(state)
    pacman = api.whereAmI(state)
    pacmanX, pacmanY = pacman[0], pacman[1]

    if self.reward is None and self.utility is None:
        # Map extent: largest coordinate among the corners, never below 0.
        max_x = max([0] + [corner[0] for corner in corners])
        max_y = max([0] + [corner[1] for corner in corners])

        self.reward = [[self.baseReward for _y in range(max_y + 1)]
                       for _x in range(max_x + 1)]
        self.utility = [[random() for _y in range(max_y + 1)]
                        for _x in range(max_x + 1)]

        for cell in foods:
            self.reward[cell[0]][cell[1]] = self.foodReward
        for cell in capsules:
            self.reward[cell[0]][cell[1]] = self.capsuleReward
        # Walls are marked with "W" in both tables.
        for cell in walls:
            self.reward[cell[0]][cell[1]] = "W"
            self.utility[cell[0]][cell[1]] = "W"

    # The map has been initialised.
    self.init = True
def _initialize(self, state):
    """Return a new 2D list filled with MazeEntity.EMPTY_CELL.

    The outer list has (largest corner[1] + 1) entries and each inner list
    has (largest corner[0] + 1) entries.
    """
    corners = api.corners(state)
    largest_first = max([0] + [coordinate[0] for coordinate in corners])
    largest_second = max([0] + [coordinate[1] for coordinate in corners])
    return [[MazeEntity.EMPTY_CELL for _inner in range(largest_first + 1)]
            for _outer in range(largest_second + 1)]
def registerInitialState(self, state):
    """Announce the round and load tuned parameters for known layouts.

    A layout is recognised by the presence of its top-right wall corner in
    the corner list; unrecognised layouts leave the parameters untouched.
    """
    print("Round " + str(self.__round) + " running...")
    corners = api.corners(state)

    if self.__TOP_RIGHT_WALL_CORNER_smallGrid in corners:
        # tuned for smallGrid
        tuned = (2, 0.6)
    elif self.__TOP_RIGHT_WALL_CORNER_mediumClassic in corners:
        # tuned for mediumClassic
        tuned = (4, 0.7)
    else:
        return

    self.__SAFETY_DISTANCE, self.__DISCOUNT_FACTOR = tuned
    self.__GHOSTBUSTER_MODE = self.__INACTIVE_GHOSTBUSTER_MODE
def map_size(self, state):
    """Store the map width and height on self.grid_width / self.grid_height.

    Each dimension is the largest corner coordinate on that axis plus one.
    """
    corner_list = api.corners(state)
    xs = [corner[0] for corner in corner_list]
    ys = [corner[1] for corner in corner_list]
    self.grid_width = max(xs) + 1
    self.grid_height = max(ys) + 1
def updateMap(self, state):
    """Create the world map on first use, otherwise refresh it.

    Cells are written in the order Pacman, food, capsules, walls, ghosts,
    so later kinds overwrite earlier ones on a shared cell. setThing is
    called with (y, x) while setMap and setReward are called with (x, y),
    exactly as in the original code.
    """
    corners = api.corners(state)
    ghosts = api.ghosts(state)
    me = api.whereAmI(state)
    walls = api.walls(state)
    capsules = api.capsules(state)
    foods = api.food(state)

    # Width and height of the map.
    width, height = corners[3][0] + 1, corners[3][1] + 1

    # (cells, marker, name of the reward attribute) in write order; the
    # reward attribute is read lazily, once per cell, as before.
    layers = (
        ([me], 'M', 'meReward'),
        (foods, 'F', 'foodReward'),
        (capsules, 'C', 'capsuleReward'),
        (walls, 'W', 'wallReward'),
        (ghosts, 'G', 'ghostReward'),
    )

    if self.worldMap is None:
        # First call: generate an empty world map, then place everything.
        self.worldMap = [[[' ', self.emptyReward, 0, Directions.STOP]
                          for x in range(width)] for y in range(height)]
        for cells, marker, reward_name in layers:
            for cell in cells:
                self.setMap(cell[0], cell[1],
                            [marker, getattr(self, reward_name), 0, Directions.STOP])
        return

    # NOTE(review): original indentation was lost; both passes below are
    # assumed to belong to the refresh branch - confirm.
    self.clearMapKeepState(state)
    for cells, marker, _ in layers:
        for cell in cells:
            self.setThing(cell[1], cell[0], marker)
    for cells, _, reward_name in layers:
        for cell in cells:
            self.setReward(cell[0], cell[1], getattr(self, reward_name))
def registerInitialState(self, state):
    """Initialise self.map with the layout size, walls and base rewards."""
    print("Initialising Map...")
    corners = api.corners(state)

    def distance_from_origin(corner):
        return util.manhattanDistance((0, 0), corner)

    # The corner furthest from (0, 0) gives the width and height of the
    # map (given that it starts at (0, 0)).
    by_distance = sorted(corners, key=distance_from_origin, reverse=True)
    (width, height) = by_distance[0]

    self.map.initialise(height, width, api.walls(state))

    # Base reward values: food is set here, ghosts are not.
    self.map.initialiseRewards(api.food(state))

    # Show the map when debugging.
    if DEBUG:
        self.map.display()
def getAction(self, state):
    """Print the map value at (1, 1) and make a random non-STOP legal move."""
    api.corners(state)  # result was never used; call kept as in the original
    print(self.map.getValue(1, 1))

    # Collect the legal actions and drop STOP when it is among them.
    legal = api.legalActions(state)
    try:
        legal.remove(Directions.STOP)
    except ValueError:
        pass

    # Random choice between the remaining legal options.
    return api.makeMove(random.choice(legal), legal)
def getAction(self, state):
    """Step onto an undetected neighbouring food or capsule, else backtrack.

    self.states is used as a stack of (position, action) pairs and
    self.detected records the positions seen so far. When no suitable
    neighbour exists, the top of the stack is popped and the opposite of
    its action is made. STOP is requested when the stack is empty or the
    popped action has no opposite.
    """
    legal = api.legalActions(state)
    if Directions.STOP in legal:
        legal.remove(Directions.STOP)

    api.corners(state)  # result was never used; call kept as in the original
    pacman = api.whereAmI(state)
    food = api.food(state)
    capsules = api.capsules(state)
    api.ghosts(state)   # result was never used; call kept as in the original

    # Very first call: seed the stack with the starting position.
    if not self.detected:
        self.states.push((pacman, Directions.STOP))

    if self.states.isEmpty():
        return api.makeMove(Directions.STOP, legal)

    self.detected.append(pacman)

    offsets = {
        Directions.WEST: (-1, 0),
        Directions.EAST: (1, 0),
        Directions.NORTH: (0, 1),
        Directions.SOUTH: (0, -1),
    }
    # One successor cell per recognised legal direction, in legal order.
    successors = [(pacman[0] + offsets[move][0], pacman[1] + offsets[move][1])
                  for move in legal if move in offsets]

    # Successors are paired with legal moves by position, as before.
    for cell, move in zip(successors, legal):
        if cell not in self.detected and (cell in food or cell in capsules):
            self.states.push((cell, move))
            return (api.makeMove(move, legal))

    # Nothing new nearby: undo the most recent recorded action.
    last, acted = self.states.pop()
    opposite = {
        Directions.NORTH: Directions.SOUTH,
        Directions.SOUTH: Directions.NORTH,
        Directions.WEST: Directions.EAST,
        Directions.EAST: Directions.WEST,
    }
    if acted in opposite:
        return (api.makeMove(opposite[acted], legal))
    return (api.makeMove(Directions.STOP, legal))
def buildMap(self, state, directions):
    """Build the (reward, utility) map for the layout and pass it to updateMap.

    Every cell in the corner-bounded rectangle starts as (-0.04, 0); wall
    cells are then overwritten with (-1000, 0).

    Args:
        state: game state handed to the api helpers.
        directions: forwarded unchanged to updateMap.

    Returns:
        Whatever self.updateMap(state, gameMap, directions, largeOrSmall)
        returns. largeOrSmall is False only when corners[1][0] and
        corners[3][1] are both below 10.
    """
    gameMap = {}
    corners = api.corners(state)

    # The y range now starts at corners[0][1]. The original started it at
    # corners[2][0], an x coordinate.
    # NOTE(review): assumes corners[0] holds the minimum x and y, as the
    # x range already does - confirm.
    for x in range(corners[0][0], corners[1][0] + 1):
        for y in range(corners[0][1], corners[3][1] + 1):
            gameMap[(x, y)] = (-0.04, 0)

    for wall in api.walls(state):
        gameMap[wall] = (-1000, 0)

    largeOrSmall = not (corners[1][0] < 10 and corners[3][1] < 10)
    return self.updateMap(state, gameMap, directions, largeOrSmall)
def getNextDirection(pacman, corner, walls, legal, theFood, ghostArray):
    """Choose the behaviour for this step from what is currently visible.

    Priority: run from ghosts when any are listed; otherwise head for the
    corners/food when no food is listed; otherwise eat the food.

    pacman, corner, walls and legal are accepted for interface
    compatibility but do not influence the decision. The previous body
    also computed an unused distance and called api.corners(state) with a
    `state` that is not a parameter of this function; both were removed.

    Returns:
        The result of runFromGhosts(), getToCornersOrFood() or eatFood().
    """
    # Ghosts take priority over everything else.
    if len(ghostArray) > 0:
        return runFromGhosts()
    # No food in sight: go looking for it.
    if len(theFood) == 0:
        return getToCornersOrFood()
    # Food found, so eat it.
    return eatFood()
def initialize(self, state):
    """Reset the path and rebuild Pacman's internal map from scratch.

    The map is a list of columns indexed as self.map[x][y]: "?" for
    unknown cells, "F" food, "C" capsule, "W" wall and "P" for Pacman.
    """
    # Start with an empty path and no map.
    self.path = []
    self.map = None

    foods = api.food(state)
    corners = api.corners(state)
    capsules = api.capsules(state)
    walls = api.walls(state)
    pacman = api.whereAmI(state)
    pacmanX, pacmanY = pacman[0], pacman[1]

    if self.map is None:
        # Map extent: largest coordinate among the corners, never below 0.
        max_x = max([0] + [corner[0] for corner in corners])
        max_y = max([0] + [corner[1] for corner in corners])

        # Everything is unknown until filled in below.
        self.map = [["?" for _y in range(max_y + 1)] for _x in range(max_x + 1)]

        # Written in this order, so walls override capsules and food.
        for cells, symbol in ((foods, "F"), (capsules, "C"), (walls, "W")):
            for cell in cells:
                self.map[cell[0]][cell[1]] = symbol

        # Pacman's own position is written last.
        self.map[pacmanX][pacmanY] = "P"

    # The map has been initialised.
    self.init = True
def initialize(self, state):
    """Reset and rebuild the per-cell reward and utility tables.

    Both tables are lists of columns indexed as table[x][y]. Rewards start
    at -1, utilities at random() values; food cells get reward 10, capsule
    cells reward 5, and wall cells hold "W" in both tables.
    """
    # Both tables are discarded on every call.
    self.reward = None
    self.utility = None

    foods = api.food(state)
    corners = api.corners(state)
    capsules = api.capsules(state)
    walls = api.walls(state)
    pacman = api.whereAmI(state)
    pacmanX, pacmanY = pacman[0], pacman[1]

    if self.reward is None and self.utility is None:
        # Map extent: largest coordinate among the corners, never below 0.
        max_x = max([0] + [corner[0] for corner in corners])
        max_y = max([0] + [corner[1] for corner in corners])

        self.reward = [[-1 for _y in range(max_y + 1)] for _x in range(max_x + 1)]
        self.utility = [[random() for _y in range(max_y + 1)]
                        for _x in range(max_x + 1)]

        # Reward for food cells.
        for cell in foods:
            self.reward[cell[0]][cell[1]] = 10
        # Reward for capsule cells.
        for cell in capsules:
            self.reward[cell[0]][cell[1]] = 5
        # Walls are marked with "W" in both tables.
        for cell in walls:
            self.reward[cell[0]][cell[1]] = "W"
            self.utility[cell[0]][cell[1]] = "W"

    # The map has been initialised.
    self.init = True
def mapSize(self, state):
    """Get the size of the layout from the corner coordinates.

    Args:
        state: The state of an agent (configuration, speed, scared, etc).

    Returns:
        A tuple (largest corner[0], largest corner[1]).
    """
    # zip(*corners) transposes the corner list into one tuple per axis.
    # It is materialised with list() because zip returns a
    # non-subscriptable iterator on Python 3.
    axes = list(zip(*api.corners(state)))
    return max(axes[0]), max(axes[1])
def getAction(self, state):
    """Run value iteration for the current state and make the best move.

    A fresh value map is built on every call. Layouts whose width and
    height (each minus one) are both at least 10 use valueIteration; all
    others use valueIterationSmall. The resulting values are copied into
    self.map for every cell that is not "#", and the map is displayed.

    Returns:
        The result of api.makeMove for the direction matching the policy
        key ("n_util", "s_util", "e_util" or "w_util"). As before, None is
        returned when the policy is none of these.
    """
    print("-" * 30)
    legal = api.legalActions(state)
    corners = api.corners(state)
    maxWidth = self.getLayoutWidth(corners) - 1
    maxHeight = self.getLayoutHeight(corners) - 1

    # Recalibrated from scratch for every action.
    valueMap = self.makeValueMap(state)

    if maxWidth >= 10 and maxHeight >= 10:
        self.valueIteration(state, -0.5, 0.7, valueMap)
    else:
        self.valueIterationSmall(state, 0.2, 0.7, valueMap)

    # Computed once and reused; it used to be re-evaluated for the print
    # and for each comparison below.
    # NOTE(review): assumes getPolicy depends only on its arguments - confirm.
    policy = self.getPolicy(state, valueMap)
    print("best move: ")
    print(policy)

    # Copy the iterated values into the displayed map.
    for i in range(self.map.getWidth()):
        for j in range(self.map.getHeight()):
            if self.map.getValue(i, j) != "#":
                self.map.setValue(i, j, valueMap[(i, j)])
    self.map.prettyDisplay()

    # Policy key -> direction to submit.
    move_for_policy = {
        "n_util": 'North',
        "s_util": 'South',
        "e_util": 'East',
        "w_util": 'West',
    }
    if policy in move_for_policy:
        return api.makeMove(move_for_policy[policy], legal)