def getAction(self, state):
    """Record each ghost's heading, refresh the utilities and choose a move.

    The heading of a ghost is the rounded difference between its position
    now and its position on the previous call. The chosen move is the legal
    action whose destination cell has the highest utility.

    Parameters:
        state: the current game state, as passed to the api helpers.

    Returns:
        The result of api.makeMove for the selected direction.
    """
    allowed_headings = ((1, 0), (-1, 0), (0, 1), (0, -1), (0, 0))

    self.current_ghosts_states = api.ghostStatesWithTimes(state)
    self.ghosts_facing = []
    for index, ghost_now in enumerate(self.current_ghosts_states):
        position_now = ghost_now[0]
        position_before = self.last_ghosts_states[index][0]
        heading = (int(round(position_now[0] - position_before[0])),
                   int(round(position_now[1] - position_before[1])))
        # Anything other than a single-cell step (presumably a ghost that
        # was eaten and respawned -- confirm) is recorded as "no heading".
        if heading in allowed_headings:
            self.ghosts_facing.append(heading)
        else:
            self.ghosts_facing.append((0, 0))
    self.last_ghosts_states = self.current_ghosts_states

    # Rebuild the rewards, solve for utilities, then act greedily on them.
    self.initialRewardMap(state)
    pacman = api.whereAmI(state)
    utilities_map = self.updateUtilities()
    legal = api.legalActions(state)
    action_vectors = [Actions.directionToVector(a, 1) for a in legal]

    # Pairs are compared as (utility, vector), so ties on utility fall back
    # to comparing the vectors themselves.
    scored_moves = [
        (float(utilities_map.getValue(step[0] + pacman[0],
                                      step[1] + pacman[1])), step)
        for step in action_vectors
    ]
    best_move = max(scored_moves)
    return api.makeMove(Actions.vectorToDirection(best_move[1]), legal)
def ghost_update(self, state):
    """Write the ghost reward onto the map for every dangerous ghost.

    A ghost whose scared timer is greater than 2 is ignored. Every other
    ghost (not scared, or scared for at most 2 more ticks) has
    self.ghost_reward written to its own cell and to each non-wall cell
    returned by self.four_neighbors, as a safety margin.

    Parameters:
        state: the current game state, as passed to the api helpers.
    """
    # The wall list is the same for the whole call, so fetch it once
    # instead of once per neighbour of every ghost.
    walls = api.walls(state)

    for ghost in api.ghostStatesWithTimes(state):
        # ghost is (position, scared_timer)
        if ghost[1] > 2:
            # Still safely scared: leave the map untouched for this ghost.
            continue

        # Positions may be fractional; int() truncates to a grid cell.
        ghost_x = int(ghost[0][0])
        ghost_y = int(ghost[0][1])
        self.map.set_value(ghost_x, ghost_y, self.ghost_reward)

        for neighbor in self.four_neighbors(ghost_x, ghost_y):
            if neighbor not in walls:
                self.map.set_value(neighbor[0], neighbor[1],
                                   self.ghost_reward)
def getAction(self, state):
    """
    Work out the next action for Pacman.

    Builds a reward board (food 1, capsules 2, walls 'x', every other cell
    -0.04), marks the ghosts and their surroundings when they are dangerous,
    runs value iteration on it and returns the legal action with the highest
    expected utility.

    Parameters:
        state: the current game state, as passed to the api helpers.

    Returns:
        Directions: Intended action that Pacman will carry out.
    """
    current_pos = api.whereAmI(state)
    corners = api.corners(state)
    food = api.food(state)
    ghosts = api.ghosts(state)
    walls = api.walls(state)
    legal = api.legalActions(state)
    capsules = api.capsules(state)

    # Only the first ghost's timer is consulted. Guard the lookup so a
    # layout without ghosts does not raise IndexError.
    ghost_states = api.ghostStatesWithTimes(state)
    ghost_scared_time = ghost_states[0][1] if ghost_states else 0

    # Cells whose reward must not be overwritten by a ghost's halo.
    # A set keeps the membership test below cheap.
    protected_coords = set(walls + ghosts + [current_pos])

    width = max(corners)[0] + 1
    height = max(corners, key=itemgetter(1))[1] + 1

    board = self.create_board(width, height, -0.04)
    board.set_position_values(food, 1)
    board.set_position_values(walls, 'x')
    board.set_position_values(capsules, 2)

    if ghost_scared_time < 5:
        board.set_position_values(ghosts, -3)

        # Single-argument form prints the same text as the original
        # two-item print statement.
        print("%s %s" % ("GHOST LIST: ", ghosts))

        for x, y in ghosts:
            # Give the 3x3 area around the ghost half of the ghost's reward,
            # leaving the ghost itself, Pacman and the walls untouched.
            for x_coord in (x - 1, x, x + 1):
                for y_coord in (y - 1, y, y + 1):
                    if (x_coord, y_coord) in protected_coords:
                        continue
                    converted_y = board.convert_y(y_coord)
                    # 2.0 rather than 2: with an integer reward, Python 2
                    # would floor the division (-3 / 2 == -2), not halve it.
                    board[converted_y, x_coord] = (
                        board[board.convert_y(y), x] / 2.0)

    board = self.value_iteration(state, board)
    expected_utility = self.calculate_expected_utility(
        state, board, abs(current_pos[1] - (height - 1)), current_pos[0])

    return max([(utility, action)
                for utility, action in expected_utility
                if action in legal])[1]
def registerInitialState(self, state):
    """Prepare the maps and solver settings at the start of a game.

    Builds the map with its walls, keeps a copy as the reward map, selects
    the discount and tolerance from the map width, and seeds both the
    current and the previous ghost-state records.

    Parameters:
        state: the initial game state, as passed to the api helpers.
    """
    print("Running registerInitialState!")

    # Make a map of the right size and put the walls on it.
    self.makeMap(state)
    self.addWallsToMap(state)
    self.reward_map = self.map.copy()

    # Grids narrower than 9 cells count as small and get their own
    # discount / tolerance pair.
    self.is_small_grid = self.reward_map.getWidth() < 9
    if self.is_small_grid:
        self.dis_count, self.tolerant = 0.8, 0.0001
    else:
        self.dis_count, self.tolerant = 0.85, 0.000001

    # Two separate queries, so the two records are distinct objects.
    self.current_ghosts_states = api.ghostStatesWithTimes(state)
    self.last_ghosts_states = api.ghostStatesWithTimes(state)
def updateGhosts(self, state):
    """Write a value for every ghost onto the map.

    Each ghost state is (position, timer), where timer is how long the ghost
    stays edible and 0 means it is hunting Pacman again. The value written
    is 1.2 * timer - 20, i.e. -20 for a hunting ghost and higher the longer
    the ghost remains edible.

    Parameters:
        state: the current game state, as passed to the api helpers.
    """
    for position, timer in api.ghostStatesWithTimes(state):
        ghostVal = 1.2 * timer - 20
        # Positions may be fractional; int() truncates to a grid cell.
        self.map.setValue(int(position[0]), int(position[1]), ghostVal)

    # dankMap refers to the same object as map (no copy is made).
    self.dankMap = self.map
def bellmanUpdate(self, state):
    """Sweep the Bellman update over the whole map until it stops changing.

    Every sweep reads from a deep copy of the map taken at the start of the
    sweep and writes the new utilities into self.map. Cells holding None are
    skipped. Sweeping stops once self.checkSame reports that the copy and
    the updated map are the same.

    Parameters:
        state: the current game state, as passed to the api helpers.
    """
    ghost_positions = api.ghosts(state)
    food_positions = api.food(state)
    wall_positions = api.walls(state)
    # Only the first ghost's scared timer is used.
    scared_timer = api.ghostStatesWithTimes(state)[0][1]
    capsule_positions = api.capsules(state)

    n_cols = self.map.width
    n_rows = self.map.height

    while True:
        previous = deepcopy(self.map)
        for x in range(n_cols):
            for y in range(n_rows):
                here = previous[x][y]
                if here is None:
                    continue

                # Neighbouring utilities, ordered so that index i - 1 and
                # index i + 1 are the two directions perpendicular to i.
                neighbours = [
                    previous[x][y + 1],
                    previous[x + 1][y],
                    previous[x][y - 1],
                    previous[x - 1][y],
                ]

                best = -1000
                for i, ahead in enumerate(neighbours):
                    if ahead is None:
                        # Moves into a None cell are not considered.
                        continue
                    one_side = neighbours[i - 1]
                    other_side = neighbours[(i + 1) % 4]

                    # 0.8 for the intended move and 0.1 per sideways slip;
                    # a slip into a None cell counts as staying put.
                    expected = ahead * 0.8
                    expected += (here if one_side is None
                                 else one_side) * 0.1
                    expected += (here if other_side is None
                                 else other_side) * 0.1
                    if expected > best:
                        best = expected

                # Bellman equation: discounted best utility plus reward.
                self.map[x][y] = (best * 0.9) + self.reward(
                    x, y, state, ghost_positions, food_positions,
                    capsule_positions, scared_timer, wall_positions)

        if self.checkSame(previous, self.map):
            break
def getAction(self, state):
    """Print everything Pacman can observe about the state, then stand still.

    Demonstrates the api queries: legal moves, Pacman's position, ghost
    positions and timers, distances to the ghosts, capsules, food and walls.

    Parameters:
        state: the current game state, as passed to the api helpers.

    Returns:
        The result of api.makeMove for Directions.STOP.
    """
    # Every print takes a single argument so that the text produced matches
    # the print-statement form item for item.

    # What are the current moves available?
    legal = api.legalActions(state)
    print("%s %s" % ("Legal moves: ", legal))

    # Where is Pacman?
    pacman = api.whereAmI(state)
    print("%s %s" % ("Pacman position: ", pacman))

    # Where are the ghosts?
    print("Ghost positions:")
    theGhosts = api.ghosts(state)
    for ghost in theGhosts:
        print(ghost)

    # Ghost positions together with their timers.
    print("timer")
    for ghost_state in api.ghostStatesWithTimes(state):
        print(ghost_state)

    # How far away are the ghosts?
    print("Distance to ghosts:")
    for ghost in theGhosts:
        print(util.manhattanDistance(pacman, ghost))

    # Where are the capsules?
    print("Capsule locations:")
    print(api.capsules(state))

    # Where is the food?
    print("Food locations: ")
    print(api.food(state))

    # Where are the walls?
    print("Wall locations: ")
    print(api.walls(state))

    # getAction has to return a move; ask the API to keep Pacman in place.
    return api.makeMove(Directions.STOP, legal)
def findGhosts(self, state):
    """Split the ghosts into scared and non-scared, and list cells to avoid.

    Fills three attributes from scratch:
        self.scaredGhosts -- positions of ghosts whose timer is above 0
        self.ghosts       -- positions of all other ghosts
        self.adjGhosts    -- self.sumPair(ghost, move[0]) for every
                             non-scared ghost and every entry of
                             self.possibleMoves

    Parameters:
        state: the current game state, as passed to the api helpers.
    """
    ghost_states = api.ghostStatesWithTimes(state)

    # Start from empty lists on every call.
    self.scaredGhosts, self.ghosts, self.adjGhosts = [], [], []

    # Each entry is (position, timer).
    for entry in ghost_states:
        bucket = self.scaredGhosts if entry[1] > 0 else self.ghosts
        bucket.append(entry[0])

    # The cells around a non-scared ghost are the ones to stay away from.
    for ghost in self.ghosts:
        for move in self.possibleMoves:
            self.adjGhosts.append(self.sumPair(ghost, move[0]))
def updateMap(self, state):
    """Return a copy of the base reward grid with the ghosts marked on it.

    self.reward itself is left untouched; the ghost rewards are written to a
    deep copy. A ghost with a timer above 0 gets self.scaredGhostReward,
    any other ghost gets self.ghostReward.

    Parameters:
        state: the current game state, as passed to the api helpers.

    Returns:
        The modified deep copy of self.reward.
    """
    reward = deepcopy(self.reward)

    for position, scared_timer in api.ghostStatesWithTimes(state):
        # Positions may be fractional; int() truncates to a grid cell.
        col = int(position[0])
        row = int(position[1])

        if scared_timer <= 0:
            reward[col][row] = self.ghostReward
            continue

        reward[col][row] = self.scaredGhostReward
        # NOTE(review): the diagonal cell is marked only when the ghost sits
        # exactly on a grid cell. This may have been meant for ghosts
        # between cells instead -- confirm the intent before changing it.
        if col == position[0] and row == position[1]:
            reward[col + 1][row + 1] = self.scaredGhostReward

    return reward
def createMap(self, state):
    """Build the reward grid for the current state.

    The grid is indexed grid[x][y] and has (self.max_x + 1) columns of
    (self.max_y + 1) cells, all starting at 0. Values are written in this
    order, later writes overriding earlier ones: food, capsules, ghosts
    (with their buffer cells), walls.

    A ghost with a timer below 3 is treated as dangerous: its cell gets
    self.ghostState and, when it is within Manhattan distance 5 of Pacman,
    the four cells self.buffer away along each axis get -10. A ghost with a
    timer of 3 or more gets self.food in all of those cells instead.

    Parameters:
        state: the current game state, as passed to the api helpers.

    Returns:
        The grid as a list of lists.
    """
    n_cols = self.max_x + 1
    n_rows = self.max_y + 1
    grid = [[0] * n_rows for _ in range(n_cols)]

    for food in api.food(state):
        grid[food[0]][food[1]] = self.food
    for capsule in api.capsules(state):
        grid[capsule[0]][capsule[1]] = self.capsule

    # Pacman does not move during this call, so look the position up once.
    pacman = api.whereAmI(state)

    for ghostState in api.ghostStatesWithTimes(state):
        position, scared_timer = ghostState[0], ghostState[1]
        # Positions may be fractional; int() truncates to a grid cell.
        ghost_x = int(position[0])
        ghost_y = int(position[1])
        dangerous = scared_timer < 3

        grid[ghost_x][ghost_y] = self.ghostState if dangerous else self.food

        if util.manhattanDistance(position, pacman) < 5:
            buffer_value = -10 if dangerous else self.food
            buffer_cells = (
                (ghost_x + self.buffer, ghost_y),
                (ghost_x, ghost_y + self.buffer),
                (ghost_x - self.buffer, ghost_y),
                (ghost_x, ghost_y - self.buffer),
            )
            for cell_x, cell_y in buffer_cells:
                # Skip cells off the grid: a negative index would silently
                # wrap to the opposite edge, and an index past the end
                # would raise IndexError.
                if 0 <= cell_x < n_cols and 0 <= cell_y < n_rows:
                    grid[cell_x][cell_y] = buffer_value

    # Walls go last so nothing above can overwrite them.
    for wall in api.walls(state):
        grid[wall[0]][wall[1]] = -10

    return grid
def updateMap(self, state, gameMap, directions, largeOrSmall):
    """Write the rewards for food, capsules, ghosts and Pacman into gameMap.

    Every entry written is a (reward, 0) pair keyed by position.

    Parameters:
        state: the current game state, as passed to the api helpers.
        gameMap: mapping from position to (reward, value) pair; it is
            modified in place.
        directions: the moves used to find the cells next to a ghost.
        largeOrSmall: the large-grid rules are used when this equals True,
            otherwise the small-grid rules.

    Returns:
        The same gameMap object.
    """
    food = api.food(state)
    capsules = api.capsules(state)
    pacman = api.whereAmI(state)

    # Kept as an equality test (not plain truthiness) so that exactly the
    # same argument values select the large-grid rules as before.
    use_large_rules = (largeOrSmall == True)

    # Food: on the large grid the reward grows with the column (5 * x + 5);
    # on the small grid every pellet is worth 10.
    for pellet in food:
        if use_large_rules:
            gameMap[pellet] = ((pellet[0] * 5) + 5, 0)
        else:
            gameMap[pellet] = (10, 0)

    for capsule in capsules:
        gameMap[capsule] = (50, 0)

    if use_large_rules:
        for ghost in api.ghostStatesWithTimes(state):
            # Round, in case the ghost is half-way between two cells.
            ghostLoc = (int(round(ghost[0][0])), int(round(ghost[0][1])))
            ghostNeighbours = [
                Actions.getSuccessor(ghostLoc, move) for move in directions
            ]
            if ghost[1] >= 3:
                # Ghost is edible.
                gameMap[ghostLoc] = (5, 0)
            elif util.manhattanDistance(pacman, ghostLoc) < 5:
                # Ghost is close to Pacman: penalise its neighbours as well.
                gameMap[ghostLoc] = (-150, 0)
                for cell in ghostNeighbours:
                    gameMap[cell] = (-100, 0)
            else:
                gameMap[ghostLoc] = (-200, 0)
    else:
        # Smaller grid: every ghost gets the same flat penalty.
        for ghost in api.ghosts(state):
            ghostLoc = (int(round(ghost[0])), int(round(ghost[1])))
            gameMap[ghostLoc] = (-100, 0)

    # Pacman's own cell is written last and overrides anything above.
    gameMap[api.whereAmI(state)] = (-0.04, 0)
    return gameMap
def calculateUtility(self, state):
    """Run value iteration over self.map until the utilities settle.

    Utilities are updated in place, cell by cell, with the Bellman equation:
    the intended move succeeds with probability 0.8 and slips to either
    perpendicular direction with probability 0.1 each; moving into a wall
    ('%') leaves Pacman where he is. Sweeps repeat until the largest change
    in a sweep is below 0.00001.

    Rewards: 2 for a food cell ('*'), 0 for any other cell, overridden on a
    ghost's cell by 0 when that ghost's scared timer is at least 12 and by
    -500 otherwise.

    Parameters:
        state: the current game state, as passed to the api helpers.
    """
    discount_factor = 0.6
    food_reward = 2
    ghost_reward = -500
    maximum_change = 0.00001

    # The ghosts do not move while the utilities are being solved, so query
    # them once instead of once per cell per sweep.
    ghost_states = api.ghostStatesWithTimes(state)
    ghost_positions = api.ghosts(state)

    width = self.map.getWidth()
    height = self.map.getHeight()

    converged = False
    while not converged:
        changes = []
        for i in range(width):
            for j in range(height):
                cell = (i, j)
                current_utility = self.map.getUtility(i, j)
                cell_value = self.map.getValue(i, j)

                current_reward = food_reward if cell_value == '*' else 0

                # A ghost on this cell decides the reward. The scared timer
                # counts down, so a high timer means the ghost is harmless
                # for a while yet.
                if cell in ghost_positions:
                    for ghost, scared_time in ghost_states:
                        if ghost == cell:
                            if scared_time >= 12:
                                current_reward = 0
                            else:
                                current_reward = ghost_reward

                # Walls keep whatever utility they have.
                if cell_value == '%':
                    continue

                # Utility reached by heading east, west, north and south;
                # bumping into a wall means staying on the current cell.
                reached = []
                for nx, ny in ((i + 1, j), (i - 1, j),
                               (i, j + 1), (i, j - 1)):
                    if self.map.getValue(nx, ny) == '%':
                        nx, ny = i, j
                    reached.append(self.map.getUtility(nx, ny))
                east_u, west_u, north_u, south_u = reached

                action_utilities = [
                    0.8 * north_u + 0.1 * east_u + 0.1 * west_u,
                    0.8 * south_u + 0.1 * east_u + 0.1 * west_u,
                    0.8 * west_u + 0.1 * north_u + 0.1 * south_u,
                    0.8 * east_u + 0.1 * north_u + 0.1 * south_u,
                ]

                # Value iteration update.
                new_utility = current_reward + (
                    discount_factor * max(action_utilities))
                self.map.setUtility(i, j, new_utility)
                changes.append(abs(current_utility - new_utility))

        # An empty list means no cell could be updated; stop rather than
        # letting max() raise on an empty sequence.
        if not changes or max(changes) < maximum_change:
            converged = True
def makeValueMap(self, state):
    """Return a dictionary mapping grid coordinates to their values.

    Values: 5 for food, 5 for capsules, '#' for walls, 0 for empty cells
    and for food or capsules on a cell Pacman has visited, and -10 for a
    cell holding a ghost (scared or not).

    Side effects: records Pacman's position in self.visited, adds newly seen
    food, walls and capsules to self.foodMap, self.wallMap and
    self.capsuleMap, and rebuilds self.foodDict, self.wallDict and
    self.capsuleDict.

    Parameters:
        state: the current game state, as passed to the api helpers.

    Returns:
        dict: coordinate tuple -> value.
    """
    food = api.food(state)
    walls = api.walls(state)
    capsules = api.capsules(state)
    pacman = api.whereAmI(state)
    corners = api.corners(state)

    # Remember every square Pacman has stood on.
    if pacman not in self.visited:
        self.visited.append(pacman)

    # Add anything seen for the first time to the remembered lists.
    for seen, remembered in ((food, self.foodMap),
                             (walls, self.wallMap),
                             (capsules, self.capsuleMap)):
        for location in seen:
            if location not in remembered:
                remembered.append(location)

    self.foodDict = dict.fromkeys(self.foodMap, 5)
    self.wallDict = dict.fromkeys(self.wallMap, '#')
    self.capsuleDict = dict.fromkeys(self.capsuleMap, 5)

    # Later updates win where a coordinate appears in more than one dict.
    valueMap = {}
    valueMap.update(self.foodDict)
    valueMap.update(self.wallDict)
    valueMap.update(self.capsuleDict)

    # The api lists tend to leave out Pacman's initial position, so give
    # every coordinate that has no value yet a 0. setdefault is a direct
    # dictionary lookup; no key list is built per cell.
    layout_height = self.getLayoutHeight(corners)
    for i in range(self.getLayoutWidth(corners) - 1):
        for j in range(layout_height - 1):
            valueMap.setdefault((i, j), 0)

    # A visited square has had its food or capsule eaten.
    visited = set(self.visited)
    for location in self.foodMap:
        if location in visited:
            valueMap[location] = 0
    for location in self.capsuleMap:
        if location in visited:
            valueMap[location] = 0

    # Mark the ghosts. Map keys are ints while ghost coordinates come back
    # as floats, so truncate before looking the cell up directly.
    for ghost in api.ghosts(state):
        cell = (int(ghost[0]), int(ghost[1]))
        if cell in valueMap:
            valueMap[cell] = -10

    return valueMap