def updateGhostsToMap(self, state):
    """Write ghost penalties into self.map.

    An active ghost's cell gets the value (-self.initialFood, 0) and the
    cells around it are penalised via self.aroundGhosts; a scared ghost
    (state flag == 1) gets only a mild (-0.02, 0) penalty on its own cell.
    """
    ghosts = api.ghosts(state)
    # Hoisted out of the loop: the original re-queried
    # api.ghostStates(state) on every iteration.
    states = api.ghostStates(state)
    # api.ghosts and api.ghostStates list ghosts in the same order, so the
    # i-th position pairs with the i-th (position, scared-flag) tuple —
    # the same alignment the original index loop assumed.
    for (gx, gy), (_, scared) in zip(ghosts, states):
        x, y = int(gx), int(gy)  # positions can be fractional mid-move
        if scared != 1:
            self.map.setValue(x, y, (-self.initialFood, 0))
            self.aroundGhosts(x, y, -self.initialFood, 0)
        else:
            self.map.setValue(x, y, (-0.02, 0))
def updateMap(self, state):
    """Mark every wall square in the map dictionary with 'X' and return
    the dictionary.

    The original also fetched ghosts, ghost states, capsules and food
    here but never used them; those redundant api calls are removed.
    """
    wall = set(api.walls(state))  # set: O(1) membership test per square
    for node in self.mapDictionary:
        if node in wall:
            self.mapDictionary[node] = 'X'
    return self.mapDictionary
def getGhostsAndStates(self, state, distance):
    """Return the (position, scared-flag) state tuple of every ghost
    within `distance`.

    NOTE: despite the original docstring's claim, this returns the
    matched state tuples whether or not the ghost is scared; callers
    must inspect the flag themselves to judge dangerousness.
    """
    # Ghost positions inside the distance limit.
    nearby = api.distanceLimited(self.ghosts, state, distance)
    # (position, scared-flag) tuples for every ghost on the map.
    ghostStates = api.ghostStates(state)
    # Match each nearby position back to its state tuple, preserving the
    # original's ordering (outer loop over nearby ghosts).
    return [gs for g in nearby for gs in ghostStates if g == gs[0]]
def rewardMapping(self, state):
    """Populate self.rewardDict with a reward for every traversable
    square and reset self.utils (utilities) to zero.

    Rewards: -1 blank square, 10 food, 20 capsule, -50 on an active
    ghost's cell and -25 in a radius around it; scared ghosts are
    ignored.
    """
    walls = self.walls
    food = set(api.food(state))
    ghostStates = api.ghostStates(state)
    capsules = set(api.capsules(state))
    # Traversable squares: every grid key that is not a wall.
    traversable = [key for key in self.grid if key not in walls]
    # Default reward of -1 for a blank square; utilities start at 0.
    self.rewardDict = {key: -1 for key in traversable}
    self.utils = {key: 0 for key in traversable}
    # Food and capsule rewards — iterate the small sets instead of the
    # original's full rewardDict.items() scan per category.
    self.rewardDict.update({k: 10 for k in food if k in self.rewardDict})
    self.rewardDict.update({k: 20 for k in capsules if k in self.rewardDict})
    # Loop through ghost locations and scared flags; scared ghosts
    # (flag != 0) are ignored. Active ghosts get -50 on their cell and
    # -25 on each traversable cell in a radius around them.
    for pos, scared in ghostStates:
        # A ghost mid-move has a fractional position that is not a
        # rewardDict key, so it is skipped — same as the original.
        if pos in self.rewardDict and scared == 0:
            self.rewardDict[pos] = -50
            # Adjust the size of the exclusion zone around ghosts with
            # the 3rd argument below.
            neighbours = set(self.ghostRadius(state, pos, 2))
            self.rewardDict.update(
                {k: -25 for k in neighbours if k in self.rewardDict})
def getAction(self, state):
    """Choose pacman's next move via value iteration.

    Builds a list of cells to avoid (active, non-edible ghosts plus a
    one-cell buffer around each), runs value iteration until the
    utilities stop changing, then picks the legal direction with the
    highest expected utility under the 0.8 / 0.1 / 0.1 noisy-motion
    model.
    """
    # Get the actions we can try, and remove "STOP" if that is one of them.
    legal = api.legalActions(state)
    pacloc = api.whereAmI(state)
    ghoststates = api.ghostStates(state)
    ghostcors = []
    if Directions.STOP in legal:
        legal.remove(Directions.STOP)
    self.updatefood(state)
    # Making a list of ghost coordinates which are still active and not
    # edible, so these ghost coordinates will get negative value during
    # iteration.
    for i in range(len(ghoststates)):
        # Converting ghost coordinates to integers (they can be
        # fractional while a ghost is moving between cells).
        ghostX = ghoststates[i][0][0]
        ghostY = ghoststates[i][0][1]
        ghostXY = (int(ghostX), int(ghostY))
        if ghostXY in self.pcMap and ghoststates[i][1] == 0:
            ghostcors.append((int(ghostX), int(ghostY)))
            # Create a buffer around the ghost so pacman will avoid it.
            # NOTE(review): the source formatting was collapsed; this
            # reconstruction places the buffer inside the active-ghost
            # branch (only non-edible ghosts are buffered) — confirm
            # against the original indentation.
            eghost = ((ghostXY[0] + 1), ghostXY[1])
            wghost = ((ghostXY[0] - 1)), ghostXY[1]
            nghost = ((ghostXY[0]), ghostXY[1] + 1)
            sghost = ((ghostXY[0]), ghostXY[1] - 1)
            ghostcors.append(eghost)
            ghostcors.append(wghost)
            ghostcors.append(nghost)
            ghostcors.append(sghost)
    # The value iteration will be called until the Bellman values stop
    # changing from the previous sweep (capped at 100 sweeps).
    for i in range(100):
        stable = self.valueIteration(state, ghostcors)
        if stable == 0:
            # print "stable at ", i
            break
    # Neighbouring coordinates for each direction. When a direction is
    # illegal (a wall), the "neighbour" collapses onto pacman's own
    # cell so its utility stands in for bouncing off the wall.
    # East Coordinates
    if Directions.EAST in legal:
        eastofpacman = (pacloc[0] + 1, pacloc[1])
    else:
        eastofpacman = pacloc
    # West Coordinates
    if Directions.WEST in legal:
        westofpacman = (pacloc[0] - 1, pacloc[1])
    else:
        westofpacman = pacloc
    # North Coordinates
    if Directions.NORTH in legal:
        northofpacman = (pacloc[0], pacloc[1] + 1)
    else:
        northofpacman = pacloc
    # South Coordinates
    if Directions.SOUTH in legal:
        southofpacman = ((pacloc[0], pacloc[1] - 1))
    else:
        southofpacman = pacloc
    # Best expected utility seen so far, and the move that achieves it.
    maxutil = -999
    makeMove = 'null'
    # Converged utilities of the four neighbouring cells.
    unorth = self.iterationDict[northofpacman]
    usouth = self.iterationDict[southofpacman]
    ueast = self.iterationDict[eastofpacman]
    uwest = self.iterationDict[westofpacman]
    # Get a movement policy for pacman using the value-iteration map
    # that is updated at every move: expected utility of a move is
    # 0.8 * intended direction + 0.1 * each perpendicular direction.
    if Directions.NORTH in legal:
        movnorth = ((0.8 * unorth) + (0.1 * uwest) + (0.1 * ueast))
        if movnorth > maxutil:
            maxutil = movnorth
            makeMove = Directions.NORTH
    if Directions.EAST in legal:
        moveast = ((0.8 * ueast) + (0.1 * unorth) + (0.1 * usouth))
        if moveast > maxutil:
            maxutil = moveast
            makeMove = Directions.EAST
    if Directions.SOUTH in legal:
        movsouth = ((0.8 * usouth) + (0.1 * uwest) + (0.1 * ueast))
        if movsouth > maxutil:
            maxutil = movsouth
            makeMove = Directions.SOUTH
    if Directions.WEST in legal:
        movwest = ((0.8 * uwest) + (0.1 * unorth) + (0.1 * usouth))
        if movwest > maxutil:
            maxutil = movwest
            makeMove = Directions.WEST
    # A chosen move resets the iteration counter and is handed to the
    # api (which may still apply motion noise); no legal move with a
    # better-than-sentinel utility should not happen in practice.
    if makeMove != 'null':
        self.itrStart = 0
        return api.makeMove(makeMove, legal)
    else:
        print "where am I ?"
def __value_iteration(self, state, debug_mode, deep_debug_mode, ghostbuster_mode):
    """Initialize reward/utility data structures for the current
    ghostbuster mode, then run value iteration until convergence or an
    early stopping point.
    """
    # Partition ghosts by the scared flag: flag == 1 means edible
    # (positions truncated to int cells); anything else is hostile
    # (position kept exactly as the api reports it, possibly fractional).
    ghosts_states = api.ghostStates(state)
    edible_ghosts = []
    hostile_ghosts = []
    early_stopping_point = self.__NORMAL_EARLY_STOPPING_POINT
    for ghost_state in ghosts_states:
        if ghost_state[1] == 1:
            edible_ghosts.append(
                (int(ghost_state[0][0]), int(ghost_state[0][1])))
        else:
            hostile_ghosts.append(ghost_state[0])
    if debug_mode:
        print("\tedible_ghosts=" + str(edible_ghosts))
        print("\thostile_ghosts=" + str(hostile_ghosts))
    # inactive ghostbuster mode: by default the reward targets are the
    # remaining food dots, with hostile ghosts as threats.
    self.__rewards, self.__utilities = self.__initialize_data_structures(
        self.__foods,
        self.__FOOD,
        self.__walls,
        self.__floors,
        hostile_ghosts,
        safety_distance=self.__SAFETY_DISTANCE,
        threat_decay_rate=self.__THREAT_DECAY_RATE)
    early_stopping_point = self.__NORMAL_EARLY_STOPPING_POINT
    # Fewer than 10 dots left: switch to the sparse stopping budget.
    # NOTE(review): whether "sparse" means more or fewer sweeps depends
    # on constants defined elsewhere in the file — not visible here.
    if len(self.__foods) < 10:
        early_stopping_point = self.__SPARSE_EARLY_STOPPING_POINT
    # defensive ghostbuster mode: retarget onto edible ghosts, but only
    # when at least one exists.
    if ghostbuster_mode == self.__DEFENSIVE_GHOSTBUSTER_MODE:
        if len(edible_ghosts) > 0:
            self.__rewards, self.__utilities = self.__initialize_data_structures(
                edible_ghosts,
                self.__EDIBLE,
                self.__walls,
                self.__floors,
                hostile_ghosts,
                safety_distance=self.__SAFETY_DISTANCE,
                threat_decay_rate=self.__THREAT_DECAY_RATE)
            early_stopping_point = self.__SPARSE_EARLY_STOPPING_POINT
    # offensive ghostbuster_mode: chase edible ghosts if any; otherwise
    # head for a capsule (presumably to make the ghosts edible).
    if ghostbuster_mode == self.__OFFENSIVE_GHOSTBUSTER_MODE:
        if len(edible_ghosts) > 0:
            self.__rewards, self.__utilities = self.__initialize_data_structures(
                edible_ghosts,
                self.__EDIBLE,
                self.__walls,
                self.__floors,
                hostile_ghosts,
                safety_distance=self.__SAFETY_DISTANCE,
                threat_decay_rate=self.__THREAT_DECAY_RATE)
            early_stopping_point = self.__SPARSE_EARLY_STOPPING_POINT
        elif len(self.__capsules) > 0:
            self.__rewards, self.__utilities = self.__initialize_data_structures(
                self.__capsules,
                self.__CAPSULE,
                self.__walls,
                self.__floors,
                hostile_ghosts,
                safety_distance=self.__SAFETY_DISTANCE,
                threat_decay_rate=self.__THREAT_DECAY_RATE)
            early_stopping_point = self.__SPARSE_EARLY_STOPPING_POINT
    """use value iteration to update utilites until convergence or early stopping point"""
    # Sweep until __update_utilities reports full convergence, or the
    # stopping budget is exhausted; stopping_point records how many
    # sweeps actually ran (for the debug printout).
    stopping_point = None
    for i in range(early_stopping_point):
        stopping_point = i + 1
        self.__utilities, fully_convergent, total_entropy = self.__update_utilities(
            self.__walls,
            self.__neighbors,
            self.__rewards,
            self.__utilities,
            discount_factor=self.__DISCOUNT_FACTOR,
            convergence_tolerance=self.__CONVERGENCE_TOLERANCE,
            ignoring_walls=False,
            maximum_mode=True)
        if deep_debug_mode:
            self.__print_data_structure(self.__walls, self.__rewards)
            self.__print_data_structure(self.__walls, self.__utilities)
            print("\ttotal_entropy=" + "{:+10.3f}".format(total_entropy))
        if fully_convergent:
            break
    if debug_mode:
        self.__print_data_structure(self.__walls, self.__rewards)
        self.__print_data_structure(self.__walls, self.__utilities)
        print("\ttotal_entropy=" + "{:+10.3f}".format(total_entropy))
        print("\tstopping_point=" + str(stopping_point))
def compute(self, state, Dictionary):
    """Run value iteration over the map dictionary for the current state.

    Updates Dictionary in place: every traversable square gets
    reward + 0.9 * expected utility of the best move, with -500 on (and
    in a buffer around) active ghosts, +10 on a scared ghost, +10 on
    food and capsules, 0 elsewhere. Only 1- and 2-ghost layouts are
    handled (matching the original); other counts leave Dictionary
    untouched.

    Fix vs. original: in the 1-ghost case the west buffer cell appended
    (ghost1x + i, ghost1y) when (ghost1x - i, ghost1y) was traversable —
    a copy-paste sign bug that double-penalised the east cell and left
    the west cell unprotected. The unused `wall` fetch is also removed.
    """
    ghostStates = api.ghostStates(state)
    capsule = api.capsules(state)
    food = api.food(state)
    numGhosts = len(api.ghosts(state))

    # Rewards and penalties (shared by both ghost-count cases).
    ghostPenalty = -500
    capsuleReward = 10
    foodReward = 10
    safeGhost = 10
    noReward = 0
    doNotEnterPenalty = -500
    discountFactor = 0.9
    iterations = 50

    def intPos(i):
        # Ghost positions can be fractional mid-move; truncate to a cell.
        return (int(ghostStates[i][0][0]), int(ghostStates[i][0][1]))

    def buildBuffer(cell, radius, coords):
        # Traversable cells within `radius` axis-aligned steps of an
        # active ghost, which pacman must not enter.
        gx, gy = cell
        out = []
        for i in range(1, radius + 1):
            for nxt in ((gx + i, gy), (gx - i, gy),
                        (gx, gy + i), (gx, gy - i)):
                if nxt in coords:
                    out.append(nxt)
        return out

    # (cell, scared-flag) per ghost, plus the do-not-enter buffer.
    ghostCells = []
    doNotEnter = []
    if numGhosts == 1:
        g1 = (intPos(0), ghostStates[0][1])
        ghostCells = [g1]
        if g1[1] == 0:
            # Original used self.mapCoords and radius 1 here; kept as-is.
            doNotEnter = buildBuffer(g1[0], 1, self.mapCoords)
    elif numGhosts == 2:
        g1 = (intPos(0), ghostStates[0][1])
        g2 = (intPos(1), ghostStates[1][1])
        ghostCells = [g1, g2]
        if g1[1] == 0:
            # TODO(review): only ghost 1 ever gets an exclusion zone in
            # the original 2-ghost code — looks like an oversight, but
            # the behaviour is preserved here.
            doNotEnter = buildBuffer(g1[0], 2, self.pathCoords)
    else:
        return

    # Value iteration: 50 sweeps, reading utilities from a frozen copy
    # of the previous sweep. Priority of overlapping categories matches
    # the original elif chains: ghost cell, buffer, capsule, food, blank.
    for _ in range(iterations):
        copy = Dictionary.copy()
        for cell in Dictionary:
            if cell not in self.pathCoords:
                continue
            future = discountFactor * self.expectedUtitlity(
                copy, cell[0], cell[1])
            reward = None
            for pos, gstate in ghostCells:
                if cell == pos:
                    reward = ghostPenalty if gstate == 0 else safeGhost
                    break
            if reward is None:
                if cell in doNotEnter:
                    reward = doNotEnterPenalty
                elif cell in capsule:
                    reward = capsuleReward
                elif cell in food:
                    reward = foodReward
                else:
                    reward = noReward
            Dictionary[cell] = reward + future
def __init__(self, state):
    """Build the reward grid used for value iteration from the game state.

    self.grid[x][y] holds: None for walls, -20 on an active ghost,
    10 on a capsule, food/blank rewards that depend on map size, and
    (on larger maps) -15 / -10 overrides on cells around each ghost.

    Fixes vs. original:
    - Scared ghosts are now filtered on the raw float positions BEFORE
      truncating to ints; the original int-truncated self.loss[0] first,
      so `ghost[0] in self.loss` could never match a ghost at a
      fractional position and edible ghosts stayed marked as threats.
    - All ghost positions are int-truncated (original converted only
      the first one).
    - The api.distanceLimited call was recomputed for every grid cell
      although loop-invariant, and both branches guarded by it were
      identical — the dead call and branch are removed.
    - The unused local pacmanOrg is removed.
    """
    self.walls = api.walls(state)
    self.caps = api.capsules(state)
    self.reward = api.food(state)
    ghosts = api.ghosts(state)
    # Drop scared (edible) ghosts using their exact reported positions,
    # then truncate the survivors to integer grid cells.
    scared = [g[0] for g in api.ghostStates(state) if g[1] == 1]
    self.loss = [(int(g[0]), int(g[1])) for g in ghosts if g not in scared]
    # Grid dimensions based on the last wall co-ordinate (walls include
    # the outer border, so the last one is the far corner).
    self.x1 = self.walls[-1][0] + 1
    self.y1 = self.walls[-1][1] + 1
    self.grid = [[0 for y in range(self.y1)] for x in range(self.x1)]
    # Initialize default utilities; these are used as rewards.
    for x in range(self.x1):
        for y in range(self.y1):
            if (x, y) in self.walls:
                self.grid[x][y] = None
            elif (x, y) in self.loss:
                self.grid[x][y] = -20
            elif (x, y) in self.caps:
                self.grid[x][y] = 10
            elif self.x1 > 7:
                # Larger grids: plain food/blank rewards.
                self.grid[x][y] = 1 if (x, y) in self.reward else 0
            else:
                # Smaller grids: boost the (1, 1) corner dot while other
                # dots remain, and the last remaining dot overall.
                if (x, y) in self.reward:
                    if len(self.reward) > 1:
                        self.grid[x][y] = 5 if (x, y) == (1, 1) else 1
                    else:
                        self.grid[x][y] = 10
                else:
                    self.grid[x][y] = 0
    # Override rewards for legal spaces near ghosts (larger grids only).
    # Iterate ghosts in the original's y-outer/x-inner scan order so
    # overlapping zones of two ghosts resolve identically.
    if self.x1 > 7:
        for gx, gy in sorted(self.loss, key=lambda g: (g[1], g[0])):
            if (gx + 1, gy) not in self.walls:
                self.grid[gx + 1][gy] = -15
            if (gx + 2, gy) not in self.walls and (gx + 2) < self.x1:
                self.grid[gx + 2][gy] = -10
            if (gx - 1, gy) not in self.walls:
                self.grid[gx - 1][gy] = -15
            if (gx - 2, gy) not in self.walls and (gx - 2) > 0:
                self.grid[gx - 2][gy] = -10
            if (gx, gy + 1) not in self.walls:
                self.grid[gx][gy + 1] = -15
            if (gx, gy + 2) not in self.walls and (gy + 2) < self.y1:
                self.grid[gx][gy + 2] = -10
            if (gx, gy - 1) not in self.walls:
                self.grid[gx][gy - 1] = -15
            if (gx, gy - 2) not in self.walls and (gy - 2) > 0:
                self.grid[gx][gy - 2] = -10
            if (gx + 1, gy + 1) not in self.walls:
                self.grid[gx + 1][gy + 1] = -10
            if (gx + 1, gy - 1) not in self.walls:
                self.grid[gx + 1][gy - 1] = -10
            if (gx - 1, gy + 1) not in self.walls:
                self.grid[gx - 1][gy + 1] = -10
            if (gx - 1, gy - 1) not in self.walls:
                self.grid[gx - 1][gy - 1] = -10