def getAction(self, state):
    """Run value iteration over a fresh rewards map, then make the best
    legal move from Pacman's current position.

    NOTE(review): `map` here must be a project class (it is called with the
    game state and has identifyLocationsToIterate etc.), not the builtin --
    confirm against the file's imports.
    """
    # Create the rewards map, identify the locations we will iterate and
    # populate the rewards.
    rewardMap = map(state)
    rewardMap.identifyLocationsToIterate()
    rewardMap.populateRewards()
    # Create a utility map, copied from the rewards map. It is a copy because
    # this is the one we will iterate whilst keeping the rewards map constant.
    # copy.deepcopy ensures we get a new object with no references back to
    # the original.
    utilityMap = copy.deepcopy(rewardMap)
    continueIterating = True
    iterationCount = 0
    while continueIterating:
        iterationCount = iterationCount + 1
        # Temporarily store the old map so we can check for convergence
        # after this iteration.
        oldUtilityMap = copy.deepcopy(utilityMap)
        # One Bellman sweep over the utility map.
        utilityMap = self.singleIteration(rewardMap, utilityMap,
                                          api.whereAmI(state))
        if self.converged(oldUtilityMap, utilityMap):
            continueIterating = False  # check for convergence
    # Once iteration is complete, we identify the possible moves from our
    # current location, use a separate function to identify the best move,
    # and attempt to make that move.
    legal = utilityMap.getLegalMoves(api.whereAmI(state))
    bestMove = self.getBestMove(api.whereAmI(state), legal, utilityMap)
    return api.makeMove(bestMove, legal)
def getAction(self, state):
    """Wall-hugging explorer.

    While tracing the outer walls, always prefer turning left, then going
    straight, then right, then reversing.  Once Pacman reaches a junction
    he has already visited, switch to chasing food he can see nearby.
    """
    legalMoves = state.getLegalPacmanActions()
    heading = state.getPacmanState().configuration.direction
    # If all outermost walls are already explored at this junction, go
    # towards food if it can be seen.
    if len(legalMoves) > 3 and api.whereAmI(state) in self.visited:
        return self.foodWithin5(state, heading, legalMoves)
    # Otherwise keep going left to visit the outermost walls.
    self.visited.append(api.whereAmI(state))
    if heading == Directions.STOP:
        heading = Directions.NORTH
    preferences = [
        Directions.LEFT[heading],                   # turn left first
        heading,                                    # else keep straight
        Directions.RIGHT[heading],                  # else turn right
        Directions.LEFT[Directions.LEFT[heading]],  # else reverse
    ]
    for choice in preferences:
        if choice in legalMoves:
            self.last = choice
            return self.last
    return Directions.STOP
def registerInitialState(self, state):
    """One-off setup at the start of a game: build the internal map, mark
    the walls and the food, then run the initial utility calculation."""
    print "Running registerInitialState for MDPAgent!"
    print "I'm at:"
    print api.whereAmI(state)
    self.makeMap(state)
    self.addWallsToMap(state)
    self.updateFoodInMap(state)
    self.calculateUtility(state)
def registerInitialState(self, state):
    """One-off setup at the start of a game: build the internal map, mark
    the walls, and print the map for inspection."""
    print "Running registerInitialState for MDPAgent!"
    print "I'm at:"
    print api.whereAmI(state)
    self.makeMap(state)
    self.addWallsToMap(state)
    self.map.display()
def registerInitialState(self, state):
    """One-off setup at the start of a game: build the internal map, mark
    the walls, and print the map for inspection."""
    print "Running registerInitialState for MDPAgent!"
    print "I'm at:"
    print api.whereAmI(state)
    # Make map. Taken from lab 5 solutions (Parsons, 2017).
    self.makeMap(state)
    self.addWallsToMap(state)
    self.map.display()
def _determine_best_action(self, state, legal_moves):
    """Query the maze for the expected utility of each action from
    Pacman's current square and return the best legal one."""
    x, y = api.whereAmI(state)
    maze = Maze(state)
    # getEU expects (y, x), hence the swapped coordinate order.
    expected = maze.getEU(y, x)
    # Keep only the utilities of actions that are actually legal.
    candidates = {}
    for action, utility in expected.iteritems():
        if action in legal_moves:
            candidates[action] = utility
    return self._arg_max(candidates)
def getAction(self, state):
    """Initialise on the first call; afterwards update rewards for the
    current square, run a Bellman sweep, and return the resulting move.
    On the very first call (before initialisation) the agent stops."""
    self.pacman = api.whereAmI(state)
    self.legal = api.legalActions(state)
    if not self.init:
        self.initialize(state)
    else:
        # Earlier decaying-reward experiment, kept for reference:
        # if self.reward[self.pacman[0]][self.pacman[1]] < 10:
        #     self.reward[self.pacman[0]][self.pacman[1]] = self.reward[self.pacman[0]][self.pacman[1]] - 1
        # else:
        # Visited squares get a flat small penalty so Pacman keeps moving.
        self.reward[self.pacman[0]][self.pacman[1]] = -1
        # Debug dump of the reward grid.
        print "reward"
        for row in self.reward:
            print row
        self.updateMap(state)
        #print "\nreward"
        #for row in self.reward:
        #    print row
        self.bellman(state)
        #print "utility"
        #for row in self.utility:
        #    print row
        return self.getMove(state)
    return api.makeMove(Directions.STOP, self.legal)
def deGhost(self, state):
    """Pick a move that runs away from ghosts without backtracking.

    When running from a ghost, if Pacman turns a corner he can no longer
    see the ghost and may backtrack towards it and get caught.  Avoid this
    by going straight where possible and removing the reverse of the last
    move from the options, so corners can be turned without losing track
    of the ghost.
    """
    self.update(state)
    legal = api.legalActions(state)
    legal.remove(Directions.STOP)
    if len(legal) > 1:
        # Remove the option to backtrack.
        # BUG FIX: list.remove raises ValueError when the element is
        # missing; the reverse of self.last is not guaranteed to be a
        # currently-legal action (e.g. before any move has been made),
        # so guard the removal.
        backtrack = self.oppositeDirection(state, self.last)
        if backtrack in legal:
            legal.remove(backtrack)
    # Go straight if possible.
    if self.last in legal:
        return self.last
    self.last = random.choice(legal)
    return self.last
def initialize(self, state):
    """Build the agent's character grid on the first call: '?' unknown,
    'W' wall, 'F' food/capsule, '0' Pacman's square."""
    # get location of all visible food
    foods = api.food(state)
    # get location of all corners
    corners = api.corners(state)
    # get location of all visible capsules
    capsules = api.capsules(state)
    # Get the actions we can try, and remove "STOP" if that is one of them.
    legal = api.legalActions(state)
    # get location of all visible walls
    walls = api.walls(state)
    # get pacmans position
    pacman = api.whereAmI(state)
    x = pacman[0]
    y = pacman[1]
    if self.map == None:
        # width/height end up as the maximum x/y coordinate seen in corners.
        width = 0
        height = 0
        for corner in corners:
            if corner[0] > width:
                width = corner[0]
            if corner[1] > height:
                height = corner[1]
        # NOTE(review): the grid is range(width) x range(height), which
        # excludes index `width`/`height` itself even though corners sit at
        # those coordinates -- confirm whether this should be width + 1 /
        # height + 1 to avoid an IndexError when marking edge walls.
        self.map = [["?" for y in range(height)] for x in range(width)]
        for wall in walls:
            self.map[wall[0]][wall[1]] = "W"
        for food in foods:
            self.map[food[0]][food[1]] = "F"
        for capsule in capsules:
            self.map[capsule[0]][capsule[1]] = "F"
    # Mark Pacman's current square and flag initialisation as done.
    self.map[x][y] = "0"
    self.init = True
def getAction(self, state):
    """Main decision loop: lazily build the map, handle capsules and
    loops, remember ghosts, avoid them while a timer is active, then pick
    and record a move."""
    walls = api.walls(state)
    # The last corner holds the maximum (x, y) coordinates of the layout.
    width, height = api.corners(state)[-1]
    legal = api.legalActions(state)
    me = api.whereAmI(state)
    food = api.food(state)
    ghosts = api.ghosts(state)
    capsules = api.capsules(state)
    direction = Directions.STOP
    x, y = me
    # Build the map once, on the first call.
    if not hasattr(self, 'map'):
        self.createMap(walls, width + 1, height + 1)
    self.checkForCapsules(capsules, legal, ghosts)
    # solveLoop may prune moves that would put Pacman in an endless loop.
    legal = self.solveLoop(ghosts, legal)
    if len(ghosts):
        self.memorizeGhosts(ghosts)
    # NOTE(review): self.counter appears to gate ghost avoidance (e.g. a
    # post-capsule timer) -- confirm its semantics elsewhere in the class.
    if self.counter < 0:
        for ghost in ghosts:
            legal = self.checkForGhosts(ghost, me, legal)
    direction = self.pickMove(me, legal, width + 1, height + 1, food)
    self.updatePosition(me, 1, self.map)
    self.printMap(self.map)
    self.last = direction
    return direction
def getAction(self, state):
    """Run value iteration for the current game state, display the
    resulting utility grid, and move towards the best neighbouring cell."""
    start = api.whereAmI(state)
    # Regenerate rewards each game cycle to reflect changing conditions.
    self.rewardMapping(state)
    # Build the dictionary of states once, at the start of the game.
    if not self.stateDict:
        self.stateMapping(state)
    # Converged utilities from value iteration.
    gridVals = self.valueIteration(state)
    # Write each utility into the Grid object (keys are (y, x) locations)
    # and print the map representation to the console.
    for (y, x), utility in gridVals.items():
        self.map.setValue(y, x, '{:3.2f}'.format(utility))
    self.map.prettyDisplay()
    # Optimal policy: the neighbouring location with the best utility.
    sPrime = self.bestMove(state, gridVals)
    legal = api.legalActions(state)
    # Translate the chosen location into a direction and make the move.
    return api.makeMove(self.singleMove(start, sPrime), legal)
def getAction(self, state):
    """Refresh the internal map, run policy iteration, and follow the
    resulting policy from Pacman's current square."""
    self.updateMap(state)
    self.policyIteration()
    position = api.whereAmI(state)
    # The converged policy maps each cell directly to a direction.
    chosen = self.policy[position]
    return api.makeMove(chosen, api.legalActions(state))
def getAction(self, state):
    """Debug/demo action: prints example value-map and MEU calls, then
    moves randomly.  NOTE(review): the trailing triple quote opens a string
    that must be closed later in the file (commented-out code?) -- confirm."""
    print "-" * 30  # divider
    ghosts = api.ghosts(state)  # get state of ghosts
    legal = state.getLegalPacmanActions()  # get a list of pacman's legal actions
    last = state.getPacmanState().configuration.direction  # store last move
    pacman = api.whereAmI(state)  # retrieve location of pacman
    food = api.food(state)  # retrieve location of food
    walls = api.walls(state)
    # How to call the getValueMap method.
    # In reality, the reward should be the final value-iteration of the grid.
    foodVal = self.getValueMap(state, 10)
    print foodVal
    # Example of how to use the getPacMEU function.
    currentUtil = self.getPacMEU(pacman[0], pacman[1], foodVal, legal)
    print "Utility values: "
    print currentUtil
    print max(currentUtil.values())
    # Example of how to use the getMEU function.
    foodUtil = self.getMEU((18, 3), foodVal, walls)
    print "max utility for (18, 3) is: "
    print foodUtil
    if Directions.STOP in legal:
        legal.remove(Directions.STOP)
    # Random choice between the legal options.
    return api.makeMove(random.choice(legal), legal)
    """
def where_to_move(self, state, map):
    """Calculate the maximum expected utility to decide where to move.

    Parameters:
        state: the state of pacman.
        map: the value map.

    Returns the direction of the move (None if no utility key matches).
    """
    pacman = api.whereAmI(state)
    # Utilities of the four moves from Pacman's current position.
    utilities = self.calculate_utilities(pacman, map)
    # Highest (value, key) pair; ties break on the key string, exactly as
    # max(zip(values, keys)) does.
    move_decision = max(zip(utilities.values(), utilities.keys()))
    # Translate the winning utility key into a game direction.
    direction_for = {
        'north_u': Directions.NORTH,
        'south_u': Directions.SOUTH,
        'west_u': Directions.WEST,
        'east_u': Directions.EAST,
    }
    return direction_for.get(move_decision[1])
def foodWithin2(self, state, currentDirection, legalMoves): cur = api.whereAmI(state) #north ##can see food within 2 units if (cur[0], cur[1] + 1) in api.food(state) or (cur[0], cur[1] + 2) in api.food(state): print "n" return Directions.NORTH #east ##can see food within 2 units if (cur[0] + 1, cur[1]) in api.food(state) or (cur[0] + 2, cur[1]) in api.food(state): print "e" return Directions.EAST #south ##can see food within 2 units if (cur[0], cur[1] - 1) in api.food(state) or (cur[0], cur[1] - 2) in api.food(state): print "s" return Directions.SOUTH #west ##can see food within 2 units if (cur[0] - 1, cur[1]) in api.food(state) or (cur[0] + 2, cur[1]) in api.food(state): print "w" return Directions.WEST legalMoves.remove(Directions.STOP) return random.choice(legalMoves)
def getAction(self, state):
    """Track each ghost's facing direction, then follow the action with the
    highest utility from the updated utility map."""
    # Find the facing direction of each ghost by differencing its current
    # and previous positions, rounded to whole grid steps.
    self.current_ghosts_states = api.ghostStatesWithTimes(state)
    valid_facing = [(1, 0), (-1, 0), (0, 1), (0, -1), (0, 0)]
    self.ghosts_facing = []
    for i in range(len(self.current_ghosts_states)):
        facing_of_ghost = (int(
            round(self.current_ghosts_states[i][0][0] -
                  self.last_ghosts_states[i][0][0])),
                           int(
                               round(self.current_ghosts_states[i][0][1] -
                                     self.last_ghosts_states[i][0][1])))
        # A jump larger than one step means the ghost respawned (eaten by
        # pacman), so treat it as stationary.
        if facing_of_ghost not in valid_facing:
            facing_of_ghost = (0, 0)
        self.ghosts_facing.append(facing_of_ghost)
    self.last_ghosts_states = self.current_ghosts_states
    # Search optimal policy and take the optimal action.
    self.initialRewardMap(state)
    pacman = api.whereAmI(state)
    utilities_map = self.updateUtilities()
    legal = api.legalActions(state)
    # Unit vectors for every legal action; pick the one whose destination
    # cell has the highest utility.
    action_vectors = [Actions.directionToVector(a, 1) for a in legal]
    optic_action = max(
        map(
            lambda x: (float(
                utilities_map.getValue(x[0] + pacman[0], x[1] + pacman[1])
            ), x), action_vectors))
    return api.makeMove(Actions.vectorToDirection(optic_action[1]), legal)
def calculate_utility(direction, map, state):
    """Score a candidate move as a weighted sum of two terms: proximity of
    the destination square to unseen territory, and the ghost-danger value
    of moving that way."""
    destination = nextLocation(direction, api.whereAmI(state))
    # A longer path to unseen squares is worse, so negate the distance.
    exploration_term = UTILITY['unseen'] * -map[destination].distance_to_unseen()
    danger_term = UTILITY['ghost'] * ghost_value(direction, state)
    return exploration_term + danger_term
def getAction(self, state):
    """Debug variant of the Bellman agent: updates rewards/utilities and
    prints both grids, but the actual move (getMove) is commented out, so
    the agent currently always STOPs."""
    self.pacman = api.whereAmI(state)
    self.legal = api.legalActions(state)
    self.ghosts = api.ghosts(state)
    if not self.init:
        self.initialize(state)
    else:
        # Earlier decaying-reward experiment, kept for reference:
        # if self.reward[self.pacman[0]][self.pacman[1]] < 10:
        #     self.reward[self.pacman[0]][self.pacman[1]] = self.reward[self.pacman[0]][self.pacman[1]] - 1
        # else:
        # Reset the current square back to the base reward.
        self.reward[self.pacman[0]][self.pacman[1]] = self.baseReward
        reward = self.updateMap(state)
        self.bellman(state, reward)
        # Debug dump of reward and utility grids.
        print ''
        for row in reward:
            print row
        print ''
        for row in self.utility:
            print row
        #return self.getMove(state)
    return api.makeMove(Directions.STOP, self.legal)
def getAction(self, state):
    """Rebuild the map each turn, run layout-sized value iteration, and
    make the planned move."""
    self.makeMap(state)
    self.addWallsToMap(state)
    self.addConsumablesToMap(state)
    self.updateGhosts(state)
    # Get the actions we can try, and remove "STOP" if that is one of them.
    pacman = api.whereAmI(state)
    legal = api.legalActions(state)
    ghosts = api.ghosts(state)
    corners = api.corners(state)
    layoutHeight = self.getLayoutHeight(corners)
    layoutWidth = self.getLayoutWidth(corners)
    # Small layouts: more sweeps with discount 0.68; larger layouts: fewer
    # sweeps with discount 0.8.  Both use a -0.1 living reward.
    # NOTE(review): the sweep counts and discounts look hand-tuned -- the
    # rationale for 100/0.68 vs 50/0.8 is not visible here.
    if (layoutHeight-1)<8 and (layoutWidth-1)<8:
        for i in range (100):
            self.valIterS(state,0.68,-0.1)
    else:
        for i in range (50):
            self.valIterM(state,0.8,-0.1)
    plannedMove = self.plannedMove(pacman[0],pacman[1])
    #self.dankMap.prettyDisplay()  # uncomment to see the values generated
    if Directions.STOP in legal:
        legal.remove(Directions.STOP)
    # Input the calculated move for our next move
    return api.makeMove(plannedMove, legal)
def getAction(self, state): #current possible moves legal = state.getLegalPacmanActions() #get current position pacman = api.whereAmI(state) pacmanX = pacman[0] pacmanY = pacman[1] #get food locations food = api.food(state) foodLoc = [] # get Distance for loc in food: foodLoc.append((abs(loc[0]-pacmanX + loc[1]-pacmanY),(loc[0]-pacmanX, loc[1]-pacmanY))) print foodLoc #Prevent it from stopping if Directions.STOP in legal: legal.remove(Directions.STOP) pick = random.choice(legal) return api.makeMove(pick, legal)
def getAction(self, state):
    """Generate the value map for the current state and move towards the
    neighbouring cell with the highest value."""
    value_list = []
    legal = api.legalActions(state)
    corner = api.corners(state)
    pacman = api.whereAmI(state)
    pacman_x = pacman[0]
    pacman_y = pacman[1]
    # NOTE(review): assumes corners()[3] is the top-right corner so its
    # coordinates give the layout bounds -- confirm the ordering of corners.
    legal_width = corner[3][0]
    legal_height = corner[3][1]
    # Policy iteration and evaluation: get the value of the four
    # neighbouring cells and select the direction corresponding to the
    # maximum value as Pacman's decision.
    # NOTE(review): wall cells must be present in map_effect, otherwise the
    # lookups below raise KeyError -- verify map_valuegeneration covers them.
    map_effect = gridworld().map_valuegeneration(state, (legal_width, legal_height))
    value_list.append(map_effect[(pacman_x-1, pacman_y)])  # index 0: west
    value_list.append(map_effect[(pacman_x+1, pacman_y)])  # index 1: east
    value_list.append(map_effect[(pacman_x, pacman_y + 1)])  # index 2: north
    value_list.append(map_effect[(pacman_x, pacman_y - 1)])  # index 3: south
    max_value = value_list.index(max(value_list))
    # print 'map_effect'
    # print map_effect
    # print 'value_list'
    # print value_list
    # print 'max_value'
    # print max_value
    if max_value == 0:
        return api.makeMove(Directions.WEST, legal)
    if max_value == 1:
        return api.makeMove(Directions.EAST, legal)
    if max_value == 2:
        return api.makeMove(Directions.NORTH, legal)
    if max_value == 3:
        return api.makeMove(Directions.SOUTH, legal)
def getAction(self, state):
    """Exploratory scaffolding: computes several ghost distances (all
    currently unused) and always returns STOP."""
    # Get legal actions
    legal = api.legalActions(state)
    # Get location of Pacman
    pacman = api.whereAmI(state)
    # Get location of Ghosts
    locGhosts = api.ghosts(state)
    #print "locGhosts: ", locGhosts
    # Get distance between pacman and the ghosts.
    # NOTE(review): p_g_dist is overwritten each pass, so only the distance
    # to the last ghost survives -- and it is never read afterwards.
    for i in locGhosts:
        p_g_dist = util.manhattanDistance(pacman, i)
    # Get distance between ghosts.
    # NOTE(review): assumes at least two ghosts; IndexError otherwise.
    g_g_dist = util.manhattanDistance(locGhosts[0], locGhosts[1])
    #print "g_g_dist:", g_g_dist
    # x/y offset between pacman and the first ghost (also unused).
    dist = []
    dist.append(locGhosts[0][0] - pacman[0])
    dist.append(locGhosts[0][1] - pacman[1])
    return api.makeMove(Directions.STOP, legal)
def __maximum_expected_utility(self, state, debug_mode):
    """Return the legal action whose neighbouring state has the highest
    stored utility (STOP excluded)."""
    # The location of the agent.
    agent_location = api.whereAmI(state)
    if debug_mode:
        print("\tagent_location=" + str(agent_location))
    # Discover the legal actions.
    legal = api.legalActions(state)
    # Remove STOP to increase mobility.
    legal.remove(Directions.STOP)
    # Decide the next move based on maximum expected utility.
    action, maximum_expected_utility = None, None
    for direction in legal:
        # Utility of the neighbouring state reached by this direction.
        utility = self.__utilities[self.__neighbors[agent_location]
                                   [direction]][1]
        # First candidate seeds the running maximum.
        if action == None or maximum_expected_utility == None:
            action = direction
            maximum_expected_utility = utility
        expected_utility = utility
        if debug_mode:
            print("\tdirection=" + str(direction) + "\texpected_utility=" +
                  str(expected_utility))
        if expected_utility > maximum_expected_utility:
            action = direction
            maximum_expected_utility = expected_utility
    if debug_mode:
        print("\taction=" + str(action))
    return action
def getAction(self, state):
    """Greedy move towards the first food dot; when both preferred
    directions are blocked, walk in a random legal direction for two
    "backstep" turns to escape."""
    legal = api.legalActions(state)
    if Directions.STOP in legal:
        legal.remove(Directions.STOP)
    # SELECT TARGET: the first visible food dot.
    target = api.food(state)[0]
    print target
    pacman = api.whereAmI(state)
    print "Pacman position: ", pacman
    if self.backsteps == 0:
        # Step west/east then south/north towards the target when legal.
        if pacman[0] >= target[0]:
            if Directions.WEST in legal:
                return api.makeMove(Directions.WEST, legal)
        else:
            if Directions.EAST in legal:
                return api.makeMove(Directions.EAST, legal)
        if pacman[1] >= target[1]:
            if Directions.SOUTH in legal:
                return api.makeMove(Directions.SOUTH, legal)
        else:
            if Directions.NORTH in legal:
                return api.makeMove(Directions.NORTH, legal)
        # It reaches here only once both directions it wants to go are
        # illegal, so: backstep for 2 turns in a random legal direction.
        self.backsteps = 2
        self.backstep_direction = random.choice(legal)
    self.backsteps -= 1
    return api.makeMove(self.backstep_direction, legal)
def getAction(self, state):
    """Greedy move towards the fixed target (1, 1); when both preferred
    directions are blocked, walk in a random legal direction for two
    "backstep" turns to escape."""
    legal = api.legalActions(state)
    if Directions.STOP in legal:
        legal.remove(Directions.STOP)
    # Fixed target: the bottom-left corner of the maze.
    target = (1, 1)
    print target
    print "Food locations: "
    print len(api.food(state))
    pacman = api.whereAmI(state)
    print "Pacman position: ", pacman
    if self.backsteps == 0:
        # Step west/east then south/north towards the target when legal.
        if pacman[0] >= target[0]:
            if Directions.WEST in legal:
                return api.makeMove(Directions.WEST, legal)
        else:
            if Directions.EAST in legal:
                return api.makeMove(Directions.EAST, legal)
        if pacman[1] >= target[1]:
            if Directions.SOUTH in legal:
                return api.makeMove(Directions.SOUTH, legal)
        else:
            if Directions.NORTH in legal:
                return api.makeMove(Directions.NORTH, legal)
        # Both preferred directions blocked: backstep randomly for 2 turns.
        self.backsteps = 2
        self.backstep_direction = random.choice(legal)
    self.backsteps -= 1
    return api.makeMove(self.backstep_direction, legal)
def getAction(self, state):
    """
    The function to work out the next intended action.

    Builds a reward board (food 1, capsules 2, walls 'x', ghosts -3 when
    not scared), halves the reward in the 3x3 halo around each ghost, runs
    value iteration, and returns the legal action with the maximum
    expected utility.

    Returns:
        Directions: intended action that Pacman will carry out.
    """
    current_pos = api.whereAmI(state)
    corners = api.corners(state)
    food = api.food(state)
    ghosts = api.ghosts(state)
    ghost_scared_time = api.ghostStatesWithTimes(state)[0][1]
    walls = api.walls(state)
    legal = api.legalActions(state)
    capsules = api.capsules(state)
    # Squares whose reward must never be overwritten by the ghost halo.
    protected_coords = walls + ghosts + [current_pos]
    width = max(corners)[0] + 1
    height = max(corners, key=itemgetter(1))[1] + 1
    board = self.create_board(width, height, -0.04)
    board.set_position_values(food, 1)
    board.set_position_values(walls, 'x')
    board.set_position_values(capsules, 2)
    # Only penalise ghosts when they are (nearly) no longer scared.
    if ghost_scared_time < 5:
        board.set_position_values(ghosts, -3)
    # Debug board dump, kept for reference:
    # for i in range(height):
    #     for j in range(width):
    #         print board[i, j],
    #     print
    # print
    print "GHOST LIST: ", ghosts
    for x, y in ghosts:
        # Set the surrounding area around the ghost to half the reward of
        # the ghost; avoids changing the reward of the ghost itself, the
        # pacman and the walls.
        x_coordinates = [x - 1, x, x + 1]
        y_coordinates = [y - 1, y, y + 1]
        for x_coord in x_coordinates:
            for y_coord in y_coordinates:
                if (x_coord, y_coord) not in protected_coords:
                    # Board rows are indexed top-down, hence convert_y.
                    converted_y = board.convert_y(y_coord)
                    board[converted_y, x_coord] = board[board.convert_y(y), x] / 2
    board = self.value_iteration(state, board)
    expected_utility = self.calculate_expected_utility(
        state, board, abs(current_pos[1] - (height - 1)), current_pos[0])
    # Best legal action by expected utility.
    return max([(utility, action) for utility, action in expected_utility
                if action in legal])[1]
def getAction(self, state): self.updateMap(state) pacman = api.whereAmI(state) legal = api.legalActions(state) move = self.policy[pacman] print self.values print move return api.makeMove(move, legal)
def getAction(self, state): legal = state.getLegalPacmanActions() #Again, get a list of pacman's legal actions if Directions.STOP in legal: legal.remove(Directions.STOP) pacman = api.whereAmI(state) #retrieve location of pacman food = api.food(state) #retrieve location of food #Distance of food dist = [] # initiate list of distances for i in range(len(food)): dist.append(util.manhattanDistance(pacman, food[i])) minIndex = dist.index(min(dist)) #get index of min dist value (assuming the array remains ordered) closestFood = food[minIndex] #current position coordinates x1, y1 = pacman[0], pacman[1] x2, y2 = closestFood print "closest food is: " print closestFood print "pacman's location is: " print pacman print "list of distances: " print dist #if pacman is to the West of closest food, then goEast = True and so on... goEast = x1 < x2 and y1 == y2 goWest = x1 > x2 and y1 == y2 goNorth = x1 == x2 and y1 < y2 goSouth = x1 == x2 and y1 > y2 last = state.getPacmanState().configuration.direction if x1 == 9 and y1 == 1: return api.makeMove(random.choice(legal), legal) else: pass if Directions.EAST in legal and (goEast): return api.makeMove('East', legal) elif Directions.WEST in legal and (goWest): return api.makeMove('West', legal) elif Directions.NORTH in legal and (goNorth): return api.makeMove('North', legal) elif Directions.SOUTH in legal and (goSouth): return api.makeMove('South', legal) elif last in legal: #if pacman doesnt find a move he can do, he just repeats the last move. return api.makeMove(last, legal) #this makes it so that the closest food isn't across the wall from him next else: return api.makeMove(random.choice(legal), legal) #just return a random move when he's out of moves
def updateBuffer(self, state):
    """Record Pacman's current position at the front of the history
    buffer, keeping only the 8 most recent entries.

    The buffer is used to determine whether Pacman is stuck or bouncing
    back and forth in an endless loop.
    """
    position = api.whereAmI(state)
    self.prevBuffer.insert(0, position)
    # Trim to the last 8 moves.
    self.prevBuffer = self.prevBuffer[:8]
def getAction(self, state):
    """
    The function to work out the next intended action.

    Builds a reward board whose food/capsule and ghost rewards are scaled
    by how much food remains, marks every square a ghost could reach in
    one or two moves with a strong negative reward, runs value iteration,
    and makes the legal move with the maximum expected utility.

    Returns:
        Directions: intended action that Pacman will carry out.
    """
    current_pos = api.whereAmI(state)
    food = api.food(state)
    # Make sure all ghost coordinates are ints rather than floats.
    ghosts = [(int(x), int(y)) for x, y in api.ghosts(state)]
    legal = api.legalActions(state)
    capsules = api.capsules(state)
    # Multipliers shrink as food is eaten: food matters less and ghosts
    # are penalised less aggressively late in the game.
    food_multiplier = (
        (0.8 * len(food) / float(self.initial_num_food))**2) + 6
    ghost_multiplier = (
        (0.2 * len(food) / float(self.initial_num_food))**2) + 3
    board = Board(self.width, self.height, -0.04)
    board.set_position_values(self.walls, 'x')
    board.set_position_values(capsules, 2 * food_multiplier)
    board.set_position_values(food, 1 * food_multiplier)
    board.set_position_values(ghosts, -7 * ghost_multiplier)
    # Rewards of ghosts, walls and the current position cannot be
    # overridden by the halo below.
    protected_pos = set(ghosts + self.walls + [current_pos])
    # Set a much more negative reward for the positions each ghost could
    # occupy within two moves.
    for ghost in ghosts:
        # Positions the ghost can occupy if it were to move once.
        for pos in self.get_next_pos(ghost):
            if pos not in protected_pos:
                # Board rows are indexed top-down, hence convert_y.
                board[int(board.convert_y(pos[1])),
                      int(pos[0])] = -6 * ghost_multiplier
                # Positions the ghost can occupy after two moves.
                for position in self.get_next_pos(pos):
                    if position not in protected_pos:
                        board[int(board.convert_y(position[1])),
                              int(position[0])] = -6 * ghost_multiplier
    board = self.value_iteration(state, board)  # call value iteration
    expected_utility = self.calculate_expected_utility(
        state, board, board.convert_y(current_pos[1]), current_pos[0])
    # Return the action associated with the max utility out of all the
    # legal actions.
    return api.makeMove(
        max([(utility, action) for utility, action in expected_utility
             if action in legal])[1], legal)