def getAction(self, game_state: GameState) -> str:
    """
    Returns the minimax action from the current gameState using self.depth
    and self.evaluationFunction.

    Here are some method calls that might be useful when implementing minimax.

    gameState.getLegalActions(agentIndex):
        Returns a list of legal actions for an agent
        agentIndex=0 means Pacman, ghosts are >= 1

    gameState.generateSuccessor(agentIndex, action):
        Returns the successor game state after an agent takes an action

    gameState.getNumAgents():
        Returns the total number of agents in the game
    """
    legal_actions = game_state.getLegalActions(agentIndex=0)
    best_action_index = max(
        range(len(legal_actions)),
        key=lambda action_num: self.min_value(
            state=game_state.generateSuccessor(
                agentIndex=0,
                action=legal_actions[action_num],
            ),
            depth=self.depth,
            ghost_num=1,
        ),
    )
    return legal_actions[best_action_index]
def min_value(self, state: GameState, depth: int, ghost_num: int) -> float:
    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    # Sanity check: valid ghost number?
    assert 1 <= ghost_num < state.getNumAgents()

    legal_actions = state.getLegalActions(ghost_num)
    successors = [
        state.generateSuccessor(ghost_num, ghost_action)
        for ghost_action in legal_actions
    ]

    # If this is the last ghost, the next optimizer should be from Pacman's
    # perspective
    next_optimizer = self.max_value \
        if ghost_num == state.getNumAgents() - 1 \
        else self.min_value

    # If this is the last ghost, decrement depth
    next_depth = depth - 1 \
        if ghost_num == state.getNumAgents() - 1 \
        else depth

    utilities = [
        next_optimizer(successor, next_depth, ghost_num + 1)
        for successor in successors
    ]
    return min(utilities)
def searchTree(state: GameState, depth: int, agent: int):
    actions = state.getLegalActions(agent)
    nextAgent = (agent + 1) % state.getNumAgents()
    if state.isLose() or state.isWin() or len(actions) == 0:
        return [state.getScore(), None]
    elif depth == 0:
        return [self.evaluationFunction(state), None]
    elif agent == 0:
        successors = [
            searchTree(state.generateSuccessor(agent, action), depth,
                       nextAgent)[0]
            for action in actions
        ]
        maximum = max(successors)
        maxIndex = successors.index(maximum)
        return [maximum, actions[maxIndex]]
    else:
        nextDepth = depth
        if nextAgent == 0:
            nextDepth -= 1
        successors = [
            searchTree(state.generateSuccessor(agent, action), nextDepth,
                       nextAgent)[0]
            for action in actions
        ]
        expected = sum(successors) / len(successors)
        return [expected, None]
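# searchTree above references self.evaluationFunction, so it is presumably a
# closure nested inside an agent's getAction. A minimal sketch of that
# enclosing wrapper, under that assumption (not code from the original):
def getAction(self, gameState: GameState) -> str:
    # Index [1] is the action half of the [value, action] pair
    return searchTree(gameState, self.depth, 0)[1]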
def _alphabeta(self, gameState: GameState, idx: int,
               ab: List[float]) -> Tuple[float, Optional[str]]:
    n = gameState.getNumAgents()
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)
    agent = idx % n
    legalActions = gameState.getLegalActions(agent)
    pacman = (agent == 0)
    # ab holds the [alpha, beta] window; idx0 selects the bound we prune
    # against (beta for Pacman, alpha for ghosts), idx1 the bound we tighten.
    # mod folds max (Pacman) and min (ghosts) into a single maximization.
    idx0 = int(pacman)
    idx1 = int(not pacman)
    mod = 1 if pacman else -1
    best_score = -float('inf') * mod
    best_action = None
    for legalAction in legalActions:
        s = gameState.generateSuccessor(agent, legalAction)
        score = self._alphabeta(s, idx + 1, [*ab])[0]
        if score * mod > best_score * mod:
            best_score, best_action = score, legalAction
        if best_score * mod > ab[idx0] * mod:
            break
        ab[idx1] = max(ab[idx1] * mod, best_score * mod) * mod
    return (best_score, best_action)
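# Hedged usage sketch: _alphabeta expects idx=0 and a fresh [alpha, beta]
# window at the root. This wrapper is an assumption for illustration, not
# part of the original code.
def getAction(self, gameState: GameState) -> str:
    _, action = self._alphabeta(gameState, 0, [-float('inf'), float('inf')])
    return action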
def getAction(self, gameState: GameState):
    """
    You do not need to change this method, but you're welcome to.

    getAction chooses among the best options according to the evaluation
    function.

    Just like in the previous project, getAction takes a GameState and
    returns some Directions.X for some X in the set
    {NORTH, SOUTH, WEST, EAST, STOP}
    """
    # Collect legal moves and successor states
    legalMoves = gameState.getLegalActions()

    # Choose one of the best actions
    scores = [self.evaluationFunction(gameState, action)
              for action in legalMoves]
    bestScore = max(scores)
    bestIndices = [index for index in range(len(scores))
                   if scores[index] == bestScore]
    chosenIndex = random.choice(bestIndices)  # Pick randomly among the best

    "Add more of your code here if you want to"

    return legalMoves[chosenIndex]
def getAction(self, game_state: GameState):
    """
    Returns the minimax action using self.depth and self.evaluationFunction
    """
    "*** YOUR CODE HERE ***"
    # Generate candidate actions
    legal_actions = game_state.getLegalActions(self.pacman_index)
    # if Directions.STOP in legal_actions:
    #     legal_actions.remove(Directions.STOP)

    alpha = -math.inf
    beta = math.inf
    scores = []
    for action in legal_actions:
        successor = game_state.getNextState(self.pacman_index, action)
        # Since we're expanding the root (max) node, the next node is a min
        # node, so call min_value
        value = self.min_value(successor, depth=0, ghost_index=0,
                               alpha=alpha, beta=beta)
        scores.append(value)
        # Can't prune on the root node
        alpha = max(alpha, value)

    best_score = max(scores)
    best_indices = [index for index in range(len(scores))
                    if scores[index] == best_score]
    chosen_index = random.choice(best_indices)  # Pick randomly among the best
    return legal_actions[chosen_index]
def rb_directional_expectimax(self, cur_state: GameState, turn: int,
                              agent: int, depth_limit: int, depth: int,
                              ghost_num: int):
    if turn == agent:
        depth += 1
    if depth >= depth_limit or cur_state.isWin() or cur_state.isLose():
        return self.evaluationFunction(cur_state)
    if turn == agent:  # Pacman's turn
        cur_max = -math.inf
        # Iterate over the children gameStates
        for action in cur_state.getLegalPacmanActions():
            child_state = cur_state.generateSuccessor(turn, action)
            cur_max = max(
                cur_max,
                self.rb_directional_expectimax(
                    child_state, (turn + 1) % (ghost_num + 1), agent,
                    depth_limit, depth, ghost_num))
        return cur_max
    else:  # ghost's turn
        assert turn > agent
        ghost_legal_moves = cur_state.getLegalActions(turn)
        ghost = DirectionalGhost(turn)
        # The ghost's action distribution depends only on cur_state, so
        # compute it once outside the loop
        dist = ghost.getDistribution(cur_state)
        expectancy = 0
        for action in ghost_legal_moves:
            child_state = cur_state.generateSuccessor(turn, action)
            expectancy += dist[action] * self.rb_directional_expectimax(
                child_state, (turn + 1) % (ghost_num + 1), agent,
                depth_limit, depth, ghost_num)
        if math.isnan(expectancy):
            expectancy = 0
        return expectancy
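# Hedged sketch of how a getAction wrapper might drive
# rb_directional_expectimax: the root expands Pacman's moves itself and starts
# the recursion at the first ghost (turn=1) with depth 0, so depth increments
# each time the turn wraps back to Pacman. The argument pattern is inferred
# from the recursion above; this wrapper is an assumption, not original code.
def getAction(self, gameState: GameState) -> str:
    ghost_num = gameState.getNumAgents() - 1
    best_action, best_value = None, -math.inf
    for action in gameState.getLegalPacmanActions():
        child = gameState.generateSuccessor(0, action)
        value = self.rb_directional_expectimax(
            child, 1, 0, self.depth, 0, ghost_num)
        if value > best_value:
            best_value, best_action = value, action
    return best_action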
def min_value(self, game_state: GameState, depth=0, ghost_index=0,
              alpha=-math.inf, beta=math.inf):
    # Next ghost to move
    ghost_index += 1
    if self.is_a_new_level_of_search(game_state, ghost_index):
        depth = depth + 1
    if game_state.isWin() or game_state.isLose():
        return self.evaluationFunction(game_state)

    value = math.inf
    legal_actions = game_state.getLegalActions(ghost_index)
    for action in legal_actions:
        successor = game_state.getNextState(ghost_index, action)
        if self.is_a_new_level_of_search(game_state, ghost_index):
            # Move on to Pacman since this is the last agent (new max node)
            value = min(value, self.max_value(successor, depth=depth,
                                              alpha=alpha, beta=beta))
        else:
            # The next node in the tree is another minimizer; continue with
            # the next ghost
            value = min(value, self.min_value(successor, depth=depth,
                                              ghost_index=ghost_index,
                                              alpha=alpha, beta=beta))
        if value < alpha:
            return value
        beta = min(beta, value)
    return value
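# is_a_new_level_of_search is referenced above but not defined in this
# section. A plausible sketch, assuming it reports whether ghost_index is the
# last agent, so the next node starts a new ply with Pacman (an assumption):
def is_a_new_level_of_search(self, game_state: GameState,
                             ghost_index: int) -> bool:
    return ghost_index == game_state.getNumAgents() - 1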
def max_value(
    self,
    state: GameState,
    depth: int,
    alpha: float,
    beta: float,
    actor: Optional[int] = None,
) -> float:
    # Sanity check: have all the ghosts been evaluated in the last round?
    if actor is not None:
        assert actor == state.getNumAgents()

    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    legal_actions = state.getLegalActions(agentIndex=0)
    utility = -inf
    for action in legal_actions:
        successor = state.generateSuccessor(agentIndex=0, action=action)
        utility = max(
            utility,
            self.min_value(successor, depth, alpha, beta, ghost_num=1),
        )
        if utility > beta:
            return utility
        alpha = max(alpha, utility)
    return utility
def exp_value(self, game_state: GameState, depth=0, ghost_index=0):
    # Next ghost to move
    ghost_index += 1
    if self.is_a_new_level_of_search(game_state, ghost_index):
        depth = depth + 1
    if game_state.isWin() or game_state.isLose():
        return self.evaluationFunction(game_state)

    value = 0
    legal_actions = game_state.getLegalActions(ghost_index)
    for action in legal_actions:
        successor = game_state.getNextState(ghost_index, action)
        # Ghosts choose uniformly at random among their legal moves
        probability = 1 / len(legal_actions)
        if self.is_a_new_level_of_search(game_state, ghost_index):
            # Move on to Pacman since this is the last agent (new max node)
            value += probability * self.max_value(successor, depth=depth)
        else:
            # The next node in the tree is another chance node; continue with
            # the next ghost
            value += probability * self.exp_value(successor, depth=depth,
                                                  ghost_index=ghost_index)
    return value
def getAction(self, gameState: GameState) -> str:
    """
    Returns the minimax action using self.depth and self.evaluationFunction
    """
    # BEGIN_YOUR_CODE (our solution is 36 lines of code, but don't worry if
    # you deviate from this)
    def getVal(s, d, agentIndex, alpha=float('-inf'), beta=float('inf'),
               evalFn=self.evaluationFunction):
        nextAgentIndex = 0 if agentIndex == s.getNumAgents() - 1 \
            else agentIndex + 1
        actions = s.getLegalActions(agentIndex)
        if len(actions) == 0:
            return s.getScore()
        elif d == 0:
            if agentIndex != 0:
                raise Exception(f"Unexpected agentIndex {agentIndex} != {0}")
            return evalFn(s)
        elif agentIndex == 0:
            maxVal = float('-inf')
            # Optional move ordering:
            # actions.sort(key=lambda a: evalFn(s.generateSuccessor(agentIndex, a)), reverse=True)
            for a in actions:
                maxVal = max(
                    maxVal,
                    getVal(s.generateSuccessor(agentIndex, a), d,
                           nextAgentIndex, alpha, beta))
                alpha = max(alpha, maxVal)
                if alpha >= beta:
                    break
            return maxVal
        else:
            nextD = d - (1 if agentIndex == s.getNumAgents() - 1 else 0)
            minVal = float('inf')
            # Optional move ordering:
            # actions.sort(key=lambda a: evalFn(s.generateSuccessor(agentIndex, a)), reverse=False)
            for a in actions:
                minVal = min(
                    minVal,
                    getVal(s.generateSuccessor(agentIndex, a), nextD,
                           nextAgentIndex, alpha, beta))
                beta = min(beta, minVal)
                if alpha >= beta:
                    break
            return minVal

    targetVal = getVal(gameState, self.depth, 0)
    # print(f"AlphaBetaAgent value of state = {targetVal}")
    legalActions = gameState.getLegalActions(0)
    # Pruning never changes the root value, so re-searching each child with a
    # full window recovers every action whose true value matches targetVal
    actions = [
        a for a in legalActions
        if getVal(gameState.generateSuccessor(0, a), self.depth, 1) == targetVal
    ]
    return random.choice(actions)
def getAction(self, gameState: GameState):
    """
    getAction chooses among the best options according to the evaluation
    function.

    getAction takes a GameState and returns some Directions.X for some X in
    the set {North, South, West, East}

    ------------------------------------------------------------------------
    Description of GameState and helper functions:

    A GameState specifies the full game state, including the food, capsules,
    agent configurations and score changes. In this function, the |gameState|
    argument is an object of GameState class. Following are a few of the
    helper methods that you can use to query a GameState object to gather
    information about the present state of Pac-Man, the ghosts and the maze.

    gameState.getLegalActions(agentIndex):
        Returns the legal actions for the agent specified. Returns Pac-Man's
        legal moves by default.

    gameState.generateSuccessor(agentIndex, action):
        Returns the successor state after the specified agent takes the
        action. Pac-Man is always agent 0.

    gameState.getPacmanState():
        Returns an AgentState object for pacman (in game.py)
        state.configuration.pos gives the current position
        state.direction gives the travel vector

    gameState.getGhostStates():
        Returns list of AgentState objects for the ghosts

    gameState.getNumAgents():
        Returns the total number of agents in the game

    gameState.getScore():
        Returns the score corresponding to the current state of the game

    The GameState class is defined in pacman.py and you might want to look
    into that for other helper methods, though you don't need to.
    """
    # Collect legal moves and successor states
    legalMoves = gameState.getLegalActions()

    # Choose one of the best actions
    scores = [
        self.evaluationFunction(gameState, action) for action in legalMoves
    ]
    bestScore = max(scores)
    bestIndices = [
        index for index in range(len(scores)) if scores[index] == bestScore
    ]
    chosenIndex = random.choice(bestIndices)  # Pick randomly among the best

    return legalMoves[chosenIndex]
def min_value(
    self,
    state: GameState,
    depth: int,
    alpha: float,
    beta: float,
    ghost_num: int,
) -> float:
    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    # Sanity check: valid ghost number?
    assert 1 <= ghost_num < state.getNumAgents()

    legal_actions = state.getLegalActions(ghost_num)

    # If this is the last ghost, the next optimizer should be from Pacman's
    # perspective
    next_optimizer = self.max_value \
        if ghost_num == state.getNumAgents() - 1 \
        else self.min_value

    # If this is the last ghost, decrement depth
    next_depth = depth - 1 \
        if ghost_num == state.getNumAgents() - 1 \
        else depth

    utility = inf
    for action in legal_actions:
        successor = state.generateSuccessor(
            agentIndex=ghost_num,
            action=action,
        )
        utility = min(
            utility,
            next_optimizer(successor, next_depth, alpha, beta, ghost_num + 1),
        )
        if utility < alpha:
            return utility
        beta = min(beta, utility)
    return utility
def max_value(self, game_state: GameState, depth):
    if self.is_terminal_state(game_state, depth):
        return self.evaluationFunction(game_state)

    value = -math.inf
    legal_actions = game_state.getLegalActions(self.pacman_index)
    for action in legal_actions:
        successor = game_state.getNextState(self.pacman_index, action)
        value = max(value, self.exp_value(successor, depth=depth,
                                          ghost_index=0))
    return value
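# is_terminal_state is referenced above but not defined in this section. A
# plausible sketch, assuming it treats the depth limit and win/lose states as
# terminal (an assumption, not code from the original):
def is_terminal_state(self, game_state: GameState, depth: int) -> bool:
    return depth == self.depth or game_state.isWin() or game_state.isLose()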
def getAction(self, gameState: GameState):
    """
    Returns the minimax action from the current gameState using self.depth
    and self.evaluationFunction.
    """
    _max = float("-inf")
    action = None
    for move in gameState.getLegalActions(0):
        util = minimax(self.evaluationFunction, 1, 0,
                       gameState.generateSuccessor(0, move), self.depth)
        if util > _max or _max == float("-inf"):
            _max = util
            action = move
    return action
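# The free function `minimax` called above is not defined in this section.
# A minimal sketch whose signature is inferred from the call site
# (evalFunc, agent, depth, gameState, maxDepth); the body is an assumption:
def minimax(evalFunc, agent: int, depth: int, gameState: GameState,
            maxDepth: int) -> float:
    if gameState.isLose() or gameState.isWin() or depth == maxDepth:
        return evalFunc(gameState)
    nextAgent = (agent + 1) % gameState.getNumAgents()
    # A ply is complete once the turn wraps back around to Pacman
    nextDepth = depth + 1 if nextAgent == 0 else depth
    values = [
        minimax(evalFunc, nextAgent, nextDepth,
                gameState.generateSuccessor(agent, action), maxDepth)
        for action in gameState.getLegalActions(agent)
    ]
    if not values:
        return evalFunc(gameState)
    return max(values) if agent == 0 else min(values)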
def expimax(evalFunc: Callable[[GameState], float], agent: int, depth: int,
            gameState: GameState, maxDepth: int) -> float:
    if gameState.isLose() or gameState.isWin() or depth == maxDepth:
        return evalFunc(gameState)
    if agent == 0:
        return max(
            expimax(evalFunc, 1, depth,
                    gameState.generateSuccessor(agent, action), maxDepth)
            for action in gameState.getLegalActions(agent))
    else:
        nextAgent = agent + 1
        if gameState.getNumAgents() == nextAgent:
            nextAgent = 0
        if nextAgent == 0:
            depth += 1
        # Ghosts move uniformly at random, so the chance node's value is the
        # mean over successors
        actions = gameState.getLegalActions(agent)
        val = 0
        for action in actions:
            val += expimax(evalFunc, nextAgent, depth,
                           gameState.generateSuccessor(agent, action),
                           maxDepth)
        return val / len(actions)
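# Hedged usage sketch: how a getAction wrapper might select the root move
# with expimax above. This wrapper is illustrative, not from the original.
def getAction(self, gameState: GameState) -> str:
    best_action, best_value = None, float("-inf")
    for action in gameState.getLegalActions(0):
        value = expimax(self.evaluationFunction, 1, 0,
                        gameState.generateSuccessor(0, action), self.depth)
        if value > best_value:
            best_value, best_action = value, action
    return best_action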
def max_value(self, game_state: GameState, depth, alpha=-math.inf,
              beta=math.inf):
    if self.is_terminal_state(game_state, depth):
        return self.evaluationFunction(game_state)

    value = -math.inf
    legal_actions = game_state.getLegalActions(self.pacman_index)
    for action in legal_actions:
        successor = game_state.getNextState(self.pacman_index, action)
        value = max(value, self.min_value(successor, depth=depth,
                                          ghost_index=0, alpha=alpha,
                                          beta=beta))
        if value > beta:
            return value
        alpha = max(alpha, value)
    return value
def _minimax(self, gameState: GameState,
             idx: int) -> Tuple[float, Optional[str]]:
    n = gameState.getNumAgents()
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)
    agent = idx % n
    legalActions = gameState.getLegalActions(agent)
    # mod folds max (Pacman) and min (ghosts) into a single maximization
    mod = 1 if agent == 0 else -1
    best_score = -float('inf') * mod
    best_action = None
    for legalAction in legalActions:
        s = gameState.generateSuccessor(agent, legalAction)
        score = self._minimax(s, idx + 1)[0]
        if score * mod > best_score * mod:
            best_score, best_action = score, legalAction
    return (best_score, best_action)
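# Hedged sketch of the entry point that would pair with _minimax above; the
# same two-line wrapper shape would suit the other underscore-prefixed
# searches in this collection. This wrapper is an assumption:
def getAction(self, gameState: GameState) -> str:
    _, action = self._minimax(gameState, 0)
    return action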
def getAction(self, game_state: GameState) -> str:
    """
    Returns the minimax action from the current gameState using self.depth
    and self.evaluationFunction.

    Here are some method calls that might be useful when implementing minimax.

    gameState.getLegalActions(agentIndex):
        Returns a list of legal actions for an agent
        agentIndex=0 means Pacman, ghosts are >= 1

    gameState.generateSuccessor(agentIndex, action):
        Returns the successor game state after an agent takes an action

    gameState.getNumAgents():
        Returns the total number of agents in the game
    """
    legal_actions = game_state.getLegalActions(agentIndex=0)

    alpha, beta = -inf, inf
    utility = -inf
    best_action_index = 0  # Fall back to the first action if nothing beats alpha
    for action_num in range(len(legal_actions)):
        successor = game_state.generateSuccessor(
            agentIndex=0,
            action=legal_actions[action_num],
        )
        utility = max(
            utility,
            self.min_value(
                successor,
                depth=self.depth,
                alpha=alpha,
                beta=beta,
                ghost_num=1,
            ),
        )
        if utility > alpha:
            best_action_index = action_num
            alpha = utility
    return legal_actions[best_action_index]
def getAction(self, game_state: GameState):
    """
    Returns the minimax action from the current gameState using self.depth
    and self.evaluationFunction.

    Here are some method calls that might be useful when implementing minimax.

    gameState.getLegalActions(agentIndex):
        Returns a list of legal actions for an agent
        agentIndex=0 means Pacman, ghosts are >= 1

    gameState.getNextState(agentIndex, action):
        Returns the child game state after an agent takes an action

    gameState.getNumAgents():
        Returns the total number of agents in the game

    gameState.isWin():
        Returns whether or not the game state is a winning state

    gameState.isLose():
        Returns whether or not the game state is a losing state
    """
    "*** YOUR CODE HERE ***"
    # Generate candidate actions
    legal_actions = game_state.getLegalActions(self.pacman_index)
    # if Directions.STOP in legal_actions:
    #     legal_actions.remove(Directions.STOP)

    # Since we're expanding the root (max) node, the next node is a min node,
    # so call min_value
    scores = [self.min_value(game_state.getNextState(self.pacman_index, action),
                             depth=0, ghost_index=0)
              for action in legal_actions]
    best_score = max(scores)
    best_indices = [index for index in range(len(scores))
                    if scores[index] == best_score]
    chosen_index = random.choice(best_indices)  # Pick randomly among the best
    return legal_actions[chosen_index]
def getAction(self, gameState: GameState) -> str:
    """
    Returns the expectimax action using self.depth and
    self.evaluationFunction

    All ghosts should be modeled as choosing uniformly at random from their
    legal moves.
    """
    # BEGIN_YOUR_CODE (our solution is 20 lines of code, but don't worry if
    # you deviate from this)
    def getVal(s, d, agentIndex, evalFn=self.evaluationFunction):
        nextAgentIndex = 0 if agentIndex == s.getNumAgents() - 1 \
            else agentIndex + 1
        actions = s.getLegalActions(agentIndex)
        if len(actions) == 0:
            return s.getScore()
        elif d == 0:
            if agentIndex != 0:
                raise Exception(f"Unexpected agentIndex {agentIndex} != {0}")
            return evalFn(s)
        elif agentIndex == 0:
            return max(
                getVal(s.generateSuccessor(agentIndex, a), d, nextAgentIndex)
                for a in actions)
        else:
            nextD = d - (1 if agentIndex == s.getNumAgents() - 1 else 0)
            return sum(
                (1 / len(actions)) *
                getVal(s.generateSuccessor(agentIndex, a), nextD,
                       nextAgentIndex) for a in actions)

    targetVal = getVal(gameState, self.depth, 0)
    # print(f"ExpectimaxAgent value of state = {targetVal}")
    legalActions = gameState.getLegalActions(0)
    actions = [
        a for a in legalActions
        if getVal(gameState.generateSuccessor(0, a), self.depth, 1) == targetVal
    ]
    return random.choice(actions)
def max_value(self, state: GameState, depth: int,
              actor: Optional[int] = None) -> float:
    # Sanity check: have all the ghosts been evaluated in the last round?
    if actor is not None:
        assert actor == state.getNumAgents()

    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    legal_actions = state.getLegalActions(agentIndex=0)
    successors = [
        state.generateSuccessor(agentIndex=0, action=action)
        for action in legal_actions
    ]
    utilities = [
        self.min_value(successor, depth, ghost_num=1)
        for successor in successors
    ]
    return max(utilities)
def _expectimax(self, gameState: GameState,
                idx: int) -> Tuple[float, Optional[str]]:
    n = gameState.getNumAgents()
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)
    agent = idx % n
    legalActions = gameState.getLegalActions(agent)
    n_actions = len(legalActions)
    ret_score = -float('inf') if agent == 0 else 0
    ret_action = None
    for legalAction in legalActions:
        s = gameState.generateSuccessor(agent, legalAction)
        score = self._expectimax(s, idx + 1)[0]
        if agent != 0:
            # Chance node: ghosts move uniformly at random
            ret_score += score / n_actions
        elif score > ret_score:
            ret_score, ret_action = score, legalAction
    return (ret_score, ret_action)
def getAction(self, game_state: GameState):
    """
    Returns the expectimax action using self.depth and
    self.evaluationFunction

    All ghosts should be modeled as choosing uniformly at random from their
    legal moves.
    """
    "*** YOUR CODE HERE ***"
    # Generate candidate actions
    legal_actions = game_state.getLegalActions(self.pacman_index)
    # if Directions.STOP in legal_actions:
    #     legal_actions.remove(Directions.STOP)

    # Since we're expanding the root (max) node, the next node is a chance
    # node, so call exp_value
    scores = [self.exp_value(game_state.getNextState(self.pacman_index, action),
                             depth=0, ghost_index=0)
              for action in legal_actions]
    best_score = max(scores)
    best_indices = [index for index in range(len(scores))
                    if scores[index] == best_score]
    chosen_index = random.choice(best_indices)  # Pick randomly among the best
    return legal_actions[chosen_index]
def searchTree(state: GameState, depth: int, agent: int, a, b):
    actions = state.getLegalActions(agent)
    nextAgent = (agent + 1) % state.getNumAgents()
    if state.isLose() or state.isWin() or len(actions) == 0:
        return [state.getScore(), None]
    elif depth == 0:
        return [self.evaluationFunction(state), None]
    elif agent == 0:
        value = float('-inf')
        successors = []
        for action in actions:
            curr = searchTree(state.generateSuccessor(agent, action), depth,
                              nextAgent, a, b)[0]
            successors.append(curr)
            value = max(value, curr)
            a = max(a, value)
            if a >= b:
                break
        maxIndex = successors.index(value)
        return [value, actions[maxIndex]]
    else:
        nextDepth = depth
        if nextAgent == 0:
            nextDepth -= 1
        value = float('inf')
        successors = []
        for action in actions:
            curr = searchTree(state.generateSuccessor(agent, action),
                              nextDepth, nextAgent, a, b)[0]
            successors.append(curr)
            value = min(value, curr)
            b = min(b, value)
            if a >= b:
                break
        minIndex = successors.index(value)
        return [value, actions[minIndex]]
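# searchTree above references self.evaluationFunction, so it is presumably a
# closure nested inside an agent's getAction; the root call must open the
# full (alpha, beta) window. A hedged sketch of that wrapper (an assumption):
def getAction(self, gameState: GameState) -> str:
    return searchTree(gameState, self.depth, 0,
                      float('-inf'), float('inf'))[1]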
def getAction(self, gameState: GameState) -> str:
    """
    Returns the minimax action from the current gameState using self.depth
    and self.evaluationFunction. Terminal states can be found by one of the
    following: pacman won, pacman lost or there are no legal moves.

    Here are some method calls that might be useful when implementing minimax.

    gameState.getLegalActions(agentIndex):
        Returns a list of legal actions for an agent
        agentIndex=0 means Pacman, ghosts are >= 1

    gameState.generateSuccessor(agentIndex, action):
        Returns the successor game state after an agent takes an action

    gameState.getNumAgents():
        Returns the total number of agents in the game

    gameState.getScore():
        Returns the score corresponding to the current state of the game

    gameState.isWin():
        Returns True if it's a winning state

    gameState.isLose():
        Returns True if it's a losing state

    self.depth:
        The depth to which search should continue
    """
    # BEGIN_YOUR_CODE (our solution is 20 lines of code, but don't worry if
    # you deviate from this)
    def getVal(s, d, agentIndex, evalFn=self.evaluationFunction):
        nextAgentIndex = 0 if agentIndex == s.getNumAgents() - 1 \
            else agentIndex + 1
        actions = s.getLegalActions(agentIndex)
        if len(actions) == 0:
            return s.getScore()
        elif d == 0:
            if agentIndex != 0:
                raise Exception(f"Unexpected agentIndex {agentIndex} != {0}")
            return evalFn(s)
        elif agentIndex == 0:
            return max(
                getVal(s.generateSuccessor(agentIndex, a), d, nextAgentIndex)
                for a in actions)
        else:
            nextD = d - (1 if agentIndex == s.getNumAgents() - 1 else 0)
            return min(
                getVal(s.generateSuccessor(agentIndex, a), nextD,
                       nextAgentIndex) for a in actions)

    targetVal = getVal(gameState, self.depth, 0)
    # print(f"MinimaxAgent value of state = {targetVal}")
    legalActions = gameState.getLegalActions(0)
    actions = [
        a for a in legalActions
        if getVal(gameState.generateSuccessor(0, a), self.depth, 1) == targetVal
    ]
    return random.choice(actions)