def rb_directional_expectimax(self, cur_state: GameState, turn: int,
                              agent: int, depth_limit: int, depth: int,
                              ghost_num: int):
    if turn == agent:
        depth += 1
    if depth >= depth_limit or cur_state.isWin() or cur_state.isLose():
        return self.evaluationFunction(cur_state)
    if turn == agent:  # Pacman's turn: maximize over successor states
        cur_max = float('-inf')
        for action in cur_state.getLegalPacmanActions():
            child_state = cur_state.generateSuccessor(turn, action)
            cur_max = max(
                cur_max,
                self.rb_directional_expectimax(child_state,
                                               (turn + 1) % (ghost_num + 1),
                                               agent, depth_limit, depth,
                                               ghost_num))
        return cur_max
    else:  # ghost's turn: expectation under the DirectionalGhost policy
        assert turn > agent
        ghost_legal_moves = cur_state.getLegalActions(turn)
        ghost = DirectionalGhost(turn)
        # The action distribution depends only on the current state, so
        # compute it once rather than per action.
        dist = ghost.getDistribution(cur_state)
        expectancy = 0
        for action in ghost_legal_moves:
            child_state = cur_state.generateSuccessor(turn, action)
            expectancy += dist[action] * self.rb_directional_expectimax(
                child_state, (turn + 1) % (ghost_num + 1), agent,
                depth_limit, depth, ghost_num)
        if math.isnan(expectancy):  # guard against NaN from 0 * inf terms
            expectancy = 0
        return expectancy
def searchTree(state: GameState, depth: int, agent: int):
    actions = state.getLegalActions(agent)
    nextAgent = (agent + 1) % state.getNumAgents()
    if state.isLose() or state.isWin() or len(actions) == 0:
        return [state.getScore(), None]
    elif depth == 0:
        return [self.evaluationFunction(state), None]
    elif agent == 0:  # Pacman: maximize and remember the best action
        successors = [
            searchTree(state.generateSuccessor(agent, action), depth,
                       nextAgent)[0] for action in actions
        ]
        maximum = max(successors)
        maxIndex = successors.index(maximum)
        return [maximum, actions[maxIndex]]
    else:  # ghost: uniform expectation over legal moves
        nextDepth = depth
        if nextAgent == 0:  # last ghost moved: one full ply completed
            nextDepth -= 1
        successors = [
            searchTree(state.generateSuccessor(agent, action), nextDepth,
                       nextAgent)[0] for action in actions
        ]
        expected = sum(successors) / len(successors)
        return [expected, None]
def minimax(evalFunc: Callable, agent: int, depth: int,
            gameState: GameState, maxDepth: int) -> float:
    if gameState.isLose() or gameState.isWin() or depth == maxDepth:
        return evalFunc(gameState)
    if agent == 0:  # Pacman: maximize
        return max(
            minimax(evalFunc, 1, depth,
                    gameState.generateSuccessor(agent, action), maxDepth)
            for action in gameState.getLegalActions(agent))
    else:  # ghost: minimize
        nextAgent = agent + 1
        if gameState.getNumAgents() == nextAgent:
            nextAgent = 0
        if nextAgent == 0:  # all ghosts have moved: one ply deeper
            depth += 1
        return min(
            minimax(evalFunc, nextAgent, depth,
                    gameState.generateSuccessor(agent, action), maxDepth)
            for action in gameState.getLegalActions(agent))
def getAction(self, game_state: GameState) -> str:
    """
    Returns the minimax action from the current gameState using
    self.depth and self.evaluationFunction.

    Here are some method calls that might be useful when implementing
    minimax.

    gameState.getLegalActions(agentIndex):
        Returns a list of legal actions for an agent
        agentIndex=0 means Pacman, ghosts are >= 1

    gameState.generateSuccessor(agentIndex, action):
        Returns the successor game state after an agent takes an action

    gameState.getNumAgents():
        Returns the total number of agents in the game
    """
    legal_actions = game_state.getLegalActions(agentIndex=0)
    best_action_index = max(
        range(len(legal_actions)),
        key=lambda action_num: self.min_value(
            state=game_state.generateSuccessor(
                agentIndex=0,
                action=legal_actions[action_num],
            ),
            depth=self.depth,
            ghost_num=1,
        ))
    return legal_actions[best_action_index]
def max_value(
    self,
    state: GameState,
    depth: int,
    alpha: int,
    beta: int,
    actor: Optional[int] = None,
) -> int:
    # Sanity check: have all the ghosts been evaluated the last round?
    if actor is not None:
        assert actor == state.getNumAgents()

    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    legal_actions = state.getLegalActions(agentIndex=0)
    utility = -inf
    for action in legal_actions:
        successor = state.generateSuccessor(agentIndex=0, action=action)
        utility = max(
            utility,
            self.min_value(successor, depth, alpha, beta, ghost_num=1),
        )
        if utility > beta:
            return utility
        alpha = max(alpha, utility)
    return utility
def min_value(self, state: GameState, depth: int, ghost_num: int) -> int:
    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    # Sanity check: valid ghost number?
    assert 1 <= ghost_num < state.getNumAgents()

    legal_actions = state.getLegalActions(ghost_num)
    successors = [
        state.generateSuccessor(ghost_num, ghost_action)
        for ghost_action in legal_actions
    ]

    # If this is the last ghost, the next optimizer should be from
    # Pacman's perspective
    next_optimizer = self.max_value \
        if ghost_num == state.getNumAgents() - 1 \
        else self.min_value

    # If this is the last ghost, decrement depth
    next_depth = depth - 1 \
        if ghost_num == state.getNumAgents() - 1 \
        else depth

    utilities = [
        next_optimizer(successor, next_depth, ghost_num + 1)
        for successor in successors
    ]
    return min(utilities)
def _alphabeta(self, gameState: GameState, idx: int,
               ab: List[float]) -> Tuple[float, str]:
    n = gameState.getNumAgents()
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)

    agent = idx % n
    legalActions = gameState.getLegalActions(agent)
    pacman = (agent == 0)
    # ab[0] holds alpha, ab[1] holds beta; `mod` flips the comparisons so
    # the same loop implements both the max and the min player.
    idx0 = int(pacman)
    idx1 = int(not pacman)
    mod = 1 if pacman else -1
    best_score = -float('inf') * mod
    best_action = None
    for legalAction in legalActions:
        s = gameState.generateSuccessor(agent, legalAction)
        score = self._alphabeta(s, idx + 1, [*ab])[0]
        if score * mod > best_score * mod:
            best_score, best_action = score, legalAction
        if best_score * mod > ab[idx0] * mod:  # prune
            break
        ab[idx1] = max(ab[idx1] * mod, best_score * mod) * mod
    return (best_score, best_action)
def getAction(self, gameState: GameState) -> str:
    """
    Returns the minimax action using self.depth and self.evaluationFunction
    """
    # BEGIN_YOUR_CODE (our solution is 36 lines of code, but don't worry if you deviate from this)
    def getVal(s, d, agentIndex, alpha=float('-inf'), beta=float('inf'),
               evalFn=self.evaluationFunction):
        nextAgentIndex = 0 if agentIndex == s.getNumAgents() - 1 \
            else agentIndex + 1
        actions = s.getLegalActions(agentIndex)
        if len(actions) == 0:
            return s.getScore()
        elif d == 0:
            if agentIndex != 0:
                raise Exception(f"Unexpected agentIndex {agentIndex} != 0")
            return evalFn(s)
        elif agentIndex == 0:  # Pacman: maximize with pruning
            maxVal = float('-inf')
            for a in actions:
                maxVal = max(
                    maxVal,
                    getVal(s.generateSuccessor(agentIndex, a), d,
                           nextAgentIndex, alpha, beta))
                alpha = max(alpha, maxVal)
                if alpha >= beta:
                    break
            return maxVal
        else:  # ghost: minimize with pruning
            nextD = d - (1 if agentIndex == s.getNumAgents() - 1 else 0)
            minVal = float('inf')
            for a in actions:
                minVal = min(
                    minVal,
                    getVal(s.generateSuccessor(agentIndex, a), nextD,
                           nextAgentIndex, alpha, beta))
                beta = min(beta, minVal)
                if alpha >= beta:
                    break
            return minVal

    targetVal = getVal(gameState, self.depth, 0)
    legalActions = gameState.getLegalActions(0)
    # Re-search each root action with a full window and break ties
    # randomly among all actions achieving the optimal value.
    actions = [
        a for a in legalActions
        if getVal(gameState.generateSuccessor(0, a), self.depth, 1) ==
        targetVal
    ]
    return random.choice(actions)
def min_value(
    self,
    state: GameState,
    depth: int,
    alpha: int,
    beta: int,
    ghost_num: int,
) -> int:
    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    # Sanity check: valid ghost number?
    assert 1 <= ghost_num < state.getNumAgents()

    legal_actions = state.getLegalActions(ghost_num)

    # If this is the last ghost, next optimizer should be from pacman's
    # perspective
    next_optimizer = self.max_value \
        if ghost_num == state.getNumAgents() - 1 \
        else self.min_value

    # If this is the last ghost, decrement depth
    next_depth = depth - 1 \
        if ghost_num == state.getNumAgents() - 1 \
        else depth

    utility = inf
    for action in legal_actions:
        successor = state.generateSuccessor(
            agentIndex=ghost_num,
            action=action,
        )
        utility = min(
            utility,
            next_optimizer(successor, next_depth, alpha, beta,
                           ghost_num + 1),
        )
        if utility < alpha:
            return utility
        beta = min(beta, utility)
    return utility
def searchTree(state: GameState, depth: int, agent: int, a, b):
    actions = state.getLegalActions(agent)
    nextAgent = (agent + 1) % state.getNumAgents()
    if state.isLose() or state.isWin() or len(actions) == 0:
        return [state.getScore(), None]
    elif depth == 0:
        return [self.evaluationFunction(state), None]
    elif agent == 0:
        value = float('-inf')
        successors = []
        for action in actions:
            curr = searchTree(state.generateSuccessor(agent, action),
                              depth, nextAgent, a, b)[0]
            successors.append(curr)
            value = max(value, curr)
            a = max(a, value)
            if a >= b:
                break
        maxIndex = successors.index(value)
        return [value, actions[maxIndex]]
    else:
        nextDepth = depth
        if nextAgent == 0:
            nextDepth -= 1
        value = float('inf')
        successors = []
        for action in actions:
            curr = searchTree(state.generateSuccessor(agent, action),
                              nextDepth, nextAgent, a, b)[0]
            successors.append(curr)
            value = min(value, curr)
            b = min(b, value)
            if a >= b:
                break
        minIndex = successors.index(value)
        return [value, actions[minIndex]]
def getAction(self, gameState: GameState):
    """
    Returns the minimax action from the current gameState using
    self.depth and self.evaluationFunction.
    """
    _max = float("-inf")
    action = None
    for move in gameState.getLegalActions(0):
        util = minimax(self.evaluationFunction, 1, 0,
                       gameState.generateSuccessor(0, move), self.depth)
        # The second clause guarantees an action is chosen even when
        # every successor evaluates to -inf.
        if util > _max or _max == float("-inf"):
            _max = util
            action = move
    return action
def getAction(self, game_state: GameState) -> str:
    """
    Returns the minimax action from the current gameState using
    self.depth and self.evaluationFunction.

    Here are some method calls that might be useful when implementing
    minimax.

    gameState.getLegalActions(agentIndex):
        Returns a list of legal actions for an agent
        agentIndex=0 means Pacman, ghosts are >= 1

    gameState.generateSuccessor(agentIndex, action):
        Returns the successor game state after an agent takes an action

    gameState.getNumAgents():
        Returns the total number of agents in the game
    """
    legal_actions = game_state.getLegalActions(agentIndex=0)
    alpha, beta = -inf, inf
    utility = -inf
    best_action_index = 0  # fall back to the first action
    for action_num in range(len(legal_actions)):
        successor = game_state.generateSuccessor(
            agentIndex=0,
            action=legal_actions[action_num],
        )
        utility = max(
            utility,
            self.min_value(
                successor,
                depth=self.depth,
                alpha=alpha,
                beta=beta,
                ghost_num=1,
            ),
        )
        if utility > alpha:
            best_action_index = action_num
            alpha = utility
    return legal_actions[best_action_index]
def _minimax(self, gameState: GameState, idx: int) -> Tuple[float, str]:
    n = gameState.getNumAgents()
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)

    agent = idx % n
    legalActions = gameState.getLegalActions(agent)
    # `mod` flips the comparison so the same loop maximizes for Pacman
    # and minimizes for ghosts.
    mod = 1 if agent == 0 else -1
    best_score = -float('inf') * mod
    best_action = None
    for legalAction in legalActions:
        s = gameState.generateSuccessor(agent, legalAction)
        score = self._minimax(s, idx + 1)[0]
        if score * mod > best_score * mod:
            best_score, best_action = score, legalAction
    return (best_score, best_action)
def max_value(self,
              state: GameState,
              depth: int,
              actor: Optional[int] = None) -> int:
    # Sanity check: have all the ghosts been evaluated the last round?
    if actor is not None:
        assert actor == state.getNumAgents()

    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    legal_actions = state.getLegalActions(agentIndex=0)
    successors = [
        state.generateSuccessor(agentIndex=0, action=action)
        for action in legal_actions
    ]
    utilities = [
        self.min_value(successor, depth, ghost_num=1)
        for successor in successors
    ]
    return max(utilities)
def getAction(self, gameState: GameState) -> str:
    """
    Returns the expectimax action using self.depth and self.evaluationFunction

    All ghosts should be modeled as choosing uniformly at random from their
    legal moves.
    """
    # BEGIN_YOUR_CODE (our solution is 20 lines of code, but don't worry if you deviate from this)
    def getVal(s, d, agentIndex, evalFn=self.evaluationFunction):
        nextAgentIndex = 0 if agentIndex == s.getNumAgents() - 1 \
            else agentIndex + 1
        actions = s.getLegalActions(agentIndex)
        if len(actions) == 0:
            return s.getScore()
        elif d == 0:
            if agentIndex != 0:
                raise Exception(f"Unexpected agentIndex {agentIndex} != 0")
            return evalFn(s)
        elif agentIndex == 0:  # Pacman: maximize
            return max(
                getVal(s.generateSuccessor(agentIndex, a), d,
                       nextAgentIndex) for a in actions)
        else:  # ghost: uniform expectation over legal moves
            nextD = d - (1 if agentIndex == s.getNumAgents() - 1 else 0)
            return sum(
                (1 / len(actions)) *
                getVal(s.generateSuccessor(agentIndex, a), nextD,
                       nextAgentIndex) for a in actions)

    targetVal = getVal(gameState, self.depth, 0)
    legalActions = gameState.getLegalActions(0)
    # Break ties randomly among all root actions achieving the optimum.
    actions = [
        a for a in legalActions
        if getVal(gameState.generateSuccessor(0, a), self.depth, 1) ==
        targetVal
    ]
    return random.choice(actions)
def _expectimax(self, gameState: GameState,
                idx: int) -> Tuple[float, str]:
    n = gameState.getNumAgents()
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)

    agent = idx % n
    legalActions = gameState.getLegalActions(agent)
    n_actions = len(legalActions)
    ret_score = -float('inf') if agent == 0 else 0
    ret_action = None
    for legalAction in legalActions:
        s = gameState.generateSuccessor(agent, legalAction)
        score = self._expectimax(s, idx + 1)[0]
        if agent != 0:
            ret_score += score / n_actions
        elif score > ret_score:
            ret_score, ret_action = score, legalAction
    return (ret_score, ret_action)
def getAction(self, gameState: GameState) -> str:
    """
    Returns the minimax action from the current gameState using
    self.depth and self.evaluationFunction. Terminal states can be found
    by one of the following: Pacman won, Pacman lost, or there are no
    legal moves.

    Here are some method calls that might be useful when implementing
    minimax.

    gameState.getLegalActions(agentIndex):
        Returns a list of legal actions for an agent
        agentIndex=0 means Pacman, ghosts are >= 1

    gameState.generateSuccessor(agentIndex, action):
        Returns the successor game state after an agent takes an action

    gameState.getNumAgents():
        Returns the total number of agents in the game

    gameState.getScore():
        Returns the score corresponding to the current state of the game

    gameState.isWin():
        Returns True if it's a winning state

    gameState.isLose():
        Returns True if it's a losing state

    self.depth:
        The depth to which search should continue
    """
    # BEGIN_YOUR_CODE (our solution is 20 lines of code, but don't worry if you deviate from this)
    def getVal(s, d, agentIndex, evalFn=self.evaluationFunction):
        nextAgentIndex = 0 if agentIndex == s.getNumAgents() - 1 \
            else agentIndex + 1
        actions = s.getLegalActions(agentIndex)
        if len(actions) == 0:
            return s.getScore()
        elif d == 0:
            if agentIndex != 0:
                raise Exception(f"Unexpected agentIndex {agentIndex} != 0")
            return evalFn(s)
        elif agentIndex == 0:  # Pacman: maximize
            return max(
                getVal(s.generateSuccessor(agentIndex, a), d,
                       nextAgentIndex) for a in actions)
        else:  # ghost: minimize
            nextD = d - (1 if agentIndex == s.getNumAgents() - 1 else 0)
            return min(
                getVal(s.generateSuccessor(agentIndex, a), nextD,
                       nextAgentIndex) for a in actions)

    targetVal = getVal(gameState, self.depth, 0)
    legalActions = gameState.getLegalActions(0)
    # Break ties randomly among all root actions achieving the optimum.
    actions = [
        a for a in legalActions
        if getVal(gameState.generateSuccessor(0, a), self.depth, 1) ==
        targetVal
    ]
    return random.choice(actions)