Example #1
    def rb_directional_expectimax(self, cur_state: GameState, turn: int,
                                  agent: int, depth_limit: int, depth: int,
                                  ghost_num: int):
        # A new ply starts each time the turn comes back around to Pacman.
        if turn == agent:
            depth += 1
        if depth >= depth_limit or cur_state.isWin() or cur_state.isLose():
            return self.evaluationFunction(cur_state)
        next_turn = (turn + 1) % (ghost_num + 1)
        if turn == agent:  # Pacman's turn: maximize over successor states
            cur_max = float('-inf')
            for action in cur_state.getLegalPacmanActions():
                child_state = cur_state.generateSuccessor(turn, action)
                cur_max = max(
                    cur_max,
                    self.rb_directional_expectimax(child_state, next_turn,
                                                   agent, depth_limit, depth,
                                                   ghost_num))
            return cur_max
        else:  # ghost's turn: expectation under the directional ghost's policy
            assert turn > agent
            ghost = DirectionalGhost(turn)
            # The distribution depends only on the current state, so it can
            # be computed once outside the loop.
            dist = ghost.getDistribution(cur_state)
            expectancy = 0
            for action in cur_state.getLegalActions(turn):
                child_state = cur_state.generateSuccessor(turn, action)
                expectancy += dist[action] * self.rb_directional_expectimax(
                    child_state, next_turn, agent, depth_limit, depth,
                    ghost_num)
            return expectancy
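The expectation above weights each successor by the probability the ghost assigns to that action. As a point of reference, here is a minimal uniform-ghost sketch of the `getDistribution` interface this code assumes; the `util.Counter` helper and method names are taken from the Berkeley Pacman codebase, so treat them as assumptions:

```python
# Minimal sketch of the getDistribution interface assumed above: a uniform
# ghost that spreads probability evenly over its legal moves. util.Counter
# (a dict defaulting to 0, with normalize()) follows the Berkeley codebase.
import util

class UniformGhostSketch:
    def __init__(self, index: int):
        self.index = index  # agent index of this ghost (>= 1)

    def getDistribution(self, state) -> "util.Counter":
        dist = util.Counter()
        for action in state.getLegalActions(self.index):
            dist[action] = 1.0  # equal weight per legal action
        dist.normalize()        # scale so the probabilities sum to 1
        return dist
```

A `DirectionalGhost` replaces the uniform weights with ones biased toward chasing (or fleeing) Pacman, but exposes the same normalized mapping from actions to probabilities.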
Example #2
def searchTree(state: GameState, depth: int, agent: int):
    # Presumably nested inside the agent's getAction, hence the reference to
    # self. Returns a (score, action) pair; action is None except at max nodes.
    actions = state.getLegalActions(agent)
    nextAgent = (agent + 1) % state.getNumAgents()
    if state.isLose() or state.isWin() or len(actions) == 0:
        return (state.getScore(), None)
    elif depth == 0:
        return (self.evaluationFunction(state), None)
    elif agent == 0:  # Pacman: maximize
        successors = [
            searchTree(state.generateSuccessor(agent, action), depth,
                       nextAgent)[0] for action in actions
        ]
        maximum = max(successors)
        maxIndex = successors.index(maximum)
        return (maximum, actions[maxIndex])
    else:  # ghost: uniform expectation over legal moves
        # Depth decreases only after the last ghost has moved.
        nextDepth = depth - 1 if nextAgent == 0 else depth
        successors = [
            searchTree(state.generateSuccessor(agent, action),
                       nextDepth, nextAgent)[0] for action in actions
        ]
        expected = sum(successors) / len(successors)
        return (expected, None)
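Since `searchTree` returns a `(score, action)` pair and references `self`, it presumably lives inside the agent's `getAction`; a minimal sketch of that call site, under that assumption:

```python
# Hypothetical call site, assuming searchTree above is nested inside the
# agent's getAction (which is why it can reference self):
def getAction(self, gameState: GameState) -> str:
    # The root call starts with Pacman (agent 0) at the configured search
    # depth; only the action half of the (score, action) pair is needed.
    score, action = searchTree(gameState, self.depth, 0)
    return action
```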
Example #3
def minimax(evalFunc: Callable[[GameState], float], agent: int, depth: int,
            gameState: GameState, maxDepth: int) -> float:
    if gameState.isLose() or gameState.isWin() or depth == maxDepth:
        return evalFunc(gameState)
    if agent == 0:  # Pacman: maximize over legal actions
        return max(
            minimax(evalFunc, 1, depth,
                    gameState.generateSuccessor(agent, action), maxDepth)
            for action in gameState.getLegalActions(agent))
    else:  # ghost: minimize; depth advances when play returns to Pacman
        nextAgent = (agent + 1) % gameState.getNumAgents()
        if nextAgent == 0:
            depth += 1
        return min(
            minimax(evalFunc, nextAgent, depth,
                    gameState.generateSuccessor(agent, action), maxDepth)
            for action in gameState.getLegalActions(agent))
Example #4
    def getAction(self, game_state: GameState) -> str:
        """
          Returns the minimax action from the current gameState using
          self.depth and self.evaluationFunction.

          Here are some method calls that might be useful when implementing
          minimax.

          gameState.getLegalActions(agentIndex): Returns a list of legal
          actions for an agent. agentIndex=0 means Pacman; ghosts are >= 1

          gameState.generateSuccessor(agentIndex, action): Returns the
          successor game state after an agent takes an action

          gameState.getNumAgents(): Returns the total number of agents in the
          game """
        legal_actions = game_state.getLegalActions(agentIndex=0)

        best_action_index = max(range(len(legal_actions)),
                                key=lambda action_num: self.min_value(
                                    state=game_state.generateSuccessor(
                                        agentIndex=0,
                                        action=legal_actions[action_num],
                                    ),
                                    depth=self.depth,
                                    ghost_num=1,
                                ))
        return legal_actions[best_action_index]
Example #5
    def max_value(
        self,
        state: GameState,
        depth: int,
        alpha: float,
        beta: float,
        actor: Optional[int] = None,
    ) -> float:
        # Sanity check: have all the ghosts been evaluated the last round?
        if actor is not None:
            assert actor == state.getNumAgents()

        # Game over or search depth has been reached
        if state.isLose() or state.isWin() or depth <= 0:
            return self.evaluationFunction(state)

        legal_actions = state.getLegalActions(agentIndex=0)

        utility = -inf
        for action in legal_actions:
            successor = state.generateSuccessor(agentIndex=0, action=action)
            utility = max(
                utility,
                self.min_value(successor, depth, alpha, beta, ghost_num=1),
            )

            if utility > beta:  # prune: the minimizer above will never allow this
                return utility

            alpha = max(alpha, utility)

        return utility
Example #6
    def min_value(self, state: GameState, depth: int, ghost_num: int) -> float:

        # Game over or search depth has been reached
        if state.isLose() or state.isWin() or depth <= 0:
            return self.evaluationFunction(state)

        # Sanity check: valid ghost number?
        assert 1 <= ghost_num < state.getNumAgents()

        legal_actions = state.getLegalActions(ghost_num)

        successors = [
            state.generateSuccessor(ghost_num, ghost_action)
            for ghost_action in legal_actions
        ]

        last_ghost = ghost_num == state.getNumAgents() - 1

        # If this is the last ghost, the next optimizer is Pacman's max_value
        # and the remaining depth decreases by one ply.
        next_optimizer = self.max_value if last_ghost else self.min_value
        next_depth = depth - 1 if last_ghost else depth

        utilities = [
            next_optimizer(successor, next_depth, ghost_num + 1)
            for successor in successors
        ]

        return min(utilities)
Example #7
    def _alphabeta(self, gameState: GameState, idx: int,
                   ab: List[float]) -> Tuple[float, Optional[str]]:
        n = gameState.getNumAgents()

        if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
            return (self.evaluationFunction(gameState), None)

        agent = idx % n
        legalActions = gameState.getLegalActions(agent)

        # Fold max and min nodes into one branch: mod = +1 maximizes (Pacman),
        # mod = -1 minimizes (ghosts). ab holds [alpha, beta]; ab[idx1] is the
        # bound this node tightens, ab[idx0] the one it prunes against.
        pacman = (agent == 0)
        idx0 = int(pacman)
        idx1 = int(not pacman)
        mod = 1 if pacman else -1
        best_score = -float('inf') * mod
        best_action = None
        for legalAction in legalActions:
            s = gameState.generateSuccessor(agent, legalAction)
            # Pass a copy of ab so children cannot mutate this node's window.
            score = self._alphabeta(s, idx + 1, [*ab])[0]
            if score * mod > best_score * mod:
                best_score, best_action = score, legalAction
            if best_score * mod > ab[idx0] * mod:
                break  # prune: the parent will never choose this branch
            ab[idx1] = max(ab[idx1] * mod, best_score * mod) * mod

        return (best_score, best_action)
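Packing `[alpha, beta]` into the `ab` list is what lets a single loop body serve both player types. A plausible entry point, assuming this helper lives on the agent class, seeds the list with the widest possible window:

```python
# Hypothetical entry point for _alphabeta: ply index 0 is Pacman's turn,
# and [alpha, beta] starts at the widest possible window.
def getAction(self, gameState: GameState) -> str:
    score, action = self._alphabeta(gameState, 0,
                                    [-float('inf'), float('inf')])
    return action
```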
Example #8
    def getAction(self, gameState: GameState) -> str:
        """
        Returns the minimax action using self.depth and self.evaluationFunction
        """

        # BEGIN_YOUR_CODE (our solution is 36 lines of code, but don't worry if you deviate from this)
        def getVal(s,
                   d,
                   agentIndex,
                   alpha=float('-inf'),
                   beta=float('inf'),
                   evalFn=self.evaluationFunction):
            nextAgentIndex = (0 if agentIndex == s.getNumAgents() - 1
                              else agentIndex + 1)
            actions = s.getLegalActions(agentIndex)
            if len(actions) == 0:
                return s.getScore()
            elif d == 0:
                if agentIndex != 0:
                    raise Exception(
                        f"Unexpected agentIndex {agentIndex} != 0")
                return evalFn(s)
            elif agentIndex == 0:  # Pacman: maximize with alpha-beta pruning
                maxVal = float('-inf')
                for a in actions:
                    maxVal = max(
                        maxVal,
                        getVal(s.generateSuccessor(agentIndex, a), d,
                               nextAgentIndex, alpha, beta))
                    alpha = max(alpha, maxVal)
                    if alpha >= beta:
                        break
                return maxVal
            else:  # ghost: minimize; depth drops after the last ghost
                nextD = d - (1 if agentIndex == s.getNumAgents() - 1 else 0)
                minVal = float('inf')
                for a in actions:
                    minVal = min(
                        minVal,
                        getVal(s.generateSuccessor(agentIndex, a), nextD,
                               nextAgentIndex, alpha, beta))
                    beta = min(beta, minVal)
                    if alpha >= beta:
                        break
                return minVal

        targetVal = getVal(gameState, self.depth, 0)
        # Re-evaluate each root move with a full window and pick uniformly
        # among the moves that achieve the minimax value.
        legalActions = gameState.getLegalActions(0)
        actions = [
            a for a in legalActions if getVal(
                gameState.generateSuccessor(0, a), self.depth, 1) == targetVal
        ]

        return random.choice(actions)
Example #9
    def min_value(
        self,
        state: GameState,
        depth: int,
        alpha: float,
        beta: float,
        ghost_num: int,
    ) -> float:

        # Game over or search depth has been reached
        if state.isLose() or state.isWin() or depth <= 0:
            return self.evaluationFunction(state)

        # Sanity check: valid ghost number?
        assert 1 <= ghost_num < state.getNumAgents()

        legal_actions = state.getLegalActions(ghost_num)

        last_ghost = ghost_num == state.getNumAgents() - 1

        # If this is the last ghost, the next optimizer is Pacman's max_value
        # and the remaining depth decreases by one ply.
        next_optimizer = self.max_value if last_ghost else self.min_value
        next_depth = depth - 1 if last_ghost else depth

        utility = inf
        for action in legal_actions:
            successor = state.generateSuccessor(
                agentIndex=ghost_num,
                action=action,
            )
            utility = min(
                utility,
                next_optimizer(
                    successor,
                    next_depth,
                    alpha,
                    beta,
                    ghost_num + 1,
                ),
            )

            if utility < alpha:  # prune: the maximizer above will never allow this
                return utility

            beta = min(beta, utility)

        return utility
Example #10
def searchTree(state: GameState, depth: int, agent: int, a: float, b: float):
    # Alpha-beta variant of the recursive search; presumably nested inside
    # the agent's getAction, hence the reference to self. Returns a
    # (score, action) pair.
    actions = state.getLegalActions(agent)
    nextAgent = (agent + 1) % state.getNumAgents()
    if state.isLose() or state.isWin() or len(actions) == 0:
        return (state.getScore(), None)
    elif depth == 0:
        return (self.evaluationFunction(state), None)
    elif agent == 0:  # Pacman: maximize, tightening alpha
        value = float('-inf')
        bestAction = None
        for action in actions:
            curr = searchTree(state.generateSuccessor(agent, action),
                              depth, nextAgent, a, b)[0]
            if curr > value:
                value, bestAction = curr, action
            a = max(a, value)
            if a >= b:
                break  # prune: the minimizer above will never allow this
        return (value, bestAction)
    else:  # ghost: minimize, tightening beta
        # Depth decreases only after the last ghost has moved.
        nextDepth = depth - 1 if nextAgent == 0 else depth
        value = float('inf')
        bestAction = None
        for action in actions:
            curr = searchTree(state.generateSuccessor(agent, action),
                              nextDepth, nextAgent, a, b)[0]
            if curr < value:
                value, bestAction = curr, action
            b = min(b, value)
            if a >= b:
                break  # prune: the maximizer above will never allow this
        return (value, bestAction)
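A driver for this variant would seed `a` and `b` with the full window, so the root sees exact values and no root move is pruned incorrectly; a sketch under the same nested-in-`getAction` assumption as above:

```python
# Hypothetical call site, seeding alpha and beta with the full window:
def getAction(self, gameState: GameState) -> str:
    score, action = searchTree(gameState, self.depth, 0,
                               float('-inf'), float('inf'))
    return action
```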
Example #11
    def getAction(self, gameState: GameState):
        """
        Returns the minimax action from the current gameState using self.depth
        and self.evaluationFunction.
        """
        _max = float("-inf")
        action = None
        for move in gameState.getLegalActions(0):
            util = minimax(self.evaluationFunction, 1, 0,
                           gameState.generateSuccessor(0, move), self.depth)
            # The second condition keeps a move even when every successor
            # evaluates to -inf, so None is never returned.
            if util > _max or action is None:
                _max = util
                action = move

        return action
Example #12
    def getAction(self, game_state: GameState) -> str:
        """
          Returns the minimax action from the current gameState using
          self.depth and self.evaluationFunction.

          Here are some method calls that might be useful when implementing
          minimax.

          gameState.getLegalActions(agentIndex): Returns a list of legal
          actions for an agent. agentIndex=0 means Pacman; ghosts are >= 1

          gameState.generateSuccessor(agentIndex, action): Returns the
          successor game state after an agent takes an action

          gameState.getNumAgents(): Returns the total number of agents in the
          game
        """
        legal_actions = game_state.getLegalActions(agentIndex=0)

        alpha, beta = -inf, inf
        utility = -inf
        # Default to the first action so one is always returned, even if no
        # successor improves on alpha (e.g. every utility is -inf).
        best_action_index = 0

        for action_num in range(len(legal_actions)):
            successor = game_state.generateSuccessor(
                agentIndex=0,
                action=legal_actions[action_num],
            )
            utility = max(
                utility,
                self.min_value(
                    successor,
                    depth=self.depth,
                    alpha=alpha,
                    beta=beta,
                    ghost_num=1,
                ),
            )

            if utility > alpha:
                best_action_index = action_num
                alpha = utility

        return legal_actions[best_action_index]
Example #13
    def _minimax(self, gameState: GameState,
                 idx: int) -> Tuple[float, Optional[str]]:
        n = gameState.getNumAgents()

        if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
            return (self.evaluationFunction(gameState), None)

        agent = idx % n
        legalActions = gameState.getLegalActions(agent)

        # Fold max and min nodes into one branch: mod = +1 maximizes (Pacman),
        # mod = -1 minimizes (ghosts).
        mod = 1 if agent == 0 else -1
        best_score = -float('inf') * mod
        best_action = None
        for legalAction in legalActions:
            s = gameState.generateSuccessor(agent, legalAction)
            score = self._minimax(s, idx + 1)[0]
            if score * mod > best_score * mod:
                best_score, best_action = score, legalAction

        return (best_score, best_action)
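`_minimax` folds depth and agent into the single ply counter `idx` (dividing by the agent count recovers the depth), so a driver only has to start it at zero; a sketch, assuming the method lives on the agent class:

```python
# Hypothetical driver for _minimax: idx = 0 is Pacman at the root, and only
# the action half of the (score, action) pair is returned.
def getAction(self, gameState: GameState) -> str:
    return self._minimax(gameState, 0)[1]
```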
Example #14
    def max_value(self,
                  state: GameState,
                  depth: int,
                  actor: Optional[int] = None) -> float:
        # Sanity check: have all the ghosts been evaluated the last round?
        if actor is not None:
            assert actor == state.getNumAgents()

        # Game over or search depth has been reached
        if state.isLose() or state.isWin() or depth <= 0:
            return self.evaluationFunction(state)

        legal_actions = state.getLegalActions(agentIndex=0)
        successors = [
            state.generateSuccessor(agentIndex=0, action=action)
            for action in legal_actions
        ]
        utilities = [
            self.min_value(successor, depth, ghost_num=1)
            for successor in successors
        ]

        return max(utilities)
Example #15
    def getAction(self, gameState: GameState) -> str:
        """
        Returns the expectimax action using self.depth and self.evaluationFunction

        All ghosts should be modeled as choosing uniformly at random from their
        legal moves.
        """

        # BEGIN_YOUR_CODE (our solution is 20 lines of code, but don't worry if you deviate from this)
        def getVal(s, d, agentIndex, evalFn=self.evaluationFunction):
            nextAgentIndex = (0 if agentIndex == s.getNumAgents() - 1
                              else agentIndex + 1)
            actions = s.getLegalActions(agentIndex)
            if len(actions) == 0:
                return s.getScore()
            elif d == 0:
                if agentIndex != 0:
                    raise Exception(
                        f"Unexpected agentIndex {agentIndex} != 0")
                return evalFn(s)
            elif agentIndex == 0:  # Pacman: maximize
                return max(
                    getVal(s.generateSuccessor(agentIndex, a), d,
                           nextAgentIndex) for a in actions)
            else:  # ghost: uniform expectation; depth drops after the last ghost
                nextD = d - (1 if agentIndex == s.getNumAgents() - 1 else 0)
                return sum((1 / len(actions)) * getVal(
                    s.generateSuccessor(agentIndex, a), nextD, nextAgentIndex)
                           for a in actions)

        targetVal = getVal(gameState, self.depth, 0)
        # Pick uniformly among the root moves that achieve the expectimax value.
        legalActions = gameState.getLegalActions(0)
        actions = [
            a for a in legalActions if getVal(
                gameState.generateSuccessor(0, a), self.depth, 1) == targetVal
        ]

        return random.choice(actions)
Example #16
    def _expectimax(self, gameState: GameState,
                    idx: int) -> Tuple[float, Optional[str]]:
        n = gameState.getNumAgents()

        if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
            return (self.evaluationFunction(gameState), None)

        agent = idx % n
        legalActions = gameState.getLegalActions(agent)
        n_actions = len(legalActions)

        # Pacman maximizes; ghosts average uniformly over their legal moves.
        ret_score = -float('inf') if agent == 0 else 0
        ret_action = None

        for legalAction in legalActions:
            s = gameState.generateSuccessor(agent, legalAction)
            score = self._expectimax(s, idx + 1)[0]
            if agent != 0:
                ret_score += score / n_actions
            elif score > ret_score:
                ret_score, ret_action = score, legalAction

        return (ret_score, ret_action)
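The same `idx` convention applies here; a matching sketch of the call site:

```python
# Hypothetical driver for _expectimax, mirroring the _minimax one above.
def getAction(self, gameState: GameState) -> str:
    return self._expectimax(gameState, 0)[1]
```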
Example #17
    def getAction(self, gameState: GameState) -> str:
        """
        Returns the minimax action from the current gameState using self.depth
        and self.evaluationFunction. Terminal states can be found by one of the
        following: pacman won, pacman lost, or there are no legal moves.

        Here are some method calls that might be useful when implementing minimax.

        gameState.getLegalActions(agentIndex):
          Returns a list of legal actions for an agent
          agentIndex=0 means Pacman, ghosts are >= 1

        gameState.generateSuccessor(agentIndex, action):
          Returns the successor game state after an agent takes an action

        gameState.getNumAgents():
          Returns the total number of agents in the game

        gameState.getScore():
          Returns the score corresponding to the current state of the game

        gameState.isWin():
          Returns True if it's a winning state

        gameState.isLose():
          Returns True if it's a losing state

        self.depth:
          The depth to which search should continue
        """

        # BEGIN_YOUR_CODE (our solution is 20 lines of code, but don't worry if you deviate from this)
        def getVal(s, d, agentIndex, evalFn=self.evaluationFunction):
            nextAgentIndex = (0 if agentIndex == s.getNumAgents() - 1
                              else agentIndex + 1)
            actions = s.getLegalActions(agentIndex)
            if len(actions) == 0:
                return s.getScore()
            elif d == 0:
                if agentIndex != 0:
                    raise Exception(
                        f"Unexpected agentIndex {agentIndex} != 0")
                return evalFn(s)
            elif agentIndex == 0:  # Pacman: maximize
                return max(
                    getVal(s.generateSuccessor(agentIndex, a), d,
                           nextAgentIndex) for a in actions)
            else:  # ghost: minimize; depth drops after the last ghost
                nextD = d - (1 if agentIndex == s.getNumAgents() - 1 else 0)
                return min(
                    getVal(s.generateSuccessor(agentIndex, a), nextD,
                           nextAgentIndex) for a in actions)

        targetVal = getVal(gameState, self.depth, 0)
        # Pick uniformly among the root moves that achieve the minimax value.
        legalActions = gameState.getLegalActions(0)
        actions = [
            a for a in legalActions if getVal(
                gameState.generateSuccessor(0, a), self.depth, 1) == targetVal
        ]

        return random.choice(actions)