def min_value(self, game_state: GameState, depth=0, ghost_index=0, alpha=-math.inf, beta=math.inf):

        # advance to the next ghost to move
        ghost_index += 1

        if self.is_a_new_level_of_search(game_state, ghost_index):
            depth = depth + 1

        if game_state.isWin() or game_state.isLose():
            return self.evaluationFunction(game_state)

        value = math.inf

        legal_actions = game_state.getLegalActions(ghost_index)

        for action in legal_actions:
            successor = game_state.getNextState(ghost_index, action)

            if self.is_a_new_level_of_search(game_state, ghost_index):
                # this is the last ghost, so the next node is Pacman's turn (a new max node)
                value = min(value, self.max_value(successor, depth=depth, alpha=alpha, beta=beta))
            else:
                # the next node in the tree is another minimizer, so continue with the next ghost
                value = min(value,
                            self.min_value(successor, depth=depth, ghost_index=ghost_index, alpha=alpha, beta=beta))

            if value < alpha:
                return value
            beta = min(beta, value)

        return value
    def exp_value(self, game_state: GameState, depth=0, ghost_index=0):

        # advance to the next ghost to move
        ghost_index += 1

        if self.is_a_new_level_of_search(game_state, ghost_index):
            depth = depth + 1

        if game_state.isWin() or game_state.isLose():
            return self.evaluationFunction(game_state)

        value = 0

        legal_actions = game_state.getLegalActions(ghost_index)

        for action in legal_actions:
            successor = game_state.getNextState(ghost_index, action)

            probability = 1 / len(legal_actions)

            if self.is_a_new_level_of_search(game_state, ghost_index):
                # this is the last ghost, so the next node is Pacman's turn (a new max node)
                value += probability * self.max_value(successor, depth=depth)
            else:
                # the next node in the tree is another chance node, so continue with the next ghost
                value += probability * self.exp_value(successor, depth=depth, ghost_index=ghost_index)

        return value
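Both methods above rely on an `is_a_new_level_of_search` helper that this example does not show. A minimal sketch of what it presumably checks, assuming the usual agent numbering (Pacman is agent 0 and the ghosts are agents 1 through getNumAgents() - 1):

    def is_a_new_level_of_search(self, game_state: GameState, ghost_index: int) -> bool:
        # Assumed helper, not part of the original example: once the incremented
        # ghost_index points at the last agent, the next node in the tree belongs
        # to Pacman and the search moves one ply deeper.
        return ghost_index == game_state.getNumAgents() - 1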
Example #3
    def min_value(self, state: GameState, depth: int, ghost_num: int) -> int:

        # Game over or search depth has been reached
        if state.isLose() or state.isWin() or depth <= 0:
            return self.evaluationFunction(state)

        # Sanity check: valid ghost number?
        assert 1 <= ghost_num < state.getNumAgents()

        legal_actions = state.getLegalActions(ghost_num)

        successors = [
            state.generateSuccessor(ghost_num, ghost_action)
            for ghost_action in legal_actions
        ]

        # If this is the last ghost, next optimizer should be from pacman's
        # perspective
        next_optimizer = self.max_value \
            if ghost_num == state.getNumAgents() - 1 \
            else self.min_value

        # If this is the last ghost, decrement depth
        next_depth = depth - 1 \
            if ghost_num == state.getNumAgents() - 1 \
            else depth

        utilities = [
            next_optimizer(successor, next_depth, ghost_num + 1)
            for successor in successors
        ]

        return min(utilities)
Example #4
 def searchTree(state: GameState, depth: int, agent: int):
     actions = state.getLegalActions(agent)
     nextAgent = (agent + 1) % state.getNumAgents()
     if state.isLose() or state.isWin() or len(actions) == 0:
         return [state.getScore(), None]
     elif depth == 0:
         return [self.evaluationFunction(state), None]
     elif agent == 0:
         successors = [
             searchTree(state.generateSuccessor(agent, action), depth,
                        nextAgent)[0] for action in actions
         ]
         maximum = max(successors)
         maxIndex = successors.index(maximum)
         return [maximum, actions[maxIndex]]
     else:
         nextDepth = depth
         if nextAgent == 0:
             nextDepth -= 1
         successors = [
             searchTree(state.generateSuccessor(agent, action),
                        nextDepth, nextAgent)[0] for action in actions
         ]
         expected = sum(successors) * 1.0 / len(successors)
         return [expected, None]
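The closure returns a `[score, action]` pair, but the example does not show how the search is started. A minimal sketch of the assumed call site (the conventional `gameState` argument and `self.depth` attribute of the enclosing agent are assumptions, not part of the original):

 # Assumed call site at the end of the enclosing getAction method: start the
 # search at Pacman (agent 0) with the agent's full search depth and keep
 # only the chosen action, discarding the score.
 return searchTree(gameState, self.depth, 0)[1]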
Example #5
    def max_value(
        self,
        state: GameState,
        depth: int,
        alpha: int,
        beta: int,
        actor: Optional[int] = None,
    ) -> int:
        # Sanity check: have all the ghosts been evaluated the last round?
        if actor is not None:
            assert actor == state.getNumAgents()

        # Game over or search depth has been reached
        if state.isLose() or state.isWin() or depth <= 0:
            return self.evaluationFunction(state)

        legal_actions = state.getLegalActions(agentIndex=0)

        utility = -inf
        for action in legal_actions:
            successor = state.generateSuccessor(agentIndex=0, action=action)
            utility = max(
                utility,
                self.min_value(successor, depth, alpha, beta, ghost_num=1),
            )

            if utility > beta:
                return utility

            alpha = max(alpha, utility)

        return utility
Example #6
    def _alphabeta(self, gameState: GameState, idx: int, ab: List[float]) -> Tuple[float, str]:
        n = gameState.getNumAgents()

        if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
            return (self.evaluationFunction(gameState), None)

        agent = idx % n
        legalActions = gameState.getLegalActions(agent)

        pacman = (agent == 0)
        idx0 = int(pacman)
        idx1 = int(not pacman)
        mod = 1 if pacman else -1
        best_score = -float('inf') * mod
        best_action = None
        for legalAction in legalActions:
            s = gameState.generateSuccessor(agent, legalAction)
            score = self._alphabeta(s, idx + 1, [*ab])[0]
            if score * mod > best_score * mod:
                best_score, best_action = score, legalAction
            if best_score * mod > ab[idx0] * mod:
                break
            ab[idx1] = max(ab[idx1] * mod, best_score * mod) * mod
        
        return (best_score, best_action)
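The example does not show how `_alphabeta` is first invoked. Judging from how `idx0` and `idx1` index the window, `ab` holds `[alpha, beta]`, so a plausible entry point (the `getAction` name and signature below are an assumption based on the usual multi-agent agent interface) seeds it with an open window:

    def getAction(self, gameState: GameState) -> str:
        # Assumed entry point: search index 0 is Pacman's turn at the root,
        # and the window starts fully open, ab = [alpha, beta] = [-inf, +inf].
        return self._alphabeta(gameState, 0, [-float('inf'), float('inf')])[1]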
Example #7
 def rb_directional_expectimax(self, cur_state: GameState, turn: int,
                               agent: int, depth_limit: int, depth: int,
                               ghost_num: int):
     if turn == agent:
         depth += 1
     if depth >= depth_limit or cur_state.isWin() or cur_state.isLose():
         return self.evaluationFunction(cur_state)
     if turn == agent:  # if Pacman's turn
         cur_max = np.NINF
         for action in cur_state.getLegalPacmanActions():  # iterate over child game states
             child_state = cur_state.generateSuccessor(turn, action)
             cur_max = max(
                 cur_max,
                 self.rb_directional_expectimax(
                     child_state, (turn + 1) % (ghost_num + 1), agent,
                     depth_limit, depth, ghost_num))
         return cur_max
     else:  # if ghost turn
         assert turn > agent
         ghost_legal_moves = cur_state.getLegalActions(turn)
         ghost = DirectionalGhost(turn)
         # assert len(ghost_legal_moves) is not 0
         expectancy = 0
         for action in ghost_legal_moves:
             child_state = cur_state.generateSuccessor(turn, action)
             dist = ghost.getDistribution(cur_state)
             # print(dist)
             expectancy += dist[action] * self.rb_directional_expectimax(
                 child_state, (turn + 1) % (ghost_num + 1), agent,
                 depth_limit, depth, ghost_num)
             if math.isnan(expectancy):
                 expectancy = 0
         return expectancy
Example #8
def minimax(evalFunc: classmethod, agent: int, depth: int, gameState: GameState, maxDepth: int) -> float:
    if gameState.isLose() or gameState.isWin() or depth == maxDepth:
        return evalFunc(gameState)
    if agent == 0:
        return max(minimax(evalFunc, 1, depth, gameState.generateSuccessor(agent, action), maxDepth) for action in gameState.getLegalActions(agent))
    else:
        nextAgent = agent + 1
        if gameState.getNumAgents() == nextAgent:
            nextAgent = 0
        if nextAgent == 0:
            depth += 1
        return min(minimax(evalFunc, nextAgent, depth, gameState.generateSuccessor(agent, action), maxDepth) for action in gameState.getLegalActions(agent))
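The free-standing `minimax` above returns only a score, so an agent still needs a top-level argmax over Pacman's moves. A minimal sketch of such a wrapper, assuming the standard `getAction`, `self.depth`, and `self.evaluationFunction` members of a MultiAgentSearchAgent (none of which appear in the original example):

    def getAction(self, gameState: GameState):
        # Assumed wrapper: evaluate each of Pacman's legal moves with the recursive
        # helper, starting from the first ghost (agent 1) at depth 0, and return
        # the action whose subtree scores highest.
        return max(
            gameState.getLegalActions(0),
            key=lambda action: minimax(self.evaluationFunction, 1, 0,
                                       gameState.generateSuccessor(0, action),
                                       self.depth),
        )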
Example #9
    def min_value(
        self,
        state: GameState,
        depth: int,
        alpha: int,
        beta: int,
        ghost_num: int,
    ) -> int:

        # Game over or search depth has been reached
        if state.isLose() or state.isWin() or depth <= 0:
            return self.evaluationFunction(state)

        # Sanity check: valid ghost number?
        assert 1 <= ghost_num < state.getNumAgents()

        legal_actions = state.getLegalActions(ghost_num)

        # If this is the last ghost, next optimizer should be from pacman's
        # perspective
        next_optimizer = self.max_value \
            if ghost_num == state.getNumAgents() - 1 \
            else self.min_value

        # If this is the last ghost, decrement depth
        next_depth = depth - 1 \
            if ghost_num == state.getNumAgents() - 1 \
            else depth

        utility = inf
        for action in legal_actions:
            successor = state.generateSuccessor(
                agentIndex=ghost_num,
                action=action,
            )
            utility = min(
                utility,
                next_optimizer(
                    successor,
                    next_depth,
                    alpha,
                    beta,
                    ghost_num + 1,
                ),
            )

            if utility < alpha:
                return utility

            beta = min(beta, utility)

        return utility
def betterEvaluationFunction(currentGameState:GameState):
    """
    Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
    evaluation function (question 5).

    DESCRIPTION: a weighted linear combination of eight features: the reciprocal
    of the remaining food count, the reciprocal distance to the closest food, the
    reciprocal count of and distance to the remaining power capsules, a bonus once
    the last capsule has been eaten, the total remaining ghost scared time, an
    indicator that a scared ghost has been eaten, and the reciprocal distance to
    the closest ghost (negative when that ghost is dangerous).
    """
    features = [0.]*8
    weights = [20000,10,1,1,45,0.5,3,1]
    # Features, in order:
    #   0: 1 / number of remaining food dots
    #   1: 1 / distance to the closest food dot
    #   2: 1 / number of remaining power capsules
    #   3: 1 / distance to the closest capsule
    #   4: 1 if no capsules remain (encourages eating the last capsule)
    #   5: total remaining ghost scared time (encourages eating capsules)
    #   6: 1 if some scared timers are zero while others are not (a scared ghost was eaten)
    #   7: +1 / distance to the closest ghost when it is safe to approach, -1 / distance when it is dangerous
    if currentGameState.isWin():
        #print(currentGameState.getFood().asList())
        return float("inf")
    if currentGameState.isLose():
        return float("-inf")
    pac_pos = currentGameState.getPacmanPosition()
    food_pos = currentGameState.getFood().asList()
    num_food = len(food_pos)
    ghost_states = currentGameState.getGhostStates()
    ghost_scared_times = [ghostState.scaredTimer for ghostState in ghost_states]
    ghost_pos = [g.getPosition() for g in ghost_states]
    pallets = currentGameState.getCapsules()
    features[0] = 1/num_food
    food_dis = sorted([util.manhattanDistance(fp,pac_pos) for fp in food_pos])
    features[1] = 1/food_dis[0]
    pallets_dis = sorted([util.manhattanDistance(p,pac_pos) for p in pallets])
    if pallets_dis:
        features[2] = 1/len(pallets_dis)
        features[3] = 1/pallets_dis[0]
    features[4] = int(not pallets)  # reward states where the last capsule has been eaten
    features[5] = sum(ghost_scared_times)
    features[6] = 1 if 0 in ghost_scared_times and sum(ghost_scared_times)>0 else 0
    # If some ghosts' scared timers are zero while others are still scared, a scared ghost
    # has just been eaten. This stays 1 in later states, but it does not affect action
    # selection, because only the relative values of successor states matter.
    ghost_dis = [util.manhattanDistance(g, pac_pos) for g in ghost_pos]
    minDis = min(ghost_dis)
    min_dis_ghost_inds = [i for i in range(len(ghost_dis)) if ghost_dis[i] == minDis]
    min_dis_ghost_scared_times = [ghost_scared_times[i] for i in min_dis_ghost_inds]
    if any(st < minDis for st in min_dis_ghost_scared_times):
        features[7] = -1.0 / minDis  # a closest ghost will stop being scared before Pacman reaches it: danger
    else:
        features[7] = 1.0 / minDis  # otherwise, there is no danger
    return sum(w*f for w,f in zip(weights,features))
Example #11
    def _minimax(self, gameState: GameState, idx: int) -> Tuple[float, str]:
        n = gameState.getNumAgents()

        if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
            return (self.evaluationFunction(gameState), None)

        agent = idx % n
        legalActions = gameState.getLegalActions(agent)

        mod = 1 if agent == 0 else -1
        best_score = -float('inf') * mod
        best_action = None
        for legalAction in legalActions:
            s = gameState.generateSuccessor(agent, legalAction)
            score = self._minimax(s, idx + 1)[0]
            if score * mod > best_score * mod:
                best_score, best_action = score, legalAction
        
        return (best_score, best_action)
Example #12
    def max_value(self,
                  state: GameState,
                  depth: int,
                  actor: Optional[int] = None) -> int:
        # Sanity check: have all the ghosts been evaluated the last round?
        if actor is not None:
            assert actor == state.getNumAgents()

        # Game over or search depth has been reached
        if state.isLose() or state.isWin() or depth <= 0:
            return self.evaluationFunction(state)

        legal_actions = state.getLegalActions(agentIndex=0)
        successors = [
            state.generateSuccessor(agentIndex=0, action=action)
            for action in legal_actions
        ]
        utilities = [
            self.min_value(successor, depth, ghost_num=1)
            for successor in successors
        ]

        return max(utilities)
Example #13
    def _expectimax(self, gameState: GameState, idx: int) -> Tuple[float, str]:
        n = gameState.getNumAgents()

        if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
            return (self.evaluationFunction(gameState), None)

        agent = idx % n
        legalActions = gameState.getLegalActions(agent)
        n_actions = len(legalActions)

        ret_score = -float('inf') if agent == 0 else 0
        ret_action = None

        for legalAction in legalActions:
            s = gameState.generateSuccessor(agent, legalAction)
            score = self._expectimax(s, idx + 1)[0]
            if agent != 0:
                ret_score += score / n_actions
            elif score > ret_score:
                ret_score, ret_action = score, legalAction
        
        return (ret_score, ret_action)
def betterEvaluationFunction(currentGameState: GameState):
    """
    Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
    evaluation function (question 5).

    DESCRIPTION: starts from the current game score, then subtracts a penalty
    proportional to the farthest food distance and to the number of remaining
    food dots, adds a bonus that grows with the distance to the nearest ghost
    (floored at 3), and subtracts a penalty for each remaining capsule.
    """
    "*** YOUR CODE HERE ***"
    # First, check whether the game is already over
    if currentGameState.isWin():
        return float('inf')
    if currentGameState.isLose():
        return -float('inf')
    score = currentGameState.getScore()
    # Consider food
    foods = currentGameState.getFood().asList()
    foodDis = [
        util.manhattanDistance(currentGameState.getPacmanPosition(), food)
        for food in foods
    ]
    foodDis.sort(reverse=True)
    numFoods = 3
    if currentGameState.getNumFood() < 3:
        numFoods = currentGameState.getNumFood()
    score -= foodDis[0] * 1.5
    # for i in range(numFoods):
    #     score-=(numFoods-i)*foodDis[i]
    # If there is food nearby, it is best to eat it
    score -= (currentGameState.getNumFood() * 4)
    # Consider ghosts
    ghostDis = [
        util.manhattanDistance(currentGameState.getPacmanPosition(), ghost)
        for ghost in currentGameState.getGhostPositions()
    ]
    score += max(3, min(ghostDis)) * 2
    # Consider capsules
    score -= len(currentGameState.getCapsules()) * 4
    return score
Example #15
 def searchTree(state: GameState, depth: int, agent: int, a, b):
     actions = state.getLegalActions(agent)
     nextAgent = (agent + 1) % state.getNumAgents()
     if state.isLose() or state.isWin() or len(actions) == 0:
         return [state.getScore(), None]
     elif depth == 0:
         return [self.evaluationFunction(state), None]
     elif agent == 0:
         value = float('-inf')
         successors = []
         for action in actions:
             curr = searchTree(state.generateSuccessor(agent, action),
                               depth, nextAgent, a, b)[0]
             successors.append(curr)
             value = max(value, curr)
             a = max(a, value)
             if a >= b:
                 break
         maxIndex = successors.index(value)
         return [value, actions[maxIndex]]
     else:
         nextDepth = depth
         if nextAgent == 0:
             nextDepth -= 1
         value = float('inf')
         successors = []
         for action in actions:
             curr = searchTree(state.generateSuccessor(agent, action),
                               nextDepth, nextAgent, a, b)[0]
             successors.append(curr)
             value = min(value, curr)
             b = min(b, value)
             if a >= b:
                 break
         minIndex = successors.index(value)
         return [value, actions[minIndex]]
 def is_terminal_state(self, game_state: GameState, current_depth):
     return game_state.isWin() or game_state.isLose() or current_depth == self.depth