def min_value(self, game_state: GameState, depth=0, ghost_index=0, alpha=-math.inf, beta=math.inf):
    ghost_index += 1  # next ghost to move
    if self.is_a_new_level_of_search(game_state, ghost_index):
        depth = depth + 1
    if game_state.isWin() or game_state.isLose():
        return self.evaluationFunction(game_state)
    value = math.inf
    legal_actions = game_state.getLegalActions(ghost_index)
    for action in legal_actions:
        successor = game_state.getNextState(ghost_index, action)
        if self.is_a_new_level_of_search(game_state, ghost_index):
            # this is the last ghost, so move on to Pacman (a new max node)
            value = min(value, self.max_value(successor, depth=depth, alpha=alpha, beta=beta))
        else:
            # the next node in the tree is another minimizer, so continue with the next ghost
            value = min(value, self.min_value(successor, depth=depth, ghost_index=ghost_index, alpha=alpha, beta=beta))
        if value < alpha:
            return value
        beta = min(beta, value)
    return value
def exp_value(self, game_state: GameState, depth=0, ghost_index=0):
    ghost_index += 1  # next ghost to move
    if self.is_a_new_level_of_search(game_state, ghost_index):
        depth = depth + 1
    if game_state.isWin() or game_state.isLose():
        return self.evaluationFunction(game_state)
    value = 0
    legal_actions = game_state.getLegalActions(ghost_index)
    for action in legal_actions:
        successor = game_state.getNextState(ghost_index, action)
        # ghosts are modelled as choosing uniformly at random among their legal actions
        probability = 1 / len(legal_actions)
        if self.is_a_new_level_of_search(game_state, ghost_index):
            # this is the last ghost, so move on to Pacman (a new max node)
            value += probability * self.max_value(successor, depth=depth)
        else:
            # the next node in the tree is another chance node, so continue with the next ghost
            value += probability * self.exp_value(successor, depth=depth, ghost_index=ghost_index)
    return value
def min_value(self, state: GameState, depth: int, ghost_num: int) -> int:
    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    # Sanity check: valid ghost number?
    assert 1 <= ghost_num < state.getNumAgents()

    legal_actions = state.getLegalActions(ghost_num)
    successors = [
        state.generateSuccessor(ghost_num, ghost_action)
        for ghost_action in legal_actions
    ]

    # If this is the last ghost, next optimizer should be from pacman's
    # perspective
    next_optimizer = self.max_value \
        if ghost_num == state.getNumAgents() - 1 \
        else self.min_value

    # If this is the last ghost, decrement depth
    next_depth = depth - 1 \
        if ghost_num == state.getNumAgents() - 1 \
        else depth

    utilities = [
        next_optimizer(successor, next_depth, ghost_num + 1)
        for successor in successors
    ]
    return min(utilities)
def searchTree(state: GameState, depth: int, agent: int):
    actions = state.getLegalActions(agent)
    nextAgent = (agent + 1) % state.getNumAgents()
    if state.isLose() or state.isWin() or len(actions) == 0:
        return [state.getScore(), None]
    elif depth == 0:
        return [self.evaluationFunction(state), None]
    elif agent == 0:
        successors = [
            searchTree(state.generateSuccessor(agent, action), depth, nextAgent)[0]
            for action in actions
        ]
        maximum = max(successors)
        maxIndex = successors.index(maximum)
        return [maximum, actions[maxIndex]]
    else:
        nextDepth = depth
        if nextAgent == 0:
            nextDepth -= 1
        successors = [
            searchTree(state.generateSuccessor(agent, action), nextDepth, nextAgent)[0]
            for action in actions
        ]
        expected = sum(successors) * 1.0 / len(successors)
        return [expected, None]
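For context, a minimal driver sketch for the expectimax searchTree above, assuming it is defined as a closure inside getAction of an expectimax agent (the nesting, self.depth, and the wrapper itself are assumptions, not shown in the original):

def getAction(self, gameState: GameState):
    # Hypothetical wrapper around the searchTree closure above.
    def searchTree(state: GameState, depth: int, agent: int):
        ...  # body exactly as above

    # Root call: Pacman (agent 0) moves first with the configured search depth;
    # searchTree returns [value, action] and the action is what the agent plays.
    value, action = searchTree(gameState, self.depth, 0)
    return action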
def max_value(
    self,
    state: GameState,
    depth: int,
    alpha: float,
    beta: float,
    actor: Optional[int] = None,
) -> float:
    # Sanity check: have all the ghosts been evaluated in the last round?
    if actor is not None:
        assert actor == state.getNumAgents()

    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    legal_actions = state.getLegalActions(agentIndex=0)

    utility = -inf
    for action in legal_actions:
        successor = state.generateSuccessor(agentIndex=0, action=action)
        utility = max(
            utility,
            self.min_value(successor, depth, alpha, beta, ghost_num=1),
        )
        if utility > beta:
            return utility
        alpha = max(alpha, utility)
    return utility
def _alphabeta(self, gameState: GameState, idx: int, ab: List[float]) -> Tuple[float, str]:
    n = gameState.getNumAgents()
    # idx counts plies across all agents, so idx / n is the current search depth.
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)
    agent = idx % n
    legalActions = gameState.getLegalActions(agent)
    # ab is the shared [alpha, beta] window. mod = 1 makes the comparisons act as a
    # maximizer (Pacman); mod = -1 flips them into a minimizer (ghosts). idx0 points
    # at the bound we prune against, idx1 at the bound we tighten.
    pacman = (agent == 0)
    idx0 = int(pacman)
    idx1 = int(not pacman)
    mod = 1 if pacman else -1
    best_score = -float('inf') * mod
    best_action = None
    for legalAction in legalActions:
        s = gameState.generateSuccessor(agent, legalAction)
        # Pass a copy of the window so the child cannot mutate this node's bounds.
        score = self._alphabeta(s, idx + 1, [*ab])[0]
        if score * mod > best_score * mod:
            best_score, best_action = score, legalAction
        if best_score * mod > ab[idx0] * mod:
            # Prune: this value already exceeds what the opponent will allow.
            break
        ab[idx1] = max(ab[idx1] * mod, best_score * mod) * mod
    return (best_score, best_action)
def rb_directional_expectimax(self, cur_state: GameState, turn: int, agent: int,
                              depth_limit: int, depth: int, ghost_num: int):
    if turn == agent:
        depth += 1
    if depth >= depth_limit or cur_state.isWin() or cur_state.isLose():
        return self.evaluationFunction(cur_state)
    if turn == agent:  # Pacman's turn
        cur_max = -np.inf
        for action in cur_state.getLegalPacmanActions():  # iterate over child game states
            child_state = cur_state.generateSuccessor(turn, action)
            cur_max = max(
                cur_max,
                self.rb_directional_expectimax(child_state, (turn + 1) % (ghost_num + 1),
                                               agent, depth_limit, depth, ghost_num))
        return cur_max
    else:  # ghost's turn
        assert turn > agent
        ghost_legal_moves = cur_state.getLegalActions(turn)
        # Model the ghost's behaviour with a DirectionalGhost and weight each child
        # by the probability that the ghost assigns to the corresponding action.
        ghost = DirectionalGhost(turn)
        dist = ghost.getDistribution(cur_state)
        expectancy = 0
        for action in ghost_legal_moves:
            child_state = cur_state.generateSuccessor(turn, action)
            expectancy += dist[action] * self.rb_directional_expectimax(
                child_state, (turn + 1) % (ghost_num + 1), agent, depth_limit, depth,
                ghost_num)
        if math.isnan(expectancy):
            expectancy = 0
        return expectancy
def minimax(evalFunc: classmethod, agent: int, depth: int, gameState: GameState, maxDepth: int) -> float:
    if gameState.isLose() or gameState.isWin() or depth == maxDepth:
        return evalFunc(gameState)
    if agent == 0:
        # Pacman maximizes; the first ghost (agent 1) moves next at the same depth.
        return max(
            minimax(evalFunc, 1, depth, gameState.generateSuccessor(agent, action), maxDepth)
            for action in gameState.getLegalActions(agent))
    else:
        # Ghosts minimize; once the last ghost has moved, control returns to Pacman
        # and the depth increases by one.
        nextAgent = agent + 1
        if gameState.getNumAgents() == nextAgent:
            nextAgent = 0
        if nextAgent == 0:
            depth += 1
        return min(
            minimax(evalFunc, nextAgent, depth, gameState.generateSuccessor(agent, action), maxDepth)
            for action in gameState.getLegalActions(agent))
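Since minimax above returns only a value, the calling agent has to compare root actions itself. A minimal sketch under that assumption, using the standard MultiAgentSearchAgent attributes self.depth and self.evaluationFunction (this wrapper is illustrative, not part of the original):

class MinimaxAgent(MultiAgentSearchAgent):
    def getAction(self, gameState: GameState):
        # Hypothetical root-level action selection: evaluate each of Pacman's legal
        # actions with the first ghost (agent 1) to move next at depth 0, and pick
        # the action with the highest minimax value.
        return max(
            gameState.getLegalActions(0),
            key=lambda action: minimax(
                self.evaluationFunction, 1, 0,
                gameState.generateSuccessor(0, action), self.depth),
        )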
def min_value(
    self,
    state: GameState,
    depth: int,
    alpha: float,
    beta: float,
    ghost_num: int,
) -> float:
    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    # Sanity check: valid ghost number?
    assert 1 <= ghost_num < state.getNumAgents()

    legal_actions = state.getLegalActions(ghost_num)

    # If this is the last ghost, next optimizer should be from pacman's
    # perspective
    next_optimizer = self.max_value \
        if ghost_num == state.getNumAgents() - 1 \
        else self.min_value

    # If this is the last ghost, decrement depth
    next_depth = depth - 1 if ghost_num == state.getNumAgents() - 1 else depth

    utility = inf
    for action in legal_actions:
        successor = state.generateSuccessor(agentIndex=ghost_num, action=action)
        utility = min(
            utility,
            next_optimizer(successor, next_depth, alpha, beta, ghost_num + 1),
        )
        if utility < alpha:
            return utility
        beta = min(beta, utility)
    return utility
def betterEvaluationFunction(currentGameState: GameState):
    """
    Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
    evaluation function (question 5).

    DESCRIPTION: a weighted linear combination of eight features: the
    reciprocal of the remaining food count, the reciprocal of the distance to
    the closest food, the reciprocal of the remaining capsule count, the
    reciprocal of the distance to the closest capsule, a bonus once every
    capsule has been eaten, the total ghost scared time, a bonus when a scared
    ghost has been eaten, and a ghost-distance term that is negative when the
    closest ghost is dangerous and positive when it is safely scared.
    """
    features = [0.] * 8
    # Feature weights, in order:
    #   [0] 1 / number of remaining food pellets
    #   [1] 1 / distance to the closest food
    #   [2] 1 / number of remaining capsules
    #   [3] 1 / distance to the closest capsule
    #   [4] 1 if no capsules remain (encourage eating the last capsule)
    #   [5] total scared time (encourage eating capsules to increase it)
    #   [6] 1 if a scared ghost has been eaten (some timers are zero, others are not)
    #   [7] +/- 1 / distance to the closest ghost, sign depending on danger
    weights = [20000, 10, 1, 1, 45, 0.5, 3, 1]

    if currentGameState.isWin():
        return float("inf")
    if currentGameState.isLose():
        return float("-inf")

    pac_pos = currentGameState.getPacmanPosition()
    food_pos = currentGameState.getFood().asList()
    num_food = len(food_pos)
    ghost_states = currentGameState.getGhostStates()
    ghost_scared_times = [ghostState.scaredTimer for ghostState in ghost_states]
    ghost_pos = [g.getPosition() for g in ghost_states]
    pallets = currentGameState.getCapsules()

    features[0] = 1 / num_food
    food_dis = sorted([util.manhattanDistance(fp, pac_pos) for fp in food_pos])
    features[1] = 1 / food_dis[0]

    pallets_dis = sorted([util.manhattanDistance(p, pac_pos) for p in pallets])
    if pallets_dis:
        features[2] = 1 / len(pallets_dis)
        features[3] = 1 / pallets_dis[0]
    features[4] = int(not pallets)  # once all capsules are gone, reward that

    features[5] = sum(ghost_scared_times)
    # If some ghost's scared time is zero while another's is not, a ghost has been
    # eaten. For states after the ghost is eaten this stays 1, but it does not affect
    # action selection, because only relative values are compared.
    features[6] = 1 if 0 in ghost_scared_times and sum(ghost_scared_times) > 0 else 0

    ghost_dis = [util.manhattanDistance(g, pac_pos) for g in ghost_pos]
    minDis = min(ghost_dis)
    min_dis_ghost_inds = [i for i in range(len(ghost_dis)) if ghost_dis[i] == minDis]
    min_dis_ghost_scared_times = [ghost_scared_times[i] for i in min_dis_ghost_inds]
    if any(st < minDis for st in min_dis_ghost_scared_times):
        # a closest ghost does not have enough scared time left: the agent is in danger
        features[7] = -1.0 / minDis
    else:
        # every closest ghost stays scared long enough: no danger
        features[7] = 1.0 / minDis

    return sum(w * f for w, f in zip(weights, features))
def _minimax(self, gameState: GameState, idx: int) -> Tuple[float, str]:
    n = gameState.getNumAgents()
    # idx counts plies across all agents, so idx / n is the current search depth.
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)
    agent = idx % n
    legalActions = gameState.getLegalActions(agent)
    # mod = 1 turns the comparison into a max (Pacman); mod = -1 into a min (ghosts).
    mod = 1 if agent == 0 else -1
    best_score = -float('inf') * mod
    best_action = None
    for legalAction in legalActions:
        s = gameState.generateSuccessor(agent, legalAction)
        score = self._minimax(s, idx + 1)[0]
        if score * mod > best_score * mod:
            best_score, best_action = score, legalAction
    return (best_score, best_action)
def max_value(self, state: GameState, depth: int, actor: Optional[int] = None) -> int:
    # Sanity check: have all the ghosts been evaluated in the last round?
    if actor is not None:
        assert actor == state.getNumAgents()

    # Game over or search depth has been reached
    if state.isLose() or state.isWin() or depth <= 0:
        return self.evaluationFunction(state)

    legal_actions = state.getLegalActions(agentIndex=0)
    successors = [
        state.generateSuccessor(agentIndex=0, action=action)
        for action in legal_actions
    ]
    utilities = [
        self.min_value(successor, depth, ghost_num=1)
        for successor in successors
    ]
    return max(utilities)
def _expectimax(self, gameState: GameState, idx: int) -> Tuple[float, str]:
    n = gameState.getNumAgents()
    if idx / n >= self.depth or gameState.isWin() or gameState.isLose():
        return (self.evaluationFunction(gameState), None)
    agent = idx % n
    legalActions = gameState.getLegalActions(agent)
    n_actions = len(legalActions)
    ret_score = -float('inf') if agent == 0 else 0
    ret_action = None
    for legalAction in legalActions:
        s = gameState.generateSuccessor(agent, legalAction)
        score = self._expectimax(s, idx + 1)[0]
        if agent != 0:
            ret_score += score / n_actions
        elif score > ret_score:
            ret_score, ret_action = score, legalAction
    return (ret_score, ret_action)
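The ply-indexed helpers above (_minimax, _alphabeta, _expectimax) all start from index 0 at the root. A minimal dispatch sketch, assuming they are methods on the usual MultiAgentSearchAgent subclasses (these getAction bodies are assumptions, not taken from the original):

class ExpectimaxAgent(MultiAgentSearchAgent):
    def getAction(self, gameState: GameState):
        # Root call at ply 0 (Pacman); the helper returns (score, action).
        return self._expectimax(gameState, 0)[1]

class AlphaBetaAgent(MultiAgentSearchAgent):
    def getAction(self, gameState: GameState):
        # ab is the shared [alpha, beta] window, seeded as wide as possible.
        return self._alphabeta(gameState, 0, [float('-inf'), float('inf')])[1]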
def betterEvaluationFunction(currentGameState: GameState):
    """
    Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
    evaluation function (question 5).

    DESCRIPTION: start from the current game score, subtract penalties based on
    the distance to the farthest food, the number of remaining food pellets and
    the number of remaining capsules, and add a bonus for keeping distance from
    the nearest ghost (floored at 3).
    """
    "*** YOUR CODE HERE ***"
    # First check whether the game is already over.
    if currentGameState.isWin():
        return float('inf')
    if currentGameState.isLose():
        return -float('inf')

    score = currentGameState.getScore()

    # Consider food: penalize the farthest food distance and the food left.
    foods = currentGameState.getFood().asList()
    foodDis = [
        util.manhattanDistance(currentGameState.getPacmanPosition(), food)
        for food in foods
    ]
    foodDis.sort(reverse=True)
    numFoods = 3
    if currentGameState.getNumFood() < 3:
        numFoods = currentGameState.getNumFood()
    score -= foodDis[0] * 1.5
    # for i in range(numFoods):
    #     score -= (numFoods - i) * foodDis[i]
    # If there is food nearby, it is best to eat it, so every remaining pellet costs points.
    score -= currentGameState.getNumFood() * 4

    # Consider ghosts: reward distance from the closest ghost, floored at 3.
    ghostDis = [
        util.manhattanDistance(currentGameState.getPacmanPosition(), ghost)
        for ghost in currentGameState.getGhostPositions()
    ]
    score += max(3, min(ghostDis)) * 2

    # Consider capsules: penalize leaving capsules uneaten.
    score -= len(currentGameState.getCapsules()) * 4
    return score
def searchTree(state: GameState, depth: int, agent: int, a, b):
    actions = state.getLegalActions(agent)
    nextAgent = (agent + 1) % state.getNumAgents()
    if state.isLose() or state.isWin() or len(actions) == 0:
        return [state.getScore(), None]
    elif depth == 0:
        return [self.evaluationFunction(state), None]
    elif agent == 0:
        value = float('-inf')
        successors = []
        for action in actions:
            curr = searchTree(state.generateSuccessor(agent, action), depth, nextAgent, a, b)[0]
            successors.append(curr)
            value = max(value, curr)
            a = max(a, value)
            if a >= b:
                break
        maxIndex = successors.index(value)
        return [value, actions[maxIndex]]
    else:
        nextDepth = depth
        if nextAgent == 0:
            nextDepth -= 1
        value = float('inf')
        successors = []
        for action in actions:
            curr = searchTree(state.generateSuccessor(agent, action), nextDepth, nextAgent, a, b)[0]
            successors.append(curr)
            value = min(value, curr)
            b = min(b, value)
            if a >= b:
                break
        minIndex = successors.index(value)
        return [value, actions[minIndex]]
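A matching driver sketch for the alpha-beta searchTree above, again assuming it is a closure inside getAction; the root call seeds the window with -infinity and +infinity (the wrapper itself is an assumption):

def getAction(self, gameState: GameState):
    # Hypothetical wrapper around the alpha-beta searchTree closure above.
    def searchTree(state: GameState, depth: int, agent: int, a, b):
        ...  # body exactly as above

    # Root call with the widest possible alpha-beta window.
    value, action = searchTree(gameState, self.depth, 0, float('-inf'), float('inf'))
    return action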
def is_terminal_state(self, game_state: GameState, current_depth):
    return game_state.isWin() or game_state.isLose() or current_depth == self.depth