def min_value(self, board: Board, depth, alpha, beta):
    """Return the alpha-beta value of ``board`` at a minimizing node.

    Follows the AIMA MIN-VALUE routine with pruning:

        function MIN-VALUE(state, alpha, beta) returns a utility value
            if TERMINAL-TEST(state) then return UTILITY(state)
            v <- +infinity
            for each a in ACTIONS(state) do
                v <- MIN(v, MAX-VALUE(RESULT(state, a), alpha, beta))
                if v <= alpha then return v
                beta <- MIN(beta, v)
            return v

    Parameters
    ----------
    board : Board
        Position to evaluate.
    depth : int
        Remaining plies; at ``depth <= 0`` the position is scored with
        ``self.score`` instead of being expanded.
    alpha, beta : float
        Current search window.

    Returns
    -------
    float
        ``board.utility(self)`` for a decided game, ``self.score(board, self)``
        at the depth limit, otherwise the smallest child value found (possibly
        cut off early once it drops to ``alpha`` or below).

    Raises
    ------
    SearchTimeout
        If ``self.time_left()`` is below ``self.TIMER_THRESHOLD``.
    """
    if self.time_left() < self.TIMER_THRESHOLD:
        raise SearchTimeout()

    # Every value in the tree is expressed from this agent's point of view,
    # even on the opponent's (minimizing) turn.
    if board.is_winner(self) or board.is_loser(self):
        return board.utility(self)

    if depth <= 0:
        # NOTE(review): the original author questioned whether the opponent's
        # nodes should be scored with our own heuristic; behaviour is kept.
        return self.score(board, self)

    lowest = math.inf
    for candidate in board.get_legal_moves():
        successor = board.forecast_move(candidate)
        lowest = min(lowest, self.max_value(successor, depth - 1, alpha, beta))
        if lowest <= alpha:
            # The maximizer already has something at least this good.
            return lowest
        beta = min(beta, lowest)
    return lowest
def custom_score_3(game: Board, player) -> float:
    """Score ``game`` for ``player`` from mobility plus reachable blank cells.

    The result is ``own_moves / 2 + reachable / 3 - opponent_moves / 2`` where
    ``reachable`` counts the blank cells obtained by adding each offset in
    the module-level ``second_moves_outside`` and ``square_moves`` collections
    to the player's current location.

    Returns ``INFINITY`` when ``player`` has won and ``NEG_INFINITY`` when
    ``player`` has lost.
    """
    if game.is_winner(player):
        return INFINITY
    if game.is_loser(player):
        return NEG_INFINITY

    blanks = game.get_blank_spaces()
    origin = game.get_player_location(player)
    offsets = [*second_moves_outside, *square_moves]

    # Translate the player's position by every offset and keep the targets
    # that land on a blank cell.
    reachable = [
        target
        for target in (tuple(map(sum, zip(origin, offset))) for offset in offsets)
        if target in blanks
    ]
    reachable_count = len(reachable)

    own_count = len(game.get_legal_moves(player))
    rival_count = len(game.get_legal_moves(game.get_opponent(player)))
    return float(own_count / 2 + reachable_count / 3 - rival_count / 2)
def max_value(self, board: Board, depth, alpha, beta):
    """Return the alpha-beta value of ``board`` at a maximizing node.

    Follows the AIMA MAX-VALUE routine with pruning:

        function MAX-VALUE(state, alpha, beta) returns a utility value
            if TERMINAL-TEST(state) then return UTILITY(state)
            v <- -infinity
            for each a in ACTIONS(state) do
                v <- MAX(v, MIN-VALUE(RESULT(state, a), alpha, beta))
                if v >= beta then return v
                alpha <- MAX(alpha, v)
            return v

    Parameters
    ----------
    board : Board
        Position to evaluate.
    depth : int
        Remaining plies; at ``depth <= 0`` the position is scored with
        ``self.score`` instead of being expanded.
    alpha, beta : float
        Current search window.

    Returns
    -------
    float
        ``board.utility(self)`` for a decided game, ``self.score(board, self)``
        at the depth limit, otherwise the largest child value found (possibly
        cut off early once it reaches ``beta`` or above).

    Raises
    ------
    SearchTimeout
        If ``self.time_left()`` is below ``self.TIMER_THRESHOLD``.
    """
    if self.time_left() < self.TIMER_THRESHOLD:
        raise SearchTimeout()

    if board.is_winner(self) or board.is_loser(self):
        return board.utility(self)

    if depth <= 0:
        return self.score(board, self)

    highest = -math.inf
    for candidate in board.get_legal_moves():
        successor = board.forecast_move(candidate)
        highest = max(highest, self.min_value(successor, depth - 1, alpha, beta))
        if highest >= beta:
            # The minimizer above will never allow this line.
            return highest
        alpha = max(alpha, highest)
    return highest
def minimax(self, game: Board, depth: int) -> tuple:
    """Pick a move with depth-limited minimax search.

    A modified version of MINIMAX-DECISION from the AIMA text:
    https://github.com/aimacode/aima-pseudocode/blob/master/md/Minimax-Decision.md

    Parameters
    ----------
    game : isolation.Board
        The current game state.
    depth : int
        Maximum number of plies to search before falling back to
        ``self.score()``.

    Returns
    -------
    (int, int)
        Board coordinates of the best move found, or ``NEGATIVE_MOVE`` when
        the active player has already lost or has no legal moves.

    Raises
    ------
    SearchTimeout
        If ``self.time_left()`` is below ``self.TIMER_THRESHOLD``.

    Notes
    -----
    (1) Board evaluation goes through ``self.score()`` only, as required by
        the project tests.
    (2) Every helper repeats the timer check at its top so the agent does
        not time out during testing.
    (3) ``self.current_best_move`` is updated as the search proceeds, so it
        holds the best move seen so far if a helper raises mid-search.
    """
    if self.time_left() < self.TIMER_THRESHOLD:
        raise SearchTimeout()

    if game.is_loser(game.active_player):
        return NEGATIVE_MOVE

    candidates = game.get_legal_moves(game.active_player)
    self.current_best_move = NEGATIVE_MOVE
    best_value = NEG_INFINITY

    for candidate in candidates:
        value = self.min_value(game.forecast_move(candidate), depth - 1)
        # ``>=`` (not ``>``) so that a move is still selected when every
        # option evaluates to NEG_INFINITY; ties go to the later move.
        if value >= best_value:
            best_value, self.current_best_move = value, candidate

    return self.current_best_move
def min_value(self, game: Board, depth: int) -> float:
    """Return the minimax value of ``game`` at a minimizing node.

    Returns ``INFINITY`` when the active player has lost, the heuristic
    ``self.score(game, game.inactive_player)`` once ``depth`` is exhausted,
    and otherwise the smallest ``max_value`` over all successor positions.

    Raises
    ------
    SearchTimeout
        If ``self.time_left()`` is below ``self.TIMER_THRESHOLD``.
    """
    if self.time_left() < self.TIMER_THRESHOLD:
        raise SearchTimeout()

    if game.is_loser(game.active_player):
        return INFINITY

    if depth <= 0:
        return self.score(game, game.inactive_player)

    child_values = [
        self.max_value(game.forecast_move(candidate), depth - 1)
        for candidate in game.get_legal_moves(game.active_player)
    ]
    # INFINITY seeds the minimum so an empty move list still yields a value.
    return min([INFINITY, *child_values])
def min_value(self, game: Board, depth: int, alpha: float, beta: float) -> float:
    """Return the alpha-beta value of ``game`` at a minimizing node.

    At the depth limit, or when the active player has lost, the position is
    scored with ``self.score(game, game.inactive_player)``. Otherwise the
    successors are searched with ``max_value``; the search stops as soon as
    the running minimum is no greater than ``alpha``.

    Returns
    -------
    float
        The running minimum on a cutoff. When all moves were examined, the
        tightened ``beta`` is returned, i.e. the smaller of the incoming
        ``beta`` and the minimum found.

    Raises
    ------
    SearchTimeout
        If ``self.time_left()`` is below ``self.TIMER_THRESHOLD``.
    """
    if self.time_left() < self.TIMER_THRESHOLD:
        raise SearchTimeout()

    horizon_reached = depth <= 0
    if horizon_reached or game.is_loser(game.active_player):
        return self.score(game, game.inactive_player)

    lowest = INFINITY
    for candidate in game.get_legal_moves(game.active_player):
        successor = game.forecast_move(candidate)
        lowest = min(lowest, self.max_value(successor, depth - 1, alpha, beta))
        if lowest <= alpha:
            return lowest
        beta = min(beta, lowest)

    # Deliberately the bound, not ``lowest``: the result is clamped to beta.
    return beta
def min_value(self, board: Board, depth):
    """Return the minimax value of ``board`` at a minimizing node.

    Follows the AIMA MIN-VALUE routine:

        function MIN-VALUE(state) returns a utility value
            if TERMINAL-TEST(state) then return UTILITY(state)
            v <- infinity
            for each a in ACTIONS(state) do
                v <- MIN(v, MAX-VALUE(RESULT(state, a)))
            return v

    A decided game yields ``board.utility(self)``; at ``depth <= 0`` the
    position is scored with ``self.score(board, self)``.

    Raises
    ------
    SearchTimeout
        If ``self.time_left()`` is below ``self.TIMER_THRESHOLD``.
    """
    if self.time_left() < self.TIMER_THRESHOLD:
        raise SearchTimeout()

    game_decided = board.is_winner(self) or board.is_loser(self)
    if game_decided:
        return board.utility(self)

    if depth <= 0:
        return self.score(board, self)

    child_values = [
        self.max_value(board.forecast_move(candidate), depth - 1)
        for candidate in board.get_legal_moves()
    ]
    # +infinity seeds the minimum, matching ``v <- infinity`` above.
    return min([math.inf, *child_values])
def custom_score(game: Board, player):
    """Calculate the heuristic value of a game state from the point of view
    of the given player.

    This should be the best heuristic function for your project submission.

    Note: this function should be called from within a Player instance as
    `self.score()` -- you should not need to call this function directly.

    The heuristic switches on ``blanks_left_percent(game)``:

    * below 0.5 it rewards closeness to the opponent (negated squared
      distance between the two player locations), or returns 1.0 when
      ``others_toe`` reports a hit;
    * otherwise it returns the difference in legal-move counts, doubled when
      ``others_toe`` reports a hit.

    NOTE(review): ``blanks_left_percent`` and ``others_toe`` are defined
    elsewhere; their exact semantics are assumed from their names.

    Parameters
    ----------
    game : `isolation.Board`
        An instance of `isolation.Board` encoding the current state of the
        game (e.g., player locations and blocked cells).

    player : object
        A player instance in the current game (i.e., an object corresponding
        to one of the player objects `game.__player_1__` or
        `game.__player_2__`.)

    Returns
    -------
    float
        The heuristic value of the current game state to the specified
        player; ``-inf`` if the player has lost, ``+inf`` if the player has
        won.
    """
    if game.is_loser(player):
        return float("-inf")

    if game.is_winner(player):
        return float("inf")

    opponent = game.get_opponent(player)
    progress = blanks_left_percent(game)

    if progress < 0.5:
        if others_toe(game, player, opponent):
            return 1.0

        # Close in on the opponent. Both locations are unpacked in the SAME
        # component order so that like coordinates are subtracted from each
        # other (the earlier code mixed the two axes).
        opp_first, opp_second = game.get_player_location(opponent)
        own_first, own_second = game.get_player_location(player)
        squared_distance = (opp_first - own_first) ** 2 + (opp_second - own_second) ** 2
        return float(-squared_distance)

    # Tread on the opponent's toes: if a toe is hit, the move differential
    # is weighted twice as heavily.
    multiplier = 2.0 if others_toe(game, player, opponent) else 1.0
    own_moves = len(game.get_legal_moves(player))
    opp_moves = len(game.get_legal_moves(opponent))
    return float(own_moves - opp_moves) * multiplier
def max_value(self, board: Board, depth):
    """Return the minimax value of ``board`` at a maximizing node.

    Follows the AIMA MAX-VALUE routine:

        function MAX-VALUE(state) returns a utility value
            if TERMINAL-TEST(state) then return UTILITY(state)
            v <- -infinity
            for each a in ACTIONS(state) do
                v <- MAX(v, MIN-VALUE(RESULT(state, a)))
            return v

    The terminal test is a non-zero ``board.utility(self)``; at
    ``depth <= 0`` the position is scored with ``self.score(board, self)``.

    Raises
    ------
    SearchTimeout
        If ``self.time_left()`` is below ``self.TIMER_THRESHOLD``.
    """
    if self.time_left() < self.TIMER_THRESHOLD:
        raise SearchTimeout()

    terminal_value = board.utility(self)
    if terminal_value != 0:
        return terminal_value

    if depth <= 0:
        return self.score(board, self)

    child_values = [
        self.min_value(board.forecast_move(candidate), depth - 1)
        for candidate in board.get_legal_moves()
    ]
    # -infinity seeds the maximum, matching ``v <- -infinity`` above.
    return max([-math.inf, *child_values])
def alphabeta(self, game: Board, depth, alpha=float("-inf"), beta=float("inf")) -> tuple:
    """Implement depth-limited minimax search with alpha-beta pruning as
    described in the lectures.

    This should be a modified version of ALPHA-BETA-SEARCH in the AIMA text
    https://github.com/aimacode/aima-pseudocode/blob/master/md/Alpha-Beta-Search.md

    Parameters
    ----------
    game : isolation.Board
        An instance of the Isolation game `Board` class representing the
        current game state

    depth : int
        Depth is an integer representing the maximum number of plies to
        search in the game tree before aborting

    alpha : float
        Alpha limits the lower bound of search on minimizing layers

    beta : float
        Beta limits the upper bound of search on maximizing layers

    Returns
    -------
    (int, int)
        The board coordinates of the best move found in the current search;
        (-1, -1) if there are no legal moves. When legal moves exist, one of
        them is always returned -- even if every move evaluates to
        ``NEG_INFINITY`` or none beats the incoming ``alpha`` -- so the agent
        never forfeits a position it can still play.

    Raises
    ------
    SearchTimeout
        If ``self.time_left()`` is below ``self.TIMER_THRESHOLD``.

    Notes
    -----
        (1) You MUST use the `self.score()` method for board evaluation
            to pass the project tests; you cannot call any other evaluation
            function directly.

        (2) If you use any helper functions (e.g., as shown in the AIMA
            pseudocode) then you must copy the timer check into the top of
            each helper function or else your agent will timeout during
            testing.
    """
    if self.time_left() < self.TIMER_THRESHOLD:
        raise SearchTimeout()

    best_move = None
    best_score = NEG_INFINITY

    for next_move in game.get_legal_moves(game.active_player):
        score = self.min_value(game.forecast_move(next_move), depth - 1, alpha, beta)
        # The first legal move is always accepted; afterwards only a strictly
        # better score replaces it, so ties keep the earliest move.
        if best_move is None or score > best_score:
            best_score = score
            best_move = next_move
        # Raise the lower bound handed to the remaining siblings.
        alpha = max(alpha, best_score)

    return NEGATIVE_MOVE if best_move is None else best_move
def custom_score(game: Board, player):
    """Calculate the heuristic value of a game state from the point of view
    of the given player.

    This should be the best heuristic function for your project submission.

    Note: this function should be called from within a Player instance as
    `self.score()` -- you should not need to call this function directly.

    While more than 25 cells are blank the score is simply the difference
    between the player's and the opponent's legal-move counts. With 25 or
    fewer blank cells it looks two plies ahead: for every move of the side
    to move and every reply of the other side, it sums both players'
    legal-move counts on the resulting board and returns the difference of
    the two totals.

    Parameters
    ----------
    game : `isolation.Board`
        An instance of `isolation.Board` encoding the current state of the
        game (e.g., player locations and blocked cells).

    player : object
        A player instance in the current game (i.e., an object corresponding
        to one of the player objects `game.__player_1__` or
        `game.__player_2__`.)

    Returns
    -------
    float
        The heuristic value of the current game state to the specified
        player.

    results
    =======
    (Recorded with an earlier version of the look-ahead, in which the second
    forecast was not applied on top of the first.)

    2 forecasts:
    ------------
    Match #   Opponent     AB_Custom
                           Won | Lost
       1       Random       10 |  0
       2       MM_Open       7 |  3
       3      MM_Center      8 |  2
       4     MM_Improved    10 |  0
       5       AB_Open       4 |  6
       6      AB_Center      4 |  6
       7     AB_Improved     4 |  6
    --------------------------------------------------------------------------
    Win Rate: 67.1%

    1 forecast:
    -----------
    Match #   Opponent     AB_Custom
                           Won | Lost
       1       Random       10 |  0
       2       MM_Open       6 |  4
       3      MM_Center      9 |  1
       4     MM_Improved     8 |  2
       5       AB_Open       4 |  6
       6      AB_Center      7 |  3
       7     AB_Improved     5 |  5
    --------------------------------------------------------------------------
    Win Rate: 70.0%

    2 forecasts but only in potential endgame (last 15 rounds):
    -----------------------------------------------------------
    first try:
    Match #   Opponent     AB_Custom
                           Won | Lost
       1       Random        9 |  1
       2       MM_Open       6 |  4
       3      MM_Center      9 |  1
       4     MM_Improved     6 |  4
       5       AB_Open       5 |  5
       6      AB_Center      3 |  7
       7     AB_Improved     5 |  5
    --------------------------------------------------------------------------
    Win Rate: 61.4%

    second try:
    Match #   Opponent     AB_Custom
                           Won | Lost
       1       Random       10 |  0
       2       MM_Open       9 |  1
       3      MM_Center      8 |  2
       4     MM_Improved     7 |  3
       5       AB_Open       6 |  4
       6      AB_Center      6 |  4
       7     AB_Improved     4 |  6
    --------------------------------------------------------------------------
    Win Rate: 71.4%

    VS AB_Improved:
    ===============
    2 forecasts but before potential endgame (except 15 rounds):
    ----------------------------------------------------------
    Match #   Opponent     AB_Custom
                           Won | Lost
       1     AB_Improved    22 | 18
    --------------------------------------------------------------------------
    Win Rate: 55.0%
    """
    if game.is_loser(player):
        return float("-inf")

    if game.is_winner(player):
        return float("inf")

    opponent = game.get_opponent(player)

    if len(game.get_blank_spaces()) > 25:
        # Plenty of room left: plain mobility difference.
        own_count = len(game.get_legal_moves(player))
        opp_count = len(game.get_legal_moves(opponent))
        return float(own_count - opp_count)

    # 2 forecasts. ``forecast_move`` applies the move for whichever side is
    # to move, so the look-ahead starts with the active player and the reply
    # is played on the board produced by the first move, not on ``game``.
    first_mover = game.active_player
    second_mover = game.get_opponent(first_mover)

    own_total = 0
    opp_total = 0
    for first_move in game.get_legal_moves(first_mover):
        after_first = game.forecast_move(first_move)
        for second_move in after_first.get_legal_moves(second_mover):
            after_second = after_first.forecast_move(second_move)
            own_total += len(after_second.get_legal_moves(player))
            opp_total += len(after_second.get_legal_moves(opponent))

    return float(own_total - opp_total)