def generate_move_alpha_beta(board: Board, player: BoardPiece,
                             saved_state: Optional[SavedState]
                             ) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Agent selects a move based on a minimax depth first search, with
    alpha-beta pruning.

    :param board: 2d array representing current state of the game
    :param player: the player making the current move (active player)
    :param saved_state: opaque agent state, passed through unchanged
    :return: the agent's selected move and the (unchanged) saved state
    """
    # If the board is empty, play in the center column.
    # (cols - 1) // 2 equals floor(median(arange(cols))) for any board
    # width, without allocating a temporary array.
    if np.all(board == NO_PLAYER):
        action = (board.shape[1] - 1) // 2
        return PlayerAction(action), saved_state

    # Search on a copy so the caller's board is never mutated
    board_cp = board.copy()

    # Call alpha_beta with alpha/beta initialized to -/+ "infinity"
    alpha0 = -100000
    beta0 = 100000
    score, action = alpha_beta(board_cp, player, True, 0, alpha0, beta0)

    return PlayerAction(action), saved_state
def generate_move_alpha_beta(board: Board, player: BoardPiece,
                             saved_state: Optional[SavedState]
                             ) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Pick a move for `player` using a minimax search with alpha-beta pruning.

    NOTE(review): this re-defines generate_move_alpha_beta and shadows an
    earlier definition of the same name in this module — confirm which
    version is intended to be live.

    :param board: 2d array representing current state of the game
    :param player: the player making the current move (active player)
    :param saved_state: opaque agent state, passed through unchanged
    :return: the agent's selected move and the (unchanged) saved state
    """
    # Opening move on an empty board: take the middle column
    if np.all(board == NO_PLAYER):
        centre = np.floor(np.median(np.arange(board.shape[1])))
        return PlayerAction(centre), saved_state

    # Work on a copy so the caller's board is left untouched
    search_board = board.copy()

    # alpha/beta window starts at effectively -inf / +inf
    best_score, best_col = alpha_beta(search_board, player, True, 0,
                                      -100000, 100000)
    return PlayerAction(best_col), saved_state
def user_move(board: Board, _player: BoardPiece,
              saved_state: Optional[SavedState]):
    """
    Prompts the human player to select a column to play in.

    :param board: 2d array representing current state of the game
    :param _player: the player making the current move (unused)
    :param saved_state: opaque agent state, passed through unchanged
    :return: returns the chosen action and the saved state
    """
    # Initialize the action as a negative and cast as type PlayerAction
    action = PlayerAction(-1)
    # While action is not a valid move
    while not (0 <= action < board.shape[1]):
        try:
            # Human player input action
            raw = input("Select column to play (0-6): ")
            # If no input is entered, raise IndexError
            if not raw:
                raise IndexError
            # Cast as type PlayerAction; raises ValueError on
            # non-numeric input (previously uncaught -> crash)
            action = PlayerAction(raw)
            # Test whether the column is full. If it is, top_row will
            # throw an IndexError
            top_row(board, action)
        except (IndexError, ValueError):
            # Bad column index, full column, or non-numeric input:
            # reset and re-prompt
            print('This is not a valid action. Please choose again.')
            action = -1
            continue
    return action, saved_state
def minimax(board: Board, player: BoardPiece, max_player: bool, depth: int
            ) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Plain (unpruned) minimax search to a fixed depth.

    :param board: 2d array representing current state of the game
    :param player: the player to move at this node
    :param max_player: True if this node maximizes the score
    :param depth: current depth in the game tree (root is 0)
    :return: the node's score and the best column (None at leaf nodes)
    """
    # Playable columns: the top cell of the column is still empty
    open_cols = np.argwhere(board[-1, :] == 0)
    open_cols = open_cols.reshape(open_cols.size)

    # Leaf node: depth limit reached or the board is completely full
    max_depth = 4
    if depth == max_depth or np.all(board != 0):
        return heuristic_solver(board, player, max_player), None

    opponent = BoardPiece(player % 2 + 1)

    if max_player:
        best_score = -np.inf
        for col in open_cols:
            # Root-level shortcut: if the opponent could win in this
            # column, play it immediately to block
            if depth == 0 and connect_four(
                    apply_player_action(board, col, opponent, True),
                    opponent, col):
                return GameScore(100), PlayerAction(col)
            # Recurse on the board after playing this column
            child = apply_player_action(board, col, player, True)
            child_score, _ = minimax(child, opponent, False, depth + 1)
            child_score -= 5 * depth  # discount: prefer earlier wins
            if child_score > best_score:
                best_score = child_score
                best_col = col
        return GameScore(best_score), PlayerAction(best_col)

    best_score = np.inf
    for col in open_cols:
        child = apply_player_action(board, col, player, True)
        child_score, _ = minimax(child, opponent, True, depth + 1)
        child_score += 5 * depth  # discount: later losses hurt less
        if child_score < best_score:
            best_score = child_score
            best_col = col
    return GameScore(best_score), PlayerAction(best_col)
def generate_move_minimax(board: Board, player: BoardPiece,
                          saved_state: Optional[SavedState]
                          ) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Agent selects a move based on a minimax depth first search.

    :param board: 2d array representing current state of the game
    :param player: the player making the current move (active player)
    :param saved_state: opaque agent state, passed through unchanged
    :return: the agent's selected move and the (unchanged) saved state
    """
    # If the board is empty, play in the center column.
    # (cols - 1) // 2 equals floor(median(arange(cols))) for any board
    # width, without allocating a temporary array.
    if np.all(board == NO_PLAYER):
        action = (board.shape[1] - 1) // 2
        return PlayerAction(action), saved_state

    # Search on a copy so the caller's board is never mutated
    board_cp = board.copy()

    # Call minimax from the root (depth 0, maximizing player)
    score, action = minimax(board_cp, player, True, 0)

    return PlayerAction(action), saved_state
def generate_move_random(board: Board, player: BoardPiece,
                         saved_state: Optional[SavedState]
                         ) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Choose a valid, non-full column randomly and return it as action.

    :param board: 2d array representing current state of the game
    :param player: the player making the current move (active player)
    :param saved_state: opaque agent state, passed through unchanged
    :return: returns a tuple containing the randomly generated move,
        and the saved state
    """
    # Columns whose top cell is empty are playable. flatnonzero gives
    # the flat indices directly, replacing the arange/argwhere/reshape
    # dance with a single call.
    free_cols = np.flatnonzero(board[-1, :] == 0)
    action = PlayerAction(np.random.choice(free_cols))
    return action, saved_state
def alpha_beta(board: Board, player: BoardPiece, max_player: bool, depth: int,
               alpha: GameScore, beta: GameScore
               ) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Recursively call alpha_beta to build a game tree to a pre-determined
    max depth. Once at the max depth, or at a terminal node, calculate and
    return the heuristic score. Scores farther down the tree are penalized.

    :param board: 2d array representing current state of the game
    :param player: the player to move at this node
    :param max_player: boolean indicating whether the depth at which
        alpha_beta is called from is a maximizing or minimizing player
    :param depth: the current depth in the game tree (root is 0)
    :param alpha: the currently best score for the maximizing player
        along the path to root
    :param beta: the currently best score for the minimizing player
        along the path to root
    :return: the best score and the associated action (None at leaves)
    """
    # Make a list of columns that can be played in (top cell empty)
    potential_actions = np.argwhere(board[-1, :] == 0)
    potential_actions = potential_actions.reshape(potential_actions.size)

    # If the node is at the max depth or a terminal node calculate the score
    max_depth = 4
    win_score = 150
    state_p = check_end_state(board, player)
    if state_p == GameState.IS_WIN:
        # Win detected for `player`: sign depends on which side this
        # node belongs to
        if max_player:
            return GameScore(win_score), None
        else:
            return GameScore(-win_score), None
    elif state_p == GameState.IS_DRAW:
        # NOTE(review): returns a plain int 0, not GameScore(0) —
        # probably fine, but inconsistent with the other returns
        return 0, None
    elif depth == max_depth:
        # Depth cutoff: fall back to the heuristic evaluation
        return heuristic_solver(board, player, max_player), None

    # For each potential action, recurse and keep the best move.
    # NOTE(review): `action` is only bound when a child improves on the
    # initial score; if no child ever does, the final PlayerAction(action)
    # would raise NameError. Unlikely in practice, but worth confirming.
    if max_player:
        score = -100000  # effectively -inf
        for col in potential_actions:
            # Apply the current action and call alpha_beta
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         False, depth + 1, alpha, beta)
            # Depth penalty: prefer wins that happen sooner
            new_score -= 5 * depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score >= beta:
                break
            # Check whether alpha updates the score
            if score > alpha:
                alpha = score
        return GameScore(score), PlayerAction(action)
    else:
        score = 100000  # effectively +inf
        for col in potential_actions:
            # Apply the current action and call alpha_beta
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         True, depth + 1, alpha, beta)
            # Depth penalty mirrored for the minimizing side
            new_score += 5 * depth
            # Check whether the score updates
            if new_score < score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                break
            # Check whether beta updates the score
            if score < beta:
                beta = score
        return GameScore(score), PlayerAction(action)
def alpha_beta(
    board: Board,
    player: BoardPiece,
    max_player: bool,
    depth: int,
    alpha: GameScore,
    beta: GameScore,
) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Alpha-beta pruned minimax search; depth counts up from the root.

    NOTE(review): this re-defines alpha_beta and shadows an earlier
    definition of the same name in this module.

    :param board: 2d array representing current state of the game
    :param player: the player to move at this node
    :param max_player: True if this node maximizes the score
    :param depth: current depth in the game tree (root is 0)
    :param alpha: best score so far for the maximizer on the root path
    :param beta: best score so far for the minimizer on the root path
    :return: the node's score and the best column (None at leaves)
    """
    max_depth = 6
    win_score = 150
    opponent = BoardPiece(player % 2 + 1)

    # Playable columns: the top cell of the column is still empty
    open_cols = np.argwhere(board[-1, :] == 0)
    open_cols = open_cols.reshape(open_cols.size)

    # Terminal / cutoff checks, kept in this priority order:
    # my win, opponent win, depth cutoff, draw
    my_state = check_end_state(board, player)
    opp_state = check_end_state(board, opponent)
    if my_state == GameState.IS_WIN:
        return (GameScore(win_score) if max_player
                else GameScore(-win_score)), None
    if opp_state == GameState.IS_WIN:
        return (GameScore(-win_score) if max_player
                else GameScore(win_score)), None
    if depth == max_depth:
        return heuristic_solver(board, player, max_player), None
    if my_state == GameState.IS_DRAW:
        return 0, None

    if max_player:
        best = -100000  # effectively -inf
        for col in open_cols:
            child = apply_player_action(board, col, player, True)
            child_score, _ = alpha_beta(child, opponent, False,
                                        depth + 1, alpha, beta)
            child_score -= 5 * depth  # prefer wins that come sooner
            if child_score > best:
                best = child_score
                best_col = col
            # Beta cutoff: the minimizer will never allow this branch
            if best >= beta:
                break
            alpha = max(alpha, best)
        return GameScore(best), PlayerAction(best_col)

    best = 100000  # effectively +inf
    for col in open_cols:
        child = apply_player_action(board, col, player, True)
        child_score, _ = alpha_beta(child, opponent, True,
                                    depth + 1, alpha, beta)
        child_score += 5 * depth  # later losses are penalized less
        if child_score < best:
            best = child_score
            best_col = col
        # Alpha cutoff: the maximizer will never allow this branch
        if best <= alpha:
            break
        beta = min(beta, best)
    return GameScore(best), PlayerAction(best_col)
def alpha_beta(board: Board, player: BoardPiece, max_player: bool, depth: int,
               alpha: GameScore, beta: GameScore, init: bool = False
               ) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Alpha-beta pruned minimax search.

    NOTE(review): unlike the sibling variants, depth counts DOWN here
    (base case at depth == 0, recursion passes depth - 1). The
    generate_move callers in this file pass depth=0, which would hit the
    base case immediately — confirm the intended caller passes the max
    depth instead.

    :param board: 2d array representing current state of the game
    :param player: the player to move at this node
    :param max_player: True if this node maximizes the score
    :param depth: remaining search depth (0 means evaluate and stop)
    :param alpha: best score so far for the maximizing player
    :param beta: best score so far for the minimizing player
    :param init: True only for the root call; enables the win/block shortcut
    :return: the best score and the associated action (None at leaves)
    """
    # Make a list of columns that can be played in (top cell empty)
    potential_actions = np.argwhere(board[-1, :] == 0)
    potential_actions = potential_actions.reshape(potential_actions.size)

    # If the node is at the max depth or a terminal node calculate the score
    if depth == 0 or np.all(board != 0):
        return heuristic_solver_bits(board, player, max_player), None

    # Root-call shortcut: block an opponent win or take our own win.
    # NOTE(review): the opponent-block branch is tested first, so blocking
    # is preferred over winning when both are available in the same column
    # scan — presumably winning should take priority; confirm.
    if init:
        for col in potential_actions:
            if connect_four(
                    apply_player_action(board, col,
                                        BoardPiece(player % 2 + 1), True),
                    BoardPiece(player % 2 + 1), col):
                return GameScore(100), PlayerAction(col)
            elif connect_four(apply_player_action(board, col, player, True),
                              player, col):
                return GameScore(100), PlayerAction(col)

    # For each potential action, call alpha_beta
    if max_player:
        score = -np.inf
        for col in potential_actions:
            # Apply the current action and call alpha_beta
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         False, depth - 1, alpha, beta)
            # TODO: figure out how to fix this rule with depth counting
            # backwards (6 - depth assumes a max depth of 6)
            new_score -= 5 * (6 - depth)
            # Check whether the score updates
            if new_score > score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score >= beta:
                break
            # Check whether alpha updates the score
            if score > alpha:
                alpha = score
        return GameScore(score), PlayerAction(action)
    else:
        score = np.inf
        for col in potential_actions:
            # Apply the current action and call alpha_beta
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         True, depth - 1, alpha, beta)
            # TODO: figure out how to fix this rule with depth counting
            # backwards (6 - depth assumes a max depth of 6)
            new_score += 5 * (6 - depth)
            # Check whether the score updates
            if new_score < score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                break
            # Check whether beta updates the score
            if score < beta:
                beta = score
        return GameScore(score), PlayerAction(action)