def __init__(self, board: Bitmap, mask: Bitmap, board_shp: Tuple,
             node_col: PlayerAction, max_player: bool):
    """
    Parameters
    ----------
    board: bitmap representing the positions of the current player
    mask: bitmap representing the positions of both players
    board_shp: tuple giving the shape of the board (rows, columns)
    node_col: the column played to create this node (game state)
    max_player: indicates whether the current player is the max player
    """

    # Update the game state and save the game state attributes
    self.board, self.mask = board, mask
    self.shape: Tuple = board_shp
    self.node_col: PlayerAction = node_col
    self.max_player: bool = max_player
    self.state = check_end_state(self.board, self.mask, self.shape)

    # Node attributes
    # Randomize the order of actions (i.e. the order of node creation)
    self.actions: List[int] = valid_actions(self.mask, board_shp)
    np.random.shuffle(self.actions)
    self.children: List[Connect4Node] = []

    # Upper Confidence Bound 1 (UCB1) attributes
    self.si: float = 0  # number of simulations run through this node
    self.wi: float = 0  # number of wins recorded through this node
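
# A minimal sketch (not part of the original class) of how the si/wi
# attributes above typically feed the UCB1 selection rule: exploit with
# the win rate wi / si, explore with a log-ratio bonus. The helper name
# `ucb1_score` and the exploration constant `c` are assumptions.
import math

def ucb1_score(wi: float, si: float, parent_si: float,
               c: float = math.sqrt(2)) -> float:
    """Return the UCB1 value of a child with stats (wi, si)."""
    if si == 0:
        # Unvisited children are selected before any revisit
        return float('inf')
    return wi / si + c * math.sqrt(math.log(parent_si) / si)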
def test_node_initialization():
    # Initialize a game
    player = cm.PLAYER1
    arr_bd, bit_bd, mask_bd, player = generate_full_board(player, 1)
    bd_shp = arr_bd.shape

    # Generate a board that is not a win, with only a single piece missing
    while cm.check_end_state(bit_bd, mask_bd, bd_shp) == cm.GameState.IS_WIN:
        arr_bd, bit_bd, mask_bd, player = generate_full_board(player, 1)
    print(cm.pretty_print_board(arr_bd))

    # Test whether the node initializes and plays in the proper (only empty) column
    move = agmcts.generate_move_mcts(arr_bd, player, None)[0]
    empty_col = cm.valid_actions(mask_bd, bd_shp)[0]
    print('MCTS plays in column {}'.format(move))
    assert move == empty_col
def sim_game(self):
    """
    Simulates one iteration of a game from the current game state

    This function applies random actions until the game reaches a
    terminal state, either a win or a draw. It then returns the value
    associated with this state, which is propagated back up the tree
    to the root, updating the stats along the way.

    Returns
    -------
    True if max_player wins
    False if min_player wins
    -1 if the result is a draw
    """

    # Randomly choose valid actions until the game ends
    sim_board, sim_mask = self.board, self.mask
    game_state = check_end_state(sim_board, sim_mask, self.shape)
    curr_max_p = self.max_player
    while game_state == GameState.STILL_PLAYING:
        # Randomly select an action
        action = np.random.choice(valid_actions(sim_mask, self.shape))
        # Apply the action to the board
        sim_board, sim_mask = apply_action_cp(sim_board, sim_mask,
                                              action, self.shape)
        # Update the max_player boolean
        curr_max_p = not curr_max_p
        # Check the game state after the new action is applied
        game_state = check_end_state(sim_board, sim_mask, self.shape)

    if game_state == GameState.IS_WIN:
        # TODO: possibly change how the score calculation works
        #  (i.e. return integers here instead of booleans)
        return curr_max_p
    elif game_state == GameState.IS_DRAW:
        return -1
    else:
        print('Error in Simulation')
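
# A minimal sketch (assumed, not in the original code) of the
# backpropagation step that consumes sim_game()'s return value. The
# method name `backprop`, the half-win credit for draws, and the
# direction of the win credit are assumptions; only si/wi come from
# the class above.
def backprop(self, result) -> None:
    """Fold one simulation result into this node's UCB1 stats."""
    self.si += 1
    if result == -1:
        # Credit a draw as half a win
        self.wi += 0.5
    elif result == self.max_player:
        # The simulated winner matches this node's player
        self.wi += 1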
def alpha_beta(board: Bitmap, mask: Bitmap, max_player: bool, depth: int,
               alpha: GameScore, beta: GameScore, board_shp: Tuple
               ) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Recursively calls alpha_beta to build a game tree to a pre-determined
    max depth. Once at the max depth, or at a terminal node, it calculates
    and returns the heuristic score. Scores farther down the tree are
    penalized.

    :param board: bitmap representing positions of current player
    :param mask: bitmap representing positions of both players
    :param max_player: boolean indicating whether the depth from which
                       alpha_beta is called belongs to the maximizing or
                       minimizing player
    :param depth: the current depth in the game tree
    :param alpha: the current best score for the maximizing player along
                  the path to root
    :param beta: the current best score for the minimizing player along
                 the path to root
    :param board_shp: the shape of the game board

    :return: the best score and the associated action
    """

    # If the node is at the max depth or is a terminal node, calculate the score
    max_depth = 7
    win_score = 150
    state_p = check_end_state(board ^ mask, mask, board_shp)
    if state_p == GameState.IS_WIN:
        if max_player:
            return GameScore(-win_score), None
        else:
            return GameScore(win_score), None
    elif state_p == GameState.IS_DRAW:
        return GameScore(0), None
    elif depth == max_depth:
        return heuristic_solver_bits(board, mask, board_shp[0], max_player), None

    # For each potential action, call alpha_beta
    pot_actions = valid_actions(mask, board_shp)
    if max_player:
        score = -100000
        action = -1
        for col in pot_actions:
            # Apply the current action
            min_board, new_mask = apply_action_cp(board, mask, col, board_shp)
            # Call alpha-beta
            new_score, temp = alpha_beta(min_board, new_mask, False, depth + 1,
                                         alpha, beta, board_shp)
            # Penalize deeper wins
            new_score -= depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score >= beta:
                break
            # Check whether the score updates alpha
            if score > alpha:
                alpha = score
        return GameScore(score), PlayerAction(action)
    else:
        score = 100000
        action = -1
        for col in pot_actions:
            # Apply the current action
            max_board, new_mask = apply_action_cp(board, mask, col, board_shp)
            # Call alpha-beta
            new_score, temp = alpha_beta(max_board, new_mask, True, depth + 1,
                                         alpha, beta, board_shp)
            # Penalize deeper losses
            new_score += depth
            # Check whether the score updates
            if new_score < score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                break
            # Check whether the score updates beta
            if score < beta:
                beta = score
        return GameScore(score), PlayerAction(action)
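
# A minimal usage sketch (assumed, not in the original code): search
# from the root at depth 0 with an alpha-beta window wider than any
# reachable score (the internal sentinels above are +/-100000). The
# wrapper name `choose_action` is an assumption.
def choose_action(board: Bitmap, mask: Bitmap,
                  board_shp: Tuple) -> PlayerAction:
    """Return the best action found by a full-window root search."""
    _, action = alpha_beta(board, mask, True, 0,
                           GameScore(-100000), GameScore(100000), board_shp)
    return action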
def alpha_beta_oracle(board: cm.Bitmap, mask: cm.Bitmap, max_player: bool,
                      alpha: GameScore, beta: GameScore, board_shp: Tuple,
                      depth: int) -> Tuple[GameScore, Optional[int]]:
    """
    Finds guaranteed future wins, based on optimal play

    A guaranteed win for the max_player returns a score modified by the
    depth at which the win should occur. The number of moves in which
    the player should win is returned along with the score. Guaranteed
    losses are accounted for in a similar way.
    """

    max_depth = 8
    win_score = 100
    state_p = cm.check_end_state(board ^ mask, mask, board_shp)
    if state_p == cm.GameState.IS_WIN:
        if max_player:
            return GameScore(-win_score), None
        else:
            return GameScore(win_score), None
    elif state_p == cm.GameState.IS_DRAW:
        # Guard against drawn (full) boards, which have no valid actions
        return GameScore(0), None
    elif depth == max_depth:
        return GameScore(0), None

    # For each potential action, call alpha_beta_oracle
    pot_actions = cm.valid_actions(mask, board_shp)
    if max_player:
        score = -100000
        for col in pot_actions:
            # Apply the current action
            min_board, new_mask = cm.apply_action_cp(board, mask, col, board_shp)
            # Call alpha-beta
            new_score, _ = alpha_beta_oracle(min_board, new_mask, False,
                                             alpha, beta, board_shp, depth + 1)
            new_score -= depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
            # Check whether we can prune the rest of the branch
            if score >= beta:
                break
            # Check whether the score updates alpha
            if score > alpha:
                alpha = score
        # If this is the root node, return the optimal number of moves
        if depth == 0:
            if score > 0:
                return GameScore(score), 2 * (win_score - score) + 1
            else:
                return GameScore(score), 2 * (win_score + score)
        else:
            return GameScore(score), None
    else:
        score = 100000
        for col in pot_actions:
            # Apply the current action
            max_board, new_mask = cm.apply_action_cp(board, mask, col, board_shp)
            # Call alpha-beta
            new_score, _ = alpha_beta_oracle(max_board, new_mask, True,
                                             alpha, beta, board_shp, depth + 1)
            new_score += depth
            # Check whether the score updates
            if new_score < score:
                score = new_score
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                break
            # Check whether the score updates beta
            if score < beta:
                beta = score
        return GameScore(score), None
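
# A minimal usage sketch (assumed, not in the original code): query the
# oracle from the root. Per the root-return arithmetic above, a positive
# score comes back paired with the number of moves until the forced win.
# The wrapper name `moves_to_forced_win` is an assumption.
def moves_to_forced_win(board: cm.Bitmap, mask: cm.Bitmap,
                        board_shp: Tuple) -> Optional[int]:
    """Return the number of moves to a forced win for the max player,
    or None if no win is guaranteed within the oracle's search depth."""
    score, n_moves = alpha_beta_oracle(board, mask, True,
                                       GameScore(-100000), GameScore(100000),
                                       board_shp, 0)
    return n_moves if score > 0 else None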