def generate_full_board(player, empty_spaces=0):
    """
    Randomly fills a board, leaving a requested number of cells empty

    Starting from an empty board, pieces are dropped into randomly chosen
    columns, alternating between the two players, until exactly
    empty_spaces cells remain unoccupied. The array board and the bitmap
    representation are updated in lockstep. No win detection is performed
    while filling.

    Parameters
        player = piece that makes the first move
        empty_spaces = number of cells to leave empty (default 0)

    Returns
        arr_board = the filled array board
        bit_board, bit_mask = the bitmap pair after the last move
        player = the piece whose turn it would be next

    Raises
        ValueError if empty_spaces is negative or larger than the number of
        cells on the board (previously such values made the fill loop spin
        forever)
    """
    # A negative count can never be reached; reject it before doing any work
    if empty_spaces < 0:
        raise ValueError(
            'empty_spaces must be non-negative, got {}'.format(empty_spaces))

    # Generate an empty board and its bitmap representation
    arr_board = cm.initialize_game_state()
    bit_board, bit_mask = cm.board_to_bitmap(arr_board, player)

    # Calculate the board shape and the number of pieces to place
    bd_shp = arr_board.shape
    n_cells = bd_shp[0] * bd_shp[1]
    if empty_spaces > n_cells:
        raise ValueError(
            'empty_spaces ({}) exceeds the number of cells ({})'.format(
                empty_spaces, n_cells))
    n_pieces = n_cells - empty_spaces

    # While the board is not full enough, continue placing pieces
    while popcount(bit_mask) < n_pieces:
        # Rejection-sample a column: the except clause treats an IndexError
        # from cm.top_row as "this column is full" and draws again
        while True:
            move = np.random.choice(bd_shp[1])
            try:
                move = cm.PlayerAction(move)
                cm.top_row(arr_board, move)
            except IndexError:
                continue
            break

        # Apply the move to both boards
        cm.apply_action(arr_board, move, player)
        bit_board, bit_mask = cm.apply_action_cp(bit_board, bit_mask,
                                                 move, bd_shp)
        # Switch to the next player
        player = cm.BoardPiece(player % 2 + 1)

    return arr_board, bit_board, bit_mask, player
def traverse(self):
    """
    Runs one selection / expansion / simulation / update pass from this node

    If this node is a terminal state, its value is returned immediately and
    no stats are touched. Otherwise, if some of the node's actions have not
    been expanded yet, the next action in self.actions is turned into a new
    child, a game is simulated from that child, and the child's stats are
    updated with the result. If every action already has a child, the child
    chosen by ucb1_select is traversed recursively instead. In both
    non-terminal cases this node's own stats are updated with the result
    before it is handed back to the caller.

    Returns
        True or False for a terminal win state (the truth value of
        self.max_player), -1 for a terminal draw, otherwise the value
        produced by the simulation or the recursive traversal
    """
    # Terminal states are not expanded any further
    if self.state == GameState.IS_WIN:
        return bool(self.max_player)
    if self.state == GameState.IS_DRAW:
        return -1

    n_expanded = len(self.children)
    if n_expanded >= len(self.actions):
        # Fully expanded: descend into the child picked by UCB1
        picked_ind = self.ucb1_select()
        picked_child = self.children[picked_ind]
        max_win = picked_child.traverse()
    else:
        # Children are created in the order of self.actions, so the number
        # of existing children indexes the next unexpanded action
        action = PlayerAction(self.actions[n_expanded])
        child_bd, child_msk = apply_action_cp(self.board, self.mask,
                                              action, self.shape)
        fresh_child = Connect4Node(child_bd, child_msk, self.shape,
                                   action, not self.max_player)
        self.add_child(fresh_child)

        # Simulate a game from the new child and record its outcome there
        max_win = fresh_child.sim_game()
        fresh_child.update_stats(max_win)

    # Record the result on this node on the way back up the tree
    self.update_stats(max_win)
    return max_win
def sim_game(self):
    """
    Plays random moves from this node's position until the game ends

    Starting from the node's board and mask, a uniformly random valid
    action is applied repeatedly until check_end_state no longer reports
    STILL_PLAYING. A flag that starts at self.max_player is flipped after
    every applied action and decides the value returned for a win.

    Returns
        True or False for a win (the truth value of the flipped flag)
        -1 if the result is a draw
        None, after printing an error, for any other end state
    """
    shape = self.shape
    board, mask = self.board, self.mask
    max_flag = self.max_player

    outcome = check_end_state(board, mask, shape)
    while outcome == GameState.STILL_PLAYING:
        # Pick one of the currently valid actions at random and apply it
        column = np.random.choice(valid_actions(mask, shape))
        board, mask = apply_action_cp(board, mask, column, shape)
        # Flip the flag once per applied action
        max_flag = not max_flag
        outcome = check_end_state(board, mask, shape)

    if outcome == GameState.IS_WIN:
        # TODO: possibly change how the score calculation works
        # (i.e. return integers here instead of booleans)
        return bool(max_flag)
    if outcome == GameState.IS_DRAW:
        return -1

    print('Error in Simulation')
    return None
def alpha_beta(board: Bitmap, mask: Bitmap, max_player: bool, depth: int,
               alpha: GameScore, beta: GameScore, board_shp: Tuple
               ) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Depth-limited minimax search with alpha-beta pruning.

    The position is first checked for a terminal state using
    ``board ^ mask``; a win found there is scored against the side to move
    (``-150`` when ``max_player`` is true, ``+150`` otherwise) and a draw
    is scored ``0``. At depth 7 the position is scored by
    ``heuristic_solver_bits``. Otherwise every valid action is searched
    recursively, and each child score is shifted by the current depth
    (subtracted for the maximizer, added for the minimizer) before it is
    compared.

    :param board: bitmap representing positions of current player
    :param mask: bitmap representing positions of both players
    :param max_player: whether the node being searched is a maximizing
                       (True) or minimizing (False) node
    :param depth: the current depth in the game tree
    :param alpha: best score found so far for the maximizing player along
                  the path to the root
    :param beta: best score found so far for the minimizing player along
                 the path to the root
    :param board_shp: the shape of the game board
    :return: ``(score, action)``; ``action`` is ``None`` for terminal and
             depth-limit nodes
    """
    depth_limit = 7
    win_value = 150

    # Terminal and depth-limit positions are scored without searching
    outcome = check_end_state(board ^ mask, mask, board_shp)
    if outcome == GameState.IS_WIN:
        return GameScore(-win_value if max_player else win_value), None
    if outcome == GameState.IS_DRAW:
        return 0, None
    if depth == depth_limit:
        return (heuristic_solver_bits(board, mask, board_shp[0], max_player),
                None)

    columns = valid_actions(mask, board_shp)
    best_col = -1

    if not max_player:
        # Minimizing node: look for the lowest depth-adjusted child score
        best = 100000
        for col in columns:
            child_board, child_mask = apply_action_cp(board, mask, col,
                                                      board_shp)
            child_score, _ = alpha_beta(child_board, child_mask, True,
                                        depth + 1, alpha, beta, board_shp)
            child_score = child_score + depth
            if child_score < best:
                best, best_col = child_score, col
            # The maximizer already has something at least as good: prune
            if best <= alpha:
                break
            # Tighten beta for the remaining siblings
            beta = min(beta, best)
        return GameScore(best), PlayerAction(best_col)

    # Maximizing node: look for the highest depth-adjusted child score
    best = -100000
    for col in columns:
        child_board, child_mask = apply_action_cp(board, mask, col,
                                                  board_shp)
        child_score, _ = alpha_beta(child_board, child_mask, False,
                                    depth + 1, alpha, beta, board_shp)
        child_score = child_score - depth
        if child_score > best:
            best, best_col = child_score, col
        # The minimizer already has something at least as good: prune
        if best >= beta:
            break
        # Tighten alpha for the remaining siblings
        alpha = max(alpha, best)
    return GameScore(best), PlayerAction(best_col)
def alpha_beta_oracle(board: cm.Bitmap, mask: cm.Bitmap, max_player: bool,
                      alpha: GameScore, beta: GameScore, board_shp: Tuple,
                      depth: int) -> Tuple[GameScore, Optional[int]]:
    """
    Function used to find guaranteed future wins, based on optimal play

    Alpha-beta search to a fixed depth of 8 that scores only decided
    positions: a win found on ``board ^ mask`` is scored against the side to
    move (-100 when max_player is true, +100 otherwise), while drawn
    positions and positions at the depth limit are scored 0. Child scores
    are shifted by the current depth (subtracted at maximizing nodes, added
    at minimizing nodes) before they are compared.

    Drawn positions are handled explicitly. Without that check a full board
    has no valid actions, the search loop never runs, and the +/-100000
    initial value would be returned as if it were a real score.

    Parameters
        board = bitmap representing positions of current player
        mask = bitmap representing positions of both players
        max_player = whether this node is a maximizing node
        alpha, beta = current pruning bounds
        board_shp = the shape of the game board
        depth = the current depth in the game tree (0 for the root call)

    Returns
        (score, moves): moves is only computed for a maximizing root call
        (depth == 0) that is not already decided, as
        2 * (win_score - score) + 1 when score > 0 and
        2 * (win_score + score) otherwise; in every other case it is None
    """
    max_depth = 8
    win_score = 100

    # Terminal positions and the depth cutoff are scored without searching
    state_p = cm.check_end_state(board ^ mask, mask, board_shp)
    if state_p == cm.GameState.IS_WIN:
        if max_player:
            return GameScore(-win_score), None
        return GameScore(win_score), None
    if state_p == cm.GameState.IS_DRAW:
        # No moves are left; report a neutral score rather than a sentinel
        return GameScore(0), None
    if depth >= max_depth:
        return GameScore(0), None

    # For each potential action, call alpha_beta_oracle
    pot_actions = cm.valid_actions(mask, board_shp)

    if max_player:
        score = -100000
        for col in pot_actions:
            # Apply the current action
            min_board, new_mask = cm.apply_action_cp(board, mask, col,
                                                     board_shp)
            # Search the resulting position for the minimizing player
            new_score, _ = alpha_beta_oracle(min_board, new_mask, False,
                                             alpha, beta, board_shp,
                                             depth + 1)
            new_score -= depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
            # Check whether we can prune the rest of the branch
            if score >= beta:
                break
            # Check whether alpha updates
            if score > alpha:
                alpha = score

        # Only the root call converts the score into a move count
        if depth != 0:
            return GameScore(score), None
        if score > 0:
            return GameScore(score), 2 * (win_score - score) + 1
        return GameScore(score), 2 * (win_score + score)

    score = 100000
    for col in pot_actions:
        # Apply the current action
        max_board, new_mask = cm.apply_action_cp(board, mask, col,
                                                 board_shp)
        # Search the resulting position for the maximizing player
        new_score, _ = alpha_beta_oracle(max_board, new_mask, True,
                                         alpha, beta, board_shp, depth + 1)
        new_score += depth
        # Check whether the score updates
        if new_score < score:
            score = new_score
        # Check whether we can prune the rest of the branch
        if score <= alpha:
            break
        # Check whether beta updates
        if score < beta:
            beta = score
    return GameScore(score), None
def test_mcts_algorithm():
    """
    MCTS plays against itself and tries to catch guaranteed wins

    Plays 40 self-play games. After every move the oracle is queried until
    it reports a guaranteed win (a score above 50 and below 200 in absolute
    value); from then on the moves are counted until a win is detected on
    the board. The function prints how many of the detected guaranteed wins
    ended in a win, and how many of those wins took no more moves than the
    oracle predicted.

    Both percentages are reported as 0 when their denominator is zero, so a
    run without any detected or converted win no longer ends in a
    ZeroDivisionError.
    """
    # Set parameter values and initialize counters
    a0 = -100000
    b0 = 100000
    n_games = 40
    n_wins = 0
    n_wins_opt = 0
    n_def_wins = 0

    for _game in range(n_games):
        # Generate an empty board and its bitmap representation
        arr_board = cm.initialize_game_state()
        player = cm.PLAYER1
        bit_b, bit_m = cm.board_to_bitmap(arr_board, player)
        # Calculate the board shape
        bd_shp = arr_board.shape
        # Initialize the board state variable
        bd_state = cm.check_end_state(bit_b, bit_m, bd_shp)
        # Initialize the list of moves and the per-game counters
        mv_list = []
        mv_cnt = 0
        num_mvs = 0
        def_win = False

        while bd_state == cm.GameState.STILL_PLAYING:
            # Generate an action using MCTS
            action, _ = generate_move(arr_board.copy(), player, None)
            # Update the list of moves
            mv_list.append(action)
            # Apply the action to both boards
            cm.apply_action(arr_board, action, player)
            bit_b, bit_m = cm.apply_action_cp(bit_b, bit_m, action, bd_shp)
            # Switch to the next player
            player = cm.BoardPiece(player % 2 + 1)

            if not def_win:
                # Check for a guaranteed win; if none, continue playing
                score, depth = alpha_beta_oracle(bit_b, bit_m, True, a0, b0,
                                                 bd_shp, 0)
                # The upper bound filters out the +/-100000 initial values
                # that the oracle can hand back
                if score > 50 and abs(score) < 200:
                    print('Score returned is {}'.format(score))
                    num_mvs = depth
                    n_def_wins += 1
                    def_win = True
                    print(cm.pretty_print_board(arr_board))
                    print('Last move by player {}, in column {}, player {} '
                          'should win in {} move(s) at most'.format(
                              player % 2 + 1, action, player, num_mvs))
            else:
                # Once a win is detected, check whether MCTS finds it
                mv_cnt += 1
                print(cm.pretty_print_board(arr_board))
                bd_state = cm.check_end_state(bit_b ^ bit_m, bit_m, bd_shp)
                if bd_state == cm.GameState.IS_WIN:
                    print(mv_list)
                    print('Player {} won in {} move(s)'.format(
                        player % 2 + 1, mv_cnt))
                    n_wins += 1
                    if mv_cnt <= num_mvs:
                        n_wins_opt += 1
                    break

            # Check the game state
            bd_state = cm.check_end_state(bit_b, bit_m, bd_shp)

    # Guard both ratios: either counter can legitimately stay at zero
    clinched_pct = 100 * (n_wins / n_def_wins) if n_def_wins else 0.0
    optimal_pct = 100 * (n_wins_opt / n_wins) if n_wins else 0.0

    # Print the number of wins and how many were optimal
    print('The MCTS algorithm clinched {:4.1f}% of its guaranteed wins, '
          'and won in an optimal number of moves {}% of the time'.format(
              clinched_pct, optimal_pct))
def test_alpha_beta_oracle():
    """
    Checks the oracle's predicted move counts on hard-coded positions

    Three parts, separated by a printed divider:
      1. After 3, 3, 4, 4 the oracle must report 3 moves, then 2 and 1
         after each of the two following moves in column 5.
      2. A longer opening is replayed; MCTS is asked for a move (a note is
         printed if it is not column 2 or 5), the recorded move is played
         instead, and the oracle must then report 5 moves.
      3. Two further recorded positions are replayed and the oracle's and
         the MCTS agent's answers are printed without being asserted.
    """
    alpha0, beta0 = -100000, 100000
    divider = '\n##########################################################\n'

    def new_game():
        # Empty array board, its bitmap pair, and the player to move
        grid = cm.initialize_game_state()
        first = cm.PLAYER1
        bits, occupied = cm.board_to_bitmap(grid, first)
        return grid, bits, occupied, first

    def drop(grid, bits, occupied, mover, column):
        # Apply the move to both boards, then switch to the next player
        cm.apply_action(grid, column, mover)
        bits, occupied = cm.apply_action_cp(bits, occupied, column, bd_shp)
        return bits, occupied, cm.BoardPiece(mover % 2 + 1)

    def symbol(piece):
        return 'X' if piece == cm.BoardPiece(1) else 'O'

    def oracle_depth(bits, occupied):
        _, moves = alpha_beta_oracle(bits, occupied, True, alpha0, beta0,
                                     bd_shp, 0)
        return moves

    # ---- Part 1: short forced sequence --------------------------------
    arr_board, bit_board, bit_mask, player = new_game()
    # The shape is taken once here and reused for every later board
    bd_shp = arr_board.shape

    move_list = [3, 3, 4, 4, 5, 5]
    for mv in move_list[:-2]:
        bit_board, bit_mask, player = drop(arr_board, bit_board, bit_mask,
                                           player, mv)

    # (move to play first or None, message template, expected move count)
    checkpoints = [
        (None, 'Player {} should win in {} moves.', 3),
        (move_list[-2], 'Player {} should lose in {} moves.', 2),
        (move_list[-1], 'Player {} should win in {} move.', 1),
    ]
    for mv, template, expected in checkpoints:
        if mv is not None:
            bit_board, bit_mask, player = drop(arr_board, bit_board,
                                               bit_mask, player, mv)
        print(cm.pretty_print_board(arr_board))
        depth = oracle_depth(bit_board, bit_mask)
        print(template.format(symbol(player), depth))
        assert depth == expected

    print(divider)

    # ---- Part 2: longer recorded opening ------------------------------
    arr_board, bit_board, bit_mask, player = new_game()

    # Full game
    # move_list = [3, 2, 3, 3, 3, 2, 2, 2, 5, 4, 0, 4, 4, 4, 1, 1, 5, 2, 6]
    move_list = [3, 2, 3, 3, 3, 2, 2, 2, 5, 4, 0, 4, 4, 4]
    for mv in move_list[:-1]:
        bit_board, bit_mask, player = drop(arr_board, bit_board, bit_mask,
                                           player, mv)

    print(cm.pretty_print_board(arr_board))
    action, _ = generate_move(arr_board.copy(), player, None)
    print('MCTS plays in column {}'.format(action))
    # Informational only: a miss is reported but does not fail the test
    if action not in (2, 5):
        print("NOTE: MCTS doesn't block this win unless it is given "
              "over 5s to search. It should play in column 2 or 5.")

    # The recorded move is played regardless of what MCTS suggested
    bit_board, bit_mask, player = drop(arr_board, bit_board, bit_mask,
                                       player, move_list[-1])
    print(cm.pretty_print_board(arr_board))
    depth = oracle_depth(bit_board, bit_mask)
    print('Player {} should win in {} move.'.format(symbol(player), depth))
    assert depth == 5

    print(divider)

    # ---- Part 3: other hard coded boards ------------------------------
    move_list_list = [
        [3, 4, 3, 3, 1, 0, 4, 4, 1, 1, 3, 0, 0, 4, 5, 5],
        [3, 3, 4, 5, 1, 2, 4, 4, 3, 4, 3, 4, 4, 3, 1, 1, 0, 5, 1, 5, 5, 1,
         0, 0],
    ]
    # Full games
    # [3, 4, 3, 3, 1, 0, 4, 4, 1, 1, 3, 0, 0, 4, 5, 5, 4, 6, 2, 2, 2]
    # [3, 4, 3, 3, 1, 0, 4, 4, 1, 1, 3, 0, 0, 4, 4, 1, 1, 5, 5, 5, 3,
    #  5, 5, 4, 5, 0, 2, 0, 2]
    for move_list in move_list_list:
        arr_board, bit_board, bit_mask, player = new_game()
        for mv in move_list:
            bit_board, bit_mask, player = drop(arr_board, bit_board,
                                               bit_mask, player, mv)

        # Print the current board state
        print(cm.pretty_print_board(arr_board))

        # Check for guaranteed wins
        depth = oracle_depth(bit_board, bit_mask)
        print('It is Player {}\'s turn. They should win in {} moves.'.format(
            symbol(player), depth))

        action, _ = generate_move(arr_board.copy(), player, None)
        print('Player {} plays in column {}'.format(symbol(player), action))

        # NOTE(review): assumed to be printed once per board - confirm
        print(divider)