def __alpha_beta(self, node: GameTreeNode, depth: int, alpha: float, beta: float, player: int) -> float:
    """ Search the game tree to determine the best achievable value, using
    minimax with alpha-beta pruning.

    NOTE(review): an earlier docstring called this a "negamax implementation",
    but the code branches explicitly on `player` (max branch for 1, min branch
    for -1), i.e. classic minimax — the docstring is corrected here; the code
    is unchanged.

    Args:
        node (GameTreeNode): The root or parent node.
        depth (int): The depth of the current search (0 at the root).
        alpha (float): The best value found so far for the maximizing player.
        beta (float): The best value found so far for the minimizing player.
        player (int): Either 1 (current / maximizing player) or -1 (opponent).

    Returns:
        float: The best value achievable for the player from this node.
    """
    # Leaf: terminal position or depth cutoff — score with the heuristic.
    if self._game.is_terminal(node) or depth == self._depth:
        return self._eval_cls.compute_heuristic(node.state, depth)
    if player == 1:
        best_val = -math.inf
        # generate children and recursively apply the alpha beta search to each child
        for child in self._game.generate_moves(node.state, node.get_board_num(), player):
            ret_val = self.__alpha_beta(child, depth + 1, alpha, beta, -player)
            best_val = max(best_val, ret_val)
            alpha = max(alpha, best_val)
            # We only need keep track of the children generated right below the root
            # so that we can find the best move
            if depth == 0:
                node.children.append(child)
                child.alpha = ret_val
            # we can prune on this condition
            if beta <= alpha:
                return best_val
        return best_val
    else:
        best_val = math.inf
        for child in self._game.generate_moves(node.state, node.get_board_num(), player):
            best_val = min(
                best_val,
                self.__alpha_beta(child, depth + 1, alpha, beta, -player))
            beta = min(beta, best_val)
            # prune: the minimizer already has a guarantee at least this good
            if beta <= alpha:
                return best_val
        return best_val
def generate_moves(self, state: np.ndarray, curr_board: int, player: int):
    """ Generate all possible moves for the current player by treating every
    empty square on the current board as a candidate move.

    Player 1 = 1, Player 2 = -1.

    Args:
        state (np.ndarray): Array of board hashes representing the global
            game state (one hash per sub-board).
        curr_board (int): Index of the board the next move must be made on.
        player (int): The current player (1 or -1).

    Yields:
        GameTreeNode: One child node per legal move, each with its own
        independent copy of the updated global state.
    """
    # Decode the current board from its hash; np.frombuffer gives a
    # read-only view, so copy it into a writable scratch board.
    board = np.frombuffer(self._hash_to_board[state[curr_board]], dtype='i1')
    modifiable_board = np.empty_like(board)
    modifiable_board[:] = board

    # Writable scratch copy of the global state that we mutate per move.
    updated_state = np.empty_like(state)
    updated_state[:] = state

    # [1:] drops the first zero index, which is assumed to be position 0 —
    # the unused sentinel slot of the 10-wide board encoding (it is always 0,
    # so it always sorts first in np.where's result). TODO(review): confirm
    # this invariant holds for every board representation.
    for i in np.where(modifiable_board == 0)[0][1:]:
        # place the move on the scratch board
        modifiable_board[i] = player

        # write the updated board hash into the scratch global state
        prev_board = updated_state[curr_board]
        updated_state[curr_board] = self.board_to_hash(modifiable_board)

        # BUG FIX: yield a per-move *copy* of the state. Previously the single
        # shared `updated_state` array was yielded and then reset below after
        # the consumer resumed, so any GameTreeNode retained by the caller
        # (e.g. node.children collected at the search root) ended up aliasing
        # a reverted state array.
        yield GameTreeNode(updated_state.copy(), i, parent=curr_board)

        # undo the move so the scratch buffers are clean for the next square
        modifiable_board[i] = 0
        updated_state[curr_board] = prev_board
def test_avoid_loss_in_next_move_5(game_cls, heuristic_func):
    """Searching from board 3 must choose cell 4 to avoid an imminent loss."""
    raw_boards = np.array(
        [
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, -1, 0, 1, 0, 0, 0, 1, 0, -1],
            [0, 0, 0, 0, -1, 1, 0, -1, 0, 1],
            [0, 0, 1, -1, 0, 0, -1, 0, 0, 0],  # move made here
            [0, 0, 0, 0, -1, 1, 0, 0, 1, 0],
            [0, 1, 0, 0, 0, -1, -1, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
            [0, 0, -1, -1, 1, 0, 0, 0, 0, 0],
            [0, 1, -1, 0, 0, 0, 0, 0, -1, 0],
            [0, -1, 1, 1, 0, 0, 0, 0, 0, -1]
        ],
        dtype='i1')
    hashed_state = np.array([game_cls.board_to_hash(board) for board in raw_boards])
    root = GameTreeNode(hashed_state, 3)
    searcher = AlphaBeta(root, game_cls, heuristic_func, 5)
    assert searcher.run() == 4
def test_generate_best_move_opponent_depth_2(game_cls, heuristic_func):
    """A depth-5 search starting on board 4 should select cell 6."""
    raw_boards = np.array(
        [
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, -1, -1, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 1, 0, -1, 0, 0, 0, 0],
            [0, 0, 0, -1, 1, -1, -1, 0, 1, 1],
            [0, 0, 1, 0, 0, 0, 0, -1, -1, 0],  # board that we make a move on
            [0, 1, 0, 1, 0, -1, 0, 1, -1, -1],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            [0, -1, 0, 0, -1, 1, 0, 0, 0, 0],
            [0, 1, 0, 1, -1, 0, 0, 0, 0, 0],
            [0, 0, 0, -1, 0, 1, 0, 1, 0, -1]
        ],
        dtype='i1')
    hashed_state = np.array([game_cls.board_to_hash(board) for board in raw_boards])
    root = GameTreeNode(hashed_state, 4)
    searcher = AlphaBeta(root, game_cls, heuristic_func, 5)
    assert searcher.run() == 6
def test_avoid_losing_in_next_turn_4(game_cls, heuristic_func):
    """The search must not allow the opponent to win on their next turn,
    so cell 6 on board 4 is forbidden."""
    raw_boards = np.array(
        [
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, -1, -1, 0, 1, 0, -1, 0, 0],
            [0, 0, 0, 1, -1, 0, -1, 1, 0, 0],
            [0, 0, 1, 1, 0, -1, 0, 0, 0, -1],
            [0, 0, -1, 0, 1, 0, 0, 0, -1, 0],  # the board we need to make a move on
            [0, -1, 0, 0, 0, 1, 1, -1, 0, -1],
            [0, -1, 1, 0, 0, 0, 0, -1, 1, 0],
            [0, 1, 0, 0, 0, -1, 1, 0, -1, 1],
            [0, 0, 0, 0, -1, 1, 0, 0, 0, 1],
            [0, 1, 0, -1, 1, 0, -1, 0, 0, 0]
        ],
        dtype='i1')
    hashed_state = np.array([game_cls.board_to_hash(board) for board in raw_boards])
    root = GameTreeNode(hashed_state, 4)
    searcher = AlphaBeta(root, game_cls, heuristic_func, 5)
    assert searcher.run() != 6
def test_negamax_avoid_loss_in_next_turn_1(game_cls, heuristic_func):
    """The search must not hand the opponent a win next turn: playing cell 1
    on board 3 is the losing move. Dumps depth-1 nodes on failure."""
    raw_boards = np.array(
        [
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, -1, -1, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # The board we must make a move on
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, -1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        ],
        dtype='i1')
    hashed_state = np.array([game_cls.board_to_hash(board) for board in raw_boards])
    root = GameTreeNode(hashed_state, 3)
    searcher = AlphaBeta(root, game_cls, heuristic_func, 5)
    chosen_move = searcher.run()
    try:
        assert chosen_move != 1
    except AssertionError:
        # print the depth-1 children to help diagnose why the bad move won
        print_depth_1_nodes(root, chosen_move, searcher.nodes_generated)
        raise
def test_not_win_state(game_cls: Game):
    """A nearly empty global board (one lone -1 piece) must not be terminal."""
    nearly_empty = np.array(
        [
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, -1, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        ],
        dtype='i1')
    hashed_boards = np.array([game_cls.board_to_hash(board) for board in nearly_empty])
    node = GameTreeNode(hashed_boards, 4)
    assert game_cls.is_terminal(node) is False
def play(self):
    """ Choose the next move via a depth-7 alpha-beta search and place it.

    Returns:
        The cell index chosen by the search.
    """
    # update game statistics
    self._number_moves_made += 1

    # hash each sub-board so the search works on the compact representation
    hashed_state = np.array(
        [self._game.board_to_hash(board) for board in self._boards])

    # root of the search tree is the board we are forced to move on
    root = GameTreeNode(hashed_state, self._curr)

    # run alpha-beta to depth 7 and commit the resulting move
    chosen_move = AlphaBeta(root, self._game, self._heuristic, 7).run()
    self.place(self._curr, chosen_move, self._player)
    return chosen_move
def filled_board_state(game_cls: Game):
    """Fixture: GameTreeNode over the hashed FILLED_BOARD, current board 4."""
    hashed_boards = np.array(
        [game_cls.board_to_hash(board) for board in FILLED_BOARD])
    return GameTreeNode(hashed_boards, 4)
def initial_board_state():
    """Fixture: GameTreeNode wrapping INITIAL_BOARD with current board 5."""
    return GameTreeNode(INITIAL_BOARD, 5)
def multiple_wins_state_node():
    """Fixture: node whose state contains several winning lines; board 1."""
    node = GameTreeNode(BOARD_WITH_MULTIPLE_WINS, 1)
    return node
def partial_board_state_node():
    """Fixture: node over the partially played PARTIAL_BOARD; board 5."""
    node = GameTreeNode(PARTIAL_BOARD, 5)
    return node
def full_board_state_node():
    """Fixture: node over the completely filled FULL_BOARD; board 1."""
    node = GameTreeNode(FULL_BOARD, 1)
    return node
def initial_board_state_node(game_cls):
    """Fixture: node over the hashed INITIAL_BOARD; current board 5."""
    hashed_boards = np.array(
        [game_cls.board_to_hash(board) for board in INITIAL_BOARD])
    return GameTreeNode(hashed_boards, 5)
def almost_full_board():
    """Fixture: node over the nearly complete ALMOST_FULL state; board 1."""
    node = GameTreeNode(ALMOST_FULL, 1)
    return node
def partial_board():
    """Fixture: node over PARTIAL_BOARD with current board 8."""
    node = GameTreeNode(PARTIAL_BOARD, 8)
    return node