def negamax_alpha_beta(board: np.ndarray, player: BoardPiece, depth: int,
                       alpha: float, beta: float) -> float:
    """
    Search the game tree using negamax with alpha-beta pruning.

    :param board: current board state
    :param player: current player
    :param depth: max depth to search in game tree
    :param alpha: alpha value for pruning
    :param beta: beta value for pruning
    :return: heuristic value of `board` from the perspective of `player`
    """
    # if we're at an end state, score the board directly
    if (depth == 0) or check_game_over(board):
        return evaluate_end_state(board, player)

    # otherwise loop over child nodes
    other_player = BoardPiece(player % 2 + 1)
    value = -np.inf
    for move in get_valid_moves(board):
        value = max(
            value,
            -negamax_alpha_beta(
                apply_player_action(board, move, player, copy=True),
                other_player, depth - 1, -beta, -alpha))
        alpha = max(alpha, value)
        if alpha >= beta:
            break
    # print(f'value:{value}')
    # print(f'depth = {depth}; end state = {check_game_over(board)}; player = {player}')
    # print(f'move:{move}; max value:{value}')
    return value
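# A minimal sketch of how negamax_alpha_beta might be driven from the root to
# pick a move. The function name generate_move_negamax_sketch and the default
# depth are hypothetical; only get_valid_moves, apply_player_action, BoardPiece,
# PlayerAction, and negamax_alpha_beta above are taken from the module.
def generate_move_negamax_sketch(board: np.ndarray, player: BoardPiece,
                                 depth: int = 4) -> PlayerAction:
    other_player = BoardPiece(player % 2 + 1)
    best_value, best_move = -np.inf, None
    for move in get_valid_moves(board):
        # score each child from the opponent's perspective and negate it
        child = apply_player_action(board, move, player, copy=True)
        value = -negamax_alpha_beta(child, other_player, depth - 1, -np.inf, np.inf)
        if value > best_value:
            best_value, best_move = value, move
    return PlayerAction(best_move)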
def negamax(
        board: np.ndarray,
        player: BoardPiece,
        depth: int,
) -> float:
    """
    Search the game tree using plain negamax.

    This is "colorless" negamax -- it assumes the heuristic value is computed
    from the perspective of the player it's called on.

    :param board: current board state
    :param player: current player
    :param depth: max depth to search in game tree
    :return: heuristic value of `board` from the perspective of `player`
    """
    # if we're at an end state, score the board directly
    if (depth == 0) or check_game_over(board):
        return evaluate_end_state(board, player)

    # otherwise loop over child nodes
    other_player = BoardPiece(player % 2 + 1)
    value = -np.inf
    for move in get_valid_moves(board):
        value = max(
            value,
            -negamax(apply_player_action(board, move, player, copy=True),
                     other_player, depth - 1))
    # print(f'value:{value}')
    # print(f'depth = {depth}; end state = {check_game_over(board)}; player = {player}')
    # print(f'move:{move}; max value:{value}')
    return value
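# A quick consistency check (a sketch, not part of the module above): with a
# full (-inf, inf) window, the alpha-beta version should return the same value
# as plain negamax from the same position. Assumes initialize_game_state from
# agents.common and that BoardPiece(1) is a valid first player, as the
# BoardPiece(player % 2 + 1) pattern above suggests.
def _check_negamax_agreement_sketch(depth: int = 3) -> bool:
    from agents.common import initialize_game_state
    board = initialize_game_state()
    first_player = BoardPiece(1)
    plain = negamax(board, first_player, depth)
    pruned = negamax_alpha_beta(board, first_player, depth, -np.inf, np.inf)
    return plain == pruned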
def test_get_valid_moves():
    from agents.common import get_valid_moves
    from agents.common import initialize_game_state

    dummy_board = initialize_game_state()
    all_moves = np.arange(dummy_board.shape[1])
    assert np.all(get_valid_moves(dummy_board) == all_moves)
def unexpanded_moves(self) -> list:
    """
    Return which moves have not been expanded yet.

    :return: list of unexpanded moves (it's a list so we can pop it, later)
    """
    # return [m for m in self.legal_moves if m not in self.expanded_moves]
    # lazily build the list the first time it is requested
    if self._unexpanded_moves is None:
        self._unexpanded_moves = list(get_valid_moves(self.board))
    return self._unexpanded_moves
def __init__(self, board: np.ndarray, to_play: BoardPiece,
             last_move: PlayerAction = None, parent=None):
    # board
    self.board = board

    # parent / game context
    self.to_play = to_play  # which player's turn it is
    self.last_move = last_move  # what move resulted in the board
    self.parent = parent  # parent node -- the previous state

    # children attributes
    self.children = {}  # dict of children resulting from valid moves
    self.legal_moves = list(get_valid_moves(board))
    self._unexpanded_moves = list(get_valid_moves(board))  # moves not evaluated yet
    # self._unexpanded_moves = None  # moves not evaluated yet
    self.expanded_moves = []

    # MCTS statistics
    self.n_plays = 0
    self.n_wins = 0
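# A minimal sketch of how the pop-able unexpanded_moves list might be consumed
# during MCTS expansion. The function _expand_sketch is hypothetical: it assumes
# unexpanded_moves is a property returning the underlying list, that children is
# keyed by move (as root.get_child(a) in the tests suggests), and it reuses the
# MonteCarloNode constructor defined above.
def _expand_sketch(node: MonteCarloNode) -> MonteCarloNode:
    move = node.unexpanded_moves.pop()  # take one move we haven't tried yet
    node.expanded_moves.append(move)
    child_board = apply_player_action(node.board, move, node.to_play, copy=True)
    next_player = BoardPiece(node.to_play % 2 + 1)
    child = MonteCarloNode(child_board, next_player, last_move=move, parent=node)
    node.children[move] = child
    return child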
def generate_move_random(
        board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Choose a valid, non-full column randomly and return it as `action`.

    :param board: current board state
    :param player: the player to move
    :param saved_state: optional state carried over between moves
    :return: the chosen action and the (unchanged) saved state
    """
    open_moves = get_valid_moves(board)
    action = np.random.choice(open_moves)
    # TODO: what to do with saved_state?
    return action, saved_state
def minimax_value(board: np.ndarray, player: BoardPiece, maxing: bool,
                  depth: int) -> float:
    """
    Recursively compute the minimax value of `board` from `player`'s perspective.

    :param board: current board state
    :param player: the player whose perspective the board is scored from
    :param maxing: True if it is `player`'s turn (maximizing), False otherwise
    :param depth: remaining search depth
    :return: minimax value of `board` for `player`
    """
    other_player = BoardPiece(player % 2 + 1)
    valid_moves = get_valid_moves(board)

    if depth == 0 or check_game_over(board):
        return evaluate_end_state(board, player)
    elif maxing:
        value = -np.inf
        for move in valid_moves:
            # print('Maxing')
            # print('move:', move)
            MMv = minimax_value(board=apply_player_action(board, move, player, copy=True),
                                player=player, maxing=False, depth=depth - 1)
            # print('MM value:', MMv)
            value = max(value, MMv)
    else:
        # on the minimizing ply it is the opponent who drops a piece
        value = np.inf
        for move in valid_moves:
            # print('Mining')
            # print('move:', move)
            MMv = minimax_value(board=apply_player_action(board, move, other_player, copy=True),
                                player=player, maxing=True, depth=depth - 1)
            # print('MM value:', MMv)
            value = min(value, MMv)
    return value
def simulate(self, node: MonteCarloNode) -> Union[BoardPiece, GameState]:
    """
    Simulate a game from a given node -- the outcome is either the winning
    player or GameState.IS_DRAW.

    :param node: node to start the random rollout from
    :return: the winning BoardPiece, or GameState.IS_DRAW
    """
    current_rollout_state = node.board.copy()
    curr_player = node.to_play
    while not check_game_over(current_rollout_state):
        possible_moves = get_valid_moves(current_rollout_state)
        if possible_moves.size > 1:
            action = np.random.choice(possible_moves)
        else:
            action = possible_moves[0]
        current_rollout_state = apply_player_action(current_rollout_state, action,
                                                    curr_player, copy=True)
        curr_player = BoardPiece(curr_player % 2 + 1)
    return evaluate_end_state(current_rollout_state)
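# A minimal sketch of how a rollout result from simulate() might be propagated
# back up the tree, using only the node attributes defined in __init__
# (n_plays, n_wins, parent, to_play). The win-attribution convention -- credit a
# node when the winner is the player who moved into it, i.e. not node.to_play --
# is an assumption, not taken from the module above.
def _backpropagate_sketch(node: MonteCarloNode,
                          winner: Union[BoardPiece, GameState]) -> None:
    while node is not None:
        node.n_plays += 1
        # node.to_play is about to move here, so a win by the other player
        # rewards the move that produced this node
        if winner != GameState.IS_DRAW and winner != node.to_play:
            node.n_wins += 1
        node = node.parent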
def alpha_beta_value(board: np.ndarray, player: BoardPiece, maxing: bool,
                     depth: int, alpha: float, beta: float) -> float:
    """
    Minimax value of `board` from `player`'s perspective, with alpha-beta pruning.

    :param board: current board state
    :param player: the player whose perspective the board is scored from
    :param maxing: True if it is `player`'s turn (maximizing), False otherwise
    :param depth: remaining search depth
    :param alpha: best value the maximizing player can guarantee so far
    :param beta: best value the minimizing player can guarantee so far
    :return: minimax value of `board` for `player`
    """
    other_player = BoardPiece(player % 2 + 1)
    valid_moves = get_valid_moves(board)

    if depth == 0 or check_game_over(board):
        return evaluate_end_state(board, player)
    elif maxing:
        value = -np.inf
        for move in valid_moves:
            ABv = alpha_beta_value(board=apply_player_action(board, move, player, copy=True),
                                   player=player, maxing=False, depth=depth - 1,
                                   alpha=alpha, beta=beta)
            value = max(value, ABv)
            alpha = max(alpha, value)
            if alpha >= beta:
                break
        return value
    else:
        # on the minimizing ply it is the opponent who drops a piece
        value = np.inf
        for move in valid_moves:
            ABv = alpha_beta_value(board=apply_player_action(board, move, other_player, copy=True),
                                   player=player, maxing=True, depth=depth - 1,
                                   alpha=alpha, beta=beta)
            value = min(value, ABv)
            beta = min(beta, value)
            if beta <= alpha:
                break
        return value
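# A quick sketch (not a test from the module above) checking that alpha-beta
# pruning with a full window agrees with plain minimax from the empty board.
# Assumes initialize_game_state from agents.common and BoardPiece(1) as the
# player to move.
def _check_alpha_beta_agreement_sketch(depth: int = 3) -> bool:
    from agents.common import initialize_game_state
    board = initialize_game_state()
    first_player = BoardPiece(1)
    mm = minimax_value(board, first_player, maxing=True, depth=depth)
    ab = alpha_beta_value(board, first_player, maxing=True, depth=depth,
                          alpha=-np.inf, beta=np.inf)
    return mm == ab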
def test_best_play():
    # check that best plays are max n_plays
    tree = MonteCarlo(player)
    tree.make_node(initial_state, player)
    key = hash(initial_state.tostring()) + hash(player)
    root = tree.nodes[key]
    tree.run_search(root.board, root.to_play, n_sims=5000)

    # check that best move is the max of n_plays of children
    scores = [root.get_child(a).n_plays for a in root.legal_moves]
    assert tree.best_play(root.board, root.to_play)[0] == \
        root.legal_moves[np.argmax(scores)]

    # check that winning moves are selected
    for c in get_valid_moves(initial_state):
        near_win = copy.deepcopy(initial_state)
        near_win[:3, c] = player
        # print(near_win)
        tree = MonteCarlo(player)
        tree.make_node(near_win, player)
        tree.run_search(near_win, player, n_sims=1000)
        # print(tree.get_stats(near_win, player))
        assert tree.best_play(near_win, player)[0] == c
def generate_move_minimax(
        board: np.ndarray, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Choose an action by first checking for immediate wins and blocks, then
    falling back on an alpha-beta minimax search of depth MAX_DEPTH.

    :param board: current board state
    :param player: the player to move
    :param saved_state: optional state carried over between moves
    :return: the chosen action and the (unchanged) saved state
    """
    open_moves = get_valid_moves(board)
    print(f'Open moves: {open_moves}')
    new_states = [
        apply_player_action(board, move, player, copy=True)
        for move in open_moves
    ]

    # if a move results in a win, play it
    winning_moves = np.array([
        check_end_state(state, player) for state in new_states
    ]) == GameState.IS_WIN
    if np.any(winning_moves):
        actions = open_moves[np.argwhere(winning_moves)].squeeze()
        if actions.size > 1:
            action = np.random.choice(actions)
        else:
            action = actions
        print(f'playing action {action} for a win')
        return action, saved_state

    # if a move results in blocking an opponent's win, play it
    other_player = BoardPiece(player % 2 + 1)
    new_states_other = [
        apply_player_action(board, move, other_player, copy=True)
        for move in open_moves
    ]
    blocking_moves = np.array([
        check_end_state(state, other_player) for state in new_states_other
    ]) == GameState.IS_WIN
    if np.any(blocking_moves):
        actions = open_moves[np.argwhere(blocking_moves)].squeeze()
        if actions.size > 1:
            action = np.random.choice(actions)
        else:
            action = actions
        print(f'playing action {action} for a block')
        return action, saved_state

    # otherwise, use the heuristic search to score possible states; after
    # `player` drops a piece it is the opponent's turn, so the search starts
    # on a minimizing ply
    # scores = [minimax_value(apply_player_action(board, move, player, copy=True),
    #                         player, False, MAX_DEPTH) for move in open_moves]
    scores = [
        alpha_beta_value(apply_player_action(board, move, player, copy=True),
                         player, False, MAX_DEPTH, alpha=-np.inf, beta=np.inf)
        for move in open_moves
    ]

    # randomly select among best moves
    if np.sum(scores == np.max(scores)) > 1:
        best_moves = open_moves[np.argwhere(scores == np.max(scores))].squeeze()
        action = np.random.choice(best_moves)
    else:
        action = open_moves[np.argmax(scores)].squeeze()
    print(f'Heuristic values: {scores}')
    print(f'playing action {action} with heuristic value {np.max(scores)}')
    return action, saved_state
def test_init_node():
    assert np.all(initial_node.legal_moves == get_valid_moves(initial_state))
    assert np.all(initial_node.legal_moves == initial_node.unexpanded_moves)
    assert np.all(initial_node.board == initial_state)