Example 1
def negamax_alpha_beta(board: np.ndarray, player: BoardPiece, depth: int,
                       alpha: float, beta: float) -> float:
    """
    Search game tree using alpha-beta pruning with negamax.

    :param board: current board state
    :param player: current player
    :param depth: max depth to search in game tree
    :param alpha: alpha value for pruning
    :param beta: beta value for pruning
    :return: value of the board from player's perspective
    """

    # at max depth or a terminal state, evaluate the board
    if (depth == 0) or check_game_over(board):
        return evaluate_end_state(board, player)

    # otherwise loop over child nodes
    other_player = BoardPiece(player % 2 + 1)
    value = -np.inf
    for move in get_valid_moves(board):
        value = max(
            value, -negamax_alpha_beta(
                apply_player_action(board, move, player, copy=True),
                other_player, depth - 1, -beta, -alpha))
        alpha = max(alpha, value)
        if alpha >= beta:
            break
    return value
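
A root-level wrapper is not part of the example; the sketch below is one hypothetical way to pick a move with negamax_alpha_beta. Only check_game_over and initialize_game_state are confirmed to live in agents.common (see Example 3); the other imports are assumptions.

import numpy as np
# Assumed imports -- agents.common is confirmed only for check_game_over and
# initialize_game_state; the remaining names are assumed to live there too.
from agents.common import BoardPiece, apply_player_action, get_valid_moves

def choose_move_alpha_beta(board, player, depth=6):
    # Negamax convention at the root: each child's value is negated, and the
    # move with the highest negated value is kept. Full (-inf, inf) window.
    other_player = BoardPiece(player % 2 + 1)
    best_move, best_value = None, -np.inf
    for move in get_valid_moves(board):
        value = -negamax_alpha_beta(
            apply_player_action(board, move, player, copy=True),
            other_player, depth - 1, -np.inf, np.inf)
        if value > best_value:
            best_move, best_value = move, value
    return best_move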
Example 2
def negamax(
    board: np.ndarray,
    player: BoardPiece,
    depth: int,
) -> float:
    """
    Search game tree using plain negamax.
    This is "colorless" negamax: it assumes the heuristic value is from the perspective of the player it's called on.

    :param board: current board state
    :param player: current player
    :param depth: max depth to search in game tree
    :return: value of the board from player's perspective
    """
    # at max depth or a terminal state, evaluate the board
    if (depth == 0) or check_game_over(board):
        return evaluate_end_state(board, player)

    # otherwise loop over child nodes
    other_player = BoardPiece(player % 2 + 1)
    value = -np.inf
    for move in get_valid_moves(board):
        value = max(
            value,
            -negamax(apply_player_action(board, move, player, copy=True),
                     other_player, depth - 1))
    return value
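
The sign flip in the recursive call rests on a simple identity: one player's best value equals the negation of the other player's best value over the negated scores. A two-line check of that identity:

# max(a, b, c) == -min(-a, -b, -c): negating scores turns the opponent's
# minimization into the mover's maximization, which is the whole negamax trick.
scores = [3.0, -1.0, 2.0]
assert max(scores) == -min(-s for s in scores)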
Example 3
def test_check_game_over():
    from agents.common import check_game_over, initialize_game_state

    dummy_board = initialize_game_state()
    assert check_game_over(dummy_board) is False

    # check a horizontal win
    horizontal_win_player1 = dummy_board.copy()
    horizontal_win_player1[0, 0:4] = PLAYER1
    assert check_game_over(horizontal_win_player1) is True

    # check a vertical win
    vertical_win_player1 = dummy_board.copy()
    vertical_win_player1[0:4, 0] = PLAYER1
    assert check_game_over(vertical_win_player1) is True

    # check a diagonal win
    diagonal_win_player1 = dummy_board.copy()
    for i in range(4):
        diagonal_win_player1[i, i] = PLAYER1
    assert check_game_over(diagonal_win_player1) is True
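
The suite checks only one diagonal direction; a hypothetical companion case (not in the original tests) for the anti-diagonal could look like this, assuming check_game_over detects wins along both diagonals:

def test_check_game_over_anti_diagonal():
    from agents.common import check_game_over, initialize_game_state

    # hypothetical extra case: an anti-diagonal four-in-a-row is also a win
    anti_diagonal_win_player1 = initialize_game_state()
    for i in range(4):
        anti_diagonal_win_player1[3 - i, i] = PLAYER1
    assert check_game_over(anti_diagonal_win_player1) is True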
Example 4
def minimax_value(board: np.ndarray, player: BoardPiece, maxing: bool,
                  depth: int) -> float:
    """

    :param board:
    :param player:
    :param maxing:
    :param depth:
    :return:
    """
    other_player = BoardPiece(player % 2 + 1)
    valid_moves = get_valid_moves(board)

    if depth == 0 or check_game_over(board):
        return evaluate_end_state(board, player)
    elif maxing:
        value = -np.inf
        for move in valid_moves:
            # player moves here; the opponent minimizes one layer down
            MMv = minimax_value(board=apply_player_action(board,
                                                          move,
                                                          player,
                                                          copy=True),
                                player=player,
                                maxing=False,
                                depth=depth - 1)
            value = max(value, MMv)
    else:
        value = np.inf
        for move in valid_moves:
            # the opponent moves here, so the action is applied as other_player
            MMv = minimax_value(board=apply_player_action(board,
                                                          move,
                                                          other_player,
                                                          copy=True),
                                player=player,
                                maxing=True,
                                depth=depth - 1)
            value = min(value, MMv)
    return value
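
A root call evaluates the position for the player about to move, so the top layer maximizes; choosing an actual move means repeating this call on each child. The depth below is illustrative, and board and player are assumed to be in scope:

# root call: it is player's turn, so the first layer maximizes
value = minimax_value(board, player, maxing=True, depth=4)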
Example 5
File: mcts.py Project: tah0/conn4
    def simulate(self, node: MonteCarloNode) -> Union[BoardPiece, GameState]:
        """
        Simulate a game from a given node; the outcome is either the winning player or GameState.IS_DRAW.

        :param node: node whose board state the rollout starts from
        :return: the winning BoardPiece, or GameState.IS_DRAW
        """
        current_rollout_state = node.board.copy()
        curr_player = node.to_play
        while not check_game_over(current_rollout_state):
            possible_moves = get_valid_moves(current_rollout_state)
            # np.random.choice handles one-element arrays, so no special case is needed
            action = np.random.choice(possible_moves)

            current_rollout_state = apply_player_action(current_rollout_state, action, curr_player, copy=True)
            curr_player = BoardPiece(curr_player % 2 + 1)
        return evaluate_end_state(current_rollout_state)
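
The caller has to handle both outcome types the docstring mentions. A minimal sketch of turning simulate's result into a scalar reward; the 1 / 0.5 / 0 credits are an assumption, not necessarily what mcts.py does:

winner = self.simulate(node)            # winning BoardPiece or GameState.IS_DRAW
if winner == GameState.IS_DRAW:
    reward = 0.5                        # assumed draw credit
elif winner == node.to_play:
    reward = 1.0                        # assumed win credit for the node's mover
else:
    reward = 0.0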
Example 6
File: mcts.py Project: tah0/conn4
    def run_search(self, state: np.ndarray, to_play: BoardPiece, n_sims=3000):
        """
        Find an unexpanded, non-terminal node, expand it, simulate a game from it, and backprop the result.

        :param state: board state to search from
        :param to_play: which player's turn it is
        :param n_sims: iterations to run
        :return: None; results are recorded via backpropagate
        """
        self.make_node(state, to_play)

        for _ in range(n_sims):
            node = self.select(state, to_play)
            if not check_game_over(node.board):
                node = self.expand(node)
            winner = self.simulate(node)
            self.backpropagate(node, winner)
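
How self.select descends the tree is not shown here. The conventional choice is the UCT score, sketched below under the assumption (not confirmed by the repo) that each node tracks win and visit counts:

import numpy as np

def uct_score(wins, visits, parent_visits, c=np.sqrt(2)):
    # Standard UCT: exploitation (win rate) plus an exploration bonus that
    # shrinks as a child is visited more often; unvisited children go first.
    if visits == 0:
        return np.inf
    return wins / visits + c * np.sqrt(np.log(parent_visits) / visits)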
Example 7
def alpha_beta_value(board: np.ndarray, player: BoardPiece, maxing: bool,
                     depth: int, alpha: float, beta: float) -> float:
    """
    Evaluate a board state with minimax plus alpha-beta pruning.

    :param board: current board state
    :param player: player from whose perspective the board is evaluated
    :param maxing: True if the current layer maximizes, False if it minimizes
    :param depth: max depth to search in game tree
    :param alpha: alpha value for pruning
    :param beta: beta value for pruning
    :return: value of the board from player's perspective
    """
    other_player = BoardPiece(player % 2 + 1)
    valid_moves = get_valid_moves(board)

    if depth == 0 or check_game_over(board):
        return evaluate_end_state(board, player)
    elif maxing:
        value = -np.inf
        for move in valid_moves:
            ABv = alpha_beta_value(board=apply_player_action(board,
                                                             move,
                                                             player,
                                                             copy=True),
                                   player=player,
                                   maxing=False,
                                   depth=depth - 1,
                                   alpha=alpha,
                                   beta=beta)
            value = max(value, ABv)
            alpha = max(alpha, value)
            if alpha >= beta:
                break
        return value
    else:
        value = np.inf
        for move in valid_moves:
            # the opponent moves here, so the action is applied as other_player
            ABv = alpha_beta_value(board=apply_player_action(board,
                                                             move,
                                                             other_player,
                                                             copy=True),
                                   player=player,
                                   maxing=True,
                                   depth=depth - 1,
                                   alpha=alpha,
                                   beta=beta)
            value = min(value, ABv)
            beta = min(beta, value)
            if beta <= alpha:
                break
        return value
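
A root call opens the full window, which each cutoff then narrows; the depth is illustrative, and board and player are assumed to be in scope:

# root call with the full (-inf, +inf) window; it is player's turn
value = alpha_beta_value(board, player, maxing=True, depth=6,
                         alpha=-np.inf, beta=np.inf)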
Example 8
File: mcts.py Project: tah0/conn4
    def is_game_over(self):
        """
        Check if this node is a terminal game state (a win or a draw).

        :return: bool
        """
        return check_game_over(self.board)