Code example #1
def generate_move_alpha_beta(
    board: Board, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Agent selects a move based on a minimax depth first search, with
    alpha-beta pruning.

    :param board: 2d array representing current state of the game
    :param player: the player making the current move (active player)
    :param saved_state: optional state carried between calls (returned
                        unchanged here)

    :return: the agent's selected move and the saved state
    """

    # If the board is empty, play in the center column
    if np.all(board == NO_PLAYER):
        action = np.floor(np.median(np.arange(board.shape[1])))
        return PlayerAction(action), saved_state

    board_cp = board.copy()
    # Call alpha_beta
    alpha0 = -100000
    beta0 = 100000
    score, action = alpha_beta(board_cp, player, True, 0, alpha0, beta0)

    return PlayerAction(action), saved_state
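
Note: these snippets rely on project-level names (Board, BoardPiece, PlayerAction, GameScore, SavedState, NO_PLAYER) that are defined elsewhere in the project and not shown here. The sketch below shows plausible minimal definitions, consistent with how the snippets use them; the project's actual definitions may differ.

# Minimal sketch of the shared types the snippets above assume.
# These are assumptions, not the project's actual definitions.
from typing import Optional, Tuple  # used by the snippet signatures

import numpy as np

Board = np.ndarray          # 6x7 array; row 0 is the bottom of the board
BoardPiece = np.int8        # 0 = empty, 1 = player 1, 2 = player 2
PlayerAction = np.int8      # column index to drop a piece into
GameScore = np.int32        # heuristic score of a board position
NO_PLAYER = BoardPiece(0)   # marker for an empty cell


class SavedState:
    """Opaque container for state an agent keeps between moves."""
    pass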
Code example #2
def generate_move_alpha_beta(
    board: Board, player: BoardPiece, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Agent selects a move based on a minimax depth first search, with
    alpha-beta pruning.

    :param board: 2d array representing current state of the game
    :param player: the player making the current move (active player)
    :param saved_state: optional state carried between calls

    :return: the agent's selected move and the saved state
    """

    # If the board is empty, play in the center column
    if np.all(board == NO_PLAYER):
        action = np.floor(np.median(np.arange(board.shape[1])))
        return PlayerAction(action), saved_state

    board_cp = board.copy()
    # Set the max depth to search
    # max_depth = 0
    # Call alpha_beta after initializing the alpha and beta values (+/- 'inf')
    a0 = -100000
    b0 = 100000
    score, action = alpha_beta(board_cp, player, True, 0, a0, b0)
    # score, action = alpha_beta_root(board_cp, player, True, max_depth, a0, b0)

    return PlayerAction(action), saved_state
Code example #3
def user_move(board: Board, _player: BoardPiece,
              saved_state: Optional[SavedState]):
    """ Prompts the human player to select a column to play in

    :param board: 2d array representing current state of the game
    :param _player: the player making the current move (active player)
    :param saved_state: optional state carried between calls

    :return: the chosen action and the saved state
    """

    # Initialize the action as a negative and cast as type PlayerAction
    action = PlayerAction(-1)

    # While action is not a valid move
    while not (0 <= action < board.shape[1]):
        try:
            # Human player input action
            action = input(f"Select column to play (0-{board.shape[1] - 1}): ")
            # If no input is entered, raise IndexError
            if not action:
                raise IndexError
            # Cast as type PlayerAction (non-numeric input raises ValueError)
            action = PlayerAction(int(action))
            # Test whether the column is full. If it is, top_row will throw
            # an IndexError
            top_row(board, action)
        except (IndexError, ValueError):
            print('This is not a valid action. Please choose again.')
            action = -1
            continue

    return action, saved_state
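
user_move depends on a top_row helper that raises IndexError when the chosen column is full; it is not shown in these examples. Below is a hypothetical sketch consistent with that behavior, reusing the type aliases sketched under example #1.

# Hypothetical top_row: return the lowest open row in `col`, raising
# IndexError when the column is full (the behavior user_move relies on).
# Indexing board[:, col] also raises IndexError for an out-of-range
# column, which user_move catches as well.
import numpy as np


def top_row(board: Board, col: PlayerAction) -> int:
    open_rows = np.argwhere(board[:, col] == NO_PLAYER)
    if open_rows.size == 0:
        raise IndexError(f"column {col} is full")
    return int(open_rows.min())  # row 0 is the bottom of the board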
Code example #4
def minimax(board: Board, player: BoardPiece, max_player: bool,
            depth: int) -> Tuple[GameScore, Optional[PlayerAction]]:
    """

    """
    # Make a list of columns that can be played in
    potential_actions = np.argwhere(board[-1, :] == 0)
    potential_actions = potential_actions.reshape(potential_actions.size)

    # If the node is at the max depth or a terminal node,
    # return the heuristic score of the node
    max_depth = 4
    # if depth == 0 or np.all(board != 0):
    if depth == max_depth or np.all(board != 0):
        return heuristic_solver(board, player, max_player), None
        # return heuristic_solver_bits(board, player, max_player), None

    # For each potential action, call minimax
    if max_player:
        score = -np.inf
        for col in potential_actions:
            # Root-level shortcut: block an immediate opponent win
            if (depth == 0 and connect_four(apply_player_action(board, col,
                                                                BoardPiece(player % 2 + 1), True),
                                            BoardPiece(player % 2 + 1), col)):
                return GameScore(100), PlayerAction(col)

            # Apply the current action and call minimax
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = minimax(new_board, BoardPiece(player % 2 + 1),
                                      False, depth + 1)
            new_score -= 5 * depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
                action = col
        return GameScore(score), PlayerAction(action)
    else:
        score = np.inf
        for col in potential_actions:
            # Apply the current action and call minimax
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = minimax(new_board, BoardPiece(player % 2 + 1),
                                      True, depth + 1)
            new_score += 5 * depth
            # Check whether the score updates
            if new_score < score:
                score = new_score
                action = col
        return GameScore(score), PlayerAction(action)
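
The search functions alternate players with BoardPiece(player % 2 + 1), which maps piece 1 to 2 and 2 to 1, and delegate move execution to apply_player_action, whose final argument appears to request a copy of the board. A plausible sketch, building on the top_row sketch above; the project's real implementation may differ.

# Plausible apply_player_action: drop `player`'s piece into column
# `action`, optionally on a copy of the board (the search code passes
# True so the caller's board is never mutated).
def apply_player_action(board: Board, action: PlayerAction,
                        player: BoardPiece, copy: bool = False) -> Board:
    if copy:
        board = board.copy()
    board[top_row(board, action), action] = player
    return board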
Code example #5
def generate_move_minimax(board: Board, player: BoardPiece,
                          saved_state: Optional[SavedState]
                          ) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Agent selects a move based on a minimax depth first search

    :param board: 2d array representing current state of the game
    :param player: the player making the current move (active player)
    :param saved_state: optional state carried between calls

    :return: the agent's selected move and the saved state
    """

    # If the board is empty, play in the center column
    if np.all(board == NO_PLAYER):
        action = np.floor(np.median(np.arange(board.shape[1])))
        return PlayerAction(action), saved_state

    board_cp = board.copy()
    # Call minimax
    score, action = minimax(board_cp, player, True, 0)

    return PlayerAction(action), saved_state
Code example #6
File: random.py  Project: marshineer/PCP2020
def generate_move_random(board: Board, player: BoardPiece,
                         saved_state: Optional[SavedState]
                         ) -> Tuple[PlayerAction, Optional[SavedState]]:
    """ Choose a valid, non-full column randomly and return it as action

    :param board: 2d array representing current state of the game
    :param player: the player making the current move (active player)
    :param saved_state: optional state carried between calls
    :return: returns a tuple containing the randomly generated move, and the
             saved state
    """

    free_cols = np.arange(board.shape[1])[np.argwhere(board[-1, :] == 0)]
    free_cols = free_cols.reshape(len(free_cols))
    action = PlayerAction(np.random.choice(free_cols))

    return action, saved_state
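
A quick usage sketch of the random agent on an empty board, assuming the type aliases sketched under example #1:

# Example call: the random agent returns some non-full column.
import numpy as np

board = np.zeros((6, 7), dtype=BoardPiece)  # empty 6x7 board
action, state = generate_move_random(board, BoardPiece(1), None)
assert 0 <= action < board.shape[1]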
Code example #7
def alpha_beta(board: Board, player: BoardPiece, max_player: bool, depth: int,
               alpha: GameScore,
               beta: GameScore) -> Tuple[GameScore, Optional[PlayerAction]]:
    """
    Recursively call alpha_beta to build a game tree to a pre-determined
    max depth. Once at the max depth, or at a terminal node, calculate and
    return the heuristic score. Scores farther down the tree are penalized.

    Shortcuts are built in to:
    1. Automatically take a win
    2. Automatically block a loss
    3. Return a large score for a win at any depth

    :param board: 2d array representing current state of the game
    :param player: the player making the current move (active player)
    :param max_player: boolean indicating whether the depth at which alpha_beta
                       is called from is a maximizing or minimizing player
    :param depth: the current depth in the game tree
    :param alpha: the currently best score for the maximizing player along the
                  path to root
    :param beta: the currently best score for the minimizing player along the
                  path to root

    :return: the best action and the associated score
    """
    # Make a list of columns that can be played in
    potential_actions = np.argwhere(board[-1, :] == 0)
    potential_actions = potential_actions.reshape(potential_actions.size)

    # If the node is at the max depth or a terminal node, calculate the score
    max_depth = 4
    win_score = 150
    state_p = check_end_state(board, player)
    # state_np = check_end_state(board, BoardPiece(player % 2 + 1))
    if state_p == GameState.IS_WIN:
        if max_player:
            return GameScore(win_score), None
        else:
            return GameScore(-win_score), None
    # elif state_np == GameState.IS_WIN:
    #     if max_player:
    #         return GameScore(-win_score), None
    #     else:
    #         return GameScore(win_score), None
    elif state_p == GameState.IS_DRAW:
        return GameScore(0), None
    elif depth == max_depth:
        return heuristic_solver(board, player, max_player), None
        # return heuristic_solver_bits(board, player, max_player), None

    # # If this is the root call, check for wins and block/win, prioritize wins
    # win_score = 150
    # if depth == 0:
    #     for col in potential_actions:
    #         if connect_four(apply_player_action(board, col, player, True),
    #                         player, col):
    #             return GameScore(win_score), PlayerAction(col)
    #     for col in potential_actions:
    #         if connect_four(apply_player_action(board, col,
    #                         BoardPiece(player % 2 + 1), True),
    #                         BoardPiece(player % 2 + 1), col):
    #             return GameScore(win_score), PlayerAction(col)

    # For each potential action, call alpha_beta
    if max_player:
        # score = -np.inf
        score = -100000
        for col in potential_actions:
            # Apply the current action and call alpha_beta
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         False, depth + 1, alpha, beta)
            new_score -= 5 * depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score >= beta:
                # print('Pruned a branch')
                break
            # Check whether the score updates alpha
            if score > alpha:
                alpha = score
        return GameScore(score), PlayerAction(action)
    else:
        # score = np.inf
        score = 100000
        for col in potential_actions:
            # Apply the current action and call alpha_beta
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         True, depth + 1, alpha, beta)
            new_score += 5 * depth
            # Check whether the score updates
            if new_score < score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                # print('Pruned a branch')
                break
            # Check whether the score updates beta
            if score < beta:
                beta = score
        return GameScore(score), PlayerAction(action)
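
Examples #7 and #8 also call check_end_state and compare against a GameState enum, neither of which appears in these snippets. Below is a plausible sketch; it assumes the connect_four helper referenced elsewhere accepts None for the last action and then scans the whole board, which the project may or may not do.

# Plausible GameState / check_end_state; the project's actual versions
# live elsewhere and may differ.
from enum import Enum

import numpy as np


class GameState(Enum):
    IS_WIN = 1
    IS_DRAW = -1
    STILL_PLAYING = 0


def check_end_state(board: Board, player: BoardPiece) -> GameState:
    # Assumption: connect_four(board, player, None) scans the whole
    # board for a four-in-a-row by `player`.
    if connect_four(board, player, None):
        return GameState.IS_WIN
    if np.all(board != NO_PLAYER):  # no open cells left
        return GameState.IS_DRAW
    return GameState.STILL_PLAYING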
Code example #8
def alpha_beta(
    board: Board,
    player: BoardPiece,
    max_player: bool,
    depth: int,
    alpha: GameScore,
    beta: GameScore,
) -> Tuple[GameScore, Optional[PlayerAction]]:
    """

    """
    # Make a list of columns that can be played in
    potential_actions = np.argwhere(board[-1, :] == 0)
    potential_actions = potential_actions.reshape(potential_actions.size)

    # If the node is at the max depth or a terminal node, calculate the score
    max_depth = 6
    win_score = 150
    state_p = check_end_state(board, player)
    state_np = check_end_state(board, BoardPiece(player % 2 + 1))
    # if depth == max_depth or np.all(board != 0):
    #     return heuristic_solver(board, player, max_player), None
    #     # return heuristic_solver_bits(board, player, max_player), None
    if state_p == GameState.IS_WIN:
        if max_player:
            return GameScore(win_score), None
        else:
            return GameScore(-win_score), None
    elif state_np == GameState.IS_WIN:
        if max_player:
            return GameScore(-win_score), None
        else:
            return GameScore(win_score), None
    elif depth == max_depth:
        return heuristic_solver(board, player, max_player), None
        # return heuristic_solver_bits(board, player, max_player), None
    elif state_p == GameState.IS_DRAW:
        return GameScore(0), None

    # # If this is the root call, check for wins and block/win, prioritize wins
    # win_score = 150
    # if depth == 0:
    #     for col in potential_actions:
    #         if connect_four(apply_player_action(board, col, player, True),
    #                         player, col):
    #             return GameScore(win_score), PlayerAction(col)
    #     for col in potential_actions:
    #         if connect_four(apply_player_action(board, col,
    #                                             BoardPiece(player % 2 + 1), True),
    #                         BoardPiece(player % 2 + 1), col):
    #             return GameScore(win_score), PlayerAction(col)

    # For each potential action, call alpha_beta
    if max_player:
        score = -100000
        for col in potential_actions:
            # Apply the current action and call alpha_beta
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         False, depth + 1, alpha, beta)
            new_score -= 5 * depth
            # Check whether the score updates
            if new_score > score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score >= beta:
                # print('Pruned a branch')
                break
            # Check whether the score updates alpha
            if score > alpha:
                alpha = score
        return GameScore(score), PlayerAction(action)
    else:
        score = 100000
        for col in potential_actions:
            # Apply the current action and call alpha_beta
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         True, depth + 1, alpha, beta)
            new_score += 5 * depth
            # Check whether the score updates
            if new_score < score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                # print('Pruned a branch')
                break
            # Check whether the score updates beta
            if score < beta:
                beta = score
        return GameScore(score), PlayerAction(action)
Code example #9
def alpha_beta(board: Board,
               player: BoardPiece,
               max_player: bool,
               depth: int,
               alpha: GameScore,
               beta: GameScore,
               init: bool = False) -> Tuple[GameScore, Optional[PlayerAction]]:
    """

    """
    # Make a list of columns that can be played in
    potential_actions = np.argwhere(board[-1, :] == 0)
    potential_actions = potential_actions.reshape(potential_actions.size)

    # If the node is at the max depth or a terminal node, calculate the score
    if depth == 0 or np.all(board != 0):
        # return heuristic_solver(board, player, max_player), None
        return heuristic_solver_bits(board, player, max_player), None

    # If this is the root call, check for wins and block/win
    if init:
        for col in potential_actions:
            if connect_four(
                    apply_player_action(board, col,
                                        BoardPiece(player % 2 + 1), True),
                    BoardPiece(player % 2 + 1), col):
                return GameScore(100), PlayerAction(col)
            elif connect_four(apply_player_action(board, col, player, True),
                              player, col):
                return GameScore(100), PlayerAction(col)

    # For each potential action, call alpha_beta
    if max_player:
        score = -np.inf
        for col in potential_actions:
            # Apply the current action and call alpha_beta
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         False, depth - 1, alpha, beta)
            # TODO: figure out how to fix this rule with depth counting backwards
            new_score -= 5 * (6 - depth)
            # Check whether the score updates
            if new_score > score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score >= beta:
                # print('Pruned a branch')
                break
            # Check whether the score updates alpha
            if score > alpha:
                alpha = score
        return GameScore(score), PlayerAction(action)
    else:
        score = np.inf
        for col in potential_actions:
            # Apply the current action and call alpha_beta
            new_board = apply_player_action(board, col, player, True)
            new_score, temp = alpha_beta(new_board, BoardPiece(player % 2 + 1),
                                         True, depth - 1, alpha, beta)
            # TODO: figure out how to fix this rule with depth counting backwards
            new_score += 5 * (6 - depth)
            # Check whether the score updates
            if new_score < score:
                score = new_score
                action = col
            # Check whether we can prune the rest of the branch
            if score <= alpha:
                # print('Pruned a branch')
                break
            # Check whether the score updates beta
            if score < beta:
                beta = score
        return GameScore(score), PlayerAction(action)
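
Unlike examples #7 and #8, this variant counts depth down toward zero (note the depth - 1 in the recursive calls), so the root call passes the maximum search depth, and init=True enables the win/block check. A usage sketch with depth 6, assuming the aliases sketched earlier; actually running it also requires the project's heuristic_solver_bits and connect_four helpers.

# Usage sketch for the countdown variant (depth 6, root call).
import numpy as np

board = np.zeros((6, 7), dtype=BoardPiece)
board[0, 3] = BoardPiece(1)  # some non-empty position
score, action = alpha_beta(board, BoardPiece(2), True, 6,
                           GameScore(-100000), GameScore(100000),
                           init=True)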