예제 #1
0
def generate_move_minimax(
    board: Board, player: Player, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Generates a move for player on the current board.
    Optionally consumes and updates a saved state.
    :param board: The board state
    :param player: The player to generate a move for
    :param saved_state: (optional) Saved state
    :return: Returns the calculated move and the new saved state
    """

    # first, check if a winning move is possible. This can fix a problem where MiniMax
    # would not finish the game since all moves will have
    for move in board.actions():
        nb = board.drop_piece_copy(move)
        if nb.check_winner(player):
            return move, saved_state

    # retrieve our MiniMaxCache from saved state if possible, otherwise create the cache object
    cache = saved_state if type(
        saved_state) == MiniMaxCache2 else MiniMaxCache2()

    # calculate the MiniMax result
    minimax_result = mini_max(board,
                              alpha=-np.inf,
                              beta=np.inf,
                              depth=3,
                              max_player=player,
                              cache=cache)

    print(
        f"generate_move_minimax() returned move {minimax_result.best_move}, h={minimax_result.value}"
    )
    return minimax_result.best_move, cache
예제 #2
0
def generate_move_random(
    board: Board, player: Player, saved_state: Optional[SavedState]
) -> Tuple[PlayerAction, Optional[SavedState]]:
    """
    Generates a random move for a given board state.
    :param board: The board to generate the move for
    :param player: The player to generate a move for
    :param saved_state: The saved state (not used here)
    :return: The randomly generated move and the passed saved state
    """

    print(f"I'm the random agent. Playing as {board.player}")

    available_columns = board.actions()

    action = PlayerAction(rnd.choice(available_columns))
    print(f"Choosing to play {action}!")

    return action, saved_state
예제 #3
0
def mini_max(board: Board, alpha: float, beta: float, depth: int, max_player: Player, cache: MiniMaxCache2) \
        -> MiniMaxResult:
    """
    Execute the MiniMax algorithm with alpha-beta-pruning on a board.
    :param board: The board
    :param alpha: Lower boundary
    :param beta: Upper boundary
    :param depth: Maximum depth
    :param max_player: The maximizing player ID
    :param cache: The MiniMax cache to use
    :return: Returns the best possible result after traversing the tree with the given depth.
    """

    # first, let's see if we already cached this value
    cached_value = cache.get(board, max_player)
    if cached_value is not None:
        return cached_value

    # If we hit max depth or the board is in a final state, calculate and return the heuristic.
    # We cannot decide about the best move here, so leave it empty.
    if depth == 0 or board.get_state() != GameState.ONGOING:
        return MiniMaxResult(None,
                             calculate_heuristic(board, max_player).value)

    # collect a list of available moves from the board and order them according to moveOrder
    # which orders moves based on their distance to the middle column
    available_moves = board.actions()
    available_moves = [move for move in moveOrder if move in available_moves]

    # check if we are maximizing right now
    is_maximizing = (board.player == max_player)

    # store min and max value, initialize with boundaries
    max_value, min_value = alpha, beta
    # initialize the best move randomly
    best_move: Column = random.choice(available_moves)

    # enumerate all available moves. Recurse for each while respecting is_maximizing and adjusting
    # min_value and max_value accordingly.
    for move in available_moves:
        # create a board with the new state
        child_board = board.drop_piece_copy(move)

        if is_maximizing:
            min_result = mini_max(child_board, max_value, beta, depth - 1,
                                  max_player, cache)
            if min_result == np.inf:
                # we cannot perform better than +inf.
                return MiniMaxResult(move, np.inf)
            elif min_result.value > max_value:
                max_value = min_result.value
                best_move = move
                if max_value >= beta:
                    # beta pruning
                    break
        else:
            max_result = mini_max(child_board, alpha, min_value, depth - 1,
                                  max_player, cache)
            if max_result.value == -np.inf:
                # we cannot perform better than -inf.
                return MiniMaxResult(move, -np.inf)
            elif max_result.value < min_value:
                min_value = max_result.value
                best_move = move
                if min_value <= alpha:
                    # alpha pruning
                    break

    # return the best move and according heuristic
    return MiniMaxResult(best_move, max_value if is_maximizing else min_value)