def _compute_num_cnots(U):
    r"""Return the number of CNOTs (0, 1, 2 or 3) needed to implement ``U``.

    The classification is read off from the trace and, where the trace alone
    is ambiguous, from the eigenvalues of

    .. math::

        \gamma(U) = (E^\dag U E) (E^\dag U E)^T,

    following the arguments of https://arxiv.org/abs/quant-ph/0308045.

    Args:
        U (tensor_like): matrix of the two-qubit operation; the criteria
            below are stated for U in SU(4).

    Returns:
        int: the number of CNOT gates required, between 0 and 3.
    """
    # A tolerance of around 1e-7 is needed for the trace checks so that a U
    # specified with only 8 decimal places is still classified correctly.
    trace_tol = 1e-7

    rotated = math.dot(Edag, math.dot(U, E))
    gamma = math.dot(rotated, math.T(rotated))
    gamma_trace = math.trace(gamma)

    # 0 CNOTs (tensor product): the trace is +4 or -4.
    trace_is_plus_four = math.allclose(gamma_trace, 4, atol=trace_tol)
    if trace_is_plus_four or math.allclose(gamma_trace, -4, atol=trace_tol):
        return 0

    # The trace alone cannot separate every 1-CNOT case from the 2-CNOT
    # cases, so the spectrum of gamma(U) is inspected as well.
    eigenvalues = math.linalg.eigvals(gamma)
    imag_parts = math.sort(math.imag(eigenvalues))

    # 1 CNOT: the trace is 0 and the eigenvalues are [-1j, -1j, 1j, 1j].
    # Some special 2-CNOT cases also have trace 0, hence the eigenvalue test.
    trace_vanishes = math.allclose(gamma_trace, 0j, atol=trace_tol)
    if trace_vanishes and math.allclose(imag_parts, [-1, -1, 1, 1]):
        return 1

    # 2 CNOTs: the trace is purely real (possibly 0). Otherwise the trace is a
    # complex number with a non-zero imaginary part and 3 CNOTs are required.
    trace_is_real = math.allclose(math.imag(gamma_trace), 0.0, atol=trace_tol)
    return 2 if trace_is_real else 3
def _decomposition_3_cnots(U, wires):
    r"""Decompose a two-qubit unitary ``U`` into a circuit with three CNOTs.

    The most general form of this decomposition is
    U = (A \otimes B) V (C \otimes D), where V is the interior of the circuit
    depicted below:
     -╭U- = -C--╭X--RZ(d)--╭C---------╭X--A-
     -╰U- = -D--╰C--RY(b)--╰X--RY(a)--╰C--B-

    The single-qubit factors are extracted for a SWAP-multiplied version of U,
    so in the returned circuit the operations obtained from A act on
    ``wires[1]`` and those obtained from B act on ``wires[0]`` (the comments
    in the body walk through why). The result matches U up to a global phase.

    Args:
        U (tensor_like): matrix of the two-qubit unitary to decompose.
        wires (Sequence): the two wires the operation acts on; accessed as
            ``wires[0]`` and ``wires[1]``.

    Returns:
        list: the concatenation of the ZYZ decompositions of C and D, the
        CNOT/RZ/RY interior, and the ZYZ decompositions of A and B.
    """
    # Following v1 of arXiv:quant-ph/0308033, start by multiplying U by a
    # SWAP (and a phase). This simplifies rearranging the gates, and it
    # cancels the SWAP that has to be appended to V further down in order to
    # fix its determinant.
    swap_U = np.exp(1j * np.pi / 4) * math.dot(math.cast_like(SWAP, U), U)

    # The RZ / RY angles are chosen as per Proposition V.1 of
    # quant-ph/0308033, from the phases of the eigenvalues of
    #    \gamma(U) = (E^\dag U E) (E^\dag U E)^T
    # evaluated for the SWAP-multiplied unitary. Any three eigenvalues can be
    # used.
    rotated = math.dot(Edag, math.dot(swap_U, E))
    gamma = math.dot(rotated, math.T(rotated))
    eigenvalues, _ = math.linalg.eig(gamma)

    # Sorting the phases keeps the output consistent across interfaces.
    phases = math.sort([math.angle(eigenvalue) for eigenvalue in eigenvalues])
    x, y, z = phases[0], phases[1], phases[2]

    # v1 and v3 of the paper combine the phases differently; this is the
    # combination from v3.
    alpha = (x + y) / 2
    beta = (x + z) / 2
    delta = (z + y) / 2

    # Interior of the circuit: the operations between C (x) D and A (x) B.
    interior_decomp = [
        qml.CNOT(wires=[wires[1], wires[0]]),
        qml.RZ(delta, wires=wires[0]),
        qml.RY(beta, wires=wires[1]),
        qml.CNOT(wires=wires),
        qml.RY(alpha, wires=wires[1]),
        qml.CNOT(wires=[wires[1], wires[0]]),
    ]

    # The matrix V of the interior is needed to solve
    # U = (A \otimes B) V (C \otimes D) for the single-qubit factors.
    #
    # The three CNOTs each have determinant -1, so the interior alone has
    # determinant -1. Both sides of the relation must be in SU(4), hence a
    # SWAP is appended to V; it is cancelled again below.
    #
    # -╭V- = -╭X--RZ(d)--╭C---------╭X--╭SWAP-
    # -╰V- = -╰C--RY(b)--╰X--RY(a)--╰C--╰SWAP-
    rz_delta = qml.RZ(math.cast_like(delta, 1j), wires=wires[0]).matrix
    ry_beta = qml.RY(beta, wires=wires[0]).matrix
    ry_alpha = qml.RY(alpha, wires=wires[0]).matrix

    # Factors of V listed in the order they are applied; each new factor is
    # multiplied onto the left of the running product.
    applied_in_order = (
        CNOT10,
        math.kron(rz_delta, ry_beta),
        CNOT01,
        math.kron(math.eye(2), ry_alpha),
        CNOT10,
        SWAP,
    )

    V = math.convert_like(math.eye(4), U)
    for factor in applied_in_order:
        V = math.dot(math.cast_like(factor, U), V)

    # Solve for the four SU(2) operations A, B, C, D.
    A, B, C, D = _extract_su2su2_prefactors(swap_U, V)

    # At this point we have:
    # -╭U-╭SWAP- = --C--╭X-RZ(d)-╭C-------╭X-╭SWAP--A
    # -╰U-╰SWAP- = --D--╰C-RY(b)-╰X-RY(a)-╰C-╰SWAP--B
    #
    # Since SWAP (A \otimes B) SWAP = B \otimes A, the SWAP can be moved past
    # the final single-qubit gates:
    # -╭U-╭SWAP- = --C--╭X-RZ(d)-╭C-------╭X--B--╭SWAP-
    # -╰U-╰SWAP- = --D--╰C-RY(b)-╰X-RY(a)-╰C--A--╰SWAP-
    #
    # The SWAPs on both sides cancel, leaving the desired decomposition
    # (up to a global phase):
    # -╭U- = --C--╭X-RZ(d)-╭C-------╭X--B--
    # -╰U- = --D--╰C-RY(b)-╰X-RY(a)-╰C--A--
    #
    # This is why A is decomposed on wires[1] and B on wires[0].
    A_ops = zyz_decomposition(A, wires[1])
    B_ops = zyz_decomposition(B, wires[0])
    C_ops = zyz_decomposition(C, wires[0])
    D_ops = zyz_decomposition(D, wires[1])

    return C_ops + D_ops + interior_decomp + A_ops + B_ops
def _decomposition_2_cnots(U, wires):
    r"""Decompose a two-qubit unitary ``U`` into a circuit with two CNOTs.

    With U = (A \otimes B) V (C \otimes D), the returned circuit is
     -╭U- = -C--╭X--RZ(d)--╭X--A-
     -╰U- = -D--╰C--RX(p)--╰C--B-
    The angles of the Z and X rotations are chosen such that the interior
    part has the same spectrum as U; A, B, C and D are then recovered from U
    and the interior matrix.

    Args:
        U (tensor_like): matrix of the two-qubit unitary to decompose.
        wires (Sequence): the two wires the operation acts on; accessed as
            ``wires[0]`` and ``wires[1]``.

    Returns:
        list: the concatenation of the ZYZ decompositions of C and D, the
        two-CNOT interior, and the ZYZ decompositions of A and B.
    """
    # The rotation angles come from the eigenvalues of
    #    \gamma(U) = (E^\dag U E) (E^\dag U E)^T.
    rotated = math.dot(Edag, math.dot(U, E))
    gamma = math.dot(rotated, math.T(rotated))
    evs, _ = math.linalg.eig(gamma)

    # The angle choices below are based on Proposition III.3 of
    # https://arxiv.org/abs/quant-ph/0308045.
    #
    # There is one special case: circuits in which the two CNOTs are adjacent
    # (or some variant of this). There the eigenvalues are -1, -1, 1, 1 and
    # the circuit can be written as
    # -╭U- = -C--╭X--SZ--╭X--A-
    # -╰U- = -D--╰C--SX--╰C--B-
    # with SZ and SX square roots of Z and X respectively. (This comes from
    # flipping the direction of the first CNOT with Hadamards, then
    # decomposing them and merging single-qubit gates.) The general algorithm
    # does not handle this case properly, so it is treated separately.
    real_parts = math.sort(math.real(evs))

    if math.allclose(real_parts, [-1, -1, 1, 1]):
        # Special case: fixed S and SX gates between the CNOTs.
        interior_decomp = [
            qml.CNOT(wires=[wires[1], wires[0]]),
            qml.S(wires=wires[0]),
            qml.SX(wires=wires[1]),
            qml.CNOT(wires=[wires[1], wires[0]]),
        ]

        # Matrix of S \otimes SX
        inner_matrix = S_SX
    else:
        # General case: the eigenvalues come in conjugate pairs, and two
        # eigenvalues that are not conjugates of each other are needed.
        first_phase = math.angle(evs[0])
        second_phase = math.angle(evs[1])

        # The second eigenvalue was the conjugate of the first; take
        # another one instead.
        if math.allclose(first_phase, -second_phase):
            second_phase = math.angle(evs[2])

        delta = (first_phase + second_phase) / 2
        phi = (first_phase - second_phase) / 2

        interior_decomp = [
            qml.CNOT(wires=[wires[1], wires[0]]),
            qml.RZ(delta, wires=wires[0]),
            qml.RX(phi, wires=wires[1]),
            qml.CNOT(wires=[wires[1], wires[0]]),
        ]

        # Matrix of RZ(delta) \otimes RX(phi)
        rz_delta = qml.RZ(math.cast_like(delta, 1j), wires=0).matrix
        rx_phi = qml.RX(phi, wires=0).matrix
        inner_matrix = math.kron(rz_delta, rx_phi)

    # Matrix V of the full interior (CNOT, inner gates, CNOT), needed to
    # decompose U = (A \otimes B) V (C \otimes D).
    cnot = math.cast_like(CNOT10, U)
    V = math.dot(cnot, math.dot(inner_matrix, cnot))

    # Recover A, B, C, D in SU(2) and assemble the decomposition.
    A, B, C, D = _extract_su2su2_prefactors(U, V)

    A_ops = zyz_decomposition(A, wires[0])
    B_ops = zyz_decomposition(B, wires[1])
    C_ops = zyz_decomposition(C, wires[0])
    D_ops = zyz_decomposition(D, wires[1])

    return C_ops + D_ops + interior_decomp + A_ops + B_ops