def _get_sx_vz_2cx_efficient_euler(self, decomposition, target_decomposed):
        """
        Build the two-CNOT circuit for an SU(4) target from SX, virtual RZ and CNOT gates.

        The even-indexed entries of ``decomposition`` are expanded with a "ZXZ" Euler
        decomposer and emitted on qubit 0 (the control of both CNOTs); the odd-indexed
        entries are expanded with an "XZX" decomposer and emitted on qubit 1 (the target).
        The rotations before the first and after the second CNOT are merged into a single
        1-qubit circuit each and re-synthesized with ``self._decomposer1q``; the layer
        between the CNOTs is written directly as SX / RZ gates.

        Args:
            decomposition: sequence of 1-qubit unitaries, alternating qubit 0 / qubit 1.
            target_decomposed: Weyl decomposition of the target; only its ``global_phase``
                is read here.

        Returns:
            QuantumCircuit: the synthesized two-qubit circuit.
        """
        n_cx = 2  # fixed: this routine only handles the two-CNOT case
        n_groups = len(decomposition) // 2
        # one row per local unitary of a qubit, one column per Euler angle
        ang0 = np.empty((n_groups, 3), dtype=float)
        ang1 = np.empty((n_groups, 3), dtype=float)
        phase_acc = 0.0

        # qubit 0 unitaries -> ZXZ angles first, then qubit 1 unitaries -> XZX angles;
        # the phases are accumulated in exactly that order
        for table, euler_basis, unitaries in (
                (ang0, "ZXZ", decomposition[0::2]),
                (ang1, "XZX", decomposition[1::2]),
        ):
            to_euler = OneQubitEulerDecomposer(euler_basis)
            for row, unitary in enumerate(unitaries):
                angles = to_euler.angles_and_phase(unitary)
                # the three returned angles land in columns 1, 2, 0 so that columns
                # 0, 1, 2 are read in sequence when the rotations are emitted below
                table[row, [1, 2, 0]] = angles[:3]
                phase_acc += angles[3]

        qc = QuantumCircuit(2)
        # kept as three separate updates, exactly like the reference implementation
        qc.global_phase = target_decomposed.global_phase
        qc.global_phase -= n_cx * self.basis.global_phase
        qc.global_phase += phase_acc

        def resynthesize(local, qubit):
            # collapse ``local`` to its matrix and re-express it in the 1q decomposer basis
            qc.compose(self._decomposer1q(Operator(local).data), [qubit], inplace=True)

        # TODO: make this more efficient to avoid double decomposition
        # leading local unitary on qubit 0
        lead0 = QuantumCircuit(1)
        lead0.rz(ang0[0, 0], 0)
        lead0.rx(ang0[0, 1], 0)
        lead0.rz(ang0[0, 2] + ang0[1, 0] + math.pi / 2, 0)
        resynthesize(lead0, 0)

        # leading local unitary on qubit 1
        lead1 = QuantumCircuit(1)
        lead1.rx(ang1[0, 0], 0)
        lead1.rz(ang1[0, 1], 0)
        lead1.rx(ang1[0, 2] + ang1[1, 0], 0)
        resynthesize(lead1, 1)

        qc.cx(0, 1)
        # the central decompositions are dependent on the specific form of the
        # unitaries coming out of the two qubit decomposer which have some flexibility
        # of choice.
        qc.sx(0)
        qc.rz(ang0[1, 1] - math.pi, 0)
        qc.sx(0)
        qc.rz(ang1[1, 1], 1)
        qc.global_phase += math.pi / 2

        qc.cx(0, 1)

        # trailing local unitary on qubit 0
        tail0 = QuantumCircuit(1)
        tail0.rz(ang0[1, 2] + ang0[2, 0] + math.pi / 2, 0)
        tail0.rx(ang0[2, 1], 0)
        tail0.rz(ang0[2, 2], 0)
        resynthesize(tail0, 0)

        # trailing local unitary on qubit 1
        tail1 = QuantumCircuit(1)
        tail1.rx(ang1[1, 2] + ang1[2, 0], 0)
        tail1.rz(ang1[2, 1], 0)
        tail1.rx(ang1[2, 2], 0)
        resynthesize(tail1, 1)

        return qc
Beispiel #2
0
from qiskit.circuit import Gate, ControlledGate
from qiskit.circuit import QuantumCircuit
from qiskit.circuit import QuantumRegister, Qubit
from qiskit.circuit.exceptions import CircuitError
from qiskit.circuit._utils import _compute_control_matrix
from qiskit.circuit.quantumcircuit import _qasm_escape_gate_name
from qiskit.circuit.library.standard_gates import U3Gate
from qiskit.extensions.quantum_initializer import isometry
from qiskit.quantum_info.operators.predicates import matrix_equal
from qiskit.quantum_info.operators.predicates import is_unitary_matrix
from qiskit.quantum_info.synthesis.one_qubit_decompose import OneQubitEulerDecomposer
from qiskit.quantum_info.synthesis.two_qubit_decompose import two_qubit_cnot_decompose
from qiskit.extensions.exceptions import ExtensionError

_DECOMPOSER1Q = OneQubitEulerDecomposer("U3")


class UnitaryGate(Gate):
    """Class for representing unitary gates"""
    def __init__(self, data, label=None):
        """Create a gate from a numeric unitary matrix.

        Args:
            data (matrix or Operator): unitary operator.
            label (str): unitary name for backend [Default: None].

        Raises:
            ExtensionError: if input data is not an N-qubit unitary operator.
        """
        if hasattr(data, "to_matrix"):
    (a non-equivalent root of SWAP from the TwoQubitWeylPartialSWAPEquiv
    similar to how x = (±sqrt(x))**2 )

    This gate binds 3 parameters, we make it canonical by setting:
        K2l = Id .
    """
    def specialize(self):
        """Bring the decomposition into the canonical form with ``K2l`` equal to the identity.

        ``a`` and ``b`` are both replaced by the value returned from
        ``_closest_partial_swap(a, b, -c)`` and ``c`` by its negative. The old ``K2l`` is
        folded into ``K1l``, ``K1r`` and ``K2r`` (conjugated by ``_ipz`` on the right-hand
        factors) before ``K2l`` is reset to a copy of ``_id``.
        """
        swap_angle = _closest_partial_swap(self.a, self.b, -self.c)
        self.a = swap_angle
        self.b = swap_angle
        self.c = -self.a

        # every update below uses the *old* K2l, so grab it before it is overwritten
        old_k2l = self.K2l
        self.K1l = self.K1l @ old_k2l
        self.K1r = self.K1r @ _ipz @ old_k2l @ _ipz
        self.K2r = _ipz @ old_k2l.T.conj() @ _ipz @ self.K2r
        self.K2l = _id.copy()


_oneq_xyx = OneQubitEulerDecomposer("XYX")
_oneq_zyz = OneQubitEulerDecomposer("ZYZ")


class TwoQubitWeylControlledEquiv(TwoQubitWeylDecomposition):
    """U ~ Ud(α, 0, 0) ~ Ctrl-U

    This gate binds 4 parameters, we make it canonical by setting:
        K2l = Ry(θl).Rx(λl) ,
        K2r = Ry(θr).Rx(λr) .
    """

    _default_1q_basis = "XYX"

    def specialize(self):
        self.b = self.c = 0
Beispiel #4
0
    def __init__(self, gate, basis_fidelity=1.0, euler_basis=None):
        """Store the basis gate and pre-compute the fixed local unitaries.

        Args:
            gate: two-qubit basis gate; it is converted with ``Operator(gate).data`` and
                Weyl-decomposed.
            basis_fidelity: fidelity assumed for each application of the basis gate.
            euler_basis: basis string handed to ``OneQubitEulerDecomposer``; 'U3' is used
                when this is None.
        """
        self.gate = gate
        self.basis_fidelity = basis_fidelity

        basis = TwoQubitWeylDecomposition(Operator(gate).data)
        self.basis = basis
        self._decomposer1q = OneQubitEulerDecomposer(
            'U3' if euler_basis is None else euler_basis)

        # FIXME: find good tolerances
        a_matches = np.isclose(basis.a, np.pi / 4)
        self.is_supercontrolled = a_matches and np.isclose(basis.c, 0.)

        # Create some useful matrices U1, U2, U3 are equivalent to the basis,
        # expand as Ui = Ki1.Ubasis.Ki2
        b = basis.b
        # scalar prefactors and phase factors shared by several matrices
        inv_sqrt2 = 1 / np.sqrt(2)
        inv_1pj = 1 / (1 + 1j)
        inv_1mj = 1 / (1 - 1j)
        em1, ep1 = np.exp(-1j * b), np.exp(1j * b)
        em2, ep2 = np.exp(-2j * b), np.exp(2j * b)
        cos2b, sin2b = np.cos(2 * b), np.sin(2 * b)

        K11l = inv_1pj * np.array(
            [[-1j * em1, em1],
             [-1j * ep1, -ep1]],
            dtype=complex)
        K11r = inv_sqrt2 * np.array(
            [[1j * em1, -em1],
             [ep1, -1j * ep1]],
            dtype=complex)
        K12l = inv_1pj * np.array([[1j, 1j], [-1, 1]], dtype=complex)
        K12r = inv_sqrt2 * np.array([[1j, 1], [-1, -1j]], dtype=complex)
        K32lK21l = inv_sqrt2 * np.array(
            [[1 + 1j * cos2b, 1j * sin2b],
             [1j * sin2b, 1 - 1j * cos2b]],
            dtype=complex)
        K21r = inv_1mj * np.array(
            [[-1j * em2, em2],
             [1j * ep2, ep2]],
            dtype=complex)
        K22l = inv_sqrt2 * np.array([[1, -1], [1, 1]], dtype=complex)
        K22r = np.array([[0, 1], [-1, 0]], dtype=complex)
        K31l = inv_sqrt2 * np.array(
            [[em1, em1],
             [-ep1, ep1]],
            dtype=complex)
        K31r = 1j * np.array([[ep1, 0], [0, -em1]], dtype=complex)
        K32r = inv_1mj * np.array(
            [[ep1, -em1],
             [-1j * ep1, -1j * em1]],
            dtype=complex)

        def dagger(mat):
            # conjugate transpose
            return mat.T.conj()

        k1l_dag = dagger(basis.K1l)
        k1r_dag = dagger(basis.K1r)
        k2l_dag = dagger(basis.K2l)
        k2r_dag = dagger(basis.K2r)

        # Pre-build the fixed parts of the matrices used in 3-part decomposition
        self.u0l = K31l.dot(k1l_dag)
        self.u0r = K31r.dot(k1r_dag)
        self.u1l = k2l_dag.dot(K32lK21l).dot(k1l_dag)
        self.u1ra = k2r_dag.dot(K32r)
        self.u1rb = K21r.dot(k1r_dag)
        self.u2la = k2l_dag.dot(K22l)
        self.u2lb = K11l.dot(k1l_dag)
        self.u2ra = k2r_dag.dot(K22r)
        self.u2rb = K11r.dot(k1r_dag)
        self.u3l = k2l_dag.dot(K12l)
        self.u3r = k2r_dag.dot(K12r)

        # Pre-build the fixed parts of the matrices used in the 2-part decomposition
        self.q0l = dagger(K12l).dot(k1l_dag)
        self.q0r = dagger(K12r).dot(_ipz).dot(k1r_dag)
        self.q1la = k2l_dag.dot(dagger(K11l))
        self.q1lb = K11l.dot(k1l_dag)
        self.q1ra = k2r_dag.dot(_ipz).dot(dagger(K11r))
        self.q1rb = K11r.dot(k1r_dag)
        self.q2l = k2l_dag.dot(K12l)
        self.q2r = k2r_dag.dot(K12r)

        # Decomposition into different number of gates
        # In the future could use different decomposition functions for different basis classes, etc
        if not self.is_supercontrolled:
            warnings.warn(
                "Only know how to decompose properly for supercontrolled basis gate. "
                "This gate is ~Ud({}, {}, {})".format(basis.a, basis.b,
                                                      basis.c))
        # index i holds the routine that uses the basis gate i times
        self.decomposition_fns = [
            self.decomp0,
            self.decomp1,
            self.decomp2_supercontrolled,
            self.decomp3_supercontrolled,
        ]
import warnings

import numpy as np
import scipy.linalg as la

from qiskit.circuit.quantumregister import QuantumRegister
from qiskit.circuit.quantumcircuit import QuantumCircuit
from qiskit.circuit.library.standard_gates.u3 import U3Gate
from qiskit.circuit.library.standard_gates.x import CXGate
from qiskit.exceptions import QiskitError
from qiskit.quantum_info.operators.predicates import is_unitary_matrix
from qiskit.quantum_info.synthesis.weyl import weyl_coordinates
from qiskit.quantum_info.synthesis.one_qubit_decompose import OneQubitEulerDecomposer

_CUTOFF_PRECISION = 1e-12
_DECOMPOSER1Q = OneQubitEulerDecomposer('U3')


def euler_angles_1q(unitary_matrix):
    """DEPRECATED: Compute Euler angles for a single-qubit gate.

    Find angles (theta, phi, lambda) such that
    unitary_matrix = phase * Rz(phi) * Ry(theta) * Rz(lambda)

    Args:
        unitary_matrix (ndarray): 2x2 unitary matrix

    Returns:
        tuple: (theta, phi, lambda) Euler angles of SU(2)

    Raises:
class TwoQubitBasisDecomposer:
    """A class for decomposing 2-qubit unitaries into minimal number of uses of a 2-qubit
    basis gate.

    Args:
        gate (Gate): Two-qubit gate to be used in the KAK decomposition.
        basis_fidelity (float): Fidelity to be assumed for applications of KAK Gate. Default 1.0.
        euler_basis (str): Basis string to be provided to OneQubitEulerDecomposer for 1Q synthesis.
            Valid options are ['ZYZ', 'ZXZ', 'XYX', 'U', 'U3', 'U1X', 'PSX', 'ZSX', 'RR'].
            Default 'U3'.
        pulse_optimize (None or bool): If True, try to do decomposition which minimizes
            local unitaries in between entangling gates. This will raise an exception if an
            optimal decomposition is not implemented. Currently, only [{CX, SX, RZ}] is known.
            If False, don't attempt optimization. If None, attempt optimization but don't raise
            if unknown.
    """
    def __init__(self,
                 gate,
                 basis_fidelity=1.0,
                 euler_basis=None,
                 pulse_optimize=None):
        """Store the basis gate and pre-compute the fixed local unitaries.

        Args:
            gate: two-qubit basis gate; it is converted with ``Operator(gate).data`` and
                Weyl-decomposed.
            basis_fidelity: fidelity assumed for each application of the basis gate.
            euler_basis: basis string handed to ``OneQubitEulerDecomposer``; "U3" is used
                when this is None.
            pulse_optimize: stored as-is; consulted when a circuit is synthesized.
        """
        self.gate = gate
        self.basis_fidelity = basis_fidelity
        self.pulse_optimize = pulse_optimize

        basis = self.basis = TwoQubitWeylDecomposition(Operator(gate).data)
        if euler_basis is not None:
            self._decomposer1q = OneQubitEulerDecomposer(euler_basis)
        else:
            self._decomposer1q = OneQubitEulerDecomposer("U3")

        # FIXME: find good tolerances
        # math.isclose() against 0.0 needs an explicit abs_tol: with the default
        # abs_tol=0.0 the check degenerates to ``basis.c == 0.0`` and any round-off
        # in ``c`` would wrongly flag the gate as not supercontrolled. The value
        # mirrors the default relative tolerance applied to ``basis.a``.
        self.is_supercontrolled = math.isclose(
            basis.a, np.pi / 4) and math.isclose(
                basis.c, 0.0, abs_tol=1e-9)

        # Create some useful matrices U1, U2, U3 are equivalent to the basis,
        # expand as Ui = Ki1.Ubasis.Ki2
        b = basis.b
        K11l = (1 / (1 + 1j) * np.array(
            [
                [-1j * cmath.exp(-1j * b),
                 cmath.exp(-1j * b)],
                [-1j * cmath.exp(1j * b), -cmath.exp(1j * b)],
            ],
            dtype=complex,
        ))
        K11r = (1 / math.sqrt(2) * np.array(
            [
                [1j * cmath.exp(-1j * b), -cmath.exp(-1j * b)],
                [cmath.exp(1j * b), -1j * cmath.exp(1j * b)],
            ],
            dtype=complex,
        ))
        K12l = 1 / (1 + 1j) * np.array([[1j, 1j], [-1, 1]], dtype=complex)
        K12r = 1 / math.sqrt(2) * np.array([[1j, 1], [-1, -1j]], dtype=complex)
        K32lK21l = (1 / math.sqrt(2) * np.array(
            [
                [1 + 1j * np.cos(2 * b), 1j * np.sin(2 * b)],
                [1j * np.sin(2 * b), 1 - 1j * np.cos(2 * b)],
            ],
            dtype=complex,
        ))
        K21r = (1 / (1 - 1j) * np.array(
            [
                [-1j * cmath.exp(-2j * b),
                 cmath.exp(-2j * b)],
                [1j * cmath.exp(2j * b),
                 cmath.exp(2j * b)],
            ],
            dtype=complex,
        ))
        K22l = 1 / math.sqrt(2) * np.array([[1, -1], [1, 1]], dtype=complex)
        K22r = np.array([[0, 1], [-1, 0]], dtype=complex)
        K31l = (1 / math.sqrt(2) * np.array(
            [[cmath.exp(-1j * b), cmath.exp(-1j * b)],
             [-cmath.exp(1j * b), cmath.exp(1j * b)]],
            dtype=complex,
        ))
        K31r = 1j * np.array(
            [[cmath.exp(1j * b), 0], [0, -cmath.exp(-1j * b)]], dtype=complex)
        K32r = (1 / (1 - 1j) * np.array(
            [
                [cmath.exp(1j * b), -cmath.exp(-1j * b)],
                [-1j * cmath.exp(1j * b), -1j * cmath.exp(-1j * b)],
            ],
            dtype=complex,
        ))
        # conjugate transposes of the basis gate's own local unitaries
        k1ld = basis.K1l.T.conj()
        k1rd = basis.K1r.T.conj()
        k2ld = basis.K2l.T.conj()
        k2rd = basis.K2r.T.conj()

        # Pre-build the fixed parts of the matrices used in 3-part decomposition
        self.u0l = K31l.dot(k1ld)
        self.u0r = K31r.dot(k1rd)
        self.u1l = k2ld.dot(K32lK21l).dot(k1ld)
        self.u1ra = k2rd.dot(K32r)
        self.u1rb = K21r.dot(k1rd)
        self.u2la = k2ld.dot(K22l)
        self.u2lb = K11l.dot(k1ld)
        self.u2ra = k2rd.dot(K22r)
        self.u2rb = K11r.dot(k1rd)
        self.u3l = k2ld.dot(K12l)
        self.u3r = k2rd.dot(K12r)

        # Pre-build the fixed parts of the matrices used in the 2-part decomposition
        self.q0l = K12l.T.conj().dot(k1ld)
        self.q0r = K12r.T.conj().dot(_ipz).dot(k1rd)
        self.q1la = k2ld.dot(K11l.T.conj())
        self.q1lb = K11l.dot(k1ld)
        self.q1ra = k2rd.dot(_ipz).dot(K11r.T.conj())
        self.q1rb = K11r.dot(k1rd)
        self.q2l = k2ld.dot(K12l)
        self.q2r = k2rd.dot(K12r)

        # Decomposition into different number of gates
        # In the future could use different decomposition functions for different basis classes, etc
        if not self.is_supercontrolled:
            warnings.warn(
                "Only know how to decompose properly for supercontrolled basis gate. "
                "This gate is ~Ud({}, {}, {})".format(basis.a, basis.b,
                                                      basis.c),
                stacklevel=2,
            )
        # index i holds the routine that uses the basis gate i times
        self.decomposition_fns = [
            self.decomp0,
            self.decomp1,
            self.decomp2_supercontrolled,
            self.decomp3_supercontrolled,
        ]
        self._rqc = None

    def traces(self, target):
        """Give the expected traces :math:`|Tr(U \\cdot Utarget^dag)|` for different number of
        basis gates."""
        # Future gotcha: extending this to non-supercontrolled basis.
        # Careful: closest distance between a1,b1,c1 and a2,b2,c2 may be between reflections.
        # This doesn't come up if either c1==0 or c2==0 but otherwise be careful.
        ta, tb, tc = target.a, target.b, target.c
        bb = self.basis.b
        return [
            4 * complex(
                math.cos(ta) * math.cos(tb) * math.cos(tc),
                math.sin(ta) * math.sin(tb) * math.sin(tc),
            ),
            4 * complex(
                math.cos(math.pi / 4 - ta) * math.cos(bb - tb) * math.cos(tc),
                math.sin(math.pi / 4 - ta) * math.sin(bb - tb) * math.sin(tc),
            ),
            4 * math.cos(tc),
            4,
        ]

    @staticmethod
    def decomp0(target):
        """Decompose target ~Ud(x, y, z) with 0 uses of the basis gate.
        Result Ur has trace:
        :math:`|Tr(Ur.Utarget^dag)| = 4|(cos(x)cos(y)cos(z)+ j sin(x)sin(y)sin(z)|`,
        which is optimal for all targets and bases"""

        U0l = target.K1l.dot(target.K2l)
        U0r = target.K1r.dot(target.K2r)
        return U0r, U0l

    def decomp1(self, target):
        """Decompose target ~Ud(x, y, z) with 1 uses of the basis gate ~Ud(a, b, c).
        Result Ur has trace:
        .. math::

            |Tr(Ur.Utarget^dag)| = 4|cos(x-a)cos(y-b)cos(z-c) + j sin(x-a)sin(y-b)sin(z-c)|

        which is optimal for all targets and bases with z==0 or c==0"""
        # FIXME: fix for z!=0 and c!=0 using closest reflection (not always in the Weyl chamber)
        U0l = target.K1l.dot(self.basis.K1l.T.conj())
        U0r = target.K1r.dot(self.basis.K1r.T.conj())
        U1l = self.basis.K2l.T.conj().dot(target.K2l)
        U1r = self.basis.K2r.T.conj().dot(target.K2r)

        return U1r, U1l, U0r, U0l

    def decomp2_supercontrolled(self, target):
        """Decompose target ~Ud(x, y, z) with two uses of the basis gate.

        For supercontrolled basis ~Ud(pi/4, b, 0), all b, result Ur has trace

        .. math::

            |Tr(Ur.Utarget^dag)| = 4cos(z)

        which is the optimal approximation for basis of CNOT-class ``~Ud(pi/4, 0, 0)``
        or DCNOT-class ``~Ud(pi/4, pi/4, 0)`` and any target.
        May be sub-optimal for b!=0 (e.g. there exists exact decomposition for any target
        using B ``B~Ud(pi/4, pi/8, 0)``, but not this decomposition.)
        This is an exact decomposition for supercontrolled basis and target ``~Ud(x, y, 0)``.
        No guarantees for non-supercontrolled basis.

        Returns:
            tuple: ``(U2r, U2l, U1r, U1l, U0r, U0l)``.
        """
        # middle layer: the only place where the target's Weyl coordinates enter
        mid_left = np.dot(np.dot(self.q1la, rz_array(-2 * target.a)), self.q1lb)
        mid_right = np.dot(np.dot(self.q1ra, rz_array(2 * target.b)), self.q1rb)

        return (
            np.dot(self.q2r, target.K2r),
            np.dot(self.q2l, target.K2l),
            mid_right,
            mid_left,
            np.dot(target.K1r, self.q0r),
            np.dot(target.K1l, self.q0l),
        )

    def decomp3_supercontrolled(self, target):
        """Decompose target with three uses of the basis gate.

        This is an exact decomposition for supercontrolled basis ~Ud(pi/4, b, 0), all b,
        and any target. No guarantees for non-supercontrolled basis.

        Returns:
            tuple: ``(U3r, U3l, U2r, U2l, U1r, U1l, U0r, U0l)``.
        """
        # the three inner right/left factors that depend on the target's Weyl coordinates
        inner1_right = np.dot(np.dot(self.u1ra, rz_array(-2 * target.c)), self.u1rb)
        inner2_left = np.dot(np.dot(self.u2la, rz_array(-2 * target.a)), self.u2lb)
        inner2_right = np.dot(np.dot(self.u2ra, rz_array(2 * target.b)), self.u2rb)

        return (
            np.dot(self.u3r, target.K2r),
            np.dot(self.u3l, target.K2l),
            inner2_right,
            inner2_left,
            inner1_right,
            self.u1l,
            np.dot(target.K1r, self.u0r),
            np.dot(target.K1l, self.u0l),
        )

    def __call__(self,
                 target,
                 basis_fidelity=None,
                 *,
                 _num_basis_uses=None) -> QuantumCircuit:
        """Decompose a two-qubit unitary over fixed basis + SU(2) using the best approximation given
        that each basis application has a finite fidelity.

        You can force a particular approximation by passing _num_basis_uses.

        Args:
            target: two-qubit unitary; converted with ``np.asarray(target, dtype=complex)``.
            basis_fidelity: fidelity assumed per basis-gate application. ``None`` selects
                the value given at construction; any number, including ``0.0``, is used
                as passed.
            _num_basis_uses: if not None, index of the decomposition routine to use
                instead of the one with the highest expected fidelity.

        Returns:
            QuantumCircuit: circuit implementing (an approximation of) ``target``.

        Raises:
            QiskitError: re-raised from the pulse-optimal synthesis when
                ``self.pulse_optimize`` is truthy.
        """
        # Only fall back to the stored fidelity when nothing was passed: ``or`` would
        # also discard an explicit 0.0 and silently treat the basis gate as perfect.
        if basis_fidelity is None:
            basis_fidelity = self.basis_fidelity
        target = np.asarray(target, dtype=complex)

        target_decomposed = TwoQubitWeylDecomposition(target)
        traces = self.traces(target_decomposed)
        # expected fidelity when the basis gate is applied i times (i = 0..3)
        expected_fidelities = [
            trace_to_fid(traces[i]) * basis_fidelity**i for i in range(4)
        ]

        best_nbasis = int(np.argmax(expected_fidelities))
        if _num_basis_uses is not None:
            best_nbasis = _num_basis_uses
        decomposition = self.decomposition_fns[best_nbasis](target_decomposed)

        # attempt pulse optimal decomposition
        try:
            if self.pulse_optimize in {None, True}:
                return_circuit = self._pulse_optimal_chooser(
                    best_nbasis, decomposition, target_decomposed)
                if return_circuit:
                    return return_circuit
        except QiskitError:
            # best effort when pulse_optimize is None; mandatory when it is True
            if self.pulse_optimize:
                raise

        # do default decomposition
        q = QuantumRegister(2)
        decomposition_euler = [
            self._decomposer1q._decompose(x) for x in decomposition
        ]
        return_circuit = QuantumCircuit(q)
        return_circuit.global_phase = target_decomposed.global_phase
        return_circuit.global_phase -= best_nbasis * self.basis.global_phase
        if best_nbasis == 2:
            return_circuit.global_phase += np.pi
        # alternate a layer of local unitaries (qubit 0, qubit 1) with the basis gate
        for i in range(best_nbasis):
            return_circuit.compose(decomposition_euler[2 * i], [q[0]],
                                   inplace=True)
            return_circuit.compose(decomposition_euler[2 * i + 1], [q[1]],
                                   inplace=True)
            return_circuit.append(self.gate, [q[0], q[1]])
        # closing layer of local unitaries
        return_circuit.compose(decomposition_euler[2 * best_nbasis], [q[0]],
                               inplace=True)
        return_circuit.compose(decomposition_euler[2 * best_nbasis + 1],
                               [q[1]],
                               inplace=True)
        return return_circuit

    def _pulse_optimal_chooser(self, best_nbasis, decomposition,
                               target_decomposed):
        """Determine method to find pulse optimal circuit. This method may be
        removed once a more general approach is used.

        Returns:
            QuantumCircuit: pulse optimal quantum circuit.
            None: Probably nbasis=1 and original circuit is fine.

        Raises:
            QiskitError: Decomposition for selected basis not implemented.
        """
        circuit = None
        if self.pulse_optimize and best_nbasis in {0, 1}:
            # already pulse optimal
            return None
        elif self.pulse_optimize and best_nbasis > 3:
            raise QiskitError(
                f"Unexpected number of entangling gates ({best_nbasis}) in decomposition."
            )
        if self._decomposer1q.basis in {"ZSX", "ZSXX"}:
            if isinstance(self.gate, CXGate):
                if best_nbasis == 3:
                    circuit = self._get_sx_vz_3cx_efficient_euler(
                        decomposition, target_decomposed)
                elif best_nbasis == 2:
                    circuit = self._get_sx_vz_2cx_efficient_euler(
                        decomposition, target_decomposed)
            else:
                raise QiskitError(
                    "pulse_optimizer currently only works with CNOT entangling gate"
                )
        else:
            raise QiskitError(
                '"pulse_optimize" currently only works with ZSX basis '
                f"({self._decomposer1q.basis} used)")
        return circuit

    def _get_sx_vz_2cx_efficient_euler(self, decomposition, target_decomposed):
        """
        Build the two-CNOT circuit for an SU(4) target from SX, virtual RZ and CNOT gates.

        The even-indexed entries of ``decomposition`` are expanded with a "ZXZ" Euler
        decomposer and emitted on qubit 0 (the control of both CNOTs); the odd-indexed
        entries are expanded with an "XZX" decomposer and emitted on qubit 1 (the target).
        The rotations before the first and after the second CNOT are merged into a single
        1-qubit circuit each and re-synthesized with ``self._decomposer1q``; the layer
        between the CNOTs is written directly as SX / RZ gates.

        Args:
            decomposition: sequence of 1-qubit unitaries, alternating qubit 0 / qubit 1.
            target_decomposed: Weyl decomposition of the target; only its ``global_phase``
                is read here.

        Returns:
            QuantumCircuit: the synthesized two-qubit circuit.
        """
        n_cx = 2  # fixed: this routine only handles the two-CNOT case
        n_groups = len(decomposition) // 2
        # one row per local unitary of a qubit, one column per Euler angle
        ang0 = np.empty((n_groups, 3), dtype=float)
        ang1 = np.empty((n_groups, 3), dtype=float)
        phase_acc = 0.0

        # qubit 0 unitaries -> ZXZ angles first, then qubit 1 unitaries -> XZX angles;
        # the phases are accumulated in exactly that order
        for table, euler_basis, unitaries in (
                (ang0, "ZXZ", decomposition[0::2]),
                (ang1, "XZX", decomposition[1::2]),
        ):
            to_euler = OneQubitEulerDecomposer(euler_basis)
            for row, unitary in enumerate(unitaries):
                angles = to_euler.angles_and_phase(unitary)
                # the three returned angles land in columns 1, 2, 0 so that columns
                # 0, 1, 2 are read in sequence when the rotations are emitted below
                table[row, [1, 2, 0]] = angles[:3]
                phase_acc += angles[3]

        qc = QuantumCircuit(2)
        # kept as three separate updates, exactly like the reference implementation
        qc.global_phase = target_decomposed.global_phase
        qc.global_phase -= n_cx * self.basis.global_phase
        qc.global_phase += phase_acc

        def resynthesize(local, qubit):
            # collapse ``local`` to its matrix and re-express it in the 1q decomposer basis
            qc.compose(self._decomposer1q(Operator(local).data), [qubit], inplace=True)

        # TODO: make this more efficient to avoid double decomposition
        # leading local unitary on qubit 0
        lead0 = QuantumCircuit(1)
        lead0.rz(ang0[0, 0], 0)
        lead0.rx(ang0[0, 1], 0)
        lead0.rz(ang0[0, 2] + ang0[1, 0] + math.pi / 2, 0)
        resynthesize(lead0, 0)

        # leading local unitary on qubit 1
        lead1 = QuantumCircuit(1)
        lead1.rx(ang1[0, 0], 0)
        lead1.rz(ang1[0, 1], 0)
        lead1.rx(ang1[0, 2] + ang1[1, 0], 0)
        resynthesize(lead1, 1)

        qc.cx(0, 1)
        # the central decompositions are dependent on the specific form of the
        # unitaries coming out of the two qubit decomposer which have some flexibility
        # of choice.
        qc.sx(0)
        qc.rz(ang0[1, 1] - math.pi, 0)
        qc.sx(0)
        qc.rz(ang1[1, 1], 1)
        qc.global_phase += math.pi / 2

        qc.cx(0, 1)

        # trailing local unitary on qubit 0
        tail0 = QuantumCircuit(1)
        tail0.rz(ang0[1, 2] + ang0[2, 0] + math.pi / 2, 0)
        tail0.rx(ang0[2, 1], 0)
        tail0.rz(ang0[2, 2], 0)
        resynthesize(tail0, 0)

        # trailing local unitary on qubit 1
        tail1 = QuantumCircuit(1)
        tail1.rx(ang1[1, 2] + ang1[2, 0], 0)
        tail1.rz(ang1[2, 1], 0)
        tail1.rx(ang1[2, 2], 0)
        resynthesize(tail1, 1)

        return qc

    def _get_sx_vz_3cx_efficient_euler(self, decomposition, target_decomposed):
        """
        Decomposition of SU(4) gate for device with SX, virtual RZ, and CNOT gates assuming
        three CNOT gates are needed.

        This first decomposes each unitary from the KAK decomposition into ZXZ on the source
        qubit of the CNOTs and XZX on the targets in order to commute operators to beginning and
        end of decomposition. Inserting Hadamards reverses the direction of the CNOTs and transforms
        a variable Rx -> variable virtual Rz. The beginning and ending single qubit gates are then
        collapsed and re-decomposed with the single qubit decomposer. This last step could be avoided
        if performance is a concern.

        Args:
            decomposition: sequence of 1-qubit unitaries; even indices are expanded with the
                "ZXZ" decomposer and odd indices with the "XZX" decomposer.
            target_decomposed: Weyl decomposition of the target; its ``global_phase`` and
                ``unitary_matrix`` are read here.

        Returns:
            QuantumCircuit: circuit with three ``cx(1, 0)`` gates and the surrounding 1q gates.

        Raises:
            QiskitError: if a middle-layer rotation has no SX/RZ shortcut and
                ``self.pulse_optimize`` is not None.
        """
        best_nbasis = 3  # by assumption
        num_1q_uni = len(decomposition)
        # create structure to hold euler angles: 1st index represents unitary "group" wrt cx
        # 2nd index represents index of euler triple.
        euler_q0 = np.empty((num_1q_uni // 2, 3), dtype=float)
        euler_q1 = np.empty((num_1q_uni // 2, 3), dtype=float)
        global_phase = 0.0
        atol = 1e-10  # absolute tolerance for floats

        # decompose source unitaries to zxz
        zxz_decomposer = OneQubitEulerDecomposer("ZXZ")
        for iqubit, decomp in enumerate(decomposition[0::2]):
            euler_angles = zxz_decomposer.angles_and_phase(decomp)
            # the three returned angles go to columns 1, 2, 0; columns 0, 1, 2 are then
            # read in that order when the rotations are emitted below
            euler_q0[iqubit, [1, 2, 0]] = euler_angles[:3]
            global_phase += euler_angles[3]
        # decompose target unitaries to xzx
        xzx_decomposer = OneQubitEulerDecomposer("XZX")
        for iqubit, decomp in enumerate(decomposition[1::2]):
            euler_angles = xzx_decomposer.angles_and_phase(decomp)
            euler_q1[iqubit, [1, 2, 0]] = euler_angles[:3]
            global_phase += euler_angles[3]

        qc = QuantumCircuit(2)
        qc.global_phase = target_decomposed.global_phase
        qc.global_phase -= best_nbasis * self.basis.global_phase
        qc.global_phase += global_phase

        # x12: last angle of group 1 plus first angle of group 2 on qubit 0; the flags
        # below classify it (zero, multiple of pi, odd multiple of pi, pi/2)
        x12 = euler_q0[1][2] + euler_q0[2][0]
        x12_isNonZero = not math.isclose(x12, 0, abs_tol=atol)
        x12_isOddMult = None
        x12_isPiMult = math.isclose(math.sin(x12), 0, abs_tol=atol)
        if x12_isPiMult:
            x12_isOddMult = math.isclose(math.cos(x12), -1, abs_tol=atol)
            # only defined (and only used) when x12 is a multiple of pi
            x12_phase = math.pi * math.cos(x12)
        x02_add = x12 - euler_q0[1][0]
        x12_isHalfPi = math.isclose(x12, math.pi / 2, abs_tol=atol)

        # TODO: make this more efficient to avoid double decomposition
        # leading local unitary on qubit 0, closed with a Hadamard
        circ = QuantumCircuit(1)
        circ.rz(euler_q0[0][0], 0)
        circ.rx(euler_q0[0][1], 0)
        if x12_isNonZero and x12_isPiMult:
            circ.rz(euler_q0[0][2] - x02_add, 0)
        else:
            circ.rz(euler_q0[0][2] + euler_q0[1][0], 0)
        circ.h(0)
        # re-decompose to basis of 1q decomposer
        qceuler = self._decomposer1q(Operator(circ).data)
        qc.compose(qceuler, [0], inplace=True)

        # leading local unitary on qubit 1, closed with a Hadamard
        circ = QuantumCircuit(1)
        circ.rx(euler_q1[0][0], 0)
        circ.rz(euler_q1[0][1], 0)
        circ.rx(euler_q1[0][2] + euler_q1[1][0], 0)
        circ.h(0)
        qceuler = self._decomposer1q(Operator(circ).data)
        qc.compose(qceuler, [1], inplace=True)

        qc.cx(1, 0)

        # first middle layer, qubit 0
        if x12_isPiMult:
            # even or odd multiple
            if x12_isNonZero:
                qc.global_phase += x12_phase
            if x12_isNonZero and x12_isOddMult:
                qc.rz(-euler_q0[1][1], 0)
            else:
                qc.rz(euler_q0[1][1], 0)
                qc.global_phase += math.pi
        if x12_isHalfPi:
            qc.sx(0)
            qc.global_phase -= math.pi / 4
        elif x12_isNonZero and not x12_isPiMult:
            # this is non-optimal but doesn't seem to occur currently
            if self.pulse_optimize is None:
                qc.compose(self._decomposer1q(Operator(RXGate(x12)).data), [0],
                           inplace=True)
            else:
                raise QiskitError(
                    "possible non-pulse-optimal decomposition encountered")
        # first middle layer, qubit 1: a pi/2 rotation becomes a single SX
        if math.isclose(euler_q1[1][1], math.pi / 2, abs_tol=atol):
            qc.sx(1)
            qc.global_phase -= math.pi / 4
        else:
            # this is non-optimal but doesn't seem to occur currently
            if self.pulse_optimize is None:
                qc.compose(self._decomposer1q(
                    Operator(RXGate(euler_q1[1][1])).data), [1],
                           inplace=True)
            else:
                raise QiskitError(
                    "possible non-pulse-optimal decomposition encountered")
        qc.rz(euler_q1[1][2] + euler_q1[2][0], 1)

        qc.cx(1, 0)

        # second middle layer
        qc.rz(euler_q0[2][1], 0)
        if math.isclose(euler_q1[2][1], math.pi / 2, abs_tol=atol):
            qc.sx(1)
            qc.global_phase -= math.pi / 4
        else:
            # this is non-optimal but doesn't seem to occur currently
            if self.pulse_optimize is None:
                qc.compose(self._decomposer1q(
                    Operator(RXGate(euler_q1[2][1])).data), [1],
                           inplace=True)
            else:
                raise QiskitError(
                    "possible non-pulse-optimal decomposition encountered")

        qc.cx(1, 0)

        # trailing local unitary on qubit 0, opened with a Hadamard
        circ = QuantumCircuit(1)
        circ.h(0)
        circ.rz(euler_q0[2][2] + euler_q0[3][0], 0)
        circ.rx(euler_q0[3][1], 0)
        circ.rz(euler_q0[3][2], 0)
        qceuler = self._decomposer1q(Operator(circ).data)
        qc.compose(qceuler, [0], inplace=True)

        # trailing local unitary on qubit 1, opened with a Hadamard
        circ = QuantumCircuit(1)
        circ.h(0)
        circ.rx(euler_q1[2][2] + euler_q1[3][0], 0)
        circ.rz(euler_q1[3][1], 0)
        circ.rx(euler_q1[3][2], 0)
        qceuler = self._decomposer1q(Operator(circ).data)
        qc.compose(qceuler, [1], inplace=True)

        # TODO: fix the sign problem to avoid correction here
        # if the [0, 0] entry of the circuit's operator is the negative of the target's,
        # shift the global phase by pi
        if cmath.isclose(target_decomposed.unitary_matrix[0, 0],
                         -(Operator(qc).data[0, 0]),
                         abs_tol=atol):
            qc.global_phase += math.pi
        return qc

    def num_basis_gates(self, unitary):
        """Return the number of basis-gate uses with the best expected fidelity.

        The Weyl coordinates of ``unitary`` are obtained from
        ``weyl_coordinates``; for 0, 1, 2 and 3 uses of the basis gate a trace
        is computed, converted with ``trace_to_fid`` and weighted by
        ``self.basis_fidelity`` raised to the number of uses.

        Args:
            unitary: matrix-like input, converted with ``np.asarray(..., dtype=complex)``.

        Returns:
            The index (0-3) of the largest weighted value, as returned by ``np.argmax``.
        """
        unitary = np.asarray(unitary, dtype=complex)
        a, b, c = weyl_coordinates(unitary)[:]

        # Offsets of the target coordinates from the basis gate's (pi/4, b) point.
        delta_a = np.pi / 4 - a
        delta_b = self.basis.b - b
        cos_c = math.cos(c)
        sin_c = math.sin(c)

        traces = [
            4 * (math.cos(a) * math.cos(b) * cos_c +
                 1j * math.sin(a) * math.sin(b) * sin_c),
            4 * (math.cos(delta_a) * math.cos(delta_b) * cos_c +
                 1j * math.sin(delta_a) * math.sin(delta_b) * sin_c),
            4 * cos_c,
            4,
        ]
        weighted = [
            trace_to_fid(trace) * self.basis_fidelity**uses
            for uses, trace in enumerate(traces)
        ]
        return np.argmax(weighted)
    def _get_sx_vz_3cx_efficient_euler(self, decomposition, target_decomposed):
        """
        Decomposition of SU(4) gate for device with SX, virtual RZ, and CNOT gates assuming
        three CNOT gates are needed.

        This first decomposes each unitary from the KAK decomposition into ZXZ on the source
        qubit of the CNOTs and XZX on the targets in order to commute operators to beginning and
        end of decomposition. Inserting Hadamards reverses the direction of the CNOTs and transforms
        a variable Rx -> variable virtual Rz. The beginning and ending single qubit gates are then
        collapsed and re-decomposed with the single qubit decomposer. This last step could be avoided
        if performance is a concern.

        Args:
            decomposition: sequence of single-qubit unitaries. Entries at even positions are
                decomposed as ZXZ (used on qubit 0) and entries at odd positions as XZX (used
                on qubit 1). Rows 0-3 of each angle table are read below, so at least eight
                entries are needed.
            target_decomposed: decomposition of the target unitary; only its ``global_phase``
                and ``unitary_matrix`` attributes are read here.

        Returns:
            QuantumCircuit: a two-qubit circuit containing three ``cx(1, 0)`` gates.

        Raises:
            QiskitError: if ``self.pulse_optimize`` is not ``None`` and one of the rotations
                between the CNOTs is not one of the special angles handled directly
                (``x12`` a multiple of pi or equal to pi/2 on qubit 0; pi/2 on qubit 1).
        """
        best_nbasis = 3  # by assumption
        num_1q_uni = len(decomposition)
        # create structure to hold euler angles: 1st index represents unitary "group" wrt cx
        # 2nd index represents index of euler triple.
        euler_q0 = np.empty((num_1q_uni // 2, 3), dtype=float)
        euler_q1 = np.empty((num_1q_uni // 2, 3), dtype=float)
        global_phase = 0.0
        atol = 1e-10  # absolute tolerance for floats

        # decompose source unitaries to zxz
        zxz_decomposer = OneQubitEulerDecomposer("ZXZ")
        for iqubit, decomp in enumerate(decomposition[0::2]):
            euler_angles = zxz_decomposer.angles_and_phase(decomp)
            # the first three returned values are stored in columns 1, 2, 0 respectively;
            # the fourth is accumulated as a phase
            euler_q0[iqubit, [1, 2, 0]] = euler_angles[:3]
            global_phase += euler_angles[3]
        # decompose target unitaries to xzx
        xzx_decomposer = OneQubitEulerDecomposer("XZX")
        for iqubit, decomp in enumerate(decomposition[1::2]):
            euler_angles = xzx_decomposer.angles_and_phase(decomp)
            # same column permutation as for qubit 0
            euler_q1[iqubit, [1, 2, 0]] = euler_angles[:3]
            global_phase += euler_angles[3]

        qc = QuantumCircuit(2)
        # start from the target's phase, remove the phase of the three basis gates and add
        # the phases collected from the single-qubit decompositions
        qc.global_phase = target_decomposed.global_phase
        qc.global_phase -= best_nbasis * self.basis.global_phase
        qc.global_phase += global_phase

        # x12 is the angle applied to qubit 0 between the first and the second cx; the flags
        # below classify it so that special values can be emitted as rz/sx
        x12 = euler_q0[1][2] + euler_q0[2][0]
        x12_isNonZero = not math.isclose(x12, 0, abs_tol=atol)
        x12_isOddMult = None
        x12_isPiMult = math.isclose(math.sin(x12), 0, abs_tol=atol)
        if x12_isPiMult:
            # x12_phase is only defined (and only read later) when x12 is a multiple of pi
            x12_isOddMult = math.isclose(math.cos(x12), -1, abs_tol=atol)
            x12_phase = math.pi * math.cos(x12)
        x02_add = x12 - euler_q0[1][0]
        x12_isHalfPi = math.isclose(x12, math.pi / 2, abs_tol=atol)

        # TODO: make this more efficient to avoid double decomposition
        # leading single-qubit gate on qubit 0: built as a 1q circuit, converted to a matrix
        # and re-synthesised with self._decomposer1q
        circ = QuantumCircuit(1)
        circ.rz(euler_q0[0][0], 0)
        circ.rx(euler_q0[0][1], 0)
        if x12_isNonZero and x12_isPiMult:
            circ.rz(euler_q0[0][2] - x02_add, 0)
        else:
            circ.rz(euler_q0[0][2] + euler_q0[1][0], 0)
        circ.h(0)
        qceuler = self._decomposer1q(Operator(circ).data)
        qc.compose(qceuler, [0], inplace=True)

        # leading single-qubit gate on qubit 1, handled the same way
        circ = QuantumCircuit(1)
        circ.rx(euler_q1[0][0], 0)
        circ.rz(euler_q1[0][1], 0)
        circ.rx(euler_q1[0][2] + euler_q1[1][0], 0)
        circ.h(0)
        qceuler = self._decomposer1q(Operator(circ).data)
        qc.compose(qceuler, [1], inplace=True)

        qc.cx(1, 0)

        # gates between the first and second cx
        if x12_isPiMult:
            # even or odd multiple
            if x12_isNonZero:
                qc.global_phase += x12_phase
            if x12_isNonZero and x12_isOddMult:
                qc.rz(-euler_q0[1][1], 0)
            else:
                qc.rz(euler_q0[1][1], 0)
                qc.global_phase += math.pi
        if x12_isHalfPi:
            qc.sx(0)
            qc.global_phase -= math.pi / 4
        elif x12_isNonZero and not x12_isPiMult:
            # this is non-optimal but doesn't seem to occur currently
            if self.pulse_optimize is None:
                qc.compose(self._decomposer1q(Operator(RXGate(x12)).data), [0],
                           inplace=True)
            else:
                raise QiskitError(
                    "possible non-pulse-optimal decomposition encountered")
        if math.isclose(euler_q1[1][1], math.pi / 2, abs_tol=atol):
            qc.sx(1)
            qc.global_phase -= math.pi / 4
        else:
            # this is non-optimal but doesn't seem to occur currently
            if self.pulse_optimize is None:
                qc.compose(self._decomposer1q(
                    Operator(RXGate(euler_q1[1][1])).data), [1],
                           inplace=True)
            else:
                raise QiskitError(
                    "possible non-pulse-optimal decomposition encountered")
        qc.rz(euler_q1[1][2] + euler_q1[2][0], 1)

        qc.cx(1, 0)

        # gates between the second and third cx
        qc.rz(euler_q0[2][1], 0)
        if math.isclose(euler_q1[2][1], math.pi / 2, abs_tol=atol):
            qc.sx(1)
            qc.global_phase -= math.pi / 4
        else:
            # this is non-optimal but doesn't seem to occur currently
            if self.pulse_optimize is None:
                qc.compose(self._decomposer1q(
                    Operator(RXGate(euler_q1[2][1])).data), [1],
                           inplace=True)
            else:
                raise QiskitError(
                    "possible non-pulse-optimal decomposition encountered")

        qc.cx(1, 0)

        # trailing single-qubit gate on qubit 0, re-synthesised with self._decomposer1q
        circ = QuantumCircuit(1)
        circ.h(0)
        circ.rz(euler_q0[2][2] + euler_q0[3][0], 0)
        circ.rx(euler_q0[3][1], 0)
        circ.rz(euler_q0[3][2], 0)
        qceuler = self._decomposer1q(Operator(circ).data)
        qc.compose(qceuler, [0], inplace=True)

        # trailing single-qubit gate on qubit 1
        circ = QuantumCircuit(1)
        circ.h(0)
        circ.rx(euler_q1[2][2] + euler_q1[3][0], 0)
        circ.rz(euler_q1[3][1], 0)
        circ.rx(euler_q1[3][2], 0)
        qceuler = self._decomposer1q(Operator(circ).data)
        qc.compose(qceuler, [1], inplace=True)

        # TODO: fix the sign problem to avoid correction here
        # if the [0, 0] element of the built circuit is the negative of the target's,
        # shift the global phase by pi
        if cmath.isclose(target_decomposed.unitary_matrix[0, 0],
                         -(Operator(qc).data[0, 0]),
                         abs_tol=atol):
            qc.global_phase += math.pi
        return qc
class TwoQubitBasisDecomposer():
    """A class for decomposing 2-qubit unitaries into minimal number of uses of a 2-qubit
    basis gate.

    Args:
        gate (Gate): Two-qubit gate to be used in the KAK decomposition.
        basis_fidelity (float): Fidelity to be assumed for applications of KAK Gate. Default 1.0.
        euler_basis (str): Basis string to be provided to OneQubitEulerDecomposer for 1Q synthesis.
            Valid options are ['ZYZ', 'ZXZ', 'XYX', 'U', 'U3', 'U1X', 'PSX', 'ZSX', 'RR'].
            Default 'U3'.
    """
    def __init__(self, gate, basis_fidelity=1.0, euler_basis=None):
        """Store the basis gate and precompute the fixed local unitaries.

        The Weyl decomposition of ``gate`` is kept on ``self.basis``. The
        constant single-qubit factors used by the two-gate (``q*``) and
        three-gate (``u*``) decompositions are built once here.
        """
        self.gate = gate
        self.basis_fidelity = basis_fidelity

        self.basis = basis = TwoQubitWeylDecomposition(Operator(gate).data)
        self._decomposer1q = OneQubitEulerDecomposer(
            'U3' if euler_basis is None else euler_basis)

        # FIXME: find good tolerances
        self.is_supercontrolled = np.isclose(
            basis.a, np.pi / 4) and np.isclose(basis.c, 0.)

        # Scalars shared by several of the matrices below.
        b = basis.b
        exp_p = np.exp(1j * b)
        exp_m = np.exp(-1j * b)
        exp_2p = np.exp(2j * b)
        exp_2m = np.exp(-2j * b)
        cos_2b = np.cos(2 * b)
        sin_2b = np.sin(2 * b)
        inv_sqrt2 = 1 / np.sqrt(2)
        inv_1p1j = 1 / (1 + 1j)
        inv_1m1j = 1 / (1 - 1j)

        # Create some useful matrices U1, U2, U3 are equivalent to the basis,
        # expand as Ui = Ki1.Ubasis.Ki2
        K11l = inv_1p1j * np.array([[-1j * exp_m, exp_m],
                                    [-1j * exp_p, -exp_p]], dtype=complex)
        K11r = inv_sqrt2 * np.array([[1j * exp_m, -exp_m],
                                     [exp_p, -1j * exp_p]], dtype=complex)
        K12l = inv_1p1j * np.array([[1j, 1j],
                                    [-1, 1]], dtype=complex)
        K12r = inv_sqrt2 * np.array([[1j, 1],
                                     [-1, -1j]], dtype=complex)
        K32lK21l = inv_sqrt2 * np.array([[1 + 1j * cos_2b, 1j * sin_2b],
                                         [1j * sin_2b, 1 - 1j * cos_2b]],
                                        dtype=complex)
        K21r = inv_1m1j * np.array([[-1j * exp_2m, exp_2m],
                                    [1j * exp_2p, exp_2p]], dtype=complex)
        K22l = inv_sqrt2 * np.array([[1, -1],
                                     [1, 1]], dtype=complex)
        K22r = np.array([[0, 1],
                         [-1, 0]], dtype=complex)
        K31l = inv_sqrt2 * np.array([[exp_m, exp_m],
                                     [-exp_p, exp_p]], dtype=complex)
        K31r = 1j * np.array([[exp_p, 0],
                              [0, -exp_m]], dtype=complex)
        K32r = inv_1m1j * np.array([[exp_p, -exp_m],
                                    [-1j * exp_p, -1j * exp_m]], dtype=complex)

        # Conjugate transposes of the basis gate's own local factors.
        k1ld, k1rd, k2ld, k2rd = (
            local.T.conj()
            for local in (basis.K1l, basis.K1r, basis.K2l, basis.K2r))

        # Pre-build the fixed parts of the matrices used in 3-part decomposition
        self.u0l = K31l.dot(k1ld)
        self.u0r = K31r.dot(k1rd)
        self.u1l = k2ld.dot(K32lK21l).dot(k1ld)
        self.u1ra = k2rd.dot(K32r)
        self.u1rb = K21r.dot(k1rd)
        self.u2la = k2ld.dot(K22l)
        self.u2lb = K11l.dot(k1ld)
        self.u2ra = k2rd.dot(K22r)
        self.u2rb = K11r.dot(k1rd)
        self.u3l = k2ld.dot(K12l)
        self.u3r = k2rd.dot(K12r)

        # Pre-build the fixed parts of the matrices used in the 2-part decomposition
        self.q0l = K12l.T.conj().dot(k1ld)
        self.q0r = K12r.T.conj().dot(_ipz).dot(k1rd)
        self.q1la = k2ld.dot(K11l.T.conj())
        self.q1lb = K11l.dot(k1ld)
        self.q1ra = k2rd.dot(_ipz).dot(K11r.T.conj())
        self.q1rb = K11r.dot(k1rd)
        self.q2l = k2ld.dot(K12l)
        self.q2r = k2rd.dot(K12r)

        # Decomposition into different number of gates
        # In the future could use different decomposition functions for different basis classes, etc
        if not self.is_supercontrolled:
            message = (
                "Only know how to decompose properly for supercontrolled basis gate. "
                "This gate is ~Ud({}, {}, {})".format(basis.a, basis.b, basis.c))
            warnings.warn(message)
        # Indexed by the number of basis-gate uses (0 through 3).
        self.decomposition_fns = [
            self.decomp0,
            self.decomp1,
            self.decomp2_supercontrolled,
            self.decomp3_supercontrolled,
        ]

    def traces(self, target):
        """Give the expected traces :math:`|Tr(U \\cdot Utarget^dag)|` for different number of
        basis gates."""
        # Future gotcha: extending this to non-supercontrolled basis.
        # Careful: closest distance between a1,b1,c1 and a2,b2,c2 may be between reflections.
        # This doesn't come up if either c1==0 or c2==0 but otherwise be careful.

        return [
            4 * (np.cos(target.a) * np.cos(target.b) * np.cos(target.c) +
                 1j * np.sin(target.a) * np.sin(target.b) * np.sin(target.c)),
            4 *
            (np.cos(np.pi / 4 - target.a) * np.cos(self.basis.b - target.b) *
             np.cos(target.c) + 1j * np.sin(np.pi / 4 - target.a) *
             np.sin(self.basis.b - target.b) * np.sin(target.c)),
            4 * np.cos(target.c), 4
        ]

    @staticmethod
    def decomp0(target, eps=1e-15):
        """Decompose target ~Ud(x, y, z) with 0 uses of the basis gate.
        Result Ur has trace:
        :math:`|Tr(Ur.Utarget^dag)| = 4|(cos(x)cos(y)cos(z)+ j sin(x)sin(y)sin(z)|`,
        which is optimal for all targets and bases"""

        U0l = target.K1l.dot(target.K2l)
        U0r = target.K1r.dot(target.K2r)
        U0l.real[abs(U0l.real) < eps] = 0.0
        U0l.imag[abs(U0l.imag) < eps] = 0.0
        U0r.real[abs(U0r.real) < eps] = 0.0
        U0r.imag[abs(U0r.imag) < eps] = 0.0
        return U0r, U0l

    def decomp1(self, target):
        """Decompose target ~Ud(x, y, z) with 1 uses of the basis gate ~Ud(a, b, c).
        Result Ur has trace:
        .. math::

            |Tr(Ur.Utarget^dag)| = 4|cos(x-a)cos(y-b)cos(z-c) + j sin(x-a)sin(y-b)sin(z-c)|

        which is optimal for all targets and bases with z==0 or c==0"""
        # FIXME: fix for z!=0 and c!=0 using closest reflection (not always in the Weyl chamber)
        U0l = target.K1l.dot(self.basis.K1l.T.conj())
        U0r = target.K1r.dot(self.basis.K1r.T.conj())
        U1l = self.basis.K2l.T.conj().dot(target.K2l)
        U1r = self.basis.K2r.T.conj().dot(target.K2r)

        return U1r, U1l, U0r, U0l

    def decomp2_supercontrolled(self, target):
        """Decompose target ~Ud(x, y, z) with 2 uses of the basis gate.

        For supercontrolled basis ~Ud(pi/4, b, 0), all b, result Ur has trace
        .. math::

            |Tr(Ur.Utarget^dag)| = 4cos(z)

        which is the optimal approximation for basis of CNOT-class ``~Ud(pi/4, 0, 0)``
        or DCNOT-class ``~Ud(pi/4, pi/4, 0)`` and any target.
        May be sub-optimal for b!=0 (e.g. there exists exact decomposition for any target using B
        ``B~Ud(pi/4, pi/8, 0)``, but not this decomposition.)
        This is an exact decomposition for supercontrolled basis and target ``~Ud(x, y, 0)``.
        No guarantees for non-supercontrolled basis.

        Returns the six local unitaries in the order
        ``(U2r, U2l, U1r, U1l, U0r, U0l)``.
        """
        # Only the middle layer depends on the target's coordinates.
        rz_left = rz_array(-2 * target.a)
        rz_right = rz_array(2 * target.b)

        return (
            self.q2r.dot(target.K2r),
            self.q2l.dot(target.K2l),
            self.q1ra.dot(rz_right).dot(self.q1rb),
            self.q1la.dot(rz_left).dot(self.q1lb),
            target.K1r.dot(self.q0r),
            target.K1l.dot(self.q0l),
        )

    def decomp3_supercontrolled(self, target):
        """Decompose target with 3 uses of the basis.
        This is an exact decomposition for supercontrolled basis ~Ud(pi/4, b, 0), all b,
        and any target. No guarantees for non-supercontrolled basis.

        Returns the eight local unitaries in the order
        ``(U3r, U3l, U2r, U2l, U1r, U1l, U0r, U0l)``.
        """
        # The target's coordinates enter only through these three rotations.
        rz_c = rz_array(-2 * target.c)
        rz_a = rz_array(-2 * target.a)
        rz_b = rz_array(2 * target.b)

        return (
            self.u3r.dot(target.K2r),
            self.u3l.dot(target.K2l),
            self.u2ra.dot(rz_b).dot(self.u2rb),
            self.u2la.dot(rz_a).dot(self.u2lb),
            self.u1ra.dot(rz_c).dot(self.u1rb),
            self.u1l,
            target.K1r.dot(self.u0r),
            target.K1l.dot(self.u0l),
        )

    def __call__(self, target, basis_fidelity=None):
        """Decompose a two-qubit unitary over fixed basis + SU(2) using the best approximation given
        that each basis application has a finite fidelity.

        Args:
            target: the unitary to decompose. Objects with ``to_operator`` or
                ``to_matrix`` are converted first; anything else is passed to
                ``np.asarray(..., dtype=complex)``.
            basis_fidelity (float): fidelity assumed for each use of the basis
                gate. ``None`` (the default) uses ``self.basis_fidelity``. An
                explicit ``0`` is honored rather than treated as "not given".

        Returns:
            QuantumCircuit: two-qubit circuit alternating single-qubit gates
            with ``self.gate``.

        Raises:
            QiskitError: if the target is not a 4x4 matrix or is not unitary.
        """
        # ``basis_fidelity or self.basis_fidelity`` would discard an explicit 0.0,
        # because 0.0 is falsy; only fall back when nothing was supplied.
        if basis_fidelity is None:
            basis_fidelity = self.basis_fidelity
        if hasattr(target, 'to_operator'):
            # If input is a BaseOperator subclass this attempts to convert
            # the object to an Operator so that we can extract the underlying
            # numpy matrix from `Operator.data`.
            target = target.to_operator().data
        if hasattr(target, 'to_matrix'):
            # If input is Gate subclass or some other class object that has
            # a to_matrix method this will call that method.
            target = target.to_matrix()
        # Convert to numpy array in case not already an array
        target = np.asarray(target, dtype=complex)
        # Check input is a 2-qubit unitary
        if target.shape != (4, 4):
            raise QiskitError(
                "TwoQubitBasisDecomposer: expected 4x4 matrix for target")
        if not is_unitary_matrix(target):
            raise QiskitError(
                "TwoQubitBasisDecomposer: target matrix is not unitary.")

        target_decomposed = TwoQubitWeylDecomposition(target)
        traces = self.traces(target_decomposed)
        # Weight the fidelity reachable with i basis gates by the cost of using i gates.
        expected_fidelities = [
            trace_to_fid(traces[i]) * basis_fidelity**i for i in range(4)
        ]

        best_nbasis = np.argmax(expected_fidelities)
        decomposition = self.decomposition_fns[best_nbasis](target_decomposed)
        decomposition_euler = [
            self._decomposer1q._decompose(x) for x in decomposition
        ]

        q = QuantumRegister(2)
        return_circuit = QuantumCircuit(q)
        return_circuit.global_phase = target_decomposed.global_phase
        return_circuit.global_phase -= best_nbasis * self.basis.global_phase
        if best_nbasis == 2:
            return_circuit.global_phase += np.pi
        # Entries come in pairs: even index on q[0], odd index on q[1], with one
        # basis gate after each of the first ``best_nbasis`` pairs.
        for i in range(best_nbasis):
            return_circuit.compose(decomposition_euler[2 * i], [q[0]],
                                   inplace=True)
            return_circuit.compose(decomposition_euler[2 * i + 1], [q[1]],
                                   inplace=True)
            return_circuit.append(self.gate, [q[0], q[1]])
        return_circuit.compose(decomposition_euler[2 * best_nbasis], [q[0]],
                               inplace=True)
        return_circuit.compose(decomposition_euler[2 * best_nbasis + 1],
                               [q[1]],
                               inplace=True)

        return return_circuit

    def num_basis_gates(self, unitary):
        """Computes the number of basis gates needed in
        a decomposition of input unitary

        Objects with ``to_operator`` or ``to_matrix`` are converted first. The
        result is the index (0-3) returned by ``np.argmax`` over the fidelities
        weighted by ``self.basis_fidelity`` raised to the number of uses.
        """
        if hasattr(unitary, 'to_operator'):
            unitary = unitary.to_operator().data
        if hasattr(unitary, 'to_matrix'):
            unitary = unitary.to_matrix()
        unitary = np.asarray(unitary, dtype=complex)
        a, b, c = weyl_coordinates(unitary)[:]

        # Offsets of the target coordinates from the basis gate's (pi/4, b) point.
        delta_a = np.pi / 4 - a
        delta_b = self.basis.b - b

        zero_uses = 4 * (np.cos(a) * np.cos(b) * np.cos(c) +
                         1j * np.sin(a) * np.sin(b) * np.sin(c))
        one_use = 4 * (np.cos(delta_a) * np.cos(delta_b) * np.cos(c) +
                       1j * np.sin(delta_a) * np.sin(delta_b) * np.sin(c))
        traces = [zero_uses, one_use, 4 * np.cos(c), 4]

        weighted = [
            trace_to_fid(trace) * self.basis_fidelity**uses
            for uses, trace in enumerate(traces)
        ]
        return np.argmax(weighted)