def compute_vjp(dy, jac):
    """Convenience function to compute the vector-Jacobian product for a given
    vector of gradient outputs and a Jacobian.

    Args:
        dy (tensor_like): vector of gradient outputs
        jac (tensor_like): Jacobian matrix. For an n-dimensional ``dy``
            vector, the first n dimensions of ``jac`` should match
            the shape of ``dy``.

    Returns:
        tensor_like: the vector-Jacobian product
    """
    if jac is None:
        return None

    dy_row = math.reshape(dy, [-1])

    if not isinstance(dy_row, np.ndarray):
        jac = math.convert_like(jac, dy_row)

    jac = math.reshape(jac, [dy_row.shape[0], -1])

    try:
        if math.allclose(dy, 0):
            # If the dy vector is zero, then the
            # corresponding element of the VJP will be zero.
            num_params = jac.shape[1]
            return math.convert_like(np.zeros([num_params]), dy)
    except (AttributeError, TypeError):
        pass

    return math.tensordot(jac, dy_row, [[0], [0]])

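# A minimal usage sketch for ``compute_vjp``, assuming ``math`` is PennyLane's
# ``qml.math`` module and plain NumPy inputs; the helper name
# ``_example_compute_vjp`` is illustrative only. The first (output) dimension
# of ``jac`` is contracted against ``dy``:

def _example_compute_vjp():
    import numpy as np

    dy = np.array([1.0, 0.5])  # gradient-output vector for 2 measurements
    jac = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]])  # 2 outputs x 3 params

    vjp = compute_vjp(dy, jac)  # contracts the output axis: dy @ jac
    assert np.allclose(vjp, [0.3, 0.45, 0.6])
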
def _su2su2_to_tensor_products(U):
    r"""Given a matrix :math:`U = A \otimes B` in SU(2) x SU(2), extract the
    two SU(2) operations A and B.

    This process has been described in detail in the Appendix of Coffey & Deiotte,
    https://link.springer.com/article/10.1007/s11128-009-0156-3
    """
    # First, write A = [[a1, a2], [-a2*, a1*]], which we can do for any SU(2) element.
    # Then, A \otimes B = [[a1 B, a2 B], [-a2*B, a1*B]] = [[C1, C2], [C3, C4]]
    # where the Ci are 2x2 matrices.
    C1 = U[0:2, 0:2]
    C2 = U[0:2, 2:4]
    C3 = U[2:4, 0:2]
    C4 = U[2:4, 2:4]

    # From the definition of A \otimes B, C1 C4^\dag = a1^2 I, so we can extract a1
    C14 = math.dot(C1, math.conj(math.T(C4)))
    a1 = math.sqrt(math.cast_like(C14[0, 0], 1j))

    # Similarly, -C2 C3^\dag = a2^2 I, so we can extract a2
    C23 = math.dot(C2, math.conj(math.T(C3)))
    a2 = math.sqrt(-math.cast_like(C23[0, 0], 1j))

    # This gets us a1, a2 up to a sign. To resolve the sign, ensure that
    # C1 C2^\dag = a1 a2* I
    C12 = math.dot(C1, math.conj(math.T(C2)))

    if not math.allclose(a1 * math.conj(a2), C12[0, 0]):
        a2 *= -1

    # Construct A
    A = math.stack([math.stack([a1, a2]), math.stack([-math.conj(a2), math.conj(a1)])])

    # Next, extract B. Can do from any of the C, just need to be careful in
    # case one of the elements of A is 0.
    if not math.allclose(A[0, 0], 0.0, atol=1e-6):
        B = C1 / math.cast_like(A[0, 0], 1j)
    else:
        B = C2 / math.cast_like(A[0, 1], 1j)

    return math.convert_like(A, U), math.convert_like(B, U)

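# A quick self-check sketch for ``_su2su2_to_tensor_products``, assuming ``qml``
# (PennyLane) is available as in the surrounding module; the helper name
# ``_check_su2su2_extraction`` is illustrative only. It builds U = A ⊗ B from
# two known SU(2) gates and confirms the extracted factors reproduce U.

def _check_su2su2_extraction():
    import numpy as np
    import pennylane as qml

    # RZ and RY matrices both have unit determinant, so they are in SU(2)
    A_in = qml.RZ(0.3, wires=0).matrix
    B_in = qml.RY(0.5, wires=0).matrix
    U = np.kron(A_in, B_in)

    A_out, B_out = _su2su2_to_tensor_products(U)

    # A and B are each recovered only up to a shared sign flip, so compare
    # the tensor product rather than the individual factors
    assert np.allclose(np.kron(A_out, B_out), U)
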
def test_convert_tensor_like(self, t1, t2):
    """Test that converting t1 like t2 results in t1 being cast to the same
    tensor type as t2"""
    res = fn.convert_like(t1, t2)

    # if tensorflow or pytorch, extract view of underlying data
    if hasattr(res, "numpy"):
        res = res.numpy()

    if hasattr(t2, "numpy"):
        t2 = t2.numpy()

    assert fn.allequal(res, t1)
    assert isinstance(res, np.ndarray if isinstance(t2, (list, tuple)) else t2.__class__)

def _decomposition_3_cnots(U, wires):
    r"""The most general form of this decomposition is U = (A \otimes B) V (C \otimes D),
    where V is as depicted in the circuit below:

     -╭U- = -C--╭X--RZ(d)--╭C---------╭X--A-
     -╰U- = -D--╰C--RY(b)--╰X--RY(a)--╰C--B-
    """
    # First we add a SWAP as per v1 of arXiv:quant-ph/0308033, which helps with some
    # rearranging of gates in the decomposition (it will cancel out the fact
    # that we need to add a SWAP to fix the determinant in another part later).
    swap_U = np.exp(1j * np.pi / 4) * math.dot(math.cast_like(SWAP, U), U)

    # Choose the rotation angles of RZ, RY in the two-qubit decomposition.
    # They are chosen as per Proposition V.1 in quant-ph/0308033 and are based
    # on the phases of the eigenvalues of
    #    \gamma(U) = (E^\dag U E) (E^\dag U E)^T.
    # The rotation angles can be computed as follows (any three eigenvalues can be used)
    u = math.dot(Edag, math.dot(swap_U, E))
    gammaU = math.dot(u, math.T(u))
    evs, _ = math.linalg.eig(gammaU)

    # We will sort the angles so that results are consistent across interfaces.
    angles = math.sort([math.angle(ev) for ev in evs])

    x, y, z = angles[0], angles[1], angles[2]

    # Compute functions of the eigenvalues; there are different options in v1
    # vs. v3 of the paper, I'm not entirely sure why. This is the version from v3.
    alpha = (x + y) / 2
    beta = (x + z) / 2
    delta = (z + y) / 2

    # This is the interior portion of the decomposition circuit
    interior_decomp = [
        qml.CNOT(wires=[wires[1], wires[0]]),
        qml.RZ(delta, wires=wires[0]),
        qml.RY(beta, wires=wires[1]),
        qml.CNOT(wires=wires),
        qml.RY(alpha, wires=wires[1]),
        qml.CNOT(wires=[wires[1], wires[0]]),
    ]

    # We need the matrix representation of this interior part, V, in order to
    # decompose U = (A \otimes B) V (C \otimes D).
    #
    # Looking at the decomposition above, V has determinant -1 (because there
    # are 3 CNOTs, each with determinant -1). The relationship between U and V
    # requires that both are in SU(4), so we add a SWAP after V. We will see
    # how this gets fixed later.
    #
    # -╭V- = -╭X--RZ(d)--╭C---------╭X--╭SWAP-
    # -╰V- = -╰C--RY(b)--╰X--RY(a)--╰C--╰SWAP-

    RZd = qml.RZ(math.cast_like(delta, 1j), wires=wires[0]).matrix
    RYb = qml.RY(beta, wires=wires[0]).matrix
    RYa = qml.RY(alpha, wires=wires[0]).matrix

    V_mats = [CNOT10, math.kron(RZd, RYb), CNOT01, math.kron(math.eye(2), RYa), CNOT10, SWAP]

    V = math.convert_like(math.eye(4), U)

    for mat in V_mats:
        V = math.dot(math.cast_like(mat, U), V)

    # Now we need to find the four SU(2) operations A, B, C, D
    A, B, C, D = _extract_su2su2_prefactors(swap_U, V)

    # At this point, we have the following:
    # -╭U-╭SWAP- = --C--╭X-RZ(d)-╭C-------╭X-╭SWAP--A
    # -╰U-╰SWAP- = --D--╰C-RY(b)-╰X-RY(a)-╰C-╰SWAP--B
    #
    # Using the relationship that SWAP(A \otimes B) SWAP = B \otimes A,
    # -╭U-╭SWAP- = --C--╭X-RZ(d)-╭C-------╭X--B--╭SWAP-
    # -╰U-╰SWAP- = --D--╰C-RY(b)-╰X-RY(a)-╰C--A--╰SWAP-
    #
    # Now the SWAPs cancel, giving us the desired decomposition
    # (up to a global phase).
    # -╭U- = --C--╭X-RZ(d)-╭C-------╭X--B--
    # -╰U- = --D--╰C-RY(b)-╰X-RY(a)-╰C--A--

    A_ops = zyz_decomposition(A, wires[1])
    B_ops = zyz_decomposition(B, wires[0])
    C_ops = zyz_decomposition(C, wires[0])
    D_ops = zyz_decomposition(D, wires[1])

    # Return the full decomposition
    return C_ops + D_ops + interior_decomp + A_ops + B_ops

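# A hedged verification sketch for ``_decomposition_3_cnots``, intended for a
# NumPy-valued ``U`` in U(4) that genuinely requires three CNOTs. It assumes
# the module-level ``SWAP`` matrix used above and that every returned operation
# exposes its matrix via ``op.matrix``; the helper name
# ``_check_3_cnot_decomposition`` is illustrative only.

def _check_3_cnot_decomposition(U, wires=(0, 1)):
    import numpy as np

    reconstructed = np.eye(4, dtype=complex)

    for op in _decomposition_3_cnots(U, wires=list(wires)):
        mat = op.matrix

        if len(op.wires) == 1:
            # expand single-qubit gates to the full two-qubit register
            if op.wires[0] == wires[0]:
                mat = np.kron(mat, np.eye(2))
            else:
                mat = np.kron(np.eye(2), mat)
        elif list(op.wires) == [wires[1], wires[0]]:
            # a CNOT with reversed control/target: conjugate by SWAP to
            # express it in the (wires[0], wires[1]) register ordering
            mat = SWAP @ mat @ SWAP

        # gates act left to right on the state, so left-multiply each matrix
        reconstructed = mat @ reconstructed

    # |tr(U^dag R)| == 4 exactly when R equals U up to a global phase
    assert np.isclose(np.abs(np.trace(U.conj().T @ reconstructed)), 4.0)
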
def test_convert_scalar(self, t_like):
    """Test that a Python scalar is converted to a scalar tensor"""
    res = fn.convert_like(5, t_like)
    assert isinstance(res, t_like.__class__)
    assert res.ndim == 0
    assert fn.allequal(res, [5])

def vjp(self, measurements, dy, starting_state=None, use_device_state=False):
    r"""Generate the processing function required to compute the
    vector-Jacobian products of a tape.

    This function can be used with multiple expectation values or a quantum state.
    When a quantum state is given,

    .. code-block:: python

        vjp_f = dev.vjp([qml.state()], dy)
        vjp = vjp_f(tape)

    computes :math:`w = (w_1, \cdots, w_m)`, where

    .. math::

        w_k = \langle v | \frac{\partial}{\partial \theta_k} | \psi_{\pmb{\theta}} \rangle.

    Here, :math:`m` is the total number of trainable parameters,
    :math:`\pmb{\theta}` is the vector of trainable parameters,
    :math:`\psi_{\pmb{\theta}}` is the output quantum state, and
    :math:`v` is the gradient-output vector ``dy``.

    Args:
        measurements (list): List of measurement processes for the
            vector-Jacobian product. Currently, these must be expectation
            values or a quantum state.
        dy (tensor_like): Gradient-output vector. Must have a shape matching
            the output shape of the corresponding tape, i.e. the number of
            measurements if the return type is expectation, or :math:`2^N`
            if the return type is statevector.
        starting_state (tensor_like): post-forward-pass state to start
            execution with. It should be complex-valued. Takes precedence
            over ``use_device_state``.
        use_device_state (bool): use the current device state to initialize.
            A forward pass of the same circuit should be the last thing the
            device has executed. If a ``starting_state`` is provided, that
            takes precedence.

    Returns:
        The processing function required to compute the vector-Jacobian
        products of a tape.
    """
    if self.shots is not None:
        warn(
            "Requested adjoint differentiation to be computed with finite shots."
            " The derivative is always exact when using the adjoint differentiation method.",
            UserWarning,
        )

    tape_return_type = self._check_adjdiff_supported_measurements(measurements)

    if math.allclose(dy, 0) or tape_return_type is None:
        return lambda tape: math.convert_like(np.zeros(len(tape.trainable_params)), dy)

    if tape_return_type is Expectation:
        if len(dy) != len(measurements):
            raise ValueError(
                "Number of observables in the tape must be the same as the length of dy in the vjp method"
            )

        if np.iscomplexobj(dy):
            raise ValueError(
                "The vjp method only works with a real-valued dy when the tape is returning an expectation value"
            )

        ham = qml.Hamiltonian(dy, [m.obs for m in measurements])

        def processing_fn(tape):
            nonlocal ham
            num_params = len(tape.trainable_params)

            if num_params == 0:
                return np.array([], dtype=self._state.dtype)

            new_tape = tape.copy()
            new_tape._measurements = [qml.expval(ham)]

            return self.adjoint_jacobian(new_tape, starting_state, use_device_state).reshape(-1)

        return processing_fn

    if tape_return_type is State:
        if len(dy) != 2 ** len(self.wires):
            raise ValueError(
                "Size of the provided vector dy must be the same as the size of the statevector"
            )

        if np.isrealobj(dy):
            warn(
                "The vjp method only works with complex-valued dy when the tape is returning a statevector. Upcasting dy."
            )

        dy = dy.astype(self.C_DTYPE)

        def processing_fn(tape):
            nonlocal dy
            processed_data = self._process_jacobian_tape(tape, starting_state, use_device_state)
            return adjoint_diff.statevector_vjp(
                processed_data["state_vector"],
                processed_data["ops_serialized"],
                dy,
                processed_data["tp_shift"],
            )

        return processing_fn

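# A minimal usage sketch for this method, assuming a ``lightning.qubit`` device
# (where this adjoint-based implementation lives) and a tape whose single
# parameter is trainable; exact printed values are omitted:
#
#     >>> dev = qml.device("lightning.qubit", wires=2)
#     >>> with qml.tape.QuantumTape() as tape:
#     ...     qml.RX(0.4, wires=0)
#     ...     qml.CNOT(wires=[0, 1])
#     ...     qml.expval(qml.PauliZ(0))
#     ...     qml.expval(qml.PauliZ(1))
#     >>> dy = np.array([1.0, 1.0])
#     >>> processing_fn = dev.vjp(tape.measurements, dy)
#     >>> processing_fn(tape)  # approx. [-2 * sin(0.4)], one entry per parameter
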
def vjp(tape, dy, gradient_fn, gradient_kwargs=None):
    r"""Generate the gradient tapes and processing function required to compute
    the vector-Jacobian products of a tape.

    Consider a function :math:`\mathbf{f}(\mathbf{x})`. The Jacobian is given by

    .. math::

        \mathbf{J}_{\mathbf{f}}(\mathbf{x}) = \begin{pmatrix}
            \frac{\partial f_1}{\partial x_1} &\cdots &\frac{\partial f_1}{\partial x_n}\\
            \vdots &\ddots &\vdots\\
            \frac{\partial f_m}{\partial x_1} &\cdots &\frac{\partial f_m}{\partial x_n}\\
        \end{pmatrix}.

    During backpropagation, the chain rule is applied. For example, consider the
    cost function :math:`h = y \circ f: \mathbb{R}^n \rightarrow \mathbb{R}`, where
    :math:`y: \mathbb{R}^m \rightarrow \mathbb{R}`. The gradient is:

    .. math::

        \nabla h(\mathbf{x}) = \frac{\partial y}{\partial \mathbf{f}}
        \frac{\partial \mathbf{f}}{\partial \mathbf{x}}
        = \frac{\partial y}{\partial \mathbf{f}} \mathbf{J}_{\mathbf{f}}(\mathbf{x}).

    Denote :math:`d\mathbf{y} = \frac{\partial y}{\partial \mathbf{f}}`; we can write this in
    the form of a matrix multiplication:

    .. math::

        \left[\nabla h(\mathbf{x})\right]_{j} = \sum_{i=1}^{m} d\mathbf{y}_i ~ \mathbf{J}_{ij}.

    Thus, we can see that the gradient of the cost function is given by the so-called
    **vector-Jacobian product**; the product of the row-vector :math:`d\mathbf{y}`,
    representing the gradient of subsequent components of the cost function, and
    :math:`\mathbf{J}`, the Jacobian of the current node of interest.

    Args:
        tape (.QuantumTape): quantum tape to differentiate
        dy (tensor_like): Gradient-output vector. Must have shape matching the output
            shape of the corresponding tape.
        gradient_fn (callable): the gradient transform to use to differentiate the tape
        gradient_kwargs (dict): dictionary of keyword arguments to pass when
            determining the gradients of tapes

    Returns:
        tensor_like or None: Vector-Jacobian product. Returns None if the tape
        has no trainable parameters.

    **Example**

    Consider the following Torch-compatible quantum tape:

    .. code-block:: python

        import torch
        from pennylane.interfaces.torch import TorchInterface

        x = torch.tensor([[0.1, 0.2, 0.3],
                          [0.4, 0.5, 0.6]], requires_grad=True, dtype=torch.float64)

        with TorchInterface.apply(qml.tape.JacobianTape()) as tape:
            qml.RX(x[0, 0], wires=0)
            qml.RY(x[0, 1], wires=1)
            qml.RZ(x[0, 2], wires=0)
            qml.CNOT(wires=[0, 1])
            qml.RX(x[1, 0], wires=1)
            qml.RY(x[1, 1], wires=0)
            qml.RZ(x[1, 2], wires=1)
            qml.expval(qml.PauliZ(0))
            qml.probs(wires=1)

    We can use the ``vjp`` function to compute the vector-Jacobian product,
    given a gradient-output vector ``dy``:

    >>> dy = torch.tensor([1., 1., 1.], dtype=torch.float64)
    >>> vjp_tapes, fn = qml.gradients.vjp(tape, dy, qml.gradients.param_shift)

    Note that ``dy`` has shape ``(3,)``, matching the output dimension of the tape
    (1 expectation and 2 probability values).

    Executing the VJP tapes, and applying the processing function:

    >>> dev = qml.device("default.qubit", wires=2)
    >>> vjp = fn([t.execute(dev) for t in vjp_tapes])
    >>> vjp
    tensor([-0.6069, -0.0451,  0.0451, -0.0139, -0.2809,  0.2809],
           dtype=torch.float64, grad_fn=<ViewBackward>)

    The output VJP is also differentiable with respect to the tape parameters:

    >>> cost = torch.sum(vjp)
    >>> cost.backward()
    >>> x.grad
    tensor([[-1.1025e+00, -2.0554e-01, -1.4917e-01],
            [-1.9429e-09, -9.1580e-01,  1.3878e-09]], dtype=torch.float64)
    """
    gradient_kwargs = gradient_kwargs or {}
    num_params = len(tape.trainable_params)

    if num_params == 0:
        # The tape has no trainable parameters; the VJP
        # is simply none.
        return [], lambda _: None

    try:
        if math.allclose(dy, 0):
            # If the dy vector is zero, then the
            # corresponding element of the VJP will be zero,
            # and we can avoid a quantum computation.
            return [], lambda _: math.convert_like(np.zeros([num_params]), dy)
    except (AttributeError, TypeError):
        pass

    gradient_tapes, fn = gradient_fn(tape, **gradient_kwargs)

    def processing_fn(results):
        # postprocess results to compute the Jacobian
        jac = fn(results)
        return compute_vjp(dy, jac)

    return gradient_tapes, processing_fn