def testShouldRecordAndStopRecord(self):
     with forwardprop.ForwardGradientAccumulator() as acc:
         c = constant_op.constant(1.)
         c_tangent = constant_op.constant(2.)
         acc.watch(c, c_tangent)
         with backprop.GradientTape() as tape:
             self.assertFalse(tape_lib.should_record_backprop([c]))
             self.assertEqual(
                 1,
                 pywrap_tensorflow.TFE_Py_TapeSetPossibleGradientTypes([c]))
             tape.watch(c)
             self.assertEqual(
                 2,
                 pywrap_tensorflow.TFE_Py_TapeSetPossibleGradientTypes([c]))
             self.assertTrue(tape_lib.should_record_backprop([c]))
             with tape_lib.stop_recording():
                 self.assertEqual(
                     0,
                     pywrap_tensorflow.TFE_Py_TapeSetPossibleGradientTypes(
                         [c]))
                 self.assertFalse(tape_lib.should_record_backprop([c]))
                 d = c * 2.
             self.assertEqual(
                 2,
                 pywrap_tensorflow.TFE_Py_TapeSetPossibleGradientTypes([c]))
             self.assertTrue(tape_lib.should_record_backprop([c]))
             self.assertFalse(tape_lib.should_record_backprop([d]))
             self.assertIsNone(acc.jvp(d))
         self.assertIsNone(tape.gradient(d, c))
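
The same pause/resume behaviour is visible through the public API: anything executed under tape.stop_recording() leaves no trace on the tape, so its outputs have no gradient. A minimal standalone sketch (plain tf.GradientTape, not part of the test suite above):

import tensorflow as tf

c = tf.constant(1.)
with tf.GradientTape(persistent=True) as tape:
    tape.watch(c)
    with tape.stop_recording():
        d = c * 2.   # executed eagerly, but not recorded on the tape
    e = c * 3.       # recorded as usual

print(tape.gradient(d, c))  # None: d was produced while recording was paused
print(tape.gradient(e, c))  # tf.Tensor(3.0, ...): e was recorded
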
Example #2
 def testForwardOverBackwardMemoryEfficiency(self, forward_prop_first):
   # Watching depends on nesting, not creation order
   c = constant_op.constant(1.)
   if forward_prop_first:
     forward_accumulator = forwardprop.ForwardAccumulator(c, .1)
     gradient_tape = backprop.GradientTape()
   else:
     gradient_tape = backprop.GradientTape()
     forward_accumulator = forwardprop.ForwardAccumulator(c, .1)
   try:
     gc.disable()
     with gradient_tape as tape:
       # Adding and removing the tape multiple times in different nesting
       # patterns does not affect watch ordering.
       pass
     with forward_accumulator as acc:
       with gradient_tape as tape:
         tape.watch(c)
         d = math_ops.cos(c)
         self.assertFalse(tape_lib.should_record_backprop((acc.jvp(d),)))
         e = math_ops.cos(acc.jvp(d))
         math_ops.cos(e)
         weak_e = weakref.ref(e)
         del e
         self.assertIsNone(weak_e())
       self.assertIsNone(tape.gradient(acc.jvp(d), c))
   finally:
     gc.enable()
Example #3
 def testBackwardOverForward(self, forward_prop_first):
   c = constant_op.constant(1.)
   # Watching depends on nesting, not creation order
   if forward_prop_first:
     forward_accumulator = forwardprop.ForwardAccumulator(c, .1)
     gradient_tape = backprop.GradientTape()
   else:
     gradient_tape = backprop.GradientTape()
     forward_accumulator = forwardprop.ForwardAccumulator(c, .1)
   with gradient_tape as tape:
     with forward_accumulator as acc:
       tape.watch(c)
       d = math_ops.cos(c)
       self.assertTrue(tape_lib.should_record_backprop((acc.jvp(d),)))
     self.assertAllClose(-.1 * math_ops.cos(1.),
                         tape.gradient(acc.jvp(d), c))
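
For reference, acc.jvp(d) is 0.1 * d cos(c)/dc = -0.1 * sin(c), so differentiating it again with the outer tape gives -0.1 * cos(c), the value asserted above. The same backward-over-forward pattern with the public tf.autodiff.ForwardAccumulator API looks roughly like this (a sketch, outside the test harness):

import tensorflow as tf

c = tf.constant(1.)
with tf.GradientTape() as tape:
    tape.watch(c)
    with tf.autodiff.ForwardAccumulator(primals=c, tangents=tf.constant(0.1)) as acc:
        d = tf.cos(c)        # forward pass; the accumulator tracks its JVP
    jvp = acc.jvp(d)         # 0.1 * d cos(c)/dc = -0.1 * sin(c)

print(tape.gradient(jvp, c))  # approximately -0.1 * cos(1.) = -0.054
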
Example #4
 def testBatchBackwardOverForward(self, forward_prop_first):
   x = constant_op.constant(1.)
   tangents = random_ops.random_normal(shape=[10], seed=1)
   expected = [-t * math_ops.cos(1.) for t in tangents]
   if forward_prop_first:
     batch_acc = forwardprop.ForwardAccumulator._batch_accumulator(x, tangents)
     gradient_tape = backprop.GradientTape(persistent=True)
   else:
     gradient_tape = backprop.GradientTape(persistent=True)
     batch_acc = forwardprop.ForwardAccumulator._batch_accumulator(x, tangents)
   with gradient_tape as tape:
     with batch_acc as acc:
       tape.watch(x)
       y = math_ops.cos(x)
       self.assertTrue(tape_lib.should_record_backprop((acc.jvp(y),)))
       jvps = acc.jvp(y)
     d2y_dx2 = [tape.gradient(dy_dx, x) for dy_dx in jvps]
   self.assertAllClose(expected, d2y_dx2)
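
ForwardAccumulator._batch_accumulator is private API; the same ten second derivatives can be produced, less efficiently, with one public accumulator per tangent. A rough sketch under that assumption (one forward pass per tangent rather than a single batched JVP):

import tensorflow as tf

x = tf.constant(1.)
tangents = tf.random.normal(shape=[10], seed=1)

jvps = []
with tf.GradientTape(persistent=True) as tape:
    tape.watch(x)
    for t in tangents:
        # one accumulator (and one forward pass) per tangent
        with tf.autodiff.ForwardAccumulator(primals=x, tangents=t) as acc:
            y = tf.cos(x)
        jvps.append(acc.jvp(y))

d2y_dx2 = [tape.gradient(jvp, x) for jvp in jvps]  # each entry is close to -t * cos(1.)
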
Example #5
    def _update_trainable_params(self):
        params = self.get_parameters(trainable_only=False)

        trainable_params = set()

        for idx, p in enumerate(params):
            # Determine which input tensors/Variables are being recorded for backpropagation.
            # The function should_record_backprop, documented here:
            # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/eager/tape.py#L167
            # accepts lists of *Tensors* (not Variables), returning True if all are being watched by one or more
            # existing gradient tapes, False if not.

            if isinstance(p, (tf.Variable, tf.Tensor)) and should_record_backprop(
                # we need to convert any Variable objects to Tensors here, otherwise
                # should_record_backprop will raise an error
                [tf.convert_to_tensor(p)]
            ):
                trainable_params.add(idx)

        self.trainable_params = trainable_params
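
A standalone illustration of the Variable-to-Tensor conversion the comment above describes; the private import path has moved between TensorFlow versions, so it is guarded the same way as in Example #6 below:

import tensorflow as tf

try:
    from tensorflow.python.eager.tape import should_record_backprop
except ImportError:  # some TensorFlow versions expose the function as should_record
    from tensorflow.python.eager.tape import should_record as should_record_backprop

v = tf.Variable([0.5])
print(should_record_backprop([tf.convert_to_tensor(v)]))  # False: no tape is active

with tf.GradientTape():
    # trainable Variables are watched automatically once they are read under a tape
    print(should_record_backprop([tf.convert_to_tensor(v)]))  # True
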
Example #6
def requires_grad(tensor):
    """Returns True if the tensor is considered trainable.

    .. warning::

        The implementation depends on the contained tensor type, and
        may be context dependent.

        For example, Torch tensors and PennyLane tensors track trainability
        as a property of the tensor itself. TensorFlow, on the other hand,
        only tracks trainability if being watched by a gradient tape.

    Args:
        tensor (tensor_like): input tensor

    **Example**

    Calling this function on a PennyLane NumPy array:

    >>> x = np.array([1., 5.], requires_grad=True)
    >>> requires_grad(x)
    True
    >>> x.requires_grad = False
    >>> requires_grad(x)
    False

    PyTorch has similar behaviour.

    With TensorFlow, the output is dependent on whether the tensor
    is currently being watched by a gradient tape:

    >>> x = tf.Variable([0.6, 0.1])
    >>> requires_grad(x)
    False
    >>> with tf.GradientTape() as tape:
    ...     print(requires_grad(x))
    True

    While TensorFlow constants are by default not trainable, they can be
    manually watched by the gradient tape:

    >>> x = tf.constant([0.6, 0.1])
    >>> with tf.GradientTape() as tape:
    ...     print(requires_grad(x))
    False
    >>> with tf.GradientTape() as tape:
    ...     tape.watch([x])
    ...     print(requires_grad(x))
    True
    """
    interface = get_interface(tensor)

    if interface == "tensorflow":
        import tensorflow as tf

        try:
            from tensorflow.python.eager.tape import should_record_backprop
        except ImportError:  # pragma: no cover
            from tensorflow.python.eager.tape import should_record as should_record_backprop

        return should_record_backprop([tf.convert_to_tensor(tensor)])

    if interface in ("torch", "autograd"):
        return tensor.requires_grad

    if interface == "numpy":
        return False

    if interface == "jax":
        return True

    raise ValueError(f"Argument {tensor} is an unknown object")
Example #7
 def requires_grad(self):
     return should_record_backprop([self.astensor(self.data)])
Example #8
    def _TFQNode(*input_, **input_kwargs):
        # Determine which input tensors/Variables are being recorded for backpropagation.
        # The function should_record_backprop, documented here:
        # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/eager/tape.py#L163
        # accepts lists of *tensors* (not Variables), returning True if all are being watched by one or more
        # existing gradient tapes, False if not.
        trainable_args = {
            idx
            for idx, i in enumerate(input_)
            if isinstance(i, (Variable, tf.Tensor))
            and should_record_backprop([tf.convert_to_tensor(i)])
        }

        # detach all input Tensors, convert to NumPy array
        args = [i.numpy() if isinstance(i, (Variable, tf.Tensor)) else i for i in input_]
        kwargs = {
            k: v.numpy() if isinstance(v, (Variable, tf.Tensor)) else v
            for k, v in input_kwargs.items()
        }

        # if NumPy array is scalar, convert to a Python float
        args = [i.tolist() if (isinstance(i, np.ndarray) and not i.shape) else i for i in args]
        kwargs = {
            k: v.tolist() if (isinstance(v, np.ndarray) and not v.shape) else v
            for k, v in kwargs.items()
        }

        # evaluate the QNode
        qnode.set_trainable_args(trainable_args)
        res = qnode(*args, **kwargs)

        if not isinstance(res, np.ndarray):
            # scalar result, cast to NumPy scalar
            res = np.array(res)

        def grad(grad_output, **tfkwargs):
            """Returns the vector-Jacobian product"""
            # evaluate the Jacobian matrix of the QNode
            variables = tfkwargs.get("variables", None)
            qnode.set_trainable_args(trainable_args)
            jacobian = qnode.jacobian(args, kwargs)
            jacobian = tf.constant(jacobian, dtype=dtype)

            # Reshape gradient output array as a 2D row-vector.
            grad_output_row = tf.transpose(tf.reshape(grad_output, [-1, 1]))

            # Calculate the vector-Jacobian matrix product, and flatten the output.
            grad_input = tf.matmul(grad_output_row, jacobian)
            grad_input = tf.reshape(grad_input, [-1])

            grad_input_unflattened = unflatten_tf(grad_input, input_)[0]

            for idx in set(range(len(args))) - trainable_args:
                # If a particular input argument is non-differentiable,
                # replace the corresponding position in the gradient with None.
                grad_input_unflattened[idx] = None

            if variables is not None:
                return grad_input_unflattened, variables

            return grad_input_unflattened

        return tf.convert_to_tensor(res, dtype=dtype), grad
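
The reshape/matmul step inside grad() is a plain vector-Jacobian product: grad_output is flattened into a 1 x m row vector and multiplied with the m x n Jacobian, giving the gradient with respect to the n flattened inputs. The same step in isolation, with hypothetical shapes and values:

import tensorflow as tf

jacobian = tf.constant([[1., 2., 3.],
                        [4., 5., 6.]])  # shape (m, n) = (2, 3)
grad_output = tf.constant([10., 20.])   # dL/dy, shape (m,)

grad_output_row = tf.transpose(tf.reshape(grad_output, [-1, 1]))  # shape (1, m)
grad_input = tf.matmul(grad_output_row, jacobian)                 # shape (1, n)
grad_input = tf.reshape(grad_input, [-1])                         # shape (n,)
print(grad_input.numpy())  # [ 90. 120. 150.], i.e. grad_output @ jacobian
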
Example #9
    def _TFQNode(*input_, **input_kwargs):
        # Determine which input tensors/Variables are being recorded for backpropagation.
        # The function should_record_backprop, documented here:
        # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/python/eager/tape.py#L163
        # accepts lists of *tensors* (not Variables), returning True if all are being watched by one or more
        # existing gradient tapes, False if not.
        requires_grad = [
            should_record_backprop([tf.convert_to_tensor(i)])
            if isinstance(i, (Variable, tf.Tensor))
            else False
            for i in input_
        ]

        # detach all input Tensors, convert to NumPy array
        args = [i.numpy() if isinstance(i, (Variable, tf.Tensor)) else i for i in input_]
        kwargs = {
            k: v.numpy() if isinstance(v, (Variable, tf.Tensor)) else v
            for k, v in input_kwargs.items()
        }

        # if NumPy array is scalar, convert to a Python float
        args = [i.tolist() if (isinstance(i, np.ndarray) and not i.shape) else i for i in args]
        kwargs = {
            k: v.tolist() if (isinstance(v, np.ndarray) and not v.shape) else v
            for k, v in kwargs.items()
        }

        # evaluate the QNode
        res = qnode(*args, **kwargs)

        if not isinstance(res, np.ndarray):
            # scalar result, cast to NumPy scalar
            res = np.array(res)

        def grad(grad_output, **tfkwargs):
            """Returns the vector-Jacobian product"""
            diff_indices = None
            non_diff_indices = set()

            # determine the QNode variables which should be differentiated
            for differentiable, arg_variable in zip(requires_grad, qnode.arg_vars):
                if not differentiable:
                    indices = [i.idx for i in _flatten(arg_variable)]
                    non_diff_indices.update(indices)

            if non_diff_indices:
                diff_indices = set(range(qnode.num_variables)) - non_diff_indices

            # evaluate the Jacobian matrix of the QNode
            variables = tfkwargs.get("variables", None)
            jacobian = qnode.jacobian(args, kwargs, wrt=diff_indices)
            jacobian = tf.constant(jacobian, dtype=dtype)

            # Reshape gradient output array as a 2D row-vector.
            grad_output_row = tf.transpose(tf.reshape(grad_output, [-1, 1]))

            # Calculate the vector-Jacobian matrix product, and flatten the output.
            grad_input = tf.matmul(grad_output_row, jacobian)
            grad_input = tf.reshape(grad_input, [-1])

            if non_diff_indices:
                # TensorFlow requires we return a gradient of size (num_variables,)
                res = np.zeros([qnode.num_variables])
                indices = np.fromiter(diff_indices, dtype=np.int64)
                res[indices] = grad_input
                grad_input = tf.constant(res, dtype=dtype)

            grad_input_unflattened = unflatten_tf(grad_input, input_)[0]

            if variables is not None:
                return grad_input_unflattened, variables

            return grad_input_unflattened

        return tf.convert_to_tensor(res, dtype=dtype), grad
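
The zero-padding branch above scatters the gradient of the differentiable variables back into a vector of length num_variables, leaving zeros at the non-differentiable positions. The same step in isolation, with hypothetical sizes and indices (NumPy only):

import numpy as np

num_variables = 5                        # hypothetical total number of QNode variables
diff_indices = {0, 2, 4}                 # hypothetical differentiable positions
grad_input = np.array([0.3, -1.2, 0.7])  # gradient w.r.t. the differentiable variables only

res = np.zeros([num_variables])
res[np.fromiter(sorted(diff_indices), dtype=np.int64)] = grad_input
print(res)  # [ 0.3  0.  -1.2  0.   0.7]
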