def gradient(self, target, sources):
  """Differentiates `target` with respect to `sources` using the traced tape.

  Args:
    target: the tensor to be differentiated.
    sources: a list of Tensors or Variables, the target will be
      differentiated with respect to the sources.

  Returns:
    a list of Tensors (or IndexedSlices, or None), one for each element in
    `sources`.

  Raises:
    RuntimeError: if called inside the context of the tape, or if called more
      than once.
  """
  if self._tape is None:
    raise RuntimeError("GradientTape.gradient can only be called once "
                       "on non-persistent tapes, and "
                       "only when the context manager has exited.")
  # Variables are differentiated through their underlying resource handle.
  source_tensors = []
  for source in sources:
    if isinstance(source, resource_variable_ops.ResourceVariable):
      source_tensors.append(source.handle)
    else:
      source_tensors.append(source)
  grad = imperative_grad.imperative_grad(_default_vspace, self._tape,
                                         [target], source_tensors)
  if not self._persistent:
    # Release the tape so a second call fails fast.
    self._tape = None
  return grad
def gradient(self, target, sources, output_gradients=None):
  """Computes the gradient using operations recorded in context of this tape.

  Args:
    target: Tensor (or list of tensors) to be differentiated.
    sources: a list or nested structure of Tensors or Variables. `target`
      will be differentiated against elements in `sources`.
    output_gradients: a list of gradients, one for each element of
      target. Defaults to None.

  Returns:
    a list or nested structure of Tensors (or IndexedSlices, or None), one
    for each element in `sources`. Returned structure is the same as
    the structure of `sources`.

  Raises:
    RuntimeError: if called inside the context of the tape, or if called more
      than once on a non-persistent tape.
  """
  if self._tape is None:
    raise RuntimeError(
        "GradientTape.gradient can only be called once on "
        "non-persistent tapes.")
  if self._recording:
    if not self._persistent:
      self._pop_tape()
    else:
      # Typo fixes in the user-facing warning: "it's" -> "its",
      # "derrivatives" -> "derivatives".
      logging.log_first_n(
          logging.WARN, "Calling GradientTape.gradient on a persistent "
          "tape inside its context is significantly less "
          "efficient than calling it outside the context (it "
          "causes the gradient ops to be recorded on the "
          "tape, leading to increased CPU and memory usage). "
          "Only call GradientTape.gradient inside the "
          "context if you actually want to trace the "
          "gradient in order to compute higher order "
          "derivatives.", 1)
  flat_sources = nest.flatten(sources)
  flat_sources = [_handle_or_self(x) for x in flat_sources]
  if output_gradients is not None:
    output_gradients = [
        None if x is None else ops.convert_to_tensor(x)
        for x in nest.flatten(output_gradients)
    ]
  flat_grad = imperative_grad.imperative_grad(
      _default_vspace, self._tape, nest.flatten(target), flat_sources,
      output_gradients=output_gradients)
  if not self._persistent:
    # Consume the tape so subsequent calls raise the RuntimeError above.
    self._tape = None
  grad = nest.pack_sequence_as(sources, flat_grad)
  return grad
def grad_fn(*args, **kwds):
  """Runs `f` under a fresh tape and differentiates w.r.t. watched variables."""
  recording_tape = tape.push_new_tape()
  try:
    end_node = f(*args, **kwds)
    if end_node is None:
      raise ValueError(
          "Cannot differentiate a function that returns None; "
          "did you forget to return a value from {}?".format(
              f.__name__))
  finally:
    tape.pop_tape(recording_tape)
  # Note: variables are returned in construction order. This ensures unique
  # order across executions.
  variables = recording_tape.watched_variables()
  if not variables:
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  sources = []
  for v in variables:
    handle = v.handle
    if getattr(handle, "is_packed", False):
      raise ValueError(
          "GradientTape.gradient is not supported on packed EagerTensors yet."
      )
    sources.append(handle)
  grad = imperative_grad.imperative_grad(recording_tape,
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))
def grad_fn(*args):
  """Runs `f` on a new tape; returns (result, [(grad, variable), ...])."""
  this_tape = tape.push_new_tape()
  try:
    end_node = f(*args)
    if end_node is None:
      raise ValueError("Cannot differentiate a function that returns None; "
                       "did you forget to return a value from {}?".format(
                           f.__name__))
  finally:
    tape.pop_tape(this_tape)
  # Sorting variables by id, which is monotonically increasing in construction
  # order. This ensures unique order across executions.
  variables = sorted(this_tape.watched_variables(),
                     key=lambda var: var.handle._id)  # pylint: disable=protected-access
  sources = [var.handle for var in variables]
  if not sources:
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  grad = imperative_grad.imperative_grad(_default_vspace, this_tape,
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))
def gradient(self, target, sources):
  """Computes the gradient using information traced by the tape.

  Args:
    target: the tensor to be differentiated.
    sources: a list of Tensors or Variables, the target will be
      differentiated with respect to the sources.

  Returns:
    a list of Tensors (or IndexedSlices, or None), one for each element in
    `sources`.

  Raises:
    RuntimeError: if called inside the context of the tape, or if called more
      than once.
  """
  if self._tape is None:
    raise RuntimeError("GradientTape.gradient can only be called once "
                       "on non-persistent tapes, and "
                       "only when the context manager has exited.")
  # Resolve variables to their resource handles; plain tensors pass through.
  resolved = [
      s.handle if isinstance(s, resource_variable_ops.ResourceVariable) else s
      for s in sources
  ]
  grad = imperative_grad.imperative_grad(_default_vspace, self._tape,
                                         [target], resolved)
  if not self._persistent:
    self._tape = None
  return grad
def grad_fn(*args):
  """Computes gradients of `f(*args)` w.r.t. the variables it touched."""
  tape.push_new_tape()
  try:
    end_node = f(*args)
    if end_node is None:
      raise ValueError(
          "Cannot differentiate a function that returns None; "
          "did you forget to return a value from {}?".format(
              f.__name__))
  finally:
    popped_tape = tape.pop_tape()
  # Sort by handle id (monotonically increasing in construction order) so the
  # ordering is deterministic across executions.
  variables = sorted(popped_tape.watched_variables(),
                     key=lambda v: v.handle._id)  # pylint: disable=protected-access
  sources = [v.handle for v in variables]
  if not sources:
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  grad = imperative_grad.imperative_grad(_default_vspace, popped_tape,
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))
def vjp(dy=None):
  """Computes the vector-Jacobian product of `result` w.r.t. `sources`."""
  output_gradients = dy
  if output_gradients is not None:
    output_gradients = [
        ops.convert_to_tensor(t) for t in nest.flatten(output_gradients)
    ]
  return imperative_grad.imperative_grad(
      this_tape, nest.flatten(result), sources,
      output_gradients=output_gradients)
def grad_fn(*args):
  """Computes the gradient of the wrapped function."""
  # Start recording onto a new tape before running `f`.
  # NOTE(review): this tape is never explicitly popped in this block —
  # presumably `imperative_grad` (or the tape machinery of this revision)
  # consumes the top tape internally; confirm against the tape module.
  tape.push_new_tape()
  end_node = f(*args)
  # Variables that were accessed while the tape was active.
  variables = tape.top_tape_watched_variables()
  sources = [x.handle for x in variables]
  # Differentiate all outputs of `f` with respect to the watched variables.
  grad = imperative_grad.imperative_grad(_default_vspace,
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))
def grad_fn(*args):
  """Computes the gradient of the wrapped function."""
  # Record operations performed by `f` onto a fresh tape.
  # NOTE(review): no matching pop for this push appears in this block;
  # presumably the top tape is consumed downstream — verify.
  tape.push_new_tape()
  end_node = f(*args)
  # Query the variables watched by the (still-active) top tape.
  variables = tape.top_tape_watched_variables()
  sources = [x.handle for x in variables]
  grad = imperative_grad.imperative_grad(_default_vspace,
                                         nest.flatten(end_node), sources)
  # Pair each gradient with its variable, preserving order.
  return end_node, list(zip(grad, variables))
def gradient(self, target, sources, output_gradients=None):
  """Computes the gradient using operations recorded in context of this tape.

  Args:
    target: Tensor (or list of tensors) to be differentiated.
    sources: a list or nested structure of Tensors or Variables. `target`
      will be differentiated against elements in `sources`.
    output_gradients: a list of gradients, one for each element of
      target. Defaults to None.

  Returns:
    a list or nested structure of Tensors (or IndexedSlices, or None), one
    for each element in `sources`. Returned structure is the same as
    the structure of `sources`.

  Raises:
    RuntimeError: if called inside the context of the tape, or if called more
      than once on a non-persistent tape.
  """
  if self._tape is None:
    raise RuntimeError("GradientTape.gradient can only be called once on "
                       "non-persistent tapes.")
  if self._recording:
    if not self._persistent:
      self._pop_tape()
    else:
      # Typo fixes in the warning text: "it's" -> "its",
      # "derrivatives" -> "derivatives".
      logging.log_first_n(logging.WARN,
                          "Calling GradientTape.gradient on a persistent "
                          "tape inside its context is significantly less "
                          "efficient than calling it outside the context (it "
                          "causes the gradient ops to be recorded on the "
                          "tape, leading to increased CPU and memory usage). "
                          "Only call GradientTape.gradient inside the "
                          "context if you actually want to trace the "
                          "gradient in order to compute higher order "
                          "derivatives.", 1)
  flat_sources = nest.flatten(sources)
  flat_sources = [_handle_or_self(x) for x in flat_sources]
  if output_gradients is not None:
    output_gradients = [None if x is None else ops.convert_to_tensor(x)
                        for x in nest.flatten(output_gradients)]
  flat_grad = imperative_grad.imperative_grad(
      self._tape, nest.flatten(target), flat_sources,
      output_gradients=output_gradients)
  if not self._persistent:
    self._tape = None
  grad = nest.pack_sequence_as(sources, flat_grad)
  return grad
def grad_fn(*args):
  """Computes the gradient of the wrapped function."""
  tape.push_new_tape()
  end_node = f(*args)
  variables = tape.top_tape_watched_variables()
  sources = [x.handle for x in variables]
  # Pop before validating: the original only popped inside the
  # imperative_grad call, so raising below leaked the pushed tape.
  popped_tape = tape.pop_tape()
  if not sources:
    # Message capitalized for consistency with the sibling implementations.
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  grad = imperative_grad.imperative_grad(_default_vspace, popped_tape,
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))
def grad_fn(*args):
  """Computes the gradient of the wrapped function."""
  tape.push_new_tape()
  end_node = f(*args)
  variables = tape.top_tape_watched_variables()
  sources = [x.handle for x in variables]
  # Pop the tape before the validation below; previously the pop happened
  # only inside the imperative_grad call, leaking the tape on the error path.
  popped_tape = tape.pop_tape()
  if not sources:
    # Message capitalized for consistency with the sibling implementations.
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  grad = imperative_grad.imperative_grad(_default_vspace, popped_tape,
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))
def gradient(self, target, sources, output_gradients=None):
  """Computes the gradient using operations recorded in context of this tape.

  Args:
    target: Tensor (or list of tensors) to be differentiated.
    sources: a list or nested structure of Tensors or Variables. `target`
      will be differentiated against elements in `sources`.
    output_gradients: a list of gradients, one for each element of
      target. Defaults to None.

  Returns:
    a list or nested structure of Tensors (or IndexedSlices, or None), one
    for each element in `sources`, with the same structure as `sources`.

  Raises:
    RuntimeError: if called inside the context of the tape, or if called more
      than once on a non-persistent tape.
  """
  if self._tape is None:
    raise RuntimeError("GradientTape.gradient can only be called once "
                       "on non-persistent tapes, and "
                       "only when the context manager has exited.")
  flat_sources = [_handle_or_self(s) for s in nest.flatten(sources)]
  if output_gradients is not None:
    converted = []
    for g in nest.flatten(output_gradients):
      converted.append(None if g is None else ops.convert_to_tensor(g))
    output_gradients = converted
  flat_grad = imperative_grad.imperative_grad(
      _default_vspace, self._tape, nest.flatten(target), flat_sources,
      output_gradients=output_gradients)
  if not self._persistent:
    self._tape = None
  return nest.pack_sequence_as(sources, flat_grad)
def decorated(*args, **kwds):
  """Computes the value and gradient of the decorated function."""
  dy = kwds.pop("dy", None)
  if dy is not None:
    dy = ops.convert_to_tensor(dy)
  assert not kwds, "The gradient function can't take keyword arguments."
  tape.push_new_tape()
  # Convert only the positional parameters that are being differentiated.
  args = [
      ops.convert_to_tensor(arg) if i in parameter_positions else arg
      for i, arg in enumerate(args)
  ]
  args = _ensure_unique_tensor_objects(parameter_positions, args)
  sources = []
  for i in parameter_positions:
    source = args[i]
    sources.append(source)
    tape.watch(source)
  result = f(*args)
  output_gradients = nest.flatten(dy) if dy is not None else None
  return result, imperative_grad.imperative_grad(
      _default_vspace, tape.pop_tape(), nest.flatten(result), sources,
      output_gradients=output_gradients)
def decorated(*args, **kwds):
  """Computes the value and gradient of the decorated function."""
  # Optional upstream gradient, passed via the keyword `dy`.
  dy = kwds.pop("dy", None)
  if dy is not None:
    dy = ops.convert_to_tensor(dy)
  assert not kwds, "The gradient function can't take keyword arguments."
  # NOTE(review): this tape is pushed but never explicitly popped here, and
  # imperative_grad is called without a tape argument — presumably this
  # revision's imperative_grad consumes the top tape internally; confirm.
  tape.push_new_tape()
  sources = []
  # Convert only the differentiated positional parameters to tensors.
  args = [
      ops.convert_to_tensor(args[i]) if i in parameter_positions else args[i]
      for i in range(len(args))
  ]
  args = _ensure_unique_tensor_objects(parameter_positions, args)
  for i in parameter_positions:
    sources.append(args[i])
    tape.watch(args[i])
  result = f(*args)
  return result, imperative_grad.imperative_grad(
      _default_vspace, nest.flatten(result), sources,
      output_gradients=nest.flatten(dy) if dy is not None else None)
def grad_fn(*args):
  """Evaluates `f` on a fresh tape and differentiates its result."""
  tape.push_new_tape()
  try:
    end_node = f(*args)
    if end_node is None:
      raise ValueError(
          "Cannot differentiate a function that returns None; "
          "did you forget to return a value from {}?".format(
              f.__name__))
  finally:
    popped_tape = tape.pop_tape()
  variables = popped_tape.watched_variables()
  sources = [v.handle for v in variables]
  if not sources:
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  grad = imperative_grad.imperative_grad(_default_vspace, popped_tape,
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))
def grad_fn(*args):
  """Records `f` on a tape and returns its value plus per-variable grads."""
  tape.push_new_tape()
  try:
    end_node = f(*args)
    if end_node is None:
      raise ValueError("Cannot differentiate a function that returns None; "
                       "did you forget to return a value from {}?".format(
                           f.__name__))
  finally:
    popped_tape = tape.pop_tape()
  watched = popped_tape.watched_variables()
  sources = [w.handle for w in watched]
  if not sources:
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  grad = imperative_grad.imperative_grad(_default_vspace, popped_tape,
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, watched))
def gradient(self, target, sources, output_gradients=None):
  """Computes the gradient using operations recorded in context of this tape.

  Args:
    target: Tensor (or list of tensors) to be differentiated.
    sources: a list or nested structure of Tensors or Variables. `target`
      will be differentiated against elements in `sources`.
    output_gradients: a list of gradients, one for each element of
      target. Defaults to None.

  Returns:
    a list or nested structure of Tensors (or IndexedSlices, or None), one
    for each element in `sources`, with the same structure as `sources`.

  Raises:
    RuntimeError: if called inside the context of the tape, or if called more
      than once on a non-persistent tape.
  """
  if self._tape is None:
    raise RuntimeError("GradientTape.gradient can only be called once "
                       "on non-persistent tapes, and "
                       "only when the context manager has exited.")
  flat_sources = nest.flatten(sources)
  flat_sources = [_handle_or_self(source) for source in flat_sources]
  if output_gradients is not None:

    def _convert(g):
      # None entries are passed through untouched.
      return None if g is None else ops.convert_to_tensor(g)

    output_gradients = [_convert(g) for g in nest.flatten(output_gradients)]
  flat_grad = imperative_grad.imperative_grad(
      _default_vspace, self._tape, nest.flatten(target), flat_sources,
      output_gradients=output_gradients)
  if not self._persistent:
    self._tape = None
  return nest.pack_sequence_as(sources, flat_grad)
def grad_fn(*args, **kwds):
  """Traces `f` on its own tape and differentiates w.r.t. watched variables."""
  this_tape = tape.push_new_tape()
  try:
    end_node = f(*args, **kwds)
    if end_node is None:
      raise ValueError("Cannot differentiate a function that returns None; "
                       "did you forget to return a value from {}?".format(
                           f.__name__))
  finally:
    tape.pop_tape(this_tape)
  # Variables come back in construction order, which is stable across runs.
  variables = this_tape.watched_variables()
  if not variables:
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  handles = [v.handle for v in variables]
  grad = imperative_grad.imperative_grad(this_tape, nest.flatten(end_node),
                                         handles)
  return end_node, list(zip(grad, variables))
def gradient(self,
             target,
             sources,
             output_gradients=None,
             unconnected_gradients=UnconnectedGradients.NONE):
  """Computes the gradient using operations recorded in context of this tape.

  Args:
    target: Tensor (or list of tensors) to be differentiated.
    sources: a list or nested structure of Tensors or Variables. `target`
      will be differentiated against elements in `sources`.
    output_gradients: a list of gradients, one for each element of
      target. Defaults to None.
    unconnected_gradients: a value which can either hold 'none' or 'zero' and
      alters the value which will be returned if the target and sources are
      unconnected. The possible values and effects are detailed in
      'UnconnectedGradients' and it defaults to 'none'.

  Returns:
    a list or nested structure of Tensors (or IndexedSlices, or None), one
    for each element in `sources`. Returned structure is the same as
    the structure of `sources`.

  Raises:
    RuntimeError: if called inside the context of the tape, or if called more
      than once on a non-persistent tape.
    ValueError: if the target is a variable or if unconnected gradients is
      called with an unknown value.
  """
  if self._tape is None:
    raise RuntimeError(
        "GradientTape.gradient can only be called once on "
        "non-persistent tapes.")
  if self._recording:
    if not self._persistent:
      self._pop_tape()
    else:
      logging.log_first_n(
          logging.WARN, "Calling GradientTape.gradient on a persistent "
          "tape inside its context is significantly less "
          "efficient than calling it outside the context (it "
          "causes the gradient ops to be recorded on the "
          "tape, leading to increased CPU and memory usage). "
          "Only call GradientTape.gradient inside the "
          "context if you actually want to trace the "
          "gradient in order to compute higher order "
          "derivatives.", 1)

  # Convert variable targets to tensors while this tape is active, so the
  # read itself is recorded on the tape.
  flat_targets = []
  for t in nest.flatten(target):
    if resource_variable_ops.is_resource_variable(t):
      with self:
        t = ops.convert_to_tensor(t)
    flat_targets.append(t)

  flat_sources = [_handle_or_self(s) for s in nest.flatten(sources)]
  if output_gradients is not None:
    output_gradients = [
        None if g is None else ops.convert_to_tensor(g)
        for g in nest.flatten(output_gradients)
    ]

  flat_grad = imperative_grad.imperative_grad(
      self._tape,
      flat_targets,
      flat_sources,
      output_gradients=output_gradients,
      unconnected_gradients=unconnected_gradients)

  if not self._persistent:
    self._tape = None
  return nest.pack_sequence_as(sources, flat_grad)
def gradient(self,
             target,
             sources,
             output_gradients=None,
             unconnected_gradients=UnconnectedGradients.NONE):
  """Computes the gradient using operations recorded in context of this tape.

  Args:
    target: Tensor (or list of tensors) to be differentiated.
    sources: a list or nested structure of Tensors or Variables. `target`
      will be differentiated against elements in `sources`.
    output_gradients: a list of gradients, one for each element of
      target. Defaults to None.
    unconnected_gradients: a value which can either hold 'none' or 'zero' and
      alters the value which will be returned if the target and sources are
      unconnected. The possible values and effects are detailed in
      'UnconnectedGradients' and it defaults to 'none'.

  Returns:
    a list or nested structure of Tensors (or IndexedSlices, or None), one
    for each element in `sources`. Returned structure is the same as
    the structure of `sources`.

  Raises:
    RuntimeError: if called inside the context of the tape, or if called more
      than once on a non-persistent tape.
    ValueError: if the target is a variable or if unconnected gradients is
      called with an unknown value.
  """
  if self._tape is None:
    raise RuntimeError("GradientTape.gradient can only be called once on "
                       "non-persistent tapes.")
  if self._recording:
    if not self._persistent:
      self._pop_tape()
    else:
      logging.log_first_n(
          logging.WARN, "Calling GradientTape.gradient on a persistent "
          "tape inside its context is significantly less "
          "efficient than calling it outside the context (it "
          "causes the gradient ops to be recorded on the "
          "tape, leading to increased CPU and memory usage). "
          "Only call GradientTape.gradient inside the "
          "context if you actually want to trace the "
          "gradient in order to compute higher order "
          "derivatives.", 1)

  flat_targets = []
  for t in nest.flatten(target):
    if not t.dtype.is_floating:
      logging.vlog(
          logging.WARN, "The dtype of the target tensor must be "
          "floating (e.g. tf.float32) when calling GradientTape.gradient, "
          "got %r", t.dtype)
    if resource_variable_ops.is_resource_variable(t):
      # Convert under the tape so the variable read is recorded on it.
      with self:
        t = ops.convert_to_tensor(t)
    flat_targets.append(t)

  flat_sources_raw = nest.flatten(sources)
  flat_sources = [_handle_or_self(s) for s in flat_sources_raw]
  for s in flat_sources_raw:
    if not s.dtype.is_floating:
      logging.vlog(
          logging.WARN, "The dtype of the source tensor must be "
          "floating (e.g. tf.float32) when calling GradientTape.gradient, "
          "got %r", s.dtype)
  if output_gradients is not None:
    output_gradients = [
        None if g is None else ops.convert_to_tensor(g)
        for g in nest.flatten(output_gradients)
    ]

  flat_grad = imperative_grad.imperative_grad(
      self._tape,
      flat_targets,
      flat_sources,
      output_gradients=output_gradients,
      sources_raw=flat_sources_raw,
      unconnected_gradients=unconnected_gradients)

  if not self._persistent:
    self._tape = None
  return nest.pack_sequence_as(sources, flat_grad)
def vjp(dy=None):
  """Returns the vector-Jacobian product of `result` w.r.t. `sources`."""
  cotangents = None
  if dy is not None:
    cotangents = [ops.convert_to_tensor(t) for t in nest.flatten(dy)]
  return imperative_grad.imperative_grad(
      this_tape, nest.flatten(result), sources, output_gradients=cotangents)
def vjp(dy=None):
  """Computes the vector-Jacobian product against the recorded tape `t`."""
  if dy is None:
    output_gradients = None
  else:
    output_gradients = nest.flatten(dy)
  return imperative_grad.imperative_grad(
      _default_vspace, t, nest.flatten(result), sources,
      output_gradients=output_gradients)
def gradient(self,
             target,
             sources,
             output_gradients=None,
             unconnected_gradients=UnconnectedGradients.NONE):
  """Computes the gradient using operations recorded in context of this tape.

  Note: Unless you set `persistent=True` a GradientTape can only be used to
  compute one set of gradients (or jacobians).

  Args:
    target: a list or nested structure of Tensors or Variables to be
      differentiated.
    sources: a list or nested structure of Tensors or Variables. `target`
      will be differentiated against elements in `sources`.
    output_gradients: a list of gradients, one for each element of
      target. Defaults to None.
    unconnected_gradients: a value which can either hold 'none' or 'zero' and
      alters the value which will be returned if the target and sources are
      unconnected. The possible values and effects are detailed in
      'UnconnectedGradients' and it defaults to 'none'.

  Returns:
    a list or nested structure of Tensors (or IndexedSlices, or None), one
    for each element in `sources`. Returned structure is the same as
    the structure of `sources`.

  Raises:
    RuntimeError: If called on a used, non-persistent tape.
    RuntimeError: If called inside the context of the tape.
    ValueError: If the target is a variable or if unconnected gradients is
      called with an unknown value.
  """
  if self._tape is None:
    # Bug fix: the two literals previously concatenated to
    # "...can only be used tocompute..." (missing space).
    raise RuntimeError("A non-persistent GradientTape can only be used to "
                       "compute one set of gradients (or jacobians)")
  if self._recording:
    if not self._persistent:
      self._pop_tape()
    else:
      logging.log_first_n(
          logging.WARN, "Calling GradientTape.gradient on a persistent "
          "tape inside its context is significantly less "
          "efficient than calling it outside the context (it "
          "causes the gradient ops to be recorded on the "
          "tape, leading to increased CPU and memory usage). "
          "Only call GradientTape.gradient inside the "
          "context if you actually want to trace the "
          "gradient in order to compute higher order "
          "derivatives.", 1)

  num_ndarrays = 0
  flat_targets = []
  for t in nest.flatten(target):
    if not backprop_util.IsTrainable(t):
      logging.vlog(
          logging.WARN, "The dtype of the target tensor must be "
          "floating (e.g. tf.float32) when calling GradientTape.gradient, "
          "got %r", t.dtype)
    if resource_variable_ops.is_resource_variable(t):
      # Convert under the tape so the variable read is recorded on it.
      with self:
        t = ops.convert_to_tensor(t)
    elif isinstance(t, np_arrays.ndarray):
      t = t.data
      num_ndarrays += 1
    flat_targets.append(t)
  # Only rewrap if all targets are ndarray. If not, prefer tensors.
  rewrap_as_ndarray = num_ndarrays == len(flat_targets)

  flat_sources = nest.flatten(sources)
  flat_sources_raw = flat_sources
  flat_sources = [_handle_or_self(x) for x in flat_sources]
  for t in flat_sources_raw:
    if not backprop_util.IsTrainable(t):
      logging.vlog(
          logging.WARN, "The dtype of the source tensor must be "
          "floating (e.g. tf.float32) when calling GradientTape.gradient, "
          "got %r", t.dtype)
    if getattr(t, "is_packed", False):
      raise ValueError(
          "GradientTape.gradient is not supported on packed EagerTensors yet."
      )
  if output_gradients is not None:
    output_gradients = [None if x is None else ops.convert_to_tensor(x)
                        for x in nest.flatten(output_gradients)]

  flat_grad = imperative_grad.imperative_grad(
      self._tape,
      flat_targets,
      flat_sources,
      output_gradients=output_gradients,
      sources_raw=flat_sources_raw,
      unconnected_gradients=unconnected_gradients)

  if not self._persistent:
    # Keep track of watched variables before setting tape to None
    self._watched_variables = self._tape.watched_variables()
    self._tape = None

  if rewrap_as_ndarray:

    def _tensor_to_ndarray(x):
      if x is not None:
        return np_arrays.tensor_to_ndarray(x)
      return None

    flat_grad = nest.map_structure(_tensor_to_ndarray, flat_grad)

  grad = nest.pack_sequence_as(sources, flat_grad)
  return grad
def vjp(dy=None):
  """Vector-Jacobian product of `result` w.r.t. `sources` on tape `t`."""
  output_gradients = nest.flatten(dy) if dy is not None else None
  return imperative_grad.imperative_grad(_default_vspace, t,
                                         nest.flatten(result), sources,
                                         output_gradients=output_gradients)