Example #1
def _record_gradient(op_name, inputs, attrs, results, ctx, name):
    """Records gradients for a TensorFlow operation.

    Args:
      op_name: Name of the TensorFlow operation (see REGISTER_OP in C++ code) to
        execute.
      inputs: A flat list of Tensor object inputs to the operation.
      attrs: A tuple with alternating string attr names and attr values for this
        operation.
      results: The results of the operation (as a flat list).
      ctx: The value of context.context().
      name: Customized name for the operation.

    Returns:
      A list of maybe-wrapped results. Either Tensors or TensorNodes.

    Raises:
      An exception on error.
    """
    if not tape.could_possibly_record():
        return

    if op_name in _ops_which_dont_need_outputs:
        op_outputs = None
    else:
        # TODO(apassos) this line creates a weak circular reference where the
        # backprop function keeps an output alive which in turn keeps the tape entry
        # alive which keeps the backprop function alive. Figure out how to break
        # this up without breaking second derivatives of ops like Exp whose
        # gradients depend only on the outputs.
        op_outputs = results

    if op_name in _ops_which_dont_need_inputs:
        op_inputs = None
    else:
        op_inputs = inputs

    num_inputs = len(inputs)

    def grad_fn(*orig_outputs):
        """Generated gradient function."""
        result = _magic_gradient_function(op_name, attrs, num_inputs,
                                          op_inputs, op_outputs, orig_outputs)
        if _tracing:
            print("Gradient for", (name if name else op_name), "inputs",
                  op_inputs, "output_grads", orig_outputs, "gradients", result)
        return result

    inputs = [ops.internal_convert_to_tensor(x, ctx=ctx) for x in inputs]
    tape.record_operation(op_name, results, inputs, [], grad_fn)
    if _tracing:
        print("Computed op", (name if name else op_name), "inputs", inputs,
              "outputs", results)
Example #2
 def __init__(self):
   if context.in_eager_mode() and tape.could_possibly_record():
     raise ValueError("Cannot isolate Eager execution with an active tape.")
   # In Eager, Graphs set a container which isolates resources, and maintain a
   # VariableStore which caches ResourceVariable objects created through
   # get_variable. So setting the default Graph has the side effect of
   # isolating Eager resources.
   with context.eager_mode():
     # Create the graph in Eager mode, as this provides stricter semantics
     # (i.e. has a unique container prefix). This prevents implicit sharing
     # when a Graph-mode graph is created and then Eager mode is enabled (an
     # error through enable_eager_execution, but common with context managers
     # in unit tests).
     self._graph_as_default_context_manager = ops.Graph().as_default()
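The __init__ above only builds the default-Graph context manager; a minimal sketch of how the surrounding isolation helper might use it (the class name EagerResourceIsolation and the __enter__/__exit__ methods are assumptions, not shown in the snippet):

class EagerResourceIsolation(object):
  # __init__ as above: stores ops.Graph().as_default() in
  # self._graph_as_default_context_manager.

  def __enter__(self):
    # Entering the default-Graph context switches the resource container,
    # which is what isolates Eager resources created inside the block.
    self._graph_as_default_context_manager.__enter__()
    return self

  def __exit__(self, *exc_info):
    return self._graph_as_default_context_manager.__exit__(*exc_info)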
Example #3
    def __call__(self, device, token, args):
        """Calls `self._func` in eager mode, recording the tape if needed."""
        use_tape_cache = (self._support_graph_mode_gradient
                          or tape_lib.could_possibly_record())

        if use_tape_cache:
            with backprop.GradientTape() as tape:
                for tensor in args:
                    for t in nest.flatten(tensor):
                        if backprop_util.IsTrainable(t):
                            tape.watch(t)
                outputs = self._call(device, args)
            tape_cache[compat.as_bytes(token)] = (tape, args, outputs)
        else:
            outputs = self._call(device, args)

        return outputs
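The tape cached above is what lets gradients flow back through an eager py_function; a minimal usage sketch with the public API (the function cube and the constant are illustrative):

import tensorflow as tf

def cube(x):
  return x * x * x  # runs eagerly inside the py_function callback

x = tf.constant(2.0)
with tf.GradientTape() as outer_tape:
  outer_tape.watch(x)
  y = tf.py_function(cube, inp=[x], Tout=tf.float32)
grad = outer_tape.gradient(y, x)  # 3 * x**2 == 12.0, recovered via the cached tape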
Example #4
    def inner(*args,
              _checkpoint=False,
              _watch_vars=None,
              _force_seed=False,
              **kwargs):
        if _force_seed:
            if isinstance(_force_seed, Iterator):
                seed = next(_force_seed)
            else:
                seed = random.randint(1, 1 << 31)

        if _checkpoint and tape.could_possibly_record():
            if _watch_vars is None:
                _watch_vars = []

            watch_args = []

            flat_inputs = nest.flatten(args) + nest.flatten(
                list(kwargs.values()))
            flat_inputs = [x for x in flat_inputs if tf.is_tensor(x)]
            flat_inputs = [x for x in flat_inputs if x.dtype == tf.float32]
            unique_inputs = [
                x.deref()
                for x in set(x.experimental_ref() for x in flat_inputs)
            ]

            unique_vars = [
                v.deref()
                for v in set(v.experimental_ref() for v in _watch_vars)
                if not any(v is inp for inp in flat_inputs)
            ]

            watches = unique_inputs + unique_vars
            tensor_watches = [tf.convert_to_tensor(x) for x in watches]

            with tape.stop_recording():
                if _force_seed:
                    tf.random.set_seed(seed)

                result = f(*args, **kwargs)
                flat_result = nest.flatten(result)
                # tf.custom_gradient also wraps its outputs in tf.identity; do the
                # same so the tensors recorded on the tape below are fresh tensors
                # rather than f's raw outputs.
                flat_result = [tf.identity(x) for x in flat_result]
                output = nest.pack_sequence_as(result, flat_result)
            del flat_inputs
            del result
            del unique_inputs
            del unique_vars

            def grad(*output_grads):
                with tf.GradientTape() as g:
                    g.watch(watches)
                    if _force_seed:
                        tf.random.set_seed(seed)
                    recomputed_output = f(*args, **kwargs)
                    recomputed_output = [
                        tf.identity(x) for x in nest.flatten(recomputed_output)
                    ]

                grads = g.gradient(recomputed_output,
                                   watches,
                                   output_gradients=output_grads)
                del g
                return grads

            tape.record_operation(str(f), flat_result, tensor_watches, grad)

            return output
        else:
            if _force_seed:
                tf.random.set_seed(seed)
            return f(*args, **kwargs)
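inner closes over f, which suggests it is returned from a decorator; a minimal usage sketch under that assumption (the decorator name checkpointable, the layer, and the shapes are hypothetical):

import tensorflow as tf

@checkpointable                      # assumed wrapper that returns `inner` above
def block(x, w):
    return tf.nn.relu(tf.matmul(x, w))

x = tf.random.normal([4, 8])
w = tf.Variable(tf.random.normal([8, 8]))
with tf.GradientTape() as g:
    y = block(x, w, _checkpoint=True, _watch_vars=[w])
# The forward activations of block are discarded after the call and
# recomputed inside grad() when this gradient is evaluated.
grads = g.gradient(y, [w])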