def grad_fn(*args, **kwds):
  """Computes the gradient of the wrapped function."""
  this_tape = tape.push_new_tape()
  try:
    end_node = f(*args, **kwds)
    if end_node is None:
      raise ValueError("Cannot differentiate a function that returns None; "
                       "did you forget to return a value from {}?".format(
                           f.__name__))
  finally:
    tape.pop_tape(this_tape)
  # Note: variables are returned in construction order. This ensures unique
  # order across executions.
  variables = this_tape.watched_variables()
  if not variables:
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")

  sources = [v.handle for v in variables]
  for s in sources:
    if getattr(s, "is_packed", False):
      raise ValueError(
          "GradientTape.gradient is not supported on packed EagerTensors yet."
      )
  grad = imperative_grad.imperative_grad(this_tape, nest.flatten(end_node),
                                         sources)
  return end_node, list(zip(grad, variables))

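# The push/pop pattern in grad_fn above is what the public `tf.GradientTape`
# context manager wraps. A minimal usage sketch of that public counterpart,
# assuming TensorFlow 2.x eager execution (illustrative only; this is not the
# internal wrapper defined above):

import tensorflow as tf

v = tf.Variable(3.0)
with tf.GradientTape() as g:   # __enter__ pushes a new tape
  y = v * v                    # ops on the watched variable are recorded
[dv] = g.gradient(y, [v])      # pops the tape and runs the imperative gradient
assert float(dv) == 6.0
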
def grad_fn(*args):
  """Computes the gradient of the wrapped function."""
  this_tape = tape.push_new_tape()
  try:
    end_node = f(*args)
    if end_node is None:
      raise ValueError("Cannot differentiate a function that returns None; "
                       "did you forget to return a value from {}?".format(
                           f.__name__))
  finally:
    tape.pop_tape(this_tape)
  # Sorting variables by id, which is monotonically increasing in construction
  # order. This ensures unique order across executions.
  # TODO(josh11b): Move the sort to the C++ implementation in pywrap_tfe_src.cc.
  variables = list(sorted(this_tape.watched_variables(),
                          key=lambda v: v.handle._id))  # pylint: disable=protected-access
  sources = [x.handle for x in variables]
  if not sources:
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  grad = imperative_grad.imperative_grad(_default_vspace, this_tape,
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))

def grad_fn(*args):
  """Computes the gradient of the wrapped function."""
  this_tape = tape.push_new_tape()
  try:
    end_node = f(*args)
    if end_node is None:
      raise ValueError("Cannot differentiate a function that returns None; "
                       "did you forget to return a value from {}?".format(
                           f.__name__))
  finally:
    tape.pop_tape(this_tape)
  # Sorting variables by id, which is monotonically increasing in construction
  # order. This ensures unique order across executions.
  variables = list(sorted(this_tape.watched_variables(),
                          key=lambda v: v.handle._id))  # pylint: disable=protected-access
  sources = [x.handle for x in variables]
  if not sources:
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  grad = imperative_grad.imperative_grad(_default_vspace, this_tape,
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))

def decorated(*args, **kwds):
  """Computes the value and gradient of the decorated function."""
  parameter_positions = _get_arg_spec(f, params, args)
  assert not kwds, "The gradient function can't take keyword arguments."
  this_tape = tape.push_new_tape(persistent=persistent)
  try:
    sources = []
    args = [
        ops.convert_to_tensor(args[i]) if i in parameter_positions else args[i]
        for i in range(len(args))
    ]
    args = _ensure_unique_tensor_objects(parameter_positions, args)
    for i in parameter_positions:
      sources.append(args[i])
      tape.watch(args[i])
    result = f(*args)
    if result is None:
      raise ValueError("Cannot differentiate a function that returns None; "
                       "did you forget to return a value from {}?".format(
                           f.__name__))
    flat_result = nest.flatten(result)
    flat_result = [gen_array_ops.identity(x) for x in flat_result]
    result = nest.pack_sequence_as(result, flat_result)
  finally:
    tape.pop_tape(this_tape)

  def vjp(dy=None):
    if dy is not None:
      dy = [ops.convert_to_tensor(x) for x in nest.flatten(dy)]
    return imperative_grad.imperative_grad(
        _default_vspace, this_tape, nest.flatten(result), sources,
        output_gradients=dy)

  return result, vjp

def decorated(*args, **kwds):
  """Computes the value and gradient of the decorated function."""
  parameter_positions = _get_arg_spec(f, params, args)
  assert not kwds, "The gradient function can't take keyword arguments."
  this_tape = tape.push_new_tape(persistent=persistent)
  try:
    sources = []
    args = [
        ops.convert_to_tensor(args[i]) if i in parameter_positions else args[i]
        for i in range(len(args))
    ]
    args = _ensure_unique_tensor_objects(parameter_positions, args)
    for i in parameter_positions:
      sources.append(args[i])
      tape.watch(this_tape, args[i])
    result = f(*args)
    if result is None:
      raise ValueError("Cannot differentiate a function that returns None; "
                       "did you forget to return a value from {}?".format(
                           f.__name__))
    flat_result = nest.flatten(result)
    flat_result = [gen_array_ops.identity(x) for x in flat_result]
    result = nest.pack_sequence_as(result, flat_result)
  finally:
    tape.pop_tape(this_tape)

  def vjp(dy=None):
    if dy is not None:
      dy = [ops.convert_to_tensor(x) for x in nest.flatten(dy)]
    return imperative_grad.imperative_grad(
        this_tape, nest.flatten(result), sources, output_gradients=dy)

  return result, vjp

def _defun_internal(name, func, args, kwds):
  """Defines and returns graph-mode version of func."""
  graph_key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
  with context.graph_mode():
    captures = {}
    tmp_graph = CapturingGraph(captures)
    # Inherit the graph key, since this is used for matching variables in
    # optimizers.
    tmp_graph._graph_key = graph_key  # pylint: disable=protected-access
    # Copy the graph collections to ensure summaries and other things work.
    # This lets the function access (but not mutate) collections of the
    # containing graph, such as the global step and the summary writer
    # collections.
    curr_graph = ops.get_default_graph()
    for collection in curr_graph.collections:
      tmp_graph.get_collection_ref(collection)[:] = curr_graph.get_collection(
          collection)
    with tmp_graph.as_default():
      func_inputs = _get_defun_inputs(args)

      with capture_tensors(captures):
        this_tape = tape.push_new_tape()
        try:
          func_outputs = func(*func_inputs, **kwds)
        finally:
          tape.pop_tape(this_tape)
        variables = this_tape.watched_variables()

        # Returning a closed-over tensor as an output does not trigger a
        # call to convert_to_tensor, so we manually capture all such tensors.
        outputs_list = _flatten(func_outputs)
        func_def_outputs = [
            _convert_to_graph_tensor(x) for x in outputs_list if x is not None
        ]

      ids = list(sorted(captures.keys()))
      if ids:
        extra_inputs, extra_placeholders = zip(*[captures[x] for x in ids])
      else:
        extra_inputs = []
        extra_placeholders = []
      output_shapes = tuple(
          x.shape if isinstance(x, ops.Tensor) else None for x in outputs_list)

  flat_inputs = [x for x in nest.flatten(func_inputs)
                 if isinstance(x, ops.Tensor)]
  all_inputs = flat_inputs + list(extra_placeholders)
  all_ignored_ops = frozenset(x.op for x in all_inputs)
  fname = _inference_name(name)
  operations = tuple(x for x in tmp_graph.get_operations()
                     if x not in all_ignored_ops)
  # Register any other functions defined in the graph
  # TODO(ashankar): Oh lord, forgive me for this lint travesty.
  if context.in_eager_mode():
    for f in tmp_graph._functions.values():  # pylint: disable=protected-access
      # TODO(ashankar): What about the gradient registry?
      _register(f._c_func)  # pylint: disable=protected-access
  return GraphModeFunction(
      fname, all_inputs, extra_inputs, tmp_graph, operations, func_def_outputs,
      func_outputs, output_shapes, variables)

def decorated(*args, **kwds):
  """Computes the value and gradient of the decorated function."""
  assert not kwds, "The gradient function can't take keyword arguments."
  tape.push_new_tape()
  sources = []
  args = [
      ops.convert_to_tensor(args[i]) if i in parameter_positions else args[i]
      for i in range(len(args))
  ]
  args = _ensure_unique_tensor_objects(parameter_positions, args)
  for i in parameter_positions:
    sources.append(args[i])
    tape.watch(args[i])
  result = f(*args)
  t = tape.pop_tape()

  def vjp(dy=None):
    return imperative_grad.imperative_grad(
        _default_vspace, t, nest.flatten(result), sources,
        output_gradients=nest.flatten(dy) if dy is not None else None)

  return result, vjp

def decorated(*args, **kwds):
  """Computes the value and gradient of the decorated function."""
  parameter_positions = _get_arg_spec(f, params, args)
  assert not kwds, "The gradient function can't take keyword arguments."
  tape.push_new_tape()
  try:
    sources = []
    args = [
        ops.convert_to_tensor(args[i]) if i in parameter_positions else args[i]
        for i in range(len(args))
    ]
    args = _ensure_unique_tensor_objects(parameter_positions, args)
    for i in parameter_positions:
      sources.append(args[i])
      tape.watch(args[i])
    result = f(*args)
    flat_result = nest.flatten(result)
    flat_result = [gen_array_ops.identity(x) for x in flat_result]
    result = nest.pack_sequence_as(result, flat_result)
  finally:
    t = tape.pop_tape()

  def vjp(dy=None):
    if dy is not None:
      dy = [ops.convert_to_tensor(x) for x in nest.flatten(dy)]
    return imperative_grad.imperative_grad(
        _default_vspace, t, nest.flatten(result), sources, output_gradients=dy)

  return result, vjp

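# Usage sketch for the (result, vjp) pair returned by the decorated wrappers
# above. It assumes the enclosing factory that builds `decorated` is exposed
# as `make_vjp` (as in `tf.contrib.eager` on TF 1.x with eager execution
# enabled); the concrete numbers are only illustrative.

import tensorflow as tf

tf.enable_eager_execution()
tfe = tf.contrib.eager

def square(x):
  return x * x

value, vjp = tfe.make_vjp(square)(tf.constant(3.0))
# `value` is square(3.0); calling the closure with an upstream gradient dy
# returns dy * d(square)/dx for each watched source.
[dx] = vjp(tf.constant(1.0))   # dx == 6.0
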
def grad_fn(*args):
  """Computes the gradient of the wrapped function."""
  tape.push_new_tape()
  end_node = f(*args)
  variables = tape.top_tape_watched_variables()
  sources = [x.handle for x in variables]
  grad = imperative_grad.imperative_grad(_default_vspace, tape.pop_tape(),
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))

def _defun_internal(name, func, args, kwds):
  """Defines and returns graph-mode version of func."""
  container_prefix = ops.get_default_graph()._container_prefix  # pylint: disable=protected-access
  with context.graph_mode():
    captures = {}
    tmp_graph = CapturingGraph(captures)
    # Inherit the container prefix, since this is used for error checking when
    # isolating eager execution (the container prefix at creation must match
    # the container prefix when used, and variables accessed in the defun will
    # be used in the outside context).
    tmp_graph._container_prefix = container_prefix  # pylint: disable=protected-access
    # Copy the graph collections to ensure summaries and other things work.
    # This lets the function access (but not mutate) collections of the
    # containing graph, such as the global step and the summary writer
    # collections.
    curr_graph = ops.get_default_graph()
    for collection in curr_graph.collections:
      tmp_graph.get_collection_ref(
          collection)[:] = curr_graph.get_collection(collection)
    with tmp_graph.as_default():
      func_inputs = _get_defun_inputs(args)

      with capture_tensors(captures):
        tape.push_new_tape()
        try:
          func_outputs = func(*func_inputs, **kwds)
        finally:
          variables = tape.pop_tape().watched_variables()
      ids = list(sorted(captures.keys()))
      if ids:
        extra_inputs, extra_placeholders = zip(*[captures[x] for x in ids])
      else:
        extra_inputs = []
        extra_placeholders = []
      outputs_list = nest.flatten(func_outputs)
      output_shapes = tuple(x.shape for x in outputs_list if x is not None)

  flat_inputs = [
      x for x in nest.flatten(func_inputs) if isinstance(x, ops.Tensor)
  ]
  all_inputs = flat_inputs + list(extra_placeholders)
  all_ignored_ops = frozenset(x.op for x in all_inputs)
  func_def_outputs = [x for x in outputs_list if x is not None]
  fname = _inference_name(name)
  operations = tuple(x for x in tmp_graph.get_operations()
                     if x not in all_ignored_ops)
  # Register any other functions defined in the graph
  # TODO(ashankar): Oh lord, forgive me for this lint travesty.
  for f in tmp_graph._functions.values():  # pylint: disable=protected-access
    # TODO(ashankar): What about the gradient registry?
    _register(f._c_func)  # pylint: disable=protected-access
  return GraphModeFunction(fname, all_inputs, extra_inputs, tmp_graph,
                           operations, func_def_outputs, func_outputs,
                           output_shapes, variables)

def grad_fn(*args, **kwds):
  """Computes the gradient of the wrapped function."""
  this_tape = tape.push_new_tape()
  try:
    end_node = f(*args, **kwds)
    if end_node is None:
      raise ValueError("Cannot differentiate a function that returns None; "
                       "did you forget to return a value from {}?".format(
                           f.__name__))
  finally:
    tape.pop_tape(this_tape)
  # Note: variables are returned in construction order. This ensures unique
  # order across executions.
  variables = this_tape.watched_variables()
  if not variables:
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  sources = [v.handle for v in variables]
  grad = imperative_grad.imperative_grad(this_tape, nest.flatten(end_node),
                                         sources)
  return end_node, list(zip(grad, variables))

def grad_fn(*args):
  """Computes the gradient of the wrapped function."""
  tape.push_new_tape()
  end_node = f(*args)
  variables = tape.top_tape_watched_variables()
  sources = [x.handle for x in variables]
  if not sources:
    raise ValueError("no trainable variables were accessed while the "
                     "function was being computed.")
  grad = imperative_grad.imperative_grad(_default_vspace, tape.pop_tape(),
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))

def grad_fn(*args, **kwds):
  """Computes the gradient of the wrapped function."""
  tape.push_new_tape()
  end_node = f(*args)
  start_node = tape.pop_tape()
  ag_core.active_progenitors.remove(start_node)
  if not ag_core.isnode(end_node):
    raise ValueError(
        "Target not part of a computation being traced. %s" % end_node)
  if start_node not in end_node.progenitors:
    raise ValueError("Target not derived from source. %s %s" %
                     (end_node.progenitors, repr(start_node)))
  output_gradients = kwds.get("output_gradients", None)
  if output_gradients is None:
    output_gradients = _ones(end_node.shape, end_node.dtype)
  grad = ag_core.backward_pass(output_gradients, end_node, start_node)
  return end_node.value, _aggregate_grads(grad.gradients)

def testTapeGC(self):
  # TODO(apassos) figure out how to test this without using tape internal
  # APIs.
  tape.push_new_tape()

  def f():
    x = constant_op.constant(1.0)
    tape.watch(x)
    x = gradient_is_constant(x)
    x = gradient_is_constant(x)
    x = gradient_is_constant(x)

  f()
  t = tape.pop_tape()
  tensor_tape, op_tape = t.export()
  self.assertEqual(len(tensor_tape), 1)  # The watched tensor will remain on
                                         # the tape
  self.assertEqual(len(op_tape), 0)  # No operations should remain on the tape

def grad_fn(*args):
  """Computes the gradient of the wrapped function."""
  tape.push_new_tape()
  try:
    end_node = f(*args)
    if end_node is None:
      raise ValueError(
          "Cannot differentiate a function that returns None; "
          "did you forget to return a value from {}?".format(f.__name__))
  finally:
    popped_tape = tape.pop_tape()
    variables = popped_tape.watched_variables()
  sources = [x.handle for x in variables]
  if not sources:
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  grad = imperative_grad.imperative_grad(_default_vspace, popped_tape,
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))

def grad_fn(*args):
  """Computes the gradient of the wrapped function."""
  tape.push_new_tape()
  try:
    end_node = f(*args)
    if end_node is None:
      raise ValueError("Cannot differentiate a function that returns None; "
                       "did you forget to return a value from {}?".format(
                           f.__name__))
  finally:
    popped_tape = tape.pop_tape()
    variables = popped_tape.watched_variables()
  sources = [x.handle for x in variables]
  if not sources:
    raise ValueError("No trainable variables were accessed while the "
                     "function was being computed.")
  grad = imperative_grad.imperative_grad(_default_vspace, popped_tape,
                                         nest.flatten(end_node), sources)
  return end_node, list(zip(grad, variables))

def imperative_grad(vspace, target, sources, output_gradients=None):
  """Computes gradients from the imperatively defined tape on top of the stack.

  Works by filtering the tape, computing how many downstream usages are of each
  tensor and entry, and repeatedly applying backward functions until we have
  gradients for all sources.

  Args:
    vspace: the vector space in which to differentiate.
    target: either a Tensor or list of Tensors to be differentiated.
    sources: list of Tensors for which we want gradients
    output_gradients: if not None, a list of gradient provided for each Target,
      or None if we are to use the target's computed downstream gradient.

  Returns:
    the gradient wrt each of the sources.

  Raises:
    RuntimeError: if something goes wrong.
    ValueError: if there is no sequence of differentiable operations connecting
      a source and any target Tensor. This can happen either if the target is
      not computed based on the source, if the tracing was set up incorrectly,
      or if only non-differentiable functions of the source were used in the
      computation of target.
  """
  if not tape._tape_stack.stack:  # pylint: disable=protected-access
    raise RuntimeError("Computing a gradient with no tape present")
  bp_tape = tape.pop_tape()
  tensor_to_op, op_to_entry = bp_tape.export()
  # This overwrites the op_to_entry variable, which will release all memory
  # used to keep traces that are irrelevant to the gradient computation we're
  # doing here.
  id_sources = [vspace.tensor_id(t) for t in sources]
  tensor_usage_counts, op_to_entry, op_missing_tensor = _prepare_backprop(
      vspace, target, tensor_to_op, op_to_entry, id_sources)
  ready_ops = _initialize_backprop_stack(op_to_entry, op_missing_tensor)
  gradients = _initial_gradients(vspace, target, output_gradients,
                                 tensor_usage_counts)
  gradients_size = dict()
  # Now exhaust the backprop stack
  while ready_ops:
    op = ready_ops.pop()
    op_trace = op_to_entry.pop(op)
    out_gradients = [gradients.pop(t, None) for t in op_trace.output_ids]

    # Cache the last used zero tensor. We reuse it if the next one
    # we need is of the same shape and dtype. This is very helpful in
    # large splits and should have negligible overhead in other cases.
    last_shape_and_dtype = None
    last_zeros = None
    for i in range(len(out_gradients)):
      if out_gradients[i] is None:
        # TODO(apassos) this should be in the right device
        none_indices = _grad_fn_accepts_none_for_indices.get(
            op_trace.op_type, None)
        if none_indices is None or i not in none_indices:
          shape_and_dtype = op_trace.output_shape_and_dtype[i]
          if shape_and_dtype != last_shape_and_dtype:
            last_shape_and_dtype = shape_and_dtype
            last_zeros = vspace.zeros(*shape_and_dtype)
          out_gradients[i] = last_zeros
      else:
        out_gradients[i] = vspace.aggregate_fn(out_gradients[i])

    in_gradients = op_trace.backward_function(*(out_gradients))
    for i, t in enumerate(op_trace.input_ids):
      if in_gradients[i] is not None:
        vspace.add_new_grads_fn(gradients, gradients_size, t, in_gradients[i])
      if tensor_usage_counts.get(t, 0) > 0:
        tensor_usage_counts[t] -= 1
        if (t in tensor_to_op and tensor_usage_counts[t] == 0
            and t not in id_sources):
          in_op = tensor_to_op[t]
          if in_op is None:
            continue
          if op_missing_tensor.get(in_op, 0) > 0:
            op_missing_tensor[in_op] -= 1
          if op_missing_tensor.get(in_op, 0) == 0:
            ready_ops.append(in_op)
  result = []
  for i, s in enumerate(sources):
    g = gradients.get(vspace.tensor_id(s), None)
    if g is None:
      result.append(None)
    else:
      result.append(vspace.aggregate_fn(g))
  return result

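# The loop above is a reverse-mode worklist: seed the target's gradient, then
# pop ops whose output gradients are ready, call their backward functions, and
# aggregate into the inputs' gradients. A self-contained toy sketch of the same
# idea in plain Python (no TensorFlow; the names and the simple reversed-order
# walk are illustrative only, where the real implementation uses usage counts
# and a missing-tensor count to handle arbitrary graphs):

def toy_backprop(trace, target_id, sources):
  """trace holds (output_id, input_ids, backward_fn) tuples in forward order."""
  grads = {target_id: 1.0}                 # seed d(target)/d(target) = 1
  for out_id, in_ids, backward_fn in reversed(trace):
    dy = grads.pop(out_id, None)
    if dy is None:                         # op does not affect the target
      continue
    for in_id, dx in zip(in_ids, backward_fn(dy)):
      grads[in_id] = grads.get(in_id, 0.0) + dx   # aggregate over fan-out
  return [grads.get(s) for s in sources]

# Example: y = 3 * (x * x) at x = 4, so dy/dx = 6 * x = 24.
trace = [
    ("t1", ["x", "x"], lambda dy: [dy * 4.0, dy * 4.0]),  # t1 = x * x
    ("y", ["t1"], lambda dy: [dy * 3.0]),                 # y = 3 * t1
]
assert toy_backprop(trace, "y", ["x"]) == [24.0]
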
def __exit__(self, typ, value, traceback):
  tape.pop_tape(self._tape)

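# The __exit__ above is half of the usual pairing: __enter__ pushes a tape and
# stores the handle, __exit__ pops that same handle. A simplified sketch of the
# pairing, assuming the newer pop_tape(tape_handle) signature; the class name
# and attributes are illustrative, not the exact GradientTape implementation:

class _RecordingScope(object):
  """Illustrative scope that records ops on a tape while active."""

  def __enter__(self):
    self._tape = tape.push_new_tape()   # start recording on the tape stack
    return self

  def __exit__(self, typ, value, traceback):
    tape.pop_tape(self._tape)           # stop recording; keep the tape for gradients
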
def func_graph_from_py_func(name,
                            python_func,
                            args,
                            kwargs,
                            signature=None,
                            func_graph=None,
                            autograph=False,
                            add_control_dependencies=True,
                            arg_names=None,
                            op_return_value=None,
                            collections=None):
  """Returns a `FuncGraph` generated from `python_func`.

  Args:
    name: an identifier for the function.
    python_func: the Python function to trace.
    args: the positional args with which the Python function should be called;
      ignored if a signature is provided.
    kwargs: the keyword args with which the Python function should be called;
      ignored if a signature is provided.
    signature: a possibly nested sequence of `TensorSpecs` specifying the
      shapes and dtypes of the arguments. When a signature is provided, `args`
      and `kwargs` are ignored, and `python_func` is traced with Tensors
      conforming to `signature`. If `None`, the shapes and dtypes are inferred
      from the inputs.
    func_graph: Optional. An instance of FuncGraph. If provided, we will use
      this graph else a new one is built and returned.
    autograph: whether to use autograph to compile `python_func`.
      See https://www.tensorflow.org/guide/autograph for more information.
    add_control_dependencies: If True, automatically adds control dependencies
      to ensure program order matches execution order and stateful ops always
      execute.
    arg_names: Optional list of argument names, used to give input placeholders
      recognizable names.
    op_return_value: Optional. A Tensor. If set and `python_func` returns
      Operations, those return values will be replaced with this value. If not
      set, returning an Operation triggers an error.
    collections: a dictionary of collections this FuncGraph should start with.
      If not specified (None), the FuncGraph will read (but not write to) the
      outer graph's collections that are not whitelisted, and both read and
      write to the outer graph's collections that are whitelisted. The current
      whitelisted collections are the global variables, the local variables,
      and the trainable variables. Defaults to None.

  Returns:
    A FuncGraph.

  Raises:
    TypeError: If any of `python_func`'s return values is neither `None` nor a
      `Tensor`.
  """
  if op_return_value is not None:
    assert isinstance(op_return_value, ops.Tensor), op_return_value
  if func_graph is None:
    func_graph = FuncGraph(name, collections=collections)
  assert isinstance(func_graph, FuncGraph)
  if add_control_dependencies:
    control_manager = AutomaticControlDependencies
  else:
    control_manager = ops.NullContextmanager
  with func_graph.as_default(), control_manager() as a:
    current_scope = variable_scope.get_variable_scope()
    default_use_recource = current_scope.use_resource
    current_scope.set_use_resource(True)

    if signature is not None:
      args = signature
      kwargs = {}

    # Creates and names placeholders for all arguments.
    func_args = _get_defun_inputs_from_args(args, arg_names)
    func_kwargs = _get_defun_inputs_from_kwargs(kwargs)

    # Convert all Tensors into TensorSpecs before saving the structured inputs.
    # If storing pure concrete functions that are not called through
    # polymorphic functions, we don't have access to FunctionSpec, so we need
    # to call the TensorSpecs by their `arg_names` for later binding.
    func_graph.structured_input_signature = (
        convert_structure_to_signature(func_args, arg_names),
        convert_structure_to_signature(func_kwargs))

    # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
    # Variables to help check whether mutation happens in calling the function
    # Copy the recursive list, tuple and map structure, but not base objects
    func_args_before = nest.pack_sequence_as(func_args,
                                             nest.flatten(func_args))
    func_kwargs_before = nest.pack_sequence_as(func_kwargs,
                                               nest.flatten(func_kwargs))

    def convert(x):
      """Converts a function output to a Tensor."""
      if x is None:
        return None
      if op_return_value is not None and isinstance(x, ops.Operation):
        # TODO(b/79881896): we currently can't capture external control deps,
        # so this won't work if x needs to be captured (i.e. if python_func
        # returns captured Operations).
        with ops.control_dependencies([x]):
          x = array_ops.identity(op_return_value)
      elif not isinstance(x, tensor_array_ops.TensorArray):
        try:
          x = ops.convert_to_tensor_or_composite(x)
        except (ValueError, TypeError):
          raise TypeError(
              "To be compatible with tf.contrib.eager.defun, Python functions "
              "must return zero or more Tensors; in compilation of %s, found "
              "return value of type %s, which is not a Tensor." %
              (str(python_func), type(x)))
      if add_control_dependencies:
        x = a.mark_as_return(x)
      return x

    this_tape = tape.push_new_tape()
    try:
      if autograph:
        from tensorflow.python import autograph  # pylint: disable=g-import-not-at-top
        _, original_func = tf_decorator.unwrap(python_func)

        def wrapper(*args, **kwargs):
          # Note: functions annotated with @tf.function should always be
          # converted even though they would meet autograph's whitelisting
          # criteria.
          # If this assumption is ever broken, converted_call will need to
          # handle the possibility of original_func still being a shim, e.g.
          # bound to WeakrefSelf.
          return autograph.converted_call(
              original_func, None,
              autograph.ConversionOptions(
                  verbose=autograph.Verbosity.BRIEF,
                  recursive=True,
                  strip_decorators=(def_function.function,),
                  optional_features=(),
                  force_conversion=True,
              ), *args, **kwargs)

        # Wrapping around a decorator allows checks like tf_inspect.getargspec
        # to be accurate.
        converted_func = tf_decorator.make_decorator(original_func, wrapper)
        tf_decorator.rewrap(python_func, original_func, converted_func)

      func_outputs = python_func(*func_args, **func_kwargs)

      # invariant: `func_outputs` contains only Tensors, IndexedSlices,
      # SparseTensors, TensorArrays and `None`s.
      func_outputs = nest.map_structure(convert, func_outputs)

      check_mutation(func_args_before, func_args)
      check_mutation(func_kwargs_before, func_kwargs)
    finally:
      tape.pop_tape(this_tape)
      current_scope.set_use_resource(default_use_recource)

    # Variables in `func_args`, `func_kwargs` should be explicit inputs
    # to the function, not captured inputs.
    tape_variables = this_tape.watched_variables()
    arg_variables = set()
    inputs = []
    for arg in nest.flatten(func_args) + nest.flatten(func_kwargs):
      if isinstance(arg, resource_variable_ops.ResourceVariable):
        # Even if an argument variable was not used in the function, we've
        # already manually captured the resource Tensor when creating argument
        # placeholders.
        resource_placeholder = func_graph.captures.pop(arg.handle)
        arg_variables.add(arg)
        inputs.append(resource_placeholder)
      elif isinstance(arg, ops.Tensor):
        inputs.append(arg)
    variables = [v for v in tape_variables if v not in arg_variables]
    func_graph.inputs = inputs + list(func_graph.captures.values())

    func_graph.structured_outputs = func_outputs
    # Returning a closed-over tensor does not trigger convert_to_tensor.
    func_graph.outputs.extend(
        func_graph.capture(x)
        for x in flatten(func_graph.structured_outputs)
        if x is not None)

    func_graph.variables = variables

  # Register any other functions defined in the graph.
  with ops.init_scope():
    if context.executing_eagerly():
      for f in func_graph._functions.values():  # pylint: disable=protected-access
        # TODO(ashankar): What about the gradient registry?
        context.add_function(f._c_func.func)  # pylint: disable=protected-access

  return func_graph

def _defun_internal(name, func, args, kwds):
  """Defines and returns graph-mode version of func."""
  graph_key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
  with context.graph_mode():
    captures = {}
    tmp_graph = CapturingGraph(captures)
    # Inherit the graph key, since this is used for matching variables in
    # optimizers.
    tmp_graph._graph_key = graph_key  # pylint: disable=protected-access
    # Copy the graph collections to ensure summaries and other things work.
    # This lets the function access (but not mutate) collections of the
    # containing graph, such as the global step and the summary writer
    # collections.
    curr_graph = ops.get_default_graph()
    for collection in curr_graph.collections:
      tmp_graph.get_collection_ref(
          collection)[:] = curr_graph.get_collection(collection)
    with tmp_graph.as_default(), AutomaticControlDependencies() as a:
      func_inputs = _get_defun_inputs(args)

      def convert(x):
        if x is None:
          return None
        x = ops.convert_to_tensor_or_indexed_slices(x)
        x = a.mark_as_return(x)
        return x

      with capture_tensors(captures):
        this_tape = tape.push_new_tape()
        try:
          func_outputs = func(*func_inputs, **kwds)
          func_outputs = nest.map_structure(convert, func_outputs)
        finally:
          tape.pop_tape(this_tape)
        variables = this_tape.watched_variables()

        # Returning a closed-over tensor as an output does not trigger a
        # call to convert_to_tensor, so we manually capture all such tensors.
        outputs_list = _flatten(func_outputs)
        func_def_outputs = [
            _convert_to_graph_tensor(x) for x in outputs_list if x is not None
        ]

      ids = list(sorted(captures.keys()))
      if ids:
        extra_inputs, extra_placeholders = zip(*[captures[x] for x in ids])
      else:
        extra_inputs = []
        extra_placeholders = []
      output_shapes = tuple(
          x.shape if isinstance(x, ops.Tensor) else None for x in outputs_list)

  flat_inputs = [
      x for x in nest.flatten(func_inputs) if isinstance(x, ops.Tensor)
  ]
  all_inputs = flat_inputs + list(extra_placeholders)
  all_ignored_ops = frozenset(x.op for x in all_inputs)
  fname = _inference_name(name)
  operations = tuple(x for x in tmp_graph.get_operations()
                     if x not in all_ignored_ops)
  # Register any other functions defined in the graph
  # TODO(ashankar): Oh lord, forgive me for this lint travesty.
  if context.executing_eagerly():
    for f in tmp_graph._functions.values():  # pylint: disable=protected-access
      # TODO(ashankar): What about the gradient registry?
      _register(f._c_func.func)  # pylint: disable=protected-access
  return GraphModeFunction(fname, all_inputs, extra_inputs, tmp_graph,
                           operations, func_def_outputs, func_outputs,
                           output_shapes, variables)

def __exit__(self, typ, value, traceback):
  self._tape = tape.pop_tape()

def func_graph_from_py_func(name,
                            python_func,
                            args,
                            kwargs,
                            signature=None,
                            func_graph=None,
                            experimental_autograph=False,
                            add_control_dependencies=True,
                            arg_names=None,
                            op_return_value=None):
  """Returns a `FuncGraph` generated from `python_func`.

  Args:
    name: an identifier for the function.
    python_func: the Python function to trace.
    args: the positional args with which the Python function should be called;
      ignored if a signature is provided.
    kwargs: the keyword args with which the Python function should be called;
      ignored if a signature is provided.
    signature: a possibly nested sequence of `TensorSpecs` specifying the
      shapes and dtypes of the arguments. When a signature is provided, `args`
      and `kwargs` are ignored, and `python_func` is traced with Tensors
      conforming to `signature`. If `None`, the shapes and dtypes are inferred
      from the inputs.
    func_graph: Optional. An instance of FuncGraph. If provided, we will use
      this graph else a new one is built and returned.
    experimental_autograph: whether to use autograph to compile `python_func`.
      See https://www.tensorflow.org/guide/autograph for more information.
    add_control_dependencies: If True, automatically adds control dependencies
      to ensure program order matches execution order and stateful ops always
      execute.
    arg_names: Optional list of argument names, used to give input placeholders
      recognizable names.
    op_return_value: Optional. A Tensor. If set and `python_func` returns
      Operations, those return values will be replaced with this value. If not
      set, returning an Operation triggers an error.

  Returns:
    A FuncGraph.

  Raises:
    TypeError: If any of `python_func`'s return values is neither `None` nor a
      `Tensor`.
  """
  if op_return_value is not None:
    assert isinstance(op_return_value, ops.Tensor), op_return_value
  if func_graph is None:
    func_graph = FuncGraph(name)
  assert isinstance(func_graph, FuncGraph)
  if add_control_dependencies:
    control_manager = AutomaticControlDependencies
  else:
    control_manager = ops.NullContextmanager
  with func_graph.as_default(), control_manager() as a:
    current_scope = variable_scope.get_variable_scope()
    default_use_recource = current_scope.use_resource
    current_scope.set_use_resource(True)

    if signature is not None:
      args = signature
      kwargs = {}

    func_args = _get_defun_inputs_from_args(args, arg_names)
    func_kwargs = _get_defun_inputs_from_kwargs(kwargs)

    # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
    # Variables to help check whether mutation happens in calling the function
    # Copy the recursive list, tuple and map structure, but not base objects
    func_args_before = nest.pack_sequence_as(func_args,
                                             nest.flatten(func_args))
    func_kwargs_before = nest.pack_sequence_as(func_kwargs,
                                               nest.flatten(func_kwargs))

    def convert(x):
      """Converts a function output to a Tensor."""
      if x is None:
        return None
      if op_return_value is not None and isinstance(x, ops.Operation):
        # TODO(b/79881896): we currently can't capture external control deps,
        # so this won't work if x needs to be captured (i.e. if python_func
        # returns captured Operations).
        with ops.control_dependencies([x]):
          x = array_ops.identity(op_return_value)
      else:
        try:
          x = ops.convert_to_tensor_or_indexed_slices(x)
        except (ValueError, TypeError):
          raise TypeError(
              "To be compatible with tf.contrib.eager.defun, Python functions "
              "must return zero or more Tensors; in compilation of %s, found "
              "return value of type %s, which is not a Tensor." %
              (str(python_func), type(x)))
      if add_control_dependencies:
        x = a.mark_as_return(x)
      return x

    this_tape = tape.push_new_tape()
    try:
      if experimental_autograph:
        func_outputs = autograph.converted_call(
            python_func, None,
            autograph.ConversionOptions(
                verbose=True,
                recursive=True,
                strip_decorators=(function.defun,),
                optional_features=(),
            ), *func_args, **func_kwargs)
      else:
        func_outputs = python_func(*func_args, **func_kwargs)
      # invariant: `func_outputs` contains only Tensors and `None`s.
      func_outputs = nest.map_structure(convert, func_outputs)

      check_mutation(func_args_before, func_args)
      check_mutation(func_kwargs_before, func_kwargs)
    finally:
      tape.pop_tape(this_tape)
      current_scope.set_use_resource(default_use_recource)

    # Variables in `func_args`, `func_kwargs` should be explicit inputs
    # to the function, not captured inputs.
    tape_variables = this_tape.watched_variables()
    arg_variables = set()
    inputs = []
    for arg in nest.flatten(func_args) + nest.flatten(func_kwargs):
      if isinstance(arg, resource_variable_ops.ResourceVariable):
        try:
          resource_placeholder = func_graph.captures.pop(arg.handle)
          arg_variables.add(arg)
        except KeyError:
          # This case occurs if a Variable among the inputs is not actually
          # used by the function; we still add an explicit input for it
          # because the user should presumably pass the Variable as an input
          # to the corresponding graph function.
          resource_placeholder = _create_substitute_placeholder(arg.handle)
        inputs.append(resource_placeholder)
      elif isinstance(arg, ops.Tensor):
        inputs.append(arg)
    variables = [v for v in tape_variables if v not in arg_variables]
    func_graph.inputs = inputs + list(func_graph.captures.values())

    func_graph.structured_outputs = func_outputs
    # Returning a closed-over tensor does not trigger convert_to_tensor.
    func_graph.outputs.extend(
        func_graph.capture(x)
        for x in flatten(func_graph.structured_outputs)
        if x is not None)

    func_graph.variables = variables

  # Register any other functions defined in the graph.
  with ops.init_scope():
    if context.executing_eagerly():
      for f in func_graph._functions.values():  # pylint: disable=protected-access
        # TODO(ashankar): What about the gradient registry?
        context.add_function(f._c_func.func)  # pylint: disable=protected-access

  return func_graph

def _defun_internal(name, func, args, kwds):
  """Defines and returns graph-mode version of func."""
  container_prefix = ops.get_default_graph()._container_prefix  # pylint: disable=protected-access
  with context.graph_mode():
    captures = {}
    tmp_graph = CapturingGraph(captures)
    # Inherit the container prefix, since this is used for error checking when
    # isolating eager execution (the container prefix at creation must match
    # the container prefix when used, and variables accessed in the defun will
    # be used in the outside context).
    tmp_graph._container_prefix = container_prefix  # pylint: disable=protected-access
    # Copy the graph collections to ensure summaries and other things work.
    # This lets the function access (but not mutate) collections of the
    # containing graph, such as the global step and the summary writer
    # collections.
    curr_graph = ops.get_default_graph()
    for collection in curr_graph.collections:
      tmp_graph.get_collection_ref(collection)[:] = curr_graph.get_collection(
          collection)
    with tmp_graph.as_default():
      func_inputs = _get_defun_inputs(args)

      with capture_tensors(captures):
        tape.push_new_tape()
        try:
          func_outputs = func(*func_inputs, **kwds)
        finally:
          variables = tape.pop_tape().watched_variables()
      ids = list(sorted(captures.keys()))
      if ids:
        extra_inputs, extra_placeholders = zip(*[captures[x] for x in ids])
      else:
        extra_inputs = []
        extra_placeholders = []
      outputs_list = nest.flatten(func_outputs)
      output_shapes = [x.shape for x in outputs_list if x is not None]

  flat_inputs = [
      x for x in nest.flatten(func_inputs) if isinstance(x, ops.Tensor)
  ]
  all_inputs = flat_inputs + list(extra_placeholders)

  func_def_outputs = [x for x in outputs_list if x is not None]
  inference_function_def = make_function_def(
      tmp_graph, tmp_graph.get_operations(), all_inputs, func_def_outputs)
  # Register any other functions defined in the graph
  # TODO(ashankar): Oh lord, forgive me for this lint travesty.
  for f in tmp_graph._functions.values():  # pylint: disable=protected-access
    # TODO(ashankar): What about the gradient registry?
    _register_with_name(f.name, f.definition)
  _register_with_name(_inference_name(name), inference_function_def)

  return GraphModeFunction(
      all_inputs, extra_inputs, inference_function_def, tmp_graph,
      tmp_graph.get_operations(), func_outputs,
      _map_sequence_obj_to_idx(func_def_outputs), output_shapes,
      variables=variables)

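# The _defun_internal variants above are the tracing step behind the public
# `defun`/`tf.function` decorators: the Python function is run once inside a
# capturing graph while a tape records every variable it touches, and those
# variables become part of the resulting GraphModeFunction. A minimal sketch
# of the public usage, assuming TensorFlow 2.x (tf.function is the modern
# successor of defun); this is illustrative, not the internals shown above:

import tensorflow as tf

w = tf.Variable(2.0)

@tf.function
def scale(x):
  return w * x          # `w` is captured and recorded as a traced variable

print(scale(tf.constant(3.0)))   # traces on the first call, then reuses the graph
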
def _pop_tape(self):
  if not self._recording:
    raise ValueError("Tape is not recording.")
  tape.pop_tape(self._tape)
  self._recording = False

def func_graph_from_py_func(name,
                            python_func,
                            args,
                            kwargs,
                            signature=None,
                            func_graph=None,
                            autograph=False,
                            autograph_options=None,
                            add_control_dependencies=True,
                            arg_names=None,
                            op_return_value=None,
                            collections=None,
                            capture_by_value=None):
  """Returns a `FuncGraph` generated from `python_func`.

  Args:
    name: an identifier for the function.
    python_func: the Python function to trace.
    args: the positional args with which the Python function should be called;
      ignored if a signature is provided.
    kwargs: the keyword args with which the Python function should be called;
      ignored if a signature is provided.
    signature: a possibly nested sequence of `TensorSpecs` specifying the
      shapes and dtypes of the arguments. When a signature is provided, `args`
      and `kwargs` are ignored, and `python_func` is traced with Tensors
      conforming to `signature`. If `None`, the shapes and dtypes are inferred
      from the inputs.
    func_graph: Optional. An instance of FuncGraph. If provided, we will use
      this graph else a new one is built and returned.
    autograph: whether to use autograph to compile `python_func`.
      See https://www.tensorflow.org/guide/autograph for more information.
    autograph_options: additional knobs to control when `autograph=True`.
      See https://www.tensorflow.org/guide/autograph for more information.
    add_control_dependencies: If True, automatically adds control dependencies
      to ensure program order matches execution order and stateful ops always
      execute.
    arg_names: Optional list of argument names, used to give input placeholders
      recognizable names.
    op_return_value: Optional. A Tensor. If set and `python_func` returns
      Operations, those return values will be replaced with this value. If not
      set, returning an Operation triggers an error.
    collections: a dictionary of collections this FuncGraph should start with.
      If not specified (None), the FuncGraph will read (but not write to) the
      outer graph's collections that are not whitelisted, and both read and
      write to the outer graph's collections that are whitelisted. The current
      whitelisted collections are the global variables, the local variables,
      and the trainable variables. Defaults to None.
    capture_by_value: An optional boolean. If True, the func graph will capture
      Variables by value instead of reference. By default inherit from outer
      graphs, and failing that will default to False.

  Returns:
    A FuncGraph.

  Raises:
    TypeError: If any of `python_func`'s return values is neither `None` nor a
      `Tensor`.
  """
  if op_return_value is not None:
    assert isinstance(op_return_value, ops.Tensor), op_return_value
  if func_graph is None:
    func_graph = FuncGraph(name, collections=collections,
                           capture_by_value=capture_by_value)
  assert isinstance(func_graph, FuncGraph)
  if add_control_dependencies:
    control_manager = AutomaticControlDependencies()
  else:
    control_manager = ops.NullContextmanager()
  with func_graph.as_default(), control_manager as a:
    current_scope = variable_scope.get_variable_scope()
    default_use_recource = current_scope.use_resource
    current_scope.set_use_resource(True)

    if signature is not None:
      args = signature
      kwargs = {}

    # Creates and names placeholders for all arguments.
    func_args = _get_defun_inputs_from_args(args, arg_names)
    func_kwargs = _get_defun_inputs_from_kwargs(kwargs)

    # Convert all Tensors into TensorSpecs before saving the structured inputs.
    # If storing pure concrete functions that are not called through
    # polymorphic functions, we don't have access to FunctionSpec, so we need
    # to call the TensorSpecs by their `arg_names` for later binding.
    func_graph.structured_input_signature = (
        convert_structure_to_signature(func_args, arg_names),
        convert_structure_to_signature(func_kwargs))

    flat_func_args = nest.flatten(func_args)
    flat_func_kwargs = nest.flatten(func_kwargs)
    # Temporarily set inputs to allow graph building code to inspect
    # them. Reassigned below.
    func_graph.inputs = [
        arg for arg in flat_func_args + flat_func_kwargs
        if isinstance(arg, ops.Tensor)
    ]

    # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
    # Variables to help check whether mutation happens in calling the function
    # Copy the recursive list, tuple and map structure, but not base objects
    func_args_before = nest.pack_sequence_as(func_args, flat_func_args)
    func_kwargs_before = nest.pack_sequence_as(func_kwargs, flat_func_kwargs)

    def convert(x):
      """Converts a function output to a Tensor."""
      if x is None:
        return None
      if op_return_value is not None and isinstance(x, ops.Operation):
        # TODO(b/79881896): we currently can't capture external control deps,
        # so this won't work if x needs to be captured (i.e. if python_func
        # returns captured Operations).
        with ops.control_dependencies([x]):
          x = array_ops.identity(op_return_value)
      elif not isinstance(x, tensor_array_ops.TensorArray):
        try:
          x = ops.convert_to_tensor_or_composite(x)
        except (ValueError, TypeError):
          raise TypeError(
              "To be compatible with tf.contrib.eager.defun, Python functions "
              "must return zero or more Tensors; in compilation of %s, found "
              "return value of type %s, which is not a Tensor." %
              (str(python_func), type(x)))
      if add_control_dependencies:
        x = a.mark_as_return(x)
      return x

    this_tape = tape.push_new_tape()
    try:
      if autograph:
        from tensorflow.python import autograph  # pylint: disable=g-import-not-at-top
        _, original_func = tf_decorator.unwrap(python_func)

        def wrapper(*args, **kwargs):
          # Note: functions annotated with @tf.function should always be
          # converted even though they would meet autograph's whitelisting
          # criteria.
          # If this assumption is ever broken, converted_call will need to
          # handle the possibility of original_func still being a shim, e.g.
          # bound to WeakrefSelf.
          return autograph.converted_call(
              original_func, None,
              autograph.ConversionOptions(
                  verbose=autograph.Verbosity.BRIEF,
                  recursive=True,
                  strip_decorators=(def_function.function,),
                  optional_features=autograph_options,
                  force_conversion=True,
              ), args, kwargs)

        # Wrapping around a decorator allows checks like tf_inspect.getargspec
        # to be accurate.
        converted_func = tf_decorator.make_decorator(original_func, wrapper)
        tf_decorator.rewrap(python_func, original_func, converted_func)

      func_outputs = python_func(*func_args, **func_kwargs)

      # invariant: `func_outputs` contains only Tensors, IndexedSlices,
      # SparseTensors, TensorArrays and `None`s.
      func_outputs = nest.map_structure(convert, func_outputs)

      check_mutation(func_args_before, func_args)
      check_mutation(func_kwargs_before, func_kwargs)
    finally:
      tape.pop_tape(this_tape)
      current_scope.set_use_resource(default_use_recource)

    # Variables in `func_args`, `func_kwargs` should be explicit inputs
    # to the function, not captured inputs.
    tape_variables = this_tape.watched_variables()
    arg_variables = set()
    inputs = []
    for arg in nest.flatten(func_args) + nest.flatten(func_kwargs):
      if isinstance(arg, resource_variable_ops.ResourceVariable):
        # Even if an argument variable was not used in the function, we've
        # already manually captured the resource Tensor when creating argument
        # placeholders.
        resource_placeholder = func_graph.captures.pop(arg.handle, None)
        if resource_placeholder is None:
          continue
        arg_variables.add(arg)
        inputs.append(resource_placeholder)
      elif isinstance(arg, ops.Tensor):
        inputs.append(arg)
    variables = [v for v in tape_variables if v not in arg_variables]
    func_graph.inputs = inputs + list(func_graph.captures.values())

    func_graph.structured_outputs = func_outputs
    # Returning a closed-over tensor does not trigger convert_to_tensor.
    func_graph.outputs.extend(
        func_graph.capture(x)
        for x in flatten(func_graph.structured_outputs)
        if x is not None)

    func_graph.variables = variables

    if add_control_dependencies:
      func_graph.control_outputs.extend(control_manager.ops_which_must_run)

  # Register any other functions defined in the graph.
  with ops.init_scope():
    if context.executing_eagerly():
      for f in func_graph._functions.values():  # pylint: disable=protected-access
        # TODO(ashankar): What about the gradient registry?
        context.add_function(f._c_func.func)  # pylint: disable=protected-access

  return func_graph

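# func_graph_from_py_func above is what ultimately runs when a tf.function is
# traced: the Python callable is executed once with placeholder inputs, the
# tape collects the variables it touched, and the result is a FuncGraph. A
# small sketch of observing that from the public API, assuming TensorFlow 2.x
# (the `graph.variables` attribute is the one assigned in the snippet above):

import tensorflow as tf

v = tf.Variable(1.0)

@tf.function
def add_v(x):
  return x + v

concrete = add_v.get_concrete_function(tf.TensorSpec([], tf.float32))
print(concrete.graph)            # the FuncGraph built by the tracer
print(concrete.graph.variables)  # variables watched on the tape during tracing
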
def imperative_grad(target, sources, output_gradients=None):
  """Computes gradients from the imperatively defined tape on top of the stack.

  Works by filtering the tape, computing how many downstream usages are of each
  tensor and entry, and repeatedly applying backward functions until we have
  gradients for all sources.

  Args:
    target: either a Tensor or list of Tensors to be differentiated.
    sources: list of Tensors for which we want gradients
    output_gradients: if not None, a list of gradient provided for each Target,
      or None if we are to use the target's computed downstream gradient.

  Returns:
    the gradient wrt each of the sources.

  Raises:
    RuntimeError: if something goes wrong.
    ValueError: if there is no sequence of differentiable operations connecting
      a source and any target Tensor. This can happen either if the target is
      not computed based on the source, if the tracing was set up incorrectly,
      or if only non-differentiable functions of the source were used in the
      computation of target.
  """
  if not tape._tape_stack.stack:  # pylint: disable=protected-access
    raise RuntimeError("Computing a gradient with no tape present")
  bp_tape = tape.pop_tape()
  tensor_to_op, op_to_entry = bp_tape.export()
  # This overwrites the op_to_entry variable, which will release all memory
  # used to keep traces that are irrelevant to the gradient computation we're
  # doing here.
  id_sources = [ops.tensor_id(t) for t in sources]
  tensor_usage_counts, op_to_entry, op_missing_tensor = _prepare_backprop(
      target, tensor_to_op, op_to_entry, id_sources)
  ready_ops = _initialize_backprop_stack(op_to_entry, op_missing_tensor)
  gradients = _initial_gradients(target, output_gradients,
                                 tensor_usage_counts)
  # Now exhaust the backprop stack
  while ready_ops:
    op = ready_ops.pop()
    op_trace = op_to_entry.pop(op)
    out_gradients = [gradients.pop(t, None) for t in op_trace.output_ids]
    for i in range(len(out_gradients)):
      if out_gradients[i] is None:
        # TODO(apassos) this should be in the right device
        none_indices = _grad_fn_accepts_none_for_indices.get(
            op_trace.op_type, None)
        if none_indices is None or i not in none_indices:
          out_gradients[i] = array_ops.zeros(
              *op_trace.output_shape_and_dtype[i])
      else:
        out_gradients[i] = _aggregate_grads(out_gradients[i])

    in_gradients = op_trace.backward_function(
        *(out_gradients + op_trace.side_outputs))
    in_gradients = ([in_gradients] if isinstance(
        in_gradients, (ops.Tensor, ops.IndexedSlices, type(None)))
                    else in_gradients)
    for i, t in enumerate(op_trace.input_ids):
      if in_gradients[i] is not None:
        gradients[t].append(in_gradients[i])
      if tensor_usage_counts.get(t, 0) > 0:
        tensor_usage_counts[t] -= 1
        if (t in tensor_to_op and tensor_usage_counts[t] == 0
            and t not in id_sources):
          in_op = tensor_to_op[t]
          if in_op is None:
            continue
          if op_missing_tensor.get(in_op, 0) > 0:
            op_missing_tensor[in_op] -= 1
          if op_missing_tensor.get(in_op, 0) == 0:
            ready_ops.append(in_op)
  result = []
  for i, s in enumerate(sources):
    g = gradients.get(ops.tensor_id(s), None)
    if g is None:
      # TODO(apassos): figure out a way to summarize why sources and targets
      # are not connected.
      raise ValueError(
          "There is no sequence of operations connecting source "
          "tensor %s (%s) to any of the target Tensors. This is "
          "commonly caused by the tape not recording all "
          "operations in the forward pass or if by mistake a "
          "source was only used in non-differentiable operations." % (i, s))
    result.append(_aggregate_grads(g))
  return result

def imperative_grad(target, sources, output_gradients=None):
  """Computes gradients from the imperatively defined tape on top of the stack.

  Works by filtering the tape, computing how many downstream usages are of each
  tensor and entry, and repeatedly applying backward functions until we have
  gradients for all sources.

  Args:
    target: either a Tensor or list of Tensors to be differentiated.
    sources: list of Tensors for which we want gradients
    output_gradients: if not None, a list of gradient provided for each Target,
      or None if we are to use the target's computed downstream gradient.

  Returns:
    the gradient wrt each of the sources.

  Raises:
    RuntimeError: if something goes wrong.
    ValueError: if there is no sequence of differentiable operations connecting
      a source and any target Tensor. This can happen either if the target is
      not computed based on the source, if the tracing was set up incorrectly,
      or if only non-differentiable functions of the source were used in the
      computation of target.
  """
  if not tape._tape_stack.stack:  # pylint: disable=protected-access
    raise RuntimeError("Computing a gradient with no tape present")
  bp_tape = tape.pop_tape()
  tensor_to_op, op_to_entry = bp_tape.export()
  # This overwrites the op_to_entry variable, which will release all memory
  # used to keep traces that are irrelevant to the gradient computation we're
  # doing here.
  id_sources = [ops.tensor_id(t) for t in sources]
  tensor_usage_counts, op_to_entry, op_missing_tensor = _prepare_backprop(
      target, tensor_to_op, op_to_entry, id_sources)
  ready_ops = _initialize_backprop_stack(op_to_entry, op_missing_tensor)
  gradients = _initial_gradients(target, output_gradients,
                                 tensor_usage_counts)
  gradients_size = dict()
  # Now exhaust the backprop stack
  while ready_ops:
    op = ready_ops.pop()
    op_trace = op_to_entry.pop(op)
    out_gradients = [gradients.pop(t, None) for t in op_trace.output_ids]
    for i in range(len(out_gradients)):
      if out_gradients[i] is None:
        # TODO(apassos) this should be in the right device
        none_indices = _grad_fn_accepts_none_for_indices.get(
            op_trace.op_type, None)
        if none_indices is None or i not in none_indices:
          out_gradients[i] = array_ops.zeros(
              *op_trace.output_shape_and_dtype[i])
      else:
        out_gradients[i] = _aggregate_grads(out_gradients[i])

    in_gradients = op_trace.backward_function(
        *(out_gradients + op_trace.side_outputs))
    in_gradients = ([in_gradients] if isinstance(
        in_gradients, (ops.Tensor, ops.IndexedSlices, type(None)))
                    else in_gradients)
    for i, t in enumerate(op_trace.input_ids):
      if in_gradients[i] is not None:
        _add_new_grads(gradients, gradients_size, t, in_gradients[i])
      if tensor_usage_counts.get(t, 0) > 0:
        tensor_usage_counts[t] -= 1
        if (t in tensor_to_op and tensor_usage_counts[t] == 0
            and t not in id_sources):
          in_op = tensor_to_op[t]
          if in_op is None:
            continue
          if op_missing_tensor.get(in_op, 0) > 0:
            op_missing_tensor[in_op] -= 1
          if op_missing_tensor.get(in_op, 0) == 0:
            ready_ops.append(in_op)
  result = []
  for i, s in enumerate(sources):
    g = gradients.get(ops.tensor_id(s), None)
    if g is None:
      # TODO(apassos): figure out a way to summarize why sources and targets
      # are not connected.
      raise ValueError(
          "There is no sequence of operations connecting source "
          "tensor %s (%s) to any of the target Tensors. This is "
          "commonly caused by the tape not recording all "
          "operations in the forward pass or if by mistake a "
          "source was only used in non-differentiable operations." % (i, s))
    result.append(_aggregate_grads(g))
  return result