def grad(x):
  with backprop.GradientTape() as tape:
    tape.watch(x)
    y = nn_ops.l2_loss(nn_ops.relu(x))
  return tape.gradient(y, x)
def step(c):
  x = array_ops.identity(42.)
  with backprop.GradientTape() as tape:
    tape.watch(x)
    y = comm_fn(x) * c
  return tape.gradient(y, x)
def _grad_function():
  with backprop.GradientTape() as tape:
    primal_out = f()
  g, = tape.gradient(primal_out, tape.watched_variables())
  return g
def _benchmark_matmul_read_variable_with_tape(self, m, num_iters):
  with backprop.GradientTape() as tape:
    tape.watch(m)
    self._benchmark_gen_math_ops_matmul(
        m, transpose_b=False, num_iters=num_iters)
def benchmark_tf_gradient_forward_identity(self):
  with backprop.GradientTape() as tape:
    m = self._m_2
    tape.watch(m)
    self._run(lambda: gen_array_ops.identity(m), 30000)
def _test_fn():
  with backprop.GradientTape() as tape:
    x = array_ops.ones([5, 5])
    tape.watch(x)
    y = math_ops.reduce_sum(x, axis=constant_op.constant(1))
  return y, tape.gradient(y, x)
def replica_step():
  with backprop.GradientTape() as tape:
    y = model(x)
  return tape.gradient(y, x)
def f_grad():
  with backprop.GradientTape() as tape:
    logits = constant_op.constant([1., 2.])
    tape.watch(logits)
    out = f(constant_op.constant(1), logits)
  return tape.gradient(out, logits)
def testGradientTapeVariable(self):
  v = resource_variable_ops.ResourceVariable(1.0, name='v')
  with backprop.GradientTape() as g:
    y = v * v
  grad = g.gradient(y, [v])[0]
  self.assertAllEqual(grad, 2.0)
def f():
  x = constant_op.constant(1.0)
  with backprop.GradientTape() as tape:
    y = v0 * x
  dy = tape.gradient(y, v0)
  return dy
def step_fn(inputs):
  images, targets = inputs
  with backprop.GradientTape() as tape:
    loss = compute_loss2(images, targets)
  grads = tape.gradient(loss, model2.variables)
  optimizer.apply_gradients(zip(grads, model2.variables))
def f(x):
  assert x.dtype == dtypes.float64
  with backprop.GradientTape() as tape:
    tape.watch(x)
    y = nn_ops.selu(x)
  return tape.gradient(y, x)
def f(x):
  with backprop.GradientTape(persistent=True) as tape:
    tape.watch(x)
    y = nn_ops.elu(x)
    # First-order gradient is computed inside the persistent tape so the
    # second call below can differentiate through it.
    dy = tape.gradient(y, x)
  return tape.gradient(dy, x)
def f(x):
  with backprop.GradientTape() as tape:
    tape.watch(x)
    y = nn_ops.relu(x)
  return tape.gradient(y, x)
def check_tape_safe(self, operator, skip_options=None):
  """Check gradients are not None w.r.t. operator.variables.

  Meant to be called from the derived class.

  This ensures grads are not None w.r.t. every variable in
  operator.variables.  If more fine-grained testing is needed, a custom test
  should be written.

  Args:
    operator: LinearOperator.  Exact checks done will depend on hints.
    skip_options: Optional list of CheckTapeSafeSkipOptions.
      Makes this test skip particular checks.
  """
  skip_options = skip_options or []

  if not operator.variables:
    raise AssertionError("`operator.variables` was empty")

  def _assert_not_none(iterable):
    for item in iterable:
      self.assertIsNotNone(item)

  # Tape tests that can be run on every operator below.
  with backprop.GradientTape() as tape:
    _assert_not_none(
        tape.gradient(operator.to_dense(), operator.variables))

  with backprop.GradientTape() as tape:
    _assert_not_none(
        tape.gradient(operator.adjoint().to_dense(), operator.variables))

  x = math_ops.cast(
      array_ops.ones(shape=operator.H.shape_tensor()[:-1]), operator.dtype)

  with backprop.GradientTape() as tape:
    _assert_not_none(
        tape.gradient(operator.matvec(x), operator.variables))

  # Tests for square, but possibly non-singular operators below.
  if not operator.is_square:
    return

  for option in [
      CheckTapeSafeSkipOptions.DETERMINANT,
      CheckTapeSafeSkipOptions.LOG_ABS_DETERMINANT,
      CheckTapeSafeSkipOptions.DIAG_PART,
      CheckTapeSafeSkipOptions.TRACE,
  ]:
    with backprop.GradientTape() as tape:
      if option not in skip_options:
        _assert_not_none(
            tape.gradient(getattr(operator, option)(), operator.variables))

  # Tests for non-singular operators below.
  if operator.is_non_singular is False:  # pylint: disable=g-bool-id-comparison
    return

  with backprop.GradientTape() as tape:
    _assert_not_none(
        tape.gradient(operator.inverse().to_dense(), operator.variables))

  with backprop.GradientTape() as tape:
    _assert_not_none(
        tape.gradient(operator.solvevec(x), operator.variables))

  # Tests for SPD operators below.
  if not (operator.is_self_adjoint and operator.is_positive_definite):
    return

  with backprop.GradientTape() as tape:
    _assert_not_none(
        tape.gradient(operator.cholesky().to_dense(), operator.variables))
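A minimal sketch of the property check_tape_safe asserts, written against the public tf.linalg API rather than the internal modules used above (an assumption, not part of the test suite): a LinearOperator built from a tf.Variable should yield non-None gradients with respect to operator.variables.

import tensorflow as tf

diag = tf.Variable([1., 2., 3.])
operator = tf.linalg.LinearOperatorDiag(
    diag, is_non_singular=True, is_self_adjoint=True,
    is_positive_definite=True)

with tf.GradientTape() as tape:
  dense = operator.to_dense()

# Tape safety: every gradient w.r.t. the operator's variables is non-None.
grads = tape.gradient(dense, operator.variables)
assert all(g is not None for g in grads)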
def g():
  x = constant_op.constant([3.14, 2.68, 7.69])
  with backprop.GradientTape() as tape:
    tape.watch(x)
    y = f(x)
  return tape.gradient(y, x)
def _f(*params):
  with backprop.GradientTape() as tape:
    tape.watch(params)
    primals_out = f(*params)
  return tape.gradient(primals_out, params[argnums])
def _compute_gradient(function):
  with backprop.GradientTape() as tape:
    inp = constant_op.constant(1.)
    tape.watch(inp)
    output = function(inp)
  return tape.gradient(output, inp)
def model_fn(mock_model):
  with backprop.GradientTape(persistent=True) as gtape:
    result = fn2(mock_model)
  grads = gtape.gradient(result,
                         [v.get() for v in mock_model.variables])
  return grads
def g(x):
  x = ops.convert_to_tensor(x)
  with backprop.GradientTape() as tape:
    tape.watch(x)
    y = f(x)
  return y, tape.gradient(y, x)
def func():
  with backprop.GradientTape() as gt:
    gt.watch(m)
    y = f(m, m, transpose_b=transpose_b)
  _ = gt.gradient(y, m)
def _jvp_helper(op_name, attr_tuple, inputs, outputs, tangents):
  """Computes a Jacobian-vector product for an op.

  Note that this function would be wasteful if executed eagerly. It runs the
  backward gradient function and throws away the result just to record its
  operations on a GradientTape. These unused ops are pruned away when this
  function is traced.

  Args:
    op_name: A string, the type of operation being executed.
    attr_tuple: Attributes of the operation.
    inputs: A flat list of input Tensors to the operation.
    outputs: A flat list of output Tensors from the operation.
    tangents: A flat list of Tensors, same shape as `inputs`.

  Returns:
    A flat list of tangents corresponding to `outputs`.
  """
  with _TRACE_COUNT_CONSISTENCY_LOCK:
    # Just make sure writes don't clobber each other's increments; reads in
    # _jvp_dispatch do not lock.
    _TRACE_COUNT[op_name] = _TRACE_COUNT.get(op_name, 0) + 1

  special_case = _SPECIAL_CASES.get(op_name, None)
  if special_case is not None:
    return special_case(attr_tuple, inputs, outputs, tangents)

  if not outputs:
    # tape.gradients([], inputs) doesn't make much sense
    return []

  # Generally inner GradientTapes won't function while outer accumulators are
  # recording. We temporarily reset forwardprop state to allow GradientTapes
  # to function here.
  with forwardprop_util.push_forwardprop_state():
    trainable_inputs = []
    trainable_indices = []
    nontrivial_tangents = []
    for input_index, tensor in enumerate(inputs):
      if backprop_util.IsTrainable(tensor):
        trainable_inputs.append(tensor)
        trainable_indices.append(input_index)
        nontrivial_tangents.append(tangents[input_index])

    with backprop.GradientTape() as transpose_tape:
      with backprop.GradientTape() as backfunc_tape:
        backfunc_tape.watch(trainable_inputs)
        execute.record_gradient(op_name, inputs, attr_tuple, outputs)

      forwardprop_aids = []
      trainable_outputs = []
      nontrivial_output_indices = []
      for output_index, output in enumerate(outputs):
        if backprop_util.IsTrainable(output):
          forwardprop_aids.append(
              array_ops.ones_like(output, name="unused_forwardprop_aid"))
          trainable_outputs.append(output)
          nontrivial_output_indices.append(output_index)

      transpose_tape.watch(forwardprop_aids)
      grads = backfunc_tape.gradient(
          trainable_outputs,
          trainable_inputs,
          forwardprop_aids,
          unconnected_gradients=UnconnectedGradients.ZERO)
    nontrivial_output_tangents = transpose_tape.gradient(
        grads, forwardprop_aids, output_gradients=nontrivial_tangents)
    output_tangents = [None] * len(outputs)
    for index, tangent in zip(nontrivial_output_indices,
                              nontrivial_output_tangents):
      output_tangents[index] = tangent
    return output_tangents
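The same double-backprop trick in miniature, written against the public tf.GradientTape API (a sketch only; `jvp` and `aid` are illustrative names, not part of the module above): the inner tape records the op and produces a VJP parameterized by a dummy cotangent `aid`, and differentiating that VJP with respect to `aid` on the outer tape yields the forward-mode JVP.

import tensorflow as tf

def jvp(f, primal, tangent):
  with tf.GradientTape() as transpose_tape:
    with tf.GradientTape() as backfunc_tape:
      backfunc_tape.watch(primal)
      out = f(primal)
    aid = tf.ones_like(out)  # dummy cotangent ("unused forwardprop aid")
    transpose_tape.watch(aid)
    # VJP of f at `primal` with cotangent `aid`, recorded on the outer tape.
    grad = backfunc_tape.gradient(out, primal, output_gradients=aid)
  # d(VJP)/d(aid), weighted by `tangent`, is the JVP.
  return transpose_tape.gradient(grad, aid, output_gradients=tangent)

x = tf.constant(2.0)
print(jvp(tf.square, x, tf.constant(1.0)))  # d(x**2)/dx * 1 = 4.0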
def _benchmark_read_variable_with_tape(self, m, num_iters):
  with backprop.GradientTape() as tape:
    tape.watch(m)
    self._run(m.value, num_iters)
def step(c):
  x = constant_op.constant(42.)
  with backprop.GradientTape() as tape:
    tape.watch(x)
    y = comm_fn(x) * c
  return tape.gradient(y, x)
def f():
  with backprop.GradientTape():
    pass
def _nextafter_gradient(self, x1, x2):
  with backprop.GradientTape() as tape:
    tape.watch(x1)
    tape.watch(x2)
    y = math_ops.nextafter(x1, x2)
  return tape.gradient(y, [x1, x2])
def _grad_function(primal):
  with backprop.GradientTape() as tape:
    tape.watch(primal)
    primal_out = f(primal)
  return tape.gradient(primal_out, primal)
def _graph_mode_decorator(f, *args, **kwargs):
  """Implement custom gradient decorator for graph mode."""
  # TODO(rsepassi): Add support for kwargs
  if kwargs:
    raise ValueError(
        "The custom_gradient decorator currently supports keyword "
        "arguments only when eager execution is enabled.")
  name = "CustomGradient-%s" % ops.uid()
  args = [ops.convert_to_tensor(x) for x in args]

  # Checking global and local variables attempts to ensure that no
  # non-resource Variables are added to the graph.
  current_var_scope = variable_scope.get_variable_scope()
  before_vars = set(current_var_scope.global_variables() +
                    current_var_scope.local_variables())
  with backprop.GradientTape() as tape:
    result, grad_fn = f(*args)
  after_vars = set(current_var_scope.global_variables() +
                   current_var_scope.local_variables())
  new_vars = after_vars - before_vars
  for v in new_vars:
    if not resource_variable_ops.is_resource_variable(v):
      raise TypeError(
          "All variables used by a function wrapped with @custom_gradient must "
          "be `ResourceVariable`s. Ensure that no `variable_scope` is created "
          "with `use_resource=False`.")
  # The variables that grad_fn needs to return gradients for are the set of
  # variables used that are *not* part of the inputs.
  variables = list(set(tape.watched_variables()) - set(args))
  grad_argspec = tf_inspect.getfullargspec(grad_fn)
  variables_in_signature = ("variables" in grad_argspec.args or
                            grad_argspec.varkw)
  if variables and not variables_in_signature:
    raise TypeError("If using @custom_gradient with a function that "
                    "uses variables, then grad_fn must accept a keyword "
                    "argument 'variables'.")
  if variables_in_signature and not variables:
    # User seems to intend to use variables but none were captured.
    if not variable_scope.get_variable_scope().use_resource:
      raise TypeError(
          "If using @custom_gradient with a function that "
          "uses variables, the enclosing variable scope must "
          "have use_resource=True.")
    else:
      logging.warn(
          "@custom_gradient grad_fn has 'variables' in signature, but "
          "no ResourceVariables were used on the forward pass.")
  flat_result = nest.flatten(result)
  all_tensors = flat_result + args + variables

  def tape_grad_fn(*result_grads):
    """Custom grad fn wrapper."""
    result_grads = result_grads[:len(flat_result)]
    if variables:
      input_grads, variable_grads = grad_fn(*result_grads, variables=variables)
      if len(variable_grads) != len(variables):
        raise ValueError("Must return gradient for each variable from "
                         "@custom_gradient grad_fn.")
    else:
      input_grads = grad_fn(*result_grads)
      variable_grads = []

    # Need to return one value per input to the IdentityN, so pad the
    # gradients of the inputs of the custom_gradient function with the
    # gradients of the outputs as well.
    input_grads = nest.flatten(input_grads)
    return ([None] * len(flat_result)) + input_grads + variable_grads

  @ops.RegisterGradient(name)
  def internal_grad_fn(unused_op, *result_grads):  # pylint: disable=unused-variable
    """Custom grad fn wrapper."""
    return tape_grad_fn(*result_grads)

  original_tensors = all_tensors
  with ops.get_default_graph().gradient_override_map({"IdentityN": name}):
    all_tensors = array_ops.identity_n(all_tensors)

  # Propagate handle data for happier shape inference for resource variables.
  for i, t in enumerate(original_tensors):
    if t.dtype == dtypes.resource and hasattr(t, "_handle_data"):
      all_tensors[i]._handle_data = t._handle_data  # pylint: disable=protected-access
  tape_lib.record_operation(
      f.__name__, all_tensors, original_tensors, tape_grad_fn)
  for ot, t in zip(original_tensors, all_tensors):
    copy_handle_data(ot, t)
  return nest.pack_sequence_as(
      structure=result, flat_sequence=all_tensors[:len(flat_result)])
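For context, a minimal eager-mode usage sketch of the public tf.custom_gradient API whose variables-in-signature contract the decorator above enforces (the names `scale` and `v` are illustrative): when the forward function captures a tf.Variable, grad_fn must accept a `variables` keyword argument and return per-variable gradients alongside the input gradients.

import tensorflow as tf

v = tf.Variable(3.0)

@tf.custom_gradient
def scale(x):
  y = v * x
  def grad(dy, variables=None):
    # Return gradients w.r.t. inputs, then a list of gradients w.r.t.
    # the captured `variables` (here just `v`).
    return dy * v, [dy * x]
  return y, grad

x = tf.constant(2.0)
with tf.GradientTape() as tape:
  tape.watch(x)
  y = scale(x)
print(tape.gradient(y, [x, v]))  # [3.0, 2.0]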
def almost_outer_fn(x):
  with backprop.GradientTape() as tp:
    tp.watch(x)
    result = middle_fn(x, 1.0)
  grad = tp.gradient(result, x)
  return grad
def train_step():
  with backprop.GradientTape() as tape:
    loss = mini_model(array_ops.ones([1, 10]))
  grads = tape.gradient(loss, mini_model.weights)
  grads_and_vars = zip(grads, mini_model.weights)
  optimizer.apply_gradients(grads_and_vars)