def in_grad_function():
  with ops.name_scope(op.name + "_grad"):
    # pylint: disable=protected-access
    with ops.get_default_graph()._original_op(op):
      # pylint: enable=protected-access
      if grad_fn:
        # If grad_fn was found, do not use SymbolicGradient even for
        # functions.
        in_grads = _AsList(grad_fn(op, *out_grads))
      else:
        # For function call ops, we add a 'SymbolicGradient'
        # node to the graph to compute gradients.
        f_in = [x for x in op.inputs] + out_grads
        f_types = [x.dtype for x in op.inputs]
        # pylint: disable=protected-access
        in_grads = _AsList(
            functional_ops._symbolic_gradient(f_in, f_types, op.type))
        # pylint: enable=protected-access
      _VerifyGeneratedGradients(in_grads, op)
      if gate_gradients and len([x for x in in_grads if x is not None]) > 1:
        in_grads = control_flow_ops.tuple(in_grads)
      _LogOpGradients(op, out_grads, in_grads)
      in_grads = [
          x if x is not None else tf.zeros(
              tf.shape(op.inputs[i]), dtype=op.inputs[i].dtype)
          for i, x in enumerate(in_grads)
      ]
      return in_grads

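# The snippet above (and several below) relies on an _AsList helper that is
# not included in this collection. A minimal sketch of what it likely looks
# like, inferred from how it is called (normalizing a single Tensor or a
# tuple/list of Tensors into a plain list):
def _AsList(x):
  return x if isinstance(x, (list, tuple)) else [x]
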
def XSquarePlusOneGrad(x, dy):
  dx = functional_ops._symbolic_gradient(
      input=[x, dy],
      Tout=[tf.float32],
      # This relies on define_function having registered the function
      # above under the name "XSquarePlusOneFn".
      f="XSquarePlusOneFn",
      name="dx")
  return dx

def _SymGrad(op, out_grads):
  """Backprop through a function call node op given its outputs' gradients."""
  f_in = [x for x in op.inputs] + out_grads
  f_types = [x.dtype for x in op.inputs]
  f = attr_value_pb2.NameAttrList()
  f.name = op.type
  for k in op.node_def.attr:
    f.attr[k].CopyFrom(op.node_def.attr[k])
  # pylint: disable=protected-access
  in_grads = functional_ops._symbolic_gradient(input=f_in, Tout=f_types, f=f)
  # pylint: enable=protected-access
  return in_grads

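# Hedged illustration of the `f` argument built in _SymGrad above: a
# NameAttrList names the function to differentiate and carries the attrs of
# the original call op, whereas the test snippets below simply pass a bare
# string such as f="Foo". The function name and the "T" attr value here are
# illustrative assumptions, not taken from the original code.
from tensorflow.core.framework import attr_value_pb2

f = attr_value_pb2.NameAttrList()
f.name = "Foo"
f.attr["T"].type = 1  # DT_FLOAT; in _SymGrad the attrs are copied from op.
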
def testSymGradShape(self):
  g = tf.Graph()
  with g.as_default():
    x = tf.placeholder(tf.float32, [25, 4])
    y = tf.placeholder(tf.float32, [200, 100])
    dz = tf.placeholder(tf.float32, [1])
    # We assume Foo is a function of (x, y) -> (z). Then, Foo's
    # gradient function is (x, y, dz) -> (dx, dy). dx's shape
    # should be the same as x's; and dy's shape should be the same
    # as y's.
    dx, dy = functional_ops._symbolic_gradient(
        input=[x, y, dz], Tout=[tf.float32] * 2, f="Foo")
    self.assertEqual(x.get_shape(), dx.get_shape())
    self.assertEqual(y.get_shape(), dy.get_shape())

def testSymGradShape(self):
  g = ops.Graph()
  with g.as_default():
    x = array_ops.placeholder(dtypes.float32, [25, 4])
    y = array_ops.placeholder(dtypes.float32, [200, 100])
    dz = array_ops.placeholder(dtypes.float32, [1])
    # We assume Foo is a function of (x, y) -> (z). Then, Foo's
    # gradient function is (x, y, dz) -> (dx, dy). dx's shape
    # should be the same as x's; and dy's shape should be the same
    # as y's.
    dx, dy = functional_ops._symbolic_gradient(
        input=[x, y, dz], Tout=[dtypes.float32] * 2, f="Foo")
    self.assertEqual(x.get_shape(), dx.get_shape())
    self.assertEqual(y.get_shape(), dy.get_shape())

def XSquarePlusOneGrad(x, dy): dx = functional_ops._symbolic_gradient(input=[x, dy], Tout=[tf.float32], f="XSquarePlusOne", name="dx") return dx
def XSquarePlusOneGrad(x, dy):
  dx = functional_ops._symbolic_gradient(
      input=[x, dy], Tout=[dtypes.float32], f="XSquarePlusOneFn", name="dx")
  return dx

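# For context, a hedged sketch of the forward function that the
# XSquarePlusOneGrad snippets above differentiate. Per the comments in those
# snippets, the tests register it via define_function under the name passed
# as f= ("XSquarePlusOne" / "XSquarePlusOneFn"); the body here is inferred
# from the name and the gradient signature (x, dy) -> dx.
def XSquarePlusOne(x):
  return x * x + 1.0
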
def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None):
  """Constructs symbolic partial derivatives of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors. `grad_ys` is a list
  of `Tensor`, holding the gradients received by the `ys`. The list must be
  the same length as `ys`.

  `gradients()` adds ops to the graph to output the partial derivatives of
  `ys` with respect to `xs`. It returns a list of `Tensor` of length
  `len(xs)` where each tensor is the `sum(dy/dx)` for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds the
  initial gradients for each y in `ys`. When `grad_ys` is None, we fill in a
  tensor of '1's of the shape of y for each y in `ys`. A user can provide
  their own initial `grad_ys` to compute the derivatives using a different
  initial gradient for each y (e.g., if one wanted to weight the gradient
  differently for each value in each y).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as `ys`
      and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      Defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with the
      corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned for an
      operation. This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not have a
      registered gradient function.
    ValueError: if the arguments are invalid.
  """
  ys = _AsList(ys)
  xs = _AsList(xs)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)
  with ops.op_scope(ys + xs + grad_ys, name, "gradients"):
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs. Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected. Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops,
                                              from_ops)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op. The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      # 'ready' handles the case where one output gradient relies on
      # another output's gradient.
      # pylint: disable=protected-access
      ready = (pending_count[op._id] == 0)
      if ready and op._id not in to_ops_set:
        to_ops_set.add(op._id)
        queue.append(op)

    if loop_state:
      # The "unused" exits of the loops are added to ys. As an example,
      # people often write:
      #   v1, _ = While(p, b, [x1, x2])
      #   result = gradients(v1, x1)
      # The exit node of x2 is not included by the betweenness analysis.
      # But we need it if x2 is involved in computing v1. So we add it
      # back in backprop with a zeros_like gradient.
      loop_exits = loop_state.GetAllLoopExits()
      for y in loop_exits:
        if pending_count[y.op._id] == 0 and y.op._id not in to_ops_set:
          if _IsFloat(y):
            # Floating-point outputs get a zero gradient.
            _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
          queue.append(y.op)

    # The set of 'from_ops'.
    stop_ops = _StopOps(from_ops, pending_count)
    while queue:
      # generate gradient subgraph for op.
      op = queue.popleft()
      with ops.device(_GetGradsDevice(op, colocate_gradients_with_ops)):
        if loop_state:
          loop_state.EnterGradWhileContext(op)
        out_grads = _AggregatedGrads(grads, op, loop_state, aggregation_method)
        grad_fn = None
        # pylint: disable=protected-access
        is_func_call = ops.get_default_graph()._is_function(op.type)
        if not is_func_call and any(out_grads) and op._id not in stop_ops:
          # pylint: enable=protected-access
          # A grad_fn must be defined, either as a function or as None
          # for ops that do not have gradients.
          try:
            grad_fn = ops.get_gradient_function(op)
          except LookupError:
            raise LookupError(
                "No gradient defined for operation '%s' (op type: %s)" %
                (op.name, op.type))

        if (grad_fn or is_func_call) and any(out_grads):
          # NOTE: If _AggregatedGrads didn't compute a value for the i'th
          # output, it means that the cost does not depend on output[i],
          # therefore dC/doutput[i] is 0.
          for i, out_grad in enumerate(out_grads):
            if not out_grad and _IsFloat(op.outputs[i]):
              # Only floating-point outputs get a zero gradient. Gradient
              # functions should ignore the gradient for other outputs.
              if loop_state:
                out_grads[i] = loop_state.ZerosLike(op, i)
              else:
                out_grads[i] = array_ops.zeros_like(op.outputs[i])
          with ops.name_scope(op.name + "_grad"):
            # pylint: disable=protected-access
            with ops.get_default_graph()._original_op(op):
              # pylint: enable=protected-access
              wrapped_op = op
              if loop_state:
                wrapped_op = loop_state.MakeWrapper(op)
              if is_func_call:
                # For function call ops, we add a 'SymbolicGradient'
                # node to the graph to compute gradients.
                f_in = [x for x in op.inputs] + out_grads
                f_types = [x.dtype for x in op.inputs]
                # pylint: disable=protected-access
                in_grads = _AsList(
                    functional_ops._symbolic_gradient(f_in, f_types, op.type))
                # pylint: enable=protected-access
              else:
                in_grads = _AsList(grad_fn(wrapped_op, *out_grads))
              _VerifyGeneratedGradients(in_grads, op)
              if gate_gradients and len(tuple(filter(None, in_grads))) > 1:
                in_grads = control_flow_ops.tuple(in_grads)
              logging.vlog(1, "Gradient for '" + op.name + "'")
              logging.vlog(1, " in  --> %s",
                           ", ".join([x.name for x in out_grads if x]))
              logging.vlog(1, " out --> %s",
                           ", ".join([x.name for x in in_grads if x]))
        else:
          # If no grad_fn is defined or none of out_grads is available,
          # just propagate a list of None backwards.
          in_grads = [None] * len(op.inputs)
        for t_in, in_grad in zip(op.inputs, in_grads):
          if in_grad:
            _SetGrad(grads, t_in, in_grad)
        if loop_state:
          loop_state.ExitGradWhileContext(op)

      # update pending count for the inputs of op.
      # pylint: disable=protected-access
      for x in op.inputs:
        pending_count[x.op._id] -= 1
        ready = (pending_count[x.op._id] == 0)
        if loop_state and not ready:
          ready = (pending_count[x.op._id] > 0 and
                   control_flow_ops.IsLoopSwitch(x.op))
        if ready:
          queue.append(x.op)
      for x in op.control_inputs:
        pending_count[x._id] -= 1
        if pending_count[x._id] == 0:
          queue.append(x)
      # pylint: enable=protected-access

  return [_GetGrad(grads, x) for x in xs]

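# A minimal usage sketch of the API documented by the docstring above,
# assuming the standard public tf.gradients entry point; the names x, w and
# loss are illustrative only.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 3])
w = tf.Variable(tf.ones([3, 2]))
loss = tf.reduce_sum(tf.matmul(x, w))
dw, = tf.gradients(loss, [w])  # one tensor per x in xs, same shape as w
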
def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None):
  """Constructs symbolic partial derivatives of sum of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors. `grad_ys` is a list
  of `Tensor`, holding the gradients received by the `ys`. The list must be
  the same length as `ys`.

  `gradients()` adds ops to the graph to output the partial derivatives of
  `ys` with respect to `xs`. It returns a list of `Tensor` of length
  `len(xs)` where each tensor is the `sum(dy/dx)` for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds the
  initial gradients for each y in `ys`. When `grad_ys` is None, we fill in a
  tensor of '1's of the shape of y for each y in `ys`. A user can provide
  their own initial `grad_ys` to compute the derivatives using a different
  initial gradient for each y (e.g., if one wanted to weight the gradient
  differently for each value in each y).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as `ys`
      and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      Defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with the
      corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned for an
      operation. This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not have a
      registered gradient function.
    ValueError: if the arguments are invalid.
  """
  ys = _AsList(ys)
  xs = _AsList(xs)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)
  with ops.name_scope(name, "gradients", ys + xs + grad_ys):
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs. Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected. Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops,
                                              from_ops,
                                              colocate_gradients_with_ops)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op. The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      # 'ready' handles the case where one output gradient relies on
      # another output's gradient.
      # pylint: disable=protected-access
      ready = (pending_count[op._id] == 0)
      if ready and op._id not in to_ops_set:
        to_ops_set.add(op._id)
        queue.append(op)
      # pylint: enable=protected-access

    if loop_state:
      loop_exits = loop_state.ProcessUnusedLoopExits(pending_count, to_ops_set)
      for y in loop_exits:
        if _IsTrainable(y):
          _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
          queue.append(y.op)

    # The set of 'from_ops'.
    stop_ops = _StopOps(from_ops, pending_count)
    while queue:
      # generate gradient subgraph for op.
      op = queue.popleft()
      with _maybe_colocate_with(op, colocate_gradients_with_ops):
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=True)
        out_grads = _AggregatedGrads(grads, op, loop_state, aggregation_method)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=True)

        grad_fn = None
        # pylint: disable=protected-access
        is_func_call = ops.get_default_graph()._is_function(op.type)
        has_out_grads = any(isinstance(g, ops.Tensor) or g for g in out_grads)
        if has_out_grads and (op._id not in stop_ops):
          if is_func_call:
            grad_fn = ops.get_default_graph()._get_function(
                op.type).python_grad_func
            # pylint: enable=protected-access
          else:
            # A grad_fn must be defined, either as a function or as None
            # for ops that do not have gradients.
            try:
              grad_fn = ops.get_gradient_function(op)
            except LookupError:
              raise LookupError(
                  "No gradient defined for operation '%s' (op type: %s)" %
                  (op.name, op.type))
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=False)
        if (grad_fn or is_func_call) and has_out_grads:
          # NOTE: If _AggregatedGrads didn't compute a value for the i'th
          # output, it means that the cost does not depend on output[i],
          # therefore dC/doutput[i] is 0.
          for i, out_grad in enumerate(out_grads):
            if (not isinstance(out_grad, ops.Tensor) and
                not out_grad) and _IsTrainable(op.outputs[i]):
              # Only floating-point outputs get a zero gradient. Gradient
              # functions should ignore the gradient for other outputs.
              if loop_state:
                out_grads[i] = loop_state.ZerosLike(op, i)
              else:
                out_grads[i] = control_flow_ops.ZerosLikeOutsideLoop(op, i)
          with ops.name_scope(op.name + "_grad"):
            # pylint: disable=protected-access
            with ops.get_default_graph()._original_op(op):
              # pylint: enable=protected-access
              if grad_fn:
                # If grad_fn was found, do not use SymbolicGradient even for
                # functions.
                in_grads = _AsList(grad_fn(op, *out_grads))
              else:
                # For function call ops, we add a 'SymbolicGradient'
                # node to the graph to compute gradients.
                f_in = [x for x in op.inputs] + out_grads
                f_types = [x.dtype for x in op.inputs]
                # pylint: disable=protected-access
                in_grads = _AsList(
                    functional_ops._symbolic_gradient(f_in, f_types, op.type))
                # pylint: enable=protected-access
              _VerifyGeneratedGradients(in_grads, op)
              if gate_gradients and len(
                  [x for x in in_grads if x is not None]) > 1:
                in_grads = control_flow_ops.tuple(in_grads)
              _LogOpGradients(op, out_grads, in_grads)
        else:
          # If no grad_fn is defined or none of out_grads is available,
          # just propagate a list of None backwards.
          in_grads = [None] * len(op.inputs)
        for t_in, in_grad in zip(op.inputs, in_grads):
          if in_grad is not None:
            if isinstance(in_grad, ops.Tensor):
              in_grad.set_shape(t_in.get_shape())
            _SetGrad(grads, t_in, in_grad)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=False)

      # Update pending count for the inputs of op and enqueue ready ops.
      _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count, loop_state)

  if loop_state:
    loop_state.PostProcessing()
  return [_GetGrad(grads, x) for x in xs]

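# Hedged sketch of the grad_ys behaviour described in the docstring above:
# when supplied, grad_ys seeds backprop with a caller-chosen gradient for
# each y instead of a tensor of ones. The constants and names are
# illustrative only.
import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0])
y1 = x * x
y2 = 3.0 * x
# Weight y1's contribution twice as heavily as y2's in the summed dy/dx.
g, = tf.gradients([y1, y2], [x],
                  grad_ys=[tf.fill([3], 2.0), tf.fill([3], 1.0)])
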
def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None):
  """Constructs symbolic partial derivatives of sum of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors. `grad_ys` is a list
  of `Tensor`, holding the gradients received by the `ys`. The list must be
  the same length as `ys`.

  `gradients()` adds ops to the graph to output the partial derivatives of
  `ys` with respect to `xs`. It returns a list of `Tensor` of length
  `len(xs)` where each tensor is the `sum(dy/dx)` for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds the
  initial gradients for each y in `ys`. When `grad_ys` is None, we fill in a
  tensor of '1's of the shape of y for each y in `ys`. A user can provide
  their own initial `grad_ys` to compute the derivatives using a different
  initial gradient for each y (e.g., if one wanted to weight the gradient
  differently for each value in each y).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as `ys`
      and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      Defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with the
      corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned for an
      operation. This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not have a
      registered gradient function.
    ValueError: if the arguments are invalid.
  """
  ys = _AsList(ys)
  xs = _AsList(xs)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)
  with ops.op_scope(ys + xs + grad_ys, name, "gradients"):
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs. Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected. Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops,
                                              from_ops)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op. The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      # 'ready' handles the case where one output gradient relies on
      # another output's gradient.
      # pylint: disable=protected-access
      ready = (pending_count[op._id] == 0)
      if ready and op._id not in to_ops_set:
        to_ops_set.add(op._id)
        queue.append(op)

    if loop_state:
      # The "unused" exits of the loops are added to ys. As an example,
      # people often write:
      #   v1, _ = While(p, b, [x1, x2])
      #   result = gradients(v1, x1)
      # The exit node of x2 is not included by the betweenness analysis.
      # But we need it if x2 is involved in computing v1. So we add it
      # back in backprop with a zeros_like gradient.
      loop_exits = loop_state.GetAllLoopExits()
      for y in loop_exits:
        if pending_count[y.op._id] == 0 and y.op._id not in to_ops_set:
          if _IsFloat(y):
            # Floating-point outputs get a zero gradient.
            _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
          queue.append(y.op)

    # The set of 'from_ops'.
    stop_ops = _StopOps(from_ops, pending_count)
    while queue:
      # generate gradient subgraph for op.
      op = queue.popleft()
      with _maybe_colocate_with(op, colocate_gradients_with_ops):
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=True)
        out_grads = _AggregatedGrads(grads, op, loop_state, aggregation_method)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=True)

        grad_fn = None
        # pylint: disable=protected-access
        is_func_call = ops.get_default_graph()._is_function(op.type)
        if not is_func_call and any(
            isinstance(g, ops.Tensor) or g for g in out_grads) and (
                op._id not in stop_ops):
          # pylint: enable=protected-access
          # A grad_fn must be defined, either as a function or as None
          # for ops that do not have gradients.
          try:
            grad_fn = ops.get_gradient_function(op)
          except LookupError:
            raise LookupError(
                "No gradient defined for operation '%s' (op type: %s)" %
                (op.name, op.type))

        if loop_state:
          loop_state.EnterGradWhileContext(op, before=False)
        if (grad_fn or is_func_call) and any(
            isinstance(g, ops.Tensor) or g for g in out_grads):
          # NOTE: If _AggregatedGrads didn't compute a value for the i'th
          # output, it means that the cost does not depend on output[i],
          # therefore dC/doutput[i] is 0.
          for i, out_grad in enumerate(out_grads):
            if (not isinstance(out_grad, ops.Tensor) and
                not out_grad) and _IsFloat(op.outputs[i]):
              # Only floating-point outputs get a zero gradient. Gradient
              # functions should ignore the gradient for other outputs.
              if loop_state:
                out_grads[i] = loop_state.ZerosLike(op, i)
              else:
                out_grads[i] = control_flow_ops.ZerosLikeOutsideLoop(op, i)
          with ops.name_scope(op.name + "_grad"):
            # pylint: disable=protected-access
            with ops.get_default_graph()._original_op(op):
              # pylint: enable=protected-access
              if is_func_call:
                # For function call ops, we add a 'SymbolicGradient'
                # node to the graph to compute gradients.
                f_in = [x for x in op.inputs] + out_grads
                f_types = [x.dtype for x in op.inputs]
                # pylint: disable=protected-access
                in_grads = _AsList(
                    functional_ops._symbolic_gradient(f_in, f_types, op.type))
                # pylint: enable=protected-access
              else:
                in_grads = _AsList(grad_fn(op, *out_grads))
              _VerifyGeneratedGradients(in_grads, op)
              if gate_gradients and len(
                  [x for x in in_grads if x is not None]) > 1:
                in_grads = control_flow_ops.tuple(in_grads)
              logging.vlog(1, "Gradient for '" + op.name + "'")

              def _FilterGrad(x):
                if x is None:
                  return False
                if isinstance(x, (list, tuple)):
                  return bool(x)
                else:
                  return True

              logging.vlog(1, " in  --> %s",
                           ", ".join([x.name for x in out_grads
                                      if _FilterGrad(x)]))
              logging.vlog(1, " out --> %s",
                           ", ".join([x.name for x in in_grads
                                      if _FilterGrad(x)]))
        else:
          # If no grad_fn is defined or none of out_grads is available,
          # just propagate a list of None backwards.
          in_grads = [None] * len(op.inputs)
        for t_in, in_grad in zip(op.inputs, in_grads):
          if in_grad is not None:
            _SetGrad(grads, t_in, in_grad)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=False)

      # update pending count for the inputs of op.
      # pylint: disable=protected-access
      for x in op.inputs:
        pending_count[x.op._id] -= 1
        ready = (pending_count[x.op._id] == 0)
        if loop_state and not ready:
          ready = (pending_count[x.op._id] > 0 and
                   control_flow_ops.IsLoopSwitch(x.op))
        if ready:
          queue.append(x.op)
      for x in op.control_inputs:
        pending_count[x._id] -= 1
        if pending_count[x._id] == 0:
          queue.append(x)
      # pylint: enable=protected-access

  return [_GetGrad(grads, x) for x in xs]

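# Hedged sketch of the gate_gradients and aggregation_method options described
# in the docstring above; tf.AggregationMethod is assumed to expose the
# constants the docstring refers to (e.g. ADD_N). Names and values are
# illustrative only.
import tensorflow as tf

x = tf.constant(2.0)
y = x * x + tf.sin(x)  # x feeds two terms, so its received gradients are aggregated
g, = tf.gradients(y, [x],
                  gate_gradients=True,
                  aggregation_method=tf.AggregationMethod.ADD_N)
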