def in_grad_function():
  with ops.name_scope(op.name + "_grad"):
    # pylint: disable=protected-access
    with ops.get_default_graph()._original_op(op):
      # pylint: enable=protected-access
      if grad_fn:
        # If grad_fn was found, do not use SymbolicGradient even for
        # functions.
        in_grads = _AsList(grad_fn(op, *out_grads))
      else:
        # For function call ops, we add a 'SymbolicGradient'
        # node to the graph to compute gradients.
        f_in = [x for x in op.inputs] + out_grads
        f_types = [x.dtype for x in op.inputs]
        # pylint: disable=protected-access
        in_grads = _AsList(
            functional_ops._symbolic_gradient(f_in, f_types, op.type))
        # pylint: enable=protected-access
      _VerifyGeneratedGradients(in_grads, op)
      if gate_gradients and len([x for x in in_grads if x is not None]) > 1:
        in_grads = control_flow_ops.tuple(in_grads)
      _LogOpGradients(op, out_grads, in_grads)
      in_grads = [
          x if x is not None else tf.zeros(
              tf.shape(op.inputs[i]), dtype=op.inputs[i].dtype)
          for i, x in enumerate(in_grads)
      ]
      return in_grads

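# The snippet above (and several below) relies on an _AsList helper that is
# not included in this collection. A minimal sketch of what it likely looks
# like, inferred from how it is called (normalizing a single Tensor or a
# tuple/list of Tensors into a plain list):
def _AsList(x):
  return x if isinstance(x, (list, tuple)) else [x]
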
def XSquarePlusOneGrad(x, dy):
  dx = functional_ops._symbolic_gradient(
      input=[x, dy],
      Tout=[tf.float32],
      # This relies on define_function having registered the function
      # above under the name "XSquarePlusOneFn".
      f="XSquarePlusOneFn",
      name="dx")
  return dx

def _SymGrad(op, out_grads):
  """Backprop through a function call node op given its outputs' gradients."""
  f_in = [x for x in op.inputs] + out_grads
  f_types = [x.dtype for x in op.inputs]
  f = attr_value_pb2.NameAttrList()
  f.name = op.type
  for k in op.node_def.attr:
    f.attr[k].CopyFrom(op.node_def.attr[k])
  # pylint: disable=protected-access
  in_grads = functional_ops._symbolic_gradient(input=f_in, Tout=f_types, f=f)
  # pylint: enable=protected-access
  return in_grads

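# Hedged illustration of the `f` argument built in _SymGrad above: a
# NameAttrList names the function to differentiate and carries the attrs of
# the original call op, whereas the test snippets below simply pass a bare
# string such as f="Foo". The function name and the "T" attr value here are
# illustrative assumptions, not taken from the original code.
from tensorflow.core.framework import attr_value_pb2

f = attr_value_pb2.NameAttrList()
f.name = "Foo"
f.attr["T"].type = 1  # DT_FLOAT; in _SymGrad the attrs are copied from op.
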
def testSymGradShape(self):
  g = tf.Graph()
  with g.as_default():
    x = tf.placeholder(tf.float32, [25, 4])
    y = tf.placeholder(tf.float32, [200, 100])
    dz = tf.placeholder(tf.float32, [1])
    # We assume Foo is a function of (x, y) -> (z). Then, Foo's
    # gradient function is (x, y, dz) -> (dx, dy). dx's shape
    # should be the same as x's; and dy's shape should be the same
    # as y's.
    dx, dy = functional_ops._symbolic_gradient(
        input=[x, y, dz], Tout=[tf.float32] * 2, f="Foo")
    self.assertEqual(x.get_shape(), dx.get_shape())
    self.assertEqual(y.get_shape(), dy.get_shape())

def testSymGradShape(self):
  g = ops.Graph()
  with g.as_default():
    x = array_ops.placeholder(dtypes.float32, [25, 4])
    y = array_ops.placeholder(dtypes.float32, [200, 100])
    dz = array_ops.placeholder(dtypes.float32, [1])
    # We assume Foo is a function of (x, y) -> (z). Then, Foo's
    # gradient function is (x, y, dz) -> (dx, dy). dx's shape
    # should be the same as x's; and dy's shape should be the same
    # as y's.
    dx, dy = functional_ops._symbolic_gradient(
        input=[x, y, dz], Tout=[dtypes.float32] * 2, f="Foo")
    self.assertEqual(x.get_shape(), dx.get_shape())
    self.assertEqual(y.get_shape(), dy.get_shape())

def XSquarePlusOneGrad(x, dy): dx = functional_ops._symbolic_gradient(input=[x, dy], Tout=[tf.float32], f="XSquarePlusOne", name="dx") return dx
def XSquarePlusOneGrad(x, dy):
  dx = functional_ops._symbolic_gradient(
      input=[x, dy], Tout=[dtypes.float32], f="XSquarePlusOneFn", name="dx")
  return dx

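# For context, a hedged sketch of the forward function that the
# XSquarePlusOneGrad snippets above differentiate. Per the comments in those
# snippets, the tests register it via define_function under the name passed
# as f= ("XSquarePlusOne" / "XSquarePlusOneFn"); the body here is inferred
# from the name and the gradient signature (x, dy) -> dx.
def XSquarePlusOne(x):
  return x * x + 1.0
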
def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None):
  """Constructs symbolic partial derivatives of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors. `grad_ys` is a list
  of `Tensor`, holding the gradients received by the `ys`. The list must be
  the same length as `ys`.

  `gradients()` adds ops to the graph to output the partial derivatives of
  `ys` with respect to `xs`. It returns a list of `Tensor` of length
  `len(xs)` where each tensor is the `sum(dy/dx)` for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds the
  initial gradients for each y in `ys`. When `grad_ys` is None, we fill in a
  tensor of '1's of the shape of y for each y in `ys`. A user can provide
  their own initial `grad_ys` to compute the derivatives using a different
  initial gradient for each y (e.g., if one wanted to weight the gradient
  differently for each value in each y).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as `ys`
      and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      Defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with the
      corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned for an
      operation. This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not have a
      registered gradient function.
    ValueError: if the arguments are invalid.
  """
  ys = _AsList(ys)
  xs = _AsList(xs)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)
  with ops.op_scope(ys + xs + grad_ys, name, "gradients"):
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs. Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected. Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops,
                                              from_ops)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op. The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      # 'ready' handles the case where one output gradient relies on
      # another output's gradient.
      # pylint: disable=protected-access
      ready = (pending_count[op._id] == 0)
      if ready and op._id not in to_ops_set:
        to_ops_set.add(op._id)
        queue.append(op)

    if loop_state:
      # The "unused" exits of the loops are added to ys. As an example,
      # people often write:
      #   v1, _ = While(p, b, [x1, x2])
      #   result = gradients(v1, x1)
      # The exit node of x2 is not included by the betweenness analysis.
      # But we need it if x2 is involved in computing v1. So we add it
      # back in backprop with a zeros_like gradient.
      loop_exits = loop_state.GetAllLoopExits()
      for y in loop_exits:
        if pending_count[y.op._id] == 0 and y.op._id not in to_ops_set:
          if _IsFloat(y):
            # Floating-point outputs get a zero gradient.
            _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
          queue.append(y.op)

    # The set of 'from_ops'.
    stop_ops = _StopOps(from_ops, pending_count)
    while queue:
      # generate gradient subgraph for op.
      op = queue.popleft()
      with ops.device(_GetGradsDevice(op, colocate_gradients_with_ops)):
        if loop_state:
          loop_state.EnterGradWhileContext(op)
        out_grads = _AggregatedGrads(grads, op, loop_state, aggregation_method)
        grad_fn = None
        # pylint: disable=protected-access
        is_func_call = ops.get_default_graph()._is_function(op.type)
        if not is_func_call and any(out_grads) and op._id not in stop_ops:
          # pylint: enable=protected-access
          # A grad_fn must be defined, either as a function or as None
          # for ops that do not have gradients.
          try:
            grad_fn = ops.get_gradient_function(op)
          except LookupError:
            raise LookupError(
                "No gradient defined for operation '%s' (op type: %s)" %
                (op.name, op.type))

        if (grad_fn or is_func_call) and any(out_grads):
          # NOTE: If _AggregatedGrads didn't compute a value for the i'th
          # output, it means that the cost does not depend on output[i],
          # therefore dC/doutput[i] is 0.
          for i, out_grad in enumerate(out_grads):
            if not out_grad and _IsFloat(op.outputs[i]):
              # Only floating-point outputs get a zero gradient. Gradient
              # functions should ignore the gradient for other outputs.
              if loop_state:
                out_grads[i] = loop_state.ZerosLike(op, i)
              else:
                out_grads[i] = array_ops.zeros_like(op.outputs[i])
          with ops.name_scope(op.name + "_grad"):
            # pylint: disable=protected-access
            with ops.get_default_graph()._original_op(op):
              # pylint: enable=protected-access
              wrapped_op = op
              if loop_state:
                wrapped_op = loop_state.MakeWrapper(op)
              if is_func_call:
                # For function call ops, we add a 'SymbolicGradient'
                # node to the graph to compute gradients.
                f_in = [x for x in op.inputs] + out_grads
                f_types = [x.dtype for x in op.inputs]
                # pylint: disable=protected-access
                in_grads = _AsList(
                    functional_ops._symbolic_gradient(f_in, f_types, op.type))
                # pylint: enable=protected-access
              else:
                in_grads = _AsList(grad_fn(wrapped_op, *out_grads))
              _VerifyGeneratedGradients(in_grads, op)
              if gate_gradients and len(tuple(filter(None, in_grads))) > 1:
                in_grads = control_flow_ops.tuple(in_grads)
              logging.vlog(1, "Gradient for '" + op.name + "'")
              logging.vlog(1, " in  --> %s",
                           ", ".join([x.name for x in out_grads if x]))
              logging.vlog(1, " out --> %s",
                           ", ".join([x.name for x in in_grads if x]))
        else:
          # If no grad_fn is defined or none of out_grads is available,
          # just propagate a list of None backwards.
          in_grads = [None] * len(op.inputs)
        for t_in, in_grad in zip(op.inputs, in_grads):
          if in_grad:
            _SetGrad(grads, t_in, in_grad)
        if loop_state:
          loop_state.ExitGradWhileContext(op)

      # update pending count for the inputs of op.
      # pylint: disable=protected-access
      for x in op.inputs:
        pending_count[x.op._id] -= 1
        ready = (pending_count[x.op._id] == 0)
        if loop_state and not ready:
          ready = (pending_count[x.op._id] > 0 and
                   control_flow_ops.IsLoopSwitch(x.op))
        if ready:
          queue.append(x.op)
      for x in op.control_inputs:
        pending_count[x._id] -= 1
        if pending_count[x._id] == 0:
          queue.append(x)
      # pylint: enable=protected-access

  return [_GetGrad(grads, x) for x in xs]

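# A minimal usage sketch of the API documented by the docstring above,
# assuming the standard public tf.gradients entry point; the names x, w and
# loss are illustrative only.
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 3])
w = tf.Variable(tf.ones([3, 2]))
loss = tf.reduce_sum(tf.matmul(x, w))
dw, = tf.gradients(loss, [w])  # one tensor per x in xs, same shape as w
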
def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None):
  """Constructs symbolic partial derivatives of sum of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors. `grad_ys` is a list
  of `Tensor`, holding the gradients received by the `ys`. The list must be
  the same length as `ys`.

  `gradients()` adds ops to the graph to output the partial derivatives of
  `ys` with respect to `xs`. It returns a list of `Tensor` of length
  `len(xs)` where each tensor is the `sum(dy/dx)` for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds the
  initial gradients for each y in `ys`. When `grad_ys` is None, we fill in a
  tensor of '1's of the shape of y for each y in `ys`. A user can provide
  their own initial `grad_ys` to compute the derivatives using a different
  initial gradient for each y (e.g., if one wanted to weight the gradient
  differently for each value in each y).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as `ys`
      and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      Defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with the
      corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned for an
      operation. This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not have a
      registered gradient function.
    ValueError: if the arguments are invalid.
  """
  ys = _AsList(ys)
  xs = _AsList(xs)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)
  with ops.name_scope(name, "gradients", ys + xs + grad_ys):
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs. Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected. Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops,
                                              from_ops,
                                              colocate_gradients_with_ops)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op. The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      # 'ready' handles the case where one output gradient relies on
      # another output's gradient.
      # pylint: disable=protected-access
      ready = (pending_count[op._id] == 0)
      if ready and op._id not in to_ops_set:
        to_ops_set.add(op._id)
        queue.append(op)
      # pylint: enable=protected-access

    if loop_state:
      loop_exits = loop_state.ProcessUnusedLoopExits(pending_count, to_ops_set)
      for y in loop_exits:
        if _IsTrainable(y):
          _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
          queue.append(y.op)

    # The set of 'from_ops'.
    stop_ops = _StopOps(from_ops, pending_count)
    while queue:
      # generate gradient subgraph for op.
      op = queue.popleft()
      with _maybe_colocate_with(op, colocate_gradients_with_ops):
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=True)
        out_grads = _AggregatedGrads(grads, op, loop_state, aggregation_method)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=True)

        grad_fn = None
        # pylint: disable=protected-access
        is_func_call = ops.get_default_graph()._is_function(op.type)
        has_out_grads = any(isinstance(g, ops.Tensor) or g for g in out_grads)
        if has_out_grads and (op._id not in stop_ops):
          if is_func_call:
            grad_fn = ops.get_default_graph()._get_function(
                op.type).python_grad_func
            # pylint: enable=protected-access
          else:
            # A grad_fn must be defined, either as a function or as None
            # for ops that do not have gradients.
            try:
              grad_fn = ops.get_gradient_function(op)
            except LookupError:
              raise LookupError(
                  "No gradient defined for operation '%s' (op type: %s)" %
                  (op.name, op.type))
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=False)
        if (grad_fn or is_func_call) and has_out_grads:
          # NOTE: If _AggregatedGrads didn't compute a value for the i'th
          # output, it means that the cost does not depend on output[i],
          # therefore dC/doutput[i] is 0.
          for i, out_grad in enumerate(out_grads):
            if (not isinstance(out_grad, ops.Tensor) and
                not out_grad) and _IsTrainable(op.outputs[i]):
              # Only floating-point outputs get a zero gradient. Gradient
              # functions should ignore the gradient for other outputs.
              if loop_state:
                out_grads[i] = loop_state.ZerosLike(op, i)
              else:
                out_grads[i] = control_flow_ops.ZerosLikeOutsideLoop(op, i)
          with ops.name_scope(op.name + "_grad"):
            # pylint: disable=protected-access
            with ops.get_default_graph()._original_op(op):
              # pylint: enable=protected-access
              if grad_fn:
                # If grad_fn was found, do not use SymbolicGradient even for
                # functions.
                in_grads = _AsList(grad_fn(op, *out_grads))
              else:
                # For function call ops, we add a 'SymbolicGradient'
                # node to the graph to compute gradients.
                f_in = [x for x in op.inputs] + out_grads
                f_types = [x.dtype for x in op.inputs]
                # pylint: disable=protected-access
                in_grads = _AsList(
                    functional_ops._symbolic_gradient(f_in, f_types, op.type))
                # pylint: enable=protected-access
              _VerifyGeneratedGradients(in_grads, op)
              if gate_gradients and len(
                  [x for x in in_grads if x is not None]) > 1:
                in_grads = control_flow_ops.tuple(in_grads)
              _LogOpGradients(op, out_grads, in_grads)
        else:
          # If no grad_fn is defined or none of out_grads is available,
          # just propagate a list of None backwards.
          in_grads = [None] * len(op.inputs)
        for t_in, in_grad in zip(op.inputs, in_grads):
          if in_grad is not None:
            if isinstance(in_grad, ops.Tensor):
              in_grad.set_shape(t_in.get_shape())
            _SetGrad(grads, t_in, in_grad)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=False)

      # Update pending count for the inputs of op and enqueue ready ops.
      _UpdatePendingAndEnqueueReady(grads, op, queue, pending_count, loop_state)

  if loop_state:
    loop_state.PostProcessing()
  return [_GetGrad(grads, x) for x in xs]

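# Hedged sketch of the grad_ys behaviour described in the docstring above:
# when supplied, grad_ys seeds backprop with a caller-chosen gradient for
# each y instead of a tensor of ones. The constants and names are
# illustrative only.
import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0])
y1 = x * x
y2 = 3.0 * x
# Weight y1's contribution twice as heavily as y2's in the summed dy/dx.
g, = tf.gradients([y1, y2], [x],
                  grad_ys=[tf.fill([3], 2.0), tf.fill([3], 1.0)])
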
def gradients(ys,
              xs,
              grad_ys=None,
              name="gradients",
              colocate_gradients_with_ops=False,
              gate_gradients=False,
              aggregation_method=None):
  """Constructs symbolic partial derivatives of sum of `ys` w.r.t. x in `xs`.

  `ys` and `xs` are each a `Tensor` or a list of tensors. `grad_ys` is a list
  of `Tensor`, holding the gradients received by the `ys`. The list must be
  the same length as `ys`.

  `gradients()` adds ops to the graph to output the partial derivatives of
  `ys` with respect to `xs`. It returns a list of `Tensor` of length
  `len(xs)` where each tensor is the `sum(dy/dx)` for y in `ys`.

  `grad_ys` is a list of tensors of the same length as `ys` that holds the
  initial gradients for each y in `ys`. When `grad_ys` is None, we fill in a
  tensor of '1's of the shape of y for each y in `ys`. A user can provide
  their own initial `grad_ys` to compute the derivatives using a different
  initial gradient for each y (e.g., if one wanted to weight the gradient
  differently for each value in each y).

  Args:
    ys: A `Tensor` or list of tensors to be differentiated.
    xs: A `Tensor` or list of tensors to be used for differentiation.
    grad_ys: Optional. A `Tensor` or list of tensors the same size as `ys`
      and holding the gradients computed for each y in `ys`.
    name: Optional name to use for grouping all the gradient ops together.
      Defaults to 'gradients'.
    colocate_gradients_with_ops: If True, try colocating gradients with the
      corresponding op.
    gate_gradients: If True, add a tuple around the gradients returned for an
      operation. This avoids some race conditions.
    aggregation_method: Specifies the method used to combine gradient terms.
      Accepted values are constants defined in the class `AggregationMethod`.

  Returns:
    A list of `sum(dy/dx)` for each x in `xs`.

  Raises:
    LookupError: if one of the operations between `x` and `y` does not have a
      registered gradient function.
    ValueError: if the arguments are invalid.
  """
  ys = _AsList(ys)
  xs = _AsList(xs)
  if grad_ys is None:
    grad_ys = [None] * len(ys)
  else:
    grad_ys = _AsList(grad_ys)
  with ops.op_scope(ys + xs + grad_ys, name, "gradients"):
    ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name="y")
    xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name="x")
    grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)

    # The approach we take here is as follows: Create a list of all ops in the
    # subgraph between the ys and xs. Visit these ops in reverse order of ids
    # to ensure that when we visit an op the gradients w.r.t its outputs have
    # been collected. Then aggregate these gradients if needed, call the op's
    # gradient function, and add the generated gradients to the gradients for
    # its input.

    # Initialize the pending count for ops in the connected subgraph from ys
    # to the xs.
    to_ops = [t.op for t in ys]
    from_ops = [t.op for t in xs]
    pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops,
                                              from_ops)

    # Iterate over the collected ops.
    #
    # grads: op => list of gradients received on each output endpoint of the
    # op. The gradients for each endpoint are initially collected as a list.
    # When it is time to call the op's gradient function, for each endpoint we
    # aggregate the list of received gradients into a Add() Operation if there
    # is more than one.
    grads = {}

    # Add the initial gradients for the ys.
    for y, grad_y in zip(ys, grad_ys):
      _SetGrad(grads, y, grad_y)

    # Initialize queue with to_ops.
    queue = collections.deque()
    # Add the ops in 'to_ops' into the queue.
    to_ops_set = set()
    for op in to_ops:
      # 'ready' handles the case where one output gradient relies on
      # another output's gradient.
      # pylint: disable=protected-access
      ready = (pending_count[op._id] == 0)
      if ready and op._id not in to_ops_set:
        to_ops_set.add(op._id)
        queue.append(op)

    if loop_state:
      # The "unused" exits of the loops are added to ys. As an example,
      # people often write:
      #   v1, _ = While(p, b, [x1, x2])
      #   result = gradients(v1, x1)
      # The exit node of x2 is not included by the betweenness analysis.
      # But we need it if x2 is involved in computing v1. So we add it
      # back in backprop with a zeros_like gradient.
      loop_exits = loop_state.GetAllLoopExits()
      for y in loop_exits:
        if pending_count[y.op._id] == 0 and y.op._id not in to_ops_set:
          if _IsFloat(y):
            # Floating-point outputs get a zero gradient.
            _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
          queue.append(y.op)

    # The set of 'from_ops'.
    stop_ops = _StopOps(from_ops, pending_count)
    while queue:
      # generate gradient subgraph for op.
      op = queue.popleft()
      with _maybe_colocate_with(op, colocate_gradients_with_ops):
        if loop_state:
          loop_state.EnterGradWhileContext(op, before=True)
        out_grads = _AggregatedGrads(grads, op, loop_state, aggregation_method)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=True)

        grad_fn = None
        # pylint: disable=protected-access
        is_func_call = ops.get_default_graph()._is_function(op.type)
        if not is_func_call and any(
            isinstance(g, ops.Tensor) or g for g in out_grads) and (
                op._id not in stop_ops):
          # pylint: enable=protected-access
          # A grad_fn must be defined, either as a function or as None
          # for ops that do not have gradients.
          try:
            grad_fn = ops.get_gradient_function(op)
          except LookupError:
            raise LookupError(
                "No gradient defined for operation '%s' (op type: %s)" %
                (op.name, op.type))

        if loop_state:
          loop_state.EnterGradWhileContext(op, before=False)
        if (grad_fn or is_func_call) and any(
            isinstance(g, ops.Tensor) or g for g in out_grads):
          # NOTE: If _AggregatedGrads didn't compute a value for the i'th
          # output, it means that the cost does not depend on output[i],
          # therefore dC/doutput[i] is 0.
          for i, out_grad in enumerate(out_grads):
            if (not isinstance(out_grad, ops.Tensor) and
                not out_grad) and _IsFloat(op.outputs[i]):
              # Only floating-point outputs get a zero gradient. Gradient
              # functions should ignore the gradient for other outputs.
              if loop_state:
                out_grads[i] = loop_state.ZerosLike(op, i)
              else:
                out_grads[i] = control_flow_ops.ZerosLikeOutsideLoop(op, i)
          with ops.name_scope(op.name + "_grad"):
            # pylint: disable=protected-access
            with ops.get_default_graph()._original_op(op):
              # pylint: enable=protected-access
              if is_func_call:
                # For function call ops, we add a 'SymbolicGradient'
                # node to the graph to compute gradients.
                f_in = [x for x in op.inputs] + out_grads
                f_types = [x.dtype for x in op.inputs]
                # pylint: disable=protected-access
                in_grads = _AsList(
                    functional_ops._symbolic_gradient(f_in, f_types, op.type))
                # pylint: enable=protected-access
              else:
                in_grads = _AsList(grad_fn(op, *out_grads))
              _VerifyGeneratedGradients(in_grads, op)
              if gate_gradients and len(
                  [x for x in in_grads if x is not None]) > 1:
                in_grads = control_flow_ops.tuple(in_grads)
              logging.vlog(1, "Gradient for '" + op.name + "'")

              def _FilterGrad(x):
                if x is None:
                  return False
                if isinstance(x, (list, tuple)):
                  return bool(x)
                else:
                  return True

              logging.vlog(1, " in  --> %s",
                           ", ".join([x.name for x in out_grads
                                      if _FilterGrad(x)]))
              logging.vlog(1, " out --> %s",
                           ", ".join([x.name for x in in_grads
                                      if _FilterGrad(x)]))
        else:
          # If no grad_fn is defined or none of out_grads is available,
          # just propagate a list of None backwards.
          in_grads = [None] * len(op.inputs)
        for t_in, in_grad in zip(op.inputs, in_grads):
          if in_grad is not None:
            _SetGrad(grads, t_in, in_grad)
        if loop_state:
          loop_state.ExitGradWhileContext(op, before=False)

      # update pending count for the inputs of op.
      # pylint: disable=protected-access
      for x in op.inputs:
        pending_count[x.op._id] -= 1
        ready = (pending_count[x.op._id] == 0)
        if loop_state and not ready:
          ready = (pending_count[x.op._id] > 0 and
                   control_flow_ops.IsLoopSwitch(x.op))
        if ready:
          queue.append(x.op)
      for x in op.control_inputs:
        pending_count[x._id] -= 1
        if pending_count[x._id] == 0:
          queue.append(x)
      # pylint: enable=protected-access

  return [_GetGrad(grads, x) for x in xs]

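# Hedged sketch of the gate_gradients and aggregation_method options described
# in the docstring above; tf.AggregationMethod is assumed to expose the
# constants the docstring refers to (e.g. ADD_N). Names and values are
# illustrative only.
import tensorflow as tf

x = tf.constant(2.0)
y = x * x + tf.sin(x)  # x feeds two terms, so its received gradients are aggregated
g, = tf.gradients(y, [x],
                  gate_gradients=True,
                  aggregation_method=tf.AggregationMethod.ADD_N)
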