Example #1
def aggregate_tensors_or_indexed_slices(values,
                                        accumulation_fn=math_ops.add_n):
  """Aggregate tensors using `accumulation_fn` and IndexedSlices via concat."""
  if any(isinstance(v, ops.IndexedSlices) for v in values):
    return gradients_impl._AggregateIndexedSlicesGradients(values)  # pylint: disable=protected-access
  else:
    return accumulation_fn(values)
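A minimal usage sketch for the helper above (the TF 1.x `tensorflow.python` imports are assumptions about the surrounding codebase; `aggregate_tensors_or_indexed_slices` is the function defined in this example):

from tensorflow.python.framework import constant_op
from tensorflow.python.ops import math_ops

dense = constant_op.constant([[0., 0.], [5., 6.], [7., 8.]])
sparse = math_ops._as_indexed_slices(
    constant_op.constant([[1., 2.], [0., 0.], [3., 4.]]))

# A list containing any IndexedSlices takes the concat-based aggregation
# path; an all-dense list falls through to the default math_ops.add_n.
mixed_sum = aggregate_tensors_or_indexed_slices([dense, sparse])
dense_sum = aggregate_tensors_or_indexed_slices([dense, dense])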
Example #2
def _all_reduce_using_copy(tensors_across_devices, use_mean):
  """Does an all-reduce of a list of tensors by copying to the current device.

  The tensors are copied to the current device and then reduced.

  Args:
    tensors_across_devices: A list of tensors, each on a different device.
    use_mean: Whether to take the mean of the tensors instead of a sum.

  Returns:
    A reduced tensor on the current device.
  """
  assert tensors_across_devices
  if isinstance(tensors_across_devices[0], tf.IndexedSlices):
    reduced_tensor = gradients_impl._AggregateIndexedSlicesGradients(
        tensors_across_devices)
    if use_mean:
      val = tf.multiply(reduced_tensor.values,
                        float(1. / len(tensors_across_devices)))
      reduced_tensor = tf.IndexedSlices(val, reduced_tensor.indices,
                                        reduced_tensor.dense_shape)
  else:
    reduced_tensor = tf.add_n(tensors_across_devices)
    if use_mean:
      reduced_tensor *= 1. / len(tensors_across_devices)
  return reduced_tensor
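A hedged sketch of how the all-reduce helper above might be invoked, assuming TF 1.x graph mode and two visible GPUs (the device strings and values are illustrative):

import tensorflow as tf

tensors = []
for i, device in enumerate(['/gpu:0', '/gpu:1']):
  with tf.device(device):
    tensors.append(tf.constant([1., 2.]) * (i + 1))

# The caller picks the reduction device by entering its device scope.
with tf.device('/gpu:0'):
  summed = _all_reduce_using_copy(tensors, use_mean=False)   # [3., 6.]
  averaged = _all_reduce_using_copy(tensors, use_mean=True)  # [1.5, 3.]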
Example #3
  def testMixedTensorAndIndexedSlices(self):
    t0 = math_ops._as_indexed_slices(constant_op.constant(
        [[1., 2.], [0, 0], [3., 4.]]))
    t1 = constant_op.constant(
        [[0., 0.], [5, 6], [7., 8.]])
    total = constant_op.constant(
        [[1., 2.], [5, 6], [10., 12.]])
    result = gradients_impl._AggregateIndexedSlicesGradients([t0, t1])
    self._assert_indexed_slices_equal(total, result)
Example #4
  def testMultipleGradientsWithNones(self):
    t0 = math_ops._as_indexed_slices(constant_op.constant(
        [[1., 2.], [0, 0], [3., 4.]]))
    t1 = math_ops._as_indexed_slices(constant_op.constant(
        [[0., 0.], [5, 6], [7., 8.]]))
    t3 = None
    total = constant_op.constant(
        [[1., 2.], [5, 6], [10., 12.]])
    result = gradients_impl._AggregateIndexedSlicesGradients([t0, t1, t3])
    self._assert_indexed_slices_equal(total, result)
Example #5
def aggregate_single_gradient_using_copy(grad_and_vars, use_mean,
                                         check_inf_nan):
  """Calculate the average gradient for a shared variable across all towers.

  Note that this function provides a synchronization point across all towers.

  Args:
    grad_and_vars: A list or tuple of (gradient, variable) tuples. Each
      (gradient, variable) pair within the outer list represents the gradient
      of the variable calculated for a single tower, and the number of pairs
      equals the number of towers.
    use_mean: If True, the mean of the gradients is taken; otherwise their sum.
    check_inf_nan: If True, check the gradients for NaNs and Infs.

  Returns:
    The tuple ((average_gradient, variable), has_nan_or_inf) where the
      gradient has been averaged across all towers. The variable is chosen
      from the first tower. has_nan_or_inf indicates whether any gradient
      contains a NaN or Inf.
  """
  grads = [g for g, _ in grad_and_vars]
  if any(isinstance(g, tf.IndexedSlices) for g in grads):
    # TODO(reedwm): All-reduce IndexedSlices more effectively.
    grad = gradients_impl._AggregateIndexedSlicesGradients(grads)  # pylint: disable=protected-access
  else:
    grad = tf.add_n(grads)

  if use_mean and len(grads) > 1:
    grad = tf.scalar_mul(1.0 / len(grads), grad)

  v = grad_and_vars[0][1]
  if check_inf_nan:
    with tf.name_scope('check_for_inf_and_nan'):
      has_nan_or_inf = tf.logical_not(tf.reduce_all(tf.is_finite(grads)))
    return (grad, v), has_nan_or_inf
  else:
    return (grad, v), None
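A minimal sketch of driving the helper above with per-tower (gradient, variable) pairs; the constant gradients and single variable are stand-ins for real per-tower values (TF 1.x assumed):

import tensorflow as tf

v = tf.Variable([0., 0.], name='shared_weight')
# One (gradient, variable) pair per tower, all referring to the same variable.
grad_and_vars = [(tf.constant([1., 2.]), v),
                 (tf.constant([3., 4.]), v)]

(avg_grad, var), has_nan_or_inf = aggregate_single_gradient_using_copy(
    grad_and_vars, use_mean=True, check_inf_nan=True)
# avg_grad evaluates to [2., 3.]; has_nan_or_inf is a scalar bool tensor.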
Example #6
  def testOneGradient(self):
    t = math_ops._as_indexed_slices(constant_op.constant(
        [[1., 2.], [0, 0], [3., 4.]]))
    result = gradients_impl._AggregateIndexedSlicesGradients([t])
    self._assert_indexed_slices_equal(t, result)
Example #7
  def testNoGradients(self):
    self.assertIsNone(gradients_impl._AggregateIndexedSlicesGradients([]))
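Together, the last two examples pin down the helper's edge cases: a single IndexedSlices aggregates to itself, and an empty list yields None rather than a zero tensor. A small caller-side guard built on that contract (`tower_grads` is an illustrative name):

aggregated = gradients_impl._AggregateIndexedSlicesGradients(tower_grads)
if aggregated is None:
  # No tower produced a gradient for this variable; skip its update.
  pass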