def testMixedTensorAndIndexedSlices(self):
  """Aggregating one IndexedSlices with one dense Tensor yields their sum."""
  sparse_term = math_ops._as_indexed_slices(
      constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
  dense_term = constant_op.constant([[0., 0.], [5, 6], [7., 8.]])
  expected = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
  aggregated = gradients_util._AggregateIndexedSlicesGradients(
      [sparse_term, dense_term])
  self._assert_indexed_slices_equal(expected, aggregated)
def aggregate_tensors_or_indexed_slices(values, accumulation_fn=math_ops.add_n):
  """Aggregate tensors using `accumulation_fn` and IndexedSlices via concat."""
  # If even one value is an IndexedSlices, the whole list goes through the
  # sparse aggregation path; otherwise fall through to dense accumulation.
  for value in values:
    if isinstance(value, ops.IndexedSlices):
      # pylint: disable=protected-access
      return gradients_util._AggregateIndexedSlicesGradients(values)
  return accumulation_fn(values)
def aggregate_single_gradient_using_copy(grad_and_vars, use_mean, check_inf_nan):
  """Calculate the average gradient for a shared variable across all towers.

  Note that this function provides a synchronization point across all towers.

  Args:
    grad_and_vars: A list or tuple of (gradient, variable) tuples. Each
      (gradient, variable) pair within the outer list represents the gradient
      of the variable calculated for a single tower, and the number of pairs
      equals the number of towers.
    use_mean: if True, mean is taken, else sum of gradients is taken.
    check_inf_nan: check grads for nans and infs.

  Returns:
    The tuple ([(average_gradient, variable),], has_nan_or_inf) where the
      gradient has been averaged across all towers. The variable is chosen
      from the first tower. The has_nan_or_inf indicates the grads has nan or
      inf.
  """
  # Collect the per-tower gradients; the paired variables are dropped here
  # and only the first tower's variable is reported back (see `v` below).
  grads = [g for g, _ in grad_and_vars]
  if any(isinstance(g, tf.IndexedSlices) for g in grads):
    # Sparse gradients cannot go through add_n; use the private aggregation
    # helper that concatenates IndexedSlices.
    grad = gradients_util._AggregateIndexedSlicesGradients(grads)  # pylint: disable=protected-access
  else:
    grad = tf.add_n(grads)

  # Only rescale when averaging over more than one tower; a single tower's
  # sum and mean are identical, so the multiply is skipped.
  if use_mean and len(grads) > 1:
    grad = tf.scalar_mul(1.0 / len(grads), grad)

  v = grad_and_vars[0][1]
  if check_inf_nan:
    with tf.name_scope('check_for_inf_and_nan'):
      # NOTE(review): `grads` is a Python list here, so tf.is_finite will
      # implicitly stack it into one tensor — this presumably requires all
      # tower gradients to share a shape, and looks like it would fail on the
      # IndexedSlices branch above; confirm whether `grad` was intended.
      has_nan_or_inf = tf.logical_not(tf.reduce_all(tf.is_finite(grads)))
    return (grad, v), has_nan_or_inf
  else:
    return (grad, v), None
def testMixedTensorAndIndexedSlices(self):
  """Mixing an IndexedSlices gradient with a dense one aggregates correctly."""
  inputs = [
      math_ops._as_indexed_slices(
          constant_op.constant([[1., 2.], [0, 0], [3., 4.]])),
      constant_op.constant([[0., 0.], [5, 6], [7., 8.]]),
  ]
  want = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
  got = gradients_util._AggregateIndexedSlicesGradients(inputs)
  self._assert_indexed_slices_equal(want, got)
def testMultipleGradientsWithNones(self):
  """None entries in the gradient list are ignored during aggregation."""
  first = math_ops._as_indexed_slices(
      constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
  second = math_ops._as_indexed_slices(
      constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
  missing = None
  expected = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
  aggregated = gradients_util._AggregateIndexedSlicesGradients(
      [first, second, missing])
  self._assert_indexed_slices_equal(expected, aggregated)
def testMultipleGradientsWithNones(self):
  """A None gradient among IndexedSlices is skipped; the rest are summed."""
  gradient_list = [
      math_ops._as_indexed_slices(
          constant_op.constant([[1., 2.], [0, 0], [3., 4.]])),
      math_ops._as_indexed_slices(
          constant_op.constant([[0., 0.], [5, 6], [7., 8.]])),
      None,
  ]
  want = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
  got = gradients_util._AggregateIndexedSlicesGradients(gradient_list)
  self._assert_indexed_slices_equal(want, got)
def testOneGradient(self):
  """Aggregating a single IndexedSlices returns an equivalent value."""
  only_grad = math_ops._as_indexed_slices(
      constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
  aggregated = gradients_util._AggregateIndexedSlicesGradients([only_grad])
  self._assert_indexed_slices_equal(only_grad, aggregated)
def testNoGradients(self):
  """Aggregating an empty gradient list yields None."""
  result = gradients_util._AggregateIndexedSlicesGradients([])
  self.assertIsNone(result)
def testOneGradient(self):
  """A one-element gradient list aggregates to that same gradient."""
  values = constant_op.constant([[1., 2.], [0, 0], [3., 4.]])
  single = math_ops._as_indexed_slices(values)
  got = gradients_util._AggregateIndexedSlicesGradients([single])
  self._assert_indexed_slices_equal(single, got)