Example #1
def push_pull(tensor, scope='', average=True, device_dense='', device_sparse='',
              compression=Compression.none, enable_async=False):
    """Perform an push_pull on a tf.Tensor or tf.IndexedSlices.
    Arguments:
        tensor: tf.Tensor, tf.Variable, or tf.IndexedSlices to reduce.
                The shape of the input must be identical across all ranks.
        scope: The graph name scope.
        average: If True, computes the average over all ranks.
                 Otherwise, computes the sum over all ranks.
        device_dense: Device to be used for dense tensors. Uses GPU by default.
        device_sparse: Device to be used for sparse tensors. Uses GPU by default.
        compression: Compression algorithm used to reduce the amount of data
                     sent and received by each worker node.  Defaults to not
                     using compression.
        enable_async: If True, use asynchronous training; the result is not
                      averaged.
    Returns:
        A tensor of the same shape and type as `tensor`, summed or averaged
        across all processes.
    """
    with tf.device(device_dense):
        byteps_size = tf.cast(size(), dtype=tensor.dtype)
        tensor_compressed, ctx = compression.compress(tensor)
        summed_tensor_compressed = _push_pull(tensor_compressed, scope)
        summed_tensor = compression.decompress(summed_tensor_compressed, ctx)
        if not enable_async:
            # tf.div was removed in TF 2.x; fall back to tf.math.divide when absent
            _div = tf.div if hasattr(tf, 'div') else tf.math.divide
            new_tensor = (_div(summed_tensor, byteps_size)
                          if average else summed_tensor)
        else: # no need to average for async training
            new_tensor = summed_tensor
    return new_tensor
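
For context, a minimal usage sketch of the function above (not taken from the source): it assumes byteps.tensorflow is importable as bps and exposes init(), rank(), and this push_pull(); the tensor values are illustrative and eager execution is assumed for brevity.

import tensorflow as tf
import byteps.tensorflow as bps

bps.init()  # start the BytePS communication backend

# Each rank contributes a different local tensor; with average=True the
# push_pull result is the mean of the contributions across all ranks.
local_value = tf.constant([1.0, 2.0, 3.0]) * float(bps.rank() + 1)
averaged = bps.push_pull(local_value, average=True)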
Example #2
def reduce_implementation(self, reduce_op, per_replica_value,
                          destinations):
    # Pick the device that will hold the locally reduced value: prefer the
    # requested destinations, otherwise the devices of the input itself.
    if tf_cross_device_ops.check_destinations(destinations):
        devices = tf_cross_device_ops.get_devices_from(destinations)
    else:
        devices = tf_cross_device_ops.get_devices_from(per_replica_value)
    reduce_to_device = devices[0]
    logging.log_first_n(logging.INFO,
                        "Using byteps push pull to aggregate values", 1)
    # First reduce the per-replica values onto a single local device, then
    # push_pull the result across BytePS ranks when more than one is running.
    reduced = _simple_reduce(per_replica_value, reduce_to_device,
                             self.accumulation_fn, reduce_op)
    if size() > 1:
        reduced = _push_pull(reduced)
    return reduced
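
The method above follows a two-level pattern: reduce the per-replica values onto one local device, then push_pull that result across BytePS ranks. A hand-rolled sketch of the same pattern (the local values, device string, and bps alias are illustrative assumptions, not part of the source):

import tensorflow as tf
import byteps.tensorflow as bps

bps.init()

# Pretend these are the values held by two local replicas.
local_values = [tf.constant([1.0, 2.0]), tf.constant([3.0, 4.0])]
reduce_to_device = '/cpu:0'

with tf.device(reduce_to_device):
    reduced = tf.add_n(local_values)  # step 1: local reduce onto one device

if bps.size() > 1:
    reduced = bps.push_pull(reduced, average=False)  # step 2: sum across ranks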
Example #3
def push_pull(tensor,
              scope='',
              average=None,
              device_dense='',
              device_sparse='',
              compression=Compression.none,
              op=None,
              enable_async=False):
    """Perform an push_pull on a tf.Tensor or tf.IndexedSlices.
    Arguments:
        tensor: tf.Tensor, tf.Variable, or tf.IndexedSlices to reduce.
                The shape of the input must be identical across all ranks.
        scope: The graph name scope.
        average:
            .. warning:: .. deprecated::

                Use `op` instead. Will be removed.

            If True, computes the average over all ranks.
            Otherwise, computes the sum over all ranks.
        device_dense: Device to be used for dense tensors. Uses GPU by default.
        device_sparse: Device to be used for sparse tensors. Uses GPU by default.
        compression: Compression algorithm used to reduce the amount of data
                     sent and received by each worker node.  Defaults to not
                     using compression.
        op: The reduction operation to combine tensors across different ranks.
            Defaults to Average if None is given.
        enable_async: If True, use asynchronous training; the result is not
                      averaged.

    Returns:
        A tensor of the same shape and type as `tensor`, summed or averaged
        across all processes.
    """
    op = handle_average_backwards_compatibility(op, average).value
    # Averaging happens in framework code, so translate that to Sum for the actual call
    true_op = Sum if op == Average else op

    with tf.device(device_dense):
        byteps_size = tf.cast(size(), dtype=tensor.dtype)
        tensor_compressed, ctx = compression.compress(tensor)
        summed_tensor_compressed = _push_pull(tensor_compressed, scope)
        summed_tensor = compression.decompress(summed_tensor_compressed, ctx)
        if not enable_async:
            _div = tf.div if hasattr(tf, 'div') else tf.math.divide
            new_tensor = (_div(summed_tensor, byteps_size)
                          if op == Average else summed_tensor)
        else:  # no need to average for async training
            new_tensor = summed_tensor
    return new_tensor
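
A minimal usage sketch for the newer signature above, which prefers op over the deprecated average flag (not from the source; it assumes byteps.tensorflow is importable as bps and that the Average constant referenced in the code is exported as bps.Average, which is an assumption):

import tensorflow as tf
import byteps.tensorflow as bps

bps.init()

grad = tf.constant([0.5, 1.5]) * float(bps.rank() + 1)

# Default behaviour (op=None, average=None) averages across ranks, per the docstring.
avg_grad = bps.push_pull(grad)

# Passing op explicitly avoids the deprecated `average` argument; the exported
# name bps.Average is assumed here to match the Average constant in the code.
avg_grad_explicit = bps.push_pull(grad, op=bps.Average)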