def _batch_all_reduce(self, aggregation, per_device_values):
  """Run one batched all-reduce over `per_device_values`.

  Gradients are grouped by device, repacked to reduce per-tensor overhead,
  aggregated with either NCCL or a hierarchical copy depending on
  `self._all_reduce_alg`, then unpacked and returned as mirrored values.
  """
  logging.log_first_n(
      logging.INFO, "batch_all_reduce invoked for batches size = %d with "
      "algorithm = %s, num_packs = %d, agg_small_grads_max_bytes = %d and "
      "agg_small_grads_max_group = %d" %
      (len(per_device_values), self._all_reduce_alg, self._num_packs,
       self._agg_small_grads_max_bytes, self._agg_small_grads_max_group), 10)

  destinations = per_device_values[0].devices
  per_device_grads = _group_value_by_device(per_device_values)
  packed_grads, packer = _pack_tensors(per_device_grads, self._num_packs,
                                       self._agg_small_grads_max_bytes,
                                       self._agg_small_grads_max_group)

  # Aggregate the repacked gradients. Note that they are sharded among
  # different aggregation trees, so it is important to strike the balance on
  # num_splits.
  if self._all_reduce_alg == "nccl":
    # TODO(yuefengz): merge this into the all-reduce library.
    aggregated = cross_tower_utils.aggregate_gradients_using_nccl(packed_grads)
  else:
    # TODO(yuefengz): check that gpu ids in `destinations` are in ascending
    # order.
    aggregated = (
        cross_tower_utils.aggregate_gradients_using_hierarchical_copy(
            destinations, packed_grads))

  return _ungroup_and_make_mirrored(
      _unpack_tensors(aggregated, packer), destinations, aggregation)
def _batch_all_reduce(self, aggregation, per_replica_values):
  """Batched all-reduce of `per_replica_values`.

  Values are grouped by device and repacked before aggregation; the
  aggregation backend (NCCL or hierarchical copy) is chosen by
  `self._all_reduce_alg`. The result is unpacked and mirrored.
  """
  logging.log_first_n(
      logging.INFO, "batch_all_reduce invoked for batches size = %d with "
      "algorithm = %s, num_packs = %d, agg_small_grads_max_bytes = %d and "
      "agg_small_grads_max_group = %d" %
      (len(per_replica_values), self._all_reduce_alg, self._num_packs,
       self._agg_small_grads_max_bytes, self._agg_small_grads_max_group), 10)

  destinations = per_replica_values[0].devices
  grouped_grads = _group_value_by_device(per_replica_values)
  grad_packs, packer = _pack_tensors(grouped_grads, self._num_packs,
                                     self._agg_small_grads_max_bytes,
                                     self._agg_small_grads_max_group)

  # The actual aggregation of the repacked gradients. They are sharded among
  # different aggregation trees, so it is important to strike the balance on
  # num_splits.
  if self._all_reduce_alg == "nccl":
    # TODO(yuefengz): merge this into the all-reduce library.
    reduced_packs = cross_tower_utils.aggregate_gradients_using_nccl(
        grad_packs)
  else:
    # TODO(yuefengz): check that gpu ids in `destinations` are in ascending
    # order.
    reduced_packs = (
        cross_tower_utils.aggregate_gradients_using_hierarchical_copy(
            destinations, grad_packs))

  reduced_grads = _unpack_tensors(reduced_packs, packer)
  return _ungroup_and_make_mirrored(reduced_grads, destinations, aggregation)
def _batch_all_reduce(self, method_string, per_device_values):
  """All reduce algorithm in a batch.

  Optionally repacks the grouped gradients (concat-and-split or
  small-tensor aggregation, depending on which knobs are set), aggregates
  them with NCCL or a hierarchical copy, then unpacks and mirrors the
  result.
  """
  destinations = per_device_values[0].devices
  grouped = _group_value_by_device(per_device_values)
  # Choose a packing strategy: num_packs takes precedence over the
  # small-gradient aggregation thresholds; with neither set, gradients are
  # aggregated as-is.
  if self.num_packs > 0:
    logging.info(
        "batch_all_reduce invoked for batches size = %d with "
        "algorithm = %s and num_packs = %d", len(per_device_values),
        self.all_reduce_alg, self.num_packs)
    tensor_packer = ConcatAndSplitPacker(self.num_packs)
    device_grad_packs = tensor_packer.pack(grouped)
  elif (self.agg_small_grads_max_bytes > 0 and
        self.agg_small_grads_max_group > 0):
    logging.info(
        "batch_all_reduce invoked for batches size = %d with "
        "algorithm = %s, agg_small_grads_max_bytes = %d and "
        "agg_small_grads_max_group = %d", len(per_device_values),
        self.all_reduce_alg, self.agg_small_grads_max_bytes,
        self.agg_small_grads_max_group)
    tensor_packer = AggregateSmallTensorPacker(self.agg_small_grads_max_bytes,
                                               self.agg_small_grads_max_group)
    device_grad_packs = tensor_packer.pack(grouped)
  else:
    logging.info(
        "batch_all_reduce invoked for batches size = %d with algorithm = %s",
        len(per_device_values), self.all_reduce_alg)
    tensor_packer = None
    device_grad_packs = grouped

  # The actual aggregation of the repacked gradients. Note that they are
  # sharded among different aggregation trees. So it is important to strike
  # the balance on num_splits.
  if self.all_reduce_alg == "nccl":
    reduced = cross_tower_utils.aggregate_gradients_using_nccl(
        device_grad_packs)
  else:
    # TODO(yuefengz): check that gpu ids in `destinations` are in ascending
    # order.
    reduced = (
        cross_tower_utils.aggregate_gradients_using_hierarchical_copy(
            destinations, device_grad_packs))

  if tensor_packer:
    reduced = tensor_packer.unpack(reduced)
  return _ungroup_and_make_mirrored(reduced, per_device_values[0].devices,
                                    method_string)
def _batch_all_reduce(self, method_string, per_device_values):
  """Batched all-reduce with optional gradient packing.

  Depending on configuration, the grouped gradients are packed with a
  concat-and-split scheme, a small-tensor aggregation scheme, or not at
  all; they are then aggregated (NCCL or hierarchical copy), unpacked,
  and returned as mirrored values.
  """
  destinations = per_device_values[0].devices
  grads_by_device = _group_value_by_device(per_device_values)

  # Pick a packer: num_packs wins over the small-gradient thresholds.
  packer = None
  if self.num_packs > 0:
    logging.info(
        "batch_all_reduce invoked for batches size = %d with "
        "algorithm = %s and num_packs = %d", len(per_device_values),
        self.all_reduce_alg, self.num_packs)
    packer = ConcatAndSplitPacker(self.num_packs)
    packed = packer.pack(grads_by_device)
  elif (self.agg_small_grads_max_bytes > 0 and
        self.agg_small_grads_max_group > 0):
    logging.info(
        "batch_all_reduce invoked for batches size = %d with "
        "algorithm = %s, agg_small_grads_max_bytes = %d and "
        "agg_small_grads_max_group = %d", len(per_device_values),
        self.all_reduce_alg, self.agg_small_grads_max_bytes,
        self.agg_small_grads_max_group)
    packer = AggregateSmallTensorPacker(self.agg_small_grads_max_bytes,
                                        self.agg_small_grads_max_group)
    packed = packer.pack(grads_by_device)
  else:
    logging.info(
        "batch_all_reduce invoked for batches size = %d with algorithm = %s",
        len(per_device_values), self.all_reduce_alg)
    packed = grads_by_device

  # Aggregate the (possibly repacked) gradients. They are sharded among
  # different aggregation trees, so it is important to strike the balance
  # on num_splits.
  if self.all_reduce_alg == "nccl":
    aggregated = cross_tower_utils.aggregate_gradients_using_nccl(packed)
  else:
    # TODO(yuefengz): check that gpu ids in `destinations` are in ascending
    # order.
    aggregated = (
        cross_tower_utils.aggregate_gradients_using_hierarchical_copy(
            destinations, packed))

  if packer:
    aggregated = packer.unpack(aggregated)
  return _ungroup_and_make_mirrored(aggregated, destinations, method_string)