def choose_the_best(devices, session_config=None):
  """Find the best CrossDeviceOps locally given a `tf.compat.v1.ConfigProto`.

  Args:
    devices: a list of devices passed to `tf.distribute.Strategy`.
    session_config: a `tf.compat.v1.ConfigProto` or `None`. If `None`, it will
      make its decision based on all local devices.

  Returns:
    A subclass of `CrossDeviceOps`.
  """
  requested_devices = set([device_util.canonicalize(d) for d in devices])
  machine_devices = device_lib.list_local_devices(session_config=session_config)
  using_devices = set()
  for d in machine_devices:
    if device_util.canonicalize(d.name) in requested_devices:
      using_devices.add(d.name)

  if len(using_devices) != len(requested_devices):
    logging.warning(
        "Some requested devices in `tf.distribute.Strategy` are not visible "
        "to TensorFlow: %s", ",".join(list(requested_devices - using_devices)))
    return ReductionToOneDevice()

  if any("gpu" not in d.lower() for d in using_devices):
    logging.warning("There is non-GPU devices in `tf.distribute.Strategy`, not "
                    "using nccl allreduce.")
    return ReductionToOneDevice()

  if kernels.get_registered_kernels_for_op("NcclAllReduce"):
    return NcclAllReduce(num_packs=1)
  else:
    logging.warning("Nccl kernel is not found, not using nccl allreduce.")
    return ReductionToOneDevice()
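
A minimal usage sketch for the function above. The import path is an assumption about where `choose_the_best` is defined (in recent TensorFlow releases the equivalent helper lives in `tensorflow.python.distribute.cross_device_ops`); adjust it to the module that actually contains the function.

# Hedged sketch: pick a CrossDeviceOps implementation for two local GPUs.
from tensorflow.python.distribute import cross_device_ops as cross_device_ops_lib

devices = ["/gpu:0", "/gpu:1"]
best_ops = cross_device_ops_lib.choose_the_best(devices)
print(type(best_ops).__name__)  # e.g. NcclAllReduce or ReductionToOneDevice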
Example 2
def placer(op):
    # `self._local_worker_device` and `replica_device` are provided by the
    # enclosing class/closure in the original source.
    kernel_list = kernels.get_registered_kernels_for_op(op.type)
    if all('CPU' in kernel_def.device_type for kernel_def in kernel_list.kernel):
        # Every registered kernel runs on CPU (or no kernel is registered at
        # all, in which case the op is assumed to have a CPU kernel by
        # default), so pin the op to CPU 0 on the local worker.
        new_device = DeviceSpecV2.from_string(self._local_worker_device).replace(
            device_type='CPU', device_index=0)
    else:
        # The op has a non-CPU kernel; keep it on the replica device.
        new_device = DeviceSpecV2.from_string(replica_device)
    return new_device
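
The CPU-kernel check at the heart of `placer` can be exercised on its own; a minimal sketch follows (the helper name `op_has_only_cpu_kernels` is made up for illustration).

from tensorflow.python.framework import kernels

def op_has_only_cpu_kernels(op_type):
    # True when every registered kernel for `op_type` is a CPU kernel
    # (also True when no kernel is registered, mirroring the placer above).
    kernel_list = kernels.get_registered_kernels_for_op(op_type)
    return all("CPU" in kernel_def.device_type for kernel_def in kernel_list.kernel)

print(op_has_only_cpu_kernels("MatMul"))  # False on builds with a GPU MatMul kernel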
Example 3
 def testNcclHintFallbackToRingReduce(self):
     """Tests that setting `communication_hint=nccl` works on non-GPU builds."""
     if kernels.get_registered_kernels_for_op('NcclAllReduce'):
         self.skipTest('Run only on non-GPU environments')
     self._testCollectiveReduce(
         inputs=[[0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1],
                 [0.3, 1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3]],
         expected=[0.2, 1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2],
         set_graph_key=False,
         communication_hint='nccl')
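
For context, a rough sketch of the public-API way to ask for NCCL all-reduce explicitly. This assumes a multi-GPU build with the NcclAllReduce kernel registered; on CPU-only builds TensorFlow falls back to another reduction path, which is the situation the test above exercises.

import tensorflow as tf

# Hedged sketch: request NCCL-based cross-device reduction for MirroredStrategy.
strategy = tf.distribute.MirroredStrategy(
    cross_device_ops=tf.distribute.NcclAllReduce(num_packs=1))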
Example 4
def choose_the_best(devices, session_config=None):
    """Find the best CrossDeviceOps locally given a `tf.compat.v1.ConfigProto`.

  Args:
    devices: a list of devices passed to `tf.distribute.Strategy`.
    session_config: a `tf.compat.v1.ConfigProto` or `None`. If `None`, it will
      make decision based on all logical devices.

  Returns:
    A subclass of `CrossDeviceOps`.
  """
    requested_devices = set(device_util.canonicalize(d) for d in devices)
    if ops.executing_eagerly_outside_functions():
        logical_gpus = context.context().list_logical_devices(
            device_type="GPU")
        physical_gpus = context.context().list_physical_devices(
            device_type="GPU")
        if len(logical_gpus) != len(physical_gpus):
            logging.warning(
                "NCCL is not supported when using virtual GPUs, falling"
                "back to reduction to one device")
            return ReductionToOneDevice()

        machine_devices = context.context().list_logical_devices()
    else:
        machine_devices = device_lib.list_local_devices(
            session_config=session_config)
    using_devices = set()
    for d in machine_devices:
        if device_util.canonicalize(d.name) in requested_devices:
            using_devices.add(d.name)

    if len(using_devices) != len(requested_devices):
        logging.warning(
            "Some requested devices in `tf.distribute.Strategy` are not visible "
            "to TensorFlow: %s",
            ",".join(list(requested_devices - using_devices)))

    if any("gpu" not in d.lower() for d in requested_devices):
        logging.warning(
            "There are non-GPU devices in `tf.distribute.Strategy`, "
            "not using nccl allreduce.")
        return ReductionToOneDevice()

    if kernels.get_registered_kernels_for_op("NcclAllReduce"):
        return NcclAllReduce(num_packs=1)
    else:
        logging.warning("Nccl kernel is not found, not using nccl allreduce.")
        return ReductionToOneDevice()
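
The virtual-GPU fallback in the eager branch can be triggered deliberately; below is a minimal sketch using the public config API, assuming at least one physical GPU is present.

import tensorflow as tf

# Hedged sketch: splitting one physical GPU into two logical GPUs means
# len(logical_gpus) != len(physical_gpus), so the function above would log the
# virtual-GPU warning and return ReductionToOneDevice().
physical_gpus = tf.config.list_physical_devices("GPU")
if physical_gpus:
    tf.config.set_logical_device_configuration(
        physical_gpus[0],
        [tf.config.LogicalDeviceConfiguration(memory_limit=1024),
         tf.config.LogicalDeviceConfiguration(memory_limit=1024)])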
Example 5
 def testFindsAtLeastOneKernel(self):
     kernel_list = kernels.get_registered_kernels_for_op("KernelLabel")
     self.assertGreater(len(kernel_list.kernel), 0)
     self.assertEqual(kernel_list.kernel[0].op, "KernelLabel")
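
The `KernelList` proto returned by `get_registered_kernels_for_op` can also be inspected directly; a small sketch (field names follow the `KernelDef` proto):

from tensorflow.python.framework import kernels

kernel_list = kernels.get_registered_kernels_for_op("MatMul")
for kernel_def in kernel_list.kernel:
    # Each entry records the op name and the device type the kernel runs on.
    print(kernel_def.op, kernel_def.device_type)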