Code example #1
    def _assert_mirrored_equal(self,
                               left_list,
                               right_list,
                               sess=None,
                               run_options=None):
        if not isinstance(left_list, list):
            left_list, right_list = [left_list], [right_list]

        for left, right in zip(left_list, right_list):
            self.assertEqual(type(left), type(right))

            # Convert Mirrored to a list since sess.run(Mirrored) only returns one
            # value.
            if isinstance(left, value_lib.Mirrored):
                left, right = left.values, right.values
            else:
                # When there's only one replica Mirrored is automatically unwrapped.
                left, right = [left], [right]

            for left_value, right_value in zip(left, right):
                self.assertEqual(device_util.resolve(left_value.device),
                                 device_util.resolve(right_value.device))

            # Densify IndexedSlices.
            left = [ops.convert_to_tensor(v) for v in left]
            right = [ops.convert_to_tensor(v) for v in right]
            if not context.executing_eagerly():
                left, right = sess.run((left, right), options=run_options)
            for left_value, right_value in zip(left, right):
                self.assertAllEqual(left_value, right_value)
Code example #2
 def _assert_indexed_slices_equal(self, left, right):
     self.assertIsInstance(left, ops.IndexedSlices)
     self.assertIsInstance(right, ops.IndexedSlices)
     self.assertEqual(device_util.resolve(left.device),
                      device_util.resolve(right.device))
     self.assertAllEqual(self.evaluate(ops.convert_to_tensor(left)),
                         self.evaluate(ops.convert_to_tensor(right)))
Code example #3
def _get_devices(devices):
  if isinstance(devices, (tuple, list)):
    return tuple(device_util.resolve(d) for d in devices)
  elif isinstance(devices, value_lib.DistributedValues):
    return devices._devices
  elif isinstance(devices, ops.Tensor):
    return (device_util.resolve(devices.device),)
  return (device_util.resolve(devices),)
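A brief usage sketch of the `_get_devices` helper above, assuming a single local machine (the exact canonical strings depend on the runtime; the shapes follow directly from the branches):

# Illustrative only; the resolved strings shown are typical local defaults.
_get_devices(["/gpu:0", "/cpu:0"])
# -> ("/job:localhost/replica:0/task:0/device:GPU:0",
#     "/job:localhost/replica:0/task:0/device:CPU:0")
_get_devices("/cpu:0")
# -> ("/job:localhost/replica:0/task:0/device:CPU:0",)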
Code example #4
 def _assert_indexed_slices_equal(self, left, right):
   self.assertIsInstance(left, ops.IndexedSlices)
   self.assertIsInstance(right, ops.IndexedSlices)
   self.assertEqual(device_util.resolve(left.device),
                    device_util.resolve(right.device))
   self.assertAllEqual(
       self.evaluate(ops.convert_to_tensor(left)),
       self.evaluate(ops.convert_to_tensor(right)))
Code example #5
 def testResolveWithDeviceScope(self):
     with ops.device("/gpu:0"):
         self.assertEqual(device_util.resolve("/job:worker/task:1/cpu:0"),
                          "/job:worker/replica:0/task:1/device:CPU:0")
         self.assertEqual(device_util.resolve("/job:worker/task:1"),
                          "/job:worker/replica:0/task:1/device:GPU:0")
     with ops.device("/job:worker"):
         self.assertEqual(device_util.resolve("/cpu:0"),
                          "/job:worker/replica:0/task:0/device:CPU:0")
Code example #6
def get_devices_from(destinations):
  if isinstance(destinations, value_lib.DistributedValues):
    return destinations.devices
  elif isinstance(destinations, value_lib.LogicalDeviceSpec):
    return destinations.device_map.logical_to_actual_devices(
        destinations.logical_device)
  elif isinstance(destinations, six.string_types):
    return (device_util.resolve(destinations),)
  return (device_util.resolve(destinations.device),)
Code example #7
  def testCopyTensor(self):
    with ops.device("/cpu:0"):
      t = constant_op.constant([[1., 2.], [0, 0], [3., 4.]])
    destination = "/gpu:0"
    result = cross_device_utils.copy_tensor_or_indexed_slices_to_device(
        t, destination)

    self._assert_values_equal(t, result)
    self.assertEqual(device_util.resolve(destination),
                     device_util.resolve(result.device))
Code example #8
 def testResolveWithDeviceScope(self):
   with ops.device("/gpu:0"):
     self.assertEqual(
         device_util.resolve("/job:worker/task:1/cpu:0"),
         "/job:worker/replica:0/task:1/device:CPU:0")
     self.assertEqual(
         device_util.resolve("/job:worker/task:1"),
         "/job:worker/replica:0/task:1/device:GPU:0")
   with ops.device("/job:worker"):
     self.assertEqual(
         device_util.resolve("/cpu:0"),
         "/job:worker/replica:0/task:0/device:CPU:0")
Code example #9
def get_devices_from(destinations):
  if isinstance(destinations, value_lib.DistributedValues):
    return list(destinations.devices)
  elif isinstance(destinations, (resource_variable_ops.ResourceVariable,
                                 value_lib.AggregatingVariable)):
    return [destinations.device]
  elif isinstance(destinations, six.string_types):
    return [device_util.resolve(destinations)]
  elif isinstance(destinations, (list, tuple)):
    return [device_util.resolve(destination) for destination in destinations]
  else:
    return [destinations.device]
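A hedged sketch of how this variant of `get_devices_from` behaves for its string-based branches (resolved names are illustrative; DistributedValues and variables are returned via their `.devices`/`.device` attributes without re-resolving):

get_devices_from("/gpu:0")
# -> ["/job:localhost/replica:0/task:0/device:GPU:0"]  (job/task depend on the runtime)

get_devices_from(("/gpu:0", "/cpu:0"))
# -> a list of two canonical device strings, resolved element-wise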
Code example #10
    def testCopyIndexedSlicesNoDenseShape(self):
        with ops.device("/cpu:0"):
            t = indexed_slices.IndexedSlices(indices=array_ops.identity([0]),
                                             values=array_ops.identity([1.]))
        destination = "/gpu:0"
        result = cross_device_utils.copy_tensor_or_indexed_slices_to_device(
            t, destination)

        self.assertIsInstance(result, indexed_slices.IndexedSlices)
        self.assertAllEqual(t.indices, result.indices)
        self.assertAllEqual(t.values, result.values)
        self.assertEqual(device_util.resolve(destination),
                         device_util.resolve(result.device))
Code example #11
 def __init__(self, container_strategy, device):
     super(OneDeviceExtended, self).__init__(container_strategy)
     self._device = device_util.resolve(device)
     suffix_loc = self._device.rfind("/")
     self._input_device = self._device[:suffix_loc] + "/device:CPU:0"
     worker_device_pairs = [(self._input_device, [self._device])]
     self._input_workers = input_lib.InputWorkers(worker_device_pairs)
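The `_input_device` computation above is plain string surgery on the resolved device name; spelled out with an illustrative value (the device string is an assumption, not taken from the snippet):

device = "/job:localhost/replica:0/task:0/device:GPU:0"  # e.g. what resolve() returned
suffix_loc = device.rfind("/")
input_device = device[:suffix_loc] + "/device:CPU:0"
# -> "/job:localhost/replica:0/task:0/device:CPU:0"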
Code example #12
  def _initialize_multi_worker(self, devices):
    """Initializes the object for multi-worker training."""
    self._local_mode = False

    assert devices, "Must specify at least one device."
    assert len(set(devices)) == len(devices), (
        "No duplicates allowed in `devices` argument.")
    # TODO(josh11b): Require at least 2 devices?
    self._devices = [device_util.resolve(d) for d in devices]
    self._canonical_device_set = set(self._devices)
    self._device_index = values.PerReplica(
        {d: i for i, d in enumerate(devices)})

    device_dict = _group_device_list(devices)
    self._workers = []
    self._worker_devices = []
    for job in ["chief", "worker"]:
      for task in range(len(device_dict.get(job, []))):
        worker = "/job:%s/task:%d" % (job, task)
        self._workers.append(worker)
        self._worker_devices.append((worker, device_dict[job][task]))

    # Setting `_default_device` will add a device scope in the
    # distribution.scope. We set the default device to the first worker. When
    # users specify device under distribution.scope by
    #   with tf.device("/cpu:0"):
    #     ...
    # their ops will end up on the cpu device of its first worker, e.g.
    # "/job:worker/task:0/device:CPU:0". Note this is not used in replica mode.
    self._default_device = self._workers[0]

    self._inferred_cross_device_ops = cross_device_ops_lib.MultiWorkerAllReduce(
        self._workers, _infer_num_gpus_per_worker(self._devices))
Code example #13
    def _initialize_multi_worker(self, devices):
        """Initializes the object for multi-worker training."""
        self._local_mode = False

        assert devices, "Must specify at least one device."
        devices = tuple(device_util.resolve(d) for d in devices)
        assert len(set(devices)) == len(devices), (
            "No duplicates allowed in `devices` argument: %s" % devices)
        # TODO(josh11b): Require at least 2 devices?

        device_dict = _group_device_list(devices)
        workers = []
        worker_devices = []
        for job in ("chief", "worker"):
            for task in range(len(device_dict.get(job, []))):
                worker = "/job:%s/task:%d" % (job, task)
                workers.append(worker)
                worker_devices.append((worker, device_dict[job][task]))

        # Setting `_default_device` will add a device scope in the
        # distribution.scope. We set the default device to the first worker. When
        # users specify device under distribution.scope by
        #   with tf.device("/cpu:0"):
        #     ...
        # their ops will end up on the cpu device of its first worker, e.g.
        # "/job:worker/task:0/device:CPU:0". Note this is not used in replica mode.
        self._default_device = workers[0]
        self._host_input_device = numpy_dataset.SingleDevice(workers[0])

        self._device_map = values.ReplicaDeviceMap(devices)
        self._input_workers = input_lib.InputWorkers(self._device_map,
                                                     worker_devices)
        self._inferred_cross_device_ops = cross_device_ops_lib.MultiWorkerAllReduce(
            workers, _infer_num_gpus_per_worker(devices))
Code example #14
  def _initialize_multi_worker(self, devices):
    """Initializes the object for multi-worker training."""
    self._local_mode = False

    assert devices, "Must specify at least one device."
    devices = tuple(device_util.resolve(d) for d in devices)
    assert len(set(devices)) == len(devices), (
        "No duplicates allowed in `devices` argument: %s" % devices)
    # TODO(josh11b): Require at least 2 devices?

    device_dict = _group_device_list(devices)
    workers = []
    worker_devices = []
    for job in ("chief", "worker"):
      for task in range(len(device_dict.get(job, []))):
        worker = "/job:%s/task:%d" % (job, task)
        workers.append(worker)
        worker_devices.append((worker, device_dict[job][task]))

    # Setting `_default_device` will add a device scope in the
    # distribution.scope. We set the default device to the first worker. When
    # users specify device under distribution.scope by
    #   with tf.device("/cpu:0"):
    #     ...
    # their ops will end up on the cpu device of its first worker, e.g.
    # "/job:worker/task:0/device:CPU:0". Note this is not used in replica mode.
    self._default_device = workers[0]
    self._host_input_device = numpy_dataset.SingleDevice(workers[0])

    self._device_map = values.ReplicaDeviceMap(devices)
    self._input_workers = input_lib.InputWorkers(
        self._device_map, worker_devices)
    self._inferred_cross_device_ops = cross_device_ops_lib.MultiWorkerAllReduce(
        workers, _infer_num_gpus_per_worker(devices))
Code example #15
    def _initialize_local(self, num_gpus, devices):
        """Initializes the object for local training."""
        self._cluster_spec = None
        # Convert `num_gpus` into `devices`, shouldn't specify both.
        if devices is None:
            if num_gpus is None:
                num_gpus = context.num_gpus()
            if num_gpus == 0:
                devices = ["/device:CPU:0"]
            else:
                devices = ["/device:GPU:%d" % d for d in range(num_gpus)]
        elif num_gpus is not None:
            raise ValueError(
                "Must only specify one of `devices` and `num_gpus`.")
        self._num_gpus = num_gpus
        # TODO(yuefengz): consider setting the default device.

        assert devices, "Must specify at least one device."
        assert len(set(devices)) == len(devices), (
            "No duplicates allowed in `devices` argument.")
        # TODO(josh11b): Require at least 2 devices?
        self._devices = [device_util.resolve(d) for d in devices]
        self._canonical_device_set = set(self._devices)
        self._device_index = values.PerReplica(
            {d: i
             for i, d in enumerate(devices)})
Code example #16
    def _initialize_multi_worker(self, devices):
        """Initializes the object for multi-worker training."""
        self._local_mode = False

        assert devices, "Must specify at least one device."
        assert len(set(devices)) == len(devices), (
            "No duplicates allowed in `devices` argument.")
        # TODO(josh11b): Require at least 2 devices?
        self._devices = tuple(device_util.resolve(d) for d in devices)
        self._canonical_device_set = set(self._devices)
        self._device_index = values.PerReplica(
            {d: i
             for i, d in enumerate(devices)})

        device_dict = _group_device_list(devices)
        self._workers = []
        self._worker_devices = []
        for job in ["chief", "worker"]:
            for task in range(len(device_dict.get(job, []))):
                worker = "/job:%s/task:%d" % (job, task)
                self._workers.append(worker)
                self._worker_devices.append((worker, device_dict[job][task]))

        # Setting `_default_device` will add a device scope in the
        # distribution.scope. We set the default device to the first worker. When
        # users specify device under distribution.scope by
        #   with tf.device("/cpu:0"):
        #     ...
        # their ops will end up on the cpu device of its first worker, e.g.
        # "/job:worker/task:0/device:CPU:0". Note this is not used in replica mode.
        self._default_device = self._workers[0]

        self._inferred_cross_device_ops = cross_device_ops_lib.MultiWorkerAllReduce(
            self._workers, _infer_num_gpus_per_worker(self._devices))
Code example #17
def get_device_map_from(destinations):
  if isinstance(destinations, (value_lib.DistributedValues,
                               value_lib.LogicalDeviceSpec)):
    return destinations.device_map, destinations.logical_device
  if isinstance(destinations, six.string_types):
    device = device_util.resolve(destinations)
  else:
    device = destinations.device
  return value_lib.SingleDeviceMap(device), 0
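For reference, a sketch of the two non-distributed branches of the helper above (it uses the same `value_lib.SingleDeviceMap` that appears in the snippet, part of the older device-map API; the behavior described follows from the branches themselves):

# A bare string is resolved before being wrapped in a single-device map.
device_map, logical_device = get_device_map_from("/gpu:0")
# device_map wraps the canonical device string; logical_device == 0

# A tensor or variable falls through to the final branch: its .device attribute
# is wrapped as-is, so only the string branch calls device_util.resolve.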
Code example #18
def get_device_map_from(destinations):
    if isinstance(destinations,
                  (value_lib.DistributedValues, value_lib.LogicalDeviceSpec)):
        return destinations.device_map, destinations.logical_device
    if isinstance(destinations, six.string_types):
        device = device_util.resolve(destinations)
    else:
        device = destinations.device
    return value_lib.SingleDeviceMap(device), 0
Code example #19
def get_devices_from(destinations):
  if isinstance(destinations, value_lib.DistributedValues):
    return destinations.devices
  elif isinstance(destinations, value_lib.LogicalDeviceSpec):
    return destinations.device_map.logical_to_actual_devices(
        destinations.logical_device)
  elif isinstance(destinations, six.string_types):
    return (device_util.resolve(destinations),)
  return (destinations.device,)
Code example #20
 def _assert_values_equal(self, left, right):
   self.assertEqual(type(left), type(right))
   if isinstance(left, (list, tuple)):
     for l, r in zip(left, right):
       self._assert_values_equal(l, r)
   else:
     if isinstance(left, value_lib.DistributedValues):
       self.assertEqual(set(left._devices), set(right._devices))
       self._assert_values_equal(left.values, right.values)
     else:
       self.assertEqual(
           device_util.resolve(left.device), device_util.resolve(right.device))
       if isinstance(left, ops.IndexedSlices):
         self._assert_indexed_slices_equal(left, right)
       elif context.executing_eagerly():
         self.assertEqual(left.numpy(), right.numpy())
       else:
         with self.cached_session() as sess:
           self.assertEqual(sess.run(left), sess.run(right))
Code example #21
 def _initialize_strategy(self, devices):
   # The _initialize_strategy method is intended to be used by distribute
   # coordinator as well.
   assert devices, "Must specify at least one device."
   devices = tuple(device_util.resolve(d) for d in devices)
   assert len(set(devices)) == len(devices), (
       "No duplicates allowed in `devices` argument: %s" % (devices,))
   if _is_device_list_single_worker(devices):
     self._initialize_single_worker(devices)
   else:
     self._initialize_multi_worker(devices)
Code example #22
 def _initialize_local(self, devices):
   """Initializes the object for local training."""
   self._local_mode = True
   assert devices, "Must specify at least one device."
   devices = tuple(device_util.resolve(d) for d in devices)
   assert len(set(devices)) == len(devices), (
       "No duplicates allowed in `devices` argument: %s" % devices)
   # TODO(josh11b): Require at least 2 devices?
   self._device_map = values.ReplicaDeviceMap(devices)
   self._input_workers = values.InputWorkers(self._device_map)
   self._inferred_cross_device_ops = cross_device_ops_lib.choose_the_best(
       devices)
Code example #23
 def _assert_values_equal(self, left, right, sess):
     self.assertEqual(type(left), type(right))
     if isinstance(left, (list, tuple)):
         for l, r in zip(left, right):
             self._assert_values_equal(l, r, sess)
     else:
         if isinstance(left, value_lib.DistributedValues):
             self.assertEqual(set(left.devices), set(right.devices))
             self._assert_values_equal(left.values, right.values, sess)
         else:
             self.assertEqual(device_util.resolve(left.device),
                              device_util.resolve(right.device))
             if isinstance(left, ops.IndexedSlices):
                 self._assert_indexed_slices_equal(left, right)
             elif context.executing_eagerly():
                 self.assertEqual(left.numpy(), right.numpy())
             else:
                 run_options = config_pb2.RunOptions()
                 run_options.experimental.collective_graph_key = 6
                 self.assertEqual(sess.run(left, options=run_options),
                                  sess.run(right, options=run_options))
Code example #24
  def testDistributeDatasetFunctionHostPrefetch(self, distribution):
    data = [5., 6., 7., 8.]
    input_iterator = iter(
        distribution.distribute_datasets_from_function(
            lambda _: get_dataset_from_tensor_slices(data),
            distribute_lib.InputOptions(experimental_fetch_to_device=False)))

    local_results = distribution.experimental_local_results(
        input_iterator.get_next())

    for result in local_results:
      self.assertEqual(result.backing_device,
                       device_util.resolve("/device:CPU:0"))
Code example #25
    def testDistributeDatasetFunctionHostPrefetch(self, distribution):
        data = [5., 6., 7., 8.]
        distribution.extended._set_prefetch_on_host(True)  # pylint: disable=protected-access
        input_iterator = iter(
            distribution.experimental_distribute_datasets_from_function(
                lambda _: get_dataset_from_tensor_slices(data)))

        local_results = distribution.experimental_local_results(
            input_iterator.get_next())

        for result in local_results:
            self.assertEqual(result.backing_device,
                             device_util.resolve("/device:CPU:0"))
Code example #26
 def _initialize_local(self, devices):
   """Initializes the object for local training."""
   self._local_mode = True
   assert devices, "Must specify at least one device."
   devices = tuple(device_util.resolve(d) for d in devices)
   assert len(set(devices)) == len(devices), (
       "No duplicates allowed in `devices` argument: %s" % devices)
   # TODO(josh11b): Require at least 2 devices?
   self._device_map = values.ReplicaDeviceMap(devices)
   self._input_workers = input_lib.InputWorkers(self._device_map)
   self._inferred_cross_device_ops = cross_device_ops_lib.choose_the_best(
       devices)
   self._host_input_device = numpy_dataset.SingleDevice("/cpu:0")
Code example #27
 def __init__(self, container_strategy, devices):
     super(MirroredFunctionExtended, self).__init__(container_strategy)
     if devices is None:
         devices = mirrored_strategy.all_devices()
     if not devices:
         raise ValueError(
             "Got an empty `devices` list. Please make sure the "
             "`devices` you pass in is not empty.")
     device_tuple = tuple(device_util.resolve(d) for d in devices)
     assert len(set(device_tuple)) == len(device_tuple), (
         "No duplicates allowed in `devices` argument: %s" % (devices, ))
     self._devices = device_tuple
     self._retrace_functions_for_each_device = False
Code example #28
  def _initialize_local(self, devices):
    """Initializes the object for local training."""
    self._local_mode = True
    assert devices, "Must specify at least one device."
    assert len(set(devices)) == len(devices), (
        "No duplicates allowed in `devices` argument.")
    # TODO(josh11b): Require at least 2 devices?
    self._devices = [device_util.resolve(d) for d in devices]
    self._canonical_device_set = set(self._devices)
    self._device_index = values.PerReplica(
        {d: i for i, d in enumerate(devices)})

    self._inferred_cross_device_ops = cross_device_ops_lib.choose_the_best(
        devices)
Code example #29
 def _initialize_strategy(self, devices):
   # The _initialize_strategy method is intended to be used by distribute
   # coordinator as well.
   assert devices, "Must specify at least one device."
   devices = tuple(device_util.resolve(d) for d in devices)
   assert len(set(devices)) == len(devices), (
       "No duplicates allowed in `devices` argument: %s" % (devices,))
   if _is_device_list_single_worker(devices):
     self._initialize_single_worker(devices)
     if self._prefer_collective_ops and (
         isinstance(self._cross_device_ops, cross_device_ops_lib.NcclAllReduce)
         or isinstance(self._inferred_cross_device_ops,
                       cross_device_ops_lib.NcclAllReduce)):
       self._use_collective_ops(devices)
       self._inferred_cross_device_ops = None
     logging.info("Using MirroredStrategy with devices %r", devices)
   else:
     self._initialize_multi_worker(devices)
Code example #30
  def _initialize_multi_worker(self, num_gpus, cluster_spec):
    """Initializes the object for multi-worker training."""
    cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
    self._cluster_spec = cluster_spec

    self._workers = []
    for job in ["chief", "worker"]:
      for task in range(len(cluster_spec.as_dict().get(job, []))):
        self._workers.append("/job:%s/task:%d" % (job, task))

    if num_gpus is None:
      raise ValueError("`num_gpus` is required if `cluster_spec` is given.")
    if num_gpus > 0:
      self._worker_devices = [
          (worker, [
              device_util.canonicalize(worker + "/device:GPU:%d" % gpu)
              for gpu in range(num_gpus)
          ]) for worker in self._workers
      ]
    else:
      self._worker_devices = [
          (worker, [device_util.canonicalize(worker, "/device:CPU:0")])
          for worker in self._workers
      ]

    devices = nest.flatten([l for _, l in self._worker_devices])

    # Setting `_default_device` will add a device scope in the
    # distribution.scope. We set the default device to the first worker. When
    # users specify device under distribution.scope by
    #   with tf.device("/cpu:0"):
    #     ...
    # their ops will end up on the cpu device of its first worker, e.g.
    # "/job:worker/task:0/device:CPU:0". Note this is not used in replica mode.
    self._default_device = self._workers[0]

    assert devices, "Must specify at least one device."
    assert len(set(devices)) == len(devices), (
        "No duplicates allowed in `devices` argument.")
    # TODO(josh11b): Require at least 2 devices?
    self._devices = [device_util.resolve(d) for d in devices]
    self._canonical_device_set = set(self._devices)
    self._device_index = values.PerReplica(
        {d: i for i, d in enumerate(devices)})
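This snippet uses both `device_util.canonicalize` with an explicit default and `device_util.resolve`; as the testResolveWithDeviceScope example earlier shows, `resolve` takes its defaults from the enclosing device scope instead. A small sketch contrasting the two calls, with an illustrative worker string:

worker = "/job:worker/task:0"

# canonicalize: missing fields are filled from the explicit default argument.
device_util.canonicalize(worker, "/device:CPU:0")
# -> "/job:worker/replica:0/task:0/device:CPU:0"

# resolve: missing fields are filled from the current device scope (if any),
# then from the built-in defaults.
device_util.resolve(worker + "/device:GPU:1")
# -> "/job:worker/replica:0/task:0/device:GPU:1"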
Code example #31
    def _initialize_multi_worker(self, num_gpus, cluster_spec):
        """Initializes the object for multi-worker training."""
        cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
        self._cluster_spec = cluster_spec

        self._workers = []
        for job in ["chief", "worker"]:
            for task in range(len(cluster_spec.as_dict().get(job, []))):
                self._workers.append("/job:%s/task:%d" % (job, task))

        if num_gpus is None:
            raise ValueError(
                "`num_gpus` is required if `cluster_spec` is given.")
        if num_gpus > 0:
            self._worker_devices = [(worker, [
                device_util.canonicalize(worker + "/device:GPU:%d" % gpu)
                for gpu in range(num_gpus)
            ]) for worker in self._workers]
        else:
            self._worker_devices = [
                (worker, [device_util.canonicalize(worker, "/device:CPU:0")])
                for worker in self._workers
            ]

        devices = nest.flatten([l for _, l in self._worker_devices])

        # Setting `_default_device` will add a device scope in the
        # distribution.scope. We set the default device to the first worker. When
        # users specify device under distribution.scope by
        #   with tf.device("/cpu:0"):
        #     ...
        # their ops will end up on the cpu device of its first worker, e.g.
        # "/job:worker/task:0/device:CPU:0". Note this is not used in replica mode.
        self._default_device = self._workers[0]

        assert devices, "Must specify at least one device."
        assert len(set(devices)) == len(devices), (
            "No duplicates allowed in `devices` argument.")
        # TODO(josh11b): Require at least 2 devices?
        self._devices = [device_util.resolve(d) for d in devices]
        self._canonical_device_set = set(self._devices)
        self._device_index = values.PerReplica(
            {d: i
             for i, d in enumerate(devices)})
Code example #32
    def _initialize_local(self, devices):
        """Initializes the object for local training.

            ZJW - We modify this protected function to fix a bug:
                We must pass a `session_config` argument to cross_device_ops_lib.choose_the_best().
                Otherwise, all GPU memory is allocated when device_lib.list_local_devices()
                is called inside choose_the_best().

            Even so, this is still a compromise, because the session config option
            `allow_growth` becomes ineffective.
        """
        self._local_mode = True
        assert devices, "Must specify at least one device."
        assert len(set(devices)) == len(devices), (
            "No duplicates allowed in `devices` argument.")
        # TODO(josh11b): Require at least 2 devices?
        self._devices = tuple(device_util.resolve(d) for d in devices)
        self._canonical_device_set = set(self._devices)
        self._device_index = values.PerReplica(
            {d: i for i, d in enumerate(devices)})

        self._inferred_cross_device_ops = cross_device_ops_lib.choose_the_best(
            devices, session_config=self._zjw_session_config)
Code example #33
  def _initialize_local(self, num_gpus, devices):
    """Initializes the object for local training."""
    self._cluster_spec = None
    # Convert `num_gpus` into `devices`, shouldn't specify both.
    if devices is None:
      if num_gpus is None:
        num_gpus = context.num_gpus()
      if num_gpus == 0:
        devices = ["/device:CPU:0"]
      else:
        devices = ["/device:GPU:%d" % d for d in range(num_gpus)]
    elif num_gpus is not None:
      raise ValueError("Must only specify one of `devices` and `num_gpus`.")
    self._num_gpus = num_gpus
    # TODO(yuefengz): consider setting the default device.

    assert devices, "Must specify at least one device."
    assert len(set(devices)) == len(devices), (
        "No duplicates allowed in `devices` argument.")
    # TODO(josh11b): Require at least 2 devices?
    self._devices = [device_util.resolve(d) for d in devices]
    self._canonical_device_set = set(self._devices)
    self._device_index = values.PerReplica(
        {d: i for i, d in enumerate(devices)})
Code example #34
  def _testReductionAndBroadcast(self, cross_device_ops, devices):
    if context.num_gpus() < sum(1 for d in devices if "GPU" in d.upper()):
      self.skipTest("Not enough GPUs")

    with self.cached_session() as sess:
      values = [constant_op.constant(float(d)) for d in range(len(devices))]
      per_replica = _make_per_replica(values, devices)
      mean = (len(devices) - 1.) / 2.

      values_2 = [constant_op.constant(d + 1.0) for d in range(len(devices))]
      per_replica_2 = _make_per_replica(values_2, devices)
      mean_2 = mean + 1.

      destination_mirrored = _fake_mirrored(1., devices)
      destination_different = _fake_mirrored(1.,
                                             device_util.resolve(_cpu_device))
      destination_str = device_util.resolve(_cpu_device)

      all_destinations = [
          destination_mirrored,
          destination_different,
          destination_str,
      ]

      # test reduce()
      for destinations in all_destinations:
        self._assert_mirrored_equal(
            cross_device_ops.reduce(
                reduce_util.ReduceOp.MEAN,
                per_replica,
                destinations=destinations), _fake_mirrored(mean, destinations),
            sess)
        self._assert_mirrored_equal(
            cross_device_ops.reduce(
                reduce_util.ReduceOp.MEAN,
                per_replica_2,
                destinations=destinations),
            _fake_mirrored(mean_2, destinations), sess)
        self._assert_mirrored_equal(
            cross_device_ops.reduce(
                reduce_util.ReduceOp.SUM,
                per_replica,
                destinations=destinations),
            _fake_mirrored(mean * len(devices), destinations), sess)
        self._assert_mirrored_equal(
            cross_device_ops.reduce(
                reduce_util.ReduceOp.SUM,
                per_replica_2,
                destinations=destinations),
            _fake_mirrored(mean_2 * len(devices), destinations), sess)

      # test batch_reduce()
      for d1, d2 in itertools.product(all_destinations, all_destinations):
        self._assert_mirrored_equal(
            cross_device_ops.batch_reduce(reduce_util.ReduceOp.MEAN,
                                          [(per_replica, d1),
                                           (per_replica_2, d2)]),
            [_fake_mirrored(mean, d1),
             _fake_mirrored(mean_2, d2)], sess)
        self._assert_mirrored_equal(
            cross_device_ops.batch_reduce(reduce_util.ReduceOp.SUM,
                                          [(per_replica, d1),
                                           (per_replica_2, d2)]),
            [
                _fake_mirrored(mean * len(devices), d1),
                _fake_mirrored(mean_2 * len(devices), d2)
            ], sess)

      # test broadcast()
      for destinations in all_destinations:
        self._assert_mirrored_equal(
            cross_device_ops.broadcast(constant_op.constant(1.), destinations),
            _fake_mirrored(1., destinations), sess)
Code example #35
 def __init__(self, container_strategy, device):
     super(OneDeviceExtended, self).__init__(container_strategy)
     self._device = device_util.resolve(device)
     self._input_device = device_util.get_host_for_device(self._device)
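Compared with the earlier OneDeviceExtended snippet that sliced the device string manually, the input device here comes from `device_util.get_host_for_device`. Illustratively (the input string is an assumption), the helper keeps the job/replica/task part and swaps the device for the host CPU:

device_util.get_host_for_device("/job:worker/replica:0/task:1/device:GPU:0")
# -> "/job:worker/replica:0/task:1/device:CPU:0"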
Code example #36
def get_devices_from(destinations):
    if isinstance(destinations, value_lib.DistributedValues):
        return destinations._devices  # pylint: disable=protected-access
    elif isinstance(destinations, six.string_types):
        return (device_util.resolve(destinations), )
    return (device_util.resolve(destinations.device), )
Code example #37
def _get_devices(devices):
  if isinstance(devices, (tuple, list)):
    return tuple(device_util.resolve(d) for d in devices)
  elif isinstance(devices, value_lib.DistributedValues):
    return devices.devices
  return (device_util.resolve(devices),)
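Across these snippets the same normalization idiom recurs; condensed into a standalone sketch (the `_normalize_devices` name is illustrative, not taken from the snippets):

def _normalize_devices(devices):
    # Canonicalize first, then check for duplicates, so that aliases such as
    # "/gpu:0" and "/device:GPU:0" are recognized as the same device.
    devices = tuple(device_util.resolve(d) for d in devices)
    assert len(set(devices)) == len(devices), (
        "No duplicates allowed in `devices` argument: %s" % (devices,))
    return devices

Note that the older _initialize_* variants above check for duplicates before resolving, while the newer ones resolve first, which also catches such aliases.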