Example #1
  def __init__(self,
               container_strategy,
               tpu_cluster_resolver=None,
               steps_per_run=None,
               device_assignment=None):
    super(TPUExtended, self).__init__(container_strategy)

    if tpu_cluster_resolver is None:
      tpu_cluster_resolver = resolver_lib.TPUClusterResolver("")

    if steps_per_run is None:
      # TODO(frankchn): Warn when we are being used by DS/Keras and this is
      # not specified.
      steps_per_run = 1

    self._tpu_cluster_resolver = tpu_cluster_resolver
    self._tpu_metadata = get_tpu_system_metadata(self._tpu_cluster_resolver)
    self._device_assignment = device_assignment

    # Device assignment is currently only supported for the single-core case.
    if self._device_assignment:
      assert isinstance(self._device_assignment,
                        device_assignment_lib.DeviceAssignment)
      if self._device_assignment.num_replicas != 1:
        raise ValueError("Device assignment is only supported for a single "
                         "core single replica case currently.")
      if self._device_assignment.num_cores_per_replica != 1:
        raise ValueError("Device assignment is only supported for a single "
                         "core single replica case currently.")
      if not all(self._device_assignment.core_assignment[0][0] == [0, 0, 0]):
        raise ValueError("Device assignment is only supported for a single "
                         "core single replica case currently.")

    # TODO(jhseu): Switch to DeviceAssignment to support pods and model
    # parallelism.
    self._device_index = {
        d.name: i for i, d in enumerate(self._tpu_metadata.devices)
        if "device:TPU:" in d.name
    }
    self._host_device = self.get_host_cpu_device(0)
    self._tpu_devices = tuple(sorted(self._device_index.keys()))
    # Only create variables for the number of replicas we're running.
    self._tpu_devices = self._tpu_devices[:self._num_replicas_in_sync]
    self._device_map = values.ReplicaDeviceMap(self._tpu_devices)

    # For input:
    input_device_map = values.ReplicaDeviceMap(tuple(
        self.get_host_cpu_device(hid) for hid in range(self.num_hosts)))
    worker_devices = [
        (self.get_host(hid), [self.get_host_cpu_device(hid)])
        for hid in range(self.num_hosts)
    ]
    self._input_workers = input_lib.InputWorkers(
        input_device_map, worker_devices)

    # TODO(sourabhbajaj): Remove this once performance of running one step
    # at a time is comparable to multiple steps.
    self.steps_per_run = steps_per_run
    self._require_static_shapes = True
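The constructor above follows a pattern common to every example on this page: build a ReplicaDeviceMap over the per-replica compute devices, then pair each input worker with the devices it feeds. A minimal sketch, assuming the TF 1.14/1.15-era internal API used in these examples (values.ReplicaDeviceMap, input_lib.InputWorkers); the device strings below are placeholders:

# Minimal sketch; not taken from the examples on this page.
from tensorflow.python.distribute import input_lib
from tensorflow.python.distribute import values

compute_devices = ("/device:GPU:0", "/device:GPU:1")  # one entry per replica
host_device = "/device:CPU:0"                         # the input worker

# Replica index i is mapped to compute_devices[i].
device_map = values.ReplicaDeviceMap(compute_devices)

# Each input worker is paired with the compute devices it feeds.
input_workers = input_lib.InputWorkers(
    device_map, [(host_device, compute_devices)])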
Example #2
    def __init__(self,
                 container_strategy,
                 tpu_cluster_resolver=None,
                 steps_per_run=None,
                 num_cores=None):
        super(TPUExtended, self).__init__(container_strategy)

        if tpu_cluster_resolver is None:
            tpu_cluster_resolver = resolver_lib.TPUClusterResolver("")

        if steps_per_run is None:
            # TODO(frankchn): Warn when we are being used by DS/Keras and this is
            # not specified.
            steps_per_run = 1

        self._tpu_cluster_resolver = tpu_cluster_resolver
        self._tpu_metadata = get_tpu_system_metadata(
            self._tpu_cluster_resolver)
        # TODO(sourabhbajaj): Change this from num_cores to metadata_override
        self._num_cores_override = num_cores

        # TODO(jhseu): Switch to DeviceAssignment to support pods and model
        # parallelism.
        self._device_index = {
            d.name: i
            for i, d in enumerate(self._tpu_metadata.devices)
            if "device:TPU:" in d.name
        }
        self._host_device = self.get_host_cpu_device(0)
        self._tpu_devices = tuple(sorted(self._device_index.keys()))
        # Only create variables for the number of replicas we're running.
        self._tpu_devices = self._tpu_devices[:self._num_replicas_in_sync]
        self._device_map = values.ReplicaDeviceMap(self._tpu_devices)

        # For input:
        input_device_map = values.ReplicaDeviceMap(
            tuple(
                self.get_host_cpu_device(hid)
                for hid in range(self.num_hosts)))
        worker_devices = [(self.get_host(hid), [self.get_host_cpu_device(hid)])
                          for hid in range(self.num_hosts)]
        self._input_workers = values.InputWorkers(input_device_map,
                                                  worker_devices)

        # TODO(sourabhbajaj): Remove this once performance of running one step
        # at a time is comparable to multiple steps.
        self.steps_per_run = steps_per_run
        self._require_static_shapes = True

        # Initialize the TPU devices.
        self._initialize_tpu()
Example #3
  def testInitializableIterator(self):
    with context.graph_mode():
      devices = ["/device:CPU:0"]
      # Using random input since that is only allowed with an initializable
      # iterator.
      dataset = dataset_ops.Dataset.from_tensor_slices(
          random_ops.random_uniform((10,)))

      device_map = values.ReplicaDeviceMap(devices)
      input_workers = values.InputWorkers(device_map)
      per_replica_dataset = values.PerReplicaDataset(dataset, input_workers, 0)
      iterator = per_replica_dataset.make_initializable_iterator()

      self.evaluate(iterator.initializer)
      next_element = iterator.get_next_as_list()
      for _ in range(10):
        self.evaluate(next_element)

      # Should fail after the input is finished.
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)

      # After re-initializing the iterator, should be able to iterate again.
      self.evaluate(iterator.initializer)
      for _ in range(10):
        self.evaluate(next_element)
Example #4
  def testMirroredStratParaAsync(self):
    """Tests RNG/MirrorStrategy interaction #3.

    The user can create n independent RNGs outside strategy.scope(), where n
    is the number of replicas, and give one to each replica. The replicas can
    thus get different random-number streams.
    """
    shape = [3, 4]
    dtype = dtypes.int32
    gens = random.get_global_generator().split(count=2)
    devices = ["/cpu:0", test_util.gpu_device_name()]
    strat = MirroredStrategy(devices=devices)
    # Use `PerReplica` to specify which `gen` is sent to which replica
    gens = dist_values.PerReplica(
        device_map=dist_values.ReplicaDeviceMap(devices),
        values=[[g] for g in gens])
    with strat.scope():
      def f(gen):
        t1 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t2 = gen.uniform_full_int(shape=shape, dtype=dtype)
        t = array_ops.stack([t1, t2])
        return t
      results = strat.extended.call_for_each_replica(
          fn=f, args=gens)
      values = results.values
      self.assertAllEqual(2, len(values))
      self.assertAllDifferent(values)
Example #5
  def __init__(self,
               container_strategy,
               tpu_cluster_resolver=None,
               steps_per_run=None,
               device_assignment=None):
    super(TPUExtended, self).__init__(container_strategy)

    if tpu_cluster_resolver is None:
      tpu_cluster_resolver = TPUClusterResolver("")

    if steps_per_run is None:
      # TODO(frankchn): Warn when we are being used by DS/Keras and this is
      # not specified.
      steps_per_run = 1

    self._tpu_function_cache = weakref.WeakKeyDictionary()
    self._tpu_cluster_resolver = tpu_cluster_resolver
    self._tpu_metadata = get_tpu_system_metadata(self._tpu_cluster_resolver)
    self._device_assignment = device_assignment

    # Device assignment is currently only supported for the single-core case.
    if self._device_assignment:
      assert isinstance(self._device_assignment,
                        device_assignment_lib.DeviceAssignment)
      if self._device_assignment.num_replicas != 1:
        raise ValueError("Device assignment is only supported for a single "
                         "core single replica case currently.")
      if self._device_assignment.num_cores_per_replica != 1:
        raise ValueError("Device assignment is only supported for a single "
                         "core single replica case currently.")
      if not all(self._device_assignment.core_assignment[0][0] == [0, 0, 0]):
        raise ValueError("Device assignment is only supported for a single "
                         "core single replica case currently.")

    # TODO(jhseu): Switch to DeviceAssignment to support pods and model
    # parallelism.
    self._tpu_devices = [d.name for d in self._tpu_metadata.devices
                         if "device:TPU:" in d.name]

    self._host_device = device_util.get_host_for_device(self._tpu_devices[0])

    # Only create variables for the number of replicas we're running.
    self._tpu_devices = self._tpu_devices[:self._num_replicas_in_sync]
    self._device_map = values.ReplicaDeviceMap(self._tpu_devices)

    # Preload the data onto the TPUs.
    input_worker_devices = collections.OrderedDict()
    for tpu_device in self._tpu_devices:
      host_device = device_util.get_host_for_device(tpu_device)
      input_worker_devices.setdefault(host_device, [])
      input_worker_devices[host_device].append(tpu_device)
    self._input_workers = input_lib.InputWorkers(
        self._device_map, tuple(input_worker_devices.items()))

    # TODO(sourabhbajaj): Remove this once performance of running one step
    # at a time is comparable to multiple steps.
    self.steps_per_run = steps_per_run
    self._require_static_shapes = True

    self.experimental_enable_get_next_as_optional = True
Example #6
    def _create_iterator(self, input_type, dataset_fn, worker_device_pairs,
                         devices, split_batch_by, enable_get_next_as_optional):
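        # Build InputWorkers for the given devices, then construct either an
        # input_fn-based or a dataset-based distributed iterator.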
        device_map = values.ReplicaDeviceMap(devices)
        input_workers = input_lib.InputWorkers(device_map, worker_device_pairs)

        if input_type == "input_fn":
            input_contexts = []
            for i in range(input_workers.num_workers):
                input_contexts.append(
                    distribute_lib.InputContext(
                        num_input_pipelines=input_workers.num_workers,
                        input_pipeline_id=i,
                        num_replicas_in_sync=len(devices)))

            iterator = input_lib.InputFunctionIterator(
                dataset_fn,
                input_workers,
                input_contexts,
                _enable_get_next_as_optional=enable_get_next_as_optional)
        else:
            iterator = input_lib.DatasetIterator(
                dataset_fn(distribute_lib.InputContext()),
                input_workers,
                split_batch_by,
                _enable_get_next_as_optional=enable_get_next_as_optional)
        return iterator
Example #7
    def predict(actions, state):
        state = state.copy()
        # Break down the inputs along the batch dimension to form equal-sized
        # tensors in each replica.
        num_replicas = strategy.num_replicas_in_sync
        actions = tf.split(actions, num_replicas)
        state = {
            key: tf.split(value, num_replicas)
            for key, value in state.items()
        }
        devices = values.ReplicaDeviceMap(strategy.extended.worker_devices)
        dist_actions = values.PerReplica(devices, tuple(actions))
        dist_state = []
        for i in range(num_replicas):
            dist_state.append({key: value[i] for key, value in state.items()})
        dist_state = values.PerReplica(devices, tuple(dist_state))

        dist_predictions = strategy.experimental_run_v2(model.predict,
                                                        args=(dist_actions,
                                                              dist_state))
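        # Unwrap the per-replica predictions and concatenate them back along
        # the batch dimension.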
        dist_predictions = {
            key: strategy.experimental_local_results(value)
            for key, value in dist_predictions.items()
        }
        predictions = {
            key: tf.concat(value, axis=0)
            for key, value in dist_predictions.items()
        }
        return predictions
Example #8
    def __init__(self,
                 container_strategy,
                 tpu_cluster_resolver=None,
                 steps_per_run=None,
                 device_assignment=None):
        super(TPUExtended, self).__init__(container_strategy)

        if tpu_cluster_resolver is None:
            tpu_cluster_resolver = TPUClusterResolver("")

        if steps_per_run is None:
            # TODO(frankchn): Warn when we are being used by DS/Keras and this is
            # not specified.
            steps_per_run = 1

        self._tpu_function_cache = weakref.WeakKeyDictionary()
        self._tpu_cluster_resolver = tpu_cluster_resolver
        self._tpu_metadata = get_tpu_system_metadata(
            self._tpu_cluster_resolver)
        self._device_assignment = device_assignment

        self._tpu_devices = [
            d.name for d in self._tpu_metadata.devices
            if "device:TPU:" in d.name
        ]

        # Only create variables for the number of replicas we're running.
        if device_assignment is not None:
            job_name = device_spec.DeviceSpecV2.from_string(
                self._tpu_devices[0]).job

            self._tpu_devices = []
            for replica_id in range(device_assignment.num_replicas):
                tpu_device = device_assignment.tpu_device(replica=replica_id,
                                                          logical_core=0,
                                                          job=job_name)
                tpu_device = device_util.canonicalize(tpu_device)
                self._tpu_devices.append(tpu_device)

        self._host_device = device_util.get_host_for_device(
            self._tpu_devices[0])

        self._device_map = values.ReplicaDeviceMap(self._tpu_devices)

        # Preload the data onto the TPUs.
        input_worker_devices = collections.OrderedDict()
        for tpu_device in self._tpu_devices:
            host_device = device_util.get_host_for_device(tpu_device)
            input_worker_devices.setdefault(host_device, [])
            input_worker_devices[host_device].append(tpu_device)
        self._input_workers = input_lib.InputWorkers(
            self._device_map, tuple(input_worker_devices.items()))

        # TODO(sourabhbajaj): Remove this once performance of running one step
        # at a time is comparable to multiple steps.
        self.steps_per_run = steps_per_run
        self._require_static_shapes = True

        self.experimental_enable_get_next_as_optional = True
        self.experimental_enable_dynamic_batch_size = True
Example #9
  def testNamedTupleEstimatorSpec(self):
    with context.graph_mode(), ops.Graph().as_default():
      devices = []
      created_estimator_specs = []

      for device_id in range(3):
        spec = model_fn_lib.EstimatorSpec(
            mode=model_fn_lib.ModeKeys.TRAIN,
            loss=constant_op.constant(device_id / 2),
            train_op=array_ops.identity(constant_op.constant(device_id)))
        devices.append(_device_str(device_id))
        created_estimator_specs.append(spec)

      device_map = values.ReplicaDeviceMap(devices)
      merged_estimator_spec = values.regroup(
          device_map, created_estimator_specs)

      self.assertTrue(
          isinstance(merged_estimator_spec, model_fn_lib.EstimatorSpec))
      self.assertEqual(model_fn_lib.ModeKeys.TRAIN, merged_estimator_spec.mode)
      for device_id in range(3):
        d = _device_str(device_id)
        self.assertEqual(created_estimator_specs[device_id].loss,
                         merged_estimator_spec.loss.get(d))
        self.assertEqual(created_estimator_specs[device_id].train_op,
                         merged_estimator_spec.train_op.get(d))
        # Scaffold is populated by `EstimatorSpec.__new__`.
        self.assertEqual(created_estimator_specs[device_id].scaffold,
                         merged_estimator_spec.scaffold.get(d))
        # Also test that we can undo the merge using select_replica()
        self.assertEqual(created_estimator_specs[device_id],
                         values.select_replica(device_id,
                                               merged_estimator_spec))
Example #10
  def testNested(self):
    device_map = values.ReplicaDeviceMap((_device_str(0), _device_str(1)))
    result = values.regroup(device_map,
                            (_nested_value("1"), _nested_value("2")))
    self.assertIsInstance(result, tuple)
    self.assertEqual(3, len(result))
    self._is_per_replica(result[0], ["a1", "a2"])
    self._is_per_replica(result[2], ["h1", "h2"])

    self.assertIsInstance(result[1], list)
    self.assertEqual(3, len(result[1]))
    self._is_per_replica(result[1][0], ["b1", "b2"])
    self._is_per_replica(result[1][2], ["g1", "g2"])

    self.assertIsInstance(result[1][1], dict)
    self.assertEqual(set(["c", "e"]), set(result[1][1].keys()))
    self._is_per_replica(result[1][1]["c"], ["d1", "d2"])
    self._is_per_replica(result[1][1]["e"], ["f1", "f2"])

    # Also test that we can undo the merge using select_replica()
    self.assertEqual(_nested_value("1"),
                     values.select_replica(0, result))
    self.assertEqual(_nested_value("2"),
                     values.select_replica(1, result))
    # select_device_mirrored() should fail due to non-mirrored values
    with self.assertRaises(TypeError):
      values.select_device_mirrored(_device_str(0), result)
    with self.assertRaises(TypeError):
      values.select_device_mirrored(_device_str(1), result)
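The regroup/select_replica round trip exercised above reduces to the following minimal sketch, assuming the same internal values module; the literals are placeholders:

from tensorflow.python.distribute import values

device_map = values.ReplicaDeviceMap(("/device:CPU:0", "/device:GPU:0"))
# regroup() turns a tuple of per-replica structures into one structure whose
# leaves are PerReplica objects; select_replica() recovers one replica's view.
merged = values.regroup(device_map, ({"a": "a1"}, {"a": "a2"}))
per_replica_a = merged["a"]                   # PerReplica holding "a1", "a2"
replica_0 = values.select_replica(0, merged)  # {"a": "a1"}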
Example #11
  def testWrapClass(self):
    # Normally a mirrored value would be the same across devices, but
    # for a test it is convenient to be able to tell the values apart.
    device_map = values.ReplicaDeviceMap((_device_str(0), _device_str(1)))
    result = values.regroup(device_map,
                            (_nested_value("1"), _nested_value("2")),
                            values.Mirrored)
    self.assertIsInstance(result, tuple)
    self.assertEqual(3, len(result))
    self._is_per_replica(result[0], ["a1", "a2"], values.Mirrored)
    self._is_per_replica(result[2], ["h1", "h2"], values.Mirrored)

    self.assertIsInstance(result[1], list)
    self.assertEqual(3, len(result[1]))
    self._is_per_replica(result[1][0], ["b1", "b2"], values.Mirrored)
    self._is_per_replica(result[1][2], ["g1", "g2"], values.Mirrored)

    self.assertIsInstance(result[1][1], dict)
    self.assertEqual(set(["c", "e"]), set(result[1][1].keys()))
    self._is_per_replica(result[1][1]["c"], ["d1", "d2"], values.Mirrored)
    self._is_per_replica(result[1][1]["e"], ["f1", "f2"], values.Mirrored)

    # Also test that we can undo the merge using select_replica()
    self.assertEqual(_nested_value("1"),
                     values.select_replica(0, result))
    self.assertEqual(_nested_value("2"),
                     values.select_replica(1, result))
    # Values are marked as mirrored, so select_device_mirrored() is allowed.
    self.assertEqual(_nested_value("1"),
                     values.select_device_mirrored(_device_str(0), result))
    self.assertEqual(_nested_value("2"),
                     values.select_device_mirrored(_device_str(1), result))
Example #12
def _make_mirrored_indexed_slices(devices, values, indices, dense_shape):
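    # Create one IndexedSlices value per device, then wrap them all as a
    # single Mirrored value.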
    values = [
        _make_indexed_slices(values, indices, dense_shape, d) for d in devices
    ]
    return value_lib.regroup(value_lib.ReplicaDeviceMap(devices),
                             values,
                             wrap_class=value_lib.Mirrored)
Example #13
    def distributed_function(x, y, sample_weights, learning_phase=None):
      """A single step of the distributed execution across replicas."""
      del learning_phase

      # TODO(b/129653859):  Simplify after PerReplica can be the input of
      # `def_function.function`.  `regroup` calls and re-wrapping in
      # PerReplica won't be needed then.
      if isinstance(strategy, one_device_strategy.OneDeviceStrategy):
        device_map = values.SingleDeviceMap(devices[0])
        wrap_class = lambda d, x: x
      else:
        device_map = values.ReplicaDeviceMap(devices)
        wrap_class = values.PerReplica

      # Transform each list of lists of values into per-replica objects
      # in the case of mirrored strategy.  For example, for 2 replicas:
      # [[x0, y0], [x1, y1]] -> [PerReplica(d0:x0, d1:x1),
      #                          PerReplica(d0:y0, d1:y1)]
      x = values.regroup(device_map, x, wrap_class)
      y = values.regroup(device_map, y, wrap_class) if y else None
      sample_weights = values.regroup(device_map, sample_weights,
                                      wrap_class) if sample_weights else None

      # Call `Model.{train,test,predict}_on_batch` on every replica passing
      # PerReplicas as arguments.  On every replica inside this call, each
      # PerReplica object will return the value for that replica.  The outputs
      # are PerReplicas too.
      outputs = strategy.experimental_run_v2(
          per_replica_function, args=(x, y, sample_weights))
      # Out of PerReplica outputs reduce or pick values to return.
      all_outputs = unwrap_outputs(
          strategy, outputs, with_loss_tensor=(mode != ModeKeys.PREDICT))
      return all_outputs
Example #14
    def _initialize_multi_worker(self, devices):
        """Initializes the object for multi-worker training."""
        self._local_mode = False

        assert devices, "Must specify at least one device."
        devices = tuple(device_util.resolve(d) for d in devices)
        assert len(set(devices)) == len(devices), (
            "No duplicates allowed in `devices` argument: %s" % devices)
        # TODO(josh11b): Require at least 2 devices?

        device_dict = _group_device_list(devices)
        workers = []
        worker_devices = []
        for job in ("chief", "worker"):
            for task in range(len(device_dict.get(job, []))):
                worker = "/job:%s/task:%d" % (job, task)
                workers.append(worker)
                worker_devices.append((worker, device_dict[job][task]))

        # Setting `_default_device` will add a device scope in the
        # distribution.scope. We set the default device to the first worker.
        # When users specify a device under distribution.scope with
        #   with tf.device("/cpu:0"):
        #     ...
        # their ops will end up on the CPU device of the first worker, e.g.
        # "/job:worker/task:0/device:CPU:0". Note this is not used in replica mode.
        self._default_device = workers[0]
        self._host_input_device = numpy_dataset.SingleDevice(workers[0])

        self._device_map = values.ReplicaDeviceMap(devices)
        self._input_workers = input_lib.InputWorkers(self._device_map,
                                                     worker_devices)
        self._inferred_cross_device_ops = cross_device_ops_lib.MultiWorkerAllReduce(
            workers, _infer_num_gpus_per_worker(devices))
Example #15
  def testWrapAListOfTwoTuples(self):
    device_map = values.ReplicaDeviceMap((_device_str(0), _device_str(1)))
    result = values.regroup(device_map, [("1", "2"), ("3", "4")])
    self.assertIsInstance(result, tuple)
    self.assertEqual(2, len(result))
    self._is_per_replica(result[0], ("1", "3"), values.PerReplica)
    self._is_per_replica(result[1], ("2", "4"), values.PerReplica)
Example #16
    def _initialize_local(self, cluster_resolver):
        """Initialize internal devices for local training."""
        worker_device = device_util.canonicalize("/device:CPU:0")
        num_gpus = cluster_resolver.num_accelerators()
        # Define compute devices, a list of device strings with one entry per
        # replica. When there are GPUs, replicate operations on these GPUs.
        # Otherwise, place operations on the CPU.
        if num_gpus > 0:
            compute_devices = tuple(
                map("/device:GPU:{}".format, range(num_gpus)))
        else:
            compute_devices = (_LOCAL_CPU, )

        self._device_map = values.ReplicaDeviceMap(compute_devices)
        self._input_workers = input_lib.InputWorkers(
            self._device_map, [(worker_device, compute_devices)])

        # If there is only one GPU, put everything on that GPU. Otherwise, place
        # variables on CPU.
        if num_gpus == 1:
            assert len(compute_devices) == 1
            self._variable_device = _LOCAL_GPU_0
            self._parameter_devices = (_LOCAL_GPU_0, )
        else:
            self._variable_device = _LOCAL_CPU
            self._parameter_devices = (_LOCAL_CPU, )

        self._is_chief = True
        self._cluster_spec = None
        self._task_type = None
        self._task_id = None

        logging.info(
            "ParameterServerStrategy with compute_devices = %r, "
            "variable_device = %r", compute_devices, self._variable_device)
Example #17
    def observe(images, actions, rewards, state):
        images = tf.to_float(images) / 255.0 - 0.5
        # Break down the inputs along the batch dimension to form equal-sized
        # tensors in each replica.
        num_replicas = strategy.num_replicas_in_sync
        images = tf.split(images, num_replicas)
        actions = tf.split(actions, num_replicas)
        state = {
            key: tf.split(value, num_replicas)
            for key, value in state.items()
        }
        devices = values.ReplicaDeviceMap(strategy.extended.worker_devices)
        dist_images = values.PerReplica(devices, tuple(images))
        dist_actions = values.PerReplica(devices, tuple(actions))
        dist_state = []
        for i in range(num_replicas):
            dist_state.append({key: value[i] for key, value in state.items()})
        dist_state = values.PerReplica(devices, tuple(dist_state))
        _, dist_posteriors = strategy.experimental_run_v2(
            model.observe, args=(dist_actions, dist_images, dist_state))
        dist_posteriors = {
            key: strategy.experimental_local_results(value)
            for key, value in dist_posteriors.items()
        }
        posteriors = {
            key: tf.concat(value, axis=0)
            for key, value in dist_posteriors.items()
        }
        posteriors = {key: value[:, -1] for key, value in posteriors.items()}
        posteriors['rewards'] = rewards[:, -1]
        return posteriors
Example #18
    def _initialize_local(self, devices):
        """Initializes the object for local training."""
        self._local_mode = True
        self._device_map = values.ReplicaDeviceMap(devices)
        self._input_workers = input_lib.InputWorkers(self._device_map)
        self._inferred_cross_device_ops = None if self._cross_device_ops else (
            cross_device_ops_lib.choose_the_best(devices))
        self._host_input_device = numpy_dataset.SingleDevice("/cpu:0")
Example #19
  def testValueErrorForIterator(self):
    # Incompatible arguments.
    d1 = "/device:GPU:0"
    d2 = "/device:GPU:1"
    device_map = values.ReplicaDeviceMap([d1, d2])
    input_workers = values.InputWorkers(
        device_map, (("w1", (d1,)), ("w2", (d2,))))
    with self.assertRaises(ValueError):
      values.MultiWorkerDataIterator([("w1", None)], input_workers)
Example #20
def _fake_mirrored(value, devices):
    """Create a faked Mirrored object for testing.

    All components of the returned Mirrored have the same objects, which is not
    true in reality.
    """
    devices = _get_devices(devices)
    return value_lib.Mirrored(value_lib.ReplicaDeviceMap(devices),
                              [value] * len(devices))
Example #21
def _make_replica_local(method, strategy=None):
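  # Create one resource variable per device and wrap them in a single
  # replica-local variable.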
  device_map = values.ReplicaDeviceMap(_devices)
  v = []
  for d, n, init in zip(_devices, ["v", "v/replica"], [1., 2.]):
    with ops.device(d):
      v.append(variable_scope.get_variable(
          name=n, initializer=init, use_resource=True))
  replica_local = values.ReplicaLocalVariable(strategy, device_map, v, method)
  return v, replica_local
Example #22
  def testOneDevice(self):
    device_map = values.ReplicaDeviceMap((_device_str(0),))
    result = values.regroup(device_map, (_nested_value("1"),))
    # On one device regroup() and select_replica() are basically identity.
    self.assertEqual(_nested_value("1"), result)
    self.assertEqual(_nested_value("1"),
                     values.select_replica(0, result))

    # The one exception has to do with MirroredVariables.
    d = "/device:CPU:0"
    with ops.device(d):
      v = variable_scope.get_variable(
          name="v", initializer=1., use_resource=True)
      device_map = values.ReplicaDeviceMap((d,))
    mirrored = values.MirroredVariable(None, device_map, (v,),
                                       variable_scope.VariableAggregation.SUM)
    result = values.regroup(device_map, (v,))
    self.assertIs(mirrored, result)
Example #23
    def testContainsIndexedSlices_PerReplica(self):
        t0 = math_ops._as_indexed_slices(
            constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
        t1 = math_ops._as_indexed_slices(
            constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
        device_map = value_lib.ReplicaDeviceMap(("/gpu:0", "/cpu:0"))
        per_replica = value_lib.PerReplica(device_map, (t0, t1))
        self.assertTrue(
            cross_device_utils.contains_indexed_slices(per_replica))
Example #24
  def testPassPerReplica(self, distribution):
    @function.defun
    def fn1(mock_model, factor):
      return mock_model(factor)

    device_map = values.ReplicaDeviceMap(("/device:CPU:0", "/device:GPU:0"))
    factors = values.PerReplica(device_map, (5.0, 3.0))
    expected_result = values.PerReplica(device_map, (5.0 * 1.25, 3.0 * 1.25))
    self._call_and_check(distribution, fn1, [factors], expected_result, [fn1])
Example #25
    def _create_variable(self, next_creator, *args, **kwargs):
        colocate_with = kwargs.pop("colocate_with", None)
        if colocate_with is None:
            device_map = values.ReplicaDeviceMap([self._variable_device])
            logical_device = 0
        elif isinstance(colocate_with, numpy_dataset.SingleDevice):
            with ops.device(colocate_with.device):
                return next_creator(*args, **kwargs)
        else:
            device_map = colocate_with.device_map
            logical_device = colocate_with.logical_device

        def _real_creator(devices, *args, **kwargs):
            assert len(devices) == 1
            assert devices[0] == self._variable_device

            # The chief worker will initialize and broadcast the value to
            # the other workers. Always done on the host.
            kwargs["initial_value"] = self._get_variable_creator_initial_value(
                replica_id=0,  # First (and only) replica on each worker.
                device=self._host_device,
                primary_var=None,
                **kwargs)

            # We always place sync-on-read variables on the IPU. They will
            # be transferred and reduced on the hosts only when read.
            synchronization = kwargs.get("synchronization")
            if (not self._variables_on_host or synchronization
                    == variable_scope.VariableSynchronization.ON_READ):
                with ops.device(self._ipu_device):
                    return [next_creator(*args, **kwargs)]

            # Cache a snapshot of the variable on the IPU device,
            # otherwise the XLA cluster containing the ops consuming the
            # variable might be moved to the host to be colocated with it.
            kwargs["caching_device"] = self._ipu_device

            # In case we are inside an ipu_jit_scope, we need to override it
            # to disable XLA for variable initialization on the host.
            disable_xla = {
                "_XlaCompile": attr_value_pb2.AttrValue(b=False),
                "_XlaScope": attr_value_pb2.AttrValue(s=b''),
            }

            graph = ops.get_default_graph()
            with ops.device(self._host_device), \
                graph._attr_scope(disable_xla):  # pylint: disable=protected-access
                return [next_creator(*args, **kwargs)]

        # For tf1: use distribute_lib.create_mirrored_variable
        return values.create_mirrored_variable(self._container_strategy(),
                                               device_map, logical_device,
                                               _real_creator,
                                               IPUMirroredVariable,
                                               IPUSyncOnReadVariable, *args,
                                               **kwargs)
Example #26
  def testGetEager(self):
    with ops.device("/device:CPU:0"):
      one = constant_op.constant(1)
      two = constant_op.constant(2)
      device_map = values.ReplicaDeviceMap(("/device:CPU:0", "/device:GPU:0"))
      v = values.DistributedValues(device_map, (one, two))
      self.assertEqual(two, v.get("/device:GPU:0"))
      self.assertEqual(one, v.get())
      with self.assertRaises(ValueError):
        self.assertIsNone(v.get("/device:GPU:2"))
Example #27
    def __init__(self, container_strategy, ipu_device, cpu_device):
        super().__init__(container_strategy)
        self._ipu_device = ipu_device
        self._cpu_device = cpu_device

        device_map = values.ReplicaDeviceMap([self._cpu_device])

        worker_device_pairs = [("", [self._cpu_device])]
        self._input_workers = input_lib.InputWorkers(device_map,
                                                     worker_device_pairs)
Example #28
 def _test_dataset(self, dataset_fn, worker_devices, devices,
                    expected_values):
    device_map = values.ReplicaDeviceMap(devices)
    input_workers = values.InputWorkers(device_map, worker_devices)
    multi_worker_dataset = values.MultiWorkerDataset(
        dataset_fn, input_workers)
    multi_worker_iterator = multi_worker_dataset.make_initializable_iterator()
    with self.cached_session() as sess:
      sess.run(multi_worker_iterator.initializer)
      self._test_iterator(sess, multi_worker_iterator, devices, expected_values)
Example #29
  def testVariableOnAnotherDevice(self):
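    # A MirroredVariable reports the name, dtype and shape of its component
    # variable, even when that variable lives on another device.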
    v = variable_scope.get_variable(
        name="v", initializer=[1.], use_resource=True)
    device_map = values.ReplicaDeviceMap(("/job:foo/device:CPU:0",))
    mirrored = values.MirroredVariable(None, device_map, (v,),
                                       variable_scope.VariableAggregation.MEAN)

    self.assertEqual(v.name, mirrored.name)
    self.assertEqual(v.dtype, mirrored.dtype)
    self.assertEqual(v.shape, mirrored.shape)
Example #30
  def testFetchAMirroredVariable(self, distribution):
    with self.session(graph=ops.Graph()) as sess, distribution.scope():
      with ops.device("/device:GPU:0"):
        v = variable_scope.get_variable(
            name="v", initializer=1., use_resource=True)
      mirrored = values.MirroredVariable(
          distribution, values.ReplicaDeviceMap(("/device:GPU:0",)), (v,),
          variable_scope.VariableAggregation.MEAN)
      sess.run(variables_lib.global_variables_initializer())
      sess.run({"complicated": mirrored})