Example 1
    def testOneDevicePerWorker(self, input_type, api_type, iteration_type,
                               enable_get_next_as_optional):
        if tf2.enabled():
            dataset_fn = lambda _: dataset_ops.DatasetV2.range(4)
        else:
            dataset_fn = lambda _: dataset_ops.Dataset.range(4)
        dataset_or_input_fn = self._create_dataset_or_input_fn(
            input_type, dataset_fn)

        strategy = mirrored_strategy.MirroredStrategy(
            devices=(self._cpu_devices()[0][1] + self._cpu_devices()[1][1]),
            cross_device_ops=cross_device_ops_lib.MultiWorkerAllReduce(
                ["/job:worker/task:0", "/job:worker/task:1"], 1))
        worker_devices = self._cpu_devices()
        with context.graph_mode(), strategy.scope(), self.cached_session(
        ) as sess:

            if input_type == "dataset":
                # Autosharded
                expected_values = [[0, 1], [2, 3]]
            else:
                expected_values = [[0, 0], [1, 1], [2, 2], [3, 3]]
            strategy.extended.experimental_enable_get_next_as_optional = (
                enable_get_next_as_optional)
            self._test_input_iteration(input_type,
                                       api_type,
                                       iteration_type,
                                       dataset_or_input_fn,
                                       worker_devices,
                                       expected_values,
                                       strategy,
                                       sess=sess)
Example 2
  def testDifferentDatasets(self, input_type, api_type, iteration_type):
    def dataset_fn(ctx):
      if ctx.input_pipeline_id == 0:
        return dataset_ops.Dataset.range(8).batch(2)
      else:
        return dataset_ops.Dataset.range(9).batch(2)
    dataset_or_input_fn = self._create_dataset_or_input_fn(
        input_type, dataset_fn)

    strategy = mirrored_strategy.MirroredStrategy(
        devices=(self._cpu_and_one_gpu_devices()[0][1] +
                 self._cpu_and_one_gpu_devices()[1][1]),
        cross_device_ops=cross_device_ops_lib.MultiWorkerAllReduce(
            ["/job:worker/task:0", "/job:worker/task:1"], 2))
    worker_devices = self._cpu_and_one_gpu_devices()
    with context.graph_mode(), strategy.scope(), self.cached_session() as sess:

      expected_values = [[[0, 1], [2, 3], [0, 1], [2, 3]],
                         [[4, 5], [6, 7], [4, 5], [6, 7]], [[], [], [8], []]]
      strategy.extended.experimental_enable_get_next_as_optional = True
      self._test_input_iteration(
          input_type,
          api_type,
          iteration_type,
          dataset_or_input_fn,
          worker_devices,
          expected_values,
          strategy,
          sess=sess)
Example 3
    def testAutoshardingOption(self, input_type, api_type, iteration_type,
                               auto_shard_policy):
        ds_option = dataset_ops.Options()
        ds_option.experimental_distribute.auto_shard_policy = auto_shard_policy
        if tf2.enabled():
            dataset_fn = (lambda _: dataset_ops.DatasetV2.range(4).
                          with_options(ds_option))
        else:
            dataset_fn = (
                lambda _: dataset_ops.Dataset.range(4).with_options(ds_option))
        dataset_or_input_fn = self._create_dataset_or_input_fn(
            input_type, dataset_fn)

        strategy = mirrored_strategy.MirroredStrategy(
            devices=(self._cpu_devices()[0][1] + self._cpu_devices()[1][1]),
            cross_device_ops=cross_device_ops_lib.MultiWorkerAllReduce(
                ["/job:worker/task:0", "/job:worker/task:1"], 1))
        worker_devices = self._cpu_devices()
        with context.graph_mode(), self.cached_session() as sess:
            if auto_shard_policy == AutoShardPolicy.AUTO:
                expected_values = [[0, 1], [2, 3]]
            else:
                expected_values = [[0, 0], [1, 1], [2, 2], [3, 3]]
            self._test_input_iteration(input_type, api_type, iteration_type,
                                       dataset_or_input_fn, worker_devices,
                                       expected_values, strategy, sess)
Example 4
  def testUnevenDatasetBatches(self, input_type, api_type, iteration_type):
    strategy = mirrored_strategy.MirroredStrategy(
        devices=(self._cpu_and_one_gpu_devices()[0][1] +
                 self._cpu_and_one_gpu_devices()[1][1]),
        cross_device_ops=cross_device_ops_lib.MultiWorkerAllReduce(
            ["/job:worker/task:0", "/job:worker/task:1"], 2))
    if tf2.enabled():
      dataset_fn = lambda _: dataset_ops.DatasetV2.range(9).batch(2)
    else:
      dataset_fn = lambda _: dataset_ops.Dataset.range(9).batch(2)
    dataset_or_input_fn = self._create_dataset_or_input_fn(
        input_type, dataset_fn)

    worker_devices = self._cpu_and_one_gpu_devices()
    with context.graph_mode(), strategy.scope(), self.cached_session() as sess:
      if input_type == "dataset":
        # Autosharded
        expected_values = [[[0, 1], [4, 5], [2, 3], [6, 7]], [[8], [], [], []]]
      else:
        expected_values = [[[0, 1], [2, 3], [0, 1], [2, 3]],
                           [[4, 5], [6, 7], [4, 5], [6, 7]], [[8], [], [8], []]]
      strategy.extended.experimental_enable_get_next_as_optional = True
      self._test_input_iteration(
          input_type,
          api_type,
          iteration_type,
          dataset_or_input_fn,
          worker_devices,
          expected_values,
          strategy,
          sess=sess)
Example 5
  def _initialize_multi_worker(self, devices):
    """Initializes the object for multi-worker training."""
    self._local_mode = False

    assert devices, "Must specify at least one device."
    assert len(set(devices)) == len(devices), (
        "No duplicates allowed in `devices` argument.")
    # TODO(josh11b): Require at least 2 devices?
    self._devices = [device_util.resolve(d) for d in devices]
    self._canonical_device_set = set(self._devices)
    self._device_index = values.PerReplica(
        {d: i for i, d in enumerate(devices)})

    device_dict = _group_device_list(devices)
    self._workers = []
    self._worker_devices = []
    for job in ["chief", "worker"]:
      for task in range(len(device_dict.get(job, []))):
        worker = "/job:%s/task:%d" % (job, task)
        self._workers.append(worker)
        self._worker_devices.append((worker, device_dict[job][task]))

    # Setting `_default_device` will add a device scope in the
    # distribution.scope. We set the default device to the first worker. When
    # users specify a device under distribution.scope by
    #   with tf.device("/cpu:0"):
    #     ...
    # their ops will end up on the CPU device of the first worker, e.g.
    # "/job:worker/task:0/device:CPU:0". Note this is not used in replica mode.
    self._default_device = self._workers[0]

    self._inferred_cross_device_ops = cross_device_ops_lib.MultiWorkerAllReduce(
        self._workers, _infer_num_gpus_per_worker(self._devices))
Example 6
    def _configure(self,
                   session_config=None,
                   cluster_spec=None,
                   task_type=None,
                   task_id=None):
        del task_type, task_id

        if session_config:
            session_config.CopyFrom(self._update_config_proto(session_config))

        if cluster_spec:
            self._initialize_multi_worker(self._num_gpus, cluster_spec)

        if self._cross_device_ops is None:
            if self._cluster_spec:
                # It currently cannot detect the topology of remote workers. So we
                # hard-code the multi-worker all-reduce algorithm for now.
                if len(self._workers) == 1:
                    # The default is "nccl".
                    self._cross_device_ops = (
                        cross_device_ops_lib.AllReduceCrossDeviceOps())
                else:
                    # The default is hierarchical reduce and broadcast.
                    self._cross_device_ops = cross_device_ops_lib.MultiWorkerAllReduce(
                        self._workers, self._num_gpus)
            else:
                self._cross_device_ops = cross_device_ops_lib.choose_the_best(
                    self._devices, session_config=session_config)
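The branch above only selects MultiWorkerAllReduce when a cluster spec with more than one worker has been configured. A minimal sketch of driving that path, reusing the configure() call shown in Examples 14 and 15 and the num_gpus_per_worker constructor from Example 15 (the one-GPU setting is an illustrative assumption):

strategy = mirrored_strategy.MirroredStrategy(num_gpus_per_worker=1)
strategy.configure(
    cluster_spec={"worker": ["/job:worker/task:0", "/job:worker/task:1"]})
# With two workers and no explicit cross_device_ops, _configure lands in the
# MultiWorkerAllReduce branch above; a single-worker cluster falls back to
# AllReduceCrossDeviceOps, and no cluster at all defers to choose_the_best.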
Example 7
    def testTwoDevicesPerWorker(self, input_type, api_type, iteration_type,
                                enable_get_next_as_optional):
        strategy = mirrored_strategy.MirroredStrategy(
            devices=(self._cpu_and_one_gpu_devices()[0][1] +
                     self._cpu_and_one_gpu_devices()[1][1]),
            cross_device_ops=cross_device_ops_lib.MultiWorkerAllReduce(
                ["/job:worker/task:0", "/job:worker/task:1"], 2))
        worker_devices = self._cpu_and_one_gpu_devices()
        with context.graph_mode(), strategy.scope(), self.cached_session(
        ) as sess:
            if tf2.enabled():
                dataset_fn = lambda _: dataset_ops.DatasetV2.range(4)
            else:
                dataset_fn = lambda _: dataset_ops.Dataset.range(4)

            if input_type == "dataset":
                # Autosharded
                expected_values = [[0, 2, 1, 3]]
            else:
                expected_values = [[0, 1, 0, 1], [2, 3, 2, 3]]
            self._test_input_iteration(
                input_type,
                api_type,
                iteration_type,
                dataset_fn,
                worker_devices,
                expected_values,
                strategy,
                sess=sess,
                enable_get_next_as_optional=enable_get_next_as_optional)
Example 8
    def _initialize_multi_worker(self, devices):
        """Initializes the object for multi-worker training."""
        self._local_mode = False

        assert devices, "Must specify at least one device."
        devices = tuple(device_util.resolve(d) for d in devices)
        assert len(set(devices)) == len(devices), (
            "No duplicates allowed in `devices` argument: %s" % devices)
        # TODO(josh11b): Require at least 2 devices?

        device_dict = _group_device_list(devices)
        workers = []
        worker_devices = []
        for job in ("chief", "worker"):
            for task in range(len(device_dict.get(job, []))):
                worker = "/job:%s/task:%d" % (job, task)
                workers.append(worker)
                worker_devices.append((worker, device_dict[job][task]))

        # Setting `_default_device` will add a device scope in the
        # distribution.scope. We set the default device to the first worker. When
        # users specify a device under distribution.scope by
        #   with tf.device("/cpu:0"):
        #     ...
        # their ops will end up on the CPU device of the first worker, e.g.
        # "/job:worker/task:0/device:CPU:0". Note this is not used in replica mode.
        self._default_device = workers[0]
        self._host_input_device = numpy_dataset.SingleDevice(workers[0])

        self._device_map = values.ReplicaDeviceMap(devices)
        self._input_workers = input_lib.InputWorkers(self._device_map,
                                                     worker_devices)
        self._inferred_cross_device_ops = cross_device_ops_lib.MultiWorkerAllReduce(
            workers, _infer_num_gpus_per_worker(devices))
Example 9
class MultiWorkerCrossDeviceOpsTest(multi_worker_test_base.MultiWorkerTestBase,
                                    CrossDeviceOpsTestBase):

  worker_devices = [
      "/job:worker/replica:0/task:0", "/job:worker/replica:0/task:1"
  ]
  multi_worker_allreduce_combinations = combinations.combine(
      cross_device_ops=[
          combinations.NamedObject(
              "MultiWorkerAllReduce",
              cross_device_ops_lib.MultiWorkerAllReduce(worker_devices, 2,
                                                        ("pscpu/pscpu", 2, -1),
                                                        0)),
          combinations.NamedObject(
              "MultiWorkerAllReducePack",
              cross_device_ops_lib.MultiWorkerAllReduce(worker_devices, 2,
                                                        ("pscpu/pscpu", 2, -1),
                                                        1)),
          combinations.NamedObject(
              "MultiWorkerAllReduceMultipleSpecs",
              cross_device_ops_lib.MultiWorkerAllReduce(
                  worker_devices, 2, [("pscpu/pscpu", 2, 100),
                                      ("xring", 2, -1)], 0)),
      ],
      devices=[
          [
              "/job:worker/replica:0/task:0/device:CPU:0",
              "/job:worker/replica:0/task:1/device:CPU:0"
          ],
          [
              "/job:worker/replica:0/task:0/device:GPU:0",
              "/job:worker/replica:0/task:1/device:GPU:0"
          ],
          [
              "/job:worker/replica:0/task:0/device:GPU:0",
              "/job:worker/replica:0/task:0/device:GPU:1",
              "/job:worker/replica:0/task:1/device:GPU:0",
              "/job:worker/replica:0/task:1/device:GPU:1"
          ],
      ],
      mode=["graph"])

  @combinations.generate(multi_worker_allreduce_combinations)
  def testReductionAndBroadcast(self, cross_device_ops, devices):
    # Mimic the default device of multi-worker strategies.
    with ops.device("/job:worker/replica:0/task:0"):
      self._testReductionAndBroadcast(cross_device_ops, devices)
Example 10
def get_strategy_object(strategy_cls):
    if strategy_cls == mirrored_strategy.MirroredStrategy:
        return strategy_cls(
            mirrored_strategy.all_local_devices(),
            cross_device_ops=cross_device_ops_lib.MultiWorkerAllReduce(
                ['/job:worker/task:0', '/job:worker/task:1'],
                context.num_gpus()))
    else:
        # CollectiveAllReduceStrategy and ParameterServerStrategy.
        return strategy_cls()
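A short usage sketch for the factory above, assuming only the strategy.scope() API already used in the other examples; the body under the scope is elided:

strategy = get_strategy_object(mirrored_strategy.MirroredStrategy)
with strategy.scope():
  # Variables and ops built here are mirrored across the local devices of
  # the two workers, with gradients combined by MultiWorkerAllReduce.
  pass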
Example 11
    def testTupleDataset(self, input_type, api_type, iteration_type,
                         enable_get_next_as_optional):
        strategy = mirrored_strategy.MirroredStrategy(
            devices=(self._cpu_devices()[0][1] + self._cpu_devices()[1][1]),
            cross_device_ops=cross_device_ops_lib.MultiWorkerAllReduce(
                ["/job:worker/task:0", "/job:worker/task:1"], 1))
        worker_devices = self._cpu_devices()

        def dataset_fn(ctx):
            del ctx
            if tf2.enabled():
                dataset1 = dataset_ops.DatasetV2.range(4)
                dataset2 = dataset_ops.DatasetV2.range(4).map(lambda x: x**2)
                return dataset_ops.DatasetV2.zip((dataset1, dataset2))
            else:
                dataset1 = dataset_ops.Dataset.range(4)
                dataset2 = dataset_ops.Dataset.range(4).map(lambda x: x**2)
                return dataset_ops.Dataset.zip((dataset1, dataset2))

        dataset_or_input_fn = self._create_dataset_or_input_fn(
            input_type, dataset_fn)

        with context.graph_mode(), strategy.scope(), self.cached_session(
        ) as sess:

            if input_type == "dataset":
                # Autosharded
                expected_values = [[(0, 0), (1, 1)], [(2, 4), (3, 9)]]
            else:
                expected_values = [[(i, i**2), (i, i**2)] for i in range(0, 4)]
            strategy.extended.experimental_enable_get_next_as_optional = (
                enable_get_next_as_optional)
            self._test_input_iteration(input_type,
                                       api_type,
                                       iteration_type,
                                       dataset_or_input_fn,
                                       worker_devices,
                                       expected_values,
                                       strategy,
                                       sess=sess)
Example 12
 def _make_cross_device_ops(self, num_gpus_per_worker):
   return cross_device_ops_lib.MultiWorkerAllReduce(
       ["/job:worker/task:0", "/job:worker/task:1", "/job:worker/task:2"],
       num_gpus_per_worker)
Example 13
 def _make_cross_device_ops(self):
   return cross_device_ops_lib.MultiWorkerAllReduce(
       ["/job:chief/task:0", "/job:worker/task:0", "/job:worker/task:1"],
       context.num_gpus())
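Both of these helpers only build the cross-device ops object. A minimal sketch of wiring the result into a strategy, following the constructor pattern used in the earlier examples (the explicit CPU device list is an illustrative assumption):

strategy = mirrored_strategy.MirroredStrategy(
    devices=["/job:chief/task:0/device:CPU:0",
             "/job:worker/task:0/device:CPU:0",
             "/job:worker/task:1/device:CPU:0"],
    cross_device_ops=self._make_cross_device_ops())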
Example 14
      updated_var_values = self.evaluate(mock_model.variables)
      # All variables start at 1.0 and get two updates of 0.25.
      self.assertAllEqual(0.5 * np.ones([10, 1]), updated_var_values[0])
      self.assertAllEqual([0.5], updated_var_values[1])


@combinations.generate(
    combinations.combine(
        distribution=[
            combinations.NamedDistribution(
                "Mirrored",
                # pylint: disable=g-long-lambda
                lambda: mirrored_strategy.MirroredStrategy(
                    devices=mirrored_strategy.all_local_devices(),
                    cross_device_ops=cross_device_ops_lib.MultiWorkerAllReduce([
                        "/job:worker/task:0", "/job:worker/task:1"
                    ], context.num_gpus())),
                required_gpus=1)
        ],
        mode=["graph"]))
class MultiWorkerMirroredStrategyTest(
    multi_worker_test_base.MultiWorkerTestBase,
    strategy_test_lib.DistributionTestBase):

  def _configure_distribution_strategy(self, distribution):
    cluster_spec = server_lib.ClusterSpec({
        "worker": ["/job:worker/task:0", "/job:worker/task:1"]
    })
    distribution.configure(cluster_spec=cluster_spec)

  def test_num_replicas_in_sync(self, distribution):
Example 15
class MultiWorkerCrossDeviceOpsTest(multi_worker_test_base.MultiWorkerTestBase,
                                    CrossDeviceOpsTestBase):

    worker_devices = [
        "/job:worker/replica:0/task:0", "/job:worker/replica:0/task:1"
    ]
    multi_worker_allreduce_combinations = combinations.combine(
        cross_device_ops=[
            combinations.NamedObject(
                "MultiWorkerAllReduce",
                cross_device_ops_lib.MultiWorkerAllReduce(
                    worker_devices, 2, ("pscpu/pscpu", 2, -1), 0, 0, 0)),
            combinations.NamedObject(
                "MultiWorkerAllReducePack",
                cross_device_ops_lib.MultiWorkerAllReduce(
                    worker_devices, 2, ("pscpu/pscpu", 2, -1), 1, 0, 0)),
            combinations.NamedObject(
                "MultiWorkerAllReduceAggregation",
                cross_device_ops_lib.MultiWorkerAllReduce(
                    worker_devices, 2, ("pscpu/pscpu", 2, -1), 0, 100, 10)),
            combinations.NamedObject(
                "MultiWorkerAllReduceMultipleSpecs",
                cross_device_ops_lib.MultiWorkerAllReduce(
                    worker_devices, 2, [("pscpu/pscpu", 2, 100),
                                        ("xring", 2, -1)], 0, 0, 0)),
        ],
        distribution=[
            combinations.NamedDistribution(
                "MirroredCPU",
                lambda: mirrored_strategy.MirroredStrategy(num_gpus_per_worker=
                                                           0),
                required_gpus=0),
            combinations.NamedDistribution(
                "Mirrored1GPU",
                lambda: mirrored_strategy.MirroredStrategy(num_gpus_per_worker=
                                                           1),
                required_gpus=1),
            combinations.NamedDistribution(
                "Mirrored2GPUs",
                lambda: mirrored_strategy.MirroredStrategy(num_gpus_per_worker=
                                                           2),
                required_gpus=2),
            # pylint: disable=g-long-lambda
            combinations.NamedDistribution(
                "CoreMirroredCPU",
                lambda: mirrored_strategy.CoreMirroredStrategy(
                    ["/device:CPU:0"]),
                required_gpus=0),
            combinations.NamedDistribution(
                "CoreMirrored1GPU",
                lambda: mirrored_strategy.CoreMirroredStrategy(
                    ["/device:GPU:0"]),
                required_gpus=1),
            combinations.NamedDistribution(
                "CoreMirrored2GPUs",
                lambda: mirrored_strategy.CoreMirroredStrategy(
                    ["/device:GPU:0", "/device:GPU:1"]),
                required_gpus=2),
        ],
        mode=["graph"])

    @combinations.generate(multi_worker_allreduce_combinations)
    def testReductionAndBroadcast(self, cross_device_ops, distribution):
        distribution.configure(
            cluster_spec={
                "worker": [
                    "/job:worker/replica:0/task:0",
                    "/job:worker/replica:0/task:1"
                ]
            })
        with distribution.scope():
            self._testReductionAndBroadcast(cross_device_ops, distribution)
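Read against the NamedObject labels above, the positional arguments appear to be: the worker strings, the number of GPUs per worker, an all-reduce spec (a single (algorithm, shards, size-threshold) tuple or a list of them), a packing option, and two small-gradient aggregation thresholds. The parameter names are not visible in these examples, so the annotated sketch below is an interpretation rather than the documented signature:

cross_device_ops_lib.MultiWorkerAllReduce(
    worker_devices,            # "/job:worker/replica:0/task:N" strings
    2,                         # GPUs per worker
    [("pscpu/pscpu", 2, 100),  # all-reduce specs: (algorithm, shards, size threshold)
     ("xring", 2, -1)],
    0,                         # packing option (1 in the "Pack" variant)
    100,                       # small-gradient aggregation settings, nonzero only
    10)                        # in the "Aggregation" variant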