Code example #1
  def testAggregateTensors(self):
    t0 = constant_op.constant([[1., 2.], [0, 0], [3., 4.]])
    t1 = constant_op.constant([[0., 0.], [5, 6], [7., 8.]])
    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
    result = cross_device_utils.aggregate_tensors_or_indexed_slices([t0, t1])
    self._assert_values_equal(total, result)
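For dense inputs, the aggregation exercised by this test is just an element-wise sum. Below is a minimal standalone sketch of the same expectation; it uses the public TF 2.x API (`import tensorflow as tf`) instead of the test-only imports, which is an assumption about the environment rather than code from the listing.

import tensorflow as tf

t0 = tf.constant([[1., 2.], [0., 0.], [3., 4.]])
t1 = tf.constant([[0., 0.], [5., 6.], [7., 8.]])
# Element-wise sum, matching the test's `total`:
total = tf.add_n([t0, t1])  # [[1., 2.], [5., 6.], [10., 12.]]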
Code example #2
    def _test_reduce_indexed_slices(self,
                                    task_type,
                                    task_id,
                                    num_gpus,
                                    communication,
                                    batch_reduce,
                                    variable_length,
                                    local_mode=False):
        collective_all_reduce, devices, master_target = self._get_test_objects(
            task_type,
            task_id,
            num_gpus,
            communication=communication,
            local_mode=local_mode)
        if local_mode:
            num_workers = 1
            worker_device = None
        else:
            num_workers = len(self._cluster_spec.get("chief", [])) + len(
                self._cluster_spec.get("worker", []))
            worker_device = "/job:%s/task:%d" % (task_type, task_id)
        with ops.Graph().as_default(), \
             ops.device(worker_device), \
             self.cached_session(target=master_target) as sess:
            per_replica = self._get_indexed_slices(
                devices, (task_id or 0) * max(num_gpus, 1), variable_length)

            if batch_reduce:
                result = collective_all_reduce.batch_reduce(
                    reduce_util.ReduceOp.SUM, [(per_replica, per_replica)])[0]
            else:
                result = collective_all_reduce.reduce(reduce_util.ReduceOp.SUM,
                                                      per_replica, per_replica)
            if num_gpus > 1:
                self.assertIsInstance(result, value_lib.Mirrored)

            run_options = config_pb2.RunOptions()
            run_options.experimental.collective_graph_key = 7
            if num_gpus > 1:
                result = sess.run(
                    [ops.convert_to_tensor(v) for v in result.values],
                    options=run_options)[0]
            else:
                result = sess.run(ops.convert_to_tensor(result),
                                  options=run_options)

            # Reduce the same indexed slices on CPU locally as our expected results.
            devices_cpu = [(worker_device or "") + "/device:CPU:0"
                           ] * (max(num_gpus, 1) * num_workers)
            per_replica_on_cpu = self._get_indexed_slices(devices_cpu,
                                                          0,
                                                          variable_length,
                                                          as_per_replica=False)
            expected_result = cross_device_utils.aggregate_tensors_or_indexed_slices(
                per_replica_on_cpu)
            expected_result = sess.run(ops.convert_to_tensor(expected_result))

            self.assertAllEqual(expected_result, result)
Code example #3
  def testAggregateIndexedSlices(self):
    t0 = math_ops._as_indexed_slices(
        constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
    t1 = math_ops._as_indexed_slices(
        constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
    total = constant_op.constant([[1., 2.], [5, 6], [10., 12.]])
    result = cross_device_utils.aggregate_tensors_or_indexed_slices([t0, t1])
    self.assertIsInstance(result, ops.IndexedSlices)
    self._assert_values_equal(total, result)
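The test above relies on the private helper math_ops._as_indexed_slices. As a hedged sketch (an illustration built only from public TF 2.x APIs, not the library's internal path), the same expected total can be reproduced by constructing tf.IndexedSlices directly and densifying with tf.convert_to_tensor, the same conversion the tests use before comparing results.

import tensorflow as tf

s0 = tf.IndexedSlices(values=tf.constant([[1., 2.], [3., 4.]]),
                      indices=tf.constant([0, 2]),
                      dense_shape=tf.constant([3, 2]))
s1 = tf.IndexedSlices(values=tf.constant([[5., 6.], [7., 8.]]),
                      indices=tf.constant([1, 2]),
                      dense_shape=tf.constant([3, 2]))
# Densify each operand and sum; the result is [[1., 2.], [5., 6.], [10., 12.]].
total = tf.convert_to_tensor(s0) + tf.convert_to_tensor(s1)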
Code example #4
  def _test_reduce_indexed_slices(self,
                                  task_type,
                                  task_id,
                                  num_gpus,
                                  batch_reduce,
                                  local_mode=False):
    collective_all_reduce, devices, master_target = self._get_test_objects(
        task_type, task_id, num_gpus, local_mode=local_mode)
    if local_mode:
      num_workers = 1
      worker_device = None
    else:
      num_workers = len(self._cluster_spec.get("chief", [])) + len(
          self._cluster_spec.get("worker", []))
      worker_device = "/job:%s/task:%d" % (task_type, task_id)
    with ops.Graph().as_default(), \
         ops.device(worker_device), \
         self.cached_session(target=master_target) as sess:
      per_replica = self._get_indexed_slices(devices,
                                             (task_id or 0) * max(num_gpus, 1))

      if batch_reduce:
        result = collective_all_reduce.batch_reduce(
            reduce_util.ReduceOp.SUM, [(per_replica, per_replica)])[0]
      else:
        result = collective_all_reduce.reduce(reduce_util.ReduceOp.SUM,
                                              per_replica, per_replica)
      self.assertIsInstance(result, value_lib.Mirrored)

      run_options = config_pb2.RunOptions()
      run_options.experimental.collective_graph_key = 7
      result = sess.run([ops.convert_to_tensor(v) for v in result.values],
                        options=run_options)[0]

      # Reduce the same indexed slices on CPU locally as our expected results.
      devices_cpu = [(worker_device or "") + "/device:CPU:0"] * (
          max(num_gpus, 1) * num_workers)
      per_replica_on_cpu = self._get_indexed_slices(
          devices_cpu, 0, as_per_replica=False)
      expected_result = cross_device_utils.aggregate_tensors_or_indexed_slices(
          per_replica_on_cpu)
      expected_result = sess.run(ops.convert_to_tensor(expected_result))

      self.assertAllEqual(expected_result, result)
      return True
Code example #5
def _simple_reduce(per_replica_value, reduce_to_device, accumulation_fn,
                   reduce_op):
  # pylint: disable=g-missing-docstring
  all_values = per_replica_value.values
  if not all_values:
    raise ValueError("`per_replica_value` must be non-empty")
  count = len(all_values)

  with ops.device(reduce_to_device):
    with context.device_policy(context.DEVICE_PLACEMENT_SILENT):
      reduced = cross_device_utils.aggregate_tensors_or_indexed_slices(
          all_values, accumulation_fn)
      if reduce_op == reduce_util.ReduceOp.MEAN:
        reduced = cross_device_utils.divide_by_n_tensors_or_indexed_slices(
            reduced, count)
      elif reduce_op != reduce_util.ReduceOp.SUM:
        raise ValueError("`reduce_op` must be Reduce.SUM or Reduce.MEAN.")
  return reduced
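As a quick numeric illustration of the two branches above (a hedged sketch, not code from the library): ReduceOp.SUM keeps the aggregated value as-is, while ReduceOp.MEAN divides it by the number of per-replica values. Public TF 2.x ops are assumed; the inputs are stand-ins for per_replica_value.values.

import tensorflow as tf

values = [tf.constant([2., 4.]), tf.constant([4., 8.])]
summed = tf.add_n(values)        # SUM branch  -> [6., 12.]
mean = summed / len(values)      # MEAN branch -> [3., 6.]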