def _assert_indexed_slices_equal(self, left, right):
  self.assertIsInstance(left, ops.IndexedSlices)
  self.assertIsInstance(right, ops.IndexedSlices)
  self.assertEqual(device_util.resolve(left.device),
                   device_util.resolve(right.device))
  self.assertAllEqual(
      self.evaluate(ops.convert_to_tensor(left)),
      self.evaluate(ops.convert_to_tensor(right)))
def _get_devices_from(destinations):
  if isinstance(destinations, value_lib.DistributedValues):
    return list(destinations.devices)
  elif isinstance(destinations, six.string_types):
    return [device_util.resolve(destinations)]
  else:
    return [device_util.resolve(destination) for destination in destinations]
def get_devices_from(destinations):
  if isinstance(destinations, value_lib.DistributedValues):
    return list(destinations.devices)
  elif isinstance(destinations, six.string_types):
    return [device_util.resolve(destinations)]
  else:
    return [device_util.resolve(destination) for destination in destinations]
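# A hedged standalone sketch (toy code, not the TF internals above): the
# two helpers above normalize a heterogeneous "destinations" argument into
# a flat list of canonical device strings. `_toy_resolve` merely stands in
# for device_util.resolve, which canonicalizes against the current device
# scope.
def _toy_resolve(d):
  # Stand-in for device_util.resolve; the real code canonicalizes `d`.
  return d

def _toy_get_devices_from(destinations):
  if isinstance(destinations, str):                 # single device string
    return [_toy_resolve(destinations)]
  return [_toy_resolve(d) for d in destinations]    # any iterable of strings

assert _toy_get_devices_from("/gpu:0") == ["/gpu:0"]
assert _toy_get_devices_from(["/gpu:0", "/gpu:1"]) == ["/gpu:0", "/gpu:1"]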
def testResolveWithDeviceScope(self):
  with ops.device("/gpu:0"):
    self.assertEqual(device_util.resolve("/job:worker/task:1/cpu:0"),
                     "/job:worker/replica:0/task:1/device:CPU:0")
    self.assertEqual(device_util.resolve("/job:worker/task:1"),
                     "/job:worker/replica:0/task:1/device:GPU:0")
  with ops.device("/job:worker"):
    self.assertEqual(device_util.resolve("/cpu:0"),
                     "/job:worker/replica:0/task:0/device:CPU:0")
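# A hedged standalone sketch of the merge semantics the test above
# exercises (toy code, not the real device_util): resolve() fills in the
# fields a partial device string leaves unset from the enclosing device
# scope, and process-level defaults supply the rest (e.g. replica:0,
# task:0).
def _toy_merge(scope, partial):
  return {k: partial.get(k) or scope.get(k)
          for k in ("job", "replica", "task", "device")}

scope = {"job": None, "replica": "0", "task": "0", "device": "GPU:0"}
partial = {"job": "worker", "replica": None, "task": "1", "device": "CPU:0"}
assert _toy_merge(scope, partial) == {
    "job": "worker", "replica": "0", "task": "1", "device": "CPU:0"}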
def testCopyTensor(self):
  with ops.device("/cpu:0"):
    t = constant_op.constant([[1., 2.], [0, 0], [3., 4.]])
  destination = "/gpu:0"
  result = cross_tower_utils.copy_tensor_or_indexed_slices_to_device(
      t, destination)
  self._assert_values_equal(t, result)
  self.assertEqual(device_util.resolve(destination),
                   device_util.resolve(result.device))
def get_devices_from(destinations):
  if isinstance(destinations, value_lib.DistributedValues):
    return list(destinations.devices)
  elif isinstance(destinations, (resource_variable_ops.ResourceVariable,
                                 value_lib.AggregatingVariable)):
    return [destinations.device]
  elif isinstance(destinations, six.string_types):
    return [device_util.resolve(destinations)]
  else:
    return [device_util.resolve(destination) for destination in destinations]
def testCopyTensor(self):
  with ops.device("/cpu:0"):
    t = constant_op.constant([[1., 2.], [0, 0], [3., 4.]])
  destination = "/gpu:0"
  result = cross_device_utils.copy_tensor_or_indexed_slices_to_device(
      t, destination)
  self._assert_values_equal(t, result)
  self.assertEqual(device_util.resolve(destination),
                   device_util.resolve(result.device))
def _get_devices_from(self, colocate_with=None):
  if colocate_with is None:
    return self._devices
  elif isinstance(colocate_with, values.DistributedValues):
    # pylint: disable=protected-access
    return list(colocate_with._index.keys())
  elif isinstance(colocate_with, six.string_types):
    return [device_util.resolve(colocate_with)]
  elif isinstance(colocate_with, list):
    return [device_util.resolve(d) for d in colocate_with]
  else:
    return colocate_with
def get_devices_from(destinations):
  if isinstance(destinations, value_lib.DistributedValues):
    return list(destinations.devices)
  elif isinstance(destinations, (resource_variable_ops.ResourceVariable,
                                 value_lib.AggregatingVariable)):
    return [destinations.device]
  elif isinstance(destinations, six.string_types):
    return [device_util.resolve(destinations)]
  elif isinstance(destinations, (list, tuple)):
    return [device_util.resolve(destination) for destination in destinations]
  else:
    return [destinations.device]
def _initialize_local(self, num_gpus, devices):
  """Initializes the object for local training."""
  self._cluster_spec = None
  # Convert `num_gpus` into `devices`, shouldn't specify both.
  if devices is None:
    if num_gpus is None:
      num_gpus = context.num_gpus()
    if num_gpus == 0:
      devices = ["/device:CPU:0"]
    else:
      devices = ["/device:GPU:%d" % d for d in range(num_gpus)]
  elif num_gpus is not None:
    raise ValueError("Must only specify one of `devices` and `num_gpus`.")
  self._num_gpus = num_gpus
  # TODO(yuefengz): consider setting the default device.

  assert devices, "Must specify at least one device."
  assert len(set(devices)) == len(devices), (
      "No duplicates allowed in `devices` argument.")
  # TODO(josh11b): Require at least 2 devices?
  self._devices = [device_util.resolve(d) for d in devices]
  self._canonical_device_set = set(self._devices)
  self._device_index = values.PerReplica(
      {d: i for i, d in enumerate(devices)})
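# A hedged standalone sketch of the `num_gpus` -> `devices` conversion
# performed above (toy code; in the real method, `context.num_gpus()`
# supplies the count when neither argument is given):
def _toy_devices_from_num_gpus(num_gpus):
  if num_gpus == 0:
    return ["/device:CPU:0"]
  return ["/device:GPU:%d" % d for d in range(num_gpus)]

assert _toy_devices_from_num_gpus(0) == ["/device:CPU:0"]
assert _toy_devices_from_num_gpus(2) == ["/device:GPU:0", "/device:GPU:1"]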
def __init__(self,
             devices=None,
             num_gpus=None,
             cross_tower_ops=None,
             prefetch_on_device=None):
  super(MirroredStrategy, self).__init__()
  # Convert `num_gpus` into `devices`, shouldn't specify both.
  if devices is None:
    if num_gpus is None:
      num_gpus = context.num_gpus()
    devices = ["/device:GPU:%d" % d for d in range(num_gpus)]
  elif num_gpus is not None:
    raise ValueError("Must only specify one of `devices` and `num_gpus`.")

  assert devices, "Must specify at least one device."
  assert len(set(devices)) == len(devices), (
      "No duplicates allowed in `devices` argument.")
  # TODO(josh11b): Require at least 2 devices?
  self._devices = [device_util.resolve(d) for d in devices]
  self._canonical_device_set = set(self._devices)
  self._device_index = values.PerDevice(
      dict((d, i) for i, d in enumerate(devices)))
  self._cross_tower_ops = cross_tower_ops
  self._prefetch_on_device = prefetch_on_device
def distribute_dataset(self, dataset_fn):
  if self._cluster_spec:
    return values.MultiWorkerDataset(
        partial(self._call_dataset_fn, dataset_fn), self._worker_device_map,
        self._prefetch_on_device)
  else:
    return values.PerDeviceDataset(
        self._call_dataset_fn(dataset_fn), self._devices,
        self._prefetch_on_device,
        source_device=device_util.resolve("/device:CPU:0"))
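# A hedged usage sketch for `distribute_dataset` (contrib-era API;
# illustrative only, not from the original source):
#
#   def dataset_fn():
#     return tf.data.Dataset.range(100).batch(8)
#
#   dist_dataset = strategy.distribute_dataset(dataset_fn)
#
# Without a cluster_spec this wraps the dataset in a PerDeviceDataset that
# feeds every local compute device from the canonical CPU source device;
# with a cluster_spec it builds a MultiWorkerDataset keyed by
# self._worker_device_map.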
def _initialize_multi_worker(self, num_gpus, cluster_spec):
  """Initializes the object for multi-worker training."""
  cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
  self._cluster_spec = cluster_spec

  self._workers = []
  for job in ["chief", "worker"]:
    for task in range(len(cluster_spec.as_dict().get(job, []))):
      self._workers.append("/job:%s/task:%d" % (job, task))

  if num_gpus is None:
    raise ValueError("`num_gpus` is required if `cluster_spec` is given.")
  if num_gpus > 0:
    self._worker_device_map = {
        worker: [
            device_util.canonicalize(worker + "/device:GPU:%d" % gpu)
            for gpu in range(num_gpus)
        ] for worker in self._workers
    }
  else:
    self._worker_device_map = {
        worker: [device_util.canonicalize(worker, "/device:CPU:0")]
        for worker in self._workers
    }
  devices = nest.flatten(self._worker_device_map)

  # Setting `_default_device` will add a device scope in the
  # distribution.scope. We set the default device to the first worker.
  # When users specify a device under distribution.scope, e.g.
  #   with tf.device("/cpu:0"):
  #     ...
  # their ops will end up on the CPU device of the first worker, e.g.
  # "/job:worker/task:0/device:CPU:0". Note this is not used in tower mode.
  self._default_device = self._workers[0]

  assert devices, "Must specify at least one device."
  assert len(set(devices)) == len(devices), (
      "No duplicates allowed in `devices` argument.")
  # TODO(josh11b): Require at least 2 devices?
  self._devices = [device_util.resolve(d) for d in devices]
  self._canonical_device_set = set(self._devices)
  self._device_index = values.PerDevice(
      {d: i for i, d in enumerate(devices)})
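# A hedged standalone sketch of the worker device map built above for a
# two-worker cluster with two GPUs per worker (canonicalization elided;
# toy code, not the real method):
workers = ["/job:worker/task:0", "/job:worker/task:1"]
num_gpus = 2
worker_device_map = {
    w: ["%s/device:GPU:%d" % (w, g) for g in range(num_gpus)]
    for w in workers
}
assert worker_device_map["/job:worker/task:1"] == [
    "/job:worker/task:1/device:GPU:0", "/job:worker/task:1/device:GPU:1"]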
def fetch(self, val, destination="/device:CPU:0", fn=lambda x: x):
  """Return a copy of `val` or `fn(val)` on `destination`.

  This is useful for getting a mirrored value onto a device. It will
  attempt to avoid a copy by checking if the value is already on the
  destination device.

  Args:
    val: Value (which may be mirrored) to copy.
    destination: A device string to copy the value to.
    fn: An optional function to apply to the value on the source device,
        before copying.

  Returns:
    A `Tensor` on `destination`.
  """
  _require_cross_tower_context(self)
  assert isinstance(destination, six.string_types)
  destination = device_util.resolve(destination)
  return self._fetch(val, destination, fn)
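# A hedged usage sketch for `fetch` (illustrative only; `distribution` and
# `mirrored_value` are assumed to come from the surrounding contrib-era
# API):
#
#   result = distribution.fetch(mirrored_value,
#                               destination="/device:CPU:0",
#                               fn=lambda x: 2 * x)
#
# `fn` runs on the source device before the copy, and the copy is skipped
# when the value already lives on the (resolved) destination.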
def _initialize_local(self, num_gpus, devices):
  """Initializes the object for local training."""
  self._cluster_spec = None
  # Convert `num_gpus` into `devices`, shouldn't specify both.
  if devices is None:
    if num_gpus is None:
      num_gpus = context.num_gpus()
    if num_gpus == 0:
      devices = ["/device:CPU:0"]
    else:
      devices = ["/device:GPU:%d" % d for d in range(num_gpus)]
  elif num_gpus is not None:
    raise ValueError("Must only specify one of `devices` and `num_gpus`.")
  self._num_gpus = num_gpus
  # TODO(yuefengz): consider setting the default device.

  assert devices, "Must specify at least one device."
  assert len(set(devices)) == len(devices), (
      "No duplicates allowed in `devices` argument.")
  # TODO(josh11b): Require at least 2 devices?
  self._devices = [device_util.resolve(d) for d in devices]
  self._canonical_device_set = set(self._devices)
  self._device_index = values.PerDevice(
      {d: i for i, d in enumerate(devices)})
def __init__(self,
             devices=None,
             num_gpus=None,
             cluster_spec=None,
             cross_tower_ops=None,
             prefetch_on_device=None):
  super(MirroredStrategy, self).__init__()

  if cluster_spec:
    if devices is not None:
      raise ValueError("Specifying devices when `cluster_spec` is also "
                       "given is not supported in MirroredStrategy.")

    # TODO(yuefengz): use the utility method to normalize cluster_spec.
    if isinstance(cluster_spec, (dict, cluster_pb2.ClusterDef)):
      cluster_spec = server_lib.ClusterSpec(cluster_spec)
    elif not isinstance(cluster_spec, server_lib.ClusterSpec):
      raise ValueError(
          "`cluster_spec` should be a dict, a `tf.train.ClusterSpec` or a "
          "`tf.train.ClusterDef` object.")
    self._cluster_spec = cluster_spec

    self._workers = []
    for job in sorted(cluster_spec.jobs):
      for task in range(cluster_spec.num_tasks(job)):
        self._workers.append("/job:%s/task:%d" % (job, task))

    if num_gpus is None:
      raise ValueError("`num_gpus` is required if `cluster_spec` is given.")
    self._num_gpus = num_gpus
    if num_gpus > 0:
      self._worker_device_map = {
          worker: [
              device_util.canonicalize(worker + "/device:GPU:%d" % gpu)
              for gpu in range(num_gpus)
          ] for worker in self._workers
      }
    else:
      self._worker_device_map = {
          worker: [device_util.canonicalize(worker, "/device:CPU:0")]
          for worker in self._workers
      }
    devices = nest.flatten(self._worker_device_map)

    # Setting `_default_device` will add a device scope in the
    # distribution.scope. We set the default device to the first worker.
    # When users specify a device under distribution.scope, e.g.
    #   with tf.device("/cpu:0"):
    #     ...
    # their ops will end up on the CPU device of the first worker, e.g.
    # "/job:worker/task:0/device:CPU:0". Note this is not used in tower
    # mode.
    self._default_device = self._workers[0]
  else:
    self._cluster_spec = None
    # Convert `num_gpus` into `devices`, shouldn't specify both.
    if devices is None:
      if num_gpus is None:
        num_gpus = context.num_gpus()
      devices = ["/device:GPU:%d" % d for d in range(num_gpus)]
    elif num_gpus is not None:
      raise ValueError("Must only specify one of `devices` and `num_gpus`.")
    # TODO(yuefengz): consider setting the default device.

  assert devices, "Must specify at least one device."
  assert len(set(devices)) == len(devices), (
      "No duplicates allowed in `devices` argument.")
  # TODO(josh11b): Require at least 2 devices?
  self._devices = [device_util.resolve(d) for d in devices]
  self._canonical_device_set = set(self._devices)
  self._device_index = values.PerDevice(
      {d: i for i, d in enumerate(devices)})
  self._cross_tower_ops = cross_tower_ops
  self._prefetch_on_device = prefetch_on_device
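# A hedged usage sketch of the constructor above (contrib-era API;
# illustrative only). Exactly one of `devices`/`num_gpus` may be given,
# and `devices` cannot be combined with `cluster_spec`:
#
#   strategy = MirroredStrategy(num_gpus=2)
#   strategy = MirroredStrategy(
#       devices=["/device:GPU:0", "/device:GPU:1"])
#   strategy = MirroredStrategy(
#       num_gpus=1, cluster_spec={"worker": ["host1:2222", "host2:2222"]})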