def __init__(self, dataset, devices, max_buffer_size=1, prefetch_buffer_size=1, source_device="/cpu:0"): """Constructs a MultiDeviceIterator. Args: dataset: The input dataset to be iterated over. devices: The list of devices to fetch data to. max_buffer_size: Maximum size of the host side per device buffer to keep. prefetch_buffer_size: if > 1, then we setup a buffer on each device to prefetch into. source_device: The host device to place the `dataset` on. Raises: RuntimeError: If run in Eager mode. """ if context.executing_eagerly(): # TODO(rohanj): Fix this. Tracking bug: b/116467184 raise RuntimeError( "MultiDeviceIterator is not currently supported in " "Eager mode.") self._dataset = dataset self._devices = devices self._source_device = source_device self._source_device_tensor = ops.convert_to_tensor(source_device) self._flat_output_shapes = nest.flatten( sparse.as_dense_shapes(self._dataset.output_shapes, self._dataset.output_classes)) self._flat_output_types = nest.flatten( sparse.as_dense_types(self._dataset.output_types, self._dataset.output_classes)) # Create the MultiDeviceIterator. with ops.device(self._source_device): self._multi_device_iterator_resource = ( gen_dataset_ops.multi_device_iterator( devices=self._devices, shared_name="", container="", output_types=self._flat_output_types, output_shapes=self._flat_output_shapes)) # The incarnation ID is used to ensure consistency between the per-device # iterators and the multi-device iterator. self._incarnation_id = gen_dataset_ops.multi_device_iterator_init( self._dataset._as_variant_tensor(), # pylint: disable=protected-access self._multi_device_iterator_resource, max_buffer_size=max_buffer_size) # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to # initialize the device side of the pipeline. This would allow the # MultiDeviceIterator to choose, for example, to move some transformations # into the device side from its input. It might be useful in rewriting. # Create the per device iterators. self._device_iterators = [] i = 0 for device in self._devices: ds = _PerDeviceGenerator(i, self._multi_device_iterator_resource, self._incarnation_id, self._source_device_tensor, device, self._dataset.output_shapes, self._dataset.output_types, self._dataset.output_classes) if prefetch_buffer_size > 0: ds = ds.prefetch(prefetch_buffer_size) with ops.device(device): self._device_iterators.append(ds.make_initializable_iterator()) i += 1 device_iterator_initializers = [ iterator.initializer for iterator in self._device_iterators ] self._initializer = control_flow_ops.group( *device_iterator_initializers)
def __init__(self, dataset, devices, max_buffer_size=1, prefetch_buffer_size=1, source_device="/cpu:0"): """Constructs a MultiDeviceIterator. Args: dataset: The input dataset to be iterated over. devices: The list of devices to fetch data to. max_buffer_size: Maximum size of the host side per device buffer to keep. prefetch_buffer_size: if > 1, then we setup a buffer on each device to prefetch into. source_device: The host device to place the `dataset` on. In order to prevent deadlocks, if the prefetch_buffer_size is greater than the max_buffer_size, we set the max_buffer_size to prefetch_buffer_size. """ options = dataset_ops.Options() options.experimental_distribute.num_devices = len(devices) dataset = dataset.with_options(options) self._dataset = dataset._apply_options() # pylint: disable=protected-access self._experimental_slack = dataset.options().experimental_slack self._devices = devices self._source_device = source_device self._source_device_tensor = ops.convert_to_tensor(source_device) self._max_buffer_size = max_buffer_size self._prefetch_buffer_size = prefetch_buffer_size if self._prefetch_buffer_size > self._max_buffer_size: self._max_buffer_size = self._prefetch_buffer_size # Create the MultiDeviceIterator. with ops.device(self._source_device): # TODO(b/121378567): Get rid of this shared_name hack. shared_name = "" if context.executing_eagerly(): shared_name = context.shared_name() self._multi_device_iterator_resource = ( gen_dataset_ops.multi_device_iterator( devices=self._devices, shared_name=shared_name, container="", **self._dataset._flat_structure)) # pylint: disable=protected-access if context.executing_eagerly(): # Delete the resource when this object is deleted self._resource_deleter = resource_variable_ops.EagerResourceDeleter( handle=self._multi_device_iterator_resource, handle_device=self._source_device) # The incarnation ID is used to ensure consistency between the per-device # iterators and the multi-device iterator. self._incarnation_id = gen_dataset_ops.multi_device_iterator_init( self._dataset._variant_tensor, # pylint: disable=protected-access self._multi_device_iterator_resource, max_buffer_size=self._max_buffer_size) self._prototype_device_datasets = [] for i, device in enumerate(self._devices): with ops.device(device): ds = _PerDeviceGenerator(i, self._multi_device_iterator_resource, self._incarnation_id, self._source_device_tensor, self._dataset.element_spec) self._prototype_device_datasets.append(ds) # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to # initialize the device side of the pipeline. This would allow the # MultiDeviceIterator to choose, for example, to move some transformations # into the device side from its input. It might be useful in rewriting. # Create the per device iterators. self._device_iterators = [] for i, device in enumerate(self._devices): with ops.device(device): ds = _create_device_dataset(self._prototype_device_datasets[i], self._incarnation_id, self._prefetch_buffer_size, self._experimental_slack) if context.executing_eagerly(): self._device_iterators.append( dataset_ops.make_one_shot_iterator(ds)) else: self._device_iterators.append( dataset_ops.make_initializable_iterator(ds)) if not context.executing_eagerly(): device_iterator_initializers = [ iterator.initializer for iterator in self._device_iterators ] self._initializer = control_flow_ops.group( *device_iterator_initializers)
def __init__(self, dataset, devices, max_buffer_size=1, prefetch_buffer_size=1, source_device="/cpu:0"): """Constructs a MultiDeviceIterator. Args: dataset: The input dataset to be iterated over. devices: The list of devices to fetch data to. max_buffer_size: Maximum size of the host side per device buffer to keep. prefetch_buffer_size: if > 1, then we setup a buffer on each device to prefetch into. source_device: The host device to place the `dataset` on. Raises: RuntimeError: If run in Eager mode. """ self._dataset = dataset._apply_options() # pylint: disable=protected-access self._devices = devices self._source_device = source_device self._source_device_tensor = ops.convert_to_tensor(source_device) # Create the MultiDeviceIterator. with ops.device(self._source_device): # TODO(b/121378567): Get rid of this shared_name hack. shared_name = "" if context.executing_eagerly(): shared_name = context.shared_name() self._multi_device_iterator_resource = ( gen_dataset_ops.multi_device_iterator( devices=self._devices, shared_name=shared_name, container="", **dataset_ops.flat_structure(dataset))) if context.executing_eagerly(): # Delete the resource when this object is deleted self._resource_deleter = resource_variable_ops.EagerResourceDeleter( handle=self._multi_device_iterator_resource, handle_device=self._source_device) # The incarnation ID is used to ensure consistency between the per-device # iterators and the multi-device iterator. self._incarnation_id = gen_dataset_ops.multi_device_iterator_init( self._dataset._variant_tensor, # pylint: disable=protected-access self._multi_device_iterator_resource, max_buffer_size=max_buffer_size) # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to # initialize the device side of the pipeline. This would allow the # MultiDeviceIterator to choose, for example, to move some transformations # into the device side from its input. It might be useful in rewriting. # Create the per device iterators. self._device_iterators = [] for i, device in enumerate(self._devices): with ops.device(device): ds = _PerDeviceGenerator(i, self._multi_device_iterator_resource, self._incarnation_id, self._source_device_tensor, dataset._element_structure) # pylint: disable=protected-access if prefetch_buffer_size > 0: ds = ds.prefetch(prefetch_buffer_size) # TODO(jsimsa): Enable auto-tuning and optimizations when supported for # non-CPU devices. options = dataset_ops.Options() options.experimental_autotune = False options.experimental_optimization.apply_default_optimizations = False ds = ds.with_options(options) if context.executing_eagerly(): self._device_iterators.append( dataset_ops.make_one_shot_iterator(ds)) else: self._device_iterators.append( dataset_ops.make_initializable_iterator(ds)) if not context.executing_eagerly(): device_iterator_initializers = [ iterator.initializer for iterator in self._device_iterators ] self._initializer = control_flow_ops.group( *device_iterator_initializers)
def __init__(self, dataset, devices, max_buffer_size=1, prefetch_buffer_size=1, source_device="/cpu:0"): """Constructs a MultiDeviceIterator. Args: dataset: The input dataset to be iterated over. devices: The list of devices to fetch data to. max_buffer_size: Maximum size of the host side per device buffer to keep. prefetch_buffer_size: if > 1, then we setup a buffer on each device to prefetch into. source_device: The host device to place the `dataset` on. Raises: RuntimeError: If run in Eager mode. """ if context.executing_eagerly(): # TODO(rohanj): Fix this. Tracking bug: b/116467184 raise RuntimeError("MultiDeviceIterator is not currently supported in " "Eager mode.") self._dataset = dataset._apply_options() # pylint: disable=protected-access self._devices = devices self._source_device = source_device self._source_device_tensor = ops.convert_to_tensor(source_device) self._flat_output_shapes = nest.flatten( sparse.as_dense_shapes(self._dataset.output_shapes, self._dataset.output_classes)) self._flat_output_types = nest.flatten( sparse.as_dense_types(self._dataset.output_types, self._dataset.output_classes)) # Create the MultiDeviceIterator. with ops.device(self._source_device): self._multi_device_iterator_resource = ( gen_dataset_ops.multi_device_iterator( devices=self._devices, shared_name="", container="", output_types=self._flat_output_types, output_shapes=self._flat_output_shapes)) # The incarnation ID is used to ensure consistency between the per-device # iterators and the multi-device iterator. self._incarnation_id = gen_dataset_ops.multi_device_iterator_init( self._dataset._as_variant_tensor(), # pylint: disable=protected-access self._multi_device_iterator_resource, max_buffer_size=max_buffer_size) # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to # initialize the device side of the pipeline. This would allow the # MultiDeviceIterator to choose, for example, to move some transformations # into the device side from its input. It might be useful in rewriting. # Create the per device iterators. self._device_iterators = [] i = 0 for device in self._devices: ds = _PerDeviceGenerator( i, self._multi_device_iterator_resource, self._incarnation_id, self._source_device_tensor, device, self._dataset.output_shapes, self._dataset.output_types, self._dataset.output_classes) if prefetch_buffer_size > 0: ds = ds.prefetch(prefetch_buffer_size) with ops.device(device): self._device_iterators.append(ds.make_initializable_iterator()) i += 1 device_iterator_initializers = [ iterator.initializer for iterator in self._device_iterators ] self._initializer = control_flow_ops.group(*device_iterator_initializers)
def __init__(self, dataset, devices, max_buffer_size=1, prefetch_buffer_size=1, source_device="/cpu:0"): """Constructs a MultiDeviceIterator. Args: dataset: The input dataset to be iterated over. devices: The list of devices to fetch data to. max_buffer_size: Maximum size of the host side per device buffer to keep. prefetch_buffer_size: if > 1, then we setup a buffer on each device to prefetch into. source_device: The host device to place the `dataset` on. Raises: RuntimeError: If run in Eager mode. """ self._dataset = dataset._apply_options() # pylint: disable=protected-access self._devices = devices self._source_device = source_device self._source_device_tensor = ops.convert_to_tensor(source_device) # Create the MultiDeviceIterator. with ops.device(self._source_device): # TODO(b/121378567): Get rid of this shared_name hack. shared_name = "" if context.executing_eagerly(): # Ensure a unique name when eager execution is enabled to avoid spurious # sharing issues. shared_name += str(ops.uid()) self._multi_device_iterator_resource = ( gen_dataset_ops.multi_device_iterator( devices=self._devices, shared_name=shared_name, container="", **dataset_ops.flat_structure(dataset))) if context.executing_eagerly(): # Delete the resource when this object is deleted self._resource_deleter = resource_variable_ops.EagerResourceDeleter( handle=self._multi_device_iterator_resource, handle_device=self._source_device) # The incarnation ID is used to ensure consistency between the per-device # iterators and the multi-device iterator. self._incarnation_id = gen_dataset_ops.multi_device_iterator_init( self._dataset._variant_tensor, # pylint: disable=protected-access self._multi_device_iterator_resource, max_buffer_size=max_buffer_size) # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to # initialize the device side of the pipeline. This would allow the # MultiDeviceIterator to choose, for example, to move some transformations # into the device side from its input. It might be useful in rewriting. # Create the per device iterators. self._device_iterators = [] for i, device in enumerate(self._devices): with ops.device(device): ds = _PerDeviceGenerator( i, self._multi_device_iterator_resource, self._incarnation_id, self._source_device_tensor, dataset._element_structure) # pylint: disable=protected-access if prefetch_buffer_size > 0: ds = ds.prefetch(prefetch_buffer_size) # TODO(jsimsa): Enable auto-tuning and optimizations when supported for # non-CPU devices. options = dataset_ops.Options() options.experimental_autotune = False options.experimental_optimization.apply_default_optimizations = False ds = ds.with_options(options) if context.executing_eagerly(): self._device_iterators.append(dataset_ops.make_one_shot_iterator(ds)) else: self._device_iterators.append( dataset_ops.make_initializable_iterator(ds)) if not context.executing_eagerly(): device_iterator_initializers = [ iterator.initializer for iterator in self._device_iterators ] self._initializer = control_flow_ops.group(*device_iterator_initializers)
def __init__(self, dataset, devices, max_buffer_size=1, prefetch_buffer_size=1, source_device="/cpu:0"): """Constructs a MultiDeviceIterator. Args: dataset: The input dataset to be iterated over. devices: The list of devices to fetch data to. max_buffer_size: Maximum size of the host side per device buffer to keep. prefetch_buffer_size: if > 1, then we setup a buffer on each device to prefetch into. source_device: The host device to place the `dataset` on. In order to prevent deadlocks, if the prefetch_buffer_size is greater than the max_buffer_size, we set the max_buffer_size to prefetch_buffer_size. """ options = dataset_ops.Options() options.experimental_distribute.num_devices = len(devices) dataset = dataset.with_options(options) self._dataset = dataset._apply_options() # pylint: disable=protected-access self._experimental_slack = dataset.options().experimental_slack self._devices = devices self._source_device = source_device self._source_device_tensor = ops.convert_to_tensor(source_device) self._max_buffer_size = max_buffer_size self._prefetch_buffer_size = prefetch_buffer_size if self._prefetch_buffer_size > self._max_buffer_size: self._max_buffer_size = self._prefetch_buffer_size # Create the MultiDeviceIterator. with ops.device(self._source_device): # TODO(b/121378567): Get rid of this shared_name hack. shared_name = "" if context.executing_eagerly(): shared_name = context.shared_name() self._multi_device_iterator_resource = ( gen_dataset_ops.multi_device_iterator( devices=self._devices, shared_name=shared_name, container="", **dataset_ops.flat_structure(self._dataset))) if context.executing_eagerly(): # Delete the resource when this object is deleted self._resource_deleter = resource_variable_ops.EagerResourceDeleter( handle=self._multi_device_iterator_resource, handle_device=self._source_device) # The incarnation ID is used to ensure consistency between the per-device # iterators and the multi-device iterator. self._incarnation_id = gen_dataset_ops.multi_device_iterator_init( self._dataset._variant_tensor, # pylint: disable=protected-access self._multi_device_iterator_resource, max_buffer_size=self._max_buffer_size) self._prototype_device_datasets = [] for i, device in enumerate(self._devices): with ops.device(device): ds = _PerDeviceGenerator( i, self._multi_device_iterator_resource, self._incarnation_id, self._source_device_tensor, self._dataset._element_structure) # pylint: disable=protected-access self._prototype_device_datasets.append(ds) # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to # initialize the device side of the pipeline. This would allow the # MultiDeviceIterator to choose, for example, to move some transformations # into the device side from its input. It might be useful in rewriting. # Create the per device iterators. self._device_iterators = [] for i, device in enumerate(self._devices): with ops.device(device): ds = self._create_device_dataset(i) if context.executing_eagerly(): self._device_iterators.append(dataset_ops.make_one_shot_iterator(ds)) else: self._device_iterators.append( dataset_ops.make_initializable_iterator(ds)) if not context.executing_eagerly(): device_iterator_initializers = [ iterator.initializer for iterator in self._device_iterators ] self._initializer = control_flow_ops.group(*device_iterator_initializers)