Example 1
 def _apply_fn(dataset):
     options = dataset_ops.Options()
     options.experimental_autotune = False
     opt_options = optimization_options.OptimizationOptions()
     opt_options.apply_default_optimizations = False
     options.experimental_optimization = opt_options
     return _CopyToDeviceDataset(
         dataset, target_device=target_device,
         source_device=source_device).with_options(options)
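The snippet above uses TensorFlow's internal modules. For orientation, here is a minimal sketch of the same idea through the public surface, assuming a TF 1.x build where `tf.data.Options` and `tf.data.experimental.OptimizationOptions` are the exported counterparts of `dataset_ops.Options` and `optimization_options.OptimizationOptions`:

import tensorflow as tf

# Sketch (assumes TF 1.x public API): disable autotuning and the default
# static optimizations, mirroring what _apply_fn does above.
dataset = tf.data.Dataset.range(10)

options = tf.data.Options()
options.experimental_autotune = False
opt_options = tf.data.experimental.OptimizationOptions()
opt_options.apply_default_optimizations = False
options.experimental_optimization = opt_options

dataset = dataset.with_options(options)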
Example 2
 def testOptionsHaveDefaults(self):
     options1 = dataset_ops.Options()
     options2 = dataset_ops.Options()
     self.assertIsNot(options1.experimental_optimization,
                      options2.experimental_optimization)
     self.assertIsNot(options1.threading, options2.threading)
     self.assertEqual(options1.experimental_optimization,
                      optimization_options.OptimizationOptions())
     self.assertEqual(options1.threading,
                      threading_options.ThreadingOptions())
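The same default-construction behaviour can be observed through the public API; a small sketch, assuming `tf.data.Options` is the exported `dataset_ops.Options`:

import tensorflow as tf

# Sketch: every Options() instance gets its own, freshly built sub-options
# that compare equal to a default OptimizationOptions().
o1, o2 = tf.data.Options(), tf.data.Options()
assert o1.experimental_optimization is not o2.experimental_optimization
assert o1.experimental_optimization == tf.data.experimental.OptimizationOptions()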
Example 3
 def _make_dataset(node_names):
     dataset = base_dataset.apply(optimization.assert_next(node_names))
     dataset = dataset.map(map_fn, num_parallel_calls)
     dataset = dataset.batch(100)
     options = dataset_ops.Options()
     opt_options = optimization_options.OptimizationOptions()
     opt_options.map_and_batch_fusion = False
     options.experimental_optimization = opt_options
     dataset = dataset.with_options(options)
     return dataset
Example 4
    def testOptimizationDisableDefault(self):
        """Tests that we can disable all static optimizations enabled by default.

        If the `apply_default_optimizations` optimization options flag is False,
        only explicitly enabled optimizations will be applied.
        """
        options = dataset_ops.Options()
        opt_options = optimization_options.OptimizationOptions()
        opt_options.hoist_random_uniform = True
        opt_options.apply_default_optimizations = False
        options.experimental_optimization = opt_options
        expected_optimizations = ["hoist_random_uniform"]
        self.assertEqual(options._static_optimizations(),
                         expected_optimizations)
Example 5
    def _compare(self, input_dataset, map_fn, batch_size, input_size, str_id):
        num_elems = int(np.sum([np.prod(x) for x in input_size]))
        name_template = "{}__batch_size_{}_input_element_size_{}_{}"

        base_dataset = input_dataset.map(map_fn).batch(batch_size)

        options = dataset_ops.Options()
        opt_options = optimization_options.OptimizationOptions()
        # Disable default map_and_batch_fusion optimization
        opt_options.map_and_batch_fusion = False
        options.experimental_optimization = opt_options
        base_dataset = base_dataset.with_options(options)

        unoptimized_op = dataset_ops.make_one_shot_iterator(
            base_dataset).get_next()

        optimized_options = dataset_ops.Options()
        opt_options = optimization_options.OptimizationOptions()
        opt_options.map_vectorization = True
        optimized_options.experimental_optimization = opt_options
        optimized = base_dataset.with_options(optimized_options)
        optimized_op = dataset_ops.make_one_shot_iterator(optimized).get_next()

        unoptimized_time = self._run(unoptimized_op,
                                     name=name_template.format(
                                         str_id, batch_size, num_elems,
                                         "unoptimized"))
        optimized_time = self._run(optimized_op,
                                   name=name_template.format(
                                       str_id, batch_size, num_elems,
                                       "optimized"))

        print("Batch size: {}\n"
              "Input element size: {}\n"
              "Transformation: {}\n"
              "Speedup: {}\n".format(batch_size, input_size, str_id,
                                     (unoptimized_time / optimized_time)))
Example 6
    def testSkipEagerOptimizationWithCapturedRefVar(self, dataset_fn):
        """Tests that default optimizations are disabled with ref variables."""
        variable = variable_scope.get_variable("v",
                                               initializer=0,
                                               use_resource=False)
        assign_op = variable.assign_add(1)

        unoptimized_dataset = dataset_fn(variable)

        options = dataset_ops.Options()
        opt_options = optimization_options.OptimizationOptions()
        opt_options.noop_elimination = True
        opt_options.map_and_batch_fusion = True
        options.experimental_optimization = opt_options
        optimized_dataset = unoptimized_dataset.with_options(options)

        # Check that warning is logged.
        warnings.simplefilter("always")
        with warnings.catch_warnings(record=True) as w:
            optimized_it = optimized_dataset.make_initializable_iterator()

        self.assertGreaterEqual(len(w), 1)
        expected = (
            "tf.data static optimizations are not compatible with "
            "tf.Variable. The following optimizations will be disabled: %s."
            " To enable optimizations, use resource variables instead by "
            "calling `tf.enable_resource_variables()` at the start of the "
            "program." % (", ".join(opt_options._static_optimizations())))
        self.assertTrue(any([expected in str(warning) for warning in w]))

        # Check that outputs are the same in the optimized and unoptimized cases,
        # when the variable value is changing.
        unoptimized_it = unoptimized_dataset.make_initializable_iterator()
        with ops.control_dependencies([assign_op]):
            unoptimized_output = unoptimized_it.get_next()
            optimized_output = optimized_it.get_next()

        self.evaluate(variable.initializer)
        self.evaluate((unoptimized_it.initializer, optimized_it.initializer))
        while True:
            try:
                unoptimized, optimized = self.evaluate(
                    (unoptimized_output, optimized_output))
                self.assertEqual(unoptimized, optimized)
            except errors.OutOfRangeError:
                break
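The warning checked in this test points at the remedy itself. The sketch below is a hypothetical TF 1.x program, not part of the test: it opts into resource variables up front so the requested static optimizations are not silently disabled.

import tensorflow as tf

tf.enable_resource_variables()  # must run before any variables are created

v = tf.get_variable("v", initializer=tf.constant(0, dtype=tf.int64))
dataset = tf.data.Dataset.range(10).map(lambda x: x + v)  # captures a resource variable

options = tf.data.Options()
opt_options = tf.data.experimental.OptimizationOptions()
opt_options.noop_elimination = True
opt_options.map_and_batch_fusion = True
options.experimental_optimization = opt_options
dataset = dataset.with_options(options)  # no "optimizations disabled" warning expected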
Example 7
    def _get_test_datasets(self,
                           base_dataset,
                           map_fn,
                           num_parallel_calls=None,
                           expect_optimized=True):
        """Given base dataset and map fn, creates test datasets.

        Returns a tuple of (unoptimized dataset, optimized dataset). The
        unoptimized dataset has the assertion that Batch follows Map. The optimized
        dataset has the assertion that Map follows Batch, and has the
        "map_vectorization" optimization applied.

        Args:
          base_dataset: Input dataset to map->batch
          map_fn: Map function to use
          num_parallel_calls: (Optional.) num_parallel_calls argument for map
          expect_optimized: (Optional.) Whether we expect the optimization to take
            place, in which case we will assert that Batch is followed by Map,
            otherwise Map followed by Batch. Defaults to True.

        Returns:
          Tuple of (unoptimized dataset, optimized dataset).
        """
        map_node_name = "Map" if num_parallel_calls is None else "ParallelMap"

        def _make_dataset(node_names):
            dataset = base_dataset.apply(optimization.assert_next(node_names))
            dataset = dataset.map(map_fn, num_parallel_calls)
            dataset = dataset.batch(100)
            options = dataset_ops.Options()
            opt_options = optimization_options.OptimizationOptions()
            opt_options.map_and_batch_fusion = False
            options.experimental_optimization = opt_options
            dataset = dataset.with_options(options)
            return dataset

        unoptimized = _make_dataset([map_node_name, "Batch"])
        optimized = _make_dataset(["Batch", map_node_name] if expect_optimized
                                  else [map_node_name, "Batch"])
        options = dataset_ops.Options()
        opt_options = optimization_options.OptimizationOptions()
        opt_options.map_vectorization = True
        options.experimental_optimization = opt_options
        optimized = optimized.with_options(options)
        return unoptimized, optimized
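Outside the test harness, the same rewrite can be requested through the public options surface; a minimal sketch, assuming a TF 1.x build where `map_vectorization` is still a plain boolean as in the internal code above:

import tensorflow as tf

dataset = tf.data.Dataset.range(1000).map(lambda x: x * 2).batch(100)

options = tf.data.Options()
opt_options = tf.data.experimental.OptimizationOptions()
opt_options.map_vectorization = True       # ask the rewriter to move Map after Batch
opt_options.map_and_batch_fusion = False   # keep Map and Batch as separate nodes
options.experimental_optimization = opt_options
dataset = dataset.with_options(options)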
Example 8
    def testOptimizationNestedDatasetWithModifiedRetval(self):
        def flat_map_fn(_):
            dataset = dataset_ops.Dataset.from_tensors(0)
            dataset = dataset.apply(optimization.assert_next(["MapAndBatch"]))
            # Should be fused by map and batch fusion
            dataset = dataset.map(lambda x: x)
            dataset = dataset.batch(1)
            return dataset

        dataset = dataset_ops.Dataset.range(1)
        dataset = dataset.flat_map(flat_map_fn)

        # TODO(b/120558523): We use Options instead of _OptimizeDataset directly
        # here because of a bug with chaining _OptimizeDatasets when there are
        # nested dataset functions
        options = dataset_ops.Options()
        opt_options = optimization_options.OptimizationOptions()
        opt_options.map_and_batch_fusion = True
        options.experimental_optimization = opt_options
        dataset = dataset.with_options(options)
        self.assertDatasetProduces(dataset, expected_output=[[0]])
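The point of the test is that options set on the outer dataset also reach datasets built inside `flat_map` functions. A short sketch of the same pipeline with the public API, under the same TF 1.x naming assumptions as above:

import tensorflow as tf

def flat_map_fn(_):
    # The inner map+batch pair is the candidate for map-and-batch fusion.
    return tf.data.Dataset.from_tensors(0).map(lambda x: x).batch(1)

dataset = tf.data.Dataset.range(1).flat_map(flat_map_fn)

options = tf.data.Options()
opt_options = tf.data.experimental.OptimizationOptions()
opt_options.map_and_batch_fusion = True
options.experimental_optimization = opt_options
dataset = dataset.with_options(options)   # expected output: a single batch [0]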
Example 9
    def __init__(self,
                 dataset,
                 devices,
                 max_buffer_size=1,
                 prefetch_buffer_size=1,
                 source_device="/cpu:0"):
        """Constructs a MultiDeviceIterator.

        Args:
          dataset: The input dataset to be iterated over.
          devices: The list of devices to fetch data to.
          max_buffer_size: Maximum size of the host side per device buffer to keep.
          prefetch_buffer_size: if > 0, then we set up a buffer on each device to
            prefetch into.
          source_device: The host device to place the `dataset` on.

        Raises:
          RuntimeError: If run in Eager mode.
        """
        if context.executing_eagerly():
            # TODO(rohanj): Fix this. Tracking bug: b/116467184
            raise RuntimeError(
                "MultiDeviceIterator is not currently supported in "
                "Eager mode.")
        self._dataset = dataset._apply_options()  # pylint: disable=protected-access
        self._devices = devices
        self._source_device = source_device
        self._source_device_tensor = ops.convert_to_tensor(source_device)

        # Create the MultiDeviceIterator.
        with ops.device(self._source_device):
            self._multi_device_iterator_resource = (
                gen_dataset_ops.multi_device_iterator(
                    devices=self._devices,
                    shared_name="",
                    container="",
                    **dataset_ops.flat_structure(dataset)))

            # The incarnation ID is used to ensure consistency between the per-device
            # iterators and the multi-device iterator.
            self._incarnation_id = gen_dataset_ops.multi_device_iterator_init(
                self._dataset._as_variant_tensor(),  # pylint: disable=protected-access
                self._multi_device_iterator_resource,
                max_buffer_size=max_buffer_size)

        # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to
        # initialize the device side of the pipeline. This would allow the
        # MultiDeviceIterator to choose, for example, to move some transformations
        # into the device side from its input. It might be useful in rewriting.
        # Create the per device iterators.
        self._device_iterators = []
        for i, device in enumerate(self._devices):
            ds = _PerDeviceGenerator(i, self._multi_device_iterator_resource,
                                     self._incarnation_id,
                                     self._source_device_tensor, device,
                                     dataset._element_structure)  # pylint: disable=protected-access
            if prefetch_buffer_size > 0:
                ds = ds.prefetch(prefetch_buffer_size)
            # TODO(jsimsa): Enable auto-tuning and optimizations when supported for
            # non-CPU devices.
            options = dataset_ops.Options()
            options.experimental_autotune = False
            opt_options = optimization_options.OptimizationOptions()
            opt_options.apply_default_optimizations = False
            options.experimental_optimization = opt_options
            ds = ds.with_options(options)
            with ops.device(device):
                self._device_iterators.append(ds.make_initializable_iterator())

        device_iterator_initializers = [
            iterator.initializer for iterator in self._device_iterators
        ]
        self._initializer = control_flow_ops.group(
            *device_iterator_initializers)
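For completeness, a usage sketch of the public export `tf.data.experimental.MultiDeviceIterator`, which wraps the class above (assumes a TF 1.x graph-mode program and that `/gpu:0` and `/gpu:1` exist):

import tensorflow as tf

dataset = tf.data.Dataset.range(100).batch(10)
multi_device_iterator = tf.data.experimental.MultiDeviceIterator(
    dataset, devices=["/gpu:0", "/gpu:1"], prefetch_buffer_size=2)
elements = multi_device_iterator.get_next()  # no device argument: one element per device

with tf.Session() as sess:
    sess.run(multi_device_iterator.initializer)
    per_device_batches = sess.run(elements)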