def testMapAndBatch(self):
  """Verifies the op sequence for an autotuned map + batch + take pipeline.

  After `_enable_autotune_buffers` is applied, the transformations following
  the range dataset are asserted to be MapAndBatch, Prefetch and FiniteTake,
  and the pipeline must yield five batches of ten consecutive integers.
  """
  expected_nodes = ["MapAndBatch", "Prefetch", "FiniteTake"]
  pipeline = dataset_ops.Dataset.range(100)
  pipeline = pipeline.apply(testing.assert_next(expected_nodes))
  pipeline = pipeline.map(
      lambda x: x + 1, num_parallel_calls=dataset_ops.AUTOTUNE)
  pipeline = pipeline.batch(10).take(5)
  pipeline = self._enable_autotune_buffers(pipeline)

  # Batch k holds the values 10 * k + 1 .. 10 * k + 10.
  expected_batches = []
  for start in range(0, 50, 10):
    expected_batches.append(list(range(start + 1, start + 11)))
  self.assertDatasetProduces(pipeline, expected_batches)
def testParallelInterleave(self):
  """Verifies the op sequence for an autotuned interleave + take pipeline.

  Asserts that ParallelInterleave, Prefetch and FiniteTake follow the range
  dataset once `_enable_autotune_buffers` has been applied, and that the
  pipeline yields the integers 1..50.
  """
  expected_nodes = ["ParallelInterleave", "Prefetch", "FiniteTake"]

  def shifted_singleton(x):
    # Each input element becomes a one-element dataset holding x + 1.
    return dataset_ops.Dataset.from_tensors(x + 1)

  pipeline = dataset_ops.Dataset.range(100).apply(
      testing.assert_next(expected_nodes))
  pipeline = pipeline.interleave(
      shifted_singleton, num_parallel_calls=dataset_ops.AUTOTUNE)
  pipeline = pipeline.take(50)
  pipeline = self._enable_autotune_buffers(pipeline)
  self.assertDatasetProduces(pipeline, range(1, 51))
def testOptionsGraphRoundTripOptimization(self, map_parallelization):
  """Checks `map_parallelization` is still honored after a graph round trip.

  Args:
    map_parallelization: Value assigned to the `map_parallelization`
      optimization option. When truthy the map is asserted to appear as
      "ParallelMap", otherwise as "Map".
  """
  opts = options_lib.Options()
  opts.experimental_optimization.map_parallelization = map_parallelization

  pipeline = dataset_ops.Dataset.range(6).with_options(opts)
  pipeline = self.graphRoundTrip(pipeline)

  if map_parallelization:
    expected_node = "ParallelMap"
  else:
    expected_node = "Map"
  # The map is added only after the round trip, behind the assertion.
  pipeline = pipeline.apply(testing.assert_next([expected_node]))
  pipeline = pipeline.map(lambda x: x * x)
  self.assertDatasetProduces(
      pipeline, expected_output=[0, 1, 4, 9, 16, 25])
def testOptimizationDisableIntraOpParallelism(self, dataset_fn,
                                              expected_output,
                                              apply_autotune):
  """Asserts a MaxIntraOpParallelism node follows the supplied dataset.

  Args:
    dataset_fn: Zero-argument callable returning the dataset under test.
    expected_output: Elements the dataset is expected to produce.
    apply_autotune: If not None, the value assigned to the `autotune`
      optimization option; if None, no options are attached at all.
  """
  pipeline = dataset_fn().apply(
      testing.assert_next(["MaxIntraOpParallelism"]))
  if apply_autotune is not None:
    opts = dataset_ops.Options()
    opts.experimental_optimization.autotune = apply_autotune
    pipeline = pipeline.with_options(opts)
  self.assertDatasetProduces(pipeline, expected_output=expected_output)
def testMapAndBatchFusion(self):
  """Checks that map followed by batch shows up as a single MapAndBatch.

  Default optimizations are switched off so that only the explicitly enabled
  `map_and_batch_fusion` option is in effect.
  """
  pipeline = dataset_ops.Dataset.range(10)
  pipeline = pipeline.apply(testing.assert_next(["MapAndBatch"]))
  pipeline = pipeline.map(lambda x: x * x)
  pipeline = pipeline.batch(10)

  opts = dataset_ops.Options()
  opts.experimental_optimization.apply_default_optimizations = False
  opts.experimental_optimization.map_and_batch_fusion = True
  pipeline = pipeline.with_options(opts)

  # A single batch containing the squares of 0..9.
  squares = [x * x for x in range(10)]
  self.assertDatasetProduces(pipeline, expected_output=[squares])
def testOptimizationDisableIntraOpParallelism(self, dataset_fn,
                                              expected_output):
  """Asserts MaxIntraOpParallelism is injected when opted in via env vars.

  Sets `TF_DATA_EXPERIMENT_OPT_IN` to "disable_intra_op_parallelism" and
  `TF_JOB_NAME` to "test_job" for the duration of the test, then asserts that
  a MaxIntraOpParallelism node follows the dataset returned by `dataset_fn`.

  Args:
    dataset_fn: Zero-argument callable returning the dataset under test.
    expected_output: Elements the dataset is expected to produce.
  """
  os.environ["TF_DATA_EXPERIMENT_OPT_IN"] = "disable_intra_op_parallelism"
  os.environ["TF_JOB_NAME"] = "test_job"
  try:
    dataset = dataset_fn()
    dataset = dataset.apply(testing.assert_next(["MaxIntraOpParallelism"]))
    self.assertDatasetProduces(dataset, expected_output=expected_output)
  finally:
    # Always undo the environment changes; otherwise a failing assertion
    # would leak the opt-in into every test that runs afterwards.
    os.environ.pop("TF_DATA_EXPERIMENT_OPT_IN", None)
    os.environ.pop("TF_JOB_NAME", None)
def testCapturedConstant(self):
  """Checks a map capturing a constant is asserted to be a ParallelMap.

  Only `map_parallelization` is enabled; default optimizations are off.
  """
  captured_t = constant_op.constant(42, dtype=dtypes.int64)

  def add_captured(x):
    return x + captured_t

  pipeline = dataset_ops.Dataset.range(5)
  pipeline = pipeline.apply(testing.assert_next(["ParallelMap"]))
  pipeline = pipeline.map(add_captured)

  opts = dataset_ops.Options()
  opts.experimental_optimization.apply_default_optimizations = False
  opts.experimental_optimization.map_parallelization = True
  pipeline = pipeline.with_options(opts)

  expected = [x + 42 for x in range(5)]
  self.assertDatasetProduces(pipeline, expected_output=expected)
def testParallelMap(self):
  """Verifies the op sequence for an autotuned parallel map + take pipeline.

  The expected map node name is "ParallelMapV2" when the (2020, 3, 6)
  forward-compatibility check passes and "ParallelMap" otherwise.
  """
  if compat.forward_compatible(2020, 3, 6):
    map_node = "ParallelMapV2"
  else:
    map_node = "ParallelMap"

  pipeline = dataset_ops.Dataset.range(100).apply(
      testing.assert_next([map_node, "Prefetch", "FiniteTake"]))
  pipeline = pipeline.map(
      lambda x: x + 1, num_parallel_calls=dataset_ops.AUTOTUNE)
  pipeline = pipeline.take(50)
  pipeline = self._enable_autotune_buffers(pipeline)
  self.assertDatasetProduces(pipeline, range(1, 51))
def testFilterWithRandomUniformFusion(self):
  """Checks a random-uniform filter is asserted to appear as "Sampling".

  Only `filter_with_random_uniform_fusion` is enabled. A single element is
  pulled from the dataset; its value is not inspected.
  """
  opts = dataset_ops.Options()
  opts.experimental_optimization.apply_default_optimizations = False
  opts.experimental_optimization.filter_with_random_uniform_fusion = True

  pipeline = dataset_ops.Dataset.range(10000000)
  pipeline = pipeline.apply(testing.assert_next(["Sampling"]))
  pipeline = pipeline.filter(
      lambda _: random_ops.random_uniform([]) < 0.05)
  pipeline = pipeline.with_options(opts)

  next_element = self.getNext(pipeline)
  self.evaluate(next_element())
def testAutotuneOption(self, apply_autotune):
  """Checks map parallelization takes effect unless autotune is False.

  Args:
    apply_autotune: If not None, the value assigned to the `autotune`
      optimization option. Only an explicit `False` leads to a plain "Map"
      being expected; `None` and `True` both expect "ParallelMap".
  """
  # Identity comparison is deliberate: None must not be treated like False.
  if apply_autotune is False:  # pylint: disable=g-bool-id-comparison
    expected_nodes = ["Map"]
  else:
    expected_nodes = ["ParallelMap"]

  pipeline = dataset_ops.Dataset.range(4)
  pipeline = pipeline.apply(testing.assert_next(expected_nodes))
  pipeline = pipeline.map(lambda x: x + 2)

  opts = dataset_ops.Options()
  opts.experimental_optimization.apply_default_optimizations = False
  opts.experimental_optimization.map_parallelization = True
  if apply_autotune is not None:
    opts.experimental_optimization.autotune = apply_autotune
  pipeline = pipeline.with_options(opts)

  self.assertDatasetProduces(pipeline, expected_output=[2, 3, 4, 5])
def testAutotuneOption(self):
  """Checks that "Root" directly follows the map when autotune is enabled.

  The single-element dataset must yield 0 and then raise OutOfRangeError.
  """
  opts = options_lib.Options()
  opts.experimental_optimization.apply_default_optimizations = False
  opts.autotune.enabled = True

  pipeline = dataset_ops.Dataset.from_tensors(0).map(lambda x: x)
  # The assertion sits after the map, so it inspects what comes next.
  pipeline = pipeline.apply(testing.assert_next(["Root"]))
  pipeline = pipeline.with_options(opts)

  next_element = self.getNext(pipeline)
  self.assertEqual(0, self.evaluate(next_element()))
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(next_element())
def testAssertNextShort(self):
  """Checks `assert_next` fails when fewer transformations follow it.

  Two transformations are asserted but only a single map is applied, so an
  InvalidArgumentError with the matching message is expected.
  """
  pipeline = dataset_ops.Dataset.from_tensors(0)
  pipeline = pipeline.apply(testing.assert_next(["Map", "Whoops"]))
  pipeline = pipeline.map(lambda x: x)

  opts = dataset_ops.Options()
  opts.experimental_optimization.apply_default_optimizations = False
  opts.experimental_optimization.autotune = False
  pipeline = pipeline.with_options(opts)

  failure = (errors.InvalidArgumentError,
             "Asserted next 2 transformations but encountered only 1.")
  self.assertDatasetProduces(pipeline, expected_error=failure)
def testOptimizationGraduatedExperiments(self, apply_autotune):
  """Asserts MaxIntraOpParallelism and PrivateThreadPool follow the dataset.

  Args:
    apply_autotune: If not None, the value assigned to the `autotune`
      optimization option; if None, no options are attached at all.
  """
  expected_nodes = ["MaxIntraOpParallelism", "PrivateThreadPool"]
  pipeline = dataset_ops.Dataset.range(6).apply(
      testing.assert_next(expected_nodes))
  if apply_autotune is not None:
    opts = dataset_ops.Options()
    opts.experimental_optimization.autotune = apply_autotune
    pipeline = pipeline.with_options(opts)
  self.assertDatasetProduces(pipeline, expected_output=list(range(6)))
def testFileShardingWithLegacyRebatch(self):
  """Tests that RebatchDatasetV1 is a passthrough op for auto-sharding.

  With 5 files of 10 records each, 5 workers and worker index 3, the "Shard"
  node is asserted to sit directly after the file listing, and the output is
  expected to be the ten records of file 3, one record per batch.
  """
  self._setUpFiles(num_files=5, num_records_per_file=10)

  expected_nodes = ["Shard", "FlatMap", "Batch", "Rebatch"]
  pipeline = dataset_ops.Dataset.list_files(
      self.test_filenames, shuffle=False)
  pipeline = pipeline.apply(testing.assert_next(expected_nodes))
  pipeline = pipeline.flat_map(core_readers.TFRecordDataset).batch(5)
  pipeline = distribute._LegacyRebatchDataset(pipeline, num_replicas=5)
  pipeline = distribute._AutoShardDataset(pipeline, 5, 3)

  expected = []
  for record_index in range(10):
    expected.append([self._record(3, record_index)])
  self.assertDatasetProduces(pipeline, expected)
def testAssertNextInvalid(self):
  """Checks `assert_next` fails when a different transformation follows.

  "Whoops" is asserted but a map is applied, so an InvalidArgumentError
  naming both the asserted and the encountered transformation is expected.
  """
  opts = dataset_ops.Options()
  opts.experimental_optimization.apply_default_optimizations = False

  pipeline = dataset_ops.Dataset.from_tensors(0)
  pipeline = pipeline.apply(testing.assert_next(["Whoops"]))
  pipeline = pipeline.map(lambda x: x)
  pipeline = pipeline.with_options(opts)

  message = ("Asserted Whoops transformation at offset 0 but encountered "
             "Map transformation instead.")
  self.assertDatasetProduces(
      pipeline, expected_error=(errors.InvalidArgumentError, message))
def testOptimizationMapParallelization(self, autotune, set_env):
  """Checks map parallelization applies only when opted in and autotuned.

  "ParallelMap" is expected only when both `autotune` and `set_env` are
  truthy; in every other combination a plain "Map" is expected.

  Args:
    autotune: Value assigned to the `autotune` optimization option.
    set_env: Whether to set `TF_DATA_EXPERIMENT_OPT_IN` to
      "map_parallelization" and `TF_JOB_NAME` to "test_job" for the duration
      of the test.
  """
  if set_env:
    os.environ["TF_DATA_EXPERIMENT_OPT_IN"] = "map_parallelization"
    os.environ["TF_JOB_NAME"] = "test_job"
  try:
    dataset = dataset_ops.Dataset.range(5)
    if autotune and set_env:
      dataset = dataset.apply(testing.assert_next(["ParallelMap"]))
    else:
      dataset = dataset.apply(testing.assert_next(["Map"]))
    dataset = dataset.map(lambda x: x + 1)

    options = dataset_ops.Options()
    options.experimental_optimization.autotune = autotune
    dataset = dataset.with_options(options)

    self.assertDatasetProduces(dataset, expected_output=list(range(1, 6)))
  finally:
    # Always undo the environment changes; otherwise a failing assertion
    # would leak the opt-in into every test that runs afterwards.
    if set_env:
      os.environ.pop("TF_DATA_EXPERIMENT_OPT_IN", None)
      os.environ.pop("TF_JOB_NAME", None)
def testShardWithRebatch(self):
  """Tests that RebatchDatasetV2 is a passthrough op for auto-sharding.

  Only the op order is asserted: one element is pulled from the dataset and
  its value is not inspected.
  """
  expected_nodes = ["Shard", "FlatMap", "Batch", "Rebatch"]
  pipeline = dataset_ops.Dataset.list_files(
      self.test_filenames, shuffle=False)
  pipeline = pipeline.apply(testing.assert_next(expected_nodes))
  pipeline = pipeline.flat_map(core_readers.TFRecordDataset).batch(5)
  pipeline = distribute._RebatchDataset(pipeline, batch_sizes=5)
  pipeline = distribute._AutoShardDataset(pipeline, 5, 3)

  next_element = self.getNext(pipeline)
  self.evaluate(next_element())
def testDebugModeSequentialExecution(self):
  """Asserts parallel ops appear as their sequential counterparts.

  Although interleave, map and batch are given `num_parallel_calls` and a
  prefetch is requested, the asserted sequence is Interleave, Map, Batch,
  FiniteTake, with no parallel variants and no Prefetch.
  NOTE(review): presumably debug mode is enabled by the surrounding fixture;
  this method does not enable it itself.
  """
  expected_nodes = ["Interleave", "Map", "Batch", "FiniteTake"]
  pipeline = dataset_ops.Dataset.range(10).apply(
      testing.assert_next(expected_nodes))
  pipeline = pipeline.interleave(
      lambda x: dataset_ops.Dataset.from_tensors(x),
      cycle_length=10,
      num_parallel_calls=10)
  pipeline = pipeline.map(lambda x: x * x, num_parallel_calls=10)
  pipeline = pipeline.batch(batch_size=5, num_parallel_calls=2)
  pipeline = pipeline.prefetch(buffer_size=2).take(2)

  expected_batches = [[0, 1, 4, 9, 16], [25, 36, 49, 64, 81]]
  self.assertDatasetProduces(pipeline, expected_batches)
def testOptionsGraphRoundTripOptimization(self, map_parallelization):
  """Checks `map_parallelization` is still honored after a graph round trip.

  The test is skipped unless the (2021, 4, 25) forward-compatibility check
  passes.

  Args:
    map_parallelization: Value assigned to the `map_parallelization`
      optimization option. When truthy the map is asserted to appear as
      "ParallelMap", otherwise as "Map".
  """
  if not tf_compat.forward_compatible(2021, 4, 25):
    self.skipTest("Behavior is currently not supported")

  opts = dataset_ops.Options()
  opts.experimental_optimization.map_parallelization = map_parallelization

  pipeline = dataset_ops.Dataset.range(6).with_options(opts)
  pipeline = self.graphRoundTrip(pipeline)

  if map_parallelization:
    expected_node = "ParallelMap"
  else:
    expected_node = "Map"
  # The map is added only after the round trip, behind the assertion.
  pipeline = pipeline.apply(testing.assert_next([expected_node]))
  pipeline = pipeline.map(lambda x: x * x)
  self.assertDatasetProduces(
      pipeline, expected_output=[0, 1, 4, 9, 16, 25])
def testOptimizationInjectPrefetch(self, autotune, set_env):
  """Checks a Prefetch is injected only when opted in and autotuned.

  "Prefetch" is expected after the parallel map only when both `autotune`
  and `set_env` are truthy; otherwise "Root" is expected to follow directly.

  Args:
    autotune: Value assigned to `options.autotune.enabled`.
    set_env: Whether to set `TF_DATA_EXPERIMENT_OPT_IN` to "inject_prefetch"
      and `TF_JOB_NAME` to "test_job" for the duration of the test.
  """
  if set_env:
    os.environ["TF_DATA_EXPERIMENT_OPT_IN"] = "inject_prefetch"
    os.environ["TF_JOB_NAME"] = "test_job"
  try:
    dataset = dataset_ops.Dataset.range(5)
    dataset = dataset.map(
        lambda x: x + 1, num_parallel_calls=dataset_ops.AUTOTUNE)
    # The assertion is placed after the map, so it inspects what follows it.
    if autotune and set_env:
      dataset = dataset.apply(testing.assert_next(["Prefetch"]))
    else:
      dataset = dataset.apply(testing.assert_next(["Root"]))

    options = options_lib.Options()
    options.autotune.enabled = autotune
    dataset = dataset.with_options(options)

    self.assertDatasetProduces(dataset, expected_output=list(range(1, 6)))
  finally:
    # Always undo the environment changes; otherwise a failing assertion
    # would leak the opt-in into every test that runs afterwards.
    if set_env:
      os.environ.pop("TF_DATA_EXPERIMENT_OPT_IN", None)
      os.environ.pop("TF_JOB_NAME", None)
def testFileShardingWithRebatch(self):
  """Tests that RebatchDatasetV2 is a passthrough op for auto-sharding.

  With 3 files of 5 records each, 3 workers and worker index 1, the output
  is expected to be the five records of file 1 split into batches of sizes
  2, 1 and 2.
  """
  self._setUpFiles(num_files=3, num_records_per_file=5)

  expected_nodes = ["Shard", "FlatMap", "Batch", "Rebatch"]
  pipeline = dataset_ops.Dataset.list_files(
      self.test_filenames, shuffle=False)
  pipeline = pipeline.apply(testing.assert_next(expected_nodes))
  pipeline = pipeline.flat_map(core_readers.TFRecordDataset).batch(5)
  pipeline = distribute._RebatchDataset(pipeline, batch_sizes=[2, 1, 2])
  pipeline = distribute._AutoShardDataset(pipeline, 3, 1)

  # Records 0..4 of file 1, regrouped to match the batch sizes [2, 1, 2].
  records = [self._record(1, record_index) for record_index in range(5)]
  expected = [records[0:2], records[2:3], records[3:5]]
  self.assertDatasetProduces(pipeline, expected)
def testOptimizationDisableIntraOpParallelism(self):
  """Asserts MaxIntraOpParallelism is injected when opted in via env vars.

  Sets `TF_DATA_EXPERIMENT_OPT_IN` to "disable_intra_op_parallelism" and
  `TF_JOB_NAME` to "test_job" for the duration of the test, attaches default
  options to a range dataset, and asserts that a MaxIntraOpParallelism node
  follows it.
  """
  os.environ["TF_DATA_EXPERIMENT_OPT_IN"] = "disable_intra_op_parallelism"
  os.environ["TF_JOB_NAME"] = "test_job"
  try:
    dataset = dataset_ops.Dataset.range(10)
    dataset = dataset.apply(testing.assert_next(["MaxIntraOpParallelism"]))

    options = dataset_ops.Options()
    dataset = dataset.with_options(options)

    self.assertDatasetProduces(dataset, expected_output=list(range(10)))
  finally:
    # Always undo the environment changes; otherwise a failing assertion
    # would leak the opt-in into every test that runs afterwards.
    os.environ.pop("TF_DATA_EXPERIMENT_OPT_IN", None)
    os.environ.pop("TF_JOB_NAME", None)
def testCapturedInputs(self):
  """Checks maps with captured inputs are not fused by `map_fusion`.

  The first map returns a tensor captured from the enclosing scope, and the
  two maps are asserted to remain separate "Map" nodes.
  """
  lhs = constant_op.constant(3, dtype=dtypes.int64)
  rhs = constant_op.constant(4, dtype=dtypes.int64)
  some_tensor = math_ops.mul(lhs, rhs)

  # We currently do not support functions with captured inputs.
  pipeline = dataset_ops.Dataset.range(1)
  pipeline = pipeline.apply(testing.assert_next(["Map", "Map"]))
  pipeline = pipeline.map(lambda x: some_tensor)
  pipeline = pipeline.map(lambda x: x)

  opts = options_lib.Options()
  opts.experimental_optimization.apply_default_optimizations = False
  opts.experimental_optimization.map_fusion = True
  pipeline = pipeline.with_options(opts)

  self.assertDatasetProduces(pipeline, expected_output=[some_tensor])
def test_stateful_ops_map_with_random_ops(self):
  """Checks how a parallel map with a random op is laid out when deterministic.

  Under `test_util.deterministic_ops()`, a single `map` call with
  `num_parallel_calls=5` is asserted to appear as "Map" followed by
  "ParallelMap". Each output must be its input plus 0 or 1.
  """
  # NOTE(review): the whole body is kept inside the context manager; confirm
  # this matches the intended extent of the `with` block.
  with test_util.deterministic_ops():

    def add_random_bit(x):
      noise = random_ops.random_uniform(
          (), 0, 2, dtype=dtypes.int64, seed=1)
      return x + noise

    pipeline = dataset_ops.Dataset.range(5).apply(
        testing.assert_next(["Map", "ParallelMap"]))
    pipeline = pipeline.map(add_random_bit, num_parallel_calls=5)

    next_element = self.getNext(pipeline, requires_initialization=True)
    for index in range(5):
      value = self.evaluate(next_element())
      self.assertIn(value, [index, index + 1])
def testParallelInterleave(self):
  """Verifies the op sequence for an autotuned interleave + take pipeline.

  The expected interleave node name is "ParallelInterleaveV3" when the
  (2020, 2, 20) forward-compatibility check passes and
  "ParallelInterleaveV2" otherwise.
  """
  if compat.forward_compatible(2020, 2, 20):
    interleave_node = "ParallelInterleaveV3"
  else:
    interleave_node = "ParallelInterleaveV2"

  def shifted_singleton(x):
    # Each input element becomes a one-element dataset holding x + 1.
    return dataset_ops.Dataset.from_tensors(x + 1)

  pipeline = dataset_ops.Dataset.range(100).apply(
      testing.assert_next([interleave_node, "Prefetch", "FiniteTake"]))
  pipeline = pipeline.interleave(
      shifted_singleton, num_parallel_calls=dataset_ops.AUTOTUNE)
  pipeline = pipeline.take(50)
  pipeline = self._enable_autotune_buffers(pipeline)
  self.assertDatasetProduces(pipeline, range(1, 51))
def testCapturedVariable(self):
  """Checks a map capturing a variable is asserted to stay a plain "Map".

  `map_parallelization` is enabled, yet the asserted node is "Map". The
  variable is initialized before the dataset is evaluated.
  """
  captured_t = variables.Variable(42, dtype=dtypes.int64)

  def add_captured(x):
    return x + captured_t

  pipeline = dataset_ops.Dataset.range(5)
  pipeline = pipeline.apply(testing.assert_next(["Map"]))
  pipeline = pipeline.map(add_captured)

  opts = dataset_ops.Options()
  opts.experimental_optimization.apply_default_optimizations = False
  opts.experimental_optimization.map_parallelization = True
  pipeline = pipeline.with_options(opts)

  self.evaluate(variables.global_variables_initializer())
  expected = [x + 42 for x in range(5)]
  self.assertDatasetProduces(
      pipeline, expected_output=expected, requires_initialization=True)
def testNoopElimination(self):
  """Checks which of a chain of ops remain under `noop_elimination`.

  Eight transformations are chained, but only FiniteRepeat, FiniteSkip,
  Prefetch and MemoryCacheImpl are asserted to remain; `take(-1)`,
  `skip(0)`, `repeat(1)` and `prefetch(0)` are the ones expected to vanish.
  """
  one = constant_op.constant(1, dtype=dtypes.int64)
  two = constant_op.constant(2, dtype=dtypes.int64)
  some_tensor = math_ops.mul(one, two)

  surviving_nodes = [
      "FiniteRepeat", "FiniteSkip", "Prefetch", "MemoryCacheImpl"
  ]
  pipeline = dataset_ops.Dataset.range(5)
  pipeline = pipeline.apply(testing.assert_next(surviving_nodes))
  # The repeat count is a computed tensor rather than a Python constant.
  pipeline = pipeline.repeat(some_tensor)
  pipeline = pipeline.skip(5)
  pipeline = pipeline.take(-1)
  pipeline = pipeline.skip(0)
  pipeline = pipeline.repeat(1)
  pipeline = pipeline.prefetch(0)
  pipeline = pipeline.prefetch(1)
  pipeline = pipeline.cache()

  opts = dataset_ops.Options()
  opts.experimental_optimization.apply_default_optimizations = False
  opts.experimental_optimization.noop_elimination = True
  pipeline = pipeline.with_options(opts)

  self.assertDatasetProduces(pipeline, expected_output=range(5))
def testSimpleReorderingV2(self):
  """Checks skip/take/shard are moved ahead of map and prefetch.

  The pipeline is built as map, skip, prefetch, take, shard, while the
  asserted order under `reorder_data_discarding_ops` is FiniteSkip,
  FiniteTake, Shard, ParallelMap, Prefetch.
  """
  expected_nodes = [
      "FiniteSkip", "FiniteTake", "Shard", "ParallelMap", "Prefetch"
  ]
  pipeline = dataset_ops.Dataset.range(100).apply(
      testing.assert_next(expected_nodes))
  pipeline = pipeline.map(lambda x: x + 1, num_parallel_calls=10)
  pipeline = pipeline.skip(10).prefetch(1).take(50).shard(2, 0)

  opts = dataset_ops.Options()
  opts.experimental_optimization.apply_default_optimizations = False
  opts.experimental_optimization.reorder_data_discarding_ops = True
  pipeline = pipeline.with_options(opts)

  self.assertDatasetProduces(pipeline, range(11, 61, 2))
def testCapturedInputs(self):
  """Checks the asserted op layout for a random map with a captured tensor.

  With `hoist_random_uniform` enabled, the map is asserted to appear as
  "Zip[0]" followed by "Map". The dataset is then handed to `_testDataset`.
  """
  one = constant_op.constant(1, dtype=dtypes.float32)
  zero = constant_op.constant(0, dtype=dtypes.float32)
  some_tensor = math_ops.mul(one, zero)

  def random_with_capture(_):
    sample = random_ops.random_uniform(
        [], minval=1, maxval=10, dtype=dtypes.float32, seed=42)
    return some_tensor + sample

  pipeline = dataset_ops.Dataset.range(5)
  pipeline = pipeline.apply(testing.assert_next(["Zip[0]", "Map"]))
  pipeline = pipeline.map(random_with_capture)

  opts = dataset_ops.Options()
  opts.experimental_optimization.apply_default_optimizations = False
  opts.experimental_optimization.hoist_random_uniform = True
  pipeline = pipeline.with_options(opts)

  self._testDataset(pipeline)
def testCopyToDeviceHostOptimizations(self):
  """Checks the host-side map + batch is asserted to be a MapAndBatch.

  The host dataset is copied to "/cpu:1" and read through a one-shot
  iterator in a session configured with two CPU devices. One batch of
  squares is expected, followed by OutOfRangeError.
  """
  host_pipeline = dataset_ops.Dataset.range(10).apply(
      testing.assert_next(["MapAndBatch"]))
  host_pipeline = host_pipeline.map(lambda x: x * x)
  host_pipeline = host_pipeline.batch(10)

  copy_fn = prefetching_ops.copy_to_device("/cpu:1")
  device_pipeline = host_pipeline.apply(copy_fn)

  with ops.device("/cpu:1"):
    one_shot = dataset_ops.make_one_shot_iterator(device_pipeline)
    next_element = one_shot.get_next()

  # Two CPU devices are needed so that "/cpu:1" exists.
  worker_config = config_pb2.ConfigProto(device_count={"CPU": 2})
  with self.test_session(config=worker_config):
    squares = [x * x for x in range(10)]
    self.assertAllEqual(squares, self.evaluate(next_element))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element)