def testOptimizationNonSerializable(self):
    """Checks noop elimination on both sides of a `non_serializable()` stage.

    The pipeline holds two `skip(0)` stages. The first `assert_next` expects
    the one placed before `non_serializable()` to survive as "FiniteSkip";
    the second expects the one placed after it to be gone, so that
    "MemoryCacheImpl" is the next transformation.
    """
    pipeline = dataset_ops.Dataset.from_tensors(0)

    # Upstream half: this skip(0) should not be removed by noop elimination.
    pipeline = pipeline.apply(optimization.assert_next(["FiniteSkip"]))
    pipeline = pipeline.skip(0)
    pipeline = pipeline.apply(optimization.non_serializable())

    # Downstream half: this skip(0) should be removed by noop elimination.
    pipeline = pipeline.apply(optimization.assert_next(["MemoryCacheImpl"]))
    pipeline = pipeline.skip(0)
    pipeline = pipeline.cache()

    optimized = dataset_ops._OptimizeDataset(pipeline, ["noop_elimination"])
    self.assertDatasetProduces(optimized, expected_output=[0])
def testOptimizationNonSerializable(self):
    """Checks noop elimination on both sides of a `non_serializable()` stage.

    Same pipeline shape as a two-`skip(0)` sandwich around
    `non_serializable()`, but the optimization is requested through
    `Options` (default optimizations off, `noop_elimination` on).
    """
    pipeline = dataset_ops.Dataset.from_tensors(0)

    # Upstream half: this skip(0) should not be removed by noop elimination.
    pipeline = pipeline.apply(optimization.assert_next(["FiniteSkip"]))
    pipeline = pipeline.skip(0)
    pipeline = pipeline.apply(optimization.non_serializable())

    # Downstream half: this skip(0) should be removed by noop elimination.
    pipeline = pipeline.apply(optimization.assert_next(["MemoryCacheImpl"]))
    pipeline = pipeline.skip(0)
    pipeline = pipeline.cache()

    opts = dataset_ops.Options()
    opts.experimental_optimization.apply_default_optimizations = False
    opts.experimental_optimization.noop_elimination = True
    pipeline = pipeline.with_options(opts)

    self.assertDatasetProduces(pipeline, expected_output=[0])
def testFilterFusion(self, map_function, predicates):
    """Checks that consecutive filters collapse into a single "Filter" node.

    Args:
      map_function: Function mapped over `range(5)` before filtering.
      predicates: Iterable of filter predicates applied in order.
    """
    dataset = dataset_ops.Dataset.range(5)
    dataset = dataset.apply(
        optimization.assert_next(["Map", "Filter", "MemoryCacheImpl"]))
    dataset = dataset.map(map_function)
    for predicate in predicates:
        dataset = dataset.filter(predicate)
    dataset = dataset.cache()

    opts = dataset_ops.Options()
    opts.experimental_optimization.apply_default_optimizations = False
    opts.experimental_optimization.filter_fusion = True
    dataset = dataset.with_options(opts)

    def _survives(value):
        """Returns True when `value` passes every predicate, in order."""
        for predicate in predicates:
            if isinstance(value, tuple):
                # Pass tuple as multiple arguments.
                verdict = predicate(*value)
            else:
                verdict = predicate(value)
            if not self.evaluate(verdict):
                return False
        return True

    # Recompute the expected elements by applying the same functions directly.
    expected_output = []
    for x in range(5):
        mapped = map_function(x)
        if _survives(mapped):
            expected_output.append(mapped)

    self.assertDatasetProduces(dataset, expected_output=expected_output)
def flat_map_fn(_):
    """Returns a one-element dataset asserting its map+batch become "MapAndBatch".

    The argument is ignored.
    """
    inner = dataset_ops.Dataset.from_tensors(0).apply(
        optimization.assert_next(["MapAndBatch"]))
    # Should be fused by map and batch fusion.
    return inner.map(lambda x: x).batch(1)
def testNoopElimination(self):
    """Checks which stages remain after noop elimination.

    Seven transformations are chained, but `assert_next` lists only four
    nodes (FiniteRepeat, FiniteSkip, Prefetch, Prefetch); the remaining
    `take(-1)`, `skip(0)` and `repeat(1)` are therefore expected to be gone.
    The repeat count is produced by a multiply op rather than passed as a
    Python literal.
    """
    lhs = constant_op.constant(1, dtype=dtypes.int64)
    rhs = constant_op.constant(2, dtype=dtypes.int64)
    repeat_count = math_ops.mul(lhs, rhs)

    dataset = dataset_ops.Dataset.range(5).apply(
        optimization.assert_next(
            ["FiniteRepeat", "FiniteSkip", "Prefetch", "Prefetch"]))
    dataset = dataset.repeat(repeat_count).skip(5).prefetch(0)
    dataset = dataset.take(-1).skip(0).repeat(1).prefetch(0)

    opts = dataset_ops.Options()
    opts.experimental_noop_elimination = True
    dataset = dataset.with_options(opts)

    next_element = dataset.make_one_shot_iterator().get_next()

    with self.test_session() as sess:
        for expected in range(5):
            actual = sess.run(next_element)
            self.assertAllEqual(actual, expected)
        # The pipeline must be exhausted after five elements.
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(next_element)
def testFilterFusion(self, map_function, predicates):
    """Checks filter fusion by pulling elements through a one-shot iterator.

    Args:
      map_function: Function mapped over `range(5)` before filtering.
      predicates: Iterable of filter predicates applied in order.
    """
    dataset = dataset_ops.Dataset.range(5)
    dataset = dataset.apply(
        optimization.assert_next(["Map", "Filter", "MemoryCacheImpl"]))
    dataset = dataset.map(map_function)
    for predicate in predicates:
        dataset = dataset.filter(predicate)
    dataset = dataset.cache()

    opts = dataset_ops.Options()
    opts.experimental_filter_fusion = True
    dataset = dataset.with_options(opts)

    next_element = dataset.make_one_shot_iterator().get_next()

    with self.cached_session() as sess:
        for x in range(5):
            mapped = map_function(x)
            for predicate in predicates:
                if isinstance(mapped, tuple):
                    # Pass tuple as multiple arguments.
                    verdict = predicate(*mapped)
                else:
                    verdict = predicate(mapped)
                if not sess.run(verdict):
                    # Element is filtered out; nothing to read for it.
                    break
            else:
                # Every predicate held, so the dataset must yield this value.
                actual = sess.run(next_element)
                self.assertAllEqual(mapped, actual)
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(next_element)
def testMapFusion(self, functions):
    """Checks that a chain of maps is asserted as a single "Map" node.

    Args:
      functions: Iterable of functions mapped over `range(5)` in order.
    """
    dataset = dataset_ops.Dataset.range(5)
    dataset = dataset.apply(
        optimization.assert_next(["Map", "MemoryCacheImpl"]))
    for function in functions:
        dataset = dataset.map(function)
    dataset = dataset.cache()

    opts = dataset_ops.Options()
    opts.experimental_map_fusion = True
    dataset = dataset.with_options(opts)

    next_element = dataset.make_one_shot_iterator().get_next()

    with self.cached_session() as sess:
        for x in range(5):
            actual = sess.run(next_element)
            # Fold the same functions over x to get the expected value.
            expected = x
            for function in functions:
                if isinstance(expected, tuple):
                    # Pass tuple as multiple arguments.
                    expected = function(*expected)
                else:
                    expected = function(expected)
            self.assertAllEqual(expected, actual)
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(next_element)
def testAssertNext(self):
    """Checks that `assert_next(["Map"])` passes when followed by a map.

    Default optimizations are switched off through `Options`.
    """
    dataset = dataset_ops.Dataset.from_tensors(0)
    dataset = dataset.apply(optimization.assert_next(["Map"]))
    dataset = dataset.map(lambda x: x)

    opts = dataset_ops.Options()
    opts.experimental_optimization.apply_default_optimizations = False
    dataset = dataset.with_options(opts)

    self.assertDatasetProduces(dataset, expected_output=[0])
def testLatencyStatsOptimization(self):
    """Checks that latency stats nodes are inserted on every edge.

    With `experimental_latency_all_edges` set, `assert_next` expects a
    "LatencyStats" node before the map, between map and prefetch, and after
    the prefetch. The aggregator summary is then checked for one recorded
    latency per dataset node.
    """
    stats_aggregator = stats_ops.StatsAggregator()

    # Stages are created in the same order as a single fluent chain would.
    dataset = dataset_ops.Dataset.from_tensors(1)
    dataset = dataset.apply(
        optimization.assert_next([
            "LatencyStats", "Map", "LatencyStats", "Prefetch", "LatencyStats"
        ]))
    dataset = dataset.map(lambda x: x * x)
    dataset = dataset.prefetch(1)
    dataset = dataset.apply(stats_ops.set_stats_aggregator(stats_aggregator))

    opts = dataset_ops.Options()
    opts.experimental_latency_all_edges = True
    dataset = dataset.with_options(opts)

    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()
    summary_t = stats_aggregator.get_summary()

    expected_tags = (
        "record_latency_TensorDataset/_1",
        "record_latency_MapDataset/_4",
        "record_latency_PrefetchDataset/_6",
    )

    with self.cached_session() as sess:
        sess.run(iterator.initializer)
        self.assertEqual(1 * 1, sess.run(next_element))
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(next_element)
        summary_str = sess.run(summary_t)
        for tag in expected_tags:
            self._assertSummaryHasCount(summary_str, tag, 1)
def testMapFilterFusion(self, function, predicate):
    """Checks that map followed by filter is asserted as a fused pair.

    Args:
      function: Function mapped over `range(10)`.
      predicate: Filter predicate applied after the map.
    """
    dataset = dataset_ops.Dataset.range(10)
    dataset = dataset.apply(
        optimization.assert_next(["Map", "FilterByLastComponent"]))
    dataset = dataset.map(function).filter(predicate)

    opts = dataset_ops.Options()
    opts.experimental_map_and_filter_fusion = True
    dataset = dataset.with_options(opts)

    self._testMapAndFilter(dataset, function, predicate)
def testAssertNext(self):
    """Checks that `assert_next(["Map"])` passes when followed by a map."""
    dataset = dataset_ops.Dataset.from_tensors(0)
    dataset = dataset.apply(optimization.assert_next(["Map"]))
    dataset = dataset.map(lambda x: x)

    next_element = dataset.make_one_shot_iterator().get_next()

    with self.cached_session() as sess:
        self.assertEqual(0, sess.run(next_element))
def testAssertNextInvalid(self):
    """Checks the error raised when the asserted transformation is wrong.

    "Whoops" is asserted but a map follows, so an `InvalidArgumentError`
    with the mismatch message is expected.
    """
    dataset = dataset_ops.Dataset.from_tensors(0)
    dataset = dataset.apply(optimization.assert_next(["Whoops"]))
    dataset = dataset.map(lambda x: x)

    expected_message = (
        "Asserted Whoops transformation at offset 0 but encountered "
        "Map transformation instead.")
    self.assertDatasetProduces(
        dataset,
        expected_error=(errors.InvalidArgumentError, expected_message))
def testMakeNumaAware(self):
    """Checks that `map_and_batch` is asserted as "NumaMapAndBatch".

    The `experimental_numa_aware` option is enabled and the single produced
    batch must hold the squares of 0..9.
    """
    dataset = dataset_ops.Dataset.range(10)
    dataset = dataset.apply(optimization.assert_next(["NumaMapAndBatch"]))
    dataset = dataset.apply(batching.map_and_batch(lambda x: x * x, 10))

    opts = dataset_ops.Options()
    opts.experimental_numa_aware = True
    dataset = dataset.with_options(opts)

    squares = [x * x for x in range(10)]
    self.assertDatasetProduces(dataset, expected_output=[squares])
def testMapAndBatchFusion(self):
    """Checks that `map(...).batch(...)` is asserted as "MapAndBatch".

    The single produced batch must hold the squares of 0..9.
    """
    dataset = dataset_ops.Dataset.range(10)
    dataset = dataset.apply(optimization.assert_next(["MapAndBatch"]))
    dataset = dataset.map(lambda x: x * x)
    dataset = dataset.batch(10)

    opts = dataset_ops.Options()
    opts.experimental_map_and_batch_fusion = True
    dataset = dataset.with_options(opts)

    squares = [x * x for x in range(10)]
    self.assertDatasetProduces(dataset, expected_output=[squares])
def testHoisting(self, function, will_optimize):
    """Checks the node sequence with `experimental_hoist_random_uniform` on.

    Args:
      function: Function mapped over `range(5)`.
      will_optimize: When true, "Zip[0]" followed by "Map" is asserted;
        otherwise a plain "Map" is asserted.
    """
    if will_optimize:
        expected_nodes = ["Zip[0]", "Map"]
    else:
        expected_nodes = ["Map"]

    dataset = dataset_ops.Dataset.range(5)
    dataset = dataset.apply(optimization.assert_next(expected_nodes))
    dataset = dataset.map(function)

    opts = dataset_ops.Options()
    opts.experimental_hoist_random_uniform = True
    dataset = dataset.with_options(opts)

    self._testDataset(dataset)
def _make_dataset(node_names):
    """Builds a map-then-batch(100) pipeline with map+batch fusion disabled.

    Uses `base_dataset`, `map_fn` and `num_parallel_calls` from the
    enclosing scope.

    Args:
      node_names: Transformation names passed to `assert_next`.

    Returns:
      The dataset with default optimizations and `map_and_batch_fusion`
      both turned off.
    """
    opts = dataset_ops.Options()
    opts.experimental_optimization.apply_default_optimizations = False
    opts.experimental_optimization.map_and_batch_fusion = False

    pipeline = base_dataset.apply(optimization.assert_next(node_names))
    pipeline = pipeline.map(map_fn, num_parallel_calls).batch(100)
    return pipeline.with_options(opts)
def testMapParallelization(self, function, should_optimize):
    """Builds a map pipeline with `experimental_map_parallelization` on.

    Args:
      function: Function mapped over `range(5)`.
      should_optimize: When true, "ParallelMap" is asserted and the output
        is compared against `function` applied to 0..4. When false, "Map"
        is asserted but the dataset is only constructed, not evaluated.
    """
    if should_optimize:
        next_nodes = ["ParallelMap"]
    else:
        next_nodes = ["Map"]

    dataset = dataset_ops.Dataset.range(5)
    dataset = dataset.apply(optimization.assert_next(next_nodes))
    dataset = dataset.map(function)

    opts = dataset_ops.Options()
    opts.experimental_map_parallelization = True
    dataset = dataset.with_options(opts)

    if not should_optimize:
        # NOTE(review): output is deliberately not checked in this case,
        # presumably because such functions are not deterministic -- confirm.
        return

    self.assertDatasetProduces(
        dataset, expected_output=[function(x) for x in range(5)])
def testAssertNextShort(self):
    """Checks the error raised when fewer transformations follow than asserted.

    Two transformations are asserted but only a single map follows.
    """
    dataset = dataset_ops.Dataset.from_tensors(0)
    dataset = dataset.apply(optimization.assert_next(["Map", "Whoops"]))
    dataset = dataset.map(lambda x: x)

    next_element = dataset.make_one_shot_iterator().get_next()

    expected_message = (
        "Asserted next 2 transformations but encountered only 1.")
    with self.cached_session() as sess:
        with self.assertRaisesRegexp(errors.InvalidArgumentError,
                                     expected_message):
            sess.run(next_element)
def testAssertNextShort(self):
    """Checks the error raised when fewer transformations follow than asserted.

    Two transformations are asserted but only a single map follows.
    Autotuning is switched off through `Options`.
    """
    dataset = dataset_ops.Dataset.from_tensors(0)
    dataset = dataset.apply(optimization.assert_next(["Map", "Whoops"]))
    dataset = dataset.map(lambda x: x)

    opts = dataset_ops.Options()
    opts.experimental_autotune = False
    dataset = dataset.with_options(opts)

    expected_message = (
        "Asserted next 2 transformations but encountered only 1.")
    self.assertDatasetProduces(
        dataset,
        expected_error=(errors.InvalidArgumentError, expected_message))
def testOptimizationDefault(self):
    """Checks that default `Options` leave map and batch as separate nodes.

    "Map" followed by "Batch" is asserted, and the single batch must hold
    the squares of 0..9.
    """
    dataset = dataset_ops.Dataset.range(10)
    dataset = dataset.apply(optimization.assert_next(["Map", "Batch"]))
    dataset = dataset.map(lambda x: x * x).batch(10)
    dataset = dataset.with_options(dataset_ops.Options())

    next_element = dataset.make_one_shot_iterator().get_next()

    with self.cached_session() as sess:
        squares = [x * x for x in range(10)]
        self.assertAllEqual(squares, sess.run(next_element))
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(next_element)
def testFilterWithRandomUniformFusion(self):
    """Checks that a random-uniform filter is asserted as "Sampling".

    Only one element is pulled; its value is not checked.
    """
    dataset = dataset_ops.Dataset.range(10000000)
    dataset = dataset.apply(optimization.assert_next(["Sampling"]))
    dataset = dataset.filter(lambda _: random_ops.random_uniform([]) < 0.05)

    opts = dataset_ops.Options()
    opts.experimental_optimization.apply_default_optimizations = False
    opts.experimental_optimization.filter_with_random_uniform_fusion = True
    dataset = dataset.with_options(opts)

    next_element_fn = self.getNext(dataset)
    self.evaluate(next_element_fn())
def testAutotuneOption(self):
    """Checks that enabling autotune puts a "Model" node after the map.

    Default optimizations are off; the dataset must yield 0 and then end.
    """
    dataset = dataset_ops.Dataset.from_tensors(0).map(lambda x: x)
    dataset = dataset.apply(optimization.assert_next(["Model"]))

    opts = dataset_ops.Options()
    opts.experimental_autotune = True
    opts.experimental_optimization.apply_default_optimizations = False
    dataset = dataset.with_options(opts)

    next_element_fn = self.getNext(dataset)
    self.assertEqual(0, self.evaluate(next_element_fn()))
    with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element_fn())
def testMapParallelizationWithCapturedConstant(self):
    """Tests that a map function capturing a constant is parallelized.

    "ParallelMap" is asserted and the output must be `x + 42` for 0..4.
    """
    captured_t = constant_op.constant(42, dtype=dtypes.int64)

    def fn(x):
        return x + captured_t

    dataset = dataset_ops.Dataset.range(5)
    dataset = dataset.apply(optimization.assert_next(["ParallelMap"]))
    dataset = dataset.map(fn)

    opts = dataset_ops.Options()
    opts.experimental_optimization.apply_default_optimizations = False
    opts.experimental_optimization.map_parallelization = True
    dataset = dataset.with_options(opts)

    self.assertDatasetProduces(
        dataset, expected_output=[x + 42 for x in range(5)])
def testMapAndBatchFusion(self):
    """Checks that `map(...).batch(...)` is asserted as "MapAndBatch".

    The batch is read through a one-shot iterator and must hold the squares
    of 0..9, after which the iterator must be exhausted.
    """
    dataset = dataset_ops.Dataset.range(10)
    dataset = dataset.apply(optimization.assert_next(["MapAndBatch"]))
    dataset = dataset.map(lambda x: x * x).batch(10)

    opts = dataset_ops.Options()
    opts.experimental_map_and_batch_fusion = True
    dataset = dataset.with_options(opts)

    next_element = dataset.make_one_shot_iterator().get_next()

    with self.cached_session() as sess:
        squares = [x * x for x in range(10)]
        self.assertAllEqual(squares, sess.run(next_element))
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(next_element)
def testNoopElimination(self):
    """Checks which stages remain after noop elimination.

    Eight transformations are chained, but `assert_next` lists only
    FiniteRepeat, FiniteSkip, Prefetch and MemoryCacheImpl, so the other
    stages are expected to be gone. The repeat count is produced by a
    multiply op rather than passed as a Python literal.
    """
    lhs = constant_op.constant(1, dtype=dtypes.int64)
    rhs = constant_op.constant(2, dtype=dtypes.int64)
    repeat_count = math_ops.mul(lhs, rhs)

    dataset = dataset_ops.Dataset.range(5).apply(
        optimization.assert_next(
            ["FiniteRepeat", "FiniteSkip", "Prefetch", "MemoryCacheImpl"]))
    dataset = dataset.repeat(repeat_count).skip(5)
    dataset = dataset.take(-1).skip(0).repeat(1)
    dataset = dataset.prefetch(0).prefetch(1).cache()

    opts = dataset_ops.Options()
    opts.experimental_noop_elimination = True
    dataset = dataset.with_options(opts)

    self.assertDatasetProduces(dataset, expected_output=range(5))
def testAutotuneOption(self):
    """Checks that enabling autotune puts a "Model" node after the map.

    The dataset must yield 0 and then raise `OutOfRangeError`.
    """
    dataset = dataset_ops.Dataset.from_tensors(0)
    dataset = dataset.map(lambda x: x).apply(
        optimization.assert_next(["Model"]))

    options = dataset_ops.Options()
    options.experimental_autotune = True
    dataset = dataset.with_options(options)

    iterator = dataset_ops.make_one_shot_iterator(dataset)
    get_next = iterator.get_next()

    # The session object itself is never used (all reads go through
    # self.evaluate), so the context manager is entered without binding it.
    with self.cached_session():
        self.assertEqual(0, self.evaluate(get_next))
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(get_next)
def testAdditionalInputs(self):
    """Checks hoisting when the map function also captures an outside tensor.

    The mapped function adds a captured tensor (the product of two float
    constants) to a seeded `random_uniform` draw. "Zip[0]" followed by
    "Map" is asserted.
    """
    lhs = constant_op.constant(1, dtype=dtypes.float32)
    rhs = constant_op.constant(0, dtype=dtypes.float32)
    some_tensor = math_ops.mul(lhs, rhs)

    def random_with_capture(_):
        noise = random_ops.random_uniform(
            [], minval=1, maxval=10, dtype=dtypes.float32, seed=42)
        return some_tensor + noise

    dataset = dataset_ops.Dataset.range(5)
    dataset = dataset.apply(optimization.assert_next(["Zip[0]", "Map"]))
    dataset = dataset.map(random_with_capture)

    opts = dataset_ops.Options()
    opts.experimental_hoist_random_uniform = True
    dataset = dataset.with_options(opts)

    self._testDataset(dataset)
def testMapParallelizationWithCapturedVariable(self):
    """Tests that a map function capturing a variable is not parallelized.

    A plain "Map" is asserted even though `map_parallelization` is on, and
    the output must be `x + 42` for 0..4.
    """
    captured_t = variables.Variable(42, dtype=dtypes.int64)

    def fn(x):
        return x + captured_t

    dataset = dataset_ops.Dataset.range(5)
    dataset = dataset.apply(optimization.assert_next(["Map"]))
    dataset = dataset.map(fn)

    opts = dataset_ops.Options()
    opts.experimental_optimization.apply_default_optimizations = False
    opts.experimental_optimization.map_parallelization = True
    dataset = dataset.with_options(opts)

    # The captured variable must be initialized before the dataset is read.
    self.evaluate(variables.global_variables_initializer())
    self.assertDatasetProduces(
        dataset,
        expected_output=[x + 42 for x in range(5)],
        requires_initialization=True)
def testCapturedInputs(self):
    """Checks that map+filter stay unfused when the predicate captures a tensor.

    The predicate compares each element against a tensor defined outside of
    it; separate "Map" and "Filter" nodes are asserted.
    """
    lhs = constant_op.constant(3, dtype=dtypes.int64)
    rhs = constant_op.constant(4, dtype=dtypes.int64)
    some_tensor = math_ops.mul(lhs, rhs)

    function = lambda x: x * x

    def predicate(y):
        as_int64 = math_ops.cast(y, dtypes.int64)
        return math_ops.less(as_int64, some_tensor)

    # We are currently not supporting functions with captured inputs.
    dataset = dataset_ops.Dataset.range(10)
    dataset = dataset.apply(optimization.assert_next(["Map", "Filter"]))
    dataset = dataset.map(function).filter(predicate)

    opts = dataset_ops.Options()
    opts.experimental_map_and_filter_fusion = True
    dataset = dataset.with_options(opts)

    self._testMapAndFilter(dataset, function, predicate)
def testShuffleAndRepeatFusion(self):
    """Checks that `shuffle(...).repeat(...)` is asserted as "ShuffleAndRepeat".

    Each of the two passes must contain exactly the values 0..9 in some
    order, and reading past the end must raise `OutOfRangeError` twice.
    """
    dataset = dataset_ops.Dataset.range(10)
    dataset = dataset.apply(optimization.assert_next(["ShuffleAndRepeat"]))
    dataset = dataset.shuffle(10).repeat(2)

    opts = dataset_ops.Options()
    opts.experimental_shuffle_and_repeat_fusion = True
    dataset = dataset.with_options(opts)

    next_element_fn = self.getNext(dataset)

    for _ in range(2):
        epoch_values = [self.evaluate(next_element_fn()) for _ in range(10)]
        self.assertAllEqual(list(range(10)), sorted(epoch_values))

    # The end-of-sequence error must be raised on every further read.
    for _ in range(2):
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element_fn())
def testOptimization(self):
    """Checks noop elimination for a dataset read via `MultiDeviceIterator`.

    `skip(0)` is expected to disappear so that "MemoryCacheImpl" comes next.
    Elements 0..9 are read in pairs from "/cpu:1" and "/cpu:2".
    """
    dataset = dataset_ops.Dataset.range(10)
    dataset = dataset.apply(optimization.assert_next(["MemoryCacheImpl"]))
    # This skip(0) should be optimized away.
    dataset = dataset.skip(0).cache()

    opts = dataset_ops.Options()
    opts.experimental_optimization.noop_elimination = True
    dataset = dataset.with_options(opts)

    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
        dataset, ["/cpu:1", "/cpu:2"])

    session_config = config_pb2.ConfigProto(device_count={"CPU": 3})
    with self.test_session(config=session_config):
        self.evaluate(multi_device_iterator.initializer)

        for first_value in range(0, 10, 2):
            elem_on_1, elem_on_2 = multi_device_iterator.get_next()
            self.assertEqual(first_value, self.evaluate(elem_on_1))
            self.assertEqual(first_value + 1, self.evaluate(elem_on_2))

        with self.assertRaises(errors.OutOfRangeError):
            elem_on_1, elem_on_2 = multi_device_iterator.get_next()
            self.evaluate(elem_on_1)
            self.evaluate(elem_on_2)
def testNoopElimination(self):
    """Checks which stages remain after noop elimination.

    Eight transformations are chained, but `assert_next` lists only
    FiniteRepeat, FiniteSkip, Prefetch and MemoryCacheImpl, so the other
    stages are expected to be gone. Elements are read through a one-shot
    iterator and must be 0..4 followed by end of sequence.
    """
    lhs = constant_op.constant(1, dtype=dtypes.int64)
    rhs = constant_op.constant(2, dtype=dtypes.int64)
    repeat_count = math_ops.mul(lhs, rhs)

    dataset = dataset_ops.Dataset.range(5).apply(
        optimization.assert_next(
            ["FiniteRepeat", "FiniteSkip", "Prefetch", "MemoryCacheImpl"]))
    dataset = dataset.repeat(repeat_count).skip(5)
    dataset = dataset.take(-1).skip(0).repeat(1)
    dataset = dataset.prefetch(0).prefetch(1).cache()

    opts = dataset_ops.Options()
    opts.experimental_noop_elimination = True
    dataset = dataset.with_options(opts)

    next_element = dataset.make_one_shot_iterator().get_next()

    with self.test_session() as sess:
        for expected in range(5):
            actual = sess.run(next_element)
            self.assertAllEqual(actual, expected)
        with self.assertRaises(errors.OutOfRangeError):
            sess.run(next_element)
def testLatencyStatsOptimization(self):
    """Checks that latency stats nodes are inserted on every edge.

    With `latency_all_edges` set, `assert_next` expects a "LatencyStats"
    node before the map, between map and prefetch, and after the prefetch.
    The aggregator is then checked for one recorded latency per dataset
    node.
    """
    aggregator = stats_aggregator.StatsAggregator()

    dataset = dataset_ops.Dataset.from_tensors(1)
    dataset = dataset.apply(
        optimization.assert_next([
            "LatencyStats", "Map", "LatencyStats", "Prefetch", "LatencyStats"
        ]))
    dataset = dataset.map(lambda x: x * x)
    dataset = dataset.prefetch(1)

    opts = dataset_ops.Options()
    opts.experimental_optimization.apply_default_optimizations = False
    opts.experimental_stats.latency_all_edges = True
    opts.experimental_stats.aggregator = aggregator
    dataset = dataset.with_options(opts)

    self.assertDatasetProduces(
        dataset,
        expected_output=[1],
        requires_initialization=True,
        num_test_iterations=1)

    handle = self.getHandle(aggregator)
    recorded_nodes = (
        "record_latency::TensorDataset",
        "record_latency::MapDataset",
        "record_latency::PrefetchDataset",
    )
    for node_name in recorded_nodes:
        self.assertStatisticsHasCount(
            handle, self.regexForNodeName(node_name), 1)
def testLatencyStatsOptimization(self):
    """Checks that latency stats nodes are inserted on every edge.

    A fresh `StatsOptions` is installed on the options object, with
    `latency_all_edges` set and the aggregator attached. The aggregator
    summary is then checked for one recorded latency per dataset node.
    """
    aggregator = stats_aggregator.StatsAggregator()

    dataset = dataset_ops.Dataset.from_tensors(1)
    dataset = dataset.apply(
        optimization.assert_next([
            "LatencyStats", "Map", "LatencyStats", "Prefetch", "LatencyStats"
        ]))
    dataset = dataset.map(lambda x: x * x)
    dataset = dataset.prefetch(1)

    opts = dataset_ops.Options()
    opts.experimental_stats = stats_options.StatsOptions()
    opts.experimental_stats.latency_all_edges = True
    opts.experimental_stats.aggregator = aggregator
    dataset = dataset.with_options(opts)

    self.assertDatasetProduces(
        dataset,
        expected_output=[1],
        requires_initialization=True,
        num_test_iterations=1)

    summary_str = self.evaluate(aggregator.get_summary())
    expected_tags = (
        "record_latency_TensorDataset/_1",
        "record_latency_MapDataset/_4",
        "record_latency_PrefetchDataset/_6",
    )
    for tag in expected_tags:
        self._assertSummaryHasCount(summary_str, tag, 1)
def make_dataset(node_names):
    """Applies `assert_next`, then `apply_fn_1`, then `apply_fn_2`.

    Uses `base_dataset`, `apply_fn_1` and `apply_fn_2` from the enclosing
    scope.

    Args:
      node_names: Transformation names passed to `assert_next`.

    Returns:
      The result of `apply_fn_2(apply_fn_1(asserted_dataset))`.
    """
    asserted = base_dataset.apply(optimization.assert_next(node_names))
    return apply_fn_2(apply_fn_1(asserted))
def _make_dataset(node_names):
    """Applies `assert_next` and then `map_and_batch(map_fn, 100)`.

    Uses `base_dataset` and `map_fn` from the enclosing scope.

    Args:
      node_names: Transformation names passed to `assert_next`.

    Returns:
      The resulting dataset.
    """
    asserted = base_dataset.apply(optimization.assert_next(node_names))
    return asserted.apply(batching.map_and_batch(map_fn, 100))
def _make_dataset(node_names):
    """Applies `assert_next`, then a map, then a batch.

    Uses `base_dataset`, `map_fn`, `num_parallel_calls` and `batch_size`
    from the enclosing scope.

    Args:
      node_names: Transformation names passed to `assert_next`.

    Returns:
      The resulting dataset.
    """
    dataset = base_dataset.apply(optimization.assert_next(node_names))
    dataset = dataset.map(map_fn, num_parallel_calls=num_parallel_calls)
    dataset = dataset.batch(batch_size)
    return dataset
def testNoRegularMap(self):
    """Checks that a plain map and take stay as "Map" and "FiniteTake".

    The dataset is passed through `_enable_autotune_buffers` and must
    produce 1..50.
    """
    dataset = dataset_ops.Dataset.range(100).apply(
        optimization.assert_next(["Map", "FiniteTake"]))
    dataset = dataset.map(lambda x: x + 1)
    dataset = dataset.take(50)
    dataset = self._enable_autotune_buffers(dataset)

    self.assertDatasetProduces(dataset, range(1, 51))
def testOptions(self):
    """Asserts "MapAndBatch" for a map+batch pipeline evaluated via `reduce`.

    No options are set inside this method. The reduce keeps its initial
    state and its result is not checked.
    """
    dataset = dataset_ops.Dataset.range(5).apply(
        optimization.assert_next(["MapAndBatch"]))
    dataset = dataset.map(lambda x: x)
    dataset = dataset.batch(5)

    reduced = dataset.reduce(0, lambda state, value: state)
    self.evaluate(reduced)
def flat_map_fn(_):
    """Returns a one-element dataset asserting that its `skip(0)` is dropped.

    The argument is ignored. "MemoryCacheImpl" is asserted as the next
    transformation even though `skip(0)` precedes the cache.
    """
    inner = dataset_ops.Dataset.from_tensors(0).apply(
        optimization.assert_next(["MemoryCacheImpl"]))
    # The skip(0) should be removed by noop elimination.
    return inner.skip(0).cache()
def testAssertNext(self):
    """Checks that `assert_next(["Map"])` passes when followed by a map."""
    dataset = dataset_ops.Dataset.from_tensors(0)
    dataset = dataset.apply(optimization.assert_next(["Map"]))
    dataset = dataset.map(lambda x: x)

    self.assertDatasetProduces(dataset, expected_output=[0])