def testFilteredElementsStats(self):
  """Checks dropped/filtered element counters of a `filter` transformation."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(101).filter(
      lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
  dataset = self.datasetExperimentalStats(dataset, aggregator)
  get_next = self.getNext(dataset, requires_initialization=True)

  # 101 elements, every third survives: 34 outputs, 67 dropped overall.
  for step in range(34):
    self.assertEqual(step * 3, self.evaluate(get_next()))
    stats_handle = self.getHandle(aggregator)
    # No elements have been dropped yet before the first surviving one.
    if step != 0:
      self.assertStatisticsHasScalarValue(
          stats_handle,
          self.regexForNodeName("FilterDataset", "dropped_elements"),
          float(step * 2))
    self.assertStatisticsHasScalarValue(
        stats_handle,
        self.regexForNodeName("FilterDataset", "filtered_elements"),
        float(step + 1))

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
  stats_handle = self.getHandle(aggregator)
  self.assertStatisticsHasScalarValue(
      stats_handle,
      self.regexForNodeName("FilterDataset", "dropped_elements"), 67.0)
  self.assertStatisticsHasScalarValue(
      stats_handle,
      self.regexForNodeName("FilterDataset", "filtered_elements"), 34.0)
def _benchmark(self,
               interleave_version,
               num_elements,
               initial_delay_us=0,
               remainder_delay_us=0,
               cycle_length=10,
               iters=100,
               num_parallel_calls=None,
               attach_stats_aggregator=False,
               name=None):
  """Reports the median wall time to consume `num_elements` interleaved items.

  Optionally attaches a stats aggregator so the overhead of statistics
  collection is included in the measurement.
  """
  dataset = self.make_dataset(interleave_version, initial_delay_us,
                              remainder_delay_us, cycle_length,
                              num_parallel_calls)
  if attach_stats_aggregator:
    aggregator = stats_aggregator.StatsAggregator()
    options = dataset_ops.Options()
    options.experimental_stats.aggregator = aggregator
    dataset = dataset.with_options(options)

  # Skipping forces the pipeline to produce num_elements items; the timed
  # next() call then pulls the first post-skip element.
  dataset = dataset.skip(num_elements)
  durations = []
  for _ in range(iters):
    start_time = time.time()
    next(iter(dataset))
    durations.append(time.time() - start_time)
  self.report_benchmark(
      iters=iters, wall_time=np.median(durations), name=name)
def testMultipleDatasetWithPrefixes(self, dataset_transformation):
  """Verifies per-dataset prefixes keep latency stats of two pipelines apart."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).apply(
      stats_ops.latency_stats("record_latency"))
  dataset = dataset_transformation(dataset, aggregator, prefix="dataset1")
  dataset2 = dataset_ops.Dataset.range(100).apply(
      stats_ops.latency_stats("record_latency"))
  dataset2 = dataset_transformation(dataset2, aggregator, prefix="dataset2")
  get_next1 = self.getNext(dataset, requires_initialization=True)
  get_next2 = self.getNext(dataset2, requires_initialization=True)

  for step in range(100):
    # Both pipelines yield `step`, so the sum is 2 * step.
    self.assertEqual(step * 2, self.evaluate(get_next1() + get_next2()))
    self._assertSummaryHasCount(
        self.evaluate(aggregator.get_summary()), "dataset1_record_latency",
        float(step + 1))
    self._assertSummaryHasCount(
        self.evaluate(aggregator.get_summary()), "dataset2_record_latency",
        float(step + 1))

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next1())
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next2())
  self._assertSummaryHasCount(
      self.evaluate(aggregator.get_summary()), "dataset1_record_latency",
      100.0)
  self._assertSummaryHasCount(
      self.evaluate(aggregator.get_summary()), "dataset2_record_latency",
      100.0)
def testPrefetchBufferUtilization(self, dataset_transformation):
  """Checks buffer utilization/capacity/size stats of an autotuned prefetch."""
  aggregator = stats_aggregator.StatsAggregator()
  # prefetch(-1) requests an autotuned buffer.
  dataset = dataset_ops.Dataset.range(100).map(lambda x: array_ops.tile(
      [x], ops.convert_to_tensor([x]))).prefetch(-1)
  dataset = dataset_transformation(dataset, aggregator)
  get_next = self.getNext(dataset, requires_initialization=True)

  for step in range(100):
    # Element `step` is the value `step` tiled `step` times.
    self.assertAllEqual(
        np.array([step] * step, dtype=np.int64), self.evaluate(get_next()))
    summary = self.evaluate(aggregator.get_summary())
    self._assertSummaryHasCount(
        summary,
        self.regexForNodeName("PrefetchDataset", "buffer_utilization"),
        float(step + 1))
    self._assertSummaryContains(
        summary, self.regexForNodeName("PrefetchDataset", "buffer_capacity"))
    self._assertSummaryContains(
        summary, self.regexForNodeName("PrefetchDataset", "buffer_size"))
    # Utilization is a ratio, so it must stay within [0, 1].
    self._assertSummaryHasRange(
        summary,
        self.regexForNodeName("PrefetchDataset", "buffer_utilization"), 0, 1)

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
  summary = self.evaluate(aggregator.get_summary())
  self._assertSummaryHasCount(
      summary,
      self.regexForNodeName("PrefetchDataset", "buffer_utilization"), 100)
def testFilteredElementsStats(self, dataset_transformation):
  """Checks dropped/filtered element counters of a `filter` transformation.

  Args:
    dataset_transformation: Callable attaching the stats aggregator to a
      dataset (parameterized by the test harness).
  """
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(101).filter(
      lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
  dataset = dataset_transformation(dataset, aggregator)
  next_element = self.getNext(dataset, requires_initialization=True)

  # 101 elements, every third survives: 34 outputs, 67 dropped overall.
  for i in range(34):
    self.assertEqual(i * 3, self.evaluate(next_element()))
    summary_str = self.evaluate(aggregator.get_summary())
    # Fixed: the original used `i is not 0`, an identity comparison with an
    # int literal (implementation-dependent; SyntaxWarning on Python >= 3.8).
    if i != 0:
      self._assertSummaryHasScalarValue(
          summary_str,
          self.regexForNodeName("FilterDataset", "dropped_elements"),
          float(i * 2))
    self._assertSummaryHasScalarValue(
        summary_str,
        self.regexForNodeName("FilterDataset", "filtered_elements"),
        float(i + 1))

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(next_element())
  summary_str = self.evaluate(aggregator.get_summary())
  self._assertSummaryHasScalarValue(
      summary_str, self.regexForNodeName("FilterDataset", "dropped_elements"),
      67.0)
  self._assertSummaryHasScalarValue(
      summary_str,
      self.regexForNodeName("FilterDataset", "filtered_elements"), 34.0)
def testLatencyStatsOptimizationV2(self):
  """Checks that the latency-stats optimization inserts LatencyStats nodes."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.from_tensors(1).apply(
      optimization.assert_next([
          "LatencyStats", "Map", "LatencyStats", "Prefetch", "LatencyStats"
      ])).map(lambda x: x * x).prefetch(1)
  options = dataset_ops.Options()
  options.experimental_stats = stats_options.StatsOptions(aggregator)
  dataset = dataset.with_options(options)
  iterator = dataset.make_initializable_iterator()
  get_next = iterator.get_next()
  summary_t = aggregator.get_summary()

  with self.cached_session() as sess:
    sess.run(iterator.initializer)
    # The single element is 1, and map squares it: 1 * 1 == 1.
    self.assertEqual(1, sess.run(get_next))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)
    summary_str = sess.run(summary_t)
    # One latency record per instrumented dataset node.
    self._assertSummaryHasCount(summary_str,
                                "record_latency_TensorDataset/_1", 1)
    self._assertSummaryHasCount(summary_str, "record_latency_MapDataset/_4",
                                1)
    self._assertSummaryHasCount(summary_str,
                                "record_latency_PrefetchDataset/_6", 1)
def _benchmark(self,
               interleave_version,
               num_elements,
               initial_delay_us=0,
               remainder_delay_us=0,
               cycle_length=10,
               iters=100,
               num_parallel_calls=None,
               attach_stats_aggregator=False,
               name=None):
  """Benchmarks consuming `num_elements` from the interleave dataset.

  When `attach_stats_aggregator` is set, the stats-collection overhead is
  included in the measured time.
  """
  dataset = self.make_dataset(
      interleave_version=interleave_version,
      initial_delay=initial_delay_us,
      remainder_delay=remainder_delay_us,
      cycle_length=cycle_length,
      num_parallel_calls=num_parallel_calls)
  if attach_stats_aggregator:
    aggregator = stats_aggregator.StatsAggregator()
    options = dataset_ops.Options()
    options.experimental_stats.aggregator = aggregator
    dataset = dataset.with_options(options)

  self.run_and_report_benchmark(
      dataset=dataset,
      num_elements=num_elements,
      iters=iters,
      warmup=True,
      name=name)
def testBytesProduced(self, dataset_transformation):
  """Checks count and running sum of the bytes_produced statistic."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).map(
      lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply(
          stats_ops.bytes_produced_stats("bytes_produced"))
  dataset = dataset_transformation(dataset, aggregator)
  get_next = self.getNext(dataset, requires_initialization=True)
  summary_t = aggregator.get_summary()

  expected_sum = 0.0
  for step in range(100):
    self.assertAllEqual(
        np.array([step] * step, dtype=np.int64), self.evaluate(get_next()))
    summary = self.evaluate(aggregator.get_summary())
    self._assertSummaryHasCount(summary, "bytes_produced", float(step + 1))
    # Element `step` holds `step` int64 values, i.e. step * 8 bytes.
    expected_sum += step * 8.0
    self._assertSummaryHasSum(summary, "bytes_produced", expected_sum)

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
  # TODO(shivaniagrawal): intentional breaking case
  summary = self.evaluate(summary_t)
  self._assertSummaryHasCount(summary, "bytes_produced", 100.0)
  self._assertSummaryHasSum(summary, "bytes_produced", expected_sum)
def testMultiplePrefetchStats(self, dataset_transformation):
  """Checks that stats of two prefetch nodes in one pipeline stay distinct."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(10).prefetch(2).map(
      lambda x: math_ops.add(x, 2)).prefetch(1)
  dataset = dataset_transformation(dataset, aggregator)
  get_next = self.getNext(dataset, requires_initialization=True)

  for step in range(10):
    self.assertEqual(step + 2, self.evaluate(get_next()))
    summary = self.evaluate(aggregator.get_summary())
    # TODO(shivaniagarwal): using exact name of prefetch node than the regex,
    # to differentiate between two prefetch. This might break in future, at
    # which point, it would be best to disable this test.
    self._assertSummaryHasScalarValue(
        summary, "PrefetchDataset/_5::buffer_capacity", 2)
    self._assertSummaryContains(summary, "PrefetchDataset/_5::buffer_size")
    self._assertSummaryHasScalarValue(
        summary, "PrefetchDataset/_8::buffer_capacity", 1)
    self._assertSummaryContains(summary, "PrefetchDataset/_8::buffer_size")

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
def _testParallelCallsStats(self,
                            dataset_fn,
                            dataset_name,
                            num_output,
                            dataset_transformation,
                            function_processing_time=False,
                            check_elements=True):
  """Drives a parallel dataset and checks its parallel-calls statistics.

  Graph-mode helper: builds an initializable iterator and runs it inside a
  cached session.
  """
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_fn()
  dataset = dataset_transformation(dataset, aggregator)
  iterator = dataset_ops.make_initializable_iterator(dataset)
  next_element = iterator.get_next()
  summary_t = aggregator.get_summary()

  with self.cached_session() as sess:
    sess.run(iterator.initializer)
    for step in range(num_output):
      value = sess.run(next_element)
      if check_elements:
        self.assertAllEqual(np.array([step] * step, dtype=np.int64), value)
      summary = sess.run(summary_t)
      if function_processing_time:
        self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
            summary, "::execution_time", float(step + 1))
      self._assertSummaryContains(summary,
                                  dataset_name + "::num_parallel_calls")
      self._assertSummaryContains(summary,
                                  dataset_name + "::active_parallel_calls")
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
    if function_processing_time:
      summary = sess.run(summary_t)
      self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
          summary, "::execution_time", float(num_output))
def testMultipleTags(self):
  """Checks two latency tags recorded on the same pipeline stay independent."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).apply(
      stats_ops.latency_stats("record_latency")).apply(
          stats_ops.latency_stats("record_latency_2"))
  dataset = self.datasetExperimentalStats(dataset, aggregator)
  get_next = self.getNext(dataset, requires_initialization=True)

  for step in range(100):
    self.assertEqual(step, self.evaluate(get_next()))
    stats_handle = self.getHandle(aggregator)
    self.assertStatisticsHasCount(
        stats_handle,
        "record_latency",
        float(step + 1),
        2 * step + 3,
        offset=1)
    self.assertStatisticsHasCount(stats_handle, "record_latency_2",
                                  float(step + 1), 2 * step + 3)

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
  stats_handle = self.getHandle(aggregator)
  self.assertStatisticsHasCount(
      stats_handle, "record_latency", 100.0, 201, offset=1)
  self.assertStatisticsHasCount(stats_handle, "record_latency_2", 100.0, 201)
def testPrefetchBufferUtilization(self, dataset_transformation):
  """Checks prefetch buffer statistics (graph-mode iterator variant)."""
  aggregator = stats_aggregator.StatsAggregator()
  # prefetch(-1) requests an autotuned buffer.
  dataset = dataset_ops.Dataset.range(100).map(lambda x: array_ops.tile(
      [x], ops.convert_to_tensor([x]))).prefetch(-1)
  dataset = dataset_transformation(dataset, aggregator)
  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()
  summary_t = aggregator.get_summary()

  with self.cached_session() as sess:
    self.evaluate(iterator.initializer)
    for step in range(100):
      self.assertAllEqual(
          np.array([step] * step, dtype=np.int64),
          self.evaluate(next_element))
      summary = self.evaluate(summary_t)
      self._assertSummaryHasCount(summary, "Prefetch::buffer_utilization",
                                  float(step + 1))
      self._assertSummaryContains(summary, "Prefetch::buffer_capacity")
      self._assertSummaryContains(summary, "Prefetch::buffer_size")
      # Utilization is a ratio, so it must stay within [0, 1].
      self._assertSummaryHasRange(summary, "Prefetch::buffer_utilization", 0,
                                  1)
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element)
    summary = self.evaluate(summary_t)
    self._assertSummaryHasCount(summary, "Prefetch::buffer_utilization", 100)
def testMultipleDatasetWithPrefixes(self, dataset_transformation):
  """Checks per-dataset prefixes in graph mode with two iterators."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).apply(
      stats_ops.latency_stats("record_latency"))
  dataset = dataset_transformation(dataset, aggregator, prefix="dataset1")
  dataset2 = dataset_ops.Dataset.range(100).apply(
      stats_ops.latency_stats("record_latency"))
  dataset2 = dataset_transformation(dataset2, aggregator, prefix="dataset2")
  iterator_0 = dataset.make_initializable_iterator()
  iterator_1 = dataset2.make_initializable_iterator()
  # Summing both get_next ops advances both pipelines per evaluation.
  next_element = iterator_0.get_next() + iterator_1.get_next()
  summary_t = aggregator.get_summary()

  with self.test_session() as sess:
    self.evaluate([iterator_0.initializer, iterator_1.initializer])
    for step in range(100):
      self.assertEqual(step * 2, self.evaluate(next_element))
      self._assertSummaryHasCount(
          self.evaluate(summary_t), "dataset1_record_latency",
          float(step + 1))
      self._assertSummaryHasCount(
          self.evaluate(summary_t), "dataset2_record_latency",
          float(step + 1))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element)
    self._assertSummaryHasCount(
        self.evaluate(summary_t), "dataset1_record_latency", 100.0)
    self._assertSummaryHasCount(
        self.evaluate(summary_t), "dataset2_record_latency", 100.0)
def testMultiplePrefetchStats(self):
  """Checks that stats of two prefetch nodes in one pipeline stay distinct."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(10).prefetch(2).filter(
      lambda x: math_ops.equal(math_ops.mod(x, 2), 0)).prefetch(1)
  dataset = self.datasetExperimentalStats(dataset, aggregator)
  get_next = self.getNext(dataset, requires_initialization=True)

  # Only the 5 even values of range(10) survive the filter.
  for step in range(5):
    self.assertEqual(step * 2, self.evaluate(get_next()))
    stats_handle = self.getHandle(aggregator)
    # TODO(shivaniagarwal): using exact name of prefetch node than the regex,
    # to differentiate between two prefetch. This might break in future, at
    # which point, it would be best to disable this test.
    self.assertStatisticsHasScalarValue(
        stats_handle, "PrefetchDataset/_5::buffer_capacity", 2)
    self.assertStatisticsContains(stats_handle,
                                  "PrefetchDataset/_5::buffer_size")
    self.assertStatisticsHasScalarValue(
        stats_handle, "PrefetchDataset/_8::buffer_capacity", 1)
    self.assertStatisticsContains(stats_handle,
                                  "PrefetchDataset/_8::buffer_size")

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
def testBytesProduced(self, dataset_transformation):
  """Checks count and running sum of bytes_produced (graph-mode variant)."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).map(
      lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply(
          stats_ops.bytes_produced_stats("bytes_produced"))
  dataset = dataset_transformation(dataset, aggregator)
  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()
  summary_t = aggregator.get_summary()

  with self.cached_session() as sess:
    self.evaluate(iterator.initializer)
    expected_sum = 0.0
    for step in range(100):
      self.assertAllEqual(
          np.array([step] * step, dtype=np.int64),
          self.evaluate(next_element))
      summary = self.evaluate(summary_t)
      self._assertSummaryHasCount(summary, "bytes_produced", float(step + 1))
      # Element `step` holds `step` int64 values, i.e. step * 8 bytes.
      expected_sum += step * 8.0
      self._assertSummaryHasSum(summary, "bytes_produced", expected_sum)
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element)
    summary = self.evaluate(summary_t)
    self._assertSummaryHasCount(summary, "bytes_produced", 100.0)
    self._assertSummaryHasSum(summary, "bytes_produced", expected_sum)
def testLatencyStatsOptimizationAutotuneOn(self):
  """Checks latency-stats node insertion with autotune/model enabled."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.from_tensors(1).apply(
      testing.assert_next([
          "LatencyStats", "Map", "LatencyStats", "Prefetch", "LatencyStats",
          "MaxIntraOpParallelism", "LatencyStats", "Model",
          "SetStatsAggregator"
      ])).map(lambda x: x * x).prefetch(1)
  options = dataset_ops.Options()
  options.experimental_optimization.apply_default_optimizations = False
  options.experimental_stats.latency_all_edges = True
  options.experimental_stats.aggregator = aggregator
  dataset = dataset.with_options(options)

  # The single element 1 squared is still 1.
  self.assertDatasetProduces(
      dataset,
      expected_output=[1],
      requires_initialization=True,
      num_test_iterations=1)
  stats_handle = self.getHandle(aggregator)
  self.assertStatisticsHasCount(
      stats_handle, self.regexForNodeName("record_latency::TensorDataset"), 1)
  self.assertStatisticsHasCount(
      stats_handle, self.regexForNodeName("record_latency::MapDataset"), 1)
  self.assertStatisticsHasCount(
      stats_handle, self.regexForNodeName("record_latency::PrefetchDataset"),
      1)
def parallelCallsStats(self,
                       dataset_fn,
                       dataset_names,
                       num_output,
                       function_processing_time=False,
                       check_elements=True):
  """Drives a parallel dataset and checks its per-node parallelism stats."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_fn()
  dataset = self.datasetExperimentalStats(dataset, aggregator)
  get_next = self.getNext(dataset, requires_initialization=True)

  for step in range(num_output):
    value = self.evaluate(get_next())
    if check_elements:
      self.assertAllEqual(np.array([step] * step, dtype=np.int64), value)
    handle = self.getHandle(aggregator)
    for dataset_name in dataset_names:
      if function_processing_time:
        self.assertStatisticsHasCount(
            handle,
            r"(.*)::execution_time$",
            float(step + 1),
            greater_than=True)

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
  for dataset_name in dataset_names:
    self.assertStatisticsContains(
        handle, self.regexForNodeName(dataset_name, "thread_utilization"))
  if function_processing_time:
    handle = self.getHandle(aggregator)
    for dataset_name in dataset_names:
      self.assertStatisticsHasCount(
          handle,
          r"(.*)::execution_time$",
          float(num_output),
          greater_than=True)
def _testParallelCallsStats(self,
                            dataset_fn,
                            dataset_name,
                            num_output,
                            dataset_transformation,
                            function_processing_time=False,
                            check_elements=True):
  """Drives a parallel dataset and checks thread-utilization statistics."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_fn()
  dataset = dataset_transformation(dataset, aggregator)
  get_next = self.getNext(dataset, requires_initialization=True)

  for step in range(num_output):
    value = self.evaluate(get_next())
    if check_elements:
      self.assertAllEqual(np.array([step] * step, dtype=np.int64), value)
    summary = self.evaluate(aggregator.get_summary())
    if function_processing_time:
      self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
          summary, "::execution_time", float(step + 1))
    self._assertSummaryContains(summary,
                                dataset_name + "::thread_utilization")

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
  if function_processing_time:
    summary = self.evaluate(aggregator.get_summary())
    self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
        summary, "::execution_time", float(num_output))
def testFilteredElementsStats(self, dataset_transformation):
  """Checks dropped/filtered element counters (graph-mode variant).

  Args:
    dataset_transformation: Callable attaching the stats aggregator to a
      dataset (parameterized by the test harness).
  """
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(101).filter(
      lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
  dataset = dataset_transformation(dataset, aggregator)
  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()
  summary_t = aggregator.get_summary()

  # The session binding was unused; keep the context manager for graph setup.
  with self.test_session():
    self.evaluate(iterator.initializer)
    # 101 elements, every third survives: 34 outputs, 67 dropped overall.
    for i in range(34):
      self.assertEqual(i * 3, self.evaluate(next_element))
      # Fixed: the original used `i is not 0`, an identity comparison with an
      # int literal (implementation-dependent; SyntaxWarning on Python >= 3.8).
      if i != 0:
        self._assertSummaryHasScalarValue(
            self.evaluate(summary_t), "Filter::dropped_elements",
            float(i * 2))
      self._assertSummaryHasScalarValue(
          self.evaluate(summary_t), "Filter::filtered_elements",
          float(i + 1))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element)
    self._assertSummaryHasScalarValue(
        self.evaluate(summary_t), "Filter::dropped_elements", 67.0)
    self._assertSummaryHasScalarValue(
        self.evaluate(summary_t), "Filter::filtered_elements", 34.0)
def testFeaturesStats(self, dataset_transformation):
  """Checks feature-count statistics recorded by parse-example parsing."""
  num_epochs = 5
  total_records = num_epochs * self._num_records
  batch_size = 2
  aggregator = stats_aggregator.StatsAggregator()

  def dataset_fn():
    return self.make_batch_feature(
        filenames=self.test_filenames[0],
        num_epochs=num_epochs,
        batch_size=batch_size,
        shuffle=True,
        shuffle_seed=5,
        drop_final_batch=False)

  # Round up: a final partial batch still counts as one output.
  num_output = total_records // batch_size
  if total_records % batch_size:
    num_output = total_records // batch_size + 1

  self._testParallelCallsStats(
      dataset_fn, {self.regexForNodeName("ExperimentalParseExampleDataset")},
      num_output,
      dataset_transformation,
      check_elements=False)

  dataset = dataset_transformation(
      dataset_fn(), aggregator, prefix="record_stats")
  get_next = self.getNext(dataset, requires_initialization=True)
  for _ in range(num_output):
    self.evaluate(get_next())
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())

  self._assertSummaryHasCount(
      self.evaluate(aggregator.get_summary()),
      self.regexForNodeName("record_stats_ExperimentalParseExampleDataset",
                            "features_count"), total_records)
  self._assertSummaryHasCount(
      self.evaluate(aggregator.get_summary()),
      self.regexForNodeName("record_stats_ExperimentalParseExampleDataset",
                            "feature_values_count"), total_records)
  self._assertSummaryHasSum(
      self.evaluate(aggregator.get_summary()),
      self.regexForNodeName("record_stats_ExperimentalParseExampleDataset",
                            "features_count"), total_records * 4)
  self._assertSummaryHasSum(
      self.evaluate(aggregator.get_summary()),
      self.regexForNodeName("record_stats_ExperimentalParseExampleDataset",
                            "feature_values_count"),
      self._sum_keywords(1) * num_epochs + 3 * total_records)
def benchmark_stats(self):
  """Benchmarks map_and_batch throughput with a stats aggregator attached."""
  dataset = dataset_ops.Dataset.range(1).repeat()
  # Fixed: `Dataset.apply` accepts only the transformation function; the
  # original passed `num_parallel_calls=32` to `apply`, which raises a
  # TypeError. `num_parallel_calls` belongs to `map_and_batch` itself.
  dataset = dataset.apply(
      batching.map_and_batch(lambda x: x + 1, 1, num_parallel_calls=32))
  aggregator = stats_aggregator.StatsAggregator()
  options = dataset_ops.Options()
  options.experimental_stats.aggregator = aggregator
  dataset = dataset.with_options(options)
  self.run_and_report_benchmark(dataset, num_elements=1000, name="stats")
def benchmark_stats(self):
  """Benchmarks a parallel map pipeline with and without stats collection."""
  for stats in [True, False]:
    dataset = dataset_ops.Dataset.range(1000).repeat()
    dataset = dataset.map(lambda x: x + 1, num_parallel_calls=32)
    options = dataset_ops.Options()
    options.experimental_deterministic = False
    # Only the `stats=True` run pays the aggregator overhead.
    if stats:
      aggregator = stats_aggregator.StatsAggregator()
      options.experimental_stats.aggregator = aggregator
    dataset = dataset.with_options(options)
    self.run_and_report_benchmark(
        dataset, num_elements=10000, name="stats_%s" % stats)
def DISABLED_testFeaturesStats(self):
  """Checks feature-count statistics of ParseExampleDatasetV2 (disabled)."""
  num_epochs = 5
  total_records = num_epochs * self._num_records
  batch_size = 2

  def dataset_fn():
    return self.make_batch_feature(
        filenames=self._filenames[0],
        num_epochs=num_epochs,
        batch_size=batch_size,
        shuffle=True,
        shuffle_seed=5,
        drop_final_batch=False)

  # Round up: a final partial batch still counts as one output.
  num_output = total_records // batch_size
  if total_records % batch_size:
    num_output = total_records // batch_size + 1

  self.parallelCallsStats(
      dataset_fn, {"ParseExampleDatasetV2"}, num_output, check_elements=False)

  aggregator = stats_aggregator.StatsAggregator()
  dataset = self.datasetExperimentalStats(
      dataset_fn(), aggregator, prefix="record_stats")
  get_next = self.getNext(dataset, requires_initialization=True)
  for _ in range(num_output):
    self.evaluate(get_next())
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())

  stats_handle = self.getHandle(aggregator)
  self.assertStatisticsHasCount(
      stats_handle,
      self.regexForNodeName("record_stats::ParseExampleDatasetV2",
                            "features_count"), total_records)
  self.assertStatisticsHasCount(
      stats_handle,
      self.regexForNodeName("record_stats::ParseExampleDatasetV2",
                            "feature_values_count"), total_records)
  self.assertStatisticsHasSum(
      stats_handle,
      self.regexForNodeName("record_stats::ParseExampleDatasetV2",
                            "features_count"), total_records * 4)
  self.assertStatisticsHasSum(
      stats_handle,
      self.regexForNodeName("record_stats::ParseExampleDatasetV2",
                            "feature_values_count"),
      self._sum_keywords(1) * num_epochs + 3 * total_records)
def testFeaturesStats(self, dataset_transformation):
  """Checks feature-count statistics (graph-mode session variant)."""
  num_epochs = 5
  total_records = num_epochs * self._num_records
  batch_size = 2
  aggregator = stats_aggregator.StatsAggregator()

  def dataset_fn():
    return self.make_batch_feature(
        filenames=self.test_filenames[0],
        num_epochs=num_epochs,
        batch_size=batch_size,
        shuffle=True,
        shuffle_seed=5,
        drop_final_batch=False)

  # Round up: a final partial batch still counts as one output.
  num_output = total_records // batch_size
  if total_records % batch_size:
    num_output = total_records // batch_size + 1

  self._testParallelCallsStats(
      dataset_fn,
      "ParseExample",
      num_output,
      dataset_transformation,
      check_elements=False)

  dataset = dataset_transformation(
      dataset_fn(), aggregator, prefix="record_stats")
  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()
  summary_t = aggregator.get_summary()

  with self.test_session() as sess:
    self.evaluate(iterator.initializer)
    for _ in range(num_output):
      sess.run(next_element)
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
    self._assertSummaryHasCount(
        sess.run(summary_t), "record_stats_features", total_records)
    self._assertSummaryHasCount(
        sess.run(summary_t), "record_stats_feature-values", total_records)
    self._assertSummaryHasSum(
        sess.run(summary_t), "record_stats_features", total_records * 4)
    self._assertSummaryHasSum(
        sess.run(summary_t), "record_stats_feature-values",
        self._sum_keywords(1) * num_epochs + 3 * total_records)
def testLatencyStats(self, dataset_transformation):
  """Checks the latency-stats count grows by one per produced element."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).apply(
      stats_ops.latency_stats("record_latency"))
  dataset = dataset_transformation(dataset, aggregator)
  get_next = self.getNext(dataset, requires_initialization=True)

  for step in range(100):
    self.assertEqual(step, self.evaluate(get_next()))
    self._assertSummaryHasCount(
        self.evaluate(aggregator.get_summary()), "record_latency",
        float(step + 1))
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
  self._assertSummaryHasCount(
      self.evaluate(aggregator.get_summary()), "record_latency", 100.0)
def testPrefetchBufferScalars(self, dataset_transformation):
  """Checks scalar buffer stats of a size-1 prefetch buffer."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(10).map(
      lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).prefetch(1)
  dataset = dataset_transformation(dataset, aggregator)
  get_next = self.getNext(dataset, requires_initialization=True)

  for step in range(10):
    self.assertAllEqual(
        np.array([step] * step, dtype=np.int64), self.evaluate(get_next()))
    summary = self.evaluate(aggregator.get_summary())
    # Capacity is fixed at 1 and the buffer is always full after a get.
    self._assertSummaryHasScalarValue(summary, "Prefetch::buffer_capacity",
                                      1)
    self._assertSummaryHasScalarValue(summary, "Prefetch::buffer_size", 1)

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
def testPrefetchBufferUtilization(self):
  """Checks prefetch buffer statistics including event counts and offsets."""
  aggregator = stats_aggregator.StatsAggregator()
  # prefetch(-1) requests an autotuned buffer.
  dataset = dataset_ops.Dataset.range(100).map(lambda x: array_ops.tile(
      [x], ops.convert_to_tensor([x]))).prefetch(-1)
  dataset = self.datasetExperimentalStats(dataset, aggregator)
  get_next = self.getNext(dataset, requires_initialization=True)

  for step in range(100):
    self.assertAllEqual(
        np.array([step] * step, dtype=np.int64), self.evaluate(get_next()))
    stats_handle = self.getHandle(aggregator)
    self.assertStatisticsHasCount(
        stats_handle,
        self.regexForNodeName("PrefetchDataset", "buffer_utilization"),
        float(step + 1),
        3 * step + 4,
        offset=2)
    self.assertStatisticsContains(
        stats_handle,
        self.regexForNodeName("PrefetchDataset", "buffer_capacity"),
        3 * step + 4)
    self.assertStatisticsContains(
        stats_handle,
        self.regexForNodeName("PrefetchDataset", "buffer_size"),
        3 * step + 4,
        offset=1)
    # Utilization is a ratio, so it must stay within [0, 1].
    self.assertStatisticsHasRange(
        stats_handle,
        self.regexForNodeName("PrefetchDataset", "buffer_utilization"),
        0,
        1,
        3 * step + 4,
        offset=2)

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next())
  stats_handle = self.getHandle(aggregator)
  self.assertStatisticsHasCount(
      stats_handle,
      self.regexForNodeName("PrefetchDataset", "buffer_utilization"),
      100,
      301,
      offset=2)
def testReinitialize(self):
  """Checks stats accumulate across repeated iterator (re)initializations."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).apply(
      stats_ops.latency_stats("record_latency"))
  dataset = self.datasetExperimentalStats(dataset, aggregator)

  # Iterate the same dataset five times; the aggregator keeps accumulating.
  for epoch in range(5):
    get_next = self.getNext(dataset, requires_initialization=True)
    for step in range(100):
      self.assertEqual(step, self.evaluate(get_next()))
      stats_handle = self.getHandle(aggregator)
      self.assertStatisticsHasCount(stats_handle, "record_latency",
                                    float((epoch * 100) + step + 1),
                                    (epoch * 100) + step + 2)
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(get_next())
    stats_handle = self.getHandle(aggregator)
    self.assertStatisticsHasCount(stats_handle, "record_latency",
                                  (epoch + 1) * 100.0, (epoch * 100) + 101)
def testLatencyStats(self, dataset_transformation):
  """Checks the latency-stats count in graph mode with a cached session."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).apply(
      stats_ops.latency_stats("record_latency"))
  dataset = dataset_transformation(dataset, aggregator)
  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()
  summary_t = aggregator.get_summary()

  with self.cached_session() as sess:
    self.evaluate(iterator.initializer)
    for step in range(100):
      self.assertEqual(step, self.evaluate(next_element))
      self._assertSummaryHasCount(
          sess.run(summary_t), "record_latency", float(step + 1))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
    self._assertSummaryHasCount(
        self.evaluate(summary_t), "record_latency", 100.0)
def DISABLED_testMultipleIteratorsSameAggregator(self):
  """Checks two iterators over one dataset share the same aggregator (disabled)."""
  aggregator = stats_aggregator.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).apply(
      stats_ops.latency_stats("record_latency"))
  dataset = self.datasetExperimentalStats(dataset, aggregator)
  get_next1 = self.getNext(dataset, requires_initialization=True)
  get_next2 = self.getNext(dataset, requires_initialization=True)

  for step in range(100):
    # Both iterators yield `step`, so the sum is 2 * step; each pull records
    # one latency event, hence 2 * (step + 1) events so far.
    self.assertEqual(step * 2, self.evaluate(get_next1() + get_next2()))
    stats_handle = self.getHandle(aggregator)
    self.assertStatisticsHasCount(stats_handle, "record_latency",
                                  float(2 * (step + 1)), 2 * step + 3)

  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next1())
  with self.assertRaises(errors.OutOfRangeError):
    self.evaluate(get_next2())
  stats_handle = self.getHandle(aggregator)
  self.assertStatisticsHasCount(stats_handle, "record_latency", 200.0, 201)