def testFeaturesStats(self):
  num_epochs = 5
  total_records = num_epochs * self._num_records
  batch_size = 2
  stats_aggregator = stats_ops.StatsAggregator()
  dataset = self.make_batch_feature(
      filenames=self.test_filenames[0],
      num_epochs=num_epochs,
      batch_size=batch_size,
      shuffle=True,
      shuffle_seed=5,
      drop_final_batch=True).apply(
          stats_ops.set_stats_aggregator(stats_aggregator))
  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()
  summary_t = stats_aggregator.get_summary()

  with self.test_session() as sess:
    sess.run(iterator.initializer)
    for _ in range(total_records // batch_size):
      sess.run(next_element)
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
    self._assertSummaryHasCount(
        sess.run(summary_t), "record_stats:features", total_records)
    self._assertSummaryHasCount(
        sess.run(summary_t), "record_stats:feature-values", total_records)
    self._assertSummaryHasSum(
        sess.run(summary_t), "record_stats:features", total_records * 3)
    self._assertSummaryHasSum(
        sess.run(summary_t), "record_stats:feature-values",
        self._sum_keywords(1) * num_epochs + 2 * total_records)

def testBytesProduced(self):
  stats_aggregator = stats_ops.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).map(
      lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply(
          stats_ops.bytes_produced_stats("bytes_produced")).apply(
              stats_ops.set_stats_aggregator(stats_aggregator))
  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()
  summary_t = stats_aggregator.get_summary()

  with self.test_session() as sess:
    sess.run(iterator.initializer)
    expected_sum = 0.0
    for i in range(100):
      self.assertAllEqual(
          np.array([i] * i, dtype=np.int64), sess.run(next_element))
      summary_str = sess.run(summary_t)
      self._assertSummaryHasCount(summary_str, "bytes_produced", float(i + 1))
      expected_sum += i * 8.0
      self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
    summary_str = sess.run(summary_t)
    self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0)
    self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)

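# Worked check of the byte accounting above: element i is a rank-1 int64
# tensor with i entries, so it contributes i * 8 bytes, and the final
# "bytes_produced" sum is
#
#   sum(i * 8.0 for i in range(100))  # == 39600.0
#
# which is the value `expected_sum` holds when the loop finishes.
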
def testLatencyStatsOptimization(self):
  stats_aggregator = stats_ops.StatsAggregator()
  dataset = dataset_ops.Dataset.from_tensors(1).apply(
      optimization.assert_next(
          ["LatencyStats", "Map", "LatencyStats", "Prefetch",
           "LatencyStats"])).map(lambda x: x * x).prefetch(1).apply(
               optimization.optimize(["latency_all_edges"])).apply(
                   stats_ops.set_stats_aggregator(stats_aggregator))
  iterator = dataset.make_initializable_iterator()
  get_next = iterator.get_next()
  summary_t = stats_aggregator.get_summary()

  with self.test_session() as sess:
    sess.run(iterator.initializer)
    self.assertEqual(1 * 1, sess.run(get_next))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(get_next)
    summary_str = sess.run(summary_t)
    self._assertSummaryHasCount(summary_str,
                                "record_latency_TensorDataset/_1", 1)
    self._assertSummaryHasCount(summary_str,
                                "record_latency_MapDataset/_4", 1)
    self._assertSummaryHasCount(summary_str,
                                "record_latency_PrefetchDataset/_6", 1)

def testPrefetchBufferUtilization(self):
  stats_aggregator = stats_ops.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).map(
      lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).prefetch(
          -1).apply(stats_ops.set_stats_aggregator(stats_aggregator))
  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()
  summary_t = stats_aggregator.get_summary()

  with self.cached_session() as sess:
    sess.run(iterator.initializer)
    for i in range(100):
      self.assertAllEqual(
          np.array([i] * i, dtype=np.int64), sess.run(next_element))
      summary_str = sess.run(summary_t)
      self._assertSummaryHasCount(summary_str,
                                  "Prefetch::buffer_utilization",
                                  float(i + 1))
      self._assertSummaryContains(summary_str, "Prefetch::buffer_capacity")
      self._assertSummaryContains(summary_str, "Prefetch::buffer_size")
      self._assertSummaryHasRange(summary_str,
                                  "Prefetch::buffer_utilization", 0, 1)
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
    summary_str = sess.run(summary_t)
    self._assertSummaryHasCount(summary_str, "Prefetch::buffer_utilization",
                                100)

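# Note (an assumption, not something the test asserts directly):
# "Prefetch::buffer_utilization" is presumably the ratio
# buffer_size / buffer_capacity of the two scalars checked above, which is
# why the test can bound it to the range [0, 1].
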
def testLatencyStats(self):
  stats_aggregator = stats_ops.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).apply(
      stats_ops.latency_stats("record_latency")).apply(
          stats_ops.set_stats_aggregator(stats_aggregator))
  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()
  summary_t = stats_aggregator.get_summary()

  with self.test_session() as sess:
    sess.run(iterator.initializer)
    for i in range(100):
      self.assertEqual(i, sess.run(next_element))
      self._assertSummaryHasCount(
          sess.run(summary_t), "record_latency", float(i + 1))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
    self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 100.0)

def testMultipleIteratorsSameAggregator(self):
  stats_aggregator = stats_ops.StatsAggregator()
  dataset = dataset_ops.Dataset.range(100).apply(
      stats_ops.latency_stats("record_latency")).apply(
          stats_ops.set_stats_aggregator(stats_aggregator))
  iterator_0 = dataset.make_initializable_iterator()
  iterator_1 = dataset.make_initializable_iterator()
  next_element = iterator_0.get_next() + iterator_1.get_next()
  summary_t = stats_aggregator.get_summary()

  with self.cached_session() as sess:
    sess.run([iterator_0.initializer, iterator_1.initializer])
    for i in range(100):
      self.assertEqual(i * 2, sess.run(next_element))
      # Both iterators report into the same aggregator, so the
      # "record_latency" count advances by two per step.
      self._assertSummaryHasCount(
          sess.run(summary_t), "record_latency", float(2 * (i + 1)))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
    self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 200.0)

def testFilteredElementsStats(self):
  stats_aggregator = stats_ops.StatsAggregator()
  dataset = dataset_ops.Dataset.range(101).filter(
      lambda x: math_ops.equal(math_ops.mod(x, 3), 0)).apply(
          stats_ops.set_stats_aggregator(stats_aggregator))
  iterator = dataset.make_initializable_iterator()
  next_element = iterator.get_next()
  summary_t = stats_aggregator.get_summary()

  with self.test_session() as sess:
    sess.run(iterator.initializer)
    for i in range(34):
      self.assertEqual(i * 3, sess.run(next_element))
      if i != 0:
        self._assertSummaryHasScalarValue(
            sess.run(summary_t), "Filter::dropped_elements", float(i * 2))
      self._assertSummaryHasScalarValue(
          sess.run(summary_t), "Filter::filtered_elements", float(i + 1))
    with self.assertRaises(errors.OutOfRangeError):
      sess.run(next_element)
    # range(101) keeps the 34 multiples of 3 (0, 3, ..., 99) and drops the
    # remaining 67 elements.
    self._assertSummaryHasScalarValue(
        sess.run(summary_t), "Filter::dropped_elements", 67.0)
    self._assertSummaryHasScalarValue(
        sess.run(summary_t), "Filter::filtered_elements", 34.0)

def _build_dataset_stats_aggregator(self):
  stats_aggregator = stats_ops.StatsAggregator()
  return dataset_ops.Dataset.range(10).apply(
      stats_ops.set_stats_aggregator(stats_aggregator))
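
# A sketch of how a serialization test might exercise the builder above,
# assuming a DatasetSerializationTestBase-style harness with a
# run_core_tests(ds_fn1, ds_fn2, num_outputs) helper (hedged: both names are
# assumptions here, so adjust to the harness actually in use):
#
#   def testStatsAggregatorSerialization(self):
#     num_outputs = 10
#     self.run_core_tests(self._build_dataset_stats_aggregator, None,
#                         num_outputs)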