# Assumed imports for these snippets (module paths vary across TF 1.x
# releases; stats_ops and optimization lived under tensorflow.contrib.data
# before moving to tensorflow.python.data.experimental.ops):
#   import numpy as np
#   from tensorflow.python.data.experimental.ops import optimization
#   from tensorflow.python.data.experimental.ops import stats_ops
#   from tensorflow.python.data.ops import dataset_ops
#   from tensorflow.python.framework import errors
#   from tensorflow.python.framework import ops
#   from tensorflow.python.ops import array_ops
#   from tensorflow.python.ops import math_ops

    def testPrefetchBufferUtilization(self):
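        # Verifies that an autotuned prefetch buffer (prefetch(-1)) reports
        # buffer_utilization, buffer_capacity and buffer_size through the
        # aggregator's summary, and that utilization stays within [0, 1].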
        stats_aggregator = stats_ops.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).map(lambda x: array_ops.tile(
            [x], ops.convert_to_tensor([x]))).prefetch(-1).apply(
                stats_ops.set_stats_aggregator(stats_aggregator))
        iterator = dataset.make_initializable_iterator()
        next_element = iterator.get_next()
        summary_t = stats_aggregator.get_summary()

        with self.cached_session() as sess:
            sess.run(iterator.initializer)
            for i in range(100):
                self.assertAllEqual(np.array([i] * i, dtype=np.int64),
                                    sess.run(next_element))
                summary_str = sess.run(summary_t)
                self._assertSummaryHasCount(summary_str,
                                            "Prefetch::buffer_utilization",
                                            float(i + 1))
                self._assertSummaryContains(summary_str,
                                            "Prefetch::buffer_capacity")
                self._assertSummaryContains(summary_str,
                                            "Prefetch::buffer_size")
                self._assertSummaryHasRange(summary_str,
                                            "Prefetch::buffer_utilization", 0,
                                            1)
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(next_element)
            summary_str = sess.run(summary_t)
            self._assertSummaryHasCount(summary_str,
                                        "Prefetch::buffer_utilization", 100)
    def testMultipleDatasetWithTags(self):
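        # Two pipelines share one StatsAggregator; the "dataset1"/"dataset2"
        # tags are prefixed to the stat names so each pipeline's latency
        # counts can be checked independently.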
        stats_aggregator = stats_ops.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).apply(
            stats_ops.latency_stats("record_latency")).apply(
                stats_ops.set_stats_aggregator(stats_aggregator, "dataset1"))
        dataset2 = dataset_ops.Dataset.range(100).apply(
            stats_ops.latency_stats("record_latency")).apply(
                stats_ops.set_stats_aggregator(stats_aggregator, "dataset2"))
        iterator_0 = dataset.make_initializable_iterator()
        iterator_1 = dataset2.make_initializable_iterator()
        next_element = iterator_0.get_next() + iterator_1.get_next()
        summary_t = stats_aggregator.get_summary()

        with self.cached_session() as sess:
            sess.run([iterator_0.initializer, iterator_1.initializer])
            for i in range(100):
                self.assertEqual(i * 2, sess.run(next_element))
                self._assertSummaryHasCount(sess.run(summary_t),
                                            "dataset1_record_latency",
                                            float(i + 1))
                self._assertSummaryHasCount(sess.run(summary_t),
                                            "dataset2_record_latency",
                                            float(i + 1))
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(next_element)
            self._assertSummaryHasCount(sess.run(summary_t),
                                        "dataset1_record_latency", 100.0)
            self._assertSummaryHasCount(sess.run(summary_t),
                                        "dataset2_record_latency", 100.0)
    def testBytesProduced(self):
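        # Element i is a length-i int64 vector, so producing it adds
        # i * 8 bytes to the running "bytes_produced" sum.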
        stats_aggregator = stats_ops.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).map(
            lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply(
                stats_ops.bytes_produced_stats("bytes_produced")).apply(
                    stats_ops.set_stats_aggregator(stats_aggregator))
        iterator = dataset.make_initializable_iterator()
        next_element = iterator.get_next()
        summary_t = stats_aggregator.get_summary()

        with self.cached_session() as sess:
            sess.run(iterator.initializer)
            expected_sum = 0.0
            for i in range(100):
                self.assertAllEqual(np.array([i] * i, dtype=np.int64),
                                    sess.run(next_element))
                summary_str = sess.run(summary_t)
                self._assertSummaryHasCount(summary_str, "bytes_produced",
                                            float(i + 1))
                expected_sum += i * 8.0
                self._assertSummaryHasSum(summary_str, "bytes_produced",
                                          expected_sum)
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(next_element)
            summary_str = sess.run(summary_t)
            self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0)
            self._assertSummaryHasSum(summary_str, "bytes_produced",
                                      expected_sum)
Example #4
  def testFeaturesStats(self):
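    # Iterates a batched-features dataset for num_epochs epochs and checks
    # the per-record "features" and "feature-values" stats, prefixed with
    # the "record_stats" aggregator tag.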
    num_epochs = 5
    total_records = num_epochs * self._num_records
    batch_size = 2
    stats_aggregator = stats_ops.StatsAggregator()
    dataset = self.make_batch_feature(
        filenames=self.test_filenames[0],
        num_epochs=num_epochs,
        batch_size=batch_size,
        shuffle=True,
        shuffle_seed=5,
        drop_final_batch=False).apply(
            stats_ops.set_stats_aggregator(stats_aggregator, "record_stats"))
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()
    summary_t = stats_aggregator.get_summary()

    with self.cached_session() as sess:
      sess.run(iterator.initializer)
      for _ in range(total_records // batch_size + 1 if total_records %
                     batch_size else total_records // batch_size):
        sess.run(next_element)

      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)
      self._assertSummaryHasCount(
          sess.run(summary_t), "record_stats_features", total_records)
      self._assertSummaryHasCount(
          sess.run(summary_t), "record_stats_feature-values", total_records)
      self._assertSummaryHasSum(
          sess.run(summary_t), "record_stats_features", total_records * 4)
      self._assertSummaryHasSum(
          sess.run(summary_t), "record_stats_feature-values",
          self._sum_keywords(1) * num_epochs + 3 * total_records)
    def testFilteredElementsStats(self):
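        # range(101) filtered to multiples of 3 yields 34 elements; before
        # the i-th kept element the filter has dropped 2 * i values, ending
        # at 67 dropped and 34 filtered.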
        stats_aggregator = stats_ops.StatsAggregator()
        dataset = dataset_ops.Dataset.range(101).filter(
            lambda x: math_ops.equal(math_ops.mod(x, 3), 0)).apply(
                stats_ops.set_stats_aggregator(stats_aggregator))
        iterator = dataset.make_initializable_iterator()
        next_element = iterator.get_next()
        summary_t = stats_aggregator.get_summary()

        with self.cached_session() as sess:
            sess.run(iterator.initializer)
            for i in range(34):
                self.assertEqual(i * 3, sess.run(next_element))
                if i != 0:
                    self._assertSummaryHasScalarValue(
                        sess.run(summary_t), "Filter::dropped_elements",
                        float(i * 2))
                self._assertSummaryHasScalarValue(sess.run(summary_t),
                                                  "Filter::filtered_elements",
                                                  float(i + 1))
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(next_element)
            self._assertSummaryHasScalarValue(sess.run(summary_t),
                                              "Filter::dropped_elements", 67.0)
            self._assertSummaryHasScalarValue(sess.run(summary_t),
                                              "Filter::filtered_elements",
                                              34.0)
Example #6
    def _testParallelCallsStats(self,
                                dataset_fn,
                                dataset_name,
                                num_output,
                                function_processing_time=False,
                                check_elements=True):
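        # Shared helper: attaches an aggregator to `dataset_fn`'s pipeline,
        # consumes `num_output` elements, and checks the num_parallel_calls /
        # active_parallel_calls gauges (plus per-function execution-time
        # counts when function_processing_time is set).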
        stats_aggregator = stats_ops.StatsAggregator()
        dataset = dataset_fn().apply(
            stats_ops.set_stats_aggregator(stats_aggregator))
        iterator = dataset.make_initializable_iterator()
        next_element = iterator.get_next()
        summary_t = stats_aggregator.get_summary()

        with self.cached_session() as sess:
            sess.run(iterator.initializer)
            for i in range(num_output):
                next_ = sess.run(next_element)
                if check_elements:
                    self.assertAllEqual(np.array([i] * i, dtype=np.int64),
                                        next_)
                summary_str = sess.run(summary_t)
                if function_processing_time:
                    self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
                        summary_str, "::execution_time", float(i + 1))
                self._assertSummaryContains(
                    summary_str, dataset_name + "::num_parallel_calls")
                self._assertSummaryContains(
                    summary_str, dataset_name + "::active_parallel_calls")
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(next_element)
            if function_processing_time:
                summary_str = sess.run(summary_t)
                self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
                    summary_str, "::execution_time", float(num_output))
Example #7
    def testLatencyStatsOptimization(self):
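        # assert_next checks that the latency_all_edges rewrite inserted a
        # LatencyStats node on every edge of the pipeline.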
        stats_aggregator = stats_ops.StatsAggregator()
        dataset = dataset_ops.Dataset.from_tensors(1).apply(
            optimization.assert_next([
                "LatencyStats", "Map", "LatencyStats", "Prefetch",
                "LatencyStats"
            ])).map(lambda x: x * x).prefetch(1).apply(
                stats_ops.set_stats_aggregator(stats_aggregator))
        options = dataset_ops.Options()
        options.experimental_latency_all_edges = True
        dataset = dataset.with_options(options)
        iterator = dataset.make_initializable_iterator()
        get_next = iterator.get_next()
        summary_t = stats_aggregator.get_summary()

        with self.cached_session() as sess:
            sess.run(iterator.initializer)
            self.assertEqual(1 * 1, sess.run(get_next))
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(get_next)
            summary_str = sess.run(summary_t)
            self._assertSummaryHasCount(summary_str,
                                        "record_latency_TensorDataset/_1", 1)
            self._assertSummaryHasCount(summary_str,
                                        "record_latency_MapDataset/_4", 1)
            self._assertSummaryHasCount(summary_str,
                                        "record_latency_PrefetchDataset/_6", 1)
Example #8
  def testLatencyStats(self):
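    # Baseline latency test: expects one "record_latency" count per element
    # consumed, reaching 100.0 once the dataset is exhausted.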
    stats_aggregator = stats_ops.StatsAggregator()
    dataset = dataset_ops.Dataset.range(100).apply(
        stats_ops.latency_stats("record_latency")).apply(
            stats_ops.set_stats_aggregator(stats_aggregator))
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()
    summary_t = stats_aggregator.get_summary()

    with self.cached_session() as sess:
      sess.run(iterator.initializer)
      for i in range(100):
        self.assertEqual(i, sess.run(next_element))
        self._assertSummaryHasCount(
            sess.run(summary_t), "record_latency", float(i + 1))
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(next_element)
      self._assertSummaryHasCount(sess.run(summary_t), "record_latency", 100.0)
Example #9
  def testLatencyStatsOptimization(self):
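    # Variant of the optimization test above, ported to the newer options
    # API (options.experimental_stats); results are read from the
    # aggregator handle rather than a summary tensor.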
    aggregator = stats_ops.StatsAggregator()
    # The aggregator is attached via options below, so no explicit
    # set_stats_aggregator apply is needed on the pipeline itself.
    dataset = dataset_ops.Dataset.from_tensors(1).apply(
        optimization.assert_next(
            ["LatencyStats", "Map", "LatencyStats", "Prefetch",
             "LatencyStats"])).map(lambda x: x * x).prefetch(1)
    options = dataset_ops.Options()
    options.experimental_optimization.apply_default_optimizations = False
    options.experimental_stats.latency_all_edges = True
    options.experimental_stats.aggregator = aggregator
    dataset = dataset.with_options(options)
    self.assertDatasetProduces(
        dataset,
        expected_output=[1],
        requires_initialization=True,
        num_test_iterations=1)
    handle = self.getHandle(aggregator)
    self.assertStatisticsHasCount(
        handle, self.regexForNodeName("record_latency::TensorDataset"), 1)
    self.assertStatisticsHasCount(
        handle, self.regexForNodeName("record_latency::MapDataset"), 1)
    self.assertStatisticsHasCount(
        handle, self.regexForNodeName("record_latency::PrefetchDataset"), 1)
Example #10
  def _build_dataset_stats_aggregator(self):
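    # Helper: builds a ten-element range dataset with a fresh aggregator
    # attached, for reuse in other tests.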
    stats_aggregator = stats_ops.StatsAggregator()
    return dataset_ops.Dataset.range(10).apply(
        stats_ops.set_stats_aggregator(stats_aggregator))