    def testFilteredElementsStats(self):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(101).filter(
            lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
        dataset = self.datasetExperimentalStats(dataset, aggregator)
        next_element = self.getNext(dataset, requires_initialization=True)

        for i in range(34):
            self.assertEqual(i * 3, self.evaluate(next_element()))
            handle = self.getHandle(aggregator)
            if i != 0:
                self.assertStatisticsHasScalarValue(
                    handle,
                    self.regexForNodeName("FilterDataset", "dropped_elements"),
                    float(i * 2))
            self.assertStatisticsHasScalarValue(
                handle,
                self.regexForNodeName("FilterDataset", "filtered_elements"),
                float(i + 1))
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element())
        handle = self.getHandle(aggregator)
        self.assertStatisticsHasScalarValue(
            handle, self.regexForNodeName("FilterDataset", "dropped_elements"),
            67.0)
        self.assertStatisticsHasScalarValue(
            handle, self.regexForNodeName("FilterDataset",
                                          "filtered_elements"), 34.0)
Example #2
    def _benchmark(self,
                   interleave_version,
                   num_elements,
                   initial_delay_us=0,
                   remainder_delay_us=0,
                   cycle_length=10,
                   iters=100,
                   num_parallel_calls=None,
                   attach_stats_aggregator=False,
                   name=None):
        ds = self.make_dataset(interleave_version, initial_delay_us,
                               remainder_delay_us, cycle_length,
                               num_parallel_calls)
        if attach_stats_aggregator:
            aggregator = stats_aggregator.StatsAggregator()
            opts = dataset_ops.Options()
            opts.experimental_stats.aggregator = aggregator
            ds = ds.with_options(opts)

        ds = ds.skip(num_elements)
        deltas = []
        for _ in range(iters):
            start = time.time()
            next(iter(ds))
            deltas.append(time.time() - start)
        self.report_benchmark(iters=iters,
                              wall_time=np.median(deltas),
                              name=name)
    def testMultipleDatasetWithPrefixes(self, dataset_transformation):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).apply(
            stats_ops.latency_stats("record_latency"))
        dataset = dataset_transformation(dataset,
                                         aggregator,
                                         prefix="dataset1")
        dataset2 = dataset_ops.Dataset.range(100).apply(
            stats_ops.latency_stats("record_latency"))
        dataset2 = dataset_transformation(dataset2,
                                          aggregator,
                                          prefix="dataset2")
        next_element1 = self.getNext(dataset, requires_initialization=True)
        next_element2 = self.getNext(dataset2, requires_initialization=True)

        for i in range(100):
            self.assertEqual(i * 2,
                             self.evaluate(next_element1() + next_element2()))
            self._assertSummaryHasCount(
                self.evaluate(aggregator.get_summary()),
                "dataset1_record_latency", float(i + 1))
            self._assertSummaryHasCount(
                self.evaluate(aggregator.get_summary()),
                "dataset2_record_latency", float(i + 1))
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element1())
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element2())
        self._assertSummaryHasCount(self.evaluate(aggregator.get_summary()),
                                    "dataset1_record_latency", 100.0)
        self._assertSummaryHasCount(self.evaluate(aggregator.get_summary()),
                                    "dataset2_record_latency", 100.0)
    def testPrefetchBufferUtilization(self, dataset_transformation):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).map(lambda x: array_ops.tile(
            [x], ops.convert_to_tensor([x]))).prefetch(-1)
        dataset = dataset_transformation(dataset, aggregator)
        next_element = self.getNext(dataset, requires_initialization=True)
        for i in range(100):
            self.assertAllEqual(np.array([i] * i, dtype=np.int64),
                                self.evaluate(next_element()))
            summary_str = self.evaluate(aggregator.get_summary())
            self._assertSummaryHasCount(
                summary_str,
                self.regexForNodeName("PrefetchDataset", "buffer_utilization"),
                float(i + 1))
            self._assertSummaryContains(
                summary_str,
                self.regexForNodeName("PrefetchDataset", "buffer_capacity"))
            self._assertSummaryContains(
                summary_str,
                self.regexForNodeName("PrefetchDataset", "buffer_size"))
            self._assertSummaryHasRange(
                summary_str,
                self.regexForNodeName("PrefetchDataset", "buffer_utilization"),
                0, 1)
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element())
        summary_str = self.evaluate(aggregator.get_summary())
        self._assertSummaryHasCount(
            summary_str,
            self.regexForNodeName("PrefetchDataset", "buffer_utilization"),
            100)
    def testFilteredElementsStats(self, dataset_transformation):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(101).filter(
            lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
        dataset = dataset_transformation(dataset, aggregator)
        next_element = self.getNext(dataset, requires_initialization=True)

        for i in range(34):
            self.assertEqual(i * 3, self.evaluate(next_element()))
            summary_str = self.evaluate(aggregator.get_summary())
            if i != 0:
                self._assertSummaryHasScalarValue(
                    summary_str,
                    self.regexForNodeName("FilterDataset", "dropped_elements"),
                    float(i * 2))
            self._assertSummaryHasScalarValue(
                summary_str,
                self.regexForNodeName("FilterDataset", "filtered_elements"),
                float(i + 1))
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element())
        summary_str = self.evaluate(aggregator.get_summary())
        self._assertSummaryHasScalarValue(
            summary_str,
            self.regexForNodeName("FilterDataset", "dropped_elements"), 67.0)
        self._assertSummaryHasScalarValue(
            summary_str,
            self.regexForNodeName("FilterDataset", "filtered_elements"), 34.0)
Example #6
    def testLatencyStatsOptimizationV2(self):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.from_tensors(1).apply(
            optimization.assert_next([
                "LatencyStats", "Map", "LatencyStats", "Prefetch",
                "LatencyStats"
            ])).map(lambda x: x * x).prefetch(1)
        options = dataset_ops.Options()
        options.experimental_stats = stats_options.StatsOptions(aggregator)
        dataset = dataset.with_options(options)
        iterator = dataset.make_initializable_iterator()
        get_next = iterator.get_next()
        summary_t = aggregator.get_summary()

        with self.cached_session() as sess:
            sess.run(iterator.initializer)
            self.assertEqual(1, sess.run(get_next))
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(get_next)
            summary_str = sess.run(summary_t)
            self._assertSummaryHasCount(summary_str,
                                        "record_latency_TensorDataset/_1", 1)
            self._assertSummaryHasCount(summary_str,
                                        "record_latency_MapDataset/_4", 1)
            self._assertSummaryHasCount(summary_str,
                                        "record_latency_PrefetchDataset/_6", 1)
    def _benchmark(self,
                   interleave_version,
                   num_elements,
                   initial_delay_us=0,
                   remainder_delay_us=0,
                   cycle_length=10,
                   iters=100,
                   num_parallel_calls=None,
                   attach_stats_aggregator=False,
                   name=None):
        dataset = self.make_dataset(interleave_version=interleave_version,
                                    initial_delay=initial_delay_us,
                                    remainder_delay=remainder_delay_us,
                                    cycle_length=cycle_length,
                                    num_parallel_calls=num_parallel_calls)
        if attach_stats_aggregator:
            aggregator = stats_aggregator.StatsAggregator()
            opts = dataset_ops.Options()
            opts.experimental_stats.aggregator = aggregator
            dataset = dataset.with_options(opts)

        self.run_and_report_benchmark(dataset=dataset,
                                      num_elements=num_elements,
                                      iters=iters,
                                      warmup=True,
                                      name=name)
Example #8
    def testBytesProduced(self, dataset_transformation):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).map(
            lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply(
                stats_ops.bytes_produced_stats("bytes_produced"))
        dataset = dataset_transformation(dataset, aggregator)
        next_element = self.getNext(dataset, requires_initialization=True)
        summary_t = aggregator.get_summary()

        expected_sum = 0.0
        for i in range(100):
            self.assertAllEqual(np.array([i] * i, dtype=np.int64),
                                self.evaluate(next_element()))
            summary_str = self.evaluate(aggregator.get_summary())
            self._assertSummaryHasCount(summary_str, "bytes_produced",
                                        float(i + 1))
            expected_sum += i * 8.0
            self._assertSummaryHasSum(summary_str, "bytes_produced",
                                      expected_sum)
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element())
        # TODO(shivaniagrawal): intentional breaking case
        summary_str = self.evaluate(summary_t)
        self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0)
        self._assertSummaryHasSum(summary_str, "bytes_produced", expected_sum)
    def testMultiplePrefetchStats(self, dataset_transformation):

        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(10).prefetch(2).map(
            lambda x: math_ops.add(x, 2)).prefetch(1)

        dataset = dataset_transformation(dataset, aggregator)
        next_element = self.getNext(dataset, requires_initialization=True)

        for i in range(10):
            self.assertEqual(i + 2, self.evaluate(next_element()))
            summary_str = self.evaluate(aggregator.get_summary())
            # TODO(shivaniagarwal): we use the exact name of the prefetch node
            # rather than a regex to differentiate between the two prefetches.
            # This might break in the future, at which point it would be best
            # to disable this test.
            self._assertSummaryHasScalarValue(
                summary_str, "PrefetchDataset/_5::buffer_capacity", 2)
            self._assertSummaryContains(summary_str,
                                        "PrefetchDataset/_5::buffer_size")
            self._assertSummaryHasScalarValue(
                summary_str, "PrefetchDataset/_8::buffer_capacity", 1)
            self._assertSummaryContains(summary_str,
                                        "PrefetchDataset/_8::buffer_size")
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element())
    def _testParallelCallsStats(self,
                                dataset_fn,
                                dataset_name,
                                num_output,
                                dataset_transformation,
                                function_processing_time=False,
                                check_elements=True):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_fn()
        dataset = dataset_transformation(dataset, aggregator)
        iterator = dataset_ops.make_initializable_iterator(dataset)
        next_element = iterator.get_next()
        summary_t = aggregator.get_summary()

        with self.cached_session() as sess:
            sess.run(iterator.initializer)
            for i in range(num_output):
                next_ = sess.run(next_element)
                if check_elements:
                    self.assertAllEqual(np.array([i] * i, dtype=np.int64),
                                        next_)
                summary_str = sess.run(summary_t)
                if function_processing_time:
                    self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
                        summary_str, "::execution_time", float(i + 1))
                self._assertSummaryContains(
                    summary_str, dataset_name + "::num_parallel_calls")
                self._assertSummaryContains(
                    summary_str, dataset_name + "::active_parallel_calls")
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(next_element)
            if function_processing_time:
                summary_str = sess.run(summary_t)
                self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
                    summary_str, "::execution_time", float(num_output))
    def testMultipleTags(self):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).apply(
            stats_ops.latency_stats("record_latency")).apply(
                stats_ops.latency_stats("record_latency_2"))
        dataset = self.datasetExperimentalStats(dataset, aggregator)

        next_element = self.getNext(dataset, requires_initialization=True)

        for i in range(100):
            self.assertEqual(i, self.evaluate(next_element()))
            handle = self.getHandle(aggregator)
            self.assertStatisticsHasCount(handle,
                                          "record_latency",
                                          float(i + 1),
                                          2 * i + 3,
                                          offset=1)
            self.assertStatisticsHasCount(handle, "record_latency_2",
                                          float(i + 1), 2 * i + 3)
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element())
        handle = self.getHandle(aggregator)
        self.assertStatisticsHasCount(handle,
                                      "record_latency",
                                      100.0,
                                      201,
                                      offset=1)
        self.assertStatisticsHasCount(handle, "record_latency_2", 100.0, 201)
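
What the count assertions verify, concretely: each statistic is recorded as a histogram in the aggregated tf.Summary proto, and the "count" is that histogram's num field. A hedged sketch of the decoding (summary_count is a hypothetical helper; the real assert methods are test-harness internals):

from tensorflow.core.framework import summary_pb2

def summary_count(serialized_summary, tag):
    # Parse the serialized tf.Summary produced by aggregator.get_summary()
    # and return how many values were recorded under `tag`, i.e. the
    # histogram's `num` field, which the count assertions compare against.
    summary = summary_pb2.Summary()
    summary.ParseFromString(serialized_summary)
    for value in summary.value:
        if value.tag == tag:
            return value.histo.num
    return None
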
    def testPrefetchBufferUtilization(self, dataset_transformation):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).map(lambda x: array_ops.tile(
            [x], ops.convert_to_tensor([x]))).prefetch(-1)
        dataset = dataset_transformation(dataset, aggregator)
        iterator = dataset.make_initializable_iterator()
        next_element = iterator.get_next()
        summary_t = aggregator.get_summary()

        with self.cached_session() as sess:
            self.evaluate(iterator.initializer)
            for i in range(100):
                self.assertAllEqual(np.array([i] * i, dtype=np.int64),
                                    self.evaluate(next_element))
                summary_str = self.evaluate(summary_t)
                self._assertSummaryHasCount(summary_str,
                                            "Prefetch::buffer_utilization",
                                            float(i + 1))
                self._assertSummaryContains(summary_str,
                                            "Prefetch::buffer_capacity")
                self._assertSummaryContains(summary_str,
                                            "Prefetch::buffer_size")
                self._assertSummaryHasRange(summary_str,
                                            "Prefetch::buffer_utilization", 0,
                                            1)
            with self.assertRaises(errors.OutOfRangeError):
                self.evaluate(next_element)
            summary_str = self.evaluate(summary_t)
            self._assertSummaryHasCount(summary_str,
                                        "Prefetch::buffer_utilization", 100)
    def testMultipleDatasetWithPrefixes(self, dataset_transformation):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).apply(
            stats_ops.latency_stats("record_latency"))
        dataset = dataset_transformation(dataset,
                                         aggregator,
                                         prefix="dataset1")
        dataset2 = dataset_ops.Dataset.range(100).apply(
            stats_ops.latency_stats("record_latency"))
        dataset2 = dataset_transformation(dataset2,
                                          aggregator,
                                          prefix="dataset2")
        iterator_0 = dataset.make_initializable_iterator()
        iterator_1 = dataset2.make_initializable_iterator()
        next_element = iterator_0.get_next() + iterator_1.get_next()
        summary_t = aggregator.get_summary()

        with self.test_session() as sess:
            self.evaluate([iterator_0.initializer, iterator_1.initializer])
            for i in range(100):
                self.assertEqual(i * 2, self.evaluate(next_element))
                self._assertSummaryHasCount(self.evaluate(summary_t),
                                            "dataset1_record_latency",
                                            float(i + 1))
                self._assertSummaryHasCount(self.evaluate(summary_t),
                                            "dataset2_record_latency",
                                            float(i + 1))
            with self.assertRaises(errors.OutOfRangeError):
                self.evaluate(next_element)
            self._assertSummaryHasCount(self.evaluate(summary_t),
                                        "dataset1_record_latency", 100.0)
            self._assertSummaryHasCount(self.evaluate(summary_t),
                                        "dataset2_record_latency", 100.0)
    def testMultiplePrefetchStats(self):

        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(10).prefetch(2).filter(
            lambda x: math_ops.equal(math_ops.mod(x, 2), 0)).prefetch(1)

        dataset = self.datasetExperimentalStats(dataset, aggregator)
        next_element = self.getNext(dataset, requires_initialization=True)

        for i in range(5):
            self.assertEqual(i * 2, self.evaluate(next_element()))
            handle = self.getHandle(aggregator)
            # TODO(shivaniagarwal): we use the exact name of the prefetch node
            # rather than a regex to differentiate between the two prefetches.
            # This might break in the future, at which point it would be best
            # to disable this test.
            self.assertStatisticsHasScalarValue(
                handle, "PrefetchDataset/_5::buffer_capacity", 2)
            self.assertStatisticsContains(handle,
                                          "PrefetchDataset/_5::buffer_size")
            self.assertStatisticsHasScalarValue(
                handle, "PrefetchDataset/_8::buffer_capacity", 1)
            self.assertStatisticsContains(handle,
                                          "PrefetchDataset/_8::buffer_size")
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element())
    def testBytesProduced(self, dataset_transformation):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).map(
            lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).apply(
                stats_ops.bytes_produced_stats("bytes_produced"))
        dataset = dataset_transformation(dataset, aggregator)
        iterator = dataset.make_initializable_iterator()
        next_element = iterator.get_next()
        summary_t = aggregator.get_summary()

        with self.cached_session() as sess:
            self.evaluate(iterator.initializer)
            expected_sum = 0.0
            for i in range(100):
                self.assertAllEqual(np.array([i] * i, dtype=np.int64),
                                    self.evaluate(next_element))
                summary_str = self.evaluate(summary_t)
                self._assertSummaryHasCount(summary_str, "bytes_produced",
                                            float(i + 1))
                expected_sum += i * 8.0
                self._assertSummaryHasSum(summary_str, "bytes_produced",
                                          expected_sum)
            with self.assertRaises(errors.OutOfRangeError):
                self.evaluate(next_element)
            summary_str = self.evaluate(summary_t)
            self._assertSummaryHasCount(summary_str, "bytes_produced", 100.0)
            self._assertSummaryHasSum(summary_str, "bytes_produced",
                                      expected_sum)
Example #16
    def testLatencyStatsOptimizationAutotuneOn(self):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.from_tensors(1).apply(
            testing.assert_next([
                "LatencyStats", "Map", "LatencyStats", "Prefetch",
                "LatencyStats", "MaxIntraOpParallelism", "LatencyStats",
                "Model", "SetStatsAggregator"
            ])).map(lambda x: x * x).prefetch(1)
        options = dataset_ops.Options()
        options.experimental_optimization.apply_default_optimizations = False
        options.experimental_stats.latency_all_edges = True
        options.experimental_stats.aggregator = aggregator
        dataset = dataset.with_options(options)
        self.assertDatasetProduces(
            dataset,
            expected_output=[1],
            requires_initialization=True,
            num_test_iterations=1)
        handle = self.getHandle(aggregator)
        self.assertStatisticsHasCount(
            handle, self.regexForNodeName("record_latency::TensorDataset"), 1)
        self.assertStatisticsHasCount(
            handle, self.regexForNodeName("record_latency::MapDataset"), 1)
        self.assertStatisticsHasCount(
            handle, self.regexForNodeName("record_latency::PrefetchDataset"), 1)
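
testing.assert_next verifies that the named transformations come next, in order, in the rewritten dataset graph; here it confirms that a LatencyStats node was inserted on every edge. For reference, a hedged sketch of the option wiring on its own (aggregator and dataset are assumed to already exist):

# Keep the rewritten graph predictable for assert_next by disabling the
# default optimizations, then ask for a LatencyStats node on every edge.
options = dataset_ops.Options()
options.experimental_optimization.apply_default_optimizations = False
options.experimental_stats.latency_all_edges = True
options.experimental_stats.aggregator = aggregator
dataset = dataset.with_options(options)
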
    def parallelCallsStats(self,
                           dataset_fn,
                           dataset_names,
                           num_output,
                           function_processing_time=False,
                           check_elements=True):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_fn()
        dataset = self.datasetExperimentalStats(dataset, aggregator)
        next_element = self.getNext(dataset, requires_initialization=True)

        for i in range(num_output):
            value = self.evaluate(next_element())
            if check_elements:
                self.assertAllEqual(np.array([i] * i, dtype=np.int64), value)
            handle = self.getHandle(aggregator)
            for dataset_name in dataset_names:
                if function_processing_time:
                    self.assertStatisticsHasCount(handle,
                                                  r"(.*)::execution_time$",
                                                  float(i + 1),
                                                  greater_than=True)
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element())
        for dataset_name in dataset_names:
            self.assertStatisticsContains(
                handle,
                self.regexForNodeName(dataset_name, "thread_utilization"))
        if function_processing_time:
            handle = self.getHandle(aggregator)
            for dataset_name in dataset_names:
                self.assertStatisticsHasCount(handle,
                                              r"(.*)::execution_time$",
                                              float(num_output),
                                              greater_than=True)
Example #18
  def _testParallelCallsStats(self,
                              dataset_fn,
                              dataset_name,
                              num_output,
                              dataset_transformation,
                              function_processing_time=False,
                              check_elements=True):
    aggregator = stats_aggregator.StatsAggregator()
    dataset = dataset_fn()
    dataset = dataset_transformation(dataset, aggregator)
    next_element = self.getNext(dataset, requires_initialization=True)

    for i in range(num_output):
      next_ = self.evaluate(next_element())
      if check_elements:
        self.assertAllEqual(np.array([i] * i, dtype=np.int64), next_)
      summary_str = self.evaluate(aggregator.get_summary())
      if function_processing_time:
        self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
            summary_str, "::execution_time", float(i + 1))
      self._assertSummaryContains(summary_str,
                                  dataset_name + "::thread_utilization")
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element())
    if function_processing_time:
      summary_str = self.evaluate(aggregator.get_summary())
      self._assertSummaryHasCountMoreOrEqualGeneralisedTag(
          summary_str, "::execution_time", float(num_output))
    def testFilteredElementsStats(self, dataset_transformation):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(101).filter(
            lambda x: math_ops.equal(math_ops.mod(x, 3), 0))
        dataset = dataset_transformation(dataset, aggregator)
        iterator = dataset.make_initializable_iterator()
        next_element = iterator.get_next()
        summary_t = aggregator.get_summary()

        with self.test_session() as sess:
            self.evaluate(iterator.initializer)
            for i in range(34):
                self.assertEqual(i * 3, self.evaluate(next_element))
                if i != 0:
                    self._assertSummaryHasScalarValue(
                        self.evaluate(summary_t), "Filter::dropped_elements",
                        float(i * 2))
                self._assertSummaryHasScalarValue(self.evaluate(summary_t),
                                                  "Filter::filtered_elements",
                                                  float(i + 1))
            with self.assertRaises(errors.OutOfRangeError):
                self.evaluate(next_element)
            self._assertSummaryHasScalarValue(self.evaluate(summary_t),
                                              "Filter::dropped_elements", 67.0)
            self._assertSummaryHasScalarValue(self.evaluate(summary_t),
                                              "Filter::filtered_elements",
                                              34.0)
    def testFeaturesStats(self, dataset_transformation):
        num_epochs = 5
        total_records = num_epochs * self._num_records
        batch_size = 2
        aggregator = stats_aggregator.StatsAggregator()

        def dataset_fn():
            return self.make_batch_feature(filenames=self.test_filenames[0],
                                           num_epochs=num_epochs,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           shuffle_seed=5,
                                           drop_final_batch=False)

        num_output = total_records // batch_size
        if total_records % batch_size:
            num_output = total_records // batch_size + 1

        self._testParallelCallsStats(
            dataset_fn,
            {self.regexForNodeName("ExperimentalParseExampleDataset")},
            num_output,
            dataset_transformation,
            check_elements=False)

        dataset = dataset_transformation(dataset_fn(),
                                         aggregator,
                                         prefix="record_stats")

        next_element = self.getNext(dataset, requires_initialization=True)

        for _ in range(num_output):
            self.evaluate(next_element())

        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element())
        self._assertSummaryHasCount(
            self.evaluate(aggregator.get_summary()),
            self.regexForNodeName(
                "record_stats_ExperimentalParseExampleDataset",
                "features_count"), total_records)
        self._assertSummaryHasCount(
            self.evaluate(aggregator.get_summary()),
            self.regexForNodeName(
                "record_stats_ExperimentalParseExampleDataset",
                "feature_values_count"), total_records)
        self._assertSummaryHasSum(
            self.evaluate(aggregator.get_summary()),
            self.regexForNodeName(
                "record_stats_ExperimentalParseExampleDataset",
                "features_count"), total_records * 4)
        self._assertSummaryHasSum(
            self.evaluate(aggregator.get_summary()),
            self.regexForNodeName(
                "record_stats_ExperimentalParseExampleDataset",
                "feature_values_count"),
            self._sum_keywords(1) * num_epochs + 3 * total_records)
Example #21
    def benchmark_stats(self):
        dataset = dataset_ops.Dataset.range(1).repeat()
        # num_parallel_calls belongs to map_and_batch, not Dataset.apply,
        # which takes only the transformation function.
        dataset = dataset.apply(
            batching.map_and_batch(lambda x: x + 1, 1, num_parallel_calls=32))
        aggregator = stats_aggregator.StatsAggregator()
        options = dataset_ops.Options()
        options.experimental_stats.aggregator = aggregator
        dataset = dataset.with_options(options)
        self.run_and_report_benchmark(dataset, num_elements=1000, name="stats")
    def benchmark_stats(self):
        for stats in [True, False]:
            dataset = dataset_ops.Dataset.range(1000).repeat()
            dataset = dataset.map(lambda x: x + 1, num_parallel_calls=32)
            options = dataset_ops.Options()
            options.experimental_deterministic = False
            if stats:
                aggregator = stats_aggregator.StatsAggregator()
                options.experimental_stats.aggregator = aggregator
            dataset = dataset.with_options(options)
            self.run_and_report_benchmark(
                dataset, num_elements=10000, name="stats_%s" % stats)
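
Both benchmark_stats variants delegate measurement to run_and_report_benchmark, which comes from the internal benchmark base class. A self-contained equivalent is sketched below under two assumptions: eager execution (so the dataset is directly iterable) and median wall time over several full passes as the metric; median_wall_time is a hypothetical stand-in, not the real harness.

import time
import numpy as np

def median_wall_time(dataset, num_elements, iters=10):
    # Hypothetical stand-in for run_and_report_benchmark: time `iters`
    # passes over the first `num_elements` elements and take the median.
    deltas = []
    for _ in range(iters):
        start = time.time()
        for _ in dataset.take(num_elements):
            pass
        deltas.append(time.time() - start)
    return np.median(deltas)
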
    def DISABLED_testFeaturesStats(self):
        num_epochs = 5
        total_records = num_epochs * self._num_records
        batch_size = 2

        def dataset_fn():
            return self.make_batch_feature(filenames=self._filenames[0],
                                           num_epochs=num_epochs,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           shuffle_seed=5,
                                           drop_final_batch=False)

        num_output = total_records // batch_size
        if total_records % batch_size:
            num_output = total_records // batch_size + 1

        self.parallelCallsStats(dataset_fn, {"ParseExampleDatasetV2"},
                                num_output,
                                check_elements=False)

        aggregator = stats_aggregator.StatsAggregator()
        dataset = self.datasetExperimentalStats(dataset_fn(),
                                                aggregator,
                                                prefix="record_stats")

        next_element = self.getNext(dataset, requires_initialization=True)

        for _ in range(num_output):
            self.evaluate(next_element())

        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element())
        handle = self.getHandle(aggregator)
        self.assertStatisticsHasCount(
            handle,
            self.regexForNodeName("record_stats::ParseExampleDatasetV2",
                                  "features_count"), total_records)
        self.assertStatisticsHasCount(
            handle,
            self.regexForNodeName("record_stats::ParseExampleDatasetV2",
                                  "feature_values_count"), total_records)
        self.assertStatisticsHasSum(
            handle,
            self.regexForNodeName("record_stats::ParseExampleDatasetV2",
                                  "features_count"), total_records * 4)
        self.assertStatisticsHasSum(
            handle,
            self.regexForNodeName("record_stats::ParseExampleDatasetV2",
                                  "feature_values_count"),
            self._sum_keywords(1) * num_epochs + 3 * total_records)
    def testFeaturesStats(self, dataset_transformation):
        num_epochs = 5
        total_records = num_epochs * self._num_records
        batch_size = 2
        aggregator = stats_aggregator.StatsAggregator()

        def dataset_fn():
            return self.make_batch_feature(filenames=self.test_filenames[0],
                                           num_epochs=num_epochs,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           shuffle_seed=5,
                                           drop_final_batch=False)

        num_output = total_records // batch_size
        if total_records % batch_size:
            num_output = total_records // batch_size + 1

        self._testParallelCallsStats(dataset_fn,
                                     "ParseExample",
                                     num_output,
                                     dataset_transformation,
                                     check_elements=False)

        dataset = dataset_transformation(dataset_fn(),
                                         aggregator,
                                         prefix="record_stats")
        iterator = dataset.make_initializable_iterator()
        next_element = iterator.get_next()
        summary_t = aggregator.get_summary()

        with self.test_session() as sess:
            self.evaluate(iterator.initializer)
            for _ in range(num_output):
                sess.run(next_element)

            with self.assertRaises(errors.OutOfRangeError):
                sess.run(next_element)
            self._assertSummaryHasCount(sess.run(summary_t),
                                        "record_stats_features", total_records)
            self._assertSummaryHasCount(sess.run(summary_t),
                                        "record_stats_feature-values",
                                        total_records)
            self._assertSummaryHasSum(sess.run(summary_t),
                                      "record_stats_features",
                                      total_records * 4)
            self._assertSummaryHasSum(
                sess.run(summary_t), "record_stats_feature-values",
                self._sum_keywords(1) * num_epochs + 3 * total_records)
Example #25
  def testLatencyStats(self, dataset_transformation):
    aggregator = stats_aggregator.StatsAggregator()
    dataset = dataset_ops.Dataset.range(100).apply(
        stats_ops.latency_stats("record_latency"))
    dataset = dataset_transformation(dataset, aggregator)
    next_element = self.getNext(dataset, requires_initialization=True)

    for i in range(100):
      self.assertEqual(i, self.evaluate(next_element()))
      self._assertSummaryHasCount(
          self.evaluate(aggregator.get_summary()), "record_latency",
          float(i + 1))
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element())
    self._assertSummaryHasCount(
        self.evaluate(aggregator.get_summary()), "record_latency", 100.0)
Example #26
  def testPrefetchBufferScalars(self, dataset_transformation):
    aggregator = stats_aggregator.StatsAggregator()
    dataset = dataset_ops.Dataset.range(10).map(
        lambda x: array_ops.tile([x], ops.convert_to_tensor([x]))).prefetch(1)
    dataset = dataset_transformation(dataset, aggregator)
    next_element = self.getNext(dataset, requires_initialization=True)

    for i in range(10):
      self.assertAllEqual(
          np.array([i] * i, dtype=np.int64), self.evaluate(next_element()))
      summary_str = self.evaluate(aggregator.get_summary())
      self._assertSummaryHasScalarValue(summary_str,
                                        "Prefetch::buffer_capacity", 1)
      self._assertSummaryHasScalarValue(summary_str, "Prefetch::buffer_size", 1)
    with self.assertRaises(errors.OutOfRangeError):
      self.evaluate(next_element())
    def testPrefetchBufferUtilization(self):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).map(lambda x: array_ops.tile(
            [x], ops.convert_to_tensor([x]))).prefetch(-1)
        dataset = self.datasetExperimentalStats(dataset, aggregator)
        next_element = self.getNext(dataset, requires_initialization=True)
        for i in range(100):
            self.assertAllEqual(np.array([i] * i, dtype=np.int64),
                                self.evaluate(next_element()))
            handle = self.getHandle(aggregator)
            self.assertStatisticsHasCount(
                handle,
                self.regexForNodeName("PrefetchDataset", "buffer_utilization"),
                float(i + 1),
                3 * i + 4,
                offset=2)
            self.assertStatisticsContains(
                handle,
                self.regexForNodeName("PrefetchDataset", "buffer_capacity"),
                3 * i + 4)
            self.assertStatisticsContains(
                handle,
                self.regexForNodeName("PrefetchDataset", "buffer_size"),
                3 * i + 4,
                offset=1)
            self.assertStatisticsHasRange(
                handle,
                self.regexForNodeName("PrefetchDataset", "buffer_utilization"),
                0,
                1,
                3 * i + 4,
                offset=2)
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element())
        handle = self.getHandle(aggregator)
        self.assertStatisticsHasCount(
            handle,
            self.regexForNodeName("PrefetchDataset", "buffer_utilization"),
            100,
            301,
            offset=2)
    def testReinitialize(self):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).apply(
            stats_ops.latency_stats("record_latency"))
        dataset = self.datasetExperimentalStats(dataset, aggregator)

        for j in range(5):
            next_element = self.getNext(dataset, requires_initialization=True)
            for i in range(100):
                self.assertEqual(i, self.evaluate(next_element()))
                handle = self.getHandle(aggregator)
                self.assertStatisticsHasCount(handle, "record_latency",
                                              float((j * 100) + i + 1),
                                              (j * 100) + i + 2)
            with self.assertRaises(errors.OutOfRangeError):
                self.evaluate(next_element())
            handle = self.getHandle(aggregator)
            self.assertStatisticsHasCount(handle, "record_latency",
                                          (j + 1) * 100.0, (j * 100) + 101)
    def testLatencyStats(self, dataset_transformation):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).apply(
            stats_ops.latency_stats("record_latency"))
        dataset = dataset_transformation(dataset, aggregator)
        iterator = dataset.make_initializable_iterator()
        next_element = iterator.get_next()
        summary_t = aggregator.get_summary()

        with self.cached_session() as sess:
            self.evaluate(iterator.initializer)
            for i in range(100):
                self.assertEqual(i, self.evaluate(next_element))
                self._assertSummaryHasCount(sess.run(summary_t),
                                            "record_latency", float(i + 1))
            with self.assertRaises(errors.OutOfRangeError):
                sess.run(next_element)
            self._assertSummaryHasCount(self.evaluate(summary_t),
                                        "record_latency", 100.0)
    def DISABLED_testMultipleIteratorsSameAggregator(self):
        aggregator = stats_aggregator.StatsAggregator()
        dataset = dataset_ops.Dataset.range(100).apply(
            stats_ops.latency_stats("record_latency"))
        dataset = self.datasetExperimentalStats(dataset, aggregator)
        next_element1 = self.getNext(dataset, requires_initialization=True)
        next_element2 = self.getNext(dataset, requires_initialization=True)

        for i in range(100):
            self.assertEqual(i * 2,
                             self.evaluate(next_element1() + next_element2()))
            handle = self.getHandle(aggregator)
            self.assertStatisticsHasCount(handle, "record_latency",
                                          float(2 * (i + 1)), 2 * i + 3)
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element1())
        with self.assertRaises(errors.OutOfRangeError):
            self.evaluate(next_element2())
        handle = self.getHandle(aggregator)
        self.assertStatisticsHasCount(handle, "record_latency", 200.0, 201)
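
Beyond test assertions, the aggregated statistics can be exported for TensorBoard, since get_summary() evaluates to a serialized tf.Summary. A hedged sketch using the TF 1.x writer API (the aggregator and log directory are assumptions):

import tensorflow.compat.v1 as tf

writer = tf.summary.FileWriter("/tmp/tf_data_stats")
with tf.Session() as sess:
    # FileWriter.add_summary accepts the serialized tf.Summary string that
    # aggregator.get_summary() evaluates to.
    writer.add_summary(sess.run(aggregator.get_summary()), global_step=0)
writer.close()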