Example #1
0
    def test_compute_bounds_stddevs_from_mean(self,
                                              comparison,
                                              std_devs,
                                              expected_bounds,
                                              inclusive=False):
        """Bounds derived from a std-devs-from-mean assertion match expectations."""
        # History 1..5: mean = 3, stddev = ~1.414.
        history = range(1, 6)

        class _StubMetricStore:
            # `*args` absorbs self; always returns the fixed history rows.
            def get_metric_history(*args, **kwargs):
                rows = []
                for v in history:
                    rows.append(
                        bigquery_client.MetricHistoryRow(
                            '', '', datetime.datetime.now(), '', v))
                return rows

        assertion = metrics_pb2.Assertion(
            std_devs_from_mean=metrics_pb2.Assertion.StdDevsFromMean(
                comparison=metrics_pb2.Assertion.Comparison.Value(comparison),
                std_devs=std_devs,
            ),
            inclusive_bounds=inclusive,
        )
        event = metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark")
        collector = base.BaseCollector(event, None, _StubMetricStore())
        actual = collector.compute_bounds("metric_key", assertion)
        # EQUAL comparisons always produce inclusive bounds.
        expected = utils.Bounds(*expected_bounds,
                                inclusive or comparison == 'EQUAL')
        self.assertSequenceAlmostEqual(
            dataclasses.astuple(actual),
            dataclasses.astuple(expected),
            places=3)
Example #2
0
    def test_compute_bounds_percent_difference_with_mean_value(
            self, comparison, pct_diff, expected_bounds, inclusive=False):
        """Bounds from a percent-difference assertion using the historical mean."""
        # History 1..5: mean = 3.
        history = range(1, 6)

        class _StubMetricStore:
            # `*args` absorbs self; always returns the fixed history rows.
            def get_metric_history(*args, **kwargs):
                return [
                    bigquery_client.MetricHistoryRow(
                        '', '', datetime.datetime.now(), '', v)
                    for v in history
                ]

        assertion = metrics_pb2.Assertion(
            percent_difference=metrics_pb2.Assertion.PercentDifference(
                comparison=metrics_pb2.Assertion.Comparison.Value(comparison),
                use_historical_mean=True,
                percent=pct_diff,
            ),
            inclusive_bounds=inclusive,
        )
        event = metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark")
        collector = base.BaseCollector(event, None, _StubMetricStore())
        actual = collector.compute_bounds("metric_key", assertion)
        self.assertSequenceAlmostEqual(
            dataclasses.astuple(actual),
            dataclasses.astuple(utils.Bounds(*expected_bounds, inclusive)),
            places=3)
 def test_assert_duration(self):
     """The event duration is collected as a literal metric with its bounds."""
     duration_assertion = metrics_pb2.Assertion(
         within_bounds=metrics_pb2.Assertion.WithinBounds(
             lower_bound=100,
             upper_bound=200,
         ),
         inclusive_bounds=False,
     )
     metric_source = metrics_pb2.MetricSource(
         literals=metrics_pb2.LiteralSource(
             assertions={"duration": duration_assertion}))
     event = metrics_pb2.TestCompletedEvent(
         benchmark_id="test_benchmark",
         duration=duration_pb2.Duration(seconds=150),
         metric_collection_config=metrics_pb2.MetricCollectionConfig(
             sources=[metric_source]))
     collector = literal_collector.LiteralCollector(
         event=event, raw_source=metric_source)
     points = collector.metric_points()
     # Exactly one point: the 150s duration with exclusive (100, 200) bounds.
     self.assertLen(points, 1)
     point = points[0]
     self.assertEqual(point.metric_key, 'duration')
     self.assertEqual(point.metric_value, 150)
     self.assertEqual(point.bounds, utils.Bounds(100, 200, False))
Example #4
0
 def test_compute_bounds_within_bounds(self,
                                       lower,
                                       upper,
                                       expected_bounds,
                                       inclusive=False):
     """Explicit within_bounds assertions map directly to Bounds."""
     within = metrics_pb2.Assertion.WithinBounds(
         lower_bound=lower,
         upper_bound=upper,
     )
     assertion = metrics_pb2.Assertion(
         within_bounds=within, inclusive_bounds=inclusive)
     event = metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark")
     collector = base.BaseCollector(event, None)
     self.assertEqual(
         collector.compute_bounds("metric_key", assertion),
         utils.Bounds(*expected_bounds, inclusive))
    def test_aggregate_metrics_with_assertion(self):
        """Aggregate assertions attach bounds only to their matching tag/strategy.

        All eval/* tags are collected with FINAL/MAX/MIN strategies, but only
        the (eval/accuracy, MAX) point carries bounds from its assertion; the
        other aggregated points get NO_BOUNDS.
        """
        metric_source = metrics_pb2.MetricSource(
            tensorboard=metrics_pb2.TensorBoardSource(
                include_tags=[
                    metrics_pb2.TensorBoardSource.TagStrategy(
                        tag_pattern="eval/*",
                        strategies=[
                            metrics_pb2.TensorBoardSource.FINAL,
                            metrics_pb2.TensorBoardSource.MAX,
                            metrics_pb2.TensorBoardSource.MIN,
                        ])
                ],
                aggregate_assertions=[
                    metrics_pb2.TensorBoardSource.AggregateAssertion(
                        tag='eval/accuracy',
                        strategy=metrics_pb2.TensorBoardSource.MAX,
                        assertion=metrics_pb2.Assertion(
                            within_bounds=metrics_pb2.Assertion.WithinBounds(
                                lower_bound=.4,
                                upper_bound=1.0,
                            ),
                            inclusive_bounds=True,
                        ))
                ]))
        # output_path points at the TensorBoard event files written by the
        # test fixture (presumably in setUp — confirm in the full test class).
        event = metrics_pb2.TestCompletedEvent(
            benchmark_id="test_benchmark",
            output_path=self.temp_dir,
            metric_collection_config=metrics_pb2.MetricCollectionConfig(
                sources=[metric_source]))
        collector = tensorboard_collector.TensorBoardCollector(
            event=event, raw_source=metric_source)
        points = list(collector.metric_points())

        self.assertCountEqual(
            points,
            [
                utils.MetricPoint('eval/accuracy_max', .5,
                                  utils.Bounds(.4, 1.0, True)),
                utils.MetricPoint('eval/accuracy_min', .125, utils.NO_BOUNDS),
                utils.MetricPoint('eval/accuracy_final', .25, utils.NO_BOUNDS),
            ],
        )
Example #6
0
 def test_compute_bounds_percent_difference_with_target_value(
         self, comparison, target, pct_diff, expected_bounds,
         inclusive=False):
     """Percent-difference against a fixed target value yields the bounds."""
     pct_difference = metrics_pb2.Assertion.PercentDifference(
         comparison=metrics_pb2.Assertion.Comparison.Value(comparison),
         value=target,
         percent=pct_diff,
     )
     assertion = metrics_pb2.Assertion(
         percent_difference=pct_difference, inclusive_bounds=inclusive)
     collector = base.BaseCollector(
         metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"), None)
     self.assertEqual(
         collector.compute_bounds("metric_key", assertion),
         utils.Bounds(*expected_bounds, inclusive))
    def test_include_and_exclude(self):
        """Excluded tags are dropped, but aggregate assertions still collect them.

        'foo' is in exclude_tags, so its FINAL value is not collected; however
        the aggregate assertion on (foo, MIN) still produces a bounded point.
        'train/*' tags are excluded outright.
        """
        metric_source = metrics_pb2.MetricSource(
            tensorboard=metrics_pb2.TensorBoardSource(
                include_tags=[
                    metrics_pb2.TensorBoardSource.TagStrategy(
                        tag_pattern="*",
                        strategies=[
                            metrics_pb2.TensorBoardSource.FINAL,
                        ])
                ],
                exclude_tags=[
                    'foo',
                    'train/*',
                ],
                aggregate_assertions=[
                    metrics_pb2.TensorBoardSource.AggregateAssertion(
                        tag='foo',
                        strategy=metrics_pb2.TensorBoardSource.MIN,
                        # inclusive_bounds is left unset, so bounds default to
                        # exclusive (False) in the expected point below.
                        assertion=metrics_pb2.Assertion(
                            within_bounds=metrics_pb2.Assertion.WithinBounds(
                                lower_bound=0.,
                                upper_bound=2.,
                            )))
                ]))
        # output_path points at the TensorBoard event files written by the
        # test fixture (presumably in setUp — confirm in the full test class).
        event = metrics_pb2.TestCompletedEvent(
            benchmark_id="test_benchmark",
            output_path=self.temp_dir,
            metric_collection_config=metrics_pb2.MetricCollectionConfig(
                sources=[metric_source]))
        collector = tensorboard_collector.TensorBoardCollector(
            event=event, raw_source=metric_source)
        points = list(collector.metric_points())

        self.assertCountEqual(
            points,
            [
                utils.MetricPoint('eval/accuracy_final', .25, utils.NO_BOUNDS),
                utils.MetricPoint('foo_min', 1, utils.Bounds(0., 2., False)),
            ],
        )
Example #8
0
    def test_min_time(self, window, timestamp, expected_min):
        """The metric store is queried with the expected minimum time."""
        start_time = timestamp_pb2.Timestamp()
        start_time.FromDatetime(datetime.datetime(2021, 2, 16, 0, 0, 0))

        # Leave these proto fields unset when the test case omits them.
        min_timestamp = timestamp_pb2.Timestamp()
        time_window = duration_pb2.Duration()
        if timestamp:
            min_timestamp.FromDatetime(timestamp)
        if window:
            time_window.FromTimedelta(window)

        assertion = metrics_pb2.Assertion(
            std_devs_from_mean=metrics_pb2.Assertion.StdDevsFromMean(
                comparison=metrics_pb2.Assertion.Comparison.WITHIN,
                std_devs=1,
            ),
            time_window=time_window,
            min_timestamp=min_timestamp,
        )

        metric_store = bigquery_client.BigQueryMetricStore(
            'fake_dataset', 'fake_project')
        # Stub out the BigQuery call; only the call arguments matter here.
        metric_store.get_metric_history = mock.Mock(return_value=[])

        event = metrics_pb2.TestCompletedEvent(
            benchmark_id="test_benchmark",
            start_time=start_time,
        )
        collector = base.BaseCollector(event, None, metric_store)
        collector.compute_bounds("metric_key", assertion)

        metric_store.get_metric_history.assert_called_with(
            benchmark_id="test_benchmark",
            metric_key="metric_key",
            min_time=expected_min)
Example #9
0
    def test_compute_bounds_fixed_value(self,
                                        comparison,
                                        threshold_value,
                                        expected_bounds,
                                        inclusive=False):
        """Fixed-value assertions produce the expected bounds."""
        fixed = metrics_pb2.Assertion.FixedValue(
            comparison=metrics_pb2.Assertion.Comparison.Value(comparison),
            value=threshold_value,
        )
        assertion = metrics_pb2.Assertion(
            fixed_value=fixed, inclusive_bounds=inclusive)
        collector = base.BaseCollector(
            metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"),
            None)
        actual = collector.compute_bounds("metric_key", assertion)
        # EQUAL comparisons always produce inclusive bounds.
        expected = utils.Bounds(*expected_bounds,
                                inclusive or comparison == 'EQUAL')
        self.assertSequenceAlmostEqual(
            dataclasses.astuple(actual),
            dataclasses.astuple(expected),
            places=3)
  def test_metric_collection_config(self, gcs_subdir):
    """The metric-config annotation is parsed into a MetricCollectionConfig.

    Also checks that output_path honors the optional gcs-subdir annotation.
    """
    # Minimal completed-Job dict: the metric config rides in a JSON-encoded
    # Kubernetes annotation.
    job = _job_from_dict({
      'metadata': {
        'name': 'job-name',
        'namespace': 'namespace',
        'labels': {
          'benchmarkId': 'test-job',
        },
        'annotations': {
          'ml-testing-accelerators/metric-config': json.dumps({
            'sources':  [{
              'literals': {
                'assertions': {
                  'duration': {
                    'within_bounds': {
                      'lower_bound': 1,
                      'upper_bound': 2,
                    }
                  }
                }
              }
            }]
          })
        }
      },
      'status': {
        'startTime': _START_TIME,
        'completionTime': _END_TIME,
        'succeeded': 1,
        'conditions': [
          {
            'status': True,
            'type': 'Complete',
            'lastTransitionTime': _END_TIME,
          }
        ]
      }
    })
    # Parameterized: gcs_subdir may be falsy (no annotation) or a subdir name.
    if gcs_subdir:
      job.metadata.annotations['ml-testing-accelerators/gcs-subdir'] = gcs_subdir

    actual_event = event_publisher.create_test_completed_event(
      job,
      model_output_bucket='gs://fake-bucket',
      cluster_name='cluster-name',
      cluster_location='cluster-location',
      project='project-id'
    )
    actual_mcc = actual_event.metric_collection_config

    expected_mcc = metrics_pb2.MetricCollectionConfig(
      sources=[
        metrics_pb2.MetricSource(
          literals=metrics_pb2.LiteralSource(
            assertions={
              'duration': metrics_pb2.Assertion(
                within_bounds=metrics_pb2.Assertion.WithinBounds(
                  lower_bound=1,
                  upper_bound=2,
                )
              )
            }
          )
        )
      ]
    )
    # An empty subdir segment collapses in os.path.join, so the no-subdir case
    # yields gs://fake-bucket/job-name.
    self.assertEqual(actual_event.output_path, os.path.join('gs://fake-bucket', gcs_subdir or '', 'job-name'))
    self.assertProtoEqual(expected_mcc, actual_mcc)
Example #11
0
    def test_get_metrics_from_perfzero_summary(self):
        """PerfZero summary JSON is flattened into metric points.

        wall_time, every entry of benchmark_result.metrics, and each
        process_info field become points; only keys with assertions get bounds.
        """
        # The collector searches under output_path; the summary lives in a
        # dated subdirectory, mirroring PerfZero's layout.
        temp_dir = self.create_tempdir().full_path
        summary_dir = os.path.join(temp_dir, 'date_and_time')
        pathlib.Path(summary_dir).mkdir(parents=True, exist_ok=True)
        summary_path = os.path.join(summary_dir, 'perfzero_summary.json')
        with open(summary_path, 'w') as f:
            json.dump(
                {
                    "execution_id": "execution_id",
                    "execution_timestamp": 1234567890.1,
                    "benchmark_result": {
                        "wall_time":
                        1234,
                        "metrics": [{
                            "name": "exp_per_second",
                            "value": 1.1,
                        }, {
                            "name": "avg_exp_per_second",
                            "value": 2.2,
                        }, {
                            "name": "startup_time",
                            "value": 3.3
                        }],
                    },
                    # These info sections are present in real summaries but are
                    # not turned into metric points.
                    "benchmark_info": {
                        "not": "important",
                    },
                    "setup_info": {},
                    "ml_framework_info": {
                        "not": "important",
                    },
                    "system_info": {
                        "not": "important"
                    },
                    "process_info": {
                        "max_rss": 4.4,
                        "max_vms": 5.5,
                        "max_cpu_percent": 6.6,
                    }
                }, f)

        metric_source = metrics_pb2.MetricSource(
            perfzero=metrics_pb2.PerfZeroSource(
                assertions={
                    'total_wall_time':
                    metrics_pb2.Assertion(
                        within_bounds=metrics_pb2.Assertion.WithinBounds(
                            lower_bound=1230,
                            upper_bound=1240,
                        )),
                    'exp_per_second':
                    metrics_pb2.Assertion(
                        within_bounds=metrics_pb2.Assertion.WithinBounds(
                            lower_bound=1,
                            upper_bound=100,
                        ), )
                }))
        event = metrics_pb2.TestCompletedEvent(
            benchmark_id="test_benchmark",
            output_path=temp_dir,
            metric_collection_config=metrics_pb2.MetricCollectionConfig(
                sources=[metric_source]))

        collector = perfzero_collector.PerfZeroCollector(
            event=event, raw_source=metric_source)
        points = list(collector.metric_points())
        self.assertCountEqual(
            points,
            {
                utils.MetricPoint("total_wall_time", 1234,
                                  utils.Bounds(1230., 1240., False)),
                utils.MetricPoint("exp_per_second", 1.1,
                                  utils.Bounds(1., 100., False)),
                utils.MetricPoint("avg_exp_per_second", 2.2, utils.NO_BOUNDS),
                utils.MetricPoint("startup_time", 3.3, utils.NO_BOUNDS),
                utils.MetricPoint("process_info/max_rss", 4.4,
                                  utils.NO_BOUNDS),
                utils.MetricPoint("process_info/max_vms", 5.5,
                                  utils.NO_BOUNDS),
                utils.MetricPoint("process_info/max_cpu_percent", 6.6,
                                  utils.NO_BOUNDS),
            },
        )