def test_assert_duration(self):
  """A literal `duration` metric is emitted with its configured bounds.

  Configures a literal source asserting duration in (100, 200) exclusive,
  then checks the collector produces exactly that one bounded point.
  """
  source = metrics_pb2.MetricSource(
      literals=metrics_pb2.LiteralSource(
          assertions={
              "duration":
                  metrics_pb2.Assertion(
                      within_bounds=metrics_pb2.Assertion.WithinBounds(
                          lower_bound=100,
                          upper_bound=200,
                      ),
                      inclusive_bounds=False,
                  )
          }))
  completed = metrics_pb2.TestCompletedEvent(
      benchmark_id="test_benchmark",
      duration=duration_pb2.Duration(seconds=150),
      metric_collection_config=metrics_pb2.MetricCollectionConfig(
          sources=[source]))

  collector = literal_collector.LiteralCollector(
      event=completed, raw_source=source)
  points = collector.metric_points()

  # Exactly one point: the event's duration in seconds, with bounds attached.
  self.assertLen(points, 1)
  self.assertEqual(points[0].metric_key, 'duration')
  self.assertEqual(points[0].metric_value, 150)
  self.assertEqual(points[0].bounds, utils.Bounds(100, 200, False))
def test_aggregate_metrics_include_all_strategies(self):
  """Each aggregation strategy produces one suffixed point per tag.

  The wildcard tag pattern with all five strategies should yield
  `<tag>_final` / `_min` / `_max` / `_average` / `_median` points for every
  tag in the summary directory, all with no bounds (no assertions set).
  """
  source = metrics_pb2.MetricSource(
      tensorboard=metrics_pb2.TensorBoardSource(include_tags=[
          metrics_pb2.TensorBoardSource.TagStrategy(
              tag_pattern="*",
              strategies=[
                  metrics_pb2.TensorBoardSource.FINAL,
                  metrics_pb2.TensorBoardSource.MAX,
                  metrics_pb2.TensorBoardSource.MIN,
                  metrics_pb2.TensorBoardSource.AVERAGE,
                  metrics_pb2.TensorBoardSource.MEDIAN,
              ])
      ]))
  completed = metrics_pb2.TestCompletedEvent(
      benchmark_id="test_benchmark",
      output_path=self.temp_dir,
      metric_collection_config=metrics_pb2.MetricCollectionConfig(
          sources=[source]))

  collector = tensorboard_collector.TensorBoardCollector(
      event=completed, raw_source=source)
  points = list(collector.metric_points())

  actual_values = {key: value for key, value, _ in points}
  self.assertDictEqual(
      actual_values, {
          'foo_final': 2,
          'foo_min': 1,
          'foo_max': 2,
          'foo_average': 1.5,
          'foo_median': 1.5,
          'eval/accuracy_final': .25,
          'eval/accuracy_min': .125,
          'eval/accuracy_max': .5,
          'eval/accuracy_average': np.mean([.125, .25, .5]),
          'eval/accuracy_median': np.median([.125, .25, .5]),
          'train/bar_final': 100,
          'train/bar_min': 10,
          'train/bar_max': 100,
          'train/bar_average': np.mean([10, 100, 100]),
          'train/bar_median': np.median([10, 100, 100]),
      })
  # No aggregate_assertions configured, so every point is unbounded.
  for _, _, bounds in points:
    self.assertEqual(bounds, utils.NO_BOUNDS)
def test_aggregate_metrics_with_assertion(self):
  """Bounds apply only to the (tag, strategy) pair that has an assertion.

  `eval/accuracy` MAX gets the configured [.4, 1.0] inclusive bounds;
  the MIN and FINAL points for the same tag remain unbounded.
  """
  source = metrics_pb2.MetricSource(
      tensorboard=metrics_pb2.TensorBoardSource(
          include_tags=[
              metrics_pb2.TensorBoardSource.TagStrategy(
                  tag_pattern="eval/*",
                  strategies=[
                      metrics_pb2.TensorBoardSource.FINAL,
                      metrics_pb2.TensorBoardSource.MAX,
                      metrics_pb2.TensorBoardSource.MIN,
                  ])
          ],
          aggregate_assertions=[
              metrics_pb2.TensorBoardSource.AggregateAssertion(
                  tag='eval/accuracy',
                  strategy=metrics_pb2.TensorBoardSource.MAX,
                  assertion=metrics_pb2.Assertion(
                      within_bounds=metrics_pb2.Assertion.WithinBounds(
                          lower_bound=.4,
                          upper_bound=1.0,
                      ),
                      inclusive_bounds=True,
                  ))
          ]))
  completed = metrics_pb2.TestCompletedEvent(
      benchmark_id="test_benchmark",
      output_path=self.temp_dir,
      metric_collection_config=metrics_pb2.MetricCollectionConfig(
          sources=[source]))

  collector = tensorboard_collector.TensorBoardCollector(
      event=completed, raw_source=source)
  points = list(collector.metric_points())

  self.assertCountEqual(
      points,
      [
          utils.MetricPoint('eval/accuracy_max', .5,
                            utils.Bounds(.4, 1.0, True)),
          utils.MetricPoint('eval/accuracy_min', .125, utils.NO_BOUNDS),
          utils.MetricPoint('eval/accuracy_final', .25, utils.NO_BOUNDS),
      ],
  )
def test_include_and_exclude(self):
  """Excluded tags are dropped unless re-added by an aggregate assertion.

  `foo` and `train/*` are excluded from the wildcard include, but the
  aggregate assertion on `foo` MIN still produces a bounded `foo_min` point.
  """
  source = metrics_pb2.MetricSource(
      tensorboard=metrics_pb2.TensorBoardSource(
          include_tags=[
              metrics_pb2.TensorBoardSource.TagStrategy(
                  tag_pattern="*",
                  strategies=[
                      metrics_pb2.TensorBoardSource.FINAL,
                  ])
          ],
          exclude_tags=[
              'foo',
              'train/*',
          ],
          aggregate_assertions=[
              metrics_pb2.TensorBoardSource.AggregateAssertion(
                  tag='foo',
                  strategy=metrics_pb2.TensorBoardSource.MIN,
                  assertion=metrics_pb2.Assertion(
                      within_bounds=metrics_pb2.Assertion.WithinBounds(
                          lower_bound=0.,
                          upper_bound=2.,
                      )))
          ]))
  completed = metrics_pb2.TestCompletedEvent(
      benchmark_id="test_benchmark",
      output_path=self.temp_dir,
      metric_collection_config=metrics_pb2.MetricCollectionConfig(
          sources=[source]))

  collector = tensorboard_collector.TensorBoardCollector(
      event=completed, raw_source=source)
  points = list(collector.metric_points())

  self.assertCountEqual(
      points,
      [
          utils.MetricPoint('eval/accuracy_final', .25, utils.NO_BOUNDS),
          utils.MetricPoint('foo_min', 1, utils.Bounds(0., 2., False)),
      ],
  )
def test_metric_collection_config(self, gcs_subdir):
  """The metric-config annotation is parsed into a MetricCollectionConfig.

  Parameterized on `gcs_subdir` (falsy means no subdir annotation): the
  event's output_path must include the subdir when present, and the JSON
  annotation must round-trip into the expected proto.
  """
  job = _job_from_dict({
      'metadata': {
          'name': 'job-name',
          'namespace': 'namespace',
          'labels': {
              'benchmarkId': 'test-job',
          },
          'annotations': {
              'ml-testing-accelerators/metric-config':
                  json.dumps({
                      'sources': [{
                          'literals': {
                              'assertions': {
                                  'duration': {
                                      'within_bounds': {
                                          'lower_bound': 1,
                                          'upper_bound': 2,
                                      }
                                  }
                              }
                          }
                      }]
                  })
          }
      },
      'status': {
          'startTime': _START_TIME,
          'completionTime': _END_TIME,
          'succeeded': 1,
          'conditions': [{
              'status': True,
              'type': 'Complete',
              'lastTransitionTime': _END_TIME,
          }]
      }
  })
  if gcs_subdir:
    job.metadata.annotations['ml-testing-accelerators/gcs-subdir'] = gcs_subdir

  event = event_publisher.create_test_completed_event(
      job,
      model_output_bucket='gs://fake-bucket',
      cluster_name='cluster-name',
      cluster_location='cluster-location',
      project='project-id')

  expected_mcc = metrics_pb2.MetricCollectionConfig(sources=[
      metrics_pb2.MetricSource(
          literals=metrics_pb2.LiteralSource(
              assertions={
                  'duration':
                      metrics_pb2.Assertion(
                          within_bounds=metrics_pb2.Assertion.WithinBounds(
                              lower_bound=1,
                              upper_bound=2,
                          ))
              }))
  ])
  # Empty-string subdir collapses away in os.path.join.
  self.assertEqual(
      event.output_path,
      os.path.join('gs://fake-bucket', gcs_subdir or '', 'job-name'))
  self.assertProtoEqual(expected_mcc, event.metric_collection_config)
def test_get_metrics_from_perfzero_summary(self):
  """PerfZero summary JSON is flattened into metric points with bounds.

  Writes a fake `perfzero_summary.json` under a dated subdirectory, then
  checks that wall time, per-metric entries, and `process_info` fields are
  collected, with bounds attached only where assertions are configured.
  """
  temp_dir = self.create_tempdir().full_path
  summary_dir = os.path.join(temp_dir, 'date_and_time')
  pathlib.Path(summary_dir).mkdir(parents=True, exist_ok=True)
  summary_path = os.path.join(summary_dir, 'perfzero_summary.json')
  with open(summary_path, 'w') as f:
    json.dump(
        {
            "execution_id": "execution_id",
            "execution_timestamp": 1234567890.1,
            "benchmark_result": {
                "wall_time": 1234,
                "metrics": [{
                    "name": "exp_per_second",
                    "value": 1.1,
                }, {
                    "name": "avg_exp_per_second",
                    "value": 2.2,
                }, {
                    "name": "startup_time",
                    "value": 3.3
                }],
            },
            "benchmark_info": {
                "not": "important",
            },
            "setup_info": {},
            "ml_framework_info": {
                "not": "important",
            },
            "system_info": {
                "not": "important"
            },
            "process_info": {
                "max_rss": 4.4,
                "max_vms": 5.5,
                "max_cpu_percent": 6.6,
            }
        }, f)

  metric_source = metrics_pb2.MetricSource(
      perfzero=metrics_pb2.PerfZeroSource(
          assertions={
              'total_wall_time':
                  metrics_pb2.Assertion(
                      within_bounds=metrics_pb2.Assertion.WithinBounds(
                          lower_bound=1230,
                          upper_bound=1240,
                      )),
              'exp_per_second':
                  metrics_pb2.Assertion(
                      within_bounds=metrics_pb2.Assertion.WithinBounds(
                          lower_bound=1,
                          upper_bound=100,
                      ),
                  )
          }))
  event = metrics_pb2.TestCompletedEvent(
      benchmark_id="test_benchmark",
      output_path=temp_dir,
      metric_collection_config=metrics_pb2.MetricCollectionConfig(
          sources=[metric_source]))

  collector = perfzero_collector.PerfZeroCollector(
      event=event, raw_source=metric_source)
  points = list(collector.metric_points())

  # Use a list (not a set) as the expected container: a set would silently
  # collapse duplicate expected points and defeat assertCountEqual's
  # element-count check. This also matches the sibling tests above.
  self.assertCountEqual(
      points,
      [
          utils.MetricPoint("total_wall_time", 1234,
                            utils.Bounds(1230., 1240., False)),
          utils.MetricPoint("exp_per_second", 1.1,
                            utils.Bounds(1., 100., False)),
          utils.MetricPoint("avg_exp_per_second", 2.2, utils.NO_BOUNDS),
          utils.MetricPoint("startup_time", 3.3, utils.NO_BOUNDS),
          utils.MetricPoint("process_info/max_rss", 4.4, utils.NO_BOUNDS),
          utils.MetricPoint("process_info/max_vms", 5.5, utils.NO_BOUNDS),
          utils.MetricPoint("process_info/max_cpu_percent", 6.6,
                            utils.NO_BOUNDS),
      ],
  )