def test_add_raw_data():
    """Test interface BenchmarkResult.add_raw_data()."""
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    result.add_raw_data('metric1', 'raw log 1', False)
    result.add_raw_data('metric1', 'raw log 2', False)
    assert (result.raw_data['metric1'][0] == 'raw log 1')
    assert (result.raw_data['metric1'][1] == 'raw log 2')
    assert (result.type == BenchmarkType.MICRO)
    assert (result.return_code == ReturnCode.SUCCESS)

    result = BenchmarkResult('model', BenchmarkType.MODEL, ReturnCode.SUCCESS)
    result.add_raw_data('metric1', [1, 2, 3], False)
    result.add_raw_data('metric1', [4, 5, 6], False)
    assert (result.raw_data['metric1'][0] == [1, 2, 3])
    assert (result.raw_data['metric1'][1] == [4, 5, 6])
    assert (result.type == BenchmarkType.MODEL)
    assert (result.return_code == ReturnCode.SUCCESS)

    # Test log_raw_data = True.
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    result.add_raw_data('metric1', 'raw log 1', True)
    result.add_raw_data('metric1', 'raw log 2', True)
    assert (result.type == BenchmarkType.MICRO)
    assert (result.return_code == ReturnCode.SUCCESS)
    raw_data_file = os.path.join(os.getcwd(), 'rawdata.log')
    assert (os.path.isfile(raw_data_file))
    os.remove(raw_data_file)
def _preprocess(self):
    """Preprocess/preparation operations before the benchmarking.

    Return:
        True if _preprocess() succeeds.
    """
    self.add_parser_arguments()
    ret, self._args, unknown = self.parse_args()

    if not ret:
        self._result = BenchmarkResult(self._name, self._benchmark_type, ReturnCode.INVALID_ARGUMENT)
        return False

    self._result = BenchmarkResult(
        self._name, self._benchmark_type, ReturnCode.SUCCESS, run_count=self._args.run_count
    )

    if not isinstance(self._benchmark_type, BenchmarkType):
        logger.error(
            'Invalid benchmark type - benchmark: {}, type: {}'.format(self._name, type(self._benchmark_type))
        )
        self._result.set_return_code(ReturnCode.INVALID_BENCHMARK_TYPE)
        return False

    return True
def test_fambench():
    """Test FAMBench benchmarks."""
    benchmark_name = 'fambench'
    (benchmark_class, predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CUDA)
    assert (benchmark_class)

    benchmark = benchmark_class(benchmark_name)
    assert (benchmark._benchmark_type == BenchmarkType.DOCKER)
    assert (benchmark._image_uri == 'superbench/benchmark:cuda11.1.1-fambench')
    assert (benchmark._container_name == 'fambench-benchmarks')
    assert (benchmark._entrypoint == '/workspace/FAMBench/benchmarks/run_all_benchmarks.sh')
    assert (benchmark._cmd is None)

    benchmark._result = BenchmarkResult(benchmark._name, benchmark._benchmark_type, ReturnCode.SUCCESS)
    benchmark._args = SimpleNamespace(log_raw_data=False)
    raw_output = """
benchmark implementation mode config score units batch_latency_95_sec
DLRM OOTB eval tiny 152.800399 ex/s 0.515052
DLRM OOTB train tiny 35.483686 ex/s None
DLRM UBENCH train linear_[(2,2,2,2,2)] 3.679281e-07 TF/s None
XLMR OOTB eval default-config 1.015586 ex/s 16.463461
"""
    assert (benchmark._process_raw_result(0, raw_output))
    assert (benchmark.result['dlrm_ootb_eval_tiny_ex_s'][0] == 152.800399)
    assert (benchmark.result['dlrm_ootb_train_tiny_ex_s'][0] == 35.483686)
    assert (benchmark.result['dlrm_ubench_train_linear_[(2,2,2,2,2)]_tf_s'][0] == 3.679281e-07)
    assert (benchmark.result['xlmr_ootb_eval_default_config_ex_s'][0] == 1.015586)
def test_add_result():
    """Test interface BenchmarkResult.add_result()."""
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    result.add_result('metric1', 300)
    result.add_result('metric1', 200)
    assert (result.result['metric1'][0] == 300)
    assert (result.result['metric1'][1] == 200)
def test_tensorrt_inference_result_parsing(self, test_raw_log):
    """Test tensorrt-inference benchmark result parsing."""
    (benchmark_cls, _) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(self.benchmark_name, Platform.CUDA)
    benchmark = benchmark_cls(self.benchmark_name, parameters='')
    benchmark._args = SimpleNamespace(pytorch_models=['model_0', 'model_1'], log_raw_data=False)
    benchmark._result = BenchmarkResult(self.benchmark_name, BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=1)

    # Positive case - valid raw output.
    self.assertTrue(benchmark._process_raw_result(0, test_raw_log))
    self.assertEqual(ReturnCode.SUCCESS, benchmark.return_code)

    self.assertEqual(6 + benchmark.default_metric_count, len(benchmark.result))
    for tag in ['mean', '99']:
        self.assertEqual(0.5, benchmark.result[f'model_0_gpu_time_{tag}'][0])
        self.assertEqual(0.6, benchmark.result[f'model_0_host_time_{tag}'][0])
        self.assertEqual(1.0, benchmark.result[f'model_0_end_to_end_time_{tag}'][0])

    # Negative case - invalid raw output.
    self.assertFalse(benchmark._process_raw_result(1, 'Invalid raw output'))
def test_set_timestamp():
    """Test interface BenchmarkResult.set_timestamp()."""
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    start_time = '2021-02-03 16:59:49'
    end_time = '2021-02-03 17:00:08'
    result.set_timestamp(start_time, end_time)
    assert (result.start_time == start_time)
    assert (result.end_time == end_time)
def test_set_return_code():
    """Test interface BenchmarkResult.set_return_code()."""
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    assert (result.return_code == ReturnCode.SUCCESS)
    assert (result.result['return_code'] == [ReturnCode.SUCCESS.value])

    result.set_return_code(ReturnCode.INVALID_ARGUMENT)
    assert (result.return_code == ReturnCode.INVALID_ARGUMENT)
    assert (result.result['return_code'] == [ReturnCode.INVALID_ARGUMENT.value])

    result.set_return_code(ReturnCode.INVALID_BENCHMARK_RESULT)
    assert (result.return_code == ReturnCode.INVALID_BENCHMARK_RESULT)
    assert (result.result['return_code'] == [ReturnCode.INVALID_BENCHMARK_RESULT.value])
def test_serialize_deserialize():
    """Test serialization/deserialization and compare the results."""
    # Result with one metric.
    result = BenchmarkResult('pytorch-bert-base1', BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=2)
    result.add_result('metric1', 300, ReduceType.MAX)
    result.add_result('metric1', 200, ReduceType.MAX)
    result.add_result('metric2', 100, ReduceType.AVG)
    result.add_raw_data('metric1', [1, 2, 3], False)
    result.add_raw_data('metric1', [4, 5, 6], False)
    result.add_raw_data('metric1', [7, 8, 9], False)
    start_time = '2021-02-03 16:59:49'
    end_time = '2021-02-03 17:00:08'
    result.set_timestamp(start_time, end_time)
    result.set_benchmark_type(BenchmarkType.MICRO)

    expected = (
        '{"name": "pytorch-bert-base1", "type": "micro", "run_count": 2, "return_code": 0, '
        '"start_time": "2021-02-03 16:59:49", "end_time": "2021-02-03 17:00:08", '
        '"raw_data": {"metric1": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}, '
        '"result": {"return_code": [0], "metric1": [300, 200], "metric2": [100]}, '
        '"reduce_op": {"return_code": null, "metric1": "max", "metric2": "avg"}}'
    )
    assert (result.to_string() == expected)
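# The test above only checks the serialized string. A minimal round-trip sketch
# (assumption: to_string() emits plain JSON, as the `expected` literal above
# suggests; this test name and helper are illustrative, not part of the suite):
def test_serialized_string_is_valid_json():
    """Sketch: check that BenchmarkResult.to_string() parses back with json.loads()."""
    import json

    result = BenchmarkResult('pytorch-bert-base1', BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=2)
    result.add_result('metric1', 300, ReduceType.MAX)

    # Parse the serialized form and spot-check a few fields.
    parsed = json.loads(result.to_string())
    assert (parsed['name'] == 'pytorch-bert-base1')
    assert (parsed['run_count'] == 2)
    assert (parsed['result']['metric1'] == [300])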
def test_set_benchmark_type():
    """Test interface BenchmarkResult.set_benchmark_type()."""
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    result.set_benchmark_type(BenchmarkType.MICRO)
    assert (result.type == BenchmarkType.MICRO)
def test_rocm_onnxruntime_performance():
    """Test onnxruntime model benchmark."""
    benchmark_name = 'onnxruntime-ort-models'
    (benchmark_class, predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.ROCM)
    assert (benchmark_class)

    benchmark = benchmark_class(benchmark_name)
    assert (benchmark._benchmark_type == BenchmarkType.DOCKER)
    assert (benchmark._image_uri == 'superbench/benchmark:rocm4.3.1-onnxruntime1.9.0')
    assert (benchmark._container_name == 'rocm-onnxruntime-model-benchmarks')
    assert (benchmark._entrypoint == '/stage/onnxruntime-training-examples/huggingface/azureml/run_benchmark.sh')
    assert (benchmark._cmd is None)

    benchmark._result = BenchmarkResult(benchmark._name, benchmark._benchmark_type, ReturnCode.SUCCESS)
    benchmark._args = SimpleNamespace(log_raw_data=False)
    raw_output = """
__superbench__ begin bert-large-uncased ngpu=1
    "samples_per_second": 21.829
__superbench__ begin bert-large-uncased ngpu=8
    "samples_per_second": 147.181
__superbench__ begin distilbert-base-uncased ngpu=1
    "samples_per_second": 126.827
__superbench__ begin distilbert-base-uncased ngpu=8
    "samples_per_second": 966.796
__superbench__ begin gpt2 ngpu=1
    "samples_per_second": 20.46
__superbench__ begin gpt2 ngpu=8
    "samples_per_second": 151.089
__superbench__ begin facebook/bart-large ngpu=1
    "samples_per_second": 66.171
__superbench__ begin facebook/bart-large ngpu=8
    "samples_per_second": 370.343
__superbench__ begin roberta-large ngpu=1
    "samples_per_second": 37.103
__superbench__ begin roberta-large ngpu=8
    "samples_per_second": 274.455
"""
    assert (benchmark._process_raw_result(0, raw_output))
    assert (benchmark.result['bert_large_uncased_ngpu_1_throughput'][0] == 21.829)
    assert (benchmark.result['bert_large_uncased_ngpu_8_throughput'][0] == 147.181)
    assert (benchmark.result['distilbert_base_uncased_ngpu_1_throughput'][0] == 126.827)
    assert (benchmark.result['distilbert_base_uncased_ngpu_8_throughput'][0] == 966.796)
    assert (benchmark.result['gpt2_ngpu_1_throughput'][0] == 20.46)
    assert (benchmark.result['gpt2_ngpu_8_throughput'][0] == 151.089)
    assert (benchmark.result['facebook_bart_large_ngpu_1_throughput'][0] == 66.171)
    assert (benchmark.result['facebook_bart_large_ngpu_8_throughput'][0] == 370.343)
    assert (benchmark.result['roberta_large_ngpu_1_throughput'][0] == 37.103)
    assert (benchmark.result['roberta_large_ngpu_8_throughput'][0] == 274.455)
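# The assertions above imply a simple mapping from the raw log to metric names:
# each "__superbench__ begin <model> ngpu=<n>" header pairs with the next
# "samples_per_second" value, and the metric becomes '<model>_ngpu_<n>_throughput'
# with '-' and '/' normalized to '_'. A standalone sketch of that mapping
# (illustrative only, not the benchmark's actual _process_raw_result implementation):
def _sketch_parse_ort_output(raw_output):
    """Sketch: map '__superbench__ begin ...' log lines to throughput metrics."""
    metrics = {}
    model = None
    for line in raw_output.splitlines():
        line = line.strip()
        if line.startswith('__superbench__ begin'):
            # e.g. '__superbench__ begin bert-large-uncased ngpu=1'
            model = line[len('__superbench__ begin'):].strip()
        elif '"samples_per_second"' in line and model is not None:
            value = float(line.split(':')[1])
            name = model.replace('-', '_').replace('/', '_').replace(' ngpu=', '_ngpu_')
            metrics[name + '_throughput'] = value
            model = None
    return metrics
    # Example: _sketch_parse_ort_output(raw_output)['gpt2_ngpu_1_throughput'] == 20.46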