def test_pytorch_matmul():
    """Test pytorch-matmul benchmark."""
    context = BenchmarkRegistry.create_benchmark_context(
        'matmul',
        platform=Platform.CUDA,
        parameters='--run_count 2 --num_steps 20',
        framework=Framework.PYTORCH
    )
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'pytorch-matmul')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check predefined parameters of sharding-matmul benchmark.
    assert (benchmark._args.mode == [ShardingMode.NOSHARDING])

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.run_count == 2)
    assert (benchmark._args.num_steps == 20)

    # Check results and metrics.
    assert (benchmark.run_count == 2)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert (len(benchmark.raw_data['nosharding_time']) == benchmark.run_count)
    assert (len(benchmark.raw_data['nosharding_time'][0]) == benchmark._args.num_steps)
    assert (len(benchmark.result['nosharding_time']) == benchmark.run_count)
def test_kernel_launch_overhead():
    """Test kernel-launch benchmark."""
    context = BenchmarkRegistry.create_benchmark_context(
        'kernel-launch', parameters='--num_warmup 200 --num_steps 20000 --interval 100'
    )
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'kernel-launch')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warmup == 200)
    assert (benchmark._args.num_steps == 20000)
    assert (benchmark._args.interval == 100)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_0' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_0']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))

    for metric in ['event_time', 'wall_time']:
        assert (metric in benchmark.result)
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
def __exec_benchmark(self, benchmark_full_name, context):
    """Launch benchmark for context.

    Args:
        benchmark_full_name (str): Benchmark full name.
        context (BenchmarkContext): Benchmark context to launch.

    Return:
        dict: Benchmark result.
    """
    try:
        benchmark = BenchmarkRegistry.launch_benchmark(context)
        if benchmark:
            logger.info(
                'benchmark: %s, return code: %s, result: %s.', benchmark.name, benchmark.return_code,
                benchmark.result
            )
            if benchmark.return_code.value == 0:
                logger.info('Executor succeeded in %s.', benchmark_full_name)
            else:
                logger.error('Executor failed in %s.', benchmark_full_name)
            result = json.loads(benchmark.serialized_result)
            result['name'] = benchmark_full_name
            return result
        else:
            logger.error('Executor failed in %s, invalid context.', benchmark_full_name)
    except Exception as e:
        logger.error(e)
        logger.error('Executor failed in %s.', benchmark_full_name)
    return None
def test_pytorch_computation_communication_overlap_fake_distributed():
    """Test pytorch-computation-communication-overlap benchmark on a single GPU."""
    context = BenchmarkRegistry.create_benchmark_context(
        'computation-communication-overlap',
        parameters='--num_warmup 5 --num_steps 10 --ratio 5',
        framework=Framework.PYTORCH
    )
    port = network.get_free_port()
    assert (port)
    utils.setup_simulated_ddp_distributed_env(1, 0, port)

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (isinstance(benchmark, ComputationCommunicationOverlap))
    assert (benchmark.name == 'pytorch-computation-communication-overlap')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check predefined parameters of computation-communication-overlap benchmark.
    assert (benchmark._args.kernel == [ComputationKernelType.MUL, ComputationKernelType.MATMUL])

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_steps == 10)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert (len(benchmark.raw_data) == len(benchmark._args.kernel))
    assert (len(benchmark.result) == len(benchmark._args.kernel) + benchmark.default_metric_count)

    utils.clean_simulated_ddp_distributed_env()
def benchmark_in_one_process(context, world_size, local_rank, port, queue):
    """Set up the environment for DDP initialization and run the benchmark in a single process."""
    setup_simulated_ddp_distributed_env(world_size, local_rank, port)
    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # The parser object must be removed because it cannot be serialized.
    benchmark._parser = None
    queue.put(benchmark)

    clean_simulated_ddp_distributed_env()
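
# A minimal sketch of how the helper above could be driven to simulate a multi-process DDP run:
# spawn one process per rank on a shared free port and collect each rank's benchmark object from
# the queue. The driver function itself and the default two-rank world size are illustrative
# assumptions, not part of the original code; it assumes `network` is imported as in the tests.
def run_benchmark_in_fake_ddp(context, world_size=2):
    """Spawn one process per rank and return the benchmark objects collected from the queue (sketch)."""
    import multiprocessing as mp

    port = network.get_free_port()
    queue = mp.Queue()
    processes = []
    for local_rank in range(world_size):
        process = mp.Process(
            target=benchmark_in_one_process, args=(context, world_size, local_rank, port, queue)
        )
        process.start()
        processes.append(process)
    # Drain the queue before joining to avoid blocking on large serialized benchmark objects.
    benchmarks = [queue.get() for _ in range(world_size)]
    for process in processes:
        process.join()
    return benchmarks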
def test_pytorch_bert_base():
    """Test pytorch-bert-base benchmark."""
    context = BenchmarkRegistry.create_benchmark_context(
        'bert-base',
        platform=Platform.CUDA,
        parameters='--batch_size 1 --num_classes 5 --seq_len 8 --num_warmup 2 --num_steps 4 '
        '--model_action train inference',
        framework=Framework.PYTORCH
    )
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (isinstance(benchmark, PytorchBERT))
    assert (benchmark.name == 'pytorch-bert-base')
    assert (benchmark.type == BenchmarkType.MODEL)

    # Check predefined parameters of bert-base model.
    assert (benchmark._args.hidden_size == 768)
    assert (benchmark._args.num_hidden_layers == 12)
    assert (benchmark._args.num_attention_heads == 12)
    assert (benchmark._args.intermediate_size == 3072)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.batch_size == 1)
    assert (benchmark._args.num_classes == 5)
    assert (benchmark._args.seq_len == 8)
    assert (benchmark._args.num_warmup == 2)
    assert (benchmark._args.num_steps == 4)

    # Check dataset scale.
    assert (len(benchmark._dataset) == benchmark._args.sample_count * benchmark._world_size)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    for metric in [
        'fp32_train_step_time', 'fp32_train_throughput', 'fp16_train_step_time', 'fp16_train_throughput',
        'fp32_inference_step_time', 'fp32_inference_throughput', 'fp16_inference_step_time',
        'fp16_inference_throughput'
    ]:
        assert (len(benchmark.raw_data[metric]) == benchmark.run_count)
        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
        assert (len(benchmark.result[metric]) == benchmark.run_count)
def test_tcp_connectivity(self):
    """Test tcp-connectivity benchmark."""
    context = BenchmarkRegistry.create_benchmark_context(
        'tcp-connectivity',
        parameters='--hostfile /tmp/superbench/hostfile.test --port 80 --parallel 2',
    )
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (isinstance(benchmark, TCPConnectivityBenchmark))
    assert (benchmark.name == 'tcp-connectivity')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.hostfile == '/tmp/superbench/hostfile.test')
    assert (benchmark._args.port == 80)
    assert (benchmark._args.count == 10)
    assert (benchmark._args.timeout == 1)
    assert (benchmark._args.parallel == 2)

    print(benchmark.result)
    assert (benchmark.result)

    # Check results and metrics.
    assert (benchmark.result['api.github.com_successed_count'][0] == 10)
    assert (benchmark.result['api.github.com_failed_count'][0] == 0)
    assert (benchmark.result['api.github.com_success_rate'][0] == 100.0)
    assert (isinstance(benchmark.result['api.github.com_time_min'][0], numbers.Number))
    assert (isinstance(benchmark.result['api.github.com_time_max'][0], numbers.Number))
    assert (isinstance(benchmark.result['api.github.com_time_avg'][0], numbers.Number))
    assert (isinstance(benchmark.result['localhost_successed_count'][0], numbers.Number))
    assert (isinstance(benchmark.result['localhost_failed_count'][0], numbers.Number))
    assert (isinstance(benchmark.result['localhost_time_max'][0], numbers.Number))
    assert (isinstance(benchmark.result['localhost_time_min'][0], numbers.Number))
    assert (isinstance(benchmark.result['localhost_time_avg'][0], numbers.Number))
    assert (benchmark.return_code == ReturnCode.SUCCESS)
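
# The assertions in the test above expect the hostfile at /tmp/superbench/hostfile.test to contain
# the hosts 'api.github.com' and 'localhost'. A minimal setup sketch is shown below; the helper
# name and the idea of writing the file here are illustrative assumptions, since the original
# test fixture code is not shown.
def create_test_hostfile(path='/tmp/superbench/hostfile.test'):
    """Write a hostfile listing the hosts referenced by the tcp-connectivity assertions (sketch)."""
    import os

    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as hostfile:
        hostfile.write('api.github.com\nlocalhost\n')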
def run_pytorch_lstm(parameters='', check_metrics=[]):
    """Run pytorch-lstm benchmark and check its results."""
    context = BenchmarkRegistry.create_benchmark_context(
        'lstm',
        platform=Platform.CUDA,
        parameters=parameters,
        framework=Framework.PYTORCH
    )
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (isinstance(benchmark, PytorchLSTM))
    assert (benchmark.name == 'pytorch-lstm')
    assert (benchmark.type == BenchmarkType.MODEL)

    # Check predefined parameters of lstm model.
    assert (benchmark._args.input_size == 256)
    assert (benchmark._args.hidden_size == 1024)
    assert (benchmark._args.num_layers == 8)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.batch_size == 1)
    assert (benchmark._args.num_classes == 5)
    assert (benchmark._args.seq_len == 8)
    assert (benchmark._args.num_warmup == 2)
    assert (benchmark._args.num_steps == 4)

    # Check dataset scale.
    assert (len(benchmark._dataset) == benchmark._args.sample_count * benchmark._world_size)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    for metric in check_metrics:
        assert (len(benchmark.raw_data[metric]) == benchmark.run_count)
        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
        assert (len(benchmark.result[metric]) == benchmark.run_count)
def run_pytorch_cnn(models=[], parameters='', check_metrics=[]):
    """Run pytorch CNN benchmarks and check their results."""
    for model in models:
        context = BenchmarkRegistry.create_benchmark_context(
            model,
            platform=Platform.CUDA,
            parameters=parameters,
            framework=Framework.PYTORCH
        )
        assert (BenchmarkRegistry.is_benchmark_context_valid(context))

        benchmark = BenchmarkRegistry.launch_benchmark(context)

        # Check basic information.
        assert (benchmark)
        assert (isinstance(benchmark, PytorchCNN))
        assert (benchmark.name == 'pytorch-' + model)
        assert (benchmark.type == BenchmarkType.MODEL)

        # Check predefined parameters of the CNN model.
        assert (benchmark._args.model_type == model)

        # Check parameters specified in BenchmarkContext.
        assert (benchmark._args.batch_size == 1)
        assert (benchmark._args.image_size == 224)
        assert (benchmark._args.num_classes == 5)
        assert (benchmark._args.num_warmup == 2)
        assert (benchmark._args.num_steps == 4)

        # Check dataset scale.
        assert (len(benchmark._dataset) == benchmark._args.sample_count * benchmark._world_size)

        # Check results and metrics.
        assert (benchmark.run_count == 1)
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        for metric in check_metrics:
            assert (len(benchmark.raw_data[metric]) == benchmark.run_count)
            assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
            assert (len(benchmark.result[metric]) == benchmark.run_count)
def test_pytorch_sharding_matmul():
    """Test pytorch-sharding-matmul benchmark."""
    context = BenchmarkRegistry.create_benchmark_context(
        'sharding-matmul',
        platform=Platform.CUDA,
        parameters='--run_count 2 --num_steps 20',
        framework=Framework.PYTORCH
    )
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    port = network.get_free_port()
    assert (port)
    utils.setup_simulated_ddp_distributed_env(1, 0, port)

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (isinstance(benchmark, ShardingMatmul))
    assert (benchmark.name == 'pytorch-sharding-matmul')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check predefined parameters of sharding-matmul benchmark.
    assert (benchmark._args.mode == [ShardingMode.ALLREDUCE, ShardingMode.ALLGATHER])

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.run_count == 2)
    assert (benchmark._args.num_steps == 20)

    # Check results and metrics.
    assert (benchmark.run_count == 2)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    for metric in ['allreduce_time', 'allgather_time']:
        assert (len(benchmark.raw_data[metric]) == benchmark.run_count)
        assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
        assert (len(benchmark.result[metric]) == benchmark.run_count)

    utils.clean_simulated_ddp_distributed_env()
def test_pytorch_empty_cache():
    """Test PytorchBase class."""
    # Register mnist benchmark.
    BenchmarkRegistry.register_benchmark('pytorch-mnist', PytorchMNIST)

    # Test cache empty by manually calling torch.cuda.empty_cache().
    parameters = '--batch_size 32 --num_warmup 8 --num_steps 64 --model_action train'
    benchmark = PytorchMNIST('pytorch-mnist', parameters=parameters)
    assert (benchmark)
    assert (benchmark._preprocess())
    assert (benchmark._benchmark())
    del benchmark
    assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] > 0)
    torch.cuda.empty_cache()
    assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] == 0)

    # Test automatic cache empty.
    context = BenchmarkRegistry.create_benchmark_context(
        'pytorch-mnist', parameters='--batch_size 32 --num_warmup 8 --num_steps 64 --model_action train'
    )
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    assert (benchmark)
    assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] == 0)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Micro benchmark example for ONNXRuntime inference performance.

Commands to run:
    python3 examples/benchmarks/ort_inference_performance.py
"""

from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.common.utils import logger

if __name__ == '__main__':
    context = BenchmarkRegistry.create_benchmark_context(
        'ort-inference',
        platform=Platform.CUDA,
        parameters='--pytorch_models resnet50 resnet101 --precision float16'
    )

    benchmark = BenchmarkRegistry.launch_benchmark(context)
    if benchmark:
        logger.info(
            'benchmark: {}, return code: {}, result: {}'.format(
                benchmark.name, benchmark.return_code, benchmark.result
            )
        )
def test_cublas_functions():
    """Test cublas-function benchmark."""
    # Test for default configuration.
    context = BenchmarkRegistry.create_benchmark_context(
        'cublas-function',
        platform=Platform.CUDA,
        parameters='--num_warmup 10 --num_steps 10 --num_in_step 100'
    )
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'cublas-function')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warmup == 10)
    assert (benchmark._args.num_steps == 10)
    assert (benchmark._args.num_in_step == 100)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_0' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_0']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
    assert (19 <= len(benchmark.result))
    for metric in list(benchmark.result.keys()):
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
        if metric != 'return_code':
            assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)

    # Test for custom configuration.
    custom_config_str = '{"name":"cublasCgemm","m":512,"n":512,"k":32,"transa":1,"transb":0}'
    context = BenchmarkRegistry.create_benchmark_context(
        'cublas-function',
        platform=Platform.CUDA,
        parameters='--num_warmup 10 --num_steps 10 --num_in_step 100 --config_json_str ' + custom_config_str
    )
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'cublas-function')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warmup == 10)
    assert (benchmark._args.num_steps == 10)
    assert (benchmark._args.num_in_step == 100)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_0' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_0']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
    assert (1 + benchmark.default_metric_count == len(benchmark.result))
    for metric in list(benchmark.result.keys()):
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
        if metric != 'return_code':
            assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
def test_launch_benchmark():
    """Test interface BenchmarkRegistry.launch_benchmark()."""
    # Register benchmarks for testing.
    BenchmarkRegistry.register_benchmark(
        'accumulation', AccumulationBenchmark, parameters='--upper_bound 5', platform=Platform.CPU
    )

    # Launch benchmark.
    context = BenchmarkRegistry.create_benchmark_context(
        'accumulation', platform=Platform.CPU, parameters='--lower_bound 1'
    )
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    assert (benchmark)
    assert (benchmark.name == 'accumulation')
    assert (benchmark.type == BenchmarkType.MICRO)
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert (benchmark.raw_data == {'accumulation_result': ['1,3,6,10']})
    assert (benchmark.result == {'return_code': [0], 'accumulation_result': [10]})

    # Replace the timestamp with null.
    result = re.sub(r'\"\d+-\d+-\d+ \d+:\d+:\d+\"', 'null', benchmark.serialized_result)
    expected = (
        '{"name": "accumulation", "type": "micro", "run_count": 1, '
        '"return_code": 0, "start_time": null, "end_time": null, '
        '"raw_data": {"accumulation_result": ["1,3,6,10"]}, '
        '"result": {"return_code": [0], "accumulation_result": [10]}, '
        '"reduce_op": {"return_code": null, "accumulation_result": null}}'
    )
    assert (result == expected)

    # Launch benchmark with overridden parameters.
    context = BenchmarkRegistry.create_benchmark_context(
        'accumulation', platform=Platform.CPU, parameters='--lower_bound 1 --upper_bound 4'
    )
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    assert (benchmark)
    assert (benchmark.name == 'accumulation')
    assert (benchmark.type == BenchmarkType.MICRO)
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert (benchmark.raw_data == {'accumulation_result': ['1,3,6']})
    assert (benchmark.result == {'return_code': [0], 'accumulation_result': [6]})

    # Replace the timestamp with null.
    result = re.sub(r'\"\d+-\d+-\d+ \d+:\d+:\d+\"', 'null', benchmark.serialized_result)
    expected = (
        '{"name": "accumulation", "type": "micro", "run_count": 1, '
        '"return_code": 0, "start_time": null, "end_time": null, '
        '"raw_data": {"accumulation_result": ["1,3,6"]}, '
        '"result": {"return_code": [0], "accumulation_result": [6]}, '
        '"reduce_op": {"return_code": null, "accumulation_result": null}}'
    )
    assert (result == expected)

    # Failed to launch benchmark due to 'benchmark not found'.
    context = BenchmarkRegistry.create_benchmark_context(
        'accumulation-fail', Platform.CPU, parameters='--lower_bound 1 --upper_bound 4',
        framework=Framework.PYTORCH
    )
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    assert (benchmark is None)

    # Failed to launch benchmark due to 'unknown arguments'.
    context = BenchmarkRegistry.create_benchmark_context(
        'accumulation', platform=Platform.CPU, parameters='--lower_bound 1 --test 4'
    )
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    assert (benchmark)
    assert (benchmark.return_code == ReturnCode.INVALID_ARGUMENT)

    # Failed to launch benchmark due to 'invalid arguments'.
    context = BenchmarkRegistry.create_benchmark_context(
        'accumulation', platform=Platform.CPU, parameters='--lower_bound 1 --upper_bound x'
    )
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    assert (benchmark)
    assert (benchmark.return_code == ReturnCode.INVALID_ARGUMENT)
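
# For reference, the expected values asserted above ('1,3,6,10' with final result 10, and '1,3,6'
# with final result 6) are the running sums of the integers in [lower_bound, upper_bound). The
# sketch below reproduces that arithmetic; it is only an illustration of the expected behavior,
# not the actual AccumulationBenchmark implementation.
def accumulation_sketch(lower_bound, upper_bound):
    """Return the running sums from lower_bound up to upper_bound - 1 as a comma-separated string."""
    total, steps = 0, []
    for value in range(lower_bound, upper_bound):
        total += value
        steps.append(total)
    return ','.join(str(step) for step in steps)

# accumulation_sketch(1, 5) == '1,3,6,10' and accumulation_sketch(1, 4) == '1,3,6'.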
def test_cudnn_functions():
    """Test cudnn-function benchmark."""
    # Test for default configuration.
    context = BenchmarkRegistry.create_benchmark_context(
        'cudnn-function',
        platform=Platform.CUDA,
        parameters='--num_warmup 10 --num_steps 10 --num_in_step 100'
    )
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'cudnn-function')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warmup == 10)
    assert (benchmark._args.num_steps == 10)
    assert (benchmark._args.num_in_step == 100)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_0' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_0']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
    assert (18 <= len(benchmark.result))
    for metric in list(benchmark.result.keys()):
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
        if metric != 'return_code':
            assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)

    # Test for custom configuration.
    custom_config_str = '{"algo":0,"arrayLength":2,"convType":0,"dilationA":[1,1],"filterStrideA":[1,1],' \
        + '"filterDims":[32,128,3,3],"inputDims":[32,128,14,14],"inputStride":[25088,196,14,1],"inputType":0,' \
        + '"mode":1,"name":"cudnnConvolutionBackwardFilter","outputDims":[32,32,14,14],' \
        + '"outputStride":[6272,196,14,1],"padA":[1,1],"tensorOp":false}'
    context = BenchmarkRegistry.create_benchmark_context(
        'cudnn-function',
        platform=Platform.CUDA,
        parameters='--num_warmup 10 --num_steps 10 --num_in_step 100 --config_json_str ' + custom_config_str
    )
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'cudnn-function')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warmup == 10)
    assert (benchmark._args.num_steps == 10)
    assert (benchmark._args.num_in_step == 100)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_0' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_0']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
    assert (1 + benchmark.default_metric_count == len(benchmark.result))
    for metric in list(benchmark.result.keys()):
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
        if metric != 'return_code':
            assert (len(benchmark.raw_data[metric][0]) == benchmark._args.num_steps)