def AddSamples(self, samples, benchmark, benchmark_spec):
  """Adds data samples to the publisher.

  Args:
    samples: Either a list of Sample objects (preferred) or a list of 3 or
      4-tuples (deprecated). The tuples contain the metric name (string),
      the value (float), and unit (string) of each sample. If a 4th element
      is included, it is a dictionary of metadata associated with the
      sample.
    benchmark: string. The name of the benchmark.
    benchmark_spec: BenchmarkSpec. Benchmark specification.
  """
  for raw in samples:
    # Legacy callers pass bare tuples; normalize those to Sample objects.
    if isinstance(raw, (list, tuple)):
      if len(raw) not in (3, 4):
        raise ValueError(
            'Invalid sample "{0}": should be 3- or 4-tuple.'.format(raw))
      raw = Sample(*raw)

    # Annotate the sample with benchmark identity and run-level metadata.
    record = dict(raw.asdict())
    record['test'] = benchmark
    for provider in self.metadata_providers:
      record['metadata'] = provider.AddMetadata(
          record['metadata'], benchmark_spec)
    record.update(
        product_name=FLAGS.product_name,
        official=FLAGS.official,
        owner=FLAGS.owner,
        timestamp=time.time(),
        run_uri=self.run_uri,
        sample_uri=str(uuid.uuid4()))
    self.samples.append(record)
def testTrainResults(self):
  """Checks that a v0.5.0 training log parses into the expected samples."""
  samples = mlperf_benchmark.MakeSamplesFromOutput({}, self.contents)
  # Expected (accuracy %, epoch, elapsed seconds) triples from the log.
  accuracy_points = [
      (5.96720390021801, 0, 0.0),
      (36.89168393611908, 4, 1164.691000699997),
      (49.114990234375, 8, 2329.8028297424316),
      (53.01310420036316, 12, 3498.9867885112762),
      (53.55224609375, 16, 4667.747241735458),
      (54.87263798713684, 20, 5831.299504995346),
      (54.70173954963684, 24, 6996.661015510559),
      (56.72810673713684, 28, 8160.468462944031),
      (70.751953125, 32, 9329.49914598465),
      (71.368408203125, 36, 10494.261439800262),
      (71.49454951286316, 40, 11657.773159980774),
      (70.70515751838684, 44, 12823.00942158699),
      (70.65632939338684, 48, 13988.791482448578),
      (70.562744140625, 52, 15154.056546211243),
      (70.88623046875, 56, 16318.724472999573),
      (74.67244267463684, 60, 17482.81353545189),
      (75.00407099723816, 61, 17788.61406970024),
  ]
  golden = [
      Sample('Eval Accuracy', accuracy, '%',
             {'epoch': epoch, 'times': times, 'version': '0.5.0'})
      for accuracy, epoch, times in accuracy_points
  ]
  # The parser also emits one total-runtime sample at the end.
  golden.append(Sample('Times', 18183, 'seconds', {}))
  self.assertEqual(samples, golden)
def testTrainResults(self):
  """Checks that a v0.6.0 TPU ResNet log parses into the expected samples."""
  samples = mlperf_benchmark.MakeSamplesFromOutput({'version': 'v0.6.0'},
                                                   self.contents,
                                                   use_tpu=True,
                                                   model='resnet')
  # Expected (accuracy %, epoch, elapsed seconds) triples from the log.
  accuracy_points = [
      (32.322001457214355, 4, 0.0),
      (40.342000126838684, 8, 164.16299986839294),
      (48.21600019931793, 12, 328.239000082016),
      (51.749998331069946, 16, 492.335000038147),
      (52.851998805999756, 20, 656.4279999732971),
      (52.99599766731262, 24, 820.5209999084473),
      (60.44999957084656, 28, 984.6259999275208),
      (62.775999307632446, 32, 1148.7119998931885),
      (66.22400283813477, 36, 1312.8050000667572),
      (67.34600067138672, 40, 1476.9070000648499),
      (70.77400088310242, 44, 1640.994999885559),
      (72.40599989891052, 48, 1805.085000038147),
      (73.85799884796143, 52, 1969.1849999427795),
      (75.26000142097473, 56, 2133.2750000953674),
      (76.0420024394989, 60, 2297.3669998645782),
  ]
  golden = [
      Sample('Eval Accuracy', accuracy, '%',
             {'epoch': epoch, 'times': times, 'version': 'v0.6.0'})
      for accuracy, epoch, times in accuracy_points
  ]
  self.assertEqual(samples, golden)
def testT2TGpuOutput(self):
  """Checks parsing of a tensor2tensor GPU training/eval log."""
  self.maxDiff = None
  path = os.path.join(os.path.dirname(__file__), '..', 'data',
                      't2t_gpu_output.txt')
  with open(path) as fp:
    t2t_contents = fp.read()
  samples = t2t_benchmark._MakeSamplesFromOutput({'use_tpu': False},
                                                 t2t_contents)
  # One throughput sample is expected per logged training step rate.
  step_rates = [3.04983, 4.12771, 4.11027, 4.10924, 4.12186, 4.08434,
                4.10174, 4.11809, 4.10496]
  golden = [
      Sample(metric='Global Steps Per Second', value=rate,
             unit='global_steps/sec',
             metadata={'index': index, 'use_tpu': False}, timestamp=0)
      for index, rate in enumerate(step_rates)
  ]
  # Evaluation metrics reported once, at step 1000.
  eval_metrics = [
      ('Eval Loss', 7.2263174, ''),
      ('Accuracy', 13.972055999999998, '%'),
      ('Accuracy Per Sequence', 0.0, '%'),
      ('Negative Log Perplexity', -7.2263174, 'perplexity'),
      ('Top 5 Accuracy', 24.800399000000002, '%'),
  ]
  golden.extend(
      Sample(metric=metric, value=value, unit=unit,
             metadata={'use_tpu': False, 'step': 1000}, timestamp=0)
      for metric, value, unit in eval_metrics)
  self.assertEqual(samples, golden)
def testT2TTpuOutput(self):
  """Checks parsing of a tensor2tensor TPU training/eval log."""
  self.maxDiff = None
  path = os.path.join(os.path.dirname(__file__), '..', 'data',
                      't2t_tpu_output.txt')
  with open(path) as fp:
    t2t_contents = fp.read()
  samples = t2t_benchmark._MakeSamplesFromOutput({'use_tpu': True},
                                                 t2t_contents)
  # Throughput samples: all step-rate samples precede the examples/sec ones.
  throughput_series = [
      ('Global Steps Per Second', 'global_steps/sec', [1.85777, 5.06989]),
      ('Examples Per Second', 'examples/sec', [118.897, 324.473]),
  ]
  golden = [
      Sample(metric=metric, value=value, unit=unit,
             metadata={'use_tpu': True, 'index': index}, timestamp=0)
      for metric, unit, values in throughput_series
      for index, value in enumerate(values)
  ]
  # Evaluation metrics are reported twice, at steps 1000 and 1200.
  eval_results = [
      (1000, [('Eval Loss', 3.9047337, ''),
              ('Accuracy', 32.064167, '%'),
              ('Accuracy Per Sequence', 0.0, '%'),
              ('Negative Log Perplexity', -4.501835, 'perplexity'),
              ('Top 5 Accuracy', 50.96436, '%')]),
      (1200, [('Eval Loss', 3.7047337, ''),
              ('Accuracy', 33.064167, '%'),
              ('Accuracy Per Sequence', 0.0, '%'),
              ('Negative Log Perplexity', -4.101835, 'perplexity'),
              ('Top 5 Accuracy', 55.96436, '%')]),
  ]
  for step, metrics in eval_results:
    golden.extend(
        Sample(metric=metric, value=value, unit=unit,
               metadata={'use_tpu': True, 'step': step}, timestamp=0)
        for metric, value, unit in metrics)
  self.assertEqual(samples, golden)
def testTrainResults(self):
  """Checks BERT/DLRM performance and accuracy sample parsing (v1.1).

  Fix: removed a leftover debug `print(samples[0])` at the end of the test;
  tests should not write to stdout.
  """
  # BERT performance: throughput from 'Scheduled samples per second'.
  samples = mlperf_inference_benchmark.MakePerformanceSamplesFromOutput(
      {'version': 'v1.1'}, self.bert_performance_contents)
  metadata = {
      'mlperf 50.00 percentile latency (ns)': '40533329',
      'mlperf 90.00 percentile latency (ns)': '51387550',
      'mlperf 95.00 percentile latency (ns)': '54956149',
      'mlperf 97.00 percentile latency (ns)': '57792422',
      'mlperf 99.00 percentile latency (ns)': '82056764',
      'mlperf 99.90 percentile latency (ns)': '543294654940',
      'mlperf Completed samples per second': '3102.49',
      'mlperf Max latency (ns)': '605456500256',
      'mlperf Mean latency (ns)': '3037717062',
      'mlperf Min duration satisfied': 'Yes',
      'mlperf Min latency (ns)': '4126840',
      'mlperf Min queries satisfied': 'Yes',
      'mlperf Mode': 'PerformanceOnly',
      'mlperf Performance constraints satisfied': 'Yes',
      'mlperf Result is': 'VALID',
      'mlperf SUT name': 'BERT SERVER',
      'mlperf Scenario': 'Server',
      'mlperf Scheduled samples per second': '3102.76',
      'mlperf accuracy_level': '99%',
      'mlperf accuracy_log_probability': '0',
      'mlperf accuracy_log_rng_seed': '0',
      'mlperf accuracy_log_sampling_target': '0',
      'mlperf benchmark': 'Benchmark.BERT',
      'mlperf coalesced_tensor': 'True',
      'mlperf config_name': 'A100-SXM4-40GBx1_bert_Server',
      'mlperf config_ver': 'custom_k_99_MaxP',
      'mlperf cpu_freq': 'None',
      'mlperf gpu_batch_size': '64',
      'mlperf gpu_copy_streams': '1',
      'mlperf gpu_inference_streams': '2',
      'mlperf gpu_num_bundles': '2',
      'mlperf inference_server': 'custom',
      'mlperf input_dtype': 'int32',
      'mlperf input_format': 'linear',
      'mlperf log_dir': '/work/build/logs/2021.10.27-20.51.11',
      'mlperf max_async_queries': '0',
      'mlperf max_duration (ms)': '0',
      'mlperf max_query_count': '0',
      'mlperf min_duration (ms)': '600000',
      'mlperf min_query_count': '270336',
      'mlperf optimization_level': 'plugin-enabled',
      'mlperf performance_issue_same': '0',
      'mlperf performance_issue_same_index': '0',
      'mlperf performance_issue_unique': '0',
      'mlperf performance_sample_count': '10833',
      'mlperf power_limit': 'None',
      'mlperf precision': 'int8',
      'mlperf print_timestamps': '0',
      'mlperf qsl_rng_seed': '1624344308455410291',
      'mlperf sample_index_rng_seed': '517984244576520566',
      'mlperf samples_per_query': '1',
      'mlperf scenario': 'Scenario.Server',
      'mlperf schedule_rng_seed': '10051496985653635065',
      'mlperf server_target_qps': '3100',
      'mlperf system': 'A100-SXM4-40GBx1',
      'mlperf system_id': 'A100-SXM4-40GBx1',
      'mlperf target_latency (ns)': '130000000',
      'mlperf tensor_path':
          '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_ids.npy,'
          '${PREPROCESSED_DATA_DIR}/squad_tokenized/segment_ids.npy,'
          '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_mask.npy',
      'mlperf use_cpu': 'False',
      'mlperf use_graphs': 'True',
      'version': 'v1.1'
  }
  golden = Sample(metric='throughput', value=3102.76, unit='samples/s',
                  metadata=metadata)
  self.assertSamplesEqualUpToTimestamp(golden, samples[0])
  # BERT accuracy: 90.376% against an 89.965% threshold.
  samples = mlperf_inference_benchmark.MakeAccuracySamplesFromOutput(
      {'version': 'v1.1'}, self.bert_accuracy_contents)
  metadata = {
      'mlperf benchmark': 'Benchmark.BERT',
      'mlperf coalesced_tensor': 'True',
      'mlperf gpu_batch_size': '64',
      'mlperf gpu_copy_streams': '1',
      'mlperf gpu_inference_streams': '2',
      'mlperf input_dtype': 'int32',
      'mlperf input_format': 'linear',
      'mlperf precision': 'int8',
      'mlperf scenario': 'Scenario.Server',
      'mlperf server_target_qps': '3100',
      'mlperf system': 'A100-SXM4-40GBx1',
      'mlperf tensor_path':
          '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_ids.npy,'
          '${PREPROCESSED_DATA_DIR}/squad_tokenized/segment_ids.npy,'
          '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_mask.npy',
      'mlperf use_graphs': 'True',
      'mlperf config_name': 'A100-SXM4-40GBx1_bert_Server',
      'mlperf config_ver': 'custom_k_99_MaxP',
      'mlperf accuracy_level': '99%',
      'mlperf optimization_level': 'plugin-enabled',
      'mlperf inference_server': 'custom',
      'mlperf system_id': 'A100-SXM4-40GBx1',
      'mlperf use_cpu': 'False',
      'mlperf power_limit': 'None',
      'mlperf cpu_freq': 'None',
      'mlperf test_mode': 'AccuracyOnly',
      'mlperf fast': 'True',
      'mlperf gpu_num_bundles': '2',
      'mlperf log_dir': '/work/build/logs/2021.11.09-05.18.28',
      'Threshold': 89.965,
      'version': 'v1.1'
  }
  golden = Sample(metric='accuracy', value=90.376, unit='%',
                  metadata=metadata)
  self.assertSamplesEqualUpToTimestamp(golden, samples[0])
  # DLRM performance: throughput from 'Scheduled samples per second'.
  samples = mlperf_inference_benchmark.MakePerformanceSamplesFromOutput(
      {'version': 'v1.1'}, self.dlrm_performance_contents)
  metadata = {
      'mlperf benchmark': 'Benchmark.DLRM',
      'mlperf coalesced_tensor': 'True',
      'mlperf gpu_batch_size': '262100',
      'mlperf gpu_copy_streams': '1',
      'mlperf gpu_inference_streams': '1',
      'mlperf input_dtype': 'int8',
      'mlperf input_format': 'chw4',
      'mlperf precision': 'int8',
      'mlperf scenario': 'Scenario.Server',
      'mlperf server_target_qps': '2100000',
      'mlperf system': 'A100-SXM4-40GBx8',
      'mlperf tensor_path':
          '${PREPROCESSED_DATA_DIR}/criteo/full_recalib/'
          'numeric_int8_chw4.npy,'
          '${PREPROCESSED_DATA_DIR}/criteo/full_recalib/'
          'categorical_int32.npy',
      'mlperf use_graphs': 'False',
      'mlperf config_name': 'A100-SXM4-40GBx8_dlrm_Server',
      'mlperf config_ver': 'custom_k_99_MaxP',
      'mlperf accuracy_level': '99%',
      'mlperf optimization_level': 'plugin-enabled',
      'mlperf inference_server': 'custom',
      'mlperf system_id': 'A100-SXM4-40GBx8',
      'mlperf use_cpu': 'False',
      'mlperf power_limit': 'None',
      'mlperf cpu_freq': 'None',
      'mlperf gpu_num_bundles': '2',
      'mlperf log_dir': '/work/build/logs/2021.11.13-04.12.53',
      'mlperf SUT name': 'DLRM SERVER',
      'mlperf Scenario': 'Server',
      'mlperf Mode': 'PerformanceOnly',
      'mlperf Scheduled samples per second': '2102380.29',
      'mlperf Result is': 'VALID',
      'mlperf Performance constraints satisfied': 'Yes',
      'mlperf Min duration satisfied': 'Yes',
      'mlperf Min queries satisfied': 'Yes',
      'mlperf Completed samples per second': '2102359.14',
      'mlperf Min latency (ns)': '159697',
      'mlperf Max latency (ns)': '12452412',
      'mlperf Mean latency (ns)': '1375416',
      'mlperf 50.00 percentile latency (ns)': '1285505',
      'mlperf 90.00 percentile latency (ns)': '1984044',
      'mlperf 95.00 percentile latency (ns)': '2319343',
      'mlperf 97.00 percentile latency (ns)': '2568660',
      'mlperf 99.00 percentile latency (ns)': '3507998',
      'mlperf 99.90 percentile latency (ns)': '5628323',
      'mlperf samples_per_query': '1',
      'mlperf target_latency (ns)': '30000000',
      'mlperf max_async_queries': '0',
      'mlperf min_duration (ms)': '60000',
      'mlperf max_duration (ms)': '0',
      'mlperf min_query_count': '1',
      'mlperf max_query_count': '0',
      'mlperf qsl_rng_seed': '1624344308455410291',
      'mlperf sample_index_rng_seed': '517984244576520566',
      'mlperf schedule_rng_seed': '10051496985653635065',
      'mlperf accuracy_log_rng_seed': '0',
      'mlperf accuracy_log_probability': '0',
      'mlperf accuracy_log_sampling_target': '0',
      'mlperf print_timestamps': '0',
      'mlperf performance_issue_unique': '0',
      'mlperf performance_issue_same': '0',
      'mlperf performance_issue_same_index': '0',
      'mlperf performance_sample_count': '204800',
      'version': 'v1.1'
  }
  golden = Sample(metric='throughput', value=2102380.0, unit='samples/s',
                  metadata=metadata)
  self.assertSamplesEqualUpToTimestamp(golden, samples[0])
  # DLRM accuracy: 80.185% against a 79.448% threshold.
  samples = mlperf_inference_benchmark.MakeAccuracySamplesFromOutput(
      {'version': 'v1.1'}, self.dlrm_accuracy_contents)
  metadata = {
      'Threshold': 79.448,
      'mlperf accuracy_level': '99%',
      'mlperf benchmark': 'Benchmark.DLRM',
      'mlperf coalesced_tensor': 'True',
      'mlperf config_name': 'A100-SXM4-40GBx8_dlrm_Server',
      'mlperf config_ver': 'custom_k_99_MaxP',
      'mlperf cpu_freq': 'None',
      'mlperf fast': 'True',
      'mlperf gpu_batch_size': '262100',
      'mlperf gpu_copy_streams': '1',
      'mlperf gpu_inference_streams': '1',
      'mlperf gpu_num_bundles': '2',
      'mlperf inference_server': 'custom',
      'mlperf input_dtype': 'int8',
      'mlperf input_format': 'chw4',
      'mlperf log_dir': '/work/build/logs/2021.11.13-06.24.26',
      'mlperf optimization_level': 'plugin-enabled',
      'mlperf power_limit': 'None',
      'mlperf precision': 'int8',
      'mlperf scenario': 'Scenario.Server',
      'mlperf server_target_qps': '2100000',
      'mlperf system': 'A100-SXM4-40GBx8',
      'mlperf system_id': 'A100-SXM4-40GBx8',
      'mlperf tensor_path':
          '${PREPROCESSED_DATA_DIR}/criteo/full_recalib/'
          'numeric_int8_chw4.npy,'
          '${PREPROCESSED_DATA_DIR}/criteo/full_recalib/'
          'categorical_int32.npy',
      'mlperf test_mode': 'AccuracyOnly',
      'mlperf use_cpu': 'False',
      'mlperf use_graphs': 'False',
      'version': 'v1.1'
  }
  golden = Sample(metric='accuracy', value=80.185, unit='%',
                  metadata=metadata)
  self.assertSamplesEqualUpToTimestamp(golden, samples[0])
def testTpuResults(self):
  """Checks TPU ResNet log parsing against golden samples."""
  samples = resnet_benchmark._MakeSamplesFromOutput(
      self.metadata, self.tpu_contents)

  def meta(epoch, duration, step):
    # Metadata shared by every per-step golden sample.
    return {'num_examples_per_epoch': 1251.1, 'epoch': epoch,
            'duration': duration, 'step': step}

  golden = [
      Sample('Loss', 6.3166966, '', meta(0.9999200703381026, 423, 1251)),
      Sample('Loss', 5.30481, '', meta(1.9998401406762052, 783, 2502)),
      Sample('Global Steps Per Second', 3.47162, 'global_steps/sec',
             meta(1.9998401406762052, 783, 2502)),
      Sample('Examples Per Second', 3554.94, 'examples/sec',
             meta(1.9998401406762052, 783, 2502)),
      Sample('Loss', 4.3771253, '', meta(2.9997602110143076, 1139, 3753)),
      Sample('Global Steps Per Second', 3.51319, 'global_steps/sec',
             meta(2.9997602110143076, 1139, 3753)),
      Sample('Examples Per Second', 3597.51, 'examples/sec',
             meta(2.9997602110143076, 1139, 3753)),
      Sample('Loss', 3.9155605, '', meta(3.996483094876509, 1486, 5000)),
      Sample('Global Steps Per Second', 3.60089, 'global_steps/sec',
             meta(3.996483094876509, 1486, 5000)),
      Sample('Examples Per Second', 3687.31, 'examples/sec',
             meta(3.996483094876509, 1486, 5000)),
      Sample('Loss', 3.774139, '', meta(4.996403165214612, 1968, 6251)),
      Sample('Loss', 3.2543745, '', meta(5.996323235552714, 2327, 7502)),
      Sample('Global Steps Per Second', 3.48231, 'global_steps/sec',
             meta(5.996323235552714, 2327, 7502)),
      Sample('Examples Per Second', 3565.89, 'examples/sec',
             meta(5.996323235552714, 2327, 7502)),
      Sample('Loss', 3.1598916, '', meta(6.996243305890816, 2685, 8753)),
      Sample('Global Steps Per Second', 3.49526, 'global_steps/sec',
             meta(6.996243305890816, 2685, 8753)),
      Sample('Examples Per Second', 3579.15, 'examples/sec',
             meta(6.996243305890816, 2685, 8753)),
      Sample('Loss', 3.054053, '', meta(7.992966189753018, 3031, 10000)),
      Sample('Global Steps Per Second', 3.60296, 'global_steps/sec',
             meta(7.992966189753018, 3031, 10000)),
      Sample('Examples Per Second', 3689.43, 'examples/sec',
             meta(7.992966189753018, 3031, 10000)),
      # Evaluation results follow all of the training samples.
      Sample('Eval Loss', 3.636791, '', meta(3.996483094876509, 1539, 5000)),
      Sample('Top 1 Accuracy', 35.95581, '%',
             meta(3.996483094876509, 1539, 5000)),
      Sample('Top 5 Accuracy', 63.112384, '%',
             meta(3.996483094876509, 1539, 5000)),
      Sample('Eval Loss', 3.0327156, '',
             meta(7.992966189753018, 3082, 10000)),
      Sample('Top 1 Accuracy', 49.57479, '%',
             meta(7.992966189753018, 3082, 10000)),
      Sample('Top 5 Accuracy', 75.47607400000001, '%',
             meta(7.992966189753018, 3082, 10000)),
      Sample('Elapsed Seconds', 34890, 'seconds',
             {'num_examples_per_epoch': 1251.1}),
  ]
  self.assertEqual(samples, golden)
def testGpuResults(self):
  """Checks GPU ResNet log parsing against golden samples."""
  samples = resnet_benchmark._MakeSamplesFromOutput(
      self.metadata, self.gpu_contents)

  def meta(epoch, duration, step):
    # Metadata shared by every per-step golden sample.
    return {'num_examples_per_epoch': 1251.1, 'epoch': epoch,
            'duration': duration, 'step': step}

  golden = [
      Sample('Loss', 7.98753, '', meta(0, 35, 0)),
      Sample('Global Steps Per Second', 2.52565, 'global_steps/sec',
             meta(0, 35, 0)),
      Sample('Loss', 7.9780626, '', meta(0.07992966189753017, 75, 100)),
      Sample('Global Steps Per Second', 2.75627, 'global_steps/sec',
             meta(0.07992966189753017, 75, 100)),
      Sample('Loss', 7.9498286, '', meta(0.15985932379506035, 111, 200)),
      Sample('Global Steps Per Second', 2.72345, 'global_steps/sec',
             meta(0.15985932379506035, 111, 200)),
      Sample('Loss', 7.9504285, '', meta(0.23978898569259055, 148, 300)),
      Sample('Global Steps Per Second', 2.74449, 'global_steps/sec',
             meta(0.23978898569259055, 148, 300)),
      Sample('Loss', 7.9720306, '', meta(0.3197186475901207, 184, 400)),
      Sample('Global Steps Per Second', 2.68677, 'global_steps/sec',
             meta(0.3197186475901207, 184, 400)),
      Sample('Loss', 7.9649105, '', meta(0.3996483094876509, 221, 500)),
      # Evaluation results follow all of the training samples.
      Sample('Eval Loss', 7.8702474, '',
             meta(1.5985932379506036, 920, 2000)),
      Sample('Top 1 Accuracy', 0.5941901399999999, '%',
             meta(1.5985932379506036, 920, 2000)),
      Sample('Top 5 Accuracy', 2.1947023, '%',
             meta(1.5985932379506036, 920, 2000)),
      Sample('Elapsed Seconds', 920, 'seconds',
             {'num_examples_per_epoch': 1251.1}),
  ]
  self.assertEqual(samples, golden)
def testTrainResults(self):
  """Checks loadgen-detail parsing plus BERT/DLRM accuracy parsing (v1.1)."""
  # Performance: parse the loadgen detail log of the BERT server run.
  samples = mlperf_inference_benchmark.MakePerformanceSamplesFromOutput(
      {'version': 'v1.1'}, self.performance_contents)
  metadata = {
      'loadgen_version': '1.1 @ ed7044310a',
      'loadgen_build_date_local': '2022-04-12T14:16:07.377200',
      'loadgen_build_date_utc': '2022-04-12T14:16:07.377209',
      'loadgen_git_commit_date': '2021-08-11T17:36:26+01:00',
      'loadgen_git_status_message': '',
      'test_datetime': '2022-04-12T14:37:18Z',
      'sut_name': 'BERT SERVER',
      'qsl_name': 'BERT QSL',
      'qsl_reported_total_count': 10833,
      'qsl_reported_performance_count': 10833,
      'requested_scenario': 'Server',
      'requested_test_mode': 'PerformanceOnly',
      'requested_server_target_qps': 360,
      'requested_server_target_latency_ns': 130000000,
      'requested_server_target_latency_percentile': 0.99,
      'requested_server_coalesce_queries': True,
      'requested_server_find_peak_qps_decimals_of_precision': 1,
      'requested_server_find_peak_qps_boundary_step_size': 1,
      'requested_server_max_async_queries': 0,
      'requested_server_num_issue_query_threads': 0,
      'requested_min_duration_ms': 600000,
      'requested_max_duration_ms': 0,
      'requested_min_query_count': 270336,
      'requested_max_query_count': 0,
      'requested_qsl_rng_seed': 1624344308455410291,
      'requested_sample_index_rng_seed': 517984244576520566,
      'requested_schedule_rng_seed': 10051496985653635065,
      'requested_accuracy_log_rng_seed': 0,
      'requested_accuracy_log_probability': 0,
      'requested_accuracy_log_sampling_target': 0,
      'requested_print_timestamps': False,
      'requested_performance_issue_unique': False,
      'requested_performance_issue_same': False,
      'requested_performance_issue_same_index': 0,
      'requested_performance_sample_count_override': 10833,
      'effective_scenario': 'Server',
      'effective_test_mode': 'PerformanceOnly',
      'effective_samples_per_query': 1,
      'effective_target_qps': 360,
      'effective_target_latency_ns': 130000000,
      'effective_target_latency_percentile': 0.99,
      'effective_max_async_queries': 0,
      'effective_target_duration_ms': 600000,
      'effective_min_duration_ms': 600000,
      'effective_max_duration_ms': 0,
      'effective_min_query_count': 270336,
      'effective_max_query_count': 0,
      'effective_min_sample_count': 270336,
      'effective_qsl_rng_seed': 1624344308455410291,
      'effective_sample_index_rng_seed': 517984244576520566,
      'effective_schedule_rng_seed': 10051496985653635065,
      'effective_accuracy_log_rng_seed': 0,
      'effective_accuracy_log_probability': 0,
      'effective_accuracy_log_sampling_target': 0,
      'effective_print_timestamps': False,
      'effective_performance_issue_unique': False,
      'effective_performance_issue_same': False,
      'effective_performance_issue_same_index': 0,
      'effective_performance_sample_count': 10833,
      'generic_message': 'Starting performance mode',
      'generated_query_count': 270336,
      'generated_samples_per_query': 1,
      'generated_query_duration': 751081766900,
      'logger_swap_request_slots_retry_count': 0,
      'logger_swap_request_slots_retry_retry_count': 0,
      'logger_swap_request_slots_retry_reencounter_count': 0,
      'logger_start_reading_entries_retry_count': 0,
      'logger_tls_total_log_cas_fail_count': 0,
      'logger_tls_total_swap_buffers_slot_retry_count': 0,
      'power_begin': '04-12-2022 14:37:18.983',
      'power_end': '04-12-2022 14:51:28.239',
      # This run deliberately exercises an INVALID (over-target-QPS) result.
      'result_validity': 'INVALID',
      'result_perf_constraints_met': False,
      'result_min_duration_met': True,
      'result_min_queries_met': True,
      'result_invalid_reason': 'Reduce target QPS to improve latency. ',
      'result_scheduled_samples_per_sec': 359.93,
      'result_completed_samples_per_sec': 321.067,
      'result_min_latency_ns': 6669619,
      'result_max_latency_ns': 846285311653,
      'result_mean_latency_ns': 48257222252,
      'result_50.00_percentile_latency_ns': 45675229779,
      'result_90.00_percentile_latency_ns': 82432475255,
      'result_95.00_percentile_latency_ns': 87117424239,
      'result_97.00_percentile_latency_ns': 89161842381,
      'result_99.00_percentile_latency_ns': 90813602755,
      'result_99.90_percentile_latency_ns': 735809900637,
      'version': 'v1.1'
  }
  golden = Sample(metric='throughput', value=321.067,
                  unit='samples per second', metadata=metadata)
  sample = samples[0]
  # These keys vary per checkout/build, so drop them before comparing.
  sample.metadata.pop('loaded_qsl_set')
  sample.metadata.pop('loadgen_git_log_message')
  sample.metadata.pop('loadgen_file_sha1')
  self.assertSamplesEqualUpToTimestamp(golden, sample)
  # Accuracy: BERT run must report 90.376% against an 89.965% threshold.
  samples = mlperf_inference_benchmark.MakeAccuracySamplesFromOutput(
      {'version': 'v1.1'}, self.bert_accuracy_contents)
  metadata = {
      'mlperf benchmark': 'Benchmark.BERT',
      'mlperf coalesced_tensor': 'True',
      'mlperf gpu_batch_size': '64',
      'mlperf gpu_copy_streams': '1',
      'mlperf gpu_inference_streams': '2',
      'mlperf input_dtype': 'int32',
      'mlperf input_format': 'linear',
      'mlperf precision': 'int8',
      'mlperf scenario': 'Scenario.Server',
      'mlperf server_target_qps': '3100',
      'mlperf system': 'A100-SXM4-40GBx1',
      'mlperf tensor_path':
          '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_ids.npy,'
          '${PREPROCESSED_DATA_DIR}/squad_tokenized/segment_ids.npy,'
          '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_mask.npy',
      'mlperf use_graphs': 'True',
      'mlperf config_name': 'A100-SXM4-40GBx1_bert_Server',
      'mlperf config_ver': 'custom_k_99_MaxP',
      'mlperf accuracy_level': '99%',
      'mlperf optimization_level': 'plugin-enabled',
      'mlperf inference_server': 'custom',
      'mlperf system_id': 'A100-SXM4-40GBx1',
      'mlperf use_cpu': 'False',
      'mlperf power_limit': 'None',
      'mlperf cpu_freq': 'None',
      'mlperf test_mode': 'AccuracyOnly',
      'mlperf gpu_num_bundles': '2',
      'mlperf log_dir': '/work/build/logs/2021.11.09-05.18.28',
      'Threshold': 89.965,
      'version': 'v1.1'
  }
  golden = Sample(metric='accuracy', value=90.376, unit='%',
                  metadata=metadata)
  self.assertSamplesEqualUpToTimestamp(golden, samples[0])
  # Accuracy: DLRM run must report 80.185% against a 79.448% threshold.
  samples = mlperf_inference_benchmark.MakeAccuracySamplesFromOutput(
      {'version': 'v1.1'}, self.dlrm_accuracy_contents)
  metadata = {
      'Threshold': 79.448,
      'mlperf accuracy_level': '99%',
      'mlperf benchmark': 'Benchmark.DLRM',
      'mlperf coalesced_tensor': 'True',
      'mlperf config_name': 'A100-SXM4-40GBx8_dlrm_Server',
      'mlperf config_ver': 'custom_k_99_MaxP',
      'mlperf cpu_freq': 'None',
      'mlperf gpu_batch_size': '262100',
      'mlperf gpu_copy_streams': '1',
      'mlperf gpu_inference_streams': '1',
      'mlperf gpu_num_bundles': '2',
      'mlperf inference_server': 'custom',
      'mlperf input_dtype': 'int8',
      'mlperf input_format': 'chw4',
      'mlperf log_dir': '/work/build/logs/2021.11.13-06.24.26',
      'mlperf optimization_level': 'plugin-enabled',
      'mlperf power_limit': 'None',
      'mlperf precision': 'int8',
      'mlperf scenario': 'Scenario.Server',
      'mlperf server_target_qps': '2100000',
      'mlperf system': 'A100-SXM4-40GBx8',
      'mlperf system_id': 'A100-SXM4-40GBx8',
      'mlperf tensor_path':
          '${PREPROCESSED_DATA_DIR}/criteo/full_recalib/'
          'numeric_int8_chw4.npy,'
          '${PREPROCESSED_DATA_DIR}/criteo/full_recalib/'
          'categorical_int32.npy',
      'mlperf test_mode': 'AccuracyOnly',
      'mlperf use_cpu': 'False',
      'mlperf use_graphs': 'False',
      'version': 'v1.1'
  }
  golden = Sample(metric='accuracy', value=80.185, unit='%',
                  metadata=metadata)
  self.assertSamplesEqualUpToTimestamp(golden, samples[0])
def testTrainResults(self):
  """Checks BERT server performance-sample parsing (v1.1).

  The golden throughput comes from the 'Scheduled samples per second' line of
  the mlperf log; the metadata mirrors every parsed 'mlperf ...' key.
  """
  samples = mlperf_inference_benchmark.MakeSamplesFromOutput(
      {'version': 'v1.1'}, self.contents)
  metadata = {
      'mlperf 50.00 percentile latency (ns)': '40533329',
      'mlperf 90.00 percentile latency (ns)': '51387550',
      'mlperf 95.00 percentile latency (ns)': '54956149',
      'mlperf 97.00 percentile latency (ns)': '57792422',
      'mlperf 99.00 percentile latency (ns)': '82056764',
      'mlperf 99.90 percentile latency (ns)': '543294654940',
      'mlperf Completed samples per second': '3102.49',
      'mlperf Max latency (ns)': '605456500256',
      'mlperf Mean latency (ns)': '3037717062',
      'mlperf Min duration satisfied': 'Yes',
      'mlperf Min latency (ns)': '4126840',
      'mlperf Min queries satisfied': 'Yes',
      'mlperf Mode': 'PerformanceOnly',
      'mlperf Performance constraints satisfied': 'Yes',
      'mlperf Result is': 'VALID',
      'mlperf SUT name': 'BERT SERVER',
      'mlperf Scenario': 'Server',
      'mlperf Scheduled samples per second': '3102.76',
      'mlperf accuracy_level': '99%',
      'mlperf accuracy_log_probability': '0',
      'mlperf accuracy_log_rng_seed': '0',
      'mlperf accuracy_log_sampling_target': '0',
      'mlperf active_sms': '60',
      'mlperf benchmark': 'Benchmark.BERT',
      'mlperf bert_opt_seqlen': '384',
      'mlperf coalesced_tensor': 'True',
      'mlperf config_name': 'A100-SXM4-40GBx1_bert_Server',
      'mlperf config_ver': 'custom_k_99_MaxP',
      'mlperf cpu_freq': 'None',
      'mlperf enable_interleaved': 'False',
      'mlperf gpu_batch_size': '64',
      'mlperf gpu_copy_streams': '1',
      'mlperf gpu_inference_streams': '2',
      'mlperf gpu_num_bundles': '2',
      'mlperf graphs_max_seqlen': '200',
      'mlperf inference_server': 'custom',
      'mlperf input_dtype': 'int32',
      'mlperf input_format': 'linear',
      'mlperf log_dir': '/work/build/logs/2021.10.27-20.51.11',
      'mlperf max_async_queries': '0',
      'mlperf max_duration (ms)': '0',
      'mlperf max_query_count': '0',
      'mlperf min_duration (ms)': '600000',
      'mlperf min_query_count': '270336',
      'mlperf optimization_level': 'plugin-enabled',
      'mlperf performance_issue_same': '0',
      'mlperf performance_issue_same_index': '0',
      'mlperf performance_issue_unique': '0',
      'mlperf performance_sample_count': '10833',
      'mlperf power_limit': 'None',
      'mlperf precision': 'int8',
      'mlperf print_timestamps': '0',
      'mlperf qsl_rng_seed': '1624344308455410291',
      'mlperf sample_index_rng_seed': '517984244576520566',
      'mlperf samples_per_query': '1',
      'mlperf scenario': 'Scenario.Server',
      'mlperf schedule_rng_seed': '10051496985653635065',
      'mlperf server_num_issue_query_threads': '1',
      'mlperf server_target_qps': '3100',
      'mlperf soft_drop': '0.99',
      'mlperf system': 'A100-SXM4-40GBx1',
      'mlperf system_id': 'A100-SXM4-40GBx1',
      'mlperf target_latency (ns)': '130000000',
      'mlperf tensor_path':
          '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_ids.npy,'
          '${PREPROCESSED_DATA_DIR}/squad_tokenized/segment_ids.npy,'
          '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_mask.npy',
      'mlperf use_cpu': 'False',
      'mlperf use_graphs': 'True',
      'version': 'v1.1'
  }
  golden = Sample(metric='throughput', value=3102.76, unit='samples/s',
                  metadata=metadata)
  self.assertSamplesEqualUpToTimestamp(golden, samples[0])