def AddSamples(self, samples, benchmark, benchmark_spec):
    """Adds data samples to the publisher.

    Args:
      samples: Either a list of Sample objects (preferred) or a list of 3 or
        4-tuples (deprecated). The tuples contain the metric name (string), the
        value (float), and unit (string) of each sample. If a 4th element is
        included, it is a dictionary of metadata associated with the sample.
      benchmark: string. The name of the benchmark.
      benchmark_spec: BenchmarkSpec. Benchmark specification.

    Raises:
      ValueError: If a deprecated tuple sample is not a 3- or 4-tuple.
    """
    for raw in samples:
      # Legacy callers pass tuples; normalize them to Sample objects first.
      if isinstance(raw, (list, tuple)):
        if len(raw) not in (3, 4):
          raise ValueError(
              'Invalid sample "{0}": should be 3- or 4-tuple.'.format(raw))
        raw = Sample(*raw)

      # Copy to a plain dict and annotate with run-level information.
      annotated = dict(raw.asdict())
      annotated['test'] = benchmark

      # Let each configured provider enrich the sample's metadata.
      for provider in self.metadata_providers:
        annotated['metadata'] = provider.AddMetadata(
            annotated['metadata'], benchmark_spec)

      annotated.update(
          product_name=FLAGS.product_name,
          official=FLAGS.official,
          owner=FLAGS.owner,
          timestamp=time.time(),
          run_uri=self.run_uri,
          sample_uri=str(uuid.uuid4()))
      self.samples.append(annotated)
 def testTrainResults(self):
     """Checks MLPerf v0.5.0 training output parsing against golden samples."""
     samples = mlperf_benchmark.MakeSamplesFromOutput({}, self.contents)
     # One (accuracy %, epoch, elapsed seconds) row per evaluation point.
     accuracy_points = [
         (5.96720390021801, 0, 0.0),
         (36.89168393611908, 4, 1164.691000699997),
         (49.114990234375, 8, 2329.8028297424316),
         (53.01310420036316, 12, 3498.9867885112762),
         (53.55224609375, 16, 4667.747241735458),
         (54.87263798713684, 20, 5831.299504995346),
         (54.70173954963684, 24, 6996.661015510559),
         (56.72810673713684, 28, 8160.468462944031),
         (70.751953125, 32, 9329.49914598465),
         (71.368408203125, 36, 10494.261439800262),
         (71.49454951286316, 40, 11657.773159980774),
         (70.70515751838684, 44, 12823.00942158699),
         (70.65632939338684, 48, 13988.791482448578),
         (70.562744140625, 52, 15154.056546211243),
         (70.88623046875, 56, 16318.724472999573),
         (74.67244267463684, 60, 17482.81353545189),
         (75.00407099723816, 61, 17788.61406970024),
     ]
     golden = [
         Sample('Eval Accuracy', accuracy, '%',
                {'epoch': epoch, 'times': elapsed, 'version': '0.5.0'})
         for accuracy, epoch, elapsed in accuracy_points
     ]
     # Total wall-clock time reported at the end of the run.
     golden.append(Sample('Times', 18183, 'seconds', {}))
     self.assertEqual(samples, golden)
# Example #3
 def testTrainResults(self):
     """Checks MLPerf v0.6.0 TPU resnet output parsing against golden samples."""
     samples = mlperf_benchmark.MakeSamplesFromOutput(
         {'version': 'v0.6.0'}, self.contents, use_tpu=True, model='resnet')
     # One (accuracy %, epoch, elapsed seconds) row per evaluation point.
     accuracy_points = [
         (32.322001457214355, 4, 0.0),
         (40.342000126838684, 8, 164.16299986839294),
         (48.21600019931793, 12, 328.239000082016),
         (51.749998331069946, 16, 492.335000038147),
         (52.851998805999756, 20, 656.4279999732971),
         (52.99599766731262, 24, 820.5209999084473),
         (60.44999957084656, 28, 984.6259999275208),
         (62.775999307632446, 32, 1148.7119998931885),
         (66.22400283813477, 36, 1312.8050000667572),
         (67.34600067138672, 40, 1476.9070000648499),
         (70.77400088310242, 44, 1640.994999885559),
         (72.40599989891052, 48, 1805.085000038147),
         (73.85799884796143, 52, 1969.1849999427795),
         (75.26000142097473, 56, 2133.2750000953674),
         (76.0420024394989, 60, 2297.3669998645782),
     ]
     golden = [
         Sample('Eval Accuracy', accuracy, '%',
                {'epoch': epoch, 'times': elapsed, 'version': 'v0.6.0'})
         for accuracy, epoch, elapsed in accuracy_points
     ]
     self.assertEqual(samples, golden)
# Example #4
    def testT2TGpuOutput(self):
        """Checks tensor2tensor GPU log parsing against golden samples."""
        self.maxDiff = None
        path = os.path.join(os.path.dirname(__file__), '..', 'data',
                            't2t_gpu_output.txt')
        with open(path) as fp:
            t2t_contents = fp.read()

        samples = t2t_benchmark._MakeSamplesFromOutput({'use_tpu': False},
                                                       t2t_contents)

        # One throughput sample per logged global_steps/sec reading.
        step_rates = [3.04983, 4.12771, 4.11027, 4.10924, 4.12186,
                      4.08434, 4.10174, 4.11809, 4.10496]
        golden = [
            Sample(metric='Global Steps Per Second',
                   value=rate,
                   unit='global_steps/sec',
                   metadata={'index': index, 'use_tpu': False},
                   timestamp=0)
            for index, rate in enumerate(step_rates)
        ]
        # Evaluation metrics reported at step 1000: (metric, value, unit).
        eval_rows = [
            ('Eval Loss', 7.2263174, ''),
            ('Accuracy', 13.972055999999998, '%'),
            ('Accuracy Per Sequence', 0.0, '%'),
            ('Negative Log Perplexity', -7.2263174, 'perplexity'),
            ('Top 5 Accuracy', 24.800399000000002, '%'),
        ]
        golden.extend(
            Sample(metric=metric,
                   value=value,
                   unit=unit,
                   metadata={'use_tpu': False, 'step': 1000},
                   timestamp=0)
            for metric, value, unit in eval_rows)
        self.assertEqual(samples, golden)
# Example #5
    def testT2TTpuOutput(self):
        """Checks tensor2tensor TPU log parsing against golden samples."""
        self.maxDiff = None
        path = os.path.join(os.path.dirname(__file__), '..', 'data',
                            't2t_tpu_output.txt')
        with open(path) as fp:
            t2t_contents = fp.read()

        samples = t2t_benchmark._MakeSamplesFromOutput({'use_tpu': True},
                                                       t2t_contents)

        golden = []
        # Throughput samples: (metric, unit, per-index readings).
        throughput_rows = [
            ('Global Steps Per Second', 'global_steps/sec',
             [1.85777, 5.06989]),
            ('Examples Per Second', 'examples/sec',
             [118.897, 324.473]),
        ]
        for metric, unit, readings in throughput_rows:
            for index, value in enumerate(readings):
                golden.append(
                    Sample(metric=metric,
                           value=value,
                           unit=unit,
                           metadata={'use_tpu': True, 'index': index},
                           timestamp=0))
        # Evaluation metrics: (step, metric, value, unit).
        eval_rows = [
            (1000, 'Eval Loss', 3.9047337, ''),
            (1000, 'Accuracy', 32.064167, '%'),
            (1000, 'Accuracy Per Sequence', 0.0, '%'),
            (1000, 'Negative Log Perplexity', -4.501835, 'perplexity'),
            (1000, 'Top 5 Accuracy', 50.96436, '%'),
            (1200, 'Eval Loss', 3.7047337, ''),
            (1200, 'Accuracy', 33.064167, '%'),
            (1200, 'Accuracy Per Sequence', 0.0, '%'),
            (1200, 'Negative Log Perplexity', -4.101835, 'perplexity'),
            (1200, 'Top 5 Accuracy', 55.96436, '%'),
        ]
        golden.extend(
            Sample(metric=metric,
                   value=value,
                   unit=unit,
                   metadata={'use_tpu': True, 'step': step},
                   timestamp=0)
            for step, metric, value, unit in eval_rows)
        self.assertEqual(samples, golden)
# Example #6
    def testTrainResults(self):
        """Checks MLPerf inference output parsing for BERT and DLRM.

        Verifies MakePerformanceSamplesFromOutput and
        MakeAccuracySamplesFromOutput against golden throughput/accuracy
        samples for both the BERT and DLRM Server-scenario logs.
        """
        # BERT performance: throughput sample from the performance log.
        samples = mlperf_inference_benchmark.MakePerformanceSamplesFromOutput(
            {'version': 'v1.1'}, self.bert_performance_contents)
        metadata = {
            'mlperf 50.00 percentile latency (ns)':
            '40533329',
            'mlperf 90.00 percentile latency (ns)':
            '51387550',
            'mlperf 95.00 percentile latency (ns)':
            '54956149',
            'mlperf 97.00 percentile latency (ns)':
            '57792422',
            'mlperf 99.00 percentile latency (ns)':
            '82056764',
            'mlperf 99.90 percentile latency (ns)':
            '543294654940',
            'mlperf Completed samples per second':
            '3102.49',
            'mlperf Max latency (ns)':
            '605456500256',
            'mlperf Mean latency (ns)':
            '3037717062',
            'mlperf Min duration satisfied':
            'Yes',
            'mlperf Min latency (ns)':
            '4126840',
            'mlperf Min queries satisfied':
            'Yes',
            'mlperf Mode':
            'PerformanceOnly',
            'mlperf Performance constraints satisfied':
            'Yes',
            'mlperf Result is':
            'VALID',
            'mlperf SUT name':
            'BERT SERVER',
            'mlperf Scenario':
            'Server',
            'mlperf Scheduled samples per second':
            '3102.76',
            'mlperf accuracy_level':
            '99%',
            'mlperf accuracy_log_probability':
            '0',
            'mlperf accuracy_log_rng_seed':
            '0',
            'mlperf accuracy_log_sampling_target':
            '0',
            'mlperf benchmark':
            'Benchmark.BERT',
            'mlperf coalesced_tensor':
            'True',
            'mlperf config_name':
            'A100-SXM4-40GBx1_bert_Server',
            'mlperf config_ver':
            'custom_k_99_MaxP',
            'mlperf cpu_freq':
            'None',
            'mlperf gpu_batch_size':
            '64',
            'mlperf gpu_copy_streams':
            '1',
            'mlperf gpu_inference_streams':
            '2',
            'mlperf gpu_num_bundles':
            '2',
            'mlperf inference_server':
            'custom',
            'mlperf input_dtype':
            'int32',
            'mlperf input_format':
            'linear',
            'mlperf log_dir':
            '/work/build/logs/2021.10.27-20.51.11',
            'mlperf max_async_queries':
            '0',
            'mlperf max_duration (ms)':
            '0',
            'mlperf max_query_count':
            '0',
            'mlperf min_duration (ms)':
            '600000',
            'mlperf min_query_count':
            '270336',
            'mlperf optimization_level':
            'plugin-enabled',
            'mlperf performance_issue_same':
            '0',
            'mlperf performance_issue_same_index':
            '0',
            'mlperf performance_issue_unique':
            '0',
            'mlperf performance_sample_count':
            '10833',
            'mlperf power_limit':
            'None',
            'mlperf precision':
            'int8',
            'mlperf print_timestamps':
            '0',
            'mlperf qsl_rng_seed':
            '1624344308455410291',
            'mlperf sample_index_rng_seed':
            '517984244576520566',
            'mlperf samples_per_query':
            '1',
            'mlperf scenario':
            'Scenario.Server',
            'mlperf schedule_rng_seed':
            '10051496985653635065',
            'mlperf server_target_qps':
            '3100',
            'mlperf system':
            'A100-SXM4-40GBx1',
            'mlperf system_id':
            'A100-SXM4-40GBx1',
            'mlperf target_latency (ns)':
            '130000000',
            'mlperf tensor_path':
            '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_ids.npy,'
            '${PREPROCESSED_DATA_DIR}/squad_tokenized/segment_ids.npy,'
            '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_mask.npy',
            'mlperf use_cpu':
            'False',
            'mlperf use_graphs':
            'True',
            'version':
            'v1.1'
        }
        golden = Sample(metric='throughput',
                        value=3102.76,
                        unit='samples/s',
                        metadata=metadata)
        self.assertSamplesEqualUpToTimestamp(golden, samples[0])

        # BERT accuracy: accuracy sample from the accuracy log.
        samples = mlperf_inference_benchmark.MakeAccuracySamplesFromOutput(
            {'version': 'v1.1'}, self.bert_accuracy_contents)
        metadata = {
            'mlperf benchmark':
            'Benchmark.BERT',
            'mlperf coalesced_tensor':
            'True',
            'mlperf gpu_batch_size':
            '64',
            'mlperf gpu_copy_streams':
            '1',
            'mlperf gpu_inference_streams':
            '2',
            'mlperf input_dtype':
            'int32',
            'mlperf input_format':
            'linear',
            'mlperf precision':
            'int8',
            'mlperf scenario':
            'Scenario.Server',
            'mlperf server_target_qps':
            '3100',
            'mlperf system':
            'A100-SXM4-40GBx1',
            'mlperf tensor_path':
            '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_ids.npy,'
            '${PREPROCESSED_DATA_DIR}/squad_tokenized/segment_ids.npy,'
            '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_mask.npy',
            'mlperf use_graphs':
            'True',
            'mlperf config_name':
            'A100-SXM4-40GBx1_bert_Server',
            'mlperf config_ver':
            'custom_k_99_MaxP',
            'mlperf accuracy_level':
            '99%',
            'mlperf optimization_level':
            'plugin-enabled',
            'mlperf inference_server':
            'custom',
            'mlperf system_id':
            'A100-SXM4-40GBx1',
            'mlperf use_cpu':
            'False',
            'mlperf power_limit':
            'None',
            'mlperf cpu_freq':
            'None',
            'mlperf test_mode':
            'AccuracyOnly',
            'mlperf fast':
            'True',
            'mlperf gpu_num_bundles':
            '2',
            'mlperf log_dir':
            '/work/build/logs/2021.11.09-05.18.28',
            'Threshold':
            89.965,
            'version':
            'v1.1'
        }
        golden = Sample(metric='accuracy',
                        value=90.376,
                        unit='%',
                        metadata=metadata)
        self.assertSamplesEqualUpToTimestamp(golden, samples[0])

        # DLRM performance: throughput sample from the performance log.
        samples = mlperf_inference_benchmark.MakePerformanceSamplesFromOutput(
            {'version': 'v1.1'}, self.dlrm_performance_contents)
        metadata = {
            'mlperf benchmark':
            'Benchmark.DLRM',
            'mlperf coalesced_tensor':
            'True',
            'mlperf gpu_batch_size':
            '262100',
            'mlperf gpu_copy_streams':
            '1',
            'mlperf gpu_inference_streams':
            '1',
            'mlperf input_dtype':
            'int8',
            'mlperf input_format':
            'chw4',
            'mlperf precision':
            'int8',
            'mlperf scenario':
            'Scenario.Server',
            'mlperf server_target_qps':
            '2100000',
            'mlperf system':
            'A100-SXM4-40GBx8',
            'mlperf tensor_path':
            '${PREPROCESSED_DATA_DIR}/criteo/full_recalib/'
            'numeric_int8_chw4.npy,'
            '${PREPROCESSED_DATA_DIR}/criteo/full_recalib/'
            'categorical_int32.npy',
            'mlperf use_graphs':
            'False',
            'mlperf config_name':
            'A100-SXM4-40GBx8_dlrm_Server',
            'mlperf config_ver':
            'custom_k_99_MaxP',
            'mlperf accuracy_level':
            '99%',
            'mlperf optimization_level':
            'plugin-enabled',
            'mlperf inference_server':
            'custom',
            'mlperf system_id':
            'A100-SXM4-40GBx8',
            'mlperf use_cpu':
            'False',
            'mlperf power_limit':
            'None',
            'mlperf cpu_freq':
            'None',
            'mlperf gpu_num_bundles':
            '2',
            'mlperf log_dir':
            '/work/build/logs/2021.11.13-04.12.53',
            'mlperf SUT name':
            'DLRM SERVER',
            'mlperf Scenario':
            'Server',
            'mlperf Mode':
            'PerformanceOnly',
            'mlperf Scheduled samples per second':
            '2102380.29',
            'mlperf Result is':
            'VALID',
            'mlperf Performance constraints satisfied':
            'Yes',
            'mlperf Min duration satisfied':
            'Yes',
            'mlperf Min queries satisfied':
            'Yes',
            'mlperf Completed samples per second':
            '2102359.14',
            'mlperf Min latency (ns)':
            '159697',
            'mlperf Max latency (ns)':
            '12452412',
            'mlperf Mean latency (ns)':
            '1375416',
            'mlperf 50.00 percentile latency (ns)':
            '1285505',
            'mlperf 90.00 percentile latency (ns)':
            '1984044',
            'mlperf 95.00 percentile latency (ns)':
            '2319343',
            'mlperf 97.00 percentile latency (ns)':
            '2568660',
            'mlperf 99.00 percentile latency (ns)':
            '3507998',
            'mlperf 99.90 percentile latency (ns)':
            '5628323',
            'mlperf samples_per_query':
            '1',
            'mlperf target_latency (ns)':
            '30000000',
            'mlperf max_async_queries':
            '0',
            'mlperf min_duration (ms)':
            '60000',
            'mlperf max_duration (ms)':
            '0',
            'mlperf min_query_count':
            '1',
            'mlperf max_query_count':
            '0',
            'mlperf qsl_rng_seed':
            '1624344308455410291',
            'mlperf sample_index_rng_seed':
            '517984244576520566',
            'mlperf schedule_rng_seed':
            '10051496985653635065',
            'mlperf accuracy_log_rng_seed':
            '0',
            'mlperf accuracy_log_probability':
            '0',
            'mlperf accuracy_log_sampling_target':
            '0',
            'mlperf print_timestamps':
            '0',
            'mlperf performance_issue_unique':
            '0',
            'mlperf performance_issue_same':
            '0',
            'mlperf performance_issue_same_index':
            '0',
            'mlperf performance_sample_count':
            '204800',
            'version':
            'v1.1'
        }
        golden = Sample(metric='throughput',
                        value=2102380.0,
                        unit='samples/s',
                        metadata=metadata)
        self.assertSamplesEqualUpToTimestamp(golden, samples[0])

        # DLRM accuracy: accuracy sample from the accuracy log.
        samples = mlperf_inference_benchmark.MakeAccuracySamplesFromOutput(
            {'version': 'v1.1'}, self.dlrm_accuracy_contents)
        metadata = {
            'Threshold':
            79.448,
            'mlperf accuracy_level':
            '99%',
            'mlperf benchmark':
            'Benchmark.DLRM',
            'mlperf coalesced_tensor':
            'True',
            'mlperf config_name':
            'A100-SXM4-40GBx8_dlrm_Server',
            'mlperf config_ver':
            'custom_k_99_MaxP',
            'mlperf cpu_freq':
            'None',
            'mlperf fast':
            'True',
            'mlperf gpu_batch_size':
            '262100',
            'mlperf gpu_copy_streams':
            '1',
            'mlperf gpu_inference_streams':
            '1',
            'mlperf gpu_num_bundles':
            '2',
            'mlperf inference_server':
            'custom',
            'mlperf input_dtype':
            'int8',
            'mlperf input_format':
            'chw4',
            'mlperf log_dir':
            '/work/build/logs/2021.11.13-06.24.26',
            'mlperf optimization_level':
            'plugin-enabled',
            'mlperf power_limit':
            'None',
            'mlperf precision':
            'int8',
            'mlperf scenario':
            'Scenario.Server',
            'mlperf server_target_qps':
            '2100000',
            'mlperf system':
            'A100-SXM4-40GBx8',
            'mlperf system_id':
            'A100-SXM4-40GBx8',
            'mlperf tensor_path':
            '${PREPROCESSED_DATA_DIR}/criteo/full_recalib/'
            'numeric_int8_chw4.npy,'
            '${PREPROCESSED_DATA_DIR}/criteo/full_recalib/'
            'categorical_int32.npy',
            'mlperf test_mode':
            'AccuracyOnly',
            'mlperf use_cpu':
            'False',
            'mlperf use_graphs':
            'False',
            'version':
            'v1.1'
        }
        golden = Sample(metric='accuracy',
                        value=80.185,
                        unit='%',
                        metadata=metadata)
        self.assertSamplesEqualUpToTimestamp(golden, samples[0])
 def testTpuResults(self):
     samples = resnet_benchmark._MakeSamplesFromOutput(
         self.metadata, self.tpu_contents)
     golden = [
         Sample(
             'Loss', 6.3166966, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 0.9999200703381026,
                 'duration': 423,
                 'step': 1251
             }),
         Sample(
             'Loss', 5.30481, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 1.9998401406762052,
                 'duration': 783,
                 'step': 2502
             }),
         Sample(
             'Global Steps Per Second', 3.47162, 'global_steps/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 1.9998401406762052,
                 'duration': 783,
                 'step': 2502
             }),
         Sample(
             'Examples Per Second', 3554.94, 'examples/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 1.9998401406762052,
                 'duration': 783,
                 'step': 2502
             }),
         Sample(
             'Loss', 4.3771253, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 2.9997602110143076,
                 'duration': 1139,
                 'step': 3753
             }),
         Sample(
             'Global Steps Per Second', 3.51319, 'global_steps/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 2.9997602110143076,
                 'duration': 1139,
                 'step': 3753
             }),
         Sample(
             'Examples Per Second', 3597.51, 'examples/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 2.9997602110143076,
                 'duration': 1139,
                 'step': 3753
             }),
         Sample(
             'Loss', 3.9155605, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 3.996483094876509,
                 'duration': 1486,
                 'step': 5000
             }),
         Sample(
             'Global Steps Per Second', 3.60089, 'global_steps/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 3.996483094876509,
                 'duration': 1486,
                 'step': 5000
             }),
         Sample(
             'Examples Per Second', 3687.31, 'examples/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 3.996483094876509,
                 'duration': 1486,
                 'step': 5000
             }),
         Sample(
             'Loss', 3.774139, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 4.996403165214612,
                 'duration': 1968,
                 'step': 6251
             }),
         Sample(
             'Loss', 3.2543745, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 5.996323235552714,
                 'duration': 2327,
                 'step': 7502
             }),
         Sample(
             'Global Steps Per Second', 3.48231, 'global_steps/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 5.996323235552714,
                 'duration': 2327,
                 'step': 7502
             }),
         Sample(
             'Examples Per Second', 3565.89, 'examples/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 5.996323235552714,
                 'duration': 2327,
                 'step': 7502
             }),
         Sample(
             'Loss', 3.1598916, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 6.996243305890816,
                 'duration': 2685,
                 'step': 8753
             }),
         Sample(
             'Global Steps Per Second', 3.49526, 'global_steps/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 6.996243305890816,
                 'duration': 2685,
                 'step': 8753
             }),
         Sample(
             'Examples Per Second', 3579.15, 'examples/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 6.996243305890816,
                 'duration': 2685,
                 'step': 8753
             }),
         Sample(
             'Loss', 3.054053, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 7.992966189753018,
                 'duration': 3031,
                 'step': 10000
             }),
         Sample(
             'Global Steps Per Second', 3.60296, 'global_steps/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 7.992966189753018,
                 'duration': 3031,
                 'step': 10000
             }),
         Sample(
             'Examples Per Second', 3689.43, 'examples/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 7.992966189753018,
                 'duration': 3031,
                 'step': 10000
             }),
         Sample(
             'Eval Loss', 3.636791, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 3.996483094876509,
                 'duration': 1539,
                 'step': 5000
             }),
         Sample(
             'Top 1 Accuracy', 35.95581, '%', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 3.996483094876509,
                 'duration': 1539,
                 'step': 5000
             }),
         Sample(
             'Top 5 Accuracy', 63.112384, '%', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 3.996483094876509,
                 'duration': 1539,
                 'step': 5000
             }),
         Sample(
             'Eval Loss', 3.0327156, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 7.992966189753018,
                 'duration': 3082,
                 'step': 10000
             }),
         Sample(
             'Top 1 Accuracy', 49.57479, '%', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 7.992966189753018,
                 'duration': 3082,
                 'step': 10000
             }),
         Sample(
             'Top 5 Accuracy', 75.47607400000001, '%', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 7.992966189753018,
                 'duration': 3082,
                 'step': 10000
             }),
         Sample('Elapsed Seconds', 34890, 'seconds',
                {'num_examples_per_epoch': 1251.1})
     ]
     self.assertEqual(samples, golden)
 def testGpuResults(self):
     """Checks parsing of ResNet GPU training output into Sample objects.

     Runs resnet_benchmark._MakeSamplesFromOutput over the GPU log fixture
     (self.gpu_contents, loaded elsewhere in this test case — presumably in
     setUp; TODO confirm) and compares the full parsed sample list against a
     hand-verified golden list. Covers 'Loss' and 'Global Steps Per Second'
     training samples, the eval-time 'Eval Loss' / 'Top 1 Accuracy' /
     'Top 5 Accuracy' samples, and the final 'Elapsed Seconds' summary sample.
     """
     samples = resnet_benchmark._MakeSamplesFromOutput(
         self.metadata, self.gpu_contents)
     # Golden samples in the exact order the parser is expected to emit them;
     # assertEqual below checks both content and ordering.
     golden = [
         Sample(
             'Loss', 7.98753, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 0,
                 'step': 0,
                 'duration': 35
             }),
         Sample(
             'Global Steps Per Second', 2.52565, 'global_steps/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 0,
                 'step': 0,
                 'duration': 35
             }),
         Sample(
             'Loss', 7.9780626, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 0.07992966189753017,
                 'step': 100,
                 'duration': 75
             }),
         Sample(
             'Global Steps Per Second', 2.75627, 'global_steps/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 0.07992966189753017,
                 'step': 100,
                 'duration': 75
             }),
         Sample(
             'Loss', 7.9498286, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 0.15985932379506035,
                 'step': 200,
                 'duration': 111
             }),
         Sample(
             'Global Steps Per Second', 2.72345, 'global_steps/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 0.15985932379506035,
                 'step': 200,
                 'duration': 111
             }),
         Sample(
             'Loss', 7.9504285, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 0.23978898569259055,
                 'step': 300,
                 'duration': 148
             }),
         Sample(
             'Global Steps Per Second', 2.74449, 'global_steps/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 0.23978898569259055,
                 'step': 300,
                 'duration': 148
             }),
         Sample(
             'Loss', 7.9720306, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 0.3197186475901207,
                 'step': 400,
                 'duration': 184
             }),
         Sample(
             'Global Steps Per Second', 2.68677, 'global_steps/sec', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 0.3197186475901207,
                 'step': 400,
                 'duration': 184
             }),
         Sample(
             'Loss', 7.9649105, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 0.3996483094876509,
                 'step': 500,
                 'duration': 221
             }),
         # Eval samples share one step/duration because they come from a
         # single evaluation pass at step 2000.
         Sample(
             'Eval Loss', 7.8702474, '', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 1.5985932379506036,
                 'step': 2000,
                 'duration': 920
             }),
         Sample(
             'Top 1 Accuracy', 0.5941901399999999, '%', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 1.5985932379506036,
                 'step': 2000,
                 'duration': 920
             }),
         Sample(
             'Top 5 Accuracy', 2.1947023, '%', {
                 'num_examples_per_epoch': 1251.1,
                 'epoch': 1.5985932379506036,
                 'step': 2000,
                 'duration': 920
             }),
         Sample('Elapsed Seconds', 920, 'seconds',
                {'num_examples_per_epoch': 1251.1})
     ]
     self.assertEqual(samples, golden)
    def testTrainResults(self):
        """Checks MLPerf inference result parsing for three log fixtures.

        Exercises three parsers against canned v1.1 output fixtures
        (self.performance_contents, self.bert_accuracy_contents,
        self.dlrm_accuracy_contents — presumably loaded in setUp; TODO
        confirm):

        1. MakePerformanceSamplesFromOutput: BERT Server performance run;
           expects a 'throughput' sample carrying the full LoadGen summary
           as metadata.
        2. MakeAccuracySamplesFromOutput: BERT accuracy run; expects an
           'accuracy' sample with the harness config as metadata.
        3. MakeAccuracySamplesFromOutput: DLRM accuracy run, same shape.

        Comparison uses assertSamplesEqualUpToTimestamp, so sample
        timestamps are ignored.
        """
        # --- Case 1: performance-mode output -> 'throughput' sample. ---
        samples = mlperf_inference_benchmark.MakePerformanceSamplesFromOutput(
            {'version': 'v1.1'}, self.performance_contents)
        # Expected metadata: every key/value the parser lifts out of the
        # LoadGen summary, plus the 'version' passed in above.
        metadata = {
            'loadgen_version': '1.1 @ ed7044310a',
            'loadgen_build_date_local': '2022-04-12T14:16:07.377200',
            'loadgen_build_date_utc': '2022-04-12T14:16:07.377209',
            'loadgen_git_commit_date': '2021-08-11T17:36:26+01:00',
            'loadgen_git_status_message': '',
            'test_datetime': '2022-04-12T14:37:18Z',
            'sut_name': 'BERT SERVER',
            'qsl_name': 'BERT QSL',
            'qsl_reported_total_count': 10833,
            'qsl_reported_performance_count': 10833,
            'requested_scenario': 'Server',
            'requested_test_mode': 'PerformanceOnly',
            'requested_server_target_qps': 360,
            'requested_server_target_latency_ns': 130000000,
            'requested_server_target_latency_percentile': 0.99,
            'requested_server_coalesce_queries': True,
            'requested_server_find_peak_qps_decimals_of_precision': 1,
            'requested_server_find_peak_qps_boundary_step_size': 1,
            'requested_server_max_async_queries': 0,
            'requested_server_num_issue_query_threads': 0,
            'requested_min_duration_ms': 600000,
            'requested_max_duration_ms': 0,
            'requested_min_query_count': 270336,
            'requested_max_query_count': 0,
            'requested_qsl_rng_seed': 1624344308455410291,
            'requested_sample_index_rng_seed': 517984244576520566,
            'requested_schedule_rng_seed': 10051496985653635065,
            'requested_accuracy_log_rng_seed': 0,
            'requested_accuracy_log_probability': 0,
            'requested_accuracy_log_sampling_target': 0,
            'requested_print_timestamps': False,
            'requested_performance_issue_unique': False,
            'requested_performance_issue_same': False,
            'requested_performance_issue_same_index': 0,
            'requested_performance_sample_count_override': 10833,
            'effective_scenario': 'Server',
            'effective_test_mode': 'PerformanceOnly',
            'effective_samples_per_query': 1,
            'effective_target_qps': 360,
            'effective_target_latency_ns': 130000000,
            'effective_target_latency_percentile': 0.99,
            'effective_max_async_queries': 0,
            'effective_target_duration_ms': 600000,
            'effective_min_duration_ms': 600000,
            'effective_max_duration_ms': 0,
            'effective_min_query_count': 270336,
            'effective_max_query_count': 0,
            'effective_min_sample_count': 270336,
            'effective_qsl_rng_seed': 1624344308455410291,
            'effective_sample_index_rng_seed': 517984244576520566,
            'effective_schedule_rng_seed': 10051496985653635065,
            'effective_accuracy_log_rng_seed': 0,
            'effective_accuracy_log_probability': 0,
            'effective_accuracy_log_sampling_target': 0,
            'effective_print_timestamps': False,
            'effective_performance_issue_unique': False,
            'effective_performance_issue_same': False,
            'effective_performance_issue_same_index': 0,
            'effective_performance_sample_count': 10833,
            'generic_message': 'Starting performance mode',
            'generated_query_count': 270336,
            'generated_samples_per_query': 1,
            'generated_query_duration': 751081766900,
            'logger_swap_request_slots_retry_count': 0,
            'logger_swap_request_slots_retry_retry_count': 0,
            'logger_swap_request_slots_retry_reencounter_count': 0,
            'logger_start_reading_entries_retry_count': 0,
            'logger_tls_total_log_cas_fail_count': 0,
            'logger_tls_total_swap_buffers_slot_retry_count': 0,
            'power_begin': '04-12-2022 14:37:18.983',
            'power_end': '04-12-2022 14:51:28.239',
            # This fixture is an INVALID run (latency constraint missed);
            # the parser is still expected to emit the throughput sample.
            'result_validity': 'INVALID',
            'result_perf_constraints_met': False,
            'result_min_duration_met': True,
            'result_min_queries_met': True,
            'result_invalid_reason': 'Reduce target QPS to improve latency. ',
            'result_scheduled_samples_per_sec': 359.93,
            'result_completed_samples_per_sec': 321.067,
            'result_min_latency_ns': 6669619,
            'result_max_latency_ns': 846285311653,
            'result_mean_latency_ns': 48257222252,
            'result_50.00_percentile_latency_ns': 45675229779,
            'result_90.00_percentile_latency_ns': 82432475255,
            'result_95.00_percentile_latency_ns': 87117424239,
            'result_97.00_percentile_latency_ns': 89161842381,
            'result_99.00_percentile_latency_ns': 90813602755,
            'result_99.90_percentile_latency_ns': 735809900637,
            'version': 'v1.1'
        }
        # Throughput value matches 'result_completed_samples_per_sec' above.
        golden = Sample(metric='throughput',
                        value=321.067,
                        unit='samples per second',
                        metadata=metadata)
        sample = samples[0]
        # Drop keys whose values vary per run/environment and are not worth
        # pinning in the golden metadata.
        sample.metadata.pop('loaded_qsl_set')
        sample.metadata.pop('loadgen_git_log_message')
        sample.metadata.pop('loadgen_file_sha1')
        self.assertSamplesEqualUpToTimestamp(golden, sample)

        # --- Case 2: BERT accuracy-mode output -> 'accuracy' sample. ---
        samples = mlperf_inference_benchmark.MakeAccuracySamplesFromOutput(
            {'version': 'v1.1'}, self.bert_accuracy_contents)
        metadata = {
            'mlperf benchmark':
            'Benchmark.BERT',
            'mlperf coalesced_tensor':
            'True',
            'mlperf gpu_batch_size':
            '64',
            'mlperf gpu_copy_streams':
            '1',
            'mlperf gpu_inference_streams':
            '2',
            'mlperf input_dtype':
            'int32',
            'mlperf input_format':
            'linear',
            'mlperf precision':
            'int8',
            'mlperf scenario':
            'Scenario.Server',
            'mlperf server_target_qps':
            '3100',
            'mlperf system':
            'A100-SXM4-40GBx1',
            'mlperf tensor_path':
            '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_ids.npy,'
            '${PREPROCESSED_DATA_DIR}/squad_tokenized/segment_ids.npy,'
            '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_mask.npy',
            'mlperf use_graphs':
            'True',
            'mlperf config_name':
            'A100-SXM4-40GBx1_bert_Server',
            'mlperf config_ver':
            'custom_k_99_MaxP',
            'mlperf accuracy_level':
            '99%',
            'mlperf optimization_level':
            'plugin-enabled',
            'mlperf inference_server':
            'custom',
            'mlperf system_id':
            'A100-SXM4-40GBx1',
            'mlperf use_cpu':
            'False',
            'mlperf power_limit':
            'None',
            'mlperf cpu_freq':
            'None',
            'mlperf test_mode':
            'AccuracyOnly',
            'mlperf gpu_num_bundles':
            '2',
            'mlperf log_dir':
            '/work/build/logs/2021.11.09-05.18.28',
            'Threshold':
            89.965,
            'version':
            'v1.1'
        }
        # value=90.376 exceeds the 89.965 threshold recorded in metadata.
        golden = Sample(metric='accuracy',
                        value=90.376,
                        unit='%',
                        metadata=metadata)
        self.assertSamplesEqualUpToTimestamp(golden, samples[0])

        # --- Case 3: DLRM accuracy-mode output -> 'accuracy' sample. ---
        samples = mlperf_inference_benchmark.MakeAccuracySamplesFromOutput(
            {'version': 'v1.1'}, self.dlrm_accuracy_contents)
        metadata = {
            'Threshold':
            79.448,
            'mlperf accuracy_level':
            '99%',
            'mlperf benchmark':
            'Benchmark.DLRM',
            'mlperf coalesced_tensor':
            'True',
            'mlperf config_name':
            'A100-SXM4-40GBx8_dlrm_Server',
            'mlperf config_ver':
            'custom_k_99_MaxP',
            'mlperf cpu_freq':
            'None',
            'mlperf gpu_batch_size':
            '262100',
            'mlperf gpu_copy_streams':
            '1',
            'mlperf gpu_inference_streams':
            '1',
            'mlperf gpu_num_bundles':
            '2',
            'mlperf inference_server':
            'custom',
            'mlperf input_dtype':
            'int8',
            'mlperf input_format':
            'chw4',
            'mlperf log_dir':
            '/work/build/logs/2021.11.13-06.24.26',
            'mlperf optimization_level':
            'plugin-enabled',
            'mlperf power_limit':
            'None',
            'mlperf precision':
            'int8',
            'mlperf scenario':
            'Scenario.Server',
            'mlperf server_target_qps':
            '2100000',
            'mlperf system':
            'A100-SXM4-40GBx8',
            'mlperf system_id':
            'A100-SXM4-40GBx8',
            'mlperf tensor_path':
            '${PREPROCESSED_DATA_DIR}/criteo/full_recalib/'
            'numeric_int8_chw4.npy,'
            '${PREPROCESSED_DATA_DIR}/criteo/full_recalib/'
            'categorical_int32.npy',
            'mlperf test_mode':
            'AccuracyOnly',
            'mlperf use_cpu':
            'False',
            'mlperf use_graphs':
            'False',
            'version':
            'v1.1'
        }
        golden = Sample(metric='accuracy',
                        value=80.185,
                        unit='%',
                        metadata=metadata)
        self.assertSamplesEqualUpToTimestamp(golden, samples[0])
Example #10
0
 def testTrainResults(self):
     """Checks parsing of an MLPerf inference v1.1 performance summary.

     Runs mlperf_inference_benchmark.MakeSamplesFromOutput over the canned
     log fixture (self.contents — presumably loaded in setUp; TODO confirm)
     and verifies the first returned sample is a 'throughput' Sample whose
     metadata reproduces every 'mlperf <field>' key/value lifted from the
     LoadGen summary plus the 'version' passed in. Comparison uses
     assertSamplesEqualUpToTimestamp, so sample timestamps are ignored.
     """
     samples = mlperf_inference_benchmark.MakeSamplesFromOutput(
         {'version': 'v1.1'}, self.contents)
     # Expected metadata: all parsed values are strings here ('mlperf '
     # prefixed), unlike the typed metadata of the newer parser.
     metadata = {
         'mlperf 50.00 percentile latency (ns)':
         '40533329',
         'mlperf 90.00 percentile latency (ns)':
         '51387550',
         'mlperf 95.00 percentile latency (ns)':
         '54956149',
         'mlperf 97.00 percentile latency (ns)':
         '57792422',
         'mlperf 99.00 percentile latency (ns)':
         '82056764',
         'mlperf 99.90 percentile latency (ns)':
         '543294654940',
         'mlperf Completed samples per second':
         '3102.49',
         'mlperf Max latency (ns)':
         '605456500256',
         'mlperf Mean latency (ns)':
         '3037717062',
         'mlperf Min duration satisfied':
         'Yes',
         'mlperf Min latency (ns)':
         '4126840',
         'mlperf Min queries satisfied':
         'Yes',
         'mlperf Mode':
         'PerformanceOnly',
         'mlperf Performance constraints satisfied':
         'Yes',
         'mlperf Result is':
         'VALID',
         'mlperf SUT name':
         'BERT SERVER',
         'mlperf Scenario':
         'Server',
         'mlperf Scheduled samples per second':
         '3102.76',
         'mlperf accuracy_level':
         '99%',
         'mlperf accuracy_log_probability':
         '0',
         'mlperf accuracy_log_rng_seed':
         '0',
         'mlperf accuracy_log_sampling_target':
         '0',
         'mlperf active_sms':
         '60',
         'mlperf benchmark':
         'Benchmark.BERT',
         'mlperf bert_opt_seqlen':
         '384',
         'mlperf coalesced_tensor':
         'True',
         'mlperf config_name':
         'A100-SXM4-40GBx1_bert_Server',
         'mlperf config_ver':
         'custom_k_99_MaxP',
         'mlperf cpu_freq':
         'None',
         'mlperf enable_interleaved':
         'False',
         'mlperf gpu_batch_size':
         '64',
         'mlperf gpu_copy_streams':
         '1',
         'mlperf gpu_inference_streams':
         '2',
         'mlperf gpu_num_bundles':
         '2',
         'mlperf graphs_max_seqlen':
         '200',
         'mlperf inference_server':
         'custom',
         'mlperf input_dtype':
         'int32',
         'mlperf input_format':
         'linear',
         'mlperf log_dir':
         '/work/build/logs/2021.10.27-20.51.11',
         'mlperf max_async_queries':
         '0',
         'mlperf max_duration (ms)':
         '0',
         'mlperf max_query_count':
         '0',
         'mlperf min_duration (ms)':
         '600000',
         'mlperf min_query_count':
         '270336',
         'mlperf optimization_level':
         'plugin-enabled',
         'mlperf performance_issue_same':
         '0',
         'mlperf performance_issue_same_index':
         '0',
         'mlperf performance_issue_unique':
         '0',
         'mlperf performance_sample_count':
         '10833',
         'mlperf power_limit':
         'None',
         'mlperf precision':
         'int8',
         'mlperf print_timestamps':
         '0',
         'mlperf qsl_rng_seed':
         '1624344308455410291',
         'mlperf sample_index_rng_seed':
         '517984244576520566',
         'mlperf samples_per_query':
         '1',
         'mlperf scenario':
         'Scenario.Server',
         'mlperf schedule_rng_seed':
         '10051496985653635065',
         'mlperf server_num_issue_query_threads':
         '1',
         'mlperf server_target_qps':
         '3100',
         'mlperf soft_drop':
         '0.99',
         'mlperf system':
         'A100-SXM4-40GBx1',
         'mlperf system_id':
         'A100-SXM4-40GBx1',
         'mlperf target_latency (ns)':
         '130000000',
         'mlperf tensor_path':
         '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_ids.npy,'
         '${PREPROCESSED_DATA_DIR}/squad_tokenized/segment_ids.npy,'
         '${PREPROCESSED_DATA_DIR}/squad_tokenized/input_mask.npy',
         'mlperf use_cpu':
         'False',
         'mlperf use_graphs':
         'True',
         'version':
         'v1.1'
     }
     # Throughput value matches 'mlperf Scheduled samples per second' above.
     golden = Sample(metric='throughput',
                     value=3102.76,
                     unit='samples/s',
                     metadata=metadata)
     self.assertSamplesEqualUpToTimestamp(golden, samples[0])