def benchmark_using_loadgen(scenario_str, mode_str, samples_in_mem, config_filepath):
    """Perform the benchmark using python API for the LoadGen library.

    Args:
        scenario_str: One of 'SingleStream', 'MultiStream', 'Server', 'Offline'.
        mode_str: One of 'AccuracyOnly', 'PerformanceOnly', 'SubmissionRun'.
        samples_in_mem: Passed to ``lg.ConstructQSL`` right after the
            module-level ``dataset_size``.
        config_filepath: LoadGen config file to apply; skipped when falsy.

    Raises:
        KeyError: If ``scenario_str`` or ``mode_str`` is not one of the names
            listed above.
    """
    scenario = {
        'SingleStream': lg.TestScenario.SingleStream,
        'MultiStream': lg.TestScenario.MultiStream,
        'Server': lg.TestScenario.Server,
        'Offline': lg.TestScenario.Offline,
    }[scenario_str]
    mode = {
        'AccuracyOnly': lg.TestMode.AccuracyOnly,
        'PerformanceOnly': lg.TestMode.PerformanceOnly,
        'SubmissionRun': lg.TestMode.SubmissionRun,
    }[mode_str]

    ts = lg.TestSettings()
    if config_filepath:
        ts.FromConfig(config_filepath, 'random_model_name', scenario_str)
    # Explicit arguments win over whatever the config file set.
    ts.scenario = scenario
    ts.mode = mode

    log_settings = lg.LogSettings()
    log_settings.enable_trace = False

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    try:
        qsl = lg.ConstructQSL(dataset_size, samples_in_mem,
                              load_query_samples, unload_query_samples)
        try:
            lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)
        finally:
            # Release the QSL even when the test run raises.
            lg.DestroyQSL(qsl)
    finally:
        lg.DestroySUT(sut)
def benchmark_using_loadgen():
    """Perform the benchmark using python API for the LoadGen library.

    Reads its configuration from module-level names: ``LOADGEN_SCENARIO``,
    ``LOADGEN_MODE``, ``MLPERF_CONF_PATH``, ``USER_CONF_PATH``, ``MODEL_NAME``,
    ``LOADGEN_DATASET_SIZE`` and ``LOADGEN_BUFFER_SIZE``.

    Raises:
        KeyError: If ``LOADGEN_SCENARIO`` or ``LOADGEN_MODE`` is not a
            recognised name.
    """
    scenario = {
        'SingleStream': lg.TestScenario.SingleStream,
        'MultiStream': lg.TestScenario.MultiStream,
        'Server': lg.TestScenario.Server,
        'Offline': lg.TestScenario.Offline,
    }[LOADGEN_SCENARIO]
    mode = {
        'AccuracyOnly': lg.TestMode.AccuracyOnly,
        'PerformanceOnly': lg.TestMode.PerformanceOnly,
        'SubmissionRun': lg.TestMode.SubmissionRun,
    }[LOADGEN_MODE]

    ts = lg.TestSettings()
    # The user config is applied second, after the MLPerf config.
    ts.FromConfig(MLPERF_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.FromConfig(USER_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.scenario = scenario
    ts.mode = mode

    log_settings = lg.LogSettings()
    log_settings.enable_trace = False

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    try:
        qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE,
                              load_query_samples, unload_query_samples)
        try:
            lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)
        finally:
            # Release the QSL even when the test run raises.
            lg.DestroyQSL(qsl)
    finally:
        lg.DestroySUT(sut)
def benchmark_using_loadgen():
    """Perform the benchmark using python API for the LoadGen library.

    Loads the torchvision model named by ``MODEL_NAME`` through ``torch.hub``
    into the module-level ``model``, then runs a LoadGen test configured from
    module-level ``LOADGEN_*`` / ``*_CONF_PATH`` settings.

    Raises:
        KeyError: If ``LOADGEN_SCENARIO`` or ``LOADGEN_MODE`` is not a
            recognised name.
    """
    global model

    # Load the [cached] Torch model
    torchvision_version = ''  # master by default
    try:
        import torchvision
        torchvision_version = ':v' + torchvision.__version__
    except Exception:
        # Deliberate best-effort: without a usable torchvision we fall back
        # to the unversioned hub reference.
        pass

    model = torch.hub.load('pytorch/vision' + torchvision_version, MODEL_NAME, pretrained=True)
    model.eval()

    # move the model to GPU for speed if available
    if USE_CUDA:
        model.to('cuda')

    scenario = {
        'SingleStream': lg.TestScenario.SingleStream,
        'MultiStream': lg.TestScenario.MultiStream,
        'Server': lg.TestScenario.Server,
        'Offline': lg.TestScenario.Offline,
    }[LOADGEN_SCENARIO]
    mode = {
        'AccuracyOnly': lg.TestMode.AccuracyOnly,
        'PerformanceOnly': lg.TestMode.PerformanceOnly,
        'SubmissionRun': lg.TestMode.SubmissionRun,
    }[LOADGEN_MODE]

    ts = lg.TestSettings()
    ts.FromConfig(MLPERF_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.FromConfig(USER_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
    ts.scenario = scenario
    ts.mode = mode

    if LOADGEN_MULTISTREAMNESS:
        ts.multi_stream_samples_per_query = int(LOADGEN_MULTISTREAMNESS)

    if LOADGEN_COUNT_OVERRIDE:
        # Pin min and max to the same value so exactly that many queries run.
        query_count = int(LOADGEN_COUNT_OVERRIDE)
        ts.min_query_count = query_count
        ts.max_query_count = query_count

    log_settings = lg.LogSettings()
    log_settings.enable_trace = False

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    try:
        qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE,
                              load_query_samples, unload_query_samples)
        try:
            lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)
        finally:
            # Release the QSL even when the test run raises.
            lg.DestroyQSL(qsl)
    finally:
        lg.DestroySUT(sut)
def main():
    """Run the BERT LoadGen test for the backend selected on the command line.

    Builds the backend-specific SUT, applies the MLPerf and user configs,
    runs the test with logs under ``build/logs`` and, in accuracy mode,
    launches ``accuracy-squad.py`` afterwards.

    Raises:
        ValueError: If ``args.backend`` is not a known backend.
        AssertionError: If ``--quantized`` or ``--profile`` is combined with
            a backend other than onnxruntime.
        subprocess.CalledProcessError: If the accuracy script exits non-zero.
    """
    args = get_args()

    # Quantization and profiling are onnxruntime-only features.
    if args.backend in ("pytorch", "tf", "tf_estimator"):
        assert not args.quantized, "Quantized model is only supported by onnxruntime backend!"
        assert not args.profile, "Profiling is only supported by onnxruntime backend!"

    if args.backend == "pytorch":
        from pytorch_SUT import get_pytorch_sut
        sut = get_pytorch_sut()
    elif args.backend == "tf":
        from tf_SUT import get_tf_sut
        sut = get_tf_sut()
    elif args.backend == "tf_estimator":
        from tf_estimator_SUT import get_tf_estimator_sut
        sut = get_tf_estimator_sut()
    elif args.backend == "onnxruntime":
        from onnxruntime_SUT import get_onnxruntime_sut
        sut = get_onnxruntime_sut(args)
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "bert", args.scenario)
    settings.FromConfig(args.user_conf, "bert", args.scenario)

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    log_path = "build/logs"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    print("Running LoadGen test...")
    lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)

    if args.accuracy:
        # An argv list needs no shell to run a fixed command.
        subprocess.check_call(["python3", "accuracy-squad.py"])

    print("Done!")

    print("Destroying SUT...")
    lg.DestroySUT(sut.sut)

    print("Destroying QSL...")
    lg.DestroyQSL(sut.qsl.qsl)
def main(argv):
    """Run a SingleStream, performance-only LoadGen test.

    ``argv`` is accepted but not used. The QSL is built with a total of
    1024 * 1024 samples and 1024 as its second size argument.
    """
    loadgen = mlperf_loadgen

    test_settings = loadgen.TestSettings()
    test_settings.mode = loadgen.TestMode.PerformanceOnly
    test_settings.scenario = loadgen.TestScenario.SingleStream

    system_under_test = loadgen.ConstructSUT(
        issue_query, flush_queries, process_latencies)
    sample_library = loadgen.ConstructQSL(
        1024 * 1024, 1024, load_samples_to_ram, unload_samples_from_ram)

    loadgen.StartTest(system_under_test, sample_library, test_settings)

    # Tear down in the original order: QSL first, then SUT.
    loadgen.DestroyQSL(sample_library)
    loadgen.DestroySUT(system_under_test)
def main(argv):
    """Run an Offline, performance-only LoadGen test at an expected 1000 QPS.

    ``argv`` is accepted but not used.
    """
    loadgen = mlperf_loadgen

    test_settings = loadgen.TestSettings()
    test_settings.mode = loadgen.TestMode.PerformanceOnly
    test_settings.scenario = loadgen.TestScenario.Offline
    test_settings.offline_expected_qps = 1000

    system_under_test = loadgen.ConstructSUT(
        issue_query, flush_queries, process_latencies)
    sample_library = loadgen.ConstructQSL(
        1024, 128, load_samples_to_ram, unload_samples_from_ram)

    loadgen.StartTest(system_under_test, sample_library, test_settings)

    # Tear down in the original order: QSL first, then SUT.
    loadgen.DestroyQSL(sample_library)
    loadgen.DestroySUT(system_under_test)
def main(argv):
    """Run a MultiStream LoadGen test in SubmissionRun mode.

    ``argv`` is accepted but not used. The SUT is constructed from the
    ``issue_query`` callback alone.
    """
    loadgen = mlperf_loadgen

    test_settings = loadgen.TestSettings()
    test_settings.mode = loadgen.TestMode.SubmissionRun
    test_settings.scenario = loadgen.TestScenario.MultiStream
    test_settings.samples_per_query = 4
    test_settings.target_qps = 1000
    test_settings.target_latency_ns = 1000000000

    system_under_test = loadgen.ConstructSUT(issue_query)
    sample_library = loadgen.ConstructQSL(
        1024, 128, load_samples_to_ram, unload_samples_from_ram)

    loadgen.StartTest(system_under_test, sample_library, test_settings)

    # Tear down in the original order: QSL first, then SUT.
    loadgen.DestroyQSL(sample_library)
    loadgen.DestroySUT(system_under_test)
def main(argv):
    """Run a SingleStream LoadGen test in AccuracyOnly mode.

    ``argv`` is accepted but not used. The run is configured with an expected
    latency of 1000000 ns, a minimum of 100 queries and a minimum duration of
    10000 ms.
    """
    loadgen = mlperf_loadgen

    test_settings = loadgen.TestSettings()
    test_settings.mode = loadgen.TestMode.AccuracyOnly
    test_settings.scenario = loadgen.TestScenario.SingleStream
    test_settings.single_stream_expected_latency_ns = 1000000
    test_settings.min_duration_ms = 10000
    test_settings.min_query_count = 100

    system_under_test = loadgen.ConstructSUT(
        issue_query, flush_queries, process_latencies)
    sample_library = loadgen.ConstructQSL(
        1024, 128, load_samples_to_ram, unload_samples_from_ram)

    loadgen.StartTest(system_under_test, sample_library, test_settings)

    # Tear down in the original order: QSL first, then SUT.
    loadgen.DestroyQSL(sample_library)
    loadgen.DestroySUT(system_under_test)
def main():
    """Run the RNN-T LoadGen test with the PyTorch SUT.

    Applies the MLPerf and user configs, picks the issued query count
    (2513 in accuracy mode, otherwise the configured ``min_query_count``),
    runs the test with logs under ``args.log_dir`` and, in accuracy mode,
    launches ``accuracy_eval.py`` afterwards.

    Raises:
        ValueError: If ``args.backend`` is not "pytorch".
        subprocess.CalledProcessError: If the accuracy script exits non-zero.
    """
    args = get_args()

    # Batching only applies to the Offline scenario.
    batch_size = args.offline_batch_size if args.scenario == "Offline" else 1

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario)
    settings.FromConfig(args.user_conf, "rnnt", args.scenario)

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
        issued_query_count = 2513
    else:
        settings.mode = lg.TestMode.PerformanceOnly
        issued_query_count = settings.min_query_count

    log_path = args.log_dir
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    if args.backend == "pytorch":
        from pytorch_SUT import PytorchSUT
        sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint,
                         args.dataset_dir, args.manifest, args.perf_count,
                         issued_query_count, args.scenario, args.machine_conf,
                         batch_size, args.cores_for_loadgen,
                         args.cores_per_instance, args.debug, args.cosim,
                         args.profile, args.ipex, args.bf16, args.warmup)
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    print("Running Loadgen test...")
    lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)

    if args.accuracy:
        # Pass an argv list instead of a shell string so paths containing
        # spaces or shell metacharacters reach the script unchanged.
        cmd = [
            "python3", "accuracy_eval.py",
            "--log_dir", str(log_path),
            "--dataset_dir", str(args.dataset_dir),
            "--manifest", str(args.manifest),
        ]
        print(f"Running accuracy script: {' '.join(cmd)}")
        subprocess.check_call(cmd)

    lg.DestroySUT(sut.sut)
    print("Done!")
def main():
    """Run the 3D-UNet LoadGen test for the backend selected on the command line.

    Builds the backend-specific SUT, applies the MLPerf and user configs,
    runs the test with logs under ``build/logs`` and, in accuracy mode,
    launches ``brats_eval.py`` afterwards.

    Raises:
        ValueError: If ``args.backend`` is not a known backend.
        subprocess.CalledProcessError: If the accuracy script exits non-zero.
    """
    args = get_args()

    if args.backend == "pytorch":
        from pytorch_SUT import get_pytorch_sut
        sut = get_pytorch_sut(args.model_dir, args.preprocessed_data_dir,
                              args.performance_count)
    elif args.backend == "onnxruntime":
        from onnxruntime_SUT import get_onnxruntime_sut
        sut = get_onnxruntime_sut(args.onnx_model, args.preprocessed_data_dir,
                                  args.performance_count)
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "3d-unet", args.scenario)
    settings.FromConfig(args.user_conf, "3d-unet", args.scenario)

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    log_path = "build/logs"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    print("Running Loadgen test...")
    lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)

    if args.accuracy:
        print("Running accuracy script...")
        # An argv list needs no shell to run a fixed command.
        subprocess.check_call(["python3", "brats_eval.py"])

    print("Done!")

    print("Destroying SUT...")
    lg.DestroySUT(sut.sut)

    print("Destroying QSL...")
    lg.DestroyQSL(sut.qsl.qsl)
def main(argv):
    """Run a Server, performance-only LoadGen test.

    ``argv`` is accepted but not used. Targets 100 QPS with a latency bound of
    100000000 ns, at least 100 queries and at least 10000 ms of run time.
    """
    loadgen = mlperf_loadgen

    test_settings = loadgen.TestSettings()
    test_settings.mode = loadgen.TestMode.PerformanceOnly
    test_settings.scenario = loadgen.TestScenario.Server
    test_settings.server_target_latency_ns = 100000000
    test_settings.server_target_qps = 100
    test_settings.min_duration_ms = 10000
    test_settings.min_query_count = 100

    system_under_test = loadgen.ConstructSUT(
        issue_query, flush_queries, process_latencies)
    sample_library = loadgen.ConstructQSL(
        1024, 128, load_samples_to_ram, unload_samples_from_ram)

    loadgen.StartTest(system_under_test, sample_library, test_settings)

    # Tear down in the original order: QSL first, then SUT.
    loadgen.DestroyQSL(sample_library)
    loadgen.DestroySUT(system_under_test)
def main(argv):
    """Run a SingleStream, performance-only LoadGen test with spec overrides.

    ``argv`` is accepted but not used. The SUT is constructed from the
    ``issue_query`` and ``process_latencies`` callbacks only.
    """
    loadgen = mlperf_loadgen

    test_settings = loadgen.TestSettings()
    test_settings.mode = loadgen.TestMode.PerformanceOnly
    test_settings.scenario = loadgen.TestScenario.SingleStream
    test_settings.single_stream_expected_latency_ns = 1000000

    # Override values written alongside the enable flag.
    test_settings.enable_spec_overrides = True
    test_settings.override_target_latency_ns = 100000000
    test_settings.override_min_query_count = 100
    test_settings.override_min_duration_ms = 10000

    system_under_test = loadgen.ConstructSUT(issue_query, process_latencies)
    sample_library = loadgen.ConstructQSL(
        1024, 128, load_samples_to_ram, unload_samples_from_ram)

    loadgen.StartTest(system_under_test, sample_library, test_settings)

    # Tear down in the original order: QSL first, then SUT.
    loadgen.DestroyQSL(sample_library)
    loadgen.DestroySUT(system_under_test)
def benchmark_using_loadgen():
    """Perform the benchmark using python API for the LoadGen library.

    Initializes the predictor (which yields the PyCUDA context and model
    geometry), publishes ``num_classes`` and ``model_output_volume`` as module
    globals, and runs a LoadGen test configured from module-level
    ``LOADGEN_*`` / ``*_CONF_PATH`` settings. The PyCUDA context is popped
    when the run finishes, whether or not it raised.

    Raises:
        KeyError: If ``LOADGEN_SCENARIO`` or ``LOADGEN_MODE`` is not a
            recognised name.
    """
    global num_classes
    global model_output_volume

    pycuda_context, max_batch_size, input_volume, model_output_volume, num_layers = initialize_predictor()

    try:
        num_classes = len(class_labels)

        scenario = {
            'SingleStream': lg.TestScenario.SingleStream,
            'MultiStream': lg.TestScenario.MultiStream,
            'Server': lg.TestScenario.Server,
            'Offline': lg.TestScenario.Offline,
        }[LOADGEN_SCENARIO]
        mode = {
            'AccuracyOnly': lg.TestMode.AccuracyOnly,
            'PerformanceOnly': lg.TestMode.PerformanceOnly,
            'SubmissionRun': lg.TestMode.SubmissionRun,
        }[LOADGEN_MODE]

        ts = lg.TestSettings()
        ts.FromConfig(MLPERF_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
        ts.FromConfig(USER_CONF_PATH, MODEL_NAME, LOADGEN_SCENARIO)
        ts.scenario = scenario
        ts.mode = mode

        if LOADGEN_MULTISTREAMNESS:
            ts.multi_stream_samples_per_query = int(LOADGEN_MULTISTREAMNESS)

        if LOADGEN_COUNT_OVERRIDE:
            # Pin min and max to the same value so exactly that many queries run.
            query_count = int(LOADGEN_COUNT_OVERRIDE)
            ts.min_query_count = query_count
            ts.max_query_count = query_count

        log_settings = lg.LogSettings()
        log_settings.enable_trace = False

        sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
        try:
            qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE,
                                  load_query_samples, unload_query_samples)
            try:
                lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)
            finally:
                # Release the QSL even when the test run raises.
                lg.DestroyQSL(qsl)
        finally:
            lg.DestroySUT(sut)
    finally:
        # Always pop the context so a failed run does not leave it pushed.
        pycuda_context.pop()
def get_test_settings(self) -> lg.TestSettings:
    """Build performance-only LoadGen settings for ``self._scenario``.

    Every tunable falls back to a module-level ``_DEFAULT_*`` constant (or
    600000 ms for the minimum duration) when the corresponding instance
    attribute is falsy.

    Returns:
        A populated ``lg.TestSettings`` in ``PerformanceOnly`` mode.

    Raises:
        ValueError: If ``self._scenario`` is not "single_stream",
            "multi_stream" or "server".
    """
    result = lg.TestSettings()
    result.mode = lg.TestMode.PerformanceOnly
    result.min_duration_ms = self._duration_ms or 600000

    if self._scenario == "single_stream":
        result.scenario = lg.TestScenario.SingleStream
        query_count = self._query_count or _DEFAULT_SINGLE_STREAM_QUERY_COUNT
        percentile = (self._target_latency_percentile
                      or _DEFAULT_SINGLE_STREAM_LATENCY_PERCENTILE)
        latency_ns = (self._target_latency_ns
                      or _DEFAULT_SINGLE_STREAM_TARGET_LATENCY)
        result.min_query_count = query_count
        result.single_stream_target_latency_percentile = percentile
        result.single_stream_expected_latency_ns = latency_ns
        return result

    if self._scenario == "multi_stream":
        result.scenario = lg.TestScenario.MultiStream
        target_qps = self._qps or _DEFAULT_MULTI_STREAM_QPS
        query_count = self._query_count or _DEFAULT_MULTI_STREAM_QUERY_COUNT
        percentile = (self._target_latency_percentile
                      or _DEFAULT_MULTI_STREAM_LATENCY_PERCENTILE)
        latency_ns = (self._target_latency_ns
                      or _DEFAULT_MULTI_STREAM_TARGET_LATENCY)
        result.multi_stream_target_qps = target_qps
        result.min_query_count = query_count
        result.multi_stream_target_latency_percentile = percentile
        result.multi_stream_target_latency_ns = latency_ns
        return result

    if self._scenario == "server":
        result.scenario = lg.TestScenario.Server
        query_count = self._query_count or _DEFAULT_SERVER_QUERY_COUNT
        target_qps = self._qps or _DEFAULT_SERVER_QPS
        latency_ns = self._target_latency_ns or _DEFAULT_SERVER_TARGET_LATENCY
        percentile = (self._target_latency_percentile
                      or _DEFAULT_SERVER_LATENCY_PERCENTILE)
        result.min_query_count = query_count
        result.server_target_qps = target_qps
        result.server_target_latency_ns = latency_ns
        result.server_target_latency_percentile = percentile
        return result

    raise ValueError("Unsupported scenario.")
def main(argv):
    """Run a MultiStreamFree, performance-only LoadGen test.

    ``argv`` is discarded immediately. Uses 4 samples per query, at most 2
    async queries, a latency target of 100000000 ns, at least 100 queries and
    at least 10000 ms of run time.
    """
    del argv
    loadgen = mlperf_loadgen

    test_settings = loadgen.TestSettings()
    test_settings.mode = loadgen.TestMode.PerformanceOnly
    test_settings.scenario = loadgen.TestScenario.MultiStreamFree
    test_settings.multi_stream_samples_per_query = 4
    test_settings.multi_stream_max_async_queries = 2
    test_settings.multi_stream_target_latency_ns = 100000000
    test_settings.min_duration_ms = 10000
    test_settings.min_query_count = 100

    system_under_test = loadgen.ConstructSUT(
        issue_query, flush_queries, process_latencies)
    sample_library = loadgen.ConstructQSL(
        1024, 128, load_samples_to_ram, unload_samples_from_ram)

    loadgen.StartTest(system_under_test, sample_library, test_settings)

    # Tear down in the original order: QSL first, then SUT.
    loadgen.DestroyQSL(sample_library)
    loadgen.DestroySUT(system_under_test)
def __init__(self, session, ds, optimization_config, onnx_output_names):
    """Prepare LoadGen Server-scenario state and start the worker threads.

    Args:
        session: Stored as ``self.session``; not otherwise used here.
        ds: Dataset object; its ``load_query_samples`` and
            ``unload_query_samples`` methods are handed to ``lg.ConstructQSL``.
        optimization_config: Supplies ``threads_num``,
            ``dynamic_batching_size``, ``result_path``, ``max_latency_ms``,
            ``max_latency_percentile`` and ``min_duration_sec``.
        onnx_output_names: Stored as ``self.onnx_output_names``.
    """
    self.session = session
    self.threads = optimization_config.threads_num
    self.max_batchsize = optimization_config.dynamic_batching_size
    self.ds = ds
    self.onnx_output_names = onnx_output_names
    self.guess = None
    # Condition variable handed to every worker thread below.
    self.cv = threading.Condition()
    self.done = False
    self.q_idx = []
    self.q_query_id = []
    self.workers = []

    # Server scenario, run in FindPeakPerformance mode.
    self.settings = lg.TestSettings()
    self.settings.scenario = lg.TestScenario.Server
    self.settings.mode = lg.TestMode.FindPeakPerformance

    # Logs go to the configured result path; summary is not echoed to stdout.
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = optimization_config.result_path
    log_output_settings.copy_summary_to_stdout = False
    self.log_settings = lg.LogSettings()
    self.log_settings.enable_trace = False
    self.log_settings.log_output = log_output_settings

    self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries, self.process_latencies)
    # QUERY_COUNT is used for both size arguments of the QSL.
    self.qsl = lg.ConstructQSL(QUERY_COUNT, QUERY_COUNT, ds.load_query_samples, ds.unload_query_samples)

    self.settings.server_coalesce_queries = True
    # Convert the configured latency bound from milliseconds to nanoseconds.
    self.settings.server_target_latency_ns = int(optimization_config.max_latency_ms * NANO_SEC / MILLI_SEC)
    self.settings.server_target_latency_percentile = optimization_config.max_latency_percentile
    self.settings.min_duration_ms = optimization_config.min_duration_sec * MILLI_SEC

    # start all threads
    for _ in range(self.threads):
        worker = threading.Thread(target=self.handle_tasks, args=(self.cv,))
        worker.daemon = True
        self.workers.append(worker)
        worker.start()
        # NOTE(review): one-second pause after starting a worker; the reason
        # is not visible in this block -- confirm it is still needed.
        time.sleep(1)
def benchmark_using_loadgen():
    """Perform the benchmark using python API for the LoadGen library.

    Calls ``initialize_predictor()`` and then runs a LoadGen test configured
    from module-level ``LOADGEN_*`` settings. The module-level
    ``pycuda_context`` is popped when the run finishes, whether or not it
    raised.

    Raises:
        KeyError: If ``LOADGEN_SCENARIO`` or ``LOADGEN_MODE`` is not a
            recognised name.
    """
    global pycuda_context

    # NOTE(review): presumably this is what binds the global pycuda_context
    # popped below -- confirm against initialize_predictor().
    initialize_predictor()

    try:
        scenario = {
            'SingleStream': lg.TestScenario.SingleStream,
            'MultiStream': lg.TestScenario.MultiStream,
            'Server': lg.TestScenario.Server,
            'Offline': lg.TestScenario.Offline,
        }[LOADGEN_SCENARIO]
        mode = {
            'AccuracyOnly': lg.TestMode.AccuracyOnly,
            'PerformanceOnly': lg.TestMode.PerformanceOnly,
            'SubmissionRun': lg.TestMode.SubmissionRun,
        }[LOADGEN_MODE]

        ts = lg.TestSettings()
        if LOADGEN_CONF_FILE:
            ts.FromConfig(LOADGEN_CONF_FILE, 'random_model_name', LOADGEN_SCENARIO)
        ts.scenario = scenario
        ts.mode = mode

        if LOADGEN_MULTISTREAMNESS:
            ts.multi_stream_samples_per_query = int(LOADGEN_MULTISTREAMNESS)

        log_settings = lg.LogSettings()
        log_settings.enable_trace = False

        sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
        try:
            qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE,
                                  load_query_samples, unload_query_samples)
            try:
                lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)
            finally:
                # Release the QSL even when the test run raises.
                lg.DestroyQSL(qsl)
        finally:
            lg.DestroySUT(sut)
    finally:
        # Always pop the context so a failed run does not leave it pushed.
        pycuda_context.pop()
def main():
    """Run the RNN-T LoadGen test with the PyTorch SUT.

    Applies the MLPerf and user configs, runs the test with logs under
    ``args.log_dir`` and, in accuracy mode, launches ``accuracy_eval.py``
    afterwards.

    Raises:
        ValueError: If ``args.backend`` is not "pytorch".
        subprocess.CalledProcessError: If the accuracy script exits non-zero.
    """
    args = get_args()

    if args.backend == "pytorch":
        from pytorch_SUT import PytorchSUT
        sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint,
                         args.dataset_dir, args.manifest, args.perf_count)
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario)
    settings.FromConfig(args.user_conf, "rnnt", args.scenario)

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    log_path = args.log_dir
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    print("Running Loadgen test...")
    lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)

    if args.accuracy:
        # Pass an argv list instead of a shell string so paths containing
        # spaces or shell metacharacters reach the script unchanged.
        cmd = [
            "python3", "accuracy_eval.py",
            "--log_dir", str(log_path),
            "--dataset_dir", str(args.dataset_dir),
            "--manifest", str(args.manifest),
        ]
        print(f"Running accuracy script: {' '.join(cmd)}")
        subprocess.check_call(cmd)

    print("Done!")
def eval_func(model):
    """Run the 3D-UNet LoadGen test and return the measured accuracy.

    Args:
        model: Model object passed to the PyTorch SUT factory; the other
            backends load from ``args.model`` instead.

    Returns:
        The accuracy parsed from the stderr output of ``accuracy-brats.py``
        when ``args.accuracy`` is set, otherwise ``None`` (the accuracy script
        is not run in that case).

    Raises:
        ValueError: If ``args.backend`` is unknown, or if the accuracy
            script's stderr cannot be parsed as a float.
    """
    args = get_args()

    if args.backend == "pytorch":
        from pytorch_SUT import get_pytorch_sut
        sut = get_pytorch_sut(model, args.preprocessed_data_dir,
                              args.performance_count)
    elif args.backend == "onnxruntime":
        from onnxruntime_SUT import get_onnxruntime_sut
        sut = get_onnxruntime_sut(args.model, args.preprocessed_data_dir,
                                  args.performance_count)
    elif args.backend == "tf":
        from tf_SUT import get_tf_sut
        sut = get_tf_sut(args.model, args.preprocessed_data_dir,
                         args.performance_count)
    elif args.backend == "ov":
        from ov_SUT import get_ov_sut
        sut = get_ov_sut(args.model, args.preprocessed_data_dir,
                         args.performance_count)
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "3d-unet", args.scenario)
    settings.FromConfig(args.user_conf, "3d-unet", args.scenario)

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    log_path = "build/logs"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    print("Running Loadgen test...")
    if args.benchmark:
        start = time.time()
    lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)
    if args.benchmark:
        end = time.time()

    # Stays None unless the accuracy script actually runs; previously the
    # code below read an unbound name when args.accuracy was false.
    accuracy = None
    if args.accuracy:
        print("Running accuracy script...")
        process = subprocess.Popen(['python3', 'accuracy-brats.py'],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        out, err = process.communicate()

        print(out)
        # The script's stderr output is parsed as the accuracy figure.
        accuracy = float(err)
        print("Done!", accuracy)

    if args.benchmark:
        elapsed = end - start
        print('Batch size = 1')
        print('Latency: %.3f ms' % (elapsed * 1000 / sut.qsl.count))
        print('Throughput: %.3f images/sec' % (sut.qsl.count / elapsed))
        if accuracy is not None:
            print('Accuracy: {mean:.5f}'.format(mean=accuracy))

    print("Destroying SUT...")
    lg.DestroySUT(sut.sut)

    print("Destroying QSL...")
    lg.DestroyQSL(sut.qsl.qsl)
    return accuracy
def main():
    """Run every requested LoadGen scenario and optionally dump the results.

    For each scenario in ``args.scenario`` this builds a runner, a SUT and a
    QSL, runs ``lg.StartTest`` (once per entry of ``args.max_latency`` for the
    Server scenario, once otherwise), and records the outcome in
    ``final_results`` via ``add_results``. When ``args.output`` is set the
    collected results are written there as JSON.

    Uses the module-level ``last_timeing`` to receive latencies from the
    ``process_latencies`` callback.
    """
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count = args.count
    if not count:
        if not args.accuracy:
            count = 200

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)

    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    # From here on `count` is the number of items the dataset reports.
    count = ds.get_item_count()

    # warmup
    # Five predictions on sample 0 before any measured run.
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    for scenario in args.scenario:
        # SingleStream uses the plain runner; every other scenario is queued.
        runner_map = {
            lg.TestScenario.SingleStream: RunnerBase,
            lg.TestScenario.MultiStream: QueueRunner,
            lg.TestScenario.Server: QueueRunner,
            lg.TestScenario.Offline: QueueRunner
        }
        runner = runner_map[scenario](model,
                                      ds,
                                      args.threads,
                                      post_proc=post_proc,
                                      max_batchsize=args.max_batchsize)

        # LoadGen callbacks; they close over this iteration's runner.
        def issue_queries(query_samples):
            runner.enqueue(query_samples)

        def flush_queries():
            pass

        def process_latencies(latencies_ns):
            # called by loadgen to show us the recorded latencies
            global last_timeing
            last_timeing = [t / NANO_SEC for t in latencies_ns]

        settings = lg.TestSettings()
        settings.scenario = scenario
        settings.mode = lg.TestMode.PerformanceOnly
        if args.accuracy:
            settings.mode = lg.TestMode.AccuracyOnly

        if args.time:
            # override the time we want to run
            settings.min_duration_ms = args.time * MILLI_SEC
            settings.max_duration_ms = args.time * MILLI_SEC

        if args.qps:
            qps = float(args.qps)
            settings.server_target_qps = qps
            settings.offline_expected_qps = qps

        if scenario == lg.TestScenario.SingleStream:
            settings.min_query_count = args.queries_single
            settings.max_query_count = args.queries_single
        elif scenario == lg.TestScenario.MultiStream:
            settings.min_query_count = args.queries_multi
            settings.max_query_count = args.queries_multi
            settings.multi_stream_samples_per_query = 4
        elif scenario == lg.TestScenario.Server:
            # Only bound for Server; consumed by the Server branch below.
            max_latency = args.max_latency
        elif scenario == lg.TestScenario.Offline:
            settings.min_query_count = args.queries_offline
            settings.max_query_count = args.queries_offline

        sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 1000), ds.load_query_samples,
                              ds.unload_query_samples)

        if scenario == lg.TestScenario.Server:
            # One test run per requested target latency.
            for target_latency in max_latency:
                log.info("starting {}, latency={}".format(
                    scenario, target_latency))
                settings.server_target_latency_ns = int(target_latency * NANO_SEC)

                result_dict = {
                    "good": 0,
                    "total": 0,
                    "scenario": str(scenario)
                }
                runner.start_run(result_dict, args.accuracy)
                lg.StartTest(sut, qsl, settings)

                # Fall back to the runner's timings when the callback left
                # last_timeing empty.
                # NOTE(review): last_timeing is not reset between runs in this
                # function, so a previous run's value may still be present --
                # confirm whether that is intended.
                if not last_timeing:
                    last_timeing = runner.result_timing
                if args.accuracy:
                    post_proc.finalize(result_dict,
                                       ds,
                                       output_dir=os.path.dirname(args.output))
                add_results(final_results,
                            "{}-{}".format(scenario, target_latency),
                            result_dict, last_timeing,
                            time.time() - ds.last_loaded, args.accuracy)
        else:
            log.info("starting {}".format(scenario))

            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, args.accuracy)
            lg.StartTest(sut, qsl, settings)

            if not last_timeing:
                last_timeing = runner.result_timing
            if args.accuracy:
                post_proc.finalize(result_dict,
                                   ds,
                                   output_dir=os.path.dirname(args.output))
            add_results(final_results, "{}".format(scenario), result_dict,
                        last_timeing,
                        time.time() - ds.last_loaded,
                        args.accuracy)

        runner.finish()
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
def main():
    """Run one LoadGen scenario, optionally searching for a passing Server QPS.

    Sets up the backend, dataset, runner and LoadGen settings, then repeatedly
    runs ``lg.StartTest``. For non-Server scenarios, or when
    ``args.auto_qps is False``, the loop exits after one run. Otherwise the
    target QPS is adjusted between ``lower_qps`` and ``upper_qps`` based on
    the measured latency percentile until one of the stop conditions in the
    loop is met.

    Uses the module-level ``last_timeing`` to receive latencies from the
    ``process_latencies`` callback.
    """
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    # NOTE(review): no limit of 200 is applied anywhere in this function; the
    # sentence above may be stale.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)

    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    # Resolve the config path before the working directory may change below.
    config = os.path.abspath(args.config)
    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if args.output:
        # The process changes into the output directory; the relative
        # "results.json" opened later is therefore created there.
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    # From here on `count` is the number of items the dataset reports.
    count = ds.get_item_count()

    # warmup
    # Five predictions on sample 0 before any measured run.
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    # SingleStream uses the plain runner; every other scenario is queued.
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model,
                                  ds,
                                  args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    # LoadGen callbacks.
    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(config, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    # Checked last, so it wins over --accuracy when both are given.
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        #settings.max_duration_ms = args.time * MILLI_SEC

    if count_override:
        settings.min_query_count = count
        # settings.max_query_count = count

    # An explicit --min_query_count overrides the count-derived value above.
    if args.min_query_count:
        settings.min_query_count = args.min_query_count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        # The same latency bound is applied to all three scenario fields.
        settings.single_stream_expected_latency_ns = int(args.max_latency * NANO_SEC)
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    def set_qps(current_qps):
        # Apply one QPS value to every scenario's QPS field and hand it back.
        settings.server_target_qps = current_qps
        settings.offline_expected_qps = current_qps
        settings.multi_stream_target_qps = current_qps
        return current_qps

    # NOTE(review): qps is only bound when args.qps is truthy, yet the loop
    # below reads it unconditionally -- confirm args.qps always has a value.
    if args.qps:
        qps = set_qps(args.qps)

    # -1 marks the search bounds as not yet initialised.
    lower_qps = -1
    upper_qps = -1
    # Keys are QPS values whose run met the latency target.
    qps_passed = {}
    while True:
        print("schedual qps:", qps)
        sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 500), ds.load_query_samples,
                              ds.unload_query_samples)

        log.info("starting {}".format(scenario))
        result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
        runner.start_run(result_dict, args.accuracy)
        print("max query count:", settings.max_query_count)
        lg.StartTest(sut, qsl, settings)

        # Fall back to the runner's timings when the callback left
        # last_timeing empty.
        if not last_timeing:
            last_timeing = runner.result_timing
        if args.accuracy:
            post_proc.finalize(result_dict, ds, output_dir=args.output)
        took = time.time() - ds.last_loaded
        add_results(final_results, "{}".format(scenario), result_dict,
                    last_timeing, took, args.accuracy)

        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

        #
        # write final results
        #
        if args.output:
            with open("results.json", "w") as f:
                json.dump(final_results, f, sort_keys=True, indent=4)

        # The QPS search only applies to Server with auto_qps enabled.
        if args.scenario != 'Server':
            break

        if args.auto_qps is False:
            break

        if lower_qps == -1 or upper_qps == -1:
            # First pass: seed the bounds at 0.5x and 1.5x the measured rate
            # and rerun at the lower bound.
            base_qps = len(last_timeing) / took
            upper_qps = base_qps * 1.5
            lower_qps = base_qps * 0.5
            qps = set_qps(lower_qps)
            continue

        # last_timeing holds seconds (see process_latencies); scale back to ns
        # to compare against the ns target.
        latency_percentile_ns = np.percentile(
            last_timeing,
            settings.server_target_latency_percentile * 100) * NANO_SEC
        # Stop: latency target met while within 2% of the upper bound.
        if latency_percentile_ns < settings.server_target_latency_ns and qps > upper_qps * 0.98:
            print("target qps:", qps)
            break
        # Stop: bounds have converged and the lower bound is known to pass.
        if upper_qps - lower_qps < 1 and lower_qps in qps_passed:
            print("target qps:", lower_qps)
            break

        if latency_percentile_ns > settings.server_target_latency_ns:
            #reduce qps
            print("reduce qps, bound:[%d, %d]" % (lower_qps, upper_qps))
            upper_qps = qps
            if qps == lower_qps:
                # Even the lower bound failed: halve it and retry there.
                lower_qps = lower_qps * 0.5
                qps = set_qps(lower_qps)
                continue
            if qps > lower_qps:
                # Bisect between the bounds.
                qps = set_qps((lower_qps + upper_qps) / 2)
                continue

        if latency_percentile_ns < settings.server_target_latency_ns:
            #increase qps
            qps_passed[qps] = None
            print("increase qps, bound:[%d, %d]" % (lower_qps, upper_qps))
            lower_qps = qps
            if qps == upper_qps:
                # The upper bound passed: raise it by 1.5x and retry there.
                upper_qps = upper_qps * 1.5
                qps = set_qps(upper_qps)
                continue
            if qps < upper_qps:
                # Bisect between the bounds.
                qps = set_qps((lower_qps + upper_qps) / 2)
                continue
        # NOTE(review): when the measured latency equals the target exactly,
        # neither branch above runs and the loop repeats at the same qps --
        # confirm whether that case needs handling.

    runner.finish()
def main():
    """Run the multi-process DLRM benchmark under LoadGen and record the results.

    Validates the two config paths, starts ``num_sockets`` ``Consumer`` workers
    and one response thread, fills a ``lg.TestSettings`` from the command-line
    arguments, runs the test and then shuts the workers down.  When
    ``args.output`` is set the process changes into that directory and
    ``results.json`` is written there.

    Exits with status 1 if either config file does not exist.
    """
    global num_sockets
    global start_time
    global item_total
    global last_timeing

    args = get_args()
    log.info(args)

    # Both config files are mandatory; resolve them before any chdir below.
    config = os.path.abspath(args.config)
    user_config = os.path.abspath(args.user_config)

    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if not os.path.exists(user_config):
        log.error("{} not found".format(user_config))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        # results.json is opened with a relative name at the end of main()
        os.chdir(output_dir)

    # Shared state handed to every Consumer.
    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    total_samples = multiprocessing.Value("i", 0)
    dsQueue = multiprocessing.Queue()
    outQueue = multiprocessing.Queue()
    # bounded work queue: at most num_sockets * 4 pending items
    inQueue = multiprocessing.JoinableQueue(num_sockets * 4)
    consumers = [
        Consumer(inQueue, outQueue, dsQueue, lock, init_counter, total_samples, i, args)
        for i in range(num_sockets)
    ]
    for c in consumers:
        c.start()

    # Poll until init_counter reaches num_sockets
    # (presumably each consumer increments it once it is ready - confirm in Consumer)
    while init_counter.value < num_sockets:
        time.sleep(2)

    # Start the response thread; daemon so it does not block interpreter exit
    response_worker = threading.Thread(target=response_loadgen, args=(outQueue, args.accuracy))
    response_worker.daemon = True
    response_worker.start()

    scenario = SCENARIO_MAP[args.scenario]
    # Only Server and Offline are mapped; any other scenario raises KeyError below.
    runner_map = {
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](inQueue, max_batchsize=args.max_batchsize)

    def issue_queries(response_ids, query_sample_indexes):
        # LoadGen callback: forward the query ids and sample indexes to the runner
        runner.enqueue(response_ids, query_sample_indexes)

    def flush_queries():
        # LoadGen callback: nothing is buffered on this side
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies;
        # stored after dividing each value by NANO_SEC
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    # user_config is applied second, so it can override values from config
    settings.FromConfig(config, args.model, args.scenario)
    settings.FromConfig(user_config, args.model, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
        # NOTE(review): nesting reconstructed from whitespace-collapsed source -
        # confirm this override is meant to apply in accuracy mode only
        settings.performance_sample_count_override = total_samples.value

    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    # Optional command-line overrides of the values loaded from the config files.
    if args.duration:
        settings.min_duration_ms = args.duration
        settings.max_duration_ms = args.duration

    if args.target_qps:
        settings.server_target_qps = float(args.target_qps)
        settings.offline_expected_qps = float(args.target_qps)

    if args.count_queries:
        settings.min_query_count = args.count_queries
        settings.max_query_count = args.count_queries

    if args.samples_per_query_multistream:
        settings.multi_stream_samples_per_query = args.samples_per_query_multistream

    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    def load_query_samples(sample_list):
        # QSL callback: put the sample list on dsQueue once per consumer, then
        # block until init_counter reaches 2 * num_sockets (presumably each
        # consumer bumps it again after loading - confirm in Consumer).
        global start_time
        for _ in range(num_sockets):
            dsQueue.put(sample_list)
        while init_counter.value < 2 * num_sockets:
            time.sleep(2)
        # reference point for the elapsed time passed to add_results()
        start_time = time.time()

    def unload_query_samples(sample_list):
        # QSL callback: nothing to release on this side
        pass

    import torch
    import criteo

    sut = lg.ConstructFastSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(
        total_samples.value,
        min(total_samples.value, args.samples_per_query_offline),
        load_query_samples,
        unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {
        "good": 0,
        "total": 0,
        "roc_auc": 0,
        "scenario": str(scenario)
    }

    lg.StartTest(sut, qsl, settings)

    # Fall back to the module-level item_timing when process_latencies left
    # last_timeing empty.
    if not last_timeing:
        last_timeing = item_timing
    if args.accuracy:
        result_dict["good"] = item_good
        result_dict["total"] = item_total
        result_dict["roc_auc"] = criteo.auc_score(item_results)

    final_results = {
        "runtime": "pytorch-native-dlrm",
        "version": torch.__version__,
        "time": int(time.time()),
        "cmdline": str(args),
    }

    add_results(final_results, "{}".format(scenario), result_dict, last_timeing,
                time.time() - start_time, args.accuracy)

    # Shutdown: wait for the work queue to drain, then queue one None per
    # consumer (presumably the stop sentinel - confirm in Consumer) and join them.
    inQueue.join()
    for _ in consumers:
        inQueue.put(None)
    for c in consumers:
        c.join()
    # presumably tells response_loadgen to stop - confirm
    outQueue.put(None)

    lg.DestroyQSL(qsl)
    lg.DestroyFastSUT(sut)

    # write final results (relative path: lands in output_dir after the chdir above)
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
def main():
    """Benchmark a DLRM model through LoadGen with the selected backend and dataset.

    Steps: build the backend and dataset, load the model, verify that both
    LoadGen config files exist (exit status 1 otherwise), warm the backend up
    on sample 0, configure ``lg.TestSettings`` from the command line, run the
    test and collect the results.  With ``args.output`` set, the process
    changes into that directory and writes ``results.json`` there.
    """
    global last_timeing

    args = get_args()
    log.info(args)

    def require_config(path):
        # Absolute path of a mandatory config file; logs and exits(1) if missing.
        resolved = os.path.abspath(path)
        if not os.path.exists(resolved):
            log.error("{} not found".format(resolved))
            sys.exit(1)
        return resolved

    # find backend
    backend = get_backend(
        args.backend,
        args.dataset,
        args.max_ind_range,
        args.data_sub_sample_rate,
        args.use_gpu,
    )

    # dataset to use; --count-samples can limit the number of samples used for testing
    dataset_cls, pre_proc, post_proc, dataset_kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = dataset_cls(
        data_path=args.dataset_path,
        name=args.dataset,
        pre_process=pre_proc,  # currently an identity function
        use_cache=args.cache,  # currently not used
        count=args.count_samples,
        samples_to_aggregate_fix=args.samples_to_aggregate_fix,
        samples_to_aggregate_min=args.samples_to_aggregate_min,
        samples_to_aggregate_max=args.samples_to_aggregate_max,
        samples_to_aggregate_quantile_file=args.samples_to_aggregate_quantile_file,
        samples_to_aggregate_trace_file=args.samples_to_aggregate_trace_file,
        test_num_workers=args.test_num_workers,
        max_ind_range=args.max_ind_range,
        sub_sample_rate=args.data_sub_sample_rate,
        mlperf_bin_loader=args.mlperf_bin_loader,
        **dataset_kwargs
    )

    # load model to backend
    model = backend.load(args.model_path, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    mlperf_conf = require_config(args.mlperf_conf)
    user_conf = require_config(args.user_conf)

    if args.output:
        out_path = os.path.abspath(args.output)
        os.makedirs(out_path, exist_ok=True)
        os.chdir(out_path)

    # total number of items the dataset exposes to the QSL
    count = ds.get_item_count()

    # warmup: five predictions on sample 0
    ds.load_query_samples([0])
    for _ in range(5):
        dense_x, ls_o, ls_i, _, _ = ds.get_samples([0])
        _ = backend.predict(dense_x, ls_o, ls_i)
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_cls = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner,
    }[scenario]
    runner = runner_cls(model, ds, args.threads, post_proc=post_proc,
                        max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        # LoadGen callback: hand the samples to the runner
        runner.enqueue(query_samples)

    def flush_queries():
        # LoadGen callback: nothing to flush
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [ns / NANO_SEC for ns in latencies_ns]

    settings = lg.TestSettings()
    # the user config is read second so it can override the MLPerf defaults
    for conf_path in (mlperf_conf, user_conf):
        settings.FromConfig(conf_path, args.model_path, args.scenario)
    settings.scenario = scenario

    # peak-performance search wins over accuracy, which wins over plain performance
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance
    elif args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    # optional command-line overrides
    if args.duration:
        settings.min_duration_ms = settings.max_duration_ms = args.duration

    if args.target_qps:
        target_qps = float(args.target_qps)
        settings.server_target_qps = target_qps
        settings.offline_expected_qps = target_qps

    if args.count_queries:
        settings.min_query_count = settings.max_query_count = args.count_queries

    if args.samples_per_query_multistream:
        settings.multi_stream_samples_per_query = args.samples_per_query_multistream

    if args.max_latency:
        latency_ns = int(args.max_latency * NANO_SEC)
        settings.server_target_latency_ns = latency_ns
        settings.multi_stream_target_latency_ns = latency_ns

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(
        count,
        min(count, args.samples_per_query_offline),
        ds.load_query_samples,
        ds.unload_query_samples,
    )

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "roc_auc": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)
    lg.StartTest(sut, qsl, settings)

    # fall back to the runner's own timings if LoadGen reported none
    last_timeing = last_timeing or runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario), result_dict, last_timeing,
                time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    # write final results (relative name: lands in the output directory)
    if args.output:
        with open("results.json", "w") as results_file:
            json.dump(final_results, results_file, sort_keys=True, indent=4)
def main():
    """Run a LoadGen benchmark against the Go shared-object backend.

    Initializes the Go side, configures ``lg.TestSettings`` from the
    command-line arguments, runs the test with LoadGen logs written to the log
    directory, optionally runs the dataset-specific accuracy script, and
    finalizes the Go side.

    Raises:
        RuntimeError: if Go initialization, sample loading/unloading or
            finalization reports an error, or the dataset has no accuracy
            branch.
        KeyError: in accuracy mode, if ``DATA_DIR`` is not set or the dataset
            has no accuracy script.
        subprocess.CalledProcessError: if the accuracy script fails.
    """
    global so
    global last_timeing
    global last_loaded
    global result_timeing

    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count = args.count
    count_override = bool(count)

    # Python signature:
    #   go_initialize(backend, model_path, dataset_path, count, use_gpu,
    #                 gpu_id, trace_level, max_batchsize)
    count, err = go_initialize(backend, args.model_path, args.dataset_path,
                               count, args.use_gpu, args.gpu_id,
                               args.trace_level, args.max_batchsize)
    if err != 'nil':
        print(err)
        raise RuntimeError('initialization in go failed')

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    # Resolve the log directory once.  Without --log_dir the original code
    # crashed later on realpath(None) / join(None, ...); fall back to the
    # current directory instead.
    if args.log_dir:
        log_dir = os.path.abspath(args.log_dir)
        os.makedirs(log_dir, exist_ok=True)
        log_path = os.path.realpath(args.log_dir)
    else:
        log_dir = log_path = os.getcwd()

    scenario = SCENARIO_MAP[args.scenario]

    def issue_queries(query_samples):
        # LoadGen callback: run inference through the Go object and report the
        # serialized outputs back to LoadGen.
        global so
        global last_timeing
        global result_timeing
        idx = np.array([q.index for q in query_samples]).astype(np.int32)
        query_id = [q.id for q in query_samples]
        if args.dataset == 'brats2019':
            # brats2019 is issued one sample at a time; responses are float16
            start = time.time()
            response_array_refs = []
            response = []
            for i, qid in enumerate(query_id):
                processed_results = so.IssueQuery(1, idx[i][np.newaxis])
                processed_results = json.loads(
                    processed_results.decode('utf-8'))
                response_array = array.array(
                    "B",
                    np.array(processed_results[0], np.float16).tobytes())
                # holds a reference to each buffer whose address is handed to LoadGen
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
            result_timeing.append(time.time() - start)
            lg.QuerySamplesComplete(response)
        else:
            # every other dataset is issued as one batch; responses are float32
            start = time.time()
            processed_results = so.IssueQuery(len(idx), idx)
            result_timeing.append(time.time() - start)
            processed_results = json.loads(processed_results.decode('utf-8'))
            response_array_refs = []
            response = []
            # loop variable renamed so it no longer shadows the index array
            for i, qid in enumerate(query_id):
                response_array = array.array(
                    "B",
                    np.array(processed_results[i], np.float32).tobytes())
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
            lg.QuerySamplesComplete(response)

    def flush_queries():
        # LoadGen callback: nothing is buffered on this side
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    def load_query_samples(sample_list):
        # QSL callback: load samples on the Go side and remember when that happened
        global so
        global last_loaded
        err = go_load_query_samples(sample_list, so)
        last_loaded = time.time()
        if err != '':
            print(err)
            raise RuntimeError('load query samples failed')

    def unload_query_samples(sample_list):
        # QSL callback: release samples on the Go side
        global so
        err = go_unload_query_samples(sample_list, so)
        if err != '':
            print(err)
            raise RuntimeError('unload query samples failed')

    settings = lg.TestSettings()
    # config files are only consulted when a model name was given
    if args.model_name != "":
        settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
        settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 500), load_query_samples,
                          unload_query_samples)

    log.info("starting {}".format(scenario))

    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # fall back to our own per-query timings if LoadGen reported none
    if not last_timeing:
        last_timeing = result_timeing

    if args.accuracy:
        accuracy_script_paths = {
            'coco':
            os.path.realpath(
                '../inference/vision/classification_and_detection/tools/accuracy-coco.py'
            ),
            'imagenet':
            os.path.realpath(
                '../inference/vision/classification_and_detection/tools/accuracy-imagenet.py'
            ),
            'squad':
            os.path.realpath('../inference/language/bert/accuracy-squad.py'),
            'brats2019':
            os.path.realpath(
                '../inference/vision/medical_imaging/3d-unet/accuracy-brats.py'
            ),
        }
        accuracy_script_path = accuracy_script_paths[args.dataset]
        accuracy_file_path = os.path.join(log_dir, 'mlperf_log_accuracy.json')
        data_dir = os.environ['DATA_DIR']

        # Commands are built as argument lists and run without a shell so that
        # paths containing spaces or shell metacharacters are passed verbatim.
        if args.dataset == 'coco':
            cmd = [
                'python3', accuracy_script_path,
                '--mlperf-accuracy-file', accuracy_file_path,
                '--coco-dir', data_dir,
            ]
            if args.use_inv_map:
                cmd.append('--use-inv-map')
        elif args.dataset == 'imagenet':
            cmd = [
                'python3', accuracy_script_path,
                '--mlperf-accuracy-file', accuracy_file_path,
                '--imagenet-val-file', os.path.join(data_dir, 'val_map.txt'),
            ]
        elif args.dataset == 'squad':
            vocab_path = os.path.join(data_dir, 'vocab.txt')
            val_path = os.path.join(data_dir, 'dev-v1.1.json')
            out_path = os.path.join(log_dir, 'predictions.json')
            cache_path = os.path.join(data_dir, 'eval_features.pickle')
            cmd = [
                'python3', accuracy_script_path,
                '--vocab_file', vocab_path,
                '--val_data', val_path,
                '--log_file', accuracy_file_path,
                '--out_file', out_path,
                '--features_cache_file', cache_path,
                '--max_examples', str(count),
            ]
        elif args.dataset == 'brats2019':
            base_dir = os.path.realpath(
                '../inference/vision/medical_imaging/3d-unet/build')
            post_dir = os.path.join(base_dir, 'postprocessed_data')
            label_dir = os.path.join(
                base_dir,
                'raw_data/nnUNet_raw_data/Task043_BraTS2019/labelsTr')
            os.makedirs(post_dir, exist_ok=True)
            cmd = [
                'python3', accuracy_script_path,
                '--log_file', accuracy_file_path,
                '--preprocessed_data_dir', data_dir,
                '--postprocessed_data_dir', post_dir,
                '--label_data_dir', label_dir,
            ]
        else:
            raise RuntimeError('Dataset not Implemented.')
        subprocess.check_call(cmd)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    # Python signature: go_finalize(so)
    err = go_finalize(so)
    if err != '':
        print(err)
        raise RuntimeError('finialize in go failed')
def main():
    """Benchmark a vision model through LoadGen with the selected backend.

    Builds the dataset and model, checks that the LoadGen config file exists
    (exit status 1 otherwise), warms the backend up, configures
    ``lg.TestSettings`` from the command line (including fixed single-stream
    latency expectations for a few model names), runs the test and gathers the
    results.  With ``args.output`` set, the process changes into that
    directory and writes ``results.json`` there.
    """
    global last_timeing

    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format or backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count = args.count
    count_override = bool(count)

    # dataset to use
    dataset_cls, pre_proc, post_proc, dataset_kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = dataset_cls(
        data_path=args.dataset_path,
        image_list=args.dataset_list,
        name=args.dataset,
        image_format=image_format,
        pre_process=pre_proc,
        use_cache=args.cache,
        count=count,
        **dataset_kwargs
    )

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    config = os.path.abspath(args.config)
    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if args.output:
        out_path = os.path.abspath(args.output)
        os.makedirs(out_path, exist_ok=True)
        os.chdir(out_path)

    # from here on, count is the number of items the dataset exposes
    count = ds.get_item_count()

    # warmup: two predictions over the first max_batchsize samples
    warmup_queries = range(args.max_batchsize)
    ds.load_query_samples(warmup_queries)
    for _ in range(2):
        images, _ = ds.get_samples(warmup_queries)
        _ = backend.predict({backend.inputs[0]: images})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_cls = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner,
    }[scenario]
    runner = runner_cls(model, ds, args.threads, post_proc=post_proc,
                        max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        # LoadGen callback: hand the samples to the runner
        runner.enqueue(query_samples)

    def flush_queries():
        # LoadGen callback: nothing to flush
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [ns / NANO_SEC for ns in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(config, args.model_name, args.scenario)
    settings.scenario = scenario

    # peak-performance search wins over accuracy, which wins over plain performance
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance
    elif args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    if args.time:
        # override the time we want to run
        duration_ms = args.time * MILLI_SEC
        settings.min_duration_ms = duration_ms
        settings.max_duration_ms = duration_ms

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query

    if args.max_latency:
        latency_ns = int(args.max_latency * NANO_SEC)
        settings.server_target_latency_ns = latency_ns
        settings.multi_stream_target_latency_ns = latency_ns

    # override target latency when it needs to be less than 1ms
    expected_latency_ns = {
        "mobilenet": 200000,
        "resnet50": 900000,
        "ssd-mobilenet": 1000000,
    }
    if args.model_name in expected_latency_ns:
        settings.single_stream_expected_latency_ns = expected_latency_ns[args.model_name]

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 1024), ds.load_query_samples,
                          ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)

    if not args.enable_trace:
        # explicit log settings are only needed to switch tracing off
        quiet_log_settings = lg.LogSettings()
        quiet_log_settings.enable_trace = False
        lg.StartTestWithLogSettings(sut, qsl, settings, quiet_log_settings)
    else:
        lg.StartTest(sut, qsl, settings)

    # fall back to the runner's own timings if LoadGen reported none
    last_timeing = last_timeing or runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario), result_dict, last_timeing,
                time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    # write final results (relative name: lands in the output directory)
    if args.output:
        with open("results.json", "w") as results_file:
            json.dump(final_results, results_file, sort_keys=True, indent=4)
def main():
    """Run the multi-instance BERT benchmark under LoadGen.

    Reads the LoadGen settings for model ``"bert"``, starts the ``Consumer``
    workers (per bucket of ``machine_conf.json`` in the Server scenario,
    otherwise ``num_ins`` workers on one shared queue), runs the test with
    logs written to ``build/logs``, shuts the workers down and, in accuracy
    mode, runs ``accuracy-squad.py`` on the resulting accuracy log.
    """
    global num_ins
    global num_cpus
    global in_queue_cnt
    global out_queue_cnt
    global batching
    global queries_so_far
    global Latencies

    queries_so_far = 0

    args = get_args()
    log.info(args)
    scenario = args.scenario
    accuracy_mode = args.accuracy
    perf_count = args.perf_count
    batch_size = args.batch_size
    num_ins = args.num_instance
    num_cpus = args.num_phy_cpus
    batching = args.batching

    # Read Loadgen and workload config parameters
    settings = lg.TestSettings()
    settings.scenario = scenario_map[scenario]
    # user_conf is applied second, so it can override values from mlperf_conf
    settings.FromConfig(args.mlperf_conf, "bert", scenario)
    settings.FromConfig(args.user_conf, "bert", scenario)
    settings.mode = lg.TestMode.AccuracyOnly if accuracy_mode else lg.TestMode.PerformanceOnly

    # Establish communication queues
    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    calibrate_counter = multiprocessing.Value("i", 0)
    out_queue = multiprocessing.Queue()

    # Create consumers
    consumers = []
    if scenario == "Server":
        # Server: one JoinableQueue per bucket (keyed by cutoff) and
        # buckets[cutoff]["instances"] consumers per bucket.
        from parse_server_config import configParser
        buckets = configParser("machine_conf.json")
        cutoffs = list(buckets.keys())
        batch_sizes = {}
        in_queue = {j: multiprocessing.JoinableQueue() for j in buckets}
        proc_idx = 0
        num_cpus = 0
        total_ins = 0
        for cutoff in list(buckets.keys()):
            batch_sizes[cutoff] = buckets[cutoff]["batch_size"]
            num_ins = buckets[cutoff]["instances"]
            cpus_per_instance = buckets[cutoff]["cpus_per_instance"]
            # NOTE(review): plain assignment, so num_cpus ends up holding the
            # last bucket's value rather than a sum - confirm this is intended
            num_cpus = num_ins * cpus_per_instance
            total_ins += num_ins
            for j in range(num_ins):
                consumer = Consumer(in_queue[cutoff], out_queue, lock, init_counter,
                                    calibrate_counter, proc_idx, num_ins, args, cutoff)
                # give each consumer a contiguous index range of
                # cpus_per_instance entries starting at proc_idx
                consumer.start_core_idx = proc_idx
                consumer.end_core_idx = proc_idx + cpus_per_instance - 1
                consumers.append(consumer)
                proc_idx = consumer.end_core_idx + 1
        # from here on num_ins is the total across all buckets
        num_ins = total_ins
    else:
        # Other scenarios: all consumers share one queue
        total_ins = num_ins
        in_queue = MultiprocessShapeBasedQueue()
        consumers = [
            Consumer(in_queue, out_queue, lock, init_counter, calibrate_counter, i, num_ins, args)
            for i in range(num_ins)
        ]

    for c in consumers:
        c.start()

    # Dataset object used by constructQSL
    data_set = BERTDataSet(args.vocab, args.perf_count)
    if scenario == "Server":
        issue_queue = InQueueServer(in_queue, batch_sizes, data_set, settings.min_query_count)
    else:
        issue_queue = InQueue(in_queue, batch_size, data_set)

    # Wait until all sub-processors are ready
    block_until(init_counter, total_ins, 2)

    # Start response thread; daemon so it does not block interpreter exit
    response_worker = threading.Thread(target=response_loadgen, args=(out_queue, ))
    response_worker.daemon = True
    response_worker.start()

    def issue_queries(query_samples):
        # It's called by loadgen to send query to SUT
        issue_queue.put(query_samples)

    # flush_queries, process_latencies and the two QSL callbacks are
    # module-level functions defined outside this block
    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(data_set.count, data_set.perf_count, load_query_samples, unload_query_samples)

    log_path = "build/logs"
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # Wait until outQueue done: poll the module-level counters until
    # out_queue_cnt has caught up with in_queue_cnt
    while out_queue_cnt < in_queue_cnt:
        time.sleep(0.2)

    if scenario == "Server":
        for i in in_queue:
            in_queue[i].join()
            # NOTE(review): queues cpus_per_instance Nones per bucket, while
            # the bucket was given buckets[i]["instances"] consumers and the
            # non-Server branch queues one None per instance - confirm the count
            for j in range(buckets[i]["cpus_per_instance"]):
                in_queue[i].put(None)
    else:
        # one None per instance (presumably the stop sentinel - confirm in Consumer)
        for i in range(num_ins):
            in_queue.put(None)
    for c in consumers:
        c.join()
    # presumably tells response_loadgen to stop - confirm
    out_queue.put(None)

    if accuracy_mode:
        # evaluate the accuracy log that LoadGen wrote into log_path
        cmd = "python accuracy-squad.py --log_file={}/mlperf_log_accuracy.json".format(
            log_path)
        subprocess.check_call(cmd, shell=True)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
def main():
    """
    Drives a LoadGen run of 3D UNet on the KiTS19 kidney tumor segmentation task:
        1. pick the SUT factory for the requested backend and build SUT + QSL
        2. configure LoadGen for the requested scenario and mode
        3. point the MLPerf logger at build/logs
        4. start LoadGen
        5. in accuracy mode, evaluate the inference results
        6. destroy SUT and QSL
    """
    args = get_args()

    # only the SUT module of the requested backend is imported
    backend = args.backend
    if backend == "pytorch":
        from pytorch_SUT import get_sut
    elif backend == "pytorch_checkpoint":
        from pytorch_checkpoint_SUT import get_sut
    elif backend == "onnxruntime":
        from onnxruntime_SUT import get_sut
    elif backend == "tensorflow":
        from tensorflow_SUT import get_sut
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    # the returned object carries both the SUT (.sut) and the QSL (.qsl.qsl)
    sut = get_sut(args.model, args.preprocessed_data_dir, args.performance_count)

    # scenarios in LoadGen, keyed by their command-line spelling
    scenario_map = {
        "SingleStream": lg.TestScenario.SingleStream,
        "Offline": lg.TestScenario.Offline,
        "Server": lg.TestScenario.Server,
        "MultiStream": lg.TestScenario.MultiStream,
    }

    # setup LoadGen; the user config is read after the MLPerf config
    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    for conf in (args.mlperf_conf, args.user_conf):
        settings.FromConfig(conf, "3d-unet", args.scenario)
    settings.mode = (lg.TestMode.AccuracyOnly
                     if args.accuracy else lg.TestMode.PerformanceOnly)

    # set up mlperf logger
    log_dir = (Path("build") / "logs").absolute()
    log_dir.mkdir(parents=True, exist_ok=True)
    output_settings = lg.LogOutputSettings()
    output_settings.outdir = str(log_dir)
    output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = output_settings

    # start running test, from LoadGen
    print("Running Loadgen test...")
    lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)

    # if needed check accuracy
    if args.accuracy:
        print("Checking accuracy...")
        subprocess.check_call("python3 accuracy_kits.py", shell=True)

    # all done
    print("Done!")

    # cleanup
    print("Destroying SUT...")
    lg.DestroySUT(sut.sut)

    print("Destroying QSL...")
    lg.DestroyQSL(sut.qsl.qsl)
def benchmark_using_loadgen():
    """Perform the benchmark using python API for the LoadGen library.

    Builds ``lg.TestSettings`` from the module-level ``LOADGEN_*`` constants,
    starts the funnel thread that runs ``send_responses``, optionally sends
    warm-up samples, runs the test, and tears everything down.  If
    ``SIDELOAD_JSON`` is set, ``openme_data`` is dumped to that file after a
    successful run.

    Teardown (stopping the funnel thread, closing the worker queues and
    destroying QSL/SUT) runs even when warm-up or the test raises, so a
    failure cannot leave the non-daemon funnel thread running.
    """
    global funnel_should_be_running, warmup_mode, openme_data

    scenario = {
        'SingleStream': lg.TestScenario.SingleStream,
        'MultiStream': lg.TestScenario.MultiStream,
        'Server': lg.TestScenario.Server,
        'Offline': lg.TestScenario.Offline,
    }[LOADGEN_SCENARIO]

    mode = {
        'AccuracyOnly': lg.TestMode.AccuracyOnly,
        'PerformanceOnly': lg.TestMode.PerformanceOnly,
        'SubmissionRun': lg.TestMode.SubmissionRun,
    }[LOADGEN_MODE]

    ts = lg.TestSettings()
    if LOADGEN_CONFIG_FILE:
        ts.FromConfig(LOADGEN_CONFIG_FILE, 'random_model_name', LOADGEN_SCENARIO)
    ts.scenario = scenario
    ts.mode = mode

    # Optional overrides; each one is applied only when its constant is truthy.
    if LOADGEN_MULTISTREAMNESS:
        ts.multi_stream_samples_per_query = int(LOADGEN_MULTISTREAMNESS)

    if LOADGEN_MAX_DURATION_S:
        ts.max_duration_ms = int(LOADGEN_MAX_DURATION_S)*1000

    if LOADGEN_COUNT_OVERRIDE:
        ts.min_query_count = int(LOADGEN_COUNT_OVERRIDE)
        ts.max_query_count = int(LOADGEN_COUNT_OVERRIDE)

    if LOADGEN_TARGET_QPS:
        target_qps = float(LOADGEN_TARGET_QPS)
        ts.multi_stream_target_qps = target_qps
        ts.server_target_qps = target_qps
        ts.offline_expected_qps = target_qps

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(LOADGEN_DATASET_SIZE, LOADGEN_BUFFER_SIZE, load_query_samples, unload_query_samples)

    log_settings = lg.LogSettings()
    log_settings.enable_trace = False

    funnel_thread = threading.Thread(target=send_responses, args=())
    funnel_should_be_running = True
    funnel_thread.start()

    try:
        if LOADGEN_WARMUP_SAMPLES:
            warmup_id_range = list(range(LOADGEN_WARMUP_SAMPLES))
            load_query_samples(warmup_id_range)

            warmup_mode = True
            print("Sending out the warm-up samples, waiting for responses...")
            # each warm-up sample uses its index as the query id as well
            issue_queries([lg.QuerySample(sample_id, sample_id) for sample_id in warmup_id_range])

            while len(in_progress) > 0:     # waiting for the in_progress queue to clear up
                time.sleep(1)
            print(" Done!")

            warmup_mode = False

        lg.StartTestWithLogSettings(sut, qsl, ts, log_settings)
    finally:
        funnel_should_be_running = False    # politely ask the funnel_thread to end
        funnel_thread.join()                # wait for it to actually end

        from_workers.close()
        to_workers.close()

        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    if SIDELOAD_JSON:
        with open(SIDELOAD_JSON, 'w') as sideload_fd:
            json.dump(openme_data, sideload_fd, indent=4, sort_keys=True)
def run():
    """Runs the offline mode.

    Builds and exports the model for one or several TPUs, loads it, warms the
    TPUs up, then runs one LoadGen test per (scenario, max latency) pair taken
    from the comma-separated ``FLAGS.scenario`` / ``FLAGS.max_latency`` values.
    Results go to ``<FLAGS.outdir>/results.txt`` when ``FLAGS.outdir`` is set,
    otherwise to stdout.
    """
    global last_timing

    # Initialization
    final_results, count, runner = setup()

    #
    # run the benchmark with timing
    #
    runner.start_pool()

    def issue_query_offline(query_samples):
        """Adds query to the queue."""
        # NOTE(review): single-iteration loop whose variable is reused by the
        # inner loop - looks like a leftover; confirm it can be dropped
        for i in [1]:
            idx = np.array([q.index for q in query_samples])
            query_id = np.array([q.id for q in query_samples])
            # only the first entry of FLAGS.batch_size is used here
            batch_size = FLAGS.batch_size[0]
            # enqueue the samples in slices of at most batch_size
            for i in range(0, len(query_samples), batch_size):
                runner.enqueue(query_id[i:i + batch_size], idx[i:i + batch_size])

    def flush_queries():
        # LoadGen callback: nothing is buffered on this side
        pass

    def process_latencies(latencies_ns):
        # LoadGen callback: keep the reported latencies, each divided by 1e9
        global last_timing
        last_timing = [t / 1e9 for t in latencies_ns]

    sut = lg.ConstructSUT(issue_query_offline, flush_queries, process_latencies)

    # one master address per TPU; only filled in the multi-TPU branch below
    masters = []

    # The exported model goes to <export_outdir>/export_model; export_outdir
    # defaults to outdir, which defaults to a fresh temporary directory.
    outdir = FLAGS.outdir if FLAGS.outdir else tempfile.mkdtemp()
    export_outdir = FLAGS.export_outdir if FLAGS.export_outdir else outdir
    export_outdir = os.path.join(export_outdir, "export_model")

    def load_query_samples(sample_list):
        """Load query samples."""
        runner.ds.load_query_samples(sample_list)
        # Push the in-memory image list to the model(s).
        if FLAGS.num_tpus == 1:
            runner.model.update_qsl(runner.ds.get_image_list_inmemory())
        else:
            for i in range(FLAGS.num_tpus):
                runner.models[i].update_qsl(runner.ds.get_image_list_inmemory())

    def warmup():
        """Warmup the TPUs."""
        load_query_samples([0])
        if FLAGS.num_tpus == 1:
            log.info("warmup ...")
            runner.warmup(0)
            log.info("warmup done")
        else:
            for cloud_tpu_id in range(FLAGS.num_tpus):
                log.info("warmup %d...", cloud_tpu_id)
                runner.warmup(0, cloud_tpu_id)
                log.info("warmup %d done", cloud_tpu_id)

        # After warmup, give the system a moment to quiesce before putting it under
        # load.
        time.sleep(1)

    if FLAGS.num_tpus == 1:
        # Single TPU: resolve its master, then build, export and load the model.
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
        master = tpu_cluster_resolver.get_master()

        runner.model.build_and_export(
            FLAGS.model,
            export_model_path=export_outdir,
            batch_size=FLAGS.batch_size,
            master=master,
            scenario=FLAGS.scenario)
        runner.model.load(export_model_path=export_outdir, master=master)
    else:
        # Use the first TPU instance to build and export the graph.
        # FLAGS.tpu_name is a comma-separated list in this branch.
        tpu_names = FLAGS.tpu_name
        tpu_names = tpu_names.split(",")
        for tpu_name in tpu_names:
            tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
                tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
            masters.append(tpu_cluster_resolver.get_master())

        runner.models[0].build_and_export(
            FLAGS.model,
            export_model_path=export_outdir,
            batch_size=FLAGS.batch_size,
            master=masters[0],
            scenario=FLAGS.scenario)

        def init_fn(cloud_tpu_id):
            """Load the exported model on one cloud TPU."""
            runner.models[cloud_tpu_id].load(
                export_model_path=export_outdir, master=masters[cloud_tpu_id])

        # Load the exported model on all TPUs in parallel, one thread per TPU.
        threads = []
        for i in range(FLAGS.num_tpus):
            thread = threading.Thread(target=init_fn, args=(i,))
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()

    warmup()

    qsl = lg.ConstructQSL(count, min(count, 1024), load_query_samples,
                          runner.ds.unload_query_samples)

    test_scenarios = FLAGS.scenario
    if test_scenarios is None:
        test_scenarios_list = []
    else:
        test_scenarios_list = test_scenarios.split(",")

    max_latency = FLAGS.max_latency
    max_latency_list = max_latency.split(",")
    # One LoadGen run per (scenario, target latency) combination; SUT and QSL
    # are shared across all runs.
    for scenario in test_scenarios_list:
        for target_latency in max_latency_list:
            log.info("starting %s, latency=%s", scenario, target_latency)
            settings = lg.TestSettings()
            log.info(scenario)

            if FLAGS.accuracy:
                settings.mode = lg.TestMode.AccuracyOnly
            settings.scenario = utils.SCENARIO_MAP[scenario]

            if FLAGS.qps:
                qps = float(FLAGS.qps)
                settings.server_target_qps = qps
                settings.offline_expected_qps = qps

            if FLAGS.time:
                settings.min_duration_ms = 60 * MILLI_SEC
                settings.max_duration_ms = 0
                # the query count is derived from qps (100 if unset) and FLAGS.time
                qps = FLAGS.qps or 100
                settings.min_query_count = qps * FLAGS.time
                settings.max_query_count = int(1.1 * qps * FLAGS.time)
            else:
                settings.min_query_count = (1 << 21)

            # NOTE(review): parses as `time or (qps and accuracy)`, so setting
            # FLAGS.time forces PerformanceOnly even with FLAGS.accuracy -
            # confirm the intended grouping
            if FLAGS.time or FLAGS.qps and FLAGS.accuracy:
                settings.mode = lg.TestMode.PerformanceOnly
            # FIXME: add SubmissionRun once available

            # the same target latency is applied to all three scenario settings
            target_latency_ns = int(float(target_latency) * (NANO_SEC / MILLI_SEC))
            settings.single_stream_expected_latency_ns = target_latency_ns
            settings.multi_stream_target_latency_ns = target_latency_ns
            settings.server_target_latency_ns = target_latency_ns

            log_settings = lg.LogSettings()
            # TODO(brianderson): figure out how to use internal file path.
            log_settings.log_output.outdir = tempfile.mkdtemp()
            log_settings.log_output.copy_detail_to_stdout = True
            log_settings.log_output.copy_summary_to_stdout = True
            log_settings.enable_trace = False

            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, FLAGS.accuracy)

            lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

            if FLAGS.accuracy:
                runner.get_post_process().finalize(result_dict, runner.ds)

            # results are keyed "<scenario>-<target latency>"
            utils.add_results(
                final_results, "{}-{}".format(scenario, target_latency),
                result_dict, last_timing, time.time() - runner.ds.last_loaded)

    #
    # write final results
    #
    if FLAGS.outdir:
        outfile = os.path.join(FLAGS.outdir, "results.txt")
        with tf.gfile.Open(outfile, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
    else:
        json.dump(final_results, sys.stdout, sort_keys=True, indent=4)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
def main():
    """Run one LoadGen benchmark for the backend and model chosen on the command line.

    Steps, in order:
      1. Parse the arguments, pick the backend, build the dataset and load the model.
      2. Verify that the mlperf and user config files exist (exit code 1 otherwise).
      3. If ``--output`` is given, create that directory, stage an ``audit.config``
         into it when one is found, and chdir there.
      4. Warm the backend up, build the LoadGen settings, SUT and QSL, and run the test.
      5. Log the LoadGen summary, write ``accuracy.txt`` in accuracy mode, write
         ``results.json`` when ``--output`` is given, and clean up.

    Side effects: may change the working directory, writes files into the output
    directory and updates the module-level ``last_timeing``.
    """
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)
    if getattr(backend, "max_batchsize", -1) != -1:
        backend.max_batchsize = args.max_batchsize

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count = args.count
    count_override = bool(count)

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    # Location of the audit.config copy made by this run (None when nothing was copied),
    # so that only a file we created ourselves is removed at the end.
    audit_config_cp_loc = None
    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)

        # Check if audit.config file is used, copy to output directory before
        # we chdir to that location so loadgen can find it
        audit_files = glob.glob(
            "ncoresw/mlperf/vision/classification_and_detection/*audit.config")
        if audit_files:
            log.info("Found audit.config (" + audit_files[0] + ")")
            audit_config_cp_loc = os.path.join(output_dir, "audit.config")
            # If user already put audit.config at `output` directory, then use
            # that one. Otherwise, copy the one we found in the current
            # directory (before chdir to new output directory).
            if os.path.exists(audit_config_cp_loc):
                log.info(
                    "WARNING: audit.config already exists, so cannot copy over new audit file!"
                )
                log.info(audit_config_cp_loc)
                audit_config_cp_loc = None
            else:
                shutil.copy(audit_files[0], audit_config_cp_loc)

        os.chdir(output_dir)
    else:
        # Without --output there is no chdir; the log files read below are then
        # looked up in the current working directory.
        # NOTE(review): presumably loadgen writes its logs into the cwd - confirm.
        output_dir = os.getcwd()

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    warmup_queries = range(args.max_batchsize)
    ds.load_query_samples(warmup_queries)
    for _ in range(2):
        img, _ = ds.get_samples(warmup_queries)
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner,
    }
    runner = runner_map[scenario](model,
                                  ds,
                                  args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
    settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    # override target latency when it needs to be less than 1ms
    if args.model_name == "mobilenet":
        settings.single_stream_expected_latency_ns = 200000
    elif args.model_name == "resnet50":
        settings.single_stream_expected_latency_ns = 900000
    elif args.model_name == "ssd-mobilenet":
        settings.single_stream_expected_latency_ns = 900000

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 1024), ds.load_query_samples,
                          ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)
    lg.StartTest(sut, qsl, settings)

    # Fall back to the runner's own timings when process_latencies left nothing behind.
    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing, time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    # Dump the summary logs to stdout for convenience
    log.info("Output dir: " + os.path.abspath(output_dir))
    with open(os.path.join(output_dir, "mlperf_log_summary.txt"), 'r') as f:
        log.info(f.read())

    # Output accuracy txt file
    if args.accuracy:
        accuracy_log = os.path.join(output_dir, "mlperf_log_accuracy.json")
        with open(os.path.join(output_dir, "accuracy.txt"), "w") as f_acc:
            # Exactly one branch must run per model: the two SSD checks and the
            # ImageNet fallback form a single if/elif/else chain.
            if args.model_name == "ssd-mobilenet":
                # SSD accuracy calculation
                # ----------------------------------------
                # The mAP is already stored in result_dict["mAP"], but we'll call
                # `accuracy_coco()` just to keep the submission process consistent.
                accuracy_str = accuracy.CocoAcc(
                    mlperf_accuracy_file=accuracy_log,
                    coco_dir=args.dataset_path).get_accuracy() + "\n"
            elif args.model_name == "ssd-resnet34":
                accuracy_str = accuracy.CocoAcc(
                    mlperf_accuracy_file=accuracy_log,
                    coco_dir=args.dataset_path,
                    use_inv_map=True,
                    remove_48_empty_images=False).get_accuracy() + "\n"
            else:
                # ImageNet accuracy calculation
                # ----------------------------------------
                # The good / total values are already stored in result_dict["good"]
                # and result_dict["total"], but we'll call `accuracy_imagenet()`
                # just to keep the submission process consistent.
                accuracy_str = accuracy.ImagenetAcc(
                    mlperf_accuracy_file=accuracy_log,
                    imagenet_val_file=os.path.join(
                        args.dataset_path, "val_map.txt")).get_accuracy() + "\n"
            f_acc.write(accuracy_str)
            log.info(accuracy_str)

    #
    # write final results
    #
    if args.output:
        # Relative path: the working directory is the output directory at this point.
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)

    # Remove only the audit.config copy made by this run.
    if audit_config_cp_loc is not None:
        os.remove(audit_config_cp_loc)

    backend_destroy = getattr(backend, "destroy", None)
    if callable(backend_destroy):
        backend.destroy()