def main(): args = get_args() if args.backend == "pytorch": assert not args.quantized, "Quantized model is only supported by onnxruntime backend!" assert not args.profile, "Profiling is only supported by onnxruntime backend!" from pytorch_SUT import get_pytorch_sut sut = get_pytorch_sut() elif args.backend == "tf": assert not args.quantized, "Quantized model is only supported by onnxruntime backend!" assert not args.profile, "Profiling is only supported by onnxruntime backend!" from tf_SUT import get_tf_sut sut = get_tf_sut() elif args.backend == "tf_estimator": assert not args.quantized, "Quantized model is only supported by onnxruntime backend!" assert not args.profile, "Profiling is only supported by onnxruntime backend!" from tf_estimator_SUT import get_tf_estimator_sut sut = get_tf_estimator_sut() elif args.backend == "onnxruntime": from onnxruntime_SUT import get_onnxruntime_sut sut = get_onnxruntime_sut(args) else: raise ValueError("Unknown backend: {:}".format(args.backend)) settings = lg.TestSettings() settings.scenario = scenario_map[args.scenario] settings.FromConfig(args.mlperf_conf, "bert", args.scenario) settings.FromConfig(args.user_conf, "bert", args.scenario) if args.accuracy: settings.mode = lg.TestMode.AccuracyOnly else: settings.mode = lg.TestMode.PerformanceOnly log_path = "build/logs" if not os.path.exists(log_path): os.makedirs(log_path) log_output_settings = lg.LogOutputSettings() log_output_settings.outdir = log_path log_output_settings.copy_summary_to_stdout = True log_settings = lg.LogSettings() log_settings.log_output = log_output_settings print("Running LoadGen test...") lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings) if args.accuracy: cmd = "python3 accuracy-squad.py" subprocess.check_call(cmd, shell=True) print("Done!") print("Destroying SUT...") lg.DestroySUT(sut.sut) print("Destroying QSL...") lg.DestroyQSL(sut.qsl.qsl)
def main():
    args = get_args()

    batch_size = args.offline_batch_size if args.scenario == "Offline" else 1

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario)
    settings.FromConfig(args.user_conf, "rnnt", args.scenario)

    issued_query_count = None
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
        issued_query_count = 2513
    else:
        settings.mode = lg.TestMode.PerformanceOnly
        issued_query_count = settings.min_query_count

    log_path = args.log_dir
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    if args.backend == "pytorch":
        from pytorch_SUT import PytorchSUT
        sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint,
                         args.dataset_dir, args.manifest, args.perf_count,
                         issued_query_count, args.scenario, args.machine_conf,
                         batch_size, args.cores_for_loadgen,
                         args.cores_per_instance, args.debug, args.cosim,
                         args.profile, args.ipex, args.bf16, args.warmup)
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    print("Running Loadgen test...")
    lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)

    if args.accuracy:
        cmd = f"python3 accuracy_eval.py --log_dir {log_path} --dataset_dir {args.dataset_dir} --manifest {args.manifest}"
        print(f"Running accuracy script: {cmd}")
        subprocess.check_call(cmd, shell=True)

    lg.DestroySUT(sut.sut)
    print("Done!")
def main(): args = get_args() if args.backend == "pytorch": from pytorch_SUT import get_pytorch_sut sut = get_pytorch_sut(args.model_dir, args.preprocessed_data_dir, args.performance_count) elif args.backend == "onnxruntime": from onnxruntime_SUT import get_onnxruntime_sut sut = get_onnxruntime_sut(args.onnx_model, args.preprocessed_data_dir, args.performance_count) else: raise ValueError("Unknown backend: {:}".format(args.backend)) settings = lg.TestSettings() settings.scenario = scenario_map[args.scenario] settings.FromConfig(args.mlperf_conf, "3d-unet", args.scenario) settings.FromConfig(args.user_conf, "3d-unet", args.scenario) if args.accuracy: settings.mode = lg.TestMode.AccuracyOnly else: settings.mode = lg.TestMode.PerformanceOnly log_path = "build/logs" if not os.path.exists(log_path): os.makedirs(log_path) log_output_settings = lg.LogOutputSettings() log_output_settings.outdir = log_path log_output_settings.copy_summary_to_stdout = True log_settings = lg.LogSettings() log_settings.log_output = log_output_settings print("Running Loadgen test...") lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings) if args.accuracy: print("Running accuracy script...") cmd = "python3 brats_eval.py" subprocess.check_call(cmd, shell=True) print("Done!") print("Destroying SUT...") lg.DestroySUT(sut.sut) print("Destroying QSL...") lg.DestroyQSL(sut.qsl.qsl)
def __init__(self, session, ds, optimization_config, onnx_output_names):
    self.session = session
    self.threads = optimization_config.threads_num
    self.max_batchsize = optimization_config.dynamic_batching_size
    self.ds = ds
    self.onnx_output_names = onnx_output_names
    self.guess = None
    self.cv = threading.Condition()
    self.done = False
    self.q_idx = []
    self.q_query_id = []
    self.workers = []

    self.settings = lg.TestSettings()
    self.settings.scenario = lg.TestScenario.Server
    self.settings.mode = lg.TestMode.FindPeakPerformance

    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = optimization_config.result_path
    log_output_settings.copy_summary_to_stdout = False
    self.log_settings = lg.LogSettings()
    self.log_settings.enable_trace = False
    self.log_settings.log_output = log_output_settings

    self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries,
                               self.process_latencies)
    self.qsl = lg.ConstructQSL(QUERY_COUNT, QUERY_COUNT,
                               ds.load_query_samples, ds.unload_query_samples)

    self.settings.server_coalesce_queries = True
    self.settings.server_target_latency_ns = int(
        optimization_config.max_latency_ms * NANO_SEC / MILLI_SEC)
    self.settings.server_target_latency_percentile = \
        optimization_config.max_latency_percentile
    self.settings.min_duration_ms = optimization_config.min_duration_sec * MILLI_SEC

    # start all threads
    for _ in range(self.threads):
        worker = threading.Thread(target=self.handle_tasks, args=(self.cv,))
        worker.daemon = True
        self.workers.append(worker)
        worker.start()
    time.sleep(1)
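# The constructor above only prepares LoadGen state and worker threads; a
# hedged sketch of how it might be driven follows. The method name
# `run_search` and the shutdown convention (setting `self.done` and notifying
# the condition variable so `handle_tasks` can exit) are assumptions, not part
# of the original class.
def run_search(self):
    # launch the FindPeakPerformance run with the settings prepared in __init__
    lg.StartTestWithLogSettings(self.sut, self.qsl, self.settings,
                                self.log_settings)
    # ask the worker threads to drain and exit, then release LoadGen handles
    with self.cv:
        self.done = True
        self.cv.notify_all()
    for worker in self.workers:
        worker.join()
    lg.DestroyQSL(self.qsl)
    lg.DestroySUT(self.sut)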
def main(): args = get_args() if args.backend == "pytorch": from pytorch_SUT import PytorchSUT sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint, args.dataset_dir, args.manifest, args.perf_count) else: raise ValueError("Unknown backend: {:}".format(args.backend)) settings = lg.TestSettings() settings.scenario = scenario_map[args.scenario] settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario) settings.FromConfig(args.user_conf, "rnnt", args.scenario) if args.accuracy: settings.mode = lg.TestMode.AccuracyOnly else: settings.mode = lg.TestMode.PerformanceOnly log_path = args.log_dir os.makedirs(log_path, exist_ok=True) log_output_settings = lg.LogOutputSettings() log_output_settings.outdir = log_path log_output_settings.copy_summary_to_stdout = True log_settings = lg.LogSettings() log_settings.log_output = log_output_settings print("Running Loadgen test...") lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings) if args.accuracy: cmd = f"python3 accuracy_eval.py --log_dir {log_path} --dataset_dir {args.dataset_dir} --manifest {args.manifest}" print(f"Running accuracy script: {cmd}") subprocess.check_call(cmd, shell=True) print("Done!")
def main(): """ Runs 3D UNet performing KiTS19 Kidney Tumore Segmentation task as below: 1. instantiate SUT and QSL for the chosen backend 2. configure LoadGen for the chosen scenario 3. configure MLPerf logger 4. start LoadGen 5. collect logs and if needed evaluate inference results 6. clean up """ # scenarios in LoadGen scenario_map = { "SingleStream": lg.TestScenario.SingleStream, "Offline": lg.TestScenario.Offline, "Server": lg.TestScenario.Server, "MultiStream": lg.TestScenario.MultiStream } args = get_args() # instantiate SUT as per requested backend; QSL is also instantiated if args.backend == "pytorch": from pytorch_SUT import get_sut elif args.backend == "pytorch_checkpoint": from pytorch_checkpoint_SUT import get_sut elif args.backend == "onnxruntime": from onnxruntime_SUT import get_sut elif args.backend == "tensorflow": from tensorflow_SUT import get_sut else: raise ValueError("Unknown backend: {:}".format(args.backend)) sut = get_sut(args.model, args.preprocessed_data_dir, args.performance_count) # setup LoadGen settings = lg.TestSettings() settings.scenario = scenario_map[args.scenario] settings.FromConfig(args.mlperf_conf, "3d-unet", args.scenario) settings.FromConfig(args.user_conf, "3d-unet", args.scenario) if args.accuracy: settings.mode = lg.TestMode.AccuracyOnly else: settings.mode = lg.TestMode.PerformanceOnly # set up mlperf logger log_path = Path("build", "logs").absolute() log_path.mkdir(parents=True, exist_ok=True) log_output_settings = lg.LogOutputSettings() log_output_settings.outdir = str(log_path) log_output_settings.copy_summary_to_stdout = True log_settings = lg.LogSettings() log_settings.log_output = log_output_settings # start running test, from LoadGen print("Running Loadgen test...") lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings) # if needed check accuracy if args.accuracy: print("Checking accuracy...") cmd = "python3 accuracy_kits.py" subprocess.check_call(cmd, shell=True) # all done print("Done!") # cleanup print("Destroying SUT...") lg.DestroySUT(sut.sut) print("Destroying QSL...") lg.DestroyQSL(sut.qsl.qsl)
def eval_func(model):
    args = get_args()

    if args.backend == "pytorch":
        from pytorch_SUT import get_pytorch_sut
        sut = get_pytorch_sut(model, args.preprocessed_data_dir,
                              args.performance_count)
    elif args.backend == "onnxruntime":
        from onnxruntime_SUT import get_onnxruntime_sut
        sut = get_onnxruntime_sut(args.model, args.preprocessed_data_dir,
                                  args.performance_count)
    elif args.backend == "tf":
        from tf_SUT import get_tf_sut
        sut = get_tf_sut(args.model, args.preprocessed_data_dir,
                         args.performance_count)
    elif args.backend == "ov":
        from ov_SUT import get_ov_sut
        sut = get_ov_sut(args.model, args.preprocessed_data_dir,
                         args.performance_count)
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "3d-unet", args.scenario)
    settings.FromConfig(args.user_conf, "3d-unet", args.scenario)

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    log_path = "build/logs"
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    print("Running Loadgen test...")
    if args.benchmark:
        start = time.time()
    lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)
    if args.benchmark:
        end = time.time()

    if args.accuracy:
        print("Running accuracy script...")
        process = subprocess.Popen(['python3', 'accuracy-brats.py'],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        out, err = process.communicate()
        # accuracy-brats.py is assumed to write the mean accuracy to stderr;
        # decode the bytes before converting to float
        out, err = out.decode(), err.decode().strip()
        print(out)
        print("Done!", float(err))

    if args.benchmark:
        # NOTE: `err` (and hence the accuracy print and return value below)
        # is only defined when --accuracy is also set
        print('Batch size = 1')
        print('Latency: %.3f ms' % ((end - start) * 1000 / sut.qsl.count))
        print('Throughput: %.3f images/sec' % (sut.qsl.count / (end - start)))
        print('Accuracy: {mean:.5f}'.format(mean=float(err)))

    print("Destroying SUT...")
    lg.DestroySUT(sut.sut)
    print("Destroying QSL...")
    lg.DestroyQSL(sut.qsl.qsl)

    return float(err)
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    #
    # make one pass over the dataset to validate accuracy
    #
    count = ds.get_item_count()

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = output_dir
    log_output_settings.copy_summary_to_stdout = False
    log_settings = lg.LogSettings()
    log_settings.enable_trace = args.debug
    log_settings.log_output = log_output_settings

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
    settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 500),
                          ds.load_query_samples, ds.unload_query_samples)
log.info("starting {}".format(scenario)) result_dict = {"good": 0, "total": 0, "scenario": str(scenario)} runner.start_run(result_dict, args.accuracy) lg.StartTestWithLogSettings(sut, qsl, settings, log_settings) if not last_timeing: last_timeing = runner.result_timing if args.accuracy: post_proc.finalize(result_dict, ds, output_dir=args.output) add_results(final_results, "{}".format(scenario), result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy) runner.finish() lg.DestroyQSL(qsl) lg.DestroySUT(sut) # # write final results # if args.output: with open("results.json", "w") as f: json.dump(final_results, f, sort_keys=True, indent=4)
def main():
    global num_ins
    global num_cpus
    global in_queue_cnt
    global out_queue_cnt
    global batching
    global queries_so_far
    global Latencies

    queries_so_far = 0

    args = get_args()
    log.info(args)

    scenario = args.scenario
    accuracy_mode = args.accuracy
    perf_count = args.perf_count
    batch_size = args.batch_size
    num_ins = args.num_instance
    num_cpus = args.num_phy_cpus
    batching = args.batching

    # Read Loadgen and workload config parameters
    settings = lg.TestSettings()
    settings.scenario = scenario_map[scenario]
    settings.FromConfig(args.mlperf_conf, "bert", scenario)
    settings.FromConfig(args.user_conf, "bert", scenario)
    settings.mode = lg.TestMode.AccuracyOnly if accuracy_mode else lg.TestMode.PerformanceOnly

    # Establish communication queues
    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    calibrate_counter = multiprocessing.Value("i", 0)
    out_queue = multiprocessing.Queue()

    # Create consumers
    consumers = []
    if scenario == "Server":
        from parse_server_config import configParser

        buckets = configParser("machine_conf.json")
        cutoffs = list(buckets.keys())
        batch_sizes = {}
        in_queue = {j: multiprocessing.JoinableQueue() for j in buckets}
        proc_idx = 0
        num_cpus = 0
        total_ins = 0
        for cutoff in list(buckets.keys()):
            batch_sizes[cutoff] = buckets[cutoff]["batch_size"]
            num_ins = buckets[cutoff]["instances"]
            cpus_per_instance = buckets[cutoff]["cpus_per_instance"]
            num_cpus = num_ins * cpus_per_instance
            total_ins += num_ins
            for j in range(num_ins):
                consumer = Consumer(in_queue[cutoff], out_queue, lock,
                                    init_counter, calibrate_counter, proc_idx,
                                    num_ins, args, cutoff)
                consumer.start_core_idx = proc_idx
                consumer.end_core_idx = proc_idx + cpus_per_instance - 1
                consumers.append(consumer)
                proc_idx = consumer.end_core_idx + 1
        num_ins = total_ins
    else:
        total_ins = num_ins
        in_queue = MultiprocessShapeBasedQueue()
        consumers = [
            Consumer(in_queue, out_queue, lock, init_counter,
                     calibrate_counter, i, num_ins, args)
            for i in range(num_ins)
        ]

    for c in consumers:
        c.start()

    # Dataset object used by constructQSL
    data_set = BERTDataSet(args.vocab, args.perf_count)
    if scenario == "Server":
        issue_queue = InQueueServer(in_queue, batch_sizes, data_set,
                                    settings.min_query_count)
    else:
        issue_queue = InQueue(in_queue, batch_size, data_set)

    # Wait until all sub-processors are ready
    block_until(init_counter, total_ins, 2)

    # Start response thread
    response_worker = threading.Thread(target=response_loadgen,
                                       args=(out_queue,))
    response_worker.daemon = True
    response_worker.start()

    def issue_queries(query_samples):
        # It's called by loadgen to send query to SUT
        issue_queue.put(query_samples)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(data_set.count, data_set.perf_count,
                          load_query_samples, unload_query_samples)

    log_path = "build/logs"
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # Wait until outQueue done
    while out_queue_cnt < in_queue_cnt:
        time.sleep(0.2)

    if scenario == "Server":
        for i in in_queue:
            in_queue[i].join()
            for j in range(buckets[i]["cpus_per_instance"]):
                in_queue[i].put(None)
    else:
        for i in range(num_ins):
            in_queue.put(None)

    for c in consumers:
        c.join()
    out_queue.put(None)

    if accuracy_mode:
        cmd = "python accuracy-squad.py --log_file={}/mlperf_log_accuracy.json".format(
            log_path)
        subprocess.check_call(cmd, shell=True)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
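# `block_until()` is used by the runner above (and by the calibration runner
# later in this collection) but is not defined in these excerpts. A minimal
# sketch consistent with both the two- and three-argument call sites, assuming
# the counter is a multiprocessing.Value("i", 0) that each Consumer increments
# once it is ready:
import time

def block_until(counter, target, interval=1):
    # poll the shared counter until `target` workers have checked in
    while counter.value < target:
        time.sleep(interval)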
def main():
    global so
    global last_timeing
    global last_loaded
    global result_timeing

    args = get_args()
    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # --count applies to accuracy mode only and can be used to limit the number of images
    # for testing. For perf model we always limit count to 200.
    count_override = False
    count = args.count
    if count:
        count_override = True

    """
    Python signature
        go_initialize(backend, model_path, dataset_path, count, use_gpu,
                      gpu_id, trace_level, max_batchsize)
    """
    count, err = go_initialize(backend, args.model_path, args.dataset_path,
                               count, args.use_gpu, args.gpu_id,
                               args.trace_level, args.max_batchsize)
    if (err != 'nil'):
        print(err)
        raise RuntimeError('initialization in go failed')

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    log_dir = None
    if args.log_dir:
        log_dir = os.path.abspath(args.log_dir)
        os.makedirs(log_dir, exist_ok=True)

    scenario = SCENARIO_MAP[args.scenario]

    def issue_queries(query_samples):
        global so
        global last_timeing
        global result_timeing
        idx = np.array([q.index for q in query_samples]).astype(np.int32)
        query_id = [q.id for q in query_samples]
        if args.dataset == 'brats2019':
            start = time.time()
            response_array_refs = []
            response = []
            for i, qid in enumerate(query_id):
                processed_results = so.IssueQuery(1, idx[i][np.newaxis])
                processed_results = json.loads(processed_results.decode('utf-8'))
                response_array = array.array(
                    "B", np.array(processed_results[0], np.float16).tobytes())
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
            result_timeing.append(time.time() - start)
            lg.QuerySamplesComplete(response)
        else:
            start = time.time()
            processed_results = so.IssueQuery(len(idx), idx)
            result_timeing.append(time.time() - start)
            processed_results = json.loads(processed_results.decode('utf-8'))
            response_array_refs = []
            response = []
            for idx, qid in enumerate(query_id):
                response_array = array.array(
                    "B", np.array(processed_results[idx], np.float32).tobytes())
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
            lg.QuerySamplesComplete(response)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    def load_query_samples(sample_list):
        global so
        global last_loaded
        err = go_load_query_samples(sample_list, so)
        last_loaded = time.time()
        if (err != ''):
            print(err)
            raise RuntimeError('load query samples failed')

    def unload_query_samples(sample_list):
        global so
        err = go_unload_query_samples(sample_list, so)
        if (err != ''):
            print(err)
            raise RuntimeError('unload query samples failed')

    settings = lg.TestSettings()
    if args.model_name != "":
        settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
        settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 500),
                          load_query_samples, unload_query_samples)

    log.info("starting {}".format(scenario))

    log_path = os.path.realpath(args.log_dir)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings
    # log_settings.enable_trace = True

    # lg.StartTest(sut, qsl, settings)
    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    if not last_timeing:
        last_timeing = result_timeing

    if args.accuracy:
        accuracy_script_paths = {
            'coco': os.path.realpath(
                '../inference/vision/classification_and_detection/tools/accuracy-coco.py'),
            'imagenet': os.path.realpath(
                '../inference/vision/classification_and_detection/tools/accuracy-imagenet.py'),
            'squad': os.path.realpath(
                '../inference/language/bert/accuracy-squad.py'),
            'brats2019': os.path.realpath(
                '../inference/vision/medical_imaging/3d-unet/accuracy-brats.py'),
        }
        accuracy_script_path = accuracy_script_paths[args.dataset]
        accuracy_file_path = os.path.join(log_dir, 'mlperf_log_accuracy.json')
        data_dir = os.environ['DATA_DIR']

        if args.dataset == 'coco':
            if args.use_inv_map:
                subprocess.check_call(
                    'python3 {} --mlperf-accuracy-file {} --coco-dir {} --use-inv-map'
                    .format(accuracy_script_path, accuracy_file_path, data_dir),
                    shell=True)
            else:
                subprocess.check_call(
                    'python3 {} --mlperf-accuracy-file {} --coco-dir {}'
                    .format(accuracy_script_path, accuracy_file_path, data_dir),
                    shell=True)
        elif args.dataset == 'imagenet':
            # imagenet
            subprocess.check_call(
                'python3 {} --mlperf-accuracy-file {} --imagenet-val-file {}'
                .format(accuracy_script_path, accuracy_file_path,
                        os.path.join(data_dir, 'val_map.txt')),
                shell=True)
        elif args.dataset == 'squad':
            # squad
            vocab_path = os.path.join(data_dir, 'vocab.txt')
            val_path = os.path.join(data_dir, 'dev-v1.1.json')
            out_path = os.path.join(log_dir, 'predictions.json')
            cache_path = os.path.join(data_dir, 'eval_features.pickle')
            subprocess.check_call(
                'python3 {} --vocab_file {} --val_data {} --log_file {} --out_file {} --features_cache_file {} --max_examples {}'
                .format(accuracy_script_path, vocab_path, val_path,
                        accuracy_file_path, out_path, cache_path, count),
                shell=True)
        elif args.dataset == 'brats2019':
            # brats2019
            base_dir = os.path.realpath(
                '../inference/vision/medical_imaging/3d-unet/build')
            post_dir = os.path.join(base_dir, 'postprocessed_data')
            label_dir = os.path.join(
                base_dir, 'raw_data/nnUNet_raw_data/Task043_BraTS2019/labelsTr')
            os.makedirs(post_dir, exist_ok=True)
            subprocess.check_call(
                'python3 {} --log_file {} --preprocessed_data_dir {} --postprocessed_data_dir {} --label_data_dir {}'
                .format(accuracy_script_path, accuracy_file_path, data_dir,
                        post_dir, label_dir),
                shell=True)
        else:
            raise RuntimeError('Dataset not Implemented.')

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    """
    Python signature
        go_finalize(so)
    """
    err = go_finalize(so)
    if (err != ''):
        print(err)
        raise RuntimeError('finalize in go failed')
def main(): args = get_args() if args.backend == "pytorch": from pytorch_SUT import PytorchSUT sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint, args.dataset_dir, args.manifest, args.perf_count) model = sut.greedy_decoder._model else: raise ValueError("Unknown backend: {:}".format(args.backend)) settings = lg.TestSettings() settings.scenario = scenario_map[args.scenario] settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario) settings.FromConfig(args.user_conf, "rnnt", args.scenario) if args.accuracy: settings.mode = lg.TestMode.AccuracyOnly else: settings.mode = lg.TestMode.PerformanceOnly log_path = args.log_dir os.makedirs(log_path, exist_ok=True) log_output_settings = lg.LogOutputSettings() log_output_settings.outdir = log_path log_output_settings.copy_summary_to_stdout = True log_settings = lg.LogSettings() log_settings.log_output = log_output_settings pattern = [ 'accuracy=\d+.\d+', 'samples_per_query : \d+', 'Samples per second: \d+.\d+' ] def eval_func(model): print("Running Loadgen test...") sut.greedy_decoder._model = model lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings) if args.accuracy: cmd = f"python3 accuracy_eval.py --log_dir {log_path} \ --dataset_dir {args.dataset_dir} --manifest {args.manifest}" out = subprocess.check_output(cmd, shell=True) out = out.decode() regex_accu = re.compile(pattern[0]) accu = float(regex_accu.findall(out)[0].split('=')[1]) return accu return 0 def perf_func(model): print("Running Loadgen test...") sut.greedy_decoder._model = model lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings) if not args.accuracy: file_path = os.path.join(log_path, 'mlperf_log_summary.txt') f = open(file_path, 'r', encoding='UTF-8') file_content = f.read() f.close() regex_batch = re.compile(pattern[1]) regex_thro = re.compile(pattern[2]) samples_per_query = int( regex_batch.findall(file_content)[0].split(': ')[1]) samples_per_second = float( regex_thro.findall(file_content)[0].split(': ')[1]) print('Batch size = %d' % samples_per_query) print('Latency: %.3f ms' % ((1 / samples_per_second) * 1000)) print('Throughput: %.3f samples/sec' % samples_per_second) if args.tune: # Dynamic Quantization with LPOT from lpot.experimental import Quantization, common quantizer = Quantization("./conf.yaml") quantizer.model = common.Model(model) quantizer.eval_func = eval_func q_model = quantizer() q_model.save(args.tuned_checkpoint) if args.benchmark: if args.int8: from lpot.utils.pytorch import load new_model = load( os.path.abspath(os.path.expanduser(args.tuned_checkpoint)), model) else: new_model = model perf_func(new_model) print("Done!", flush=True)
def main():
    global num_ins
    global num_cpus
    global in_queue_cnt
    global out_queue_cnt
    global batching
    global bs_step

    args = get_args()
    log.info(args)

    scenario = args.scenario
    accuracy_mode = args.accuracy
    perf_count = args.perf_count
    batch_size = args.batch_size
    num_ins = args.num_instance
    num_cpus = args.num_phy_cpus
    batching = args.batching

    ## TODO, remove
    log.info('Run with {} instance on {} cpus: '.format(num_ins, num_cpus))

    # Establish communication queues
    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    calibrate_counter = multiprocessing.Value("i", 0)
    out_queue = multiprocessing.Queue()
    in_queue = MultiprocessShapeBasedQueue()

    if args.perf_calibrate:
        with open('prof_new.py', 'w') as f:
            print('prof_bs_step = {}'.format(bs_step), file=f)
            print('prof_map = {', file=f)

    # Start consumers
    consumers = [
        Consumer(in_queue, out_queue, lock, init_counter, calibrate_counter,
                 i, num_ins, args)
        for i in range(num_ins)
    ]
    for c in consumers:
        c.start()

    # used by constructQSL
    data_set = BERTDataSet(args.vocab, args.perf_count)
    issue_queue = InQueue(in_queue, batch_size, data_set)

    # Wait until all sub-processors ready to do calibration
    block_until(calibrate_counter, num_ins)
    # Wait until all sub-processors done calibration
    block_until(calibrate_counter, 2 * num_ins)
    if args.perf_calibrate:
        with open('prof_new.py', 'a') as f:
            print('}', file=f)
        sys.exit(0)

    # Wait until all sub-processors are ready
    block_until(init_counter, num_ins)

    # Start response thread
    response_worker = threading.Thread(target=response_loadgen,
                                       args=(out_queue,))
    response_worker.daemon = True
    response_worker.start()

    # Start loadgen
    settings = lg.TestSettings()
    settings.scenario = scenario_map[scenario]
    settings.FromConfig(args.mlperf_conf, "bert", scenario)
    settings.FromConfig(args.user_conf, "bert", scenario)
    settings.mode = lg.TestMode.AccuracyOnly if accuracy_mode else lg.TestMode.PerformanceOnly

    # TODO, for debug, remove
    #settings.server_target_qps = 40
    #settings.server_target_latency_ns = 100000000
    #settings.min_query_count = 100
    #settings.min_duration_ms = 10000

    def issue_queries(query_samples):
        # It's called by loadgen to send query to SUT
        issue_queue.put(query_samples)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(data_set.count, data_set.perf_count,
                          load_query_samples, unload_query_samples)

    log_path = "build/logs"
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    #lg.StartTest(sut, qsl, settings)
    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # Wait until outQueue done
    while out_queue_cnt < in_queue_cnt:
        time.sleep(0.2)

    in_queue.join()
    for i in range(num_ins):
        in_queue.put(None)
    for c in consumers:
        c.join()
    out_queue.put(None)

    if accuracy_mode:
        cmd = "python accuracy-squad.py --log_file={}/mlperf_log_accuracy.json".format(
            log_path)
        subprocess.check_call(cmd, shell=True)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
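# `response_loadgen()` is started as a daemon thread by both multi-instance
# runners above but is not shown in these excerpts. A hedged sketch of the
# assumed loop follows: each Consumer is assumed to put (query_id, result)
# tuples on `out_queue`, with a trailing None as the shutdown sentinel; the
# item layout and float32 dtype are assumptions, not the original code.
import array
import mlperf_loadgen as lg
import numpy as np

out_queue_cnt = 0  # module-level counter also read by main() above

def response_loadgen(out_queue):
    global out_queue_cnt
    while True:
        item = out_queue.get()
        if item is None:
            break
        query_id, result = item
        # hand the raw result bytes back to LoadGen for this query id
        response_array = array.array("B",
                                     np.array(result, np.float32).tobytes())
        bi = response_array.buffer_info()
        lg.QuerySamplesComplete(
            [lg.QuerySampleResponse(query_id, bi[0], bi[1])])
        out_queue_cnt += 1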