예제 #1
0
def main():
    """Run the BERT LoadGen benchmark with the backend chosen on the command line.

    Builds the SUT for ``args.backend``, loads the LoadGen settings for the
    "bert" model from the MLPerf and user config files, runs the test and, in
    accuracy mode, runs ``accuracy-squad.py`` afterwards.  The SUT and QSL are
    destroyed even when the test run or the accuracy script fails.

    Raises:
        AssertionError: if ``--quantized`` or ``--profile`` is combined with a
            backend other than onnxruntime.
        ValueError: if ``args.backend`` is not one of the known backends.
        subprocess.CalledProcessError: if the accuracy script exits non-zero.
    """
    args = get_args()

    # Backend modules are imported lazily so only the selected framework
    # has to be installed.
    if args.backend == "pytorch":
        assert not args.quantized, "Quantized model is only supported by onnxruntime backend!"
        assert not args.profile, "Profiling is only supported by onnxruntime backend!"
        from pytorch_SUT import get_pytorch_sut
        sut = get_pytorch_sut()
    elif args.backend == "tf":
        assert not args.quantized, "Quantized model is only supported by onnxruntime backend!"
        assert not args.profile, "Profiling is only supported by onnxruntime backend!"
        from tf_SUT import get_tf_sut
        sut = get_tf_sut()
    elif args.backend == "tf_estimator":
        assert not args.quantized, "Quantized model is only supported by onnxruntime backend!"
        assert not args.profile, "Profiling is only supported by onnxruntime backend!"
        from tf_estimator_SUT import get_tf_estimator_sut
        sut = get_tf_estimator_sut()
    elif args.backend == "onnxruntime":
        from onnxruntime_SUT import get_onnxruntime_sut
        sut = get_onnxruntime_sut(args)
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    # The user config is applied second, after the MLPerf config.
    settings.FromConfig(args.mlperf_conf, "bert", args.scenario)
    settings.FromConfig(args.user_conf, "bert", args.scenario)

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    log_path = "build/logs"
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    try:
        print("Running LoadGen test...")
        lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)

        if args.accuracy:
            # Fixed command: an argv list needs no shell.
            subprocess.check_call(["python3", "accuracy-squad.py"])

        print("Done!")
    finally:
        # Release the LoadGen handles even if the run or the accuracy
        # script raised.
        print("Destroying SUT...")
        lg.DestroySUT(sut.sut)

        print("Destroying QSL...")
        lg.DestroyQSL(sut.qsl.qsl)
예제 #2
0
def main():
    """Run the RNN-T LoadGen benchmark with the PyTorch SUT.

    Loads the LoadGen settings for the "rnnt" model, builds the SUT, runs the
    test and, in accuracy mode, runs ``accuracy_eval.py`` on the produced
    logs.  The SUT is destroyed even when the run or the accuracy script
    fails.

    Raises:
        ValueError: if ``args.backend`` is not "pytorch".
        subprocess.CalledProcessError: if the accuracy script exits non-zero.
    """
    args = get_args()

    # Only the Offline scenario uses the configurable batch size.
    batch_size = args.offline_batch_size if args.scenario == "Offline" else 1

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario)
    settings.FromConfig(args.user_conf, "rnnt", args.scenario)

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
        # NOTE(review): presumably the number of samples in the accuracy
        # dataset -- confirm against the manifest.
        issued_query_count = 2513
    else:
        settings.mode = lg.TestMode.PerformanceOnly
        issued_query_count = settings.min_query_count

    log_path = args.log_dir
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    if args.backend == "pytorch":
        from pytorch_SUT import PytorchSUT
        sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint,
                         args.dataset_dir, args.manifest, args.perf_count,
                         issued_query_count, args.scenario, args.machine_conf,
                         batch_size, args.cores_for_loadgen,
                         args.cores_per_instance, args.debug, args.cosim,
                         args.profile, args.ipex, args.bf16, args.warmup)
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    try:
        print("Running Loadgen test...")
        lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)

        if args.accuracy:
            # An argv list keeps paths with spaces or shell metacharacters
            # intact; the previous shell string split or interpreted them.
            argv = [
                "python3", "accuracy_eval.py",
                "--log_dir", str(log_path),
                "--dataset_dir", str(args.dataset_dir),
                "--manifest", str(args.manifest),
            ]
            cmd = " ".join(argv)
            print(f"Running accuracy script: {cmd}")
            subprocess.check_call(argv)
    finally:
        # Release the SUT even if the run or the accuracy script raised.
        lg.DestroySUT(sut.sut)

    print("Done!")
예제 #3
0
def main():
    """Run the 3D-UNet LoadGen benchmark with the selected backend.

    Builds the SUT for ``args.backend``, loads the LoadGen settings for the
    "3d-unet" model, runs the test and, in accuracy mode, runs
    ``brats_eval.py`` afterwards.  The SUT and QSL are destroyed even when the
    run or the accuracy script fails.

    Raises:
        ValueError: if ``args.backend`` is neither "pytorch" nor "onnxruntime".
        subprocess.CalledProcessError: if the accuracy script exits non-zero.
    """
    args = get_args()

    # Backend modules are imported lazily so only the selected framework
    # has to be installed.
    if args.backend == "pytorch":
        from pytorch_SUT import get_pytorch_sut
        sut = get_pytorch_sut(args.model_dir, args.preprocessed_data_dir,
                              args.performance_count)
    elif args.backend == "onnxruntime":
        from onnxruntime_SUT import get_onnxruntime_sut
        sut = get_onnxruntime_sut(args.onnx_model, args.preprocessed_data_dir,
                                  args.performance_count)
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "3d-unet", args.scenario)
    settings.FromConfig(args.user_conf, "3d-unet", args.scenario)

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    log_path = "build/logs"
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    try:
        print("Running Loadgen test...")
        lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)

        if args.accuracy:
            print("Running accuracy script...")
            # Fixed command: an argv list needs no shell.
            subprocess.check_call(["python3", "brats_eval.py"])

        print("Done!")
    finally:
        # Release the LoadGen handles even if the run or the accuracy
        # script raised.
        print("Destroying SUT...")
        lg.DestroySUT(sut.sut)

        print("Destroying QSL...")
        lg.DestroyQSL(sut.qsl.qsl)
예제 #4
0
    def __init__(self, session, ds, optimization_config, onnx_output_names):
        """Set up LoadGen state and start the worker threads.

        Args:
            session: inference session stored on the instance as ``session``.
            ds: dataset object; its ``load_query_samples`` and
                ``unload_query_samples`` methods are registered with the QSL.
            optimization_config: supplies ``threads_num``,
                ``dynamic_batching_size``, ``result_path``,
                ``max_latency_ms``, ``max_latency_percentile`` and
                ``min_duration_sec``.
            onnx_output_names: output names stored on the instance.
        """
        # Inputs kept on the instance.
        self.session = session
        self.threads = optimization_config.threads_num
        self.max_batchsize = optimization_config.dynamic_batching_size
        self.ds = ds
        self.onnx_output_names = onnx_output_names
        self.guess = None

        # State shared with the worker threads.
        self.cv = threading.Condition()
        self.done = False
        self.q_idx = []
        self.q_query_id = []
        self.workers = []

        # LoadGen test settings: Server scenario, peak-performance search.
        test_settings = lg.TestSettings()
        test_settings.scenario = lg.TestScenario.Server
        test_settings.mode = lg.TestMode.FindPeakPerformance
        self.settings = test_settings

        # LoadGen logging: write to the configured result path, no stdout
        # summary, no trace.
        output_cfg = lg.LogOutputSettings()
        output_cfg.outdir = optimization_config.result_path
        output_cfg.copy_summary_to_stdout = False
        self.log_settings = lg.LogSettings()
        self.log_settings.enable_trace = False
        self.log_settings.log_output = output_cfg

        # Register this object's callbacks and the dataset loaders.
        self.sut = lg.ConstructSUT(
            self.issue_queries,
            self.flush_queries,
            self.process_latencies,
        )
        self.qsl = lg.ConstructQSL(
            QUERY_COUNT,
            QUERY_COUNT,
            ds.load_query_samples,
            ds.unload_query_samples,
        )

        # Latency and duration targets taken from the optimization config.
        target_latency_ns = int(optimization_config.max_latency_ms * NANO_SEC / MILLI_SEC)
        test_settings.server_coalesce_queries = True
        test_settings.server_target_latency_ns = target_latency_ns
        test_settings.server_target_latency_percentile = optimization_config.max_latency_percentile
        test_settings.min_duration_ms = optimization_config.min_duration_sec * MILLI_SEC

        # Start one daemon worker per configured thread.
        for _ in range(self.threads):
            task_thread = threading.Thread(target=self.handle_tasks,
                                           args=(self.cv,),
                                           daemon=True)
            self.workers.append(task_thread)
            task_thread.start()
        # One-second pause after launching the workers.
        time.sleep(1)
예제 #5
0
파일: run.py 프로젝트: xz10620/inference
def main():
    """Run the RNN-T LoadGen benchmark with the PyTorch SUT.

    Builds the SUT, loads the LoadGen settings for the "rnnt" model, runs the
    test and, in accuracy mode, runs ``accuracy_eval.py`` on the produced
    logs.

    Raises:
        ValueError: if ``args.backend`` is not "pytorch".
        subprocess.CalledProcessError: if the accuracy script exits non-zero.
    """
    args = get_args()

    if args.backend == "pytorch":
        from pytorch_SUT import PytorchSUT
        sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint,
                         args.dataset_dir, args.manifest, args.perf_count)
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario)
    settings.FromConfig(args.user_conf, "rnnt", args.scenario)

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    log_path = args.log_dir
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    print("Running Loadgen test...")
    lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)

    if args.accuracy:
        # An argv list keeps paths with spaces or shell metacharacters
        # intact; the previous shell string split or interpreted them.
        argv = [
            "python3", "accuracy_eval.py",
            "--log_dir", str(log_path),
            "--dataset_dir", str(args.dataset_dir),
            "--manifest", str(args.manifest),
        ]
        cmd = " ".join(argv)
        print(f"Running accuracy script: {cmd}")
        subprocess.check_call(argv)

    print("Done!")
예제 #6
0
파일: run.py 프로젝트: mlcommons/inference
def main():
    """
    Runs 3D UNet performing KiTS19 Kidney Tumor Segmentation task as below:

    1. instantiate SUT and QSL for the chosen backend
    2. configure LoadGen for the chosen scenario
    3. configure MLPerf logger
    4. start LoadGen
    5. collect logs and if needed evaluate inference results
    6. clean up

    The clean-up step runs even when the LoadGen run or the accuracy script
    fails.

    Raises:
        ValueError: if ``args.backend`` is not a known backend.
        subprocess.CalledProcessError: if the accuracy script exits non-zero.
    """
    # scenarios in LoadGen
    scenario_map = {
        "SingleStream": lg.TestScenario.SingleStream,
        "Offline": lg.TestScenario.Offline,
        "Server": lg.TestScenario.Server,
        "MultiStream": lg.TestScenario.MultiStream
    }

    args = get_args()

    # instantiate SUT as per requested backend; QSL is also instantiated.
    # Every backend module exposes the same get_sut() entry point and is
    # imported lazily so only the selected framework has to be installed.
    if args.backend == "pytorch":
        from pytorch_SUT import get_sut
    elif args.backend == "pytorch_checkpoint":
        from pytorch_checkpoint_SUT import get_sut
    elif args.backend == "onnxruntime":
        from onnxruntime_SUT import get_sut
    elif args.backend == "tensorflow":
        from tensorflow_SUT import get_sut
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))
    sut = get_sut(args.model, args.preprocessed_data_dir,
                  args.performance_count)

    # setup LoadGen
    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "3d-unet", args.scenario)
    settings.FromConfig(args.user_conf, "3d-unet", args.scenario)
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    # set up mlperf logger
    log_path = Path("build", "logs").absolute()
    log_path.mkdir(parents=True, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = str(log_path)
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    try:
        # start running test, from LoadGen
        print("Running Loadgen test...")
        lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)

        # if needed check accuracy
        if args.accuracy:
            print("Checking accuracy...")
            # Fixed command: an argv list needs no shell.
            subprocess.check_call(["python3", "accuracy_kits.py"])

        # all done
        print("Done!")
    finally:
        # cleanup, also on failure, so the LoadGen handles are released
        print("Destroying SUT...")
        lg.DestroySUT(sut.sut)
        print("Destroying QSL...")
        lg.DestroyQSL(sut.qsl.qsl)
예제 #7
0
def eval_func(model):
    """Run the 3D-UNet LoadGen test and return the measured accuracy.

    Args:
        model: model object handed to the PyTorch SUT; the other backends
            load their model from ``args.model`` instead.

    Returns:
        The accuracy parsed from the stderr output of ``accuracy-brats.py``
        when ``args.accuracy`` is set, otherwise ``0.0`` (no accuracy script
        is run in that case).

    Raises:
        ValueError: if ``args.backend`` is unknown, or if the accuracy
            script's stderr cannot be parsed as a float.
    """
    args = get_args()

    if args.backend == "pytorch":
        from pytorch_SUT import get_pytorch_sut
        sut = get_pytorch_sut(model, args.preprocessed_data_dir,
                              args.performance_count)
    elif args.backend == "onnxruntime":
        from onnxruntime_SUT import get_onnxruntime_sut
        sut = get_onnxruntime_sut(args.model, args.preprocessed_data_dir,
                                  args.performance_count)
    elif args.backend == "tf":
        from tf_SUT import get_tf_sut
        sut = get_tf_sut(args.model, args.preprocessed_data_dir,
                         args.performance_count)
    elif args.backend == "ov":
        from ov_SUT import get_ov_sut
        sut = get_ov_sut(args.model, args.preprocessed_data_dir,
                         args.performance_count)
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "3d-unet", args.scenario)
    settings.FromConfig(args.user_conf, "3d-unet", args.scenario)

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    log_path = "build/logs"
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    # Defined up front so the final return is valid in performance mode;
    # previously the function hit an unbound `err` there.
    accuracy = 0.0

    try:
        print("Running Loadgen test...")
        # Always timed; the figures are only reported with --benchmark.
        start = time.time()
        lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings, log_settings)
        elapsed = time.time() - start

        if args.accuracy:
            print("Running accuracy script...")
            process = subprocess.Popen(['python3', 'accuracy-brats.py'],
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            out, err = process.communicate()

            print(out)
            # The accuracy value is read from the script's stderr stream.
            accuracy = float(err)
            print("Done!", accuracy)

            if args.benchmark:
                print('Batch size = 1')
                print('Latency: %.3f ms' % (elapsed * 1000 / sut.qsl.count))
                print('Throughput: %.3f images/sec' % (sut.qsl.count / elapsed))
                print('Accuracy: {mean:.5f}'.format(mean=accuracy))
    finally:
        # Release the LoadGen handles even if the run or the parsing failed.
        print("Destroying SUT...")
        lg.DestroySUT(sut.sut)

        print("Destroying QSL...")
        lg.DestroyQSL(sut.qsl.qsl)

    return accuracy
예제 #8
0
파일: main.py 프로젝트: prime91/inference
def main():
    """Run a LoadGen benchmark for the selected backend, model and dataset.

    Loads the dataset and model, warms the backend up, wires a runner into
    LoadGen through SUT/QSL callbacks, runs the test and collects the results.
    When ``--output`` is given the process changes into that directory and
    writes ``results.json`` there; otherwise the LoadGen logs are written to
    the current working directory.

    Side effects:
        Updates the module-level ``last_timeing`` list, may change the
        working directory, and exits with status 1 if either config file is
        missing.
    """
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count limits the number of dataset items; when it is given, LoadGen's
    # min/max query count is pinned further below as well.
    count = args.count
    count_override = bool(count)

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    # Resolve the config paths before any chdir below so relative paths keep
    # pointing at the files the user named.
    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)
    else:
        # Without --output the log directory used to be an unbound name;
        # fall back to the current working directory, which is also where
        # the process stays in this case.
        output_dir = os.getcwd()

    # number of items the dataset actually holds
    count = ds.get_item_count()

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model,
                                  ds,
                                  args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        # called by loadgen to hand queries to the runner
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = output_dir
    log_output_settings.copy_summary_to_stdout = False
    log_settings = lg.LogSettings()
    log_settings.enable_trace = args.debug
    log_settings.log_output = log_output_settings

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
    settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    # Later flags win: find_peak_performance overrides accuracy.
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 500), ds.load_query_samples,
                          ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # Fall back to the runner's own timings if LoadGen reported none.
    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)

    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing,
                time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        # The working directory is output_dir at this point (see chdir above).
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
def main():
    """Run the BERT LoadGen benchmark across several consumer processes.

    Reads the LoadGen settings for the "bert" model, starts the ``Consumer``
    processes (one input queue per bucket in the Server scenario, a single
    shared queue otherwise), runs the LoadGen test, shuts the consumers down
    and, in accuracy mode, runs ``accuracy-squad.py`` on the accuracy log.

    Module-level names declared ``global`` below are shared with code outside
    this function; only ``num_ins``, ``num_cpus``, ``batching`` and
    ``queries_so_far`` are assigned here.
    """
    global num_ins
    global num_cpus
    global in_queue_cnt
    global out_queue_cnt
    global batching
    global queries_so_far
    global Latencies

    queries_so_far = 0

    args = get_args()
    log.info(args)
    scenario = args.scenario
    accuracy_mode = args.accuracy
    perf_count = args.perf_count
    batch_size = args.batch_size
    num_ins = args.num_instance
    num_cpus = args.num_phy_cpus
    batching = args.batching

    # Read Loadgen and workload config parameters
    settings = lg.TestSettings()
    settings.scenario = scenario_map[scenario]
    settings.FromConfig(args.mlperf_conf, "bert", scenario)
    settings.FromConfig(args.user_conf, "bert", scenario)
    settings.mode = lg.TestMode.AccuracyOnly if accuracy_mode else lg.TestMode.PerformanceOnly

    # Establish communication queues
    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    calibrate_counter = multiprocessing.Value("i", 0)
    out_queue = multiprocessing.Queue()

    # Create consumers
    consumers = []
    if scenario == "Server":
        from parse_server_config import configParser

        # One bucket per cutoff key; each bucket carries its own batch size,
        # instance count and CPUs per instance.
        buckets = configParser("machine_conf.json")
        # NOTE(review): `cutoffs` is not used again in this function.
        cutoffs = list(buckets.keys())
        batch_sizes = {}

        # One joinable input queue per bucket.
        in_queue = {j: multiprocessing.JoinableQueue() for j in buckets}
        proc_idx = 0
        num_cpus = 0
        total_ins = 0
        for cutoff in list(buckets.keys()):
            batch_sizes[cutoff] = buckets[cutoff]["batch_size"]
            num_ins = buckets[cutoff]["instances"]
            cpus_per_instance = buckets[cutoff]["cpus_per_instance"]
            # NOTE(review): this overwrites the global on every iteration, so
            # after the loop it holds the last bucket's value only -- confirm
            # that is intended.
            num_cpus = num_ins * cpus_per_instance
            total_ins += num_ins

            for j in range(num_ins):
                consumer = Consumer(in_queue[cutoff], out_queue, lock,
                                    init_counter, calibrate_counter, proc_idx,
                                    num_ins, args, cutoff)
                # Each consumer gets a contiguous, non-overlapping core range.
                consumer.start_core_idx = proc_idx
                consumer.end_core_idx = proc_idx + cpus_per_instance - 1
                consumers.append(consumer)
                proc_idx = consumer.end_core_idx + 1

        num_ins = total_ins

    else:
        total_ins = num_ins
        in_queue = MultiprocessShapeBasedQueue()
        consumers = [
            Consumer(in_queue, out_queue, lock, init_counter,
                     calibrate_counter, i, num_ins, args)
            for i in range(num_ins)
        ]

    for c in consumers:
        c.start()

    # Dataset object used by constructQSL
    data_set = BERTDataSet(args.vocab, args.perf_count)
    if scenario == "Server":
        issue_queue = InQueueServer(in_queue, batch_sizes, data_set,
                                    settings.min_query_count)
    else:
        issue_queue = InQueue(in_queue, batch_size, data_set)

    # Wait until all sub-processors are ready
    block_until(init_counter, total_ins, 2)

    # Start response thread
    response_worker = threading.Thread(target=response_loadgen,
                                       args=(out_queue, ))
    response_worker.daemon = True
    response_worker.start()

    def issue_queries(query_samples):
        # It's called by loadgen to send query to SUT
        issue_queue.put(query_samples)

    # flush_queries, process_latencies, load_query_samples and
    # unload_query_samples are defined outside this function.
    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(data_set.count, data_set.perf_count,
                          load_query_samples, unload_query_samples)

    log_path = "build/logs"
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # Wait until outQueue done
    # Polls the two global counters; they are not modified in this function
    # (presumably updated by the issue/response paths -- verify).
    while out_queue_cnt < in_queue_cnt:
        time.sleep(0.2)

    if scenario == "Server":
        for i in in_queue:
            in_queue[i].join()
            # NOTE(review): this enqueues `cpus_per_instance` None sentinels
            # per bucket, while the bucket was given `instances` consumers
            # above -- confirm the count matches what Consumer expects.
            for j in range(buckets[i]["cpus_per_instance"]):
                in_queue[i].put(None)
    else:
        # One None sentinel per consumer instance.
        for i in range(num_ins):
            in_queue.put(None)

    for c in consumers:
        c.join()
    # None on the output queue; presumably stops the response thread --
    # verify against response_loadgen.
    out_queue.put(None)

    if accuracy_mode:
        cmd = "python accuracy-squad.py --log_file={}/mlperf_log_accuracy.json".format(
            log_path)
        subprocess.check_call(cmd, shell=True)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
예제 #10
0
def main():
    """Run a LoadGen benchmark against the Go-backed inference library.

    Initializes the Go side, wires its query/load/unload entry points into
    LoadGen through SUT/QSL callbacks, runs the test and, in accuracy mode,
    runs the accuracy script that matches ``args.dataset``.  LoadGen logs go
    to ``--log_dir`` when it is given and to the current working directory
    otherwise.

    Raises:
        RuntimeError: if Go initialization, sample loading/unloading or
            finalization reports an error, or the dataset has no accuracy
            branch.
        KeyError: if ``args.dataset`` has no accuracy script, or ``DATA_DIR``
            is not set in the environment (accuracy mode only).
        subprocess.CalledProcessError: if the accuracy script exits non-zero.
    """

    global so
    global last_timeing
    global last_loaded
    global result_timeing

    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # Remember whether --count was given; `count` itself is replaced below by
    # the value the Go side returns.
    count = args.count
    count_override = bool(count)

    # Python signature:
    # go_initialize(backend, model_path, dataset_path, count, use_gpu, gpu_id,
    #               trace_level, max_batchsize)
    count, err = go_initialize(backend, args.model_path, args.dataset_path,
                               count, args.use_gpu, args.gpu_id,
                               args.trace_level, args.max_batchsize)

    # go_initialize reports success as the string 'nil'; the other go_*
    # helpers below are checked against an empty string instead.
    if err != 'nil':
        print(err)
        raise RuntimeError('initialization in go failed')

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    if args.log_dir:
        log_dir = os.path.abspath(args.log_dir)
        os.makedirs(log_dir, exist_ok=True)
    else:
        # Without --log_dir the later realpath()/join() calls used to crash
        # on None; fall back to the current working directory.
        log_dir = os.getcwd()

    scenario = SCENARIO_MAP[args.scenario]

    def issue_queries(query_samples):
        # called by loadgen to run inference on a batch of query samples
        global so
        global last_timeing
        global result_timeing
        idx = np.array([q.index for q in query_samples]).astype(np.int32)
        query_id = [q.id for q in query_samples]
        # response_array_refs keeps every array referenced while its buffer
        # address is handed to LoadGen.
        response_array_refs = []
        response = []
        if args.dataset == 'brats2019':
            # brats2019 samples are sent to the Go side one at a time and
            # returned as float16.
            start = time.time()
            for i, qid in enumerate(query_id):
                processed_results = so.IssueQuery(1, idx[i][np.newaxis])
                processed_results = json.loads(
                    processed_results.decode('utf-8'))
                response_array = array.array(
                    "B",
                    np.array(processed_results[0], np.float16).tobytes())
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
            result_timeing.append(time.time() - start)
            lg.QuerySamplesComplete(response)
        else:
            # All other datasets are sent as one batch and returned as
            # float32.
            start = time.time()
            processed_results = so.IssueQuery(len(idx), idx)
            result_timeing.append(time.time() - start)
            processed_results = json.loads(processed_results.decode('utf-8'))
            for i, qid in enumerate(query_id):
                response_array = array.array(
                    "B",
                    np.array(processed_results[i], np.float32).tobytes())
                response_array_refs.append(response_array)
                bi = response_array.buffer_info()
                response.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
            lg.QuerySamplesComplete(response)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    def load_query_samples(sample_list):
        # called by loadgen to load samples on the Go side
        global so
        global last_loaded
        err = go_load_query_samples(sample_list, so)
        last_loaded = time.time()
        if err != '':
            print(err)
            raise RuntimeError('load query samples failed')

    def unload_query_samples(sample_list):
        # called by loadgen to unload samples on the Go side
        global so
        err = go_unload_query_samples(sample_list, so)
        if err != '':
            print(err)
            raise RuntimeError('unload query samples failed')

    settings = lg.TestSettings()
    if args.model_name != "":
        settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
        settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    # Later flags win: find_peak_performance overrides accuracy.
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 500), load_query_samples,
                          unload_query_samples)

    log.info("starting {}".format(scenario))

    log_path = os.path.realpath(log_dir)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings
    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # Fall back to the locally measured timings if LoadGen reported none.
    if not last_timeing:
        last_timeing = result_timeing

    if args.accuracy:
        accuracy_script_paths = {
            'coco':
            os.path.realpath(
                '../inference/vision/classification_and_detection/tools/accuracy-coco.py'
            ),
            'imagenet':
            os.path.realpath(
                '../inference/vision/classification_and_detection/tools/accuracy-imagenet.py'
            ),
            'squad':
            os.path.realpath('../inference/language/bert/accuracy-squad.py'),
            'brats2019':
            os.path.realpath(
                '../inference/vision/medical_imaging/3d-unet/accuracy-brats.py'
            ),
        }
        accuracy_script_path = accuracy_script_paths[args.dataset]
        accuracy_file_path = os.path.join(log_dir, 'mlperf_log_accuracy.json')
        data_dir = os.environ['DATA_DIR']

        # Commands are built as argv lists so paths containing spaces or
        # shell metacharacters reach the script unchanged.
        if args.dataset == 'coco':
            cmd = [
                'python3', accuracy_script_path,
                '--mlperf-accuracy-file', accuracy_file_path,
                '--coco-dir', data_dir,
            ]
            if args.use_inv_map:
                cmd.append('--use-inv-map')
        elif args.dataset == 'imagenet':
            cmd = [
                'python3', accuracy_script_path,
                '--mlperf-accuracy-file', accuracy_file_path,
                '--imagenet-val-file', os.path.join(data_dir, 'val_map.txt'),
            ]
        elif args.dataset == 'squad':
            vocab_path = os.path.join(data_dir, 'vocab.txt')
            val_path = os.path.join(data_dir, 'dev-v1.1.json')
            out_path = os.path.join(log_dir, 'predictions.json')
            cache_path = os.path.join(data_dir, 'eval_features.pickle')
            cmd = [
                'python3', accuracy_script_path,
                '--vocab_file', vocab_path,
                '--val_data', val_path,
                '--log_file', accuracy_file_path,
                '--out_file', out_path,
                '--features_cache_file', cache_path,
                '--max_examples', str(count),
            ]
        elif args.dataset == 'brats2019':
            base_dir = os.path.realpath(
                '../inference/vision/medical_imaging/3d-unet/build')
            post_dir = os.path.join(base_dir, 'postprocessed_data')
            label_dir = os.path.join(
                base_dir,
                'raw_data/nnUNet_raw_data/Task043_BraTS2019/labelsTr')
            os.makedirs(post_dir, exist_ok=True)
            cmd = [
                'python3', accuracy_script_path,
                '--log_file', accuracy_file_path,
                '--preprocessed_data_dir', data_dir,
                '--postprocessed_data_dir', post_dir,
                '--label_data_dir', label_dir,
            ]
        else:
            raise RuntimeError('Dataset not Implemented.')
        subprocess.check_call(cmd)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    # Python signature:
    # go_finalize(so)
    err = go_finalize(so)
    if err != '':
        print(err)
        raise RuntimeError('finialize in go failed')
예제 #11
0
def main():
    """Run the RNN-T LoadGen test, optionally tuning/benchmarking with LPOT.

    Builds the PyTorch SUT from the command-line arguments, configures
    LoadGen from the mlperf/user config files, and then:

    * with ``args.tune``: quantizes the model with LPOT using ``eval_func``
      as the evaluation callback and saves the result to
      ``args.tuned_checkpoint``;
    * with ``args.benchmark``: runs ``perf_func`` on either the original
      model or (with ``args.int8``) the model loaded from
      ``args.tuned_checkpoint``.

    Raises:
        ValueError: if ``args.backend`` is not ``"pytorch"``.
    """
    args = get_args()

    if args.backend == "pytorch":
        from pytorch_SUT import PytorchSUT
        sut = PytorchSUT(args.pytorch_config_toml, args.pytorch_checkpoint,
                         args.dataset_dir, args.manifest, args.perf_count)
        model = sut.greedy_decoder._model
    else:
        raise ValueError("Unknown backend: {:}".format(args.backend))

    settings = lg.TestSettings()
    settings.scenario = scenario_map[args.scenario]
    settings.FromConfig(args.mlperf_conf, "rnnt", args.scenario)
    settings.FromConfig(args.user_conf, "rnnt", args.scenario)

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    log_path = args.log_dir
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    # Raw strings: "\d" must reach the regex engine verbatim instead of
    # relying on Python tolerating an invalid string escape sequence.
    # Index 0: accuracy line, 1: samples-per-query line, 2: throughput line.
    pattern = [
        r'accuracy=\d+.\d+', r'samples_per_query : \d+',
        r'Samples per second: \d+.\d+'
    ]

    def eval_func(model):
        """Run LoadGen with ``model`` and return the parsed accuracy.

        In accuracy mode the accuracy is taken from the first
        ``accuracy=<number>`` match in the output of ``accuracy_eval.py``;
        otherwise 0 is returned.
        """
        print("Running Loadgen test...")
        sut.greedy_decoder._model = model
        lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings,
                                    log_settings)
        if args.accuracy:
            cmd = f"python3 accuracy_eval.py --log_dir {log_path} \
               --dataset_dir {args.dataset_dir} --manifest {args.manifest}"

            out = subprocess.check_output(cmd, shell=True)
            out = out.decode()
            regex_accu = re.compile(pattern[0])
            accu = float(regex_accu.findall(out)[0].split('=')[1])
            return accu
        return 0

    def perf_func(model):
        """Run LoadGen with ``model`` and print performance figures.

        In performance mode (``args.accuracy`` unset) the batch size,
        latency and throughput are derived from the first matching lines
        of ``mlperf_log_summary.txt`` in the log directory.
        """
        print("Running Loadgen test...")
        sut.greedy_decoder._model = model
        lg.StartTestWithLogSettings(sut.sut, sut.qsl.qsl, settings,
                                    log_settings)
        if not args.accuracy:
            file_path = os.path.join(log_path, 'mlperf_log_summary.txt')
            # Context manager guarantees the handle is closed even if the
            # read fails.
            with open(file_path, 'r', encoding='UTF-8') as f:
                file_content = f.read()
            regex_batch = re.compile(pattern[1])
            regex_thro = re.compile(pattern[2])
            samples_per_query = int(
                regex_batch.findall(file_content)[0].split(': ')[1])
            samples_per_second = float(
                regex_thro.findall(file_content)[0].split(': ')[1])
            print('Batch size = %d' % samples_per_query)
            # Latency is reported as the reciprocal of throughput, in ms.
            print('Latency: %.3f ms' % ((1 / samples_per_second) * 1000))
            print('Throughput: %.3f samples/sec' % samples_per_second)

    if args.tune:
        # Dynamic Quantization with LPOT
        from lpot.experimental import Quantization, common
        quantizer = Quantization("./conf.yaml")
        quantizer.model = common.Model(model)
        quantizer.eval_func = eval_func
        q_model = quantizer()
        q_model.save(args.tuned_checkpoint)

    if args.benchmark:
        if args.int8:
            from lpot.utils.pytorch import load
            new_model = load(
                os.path.abspath(os.path.expanduser(args.tuned_checkpoint)),
                model)
        else:
            new_model = model
        perf_func(new_model)

    print("Done!", flush=True)
예제 #12
0
def main():
    """Run the multi-instance BERT LoadGen test.

    Starts ``args.num_instance`` ``Consumer`` workers fed through a shared
    input queue, waits for them to calibrate and initialize, runs LoadGen
    against a SUT whose queries are pushed onto that queue, then shuts the
    workers down and, in accuracy mode, runs ``accuracy-squad.py`` on the
    resulting accuracy log.

    With ``args.perf_calibrate`` the function only writes the calibration
    profile ``prof_new.py`` and exits the process after calibration.
    """
    global num_ins
    global num_cpus
    global in_queue_cnt
    global out_queue_cnt
    global batching
    global bs_step

    args = get_args()
    log.info(args)
    scenario = args.scenario
    accuracy_mode = args.accuracy
    perf_count = args.perf_count
    batch_size = args.batch_size
    num_ins = args.num_instance
    num_cpus = args.num_phy_cpus
    batching = args.batching

    ## TODO, remove
    log.info('Run with {} instance on {} cpus: '.format(num_ins, num_cpus))

    # Establish communication queues
    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    calibrate_counter = multiprocessing.Value("i", 0)
    out_queue = multiprocessing.Queue()
    in_queue = MultiprocessShapeBasedQueue()

    if args.perf_calibrate:
        # Write the header of the profile file; the closing brace is
        # appended below once calibration has finished.
        with open('prof_new.py', 'w') as f:
            print('prof_bs_step = {}'.format(bs_step), file=f)
            print('prof_map = {', file=f)

    # Start consumers
    consumers = [
        Consumer(in_queue, out_queue, lock, init_counter, calibrate_counter, i,
                 num_ins, args) for i in range(num_ins)
    ]
    for c in consumers:
        c.start()

    # used by constructQSL
    data_set = BERTDataSet(args.vocab, args.perf_count)
    issue_queue = InQueue(in_queue, batch_size, data_set)

    # Wait until all sub-processes are ready to do calibration
    block_until(calibrate_counter, num_ins)
    # Wait until all sub-processes have finished calibration
    block_until(calibrate_counter, 2 * num_ins)
    if args.perf_calibrate:
        with open('prof_new.py', 'a') as f:
            print('}', file=f)
        sys.exit(0)
    # Wait until all sub-processes are ready
    block_until(init_counter, num_ins)

    # Start response thread
    response_worker = threading.Thread(target=response_loadgen,
                                       args=(out_queue, ))
    response_worker.daemon = True
    response_worker.start()

    # Start loadgen
    settings = lg.TestSettings()
    settings.scenario = scenario_map[scenario]
    settings.FromConfig(args.mlperf_conf, "bert", scenario)
    settings.FromConfig(args.user_conf, "bert", scenario)
    settings.mode = lg.TestMode.AccuracyOnly if accuracy_mode else lg.TestMode.PerformanceOnly

    # TODO, for debug, remove
    #settings.server_target_qps = 40
    #settings.server_target_latency_ns = 100000000
    #settings.min_query_count = 100
    #settings.min_duration_ms = 10000

    def issue_queries(query_samples):
        """Forward the samples LoadGen issues to the SUT input queue."""
        issue_queue.put(query_samples)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(data_set.count, data_set.perf_count,
                          load_query_samples, unload_query_samples)

    log_path = "build/logs"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(log_path, exist_ok=True)
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = log_path
    log_output_settings.copy_summary_to_stdout = True
    log_settings = lg.LogSettings()
    log_settings.log_output = log_output_settings

    #lg.StartTest(sut, qsl, settings)
    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    # Wait until outQueue done: poll the module-level counters until the
    # output count has caught up with the input count.
    while out_queue_cnt < in_queue_cnt:
        time.sleep(0.2)

    in_queue.join()
    # One None per consumer -- presumably the stop sentinel each Consumer
    # watches for; confirm against Consumer.run.
    for _ in range(num_ins):
        in_queue.put(None)
    for c in consumers:
        c.join()
    # Presumably the stop sentinel for the response thread -- confirm
    # against response_loadgen.
    out_queue.put(None)

    if accuracy_mode:
        cmd = "python accuracy-squad.py --log_file={}/mlperf_log_accuracy.json".format(
            log_path)
        subprocess.check_call(cmd, shell=True)

    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)