Example #1
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count limits the number of images used for testing; if set, it also
    # overrides loadgen's min/max query count below.
    count = args.count
    count_override = bool(count)

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count, **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    # number of samples the QSL will expose
    count = ds.get_item_count()

    # warmup
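    # run a few predictions on one sample so one-time backend setup
    # (graph build, allocations) stays out of the measured run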
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
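    # SingleStream uses the base (synchronous) runner; the other scenarios go
    # through the queue-based runner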
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }
    runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc, max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

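    # write loadgen's logs into the output directory and keep the summary off
    # stdout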
    log_output_settings = lg.LogOutputSettings()
    log_output_settings.outdir = output_dir
    log_output_settings.copy_summary_to_stdout = False
    log_settings = lg.LogSettings()
    log_settings.enable_trace = args.debug
    log_settings.log_output = log_output_settings

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, args.model_name, args.scenario)
    settings.FromConfig(user_conf, args.model_name, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.time:
        # override the time we want to run
        settings.min_duration_ms = args.time * MILLI_SEC
        settings.max_duration_ms = args.time * MILLI_SEC

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if count_override:
        settings.min_query_count = count
        settings.max_query_count = count

    if args.samples_per_query:
        settings.multi_stream_samples_per_query = args.samples_per_query
    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
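    # QSL arguments: total sample count, how many samples loadgen may keep
    # resident at once (capped at 500 here), and the load/unload callbacks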
    qsl = lg.ConstructQSL(count, min(count, 500), ds.load_query_samples, ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
    runner.start_run(result_dict, args.accuracy)

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)

    add_results(final_results, "{}".format(scenario),
                result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
Example #2
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend, args.dataset, args.max_ind_range,
                          args.data_sub_sample_rate, args.use_gpu)

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]

    # --count-samples can be used to limit the number of samples used for testing
    ds = wanted_dataset(
        data_path=args.dataset_path,
        name=args.dataset,
        pre_process=pre_proc,  # currently an identity function
        use_cache=args.cache,  # currently not used
        count=args.count_samples,
        samples_to_aggregate_fix=args.samples_to_aggregate_fix,
        samples_to_aggregate_min=args.samples_to_aggregate_min,
        samples_to_aggregate_max=args.samples_to_aggregate_max,
        samples_to_aggregate_quantile_file=args.samples_to_aggregate_quantile_file,
        samples_to_aggregate_trace_file=args.samples_to_aggregate_trace_file,
        test_num_workers=args.test_num_workers,
        max_ind_range=args.max_ind_range,
        sub_sample_rate=args.data_sub_sample_rate,
        mlperf_bin_loader=args.mlperf_bin_loader,
        **kwargs)
    # load model to backend
    model = backend.load(args.model_path,
                         inputs=args.inputs,
                         outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    config = os.path.abspath(args.config)
    if not os.path.exists(config):
        log.error("{} not found".format(config))
        sys.exit(1)

    if args.output:
        output_dir = os.path.abspath(args.output)
        os.makedirs(output_dir, exist_ok=True)
        os.chdir(output_dir)

    # number of samples the QSL will expose
    count = ds.get_item_count()
    # warmup
    ds.load_query_samples([0])

    for _ in range(5):
        batch_dense_X, batch_lS_o, batch_lS_i, _, _ = ds.get_samples([0])
        _ = backend.predict(batch_dense_X, batch_lS_o, batch_lS_i)

    ds.unload_query_samples(None)

    scenario = SCENARIO_MAP[args.scenario]
    runner_map = {
        lg.TestScenario.SingleStream: RunnerBase,
        lg.TestScenario.MultiStream: QueueRunner,
        lg.TestScenario.Server: QueueRunner,
        lg.TestScenario.Offline: QueueRunner
    }

    runner = runner_map[scenario](model,
                                  ds,
                                  args.threads,
                                  post_proc=post_proc,
                                  max_batchsize=args.max_batchsize)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

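    # settings come from the config file first; the flags below override them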
    settings = lg.TestSettings()
    settings.FromConfig(config, args.model, args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly

    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly

    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.duration:
        settings.min_duration_ms = args.duration
        settings.max_duration_ms = args.duration

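    # the same target qps drives both the Server scheduler and the expected
    # Offline load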
    if args.target_qps:
        settings.server_target_qps = float(args.target_qps)
        settings.offline_expected_qps = float(args.target_qps)

    if args.count_queries:
        settings.min_query_count = args.count_queries
        settings.max_query_count = args.count_queries

    if args.samples_per_query_multistream:
        settings.multi_stream_samples_per_query = args.samples_per_query_multistream

    if args.max_latency:
        settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
        settings.multi_stream_target_latency_ns = int(args.max_latency *
                                                      NANO_SEC)

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, args.samples_per_query_offline),
                          ds.load_query_samples, ds.unload_query_samples)

    log.info("starting {}".format(scenario))
    result_dict = {
        "good": 0,
        "total": 0,
        "roc_auc": 0,
        "scenario": str(scenario)
    }
    runner.start_run(result_dict, args.accuracy)
    lg.StartTest(sut, qsl, settings)

    if not last_timeing:
        last_timeing = runner.result_timing
    if args.accuracy:
        post_proc.finalize(result_dict, ds, output_dir=args.output)
    add_results(final_results, "{}".format(scenario), result_dict,
                last_timeing,
                time.time() - ds.last_loaded, args.accuracy)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open("results.json", "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
Example #3
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count, **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    if args.accuracy:
        #
        # accuracy pass
        #
        log.info("starting accuracy pass on {} items".format(count))
        last_timeing = []
        runner = RunnerBase(model, ds, args.threads, post_proc=post_proc)
        result_dict = {"good": 0, "total": 0, "scenario": "Accuracy"}
        runner.start_run(result_dict, True)
        start = time.time()
        for idx in range(0, count):
            ds.load_query_samples([idx])
            data, label = ds.get_samples([idx])
            start_one = time.time()
            runner.enqueue([idx], [idx], data, label)
            last_timeing.append(time.time() - start_one)
        runner.finish()
        # aggregate results
        post_proc.finalize(result_dict, ds, output_dir=os.path.dirname(args.output))
        add_results(final_results, "Accuracy", result_dict, last_timeing, time.time() - start)

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    for scenario in args.scenario:
        runner_map = {
            lg.TestScenario.SingleStream: RunnerBase,
            lg.TestScenario.MultiStream: QueueRunner,
            lg.TestScenario.Server: QueueRunner,
            lg.TestScenario.Offline: QueueRunner
        }
        runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc)

        def issue_query(query_samples):
            # called by loadgen to issue queries
            idx = [q.index for q in query_samples]
            query_id = [q.id for q in query_samples]
            data, label = ds.get_samples(idx)
            runner.enqueue(query_id, idx, data, label)

        def process_latencies(latencies_ns):
            # called by loadgen to show us the recorded latencies
            global last_timeing
            last_timeing = [t / 1e9 for t in latencies_ns]

        settings = lg.TestSettings()
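        # this example predates the current loadgen API: override_* fields are
        # gated behind enable_spec_overrides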
        settings.enable_spec_overrides = True
        settings.scenario = scenario
        settings.mode = lg.TestMode.PerformanceOnly
        settings.multi_stream_samples_per_query = 8

        if args.time:
            # override the time we want to run
            settings.enable_spec_overrides = True
            settings.override_min_duration_ms = args.time * MILLI_SEC
            settings.override_max_duration_ms = args.time * MILLI_SEC

        if args.qps:
            qps = float(args.qps)
            settings.server_target_qps = qps
            settings.offline_expected_qps = qps

        # mlperf rules - min queries
        if scenario == lg.TestScenario.SingleStream:
            settings.override_min_query_count = args.queries_single
            settings.override_max_query_count = args.queries_single
        else:
            settings.override_min_query_count = args.queries_multi
            settings.override_max_query_count = args.queries_multi

        sut = lg.ConstructSUT(issue_query, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 1000), ds.load_query_samples, ds.unload_query_samples)

        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario, target_latency))

            settings.single_stream_expected_latency_ns = int(target_latency * NANO_SEC)
            settings.override_target_latency_ns = int(target_latency * NANO_SEC)

            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, False)
            lg.StartTest(sut, qsl, settings)

            add_results(final_results, "{}-{}".format(scenario, target_latency),
                        result_dict, last_timeing, time.time() - ds.last_loaded)

        runner.finish()
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
Example #4
    def process(self, qitem):
        # best-guess reconstruction of the truncated method head: report which
        # query is being processed, then bump and return the counter
        print("Dummy processing query {} for sample id {}"
            .format(self.count, qitem.sample_id[0]))
        self.count += 1

        return self.count


if __name__ == "__main__":
    runner = DummyRunner()

    runner.start_worker()

    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.SingleStream
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly

    # Specify exactly how many queries need to be made
    settings.min_query_count = 3003
    settings.max_query_count = 3003

    total_queries = 256  # Maximum sample ID + 1
    perf_queries = 8  # TBD: Doesn't seem to have an effect

    sut = mlperf_loadgen.ConstructSUT(runner.enqueue, flush_queries,
                                      process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(total_queries, perf_queries,
                                      runner.load_samples_to_ram,
                                      runner.unload_samples_from_ram)
    mlperf_loadgen.StartTest(sut, qsl, settings)
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)
Example #5
def main():
    args = get_args()

    print(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, preprocessor, postprocessor, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=preprocessor,
                        use_cache=args.cache,
                        count=args.count,
                        **kwargs)

    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)

    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    # number of samples the QSL will expose
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_process=postprocessor)
    runner.start_pool()

    # warmup
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(100):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})

    def issue_query(query_samples):
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, data, label)

    sut = lg.ConstructSUT(issue_query)
    qsl = lg.ConstructQSL(count, args.time, ds.load_query_samples,
                          ds.unload_query_samples)
    scenarios = [
        # lg.TestScenario.SingleStream,
        lg.TestScenario.MultiStream,
        # lg.TestScenario.Cloud,
        # lg.TestScenario.Offline,
    ]
    for scenario in scenarios:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario,
                                                      target_latency))
            settings = lg.TestSettings()
            settings.scenario = scenario
            settings.mode = lg.TestMode.SubmissionRun
            settings.samples_per_query = 4  # FIXME: we don't want to know about this
            settings.target_qps = 1000  # FIXME: we don't want to know about this
            settings.target_latency_ns = int(target_latency * 1000000000)

            result_list = []
            result_dict = {"good": 0, "total": 0}
            runner.start_run(result_list, result_dict)
            start = time.time()
            lg.StartTest(sut, qsl, settings)
            add_results(final_results, "{}-{}".format(scenario,
                                                      target_latency),
                        result_dict, result_list,
                        time.time() - start)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
Example #6
def main():
    global num_ins
    global num_phy_cpus
    global in_queue_cnt
    global out_queue_cnt

    args = get_args()
    log.info(args)

    num_ins = args.num_instance
    num_phy_cpus = args.num_phy_cpus
    log.info('Run with {} instance on {} cpus'.format(num_ins, num_phy_cpus))

    mlperf_conf = os.path.abspath(args.mlperf_conf)
    if not os.path.exists(mlperf_conf):
        log.error("{} not found".format(mlperf_conf))
        sys.exit(1)

    user_conf = os.path.abspath(args.user_conf)
    if not os.path.exists(user_conf):
        log.error("{} not found".format(user_conf))
        sys.exit(1)

    image_format = 'NCHW'
    dataset = "imagenet"
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[dataset]

    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        cache_dir=args.cache_dir,
                        count=args.count,
                        use_int8=args.use_int8_dataset,
                        num_workers=num_phy_cpus,
                        **kwargs)

    # Establish communication queues
    log.info('Start consumer queues and response thread')
    lock = multiprocessing.Lock()
    init_counter = multiprocessing.Value("i", 0)
    in_queue = multiprocessing.JoinableQueue()
    out_queue = multiprocessing.Queue()
    ds_queue = multiprocessing.Queue()
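    # in_queue carries work to the consumer processes, out_queue returns their
    # results to the response thread, and ds_queue broadcasts sample loads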

    # Start consumers
    consumers = [Consumer(in_queue, out_queue, ds_queue, lock, init_counter, i, args)
                 for i in range(num_ins)]
    for c in consumers:
        c.start()

    # Wait until all sub-processes are ready
    block_until(init_counter, num_ins, 2)

    # Start response thread
    response_worker = threading.Thread(
        target=response_loadgen, args=(out_queue,))
    response_worker.daemon = True
    response_worker.start()

    scenario = SCENARIO_MAP[args.scenario]
    runner = QueueRunner(in_queue, args.batch_size)

    def issue_queries(response_ids, query_sample_indexes):
        runner.put(response_ids, query_sample_indexes)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        log.info("Average latency: {}".format(np.mean(latencies_ns)))
        log.info("Median latency: {}".format(np.percentile(latencies_ns, 50)))
        log.info("90 percentile latency: {}".format(np.percentile(latencies_ns, 90)))

    def load_query_samples(sample_list):
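        # broadcast the sample list to every consumer, then block until all of
        # them have loaded it (init_counter reaches 2 * num_ins)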
        for _ in range(num_ins):
            ds_queue.put(sample_list)
        block_until(init_counter, 2 * num_ins, 2)

    def unload_query_samples(sample_list):
        pass

    settings = lg.TestSettings()
    settings.FromConfig(mlperf_conf, "resnet50", args.scenario)
    settings.FromConfig(user_conf, "resnet50", args.scenario)
    settings.scenario = scenario
    settings.mode = lg.TestMode.PerformanceOnly
    if args.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    if args.find_peak_performance:
        settings.mode = lg.TestMode.FindPeakPerformance

    if args.qps:
        qps = float(args.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    count = ds.get_item_count()
    perf_count = 1024
    if args.accuracy:
        perf_count = count
    sut = lg.ConstructFastSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, perf_count, load_query_samples, unload_query_samples)

    log.info("starting {}".format(scenario))
    lg.StartTest(sut, qsl, settings)

    # Wait until the output queue has drained
    while out_queue_cnt < in_queue_cnt:
        time.sleep(0.2)

    in_queue.join()
    for _ in range(num_ins):
        in_queue.put('DONE')
    for c in consumers:
        c.join()
    out_queue.put('DONE')

    if args.accuracy:
        output_file = 'accuracy.txt'
        if args.output_file:
            output_file = args.output_file
        cmd = "python tools/accuracy-imagenet.py " \
              "--mlperf-accuracy-file=mlperf_log_accuracy.json " \
              "--imagenet-val-file=val_map.txt --output-file={}".format(output_file)
        cmd = cmd.split(' ')
        subprocess.check_call(cmd)

    lg.DestroyQSL(qsl)
    lg.DestroyFastSUT(sut)

    log.info('Test done.')
Example #7
def main(argv):
    del argv

    global last_timeing

    if FLAGS.scenario == "Server":
        # Disable garbage collection for realtime performance.
        gc.disable()

    # define backend
    backend = BackendTensorflow()

    # override image format if given
    image_format = FLAGS.data_format if FLAGS.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        FLAGS.dataset]
    ds = wanted_dataset(data_path=FLAGS.dataset_path,
                        image_list=FLAGS.dataset_list,
                        name=FLAGS.dataset,
                        image_format=image_format,
                        use_cache=FLAGS.cache,
                        count=FLAGS.count,
                        cache_dir=FLAGS.cache_dir,
                        annotation_file=FLAGS.annotation_file,
                        use_space_to_depth=FLAGS.use_space_to_depth)
    # load model to backend
    # TODO(wangtao): parse flags to params.
    params = dict(ssd_model.default_hparams().values())
    params["conv0_space_to_depth"] = FLAGS.use_space_to_depth
    params["use_bfloat16"] = FLAGS.use_bfloat16
    params["use_fused_bn"] = FLAGS.use_fused_bn

    masters = []
    tpu_names = FLAGS.tpu_name
    tpu_names = tpu_names.split(",")
    for tpu_name in tpu_names:
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)
        masters.append(tpu_cluster_resolver.get_master())

    #
    # make one pass over the dataset to validate accuracy
    #
    count = FLAGS.count if FLAGS.count else ds.get_item_count()

    #
    # warmup
    #
    log.info("warmup ...")

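    # Offline uses the first configured batch size; all other scenarios send
    # one sample per query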
    batch_size = FLAGS.batch_size[0] if FLAGS.scenario == "Offline" else 1
    backend_lists = []
    for _ in range(len(tpu_names)):
        backend = BackendTensorflow()
        backend_lists.append(backend)
    runner = QueueRunner(backend_lists,
                         ds,
                         FLAGS.threads,
                         post_proc=post_proc,
                         max_batchsize=batch_size)

    runner.start_run({}, FLAGS.accuracy)

    def issue_queries(query_samples):
        runner.enqueue(query_samples)

    def flush_queries():
        pass

    def process_latencies(latencies_ns):
        # called by loadgen to show us the recorded latencies
        global last_timeing
        last_timeing = [t / NANO_SEC for t in latencies_ns]

    tf.logging.info("starting {}, latency={}".format(FLAGS.scenario,
                                                     FLAGS.max_latency))
    settings = lg.TestSettings()
    tf.logging.info(FLAGS.scenario)
    settings.scenario = SCENARIO_MAP[FLAGS.scenario]
    settings.qsl_rng_seed = FLAGS.qsl_rng_seed
    settings.sample_index_rng_seed = FLAGS.sample_index_rng_seed
    settings.schedule_rng_seed = FLAGS.schedule_rng_seed

    if FLAGS.accuracy:
        settings.mode = lg.TestMode.AccuracyOnly
    else:
        settings.mode = lg.TestMode.PerformanceOnly

    if FLAGS.qps:
        qps = float(FLAGS.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if FLAGS.time:
        settings.min_duration_ms = FLAGS.time * MILLI_SEC
        settings.max_duration_ms = 0
        qps = FLAGS.qps or 100
        settings.min_query_count = qps * FLAGS.time
        settings.max_query_count = 0
    else:
        settings.min_query_count = 270336
        settings.max_query_count = 0

    target_latency_ns = int(float(FLAGS.max_latency) * NANO_SEC)
    settings.single_stream_expected_latency_ns = target_latency_ns
    settings.multi_stream_target_latency_ns = target_latency_ns
    settings.server_target_latency_ns = target_latency_ns

    log_settings = lg.LogSettings()
    log_settings.log_output.outdir = tempfile.mkdtemp()
    log_settings.log_output.copy_detail_to_stdout = True
    log_settings.log_output.copy_summary_to_stdout = True
    log_settings.enable_trace = False

    def load_query_samples(sample_list):
        """Load query samples and warmup the model."""
        ds.load_query_samples(sample_list)
        data = ds.get_image_list_inmemory()

        def init_fn(cloud_tpu_id):
            tf.logging.info("Load model for %dth cloud tpu", cloud_tpu_id)
            runner.models[cloud_tpu_id].load(
                FLAGS.model,
                FLAGS.output_model_dir,
                data,
                params,
                batch_size=FLAGS.batch_size,
                master=masters[cloud_tpu_id],
                scenario=FLAGS.scenario,
                batch_timeout_micros=FLAGS.batch_timeout_micros)

            # Init TPU.
            for it in range(FLAGS.init_iterations):
                tf.logging.info("Initialize cloud tpu at iteration %d", it)
                for batch_size in FLAGS.batch_size:
                    example, _ = ds.get_indices([sample_list[0]] * batch_size)
                    _ = runner.models[cloud_tpu_id].predict(example)

        threads = []
        for i in range(len(tpu_names)):
            thread = threading.Thread(target=init_fn, args=(i, ))
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()

    sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 350), load_query_samples,
                          ds.unload_query_samples)

    lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)

    tf.io.gfile.mkdir(FLAGS.outdir)

    for oldfile in tf.gfile.Glob(
            os.path.join(log_settings.log_output.outdir, "*")):
        basename = os.path.basename(oldfile)
        newfile = os.path.join(FLAGS.outdir, basename)
        tf.gfile.Copy(oldfile, newfile, overwrite=True)

    if FLAGS.accuracy:
        with tf.gfile.Open(os.path.join(FLAGS.outdir, "results.txt"),
                           "w") as f:
            results = {"mAP": accuracy_coco.main()}
            json.dump(results, f, sort_keys=True, indent=4)
Example #8
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[
        args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=args.count,
                        **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    #
    # make one pass over the dataset to validate accuracy
    #
    count = args.count if args.count else ds.get_item_count()

    runner = Runner(model, ds, args.threads, post_proc=post_proc)

    #
    # warmup
    #
    log.info("warmup ...")
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    if args.accuracy:
        #
        # accuracy pass
        #
        log.info("starting accuracy pass on {} items".format(count))
        runner.start_pool(nolg=True)
        result_dict = {
            "good": 0,
            "total": 0,
            "scenario": "Accuracy",
            "timing": []
        }
        runner.start_run(result_dict, True)
        start = time.time()
        for idx in range(0, count):
            ds.load_query_samples([idx])
            data, label = ds.get_samples([idx])
            runner.enqueue([idx], [idx], data, label)
        runner.finish()
        # aggregate results
        post_proc.finalize(result_dict,
                           ds,
                           output_dir=os.path.dirname(args.output))
        last_timeing = result_dict["timing"]
        del result_dict["timing"]
        add_results(final_results, "Accuracy", result_dict, last_timeing,
                    time.time() - start)

    #
    # run the benchmark with timing
    #
    runner.start_pool()

    def issue_query(query_samples):
        idx = [q.index for q in query_samples]
        query_id = [q.id for q in query_samples]
        data, label = ds.get_samples(idx)
        runner.enqueue(query_id, idx, data, label)

    def process_latencies(latencies_ns):
        global last_timeing
        last_timeing = [t / 1e9 for t in latencies_ns]

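    # this snapshot of the loadgen API registers only two SUT callbacks; there
    # is no flush_queries yet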
    sut = lg.ConstructSUT(issue_query, process_latencies)
    qsl = lg.ConstructQSL(count, min(count, 1000), ds.load_query_samples,
                          ds.unload_query_samples)

    for scenario in args.scenario:
        for target_latency in args.max_latency:
            log.info("starting {}, latency={}".format(scenario,
                                                      target_latency))
            settings = lg.TestSettings()
            log.info(scenario)
            if str(scenario) == 'TestMode.AccuracyOnly':
                settings.mode = scenario
            else:
                settings.scenario = scenario

            if args.qps:
                settings.enable_spec_overrides = True
                qps = float(args.qps)
                settings.server_target_qps = qps
                settings.offline_expected_qps = qps

            if args.time:
                settings.enable_spec_overrides = True
                settings.override_min_duration_ms = args.time * MILLI_SEC
                settings.override_max_duration_ms = args.time * MILLI_SEC
                qps = args.qps or 100
                settings.override_min_query_count = qps * args.time
                settings.override_max_query_count = qps * args.time

            # parenthesized so accuracy mode is never forced into PerformanceOnly
            if (args.time or args.qps) and str(scenario) != 'TestMode.AccuracyOnly':
                settings.mode = lg.TestMode.PerformanceOnly
            # FIXME: add SubmissionRun once available

            settings.enable_spec_overrides = True
            settings.single_stream_expected_latency_ns = int(target_latency *
                                                             NANO_SEC)
            settings.override_target_latency_ns = int(target_latency *
                                                      NANO_SEC)

            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, False)
            lg.StartTest(sut, qsl, settings)

            add_results(final_results, "{}-{}".format(scenario,
                                                      target_latency),
                        result_dict, last_timeing,
                        time.time() - ds.last_loaded)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)

    runner.finish()
    lg.DestroyQSL(qsl)
    lg.DestroySUT(sut)
Example #9
def main():
    global last_timeing
    args = get_args()

    log.info(args)

    # find backend
    backend = get_backend(args.backend)

    # override image format if given
    image_format = args.data_format if args.data_format else backend.image_format()

    # --count applies to accuracy mode only and can be used to limit the number
    # of images used for testing. In performance mode the count defaults to 200.
    count = args.count
    if not count and not args.accuracy:
        count = 200

    # dataset to use
    wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
    ds = wanted_dataset(data_path=args.dataset_path,
                        image_list=args.dataset_list,
                        name=args.dataset,
                        image_format=image_format,
                        pre_process=pre_proc,
                        use_cache=args.cache,
                        count=count, **kwargs)
    # load model to backend
    model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs)
    final_results = {
        "runtime": model.name(),
        "version": model.version(),
        "time": int(time.time()),
        "cmdline": str(args),
    }

    # number of samples the QSL will expose
    count = ds.get_item_count()

    # warmup
    ds.load_query_samples([0])
    for _ in range(5):
        img, _ = ds.get_samples([0])
        _ = backend.predict({backend.inputs[0]: img})
    ds.unload_query_samples(None)

    for scenario in args.scenario:
        runner_map = {
            lg.TestScenario.SingleStream: RunnerBase,
            lg.TestScenario.MultiStream: QueueRunner,
            lg.TestScenario.Server: QueueRunner,
            lg.TestScenario.Offline: QueueRunner
        }
        runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc, max_batchsize=args.max_batchsize)

        def issue_queries(query_samples):
            runner.enqueue(query_samples)

        def flush_queries():
            pass

        def process_latencies(latencies_ns):
            # called by loadgen to show us the recorded latencies
            global last_timeing
            last_timeing = [t / NANO_SEC for t in latencies_ns]

        settings = lg.TestSettings()
        settings.scenario = scenario
        settings.mode = lg.TestMode.PerformanceOnly
        if args.accuracy:
            settings.mode = lg.TestMode.AccuracyOnly
        if args.find_peak_performance:
            settings.mode = lg.TestMode.FindPeakPerformance

        if args.time:
            # override the time we want to run
            settings.min_duration_ms = args.time * MILLI_SEC
            settings.max_duration_ms = args.time * MILLI_SEC

        if args.qps:
            qps = float(args.qps)
            settings.server_target_qps = qps
            settings.offline_expected_qps = qps

        if scenario == lg.TestScenario.SingleStream:
            settings.min_query_count = args.queries_single
            settings.max_query_count = args.queries_single
        elif scenario == lg.TestScenario.MultiStream:
            settings.min_query_count = args.queries_multi
            settings.max_query_count = args.queries_multi
            settings.multi_stream_samples_per_query = 4
        elif scenario == lg.TestScenario.Server:
            max_latency = args.max_latency
        elif scenario == lg.TestScenario.Offline:
            settings.min_query_count = args.queries_offline
            settings.max_query_count = args.queries_offline

        sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
        qsl = lg.ConstructQSL(count, min(count, 1000), ds.load_query_samples, ds.unload_query_samples)

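        # Server mode sweeps every requested target latency; the other
        # scenarios run the test once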
        if scenario == lg.TestScenario.Server:
            for target_latency in max_latency:
                log.info("starting {}, latency={}".format(scenario, target_latency))
                settings.server_target_latency_ns = int(target_latency * NANO_SEC)

                result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
                runner.start_run(result_dict, args.accuracy)
                lg.StartTest(sut, qsl, settings)

                if not last_timeing:
                    last_timeing = runner.result_timing
                if args.accuracy:
                    post_proc.finalize(result_dict, ds, output_dir=os.path.dirname(args.output))
                add_results(final_results, "{}-{}".format(scenario, target_latency),
                            result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy)
        else:
            log.info("starting {}".format(scenario))
            result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
            runner.start_run(result_dict, args.accuracy)
            lg.StartTest(sut, qsl, settings)

            if not last_timeing:
                last_timeing = runner.result_timing
            if args.accuracy:
                post_proc.finalize(result_dict, ds, output_dir=os.path.dirname(args.output))
            add_results(final_results, "{}".format(scenario),
                        result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy)

        runner.finish()
        lg.DestroyQSL(qsl)
        lg.DestroySUT(sut)

    #
    # write final results
    #
    if args.output:
        with open(args.output, "w") as f:
            json.dump(final_results, f, sort_keys=True, indent=4)
Example #10
    def __del__(self):
        lg.DestroyQSL(self.qsl)
        print("Finished destroying QSL.")
Example #11
def main():
  global last_timeing
  args = get_args()

  log.info(args)

  backend = BackendTensorRT()

  ds = Imagenet(
      data_path=args.dataset_path,
      use_cache=args.cache,
      batch_size=args.batch_size,
      image_size=args.image_size,
      calib_file='cal_image_list_option_%d.txt' % args.calib_file)

  model = backend.load(args, ds=ds)

  final_results = {
      "runtime": model.name(),
      "version": model.version(),
      "time": int(time.time()),
      "cmdline": str(args),
  }

  config = os.path.abspath(args.config)
  assert os.path.exists(config), "%s does not exist!" % config
  user_config = os.path.abspath(args.user_config)
  assert os.path.exists(user_config), "%s does not exist!" % user_config

  base_path = os.path.dirname(os.path.realpath(__file__))
  if args.output:
    output_dir = os.path.abspath(args.output)
    os.makedirs(output_dir, exist_ok=True)
    os.chdir(output_dir)

  post_proc = PostProcessCommon(offset=0)
  runner = QueueRunner(
      model, ds, args.threads, post_proc=post_proc, batch_size=args.batch_size)

  def issue_queries(ids, indices):
    runner.enqueue(ids, indices)

  def flush_queries():
    pass

  def process_latencies(latencies_ns):
    global last_timeing
    last_timeing = [t / NANO_SEC for t in latencies_ns]

  settings = lg.TestSettings()
  model_name = 'OFAnet-AutoSinian'
  settings.FromConfig(config, model_name, args.scenario)
  settings.FromConfig(user_config, model_name, args.scenario)

  if args.audit_test:
    audit_config_path = base_path + '/audit%s.config' % args.audit_test
    settings.FromConfig(audit_config_path, model_name, args.scenario)

  scenario = SCENARIO_MAP[args.scenario]
  settings.scenario = scenario
  settings.mode = lg.TestMode.PerformanceOnly
  if args.accuracy:
    settings.mode = lg.TestMode.AccuracyOnly

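  # note: ConstructFastSUT pairs with the two-argument issue_queries above,
  # which receives raw response ids and sample indices rather than
  # QuerySample objects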
  sut = lg.ConstructFastSUT(issue_queries, flush_queries, process_latencies)
  qsl = lg.ConstructQSL(ds.get_item_count(), args.batch_size,
                        ds.load_query_samples, ds.unload_query_samples)

  log.info("starting {}".format(scenario))
  result_dict = {"good": 0, "total": 0, "scenario": str(scenario)}
  runner.start_run(result_dict, args.accuracy)
  start = time.time()
  lg.StartTest(sut, qsl, settings)
  post_proc.finalize(result_dict)
  add_results(final_results, "{}".format(scenario), result_dict, last_timeing,
              runner.finishTime - ds.last_loaded, args)

  runner.finish()
  lg.DestroyQSL(qsl)
  lg.DestroyFastSUT(sut)

  if args.output:
    with open("results.json", "w") as f:
      json.dump(final_results, f, sort_keys=True, indent=2)
Example #12
def main(argv):
    del argv

    settings = mlperf_loadgen.TestSettings()
    settings.qsl_rng_seed = FLAGS.qsl_rng_seed
    settings.sample_index_rng_seed = FLAGS.sample_index_rng_seed
    settings.schedule_rng_seed = FLAGS.schedule_rng_seed
    if FLAGS.accuracy_mode:
        settings.mode = mlperf_loadgen.TestMode.AccuracyOnly
    else:
        settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.scenario = SCENARIO_MAP[FLAGS.scenario]
    if FLAGS.qps:
        qps = float(FLAGS.qps)
        settings.server_target_qps = qps
        settings.offline_expected_qps = qps

    if FLAGS.scenario == "Offline" or FLAGS.scenario == "Server":
        # split(",") always yields at least one element, so no fallback is needed
        masters = FLAGS.master.split(",")

        runner = loadgen_gnmt.GNMTRunner(input_file=FLAGS.input_file,
                                         ckpt_path=FLAGS.ckpt_path,
                                         hparams_path=FLAGS.hparams_path,
                                         vocab_prefix=FLAGS.vocab_prefix,
                                         outdir=FLAGS.outdir,
                                         batch_size=FLAGS.batch_size,
                                         verbose=FLAGS.verbose,
                                         masters=masters,
                                         scenario=FLAGS.scenario)

        runner.load(FLAGS.batch_timeout_micros)

        # Specify exactly how many queries need to be made
        settings.min_query_count = FLAGS.qps * FLAGS.time
        settings.max_query_count = 0
        settings.min_duration_ms = 60 * MILLI_SEC
        settings.max_duration_ms = 0
        settings.server_target_latency_ns = int(0.25 * NANO_SEC)
        settings.server_target_latency_percentile = 0.97

    else:
        print("Invalid scenario selected")
        assert False

    # Create a thread in the GNMTRunner to start accepting work
    runner.start_worker()

    # Maximum sample ID + 1
    total_queries = FLAGS.query_count
    # Select the same subset of $perf_queries samples
    perf_queries = FLAGS.query_count

    sut = mlperf_loadgen.ConstructSUT(runner.enqueue, flush_queries,
                                      generic_loadgen.process_latencies)
    qsl = mlperf_loadgen.ConstructQSL(total_queries, perf_queries,
                                      runner.load_samples_to_ram,
                                      runner.unload_samples_from_ram)

    log_settings = mlperf_loadgen.LogSettings()
    log_settings.log_output.outdir = tempfile.mkdtemp()
    # Disable detail logs to prevent it from stepping on the summary
    # log in stdout on some systems.
    log_settings.log_output.copy_detail_to_stdout = False
    log_settings.log_output.copy_summary_to_stdout = True
    log_settings.enable_trace = False
    mlperf_loadgen.StartTestWithLogSettings(sut, qsl, settings, log_settings)

    runner.finish()
    mlperf_loadgen.DestroyQSL(qsl)
    mlperf_loadgen.DestroySUT(sut)

    for oldfile in tf.gfile.Glob(
            os.path.join(log_settings.log_output.outdir, "*")):
        basename = os.path.basename(oldfile)
        newfile = os.path.join(FLAGS.outdir, basename)
        tf.gfile.Copy(oldfile, newfile, overwrite=True)

    if FLAGS.accuracy_mode:
        log_accuracy = os.path.join(log_settings.log_output.outdir,
                                    "mlperf_log_accuracy.json")
        tf.gfile.Copy(FLAGS.reference, "/tmp/reference")
        bleu = process_accuracy.get_accuracy("/tmp/reference", log_accuracy)
        print("BLEU: %.2f" % (bleu * 100))  # pylint: disable=superfluous-parens