def main():
    args = parse_args()
    onnx_filepath = args.onnx_filepath
    batch_size = args.batch_size

    inputs = generate_random_inputs(onnx_filepath, batch_size)
    input_names = get_input_names(onnx_filepath)
    output_names = get_output_names(onnx_filepath)
    inputs_dict = {name: value for name, value in zip(input_names, inputs)}

    # ONNXRuntime inference
    print("Executing model with ONNXRuntime...")
    sess_options = onnxruntime.SessionOptions()
    with override_onnx_batch_size(onnx_filepath, batch_size) as override_onnx_filepath:
        ort_network = onnxruntime.InferenceSession(override_onnx_filepath, sess_options)
        ort_outputs = ort_network.run(output_names, inputs_dict)

    # DeepSparse Engine inference
    print("Executing model with DeepSparse Engine...")
    dse_network = compile_model(onnx_filepath, batch_size=batch_size)
    dse_outputs = dse_network(inputs)

    verify_outputs(dse_outputs, ort_outputs)
    print("DeepSparse Engine output matches ONNXRuntime output")
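# `parse_args` is defined elsewhere in this script; below is a minimal sketch
# of the CLI it implies, assuming an argparse interface whose argument names
# are inferred from the attribute accesses above (a hypothetical
# reconstruction, not the repository's actual helper):
import argparse


def parse_args():
    parser = argparse.ArgumentParser(
        description="Check DeepSparse Engine output against ONNXRuntime"
    )
    parser.add_argument("onnx_filepath", type=str, help="path to the ONNX model file")
    parser.add_argument(
        "-b", "--batch_size", type=int, default=1, help="batch size to run the model with"
    )
    return parser.parse_args()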
def main():
    args = parse_args()
    model = fetch_model(args.model_name)
    batch_size = args.batch_size
    num_cores = args.num_cores

    # Gather batch of data
    batch = model.sample_batch(batch_size=batch_size)
    batched_inputs = batch["inputs"]
    batched_outputs = batch["outputs"]

    # Compile model for inference
    print("Compiling {} model with DeepSparse Engine".format(model.architecture_id))
    engine = compile_model(model, batch_size, num_cores)
    print(engine)

    # INFERENCE
    # Record output from inference through the DeepSparse Engine
    print("Executing...")
    predicted_outputs = engine(batched_inputs)

    # Compare against reference model output
    verify_outputs(predicted_outputs, batched_outputs)

    # BENCHMARK
    # Record the time to execute through the DeepSparse Engine
    print("Benchmarking...")
    results = engine.benchmark(batched_inputs)
    print(results)
def test_benchmark(self, model: Model, batch_size: int):
    """
    Test the Engine.benchmark() interface
    """
    m = model()
    batch = m.sample_batch(batch_size=batch_size)
    inputs = batch["inputs"]
    outputs = batch["outputs"]

    engine = compile_model(m, batch_size)
    results = engine.benchmark(inputs, include_outputs=True)

    # Each recorded iteration should reproduce the reference outputs
    for output in results.outputs:
        verify_outputs(output, outputs)
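# The same Engine.benchmark interface can be exercised outside a test
# harness. A minimal standalone sketch, assuming a local "model.onnx" and
# that compile_model / generate_random_inputs ship under the import paths
# used here (deepsparse and deepsparse.utils); treat the paths as an
# assumption, not a guarantee:
from deepsparse import compile_model
from deepsparse.utils import generate_random_inputs

engine = compile_model("model.onnx", batch_size=1)
inputs = generate_random_inputs("model.onnx", batch_size=1)
# num_iterations / num_warmup_iterations mirror the arguments used in the
# benchmarking example below
results = engine.benchmark(inputs, num_iterations=20, num_warmup_iterations=5)
print(results)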
def main():
    args = parse_args()
    onnx_filepath = args.onnx_filepath
    batch_size = args.batch_size
    num_cores = args.num_cores
    num_iterations = args.num_iterations
    num_warmup_iterations = args.num_warmup_iterations

    inputs = generate_random_inputs(onnx_filepath, batch_size)
    input_names = get_input_names(onnx_filepath)
    output_names = get_output_names(onnx_filepath)
    inputs_dict = {name: value for name, value in zip(input_names, inputs)}

    # Benchmark ONNXRuntime
    print("Benchmarking model with ONNXRuntime...")
    sess_options = onnxruntime.SessionOptions()
    sess_options.intra_op_num_threads = num_cores
    with override_onnx_batch_size(onnx_filepath, batch_size) as override_onnx_filepath:
        ort_network = onnxruntime.InferenceSession(override_onnx_filepath, sess_options)

        ort_results = BenchmarkResults()
        # Warm up the runtime before recording timings
        for _ in range(num_warmup_iterations):
            ort_network.run(output_names, inputs_dict)
        for _ in range(num_iterations):
            start = time.time()
            output = ort_network.run(output_names, inputs_dict)
            end = time.time()
            ort_results.append_batch(
                time_start=start, time_end=end, batch_size=batch_size, outputs=output
            )

    # Benchmark DeepSparse Engine
    print("Benchmarking model with DeepSparse Engine...")
    dse_network = compile_model(onnx_filepath, batch_size, num_cores)
    dse_results = dse_network.benchmark(
        inputs, num_iterations, num_warmup_iterations, include_outputs=True
    )

    # Verify the two runtimes agree on every recorded batch
    for dse_output, ort_output in zip(dse_results.outputs, ort_results.outputs):
        verify_outputs(dse_output, ort_output)

    print("ONNXRuntime", ort_results)
    print()
    print("DeepSparse Engine", dse_results)
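# BenchmarkResults aggregates per-batch wall-clock timings into throughput
# and latency statistics; the arithmetic it implies, sketched in plain
# Python (an illustration of the calculation, not the class's internals):
def summarize_timings_sketch(timings, batch_size):
    # timings: list of (time_start, time_end) pairs, one per recorded batch
    total_seconds = sum(end - start for start, end in timings)
    items_per_second = batch_size * len(timings) / total_seconds
    mean_latency_ms = 1000.0 * total_seconds / len(timings)
    return items_per_second, mean_latency_ms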
def test_engine(self, model: Model, batch_size: int):
    """
    Test the Engine inference interfaces: __call__, run, mapped_run, timed_run
    """
    m = model()
    batch = m.sample_batch(batch_size=batch_size)
    inputs = batch["inputs"]
    outputs = batch["outputs"]

    print("compile model")
    engine = compile_model(m, batch_size)

    print("engine callable")
    pred_outputs = engine(inputs)
    verify_outputs(pred_outputs, outputs)

    print("engine run")
    pred_outputs = engine.run(inputs)
    verify_outputs(pred_outputs, outputs)

    print("engine mapped_run")
    pred_outputs = engine.mapped_run(inputs)
    assert len(pred_outputs) == len(outputs)

    print("engine timed_run")
    pred_outputs, elapsed = engine.timed_run(inputs)
    verify_outputs(pred_outputs, outputs)
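# `verify_outputs` is imported from the library's utilities; a sketch of the
# check it presumably performs, assuming an elementwise comparison within a
# floating-point tolerance (an illustrative assumption, not the actual
# implementation):
import numpy


def verify_outputs_sketch(outputs, gt_outputs, atol=1e-4, rtol=0.0):
    assert len(outputs) == len(gt_outputs), "output count mismatch"
    for out, gt in zip(outputs, gt_outputs):
        # raise if any element disagrees beyond the given tolerances
        assert numpy.allclose(out, gt, atol=atol, rtol=rtol)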
def main():
    args = parse_args()
    model = fetch_model(args.model_name)
    batch_size = args.batch_size
    num_cores = args.num_cores

    # Gather batch of data
    batch = model.sample_batch(batch_size=batch_size)
    batched_inputs = batch["inputs"]
    batched_outputs = batch["outputs"]

    # Compile model for inference
    print("Compiling {} model with DeepSparse Engine".format(model.architecture_id))
    engine = compile_model(model, batch_size, num_cores)
    print(engine)

    # INFERENCE
    # Record output from inference through the DeepSparse Engine
    print("Executing...")
    predicted_outputs = engine(batched_inputs)

    # Compare against reference model output
    verify_outputs(predicted_outputs, batched_outputs)

    if "labels" in batch:
        batched_labels = batch["labels"]
        # Measure accuracy against ground truth labels
        accuracy = calculate_top1_accuracy(predicted_outputs[-1], batched_labels[0])
        print("Top-1 Accuracy for batch size {}: {:.2f}%".format(batch_size, accuracy))

    # BENCHMARK
    # Record the time to execute through the DeepSparse Engine
    print("Benchmarking...")
    results = engine.benchmark(batched_inputs)
    print(results)
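# `calculate_top1_accuracy` is a local helper of this script; a hypothetical
# sketch, assuming predictions are class scores of shape
# (batch_size, num_classes) and labels are class indices or one-hot rows:
import numpy


def calculate_top1_accuracy_sketch(pred, labels):
    pred_classes = numpy.argmax(pred, axis=-1)
    if labels.ndim == pred.ndim:
        # one-hot labels: reduce to class indices
        labels = numpy.argmax(labels, axis=-1)
    return 100.0 * float(numpy.mean(pred_classes == labels))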