import onnxruntime

from deepsparse import compile_model
from deepsparse.utils import (
    generate_random_inputs,
    get_input_names,
    get_output_names,
    override_onnx_batch_size,
    verify_outputs,
)


def main():
    args = parse_args()
    onnx_filepath = args.onnx_filepath
    batch_size = args.batch_size

    # Generate random inputs matching the model's expected input shapes
    inputs = generate_random_inputs(onnx_filepath, batch_size)
    input_names = get_input_names(onnx_filepath)
    output_names = get_output_names(onnx_filepath)
    inputs_dict = {name: value for name, value in zip(input_names, inputs)}

    # ONNXRuntime inference
    print("Executing model with ONNXRuntime...")
    sess_options = onnxruntime.SessionOptions()
    with override_onnx_batch_size(onnx_filepath, batch_size) as override_onnx_filepath:
        ort_network = onnxruntime.InferenceSession(
            override_onnx_filepath, sess_options
        )
        ort_outputs = ort_network.run(output_names, inputs_dict)

    # DeepSparse Engine inference
    print("Executing model with DeepSparse Engine...")
    dse_network = compile_model(onnx_filepath, batch_size=batch_size)
    dse_outputs = dse_network(inputs)

    verify_outputs(dse_outputs, ort_outputs)
    print("DeepSparse Engine output matches ONNXRuntime output")
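
# The script above relies on a parse_args() helper defined elsewhere in the
# file. A minimal sketch of what it might look like, assuming argparse and
# inferring argument names from the attributes read in main() -- the exact
# flags, defaults, and help text are assumptions, not the original definition:
import argparse


def parse_args():
    parser = argparse.ArgumentParser(
        description="Compare DeepSparse Engine outputs against ONNXRuntime"
    )
    parser.add_argument("onnx_filepath", type=str, help="Path to the ONNX model file")
    parser.add_argument(
        "-b", "--batch_size", type=int, default=1, help="Batch size to run with"
    )
    return parser.parse_args()


# The original script presumably ends with the standard entry-point guard:
if __name__ == "__main__":
    main()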
import time

import onnxruntime

from deepsparse import compile_model
from deepsparse.benchmark import BenchmarkResults
from deepsparse.utils import (
    generate_random_inputs,
    get_input_names,
    get_output_names,
    override_onnx_batch_size,
    verify_outputs,
)


def main():
    args = parse_args()
    onnx_filepath = args.onnx_filepath
    batch_size = args.batch_size
    num_cores = args.num_cores
    num_iterations = args.num_iterations
    num_warmup_iterations = args.num_warmup_iterations

    # Generate random inputs matching the model's expected input shapes
    inputs = generate_random_inputs(onnx_filepath, batch_size)
    input_names = get_input_names(onnx_filepath)
    output_names = get_output_names(onnx_filepath)
    inputs_dict = {name: value for name, value in zip(input_names, inputs)}

    # Benchmark ONNXRuntime
    print("Benchmarking model with ONNXRuntime...")
    sess_options = onnxruntime.SessionOptions()
    sess_options.intra_op_num_threads = num_cores
    with override_onnx_batch_size(onnx_filepath, batch_size) as override_onnx_filepath:
        ort_network = onnxruntime.InferenceSession(
            override_onnx_filepath, sess_options
        )

        ort_results = BenchmarkResults()
        # Warm up the session before timing
        for _ in range(num_warmup_iterations):
            ort_network.run(output_names, inputs_dict)
        # Time each batch and record it with its outputs
        for _ in range(num_iterations):
            start = time.time()
            output = ort_network.run(output_names, inputs_dict)
            end = time.time()
            ort_results.append_batch(
                time_start=start, time_end=end, batch_size=batch_size, outputs=output
            )

    # Benchmark DeepSparse Engine
    print("Benchmarking model with DeepSparse Engine...")
    dse_network = compile_model(onnx_filepath, batch_size, num_cores)
    dse_results = dse_network.benchmark(
        inputs, num_iterations, num_warmup_iterations, include_outputs=True
    )

    # Check that both engines agree on every recorded batch
    for dse_output, ort_output in zip(dse_results.outputs, ort_results.outputs):
        verify_outputs(dse_output, ort_output)

    print("ONNXRuntime", ort_results)
    print()
    print("DeepSparse Engine", dse_results)
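
# For reference, the per-batch timings gathered above reduce to latency and
# throughput as follows. This standalone sketch avoids assuming anything about
# BenchmarkResults' internals (only append_batch and outputs are used above);
# the helper name and signature are illustrative, not part of the API:
def summarize_timings(batch_times_s, batch_size):
    # Mean wall-clock seconds per batch...
    mean_latency_s = sum(batch_times_s) / len(batch_times_s)
    # ...and items processed per second at that latency
    throughput = batch_size / mean_latency_s
    return mean_latency_s, throughput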
def __init__(
    self,
    model: Union[str, Model, File],
    batch_size: int,
    num_cores: Optional[int],
    input_shapes: Optional[List[List[int]]] = None,
):
    _validate_ort_import()

    self._model_path = model_to_path(model)
    self._batch_size = _validate_batch_size(batch_size)
    self._num_cores = num_cores
    self._input_shapes = input_shapes

    self._input_names = get_input_names(self._model_path)
    self._output_names = get_output_names(self._model_path)

    sess_options = onnxruntime.SessionOptions()
    # Suppress ONNXRuntime info/warning logs; only report errors and above
    sess_options.log_severity_level = 3
    if num_cores is not None:
        sess_options.intra_op_num_threads = num_cores

    # TODO (michael): Unfortunately we are stacking overrides here; this can be
    # cleaned up once we pass the loaded ONNX around and not paths
    if self._input_shapes:
        with override_onnx_input_shapes(
            self._model_path, self._input_shapes
        ) as input_override_model_path:
            with override_onnx_batch_size(
                input_override_model_path, batch_size
            ) as batch_override_model_path:
                self._eng_net = onnxruntime.InferenceSession(
                    batch_override_model_path, sess_options
                )
    else:
        with override_onnx_batch_size(
            self._model_path, batch_size
        ) as batch_override_model_path:
            self._eng_net = onnxruntime.InferenceSession(
                batch_override_model_path, sess_options
            )
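
# Hypothetical construction of the engine above, assuming this __init__ belongs
# to an ORT-backed engine class (called ORTEngine here purely for illustration;
# the class name is not shown in the fragment):
#
#     engine = ORTEngine(
#         model="model.onnx",   # a path, sparsezoo Model, or File
#         batch_size=1,
#         num_cores=None,       # None lets ONNXRuntime pick its own thread count
#         input_shapes=None,    # or e.g. [[1, 3, 224, 224]] to override
#     )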