def testRunModelMultipleThreads(self):
    available_providers = onnxrt.get_available_providers()

    # Skip this test for a "pure" DML onnxruntime python wheel.
    # We keep this test enabled for instances where both DML and CUDA EPs are
    # available (the Windows GPU CI pipeline has this config) - the test will
    # pass because CUDA has higher precedence than DML, so the nodes are
    # assigned to the CUDA EP only (which supports this test).
    if ('DmlExecutionProvider' in available_providers
            and 'CUDAExecutionProvider' not in available_providers):
        print("Skipping testRunModelMultipleThreads as the DML EP does not support "
              "calling Run() on different threads using the same session object")
    else:
        so = onnxrt.SessionOptions()
        so.log_verbosity_level = 1
        so.logid = "MultiThreadsTest"
        sess = onnxrt.InferenceSession(get_name("mul_1.onnx"), sess_options=so)
        ro1 = onnxrt.RunOptions()
        ro1.logid = "thread1"
        t1 = threading.Thread(target=self.run_model, args=(sess, ro1))
        ro2 = onnxrt.RunOptions()
        ro2.logid = "thread2"
        t2 = threading.Thread(target=self.run_model, args=(sess, ro2))

        t1.start()
        t2.start()

        t1.join()
        t2.join()
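# The test above relies on a run_model helper that is not shown in this
# snippet. A minimal sketch of what it might look like (hypothetical; the
# real helper also checks the expected output of mul_1.onnx):
def run_model(self, session_object, run_options):
    x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
    input_name = session_object.get_inputs()[0].name
    # InferenceSession.run() is safe to call concurrently on a single session
    # object (except on the DML EP, hence the skip above); each thread passes
    # its own RunOptions so log lines can be told apart by logid.
    res = session_object.run([], {input_name: x}, run_options=run_options)
    self.assertEqual(len(res), 1)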
def Run(self, model_str: str, inputs_str: List[str]):
    model = onnx.ModelProto()
    model.ParseFromString(model_str)

    def deserialize_tp(tp_str):
        tp = onnx.TensorProto()
        tp.ParseFromString(tp_str)
        return tp

    input_tps = map(deserialize_tp, inputs_str)
    input_arrs = map(onnx.numpy_helper.to_array, input_tps)
    input_names = [x.name for x in model.graph.input]
    inputs = dict(zip(input_names, input_arrs))

    sess_options = rt.SessionOptions()
    sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = rt.InferenceSession(
        model.SerializeToString(),
        sess_options=sess_options,
        providers=["CPUExecutionProvider"],
    )

    output_names = [x.name for x in sess.get_outputs()]
    run_options = rt.RunOptions()
    run_options.log_severity_level = 3
    output_arrs = sess.run(output_names, inputs, run_options=run_options)
    return [
        onnx.numpy_helper.from_array(x).SerializeToString() for x in output_arrs
    ]
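# The Run method above works purely on serialized protos, which makes it easy
# to call across a process or RPC boundary. A minimal caller sketch, assuming
# a "runner" object exposing Run and a model path (both hypothetical):
import numpy as np
import onnx
import onnx.numpy_helper

model = onnx.load("model.onnx")  # path is an assumption
x = np.random.rand(1, 3, 224, 224).astype(np.float32)  # shape is an assumption
x_tp = onnx.numpy_helper.from_array(x)

# Serialize everything going in, deserialize everything coming out.
out_strs = runner.Run(model.SerializeToString(), [x_tp.SerializeToString()])
outputs = [onnx.numpy_helper.to_array(onnx.TensorProto.FromString(s))
           for s in out_strs]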
def testAllocationPlanWorksWithOnlyExecutePathToFetchesOption(self):
    """
       (inp0)  (inp1)
         |  \/  |
         |  /\  |
        Add    Sub
         |      |
     (tsor0)  (tsor1)
         |      |
        Neg    Neg
         |      |
     (outp0)  (outp1)

    In this model, tsor0 and tsor1 have the same size. The allocation plan
    makes tsor1 reuse tsor0's memory. With
    run_options.only_execute_path_to_fetches == True and only outp1 fetched,
    the Add op is not executed, so tsor0 is never allocated during
    computation and allocating tsor1 by reusing tsor0 would fail. This case
    is handled specifically in ExecutionFrame::AllocateAsPerAllocationPlan();
    this test ensures it stays covered.
    """
    name = get_name("alloc_tensor_reuse.onnx")
    sess = onnxrt.InferenceSession(name)
    run_options = onnxrt.RunOptions()
    run_options.only_execute_path_to_fetches = True

    inp0, inp1 = np.ones((10,), dtype=np.float32), np.ones((10,), dtype=np.float32)
    session_run_results = sess.run(['outp0'], {'inp0': inp0, 'inp1': inp1}, run_options)
    assert_allclose(session_run_results[0], -(inp0 + inp1))

    session_run_results = sess.run(['outp1'], {'inp0': inp0, 'inp1': inp1}, run_options)
    assert_allclose(session_run_results[0], -(inp0 - inp1))
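# A self-contained sketch of only_execute_path_to_fetches, building an
# in-memory graph equivalent to the one in the docstring instead of loading
# alloc_tensor_reuse.onnx (the opset choice is an assumption):
import numpy as np
import onnxruntime as onnxrt
from onnx import helper, TensorProto

inp0 = helper.make_tensor_value_info('inp0', TensorProto.FLOAT, [10])
inp1 = helper.make_tensor_value_info('inp1', TensorProto.FLOAT, [10])
outp0 = helper.make_tensor_value_info('outp0', TensorProto.FLOAT, [10])
outp1 = helper.make_tensor_value_info('outp1', TensorProto.FLOAT, [10])
graph = helper.make_graph(
    [
        helper.make_node('Add', ['inp0', 'inp1'], ['tsor0']),
        helper.make_node('Sub', ['inp0', 'inp1'], ['tsor1']),
        helper.make_node('Neg', ['tsor0'], ['outp0']),
        helper.make_node('Neg', ['tsor1'], ['outp1']),
    ],
    'alloc_tensor_reuse', [inp0, inp1], [outp0, outp1])
model = helper.make_model(graph, opset_imports=[helper.make_opsetid('', 13)])

sess = onnxrt.InferenceSession(model.SerializeToString(),
                               providers=['CPUExecutionProvider'])
ro = onnxrt.RunOptions()
ro.only_execute_path_to_fetches = True  # prune nodes not needed by the fetches
x = np.ones((10,), dtype=np.float32)
# Only the Sub/Neg branch executes here; the Add branch is skipped entirely.
print(sess.run(['outp1'], {'inp0': x, 'inp1': x}, ro))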
def testConfigureRunVerbosityLevel(self):
    ro = onnxrt.RunOptions()
    ro.run_log_verbosity_level = 1
    ro.run_tag = "testtag123"

    # use onnxruntime_ostream_redirect to redirect c++ stdout/stderr
    # to python sys.stdout and sys.stderr
    with onnxruntime_ostream_redirect(stdout=True, stderr=True):
        sess = onnxrt.InferenceSession(self.get_name("mul_1.pb"))
        x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
        sess.run([], {'X': x}, run_options=ro)
        output = sys.stderr.getvalue()
        self.assertTrue('[I:onnxruntime:testtag123,' in output)
def run_onnx_inference(input_tensor: np.ndarray) -> np.ndarray:
    """Perform inference on an ONNX model."""
    input_tensor = onnx_preprocessing(input_tensor)

    # Note: the original snippet created the RunOptions under the misleading
    # name sess_options and never passed it to run(); fixed here.
    run_options = onnxruntime.RunOptions()
    run_options.log_verbosity_level = 0

    ort_session = onnxruntime.InferenceSession(ONNX_MODEL_FILE)
    ort_inputs = {ort_session.get_inputs()[0].name: input_tensor}
    ort_outputs = ort_session.run(None, ort_inputs, run_options=run_options)
    return ort_outputs
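# A hypothetical call site for run_onnx_inference; the input shape depends on
# the model behind ONNX_MODEL_FILE and on what onnx_preprocessing expects:
import numpy as np

image = np.random.rand(1, 3, 224, 224).astype(np.float32)  # shape is an assumption
outputs = run_onnx_inference(image)
print([o.shape for o in outputs])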
def eval_step(self, *args, **kwargs): r"""Evaluation step method Args: *args: Arbitrary arguments that are used as model input (data only) **kwargs: Arbitrary keyword arguments that are used as model input (data only) Returns: ordered :py:obj:`list` with model outputs as described by :py:attr:`.ORTTrainer.model_desc` """ # Get data. CombineTorchModelLossFn takes label as last input and outputs loss first sample_input = self._prepare_model_input(self.model_desc.inputs, None, None, *args, **kwargs) # Export model to ONNX if self._onnx_model is None: if self._torch_model is not None: self._init_onnx_model(sample_input) else: raise RuntimeError( "Model is uninitialized. Only ONNX and PyTorch models are supported" ) # Prepare input/output description inputs_desc = self.model_desc.inputs outputs_desc = self.model_desc.outputs if self._train_step_info.fetches: outputs_desc = [ o_desc for o_desc in outputs_desc if o_desc.name in self._train_step_info.fetches ] if len(outputs_desc) != len(self._train_step_info.fetches): raise RuntimeError( "The specified fetches list contains invalid output names") # Normalize input if not isinstance(sample_input, (list, tuple)): sample_input = (sample_input, ) # RunOptions run_options = ort.RunOptions() run_options.only_execute_path_to_fetches = True run_options.training_mode = False # Run a eval step and return session_run_results = self._training_session_run_helper( False, sample_input, inputs_desc, outputs_desc, run_options) # Output must be returned in the same order as defined in the model description results = [session_run_results[o_desc.name] for o_desc in outputs_desc] return results[0] if len(results) == 1 else results
def forward(model, inputs=None, input_shapes: Optional[TensorShapes] = None) -> Dict[str, np.ndarray]:
    if input_shapes is None:
        input_shapes = {}
    sess_options = rt.SessionOptions()
    sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = rt.InferenceSession(model.SerializeToString(), sess_options=sess_options)
    if inputs is None:
        inputs = generate_rand_input(model, input_shapes=input_shapes)
    outputs = [x.name for x in sess.get_outputs()]
    run_options = rt.RunOptions()
    run_options.log_severity_level = 3
    res = OrderedDict(zip(outputs, sess.run(outputs, inputs, run_options=run_options)))
    return res
def eval_step(self, *args, **kwargs): """ inputs: model inputs and/or labels. outputs: if 'fetches' is not provided, outputs are loss and (if in mixed mode and is finishing gradient accumulation) all_finite. if fetches is provided, outputs contains these requested with fetches. fetches: names of requested outputs """ # with model_loss_cls, the last input is label, first output is loss input, fetches = self._prepare_input_and_fetches( self.model_desc_.inputs_, None, None, *args, **kwargs) if self.onnx_model_ is None: if self.torch_model_ is not None: self._init_onnx_model(input) else: raise RuntimeError( "Model is unintialized. Please ensure a valid ONNX model or PyTorch model is provided to this Trainer." ) input_desc = self.model_desc_.inputs_[0:len(input)] if fetches is None: output_desc = self.model_desc_.outputs_ else: output_desc = [ output for fetch in fetches for output in self.model_desc_.outputs_ if output.name_ == fetch ] if not isinstance(input, (list, tuple)): input = (input, ) run_options = ort.RunOptions() run_options.only_execute_path_to_fetches = True run_options.training_mode = False session_run_results = ort_training_session_run_helper( self.session, self.eval_io_binding, input, input_desc, output_desc, self.device_, run_options) if len(session_run_results) == 1: return session_run_results[list(session_run_results.keys())[0]] else: return [ session_run_results[output_desc.name_] for output_desc in output_desc ]
def _forward(
    model: onnx.ModelProto,
    extra_output_nodes: Optional[List[onnx.NodeProto]] = None
) -> OrderedDict[str, np.ndarray]:
    # add outputs of the argument nodes as model outputs
    if extra_output_nodes is not None:
        model = deepcopy(model)
        for node in extra_output_nodes:
            for output in node.output:
                value_info = onnx.ValueInfoProto(name=output)
                model.graph.output.append(value_info)

    # create an ONNX Runtime session
    sess_options = onnxrt.SessionOptions()
    sess_options.graph_optimization_level = onnxrt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = onnxrt.InferenceSession(
        model.SerializeToString(),
        sess_options=sess_options,
        providers=["CPUExecutionProvider"],
    )

    # get names of input nodes that are not initializers
    input_names = set(v.name for v in model.graph.input)
    init_names = set(v.name for v in model.graph.initializer)
    input_names = input_names - init_names

    # generate random inputs
    inputs = {}
    for v in model.graph.input:
        name = v.name
        shape = (d.dim_value for d in v.type.tensor_type.shape.dim)
        dtype = _numpy_dtype(v.type.tensor_type.elem_type)
        if name in input_names:
            inputs[name] = np.random.rand(*shape).astype(dtype)

    output_names = [x.name for x in sess.get_outputs()]
    run_options = onnxrt.RunOptions()
    run_options.log_severity_level = 3
    outputs = sess.run(output_names, inputs, run_options=run_options)
    return OrderedDict(zip(output_names, outputs))
def forward(
        model: onnx.ModelProto,
        inputs: Dict[str, np.ndarray] = None,
        input_shapes: Optional[TensorShapes] = None) -> Dict[str, np.ndarray]:
    """Run forward on a model.

    Args:
        model (onnx.ModelProto): Input ONNX model.
        inputs (Dict[str, np.ndarray], optional): Inputs of the model.
        input_shapes (TensorShapes, optional): Input shapes of the model.

    Returns:
        Dict[str, np.ndarray]: Outputs of the model.
    """
    if input_shapes is None:
        input_shapes = {}
    sess_options = rt.SessionOptions()
    # load custom op lib for onnxruntime from mmcv, if available
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except ImportError:
        pass
    if os.path.exists(ort_custom_op_path):
        sess_options.register_custom_ops_library(ort_custom_op_path)
    sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = rt.InferenceSession(model.SerializeToString(),
                               sess_options=sess_options,
                               providers=['CPUExecutionProvider'])
    if inputs is None:
        inputs = generate_rand_input(model, input_shapes=input_shapes)
    outputs = [x.name for x in sess.get_outputs()]
    run_options = rt.RunOptions()
    run_options.log_severity_level = 3
    res = OrderedDict(
        zip(outputs, sess.run(outputs, inputs, run_options=run_options)))
    return res
def forward(model,
            input_data: Optional[Tensors] = None,
            input_shapes: Optional[TensorShapes] = None,
            custom_lib: Optional[str] = None) -> Tensors:
    if input_shapes is None:
        input_shapes = {}
    sess_options = rt.SessionOptions()
    if custom_lib is not None:
        if os.path.exists(custom_lib):
            sess_options.register_custom_ops_library(custom_lib)
        else:
            print("No such file '{}'".format(custom_lib), file=sys.stderr)
            exit(1)
    sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = rt.InferenceSession(model.SerializeToString(),
                               sess_options=sess_options,
                               providers=['CPUExecutionProvider'])

    input_names = get_input_names(model)
    inputs = {}
    for name in input_names:
        if input_data is not None and input_data.get(name, None) is not None:
            inputs[name] = input_data[name]
        else:
            if input_shapes is not None and input_shapes.get(name, None) is not None:
                shape = input_shapes[name]
            else:
                shape = get_shape(model, name)
            inputs.update(generate_specific_rand_input(model, {name: shape}))

    outputs = [x.name for x in sess.get_outputs()]
    run_options = rt.RunOptions()
    run_options.log_severity_level = 3
    res = OrderedDict(
        zip(outputs, sess.run(outputs, inputs, run_options=run_options)))
    return res
def forward(
    model: onnx.ModelProto,
    inputs: Tensors,
    custom_lib: Optional[str] = None,
) -> Dict[str, np.ndarray]:
    sess_options = rt.SessionOptions()
    if custom_lib is not None:
        if os.path.exists(custom_lib):
            sess_options.register_custom_ops_library(custom_lib)
        else:
            raise ValueError("No such file '{}'".format(custom_lib))
    sess_options.graph_optimization_level = rt.GraphOptimizationLevel(0)
    sess_options.log_severity_level = 3
    sess = rt.InferenceSession(
        model.SerializeToString(),
        sess_options=sess_options,
        providers=["CPUExecutionProvider"],
    )
    outputs = [x.name for x in sess.get_outputs()]
    run_options = rt.RunOptions()
    run_options.log_severity_level = 3
    res = OrderedDict(
        zip(outputs, sess.run(outputs, inputs, run_options=run_options))
    )
    return res
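# A hypothetical call site for the forward helper above; the model path and
# the input name "input" are assumptions:
import numpy as np
import onnx

model = onnx.load("model.onnx")
x = np.random.rand(1, 3, 224, 224).astype(np.float32)
res = forward(model, {"input": x})
for name, arr in res.items():
    print(name, arr.shape, arr.dtype)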
def detect():
    global INPUT_LAYER_NAME

    OPENME = {}

    setup_time_begin = time.time()

    # Load preprocessed image filenames:
    with open(IMAGE_LIST_FILE, 'r') as f:
        image_list = [s.strip() for s in f]

    images_total_count = len(image_list)
    first_index = SKIP_IMAGES
    last_index = BATCH_COUNT * BATCH_SIZE + first_index

    if first_index > images_total_count or last_index > images_total_count:
        print('********************************************')
        print('')
        print('DATASET SIZE EXCEEDED !!!')
        print('Dataset size : {}'.format(images_total_count))
        print('CK_SKIP_IMAGES: {}'.format(SKIP_IMAGES))
        print('CK_BATCH_COUNT: {}'.format(BATCH_COUNT))
        print('CK_BATCH_SIZE : {}'.format(BATCH_SIZE))
        print('')
        print('********************************************')

    image_list = image_list[SKIP_IMAGES:BATCH_COUNT * BATCH_SIZE + SKIP_IMAGES]

    # Local list of processed files
    with open(IMAGE_LIST_FILE_NAME, 'w') as f:
        for line in image_list:
            f.write('{}\n'.format(line))

    # Load the ONNX model from file
    sess_options = rt.SessionOptions()
    # sess_options.session_log_verbosity_level = 0
    if CPU_THREADS > 0:
        sess_options.enable_sequential_execution = False
        sess_options.session_thread_pool_size = CPU_THREADS
    graph_load_time_begin = time.time()
    sess = rt.InferenceSession(MODEL_PATH, sess_options)
    graph_load_time = time.time() - graph_load_time_begin

    input_layer_names = [x.name for x in sess.get_inputs()]  # FIXME: check that INPUT_LAYER_NAME belongs to this list
    INPUT_LAYER_NAME = INPUT_LAYER_NAME or input_layer_names[0]

    output_layer_names = [x.name for x in sess.get_outputs()]  # FIXME: check that OUTPUT_LAYER_NAME belongs to this list

    model_input_shape = sess.get_inputs()[0].shape
    model_input_type = sess.get_inputs()[0].type
    model_input_type = np.uint8 if model_input_type == 'tensor(uint8)' else np.float32  # FIXME: there must be a more humane way!

    # a more portable way to detect the number of classes
    for output in sess.get_outputs():
        if output.name == OUTPUT_LAYER_LABELS:
            model_classes = output.shape[1]

    labels = load_labels(LABELS_PATH)
    # bg_class_offset = model_classes - len(labels)
    # 1 means the labels represent classes 1..1000 and the background class 0 has to be skipped
    bg_class_offset = 1

    if MODEL_DATA_LAYOUT == 'NHWC':
        (samples, height, width, channels) = model_input_shape
    else:
        (samples, channels, height, width) = model_input_shape

    print("Data layout: {}".format(MODEL_DATA_LAYOUT))
    print("Input layers: {}".format(input_layer_names))
    print("Output layers: {}".format(output_layer_names))
    print("Input layer name: " + INPUT_LAYER_NAME)
    print("Expected input shape: {}".format(model_input_shape))
    print("Expected input type: {}".format(model_input_type))
    print("Output layer names: " + ", ".join(
        [OUTPUT_LAYER_BBOXES, OUTPUT_LAYER_LABELS, OUTPUT_LAYER_SCORES]))
    print("Data normalization: {}".format(MODEL_NORMALIZE_DATA))
    print("Background/unlabelled classes to skip: {}".format(bg_class_offset))
    print("")

    setup_time = time.time() - setup_time_begin

    # Run batched mode
    test_time_begin = time.time()
    total_load_time = 0
    total_detection_time = 0
    first_detection_time = 0
    images_loaded = 0

    ## Due to error in ONNX Resnet34 model
    class_map = None
    if SKIPPED_CLASSES:
        class_map = []
        for i in range(len(labels) + bg_class_offset):
            if i not in SKIPPED_CLASSES:
                class_map.append(i)

    for image_index in range(BATCH_COUNT):
        if FULL_REPORT or (image_index % 10 == 0):
            print("\nBatch {} of {}".format(image_index + 1, BATCH_COUNT))

        begin_time = time.time()
        file_name, width, height = image_list[image_index].split(";")
        width = float(width)
        height = float(height)
        img_file = os.path.join(IMAGE_DIR, file_name)
        batch_data = load_preprocessed_file(img_file).astype(model_input_type)
        load_time = time.time() - begin_time
        total_load_time += load_time
        images_loaded += 1
        if FULL_REPORT:
            print("Batch loaded in %fs" % load_time)

        # Detect batch
        begin_time = time.time()
        run_options = rt.RunOptions()
        # run_options.run_log_verbosity_level = 0
        batch_results = sess.run(
            [OUTPUT_LAYER_BBOXES, OUTPUT_LAYER_LABELS, OUTPUT_LAYER_SCORES],
            {INPUT_LAYER_NAME: batch_data}, run_options)
        detection_time = time.time() - begin_time
        if FULL_REPORT:
            print("Batch classified in %fs" % detection_time)

        total_detection_time += detection_time
        # Remember first batch prediction time
        if image_index == 0:
            first_detection_time = detection_time

        # Process results
        # res_name = file.with.some.name.ext -> file.with.some.name.txt
        res_name = ".".join(file_name.split(".")[:-1]) + ".txt"
        res_file = os.path.join(DETECTIONS_OUT_DIR, res_name)
        with open(res_file, 'w') as f:
            f.write('{:d} {:d}\n'.format(int(width), int(height)))
            for i in range(len(batch_results[2][0])):
                score = batch_results[2][0][i]
                if score > SCORE_THRESHOLD:
                    if class_map:
                        class_num = class_map[batch_results[1][0][i]]
                    else:
                        class_num = batch_results[1][0][i] + bg_class_offset
                    class_name = labels[class_num - bg_class_offset]
                    box = batch_results[0][0][i]
                    x1 = box[0] * width
                    y1 = box[1] * height
                    x2 = box[2] * width
                    y2 = box[3] * height
                    f.write('{:.2f} {:.2f} {:.2f} {:.2f} {:.3f} {} {}\n'.format(
                        x1, y1, x2, y2, score, class_num, class_name))

    test_time = time.time() - test_time_begin

    if BATCH_COUNT > 1:
        avg_detection_time = (total_detection_time - first_detection_time) / (images_loaded - BATCH_SIZE)
    else:
        avg_detection_time = total_detection_time / images_loaded

    avg_load_time = total_load_time / images_loaded

    # Save processed images ids list to be able to run
    # evaluation without repeating detections (CK_SKIP_DETECTION=YES)
    # with open(IMAGE_LIST_FILE, 'w') as f:
    #     f.write(json.dumps(processed_image_ids))

    OPENME['setup_time_s'] = setup_time
    OPENME['test_time_s'] = test_time
    OPENME['load_images_time_total_s'] = total_load_time
    OPENME['load_images_time_avg_s'] = avg_load_time
    OPENME['prediction_time_total_s'] = total_detection_time
    OPENME['prediction_time_avg_s'] = avg_detection_time
    OPENME['avg_time_ms'] = avg_detection_time * 1000
    OPENME['avg_fps'] = 1.0 / avg_detection_time if avg_detection_time > 0 else 0

    run_time_state = {"run_time_state": OPENME}
    with open(TIMER_JSON, 'w') as o:
        json.dump(run_time_state, o, indent=2, sort_keys=True)

    return
def train_step(self, *args, **kwargs): r"""Train step method After forward pass, an ordered list with all outputs described at :py:attr:`ORTTrainer.model_desc` is returned. Additional information relevant to the train step is maintend by :py:attr:`ORTTrainer._train_step_info`. See :py:class:`.TrainStepInfo` for details. Args: *args: Arbitrary arguments that are used as model input (data only) **kwargs: Arbitrary keyword arguments that are used as model input (data only) Returns: ordered :py:obj:`list` with model outputs as described by :py:attr:`ORTTrainer.model_desc` """ # Export model to ONNX if self._onnx_model is None: sample_input = self._prepare_model_input(self.model_desc.inputs, None, None, *args, **kwargs) self._init_onnx_model(sample_input) # Prepare inputs+lr and output descriptions inputs_desc = self._model_desc_inputs_with_lr outputs_desc = self.model_desc.outputs # Train step must be incremented *before* gradient accumulation code # Gradients are accumulated when # self._train_step_info.step % self.options.batch.gradient_accumulation_steps != 0, # and they are updated otherwise self._train_step_info.step += 1 # RunOptions run_options = None mixed_precision_without_fetches = False if self._train_step_info.fetches: outputs_desc = [o_desc for o_desc in outputs_desc if o_desc.name in self._train_step_info.fetches] if len(outputs_desc) != len(self._train_step_info.fetches): raise RuntimeError("The specified fetches list contains invalid output names") elif self._train_step_info.step % self.options.batch.gradient_accumulation_steps != 0: run_options = ort.RunOptions() run_options.only_execute_path_to_fetches = True outputs_desc = self._model_desc_outputs_with_gradient_accumulation elif self.options.mixed_precision.enabled: mixed_precision_without_fetches = True outputs_desc = self._model_desc_outputs_with_all_finite # Update Learning Rate if Necessary lr = self.optim_config.lr if self.options.lr_scheduler: lr = self.options.lr_scheduler._step(self._train_step_info)[0] # Loss Scale for mixed precision loss_scale = None if self.options.mixed_precision.enabled: loss_scaler = self.options.mixed_precision.loss_scaler assert loss_scaler, "Loss scaler is required when mixed precision is enabled" loss_scale = torch.tensor([loss_scaler.loss_scale]) inputs_desc = self._model_desc_inputs_with_lr_and_loss_scale # Get data. CombineTorchModelLossFn takes label as last input and outputs loss first input = self._prepare_model_input(inputs_desc, lr, loss_scale, *args, **kwargs) # Normalize input if not isinstance(args, (list, tuple)): args = (args,) # Run a train step and return session_run_results = self._training_session_run_helper(True, input, inputs_desc, outputs_desc, run_options) if mixed_precision_without_fetches: # After session run with all_fp32_gradients_finite, we need to clear the training I/O binding's output # Otherwise next run with only_execute_path_to_fetches will lead to gradient all reduce # because all_fp32_gradients_finite is still in the feed. 
self._train_io_binding.clear_binding_outputs() is_all_finite = session_run_results[self.model_desc.all_finite.name] self._train_step_info.all_finite = is_all_finite if loss_scaler: loss_scaler.update(self._train_step_info) if is_all_finite: # Optimization step must be incremented *after* optimization is successful self._train_step_info.optimization_step += 1 elif self._train_step_info.step % self.options.batch.gradient_accumulation_steps == 0: # Optimization step must be incremented *after* optimization is successful self._train_step_info.optimization_step += 1 # Output must be returned in the same order as defined in the model description # or in the order specified by TrainStepInfo.fetches, if applicable if self._train_step_info.fetches: results = [session_run_results[o_desc] for o_desc in self._train_step_info.fetches] else: results = [session_run_results[o_desc.name] for o_desc in self.model_desc.outputs] return results[0] if len (results) == 1 else results
def compare_runtime(test, decimal=5, options=None, verbose=False, context=None,
                    comparable_outputs=None):
    """
    Compares the expected output (computed with the model before being
    converted to ONNX) with the ONNX output produced by module *onnxruntime*.

    :param test: dictionary with the following keys:

        - *onnx*: onnx model (filename or object)
        - *expected*: expected output (filename pkl or object)
        - *data*: input data (filename pkl or object)

    :param decimal: precision of the comparison
    :param options: comparison options
    :param context: specifies custom operators
    :param verbose: in case of error, the function may print more information
        on the standard output
    :param comparable_outputs: compare only these outputs
    :return: tuple (output, lambda function to run the predictions)

    The function raises an exception if the comparison fails.
    """
    lambda_onnx = None
    if context is None:
        context = {}
    load = load_data_and_model(test, **context)
    if verbose:
        print("[compare_runtime] test '{}' loaded".format(test['onnx']))

    onx = test['onnx']
    if options is None:
        if isinstance(onx, str):
            options = extract_options(onx)
        else:
            options = {}
    elif not isinstance(options, dict):
        raise TypeError("options must be a dictionary.")

    try:
        import onnxruntime
    except ImportError:
        warnings.warn("Unable to import onnxruntime.")
        return None

    if verbose:
        print("[compare_runtime] InferenceSession('{}')".format(onx))

    try:
        sess = onnxruntime.InferenceSession(onx)
    except ExpectedAssertionError as expe:
        raise expe
    except Exception as e:
        if "CannotLoad" in options:
            raise ExpectedAssertionError(
                "Unable to load onnx '{0}' due to\n{1}".format(onx, e))
        else:
            if verbose:
                import onnx
                model = onnx.load(onx)
                smodel = "\nJSON ONNX\n" + str(model)
            else:
                smodel = ""
            raise OnnxRuntimeAssertionError(
                "Unable to load onnx '{0}'\nONNX\n{1}".format(onx, smodel))

    input = load["data"]
    DF = options.pop('DF', False)
    if DF:
        inputs = {c: input[c].values for c in input.columns}
        for k in inputs:
            if inputs[k].dtype == numpy.float64:
                inputs[k] = inputs[k].astype(numpy.float32)
            inputs[k] = inputs[k].reshape((inputs[k].shape[0], 1))
    else:
        if isinstance(input, dict):
            inputs = input
        elif isinstance(input, (list, numpy.ndarray, pandas.DataFrame)):
            inp = sess.get_inputs()
            if len(inp) == len(input):
                inputs = {i.name: v for i, v in zip(inp, input)}
            elif len(inp) == 1:
                inputs = {inp[0].name: input}
            elif isinstance(input, numpy.ndarray):
                shape = sum(i.shape[1] if len(i.shape) == 2 else i.shape[0]
                            for i in inp)
                if shape == input.shape[1]:
                    inputs = {n.name: input[:, i] for i, n in enumerate(inp)}
                else:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0} != original shape {1}, onnx='{2}'"
                        .format(len(inp), input.shape, onx))
            elif isinstance(input, list):
                try:
                    array_input = numpy.array(input)
                except Exception:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0} != original {1}, onnx='{2}'"
                        .format(len(inp), len(input), onx))
                shape = sum(i.shape[1] for i in inp)
                if shape == array_input.shape[1]:
                    inputs = {}
                    c = 0
                    for i, n in enumerate(inp):
                        d = c + n.shape[1]
                        inputs[n.name] = _create_column(
                            [row[c:d] for row in input], n.type)
                        c = d
                else:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0} != original shape {1}, onnx='{2}'*"
                        .format(len(inp), array_input.shape, onx))
            elif isinstance(input, pandas.DataFrame):
                try:
                    array_input = numpy.array(input)
                except Exception:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0} != original {1}, onnx='{2}'"
                        .format(len(inp), len(input), onx))
                shape = sum(i.shape[1] for i in inp)
                if shape == array_input.shape[1]:
                    inputs = {}
                    c = 0
                    for i, n in enumerate(inp):
                        d = c + n.shape[1]
                        inputs[n.name] = _create_column(input.iloc[:, c:d], n.type)
                        c = d
                else:
                    raise OnnxRuntimeAssertionError(
                        "Wrong number of inputs onnx {0}={1} columns != original shape {2}, onnx='{3}'*"
                        .format(len(inp), shape, array_input.shape, onx))
            else:
                raise OnnxRuntimeAssertionError(
                    "Wrong type of inputs onnx {0}, onnx='{1}'".format(type(input), onx))
        else:
            raise OnnxRuntimeAssertionError(
                "Dict or list is expected, not {0}".format(type(input)))

        for k in inputs:
            if isinstance(inputs[k], list):
                inputs[k] = numpy.array(inputs[k])

    OneOff = options.pop('OneOff', False)
    OneOffArray = options.pop('OneOffArray', False)
    options.pop('SklCol', False)  # unused here but in dump_data_and_model
    if OneOff or OneOffArray:
        if verbose:
            print("[compare_runtime] OneOff: type(inputs)={} len={} OneOffArray={}"
                  .format(type(input), len(inputs), OneOffArray))
        if len(inputs) == 1 and not OneOffArray:
            name, values = list(inputs.items())[0]
            res = []
            for input in values:
                try:
                    one = sess.run(None, {name: input})
                    if lambda_onnx is None:
                        lambda_onnx = lambda: sess.run(None, {name: input})
                    if verbose:
                        import pprint
                        pprint.pprint(one)
                except ExpectedAssertionError as expe:
                    raise expe
                except Exception as e:
                    raise OnnxRuntimeAssertionError(
                        "Unable to run onnx '{0}' due to {1}".format(onx, e))
                res.append(one)
            if verbose:
                print("[compare_runtime] OneOff: _post_process_output1")
            output = _post_process_output(res)
        else:

            def to_array(vv):
                if isinstance(vv, (numpy.ndarray, numpy.int64, numpy.float32, str)):
                    return numpy.array([vv])
                else:
                    return numpy.array([vv], dtype=numpy.float32)

            t = list(inputs.items())[0]
            res = []
            for i in range(0, len(t[1])):
                iii = {k: to_array(v[i]) for k, v in inputs.items()}
                try:
                    one = sess.run(None, iii)
                    if lambda_onnx is None:
                        lambda_onnx = lambda: sess.run(None, iii)
                    if verbose:
                        import pprint
                        pprint.pprint(one)
                except ExpectedAssertionError as expe:
                    raise expe
                except Exception as e:
                    if verbose:
                        import onnx
                        model = onnx.load(onx)
                        smodel = "\nJSON ONNX\n" + str(model)
                    else:
                        smodel = ""
                    raise OnnxRuntimeAssertionError(
                        "Unable to run onnx '{0}' due to {1}{2}".format(onx, e, smodel))
                res.append(one)
            if verbose:
                print("[compare_runtime] OneOff: _post_process_output2")
            output = _post_process_output(res)

        if OneOffArray:
            if isinstance(output, list):
                pass
            elif not isinstance(output, numpy.ndarray):
                raise TypeError("output must be an array, not {}".format(type(output)))
            else:
                output = [output]
    else:
        if verbose:
            print("[compare_runtime] type(inputs)={} len={} names={}".format(
                type(input), len(inputs), list(sorted(inputs))))
        if verbose:
            run_options = onnxruntime.RunOptions()
            run_options.run_log_verbosity_level = 5
        else:
            run_options = None
        try:
            output = sess.run(None, inputs, run_options)
            lambda_onnx = lambda: sess.run(None, inputs)
            if verbose:
                import pprint
                pprint.pprint(output)
        except ExpectedAssertionError as expe:
            raise expe
        except RuntimeError as e:
            if "-Fail" in onx:
                raise ExpectedAssertionError(
                    "onnxruntime cannot compute the prediction for '{0}'".format(onx))
            else:
                if verbose:
                    import onnx
                    model = onnx.load(onx)
                    smodel = "\nJSON ONNX\n" + str(model)
                else:
                    smodel = ""
                raise OnnxRuntimeAssertionError(
                    "onnxruntime cannot compute the prediction for '{0}' due to {1}{2}"
                    .format(onx, e, smodel))
        except Exception as e:
            raise OnnxRuntimeAssertionError(
                "Unable to run onnx '{0}' due to {1}".format(onx, e))
        if verbose:
            print("[compare_runtime] done type={}".format(type(output)))

    output0 = output.copy()

    if comparable_outputs:
        cmp_exp = [load["expected"][o] for o in comparable_outputs]
        cmp_out = [output[o] for o in comparable_outputs]
    else:
        cmp_exp = load["expected"]
        cmp_out = output

    try:
        _compare_expected(cmp_exp, cmp_out, sess, onx, decimal=decimal,
                          verbose=verbose, **options)
    except ExpectedAssertionError as expe:
        raise expe
    except Exception as e:
        if verbose:
            import onnx
            model = onnx.load(onx)
            smodel = "\nJSON ONNX\n" + str(model)
        else:
            smodel = ""
        raise OnnxRuntimeAssertionError(
            "Model '{0}' has discrepancies.\n{1}: {2}{3}".format(onx, type(e), e, smodel))

    return output0, lambda_onnx
def main():
    global INPUT_LAYER_NAME

    OPENME = {}

    setup_time_begin = time.time()

    # Load the ONNX model from file
    sess_options = rt.SessionOptions()
    # sess_options.session_log_verbosity_level = 0
    if CPU_THREADS > 0:
        sess_options.enable_sequential_execution = False
        sess_options.session_thread_pool_size = CPU_THREADS
    graph_load_time_begin = time.time()
    sess = rt.InferenceSession(MODEL_PATH, sess_options)
    graph_load_time = time.time() - graph_load_time_begin

    input_layer_names = [x.name for x in sess.get_inputs()]  # FIXME: check that INPUT_LAYER_NAME belongs to this list
    INPUT_LAYER_NAME = INPUT_LAYER_NAME or input_layer_names[0]

    output_layer_names = [x.name for x in sess.get_outputs()]  # FIXME: check that OUTPUT_LAYER_NAME belongs to this list

    model_input_shape = sess.get_inputs()[0].shape
    model_input_type = sess.get_inputs()[0].type
    model_input_type = np.uint8 if model_input_type == 'tensor(uint8)' else np.float32  # FIXME: there must be a more humane way!

    # a more portable way to detect the number of classes
    for output in sess.get_outputs():
        if output.name == OUTPUT_LAYER_LABELS:
            model_classes = output.shape[1]

    print("Data layout: {}".format(MODEL_DATA_LAYOUT))
    print("Input layers: {}".format(input_layer_names))
    print("Output layers: {}".format(output_layer_names))
    print("Input layer name: " + INPUT_LAYER_NAME)
    print("Expected input shape: {}".format(model_input_shape))
    print("Expected input type: {}".format(model_input_type))
    print("Output layer names: " + ", ".join(
        [OUTPUT_LAYER_BBOXES, OUTPUT_LAYER_LABELS, OUTPUT_LAYER_SCORES]))
    print("Data normalization: {}".format(MODEL_NORMALIZE_DATA))
    print("Background/unlabelled classes to skip: {}".format(bg_class_offset))
    print("")

    try:
        expected_batch_size = int(model_input_shape[0])
        if BATCH_SIZE != expected_batch_size:
            raise Exception("expected_batch_size={}, desired CK_BATCH_SIZE={}, "
                            "they do not match - exiting."
                            .format(expected_batch_size, BATCH_SIZE))
    except ValueError:
        max_batch_size = None

    setup_time = time.time() - setup_time_begin

    # Run batched mode
    test_time_begin = time.time()
    total_load_time = 0
    next_batch_offset = 0
    total_inference_time = 0
    first_inference_time = 0
    images_loaded = 0

    for batch_index in range(BATCH_COUNT):
        batch_number = batch_index + 1

        begin_time = time.time()
        current_batch_offset = next_batch_offset
        batch_data, next_batch_offset = load_preprocessed_batch(image_filenames, current_batch_offset)

        load_time = time.time() - begin_time
        total_load_time += load_time
        images_loaded += BATCH_SIZE

        # Detect batch
        begin_time = time.time()
        run_options = rt.RunOptions()
        # run_options.run_log_verbosity_level = 0
        batch_results = sess.run(
            [OUTPUT_LAYER_BBOXES, OUTPUT_LAYER_LABELS, OUTPUT_LAYER_SCORES],
            {INPUT_LAYER_NAME: batch_data}, run_options)
        inference_time = time.time() - begin_time

        print("[batch {} of {}] loading={:.2f} ms, inference={:.2f} ms".format(
            batch_number, BATCH_COUNT, load_time * 1000, inference_time * 1000))

        total_inference_time += inference_time
        # Remember first batch prediction time
        if batch_index == 0:
            first_inference_time = inference_time

        # Process results
        for index_in_batch in range(BATCH_SIZE):
            global_image_index = current_batch_offset + index_in_batch
            width_orig, height_orig = original_w_h[global_image_index]

            filename_orig = image_filenames[global_image_index]
            detections_filename = os.path.splitext(filename_orig)[0] + '.txt'
            detections_filepath = os.path.join(DETECTIONS_OUT_DIR, detections_filename)
            with open(detections_filepath, 'w') as f:
                f.write('{:d} {:d}\n'.format(width_orig, height_orig))
                for i in range(len(batch_results[2][index_in_batch])):
                    confidence = batch_results[2][index_in_batch][i]
                    if confidence > SCORE_THRESHOLD:
                        class_number = int(batch_results[1][index_in_batch][i])
                        if class_map:
                            class_number = class_map[class_number]
                        box = batch_results[0][index_in_batch][i]
                        x1 = box[0] * width_orig
                        y1 = box[1] * height_orig
                        x2 = box[2] * width_orig
                        y2 = box[3] * height_orig
                        class_label = class_labels[class_number - bg_class_offset]
                        f.write('{:.2f} {:.2f} {:.2f} {:.2f} {:.3f} {} {}\n'.format(
                            x1, y1, x2, y2, confidence, class_number, class_label))

    test_time = time.time() - test_time_begin

    if BATCH_COUNT > 1:
        avg_inference_time = (total_inference_time - first_inference_time) / (images_loaded - BATCH_SIZE)
    else:
        avg_inference_time = total_inference_time / images_loaded

    avg_load_time = total_load_time / images_loaded

    # Save processed images ids list to be able to run
    # evaluation without repeating detections (CK_SKIP_DETECTION=YES)
    # with open(IMAGE_LIST_FILE, 'w') as f:
    #     f.write(json.dumps(processed_image_ids))

    OPENME['setup_time_s'] = setup_time
    OPENME['test_time_s'] = test_time
    OPENME['load_images_time_total_s'] = total_load_time
    OPENME['load_images_time_avg_s'] = avg_load_time
    OPENME['prediction_time_total_s'] = total_inference_time
    OPENME['prediction_time_avg_s'] = avg_inference_time
    OPENME['avg_time_ms'] = avg_inference_time * 1000
    OPENME['avg_fps'] = 1.0 / avg_inference_time if avg_inference_time > 0 else 0

    run_time_state = {"run_time_state": OPENME}
    with open(TIMER_JSON, 'w') as o:
        json.dump(run_time_state, o, indent=2, sort_keys=True)
def train_step(self, *args, **kwargs): """ inputs: model inputs, labels, learning rate, and, if in mixed_precision mode, loss_scale. outputs: if fetches is not provided, outputs are loss and (if in mixed mode and is finishing gradient accumulation) all_finite. if fetches is provided, outputs contains these requested with fetches. fetches: names of requested outputs """ # inputs to the ONNX model includes inputs to the original PyTorch model # plus learning rate and loss_scale if self.use_mixed_precision is True. # 1. when there are internal learning_rate and loss_scale (in fp16 cases) generators, # *args and **kwargs together contain ONLY and COMPLETE inputs to the PyTorch model. # In this case, changes to the training script is minimized. # 2. without internal learning rate and loss scale (in fp16 cases) generators, # *args and **kwargs passed in from the training script shall contains # inputs to the PyTorch model plus learning_rate and loss_scale. # it optionally contains the fetches. # localized arguments (*args) contains inputs to the ONNX model. # named arguments can contain both inputs, learning_rate and loss_scale, and the fetches learning_rate, loss_scale = None, None if self.get_lr_this_step_ is not None: # $args, **kwargs contains inputs to the pytorch model lr_this_step = self.get_lr_this_step_(self.global_step_) learning_rate = torch.tensor([lr_this_step]) if self.loss_scaler_ is not None and self.use_mixed_precision: loss_scale = torch.tensor([self.loss_scaler_.loss_scale_]) if self.onnx_model_ is None: sample_input, _ = self._prepare_input_and_fetches( self.model_desc_.inputs_, None, None, *args, **kwargs) self._init_onnx_model(sample_input) if self.use_mixed_precision: input, fetches = self._prepare_input_and_fetches( self.input_desc_with_lr_and_loss_scale, learning_rate, loss_scale, *args, **kwargs) assert len(self.input_desc_with_lr_and_loss_scale) == len(input) input_descs = self.input_desc_with_lr_and_loss_scale else: input, fetches = self._prepare_input_and_fetches( self.input_desc_with_lr, learning_rate, loss_scale, *args, **kwargs) assert len(self.input_desc_with_lr) == len(input) input_descs = self.input_desc_with_lr self.current_step += 1 # handle gradient accumulation in fully optimized mode run_options = None has_if_all_finite = False if fetches: output_desc = [ output for fetch in fetches for output in self.model_desc_.outputs_ if output.name_ == fetch ] elif self.current_step % self.gradient_accumulation_steps != 0: run_options = ort.RunOptions() run_options.only_execute_path_to_fetches = True run_options.training_mode = True output_desc = self.output_desc_with_group_accumulated_gradients elif self.use_mixed_precision: has_if_all_finite = True output_desc = self.output_desc_with_all_fp_16_or_fp32_gradients_finite else: output_desc = self.model_desc_.outputs_ if not isinstance(input, (list, tuple)): input = (input, ) session_run_results = ort_training_session_run_helper( self.session, self.train_io_binding, input, input_descs, output_desc, self.device_, run_options) if has_if_all_finite: # After session run with all_fp32_gradients_finite, we need to clear the iobinding's output state. # Otherwise next run with only_execute_path_to_fetches will lead to gradient all reduce # because all_fp32_gradients_finite is still in the feed. self.train_io_binding.clear_binding_outputs() all_finite = session_run_results[ self.output_desc_with_all_fp_16_or_fp32_gradients_finite[-1]. 
name_] if self.loss_scaler_ is not None: self.loss_scaler_.update_loss_scale(all_finite) if all_finite: # optimization has done, increase self.global_step_ self.global_step_ = self.global_step_ + 1 elif self.current_step % self.gradient_accumulation_steps == 0: # optimization has done, increase self.global_step_ self.global_step_ = self.global_step_ + 1 if fetches is not None: results = [session_run_results[fetch] for fetch in fetches] elif has_if_all_finite and self.loss_scaler_ is None: # return descripted outputs plus the all_finite flag so that the training script can handle loss scaling. results = [ session_run_results[output_desc.name_] for output_desc in self.output_desc_with_all_fp_16_or_fp32_gradients_finite ] else: results = [ session_run_results[output_desc.name_] for output_desc in self.model_desc_.outputs_ ] return results[0] if len(results) == 1 else results
import os

import numpy as np
import onnxruntime as ort

os.environ["ORT_TENSORRT_INT8_ENABLE"] = "1"
os.environ["ORT_TENSORRT_INT8_USE_NATIVE_CALIBRATION_TABLE"] = "0"
os.environ["ORT_TENSORRT_ENGINE_CACHE_ENABLE"] = "1"

sess_opt = ort.SessionOptions()
sess_opt.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL

print("Create inference session...")
execution_provider = ["TensorrtExecutionProvider", "CUDAExecutionProvider"]
sess = ort.InferenceSession("model.onnx", sess_options=sess_opt, providers=execution_provider)
run_opt = ort.RunOptions()

sequence = 128
batch = 1
input_ids = np.ones((batch, sequence), dtype=np.int64)
attention_mask = np.ones((batch, sequence), dtype=np.int64)
token_type_ids = np.ones((batch, sequence), dtype=np.int64)

print("Warm up phase...")
sess.run(
    None,
    {
        sess.get_inputs()[0].name: input_ids,
        sess.get_inputs()[1].name: attention_mask,
        sess.get_inputs()[2].name: token_type_ids,
    },
)
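# The snippet above creates run_opt but stops after the warm-up call. A
# plausible continuation that times the same run with the RunOptions passed
# in (the iteration count is an assumption):
import time

feeds = {
    sess.get_inputs()[0].name: input_ids,
    sess.get_inputs()[1].name: attention_mask,
    sess.get_inputs()[2].name: token_type_ids,
}

print("Measurement phase...")
iterations = 100  # hypothetical; pick whatever gives stable numbers
start = time.time()
for _ in range(iterations):
    outputs = sess.run(None, feeds, run_opt)  # RunOptions is the third positional arg
print("Average latency: {:.2f} ms".format((time.time() - start) * 1000 / iterations))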
# sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
sess_options.enable_profiling = True
sess_options.log_severity_level = 0
sess_options.log_verbosity_level = 0

# For OnnxRuntime 1.2.0, you might need to set intra_op_num_threads to 1 to enable OpenMP
# sess_options.intra_op_num_threads = 1
# For OnnxRuntime 1.3.0 or later, it is recommended to use the default setting, so you need not set it.

# Specify providers when you use onnxruntime-gpu for CPU inference.
session = onnxruntime.InferenceSession(export_model_path, sess_options,
                                       providers=['CPUExecutionProvider'])

run_options = onnxruntime.RunOptions()
run_options.log_severity_level = 0
run_options.log_verbosity_level = 0

latency = []
for i in range(total_samples):
    data = dataset[i]
    ort_inputs = {
        'input_ids': data[0].cpu().reshape(1, max_seq_length).numpy(),
        'input_mask': data[1].cpu().reshape(1, max_seq_length).numpy(),
        'segment_ids': data[2].cpu().reshape(1, max_seq_length).numpy()
    }
    start = time.time()
    ort_outputs = session.run(None, ort_inputs, run_options=run_options)
    latency.append(time.time() - start)

print("OnnxRuntime cpu Inference time = {} ms".format(
    format(sum(latency) * 1000 / len(latency), '.2f')))
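# Since enable_profiling is set above, the session records a JSON trace; a
# short follow-up sketch retrieving it with end_profiling(), the standard
# onnxruntime API for this:
profile_file = session.end_profiling()
print("Profile saved to:", profile_file)  # open in chrome://tracing or the Perfetto UI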