def load(weights_path, img_shape, conf_thres=0.4, iou_thres=0.5, device='cuda:0') -> Yolov5Predictor:
    """
    img_shape: (height, width)
    """
    if weights_path.endswith('.onnx'):
        if onnxruntime.get_device() != 'GPU':
            print("WARNING: Onnx not running in GPU! (onnxruntime.get_device()='%s')"
                  % onnxruntime.get_device())
        onnx_model = onnx.load(weights_path)
        onnx.checker.check_model(onnx_model)
        ort_session = onnxruntime.InferenceSession(weights_path)
        print("onnx session providers:", ort_session.get_providers())
        # ort_session.set_providers(['CUDAExecutionProvider'])
        yolo_model = ort_session
        stride = 32  # FIXME
    else:
        yolo_model = torch.load(
            weights_path, map_location=device,
            pickle_module=unpickler)['model'].float().eval()
        stride = int(yolo_model.stride.max())

    predictor = Yolov5Predictor(img_shape, conf_thres, iou_thres, yolo_model, stride=stride)
    return predictor
def prepare(cls, model, device=None, **kwargs):
    """
    Load the model and create a :class:`onnxruntime.InferenceSession`
    ready to be used as a backend.

    :param model: ModelProto (returned by `onnx.load`),
        string for a filename or bytes for a serialized model
    :param device: requested device for the computation,
        None means the default one which depends on
        the compilation settings
    :param kwargs: see :class:`onnxruntime.SessionOptions`
    :return: :class:`onnxruntime.InferenceSession`
    """
    if isinstance(model, OnnxRuntimeBackendRep):
        return model
    elif isinstance(model, InferenceSession):
        return OnnxRuntimeBackendRep(model)
    elif isinstance(model, (str, bytes)):
        options = SessionOptions()
        for k, v in kwargs.items():
            if hasattr(options, k):
                setattr(options, k, v)
        inf = InferenceSession(model, options)
        if device is not None and not cls.supports_device(device):
            raise RuntimeError(
                "Incompatible device expected '{0}', got '{1}'".format(
                    device, get_device()))
        return cls.prepare(inf, device, **kwargs)
    else:
        # type: ModelProto
        check_model(model)
        bin = model.SerializeToString()
        return cls.prepare(bin, device, **kwargs)
def __init__(self, cam_type="video_file", model_dir="./default_model"):
    self.lock = threading.Lock()
    # self.model = self.load_model(
    #     model_dir, is_default_model=True, is_scenario_model=False
    # )
    self.model = None
    self.model_uri = None
    self.model_downloading = False
    self.lva_mode = LVA_MODE
    self.image_shape = [IMG_HEIGHT, IMG_WIDTH]
    self.is_scenario = False
    self.detection_mode = "PD"
    self.threshold = 0.3

    self.send_video_to_cloud = False

    # Parts that we want to detect
    self.parts = []

    self.is_gpu = onnxruntime.get_device() == "GPU"
    if self.is_gpu:
        self.max_total_frame_rate = GPU_MAX_FRAME_RATE
    else:
        self.max_total_frame_rate = CPU_MAX_FRAME_RATE
    self.update_frame_rate_by_number_of_streams(1)
def __init__(self, onnx_file, class_names, device_id):
    super(ONNXRuntimeDetector, self).__init__(class_names, device_id)
    import onnxruntime as ort

    # get the custom op path
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except (ImportError, ModuleNotFoundError):
        warnings.warn('If input model has custom op from mmcv, '
                      'you may have to build mmcv with ONNXRuntime from source.')
    session_options = ort.SessionOptions()
    # register custom op for onnxruntime
    if osp.exists(ort_custom_op_path):
        session_options.register_custom_ops_library(ort_custom_op_path)
    sess = ort.InferenceSession(onnx_file, session_options)
    providers = ['CPUExecutionProvider']
    options = [{}]
    is_cuda_available = ort.get_device() == 'GPU'
    if is_cuda_available:
        providers.insert(0, 'CUDAExecutionProvider')
        options.insert(0, {'device_id': device_id})
    sess.set_providers(providers, options)

    self.sess = sess
    self.io_binding = sess.io_binding()
    self.output_names = [_.name for _ in sess.get_outputs()]
    self.is_cuda_available = is_cuda_available
def run_onnxruntime(self, model_path, input_dict, output_names):
    """Run test against onnxruntime backend."""
    providers = ['CPUExecutionProvider']
    if rt.get_device() == "GPU":
        gpus = os.environ.get("CUDA_VISIBLE_DEVICES")
        if gpus is None or len(gpus) > 1:
            providers = ['CUDAExecutionProvider']

    opt = rt.SessionOptions()
    # in case of issues with the runtime, one can enable more logging
    # opt.log_severity_level = 0
    # opt.log_verbosity_level = 255
    # opt.enable_profiling = True
    m = rt.InferenceSession(model_path, sess_options=opt, providers=providers)
    results = m.run(output_names, input_dict)
    if compare_perf:
        count = 10
        time_start = time.time()
        for _ in range(count):
            # pass the full feed dict (not just its keys) for the timing runs
            _ = m.run(output_names, input_dict)
        time_end = time.time()
        val = str((time_end - time_start) / count)
        print(f'==== avg ort name={self.name}, time={val}')
    return results
def run_onnxruntime(self, model_path, inputs, output_names, use_custom_ops=False):
    """Run test against onnxruntime backend."""
    import onnxruntime as rt
    providers = ['CPUExecutionProvider']
    if rt.get_device() == "GPU":
        gpus = os.environ.get("CUDA_VISIBLE_DEVICES")
        if gpus is None or len(gpus) > 1:
            providers = ['CUDAExecutionProvider']

    opt = rt.SessionOptions()
    if use_custom_ops:
        from onnxruntime_extensions import get_library_path
        opt.register_custom_ops_library(get_library_path())
    # in case of issues with the runtime, one can enable more logging
    # opt.log_severity_level = 0
    # opt.log_verbosity_level = 255
    # opt.enable_profiling = True
    m = rt.InferenceSession(model_path, opt, providers=providers)
    results = m.run(output_names, inputs)
    return results
def supports_device(cls, device):
    """
    Check whether the backend is compiled with particular device support.
    In particular it's used in the testing suite.
    """
    if device == "CUDA":
        device = "GPU"
    return device in get_device()
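# Hedged usage sketch for supports_device above. The class name
# "OnnxRuntimeBackend" is illustrative (the snippet only shows the method);
# the point is that "CUDA" is mapped onto the "GPU" string returned by
# onnxruntime.get_device(), so a CPU-only build reports False here.
if not OnnxRuntimeBackend.supports_device("CUDA"):
    print("onnxruntime build has no GPU support; falling back to CPU tests")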
def __init__(self, onnx_session, prediction_heads, language, device):
    if str(device) == "cuda" and onnxruntime.get_device() != "GPU":
        raise Exception(f"Device {device} not available for Inference. For CPU, run pip install onnxruntime and "
                        f"for GPU run pip install onnxruntime-gpu")
    self.onnx_session = onnx_session
    self.prediction_heads = prediction_heads
    self.device = device
    self.language = language
def __init__(self, onnxmodel):
    # paths to all the models need to be put right here
    # tinyYolo_object_detection_model = "models/mrcnn.onnx"

    # this part of the code will load all types of models right at the beginning; these models include onnx
    # models made for classification, object detection, etc.
    # self.heavy_object_detection_session = onnxruntime.InferenceSession(tinyYolo_object_detection_model, None)
    self.lighter_object_detection_session = onnxruntime.InferenceSession(
        onnxmodel, None)
    print(onnxruntime.get_device())
def __init__(self, onnx_path):
    self.session_option = onnxruntime.SessionOptions()
    self.session_option.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
    self.ort_session = onnxruntime.InferenceSession(onnx_path, sess_options=self.session_option)
    self.ort_session.set_providers(['CUDAExecutionProvider'])
    self.io_binding = self.ort_session.io_binding()
    self.io_binding.bind_output(self.ort_session.get_outputs()[0].name)
    # self.io_binding.bind_output(self.ort_session.get_outputs()[1].name)
    print(onnxruntime.get_device())
def from_pytorch(model, dummy_data, file_name, device):
    """A function to convert a pytorch model to an onnx model.
    Once converted, tests are run to ensure the model was converted correctly.

    :param model: A pytorch neural network model
    :type model: torch.nn.Module
    :param dummy_data: Some data that takes the shape of the neural network inputs.
        This doesn't have to be from training or test; it can be random,
        i.e. a tensor(batch_size, input1, input2, ..., inputn)
    :type dummy_data: tensor
    :param file_name: location for file output
    :type file_name: str
    :param device: device for pytorch compute
    :type device: torch.device
    """
    model.to(device)
    dummy_data = dummy_data.to(device)
    out = model(dummy_data)
    torch.onnx.export(model,
                      dummy_data,
                      file_name,
                      export_params=True,
                      opset_version=10,
                      do_constant_folding=True,
                      input_names=['input'],
                      output_names=['output'],
                      dynamic_axes={
                          'input': {0: 'batch_size'},
                          'output': {0: 'batch_size'}
                      })

    onnx_model = onnx.load(file_name)
    onnx.checker.check_model(onnx_model)

    print(onnxruntime.get_device())
    ort_session = onnxruntime.InferenceSession(file_name)

    # compute ONNX Runtime output prediction
    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(dummy_data)}
    ort_outs = ort_session.run(None, ort_inputs)

    # compare ONNX Runtime and PyTorch results
    np.testing.assert_allclose(to_numpy(out), ort_outs[0], rtol=1e-03, atol=1e-05)
    print("Exported model has been tested with ONNXRuntime, and the result looks good!")
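# Hedged usage sketch for from_pytorch above. The Linear model, batch size,
# and the "linear.onnx" filename are illustrative placeholders, not part of
# the original code, and to_numpy is a minimal stand-in for the helper the
# original module is assumed to provide.
import torch


def to_numpy(tensor):
    # detach and move to CPU so the comparison works for CUDA exports too
    return tensor.detach().cpu().numpy()


model = torch.nn.Linear(4, 2)
dummy = torch.randn(8, 4)  # (batch_size, features)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from_pytorch(model, dummy, "linear.onnx", device)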
def __init__(self, model_dir, cam_type="video_file", cam_source=CAM_SOURCE):
    # def __init__(self, model_dir, cam_type="video_file", cam_source="./mov_bbb.mp4"):
    # def __init__(self, model_dir, cam_type="video_file", cam_source="./sample_video/video_1min.mp4"):
    # def __init__(self, model_dir, cam_type="rtsp", cam_source="rtsp://52.229.36.89:554/media/catvideo.mkv"):
    # Default system params
    self.render = False

    self.lock = threading.Lock()

    self.cam_type = cam_type
    self.cam_source = cam_source
    self.cam = cv2.VideoCapture(normalize_rtsp(cam_source))
    self.cam_is_alive = False

    self.model = self.load_model(model_dir, is_default_model=True)
    self.model_uri = None

    self.last_img = None
    self.last_edge_img = None
    self.last_drawn_img = None
    self.last_prediction = []
    self.last_prediction_count = {}

    self.part_detection_id = None
    self.confidence_min = 30 * 0.01
    self.confidence_max = 30 * 0.01
    self.max_images = 10
    self.last_upload_time = 0
    self.is_upload_image = False
    self.current_uploaded_images = {}

    self.detection_success_num = 0
    self.detection_unidentified_num = 0
    self.detection_total = 0
    self.detections = []

    self.threshold = 0.3

    self.has_aoi = False
    self.aoi_info = None
    # Parts that we want to detect
    self.parts = []

    self.is_gpu = (onnxruntime.get_device() == 'GPU')
    self.average_inference_time = 0

    # IoT Hub
    self.iothub_is_send = False
    self.iothub_threshold = 0.5
    self.iothub_fpm = 0
    self.iothub_last_send_time = time.time()
    self.iothub_interval = 99999999
def is_compatible(cls, model, device=None, **kwargs):
    """
    Return whether the model is compatible with the backend.

    :param model: unused
    :param device: None to use the default device or a string (ex: `'CPU'`)
    :return: boolean
    """
    if device is None:
        device = get_device()
    return cls.supports_device(device)
def onnx_model_load(onnx_file):
    sess_options = onnxruntime.SessionOptions()
    # sess_options.log_severity_level = 0

    # Set graph optimization level
    sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_EXTENDED

    onnx_model = onnxruntime.InferenceSession(onnx_file, sess_options)
    onnx_model.set_providers(['CUDAExecutionProvider'])
    print("Onnx model engine: ", onnx_model.get_providers(),
          "Device: ", onnxruntime.get_device())

    return onnx_model
def loadOnnx(self):
    onnxPath, oFile = self.getOnnxModelPath()
    self.logger.info(
        "Loading onnx from {} OnnxRuntime Device {} OnnxRuntime Version {}"
        .format(onnxPath, ort.get_device(), ort.__version__))
    sess = ort.InferenceSession(onnxPath)
    inNames = [x.name for x in sess.get_inputs()]
    outNames = sess.get_outputs()[0].name
    self.logger.info("Onnx loaded inputs {} outputs {}".format(inNames, outNames))
    return sess, inNames, outNames
def __init__(self, base_directory, context):
    # Save some variables in the init. This way we can track inference times during the batch request.
    self.total_time = 0
    self.n_requests = 0

    urllib.request.urlretrieve(
        "https://github.com/onnx/models/raw/main/vision/super_resolution/sub_pixel_cnn_2016/model/super-resolution-10.onnx",
        "super_resolution.onnx")

    print(onnxruntime.get_device())
    self.ort_session = onnxruntime.InferenceSession(
        "super_resolution.onnx", providers=['CUDAExecutionProvider'])
    print(self.ort_session.get_providers())

    print("Initialising My Deployment")
def prepare(cls, model, device=None, **kwargs):
    """
    Load the model and create a :class:`onnxruntime.InferenceSession`
    ready to be used as a backend.

    :param model: ModelProto (returned by `onnx.load`),
        string for a filename or bytes for a serialized model
    :param device: requested device for the computation,
        None means the default one which depends on
        the compilation settings
    :param kwargs: see :class:`onnxruntime.SessionOptions`
    :return: :class:`onnxruntime.InferenceSession`
    """
    if isinstance(model, OnnxRuntimeBackendRep):
        return model
    elif isinstance(model, InferenceSession):
        return OnnxRuntimeBackendRep(model)
    elif isinstance(model, (str, bytes)):
        options = SessionOptions()
        for k, v in kwargs.items():
            if hasattr(options, k):
                setattr(options, k, v)
        inf = InferenceSession(model, options)
        # backend API is primarily used for ONNX test/validation. As such, we should disable
        # session.run() fallback which may hide test failures.
        inf.disable_fallback()
        if device is not None and not cls.supports_device(device):
            raise RuntimeError(
                "Incompatible device expected '{0}', got '{1}'".format(
                    device, get_device()))
        return cls.prepare(inf, device, **kwargs)
    else:
        # type: ModelProto
        # check_model serializes the model anyway, so serialize the model once here
        # and reuse it below in the cls.prepare call to avoid an additional serialization.
        # This only works with onnx >= 1.10.0, hence the version check.
        onnx_version = tuple(map(int, (version.version.split(".")[:3])))
        onnx_supports_serialized_model_check = onnx_version >= (1, 10, 0)
        bin_or_model = model.SerializeToString() if onnx_supports_serialized_model_check else model
        check_model(bin_or_model)
        opset_supported, error_message = cls.is_opset_supported(model)
        if not opset_supported:
            raise unittest.SkipTest(error_message)
        # Now bin might be serialized; if it's not, we need to serialize it here,
        # otherwise we'd end up in an infinite recursive call.
        bin = bin_or_model
        if not isinstance(bin, (str, bytes)):
            bin = bin.SerializeToString()
        return cls.prepare(bin, device, **kwargs)
def main():
    res = load_model("bert_en_cased_L-12_H-768_A-12-v3", verbose=1)
    pathname, output, inputs, outputs, onnx_inputs = res
    output_names = outputs
    structured_outputs = [
        "answer_types", "tf_op_layer_end_logits",
        "tf_op_layer_start_logits", "unique_ids"
    ]
    perf_iter = 5
    rtol = 0.01
    atol = 0.0001

    print("[main] testing ONNX %r" % output)
    m = rt.InferenceSession(output)
    results_onnx = m.run(output_names.split(','), onnx_inputs)
    print("[main] got results, testing perf")
    start = time.time()
    for _ in range(perf_iter):
        _ = m.run(output_names.split(','), onnx_inputs)
    onnx_runtime_ms = (time.time() - start) / perf_iter * 1000
    print("[main] ONNX perf:", onnx_runtime_ms)

    print("[main] loading TF")
    imported = tf.saved_model.load(".", tags=['serve'])
    concrete_func = imported.signatures["serving_default"]
    tf_inputs = {}
    for k, v in onnx_inputs.items():
        tf_inputs[k.split(":")[0]] = tf.constant(v)
    tf_func = tf.function(concrete_func)

    print("[main] running TF")
    tf_results_d = tf_func(**tf_inputs)
    # results_tf = [tf_results_d[output].numpy() for output in structured_outputs]
    print("[main] got results, testing perf")
    start = time.time()
    for _ in range(perf_iter):
        _ = concrete_func(**tf_inputs)
    tf_runtime_ms = (time.time() - start) / perf_iter * 1000
    print("[main] TF perf:", tf_runtime_ms)

    # for tf_res, onnx_res in zip(results_tf, results_onnx):
    #     np.testing.assert_allclose(tf_res, onnx_res, rtol=rtol, atol=atol)
    print("[main] Results match")

    print('[main] device', rt.get_device(), rt.__version__, rt.__file__)
    print("[main] TF perf, ONNX perf, ratio")
    print("[main]", tf_runtime_ms, onnx_runtime_ms, tf_runtime_ms / onnx_runtime_ms)
def __init__(self, onnx_path, config=None, **kwargs):
    self.sess = rt.InferenceSession(onnx_path)
    width = self.sess.get_inputs()[0].shape[2]
    if config is None:
        self.config = dict(height=32,
                           width=300,
                           batch_size=128,
                           input_name=['the_input:0'],
                           output_name=['y_pred/truediv:0'],
                           device='cpu')
    else:
        self.config = config
    self.config['width'] = width
    if rt.get_device() == "GPU":
        self.config["device"] = "GPU"
def run_onnxruntime(self, name, model_proto, inputs, outputs, external_tensor_storage=None):
    """Run test against onnxruntime backend."""
    import onnxruntime as rt
    model_path = utils.save_onnx_model(TEMP_DIR, name, inputs, model_proto,
                                       include_test_data=True,
                                       as_text=utils.is_debug_mode(),
                                       external_tensor_storage=external_tensor_storage)
    logger.info("Model saved to %s", model_path)
    providers = ['CPUExecutionProvider']
    if rt.get_device() == "GPU":
        gpus = os.environ.get("CUDA_VISIBLE_DEVICES")
        if gpus is None or len(gpus) > 1:
            providers = ['CUDAExecutionProvider']

    opt = rt.SessionOptions()
    if self.use_custom_ops:
        from onnxruntime_extensions import get_library_path
        opt.register_custom_ops_library(get_library_path())
    if self.ort_profile is not None:
        opt.enable_profiling = True
    m = rt.InferenceSession(model_path, sess_options=opt, providers=providers)
    results = m.run(outputs, inputs)

    if self.perf:
        n = 0
        start = time.time()
        stop = start + PERF_TIME
        while time.time() < stop:
            for _ in range(PERF_STEP):
                _ = m.run(outputs, inputs)
            n += PERF_STEP
        self.onnx_runtime = 1000 * (time.time() - start) / n
        logger.info("ORT perf {:.2f}ms/inference, n={}".format(self.onnx_runtime, n))
    if self.ort_profile is not None:
        tmp_path = m.end_profiling()
        shutil.move(tmp_path, self.ort_profile)
    return results
def load_onnx_wav2vec2_featurizer(onnx_path):
    import onnxruntime as ort
    import soundfile as sf

    print("ORT Device:", ort.get_device())
    ort_session = ort.InferenceSession(onnx_path)

    def _featurize(path):
        # read mono 16 kHz audio and add the batch dimension expected by the model
        input_values, rate = sf.read(path, dtype=np.float32)
        assert rate == 16_000
        input_values = input_values.reshape(1, -1)

        hidden_state = ort_session.run(None, {"input_values": input_values})[0].squeeze(0)
        return hidden_state

    return _featurize
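# Hedged usage sketch for load_onnx_wav2vec2_featurizer above.
# "wav2vec2.onnx" and "utterance.wav" are placeholder paths, and the audio
# must be 16 kHz mono as asserted inside _featurize.
featurize = load_onnx_wav2vec2_featurizer("wav2vec2.onnx")
hidden_states = featurize("utterance.wav")
print(hidden_states.shape)  # roughly (num_frames, hidden_size) for wav2vec 2.0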
def create_inference_session(onnx_model, options=None, verbose=False):
    """Return an ONNX inference session.

    Args:
        onnx_model (str): The onnx str path of the model
        options: Options to use for the Inference Session. Defaults to None.
        verbose (bool, optional): Defaults to False.

    Returns:
        Inference Session
    """
    ort_session = ort.InferenceSession(onnx_model, sess_options=options)
    if verbose:
        print("ONNX runtime: ", ort.get_device())
    return ort_session
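# Hedged usage sketch for create_inference_session above. "model.onnx" is a
# placeholder path, and the zero-filled feed assumes a single float32 input
# whose symbolic (dynamic) dimensions can be resolved to 1.
import numpy as np
import onnxruntime as ort

options = ort.SessionOptions()
options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

session = create_inference_session("model.onnx", options=options, verbose=True)
meta = session.get_inputs()[0]
shape = [d if isinstance(d, int) else 1 for d in meta.shape]  # resolve symbolic dims
outputs = session.run(None, {meta.name: np.zeros(shape, dtype=np.float32)})
print([o.shape for o in outputs])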
def __init__(self, model_path='tiny-yolov3-11.onnx', class_path='coco_classes.txt'):
    self.name = 'tiny_yolov3'
    self.sess = rt.InferenceSession(model_path)
    self.inputs = self.sess.get_inputs()
    self.outputs = self.sess.get_outputs()
    # read class names from the class_path argument instead of a hard-coded filename
    self.classes = [line.rstrip('\n') for line in open(class_path)]

    # print input/output details
    print("backend: {}".format(rt.get_device()))
    print("inputs:")
    for i, input in enumerate(self.inputs):
        print("{} - {}: {} - {}".format(i, input.name, input.shape, input.type))
    print("outputs:")
    for i, output in enumerate(self.outputs):
        print("{} - {}: {} - {}".format(i, output.name, output.shape, output.type))
def __init__(self, onnx_file, class_names, device_id):
    super(ONNXRuntimeClassifier, self).__init__()
    sess = ort.InferenceSession(onnx_file)

    providers = ['CPUExecutionProvider']
    options = [{}]
    is_cuda_available = ort.get_device() == 'GPU'
    if is_cuda_available:
        providers.insert(0, 'CUDAExecutionProvider')
        options.insert(0, {'device_id': device_id})
    sess.set_providers(providers, options)

    self.sess = sess
    self.CLASSES = class_names
    self.device_id = device_id
    self.io_binding = sess.io_binding()
    self.output_names = [_.name for _ in sess.get_outputs()]
    self.is_cuda_available = is_cuda_available
def main(image_filename):
    # Load labels
    with open(LABELS_FILENAME, 'r') as f:
        labels = [l.strip() for l in f.readlines()]

    od_model = ONNXRuntimeObjectDetection(MODEL_FILENAME, labels)
    print('using', onnxruntime.get_device())

    print('reading image')
    image = Image.open(image_filename)

    print('predict 10 times')
    pres = []
    infs = []
    posts = []
    totals = []
    N = 10

    def _format(x):
        return int(x * 100) / 100

    # first image's result isn't accurate
    predictions, pre, inf, post = od_model.predict_image(image)

    for i in range(N):
        predictions, pre, inf, post = od_model.predict_image(image)
        pre *= 1000
        inf *= 1000
        post *= 1000
        pres.append(pre)
        infs.append(inf)
        posts.append(post)
        totals.append(pre + inf + post)
        print('predicting image ...')
        print('  pre_process :', _format(pre), 'ms')
        print('  inference   :', _format(inf), 'ms')
        print('  post_process:', _format(post), 'ms')
        print('  all         :', _format(pre + inf + post), 'ms')

    print('Average (', N, 'images )')
    print('  pre_process :', _format(sum(pres) / N), 'ms')
    print('  inference   :', _format(sum(infs) / N), 'ms')
    print('  post_process:', _format(sum(posts) / N), 'ms')
    print('  all         :', _format(sum(totals) / N), 'ms')
def __init__(self, onnx_file: str, cfg: Any, device_id: int, show_score: bool = False):
    SingleStageTextDetector.__init__(self, cfg.model.backbone, cfg.model.neck,
                                     cfg.model.bbox_head, cfg.model.train_cfg,
                                     cfg.model.test_cfg, cfg.model.pretrained)
    TextDetectorMixin.__init__(self, show_score)
    import onnxruntime as ort

    # get the custom op path
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except (ImportError, ModuleNotFoundError):
        warnings.warn('If input model has custom op from mmcv, '
                      'you may have to build mmcv with ONNXRuntime from source.')
    session_options = ort.SessionOptions()
    # register custom op for onnxruntime
    if osp.exists(ort_custom_op_path):
        session_options.register_custom_ops_library(ort_custom_op_path)
    sess = ort.InferenceSession(onnx_file, session_options)
    providers = ['CPUExecutionProvider']
    options = [{}]
    is_cuda_available = ort.get_device() == 'GPU'
    if is_cuda_available:
        providers.insert(0, 'CUDAExecutionProvider')
        options.insert(0, {'device_id': device_id})
    sess.set_providers(providers, options)

    self.sess = sess
    self.device_id = device_id
    self.io_binding = sess.io_binding()
    self.output_names = [_.name for _ in sess.get_outputs()]
    for name in self.output_names:
        self.io_binding.bind_output(name)
    self.cfg = cfg
def __init__(self, model_path='updated_arcface.onnx', images_path='./dataset',
             pickle_path='./data.pickle'):
    self.name = 'FriendDetector'
    self.sess = rt.InferenceSession(model_path)
    self.inputs = self.sess.get_inputs()
    self.outputs = self.sess.get_outputs()

    # load dataset
    image_label_data = {}
    pickle_label_data = {}
    if envars.USE_IMAGE_DATASET():
        print('loading from images in: {}'.format(images_path))
        image_label_data = train.images_to_dict(images_path, self)
    if envars.USE_PICKLE_DATASET() and os.path.exists(pickle_path):
        print('loading from pickle: {}'.format(pickle_path))
        pickle_label_data = train.load_dict_from_disk(pickle_path)
    combined_labels = train.merge_dicts(image_label_data, pickle_label_data)
    if envars.SAVE_DATASET_TO_PICKLE():
        print('saving dataset to: {}'.format(pickle_path))
        train.save_dict_to_disk(combined_labels, pickle_path)

    X, y = train.dict_to_compressed_mat(combined_labels)
    if not y:
        print('data not found. aborting')
        sys.exit(1)
    self.X = X
    self.labels = y
    print('found labels: {}'.format(y))
    print('dataset shape: {}'.format(X.shape))

    # print input/output details
    print("backend: {}".format(rt.get_device()))
    print("inputs:")
    for i, input in enumerate(self.inputs):
        print("{} - {}: {} - {}".format(i, input.name, input.shape, input.type))
    print("outputs:")
    for i, output in enumerate(self.outputs):
        print("{} - {}: {} - {}".format(i, output.name, output.shape, output.type))
def prepare(cls, model, device=None, **kwargs):
    """
    Load the model and create a :class:`onnxruntime.InferenceSession`
    ready to be used as a backend.

    :param model: ModelProto (returned by `onnx.load`),
        string for a filename or bytes for a serialized model
    :param device: requested device for the computation,
        None means the default one which depends on
        the compilation settings
    :param kwargs: see :class:`onnxruntime.SessionOptions`
    :return: :class:`onnxruntime.InferenceSession`
    """
    if isinstance(model, OnnxRuntimeBackendRep):
        return model
    elif isinstance(model, InferenceSession):
        return OnnxRuntimeBackendRep(model)
    elif isinstance(model, (str, bytes)):
        options = SessionOptions()
        for k, v in kwargs.items():
            if hasattr(options, k):
                setattr(options, k, v)
        inf = InferenceSession(model, options)
        # backend API is primarily used for ONNX test/validation. As such, we should disable
        # session.run() fallback which may hide test failures.
        inf.disable_fallback()
        if device is not None and not cls.supports_device(device):
            raise RuntimeError(
                "Incompatible device expected '{0}', got '{1}'".format(
                    device, get_device()))
        return cls.prepare(inf, device, **kwargs)
    else:
        # type: ModelProto
        check_model(model)
        opset_supported, error_message = cls.is_opset_supported(model)
        if not opset_supported:
            raise unittest.SkipTest(error_message)
        bin = model.SerializeToString()
        return cls.prepare(bin, device, **kwargs)
def evaluate_onnx(model, encoder, size, onnx_file):
    from export_model import export_onnx
    import onnxruntime as ort

    export_onnx(model, size, onnx_file)

    print("Device: " + ort.get_device())
    ort_session = ort.InferenceSession(onnx_file)

    def f(image, nms_params=detection_table.nms_defaults):
        assert image.dim() == 3, \
            "evaluate_onnx: expected image of 3d [H,W,C], got: " + str(image.shape)
        image_size = (image.shape[1], image.shape[0])

        image = image.unsqueeze(0)
        prediction = ort_session.run(None, {'input': image.numpy()})
        prediction = map_tensors(prediction, torch.squeeze, 0)

        return encoder.decode(image_size, prediction, nms_params=nms_params)

    return f
def __init__(self, onnx_file, cfg, device_id):
    super(ONNXRuntimeEditing, self).__init__()
    ort_custom_op_path = ''
    try:
        from mmcv.ops import get_onnxruntime_op_path
        ort_custom_op_path = get_onnxruntime_op_path()
    except (ImportError, ModuleNotFoundError):
        warnings.warn('If input model has custom op from mmcv, '
                      'you may have to build mmcv with ONNXRuntime from source.')
    session_options = ort.SessionOptions()
    # register custom op for onnxruntime
    if osp.exists(ort_custom_op_path):
        session_options.register_custom_ops_library(ort_custom_op_path)
    sess = ort.InferenceSession(onnx_file, session_options)
    providers = ['CPUExecutionProvider']
    options = [{}]
    is_cuda_available = ort.get_device() == 'GPU'
    if is_cuda_available:
        providers.insert(0, 'CUDAExecutionProvider')
        options.insert(0, {'device_id': device_id})
    sess.set_providers(providers, options)

    self.sess = sess
    self.device_id = device_id
    self.io_binding = sess.io_binding()
    self.output_names = [_.name for _ in sess.get_outputs()]

    base_model = build_model(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)

    if isinstance(base_model, BaseMattor):
        WraperClass = ONNXRuntimeMattor
    elif isinstance(base_model, BasicRestorer):
        WraperClass = ONNXRuntimeRestorer
    self.wraper = WraperClass(self.sess, self.io_binding, self.output_names, base_model)