class IEModel:
    def __init__(self, model_path, core, target_device, num_requests, model_type):
        log.info('Reading {} model {}'.format(model_type, model_path))
        self.model = core.read_model(model_path)
        if len(self.model.inputs) != 1:
            log.error("Demo supports only models with 1 input")
            sys.exit(1)

        if len(self.model.outputs) != 1:
            log.error("Demo supports only models with 1 output")
            sys.exit(1)

        self.outputs = {}
        compiled_model = core.compile_model(self.model, target_device)
        self.output_tensor = compiled_model.outputs[0]
        self.input_name = self.model.inputs[0].get_any_name()
        self.input_shape = self.model.inputs[0].shape

        self.num_requests = num_requests
        self.infer_queue = AsyncInferQueue(compiled_model, num_requests)
        self.infer_queue.set_callback(self.completion_callback)

        log.info('The {} model {} is loaded to {}'.format(model_type, model_path, target_device))

    def completion_callback(self, infer_request, id):
        self.outputs[id] = infer_request.results[self.output_tensor]

    def async_infer(self, frame, req_id):
        input_data = {self.input_name: frame}
        self.infer_queue.start_async(input_data, req_id)

    def wait_request(self, req_id):
        self.infer_queue[req_id].wait()
        return self.outputs.pop(req_id, None)
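# A minimal usage sketch for the IEModel wrapper above. The IR path is a
# placeholder, and the zero-filled frame merely matches the model's input shape;
# a real application would pass a preprocessed image instead.
import numpy as np
from openvino.runtime import Core

core = Core()
model = IEModel('person-detection.xml', core, 'CPU', num_requests=2, model_type='detection')
frame = np.zeros(list(model.input_shape), dtype=np.float32)  # stand-in for a real preprocessed frame
model.async_infer(frame, req_id=0)  # schedule inference without blocking the caller
output = model.wait_request(0)      # block until request 0 completes, then pop its result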
def deploy(self, device, max_requests=1):
    self.max_requests = max_requests
    compiled_model = self.core.compile_model(self.model, device)
    self.output_tensor = compiled_model.outputs[0]
    self.infer_queue = AsyncInferQueue(compiled_model, self.max_requests)
    self.infer_queue.set_callback(self.completion_callback)
    log.info('The {} model {} is loaded to {}'.format(
        self.model_type, self.model_path, device))
def test_results_async_infer(device):
    jobs = 8
    num_request = 4
    core = Core()
    func = core.read_model(test_net_xml, test_net_bin)
    exec_net = core.compile_model(func, device)
    infer_queue = AsyncInferQueue(exec_net, num_request)
    jobs_done = [{"finished": False, "latency": 0} for _ in range(jobs)]

    def callback(request, job_id):
        jobs_done[job_id]["finished"] = True
        jobs_done[job_id]["latency"] = request.latency

    img = read_image()
    infer_queue.set_callback(callback)
    assert infer_queue.is_ready()  # is_ready is a method, not a property
    for i in range(jobs):
        infer_queue.start_async({"data": img}, i)
    infer_queue.wait_all()

    request = exec_net.create_infer_request()
    outputs = request.infer({0: img})

    for i in range(num_request):
        # assert the comparison, otherwise the check result is silently discarded
        assert np.allclose(list(outputs.values()), list(infer_queue[i].results.values()))
def test_infer_queue_is_ready(device):
    core = Core()
    param = ops.parameter([10])
    model = Model(ops.relu(param), [param])
    compiled = core.compile_model(model, device)
    infer_queue = AsyncInferQueue(compiled, 1)

    def callback(request, _):
        time.sleep(0.001)

    infer_queue.set_callback(callback)
    assert infer_queue.is_ready()
    infer_queue.start_async()
    assert not infer_queue.is_ready()
    infer_queue.wait_all()
def create_infer_requests(self, exe_network):
    if self.api_type == 'sync':
        requests = [exe_network.create_infer_request()]
    else:
        requests = AsyncInferQueue(exe_network, self.nireq)
        self.nireq = len(requests)
    return requests
def create_infer_requests(self, compiled_model):
    if self.api_type == 'sync':
        requests = [compiled_model.create_infer_request()]
    else:
        requests = AsyncInferQueue(compiled_model, self.nireq)
        self.nireq = len(requests)
    return requests
def _process_dataset_async(self, stats_layout, sampler, print_progress=False,
                           need_metrics_per_sample=False, requests_num=0):
    """Performs model inference on specified dataset subset asynchronously
    :param stats_layout: dict of stats collection functions {node_name: [fn]} (optional)
    :param sampler: sampling dataset to make inference
    :param print_progress: whether to print inference progress
    :param need_metrics_per_sample: whether to collect metrics for each batch
    :param requests_num: number of infer requests
    """

    def completion_callback(request, user_data):
        start_time, batch_id = user_data
        predictions = request.results
        self._process_infer_output(stats_layout, predictions,
                                   batch_annotations, batch_meta,
                                   need_metrics_per_sample)

        # Print progress
        if self._print_inference_progress(progress_log_fn,
                                          batch_id, len(sampler),
                                          start_time, time()):
            start_time = time()

    progress_log_fn = logger.info if print_progress else logger.debug
    self._ie.set_config({'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO',
                         'CPU_BIND_THREAD': 'YES'}, self._device)

    # Load model to the plugin
    compiled_model = self._ie.compile_model(model=self._model, device_name=self._device)
    optimal_requests_num = compiled_model.get_metric('OPTIMAL_NUMBER_OF_INFER_REQUESTS')
    requests_num = optimal_requests_num if requests_num == 0 else requests_num
    logger.debug('Async mode requests number: %d', requests_num)
    infer_queue = AsyncInferQueue(compiled_model, requests_num)

    progress_log_fn('Start inference of %d images', len(sampler))
    sampler_iter = iter(enumerate(sampler))
    # Start inference
    start_time = time()
    infer_queue.set_callback(completion_callback)
    for batch_id, data_batch in sampler_iter:
        batch_annotations, image_batch, batch_meta = self._process_batch(data_batch)
        infer_queue.start_async(self._fill_input(compiled_model, image_batch),
                                (start_time, batch_id))
    infer_queue.wait_all()
    progress_log_fn('Inference finished')
def get_infer_queue(self, log=True):
    if self.config.get('num_requests', 'AUTO') == 'AUTO':
        num_requests = 0
    else:
        num_requests = self.num_requests
    queue = AsyncInferQueue(self.exec_network, num_requests)
    if log:
        print_info('Prepared async infer queue with {} requests'.format(len(queue)))
    return queue
class IEModel:  # pylint: disable=too-few-public-methods
    """ Class that allows working with OpenVINO Runtime model. """

    def __init__(self, model_path, device, core, num_requests, model_type, output_shape=None):
        log.info('Reading {} model {}'.format(model_type, model_path))
        self.model = core.read_model(model_path)
        if len(self.model.inputs) != 1:
            raise RuntimeError("The {} wrapper supports only models with 1 input layer".format(model_type))

        self.outputs = {}
        compiled_model = core.compile_model(self.model, device)
        self.infer_queue = AsyncInferQueue(compiled_model, num_requests)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(model_type, model_path, device))

        self.input_tensor_name = self.model.inputs[0].get_any_name()
        if len(self.model.outputs) > 1:
            if output_shape is not None:
                candidates = []
                for output_tensor in self.model.outputs:
                    if len(output_tensor.partial_shape) != len(output_shape):
                        continue
                    if output_tensor.partial_shape[1] == output_shape[1]:
                        candidates.append(output_tensor.get_any_name())
                if len(candidates) != 1:
                    raise RuntimeError("One output is expected")
                self.output_tensor_name = candidates[0]
            else:
                raise RuntimeError("One output is expected")
        else:
            self.output_tensor_name = self.model.outputs[0].get_any_name()
        self.input_size = self.model.input(self.input_tensor_name).shape

    def completion_callback(self, infer_request, id):
        self.outputs[id] = infer_request.get_tensor(self.output_tensor_name).data[:]

    def infer(self, data):
        """Runs model on the specified input"""
        self.async_infer(data, 0)
        return self.wait_request(0)

    def async_infer(self, data, req_id):
        """Requests model inference for the specified input"""
        input_data = {self.input_tensor_name: data}
        self.infer_queue.start_async(input_data, req_id)

    def wait_request(self, req_id):
        """Waits for the model output by the specified request ID"""
        self.infer_queue.wait_all()
        try:
            return self.outputs.pop(req_id)
        except KeyError:
            return None
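# A short usage sketch for the wrapper above; the IR path and the zero-filled
# input blob are placeholders. infer() is the synchronous convenience wrapper
# around the async queue (async_infer with request 0 followed by wait_request):
import numpy as np
from openvino.runtime import Core

core = Core()
encoder = IEModel('action-encoder.xml', 'CPU', core, num_requests=2, model_type='encoder')
blob = np.zeros(list(encoder.input_size), dtype=np.float32)  # matches the model input shape
result = encoder.infer(blob)  # async_infer(blob, 0) + wait_request(0) under the hood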
def infer_async(compiled_model, number_iter, num_request, get_slice):
    result = None
    infer_queue = AsyncInferQueue(compiled_model, num_request)
    iteration = 0
    inference_time = time()
    while iteration < max(number_iter, num_request):
        # get_idle_request_id() blocks until a request is free, so no extra
        # waiting/negative-id handling is needed here
        idle_id = infer_queue.get_idle_request_id()
        utils.set_input_to_blobs(infer_queue[idle_id], get_slice(iteration))
        infer_queue.start_async()
        iteration += 1
    infer_queue.wait_all()
    inference_time = time() - inference_time

    if number_iter == 1:
        request_results = [utils.get_request_result(request) for request in infer_queue]
        output_names = request_results[0].keys()
        result = dict.fromkeys(output_names, None)
        for key in result:
            result[key] = np.concatenate([req_result[key] for req_result in request_results], axis=0)
    return result, inference_time
def load_model(self, core, model_path, device, model_type, num_reqs=1):
    """Loads a model in the Inference Engine format"""
    log.info('Reading {} model {}'.format(model_type, model_path))
    self.model = core.read_model(model_path)
    if len(self.model.inputs) not in self.get_allowed_inputs_len():
        raise RuntimeError("Supports topologies with only {} inputs, but got {}".format(
            self.get_allowed_inputs_len(), len(self.model.inputs)))
    if len(self.model.outputs) not in self.get_allowed_outputs_len():
        raise RuntimeError("Supports topologies with only {} outputs, but got {}".format(
            self.get_allowed_outputs_len(), len(self.model.outputs)))

    self.input_tensor_name = self.model.inputs[0].get_any_name()
    self.output_tensor_name = self.model.outputs[0].get_any_name()
    # Loading model to the plugin
    compiled_model = core.compile_model(self.model, device)
    self.infer_queue = AsyncInferQueue(compiled_model, num_reqs)
    self.infer_queue.set_callback(self.completion_callback)
    log.info('The {} model {} is loaded to {}'.format(model_type, model_path, device))
def test_infer_queue_fail_on_py_model(device):
    jobs = 1
    num_request = 1
    core = Core()
    model = core.read_model(test_net_xml, test_net_bin)
    compiled = core.compile_model(model, device)
    infer_queue = AsyncInferQueue(compiled, num_request)

    def callback(request, _):
        request = request + 21

    img = read_image()
    infer_queue.set_callback(callback)

    with pytest.raises(TypeError) as e:
        for _ in range(jobs):
            infer_queue.start_async({"data": img})
        infer_queue.wait_all()

    assert "unsupported operand type(s) for +" in str(e.value)
def test_infer_queue_fail_on_cpp_model(device):
    jobs = 6
    num_request = 4
    core = Core()
    model = core.read_model(test_net_xml, test_net_bin)
    compiled = core.compile_model(model, device)
    infer_queue = AsyncInferQueue(compiled, num_request)

    def callback(request, _):
        request.get_tensor("Unknown")

    img = read_image()
    infer_queue.set_callback(callback)

    with pytest.raises(RuntimeError) as e:
        for _ in range(jobs):
            infer_queue.start_async({"data": img})
        infer_queue.wait_all()

    assert "Port for tensor name Unknown was not found" in str(e.value)
def test_infer_queue(device):
    jobs = 8
    num_request = 4
    core = Core()
    model = core.read_model(test_net_xml, test_net_bin)
    compiled = core.compile_model(model, device)
    infer_queue = AsyncInferQueue(compiled, num_request)
    jobs_done = [{"finished": False, "latency": 0} for _ in range(jobs)]

    def callback(request, job_id):
        jobs_done[job_id]["finished"] = True
        jobs_done[job_id]["latency"] = request.latency

    img = read_image()
    infer_queue.set_callback(callback)
    for i in range(jobs):
        infer_queue.start_async({"data": img}, i)
    infer_queue.wait_all()

    assert all(job["finished"] for job in jobs_done)
    assert all(job["latency"] > 0 for job in jobs_done)
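# The fixtures in the tests above (test_net_xml, test_net_bin, read_image) come
# from OpenVINO's own test suite. A self-contained sketch of the same
# callback-per-job pattern, built on a trivial in-memory ReLU model and a
# hardcoded CPU device instead of an IR file:
import numpy as np
from openvino.runtime import AsyncInferQueue, Core, Model
import openvino.runtime.opset8 as ops

core = Core()
param = ops.parameter([4])
model = Model(ops.relu(param), [param])
compiled = core.compile_model(model, "CPU")

queue = AsyncInferQueue(compiled, 2)
results = {}

def callback(request, job_id):
    # Store the first (and only) output of the finished request under its job id
    results[job_id] = next(iter(request.results.values()))

queue.set_callback(callback)
for i in range(4):
    # Inputs may be addressed by index, as in the samples above
    queue.start_async({0: np.array([-1.0, 0.0, 1.0, 2.0], dtype=np.float32)}, i)
queue.wait_all()
assert len(results) == 4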
class Module:
    def __init__(self, core, model_path, model_type):
        self.core = core
        self.model_type = model_type
        log.info('Reading {} model {}'.format(model_type, model_path))
        self.model = core.read_model(model_path)
        self.model_path = model_path
        self.active_requests = 0
        self.clear()

    def deploy(self, device, plugin_config, max_requests=1):
        self.max_requests = max_requests
        compiled_model = self.core.compile_model(self.model, device, config=plugin_config)
        self.infer_queue = AsyncInferQueue(compiled_model, self.max_requests)
        self.infer_queue.set_callback(self.completion_callback)
        log.info('The {} model {} is loaded to {}'.format(
            self.model_type, self.model_path, device))

    def completion_callback(self, infer_request, id):
        self.outputs[id] = next(iter(infer_request.results.values()))

    def enqueue(self, input):
        self.clear()
        if self.max_requests <= self.active_requests:
            log.warning('Processing request rejected - too many requests')
            return False

        self.infer_queue.start_async(input, self.active_requests)
        self.active_requests += 1
        return True

    def wait(self):
        if self.active_requests <= 0:
            return
        self.infer_queue.wait_all()
        self.active_requests = 0

    def get_outputs(self):
        self.wait()
        return [v for _, v in sorted(self.outputs.items())]

    def clear(self):
        self.outputs = {}

    def infer(self, inputs):
        self.clear()
        # start_async() and postprocess() are expected to be provided by subclasses
        self.start_async(*inputs)
        return self.postprocess()
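# A usage sketch for the Module base class above; 'model.xml' is a placeholder
# IR path, and deploy() must be called before enqueue(). Note that each enqueue()
# clears previously collected outputs, so results are gathered per batch:
import numpy as np
from openvino.runtime import Core

core = Core()
module = Module(core, 'model.xml', 'detection')
module.deploy('CPU', plugin_config={}, max_requests=2)
blob = np.zeros(list(module.model.inputs[0].shape), dtype=np.float32)
module.enqueue({module.model.inputs[0].get_any_name(): blob})  # returns False when all requests are busy
outputs = module.get_outputs()  # waits for completion; results sorted by request id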
    gil_released = True
    thread = Thread(target=detect_gil)
    thread.start()
    func(*args)
    if not gil_released:
        pytest.xfail(reason="Depend on condition race")
    thread.join()


device = os.environ.get("TEST_DEVICE") if os.environ.get("TEST_DEVICE") else "CPU"
core = Core()
core.set_property({"PERF_COUNT": "YES"})
param = ops.parameter([224, 224])
model = Model(ops.relu(param), [param])
compiled = core.compile_model(model, device)
infer_queue = AsyncInferQueue(compiled, 1)
user_stream = io.BytesIO()


# AsyncInferQueue

def test_gil_released_async_infer_queue_start_async():
    infer_queue.start_async()
    check_gil_released_safe(infer_queue.start_async)


def test_gil_released_async_infer_queue_is_ready():
    infer_queue.start_async()
    check_gil_released_safe(infer_queue.is_ready)
def test_infer_queue_get_idle_handle(device):
    param = ops.parameter([10])
    model = Model(ops.relu(param), [param])
    core = Core()
    compiled = core.compile_model(model, device)
    queue = AsyncInferQueue(compiled, 2)
    niter = 10

    for _ in range(len(queue)):
        queue.start_async()
    queue.wait_all()
    for request in queue:
        assert request.wait_for(0)

    for _ in range(niter):
        idle_id = queue.get_idle_request_id()
        assert queue[idle_id].wait_for(0)
        queue.start_async()
    queue.wait_all()
class IEModel: """Class for inference of models in the Inference Engine format""" def __init__(self, core, model_path, device, model_type, num_reqs=1, cpu_extension=''): self.load_model(core, model_path, device, model_type, num_reqs, cpu_extension) self.outputs = {} def _preprocess(self, img): _, _, h, w = self.get_input_shape() img = np.expand_dims(cv2.resize(img, (w, h)).transpose(2, 0, 1), axis=0) return img def completion_callback(self, infer_request, id): self.outputs[id] = infer_request.get_tensor( self.output_tensor_name).data[:] def forward(self, img): """Performs forward pass of the wrapped IE model""" self.forward_async(img, 0) self.infer_queue.wait_all() return self.outputs.pop(0) def forward_async(self, img, req_id): input_data = {self.input_tensor_name: self._preprocess(img)} self.infer_queue.start_async(input_data, req_id) def grab_all_async(self): self.infer_queue.wait_all() return [self.outputs.pop(i) for i in range(len(self.outputs))] def get_allowed_inputs_len(self): return (1, 2) def get_allowed_outputs_len(self): return (1, 2, 3, 4, 5) def get_input_shape(self): """Returns an input shape of the wrapped IE model""" return self.model.inputs[0].shape def load_model(self, core, model_xml, device, model_type, num_reqs=1, cpu_extension=''): """Loads a model in the Inference Engine format""" # Plugin initialization for specified device and load extensions library if specified if cpu_extension and 'CPU' in device: core.add_extension(cpu_extension, 'CPU') # Read IR log.info('Reading {} model {}'.format(model_type, model_xml)) self.model = core.read_model(model_xml) if len(self.model.inputs) not in self.get_allowed_inputs_len(): raise RuntimeError( "Supports topologies with only {} inputs, but got {}".format( self.get_allowed_inputs_len(), len(self.model.inputs))) if len(self.model.outputs) not in self.get_allowed_outputs_len(): raise RuntimeError( "Supports topologies with only {} outputs, but got {}".format( self.get_allowed_outputs_len(), len(self.model.outputs))) self.input_tensor_name = self.model.inputs[0].get_any_name() self.output_tensor_name = self.model.outputs[0].get_any_name() # Loading model to the plugin compiled_model = core.compile_model(self.model, device) self.infer_queue = AsyncInferQueue(compiled_model, num_reqs) self.infer_queue.set_callback(self.completion_callback) log.info('The {} model {} is loaded to {}'.format( model_type, model_xml, device))
class OpenVINOSession:
    def __init__(self, cfg, device):
        self.core = Core()
        self.xml_path = cfg["init_weights"] + ".xml"
        self.device = device

        # Convert a frozen graph to OpenVINO IR format
        if not os.path.exists(self.xml_path):
            subprocess.run(
                [
                    "mo",
                    "--output_dir", os.path.dirname(cfg["init_weights"]),
                    "--input_model", cfg["init_weights"] + ".pb",
                    "--input_shape", "[1, 747, 832, 3]",
                    "--extensions", os.path.join(os.path.dirname(__file__), "mo_extensions"),
                    "--data_type", "FP16",
                ],
                check=True,
            )

        # Read network into memory
        self.net = self.core.read_model(self.xml_path)
        self.input_name = self.net.inputs[0].get_any_name()
        self.output_name = self.net.outputs[0].get_any_name()
        self.infer_queue = None

    def _init_model(self, inp_h, inp_w):
        # For better efficiency, the model is initialized for batch_size 1
        # and every sample is processed independently
        inp_shape = [1, inp_h, inp_w, 3]
        self.net.reshape({self.input_name: inp_shape})

        # Load network to device
        if "CPU" in self.device:
            self.core.set_property(
                "CPU",
                {
                    "CPU_THROUGHPUT_STREAMS": "CPU_THROUGHPUT_AUTO",
                    "CPU_BIND_THREAD": "YES",
                },
            )
        if "GPU" in self.device:
            self.core.set_property(
                "GPU", {"GPU_THROUGHPUT_STREAMS": "GPU_THROUGHPUT_AUTO"})

        compiled_model = self.core.compile_model(self.net, self.device)
        num_requests = compiled_model.get_property("OPTIMAL_NUMBER_OF_INFER_REQUESTS")
        print(f"OpenVINO uses {num_requests} inference requests")
        self.infer_queue = AsyncInferQueue(compiled_model, num_requests)

    def run(self, out_name, feed_dict):
        inp_name, inp = next(iter(feed_dict.items()))
        if self.infer_queue is None:
            self._init_model(inp.shape[1], inp.shape[2])

        batch_size = inp.shape[0]
        # Look the output up by name and convert its Shape to a plain list
        batch_output = np.zeros([batch_size] + list(self.net.output(out_name).shape),
                                dtype=np.float32)

        def completion_callback(request, inp_id):
            output = next(iter(request.results.values()))
            # Store the result under the id that was passed to start_async()
            batch_output[inp_id] = output

        self.infer_queue.set_callback(completion_callback)
        for inp_id in range(batch_size):
            self.infer_queue.start_async({inp_name: inp[inp_id:inp_id + 1]}, inp_id)
        self.infer_queue.wait_all()

        return batch_output.reshape(-1, 3)
def main() -> int:
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = parse_args()

    # --------------------------- Step 1. Initialize OpenVINO Runtime Core ---------------------------
    log.info('Creating OpenVINO Runtime Core')
    core = Core()

    # --------------------------- Step 2. Read a model -----------------------------------------------
    log.info(f'Reading the model: {args.model}')
    # (.xml and .bin files) or (.onnx file)
    model = core.read_model(args.model)

    if len(model.inputs) != 1:
        log.error('Sample supports only single input topologies')
        return -1

    if len(model.outputs) != 1:
        log.error('Sample supports only single output topologies')
        return -1

    # --------------------------- Step 3. Set up input -----------------------------------------------
    # Read input images
    images = [cv2.imread(image_path) for image_path in args.input]

    # Resize images to model input dims
    _, _, h, w = model.input().shape
    resized_images = [cv2.resize(image, (w, h)) for image in images]

    # Add N dimension
    input_tensors = [np.expand_dims(image, 0) for image in resized_images]

    # --------------------------- Step 4. Apply preprocessing ----------------------------------------
    ppp = PrePostProcessor(model)

    # 1) Set input tensor information:
    #    - input() provides information about a single model input
    #    - precision of tensor is supposed to be 'u8'
    #    - layout of data is 'NHWC'
    ppp.input().tensor() \
        .set_element_type(Type.u8) \
        .set_layout(Layout('NHWC'))  # noqa: N400

    # 2) Here we suppose model has 'NCHW' layout for input
    ppp.input().model().set_layout(Layout('NCHW'))

    # 3) Set output tensor information:
    #    - precision of tensor is supposed to be 'f32'
    ppp.output().tensor().set_element_type(Type.f32)

    # 4) Apply preprocessing, modifying the original 'model'
    model = ppp.build()

    # --------------------------- Step 5. Loading model to the device --------------------------------
    log.info('Loading the model to the plugin')
    compiled_model = core.compile_model(model, args.device)

    # --------------------------- Step 6. Create infer request queue ---------------------------------
    log.info('Starting inference in asynchronous mode')
    infer_queue = AsyncInferQueue(compiled_model, len(input_tensors))
    infer_queue.set_callback(completion_callback)

    # --------------------------- Step 7. Do inference -----------------------------------------------
    for i, input_tensor in enumerate(input_tensors):
        infer_queue.start_async({0: input_tensor}, args.input[i])

    infer_queue.wait_all()

    # -------------------------------------------------------------------------------------------------
    log.info('This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n')
    return 0
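# The sample above registers completion_callback, but its definition falls
# outside the excerpt. A plausible definition that follows the userdata contract
# of Step 7 (the image path is passed as userdata to start_async):
def completion_callback(infer_request, image_path):
    predictions = next(iter(infer_request.results.values()))
    # Flatten the result to a 1-D probability vector
    probs = predictions.reshape(-1)
    # Report the 10 most likely class ids for this image
    top_10 = np.argsort(probs)[-10:][::-1]
    log.info(f'Image path: {image_path}')
    log.info('Top 10 results: ')
    for class_id in top_10:
        log.info(f'{class_id}\t{probs[class_id]:.7f}')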
class OpenvinoAdapter(ModelAdapter):
    """ Works with OpenVINO model """

    def __init__(self, core, model_path, weights_path=None, model_parameters={},
                 device='CPU', plugin_config=None, max_num_requests=0):
        self.core = core
        self.model_path = model_path
        self.device = device
        self.plugin_config = plugin_config
        self.max_num_requests = max_num_requests
        self.model_parameters = model_parameters
        self.model_parameters['input_layouts'] = Layout.parse_layouts(
            self.model_parameters.get('input_layouts', None))

        if isinstance(model_path, (str, Path)):
            if Path(model_path).suffix == ".onnx" and weights_path:
                log.warning('For model in ONNX format should set only "model_path" parameter. '
                            'The "weights_path" will be omitted')

        self.model_from_buffer = isinstance(model_path, bytes) and isinstance(weights_path, bytes)
        log.info('Reading model {}'.format('from buffer' if self.model_from_buffer else model_path))
        weights = weights_path if self.model_from_buffer else ''
        self.model = core.read_model(model_path, weights)

    def load_model(self):
        self.compiled_model = self.core.compile_model(self.model, self.device, self.plugin_config)
        self.async_queue = AsyncInferQueue(self.compiled_model, self.max_num_requests)
        if self.max_num_requests == 0:
            # +1 to use it as a buffer of the pipeline
            self.async_queue = AsyncInferQueue(self.compiled_model, len(self.async_queue) + 1)

        log.info('The model {} is loaded to {}'.format(
            "from buffer" if self.model_from_buffer else self.model_path, self.device))
        self.log_runtime_settings()

    def log_runtime_settings(self):
        devices = set(parse_devices(self.device))
        if 'AUTO' not in devices:
            for device in devices:
                try:
                    nstreams = self.compiled_model.get_property(device + '_THROUGHPUT_STREAMS')
                    log.info('\tDevice: {}'.format(device))
                    log.info('\t\tNumber of streams: {}'.format(nstreams))
                    if device == 'CPU':
                        nthreads = self.compiled_model.get_property('CPU_THREADS_NUM')
                        log.info('\t\tNumber of threads: {}'.format(nthreads if int(nthreads) else 'AUTO'))
                except RuntimeError:
                    pass
        log.info('\tNumber of model infer requests: {}'.format(len(self.async_queue)))

    def get_input_layers(self):
        inputs = {}
        for input in self.model.inputs:
            input_layout = self.get_layout_for_input(input)
            inputs[input.get_any_name()] = Metadata(input.get_names(), list(input.shape),
                                                    input_layout, input.get_element_type().get_type_name())
        inputs = self._get_meta_from_ngraph(inputs)
        return inputs

    def get_layout_for_input(self, input) -> str:
        input_layout = ''
        if self.model_parameters['input_layouts']:
            input_layout = Layout.from_user_layouts(input.get_names(), self.model_parameters['input_layouts'])
        if not input_layout:
            if not layout_helpers.get_layout(input).empty:
                input_layout = Layout.from_openvino(input)
            else:
                input_layout = Layout.from_shape(input.shape)
        return input_layout

    def get_output_layers(self):
        outputs = {}
        for output in self.model.outputs:
            output_shape = output.partial_shape.get_min_shape() if self.model.is_dynamic() else output.shape
            outputs[output.get_any_name()] = Metadata(output.get_names(), list(output_shape),
                                                      precision=output.get_element_type().get_type_name())
        outputs = self._get_meta_from_ngraph(outputs)
        return outputs

    def reshape_model(self, new_shape):
        new_shape = {k: PartialShape(v) for k, v in new_shape.items()}
        self.model.reshape(new_shape)

    def get_raw_result(self, request):
        raw_result = {key: request.get_tensor(key).data[:] for key in self.get_output_layers().keys()}
        return raw_result

    def infer_sync(self, dict_data):
        self.infer_request = self.async_queue[self.async_queue.get_idle_request_id()]
        self.infer_request.infer(dict_data)
        return self.get_raw_result(self.infer_request)

    def infer_async(self, dict_data, callback_data) -> None:
        self.async_queue.start_async(dict_data, (self.get_raw_result, callback_data))

    def set_callback(self, callback_fn):
        self.async_queue.set_callback(callback_fn)

    def is_ready(self) -> bool:
        return self.async_queue.is_ready()

    def await_all(self) -> None:
        self.async_queue.wait_all()

    def await_any(self) -> None:
        self.async_queue.get_idle_request_id()

    def _get_meta_from_ngraph(self, layers_info):
        for node in self.model.get_ordered_ops():
            layer_name = node.get_friendly_name()
            if layer_name not in layers_info.keys():
                continue
            layers_info[layer_name].meta = node.get_attributes()
            layers_info[layer_name].type = node.get_type_name()
        return layers_info

    def operations_by_type(self, operation_type):
        layers_info = {}
        for node in self.model.get_ordered_ops():
            if node.get_type_name() == operation_type:
                layer_name = node.get_friendly_name()
                layers_info[layer_name] = Metadata(type=node.get_type_name(), meta=node.get_attributes())
        return layers_info
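# A usage sketch for OpenvinoAdapter's async path. Note the userdata contract:
# infer_async() packs (get_raw_result, callback_data), so the registered callback
# receives that tuple as its second argument. The IR path is a placeholder and
# the zero-filled inputs merely match the model's input shapes.
import numpy as np
from openvino.runtime import Core

def on_result(request, userdata):
    get_raw_result, frame_id = userdata
    raw_outputs = get_raw_result(request)  # {output_name: ndarray}
    print('Frame', frame_id, 'outputs:', list(raw_outputs.keys()))

adapter = OpenvinoAdapter(Core(), 'model.xml', device='CPU', max_num_requests=0)
adapter.load_model()
adapter.set_callback(on_result)
input_dict = {inp.get_any_name(): np.zeros(list(inp.shape), dtype=np.float32)
              for inp in adapter.model.inputs}
adapter.infer_async(input_dict, callback_data=0)
adapter.await_all()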