def __init__(self, args):
    self.gpu_ext = args.gpu_lib
    self.allow_grow = args.allow_grow and not args.no_show

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if args.cpu_lib and 'CPU' in {args.d_fd, args.d_lm, args.d_reid}:
        core.add_extension(args.cpu_lib, 'CPU')

    self.face_detector = FaceDetector(core, args.m_fd,
                                      args.fd_input_size,
                                      confidence_threshold=args.t_fd,
                                      roi_scale_factor=args.exp_r_fd)
    self.landmarks_detector = LandmarksDetector(core, args.m_lm)
    self.face_identifier = FaceIdentifier(core, args.m_reid,
                                          match_threshold=args.t_id,
                                          match_algo=args.match_algo)

    self.face_detector.deploy(args.d_fd, self.get_config(args.d_fd))
    self.landmarks_detector.deploy(args.d_lm, self.get_config(args.d_lm), self.QUEUE_SIZE)
    self.face_identifier.deploy(args.d_reid, self.get_config(args.d_reid), self.QUEUE_SIZE)

    log.debug('Building faces database using images from {}'.format(args.fg))
    self.faces_database = FacesDatabase(args.fg, self.face_identifier,
                                        self.landmarks_detector,
                                        self.face_detector if args.run_detector else None,
                                        args.no_show)
    self.face_identifier.set_faces_database(self.faces_database)
    log.info('Database is built, registered {} identities'.format(len(self.faces_database)))
def load_core(device, cpu_extension=None):
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if device == "CPU" and cpu_extension:
        core.add_extension(cpu_extension, "CPU")
    return core
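# --- Usage sketch (not from the original sources) ---
# How load_core() above might be called; the extension file name is a
# hypothetical placeholder, not a library shipped with OpenVINO.
import logging as log
from openvino.runtime import Core, get_version

log.basicConfig(level=log.INFO)
core = load_core('CPU', cpu_extension='libcustom_cpu_extension.so')  # hypothetical path
log.info('Available devices: {}'.format(core.available_devices))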
def test_add_extension(device):
    model = bytes(b"""<net name="Network" version="10">
    <layers>
        <layer name="in1" type="Parameter" id="0" version="opset1">
            <data element_type="f32" shape="2,2,2,1"/>
            <output>
                <port id="0" precision="FP32">
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>1</dim>
                </port>
            </output>
        </layer>
        <layer name="operation" id="1" type="Template" version="custom_opset">
            <data add="11"/>
            <input>
                <port id="1" precision="FP32">
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>1</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32">
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>1</dim>
                </port>
            </output>
        </layer>
        <layer name="output" type="Result" id="2" version="opset1">
            <input>
                <port id="0" precision="FP32">
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>2</dim>
                    <dim>1</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
        <edge from-layer="1" from-port="2" to-layer="2" to-port="0"/>
    </edges>
</net>""")
    core = Core()
    if platform == "win32":
        core.add_extension(library_path="template_extension.dll")
    else:
        core.add_extension(library_path="libtemplate_extension.so")
    # read_model accepts the IR buffer directly
    func = core.read_model(model=model)
    assert isinstance(func, Model)
def test_add_extension():
    class EmptyExtension(Extension):
        def __init__(self) -> None:
            super().__init__()

    core = Core()
    core.add_extension(EmptyExtension())
    core.add_extension([EmptyExtension(), EmptyExtension()])
    model = core.read_model(model=test_net_xml, weights=test_net_bin)
    assert isinstance(model, Model)
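# --- Combined sketch (not from the original tests) ---
# The add_extension overloads exercised above, gathered in one place: a
# shared-library path, a single Extension object, or a list of Extension
# objects. The library file names are hypothetical placeholders.
from sys import platform
from openvino.runtime import Core, Extension

class EmptyExtension(Extension):
    def __init__(self) -> None:
        super().__init__()

core = Core()
# 1) load compiled custom operations from a shared library
core.add_extension(library_path="template_extension.dll" if platform == "win32"
                   else "libtemplate_extension.so")
# 2) register in-process Extension objects, one at a time or as a list
core.add_extension(EmptyExtension())
core.add_extension([EmptyExtension(), EmptyExtension()])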
def get_model_with_template_extension():
    core = Core()
    ir = bytes(b"""<net name="Activation" version="10">
    <layers>
        <layer name="in1" type="Parameter" id="0" version="opset1">
            <data shape="1,3,22,22" element_type="f32"/>
            <output>
                <port id="0" precision="FP32" names="in_data">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>22</dim>
                    <dim>22</dim>
                </port>
            </output>
        </layer>
        <layer name="activation" id="1" type="Identity" version="extension">
            <input>
                <port id="1" precision="FP32">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>22</dim>
                    <dim>22</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32" names="out_data">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>22</dim>
                    <dim>22</dim>
                </port>
            </output>
        </layer>
        <layer name="output" type="Result" id="2" version="opset1">
            <input>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>22</dim>
                    <dim>22</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
        <edge from-layer="1" from-port="2" to-layer="2" to-port="0"/>
    </edges>
</net>""")
    if platform == "win32":
        core.add_extension(library_path="openvino_template_extension.dll")
    else:
        core.add_extension(library_path="libopenvino_template_extension.so")
    return core, core.read_model(ir)
def __init__(self, model_path, device, cpu_extension):
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if cpu_extension and device == 'CPU':
        core.add_extension(cpu_extension, 'CPU')

    log.info('Reading model {}'.format(model_path))
    self.model = core.read_model(model_path)
    self.input_tensor_name = "Placeholder"
    compiled_model = core.compile_model(self.model, device)
    self.infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(model_path, device))
def test_op_extension_via_frontend_extension():
    skip_if_onnx_frontend_is_disabled()

    # use specific (openvino.frontend) import here
    from openvino.frontend import OpExtension
    from openvino.runtime import Core

    ie = Core()
    ie.add_extension(OpExtension("FW_OV_OP"))
    ie.add_extension(OpExtension("OV_OP", "FW_OP_1"))
    ie.add_extension(OpExtension("OV_OP", "FW_OP_2",
                                 {"ov_attribute_1": "fw_attribute_1",
                                  "ov_attribute_2": "fw_attribute_2"}))
    ie.add_extension(OpExtension("OV_OP", "FW_OP_3",
                                 {"ov_attribute_1": "fw_attribute_1",
                                  "ov_attribute_2": "fw_attribute_2"},
                                 {"ov_attribute_str": "string",
                                  "ov_attribute_int": 4,
                                  "ov_attribute_bool": True,
                                  "ov_attribute_float": 4.,
                                  "ov_attribute_vec_string": ["str1", "str2", "str3"],
                                  "ov_attribute_vec_int": [1, 2, 3, 4, 5, 6, 7],
                                  "ov_attribute_vec_bool": [True, False, True],
                                  "ov_attribute_vec_float": [1., 2., 3., 4., 5., 6., 7.]}))

    model = ie.read_model(onnx_model_filename)
    assert model
def __init__(self, model_path, device, cpu_extension):
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if cpu_extension and device == 'CPU':
        core.add_extension(cpu_extension, 'CPU')

    log.info('Reading model {}'.format(model_path))
    self.model = core.read_model(model_path)
    self.input_tensor_name = self.model.inputs[0].get_any_name()
    self.input_size = self.model.input(self.input_tensor_name).shape
    self.nchw_layout = self.input_size[1] == 3
    compiled_model = core.compile_model(self.model, device)
    self.infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(model_path, device))
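# --- Inference sketch (not from the original sources) ---
# One way the wrapper above might be driven. The class name ModelWrapper,
# the file paths, and the resize/transpose preprocessing are illustrative
# assumptions, not part of the original snippet.
import cv2
import numpy as np

wrapper = ModelWrapper('model.xml', 'CPU', None)  # hypothetical class name and IR path
image = cv2.imread('input.png')                   # hypothetical input image
h, w = (wrapper.input_size[2], wrapper.input_size[3]) if wrapper.nchw_layout \
    else (wrapper.input_size[1], wrapper.input_size[2])
blob = cv2.resize(image, (w, h)).astype(np.float32)
if wrapper.nchw_layout:
    blob = blob.transpose(2, 0, 1)  # HWC -> CHW
blob = np.expand_dims(blob, 0)      # add the batch dimension
result = wrapper.infer_request.infer({wrapper.input_tensor_name: blob})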
def test_op_extension_specify_wrong_opset(opset_prefix):
    skip_if_onnx_frontend_is_disabled()

    # use specific (openvino.frontend.onnx) import here
    from openvino.frontend.onnx import OpExtension
    from openvino.runtime import Core

    ie = Core()

    # add extensions
    fw_operation = "Floor"
    ov_operation = opset_prefix + fw_operation
    ie.add_extension(OpExtension(ov_operation, fw_operation))

    with pytest.raises(Exception):
        ie.read_model(onnx_model_for_op_extension_test)
def test_op_extension_specify_opset(opset_prefix):
    skip_if_onnx_frontend_is_disabled()

    # use specific (openvino.frontend.onnx) import here
    from openvino.frontend.onnx import OpExtension
    from openvino.runtime import Core

    ie = Core()

    # check the model is valid
    model = ie.read_model(onnx_model_for_op_extension_test)
    assert model

    # add extensions
    fw_operation = "Floor"
    ov_operation = opset_prefix + fw_operation
    ie.add_extension(OpExtension(ov_operation, fw_operation))

    model = ie.read_model(onnx_model_for_op_extension_test)
    assert model
def test_op_extension_via_frontend_extension_map_attributes():
    skip_if_onnx_frontend_is_disabled()

    # use common (openvino.frontend) import here
    from openvino.frontend import OpExtension
    from openvino.runtime import Core

    ie = Core()

    # check the model is valid
    model = ie.read_model(onnx_model_for_op_extension_test)
    assert model

    # add extensions
    ie.add_extension(OpExtension("Elu", "Elu", {"alpha": "alpha"}))
    ie.add_extension(OpExtension("Concat", {"axis": "axis"}, {"axis": 0}))
    ie.add_extension(OpExtension("AvgPool", "AveragePool",
                                 {"kernel": "kernel_shape",
                                  "strides": "strides",
                                  "auto_pad": "auto_pad"},
                                 {"pads_begin": [0, 0],
                                  "pads_end": [1, 1],
                                  "exclude-pad": True,
                                  "rounding_type": "floor"}))

    model = ie.read_model(onnx_model_for_op_extension_test)
    assert model
def test_add_extension_template_extension(device):
    ir = bytes(b"""<net name="Activation" version="10">
    <layers>
        <layer name="in1" type="Parameter" id="0" version="opset1">
            <data shape="1,3,22,22" element_type="f32"/>
            <output>
                <port id="0" precision="FP32" names="in_data">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>22</dim>
                    <dim>22</dim>
                </port>
            </output>
        </layer>
        <layer name="activation" id="1" type="Identity" version="extension">
            <input>
                <port id="1" precision="FP32">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>22</dim>
                    <dim>22</dim>
                </port>
            </input>
            <output>
                <port id="2" precision="FP32" names="out_data">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>22</dim>
                    <dim>22</dim>
                </port>
            </output>
        </layer>
        <layer name="output" type="Result" id="2" version="opset1">
            <input>
                <port id="0" precision="FP32">
                    <dim>1</dim>
                    <dim>3</dim>
                    <dim>22</dim>
                    <dim>22</dim>
                </port>
            </input>
        </layer>
    </layers>
    <edges>
        <edge from-layer="0" from-port="0" to-layer="1" to-port="1"/>
        <edge from-layer="1" from-port="2" to-layer="2" to-port="0"/>
    </edges>
</net>""")
    core = Core()
    if platform == "win32":
        core.add_extension(library_path="ov_template_extension.dll")
    else:
        core.add_extension(library_path="libov_template_extension.so")
    model = core.read_model(model=ir)
    assert isinstance(model, Model)

    before_reshape = PartialShape([1, 3, 22, 22])
    after_reshape = PartialShape([8, 9, 33, 66])
    new_shapes = {"in_data": after_reshape}
    assert model.input().partial_shape == before_reshape
    model.reshape(new_shapes)
    assert model.input().partial_shape == after_reshape

    # CVS-74584
    del model
class Benchmark:
    def __init__(self, device: str, number_infer_requests: int = 0, number_iterations: int = None,
                 duration_seconds: int = None, api_type: str = 'async', inference_only=None):
        self.device = device
        self.core = Core()
        self.nireq = number_infer_requests if api_type == 'async' else 1
        self.niter = number_iterations
        self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
        self.api_type = api_type
        self.inference_only = inference_only
        self.latency_groups = []

    def __del__(self):
        del self.core

    def add_extension(self, path_to_extension: str = None, path_to_cldnn_config: str = None):
        if path_to_cldnn_config:
            self.core.set_property(GPU_DEVICE_NAME, {'CONFIG_FILE': path_to_cldnn_config})
            logger.info(f'GPU extensions are loaded: {path_to_cldnn_config}')
        if path_to_extension:
            self.core.add_extension(extension_path=path_to_extension)
            logger.info(f'CPU extensions are loaded: {path_to_extension}')

    def get_version_info(self) -> str:
        logger.info(f"OpenVINO:\n{'': <9}{'API version':.<24} {get_version()}")
        version_string = 'Device info\n'
        for device, version in self.core.get_versions(self.device).items():
            version_string += f"{'': <9}{device}\n"
            version_string += f"{'': <9}{version.description:.<24}{' version'} {version.major}.{version.minor}\n"
            version_string += f"{'': <9}{'Build':.<24} {version.build_number}\n"
        return version_string

    def set_config(self, config={}):
        for device in config.keys():
            self.core.set_property(device, config[device])

    def set_cache_dir(self, cache_dir: str):
        self.core.set_property({'CACHE_DIR': cache_dir})

    def read_model(self, path_to_model: str):
        model_filename = os.path.abspath(path_to_model)
        head, ext = os.path.splitext(model_filename)
        weights_filename = os.path.abspath(head + BIN_EXTENSION) if ext == XML_EXTENSION else ""
        return self.core.read_model(model_filename, weights_filename)

    def create_infer_requests(self, compiled_model):
        if self.api_type == 'sync':
            requests = [compiled_model.create_infer_request()]
        else:
            requests = AsyncInferQueue(compiled_model, self.nireq)
            self.nireq = len(requests)
        return requests

    def first_infer(self, requests):
        if self.api_type == 'sync':
            requests[0].infer()
            return requests[0].latency
        else:
            idle_id = requests.get_idle_request_id()
            requests.start_async()
            requests.wait_all()
            return requests[idle_id].latency

    def update_progress_bar(self, progress_bar, exec_time, progress_count):
        if self.duration_seconds:
            # Calculate how many progress intervals are covered by the current iteration.
            # Depends on the current iteration time and the time of each progress interval.
            # Previously covered progress intervals must be skipped.
            progress_interval_time = self.duration_seconds / progress_bar.total_num
            new_progress = int(exec_time / progress_interval_time - progress_count)
            progress_bar.add_progress(new_progress)
            progress_count += new_progress
        elif self.niter:
            progress_bar.add_progress(1)
        return progress_count

    def sync_inference(self, request, data_queue, progress_bar):
        progress_count = 0
        exec_time = 0
        iteration = 0
        times = []
        start_time = datetime.utcnow()
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds):
            if self.inference_only is False:
                request.set_input_tensors(data_queue.get_next_input())
            request.infer()
            times.append(request.latency)
            iteration += 1

            exec_time = (datetime.utcnow() - start_time).total_seconds()

            if progress_bar:
                progress_count = self.update_progress_bar(progress_bar, exec_time, progress_count)

        total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
        return sorted(times), total_duration_sec, iteration

    def async_inference_only(self, infer_queue, progress_bar):
        progress_count = 0
        exec_time = 0
        iteration = 0
        times = []
        in_fly = set()
        start_time = datetime.utcnow()
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds) or \
              (iteration % self.nireq):
            idle_id = infer_queue.get_idle_request_id()
            if idle_id in in_fly:
                times.append(infer_queue[idle_id].latency)
            else:
                in_fly.add(idle_id)
            infer_queue.start_async()
            iteration += 1

            exec_time = (datetime.utcnow() - start_time).total_seconds()

            if progress_bar:
                progress_count = self.update_progress_bar(progress_bar, exec_time, progress_count)

        infer_queue.wait_all()
        total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
        for infer_request_id in in_fly:
            times.append(infer_queue[infer_request_id].latency)
        return sorted(times), total_duration_sec, iteration

    def async_inference_full_mode(self, infer_queue, data_queue, progress_bar, pcseq):
        progress_count = 0
        processed_frames = 0
        exec_time = 0
        iteration = 0
        times = []
        num_groups = len(self.latency_groups)
        in_fly = set()
        start_time = datetime.utcnow()
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds) or \
              (iteration % num_groups):
            processed_frames += data_queue.get_next_batch_size()
            idle_id = infer_queue.get_idle_request_id()
            if idle_id in in_fly:
                times.append(infer_queue[idle_id].latency)
                if pcseq:
                    self.latency_groups[infer_queue.userdata[idle_id]].times.append(infer_queue[idle_id].latency)
            else:
                in_fly.add(idle_id)
            group_id = data_queue.current_group_id
            infer_queue[idle_id].set_input_tensors(data_queue.get_next_input())
            infer_queue.start_async(userdata=group_id)
            iteration += 1

            exec_time = (datetime.utcnow() - start_time).total_seconds()

            if progress_bar:
                progress_count = self.update_progress_bar(progress_bar, exec_time, progress_count)

        infer_queue.wait_all()
        total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
        for infer_request_id in in_fly:
            times.append(infer_queue[infer_request_id].latency)
        return sorted(times), total_duration_sec, processed_frames, iteration

    def main_loop(self, requests, data_queue, batch_size, latency_percentile, progress_bar, pcseq):
        if self.api_type == 'sync':
            times, total_duration_sec, iteration = self.sync_inference(requests[0], data_queue, progress_bar)
        elif self.inference_only:
            times, total_duration_sec, iteration = self.async_inference_only(requests, progress_bar)
            fps = len(batch_size) * iteration / total_duration_sec
        else:
            times, total_duration_sec, processed_frames, iteration = self.async_inference_full_mode(
                requests, data_queue, progress_bar, pcseq)
            fps = processed_frames / total_duration_sec

        median_latency_ms = percentile(times, latency_percentile)
        avg_latency_ms = sum(times) / len(times)
        min_latency_ms = times[0]
        max_latency_ms = times[-1]

        if self.api_type == 'sync':
            fps = len(batch_size) * 1000 / median_latency_ms

        if pcseq:
            for group in self.latency_groups:
                if group.times:
                    group.times.sort()
                    group.avg = sum(group.times) / len(group.times)
                    group.min = group.times[0]
                    group.max = group.times[-1]

        if progress_bar:
            progress_bar.finish()
        return fps, median_latency_ms, avg_latency_ms, min_latency_ms, max_latency_ms, total_duration_sec, iteration
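# --- Usage sketch (not from the original benchmark tool) ---
# A minimal sync-mode run of the Benchmark class above; the model path is a
# hypothetical placeholder, and data feeding/progress reporting are omitted.
benchmark = Benchmark(device='CPU', number_iterations=10, api_type='sync', inference_only=True)
print(benchmark.get_version_info())
model = benchmark.read_model('model.xml')  # hypothetical IR path
compiled_model = benchmark.core.compile_model(model, benchmark.device)
requests = benchmark.create_infer_requests(compiled_model)
first_latency_ms = benchmark.first_infer(requests)
print('First inference latency: {:.2f} ms'.format(first_latency_ms))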
def main():
    args = build_argparser().parse_args()

    cap = open_images_capture(args.input, args.loop)

    # Plugin initialization for specified device and load extensions library if specified.
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if args.cpu_extension and 'CPU' in args.device:
        core.add_extension(args.cpu_extension, 'CPU')

    # Read IR
    log.info('Reading Mask-RCNN model {}'.format(args.mask_rcnn_model))
    mask_rcnn_model = core.read_model(args.mask_rcnn_model)

    input_tensor_name = 'image'
    try:
        n, c, h, w = mask_rcnn_model.input(input_tensor_name).shape
    except RuntimeError:
        raise RuntimeError('Demo supports only topologies with the following input tensor name: {}'.format(
            input_tensor_name))
    if n != 1:
        raise RuntimeError('Only batch 1 is supported by the demo application')

    required_output_names = {'boxes', 'labels', 'masks', 'text_features.0'}
    for output_tensor_name in required_output_names:
        try:
            mask_rcnn_model.output(output_tensor_name)
        except RuntimeError:
            raise RuntimeError('Demo supports only topologies with the following output tensor names: {}'.format(
                ', '.join(required_output_names)))

    log.info('Reading Text Recognition Encoder model {}'.format(args.text_enc_model))
    text_enc_model = core.read_model(args.text_enc_model)

    log.info('Reading Text Recognition Decoder model {}'.format(args.text_dec_model))
    text_dec_model = core.read_model(args.text_dec_model)

    mask_rcnn_compiled_model = core.compile_model(mask_rcnn_model, device_name=args.device)
    mask_rcnn_infer_request = mask_rcnn_compiled_model.create_infer_request()
    log.info('The Mask-RCNN model {} is loaded to {}'.format(args.mask_rcnn_model, args.device))

    text_enc_compiled_model = core.compile_model(text_enc_model, args.device)
    text_enc_infer_request = text_enc_compiled_model.create_infer_request()
    log.info('The Text Recognition Encoder model {} is loaded to {}'.format(args.text_enc_model, args.device))

    text_dec_compiled_model = core.compile_model(text_dec_model, args.device)
    text_dec_infer_request = text_dec_compiled_model.create_infer_request()
    log.info('The Text Recognition Decoder model {} is loaded to {}'.format(args.text_dec_model, args.device))

    hidden_shape = text_dec_model.input(args.trd_input_prev_hidden).shape
    text_dec_output_names = {args.trd_output_symbols_distr, args.trd_output_cur_hidden}

    if args.no_track:
        tracker = None
    else:
        tracker = StaticIOUTracker()

    if args.delay:
        delay = args.delay
    else:
        delay = int(cap.get_type() in ('VIDEO', 'CAMERA'))

    visualizer = Visualizer(['__background__', 'text'], show_boxes=args.show_boxes, show_scores=args.show_scores)

    frames_processed = 0

    metrics = PerformanceMetrics()
    video_writer = cv2.VideoWriter()
    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    presenter = monitors.Presenter(args.utilization_monitors, 45, (frame.shape[1] // 4, frame.shape[0] // 8))
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), (frame.shape[1], frame.shape[0])):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        if not args.keep_aspect_ratio:
            # Resize the image to a target size.
            scale_x = w / frame.shape[1]
            scale_y = h / frame.shape[0]
            input_image = cv2.resize(frame, (w, h))
        else:
            # Resize the image to keep the same aspect ratio and to fit it to a window of a target size.
            scale_x = scale_y = min(h / frame.shape[0], w / frame.shape[1])
            input_image = cv2.resize(frame, None, fx=scale_x, fy=scale_y)

        input_image_size = input_image.shape[:2]
        input_image = np.pad(input_image, ((0, h - input_image_size[0]),
                                           (0, w - input_image_size[1]),
                                           (0, 0)),
                             mode='constant', constant_values=0)
        # Change data layout from HWC to CHW.
        input_image = input_image.transpose((2, 0, 1))
        input_image = input_image.reshape((n, c, h, w)).astype(np.float32)

        # Run the MaskRCNN model.
        mask_rcnn_infer_request.infer({input_tensor_name: input_image})
        outputs = {name: mask_rcnn_infer_request.get_tensor(name).data[:] for name in required_output_names}

        # Parse detection results of the current request
        boxes = outputs['boxes'][:, :4]
        scores = outputs['boxes'][:, 4]
        classes = outputs['labels'].astype(np.uint32)
        raw_masks = outputs['masks']
        text_features = outputs['text_features.0']

        # Filter out detections with low confidence.
        detections_filter = scores > args.prob_threshold
        scores = scores[detections_filter]
        classes = classes[detections_filter]
        boxes = boxes[detections_filter]
        raw_masks = raw_masks[detections_filter]
        text_features = text_features[detections_filter]

        boxes[:, 0::2] /= scale_x
        boxes[:, 1::2] /= scale_y
        masks = []
        for box, cls, raw_mask in zip(boxes, classes, raw_masks):
            mask = segm_postprocess(box, raw_mask, frame.shape[0], frame.shape[1])
            masks.append(mask)

        texts = []
        for feature in text_features:
            feature = next(iter(text_enc_infer_request.infer({'input': np.expand_dims(feature, axis=0)}).values()))
            feature = np.reshape(feature, (feature.shape[0], feature.shape[1], -1))
            feature = np.transpose(feature, (0, 2, 1))

            hidden = np.zeros(hidden_shape)
            prev_symbol_index = np.ones((1,)) * SOS_INDEX

            text = ''
            text_confidence = 1.0
            for i in range(MAX_SEQ_LEN):
                text_dec_infer_request.infer({
                    args.trd_input_prev_symbol: np.reshape(prev_symbol_index, (1,)),
                    args.trd_input_prev_hidden: hidden,
                    args.trd_input_encoder_outputs: feature})
                decoder_output = {name: text_dec_infer_request.get_tensor(name).data[:]
                                  for name in text_dec_output_names}
                symbols_distr = decoder_output[args.trd_output_symbols_distr]
                symbols_distr_softmaxed = softmax(symbols_distr, axis=1)[0]
                prev_symbol_index = int(np.argmax(symbols_distr, axis=1))
                text_confidence *= symbols_distr_softmaxed[prev_symbol_index]
                if prev_symbol_index == EOS_INDEX:
                    break
                text += args.alphabet[prev_symbol_index]
                hidden = decoder_output[args.trd_output_cur_hidden]

            texts.append(text if text_confidence >= args.tr_threshold else '')

        if len(boxes) and args.raw_output_message:
            log.debug(' -------------------------- Frame # {} -------------------------- '.format(frames_processed))
            log.debug(' Class ID | Confidence | XMIN | YMIN | XMAX | YMAX ')
            for box, cls, score, mask in zip(boxes, classes, scores, masks):
                log.debug('{:>10} | {:>10f} | {:>8.2f} | {:>8.2f} | {:>8.2f} | {:>8.2f} '.format(cls, score, *box))

        # Get instance track IDs.
        masks_tracks_ids = None
        if tracker is not None:
            masks_tracks_ids = tracker(masks, classes)

        presenter.drawGraphs(frame)

        # Visualize masks.
        frame = visualizer(frame, boxes, classes, scores, masks, texts, masks_tracks_ids)

        metrics.update(start_time, frame)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or frames_processed <= args.output_limit):
            video_writer.write(frame)

        if not args.no_show:
            # Show resulting image.
            cv2.imshow('Results', frame)
            key = cv2.waitKey(delay)
            esc_code = 27
            if key == esc_code:
                break
            presenter.handleKey(key)

        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
class OpenVINOLauncher(Launcher):
    __provider__ = 'openvino'

    @classmethod
    def parameters(cls):
        parameters = super().parameters()
        parameters.update(DLSDK_LAUNCHER_PARAMETERS)
        return parameters

    def __init__(self, config_entry, model_name='', delayed_model_loading=False,
                 preprocessor=None, postpone_inputs_configuration=False):
        super().__init__(config_entry, model_name=model_name)
        self._set_variable = False
        self.ie_config = self.config.get('ie_config')
        self.ie_core = Core()
        if self.ie_config:
            ov_set_config(self.ie_core, self.ie_config)
        self._delayed_model_loading = delayed_model_loading
        dlsdk_launcher_config = DLSDKLauncherConfigValidator(
            'OpenVINO_Launcher', fields=self.parameters(), delayed_model_loading=delayed_model_loading,
        )
        dlsdk_launcher_config.validate(self.config, ie_core=self.ie_core)
        device = self.config['device'].split('.')
        self._device = '.'.join((device[0].upper(), device[1])) if len(device) > 1 else device[0].upper()
        self.dynamic_shapes_policy = self.get_value_from_config('_undefined_shapes_resolving_policy')
        self._set_variable = False
        self._async_mode = False
        self._prepare_ie()
        self._delayed_model_loading = delayed_model_loading
        self._postpone_input_configuration = postpone_inputs_configuration
        self._preprocess_info = {}
        self._preprocess_steps = []
        self.disable_resize_to_input = False
        self._do_reshape = False
        self._output_layouts = {}
        self._output_precisions = {}
        self.dyn_input_layers = []
        self._partial_shapes = {}
        self.is_dynamic = False
        self.preprocessor = preprocessor
        self.infer_request = None
        self._num_requests = None

        if not delayed_model_loading:
            self._model, self._weights = automatic_model_search(
                self._model_name, self.get_value_from_config('model'),
                self.get_value_from_config('weights'),
                self.get_value_from_config('_model_type')
            )
            self.load_network(log=not postpone_inputs_configuration, preprocessing=preprocessor)
            self.allow_reshape_input = self.get_value_from_config('allow_reshape_input') and self.network is not None
            if not postpone_inputs_configuration:
                self.try_to_set_default_layout()
        else:
            self.allow_reshape_input = self.get_value_from_config('allow_reshape_input')
        self._target_layout_mapping = {}
        self._lstm_inputs = None
        if '_list_lstm_inputs' in self.config:
            self._configure_lstm_inputs()
        self.reset_memory_state = self.get_value_from_config('reset_memory_state')

    @classmethod
    def validate_config(cls, config, delayed_model_loading=False, fetch_only=False, uri_prefix=''):
        field_uri = uri_prefix or 'launcher.{}'.format(cls.__provider__)
        return DLSDKLauncherConfigValidator(
            field_uri, fields=cls.parameters(), delayed_model_loading=delayed_model_loading).validate(
            config, field_uri=field_uri, validation_scheme=cls.validation_scheme(), fetch_only=fetch_only)

    def try_to_set_default_layout(self):
        if self.get_value_from_config('_model_type') == 'tf':
            self.default_layout = 'NHWC'
        input_nodes = self.network.inputs if self.network else self.exec_network.inputs
        for input_node in input_nodes:
            shape = parse_partial_shape(input_node.get_node().partial_shape)
            if len(shape) != 4:
                continue
            if input_node.get_node().layout.has_name('C'):
                channel_dim = input_node.get_node().layout.get_index_by_name('C')
                if channel_dim in [3, -1]:
                    self.default_layout = 'NHWC'
                    return
            if shape[-1] in [1, 2, 3, 4, 6, 9]:
                self.default_layout = 'NHWC'
                return
        self.default_layout = 'NCHW'
        return

    @property
    def device(self):
        return self._device

    @property
    def inputs(self):
        if self.network is None:
            inputs = self.exec_network.inputs
        else:
            inputs = self.network.inputs
        return {input_info.get_node().friendly_name: input_info.get_node() for input_info in inputs}

    @property
    def batch(self):
        return self._batch

    @property
    def output_blob(self):
        if hasattr(self, 'original_outputs'):
            return next(iter(self.original_outputs)).get_node().friendly_name
        return None

    @property
    def additional_output_mapping(self):
        if hasattr(self, 'out_tensor_name_to_node'):
            return self.out_tensor_name_to_node
        return {}

    def predict(self, inputs, metadata=None, return_raw=False, **kwargs):
        if self._lstm_inputs:
            return self._predict_sequential(inputs, metadata, return_raw)
        results = []
        raw_results = []
        for infer_inputs in inputs:
            if self._do_reshape:
                input_shapes = {layer_name: data.shape for layer_name, data in infer_inputs.items()}
                self._reshape_input(input_shapes)
            if self.infer_request is None:
                self.infer_request = self.exec_network.create_infer_request()
            feed_dict = {self.input_to_tensor_name[layer_name]: data for layer_name, data in infer_inputs.items()}
            outputs = self.infer_request.infer(inputs=feed_dict)
            raw_results.append(outputs)
            results.append({out_node.get_node().friendly_name: out_res for out_node, out_res in outputs.items()})
            if self.reset_memory_state:
                for state in self.infer_request.query_state():
                    state.reset()

        if metadata is not None:
            self._fill_meta(metadata, None if not self.dyn_input_layers else inputs[-1])
        self._do_reshape = False
        if return_raw:
            return results, raw_results
        return results

    def _predict_sequential(self, inputs, metadata=None, return_raw=False, **kwargs):
        lstm_inputs_feed = self._fill_lstm_inputs()
        if not self.infer_request:
            self.infer_request = self.exec_network.create_infer_request()
        results = []
        raw_results = []
        for feed_dict in inputs:
            feed_dict.update(lstm_inputs_feed)
            infer_inputs = {self.input_to_tensor_name[layer_name]: data for layer_name, data in feed_dict.items()}
            out_tensors = self.infer_request.infer(infer_inputs)
            output_result = {out_node.get_node().friendly_name: out_tensor
                             for out_node, out_tensor in out_tensors.items()}
            lstm_inputs_feed = self._fill_lstm_inputs(output_result)
            results.append(output_result)
            if return_raw:
                raw_results.append(out_tensors)
            if self._do_reshape:
                input_shapes = {layer_name: data.shape for layer_name, data in feed_dict.items()}
                self._reshape_input(input_shapes)

        if metadata is not None:
            self._fill_meta(metadata, None if not self.dyn_input_layers else inputs[-1])
        self._do_reshape = False
        if return_raw:
            return results, raw_results
        return results

    def predict_async(self, ir, inputs, metadata=None, context=None, **kwargs):
        infer_inputs = inputs[0]
        feed_dict = {self.input_to_tensor_name[name]: data for name, data in infer_inputs.items()}
        if metadata is not None:
            self._fill_meta(metadata, None if not self.dyn_input_layers else infer_inputs)
        ir.infer(feed_dict, metadata, context)

    def _fill_meta(self, metadata, inputs=None):
        for meta_ in metadata:
            meta_['input_shape'] = self.inputs_info_for_meta(inputs)
            if self._output_layouts:
                meta_['output_layout'] = self._output_layouts
            if self._output_precisions:
                meta_['output_precision'] = self._output_precisions

    def _is_hetero(self):
        return self._device.startswith(HETERO_KEYWORD)

    def _is_multi(self):
        return self._device.startswith(MULTI_DEVICE_KEYWORD)

    def _devices_list(self):
        device = self._device
        if self._is_hetero():
            device = self._device[len(HETERO_KEYWORD):]
        if self._is_multi():
            device = self._device[len(MULTI_DEVICE_KEYWORD):]
            device = re.sub(NIREQ_REGEX, '', device)
        return [platform_.upper().strip() for platform_ in device.split(',')]

    def _set_affinity(self, affinity_map_path):
        auto_affinity = self.ie_core.query_network(self.network, self._device)
        custom_affinity = read_yaml(affinity_map_path)
        for layer in custom_affinity:
            if layer not in auto_affinity:
                raise ConfigError('Layer \'{layer}\' is not present in network'.format(layer=layer))
        for node in self.network.get_ordered_ops():
            layer_name = node.friendly_name
            device = custom_affinity.get(layer_name, auto_affinity.get(layer_name))
            if device is None:
                continue
            if not (device in self._devices_list() or device == self._device):
                raise ConfigError(
                    'Device \'{device}\' set for \'{layer}\' layer is not present in '
                    'provided configuration \'{configuration}\''.format(
                        device=device, layer=layer_name, configuration=self._device
                    )
                )
            node.rt_info["affinity"] = device

    def _is_vpu(self):
        device_list = map(lambda device: device.split('.')[0], self._devices_list())
        return contains_any(device_list, VPU_PLUGINS)

    @property
    def num_requests(self):
        return self._num_requests

    @num_requests.setter
    def num_requests(self, num_ireq: int):
        if num_ireq != self._num_requests:
            self._num_requests = num_ireq

    @property
    def async_mode(self):
        return self._async_mode

    @async_mode.setter
    def async_mode(self, flag):
        for device in self._devices_list():
            ov_set_config(
                self.ie_core, {'PERFORMANCE_HINT': 'THROUGHPUT' if flag else 'LATENCY'}, device=device.upper())
        self._async_mode = flag

    def get_async_requests(self):
        self._set_nireq()
        return [AsyncInferRequestWrapper(ireq_id, self.exec_network.create_infer_request())
                for ireq_id in range(self.num_requests)]

    def _reshape_input(self, shapes, make_dynamic=False):
        if hasattr(self, 'exec_network'):
            del self.exec_network
        if self.infer_request is not None:
            del self.infer_request
            self.infer_request = None
        partial_shapes = {}
        for name, shape in shapes.items():
            p_shape = PartialShape(
                [Dimension(d) if not isinstance(d, tuple) else Dimension(d[0], d[1]) for d in shape])
            partial_shapes[self.input_to_index[name]] = p_shape
        self.network.reshape(partial_shapes)
        self.dyn_input_layers, self._partial_shapes = self.get_dynamic_inputs(self.network)
        if self.dyn_input_layers and make_dynamic:
            return
        self.exec_network = self.ie_core.compile_model(self.network, self.device)
        self.infer_request = self.exec_network.create_infer_request()

    @staticmethod
    def reshape_network(network, shapes):
        partial_shapes = {}
        for name, shape in shapes.items():
            p_shape = PartialShape(
                [Dimension(d) if not isinstance(d, tuple) else Dimension(d[0], d[1]) for d in shape])
            partial_shapes[name] = p_shape
        network.reshape(partial_shapes)
        return network

    def _align_data_shape(self, data, input_blob, data_layout):
        input_shape = self.inputs[input_blob].shape
        data_batch_size = data.shape[0]
        input_batch_size = input_shape[0]
        if data_batch_size < input_batch_size:
            warning_message = 'data batch {} is not equal model input batch_size {}.'.format(
                data_batch_size, input_batch_size)
            warning(warning_message)
            diff_number = input_batch_size - data_batch_size
            filled_part = [data[-1]] * diff_number
            data = np.concatenate([data, filled_part])
        return data.reshape(input_shape) if not self.disable_resize_to_input else data

    def _prepare_ie(self, log=True):
        if log:
            print_info('IE version: {}'.format(get_version()))
        if self._is_multi():
            self._prepare_multi_device(log)
        else:
            self.async_mode = self.get_value_from_config('async_mode')
            if log:
                self._log_versions()
        self._device_specific_configuration()

    def _device_specific_configuration(self):
        cpu_extensions = self.config.get('cpu_extensions')
        if 'CPU' in self._devices_list():
            if cpu_extensions:
                selection_mode = self.config.get('_cpu_extensions_mode')
                cpu_extensions = get_cpu_extension(cpu_extensions, selection_mode)
                self.ie_core.add_extension(str(cpu_extensions), 'CPU')
            ov_set_config(
                self.ie_core, {'CPU_BIND_THREAD': 'YES' if not self._is_multi() else 'NO'}, device='CPU')
        gpu_extensions = self.config.get('gpu_extensions')
        if 'GPU' in self._devices_list():
            config = {}
            if gpu_extensions:
                config['CONFIG_FILE'] = str(gpu_extensions)
            if self._is_multi() and 'CPU' in self._devices_list():
                config['CLDNN_PLUGIN_THROTTLE'] = '1'
            if config:
                ov_set_config(self.ie_core, config, device='GPU')
        if self._is_vpu():
            device_list = map(lambda device: device.split('.')[0], self._devices_list())
            devices = [vpu_device for vpu_device in VPU_PLUGINS if vpu_device in device_list]
            log_level = self.config.get('_vpu_log_level')
            if log_level:
                for device in devices:
                    ov_set_config(self.ie_core, {'LOG_LEVEL': log_level}, device=device)
        device_config = self.config.get('device_config')
        if device_config:
            self._set_device_config(device_config)

    def _set_nireq(self):
        num_requests = self.config.get('num_requests')
        if num_requests is not None and num_requests != 'AUTO':
            num_requests = get_or_parse_value(num_requests, casting_type=int)
            if len(num_requests) != 1:
                raise ConfigError('Several values for _num_requests specified')
            self._num_requests = num_requests[0]
            if self._num_requests != 1 and not self.async_mode:
                warning('{} infer requests in sync mode are not supported. Only 1 infer request will be used.'
                        .format(self._num_requests))
                self._num_requests = 1
        elif not self.async_mode:
            self._num_requests = 1
        else:
            self._num_requests = self.auto_num_requests()
        if self.async_mode:
            print_info('Async mode activated')
            print_info('Infer requests number: {}'.format(self.num_requests))

    def auto_num_requests(self, return_list=False):
        platform_list = self._devices_list()
        concurrency_device = {'CPU': 1, 'GPU': 1, 'HDDL': 100, 'MYRIAD': 4}
        if hasattr(self, 'exec_network') and self.exec_network is not None:
            if hasattr(self.exec_network, 'get_metric'):
                num_requests = self.exec_network.get_metric('OPTIMAL_NUMBER_OF_INFER_REQUESTS')
            else:
                num_requests = self.exec_network.get_property('OPTIMAL_NUMBER_OF_INFER_REQUESTS')
            return num_requests
        if 'CPU' in platform_list and len(platform_list) == 1:
            min_requests = [4, 5, 3]
            cpu_count = multiprocessing.cpu_count()
            for min_request in min_requests:
                if cpu_count % min_request == 0:
                    num_req = max(min_request, cpu_count // min_request)
                    return num_req if not return_list else [num_req]
        if 'GPU' in platform_list and len(platform_list) == 1:
            return 2 if not return_list else [2]
        per_device_requests = []
        for device in platform_list:
            per_device_requests.append(concurrency_device.get(device, 1))
        return per_device_requests if return_list else sum(per_device_requests)

    def _prepare_multi_device(self, log=True):
        async_mode = self.get_value_from_config('async_mode')
        if not async_mode:
            warning('Using multi device in sync mode non-applicable. Async mode will be used.')
        num_per_device_req = re.findall(NIREQ_REGEX, self._device)
        device_list = self._devices_list()
        num_devices = len(device_list)
        if num_per_device_req:
            brackets = r"(\()|(\))"
            num_per_device_requests = [int(re.sub(brackets, '', nreq)) for nreq in num_per_device_req]
            if 'num_requests' in self.config:
                warning(
                    "number requests already provided in device name specification. "
                    "'num_requests' option will be ignored."
                )
        elif 'num_requests' in self.config and self.config['num_requests'] != 'AUTO':
            num_per_device_requests = get_or_parse_value(self.config['num_requests'], casting_type=int)
        else:
            num_per_device_requests = self.auto_num_requests(return_list=True)
        if len(num_per_device_requests) == 1:
            num_per_device_requests = [num_per_device_requests[0]] * len(device_list)
        if num_devices != len(num_per_device_requests):
            raise ConfigError('num requests for all {} should be specified'.format(num_devices))
        self._num_requests = sum(num_per_device_requests) * 2
        self._async_mode = True
        if log:
            self._log_versions()
            print_info('Async mode activated')

    def _set_device_config(self, device_config):
        if not isinstance(device_config, dict):
            raise ConfigError('device configuration should be a dict-like')
        if all(not isinstance(value, dict) for value in device_config.values()):
            ov_set_config(self.ie_core, dict(device_config), device=self.device)
        else:
            for key, value in device_config.items():
                if isinstance(value, dict):
                    if key in self._devices_list():
                        if key not in self.ie_core.available_devices:
                            warnings.warn('{} device is unknown. Config loading may lead to error.'.format(key))
                        ov_set_config(self.ie_core, dict(value), device=key)
                    else:
                        warnings.warn(
                            f'Configuration for {key} will be skipped as device is not listed in evaluation device')
                else:
                    warnings.warn('Option {key}: {value} will be skipped because device to which it should be '
                                  'applied is not specified or option is not a dict-like'.format(key=key, value=value))

    def _log_versions(self):
        versions = self.ie_core.get_versions(self._device)
        print_info("Loaded {} plugin version:".format(self._device))
        for device_name, device_version in versions.items():
            print_info("    {device_name} - {descr}: {maj}.{min}.{num}".format(
                device_name=device_name, descr=device_version.description,
                maj=device_version.major, min=device_version.minor, num=device_version.build_number
            ))

    def _create_network(self, input_shapes=None):
        model_path = Path(self._model)
        compiled_model = model_path.suffix == '.blob'
        if compiled_model:
            self.network = None
            self.exec_network = self.ie_core.import_model(str(self._model), self._device)
            self.original_outputs = self.exec_network.outputs
            model_batch = self._get_model_batch_size()
            self._batch = model_batch if model_batch is not None else 1
            return
        if self._weights is None and self._model.suffix != '.onnx':
            self._weights = model_path.parent / (model_path.name.split(model_path.suffix)[0] + '.bin')
        self.network = self.read_network(self._model, self._weights)
        self.original_outputs = self.network.outputs
        self.out_tensor_name_to_node = {}
        for out in self.original_outputs:
            if not out.names:
                continue
            for name in out.names:
                self.out_tensor_name_to_node[name] = out.get_node().friendly_name
        model_batch = self._get_model_batch_size()
        model_batch = 1 if model_batch is None else model_batch
        outputs = self.config.get('outputs')
        if outputs:
            def output_preprocessing(output_string):
                output_tuple = string_to_tuple(output_string, casting_type=None)
                if len(output_tuple) == 1:
                    return output_string
                return output_tuple[0], int(output_tuple[1])

            preprocessed_outputs = [output_preprocessing(output) for output in outputs]
            self.network.add_outputs(preprocessed_outputs)
        if input_shapes is not None:
            self.network.reshape(input_shapes)
        self._batch = self.config.get('batch', model_batch)
        self._set_batch_size(self._batch)
        affinity_map_path = self.config.get('affinity_map')
        if affinity_map_path and self._is_hetero():
            self._set_affinity(affinity_map_path)
        elif affinity_map_path:
            warning('affinity_map config is applicable only for HETERO device')

    def _set_batch_size(self, batch_size):
        model_batch_size = self._get_model_batch_size()
        model_batch_size = 1 if model_batch_size is None else model_batch_size
        if batch_size is None:
            batch_size = model_batch_size
        if batch_size == model_batch_size:
            self._batch = batch_size
            return
        input_shapes = {}
        for input_node in self.network.inputs:
            layer_name = input_node.get_node().friendly_name
            if layer_name in self.const_inputs:
                input_shapes[layer_name] = parse_partial_shape(input_node.get_node().partial_shape)
            else:
                layer_shape = parse_partial_shape(input_node.get_node().partial_shape)
                layout = self.inputs[layer_name].layout
                if '...' in str(layout):
                    layout = self.get_layout_from_config(layer_name)
                else:
                    layout = str(layout).replace('[', '').replace(']', '').replace(',', '')
                batch_pos = layout.find('N')
                if batch_pos != -1:
                    layer_shape[batch_pos] = batch_size
                input_shapes[layer_name] = layer_shape
        self._reshape_input(input_shapes, batch_size == -1)
        self._batch = batch_size

    def _get_model_batch_size(self):
        input_nodes = self.network.inputs if self.network else self.exec_network.inputs
        input_info = input_nodes[0]
        if '...' in str(input_info.get_node().layout):
            layout = self.get_layout_from_config(input_info.get_node().friendly_name)
        else:
            layout = str(input_info.get_node().layout).replace('[', '').replace(']', '').replace(',', '')
        batch_pos = layout.find('N')
        if batch_pos != -1:
            return parse_partial_shape(input_info.partial_shape)[batch_pos]
        return None

    def load_network(self, network=None, log=False, preprocessing=None):
        if hasattr(self, 'exec_network'):
            del self.exec_network
        if hasattr(self, 'infer_request'):
            del self.infer_request
        self.infer_request = None
        if network is None:
            self._create_network()
        else:
            self.network = network
        if self.network is not None:
            self.dyn_input_layers, self._partial_shapes = self.get_dynamic_inputs(self.network)
            self.input_to_tensor_name = self.get_input_tensor_name_mapping(self.network)
            self.input_to_index = {inp.get_node().friendly_name: idx
                                   for idx, inp in enumerate(self.network.inputs)}
        if not self._postpone_input_configuration:
            self._set_precision()
            self._set_input_shape()
            self.dyn_input_layers, self._partial_shapes = self.get_dynamic_inputs(self.network)
            if log:
                self.print_input_output_info(self.network if self.network is not None else self.exec_network)
            if preprocessing:
                self._set_preprocess(preprocessing)
            model_batch = self._get_model_batch_size()
            model_batch = 1 if model_batch is None else model_batch
            self._batch = self.config.get('batch', model_batch)
            self._set_batch_size(self._batch)
            self.try_to_set_default_layout()
            if self.network and not preprocessing and (not self.dyn_input_layers or self.is_dynamic):
                self.exec_network = self.ie_core.compile_model(self.network, self._device)
                self.infer_request = self.exec_network.create_infer_request()

    def update_input_configuration(self, input_config):
        self.config['inputs'] = input_config
        self._set_precision()
        self._set_input_shape()
        self.try_to_set_default_layout()
        self._set_batch_size(self.config.get('batch'))
        self.dyn_input_layers, self._partial_shapes = self.get_dynamic_inputs(self.network)
        self.print_input_output_info(self.network if self.network is not None else self.exec_network)
        if self.preprocessor:
            self._set_preprocess(self.preprocessor)
        if self.network:
            self.exec_network = self.ie_core.compile_model(self.network, self._device)
            self.infer_request = self.exec_network.create_infer_request()

    @staticmethod
    def get_dynamic_inputs(network):
        def is_dynamic(data_info):
            if hasattr(data_info, 'is_dynamic'):
                return data_info.is_dynamic
            return -1 in data_info.shape or not data_info.shape

        inputs_with_undefined_shapes = []
        partial_shapes = {}
        if network is None:
            return inputs_with_undefined_shapes, partial_shapes
        for input_info in network.inputs:
            input_node = input_info.get_node()
            input_shape = input_node.get_partial_shape()
            if is_dynamic(input_shape):
                inputs_with_undefined_shapes.append(input_node.friendly_name)
                partial_shapes[input_node.friendly_name] = input_shape
        return inputs_with_undefined_shapes, partial_shapes

    @staticmethod
    def get_input_tensor_name_mapping(network):
        inputs_mapping = {}
        for idx, input_node in enumerate(network.inputs):
            tensor_names = list(input_node.get_names())
            if not tensor_names:
                inputs_mapping[input_node.get_node().friendly_name] = idx
            else:
                inputs_mapping[input_node.get_node().friendly_name] = tensor_names[0]
        return inputs_mapping

    @property
    def dyn_batch_only(self):
        if not self.dyn_input_layers:
            return True
        for input_name in self.dyn_input_layers:
            partial_shape = self._partial_shapes[input_name]
            num_undef = 0
            for i in partial_shape:
                if i == -1:
                    num_undef += 1
                if num_undef > 1:
                    return False
            layout = self.inputs[input_name].layout
            if '...' in str(layout):
                layout = self.get_layout_from_config(input_name)
            else:
                layout = str(layout).replace('[', '').replace(']', '').replace(',', '')
            if not layout:
                return False
            for dim, layout_dim in zip(partial_shape, layout):
                if dim == -1 and layout_dim != 'N':
                    return False
        return True

    def get_layout_from_config(self, input_name):
        for input_config in self.config.get('inputs', []):
            if input_config.get('name', '') != input_name:
                continue
            return input_config.get('layout', '')
        return ''

    @property
    def layout_mapping(self):
        def prepare_layout_string(layout):
            layout = str(layout)
            return layout.replace('[', '').replace(']', '').replace(',', '')

        inputs = self.network.inputs if self.network is not None else self.exec_network.inputs
        layouts = {}
        for input_node in inputs:
            layouts[input_node.get_node().friendly_name] = prepare_layout_string(input_node.get_node().layout)
        return layouts

    def load_ir(self, xml_path, bin_path, log=False):
        self._model = xml_path
        self._weights = bin_path
        self.async_mode = True
        self.load_network(log=log)
        self.try_to_set_default_layout()

    def read_network(self, model, weights):
        if weights is not None:
            network = self.ie_core.read_model(model=str(model), weights=str(weights))
        else:
            network = self.ie_core.read_model(model=str(model))
        return network

    def inputs_info_for_meta(self, inputs=None):
        if inputs:
            return {layer_name: np.shape(data) for layer_name, data in inputs.items()}
        return {
            layer_name: parse_partial_shape(layer.get_partial_shape())
            for layer_name, layer in self.inputs.items()
            if layer_name not in self.const_inputs + self.image_info_inputs}

    @property
    def lstm_inputs(self):
        return self._lstm_inputs

    def initialize_undefined_shapes(self, input_data, template_shapes=None):
        if self.dynamic_shapes_policy in ['default', 'dynamic']:
            try:
                if template_shapes:
                    input_shapes = {layer_name: template_shapes.get(layer_name, data.shape)
                                    for layer_name, data in input_data[0].items()}
                    self._reshape_input(input_shapes)
                    self.load_network(self.network)
                    self.is_dynamic = True
                if not hasattr(self, 'exec_network') or self.exec_network is None:
                    self.is_dynamic = True
                    self.load_network(self.network)
                self.exec_network.infer_new_request({
                    self.input_to_tensor_name[k]: data for k, data in input_data[0].items()})
                return
            except RuntimeError as e:
                if self.dynamic_shapes_policy == 'dynamic':
                    raise e
                self.is_dynamic = False
        self._reshape_input({layer_name: data.shape for layer_name, data in input_data[0].items()})

    def resolve_undefined_batch(self):
        if self.dynamic_shapes_policy in ['default', 'dynamic']:
            try:
                self.is_dynamic = True
                self.load_network(self.network)
            except RuntimeError as e:
                if self.dynamic_shapes_policy == 'dynamic':
                    raise e
                self.is_dynamic = False
        if not self.is_dynamic:
            self.load_network(self.network)

    def fit_to_input(self, data, layer_name, layout, precision, template=None):
        if precision is None:
            precision = format_map[self.inputs[layer_name].element_type.get_type_name()]
        if layer_name in self.dyn_input_layers:
            layer_rang = len(parse_partial_shape(self._partial_shapes[layer_name]))
            input_template = template.get(layer_name) if template else template
            data, l_template = self._data_to_blob_dyn(layer_rang, data, layout, input_template)
            layer_shape = data.shape
            if l_template is not None:
                template[layer_name] = l_template
        else:
            layer_shape = tuple(self.inputs[layer_name].shape)
            precision = format_map[self.inputs[layer_name].element_type.get_type_name()]
            data = self._data_to_blob(layer_shape, data, layout)
        if precision:
            data = data.astype(precision)
        if layer_name in self.dyn_input_layers:
            self._do_reshape = not self.is_dynamic
            return data, template
        data_shape = np.shape(data)
        if data_shape != layer_shape:
            if self.allow_reshape_input:
                self._do_reshape = True
                return data
        return self._align_data_shape(data, layer_name, layout)

    @staticmethod
    def _data_to_blob_dyn(layer_rang, data, layout, template=None):
        data_shape = np.shape(data)
        if len(data_shape) - layer_rang == 1 and data_shape[0] == 1:
            if len(data_shape) == len(layout):
                data = np.transpose(data, layout)
                if template is not None and len(template) == layer_rang:
                    tmp_template = [1, ] + template
                    new_template = [tmp_template[l_dim] for l_dim in layout][1:]
                    template = new_template
            data = data[0]
            data_shape = np.shape(data)
        if template is not None:
            if len(template) < np.ndim(data):
                template = [1] * (np.ndim(data) - len(template)) + template
            if len(template) > np.ndim(data):
                template = template[0]
        if len(layout) == len(data_shape):
            if template is not None:
                new_template = [template[l_dim] for l_dim in layout]
                template = new_template
            return np.transpose(data, layout), template
        return np.array(data), template

    def _data_to_blob(self, layer_shape, data, layout):  # pylint:disable=R0911,R0912
        data_shape = np.shape(data)
        if len(layer_shape) == 4:
            if len(data_shape) == 5:
                data = data[0]
            if len(data_shape) == 3:
                data = np.expand_dims(data, -1)
            data_shape = np.shape(data)
            if len(data_shape) < 4:
                if np.size(np.squeeze(np.zeros(layer_shape))) == np.size(np.squeeze(np.zeros(data_shape))):
                    return np.resize(data, layer_shape)
            return np.transpose(data, layout) if layout is not None else data
        if len(layer_shape) == 2:
            if len(data_shape) == 1:
                return np.transpose([data])
            if len(data_shape) > 2:
                if all(dim == 1 for dim in layer_shape) and all(dim == 1 for dim in data_shape):
                    return np.resize(data, layer_shape)
                if len(np.squeeze(np.zeros(layer_shape))) == len(np.squeeze(np.zeros(data_shape))):
                    return np.resize(data, layer_shape)
        if len(layer_shape) == 3 and len(data_shape) == 4:
            return np.transpose(data, layout)[0] if layout is not None else data[0]
        if len(layer_shape) == 1:
            return np.resize(data, layer_shape)
        if (len(data_shape) == 3) and (len(layer_shape) == 2) and (data_shape[0] == 1) and (
                data_shape[1] == 1) and self.allow_reshape_input:
            return data[0]
        if layout is not None and len(layer_shape) == len(layout):
            return np.transpose(data, layout)
        if (len(layer_shape) == 1 and len(data_shape) > 1 and
                len(np.squeeze(np.zeros(layer_shape))) == len(np.squeeze(np.zeros(data_shape)))):
            return np.resize(data, layer_shape)
        return np.array(data)

    def _set_precision(self):
        config_inputs = self.config.get('inputs', [])
        for input_config in config_inputs:
            if 'precision' in input_config:
                if self.network:
                    self.inputs[input_config['name']].set_element_type(
                        PRECISION_STR_TO_TYPE[input_config['precision'].upper()]
                    )

    def _set_input_shape(self):
        if not self.network:
            return
        config_inputs = self.config.get('inputs', [])
        input_shapes = {}
        make_dynamic = False
        for input_config in config_inputs:
            if 'shape' in input_config:
                input_shapes[input_config['name']] = input_config['shape']
                if -1 in input_config['shape']:
                    make_dynamic = True
        if not input_shapes:
            return
        orig_input_shapes = {input_name: parse_partial_shape(input_info.partial_shape)
                             for input_name, input_info in self.inputs.items()}
        orig_input_shapes.update(input_shapes)
        self._reshape_input(orig_input_shapes, make_dynamic)

    def _configure_lstm_inputs(self):
        lstm_mapping = {}
        config_inputs = self.config.get('inputs', [])
        for input_config in config_inputs:
            if input_config['type'] == 'LSTM_INPUT':
                lstm_mapping[input_config['name']] = input_config['value']
        self._lstm_inputs = lstm_mapping

    def _fill_lstm_inputs(self, infer_outputs=None):
        feed_dict = {}
        for lstm_var, output_layer in self._lstm_inputs.items():
            layer_shape = parse_partial_shape(self.inputs[lstm_var].partial_shape)
            if infer_outputs:
                output_layer = postprocess_output_name(output_layer, infer_outputs)
            input_data = infer_outputs[output_layer].reshape(layer_shape) if infer_outputs else np.zeros(
                layer_shape, dtype=format_map[self.inputs[lstm_var].element_type.get_type_name()]
            )
            feed_dict[lstm_var] = input_data
        return feed_dict

    @staticmethod
    def print_input_output_info(network, prefix=None):
        if prefix:
            print_info('{} - Input info:'.format(prefix))
        else:
            print_info('Input info:')
        network_inputs = network.inputs
        network_outputs = network.outputs
        for input_info in network_inputs:
            input_node = input_info.get_node()
            print_info('\tNode name: {}'.format(input_node.friendly_name))
            print_info('\tTensor names: {}'.format(', '.join(input_info.get_names())))
            print_info('\tprecision: {}'.format(input_node.element_type.get_type_name()))
            print_info('\tshape: {}\n'.format(parse_partial_shape(input_node.get_partial_shape())))
        print_info('Output info')
        for output_info in network_outputs:
            out_node = output_info.get_node()
            print_info('\tNode name: {}'.format(out_node.friendly_name))
            print_info('\tTensor names: {}'.format(', '.join(output_info.get_names())))
            precision = out_node.get_output_element_type(0).get_type_name()
            print_info('\tprecision: {}'.format(precision))
            shape = parse_partial_shape(out_node.get_output_partial_shape(0))
            print_info('\tshape: {}\n'.format(shape))

    def _set_preprocess(self, preprocess):
        if preprocess.ie_processor is None:
            return
        if self.network is not None:
            self.disable_resize_to_input = False
            preprocess_steps = preprocess.ie_preprocess_steps
            if not preprocess_steps:
                return
            for input_name, input_info in self.network.input_info.items():
                if input_name in self.const_inputs + self.image_info_inputs:
                    continue
                for (name, value) in preprocess_steps:
                    setattr(input_info.preprocess_info, name, value)
                if preprocess.ie_processor.has_normalization():
                    channel_id = input_info.layout.find('C')
                    if channel_id != -1:
                        num_channels = input_info.input_data.shape[channel_id]
                        preprocess.ie_processor.set_normalization(num_channels, input_info.preprocess_info)
            self.disable_resize_to_input = preprocess.ie_processor.has_resize()
            self._use_set_blob = self.disable_resize_to_input
            self.load_network(self.network)
            self._preprocess_steps = preprocess_steps
            return
        preprocess_info_by_input = {}
        preprocess_info = preprocess.preprocess_info
        for input_name in self.inputs:
            if input_name in self.const_inputs + self.image_info_inputs:
                continue
            if preprocess.ie_processor.has_normalization():
                channel_id = self.inputs[input_name].layout.find('C')
                if channel_id != -1:
                    num_channels = self.inputs[input_name].shape[channel_id]
                    preprocess.ie_processor.set_normalization(num_channels, preprocess_info)
            preprocess_info_by_input[input_name] = preprocess_info
        self._preprocess_info = preprocess_info_by_input
        self.disable_resize_to_input = preprocess.ie_processor.has_resize()

    def get_model_file_type(self):
        return self._model.suffix

    def get_infer_queue(self, log=True):
        if self.config.get('num_requests', 'AUTO') == 'AUTO':
            num_requests = self.auto_num_requests()
        else:
            num_requests = self.num_requests or 0
        queue = AsyncInferQueue(self.exec_network, num_requests)
        if log:
            print_info('Prepared async infer queue with {} requests'.format(len(queue)))
        else:
            debug('Prepared async infer queue with {} requests'.format(len(queue)))
        return queue

    def prepare_data_for_request(self, inputs, batch_meta, batch_id, batch_input_ids,
                                 batch_annotation, batch_identifiers):
        infer_inputs = inputs[0]
        feed_dict = {self.input_to_tensor_name[name]: data for name, data in infer_inputs.items()}
        if batch_meta is not None:
            self._fill_meta(batch_meta, None if not self.dyn_input_layers else infer_inputs)
        context = (batch_id, batch_input_ids, batch_annotation, batch_identifiers, batch_meta)
        return feed_dict, context

    @staticmethod
    def get_result_from_request(request, return_raw=False):
        preprocessed_results = [{out.get_node().friendly_name: data for out, data in request.results.items()}]
        if return_raw:
            return preprocessed_results, [request.results]
        return preprocessed_results

    def input_shape(self, input_name):
        return parse_partial_shape(self.inputs[input_name].get_partial_shape())

    def release(self):
        if 'network' in self.__dict__:
            del self.network
        if 'infer_request' in self.__dict__:
            del self.infer_request
        if 'exec_network' in self.__dict__:
            del self.exec_network
        if 'ie_core' in self.__dict__:
            del self.ie_core
def main():
    args = parse_arguments()

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if 'CPU' in args.target_device:
        if args.path_to_extension:
            core.add_extension(args.path_to_extension, "CPU")
        if args.number_threads is not None:
            # The new openvino.runtime Core exposes set_property, not the old set_config.
            core.set_property('CPU', {'CPU_THREADS_NUM': str(args.number_threads)})
    elif 'GPU' in args.target_device:
        if args.path_to_cldnn_config:
            core.set_property('GPU', {'CONFIG_FILE': args.path_to_cldnn_config})
    else:
        raise AttributeError("Device {} does not support 3D convolution. "
                             "Please use CPU, GPU or HETERO:*CPU*, HETERO:*GPU*".format(args.target_device))

    log.info('Reading model {}'.format(args.path_to_model))
    model = core.read_model(args.path_to_model)
    if len(model.inputs) != 1:
        raise RuntimeError("only 1 input layer model is supported")
    input_tensor_name = model.inputs[0].get_any_name()
    if args.shape:
        log.debug("Reshape model from {} to {}".format(model.inputs[0].shape, args.shape))
        model.reshape({input_tensor_name: PartialShape(args.shape)})
        if len(model.inputs[0].shape) != 5:
            raise RuntimeError("Incorrect shape {} for 3d convolution network".format(args.shape))

    n, c, d, h, w = model.inputs[0].shape
    compiled_model = core.compile_model(model, args.target_device)
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.path_to_model, args.target_device))

    start_time = perf_counter()
    if not os.path.exists(args.path_to_input_data):
        raise AttributeError("Path to input data: '{}' does not exist".format(args.path_to_input_data))

    input_type = get_input_type(args.path_to_input_data)
    is_nifti_data = (input_type == NIFTI_FILE or input_type == NIFTI_FOLDER)

    if input_type == NIFTI_FOLDER:
        series_name = find_series_name(args.path_to_input_data)
        original_data, data_crop, affine, original_size, bbox = \
            read_image(args.path_to_input_data, data_name=series_name, sizes=(d, h, w),
                       mri_sequence_order=args.mri_sequence,
                       full_intensities_range=args.full_intensities_range)
    elif input_type == NIFTI_FILE:
        original_data, data_crop, affine, original_size, bbox = \
            read_image(args.path_to_input_data, data_name=args.path_to_input_data,
                       sizes=(d, h, w), is_series=False,
                       mri_sequence_order=args.mri_sequence,
                       full_intensities_range=args.full_intensities_range)
    else:
        # np.float was removed in NumPy 1.24; plain float keeps the original float64 dtype.
        data_crop = np.zeros(shape=(n, c, d, h, w), dtype=float)
        im_seq = ImageSequence.Iterator(Image.open(args.path_to_input_data))
        for i, page in enumerate(im_seq):
            im = np.array(page).reshape(h, w, c)
            for channel in range(c):
                data_crop[:, channel, i, :, :] = im[:, :, channel]
        original_data = data_crop
        original_size = original_data.shape[-3:]

    result = infer_request.infer({input_tensor_name: data_crop})
    result = next(iter(result.values()))
    batch, channels, out_d, out_h, out_w = result.shape

    list_img = []
    list_seg_result = []
    for batch, data in enumerate(result):
        seg_result = np.zeros(shape=original_size, dtype=np.uint8)
        if data.shape[1:] != original_size:
            x = bbox[1] - bbox[0]
            y = bbox[3] - bbox[2]
            z = bbox[5] - bbox[4]
            out_result = np.zeros(shape=((channels,) + original_size), dtype=float)
            out_result[:, bbox[0]:bbox[1], bbox[2]:bbox[3], bbox[4]:bbox[5]] = \
                resample_np(data, (channels, x, y, z), 1)
        else:
            out_result = data

        if channels == 1:
            reshaped_data = out_result.reshape(original_size[0], original_size[1], original_size[2])
            mask = reshaped_data[:, :, :] > 0.5
            reshaped_data[mask] = 1
            seg_result = reshaped_data.astype(int)
        elif channels == 4:
            seg_result = np.argmax(out_result, axis=0).astype(int)
        elif channels == 3:
            res = out_result > 0.5
            wt = res[0]
            tc = res[1]
            et = res[2]
            seg_result[wt] = 2
            seg_result[tc] = 1
            seg_result[et] = 3

        im = np.stack([original_data[batch, 0, :, :, :],
                       original_data[batch, 0, :, :, :],
                       original_data[batch, 0, :, :, :]],
                      axis=3)
        im = 255 * (im - im.min()) / (im.max() - im.min())
        color_seg_frame = np.zeros(im.shape, dtype=np.uint8)
        for idx, c in enumerate(CLASSES_COLOR_MAP):
            color_seg_frame[seg_result[:, :, :] == idx, :] = np.array(c, dtype=np.uint8)
        mask = seg_result[:, :, :] > 0
        im[mask] = color_seg_frame[mask]

        for k in range(im.shape[2]):
            if is_nifti_data:
                list_img.append(Image.fromarray(im[:, :, k, :].astype('uint8'), 'RGB'))
            else:
                list_img.append(Image.fromarray(im[k, :, :, :].astype('uint8'), 'RGB'))

        if args.output_nifti and is_nifti_data:
            list_seg_result.append(seg_result)

    total_latency = (perf_counter() - start_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))

    tiff_output_name = os.path.join(args.path_to_output, 'output.tiff')
    Image.new('RGB', (original_data.shape[3], original_data.shape[2])).save(
        tiff_output_name, append_images=list_img, save_all=True)
    log.debug("Result tiff file was saved to {}".format(tiff_output_name))

    if args.output_nifti and is_nifti_data:
        # list.index() on numpy arrays raises "truth value is ambiguous"; enumerate instead.
        for i, seg_res in enumerate(list_seg_result):
            nii_filename = os.path.join(args.path_to_output, 'output_{}.nii.gz'.format(i))
            nib.save(nib.Nifti1Image(seg_res, affine=affine), nii_filename)
            log.debug("Result nifti file was saved to {}".format(nii_filename))
def test_op_extension_via_frontend_extension_set_attrs_values():
    skip_if_onnx_frontend_is_disabled()

    # use common (openvino.frontend) import here
    from openvino.frontend import OpExtension
    from openvino.runtime import Core

    ie = Core()
    # check the model is valid
    model = ie.read_model(onnx_model_for_op_extension_test)
    assert model

    # add extensions
    ie.add_extension(OpExtension("Multiply", "Mul", {}, {"auto_broadcast": "numpy"}))
    ie.add_extension(OpExtension("Elu", "Elu", {}, {"alpha": 1.}))
    ie.add_extension(OpExtension("Floor"))
    ie.add_extension(OpExtension("Concat", {}, {"axis": 0}))
    ie.add_extension(OpExtension("Convert", "Cast", {}, {"destination_type": "i64"}))
    ie.add_extension(OpExtension("AvgPool", "AveragePool", {}, {
        "kernel": [2, 2],
        "strides": [2, 2],
        "pads_begin": [0, 0],
        "pads_end": [1, 1],
        "exclude-pad": True,
        "auto_pad": "same_upper",
        "rounding_type": "floor"
    }))

    model = ie.read_model(onnx_model_for_op_extension_test)
    assert model
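# A hedged note on the OpExtension signatures exercised above, per the openvino.frontend
# API: the one-argument form maps a framework op to the OpenVINO op of the same name,
# while OpExtension(ov_type, fw_type, attr_names_map, attr_values_map) additionally
# renames the op and maps or pins its attributes. A minimal standalone sketch of the
# one-argument form:
from openvino.frontend import OpExtension
from openvino.runtime import Core

core = Core()
core.add_extension(OpExtension("Relu"))  # framework "Relu" -> OpenVINO "Relu", no attribute mapping needed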
def main():
    # build_argparser() returns an ArgumentParser; parse_args() is required before use.
    args = build_argparser().parse_args()

    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    ie = Core()
    if args.device == "CPU" and args.cpu_extension:
        ie.add_extension(args.cpu_extension, 'CPU')

    log.info('Reading model {}'.format(args.model))
    model = ie.read_model(args.model, args.model[:-4] + ".bin")

    if len(model.inputs) != 1:
        log.error("Demo supports only models with 1 input layer")
        sys.exit(1)
    input_tensor_name = model.inputs[0].get_any_name()
    if len(model.outputs) != 1:
        log.error("Demo supports only models with 1 output layer")
        sys.exit(1)

    batch_size, channels, one, length = model.inputs[0].shape
    if one != 1:
        raise RuntimeError("Wrong third dimension size of model input shape - {} (expected 1)".format(one))

    hop = length - args.overlap if isinstance(args.overlap, int) else int(length * (1.0 - args.overlap))
    if hop < 0:
        log.error("Wrong value for '-ol/--overlap' argument - overlapping more than clip length")
        sys.exit(1)

    compiled_model = ie.compile_model(model, args.device)
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    labels = []
    if args.labels:
        with open(args.labels, "r") as file:
            labels = [line.rstrip() for line in file.readlines()]

    start_time = perf_counter()
    audio = AudioSource(args.input, channels=channels, samplerate=args.sample_rate)

    outputs = []
    clips = 0
    for idx, chunk in enumerate(audio.chunks(length, hop, num_chunks=batch_size)):
        chunk = np.reshape(chunk, model.inputs[0].shape)
        output = next(iter(infer_request.infer({input_tensor_name: chunk}).values()))
        clips += batch_size
        for batch, data in enumerate(output):
            chunk_start_time = (idx * batch_size + batch) * hop / audio.samplerate
            chunk_end_time = ((idx * batch_size + batch) * hop + length) / audio.samplerate
            outputs.append(data)
            label = np.argmax(data)
            if chunk_start_time < audio.duration():
                log.info("[{:.2f}-{:.2f}] - {:6.2%} {:s}".format(
                    chunk_start_time, chunk_end_time, data[label],
                    labels[label] if labels else "Class {}".format(label)))

    total_latency = (perf_counter() - start_time) * 1e3
    log.info("Metrics report:")
    log.info("\tLatency: {:.1f} ms".format(total_latency))
    sys.exit(0)
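# A small worked example (assumed numbers) of the hop computation above: for a clip
# length of 16000 samples, an integer overlap of 4000 samples gives
# hop = 16000 - 4000 = 12000, and a fractional overlap of 0.25 gives
# hop = int(16000 * (1.0 - 0.25)) = 12000 -- successive chunks start 12000 samples
# apart and share 4000 samples.
length, int_overlap, frac_overlap = 16000, 4000, 0.25
assert length - int_overlap == 12000
assert int(length * (1.0 - frac_overlap)) == 12000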
def main():
    args = build_argparser().parse_args()

    cap = open_images_capture(args.input, args.loop)

    with open(args.labels, 'rt') as labels_file:
        class_labels = labels_file.read().splitlines()
        assert len(class_labels), 'The file with class labels is empty'

    # Plugin initialization for specified device and load extensions library if specified.
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()
    if args.cpu_extension and 'CPU' in args.device:
        core.add_extension(args.cpu_extension, 'CPU')

    # Read IR
    log.info('Reading model {}'.format(args.model))
    model = core.read_model(args.model)

    image_input, image_info_input, (n, c, h, w), model_type, output_names, postprocessor = check_model(model)
    args.no_keep_aspect_ratio = model_type == 'yolact' or args.no_keep_aspect_ratio

    compiled_model = core.compile_model(model, args.device)
    infer_request = compiled_model.create_infer_request()
    log.info('The model {} is loaded to {}'.format(args.model, args.device))

    if args.no_track:
        tracker = None
    else:
        tracker = StaticIOUTracker()

    if args.delay:
        delay = args.delay
    else:
        delay = int(cap.get_type() in ('VIDEO', 'CAMERA'))

    frames_processed = 0
    metrics = PerformanceMetrics()
    visualizer = Visualizer(class_labels, show_boxes=args.show_boxes, show_scores=args.show_scores)
    video_writer = cv2.VideoWriter()

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")
    out_frame_size = (frame.shape[1], frame.shape[0])
    presenter = monitors.Presenter(args.utilization_monitors, 45,
                                   (round(out_frame_size[0] / 4), round(out_frame_size[1] / 8)))
    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), out_frame_size):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        if args.no_keep_aspect_ratio:
            # Resize the image to a target size.
            scale_x = w / frame.shape[1]
            scale_y = h / frame.shape[0]
            input_image = cv2.resize(frame, (w, h))
        else:
            # Resize the image to keep the same aspect ratio and to fit it to a window of a target size.
            scale_x = scale_y = min(h / frame.shape[0], w / frame.shape[1])
            input_image = cv2.resize(frame, None, fx=scale_x, fy=scale_y)

        input_image_size = input_image.shape[:2]
        input_image = np.pad(input_image,
                             ((0, h - input_image_size[0]), (0, w - input_image_size[1]), (0, 0)),
                             mode='constant', constant_values=0)
        # Change data layout from HWC to CHW.
        input_image = input_image.transpose((2, 0, 1))
        input_image = input_image.reshape((n, c, h, w)).astype(np.float32)
        input_image_info = np.asarray([[input_image_size[0], input_image_size[1], 1]], dtype=np.float32)

        # Run the model.
        feed_dict = {image_input: input_image}
        if image_info_input:
            feed_dict[image_info_input] = input_image_info
        infer_request.infer(feed_dict)
        outputs = {name: infer_request.get_tensor(name).data[:] for name in output_names}

        # Parse detection results of the current request
        scores, classes, boxes, masks = postprocessor(outputs, scale_x, scale_y,
                                                      *frame.shape[:2], h, w, args.prob_threshold)

        if len(boxes) and args.raw_output_message:
            log.debug(' -------------------------- Frame # {} -------------------------- '.format(frames_processed))
            log.debug(' Class ID | Confidence |     XMIN |     YMIN |     XMAX |     YMAX ')
            for box, cls, score in zip(boxes, classes, scores):
                log.debug('{:>10} | {:>10f} | {:>8.2f} | {:>8.2f} | {:>8.2f} | {:>8.2f} '.format(cls, score, *box))

        # Get instance track IDs.
        masks_tracks_ids = None
        if tracker is not None:
            masks_tracks_ids = tracker(masks, classes)

        # Visualize masks.
        frame = visualizer(frame, boxes, classes, scores, presenter, masks, masks_tracks_ids)

        metrics.update(start_time, frame)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or frames_processed <= args.output_limit):
            video_writer.write(frame)

        if not args.no_show:
            # Show resulting image.
            cv2.imshow('Results', frame)
            key = cv2.waitKey(delay)
            esc_code = 27
            if key == esc_code:
                break
            presenter.handleKey(key)

        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
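# A worked example (assumed numbers) of the keep-aspect-ratio branch above: fitting a
# 1920x1080 frame into a 640x480 (w x h) model input scales both axes by
# min(480/1080, 640/1920) = 1/3, yielding a 640x360 image that np.pad then extends
# with 120 zero rows at the bottom to reach the full 480x640 input.
import numpy as np
h, w = 480, 640
frame_h, frame_w = 1080, 1920
scale = min(h / frame_h, w / frame_w)
resized_h, resized_w = round(frame_h * scale), round(frame_w * scale)
assert (resized_h, resized_w) == (360, 640)
padded = np.pad(np.zeros((resized_h, resized_w, 3)),
                ((0, h - resized_h), (0, w - resized_w), (0, 0)),
                mode='constant', constant_values=0)
assert padded.shape == (480, 640, 3)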
def set_cpu_extensions(core: Core, cpu_ext: str):
    core.add_extension(cpu_ext)
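# A minimal usage sketch for the helper above; the extension library path and model
# file are assumptions and must point to real artifacts built for your platform.
from openvino.runtime import Core

core = Core()
set_cpu_extensions(core, "libcustom_cpu_extension.so")  # hypothetical extension binary
compiled = core.compile_model(core.read_model("model.xml"), "CPU")  # hypothetical model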