def test_get_version(device):
    ie = Core()
    version = ie.get_versions(device)
    assert isinstance(version, dict), "Returned version must be a dictionary"
    assert device in version, "{} plugin version wasn't found in versions".format(device)
    assert hasattr(version[device], "major"), "Returned version has no field 'major'"
    assert hasattr(version[device], "minor"), "Returned version has no field 'minor'"
    assert hasattr(version[device], "description"), "Returned version has no field 'description'"
    assert hasattr(version[device], "build_number"), "Returned version has no field 'build_number'"
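# A minimal sketch of how the `device` argument could be supplied when running
# this test with pytest; the fixture below and its device list are assumptions
# for illustration, not part of the original suite.
import pytest
from openvino.runtime import Core


@pytest.fixture(params=['CPU'])  # hypothetical: extend with 'GPU', etc., if available
def device(request):
    return request.param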
class Benchmark:
    def __init__(self, device: str, number_infer_requests: int = 0, number_iterations: int = None,
                 duration_seconds: int = None, api_type: str = 'async', inference_only=None):
        self.device = device
        self.core = Core()
        self.nireq = number_infer_requests if api_type == 'async' else 1
        self.niter = number_iterations
        self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
        self.api_type = api_type
        self.inference_only = inference_only
        self.latency_groups = []

    def __del__(self):
        del self.core

    def add_extension(self, path_to_extension: str = None, path_to_cldnn_config: str = None):
        if path_to_cldnn_config:
            self.core.set_property(GPU_DEVICE_NAME, {'CONFIG_FILE': path_to_cldnn_config})
            logger.info(f'GPU extensions are loaded from {path_to_cldnn_config}')
        if path_to_extension:
            self.core.add_extension(extension_path=path_to_extension)
            logger.info(f'CPU extensions are loaded from {path_to_extension}')

    def get_version_info(self) -> str:
        logger.info(f"OpenVINO:\n{'': <9}{'API version':.<24} {get_version()}")
        version_string = 'Device info\n'
        for device, version in self.core.get_versions(self.device).items():
            version_string += f"{'': <9}{device}\n"
            version_string += f"{'': <9}{version.description:.<24}{' version'} {version.major}.{version.minor}\n"
            version_string += f"{'': <9}{'Build':.<24} {version.build_number}\n"
        return version_string

    def set_config(self, config=None):
        # avoid a mutable default argument; treat a missing config as empty
        config = config or {}
        for device in config.keys():
            self.core.set_property(device, config[device])

    def set_cache_dir(self, cache_dir: str):
        self.core.set_property({'CACHE_DIR': cache_dir})

    def read_model(self, path_to_model: str):
        model_filename = os.path.abspath(path_to_model)
        head, ext = os.path.splitext(model_filename)
        weights_filename = os.path.abspath(head + BIN_EXTENSION) if ext == XML_EXTENSION else ""
        return self.core.read_model(model_filename, weights_filename)

    def create_infer_requests(self, compiled_model):
        if self.api_type == 'sync':
            requests = [compiled_model.create_infer_request()]
        else:
            requests = AsyncInferQueue(compiled_model, self.nireq)
            self.nireq = len(requests)
        return requests

    def first_infer(self, requests):
        if self.api_type == 'sync':
            requests[0].infer()
            return requests[0].latency
        else:
            idle_id = requests.get_idle_request_id()
            requests.start_async()
            requests.wait_all()
            return requests[idle_id].latency

    def update_progress_bar(self, progress_bar, exec_time, progress_count):
        if self.duration_seconds:
            # Calculate how many progress intervals are covered by the current iteration.
            # Depends on the current iteration time and the time of each progress interval.
            # Previously covered progress intervals must be skipped.
            progress_interval_time = self.duration_seconds / progress_bar.total_num
            new_progress = int(exec_time / progress_interval_time - progress_count)
            progress_bar.add_progress(new_progress)
            progress_count += new_progress
        elif self.niter:
            progress_bar.add_progress(1)
        return progress_count

    def sync_inference(self, request, data_queue, progress_bar):
        progress_count = 0
        exec_time = 0
        iteration = 0
        times = []
        start_time = datetime.utcnow()
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds):
            if self.inference_only is False:
                request.set_input_tensors(data_queue.get_next_input())
            request.infer()
            times.append(request.latency)
            iteration += 1
            exec_time = (datetime.utcnow() - start_time).total_seconds()
            if progress_bar:
                progress_count = self.update_progress_bar(progress_bar, exec_time, progress_count)
        total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
        return sorted(times), total_duration_sec, iteration

    def async_inference_only(self, infer_queue, progress_bar):
        progress_count = 0
        exec_time = 0
        iteration = 0
        times = []
        in_fly = set()
        start_time = datetime.utcnow()
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds) or \
              (iteration % self.nireq):
            idle_id = infer_queue.get_idle_request_id()
            if idle_id in in_fly:
                times.append(infer_queue[idle_id].latency)
            else:
                in_fly.add(idle_id)
            infer_queue.start_async()
            iteration += 1
            exec_time = (datetime.utcnow() - start_time).total_seconds()
            if progress_bar:
                progress_count = self.update_progress_bar(progress_bar, exec_time, progress_count)
        infer_queue.wait_all()
        total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
        for infer_request_id in in_fly:
            times.append(infer_queue[infer_request_id].latency)
        return sorted(times), total_duration_sec, iteration

    def async_inference_full_mode(self, infer_queue, data_queue, progress_bar, pcseq):
        progress_count = 0
        processed_frames = 0
        exec_time = 0
        iteration = 0
        times = []
        num_groups = len(self.latency_groups)
        in_fly = set()
        start_time = datetime.utcnow()
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds) or \
              (iteration % num_groups):
            processed_frames += data_queue.get_next_batch_size()
            idle_id = infer_queue.get_idle_request_id()
            if idle_id in in_fly:
                times.append(infer_queue[idle_id].latency)
                if pcseq:
                    self.latency_groups[infer_queue.userdata[idle_id]].times.append(infer_queue[idle_id].latency)
            else:
                in_fly.add(idle_id)
            group_id = data_queue.current_group_id
            infer_queue[idle_id].set_input_tensors(data_queue.get_next_input())
            infer_queue.start_async(userdata=group_id)
            iteration += 1
            exec_time = (datetime.utcnow() - start_time).total_seconds()
            if progress_bar:
                progress_count = self.update_progress_bar(progress_bar, exec_time, progress_count)
        infer_queue.wait_all()
        total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
        for infer_request_id in in_fly:
            times.append(infer_queue[infer_request_id].latency)
        return sorted(times), total_duration_sec, processed_frames, iteration

    def main_loop(self, requests, data_queue, batch_size, latency_percentile, progress_bar, pcseq):
        if self.api_type == 'sync':
            times, total_duration_sec, iteration = self.sync_inference(requests[0], data_queue, progress_bar)
        elif self.inference_only:
            times, total_duration_sec, iteration = self.async_inference_only(requests, progress_bar)
            fps = len(batch_size) * iteration / total_duration_sec
        else:
            times, total_duration_sec, processed_frames, iteration = \
                self.async_inference_full_mode(requests, data_queue, progress_bar, pcseq)
            fps = processed_frames / total_duration_sec
        median_latency_ms = percentile(times, latency_percentile)
        avg_latency_ms = sum(times) / len(times)
        min_latency_ms = times[0]
        max_latency_ms = times[-1]
        if self.api_type == 'sync':
            fps = len(batch_size) * 1000 / median_latency_ms
        if pcseq:
            for group in self.latency_groups:
                if group.times:
                    group.times.sort()
                    group.avg = sum(group.times) / len(group.times)
                    group.min = group.times[0]
                    group.max = group.times[-1]
        if progress_bar:
            progress_bar.finish()
        return fps, median_latency_ms, avg_latency_ms, min_latency_ms, max_latency_ms, total_duration_sec, iteration
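# A minimal usage sketch of the class above, assuming this module's imports
# (Core, the logger, helper constants) are in scope and an IR file 'model.xml'
# exists on disk; the file name and device choice are illustrative assumptions.
if __name__ == '__main__':
    benchmark = Benchmark(device='CPU', number_iterations=10, api_type='sync')
    model = benchmark.read_model('model.xml')  # resolves model.bin next to the XML
    compiled = benchmark.core.compile_model(model, benchmark.device)
    requests = benchmark.create_infer_requests(compiled)
    warmup_latency = benchmark.first_infer(requests)  # one warm-up run before timing
    print(benchmark.get_version_info(), warmup_latency)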
class OpenVINOLauncher(Launcher):
    __provider__ = 'openvino'

    @classmethod
    def parameters(cls):
        parameters = super().parameters()
        parameters.update(DLSDK_LAUNCHER_PARAMETERS)
        return parameters

    def __init__(self, config_entry, model_name='', delayed_model_loading=False,
                 preprocessor=None, postpone_inputs_configuration=False):
        super().__init__(config_entry, model_name=model_name)
        self._set_variable = False
        self.ie_config = self.config.get('ie_config')
        self.ie_core = Core()
        if self.ie_config:
            ov_set_config(self.ie_core, self.ie_config)
        self._delayed_model_loading = delayed_model_loading
        dlsdk_launcher_config = DLSDKLauncherConfigValidator(
            'OpenVINO_Launcher', fields=self.parameters(), delayed_model_loading=delayed_model_loading,
        )
        dlsdk_launcher_config.validate(self.config, ie_core=self.ie_core)
        device = self.config['device'].split('.')
        self._device = '.'.join((device[0].upper(), device[1])) if len(device) > 1 else device[0].upper()
        self.dynamic_shapes_policy = self.get_value_from_config('_undefined_shapes_resolving_policy')
        self._async_mode = False
        self._prepare_ie()
        self._postpone_input_configuration = postpone_inputs_configuration
        self._preprocess_info = {}
        self._preprocess_steps = []
        self.disable_resize_to_input = False
        self._do_reshape = False
        self._output_layouts = {}
        self._output_precisions = {}
        self.dyn_input_layers = []
        self._partial_shapes = {}
        self.is_dynamic = False
        self.preprocessor = preprocessor
        self.infer_request = None
        self._num_requests = None
        if not delayed_model_loading:
            self._model, self._weights = automatic_model_search(
                self._model_name, self.get_value_from_config('model'),
                self.get_value_from_config('weights'), self.get_value_from_config('_model_type')
            )
            self.load_network(log=not postpone_inputs_configuration, preprocessing=preprocessor)
            self.allow_reshape_input = self.get_value_from_config('allow_reshape_input') and self.network is not None
            if not postpone_inputs_configuration:
                self.try_to_set_default_layout()
        else:
            self.allow_reshape_input = self.get_value_from_config('allow_reshape_input')
        self._target_layout_mapping = {}
        self._lstm_inputs = None
        if '_list_lstm_inputs' in self.config:
            self._configure_lstm_inputs()
        self.reset_memory_state = self.get_value_from_config('reset_memory_state')

    @classmethod
    def validate_config(cls, config, delayed_model_loading=False, fetch_only=False, uri_prefix=''):
        field_uri = uri_prefix or 'launcher.{}'.format(cls.__provider__)
        return DLSDKLauncherConfigValidator(
            field_uri, fields=cls.parameters(), delayed_model_loading=delayed_model_loading).validate(
            config, field_uri=field_uri, validation_scheme=cls.validation_scheme(), fetch_only=fetch_only)

    def try_to_set_default_layout(self):
        if self.get_value_from_config('_model_type') == 'tf':
            self.default_layout = 'NHWC'
        input_nodes = self.network.inputs if self.network else self.exec_network.inputs
        for input_node in input_nodes:
            shape = parse_partial_shape(input_node.get_node().partial_shape)
            if len(shape) != 4:
                continue
            if input_node.get_node().layout.has_name('C'):
                channel_dim = input_node.get_node().layout.get_index_by_name('C')
                if channel_dim in [3, -1]:
                    self.default_layout = 'NHWC'
                    return
            if shape[-1] in [1, 2, 3, 4, 6, 9]:
                self.default_layout = 'NHWC'
                return
        self.default_layout = 'NCHW'
        return

    @property
    def device(self):
        return self._device

    @property
    def inputs(self):
        if self.network is None:
            inputs = self.exec_network.inputs
        else:
            inputs = self.network.inputs
        return {input_info.get_node().friendly_name: input_info.get_node() for input_info in inputs}
    @property
    def batch(self):
        return self._batch

    @property
    def output_blob(self):
        if hasattr(self, 'original_outputs'):
            return next(iter(self.original_outputs)).get_node().friendly_name
        return None

    @property
    def additional_output_mapping(self):
        if hasattr(self, 'out_tensor_name_to_node'):
            return self.out_tensor_name_to_node
        return {}

    def predict(self, inputs, metadata=None, return_raw=False, **kwargs):
        if self._lstm_inputs:
            return self._predict_sequential(inputs, metadata, return_raw)
        results = []
        raw_results = []
        for infer_inputs in inputs:
            if self._do_reshape:
                input_shapes = {layer_name: data.shape for layer_name, data in infer_inputs.items()}
                self._reshape_input(input_shapes)
            if self.infer_request is None:
                self.infer_request = self.exec_network.create_infer_request()
            feed_dict = {self.input_to_tensor_name[layer_name]: data for layer_name, data in infer_inputs.items()}
            outputs = self.infer_request.infer(inputs=feed_dict)
            raw_results.append(outputs)
            results.append({out_node.get_node().friendly_name: out_res for out_node, out_res in outputs.items()})
            if self.reset_memory_state:
                for state in self.infer_request.query_state():
                    state.reset()
        if metadata is not None:
            self._fill_meta(metadata, None if not self.dyn_input_layers else inputs[-1])
        self._do_reshape = False
        if return_raw:
            return results, raw_results
        return results

    def _predict_sequential(self, inputs, metadata=None, return_raw=False, **kwargs):
        lstm_inputs_feed = self._fill_lstm_inputs()
        if not self.infer_request:
            self.infer_request = self.exec_network.create_infer_request()
        results = []
        raw_results = []
        for feed_dict in inputs:
            feed_dict.update(lstm_inputs_feed)
            infer_inputs = {self.input_to_tensor_name[layer_name]: data for layer_name, data in feed_dict.items()}
            out_tensors = self.infer_request.infer(infer_inputs)
            output_result = {
                out_node.get_node().friendly_name: out_tensor for out_node, out_tensor in out_tensors.items()
            }
            lstm_inputs_feed = self._fill_lstm_inputs(output_result)
            results.append(output_result)
            if return_raw:
                raw_results.append(out_tensors)
            if self._do_reshape:
                input_shapes = {layer_name: data.shape for layer_name, data in feed_dict.items()}
                self._reshape_input(input_shapes)
        if metadata is not None:
            self._fill_meta(metadata, None if not self.dyn_input_layers else inputs[-1])
        self._do_reshape = False
        if return_raw:
            return results, raw_results
        return results

    def predict_async(self, ir, inputs, metadata=None, context=None, **kwargs):
        infer_inputs = inputs[0]
        feed_dict = {self.input_to_tensor_name[name]: data for name, data in infer_inputs.items()}
        if metadata is not None:
            self._fill_meta(metadata, None if not self.dyn_input_layers else infer_inputs)
        ir.infer(feed_dict, metadata, context)

    def _fill_meta(self, metadata, inputs=None):
        for meta_ in metadata:
            meta_['input_shape'] = self.inputs_info_for_meta(inputs)
            if self._output_layouts:
                meta_['output_layout'] = self._output_layouts
            if self._output_precisions:
                meta_['output_precision'] = self._output_precisions

    def _is_hetero(self):
        return self._device.startswith(HETERO_KEYWORD)

    def _is_multi(self):
        return self._device.startswith(MULTI_DEVICE_KEYWORD)

    def _devices_list(self):
        device = self._device
        if self._is_hetero():
            device = self._device[len(HETERO_KEYWORD):]
        if self._is_multi():
            device = self._device[len(MULTI_DEVICE_KEYWORD):]
            device = re.sub(NIREQ_REGEX, '', device)
        return [platform_.upper().strip() for platform_ in device.split(',')]

    def _set_affinity(self, affinity_map_path):
        auto_affinity = self.ie_core.query_model(self.network, self._device)
        custom_affinity = read_yaml(affinity_map_path)
        for layer in custom_affinity:
            if layer not in auto_affinity:
                raise ConfigError('Layer \'{layer}\' is not present in network'.format(layer=layer))
        for node in self.network.get_ordered_ops():
            layer_name = node.friendly_name
            device = custom_affinity.get(layer_name, auto_affinity.get(layer_name))
            if device is None:
                continue
            if not (device in self._devices_list() or device == self._device):
                raise ConfigError(
                    'Device \'{device}\' set for \'{layer}\' layer is not present in '
                    'provided configuration \'{configuration}\''.format(
                        device=device, layer=layer_name, configuration=self._device
                    )
                )
            node.rt_info["affinity"] = device

    def _is_vpu(self):
        device_list = map(lambda device: device.split('.')[0], self._devices_list())
        return contains_any(device_list, VPU_PLUGINS)

    @property
    def num_requests(self):
        return self._num_requests

    @num_requests.setter
    def num_requests(self, num_ireq: int):
        if num_ireq != self._num_requests:
            self._num_requests = num_ireq

    @property
    def async_mode(self):
        return self._async_mode

    @async_mode.setter
    def async_mode(self, flag):
        for device in self._devices_list():
            ov_set_config(
                self.ie_core, {'PERFORMANCE_HINT': 'THROUGHPUT' if flag else 'LATENCY'}, device=device.upper())
        self._async_mode = flag

    def get_async_requests(self):
        self._set_nireq()
        return [
            AsyncInferRequestWrapper(ireq_id, self.exec_network.create_infer_request())
            for ireq_id in range(self.num_requests)
        ]

    def _reshape_input(self, shapes, make_dynamic=False):
        if hasattr(self, 'exec_network'):
            del self.exec_network
        if self.infer_request is not None:
            del self.infer_request
            self.infer_request = None
        partial_shapes = {}
        for name, shape in shapes.items():
            p_shape = PartialShape(
                [Dimension(d) if not isinstance(d, tuple) else Dimension(d[0], d[1]) for d in shape])
            partial_shapes[self.input_to_index[name]] = p_shape
        self.network.reshape(partial_shapes)
        self.dyn_input_layers, self._partial_shapes = self.get_dynamic_inputs(self.network)
        if self.dyn_input_layers and make_dynamic:
            return
        self.exec_network = self.ie_core.compile_model(self.network, self.device)
        self.infer_request = self.exec_network.create_infer_request()

    @staticmethod
    def reshape_network(network, shapes):
        partial_shapes = {}
        for name, shape in shapes.items():
            p_shape = PartialShape(
                [Dimension(d) if not isinstance(d, tuple) else Dimension(d[0], d[1]) for d in shape])
            partial_shapes[name] = p_shape
        network.reshape(partial_shapes)
        return network

    def _align_data_shape(self, data, input_blob, data_layout):
        input_shape = self.inputs[input_blob].shape
        data_batch_size = data.shape[0]
        input_batch_size = input_shape[0]
        if data_batch_size < input_batch_size:
            warning_message = 'data batch {} is not equal to model input batch size {}.'.format(
                data_batch_size, input_batch_size)
            warning(warning_message)
            diff_number = input_batch_size - data_batch_size
            filled_part = [data[-1]] * diff_number
            data = np.concatenate([data, filled_part])
        return data.reshape(input_shape) if not self.disable_resize_to_input else data

    def _prepare_ie(self, log=True):
        if log:
            print_info('IE version: {}'.format(get_version()))
        if self._is_multi():
            self._prepare_multi_device(log)
        else:
            self.async_mode = self.get_value_from_config('async_mode')
            if log:
                self._log_versions()
        self._device_specific_configuration()

    def _device_specific_configuration(self):
        cpu_extensions = self.config.get('cpu_extensions')
        if 'CPU' in self._devices_list():
            if cpu_extensions:
                selection_mode = self.config.get('_cpu_extensions_mode')
                cpu_extensions = get_cpu_extension(cpu_extensions, selection_mode)
                self.ie_core.add_extension(str(cpu_extensions), 'CPU')
            ov_set_config(
                self.ie_core, {'CPU_BIND_THREAD': 'YES' if not self._is_multi() else 'NO'}, device='CPU')
        gpu_extensions = self.config.get('gpu_extensions')
        if 'GPU' in self._devices_list():
            config = {}
            if gpu_extensions:
                config['CONFIG_FILE'] = str(gpu_extensions)
            if self._is_multi() and 'CPU' in self._devices_list():
                config['CLDNN_PLUGIN_THROTTLE'] = '1'
            if config:
                ov_set_config(self.ie_core, config, device='GPU')
        if self._is_vpu():
            device_list = map(lambda device: device.split('.')[0], self._devices_list())
            devices = [vpu_device for vpu_device in VPU_PLUGINS if vpu_device in device_list]
            log_level = self.config.get('_vpu_log_level')
            if log_level:
                for device in devices:
                    ov_set_config(self.ie_core, {'LOG_LEVEL': log_level}, device=device)
        device_config = self.config.get('device_config')
        if device_config:
            self._set_device_config(device_config)

    def _set_nireq(self):
        num_requests = self.config.get('num_requests')
        if num_requests is not None and num_requests != 'AUTO':
            num_requests = get_or_parse_value(num_requests, casting_type=int)
            if len(num_requests) != 1:
                raise ConfigError('Several values for _num_requests specified')
            self._num_requests = num_requests[0]
            if self._num_requests != 1 and not self.async_mode:
                warning('{} infer requests in sync mode is not supported. Only 1 infer request will be used.'
                        .format(self._num_requests))
                self._num_requests = 1
        elif not self.async_mode:
            self._num_requests = 1
        else:
            self._num_requests = self.auto_num_requests()
        if self.async_mode:
            print_info('Async mode activated')
            print_info('Infer requests number: {}'.format(self.num_requests))

    def auto_num_requests(self, return_list=False):
        platform_list = self._devices_list()
        concurrency_device = {'CPU': 1, 'GPU': 1, 'HDDL': 100, 'MYRIAD': 4}
        if hasattr(self, 'exec_network') and self.exec_network is not None:
            if hasattr(self.exec_network, 'get_metric'):
                num_requests = self.exec_network.get_metric('OPTIMAL_NUMBER_OF_INFER_REQUESTS')
            else:
                num_requests = self.exec_network.get_property('OPTIMAL_NUMBER_OF_INFER_REQUESTS')
            return num_requests
        if 'CPU' in platform_list and len(platform_list) == 1:
            min_requests = [4, 5, 3]
            cpu_count = multiprocessing.cpu_count()
            for min_request in min_requests:
                if cpu_count % min_request == 0:
                    # integer division keeps the request count an int
                    num_req = max(min_request, cpu_count // min_request)
                    return num_req if not return_list else [num_req]
        if 'GPU' in platform_list and len(platform_list) == 1:
            return 2 if not return_list else [2]
        per_device_requests = []
        for device in platform_list:
            per_device_requests.append(concurrency_device.get(device, 1))
        return per_device_requests if return_list else sum(per_device_requests)

    def _prepare_multi_device(self, log=True):
        async_mode = self.get_value_from_config('async_mode')
        if not async_mode:
            warning('Using multi device in sync mode is not applicable. Async mode will be used.')
        num_per_device_req = re.findall(NIREQ_REGEX, self._device)
        device_list = self._devices_list()
        num_devices = len(device_list)
        if num_per_device_req:
            brackets = r"(\()|(\))"
            num_per_device_requests = [int(re.sub(brackets, '', nreq)) for nreq in num_per_device_req]
            if 'num_requests' in self.config:
                warning(
                    "number of requests already provided in device name specification. "
                    "'num_requests' option will be ignored."
                )
        elif 'num_requests' in self.config and self.config['num_requests'] != 'AUTO':
            num_per_device_requests = get_or_parse_value(self.config['num_requests'], casting_type=int)
        else:
            num_per_device_requests = self.auto_num_requests(return_list=True)
        if len(num_per_device_requests) == 1:
            num_per_device_requests = [num_per_device_requests[0]] * len(device_list)
        if num_devices != len(num_per_device_requests):
            raise ConfigError('num requests for all {} should be specified'.format(num_devices))
        self._num_requests = sum(num_per_device_requests) * 2
        self._async_mode = True
        if log:
            self._log_versions()
            print_info('Async mode activated')

    def _set_device_config(self, device_config):
        if not isinstance(device_config, dict):
            raise ConfigError('device configuration should be a dict-like')
        if all(not isinstance(value, dict) for value in device_config.values()):
            ov_set_config(self.ie_core, dict(device_config), device=self.device)
        else:
            for key, value in device_config.items():
                if isinstance(value, dict):
                    if key in self._devices_list():
                        if key not in self.ie_core.available_devices:
                            warnings.warn('{} device is unknown. Config loading may lead to error.'.format(key))
                        ov_set_config(self.ie_core, dict(value), device=key)
                    else:
                        warnings.warn(
                            f'Configuration for {key} will be skipped as device is not listed in evaluation device')
                else:
                    warnings.warn('Option {key}: {value} will be skipped because device to which it should be '
                                  'applied is not specified or option is not a dict-like'.format(key=key, value=value))

    def _log_versions(self):
        versions = self.ie_core.get_versions(self._device)
        print_info("Loaded {} plugin version:".format(self._device))
        for device_name, device_version in versions.items():
            print_info("    {device_name} - {descr}: {maj}.{min}.{num}".format(
                device_name=device_name, descr=device_version.description,
                maj=device_version.major, min=device_version.minor, num=device_version.build_number
            ))

    def _create_network(self, input_shapes=None):
        model_path = Path(self._model)
        compiled_model = model_path.suffix == '.blob'
        if compiled_model:
            self.network = None
            self.exec_network = self.ie_core.import_model(str(self._model), self._device)
            self.original_outputs = self.exec_network.outputs
            model_batch = self._get_model_batch_size()
            self._batch = model_batch if model_batch is not None else 1
            return
        if self._weights is None and self._model.suffix != '.onnx':
            self._weights = model_path.parent / (model_path.name.split(model_path.suffix)[0] + '.bin')
        self.network = self.read_network(self._model, self._weights)
        self.original_outputs = self.network.outputs
        self.out_tensor_name_to_node = {}
        for out in self.original_outputs:
            if not out.names:
                continue
            for name in out.names:
                self.out_tensor_name_to_node[name] = out.get_node().friendly_name
        model_batch = self._get_model_batch_size()
        model_batch = 1 if model_batch is None else model_batch
        outputs = self.config.get('outputs')
        if outputs:
            def output_preprocessing(output_string):
                output_tuple = string_to_tuple(output_string, casting_type=None)
                if len(output_tuple) == 1:
                    return output_string
                return output_tuple[0], int(output_tuple[1])

            preprocessed_outputs = [output_preprocessing(output) for output in outputs]
            self.network.add_outputs(preprocessed_outputs)
        if input_shapes is not None:
            self.network.reshape(input_shapes)
        self._batch = self.config.get('batch', model_batch)
        self._set_batch_size(self._batch)
        affinity_map_path = self.config.get('affinity_map')
        if affinity_map_path and self._is_hetero():
            self._set_affinity(affinity_map_path)
        elif affinity_map_path:
            warning('affinity_map config is applicable only for HETERO device')

    def _set_batch_size(self, batch_size):
        model_batch_size = self._get_model_batch_size()
        model_batch_size = 1 if model_batch_size is None else model_batch_size
        if batch_size is None:
            batch_size = model_batch_size
        if batch_size == model_batch_size:
            self._batch = batch_size
            return
        input_shapes = {}
        for input_node in self.network.inputs:
            layer_name = input_node.get_node().friendly_name
            if layer_name in self.const_inputs:
                input_shapes[layer_name] = parse_partial_shape(input_node.get_node().partial_shape)
            else:
                layer_shape = parse_partial_shape(input_node.get_node().partial_shape)
                layout = self.inputs[layer_name].layout
                if '...' in str(layout):
                    layout = self.get_layout_from_config(layer_name)
                else:
                    layout = str(layout).replace('[', '').replace(']', '').replace(',', '')
                batch_pos = layout.find('N')
                if batch_pos != -1:
                    layer_shape[batch_pos] = batch_size
                input_shapes[layer_name] = layer_shape
        self._reshape_input(input_shapes, batch_size == -1)
        self._batch = batch_size

    def _get_model_batch_size(self):
        input_nodes = self.network.inputs if self.network else self.exec_network.inputs
        input_info = input_nodes[0]
        if '...' in str(input_info.get_node().layout):
            layout = self.get_layout_from_config(input_info.get_node().friendly_name)
        else:
            layout = str(input_info.get_node().layout).replace('[', '').replace(']', '').replace(',', '')
        batch_pos = layout.find('N')
        if batch_pos != -1:
            return parse_partial_shape(input_info.partial_shape)[batch_pos]
        return None

    def load_network(self, network=None, log=False, preprocessing=None):
        if hasattr(self, 'exec_network'):
            del self.exec_network
        if hasattr(self, 'infer_request'):
            del self.infer_request
            self.infer_request = None
        if network is None:
            self._create_network()
        else:
            self.network = network
        if self.network is not None:
            self.dyn_input_layers, self._partial_shapes = self.get_dynamic_inputs(self.network)
            self.input_to_tensor_name = self.get_input_tensor_name_mapping(self.network)
            self.input_to_index = {inp.get_node().friendly_name: idx for idx, inp in enumerate(self.network.inputs)}
        if not self._postpone_input_configuration:
            self._set_precision()
            self._set_input_shape()
            self.dyn_input_layers, self._partial_shapes = self.get_dynamic_inputs(self.network)
            if log:
                self.print_input_output_info(self.network if self.network is not None else self.exec_network)
            if preprocessing:
                self._set_preprocess(preprocessing)
            model_batch = self._get_model_batch_size()
            model_batch = 1 if model_batch is None else model_batch
            self._batch = self.config.get('batch', model_batch)
            self._set_batch_size(self._batch)
            self.try_to_set_default_layout()
            if self.network and not preprocessing and (not self.dyn_input_layers or self.is_dynamic):
                self.exec_network = self.ie_core.compile_model(self.network, self._device)
                self.infer_request = self.exec_network.create_infer_request()

    def update_input_configuration(self, input_config):
        self.config['inputs'] = input_config
        self._set_precision()
        self._set_input_shape()
        self.try_to_set_default_layout()
        self._set_batch_size(self.config.get('batch'))
        self.dyn_input_layers, self._partial_shapes = self.get_dynamic_inputs(self.network)
        self.print_input_output_info(self.network if self.network is not None else self.exec_network)
        if self.preprocessor:
            self._set_preprocess(self.preprocessor)
        if self.network:
            self.exec_network = self.ie_core.compile_model(self.network, self._device)
            self.infer_request = self.exec_network.create_infer_request()

    @staticmethod
    def get_dynamic_inputs(network):
        def is_dynamic(data_info):
            if hasattr(data_info, 'is_dynamic'):
                return data_info.is_dynamic
            return -1 in data_info.shape or not data_info.shape

        inputs_with_undefined_shapes = []
        partial_shapes = {}
        if network is None:
            return inputs_with_undefined_shapes, partial_shapes
        for input_info in network.inputs:
            input_node = input_info.get_node()
            input_shape = input_node.get_partial_shape()
            if is_dynamic(input_shape):
                inputs_with_undefined_shapes.append(input_node.friendly_name)
                partial_shapes[input_node.friendly_name] = input_shape
        return inputs_with_undefined_shapes, partial_shapes

    @staticmethod
    def get_input_tensor_name_mapping(network):
        inputs_mapping = {}
        for idx, input_node in enumerate(network.inputs):
            tensor_names = list(input_node.get_names())
            if not tensor_names:
                inputs_mapping[input_node.get_node().friendly_name] = idx
            else:
                inputs_mapping[input_node.get_node().friendly_name] = tensor_names[0]
        return inputs_mapping

    @property
    def dyn_batch_only(self):
        if not self.dyn_input_layers:
            return True
        for input_name in self.dyn_input_layers:
            partial_shape = self._partial_shapes[input_name]
            num_undef = 0
            for i in partial_shape:
                if i == -1:
                    num_undef += 1
            if num_undef > 1:
                return False
            layout = self.inputs[input_name].layout
            if '...' in str(layout):
                layout = self.get_layout_from_config(input_name)
            else:
                layout = str(layout).replace('[', '').replace(']', '').replace(',', '')
            if not layout:
                return False
            for dim, layout_dim in zip(partial_shape, layout):
                if dim == -1 and layout_dim != 'N':
                    return False
        return True

    def get_layout_from_config(self, input_name):
        for input_config in self.config.get('inputs', []):
            if input_config.get('name', '') != input_name:
                continue
            return input_config.get('layout', '')
        return ''

    @property
    def layout_mapping(self):
        def prepare_layout_string(layout):
            layout = str(layout)
            return layout.replace('[', '').replace(']', '').replace(',', '')

        inputs = self.network.inputs if self.network is not None else self.exec_network.inputs
        layouts = {}
        for input_node in inputs:
            layouts[input_node.get_node().friendly_name] = prepare_layout_string(input_node.get_node().layout)
        return layouts

    def load_ir(self, xml_path, bin_path, log=False):
        self._model = xml_path
        self._weights = bin_path
        self.async_mode = True
        self.load_network(log=log)
        self.try_to_set_default_layout()

    def read_network(self, model, weights):
        if weights is not None:
            network = self.ie_core.read_model(model=str(model), weights=str(weights))
        else:
            network = self.ie_core.read_model(model=str(model))
        return network

    def inputs_info_for_meta(self, inputs=None):
        if inputs:
            return {layer_name: np.shape(data) for layer_name, data in inputs.items()}
        return {
            layer_name: parse_partial_shape(layer.get_partial_shape())
            for layer_name, layer in self.inputs.items()
            if layer_name not in self.const_inputs + self.image_info_inputs}

    @property
    def lstm_inputs(self):
        return self._lstm_inputs

    def initialize_undefined_shapes(self, input_data, template_shapes=None):
        if self.dynamic_shapes_policy in ['default', 'dynamic']:
            try:
                if template_shapes:
                    input_shapes = {layer_name: template_shapes.get(layer_name, data.shape)
                                    for layer_name, data in input_data[0].items()}
                    self._reshape_input(input_shapes)
                    self.load_network(self.network)
                    self.is_dynamic = True
                if not hasattr(self, 'exec_network') or self.exec_network is None:
                    self.is_dynamic = True
                    self.load_network(self.network)
                self.exec_network.infer_new_request({
                    self.input_to_tensor_name[k]: data for k, data in input_data[0].items()})
                return
            except RuntimeError as e:
                if self.dynamic_shapes_policy == 'dynamic':
                    raise e
        self.is_dynamic = False
        self._reshape_input({layer_name: data.shape for layer_name, data in input_data[0].items()})

    def resolve_undefined_batch(self):
        if self.dynamic_shapes_policy in ['default', 'dynamic']:
            try:
                self.is_dynamic = True
                self.load_network(self.network)
            except RuntimeError as e:
                if self.dynamic_shapes_policy == 'dynamic':
                    raise e
                self.is_dynamic = False
        if not self.is_dynamic:
            self.load_network(self.network)

    def fit_to_input(self, data, layer_name, layout, precision, template=None):
        if precision is None:
            precision = format_map[self.inputs[layer_name].element_type.get_type_name()]
        if layer_name in self.dyn_input_layers:
            layer_rang = len(parse_partial_shape(self._partial_shapes[layer_name]))
            input_template = template.get(layer_name) if template else template
            data, l_template = self._data_to_blob_dyn(layer_rang, data, layout, input_template)
            layer_shape = data.shape
            if l_template is not None:
                template[layer_name] = l_template
        else:
            layer_shape = tuple(self.inputs[layer_name].shape)
            precision = format_map[self.inputs[layer_name].element_type.get_type_name()]
            data = self._data_to_blob(layer_shape, data, layout)
        if precision:
            data = data.astype(precision)
        if layer_name in self.dyn_input_layers:
            self._do_reshape = not self.is_dynamic
            return data, template
        data_shape = np.shape(data)
        if data_shape != layer_shape:
            if self.allow_reshape_input:
                self._do_reshape = True
                return data
        return self._align_data_shape(data, layer_name, layout)

    @staticmethod
    def _data_to_blob_dyn(layer_rang, data, layout, template=None):
        data_shape = np.shape(data)
        if len(data_shape) - layer_rang == 1 and data_shape[0] == 1:
            if len(data_shape) == len(layout):
                data = np.transpose(data, layout)
                if template is not None and len(template) == layer_rang:
                    tmp_template = [1, ] + template
                    new_template = [tmp_template[l_dim] for l_dim in layout][1:]
                    template = new_template
            data = data[0]
            data_shape = np.shape(data)
        if template is not None:
            if len(template) < np.ndim(data):
                template = [1] * (np.ndim(data) - len(template)) + template
            if len(template) > np.ndim(data):
                template = template[0]
        if len(layout) == len(data_shape):
            if template is not None:
                new_template = [template[l_dim] for l_dim in layout]
                template = new_template
            return np.transpose(data, layout), template
        return np.array(data), template

    def _data_to_blob(self, layer_shape, data, layout):  # pylint:disable=R0911,R0912
        data_shape = np.shape(data)
        if len(layer_shape) == 4:
            if len(data_shape) == 5:
                data = data[0]
            if len(data_shape) == 3:
                data = np.expand_dims(data, -1)
            data_shape = np.shape(data)
            if len(data_shape) < 4:
                if np.size(np.squeeze(np.zeros(layer_shape))) == np.size(np.squeeze(np.zeros(data_shape))):
                    return np.resize(data, layer_shape)
            return np.transpose(data, layout) if layout is not None else data
        if len(layer_shape) == 2:
            if len(data_shape) == 1:
                return np.transpose([data])
            if len(data_shape) > 2:
                if all(dim == 1 for dim in layer_shape) and all(dim == 1 for dim in data_shape):
                    return np.resize(data, layer_shape)
                if len(np.squeeze(np.zeros(layer_shape))) == len(np.squeeze(np.zeros(data_shape))):
                    return np.resize(data, layer_shape)
        if len(layer_shape) == 3 and len(data_shape) == 4:
            return np.transpose(data, layout)[0] if layout is not None else data[0]
        if len(layer_shape) == 1:
            return np.resize(data, layer_shape)
        if (len(data_shape) == 3) and (len(layer_shape) == 2) and (data_shape[0] == 1) and (
                data_shape[1] == 1) and self.allow_reshape_input:
            return data[0]
        if layout is not None and len(layer_shape) == len(layout):
            return np.transpose(data, layout)
        if (len(layer_shape) == 1 and len(data_shape) > 1 and
                len(np.squeeze(np.zeros(layer_shape))) == len(np.squeeze(np.zeros(data_shape)))):
            return np.resize(data, layer_shape)
        return np.array(data)

    def _set_precision(self):
        config_inputs = self.config.get('inputs', [])
        for input_config in config_inputs:
            if 'precision' in input_config:
                if self.network:
                    self.inputs[input_config['name']].set_element_type(
                        PRECISION_STR_TO_TYPE[input_config['precision'].upper()]
                    )

    def _set_input_shape(self):
        if not self.network:
            return
        config_inputs = self.config.get('inputs', [])
        input_shapes = {}
        make_dynamic = False
        for input_config in config_inputs:
            if 'shape' in input_config:
                input_shapes[input_config['name']] = input_config['shape']
                if -1 in input_config['shape']:
                    make_dynamic = True
        if not input_shapes:
            return
        orig_input_shapes = {input_name: parse_partial_shape(input_info.partial_shape)
                             for input_name, input_info in self.inputs.items()}
        orig_input_shapes.update(input_shapes)
        self._reshape_input(orig_input_shapes, make_dynamic)

    def _configure_lstm_inputs(self):
        lstm_mapping = {}
        config_inputs = self.config.get('inputs', [])
        for input_config in config_inputs:
            if input_config['type'] == 'LSTM_INPUT':
                lstm_mapping[input_config['name']] = input_config['value']
        self._lstm_inputs = lstm_mapping

    def _fill_lstm_inputs(self, infer_outputs=None):
        feed_dict = {}
        for lstm_var, output_layer in self._lstm_inputs.items():
            layer_shape = parse_partial_shape(self.inputs[lstm_var].partial_shape)
            if infer_outputs:
                output_layer = postprocess_output_name(output_layer, infer_outputs)
            input_data = infer_outputs[output_layer].reshape(layer_shape) if infer_outputs else np.zeros(
                layer_shape, dtype=format_map[self.inputs[lstm_var].element_type.get_type_name()]
            )
            feed_dict[lstm_var] = input_data
        return feed_dict

    @staticmethod
    def print_input_output_info(network, prefix=None):
        if prefix:
            print_info('{} - Input info:'.format(prefix))
        else:
            print_info('Input info:')
        network_inputs = network.inputs
        network_outputs = network.outputs
        for input_info in network_inputs:
            input_node = input_info.get_node()
            print_info('\tNode name: {}'.format(input_node.friendly_name))
            print_info('\tTensor names: {}'.format(', '.join(input_info.get_names())))
            print_info('\tprecision: {}'.format(input_node.element_type.get_type_name()))
            print_info('\tshape: {}\n'.format(parse_partial_shape(input_node.get_partial_shape())))
        print_info('Output info:')
        for output_info in network_outputs:
            out_node = output_info.get_node()
            print_info('\tNode name: {}'.format(out_node.friendly_name))
            print_info('\tTensor names: {}'.format(', '.join(output_info.get_names())))
            precision = out_node.get_output_element_type(0).get_type_name()
            print_info('\tprecision: {}'.format(precision))
            shape = parse_partial_shape(out_node.get_output_partial_shape(0))
            print_info('\tshape: {}\n'.format(shape))

    def _set_preprocess(self, preprocess):
        if preprocess.ie_processor is None:
            return
        if self.network is not None:
            self.disable_resize_to_input = False
            preprocess_steps = preprocess.ie_preprocess_steps
            if not preprocess_steps:
                return
            for input_name, input_info in self.network.input_info.items():
                if input_name in self.const_inputs + self.image_info_inputs:
                    continue
                for (name, value) in preprocess_steps:
                    setattr(input_info.preprocess_info, name, value)
                if preprocess.ie_processor.has_normalization():
                    channel_id = input_info.layout.find('C')
                    if channel_id != -1:
                        num_channels = input_info.input_data.shape[channel_id]
                        preprocess.ie_processor.set_normalization(num_channels, input_info.preprocess_info)
            self.disable_resize_to_input = preprocess.ie_processor.has_resize()
            self._use_set_blob = self.disable_resize_to_input
            self.load_network(self.network)
            self._preprocess_steps = preprocess_steps
            return
        preprocess_info_by_input = {}
        preprocess_info = preprocess.preprocess_info
        for input_name in self.inputs:
            if input_name in self.const_inputs + self.image_info_inputs:
                continue
            if preprocess.ie_processor.has_normalization():
                channel_id = self.inputs[input_name].layout.find('C')
                if channel_id != -1:
                    num_channels = self.inputs[input_name].shape[channel_id]
                    preprocess.ie_processor.set_normalization(num_channels, preprocess_info)
            preprocess_info_by_input[input_name] = preprocess_info
        self._preprocess_info = preprocess_info_by_input
        self.disable_resize_to_input = preprocess.ie_processor.has_resize()

    def get_model_file_type(self):
        return self._model.suffix

    def get_infer_queue(self, log=True):
        if self.config.get('num_requests', 'AUTO') == 'AUTO':
            num_requests = self.auto_num_requests()
        else:
            num_requests = self.num_requests or 0
        queue = AsyncInferQueue(self.exec_network, num_requests)
        if log:
            print_info('Prepared async infer queue with {} requests'.format(len(queue)))
        else:
            debug('Prepared async infer queue with {} requests'.format(len(queue)))
        return queue

    def prepare_data_for_request(self, inputs, batch_meta, batch_id, batch_input_ids,
                                 batch_annotation, batch_identifiers):
        infer_inputs = inputs[0]
        feed_dict = {self.input_to_tensor_name[name]: data for name, data in infer_inputs.items()}
        if batch_meta is not None:
            self._fill_meta(batch_meta, None if not self.dyn_input_layers else infer_inputs)
        context = (batch_id, batch_input_ids, batch_annotation, batch_identifiers, batch_meta)
        return feed_dict, context

    @staticmethod
    def get_result_from_request(request, return_raw=False):
        preprocessed_results = [{out.get_node().friendly_name: data for out, data in request.results.items()}]
        if return_raw:
            return preprocessed_results, [request.results]
        return preprocessed_results

    def input_shape(self, input_name):
        return parse_partial_shape(self.inputs[input_name].get_partial_shape())

    def release(self):
        if 'network' in self.__dict__:
            del self.network
        if 'infer_request' in self.__dict__:
            del self.infer_request
        if 'exec_network' in self.__dict__:
            del self.exec_network
        if 'ie_core' in self.__dict__:
            del self.ie_core
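# A minimal end-to-end sketch for the launcher above, assuming a valid
# accuracy-checker config entry and an IR pair ('model.xml' / 'model.bin') on
# disk; the config keys shown are illustrative assumptions about the
# surrounding framework, not a fixed API.
if __name__ == '__main__':
    launcher = OpenVINOLauncher({'framework': 'openvino', 'device': 'CPU',
                                 'model': 'model.xml', 'weights': 'model.bin'})
    input_name = next(iter(launcher.inputs))  # friendly name of the first input
    dummy = np.zeros(launcher.input_shape(input_name), dtype=np.float32)
    predictions = launcher.predict([{input_name: dummy}])  # one batch, one input
    launcher.release()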