def build(cls, model_name, model_version, model_xml, model_bin, mapping_config,
          batch_size_param, shape_param, num_ireq, target_device, plugin_config):
    plugin = IEPlugin(device=target_device,
                      plugin_dirs=GLOBAL_CONFIG['plugin_dir'])
    if GLOBAL_CONFIG['cpu_extension'] is not None \
            and 'CPU' in target_device:
        plugin.add_cpu_extension(GLOBAL_CONFIG['cpu_extension'])
    net = IENetwork(model=model_xml, weights=model_bin)
    batching_info = BatchingInfo(batch_size_param)
    shape_info = ShapeInfo(shape_param, net.inputs)
    if batching_info.mode == BatchingMode.FIXED:
        net.batch_size = batching_info.batch_size
    else:
        batching_info.batch_size = net.batch_size

    effective_batch_size = batching_info.get_effective_batch_size()
    logger.debug("[Model: {}, version: {}] --- effective batch size - {}"
                 .format(model_name, model_version, effective_batch_size))
    ###############################
    # Initial shape setup
    if shape_info.mode == ShapeMode.FIXED:
        logger.debug("[Model: {}, version: {}] --- Setting shape to "
                     "fixed value: {}".format(model_name, model_version,
                                              shape_info.shape))
        net.reshape(shape_info.shape)
    elif shape_info.mode == ShapeMode.AUTO:
        logger.debug("[Model: {}, version: {}] --- Setting shape to "
                     "automatic".format(model_name, model_version))
        net.reshape({})
    elif shape_info.mode == ShapeMode.DEFAULT:
        logger.debug("[Model: {}, version: {}] --- Setting shape to "
                     "default".format(model_name, model_version))
    ###############################
    # Creating free infer requests indexes queue
    free_ireq_index_queue = queue.Queue(maxsize=num_ireq)
    for ireq_index in range(num_ireq):
        free_ireq_index_queue.put(ireq_index)
    ###############################
    requests_queue = queue.Queue(
        maxsize=GLOBAL_CONFIG['engine_requests_queue_size'])

    exec_net = plugin.load(network=net, num_requests=num_ireq,
                           config=plugin_config)
    ir_engine = cls(model_name=model_name, model_version=model_version,
                    mapping_config=mapping_config, net=net, plugin=plugin,
                    exec_net=exec_net, batching_info=batching_info,
                    shape_info=shape_info,
                    free_ireq_index_queue=free_ireq_index_queue,
                    num_ireq=num_ireq, requests_queue=requests_queue,
                    target_device=target_device, plugin_config=plugin_config)
    return ir_engine

class InferenceEngineOpenVINO:
    def __init__(self, net_model_xml_path, device):
        self.device = device
        net_model_bin_path = os.path.splitext(net_model_xml_path)[0] + '.bin'
        self.net = IENetwork(model=net_model_xml_path, weights=net_model_bin_path)

        required_input_key = {'data'}
        assert required_input_key == set(self.net.inputs.keys()), \
            'Demo supports only topologies with the following input key: {}'.format(', '.join(required_input_key))
        required_output_keys = {'features', 'heatmaps', 'pafs'}
        assert required_output_keys.issubset(self.net.outputs.keys()), \
            'Demo supports only topologies with the following output keys: {}'.format(', '.join(required_output_keys))

        self.ie = IECore()
        self.exec_net = self.ie.load_network(network=self.net, num_requests=1, device_name=device)

    def infer(self, img):
        input_layer = next(iter(self.net.inputs))
        n, c, h, w = self.net.inputs[input_layer].shape
        if h != img.shape[0] or w != img.shape[1]:
            # Reshape the network to the incoming image size and reload it
            self.net.reshape({input_layer: (n, c, img.shape[0], img.shape[1])})
            self.exec_net = self.ie.load_network(network=self.net, num_requests=1, device_name=self.device)
        img = np.transpose(img, (2, 0, 1))[None, ]
        inference_result = self.exec_net.infer(inputs={'data': img})
        inference_result = (inference_result['features'][0],
                            inference_result['heatmaps'][0],
                            inference_result['pafs'][0])
        return inference_result

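A minimal usage sketch for the class above; the model path and frame source are placeholders, and cv2 is assumed to be imported alongside numpy:

# Hypothetical usage (paths are placeholders); infer() expects an HWC image.
engine = InferenceEngineOpenVINO('pose_model.xml', 'CPU')
features, heatmaps, pafs = engine.infer(cv2.imread('frame.jpg'))
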
def reshape(ie_network: IENetwork, batch_size: int):
    new_shapes = {}
    for input_layer_name, input_layer in ie_network.inputs.items():
        new_shapes[input_layer_name] = get_blob_shape(input_layer, batch_size)

    if new_shapes:
        logger.info('Resizing network to batch = {}'.format(batch_size))
        ie_network.reshape(new_shapes)

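A hedged usage sketch for the reshape() helper above; the model paths are placeholders, and get_blob_shape and logger must already be in scope:

# Hypothetical usage (paths are placeholders).
net = IENetwork(model='model.xml', weights='model.bin')
reshape(net, batch_size=4)  # every input blob is resized via get_blob_shape
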
def __init__(self, vc, model_path, pad):
    self.vc = vc  # video capture
    self.padding = pad
    self.model = model_path
    self.frame = None
    self.stopVideo = None  # Flag for stopping video loop
    self.colorbar_frame = None
    self.root = tk.Tk()
    self.panel = None
    self.colorbar_panel = None
    self.resolution = [500, 700]

    button_frame = tk.Frame()
    button_frame.pack(side="bottom")

    # create a button that, when pressed, will show the current frame
    btn_frame = tk.Button(button_frame, text="Show Frame",
                          command=self.show_frame)
    btn_frame.pack(side="left", padx=10, pady=10)

    # create a button that, when pressed, will start segmenting a video stream frame
    btn_segment = tk.Button(button_frame, text="Class map",
                            command=self.segment_classes)
    btn_segment.pack(side="left", padx=10, pady=10)

    # button for segmenting and showing absorption coefficient map
    btn_abs = tk.Button(button_frame, text="Heat map",
                        command=self.segment_heatmap)
    btn_abs.pack(side="left", padx=10, pady=10)

    # Button to start video stream display
    btn_start_display = tk.Button(button_frame, text="start video",
                                  command=self.start_video)
    btn_start_display.pack(side="right", padx=10, pady=10)

    # set a callback to handle when the window is closed
    self.root.wm_title("Material Segmentation")
    self.root.wm_protocol("WM_DELETE_WINDOW", self.on_close)

    # Set up NCS.
    # The model is set up once so that it doesn't have to be loaded multiple times.
    model_xml = model_path
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # Plugin initialization for Movidius stick
    plugin = IEPlugin(device="MYRIAD")

    # Read IR
    net = IENetwork(model=model_xml, weights=model_bin)
    self.input_blob = next(iter(net.inputs))

    n = 1
    c = 3
    h = self.resolution[0]
    w = self.resolution[1]
    net.reshape({self.input_blob: (n, c, h, w)})

    # Loading model to the plugin
    self.exec_net = plugin.load(network=net)  # Loading network multiple times takes a long time

def __call__(self, *input_values: NumericData) -> List[NumericData]:
    """Run computation on input values and return result."""
    input_values = [np.array(input_value) for input_value in input_values]
    input_shapes = [get_shape(input_value) for input_value in input_values]

    param_names = [param.friendly_name for param in self.parameters]

    if self.network_cache.get(str(input_shapes)) is None:
        capsule = Function.to_capsule(self.function)
        cnn_network = IENetwork(capsule)
        if self.function.is_dynamic():
            cnn_network.reshape(dict(zip(param_names, input_shapes)))
        # Convert unsupported inputs of the network
        _convert_inputs(cnn_network)
        self.network_cache[str(input_shapes)] = cnn_network
    else:
        cnn_network = self.network_cache[str(input_shapes)]

    executable_network = self.runtime.backend.load_network(cnn_network, self.runtime.backend_name)

    # Input validation
    if len(input_values) != len(self.parameters):
        raise UserInputError("Expected %s parameters, received %s.",
                             len(self.parameters), len(input_values))
    for parameter, input in zip(self.parameters, input_values):
        parameter_shape = parameter.get_output_partial_shape(0)
        input_shape = PartialShape(input.shape)
        if len(input.shape) > 0 and not parameter_shape.compatible(input_shape):
            raise UserInputError(
                "Provided tensor's shape: %s does not match the expected: %s.",
                input_shape,
                parameter_shape,
            )

    request = executable_network.requests[0]
    request.infer(dict(zip(param_names, input_values)))

    # Set order of output blobs compatible with nG Function
    result_buffers = [self.__get_ie_output_blob_buffer(request.output_blobs, result)
                      for result in self.results]

    # Since OV overwrites result data types, we have to convert results back to the original ones.
    original_dtypes = [get_dtype(result.get_output_element_type(0)) for result in self.results]
    converted_buffers = [buffer.astype(original_dtype)
                         for buffer, original_dtype in zip(result_buffers, original_dtypes)]
    return converted_buffers

def reshape(ie_network: ie.IENetwork, batch_size: int) -> ie.IENetwork:
    if batch_size and batch_size != ie_network.batch_size:
        new_shapes = {}
        for input_layer_name, input_layer in ie_network.inputs.items():
            layout = input_layer.layout
            if layout == 'C':
                new_shape = (input_layer.shape[0],)
            elif layout == 'NC':
                new_shape = (batch_size, input_layer.shape[1])
            else:
                raise ValueError("not supported layout '{}'".format(layout))
            new_shapes[input_layer_name] = new_shape
        ie_network.reshape(new_shapes)
    return ie_network

def ie_main(path_to_model_xml, path_to_model_bin, path_to_original_image,
            path_to_result_image, device='CPU', cpu_extensions='', batch=1):
    log.info('COMMON: image preprocessing')
    image = read_resize_image(path_to_original_image, 300, 300)

    # First create the Network (note: the model must be in IR, previously converted with Model Optimizer)
    log.info("Reading IR...")
    net = IENetwork(model=path_to_model_xml, weights=path_to_model_bin)

    # Now let's create an IECore() entity
    log.info("Creating Inference Engine Core")
    ie = IECore()
    if cpu_extensions and 'CPU' in device:  # guard added: add_extension() fails on an empty path
        ie.add_extension(extension_path=cpu_extensions, device_name=device)

    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    n, c, h, w = net.inputs[input_blob].shape
    net.reshape({input_blob: (batch, c, h, w)})
    n, c, h, w = net.inputs[input_blob].shape

    # Now we load the network to the plugin
    log.info("Loading IR to the plugin...")
    exec_net = ie.load_network(network=net, device_name=device, num_requests=2)
    del net
    labels_map = None

    # Read and pre-process the input image
    image = image[..., ::-1]
    in_frame = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
    batched_frame = np.array([in_frame for _ in range(batch)])
    log.info('Current shape: {}'.format(batched_frame.shape))

    # Now we run inference on the target device
    inference_start = time.time()
    res = exec_net.infer(inputs={input_blob: batched_frame})
    inference_end = time.time()
    log.info('INFERENCE ENGINE SPECIFIC: no post processing')
    return res[out_blob], inference_end - inference_start

def load_ir_model(model_xml, device, input_shape=None):
    model_bin = os.path.splitext(model_xml)[0] + '.bin'

    # initialize plugin and read IR
    plugin = IEPlugin(device=device)
    net = IENetwork(model=model_xml, weights=model_bin)
    input_blobs = list(net.inputs.keys())
    if input_shape:
        net.reshape({input_blobs[0]: input_shape})
    inputs = [(b, net.inputs[b].shape) for b in input_blobs]
    out_blob = next(iter(net.outputs))
    exec_net = plugin.load(network=net)
    del net

    return exec_net, inputs, out_blob

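A possible way to call load_ir_model(); the paths, device, and shape are placeholders, and numpy is assumed to be imported as np:

# Hypothetical usage (paths and shape are placeholders).
exec_net, inputs, out_blob = load_ir_model('model.xml', 'CPU', input_shape=[1, 3, 256, 256])
input_name, input_shape = inputs[0]
dummy = np.zeros(input_shape, dtype=np.float32)
result = exec_net.infer({input_name: dummy})[out_blob]
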
def reshape(ie_network: IENetwork, batch_size: int):
    new_shapes = {}
    for input_layer_name, input_layer in ie_network.inputs.items():
        shape = input_layer.shape
        layout = input_layer.layout

        try:
            batch_index = layout.index('N')
        except ValueError:
            batch_index = 1 if layout == 'C' else -1

        if batch_index != -1 and shape[batch_index] != batch_size:
            shape[batch_index] = batch_size
            new_shapes[input_layer_name] = shape

    if new_shapes:
        logger.info('Resizing network to batch = {}'.format(batch_size))
        ie_network.reshape(new_shapes)

def build(cls, model_name, model_version, model_xml, model_bin, mapping_config,
          batch_size_param, shape_param):
    plugin = IEPlugin(device=DEVICE, plugin_dirs=PLUGIN_DIR)
    if CPU_EXTENSION and 'CPU' in DEVICE:
        plugin.add_cpu_extension(CPU_EXTENSION)
    net = IENetwork(model=model_xml, weights=model_bin)
    batching_info = BatchingInfo(batch_size_param)
    shape_info = ShapeInfo(shape_param, net.inputs)
    if batching_info.mode == BatchingMode.FIXED:
        net.batch_size = batching_info.batch_size
    else:
        batching_info.batch_size = net.batch_size

    effective_batch_size = batching_info.get_effective_batch_size()
    logger.debug("[Model: {}, version: {}] --- effective batch size - {}"
                 .format(model_name, model_version, effective_batch_size))
    ###############################
    # Initial shape setup
    if shape_info.mode == ShapeMode.FIXED:
        logger.debug("[Model: {}, version: {}] --- Setting shape to "
                     "fixed value: {}".format(model_name, model_version,
                                              shape_info.shape))
        net.reshape(shape_info.shape)
    elif shape_info.mode == ShapeMode.AUTO:
        logger.debug("[Model: {}, version: {}] --- Setting shape to "
                     "automatic".format(model_name, model_version))
        net.reshape({})
    elif shape_info.mode == ShapeMode.DEFAULT:
        logger.debug("[Model: {}, version: {}] --- Setting shape to "
                     "default".format(model_name, model_version))
    ###############################
    exec_net = plugin.load(network=net, num_requests=1)
    ir_engine = cls(model_name=model_name, model_version=model_version,
                    mapping_config=mapping_config, net=net, plugin=plugin,
                    exec_net=exec_net, batching_info=batching_info,
                    shape_info=shape_info)
    return ir_engine

def load_model(self):
    model_xml = self.model_path
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # Plugin initialization for specified device and load extensions library if specified
    print("Creating Inference Engine")
    ie = IECore()
    if self.cpu_extension and 'CPU' in self.device:
        ie.add_extension(self.cpu_extension, "CPU")

    # Read IR
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in self.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            # Fixed: was args.device, but args is not defined in this method
            log.error("Following layers are not supported by the plugin for specified device {}:\n {}"
                      .format(self.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                      "or --cpu_extension command line argument")
            sys.exit(1)

    print("Preparing input blobs")
    self.input_blob = next(iter(net.inputs))
    out_blob = iter(net.outputs)
    self.out_blob1 = next(out_blob)
    self.out_blob2 = next(out_blob)
    print("test")
    self.net = net
    n, c, h, w = self.net.inputs[self.input_blob].shape
    net.reshape({self.input_blob: (n, c, self.ROI[1], self.ROI[3])})
    # net.reshape({self.input_blob: (n, c, 64, 32)})
    self.ie = ie
    print(self.device)

    t1 = time.time()
    self.exec_net = self.ie.load_network(network=self.net, device_name=self.device)
    t2 = time.time()
    print("load network cost", t2 - t1)

def segment(im, model_path, pad=0):
    """
    Segment an image with the network at model_path and return an upsampled
    classification map suitable for plotting.

    :return: upsampled classification map
    """
    model_xml = model_path
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    # Plugin initialization for Movidius stick
    plugin = IEPlugin(device="MYRIAD")

    # Read IR
    net = IENetwork(model=model_xml, weights=model_bin)
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))
    net.batch_size = 1  # Should be 1

    # Read and pre-process input images.
    # Image loaded as type float32. Works as expected with NCS.
    # float16 was thought to be required by NCS, but skimage.transform.rescale throws an error for that type.
    processed_images = preprocess_image(im, pad=pad)

    results = []
    for image in processed_images:
        # Reshape input layer for image
        net.reshape({input_blob: (1, image.shape[0], image.shape[1], image.shape[2])})

        # Loading model to the plugin.
        # Model needs to be loaded every time the network input is resized.
        exec_net = plugin.load(network=net)  # Loading network multiple times takes a long time

        # Start sync inference
        results.append(exec_net.infer(inputs={input_blob: image}))

    segmented_results = get_average_prob_maps(results, im.shape, pad)
    return segmented_results

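A sketch of how segment() might be invoked, assuming OpenCV is available and a Movidius (MYRIAD) device is attached; the image and model paths are placeholders:

# Hypothetical usage (paths are placeholders); requires an attached Movidius stick.
im = cv2.imread('sample.jpg').astype(np.float32)
class_map = segment(im, 'material_model.xml', pad=32)
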
def __call__(self, *input_values: NumericData) -> List[NumericData]:
    """Run computation on input values and return result."""
    input_values = [np.array(input_value) for input_value in input_values]
    input_shapes = [get_shape(input_value) for input_value in input_values]

    if self.network_cache.get(str(input_shapes)) is None:
        capsule = Function.to_capsule(self.function)
        cnn_network = IENetwork(capsule)
        if self.function.is_dynamic():
            param_names = [param.friendly_name for param in self.parameters]
            cnn_network.reshape(dict(zip(param_names, input_shapes)))
        self.network_cache[str(input_shapes)] = cnn_network
    else:
        cnn_network = self.network_cache[str(input_shapes)]

    executable_network = self.runtime.backend.load_network(cnn_network, self.runtime.backend_name)

    # Input validation
    if len(input_values) != len(self.parameters):
        raise UserInputError("Expected %s parameters, received %s.",
                             len(self.parameters), len(input_values))
    for parameter, input in zip(self.parameters, input_values):
        parameter_shape = parameter.get_output_partial_shape(0)
        input_shape = PartialShape(input.shape)
        if len(input.shape) > 0 and not parameter_shape.compatible(input_shape):
            raise UserInputError(
                "Provided tensor's shape: %s does not match the expected: %s.",
                input_shape,
                parameter_shape,
            )

    request = executable_network.requests[0]
    request.infer(dict(zip(request._inputs_list, input_values)))
    return [blob.buffer for blob in request.output_blobs.values()]

def main(args=None):
    try:
        # ------------------------------ 1. Parsing and validating input arguments -------------------------------------
        next_step()

        if not args:
            args = parse_args()

        # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
        next_step()

        device_name = args.target_device.upper()

        ie = IECore()

        if CPU_DEVICE_NAME in device_name:
            if args.path_to_extension:
                ie.add_cpu_extension(extension_path=args.path_to_extension,
                                     device_name=CPU_DEVICE_NAME)
        if GPU_DEVICE_NAME in device_name:
            if args.path_to_cldnn_config:
                ie.set_config({'CONFIG_FILE': args.path_to_cldnn_config}, GPU_DEVICE_NAME)
                logger.info("GPU extensions are loaded: {}".format(args.path_to_cldnn_config))

        logger.info("InferenceEngine:\n{: <9}{}".format("", get_version()))
        version_string = "Device is {}\n".format(device_name)
        for device, version in ie.get_versions(device_name).items():
            version_string += "{: <9}{}\n".format("", device)
            version_string += "{: <9}{:.<24}{} {}.{}\n".format("", version.description, " version", version.major, version.minor)
            version_string += "{: <9}{:.<24} {}\n".format("", "Build", version.build_number)
        logger.info(version_string)

        # --------------------- 3. Read the Intermediate Representation of the network ---------------------------------
        next_step()

        xml_filename = os.path.abspath(args.path_to_model)
        head, tail = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No inputs info is provided')

        # --------------------- 4. Resizing network to match image sizes and given batch -------------------------------
        next_step()

        batch_size = ie_network.batch_size
        precision = ie_network.precision

        if args.batch_size and args.batch_size != ie_network.batch_size:
            new_shapes = {}
            for key in input_info.keys():
                shape = input_info[key].shape
                layout = input_info[key].layout

                batchIndex = -1
                if layout in ('NCHW', 'NCDHW', 'NHWC', 'NDHWC', 'NC'):
                    batchIndex = 0
                elif layout == 'CN':
                    batchIndex = 1

                if batchIndex != -1 and shape[batchIndex] != args.batch_size:
                    shape[batchIndex] = args.batch_size
                    new_shapes[key] = shape

            if len(new_shapes) > 0:
                logger.info("Resizing network to batch = {}".format(args.batch_size))
                ie_network.reshape(new_shapes)

            batch_size = args.batch_size

        logger.info("Network batch size: {}, precision {}".format(batch_size, precision))

        # --------------------- 5. Configuring input of the model ------------------------------------------------------
        next_step()

        for key in input_info.keys():
            if isImage(input_info[key]):
                # Set the precision of input data provided by the user.
                # Should be called before loading the network to the plugin.
                input_info[key].precision = 'U8'

        # --------------------- 6. Setting device configuration --------------------------------------------------------
        next_step()

        devices = parseDevices(device_name)
        device_nstreams = parseValuePerDevice(devices, args.number_streams)
        for device in devices:
            if device == CPU_DEVICE_NAME:  # CPU supports a few special performance-oriented keys
                # limit threading for CPU portion of inference
                if args.number_threads:
                    ie.set_config({'CPU_THREADS_NUM': str(args.number_threads)}, device)

                if MULTI_DEVICE_NAME in device_name and GPU_DEVICE_NAME in device_name:
                    ie.set_config({'CPU_BIND_THREAD': 'NO'}, CPU_DEVICE_NAME)
                else:
                    # pin threads for CPU portion of inference
                    ie.set_config({'CPU_BIND_THREAD': args.infer_threads_pinning}, device)

                # for pure CPU execution, more throughput-oriented execution via streams
                if args.api_type == 'async':
                    ie.set_config({'CPU_THROUGHPUT_STREAMS': str(device_nstreams.get(device))
                                   if device in device_nstreams.keys()
                                   else 'CPU_THROUGHPUT_AUTO'}, device)
                device_nstreams[device] = int(ie.get_config(device, 'CPU_THROUGHPUT_STREAMS'))
            elif device == GPU_DEVICE_NAME:
                if args.api_type == 'async':
                    ie.set_config({'GPU_THROUGHPUT_STREAMS': str(device_nstreams.get(device))
                                   if device in device_nstreams.keys()
                                   else 'GPU_THROUGHPUT_AUTO'}, device)
                device_nstreams[device] = int(ie.get_config(device, 'GPU_THROUGHPUT_STREAMS'))

                if MULTI_DEVICE_NAME in device_name and CPU_DEVICE_NAME in device_name:
                    # multi-device execution with CPU+GPU performs best with the GPU throttling hint,
                    # which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
                    ie.set_config({'CLDNN_PLUGIN_THROTTLE': str(1)}, device)
            elif device == MYRIAD_DEVICE_NAME:
                ie.set_config({'LOG_LEVEL': 'LOG_INFO',
                               'VPU_LOG_LEVEL': 'LOG_WARNING'}, MYRIAD_DEVICE_NAME)

        # --------------------- 7. Loading the model to the device -----------------------------------------------------
        next_step()

        config = {'PERF_COUNT': ('YES' if args.perf_counts else 'NO')}

        exe_network = ie.load_network(ie_network,
                                      device_name,
                                      config=config,
                                      num_requests=args.number_infer_requests if args.number_infer_requests else 0)

        # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
        next_step()

        # Number of requests
        infer_requests = exe_network.requests
        nireq = len(infer_requests)

        # Iteration limit
        niter = args.number_iterations
        if niter and args.api_type == 'async':
            niter = int((niter + nireq - 1) / nireq) * nireq
            if args.number_iterations != niter:
                logger.warn("Number of iterations was aligned by request number "
                            "from {} to {} using number of requests {}".format(args.number_iterations, niter, nireq))

        # Time limit
        duration_seconds = 0
        if args.time:
            # time limit
            duration_seconds = args.time
        elif not args.number_iterations:
            # default time limit
            duration_seconds = get_duration_in_secs(device)

        # ------------------------------------ 8. Creating infer requests and filling input blobs ----------------------
        next_step()

        request_queue = InferRequestsQueue(infer_requests)

        path_to_input = os.path.abspath(args.path_to_input) if args.path_to_input else None
        requests_input_data = getInputs(path_to_input, batch_size, ie_network.inputs, infer_requests)

        # ------------------------------------ 9. Measuring performance ------------------------------------------------
        progress_count = 0
        progress_bar_total_count = 10000

        output_string = "Start inference {}ronously".format(args.api_type)
        if args.api_type == "async":
            if output_string != "":
                output_string += ", "
            output_string += str(nireq) + " inference requests"

            device_ss = ''
            for device, nstreams in device_nstreams.items():
                if device_ss != '':
                    device_ss += ', '
                device_ss += "{} streams for {}".format(str(nstreams), device)
            if device_ss != '':
                output_string += " using " + device_ss

        output_string += ", limits: "

        if niter:
            if not duration_seconds:
                progress_bar_total_count = niter
            output_string += str(niter) + " iterations"

        if duration_seconds:
            if niter:
                output_string += ", "
            output_string += str(getDurationInMilliseconds(duration_seconds)) + " ms duration"

        next_step(output_string)

        # warming up - out of scope
        infer_request = request_queue.getIdleRequest()
        if not infer_request:
            raise Exception("No idle Infer Requests!")

        if args.api_type == 'sync':
            infer_request.infer(requests_input_data[infer_request.id])
        else:
            infer_request.startAsync(requests_input_data[infer_request.id])

        request_queue.waitAll()
        request_queue.resetTimes()

        start_time = datetime.now()
        exec_time = (datetime.now() - start_time).total_seconds()
        iteration = 0

        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress)

        # Start inference & calculate performance.
        # The iteration count is aligned so that the last infer requests are executed in the same conditions.
        while (niter and iteration < niter) or \
              (duration_seconds and exec_time < duration_seconds) or \
              (args.api_type == "async" and iteration % nireq != 0):
            infer_request = request_queue.getIdleRequest()
            if not infer_request:
                raise Exception("No idle Infer Requests!")

            if args.api_type == 'sync':
                infer_request.infer(requests_input_data[infer_request.id])
            else:
                infer_request.startAsync(requests_input_data[infer_request.id])
            iteration += 1

            exec_time = (datetime.now() - start_time).total_seconds()

            if niter:
                progress_bar.add_progress(1)
            else:
                # calculate how many progress intervals are covered by the current iteration.
                # depends on the current iteration time and the time of each progress interval.
                # Previously covered progress intervals must be skipped.
                progress_interval_time = duration_seconds / progress_bar_total_count
                new_progress = int(exec_time / progress_interval_time - progress_count)
                progress_bar.add_progress(new_progress)
                progress_count += new_progress

        # wait for the latest inference executions
        request_queue.waitAll()

        total_duration_sec = request_queue.getDurationInSeconds()
        times = request_queue.times
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if args.api_type == 'sync' else batch_size * iteration / total_duration_sec

        progress_bar.finish()

        # ------------------------------------ 10. Dumping statistics report -------------------------------------------
        next_step()

        if args.exec_graph_path:
            try:
                exec_graph_info = exe_network.get_exec_graph_info()
                exec_graph_info.serialize(args.exec_graph_path)
                logger.info("Executable graph is stored to {}".format(args.exec_graph_path))
                del exec_graph_info
            except Exception as e:
                logging.exception(e)

        if args.perf_counts:
            for ni in range(int(nireq)):
                perf_counts = exe_network.requests[ni].get_perf_counts()
                logger.info("Performance counts for {}-th infer request".format(ni))
                for layer, stats in perf_counts.items():
                    max_layer_name = 30
                    print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format(
                        layer[:max_layer_name - 4] + '...' if (len(layer) >= max_layer_name) else layer,
                        stats['status'],
                        'layerType: ' + str(stats['layer_type']),
                        'realTime: ' + str(stats['real_time']),
                        'cpu: ' + str(stats['cpu_time']),
                        'execType: ' + str(stats['exec_type'])))

        print("Count: {} iterations".format(iteration))
        print("Duration: {:.2f} ms".format(getDurationInMilliseconds(total_duration_sec)))
        if MULTI_DEVICE_NAME not in device_name:
            print("Latency: {:.4f} ms".format(latency_ms))
        print("Throughput: {:.2f} FPS".format(fps))

        del exe_network
        del ie
        next_step.step_id = 0
    except Exception as e:
        logging.exception(e)

# Read IR
log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
net = IENetwork(model=model_xml, weights=model_bin)

log.info("Preparing input blobs")
input_blob = next(iter(net.inputs))
out_blob = next(iter(net.outputs))

n = 1
c = 3
h = args.resolution[0]
w = args.resolution[1]
log.info("resolution = {} {}".format(h, w))
net.reshape({input_blob: (n, c, h, w)})

# Loading model to the plugin
log.info("Loading model to the plugin")
exec_net = plugin.load(network=net)  # Loading network multiple times takes a long time

if args.input == 'cam':
    input_stream = 0
else:
    input_stream = args.input
    assert os.path.isfile(args.input), "Specified input file doesn't exist"

cap = cv2.VideoCapture(input_stream)

cur_request_id = 0
next_request_id = 1

class InferenceEngine:
    def __init__(self, model_xml, device, stride):
        self.device = device
        self.stride = stride

        model_bin = os.path.splitext(model_xml)[0] + '.bin'
        log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
        self.net = IENetwork(model=model_xml, weights=model_bin)

        log.info("Loading Inference Engine")
        self.ie = IECore()
        log.info("Device info:")
        versions = self.ie.get_versions(device)
        log.info("{}{}".format(" " * 8, device))
        log.info("{}MKLDNNPlugin version ......... {}.{}".format(" " * 8, versions[device].major, versions[device].minor))
        log.info("{}Build ........... {}".format(" " * 8, versions[device].build_number))

        self.input_blob = next(iter(self.net.inputs))
        log.info(f"Input blob: {self.input_blob} - shape: {self.net.inputs[self.input_blob].shape}")
        for o in self.net.outputs.keys():
            log.info(f"Output blob: {o} - shape: {self.net.outputs[o].shape}")
            if o == "Mconv7_stage2_L2":
                self.heatmaps_blob = "Mconv7_stage2_L2"
                self.pafs_blob = "Mconv7_stage2_L1"
            elif o == "heatmaps":
                self.heatmaps_blob = "heatmaps"
                self.pafs_blob = "pafs"
        log.info(f"Heatmaps blob: {self.heatmaps_blob} - PAFs blob: {self.pafs_blob}")

        log.info("Loading model to the plugin")
        self.exec_net = self.ie.load_network(network=self.net, num_requests=1, device_name=device)

    def infer(self, img):
        # Crop the image so its sides are multiples of the network stride
        img = img[0:img.shape[0] - (img.shape[0] % self.stride),
                  0:img.shape[1] - (img.shape[1] % self.stride)]

        n, c, h, w = self.net.inputs[self.input_blob].shape
        if h != img.shape[0] or w != img.shape[1]:
            log.info("Reshaping the network")
            self.net.reshape({self.input_blob: (n, c, img.shape[0], img.shape[1])})
            log.info(f"Input blob: {self.input_blob} - new shape: {self.net.inputs[self.input_blob].shape}")
            for o in self.net.outputs.keys():
                log.info(f"Output blob: {o} - new shape: {self.net.outputs[o].shape}")
            self.exec_net = self.ie.load_network(network=self.net, num_requests=1, device_name=self.device)

        img = np.transpose(img, (2, 0, 1))[None, ]
        inference_result = self.exec_net.infer(inputs={self.input_blob: img})
        inference_result = (inference_result[self.heatmaps_blob][0],
                            inference_result[self.pafs_blob][0])
        return inference_result

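A brief usage sketch for the InferenceEngine wrapper above; the model path and frame are placeholders, and cv2 is assumed to be imported:

# Hypothetical usage (path is a placeholder); infer() crops the frame to the stride.
engine = InferenceEngine('human-pose-estimation.xml', 'CPU', stride=8)
heatmaps, pafs = engine.infer(cv2.imread('frame.jpg'))
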
class OpenPose(Model):
    def __init__(self, ie, model_path, target_size, aspect_ratio, prob_threshold, size_divisor=8, upsample_ratio=1):
        super().__init__(ie, model_path)

        self.image_blob_name = self._get_inputs(self.net)
        self.pooled_heatmaps_blob_name = 'pooled_heatmaps'
        self.heatmaps_blob_name = 'heatmaps'
        self.pafs_blob_name = 'pafs'

        function = ng.function_from_cnn(self.net)
        paf = function.get_output_op(0)
        paf_shape = paf.outputs()[0].get_shape()
        heatmap = function.get_output_op(1)
        heatmap_shape = heatmap.outputs()[0].get_shape()
        if len(paf_shape) != 4 and len(heatmap_shape) != 4:
            raise RuntimeError('OpenPose outputs must be 4-dimensional')
        if paf_shape[2] != heatmap_shape[2] and paf_shape[3] != heatmap_shape[3]:
            raise RuntimeError('Last two dimensions of OpenPose outputs must match')
        if paf_shape[1] * 2 == heatmap_shape[1]:
            paf, heatmap = heatmap, paf
        elif paf_shape[1] != heatmap_shape[1] * 2:
            raise RuntimeError('Size of second dimension of OpenPose of one output must be two times larger than size '
                               'of second dimension of another output')

        paf = paf.inputs()[0].get_source_output().get_node()
        paf.set_friendly_name(self.pafs_blob_name)
        heatmap = heatmap.inputs()[0].get_source_output().get_node()
        heatmap.set_friendly_name(self.heatmaps_blob_name)

        # Add keypoints NMS to the network.
        # Heuristic NMS kernel size adjustment depending on the feature maps upsampling ratio.
        p = int(np.round(6 / 7 * upsample_ratio))
        k = 2 * p + 1
        pooled_heatmap = ng.max_pool(heatmap, kernel_shape=(k, k), pads_begin=(p, p), pads_end=(p, p),
                                     strides=(1, 1), name=self.pooled_heatmaps_blob_name)
        f = ng.impl.Function(
            [ng.result(heatmap, name=self.heatmaps_blob_name),
             ng.result(pooled_heatmap, name=self.pooled_heatmaps_blob_name),
             ng.result(paf, name=self.pafs_blob_name)],
            function.get_parameters(), 'hpe')
        self.net = IENetwork(ng.impl.Function.to_capsule(f))

        self.output_scale = self.net.input_info[self.image_blob_name].input_data.shape[-2] \
            / self.net.outputs[self.heatmaps_blob_name].shape[-2]

        if target_size is None:
            target_size = self.net.input_info[self.image_blob_name].input_data.shape[-2]
        self.h = (target_size + size_divisor - 1) // size_divisor * size_divisor
        input_width = round(target_size * aspect_ratio)
        self.w = (input_width + size_divisor - 1) // size_divisor * size_divisor
        default_input_shape = self.net.input_info[self.image_blob_name].input_data.shape
        input_shape = {self.image_blob_name: (default_input_shape[:-2] + [self.h, self.w])}
        self.logger.info('Reshape net to {}'.format(input_shape))
        self.net.reshape(input_shape)

        num_joints = self.net.outputs[self.heatmaps_blob_name].shape[1] - 1  # The last channel is for background
        self.decoder = OpenPoseDecoder(num_joints, score_threshold=prob_threshold)
        self.size_divisor = size_divisor

    @staticmethod
    def _get_inputs(net):
        image_blob_name = None
        for blob_name, blob in net.input_info.items():
            if len(blob.input_data.shape) == 4:
                image_blob_name = blob_name
            else:
                raise RuntimeError('Unsupported {}D input layer "{}". Only 2D and 4D input layers are supported'
                                   .format(len(blob.shape), blob_name))
        if image_blob_name is None:
            raise RuntimeError('Failed to identify the input for the image.')
        return image_blob_name

    @staticmethod
    def heatmap_nms(heatmaps, pooled_heatmaps):
        return heatmaps * (heatmaps == pooled_heatmaps)

    @staticmethod
    def _resize_image(frame, input_h):
        h = frame.shape[0]
        scale = input_h / h
        return cv2.resize(frame, None, fx=scale, fy=scale)

    def preprocess(self, inputs):
        img = self._resize_image(inputs, self.h)
        h, w = img.shape[:2]
        if self.w < w:
            raise RuntimeError("The image aspect ratio doesn't fit current model shape")
        if not (self.w - self.size_divisor < w <= self.w):
            self.logger.warn("Chosen model aspect ratio doesn't match image aspect ratio")
        resize_img_scale = np.array((inputs.shape[1] / w, inputs.shape[0] / h), np.float32)

        img = np.pad(img, ((0, 0), (0, self.w - w), (0, 0)), mode='constant', constant_values=0)
        img = img.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        img = img[None]
        return {self.image_blob_name: img}, resize_img_scale

    def postprocess(self, outputs, resize_img_scale):
        heatmaps = outputs[self.heatmaps_blob_name]
        pafs = outputs[self.pafs_blob_name]
        pooled_heatmaps = outputs[self.pooled_heatmaps_blob_name]
        nms_heatmaps = self.heatmap_nms(heatmaps, pooled_heatmaps)
        poses, scores = self.decoder(heatmaps, nms_heatmaps, pafs)
        # Rescale poses to the original image.
        poses[:, :, :2] *= resize_img_scale * self.output_scale
        return poses, scores

def main():
    args = parse_arguments()

    # --------------------------------- 1. Load Plugin for inference engine ---------------------------------
    logger.info("Creating Inference Engine")
    ie = IECore()

    if 'CPU' in args.target_device:
        if args.path_to_extension:
            ie.add_extension(args.path_to_extension, "CPU")
        if args.number_threads is not None:
            ie.set_config({'CPU_THREADS_NUM': str(args.number_threads)}, "CPU")
    elif 'GPU' in args.target_device:
        if args.path_to_cldnn_config:
            ie.set_config({'CONFIG_FILE': args.path_to_cldnn_config}, "GPU")
            logger.info("GPU extensions are loaded: {}".format(args.path_to_cldnn_config))
    else:
        # Fixed: the message previously contained an unformatted '{}' placeholder
        raise AttributeError("Device {} does not support 3D convolution. "
                             "Please use CPU, GPU or HETERO:*CPU*, HETERO:*GPU*".format(args.target_device))

    logger.info("Device is {}".format(args.target_device))
    version = ie.get_versions(args.target_device)[args.target_device]
    version_str = "{}.{}.{}".format(version.major, version.minor, version.build_number)
    logger.info("Plugin version is {}".format(version_str))

    # --------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ---------------------
    xml_filename = os.path.abspath(args.path_to_model)
    bin_filename = os.path.abspath(os.path.splitext(xml_filename)[0] + '.bin')

    ie_network = IENetwork(xml_filename, bin_filename)

    input_info = ie_network.inputs
    if len(input_info) == 0:
        raise AttributeError('No inputs info is provided')
    elif len(input_info) != 1:
        raise AttributeError("only one input layer network is supported")

    input_name = next(iter(input_info))
    out_name = next(iter(ie_network.outputs))

    if args.shape:
        logger.info("Reshape of network from {} to {}".format(input_info[input_name].shape, args.shape))
        ie_network.reshape({input_name: args.shape})
        input_info = ie_network.inputs

    # ---------------------------------------- 3. Preparing input data ----------------------------------------
    logger.info("Preparing inputs")

    if len(input_info[input_name].shape) != 5:
        raise AttributeError("Incorrect shape {} for 3d convolution network".format(args.shape))

    n, c, d, h, w = input_info[input_name].shape
    ie_network.batch_size = n

    if not os.path.exists(args.path_to_input_data):
        raise AttributeError("Path to input data: '{}' does not exist".format(args.path_to_input_data))

    is_nifti_data = os.path.isdir(args.path_to_input_data)

    if is_nifti_data:
        series_name = find_series_name(args.path_to_input_data)
        original_data, data_crop, affine, original_size, bbox = \
            read_image(args.path_to_input_data, series_name=series_name, sizes=(h, w, d))
    else:
        if not (fnmatch(args.path_to_input_data, '*.tif') or fnmatch(args.path_to_input_data, '*.tiff')):
            raise AttributeError("Input file extension must have tiff format")

        data_crop = np.zeros(shape=(n, c, d, h, w), dtype=np.float)
        im_seq = ImageSequence.Iterator(Image.open(args.path_to_input_data))
        for i, page in enumerate(im_seq):
            im = np.array(page).reshape(h, w, c)
            for channel in range(c):
                data_crop[:, channel, i, :, :] = im[:, :, channel]
        original_data = data_crop
        original_size = original_data.shape[-3:]

    test_im = {input_name: data_crop}

    # ------------------------------------- 4. Loading model to the plugin -------------------------------------
    logger.info("Loading model to the plugin")
    executable_network = ie.load_network(network=ie_network, device_name=args.target_device)
    del ie_network

    # ---------------------------------------------- 5. Do inference --------------------------------------------
    logger.info("Start inference")
    start_time = datetime.now()
    res = executable_network.infer(test_im)
    infer_time = datetime.now() - start_time
    logger.info("Finish inference")
    logger.info("Inference time is {}".format(infer_time))

    # ---------------------------- 6. Processing of the received inference results ------------------------------
    result = res[out_name]
    batch, channels, out_d, out_h, out_w = result.shape

    list_img = list()
    list_seg_result = list()

    logger.info("Processing of the received inference results is started")
    start_time = datetime.now()
    for batch, data in enumerate(result):
        seg_result = np.zeros(shape=original_size, dtype=np.uint8)
        if data.shape[1:] != original_size:
            x = bbox[1] - bbox[0]
            y = bbox[3] - bbox[2]
            z = bbox[5] - bbox[4]
            seg_result[bbox[0]:bbox[1], bbox[2]:bbox[3], bbox[4]:bbox[5]] = \
                np.argmax(resample_np(data, (channels, x, y, z), 1), axis=0)
        elif channels == 1:
            reshaped_data = data.reshape(out_d, out_h, out_w)
            mask = reshaped_data[:, :, :] > 0.5
            reshaped_data[mask] = 1
            seg_result = reshaped_data.astype(int)
        else:
            seg_result = np.argmax(data, axis=0).astype(int)

        im = np.stack([original_data[batch, 0, :, :, :],
                       original_data[batch, 0, :, :, :],
                       original_data[batch, 0, :, :, :]],
                      axis=3)

        im = 255 * (im - im.min()) / (im.max() - im.min())
        color_seg_frame = np.zeros(im.shape, dtype=np.uint8)
        for idx, c in enumerate(CLASSES_COLOR_MAP):
            color_seg_frame[seg_result[:, :, :] == idx, :] = np.array(c, dtype=np.uint8)
        mask = seg_result[:, :, :] > 0
        im[mask] = color_seg_frame[mask]

        for k in range(out_d):
            if is_nifti_data:
                list_img.append(Image.fromarray(im[:, :, k, :].astype('uint8'), 'RGB'))
            else:
                list_img.append(Image.fromarray(im[k, :, :, :].astype('uint8'), 'RGB'))

        if args.output_nifti and is_nifti_data:
            list_seg_result.append(seg_result)

    result_processing_time = datetime.now() - start_time
    logger.info("Processing of the received inference results is finished")
    logger.info("Processing time is {}".format(result_processing_time))

    # --------------------------------------------- 7. Save output -----------------------------------------------
    tiff_output_name = os.path.join(args.path_to_output, 'output.tiff')
    Image.new('RGB', (data.shape[3], data.shape[2])).save(tiff_output_name, append_images=list_img, save_all=True)
    logger.info("Result tiff file was saved to {}".format(tiff_output_name))

    if args.output_nifti and is_nifti_data:
        for seg_res in list_seg_result:
            nii_filename = os.path.join(args.path_to_output,
                                        'output_{}.nii.gz'.format(list_seg_result.index(seg_res)))
            nib.save(nib.Nifti1Image(seg_res, affine=affine), nii_filename)
            logger.info("Result nifti file was saved to {}".format(nii_filename))

def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()

    ie = IECore()
    if args.cpu_extension and "CPU" in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    # Read IR
    model_xml, model_bin = load_model(args.model, args.device == "MYRIAD")
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("Following layers are not supported by the plugin for specified device {}:\n {}"
                      .format(args.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                      "or --cpu_extension command line argument")
            sys.exit(1)

    """
    Ask OpenVINO for input and output tensor names and sizes
    """
    input_blob = next(iter(net.inputs))   # Name of the input layer
    out_blob = next(iter(net.outputs))    # Name of the output layer

    # Load data
    input_data, label_data, img_indicies = load_data()

    batch_size = 1
    n_channels = input_data.shape[1]
    height = input_data.shape[2]
    width = input_data.shape[3]

    # Reshape the OpenVINO network to accept the different image input shape
    # NOTE: This only works for some models (e.g. fully convolutional)
    net.reshape({input_blob: (batch_size, n_channels, height, width)})
    batch_size, n_channels, height, width = net.inputs[input_blob].shape
    batch_size, n_out_channels, height_out, width_out = net.outputs[out_blob].shape

    # Loading model to the plugin
    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)
    del net

    if args.stats:
        # Print the latency and throughput for inference
        print_stats(exec_net, input_data, n_channels, batch_size, input_blob, out_blob, args)

    """
    OpenVINO inference code
    input_blob is the name (string) of the input tensor in the graph
    out_blob is the name (string) of the output tensor in the graph
    Essentially, this looks exactly like a feed_dict for TensorFlow inference
    """
    # Go through the sample validation dataset to plot predictions
    predictions = np.zeros((img_indicies.shape[0], n_out_channels, height_out, width_out))

    for idx in range(0, img_indicies.shape[0], batch_size):
        res = exec_net.infer(inputs={input_blob: input_data[idx:(idx + batch_size), :n_channels]})

        # Save the predictions to array
        predictions[idx:(idx + batch_size), ] = res[out_blob]

    if idx != (len(img_indicies) - 1):  # Partial batch left in data
        log.info("Partial batch left over in dataset.")

    """
    Evaluate model with Dice metric
    """
    for idx in range(img_indicies.shape[0]):
        dice = dice_score(predictions[idx, 0, :, :], label_data[idx, 0, :, :])
        log.info("Image #{}: Dice score = {:.4f}".format(img_indicies[idx], dice))

    if args.plot:
        plot_predictions(predictions, input_data, label_data, img_indicies, args)

    del exec_net

def main(args=None):
    try:
        if args is None:
            args = parse_args()

        validate_args(args)

        # --------------------------------- 1. Load Plugin for inference engine ---------------------------------
        logging.info("Loading plugin")
        plugin = IEPlugin(args.target_device)

        config = dict()
        if CPU_DEVICE_NAME in args.target_device:
            if args.path_to_extension:
                plugin.add_cpu_extension(args.path_to_extension)
            # limit threading for CPU portion of inference
            if args.number_threads is not None:
                config.update({'CPU_THREADS_NUM': str(args.number_threads)})
            # pin threads for CPU portion of inference
            config.update({'CPU_BIND_THREAD': args.infer_threads_pinning})
            # for pure CPU execution, more throughput-oriented execution via streams
            if args.api_type == 'async' and CPU_DEVICE_NAME in args.target_device:
                config.update({'CPU_THROUGHPUT_STREAMS': str(args.number_infer_requests)})
        elif GPU_DEVICE_NAME in args.target_device:
            if args.path_to_cldnn_config:
                config.update({'CONFIG_FILE': args.path_to_cldnn_config})
                logger.info("GPU extensions are loaded: {}".format(args.path_to_cldnn_config))
        elif MYRIAD_DEVICE_NAME in args.target_device:
            config.update({'LOG_LEVEL': 'LOG_INFO'})
            config.update({'VPU_LOG_LEVEL': 'LOG_INFO'})

        plugin.set_config(config)

        logger.info("Device is {}".format(plugin.device))
        logger.info("Plugin version is {}".format(plugin.version))

        # --------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ---------------------
        logger.info("Loading network files")

        xml_filename = os.path.abspath(args.path_to_model)
        head, tail = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No inputs info is provided')
        elif len(input_info) != 1:
            raise AttributeError("only one input layer network is supported")

        # -------------------------------------- 3. Change network batch_size -------------------------------------
        batch_size = ie_network.batch_size
        key = list(input_info.keys()).pop()
        precision = input_info[key].precision

        if args.batch_size and args.batch_size != ie_network.batch_size:
            # deepcopy input_info
            shape = input_info[key].shape
            # We support models having only one input layer
            if input_info[key].layout != LAYOUT_TYPE:
                raise Exception('Unsupported model for batch size changing in automatic mode')
            shape[BATCH_SIZE_ELEM] = args.batch_size
            ie_network.reshape({key: shape})

            input_info = ie_network.inputs

            batch_size = args.batch_size

        logger_message = "Network batch size was changed to: " if args.batch_size is not None else "Network batch size: "
        logger_message += " {}, precision: {}".format(batch_size, precision)
        logger.info(logger_message)

        # ------------------------------------- 4. Loading model to the plugin -------------------------------------
        logger.info("Loading model to the plugin")
        exe_network = plugin.load(ie_network, args.number_infer_requests)

        # ------------------------------------ 5. Performance measurements stuff -----------------------------------
        inputs = get_images(os.path.abspath(args.path_to_images), batch_size)

        if batch_size < len(inputs):
            logger.warn("Network batch size {} is less than images count {}"
                        ", some input files will be ignored".format(batch_size, len(inputs)))

        input_images = {key: fill_blob_with_image(inputs, input_info[key].shape)}

        times = list()
        duration = 0

        if args.number_iterations is None:
            duration = get_duration_in_secs(args.target_device)

        if args.api_type == 'sync':
            # warming up - out of scope
            exe_network.infer(input_images)

            if args.number_iterations is not None:
                logger.info("Start inference synchronously ({}) sync inference executions".format(args.number_iterations))
                for iteration in range(args.number_iterations):
                    sync_infer_request(exe_network, times, input_images)
            else:
                logger.info("Start inference synchronously ({} s duration)".format(duration))
                start_time = datetime.now()
                current_time = start_time
                while (current_time - start_time).total_seconds() < duration:
                    current_time = sync_infer_request(exe_network, times, input_images)

            times.sort()
            latency = median(times)
            fps = batch_size / latency

            print("[BENCHMARK RESULT] Latency is {:.4f} msec".format(latency * 1e3))
            print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))
        else:
            infer_requests = exe_network.requests

            if args.number_iterations is not None:
                logger.info("Start inference asynchronously ({}"
                            " async inference executions, {} "
                            " inference requests in parallel".format(args.number_iterations,
                                                                     args.number_infer_requests))
            else:
                logger.info("Start inference asynchronously ({} s duration, "
                            "{} inference requests in parallel)".format(duration, args.number_infer_requests))

            current_inference = 0
            required_inference_requests_were_executed = False
            previous_inference = 1 - args.number_infer_requests
            step = 0
            steps_count = args.number_infer_requests - 1
            if args.number_iterations is not None:
                steps_count += args.number_iterations

            # warming up - out of scope
            infer_requests[0].async_infer(input_images)
            infer_requests[0].wait()

            start_time = datetime.now()
            while not required_inference_requests_were_executed or step < steps_count or \
                    args.number_iterations is None and (datetime.now() - start_time).total_seconds() < duration:
                exe_network.start_async(current_inference, input_images)

                if previous_inference >= 0:
                    status = infer_requests[previous_inference].wait()
                    if status != 0:  # fixed: 'is not 0' compared identity, not value
                        raise Exception("Infer request not completed successfully")

                current_inference += 1
                if current_inference >= args.number_infer_requests:
                    current_inference = 0
                    required_inference_requests_were_executed = True

                previous_inference += 1
                if previous_inference >= args.number_infer_requests:
                    previous_inference = 0

                step += 1

            # wait for the latest inference executions
            for not_completed_index in range(args.number_infer_requests):
                if infer_requests[not_completed_index].wait(0) != 0:
                    infer_requests[not_completed_index].wait()

            total_duration = (datetime.now() - start_time).total_seconds()
            fps = batch_size * step / total_duration

            print("[BENCHMARK RESULT] Throughput is {:.4f} FPS".format(fps))

        del exe_network
        del plugin
    except Exception as e:
        logging.exception(e)

log.info("Preparing input blobs") input_blob = next(iter(net.inputs)) out_blob = next(iter(net.outputs)) net.batch_size = len(args.image) # Should be 1 # Read and pre-process input images # Image loaded as type float32. Works as expected with NCS # float16 was thought to be required by NCS but skimage.transform.rescale throws error for this type. im = cv2.imread(args.image).astype(np.float32) processed_images = preprocess_image(im, pad=args.padding) results = [] for image in processed_images: # Reshape input layer for image net.reshape({input_blob: (1, image.shape[0], image.shape[1], image.shape[2])}) # Loading model to the plugin # Model needs to be loaded every time network input is resized. log.info("Loading model to the plugin") exec_net = plugin.load(network=net) # Loading network multiple times takes a long time # Start sync inference log.info("Starting inference ") t0 = time() results.append(exec_net.infer(inputs={input_blob:image})) log.info("Average running time of one iteration: {} ms".format((time() - t0) * 1000)) log.info("processing output blob") segmented_results = segment(results, processed_images[1], pad=args.padding)
for i in net.inputs.keys():
    print(f"Input blob: {i} - shape: {net.inputs[i].shape}")
for o in net.outputs.keys():
    print(f"Output blob: {o} - shape: {net.outputs[o].shape}")

if args.reshape is not None:
    m = re.match(r"(\d+)x(\d+)", args.reshape)
    if not m:
        print("Incorrect syntax for 'reshape' argument")
    else:
        h = int(m.group(1))
        w = int(m.group(2))
        # Round dimensions down to a multiple of the network stride
        h = h - h % args.stride
        w = w - w % args.stride
        print(f"Reshaping to {h}x{w}")
        for i in net.inputs.keys():
            n, c, _, _ = net.inputs[i].shape
            net.reshape({i: (n, c, h, w)})
            print(f"Input blob: {i} - new shape: {net.inputs[i].shape}")
        for o in net.outputs.keys():
            print(f"Output blob: {o} - new shape: {net.outputs[o].shape}")

        # Saving reshaped model in IR files
        model_name = os.path.splitext(os.path.basename(args.model))[0]
        new_model_path = Path(args.model).parent / Path(f"{model_name}_{h}x{w}")
        print(f"Saving reshaped model in {new_model_path}")
        net.serialize(str(new_model_path.with_suffix(".xml")),
                      str(new_model_path.with_suffix(".bin")))

def main(): total_timer = Timer(name='total') total_timer.tic() log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout) args = build_argparser().parse_args() # --------------------------- 1. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------ model_xml = args.model model_bin = os.path.splitext(model_xml)[0] + ".bin" log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin)) net = IENetwork(model=model_xml, weights=model_bin) # ----------------------------------------------------------------------------------------------------- # ------------- 2. Load Plugin for inference engine and extensions library if specified -------------- ie = IECore() log.info('InferenceEngine:\n{: <9}{:.<24} {}'.format('', 'API version', get_version())) version_string = 'Device info\n' for device, version in ie.get_versions(args.device).items(): version_string += '{: <9}{}\n'.format('', device) version_string += '{: <9}{:.<24}{} {}.{}\n'.format('', version.description, ' version', version.major, version.minor) version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number) log.info(version_string) if args.cpu_extension and "CPU" in args.device: ie.add_extension(args.cpu_extension, "CPU") log.info("CPU extension loaded: {}".format(args.cpu_extension)) if "CPU" in args.device: supported_layers = ie.query_network(net, "CPU") not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers] if len(not_supported_layers) != 0: log.error("Following layers are not supported by the plugin for specified device {}:\n {}". format(args.device, ', '.join(not_supported_layers))) log.error("Please try to specify cpu extensions library path in sample's command line parameters using -l " "or --cpu_extension command line argument") sys.exit(1) # ----------------------------------------------------------------------------------------------------- # --------------------------- 4. 
Configure input & output --------------------------------------------- # --------------------------- Prepare input blobs ----------------------------------------------------- log.info("Preparing input blobs") assert (len(net.inputs.keys()) == 1), "Sample supports topologies only with 1 input" input_name = next(iter(net.inputs.keys())) input_info = net.inputs[input_name] # input_info.precision = 'FP32' input_info.precision = 'U8' # batch_size = input_info.shape[0] batch_size, c, h, w = input_info.shape net_reshape_timer = Timer(name='net_reshape') net_reshape_timer.tic() if w != args.patch_size[0] or h != args.patch_size[1]: net.reshape({input_name:(batch_size, c, args.patch_size[1], args.patch_size[0])}) net_reshape_timer.toc() input_info = net.inputs[input_name] log.info('input shape: {}'.format(input_info.shape)) # --------------------------- Prepare output blobs ---------------------------------------------------- log.info('Preparing output blobs') assert (len(net.outputs.keys()) == 2), "Sample supports topologies only with 2 output" loc_out_name = "797" class_out_name = "741" assert (loc_out_name in net.outputs.keys()) and (class_out_name in net.outputs.keys()) loc_out_info = net.outputs[loc_out_name] class_out_info = net.outputs[class_out_name] loc_out_info.precision = "FP32" class_out_info.precision = "FP32" # ----------------------------------------------------------------------------------------------------- # ----------------------------------------------------------------------------------------------------- log.info("Loading model to the device") # cpu_throughput = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'} ie.set_config({'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}, args.device) ie.set_config({'CPU_BIND_THREAD': 'YES'}, args.device) exec_net = ie.load_network(network=net, device_name=args.device, num_requests=0) infer_requests = exec_net.requests request_queue = InferRequestsQueue(infer_requests) log.info('nreqs: {}, nstream:{}'.format(len(infer_requests), ie.get_config(args.device, 'CPU_THROUGHPUT_STREAMS'))) # --------------------------- 3. 
# --------------------------- Read and preprocess input -----------------------------------------------
if not os.path.exists(args.result_dir):
    os.makedirs(args.result_dir)
if args.voc_res_file and os.path.exists(args.voc_res_file):
    os.remove(args.voc_res_file)

load_data_timer = Timer(name='load_data')
post_process_timer = Timer(name='post_process')
adapter = RetinaNetAdapter(input_shape=args.patch_size)

# --------------------------- Performing inference ----------------------------------------------------
result_all_images = defaultdict(list)
data_loader = BatchDataLoader(args.image_dir, args.strides, args.patch_size, batch_size)
while True:
    load_data_timer.tic()
    input_data = data_loader.next()
    load_data_timer.toc()
    if input_data is None:
        break

    infer_request = request_queue.get_idle_request()
    if not infer_request:
        raise Exception('No idle Infer Requests!')
    if infer_request.cur_meta is None:
        infer_request.start_async(input_name, input_data)
        continue

    # Fetch the result of the previous run before reusing this request
    batch_metas = infer_request.cur_meta
    loc_outs = infer_request.request.outputs[loc_out_name]
    class_outs = infer_request.request.outputs[class_out_name]

    # Start the next inference
    infer_request.start_async(input_name, input_data)

    # Post-process the previous batch
    for loc_out, class_out, meta in zip(loc_outs, class_outs, batch_metas):
        image_name = meta['image_name']
        if image_name == 'None':
            continue
        x = meta['x']
        y = meta['y']
        result = adapter.process(loc_out, class_out)
        result, _ = nms(result, thresh=0.5, keep_top_k=100)
        # Shift patch-local boxes back to full-image coordinates
        result[:, 0] += x
        result[:, 1] += y
        result[:, 2] += x
        result[:, 3] += y
        result_all_images[image_name].append(result)

# Wait for the remaining inference executions
request_queue.wait_all()
for infer_request in request_queue.requests:
    batch_metas = infer_request.cur_meta
    loc_outs = infer_request.request.outputs[loc_out_name]
    class_outs = infer_request.request.outputs[class_out_name]
    for loc_out, class_out, meta in zip(loc_outs, class_outs, batch_metas):
        image_name = meta['image_name']
        if image_name == 'None':
            continue
        x = meta['x']
        y = meta['y']
        result = adapter.process(loc_out, class_out)
        result, _ = nms(result, thresh=0.5, keep_top_k=100)
        result[:, 0] += x
        result[:, 1] += y
        result[:, 2] += x
        result[:, 3] += y
        result_all_images[image_name].append(result)

post_process_timer.tic()
# Merge the per-patch detections for each full image
for image_name, result_per_image in result_all_images.items():
    result_per_image = np.concatenate(result_per_image, axis=0)
    nms_result, _ = nms(result_per_image, thresh=0.5)

    voc_format = '{} {:.4f} {} {} {} {}'
    pos_all = []
    voc_all = []
    for i in range(nms_result.shape[0]):
        x = int(nms_result[i, 0])
        y = int(nms_result[i, 1])
        w = max(int(nms_result[i, 2] - nms_result[i, 0]), 1)
        h = max(int(nms_result[i, 3] - nms_result[i, 1]), 1)
        p = float(nms_result[i, 4])
        pos = {'x': x, 'y': y, 'w': w, 'h': h, 'p': p}
        pos_all.append(pos)
        if args.voc_res_file:
            xmin = x
            ymin = y
            xmax = int(nms_result[i, 2])
            ymax = int(nms_result[i, 3])
            voc_str = voc_format.format(os.path.splitext(image_name)[0],
                                        p, xmin, ymin, xmax, ymax)
            voc_all.append(voc_str)

    file_name = os.path.splitext(image_name)[0] + '.json'
    with open(os.path.join(args.result_dir, file_name), 'w') as f:
        json.dump(pos_all, f)
    if args.voc_res_file:
        with open(args.voc_res_file, 'a') as f:
            for voc_str in voc_all:
                f.write(voc_str + '\n')
post_process_timer.toc()
total_timer.toc()
# ------------------------------------------------------------------------------------------------------
all_timers = []
all_timers.extend([net_reshape_timer,
                   data_loader.read_img_timer,
                   data_loader.copy_img_timer,
                   load_data_timer,
                   post_process_timer,
                   total_timer])
log.info('infer: {:.2f}s'.format(request_queue.get_duration_in_seconds()))
for timer in all_timers:
    log.info('{}: avg: {:.2f} ms, total: {:.2f}s'.format(timer.name, timer.avg * 1000, timer.total))
log.info('total_slides: {}'.format(data_loader.total_slides))
log.info("Execution successful\n")
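
The script above relies on a small Timer helper with tic/toc bookkeeping that is not shown in this excerpt. Below is a minimal sketch of what such a helper could look like, assuming only the name, avg, and total attributes that the logging code actually uses; the real helper may differ.

import time

class Timer:
    """Minimal tic/toc accumulator (a sketch; the project's real Timer may differ)."""
    def __init__(self, name):
        self.name = name
        self.total = 0.0   # accumulated seconds across all tic/toc pairs
        self.calls = 0
        self._start = None

    def tic(self):
        self._start = time.perf_counter()

    def toc(self):
        self.total += time.perf_counter() - self._start
        self.calls += 1

    @property
    def avg(self):
        # Average seconds per tic/toc pair; the log above multiplies by 1000 for ms
        return self.total / self.calls if self.calls else 0.0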
def load_to_IE(model_xml, mode=None):
    # Getting the *.bin file location
    print(model_xml)
    model_bin = model_xml[:-3] + "bin"

    # Loading the IR files
    net = IENetwork(model=model_xml, weights=model_bin)

    # Enable dynamic batch size
    # plugin = IEPlugin(device='CPU')
    # plugin.set_config({'DYN_BATCH_ENABLED': 'YES'})

    # Listing all the layers and the supported layers
    cpu_extension_needed = False
    network_layers = net.layers.keys()
    supported_layer_map = ie.query_network(network=net, device_name="CPU")
    supported_layers = supported_layer_map.keys()

    # Checking whether the CPU extension is needed
    for layer in network_layers:
        if layer not in supported_layers:
            cpu_extension_needed = True
            print("CPU extension needed")
            break

    # Adding the CPU extension
    if cpu_extension_needed:
        ie.add_extension(extension_path=cpu_ext, device_name="CPU")
        print("CPU extension added")
    else:
        print("CPU extension not needed")

    # Re-querying the supported layers after adding the extension
    supported_layer_map = ie.query_network(network=net, device_name="CPU")
    supported_layers = supported_layer_map.keys()

    # Checking for any remaining unsupported layers; if any exist, exit
    unsupported_layer_exists = False
    for layer in net.layers.keys():
        if layer not in supported_layers:
            print(layer + ' : Still Unsupported')
            unsupported_layer_exists = True
    if unsupported_layer_exists:
        print("Exiting the program.")
        exit(1)

    if mode == 'pnet':
        # One executable network per image-pyramid scale
        input_shapes = [
            [1, 3, 649, 1153], [1, 3, 460, 817], [1, 3, 326, 580],
            [1, 3, 231, 411], [1, 3, 164, 292], [1, 3, 117, 207],
            [1, 3, 83, 147], [1, 3, 59, 104], [1, 3, 42, 74],
            [1, 3, 30, 53], [1, 3, 21, 37], [1, 3, 15, 27],
        ]
        exec_nets = {}
        input_blob = next(iter(net.inputs))
        for shape in input_shapes:
            net.reshape({input_blob: tuple(shape)})
            exec_net = ie.load_network(network=net, device_name="CPU")
            exec_nets[str(tuple(shape))] = exec_net
        return exec_nets
    else:
        return net
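
A minimal usage sketch for load_to_IE, assuming the module-level ie and cpu_ext globals the function expects; the model path and the 'data' input name are hypothetical.

import numpy as np

# Hypothetical P-Net IR path; load_to_IE returns one executable per scale
pnet_exec_nets = load_to_IE("models/pnet.xml", mode='pnet')

# Pick the executable whose input shape matches the current pyramid level
shape = (1, 3, 649, 1153)
exec_net = pnet_exec_nets[str(shape)]

blob = np.zeros(shape, dtype=np.float32)  # placeholder input blob
result = exec_net.infer({'data': blob})   # 'data' input name is an assumption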
time_count = 0
header = {}
log_list = "a"

from openvino.inference_engine import IENetwork, IEPlugin

# intel_models = "/opt/intel/computer_vision_sdk_2018.5.455/deployment_tools/intel_models/pedestrian-detection-adas-0002/FP32"
intel_models = "C:\\Intel\\computer_vision_sdk_2018.5.456\\deployment_tools\\intel_models\\pedestrian-detection-adas-0002\\FP32"
xml_path = os.path.join(intel_models, "pedestrian-detection-adas-0002.xml")
bin_path = os.path.join(intel_models, "pedestrian-detection-adas-0002.bin")
# location = '/opt/intel/computer_vision_sdk/inference_engine/lib/ubuntu_16.04/intel64/libcpu_extension_avx2.so'
location = 'C:\\Intel\\computer_vision_sdk\\inference_engine\\bin\\intel64\\Release\\cpu_extension_avx2.dll'

net = IENetwork(model=xml_path, weights=bin_path)
input_layer = next(iter(net.inputs))
n, c, h, w = net.inputs[input_layer].shape
# print("network shape:", n, c, h, w)
# net.batch_size = 1
net.reshape({input_layer: (n, c, 320, 544)})

# plugin = IEPlugin(device="GPU")  # GPU is also possible
plugin = IEPlugin(device="CPU")  # CPU is used here so the extension can be loaded
plugin.add_cpu_extension(location)
exec_net = plugin.load(network=net)  # create an executable network
# print("[INFO] loading model...")

# Face landmarks model
# intel_models1 = "/opt/intel/computer_vision_sdk_2018.5.455/deployment_tools/intel_models/facial-landmarks-35-adas-0001/FP32"
intel_models1 = "C:\\Intel\\computer_vision_sdk_2018.5.456\\deployment_tools\\intel_models\\facial-landmarks-35-adas-0001\\FP32"
xml_path1 = os.path.join(intel_models1, "facial-landmarks-35-adas-0001.xml")
bin_path1 = os.path.join(intel_models1, "facial-landmarks-35-adas-0001.bin")
net1 = IENetwork(model=xml_path1, weights=bin_path1)
input_layer1 = next(iter(net1.inputs))
n1, c1, h1, w1 = net1.inputs[input_layer1].shape
net1.reshape({input_layer1: (n1, c1, 60, 60)})
devices = 'CPU'
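
A hedged sketch of how a frame could be fed through the reshaped pedestrian-detection network above. The frame source is hypothetical, and the SSD-style [1, 1, N, 7] output layout is an assumption about this model family rather than something shown in the snippet.

import cv2
import numpy as np

frame = cv2.imread("frame.jpg")             # hypothetical input frame
blob = cv2.resize(frame, (544, 320))        # match the reshaped W=544, H=320
blob = blob.transpose(2, 0, 1)[None, ...]   # HWC -> NCHW
res = exec_net.infer({input_layer: blob})

# Assuming an SSD-like detector output with rows of
# (image_id, label, conf, xmin, ymin, xmax, ymax)
detections = res[next(iter(net.outputs))].reshape(-1, 7)
people = detections[detections[:, 2] > 0.5]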
ret, img = cap.read()
inp_h, inp_w = img.shape[0], img.shape[1]
out_h, out_w = inp_h * 3, inp_w * 3  # Do not change! This is how the model works

# Workaround for a reshaping bug: patch the constant layers that hold
# the input and output spatial sizes before calling reshape()
c1 = net.layers['79/Cast_11815_const']
c1.blobs['custom'][4] = inp_h
c1.blobs['custom'][5] = inp_w
c2 = net.layers['86/Cast_11811_const']
c2.blobs['custom'][2] = out_h
c2.blobs['custom'][3] = out_w

# Reshape the network to the specific size
net.reshape({'0': [1, 3, inp_h, inp_w], '1': [1, 3, out_h, out_w]})

# Load the network to the device
ie = IECore()
exec_net = ie.load_network(net, 'CPU')

# Prepare the first input
inp = img.transpose(2, 0, 1)  # interleaved to planar (HWC -> CHW)
inp = inp.reshape(1, 3, inp_h, inp_w)
inp = inp.astype(np.float32)

# Prepare the second input - a bicubic resize of the first input
resized_img = cv.resize(img, (out_w, out_h), interpolation=cv.INTER_CUBIC)
resized = resized_img.transpose(2, 0, 1)
resized = resized.reshape(1, 3, out_h, out_w)
resized = resized.astype(np.float32)
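
The snippet stops just before the actual inference. A minimal sketch of the remaining step, assuming the two inputs keep the '0'/'1' names used in reshape() and that the single output is the super-resolved image; depending on the model, the output values may additionally need scaling (e.g. by 255) before saving.

# Run inference with both inputs and recover an HWC uint8 image (a sketch)
res = exec_net.infer({'0': inp, '1': resized})
out_name = next(iter(net.outputs))           # output name is an assumption
sr = res[out_name].reshape(3, out_h, out_w)  # CHW float output
sr = np.clip(sr.transpose(1, 2, 0), 0, 255).astype(np.uint8)
cv.imwrite('super_res.png', sr)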
# Plugin initialization for the Movidius stick
plugin = IEPlugin(device="MYRIAD")

# Initialise the network
log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
net = IENetwork(model=model_xml, weights=model_bin)
input_blob = next(iter(net.inputs))
out_blob = next(iter(net.outputs))

n = 1
c = 3
h = 224
w = 224
net.reshape({input_blob: (n, c, h, w)})  # Reshape so that n = 1

# Load the network to the plugin once initialisation is done
log.info("Loading model to the plugin")
exec_net = plugin.load(network=net, num_requests=2)

input_stream = args.input
if input_stream == 'cam':
    input_stream = 0
else:
    assert os.path.isfile(input_stream), "Specified input file doesn't exist"
cap = cv2.VideoCapture(input_stream)  # Start video capture from camera or file
cur_request_id = 0
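
Two infer requests are created above but the capture loop itself is not shown. Below is a hedged sketch of the usual double-buffered async pattern with those two requests, assuming the model accepts plain resized BGR frames; any real preprocessing would be model-specific.

next_request_id = 1
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    blob = cv2.resize(frame, (w, h)).transpose(2, 0, 1).reshape(n, c, h, w)
    # Kick off inference on one request while the other may still be running
    exec_net.start_async(request_id=next_request_id, inputs={input_blob: blob})
    if exec_net.requests[cur_request_id].wait(-1) == 0:
        res = exec_net.requests[cur_request_id].outputs[out_blob]
        # ... consume res here ...
    cur_request_id, next_request_id = next_request_id, cur_request_id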
header = {}
log_list = "a"

from openvino.inference_engine import IENetwork, IEPlugin

# intel_models = "/opt/intel/computer_vision_sdk_2018.5.455/deployment_tools/intel_models/pedestrian-detection-adas-0002/FP32"
# intel_models = "C:\\Intel\\computer_vision_sdk_2018.5.456\\deployment_tools\\intel_models\\pedestrian-detection-adas-0002\\FP32"
intel_models = "C:\\Program Files (x86)\\IntelSWTools\\openvino\\deployment_tools\\intel_models\\pedestrian-detection-adas-0002\\FP32"
xml_path = os.path.join(intel_models, "pedestrian-detection-adas-0002.xml")
bin_path = os.path.join(intel_models, "pedestrian-detection-adas-0002.bin")
# location = '/opt/intel/computer_vision_sdk/inference_engine/lib/ubuntu_16.04/intel64/libcpu_extension_avx2.so'
location = 'C:\\Program Files (x86)\\IntelSWTools\\openvino\\inference_engine\\bin\\intel64\\Release\\cpu_extension_avx2.dll'

net = IENetwork(model=xml_path, weights=bin_path)
input_layer = next(iter(net.inputs))
n, c, h, w = net.inputs[input_layer].shape
# print("network shape:", n, c, h, w)
# net.batch_size = 1
net.reshape({input_layer: (n, c, 320, 544)})

# plugin = IEPlugin(device="GPU")  # GPU is also possible
plugin = IEPlugin(device="CPU")  # CPU is used here so the extension can be loaded
plugin.add_cpu_extension(location)
exec_net = plugin.load(network=net)  # create an executable network

camer_count = 0
norm_size = 32
clf = joblib.load('f_model.m')


# Thread that uploads logs
class myThread(threading.Thread):
    def __init__(self, label_dic, count_list, count, faces, img, Camer_ID):
        threading.Thread.__init__(self)
def main():
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    log.info(args)
    log.info("Loading test data from file: {}".format(args.csv_file))

    ie = IECore()
    if args.cpu_extension and "CPU" in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    # Read the IR
    model_xml, model_bin = load_model(args.openvino_model, args.device == "MYRIAD")
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [
            l for l in net.layers.keys() if l not in supported_layers
        ]
        if len(not_supported_layers) != 0:
            log.error("Following layers are not supported by the plugin for specified device {}:\n {}"
                      .format(args.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify the cpu extensions library path in the sample's "
                      "command line parameters using the -l or --cpu_extension argument")
            sys.exit(1)

    """
    Ask OpenVINO for the input and output tensor names and sizes
    """
    input_blob = next(iter(net.inputs))  # Name of the input layer
    out_blob = next(iter(net.outputs))   # Name of the output layer

    # Load data
    batch_size, n_channels, height, width, depth = net.inputs[input_blob].shape
    batch_size, n_out_channels, height_out, width_out, depth_out = net.outputs[out_blob].shape
    crop_dim = [height, width, depth]

    """
    Read the CSV file with the filenames of the images and masks
    """
    imgFiles, mskFiles, num_imgs = read_csv_file(args.csv_file)

    """
    Load the data for OpenVINO
    """
    input_data, label_data_ov, img_indicies = load_data(imgFiles, mskFiles,
                                                        crop_dim, n_channels,
                                                        n_out_channels,
                                                        openVINO_order=True)

    # Reshape the OpenVINO network to accept the different image input shape
    # NOTE: This only works for some models (e.g. fully convolutional)
    batch_size = 1
    n_channels = input_data.shape[1]
    height = input_data.shape[2]
    width = input_data.shape[3]
    depth = input_data.shape[4]

    net.reshape({input_blob: (batch_size, n_channels, height, width, depth)})
    batch_size, n_channels, height, width, depth = net.inputs[input_blob].shape
    batch_size, n_out_channels, height_out, width_out, depth_out = net.outputs[out_blob].shape

    log.info("The network inputs are:")
    for idx, input_layer in enumerate(net.inputs.keys()):
        log.info("{}: {}, shape = {} [N,C,H,W,D]".format(
            idx, input_layer, net.inputs[input_layer].shape))

    log.info("The network outputs are:")
    for idx, output_layer in enumerate(net.outputs.keys()):
        log.info("{}: {}, shape = {} [N,C,H,W,D]".format(
            idx, output_layer, net.outputs[output_layer].shape))

    # Loading the model to the plugin
    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)
    del net

    if args.stats:
        # Print the latency and throughput for inference
        print_stats(exec_net, input_data, n_channels, batch_size,
                    input_blob, out_blob, args)

    """
    OpenVINO inference code
    input_blob is the name (string) of the input tensor in the graph
    out_blob is the name (string) of the output tensor in the graph
    Essentially, this looks exactly like a feed_dict for TensorFlow inference
    """
    # Go through the sample validation dataset to plot predictions
    predictions_ov = np.zeros(
        (num_imgs, n_out_channels, depth_out, height_out, width_out))

    log.info("Starting OpenVINO inference")
    ov_times = []
    for idx in tqdm(range(0, num_imgs)):
        start_time = time()
        res = exec_net.infer(inputs={input_blob: input_data[[idx], :n_channels]})
        ov_times.append(time() - start_time)
        predictions_ov[idx, ] = res[out_blob]
    log.info("Finished OpenVINO inference")
    del exec_net

    """
    Load the data for Keras
    """
    input_data, label_data_keras, img_indicies = load_data(imgFiles, mskFiles,
                                                           crop_dim, n_channels,
                                                           n_out_channels,
                                                           openVINO_order=False)

    # Load the Keras model for inference
    model = K.models.load_model(args.keras_model, compile=False)

    # Inference-only Keras
    K.backend._LEARNING_PHASE = tf.constant(0)
    K.backend.set_learning_phase(False)
    K.backend.set_image_data_format("channels_last")

    predictions_keras = np.zeros(
        (num_imgs, height_out, width_out, depth_out, n_out_channels))

    log.info("Starting Keras inference")
    keras_times = []
    for idx in tqdm(range(num_imgs)):
        start_time = time()
        res = model.predict(input_data[[idx], ..., :n_channels])
        keras_times.append(time() - start_time)
        predictions_keras[idx] = res
    log.info("Finished Keras inference")

    save_directory = "predictions_openvino"
    try:
        os.stat(save_directory)
    except OSError:
        os.mkdir(save_directory)

    """
    Evaluate the model with the Dice metric
    """
    out_channel = 0
    for idx in tqdm(range(num_imgs)):
        filename = os.path.splitext(os.path.splitext(img_indicies[idx])[0])[0]
        img = input_data[idx, ..., :n_channels]
        ground_truth = label_data_keras[idx, :, :, :, out_channel]

        # Transpose the OpenVINO prediction back to NCHWD (to be consistent with Keras)
        pred_ov = np.transpose(predictions_ov, [0, 2, 3, 4, 1])[idx, :, :, :, out_channel]
        pred_keras = predictions_keras[idx, :, :, :, out_channel]

        dice_ov = dice_score(pred_ov, ground_truth)
        dice_keras = dice_score(pred_keras, ground_truth)

        img_nib = nib.Nifti1Image(img, np.eye(4))
        img_nib.to_filename(os.path.join(save_directory,
                                         "{}_img.nii.gz".format(filename)))
        msk_nib = nib.Nifti1Image(ground_truth, np.eye(4))
        msk_nib.to_filename(os.path.join(save_directory,
                                         "{}_msk.nii.gz".format(filename)))

        pred_ov_nib = nib.Nifti1Image(pred_ov, np.eye(4))
        pred_ov_nib.to_filename(os.path.join(save_directory,
                                             "{}_pred_ov.nii.gz".format(filename)))

        log.info("Image file {}: OpenVINO Dice score = {:f}, "
                 "Keras/TF Dice score = {:f}, "
                 "Mean absolute pixel difference OV versus Keras/TF = {:.2e}"
                 .format(img_indicies[idx], dice_ov, dice_keras,
                         np.mean(np.abs(pred_ov - pred_keras))))

    log.info("Average inference time: \n"
             "OpenVINO = {} seconds (s.d. {})\n "
             "Keras/TF = {} seconds (s.d. {})\n".format(
                 np.mean(ov_times), np.std(ov_times),
                 np.mean(keras_times), np.std(keras_times)))
    log.info("Raw OpenVINO inference times = {} seconds".format(ov_times))
    log.info("Raw Keras inference times = {} seconds".format(keras_times))
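
dice_score is called above but not defined in this excerpt. A minimal sketch of a thresholded Dice coefficient, under the assumption that predictions and masks are arrays with values in [0, 1]; the script's real metric may differ.

import numpy as np

def dice_score(pred, truth, threshold=0.5, smooth=1e-6):
    """Sketch of a Dice coefficient: 2*|A∩B| / (|A|+|B|), with smoothing."""
    pred = (np.asarray(pred) > threshold).astype(np.float64)
    truth = (np.asarray(truth) > threshold).astype(np.float64)
    intersection = np.sum(pred * truth)
    return (2.0 * intersection + smooth) / (pred.sum() + truth.sum() + smooth)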
class MtCNNFaceDetection(InferenceBase):
    Config = MTCNNFaceDetectionConfig()

    OpenVinoExecutablesP = list()
    OpenVinoExecutableR = ExecutableNetwork()
    OpenVinoExecutableO = ExecutableNetwork()

    OpenVinoNetworkP = IENetwork()
    OpenVinoNetworkR = IENetwork()
    OpenVinoNetworkO = IENetwork()

    Scales = []
    RINPUT = []
    OINPUT = []

    LastFaceDetections = []
    LastLandmarkDetections = []

    InputLayerP = str()
    InputLayerR = str()
    InputLayerO = str()
    OutputLayersP = list()
    OutputLayersR = list()
    OutputLayersO = list()

    InputShapeP = []
    InputShapeR = []
    InputShapeO = []

    def __init__(self, config=MTCNNFaceDetectionConfig()):
        super(MtCNNFaceDetection, self).__init__(config)
        self.Config = config

    def prepare_detector(self):
        """
        Override the base class, since MTCNN works with three different models
        :return: None
        """
        if self.Config.ModelPath is None or self.Config.ModelName is None:
            return None

        logging.log(logging.INFO, "Setting Up R - O Network Input Storage")
        self.RINPUT = np.zeros(dtype=float,
                               shape=(self.Config.RInputBatchSize, 3, 24, 24))
        self.OINPUT = np.zeros(dtype=float,
                               shape=(self.Config.OInputBatchSize, 3, 48, 48))

        self.OpenVinoIE = IECore()

        if self.Config.CpuExtension and 'CPU' in self.Config.TargetDevice:
            logging.log(logging.INFO, "CPU Extensions Added")
            self.OpenVinoIE.add_extension(self.Config.CpuExtensionPath, "CPU")

        try:
            # Model file paths
            model_file = self.Config.ModelPath + self.Config.PModelFileName + ".xml"
            model_weights = self.Config.ModelPath + self.Config.PModelFileName + ".bin"
            logging.log(logging.INFO, "Loading Models File {}".format(model_file))
            logging.log(logging.INFO, "Loading Weights File {}".format(model_weights))
            self.OpenVinoNetworkP = IENetwork(model=model_file, weights=model_weights)
            logging.log(logging.INFO, "Loading P Network")

            model_file = self.Config.ModelPath + self.Config.RModelFileName + ".xml"
            model_weights = self.Config.ModelPath + self.Config.RModelFileName + ".bin"
            logging.log(logging.INFO, "Loading Models File {}".format(model_file))
            logging.log(logging.INFO, "Loading Weights File {}".format(model_weights))
            self.OpenVinoNetworkR = IENetwork(model=model_file, weights=model_weights)
            self.OpenVinoNetworkR.batch_size = self.Config.RInputBatchSize
            logging.log(logging.INFO, "Loading R Network")

            model_file = self.Config.ModelPath + self.Config.OModelFileName + ".xml"
            model_weights = self.Config.ModelPath + self.Config.OModelFileName + ".bin"
            logging.log(logging.INFO, "Loading Models File {}".format(model_file))
            logging.log(logging.INFO, "Loading Weights File {}".format(model_weights))
            self.OpenVinoNetworkO = IENetwork(model=model_file, weights=model_weights)
            self.OpenVinoNetworkO.batch_size = self.Config.OInputBatchSize
            logging.log(logging.INFO, "Loading O Network")
        except FileNotFoundError as err:
            logging.log(logging.ERROR, "{} {}".format(err.strerror, err.filename))
            exit(-1)

        if "CPU" in self.Config.TargetDevice:
            supported_layers = self.OpenVinoIE.query_network(self.OpenVinoNetworkP, "CPU")
            not_supported_layers = [
                l for l in self.OpenVinoNetworkP.layers.keys()
                if l not in supported_layers
            ]
            if len(not_supported_layers) != 0:
                logging.log(logging.INFO,
                            "Following layers are not supported by the plugin for specified device {}:\n {}"
                            .format(self.Config.TargetDevice, ', '.join(not_supported_layers)))
                logging.log(logging.INFO,
                            "Please try to specify the cpu extensions library path in the config.json file")

        # Input / output names used to feed inputs and fetch outputs
        # (the original code read all three names from the P network;
        # each network's own input is used here)
        self.InputLayerP = next(iter(self.OpenVinoNetworkP.inputs))
        self.InputLayerR = next(iter(self.OpenVinoNetworkR.inputs))
        self.InputLayerO = next(iter(self.OpenVinoNetworkO.inputs))

        self.OutputLayersP = list(self.OpenVinoNetworkP.outputs)
        self.OutputLayersR = list(self.OpenVinoNetworkR.outputs)
        self.OutputLayersO = list(self.OpenVinoNetworkO.outputs)

        self.InputShapeP = self.OpenVinoNetworkP.inputs[self.InputLayerP].shape
        self.InputShapeR = self.OpenVinoNetworkR.inputs[self.InputLayerR].shape
        self.InputShapeO = self.OpenVinoNetworkO.inputs[self.InputLayerO].shape

        # Enable dynamic batch by default
        config = {"DYN_BATCH_ENABLED": "YES"}

        self.OpenVinoExecutableR = self.OpenVinoIE.load_network(
            network=self.OpenVinoNetworkR,
            device_name=self.Config.TargetDevice,
            config=config,
            num_requests=self.Config.RequestCount)
        logging.log(logging.INFO, "Created R Network Executable")

        self.OpenVinoExecutableO = self.OpenVinoIE.load_network(
            network=self.OpenVinoNetworkO,
            device_name=self.Config.TargetDevice,
            config=config,
            num_requests=self.Config.RequestCount)
        logging.log(logging.INFO, "Created O Network Executable")

        self.Config.MinLength = min(self.Config.InputHeight, self.Config.InputWidth)
        M = self.Config.MinDetectionSize / self.Config.MinimumFaceSize
        self.Config.MinLength *= M

        # Build the image pyramid: one reshaped P-Net executable per scale
        while self.Config.MinLength > self.Config.MinDetectionSize:
            scale = (M * self.Config.Factor ** self.Config.FactorCount)
            self.Scales.append(scale)
            self.Config.MinLength *= self.Config.Factor
            self.Config.FactorCount += 1

            sw = math.ceil(self.Config.InputWidth * scale)
            sh = math.ceil(self.Config.InputHeight * scale)

            self.OpenVinoNetworkP.reshape({self.InputLayerP: (1, 3, sh, sw)})
            self.OpenVinoExecutablesP.append(
                self.OpenVinoIE.load_network(
                    network=self.OpenVinoNetworkP,
                    device_name=self.Config.TargetDevice,
                    num_requests=self.Config.RequestCount))

            logging.log(logging.INFO,
                        "Created Scaled P Networks {}".format(len(self.OpenVinoExecutablesP)))

    def run_mtcnn_face_detection(self, images, request_id=0):
        """
        Get detected face coordinates
        :param images:
        :param request_id:
        :return:
        """
        self.InferenceCount += 1
        start_time = time.time()

        bounding_boxes = []
        landmarks = []

        cv_img = cv.cvtColor(images, cv.COLOR_BGR2RGB)
        image = Image.fromarray(cv_img)

        none_count = 0

        # Stage 1: run the scaled P-Nets over the image pyramid
        for i, scale in enumerate(self.Scales):
            width, height = image.size
            sw, sh = math.ceil(width * scale), math.ceil(height * scale)
            img = image.resize((sw, sh), Image.BILINEAR)
            img = np.asarray(img, 'float32')
            img = self.preprocess(img)

            output = self.OpenVinoExecutablesP[i].infer({self.InputLayerP: img})

            probs = output["prob1"][0, 1, :, :]
            offsets = output["conv4_2"]

            boxes = self.generate_bboxes(probs, offsets, scale,
                                         self.Config.PNetworkThreshold)
            if len(boxes) == 0:
                bounding_boxes.append(None)
                none_count += 1
            else:
                keep = self.nms(boxes[:, 0:5], overlap_threshold=0.5)
                bounding_boxes.append(boxes[keep])

        if len(bounding_boxes) > none_count:
            bounding_boxes = [i for i in bounding_boxes if i is not None]
            bounding_boxes = np.vstack(bounding_boxes)

            keep = self.nms(bounding_boxes[:, 0:5], self.Config.NMSThresholds[0])
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes = self.calibrate_box(bounding_boxes[:, 0:5],
                                                bounding_boxes[:, 5:])
            bounding_boxes = self.convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

            # Stage 2: refine the candidates with R-Net
            img_boxes = self.get_image_boxes(bounding_boxes, image, size=24)
            if img_boxes.shape[0] > 0:
                shp = img_boxes.shape
                self.RINPUT[0:shp[0], ] = img_boxes
                self.OpenVinoExecutableR.requests[request_id].set_batch(shp[0])
                self.OpenVinoExecutableR.requests[request_id].infer(
                    {self.InputLayerR: self.RINPUT})

                # Note: the original read from requests[0] regardless of
                # request_id; the same request is used consistently here
                offsets = self.OpenVinoExecutableR.requests[request_id].outputs['conv5_2'][:shp[0], ]
                probs = self.OpenVinoExecutableR.requests[request_id].outputs['prob1'][:shp[0]]

                keep = np.where(probs[:, 1] > self.Config.RNetworkThreshold)[0]
                bounding_boxes = bounding_boxes[keep]
                bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
                offsets = offsets[keep]

                keep = self.nms(bounding_boxes, self.Config.NMSThresholds[1])
                bounding_boxes = bounding_boxes[keep]
                bounding_boxes = self.calibrate_box(bounding_boxes, offsets[keep])
                bounding_boxes = self.convert_to_square(bounding_boxes)
                bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

                # Stage 3: final boxes and landmarks from O-Net
                img_boxes = self.get_image_boxes(bounding_boxes, image, size=48)
                if img_boxes.shape[0] > 0:
                    shp = img_boxes.shape
                    self.OINPUT[0:shp[0], ] = img_boxes

                    self.OpenVinoExecutableO.requests[request_id].set_batch(shp[0])
                    self.OpenVinoExecutableO.requests[request_id].infer(
                        {self.InputLayerO: self.OINPUT})

                    landmarks = self.OpenVinoExecutableO.requests[request_id].outputs['conv6_3'][:shp[0]]
                    offsets = self.OpenVinoExecutableO.requests[request_id].outputs['conv6_2'][:shp[0]]
                    probs = self.OpenVinoExecutableO.requests[request_id].outputs['prob1'][:shp[0]]

                    keep = np.where(probs[:, 1] > self.Config.ONetworkThreshold)[0]
                    bounding_boxes = bounding_boxes[keep]
                    bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
                    offsets = offsets[keep]
                    landmarks = landmarks[keep]

                    # Compute landmark points
                    width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
                    height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
                    xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
                    landmarks[:, 0:5] = np.expand_dims(xmin, 1) + \
                        np.expand_dims(width, 1) * landmarks[:, 0:5]
                    landmarks[:, 5:10] = np.expand_dims(ymin, 1) + \
                        np.expand_dims(height, 1) * landmarks[:, 5:10]

                    bounding_boxes = self.calibrate_box(bounding_boxes, offsets)
                    keep = self.nms(bounding_boxes,
                                    self.Config.NMSThresholds[2],
                                    mode='min')
                    bounding_boxes = bounding_boxes[keep]
                    landmarks = landmarks[keep]

        none_count = 0
        face_detections = []
        landmark_detections = []

        i = 0
        for box in bounding_boxes:
            if box is None:
                none_count += 1
            else:
                scale = box[4]
                xmin = float((box[0] / scale) / self.Config.InputWidth)
                ymin = float((box[1] / scale) / self.Config.InputHeight)
                xmax = float((box[2] / scale) / self.Config.InputWidth)
                ymax = float((box[3] / scale) / self.Config.InputHeight)
                face_detections.append([xmin, ymin, xmax, ymax])

                lands = []
                for l in range(5):
                    lands.append(float((landmarks[i][l] / scale) / self.Config.InputWidth))
                    lands.append(float((landmarks[i][l + 5] / scale) / self.Config.InputHeight))
                landmark_detections.append(lands)
            i += 1

        if none_count == len(bounding_boxes):
            return [], []

        self.LastFaceDetections = face_detections
        self.LastLandmarkDetections = landmark_detections

        self.ElapsedInferenceTime += (time.time() - start_time)

    def infer(self, images, request_id=0):
        """
        Run inference
        :param images: image to get faces
        :param request_id: request id
        :return:
        """
        self.run_mtcnn_face_detection(images, request_id=request_id)

    def request_ready(self, request_id):
        """
        This is True by default, since there is no ASYNC mode for MTCNN
        :param request_id:
        :return:
        """
        return True

    def get_face_detection_data(self, request_id=0):
        """
        Get the latest results for face coordinates
        :param request_id:
        :return:
        """
        last_detections = self.LastFaceDetections
        self.LastFaceDetections = []
        return last_detections

    def get_face_landmarks_data(self, request_id=0):
        """
        Get the latest results for landmark coordinates
        :param request_id:
        :return:
        """
        last_detections = self.LastLandmarkDetections
        self.LastLandmarkDetections = []
        return last_detections
    @staticmethod
    def preprocess(img):
        """Preprocessing step before feeding the network.

        Arguments:
            img: a float numpy array of shape [h, w, c].

        Returns:
            a float numpy array of shape [1, c, h, w].
        """
        img = img.transpose((2, 0, 1))
        img = np.expand_dims(img, 0)
        img = (img - 127.5) * 0.0078125
        return img

    @staticmethod
    def generate_bboxes(probs, offsets, scale, threshold):
        """Generate bounding boxes at places where there is probably a face.

        Arguments:
            probs: a float numpy array of shape [n, m].
            offsets: a float numpy array of shape [1, 4, n, m].
            scale: a float number; the width and height of the image
                were scaled by this number.
            threshold: a float number.

        Returns:
            a float numpy array of shape [n_boxes, 9]
        """
        # Applying P-Net is equivalent, in some sense, to
        # moving a 12x12 window with stride 2
        stride = 2
        cell_size = 12

        # Indices of boxes where there is probably a face
        inds = np.where(probs > threshold)
        if inds[0].size == 0:
            return np.array([])

        # Transformations of the bounding boxes
        tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)]
        # They are defined as:
        #   w = x2 - x1 + 1
        #   h = y2 - y1 + 1
        #   x1_true = x1 + tx1*w
        #   x2_true = x2 + tx2*w
        #   y1_true = y1 + ty1*h
        #   y2_true = y2 + ty2*h

        offsets = np.array([tx1, ty1, tx2, ty2])
        score = probs[inds[0], inds[1]]

        # P-Net is applied to scaled images,
        # so we need to rescale the bounding boxes back
        bounding_boxes = np.vstack([
            np.round((stride * inds[1] + 1.0) / scale),
            np.round((stride * inds[0] + 1.0) / scale),
            np.round((stride * inds[1] + 1.0 + cell_size) / scale),
            np.round((stride * inds[0] + 1.0 + cell_size) / scale),
            score,
            offsets
        ])
        # why is one added?
        return bounding_boxes.T

    @staticmethod
    def nms(boxes, overlap_threshold=0.5, mode='union'):
        """Non-maximum suppression.

        Arguments:
            boxes: a float numpy array of shape [n, 5],
                where each row is (xmin, ymin, xmax, ymax, score).
            overlap_threshold: a float number.
            mode: 'union' or 'min'.

        Returns:
            a list with the indices of the selected boxes
        """
        # If there are no boxes, return the empty list
        if len(boxes) == 0:
            return []

        # List of picked indices
        pick = []

        # Grab the coordinates of the bounding boxes
        x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]

        area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0)
        ids = np.argsort(score)  # in increasing order

        while len(ids) > 0:
            # Grab the index of the largest value
            last = len(ids) - 1
            i = ids[last]
            pick.append(i)

            # Compute the intersections of the box with the largest score
            # with the rest of the boxes

            # Left-top corner of the intersection boxes
            ix1 = np.maximum(x1[i], x1[ids[:last]])
            iy1 = np.maximum(y1[i], y1[ids[:last]])

            # Right-bottom corner of the intersection boxes
            ix2 = np.minimum(x2[i], x2[ids[:last]])
            iy2 = np.minimum(y2[i], y2[ids[:last]])

            # Width and height of the intersection boxes
            w = np.maximum(0.0, ix2 - ix1 + 1.0)
            h = np.maximum(0.0, iy2 - iy1 + 1.0)

            # Intersection areas
            inter = w * h
            if mode == 'min':
                overlap = inter / np.minimum(area[i], area[ids[:last]])
            elif mode == 'union':
                # Intersection over union (IoU)
                overlap = inter / (area[i] + area[ids[:last]] - inter)

            # Delete all boxes where the overlap is too big
            ids = np.delete(
                ids,
                np.concatenate([[last], np.where(overlap > overlap_threshold)[0]]))

        return pick

    @staticmethod
    def calibrate_box(bboxes, offsets):
        """Transform bounding boxes to be more like true bounding boxes.
        'offsets' is one of the outputs of the nets.

        Arguments:
            bboxes: a float numpy array of shape [n, 5].
            offsets: a float numpy array of shape [n, 4].

        Returns:
            a float numpy array of shape [n, 5].
        """
        x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
        w = x2 - x1 + 1.0
        h = y2 - y1 + 1.0
        w = np.expand_dims(w, 1)
        h = np.expand_dims(h, 1)

        # This is what is happening here:
        #   tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)]
        #   x1_true = x1 + tx1*w
        #   y1_true = y1 + ty1*h
        #   x2_true = x2 + tx2*w
        #   y2_true = y2 + ty2*h
        # Below is just a more compact form of this.

        # Are offsets always such that x1 < x2 and y1 < y2?
        translation = np.hstack([w, h, w, h]) * offsets
        bboxes[:, 0:4] = bboxes[:, 0:4] + translation
        return bboxes

    @staticmethod
    def convert_to_square(bboxes):
        """Convert bounding boxes to a square form.

        Arguments:
            bboxes: a float numpy array of shape [n, 5].

        Returns:
            a float numpy array of shape [n, 5], squared bounding boxes.
        """
        square_bboxes = np.zeros_like(bboxes)
        x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
        h = y2 - y1 + 1.0
        w = x2 - x1 + 1.0
        max_side = np.maximum(h, w)
        square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5
        square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5
        square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0
        square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0
        return square_bboxes

    @staticmethod
    def correct_bboxes(bboxes, width, height):
        """Crop boxes that are too big and get coordinates
        with respect to the cutouts.

        Arguments:
            bboxes: a float numpy array of shape [n, 5],
                where each row is (xmin, ymin, xmax, ymax, score).
            width: a float number.
            height: a float number.

        Returns:
            dy, dx, edy, edx: int numpy arrays of shape [n],
                coordinates of the boxes with respect to the cutouts.
            y, x, ey, ex: int numpy arrays of shape [n],
                corrected ymin, xmin, ymax, xmax.
            h, w: int numpy arrays of shape [n],
                just the heights and widths of the boxes.

            in the following order: [dy, edy, dx, edx, y, ey, x, ex, w, h].
        """
        x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
        w, h = x2 - x1 + 1.0, y2 - y1 + 1.0
        num_boxes = bboxes.shape[0]

        # 'e' stands for end: (x, y) -> (ex, ey)
        x, y, ex, ey = x1, y1, x2, y2

        # We need to cut out a box from the image.
        # (x, y, ex, ey) are the corrected coordinates of the box in the image.
        # (dx, dy, edx, edy) are the coordinates of the box in the cutout
        # from the image.
        dx, dy = np.zeros((num_boxes, )), np.zeros((num_boxes, ))
        edx, edy = w.copy() - 1.0, h.copy() - 1.0

        # If the box's bottom-right corner is too far right
        ind = np.where(ex > width - 1.0)[0]
        edx[ind] = w[ind] + width - 2.0 - ex[ind]
        ex[ind] = width - 1.0

        # If the box's bottom-right corner is too low
        ind = np.where(ey > height - 1.0)[0]
        edy[ind] = h[ind] + height - 2.0 - ey[ind]
        ey[ind] = height - 1.0

        # If the box's top-left corner is too far left
        ind = np.where(x < 0.0)[0]
        dx[ind] = 0.0 - x[ind]
        x[ind] = 0.0

        # If the box's top-left corner is too high
        ind = np.where(y < 0.0)[0]
        dy[ind] = 0.0 - y[ind]
        y[ind] = 0.0

        return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h]
        return_list = [i.astype('int32') for i in return_list]
        return return_list

    @staticmethod
    def get_image_boxes(bounding_boxes, img, size=24):
        """Cut out boxes from the image.

        Arguments:
            bounding_boxes: a float numpy array of shape [n, 5].
            img: an instance of PIL.Image.
            size: an integer, size of the cutouts.

        Returns:
            a float numpy array of shape [n, 3, size, size].
        """
        num_boxes = len(bounding_boxes)
        width, height = img.size

        [dy, edy, dx, edx, y, ey, x, ex, w, h] = \
            MtCNNFaceDetection.correct_bboxes(bounding_boxes, width, height)
        img_boxes = np.zeros((num_boxes, 3, size, size), 'float32')

        for i in range(num_boxes):
            if h[i] <= 0 or w[i] <= 0:
                continue
            img_box = np.zeros((h[i], w[i], 3), 'uint8')
            img_array = np.asarray(img, 'uint8')
            img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = \
                img_array[y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :]

            # Resize to the network's expected cutout size
            img_box = Image.fromarray(img_box)
            img_box = img_box.resize((size, size), Image.BILINEAR)
            img_box = np.asarray(img_box, 'float32')

            img_boxes[i, :, :, :] = MtCNNFaceDetection.preprocess(img_box)

        return img_boxes