class Runtime(object):
    """Represents an nGraph runtime environment."""

    def __init__(self, backend_name: str) -> None:
        self.backend_name = backend_name
        log.debug("Creating Inference Engine for %s" % backend_name)
        self.backend = IECore()
        assert backend_name in self.backend.available_devices, (
            'The requested device "' + backend_name + '" is not supported!')

    def set_config(self, config: Dict[str, str]) -> None:
        """Set the inference engine configuration."""
        self.backend.set_config(config, device_name=self.backend_name)

    def __repr__(self) -> str:
        return "<Runtime: Backend='{}'>".format(self.backend_name)

    def computation(self, node_or_function: Union[Node, Function], *inputs: Node) -> "Computation":
        """Return a callable Computation object."""
        if isinstance(node_or_function, Node):
            ng_function = Function(node_or_function, inputs, node_or_function.name)
            return Computation(self, ng_function)
        elif isinstance(node_or_function, Function):
            return Computation(self, node_or_function)
        else:
            raise TypeError(
                "Runtime.computation must be called with an nGraph Function object "
                "or an nGraph node object and, optionally, Parameter node objects. "
                "Called with: %s" % node_or_function
            )
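A minimal usage sketch for the Runtime wrapper above, assuming it sits alongside the ngraph Python API (ng.parameter and the Computation class come from that surrounding module; the shapes, device, and config key here are illustrative):

import numpy as np
import ngraph as ng

runtime = Runtime("CPU")
runtime.set_config({"PERF_COUNT": "NO"})  # any IECore config key/value pair

a = ng.parameter([2, 2], name="A", dtype=np.float32)
b = ng.parameter([2, 2], name="B", dtype=np.float32)
add = runtime.computation(a + b, a, b)  # the node is wrapped in a Function internally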
def main():
    args = build_argparser().parse_args()

    # Validate --input before it is used below
    if not args.input:
        raise ValueError('--input option is expected')

    full_name = path.basename(args.input)
    extension = path.splitext(full_name)[1]

    if '.txt' in extension:
        with open(args.input) as f:
            videos = [line.strip() for line in f.read().split('\n')]
    else:
        videos = [args.input]

    if args.labels:
        with open(args.labels) as f:
            labels = [l.strip() for l in f.read().strip().split('\n')]
    else:
        labels = None

    ie = IECore()

    if 'MYRIAD' in args.device:
        myriad_config = {'VPU_HW_STAGES_OPTIMIZATION': 'YES'}
        ie.set_config(myriad_config, 'MYRIAD')

    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, 'CPU')

    decoder_target_device = 'CPU'
    if args.device != 'CPU':
        encoder_target_device = args.device
    else:
        encoder_target_device = decoder_target_device

    encoder_xml = args.m_encoder
    encoder_bin = args.m_encoder.replace('.xml', '.bin')
    encoder = IEModel(encoder_xml, encoder_bin, ie, encoder_target_device,
                      num_requests=(3 if args.device == 'MYRIAD' else 1))

    if args.m_decoder is not None:
        decoder_xml = args.m_decoder
        decoder_bin = args.m_decoder.replace('.xml', '.bin')
        decoder = IEModel(decoder_xml, decoder_bin, ie, decoder_target_device, num_requests=2)
        decoder_seq_size = decoder.input_size[1]
    else:
        decoder = DummyDecoder(num_requests=2)
        decoder_seq_size = args.decoder_seq_size

    presenter = monitors.Presenter(args.utilization_monitors, 70)
    result_presenter = ResultRenderer(no_show=args.no_show, presenter=presenter, labels=labels,
                                      label_smoothing_window=args.label_smoothing)
    run_pipeline(videos, encoder, decoder, result_presenter.render_frame,
                 decoder_seq_size=decoder_seq_size, fps=args.fps)

    print(presenter.reportMeans())
class VinoModel:
    def __init__(self, config, num_proc):
        self.config = config
        self.ie = IECore()
        self.ie.set_config(
            {
                'CPU_THREADS_NUM': str(num_proc),
                'CPU_BIND_THREAD': 'NO'
            },
            'CPU',
        )
        self.model_xml = config['model_xml']
        self.model_bin = os.path.splitext(self.model_xml)[0] + ".bin"
        self.net = self.ie.read_network(model=self.model_xml, weights=self.model_bin)
        self.input_blob = list(self.net.input_info.keys())[0]
        self.net.reshape(
            {self.input_blob: (1, 3, config['input_shape'][1], config['input_shape'][0])}
        )
        self.net.batch_size = 1
        self.exec_net = self.ie.load_network(
            network=self.net, num_requests=1, device_name='CPU'
        )
        self.batch = np.zeros(
            [1, 3, self.config['input_shape'][1], self.config['input_shape'][0]],
            dtype=np.float32
        )

    def start_async_infer(self, batch, request_id):
        """Starts async inference with certain request_id"""
        self.exec_net.start_async(request_id=request_id, inputs={self.input_blob: batch})

    def get_outputs_wait(self, request_id):
        """Waits for async inference to be over and returns result"""
        if self.exec_net.requests[request_id].wait(-1) == 0:
            return self.exec_net.requests[request_id].output_blobs
        else:
            return 0

    def preprocess(self, image):
        to_shape = tuple(self.config['input_shape'])
        if self.config.get('pad_image', False):
            image = pad_img(image, to_shape)
        else:
            image = cv2.resize(image, to_shape)
        self.batch[0, :, :, :] = image.transpose(2, 0, 1)

    def predict(self, image):
        self.preprocess(image)
        # self.start_async_infer(self.batch[0, :, :, :], request_id=0)
        # results = self.get_outputs_wait(request_id=0)
        results = self.exec_net.infer(inputs={self.input_blob: self.batch[0, :, :, :]})
        out_name = self.config.get('out_name')
        return results[out_name] if out_name else results
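A hypothetical config dict for the VinoModel wrapper above; the keys mirror exactly what __init__, preprocess, and predict read, and the path and values are placeholders:

config = {
    'model_xml': 'model.xml',    # placeholder IR path; 'model.bin' is derived from it
    'input_shape': [640, 480],   # (width, height), as consumed by reshape/preprocess above
    'pad_image': False,          # True would route preprocessing through pad_img()
    'out_name': None,            # output blob name, or None to return the raw results dict
}
model = VinoModel(config, num_proc=4)
# prediction = model.predict(bgr_image)  # bgr_image: HxWx3 array matching input_shape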
def main():
    args = build_argparser().parse_args()

    # Validate --input before it is used below
    if not args.input:
        raise ValueError("--input option is expected")

    full_name = path.basename(args.input)
    extension = path.splitext(full_name)[1]

    if ".txt" in extension:
        with open(args.input) as f:
            videos = [line.strip() for line in f.read().split("\n")]
    else:
        videos = [args.input]

    if args.labels:
        with open(args.labels) as f:
            labels = [l.strip() for l in f.read().strip().split("\n")]
    else:
        labels = None

    ie = IECore()

    if "MYRIAD" in args.device:
        myriad_config = {"VPU_HW_STAGES_OPTIMIZATION": "YES"}
        ie.set_config(myriad_config, "MYRIAD")

    if args.cpu_extension and "CPU" in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    decoder_target_device = "CPU"
    if args.device != "CPU":
        encoder_target_device = args.device
    else:
        encoder_target_device = decoder_target_device

    encoder_xml = args.m_encoder
    encoder_bin = args.m_encoder.replace(".xml", ".bin")
    decoder_xml = args.m_decoder
    decoder_bin = args.m_decoder.replace(".xml", ".bin")

    encoder = IEModel(
        encoder_xml,
        encoder_bin,
        ie,
        encoder_target_device,
        num_requests=(3 if args.device == "MYRIAD" else 1),
    )
    decoder = IEModel(decoder_xml, decoder_bin, ie, decoder_target_device, num_requests=2)
    video_demo(encoder, decoder, videos, args.fps, labels)
class FrameProcessor:
    def __init__(self, args, used_devices):
        # Inference engine
        self.iecore = IECore()

        # Load plugins
        start_time = time.time()
        log.info(f"Loading plugins for devices: {used_devices}")

        if 'CPU' in used_devices and not len(args.cpu_lib) == 0:
            log.info(f"Using CPU extensions library '{args.cpu_lib}'")
            assert os.path.isfile(args.cpu_lib), "Failed to open CPU extensions library"
            self.iecore.add_extension(args.cpu_lib, "CPU")

        if 'GPU' in used_devices and not len(args.gpu_lib) == 0:
            log.info(f"Using GPU extensions library '{args.gpu_lib}'")
            assert os.path.isfile(args.gpu_lib), "Failed to open GPU definitions file"
            self.iecore.set_config({"CONFIG_FILE": args.gpu_lib}, "GPU")

        log.info(f"Plugins are loaded. loading time : {time.time() - start_time:.4f}sec")

        # Performance-measurement settings
        for d in used_devices:
            self.iecore.set_config({"PERF_COUNT": "YES" if args.perf_stats else "NO"}, d)

    # Read the IR (Intermediate Representation) files (.xml & .bin)
    def load_model(self, model_path):
        start_time = time.time()  # for measuring the load time
        model_path = os.path.abspath(model_path)
        model_description_path = model_path
        model_weights_path = os.path.splitext(model_path)[0] + ".bin"
        log.info(f"    Loading the model from '{model_description_path}'")
        assert os.path.isfile(model_description_path), \
            f"Model description is not found at '{model_description_path}'"
        assert os.path.isfile(model_weights_path), \
            f"Model weights are not found at '{model_weights_path}'"
        model = self.iecore.read_network(model=model_description_path, weights=model_weights_path)
        log.info(f"    Model is loaded. loading time : {time.time() - start_time:.4f}sec")
        return model

    # Read the labels file
    def load_label(self, label_path):
        if not label_path:
            return None
        labels_map = None
        if os.path.isfile(label_path):
            with open(label_path, 'r') as f:
                labels_map = [x.strip() for x in f]
            log.info("    Labels file is loaded")
        else:
            log.info("    Labels file does not exist")
        return labels_map
def partition_inference(partition):
    model_bytes = model_bytes_broadcast.value
    weight_bytes = weight_bytes_broadcast.value
    partition = list(partition)
    data_num = len(partition)
    ie = IECore()
    config = {'CPU_THREADS_NUM': str(self.core_num)}
    ie.set_config(config, 'CPU')
    net = ie.read_network(model=model_bytes, weights=weight_bytes, init_from_buffer=True)
    net.batch_size = batch_size
    local_model = ie.load_network(network=net, device_name="CPU", num_requests=data_num)
    inputs = list(iter(local_model.requests[0].input_blobs))
    outputs = list(iter(local_model.requests[0].output_blobs))
    assert len(outputs) != 0, "The number of model outputs should not be 0."

    def add_elem(d):
        d_len = len(d)
        if d_len < batch_size:
            rep_time = [1] * (d_len - 1)
            rep_time.append(batch_size - d_len + 1)
            return np.repeat(d, rep_time, axis=0), d_len
        else:
            return d, d_len

    results = []
    for idx, batch_data in enumerate(partition):
        infer_request = local_model.requests[idx]
        input_dict = dict()
        elem_num = 0
        if isinstance(batch_data, list):
            for i, input in enumerate(inputs):
                input_dict[input], elem_num = add_elem(batch_data[i])
        else:
            input_dict[inputs[0]], elem_num = add_elem(batch_data)
        infer_request.infer(input_dict)
        if len(outputs) == 1:
            results.append(infer_request.output_blobs[outputs[0]].buffer[:elem_num])
        else:
            results.append(list(map(
                lambda output: infer_request.output_blobs[output].buffer[:elem_num],
                outputs)))

    return results
def main():
    args = parse_args()
    camera_device = args.camera

    if args.labels:
        with open(args.labels) as f:
            labels = [l.strip() for l in f.read().strip().split('\n')]
    else:
        labels = None

    result_presenter = ResultRenderer(labels=labels)
    ie = IECore()

    if 'MYRIAD' in args.device:
        myriad_config = {"VPU_HW_STAGES_OPTIMIZATION": "YES"}
        ie.set_config(myriad_config, "MYRIAD")

    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    decoder_target_device = "CPU"
    if args.device != 'CPU':
        encoder_target_device = args.device
    else:
        encoder_target_device = decoder_target_device

    encoder_xml = args.encoder
    encoder_bin = encoder_xml.replace(".xml", ".bin")
    decoder_xml = args.decoder
    decoder_bin = decoder_xml.replace(".xml", ".bin")

    encoder = IEModel(encoder_xml, encoder_bin, ie, encoder_target_device,
                      num_requests=(3 if args.device == 'MYRIAD' else 1))
    decoder = IEModel(decoder_xml, decoder_bin, ie, decoder_target_device, num_requests=2)
    run_pipeline(camera_device, encoder, decoder, result_presenter.render_frame, fps=30)
def set_config(iecore: IECore, device: str, nthreads: int, nstreams: int):
    config = {}
    if device == 'CPU':
        if nthreads:
            config.update({'CPU_THREADS_NUM': str(nthreads)})
        # cpu_throughput = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}
        # if nstreams:
        #     cpu_throughput['CPU_THROUGHPUT_STREAMS'] = str(nstreams)
        # config.update(cpu_throughput)
    if device == 'GPU':
        gpu_throughput = {'GPU_THROUGHPUT_STREAMS': 'GPU_THROUGHPUT_AUTO'}
        if nstreams:
            gpu_throughput['GPU_THROUGHPUT_STREAMS'] = str(nstreams)
        config.update(gpu_throughput)
    if device == 'MYRIAD':
        config.update({
            'LOG_LEVEL': 'LOG_INFO',
            'VPU_LOG_LEVEL': 'LOG_WARNING'
        })
    iecore.set_config(config, device)
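A minimal sketch of how this helper would be called, assuming net is an IENetwork read earlier; the argument values are illustrative:

ie = IECore()
set_config(ie, device='CPU', nthreads=4, nstreams=0)  # nstreams only affects GPU here, since the CPU streams block is commented out
exec_net = ie.load_network(network=net, device_name='CPU')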
def __init__(self, model_xml, model_bin, input_names, output_names, vocab_file, device='MYRIAD'):
    self.context = None
    self.input_names = input_names
    self.output_names = output_names

    log.info("Loading vocab file:\t{}".format(vocab_file))
    with open(vocab_file, "r", encoding="utf-8") as r:
        self.vocab = {t.rstrip("\n"): i for i, t in enumerate(r.readlines())}
    log.info("{} tokens loaded".format(len(self.vocab)))

    log.info("Initializing Inference Engine")
    ie = IECore()
    ie.set_config({'VPU_HW_STAGES_OPTIMIZATION': 'NO'}, "MYRIAD")
    version = ie.get_versions(device)[device]
    version_str = "{}.{}.{}".format(version.major, version.minor, version.build_number)
    log.info("Plugin version is {}".format(version_str))

    # read IR
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    ie_encoder = ie.read_network(model=model_xml, weights=model_bin)

    # maximum number of tokens that can be processed by the network at once
    self.max_length = ie_encoder.input_info[self.input_names[0]].input_data.shape[1]

    # load model to the device
    log.info("Loading model to the {}".format(device))
    self.ie_encoder_exec = ie.load_network(network=ie_encoder, device_name=device)
def main(args=None):
    args = parse_args(args)
    model_xml = args.xml
    model_bin = args.bin
    img_fn = args.img
    predict_count = args.count

    print("initialize OpenVino...")
    OpenVinoIE = IECore()
    print("available devices: ", OpenVinoIE.available_devices)
    OpenVinoIE.set_config({"CPU_BIND_THREAD": "YES"}, "CPU")

    print("loading model...")
    net = IENetwork(model=model_xml, weights=model_bin)
    config = {}
    OutputLayer = next(iter(net.outputs))
    OpenVinoExecutable = OpenVinoIE.load_network(network=net, config=config, device_name="CPU")

    input_blob = 'data_2'
    net.batch_size = 1
    _, _, h, w = net.inputs[input_blob].shape
    print(f'model input shape: {net.inputs[input_blob].shape}')

    # load images
    image = cv2.imread(img_fn)
    image = cv2.resize(image, (w, h))  # cv2.resize expects (width, height)
    image = preprocess_image(image)
    image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
    image = np.expand_dims(image, axis=0)

    print(f'make {predict_count} predictions:')
    for _ in range(predict_count):
        start_time = time.time()
        res = OpenVinoExecutable.infer(inputs={input_blob: image})
        print("\t{} s".format(time.time() - start_time))
def load_model(feature, model_xml, device, plugin_dirs, input_key_length, output_key_length, cpu_extension):
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    log.info("Initializing plugin for {} device...".format(device))
    # plugin = IEPlugin(device, plugin_dirs)
    ie = IECore()

    log.info("Loading network files for {}".format(feature))
    if cpu_extension and 'CPU' in device:
        # plugin.add_cpu_extension(cpu_extension)
        ie.add_extension(cpu_extension, device)
    else:
        ie.set_config({"PERF_COUNT": "YES"}, device)

    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in device:
        # supported_layers = plugin.get_supported_layers(net)
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("The following layers are not supported by the plugin for the specified device {}:\n {}"
                      .format(device, ', '.join(not_supported_layers)))
            log.error("Please try to specify the CPU extensions library path in the demo's command line parameters "
                      "using the -l or --cpu_extension command line argument")
            sys.exit(1)

    log.info("Checking {} network inputs".format(feature))
    assert len(net.inputs.keys()) == input_key_length, "Demo supports only single input topologies"
    log.info("Checking {} network outputs".format(feature))
    assert len(net.outputs) == output_key_length, "Demo supports only single output topologies"

    return ie, net
def part1():
    #! [part1]
    ### IE API ###
    ie = IECore()

    # Read a network in IR, PaddlePaddle, or ONNX format:
    net = ie.read_network(model=path_to_model)

    # Load a network to AUTO using the default list of device candidates.
    # The following lines are equivalent:
    exec_net = ie.load_network(network=net)
    exec_net = ie.load_network(network=net, device_name="AUTO")
    exec_net = ie.load_network(network=net, device_name="AUTO", config={})

    # Optional
    # You can also specify the devices to be used by AUTO in its selection process.
    # The following lines are equivalent:
    exec_net = ie.load_network(network=net, device_name="AUTO:GPU,CPU")
    exec_net = ie.load_network(network=net, device_name="AUTO",
                               config={"MULTI_DEVICE_PRIORITIES": "GPU,CPU"})

    # Optional
    # The AUTO plugin is pre-configured (globally) with the explicit option:
    ie.set_config(config={"MULTI_DEVICE_PRIORITIES": "GPU,CPU"}, device_name="AUTO")
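The device-candidate list after the colon can also be narrowed to a single device; a minimal sketch of the same pattern, with "model.xml" as a placeholder path:

from openvino.inference_engine import IECore

ie = IECore()
net = ie.read_network(model="model.xml")
# AUTO restricted to a single candidate behaves like loading to that device directly:
exec_net = ie.load_network(network=net, device_name="AUTO:CPU")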
def load_model(args):
    global OpenVinoExecutable
    global InputLayer
    global OutputLayer
    global w
    global h
    global labels_to_names

    model_xml = args.xml
    model_bin = args.bin

    OpenVinoIE = IECore()
    OpenVinoIE.set_config({"CPU_BIND_THREAD": "YES"}, "CPU")

    net = IENetwork(model=model_xml, weights=model_bin)
    config = {}

    InputLayer = 'input_1'
    OutputLayer = next(iter(net.outputs))

    OpenVinoExecutable = OpenVinoIE.load_network(network=net, config=config, device_name="CPU")

    net.batch_size = 1
    _, _, h, w = net.inputs[InputLayer].shape

    labels_to_names = {0: 'Pedestrian'}
    return OpenVinoExecutable, InputLayer, OutputLayer, h, w, labels_to_names
def main():  # noqa
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = parse_args()

    # ---------------------------Step 1. Initialize inference engine core--------------------------------------------
    log.info('Creating Inference Engine')
    ie = IECore()

    if args.extension and args.device == 'CPU':
        log.info(f'Loading the {args.device} extension: {args.extension}')
        ie.add_extension(args.extension, args.device)

    if args.config and args.device in ('GPU', 'MYRIAD', 'HDDL'):
        log.info(f'Loading the {args.device} configuration: {args.config}')
        ie.set_config({'CONFIG_FILE': args.config}, args.device)

    # ---------------------------Step 2. Read a model in OpenVINO Intermediate Representation or ONNX format---------
    log.info(f'Reading the network: {args.model}')
    # (.xml and .bin files) or (.onnx file)
    net = ie.read_network(model=args.model)

    if len(net.input_info) != 1:
        log.error('The sample supports only single input topologies')
        return -1
    if len(net.outputs) != 1 and not ('boxes' in net.outputs or 'labels' in net.outputs):
        log.error('The sample supports models with 1 output or with 2 with the names "boxes" and "labels"')
        return -1

    # ---------------------------Step 3. Configure input & output----------------------------------------------------
    log.info('Configuring input and output blobs')
    # Get name of input blob
    input_blob = next(iter(net.input_info))

    # Set input and output precision manually
    net.input_info[input_blob].precision = 'U8'

    if len(net.outputs) == 1:
        output_blob = next(iter(net.outputs))
        net.outputs[output_blob].precision = 'FP32'
    else:
        net.outputs['boxes'].precision = 'FP32'
        net.outputs['labels'].precision = 'U16'

    # ---------------------------Step 4. Loading model to the device-------------------------------------------------
    log.info('Loading the model to the plugin')
    exec_net = ie.load_network(network=net, device_name=args.device)

    # ---------------------------Step 5. Create infer request--------------------------------------------------------
    # load_network() method of the IECore class with a specified number of requests (default 1) returns an ExecutableNetwork
    # instance which stores infer requests. So you already created Infer requests in the previous step.

    # ---------------------------Step 6. Prepare input----------------------------------------------------------------
    original_image = cv2.imread(args.input)
    image = original_image.copy()
    _, _, net_h, net_w = net.input_info[input_blob].input_data.shape

    if image.shape[:-1] != (net_h, net_w):
        log.warning(f'Image {args.input} is resized from {image.shape[:-1]} to {(net_h, net_w)}')
        image = cv2.resize(image, (net_w, net_h))

    # Change data layout from HWC to CHW
    image = image.transpose((2, 0, 1))
    # Add N dimension to transform to NCHW
    image = np.expand_dims(image, axis=0)

    # ---------------------------Step 7. Do inference-----------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    res = exec_net.infer(inputs={input_blob: image})

    # ---------------------------Step 8. Process output---------------------------------------------------------------
    # Generate a label list
    if args.labels:
        with open(args.labels, 'r') as f:
            labels = [line.split(',')[0].strip() for line in f]

    output_image = original_image.copy()
    h, w, _ = output_image.shape

    if len(net.outputs) == 1:
        res = res[output_blob]
        # Change a shape of a numpy.ndarray with results ([1, 1, N, 7]) to get another one ([N, 7]),
        # where N is the number of detected bounding boxes
        detections = res.reshape(-1, 7)
    else:
        detections = res['boxes']
        labels = res['labels']
        # Redefine scale coefficients
        w, h = w / net_w, h / net_h

    for i, detection in enumerate(detections):
        if len(net.outputs) == 1:
            _, class_id, confidence, xmin, ymin, xmax, ymax = detection
        else:
            class_id = labels[i]
            xmin, ymin, xmax, ymax, confidence = detection

        if confidence > 0.5:
            label = int(labels[class_id]) if args.labels else int(class_id)

            xmin = int(xmin * w)
            ymin = int(ymin * h)
            xmax = int(xmax * w)
            ymax = int(ymax * h)

            log.info(f'Found: label = {label}, confidence = {confidence:.2f}, '
                     f'coords = ({xmin}, {ymin}), ({xmax}, {ymax})')

            # Draw a bounding box on an output image
            cv2.rectangle(output_image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

    cv2.imwrite('out.bmp', output_image)
    log.info('Image out.bmp created!')

    # -----------------------------------------------------------------------------------------------------------------
    log.info('This sample is an API example, '
             'for any performance measurements please use the dedicated benchmark_app tool\n')
    return 0
def main(args=None):
    try:
        # ------------------------------ 1. Parsing and validating input arguments -----------------------------------
        next_step()
        if not args:
            args = parse_args()

        # ------------------------------ 2. Loading Inference Engine -------------------------------------------------
        next_step()
        device_name = args.target_device.upper()

        ie = IECore()

        if CPU_DEVICE_NAME in device_name:
            if args.path_to_extension:
                ie.add_cpu_extension(extension_path=args.path_to_extension, device_name=CPU_DEVICE_NAME)
        if GPU_DEVICE_NAME in device_name:
            if args.path_to_cldnn_config:
                ie.set_config({'CONFIG_FILE': args.path_to_cldnn_config}, GPU_DEVICE_NAME)
                logger.info("GPU extensions are loaded {}".format(args.path_to_cldnn_config))

        logger.info("InferenceEngine:\n{: <9}{}".format("", get_version()))
        version_string = "Device is {}\n".format(device_name)
        for device, version in ie.get_versions(device_name).items():
            version_string += "{: <9}{}\n".format("", device)
            version_string += "{: <9}{:.<24}{} {}.{}\n".format("", version.description, " version",
                                                               version.major, version.minor)
            version_string += "{: <9}{:.<24} {}\n".format("", "Build", version.build_number)
        logger.info(version_string)

        # --------------------- 3. Read the Intermediate Representation of the network -------------------------------
        next_step()

        xml_filename = os.path.abspath(args.path_to_model)
        head, tail = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No inputs info is provided')

        # --------------------- 4. Resizing network to match image sizes and given batch -----------------------------
        next_step()

        batch_size = ie_network.batch_size
        precision = ie_network.precision

        if args.batch_size and args.batch_size != ie_network.batch_size:
            new_shapes = {}
            for key in input_info.keys():
                shape = input_info[key].shape
                layout = input_info[key].layout

                batchIndex = -1
                if ((layout == 'NCHW') or (layout == 'NCDHW') or
                        (layout == 'NHWC') or (layout == 'NDHWC') or
                        (layout == 'NC')):
                    batchIndex = 0
                elif (layout == 'CN'):
                    batchIndex = 1

                if ((batchIndex != -1) and (shape[batchIndex] != args.batch_size)):
                    shape[batchIndex] = args.batch_size
                    new_shapes[key] = shape

            if (len(new_shapes) > 0):
                logger.info("Resizing network to batch = {}".format(args.batch_size))
                ie_network.reshape(new_shapes)

            batch_size = args.batch_size

        logger.info("Network batch size: {}, precision {}".format(batch_size, precision))

        # --------------------- 5. Configuring input of the model ----------------------------------------------------
        next_step()

        for key in input_info.keys():
            if (isImage(input_info[key])):
                # Set the precision of input data provided by the user
                # Should be called before load of the network to the plugin
                input_info[key].precision = 'U8'

        # --------------------- 6. Setting device configuration ------------------------------------------------------
        next_step()

        devices = parseDevices(device_name)
        device_nstreams = parseValuePerDevice(devices, args.number_streams)
        for device in devices:
            if device == CPU_DEVICE_NAME:  # CPU supports a few special performance-oriented keys
                # limit threading for CPU portion of inference
                if args.number_threads:
                    ie.set_config({'CPU_THREADS_NUM': str(args.number_threads)}, device)

                if MULTI_DEVICE_NAME in device_name and GPU_DEVICE_NAME in device_name:
                    ie.set_config({'CPU_BIND_THREAD': 'NO'}, CPU_DEVICE_NAME)
                else:
                    # pin threads for CPU portion of inference
                    ie.set_config({'CPU_BIND_THREAD': args.infer_threads_pinning}, device)

                # for pure CPU execution, more throughput-oriented execution via streams
                if args.api_type == 'async':
                    ie.set_config({'CPU_THROUGHPUT_STREAMS': str(device_nstreams.get(device))
                                   if device in device_nstreams.keys()
                                   else 'CPU_THROUGHPUT_AUTO'}, device)
                    device_nstreams[device] = int(ie.get_config(device, 'CPU_THROUGHPUT_STREAMS'))
            elif device == GPU_DEVICE_NAME:
                if args.api_type == 'async':
                    ie.set_config({'GPU_THROUGHPUT_STREAMS': str(device_nstreams.get(device))
                                   if device in device_nstreams.keys()
                                   else 'GPU_THROUGHPUT_AUTO'}, device)
                    device_nstreams[device] = int(ie.get_config(device, 'GPU_THROUGHPUT_STREAMS'))

                if MULTI_DEVICE_NAME in device_name and CPU_DEVICE_NAME in device_name:
                    # multi-device execution with the CPU+GPU performs best with GPU throttling hint,
                    # which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
                    ie.set_config({'CLDNN_PLUGIN_THROTTLE': str(1)}, device)
            elif device == MYRIAD_DEVICE_NAME:
                ie.set_config({'LOG_LEVEL': 'LOG_INFO',
                               'VPU_LOG_LEVEL': 'LOG_WARNING'}, MYRIAD_DEVICE_NAME)

        # --------------------- 7. Loading the model to the device ---------------------------------------------------
        next_step()

        config = {'PERF_COUNT': ('YES' if args.perf_counts else 'NO')}

        exe_network = ie.load_network(ie_network,
                                      device_name,
                                      config=config,
                                      num_requests=args.number_infer_requests if args.number_infer_requests else 0)

        # --------------------- 8. Setting optimal runtime parameters ------------------------------------------------
        next_step()

        # Number of requests
        infer_requests = exe_network.requests
        nireq = len(infer_requests)

        # Iteration limit
        niter = args.number_iterations
        if niter and args.api_type == 'async':
            niter = int((niter + nireq - 1) / nireq) * nireq
            if (args.number_iterations != niter):
                logger.warn("Number of iterations was aligned by request number "
                            "from {} to {} using number of requests {}".format(args.number_iterations, niter, nireq))

        # Time limit
        duration_seconds = 0
        if args.time:
            # time limit
            duration_seconds = args.time
        elif not args.number_iterations:
            # default time limit
            duration_seconds = get_duration_in_secs(device)

        # ------------------------------------ 9. Creating infer requests and filling input blobs --------------------
        next_step()

        request_queue = InferRequestsQueue(infer_requests)

        path_to_input = os.path.abspath(args.path_to_input) if args.path_to_input else None
        requests_input_data = getInputs(path_to_input, batch_size, ie_network.inputs, infer_requests)

        # ------------------------------------ 10. Measuring performance ---------------------------------------------

        progress_count = 0
        progress_bar_total_count = 10000

        output_string = "Start inference {}ronously".format(args.api_type)
        if (args.api_type == "async"):
            if output_string != "":
                output_string += ", "
            output_string += str(nireq) + " inference requests"

            device_ss = ''
            for device, nstreams in device_nstreams.items():
                if device_ss != '':
                    device_ss += ', '
                device_ss += "{} streams for {}".format(str(nstreams), device)
            if device_ss != '':
                output_string += " using " + device_ss

        output_string += ", limits: "

        if niter:
            if not duration_seconds:
                progress_bar_total_count = niter
            output_string += str(niter) + " iterations"
        if duration_seconds:
            if niter:
                output_string += ", "
            output_string += str(getDurationInMilliseconds(duration_seconds)) + " ms duration"

        next_step(output_string)

        # warming up - out of scope
        infer_request = request_queue.getIdleRequest()
        if not infer_request:
            raise Exception("No idle Infer Requests!")

        if (args.api_type == 'sync'):
            infer_request.infer(requests_input_data[infer_request.id])
        else:
            infer_request.startAsync(requests_input_data[infer_request.id])

        request_queue.waitAll()
        request_queue.resetTimes()

        start_time = datetime.now()
        exec_time = (datetime.now() - start_time).total_seconds()
        iteration = 0

        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress)

        # Start inference & calculate performance.
        # The number of iterations is aligned to guarantee that the last infer requests are executed
        # in the same conditions.
        while ((niter and iteration < niter) or
               (duration_seconds and exec_time < duration_seconds) or
               (args.api_type == "async" and iteration % nireq != 0)):
            infer_request = request_queue.getIdleRequest()
            if not infer_request:
                raise Exception("No idle Infer Requests!")

            if (args.api_type == 'sync'):
                infer_request.infer(requests_input_data[infer_request.id])
            else:
                infer_request.startAsync(requests_input_data[infer_request.id])
            iteration += 1

            exec_time = (datetime.now() - start_time).total_seconds()

            if niter:
                progress_bar.add_progress(1)
            else:
                # calculate how many progress intervals are covered by the current iteration.
                # depends on the current iteration time and the time of each progress interval.
                # Previously covered progress intervals must be skipped.
                progress_interval_time = duration_seconds / progress_bar_total_count
                new_progress = int(exec_time / progress_interval_time - progress_count)
                progress_bar.add_progress(new_progress)
                progress_count += new_progress

        # wait for the latest inference executions
        request_queue.waitAll()

        total_duration_sec = request_queue.getDurationInSeconds()
        times = request_queue.times
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if args.api_type == 'sync' else batch_size * iteration / total_duration_sec

        progress_bar.finish()

        # ------------------------------------ 11. Dumping statistics report -----------------------------------------
        next_step()

        if args.exec_graph_path:
            try:
                exec_graph_info = exe_network.get_exec_graph_info()
                exec_graph_info.serialize(args.exec_graph_path)
                logger.info("Executable graph is stored to {}".format(args.exec_graph_path))
                del exec_graph_info
            except Exception as e:
                logging.exception(e)

        if args.perf_counts:
            for ni in range(int(nireq)):
                perf_counts = exe_network.requests[ni].get_perf_counts()
                logger.info("Performance counts for {}-th infer request".format(ni))
                for layer, stats in perf_counts.items():
                    max_layer_name = 30
                    print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format(
                        layer[:max_layer_name - 4] + '...' if (len(layer) >= max_layer_name) else layer,
                        stats['status'],
                        'layerType: ' + str(stats['layer_type']),
                        'realTime: ' + str(stats['real_time']),
                        'cpu: ' + str(stats['cpu_time']),
                        'execType: ' + str(stats['exec_type'])))

        print("Count:      {} iterations".format(iteration))
        print("Duration:   {:.2f} ms".format(getDurationInMilliseconds(total_duration_sec)))
        if not MULTI_DEVICE_NAME in device_name:
            print("Latency:    {:.4f} ms".format(latency_ms))
        print("Throughput: {:.2f} FPS".format(fps))

        del exe_network
        del ie
        next_step.step_id = 0
    except Exception as e:
        logging.exception(e)
class Benchmark:
    def __init__(self, device: str, number_infer_requests, number_iterations, duration_seconds, api_type):
        self.device = device.upper()
        self.ie = IECore()
        self.nireq = number_infer_requests
        self.niter = number_iterations
        self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
        self.api_type = api_type
        self.device_number_streams = {}

    def __del__(self):
        del self.ie

    def add_extension(self, path_to_extension: str = None, path_to_cldnn_config: str = None):
        if GPU_DEVICE_NAME in self.device:
            if path_to_cldnn_config:
                self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME)
                logger.info('GPU extension is loaded {}'.format(path_to_cldnn_config))
        if CPU_DEVICE_NAME in self.device or MYRIAD_DEVICE_NAME in self.device:
            if path_to_extension:
                self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME)
                logger.info('CPU extension is loaded {}'.format(path_to_extension))

    def get_version_info(self) -> str:
        logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format('', 'API version', get_version()))
        version_string = 'Device info\n'
        for device, version in self.ie.get_versions(self.device).items():
            version_string += '{: <9}{}\n'.format('', device)
            version_string += '{: <9}{:.<24}{} {}.{}\n'.format('', version.description, ' version',
                                                               version.major, version.minor)
            version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number)
        return version_string

    @staticmethod
    def reshape(ie_network: IENetwork, batch_size: int):
        new_shapes = {}
        for input_layer_name, input_layer in ie_network.inputs.items():
            shape = input_layer.shape
            layout = input_layer.layout

            try:
                batch_index = layout.index('N')
            except ValueError:
                batch_index = 1 if layout == 'C' else -1

            if batch_index != -1 and shape[batch_index] != batch_size:
                shape[batch_index] = batch_size
                new_shapes[input_layer_name] = shape

        if new_shapes:
            logger.info('Resizing network to batch = {}'.format(batch_size))
            ie_network.reshape(new_shapes)

    def set_config(self, number_streams: int, api_type: str = 'async',
                   number_threads: int = None, infer_threads_pinning: int = None):
        devices = parse_devices(self.device)
        self.device_number_streams = parse_value_per_device(devices, number_streams)
        for device in devices:
            if device == CPU_DEVICE_NAME:  # CPU supports a few special performance-oriented keys
                # limit threading for CPU portion of inference
                if number_threads:
                    self.ie.set_config({'CPU_THREADS_NUM': str(number_threads)}, device)

                if MULTI_DEVICE_NAME in self.device and GPU_DEVICE_NAME in self.device:
                    self.ie.set_config({'CPU_BIND_THREAD': 'NO'}, CPU_DEVICE_NAME)
                else:
                    # pin threads for CPU portion of inference
                    self.ie.set_config({'CPU_BIND_THREAD': infer_threads_pinning}, device)

                # for pure CPU execution, more throughput-oriented execution via streams
                if api_type == 'async':
                    cpu_throughput = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}
                    if device in self.device_number_streams.keys():
                        cpu_throughput['CPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
                    self.ie.set_config(cpu_throughput, device)
                    self.device_number_streams[device] = self.ie.get_config(device, 'CPU_THROUGHPUT_STREAMS')
            elif device == GPU_DEVICE_NAME:
                if api_type == 'async':
                    gpu_throughput = {'GPU_THROUGHPUT_STREAMS': 'GPU_THROUGHPUT_AUTO'}
                    if device in self.device_number_streams.keys():
                        gpu_throughput['GPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
                    self.ie.set_config(gpu_throughput, device)
                    self.device_number_streams[device] = self.ie.get_config(device, 'GPU_THROUGHPUT_STREAMS')

                if MULTI_DEVICE_NAME in self.device and CPU_DEVICE_NAME in self.device:
                    # multi-device execution with the CPU+GPU performs best with GPU throttling hint,
                    # which releases another CPU thread (that is otherwise used by the GPU driver for active polling)
                    self.ie.set_config({'CLDNN_PLUGIN_THROTTLE': '1'}, device)
            elif device == MYRIAD_DEVICE_NAME:
                self.ie.set_config({'LOG_LEVEL': 'LOG_INFO'}, MYRIAD_DEVICE_NAME)

    def load_network(self, ie_network: IENetwork, perf_counts: bool, number_infer_requests: int = None):
        config = {'PERF_COUNT': ('YES' if perf_counts else 'NO')}

        exe_network = self.ie.load_network(ie_network,
                                           self.device,
                                           config=config,
                                           num_requests=number_infer_requests or 0)

        return exe_network

    def infer(self, request_queue, requests_input_data, batch_size, progress_bar):
        progress_count = 0
        # warming up - out of scope
        infer_request = request_queue.get_idle_request()
        if not infer_request:
            raise Exception('No idle Infer Requests!')

        if self.api_type == 'sync':
            infer_request.infer(requests_input_data[infer_request.req_id])
        else:
            infer_request.start_async(requests_input_data[infer_request.req_id])

        request_queue.wait_all()
        request_queue.reset_times()

        start_time = datetime.now()
        exec_time = (datetime.now() - start_time).total_seconds()
        iteration = 0

        # Start inference & calculate performance.
        # The number of iterations is aligned to guarantee that the last infer requests are executed
        # in the same conditions.
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds) or \
              (self.api_type == 'async' and iteration % self.nireq):
            infer_request = request_queue.get_idle_request()
            if not infer_request:
                raise Exception('No idle Infer Requests!')

            if self.api_type == 'sync':
                infer_request.infer(requests_input_data[infer_request.req_id])
            else:
                infer_request.start_async(requests_input_data[infer_request.req_id])
            iteration += 1

            exec_time = (datetime.now() - start_time).total_seconds()

            if self.duration_seconds:
                # calculate how many progress intervals are covered by the current iteration.
                # depends on the current iteration time and the time of each progress interval.
                # Previously covered progress intervals must be skipped.
                progress_interval_time = self.duration_seconds / progress_bar.total_num
                new_progress = int(exec_time / progress_interval_time - progress_count)
                progress_bar.add_progress(new_progress)
                progress_count += new_progress
            elif self.niter:
                progress_bar.add_progress(1)

        # wait for the latest inference executions
        request_queue.wait_all()

        total_duration_sec = request_queue.get_duration_in_seconds()
        times = request_queue.times
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms
        if self.api_type == 'async':
            fps = batch_size * iteration / total_duration_sec
        progress_bar.finish()
        return fps, latency_ms, total_duration_sec, iteration
class benchmark():
    def __init__(self, model, device='CPU', nireq=4, config=None):
        global disp_res
        self.config = config
        self.read_labels()
        base, ext = os.path.splitext(model)
        self.ie = IECore()
        print('reading the model...', end='', flush=True)
        self.net = self.ie.read_network(base + '.xml', base + '.bin')
        print('done')

        if 'batch' in self.config['model_config']:
            self.batch = self.config['model_config']['batch']
        else:
            self.batch = 1
        self.net.batch_size = self.batch

        self.inputBlobName = next(iter(self.net.input_info))
        self.outputBlobName = next(iter(self.net.outputs))
        self.inputShape = self.net.input_info[self.inputBlobName].tensor_desc.dims
        self.outputShape = self.net.outputs[self.outputBlobName].shape

        # Setup network configuration parameters
        # (a distinct loop variable keeps the 'device' argument from being shadowed)
        print('*** SET CONFIGURATION')
        network_cfg = self.config['plugin_config']
        for cfg_device in network_cfg:
            cfg_items = network_cfg[cfg_device]
            for cfg in cfg_items:
                self.ie.set_config(cfg, cfg_device)
                print('   ', cfg, cfg_device)

        print('loading the model to the plugin...', end='', flush=True)
        self.exenet = self.ie.load_network(self.net, device, num_requests=nireq)
        print('done')
        self.nireq = nireq

        disp_res = [int(i) for i in self.config['display_resolution'].split('x')]  # e.g. [1920, 1080]
        self.canvas = BenchmarkCanvas(display_resolution=disp_res, full_screen=self.config['full_screen'])
        self.skip_count = self.config['display_skip_count']
        self.canvas.displayLogo()
        self.canvas.displayModel(model, device, self.batch, self.skip_count)

        self.infer_slot = [[False, 0] for i in range(self.nireq)]  # [in-use flag, ocvimg index]
        self.draw_requests = []
        self.draw_requests_lock = threading.Lock()

    def read_labels(self):
        if 'label_file' in self.config['model_config']:
            label_file = self.config['model_config']['label_file']
            with open(label_file, 'rt') as f:
                self.labels = [line.rstrip('\n').split(',')[0] for line in f]
        else:
            self.labels = None

    def preprocessImages(self, files):
        print('preprocessing image files...', end='', flush=True)
        self.blobImages = []
        self.ocvImages = []
        for f in files:
            ocvimg = cv2.imread(f)
            # Assuming OpenGL is used to display the frame buffer (RGB)
            ocvimg = cv2.cvtColor(ocvimg, cv2.COLOR_BGR2RGB)
            # preprocess for inference
            blobimg = cv2.resize(ocvimg, (self.inputShape[-1], self.inputShape[-2]),
                                 interpolation=cv2.INTER_LINEAR)
            blobimg = blobimg.transpose((2, 0, 1))
            blobimg = blobimg.reshape(self.inputShape[1:])
            self.blobImages.append(blobimg)
            # scaling for image to display in the panes
            ocvimg = cv2.resize(ocvimg, (self.canvas.grid_width - 2, self.canvas.grid_height - 2),
                                interpolation=cv2.INTER_LINEAR)
            self.ocvImages.append(ocvimg)
        print('done')

    def run(self, niter=10, nireq=4, files=None, max_fps=100):
        global abort_flag, framebuf_lock
        print('*** CURRENT CONFIGURATION')
        met_keys = self.exenet.get_metric('SUPPORTED_METRICS')
        cfg_keys = self.exenet.get_metric('SUPPORTED_CONFIG_KEYS')
        for key in cfg_keys:
            print('   ', key, self.exenet.get_config(key))

        # tweak the number of iterations for batch inferencing
        niter = (niter // self.batch) * self.batch + (self.batch if niter % self.batch else 0)

        self.inf_count = 0

        framebuf_lock.acquire()
        self.canvas.dispProgressBar(curItr=0, ttlItr=niter, elapse=0, max_fps=max_fps)
        framebuf_lock.release()
        time.sleep(1)

        # Do inference
        inf_kicked = 0
        inf_done = 0
        start = time.perf_counter()
        while inf_done < niter:
            # get idle infer request slot
            self.exenet.wait(num_requests=1, timeout=WaitMode.RESULT_READY)
            request_id = self.exenet.get_idle_request_id()
            infreq = self.exenet.requests[request_id]

            # if the slot is already in use, process the infer result
            if self.infer_slot[request_id][0] == True:
                inf_done += self.batch
                ocvIdx = self.infer_slot[request_id][1]  # OCV image index
                res = infreq.output_blobs[self.outputBlobName].buffer[0].ravel()
                self.infer_slot[request_id] = [False, 0]
            else:
                ocvIdx = -1

            # kick inference
            dataIdx = inf_kicked % len(self.blobImages)
            self.infer_slot[request_id] = [True, dataIdx]
            infreq.async_infer(inputs={self.inputBlobName: self.blobImages[dataIdx]})
            inf_kicked += 1

            # deferred postprocess & rendering
            if ocvIdx != -1:
                if ocvIdx % self.skip_count == 0:
                    ocvimg = self.ocvImages[ocvIdx].copy()
                    idx = (res.argsort())  # [::-1]
                    txt = self.labels[idx[-1]]
                    cv2.putText(ocvimg, txt, (0, ocvimg.shape[-2] // 2),
                                cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 0), 3)
                    cv2.putText(ocvimg, txt, (0, ocvimg.shape[-2] // 2),
                                cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 255), 2)
                    self.canvas.displayPane(ocvimg)
                if ocvIdx % (self.skip_count * 5) == 0:
                    framebuf_lock.acquire()
                    self.canvas.dispProgressBar(curItr=inf_done, ttlItr=niter,
                                                elapse=time.perf_counter() - start, max_fps=max_fps)
                    framebuf_lock.release()
                    self.canvas.markCurrentPane()

            if abort_flag == True:
                break

        end = time.perf_counter()

        if abort_flag == False:
            # Display the result
            print('Time: {:8.2f} sec, Throughput: {:8.2f} inf/sec'.format(end - start, niter / (end - start)))
            framebuf_lock.acquire()
            self.canvas.dispProgressBar(curItr=niter, ttlItr=niter, elapse=end - start, max_fps=max_fps)
            framebuf_lock.release()
            glutPostRedisplay()
            time.sleep(5)
        else:
            print('Program aborted')
            abort_flag = True
def main(): log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout) args = build_argparser().parse_args() log.info("Creating Inference Engine") ie = IECore() ie.set_config({"PERF_COUNT": "YES" if args.perf_counts else "NO"}, args.device) encoder = read_net(args.m_encoder, ie, args.device) dec_step = read_net(args.m_decoder, ie, args.device) batch_dim, channels, height, width = encoder.input_info[ 'imgs'].input_data.shape assert batch_dim == 1, "Demo only works with batch size 1." assert channels in (1, 3), "Input image is not 1 or 3 channeled image." target_shape = (height, width) images_list = [] if os.path.isdir(args.input): inputs = sorted( os.path.join(args.input, inp) for inp in os.listdir(args.input)) else: inputs = [args.input] log.info("Loading vocab file") vocab = Vocab(args.vocab_path) log.info("Loading and preprocessing images") for filenm in tqdm(inputs): image_raw = cv.imread(filenm) assert image_raw is not None, "Error reading image {}".format(filenm) image = preprocess_image(PREPROCESSING[args.preprocessing_type], image_raw, target_shape) record = dict(img_name=filenm, img=image, formula=None) images_list.append(record) log.info("Loading networks") exec_net_encoder = ie.load_network(network=encoder, device_name=args.device) exec_net_decoder = ie.load_network(network=dec_step, device_name=args.device) log.info("Starting inference") for rec in tqdm(images_list): image = rec['img'] enc_res = exec_net_encoder.infer(inputs={args.imgs_layer: image}) # get results row_enc_out = enc_res[args.row_enc_out_layer] dec_states_h = enc_res[args.hidden_layer] dec_states_c = enc_res[args.context_layer] output = enc_res[args.init_0_layer] tgt = np.array([[START_TOKEN]]) logits = [] for _ in range(args.max_formula_len): dec_res = exec_net_decoder.infer( inputs={ args.row_enc_out_layer: row_enc_out, args.dec_st_c_layer: dec_states_c, args.dec_st_h_layer: dec_states_h, args.output_prev_layer: output, args.tgt_layer: tgt }) dec_states_h = dec_res[args.dec_st_h_t_layer] dec_states_c = dec_res[args.dec_st_c_t_layer] output = dec_res[args.output_layer] logit = dec_res[args.logit_layer] logits.append(logit) tgt = np.array([[np.argmax(logit, axis=1)]]) if tgt[0][0][0] == END_TOKEN: break if args.perf_counts: log.info("Encoder performance statistics") print_stats(exec_net_encoder) log.info("Decoder performance statistics") print_stats(exec_net_decoder) logits = np.array(logits) logits = logits.squeeze(axis=1) targets = np.argmax(logits, axis=1) if args.output_file: with open(args.output_file, 'a') as output_file: output_file.write(rec['img_name'] + '\t' + vocab.construct_phrase(targets) + '\n') else: print("Image name: {}\nFormula: {}\n".format( rec['img_name'], vocab.construct_phrase(targets))) log.info( "This demo is an API example, for any performance measurements please use the dedicated benchmark_app tool " "from the openVINO toolkit\n")
def on_select(item):
    ax1 = fig.add_subplot(gs[2, :])
    ax2 = fig.add_subplot(gs[1, 3])
    image = plt.imread("openvino-logo.png")
    ax2.axis('off')
    ax2.imshow(image)

    if 'clear' in (item.labelstr):
        ax1.cla()
    else:
        log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
        args = build_argparser().parse_args()

        # read input data
        if 'Async' in (item.labelstr):
            ecg_data = load.load_ecg("A00001.mat")
        else:
            ecg_data = load.load_ecg(item.labelstr)
        preproc = util.load(".")
        input_ecg = preproc.process_x([ecg_data])
        ecg_n, ecg_h, ecg_w = input_ecg.shape
        log.info("Input ecg file shape: {}".format(input_ecg.shape))
        input_ecg_plot = np.squeeze(input_ecg)

        # raw signal plot
        Fs = 1000
        N = len(input_ecg_plot)
        T = (N - 1) / Fs
        ts = np.linspace(0, T, N, endpoint=False)
        ax1.plot(ts, input_ecg_plot, label=item.labelstr, lw=2)
        ax1.set_ylabel('Amplitude')
        ax1.set_title("ECG Raw signal: length - {}, Freq - 1000 Hz".format(ecg_h))
        ax1.legend(loc='upper right')

        # choose proper IRs
        if (input_ecg.shape[1] == 8960):
            model_xml = "tf_model_8960_fp16.xml"
            model_bin = os.path.splitext(model_xml)[0] + ".bin"
        elif (input_ecg.shape[1] == 17920):
            model_xml = "tf_model_17920_fp16.xml"
            model_bin = os.path.splitext(model_xml)[0] + ".bin"

        # Plugin initialization for specified device and load extensions library if specified
        log.info("OpenVINO Initializing plugin for {} device...".format(args.device))
        ie = IECore()

        # Read IR
        log.info("OpenVINO Reading IR...")
        net = IENetwork(model=model_xml, weights=model_bin)
        assert len(net.inputs.keys()) == 1, "Demo supports only single input topologies"

        if args.cpu_extension and 'CPU' in args.device:
            ie.add_extension(args.cpu_extension, "CPU")

        config = {'PERF_COUNT': ('YES' if args.perf_counts else 'NO')}
        device_nstreams = parseValuePerDevice(args.device, None)
        if ('Async' in (item.labelstr)) and ('CPU' in (args.device)):
            ie.set_config({'CPU_THROUGHPUT_STREAMS': str(device_nstreams.get(args.device))
                           if args.device in device_nstreams.keys()
                           else 'CPU_THROUGHPUT_AUTO'}, args.device)
            device_nstreams[args.device] = int(ie.get_config(args.device, 'CPU_THROUGHPUT_STREAMS'))

        # prepare input blob
        input_blob = next(iter(net.inputs))

        # load IR to plugin
        log.info("Loading network with plugin...")
        n, h, w = net.inputs[input_blob].shape
        log.info("Network input shape: {}".format(net.inputs[input_blob].shape))
        if 'Async' in (item.labelstr):
            exec_net = ie.load_network(net, args.device, config=config, num_requests=12)
            infer_requests = exec_net.requests
            request_queue = InferRequestsQueue(infer_requests)
        else:
            exec_net = ie.load_network(net, args.device)
        output_blob = next(iter(net.outputs))
        del net

        # Do infer
        inf_start = time.time()
        if 'Async' in (item.labelstr):
            for i in range(12):
                infer_request = request_queue.getIdleRequest()
                if not infer_request:
                    raise Exception("No idle Infer Requests!")
                infer_request.startAsync({input_blob: input_ecg})
            request_queue.waitAll()
        else:
            res = exec_net.infer({input_blob: input_ecg})
        inf_end = time.time()

        if 'Async' in (item.labelstr):
            det_time = (inf_end - inf_start) / 12
            res = exec_net.requests[0].outputs[output_blob]
        else:
            det_time = inf_end - inf_start
            res = res[output_blob]
        del exec_net

        print("[Performance] each inference time:{} ms".format(det_time * 1000))

        prediction = sst.mode(np.argmax(res, axis=2).squeeze())[0][0]
        result = preproc.int_to_class[prediction]
        ax1.set_xlabel('File: {}, Intel OpenVINO Infer_perf for each input: {}ms, classification_result: {}'
                       .format(item.labelstr, det_time * 1000, result),
                       fontsize=15, color="c", fontweight='bold')

    ax1.grid()
class ProcessFrame:
    def __init__(self, args: Dict):
        # todo: proper handling of self.modes
        self.ie = IECore()
        self.modes = self.__determine_processing_mode(args)
        net_face_detect = net_landmarks_detect = net_recognize_face = None

        if not self.modes['detect']:
            raise ValueError('detection model undefined')

        # load networks from file
        net_face_detect = self.__prepare_network(args['detection_model'])
        # put it to the corresponding class
        # self.face_locator = FaceLocator(net_face_detect, args['detection_model_threshold'])
        self.face_locator = FaceLocator(net_face_detect, args['detection_model_threshold'],
                                        NetworkType(args['detection_model']))

        # setup device plugins
        if next(iter(args['device'])) == 'CPU':
            self.ie.set_config(config={
                "CPU_THROUGHPUT_STREAMS": "1",
                "CPU_THREADS_NUM": "8",
            }, device_name='CPU')
        elif next(iter(args['device'])) == 'GPU':
            self.ie.set_config(config={"GPU_THROUGHPUT_STREAMS": "1"}, device_name='GPU')
        elif next(iter(args['device'])) == 'MYRIAD':
            pass

        # load to device for inferencing
        self.face_locator.deploy_network(next(iter(args['device'])), self.ie)

        if self.modes['landmark']:
            net_landmarks_detect = self.__prepare_network(args['landmarks_model'])
            self.landmarks_locator = LandmarksLocator(net_landmarks_detect)
            self.landmarks_locator.deploy_network(next(iter(args['device'])), self.ie)

        if self.modes['recognize']:
            net_recognize_face = self.__prepare_network(args['recognition_model'])
            self.face_recognizer = FaceRecognizer(net_recognize_face)
            self.face_recognizer.deploy_network(next(iter(args['device'])), self.ie)
        # todo: other models or load separately

    @staticmethod
    def __determine_processing_mode(args: Dict) -> Dict[str, bool]:
        ret = {}
        ret['detect'] = bool(args['detection_model'])
        ret['landmark'] = bool(args['landmarks_model'])
        ret['recognize'] = bool(args['recognition_model'])
        return ret

    def __prepare_network(self, model_path: str) -> IENetwork:
        model_path = os.path.abspath(model_path)
        model = self.ie.read_network(model=model_path,
                                     weights=os.path.splitext(model_path)[0] + ".bin")
        return model

    def process_frame(self, frame: np.ndarray) -> List[Union[List[FaceLocator.FacePosition],
                                                             List[LandmarksLocator.FaceLandmarks],
                                                             List[FaceRecognizer.FaceIdentity]]]:
        # todo: union with None
        faces_landmarks = faces_identities = None
        face_positions = self.face_locator.get_face_positions(frame)
        if self.modes['landmark']:
            faces_landmarks = self.landmarks_locator.get_landmarks(frame, face_positions)
        if self.modes['recognize']:
            faces_identities = self.face_recognizer.get_identities(frame, face_positions, faces_landmarks)
        return [face_positions, faces_landmarks, faces_identities]
def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = parse_args()

    # ---------------------------Step 1. Initialize inference engine core--------------------------------------------
    log.info('Creating Inference Engine')
    ie = IECore()

    if args.extension and args.device == 'CPU':
        log.info(f'Loading the {args.device} extension: {args.extension}')
        ie.add_extension(args.extension, args.device)

    if args.config and args.device in ('GPU', 'MYRIAD', 'HDDL'):
        log.info(f'Loading the {args.device} configuration: {args.config}')
        ie.set_config({'CONFIG_FILE': args.config}, args.device)

    # ---------------------------Step 2. Read a model in OpenVINO Intermediate Representation or ONNX format---------
    log.info(f'Reading the network: {args.model}')
    # (.xml and .bin files) or (.onnx file)
    net = ie.read_network(model=args.model)

    if len(net.input_info) != 1:
        log.error('Sample supports only single input topologies')
        return -1
    if len(net.outputs) != 1:
        log.error('Sample supports only single output topologies')
        return -1

    # ---------------------------Step 3. Configure input & output----------------------------------------------------
    log.info('Configuring input and output blobs')
    # Get names of input and output blobs
    input_blob = next(iter(net.input_info))
    out_blob = next(iter(net.outputs))

    # Set input and output precision manually
    net.input_info[input_blob].precision = 'U8'
    net.outputs[out_blob].precision = 'FP32'

    # Set batch size to the number of input images
    net.batch_size = len(args.input)

    # ---------------------------Step 4. Loading model to the device-------------------------------------------------
    log.info('Loading the model to the plugin')
    exec_net = ie.load_network(network=net, device_name=args.device)

    # ---------------------------Step 5. Create infer request--------------------------------------------------------
    # load_network() method of the IECore class with a specified number of requests (default 1) returns an ExecutableNetwork
    # instance which stores infer requests. So you already created Infer requests in the previous step.

    # ---------------------------Step 6. Prepare input----------------------------------------------------------------
    original_images = []

    n, c, h, w = net.input_info[input_blob].input_data.shape
    input_data = np.ndarray(shape=(n, c, h, w))

    for i in range(n):
        image = cv2.imread(args.input[i])
        original_images.append(image)

        if image.shape[:-1] != (h, w):
            log.warning(f'Image {args.input[i]} is resized from {image.shape[:-1]} to {(h, w)}')
            image = cv2.resize(image, (w, h))

        # Change data layout from HWC to CHW
        image = image.transpose((2, 0, 1))
        input_data[i] = image

    # ---------------------------Step 7. Do inference-----------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    res = exec_net.infer(inputs={input_blob: input_data})

    # ---------------------------Step 8. Process output---------------------------------------------------------------
    res = res[out_blob]

    for i in range(n):
        output_image = res[i]
        # Change data layout from CHW to HWC
        output_image = output_image.transpose((1, 2, 0))
        # Convert BGR color order to RGB
        output_image = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)
        # Apply mean argument values
        output_image = output_image[::] - (args.mean_val_r, args.mean_val_g, args.mean_val_b)
        # Clip pixel values between 0 and 255
        output_image = np.clip(output_image, 0, 255)

        # Resize the output image to the original size
        if args.original_size:
            h, w, _ = original_images[i].shape
            output_image = cv2.resize(output_image, (w, h))

        cv2.imwrite(f'out_{i}.bmp', output_image)
        if os.path.exists(f'out_{i}.bmp'):
            log.info(f'Image out_{i}.bmp created!')
        else:
            log.error(f'Image out_{i}.bmp was not created. Check your permissions.')

    # -----------------------------------------------------------------------------------------------------------------
    log.info('This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n')
    return 0
def add_extension(iecore: IECore, path_to_extension: str, device: str):
    if path_to_extension:
        if device == 'GPU':
            iecore.set_config({'CONFIG_FILE': path_to_extension}, device)
        if device == 'CPU' or device == 'MYRIAD':
            iecore.add_extension(path_to_extension, device)
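A small usage sketch, assuming args carries a -l/--cpu_extension style path as in the demos above:

ie = IECore()
add_extension(ie, path_to_extension=args.cpu_extension, device='CPU')
# For GPU, the same path is treated as a CONFIG_FILE rather than a loadable library.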
def benchmark_embedded_python_api(path_to_model_file):
    """
    Perform a benchmark with dummy inputs and return the inference requests' latency measurement result.
    :param path_to_model_file: if model is not provided, xml model file name.
    :return: latency metrics.
    """

    def get_dummy_inputs(batch_size, input_info, requests):
        """
        Generate dummy inputs based on input and batch information.
        :param batch_size: batch size
        :param input_info: network's input info
        :param requests: the network's requests
        :return: requests_input_data
        """
        requests_input_data = []
        input_data = {}
        np_d_type = {
            'FP64': np.float64,
            'I32': np.int32,
            'FP32': np.float32,
            'FP16': np.float16,
            'U16': np.uint16,
            'I16': np.int16,
            'U8': np.uint8,
            'I8': np.int8
        }
        for key, value in input_info.items():
            m = []
            dt = np_d_type[value.precision]
            for x in value.input_data.shape:
                m.append(x)
            m[0] = m[0] * batch_size
            input_data[key] = np.empty(tuple(m), dtype=dt)
        for _ in range(len(requests)):
            requests_input_data.append(input_data)
        return requests_input_data

    xml_filename = path_to_model_file
    bin_filename = path_to_model_file[:(len(path_to_model_file) - 4)] + '.bin'
    if not os.path.exists(bin_filename):
        logger.error('{} does not exist.'.format(bin_filename))
        return None

    ie = IECore()
    ie_network = ie.read_network(xml_filename, bin_filename)
    device = 'CPU'
    config = {'PERF_COUNT': 'NO'}
    ie.set_config({'CPU_BIND_THREAD': str(benchmark_cfg['cpu_bind_thread'])}, device)
    if benchmark_cfg['nthreads'] is not None and benchmark_cfg['nthreads']:
        ie.set_config({'CPU_THREADS_NUM': str(benchmark_cfg['nthreads'])}, device)
    if benchmark_cfg['nstreams'] is not None:
        ie.set_config({'CPU_THROUGHPUT_STREAMS': str(benchmark_cfg['nstreams'])}, device)
    exe_network = ie.load_network(ie_network, device, config=config, num_requests=benchmark_cfg['nireq'])

    infer_requests = exe_network.requests
    batch_size = ie_network.batch_size
    request_queue = InferRequestsQueue(infer_requests)
    requests_input_data = get_dummy_inputs(batch_size, ie_network.input_info, infer_requests)

    # warming up
    infer_request = request_queue.get_idle_request()
    if benchmark_cfg['api_type'] == 'sync':
        infer_request.infer(requests_input_data[infer_request.id])
    else:
        infer_request.start_async(requests_input_data[infer_request.id])
    request_queue.wait_all()
    request_queue.reset_times()

    start_time = datetime.now()
    exec_time = (datetime.now() - start_time).total_seconds()
    iteration = 0
    logger.info('Starting benchmark, will be done in {} seconds with {} api via python interface.'
                .format(benchmark_cfg['duration_seconds'], benchmark_cfg['api_type']))
    while exec_time < benchmark_cfg['duration_seconds']:
        infer_request = request_queue.get_idle_request()
        if not infer_request:
            raise Exception('No idle Infer Requests!')
        if benchmark_cfg['api_type'] == 'sync':
            infer_request.infer(requests_input_data[infer_request.id])
        else:
            infer_request.start_async(requests_input_data[infer_request.id])
        iteration += 1
        exec_time = (datetime.now() - start_time).total_seconds()

    request_queue.wait_all()

    # Filter out upper-tail latency outliers using the interquartile range
    t = np.array(request_queue.times)
    q75, q25 = np.percentile(t, [75, 25])
    IQR = q75 - q25
    filtered_times = t[t < (q75 + 1.5 * IQR)]
    logger.debug('benchmark result: latency_filtered_mean:{0:.3f}ms, latency_minimum: {1:.3f}ms, '
                 'using {2} requests of total {3} ones for latency calculation.'
                 .format(filtered_times.mean(), filtered_times.min(), filtered_times.size, t.size))
    del exe_network
    del ie
    del ie_network
    return filtered_times.mean()
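The function relies on a module-level benchmark_cfg dict; a hypothetical example of populating it before the call (the keys are taken from the reads above, the values are illustrative):

benchmark_cfg = {
    'cpu_bind_thread': 'YES',
    'nthreads': 4,
    'nstreams': 1,
    'nireq': 4,
    'api_type': 'async',
    'duration_seconds': 20,
}
mean_latency_ms = benchmark_embedded_python_api('model.xml')  # placeholder path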
def set_plugin_config(core: IECore, device: str, config: str = None):
    core.set_config(get_config_dictionary(config_file=config), device_name=device)
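# Usage sketch: set_plugin_config depends on a get_config_dictionary helper
# defined elsewhere in this codebase that parses a config file into a dict.
# The device and file name below are placeholder assumptions.
def _demo_set_plugin_config():
    core = IECore()
    set_plugin_config(core, device='MYRIAD', config='vpu.conf')  # hypothetical config file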
def main(thres):
    # Global variables
    LEG_LABEL = np.load('./groundtruth.npy')
    threshold = thres
    print(threshold)

    # Initialize analyzing parameters
    previous_pose_kpts = []
    result = [-1, -1, -1, -1, -1]
    count = 0
    start_frame, end_frame = 1000000, -1
    max_angle = 0
    min_angle = 90
    completed_half = False
    total_len_frame = 0

    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + '.bin'

    with tf.Session() as sess:
        # model_cfg, model_outputs = posenet.load_model(101, sess)
        # output_stride = model_cfg['output_stride']
        output_stride = 16
        checkraiseup, rightarm = 0, 720
        rawCapture = PiRGBArray(camera, size=(640, 480))
        start = time.time()
        frame_count = 0
        framenum = 0
        score_list = []

        ie = IECore()
        if args.cpu_extension and 'CPU' in args.device:
            ie.add_extension(args.cpu_extension, 'CPU')
        # Read IR
        ie.set_config({'VPU_HW_STAGES_OPTIMIZATION': 'NO'}, 'MYRIAD')
        net = IENetwork(model=model_xml, weights=model_bin)
        n, c, w, h = net.inputs['image'].shape  # e.g. 337, 513
        net.batch_size = n
        exec_net = ie.load_network(network=net, device_name=args.device, num_requests=2)
        del net

        for frame in camera.capture_continuous(rawCapture, format='bgr',
                                               use_video_port=True, splitter_port=1):
            frame_start = time.time()
            pos_temp_data = []
            framenum += 1
            input_image = frame.array
            input_image, display_img, output_scale = _process_input(
                input_image, scale_factor=args.scale_factor, output_stride=output_stride)
            print('display: ', display_img.shape)
            print('preprocess: ', input_image.shape)
            input_image = np.expand_dims(input_image, 0)
            input_image = np.transpose(input_image, (0, 3, 1, 2))

            res = exec_net.infer({'image': input_image})
            heatmaps_result = res['heatmap']
            offsets_result = res['offset_2/Add']
            displacement_fwd_result = res['displacement_fwd_2/Add']
            displacement_bwd_result = res['displacement_bwd_2/Add']

            # Change layout back from NCHW to NHWC for decoding
            heatmaps_result = np.transpose(heatmaps_result, (0, 2, 3, 1))
            offsets_result = np.transpose(offsets_result, (0, 2, 3, 1))
            displacement_fwd_result = np.transpose(displacement_fwd_result, (0, 2, 3, 1))
            displacement_bwd_result = np.transpose(displacement_bwd_result, (0, 2, 3, 1))

            pose_scores, keypoint_scores, keypoint_coords = posenet.decode_multi.decode_multiple_poses(
                heatmaps_result.squeeze(axis=0),
                offsets_result.squeeze(axis=0),
                displacement_fwd_result.squeeze(axis=0),
                displacement_bwd_result.squeeze(axis=0),
                output_stride=output_stride,
                max_pose_detections=10,
                min_pose_score=0.1)
            keypoint_coords *= output_scale

            # Convert PoseNet keypoints to OpenPose format
            openpose_keypoints = posenet2openpose(keypoint_coords)

            # Select joints
            select_keypoints = np.concatenate(
                (openpose_keypoints[2], openpose_keypoints[5], openpose_keypoints[8],
                 openpose_keypoints[10], openpose_keypoints[11], openpose_keypoints[13])).reshape(-1, 2)

            # Analyze posture
            previous_pose_kpts.append(select_keypoints)
            liftoneleg = LiftOneLeg(previous_pose_kpts)
            angle, leg_status = liftoneleg.check_leg_up_down()
            if angle > max_angle:
                max_angle = angle
                max_frame = cv2.imwrite('./blog/static/img/best.png', display_img)

            # Update status and count
            leg_status, completed_half, count_update, start_frame_update, end_frame_update = \
                liftoneleg.count_repetition(angle, leg_status, completed_half,
                                            count, framenum, start_frame, end_frame)
            if count_update == count + 1:
                print('count : %d' % count_update)
                score = test_per_frame(
                    previous_pose_kpts[start_frame - total_len_frame:end_frame - total_len_frame],
                    LEG_LABEL)
                print('**************************')
                print('score : %d' % score)
                score_list.append(score)
                with open('score.txt', 'w') as f:
                    f.write(str(int(score)))
                total_len_frame += len(previous_pose_kpts)
                previous_pose_kpts = []

            count, start_frame, end_frame = count_update, start_frame_update, end_frame_update
            with open('demofile.txt', 'w') as f:
                f.write(str(count))

            # Write the feedback files once five repetitions are done
            if count == 5:
                exercise_time = time.time() - start
                # Write max angle
                with open('max_angle.txt', 'w') as f:
                    f.write(str(int(max_angle)))
                # Write exercise time
                with open('time.txt', 'w') as f:
                    f.write(str(int(exercise_time)))
                # Write score
                with open('final_score.txt', 'w') as f:
                    f.write(str(int(sum(score_list) / count)))
                sys.exit(0)

            overlay_image = posenet.draw_skel_and_kp(
                display_img, pose_scores, keypoint_scores, keypoint_coords,
                min_pose_score=0.1, min_part_score=0.1)
            cv2.imshow('posenet', overlay_image)
            rawCapture.truncate(0)
            frame_count += 1
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        print('Average FPS: ', frame_count / (time.time() - start))
def main():
    path = os.getcwd()
    print('Welcome to Blindspot Assistance')
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + '.bin'

    log.info('Creating Inference Engine...')
    ie = IECore()
    if args.cpu_threads:
        ie.set_config({'CPU_THREADS_NUM': args.cpu_threads}, args.device)
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, 'CPU')

    # Read IR
    log.info('Loading network files:\n\t{}\n\t{}'.format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if 'CPU' in args.device:
        supported_layers = ie.query_network(net, 'CPU')
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error('Following layers are not supported by the plugin for specified device {}:\n {}'
                      .format(args.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify the cpu extensions library path in the sample's "
                      'command line parameters using -l or --cpu_extension command line argument')
            sys.exit(1)

    img_info_input_blob = None
    feed_dict = {}
    for blob_name in net.inputs:
        if len(net.inputs[blob_name].shape) == 4:
            input_blob = blob_name
        elif len(net.inputs[blob_name].shape) == 2:
            img_info_input_blob = blob_name
        else:
            raise RuntimeError("Unsupported {}D input layer '{}'. Only 2D and 4D input layers are supported"
                               .format(len(net.inputs[blob_name].shape), blob_name))

    assert len(net.outputs) == 1, 'Demo supports only single output topologies'
    out_blob = next(iter(net.outputs))

    log.info('Loading IR to the plugin...')
    exec_net = ie.load_network(network=net, num_requests=2, device_name=args.device)

    # Read and pre-process input image
    n, c, h, w = net.inputs[input_blob].shape
    if img_info_input_blob:
        feed_dict[img_info_input_blob] = [h, w, 1]

    if args.input == 'cam':
        input_stream = 0
    else:
        input_stream = args.input
        # Detect whether the input is a YouTube video (with Pafy)
        if 'youtube.com' in input_stream:
            video = pafy.new(url=input_stream)
            stream = video.getbest()
            input_stream = stream.url
        else:
            assert os.path.isfile(args.input), "Specified input file doesn't exist"

    if args.labels:
        with open(args.labels, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    cap = cv2.VideoCapture(input_stream)

    if args.output:
        FILE_OUTPUT = args.output
        if os.path.isfile(FILE_OUTPUT):
            os.remove(FILE_OUTPUT)
        fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
        fps = cap.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(FILE_OUTPUT, fourcc, fps,
                              (int(cap.get(3)), int(cap.get(4))))

    cur_request_id = 0
    next_request_id = 1

    log.info('Starting inference in async mode...')
    is_async_mode = True
    render_time = 0
    ret, frame = cap.read()

    # ROI: auto-selected left 25% of the frame
    roi = [0, 0, int(cap.get(3) * 0.25), int(cap.get(4))]

    print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")
    print('To switch between sync/async modes, press TAB key in the output window')

    object_time = 0
    alarm = False
    object_detected = False

    while cap.isOpened():
        if is_async_mode:
            ret, next_frame = cap.read()
        else:
            ret, frame = cap.read()
        if not ret:
            break
        initial_w = cap.get(3)
        initial_h = cap.get(4)

        # Selected rectangle overlay
        overlay = frame.copy()
        cv2.rectangle(overlay, (roi[0], roi[1]),
                      (roi[0] + roi[2], roi[1] + roi[3]), (0, 0, 0), -1)  # A filled rectangle
        alpha = 0.3  # Transparency factor
        # The following line overlays the transparent rectangle on the image
        cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)

        # Main sync point:
        # in the truly async mode we start the NEXT infer request while waiting for the CURRENT one to complete;
        # in the regular mode we start the CURRENT request and immediately wait for its completion
        inf_start = time.time()
        if is_async_mode:
            in_frame = cv2.resize(next_frame, (w, h))
            in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
            in_frame = in_frame.reshape((n, c, h, w))
            feed_dict[input_blob] = in_frame
            exec_net.start_async(request_id=next_request_id, inputs=feed_dict)
        else:
            in_frame = cv2.resize(frame, (w, h))
            in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
            in_frame = in_frame.reshape((n, c, h, w))
            feed_dict[input_blob] = in_frame
            exec_net.start_async(request_id=cur_request_id, inputs=feed_dict)

        if exec_net.requests[cur_request_id].wait(-1) == 0:
            inf_end = time.time()
            det_time = inf_end - inf_start

            # Parse detection results of the current request:
            # each detection is [image_id, label, conf, x_min, y_min, x_max, y_max]
            res = exec_net.requests[cur_request_id].outputs[out_blob]
            for obj in res[0][0]:
                # Draw only objects whose probability exceeds the specified threshold
                if obj[2] > args.prob_threshold:
                    xmin = int(obj[3] * initial_w)
                    ymin = int(obj[4] * initial_h)
                    xmax = int(obj[5] * initial_w)
                    ymax = int(obj[6] * initial_h)
                    class_id = int(obj[1])
                    # Draw box and label/class_id
                    color = switch_class_color(class_id)
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 1)
                    det_label = labels_map[class_id] if labels_map else str(switch_class(class_id))
                    cv2.putText(frame, det_label + ' ' + str(round(obj[2] * 100, 1)) + ' %',
                                (xmin, ymin - 7), cv2.FONT_HERSHEY_COMPLEX, 0.5, color, 1)
                    # Check whether the box overlaps the ROI horizontally and vertically
                    if (xmin > roi[0] and xmin < roi[0] + roi[2]) or \
                            (xmax > roi[0] and xmax < roi[0] + roi[2]) or \
                            (xmin < roi[0] and xmax > roi[0] + roi[2]):
                        if (ymin > roi[1] and ymin < roi[1] + roi[3]) or \
                                (ymax > roi[1] and ymax < roi[1] + roi[3]) or \
                                (ymin < roi[1] and ymax > roi[1] + roi[3]):
                            object_detected = True
                            last_object = str(switch_class(class_id))

            # Keep the alarm on for two seconds after the last detection
            if object_detected:
                object_time = time.time()
                object_detected = False
                alarm = True
            else:
                if time.time() - object_time > 2:
                    alarm = False
            if alarm:
                cv2.circle(frame, (25, 50), 10, (0, 0, 255), -1)
                cv2.putText(frame, 'Last object detected: ' + last_object, (40, 55),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 255), 1)
            else:
                cv2.circle(frame, (25, 50), 10, (0, 255, 0), -1)
                cv2.putText(frame, 'Nothing detected', (40, 55),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 255, 0), 1)

            # Draw performance stats
            inf_time_message = 'Inference time: N/A for async mode' if is_async_mode else \
                'Inference time: {:.3f} ms'.format(det_time * 1000)
            render_time_message = 'OpenCV rendering time: {:.3f} ms'.format(render_time * 1000)
            async_mode_message = 'Async mode is on. Processing request {}'.format(cur_request_id) if is_async_mode else \
                'Async mode is off. Processing request {}'.format(cur_request_id)
            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            cv2.putText(frame, render_time_message, (15, 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            cv2.putText(frame, async_mode_message, (10, int(initial_h - 20)),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

        render_start = time.time()
        if args.output:
            out.write(frame)
        if not args.hide_output:
            cv2.imshow('Detection Results', frame)
        render_end = time.time()
        render_time = render_end - render_start

        if is_async_mode:
            cur_request_id, next_request_id = next_request_id, cur_request_id
            frame = next_frame

        key = cv2.waitKey(1)
        if key == ord('l'):
            showCrosshair = False
            fromCenter = True
            roi = cv2.selectROI('Detection Results', frame, fromCenter, showCrosshair)
        if key == 27:  # ESC
            break
        if key == 9:  # TAB: switch between sync and async modes
            if exec_net.requests[cur_request_id].wait(-1) == 0:
                is_async_mode = not is_async_mode
                log.info('Switched to {} mode'.format('async' if is_async_mode else 'sync'))

    cv2.destroyAllWindows()
class Benchmark:
    def __init__(self, device: str, number_infer_requests: int = None,
                 number_iterations: int = None, duration_seconds: int = None,
                 api_type: str = 'async'):
        self.device = device
        self.ie = IECore()
        self.nireq = number_infer_requests
        self.niter = number_iterations
        self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
        self.api_type = api_type

    def __del__(self):
        del self.ie

    def add_extension(self, path_to_extension: str = None, path_to_cldnn_config: str = None):
        if path_to_cldnn_config:
            self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME)
            logger.info('GPU extensions are loaded: {}'.format(path_to_cldnn_config))
        if path_to_extension:
            self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME)
            logger.info('CPU extensions are loaded: {}'.format(path_to_extension))

    def get_version_info(self) -> str:
        logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format('', 'API version', get_version()))
        version_string = 'Device info\n'
        for device, version in self.ie.get_versions(self.device).items():
            version_string += '{: <9}{}\n'.format('', device)
            version_string += '{: <9}{:.<24}{} {}.{}\n'.format(
                '', version.description, ' version', version.major, version.minor)
            version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number)
        return version_string

    def set_config(self, config={}):
        for device in config.keys():
            self.ie.set_config(config[device], device)

    def read_network(self, path_to_model: str):
        model_filename = os.path.abspath(path_to_model)
        head, ext = os.path.splitext(model_filename)
        weights_filename = os.path.abspath(head + BIN_EXTENSION) if ext == XML_EXTENSION else ''
        ie_network = self.ie.read_network(model_filename, weights_filename)
        return ie_network

    def load_network(self, ie_network: IENetwork, config={}):
        exe_network = self.ie.load_network(
            ie_network, self.device, config=config,
            num_requests=1 if self.api_type == 'sync' else self.nireq or 0)
        # Number of requests
        self.nireq = len(exe_network.requests)
        return exe_network

    def import_network(self, path_to_file: str, config={}):
        exe_network = self.ie.import_network(
            model_file=path_to_file, device_name=self.device, config=config,
            num_requests=1 if self.api_type == 'sync' else self.nireq or 0)
        # Number of requests
        self.nireq = len(exe_network.requests)
        return exe_network

    def first_infer(self, exe_network):
        infer_request = exe_network.requests[0]
        # Warming up - out of scope
        if self.api_type == 'sync':
            infer_request.infer()
        else:
            infer_request.async_infer()
            status = exe_network.wait()
            if status != StatusCode.OK:
                raise Exception('Wait for all requests failed with status code {}!'.format(status))
        return infer_request.latency

    def infer(self, exe_network, batch_size, progress_bar=None):
        progress_count = 0
        infer_requests = exe_network.requests
        start_time = datetime.utcnow()
        exec_time = 0
        iteration = 0
        times = []
        in_fly = set()
        # Start inference & calculate performance.
        # Align the number of iterations to guarantee that the last infer requests
        # are executed in the same conditions.
        while (self.niter and iteration < self.niter) or \
                (self.duration_seconds and exec_time < self.duration_seconds) or \
                (self.api_type == 'async' and iteration % self.nireq):
            if self.api_type == 'sync':
                infer_requests[0].infer()
                times.append(infer_requests[0].latency)
            else:
                infer_request_id = exe_network.get_idle_request_id()
                if infer_request_id < 0:
                    status = exe_network.wait(num_requests=1)
                    if status != StatusCode.OK:
                        raise Exception('Wait for idle request failed!')
                    infer_request_id = exe_network.get_idle_request_id()
                    if infer_request_id < 0:
                        raise Exception('Invalid request id!')
                if infer_request_id in in_fly:
                    times.append(infer_requests[infer_request_id].latency)
                else:
                    in_fly.add(infer_request_id)
                infer_requests[infer_request_id].async_infer()
            iteration += 1
            exec_time = (datetime.utcnow() - start_time).total_seconds()
            if progress_bar:
                if self.duration_seconds:
                    # Calculate how many progress intervals are covered by the current
                    # iteration. This depends on the current iteration time and the time
                    # of each progress interval. Previously covered progress intervals
                    # must be skipped.
                    progress_interval_time = self.duration_seconds / progress_bar.total_num
                    new_progress = int(exec_time / progress_interval_time - progress_count)
                    progress_bar.add_progress(new_progress)
                    progress_count += new_progress
                elif self.niter:
                    progress_bar.add_progress(1)

        # Wait for the latest inference executions
        status = exe_network.wait()
        if status != StatusCode.OK:
            raise Exception('Wait for all requests failed with status code {}!'.format(status))

        total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
        for infer_request_id in in_fly:
            times.append(infer_requests[infer_request_id].latency)
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if self.api_type == 'sync' \
            else batch_size * iteration / total_duration_sec
        if progress_bar:
            progress_bar.finish()
        return fps, latency_ms, total_duration_sec, iteration
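# Usage sketch for the Benchmark class above; the model path is a hypothetical
# placeholder and the progress bar is omitted.
def _demo_benchmark():
    benchmark = Benchmark(device='CPU', number_infer_requests=4,
                          duration_seconds=10, api_type='async')
    ie_network = benchmark.read_network('model.xml')  # hypothetical path
    exe_network = benchmark.load_network(ie_network)
    benchmark.first_infer(exe_network)  # warm-up, excluded from the measurements
    fps, latency_ms, duration_sec, iterations = benchmark.infer(exe_network, batch_size=1)
    print('FPS: {:.2f}, median latency: {:.2f} ms over {} iterations'
          .format(fps, latency_ms, iterations))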
def main():
    args = build_argparser().parse_args()
    signal.signal(signal.SIGUSR1, receiveSignal)
    signal.signal(signal.SIGUSR2, receiveSignal2)
    signal.signal(signal.SIGINT, terminateProcess)
    signal.signal(signal.SIGTERM, terminateProcess)

    if args.rootCAPath is not None:
        # Data to AWS
        if args.mode not in AllowedActions:
            args.error('Unknown --mode option %s. Must be one of %s' % (args.mode, str(AllowedActions)))
            exit(2)
        if args.useWebsocket and args.certificatePath and args.privateKeyPath:
            args.error('X.509 cert authentication and WebSocket are mutually exclusive. Please pick one.')
            exit(2)
        if not args.useWebsocket and (not args.certificatePath or not args.privateKeyPath):
            args.error('Missing credentials for authentication.')
            exit(2)
        publicAWS = BasicPubSub(host=args.host, rootCAPath=args.rootCAPath,
                                certificatePath=args.certificatePath,
                                privateKeyPath=args.privateKeyPath, port=args.port,
                                useWebsocket=args.useWebsocket, clientId=args.clientId,
                                topic=args.topic, mode=args.mode, message=args.mode)
        publicAWS.suscribeMQTT()
        # End Data to AWS

    if not args.input:
        raise ValueError('--input option is expected')

    full_name = path.basename(args.input)
    extension = path.splitext(full_name)[1]
    if '.txt' in extension:
        # Open the full input path, not just its basename
        with open(args.input) as f:
            videos = [line.strip() for line in f.read().split('\n')]
    else:
        videos = [args.input]

    if args.labels:
        # Open the full labels path, not just its basename
        with open(args.labels) as f:
            labels = [l.strip() for l in f.read().strip().split('\n')]
    else:
        labels = None

    ie = IECore()
    if 'MYRIAD' in args.device:
        myriad_config = {'VPU_HW_STAGES_OPTIMIZATION': 'YES'}
        ie.set_config(myriad_config, 'MYRIAD')
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, 'CPU')

    decoder_target_device = 'CPU'
    if args.device != 'CPU':
        encoder_target_device = args.device
    else:
        encoder_target_device = decoder_target_device

    encoder_xml = args.m_encoder
    encoder_bin = args.m_encoder.replace('.xml', '.bin')
    decoder_xml = args.m_decoder
    decoder_bin = args.m_decoder.replace('.xml', '.bin')

    encoder = IEModel(encoder_xml, encoder_bin, ie, encoder_target_device,
                      num_requests=(3 if args.device == 'MYRIAD' else 1))
    decoder = IEModel(decoder_xml, decoder_bin, ie, decoder_target_device, num_requests=2)

    print('Waiting on signal')
    while True:
        time.sleep(1)
        if state['signal']:
            state['signal'] = False
            state['ready'] = False
            if args.rootCAPath:
                video_demo(encoder, decoder, videos, args.fps, labels, args.no_show, publicAWS)
            else:
                video_demo(encoder, decoder, videos, args.fps, labels, args.no_show)
            state['ready'] = True
def main():
    args = parse_arguments()

    # --------------------------------- 1. Load Plugin for inference engine ---------------------------------
    logger.info('Creating Inference Engine')
    ie = IECore()

    if 'CPU' in args.target_device:
        if args.path_to_extension:
            ie.add_extension(args.path_to_extension, 'CPU')
        if args.number_threads is not None:
            ie.set_config({'CPU_THREADS_NUM': str(args.number_threads)}, 'CPU')
    elif 'GPU' in args.target_device:
        if args.path_to_cldnn_config:
            ie.set_config({'CONFIG_FILE': args.path_to_cldnn_config}, 'GPU')
            logger.info('GPU extensions are loaded: {}'.format(args.path_to_cldnn_config))
    else:
        raise AttributeError(
            'Device {} does not support 3D convolution. '
            'Please use CPU, GPU or HETERO:*CPU*, HETERO:*GPU*'.format(args.target_device))

    logger.info('Device is {}'.format(args.target_device))
    version = ie.get_versions(args.target_device)[args.target_device]
    version_str = '{}.{}.{}'.format(version.major, version.minor, version.build_number)
    logger.info('Plugin version is {}'.format(version_str))

    # --------------------- 2. Read IR Generated by ModelOptimizer (.xml and .bin files) ---------------------
    xml_filename = os.path.abspath(args.path_to_model)
    bin_filename = os.path.abspath(os.path.splitext(xml_filename)[0] + '.bin')
    ie_network = IENetwork(xml_filename, bin_filename)

    input_info = ie_network.inputs
    if len(input_info) == 0:
        raise AttributeError('No inputs info is provided')
    elif len(input_info) != 1:
        raise AttributeError('Only single input layer networks are supported')

    input_name = next(iter(input_info))
    out_name = next(iter(ie_network.outputs))

    if args.shape:
        logger.info('Reshape of network from {} to {}'.format(input_info[input_name].shape, args.shape))
        ie_network.reshape({input_name: args.shape})
        input_info = ie_network.inputs

    # ---------------------------------------- 3. Preparing input data ----------------------------------------
    logger.info('Preparing inputs')

    if len(input_info[input_name].shape) != 5:
        raise AttributeError('Incorrect shape {} for 3d convolution network'.format(args.shape))

    n, c, d, h, w = input_info[input_name].shape
    ie_network.batch_size = n

    if not os.path.exists(args.path_to_input_data):
        raise AttributeError("Path to input data: '{}' does not exist".format(args.path_to_input_data))

    is_nifti_data = os.path.isdir(args.path_to_input_data)
    if is_nifti_data:
        series_name = find_series_name(args.path_to_input_data)
        original_data, data_crop, affine, original_size, bbox = \
            read_image(args.path_to_input_data, series_name=series_name, sizes=(h, w, d))
    else:
        if not (fnmatch(args.path_to_input_data, '*.tif') or fnmatch(args.path_to_input_data, '*.tiff')):
            raise AttributeError('Input file extension must have tiff format')
        data_crop = np.zeros(shape=(n, c, d, h, w), dtype=np.float)
        im_seq = ImageSequence.Iterator(Image.open(args.path_to_input_data))
        for i, page in enumerate(im_seq):
            im = np.array(page).reshape(h, w, c)
            for channel in range(c):
                data_crop[:, channel, i, :, :] = im[:, :, channel]
        original_data = data_crop
        original_size = original_data.shape[-3:]

    test_im = {input_name: data_crop}

    # ------------------------------------- 4. Loading model to the plugin -------------------------------------
    logger.info('Loading model to the plugin')
    executable_network = ie.load_network(network=ie_network, device_name=args.target_device)
    del ie_network

    # ---------------------------------------------- 5. Do inference --------------------------------------------
    logger.info('Start inference')
    start_time = datetime.now()
    res = executable_network.infer(test_im)
    infer_time = datetime.now() - start_time
    logger.info('Finish inference')
    logger.info('Inference time is {}'.format(infer_time))

    # ---------------------------- 6. Processing of the received inference results ------------------------------
    result = res[out_name]
    batch, channels, out_d, out_h, out_w = result.shape

    list_img = list()
    list_seg_result = list()

    logger.info('Processing of the received inference results is started')
    start_time = datetime.now()
    for batch, data in enumerate(result):
        seg_result = np.zeros(shape=original_size, dtype=np.uint8)
        if data.shape[1:] != original_size:
            x = bbox[1] - bbox[0]
            y = bbox[3] - bbox[2]
            z = bbox[5] - bbox[4]
            seg_result[bbox[0]:bbox[1], bbox[2]:bbox[3], bbox[4]:bbox[5]] = \
                np.argmax(resample_np(data, (channels, x, y, z), 1), axis=0)
        elif channels == 1:
            reshaped_data = data.reshape(out_d, out_h, out_w)
            mask = reshaped_data[:, :, :] > 0.5
            reshaped_data[mask] = 1
            seg_result = reshaped_data.astype(int)
        else:
            seg_result = np.argmax(data, axis=0).astype(int)

        im = np.stack([original_data[batch, 0, :, :, :],
                       original_data[batch, 0, :, :, :],
                       original_data[batch, 0, :, :, :]], axis=3)
        im = 255 * (im - im.min()) / (im.max() - im.min())
        color_seg_frame = np.zeros(im.shape, dtype=np.uint8)
        for idx, c in enumerate(CLASSES_COLOR_MAP):
            color_seg_frame[seg_result[:, :, :] == idx, :] = np.array(c, dtype=np.uint8)
        mask = seg_result[:, :, :] > 0
        im[mask] = color_seg_frame[mask]

        for k in range(out_d):
            if is_nifti_data:
                list_img.append(Image.fromarray(im[:, :, k, :].astype('uint8'), 'RGB'))
            else:
                list_img.append(Image.fromarray(im[k, :, :, :].astype('uint8'), 'RGB'))

        if args.output_nifti and is_nifti_data:
            list_seg_result.append(seg_result)

    result_processing_time = datetime.now() - start_time
    logger.info('Processing of the received inference results is finished')
    logger.info('Processing time is {}'.format(result_processing_time))

    # --------------------------------------------- 7. Save output -----------------------------------------------
    tiff_output_name = os.path.join(args.path_to_output, 'output.tiff')
    Image.new('RGB', (data.shape[3], data.shape[2])).save(
        tiff_output_name, append_images=list_img, save_all=True)
    logger.info('Result tiff file was saved to {}'.format(tiff_output_name))

    if args.output_nifti and is_nifti_data:
        for seg_res in list_seg_result:
            nii_filename = os.path.join(
                args.path_to_output, 'output_{}.nii.gz'.format(list_seg_result.index(seg_res)))
            nib.save(nib.Nifti1Image(seg_res, affine=affine), nii_filename)
            logger.info('Result nifti file was saved to {}'.format(nii_filename))
def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = parse_args()

    # ---------------------------Step 1. Initialize inference engine core--------------------------------------------------
    log.info('Creating Inference Engine')
    ie = IECore()
    if args.extension and args.device == 'CPU':
        log.info(f'Loading the {args.device} extension: {args.extension}')
        ie.add_extension(args.extension, args.device)
    if args.config and args.device in ('GPU', 'MYRIAD', 'HDDL'):
        log.info(f'Loading the {args.device} configuration: {args.config}')
        ie.set_config({'CONFIG_FILE': args.config}, args.device)

    # ---------------------------Step 2. Read a model in OpenVINO Intermediate Representation or ONNX format---------------
    log.info(f'Reading the network: {args.model}')
    # (.xml and .bin files) or (.onnx file)
    net = ie.read_network(model=args.model)

    if len(net.input_info) != 1:
        log.error('Sample supports only single input topologies')
        return -1
    if len(net.outputs) != 1:
        log.error('Sample supports only single output topologies')
        return -1

    # ---------------------------Step 3. Configure input & output----------------------------------------------------------
    log.info('Configuring input and output blobs')
    # Get names of input and output blobs
    input_blob = next(iter(net.input_info))
    out_blob = next(iter(net.outputs))

    # Set input and output precision manually
    net.input_info[input_blob].precision = 'U8'
    net.outputs[out_blob].precision = 'FP32'

    original_image = cv2.imread(args.input)
    image = original_image.copy()

    # Change data layout from HWC to CHW
    image = image.transpose((2, 0, 1))
    # Add N dimension to transform to NCHW
    image = np.expand_dims(image, axis=0)

    log.info('Reshaping the network to the height and width of the input image')
    log.info(f'Input shape before reshape: {net.input_info[input_blob].input_data.shape}')
    net.reshape({input_blob: image.shape})
    log.info(f'Input shape after reshape: {net.input_info[input_blob].input_data.shape}')

    # ---------------------------Step 4. Loading model to the device-------------------------------------------------------
    log.info('Loading the model to the plugin')
    exec_net = ie.load_network(network=net, device_name=args.device)

    # ---------------------------Step 5. Create infer request--------------------------------------------------------------
    # The load_network() method of the IECore class with a specified number of requests (default 1) returns
    # an ExecutableNetwork instance which stores infer requests. So you already created infer requests
    # in the previous step.

    # ---------------------------Step 6. Prepare input---------------------------------------------------------------------
    # This sample changes the network input layer shape instead of the image shape. See Step 4.

    # ---------------------------Step 7. Do inference----------------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    res = exec_net.infer(inputs={input_blob: image})

    # ---------------------------Step 8. Process output--------------------------------------------------------------------
    # Generate a label list
    if args.labels:
        with open(args.labels, 'r') as f:
            labels = [line.split(',')[0].strip() for line in f]

    res = res[out_blob]
    output_image = original_image.copy()
    h, w, _ = output_image.shape

    # Change the shape of the numpy.ndarray with results from [1, 1, N, 7] to [N, 7],
    # where N is the number of detected bounding boxes
    detections = res.reshape(-1, 7)

    for detection in detections:
        confidence = detection[2]
        if confidence > 0.5:
            class_id = int(detection[1])
            label = labels[class_id] if args.labels else class_id
            xmin = int(detection[3] * w)
            ymin = int(detection[4] * h)
            xmax = int(detection[5] * w)
            ymax = int(detection[6] * h)
            log.info(f'Found: label = {label}, confidence = {confidence:.2f}, '
                     f'coords = ({xmin}, {ymin}), ({xmax}, {ymax})')
            # Draw a bounding box on the output image
            cv2.rectangle(output_image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

    cv2.imwrite('out.bmp', output_image)
    log.info('Image out.bmp was created!')

    # ----------------------------------------------------------------------------------------------------------------------
    log.info('This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n')
    return 0