def main():
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)

    # ---------------------------Initialize inference engine core----------------------------------------------------------
    log.info('Creating Inference Engine')
    ie = IECore()

    # ---------------------------Get metrics of available devices----------------------------------------------------------
    log.info('Available devices:')
    for device in ie.available_devices:
        log.info(f'{device} :')

        log.info('\tSUPPORTED_METRICS:')
        for metric in ie.get_metric(device, 'SUPPORTED_METRICS'):
            if metric not in ('SUPPORTED_METRICS', 'SUPPORTED_CONFIG_KEYS'):
                try:
                    metric_val = ie.get_metric(device, metric)
                except TypeError:
                    metric_val = 'UNSUPPORTED TYPE'
                log.info(f'\t\t{metric}: {param_to_string(metric_val)}')
        log.info('')

        log.info('\tSUPPORTED_CONFIG_KEYS (default values):')
        for config_key in ie.get_metric(device, 'SUPPORTED_CONFIG_KEYS'):
            try:
                config_val = ie.get_config(device, config_key)
            except TypeError:
                config_val = 'UNSUPPORTED TYPE'
            log.info(f'\t\t{config_key}: {param_to_string(config_val)}')
        log.info('')

    # ----------------------------------------------------------------------------------------------------------------------
    return 0
def main(): ie = IECore() print("Available devices:") for device in ie.available_devices: print("\tDevice: {}".format(device)) print("\tMetrics:") for metric in ie.get_metric(device, "SUPPORTED_METRICS"): metric_val = ie.get_metric(device, metric) print("\t\t{}: {}".format(metric, param_to_string(metric_val))) print("\n\tDefault values for device configuration keys:") for cfg in ie.get_metric(device, "SUPPORTED_CONFIG_KEYS"): cfg_val = ie.get_config(device, cfg) print("\t\t{}: {}".format(cfg, param_to_string(cfg_val)))
def main(): ie = IECore() print("Available devices:") for device in ie.available_devices: print(f"\tDevice: {device}") print("\tMetrics:") for metric in ie.get_metric(device, "SUPPORTED_METRICS"): try: metric_val = ie.get_metric(device, metric) print(f"\t\t{metric}: {param_to_string(metric_val)}") except TypeError: print(f"\t\t{metric}: UNSUPPORTED TYPE") print("\n\tDefault values for device configuration keys:") for cfg in ie.get_metric(device, "SUPPORTED_CONFIG_KEYS"): try: cfg_val = ie.get_config(device, cfg) print(f"\t\t{cfg}: {param_to_string(cfg_val)}") except TypeError: print(f"\t\t{cfg}: UNSUPPORTED TYPE")
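# All three query-device variants above assume the same surrounding boilerplate,
# which is not shown. Below is a minimal sketch of it, assuming the pre-2022
# OpenVINO Python API (`openvino.inference_engine`). The `param_to_string` body
# here only illustrates the helper's contract (flattening list/tuple metric
# values into printable strings); it is not necessarily the original helper.
import sys
import logging as log

from openvino.inference_engine import IECore


def param_to_string(parameters) -> str:
    """Convert a metric value (scalar, list, or tuple) to a printable string."""
    if isinstance(parameters, (list, tuple)):
        return ', '.join(str(x) for x in parameters)
    return str(parameters)


if __name__ == '__main__':
    sys.exit(main())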
def main(args=None):
    try:
        # ------------------------------ 1. Parsing and validating input arguments -------------------------------------
        next_step()

        if not args:
            args = parse_args()

        # ------------------------------ 2. Loading Inference Engine ---------------------------------------------------
        next_step()

        device_name = args.target_device.upper()

        ie = IECore()

        if CPU_DEVICE_NAME in device_name:
            if args.path_to_extension:
                ie.add_cpu_extension(extension_path=args.path_to_extension, device_name=CPU_DEVICE_NAME)
        if GPU_DEVICE_NAME in device_name:
            if args.path_to_cldnn_config:
                ie.set_config({'CONFIG_FILE': args.path_to_cldnn_config}, GPU_DEVICE_NAME)
                logger.info("GPU extension is loaded: {}".format(args.path_to_cldnn_config))

        logger.info("InferenceEngine:\n{: <9}{}".format("", get_version()))
        version_string = "Device is {}\n".format(device_name)
        for device, version in ie.get_versions(device_name).items():
            version_string += "{: <9}{}\n".format("", device)
            version_string += "{: <9}{:.<24}{} {}.{}\n".format("", version.description, " version", version.major, version.minor)
            version_string += "{: <9}{:.<24} {}\n".format("", "Build", version.build_number)
        logger.info(version_string)

        # --------------------- 3. Read the Intermediate Representation of the network ---------------------------------
        next_step()

        xml_filename = os.path.abspath(args.path_to_model)
        head, tail = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = IENetwork(xml_filename, bin_filename)

        input_info = ie_network.inputs

        if len(input_info) == 0:
            raise AttributeError('No input info is provided')

        # --------------------- 4. Resizing network to match image sizes and given batch -------------------------------
        next_step()

        batch_size = ie_network.batch_size
        precision = ie_network.precision

        if args.batch_size and args.batch_size != ie_network.batch_size:
            new_shapes = {}
            for key in input_info.keys():
                shape = input_info[key].shape
                layout = input_info[key].layout

                batch_index = -1
                if layout in ('NCHW', 'NCDHW', 'NHWC', 'NDHWC', 'NC'):
                    batch_index = 0
                elif layout == 'CN':
                    batch_index = 1

                if batch_index != -1 and shape[batch_index] != args.batch_size:
                    shape[batch_index] = args.batch_size
                    new_shapes[key] = shape

            if new_shapes:
                logger.info("Resizing network to batch = {}".format(args.batch_size))
                ie_network.reshape(new_shapes)

            batch_size = args.batch_size

        logger.info("Network batch size: {}, precision: {}".format(batch_size, precision))

        # --------------------- 5. Configuring input of the model ------------------------------------------------------
        next_step()

        for key in input_info.keys():
            if isImage(input_info[key]):
                # Set the precision of input data provided by the user.
                # Should be called before loading the network to the plugin.
                input_info[key].precision = 'U8'

        # --------------------- 6. Setting device configuration --------------------------------------------------------
        next_step()

        devices = parseDevices(device_name)
        device_nstreams = parseValuePerDevice(devices, args.number_streams)
        for device in devices:
            if device == CPU_DEVICE_NAME:  # CPU supports a few special performance-oriented keys
                # limit threading for the CPU portion of inference
                if args.number_threads:
                    ie.set_config({'CPU_THREADS_NUM': str(args.number_threads)}, device)

                if MULTI_DEVICE_NAME in device_name and GPU_DEVICE_NAME in device_name:
                    ie.set_config({'CPU_BIND_THREAD': 'NO'}, CPU_DEVICE_NAME)
                else:
                    # pin threads for the CPU portion of inference
                    ie.set_config({'CPU_BIND_THREAD': args.infer_threads_pinning}, device)

                # for pure CPU execution, more throughput-oriented execution via streams
                if args.api_type == 'async':
                    ie.set_config({'CPU_THROUGHPUT_STREAMS': str(device_nstreams.get(device))
                                   if device in device_nstreams.keys()
                                   else 'CPU_THROUGHPUT_AUTO'}, device)
                device_nstreams[device] = int(ie.get_config(device, 'CPU_THROUGHPUT_STREAMS'))
            elif device == GPU_DEVICE_NAME:
                if args.api_type == 'async':
                    ie.set_config({'GPU_THROUGHPUT_STREAMS': str(device_nstreams.get(device))
                                   if device in device_nstreams.keys()
                                   else 'GPU_THROUGHPUT_AUTO'}, device)
                device_nstreams[device] = int(ie.get_config(device, 'GPU_THROUGHPUT_STREAMS'))

                if MULTI_DEVICE_NAME in device_name and CPU_DEVICE_NAME in device_name:
                    # multi-device execution with CPU+GPU performs best with the GPU throttling hint,
                    # which releases another CPU thread (otherwise used by the GPU driver for active polling)
                    ie.set_config({'CLDNN_PLUGIN_THROTTLE': '1'}, device)
            elif device == MYRIAD_DEVICE_NAME:
                ie.set_config({'LOG_LEVEL': 'LOG_INFO',
                               'VPU_LOG_LEVEL': 'LOG_WARNING'}, MYRIAD_DEVICE_NAME)

        # --------------------- 7. Loading the model to the device -----------------------------------------------------
        next_step()

        config = {'PERF_COUNT': ('YES' if args.perf_counts else 'NO')}

        exe_network = ie.load_network(ie_network,
                                      device_name,
                                      config=config,
                                      num_requests=args.number_infer_requests if args.number_infer_requests else 0)

        # --------------------- 8. Setting optimal runtime parameters --------------------------------------------------
        next_step()

        # Number of requests
        infer_requests = exe_network.requests
        nireq = len(infer_requests)

        # Iteration limit
        niter = args.number_iterations
        if niter and args.api_type == 'async':
            niter = int((niter + nireq - 1) / nireq) * nireq
            if args.number_iterations != niter:
                logger.warning("Number of iterations was aligned by request number "
                               "from {} to {} using number of requests {}".format(args.number_iterations, niter, nireq))

        # Time limit
        duration_seconds = 0
        if args.time:
            # time limit
            duration_seconds = args.time
        elif not args.number_iterations:
            # default time limit
            duration_seconds = get_duration_in_secs(device)

        # ------------------------------------ 9. Creating infer requests and filling input blobs ----------------------
        next_step()

        request_queue = InferRequestsQueue(infer_requests)

        path_to_input = os.path.abspath(args.path_to_input) if args.path_to_input else None
        requests_input_data = getInputs(path_to_input, batch_size, ie_network.inputs, infer_requests)

        # ------------------------------------ 10. Measuring performance -----------------------------------------------

        progress_count = 0
        progress_bar_total_count = 10000

        output_string = "Start inference {}ronously".format(args.api_type)
        if args.api_type == "async":
            if output_string != "":
                output_string += ", "
            output_string += str(nireq) + " inference requests"

            device_ss = ''
            for device, nstreams in device_nstreams.items():
                if device_ss != '':
                    device_ss += ', '
                device_ss += "{} streams for {}".format(str(nstreams), device)
            if device_ss != '':
                output_string += " using " + device_ss

        output_string += ", limits: "

        if niter:
            if not duration_seconds:
                progress_bar_total_count = niter
            output_string += str(niter) + " iterations"
        if duration_seconds:
            if niter:
                output_string += ", "
            output_string += str(getDurationInMilliseconds(duration_seconds)) + " ms duration"

        next_step(output_string)

        # warming up - out of scope
        infer_request = request_queue.getIdleRequest()
        if not infer_request:
            raise Exception("No idle Infer Requests!")

        if args.api_type == 'sync':
            infer_request.infer(requests_input_data[infer_request.id])
        else:
            infer_request.startAsync(requests_input_data[infer_request.id])

        request_queue.waitAll()
        request_queue.resetTimes()

        start_time = datetime.now()
        exec_time = (datetime.now() - start_time).total_seconds()
        iteration = 0

        progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress)

        # Start inference & calculate performance.
        # The number of iterations is aligned to guarantee that the last infer requests
        # are executed in the same conditions.
        while (niter and iteration < niter) or \
              (duration_seconds and exec_time < duration_seconds) or \
              (args.api_type == "async" and iteration % nireq != 0):
            infer_request = request_queue.getIdleRequest()
            if not infer_request:
                raise Exception("No idle Infer Requests!")

            if args.api_type == 'sync':
                infer_request.infer(requests_input_data[infer_request.id])
            else:
                infer_request.startAsync(requests_input_data[infer_request.id])
            iteration += 1

            exec_time = (datetime.now() - start_time).total_seconds()

            if niter:
                progress_bar.add_progress(1)
            else:
                # Calculate how many progress intervals are covered by the current iteration.
                # Depends on the current iteration time and the time of each progress interval.
                # Previously covered progress intervals must be skipped.
                progress_interval_time = duration_seconds / progress_bar_total_count
                new_progress = int(exec_time / progress_interval_time - progress_count)
                progress_bar.add_progress(new_progress)
                progress_count += new_progress

        # wait for the latest inference executions
        request_queue.waitAll()

        total_duration_sec = request_queue.getDurationInSeconds()
        times = request_queue.times
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if args.api_type == 'sync' else batch_size * iteration / total_duration_sec

        progress_bar.finish()

        # ------------------------------------ 11. Dumping statistics report -------------------------------------------
        next_step()

        if args.exec_graph_path:
            try:
                exec_graph_info = exe_network.get_exec_graph_info()
                exec_graph_info.serialize(args.exec_graph_path)
                logger.info("Executable graph is stored to {}".format(args.exec_graph_path))
                del exec_graph_info
            except Exception as e:
                logging.exception(e)

        if args.perf_counts:
            for ni in range(int(nireq)):
                perf_counts = exe_network.requests[ni].get_perf_counts()
                logger.info("Performance counts for {}-th infer request".format(ni))
                for layer, stats in perf_counts.items():
                    max_layer_name = 30
                    print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format(
                        layer[:max_layer_name - 4] + '...' if (len(layer) >= max_layer_name) else layer,
                        stats['status'],
                        'layerType: ' + str(stats['layer_type']),
                        'realTime: ' + str(stats['real_time']),
                        'cpu: ' + str(stats['cpu_time']),
                        'execType: ' + str(stats['exec_type'])))

        print("Count: {} iterations".format(iteration))
        print("Duration: {:.2f} ms".format(getDurationInMilliseconds(total_duration_sec)))
        if MULTI_DEVICE_NAME not in device_name:
            print("Latency: {:.4f} ms".format(latency_ms))
        print("Throughput: {:.2f} FPS".format(fps))

        del exe_network
        del ie
        next_step.step_id = 0
    except Exception as e:
        logging.exception(e)
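# `InferRequestsQueue` and its `getIdleRequest`/`waitAll`/`resetTimes`/
# `getDurationInSeconds` methods are helpers of the benchmark tool and are not
# shown above (later snippets use a snake_case variant of the same idea). Below
# is a minimal sketch of the contract the measurement loop relies on, built on
# the old IE Python API's `set_completion_callback`/`async_infer`/`latency`;
# it illustrates the pattern and is not the tool's actual implementation.
import threading
from datetime import datetime


class InferReqWrap:
    """Pairs an InferRequest with an id; reports itself idle on completion."""
    def __init__(self, request, req_id, callback_queue):
        self.id = req_id
        self.request = request
        self.request.set_completion_callback(self.callback, self.id)
        self.callback_queue = callback_queue

    def callback(self, status_code, user_data):
        # `latency` is the request execution time in milliseconds
        self.callback_queue(self.id, self.request.latency)

    def startAsync(self, input_data):
        self.request.async_infer(input_data)

    def infer(self, input_data):
        self.request.infer(input_data)
        self.callback_queue(self.id, self.request.latency)


class InferRequestsQueue:
    """Hands out idle requests and records per-request latencies."""
    def __init__(self, requests):
        self.idle_ids = list(range(len(requests)))
        self.requests = [InferReqWrap(r, i, self.put_idle_request)
                         for i, r in enumerate(requests)]
        self.times = []
        self.start_time = None
        self.end_time = None
        self._cv = threading.Condition()

    def put_idle_request(self, req_id, latency_ms):
        with self._cv:
            self.times.append(latency_ms)
            self.idle_ids.append(req_id)
            self.end_time = datetime.now()
            self._cv.notify()

    def getIdleRequest(self):
        with self._cv:
            self._cv.wait_for(lambda: self.idle_ids)
            if self.start_time is None:
                self.start_time = datetime.now()
            return self.requests[self.idle_ids.pop()]

    def resetTimes(self):
        self.times.clear()
        self.start_time = None
        self.end_time = None

    def waitAll(self):
        with self._cv:
            self._cv.wait_for(lambda: len(self.idle_ids) == len(self.requests))

    def getDurationInSeconds(self):
        return (self.end_time - self.start_time).total_seconds()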
class Benchmark:
    def __init__(self, device: str, number_infer_requests, number_iterations, duration_seconds, api_type):
        self.device = device.upper()
        self.ie = IECore()
        self.nireq = number_infer_requests
        self.niter = number_iterations
        self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
        self.api_type = api_type
        self.device_number_streams = {}

    def __del__(self):
        del self.ie

    def add_extension(self, path_to_extension: str = None, path_to_cldnn_config: str = None):
        if GPU_DEVICE_NAME in self.device:
            if path_to_cldnn_config:
                self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME)
                logger.info('GPU extension is loaded: {}'.format(path_to_cldnn_config))
        if CPU_DEVICE_NAME in self.device or MYRIAD_DEVICE_NAME in self.device:
            if path_to_extension:
                self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME)
                logger.info('CPU extension is loaded: {}'.format(path_to_extension))

    def get_version_info(self) -> str:
        logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format('', 'API version', get_version()))
        version_string = 'Device info\n'
        for device, version in self.ie.get_versions(self.device).items():
            version_string += '{: <9}{}\n'.format('', device)
            version_string += '{: <9}{:.<24}{} {}.{}\n'.format('', version.description, ' version', version.major, version.minor)
            version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number)
        return version_string

    @staticmethod
    def reshape(ie_network: IENetwork, batch_size: int):
        new_shapes = {}
        for input_layer_name, input_layer in ie_network.inputs.items():
            shape = input_layer.shape
            layout = input_layer.layout
            try:
                batch_index = layout.index('N')
            except ValueError:
                batch_index = 1 if layout == 'C' else -1
            if batch_index != -1 and shape[batch_index] != batch_size:
                shape[batch_index] = batch_size
                new_shapes[input_layer_name] = shape
        if new_shapes:
            logger.info('Resizing network to batch = {}'.format(batch_size))
            ie_network.reshape(new_shapes)

    def set_config(self, number_streams: int, api_type: str = 'async',
                   number_threads: int = None, infer_threads_pinning: int = None):
        devices = parse_devices(self.device)
        self.device_number_streams = parse_value_per_device(devices, number_streams)
        for device in devices:
            if device == CPU_DEVICE_NAME:  # CPU supports a few special performance-oriented keys
                # limit threading for the CPU portion of inference
                if number_threads:
                    self.ie.set_config({'CPU_THREADS_NUM': str(number_threads)}, device)

                if MULTI_DEVICE_NAME in self.device and GPU_DEVICE_NAME in self.device:
                    self.ie.set_config({'CPU_BIND_THREAD': 'NO'}, CPU_DEVICE_NAME)
                else:
                    # pin threads for the CPU portion of inference
                    self.ie.set_config({'CPU_BIND_THREAD': infer_threads_pinning}, device)

                # for pure CPU execution, more throughput-oriented execution via streams
                if api_type == 'async':
                    cpu_throughput = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}
                    if device in self.device_number_streams.keys():
                        cpu_throughput['CPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
                    self.ie.set_config(cpu_throughput, device)
                self.device_number_streams[device] = self.ie.get_config(device, 'CPU_THROUGHPUT_STREAMS')
            elif device == GPU_DEVICE_NAME:
                if api_type == 'async':
                    gpu_throughput = {'GPU_THROUGHPUT_STREAMS': 'GPU_THROUGHPUT_AUTO'}
                    if device in self.device_number_streams.keys():
                        gpu_throughput['GPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
                    self.ie.set_config(gpu_throughput, device)
                self.device_number_streams[device] = self.ie.get_config(device, 'GPU_THROUGHPUT_STREAMS')

                if MULTI_DEVICE_NAME in self.device and CPU_DEVICE_NAME in self.device:
                    # multi-device execution with CPU+GPU performs best with the GPU throttling hint,
                    # which releases another CPU thread (otherwise used by the GPU driver for active polling)
                    self.ie.set_config({'CLDNN_PLUGIN_THROTTLE': '1'}, device)
            elif device == MYRIAD_DEVICE_NAME:
                self.ie.set_config({'LOG_LEVEL': 'LOG_INFO'}, MYRIAD_DEVICE_NAME)

    def load_network(self, ie_network: IENetwork, perf_counts: bool, number_infer_requests: int = None):
        config = {'PERF_COUNT': ('YES' if perf_counts else 'NO')}
        exe_network = self.ie.load_network(ie_network,
                                           self.device,
                                           config=config,
                                           num_requests=number_infer_requests or 0)
        return exe_network

    def infer(self, request_queue, requests_input_data, batch_size, progress_bar):
        progress_count = 0

        # warming up - out of scope
        infer_request = request_queue.get_idle_request()
        if not infer_request:
            raise Exception('No idle Infer Requests!')
        if self.api_type == 'sync':
            infer_request.infer(requests_input_data[infer_request.req_id])
        else:
            infer_request.start_async(requests_input_data[infer_request.req_id])

        request_queue.wait_all()
        request_queue.reset_times()

        start_time = datetime.now()
        exec_time = (datetime.now() - start_time).total_seconds()
        iteration = 0

        # Start inference & calculate performance.
        # The number of iterations is aligned to guarantee that the last infer requests
        # are executed in the same conditions.
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds) or \
              (self.api_type == 'async' and iteration % self.nireq):
            infer_request = request_queue.get_idle_request()
            if not infer_request:
                raise Exception('No idle Infer Requests!')

            if self.api_type == 'sync':
                infer_request.infer(requests_input_data[infer_request.req_id])
            else:
                infer_request.start_async(requests_input_data[infer_request.req_id])
            iteration += 1

            exec_time = (datetime.now() - start_time).total_seconds()

            if self.duration_seconds:
                # Calculate how many progress intervals are covered by the current iteration.
                # Depends on the current iteration time and the time of each progress interval.
                # Previously covered progress intervals must be skipped.
                progress_interval_time = self.duration_seconds / progress_bar.total_num
                new_progress = int(exec_time / progress_interval_time - progress_count)
                progress_bar.add_progress(new_progress)
                progress_count += new_progress
            elif self.niter:
                progress_bar.add_progress(1)

        # wait for the latest inference executions
        request_queue.wait_all()

        total_duration_sec = request_queue.get_duration_in_seconds()
        times = request_queue.times
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms
        if self.api_type == 'async':
            fps = batch_size * iteration / total_duration_sec
        progress_bar.finish()
        return fps, latency_ms, total_duration_sec, iteration
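# A hedged usage sketch for the Benchmark class above. The model paths, stream
# and batch values, and the `getInputs`/`InferRequestsQueue`/`ProgressBar`
# helpers (defined elsewhere in this tool) are illustrative assumptions.
benchmark = Benchmark('CPU', number_infer_requests=4, number_iterations=None,
                      duration_seconds=60, api_type='async')
logger.info(benchmark.get_version_info())
benchmark.set_config(number_streams=4)

ie_network = IENetwork('model.xml', 'model.bin')  # assumed IR file names
Benchmark.reshape(ie_network, batch_size=1)
exe_network = benchmark.load_network(ie_network, perf_counts=False,
                                     number_infer_requests=benchmark.nireq)

request_queue = InferRequestsQueue(exe_network.requests)
requests_input_data = getInputs(None, 1, ie_network.inputs, exe_network.requests)
progress_bar = ProgressBar(10000, False, True)
fps, latency_ms, total_sec, iterations = benchmark.infer(
    request_queue, requests_input_data, batch_size=1, progress_bar=progress_bar)
logger.info('Throughput: {:.2f} FPS, median latency: {:.2f} ms'.format(fps, latency_ms))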
def on_select(item):
    ax1 = fig.add_subplot(gs[2, :])
    ax2 = fig.add_subplot(gs[1, 3])
    image = plt.imread("openvino-logo.png")
    ax2.axis('off')
    ax2.imshow(image)
    if 'clear' in item.labelstr:
        ax1.cla()
    else:
        log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
        args = build_argparser().parse_args()

        # read input data
        if 'Async' in item.labelstr:
            ecg_data = load.load_ecg("A00001.mat")
        else:
            ecg_data = load.load_ecg(item.labelstr)
        preproc = util.load(".")
        input_ecg = preproc.process_x([ecg_data])
        ecg_n, ecg_h, ecg_w = input_ecg.shape
        log.info("Input ecg file shape: {}".format(input_ecg.shape))
        input_ecg_plot = np.squeeze(input_ecg)

        # raw signal plot
        Fs = 1000
        N = len(input_ecg_plot)
        T = (N - 1) / Fs
        ts = np.linspace(0, T, N, endpoint=False)
        ax1.plot(ts, input_ecg_plot, label=item.labelstr, lw=2)
        ax1.set_ylabel('Amplitude')
        ax1.set_title("ECG Raw signal: length - {}, Freq - 1000 Hz".format(ecg_h))
        ax1.legend(loc='upper right')

        # choose the proper IR for the input length
        if input_ecg.shape[1] == 8960:
            model_xml = "tf_model_8960_fp16.xml"
        elif input_ecg.shape[1] == 17920:
            model_xml = "tf_model_17920_fp16.xml"
        model_bin = os.path.splitext(model_xml)[0] + ".bin"

        # Plugin initialization for the specified device; load extensions library if specified
        log.info("OpenVINO Initializing plugin for {} device...".format(args.device))
        ie = IECore()

        # Read IR
        log.info("OpenVINO Reading IR...")
        net = IENetwork(model=model_xml, weights=model_bin)
        assert len(net.inputs.keys()) == 1, "Demo supports only single input topologies"

        if args.cpu_extension and 'CPU' in args.device:
            ie.add_extension(args.cpu_extension, "CPU")

        config = {'PERF_COUNT': ('YES' if args.perf_counts else 'NO')}
        device_nstreams = parseValuePerDevice(args.device, None)
        if 'Async' in item.labelstr and 'CPU' in args.device:
            ie.set_config({'CPU_THROUGHPUT_STREAMS': str(device_nstreams.get(args.device))
                           if args.device in device_nstreams.keys()
                           else 'CPU_THROUGHPUT_AUTO'}, args.device)
            device_nstreams[args.device] = int(ie.get_config(args.device, 'CPU_THROUGHPUT_STREAMS'))

        # prepare the input blob
        input_blob = next(iter(net.inputs))

        # load the IR to the plugin
        log.info("Loading network with plugin...")
        n, h, w = net.inputs[input_blob].shape
        log.info("Network input shape: {}".format(net.inputs[input_blob].shape))
        if 'Async' in item.labelstr:
            exec_net = ie.load_network(net, args.device, config=config, num_requests=12)
            infer_requests = exec_net.requests
            request_queue = InferRequestsQueue(infer_requests)
        else:
            exec_net = ie.load_network(net, args.device)
        output_blob = next(iter(net.outputs))
        del net

        # run inference
        inf_start = time.time()
        if 'Async' in item.labelstr:
            for i in range(12):
                infer_request = request_queue.getIdleRequest()
                if not infer_request:
                    raise Exception("No idle Infer Requests!")
                infer_request.startAsync({input_blob: input_ecg})
            request_queue.waitAll()
        else:
            res = exec_net.infer({input_blob: input_ecg})
        inf_end = time.time()

        if 'Async' in item.labelstr:
            det_time = (inf_end - inf_start) / 12
            res = exec_net.requests[0].outputs[output_blob]
        else:
            det_time = inf_end - inf_start
            res = res[output_blob]
        del exec_net

        print("[Performance] each inference time: {} ms".format(det_time * 1000))
        prediction = sst.mode(np.argmax(res, axis=2).squeeze())[0][0]
        result = preproc.int_to_class[prediction]
        ax1.set_xlabel('File: {}, Intel OpenVINO Infer_perf for each input: {}ms, classification_result: {}'
                       .format(item.labelstr, det_time * 1000, result),
                       fontsize=15, color="c", fontweight='bold')
    ax1.grid()
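# The `fig`, `gs`, and `item.labelstr` globals suggest this callback is wired
# to a Matplotlib menu in the style of matplotlib's `menu.py` example, where
# each MenuItem carries a `labelstr` and invokes `on_select(item)` when
# clicked. A sketch of plausible wiring, assuming such local `Menu`/`MenuItem`
# classes; the figure layout and menu labels here are illustrative assumptions.
import matplotlib.pyplot as plt
from matplotlib import gridspec

fig = plt.figure(figsize=(16, 9))
gs = gridspec.GridSpec(3, 4, figure=fig)

menuitems = [MenuItem(fig, label, on_select=on_select)
             for label in ('A00001.mat', 'A00005.mat', 'Async', 'clear')]
menu = Menu(fig, menuitems)
plt.show()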
def main():
    total_timer = Timer(name='total')
    total_timer.tic()

    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()

    # --------------------------- 1. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)
    # -----------------------------------------------------------------------------------------------------

    # ------------- 2. Load Plugin for inference engine and extensions library if specified --------------
    log.info("Loading Inference Engine")
    ie = IECore()
    log.info("Device info:")
    versions = ie.get_versions(args.device)
    print("{}{}".format(" " * 8, args.device))
    print("{}MKLDNNPlugin version ......... {}.{}".format(" " * 8, versions[args.device].major, versions[args.device].minor))
    print("{}Build ........... {}".format(" " * 8, versions[args.device].build_number))

    if args.cpu_extension and "CPU" in args.device:
        ie.add_extension(args.cpu_extension, "CPU")
        log.info("CPU extension loaded: {}".format(args.cpu_extension))

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("The following layers are not supported by the plugin for the specified device {}:\n {}"
                      .format(args.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify the cpu extensions library path in the sample's command line "
                      "parameters using -l or --cpu_extension")
            sys.exit(1)
    # -----------------------------------------------------------------------------------------------------

    # --------------------------- 3. Configure input & output ---------------------------------------------
    # --------------------------- Prepare input blobs -----------------------------------------------------
    log.info("Preparing input blobs")
    assert len(net.inputs.keys()) == 1, "Sample supports topologies with 1 input only"
    input_name = next(iter(net.inputs.keys()))
    input_info = net.inputs[input_name]
    input_info.precision = 'FP32'
    log.info('input shape: {}'.format(input_info.shape))

    # --------------------------- Prepare output blobs ----------------------------------------------------
    log.info('Preparing output blobs')
    assert len(net.outputs.keys()) == 2, "Sample supports topologies with 2 outputs only"
    loc_out_name = "797"
    class_out_name = "741"
    assert (loc_out_name in net.outputs.keys()) and (class_out_name in net.outputs.keys())

    loc_out_info = net.outputs[loc_out_name]
    class_out_info = net.outputs[class_out_name]
    loc_out_info.precision = "FP32"
    class_out_info.precision = "FP32"
    # -----------------------------------------------------------------------------------------------------

    # --------------------------- 4. Load the model to the device -----------------------------------------
    log.info("Loading model to the device")
    ie.set_config({'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}, args.device)
    ie.set_config({'CPU_BIND_THREAD': 'YES'}, args.device)
    exec_net = ie.load_network(network=net, device_name=args.device, num_requests=0)
    infer_requests = exec_net.requests
    request_queue = InferRequestsQueue(infer_requests)
    log.info('nreqs: {}, nstreams: {}'.format(len(infer_requests),
                                              ie.get_config(args.device, 'CPU_THROUGHPUT_STREAMS')))

    # --------------------------- 5. Read and preprocess input --------------------------------------------
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)
    if args.voc_res_file and os.path.exists(args.voc_res_file):
        os.remove(args.voc_res_file)

    load_data_timer = Timer(name='load_data')
    post_process_timer = Timer(name='post_process')
    adapter = RetinaNetAdapter(input_shape=args.patch_size)

    # --------------------------- Performing inference ----------------------------------------------------
    result_all_images = defaultdict(list)
    data_loader = DataLoader(args.image_dir, args.strides, args.patch_size)
    while True:
        load_data_timer.tic()
        input_data = data_loader.next()
        load_data_timer.toc()

        if input_data is None:
            break

        infer_request = request_queue.get_idle_request()
        if not infer_request:
            raise Exception('No idle Infer Requests!')

        if infer_request.cur_meta is None:
            infer_request.start_async(input_name, input_data)
            continue

        # get the result of the previous run on this request
        post_process_timer.tic()
        image_name = infer_request.cur_meta['image_name']
        x = infer_request.cur_meta['x']
        y = infer_request.cur_meta['y']
        loc_out = infer_request.request.outputs[loc_out_name][0]
        class_out = infer_request.request.outputs[class_out_name][0]

        # start the next inference
        infer_request.start_async(input_name, input_data)

        # post-process
        result = adapter.process(loc_out, class_out)
        result, _ = nms(result, thresh=0.5, keep_top_k=100)
        result[:, 0] += x
        result[:, 1] += y
        result[:, 2] += x
        result[:, 3] += y
        result_all_images[image_name].append(result)
        post_process_timer.toc()

    # wait for the latest inference executions
    request_queue.wait_all()

    post_process_timer.tic()
    for infer_request in request_queue.requests:
        # get result
        image_name = infer_request.cur_meta['image_name']
        x = infer_request.cur_meta['x']
        y = infer_request.cur_meta['y']
        loc_out = infer_request.request.outputs[loc_out_name][0]
        class_out = infer_request.request.outputs[class_out_name][0]

        # post-process
        result = adapter.process(loc_out, class_out)
        result, _ = nms(result, thresh=0.5, keep_top_k=100)
        result[:, 0] += x
        result[:, 1] += y
        result[:, 2] += x
        result[:, 3] += y
        result_all_images[image_name].append(result)
    post_process_timer.toc()

    post_process_timer.tic()
    # process whole-image results
    for image_name, result_per_image in result_all_images.items():
        result_per_image = np.concatenate(result_per_image, axis=0)
        nms_result, _ = nms(result_per_image, thresh=0.5)

        voc_format = '{} {:.4f} {} {} {} {}'
        pos_all = []
        voc_all = []
        for i in range(nms_result.shape[0]):
            x = int(nms_result[i, 0])
            y = int(nms_result[i, 1])
            w = max(int(nms_result[i, 2] - nms_result[i, 0]), 1)
            h = max(int(nms_result[i, 3] - nms_result[i, 1]), 1)
            p = float(nms_result[i, 4])
            pos = {'x': x, 'y': y, 'w': w, 'h': h, 'p': p}
            pos_all.append(pos)

            if args.voc_res_file:
                xmin = x
                ymin = y
                xmax = int(nms_result[i, 2])
                ymax = int(nms_result[i, 3])
                voc_str = voc_format.format(os.path.splitext(image_name)[0], p, xmin, ymin, xmax, ymax)
                voc_all.append(voc_str)

        file_name = os.path.splitext(image_name)[0] + '.json'
        with open(os.path.join(args.result_dir, file_name), 'w') as f:
            json.dump(pos_all, f)

        if args.voc_res_file:
            with open(args.voc_res_file, 'a') as f:
                for voc_str in voc_all:
                    f.write(voc_str + '\n')
    post_process_timer.toc()
    total_timer.toc()
    # -----------------------------------------------------------------------------------------------------

    all_timers = [load_data_timer, post_process_timer, total_timer]
    for timer in all_timers:
        log.info('{}: avg: {:.2f} ms, total: {:.2f} s'.format(timer.name, timer.avg * 1000, timer.total))
    log.info('infer: {:.2f} s'.format(request_queue.get_duration_in_seconds()))
    log.info("Execution successful\n")
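# The `Timer` helper used above is not defined in this snippet. Below is a
# minimal sketch that matches its observed tic()/toc()/avg/total contract
# (avg in seconds, printed as ms; total in seconds); it is an assumption,
# not necessarily the original implementation.
import time


class Timer:
    def __init__(self, name):
        self.name = name
        self.total = 0.0   # accumulated time in seconds
        self.calls = 0
        self._start = None

    def tic(self):
        self._start = time.perf_counter()

    def toc(self):
        self.total += time.perf_counter() - self._start
        self.calls += 1

    @property
    def avg(self):
        return self.total / self.calls if self.calls else 0.0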
class Classification:
    def __init__(self, device: str, number_infer_requests, number_iterations, duration_seconds, api_type):
        self.device = device
        self.ie = IECore()
        self.nireq = number_infer_requests
        self.niter = number_iterations
        self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
        self.api_type = api_type
        self.device_number_streams = {}

    def __del__(self):
        del self.ie

    def add_extension(self, path_to_extension: str = None, path_to_cldnn_config: str = None):
        if GPU_DEVICE_NAME in self.device:
            if path_to_cldnn_config:
                self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME)
                logger.info('GPU extension is loaded: {}'.format(path_to_cldnn_config))
        if CPU_DEVICE_NAME in self.device or MYRIAD_DEVICE_NAME in self.device:
            if path_to_extension:
                self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME)
                logger.info('CPU extension is loaded: {}'.format(path_to_extension))

    def get_version_info(self) -> str:
        logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format('', 'API version', get_version()))
        version_string = 'Device info\n'
        for device, version in self.ie.get_versions(self.device).items():
            version_string += '{: <9}{}\n'.format('', device)
            version_string += '{: <9}{:.<24}{} {}.{}\n'.format('', version.description, ' version', version.major, version.minor)
            version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number)
        return version_string

    @staticmethod
    def reshape(ie_network: IENetwork, batch_size: int):
        new_shapes = {}
        for input_layer_name, input_layer in ie_network.inputs.items():
            new_shapes[input_layer_name] = get_blob_shape(input_layer, batch_size)
        if new_shapes:
            logger.info('Resizing network to batch = {}'.format(batch_size))
            ie_network.reshape(new_shapes)

    def set_config(self, number_streams: int, api_type: str = 'async',
                   number_threads: int = None, infer_threads_pinning: int = None):
        devices = parse_devices(self.device)
        self.device_number_streams = parse_nstreams_value_per_device(devices, number_streams)
        for device_name in self.device_number_streams.keys():
            key = device_name + "_THROUGHPUT_STREAMS"
            supported_config_keys = self.ie.get_metric(device_name, 'SUPPORTED_CONFIG_KEYS')
            if key not in supported_config_keys:
                raise Exception("Device " + device_name + " doesn't support config key '" + key + "'! "
                                "Please specify -nstreams for correct devices in format "
                                "<dev1>:<nstreams1>,<dev2>:<nstreams2>")

        for device in devices:
            if device == CPU_DEVICE_NAME:  # CPU supports a few special performance-oriented keys
                # limit threading for the CPU portion of inference
                if number_threads:
                    self.ie.set_config({'CPU_THREADS_NUM': str(number_threads)}, device)

                if MULTI_DEVICE_NAME in self.device and GPU_DEVICE_NAME in self.device:
                    self.ie.set_config({'CPU_BIND_THREAD': 'NO'}, CPU_DEVICE_NAME)
                else:
                    # pin threads for the CPU portion of inference
                    self.ie.set_config({'CPU_BIND_THREAD': infer_threads_pinning}, device)

                # for pure CPU execution, more throughput-oriented execution via streams
                if api_type == 'async':
                    cpu_throughput = {'CPU_THROUGHPUT_STREAMS': 'CPU_THROUGHPUT_AUTO'}
                    if device in self.device_number_streams.keys():
                        cpu_throughput['CPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
                    self.ie.set_config(cpu_throughput, device)
                self.device_number_streams[device] = self.ie.get_config(device, 'CPU_THROUGHPUT_STREAMS')
            elif device == GPU_DEVICE_NAME:
                if api_type == 'async':
                    gpu_throughput = {'GPU_THROUGHPUT_STREAMS': 'GPU_THROUGHPUT_AUTO'}
                    if device in self.device_number_streams.keys():
                        gpu_throughput['GPU_THROUGHPUT_STREAMS'] = str(self.device_number_streams.get(device))
                    self.ie.set_config(gpu_throughput, device)
                self.device_number_streams[device] = self.ie.get_config(device, 'GPU_THROUGHPUT_STREAMS')

                if MULTI_DEVICE_NAME in self.device and CPU_DEVICE_NAME in self.device:
                    # multi-device execution with CPU+GPU performs best with the GPU throttling hint,
                    # which releases another CPU thread (otherwise used by the GPU driver for active polling)
                    self.ie.set_config({'CLDNN_PLUGIN_THROTTLE': '1'}, device)
            elif device == MYRIAD_DEVICE_NAME:
                self.ie.set_config({'LOG_LEVEL': 'LOG_INFO'}, MYRIAD_DEVICE_NAME)

    def read_network(self, path_to_model: str):
        xml_filename = os.path.abspath(path_to_model)
        head, tail = os.path.splitext(xml_filename)
        bin_filename = os.path.abspath(head + BIN_EXTENSION)

        ie_network = self.ie.read_network(xml_filename, bin_filename)

        input_info = ie_network.inputs
        if not input_info:
            raise AttributeError('No input info is provided')

        return ie_network

    def load_network(self, ie_network: IENetwork, perf_counts: bool):
        config = {'PERF_COUNT': ('YES' if perf_counts else 'NO')}
        exe_network = self.ie.load_network(ie_network,
                                           self.device,
                                           config=config,
                                           num_requests=1 if self.api_type == 'sync' else self.nireq or 0)
        # Number of requests
        self.nireq = len(exe_network.requests)
        return exe_network

    def infer(self, exe_network, batch_size, progress_bar=None):
        progress_count = 0
        infer_requests = exe_network.requests

        # warming up - out of scope
        if self.api_type == 'sync':
            infer_requests[0].infer()
        else:
            infer_requests[0].async_infer()
        status = exe_network.wait()
        if status != StatusCode.OK:
            raise Exception("Waiting for all requests failed with status code {}!".format(status))

        out_blob = next(iter(exe_network.outputs))
        start_time = datetime.utcnow()
        exec_time = 0
        iteration = 0
        times = []
        in_fly = set()

        # Start inference & calculate performance.
        # The number of iterations is aligned to guarantee that the last infer requests
        # are executed in the same conditions.
        # (self.duration_seconds and exec_time < self.duration_seconds) or \
        while (self.niter and iteration < self.niter) or \
              (len(in_fly) == iteration * self.nireq) or \
              (self.api_type == 'async' and iteration % self.nireq):
            if self.api_type == 'sync':
                infer_requests[0].infer()
                times.append(infer_requests[0].latency)
            else:
                infer_request_id = exe_network.get_idle_request_id()
                if infer_request_id < 0:
                    status = exe_network.wait(num_requests=1)
                    if status != StatusCode.OK:
                        raise Exception("Wait for idle request failed!")
                    infer_request_id = exe_network.get_idle_request_id()
                    if infer_request_id < 0:
                        raise Exception("Invalid request id!")
                if infer_request_id in in_fly:
                    times.append(infer_requests[infer_request_id].latency)
                else:
                    in_fly.add(infer_request_id)
                infer_requests[infer_request_id].async_infer()
            iteration += 1

            exec_time = (datetime.utcnow() - start_time).total_seconds()

            if progress_bar:
                if self.duration_seconds:
                    # Calculate how many progress intervals are covered by the current iteration.
                    # Depends on the current iteration time and the time of each progress interval.
                    # Previously covered progress intervals must be skipped.
                    progress_interval_time = self.duration_seconds / progress_bar.total_num
                    new_progress = int(exec_time / progress_interval_time - progress_count)
                    progress_bar.add_progress(new_progress)
                    progress_count += new_progress
                elif self.niter:
                    progress_bar.add_progress(1)

        # wait for the latest inference executions
        inference_output = []
        status = exe_network.wait()
        for infer_request in infer_requests:
            output = infer_request.outputs[out_blob]
            inference_output.append(output.tolist())
        if status != StatusCode.OK:
            raise Exception("Waiting for all requests failed with status code {}!".format(status))

        total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
        for infer_request_id in in_fly:
            times.append(infer_requests[infer_request_id].latency)
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if self.api_type == 'sync' else batch_size * iteration / total_duration_sec
        if progress_bar:
            progress_bar.finish()
        return inference_output, fps, latency_ms, total_duration_sec, iteration
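# A hedged usage sketch for the Classification wrapper above. The model path
# and all parameter values are illustrative assumptions. Note that `infer()`
# runs requests without feeding fresh inputs, so the input blobs are assumed
# to have been pre-filled before the call.
classification = Classification('CPU', number_infer_requests=4,
                                number_iterations=100, duration_seconds=None,
                                api_type='async')
logger.info(classification.get_version_info())
classification.set_config(number_streams=4)

ie_network = classification.read_network('model.xml')  # assumed IR path
Classification.reshape(ie_network, batch_size=1)
exe_network = classification.load_network(ie_network, perf_counts=False)

outputs, fps, latency_ms, duration_sec, iterations = classification.infer(
    exe_network, batch_size=1)
logger.info('Throughput: {:.2f} FPS, median latency: {:.2f} ms'.format(fps, latency_ms))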
def main(): log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout) args = build_argparser().parse_args() model_xml = args.model model_bin = os.path.splitext(model_xml)[0] + ".bin" # Plugin initialization for specified device and load extensions library if specified log.info("Initializing plugin for {} device...".format(args.device)) device_nstreams = parseValuePerDevice(args.device, args.number_streams) ie = IECore() if args.cpu_extension and 'CPU' in args.device: ie.add_extension(args.cpu_extension, "CPU") # Read IR log.info("Reading IR...") net = IENetwork(model=model_xml, weights=model_bin) if "CPU" in args.device: ie.set_config( { 'CPU_THROUGHPUT_STREAMS': str(device_nstreams.get(args.device)) if args.device in device_nstreams.keys() else 'CPU_THROUGHPUT_AUTO' }, args.device) device_nstreams[args.device] = int( ie.get_config(args.device, 'CPU_THROUGHPUT_STREAMS')) ie.add_extension(args.cpu_extension, "CPU") supported_layers = ie.query_network(net, "CPU") not_supported_layers = [ l for l in net.layers.keys() if l not in supported_layers ] if len(not_supported_layers) != 0: log.error( "Following layers are not supported by the plugin for specified device {}:\n {}" .format(args.device, ', '.join(not_supported_layers))) log.error( "Please try to specify cpu extensions library path in sample's command line parameters using -l " "or --cpu_extension command line argument") sys.exit(1) elif "MYRIAD" in args.device: ie.set_config({ 'LOG_LEVEL': 'LOG_INFO', 'VPU_LOG_LEVEL': 'LOG_WARNING' }, MYRIAD_DEVICE_NAME) input_blob = next(iter(net.inputs)) netoutput = iter(net.outputs) out_blob1 = next(netoutput) print("output1:", out_blob1) print("shape:", net.outputs[out_blob1].shape) out_blob2 = next(netoutput) print("output2:", out_blob2) print("shape:", net.outputs[out_blob2].shape) out_blob3 = next(netoutput) print("output3:", out_blob3) print("shape:", net.outputs[out_blob3].shape) log.info("Loading IR to the plugin...") config = {'PERF_COUNT': ('YES' if args.perf_counts else 'NO')} exec_net = ie.load_network(network=net, device_name=args.device) # Read and pre-process input image n, c, h, w = net.inputs[input_blob].shape image = Image.open(args.input[0]) ori_width, ori_height = image.size print("image ori shape:{},{}".format(ori_width, ori_height)) boxed_image = letterbox_image(image, tuple(reversed((416, 416)))) image_data = np.array(boxed_image, dtype='float32') image_data /= 255. 
image_data = np.expand_dims(image_data, 0) image_data = image_data.transpose((0, 3, 1, 2)) print("image shape:{}".format(image_data.shape)) if args.labels: with open(args.labels, 'r') as f: labels_map = [x.strip() for x in f] else: labels_map = None inf_start = time.time() output = exec_net.infer({input_blob: image_data}) ## wait the latest inference executions inf_end = time.time() det_time = inf_end - inf_start print("[Performance] inference time:{} ms".format(det_time * 1000)) #post-processing part objects = list() for layer_name, out_blob in output.items(): out_blob = out_blob.reshape( net.layers[net.layers[layer_name].parents[0]].shape) layer_params = YoloParams(net.layers[layer_name].params, out_blob.shape[2]) log.info("Layer {} parameters: ".format(layer_name)) layer_params.log_params() objects += parse_yolo_region(out_blob, image_data.shape[2:], (416, 416), layer_params, args.prob_threshold) objects = sorted(objects, key=lambda obj: obj['confidence'], reverse=True) for i in range(len(objects)): if objects[i]['confidence'] == 0: continue for j in range(i + 1, len(objects)): if intersection_over_union(objects[i], objects[j]) > args.iou_threshold: objects[j]['confidence'] = 0 # Drawing objects with respect to the --prob_threshold CLI parameter objects = [ obj for obj in objects if obj['confidence'] >= args.prob_threshold ] log.info("\nDetected boxes for batch {}:".format(1)) log.info(" Class ID | Confidence | XMIN | YMIN | XMAX | YMAX | COLOR ") origin_im_size = (416, 416) for obj in objects: draw = ImageDraw.Draw(boxed_image) # Validation bbox of detected object if obj['xmax'] > origin_im_size[1] or obj['ymax'] > origin_im_size[ 0] or obj['xmin'] < 0 or obj['ymin'] < 0: continue color = (int(min(obj['class_id'] * 12.5, 255)), min(obj['class_id'] * 7, 255), min(obj['class_id'] * 5, 255)) det_label = labels_map[obj['class_id']] if labels_map and len(labels_map) >= obj['class_id'] else \ str(obj['class_id']) draw.rectangle([obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']], outline=color) del draw log.info("{:^9} | {:10f} | {:4} | {:4} | {:4} | {:4} | {} ".format( det_label, obj['confidence'], obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax'], color)) boxed_image.show() if args.perf_counts: for ni in range(int(args.number_infer_requests)): perf_counts = exe_network.requests[ni].get_perf_counts() logger.info( "Pefrormance counts for {}-th infer request".format(ni)) for layer, stats in perf_counts.items(): max_layer_name = 30 print("{:<30}{:<15}{:<30}{:<20}{:<20}{:<20}".format( layer[:max_layer_name - 4] + '...' if (len(layer) >= max_layer_name) else layer, stats['status'], 'layerType: ' + str(stats['layer_type']), 'realTime: ' + str(stats['real_time']), 'cpu: ' + str(stats['cpu_time']), 'execType: ' + str(stats['exec_type'])))
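# `intersection_over_union` is referenced above but not defined in this
# snippet. Below is a standard IoU over the {xmin, ymin, xmax, ymax} dicts
# produced by parse_yolo_region; a sketch of the expected contract, not
# necessarily the original helper.
def intersection_over_union(box_1, box_2):
    overlap_w = min(box_1['xmax'], box_2['xmax']) - max(box_1['xmin'], box_2['xmin'])
    overlap_h = min(box_1['ymax'], box_2['ymax']) - max(box_1['ymin'], box_2['ymin'])
    # boxes that do not overlap have zero intersection area
    area_of_overlap = 0.0 if overlap_w < 0 or overlap_h < 0 else overlap_w * overlap_h
    area_1 = (box_1['xmax'] - box_1['xmin']) * (box_1['ymax'] - box_1['ymin'])
    area_2 = (box_2['xmax'] - box_2['xmin']) * (box_2['ymax'] - box_2['ymin'])
    area_of_union = area_1 + area_2 - area_of_overlap
    return area_of_overlap / area_of_union if area_of_union else 0.0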