def main():
    """Run the text-spotting demo: Mask-RCNN detection plus text recognition.

    Parses the command line, loads the Mask-RCNN detector and the
    encoder/decoder text recognition networks, then reads frames from a
    camera index, a video file or a folder of images.  For every frame the
    detections are filtered by confidence, each detected region is decoded
    into a string symbol by symbol, and the result is rendered (unless
    ``--no_show`` is set).  The loop ends when the source runs out of frames
    or ESC is pressed.
    """
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()

    # Plugin initialization for specified device and load extensions library if specified.
    log.info('Creating Inference Engine...')
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, 'CPU')

    # Read IR; the weights file is expected next to the .xml with a .bin suffix.
    log.info('Loading Mask-RCNN network')
    mask_rcnn_net = ie.read_network(
        args.mask_rcnn_model, os.path.splitext(args.mask_rcnn_model)[0] + '.bin')

    log.info('Loading encoder part of text recognition network')
    text_enc_net = ie.read_network(
        args.text_enc_model, os.path.splitext(args.text_enc_model)[0] + '.bin')

    log.info('Loading decoder part of text recognition network')
    text_dec_net = ie.read_network(
        args.text_dec_model, os.path.splitext(args.text_dec_model)[0] + '.bin')

    if 'CPU' in args.device:
        supported_layers = ie.query_network(mask_rcnn_net, 'CPU')
        not_supported_layers = [
            l for l in mask_rcnn_net.layers.keys() if l not in supported_layers
        ]
        if not_supported_layers:
            log.error(
                'Following layers are not supported by the plugin for specified device {}:\n {}'
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)

    required_input_keys = {'im_data', 'im_info'}
    assert required_input_keys == set(mask_rcnn_net.input_info), \
        'Demo supports only topologies with the following input keys: {}'.format(', '.join(required_input_keys))
    required_output_keys = {
        'boxes', 'scores', 'classes', 'raw_masks', 'text_features'
    }
    assert required_output_keys.issubset(mask_rcnn_net.outputs.keys()), \
        'Demo supports only topologies with the following output keys: {}'.format(', '.join(required_output_keys))

    n, c, h, w = mask_rcnn_net.input_info['im_data'].input_data.shape
    assert n == 1, 'Only batch 1 is supported by the demo application'

    log.info('Loading IR to the plugin...')
    mask_rcnn_exec_net = ie.load_network(network=mask_rcnn_net,
                                         device_name=args.device,
                                         num_requests=2)
    text_enc_exec_net = ie.load_network(network=text_enc_net, device_name=args.device)
    text_dec_exec_net = ie.load_network(network=text_dec_net, device_name=args.device)

    # Remember the decoder hidden-state shape before the network objects are dropped.
    hidden_shape = text_dec_net.input_info[
        args.trd_input_prev_hidden].input_data.shape

    del mask_rcnn_net
    del text_enc_net
    del text_dec_net

    # A purely numeric source is treated as a camera index; anything else is
    # a folder of images or a path handed to OpenCV.
    try:
        input_source = int(args.input_source)
        cap = cv2.VideoCapture(input_source)
    except ValueError:
        input_source = args.input_source
        if os.path.isdir(input_source):
            cap = FolderCapture(input_source)
        else:
            cap = cv2.VideoCapture(input_source)

    # NOTE(review): a failed open is only logged; the frame loop below is then
    # skipped because cap.isOpened() is false.
    if not cap.isOpened():
        log.error('Failed to open "{}"'.format(args.input_source))
    if isinstance(cap, cv2.VideoCapture):
        cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)

    if args.no_track:
        tracker = None
    else:
        tracker = StaticIOUTracker()

    visualizer = Visualizer(['__background__', 'text'],
                            show_boxes=args.show_boxes,
                            show_scores=args.show_scores)

    render_time = 0

    presenter = monitors.Presenter(
        args.utilization_monitors, 45,
        (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH) / 4),
         round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) / 8)))

    log.info('Starting inference...')
    print(
        "To close the application, press 'CTRL+C' here or switch to the output window and press ESC key"
    )
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if not args.keep_aspect_ratio:
            # Resize the image to a target size.
            scale_x = w / frame.shape[1]
            scale_y = h / frame.shape[0]
            input_image = cv2.resize(frame, (w, h))
        else:
            # Resize the image to keep the same aspect ratio and to fit it to a window of a target size.
            scale_x = scale_y = min(h / frame.shape[0], w / frame.shape[1])
            input_image = cv2.resize(frame, None, fx=scale_x, fy=scale_y)

        # Pad bottom/right with zeros up to the network input size.
        input_image_size = input_image.shape[:2]
        input_image = np.pad(input_image,
                             ((0, h - input_image_size[0]),
                              (0, w - input_image_size[1]),
                              (0, 0)),
                             mode='constant',
                             constant_values=0)
        # Change data layout from HWC to CHW.
        input_image = input_image.transpose((2, 0, 1))
        input_image = input_image.reshape((n, c, h, w)).astype(np.float32)
        input_image_info = np.asarray(
            [[input_image_size[0], input_image_size[1], 1]], dtype=np.float32)

        # Run the net.
        inf_start = time.time()
        outputs = mask_rcnn_exec_net.infer({
            'im_data': input_image,
            'im_info': input_image_info
        })

        # Parse detection results of the current request
        boxes = outputs['boxes']
        scores = outputs['scores']
        classes = outputs['classes'].astype(np.uint32)
        raw_masks = outputs['raw_masks']
        text_features = outputs['text_features']

        # Filter out detections with low confidence.
        detections_filter = scores > args.prob_threshold
        scores = scores[detections_filter]
        classes = classes[detections_filter]
        boxes = boxes[detections_filter]
        raw_masks = raw_masks[detections_filter]
        text_features = text_features[detections_filter]

        # Map box coordinates from network input space back to the frame.
        boxes[:, 0::2] /= scale_x
        boxes[:, 1::2] /= scale_y
        masks = []
        for box, cls, raw_mask in zip(boxes, classes, raw_masks):
            raw_cls_mask = raw_mask[cls, ...]
            mask = segm_postprocess(box, raw_cls_mask, frame.shape[0], frame.shape[1])
            masks.append(mask)

        texts = []
        for feature in text_features:
            feature = text_enc_exec_net.infer({'input': feature})['output']
            feature = np.reshape(feature, (feature.shape[0], feature.shape[1], -1))
            feature = np.transpose(feature, (0, 2, 1))

            hidden = np.zeros(hidden_shape)
            prev_symbol_index = np.ones((1, )) * SOS_INDEX

            # Greedy decoding: feed the most probable symbol back in until
            # EOS is produced or MAX_SEQ_LEN steps have run.
            text = ''
            for _ in range(MAX_SEQ_LEN):
                decoder_output = text_dec_exec_net.infer({
                    args.trd_input_prev_symbol: prev_symbol_index,
                    args.trd_input_prev_hidden: hidden,
                    args.trd_input_encoder_outputs: feature
                })
                symbols_distr = decoder_output[args.trd_output_symbols_distr]
                prev_symbol_index = int(np.argmax(symbols_distr, axis=1))
                if prev_symbol_index == EOS_INDEX:
                    break
                text += args.alphabet[prev_symbol_index]
                hidden = decoder_output[args.trd_output_cur_hidden]

            texts.append(text)

        inf_end = time.time()
        inf_time = inf_end - inf_start

        render_start = time.time()

        if len(boxes) and args.raw_output_message:
            log.info('Detected boxes:')
            log.info(
                ' Class ID | Confidence | XMIN | YMIN | XMAX | YMAX '
            )
            for box, cls, score in zip(boxes, classes, scores):
                log.info(
                    '{:>10} | {:>10f} | {:>8.2f} | {:>8.2f} | {:>8.2f} | {:>8.2f} '
                    .format(cls, score, *box))

        # Get instance track IDs.
        masks_tracks_ids = None
        if tracker is not None:
            masks_tracks_ids = tracker(masks, classes)

        presenter.drawGraphs(frame)

        # Visualize masks.
        frame = visualizer(frame, boxes, classes, scores, masks, texts,
                           masks_tracks_ids)

        # Draw performance stats.  render_time is the value measured on the
        # previous iteration (0 on the first frame).
        inf_time_message = 'Inference and post-processing time: {:.3f} ms'.format(
            inf_time * 1000)
        render_time_message = 'OpenCV rendering time: {:.3f} ms'.format(
            render_time * 1000)
        cv2.putText(frame, inf_time_message, (15, 15),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
        cv2.putText(frame, render_time_message, (15, 30),
                    cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

        # Print performance counters.
        if args.perf_counts:
            perf_counts = mask_rcnn_exec_net.requests[0].get_perf_counts()
            log.info('Performance counters:')
            # Header labels mirror the stats keys printed below.
            print('{:<70} {:<15} {:<15} {:<15} {:<10}'.format(
                'name', 'layer_type', 'exec_type', 'status', 'real_time, us'))
            for layer, stats in perf_counts.items():
                print('{:<70} {:<15} {:<15} {:<15} {:<10}'.format(
                    layer, stats['layer_type'], stats['exec_type'],
                    stats['status'], stats['real_time']))

        if not args.no_show:
            # Show resulting image.
            cv2.imshow('Results', frame)
        render_end = time.time()
        render_time = render_end - render_start

        if not args.no_show:
            key = cv2.waitKey(args.delay)
            esc_code = 27
            if key == esc_code:
                break
            presenter.handleKey(key)

    print(presenter.reportMeans())
    cv2.destroyAllWindows()
    cap.release()
def main():
    """Run the asynchronous YOLO V3 object detection demo.

    Builds two executable networks from the same IR: one configured from
    the user's command-line options and one with a single throughput stream
    and a single request (minimum latency).  Frames are submitted to idle
    infer requests; completed results are shown strictly in frame order.
    TAB switches between the two modes, ESC/q quits.

    Raises:
        Exception: the first exception recorded by an inference completion
            callback, re-raised after the main loop stops.
    """
    args = build_argparser().parse_args()

    # ------------- 1. Plugin initialization for specified device and load extensions library if specified -------------
    log.info("Creating Inference Engine...")
    ie = IECore()

    config_user_specified = {}
    config_min_latency = {}

    # --num_streams is either a bare number (applied to every CPU/GPU device
    # in use) or a "DEVICE:N,DEVICE:N" list.
    devices_nstreams = {}
    if args.num_streams:
        if args.num_streams.isdigit():
            devices_nstreams = {device: args.num_streams
                                for device in ['CPU', 'GPU']
                                if device in args.device}
        else:
            devices_nstreams = dict(device.split(':')
                                    for device in args.num_streams.split(','))

    if 'CPU' in args.device:
        if args.cpu_extension:
            ie.add_extension(args.cpu_extension, 'CPU')
        if args.number_threads is not None:
            config_user_specified['CPU_THREADS_NUM'] = str(args.number_threads)
        if 'CPU' in devices_nstreams:
            config_user_specified['CPU_THROUGHPUT_STREAMS'] = devices_nstreams['CPU'] \
                if int(devices_nstreams['CPU']) > 0 \
                else 'CPU_THROUGHPUT_AUTO'

        config_min_latency['CPU_THROUGHPUT_STREAMS'] = '1'

    if 'GPU' in args.device:
        if 'GPU' in devices_nstreams:
            config_user_specified['GPU_THROUGHPUT_STREAMS'] = devices_nstreams['GPU'] \
                if int(devices_nstreams['GPU']) > 0 \
                else 'GPU_THROUGHPUT_AUTO'

        config_min_latency['GPU_THROUGHPUT_STREAMS'] = '1'

    # -------------------- 2. Reading the IR generated by the Model Optimizer (.xml and .bin files) --------------------
    log.info("Loading network")
    net = ie.read_network(args.model, os.path.splitext(args.model)[0] + ".bin")

    assert len(
        net.input_info
    ) == 1, "Sample supports only YOLO V3 based single input topologies"

    # ---------------------------------------------- 3. Preparing inputs -----------------------------------------------
    log.info("Preparing inputs")
    input_blob = next(iter(net.input_info))

    # Read and pre-process input images
    # A second dimension of 3 is taken to mean an NCHW layout; otherwise the
    # height/width are read from dimensions 1 and 2.
    if net.input_info[input_blob].input_data.shape[1] == 3:
        input_height, input_width = net.input_info[
            input_blob].input_data.shape[2:]
        nchw_shape = True
    else:
        input_height, input_width = net.input_info[
            input_blob].input_data.shape[1:3]
        nchw_shape = False

    if args.labels:
        with open(args.labels, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    input_stream = 0 if args.input == "cam" else args.input

    mode = Mode(Modes.USER_SPECIFIED)
    cap = cv2.VideoCapture(input_stream)
    wait_key_time = 1

    # ----------------------------------------- 4. Loading model to the plugin -----------------------------------------
    log.info("Loading model to the plugin")
    exec_nets = {}

    exec_nets[Modes.USER_SPECIFIED] = ie.load_network(
        network=net,
        device_name=args.device,
        config=config_user_specified,
        num_requests=args.num_infer_requests)
    # The min-latency network runs on the first device named after any
    # "PREFIX:" part of the device string.
    exec_nets[Modes.MIN_LATENCY] = ie.load_network(
        network=net,
        device_name=args.device.split(":")[-1].split(",")[0],
        config=config_min_latency,
        num_requests=1)
    print(args.device.split(":")[-1].split(",")[0])

    empty_requests = deque(exec_nets[mode.current].requests)
    completed_request_results = {}
    next_frame_id = 0
    next_frame_id_to_show = 0
    mode_metrics = {mode.current: PerformanceMetrics()}
    prev_mode_active_request_count = 0
    event = threading.Event()
    callback_exceptions = []

    # ----------------------------------------------- 5. Doing inference -----------------------------------------------
    log.info("Starting inference...")
    print(
        "To close the application, press 'CTRL+C' here or switch to the output window and press ESC key"
    )
    print(
        "To switch between min_latency/user_specified modes, press TAB key in the output window"
    )

    presenter = monitors.Presenter(
        args.utilization_monitors, 55,
        (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH) / 4),
         round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) / 8)))

    # Keep going while there is input left, results left to show, or
    # requests still in flight -- unless a callback reported an exception.
    while (cap.isOpened()
           or completed_request_results
           or len(empty_requests) < len(exec_nets[mode.current].requests)) \
            and not callback_exceptions:
        if next_frame_id_to_show in completed_request_results:
            frame, output, start_time, is_same_mode = completed_request_results.pop(
                next_frame_id_to_show)

            next_frame_id_to_show += 1

            objects = get_objects(output, net, (input_height, input_width),
                                  frame.shape[:-1], args.prob_threshold,
                                  args.keep_aspect_ratio)
            objects = filter_objects(objects, args.iou_threshold,
                                     args.prob_threshold)

            if len(objects) and args.raw_output_message:
                log.info(
                    " Class ID | Confidence | XMIN | YMIN | XMAX | YMAX | COLOR "
                )

            origin_im_size = frame.shape[:-1]
            presenter.drawGraphs(frame)
            for obj in objects:
                # Validation bbox of detected object
                obj['xmax'] = min(obj['xmax'], origin_im_size[1])
                obj['ymax'] = min(obj['ymax'], origin_im_size[0])
                obj['xmin'] = max(obj['xmin'], 0)
                obj['ymin'] = max(obj['ymin'], 0)
                color = (min(obj['class_id'] * 12.5, 255),
                         min(obj['class_id'] * 7, 255),
                         min(obj['class_id'] * 5, 255))
                # Strictly greater: class_id == len(labels_map) is already
                # out of range and must fall back to the numeric id.
                det_label = labels_map[obj['class_id']] if labels_map and len(labels_map) > obj['class_id'] else \
                    str(obj['class_id'])

                if args.raw_output_message:
                    log.info(
                        "{:^9} | {:10f} | {:4} | {:4} | {:4} | {:4} | {} ".
                        format(det_label, obj['confidence'], obj['xmin'],
                               obj['ymin'], obj['xmax'], obj['ymax'], color))

                cv2.rectangle(frame, (obj['xmin'], obj['ymin']),
                              (obj['xmax'], obj['ymax']), color, 2)
                cv2.putText(
                    frame, "#" + det_label + ' ' +
                    str(round(obj['confidence'] * 100, 1)) + ' %',
                    (obj['xmin'], obj['ymin'] - 7), cv2.FONT_HERSHEY_COMPLEX,
                    0.6, color, 1)

            helpers.put_highlighted_text(frame,
                                         "{} mode".format(mode.current.name),
                                         (10, int(origin_im_size[0] - 20)),
                                         cv2.FONT_HERSHEY_COMPLEX, 0.75,
                                         (10, 10, 200), 2)

            if is_same_mode and prev_mode_active_request_count == 0:
                mode_metrics[mode.current].update(start_time, frame)
            else:
                # This result belongs to the mode that was active before the
                # switch; count it down until the old requests have drained.
                mode_metrics[mode.get_other()].update(start_time, frame)
                prev_mode_active_request_count -= 1
                helpers.put_highlighted_text(
                    frame, "Switching modes, please wait...",
                    (10, int(origin_im_size[0] - 50)),
                    cv2.FONT_HERSHEY_COMPLEX, 0.75, (10, 200, 10), 2)

            if not args.no_show:
                cv2.imshow("Detection Results", frame)
                key = cv2.waitKey(wait_key_time)

                if key in {ord("q"), ord("Q"), 27}:  # ESC key
                    break
                if key == 9:  # Tab key
                    if prev_mode_active_request_count == 0:
                        prev_mode = mode.current
                        mode.switch()

                        prev_mode_active_request_count = len(
                            exec_nets[prev_mode].requests) - len(
                                empty_requests)
                        empty_requests.clear()
                        empty_requests.extend(exec_nets[mode.current].requests)

                        mode_metrics[mode.current] = PerformanceMetrics()
                else:
                    presenter.handleKey(key)

        elif empty_requests and prev_mode_active_request_count == 0 and cap.isOpened():
            start_time = perf_counter()
            ret, frame = cap.read()
            if not ret:
                if args.loop:
                    cap.open(input_stream)
                else:
                    cap.release()
                continue

            request = empty_requests.popleft()

            # resize input_frame to network size
            in_frame = preprocess_frame(frame, input_height, input_width,
                                        nchw_shape, args.keep_aspect_ratio)

            # Start inference
            request.set_completion_callback(
                py_callback=async_callback,
                py_data=(request, next_frame_id, mode.current, frame,
                         start_time, completed_request_results,
                         empty_requests, mode, event, callback_exceptions))
            request.async_infer(inputs={input_blob: in_frame})
            next_frame_id += 1

        else:
            # Nothing to show and nothing to submit: block until `event` is
            # set (it is handed to the completion callback above).
            event.wait()
            event.clear()

    if callback_exceptions:
        raise callback_exceptions[0]

    # Use a separate loop name so the live `mode` object is not rebound.
    for metrics_mode, metrics in mode_metrics.items():
        print("\nMode: {}".format(metrics_mode.name))
        metrics.print_total()
    print(presenter.reportMeans())

    for exec_net in exec_nets.values():
        await_requests_completion(exec_net.requests)
# Reshape the network input to the requested 5-D shape, then read the
# resulting input and output shapes back from the network.
net.reshape({input_blob:(batch_size,n_channels,height,width,depth)})
batch_size, n_channels, height, width, depth = net.inputs[input_blob].shape
batch_size, n_out_channels, height_out, width_out, depth_out = net.outputs[out_blob].shape

# List every input and output tensor with its shape.
print("The network inputs are:")
for idx, input_layer in enumerate(net.inputs.keys()):
    print("{}: {}, shape = {} [N,C,H,W,D]".format(idx,input_layer,net.inputs[input_layer].shape))

print("The network outputs are:")
for idx, output_layer in enumerate(net.outputs.keys()):
    print("{}: {}, shape = {} [N,C,H,W,D]".format(idx,output_layer,net.outputs[output_layer].shape))

# Loading model to the plugin
print("Loading model to the plugin")
exec_net = ie.load_network(network=net, device_name="CPU")
# The network description is dropped once the executable network exists.
del net

""" OpenVINO inference code input_blob is the name (string) of the input tensor in the graph out_blob is the name (string) of the output tensor in the graph Essentially, this looks exactly like a feed_dict for TensorFlow inference """
# Go through the sample validation dataset to plot predictions
# NOTE(review): the buffer is allocated as (N, C, depth, height, width) while
# the shapes above are unpacked as [N,C,H,W,D] -- confirm the axis order
# matches what is later written into it.
predictions_ov = np.zeros((num_imgs, n_out_channels, depth_out, height_out, width_out))
print("Starting OpenVINO inference")
results = {}
ov_times = []
class Model_FaceDetection:
    """Face detection model wrapper around the Inference Engine.

    Typical use: construct, call ``load_model()``, then call ``predict()``
    per image to get the first detected face crop and its pixel box.
    """

    def __init__(self, model_name, device='CPU', extensions=None):
        """Store the configuration; nothing is loaded until ``load_model``.

        Args:
            model_name: path to the model ``.xml`` file.
            device: Inference Engine device name.
            extensions: optional path to a CPU extensions library.
        """
        import os  # local import: the file-level import block is not visible here

        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        # Replace only the final suffix.  Splitting on the first '.' breaks
        # paths such as './models/face.xml' or 'face.v2.xml'.
        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
        self.input_name = None
        self.input_shape = None
        self.output_names = None
        self.output_shape = None
        self.plugin = None
        self.network = None
        self.exec_net = None

    def load_model(self):
        """Read the network, verify layer support and load it on the device.

        Exits the process with status 1 when ``check_model`` reports
        unsupported layers that cannot be resolved.
        """
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_name,
                                                weights=self.model_weights)
        self.supported_layers = self.plugin.query_network(
            network=self.network, device_name=self.device)
        self.unsupported_layers = [
            layer for layer in self.network.layers.keys()
            if layer not in self.supported_layers
        ]

        if not self.check_model():
            raise SystemExit(1)

        self.exec_net = self.plugin.load_network(network=self.network,
                                                 device_name=self.device,
                                                 num_requests=1)

        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_names = next(iter(self.network.outputs))
        self.output_shape = self.network.outputs[self.output_names].shape

    def predict(self, image, prob_threshold):
        """Detect faces and return the first one.

        Args:
            image: input image as an H x W x C array.
            prob_threshold: minimum confidence for a detection to count.

        Returns:
            ``(face, coords)`` where ``face`` is the cropped region and
            ``coords`` is ``[xmin, ymin, xmax, ymax]`` in pixels, or
            ``(0, 0)`` when no detection passes the threshold.
        """
        img_processed = self.preprocess_input(image.copy())
        faces = self.exec_net.infer({self.input_name: img_processed})
        coords = self.preprocess_output(faces, prob_threshold)
        if not coords:
            return 0, 0
        # Only the first detection is used.
        coords = coords[0]
        h = image.shape[0]
        w = image.shape[1]
        # Scale the box by the image width and height to get pixels.
        coords = coords * np.array([w, h, w, h])
        coords = coords.astype(np.int32)

        face = image[coords[1]:coords[3], coords[0]:coords[2]]
        return face, coords

    def check_model(self):
        """Return True when the network can be loaded on the device.

        For the CPU device with unsupported layers, the extensions library
        (if given) is added and support is queried again.
        """
        if self.unsupported_layers and self.device == 'CPU':
            print("unsupported layers :{}".format(self.unsupported_layers))
            if self.extensions is None:
                print("cpu extension path not found")
                return False

            self.plugin.add_extension(self.extensions, self.device)
            self.supported_layers = self.plugin.query_network(
                network=self.network, device_name=self.device)
            self.unsupported_layers = [
                layer for layer in self.network.layers.keys()
                if layer not in self.supported_layers
            ]
            if self.unsupported_layers:
                print("unsupported layers found")
                return False
        return True

    def preprocess_input(self, image):
        """Resize to the network input size and return a 1 x C x H x W array."""
        resizedImage = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        return np.transpose(np.expand_dims(resizedImage, axis=0), (0, 3, 1, 2))

    def preprocess_output(self, outputs, prob_threshold):
        """Collect the boxes whose confidence exceeds ``prob_threshold``.

        Args:
            outputs: mapping from output name to the raw detection array;
                rows are read as ``[.., .., conf, xmin, ymin, xmax, ymax]``.
            prob_threshold: strict lower bound on the confidence.

        Returns:
            List of ``[xmin, ymin, xmax, ymax]`` lists, unscaled.
        """
        modelOutputs = outputs[self.output_names][0][0]
        return [
            [output[3], output[4], output[5], output[6]]
            for output in modelOutputs
            if output[2] > prob_threshold
        ]
class face_detection:
    '''
    Class for the Face Detection Model.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Store the configuration; nothing is loaded until load_model().

        model_name: path to the model .xml file.
        device: Inference Engine device name.
        extensions: optional path to a CPU extensions library.
        '''
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.exec_network = None
        self.infer_request = None

    def load_model(self):
        '''
        Read the network (the weights are expected next to the .xml with a
        .bin suffix), check layer support and load it on the device.
        '''
        self.plugin = IECore()
        model_xml = self.model_name
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        self.network = IENetwork(model=model_xml, weights=model_bin)

        self.check_model()

        self.exec_network = self.plugin.load_network(self.network, self.device)
        self.input_blob = next(iter(self.network.inputs))
        self.output_blob = next(iter(self.network.outputs))
        return

    def predict(self, image, request_id):
        '''
        Start an asynchronous inference on an already preprocessed image.
        Use wait() and get_output() to collect the result.
        '''
        self.exec_network.start_async(request_id=request_id,
                                      inputs={self.input_blob: image})
        return

    def check_model(self):
        '''
        Add the CPU extension when applicable and exit the process with
        status 1 if the network contains unsupported layers.
        '''
        if self.extensions and "CPU" in self.device:
            self.plugin.add_extension(self.extensions, self.device)

        # NOTE(review): support is always queried for "CPU", whatever
        # self.device is -- confirm this is intended.
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name="CPU")
        unsupported_layers = [
            l for l in self.network.layers.keys() if l not in supported_layers
        ]
        if unsupported_layers:
            print("Unsupported layers found: {}".format(unsupported_layers))
            print("Check whether extensions are available to add to IECore.")
            exit(1)
        return

    def preprocess_input(self, image):
        '''
        Resize the image to the network input size and return it as a
        1 x 3 x H x W array.
        '''
        net_input_shape = self.network.inputs[self.input_blob].shape
        h = net_input_shape[2]
        w = net_input_shape[3]
        p_frame = cv2.resize(image, (w, h))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, 3, h, w)
        return p_frame

    def wait(self):
        '''
        Block until request 0 finishes and return its status.
        '''
        status = self.exec_network.requests[0].wait(-1)
        return status

    def get_output(self):
        '''
        Return the output blob of request 0.
        '''
        return self.exec_network.requests[0].outputs[self.output_blob]

    def preprocess_output(self, faces, image, args, width, height):
        '''
        Crop the detected face out of the image.

        faces: detection output, shape 1x1xNx7; rows are read as
            [.., .., conf, xmin, ymin, xmax, ymax].
        image: frame to crop from (and to draw on in DEBUG mode).
        args: object providing prob_threshold and log_level.
        width, height: factors that scale the box to pixels.

        Returns the crop of the last box whose confidence reaches the
        threshold, or None when no box qualifies.
        '''
        # Start from None so "no detection" returns a value instead of
        # raising UnboundLocalError.
        face_crop = None
        for box in faces[0][0]:  # Output shape is 1x1x100x7
            conf = box[2]
            if conf >= args.prob_threshold:
                # Clamp at 0: a negative index would make the slice wrap
                # around from the far edge of the image.
                xmin = max(0, int(box[3] * width))
                ymin = max(0, int(box[4] * height))
                xmax = max(0, int(box[5] * width))
                ymax = max(0, int(box[6] * height))
                if args.log_level == "DEBUG":
                    cv2.rectangle(image, (xmin, ymin), (xmax, ymax),
                                  (0, 0, 255), 2)
                face_crop = image[ymin:ymax, xmin:xmax]

        return face_crop
class Inference:
    '''
    Class with all relevant tools to do object detection
    '''

    # Load all relevant variables into the class
    def __init__(self, model_name, device, threshold=0.60):
        '''
        Read the network and print a summary of its inputs and outputs.

        model_name: model path without extension; '.xml' and '.bin' are
            appended.
        device: Inference Engine device name.
        threshold: stored on the instance; not used by this class itself.
        '''
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.threshold = threshold

        # Initialise the network and save it in the self.model variables
        self.model = IENetwork(self.model_structure, self.model_weights)  # old openvino version
        # self.model = core.read_network(self.model_structure, self.model_weights) # new openvino version

        # Get the input layer
        self.input_name = next(iter(self.model.inputs))
        self.input_name_all = list(self.model.inputs.keys())  # all input names
        self.input_name_all_02 = self.model.inputs.keys()  # the same names, as a keys view
        self.input_name_first_entry = self.input_name_all[0]

        self.input_shape = self.model.inputs[self.input_name].shape

        self.output_name = next(iter(self.model.outputs))
        self.output_name_type = self.model.outputs[self.output_name]
        self.output_names = list(self.model.outputs.keys())  # all output names
        self.output_names_total_entries = len(self.output_names)

        self.output_shape = self.model.outputs[self.output_name].shape
        self.output_shape_second_entry = self.model.outputs[self.output_name].shape[1]
        self.output_name_first_entry = self.output_names[0]

        print("--------")
        print("input_name: " + str(self.input_name))
        print("input_name_all: " + str(self.input_name_all))
        print("input_name_all_total: " + str(self.input_name_all_02))
        print("input_name_first_entry: " + str(self.input_name_first_entry))
        print("--------")

        print("input_shape: " + str(self.input_shape))
        print("--------")

        print("output_name: " + str(self.output_name))
        print("output_name type: " + str(self.output_name_type))
        print("output_names: " + str(self.output_names))
        print("output_names_total_entries: " + str(self.output_names_total_entries))
        print("output_name_first_entry: " + str(self.output_name_first_entry))
        print("--------")

        print("output_shape: " + str(self.output_shape))
        print("output_shape_second_entry: " + str(self.output_shape_second_entry))
        print("--------")

    # Loads the model
    def load_model(self):
        '''
        Create the IECore, add the CPU extension and load the network.
        '''
        # Adds Extension
        CPU_EXTENSION = "/opt/intel/openvino/deployment_tools/inference_engine/lib/intel64/libcpu_extension_sse4.so"
        self.core = IECore()
        # NOTE(review): the extension path is hard-coded and is added for
        # whatever self.device is -- confirm for non-CPU devices.
        self.core.add_extension(CPU_EXTENSION, self.device)

        # Load the network into an executable network
        self.exec_network = self.core.load_network(network=self.model,
                                                   device_name=self.device,
                                                   num_requests=1)
        print("Model is loaded")

    # Start inference and prediction
    def predict(self, image):
        '''
        Run synchronous inference on one image.

        Returns (original image, color class index, vehicle type index).
        '''
        # Keep the untouched frame; it is handed back to the caller.
        input_img = image
        # Pre-process the image
        image = self.preprocess_input(image)

        result = self.exec_network.infer({self.input_name: image})  # synchronous inference
        print("Start syncro inference")

        # infer_request_handle = self.async_inference(image)
        # res = self.get_output(infer_request_handle, 0, output=None)

        # Vehicle output
        color, car_type = self.vehicle_attributes(result)

        # Return the saved original image (`frame` was never defined here).
        return input_img, color, car_type

    # Preprocess the image
    def preprocess_input(self, frame):
        '''
        Resize the frame to the network input size and return it reshaped
        to (n, c, h, w).
        '''
        # Get the input shape
        n, c, h, w = self.input_shape
        print("n-c-h-w " + str(n) + "-" + str(c) + "-" + str(h) + "-" + str(w))
        image = cv2.resize(frame, (w, h))
        image = image.transpose((2, 0, 1))
        image = image.reshape((n, c, h, w))
        print("End of preprocess input")
        return image

    # Get the inference output
    def get_output(self, infer_request_handle, request_id, output):
        '''
        Return the named output of the given request handle, or the default
        output of request `request_id` when `output` is falsy.
        '''
        if output:
            # NOTE(review): the other branch reads `.outputs`; confirm the
            # handle really exposes `.output`.
            res = infer_request_handle.output[output]
        else:
            res = self.exec_network.requests[request_id].outputs[self.output_name]
        return res

    def vehicle_attributes(self, result):
        '''
        Pick the most probable color and vehicle type from the result.

        result: mapping with 'color' and 'type' score arrays.

        Returns (color class index, vehicle type index).
        '''
        # Gets the output of the vehicle model
        CAR_COLORS = [
            "white", "gray", "yellow", "red", "green", "blue", "black"
        ]
        color = result['color']
        color_flatten = color.flatten()
        color_total = len(color_flatten)
        color_class = np.argmax(color)
        color_class_text = CAR_COLORS[color_class]

        print("--------")
        print("color: " + str(color))
        print("color_flatten: " + str(color_flatten))
        print("total number of colors: " + str(color_total))
        print("color number with the higest propability (argmax): " + str(color_class))
        print("color text with the higest propability (argmax): " + str(color_class_text))
        print("--------")

        CAR_TYPES = ["car", "bus", "truck", "van"]
        car_type = result['type']
        car_type_flatten = car_type.flatten()
        car_type_total = len(car_type_flatten)
        car_type_class = np.argmax(car_type)
        car_type_class_text = CAR_TYPES[car_type_class]

        print("car_type: " + str(car_type))
        print("car_type_flatten: " + str(car_type_flatten))
        print("total number of car types: " + str(car_type_total))
        print("car type with the higest propability (argmax): " + str(car_type_class))
        print("car text with the higest propability (argmax): " + str(car_type_class_text))
        print("--------")

        return color_class, car_type_class
class Model_Face:
    '''
    Class for the Face Detection Model.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Store the configuration; nothing is loaded until load_model().

        model_name: model path without extension; '.xml' and '.bin' are
            appended.
        device: Inference Engine device name.
        extensions: optional path to a CPU extensions library.
        '''
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.extensions = extensions

    def load_model(self):
        '''
        Read the network, try the CPU extension when layers are unsupported,
        load the network on the device and cache input/output names and
        shapes.

        Raises RuntimeError when reading or loading fails, and ValueError
        when the input/output description cannot be read.
        '''
        try:
            print('Model is loading...')
            self.core = IECore()
            self.net = self.core.read_network(model=self.model_structure,
                                              weights=self.model_weights)
            supported = self.core.query_network(self.net, self.device)
            not_supported = [
                layer for layer in self.net.layers.keys()
                if layer not in supported
            ]
            if not_supported and self.device == 'CPU':
                print('Unsuported', not_supported)
                if self.extensions is not None:
                    print('***Quick fix.\n ~CPU Extension added')
                    self.core.add_extension(self.extensions, self.device)
                    supported = self.core.query_network(self.net, self.device)
                    not_supported = [
                        layer for layer in self.net.layers.keys()
                        if layer not in supported
                    ]
                    # The fix failed only if layers are STILL unsupported.
                    if not_supported:
                        print('***Quick fix, Failed.')
                else:
                    print('Check the extension path.')
            self.net_exec = self.core.load_network(network=self.net,
                                                   device_name=self.device)
        except Exception as e:
            # Raise a real exception and keep the cause for debugging.
            raise RuntimeError('Something is wrong.. ~debug load model~') from e

        try:
            self.input_name = next(iter(self.net.inputs))
            self.input_shape = self.net.inputs[self.input_name].shape
            self.output_name = next(iter(self.net.outputs))
            self.output_shape = self.net.outputs[self.output_name].shape
            print('Initialise.. completed.')
        except Exception as e:
            raise ValueError(
                'Something is wrong with input and output values..') from e

    def predict(self, image, thres):
        '''
        Detect faces and return the first one.

        image: input image as an H x W x C array.
        thres: minimum confidence for a detection to count.

        Returns (face crop, [xmin, ymin, xmax, ymax] in pixels), or
        ([], []) when no detection passes the threshold.
        '''
        self.image = image
        print('Face-detection predict..')
        pre_image = self.preprocess_input(self.image)
        input_dict = {self.input_name: pre_image}

        results = self.net_exec.infer(input_dict)
        outputs = self.preprocess_output(results, thres)
        if not outputs:
            # No face found: return empty results instead of IndexError.
            return [], []

        # Only the first detection is used.
        outputs = outputs[0]
        height = self.image.shape[0]
        width = self.image.shape[1]
        # Scale the box by the image width and height to get pixels.
        outputs = outputs * np.array([width, height, width, height])
        outputs = outputs.astype(np.int32)
        face = self.image[outputs[1]:outputs[3], outputs[0]:outputs[2]]

        return face, outputs

    def check_model(self):
        '''
        Check - initialise the model
        '''
        try:
            self.model = IENetwork(self.model_structure, self.model_weights)
        except Exception as e:
            raise ValueError(
                "Could not Initialise the network. Have you enterred the correct model path?"
            ) from e

    def preprocess_input(self, image):
        '''
        An input image in the format [BxCxHxW], where:
        B - batch size
        C - number of channels
        H - image height
        W - image width
        '''
        image = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        image = image.transpose((2, 0, 1))
        image = image.reshape(1, *image.shape)
        return image

    def preprocess_output(self, outputs, thres):
        '''
        thres = threshold of confidence

        The net outputs blob with shape: [1, 1, N, 7], where N is the number
        of detected bounding boxes. Each detection has the format
        [image_id, label, conf, x_min, y_min, x_max, y_max], where:
        image_id - ID of the image in the batch
        label - predicted class ID
        conf - confidence for the predicted class
        (x_min, y_min) - coordinates of the top left bounding box corner
        (x_max, y_max) - coordinates of the bottom right bounding box corner.

        Returns the list of [x_min, y_min, x_max, y_max] whose confidence is
        strictly above thres.
        '''
        print('PreOutput-face_detection..')
        detections = outputs[self.output_name][0][0]
        return [
            [det[3], det[4], det[5], det[6]]
            for det in detections
            if det[2] > thres  # det[2] is the confidence
        ]
class Model:
    """Encoder + step-wise decoder recognition pipeline on the Inference Engine.

    The encoder network is run once per image; the decoder network is then run
    repeatedly, feeding its own previous outputs back in, until it emits
    ``END_TOKEN`` or ``args.max_formula_len`` decoder steps have been made.
    Layer names are taken from ``args`` (``imgs_layer``, ``row_enc_out_layer``,
    ``dec_st_c_layer``, ...).
    """

    class Status(Enum):
        """Stage of the pipeline the model is currently in."""
        READY = 0
        ENCODER_INFER = 1
        DECODER_INFER = 2

    def __init__(self, args, interactive_mode):
        """Load both networks onto ``args.device`` and validate the encoder input.

        Args:
            args: parsed command-line options (model paths, device, layer names).
            interactive_mode: when true the model is driven through
                ``infer_async``; otherwise inputs from ``args.input`` are
                read and preprocessed immediately.
        """
        self.args = args
        log.info("Creating Inference Engine")
        self.ie = IECore()
        self.ie.set_config(
            {"PERF_COUNT": "YES" if self.args.perf_counts else "NO"},
            args.device)
        self.encoder = read_net(self.args.m_encoder, self.ie)
        self.dec_step = read_net(self.args.m_decoder, self.ie)
        self.exec_net_encoder = self.ie.load_network(
            network=self.encoder, device_name=self.args.device)
        self.exec_net_decoder = self.ie.load_network(
            network=self.dec_step, device_name=self.args.device)
        self.images_list = []
        self.vocab = Vocab(self.args.vocab_path)
        self.model_status = Model.Status.READY
        self.is_async = interactive_mode
        self.num_infers_decoder = 0
        self.check_model_dimensions()
        if not interactive_mode:
            self.preprocess_inputs()

    def _encoder_input_shape(self):
        """Return the shape of the encoder image input.

        The input is looked up by ``args.imgs_layer`` - the same name that
        ``_async_infer_encoder`` feeds - instead of a hard-coded ``'imgs'``,
        so a model with a differently named input is handled consistently.
        """
        return self.encoder.input_info[self.args.imgs_layer].input_data.shape

    def preprocess_inputs(self):
        """Read ``args.input`` (a file or a directory) into ``self.images_list``.

        Every image is preprocessed to the encoder's (height, width) and stored
        as a ``namespace(img_name=..., img=...)`` record.
        """
        height, width = self._encoder_input_shape()[-2:]
        target_shape = (height, width)
        if os.path.isdir(self.args.input):
            inputs = sorted(
                os.path.join(self.args.input, inp)
                for inp in os.listdir(self.args.input))
        else:
            inputs = [self.args.input]
        log.info("Loading and preprocessing images")
        for filenm in tqdm(inputs):
            image_raw = cv.imread(filenm)
            assert image_raw is not None, "Error reading image {}".format(
                filenm)
            image = preprocess_image(
                PREPROCESSING[self.args.preprocessing_type], image_raw,
                target_shape)
            record = namespace(img_name=filenm, img=image)
            self.images_list.append(record)

    def check_model_dimensions(self):
        """Assert that the encoder takes a single 1- or 3-channel image."""
        batch_dim, channels, height, width = self._encoder_input_shape()
        assert batch_dim == 1, "Demo only works with batch size 1."
        assert channels in (1, 3), "Input image is not 1 or 3 channeled image."

    def _async_infer_encoder(self, image, req_id):
        """Start an asynchronous encoder request and return its handle."""
        return self.exec_net_encoder.start_async(
            request_id=req_id, inputs={self.args.imgs_layer: image})

    def _async_infer_decoder(self, row_enc_out, dec_st_c, dec_st_h, output,
                             tgt, req_id):
        """Start one asynchronous decoder step and count it."""
        self.num_infers_decoder += 1
        return self.exec_net_decoder.start_async(
            request_id=req_id,
            inputs={
                self.args.row_enc_out_layer: row_enc_out,
                self.args.dec_st_c_layer: dec_st_c,
                self.args.dec_st_h_layer: dec_st_h,
                self.args.output_prev_layer: output,
                self.args.tgt_layer: tgt
            })

    def infer_async(self, model_input):
        """Advance the non-blocking pipeline by one step.

        Returns:
            ``None`` while work is still in flight, or ``(logits, targets)``
            once decoding has finished. ``model_input`` is only consumed when
            the model is ``READY``; calls made while a request is in flight
            just poll.
        """
        assert self.is_async
        if self.model_status == Model.Status.READY:
            # Layout conversion is only needed when a new encoder request is
            # actually started, so it is not repeated on every poll.
            self._start_encoder(change_layout(model_input))
            return None

        if self.model_status == Model.Status.ENCODER_INFER:
            infer_status_encoder = self._infer_request_handle_encoder.wait(
                timeout=0)
            if infer_status_encoder == 0:
                self._start_decoder()
            return None

        return self._process_decoding_results()

    def infer_sync(self, model_input):
        """Run the whole pipeline, blocking, and return ``(logits, targets)``."""
        assert not self.is_async
        model_input = change_layout(model_input)
        self._start_encoder(model_input)
        infer_status_encoder = self._infer_request_handle_encoder.wait(
            timeout=-1)
        assert infer_status_encoder == 0
        self._start_decoder()
        res = None
        while res is None:
            res = self._process_decoding_results()
        return res

    def _process_decoding_results(self):
        """Collect one decoder step; finish or schedule the next step.

        Returns:
            ``(logits, targets)`` when the end token was produced or the step
            limit was reached, otherwise ``None``.
        """
        timeout = 0 if self.is_async else -1
        infer_status_decoder = self._infer_request_handle_decoder.wait(timeout)
        if infer_status_decoder != 0 and self.is_async:
            return None
        dec_res = self._infer_request_handle_decoder.output_blobs
        self._unpack_dec_results(dec_res)

        finished = (self.tgt[0][0][0] == END_TOKEN
                    or self.num_infers_decoder >= self.args.max_formula_len)
        if finished:
            self.num_infers_decoder = 0
            self.logits = np.array(self.logits)
            logits = self.logits.squeeze(axis=1)
            targets = np.argmax(logits, axis=1)
            self.model_status = Model.Status.READY
            return logits, targets

        self._infer_request_handle_decoder = self._async_infer_decoder(
            self.row_enc_out,
            self.dec_states_c,
            self.dec_states_h,
            self.output,
            self.tgt,
            req_id=0)
        return None

    def _start_encoder(self, model_input):
        """Launch the encoder request and mark the model as encoding."""
        self._infer_request_handle_encoder = self._async_infer_encoder(
            model_input, req_id=0)
        self.model_status = Model.Status.ENCODER_INFER

    def _start_decoder(self):
        """Seed the decoder from the encoder outputs and launch the first step."""
        enc_res = self._infer_request_handle_encoder.output_blobs
        self._unpack_enc_results(enc_res)
        self._infer_request_handle_decoder = self._async_infer_decoder(
            self.row_enc_out,
            self.dec_states_c,
            self.dec_states_h,
            self.output,
            self.tgt,
            req_id=0)
        self.model_status = Model.Status.DECODER_INFER

    def _unpack_dec_results(self, dec_res):
        """Store the decoder step outputs as the inputs of the next step."""
        self.dec_states_h = dec_res[self.args.dec_st_h_t_layer].buffer
        self.dec_states_c = dec_res[self.args.dec_st_c_t_layer].buffer
        self.output = dec_res[self.args.output_layer].buffer
        logit = dec_res[self.args.logit_layer].buffer
        self.logits.append(logit)
        # The most likely token of this step is fed back as the next target.
        self.tgt = np.array([[np.argmax(logit, axis=1)]])

    def _unpack_enc_results(self, enc_res):
        """Store the encoder outputs and reset the decoding state."""
        self.row_enc_out = enc_res[self.args.row_enc_out_layer].buffer
        self.dec_states_h = enc_res[self.args.hidden_layer].buffer
        self.dec_states_c = enc_res[self.args.context_layer].buffer
        self.output = enc_res[self.args.init_0_layer].buffer
        self.tgt = np.array([[START_TOKEN]])
        self.logits = []
class FaceDetectionModel:
    '''
    Class for the Face Detection Model.

    Wraps loading of an IR model (.xml/.bin pair) through IECore and
    synchronous inference that returns the first detected face.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Store the configuration; nothing is loaded until load_model() is called.

        model_name: path to the model .xml file.
        device: Inference Engine device name.
        extensions: optional path to a CPU extension library.
        '''
        # Local import keeps this block self-contained.
        import os

        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = self.model_name
        # splitext only strips the final extension, so dots elsewhere in the
        # path ("./models/x.xml", "face-detection.0001.xml") are preserved.
        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_names = None
        self.output_shape = None

    def _unsupported_layers(self):
        '''
        Return the network layers that self.device reports as unsupported.
        '''
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=self.device)
        return [
            l for l in self.network.layers.keys() if l not in supported_layers
        ]

    def load_model(self):
        '''
        Read the network, verify layer support and load it onto the device.

        On CPU, if unsupported layers are found the extension library is added
        and the check is repeated; the process exits with status 1 when no
        extension was given or layers remain unsupported.
        '''
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_structure,
                                                weights=self.model_weights)
        unsupported_layers = self._unsupported_layers()
        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("unsupported layers found:{}".format(unsupported_layers))
            if self.extensions is None:
                print("Give the path of cpu extension")
                raise SystemExit(1)
            print("Adding cpu_extension")
            self.plugin.add_extension(self.extensions, self.device)
            if len(self._unsupported_layers()) != 0:
                print(
                    "After adding the extension still unsupported layers found"
                )
                raise SystemExit(1)
            print("After adding the extension the issue is resolved")
        self.exec_net = self.plugin.load_network(network=self.network,
                                                 device_name=self.device,
                                                 num_requests=1)
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_names = next(iter(self.network.outputs))
        self.output_shape = self.network.outputs[self.output_names].shape

    def predict(self, image, prob_threshold):
        '''
        Run inference on the image and crop the first detected face.

        Returns (cropped_face, coords) where coords is an int32 array
        [x_min, y_min, x_max, y_max] in pixels, or (0, 0) when no detection
        exceeds prob_threshold.
        '''
        # preprocess_input builds a new array, so no defensive copy is needed.
        img_processed = self.preprocess_input(image)
        outputs = self.exec_net.infer({self.input_name: img_processed})
        coords = self.preprocess_output(outputs, prob_threshold)
        if len(coords) == 0:
            return 0, 0
        h = image.shape[0]
        w = image.shape[1]
        bounds = np.array([w, h, w, h])
        # take the first detected face and scale it to pixel units
        coords = np.asarray(coords[0]) * bounds
        # Keep the box inside the image: a negative index would otherwise wrap
        # around and produce a wrong or empty crop.
        coords = np.clip(coords, 0, bounds).astype(np.int32)
        cropped_face = image[coords[1]:coords[3], coords[0]:coords[2]]
        return cropped_face, coords

    def check_model(self):
        '''
        Placeholder; layer support is verified inside load_model().
        '''

    def preprocess_input(self, image):
        '''
        Resize the image to the network input size and reorder it from
        HWC to a single-image NCHW batch.
        '''
        image_resized = cv2.resize(image,
                                   (self.input_shape[3], self.input_shape[2]))
        img_processed = np.transpose(np.expand_dims(image_resized, axis=0),
                                     (0, 3, 1, 2))
        return img_processed

    def preprocess_output(self, outputs, prob_threshold):
        '''
        Return [x_min, y_min, x_max, y_max] for every detection whose
        confidence (index 2 of a detection row) is above prob_threshold.
        Coordinates are returned exactly as produced by the network.
        '''
        detections = outputs[self.output_names][0][0]
        return [[out[3], out[4], out[5], out[6]]
                for out in detections
                if out[2] > prob_threshold]
class Model_HPE:
    '''
    Class for the Head Pose Estimation Model.

    Loads the IR model through IECore, runs single-request inference and can
    draw the estimated head orientation axes onto a frame.
    '''

    def __init__(self, model_name, device, extensions):
        '''
        model_name: model path without extension (".xml"/".bin" are appended).
        device: Inference Engine device name.
        extensions: optional path to a CPU extension library.
        '''
        self.plugin = None
        self.net = None
        self.input_blob = None
        self.output_blob = None
        self.exec_net = None
        self.infer_request = None
        self.input_shape = None
        self.output_shape = None
        self.input_name = None
        self.device = device
        self.extension = extensions
        self.model = model_name
        self.output = None
        # Last successful inference result; set by predict().
        self.result = None

    def load_model(self):
        '''
        This method is for loading the model to the device specified by the user.

        The CPU extension (if any) is registered and the layer check is run
        BEFORE the network is loaded, so that both can actually affect loading.
        '''
        model_xml = self.model + ".xml"
        model_weights = self.model + ".bin"
        self.plugin = IECore()
        if self.extension and 'CPU' in self.device:
            # IECore exposes add_extension(path, device); add_cpu_extension
            # belonged to the older IEPlugin API.
            self.plugin.add_extension(self.extension, 'CPU')
        self.net = IENetwork(model_xml, model_weights)
        self.check_model()
        self.exec_net = self.plugin.load_network(network=self.net,
                                                 device_name=self.device,
                                                 num_requests=1)
        self.input_blob = next(iter(self.net.inputs))
        self.input_shape = self.net.inputs[self.input_blob].shape
        self.output_blob = next(iter(self.net.outputs))
        self.output_shape = self.net.outputs[self.output_blob].shape

    def predict(self, image, benchmark_timing):
        '''
        Run inference on an already preprocessed image.

        Returns the request outputs, or None if the request did not complete
        successfully. When benchmark_timing is true the per-layer performance
        counters are printed and appended to the benchmark file.
        '''
        self.exec_net.start_async(request_id=0,
                                  inputs={self.input_blob: image})
        request = self.exec_net.requests[0]
        if request.wait(-1) != 0:
            return None
        self.result = request.outputs
        if benchmark_timing:
            # Query the counters once and reuse them for both sinks.
            perf_counts = request.get_perf_counts()
            pp = PrettyPrinter(indent=4)
            print('Benchmark Timing for Head_Pose_Estimation')
            pp.pprint(perf_counts)
            self.write_benchmark('Benchmark Timing for Head_Pose_Estimation',
                                 perf_counts)
        return self.result

    def check_model(self):
        '''
        Exit with status 1 if the device does not support every network layer.
        '''
        supported_layers = self.plugin.query_network(network=self.net,
                                                     device_name=self.device)
        unsupported_layers = [
            l for l in self.net.layers.keys() if l not in supported_layers
        ]
        if len(unsupported_layers) != 0:
            log.error("Unsupported layers found: {}".format(unsupported_layers))
            log.error("Check whether extensions are available to add to IECore.")
            raise SystemExit(1)

    def preprocess_input(self, image):
        '''
        Resize to the network input size and convert HWC to a 1xCxHxW batch.
        '''
        temp = cv2.resize(image.copy(),
                          (self.input_shape[3], self.input_shape[2]))  # n,c,h,w
        temp = temp.transpose((2, 0, 1))
        return temp.reshape(1, *temp.shape)

    def preprocess_output(self, image, outputs, facebox, face, display):
        '''
        Return np.array([pitch, roll, yaw]) taken from the 'angle_p_fc',
        'angle_r_fc' and 'angle_y_fc' outputs.

        When display is true the axes are also drawn on image, centred on the
        face crop (facebox gives its top-left corner, face its size).
        '''
        pitch = np.squeeze(outputs['angle_p_fc'])
        roll = np.squeeze(outputs['angle_r_fc'])
        yaw = np.squeeze(outputs['angle_y_fc'])
        axes_op = np.array([pitch, roll, yaw])
        if display:
            xmin, ymin, _, _ = facebox
            face_center = (xmin + face.shape[1] / 2,
                           ymin + face.shape[0] / 2, 0)
            self.draw_axes(image, face_center, yaw, pitch, roll)
        return axes_op

    # code source: https://knowledge.udacity.com/questions/171017
    def draw_axes(self, frame, center_of_face, yaw, pitch, roll):
        '''
        Draw the head-pose axes (angles given in degrees) on frame and return it.

        X is drawn in (0, 0, 255), Y in (0, 255, 0) and Z in (255, 0, 0).
        '''
        focal_length = 950.0
        scale = 100
        # Convert to new Python floats instead of scaling in place: the angles
        # may be views into the inference output, which must not be modified.
        deg_to_rad = np.pi / 180.0
        yaw = float(yaw) * deg_to_rad
        pitch = float(pitch) * deg_to_rad
        roll = float(roll) * deg_to_rad
        cx = int(center_of_face[0])
        cy = int(center_of_face[1])
        Rx = np.array([[1, 0, 0],
                       [0, math.cos(pitch), -math.sin(pitch)],
                       [0, math.sin(pitch), math.cos(pitch)]])
        Ry = np.array([[math.cos(yaw), 0, -math.sin(yaw)],
                       [0, 1, 0],
                       [math.sin(yaw), 0, math.cos(yaw)]])
        Rz = np.array([[math.cos(roll), -math.sin(roll), 0],
                       [math.sin(roll), math.cos(roll), 0],
                       [0, 0, 1]])
        # ref: https://www.learnopencv.com/rotation-matrix-to-euler-angles/
        R = Rz @ Ry @ Rx
        camera_matrix = self.build_camera_matrix(center_of_face, focal_length)
        fx = camera_matrix[0][0]
        fy = camera_matrix[1][1]
        # The axes origin sits one focal length in front of the camera.
        origin = np.array([0, 0, fx], dtype='float32')

        def project(axis):
            # Rotate the axis end point, shift it to the origin and apply the
            # pinhole projection; 1-D vectors keep int() on true scalars.
            point = R @ np.array(axis, dtype='float32') + origin
            return (int(point[0] / point[2] * fx + cx),
                    int(point[1] / point[2] * fy + cy))

        cv2.line(frame, (cx, cy), project([1 * scale, 0, 0]), (0, 0, 255), 2)
        cv2.line(frame, (cx, cy), project([0, -1 * scale, 0]), (0, 255, 0), 2)
        p1 = project([0, 0, 1 * scale])
        p2 = project([0, 0, -1 * scale])
        cv2.line(frame, p1, p2, (255, 0, 0), 2)
        cv2.circle(frame, p2, 3, (255, 0, 0), 2)
        return frame

    def build_camera_matrix(self, center_of_face, focal_length):
        '''
        Return a 3x3 float32 pinhole camera matrix with the principal point at
        the (integer-truncated) face centre.
        '''
        cx = int(center_of_face[0])
        cy = int(center_of_face[1])
        camera_matrix = np.zeros((3, 3), dtype='float32')
        camera_matrix[0][0] = focal_length
        camera_matrix[0][2] = cx
        camera_matrix[1][1] = focal_length
        camera_matrix[1][2] = cy
        camera_matrix[2][2] = 1
        return camera_matrix

    def get_model_name(self):
        '''
        Return the model path given at construction time.
        '''
        return self.model

    def write_benchmark(self, title, data):
        '''
        Append title and data to "headpose_benchmark_timing.txt".
        '''
        # The with-statement closes the file; no explicit close() is needed.
        with open("headpose_benchmark_timing.txt", "a") as f:
            f.write(str(title) + "\n")
            f.write(str(data) + '\n')
def main():
    """Run the YOLO V3 detection demo on a video, camera or image input.

    Reads the IR model given on the command line, loads it with two infer
    requests and processes frames in a loop. In async mode the next frame is
    submitted while the results of the current one are parsed and drawn; TAB
    toggles sync/async and ESC stops the demo (when the window is shown).
    """
    args = build_argparser().parse_args()

    # ------------- 1. Plugin initialization for specified device and load extensions library if specified -------------
    log.info("Creating Inference Engine...")
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    # -------------------- 2. Reading the IR generated by the Model Optimizer (.xml and .bin files) --------------------
    log.info("Loading network")
    net = ie.read_network(args.model, os.path.splitext(args.model)[0] + ".bin")

    # ---------------------------------- 3. Load CPU extension for support specific layer ------------------------------
    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [
            l for l in net.layers.keys() if l not in supported_layers
        ]
        if len(not_supported_layers) != 0:
            log.error(
                "Following layers are not supported by the plugin for specified device {}:\n {}"
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)

    assert len(net.inputs.keys(
    )) == 1, "Sample supports only YOLO V3 based single input topologies"

    # ---------------------------------------------- 4. Preparing inputs -----------------------------------------------
    log.info("Preparing inputs")
    input_blob = next(iter(net.inputs))

    # Default batch_size is 1
    net.batch_size = 1

    # Read and pre-process input images
    n, c, h, w = net.inputs[input_blob].shape

    if args.labels:
        with open(args.labels, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    input_stream = 0 if args.input == "cam" else args.input

    is_async_mode = True
    cap = cv2.VideoCapture(input_stream)
    number_input_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    number_input_frames = 1 if number_input_frames != -1 and number_input_frames < 0 else number_input_frames

    wait_key_code = 1

    # Number of frames in picture is 1 and this will be read in cycle. Sync mode is default value for this case
    if number_input_frames != 1:
        ret, frame = cap.read()
    else:
        is_async_mode = False
        wait_key_code = 0

    # ----------------------------------------- 5. Loading model to the plugin -----------------------------------------
    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net,
                               num_requests=2,
                               device_name=args.device)

    cur_request_id = 0
    next_request_id = 1
    render_time = 0
    parsing_time = 0

    # ----------------------------------------------- 6. Doing inference -----------------------------------------------
    log.info("Starting inference...")
    print(
        "To close the application, press 'CTRL+C' here or switch to the output window and press ESC key"
    )
    print(
        "To switch between sync/async modes, press TAB key in the output window"
    )
    try:
        while cap.isOpened():
            # Here is the first asynchronous point: in the Async mode, we capture frame to populate the NEXT infer request
            # in the regular mode, we capture frame to the CURRENT infer request
            if is_async_mode:
                ret, next_frame = cap.read()
            else:
                ret, frame = cap.read()

            if not ret:
                break

            if is_async_mode:
                request_id = next_request_id
                in_frame = cv2.resize(next_frame, (w, h))
            else:
                request_id = cur_request_id
                in_frame = cv2.resize(frame, (w, h))

            # resize input_frame to network size
            in_frame = in_frame.transpose(
                (2, 0, 1))  # Change data layout from HWC to CHW
            in_frame = in_frame.reshape((n, c, h, w))

            # Start inference
            start_time = time()
            exec_net.start_async(request_id=request_id,
                                 inputs={input_blob: in_frame})
            det_time = time() - start_time

            # Collecting object detection results
            objects = list()
            if exec_net.requests[cur_request_id].wait(-1) == 0:
                output = exec_net.requests[cur_request_id].outputs

                start_time = time()
                for layer_name, out_blob in output.items():
                    out_blob = out_blob.reshape(
                        net.layers[net.layers[layer_name].parents[0]].shape)
                    layer_params = YoloParams(net.layers[layer_name].params,
                                              out_blob.shape[2])
                    log.info("Layer {} parameters: ".format(layer_name))
                    layer_params.log_params()
                    objects += parse_yolo_region(out_blob, in_frame.shape[2:],
                                                 frame.shape[:-1], layer_params,
                                                 args.prob_threshold)
                parsing_time = time() - start_time

            # Filtering overlapping boxes with respect to the --iou_threshold CLI parameter
            objects = sorted(objects,
                             key=lambda obj: obj['confidence'],
                             reverse=True)
            for i in range(len(objects)):
                if objects[i]['confidence'] == 0:
                    continue
                for j in range(i + 1, len(objects)):
                    if intersection_over_union(
                            objects[i], objects[j]) > args.iou_threshold:
                        objects[j]['confidence'] = 0

            # Drawing objects with respect to the --prob_threshold CLI parameter
            objects = [
                obj for obj in objects
                if obj['confidence'] >= args.prob_threshold
            ]

            if len(objects) and args.raw_output_message:
                log.info("\nDetected boxes for batch {}:".format(1))
                log.info(
                    " Class ID | Confidence | XMIN | YMIN | XMAX | YMAX | COLOR ")

            origin_im_size = frame.shape[:-1]
            for obj in objects:
                # Validation bbox of detected object
                if obj['xmax'] > origin_im_size[1] or obj['ymax'] > origin_im_size[
                        0] or obj['xmin'] < 0 or obj['ymin'] < 0:
                    continue
                class_id = obj['class_id']
                color = (int(min(class_id * 12.5, 255)),
                         min(class_id * 7, 255), min(class_id * 5, 255))
                # Strict comparison: class_id == len(labels_map) is already out
                # of range and must fall back to the numeric id.
                if labels_map and len(labels_map) > class_id:
                    det_label = labels_map[class_id]
                else:
                    det_label = str(class_id)

                if args.raw_output_message:
                    log.info(
                        "{:^9} | {:10f} | {:4} | {:4} | {:4} | {:4} | {} ".format(
                            det_label, obj['confidence'], obj['xmin'],
                            obj['ymin'], obj['xmax'], obj['ymax'], color))

                cv2.rectangle(frame, (obj['xmin'], obj['ymin']),
                              (obj['xmax'], obj['ymax']), color, 2)
                cv2.putText(
                    frame, "#" + det_label + ' ' +
                    str(round(obj['confidence'] * 100, 1)) + ' %',
                    (obj['xmin'], obj['ymin'] - 7), cv2.FONT_HERSHEY_COMPLEX,
                    0.6, color, 1)

            # Draw performance stats over frame
            # The backslash is escaped explicitly; "\A" is an invalid escape
            # sequence. The displayed text is unchanged.
            inf_time_message = "Inference time: N\\A for async mode" if is_async_mode else \
                "Inference time: {:.3f} ms".format(det_time * 1e3)
            render_time_message = "OpenCV rendering time: {:.3f} ms".format(
                render_time * 1e3)
            async_mode_message = "Async mode is on. Processing request {}".format(cur_request_id) if is_async_mode else \
                "Async mode is off. Processing request {}".format(cur_request_id)
            parsing_message = "YOLO parsing time is {:.3f} ms".format(
                parsing_time * 1e3)

            cv2.putText(frame, inf_time_message, (15, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            cv2.putText(frame, render_time_message, (15, 45),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            cv2.putText(frame, async_mode_message,
                        (10, int(origin_im_size[0] - 20)),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
            cv2.putText(frame, parsing_message, (15, 30),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

            start_time = time()
            if not args.no_show:
                cv2.imshow("DetectionResults", frame)
            render_time = time() - start_time

            if is_async_mode:
                cur_request_id, next_request_id = next_request_id, cur_request_id
                frame = next_frame

            if not args.no_show:
                key = cv2.waitKey(wait_key_code)

                # ESC key
                if key == 27:
                    break
                # Tab key
                if key == 9:
                    exec_net.requests[cur_request_id].wait()
                    is_async_mode = not is_async_mode
                    log.info("Switched to {} mode".format(
                        "async" if is_async_mode else "sync"))
    finally:
        # Release the capture device and close windows even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()
class Facedetection:
    """Face detection wrapper around an Inference Engine IR model.

    Loads ``<model_name>.xml`` / ``<model_name>.bin``, runs synchronous
    inference on a frame, draws the detections onto it and crops the last
    detected face.
    """

    def __init__(self, model_name, threshold, device, extension, version):
        # Load all relevant variables into the class
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.extension = extension
        self.threshold = threshold
        self.version = version
        print("--------")
        print("START Facedetection")
        print("--------")

    def load_model(self):
        """Read the network, check layer support and load it onto the device.

        Returns:
            ``(model_data, modellayers)``: the configuration list and the list
            produced by ``getmodellayers``.

        Raises:
            ValueError: if the network cannot be initialised (the original
                exception is chained as the cause).
        """
        try:
            self.core = IECore()
            self.network = IENetwork(model=self.model_structure,
                                     weights=self.model_weights)
            self.input_name = next(iter(self.network.inputs))
        except Exception as e:
            log.error("Could not initialise the network")
            raise ValueError("Could not initialise the network") from e
        print("--------")
        print("Model is loaded as self.network : " + str(self.network))

        # Add extension
        if "CPU" in self.device and (self.version == 2019):
            log.info("Add extension: ({})".format(str(self.extension)))
            self.core.add_extension(self.extension, self.device)

        # Check supported layers
        self.check_model()

        # Load the network into an executable network
        self.exec_network = self.core.load_network(network=self.network,
                                                   device_name=self.device,
                                                   num_requests=1)

        model_data = [
            self.model_weights, self.model_structure, self.device,
            self.extension, self.threshold
        ]
        modellayers = self.getmodellayers()
        return model_data, modellayers

    def getmodellayers(self):
        """Cache input/output names and shapes on ``self`` and return them as a list."""
        # Get all necessary model values.
        self.input_name = next(iter(self.network.inputs))
        self.output_name = next(iter(self.network.outputs))
        self.input_shape = self.network.inputs[self.input_name].shape

        # Gets all input and outputs. Just for information.
        self.input_name_all = list(self.network.inputs.keys())
        self.input_name_all_02 = self.network.inputs.keys()
        self.input_name_first_entry = self.input_name_all[0]

        self.output_name_type = self.network.outputs[self.output_name]
        self.output_names = list(self.network.outputs.keys())
        self.output_names_total_entries = len(self.output_names)
        self.output_shape = self.network.outputs[self.output_name].shape
        self.output_shape_second_entry = self.network.outputs[
            self.output_name].shape[1]

        modellayers = [
            self.input_name, self.input_name_all, self.input_name_all_02,
            self.input_name_first_entry, self.input_shape, self.output_name,
            self.output_name_type, self.output_names,
            self.output_names_total_entries, self.output_shape,
            self.output_shape_second_entry
        ]
        return modellayers

    def check_model(self):
        """Exit with status 1 if the CPU plugin does not support every layer.

        Nothing is checked for non-CPU devices.
        """
        log.info("Checking for unsupported layers")
        if "CPU" in self.device:
            supported_layers = self.core.query_network(self.network, "CPU")
            print("--------")
            print("Check for supported layers")
            not_supported_layers = [
                l for l in self.network.layers.keys()
                if l not in supported_layers
            ]
            print("--------")
            if len(not_supported_layers) != 0:
                # A %s placeholder is required: passing the list as an extra
                # argument without one makes logging fail to format the record.
                log.error("Following layers are not supported: %s",
                          not_supported_layers)
                sys.exit(1)
            log.info("All layers are supported")

    def predict(self, frame):
        """Run synchronous inference on ``frame``.

        Returns:
            ``(processed_image, frame_cropped, coords)`` as produced by
            ``preprocess_output``.
        """
        print("--------")
        print("Starts predictions for face_detection")

        # Pre-process the image
        preprocessed_image = self.preprocess_input(frame)

        # Starts synchronous inference
        print("Start syncro inference")
        log.info("Start syncro inference face detection")
        outputs = self.exec_network.infer(
            {self.input_name: preprocessed_image})
        print("Output of the inference request: " + str(outputs))
        outputs = self.exec_network.requests[0].outputs[self.output_name]
        print("Output of the inference request (self.output_name): " +
              str(outputs))
        processed_image, frame_cropped, coords = self.preprocess_output(
            outputs, frame)
        print("End predictions face_detection")
        print("--------")
        return processed_image, frame_cropped, coords

    def preprocess_input(self, frame):
        """Resize, transpose (HWC -> CHW) and reshape ``frame`` to the input shape."""
        print("--------")
        print("Start preprocess image")
        log.info("Start preprocess image face detection")
        n, c, h, w = self.input_shape
        print(w, h)
        preprocessed_image = cv2.resize(frame, (w, h))
        preprocessed_image = preprocessed_image.transpose((2, 0, 1))
        preprocessed_image = preprocessed_image.reshape((n, c, h, w))
        shape_message = (
            "The input shape from the face detection is n= ({}) c= ({}) h= ({}) w= ({})"
            .format(str(n), str(c), str(h), str(w)))
        print(shape_message)
        log.info(shape_message)
        print("Image is now [BxCxHxW]: " + str(preprocessed_image.shape))
        log.info("Image is now [BxCxHxW]: " + str(preprocessed_image.shape))
        print("End: preprocess image")
        print("--------")
        return preprocessed_image

    def _draw_box(self, frame):
        """Draw the inset rectangle and the four corner markers for the current box."""
        black = (0, 0, 0)
        cv2.rectangle(frame, ((self.xmin + 10), (self.ymin + 10)),
                      ((self.xmax - 10), (self.ymax - 10)), black, 1)
        # draw line (just for fun)
        corners = (
            (self.xmin, self.ymin, 20, 20),
            (self.xmax, self.ymax, -20, -20),
            (self.xmax, self.ymin, -20, 20),
            (self.xmin, self.ymax, 20, -20),
        )
        for x, y, dx, dy in corners:
            cv2.line(frame, (x, y), (x, y + dy), black, 3)
            cv2.line(frame, (x, y), (x + dx, y), black, 3)

    def preprocess_output(self, outputs, frame):
        """Convert detections to pixel boxes, draw them and crop the last face.

        Detection rows are read as ``[_, _, confidence, xmin, ymin, xmax, ymax]``
        with coordinates scaled by the frame width/height. ``outputs`` itself is
        not modified.

        Returns:
            ``(frame, frame_cropped, coords)`` where ``coords`` is a list of
            ``[xmin, ymin, xmax, ymax]`` integer pixel boxes. When no detection
            reaches the threshold, ``frame_cropped`` is ``None``, ``coords`` is
            empty and no image files are written.
        """
        coords = []
        print("--------")
        print("Start: preprocess_output")
        log.info("Start preprocess_output face_detection")
        print("Bounding box input: " + str(outputs))
        self.initial_w = frame.shape[1]
        self.initial_h = frame.shape[0]
        print("Original image size is (W x H): " + str(self.initial_w) + "x" +
              str(self.initial_h))
        for obj in outputs[0][0]:
            confidence = obj[2]
            if confidence >= self.threshold:
                # Work on local integers rather than writing the scaled values
                # back into the inference output buffer.
                xmin = int(obj[3] * self.initial_w)
                ymin = int(obj[4] * self.initial_h)
                xmax = int(obj[5] * self.initial_w)
                ymax = int(obj[6] * self.initial_h)
                coords.append([xmin, ymin, xmax, ymax])
                box_text = " x ".join(
                    str(value) for value in (xmin, ymin, xmax, ymax))
                print("Bounding box coordinates face detection: " + box_text)
                log.info("Bounding box coordinates face detection: " +
                         box_text)
                self.xmin = xmin
                self.ymin = ymin
                self.xmax = xmax
                self.ymax = ymax
                self._draw_box(frame)
                print("Bounding box coordinates face detection: " + box_text)
                log.info(
                    "Bounding box coordinates face detection (int)xmin/ymin/xmax/ymax: "
                    + box_text)
        print("End: boundingbox")
        print("--------")
        if not coords:
            # Nothing detected: there is no box to crop, and any box stored
            # from an earlier frame would be stale.
            return frame, None, coords
        frame_cropped = frame[self.ymin:(self.ymax + 1),
                              self.xmin:(self.xmax + 1)].copy()
        cv2.imwrite("output/Face_cropped image.png", frame_cropped)
        cv2.imwrite("output/Face_image.png", frame)
        return frame, frame_cropped, coords

    def load_data(self, input_type, input_file):
        """Open the input: a ``cv2.VideoCapture`` for 'video'/'cam', else the image array."""
        print("Start load_data from InputFeeder")
        if input_type == 'video':
            cap = cv2.VideoCapture(input_file)
            print("Input = video")
            log.info("Input = video")
        elif input_type == 'cam':
            cap = cv2.VideoCapture(0)
            print("Input = cam")
            log.info("Input = cam")
        else:
            cap = cv2.imread(input_file)
            print("Input = image")
            log.info("Input = image")
        return cap

    def start(self, frame, inputtype):
        """Run predictions on the loaded input.

        Args:
            frame: for 'video'/'cam' the opened capture object, for 'image' the
                image array. NOTE(review): assumed to be the value returned by
                ``load_data`` - confirm against the caller.
            inputtype: 'video', 'cam' or 'image'.
        """
        # Membership test: the former "== 'video' or 'cam'" was always true.
        if inputtype in ('video', 'cam'):
            cap = frame
            try:
                while cap.isOpened():
                    ret, current_frame = cap.read()
                    if not ret:
                        break
                    self.predict(current_frame)
            except Exception as e:
                print("Could not run Inference: ", e)
                log.error("Could not run Inference: %s", e)
            finally:
                if hasattr(cap, "release"):
                    cap.release()
        if inputtype == 'image':
            print("Image")
            self.predict(frame)
            path = '/home/pi/KeyBox/Face_cropped image.png'
            image = cv2.imread(path)
            cv2.imshow("test", image)
            cv2.waitKey(0)
            cv2.destroyAllWindows()
class Network:
    """
    Load and configure inference plugins for the specified target devices
    and performs synchronous and asynchronous modes for the specified infer requests.
    """

    def __init__(self):
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.exec_network = None
        self.infer_request = None

    def load_model(self, model, device="CPU", cpu_extension=None):
        """Read the IR model, verify layer support and load it onto ``device``.

        Args:
            model: path to the model .xml file; the .bin file is expected
                next to it with the same base name.
            device: Inference Engine device name used for the layer query and
                for loading the network.
            cpu_extension: optional path to a CPU extension library; ignored
                when it is ``None`` or does not point to a file.

        Raises:
            SystemExit: with exit status 1 when the device reports
                unsupported layers.
        """
        ### Load the Inference Engine API
        self.plugin = IECore()

        ### Load IR files into their related class
        model_xml = model
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        self.network = IENetwork(model=model_xml, weights=model_bin)

        ### Add a CPU extension, if applicable.
        # cpu_extension defaults to None, which os.path.isfile cannot take.
        if cpu_extension and "CPU" in device and os.path.isfile(cpu_extension):
            self.plugin.add_extension(cpu_extension, "CPU")

        ### Get the supported layers of the network
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=device)

        ### Check for any unsupported layers, and let the user know if anything is missing. Exit the program, if so.
        unsupported_layers = [
            layer for layer in self.network.layers.keys()
            if layer not in supported_layers
        ]
        if unsupported_layers:
            # A string argument is written to stderr and yields exit status 1.
            raise SystemExit(
                "Unsupported layers found: {}. Check whether the extensions "
                "are available to add to IECore.".format(unsupported_layers))

        ### Load the network into the Inference Engine
        self.exec_network = self.plugin.load_network(self.network, device)
        self.input_blob = next(iter(self.network.inputs))
        self.output_blob = next(iter(self.network.outputs))
        return

    def get_input_shape(self):
        """Return the shape of the input layer."""
        return self.network.inputs[self.input_blob].shape

    def exec_net(self, request_id, image):
        """Start an asynchronous request for ``image`` and return the executable network."""
        self.infer_request_handle = self.exec_network.start_async(
            request_id=request_id, inputs={self.input_blob: image})
        return self.exec_network

    def wait(self, request_id):
        """Block until the request completes and return its status code."""
        return self.exec_network.requests[request_id].wait(-1)

    def get_output(self, request_id):
        """Return the output blob of the given request."""
        return self.exec_network.requests[request_id].outputs[self.output_blob]
class Gaze_Estimator:
    '''
    Class for the Gaze Estimator model.
    '''

    def __init__(self, model, weights, device, extensions=None):
        '''
        Store the model/weights paths, target device and optional extension
        library. Nothing is loaded until load_model() is called.
        '''
        self.device = device
        self.extensions = extensions
        self.model = model
        self.weights = weights
        self.plugin = None
        self.network = None
        self.input = None
        self.inputBlob = None
        self.output = None
        self.outputBlob = None
        self.input_shape = None
        self.execNetwork = None
        self.inferRequest = None

    def load_model(self):
        '''
        Read the network, log when the device reports unsupported layers and
        load the network onto the device with a single infer request.
        '''
        self.plugin = IECore()
        if self.extensions:
            self.plugin.add_extension(self.extensions, self.device)
        self.network = self.plugin.read_network(model=self.model,
                                                weights=self.weights)

        supported_layers = self.plugin.query_network(self.network, self.device)
        unsupported_layers = [
            layer for layer in self.network.layers.keys()
            if layer not in supported_layers
        ]
        if unsupported_layers:
            log.info(
                'Please add Extension as some unsupported layers currently exist'
            )

        self.execNetwork = self.plugin.load_network(self.network,
                                                    self.device,
                                                    num_requests=1)
        self.inputBlob = list(self.network.inputs.keys())
        self.outputBlob = list(self.network.outputs.keys())
        # Look the eye input up by the same name predict() feeds, instead of
        # relying on the ordering of the inputs mapping.
        self.input_shape = self.network.inputs['left_eye_image'].shape

    def predict(self, left_eye_image, right_eye_image, head_pose_angle):
        '''
        Run synchronous inference on both eye crops and the head pose angles.

        Returns ``(mouse_coords, gaze_vec)`` as produced by preprocess_output().
        '''
        le_img_processed, re_img_processed = self.preprocess_input(
            left_eye_image, right_eye_image)
        outputs = self.execNetwork.infer({
            'head_pose_angles': head_pose_angle,
            'left_eye_image': le_img_processed,
            'right_eye_image': re_img_processed
        })
        return self.preprocess_output(outputs, head_pose_angle)

    def preprocess_input(self, left_eye_image, right_eye_image):
        '''
        Resize each eye image to the network input size and reorder it from
        HWC to a batch of one in NCHW layout.
        '''
        target_size = (self.input_shape[3], self.input_shape[2])
        processed = []
        for eye_image in (left_eye_image, right_eye_image):
            resized = cv2.resize(eye_image, target_size)
            processed.append(
                np.transpose(np.expand_dims(resized, axis=0), (0, 3, 1, 2)))
        return processed[0], processed[1]

    def preprocess_output(self, outputs, head_pose_angle):
        '''
        Extract the gaze vector and rotate its x/y components by the angle in
        ``head_pose_angle[2]`` (degrees).

        Returns ``((x_val, y_val), gaze_vec)``. With a zero angle the result
        equals ``(gaze_vec[0], gaze_vec[1])``.
        '''
        gaze_vec = outputs[self.outputBlob[0]].tolist()[0]
        angle_r_fc = math.radians(head_pose_angle[2])
        cosine = math.cos(angle_r_fc)
        sine = math.sin(angle_r_fc)
        # 2-D rotation; the second row is (-sin, cos), not (sin, sin).
        x_val = gaze_vec[0] * cosine + gaze_vec[1] * sine
        y_val = -gaze_vec[0] * sine + gaze_vec[1] * cosine
        return (x_val, y_val), gaze_vec
def draw_inference_from_video(self):
    """
    Call this function after creating an object of the VideoInfer class with
    all the required parameters to draw inference.

    This is a generator: for every processed frame that has at least one
    detection above ``self.prob_thresh`` it yields a list of dicts with the
    keys ``'class'`` (int class id) and ``'bbox'`` (``[(xmin, ymin),
    (xmax, ymax)]`` in pixels). Frames are also rendered in an OpenCV window;
    ESC stops the loop and TAB toggles ``self.async_mode``.

    The capture and the OpenCV windows are released when the generator
    finishes or is closed early.
    """
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    log.info("Creating Inference Engine...")
    ie = IECore()
    if self.extention_lib_path and 'CPU' in self.device:
        ie.add_extension(self.extention_lib_path, "CPU")

    # Read IR
    log.info("Loading network files:\n\t{}\n\t{}".format(self.model_xml, self.model_path))
    net = IENetwork(model=self.model_xml, weights=self.model_path)

    if "CPU" in self.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                      format(self.device, ', '.join(not_supported_layers)))
            log.error("Please try to specify cpu extensions library path in config")
            sys.exit(1)

    # A 4D input carries the image, an optional 2D input carries image info.
    img_info_input_blob = None
    input_blob = None
    feed_dict = {}
    for blob_name in net.inputs:
        rank = len(net.inputs[blob_name].shape)
        if rank == 4:
            input_blob = blob_name
        elif rank == 2:
            img_info_input_blob = blob_name
        else:
            raise RuntimeError("Unsupported {}D input layer '{}'. Only 2D and 4D input layers are supported"
                               .format(rank, blob_name))
    if input_blob is None:
        # Without this the code below would fail with an unbound local name.
        raise RuntimeError("The network has no 4D image input layer")

    assert len(net.outputs) == 1, "Demo supports only single output topologies"
    out_blob = next(iter(net.outputs))

    log.info("Loading IR to the plugin...")
    exec_net = ie.load_network(network=net, num_requests=2, device_name=self.device)

    # Read and pre-process input image
    n, c, h, w = net.inputs[input_blob].shape
    if img_info_input_blob:
        feed_dict[img_info_input_blob] = [h, w, 1]

    if self.input_stream == 'cam':
        input_stream = 0
    elif self.input_stream.startswith('rtsp'):
        log.info('Using RTSP feed')
        input_stream = self.input_stream
    else:
        input_stream = self.input_stream
        assert os.path.isfile(self.input_stream), "Specified input file doesn't exist"

    if self.labels:
        with open(self.labels, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    def to_network_input(image):
        # Resize to the network resolution and change layout from HWC to NCHW.
        resized = cv2.resize(image, (w, h))
        return resized.transpose((2, 0, 1)).reshape((n, c, h, w))

    cap = cv2.VideoCapture(input_stream)
    try:
        cur_request_id = 0
        next_request_id = 1

        log.info("Starting inference in async mode...")
        render_time = 0
        ret, frame = cap.read()

        print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")
        print("To switch between sync/async modes, press TAB key in the output window")

        while cap.isOpened():
            if self.async_mode:
                ret, next_frame = cap.read()
            else:
                ret, frame = cap.read()
            if not ret:
                break
            initial_w = cap.get(3)
            initial_h = cap.get(4)

            # Main sync point:
            # in the truly Async mode we start the NEXT infer request, while waiting for the CURRENT to complete
            # in the regular mode we start the CURRENT request and immediately wait for its completion
            inf_start = time.time()
            if self.async_mode:
                feed_dict[input_blob] = to_network_input(next_frame)
                exec_net.start_async(request_id=next_request_id, inputs=feed_dict)
            else:
                feed_dict[input_blob] = to_network_input(frame)
                exec_net.start_async(request_id=cur_request_id, inputs=feed_dict)

            if exec_net.requests[cur_request_id].wait(-1) == 0:
                inf_end = time.time()
                det_time = inf_end - inf_start

                # Parse detection results of the current request
                res = exec_net.requests[cur_request_id].outputs[out_blob]
                detections = list()
                for obj in res[0][0]:
                    # Draw only objects when probability more than specified threshold
                    if obj[2] > self.prob_thresh:
                        xmin = int(obj[3] * initial_w)
                        ymin = int(obj[4] * initial_h)
                        xmax = int(obj[5] * initial_w)
                        ymax = int(obj[6] * initial_h)
                        class_id = int(obj[1])
                        detections.append({
                            'class': class_id,
                            'bbox': [(xmin, ymin), (xmax, ymax)],
                        })
                        # Draw box and label\class_id
                        color = (min(class_id * 12.5, 255), min(class_id * 7, 255), min(class_id * 5, 255))
                        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                        det_label = labels_map[class_id] if labels_map else str(class_id)
                        cv2.putText(frame, det_label + ' ' + str(round(obj[2] * 100, 1)) + ' %', (xmin, ymin - 7),
                                    cv2.FONT_HERSHEY_COMPLEX, 0.6, color, 1)
                if len(detections):
                    yield detections

                # Draw performance stats. The backslash is escaped so the
                # literal keeps its value without an invalid-escape warning.
                inf_time_message = "Inference time: N\\A for async mode" if self.async_mode else \
                    "Inference time: {:.3f} ms".format(det_time * 1000)
                render_time_message = "OpenCV rendering time: {:.3f} ms".format(render_time * 1000)
                async_mode_message = "Async mode is on. Processing request {}".format(cur_request_id) if self.async_mode else \
                    "Async mode is off. Processing request {}".format(cur_request_id)

                frame_time = render_time + det_time
                if frame_time > 0:  # avoid ZeroDivisionError on a coarse clock
                    print('fps', 1 / frame_time)
                cv2.putText(frame, inf_time_message, (15, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
                cv2.putText(frame, render_time_message, (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
                cv2.putText(frame, async_mode_message, (10, int(initial_h - 20)), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                            (10, 10, 200), 1)

            render_start = time.time()
            cv2.imshow("Detection Results", frame)  # Comment this line to stop rendering output
            render_end = time.time()
            render_time = render_end - render_start

            if self.async_mode:
                cur_request_id, next_request_id = next_request_id, cur_request_id
                frame = next_frame

            key = cv2.waitKey(1)
            if key == 27:
                break
            if key == 9:
                self.async_mode = not self.async_mode
                log.info("Switched to {} mode".format("async" if self.async_mode else "sync"))
    finally:
        # Runs on normal exit, on errors and when the generator is closed.
        cap.release()
        cv2.destroyAllWindows()
class FacialLandmarksDetection:
    '''
    Class for the Facial Landmarks Detection Model.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Store the model location and target device.

        ``model_name`` is the path without extension; ``.xml`` and ``.bin``
        are appended to find the structure and weights files.
        '''
        # From params
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.extensions = extensions
        # For application
        self.core = None
        self.network = None
        self.exec_net = None
        self.unsupported_layers = None
        self.image = None
        self.input = None
        self.output = None

    def load_model(self):
        '''
        Read the network, verify that every layer is supported (see
        check_model) and load it onto ``self.device``.
        '''
        self.core = IECore()
        self.network = self.core.read_network(model=self.model_structure, weights=self.model_weights)
        self.unsupported_layers = self._query_unsupported_layers()
        self.check_model()
        logging.info("Checked facial-landmark-dection model")
        self.exec_net = self.core.load_network(network=self.network, device_name=self.device, num_requests=1)
        self.input = next(iter(self.network.inputs))
        self.output = next(iter(self.network.outputs))

    def _query_unsupported_layers(self):
        '''Return the network layers the device does not report as supported.'''
        supported_layers = self.core.query_network(network=self.network, device_name=self.device)
        return [l for l in self.network.layers.keys() if l not in supported_layers]

    def predict(self, image):
        '''
        Run inference on ``image`` and return
        ``(left_eye_crop, right_eye_crop, eye_coords)``.

        Raises RuntimeError when the infer request does not finish with
        status 0, instead of silently returning None.
        '''
        img_processed = self.preprocess_input(image.copy())
        self.image = image
        self.exec_net.start_async(request_id=0, inputs={self.input: img_processed})
        status = self.exec_net.requests[0].wait(-1)
        if status != 0:
            raise RuntimeError(
                "Facial landmarks inference failed with status {}".format(status))
        result = self.exec_net.requests[0].outputs[self.output]
        return self.preprocess_output(result[0])

    def check_model(self):
        '''
        If unsupported layers were found, register the extension (when one
        was given) and query again; exit with status 1 if any remain.
        '''
        if not self.unsupported_layers:
            return
        # add_extension(None, ...) would raise, so only try a real path.
        if self.extensions:
            self.core.add_extension(self.extensions, self.device)
            self.unsupported_layers = self._query_unsupported_layers()
        if self.unsupported_layers:
            logging.error("Unsupported layers")
            raise SystemExit(1)

    def preprocess_input(self, image):
        '''
        Resize ``image`` to the network input size and convert it from HWC to
        a batch of one in NCHW layout.
        '''
        net_input_shape = self.network.inputs[self.input].shape
        p_frame = cv2.resize(image, (net_input_shape[3], net_input_shape[2]))
        p_frame = p_frame.transpose(2, 0, 1)
        return p_frame.reshape(1, *p_frame.shape)

    def preprocess_output(self, outputs):
        '''
        Scale the first four output values to pixel coordinates of
        ``self.image`` and cut a box of up to 40x40 pixels around each of the
        two points.

        Returns ``(first_crop, second_crop, [x0, y0, x1, y1])``.
        '''
        height, width = self.image.shape[0], self.image.shape[1]
        scales = (width, height, width, height)
        both_eye_coors = [
            round(outputs[i].tolist()[0][0] * scales[i]) for i in range(4)
        ]

        def crop(cx, cy):
            # Clamp the lower bounds: a negative slice start would wrap
            # around and give an empty or wrong crop near the border.
            return self.image[max(cy - 20, 0):cy + 20, max(cx - 20, 0):cx + 20]

        left_eye = crop(both_eye_coors[0], both_eye_coors[1])
        right_eye = crop(both_eye_coors[2], both_eye_coors[3])
        return left_eye, right_eye, both_eye_coors
class Model_HeadPoseEstimation:
    """Wrapper around a head pose estimation network (outputs angle_y_fc, angle_p_fc, angle_r_fc)."""

    def __init__(self, model_name, device='CPU', extensions=None):
        """
        Store the path to the model ``.xml`` file, the target device and an
        optional extension library path. Nothing is loaded until load_model().
        """
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.in_name = None
        self.in_shape = None
        self.out_name = None

    @staticmethod
    def _weights_path(model_path):
        """Return ``model_path`` with its final extension replaced by ``.bin``."""
        # splitext only strips the last suffix, so dots in directory names
        # or a leading "./" do not truncate the path.
        return os.path.splitext(model_path)[0] + '.bin'

    def load_model(self):
        """Read the network, check layer support and load it onto the device."""
        model_structure = self.model_name
        model_weights = self._weights_path(self.model_name)

        self.plugin = IECore()
        if self.extensions and 'CPU' in self.device:
            self.plugin.add_extension(self.extensions, self.device)

        self.network = IENetwork(model=model_structure, weights=model_weights)
        self.check_model()
        self.exec_net = self.plugin.load_network(network=self.network, device_name=self.device, num_requests=1)

        self.in_name = next(iter(self.network.inputs))
        self.in_shape = self.network.inputs[self.in_name].shape
        self.out_name = list(self.network.outputs.keys())

    def predict(self, image):
        """Run synchronous inference on ``image`` and return the three angles from preprocess_output()."""
        processed_image = self.preprocess_input(image.copy())
        outputs = self.exec_net.infer({self.in_name: processed_image})
        return self.preprocess_output(outputs)

    def check_model(self):
        """Exit with status 1 when the device is "CPU" and reports unsupported layers."""
        if self.device != "CPU":
            return
        supported_layers = self.plugin.query_network(network=self.network, device_name=self.device)
        notsupported_layers = [l for l in self.network.layers.keys() if l not in supported_layers]
        if notsupported_layers:
            logging.error("[ERROR] Unsupported layers found: {}".format(notsupported_layers))
            sys.exit(1)

    def preprocess_input(self, image):
        """Resize ``image`` to the network input size and convert HWC to a batch of one in NCHW."""
        image_processed = cv2.resize(image, (self.in_shape[3], self.in_shape[2]))
        image_processed = image_processed.transpose(2, 0, 1)
        return image_processed.reshape(1, *image_processed.shape)

    def preprocess_output(self, outputs):
        """Return ``[angle_y_fc, angle_p_fc, angle_r_fc]`` as plain Python scalars."""
        return [
            outputs[key].tolist()[0][0]
            for key in ('angle_y_fc', 'angle_p_fc', 'angle_r_fc')
        ]
class Network:
    """
    Load and configure inference plugins for the specified target devices
    and perform synchronous and asynchronous modes for the specified infer requests.
    """

    def __init__(self):
        # Every attribute stays None until load_model() has populated it.
        self.plugin = None
        self.net = None
        self.input_blob = None
        self.output_blob = None
        self.exec_network = None
        self.infer_request = None

    def load_model(self, model, CPU_EXTENSION, DEVICE, console_output=False):
        """
        Read the IR pair for ``model`` and load it onto ``DEVICE``.

        :param model: path to the IR ``.xml`` file; weights come from the
            file with the same stem and a ``.bin`` extension.
        :param CPU_EXTENSION: path to a CPU extension library, or None.
        :param DEVICE: device name used for the layer query and for loading.
        :param console_output: when True, print the names of any layers that
            are still unsupported after the extension step.
        """
        model_xml = model
        model_bin = os.path.splitext(model_xml)[0] + ".bin"

        self.plugin = IECore()
        self.net = IENetwork(model=model_xml, weights=model_bin)

        unsupported_layers = self._unsupported_layers(DEVICE)

        # Only register the extension when there is one to register and the
        # target actually includes the CPU plugin.
        if unsupported_layers and CPU_EXTENSION and "CPU" in DEVICE:
            self.plugin.add_extension(CPU_EXTENSION, "CPU")
            print("CPU Extension added")
            # Re-query so the report below reflects the extension.
            unsupported_layers = self._unsupported_layers(DEVICE)

        if unsupported_layers and console_output:
            print("Unsupported layers: {}".format(", ".join(unsupported_layers)))

        self.exec_network = self.plugin.load_network(self.net, DEVICE)

        # Input and output layer names.
        self.input_blob = next(iter(self.net.inputs))
        self.output_blob = next(iter(self.net.outputs))

    def _unsupported_layers(self, device):
        """Return the network layers that ``device`` does not report as supported."""
        supported_layers = self.plugin.query_network(network=self.net,
                                                     device_name=device)
        return [l for l in self.net.layers.keys() if l not in supported_layers]

    def get_input_shape(self):
        """Return the shape of the first input layer."""
        return self.net.inputs[self.input_blob].shape

    def exec_net(self, frame):
        """Start asynchronous request 0 on ``frame``."""
        self.exec_network.start_async(request_id=0,
                                      inputs={self.input_blob: frame})

    def wait(self):
        """Block until request 0 completes and return its status code."""
        return self.exec_network.requests[0].wait(-1)

    def get_output(self):
        """Return the outputs mapping of request 0 (all output blobs)."""
        return self.exec_network.requests[0].outputs
class Benchmark:
    """Drive an Inference Engine core to load a network and time its inference."""

    def __init__(self, device: str, number_infer_requests: int = None,
                 number_iterations: int = None, duration_seconds: int = None,
                 api_type: str = 'async'):
        """
        :param device: device name passed to every Inference Engine call.
        :param number_infer_requests: requested number of infer requests;
            replaced by the actual count once a network is loaded.
        :param number_iterations: iteration limit for infer(), if any.
        :param duration_seconds: time limit, resolved via get_duration_seconds().
        :param api_type: 'sync' or 'async'.
        """
        self.device = device
        self.ie = IECore()
        self.nireq = number_infer_requests
        self.niter = number_iterations
        self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device)
        self.api_type = api_type

    def __del__(self):
        # __init__ may have failed before ``ie`` was assigned.
        if hasattr(self, 'ie'):
            del self.ie

    def add_extension(self, path_to_extension: str = None, path_to_cldnn_config: str = None):
        """Apply the GPU config file and/or register the CPU extension library, when given."""
        if path_to_cldnn_config:
            self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME)
            logger.info('GPU extensions is loaded {}'.format(path_to_cldnn_config))

        if path_to_extension:
            self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME)
            logger.info('CPU extensions is loaded {}'.format(path_to_extension))

    def get_version_info(self) -> str:
        """Log the API version and return a formatted per-device version string."""
        logger.info('InferenceEngine:\n{: <9}{:.<24} {}'.format('', 'API version', get_version()))
        version_string = 'Device info\n'
        for device, version in self.ie.get_versions(self.device).items():
            version_string += '{: <9}{}\n'.format('', device)
            version_string += '{: <9}{:.<24}{} {}.{}\n'.format('', version.description, ' version', version.major,
                                                               version.minor)
            version_string += '{: <9}{:.<24} {}\n'.format('', 'Build', version.build_number)
        return version_string

    def set_config(self, config=None):
        """Apply ``config`` (a mapping of device name -> settings dict) to the core."""
        # None instead of a shared mutable ``{}`` default.
        for device, device_config in (config or {}).items():
            self.ie.set_config(device_config, device)

    def read_network(self, path_to_model: str):
        """Read a network; a ``.bin`` next to the model is used only for XML models."""
        model_filename = os.path.abspath(path_to_model)
        head, ext = os.path.splitext(model_filename)
        weights_filename = os.path.abspath(head + BIN_EXTENSION) if ext == XML_EXTENSION else ""
        return self.ie.read_network(model_filename, weights_filename)

    def _requested_nireq(self):
        """Number of infer requests to ask for: 1 in sync mode, else ``nireq`` (0 when unset)."""
        return 1 if self.api_type == 'sync' else self.nireq or 0

    def load_network(self, ie_network: "IENetwork", config=None):
        """Load ``ie_network`` onto the device and record the actual number of infer requests."""
        exe_network = self.ie.load_network(ie_network,
                                           self.device,
                                           config={} if config is None else config,
                                           num_requests=self._requested_nireq())
        # Number of requests
        self.nireq = len(exe_network.requests)
        return exe_network

    def import_network(self, path_to_file: str, config=None):
        """Import a pre-compiled network and record the actual number of infer requests."""
        exe_network = self.ie.import_network(model_file=path_to_file,
                                             device_name=self.device,
                                             config={} if config is None else config,
                                             num_requests=self._requested_nireq())
        # Number of requests
        self.nireq = len(exe_network.requests)
        return exe_network

    def infer(self, exe_network, batch_size, progress_bar=None):
        """
        Run the measurement loop.

        Returns ``(fps, latency_ms, total_duration_sec, iteration)`` where
        ``latency_ms`` is the median of the collected request latencies.
        Raises Exception when waiting on the executable network fails.
        """
        progress_count = 0
        infer_requests = exe_network.requests

        # warming up - out of scope
        if self.api_type == 'sync':
            infer_requests[0].infer()
        else:
            infer_requests[0].async_infer()
            status = exe_network.wait()
            if status != StatusCode.OK:
                raise Exception("Wait for all requests is failed with status code {}!".format(status))

        start_time = datetime.utcnow()
        exec_time = 0
        iteration = 0

        times = []
        in_fly = set()
        # Start inference & calculate performance.
        # The last clause keeps async runs going until the iteration count is
        # a multiple of nireq, so the final requests run under the same load.
        while (self.niter and iteration < self.niter) or \
              (self.duration_seconds and exec_time < self.duration_seconds) or \
              (self.api_type == 'async' and iteration % self.nireq):
            if self.api_type == 'sync':
                infer_requests[0].infer()
                times.append(infer_requests[0].latency)
            else:
                infer_request_id = exe_network.get_idle_request_id()
                if infer_request_id < 0:
                    status = exe_network.wait(num_requests=1)
                    if status != StatusCode.OK:
                        raise Exception("Wait for idle request failed!")
                    infer_request_id = exe_network.get_idle_request_id()
                    if infer_request_id < 0:
                        raise Exception("Invalid request id!")
                if infer_request_id in in_fly:
                    # The request has run before: record its previous latency.
                    times.append(infer_requests[infer_request_id].latency)
                else:
                    in_fly.add(infer_request_id)
                infer_requests[infer_request_id].async_infer()
            iteration += 1

            exec_time = (datetime.utcnow() - start_time).total_seconds()

            if progress_bar:
                if self.duration_seconds:
                    # calculate how many progress intervals are covered by current iteration.
                    # depends on the current iteration time and time of each progress interval.
                    # Previously covered progress intervals must be skipped.
                    progress_interval_time = self.duration_seconds / progress_bar.total_num
                    new_progress = int(exec_time / progress_interval_time - progress_count)
                    progress_bar.add_progress(new_progress)
                    progress_count += new_progress
                elif self.niter:
                    progress_bar.add_progress(1)

        # wait the latest inference executions
        status = exe_network.wait()
        if status != StatusCode.OK:
            raise Exception("Wait for all requests is failed with status code {}!".format(status))

        total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
        for infer_request_id in in_fly:
            times.append(infer_requests[infer_request_id].latency)
        times.sort()
        latency_ms = median(times)
        fps = batch_size * 1000 / latency_ms if self.api_type == 'sync' else batch_size * iteration / total_duration_sec
        if progress_bar:
            progress_bar.finish()
        return fps, latency_ms, total_duration_sec, iteration
class Model_HeadPoseEstimation:
    '''
    Class for the Head Pose Estimation Model.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Store the path to the model ``.xml`` file, the target device and an
        optional extension library path. The weights path is derived by
        replacing the final extension of ``model_name`` with ``.bin``.
        '''
        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        # splitext strips only the last suffix, so a leading "./" or dots in
        # directory names do not truncate the path.
        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
        self.input_name = None
        self.input_shape = None
        self.output_names = None
        self.output_shape = None
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.supported_layers = None
        self.unsupported_layers = None

    def load_model(self):
        '''
        Read the network, verify layer support (see check_model) and load the
        network onto ``self.device``. Exits with status 1 when the check fails.
        '''
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_name,
                                                weights=self.model_weights)
        self._refresh_layer_support()

        if not self.check_model():
            raise SystemExit(1)

        self.exec_net = self.plugin.load_network(network=self.network,
                                                 device_name=self.device,
                                                 num_requests=1)
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_names = list(self.network.outputs.keys())

    def _refresh_layer_support(self):
        '''Query the device and update supported_layers / unsupported_layers.'''
        self.supported_layers = self.plugin.query_network(
            network=self.network, device_name=self.device)
        self.unsupported_layers = [
            layer for layer in self.network.layers.keys()
            if layer not in self.supported_layers
        ]

    def predict(self, image):
        '''
        Run synchronous inference on ``image`` and return the three angles
        produced by preprocess_output().
        '''
        img2 = self.preprocess_input(image.copy())
        outputs = self.exec_net.infer({self.input_name: img2})
        return self.preprocess_output(outputs)

    def check_model(self):
        '''
        Return True when the model can be loaded.

        Only acts when the device is 'CPU' and unsupported layers were found:
        the extension is registered (if one was given) and the layers are
        queried again. Returns False when no extension is available or layers
        remain unsupported.
        '''
        if not self.unsupported_layers or self.device != 'CPU':
            return True

        print("unsupported layers :{}".format(self.unsupported_layers))
        if self.extensions is None:
            print("cpu extension path not found")
            return False

        self.plugin.add_extension(self.extensions, self.device)
        self._refresh_layer_support()
        if self.unsupported_layers:
            print("unsupported layers found")
            return False
        return True

    def preprocess_input(self, image):
        '''
        Resize ``image`` to the network input size and convert it from HWC to
        a batch of one in NCHW layout.
        '''
        resized = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        return np.transpose(np.expand_dims(resized, axis=0), (0, 3, 1, 2))

    def preprocess_output(self, outputs):
        '''
        Return ``[angle_y_fc, angle_p_fc, angle_r_fc]`` as plain Python scalars.
        '''
        return [
            outputs['angle_y_fc'].tolist()[0][0],
            outputs['angle_p_fc'].tolist()[0][0],
            outputs['angle_r_fc'].tolist()[0][0]
        ]
def main():  # noqa
    """
    Run a single-image SSD-style detection and write the result to out.bmp.

    Returns 0 on success and -1 when the model topology is not supported or
    the input image cannot be read.
    """
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = parse_args()

    # ---------------------------Step 1. Initialize inference engine core--------------------------------------------------
    log.info('Creating Inference Engine')
    ie = IECore()

    if args.extension and args.device == 'CPU':
        log.info(f'Loading the {args.device} extension: {args.extension}')
        ie.add_extension(args.extension, args.device)

    if args.config and args.device in ('GPU', 'MYRIAD', 'HDDL'):
        log.info(f'Loading the {args.device} configuration: {args.config}')
        ie.set_config({'CONFIG_FILE': args.config}, args.device)

    # ---------------------------Step 2. Read a model in OpenVINO Intermediate Representation or ONNX format---------------
    log.info(f'Reading the network: {args.model}')
    # (.xml and .bin files) or (.onnx file)
    net = ie.read_network(model=args.model)

    if len(net.input_info) != 1:
        log.error('The sample supports only single input topologies')
        return -1

    single_output = len(net.outputs) == 1
    # The multi-output path below reads BOTH 'boxes' and 'labels', so both
    # names must be present (the previous `or` let one-of-two through).
    if not single_output and not ('boxes' in net.outputs and 'labels' in net.outputs):
        log.error('The sample supports models with 1 output or with 2 with the names "boxes" and "labels"')
        return -1

    # ---------------------------Step 3. Configure input & output----------------------------------------------------------
    log.info('Configuring input and output blobs')
    # Get name of input blob
    input_blob = next(iter(net.input_info))

    # Set input and output precision manually
    net.input_info[input_blob].precision = 'U8'

    if single_output:
        output_blob = next(iter(net.outputs))
        net.outputs[output_blob].precision = 'FP32'
    else:
        net.outputs['boxes'].precision = 'FP32'
        net.outputs['labels'].precision = 'U16'

    # ---------------------------Step 4. Loading model to the device-------------------------------------------------------
    log.info('Loading the model to the plugin')
    exec_net = ie.load_network(network=net, device_name=args.device)

    # ---------------------------Step 5. Create infer request--------------------------------------------------------------
    # load_network() method of the IECore class with a specified number of requests (default 1) returns an ExecutableNetwork
    # instance which stores infer requests. So you already created Infer requests in the previous step.

    # ---------------------------Step 6. Prepare input---------------------------------------------------------------------
    original_image = cv2.imread(args.input)
    if original_image is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        log.error(f'Failed to read the image {args.input}')
        return -1
    image = original_image.copy()
    _, _, net_h, net_w = net.input_info[input_blob].input_data.shape

    if image.shape[:-1] != (net_h, net_w):
        log.warning(f'Image {args.input} is resized from {image.shape[:-1]} to {(net_h, net_w)}')
        image = cv2.resize(image, (net_w, net_h))

    # Change data layout from HWC to CHW
    image = image.transpose((2, 0, 1))
    # Add N dimension to transform to NCHW
    image = np.expand_dims(image, axis=0)

    # ---------------------------Step 7. Do inference----------------------------------------------------------------------
    log.info('Starting inference in synchronous mode')
    res = exec_net.infer(inputs={input_blob: image})

    # ---------------------------Step 8. Process output--------------------------------------------------------------------
    # Generate a label list. It is kept separate from the model's 'labels'
    # output so that one does not overwrite the other.
    labels_map = None
    if args.labels:
        with open(args.labels, 'r') as f:
            labels_map = [line.split(',')[0].strip() for line in f]

    output_image = original_image.copy()
    h, w, _ = output_image.shape

    if single_output:
        # Change a shape of a numpy.ndarray with results ([1, 1, N, 7]) to get another one ([N, 7]),
        # where N is the number of detected bounding boxes
        detections = res[output_blob].reshape(-1, 7)
        detected_labels = None
    else:
        detections = res['boxes']
        detected_labels = res['labels']
        # Redefine scale coefficients
        w, h = w / net_w, h / net_h

    for i, detection in enumerate(detections):
        if single_output:
            _, class_id, confidence, xmin, ymin, xmax, ymax = detection
        else:
            class_id = detected_labels[i]
            xmin, ymin, xmax, ymax, confidence = detection

        if confidence > 0.5:
            # class_id arrives as a numpy scalar (float for the single-output
            # layout); it must be an int before it can index the label list.
            class_id = int(class_id)
            if labels_map and 0 <= class_id < len(labels_map):
                label = labels_map[class_id]
            else:
                label = class_id

            xmin = int(xmin * w)
            ymin = int(ymin * h)
            xmax = int(xmax * w)
            ymax = int(ymax * h)

            log.info(f'Found: label = {label}, confidence = {confidence:.2f}, '
                     f'coords = ({xmin}, {ymin}), ({xmax}, {ymax})')

            # Draw a bounding box on a output image
            cv2.rectangle(output_image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

    cv2.imwrite('out.bmp', output_image)
    if os.path.exists('out.bmp'):
        log.info('Image out.bmp created!')
    else:
        log.error('Image out.bmp was not created. Check your permissions.')

    # ----------------------------------------------------------------------------------------------------------------------
    log.info('This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool\n')
    return 0
class Model_FacialLandmarksDetection:
    '''
    Class for the Facial Landmark Detection Model.
    '''

    def __init__(self, model_name, device='CPU', extension=None):
        '''
        Store the path to the model ``.xml`` file, the target device and an
        optional extension path. The weights path is derived by replacing the
        final extension of ``model_name`` with ``.bin``.
        '''
        self.net = None
        self.net_plug = None
        self.plugin = None
        self.inp_name = None
        self.out_name = None
        self.inp_shape = None
        self.out_shape = None
        self.model = model_name
        self.device = device
        self.ext = extension
        # splitext strips only the last suffix, so a leading "./" or dots in
        # directory names do not truncate the path.
        self.weights = os.path.splitext(self.model)[0] + '.bin'

    def load_model(self, plugin=None):
        '''
        Load the network onto the device.

        :param plugin: an existing IECore to reuse; a new one is created when
            this is falsy.
        '''
        self.plugin = plugin if plugin else IECore()
        self.net = IENetwork(model=self.model, weights=self.weights)
        self.net_plug = self.plugin.load_network(network=self.net,
                                                 device_name=self.device,
                                                 num_requests=1)
        self.inp_name = next(iter(self.net.inputs))
        self.out_name = next(iter(self.net.outputs))
        self.inp_shape = self.net.inputs[self.inp_name].shape

    def predict(self, frame):
        '''
        Run inference on ``frame`` and cut a box of up to 30x30 pixels around
        each of the two points returned by preprocess_output().

        Returns ``(first_crop, second_crop, eye_dim)`` where ``eye_dim`` is
        ``[[xmin, ymin, xmax, ymax], [xmin, ymin, xmax, ymax]]``. Lower bounds
        are clamped to 0 so boxes near the border stay inside the frame.
        '''
        processed_frame = self.preprocess_input(frame.copy())
        raw = self.net_plug.infer({self.inp_name: processed_frame})
        centers = self.preprocess_output(raw)

        ht = frame.shape[0]
        wd = frame.shape[1]
        centers = (centers * np.array([wd, ht, wd, ht])).astype(np.int32)

        # A negative slice start would wrap around and produce an empty or
        # wrong crop, so clamp the lower corner at the frame edge.
        lower = np.maximum(centers - 15, 0)
        upper = centers + 15
        lxmin, lymin, rxmin, rymin = lower
        lxmax, lymax, rxmax, rymax = upper

        l = frame[lymin:lymax, lxmin:lxmax]
        r = frame[rymin:rymax, rxmin:rxmax]
        eye_dim = [[lxmin, lymin, lxmax, lymax], [rxmin, rymin, rxmax, rymax]]
        return l, r, eye_dim

    def check_model(self):
        '''Placeholder: no layer-support check is performed for this model.'''
        pass

    def preprocess_input(self, frame):
        '''
        Resize ``frame`` to the network input size and convert it from HWC to
        a ``(1, 3, h, w)`` array.
        '''
        h = self.inp_shape[2]
        w = self.inp_shape[3]
        reshaped_frame = cv2.resize(frame, (w, h))
        reshaped_frame = reshaped_frame.transpose((2, 0, 1))
        return reshaped_frame.reshape(1, 3, h, w)

    def preprocess_output(self, out):
        '''Return the first four values of the first batch item of the output blob as a tuple.'''
        cell = out[self.out_name][0]
        return (cell[0][0][0], cell[1][0][0], cell[2][0][0], cell[3][0][0])
def main():
    """Run the Blindspot Assistance detection demo.

    Reads frames from a camera or a video file, runs a single-output
    detection network on them (async by default) and raises an on-screen
    alarm while a detected box overlaps the region of interest (ROI).

    Keys in the output window: ESC quits, TAB toggles sync/async mode,
    'l' opens an interactive ROI selection.
    """
    print("Welcome to Blindspot Assistance")
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    log.info("Creating Inference Engine...")
    ie = IECore()
    if args.cpu_threads:
        ie.set_config({'CPU_THREADS_NUM': args.cpu_threads}, args.device)
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    # Read IR
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [
            l for l in net.layers.keys() if l not in supported_layers
        ]
        if len(not_supported_layers) != 0:
            log.error(
                "Following layers are not supported by the plugin for specified device {}:\n {}"
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)

    # The 4D input receives the image; an optional 2D input receives
    # [height, width, scale].
    img_info_input_blob = None
    feed_dict = {}
    for blob_name in net.inputs:
        if len(net.inputs[blob_name].shape) == 4:
            input_blob = blob_name
        elif len(net.inputs[blob_name].shape) == 2:
            img_info_input_blob = blob_name
        else:
            raise RuntimeError(
                "Unsupported {}D input layer '{}'. Only 2D and 4D input layers are supported"
                .format(len(net.inputs[blob_name].shape), blob_name))

    assert len(net.outputs) == 1, "Demo supports only single output topologies"
    out_blob = next(iter(net.outputs))

    log.info("Loading IR to the plugin...")
    exec_net = ie.load_network(network=net,
                               num_requests=2,
                               device_name=args.device)

    # Read and pre-process input image
    n, c, h, w = net.inputs[input_blob].shape
    if img_info_input_blob:
        feed_dict[img_info_input_blob] = [h, w, 1]

    if args.input == 'cam':
        input_stream = 0
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    if args.labels:
        with open(args.labels, 'r') as f:
            labels_map = [x.strip() for x in f]
    else:
        labels_map = None

    cap = cv2.VideoCapture(input_stream)
    out = None
    try:
        if args.output:
            FILE_OUTPUT = args.output
            if os.path.isfile(FILE_OUTPUT):
                os.remove(FILE_OUTPUT)
            fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
            fps = cap.get(cv2.CAP_PROP_FPS)
            # Property ids 3 and 4 are the frame width and height.
            out = cv2.VideoWriter(FILE_OUTPUT, fourcc, fps,
                                  (int(cap.get(3)), int(cap.get(4))))

        cur_request_id = 0
        next_request_id = 1

        log.info("Starting inference in async mode...")
        is_async_mode = True
        render_time = 0
        ret, frame = cap.read()

        # ROI as [x, y, width, height]: the left 25% of the frame by default
        roi = [0, 0, int(cap.get(3) * 0.25), int(cap.get(4))]

        print(
            "To close the application, press 'CTRL+C' here or switch to the output window and press ESC key"
        )
        print(
            "To switch between sync/async modes, press TAB key in the output window"
        )

        object_time = 0
        alarm = False
        object_detected = False

        while cap.isOpened():
            if is_async_mode:
                ret, next_frame = cap.read()
            else:
                ret, frame = cap.read()
            if not ret:
                break
            initial_w = cap.get(3)
            initial_h = cap.get(4)

            # Selected rectangle overlay
            overlay = frame.copy()
            cv2.rectangle(overlay, (roi[0], roi[1]),
                          (roi[0] + roi[2], roi[1] + roi[3]), (0, 0, 0),
                          -1)  # A filled rectangle
            alpha = 0.3  # Transparency factor.
            # Blend the filled rectangle over the image, in place
            cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)

            # Main sync point:
            # in the truly Async mode we start the NEXT infer request, while waiting for the CURRENT to complete
            # in the regular mode we start the CURRENT request and immediately wait for its completion
            inf_start = time.time()
            if is_async_mode:
                in_frame = cv2.resize(next_frame, (w, h))
                in_frame = in_frame.transpose(
                    (2, 0, 1))  # Change data layout from HWC to CHW
                in_frame = in_frame.reshape((n, c, h, w))
                feed_dict[input_blob] = in_frame
                exec_net.start_async(request_id=next_request_id,
                                     inputs=feed_dict)
            else:
                in_frame = cv2.resize(frame, (w, h))
                in_frame = in_frame.transpose(
                    (2, 0, 1))  # Change data layout from HWC to CHW
                in_frame = in_frame.reshape((n, c, h, w))
                feed_dict[input_blob] = in_frame
                exec_net.start_async(request_id=cur_request_id,
                                     inputs=feed_dict)

            if exec_net.requests[cur_request_id].wait(-1) == 0:
                inf_end = time.time()
                det_time = inf_end - inf_start

                # Parse detection results of the current request
                # output_blob = [image_id, label, conf, x_min, y_min, x_max, y_max]
                res = exec_net.requests[cur_request_id].outputs[out_blob]
                for obj in res[0][0]:
                    # Draw only objects when probability more than specified threshold
                    if obj[2] > args.prob_threshold:
                        xmin = int(obj[3] * initial_w)
                        ymin = int(obj[4] * initial_h)
                        xmax = int(obj[5] * initial_w)
                        ymax = int(obj[6] * initial_h)
                        class_id = int(obj[1])
                        # Draw box and label\class_id
                        if class_id == 1:
                            color = (0, 255, 0)
                        else:
                            color = (255, 0, 0)
                        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                      color, 1)
                        det_label = labels_map[class_id] if labels_map else str(
                            switch_class(class_id))
                        cv2.putText(
                            frame,
                            det_label + ' ' + str(round(obj[2] * 100, 1)) + ' %',
                            (xmin, ymin - 7), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                            color, 1)

                        # The box hits the ROI when, on each axis, one of its
                        # edges lies inside the ROI or it spans the whole ROI.
                        roi_x0, roi_x1 = roi[0], roi[0] + roi[2]
                        roi_y0, roi_y1 = roi[1], roi[1] + roi[3]
                        hits_x = (roi_x0 < xmin < roi_x1
                                  or roi_x0 < xmax < roi_x1
                                  or (xmin < roi_x0 and xmax > roi_x1))
                        hits_y = (roi_y0 < ymin < roi_y1
                                  or roi_y0 < ymax < roi_y1
                                  or (ymin < roi_y0 and ymax > roi_y1))
                        if hits_x and hits_y:
                            object_detected = True
                            last_object = str(switch_class(class_id))

                if object_detected:
                    object_time = time.time()
                    object_detected = False
                    alarm = True
                else:
                    # Keep the alarm on for 2 seconds after the last hit
                    if time.time() - object_time > 2:
                        alarm = False

                if alarm:
                    cv2.circle(frame, (25, 50), 10, (0, 0, 255), -1)
                    cv2.putText(frame, "Last object detected: " + last_object,
                                (40, 55), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                                (0, 0, 255), 1)
                else:
                    cv2.circle(frame, (25, 50), 10, (0, 255, 0), -1)
                    cv2.putText(frame, "Nothing detected", (40, 55),
                                cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 255, 0), 1)

                # Draw performance stats
                # ("\\A" keeps the displayed text while avoiding an invalid escape)
                inf_time_message = "Inference time: N\\A for async mode" if is_async_mode else \
                    "Inference time: {:.3f} ms".format(det_time * 1000)
                render_time_message = "OpenCV rendering time: {:.3f} ms".format(
                    render_time * 1000)
                async_mode_message = "Async mode is on. Processing request {}".format(cur_request_id) if is_async_mode else \
                    "Async mode is off. Processing request {}".format(cur_request_id)

                cv2.putText(frame, inf_time_message, (15, 15),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
                cv2.putText(frame, render_time_message, (15, 30),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)
                cv2.putText(frame, async_mode_message,
                            (10, int(initial_h - 20)),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

            render_start = time.time()
            if out is not None:
                out.write(frame)
            if not args.hide_output:
                cv2.imshow("Detection Results", frame)
            render_end = time.time()
            render_time = render_end - render_start

            if is_async_mode:
                cur_request_id, next_request_id = next_request_id, cur_request_id
                frame = next_frame

            key = cv2.waitKey(1)
            if key == ord('l'):
                showCrosshair = False
                fromCenter = True
                # NOTE(review): the arguments are passed positionally in
                # (fromCenter, showCrosshair) order - confirm this matches
                # the parameter order cv2.selectROI expects.
                roi = cv2.selectROI("Detection Results", frame, fromCenter,
                                    showCrosshair)
            if key == 27:
                break
            if key == 9:
                is_async_mode = not is_async_mode
                log.info("Switched to {} mode".format(
                    "async" if is_async_mode else "sync"))
    finally:
        # Release the capture and the writer on every exit path so the
        # output video is finalised even after an error or CTRL+C.
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()
class HeadPoseEstimationModel:
    '''
    Class for the Head Pose Estimation Model.

    Wraps an Inference Engine network whose outputs 'angle_y_fc',
    'angle_p_fc' and 'angle_r_fc' are returned as a three-element list.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Store the configuration; the network is read in load_model().

        model_name: path to the model topology file. The weights file is
            expected next to it, with the extension replaced by ".bin".
        device: device name used for query_network() / load_network().
        extensions: optional extension library path, added only when the
            device is 'CPU' and unsupported layers are found.
        '''
        # Local import so the block does not depend on file-level imports.
        import os

        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = self.model_name
        # splitext strips only the final extension, so paths that contain
        # other dots ("./models/x.xml") keep their directory part.
        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_names = None

    def _unsupported_layers(self):
        '''Return the network layers that query_network() does not report for self.device.'''
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=self.device)
        return [l for l in self.network.layers.keys()
                if l not in supported_layers]

    def load_model(self):
        '''
        Read the network, verify layer support and load it onto the device.

        Raises SystemExit(1) when the device is 'CPU', layers are
        unsupported, and no extension is given or the extension does not
        help.
        '''
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_structure,
                                                weights=self.model_weights)

        unsupported_layers = self._unsupported_layers()
        if unsupported_layers and self.device == 'CPU':
            print("unsupported layers found:{}".format(unsupported_layers))
            if self.extensions is not None:
                print("Adding CPU Extension")
                self.plugin.add_extension(self.extensions, self.device)
                if self._unsupported_layers():
                    print("Extension was Ineffective")
                    raise SystemExit(1)
                print("CPU Extension Successful")
            else:
                print("CPU Extension Required")
                raise SystemExit(1)

        self.exec_net = self.plugin.load_network(network=self.network,
                                                 device_name=self.device,
                                                 num_requests=1)
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_names = [i for i in self.network.outputs.keys()]

    def predict(self, image):
        '''
        Run inference on `image` and return the three output angles
        in the order produced by preprocess_output().
        '''
        processed_img = self.preprocess_input(image.copy())
        outputs = self.exec_net.infer({self.input_name: processed_img})
        finalOutput = self.preprocess_output(outputs)
        return finalOutput

    def check_model(self):
        '''
        Verify layer support for an already-read network.

        Same policy as load_model(): raises SystemExit(1) when running on
        'CPU' with unsupported layers that an extension cannot resolve.
        '''
        unsupported_layers = self._unsupported_layers()
        if unsupported_layers and self.device == 'CPU':
            print("unsupported layers found:{}".format(unsupported_layers))
            if self.extensions is not None:
                print("Added CPU Extension")
                self.plugin.add_extension(self.extensions, self.device)
                if self._unsupported_layers():
                    print("CPU Extension Ineffective")
                    raise SystemExit(1)
                print("CPU Extension Succesful")
            else:
                print("CPU Extension Required")
                raise SystemExit(1)

    def preprocess_input(self, image):
        '''
        Resize `image` to the network input size and reorder it from
        HWC to a 1xCxHxW array.
        '''
        resized_img = cv2.resize(image,
                                 (self.input_shape[3], self.input_shape[2]))
        processed_img = np.transpose(np.expand_dims(resized_img, axis=0),
                                     (0, 3, 1, 2))
        return processed_img

    def preprocess_output(self, outputs):
        '''
        Extract the scalar from each angle blob.

        Returns [angle_y_fc, angle_p_fc, angle_r_fc] as Python floats.
        '''
        outs = []
        outs.append(outputs['angle_y_fc'].tolist()[0][0])
        outs.append(outputs['angle_p_fc'].tolist()[0][0])
        outs.append(outputs['angle_r_fc'].tolist()[0][0])
        return outs
def main():
    """Compare Inference Engine and PyTorch outputs image by image.

    Loads the IR given by --model into the Inference Engine and the
    checkpoint given by args.checkpoint into a MultiOutputModel, then for
    every dataset image prints both sets of per-label scores side by
    side. Each image is shown in a window; press 'q' to quit, any other
    key to continue.
    """
    args = build_argparser().parse_args()
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"

    trfm = transforms.Compose([
        transforms.Lambda(lambd=cut_pil_image),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])

    # Plugin initialization for specified device and load extensions library if specified
    print("Creating Inference Engine")
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    # Read IR
    print("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = IENetwork(model=model_xml, weights=model_bin)

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys()
                                if l not in supported_layers]
        if len(not_supported_layers) != 0:
            print("Following layers are not supported by the plugin for specified device {}:\n {}".
                  format(args.device, ', '.join(not_supported_layers)))
            print("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                  "or --cpu_extension command line argument")
            sys.exit(1)

    assert len(net.inputs.keys()) == 1, "Sample supports only single input topologies"
    assert len(net.outputs) == 1, "Sample supports only single output topologies"

    print("Preparing input blobs")
    input_blob = next(iter(net.inputs))
    out_blob = next(iter(net.outputs))

    # Read and pre-process input images
    n, c, h, w = net.inputs[input_blob].shape
    print(f'input {input_blob}: {net.inputs[input_blob].shape}')
    print(f'output {out_blob}: {net.outputs[out_blob].shape}')
    # Unpacking doubles as a check that the output blob is 2-dimensional.
    bs, ots = net.outputs[out_blob].shape
    print(f'read from:{args.input}')

    print("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    attributes = AttributesDataset(args.attributes_file)
    test_dataset = CSVDataset(annotation_path=args.attributes_file,
                              images_dir=args.images_dir,
                              attributes=attributes,
                              transform=trfm)
    test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False,
                                 num_workers=1)

    device = torch.device("cuda" if torch.cuda.is_available()
                          and args.device == 'cuda' else "cpu")
    model = MultiOutputModel(trained_labels=attributes.fld_names,
                             attrbts=attributes).to(device)
    # Map the checkpoint onto the device actually selected above; a fixed
    # 'cuda' target fails on machines without a GPU.
    statedict = torch.load(args.checkpoint, map_location=device)
    model.load_state_dict(statedict)
    model.eval()

    with torch.no_grad():
        for image in test_dataloader:
            img_path = image['img_path'][0]
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None
                print(f'cannot read image: {img_path}')
                continue
            print(f'image shape={img.shape}: {img_path}')
            cv2.imshow('xxxx', img)
            img = cv2.resize(img, (w, h), interpolation=cv2.INTER_CUBIC)
            img = img.transpose((2, 0, 1))  # Change data layout from HWC to CHW
            res = exec_net.infer(inputs={input_blob: img})[out_blob]
            res2 = model(image['img'].to(device))
            for il, (v, v2) in enumerate(zip(res[0], res2['label'][0])):
                l = attributes.labels_id_to_name["label"][il]
                print(f'{il} {l:.<18} {v:+.4f} ... {v2:+.4f}')
            k = cv2.waitKey(0)
            if k == ord('q'):
                sys.exit()
def main():
    """Classify the input images with a network built through ngraph.

    Builds the network with create_ngraph_function(), loads it onto
    args.device, feeds every image from args.input in one batch and logs
    the args.number_top most probable classes per image.

    Returns 0.
    """
    log.basicConfig(format='[ %(levelname)s ] %(message)s',
                    level=log.INFO,
                    stream=sys.stdout)
    args = parse_args()

    # ---- Step 1. Initialize inference engine core
    log.info('Creating Inference Engine')
    core = IECore()

    # ---- Step 2. Build the network from an ngraph function
    log.info(
        f'Loading the network using ngraph function with weights from {args.model}'
    )
    function = create_ngraph_function(args)
    capsule = ngraph.impl.Function.to_capsule(function)
    net = IENetwork(capsule)

    # ---- Step 3. Configure input & output
    log.info('Configuring input and output blobs')
    input_blob = next(iter(net.input_info))
    out_blob = next(iter(net.outputs))
    # Precisions are set by hand
    net.input_info[input_blob].precision = 'U8'
    net.outputs[out_blob].precision = 'FP32'
    # One batch slot per input image
    net.batch_size = len(args.input)

    # ---- Step 4. Load the model to the device
    log.info('Loading the model to the plugin')
    exec_net = core.load_network(network=net, device_name=args.device)

    # ---- Step 5. Infer requests were created by load_network() above

    # ---- Step 6. Prepare input
    n, c, h, w = net.input_info[input_blob].input_data.shape
    batch = np.ndarray(shape=(n, c, h, w))
    for index in range(n):
        image_path = args.input[index]
        picture = read_image(image_path)

        # An image with more bright than dark pixels is inverted
        bright = np.count_nonzero(picture > 127)
        dark = np.count_nonzero(picture < 127)
        if (bright - dark) > 0:
            log.warning(
                f'Image {image_path} is inverted to white over black')
            picture = cv2.bitwise_not(picture)

        target_shape = (h, w)
        if picture.shape != target_shape:
            log.warning(
                f'Image {image_path} is resized from {picture.shape} to {target_shape}'
            )
            picture = cv2.resize(picture, (w, h))

        batch[index] = picture

    # ---- Step 7. Do inference
    log.info('Starting inference in synchronous mode')
    results = exec_net.infer(inputs={input_blob: batch})

    # ---- Step 8. Process output
    # Labels file: the text before the first comma on each line is the label
    if args.labels:
        with open(args.labels, 'r') as f:
            labels = [row.partition(',')[0].strip() for row in f]

    scores = results[out_blob]
    for index in range(n):
        probs = scores[index]
        # Class ids of the args.number_top best scores, best first
        best_ids = np.argsort(probs)[-args.number_top:][::-1]

        header = 'classid probability' + (' label' if args.labels else '')

        log.info(f'Image path: {args.input[index]}')
        log.info(f'Top {args.number_top} results: ')
        log.info(header)
        log.info('-' * len(header))

        for class_id in best_ids:
            id_width = len(str(class_id))
            probability_indent = ' ' * (len('classid') - id_width + 1)
            if args.labels:
                label_indent = ' ' * (len('probability') - 8)
                label = labels[class_id]
            else:
                label_indent = ''
                label = ''
            log.info(
                f'{class_id}{probability_indent}{probs[class_id]:.7f}{label_indent}{label}'
            )
        log.info('')

    log.info(
        'This sample is an API example, '
        'for any performance measurements please use the dedicated benchmark_app tool\n'
    )
    return 0
class GazeEstimationModel:
    '''
    Class for defining GazeEstimation Model and Attributes.

    The input blob names below follow the gaze-estimation-adas-0002 model
    description referenced in predict(); override them in a subclass for a
    network that names its inputs differently.
    '''

    HEAD_POSE_INPUT = 'head_pose_angles'
    LEFT_EYE_INPUT = 'left_eye_image'
    RIGHT_EYE_INPUT = 'right_eye_image'

    def __init__(self,
                 model_name,
                 threshold,
                 device='CPU',
                 extensions=None,
                 async_mode=True,
                 plugin=None):
        '''
        Store the configuration; the network is read in load_model().

        model_name: path to the model topology file.
        threshold: stored as self.threshold; not used by this class.
        device: device name used when loading the network.
        extensions: default extension library path for load_model().
        async_mode: whether predict() starts the request asynchronously.
        plugin: accepted for interface compatibility; not used.
        '''
        self.plugin = None
        self.network = None
        self.input_blob = None
        self.output_blob = None
        self.out_shape = None
        self.exec_network = None
        self.threshold = threshold
        self.device = device
        self.async_mode = async_mode
        self.infer_request = None
        self.net_plugin = None
        self.net = None
        self.model_xml = model_name
        self.extensions = extensions

    def load_model(self,
                   model_xml,
                   gaze_angles,
                   input_gaze_angles,
                   cpu_extension=None):
        '''
        Read the network from `model_xml` and load it onto self.device.

        model_xml: path to the topology file; weights are read from the
            same path with a ".bin" extension.
        gaze_angles, input_gaze_angles: accepted for interface
            compatibility; not used.
        cpu_extension: extension library path; falls back to the
            `extensions` value given to the constructor.

        Raises SystemExit(1) when the device does not support every layer.
        '''
        self.model_xml = model_xml
        model_bin = os.path.splitext(model_xml)[0] + ".bin"
        device = self.device
        if cpu_extension is None:
            cpu_extension = self.extensions

        # Initializing the plugin
        self.plugin = IECore()

        # Add any necessary extensions
        if cpu_extension and "CPU" in device:
            self.plugin.add_extension(cpu_extension, "CPU")

        # Reading the Intermediate Representation (IR) model
        self.network = self.plugin.read_network(model=model_xml,
                                                weights=model_bin)

        # Check that every layer is supported on the target device
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=device)
        unsupported_layers = [
            l for l in self.network.layers.keys()
            if l not in supported_layers
        ]
        if len(unsupported_layers) != 0:
            print("Unsupported layers found: {}".format(unsupported_layers))
            print("Please check for supported extensions.")
            raise SystemExit(1)

        # Loading the network into the plugin
        self.exec_network = self.plugin.load_network(self.network, device)

        # Head-pose input description and the single output blob
        self.input_gaze_angles = self.network.inputs[self.HEAD_POSE_INPUT]
        self.output_blob = next(iter(self.network.outputs))
        self.out_shape = self.network.outputs[self.output_blob].shape
        logging.info("Model Gaze Estimation output shape printed : %s",
                     self.out_shape)
        return

    def predict(self, l_eye_img, r_eye_img, target_gaze, img_frame, width,
                height):
        '''
        Estimate the gaze direction from two eye crops and head-pose angles.

        Model inputs (per the model description referenced below):
            left/right eye image blobs of shape [1x3x60x60] (BxCxHxW) and
            a head-pose angles blob of shape [1x3].
        Model output:
            a [1x3] blob with the cartesian gaze direction vector; it is
            not normalized and has non-unit length.

        l_eye_img, r_eye_img: eye crops as HxWx3 images.
        target_gaze: head-pose angles fed to the network; its third value
            is used as the roll angle in preprocess_output().
        img_frame, width, height: accepted for interface compatibility;
            not used.

        Returns the ((x, y), gaze_vector) pair built by preprocess_output().
        Raises RuntimeError when the infer request does not complete
        successfully.

        Ref: https://docs.openvinotoolkit.org/latest/omz_models_intel_gaze_estimation_adas_0002_description_gaze_estimation_adas_0002.html
        '''
        l_eye_img, r_eye_img = self.preprocess_input(l_eye_img, r_eye_img)

        inputs = {
            self.HEAD_POSE_INPUT: target_gaze,
            self.LEFT_EYE_INPUT: l_eye_img,
            self.RIGHT_EYE_INPUT: r_eye_img,
        }
        request = self.exec_network.requests[0]
        if self.async_mode:
            request.async_infer(inputs=inputs)
        else:
            request.infer(inputs=inputs)

        status = request.wait(-1)
        if status != 0:
            raise RuntimeError(
                "Gaze estimation inference failed with status {}".format(
                    status))

        outputs = request.outputs[self.output_blob]
        return self.preprocess_output(l_eye_img, r_eye_img, outputs,
                                      target_gaze)

    def preprocess_input(self, l_eye_img, r_eye_img):
        '''
        Resize both eye crops to 60x60 and reorder them to 1x3x60x60.

        Returns (l_eye_img, r_eye_img).
        '''
        # left eye input shape [1,3,60,60]
        l_eye_img = cv2.resize(l_eye_img, (60, 60))
        l_eye_img = l_eye_img.transpose((2, 0, 1))
        l_eye_img = l_eye_img.reshape((1, 3, 60, 60))

        # right eye input shape [1,3,60,60]
        r_eye_img = cv2.resize(r_eye_img, (60, 60))
        r_eye_img = r_eye_img.transpose((2, 0, 1))
        r_eye_img = r_eye_img.reshape((1, 3, 60, 60))

        return l_eye_img, r_eye_img

    def preprocess_output(self, l_eye_img, r_eye_img, outputs, target_gaze):
        '''
        Normalize the gaze vector and rotate its x/y part by the head roll.

        l_eye_img, r_eye_img: not used; kept for interface compatibility.
        outputs: network output; outputs[0] is the 3-component gaze vector.
        target_gaze: head-pose angles; the third value is the roll in
            degrees. Both a flat sequence and a [1x3] array are accepted.

        Returns ((x, y), gaze_vector) with gaze_vector normalized.
        '''
        # Ref: https://knowledge.udacity.com/questions/254779
        gaze_vector = outputs[0]
        # The roll comes from the head pose, not from the gaze vector itself.
        roll = float(np.asarray(target_gaze, dtype=float).reshape(-1)[2])
        gaze_vector = gaze_vector / np.linalg.norm(gaze_vector)

        cs = math.cos(roll * math.pi / 180.0)
        sn = math.sin(roll * math.pi / 180.0)

        tmpX = gaze_vector[0] * cs + gaze_vector[1] * sn
        tmpY = -gaze_vector[0] * sn + gaze_vector[1] * cs

        return (tmpX, tmpY), (gaze_vector)

    def clean(self):
        """
        Delete the plugin, network and device attributes of this instance.

        :return: None
        """
        del self.plugin
        del self.network
        del self.exec_network
        del self.net
        del self.device
def draw_inference_from_image(self):
    """Detect objects in the image at self.input_stream and display them.

    Runs one synchronous inference, draws a box and a label for every
    detection whose confidence exceeds self.prob_thresh, shows the image
    and waits for a key press.

    Returns a list of dicts, one per drawn detection, with the keys
    'class' (int class id) and 'bbox' ([(xmin, ymin), (xmax, ymax)]).
    """
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)

    # Engine setup, with the optional CPU extension library
    log.info("Creating Inference Engine")
    engine = IECore()
    if self.extention_lib_path and 'CPU' in self.device:
        engine.add_extension(self.extention_lib_path, "CPU")

    # Read the IR
    log.info("Loading network files:\n\t{}\n\t{}".format(self.model_xml,
                                                         self.model_path))
    network = IENetwork(model=self.model_xml, weights=self.model_path)

    if "CPU" in self.device:
        supported = engine.query_network(network, "CPU")
        missing = [name for name in network.layers.keys()
                   if name not in supported]
        if missing:
            log.error("Following layers are not supported by the plugin for specified device {}:\n {}".
                      format(self.device, ', '.join(missing)))
            log.error("Please try to specify cpu extensions library path in config")
            sys.exit(1)

    assert len(network.inputs.keys()) == 1, "Sample supports only single input topologies"
    assert len(network.outputs) == 1, "Sample supports only single output topologies"

    log.info("Preparing input blobs")
    input_blob = next(iter(network.inputs))
    out_blob = next(iter(network.outputs))
    network.batch_size = 1

    # Read the image and bring it to the network input size, CHW layout
    n, c, h, w = network.inputs[input_blob].shape
    image = cv2.imread(self.input_stream)
    initial_h, initial_w = image.shape[:2]
    source_size = image.shape[:-1]
    if source_size != (h, w):
        log.warning("Image {} is resized from {} to {}".format(
            self.input_stream, source_size, (h, w)))
    input_image = cv2.resize(image, (w, h)).transpose((2, 0, 1))

    log.info("Loading model to the plugin")
    exec_net = engine.load_network(network=network, device_name=self.device)

    # Optional label names, one per line
    labels_map = None
    if self.labels:
        with open(self.labels, 'r') as f:
            labels_map = [entry.strip() for entry in f]

    log.info("Starting inference in synchronous mode")
    result = exec_net.infer(inputs={input_blob: input_image})

    log.info("Processing output blob")
    candidates = result[out_blob][0][0]
    detections = []
    for obj in candidates:
        confidence = obj[2]
        if not confidence > self.prob_thresh:
            continue

        # Box corners are scaled from relative to pixel coordinates
        top_left = (int(obj[3] * initial_w), int(obj[4] * initial_h))
        bottom_right = (int(obj[5] * initial_w), int(obj[6] * initial_h))
        class_id = int(obj[1])
        detections.append({'class': class_id,
                           'bbox': [top_left, bottom_right]})

        # Draw box and label\class_id, colour derived from the class id
        color = (min(class_id * 12.5, 255),
                 min(class_id * 7, 255),
                 min(class_id * 5, 255))
        cv2.rectangle(image, top_left, bottom_right, color, 2)
        if labels_map:
            det_label = labels_map[class_id]
        else:
            det_label = str(class_id)
        caption = det_label + ' ' + str(round(confidence * 100, 1)) + ' %'
        cv2.putText(image, caption, (top_left[0], top_left[1] - 7),
                    cv2.FONT_HERSHEY_COMPLEX, 0.6, color, 1)

    # comment next two lines to stop rendering detection result
    cv2.imshow("Detection Result(s)", image)
    cv2.waitKey(0)
    return detections
class Model_HeadPose:
    '''
    Class for the Head Pose Estimation Model.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Store the configuration; the network is read in load_model().

        model_name: model path without extension; ".xml" and ".bin" are
            appended for the topology and the weights.
        device: device name used for query_network() / load_network().
        extensions: optional extension library path, added only when the
            device is 'CPU' and unsupported layers are found.
        '''
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.extensions = extensions

    def load_model(self):
        '''
        This method is for loading the model to the device specified by the user.
        If your model requires any Plugins, this is where you can load them.

        Raises RuntimeError when reading or loading the network fails and
        ValueError when the input/output description cannot be read.
        '''
        try:
            print('Model is loading...')
            self.core = IECore()
            self.net = self.core.read_network(model=self.model_structure,
                                              weights=self.model_weights)
            supported = self.core.query_network(self.net, self.device)
            not_supported = [
                layer for layer in self.net.layers.keys()
                if layer not in supported
            ]
            if len(not_supported) != 0 and self.device == 'CPU':
                print('Unsuported', not_supported)
                if self.extensions is not None:
                    print('***Quick fix.\n ~CPU Extension added')
                    self.core.add_extension(self.extensions, self.device)
                    supported = self.core.query_network(self.net, self.device)
                    not_supported = [
                        layer for layer in self.net.layers.keys()
                        if layer not in supported
                    ]
                    # The fix failed if layers are still unsupported.
                    if len(not_supported) != 0:
                        print('***Quick fix, Failed.')
                else:
                    print('Check the extension path.')
            self.net_exec = self.core.load_network(network=self.net,
                                                   device_name=self.device)
        except Exception as e:
            # Raise a real exception and keep the cause for debugging.
            raise RuntimeError(
                'Something is wrong.. ~debug load model~') from e

        try:
            self.input_name = next(iter(self.net.inputs))
            self.input_shape = self.net.inputs[self.input_name].shape
            self.output_name = next(iter(self.net.outputs))
            self.output_shape = self.net.outputs[self.output_name].shape
            print('Initialise.. completed.')
        except Exception as e:
            raise ValueError(
                'Something is wrong with input and output values..') from e

    def predict(self, image):
        '''
        This method is meant for running predictions on the input image.

        Returns the list built by preprocess_output().
        '''
        self.image = image
        print('HeadPose predict..')
        pre_image = self.preprocess_input(self.image)
        input_name = self.input_name
        input_dict = {input_name: pre_image}
        results = self.net_exec.infer(input_dict)
        outputs = self.preprocess_output(results)
        return outputs

    def check_model(self):
        '''
        Check - initialise the model

        Stores the network as self.model; raises ValueError when it
        cannot be created.
        '''
        try:
            self.model = IENetwork(self.model_structure, self.model_weights)
        except Exception as e:
            raise ValueError(
                "Could not Initialise the network. Have you enterred the correct model path?"
            ) from e

    def preprocess_input(self, image):
        '''
        Resize the image to the network input size and return it in
        [1xCxHxW] format.
           C - number of channels
           H - image height
           W - image width
        '''
        image = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        image = image.transpose((2, 0, 1))  # HWC -> CHW
        image = image.reshape(1, *image.shape)
        return image

    def preprocess_output(self, outputs):
        '''
        Output layer names in Inference Engine format:
           name: "angle_y_fc", shape: [1, 1] - Estimated yaw (in degrees).
           name: "angle_p_fc", shape: [1, 1] - Estimated pitch (in degrees).
           name: "angle_r_fc", shape: [1, 1] - Estimated roll (in degrees).

        Returns [yaw, pitch, roll] as Python floats.
        '''
        object_list = []
        print('PreOutput-headpose..')
        object_list.append(outputs['angle_y_fc'].tolist()[0][0])
        object_list.append(outputs['angle_p_fc'].tolist()[0][0])
        object_list.append(outputs['angle_r_fc'].tolist()[0][0])
        return object_list
class PoseDetector:
    '''
    Class for the Head Pose Estimation Model.

    Reads an Inference Engine network from ``<model_name>.xml`` /
    ``<model_name>.bin``, loads it on the requested device and returns
    the estimated head angles for an image.

    Note: the ``extensions`` argument is stored on the instance but is not
    loaded by any method of this class.
    '''

    def __init__(
            self,
            model_name='models/intel/head-pose-estimation-adas-0001/FP32-INT8/head-pose-estimation-adas-0001',
            device='CPU',
            extensions=None):
        '''
        Record model paths and device; all runtime handles start as None
        until load_model() is called.
        '''
        self.model_name = model_name
        self.model_w = model_name + '.bin'
        self.model_s = model_name + '.xml'
        self.device = device
        self.extension = extensions
        self.inference_results = None
        self.pre_image = None
        self.post_image = None
        self.network = None
        self.input_name = None
        self.input_shape = None
        self.output_shape = None
        self.output_name = None
        self.plugin = None
        self.exec_network = None

    def load_model(self):
        '''
        Read the network, load it on self.device and cache the first
        input / output names and their shapes.
        '''
        self.plugin = IECore()
        log.info("Attempting to load network for model:")
        log.info(self.model_name)
        self.network = self.plugin.read_network(model=self.model_s,
                                                weights=self.model_w)
        self.exec_network = self.plugin.load_network(self.network,
                                                     self.device)
        self.input_name = next(iter(self.network.inputs))
        self.output_name = next(iter(self.network.outputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_shape = self.network.outputs[self.output_name].shape

    def predict(self, image):
        '''
        Run synchronous inference on image and return [yaw, pitch, roll].
        '''
        self.pre_image = self.preprocess_input(image)
        self.inference_results = self.exec_network.infer(
            {self.input_name: self.pre_image})
        pose = self.preprocess_output(self.inference_results, image)
        return pose

    def check_model(self):
        '''
        Re-read the network, log any layers the CPU plugin does not
        support, and log the plugin version for self.device.
        '''
        self.plugin = IECore()
        log.info("Checking model layers for model:")
        log.info(self.model_name)
        self.network = self.plugin.read_network(model=self.model_s,
                                                weights=self.model_w)
        # double check supported network layers
        # code taken from Project-01 (ND131)
        if "CPU" in self.device:
            supported_layers = self.plugin.query_network(self.network, "CPU")
            not_supported_layers = [
                layer for layer in self.network.layers.keys()
                if layer not in supported_layers
            ]
            if len(not_supported_layers) != 0:
                log.error(
                    "Following layers are not supported by the plugin for specified device {}:\n {}"
                    .format(self.device, ', '.join(not_supported_layers)))
                log.error(
                    "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                    "or --cpu_extension command line argument")
        versions = self.plugin.get_versions(self.device)
        log.info("{}{}".format(" " * 8, self.device))
        log.info("{}MKLDNNPlugin version ......... {}.{}".format(
            " " * 8, versions[self.device].major,
            versions[self.device].minor))
        log.info("{}Build ........... {}".format(
            " " * 8, versions[self.device].build_number))

    def preprocess_input(self, image):
        '''
        Resize image to the network input size and convert it from
        HWC layout to a [1, C, H, W] batch.
        '''
        # cv2.resize takes (width, height); input_shape is indexed as
        # [..., H, W] here, hence indices 3 and 2.
        post_frame = cv2.resize(image,
                                (self.input_shape[3], self.input_shape[2]))
        post_frame = post_frame.transpose((2, 0, 1))
        post_frame = post_frame.reshape(1, *post_frame.shape)
        log.debug("input width x height: %d x %d", self.input_shape[3],
                  self.input_shape[2])
        return post_frame

    def preprocess_output(self, outputs, image):
        '''
        Extract [yaw, pitch, roll] from the inference result dict.

        outputs - mapping holding the 'angle_y_fc', 'angle_p_fc' and
                  'angle_r_fc' arrays
        image   - accepted for interface compatibility; not used
        '''
        return [
            outputs[key].tolist()[0][0]
            for key in ('angle_y_fc', 'angle_p_fc', 'angle_r_fc')
        ]