class FacialLandmarks:
    '''
    Wrapper around a facial-landmarks IR model.

    The class reads the network, loads it onto a device, runs inference on a
    face image and crops a fixed-size patch around each of the two eye
    positions read from the model output.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Read the network description and cache its input/output blob names.

        :param model_name: model path without extension; ``.xml`` and ``.bin``
            are appended to locate the IR files.
        :param device: device name passed to the Inference Engine.
        :param extensions: optional path of a CPU extension library.
        :raises ValueError: if the network cannot be read.
        '''
        self.model_bin = model_name + '.bin'
        self.model_xml = model_name + '.xml'
        self.device = device
        self.cpu_extension = extensions

        try:
            self.ie_plugin = IECore()
            self.model = self.ie_plugin.read_network(model=self.model_xml,
                                                     weights=self.model_bin)
        except Exception as err:
            # Chain the cause so the underlying Inference Engine error is not lost.
            raise ValueError(
                "Could not Initialise the network. Have you enterred the correct model path?"
            ) from err

        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))

    def load_model(self):
        '''
        Load the network onto ``self.device``.

        Adds the CPU extension first (when one was supplied and the device
        name contains ``CPU``), then verifies that every layer is supported.
        The process exits with status 1 if unsupported layers remain.

        :return: the executable network, also stored in ``self.exec_network``.
        '''
        # Add CPU extension to IECore
        if self.cpu_extension and 'CPU' in self.device:
            log.info('Adding CPU extension:\n\t{}'.format(self.cpu_extension))
            self.ie_plugin.add_extension(self.cpu_extension, self.device)

        # Check layers
        log.info('Current device specified: {}'.format(self.device))
        log.info("Checking for unsupported layers...")
        supported_layers = self.ie_plugin.query_network(
            network=self.model, device_name=self.device)
        unsupported_layers = [
            layer for layer in self.model.layers.keys()
            if layer not in supported_layers
        ]
        if unsupported_layers:
            log.error('These layers are unsupported:\n{}'.format(
                ', '.join(unsupported_layers)))
            log.error(
                'Specify an available extension to add to IECore from the command line using "-l"'
            )
            exit(1)
        else:
            log.info('All layers are supported!')

        # Load the model network into IECore
        self.exec_network = self.ie_plugin.load_network(
            self.model, self.device)
        log.info("IR Model has been successfully loaded into IECore")

        return self.exec_network

    def predict(self, image):
        '''
        Run inference on ``image`` and return the cropped eyes.

        :param image: image array indexed as (row, column, channel).
        :return: ``(eye_l, eye_r, coords)`` as produced by
            :meth:`preprocess_output`, or ``None`` when the request does not
            finish with status 0.
        '''
        p_image = self.preprocess_input(image)
        self.exec_network.start_async(0, {self.input_name: p_image})
        if self.wait() == 0:
            outputs = self.get_outputs()
            return self.preprocess_output(outputs, image)
        return None

    def preprocess_input(self, image):
        '''
        Resize ``image`` to the network input size and reorder it to 1xCxHxW.
        '''
        # cv2.resize expects (width, height); the size is built from
        # input_shape[3] followed by input_shape[2].
        target_size = (self.input_shape[3], self.input_shape[2])
        image = cv2.resize(image, target_size)
        image = image.transpose((2, 0, 1))
        image = image.reshape(1, *image.shape)
        return image

    def wait(self):
        '''Block until request 0 completes and return its status code.'''
        status = self.exec_network.requests[0].wait(-1)
        return status

    def get_outputs(self):
        '''Return the output blob of request 0 for ``self.output_name``.'''
        return self.exec_network.requests[0].outputs[self.output_name]

    def preprocess_output(self, outputs, image):
        '''
        Convert normalised eye positions into pixel boxes and crop both eyes.

        The first four entries of ``outputs[0]`` are read as left-eye x,
        left-eye y, right-eye x and right-eye y, each scaled by the image
        width or height.

        :param outputs: model output; ``outputs[0][k][0]`` must be a number.
        :param image: image array indexed as (row, column, channel).
        :return: ``(eye_l, eye_r, coords)`` where ``coords`` is
            ``[(xlmin, ylmin), (xlmax, ylmax), (xrmin, yrmin), (xrmax, yrmax)]``.
            ``coords`` are NOT clamped and may lie outside the image; the
            crops are clamped to the image area.
        '''
        height, width = image.shape[0], image.shape[1]

        # denormalize detections
        xl = int(outputs[0][0][0] * width)
        yl = int(outputs[0][1][0] * height)
        xr = int(outputs[0][2][0] * width)
        yr = int(outputs[0][3][0] * height)

        half = 15  # half side length of the square patch around each eye

        # include offset for left eye
        xlmin, ylmin = xl - half, yl - half
        xlmax, ylmax = xl + half, yl + half

        # include offset for right eye
        xrmin, yrmin = xr - half, yr - half
        xrmax, yrmax = xr + half, yr + half

        coords = [
            (xlmin, ylmin),
            (xlmax, ylmax),
            (xrmin, yrmin),
            (xrmax, yrmax),
        ]

        # crop eyes
        # A negative slice bound would wrap around to the other side of the
        # image (and usually yield an empty crop), so clamp the bounds at 0.
        # Bounds past the far edge are already truncated by slicing.
        eye_l = image[max(ylmin, 0):max(ylmax, 0), max(xlmin, 0):max(xlmax, 0)]
        eye_r = image[max(yrmin, 0):max(yrmax, 0), max(xrmin, 0):max(xrmax, 0)]

        return eye_l, eye_r, coords
class HeadPoseEstimation:
    '''
    Class for the Head Pose Estimation Model.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Store the model file paths and settings; nothing is loaded yet.

        :param model_name: model path without extension; ``.xml`` and ``.bin``
            are appended to locate the IR files.
        :param device: device name passed to the Inference Engine.
        :param extensions: optional path of a CPU extension library.
        '''
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.extensions = extensions
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_name = None
        self.output_shape = None

    def _unsupported_layers(self):
        '''Return the names of network layers the device does not support.'''
        supported_layers = self.core.query_network(network=self.model,
                                                   device_name=self.device)
        return [
            layer for layer in self.model.layers.keys()
            if layer not in supported_layers
        ]

    def load_model(self):
        '''
        Read the network, resolve unsupported layers and load it on the device.

        When unsupported layers are found on ``CPU`` the configured extension
        (if any) is added and the check is repeated; the process exits with
        status 1 if layers are still unsupported.
        '''
        self.core = IECore()
        self.model = self.core.read_network(self.model_structure,
                                            self.model_weights)

        unsupported_layers = self._unsupported_layers()
        if unsupported_layers and self.device == 'CPU':
            log.error(
                "Unsupported layers found: {}".format(unsupported_layers))
            log.error("Check whether extensions are available to add")
            # The attribute set in __init__ is ``extensions``; only try to add
            # it when the caller actually supplied one.
            if self.extensions is not None:
                self.core.add_extension(self.extensions, self.device)
                unsupported_layers = self._unsupported_layers()
            if unsupported_layers:
                log.error("ERROR: Unsupported layer issue not yet resolved")
                exit(1)

        self.exec_net = self.core.load_network(network=self.model,
                                               device_name=self.device,
                                               num_requests=1)

        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape

    def predict(self, image):
        '''
        Run synchronous inference on a copy of ``image``.

        :return: the list produced by :meth:`preprocess_output`.
        '''
        input_img = self.preprocess_input(image.copy())
        result = self.exec_net.infer({self.input_name: input_img})
        return self.preprocess_output(result)

    def check_model(self):
        '''Placeholder; performs no checks.'''
        pass

    def preprocess_input(self, image):
        '''
        Resize ``image`` to the network input size and reorder it to 1xCxHxW.
        '''
        # cv2.resize takes the target size as (input_shape[3], input_shape[2]).
        p_frame = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)
        return p_frame

    def preprocess_output(self, outputs):
        '''
        Extract the three angles from the inference result.

        :param outputs: mapping with the keys ``angle_y_fc``, ``angle_p_fc``
            and ``angle_r_fc``; each value must support ``.tolist()`` and hold
            the angle at position ``[0][0]``.
        :return: ``[angle_y, angle_p, angle_r]`` as Python numbers.
        '''
        return [
            outputs[key].tolist()[0][0]
            for key in ('angle_y_fc', 'angle_p_fc', 'angle_r_fc')
        ]
def get_net(model: str, core: IECore):
    """Read a network from an IR ``.xml`` file and its sibling ``.bin`` file.

    The weights path is derived by replacing the extension of ``model``
    with ``.bin``.
    """
    stem, _ = os.path.splitext(model)
    return core.read_network(model=model, weights=stem + ".bin")
def test_precision_setter():
    """Check that an output precision assigned on a layer can be read back."""
    layer_name = '19/Fused_Add_'
    core = IECore()
    network = core.read_network(model=test_net_xml, weights=test_net_bin)

    network.layers[layer_name].out_data[0].precision = "I8"

    # Look the layer up again so the value is read back through the network.
    observed = network.layers[layer_name].out_data[0].precision
    assert observed == "I8", "Incorrect precision for layer '19/Fused_Add_'"
def main():
    """Run the webcam gaze demo until ESC is pressed or the camera stops.

    Per frame: detect faces, run landmark and head-pose inference on every
    face above 75% confidence, crop both eyes, estimate the gaze vector and
    draw one gaze line per eye. With the spark flag on, crossing gaze lines
    are cut at their intersection and marked with a spark.

    Keys: ``l`` laser drawing, ``f`` horizontal flip, ``b`` boundary boxes,
    ``s`` sparks, ESC quit.
    """
    usage()

    boundary_box_flag = True

    # Prep for face detection
    ie = IECore()

    net_det = ie.read_network(model=model_det + '.xml',
                              weights=model_det + '.bin')
    input_name_det = next(iter(net_det.input_info))  # Input blob name "data"
    input_shape_det = net_det.input_info[
        input_name_det].tensor_desc.dims  # [1,3,384,672]
    out_name_det = next(iter(net_det.outputs))  # Output blob name "detection_out"
    exec_net_det = ie.load_network(network=net_det,
                                   device_name='CPU',
                                   num_requests=1)
    del net_det

    # Preparation for landmark detection
    net_lm = ie.read_network(model=model_lm + '.xml',
                             weights=model_lm + '.bin')
    input_name_lm = next(iter(net_lm.input_info))  # Input blob name
    input_shape_lm = net_lm.input_info[
        input_name_lm].tensor_desc.dims  # [1,3,60,60]
    out_name_lm = next(iter(net_lm.outputs))  # Output blob name
    exec_net_lm = ie.load_network(network=net_lm,
                                  device_name='CPU',
                                  num_requests=1)
    del net_lm

    # Preparation for headpose detection
    net_hp = ie.read_network(model=model_hp + '.xml',
                             weights=model_hp + '.bin')
    input_name_hp = next(iter(net_hp.input_info))  # Input blob name
    exec_net_hp = ie.load_network(network=net_hp,
                                  device_name='CPU',
                                  num_requests=1)
    del net_hp

    # Preparation for gaze estimation
    net_gaze = ie.read_network(model=model_gaze + '.xml',
                               weights=model_gaze + '.bin')
    input_shape_gaze = [1, 3, 60, 60]
    exec_net_gaze = ie.load_network(network=net_gaze, device_name='CPU')
    del net_gaze

    # Open USB webcams
    cam = cv2.VideoCapture(0)
    camx, camy = [(1920, 1080), (1280, 720), (800, 600),
                  (480, 480)][1]  # Set camera resolution [1]=1280,720
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, camx)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, camy)

    laser_flag = True
    flip_flag = True
    spark_flag = True

    try:
        while True:
            ret, img = cam.read()  # img won't be modified
            if not ret:
                break

            if flip_flag:
                img = cv2.flip(img, 1)  # flip image
            out_img = img.copy()  # out_img is drawn on to build the display image

            img1 = cv2.resize(img, (input_shape_det[_W], input_shape_det[_H]))
            img1 = img1.transpose((2, 0, 1))  # Change data layout from HWC to CHW
            img1 = img1.reshape(input_shape_det)
            res_det = exec_net_det.infer(inputs={input_name_det: img1})  # Detect faces

            gaze_lines = []
            # obj = [ image_id, label, conf, xmin, ymin, xmax, ymax ]
            for obj in res_det[out_name_det][0][0]:
                if obj[2] > 0.75:  # Confidence > 75%
                    xmin = abs(int(obj[3] * img.shape[1]))
                    ymin = abs(int(obj[4] * img.shape[0]))
                    xmax = abs(int(obj[5] * img.shape[1]))
                    ymax = abs(int(obj[6] * img.shape[0]))
                    face = img[ymin:ymax, xmin:xmax]  # Crop the face image
                    if face.size == 0:
                        # A degenerate box gives an empty crop, which
                        # cv2.resize cannot handle; skip this detection.
                        continue
                    if boundary_box_flag:
                        cv2.rectangle(out_img, (xmin, ymin), (xmax, ymax),
                                      (255, 255, 0), 2)

                    # Find facial landmarks (to find eyes)
                    face1 = cv2.resize(face,
                                       (input_shape_lm[_W], input_shape_lm[_H]))
                    face1 = face1.transpose((2, 0, 1))
                    face1 = face1.reshape(input_shape_lm)
                    res_lm = exec_net_lm.infer(
                        inputs={input_name_lm: face1})  # Run landmark detection
                    # [[left0x, left0y], [left1x, left1y], [right0x, right0y], [right1x, right1y]]
                    lm = res_lm[out_name_lm][0][:8].reshape(4, 2)

                    # Estimate head orientation (yaw=Y, pitch=X, roll=Z)
                    # NOTE(review): the head-pose network is fed the tensor
                    # sized for the landmark network - confirm both models
                    # take the same input shape.
                    res_hp = exec_net_hp.infer(
                        inputs={input_name_hp: face1})  # Run head pose estimation
                    yaw = res_hp['angle_y_fc'][0][0]
                    pitch = res_hp['angle_p_fc'][0][0]
                    roll = res_hp['angle_r_fc'][0][0]

                    _X = 0
                    _Y = 1
                    # Landmark position memo...   lm[1] (eye) lm[0] (nose) lm[2] (eye) lm[3]
                    # eye size in the cropped face image
                    eye_sizes = [
                        abs(int((lm[0][_X] - lm[1][_X]) * face.shape[1])),
                        abs(int((lm[3][_X] - lm[2][_X]) * face.shape[1]))
                    ]
                    # eye center coordinate in the cropped face image
                    eye_centers = [
                        [
                            int(((lm[0][_X] + lm[1][_X]) / 2 * face.shape[1])),
                            int(((lm[0][_Y] + lm[1][_Y]) / 2 * face.shape[0]))
                        ],
                        [
                            int(((lm[3][_X] + lm[2][_X]) / 2 * face.shape[1])),
                            int(((lm[3][_Y] + lm[2][_Y]) / 2 * face.shape[0]))
                        ]
                    ]
                    if eye_sizes[0] < 4 or eye_sizes[1] < 4:
                        continue

                    ratio = 0.7
                    eyes = []
                    for i in range(2):
                        # Crop eye images. The top-left corner is clamped at 0
                        # because a negative slice start would wrap around to
                        # the opposite side of the face image.
                        x1 = max(int(eye_centers[i][_X] - eye_sizes[i] * ratio), 0)
                        x2 = int(eye_centers[i][_X] + eye_sizes[i] * ratio)
                        y1 = max(int(eye_centers[i][_Y] - eye_sizes[i] * ratio), 0)
                        y2 = int(eye_centers[i][_Y] + eye_sizes[i] * ratio)
                        eyes.append(
                            cv2.resize(face[y1:y2, x1:x2].copy(),
                                       (input_shape_gaze[_W],
                                        input_shape_gaze[_H])))  # crop and resize

                        # Draw eye boundary boxes
                        if boundary_box_flag:
                            cv2.rectangle(out_img, (x1 + xmin, y1 + ymin),
                                          (x2 + xmin, y2 + ymin), (0, 255, 0), 2)

                        # rotate eyes around Z axis to keep them level
                        if roll != 0.:
                            rotMat = cv2.getRotationMatrix2D(
                                (int(input_shape_gaze[_W] / 2),
                                 int(input_shape_gaze[_H] / 2)), roll, 1.0)
                            eyes[i] = cv2.warpAffine(
                                eyes[i],
                                rotMat,
                                (input_shape_gaze[_W], input_shape_gaze[_H]),
                                flags=cv2.INTER_LINEAR)
                        eyes[i] = eyes[i].transpose(
                            (2, 0, 1))  # Change data layout from HWC to CHW
                        eyes[i] = eyes[i].reshape((1, 3, 60, 60))

                    hp_angle = [yaw, pitch, 0]  # head pose angle in degree
                    res_gaze = exec_net_gaze.infer(
                        inputs={
                            'left_eye_image': eyes[0],
                            'right_eye_image': eyes[1],
                            'head_pose_angles': hp_angle
                        })  # gaze estimation
                    # result is in orthogonal coordinate system
                    # (x,y,z. not yaw,pitch,roll) and not normalized
                    gaze_vec = res_gaze['gaze_vector'][0]
                    gaze_vec_norm = gaze_vec / np.linalg.norm(
                        gaze_vec)  # normalize the gaze vector

                    # Rotate the x/y components by the roll angle (the eye
                    # crops were rotated by roll before inference).
                    vcos = math.cos(math.radians(roll))
                    vsin = math.sin(math.radians(roll))
                    tmpx = gaze_vec_norm[0] * vcos + gaze_vec_norm[1] * vsin
                    tmpy = -gaze_vec_norm[0] * vsin + gaze_vec_norm[1] * vcos
                    gaze_vec_norm = [tmpx, tmpy]

                    # Store gaze line coordinates
                    for i in range(2):
                        coord1 = (eye_centers[i][_X] + xmin,
                                  eye_centers[i][_Y] + ymin)
                        coord2 = (eye_centers[i][_X] + xmin + int(
                            (gaze_vec_norm[0] + 0.) * 3000),
                                  eye_centers[i][_Y] + ymin - int(
                                      (gaze_vec_norm[1] + 0.) * 3000))
                        # line(coord1, coord2); False=spark flag
                        gaze_lines.append([coord1, coord2, False])

            # Gaze lines intersection check (for sparking)
            if spark_flag:
                for g1 in range(len(gaze_lines)):
                    for g2 in range(g1 + 1, len(gaze_lines)):
                        if gaze_lines[g1][2] or gaze_lines[g2][2]:
                            continue  # Skip if either line is already marked as crossed
                        # Start and end point of each of the two lines.
                        start1 = gaze_lines[g1][0]
                        end1 = gaze_lines[g1][1]
                        start2 = gaze_lines[g2][0]
                        end2 = gaze_lines[g2][1]
                        if intersection_check(start1, end1, start2, end2):
                            l1 = line(start1, end1)
                            l2 = line(start2, end2)
                            x, y = intersection(l1, l2)  # calculate crossing coordinate
                            gaze_lines[g1][1] = [int(x), int(y)]
                            gaze_lines[g1][2] = True
                            gaze_lines[g2][1] = [int(x), int(y)]
                            gaze_lines[g2][2] = True

            # Drawing gaze lines and sparks
            for gaze_line in gaze_lines:
                draw_gaze_line(out_img, (gaze_line[0][0], gaze_line[0][1]),
                               (gaze_line[1][0], gaze_line[1][1]), laser_flag)
                if gaze_line[2]:
                    draw_spark(out_img, (gaze_line[1][0], gaze_line[1][1]))

            cv2.imshow("gaze", out_img)

            key = cv2.waitKey(1)
            if key == 27:
                break
            if key == ord(u'l'):
                laser_flag = not laser_flag  # toggles laser_flag
            if key == ord(u'f'):
                flip_flag = not flip_flag  # image flip flag
            if key == ord(u'b'):
                boundary_box_flag = not boundary_box_flag  # boundary box flag
            if key == ord(u's'):
                spark_flag = not spark_flag  # spark flag
    finally:
        # Release the camera even when a frame fails mid-processing.
        cam.release()
        cv2.destroyAllWindows()
def main():
    """Run the three-stage (Pnet, Rnet, Onet) face detection loop.

    Reads frames from the configured input, runs the Proposal, Refine and
    Output networks in sequence on each frame, draws the surviving boxes,
    confidences and landmark points on the frame, and optionally writes the
    frame to a video file and/or shows it in a window. The loop ends when the
    input is exhausted or when ``q``, ``Q`` or ESC is pressed in the window.

    Raises:
        ValueError: if the very first frame cannot be read.
        RuntimeError: if an output blob has an unexpected channel count, or
            the video writer cannot be opened.
    """
    args = build_argparser().parse_args()

    cap = open_images_capture(args.input, args.loop)

    # Plugin initialization for specified device and load extensions library if specified
    log.info('OpenVINO Inference Engine')
    log.info('\tbuild: {}'.format(get_version()))
    ie = IECore()

    # Read IR
    log.info('Reading Proposal model {}'.format(args.model_pnet))
    p_net = ie.read_network(args.model_pnet)
    assert len(p_net.input_info.keys()) == 1, "Pnet supports only single input topologies"
    assert len(p_net.outputs) == 2, "Pnet supports two output topologies"

    log.info('Reading Refine model {}'.format(args.model_rnet))
    r_net = ie.read_network(args.model_rnet)
    assert len(r_net.input_info.keys()) == 1, "Rnet supports only single input topologies"
    assert len(r_net.outputs) == 2, "Rnet supports two output topologies"

    log.info('Reading Output model {}'.format(args.model_onet))
    o_net = ie.read_network(args.model_onet)
    assert len(o_net.input_info.keys()) == 1, "Onet supports only single input topologies"
    assert len(o_net.outputs) == 3, "Onet supports three output topologies"

    pnet_input_blob = next(iter(p_net.input_info))
    rnet_input_blob = next(iter(r_net.input_info))
    onet_input_blob = next(iter(o_net.input_info))

    # Outputs are told apart by the size of dimension 1:
    # 2 -> classification, 4 -> box regression, 10 -> landmark points (Onet only).
    # NOTE(review): if a model exposes two outputs with the same size, one of
    # the *_name variables below is never assigned - confirm models are validated.
    for name, blob in p_net.outputs.items():
        if blob.shape[1] == 2:
            pnet_cls_name = name
        elif blob.shape[1] == 4:
            pnet_roi_name = name
        else:
            raise RuntimeError("Unsupported output layer for Pnet")

    for name, blob in r_net.outputs.items():
        if blob.shape[1] == 2:
            rnet_cls_name = name
        elif blob.shape[1] == 4:
            rnet_roi_name = name
        else:
            raise RuntimeError("Unsupported output layer for Rnet")

    for name, blob in o_net.outputs.items():
        if blob.shape[1] == 2:
            onet_cls_name = name
        elif blob.shape[1] == 4:
            onet_roi_name = name
        elif blob.shape[1] == 10:
            onet_pts_name = name
        else:
            raise RuntimeError("Unsupported output layer for Onet")

    next_frame_id = 0

    metrics = PerformanceMetrics()
    presenter = None
    video_writer = cv2.VideoWriter()
    # Only used to avoid repeating the "model is loaded" log lines; it is set
    # once the Onet stage has run for the first time.
    is_loaded_before = False

    while True:
        start_time = perf_counter()
        origin_image = cap.read()
        if origin_image is None:
            # No frame at all is an error; running out of frames later is a normal end.
            if next_frame_id == 0:
                raise ValueError("Can't read an image from the input")
            break
        if next_frame_id == 0:
            # First frame: the frame size is now known, so create the
            # presenter and open the video writer (when an output is requested).
            presenter = monitors.Presenter(args.utilization_monitors, 55,
                                           (round(origin_image.shape[1] / 4), round(origin_image.shape[0] / 8)))
            if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                                     cap.fps(), (origin_image.shape[1], origin_image.shape[0])):
                raise RuntimeError("Can't open video writer")
        next_frame_id += 1

        rgb_image = cv2.cvtColor(origin_image, cv2.COLOR_BGR2RGB)
        oh, ow, _ = rgb_image.shape

        scales = utils.calculate_scales(rgb_image)

        # *************************************
        # Pnet stage
        # *************************************

        pnet_res = []
        for i, scale in enumerate(scales):
            hs = int(oh*scale)
            ws = int(ow*scale)
            image = preprocess_image(rgb_image, ws, hs)

            # Change width and height of input blob. The network is reshaped
            # and re-loaded for every scale of every frame.
            # NOTE(review): the spatial dims are passed as (ws, hs), i.e. width
            # first - presumably preprocess_image produces the matching layout; confirm.
            p_net.reshape({pnet_input_blob: [1, 3, ws, hs]})
            exec_pnet = ie.load_network(network=p_net, device_name=args.device)
            if i == 0 and not is_loaded_before:
                log.info("The Proposal model {} is loaded to {}".format(args.model_pnet, args.device))

            p_res = exec_pnet.infer(inputs={pnet_input_blob: image})
            pnet_res.append(p_res)

        image_num = len(scales)
        rectangles = []
        for i in range(image_num):
            roi = pnet_res[i][pnet_roi_name]
            cls = pnet_res[i][pnet_cls_name]
            _, _, out_h, out_w = cls.shape
            out_side = max(out_h, out_w)
            rectangle = utils.detect_face_12net(cls[0][1], roi[0], out_side, 1/scales[i], ow, oh,
                                                score_threshold[0], iou_threshold[0])
            rectangles.extend(rectangle)
        rectangles = utils.NMS(rectangles, iou_threshold[1], 'iou')

        # Rnet stage
        if len(rectangles) > 0:

            # Change batch size of input blob to the number of candidate boxes
            r_net.reshape({rnet_input_blob: [len(rectangles), 3, 24, 24]})
            exec_rnet = ie.load_network(network=r_net, device_name=args.device)
            if not is_loaded_before:
                log.info("The Refine model {} is loaded to {}".format(args.model_rnet, args.device))

            rnet_input = []
            for rectangle in rectangles:
                # rectangle[0..3] are used as x1, y1, x2, y2 of the crop
                crop_img = rgb_image[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
                crop_img = preprocess_image(crop_img, 24, 24)
                rnet_input.extend(crop_img)

            rnet_res = exec_rnet.infer(inputs={rnet_input_blob: rnet_input})

            roi = rnet_res[rnet_roi_name]
            cls = rnet_res[rnet_cls_name]
            rectangles = utils.filter_face_24net(cls, roi, rectangles, ow, oh, score_threshold[1], iou_threshold[2])

        # Onet stage
        if len(rectangles) > 0:

            # Change batch size of input blob to the number of remaining boxes
            o_net.reshape({onet_input_blob: [len(rectangles), 3, 48, 48]})
            exec_onet = ie.load_network(network=o_net, device_name=args.device)
            if not is_loaded_before:
                log.info("The Output model {} is loaded to {}".format(args.model_onet, args.device))
                is_loaded_before = True

            onet_input = []
            for rectangle in rectangles:
                crop_img = rgb_image[int(rectangle[1]):int(rectangle[3]), int(rectangle[0]):int(rectangle[2])]
                crop_img = preprocess_image(crop_img, 48, 48)
                onet_input.extend(crop_img)

            onet_res = exec_onet.infer(inputs={onet_input_blob: onet_input})

            roi = onet_res[onet_roi_name]
            cls = onet_res[onet_cls_name]
            pts = onet_res[onet_pts_name]
            rectangles = utils.filter_face_48net(cls, roi, pts, rectangles, ow, oh,
                                                 score_threshold[2], iou_threshold[3])

        # display results
        for rectangle in rectangles:
            # Draw detected boxes; rectangle[4] is printed as the confidence
            cv2.putText(origin_image, 'confidence: {:.2f}'.format(rectangle[4]),
                        (int(rectangle[0]), int(rectangle[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0))
            cv2.rectangle(origin_image, (int(rectangle[0]), int(rectangle[1])),
                          (int(rectangle[2]), int(rectangle[3])), (255, 0, 0), 1)
            # Draw landmarks: entries 5..14 are read as five (x, y) pairs
            for i in range(5, 15, 2):
                cv2.circle(origin_image, (int(rectangle[i+0]), int(rectangle[i+1])), 2, (0, 255, 0))

        metrics.update(start_time, origin_image)

        # A non-positive output_limit means "no limit" on written frames.
        if video_writer.isOpened() and (args.output_limit <= 0 or next_frame_id <= args.output_limit):
            video_writer.write(origin_image)

        if not args.no_show:
            cv2.imshow('MTCNN Results', origin_image)
            key = cv2.waitKey(1)
            if key in {ord('q'), ord('Q'), 27}:
                break
            presenter.handleKey(key)

    metrics.log_total()
class TestModels(unittest.TestCase):
    """Compare PyTorch model outputs with the same models run through OpenVINO."""

    @classmethod
    def setUpClass(cls):
        """Create the shared IECore and load the test image.

        The image is read from ``$MODELS_PATH/validation_set/512x512/dog.bmp``;
        the class fails with an ``AssertionError`` if it cannot be loaded.
        """
        cls.ie = IECore()
        cls.test_img = cv.imread(os.path.join(os.environ['MODELS_PATH'],
                                              'validation_set',
                                              '512x512',
                                              'dog.bmp'))
        if cls.test_img is None:
            tc = unittest.TestCase()
            tc.fail('No image data found')

    def get_iou(self, box1, box2):
        """Return the intersection-over-union of two boxes.

        Each box is ``xmin, ymin, xmax, ymax``. Boxes that do not overlap give
        0; a non-positive union (degenerate boxes) also gives 0.
        """
        # box is xmin, ymin, xmax, ymax
        x_min, x_max = max(box1[0], box2[0]), min(box1[2], box2[2])
        y_min, y_max = max(box1[1], box2[1]), min(box1[3], box2[3])
        # Clamp each overlap extent at zero: for disjoint boxes both extents
        # can be negative and their product would be a bogus positive area.
        inter = max(0.0, x_max - x_min) * max(0.0, y_max - y_min)
        area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
        area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union = area1 + area2 - inter
        if union <= 0:
            return 0.0
        return inter / union

    # source: https://github.com/opencv/opencv/blob/master/modules/dnn/misc/python/test/test_dnn.py
    def normAssertDetections(self, ref_class_ids, ref_scores, ref_boxes,
                             test_class_ids, test_scores, test_boxes,
                             conf_threshold=1e-5, scores_diff=1e-5, boxes_iou_diff=1e-4):
        """Raise if the test detections do not match the reference ones.

        A test detection matches a not-yet-matched reference when the class
        ids are equal, the scores differ by less than ``scores_diff`` and the
        IoU differs from 1 by less than ``boxes_iou_diff``. Detections scoring
        below ``conf_threshold`` are ignored on the test side, and references
        scoring at or below it are not reported as unmatched.

        Raises:
            Exception: listing every unmatched prediction and reference.
        """
        matched_ref_boxes = [False] * len(ref_boxes)
        errMsg = ''
        for i in range(len(test_boxes)):
            test_score = test_scores[i]
            if test_score < conf_threshold:
                continue

            test_class_id, test_box = test_class_ids[i], test_boxes[i]
            matched = False
            for j in range(len(ref_boxes)):
                if (not matched_ref_boxes[j]) and test_class_id == ref_class_ids[j] and \
                        abs(test_score - ref_scores[j]) < scores_diff:
                    iou = self.get_iou(test_box, ref_boxes[j])
                    if abs(iou - 1.0) < boxes_iou_diff:
                        matched = True
                        matched_ref_boxes[j] = True
            if not matched:
                errMsg += '\nUnmatched prediction: class %d score %f box %s' % (test_class_id, test_score, test_box)

        for i in range(len(ref_boxes)):
            if (not matched_ref_boxes[i]) and ref_scores[i] > conf_threshold:
                errMsg += '\nUnmatched reference: class %d score %f box %s' % (ref_class_ids[i], ref_scores[i], ref_boxes[i])
        if errMsg:
            raise Exception(errMsg)

    def check_torchvision_model(self, model_func, size, threshold=1e-5):
        """Convert a model to OpenVINO IR and compare outputs with PyTorch.

        Args:
            model_func: callable accepting ``pretrained`` and ``progress``
                keyword arguments and returning the model.
            size: ``(height, width)`` of the input.
            threshold: largest accepted absolute difference per output.
        """
        inp_size = [1, 3, size[0], size[1]]

        # cv.resize takes (width, height)
        inp = cv.resize(self.test_img, (size[1], size[0]))
        inp = np.expand_dims(inp.astype(np.float32).transpose(2, 0, 1), axis=0)
        inp /= 255
        inp = torch.tensor(inp)

        # Create model
        model = model_func(pretrained=True, progress=False)
        model.eval()
        ref = model(inp)

        # Convert to OpenVINO IR
        mo_pytorch.convert(model, input_shape=inp_size, model_name='model')

        # Run model with OpenVINO and compare outputs
        net = self.ie.read_network('model.xml', 'model.bin')
        exec_net = self.ie.load_network(net, 'CPU')
        out = exec_net.infer({'input': inp.detach().numpy()})

        # A single-tensor reference is wrapped so both sides can be zipped.
        if isinstance(ref, torch.Tensor):
            ref = {'': ref}
        for out0, ref0 in zip(out.values(), ref.values()):
            diff = np.max(np.abs(out0 - ref0.detach().numpy()))
            self.assertLessEqual(diff, threshold)

    def test_inception_v3(self):
        self.check_torchvision_model(models.inception_v3, (299, 299), 4e-5)

    def test_squeezenet1_1(self):
        self.check_torchvision_model(models.squeezenet1_1, (227, 227))

    def test_alexnet(self):
        self.check_torchvision_model(models.alexnet, (227, 227))

    def test_resnet18(self):
        self.check_torchvision_model(models.resnet18, (227, 227), 2e-5)

    def test_deeplabv3_resnet50(self):
        self.check_torchvision_model(models.segmentation.deeplabv3_resnet50, (240, 320), 2e-4)

    def test_detectron2_retinanet(self):
        """Compare Detectron2 RetinaNet detections with the converted model."""
        width = 320
        height = 320

        # Load model
        model = model_zoo.get("COCO-Detection/retinanet_R_50_FPN_1x.yaml", trained=True)
        model.eval()

        # Prepare input tensor
        img = cv.resize(self.test_img, (width, height))
        inp = img.transpose(2, 0, 1).astype(np.float32)

        # Get reference prediction
        ref = model([{'image': torch.tensor(inp)}])
        ref = ref[0]['instances'].get_fields()
        ref_boxes = []
        for box, score, class_idx in zip(ref['pred_boxes'], ref['scores'], ref['pred_classes']):
            xmin, ymin, xmax, ymax = box
            ref_boxes.append([xmin, ymin, xmax, ymax])
            if score > 0.45:
                cv.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), color=(0, 180, 255), thickness=3)

        # Convert model to OpenVINO IR
        mo_pytorch.convert(model, input_shape=[1, 3, height, width], model_name='retinanet_R_50_FPN_1x')

        # Get OpenVINO prediction
        net = self.ie.read_network('retinanet_R_50_FPN_1x.xml', 'retinanet_R_50_FPN_1x.bin')
        exec_net = self.ie.load_network(net, 'CPU')
        outs = exec_net.infer({'input': inp.reshape(1, 3, height, width)})
        ie_detections = next(iter(outs.values()))
        # Each row is used as [?, class id, confidence, xmin, ymin, xmax, ymax]
        ie_detections = ie_detections.reshape(-1, 7)

        for det in ie_detections:
            conf = det[2]
            if conf > 0.45:
                xmin, ymin, xmax, ymax = [int(v) for v in det[3:]]
                cv.rectangle(img, (xmin, ymin), (xmax, ymax), color=(210, 9, 179))

        # Uncomment to visualize detections
        # cv.imshow('RetinaNet (Detectron2)', img)
        # cv.waitKey()

        self.normAssertDetections(ref['pred_classes'], ref['scores'], ref_boxes,
                                  ie_detections[:, 1], ie_detections[:, 2], ie_detections[:, 3:])

    def test_strided_slice(self):
        """Check conversion of a module whose forward pass is only slicing."""
        import torch.nn as nn

        class SSlice(nn.Module):
            def forward(self, x):
                return x[:, :1, 2:, 3]

        # The lambda swallows the pretrained/progress keyword arguments.
        self.check_torchvision_model(lambda **args: SSlice(), (299, 299), 4e-5)

    def test_resunet(self):
        """Compare the BrainMaGe ``resunet`` model with its converted IR."""
        import BrainMaGe
        from BrainMaGe.models.networks import fetch_model

        weights = Path(BrainMaGe.__file__).parent / 'weights' / 'resunet_ma.pt'
        pt_model = fetch_model(modelname="resunet", num_channels=1, num_classes=2, num_filters=16)
        checkpoint = torch.load(weights, map_location=torch.device('cpu'))
        pt_model.load_state_dict(checkpoint["model_state_dict"])
        pt_model.eval()

        # Get reference output
        inp = torch.randn([1, 1, 128, 128, 128])
        ref = pt_model(inp).detach().numpy()

        # Perform multiple runs with other inputs to make sure that the
        # InstanceNorm layer does not get stuck on the first input
        for _ in range(2):
            dummy_inp = torch.randn(inp.shape)
            pt_model(dummy_inp)

        # Generate OpenVINO IR
        mo_pytorch.convert(pt_model, input_shape=list(inp.shape), model_name='model')

        # Run model with OpenVINO and compare outputs
        net = self.ie.read_network('model.xml', 'model.bin')
        exec_net = self.ie.load_network(net, 'CPU')
        out = exec_net.infer({'input': inp.detach().numpy()})
        out = next(iter(out.values()))

        diff = np.max(np.abs(out - ref))
        self.assertLessEqual(diff, 5e-4)
class PersonDetect:
    '''
    Class for the Person Detection Model.
    '''

    def __init__(self, model_name, device, threshold=0.60):
        '''
        Read the network and cache its input/output blob names and shapes.

        :param model_name: model path without extension; ``.xml`` and ``.bin``
            are appended to locate the IR files.
        :param device: device name used when the network is loaded.
        :param threshold: minimum confidence for a detection to be kept.
        :raises ValueError: if the network cannot be read.
        '''
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.threshold = threshold
        self.exec_network = None

        self.core = IECore()
        try:
            self.model = self.core.read_network(model=self.model_structure,
                                                weights=self.model_weights)
        except Exception as e:
            # Chain the cause so the underlying Inference Engine error is kept.
            raise ValueError(
                "Could not Initialise the network. Have you enterred the correct model path?"
            ) from e

        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape

    # I took this code from the exercises given to us by Michael Virgo in Course 1.
    def load_model(self):
        '''
        Load the network onto ``self.device``.

        :return: the executable network, also stored in ``self.exec_network``.
        '''
        # Reuse the core that read the network instead of creating a new one.
        self.exec_network = self.core.load_network(network=self.model,
                                                   device_name=self.device)
        return self.exec_network

    # I took this code from the exercises given to us by Roumaissaa Madoui
    def predict(self, image):
        '''
        Detect people in ``image`` and draw their boxes on it.

        :param image: image array indexed as (row, column, channel); it is
            drawn on in place.
        :return: ``(scaled_coords, image)``, or ``None`` when the request does
            not finish with status 0.
        '''
        p_frame = self.preprocess_input(image)
        self.exec_network.start_async(request_id=0,
                                      inputs={self.input_name: p_frame})
        if self.exec_network.requests[0].wait(-1) == 0:
            outputs = self.exec_network.requests[0].outputs[self.output_name]
            coords = self.preprocess_outputs(outputs)
            scaled_coords, image = self.draw_outputs(coords, image)
            return scaled_coords, image
        return None

    # I took this code from the exercises given to us Michael Virgo in Course 1.
    def draw_outputs(self, coords, image):
        '''
        Scale normalised boxes to pixels and draw them on ``image``.

        :param coords: detections whose entries 3..6 are xmin, ymin, xmax,
            ymax relative to the image size.
        :return: ``(scaled_coords, image)`` where ``scaled_coords`` is a list
            of ``[xmin, ymin, xmax, ymax]`` in pixels.
        '''
        scaled_coords = []
        for coord in coords:
            # Output shape is 1x1x100x7
            xmin = int(coord[3] * image.shape[1])
            ymin = int(coord[4] * image.shape[0])
            xmax = int(coord[5] * image.shape[1])
            ymax = int(coord[6] * image.shape[0])
            scaled_coords.append([xmin, ymin, xmax, ymax])
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 3)
        return scaled_coords, image

    # I took this code from the exercises given to us by Michael Virgo in Course 1.
    def preprocess_outputs(self, outputs):
        '''
        Keep detections of class 1 whose confidence reaches the threshold.

        :param outputs: model output; ``outputs[0][0]`` is iterated and each
            item is read as ``[?, class id, confidence, ...]``.
        :return: list of the kept detection rows, in their original order.
        '''
        return [
            box for box in outputs[0][0]
            if box[2] >= self.threshold and int(box[1]) == 1
        ]

    # I took this code from the exercises given to us by Michael Virgo in Course 1.
    def preprocess_input(self, image):
        '''
        Resize ``image`` to the network input size and reorder it to 1xCxHxW.
        '''
        p_frame = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)
        return p_frame
def main():
    """Run a detection network over a video or a folder of JPEG images.

    Reads the IR model given by ``args.model``, checks that every layer is
    supported on CPU, then for each frame/image: resizes it to cover the
    network input while keeping aspect ratio, centre-crops to the input size,
    runs synchronous inference and maps the detections back to the original
    image coordinates. Boxes above ``args.confidence`` are drawn and either
    saved (``args.save``) or shown. All boxes are finally exported as CSV to
    ``args.tab_base``.

    Raises:
        RuntimeError: when both or neither of ``args.video`` / ``args.input``
            are given, or when the first video frame cannot be read.
    """
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = build_argparser().parse_args()
    log.info("Loading Inference Engine")
    ie = IECore()

    # --------------------------- 1. Read IR Generated by ModelOptimizer (.xml and .bin files) ------------
    model_xml = args.model
    model_name = splitext(basename(model_xml))[0]
    model_bin = splitext(model_xml)[0] + '.bin'
    log.info("Loading network files:\n\t{}\n".format(model_xml))
    net = ie.read_network(model=model_xml, weights=model_bin)
    # NOTE: an early CSV export that used the result dict before it was
    # assigned (UnboundLocalError) was removed; the table is written once,
    # after all inputs have been processed.

    # ------------- 2. Load Plugin for inference engine and extensions library if specified --------------
    log.info("Device info:")
    device = 'CPU'
    versions = ie.get_versions(device)
    print("{}{}".format(" " * 8, device))
    print("{}MKLDNNPlugin version ......... {}.{}".format(
        " " * 8, versions[device].major, versions[device].minor))
    print("{}Build ........... {}".format(" " * 8,
                                          versions[device].build_number))

    supported_layers = ie.query_network(net, device)
    not_supported_layers = [
        l for l in net.layers.keys() if l not in supported_layers
    ]
    if len(not_supported_layers) != 0:
        log.error(
            "Following layers are not supported by the plugin for specified device {}:\n {}"
            .format(device, ', '.join(not_supported_layers)))
        log.error(
            "Please try to specify cpu extensions library path in sample's command line parameters using -l "
            "or --cpu_extension command line argument")
        sys.exit(1)

    # -----------------------------------------------------------------------------------------------------
    infos = [*net.input_info]
    print("inputs number: " + str(len(infos)))
    print("input shape: " + str(net.input_info[infos[0]].input_data.shape))
    print("input key: " + infos[0])
    _, _, h, w = net.input_info[infos[0]].input_data.shape

    log.info("Preparing input blobs")
    out_blob = next(iter(net.outputs))
    input_name = infos[0]
    log.info("Batch size is {}".format(net.batch_size))
    net.input_info[infos[0]].precision = 'U8'

    log.info('Preparing output blobs')
    # Start from the first output and prefer a DetectionOutput layer if found.
    output_name, output_info = "", net.outputs[next(iter(net.outputs.keys()))]
    for output_key in net.outputs:
        if net.layers[output_key].type == "DetectionOutput":
            output_name, output_info = output_key, net.outputs[output_key]
    if output_name == "":
        log.error("Can't find a DetectionOutput layer in the topology")

    # These checks only log; execution continues as in the original script.
    output_dims = output_info.shape
    if len(output_dims) != 4:
        log.error("Incorrect output dimensions for SSD model")
    object_size = output_dims[3]
    if object_size != 7:
        log.error("Output item should have 7 as a last dimension")
    output_info.precision = "FP32"

    log.info("Loading model to the device")
    exec_net = ie.load_network(network=net, device_name=device)
    log.info("Creating infer request and starting inference")

    # -----------------------------------------------------------------------------------------------------
    if args.video is not None and args.input is not None:
        raise RuntimeError('Either use video or images input')
    if args.video is None and args.input is None:
        raise RuntimeError('Need an input: video or images')
    has_video = args.video is not None

    log.info('Processing and ' + ('sav' if args.save else 'show') + 'ing ' +
             ('video' if has_video else 'images'))

    if args.save:
        if has_video:
            outdir = dirname(args.video) + sep + 'results' + sep
        else:
            outdir = args.input + sep + 'results' + sep
        makedirs(outdir, exist_ok=True)

    if has_video:
        cap = cv2.VideoCapture(args.video)
        fps = cap.get(cv2.CAP_PROP_FPS)
        video_length = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        # The first frame is consumed only to learn the frame size.
        ret, image = cap.read()
        if not ret:
            cap.release()
            raise RuntimeError('Could not read a frame from ' + str(args.video))
        size = (image.shape[1], image.shape[0])
        if args.save:
            writer = cv2.VideoWriter(outdir + basename(args.video),
                                     cv2.VideoWriter_fourcc(*'mp4v'), fps,
                                     size)
    else:
        filenames = sorted(glob.glob(args.input + '/*.jpg'), key=getmtime)

    network_ratio = w / h
    count = -1
    x1, y1, ws, hs, paths, objects, probas = [], [], [], [], [], [], []
    try:
        while True:
            count += 1
            if has_video:
                ret, image = cap.read()
                name = f'frame{count}'
                # Guard against streams that report a frame count of 0.
                progress = 100.0 * count / video_length if video_length else 0.0
                if count % 100 == 0:
                    print('Progress: %.2f%%' % progress, end='\r', flush=True)
                if count % 1000 == 0:
                    log.info('Progress: %.2f%%' % progress)
                if not ret:
                    break
            else:
                if count == len(filenames):
                    break
                name = filenames[count]
                image = cv2.imread(name)
                if image is None:
                    # cv2.imread returns None for unreadable files.
                    log.warning('Skipping unreadable image {}'.format(name))
                    continue

            output = []
            ih, iw = image.shape[:-1]
            input_ratio = iw / ih
            # Resize so the image covers the (h, w) network input while
            # keeping its aspect ratio; the overflow is centre-cropped below.
            if input_ratio < network_ratio:
                new_h = int(floor(w / input_ratio))
                new_w = w
                scale_ratio = iw / w
                off_h = int(floor((new_h - h) / 2))
                off_w = 0
            else:
                new_h = h
                new_w = int(floor(h * input_ratio))
                scale_ratio = ih / h
                off_h = 0
                off_w = int(floor((new_w - w) / 2))

            crop = cv2.resize(image, (new_w, new_h))
            crop = crop[off_h:off_h + h, off_w:off_w + w, :]
            crop_h, crop_w = crop.shape[:-1]

            # Change data layout from HWC to CHW
            data = {input_name: crop.transpose((2, 0, 1))}
            res = exec_net.infer(inputs=data)
            res = res[out_blob][0][0]

            # Each proposal row is read as [_, label, confidence, xmin, ymin,
            # xmax, ymax] with box values relative to the crop.
            for proposal in res:
                if proposal[2] > 0:
                    # Built-in int replaces np.int, removed in NumPy 1.24.
                    label = int(proposal[1])
                    confidence = proposal[2]
                    # Undo the crop offset, then the resize, to get source pixels.
                    xmin = int(scale_ratio * (off_w + crop_w * proposal[3]))
                    ymin = int(scale_ratio * (off_h + crop_h * proposal[4]))
                    xmax = int(scale_ratio * (off_w + crop_w * proposal[5]))
                    ymax = int(scale_ratio * (off_h + crop_h * proposal[6]))
                    if confidence > args.confidence:
                        output.append((xmin, ymin, xmax, ymax, label,
                                       basename(name), confidence))

            for box in output:
                # box = (xmin, ymin, xmax, ymax, label, file, confidence).
                # The colour is chosen from the label (index 4); index 5 is
                # the file name and can never equal 1.
                if box[4] == 1:
                    cl = (255, 0, 0)
                else:
                    cl = (0, 0, 255)
                x, y = box[0], box[1]
                width, height = box[2] - box[0], box[3] - box[1]
                cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), cl, 2)
                multiple_append([x1, y1, ws, hs, objects, paths, probas],
                                [x, y, width, height, box[4], box[5], box[6]])

            if args.save:
                if has_video:
                    writer.write(image)
                else:
                    out_path = (outdir + 'detection_' + model_name + '_' +
                                basename(name))
                    log.info(f'Write to {out_path}')
                    cv2.imwrite(out_path, image)
            else:
                # NOTE(review): the window is shown when args.hide is truthy;
                # confirm against the argument parser that this is intended.
                if args.hide:
                    cv2.imshow('result', image)
                    cv2.waitKey(0)
    finally:
        # Release the capture/writer even if processing fails midway.
        if has_video:
            cap.release()
            if args.save:
                writer.release()

    d = {
        'file': paths,
        'object': objects,
        'x1': x1,
        'y1': y1,
        'w': ws,
        'h': hs,
        'p': probas
    }
    tab = pd.DataFrame(data=d)
    tab.to_csv(args.tab_base, index_label='#')
    # -----------------------------------------------------------------------------------------------------
    log.info("Execution successful\n")
class Model_Head_Pose_Estimation:
    '''
    Class for the Head Pose Estimation Model.

    Wraps an OpenVINO IR network whose outputs ``angle_y_fc``, ``angle_p_fc``
    and ``angle_r_fc`` are read by ``preprocess_output``.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Store configuration; the network itself is read in ``load_model``.

        model_name: path to the model ``.xml`` file.
        device: device name used for layer queries and loading.
        extensions: optional path to a CPU extension library.
        '''
        import os  # local import so the block does not rely on file-level imports

        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = model_name
        # splitext only strips the final extension, so dots elsewhere in the
        # path ("./models/x.xml", "model.v2.xml") no longer truncate it.
        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_name = None
        self.output_shape = None

    def _unsupported_layers(self):
        '''Return the network layers the device reports as unsupported.'''
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=self.device)
        return [
            l for l in self.network.layers.keys() if l not in supported_layers
        ]

    def load_model(self):
        '''
        Read the network, verify layer support and load it on the device.

        When unsupported layers are found on ``CPU`` the configured extension
        is added and the check repeated; the process exits with status 1 if
        no extension was given or layers remain unsupported.
        '''
        # Initialize the plugin
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_structure,
                                                weights=self.model_weights)

        # Check for supported layers
        unsupported_layers = self._unsupported_layers()
        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("Unsupported layers found:{}".format(unsupported_layers))
            if self.extensions is None:
                print("Provide the path of cpu extension")
                raise SystemExit(1)
            print("Adding cpu_extension")
            self.plugin.add_extension(self.extensions, self.device)
            if len(self._unsupported_layers()) != 0:
                print("Issue remain exists after after adding extensions")
                raise SystemExit(1)
            print("Issue resolved after adding extensions")

        self.exec_net = self.plugin.load_network(network=self.network,
                                                 device_name=self.device,
                                                 num_requests=1)
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_name = next(iter(self.network.outputs))
        self.output_shape = self.network.outputs[self.output_name].shape

    def predict(self, image):
        '''
        Run synchronous inference on ``image``.

        Returns the list produced by ``preprocess_output``. The input image is
        copied before preprocessing and is not modified.
        '''
        img_processed = self.preprocess_input(image.copy())
        outputs = self.exec_net.infer({self.input_name: img_processed})
        return self.preprocess_output(outputs)

    def check_model(self):
        '''Placeholder; layer support is checked inside ``load_model``.'''
        pass

    def preprocess_input(self, image):
        '''
        Resize ``image`` to the network input size and reorder it from
        HWC to 1xCxHxW.
        '''
        # cv2.resize takes (width, height); input_shape is indexed [.., H, W].
        img_resized = cv2.resize(image,
                                 (self.input_shape[3], self.input_shape[2]))
        img_processed = np.transpose(np.expand_dims(img_resized, axis=0),
                                     (0, 3, 1, 2))
        return img_processed

    def preprocess_output(self, outputs):
        '''
        Extract the three angle values from the raw network outputs.

        Returns ``[angle_y_fc, angle_p_fc, angle_r_fc]``, each taken from
        element ``[0][0]`` of the corresponding output.
        '''
        return [
            outputs[key].tolist()[0][0]
            for key in ('angle_y_fc', 'angle_p_fc', 'angle_r_fc')
        ]
def main():
    """Run a single-input, single-output network on one image and save the result.

    Parses the command line, loads the IR model, resizes the input image to
    the network input size, runs synchronous inference, rescales the output
    map to [0, 1] and writes it as ``disp.pfm`` and as a colour-coded
    ``disp.png`` in the current directory.
    """
    # arguments
    parser = ArgumentParser()
    parser.add_argument(
        "-m", "--model",
        help="Required. Path to an .xml file with a trained model",
        required=True, type=str)
    parser.add_argument(
        "-i", "--input",
        help="Required. Path to a input image file",
        required=True, type=str)
    parser.add_argument(
        "-l", "--cpu_extension",
        help="Optional. Required for CPU custom layers. Absolute MKLDNN (CPU)-targeted custom layers. "
        "Absolute path to a shared library with the kernels implementations",
        type=str, default=None)
    parser.add_argument(
        "-d", "--device",
        help="Optional. Specify the target device to infer on; CPU, GPU, FPGA, HDDL or MYRIAD is acceptable. "
        "Sample will look for a suitable plugin for device specified. Default value is CPU",
        default="CPU", type=str)
    args = parser.parse_args()

    # logging
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)

    log.info("creating inference engine")
    ie = IECore()
    if args.cpu_extension and "CPU" in args.device:
        ie.add_extension(args.cpu_extension, "CPU")

    log.info("Loading network")
    net = ie.read_network(args.model,
                          os.path.splitext(args.model)[0] + ".bin")

    # Explicit checks instead of assert, which is stripped under ``python -O``.
    if len(net.input_info) != 1:
        log.error("Sample supports only single input topologies")
        sys.exit(1)
    if len(net.outputs) != 1:
        log.error("Sample supports only single output topologies")
        sys.exit(1)

    log.info("preparing input blobs")
    input_blob = next(iter(net.input_info))
    out_blob = next(iter(net.outputs))
    net.batch_size = 1

    # read and pre-process input image
    _, _, height, width = net.input_info[input_blob].input_data.shape

    image = cv2.imread(args.input, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread returns None instead of raising on a bad path/file.
        log.error("Cannot read input image {}".format(args.input))
        sys.exit(1)
    (input_height, input_width) = image.shape[:-1]

    # resize
    if (input_height, input_width) != (height, width):
        log.info("Image is resized from {} to {}".format(
            image.shape[:-1], (height, width)))
        # interpolation must be passed by keyword: the third positional
        # parameter of cv2.resize is ``dst``.
        image = cv2.resize(image, (width, height),
                           interpolation=cv2.INTER_CUBIC)

    # prepare input: HWC -> 1xCxHxW, float32
    image = image.astype(np.float32)
    image = image.transpose((2, 0, 1))
    image_input = np.expand_dims(image, 0)

    # loading model to the plugin
    log.info("loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    # start sync inference
    log.info("starting inference")
    res = exec_net.infer(inputs={input_blob: image_input})

    # processing output blob
    log.info("processing output blob")
    disp = np.squeeze(res[out_blob][0])

    # resize disp to input resolution
    disp = cv2.resize(disp, (input_width, input_height),
                      interpolation=cv2.INTER_CUBIC)

    # rescale disp to [0, 1]; a (near-)constant map becomes a flat 0.5
    disp_min = disp.min()
    disp_max = disp.max()
    if disp_max - disp_min > 1e-6:
        disp = (disp - disp_min) / (disp_max - disp_min)
    else:
        disp.fill(0.5)

    # pfm
    out = 'disp.pfm'
    cv2.imwrite(out, disp)
    log.info("Disparity map was saved to {}".format(out))

    # png
    out = 'disp.png'
    plt.imsave(out, disp, vmin=0, vmax=1, cmap='inferno')
    log.info("Color-coded disparity image was saved to {}".format(out))

    log.info("This demo is an API example, for any performance measurements please use "
             "the dedicated benchmark_app tool from the openVINO toolkit\n")
class FaceDetection:
    '''
    Class for the Face Detection Model.

    Reads an OpenVINO IR network at construction time, loads it on the
    requested device and returns the crop and pixel box of the first
    detection whose confidence exceeds a threshold.
    '''

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Read the network and cache the name/shape of its first input and output.

        model_name: path to the model ``.xml`` file.
        device: device name used for layer queries and loading.
        extensions: optional path to a CPU extension library.

        Raises ValueError when the IR files cannot be read.
        '''
        import os  # local import so the block does not rely on file-level imports

        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = self.model_name  # model xml file
        # Derive the .bin path from the .xml path. splitext strips only the
        # last extension, so dots elsewhere in the path are preserved.
        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
        # Inference Engine entry point
        self.plugin = IECore()
        # Reads the network into self.network (raises ValueError on failure)
        self.check_model(self.model_structure, self.model_weights)
        self.exec_net = None
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_names = next(iter(self.network.outputs))
        self.output_shape = self.network.outputs[self.output_names].shape

    def _unsupported_layers(self):
        '''Return the network layers the device reports as unsupported.'''
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=self.device)
        return [
            ul for ul in self.network.layers.keys()
            if ul not in supported_layers
        ]

    # Supported-layer check and performance counts reference:
    # https://gist.github.com/justinshenk/9917891c0433f33967f6e8cd8fcaa49a
    def load_model(self):
        '''
        Verify layer support and load the network on ``self.device``.

        When unsupported layers are found on ``CPU`` the configured extension
        is added and the check repeated; the process exits with status 1 if
        no extension was given or layers remain unsupported.
        '''
        layers_unsupported = self._unsupported_layers()
        if len(layers_unsupported) != 0 and self.device == 'CPU':
            print('unsupported layers found: {}'.format(layers_unsupported))
            if self.extensions is None:
                print("Please give the right path of cpu extension!")
                raise SystemExit(1)
            print("Adding cpu_extension now")
            self.plugin.add_extension(self.extensions, self.device)
            # Re-query now that the extension is registered.
            layers_unsupported = self._unsupported_layers()
            if len(layers_unsupported) != 0:
                print("Please try again! unsupported layers found after adding the extensions.  device {}:\n{}".format(
                    self.device, ', '.join(layers_unsupported)))
                print("Please try to specify cpu extensions library path in sample's command line parameters using -l "
                      "or --cpu_extension command line argument")
                raise SystemExit(1)
            print("Problem is resolved after adding the extension!")

        self.exec_net = self.plugin.load_network(network=self.network,
                                                 device_name=self.device,
                                                 num_requests=1)

    def predict(self, image, prob_threshold, perf_flag):
        '''
        Detect a face in ``image``.

        image: HxWxC array.
        prob_threshold: detections must have confidence strictly above this.
        perf_flag: when truthy, print per-layer performance counters.

        Returns ``(cropped_face, coords)`` for the first accepted detection,
        where ``coords`` is an int32 array ``[xmin, ymin, xmax, ymax]`` in
        pixels clipped to the image bounds. Returns ``(0, 0)`` when nothing
        passes the threshold.
        '''
        processed_input = self.preprocess_input(image.copy())
        outputs = self.exec_net.infer({self.input_name: processed_input})

        if perf_flag:
            self.performance()

        coords = self.preprocess_output(outputs, prob_threshold)
        if len(coords) == 0:
            return 0, 0

        # Only the first accepted detection is used.
        h, w = image.shape[0], image.shape[1]
        bounds = np.array([w, h, w, h])
        # Clip before slicing: a box that extends past the frame would
        # otherwise give negative indices, which wrap around in the slice.
        coords = np.clip(np.array(coords[0]) * bounds, 0,
                         bounds).astype(np.int32)

        # rows are ymin:ymax, columns are xmin:xmax
        cropped_face = image[coords[1]:coords[3], coords[0]:coords[2]]
        return cropped_face, coords

    def check_model(self, model_structure, model_weights):
        '''
        Read the IR files into ``self.network``.

        Raises ValueError (chained to the original error) on failure.
        '''
        try:
            self.network = self.plugin.read_network(model=model_structure,
                                                    weights=model_weights)
        except Exception as e:
            raise ValueError(
                "Error occurred during face_detection network initialization."
            ) from e

    # Supported-layer check and performance counts reference:
    # https://gist.github.com/justinshenk/9917891c0433f33967f6e8cd8fcaa49a
    def performance(self):
        '''Print the per-layer performance counters of infer request 0.'''
        perf_counts = self.exec_net.requests[0].get_perf_counts()
        row = "{:<70} {:<15} {:<15} {:<15} {:<10}"
        print("## Face detection model performance:")
        print(row.format('name', 'layer_type', 'exec_type', 'status',
                         'real_time, us'))
        for layer, stats in perf_counts.items():
            print(row.format(layer, stats['layer_type'], stats['exec_type'],
                             stats['status'], stats['real_time']))

    def preprocess_input(self, image):
        '''
        Resize ``image`` to the network input size and convert HWC -> 1x3xHxW.
        '''
        H, W = self.input_shape[2], self.input_shape[3]
        # cv2.resize takes (width, height), the reverse of the shape order.
        image_resized = cv2.resize(image, (W, H))
        # channels first, then add the batch dimension
        image = image_resized.transpose((2, 0, 1))
        image_processed = image.reshape(1, 3, H, W)
        return image_processed

    def preprocess_output(self, outputs, prob_threshold):
        '''
        Collect the boxes of detections above ``prob_threshold``.

        Iterates ``outputs[self.output_names][0][0]``; for each row with
        confidence (index 2) strictly greater than the threshold, indices
        3..6 are returned as ``[x_min, y_min, x_max, y_max]``.
        '''
        detections = outputs[self.output_names][0][0]
        return [
            [out[3], out[4], out[5], out[6]]
            for out in detections
            if out[2] > prob_threshold
        ]
class Base(ABC):
    """Model Base Class

    Reads an OpenVINO IR pair (``<model_name>.xml`` / ``<model_name>.bin``),
    loads it on a device at construction time and provides a shared
    ``predict`` pipeline. Subclasses implement ``preprocess_output`` and
    ``draw_output``.
    """

    def __init__(
        self,
        model_name,
        source_width=None,
        source_height=None,
        device="CPU",
        threshold=0.60,
        extensions=None,
    ):
        """Read and load the model.

        Args:
            model_name: path to the IR files without extension.
            source_width: stored as ``_init_image_w``.
            source_height: stored as ``_init_image_h``.
            device: device name passed to ``load_network``.
            threshold: stored as ``self.threshold`` for subclasses.
            extensions: accepted for interface compatibility; not used here.

        Raises:
            FileNotFoundError: if either IR file does not exist.
            ValueError: if the network cannot be read.
        """
        self.model_weights = f"{model_name}.bin"
        self.model_structure = f"{model_name}.xml"
        # Explicit check instead of assert, which is stripped under -O.
        missing = [
            path for path in (self.model_weights, self.model_structure)
            if not Path(path).absolute().exists()
        ]
        if missing:
            raise FileNotFoundError("Model file(s) not found: " +
                                    ", ".join(missing))

        self.device = device
        self.threshold = threshold
        # Size of the weights file in MiB.
        self._model_size = os.stat(self.model_weights).st_size / 1024.0**2

        self._ie_core = IECore()
        self.model = self._get_model()

        # First input / output layer of the network
        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape
        self._init_image_w = source_width
        self._init_image_h = source_height
        self.exec_network = None
        self.perf_stats = {}
        self.load_model()

    def _get_model(self):
        """Helper function for reading the network.

        Falls back to constructing ``IENetwork`` directly when ``IECore`` has
        no ``read_network`` attribute. Raises ValueError on any failure.
        """
        try:
            try:
                model = self._ie_core.read_network(model=self.model_structure,
                                                   weights=self.model_weights)
            except AttributeError:
                logger.warning(
                    "Using an old version of OpenVINO, consider updating it!")
                model = IENetwork(model=self.model_structure,
                                  weights=self.model_weights)
        except Exception as err:
            raise ValueError("Could not Initialise the network. "
                             "Have you entered the correct model path?") from err
        return model

    def load_model(self):
        """Load the model into the plugin (no-op if already loaded)."""
        if self.exec_network is None:
            start_time = time.time()
            self.exec_network = self._ie_core.load_network(
                network=self.model, device_name=self.device)
            # Load time in milliseconds.
            self._model_load_time = (time.time() - start_time) * 1000
            logger.info(
                f"Model: {self.model_structure} took {self._model_load_time:.3f} ms to load."
            )

    def predict(self, image, request_id=0, show_bbox=False, **kwargs):
        """Run asynchronous inference on ``image`` and post-process the result.

        Args:
            image: numpy array holding the frame.
            request_id: index of the infer request to use.
            show_bbox: forwarded to ``preprocess_output``.
            **kwargs: forwarded to ``preprocess_input``.

        Returns:
            ``(elapsed_ms, bbox)`` where ``bbox`` is the first element returned
            by ``preprocess_output``; None if the request does not finish
            with status 0.

        Raises:
            IOError: if ``image`` is not a numpy array.
        """
        if not isinstance(image, np.ndarray):
            raise IOError("Image not parsed correctly.")

        p_image = self.preprocess_input(image, **kwargs)

        predict_start_time = time.time()
        self.exec_network.start_async(request_id=request_id,
                                      inputs={self.input_name: p_image})
        request = self.exec_network.requests[request_id]
        # wait(-1) blocks until the request is done; 0 is the success status.
        if request.wait(-1) != 0:
            return None

        pred_result = []
        for output_name in self.model.outputs:
            pred_result.append(request.outputs[output_name])
            self.perf_stats[output_name] = request.get_perf_counts()
        predict_end_time = float(time.time() - predict_start_time) * 1000
        bbox, _ = self.preprocess_output(pred_result,
                                         image,
                                         show_bbox=show_bbox)
        return (predict_end_time, bbox)

    @abstractmethod
    def preprocess_output(self,
                          inference_results,
                          image,
                          show_bbox=False,
                          **kwargs):
        """Draw bounding boxes onto the frame.

        ``predict`` unpacks the return value as a 2-tuple and returns its
        first element.
        """
        raise NotImplementedError("Please Implement this method")

    @staticmethod
    @abstractmethod
    def draw_output(image):
        """Subclass hook; must be overridden."""
        raise NotImplementedError("Please Implement this method")

    @staticmethod
    def plot_frame(image):
        """Helper function for finding image coordinates/px"""
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.show()

    def add_text(self,
                 text,
                 image,
                 position,
                 font_size=0.75,
                 color=(255, 255, 255)):
        """Write ``text`` onto ``image`` in place at ``position``."""
        cv2.putText(
            image,
            text,
            position,
            cv2.FONT_HERSHEY_COMPLEX,
            font_size,
            color,
            1,
        )

    def preprocess_input(self, image, height=None, width=None):
        """Helper function for processing frame

        Resizes to ``(height, width)`` and converts HWC to 1xCxHxW. When
        either dimension is omitted, both are taken from ``input_shape[2:]``.
        """
        if height is None or width is None:
            height, width = self.input_shape[2:]
        p_frame = cv2.resize(image, (width, height))
        # Change data layout from HWC to CHW
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)
        return p_frame
class Model_Pose:
    '''
    Wrapper for a head-pose model.

    Reads an OpenVINO IR network and exposes the values of its
    ``angle_y_fc``, ``angle_p_fc`` and ``angle_r_fc`` outputs.
    '''

    def __init__(self,
                 model_name,
                 device='CPU',
                 extensions=None,
                 threshold=0.6):
        '''
        Read the network from ``model_name`` (the ``.xml`` path); the weights
        path is the same path with a ``.bin`` extension. ``extensions`` is
        accepted but not used.
        '''
        weights_path = os.path.splitext(model_name)[0] + ".bin"
        self.device = device
        self.threshold = threshold
        self.core = IECore()
        self.network = self.core.read_network(model=str(model_name),
                                              weights=str(weights_path))
        self.input = next(iter(self.network.inputs))
        self.output = next(iter(self.network.outputs))

    def load_model(self):
        '''
        Load the network on ``self.device`` and return the executable network.
        '''
        loaded = self.core.load_network(self.network, self.device)
        self.exec_network = loaded
        return loaded

    def predict(self, image):
        '''
        Run synchronous inference on ``image`` and return ``[yaw, pitch, roll]``.

        The preprocessed frame, raw results and final list are also kept on
        the instance as ``preprocess_image``, ``results`` and ``output_list``.
        '''
        self.preprocess_image = self.preprocess_input(image)
        feed = {self.input: self.preprocess_image}
        self.results = self.exec_network.infer(inputs=feed)
        self.output_list = self.preprocess_output(self.results)
        return self.output_list

    def check_model(self):
        '''
        Exit with status 1 if the device reports any layer as unsupported.
        '''
        supported = self.core.query_network(network=self.network,
                                            device_name=self.device)
        missing = [
            name for name in self.network.layers.keys()
            if name not in supported
        ]
        if not missing:
            print("All layers are supported")
            return
        print("Check extention of these unsupported layers =>" + str(missing))
        exit(1)

    def preprocess_input(self, image):
        '''
        Convert ``image`` to float32, resize it to the network input size and
        reorder HWC -> 1xCxHxW.
        '''
        shape = self.network.inputs[self.input].shape
        # cv2.resize takes (width, height); the shape is indexed [.., H, W].
        target_size = (shape[3], shape[2])
        resized = cv2.resize(image.astype(np.float32), target_size)
        chw = resized.transpose(2, 0, 1)
        return chw.reshape(1, *chw.shape)

    def preprocess_output(self, outputs):
        '''
        Return ``[yaw, pitch, roll]`` read from element ``[0, 0]`` of the
        ``angle_y_fc``, ``angle_p_fc`` and ``angle_r_fc`` outputs.
        '''
        angle_keys = ("angle_y_fc", "angle_p_fc", "angle_r_fc")
        return [outputs[key][0, 0] for key in angle_keys]
def test_query_network(device):
    """query_network must report every layer and assign each one to ``device``."""
    ie = IECore()
    net = ie.read_network(model=test_net_xml, weights=test_net_bin)
    query_res = ie.query_network(net, device)
    assert net.layers.keys() == query_res.keys(), "Not all network layers present in query_network results"
    # Compare the whole set of assigned devices; picking a single element of
    # the set would make the check depend on set iteration order.
    assert set(query_res.values()) == {device}, "Wrong device for some layers"
class Model_Facial_Landmarks_Detection:
    '''
    Class for the Facial Landmark Detection Model.

    Wraps an OpenVINO IR network and crops a fixed-size patch around the
    first two landmark points, which ``predict`` treats as the left and
    right eye.
    '''

    # Half the side length, in pixels, of the square cropped around each eye.
    _EYE_HALF_SIZE = 10

    def __init__(self, model_name, device='CPU', extensions=None):
        '''
        Store configuration; the network itself is read in ``load_model``.

        model_name: path to the model ``.xml`` file.
        device: device name used for layer queries and loading.
        extensions: optional path to a CPU extension library.
        '''
        import os  # local import so the block does not rely on file-level imports

        self.model_name = model_name
        self.device = device
        self.extensions = extensions
        self.model_structure = model_name
        # splitext only strips the final extension, so dots elsewhere in the
        # path ("./models/x.xml", "model.v2.xml") no longer truncate it.
        self.model_weights = os.path.splitext(self.model_name)[0] + '.bin'
        self.plugin = None
        self.network = None
        self.exec_net = None
        self.input_name = None
        self.input_shape = None
        self.output_name = None
        self.output_shape = None

    def _unsupported_layers(self):
        '''Return the network layers the device reports as unsupported.'''
        supported_layers = self.plugin.query_network(network=self.network,
                                                     device_name=self.device)
        return [
            l for l in self.network.layers.keys() if l not in supported_layers
        ]

    def load_model(self):
        '''
        Read the network, verify layer support and load it on the device.

        When unsupported layers are found on ``CPU`` the configured extension
        is added and the check repeated; the process exits with status 1 if
        no extension was given or layers remain unsupported.
        '''
        # Initialize the plugin
        self.plugin = IECore()
        self.network = self.plugin.read_network(model=self.model_structure,
                                                weights=self.model_weights)

        # Check for supported layers
        unsupported_layers = self._unsupported_layers()
        if len(unsupported_layers) != 0 and self.device == 'CPU':
            print("Unsupported layers found:{}".format(unsupported_layers))
            if self.extensions is None:
                print("Provide the path of cpu extension")
                raise SystemExit(1)
            print("Adding cpu_extension")
            self.plugin.add_extension(self.extensions, self.device)
            if len(self._unsupported_layers()) != 0:
                print("Issue remain exists after after adding extensions")
                raise SystemExit(1)
            print("Issue resolved after adding extensions")

        self.exec_net = self.plugin.load_network(network=self.network,
                                                 device_name=self.device,
                                                 num_requests=1)
        self.input_name = next(iter(self.network.inputs))
        self.input_shape = self.network.inputs[self.input_name].shape
        self.output_name = next(iter(self.network.outputs))
        self.output_shape = self.network.outputs[self.output_name].shape

    def predict(self, image):
        '''
        Locate both eyes in ``image`` and crop a patch around each.

        Returns ``(left_eye, right_eye, eye_coords)`` where ``eye_coords`` is
        ``[[xmin, ymin, xmax, ymax], [xmin, ymin, xmax, ymax]]`` in pixels.
        The minimum corner is clamped to 0 so that an eye close to the image
        border does not produce a negative (wrapping) slice index.
        '''
        img_processed = self.preprocess_input(image.copy())
        outputs = self.exec_net.infer({self.input_name: img_processed})
        coords = self.preprocess_output(outputs)

        h = image.shape[0]
        w = image.shape[1]
        # Normalised (x, y, x, y) -> pixel coordinates.
        scaled = (np.array(coords) * np.array([w, h, w, h])).astype(np.int32)
        le_x, le_y, re_x, re_y = (int(v) for v in scaled)

        half = self._EYE_HALF_SIZE
        le_xmin, le_ymin = max(le_x - half, 0), max(le_y - half, 0)
        le_xmax, le_ymax = le_x + half, le_y + half
        re_xmin, re_ymin = max(re_x - half, 0), max(re_y - half, 0)
        re_xmax, re_ymax = re_x + half, re_y + half

        left_eye = image[le_ymin:le_ymax, le_xmin:le_xmax]
        right_eye = image[re_ymin:re_ymax, re_xmin:re_xmax]
        eye_coords = [[le_xmin, le_ymin, le_xmax, le_ymax],
                      [re_xmin, re_ymin, re_xmax, re_ymax]]
        return left_eye, right_eye, eye_coords

    def check_model(self):
        '''Placeholder; layer support is checked inside ``load_model``.'''
        pass

    def preprocess_input(self, image):
        '''
        Convert BGR -> RGB, resize to the network input size and reorder
        HWC -> 1xCxHxW.
        '''
        img_cvt = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # cv2.resize takes (width, height); input_shape is indexed [.., H, W].
        img_resized = cv2.resize(img_cvt,
                                 (self.input_shape[3], self.input_shape[2]))
        img_processed = np.transpose(np.expand_dims(img_resized, axis=0),
                                     (0, 3, 1, 2))
        return img_processed

    def preprocess_output(self, outputs):
        '''
        Extract the first four landmark values from the raw output.

        Reads ``outputs[self.output_name][0]`` and returns
        ``(lefteye_x, lefteye_y, righteye_x, righteye_y)`` taken from entries
        0..3, each at element ``[0][0]``.
        '''
        outs = outputs[self.output_name][0]
        lefteye_x, lefteye_y, righteye_x, righteye_y = (
            outs[i].tolist()[0][0] for i in range(4))
        return (lefteye_x, lefteye_y, righteye_x, righteye_y)
def test_register_plugin():
    """A plugin registered under the name "BLA" must be usable as a device name."""
    core = IECore()
    core.register_plugin("MKLDNNPlugin", "BLA")
    network = core.read_network(model=test_net_xml, weights=test_net_bin)
    loaded = core.load_network(network, "BLA")
    assert isinstance(loaded, ExecutableNetwork), "Cannot load the network to the registered plugin with name 'BLA'"
def test_read_network_from_xml():
    """read_network on the test .xml/.bin pair must return an IENetwork."""
    network = IECore().read_network(model=test_net_xml, weights=test_net_bin)
    assert isinstance(network, IENetwork)
def main():
    """Classify an audio input clip by clip and log the top class of each clip.

    Reads the model given by ``args.model``, splits the audio source into
    (possibly overlapping) clips matching the network input, runs inference
    batch by batch and logs the best label with its score for every clip that
    starts inside the recording. Exits with status 1 when the model does not
    have exactly one input and one output, or when the overlap is larger than
    the clip length.
    """
    args = build_argparser()

    logging.basicConfig(format="[ %(levelname)s ] %(message)s",
                        level=logging.INFO, stream=sys.stdout)
    log = logging.getLogger()

    log.info("Creating Inference Engine")
    ie = IECore()

    if args.device == "CPU" and args.cpu_extension:
        ie.add_extension(args.cpu_extension, 'CPU')

    log.info("Loading model {}".format(args.model))
    # The weights file is derived by swapping the 4-character extension.
    net = ie.read_network(args.model, args.model[:-4] + ".bin")

    if len(net.input_info) != 1:
        log.error("Demo supports only models with 1 input layer")
        sys.exit(1)
    input_blob = next(iter(net.input_info))
    input_shape = net.input_info[input_blob].input_data.shape
    if len(net.outputs) != 1:
        log.error("Demo supports only models with 1 output layer")
        sys.exit(1)
    output_blob = next(iter(net.outputs))

    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=args.device)

    log.info("Preparing input")

    labels = []
    if args.labels:
        with open(args.labels, "r") as file:
            labels = [line.rstrip() for line in file]

    batch_size, channels, one, length = input_shape
    if one != 1:
        raise RuntimeError(
            "Wrong third dimension size of model input shape - {} (expected 1)"
            .format(one))

    audio = AudioSource(args.input, channels=channels,
                        samplerate=args.sample_rate)

    # An int overlap is a sample count, anything else a fraction of the clip.
    if isinstance(args.overlap, int):
        hop = length - args.overlap
    else:
        hop = int(length * (1.0 - args.overlap))
    if hop < 0:
        log.error(
            "Wrong value for '-ol/--overlap' argument - overlapping more than clip length"
        )
        sys.exit(1)

    log.info("Starting inference")
    clips = 0
    infer_time = 0
    for idx, chunk in enumerate(
            audio.chunks(length, hop, num_chunks=batch_size)):
        chunk.shape = input_shape
        infer_start_time = time.perf_counter()
        output = exec_net.infer(inputs={input_blob: chunk})
        infer_time += time.perf_counter() - infer_start_time
        clips += batch_size
        output = output[output_blob]
        for batch, data in enumerate(output):
            clip_offset = (idx * batch_size + batch) * hop
            start_time = clip_offset / audio.samplerate
            end_time = (clip_offset + length) / audio.samplerate
            label = np.argmax(data)
            # Skip clips that start past the end of the recording.
            if start_time < audio.duration():
                log.info("[{:.2f}-{:.2f}] - {:6.2%} {:s}".format(
                    start_time, end_time, data[label],
                    labels[label] if labels else "Class {}".format(label)))

    if clips:
        log.info("Average infer time - {:.1f} ms per clip".format(
            infer_time / clips * 1000))
    else:
        # No chunk was produced, so there is nothing to average over.
        log.warning("No audio clips were processed")
def test_read_network_as_path():
    """read_network accepts pathlib.Path objects for model and weights."""
    xml_path = Path(model_path()[0])
    bin_path = Path(test_net_bin)
    network = IECore().read_network(model=xml_path, weights=bin_path)
    assert isinstance(network, IENetwork)
class Face_Detect_Model:
    '''
    Class for the Face Detection Model.
    '''

    def __init__(self, model_name, device='CPU', threshold=0.5):
        '''
        Initialize instance variables, read the network and load it on the device.

        model_name: path to the IR files without extension ('.xml' and '.bin'
            are appended).
        device: device name passed to IECore.load_network.
        threshold: minimum confidence a detection needs to be returned.

        Raises ValueError if the network cannot be read.
        '''
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.threshold = threshold

        try:
            self.core = IECore()
            self.model = self.core.read_network(model=self.model_structure,
                                                weights=self.model_weights)
        except Exception as e:
            # Keep the original error as the cause so the real reason
            # (missing file, bad IR, ...) stays visible in the traceback.
            raise ValueError("Could not Initialise the network.") from e

        self.input_name = next(iter(self.model.input_info))
        self.input_shape = self.model.input_info[
            self.input_name].input_data.shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape

        # Load the model
        self.load_model()

    def load_model(self):
        '''
        Load the already read model on the configured device.
        '''
        self.net = self.core.load_network(network=self.model,
                                          device_name=self.device,
                                          num_requests=1)

    def predict(self, image):
        '''
        Run the whole preprocess / inference / postprocess cycle on one image.

        Returns (x_min, y_min, x_max, y_max) of the best detection in the
        coordinates of ``image``, or four ``None`` values when no detection
        exceeds the threshold.
        '''
        proc_img = self.preprocess_input(image)
        out = self.net.infer({self.input_name: proc_img})[self.output_name]
        return self.preprocess_output(out, image.shape)

    def preprocess_input(self, image):
        '''
        Resize the image to the network input size and convert HWC to NCHW.
        '''
        # Remember that the resize function takes the width first
        proc_frame = cv2.resize(image,
                                (self.input_shape[3], self.input_shape[2]))
        proc_frame = np.transpose(proc_frame, (2, 0, 1))
        proc_frame = proc_frame[np.newaxis, :]
        return proc_frame

    def preprocess_output(self, outputs, orig_input_shape):
        '''
        Return the bounding box of the detection with the highest confidence,
        in the original image coordinate space, if its confidence is greater
        than the configured threshold.

        outputs: array indexed as [0, 0, detection, field]; field 2 is read as
            the confidence and fields 3.. as the normalized box corners.
        orig_input_shape: shape of the original image (height first).

        Returns (x_min, y_min, x_max, y_max) as ints, or
        (None, None, None, None) when there is no detection above the
        threshold (including an empty detection list).
        '''
        detections = outputs[0, 0]
        # np.argmax raises on an empty sequence, so handle "no detections".
        if len(detections) == 0:
            return None, None, None, None

        # Get width and height of original image
        orig_height = orig_input_shape[0]
        orig_width = orig_input_shape[1]

        # Get output with biggest confidence
        best_detection = detections[np.argmax(detections[:, 2])]
        if not best_detection[2] > self.threshold:
            return None, None, None, None

        x_min, y_min, x_max, y_max = best_detection[3:]
        # Transform detection coordinates to the original image input space
        x_min = int(x_min * orig_width)
        x_max = int(x_max * orig_width)
        y_min = int(y_min * orig_height)
        y_max = int(y_max * orig_height)
        return x_min, y_min, x_max, y_max
def test_incorrect_xml():
    """Reading a non-existent model xml fails with a descriptive message."""
    core = IECore()
    expected_message = "Path to the model ./model.xml doesn't exist or it's a directory"
    with pytest.raises(Exception) as exc_info:
        core.read_network(model="./model.xml", weights=Path(test_net_bin))
    assert expected_message in str(exc_info.value)
def layer_out_data():
    """Return the first output data object of layer '19/Fused_Add_' of the test network."""
    network = IECore().read_network(model=test_net_xml, weights=test_net_bin)
    fused_add = network.layers['19/Fused_Add_']
    return fused_add.out_data[0]
def test_incorrect_bin():
    """Reading with a non-existent weights file fails with a descriptive message."""
    core = IECore()
    expected_message = "Path to the weights ./model.bin doesn't exist or it's a directory"
    with pytest.raises(Exception) as exc_info:
        core.read_network(model=test_net_xml, weights="./model.bin")
    assert expected_message in str(exc_info.value)
def main():
    """Run an SSD-style detection model on the input images and save the boxes.

    Steps: read the IR given by ``args.model``, print device information,
    check layer support on CPU, load and resize the input images, configure
    input/output precision, run one synchronous inference and print every
    proposal with a positive confidence. Proposals with confidence above 0.5
    are drawn on the corresponding image, which is written to ``out.bmp``.
    Exits with status 1 when unsupported layers are found on CPU.
    """
    log.basicConfig(format="[ %(levelname)s ] %(message)s", level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()
    log.info("Loading Inference Engine")
    ie = IECore()

    # --- 1. Read IR generated by ModelOptimizer (.xml and .bin files) ---
    model_xml = args.model
    model_bin = os.path.splitext(model_xml)[0] + ".bin"
    log.info("Loading network files:\n\t{}\n\t{}".format(model_xml, model_bin))
    net = ie.read_network(model=model_xml, weights=model_bin)

    # --- 2. Load plugin for inference engine and extensions library if specified ---
    log.info("Device info:")
    versions = ie.get_versions(args.device)
    print("{}{}".format(" " * 8, args.device))
    print("{}MKLDNNPlugin version ......... {}.{}".format(
        " " * 8, versions[args.device].major, versions[args.device].minor))
    print("{}Build ........... {}".format(" " * 8, versions[args.device].build_number))

    if args.cpu_extension and "CPU" in args.device:
        ie.add_extension(args.cpu_extension, "CPU")
        log.info("CPU extension loaded: {}".format(args.cpu_extension))

    if "CPU" in args.device:
        supported_layers = ie.query_network(net, "CPU")
        not_supported_layers = [l for l in net.layers.keys() if l not in supported_layers]
        if len(not_supported_layers) != 0:
            log.error(
                "Following layers are not supported by the plugin for specified device {}:\n {}"
                .format(args.device, ', '.join(not_supported_layers)))
            log.error(
                "Please try to specify cpu extensions library path in sample's command line parameters using -l "
                "or --cpu_extension command line argument")
            sys.exit(1)

    # --- 3. Read and preprocess input ---
    print("inputs number: " + str(len(net.input_info.keys())))

    for input_key in net.input_info:
        print("input shape: " + str(net.input_info[input_key].input_data.shape))
        print("input key: " + input_key)
        if len(net.input_info[input_key].input_data.layout) == 4:
            n, c, h, w = net.input_info[input_key].input_data.shape

    images = np.ndarray(shape=(n, c, h, w))
    images_hw = []  # original (height, width) of every image, indexed by batch id
    for i in range(n):
        image = cv2.imread(args.input[i])
        ih, iw = image.shape[:-1]
        images_hw.append((ih, iw))
        log.info("File was added: ")
        log.info(" {}".format(args.input[i]))
        if (ih, iw) != (h, w):
            log.warning("Image {} is resized from {} to {}".format(
                args.input[i], image.shape[:-1], (h, w)))
            image = cv2.resize(image, (w, h))
        image = image.transpose((2, 0, 1))  # Change data layout from HWC to CHW
        images[i] = image

    # --- 4. Configure input & output ---
    # Prepare input blobs
    log.info("Preparing input blobs")
    assert (len(net.input_info.keys()) == 1 or len(net.input_info.keys()) == 2), "Sample supports topologies only with 1 or 2 inputs"
    out_blob = next(iter(net.outputs))
    input_name, input_info_name = "", ""

    for input_key in net.input_info:
        if len(net.input_info[input_key].layout) == 4:
            # 4-D input: the image tensor.
            input_name = input_key
            log.info("Batch size is {}".format(net.batch_size))
            net.input_info[input_key].precision = 'U8'
        elif len(net.input_info[input_key].layout) == 2:
            # 2-D input: the per-image info vector.
            input_info_name = input_key
            net.input_info[input_key].precision = 'FP32'
            info_shape = net.input_info[input_key].input_data.shape
            if info_shape[1] not in (3, 6) or info_shape[0] != 1:
                log.error(
                    'Invalid input info. Should be 3 or 6 values length.')

    data = {}
    data[input_name] = images

    if input_info_name != "":
        infos = np.ndarray(shape=(n, c), dtype=float)
        for i in range(n):
            infos[i, 0] = h
            infos[i, 1] = w
            infos[i, 2] = 1.0
        data[input_info_name] = infos

    # Prepare output blobs
    log.info('Preparing output blobs')

    output_name, output_info = "", net.outputs[next(iter(net.outputs.keys()))]
    for output_key in net.outputs:
        if net.layers[output_key].type == "DetectionOutput":
            output_name, output_info = output_key, net.outputs[output_key]

    if output_name == "":
        log.error("Can't find a DetectionOutput layer in the topology")

    output_dims = output_info.shape
    if len(output_dims) != 4:
        log.error("Incorrect output dimensions for SSD model")
    object_size = output_dims[3]

    if object_size != 7:
        log.error("Output item should have 7 as a last dimension")

    output_info.precision = "FP32"

    # --- Performing inference ---
    log.info("Loading model to the device")
    exec_net = ie.load_network(network=net, device_name=args.device)
    log.info("Creating infer request and starting inference")
    res = exec_net.infer(inputs=data)

    # --- Read and postprocess output ---
    log.info("Processing output blobs")
    res = res[out_blob]
    boxes, classes = {}, {}
    data = res[0][0]
    for number, proposal in enumerate(data):
        if proposal[2] > 0:
            # Built-in int() is used because the np.int alias was removed
            # from NumPy (1.24) and raises AttributeError there.
            imid = int(proposal[0])
            ih, iw = images_hw[imid]
            label = int(proposal[1])
            confidence = proposal[2]
            xmin = int(iw * proposal[3])
            ymin = int(ih * proposal[4])
            xmax = int(iw * proposal[5])
            ymax = int(ih * proposal[6])
            print("[{},{}] element, prob = {:.6} ({},{})-({},{}) batch id : {}"
                  .format(number, label, confidence, xmin, ymin, xmax, ymax, imid), end="")
            if proposal[2] > 0.5:
                print(" WILL BE PRINTED!")
                boxes.setdefault(imid, []).append([xmin, ymin, xmax, ymax])
                classes.setdefault(imid, []).append(label)
            else:
                print()

    for imid in classes:
        tmp_image = cv2.imread(args.input[imid])
        for box in boxes[imid]:
            cv2.rectangle(tmp_image, (box[0], box[1]), (box[2], box[3]), (232, 35, 244), 2)
        cv2.imwrite("out.bmp", tmp_image)
        log.info("Image out.bmp created!")

    log.info("Execution successful\n")
    log.info(
        "This sample is an API example, for any performance measurements please use the dedicated benchmark_app tool"
    )
def test_load_network(device):
    """Loading the test network on ``device`` yields an ExecutableNetwork."""
    core = IECore()
    network = core.read_network(model=test_net_xml, weights=test_net_bin)
    loaded = core.load_network(network, device)
    assert isinstance(loaded, ExecutableNetwork)
def main():
    """Run the instance segmentation demo on a video source.

    Reads the IR given by ``args.model`` (which must expose the inputs
    ``im_data``/``im_info`` and the outputs ``boxes``, ``scores``,
    ``classes``, ``raw_masks``), then for every captured frame: resizes and
    pads it to the network input, runs inference, filters detections by
    ``args.prob_threshold``, optionally tracks instances and renders the
    result. The loop ends when the source is exhausted or ESC is pressed.
    The capture and any OpenCV windows are released even if the loop is
    interrupted (for example with CTRL+C).
    """
    log.basicConfig(format='[ %(levelname)s ] %(message)s', level=log.INFO, stream=sys.stdout)
    args = build_argparser().parse_args()

    # Plugin initialization for specified device and load extensions library if specified.
    log.info('Creating Inference Engine...')
    ie = IECore()
    if args.cpu_extension and 'CPU' in args.device:
        ie.add_extension(args.cpu_extension, 'CPU')

    # Read IR
    log.info('Loading network')
    net = ie.read_network(args.model, os.path.splitext(args.model)[0] + '.bin')

    required_input_keys = {'im_data', 'im_info'}
    assert required_input_keys == set(net.input_info), \
        'Demo supports only topologies with the following input keys: {}'.format(', '.join(required_input_keys))
    required_output_keys = {'boxes', 'scores', 'classes', 'raw_masks'}
    assert required_output_keys.issubset(net.outputs.keys()), \
        'Demo supports only topologies with the following output keys: {}'.format(', '.join(required_output_keys))

    n, c, h, w = net.input_info['im_data'].input_data.shape
    assert n == 1, 'Only batch 1 is supported by the demo application'

    log.info('Loading IR to the plugin...')
    exec_net = ie.load_network(network=net, device_name=args.device, num_requests=2)

    # A numeric source is treated as a camera index, anything else as a path.
    try:
        input_source = int(args.input_source)
    except ValueError:
        input_source = args.input_source
    cap = cv2.VideoCapture(input_source)

    try:
        if not cap.isOpened():
            log.error('Failed to open "{}"'.format(args.input_source))
        cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)

        if args.no_track:
            tracker = None
        else:
            tracker = StaticIOUTracker()

        with open(args.labels, 'rt') as labels_file:
            class_labels = labels_file.read().splitlines()

        presenter = monitors.Presenter(args.utilization_monitors, 45,
                                       (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH) / 4),
                                        round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) / 8)))
        visualizer = Visualizer(class_labels, show_boxes=args.show_boxes, show_scores=args.show_scores)

        render_time = 0

        log.info('Starting inference...')
        print("To close the application, press 'CTRL+C' here or switch to the output window and press ESC key")
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            if args.no_keep_aspect_ratio:
                # Resize the image to a target size.
                scale_x = w / frame.shape[1]
                scale_y = h / frame.shape[0]
                input_image = cv2.resize(frame, (w, h))
            else:
                # Resize the image to keep the same aspect ratio and to fit it to a window of a target size.
                scale_x = scale_y = min(h / frame.shape[0], w / frame.shape[1])
                input_image = cv2.resize(frame, None, fx=scale_x, fy=scale_y)

            # Pad on the bottom/right up to the network input size.
            input_image_size = input_image.shape[:2]
            input_image = np.pad(input_image, ((0, h - input_image_size[0]),
                                               (0, w - input_image_size[1]),
                                               (0, 0)),
                                 mode='constant', constant_values=0)
            # Change data layout from HWC to CHW.
            input_image = input_image.transpose((2, 0, 1))
            input_image = input_image.reshape((n, c, h, w)).astype(np.float32)
            input_image_info = np.asarray([[input_image_size[0], input_image_size[1], 1]], dtype=np.float32)

            # Run the net.
            inf_start = time.time()
            outputs = exec_net.infer({'im_data': input_image, 'im_info': input_image_info})
            inf_end = time.time()
            det_time = inf_end - inf_start

            # Parse detection results of the current request
            boxes = outputs['boxes']
            # Map box coordinates back to the original frame.
            boxes[:, 0::2] /= scale_x
            boxes[:, 1::2] /= scale_y
            scores = outputs['scores']
            classes = outputs['classes'].astype(np.uint32)
            masks = []
            for box, cls, raw_mask in zip(boxes, classes, outputs['raw_masks']):
                raw_cls_mask = raw_mask[cls, ...]
                mask = segm_postprocess(box, raw_cls_mask, frame.shape[0], frame.shape[1])
                masks.append(mask)

            # Filter out detections with low confidence.
            detections_filter = scores > args.prob_threshold
            scores = scores[detections_filter]
            classes = classes[detections_filter]
            boxes = boxes[detections_filter]
            masks = list(segm for segm, is_valid in zip(masks, detections_filter) if is_valid)

            render_start = time.time()

            if len(boxes) and args.raw_output_message:
                log.info('Detected boxes:')
                log.info(' Class ID | Confidence | XMIN | YMIN | XMAX | YMAX ')
                for box, cls, score, mask in zip(boxes, classes, scores, masks):
                    log.info('{:>10} | {:>10f} | {:>8.2f} | {:>8.2f} | {:>8.2f} | {:>8.2f} '.format(cls, score, *box))

            # Get instance track IDs.
            masks_tracks_ids = None
            if tracker is not None:
                masks_tracks_ids = tracker(masks, classes)

            # Visualize masks.
            frame = visualizer(frame, boxes, classes, scores, presenter, masks, masks_tracks_ids)

            # Draw performance stats (render time is that of the previous frame).
            inf_time_message = 'Inference time: {:.3f} ms'.format(det_time * 1000)
            render_time_message = 'OpenCV rendering time: {:.3f} ms'.format(render_time * 1000)
            cv2.putText(frame, inf_time_message, (15, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 10, 10), 1)
            cv2.putText(frame, render_time_message, (15, 30), cv2.FONT_HERSHEY_COMPLEX, 0.5, (10, 10, 200), 1)

            # Print performance counters.
            if args.perf_counts:
                perf_counts = exec_net.requests[0].get_perf_counts()
                log.info('Performance counters:')
                print('{:<70} {:<15} {:<15} {:<15} {:<10}'.format('name', 'layer_type', 'exet_type', 'status',
                                                                  'real_time, us'))
                for layer, stats in perf_counts.items():
                    print('{:<70} {:<15} {:<15} {:<15} {:<10}'.format(layer, stats['layer_type'], stats['exec_type'],
                                                                      stats['status'], stats['real_time']))

            if not args.no_show:
                # Show resulting image.
                cv2.imshow('Results', frame)
            render_end = time.time()
            render_time = render_end - render_start

            if not args.no_show:
                key = cv2.waitKey(args.delay)
                esc_code = 27
                if key == esc_code:
                    break
                presenter.handleKey(key)

        print(presenter.reportMeans())
    finally:
        # Always release the windows and the capture device, including when
        # the loop is left through an exception or CTRL+C.
        cv2.destroyAllWindows()
        cap.release()
def test_load_network_wrong_device():
    """Loading a network on the unregistered device "BLA" raises RuntimeError."""
    core = IECore()
    network = core.read_network(model=test_net_xml, weights=test_net_bin)
    expected_message = 'Device with "BLA" name is not registered in the InferenceEngine'
    with pytest.raises(RuntimeError) as exc_info:
        core.load_network(network, "BLA")
    assert expected_message in str(exc_info.value)
class PersonDetect:
    '''
    Class for the Person Detection Model.
    '''

    # Size used to scale normalized boxes; None until set_out_size() is
    # called, in which case predict() falls back to the frame size.
    w = None
    h = None

    def __init__(self, model_name, device, threshold=0.60):
        '''
        Read the network description and remember its input/output names.

        model_name: path to the IR files without extension ('.xml' and '.bin'
            are appended).
        device: device name passed to IECore.load_network by load_model().
        threshold: minimum confidence for a box to be kept.

        Raises ValueError if the network cannot be read.
        '''
        self.model_weights = model_name + '.bin'
        self.model_structure = model_name + '.xml'
        self.device = device
        self.threshold = threshold

        try:
            self.core = IECore()
            self.model = self.core.read_network(model=self.model_structure,
                                                weights=self.model_weights)
        except Exception as e:
            # Chain the original error so the real cause stays visible.
            raise ValueError(
                "Could not Initialise the network. Have you enterred the correct model path?"
            ) from e

        self.input_name = next(iter(self.model.inputs))
        self.input_shape = self.model.inputs[self.input_name].shape
        self.output_name = next(iter(self.model.outputs))
        self.output_shape = self.model.outputs[self.output_name].shape

    def load_model(self):
        '''
        Load the network on the configured device.

        Raises NotImplementedError (chained to the original error) if the
        network cannot be loaded.
        '''
        try:
            self.net = self.core.load_network(network=self.model,
                                              device_name=self.device,
                                              num_requests=1)
        except Exception as e:
            raise NotImplementedError from e

    def predict(self, image):
        '''
        Detect people in ``image`` and draw their boxes on it in place.

        Boxes are scaled to the size given to set_out_size(); if that was
        never called, the size of ``image`` itself is used.

        Returns (coords, image) where coords is a list of
        (xmin, ymin, xmax, ymax) tuples.
        '''
        p_frame = self.preprocess_input(image)
        outputs = self.net.infer({self.input_name: p_frame})
        width = self.w if self.w is not None else image.shape[1]
        height = self.h if self.h is not None else image.shape[0]
        coords = self.preprocess_outputs(outputs[self.output_name], width, height)
        self.draw_outputs(coords, image)
        return coords, image

    def draw_outputs(self, coords, image):
        '''
        Draw a 1-pixel green rectangle on ``image`` for every box in ``coords``.
        '''
        for coord in coords:
            cv2.rectangle(image, (coord[0], coord[1]), (coord[2], coord[3]),
                          (0, 255, 0), 1)

    def preprocess_outputs(self, outputs, width=None, height=None):
        '''
        Convert raw detections into pixel boxes above the threshold.

        outputs: detections indexed as [0][0][detection]; field 2 is the
            confidence and fields 3..6 the normalized xmin, ymin, xmax, ymax.
        width, height: size the normalized boxes are scaled to; default to
            the values stored by set_out_size().

        Returns a list of (xmin, ymin, xmax, ymax) int tuples.
        Raises ValueError if no size is available.
        '''
        width = self.w if width is None else width
        height = self.h if height is None else height
        if width is None or height is None:
            raise ValueError(
                "Output size is unknown: call set_out_size() or pass width and height")

        coords = []
        for box in outputs[0][0]:  # output.shape: 1x1xNx7
            if box[2] >= self.threshold:
                coords.append((int(box[3] * width), int(box[4] * height),
                               int(box[5] * width), int(box[6] * height)))
        return coords

    def preprocess_input(self, image):
        '''
        Resize ``image`` to the network input size and convert HWC to NCHW.
        '''
        p_frame = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
        p_frame = p_frame.transpose((2, 0, 1))
        p_frame = p_frame.reshape(1, *p_frame.shape)
        return p_frame

    def set_out_size(self, w, h):
        '''
        Set the width and height that normalized boxes are scaled to.
        '''
        self.w = w
        self.h = h
print("\nDetect initing...")
divider = '=' * 30
print(divider)

# Choose the IR files of the requested YOLOv4 variant.
if args.tiny:
    print('model: v4tiny')
    model_xml, model_bin = './IR_FP16/yolov4-tiny.xml', './IR_FP16/yolov4-tiny.bin'
else:
    print('model: v4')
    model_xml, model_bin = './IR_FP16/yolov4.xml', './IR_FP16/yolov4.bin'

ie = IECore()
net = ie.read_network(model=model_xml, weights=model_bin)

# Report every input and remember the dimensions of the 4-D one.
print("inputs number: " + str(len(net.input_info.keys())))
for input_key in net.input_info:
    input_data = net.input_info[input_key].input_data
    print("input shape: " + str(input_data.shape))
    if len(input_data.layout) == 4:
        n, c, h, w = input_data.shape
print(divider)

# Build the executable network; 'VPU' selects the MYRIAD device, anything else CPU.
print("Loading model to the device...")
if args.device == 'VPU':
    target_device = 'MYRIAD'
else:
    target_device = 'CPU'
exec_net = ie.load_network(network=net, device_name=target_device)
print("Creating infer request and starting inference...")