def pd_postprocess(self, inference):
    """Decode the pose-detection NN output into self.regions.

    Reads the two output layers of the detection network, decodes the
    anchor-based predictions into candidate regions, prunes overlapping
    candidates with NMS, and (when landmarks are enabled) computes the
    rotated rectangles used to crop the landmark NN input.
    """
    # Raw fp16 outputs: one confidence score per anchor, and 18 regression
    # values per anchor.
    raw_scores = inference.getLayerFp16("classificators")
    raw_boxes = inference.getLayerFp16("regressors")
    scores = np.array(raw_scores, dtype=np.float16)  # 896
    bboxes = np.array(raw_boxes, dtype=np.float16).reshape((self.nb_anchors, 18))  # 896x18
    # Decode anchors into candidate regions, then remove overlaps.
    candidates = mpu.decode_bboxes(self.pd_score_thresh, scores, bboxes, self.anchors)
    self.regions = mpu.non_max_suppression(candidates, self.pd_nms_thresh)
    if self.use_lm:
        # Build the rotated square ROI for the landmark stage.
        mpu.detections_to_rect(self.regions)
        mpu.rect_transformation(self.regions, self.video_size, self.video_size)
def pd_postprocess(self, inference):
    """Decode the pose-detection NN output into self.regions.

    Extracts scores and box regressions from the inference result dict,
    decodes them into regions (optionally keeping only the single best
    detection), applies NMS when several detections are allowed, and
    computes the rotated ROI rectangles for the landmark stage.
    """
    # Drop the leading singleton dimension of the score tensor.  # 896
    scores = np.squeeze(inference[self.pd_scores])
    # First (only) batch element of the regression tensor.  # 896x12
    bboxes = inference[self.pd_bboxes][0]
    # With multi_detection off, only the best-scoring anchor is kept,
    # which makes NMS below unnecessary.
    keep_best_only = not self.multi_detection
    self.regions = mpu.decode_bboxes(self.pd_score_thresh, scores, bboxes,
                                     self.anchors, best_only=keep_best_only)
    if self.multi_detection:
        self.regions = mpu.non_max_suppression(self.regions, self.pd_nms_thresh)
    # The keypoint pair that defines the rotated rectangle differs between
    # the full-body and upper-body models.
    kp_pair = [0, 1] if self.full_body else [2, 3]
    mpu.detections_to_rect(self.regions, kp_pair=kp_pair)
    mpu.rect_transformation(self.regions, self.frame_size, self.frame_size)
def pd_postprocess(self, inference):
    """Decode the pose-detection NN output into self.regions.

    Reads the two fp16 output layers, decodes the anchor predictions into
    regions (optionally keeping only the single best detection), applies
    NMS when several detections are allowed, and computes the rotated ROI
    rectangles for the landmark stage.
    """
    # Raw fp16 outputs: one confidence score per anchor, and 12 regression
    # values per anchor.
    raw_scores = inference.getLayerFp16("classificators")
    raw_boxes = inference.getLayerFp16("regressors")
    scores = np.array(raw_scores, dtype=np.float16)  # 896
    bboxes = np.array(raw_boxes, dtype=np.float16).reshape((self.nb_anchors, 12))  # 896x12
    # With multi_detection off, only the best-scoring anchor is kept,
    # which makes NMS below unnecessary.
    keep_best_only = not self.multi_detection
    self.regions = mpu.decode_bboxes(self.pd_score_thresh, scores, bboxes,
                                     self.anchors, best_only=keep_best_only)
    if self.multi_detection:
        self.regions = mpu.non_max_suppression(self.regions, self.pd_nms_thresh)
    # The keypoint pair that defines the rotated rectangle differs between
    # the full-body and upper-body models.
    kp_pair = [0, 1] if self.full_body else [2, 3]
    mpu.detections_to_rect(self.regions, kp_pair=kp_pair)
    mpu.rect_transformation(self.regions, self.frame_size, self.frame_size)
def run(self):
    """Main capture / inference / display loop.

    Reads frames from an image or a video capture, squares them (crop or
    pad), runs the pose-detection NN and the landmark NN, renders the
    results, and handles keyboard shortcuts. On exit, prints timing and
    inference-count statistics and releases the output writer if any.
    """
    self.fps = FPS(mean_nb_frames=20)
    # Counters for the statistics printed after the loop.
    nb_frames = 0
    nb_pd_inferences = 0
    nb_pd_inferences_direct = 0
    nb_lm_inferences = 0
    nb_lm_inferences_after_landmarks_ROI = 0
    glob_pd_rtrip_time = 0
    glob_lm_rtrip_time = 0
    # get_new_frame == False means: re-process the CURRENT frame with the
    # detection NN, after the landmark NN failed on the ROI predicted from
    # the previous frame's landmarks.
    get_new_frame = True
    # True when the next ROI can be predicted from the previous frame's
    # landmarks instead of running the detection NN.
    use_previous_landmarks = False
    global_time = time.perf_counter()
    while True:
        if get_new_frame:
            nb_frames += 1
            if self.input_type == "image":
                vid_frame = self.img
            else:
                ok, vid_frame = self.cap.read()
                if not ok:
                    break
            h, w = vid_frame.shape[:2]
            if self.crop:
                # Cropping the long side to get a square shape
                self.frame_size = min(h, w)
                dx = (w - self.frame_size) // 2
                dy = (h - self.frame_size) // 2
                video_frame = vid_frame[dy:dy + self.frame_size, dx:dx + self.frame_size]
            else:
                # Padding on the small side to get a square shape
                self.frame_size = max(h, w)
                self.pad_h = int((self.frame_size - h) / 2)
                self.pad_w = int((self.frame_size - w) / 2)
                video_frame = cv2.copyMakeBorder(vid_frame, self.pad_h, self.pad_h, self.pad_w, self.pad_w, cv2.BORDER_CONSTANT)

        # When get_new_frame is False, video_frame from the previous
        # iteration is reused here.
        annotated_frame = video_frame.copy()

        if not self.force_detection and use_previous_landmarks:
            # Skip the detection NN: reuse the ROI predicted from the
            # previous frame's landmarks.
            self.regions = regions_from_landmarks
            mpu.detections_to_rect(
                self.regions, kp_pair=[0, 1]
            )  # self.regions.pd_kps are initialized from landmarks on previous frame
            mpu.rect_transformation(self.regions, self.frame_size, self.frame_size)
        else:
            # Infer pose detection
            # Resize image to NN square input shape
            frame_nn = cv2.resize(video_frame, (self.pd_w, self.pd_h), interpolation=cv2.INTER_AREA)
            # Transpose hxwx3 -> 1x3xhxw
            frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ]
            pd_rtrip_time = now()
            inference = self.pd_exec_net.infer(
                inputs={self.pd_input_blob: frame_nn})
            glob_pd_rtrip_time += now() - pd_rtrip_time
            self.pd_postprocess(inference)
            self.pd_render(annotated_frame)
            nb_pd_inferences += 1
            if get_new_frame:
                nb_pd_inferences_direct += 1

        # Landmarks
        self.nb_active_regions = 0
        if self.show_3d:
            self.vis3d.clear_geometries()
            self.vis3d.add_geometry(self.grid_floor, reset_bounding_box=False)
            self.vis3d.add_geometry(self.grid_wall, reset_bounding_box=False)
        if self.force_detection:
            # Detection runs every frame: process every detected region.
            for r in self.regions:
                frame_nn = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_w, self.lm_h)
                # Transpose hxwx3 -> 1x3xhxw
                frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ]
                # Get landmarks
                lm_rtrip_time = now()
                inference = self.lm_exec_net.infer(
                    inputs={self.lm_input_blob: frame_nn})
                glob_lm_rtrip_time += now() - lm_rtrip_time
                nb_lm_inferences += 1
                self.lm_postprocess(r, inference)
                self.lm_render(annotated_frame, r)
        elif len(self.regions) == 1:
            # Tracking mode: single region, landmark NN + ROI prediction.
            r = self.regions[0]
            frame_nn = mpu.warp_rect_img(r.rect_points, video_frame, self.lm_w, self.lm_h)
            # Transpose hxwx3 -> 1x3xhxw
            frame_nn = np.transpose(frame_nn, (2, 0, 1))[None, ]
            # Get landmarks
            lm_rtrip_time = now()
            inference = self.lm_exec_net.infer(
                inputs={self.lm_input_blob: frame_nn})
            glob_lm_rtrip_time += now() - lm_rtrip_time
            nb_lm_inferences += 1
            if use_previous_landmarks:
                nb_lm_inferences_after_landmarks_ROI += 1
            self.lm_postprocess(r, inference)
            if not self.force_detection:
                if get_new_frame:
                    if not use_previous_landmarks:
                        # With a new frame, we have run the landmark NN on a ROI found by the detection NN...
                        if r.lm_score > self.lm_score_threshold:
                            # ...and succesfully found a body and its landmarks
                            # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y)
                            regions_from_landmarks = [
                                mpu.Region(pd_kps=r.landmarks_padded[
                                    self.nb_lms - 2:self.nb_lms, :2] /
                                           self.frame_size)
                            ]
                            use_previous_landmarks = True
                    else:
                        # With a new frame, we have run the landmark NN on a ROI calculated from the landmarks of the previous frame...
                        if r.lm_score > self.lm_score_threshold:
                            # ...and succesfully found a body and its landmarks
                            # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y)
                            regions_from_landmarks = [
                                mpu.Region(pd_kps=r.landmarks_padded[
                                    self.nb_lms - 2:self.nb_lms, :2] /
                                           self.frame_size)
                            ]
                            use_previous_landmarks = True
                        else:
                            # ...and could not find a body
                            # We don't know if it is because the ROI calculated from the previous frame is not reliable (the body moved)
                            # or because there is really no body in the frame. To decide, we have to run the detection NN on this frame
                            get_new_frame = False
                            use_previous_landmarks = False
                            continue
                else:
                    # On a frame on which we already ran the landmark NN without founding a body,
                    # we have run the detection NN...
                    if r.lm_score > self.lm_score_threshold:
                        # ...and succesfully found a body and its landmarks
                        use_previous_landmarks = True
                        # Predict the ROI for the next frame from the last 2 landmarks normalized coordinates (x,y)
                        regions_from_landmarks = [
                            mpu.Region(pd_kps=r.landmarks_padded[
                                self.nb_lms - 2:self.nb_lms, :2] /
                                       self.frame_size)
                        ]
                        # NOTE(review): this second assignment is redundant
                        # (already set just above).
                        use_previous_landmarks = True
                    # else:
                    #     ...and could not find a body
                    #     We are sure there is no body in that frame
                    get_new_frame = True
            self.lm_render(annotated_frame, r)
        else:
            # Detection NN hasn't found any body
            get_new_frame = True

        self.fps.update()

        if self.show_3d:
            self.vis3d.poll_events()
            self.vis3d.update_renderer()
        if self.smoothing and self.nb_active_regions == 0:
            # No body tracked this frame: reset the smoothing filter so it
            # does not blend with stale data.
            self.filter.reset()

        if not self.crop:
            # Remove the padding added earlier, restoring the original
            # aspect ratio for display.
            annotated_frame = annotated_frame[self.pad_h:self.pad_h + h, self.pad_w:self.pad_w + w]
        if self.show_fps:
            self.fps.display(annotated_frame, orig=(50, 50), size=1, color=(240, 180, 100))
        cv2.imshow("Blazepose", annotated_frame)

        if self.output:
            self.output.write(annotated_frame)

        # Keyboard shortcuts: q/ESC quits, space pauses, others toggle overlays.
        key = cv2.waitKey(1)
        if key == ord('q') or key == 27:
            break
        elif key == 32:
            # Pause on space bar
            cv2.waitKey(0)
        elif key == ord('1'):
            self.show_pd_box = not self.show_pd_box
        elif key == ord('2'):
            self.show_pd_kps = not self.show_pd_kps
        elif key == ord('3'):
            self.show_rot_rect = not self.show_rot_rect
        elif key == ord('4'):
            self.show_landmarks = not self.show_landmarks
        elif key == ord('5'):
            self.show_scores = not self.show_scores
        elif key == ord('6'):
            self.show_gesture = not self.show_gesture
        elif key == ord('f'):
            self.show_fps = not self.show_fps
        elif key == ord('s'):
            self.show_segmentation = not self.show_segmentation

    # Print some stats
    # NOTE(review): the division by nb_pd_inferences below is unguarded —
    # it raises ZeroDivisionError if the loop exits before any detection
    # inference ran; the landmark division IS guarded. Confirm intended.
    print(
        f"FPS : {nb_frames/(time.perf_counter() - global_time):.1f} f/s (# frames = {nb_frames})"
    )
    print(
        f"# pose detection inferences : {nb_pd_inferences} - # direct: {nb_pd_inferences_direct} - # after landmarks ROI failures: {nb_pd_inferences-nb_pd_inferences_direct}"
    )
    print(
        f"# landmark inferences : {nb_lm_inferences} - # after pose detection: {nb_lm_inferences - nb_lm_inferences_after_landmarks_ROI} - # after landmarks ROI prediction: {nb_lm_inferences_after_landmarks_ROI}"
    )
    print(
        f"Pose detection round trip : {glob_pd_rtrip_time/nb_pd_inferences*1000:.1f} ms"
    )
    if nb_lm_inferences:
        print(
            f"Landmark round trip : {glob_lm_rtrip_time/nb_lm_inferences*1000:.1f} ms"
        )
    if self.output:
        self.output.release()