def detect_rnet(im, dets, thresh):
    """Filter PNet candidate boxes through RNet.

    Args:
        im: input image as an (H, W, C) array.
        dets: boxes produced by PNet, absolute coordinates in the
            original image (columns 0-3 are x1, y1, x2, y2; column 4
            is the score).
        thresh: RNet classification score threshold.

    Returns:
        Tuple ``(boxes, boxes_c)`` — the kept boxes and their
        regression-calibrated absolute coordinates — or ``(None, None)``
        when no candidate survives.
    """
    h, w, c = im.shape
    # Expand each PNet box into the enclosing square so resizing to the
    # fixed RNet input does not distort / lose information.
    dets = convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    # Clip boxes that extend past the image borders; out-of-image parts
    # of a crop are later filled with zeros.
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(dets, w, h)

    # Keep only boxes whose shorter side is at least 20 px.
    # NOTE: the original code counted the valid boxes but then iterated
    # over the *first* num_boxes entries of dets, so interleaved small
    # boxes caused valid later boxes to be skipped and zero crops to be
    # scored. Filtering dets up front fixes that and keeps cls_scores
    # aligned with dets.
    valid = np.where(np.minimum(tmpw, tmph) >= 20)[0]
    if valid.size == 0:
        print('P模型检测结果为空!')
        return None, None
    dets = dets[valid]

    cropped_ims = np.zeros((valid.size, 3, 24, 24), dtype=np.float32)
    for out_i, i in enumerate(valid):
        # Paste the (possibly clipped) crop into a zero canvas of the
        # box's full size, then resize to the 24x24 RNet input.
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        try:
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
                im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            img = cv2.resize(tmp, (24, 24))
            img = img.transpose((2, 0, 1))
            # Normalize pixel values to roughly [-1, 1).
            img = (img - 127.5) / 128
            cropped_ims[out_i, :, :, :] = img
        except Exception:
            # Best-effort: a failed crop leaves a zero image for this
            # slot, matching the original behavior.
            continue

    cls_scores, reg = predict_rnet(cropped_ims)
    cls_scores = cls_scores[:, 1]  # probability of the "face" class
    keep_inds = np.where(cls_scores > thresh)[0]
    if len(keep_inds) == 0:
        return None, None
    boxes = dets[keep_inds]
    boxes[:, 4] = cls_scores[keep_inds]
    reg = reg[keep_inds]

    keep = py_nms(boxes, 0.6, mode='Union')
    boxes = boxes[keep]
    # Apply the RNet bounding-box regression to get calibrated absolute
    # coordinates in the original image.
    boxes_c = calibrate_box(boxes, reg[keep])
    return boxes, boxes_c
def detect_onet(im, dets, thresh):
    """Refine RNet boxes through ONet; also returns facial landmarks.

    Same flow as the RNet stage, but crops are 48x48 and the network
    additionally predicts 5 landmark points per box.

    Args:
        im: input image as an (H, W, C) array.
        dets: boxes kept by the RNet stage, absolute coordinates.
        thresh: ONet classification score threshold.

    Returns:
        Tuple ``(boxes_c, landmark)`` — calibrated boxes and absolute
        landmark coordinates — or ``(None, None)`` when nothing passes.
    """
    img_h, img_w, _ = im.shape
    dets = convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(dets, img_w, img_h)

    n = dets.shape[0]
    cropped_ims = np.zeros((n, 3, 48, 48), dtype=np.float32)
    for i in range(n):
        # Zero-padded crop of the box, resized to the ONet input size.
        patch = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        patch[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
            im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        resized = cv2.resize(patch, (48, 48), interpolation=cv2.INTER_LINEAR)
        # CHW layout, normalized to roughly [-1, 1).
        cropped_ims[i, :, :, :] = (resized.transpose((2, 0, 1)) - 127.5) / 128

    cls_scores, reg, landmark = predict_onet(cropped_ims)
    cls_scores = cls_scores[:, 1]  # probability of the "face" class
    keep_inds = np.where(cls_scores > thresh)[0]
    if len(keep_inds) == 0:
        return None, None

    boxes = dets[keep_inds]
    boxes[:, 4] = cls_scores[keep_inds]
    reg = reg[keep_inds]
    landmark = landmark[keep_inds]

    # Landmarks are predicted relative to the box; map them to absolute
    # image coordinates using each box's width/height and origin.
    box_w = boxes[:, 2] - boxes[:, 0] + 1
    box_h = boxes[:, 3] - boxes[:, 1] + 1
    landmark[:, 0::2] = (np.tile(box_w, (5, 1)) * landmark[:, 0::2].T +
                         np.tile(boxes[:, 0], (5, 1)) - 1).T
    landmark[:, 1::2] = (np.tile(box_h, (5, 1)) * landmark[:, 1::2].T +
                         np.tile(boxes[:, 1], (5, 1)) - 1).T

    boxes_c = calibrate_box(boxes, reg)
    keep = py_nms(boxes_c, 0.6, mode='Minimum')
    return boxes_c[keep], landmark[keep]
thickness=int(max(frame_width, frame_height) * 0.002)) fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G') video_writer = cv2.VideoWriter( os.path.join(args.outputPath, 'single.avi'), fourcc, 20.0, (int(max(frame_width, frame_height) * 1.5), max(frame_width, frame_height))) while cap.isOpened(): # Capture frame-by-frame ret, frame = cap.read() if not ret: break # Convert to square image square, padding_dist, padding_axis = convert_to_square( frame, (128, 128, 128)) square = cv2.transpose(square) square = cv2.flip(square, flipCode=1) corners = corners_extractor.get(square) # Warping src_pts = np.float32([corners]).reshape(-1, 1, 2) homography_matrix, mask = cv2.findHomography( src_pts, dst_pts, cv2.RANSAC, 5.0) aligned_img = cv2.warpPerspective(square, homography_matrix, warped_size) display_patch[400:400 + warped_size[1], 200:200 + warped_size[0], :] = aligned_img