def detect_pnet(self, im):
    """Generate face candidate boxes with PNet over an image pyramid.

    Parameters:
    -----------
    im: numpy array, input image array

    Returns:
    --------
    bboxes_align: numpy array
        bboxes after calibration, or None when no candidate survives
    """
    h, w, c = im.shape
    net_size = config.PNET_SIZE

    # Initial scale: the smallest face we care about maps onto the PNet
    # receptive field.
    scale = float(net_size) / self.min_face_size
    pyramid_im = self.resize_image(im, scale)
    cur_h, cur_w, _ = pyramid_im.shape

    # Detections collected from every pyramid level.
    candidates = []

    # Walk down the pyramid until the image is smaller than the net input.
    while min(cur_h, cur_w) > net_size:
        img_tensor = utils.convert_image_to_tensor(pyramid_im)
        batch = img_tensor.unsqueeze(0)
        batch = batch.to(self.device)

        cls_map, reg_map = self.pnet_detector(batch)

        cls_np = utils.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = utils.convert_chwTensor_to_hwcNumpy(reg_map.cpu())

        level_boxes = self.generate_bounding_box(
            cls_np, reg_np, scale, self.thresh[0])

        # Prepare the next (smaller) pyramid level before filtering, so the
        # loop condition always sees the freshly resized image.
        scale *= self.scale_factor
        pyramid_im = self.resize_image(im, scale)
        cur_h, cur_w, _ = pyramid_im.shape

        if level_boxes.size == 0:
            continue

        # Per-level NMS before merging across scales.
        keep = utils.nms(level_boxes[:, :5], 0.5, 'Union')
        candidates.append(level_boxes[keep])

    if len(candidates) == 0:
        return None

    all_bboxes = np.vstack(candidates)

    # NMS once more across all pyramid levels.
    keep = utils.nms(all_bboxes[:, 0:5], 0.7, 'Union')
    all_bboxes = all_bboxes[keep]

    # cols 0-3: box corners, col 4: score, cols 5+: regression offsets
    bboxes_align = utils.calibrate_box(all_bboxes[:, 0:5], all_bboxes[:, 5:])
    bboxes_align = utils.convert_to_square(bboxes_align)
    bboxes_align[:, 0:4] = np.round(bboxes_align[:, 0:4])
    return bboxes_align
def detect_onet(self, im, bboxes):
    """Get face candidates using onet

    Parameters:
    ----------
    im: numpy array
        input image array
    bboxes: numpy array
        detection results of rnet

    Returns:
    -------
    bboxes_align: numpy array
        bboxes after calibration, or None when nothing survives filtering
    """
    net_size = config.ONET_SIZE
    h, w, c = im.shape
    if bboxes is None:
        return None

    # Clip proposals to the image and get padded-crop coordinates.
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = \
        utils.correct_bboxes(bboxes, w, h)
    num_bboxes = bboxes.shape[0]

    # crop face using rnet proposal
    cropped_ims_tensors = []
    for i in range(num_bboxes):
        try:
            # Skip degenerate (zero-area) proposals.
            if tmph[i] > 0 and tmpw[i] > 0:
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i], dx[i]:edx[i], :] = \
                    im[y[i]:ey[i], x[i]:ex[i], :]
                crop_im = cv2.resize(tmp, (net_size, net_size))
                crop_im_tensor = utils.convert_image_to_tensor(crop_im)
                cropped_ims_tensors.append(crop_im_tensor)
        except ValueError as e:
            print(e)

    # Guard: torch.stack raises on an empty list; if every proposal was
    # degenerate or failed to crop there is nothing to refine.
    if len(cropped_ims_tensors) == 0:
        return None

    feed_imgs = torch.stack(cropped_ims_tensors)
    feed_imgs = feed_imgs.to(self.device)

    cls, reg = self.onet_detector(feed_imgs)
    cls = cls.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    # Keep proposals whose face-class probability clears the ONet threshold.
    keep_inds = np.where(cls[:, 1] > self.thresh[2])[0]
    if len(keep_inds) > 0:
        keep_bboxes = bboxes[keep_inds]
        keep_cls = cls[keep_inds, :]
        keep_reg = reg[keep_inds]
        # Overwrite the score column with the ONet confidence.
        keep_bboxes[:, 4] = keep_cls[:, 1].reshape((-1, ))
    else:
        return None

    # Calibrate first, then NMS in 'Minimum' mode (standard ONet order).
    bboxes_align = utils.calibrate_box(keep_bboxes, keep_reg)
    keep = utils.nms(bboxes_align, 0.7, mode='Minimum')
    if len(keep) == 0:
        return None

    bboxes_align = bboxes_align[keep]
    bboxes_align = utils.convert_to_square(bboxes_align)
    return bboxes_align
def detect_rnet(self, im, bboxes):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array
        input image array
    bboxes: numpy array
        detection results of pnet

    Returns:
    -------
    bboxes_align: numpy array
        bboxes after calibration, or None when nothing survives filtering
    """
    net_size = config.RNET_SIZE
    h, w, c = im.shape
    if bboxes is None:
        return None
    num_bboxes = bboxes.shape[0]

    # Clip proposals to the image and get padded-crop coordinates.
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = \
        utils.correct_bboxes(bboxes, w, h)

    # crop face using pnet proposals
    cropped_ims_tensors = []
    for i in range(num_bboxes):
        try:
            # Skip degenerate (zero-area) proposals.
            if tmph[i] > 0 and tmpw[i] > 0:
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i], dx[i]:edx[i], :] = \
                    im[y[i]:ey[i], x[i]:ex[i], :]
                crop_im = cv2.resize(tmp, (net_size, net_size))
                crop_im_tensor = utils.convert_image_to_tensor(crop_im)
                cropped_ims_tensors.append(crop_im_tensor)
        except ValueError as e:
            print('dy: {}, edy: {}, dx: {}, edx: {}'.format(
                dy[i], edy[i], dx[i], edx[i]))
            print('y: {}, ey: {}, x: {}, ex: {}'.format(
                y[i], ey[i], x[i], ex[i]))
            print(e)

    # Guard: torch.stack raises on an empty list; if every proposal was
    # degenerate or failed to crop there is nothing to refine.
    if len(cropped_ims_tensors) == 0:
        return None

    # provide input tensor, if there are too many proposals in PNet
    # there might be OOM
    feed_imgs = torch.stack(cropped_ims_tensors)
    feed_imgs = feed_imgs.to(self.device)

    cls, reg = self.rnet_detector(feed_imgs)
    cls = cls.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    # Keep proposals whose face-class probability clears the RNet threshold.
    keep_inds = np.where(cls[:, 1] > self.thresh[1])[0]
    if len(keep_inds) > 0:
        keep_bboxes = bboxes[keep_inds]
        keep_cls = cls[keep_inds, :]
        keep_reg = reg[keep_inds]
        # using softmax 1 as cls score
        keep_bboxes[:, 4] = keep_cls[:, 1].reshape((-1, ))
    else:
        return None

    # NMS before calibration (standard RNet order; ONet calibrates first).
    keep = utils.nms(keep_bboxes, 0.7)
    if len(keep) == 0:
        return None

    keep_cls = keep_cls[keep]
    keep_bboxes = keep_bboxes[keep]
    keep_reg = keep_reg[keep]

    bboxes_align = utils.calibrate_box(keep_bboxes, keep_reg)
    bboxes_align = utils.convert_to_square(bboxes_align)
    bboxes_align[:, 0:4] = np.round(bboxes_align[:, 0:4])
    return bboxes_align