def o_step(self, img, bboxes): img = preprocess(img) height, width, _ = img.shape num_boxes = bboxes.shape[0] img_boxes = get_image_boxes(bboxes, img, height, width, num_boxes, size=48) probs, offsets, landmarks = self.onet(img_boxes) valid_idx = probs[:, 1] > self.thresholds[-1] bboxes = tf.boolean_mask(bboxes, valid_idx) if bboxes.shape[0] == 0: return [], [], [] offsets = tf.boolean_mask(offsets, valid_idx) scores = tf.boolean_mask(probs[:, 1], valid_idx) landmarks = tf.boolean_mask(landmarks, valid_idx) landmarks = self.landmark_alignment(bboxes, landmarks) bboxes = calibrate_box(bboxes, offsets) nms_idx = tf.image.non_max_suppression( bboxes, scores, self.max_nms_output_num, iou_threshold=self.nms_thresholds[2]) bboxes = tf.gather(bboxes, nms_idx) landmarks = tf.gather(landmarks, nms_idx) scores = tf.gather(scores, nms_idx) return bboxes, landmarks, scores
def r_step(self, img, bboxes): img = preprocess(img) height, width, _ = img.shape num_boxes = bboxes.shape[0] img_boxes = get_image_boxes(bboxes, img, height, width, num_boxes, size=24) probs, offsets = self.rnet(img_boxes) valid_idx = tf.argmax(probs, axis=-1) == 1 bboxes = tf.boolean_mask(bboxes, valid_idx) if bboxes.shape[0] == 0: return [] offsets = tf.boolean_mask(offsets, valid_idx) scores = tf.boolean_mask(probs[:, 1], valid_idx) bboxes = self.bbox_alignment(bboxes, offsets) nms_idx = tf.image.non_max_suppression( bboxes, scores, self.max_nms_output_num, iou_threshold=self.nms_thresholds[1]) bboxes = tf.gather(bboxes, nms_idx) return bboxes
def detect_faces( image, min_face_size=20.0, thresholds=[0.6, 0.7, 0.8], # thresholds=[0.7, 0.8, 0.9], nms_thresholds=[0.7, 0.7, 0.7]): """ Arguments: image: an instance of PIL.Image. min_face_size: a float number. thresholds: a list of length 3. nms_thresholds: a list of length 3. Returns: two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10], bounding boxes and facial landmarks. """ # LOAD MODELS pnet = PNet() rnet = RNet() onet = ONet() onet.eval() # BUILD AN IMAGE PYRAMID width, height = image.size min_length = min(height, width) min_detection_size = 12 factor = 0.707 # sqrt(0.5) # scales for scaling the image scales = [] # scales the image so that # minimum size that we can detect equals to # minimum face size that we want to detect m = min_detection_size / min_face_size min_length *= m factor_count = 0 while min_length > min_detection_size: scales.append(m * factor**factor_count) min_length *= factor factor_count += 1 # STAGE 1 # it will be returned bounding_boxes = [] # run P-Net on different scales for s in scales: boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0]) bounding_boxes.append(boxes) # collect boxes (and offsets, and scores) from different scales bounding_boxes = [i for i in bounding_boxes if i is not None] bounding_boxes = np.vstack(bounding_boxes) keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0]) bounding_boxes = bounding_boxes[keep] # use offsets predicted by pnet to transform bounding boxes bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:]) # shape [n_boxes, 5] bounding_boxes = convert_to_square(bounding_boxes) bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) # STAGE 2 img_boxes = get_image_boxes(bounding_boxes, image, size=24) img_boxes = Variable(torch.FloatTensor(img_boxes), volatile=True) output = rnet(img_boxes) offsets = output[0].data.numpy() # shape [n_boxes, 4] probs = output[1].data.numpy() # shape [n_boxes, 2] keep = np.where(probs[:, 1] > thresholds[1])[0] bounding_boxes = bounding_boxes[keep] bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) offsets = offsets[keep] keep = nms(bounding_boxes, nms_thresholds[1]) bounding_boxes = bounding_boxes[keep] bounding_boxes = calibrate_box(bounding_boxes, offsets[keep]) bounding_boxes = convert_to_square(bounding_boxes) bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) # STAGE 3 img_boxes = get_image_boxes(bounding_boxes, image, size=48) if len(img_boxes) == 0: return [], [] img_boxes = Variable(torch.FloatTensor(img_boxes), volatile=True) output = onet(img_boxes) landmarks = output[0].data.numpy() # shape [n_boxes, 10] offsets = output[1].data.numpy() # shape [n_boxes, 4] probs = output[2].data.numpy() # shape [n_boxes, 2] keep = np.where(probs[:, 1] > thresholds[2])[0] bounding_boxes = bounding_boxes[keep] bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) offsets = offsets[keep] landmarks = landmarks[keep] # compute landmark points width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0 height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0 xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1] landmarks[:, 0:5] = np.expand_dims( xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5] landmarks[:, 5:10] = np.expand_dims( ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10] bounding_boxes = calibrate_box(bounding_boxes, offsets) keep = nms(bounding_boxes, nms_thresholds[2], mode='min') bounding_boxes = bounding_boxes[keep] landmarks = landmarks[keep] return bounding_boxes, landmarks
def detect_faces(image, min_face_size = 20.0, thresholds=[0.6, 0.7, 0.8], nms_thresholds=[0.7, 0.7, 0.7]): """ Arguments: image: an instance of PIL.Image. min_face_size: a float number. thresholds: a list of length 3. nms_thresholds: a list of length 3. Returns: two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10], bounding boxes and facial landmarks. """ # LOAD MODELS pnet = PNet() rnet = RNet() onet = ONet() onet.eval() # BUILD AN IMAGE PYRAMID width, height = image.size min_length = min(height, width) min_detection_size = 12 factor = 0.707 # sqrt(0.5) # scales for scaling the image scales = [] # scales the image so that # minimum size that we can detect equals to # minimum face size that we want to detect m = min_detection_size/min_face_size min_length *= m factor_count = 0 while min_length > min_detection_size: scales.append(m*factor**factor_count) min_length *= factor factor_count += 1 # STAGE 1 # it will be returned bounding_boxes = [] # run P-Net on different scales for s in scales: boxes = run_first_stage(image, pnet, scale = s, threshold = thresholds[0]) bounding_boxes.append(boxes) # collect boxes (and offsets, and scores) from different scales bounding_boxes = [i for i in bounding_boxes if i is not None] bounding_boxes = np.vstack(bounding_boxes) keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0]) bounding_boxes = bounding_boxes[keep] # use offsets predicted by pnet to transform bounding boxes bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:]) # shape [n_boxes, 5] bounding_boxes = convert_to_square(bounding_boxes) bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) # STAGE 2 img_boxes = get_image_boxes(bounding_boxes, image, size = 24) img_boxes = Variable(torch.FloatTensor(img_boxes), volatile = True) output = rnet(img_boxes) offsets = output[0].data.numpy() # shape [n_boxes, 4] probs = output[1].data.numpy() # shape [n_boxes, 2] keep = np.where(probs[:, 1] > thresholds[1])[0] bounding_boxes = bounding_boxes[keep] bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) offsets = offsets[keep] keep = nms(bounding_boxes, nms_thresholds[1]) bounding_boxes = bounding_boxes[keep] bounding_boxes = calibrate_box(bounding_boxes, offsets[keep]) bounding_boxes = convert_to_square(bounding_boxes) bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4]) # STAGE 3 img_boxes = get_image_boxes(bounding_boxes, image, size = 48) if len(img_boxes) == 0: return [], [] img_boxes = Variable(torch.FloatTensor(img_boxes), volatile = True) output = onet(img_boxes) landmarks = output[0].data.numpy() # shape [n_boxes, 10] offsets = output[1].data.numpy() # shape [n_boxes, 4] probs = output[2].data.numpy() # shape [n_boxes, 2] keep = np.where(probs[:, 1] > thresholds[2])[0] bounding_boxes = bounding_boxes[keep] bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, )) offsets = offsets[keep] landmarks = landmarks[keep] # compute landmark points width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0 height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0 xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1] landmarks[:, 0:5] = np.expand_dims(xmin, 1) + np.expand_dims(width, 1)*landmarks[:, 0:5] landmarks[:, 5:10] = np.expand_dims(ymin, 1) + np.expand_dims(height, 1)*landmarks[:, 5:10] bounding_boxes = calibrate_box(bounding_boxes, offsets) keep = nms(bounding_boxes, nms_thresholds[2], mode = 'min') bounding_boxes = bounding_boxes[keep] landmarks = landmarks[keep] return bounding_boxes, landmarks