def test_nms(self):
    """Check that NMS keeps the expected number of boxes for every sample.

    Each entry of ``self.images`` is unpacked as
    ``(image path, bounding boxes, expected count)``.  The bounding boxes
    are sliced as ``[:, :4]`` (coordinates) and ``[:, 4]`` (score), so they
    must support 2-D indexing.  All boxes are drawn on a copy of the image,
    the boxes selected by ``func.nms`` (overlap threshold 0.3) are drawn on
    the image itself, and the number of selected boxes is compared with the
    expected count.
    """
    all_boxes_color = (0, 0, 255)
    kept_boxes_color = (0, 255, 0)
    thickness = 2

    for image_path, bounding_boxes, expected_count in self.images:
        print("[x] %d initial bounding boxes" % (len(bounding_boxes)))

        # Keep two canvases: one for every input box, one for the survivors.
        after_canvas = cv2.imread(image_path)
        before_canvas = after_canvas.copy()

        for x1, y1, x2, y2, _ in bounding_boxes:
            cv2.rectangle(before_canvas, (x1, y1), (x2, y2),
                          all_boxes_color, thickness)

        # Non-maximum suppression on coordinates (first 4 columns) and
        # scores (5th column).
        coords = bounding_boxes[:, :4]
        box_scores = bounding_boxes[:, 4]
        pick = func.nms(coords, box_scores, 0.3)
        print("[x] after applying non-maximum, %d bounding boxes" % (len(pick)))

        for index in pick:
            x1, y1, x2, y2 = bounding_boxes[index][:4]
            cv2.rectangle(after_canvas, (x1, y1), (x2, y2),
                          kept_boxes_color, thickness)

        self.assertEqual(len(pick), expected_count)
def stage_two(self, imgs, boxes, threshold, nms_threshold):
    """Refine stage-one candidates of an image batch with ``self.rnet``.

    Every candidate is cropped from its own image, resized to 24x24 and
    scored by ``self.rnet``.  Candidates whose score (column 1 of the first
    network output) reaches ``threshold`` are calibrated, squared, refined
    and finally filtered with ``func.nms`` separately for each image.

    Args:
        imgs: 4-D image batch; dim 0 indexes the image, dims 2 and 3 are
            the spatial dims the boxes are sliced from.
        boxes: 2-D tensor of candidates.  Columns 0-3 are the box
            coordinates (columns 0/2 slice dim 3, columns 1/3 slice dim 2);
            the last column is the index of the image the box belongs to.
        threshold: Minimum score for a candidate to be kept.
        nms_threshold: Overlap threshold forwarded to ``func.nms``.

    Returns:
        The kept boxes with their image index appended as last column.
        If ``boxes`` is empty it is returned unchanged; if no candidate
        reaches ``threshold`` the (empty) four-column box tensor is
        returned.
    """
    # Nothing to refine.
    if boxes.shape[0] == 0:
        return boxes

    width, height = imgs.shape[2], imgs.shape[3]
    batch_size = imgs.shape[0]
    img_ids = boxes[:, -1]
    boxes = boxes[:, :4]

    # Crop every candidate out of its image and bring it to the 24x24
    # input size used for rnet.
    crops = [
        torch.nn.functional.interpolate(
            imgs[img_id, :, box[1]: box[3], box[0]: box[2]].unsqueeze(0),
            size=(24, 24), mode='bilinear')
        for box, img_id in zip(boxes, img_ids)
    ]
    p_distribution, box_regs, _ = self.rnet(torch.cat(crops, 0))

    # Drop candidates scored below the threshold.
    scores = p_distribution[:, 1]
    positive = (scores >= threshold)
    boxes = boxes[positive]
    box_regs = box_regs[positive]
    scores = scores[positive]
    img_ids = img_ids[positive]

    if boxes.shape[0] == 0:
        return boxes

    boxes = self._calibrate_box(boxes, box_regs)
    boxes = self._convert_to_square(boxes)
    boxes = self._refine_boxes(boxes, width, height)

    # NMS is applied image by image so boxes of different images never
    # suppress each other.
    kept_boxes = torch.empty(0, dtype=torch.int32, device=self.device)
    kept_ids = torch.empty(0, dtype=torch.int32, device=self.device)
    for idx in range(batch_size):
        in_image = img_ids == idx
        keep = func.nms(boxes[in_image].cpu().numpy(),
                        scores[in_image].cpu().numpy(), nms_threshold)
        kept_boxes = torch.cat([kept_boxes, boxes[in_image][keep]])
        kept_ids = torch.cat([kept_ids, img_ids[in_image][keep]])

    return torch.cat([kept_boxes, kept_ids.unsqueeze(1)], -1)
def stage_one(self, img, threshold, factor, minsize, nms_threshold):
    """Run the proposal stage (``self.pnet``) over an image pyramid.

    The image is repeatedly resized by ``factor`` while both spatial sizes
    stay at least 12.  At every retained scale ``self.pnet`` is run and
    ``self._generate_bboxes`` turns its output into candidate boxes.  All
    candidates are calibrated with their offsets and filtered by
    ``func.nms``.

    Args:
        img: 4-D image tensor; dims 2 and 3 are the spatial dims that are
            rescaled.
        threshold: Score threshold forwarded to ``_generate_bboxes``.
        factor: Scaling ratio between consecutive pyramid levels.
        minsize: A scale is used only while ``12 / scale >= minsize``.
        nms_threshold: Overlap threshold forwarded to ``func.nms``.

    Returns:
        The candidate boxes kept by NMS, or the empty candidate tensor
        when no scale produced a candidate.
    """
    width = img.shape[2]
    height = img.shape[3]

    # Compute valid scales
    scales = []
    cur_width = width
    cur_height = height
    cur_factor = 1
    while cur_width >= 12 and cur_height >= 12:
        if 12 / cur_factor >= minsize:  # Ignore boxes that are smaller than minsize
            scales.append((cur_width, cur_height, cur_factor))

        next_width = math.ceil(cur_width * factor)
        next_height = math.ceil(cur_height * factor)
        # With factor >= 1, or a factor close to 1 combined with the
        # rounding up, the size stops shrinking and the loop would never
        # terminate.  Stop building the pyramid in that case.
        if next_width >= cur_width and next_height >= cur_height:
            break

        cur_factor *= factor
        cur_width = next_width
        cur_height = next_height

    # Collect the candidates of every pyramid level
    candidate_boxes = torch.empty((0, 4), dtype=torch.int32, device=self.device)
    candidate_scores = torch.empty((0), device=self.device)
    candidate_offsets = torch.empty((0, 4), dtype=torch.float32, device=self.device)
    for w, h, f in scales:
        resize_img = torch.nn.functional.interpolate(img, size=(w, h), mode='bilinear')
        p_distribution, box_regs, _ = self.pnet(resize_img)

        candidate, scores, offsets = self._generate_bboxes(
            p_distribution, box_regs, f, threshold)

        candidate_boxes = torch.cat([candidate_boxes, candidate])
        candidate_scores = torch.cat([candidate_scores, scores])
        candidate_offsets = torch.cat([candidate_offsets, offsets])

    # No candidate at any scale: nothing to calibrate or suppress.
    if candidate_boxes.shape[0] == 0:
        return candidate_boxes

    # nms
    candidate_boxes = self._calibrate_box(candidate_boxes, candidate_offsets)
    keep = func.nms(candidate_boxes.cpu().numpy(), candidate_scores.cpu().numpy(),
                    nms_threshold, device=self.device)
    return candidate_boxes[keep]
def stage_three(self, img, boxes, threshold, nms_threshold):
    """Score candidates with ``self.onet`` and compute their landmarks.

    The boxes are squared and refined, each one is cropped from ``img``,
    resized to 48x48 and passed through ``self.onet``.  Candidates whose
    score (column 1 of the first network output) reaches ``threshold`` get
    their landmarks and box calibrated and are filtered by ``func.nms``.

    Args:
        img: 4-D image tensor; dims 2 and 3 are the spatial dims the boxes
            are sliced from.
        boxes: 2-D tensor of candidates; columns 0/2 slice dim 3 and
            columns 1/3 slice dim 2 of ``img``.
        threshold: Minimum score for a candidate to be kept.
        nms_threshold: Overlap threshold forwarded to ``func.nms``.

    Returns:
        Tuple ``(boxes, landmarks)``.  For empty input the boxes are
        returned unchanged together with an empty int32 tensor.  If no
        candidate reaches ``threshold`` the empty filtered tensors are
        returned without further processing.
    """
    # Nothing to score.
    if boxes.shape[0] == 0:
        return boxes, torch.empty(0, device=self.device, dtype=torch.int32)

    width, height = img.shape[2], img.shape[3]
    boxes = self._refine_boxes(self._convert_to_square(boxes), width, height)

    # Crop each candidate and bring it to the 48x48 input size used for onet.
    crops = [
        torch.nn.functional.interpolate(
            img[:, :, box[1]:box[3], box[0]:box[2]],
            size=(48, 48), mode='bilinear')
        for box in boxes
    ]
    p_distribution, box_regs, landmarks = self.onet(torch.cat(crops, 0))

    # Drop candidates scored below the threshold.
    scores = p_distribution[:, 1]
    positive = (scores >= threshold)
    boxes = boxes[positive]
    box_regs = box_regs[positive]
    scores = scores[positive]
    landmarks = landmarks[positive]

    if boxes.shape[0] <= 0:
        return boxes, landmarks

    # Landmarks are calibrated against the boxes *before* the boxes
    # themselves are calibrated; the first five and the next five columns
    # are then stacked along a new last axis.
    landmarks = self._calibrate_landmarks(boxes, landmarks)
    landmarks = torch.stack([landmarks[:, :5], landmarks[:, 5:10]], 2)

    boxes = self._calibrate_box(boxes, box_regs)
    boxes = self._refine_boxes(boxes, width, height)

    keep = func.nms(boxes.cpu().numpy(), scores.cpu().numpy(),
                    nms_threshold, device=self.device)
    boxes = boxes[keep]
    landmarks = landmarks[keep]
    return boxes, landmarks
def stage_two(self, img, boxes, threshold, nms_threshold):
    """Refine stage-one candidates of a single image with ``self.rnet``.

    The boxes are squared and refined, each one is cropped from ``img``,
    resized to 24x24 and scored by ``self.rnet``.  Candidates whose score
    (column 1 of the first network output) reaches ``threshold`` are
    calibrated and filtered by ``func.nms``.

    Args:
        img: 4-D image tensor; dims 2 and 3 are the spatial dims the boxes
            are sliced from.
        boxes: 2-D tensor of candidates; columns 0/2 slice dim 3 and
            columns 1/3 slice dim 2 of ``img``.
        threshold: Minimum score for a candidate to be kept.
        nms_threshold: Overlap threshold forwarded to ``func.nms``.

    Returns:
        The boxes kept by NMS; the input itself when it is empty, or the
        empty filtered tensor when no candidate reaches ``threshold``.
    """
    # Nothing to refine.
    if boxes.shape[0] == 0:
        return boxes

    width, height = img.shape[2], img.shape[3]
    boxes = self._refine_boxes(self._convert_to_square(boxes), width, height)

    # Crop each candidate and bring it to the 24x24 input size used for rnet.
    crops = [
        torch.nn.functional.interpolate(
            img[:, :, box[1]:box[3], box[0]:box[2]],
            size=(24, 24), mode='bilinear')
        for box in boxes
    ]
    p_distribution, box_regs, _ = self.rnet(torch.cat(crops, 0))

    # Drop candidates scored below the threshold.
    scores = p_distribution[:, 1]
    positive = (scores >= threshold)
    boxes = boxes[positive]
    box_regs = box_regs[positive]
    scores = scores[positive]

    if boxes.shape[0] <= 0:
        return boxes

    boxes = self._calibrate_box(boxes, box_regs)
    keep = func.nms(boxes.cpu().numpy(), scores.cpu().numpy(),
                    nms_threshold, device=self.device)
    return boxes[keep]
def stage_three(self, imgs, boxes, threshold, nms_threshold):
    """Score candidates of an image batch with ``self.onet``.

    Every candidate is cropped from its own image, resized to 48x48 and
    passed through ``self.onet``.  Candidates whose score (column 1 of the
    first network output) reaches ``threshold`` get their landmarks and
    box calibrated, and ``func.nms`` is then applied separately for each
    image.

    Args:
        imgs: 4-D image batch; dim 0 indexes the image, dims 2 and 3 are
            the spatial dims the boxes are sliced from.
        boxes: 2-D tensor of candidates.  Columns 0-3 are the box
            coordinates (columns 0/2 slice dim 3, columns 1/3 slice dim 2);
            the last column is the index of the image the box belongs to.
        threshold: Minimum score for a candidate to be kept.
        nms_threshold: Overlap threshold forwarded to ``func.nms``.

    Returns:
        Tuple ``(boxes, landmarks)`` where the kept boxes carry their image
        index as last column.  For empty input the boxes are returned
        unchanged together with an empty int32 tensor.  If no candidate
        reaches ``threshold`` the empty filtered tensors are returned
        without further processing.
    """
    # Nothing to score.
    if boxes.shape[0] == 0:
        return boxes, torch.empty(0, device=self.device, dtype=torch.int32)

    width, height = imgs.shape[2], imgs.shape[3]
    batch_size = imgs.shape[0]
    img_ids = boxes[:, -1]
    boxes = boxes[:, :4]

    # Crop every candidate out of its image and bring it to the 48x48
    # input size used for onet.
    crops = [
        torch.nn.functional.interpolate(
            imgs[img_id, :, box[1]: box[3], box[0]: box[2]].unsqueeze(0),
            size=(48, 48), mode='bilinear')
        for box, img_id in zip(boxes, img_ids)
    ]
    p_distribution, box_regs, landmarks = self.onet(torch.cat(crops, 0))

    # Drop candidates scored below the threshold.
    scores = p_distribution[:, 1]
    positive = (scores >= threshold)
    boxes = boxes[positive]
    box_regs = box_regs[positive]
    scores = scores[positive]
    landmarks = landmarks[positive]
    img_ids = img_ids[positive]

    if boxes.shape[0] == 0:
        return boxes, landmarks

    # Landmarks are calibrated against the boxes *before* the boxes
    # themselves are calibrated; the first five and the next five columns
    # are then stacked along a new last axis.
    landmarks = self._calibrate_landmarks(boxes, landmarks)
    landmarks = torch.stack([landmarks[:, :5], landmarks[:, 5:10]], 2)

    boxes = self._calibrate_box(boxes, box_regs)
    boxes = self._refine_boxes(boxes, width, height)

    # NMS is applied image by image so boxes of different images never
    # suppress each other; landmarks follow the boxes that survive.
    kept_boxes = torch.empty(0, dtype=torch.int32, device=self.device)
    kept_ids = torch.empty(0, dtype=torch.int32, device=self.device)
    kept_landmarks = torch.empty(0, dtype=torch.int32, device=self.device)
    for idx in range(batch_size):
        in_image = img_ids == idx
        keep = func.nms(boxes[in_image].cpu().numpy(),
                        scores[in_image].cpu().numpy(), nms_threshold)
        kept_boxes = torch.cat([kept_boxes, boxes[in_image][keep]])
        kept_ids = torch.cat([kept_ids, img_ids[in_image][keep]])
        kept_landmarks = torch.cat(
            [kept_landmarks, landmarks[in_image][keep]])

    labelled_boxes = torch.cat([kept_boxes, kept_ids.unsqueeze(1)], -1)
    return labelled_boxes, kept_landmarks
def stage_one(self, imgs, threshold, factor, minsize, nms_threshold):
    """Stage one of mtcnn detection.

    The batch is repeatedly resized by ``factor`` while both spatial sizes
    stay at least 12.  At every retained scale ``self.pnet`` is run and
    ``self._generate_bboxes`` turns its output into candidate boxes with
    image labels.  The candidates are calibrated, squared, refined and
    filtered with ``func.nms`` separately for each image.

    Args:
        imgs (torch.FloatTensor): Output of "_preprocess" method.
        threshold (float): The minimum probability of reserve bounding boxes.
        factor (float): Image pyramid scaling ratio.
        minsize (int): The minimum size of reserve bounding boxes.
        nms_threshold (float): retain boxes that satisfy overlap <= thresh

    Returns:
        torch.IntTensor: All bounding boxes with image label output by
            stage one detection. [n, 5].  When no scale produces a
            candidate, the empty candidate tensor is returned as is.
    """
    width = imgs.shape[-2]
    height = imgs.shape[-1]
    num_img = imgs.shape[0]

    # Compute valid scales
    scales = []
    cur_width = width
    cur_height = height
    cur_factor = 1
    while cur_width >= 12 and cur_height >= 12:
        if 12 / cur_factor >= minsize:  # Ignore boxes that are smaller than minsize
            scales.append((cur_width, cur_height, cur_factor))

        next_width = math.ceil(cur_width * factor)
        next_height = math.ceil(cur_height * factor)
        # With factor >= 1, or a factor close to 1 combined with the
        # rounding up, the size stops shrinking and the loop would never
        # terminate.  Stop building the pyramid in that case.
        if next_width >= cur_width and next_height >= cur_height:
            break

        cur_factor *= factor
        cur_width = next_width
        cur_height = next_height

    # Collect the candidates of every pyramid level
    candidate_boxes = torch.empty(0, dtype=torch.int32, device=self.device)
    candidate_scores = torch.empty(0, device=self.device)
    candidate_offsets = torch.empty(
        0, dtype=torch.float32, device=self.device)
    all_img_labels = torch.empty(0, dtype=torch.int32, device=self.device)
    for w, h, f in scales:
        resize_img = torch.nn.functional.interpolate(
            imgs, size=(w, h), mode='bilinear')
        p_distribution, box_regs, _ = self.pnet(resize_img)

        candidate, scores, offsets, img_labels = self._generate_bboxes(
            p_distribution, box_regs, f, threshold)

        candidate_boxes = torch.cat([candidate_boxes, candidate])
        candidate_scores = torch.cat([candidate_scores, scores])
        candidate_offsets = torch.cat([candidate_offsets, offsets])
        all_img_labels = torch.cat([all_img_labels, img_labels])

    # No candidate at any scale: nothing to calibrate or suppress.
    if candidate_boxes.shape[0] == 0:
        return candidate_boxes

    candidate_boxes = self._calibrate_box(
        candidate_boxes, candidate_offsets)
    candidate_boxes = self._convert_to_square(candidate_boxes)
    candidate_boxes = self._refine_boxes(
        candidate_boxes, width, height)

    # NMS is applied image by image so boxes of different images never
    # suppress each other.
    final_boxes = torch.empty(0, dtype=torch.int32, device=self.device)
    final_img_labels = torch.empty(
        0, dtype=torch.int32, device=self.device)
    for i in range(num_img):
        mask = all_img_labels == i
        keep = func.nms(candidate_boxes[mask].cpu().numpy(),
                        candidate_scores[mask].cpu().numpy(),
                        nms_threshold)
        final_boxes = torch.cat([final_boxes, candidate_boxes[mask][keep]])
        final_img_labels = torch.cat(
            [final_img_labels, all_img_labels[mask][keep]])

    return torch.cat([final_boxes, final_img_labels.unsqueeze(1)], -1)