Beispiel #1
0
    def test_nms(self):
        """Check that NMS keeps the expected number of boxes for every fixture.

        Each entry of ``self.images`` is unpacked as ``(imagePath,
        boundingBoxes, num_face)``. ``boundingBoxes`` is sliced as a 2-D
        array: columns 0-3 are the box corners and column 4 is handed to
        ``func.nms`` as its second argument.
        """
        for (imagePath, boundingBoxes, num_face) in self.images:
            print("[x] %d initial bounding boxes" % (len(boundingBoxes)))

            # cv2.imread reports an unreadable file by returning None rather
            # than raising, so fail here with the offending path instead of
            # with an AttributeError on image.copy() below.
            image = cv2.imread(imagePath)
            self.assertIsNotNone(image, "could not read image: %s" % imagePath)
            orig = image.copy()

            # draw every initial bounding box on the untouched copy
            for (startX, startY, endX, endY, _) in boundingBoxes:
                cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 0, 255), 2)

            # perform non-maximum suppression on the bounding boxes
            pick = func.nms(boundingBoxes[:, :4], boundingBoxes[:, 4], 0.3)
            print("[x] after applying non-maximum, %d bounding boxes" % (len(pick)))

            # draw the boxes that survived suppression on the loaded image
            for i in pick:
                (startX, startY, endX, endY) = boundingBoxes[i][:4]
                cv2.rectangle(image, (startX, startY), (endX, endY), (0, 255, 0), 2)

            # To inspect the result visually, uncomment:
            # cv2.imshow("Original", orig)
            # cv2.imshow("After NMS", image)
            # cv2.waitKey(0)
            self.assertEqual(len(pick), num_face)
Beispiel #2
0
    def stage_two(self, imgs, boxes, threshold, nms_threshold):
        """Refine candidate boxes for a batch of images with ``self.rnet``.

        Args:
            imgs: Batched image tensor, cropped as
                ``imgs[label, :, y1:y2, x1:x2]``.
            boxes: Candidate boxes. The first four columns are used as
                ``x1, y1, x2, y2`` crop bounds and the last column as the
                index of the image inside ``imgs``.
            threshold: Minimum value of ``p_distribution[:, 1]`` for a
                candidate to be kept.
            nms_threshold: Threshold forwarded to ``func.nms``.

        Returns:
            Tensor with four box columns followed by the image-label column.
            If ``boxes`` is empty on entry it is returned unchanged.
        """
        # no candidate face found.
        if boxes.shape[0] == 0:
            return boxes

        width = imgs.shape[2]
        height = imgs.shape[3]
        num_img = imgs.shape[0]

        labels = boxes[:, -1]
        boxes = boxes[:, :4]

        # Crop each candidate from its own image and rescale it to 24x24.
        candidate_faces = []
        for box, label in zip(boxes, labels):
            im = imgs[label, :, box[1]:box[3], box[0]:box[2]].unsqueeze(0)
            im = torch.nn.functional.interpolate(
                im, size=(24, 24), mode='bilinear')
            candidate_faces.append(im)
        candidate_faces = torch.cat(candidate_faces, 0)

        # rnet forward pass
        p_distribution, box_regs, _ = self.rnet(candidate_faces)

        # filter negative boxes
        scores = p_distribution[:, 1]
        mask = (scores >= threshold)
        boxes = boxes[mask]
        box_regs = box_regs[mask]
        scores = scores[mask]
        labels = labels[mask]

        if boxes.shape[0] == 0:
            # Every candidate was rejected. Keep the label column so the
            # empty result has the same column layout as the non-empty one
            # (previously a bare 4-column tensor was returned here).
            return torch.cat([boxes, labels.unsqueeze(1)], -1)

        boxes = self._calibrate_box(boxes, box_regs)
        boxes = self._convert_to_square(boxes)
        boxes = self._refine_boxes(boxes, width, height)

        # Run NMS separately per image so boxes from different images never
        # suppress each other.
        final_boxes = torch.empty(0, dtype=torch.int32, device=self.device)
        final_img_labels = torch.empty(0, dtype=torch.int32, device=self.device)
        for i in range(num_img):
            in_image = labels == i
            img_boxes = boxes[in_image]
            keep = func.nms(img_boxes.cpu().numpy(),
                            scores[in_image].cpu().numpy(), nms_threshold)
            final_boxes = torch.cat([final_boxes, img_boxes[keep]])
            final_img_labels = torch.cat(
                [final_img_labels, labels[in_image][keep]])

        return torch.cat([final_boxes, final_img_labels.unsqueeze(1)], -1)
Beispiel #3
0
    def stage_one(self, img, threshold, factor, minsize, nms_threshold):
        """Run ``self.pnet`` over an image pyramid and return candidate boxes.

        Args:
            img: 4-D image tensor; dimensions 2 and 3 are the sizes that are
                rescaled for every pyramid level.
            threshold: Forwarded to ``self._generate_bboxes``.
            factor: Pyramid scaling ratio applied between levels. Must be
                smaller than 1.
            minsize: Levels for which ``12 / scale < minsize`` are skipped.
            nms_threshold: Threshold forwarded to ``func.nms``.

        Returns:
            The calibrated candidate boxes kept by NMS, or an empty
            ``[0, 4]`` int32 tensor when no level produced a candidate.

        Raises:
            ValueError: If ``factor`` is not smaller than 1.
        """
        if factor >= 1:
            raise ValueError(
                "factor must be smaller than 1, got %r" % (factor,))

        width = img.shape[2]
        height = img.shape[3]

        # Compute valid scales
        scales = []
        cur_width = width
        cur_height = height
        cur_factor = 1
        while cur_width >= 12 and cur_height >= 12:
            if 12 / cur_factor >= minsize:  # Ignore boxes smaller than minsize
                scales.append((cur_width, cur_height, cur_factor))

            cur_factor *= factor
            # ceil() alone can round back up to the current size when factor
            # is close to 1 (e.g. ceil(16 * 0.95) == 16), which would loop
            # forever. Always shrink by at least one pixel; for
            # factor <= 11/12 this is identical to the plain ceil().
            cur_width = min(cur_width - 1, math.ceil(cur_width * factor))
            cur_height = min(cur_height - 1, math.ceil(cur_height * factor))

        # Accumulate candidates from every pyramid level
        candidate_boxes = torch.empty((0, 4),
                                      dtype=torch.int32,
                                      device=self.device)
        candidate_scores = torch.empty((0), device=self.device)
        candidate_offsets = torch.empty((0, 4),
                                        dtype=torch.float32,
                                        device=self.device)
        for w, h, f in scales:
            resize_img = torch.nn.functional.interpolate(img,
                                                         size=(w, h),
                                                         mode='bilinear')
            p_distribution, box_regs, _ = self.pnet(resize_img)

            candidate, scores, offsets = self._generate_bboxes(
                p_distribution, box_regs, f, threshold)

            candidate_boxes = torch.cat([candidate_boxes, candidate])
            candidate_scores = torch.cat([candidate_scores, scores])
            candidate_offsets = torch.cat([candidate_offsets, offsets])

        # Nothing to calibrate or suppress.
        if candidate_boxes.shape[0] == 0:
            return candidate_boxes

        # nms
        candidate_boxes = self._calibrate_box(candidate_boxes,
                                              candidate_offsets)
        keep = func.nms(candidate_boxes.cpu().numpy(),
                        candidate_scores.cpu().numpy(),
                        nms_threshold,
                        device=self.device)
        return candidate_boxes[keep]
Beispiel #4
0
    def stage_three(self, img, boxes, threshold, nms_threshold):
        """Score candidate boxes with ``self.onet`` and return boxes + landmarks.

        Args:
            img: 4-D image tensor, cropped as ``img[:, :, y1:y2, x1:x2]``.
            boxes: Candidate boxes whose four columns are used as
                ``x1, y1, x2, y2`` crop bounds.
            threshold: Minimum value of ``p_distribution[:, 1]`` for a
                candidate to be kept.
            nms_threshold: Threshold forwarded to ``func.nms``.

        Returns:
            ``(boxes, landmarks)``. ``landmarks`` pairs columns 0-4 with
            columns 5-9 of the network output along a new last axis, giving
            shape ``[n, 5, 2]``. If ``boxes`` is empty on entry, it is
            returned unchanged together with an empty 1-D int32 tensor.
        """
        # no candidate face found.
        if boxes.shape[0] == 0:
            return boxes, torch.empty(0, device=self.device, dtype=torch.int32)

        width = img.shape[2]
        height = img.shape[3]

        boxes = self._convert_to_square(boxes)
        boxes = self._refine_boxes(boxes, width, height)

        # Crop each candidate and rescale it to 48x48.
        candidate_faces = []
        for box in boxes:
            im = img[:, :, box[1]:box[3], box[0]:box[2]]
            im = torch.nn.functional.interpolate(im,
                                                 size=(48, 48),
                                                 mode='bilinear')
            candidate_faces.append(im)
        candidate_faces = torch.cat(candidate_faces, 0)

        p_distribution, box_regs, landmarks = self.onet(candidate_faces)

        # filter negative boxes
        scores = p_distribution[:, 1]
        mask = (scores >= threshold)
        boxes = boxes[mask]
        box_regs = box_regs[mask]
        scores = scores[mask]
        landmarks = landmarks[mask]

        if boxes.shape[0] > 0:
            # Landmarks are calibrated against the boxes *before* the box
            # regression is applied, so this must run first.
            landmarks = self._calibrate_landmarks(boxes, landmarks)
            boxes = self._calibrate_box(boxes, box_regs)
            boxes = self._refine_boxes(boxes, width, height)

            # nms
            keep = func.nms(boxes.cpu().numpy(),
                            scores.cpu().numpy(),
                            nms_threshold,
                            device=self.device)
            boxes = boxes[keep]
            landmarks = landmarks[keep]

        # Pair the two 5-column halves on both paths, so an all-rejected
        # result has the same [n, 5, 2] layout as a non-empty one (it used
        # to come back in the raw 10-column layout).
        landmarks = torch.stack([landmarks[:, :5], landmarks[:, 5:10]], 2)

        return boxes, landmarks
Beispiel #5
0
    def stage_two(self, img, boxes, threshold, nms_threshold):
        """Refine candidate boxes for one image with ``self.rnet``.

        Args:
            img: 4-D image tensor, cropped as ``img[:, :, y1:y2, x1:x2]``.
            boxes: Candidate boxes whose four columns are used as
                ``x1, y1, x2, y2`` crop bounds.
            threshold: Minimum value of ``p_distribution[:, 1]`` for a
                candidate to be kept.
            nms_threshold: Threshold forwarded to ``func.nms``.

        Returns:
            The boxes that pass the score filter and NMS. An empty input is
            returned unchanged.
        """
        # Nothing to refine.
        if boxes.shape[0] == 0:
            return boxes

        img_w = img.shape[2]
        img_h = img.shape[3]
        boxes = self._refine_boxes(self._convert_to_square(boxes), img_w, img_h)

        # Cut out every candidate region and bring it to 24x24.
        resize = torch.nn.functional.interpolate
        crops = [
            resize(img[:, :, b[1]:b[3], b[0]:b[2]],
                   size=(24, 24),
                   mode='bilinear')
            for b in boxes
        ]
        rnet_input = torch.cat(crops, 0)

        p_distribution, box_regs, _ = self.rnet(rnet_input)

        # Drop everything scored below the threshold.
        face_prob = p_distribution[:, 1]
        is_face = face_prob >= threshold
        boxes = boxes[is_face]
        box_regs = box_regs[is_face]
        face_prob = face_prob[is_face]

        if boxes.shape[0] == 0:
            return boxes

        boxes = self._calibrate_box(boxes, box_regs)
        keep = func.nms(boxes.cpu().numpy(),
                        face_prob.cpu().numpy(),
                        nms_threshold,
                        device=self.device)
        return boxes[keep]
Beispiel #6
0
    def stage_three(self, imgs, boxes, threshold, nms_threshold):
        """Score candidates for a batch of images with ``self.onet``.

        Args:
            imgs: Batched image tensor, cropped as
                ``imgs[label, :, y1:y2, x1:x2]``.
            boxes: Candidate boxes. The first four columns are used as
                ``x1, y1, x2, y2`` crop bounds and the last column as the
                index of the image inside ``imgs``.
            threshold: Minimum value of ``p_distribution[:, 1]`` for a
                candidate to be kept.
            nms_threshold: Threshold forwarded to ``func.nms``.

        Returns:
            ``(boxes, landmarks)``. ``boxes`` has four box columns followed
            by the image-label column; ``landmarks`` pairs columns 0-4 with
            columns 5-9 of the network output along a new last axis
            (``[n, 5, 2]``). If ``boxes`` is empty on entry, it is returned
            unchanged together with an empty 1-D int32 tensor.
        """
        # no candidate face found.
        if boxes.shape[0] == 0:
            return boxes, torch.empty(0, device=self.device, dtype=torch.int32)

        width = imgs.shape[2]
        height = imgs.shape[3]
        num_img = imgs.shape[0]

        labels = boxes[:, -1]
        boxes = boxes[:, :4]

        # Crop each candidate from its own image and rescale it to 48x48.
        candidate_faces = []
        for box, label in zip(boxes, labels):
            im = imgs[label, :, box[1]:box[3], box[0]:box[2]].unsqueeze(0)
            im = torch.nn.functional.interpolate(
                im, size=(48, 48), mode='bilinear')
            candidate_faces.append(im)
        candidate_faces = torch.cat(candidate_faces, 0)

        p_distribution, box_regs, landmarks = self.onet(candidate_faces)

        # filter negative boxes
        scores = p_distribution[:, 1]
        mask = (scores >= threshold)
        boxes = boxes[mask]
        box_regs = box_regs[mask]
        scores = scores[mask]
        landmarks = landmarks[mask]
        labels = labels[mask]

        if boxes.shape[0] == 0:
            # Every candidate was rejected. Return the same layout as the
            # non-empty path (label column on the boxes, paired landmark
            # halves) instead of the raw filtered tensors.
            return (torch.cat([boxes, labels.unsqueeze(1)], -1),
                    torch.stack([landmarks[:, :5], landmarks[:, 5:10]], 2))

        # Landmarks are calibrated against the boxes *before* the box
        # regression is applied, so this must run first.
        landmarks = self._calibrate_landmarks(boxes, landmarks)
        landmarks = torch.stack([landmarks[:, :5], landmarks[:, 5:10]], 2)

        boxes = self._calibrate_box(boxes, box_regs)
        boxes = self._refine_boxes(boxes, width, height)

        # Run NMS separately per image so boxes from different images never
        # suppress each other.
        final_boxes = torch.empty(0, dtype=torch.int32, device=self.device)
        final_img_labels = torch.empty(0, dtype=torch.int32, device=self.device)
        final_landmarks = torch.empty(0, dtype=torch.int32, device=self.device)
        for i in range(num_img):
            in_image = labels == i
            img_boxes = boxes[in_image]
            keep = func.nms(img_boxes.cpu().numpy(),
                            scores[in_image].cpu().numpy(), nms_threshold)
            final_boxes = torch.cat([final_boxes, img_boxes[keep]])
            final_img_labels = torch.cat(
                [final_img_labels, labels[in_image][keep]])
            final_landmarks = torch.cat(
                [final_landmarks, landmarks[in_image][keep]])

        return torch.cat([final_boxes, final_img_labels.unsqueeze(1)], -1), final_landmarks
Beispiel #7
0
    def stage_one(self, imgs, threshold, factor, minsize, nms_threshold):
        """Stage one of mtcnn detection.

        Args:
            imgs (torch.FloatTensor): Output of "_preprocess" method.
            threshold (float): The minimum probability of reserve bounding boxes.
            factor (float): Image pyramid scaling ratio. Must be smaller than 1.
            minsize (int): The minimum size of reserve bounding boxes.
            nms_threshold (float): retain boxes that satisfy overlap <= thresh

        Returns:
            torch.IntTensor: All bounding boxes with image label output by
            stage one detection. [n, 5]; an empty [0, 5] tensor when no
            candidate was found.

        Raises:
            ValueError: If ``factor`` is not smaller than 1.
        """
        if factor >= 1:
            raise ValueError(
                "factor must be smaller than 1, got %r" % (factor,))

        width = imgs.shape[-2]
        height = imgs.shape[-1]
        num_img = imgs.shape[0]

        # Compute valid scales
        scales = []
        cur_width = width
        cur_height = height
        cur_factor = 1
        while cur_width >= 12 and cur_height >= 12:
            if 12 / cur_factor >= minsize:  # Ignore boxes smaller than minsize
                scales.append((cur_width, cur_height, cur_factor))

            cur_factor *= factor
            # ceil() alone can round back up to the current size when factor
            # is close to 1 (e.g. ceil(16 * 0.95) == 16), which would loop
            # forever. Always shrink by at least one pixel; for
            # factor <= 11/12 this is identical to the plain ceil().
            cur_width = min(cur_width - 1, math.ceil(cur_width * factor))
            cur_height = min(cur_height - 1, math.ceil(cur_height * factor))

        # Accumulate candidates from every pyramid level. The 1-D empty
        # seeds rely on torch.cat skipping tensors of shape (0,).
        candidate_boxes = torch.empty(0, dtype=torch.int32, device=self.device)
        candidate_scores = torch.empty(0, device=self.device)
        candidate_offsets = torch.empty(
            0, dtype=torch.float32, device=self.device)
        all_img_labels = torch.empty(0, dtype=torch.int32, device=self.device)
        for w, h, f in scales:
            resize_img = torch.nn.functional.interpolate(
                imgs, size=(w, h), mode='bilinear')
            p_distribution, box_regs, _ = self.pnet(resize_img)

            candidate, scores, offsets, img_labels = self._generate_bboxes(
                p_distribution, box_regs, f, threshold)

            candidate_boxes = torch.cat([candidate_boxes, candidate])
            candidate_scores = torch.cat([candidate_scores, scores])
            candidate_offsets = torch.cat([candidate_offsets, offsets])
            all_img_labels = torch.cat([all_img_labels, img_labels])

        if candidate_boxes.shape[0] == 0:
            # No candidate at any scale: return the documented [n, 5]
            # layout with n == 0 rather than the bare accumulator.
            return torch.empty((0, 5), dtype=torch.int32, device=self.device)

        candidate_boxes = self._calibrate_box(
            candidate_boxes, candidate_offsets)
        candidate_boxes = self._convert_to_square(candidate_boxes)
        candidate_boxes = self._refine_boxes(
            candidate_boxes, width, height)

        # Run NMS separately per image so boxes from different images never
        # suppress each other.
        final_boxes = torch.empty(0, dtype=torch.int32, device=self.device)
        final_img_labels = torch.empty(0, dtype=torch.int32, device=self.device)
        for i in range(num_img):
            in_image = all_img_labels == i
            img_boxes = candidate_boxes[in_image]
            keep = func.nms(img_boxes.cpu().numpy(),
                            candidate_scores[in_image].cpu().numpy(),
                            nms_threshold)
            final_boxes = torch.cat([final_boxes, img_boxes[keep]])
            final_img_labels = torch.cat(
                [final_img_labels, all_img_labels[in_image][keep]])

        return torch.cat([final_boxes, final_img_labels.unsqueeze(1)], -1)