Example #1
0
    def detect_pnet(self, im):
        """Propose face candidates by running pnet over an image pyramid

        Starting from the scale ``config.PNET_SIZE / self.min_face_size``,
        the image is resized, passed through pnet and the scale is multiplied
        by ``self.scale_factor``; this repeats for as long as the shorter
        side of the resized image is larger than ``config.PNET_SIZE``.

        Parameters:
        -----------
        im: numpy array, input image array
            its shape is unpacked into three values, so it must be
            3-dimensional

        Returns:
        --------
        bboxes_align: numpy array, or None
            bboxes after calibration; None when no pyramid level produced
            any candidate
        """
        # values are unused; the unpacking still requires a 3-dimensional array
        _im_h, _im_w, _im_c = im.shape
        pnet_size = config.PNET_SIZE
        # find initial scale
        scale = float(pnet_size) / self.min_face_size
        scaled_im = self.resize_image(im, scale)
        scaled_h, scaled_w, _ = scaled_im.shape

        # candidates collected from every pyramid level
        per_scale_bboxes = []
        while min(scaled_h, scaled_w) > pnet_size:
            batch = utils.convert_image_to_tensor(scaled_im).unsqueeze(0)
            batch = batch.to(self.device)

            cls_map, reg_map = self.pnet_detector(batch)
            cls_hwc = utils.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
            reg_hwc = utils.convert_chwTensor_to_hwcNumpy(reg_map.cpu())
            candidates = self.generate_bounding_box(
                cls_hwc, reg_hwc, scale, self.thresh[0])

            # move on to the next pyramid level before looking at the
            # candidates, so the loop condition is always refreshed
            scale *= self.scale_factor
            scaled_im = self.resize_image(im, scale)
            scaled_h, scaled_w, _ = scaled_im.shape

            if candidates.size != 0:
                # per-level nms on the first five columns
                kept = utils.nms(candidates[:, :5], 0.5, 'Union')
                per_scale_bboxes.append(candidates[kept])

        if not per_scale_bboxes:
            return None

        merged = np.vstack(per_scale_bboxes)

        # apply nms to the detections from all the scales
        kept = utils.nms(merged[:, 0:5], 0.7, 'Union')
        merged = merged[kept]

        # columns 0:5 are handed over as the boxes (presumably x1, y1, x2, y2,
        # score -- confirm against generate_bounding_box), columns 5: as the
        # regression offsets
        boxes = merged[:, 0:5]
        offsets = merged[:, 5:]
        bboxes_align = utils.calibrate_box(boxes, offsets)
        bboxes_align = utils.convert_to_square(bboxes_align)
        bboxes_align[:, 0:4] = np.round(bboxes_align[:, 0:4])

        return bboxes_align
Example #2
0
    def detect_onet(self, im, bboxes):
        """Get face candidates using onet

        Every proposal is cropped from ``im``, resized to
        ``config.ONET_SIZE`` and classified by onet. Proposals whose score
        (column 1 of the classifier output) exceeds ``self.thresh[2]`` are
        calibrated, filtered by nms and converted to squares.

        Parameters:
        ----------
        im: numpy array
            input image array
        bboxes: numpy array
            detection results of rnet

        Returns:
        -------
        bboxes_align: numpy array, or None
            bboxes after calibration; None when ``bboxes`` is None, when no
            proposal could be cropped, or when nothing survives the score
            threshold / nms
        """
        net_size = config.ONET_SIZE
        h, w, c = im.shape
        if bboxes is None:
            return None

        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = utils.correct_bboxes(bboxes, w, h)
        num_bboxes = bboxes.shape[0]

        # crop face using rnet proposal
        cropped_ims_tensors = []
        # indices of the proposals that actually produced a crop; the network
        # output rows correspond to these, not to all rows of ``bboxes``
        valid_inds = []
        for i in range(num_bboxes):
            if tmph[i] <= 0 or tmpw[i] <= 0:
                continue
            try:
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i],
                                                        x[i]:ex[i], :]
                crop_im = cv2.resize(tmp, (net_size, net_size))
                crop_im_tensor = utils.convert_image_to_tensor(crop_im)
            except ValueError as e:
                # best effort: report and drop this proposal
                print(e)
            else:
                cropped_ims_tensors.append(crop_im_tensor)
                valid_inds.append(i)

        # torch.stack cannot handle an empty list
        if not cropped_ims_tensors:
            return None

        # keep the boxes row-aligned with the batch fed to the network
        bboxes = bboxes[valid_inds]

        feed_imgs = torch.stack(cropped_ims_tensors)
        feed_imgs = feed_imgs.to(self.device)

        # inference only, no autograd graph needed
        with torch.no_grad():
            cls, reg = self.onet_detector(feed_imgs)
        cls = cls.detach().cpu().numpy()
        reg = reg.detach().cpu().numpy()

        keep_inds = np.where(cls[:, 1] > self.thresh[2])[0]
        if len(keep_inds) == 0:
            return None

        keep_bboxes = bboxes[keep_inds]
        keep_reg = reg[keep_inds]
        # column 1 of the classifier output becomes the box score
        keep_bboxes[:, 4] = cls[keep_inds, 1].reshape((-1, ))

        bboxes_align = utils.calibrate_box(keep_bboxes, keep_reg)
        keep = utils.nms(bboxes_align, 0.7, mode='Minimum')

        if len(keep) == 0:
            return None

        bboxes_align = bboxes_align[keep]
        bboxes_align = utils.convert_to_square(bboxes_align)
        return bboxes_align
Example #3
0
    def detect_rnet(self, im, bboxes):
        """Get face candidates using rnet

        Every proposal is cropped from ``im``, resized to
        ``config.RNET_SIZE`` and classified by rnet. Proposals whose score
        (column 1 of the classifier output) exceeds ``self.thresh[1]`` are
        filtered by nms, calibrated, converted to squares and rounded.

        Parameters:
        ----------
        im: numpy array
            input image array
        bboxes: numpy array
            detection results of pnet

        Returns:
        -------
        bboxes_align: numpy array, or None
            bboxes after calibration; None when ``bboxes`` is None, when no
            proposal could be cropped, or when nothing survives the score
            threshold / nms
        """
        net_size = config.RNET_SIZE
        h, w, c = im.shape
        if bboxes is None:
            return None

        num_bboxes = bboxes.shape[0]

        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = utils.correct_bboxes(bboxes, w, h)

        # crop face using pnet proposals
        cropped_ims_tensors = []
        # indices of the proposals that actually produced a crop; the network
        # output rows correspond to these, not to all rows of ``bboxes``
        valid_inds = []
        for i in range(num_bboxes):
            if tmph[i] <= 0 or tmpw[i] <= 0:
                continue
            try:
                tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i],
                                                        x[i]:ex[i], :]
                crop_im = cv2.resize(tmp, (net_size, net_size))
                crop_im_tensor = utils.convert_image_to_tensor(crop_im)
            except ValueError as e:
                # best effort: report the offending coordinates and drop
                # this proposal
                print('dy: {}, edy: {}, dx: {}, edx: {}'.format(
                    dy[i], edy[i], dx[i], edx[i]))
                print('y: {}, ey: {}, x: {}, ex: {}'.format(
                    y[i], ey[i], x[i], ex[i]))
                print(e)
            else:
                cropped_ims_tensors.append(crop_im_tensor)
                valid_inds.append(i)

        # torch.stack cannot handle an empty list
        if not cropped_ims_tensors:
            return None

        # keep the boxes row-aligned with the batch fed to the network
        bboxes = bboxes[valid_inds]

        # provide input tensor, if there are too many proposals in PNet
        # there might be OOM
        feed_imgs = torch.stack(cropped_ims_tensors)
        feed_imgs = feed_imgs.to(self.device)

        # inference only, no autograd graph needed
        with torch.no_grad():
            cls, reg = self.rnet_detector(feed_imgs)
        cls = cls.detach().cpu().numpy()
        reg = reg.detach().cpu().numpy()

        keep_inds = np.where(cls[:, 1] > self.thresh[1])[0]
        if len(keep_inds) == 0:
            return None

        keep_bboxes = bboxes[keep_inds]
        keep_reg = reg[keep_inds]
        # using softmax 1 as cls score
        keep_bboxes[:, 4] = cls[keep_inds, 1].reshape((-1, ))

        keep = utils.nms(keep_bboxes, 0.7)
        if len(keep) == 0:
            return None

        keep_bboxes = keep_bboxes[keep]
        keep_reg = keep_reg[keep]

        bboxes_align = utils.calibrate_box(keep_bboxes, keep_reg)
        bboxes_align = utils.convert_to_square(bboxes_align)
        bboxes_align[:, 0:4] = np.round(bboxes_align[:, 0:4])

        return bboxes_align