Exemple #1
0
    def detect_pnet(self, im):
        """Collect first-stage (PNet) face candidates over an image pyramid.

        The image is resized repeatedly (scale multiplied by
        ``self.scale_factor`` each round) until its shorter side is no
        longer greater than 12. PNet is run on every level, each level's
        candidates are pruned with NMS, and the union of all levels is
        pruned once more before the regression offsets are applied.

        Parameters:
        ----------
        im: numpy array
            input image array; ``im.shape`` must unpack into three values

        Returns:
        -------
        boxes: numpy array
            columns 0-4 of the merged candidates
            ([x1, y1, x2, y2, score]), before calibration
        boxes_align: numpy array
            the same rows with the regression offsets (columns 5-8,
            scaled by box width/height) added to the corners
        Both values are None when no pyramid level produced a candidate.
        """
        # The values are not needed below, but the 3-way unpack rejects
        # inputs whose shape does not have exactly three entries.
        _img_h, _img_w, _img_c = im.shape

        window = 12  # the original notes describe a 12x12 receptive field

        # initial scale of the pyramid
        scale = float(window) / self.min_face_size
        level = self.resize_image(im, scale)
        level_h, level_w, _ = level.shape

        per_level = []
        while min(level_h, level_w) > window:
            # a batch holding just this pyramid level
            batch = Variable(
                torch.stack([image_tools.convert_image_to_tensor(level)]))
            if self.pnet_detector.use_cuda:
                batch = batch.cuda()

            # PNet yields a classification map and a box-regression map
            score_map, offsets = self.pnet_detector(batch)
            score_np = image_tools.convert_chwTensor_to_hwcNumpy(
                score_map.cpu())
            offsets_np = image_tools.convert_chwTensor_to_hwcNumpy(
                offsets.cpu())

            # rows are [x1, y1, x2, y2, score, reg]
            candidates = self.generate_bounding_box(
                score_np[0, :, :], offsets_np, scale, self.thresh[0])

            # Prepare the next pyramid level before deciding whether this
            # one contributed anything, so the loop always advances.
            scale *= self.scale_factor
            level = self.resize_image(im, scale)
            level_h, level_w, _ = level.shape

            if candidates.size != 0:
                # per-level non-maximum suppression
                picked = utils.nms(candidates[:, :5], 0.5, 'Union')
                per_level.append(candidates[picked])

        if not per_level:
            return None, None

        # merge the detections of all levels, then prune across levels
        merged = np.vstack(per_level)
        merged = merged[utils.nms(merged[:, 0:5], 0.7, 'Union')]

        x1, y1, x2, y2, score = (merged[:, col] for col in range(5))
        box_w = x2 - x1 + 1
        box_h = y2 - y1 + 1

        # uncalibrated boxes
        boxes = np.vstack([x1, y1, x2, y2, score]).T

        # calibrated boxes: each corner is shifted by its regression
        # offset times the box width (x) or height (y)
        boxes_align = np.vstack([
            x1 + merged[:, 5] * box_w,
            y1 + merged[:, 6] * box_h,
            x2 + merged[:, 7] * box_w,
            y2 + merged[:, 8] * box_h,
            score,
        ]).T

        return boxes, boxes_align
Exemple #2
0
    def detect_pnet(self, im):
        """Get face candidates through pnet

        PNet is run on an image pyramid; each level's candidates are pruned
        with NMS. In this variant the cross-level NMS is performed on the
        *calibrated* boxes, and the same selection is applied to the
        uncalibrated boxes so both outputs describe the same candidates.

        Parameters:
        ----------
        im: numpy array
            input image array; ``im.shape`` is unpacked as (h, w, c)

        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration, columns
            [x1, y1, x2, y2, score]
        boxes_align: numpy array
            boxes after calibration, clipped to [0, w] x [0, h]; row i
            corresponds to row i of ``boxes``
        Both values are None when no pyramid level produced a candidate.
        """

        h, w, _ = im.shape
        net_size = 12

        # find initial scale
        current_scale = float(net_size) / self.min_face_size
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        # fcn
        all_boxes = []
        while min(current_height, current_width) > net_size:
            image_tensor = image_tools.convert_image_to_tensor(im_resized)
            feed_imgs = Variable(torch.stack([image_tensor]).float())

            if self.pnet_detector.use_cuda:
                feed_imgs = feed_imgs.cuda()

            cls_map, reg = self.pnet_detector(feed_imgs)

            cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(
                cls_map.cpu())
            reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())

            boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np,
                                               current_scale, self.thresh[0])

            # advance to the next pyramid level before any `continue`
            current_scale *= self.scale_factor
            im_resized = self.resize_image(im, current_scale)
            current_height, current_width, _ = im_resized.shape

            if boxes.size == 0:
                continue
            keep = utils.nms(boxes[:, :5], 0.5, 'Union')
            all_boxes.append(boxes[keep])

        if not all_boxes:
            return None, None

        all_boxes = np.vstack(all_boxes)

        bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
        bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

        boxes = np.vstack([
            all_boxes[:, 0],
            all_boxes[:, 1],
            all_boxes[:, 2],
            all_boxes[:, 3],
            all_boxes[:, 4],
        ]).T

        # Apply the regression offsets (columns 5-8, relative to the box
        # size) and clip the calibrated corners to the image.
        align_topx = np.maximum(all_boxes[:, 0] + all_boxes[:, 5] * bw, 0)
        align_topy = np.maximum(all_boxes[:, 1] + all_boxes[:, 6] * bh, 0)
        align_bottomx = np.minimum(all_boxes[:, 2] + all_boxes[:, 7] * bw, w)
        align_bottomy = np.minimum(all_boxes[:, 3] + all_boxes[:, 8] * bh, h)

        # refine the boxes
        boxes_align = np.vstack([
            align_topx,
            align_topy,
            align_bottomx,
            align_bottomy,
            all_boxes[:, 4],
        ]).T

        # Merge the detections from all levels. The selection is computed on
        # the calibrated boxes and applied to BOTH arrays; filtering only
        # boxes_align would leave the two outputs with different row counts
        # and no row-to-row correspondence.
        keep = utils.nms(boxes_align, 0.7, 'Union')
        return boxes[keep], boxes_align[keep]
    def detect_pnet(self, im):
        """Get face candidates through pnet

        Parameters:
        ----------
        im: numpy array, input image array, one batch

        Returns:
        -------
        boxes_align: numpy array, boxes after calibration with columns
            [x1, y1, x2, y2, score]; None when no pyramid level produced
            a candidate
        """

        net_size = 12

        # A wrapped (multi-GPU) detector keeps the flag on `.module`; a plain
        # (single-GPU or CPU) detector carries it directly. Only the
        # attribute lookup is guarded, so a failure inside `.cuda()` is
        # reported as-is instead of being swallowed by a blanket `except`.
        try:
            use_cuda = self.pnet_detector.module.use_cuda
        except AttributeError:
            use_cuda = self.pnet_detector.use_cuda

        current_scale = float(net_size) / self.min_face_size
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        all_boxes = []
        while min(current_height, current_width) > net_size:

            image_tensor = image_tools.convert_image_to_tensor(im_resized)
            feed_imgs = Variable(torch.stack([image_tensor]))

            if use_cuda:
                feed_imgs = feed_imgs.cuda()

            # The third output (landmarks) is not used at this stage.
            cls_map, reg, _ = self.pnet_detector(feed_imgs)

            cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(
                cls_map.cpu())
            reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())

            # boxes = [x1, y1, x2, y2, score, reg]
            boxes = self.generate_bbox(cls_map_np[0, :, :], reg_np,
                                       current_scale, self.args.prob_thres[0])

            # generate the next pyramid image before any `continue`
            current_scale *= self.scale_factor
            im_resized = self.resize_image(im, current_scale)
            current_height, current_width, _ = im_resized.shape

            if boxes.size == 0:
                continue

            # non-maximum suppression within this pyramid level
            keep = utils.nms(boxes[:, :5], 0.5, 'Union')
            all_boxes.append(boxes[keep])

        if not all_boxes:
            return None

        all_boxes = np.vstack(all_boxes)

        # merge the detections of all pyramid levels
        keep = utils.nms(all_boxes[:, :5], 0.7, 'Union')
        all_boxes = all_boxes[keep]

        bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
        bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

        # all_boxes = [x1, y1, x2, y2, score, reg]; each corner is shifted
        # by its regression offset times the box width (x) or height (y)
        align_x1 = all_boxes[:, 0] + all_boxes[:, 5] * bw
        align_y1 = all_boxes[:, 1] + all_boxes[:, 6] * bh
        align_x2 = all_boxes[:, 2] + all_boxes[:, 7] * bw
        align_y2 = all_boxes[:, 3] + all_boxes[:, 8] * bh

        boxes_align = np.vstack([
            align_x1,
            align_y1,
            align_x2,
            align_y2,
            all_boxes[:, 4],
        ]).T

        return boxes_align
Exemple #4
0
    def detect_pnet(self, im):
        """Get face candidates through pnet

        PNet is run on an image pyramid, candidates are pruned with NMS per
        level and again across levels, the regression offsets are applied,
        and finally calibrated boxes that are too small or lie outside the
        image are discarded (from both outputs).

        Parameters:
        ----------
        im: numpy array
            input image array
            one batch; ``im.shape`` is unpacked as (h, w, c)

        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration, columns
            [x1, y1, x2, y2, score]
        boxes_align: numpy array
            boxes after calibration; row i corresponds to row i of ``boxes``
        Both values are None when no candidate is found or none survives
        the validity filter.
        """

        h, w, _ = im.shape
        net_size = 12

        # find initial scale
        current_scale = float(net_size) / self.min_face_size
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        # fcn
        all_boxes = []
        while min(current_height, current_width) > net_size:
            image_tensor = image_tools.convert_image_to_tensor(im_resized)
            feed_imgs = Variable(torch.stack([image_tensor]))

            if self.pnet_detector.use_cuda:
                feed_imgs = feed_imgs.cuda()

            # self.pnet_detector is a trained pnet torch model
            # receptive field is 12x12
            # 12x12 --> score
            # 12x12 --> bounding box
            cls_map, reg = self.pnet_detector(feed_imgs)

            cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(
                cls_map.cpu())
            reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())

            # boxes = [x1, y1, x2, y2, score, reg]
            boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np,
                                               current_scale, self.thresh[0])

            # generate the next pyramid image before any `continue`
            current_scale *= self.scale_factor
            im_resized = self.resize_image(im, current_scale)
            current_height, current_width, _ = im_resized.shape

            if boxes.size == 0:
                continue

            # non-maximum suppression within this pyramid level
            keep = utils.nms(boxes[:, :5], 0.5, 'Union')
            all_boxes.append(boxes[keep])

        if not all_boxes:
            return None, None
        all_boxes = np.vstack(all_boxes)

        # merge the detection from first stage
        keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union')
        all_boxes = all_boxes[keep]

        # x2 - x1 and y2 - y1 (inclusive pixel extent)
        bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
        bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

        boxes = np.vstack([
            all_boxes[:, 0],
            all_boxes[:, 1],
            all_boxes[:, 2],
            all_boxes[:, 3],
            all_boxes[:, 4],
        ]).T

        # all_boxes = [x1, y1, x2, y2, score, reg],
        # reg = [px1, py1, px2, py2] (in prediction)
        align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
        align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
        align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
        align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh

        # refine the boxes
        boxes_align = np.vstack([
            align_topx,
            align_topy,
            align_bottomx,
            align_bottomy,
            all_boxes[:, 4],
        ]).T

        # remove invalid boxes: a side of at most 3 pixels, or a box lying
        # beyond the image borders (x2 < 1, x1 > w - 2, y2 < 1, y1 > h - 2)
        too_small = ((boxes_align[:, 2] - boxes_align[:, 0] <= 3) |
                     (boxes_align[:, 3] - boxes_align[:, 1] <= 3))
        outside = ((boxes_align[:, 2] < 1) | (boxes_align[:, 0] > w - 2) |
                   (boxes_align[:, 3] < 1) | (boxes_align[:, 1] > h - 2))

        # one diagnostic line per rejected box, in row order; the size
        # check takes precedence over the bounds check
        for is_small, is_outside in zip(too_small, outside):
            if is_small:
                print('pnet has one smaller than 3')
            elif is_outside:
                print('pnet has one out')

        valid = ~(too_small | outside)
        boxes_align = boxes_align[valid, :]
        boxes = boxes[valid, :]

        # Keep the "None or at least one row" contract of the early return
        # above: the filter may have rejected every candidate.
        if boxes_align.shape[0] == 0:
            return None, None
        return boxes, boxes_align