Example #1
    def detect_pnet(self, im):
        """Get face candidates through pnet

        Parameters:
        -----------
        im: numpy array, input image array

        Returns:
        --------
        bboxes_align: numpy array
            bboxes after calibration
        """
        h, w, c = im.shape
        net_size = config.PNET_SIZE
        current_scale = float(net_size) / self.min_face_size  # find initial scale
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        # bounding boxes for all the pyramid scales
        all_bboxes = list()
        # generating bounding boxes for each scale
        while min(current_height, current_width) > net_size:
            image_tensor = utils.convert_image_to_tensor(im_resized)
            feed_imgs = image_tensor.unsqueeze(0)
            feed_imgs = feed_imgs.to(self.device)

            cls_map, reg_map = self.pnet_detector(feed_imgs)
            cls_map_np = utils.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
            reg_map_np = utils.convert_chwTensor_to_hwcNumpy(reg_map.cpu())
            bboxes = self.generate_bounding_box(cls_map_np, reg_map_np,
                                                current_scale, self.thresh[0])

            current_scale *= self.scale_factor
            im_resized = self.resize_image(im, current_scale)
            current_height, current_width, _ = im_resized.shape

            if bboxes.size == 0:
                continue

            keep = utils.nms(bboxes[:, :5], 0.5, 'Union')
            bboxes = bboxes[keep]
            all_bboxes.append(bboxes)

        if len(all_bboxes) == 0:
            return None

        all_bboxes = np.vstack(all_bboxes)

        # apply nms to the detections from all the scales
        keep = utils.nms(all_bboxes[:, 0:5], 0.7, 'Union')
        all_bboxes = all_bboxes[keep]

        # columns 0-4: bbox coordinates plus score, columns 5-8: regression offsets
        bboxes_align = utils.calibrate_box(all_bboxes[:, 0:5],
                                           all_bboxes[:, 5:])
        bboxes_align = utils.convert_to_square(bboxes_align)
        bboxes_align[:, 0:4] = np.round(bboxes_align[:, 0:4])

        return bboxes_align
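
Every stage on this page filters its candidates with utils.nms, but the helper itself is not shown. The sketch below is a minimal greedy implementation of the two modes these snippets pass ('Union' and 'Minimum'/isMin); it is an assumption about the helper's behavior, not the repo's verified code.

import numpy as np

def nms_sketch(boxes, overlap_threshold, mode='Union'):
    # boxes: (N, 5) array of x1, y1, x2, y2, score
    x1, y1, x2, y2, scores = [boxes[:, i] for i in range(5)]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current best box with the remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        if mode == 'Union':
            # classic IoU: intersection over union
            overlap = inter / (areas[i] + areas[order[1:]] - inter)
        else:
            # 'Minimum' mode: intersection over the smaller box's area
            overlap = inter / np.minimum(areas[i], areas[order[1:]])
        # drop every remaining box that overlaps the kept one too much
        order = order[1:][overlap <= overlap_threshold]
    return keep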
Example #2
    def __rnet_detect(self, image, pnet_boxes):
        # empty list to hold the cropped face patches
        _img_dataset = []
        # take the P-Net boxes, find each box's center, and expand along the
        # longer side into a square before cropping
        _pnet_boxes = utils.convert_to_square(pnet_boxes)
        # for each box: read its four coordinates, crop, resize, convert the
        # data type, and append to the list
        for _box in _pnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            # crop the patch given the four coordinates
            # (PIL's crop takes (left, upper, right, lower))
            img = image.crop((_x1, _y1, _x2, _y2))
            # resize to the fixed R-Net input size
            img = img.resize((24, 24))
            # convert the image array to a tensor
            img_data = self.__image_transform(img)
            _img_dataset.append(img_data)
        # stack along dim 0; effectively a list-to-tensor conversion
        img_dataset = torch.stack(_img_dataset)
        # move to the GPU if available
        if self.isCuda:
            img_dataset = img_dataset.cuda()

        # feed the 24 x 24 crops into R-Net for a second round of filtering
        _cls, _offset = self.rnet(img_dataset)
        # move the data from the GPU back to the CPU, then convert to numpy
        cls = _cls.cpu().data.numpy()
        offset = _offset.cpu().data.numpy()
        # print("r_cls:", cls.shape)      # (11, 1): P-Net produced 11 boxes
        # print("r_offset", offset)       # (11, 4)
        boxes = []  # the boxes R-Net keeps
        # r_cls is a confidence threshold; the original 0.6 was on the low side
        idxs, _ = np.where(cls > r_cls)
        # iterate over the qualifying boxes by index; the row index of cls is
        # exactly the index of the qualifying box
        for idx in idxs:
            _box = _pnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            # width and height of the reference box
            ow = _x2 - _x1
            oh = _y2 - _y1
            # coordinates of the calibrated box
            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]
            # keep the four coordinates plus the confidence
            boxes.append([x1, y1, x2, y2, cls[idx][0]])
        # the original r_nms value was 0.5
        return utils.nms(np.array(boxes), r_nms)
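
All three stages call utils.convert_to_square before cropping. A plausible sketch of that helper, under the assumption that it follows the usual MTCNN formulation (expand each box around its center into a square whose side is the longer edge):

import numpy as np

def convert_to_square_sketch(bboxes):
    # bboxes: (N, 5+) array of x1, y1, x2, y2, score, ...
    square = bboxes.copy()
    w = bboxes[:, 2] - bboxes[:, 0]
    h = bboxes[:, 3] - bboxes[:, 1]
    side = np.maximum(w, h)
    # shift each corner so the square shares the original box's center
    square[:, 0] = bboxes[:, 0] + w * 0.5 - side * 0.5
    square[:, 1] = bboxes[:, 1] + h * 0.5 - side * 0.5
    square[:, 2] = square[:, 0] + side
    square[:, 3] = square[:, 1] + side
    return square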
Example #3
    def __onet_detect(self, image, rnet_boxes):
        if rnet_boxes is None:
            return
        _img_dataset = []
        _rnet_boxes = utils.convert_to_square(rnet_boxes)
        for _box in _rnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            if _x1 <= 0 or _y1 <= 0 or _x2 <= 0 or _y2 <= 0 or _x1 > _x2 or _y1 > _y2:
                continue
            img = image[_y1:_y2, _x1:_x2]
            if img.shape[0] <= 0 or img.shape[1] <= 0:
                continue
            img = cv2.resize(img, (48, 48))
            img_data = self.transform(img) - 0.5
            _img_dataset.append(img_data)
        # guard against an empty batch; torch.stack raises on an empty list
        if len(_img_dataset) == 0:
            return
        img_dataset = torch.stack(_img_dataset, dim=0)
        img_dataset = img_dataset.cuda()
        aa = time.process_time()
        _cls, _offset, _iou = self.onet(img_dataset)
        bb = time.process_time()
        print('Onet time:', bb - aa)

        cls = _cls.cpu().data.numpy()
        offset = _offset.cpu().data.numpy()

        idxs, _ = np.where(cls > 0.9)

        _x1 = _rnet_boxes[idxs][:, 0]
        _y1 = _rnet_boxes[idxs][:, 1]
        _x2 = _rnet_boxes[idxs][:, 2]
        _y2 = _rnet_boxes[idxs][:, 3]
        bw = _x2 - _x1
        bh = _y2 - _y1
        x1 = _x1 + bw * offset[idxs][:, 0]
        y1 = _y1 + bh * offset[idxs][:, 1]
        x2 = _x2 + bw * offset[idxs][:, 2]
        y2 = _y2 + bh * offset[idxs][:, 3]
        conf = cls[idxs, 0]
        boxes = np.stack([x1, y1, x2, y2, conf], axis=1)

        return utils.nms(np.array(boxes), 0.3, isMin=True)
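
The self.transform(img) - 0.5 above implies this network expects inputs roughly in [-0.5, 0.5]. A minimal preprocessing sketch under that assumption (the repo's actual transform is not shown on this page):

import numpy as np
import torch

def preprocess_sketch(crop):
    # HWC uint8 image -> CHW float tensor in [-0.5, 0.5]
    t = torch.from_numpy(crop.astype(np.float32) / 255.0 - 0.5)
    return t.permute(2, 0, 1)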
Example #4
    def __onet_detect(self, image, rnet_boxes):
        # empty list to hold the crops for O-Net
        _img_dataset = []
        # expand the boxes output by R-Net along their longer side into squares
        _rnet_boxes = utils.convert_to_square(rnet_boxes)
        for _box in _rnet_boxes:
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])
            # crop the patch given the coordinates
            # (PIL's crop takes (left, upper, right, lower))
            img = image.crop((_x1, _y1, _x2, _y2))
            img = img.resize((48, 48))
            # convert the image to a tensor
            img_data = self.__image_transform(img)
            _img_dataset.append(img_data)
        # stack; effectively a data-format conversion
        img_dataset = torch.stack(_img_dataset)
        if self.isCuda:
            img_dataset = img_dataset.cuda()
        _cls, _offset = self.onet(img_dataset)
        cls = _cls.cpu().data.numpy()
        offset = _offset.cpu().data.numpy()
        # holds O-Net's results
        boxes = []
        # o_cls is a confidence threshold; the original 0.97 proved too low,
        # and in practice it needs to reach 0.99999
        idxs, _ = np.where(cls > o_cls)
        # iterate over the qualifying boxes by index
        for idx in idxs:
            # the R-Net box is the reference
            _box = _rnet_boxes[idx]
            _x1 = int(_box[0])
            _y1 = int(_box[1])
            _x2 = int(_box[2])
            _y2 = int(_box[3])

            # reference width and height; the box is square
            ow = _x2 - _x1
            oh = _y2 - _y1

            # coordinates of the final boxes produced by O-Net
            x1 = _x1 + ow * offset[idx][0]
            y1 = _y1 + oh * offset[idx][1]
            x2 = _x2 + ow * offset[idx][2]
            y2 = _y2 + oh * offset[idx][3]

            # keep the four coordinates and the confidence
            boxes.append([x1, y1, x2, y2, cls[idx][0]])

        # NMS on minimum area (boxes with IoU below the original o_nms of 0.7 are kept)
        return utils.nms(np.array(boxes), o_nms, isMin=True)
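
The decoding loops in Examples #2 and #4 apply the regression offsets relative to the reference square. A worked example: a 100-pixel square at (10, 10, 110, 110) with predicted offsets (0.05, -0.02, -0.10, 0.04) decodes as follows.

ow = oh = 100                # the reference box is square
x1 = 10 + ow * 0.05          # = 15.0
y1 = 10 + oh * -0.02         # = 8.0
x2 = 110 + ow * -0.10        # = 100.0
y2 = 110 + oh * 0.04         # = 114.0
# the calibrated box is (15.0, 8.0, 100.0, 114.0)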
Example #5
    def detect_onet(self, im, dets):
        """Get face candidates using onet

        Parameters:
        ----------
        im: numpy array
            input image array
        dets: numpy array
            detection results of rnet

        Returns:
        -------
        boxes_align: numpy array
            boxes after calibration
        landmarks_align: numpy array
            landmarks after calibration

        """
        h, w, c = im.shape
        if dets is None:
            return None, None

        dets = utils.convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]

        cropped_ims_tensors = []
        for i in range(num_boxes):
            try:
                if tmph[i] > 0 and tmpw[i] > 0:
                    tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                    tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i], x[i]:ex[i], :]
                    crop_im = cv2.resize(tmp, (48, 48))
                    crop_im_tensor = convert_image_to_tensor(crop_im)
                    cropped_ims_tensors.append(crop_im_tensor)
            except ValueError as e:
                print(e)

        feed_imgs = torch.stack(cropped_ims_tensors)

        feed_imgs = feed_imgs.to(self.device)

        cls_map, reg, landmark = self.onet_detector(feed_imgs)

        cls_map = cls_map.cpu().data.numpy()
        reg = reg.cpu().data.numpy()
        landmark = landmark.cpu().data.numpy()

        keep_inds = np.where(cls_map > self.thresh[2])[0]

        if len(keep_inds) > 0:
            boxes = dets[keep_inds]
            cls = cls_map[keep_inds]
            reg = reg[keep_inds]
            landmark = landmark[keep_inds]
        else:
            return None, None

        keep = utils.nms(boxes, 0.7, mode="Minimum")

        if len(keep) == 0:
            return None, None

        keep_cls = cls[keep]
        keep_boxes = boxes[keep]
        keep_reg = reg[keep]
        keep_landmark = landmark[keep]

        bw = keep_boxes[:, 2] - keep_boxes[:, 0]
        bh = keep_boxes[:, 3] - keep_boxes[:, 1]

        align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
        align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
        align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
        align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

        align_landmark_topx = keep_boxes[:, 0]
        align_landmark_topy = keep_boxes[:, 1]

        boxes_align = np.vstack([align_topx,
                                 align_topy,
                                 align_bottomx,
                                 align_bottomy,
                                 keep_cls[:, 0]
                                 ])

        boxes_align = boxes_align.T

        landmark = np.vstack([
            align_landmark_topx + keep_landmark[:, 0] * bw,
            align_landmark_topy + keep_landmark[:, 1] * bh,
            align_landmark_topx + keep_landmark[:, 2] * bw,
            align_landmark_topy + keep_landmark[:, 3] * bh,
            align_landmark_topx + keep_landmark[:, 4] * bw,
            align_landmark_topy + keep_landmark[:, 5] * bh,
            align_landmark_topx + keep_landmark[:, 6] * bw,
            align_landmark_topy + keep_landmark[:, 7] * bh,
            align_landmark_topx + keep_landmark[:, 8] * bw,
            align_landmark_topy + keep_landmark[:, 9] * bh,
        ])

        landmark_align = landmark.T

        return boxes_align, landmark_align
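
A hedged usage sketch for the outputs of detect_onet above: boxes_align is (N, 5) and landmarks_align is (N, 10) with x/y coordinates interleaved. The function name and the OpenCV BGR image layout are assumptions for illustration.

import cv2

def draw_results_sketch(im, boxes_align, landmarks_align):
    for box in boxes_align:
        x1, y1, x2, y2 = [int(v) for v in box[:4]]
        cv2.rectangle(im, (x1, y1), (x2, y2), (0, 255, 0), 2)
    for pts in landmarks_align:
        # landmarks are stored as x0, y0, x1, y1, ..., x4, y4
        for px, py in zip(pts[0::2], pts[1::2]):
            cv2.circle(im, (int(px), int(py)), 2, (0, 0, 255), -1)
    return im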
Example #6
    def detect_rnet(self, im, dets):
        """Get face candidates using rnet

        Parameters:
        ----------
        im: numpy array
            input image array
        dets: numpy array
            detection results of pnet

        Returns:
        -------
        boxes: numpy array
            detected boxes before calibration
        boxes_align: numpy array
            boxes after calibration
        """
        h, w, c = im.shape
        if dets is None:
            return None, None

        dets = utils.convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
        num_boxes = dets.shape[0]

        cropped_ims_tensors = []
        for i in range(num_boxes):
            try:
                if tmph[i] > 0 and tmpw[i] > 0:
                    tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                    tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i], x[i]:ex[i], :]
                    crop_im = cv2.resize(tmp, (24, 24))
                    crop_im_tensor = convert_image_to_tensor(crop_im)
                    # cropped_ims_tensors[i, :, :, :] = crop_im_tensor
                    cropped_ims_tensors.append(crop_im_tensor)
            except ValueError as e:
                print('dy: {}, edy: {}, dx: {}, edx: {}'.format(dy[i], edy[i], dx[i], edx[i]))
                print('y: {}, ey: {}, x: {}, ex: {}'.format(y[i], ey[i], x[i], ex[i]))
                print(e)

        feed_imgs = torch.stack(cropped_ims_tensors)

        feed_imgs = feed_imgs.to(self.device)

        cls_map, reg = self.rnet_detector(feed_imgs)
        cls_map = cls_map.cpu().data.numpy()
        reg = reg.cpu().data.numpy()

        keep_inds = np.where(cls_map > self.thresh[1])[0]

        if len(keep_inds) > 0:
            boxes = dets[keep_inds]
            cls = cls_map[keep_inds]
            reg = reg[keep_inds]
        else:
            return None, None

        keep = utils.nms(boxes, 0.7)
        if len(keep) == 0:
            return None, None

        keep_cls = cls[keep]
        keep_boxes = boxes[keep]
        keep_reg = reg[keep]
        bw = keep_boxes[:, 2] - keep_boxes[:, 0]
        bh = keep_boxes[:, 3] - keep_boxes[:, 1]
        boxes = np.vstack([keep_boxes[:, 0],
                           keep_boxes[:, 1],
                           keep_boxes[:, 2],
                           keep_boxes[:, 3],
                           keep_cls[:, 0]
                           ])
        align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
        align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
        align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
        align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

        boxes_align = np.vstack([align_topx,
                                 align_topy,
                                 align_bottomx,
                                 align_bottomy,
                                 keep_cls[:, 0]
                                 ])
        boxes = boxes.T
        boxes_align = boxes_align.T

        return boxes, boxes_align
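
The detect_rnet/detect_onet methods above call self.pad(dets, w, h) (and Examples #7-#8 call utils.correct_bboxes) to crop boxes that may extend past the image border. A self-consistent sketch of what such a helper returns, given as an assumption rather than the repo's verified code:

import numpy as np

def pad_sketch(bboxes, w, h):
    # source coordinates in the image and target coordinates inside a
    # zero-padded tmph x tmpw patch, for boxes that may cross the border
    x1 = bboxes[:, 0].astype(np.int32)
    y1 = bboxes[:, 1].astype(np.int32)
    x2 = bboxes[:, 2].astype(np.int32)
    y2 = bboxes[:, 3].astype(np.int32)
    tmpw, tmph = x2 - x1, y2 - y1            # patch size per box
    dx = np.maximum(0, -x1)                  # write offset inside the patch
    dy = np.maximum(0, -y1)
    x = np.maximum(0, x1)                    # clipped read window in the image
    y = np.maximum(0, y1)
    ex = np.minimum(w, x2)
    ey = np.minimum(h, y2)
    edx = dx + (ex - x)                      # write end inside the patch
    edy = dy + (ey - y)
    return [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]

With these conventions, tmp[dy[i]:edy[i], dx[i]:edx[i]] and im[y[i]:ey[i], x[i]:ex[i]] always have matching shapes, which is exactly what the copy inside the cropping loops requires.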
Example #7
    def detect_onet(self, im, bboxes):
        """Get face candidates using onet

        Parameters:
        ----------
        im: numpy array
            input image array
        bboxes: numpy array
            detection results of rnet

        Returns:
        -------
        bboxes_align: numpy array
            bboxes after calibration
        """
        net_size = config.ONET_SIZE
        h, w, c = im.shape
        if bboxes is None:
            return None

        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = utils.correct_bboxes(bboxes, w, h)
        num_bboxes = bboxes.shape[0]

        # crop face using rnet proposal
        cropped_ims_tensors = []
        for i in range(num_bboxes):
            try:
                if tmph[i] > 0 and tmpw[i] > 0:
                    tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                    tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i],
                                                            x[i]:ex[i], :]
                    crop_im = cv2.resize(tmp, (net_size, net_size))
                    crop_im_tensor = utils.convert_image_to_tensor(crop_im)
                    cropped_ims_tensors.append(crop_im_tensor)
            except ValueError as e:
                print(e)

        feed_imgs = torch.stack(cropped_ims_tensors)
        feed_imgs = feed_imgs.to(self.device)

        cls, reg = self.onet_detector(feed_imgs)
        cls = cls.cpu().data.numpy()
        reg = reg.cpu().data.numpy()

        keep_inds = np.where(cls[:, 1] > self.thresh[2])[0]
        if len(keep_inds) > 0:
            keep_bboxes = bboxes[keep_inds]
            keep_cls = cls[keep_inds, :]
            keep_reg = reg[keep_inds]
            keep_bboxes[:, 4] = keep_cls[:, 1].reshape((-1, ))
        else:
            return None

        bboxes_align = utils.calibrate_box(keep_bboxes, keep_reg)
        keep = utils.nms(bboxes_align, 0.7, mode='Minimum')

        if len(keep) == 0:
            return None

        bboxes_align = bboxes_align[keep]
        bboxes_align = utils.convert_to_square(bboxes_align)
        return bboxes_align
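
Examples #1, #7 and #8 calibrate boxes via utils.calibrate_box(bboxes, offsets). A minimal sketch of the usual formulation (assumed, not the repo's verified helper): each offset is scaled by the box's width or height and added to the matching coordinate, with the score column carried through.

import numpy as np

def calibrate_box_sketch(bboxes, offsets):
    # bboxes: (N, 5) x1, y1, x2, y2, score; offsets: (N, 4)
    out = bboxes.copy()
    w = (bboxes[:, 2] - bboxes[:, 0] + 1)[:, None]
    h = (bboxes[:, 3] - bboxes[:, 1] + 1)[:, None]
    out[:, 0:4] = bboxes[:, 0:4] + np.hstack([w, h, w, h]) * offsets
    return out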
Example #8
    def detect_rnet(self, im, bboxes):
        """Get face candidates using rnet

        Parameters:
        ----------
        im: numpy array
            input image array
        bboxes: numpy array
            detection results of pnet

        Returns:
        -------
        bboxes_align: numpy array
            bboxes after calibration
        """
        net_size = config.RNET_SIZE
        h, w, c = im.shape
        if bboxes is None:
            return None

        num_bboxes = bboxes.shape[0]

        [dy, edy, dx, edx, y, ey, x, ex, tmpw,
         tmph] = utils.correct_bboxes(bboxes, w, h)

        # crop face using pnet proposals
        cropped_ims_tensors = []
        for i in range(num_bboxes):
            try:
                if tmph[i] > 0 and tmpw[i] > 0:
                    tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
                    tmp[dy[i]:edy[i], dx[i]:edx[i], :] = im[y[i]:ey[i],
                                                            x[i]:ex[i], :]
                    crop_im = cv2.resize(tmp, (net_size, net_size))
                    crop_im_tensor = utils.convert_image_to_tensor(crop_im)
                    cropped_ims_tensors.append(crop_im_tensor)
            except ValueError as e:
                print('dy: {}, edy: {}, dx: {}, edx: {}'.format(
                    dy[i], edy[i], dx[i], edx[i]))
                print('y: {}, ey: {}, x: {}, ex: {}'.format(
                    y[i], ey[i], x[i], ex[i]))
                print(e)

        # build the batched input tensor; if P-Net produced too many proposals,
        # this step can run out of memory
        feed_imgs = torch.stack(cropped_ims_tensors)
        feed_imgs = feed_imgs.to(self.device)

        cls, reg = self.rnet_detector(feed_imgs)
        cls = cls.cpu().data.numpy()
        reg = reg.cpu().data.numpy()

        keep_inds = np.where(cls[:, 1] > self.thresh[1])[0]
        if len(keep_inds) > 0:
            keep_bboxes = bboxes[keep_inds]
            keep_cls = cls[keep_inds, :]
            keep_reg = reg[keep_inds]
            # using softmax 1 as cls score
            keep_bboxes[:, 4] = keep_cls[:, 1].reshape((-1, ))
        else:
            return None

        keep = utils.nms(keep_bboxes, 0.7)
        if len(keep) == 0:
            return None

        keep_cls = keep_cls[keep]
        keep_bboxes = keep_bboxes[keep]
        keep_reg = keep_reg[keep]

        bboxes_align = utils.calibrate_box(keep_bboxes, keep_reg)
        bboxes_align = utils.convert_to_square(bboxes_align)
        bboxes_align[:, 0:4] = np.round(bboxes_align[:, 0:4])

        return bboxes_align
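
The config module these methods reference is not shown on this page. Judging from the literal crop sizes used elsewhere (24 for R-Net, 48 for O-Net) and the standard MTCNN design, it presumably defines at least the constants below; the values are an assumption.

# assumed contents of config.py (conventional MTCNN input sizes)
PNET_SIZE = 12
RNET_SIZE = 24
ONET_SIZE = 48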
Example #9
def get_rnet_sample_data(data_dir, anno_file, det_boxes_file, prefix_path):
    neg_save_dir = os.path.join(data_dir, "24/negative")
    pos_save_dir = os.path.join(data_dir, "24/positive")
    part_save_dir = os.path.join(data_dir, "24/part")

    for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

    # load ground truth from annotation file
    # format of each line: image/path [x1, y1, x2, y2] for each gt_box in this image
    with open(anno_file, 'r') as f:
        annotations = f.readlines()

    image_size = 24
    im_idx_list = list()
    gt_boxes_list = list()
    num_of_images = len(annotations)
    print("processing %d images in total" % num_of_images)
    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_idx = os.path.join(prefix_path, annotation[0])
        boxes = list(map(float, annotation[1:]))
        boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
        im_idx_list.append(im_idx)
        gt_boxes_list.append(boxes)

    save_path = common.ANNO_STORE_DIR
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w')
    f2 = open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w')
    f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w')

    with open(det_boxes_file, 'rb') as det_handle:
        det_boxes = pickle.load(det_handle)
    print(len(det_boxes), num_of_images)
    assert len(
        det_boxes) == num_of_images, "incorrect detections or ground truths"

    # index of neg, pos and part face, used as their image names
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0
    for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
        image_done += 1
        if image_done % 100 == 0:
            print("%d images done" % image_done)
        if dets.shape[0] == 0:
            continue
        img = cv2.imread(im_idx)
        dets = convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        # each image keeps at most 50 negative samples
        cur_n_idx = 0
        for box in dets:
            x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
            width = x_right - x_left
            height = y_bottom - y_top
            # ignore box that is too small or beyond image border
            if width < 20 or x_left <= 0 or y_top <= 0 or x_right >= img.shape[
                    1] or y_bottom >= img.shape[0]:
                continue
            # compute intersection over union(IoU) between current box and all gt boxes
            Iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom, x_left:x_right, :]
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)
            # save negative images and write label

            if np.max(Iou) < 0.3:
                # IoU with every gt box must be below 0.3
                cur_n_idx += 1
                if cur_n_idx <= 50:
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
            else:
                # find gt_box with the highest iou
                idx = np.argmax(Iou)
                assigned_gt = gts[idx]
                x1, y1, x2, y2 = assigned_gt

                # compute bbox reg label
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                # save positive and part-face images and write labels
                if np.max(Iou) >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1

                elif np.max(Iou) >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1

    f1.close()
    f2.close()
    f3.close()
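
The bbox regression labels written above are the inverse of the decoding used in the detector methods: each ground-truth edge is expressed as a fraction of the candidate box's width or height. For instance, a 100-px-wide candidate at x_left = 10 with ground truth x1 = 15:

# encoding (sample generation) and decoding (detection) round-trip
offset_x1 = (15 - 10) / 100.0   # = 0.05, the stored label
x1 = 10 + 100 * offset_x1       # = 15.0, the ground-truth edge recovered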

Example #10
def get_onet_sample_data(data_dir, anno_file, det_boxs_file, prefix):
    neg_save_dir = os.path.join(data_dir, "48/negative")
    pos_save_dir = os.path.join(data_dir, "48/positive")
    part_save_dir = os.path.join(data_dir, "48/part")

    for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

    # load ground truth from annotation file
    # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image

    with open(anno_file, 'r') as f:
        annotations = f.readlines()

    image_size = 48
    im_idx_list = list()
    gt_boxes_list = list()
    num_of_images = len(annotations)
    print("processing %d images in total" % num_of_images)

    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_idx = os.path.join(prefix, annotation[0])

        boxes = list(map(float, annotation[1:]))
        boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4)
        im_idx_list.append(im_idx)
        gt_boxes_list.append(boxes)

    save_path = config.ANNO_STORE_DIR
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w')
    f2 = open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w')
    f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w')

    with open(det_boxs_file, 'rb') as det_handle:
        det_boxes = pickle.load(det_handle)
    print(len(det_boxes), num_of_images)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # index of neg, pos and part face, used as their image names
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0
    for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
        image_done += 1
        if image_done % 100 == 0:
            print("%d images done" % image_done)
        if dets.shape[0] == 0:
            continue
        img = cv2.imread(im_idx)
        dets = convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        # each image keeps at most 50 negative samples
        cur_n_idx = 0
        for box in dets:
            x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
            width = x_right - x_left
            height = y_bottom - y_top
            # ignore box that is too small or beyond image border
            if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
                continue
            # compute intersection over union(IoU) between current box and all gt boxes
            Iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom, x_left:x_right, :]
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)

            # save negative images and write label
            if np.max(Iou) < 0.3:
                # IoU with every gt box must be below 0.3
                cur_n_idx += 1
                if cur_n_idx <= 50:
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
            else:
                # find gt_box with the highest iou
                idx = np.argmax(Iou)
                assigned_gt = gts[idx]
                x1, y1, x2, y2 = assigned_gt

                # compute bbox reg label
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                # save positive and part-face images and write labels
                if np.max(Iou) >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1

                elif np.max(Iou) >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
    f1.close()
    f2.close()
    f3.close()
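
Both sample-generation functions score each detection against all ground-truth boxes with IoU(box, gts). A standard one-vs-many IoU matching that usage (the repo's own implementation may differ):

import numpy as np

def IoU_sketch(box, gts):
    # box: (4+,) candidate box; gts: (K, 4) ground-truth boxes
    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    gt_area = (gts[:, 2] - gts[:, 0] + 1) * (gts[:, 3] - gts[:, 1] + 1)
    xx1 = np.maximum(box[0], gts[:, 0])
    yy1 = np.maximum(box[1], gts[:, 1])
    xx2 = np.minimum(box[2], gts[:, 2])
    yy2 = np.minimum(box[3], gts[:, 3])
    inter = np.maximum(0, xx2 - xx1 + 1) * np.maximum(0, yy2 - yy1 + 1)
    return inter / (box_area + gt_area - inter)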
Example #13

if __name__ == '__main__':
    #   detect multiple images
    image_path = r"test_image"
    detector = Detector()  # build the detector once, outside the loop
    for i in os.listdir(image_path):
        with Image.open(os.path.join(image_path, i)) as im:
            print("-" * 100)
            boxes = detector.detect(im)
            print("size:", im.size)
            imDraw = ImageDraw.Draw(im)

            #   save the faces the network outputs (resized as needed) in
            #   preparation for later face recognition
            out_put_boxes = utils.convert_to_square(boxes)
            for _box in out_put_boxes:
                _x1 = int(_box[0])
                _y1 = int(_box[1])
                _x2 = int(_box[2])
                _y2 = int(_box[3])
                face_crop = im.crop((_x1, _y1, _x2, _y2))
                face_crop.save(r"D:\picture\mtcnn\str{0}.jpg".format(_x1))

            #   one box per loop iteration, each framing one face
            for box in boxes:
                x1 = int(box[0])
                y1 = int(box[1])
                x2 = int(box[2])
                y2 = int(box[3])
                # draw the detection box (assumed completion; the original
                # snippet was truncated at this point)
                imDraw.rectangle((x1, y1, x2, y2), outline='red')