def detect_onet(self, im, dets):
    """Get face candidates using onet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of rnet

    Returns:
    -------
    boxes_align: numpy array
        boxes after calibration
    landmarks_align: numpy array
        landmarks after calibration
    """
    h, w, c = im.shape

    if dets is None:
        return None, None

    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]

    cropped_ims_tensors = []
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        # crop the (possibly clipped) box region out of the input image
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
            im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        crop_im = cv2.resize(tmp, (48, 48))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    if self.onet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg, landmark = self.onet_detector(feed_imgs)

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()
    landmark = landmark.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[2])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
        landmark = landmark[keep_inds]
    else:
        return None, None

    keep = utils.nms(boxes, 0.7, mode="Minimum")

    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]
    keep_landmark = landmark[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    align_landmark_topx = keep_boxes[:, 0]
    align_landmark_topy = keep_boxes[:, 1]

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        keep_cls[:, 0],
    ])
    boxes_align = boxes_align.T

    # landmark offsets are normalized to the uncalibrated box size
    landmark = np.vstack([
        align_landmark_topx + keep_landmark[:, 0] * bw,
        align_landmark_topy + keep_landmark[:, 1] * bh,
        align_landmark_topx + keep_landmark[:, 2] * bw,
        align_landmark_topy + keep_landmark[:, 3] * bh,
        align_landmark_topx + keep_landmark[:, 4] * bw,
        align_landmark_topy + keep_landmark[:, 5] * bh,
        align_landmark_topx + keep_landmark[:, 6] * bw,
        align_landmark_topy + keep_landmark[:, 7] * bh,
        align_landmark_topx + keep_landmark[:, 8] * bw,
        align_landmark_topy + keep_landmark[:, 9] * bh,
    ])
    landmark_align = landmark.T

    return boxes_align, landmark_align
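# The "Minimum" mode passed to utils.nms above is not defined in this file.
# The sketch below is a hedged reconstruction of the assumed semantics, based
# on the standard MTCNN formulation: "Union" divides the intersection by the
# union of the two areas (IoU), while "Minimum" divides by the smaller area,
# which also suppresses boxes nested inside larger ones.
def nms_sketch(boxes, overlap_threshold, mode="Union"):
    # boxes: (N, 5) array of [x1, y1, x2, y2, score]
    x1, y1, x2, y2, score = (boxes[:, i] for i in range(5))
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = score.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # overlap of the top-scoring box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        if mode == "Minimum":
            ovr = inter / np.minimum(area[i], area[order[1:]])
        else:  # "Union"
            ovr = inter / (area[i] + area[order[1:]] - inter)
        order = order[1:][ovr <= overlap_threshold]
    return keep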
def detect_pnet(self, im):
    """Get face candidates through pnet

    Parameters:
    ----------
    im: numpy array
        input image array, one batch

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    h, w, c = im.shape
    net_size = 12

    # find the initial scale so the smallest detectable face maps to 12 px
    current_scale = float(net_size) / self.min_face_size
    im_resized = self.resize_image(im, current_scale)
    current_height, current_width, _ = im_resized.shape

    # run the fully convolutional pnet over an image pyramid; a driver that
    # chains all three stages is sketched after this function
    all_boxes = list()
    while min(current_height, current_width) > net_size:
        feed_imgs = []
        image_tensor = image_tools.convert_image_to_tensor(im_resized)
        feed_imgs.append(image_tensor)
        feed_imgs = torch.stack(feed_imgs)
        feed_imgs = Variable(feed_imgs)

        if self.pnet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        # self.pnet_detector is a trained pnet torch model with a 12x12
        # receptive field; each output cell yields a score and a bounding box
        cls_map, reg = self.pnet_detector(feed_imgs)

        cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())
        # cls_map_np.shape = (1, n, m, 1), reg_np.shape = (1, n, m, 4)

        # boxes = [x1, y1, x2, y2, score, reg]
        boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np,
                                           current_scale, self.thresh[0])

        # generate the next level of the image pyramid
        current_scale *= self.scale_factor  # self.scale_factor = 0.709
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        if boxes.size == 0:
            continue

        # non-maximum suppression within this scale
        keep = utils.nms(boxes[:, :5], 0.5, 'Union')
        boxes = boxes[keep]
        all_boxes.append(boxes)

    if len(all_boxes) == 0:
        return None, None

    all_boxes = np.vstack(all_boxes)

    # merge the detections from all scales of the first stage
    keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union')
    all_boxes = all_boxes[keep]

    bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

    boxes = np.vstack([
        all_boxes[:, 0],
        all_boxes[:, 1],
        all_boxes[:, 2],
        all_boxes[:, 3],
        all_boxes[:, 4],
    ])
    boxes = boxes.T

    # all_boxes = [x1, y1, x2, y2, score, reg]; reg = [px1, py1, px2, py2]
    align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
    align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
    align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
    align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh

    # refine the boxes with the predicted offsets
    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        all_boxes[:, 4],
    ])
    boxes_align = boxes_align.T

    return boxes, boxes_align
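# For context, the three stages are typically chained as below. This driver
# is a hypothetical sketch (a method like detect_face is implied but not
# shown in this source); it would live on the same detector class and simply
# threads each stage's calibrated boxes into the next stage, handling the
# None returns used above.
def detect_face_sketch(self, im):
    boxes_align = np.array([])
    landmark_align = np.array([])

    # stage 1: pnet proposes coarse candidates over the image pyramid
    if self.pnet_detector:
        boxes, boxes_align = self.detect_pnet(im)
        if boxes_align is None:
            return np.array([]), np.array([])

    # stage 2: rnet filters and refines the pnet candidates
    if self.rnet_detector:
        boxes, boxes_align = self.detect_rnet(im, boxes_align)
        if boxes_align is None:
            return np.array([]), np.array([])

    # stage 3: onet produces the final boxes and landmarks
    if self.onet_detector:
        boxes_align, landmark_align = self.detect_onet(im, boxes_align)
        if boxes_align is None:
            return np.array([]), np.array([])

    return boxes_align, landmark_align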
def detect_rnet(self, im, dets):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of pnet

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    h, w, c = im.shape

    if dets is None:
        return None, None

    # convert pnet detections to square boxes and round the coordinates
    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]
    # (disabled helper: warn to retune self.rnet_detector.batch_size when
    # num_boxes / batch_size drifts above 3 or below 0.3)

    cropped_ims_tensors = []
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
            im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        crop_im = cv2.resize(tmp, (24, 24))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    if self.rnet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg = self.rnet_detector(feed_imgs)

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[1])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
    else:
        return None, None

    keep = utils.nms(boxes, 0.7)

    if len(keep) == 0:
        return None, None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    boxes = np.vstack([
        keep_boxes[:, 0],
        keep_boxes[:, 1],
        keep_boxes[:, 2],
        keep_boxes[:, 3],
        keep_cls[:, 0],
    ])

    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        keep_cls[:, 0],
    ])

    boxes = boxes.T
    boxes_align = boxes_align.T

    return boxes, boxes_align
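# square_bbox is called at the top of every rnet/onet stage but is not shown
# in this file. Below is a minimal sketch of the assumed behavior: each box
# is expanded to a square with the same center and side max(w, h), so the
# crop keeps the aspect ratio expected by the fixed 24x24 / 48x48 inputs.
def square_bbox_sketch(bbox):
    square_bbox = bbox.copy()
    w = bbox[:, 2] - bbox[:, 0] + 1
    h = bbox[:, 3] - bbox[:, 1] + 1
    side = np.maximum(w, h)
    # shift the top-left corner so the square stays centered on the box
    square_bbox[:, 0] = bbox[:, 0] + w * 0.5 - side * 0.5
    square_bbox[:, 1] = bbox[:, 1] + h * 0.5 - side * 0.5
    square_bbox[:, 2] = square_bbox[:, 0] + side - 1
    square_bbox[:, 3] = square_bbox[:, 1] + side - 1
    return square_bbox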
def detect_rnet(self, im, dets):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array, input image array
    dets: numpy array, detection results of pnet

    Returns:
    -------
    boxes_align: numpy array, boxes after calibration
    """
    h, w, c = im.shape

    if dets is None:
        return None

    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [y1, y2, x1, x2, anchor_y1, anchor_y2,
     anchor_x1, anchor_x2, box_w, box_h] = self.boundary_check(dets, w, h)
    num_boxes = dets.shape[0]

    cropped_ims_tensors = []
    for i in range(num_boxes):
        tmp_img = np.zeros((box_h[i], box_w[i], 3), dtype=np.uint8)
        tmp_img[y1[i]:y2[i] + 1, x1[i]:x2[i] + 1, :] = \
            im[anchor_y1[i]:anchor_y2[i] + 1, anchor_x1[i]:anchor_x2[i] + 1, :]
        crop_im = cv2.resize(tmp_img, (24, 24))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    try:
        if self.rnet_detector.module.use_cuda:  # multi-GPU (DataParallel)
            feed_imgs = feed_imgs.cuda()
    except AttributeError:
        if self.rnet_detector.use_cuda:  # single GPU or CPU
            feed_imgs = feed_imgs.cuda()

    cls_map, reg, _ = self.rnet_detector(feed_imgs)  # CORE

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.args.prob_thres[1])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]  # NOTE :: det boxes come from pnet
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
    else:
        return None

    keep = utils.nms(boxes, 0.7)
    if len(keep) == 0:
        return None

    keep_cls = cls[keep]
    keep_boxes = boxes[keep]
    keep_reg = reg[keep]

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    align_x1 = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_y1 = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_x2 = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_y2 = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    boxes_align = np.vstack(
        [align_x1, align_y1, align_x2, align_y2, keep_cls[:, 0]]).T

    return boxes_align
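# boundary_check (the counterpart of pad in the other variants) is not shown
# in this file. A hedged sketch of the assumed contract: clip each square box
# against the image and report, per box, both the destination slice inside
# the zero-padded crop (y1..y2, x1..x2) and the source slice inside the image
# (anchor_y1..anchor_y2, anchor_x1..anchor_x2), plus the full box size.
def boundary_check_sketch(bboxes, w, h):
    box_w = (bboxes[:, 2] - bboxes[:, 0] + 1).astype(np.int32)
    box_h = (bboxes[:, 3] - bboxes[:, 1] + 1).astype(np.int32)
    num_box = bboxes.shape[0]

    # destination slice: where the visible part lands inside the crop
    x1 = np.zeros((num_box,), dtype=np.int32)
    y1 = np.zeros((num_box,), dtype=np.int32)
    x2 = box_w.copy() - 1
    y2 = box_h.copy() - 1

    # source slice: the part of the box that lies inside the image
    anchor_x1 = bboxes[:, 0].astype(np.int32)
    anchor_y1 = bboxes[:, 1].astype(np.int32)
    anchor_x2 = bboxes[:, 2].astype(np.int32)
    anchor_y2 = bboxes[:, 3].astype(np.int32)

    # clip boxes that spill over the right / bottom edge
    idx = anchor_x2 > w - 1
    x2[idx] = box_w[idx] + w - 2 - anchor_x2[idx]
    anchor_x2[idx] = w - 1
    idx = anchor_y2 > h - 1
    y2[idx] = box_h[idx] + h - 2 - anchor_y2[idx]
    anchor_y2[idx] = h - 1

    # clip boxes that spill over the left / top edge
    idx = anchor_x1 < 0
    x1[idx] = -anchor_x1[idx]
    anchor_x1[idx] = 0
    idx = anchor_y1 < 0
    y1[idx] = -anchor_y1[idx]
    anchor_y1[idx] = 0

    return [y1, y2, x1, x2, anchor_y1, anchor_y2,
            anchor_x1, anchor_x2, box_w, box_h]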
def detect_onet(self, im, dets):
    """Get face candidates using onet

    Parameters:
    ----------
    im: numpy array, input image array
    dets: numpy array, detection results of rnet

    Returns:
    -------
    boxes_align: numpy array, boxes after calibration
    landmarks_align: numpy array, landmarks after calibration
    """
    h, w, c = im.shape

    if dets is None:
        return None, None

    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [y1, y2, x1, x2, anchor_y1, anchor_y2,
     anchor_x1, anchor_x2, box_w, box_h] = self.boundary_check(dets, w, h)

    cropped_ims_tensors = []
    for i in range(dets.shape[0]):
        tmp_img = np.zeros((box_h[i], box_w[i], 3), dtype=np.uint8)
        tmp_img[y1[i]:y2[i] + 1, x1[i]:x2[i] + 1, :] = \
            im[anchor_y1[i]:anchor_y2[i] + 1, anchor_x1[i]:anchor_x2[i] + 1, :]
        crop_im = cv2.resize(tmp_img, (48, 48))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors))

    try:
        if self.onet_detector.module.use_cuda:  # multi-GPU (DataParallel)
            feed_imgs = feed_imgs.cuda()
    except AttributeError:
        if self.onet_detector.use_cuda:  # single GPU or CPU
            feed_imgs = feed_imgs.cuda()

    # onet predicts score, box offsets, and landmarks
    cls_map, reg, landmark = self.onet_detector(feed_imgs)

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()
    landmark = landmark.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.args.prob_thres[2])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
        landmark = landmark[keep_inds]
    else:
        return None, None

    bw = boxes[:, 2] - boxes[:, 0] + 1
    bh = boxes[:, 3] - boxes[:, 1] + 1

    align_x1 = boxes[:, 0] + reg[:, 0] * bw
    align_y1 = boxes[:, 1] + reg[:, 1] * bh
    align_x2 = boxes[:, 2] + reg[:, 2] * bw
    align_y2 = boxes[:, 3] + reg[:, 3] * bh

    boxes_align = np.vstack(
        [align_x1, align_y1, align_x2, align_y2, cls[:, 0]]).T

    # map the 68 normalized (x, y) landmark pairs back to image coordinates
    # TODO :: 68 <--> 5
    lmk_align = landmark.copy()
    x_idx = [2 * s for s in range(68)]
    y_idx = [2 * s + 1 for s in range(68)]
    for idx in range(lmk_align.shape[0]):
        lmk_align[idx, x_idx] = boxes[idx, 0] + lmk_align[idx, x_idx] * bw[idx]
        lmk_align[idx, y_idx] = boxes[idx, 1] + lmk_align[idx, y_idx] * bh[idx]

    keep = utils.nms(boxes_align, 0.7, mode='Minimum')
    if len(keep) == 0:
        return None, None

    boxes_align = boxes_align[keep]
    lmk_align = lmk_align[keep]

    return boxes_align, lmk_align
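# The per-row landmark loop above could also be expressed with numpy
# broadcasting, assuming the same x_idx/y_idx layout (a sketch, not a tested
# drop-in replacement):
#   lmk_align[:, x_idx] = boxes[:, 0:1] + lmk_align[:, x_idx] * bw[:, None]
#   lmk_align[:, y_idx] = boxes[:, 1:2] + lmk_align[:, y_idx] * bh[:, None]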
def detect_rnet(self, im, dets):
    """Get face candidates using rnet

    Parameters:
    ----------
    im: numpy array
        input image array
    dets: numpy array
        detection results of pnet

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    h, w, c = im.shape

    if dets is None:
        return None, None

    dets = self.square_bbox(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = self.pad(dets, w, h)
    num_boxes = dets.shape[0]
    if num_boxes == 0:
        return None, None
    # (disabled helper: warn to retune self.rnet_detector.batch_size when
    # num_boxes / batch_size drifts above 3 or below 0.3)

    cropped_ims_tensors = []
    for i in range(num_boxes):
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
            im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
        crop_im = cv2.resize(tmp, (24, 24))
        crop_im_tensor = image_tools.convert_image_to_tensor(crop_im)
        cropped_ims_tensors.append(crop_im_tensor)
    feed_imgs = Variable(torch.stack(cropped_ims_tensors).float())

    if self.rnet_detector.use_cuda:
        feed_imgs = feed_imgs.cuda()

    cls_map, reg = self.rnet_detector(feed_imgs)

    cls_map = cls_map.cpu().data.numpy()
    reg = reg.cpu().data.numpy()

    keep_inds = np.where(cls_map > self.thresh[1])[0]

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map[keep_inds]
        reg = reg[keep_inds]
    else:
        return None, None

    bw = boxes[:, 2] - boxes[:, 0] + 1
    bh = boxes[:, 3] - boxes[:, 1] + 1

    # refine the boxes and clip them to the image bounds
    align_topx = np.maximum(boxes[:, 0] + reg[:, 0] * bw, 0)
    align_topy = np.maximum(boxes[:, 1] + reg[:, 1] * bh, 0)
    align_bottomx = np.minimum(boxes[:, 2] + reg[:, 2] * bw, w)
    align_bottomy = np.minimum(boxes[:, 3] + reg[:, 3] * bh, h)

    boxes = np.vstack([
        boxes[:, 0],
        boxes[:, 1],
        boxes[:, 2],
        boxes[:, 3],
        cls[:, 0],
    ])
    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        cls[:, 0],
    ])
    boxes = boxes.T
    boxes_align = boxes_align.T

    # in this variant NMS is applied after calibration rather than before
    keep = utils.nms(boxes_align, 0.7)
    if len(keep) == 0:
        return None, None
    boxes_align = boxes_align[keep]

    return boxes, boxes_align
def detect_pnet(self, im):
    """Get face candidates through pnet

    Parameters:
    ----------
    im: numpy array, input image array, one batch

    Returns:
    -------
    boxes_align: numpy array, boxes after calibration
    """
    h, w, c = im.shape
    net_size = 12
    current_scale = float(net_size) / self.min_face_size  # initial scale
    im_resized = self.resize_image(im, current_scale)
    current_height, current_width, _ = im_resized.shape

    all_boxes = list()
    while min(current_height, current_width) > net_size:
        feed_imgs = []
        image_tensor = image_tools.convert_image_to_tensor(im_resized)
        feed_imgs.append(image_tensor)
        feed_imgs = Variable(torch.stack(feed_imgs))

        try:
            if self.pnet_detector.module.use_cuda:  # multi-GPU (DataParallel)
                feed_imgs = feed_imgs.cuda()
        except AttributeError:
            if self.pnet_detector.use_cuda:  # single GPU or CPU
                feed_imgs = feed_imgs.cuda()

        # CORE; the landmark head is ignored at this stage
        cls_map, reg, _ = self.pnet_detector(feed_imgs)

        cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())

        # boxes = [x1, y1, x2, y2, score, reg]
        boxes = self.generate_bbox(cls_map_np[0, :, :], reg_np,
                                   current_scale, self.args.prob_thres[0])

        # generate the next level of the image pyramid
        current_scale *= self.scale_factor  # self.scale_factor = 0.709
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        if boxes.size == 0:
            continue

        # non-maximum suppression within this scale
        keep = utils.nms(boxes[:, :5], 0.5, 'Union')
        boxes = boxes[keep]
        all_boxes.append(boxes)

    if len(all_boxes) == 0:
        return None

    all_boxes = np.vstack(all_boxes)

    keep = utils.nms(all_boxes[:, :5], 0.7, 'Union')
    all_boxes = all_boxes[keep]

    bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

    # all_boxes = [x1, y1, x2, y2, score, reg]
    align_x1 = all_boxes[:, 0] + all_boxes[:, 5] * bw
    align_y1 = all_boxes[:, 1] + all_boxes[:, 6] * bh
    align_x2 = all_boxes[:, 2] + all_boxes[:, 7] * bw
    align_y2 = all_boxes[:, 3] + all_boxes[:, 8] * bh

    boxes_align = np.vstack([
        align_x1,
        align_y1,
        align_x2,
        align_y2,
        all_boxes[:, 4],
    ]).T

    return boxes_align
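# generate_bbox / generate_bounding_box is not defined in this file. The
# sketch below is a hedged reconstruction of the assumed mapping: pnet is
# fully convolutional with stride 2, so each score-map cell corresponds to a
# 12x12 window in the rescaled image, which is divided by the pyramid scale
# to land back in original-image coordinates.
def generate_bbox_sketch(cls_map, reg, scale, threshold, stride=2, cellsize=12):
    cls_map = np.squeeze(cls_map)            # tolerate a trailing channel axis
    t_index = np.where(cls_map > threshold)  # cells above the score threshold
    if t_index[0].size == 0:
        return np.array([])
    # per-cell regression offsets, shape (4, k), assuming reg is (1, n, m, 4)
    reg_sel = np.array([reg[0, t_index[0], t_index[1], i] for i in range(4)])
    score = cls_map[t_index]
    # map cell (row, col) back to a 12x12 window at this pyramid scale
    boxes = np.vstack([
        np.round(stride * t_index[1] / scale),               # x1
        np.round(stride * t_index[0] / scale),               # y1
        np.round((stride * t_index[1] + cellsize) / scale),  # x2
        np.round((stride * t_index[0] + cellsize) / scale),  # y2
        score,
        reg_sel,
    ])
    return boxes.T  # (k, 9) = [x1, y1, x2, y2, score, reg]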
def detect_pnet(self, im):
    """Get face candidates through pnet

    Parameters:
    ----------
    im: numpy array
        input image array

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    h, w, c = im.shape
    net_size = 12
    current_scale = float(net_size) / self.min_face_size  # find initial scale
    im_resized = self.resize_image(im, current_scale)
    current_height, current_width, _ = im_resized.shape

    # run the fully convolutional pnet over an image pyramid
    all_boxes = list()
    while min(current_height, current_width) > net_size:
        feed_imgs = []
        image_tensor = image_tools.convert_image_to_tensor(im_resized)
        feed_imgs.append(image_tensor)
        feed_imgs = Variable(torch.stack(feed_imgs).float())

        if self.pnet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        cls_map, reg = self.pnet_detector(feed_imgs)

        cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())

        boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np,
                                           current_scale, self.thresh[0])

        current_scale *= self.scale_factor
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        if boxes.size == 0:
            continue

        keep = utils.nms(boxes[:, :5], 0.5, 'Union')
        boxes = boxes[keep]
        all_boxes.append(boxes)

    if len(all_boxes) == 0:
        return None, None

    all_boxes = np.vstack(all_boxes)

    # in this variant the cross-scale merge NMS is applied after calibration
    bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

    boxes = np.vstack([
        all_boxes[:, 0],
        all_boxes[:, 1],
        all_boxes[:, 2],
        all_boxes[:, 3],
        all_boxes[:, 4],
    ])
    boxes = boxes.T

    # refine the boxes and clip them to the image bounds
    align_topx = np.maximum(all_boxes[:, 0] + all_boxes[:, 5] * bw, 0)
    align_topy = np.maximum(all_boxes[:, 1] + all_boxes[:, 6] * bh, 0)
    align_bottomx = np.minimum(all_boxes[:, 2] + all_boxes[:, 7] * bw, w)
    align_bottomy = np.minimum(all_boxes[:, 3] + all_boxes[:, 8] * bh, h)

    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        all_boxes[:, 4],
    ])
    boxes_align = boxes_align.T

    keep = utils.nms(boxes_align, 0.7, 'Union')
    boxes_align = boxes_align[keep]

    return boxes, boxes_align
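# A small, self-contained illustration of the pyramid schedule produced by
# the loop above. The defaults below (min_face_size=24, scale_factor=0.709)
# are assumptions matching the values referenced in this codebase; the loop
# stops once the shorter side of the rescaled image drops to net_size pixels.
def pyramid_scales_sketch(h, w, min_face_size=24, scale_factor=0.709,
                          net_size=12):
    scales = []
    scale = float(net_size) / min_face_size  # e.g. 12 / 24 = 0.5
    while min(h, w) * scale > net_size:
        scales.append(scale)
        scale *= scale_factor
    return scales

# e.g. pyramid_scales_sketch(480, 640) -> [0.5, 0.3545, 0.2513, ...]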
def train_rnet(model_store_path, end_epoch, imdb, batch_size, frequent=50,
               base_lr=0.01, use_cuda=True):
    if not os.path.exists(model_store_path):
        os.makedirs(model_store_path)

    lossfn = LossFn()
    net = RNet(is_train=True, use_cuda=use_cuda)
    net.train()
    if use_cuda:
        net.cuda()

    optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)

    train_data = TrainImageReader(imdb, 24, batch_size, shuffle=True)

    for cur_epoch in range(1, end_epoch + 1):
        train_data.reset()
        for batch_idx, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(train_data):
            im_tensor = [
                image_tools.convert_image_to_tensor(image[i, :, :, :])
                for i in range(image.shape[0])
            ]
            im_tensor = torch.stack(im_tensor)

            im_tensor = Variable(im_tensor)
            gt_label = Variable(torch.from_numpy(gt_label).float())
            gt_bbox = Variable(torch.from_numpy(gt_bbox).float())
            gt_landmark = Variable(torch.from_numpy(gt_landmark).float())

            if use_cuda:
                im_tensor = im_tensor.cuda()
                gt_label = gt_label.cuda()
                gt_bbox = gt_bbox.cuda()
                gt_landmark = gt_landmark.cuda()

            cls_pred, box_offset_pred = net(im_tensor)

            cls_loss = lossfn.cls_loss(gt_label, cls_pred)
            box_offset_loss = lossfn.box_loss(gt_label, gt_bbox, box_offset_pred)
            # the landmark loss is disabled for rnet in this setup
            all_loss = cls_loss * 1.0 + box_offset_loss * 0.5

            if batch_idx % frequent == 0:
                accuracy = compute_accuracy(cls_pred, gt_label)

                show1 = accuracy.data.cpu().numpy()
                show2 = cls_loss.data.cpu().numpy()
                show3 = box_offset_loss.data.cpu().numpy()
                show5 = all_loss.data.cpu().numpy()

                print("%s : Epoch: %d, Step: %d, accuracy: %s, det loss: %s, "
                      "bbox loss: %s, all_loss: %s, lr:%s"
                      % (datetime.datetime.now(), cur_epoch, batch_idx,
                         show1, show2, show3, show5, base_lr))

            optimizer.zero_grad()
            all_loss.backward()
            optimizer.step()

        torch.save(net.state_dict(),
                   os.path.join(model_store_path, "rnet_epoch_%d.pt" % cur_epoch))
        torch.save(net,
                   os.path.join(model_store_path, "rnet_epoch_model_%d.pkl" % cur_epoch))
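# A hedged usage sketch for train_rnet. The annotation loader below is
# hypothetical (the repo's actual imdb construction is not shown here, and
# load_rnet_imdb plus the anno path are placeholders), but the keyword
# arguments match the signature above.
if __name__ == '__main__':
    imdb = load_rnet_imdb('anno_store/imglist_anno_24.txt')  # hypothetical loader
    train_rnet(model_store_path='./model_store',
               end_epoch=10,
               imdb=imdb,
               batch_size=512,
               frequent=50,
               base_lr=0.01,
               use_cuda=True)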
def detect_pnet(self, im):
    """Get face candidates through pnet

    Parameters:
    ----------
    im: numpy array
        input image array, one batch

    Returns:
    -------
    boxes: numpy array
        detected boxes before calibration
    boxes_align: numpy array
        boxes after calibration
    """
    h, w, c = im.shape
    net_size = 12

    current_scale = float(net_size) / self.min_face_size  # find initial scale
    im_resized = self.resize_image(im, current_scale)
    current_height, current_width, _ = im_resized.shape

    # run the fully convolutional pnet over an image pyramid
    all_boxes = list()
    while min(current_height, current_width) > net_size:
        feed_imgs = []
        image_tensor = image_tools.convert_image_to_tensor(im_resized)
        feed_imgs.append(image_tensor)
        feed_imgs = torch.stack(feed_imgs)
        feed_imgs = Variable(feed_imgs)

        if self.pnet_detector.use_cuda:
            feed_imgs = feed_imgs.cuda()

        # self.pnet_detector is a trained pnet torch model with a 12x12
        # receptive field; each output cell yields a score and a bounding box
        cls_map, reg = self.pnet_detector(feed_imgs)

        cls_map_np = image_tools.convert_chwTensor_to_hwcNumpy(cls_map.cpu())
        reg_np = image_tools.convert_chwTensor_to_hwcNumpy(reg.cpu())
        # cls_map_np.shape = (1, n, m, 1), reg_np.shape = (1, n, m, 4)

        # boxes = [x1, y1, x2, y2, score, reg]
        boxes = self.generate_bounding_box(cls_map_np[0, :, :], reg_np,
                                           current_scale, self.thresh[0])

        # generate the next level of the image pyramid
        current_scale *= self.scale_factor  # self.scale_factor = 0.709
        im_resized = self.resize_image(im, current_scale)
        current_height, current_width, _ = im_resized.shape

        if boxes.size == 0:
            continue

        # non-maximum suppression within this scale
        keep = utils.nms(boxes[:, :5], 0.5, 'Union')
        boxes = boxes[keep]
        all_boxes.append(boxes)

    if len(all_boxes) == 0:
        return None, None

    all_boxes = np.vstack(all_boxes)

    # merge the detections from all scales of the first stage
    keep = utils.nms(all_boxes[:, 0:5], 0.7, 'Union')
    all_boxes = all_boxes[keep]

    bw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bh = all_boxes[:, 3] - all_boxes[:, 1] + 1

    boxes = np.vstack([
        all_boxes[:, 0],
        all_boxes[:, 1],
        all_boxes[:, 2],
        all_boxes[:, 3],
        all_boxes[:, 4],
    ])
    boxes = boxes.T

    # all_boxes = [x1, y1, x2, y2, score, reg]; reg = [px1, py1, px2, py2]
    align_topx = all_boxes[:, 0] + all_boxes[:, 5] * bw
    align_topy = all_boxes[:, 1] + all_boxes[:, 6] * bh
    align_bottomx = all_boxes[:, 2] + all_boxes[:, 7] * bw
    align_bottomy = all_boxes[:, 3] + all_boxes[:, 8] * bh

    # refine the boxes with the predicted offsets
    boxes_align = np.vstack([
        align_topx,
        align_topy,
        align_bottomx,
        align_bottomy,
        all_boxes[:, 4],
    ])
    boxes_align = boxes_align.T

    # remove invalid boxes: degenerate (side <= 3 px) or entirely outside
    valindex = [True for _ in range(boxes_align.shape[0])]
    for i in range(boxes_align.shape[0]):
        if boxes_align[i][2] - boxes_align[i][0] <= 3 or \
                boxes_align[i][3] - boxes_align[i][1] <= 3:
            valindex[i] = False
            print('pnet has one smaller than 3')
        elif boxes_align[i][2] < 1 or boxes_align[i][0] > w - 2 or \
                boxes_align[i][3] < 1 or boxes_align[i][1] > h - 2:
            valindex[i] = False
            print('pnet has one out')
    boxes_align = boxes_align[valindex, :]
    boxes = boxes[valindex, :]

    return boxes, boxes_align
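# The validity filter above can also be written without the Python loop. A
# sketch of a vectorized equivalent (same "side > 3 px" and inside-image
# conditions, returning a boolean mask instead of printing):
def filter_valid_boxes_sketch(boxes_align, w, h):
    bw = boxes_align[:, 2] - boxes_align[:, 0]
    bh = boxes_align[:, 3] - boxes_align[:, 1]
    big_enough = (bw > 3) & (bh > 3)
    inside = (boxes_align[:, 2] >= 1) & (boxes_align[:, 0] <= w - 2) & \
             (boxes_align[:, 3] >= 1) & (boxes_align[:, 1] <= h - 2)
    return big_enough & inside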
def train_pnet(model_store_path, end_epoch, imdb, batch_size, frequent=10,
               base_lr=0.01, lr_epoch_decay=[9], use_cuda=True, load=''):
    # build the per-epoch learning-rate list: drop by 10x at each decay epoch;
    # copy before appending so the default list is not mutated across calls
    lr_epoch_decay = list(lr_epoch_decay) + [end_epoch + 1]
    lr_list = np.zeros(end_epoch)
    lr_t = base_lr
    for i in range(len(lr_epoch_decay)):
        if i == 0:
            lr_list[0:lr_epoch_decay[i] - 1] = lr_t
        else:
            lr_list[lr_epoch_decay[i - 1] - 1:lr_epoch_decay[i] - 1] = lr_t
        lr_t *= 0.1

    if not os.path.exists(model_store_path):
        os.makedirs(model_store_path)

    lossfn = LossFn()
    net = PNet(is_train=True, use_cuda=use_cuda)
    if load != '':
        net.load_state_dict(torch.load(load))
        print('model loaded', load)
    net.train()
    if use_cuda:
        net.cuda()

    optimizer = torch.optim.Adam(net.parameters(), lr=lr_list[0])

    train_data = TrainImageReader(imdb, 12, batch_size, shuffle=True)

    for cur_epoch in range(1, end_epoch + 1):
        train_data.reset()  # shuffle
        for param in optimizer.param_groups:
            param['lr'] = lr_list[cur_epoch - 1]
        for batch_idx, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(train_data):
            im_tensor = [
                image_tools.convert_image_to_tensor(image[i, :, :, :])
                for i in range(image.shape[0])
            ]
            im_tensor = torch.stack(im_tensor)

            im_tensor = Variable(im_tensor)
            gt_label = Variable(torch.from_numpy(gt_label).float())
            gt_bbox = Variable(torch.from_numpy(gt_bbox).float())

            if use_cuda:
                im_tensor = im_tensor.cuda()
                gt_label = gt_label.cuda()
                gt_bbox = gt_bbox.cuda()

            cls_pred, box_offset_pred = net(im_tensor)

            cls_loss = lossfn.cls_loss(gt_label, cls_pred)
            box_offset_loss = lossfn.box_loss(gt_label, gt_bbox, box_offset_pred)
            # pnet is trained without the landmark loss in this setup
            all_loss = cls_loss * 1.0 + box_offset_loss * 0.5

            if batch_idx % frequent == 0:
                accuracy = compute_accuracy(cls_pred, gt_label)

                show1 = accuracy.data.cpu().numpy()
                show2 = cls_loss.data.cpu().numpy()
                show3 = box_offset_loss.data.cpu().numpy()
                show5 = all_loss.data.cpu().numpy()

                print("%s : Epoch: %d, Step: %d, accuracy: %s, det loss: %s, "
                      "bbox loss: %s, all_loss: %s, lr:%s"
                      % (datetime.datetime.now(), cur_epoch, batch_idx,
                         show1, show2, show3, show5, lr_list[cur_epoch - 1]))

            optimizer.zero_grad()
            all_loss.backward()
            optimizer.step()

        torch.save(net.state_dict(),
                   os.path.join(model_store_path, "pnet_epoch_%d.pt" % cur_epoch))
        torch.save(net,
                   os.path.join(model_store_path, "pnet_epoch_model_%d.pkl" % cur_epoch))
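# Worked example of the lr_list computed at the top of train_pnet: with
# base_lr=0.01, end_epoch=12, and lr_epoch_decay=[9], epochs 1-8 train at
# 0.01 and epochs 9-12 at 0.001. A standalone sketch reproducing the same
# arithmetic:
def make_lr_list_sketch(base_lr, end_epoch, lr_epoch_decay):
    boundaries = list(lr_epoch_decay) + [end_epoch + 1]
    lr_list = np.zeros(end_epoch)
    lr_t = base_lr
    prev = 1
    for b in boundaries:
        lr_list[prev - 1:b - 1] = lr_t
        lr_t *= 0.1
        prev = b
    return lr_list

# make_lr_list_sketch(0.01, 12, [9]) -> [0.01]*8 + [0.001]*4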
def train_pnet(model_store_path, end_epoch, imdb, batch_size, frequent=50,
               base_lr=0.01, use_cuda=True):
    if not os.path.exists(model_store_path):
        os.makedirs(model_store_path)

    lossfn = LossFn()
    net = PNet(is_train=True, use_cuda=use_cuda)
    # resume training from a saved checkpoint
    checkpoint = torch.load('model_store/pnet_epoch_4.pt')
    net.load_state_dict(checkpoint)
    net.train()
    if use_cuda:
        net.cuda()

    optimizer = torch.optim.Adam(net.parameters(), lr=base_lr)

    train_data = TrainImageReader(imdb, 12, batch_size, shuffle=True)

    for cur_epoch in range(1, end_epoch + 1):
        train_data.reset()
        accuracy_list = []
        cls_loss_list = []
        bbox_loss_list = []

        for batch_idx, (image, (gt_label, gt_bbox, gt_landmark)) in enumerate(train_data):
            im_tensor = [
                image_tools.convert_image_to_tensor(image[i, :, :, :])
                for i in range(image.shape[0])
            ]
            im_tensor = torch.stack(im_tensor).float()

            im_tensor = Variable(im_tensor)
            gt_label = Variable(torch.from_numpy(gt_label).float())
            gt_bbox = Variable(torch.from_numpy(gt_bbox).float())

            if use_cuda:
                im_tensor = im_tensor.cuda()
                gt_label = gt_label.cuda()
                gt_bbox = gt_bbox.cuda()

            cls_pred, box_offset_pred = net(im_tensor)

            cls_loss = lossfn.cls_loss(gt_label, cls_pred)
            box_offset_loss = lossfn.box_loss(gt_label, gt_bbox, box_offset_pred)
            all_loss = cls_loss * 1.0 + box_offset_loss * 0.5

            if batch_idx % frequent == 0:
                accuracy = compute_accuracy(cls_pred, gt_label)

                show1 = accuracy.item()
                show2 = cls_loss.item()
                show3 = box_offset_loss.item()
                show5 = all_loss.item()

                print("%s : Epoch: %d, Step: %d, accuracy: %.4f, det loss: %.4f, "
                      "bbox loss: %.4f, all_loss: %.4f, lr:%s"
                      % (datetime.datetime.now(), cur_epoch, batch_idx,
                         show1, show2, show3, show5, base_lr))

                accuracy_list.append(accuracy)
                cls_loss_list.append(cls_loss)
                bbox_loss_list.append(box_offset_loss)

            optimizer.zero_grad()
            all_loss.backward()
            optimizer.step()

        # epoch-level averages of the sampled batch metrics
        accuracy_avg = torch.mean(torch.tensor(accuracy_list))
        cls_loss_avg = torch.mean(torch.tensor(cls_loss_list))
        bbox_loss_avg = torch.mean(torch.tensor(bbox_loss_list))

        show6 = accuracy_avg.item()
        show7 = cls_loss_avg.item()
        show8 = bbox_loss_avg.item()

        print("Epoch: %d, accuracy: %s, cls loss: %s, bbox loss: %s"
              % (cur_epoch, show6, show7, show8))

        torch.save(net.state_dict(),
                   os.path.join(model_store_path, "pnet_epoch_%d.pt" % cur_epoch))
        torch.save(net,
                   os.path.join(model_store_path, "pnet_epoch_model_%d.pkl" % cur_epoch))