Code Example #1
    def detect_image(self, image_id, image):
        self.confidence = 0.05
        f = open("./input/detection-results/" + image_id + ".txt", "w")
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float64)
        # Preprocess the image and normalize it
        with torch.no_grad():
            photo = Variable(
                torch.from_numpy(
                    np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)),
                                   0)).type(torch.FloatTensor))
            if self.cuda:
                photo = photo.cuda()
            preds = self.net(photo)
        top_conf = []
        top_label = []
        top_bboxes = []
        for i in range(preds.size(1)):
            j = 0
            while preds[0, i, j, 0] >= self.confidence:
                score = preds[0, i, j, 0]
                label_name = self.class_names[i - 1]
                pt = (preds[0, i, j, 1:]).detach().numpy()
                coords = [pt[0], pt[1], pt[2], pt[3]]
                top_conf.append(score)
                top_label.append(label_name)
                top_bboxes.append(coords)
                j = j + 1
        # Decode the prediction results
        if len(top_conf) <= 0:
            return image
        top_conf = np.array(top_conf)
        top_label = np.array(top_label)
        top_bboxes = np.array(top_bboxes)
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            top_bboxes[:, 0],
            -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # Remove the gray letterbox bars
        boxes = ssd_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        for i, c in enumerate(top_label):
            predicted_class = c
            score = str(float(top_conf[i]))

            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" %
                    (predicted_class, score[:6], str(int(left)), str(
                        int(top)), str(int(right)), str(int(bottom))))

        f.close()
        return
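A note on the example above: it writes one detection-results/<image_id>.txt file per image, the format commonly used for mAP evaluation. The sketch below shows one plausible way to drive it; the SSD wrapper class, the image directory layout, and the ID list file are assumptions, not part of the original snippet.

# Hypothetical driver loop for the detection-results writer above.
import os
from PIL import Image

ssd = SSD()  # assumed wrapper class exposing detect_image(image_id, image)
os.makedirs("./input/detection-results", exist_ok=True)

with open("./ImageSets/Main/test.txt") as f:  # assumed list of image IDs, one per line
    image_ids = [line.strip() for line in f if line.strip()]

for image_id in image_ids:
    image = Image.open(os.path.join("./JPEGImages", image_id + ".jpg"))
    ssd.detect_image(image_id, image)  # writes ./input/detection-results/<image_id>.txt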
Code Example #2
File: FPS_test.py  Project: zyg11/retinaface-pytorch
    def get_FPS(self, image, test_interval):
        image = np.array(image,np.float32)
        im_height, im_width, _ = np.shape(image)
        scale = [np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0]]
        scale_for_landmarks = [np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
                                            np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
                                            np.shape(image)[1], np.shape(image)[0]]
        if self.letterbox_image:
            image = np.array(letterbox_image(image,[self.input_shape[1], self.input_shape[0]]), np.float32)
        else:
            self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

        with torch.no_grad():
            image = torch.from_numpy(preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)
            if self.cuda:
                self.anchors = self.anchors.cuda()
                image = image.cuda()
            loc, conf, landms = self.net(image)
            boxes = decode(loc.data.squeeze(0), self.anchors, self.cfg['variance'])
            boxes = boxes.cpu().numpy()

            conf = conf.data.squeeze(0)[:,1:2].cpu().numpy()

            landms = decode_landm(landms.data.squeeze(0), self.anchors, self.cfg['variance'])
            landms = landms.cpu().numpy()

            boxes_conf_landms = np.concatenate([boxes, conf, landms],-1)
            boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)
            if len(boxes_conf_landms)>0:
                if self.letterbox_image:
                    boxes_conf_landms = retinaface_correct_boxes(boxes_conf_landms, np.array([self.input_shape[0], self.input_shape[1]]), np.array([im_height, im_width]))
                    
                boxes_conf_landms[:,:4] = boxes_conf_landms[:,:4]*scale
                boxes_conf_landms[:,5:] = boxes_conf_landms[:,5:]*scale_for_landmarks

        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                loc, conf, landms = self.net(image)
                boxes = decode(loc.data.squeeze(0), self.anchors, self.cfg['variance'])
                boxes = boxes.cpu().numpy()

                conf = conf.data.squeeze(0)[:,1:2].cpu().numpy()

                landms = decode_landm(landms.data.squeeze(0), self.anchors, self.cfg['variance'])
                landms = landms.cpu().numpy()

                boxes_conf_landms = np.concatenate([boxes, conf, landms],-1)
                boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)
                if len(boxes_conf_landms)>0:
                    if self.letterbox_image:
                        boxes_conf_landms = retinaface_correct_boxes(boxes_conf_landms, np.array([self.input_shape[0], self.input_shape[1]]), np.array([im_height, im_width]))
                        
                    boxes_conf_landms[:,:4] = boxes_conf_landms[:,:4]*scale
                    boxes_conf_landms[:,5:] = boxes_conf_landms[:,5:]*scale_for_landmarks
        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
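get_FPS above runs one warm-up forward pass with decoding, then times test_interval repeated passes and returns the average seconds per image. A minimal, hypothetical benchmarking call might look like this (the Retinaface wrapper class name and the image path are assumptions):

# Hypothetical benchmark driver for the get_FPS method above.
from PIL import Image

retinaface = Retinaface()        # assumed wrapper class from the project
img = Image.open("street.jpg")   # any test image

test_interval = 100
tact_time = retinaface.get_FPS(img, test_interval)
print("{:.4f} seconds per image, {:.2f} FPS".format(tact_time, 1.0 / tact_time))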
Code Example #3
File: ssd.py  Project: zelda3721/ssd
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        photo = np.array(crop_img, dtype=np.float64)

        # Preprocess the image and normalize it
        photo = Variable(
            torch.from_numpy(
                np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)),
                               0)).cuda().type(torch.FloatTensor))
        preds = self.net(photo)

        top_conf = []
        top_label = []
        top_bboxes = []
        for i in range(preds.size(1)):
            j = 0
            while preds[0, i, j, 0] >= self.confidence:
                score = preds[0, i, j, 0]
                label_name = self.class_names[i - 1]
                pt = (preds[0, i, j, 1:]).detach().numpy()
                coords = [pt[0], pt[1], pt[2], pt[3]]
                top_conf.append(score)
                top_label.append(label_name)
                top_bboxes.append(coords)
                j = j + 1
        # Decode the prediction results
        if len(top_conf) <= 0:
            return image
        top_conf = np.array(top_conf)
        top_label = np.array(top_label)
        top_bboxes = np.array(top_bboxes)
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            top_bboxes[:, 0],
            -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # Remove the gray letterbox bars
        boxes = ssd_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(image)[0] +
                     np.shape(image)[1]) // self.model_image_size[0]

        for i, c in enumerate(top_label):
            predicted_class = c
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the bounding box
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[self.class_names.index(
                                   predicted_class)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
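detect_image above measures the label with ImageDraw.textsize, which was removed in Pillow 10. If these examples are run on a newer Pillow, a small helper like the sketch below (an assumption, not part of the original project) can stand in for draw.textsize(label, font):

# Hypothetical replacement for draw.textsize() on Pillow >= 10.
def text_size(draw, text, font):
    # textbbox returns (left, top, right, bottom) of the rendered text
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right - left, bottom - top

# Usage inside the drawing loop: label_size = text_size(draw, label, font)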
Code Example #4
    # criterion = MultiBoxLoss(3, 0.5, True, 0, True, 3, 0.5,False, True)
    model = ssd.net.module
    imgPath = '1.bmp'
    image = Image.open(imgPath)
    image.show()
    image_size = image.size
    image = image.convert('RGB')
    # r_image = ssd.detect_image(image)
    # r_image.show()

    # -------------------------------------------- #

    image = Image.open(imgPath)
    image = image.convert('RGB')
    image_shape = np.array(np.shape(image)[0:2])  # get the image size
    crop_img = np.array(letterbox_image(image, (300, 300)))
    photo = np.array(crop_img, dtype=np.float64)  # convert the type to np.float64

    photo = torch.from_numpy(
        np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)), 0)).type(
            torch.FloatTensor).requires_grad_(True)  # move the color-channel dimension to the front
    photo = photo.requires_grad_().cuda()  # .cpu()

    # preds = model(photo)  # torch.Size([1, 3, 200, 5])

    # pass
    guided_bp = Guided_backprop(model)
    result = guided_bp.visualize(photo, None)

    result = normalize(result)
    plt.imshow(result)
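The script above passes the gradient map returned by Guided_backprop through a normalize helper before plt.imshow; that helper is not shown in the snippet. One plausible implementation (an assumption) simply rescales the array to the [0, 1] range that imshow expects:

# Hypothetical normalize() helper used before plt.imshow above.
import numpy as np

def normalize(grad_map):
    grad_map = np.asarray(grad_map, dtype=np.float64)
    grad_map = grad_map - grad_map.min()
    max_val = grad_map.max()
    if max_val > 0:
        grad_map = grad_map / max_val
    return grad_map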
Code Example #5
    def detect_image(self, image):
        #---------------------------------------------------#
        #   Make a copy of the input image for drawing later
        #---------------------------------------------------#
        old_image = image.copy()

        image = np.array(image, np.float32)

        #---------------------------------------------------#
        #   Compute scale, used to map the predicted boxes back to the original image's width and height
        #---------------------------------------------------#
        scale = [
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0]
        ]
        scale_for_landmarks = [
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0]
        ]

        im_height, im_width, _ = np.shape(image)
        #---------------------------------------------------------#
        #   letterbox_image adds gray bars so the image can be resized without distortion
        #---------------------------------------------------------#
        if self.letterbox_image:
            image = np.array(
                letterbox_image(image,
                                [self.input_shape[1], self.input_shape[0]]),
                np.float32)
        else:
            self.anchors = Anchors(self.cfg,
                                   image_size=(im_height,
                                               im_width)).get_anchors()

        with torch.no_grad():
            #-----------------------------------------------------------#
            #   Preprocess the image and normalize it.
            #-----------------------------------------------------------#
            image = torch.from_numpy(
                preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)

            if self.cuda:
                self.anchors = self.anchors.cuda()
                image = image.cuda()

            loc, conf = self.net(image)

            #-----------------------------------------------------------#
            #   Decode the prediction results
            #-----------------------------------------------------------#
            boxes = decode(loc.data.squeeze(0), self.anchors,
                           self.cfg['variance'])
            boxes = boxes.cpu().numpy()

            conf = conf.data.squeeze(0)[:, 1:2].cpu().numpy()

            boxes_conf_landms = np.concatenate([boxes, conf], -1)
            print(boxes_conf_landms.shape)
            # boxes_conf_landms = non_max_suppression(boxes_conf_landms, 0.9)
            mask = boxes_conf_landms[:, 4] >= 0.15
            boxes_conf_landms = boxes_conf_landms[mask]
            print(boxes_conf_landms.shape)
            if len(boxes_conf_landms) <= 0:
                return old_image
            #---------------------------------------------------------#
            #   If letterbox_image was used, the gray-bar offsets must be removed.
            #---------------------------------------------------------#
            if self.letterbox_image:
                boxes_conf_landms = retinaface_correct_boxes(boxes_conf_landms, \
                    np.array([self.input_shape[0], self.input_shape[1]]), np.array([im_height, im_width]))

        boxes_conf_landms[:, :4] = boxes_conf_landms[:, :4] * scale

        for b in boxes_conf_landms:
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))

            # b[0]-b[3] are the face box coordinates, b[4] is the score
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255),
                          2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX,
                        0.5, (255, 255, 255))

            print(b[0], b[1], b[2], b[3], b[4])
            # b[5]-b[14] are the facial landmark coordinates
        return old_image
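This variant only draws the box and the score; the landmark coordinates mentioned in the last comment are left unused (and this network returns loc and conf only). For the RetinaFace-style examples here whose result rows do carry landmarks in b[5:15], a short, hypothetical drawing helper could look like this (colors and radius are arbitrary choices):

# Hypothetical landmark drawing for a result row b with five (x, y) pairs in b[5:15].
import cv2

def draw_landmarks(img, b):
    colors = [(0, 0, 255), (0, 255, 255), (255, 0, 255), (0, 255, 0), (255, 0, 0)]
    for k in range(5):
        x, y = int(b[5 + 2 * k]), int(b[6 + 2 * k])
        cv2.circle(img, (x, y), 1, colors[k], 4)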
Code Example #6
    def detect_image(self,image_id,image):
        f = open("./input/detection-results/"+image_id+".txt","w") 
        image_shape = np.array(np.shape(image)[0:2])
    
        #---------------------------------------------------------#
        #   Add gray bars to the image for a distortion-free resize
        #   Alternatively, resize directly for detection
        #---------------------------------------------------------#
        if self.letterbox_image:
            crop_img = np.array(letterbox_image(image, (self.input_shape[1],self.input_shape[0])))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize((self.input_shape[1],self.input_shape[0]), Image.BICUBIC)

        photo = np.array(crop_img,dtype = np.float64)
        with torch.no_grad():
            photo = Variable(torch.from_numpy(np.expand_dims(np.transpose(photo - MEANS, (2,0,1)),0)).type(torch.FloatTensor))
            if self.cuda:
                photo = photo.cuda()
            preds = self.net(photo)

            top_conf = []
            top_label = []
            top_bboxes = []
            for i in range(preds.size(1)):
                j = 0
                while preds[0, i, j, 0] >= self.confidence:
                    score = preds[0, i, j, 0]
                    label_name = self.class_names[i-1]
                    pt = (preds[0, i, j, 1:]).detach().numpy()
                    coords = [pt[0], pt[1], pt[2], pt[3]]
                    top_conf.append(score)
                    top_label.append(label_name)
                    top_bboxes.append(coords)
                    j = j + 1

        if len(top_conf)<=0:
            return 
            
        top_conf = np.array(top_conf)
        top_label = np.array(top_label)
        top_bboxes = np.array(top_bboxes)
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
        #-----------------------------------------------------------#
        #   Remove the gray-bar portion
        #-----------------------------------------------------------#
        if self.letterbox_image:
            boxes = ssd_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.input_shape[0],self.input_shape[1]]),image_shape)
        else:
            top_xmin = top_xmin * image_shape[1]
            top_ymin = top_ymin * image_shape[0]
            top_xmax = top_xmax * image_shape[1]
            top_ymax = top_ymax * image_shape[0]
            boxes = np.concatenate([top_ymin,top_xmin,top_ymax,top_xmax], axis=-1)

        for i, c in enumerate(top_label):
            predicted_class = c
            score = str(float(top_conf[i]))

            top, left, bottom, right = boxes[i]
            f.write("%s %s %s %s %s %s\n" % (predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)),str(int(bottom))))

        f.close()
        return 
Code Example #7
File: FPS_test.py  Project: leewi9/ssd-pytorch
    def get_FPS(self, image, test_interval):
        # Resize the image to match the network's input requirements
        image_shape = np.array(np.shape(image)[0:2])
        crop_img = np.array(
            letterbox_image(image, (self.input_shape[1], self.input_shape[0])))
        photo = np.array(crop_img, dtype=np.float64)
        # Preprocess the image and normalize it
        with torch.no_grad():
            photo = Variable(
                torch.from_numpy(
                    np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)),
                                   0)).type(torch.FloatTensor))
            if self.cuda:
                photo = photo.cuda()
            preds = self.net(photo)
            top_conf = []
            top_label = []
            top_bboxes = []
            for i in range(preds.size(1)):
                j = 0
                while preds[0, i, j, 0] >= self.confidence:
                    score = preds[0, i, j, 0]
                    label_name = self.class_names[i - 1]
                    pt = (preds[0, i, j, 1:]).detach().numpy()
                    coords = [pt[0], pt[1], pt[2], pt[3]]
                    top_conf.append(score)
                    top_label.append(label_name)
                    top_bboxes.append(coords)
                    j = j + 1
            # Decode the prediction results
            if len(top_conf) > 0:
                top_conf = np.array(top_conf)
                top_label = np.array(top_label)
                top_bboxes = np.array(top_bboxes)
                top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                    top_bboxes[:, 0],
                    -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                        top_bboxes[:, 2],
                        -1), np.expand_dims(top_bboxes[:, 3], -1)
                # Remove the gray letterbox bars
                boxes = ssd_correct_boxes(
                    top_ymin, top_xmin, top_ymax, top_xmax,
                    np.array([self.input_shape[0], self.input_shape[1]]),
                    image_shape)

        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                preds = self.net(photo)
                top_conf = []
                top_label = []
                top_bboxes = []
                for i in range(preds.size(1)):
                    j = 0
                    while preds[0, i, j, 0] >= self.confidence:
                        score = preds[0, i, j, 0]
                        label_name = self.class_names[i - 1]
                        pt = (preds[0, i, j, 1:]).detach().numpy()
                        coords = [pt[0], pt[1], pt[2], pt[3]]
                        top_conf.append(score)
                        top_label.append(label_name)
                        top_bboxes.append(coords)
                        j = j + 1
                # Decode the prediction results
                if len(top_conf) > 0:
                    top_conf = np.array(top_conf)
                    top_label = np.array(top_label)
                    top_bboxes = np.array(top_bboxes)
                    top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                        top_bboxes[:, 0], -1), np.expand_dims(
                            top_bboxes[:, 1], -1), np.expand_dims(
                                top_bboxes[:, 2],
                                -1), np.expand_dims(top_bboxes[:, 3], -1)
                    # Remove the gray letterbox bars
                    boxes = ssd_correct_boxes(
                        top_ymin, top_xmin, top_ymax, top_xmax,
                        np.array([self.input_shape[0], self.input_shape[1]]),
                        image_shape)

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
Code Example #8
File: ssd.py  Project: leewi9/ssd-pytorch
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

        #---------------------------------------------------#
        #   Distortion-free resize: add gray bars around the image
        #---------------------------------------------------#
        crop_img = np.array(
            letterbox_image(image, (self.input_shape[1], self.input_shape[0])))

        with torch.no_grad():
            #---------------------------------------------------#
            #   Preprocess the image and normalize it
            #---------------------------------------------------#
            photo = Variable(
                torch.from_numpy(
                    np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)),
                                   0)).type(torch.FloatTensor))
            if self.cuda:
                photo = photo.cuda()

            #---------------------------------------------------#
            #   Feed the image into the network for prediction
            #---------------------------------------------------#
            preds = self.net(photo)

            top_conf = []
            top_label = []
            top_bboxes = []
            #---------------------------------------------------#
            #   preds has shape (1, num_classes, top_k, 5)
            #---------------------------------------------------#
            for i in range(preds.size(1)):
                j = 0
                while preds[0, i, j, 0] >= self.confidence:
                    #---------------------------------------------------#
                    #   score is the confidence of the current box
                    #   label_name is the predicted class of the box
                    #---------------------------------------------------#
                    score = preds[0, i, j, 0]
                    label_name = self.class_names[i - 1]
                    #---------------------------------------------------#
                    #   pt has shape (4,): the box's top-left and bottom-right corners
                    #---------------------------------------------------#
                    pt = (preds[0, i, j, 1:]).detach().numpy()
                    coords = [pt[0], pt[1], pt[2], pt[3]]
                    top_conf.append(score)
                    top_label.append(label_name)
                    top_bboxes.append(coords)
                    j = j + 1

        # If no box passes the confidence threshold, return the original image
        if len(top_conf) <= 0:
            return image

        top_conf = np.array(top_conf)
        top_label = np.array(top_label)
        top_bboxes = np.array(top_bboxes)
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
            top_bboxes[:, 0],
            -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        #-----------------------------------------------------------#
        #   Remove the gray-bar portion
        #-----------------------------------------------------------#
        boxes = ssd_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.input_shape[0], self.input_shape[1]]), image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(image)[0] + np.shape(image)[1]) // self.input_shape[0],
            1)

        for i, c in enumerate(top_label):
            predicted_class = c
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the bounding box
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[self.class_names.index(
                                   predicted_class)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
Code Example #9
    def get_FPS(self, image, test_interval):
        image_shape = np.array(np.shape(image)[0:2])
        #---------------------------------------------------------#
        #   Add gray bars to the image for a distortion-free resize
        #   Alternatively, resize directly for detection
        #---------------------------------------------------------#
        if self.letterbox_image:
            crop_img = np.array(letterbox_image(image, (self.input_shape[1],self.input_shape[0])))
        else:
            crop_img = image.convert('RGB')
            crop_img = crop_img.resize((self.input_shape[1],self.input_shape[0]), Image.BICUBIC)

        photo = np.array(crop_img,dtype = np.float64)
        with torch.no_grad():
            photo = Variable(torch.from_numpy(np.expand_dims(np.transpose(photo-MEANS,(2,0,1)),0)).type(torch.FloatTensor))
            if self.cuda:
                photo = photo.cuda()
            preds = self.net(photo)
            top_conf = []
            top_label = []
            top_bboxes = []
            for i in range(preds.size(1)):
                j = 0
                while preds[0, i, j, 0] >= self.confidence:
                    score = preds[0, i, j, 0]
                    label_name = self.class_names[i-1]
                    pt = (preds[0, i, j, 1:]).detach().numpy()
                    coords = [pt[0], pt[1], pt[2], pt[3]]
                    top_conf.append(score)
                    top_label.append(label_name)
                    top_bboxes.append(coords)
                    j = j + 1
                    
            if len(top_conf)>0:
                top_conf = np.array(top_conf)
                top_label = np.array(top_label)
                top_bboxes = np.array(top_bboxes)
                top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
                #-----------------------------------------------------------#
                #   Remove the gray-bar portion
                #-----------------------------------------------------------#
                if self.letterbox_image:
                    boxes = ssd_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.input_shape[0],self.input_shape[1]]),image_shape)
                else:
                    top_xmin = top_xmin * image_shape[1]
                    top_ymin = top_ymin * image_shape[0]
                    top_xmax = top_xmax * image_shape[1]
                    top_ymax = top_ymax * image_shape[0]
                    boxes = np.concatenate([top_ymin,top_xmin,top_ymax,top_xmax], axis=-1)

        t1 = time.time()
        for _ in range(test_interval):
            with torch.no_grad():
                preds = self.net(photo)
                top_conf = []
                top_label = []
                top_bboxes = []
                for i in range(preds.size(1)):
                    j = 0
                    while preds[0, i, j, 0] >= self.confidence:
                        score = preds[0, i, j, 0]
                        label_name = self.class_names[i-1]
                        pt = (preds[0, i, j, 1:]).detach().numpy()
                        coords = [pt[0], pt[1], pt[2], pt[3]]
                        top_conf.append(score)
                        top_label.append(label_name)
                        top_bboxes.append(coords)
                        j = j + 1
                        
                if len(top_conf)>0:
                    top_conf = np.array(top_conf)
                    top_label = np.array(top_label)
                    top_bboxes = np.array(top_bboxes)
                    top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
                    #-----------------------------------------------------------#
                    #   Remove the gray-bar portion
                    #-----------------------------------------------------------#
                    if self.letterbox_image:
                        boxes = ssd_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.input_shape[0],self.input_shape[1]]),image_shape)
                    else:
                        top_xmin = top_xmin * image_shape[1]
                        top_ymin = top_ymin * image_shape[0]
                        top_xmax = top_xmax * image_shape[1]
                        top_ymax = top_ymax * image_shape[0]
                        boxes = np.concatenate([top_ymin,top_xmin,top_ymax,top_xmax], axis=-1)

        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time
Code Example #10
    def detect_image(self, image):
        self.confidence = 0.02
        image = np.array(image, np.float32)

        #---------------------------------------------------#
        #   Compute scale, used to map the predicted boxes back to the original image's width and height
        #---------------------------------------------------#
        scale = [
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0]
        ]
        scale_for_landmarks = [
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0],
            np.shape(image)[1],
            np.shape(image)[0]
        ]

        im_height, im_width, _ = np.shape(image)

        #---------------------------------------------------------#
        #   letterbox_image adds gray bars so the image can be resized without distortion
        #---------------------------------------------------------#
        if self.letterbox_image:
            image = np.array(
                letterbox_image(image,
                                [self.input_shape[1], self.input_shape[0]]),
                np.float32)
        else:
            self.anchors = Anchors(self.cfg,
                                   image_size=(im_height,
                                               im_width)).get_anchors()

        with torch.no_grad():
            #-----------------------------------------------------------#
            #   Preprocess the image and normalize it.
            #-----------------------------------------------------------#
            image = torch.from_numpy(
                preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)

            if self.cuda:
                self.anchors = self.anchors.cuda()
                image = image.cuda()

            loc, conf, landms = self.net(image)

            #-----------------------------------------------------------#
            #   Decode the prediction results
            #-----------------------------------------------------------#
            boxes = decode(loc.data.squeeze(0), self.anchors,
                           self.cfg['variance'])
            boxes = boxes.cpu().numpy()

            conf = conf.data.squeeze(0)[:, 1:2].cpu().numpy()

            landms = decode_landm(landms.data.squeeze(0), self.anchors,
                                  self.cfg['variance'])
            landms = landms.cpu().numpy()

            boxes_conf_landms = np.concatenate([boxes, conf, landms], -1)
            boxes_conf_landms = non_max_suppression(boxes_conf_landms,
                                                    self.confidence)
            if len(boxes_conf_landms) <= 0:
                return np.array([])
            #---------------------------------------------------------#
            #   If letterbox_image was used, the gray-bar offsets must be removed.
            #---------------------------------------------------------#
            if self.letterbox_image:
                boxes_conf_landms = retinaface_correct_boxes(
                    boxes_conf_landms,
                    np.array([self.input_shape[0], self.input_shape[1]]),
                    np.array([im_height, im_width]))

        boxes_conf_landms[:, :4] = boxes_conf_landms[:, :4] * scale
        boxes_conf_landms[:,
                          5:] = boxes_conf_landms[:, 5:] * scale_for_landmarks

        return boxes_conf_landms
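Unlike the drawing variants above, this detect_image returns the raw boxes_conf_landms array, already scaled back to the original image. A hypothetical caller that draws those results with OpenCV, assuming a Retinaface-style wrapper class, could look like:

# Hypothetical consumer of the boxes_conf_landms array returned above.
# Each row: x1, y1, x2, y2, score, then five (x, y) landmark pairs.
import cv2
import numpy as np

retinaface = Retinaface()              # assumed wrapper class
frame = cv2.imread("group_photo.jpg")  # BGR image used for drawing
results = retinaface.detect_image(np.array(frame))

for b in results:
    x1, y1, x2, y2 = map(int, b[:4])
    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
    cv2.putText(frame, "{:.2f}".format(b[4]), (x1, max(y1 - 4, 0)),
                cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
cv2.imwrite("group_photo_out.jpg", frame)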
Code Example #11
    def run(self):
        # Counter
        count = 0
        # print('{} model, anchors, and classes loaded.'.format(self.Config["model_path"]))
        image_shape = np.array(np.shape(self.image)[0:2])

        # letterbox_image pads the edges with gray bars to avoid distortion and resizes the whole image to 300*300
        crop_img = np.array(
            letterbox_image(self.image, (self.Config["model_image_size"][0],
                                         self.Config["model_image_size"][1])))
        # Convert to float64
        photo = np.array(crop_img, dtype=np.float64)

        # Preprocess the image and normalize it
        # photo = Variable(torch.from_numpy(np.expand_dims(np.transpose(crop_img-MEANS,(2,0,1)),0)).type(torch.FloatTensor)) # move the channel dimension from third to first; for CUDA GPU acceleration: .cuda().type(torch.FloatTensor))
        # crop_img - MEANS subtracts the per-channel mean from every pixel, i.e. standardization
        photo = torch.from_numpy(
            np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)),
                           0)).type(torch.FloatTensor)
        # preds.shape (1, 21, 200, 5): (batch_size for this image, num_classes, top-200 highest-scoring boxes, box parameters)
        preds = self.net(photo)

        top_conf = []
        top_label = []
        top_bboxes = []

        # Filter by confidence
        for i in range(preds.size(1)):
            j = 0
            while preds[0, i, j, 0] >= self.Config["confidence"]:
                score = preds[0, i, j, 0]
                label_name = self.class_names[i - 1]
                pt = (preds[0, i, j, 1:]).detach().numpy()
                coords = [pt[0], pt[1], pt[2], pt[3]]
                top_conf.append(score)
                top_label.append(label_name)
                top_bboxes.append(coords)
                j = j + 1

        # Decode the prediction results
        if len(top_conf) <= 0:
            self.msg.emit("图中一共有:0 个 目标对象")
            self.set_image.emit("")
            self.msg.emit("预测完成")
        else:
            top_conf = np.array(top_conf)
            top_label = np.array(top_label)
            top_bboxes = np.array(top_bboxes)
            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(
                top_bboxes[:, 0],
                -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(
                    top_bboxes[:, 2],
                    -1), np.expand_dims(top_bboxes[:, 3], -1)

            # Remove the gray letterbox bars
            boxes = ssd_correct_boxes(
                top_ymin, top_xmin, top_ymax, top_xmax,
                np.array([
                    self.Config["model_image_size"][0],
                    self.Config["model_image_size"][1]
                ]), image_shape)
            # Font
            font = ImageFont.truetype(
                font='model_data/simhei.ttf',
                size=np.floor(3e-2 * np.shape(self.image)[1] +
                              0.5).astype('int32'))
            # Line thickness
            thickness = (np.shape(self.image)[0] + np.shape(
                self.image)[1]) // self.Config["model_image_size"][0]

            for i, c in enumerate(top_label):
                count += 1
                predicted_class = c
                score = top_conf[i]

                top, left, bottom, right = boxes[i]
                top = top - 5
                left = left - 5
                bottom = bottom + 5
                right = right + 5

                top = max(0, np.floor(top + 0.5).astype('int32'))
                left = max(0, np.floor(left + 0.5).astype('int32'))
                bottom = min(
                    np.shape(self.image)[0],
                    np.floor(bottom + 0.5).astype('int32'))
                right = min(
                    np.shape(self.image)[1],
                    np.floor(right + 0.5).astype('int32'))

                # Draw the bounding box
                label = '{} {:.2f}-{}'.format(predicted_class, score, count)
                draw = ImageDraw.Draw(self.image)
                label_size = draw.textsize(label, font)
                label = label.encode('utf-8')
                self.msg.emit(str(label.decode()))

                if top - label_size[1] >= 0:
                    text_origin = np.array([left, top - label_size[1]])
                else:
                    text_origin = np.array([left, top + 1])

                for i in range(thickness):
                    draw.rectangle([left + i, top + i, right - i, bottom - i],
                                   outline=self.colors[self.class_names.index(
                                       predicted_class)])
                draw.rectangle(
                    [tuple(text_origin),
                     tuple(text_origin + label_size)],
                    fill=self.colors[self.class_names.index(predicted_class)])
                draw.text(text_origin,
                          str(label, 'UTF-8'),
                          fill=(0, 0, 0),
                          font=font)
                del draw
            self.msg.emit("图中一共有:" + str(count) + " 个 目标对象")
            # Can be changed
            self.path = "neural_network/img/predict_output.png"
            self.image.save(self.path)
            self.set_image.emit(self.path)
            self.msg.emit("预测完成")
Code Example #12
File: ssd.py  Project: cxqzjhz/pytorch-ssd-cam
    def detect_image(self, image):
        image_shape = np.array(
            np.shape(image)[0:2])  # get the image size, e.g. array([1330, 1330])
        # For detection, the original image is resized to match the target size while keeping its aspect ratio; the remainder is padded with gray.
        crop_img = np.array(
            letterbox_image(
                image, (self.model_image_size[0], self.model_image_size[1])))
        # The following line is redundant
        photo = np.array(crop_img, dtype=np.float64)  # convert the type to np.float64
        # Preprocess the image and normalize it
        with torch.no_grad():
            # Gradients are not computed inside this block, even for tensors with requires_grad=True.
            # Reference: https://blog.csdn.net/weixin_43178406/article/details/89517008
            photo = Variable(
                torch.from_numpy(
                    np.expand_dims(np.transpose(crop_img - MEANS, (2, 0, 1)),
                                   0)).type(
                                       torch.FloatTensor))  # move the color-channel dimension to the front
            # Standardize, move the channel dimension to the front, add a batch dimension, then move to the GPU.
            # On numpy.expand_dims, see: https://blog.csdn.net/qingzhuyuxian/article/details/90510203

            if self.cuda:
                photo = photo.cuda()  # torch.Size([1, 3, 300, 300])
            preds = self.net(photo)  # torch.Size([1, 3, 200, 5]): 1 confidence + 4 box coordinates
        #
        top_conf = []  # confidences of all predicted boxes in this image
        top_label = []  # class labels of all predicted boxes in this image
        top_bboxes = []  # coordinates of all predicted boxes in this image
        for i in range(
                1, preds.size(1)):  # iterate over the classes (i = 1, 2, ..., 20); index 0 is the background class and is skipped
            j = 0
            while preds[0, i, j, 0] >= self.confidence:  # filter the top-200 boxes by confidence
                # Author's note: a safer loop condition would also check j < preds.size(2), i.e.
                # while preds[0, i, j, 0] >= self.confidence and j < preds.size(2):
                score = preds[0, i, j, 0]
                # print("cxqcxq 陈旭旗陈旭旗self.class_names :",self.class_names) # cxqcxq 陈旭旗陈旭旗self.class_names : ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
                label_name = self.class_names[
                    i -
                    1]  # 有疑问 为什么不是 label_name = self.class_names[i]***************************************************************************************************************
                pt = (preds[0, i, j, 1:]).detach().numpy()
                coords = [pt[0], pt[1], pt[2], pt[3]]
                top_conf.append(score)
                top_label.append(label_name)
                top_bboxes.append(coords)
                j = j + 1
        # Decode the prediction results
        if len(top_conf) <= 0:  # nothing was detected, i.e. everything is background
            return image
        top_conf = np.array(top_conf)
        top_label = np.array(top_label)
        top_bboxes = np.array(top_bboxes)
        top_xmin, top_ymin, top_xmax, top_ymax = \
            np.expand_dims(top_bboxes[:, 0], -1), \
            np.expand_dims(top_bboxes[:, 1], -1), \
            np.expand_dims(top_bboxes[:, 2], -1), \
            np.expand_dims(top_bboxes[:, 3], -1)

        # Remove the gray letterbox bars
        boxes = ssd_correct_boxes(
            top_ymin, top_xmin, top_ymax, top_xmax,
            np.array([self.model_image_size[0], self.model_image_size[1]]),
            image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = (np.shape(image)[0] +
                     np.shape(image)[1]) // self.model_image_size[0]
        # e.g. (1330 + 1330) // 300 = 8

        for i, c in enumerate(top_label):
            predicted_class = c
            score = top_conf[i]

            top, left, bottom, right = boxes[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the bounding box
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)  # (240, 40)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]
                                        ])  # array([460, 654])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):  # thicken the border by drawing nested rectangles; the width parameter could be used instead
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[self.class_names.index(
                                   predicted_class)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[self.class_names.index(predicted_class)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw
        return image
Code Example #13
    def detect_image(self, image, picpath, frameNo):
        #---------------------------------------------------#
        #   Make a copy of the input image for drawing later
        #---------------------------------------------------#
        old_image = image.copy()

        image = np.array(image,np.float32)

        #---------------------------------------------------#
        #   Compute scale, used to map the predicted boxes back to the original image's width and height
        #---------------------------------------------------#
        scale = [np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0]]
        scale_for_landmarks = [np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
                                            np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
                                            np.shape(image)[1], np.shape(image)[0]]

        im_height, im_width, _ = np.shape(image)
        #---------------------------------------------------------#
        #   letterbox_image adds gray bars so the image can be resized without distortion
        #---------------------------------------------------------#
        if self.letterbox_image:
            image = np.array(letterbox_image(image, [self.input_shape[1], self.input_shape[0]]), np.float32)
        else:
            self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()
            
        with torch.no_grad():
            #-----------------------------------------------------------#
            #   Preprocess the image and normalize it.
            #-----------------------------------------------------------#
            image = torch.from_numpy(preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)

            if self.cuda:
                self.anchors = self.anchors.cuda()
                image = image.cuda()

            loc, conf, landms = self.net(image)
            
            #-----------------------------------------------------------#
            #   Decode the prediction results
            #-----------------------------------------------------------#
            boxes = decode(loc.data.squeeze(0), self.anchors, self.cfg['variance'])
            boxes = boxes.cpu().numpy()

            conf = conf.data.squeeze(0)[:,1:2].cpu().numpy()
            
            landms = decode_landm(landms.data.squeeze(0), self.anchors, self.cfg['variance'])
            landms = landms.cpu().numpy()

            boxes_conf_landms = np.concatenate([boxes, conf, landms],-1)
            # At this point the pretrained Retinaface_pytorch model has finished detecting the faces; the face boxes and the five landmark coordinates are all stored in dets, and the next part crops out the faces.
            # Directory used to store the generated single-face crops
            path_save = "./curve/faces/" # you can replace this path with your own
            '''
        # Crop the faces
        #if args.show_cutting_image:
            for num, b in enumerate(boxes_conf_landms): # dets contains the face box and the five landmark coordinates
                #if b[4] < 0.6:
                #    continue
                b = list(map(int, b))

                # landms: draw the landmark points on the face; comment this out if you want to save face crops without the points
                cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
                cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
                cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
                cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
                cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)

                # Compute the size of the face rectangle
                Height = b[3] - b[1]
                Width = b[2] - b[0]
              
                # Print the face rectangle size
                print("人脸数 / faces in all:", str(num+1), "\n")
                print("窗口大小 / The size of window:"
                      , '\n', "高度 / height:", Height
                      , '\n', "宽度 / width: ", Width)
                
                # Create a blank image with the size of the face box
                img_blank = np.zeros((Height, Width, 3), np.uint8)
                # Copy the face pixels into the blank image
                for h in range(Height):
                    for w in range(Width):
                        img_blank[h][w] = old_image[b[1] + h][b[0] + w]
                       
                cv2.namedWindow("img_faces")  # , 2)
                #cv2.imshow("img_faces", img_blank)  #显示图片
                cv2.imwrite(path_save + "img_face_4" + str(num + 1) + ".jpg", img_blank)  #将图片保存至你指定的文件夹
                print("Save into:", path_save + "img_face_4" + str(num + 1) + ".jpg")
                cv2.waitKey(0)

            '''
            boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)
            if len(boxes_conf_landms)<=0:
                return False, old_image
            #---------------------------------------------------------#
            #   If letterbox_image was used, the gray-bar offsets must be removed.
            #---------------------------------------------------------#
            if self.letterbox_image:
                boxes_conf_landms = retinaface_correct_boxes(boxes_conf_landms, \
                    np.array([self.input_shape[0], self.input_shape[1]]), np.array([im_height, im_width]))
            
        boxes_conf_landms[:,:4] = boxes_conf_landms[:,:4]*scale
        boxes_conf_landms[:,5:] = boxes_conf_landms[:,5:]*scale_for_landmarks
        num = 0
        for b in boxes_conf_landms:            
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))

            # b[0]-b[3] are the face box coordinates, b[4] is the score
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

            print(b[0], b[1], b[2], b[3], b[4])

            c = [-50, -50,50, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

            #b += c
            c = np.array(b) + np.array(c)
            if (c > 0).all() :
                    b = c
            else:
                for i, ind in enumerate(b[:4]):
                    if ind < 0:
                        print(i, b[i])
                        b[i] = 0

            print(b[0], b[1], b[2], b[3], b[4])
            # Compute the size of the face rectangle
            Height = b[3] - b[1]
            Width = b[2] - b[0]

            print("窗口大小 / The size of window:"
                      , '\n', "高度 / height:", Height
                      , '\n', "宽度 / width: ", Width)

            img_blank = old_image[int(b[1]):int(b[3]), int(b[0]):int(b[2])] # height, width
                    
            savepath = picpath + "/" + str(frameNo) + "_" + str(num)+".jpg"        
            #cv2.namedWindow("img_faces")  # , 2)
            #cv2.imshow("img_faces", img_blank)  #显示图片
            #img_blank = cv2.cvtColor(img_blank,cv2.COLOR_RGB2BGR)
            cv2.imwrite(savepath, img_blank)  #将图片保存至你指定的文件夹
            print("Save into:", savepath)
            #cv2.waitKey(0)          
            '''
            # b[5]-b[14] are the facial landmark coordinates
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)
            '''
            num += 1
        return True, old_image
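The crop-expansion step above widens each face box by 50 px on every side only when all adjusted values stay positive, and otherwise just zero-clamps the negatives, so boxes near the right or bottom edge can still run past the image. A simpler sketch (an assumption, not the project's code) clamps the expanded box to the image bounds directly:

# Hypothetical helper: expand a face box by a margin and clamp it to the image.
import numpy as np

def expand_and_clip(box, img_h, img_w, margin=50):
    # box is (x1, y1, x2, y2); returns integer coordinates that stay inside the image
    x1, y1, x2, y2 = box[:4]
    x1 = int(np.clip(x1 - margin, 0, img_w - 1))
    y1 = int(np.clip(y1 - margin, 0, img_h - 1))
    x2 = int(np.clip(x2 + margin, x1 + 1, img_w))
    y2 = int(np.clip(y2 + margin, y1 + 1, img_h))
    return x1, y1, x2, y2

# The crop would then be old_image[y1:y2, x1:x2] instead of the manual bookkeeping above.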