Exemple #1
0
    EPOCH_LENGTH = 2000
    NUM_CLASSES = 20
    IMAGE_SHAPE = [600,600,3]
    BACKBONE = "resnet50"
    model = FasterRCNN(NUM_CLASSES,backbone=BACKBONE).cuda()

    #-------------------------------------------#
    #   权值文件的下载请看README
    #-------------------------------------------#
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load("model_data/voc_weights_resnet.pth", map_location=device)
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) ==  np.shape(v)}
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print('Finished!')

    cudnn.benchmark = True

    # 0.1用于验证,0.9用于训练
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines)*val_split)
    num_train = len(lines) - num_val
    
class FRCNN2(object):
    _defaults = {
        "model_path": 'logs/Epoch49-Total_Loss0.3838-Val_Loss0.4336.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.6,
        "backbone": "resnet50"
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   初始化faster RCNN
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.generate()
        self.mean = torch.Tensor([0, 0, 0,
                                  0]).cuda().repeat(self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2
                                 ]).cuda().repeat(self.num_classes + 1)[None]

    # ---------------------------------------------------#
    #   获得所有的分类
    # ---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    # ---------------------------------------------------#
    #   获得所有的分类
    # ---------------------------------------------------#
    def generate(self):
        # 计算总的种类
        self.num_classes = len(self.class_names)

        # 载入模型,如果原来的模型里已经包括了模型结构则直接载入。
        # 否则先构建模型再载入
        self.model = FasterRCNN(self.num_classes,
                                "predict",
                                backbone=self.backbone).cuda()
        self.model.load_state_dict(torch.load(self.model_path))
        cudnn.benchmark = True

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    # ---------------------------------------------------#
    #   检测图片
    # ---------------------------------------------------#
    def detect_image(self, image_id, image, savepath):
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float32) / 255
        photo = np.transpose(photo, (2, 0, 1))
        with torch.no_grad():
            images = []
            images.append(photo)
            images = np.asarray(images)
            images = torch.from_numpy(images).cuda()

            roi_cls_locs, roi_scores, rois, roi_indices, feature = self.model(
                images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs,
                                        roi_scores,
                                        rois,
                                        feature,
                                        height=height,
                                        width=width,
                                        score_thresh=self.confidence)
            if len(outputs) == 0:
                return old_image
            if np.size(outputs, 0) > 4:
                outputs = outputs[np.argsort(outputs[:, 4])]
                outputs = outputs[-4:, :]
            bbox = outputs[:, :4]
            conf = outputs[:, 4]
            label = outputs[:, 5]
            f = outputs[:, 6:]
            f_size = np.size(f, 0)
            if f_size < 4:
                k = 4 - f_size
                cc = 0
                for lab in label:
                    if (lab == 1) or (lab == 3) or (lab == 5):
                        conbin_f = f[cc, :]
                        conbin_f = conbin_f.reshape(1, 2048)
                        for num in range(0, k):
                            f = np.append(f, conbin_f, axis=0)
                        break
                    cc = cc + 1
            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
            bbox = np.array(bbox, np.int32)
            f = np.array(f, np.float32).reshape((4, 2048))
            print(np.size(f))
            # image = old_image
            # thickness = (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2
            # font = ImageFont.truetype(font='model_data/simhei.ttf',
            #                           size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))
            #
            # for i, c in enumerate(label):
            #     predicted_class = self.class_names[int(c)]
            #     score = conf[i]
            #
            #     left, top, right, bottom = bbox[i]
            #     top = top - 5
            #     left = left - 5
            #     bottom = bottom + 5
            #     right = right + 5
            #
            #     top = max(0, np.floor(top + 0.5).astype('int32'))
            #     left = max(0, np.floor(left + 0.5).astype('int32'))
            #     bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32'))
            #     right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32'))
            #
            #     # 画框框
            #     label = '{} {:.2f}'.format(predicted_class, score)
            #     draw = ImageDraw.Draw(image)
            #     label_size = draw.textsize(label, font)
            #     label = label.encode('utf-8')
            #     print(label)
            #
            #     if top - label_size[1] >= 0:
            #         text_origin = np.array([left, top - label_size[1]])
            #     else:
            #         text_origin = np.array([left, top + 1])
            #
            #     for i in range(thickness):
            #         draw.rectangle(
            #             [left + i, top + i, right - i, bottom - i],
            #             outline=self.colors[int(c)])
            #     draw.rectangle(
            #         [tuple(text_origin), tuple(text_origin + label_size)],
            #         fill=self.colors[int(c)])
            #     draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font)
            #     del draw
            #
            # image.save(savepath)
        if np.size(f, 0) != 4:
            print(
                "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
            )
        return {
            'image_id': image_id,
            'image_h': height,
            'image_w': width,
            'num_boxes': np.size(bbox, 0),
            'boxes': base64.b64encode(bbox),
            'features': base64.b64encode(f)
        }
Exemple #3
0
class FRCNN(object):
    _defaults = {
        "model_path": 'model_data/voc_weights_resnet.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
        "backbone": "resnet50"
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   初始化faster RCNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.generate()
        self.mean = torch.Tensor([0, 0, 0,
                                  0]).cuda().repeat(self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2
                                 ]).cuda().repeat(self.num_classes + 1)[None]

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def generate(self):
        # 计算总的种类
        self.num_classes = len(self.class_names)

        # 载入模型,如果原来的模型里已经包括了模型结构则直接载入。
        # 否则先构建模型再载入
        self.model = FasterRCNN(self.num_classes,
                                "predict",
                                backbone=self.backbone).cuda()
        self.model.load_state_dict(torch.load(self.model_path))
        cudnn.benchmark = True

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   检测图片
    #---------------------------------------------------#
    def detect_image(self, image):
        start_time = time.time()
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float32) / 255
        photo = np.transpose(photo, (2, 0, 1))

        images = []
        images.append(photo)
        images = np.asarray(images)
        images = torch.from_numpy(images).cuda()

        roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
        decodebox = DecodeBox(self.std, self.mean, self.num_classes)
        outputs = decodebox.forward(roi_cls_locs,
                                    roi_scores,
                                    rois,
                                    height=height,
                                    width=width,
                                    score_thresh=self.confidence)
        if len(outputs) == 0:
            return old_image
        bbox = outputs[:, :4]
        conf = outputs[:, 4]
        label = outputs[:, 5]

        bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
        bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
        bbox = np.array(bbox, np.int32)
        image = old_image
        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2
        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        for i, c in enumerate(label):
            predicted_class = self.class_names[int(c)]
            score = conf[i]

            left, top, right, bottom = bbox[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # 画框框
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        print("time:", time.time() - start_time)
        return image
Exemple #4
0
class FRCNN(object):
    _defaults = {
        "model_path": 'model_data/voc_weights_resnet.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
        "iou": 0.3,
        "backbone": "resnet50",
        "cuda": True,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #初始化程序

    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.generate()
        self.mean = torch.Tensor([0, 0, 0,
                                  0]).repeat(self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2,
                                 0.2]).repeat(self.num_classes + 1)[None]
        if self.cuda:
            self.mean = self.mean.cuda()
            self.std = self.std.cuda()

    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    def generate(self):
        self.num_classes = len(self.class_names)

        #载入模型
        self.model = FasterRCNN(self.num_classes,
                                "predict",
                                backbone=self.backbone)
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.model.load_state_dict(state_dict)

        self.model = self.model.eval()

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.model = nn.DataParallel(self.model)
            self.model = self.model.cuda()

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        #目标定位并画框
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #检测图片

    def detect_image(self, image):
        with torch.no_grad():
            start_time = time.time()
            image_shape = np.array(np.shape(image)[0:2])
            old_width = image_shape[1]
            old_height = image_shape[0]
            old_image = copy.deepcopy(image)
            width, height = get_new_img_size(old_width, old_height)

            image = image.resize([width, height], Image.BICUBIC)
            photo = np.array(image, dtype=np.float32) / 255
            photo = np.transpose(photo, (2, 0, 1))

            images = []
            images.append(photo)
            images = np.asarray(images)
            images = torch.from_numpy(images)
            if self.cuda:
                images = images.cuda()

    #非凸性优化,边界框矫正
            roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs,
                                        roi_scores,
                                        rois,
                                        height=height,
                                        width=width,
                                        nms_iou=self.iou,
                                        score_thresh=self.confidence)
            if len(outputs) == 0:
                return old_image
            bbox = outputs[:, :4]
            conf = outputs[:, 4]
            label = outputs[:, 5]

            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
            bbox = np.array(bbox, np.int32)

        image = old_image
        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2
        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        for i, c in enumerate(label):
            predicted_class = self.class_names[int(c)]
            score = conf[i]

            left, top, right, bottom = bbox[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            #图片输出
            label = '{}'.format(predicted_class)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        print("time:", time.time() - start_time)
        return image
Exemple #5
0
class FRCNN(object):
    _defaults = {
        "model_path": 'model_data/voc_weights_resnet.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.8,
        "iou": 0.3,
        "backbone": "vgg16",
        "cuda": True,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   初始化faster RCNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)
        self.class_names = self._get_class()
        self.generate()

        self.mean = torch.Tensor([0, 0, 0,
                                  0]).repeat(self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2,
                                 0.2]).repeat(self.num_classes + 1)[None]
        if self.cuda:
            self.mean = self.mean.cuda()
            self.std = self.std.cuda()

        self.decodebox = DecodeBox(self.std, self.mean, self.num_classes)

    #---------------------------------------------------#
    #   获得所有的分类
    #---------------------------------------------------#
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    #---------------------------------------------------#
    #   载入模型
    #---------------------------------------------------#
    def generate(self):
        #-------------------------------#
        #   计算总的类的数量
        #-------------------------------#
        self.num_classes = len(self.class_names)

        #-------------------------------#
        #   载入模型与权值
        #-------------------------------#
        self.model = FasterRCNN(self.num_classes,
                                "predict",
                                backbone=self.backbone).eval()
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.model.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = "0"
            # self.model = nn.DataParallel(self.model)
            self.model = self.model.cuda()

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # 画框设置不同的颜色
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

    #---------------------------------------------------#
    #   检测图片
    #---------------------------------------------------#
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        #---------------------------------------------------------#
        #   给原图像进行resize,resize到短边为600的大小上
        #---------------------------------------------------------#
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)

        #-----------------------------------------------------------#
        #   图片预处理,归一化。
        #-----------------------------------------------------------#
        photo = np.transpose(
            np.array(image, dtype=np.float32) / 255, (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            roi_cls_locs, roi_scores, rois, _ = self.model(images)
            #-------------------------------------------------------------#
            #   利用classifier的预测结果对建议框进行解码,获得预测框
            #-------------------------------------------------------------#
            outputs = self.decodebox.forward(roi_cls_locs[0],
                                             roi_scores[0],
                                             rois,
                                             height=height,
                                             width=width,
                                             nms_iou=self.iou,
                                             score_thresh=self.confidence)
            #---------------------------------------------------------#
            #   如果没有检测出物体,返回原图
            #---------------------------------------------------------#
            if len(outputs) == 0:
                return old_image
            outputs = np.array(outputs)
            bbox = outputs[:, :4]
            label = outputs[:, 4]
            conf = outputs[:, 5]

            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height

        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
            1)

        image = old_image
        for i, c in enumerate(label):
            predicted_class = self.class_names[int(c)]
            score = conf[i]

            left, top, right, bottom = bbox[i]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # draw bbox
            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
            print(label, top, left, bottom, right)

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            for i in range(thickness):
                draw.rectangle([left + i, top + i, right - i, bottom - i],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(label, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        return image