Esempi in Python per FasterRCNN, esempi in Python per nets.frcnn.FasterRCNN

Esempio n. 1

0

Mostra file

File: frcnn.py Progetto: innvoker/MIL

    def generate(self):
        """Build the predictor, load its weights and prepare per-class colours.

        Reads ``self.class_names``, ``self.backbone``, ``self.model_path`` and
        ``self.cuda``; sets ``self.num_classes``, ``self.model`` and
        ``self.colors``.
        """
        class_count = len(self.class_names)
        self.num_classes = class_count

        # Build the network in "predict" mode, then load the saved weights.
        self.model = FasterRCNN(class_count, "predict", backbone=self.backbone)
        print('Loading weights into state dict...')
        if torch.cuda.is_available():
            target = torch.device('cuda')
        else:
            target = torch.device('cpu')
        weights = torch.load(self.model_path, map_location=target)
        self.model.load_state_dict(weights)

        self.model = self.model.eval()

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.model = nn.DataParallel(self.model).cuda()

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One box colour per class: evenly spaced hues at full saturation and
        # value, converted to integer RGB triples.
        palette = []
        for idx in range(class_count):
            red, green, blue = colorsys.hsv_to_rgb(idx / class_count, 1., 1.)
            palette.append((int(red * 255), int(green * 255), int(blue * 255)))
        self.colors = palette

Esempio n. 2

0

Mostra file

File: frcnn.py Progetto: yin159/faster-rcnn-pytorch

    def generate(self):
        """Build the predictor in eval mode, load weights, pick class colours.

        Reads ``self.class_names``, ``self.backbone``, ``self.model_path`` and
        ``self.cuda``; sets ``self.num_classes``, ``self.model`` and
        ``self.colors``.
        """
        #-------------------------------#
        #   Count the classes
        #-------------------------------#
        n_classes = len(self.class_names)
        self.num_classes = n_classes

        #-------------------------------#
        #   Build the model and load its weights
        #-------------------------------#
        network = FasterRCNN(n_classes, "predict", backbone=self.backbone)
        self.model = network.eval()
        print('Loading weights into state dict...')
        device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
        checkpoint = torch.load(self.model_path,
                                map_location=torch.device(device_name))
        self.model.load_state_dict(checkpoint)

        if self.cuda:
            # self.model = nn.DataParallel(self.model)
            self.model = self.model.cuda()

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A different box colour for every class: evenly spaced hues turned
        # into integer RGB triples.
        def _to_rgb255(index):
            channels = colorsys.hsv_to_rgb(index / n_classes, 1., 1.)
            return (int(channels[0] * 255), int(channels[1] * 255),
                    int(channels[2] * 255))

        self.colors = [_to_rgb255(index) for index in range(n_classes)]

Esempio n. 3

0

Mostra file

File: frcnn.py Progetto: CrysR1337/Junior-3-second-half

    def generate(self):
        """Build the predictor on the GPU, load weights and pick class colours.

        Reads ``self.class_names``, ``self.backbone`` and ``self.model_path``;
        sets ``self.num_classes``, ``self.model`` and ``self.colors``.
        """
        # Total number of categories: length of the class list plus one.
        self.num_classes = len(self.class_names)+1
        # Build the network first, then load the saved state dict into it.
        self.model = FasterRCNN(self.num_classes,"predict",backbone=self.backbone).cuda()
        self.model.load_state_dict(torch.load(self.model_path))
        # The model is only used for prediction here. A freshly built
        # nn.Module is in training mode, so switch it to eval mode explicitly.
        self.model = self.model.eval()
        cudnn.benchmark = True

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A different box colour for every class: evenly spaced hues turned
        # into integer RGB triples.
        class_count = len(self.class_names)
        self.colors = [
            tuple(int(channel * 255)
                  for channel in colorsys.hsv_to_rgb(idx / class_count, 1., 1.))
            for idx in range(class_count)
        ]

Esempio n. 4

0

Mostra file

            val_toal_loss += val_total
    print('Finish Validation')
    print('\nEpoch:'+ str(epoch+1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1),val_toal_loss/(epoch_size_val+1)))

    print('Saving state, iter:', str(epoch+1))
    torch.save(model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth'%((epoch+1),total_loss/(epoch_size+1),val_toal_loss/(epoch_size_val+1)))

if __name__ == "__main__":
    # Parameter initialisation
    annotation_path = '2007_train.txt'
    EPOCH_LENGTH = 2000
    NUM_CLASSES = 20
    IMAGE_SHAPE = [600,600,3]
    BACKBONE = "resnet50"
    model = FasterRCNN(NUM_CLASSES,backbone=BACKBONE).cuda()

    #-------------------------------------------#
    #   See the README for how to download the weights file
    #-------------------------------------------#
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load("model_data/voc_weights_resnet.pth", map_location=device)
    # Keep only checkpoint entries whose name exists in the model and whose
    # shape matches. The `k in model_dict` guard avoids a KeyError when the
    # checkpoint holds parameters this model does not have.
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print('Finished!')

    cudnn.benchmark = True

Esempio n. 5

0

Mostra file

File: frcnn2.py Progetto: Tanchaoqun123/Fritillariae-Cirrhosae-Bulbus

class FRCNN2(object):
    """Faster R-CNN predictor that exports boxes and per-box feature vectors.

    Settings come from ``_defaults`` and can be overridden per instance with
    keyword arguments to the constructor.
    """

    _defaults = {
        "model_path": 'logs/Epoch49-Total_Loss0.3838-Val_Loss0.4336.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.6,
        "backbone": "resnet50"
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialise Faster R-CNN
    # ---------------------------------------------------#
    def __init__(self, **kwargs):
        """Load class names and the model, then build the decoding statistics.

        Args:
            **kwargs: Optional per-instance overrides for the entries of
                ``_defaults`` (for example ``model_path`` or ``confidence``).
        """
        self.__dict__.update(self._defaults)
        # Keyword arguments used to be accepted and then silently dropped;
        # apply them on top of the defaults so overrides take effect.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()
        self.mean = torch.Tensor([0, 0, 0,
                                  0]).cuda().repeat(self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2
                                 ]).cuda().repeat(self.num_classes + 1)[None]

    # ---------------------------------------------------#
    #   Read all class names
    # ---------------------------------------------------#
    def _get_class(self):
        """Return the class names listed one per line in ``self.classes_path``.

        Surrounding whitespace is stripped. Blank lines are skipped so that a
        trailing empty line does not inflate the class count.
        """
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            stripped = (line.strip() for line in f)
            return [name for name in stripped if name]

    # ---------------------------------------------------#
    #   Build the model and the class colours
    # ---------------------------------------------------#
    def generate(self):
        """Build the network on the GPU, load weights and pick class colours.

        Sets ``self.num_classes``, ``self.model`` and ``self.colors``.
        """
        # Total number of categories.
        self.num_classes = len(self.class_names)

        # Build the network first, then load the saved state dict into it.
        self.model = FasterRCNN(self.num_classes,
                                "predict",
                                backbone=self.backbone).cuda()
        self.model.load_state_dict(torch.load(self.model_path))
        # The model is only used for prediction. A freshly built nn.Module is
        # in training mode, so switch it to eval mode explicitly.
        self.model = self.model.eval()
        cudnn.benchmark = True

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A different box colour for every class: evenly spaced hues turned
        # into integer RGB triples.
        class_count = len(self.class_names)
        self.colors = [
            tuple(int(channel * 255)
                  for channel in colorsys.hsv_to_rgb(idx / class_count, 1., 1.))
            for idx in range(class_count)
        ]

    # ---------------------------------------------------#
    #   Detect objects in one image
    # ---------------------------------------------------#
    def detect_image(self, image_id, image, savepath):
        """Run detection on one image and return boxes plus their features.

        Args:
            image_id: Identifier copied unchanged into the result.
            image: Image object providing ``resize`` (used like a PIL image).
            savepath: Not used by this method; kept so existing callers work.

        Returns:
            A deep copy of the input image when nothing is detected.
            Otherwise a dict with ``image_id``, ``image_h``/``image_w`` (the
            resized network input size), ``num_boxes`` and base64-encoded
            ``boxes`` (int32) and ``features`` (float32, 4 x 2048).
        """
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float32) / 255
        photo = np.transpose(photo, (2, 0, 1))
        with torch.no_grad():
            # Batch of one image.
            images = torch.from_numpy(np.asarray([photo])).cuda()

            roi_cls_locs, roi_scores, rois, roi_indices, feature = self.model(
                images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs,
                                        roi_scores,
                                        rois,
                                        feature,
                                        height=height,
                                        width=width,
                                        score_thresh=self.confidence)
            if len(outputs) == 0:
                return old_image
            # Keep at most the four rows with the highest value in column 4.
            if np.size(outputs, 0) > 4:
                outputs = outputs[np.argsort(outputs[:, 4])]
                outputs = outputs[-4:, :]
            bbox = outputs[:, :4]
            label = outputs[:, 5]
            f = outputs[:, 6:]
            f_size = np.size(f, 0)
            if f_size < 4:
                # Pad up to four rows by repeating the feature row of the
                # first detection labelled 1, 3 or 5.
                # NOTE(review): if no such label exists, `f` stays short and
                # the reshape below raises ValueError - confirm intended.
                missing = 4 - f_size
                for row, lab in enumerate(label):
                    if (lab == 1) or (lab == 3) or (lab == 5):
                        filler = f[row, :].reshape(1, 2048)
                        f = np.append(f,
                                      np.repeat(filler, missing, axis=0),
                                      axis=0)
                        break
            # Map box coordinates from the resized image back to the original.
            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
            bbox = np.array(bbox, np.int32)
            f = np.array(f, np.float32).reshape((4, 2048))
            print(np.size(f))
        return {
            'image_id': image_id,
            'image_h': height,
            'image_w': width,
            'num_boxes': np.size(bbox, 0),
            'boxes': base64.b64encode(bbox),
            'features': base64.b64encode(f)
        }

Esempio n. 6

0

Mostra file

    #----------------------------------------------------#
    #   NUM_CLASSES must be updated before training:
    #   set it to the number of classes to be distinguished.
    #----------------------------------------------------#
    NUM_CLASSES = 20
    #-------------------------------------------------------------------------------------#
    #   input_shape is the input image size, 800,800,3 by default; GPU memory use
    #   grows as the input image gets larger.
    #   The video uses 600,600,3; in practice 800,800,3 was found to work better.
    #-------------------------------------------------------------------------------------#
    input_shape = [800,800,3]
    #----------------------------------------------------#
    #   Backbone feature-extraction network to use:
    #   vgg or resnet50
    #----------------------------------------------------#
    backbone = "resnet50"
    model = FasterRCNN(NUM_CLASSES,backbone=backbone)
    weights_init(model)

    #------------------------------------------------------#
    #   See the README for the weights file (Baidu Netdisk download)
    #------------------------------------------------------#
    model_path = 'model_data/voc_weights_resnet.pth'
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    # Keep only checkpoint entries whose name exists in the model and whose
    # shape matches. The `k in model_dict` guard avoids a KeyError when the
    # checkpoint holds parameters this model does not have.
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    print('Finished!')

Esempio n. 7

0

Mostra file

File: frcnn.py Progetto: innvoker/MIL

class FRCNN(object):
    """Faster R-CNN predictor that draws labelled boxes on an image.

    Settings come from ``_defaults`` and can be overridden per instance with
    keyword arguments to the constructor.
    """

    _defaults = {
        "model_path": 'model_data/voc_weights_resnet.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
        "iou": 0.3,
        "backbone": "resnet50",
        "cuda": True,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    # Initialisation

    def __init__(self, **kwargs):
        """Load class names and the model, then build the decoding statistics.

        Args:
            **kwargs: Optional per-instance overrides for the entries of
                ``_defaults`` (for example ``model_path`` or ``cuda``).
        """
        self.__dict__.update(self._defaults)
        # Keyword arguments used to be accepted and then silently dropped;
        # apply them on top of the defaults so overrides take effect.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()
        self.mean = torch.Tensor([0, 0, 0,
                                  0]).repeat(self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2,
                                 0.2]).repeat(self.num_classes + 1)[None]
        if self.cuda:
            self.mean = self.mean.cuda()
            self.std = self.std.cuda()

    def _get_class(self):
        """Return the class names listed one per line in ``self.classes_path``.

        Surrounding whitespace is stripped. Blank lines are skipped so that a
        trailing empty line does not inflate the class count.
        """
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            stripped = (line.strip() for line in f)
            return [name for name in stripped if name]

    def generate(self):
        """Build the network, load its weights and pick class colours.

        Sets ``self.num_classes``, ``self.model`` and ``self.colors``.
        """
        self.num_classes = len(self.class_names)

        # Build the model, then load the saved weights.
        self.model = FasterRCNN(self.num_classes,
                                "predict",
                                backbone=self.backbone)
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.model.load_state_dict(state_dict)

        self.model = self.model.eval()

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
            self.model = nn.DataParallel(self.model)
            self.model = self.model.cuda()

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A different box colour for every class: evenly spaced hues turned
        # into integer RGB triples.
        class_count = len(self.class_names)
        self.colors = [
            tuple(int(channel * 255)
                  for channel in colorsys.hsv_to_rgb(idx / class_count, 1., 1.))
            for idx in range(class_count)
        ]

    # Detect objects in one image

    def detect_image(self, image):
        """Detect objects in ``image`` and draw labelled boxes on a copy.

        Args:
            image: Image object providing ``resize`` (used like a PIL image).

        Returns:
            A deep copy of the input image, with one box and class label
            drawn per detection (unchanged when nothing is detected).
        """
        with torch.no_grad():
            start_time = time.time()
            image_shape = np.array(np.shape(image)[0:2])
            old_width = image_shape[1]
            old_height = image_shape[0]
            old_image = copy.deepcopy(image)
            width, height = get_new_img_size(old_width, old_height)

            image = image.resize([width, height], Image.BICUBIC)
            photo = np.array(image, dtype=np.float32) / 255
            photo = np.transpose(photo, (2, 0, 1))

            # Batch of one image.
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            # Run the network and decode its raw outputs into boxes.
            roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs,
                                        roi_scores,
                                        rois,
                                        height=height,
                                        width=width,
                                        nms_iou=self.iou,
                                        score_thresh=self.confidence)
            if len(outputs) == 0:
                return old_image
            bbox = outputs[:, :4]
            labels = outputs[:, 5]

            # Map box coordinates from the resized image back to the original.
            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
            bbox = np.array(bbox, np.int32)

        image = old_image
        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2
        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        # Distinct names are used for the loop variables so the text label
        # and the outline offset no longer rebind the names being iterated.
        for idx, c in enumerate(labels):
            predicted_class = self.class_names[int(c)]

            # Grow the box by 5 pixels per side, then clamp to the image.
            left, top, right, bottom = bbox[idx]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw onto the image.
            text = '{}'.format(predicted_class)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(text, font)
            encoded = text.encode('utf-8')
            print(encoded)

            # Put the caption above the box when there is room, else inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested one-pixel rectangles produce an outline of `thickness`.
            for offset in range(thickness):
                draw.rectangle([left + offset, top + offset,
                                right - offset, bottom - offset],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(encoded, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        print("time:", time.time() - start_time)
        return image

Esempio n. 8

0

Mostra file

File: train.py Progetto: zqyang111/faster-rcnn-pytorch

'''
一些建议的参数设置：
VGG：SGD优化器，冻结时学习率1e-3，解冻时学习率1e-4
    nets.rpn中ProposalCreator的n_train_post_nms=2000；
    utils.utils中ProposalTargetCreator的pos_ratio=0.25；
RESNET50：Adam优化器，冻结时学习率1e-4，解冻时学习率1e-5
    nets.rpn中ProposalCreator的n_train_post_nms=300；
    utils.utils中ProposalTargetCreator的pos_ratio=0.5;
'''
if __name__ == "__main__":
    # Parameter initialisation
    annotation_path = '2007_train.txt'
    NUM_CLASSES = 20
    IMAGE_SHAPE = [600, 600, 3]
    BACKBONE = "resnet50"
    model = FasterRCNN(NUM_CLASSES, backbone=BACKBONE).cuda()
    #-------------------------------#
    #   Whether to use the DataLoader
    #-------------------------------#
    Use_Data_Loader = True

    model_path = r'model_data/voc_weights_resnet.pth'
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    # Keep only checkpoint entries whose name exists in the model and whose
    # shape matches. The `k in model_dict` guard avoids a KeyError when the
    # checkpoint holds parameters this model does not have.
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
    }

Esempio n. 9

0

Mostra file

from nets.frcnn import FasterRCNN
from torchsummary import summary
from PIL import Image
import numpy as np
from utils.utils import loc2bbox
import torch
from thop import profile
from thop import clever_format
import torch
# Smoke-test script: build a Faster R-CNN (20 classes, "resnet50" backbone)
# on the GPU and push a two-image batch through it.
model = FasterRCNN(20, backbone="resnet50").cuda()
# model.load_state_dict(torch.load("logs/Epoch7-Total_Loss0.9575.pth"))
# Load the sample image, resize it to 600x600 and move its last axis to the
# front (presumably HWC -> CHW for an RGB image - TODO confirm).
# NOTE(review): pixel values are not scaled here - confirm this is intended.
a = np.array(Image.open("img/street.jpg").resize([600, 600]))
a = np.transpose(a, [2, 0, 1])
# Stack two copies of the same image along a new leading axis, convert the
# batch to a tensor and move it to the GPU.
a = torch.Tensor(
    np.concatenate(
        [np.expand_dims(a, 0), np.expand_dims(a, 0)], axis=0)).cuda()
# Forward pass; the model returns four values.
roi_cls_locs, roi_scores, rois, roi_indices = model(a)
# Convert the returned rois to a torch tensor.
rois = torch.Tensor(rois)
# mean = torch.Tensor([0,0,0,0]).cuda(). \
#     repeat(21)[None]
# std = torch.Tensor([0.1, 0.1, 0.2, 0.2]).cuda(). \
#     repeat(21)[None]

# roi_cls_loc = (roi_cls_locs * std + mean)
# roi_cls_loc = roi_cls_loc.view([-1, 21, 4])
# roi = rois.view((-1, 1, 4)).expand_as(roi_cls_loc)
# cls_bbox = loc2bbox((roi.cpu().detach().numpy()).reshape((-1, 4)),
#                     (roi_cls_loc.cpu().detach().numpy()).reshape((-1, 4)))
# cls_bbox = torch.Tensor(cls_bbox)
# cls_bbox = cls_bbox.view([-1, 21 * 4])
# print(cls_bbox)

Esempio n. 10

0

Mostra file

class FRCNN(object):
    """GPU Faster R-CNN predictor that draws labelled, scored boxes.

    Settings come from ``_defaults`` and can be overridden per instance with
    keyword arguments to the constructor.
    """

    _defaults = {
        "model_path": 'model_data/voc_weights_resnet.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.5,
        "backbone": "resnet50"
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        Unknown names do not raise; a descriptive string is returned instead.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise Faster R-CNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Load class names and the model, then build the decoding statistics.

        Args:
            **kwargs: Optional per-instance overrides for the entries of
                ``_defaults`` (for example ``model_path`` or ``confidence``).
        """
        self.__dict__.update(self._defaults)
        # Keyword arguments used to be accepted and then silently dropped;
        # apply them on top of the defaults so overrides take effect.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()
        self.mean = torch.Tensor([0, 0, 0,
                                  0]).cuda().repeat(self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2, 0.2
                                 ]).cuda().repeat(self.num_classes + 1)[None]

    #---------------------------------------------------#
    #   Read all class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the class names listed one per line in ``self.classes_path``.

        Surrounding whitespace is stripped. Blank lines are skipped so that a
        trailing empty line does not inflate the class count.
        """
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            stripped = (line.strip() for line in f)
            return [name for name in stripped if name]

    #---------------------------------------------------#
    #   Build the model and the class colours
    #---------------------------------------------------#
    def generate(self):
        """Build the network on the GPU, load weights and pick class colours.

        Sets ``self.num_classes``, ``self.model`` and ``self.colors``.
        """
        # Total number of categories.
        self.num_classes = len(self.class_names)

        # Build the network first, then load the saved state dict into it.
        self.model = FasterRCNN(self.num_classes,
                                "predict",
                                backbone=self.backbone).cuda()
        self.model.load_state_dict(torch.load(self.model_path))
        # The model is only used for prediction. A freshly built nn.Module is
        # in training mode, so switch it to eval mode explicitly.
        self.model = self.model.eval()
        cudnn.benchmark = True

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # A different box colour for every class: evenly spaced hues turned
        # into integer RGB triples.
        class_count = len(self.class_names)
        self.colors = [
            tuple(int(channel * 255)
                  for channel in colorsys.hsv_to_rgb(idx / class_count, 1., 1.))
            for idx in range(class_count)
        ]

    #---------------------------------------------------#
    #   Detect objects in one image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Detect objects in ``image`` and draw labelled boxes on a copy.

        Args:
            image: Image object providing ``resize`` (used like a PIL image).

        Returns:
            A deep copy of the input image, with one box and a
            "class score" caption drawn per detection (unchanged when
            nothing is detected).
        """
        start_time = time.time()
        image_shape = np.array(np.shape(image)[0:2])
        old_width = image_shape[1]
        old_height = image_shape[0]
        old_image = copy.deepcopy(image)
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height])
        photo = np.array(image, dtype=np.float32) / 255
        photo = np.transpose(photo, (2, 0, 1))

        # Prediction needs no gradients; disabling autograd avoids building
        # a graph for the forward pass.
        with torch.no_grad():
            # Batch of one image.
            images = torch.from_numpy(np.asarray([photo])).cuda()

            roi_cls_locs, roi_scores, rois, roi_indices = self.model(images)
            decodebox = DecodeBox(self.std, self.mean, self.num_classes)
            outputs = decodebox.forward(roi_cls_locs,
                                        roi_scores,
                                        rois,
                                        height=height,
                                        width=width,
                                        score_thresh=self.confidence)
        if len(outputs) == 0:
            return old_image
        bbox = outputs[:, :4]
        conf = outputs[:, 4]
        labels = outputs[:, 5]

        # Map box coordinates from the resized image back to the original.
        bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
        bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height
        bbox = np.array(bbox, np.int32)
        image = old_image
        thickness = (np.shape(old_image)[0] +
                     np.shape(old_image)[1]) // old_width * 2
        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        # Distinct names are used for the loop variables so the text label
        # and the outline offset no longer rebind the names being iterated.
        for idx, c in enumerate(labels):
            predicted_class = self.class_names[int(c)]
            score = conf[idx]

            # Grow the box by 5 pixels per side, then clamp to the image.
            left, top, right, bottom = bbox[idx]
            top = top - 5
            left = left - 5
            bottom = bottom + 5
            right = right + 5

            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(
                np.shape(image)[0],
                np.floor(bottom + 0.5).astype('int32'))
            right = min(
                np.shape(image)[1],
                np.floor(right + 0.5).astype('int32'))

            # Draw the box and its caption.
            text = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(text, font)
            encoded = text.encode('utf-8')
            print(encoded)

            # Put the caption above the box when there is room, else inside.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # Nested one-pixel rectangles produce an outline of `thickness`.
            for offset in range(thickness):
                draw.rectangle([left + offset, top + offset,
                                right - offset, bottom - offset],
                               outline=self.colors[int(c)])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=self.colors[int(c)])
            draw.text(text_origin,
                      str(encoded, 'UTF-8'),
                      fill=(0, 0, 0),
                      font=font)
            del draw

        print("time:", time.time() - start_time)
        return image

Esempio n. 11

0

Mostra file

    #----------------------------------------------------#
    #   NUM_CLASSES must be updated before training:
    #   set it to the number of classes to be distinguished.
    #----------------------------------------------------#
    NUM_CLASSES = 20
    #-------------------------------------------------------------------------------------#
    #   input_shape is the input image size, 800,800,3 by default; GPU memory use
    #   grows as the input image gets larger.
    #   The video uses 600,600,3; in practice 800,800,3 was found to work better.
    #-------------------------------------------------------------------------------------#
    input_shape = [800,800,3]
    #----------------------------------------------------#
    #   Backbone feature-extraction network to use:
    #   vgg or resnet50
    #----------------------------------------------------#
    backbone = "resnet50"
    model = FasterRCNN(NUM_CLASSES,backbone=backbone)

    # #------------------------------------------------------#
    #   See the README for the weights file (Baidu Netdisk download).
    #   The loading code below is commented out, so no pretrained weights
    #   are loaded in this snippet.
    #------------------------------------------------------#
    # model_path = 'model_data/voc_weights_resnet.pth'
    # print('Loading weights into state dict...')
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # model_dict = model.state_dict()
    # pretrained_dict = torch.load(model_path, map_location=device)
    # pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) ==  np.shape(v)}
    # model_dict.update(pretrained_dict)
    # model.load_state_dict(model_dict)
    print('Finished!')

    # Put the model into training mode; `net` is the same module object.
    net = model.train()

Esempio n. 12

0

Mostra file

class FRCNN(object):
    """Faster R-CNN inference wrapper: loads weights and draws detections.

    Every entry of ``_defaults`` becomes an instance attribute when the
    object is constructed, and any of them can be overridden through a
    keyword argument to the constructor.
    """

    # NOTE(review): the default weights file name mentions "resnet" while the
    # default backbone is "vgg16" -- confirm that the two actually match.
    _defaults = {
        "model_path": 'model_data/voc_weights_resnet.pth',
        "classes_path": 'model_data/voc_classes.txt',
        "confidence": 0.8,
        "iou": 0.3,
        "backbone": "vgg16",
        "cuda": True,
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value of setting ``n``.

        For an unknown name an error-message string is returned (no
        exception is raised), which callers may rely on.
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    #---------------------------------------------------#
    #   Initialise Faster R-CNN
    #---------------------------------------------------#
    def __init__(self, **kwargs):
        """Build the detector.

        Args:
            **kwargs: overrides for the entries of ``_defaults`` (for example
                ``confidence=0.5`` or ``cuda=False``). They used to be
                accepted but silently ignored; they are now applied on top of
                the defaults before the class list and the model are loaded.
        """
        self.__dict__.update(self._defaults)
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()

        # The 4-element mean/std vectors are repeated (num_classes + 1) times
        # and given a leading axis before being handed to DecodeBox.
        self.mean = torch.Tensor([0, 0, 0,
                                  0]).repeat(self.num_classes + 1)[None]
        self.std = torch.Tensor([0.1, 0.1, 0.2,
                                 0.2]).repeat(self.num_classes + 1)[None]
        if self.cuda:
            self.mean = self.mean.cuda()
            self.std = self.std.cuda()

        self.decodebox = DecodeBox(self.std, self.mean, self.num_classes)

    #---------------------------------------------------#
    #   Read all class names
    #---------------------------------------------------#
    def _get_class(self):
        """Return the class names listed one per line in ``self.classes_path``.

        ``~`` in the path is expanded and surrounding whitespace is stripped
        from every line.
        """
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    #---------------------------------------------------#
    #   Load the model
    #---------------------------------------------------#
    def generate(self):
        """Create the network, load its weights and pick one colour per class.

        Sets ``self.num_classes``, ``self.model`` (in eval mode, moved to the
        GPU when ``self.cuda`` is true) and ``self.colors``.
        """
        # Total number of classes.
        self.num_classes = len(self.class_names)

        # Build the model in "predict" mode and load the weights.
        self.model = FasterRCNN(self.num_classes,
                                "predict",
                                backbone=self.backbone).eval()
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
        self.model.load_state_dict(state_dict)

        if self.cuda:
            os.environ["CUDA_VISIBLE_DEVICES"] = "0"
            # nn.DataParallel wrapping is intentionally left disabled here.
            self.model = self.model.cuda()

        print('{} model, anchors, and classes loaded.'.format(self.model_path))

        # One box colour per class: hues evenly spaced, full saturation and
        # value, converted to 0-255 integer RGB tuples.
        num = len(self.class_names)
        self.colors = [
            tuple(
                int(channel * 255)
                for channel in colorsys.hsv_to_rgb(idx / num, 1., 1.))
            for idx in range(num)
        ]

    @staticmethod
    def _text_size(draw, text, font):
        """Return the (width, height) needed to draw ``text`` with ``font``.

        ``ImageDraw.textsize`` is used when the installed Pillow still has it
        (same behaviour as before); it was removed in Pillow 10, where
        ``textbbox`` is used instead. The right/bottom edges measured from the
        origin are returned so the filled label background spans the text.
        """
        if hasattr(draw, 'textsize'):
            return draw.textsize(text, font)
        _, _, right, bottom = draw.textbbox((0, 0), text, font=font)
        return right, bottom

    #---------------------------------------------------#
    #   Detect objects in an image
    #---------------------------------------------------#
    def detect_image(self, image):
        """Run detection on ``image`` and return a copy with boxes drawn.

        Args:
            image: a PIL image (it is resized with ``Image.BICUBIC`` and
                drawn on through ``ImageDraw``).

        Returns:
            A deep copy of the input with one labelled rectangle per
            detection, or the untouched copy when nothing is detected.
        """
        image_shape = np.array(np.shape(image)[0:2])
        old_width, old_height = image_shape[1], image_shape[0]
        old_image = copy.deepcopy(image)

        # Resize for the network. Per the original comment the short side is
        # scaled to 600 by get_new_img_size -- TODO confirm against helper.
        width, height = get_new_img_size(old_width, old_height)
        image = image.resize([width, height], Image.BICUBIC)

        # Scale pixel values by 1/255 and move the last axis to the front.
        photo = np.transpose(
            np.array(image, dtype=np.float32) / 255, (2, 0, 1))

        with torch.no_grad():
            images = torch.from_numpy(np.asarray([photo]))
            if self.cuda:
                images = images.cuda()

            roi_cls_locs, roi_scores, rois, _ = self.model(images)
            # Decode the classifier output for the proposals into final boxes.
            outputs = self.decodebox.forward(roi_cls_locs[0],
                                             roi_scores[0],
                                             rois,
                                             height=height,
                                             width=width,
                                             nms_iou=self.iou,
                                             score_thresh=self.confidence)
            # Nothing detected: hand back the unmodified copy.
            if len(outputs) == 0:
                return old_image
            outputs = np.array(outputs)
            bbox = outputs[:, :4]
            labels = outputs[:, 4]
            conf = outputs[:, 5]

            # Map box coordinates from the resized image back to the
            # original image size (even columns: x, odd columns: y).
            bbox[:, 0::2] = (bbox[:, 0::2]) / width * old_width
            bbox[:, 1::2] = (bbox[:, 1::2]) / height * old_height

        # Font size follows the width of the *resized* image, as before.
        font = ImageFont.truetype(font='model_data/simhei.ttf',
                                  size=np.floor(3e-2 * np.shape(image)[1] +
                                                0.5).astype('int32'))

        thickness = max(
            (np.shape(old_image)[0] + np.shape(old_image)[1]) // old_width * 2,
            1)

        image = old_image
        img_height, img_width = np.shape(image)[0], np.shape(image)[1]
        # One drawing context is enough for every box on this image.
        draw = ImageDraw.Draw(image)

        # Distinct names are used for the detection index and the border
        # offset so neither loop clobbers the other's variable.
        for det_idx, class_id in enumerate(labels):
            class_id = int(class_id)
            predicted_class = self.class_names[class_id]
            score = conf[det_idx]

            # Grow the box by 5 px per side, round to the nearest integer
            # and clip it to the image bounds.
            left, top, right, bottom = bbox[det_idx]
            top = max(0, np.floor(top - 5 + 0.5).astype('int32'))
            left = max(0, np.floor(left - 5 + 0.5).astype('int32'))
            bottom = min(img_height,
                         np.floor(bottom + 5 + 0.5).astype('int32'))
            right = min(img_width,
                        np.floor(right + 5 + 0.5).astype('int32'))

            # Label text and its on-image size.
            label_text = '{} {:.2f}'.format(predicted_class, score)
            label_size = self._text_size(draw, label_text, font)
            # The log line keeps printing the UTF-8 bytes form, as before.
            print(label_text.encode('utf-8'), top, left, bottom, right)

            # Put the label above the box when it fits, otherwise inside it.
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            box_color = self.colors[class_id]
            # A thick border is drawn as nested 1-px rectangles.
            for offset in range(thickness):
                draw.rectangle([left + offset, top + offset,
                                right - offset, bottom - offset],
                               outline=box_color)
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=box_color)
            draw.text(text_origin,
                      label_text,
                      fill=(0, 0, 0),
                      font=font)

        return image

Esempio n. 13

0

Mostra file

File: train.py Progetto: jiabinnn/faster-rcnn-pytorch

           (epoch_size_val + 1)))

    print('Saving state, iter:', str(epoch + 1))
    torch.save(
        model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth' %
        ((epoch + 1), total_loss / (epoch_size + 1), val_toal_loss /
         (epoch_size_val + 1)))


if __name__ == "__main__":
    # 参数初始化
    annotation_path = 'traindata.txt'
    NUM_CLASSES = 1
    IMAGE_SHAPE = [600, 600, 3]
    BACKBONE = "resnet50"
    model = FasterRCNN(NUM_CLASSES, backbone=BACKBONE)
    #-------------------------------#
    #   Dataloder的使用
    #-------------------------------#
    Use_Data_Loader = True
    Cuda = True

    model_path = r'model_data/voc_weights_resnet.pth'
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if np.shape(model_dict[k]) == np.shape(v)

Esempio n. 14

0

Mostra file

    Cuda = True
    #----------------------------------------------------#
    #   NUM_CLASSES must be updated before training:
    #   set it to the number of classes to distinguish.
    #----------------------------------------------------#
    NUM_CLASSES = 6
    #-------------------------------------------------------------------------------------#
    #   input_shape is the input image size (600,600,3 here; the original note gives
    #   800,800,3 as the default). GPU memory usage grows with the input size.
    #-------------------------------------------------------------------------------------#
    input_shape = [600, 600, 3]
    #----------------------------------------------------#
    #   Backbone feature-extraction network to use:
    #   vgg or resnet50
    #----------------------------------------------------#
    backbone = "resnet50"
    model = FasterRCNN(NUM_CLASSES, backbone=backbone)

    #------------------------------------------------------#
    #   See the README for the weights file (Baidu Netdisk download).
    #------------------------------------------------------#
    model_path = 'model_data/voc_weights_resnet.pth'
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # The original author noted both dicts as "OrderedDict :328" (presumably
    # 328 entries for this backbone).
    model_dict = model.state_dict()
    pretrained_dict = torch.load(model_path, map_location=device)
    # Keep only checkpoint tensors that exist in the model AND have the same
    # shape. The membership test prevents a KeyError when the checkpoint
    # carries keys this model does not have.
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
    }

Esempio n. 15

0

Mostra file

    parser.add_argument("--lr", type=float, help="learning_rate", default=1e-3)
    # The help text for --backbone used to be a copy-paste of "frozen_epoches".
    parser.add_argument("-b", "--backbone", type=str, help="backbone network", default='resnet50', choices=['resnet50','vgg'])
    parser.add_argument("-f", "--freeze", type=int, help="frozen_epoches", default=5)
    parser.add_argument("-u", "--unfreeze", type=int, help="free_epoches", default=5)
    # parser.add_argument('-v', "--version", type=int, help='you want to use efficientdet-dX?', default=0)
    # parser.add_argument('-b', "--batch_size", type=int, help='just batch_size', default=64)
    parser.add_argument('-p', "--pre_model", type=str, help='just pretrained_model_path default = \'\'', default="")
    parser.add_argument('-s', '--val_split', type=float, help='验证集的比例', default=0.1)
    
    args = parser.parse_args()

    # Parameter initialisation
    annotation_path = '2007_train.txt'
    EPOCH_LENGTH = 2000
    IMAGE_SHAPE = [600,600,3]
    model = FasterRCNN(num_classes,backbone=args.backbone).cuda()
    # NOTE: no download URL is available for the vgg backbone ('none'), so
    # --pre_model has to be given when --backbone vgg is selected.
    model_urls = {'resnet50':'https://github.com/you-bowen/tutorical_myDL/releases/download/1.0/frcnn_resnet50.pth',
                  'vgg':'none'}
    #-------------------------------------------#
    #   See the README for downloading the weights file
    #-------------------------------------------#
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
    # Load a local checkpoint when one is given, otherwise download the
    # backbone's published weights. map_location is passed in both branches.
    if args.pre_model:
        pretrained_dict = torch.load(args.pre_model, map_location=device)
    else:
        pretrained_dict = load_url(model_urls[args.backbone], map_location=device)
    # Keep only tensors that exist in the model with the same shape, skipping
    # the BatchNorm `num_batches_tracked` counters. The membership test
    # prevents a KeyError on checkpoint keys the model does not have.
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if k in model_dict
        and np.shape(model_dict[k]) == np.shape(v)
        and not k.endswith('num_batches_tracked')
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    # Debug print of one counter; the key is only looked up when present, so
    # backbones without this layer name do not crash here.
    tracked_key = 'extractor.4.0.bn1.num_batches_tracked'
    loaded_state = model.state_dict()
    if tracked_key in loaded_state:
        print(loaded_state[tracked_key])
    print('Finished!')