Beispiel #1
0
    def __init__(self, backbone=None, num_classes=21):
        """Assemble the SSD300 detector around a caller-supplied backbone.

        Args:
            backbone: Feature-extraction module. It must expose an
                ``out_channels`` attribute; each entry is used as the input
                channel count of one pair of prediction heads.
            num_classes: Number of class scores the confidence heads emit per
                default box.

        Raises:
            Exception: If ``backbone`` is ``None`` or has no ``out_channels``.
        """
        super(SSD300, self).__init__()
        if backbone is None:
            raise Exception("backbone is None")
        if not hasattr(backbone, "out_channels"):
            raise Exception("the backbone not has attribute: out_channel")

        # The supplied backbone becomes the SSD feature extractor.
        self.feature_extractor = backbone
        self.num_classes = num_classes

        # Append the extra convolutional feature layers after the backbone.
        # For resnet50, out_channels = [1024, 512, 512, 256, 256, 256].
        self._build_additional_features(self.feature_extractor.out_channels)

        # By default the first feature map and the last two produce 4 default
        # boxes per location; every other feature map produces 6.
        self.num_defaults = [4, 6, 6, 6, 4, 4]

        # One (localisation, confidence) pair of 3x3 convolutions per feature
        # map. Each pair is created loc-first so the layers are constructed in
        # the same interleaved order as a plain loop would create them.
        head_pairs = [
            (
                nn.Conv2d(in_channels, boxes * 4, kernel_size=3, padding=1),
                nn.Conv2d(in_channels, boxes * self.num_classes,
                          kernel_size=3, padding=1),
            )
            for boxes, in_channels in zip(self.num_defaults,
                                          self.feature_extractor.out_channels)
        ]
        self.loc = nn.ModuleList([loc_head for loc_head, _ in head_pairs])
        self.conf = nn.ModuleList([conf_head for _, conf_head in head_pairs])
        self._init_weights()

        # dboxes300_coco() generates the default boxes (8732 x 4 according to
        # the original note); loss, encoder and post-processing all share them.
        anchors = dboxes300_coco()
        self.compute_loss = Loss(anchors)
        self.encoder = Encoder(anchors)
        self.postprocess = PostProcess(anchors)
Beispiel #2
0
    def __init__(self, backbone=None, num_classes=21):
        """Build an SSD300 model on top of the given backbone.

        Args:
            backbone: Feature-extraction module exposing ``out_channels``;
                every entry feeds one localisation head and one confidence
                head as its input channel count.
            num_classes: Multiplier for the confidence heads' output channels
                (``boxes_per_cell * num_classes``).

        Raises:
            Exception: If ``backbone`` is ``None`` or has no ``out_channels``.
        """
        super(SSD300, self).__init__()
        if backbone is None:
            raise Exception("backbone is None")
        if not hasattr(backbone, "out_channels"):
            raise Exception("the backbone not has attribute: out_channel")

        self.feature_extractor = backbone
        self.num_classes = num_classes

        # For resnet50, out_channels = [1024, 512, 512, 256, 256, 256].
        self._build_additional_features(self.feature_extractor.out_channels)

        # Default boxes generated per location, one entry per feature map.
        self.num_defaults = [4, 6, 6, 6, 4, 4]

        # Fill both head containers in lockstep; zip stops at the shorter of
        # the two sequences.
        loc_heads = nn.ModuleList()
        conf_heads = nn.ModuleList()
        per_level = zip(self.num_defaults, self.feature_extractor.out_channels)
        for boxes_per_cell, in_channels in per_level:
            # Four box offsets per default box.
            loc_heads.append(
                nn.Conv2d(in_channels, boxes_per_cell * 4,
                          kernel_size=3, padding=1))
            # One score per class per default box.
            conf_heads.append(
                nn.Conv2d(in_channels, boxes_per_cell * self.num_classes,
                          kernel_size=3, padding=1))

        self.loc = loc_heads
        self.conf = conf_heads
        self._init_weights()

        # Loss, encoder and post-processing share one set of default boxes.
        prior_boxes = dboxes300_coco()
        self.compute_loss = Loss(prior_boxes)
        self.encoder = Encoder(prior_boxes)
        self.postprocess = PostProcess(prior_boxes)
Beispiel #3
0
    def __init__(self, backbone=None, num_classes=21, pretrain_path=None):
        """Build the SSD640 model, optionally loading backbone weights.

        Args:
            backbone: Feature-extraction module exposing ``out_channels``;
                each entry is the input channel count of one pair of
                prediction heads.
            num_classes: Number of class scores the confidence heads emit per
                default box.
            pretrain_path: Optional path to a checkpoint whose state dict is
                loaded into ``backbone``. Skipped when ``None``.

        Raises:
            Exception: If ``backbone`` is ``None`` or has no ``out_channels``.
        """
        super(SSD640, self).__init__()
        if backbone is None:
            raise Exception("backbone is None")
        if not hasattr(backbone, "out_channels"):
            raise Exception("the backbone not has attribute: out_channel")

        self.feature_extractor = backbone
        if pretrain_path is not None:
            # Deserialize onto the CPU so a checkpoint saved from a GPU can
            # still be read on a host without that device. load_state_dict
            # copies the values into the backbone's existing parameters, so
            # the backbone stays on whatever device it already lives on.
            # NOTE: torch.load unpickles the file; only load checkpoints from
            # a trusted source.
            state_dict = torch.load(pretrain_path, map_location="cpu")
            self.feature_extractor.load_state_dict(state_dict)

        self.num_classes = num_classes
        # No additional feature layers are built here: the prediction heads
        # attach directly to the channels listed in backbone.out_channels.

        # Default boxes generated per location, one entry per feature map.
        self.num_defaults = [1, 2]
        location_extractors = []
        confidence_extractors = []

        # zip stops at the shorter sequence, so at most len(num_defaults)
        # pairs of heads are created.
        for nd, oc in zip(self.num_defaults,
                          self.feature_extractor.out_channels):
            # nd is number_default_boxes, oc is output_channel
            location_extractors.append(
                nn.Conv2d(oc, nd * 4, kernel_size=3, padding=1))
            confidence_extractors.append(
                nn.Conv2d(oc, nd * self.num_classes, kernel_size=3, padding=1))

        self.loc = nn.ModuleList(location_extractors)
        self.conf = nn.ModuleList(confidence_extractors)
        self._init_weights()

        # NOTE(review): this 640 model takes its default boxes from
        # dboxes300_coco() -- confirm that is intended.
        self.default_box = dboxes300_coco()
        self.compute_loss = Loss(self.default_box)
        self.encoder = Encoder(self.default_box)
        self.postprocess = PostProcess(self.default_box)
    def __init__(self, backbone=None, num_classes=21):
        """Wire a backbone, a Predictor head and the box utilities together.

        Args:
            backbone: Feature-extraction module. It must expose an
                ``out_channels`` attribute; this constructor only checks that
                the attribute exists and does not read its value.
            num_classes: Class count forwarded to the ``Predictor``.

        Raises:
            Exception: If ``backbone`` is ``None`` or has no ``out_channels``.
        """
        super(RetinaNet640, self).__init__()
        if backbone is None:
            raise Exception("backbone is None")
        if not hasattr(backbone, "out_channels"):
            raise Exception("the backbone not has attribute: out_channel")

        self.feature_extractor = backbone
        self.num_classes = num_classes

        # The predictor is configured with fixed values rather than from
        # backbone.out_channels.
        predictor_config = dict(
            num_features=5,
            in_channels=256,
            num_layers_before_predictor=4,
            num_classes=num_classes,
            num_boxes=6,
        )
        self.predictor = Predictor(**predictor_config)

        # Loss, encoder and post-processing share one set of default boxes.
        anchors = dboxes640_coco()
        self.compute_loss = Loss(anchors)
        self.encoder = Encoder(anchors)
        self.postprocess = PostProcess(anchors)