Example #1
class SSD(nn.Module):
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = torch.tensor(self.priorbox.forward())
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        sources = list()
        loc = list()
        conf = list()

        # get outputs of conv4_3
        for k in range(23):
            x = self.vgg[k](x)

        s = self.L2Norm(x)
        sources.append(s)

        # get outputs of fc7
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # get the rest outputs
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # regression layers and classification layers
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        # resize
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        if self.phase == "test":
            # loc is reshaped to (batch_size, num_anchors, 4)
            # conf is reshaped to (batch_size, num_anchors, num_classes)
            output = self.detect(
                loc.view(loc.size(0), -1, 4),  # loc preds
                self.softmax(conf.view(conf.size(0), -1,
                                       self.num_classes)),  # conf preds
                self.priors)
        else:
            output = (loc.view(loc.size(0), -1,
                               4), conf.view(conf.size(0), -1,
                                             self.num_classes), self.priors)
        return output
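For orientation, the flattened loc/conf tensors above correspond to 8732 anchors in the standard SSD300 layout (feature maps of 38, 19, 10, 5, 3 and 1 cells per side with 4, 6, 6, 6, 4 and 4 priors per cell, as quoted in the comments of the later examples); a quick sanity check of that arithmetic, assuming that standard layout:

feature_sizes = [38, 19, 10, 5, 3, 1]   # spatial size of each of the 6 effective feature maps
priors_per_cell = [4, 6, 6, 6, 4, 4]    # prior boxes per grid cell on each map

num_anchors = sum(f * f * p for f, p in zip(feature_sizes, priors_per_cell))
print(num_anchors)      # 8732
print(num_anchors * 4)  # 34928, the flattened loc width before the final .view(batch, -1, 4)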
Example #2
def SSD300(input_shape,
           num_classes=21,
           anchors_size=[30, 60, 111, 162, 213, 264, 315]):
    #---------------------------------#
    #   A typical input size is [300, 300, 3]
    #---------------------------------#
    input_tensor = Input(shape=input_shape)

    # the net dict holds the whole SSD structure; each feature layer can be looked up by its layer name
    net = VGG16(input_tensor)

    #----------------------- process the extracted backbone features ---------------------------#
    # apply L2 normalization to the channels of conv4_3
    # 38,38,512
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    num_priors = 4
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    net['conv4_3_norm_mbox_loc'] = Conv2D(num_priors * 4,
                                          kernel_size=(3, 3),
                                          padding='same',
                                          name='conv4_3_norm_mbox_loc')(
                                              net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc_flat'] = Flatten(
        name='conv4_3_norm_mbox_loc_flat')(net['conv4_3_norm_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    net['conv4_3_norm_mbox_conf'] = Conv2D(num_priors * num_classes,
                                           kernel_size=(3, 3),
                                           padding='same',
                                           name='conv4_3_norm_mbox_conf')(
                                               net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf_flat'] = Flatten(
        name='conv4_3_norm_mbox_conf_flat')(net['conv4_3_norm_mbox_conf'])

    priorbox = PriorBox(input_shape,
                        anchors_size[0],
                        max_size=anchors_size[1],
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')
    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])

    # process the fc7 layer
    # 19,19,1024
    num_priors = 6
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    net['fc7_mbox_loc'] = Conv2D(num_priors * 4,
                                 kernel_size=(3, 3),
                                 padding='same',
                                 name='fc7_mbox_loc')(net['fc7'])
    net['fc7_mbox_loc_flat'] = Flatten(name='fc7_mbox_loc_flat')(
        net['fc7_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    net['fc7_mbox_conf'] = Conv2D(num_priors * num_classes,
                                  kernel_size=(3, 3),
                                  padding='same',
                                  name='fc7_mbox_conf')(net['fc7'])
    net['fc7_mbox_conf_flat'] = Flatten(name='fc7_mbox_conf_flat')(
        net['fc7_mbox_conf'])

    priorbox = PriorBox(input_shape,
                        anchors_size[1],
                        max_size=anchors_size[2],
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')
    net['fc7_mbox_priorbox'] = priorbox(net['fc7'])

    # process conv6_2
    # 10,10,512
    num_priors = 6
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv6_2_mbox_loc')(net['conv6_2'])
    net['conv6_2_mbox_loc'] = x
    net['conv6_2_mbox_loc_flat'] = Flatten(name='conv6_2_mbox_loc_flat')(
        net['conv6_2_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name='conv6_2_mbox_conf')(net['conv6_2'])
    net['conv6_2_mbox_conf'] = x
    net['conv6_2_mbox_conf_flat'] = Flatten(name='conv6_2_mbox_conf_flat')(
        net['conv6_2_mbox_conf'])

    priorbox = PriorBox(input_shape,
                        anchors_size[2],
                        max_size=anchors_size[3],
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv6_2_mbox_priorbox')
    net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2'])

    # process conv7_2
    # 5,5,256
    num_priors = 6
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv7_2_mbox_loc')(net['conv7_2'])
    net['conv7_2_mbox_loc'] = x
    net['conv7_2_mbox_loc_flat'] = Flatten(name='conv7_2_mbox_loc_flat')(
        net['conv7_2_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name='conv7_2_mbox_conf')(net['conv7_2'])
    net['conv7_2_mbox_conf'] = x
    net['conv7_2_mbox_conf_flat'] = Flatten(name='conv7_2_mbox_conf_flat')(
        net['conv7_2_mbox_conf'])

    priorbox = PriorBox(input_shape,
                        anchors_size[3],
                        max_size=anchors_size[4],
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv7_2_mbox_priorbox')
    net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2'])

    # process conv8_2
    # 3,3,256
    num_priors = 4
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv8_2_mbox_loc')(net['conv8_2'])
    net['conv8_2_mbox_loc'] = x
    net['conv8_2_mbox_loc_flat'] = Flatten(name='conv8_2_mbox_loc_flat')(
        net['conv8_2_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name='conv8_2_mbox_conf')(net['conv8_2'])
    net['conv8_2_mbox_conf'] = x
    net['conv8_2_mbox_conf_flat'] = Flatten(name='conv8_2_mbox_conf_flat')(
        net['conv8_2_mbox_conf'])

    priorbox = PriorBox(input_shape,
                        anchors_size[4],
                        max_size=anchors_size[5],
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv8_2_mbox_priorbox')
    net['conv8_2_mbox_priorbox'] = priorbox(net['conv8_2'])

    # process conv9_2
    # 1,1,256
    num_priors = 4
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv9_2_mbox_loc')(net['conv9_2'])
    net['conv9_2_mbox_loc'] = x
    net['conv9_2_mbox_loc_flat'] = Flatten(name='conv9_2_mbox_loc_flat')(
        net['conv9_2_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name='conv9_2_mbox_conf')(net['conv9_2'])
    net['conv9_2_mbox_conf'] = x
    net['conv9_2_mbox_conf_flat'] = Flatten(name='conv9_2_mbox_conf_flat')(
        net['conv9_2_mbox_conf'])

    priorbox = PriorBox(input_shape,
                        anchors_size[5],
                        max_size=anchors_size[6],
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv9_2_mbox_priorbox')

    net['conv9_2_mbox_priorbox'] = priorbox(net['conv9_2'])

    # stack all the results
    net['mbox_loc'] = concatenate([
        net['conv4_3_norm_mbox_loc_flat'], net['fc7_mbox_loc_flat'],
        net['conv6_2_mbox_loc_flat'], net['conv7_2_mbox_loc_flat'],
        net['conv8_2_mbox_loc_flat'], net['conv9_2_mbox_loc_flat']
    ],
                                  axis=1,
                                  name='mbox_loc')
    net['mbox_conf'] = concatenate([
        net['conv4_3_norm_mbox_conf_flat'], net['fc7_mbox_conf_flat'],
        net['conv6_2_mbox_conf_flat'], net['conv7_2_mbox_conf_flat'],
        net['conv8_2_mbox_conf_flat'], net['conv9_2_mbox_conf_flat']
    ],
                                   axis=1,
                                   name='mbox_conf')
    net['mbox_priorbox'] = concatenate([
        net['conv4_3_norm_mbox_priorbox'], net['fc7_mbox_priorbox'],
        net['conv6_2_mbox_priorbox'], net['conv7_2_mbox_priorbox'],
        net['conv8_2_mbox_priorbox'], net['conv9_2_mbox_priorbox']
    ],
                                       axis=1,
                                       name='mbox_priorbox')

    # 8732,4
    net['mbox_loc'] = Reshape((-1, 4), name='mbox_loc_final')(net['mbox_loc'])
    # 8732,21
    net['mbox_conf'] = Reshape((-1, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    # the priorbox output is 8732,8; the softmax keeps mbox_conf at 8732,21
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])
    # 8732,33
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2,
        name='predictions')

    model = Model(net['input'], net['predictions'])
    return model
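A minimal usage sketch for this Keras variant, assuming SSD300 and its VGG16/PriorBox/Normalize helpers are importable from this file and a 21-class VOC-style setup:

import numpy as np

model = SSD300([300, 300, 3], num_classes=21)
dummy = np.random.rand(1, 300, 300, 3).astype('float32')
preds = model.predict(dummy)
print(preds.shape)  # expected (1, 8732, 33): 4 loc + 21 conf + 8 priorbox values per box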
Example #3
def SSD300(input_shape, num_classes=21):
    # 300,300,3
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])

    # SSD structure, stored as the net dict
    net = VGG16(input_tensor)
    #----------------------- process the extracted backbone features ---------------------------#
    # process conv4_3: 38,38,512
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])
    num_priors = 4
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    net['conv4_3_norm_mbox_loc'] = Conv2D(num_priors * 4,
                                          kernel_size=(3, 3),
                                          padding='same',
                                          name='conv4_3_norm_mbox_loc')(
                                              net['conv4_3_norm'])
    net['conv4_3_norm_mbox_loc_flat'] = Flatten(
        name='conv4_3_norm_mbox_loc_flat')(net['conv4_3_norm_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    net['conv4_3_norm_mbox_conf'] = Conv2D(num_priors * num_classes,
                                           kernel_size=(3, 3),
                                           padding='same',
                                           name='conv4_3_norm_mbox_conf')(
                                               net['conv4_3_norm'])
    net['conv4_3_norm_mbox_conf_flat'] = Flatten(
        name='conv4_3_norm_mbox_conf_flat')(net['conv4_3_norm_mbox_conf'])
    priorbox = PriorBox(img_size,
                        30.0,
                        max_size=60.0,
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_norm_mbox_priorbox')
    net['conv4_3_norm_mbox_priorbox'] = priorbox(net['conv4_3_norm'])

    # process the fc7 layer
    num_priors = 6
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    net['fc7_mbox_loc'] = Conv2D(num_priors * 4,
                                 kernel_size=(3, 3),
                                 padding='same',
                                 name='fc7_mbox_loc')(net['fc7'])
    net['fc7_mbox_loc_flat'] = Flatten(name='fc7_mbox_loc_flat')(
        net['fc7_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    net['fc7_mbox_conf'] = Conv2D(num_priors * num_classes,
                                  kernel_size=(3, 3),
                                  padding='same',
                                  name='fc7_mbox_conf')(net['fc7'])
    net['fc7_mbox_conf_flat'] = Flatten(name='fc7_mbox_conf_flat')(
        net['fc7_mbox_conf'])

    priorbox = PriorBox(img_size,
                        60.0,
                        max_size=111.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')
    net['fc7_mbox_priorbox'] = priorbox(net['fc7'])

    # process conv6_2
    num_priors = 6
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv6_2_mbox_loc')(net['conv6_2'])
    net['conv6_2_mbox_loc'] = x
    net['conv6_2_mbox_loc_flat'] = Flatten(name='conv6_2_mbox_loc_flat')(
        net['conv6_2_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name='conv6_2_mbox_conf')(net['conv6_2'])
    net['conv6_2_mbox_conf'] = x
    net['conv6_2_mbox_conf_flat'] = Flatten(name='conv6_2_mbox_conf_flat')(
        net['conv6_2_mbox_conf'])

    priorbox = PriorBox(img_size,
                        111.0,
                        max_size=162.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv6_2_mbox_priorbox')
    net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2'])

    # process conv7_2
    num_priors = 6
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv7_2_mbox_loc')(net['conv7_2'])
    net['conv7_2_mbox_loc'] = x
    net['conv7_2_mbox_loc_flat'] = Flatten(name='conv7_2_mbox_loc_flat')(
        net['conv7_2_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name='conv7_2_mbox_conf')(net['conv7_2'])
    net['conv7_2_mbox_conf'] = x
    net['conv7_2_mbox_conf_flat'] = Flatten(name='conv7_2_mbox_conf_flat')(
        net['conv7_2_mbox_conf'])

    priorbox = PriorBox(img_size,
                        162.0,
                        max_size=213.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv7_2_mbox_priorbox')
    net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2'])

    # process conv8_2
    num_priors = 4
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv8_2_mbox_loc')(net['conv8_2'])
    net['conv8_2_mbox_loc'] = x
    net['conv8_2_mbox_loc_flat'] = Flatten(name='conv8_2_mbox_loc_flat')(
        net['conv8_2_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name='conv8_2_mbox_conf')(net['conv8_2'])
    net['conv8_2_mbox_conf'] = x
    net['conv8_2_mbox_conf_flat'] = Flatten(name='conv8_2_mbox_conf_flat')(
        net['conv8_2_mbox_conf'])

    priorbox = PriorBox(img_size,
                        213.0,
                        max_size=264.0,
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv8_2_mbox_priorbox')
    net['conv8_2_mbox_priorbox'] = priorbox(net['conv8_2'])

    # process conv9_2
    num_priors = 4
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv9_2_mbox_loc')(net['conv9_2'])
    net['conv9_2_mbox_loc'] = x
    net['conv9_2_mbox_loc_flat'] = Flatten(name='conv9_2_mbox_loc_flat')(
        net['conv9_2_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name='conv9_2_mbox_conf')(net['conv9_2'])
    net['conv9_2_mbox_conf'] = x
    net['conv9_2_mbox_conf_flat'] = Flatten(name='conv9_2_mbox_conf_flat')(
        net['conv9_2_mbox_conf'])

    priorbox = PriorBox(img_size,
                        264.0,
                        max_size=315.0,
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv9_2_mbox_priorbox')

    net['conv9_2_mbox_priorbox'] = priorbox(net['conv9_2'])

    # stack all the results
    net['mbox_loc'] = concatenate([
        net['conv4_3_norm_mbox_loc_flat'], net['fc7_mbox_loc_flat'],
        net['conv6_2_mbox_loc_flat'], net['conv7_2_mbox_loc_flat'],
        net['conv8_2_mbox_loc_flat'], net['conv9_2_mbox_loc_flat']
    ],
                                  axis=1,
                                  name='mbox_loc')
    net['mbox_conf'] = concatenate([
        net['conv4_3_norm_mbox_conf_flat'], net['fc7_mbox_conf_flat'],
        net['conv6_2_mbox_conf_flat'], net['conv7_2_mbox_conf_flat'],
        net['conv8_2_mbox_conf_flat'], net['conv9_2_mbox_conf_flat']
    ],
                                   axis=1,
                                   name='mbox_conf')
    net['mbox_priorbox'] = concatenate([
        net['conv4_3_norm_mbox_priorbox'], net['fc7_mbox_priorbox'],
        net['conv6_2_mbox_priorbox'], net['conv7_2_mbox_priorbox'],
        net['conv8_2_mbox_priorbox'], net['conv9_2_mbox_priorbox']
    ],
                                       axis=1,
                                       name='mbox_priorbox')

    # if hasattr(net['mbox_loc'], '_keras_shape'):
    #     num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    # elif hasattr(net['mbox_loc'], 'int_shape'):
    num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    # 8732,4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    # 8732,21
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])

    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2,
        name='predictions')
    print(net['predictions'])
    model = Model(net['input'], net['predictions'])
    return model
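The predictions tensor packs loc, conf and the prior boxes along the last axis; a small sketch (split_predictions is an illustrative helper under the same 21-class assumption, not part of the source) of how a loss function or decoder would slice it back apart:

import numpy as np

def split_predictions(preds, num_classes=21):
    # preds: (batch, num_boxes, 4 + num_classes + 8)
    loc    = preds[..., :4]                  # box regression offsets
    conf   = preds[..., 4:4 + num_classes]   # softmax class scores
    priors = preds[..., 4 + num_classes:]    # 4 prior coordinates + 4 variances
    return loc, conf, priors

dummy = np.zeros((1, 8732, 4 + 21 + 8), dtype='float32')
loc, conf, priors = split_predictions(dummy)
print(loc.shape, conf.shape, priors.shape)  # (1, 8732, 4) (1, 8732, 21) (1, 8732, 8)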
Example #4
def SSD300(input_shape, num_classes=21):
    # 300,300,3
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])
    mbn3L_model = MobileNetV3_Small(
        shape=input_shape,
        n_class=num_classes,
        include_top=False,
        backbone=True,
        input_tensor=input_tensor).build(plot=False)
    # SSD structure, stored as the net dict
    #net = mobilenet(input_tensor)
    net = mbn3L_model
    #----------------------- process the extracted backbone features ---------------------------#
    num_priors = 4
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    net['conv4_3_loc'] = Conv2D(num_priors * 4,
                                kernel_size=(3, 3),
                                padding='same',
                                name='conv4_3_loc')(net['conv4_3'])
    net['conv4_3_loc_flat'] = Flatten(name='conv4_3_loc_flat')(
        net['conv4_3_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    net['conv4_3_conf'] = Conv2D(num_priors * num_classes,
                                 kernel_size=(3, 3),
                                 padding='same',
                                 name='conv4_3_conf')(net['conv4_3'])
    net['conv4_3_conf_flat'] = Flatten(name='conv4_3_conf_flat')(
        net['conv4_3_conf'])
    priorbox = PriorBox(img_size,
                        30.0,
                        max_size=60.0,
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_priorbox')
    net['conv4_3_priorbox'] = priorbox(net['conv4_3'])

    # process the fc7 layer
    num_priors = 6
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    net['fc7_mbox_loc'] = Conv2D(num_priors * 4,
                                 kernel_size=(3, 3),
                                 padding='same',
                                 name='fc7_mbox_loc')(net['fc7'])
    net['fc7_mbox_loc_flat'] = Flatten(name='fc7_mbox_loc_flat')(
        net['fc7_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    net['fc7_mbox_conf'] = Conv2D(num_priors * num_classes,
                                  kernel_size=(3, 3),
                                  padding='same',
                                  name='fc7_mbox_conf')(net['fc7'])
    net['fc7_mbox_conf_flat'] = Flatten(name='fc7_mbox_conf_flat')(
        net['fc7_mbox_conf'])

    priorbox = PriorBox(img_size,
                        60.0,
                        max_size=111.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')
    net['fc7_mbox_priorbox'] = priorbox(net['fc7'])

    # process conv6_2
    num_priors = 6
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv6_2_mbox_loc')(net['conv6_2'])
    net['conv6_2_mbox_loc'] = x
    net['conv6_2_mbox_loc_flat'] = Flatten(name='conv6_2_mbox_loc_flat')(
        net['conv6_2_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name='conv6_2_mbox_conf')(net['conv6_2'])
    net['conv6_2_mbox_conf'] = x
    net['conv6_2_mbox_conf_flat'] = Flatten(name='conv6_2_mbox_conf_flat')(
        net['conv6_2_mbox_conf'])

    priorbox = PriorBox(img_size,
                        111.0,
                        max_size=162.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv6_2_mbox_priorbox')
    net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2'])

    # process conv7_2
    num_priors = 6
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv7_2_mbox_loc')(net['conv7_2'])
    net['conv7_2_mbox_loc'] = x
    net['conv7_2_mbox_loc_flat'] = Flatten(name='conv7_2_mbox_loc_flat')(
        net['conv7_2_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name='conv7_2_mbox_conf')(net['conv7_2'])
    net['conv7_2_mbox_conf'] = x
    net['conv7_2_mbox_conf_flat'] = Flatten(name='conv7_2_mbox_conf_flat')(
        net['conv7_2_mbox_conf'])

    priorbox = PriorBox(img_size,
                        162.0,
                        max_size=213.0,
                        aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv7_2_mbox_priorbox')
    net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2'])

    # process conv8_2
    num_priors = 4
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv8_2_mbox_loc')(net['conv8_2'])
    net['conv8_2_mbox_loc'] = x
    net['conv8_2_mbox_loc_flat'] = Flatten(name='conv8_2_mbox_loc_flat')(
        net['conv8_2_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name='conv8_2_mbox_conf')(net['conv8_2'])
    net['conv8_2_mbox_conf'] = x
    net['conv8_2_mbox_conf_flat'] = Flatten(name='conv8_2_mbox_conf_flat')(
        net['conv8_2_mbox_conf'])

    priorbox = PriorBox(img_size,
                        213.0,
                        max_size=264.0,
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv8_2_mbox_priorbox')
    net['conv8_2_mbox_priorbox'] = priorbox(net['conv8_2'])

    # process conv9_2
    num_priors = 4
    # box prediction
    # num_priors is the number of prior boxes per grid cell; 4 are the x, y, h, w adjustments
    x = Conv2D(num_priors * 4,
               kernel_size=(3, 3),
               padding='same',
               name='conv9_2_mbox_loc')(net['conv9_2'])
    net['conv9_2_mbox_loc'] = x
    net['conv9_2_mbox_loc_flat'] = Flatten(name='conv9_2_mbox_loc_flat')(
        net['conv9_2_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell; num_classes is the number of classes
    x = Conv2D(num_priors * num_classes,
               kernel_size=(3, 3),
               padding='same',
               name='conv9_2_mbox_conf')(net['conv9_2'])
    net['conv9_2_mbox_conf'] = x
    net['conv9_2_mbox_conf_flat'] = Flatten(name='conv9_2_mbox_conf_flat')(
        net['conv9_2_mbox_conf'])

    priorbox = PriorBox(img_size,
                        264.0,
                        max_size=315.0,
                        aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv9_2_mbox_priorbox')

    net['conv9_2_mbox_priorbox'] = priorbox(net['conv9_2'])

    # stack all the results
    net['mbox_loc'] = concatenate([
        net['conv4_3_loc_flat'], net['fc7_mbox_loc_flat'],
        net['conv6_2_mbox_loc_flat'], net['conv7_2_mbox_loc_flat'],
        net['conv8_2_mbox_loc_flat'], net['conv9_2_mbox_loc_flat']
    ],
                                  axis=1,
                                  name='mbox_loc')
    net['mbox_conf'] = concatenate([
        net['conv4_3_conf_flat'], net['fc7_mbox_conf_flat'],
        net['conv6_2_mbox_conf_flat'], net['conv7_2_mbox_conf_flat'],
        net['conv8_2_mbox_conf_flat'], net['conv9_2_mbox_conf_flat']
    ],
                                   axis=1,
                                   name='mbox_conf')
    net['mbox_priorbox'] = concatenate([
        net['conv4_3_priorbox'], net['fc7_mbox_priorbox'],
        net['conv6_2_mbox_priorbox'], net['conv7_2_mbox_priorbox'],
        net['conv8_2_mbox_priorbox'], net['conv9_2_mbox_priorbox']
    ],
                                       axis=1,
                                       name='mbox_priorbox')

    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    elif hasattr(net['mbox_loc'], 'int_shape'):
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4
    # 8732,4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    # 8732,21
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])

    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2,
        name='predictions')
    model = Model(input_tensor, net['predictions'])
    return model
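The detection head above reuses the VGG-style layer names, so the MobileNetV3 backbone dict has to expose the same keys; a small hypothetical guard (not part of the source) that makes this assumption explicit:

def check_backbone_features(net):
    # hypothetical helper: verify the backbone dict provides the feature layers
    # that the SSD head indexes by name
    required = ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']
    missing = [k for k in required if k not in net]
    if missing:
        raise KeyError('backbone is missing feature layers: {}'.format(missing))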
Example #5
class SSD(nn.Module):
    def __init__(self, phase, base, extras, head, num_classes, confidence,
                 nms_iou):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, confidence, nms_iou)

    def forward(self, x):
        sources = list()
        loc = list()
        conf = list()

        #---------------------------#
        #   get the conv4_3 output
        #   shape is 38,38,512
        #---------------------------#
        for k in range(23):
            x = self.vgg[k](x)

        #---------------------------#
        #   the conv4_3 output
        #   needs L2 normalization
        #---------------------------#
        s = self.L2Norm(x)
        sources.append(s)

        #---------------------------#
        #   get the conv7 output
        #   shape is 19,19,1024
        #---------------------------#
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        #-------------------------------------------------------------#
        #   Among the feature layers produced by add_extras,
        #   the 1st, 3rd, 5th and 7th layers are used for regression and classification.
        #   Their shapes are (10,10,512), (5,5,256), (3,3,256), (1,1,256)
        #-------------------------------------------------------------#
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        #-------------------------------------------------------------#
        #   apply regression and classification heads to the 6 effective feature layers
        #-------------------------------------------------------------#
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        #-------------------------------------------------------------#
        #   reshape for concatenation
        #-------------------------------------------------------------#
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        #-------------------------------------------------------------#
        #   loc is reshaped to batch_size,num_anchors,4
        #   conf is reshaped to batch_size,num_anchors,self.num_classes
        #   During inference, Detect decodes the prior boxes to produce the final predictions.
        #   During training, the raw regression and classification outputs plus the priors are returned directly.
        #-------------------------------------------------------------#
        if self.phase == "test":
            output = self.detect(
                loc.view(loc.size(0), -1, 4),
                self.softmax(conf.view(conf.size(0), -1, self.num_classes)),
                self.priors)
        else:
            output = (loc.view(loc.size(0), -1,
                               4), conf.view(conf.size(0), -1,
                                             self.num_classes), self.priors)
        return output
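Detect is where the loc offsets are turned into boxes; a minimal sketch of the usual SSD decoding step that such a layer applies, assuming priors stored as (cx, cy, w, h) and the 0.1/0.2 variances used elsewhere in these examples:

import torch

def decode(loc, priors, variances=(0.1, 0.2)):
    # loc: (num_anchors, 4) regression offsets; priors: (num_anchors, 4) as (cx, cy, w, h)
    centers = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
    # convert from center-size form to corner form (xmin, ymin, xmax, ymax)
    return torch.cat([centers - sizes / 2, centers + sizes / 2], dim=1)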
Example #6
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

        self.upsample_256_256 = Upsample(10)
        self.conv_256_512 = nn.Conv2d(in_channels=256,
                                      out_channels=512,
                                      kernel_size=1,
                                      stride=1)

        #conv8_2 -> conv8_2
        self.conv_512_512_1 = nn.Conv2d(in_channels=512,
                                        out_channels=512,
                                        kernel_size=1,
                                        stride=1)

        self.upsample_512_512 = Upsample(19)
        self.conv_512_1024 = nn.Conv2d(in_channels=512,
                                       out_channels=1024,
                                       kernel_size=1,
                                       stride=1)
        self.conv_1024_1024 = nn.Conv2d(in_channels=1024,
                                        out_channels=1024,
                                        kernel_size=1,
                                        stride=1)

        self.upsample_1024_1024 = Upsample(38)
        self.conv_1024_512 = nn.Conv2d(in_channels=1024,
                                       out_channels=512,
                                       kernel_size=1,
                                       stride=1)
        self.conv_512_512_2 = nn.Conv2d(in_channels=512,
                                        out_channels=512,
                                        kernel_size=1,
                                        stride=1)

        self.smooth = nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1)
        self.smooth1 = nn.Conv2d(1024,
                                 1024,
                                 kernel_size=3,
                                 padding=1,
                                 stride=1)

        if USE_CBAM:

            self.CBAM1 = Bottleneck(512)
            self.CBAM2 = Bottleneck(1024)
            self.CBAM3 = Bottleneck(512)
            self.CBAM4 = Bottleneck(256)
            self.CBAM5 = Bottleneck(256)
            self.CBAM6 = Bottleneck(256)

        if USE_SE:
            self.SE1 = SEModule(512)
            self.SE2 = SEModule(1024)
            self.SE3 = SEModule(512)
            self.SE4 = SEModule(256)
            self.SE5 = SEModule(256)
            self.SE6 = SEModule(256)
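Upsample is used here with a fixed target size (10, 19, 38) but is not defined in this excerpt; a plausible minimal implementation, assumed rather than taken from the source, simply wraps F.interpolate:

import torch.nn as nn
import torch.nn.functional as F

class Upsample(nn.Module):
    # assumed helper: resize a feature map to a fixed spatial size so it can be
    # fused with a shallower feature map of that resolution
    def __init__(self, size):
        super(Upsample, self).__init__()
        self.size = size

    def forward(self, x):
        return F.interpolate(x, size=(self.size, self.size), mode='bilinear', align_corners=False)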
Example #7
class SSD(nn.Module):
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

        self.upsample_256_256 = Upsample(10)
        self.conv_256_512 = nn.Conv2d(in_channels=256,
                                      out_channels=512,
                                      kernel_size=1,
                                      stride=1)

        #conv8_2 -> conv8_2
        self.conv_512_512_1 = nn.Conv2d(in_channels=512,
                                        out_channels=512,
                                        kernel_size=1,
                                        stride=1)

        self.upsample_512_512 = Upsample(19)
        self.conv_512_1024 = nn.Conv2d(in_channels=512,
                                       out_channels=1024,
                                       kernel_size=1,
                                       stride=1)
        self.conv_1024_1024 = nn.Conv2d(in_channels=1024,
                                        out_channels=1024,
                                        kernel_size=1,
                                        stride=1)

        self.upsample_1024_1024 = Upsample(38)
        self.conv_1024_512 = nn.Conv2d(in_channels=1024,
                                       out_channels=512,
                                       kernel_size=1,
                                       stride=1)
        self.conv_512_512_2 = nn.Conv2d(in_channels=512,
                                        out_channels=512,
                                        kernel_size=1,
                                        stride=1)

        self.smooth = nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1)
        self.smooth1 = nn.Conv2d(1024,
                                 1024,
                                 kernel_size=3,
                                 padding=1,
                                 stride=1)

        if USE_CBAM:

            self.CBAM1 = Bottleneck(512)
            self.CBAM2 = Bottleneck(1024)
            self.CBAM3 = Bottleneck(512)
            self.CBAM4 = Bottleneck(256)
            self.CBAM5 = Bottleneck(256)
            self.CBAM6 = Bottleneck(256)

        if USE_SE:
            self.SE1 = SEModule(512)
            self.SE2 = SEModule(1024)
            self.SE3 = SEModule(512)
            self.SE4 = SEModule(256)
            self.SE5 = SEModule(256)
            self.SE6 = SEModule(256)

    def forward(self, x):
        sources = list()
        attention = list()
        loc = list()
        conf = list()

        for k in range(10):
            x = self.vgg[k](x)
        sources.append(x)

        # get the conv4_3 output (sources must end up with 8 entries,
        # since the attention/fusion code below indexes up to sources[7])
        for k in range(10, 23):
            x = self.vgg[k](x)

        s = self.L2Norm(x)
        sources.append(s)

        for k in range(23, 30):
            x = self.vgg[k](x)

        s = self.L2Norm(x)
        sources.append(s)

        # get the fc7 output
        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # get the remaining outputs from the extra layers
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        if USE_SE:
            attention.append(sources[0])
            attention.append(self.SE1(sources[1]))
            attention.append(sources[2])
            attention.append(self.SE2(sources[3]))
            attention.append(self.SE3(sources[4]))
            attention.append(self.SE4(sources[5]))
            attention.append(self.SE5(sources[6]))
            attention.append(self.SE6(sources[7]))
        else:
            # without SE attention, the fusion below works on the raw feature maps
            attention = list(sources)

        sources_final = list()

        conv8_fp1 = self.conv_256_512(self.upsample_256_256(
            attention[5])) + self.conv_512_512_1(attention[4])
        conv8_fp = self.smooth(conv8_fp1)

        fc7_fp1 = self.conv_512_1024(
            self.upsample_512_512(conv8_fp1)) + self.conv_1024_1024(
                attention[3])
        fc7_fp = self.smooth1(fc7_fp1)  # fc7_fp1 has 1024 channels, so use the 1024-channel smoothing conv

        conv4_fp = self.conv_1024_512(
            self.upsample_1024_1024(fc7_fp1)) + self.conv_512_512_2(
                attention[1])
        conv4_fp = self.smooth(conv4_fp)

        if USE_CBAM:
            sources_final.append(self.CBAM1(conv4_fp))
            sources_final.append(self.CBAM2(fc7_fp))
            sources_final.append(self.CBAM3(conv8_fp))
            sources_final.append(self.CBAM4(sources[5]))
            sources_final.append(self.CBAM5(sources[6]))
            sources_final.append(self.CBAM6(sources[7]))

        else:
            sources_final.append(conv4_fp)
            sources_final.append(fc7_fp)
            sources_final.append(conv8_fp)
            sources_final.append(attention[5])
            sources_final.append(attention[6])
            sources_final.append(attention[7])

        # apply the regression and classification layers
        # for (x, l, c) in zip(sources, self.loc, self.conf):
        #     loc.append(l(x).permute(0, 2, 3, 1).contiguous())
        #     conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        for (x, l, c) in zip(sources_final, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        # reshape
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        if self.phase == "test":
            # loc is reshaped to (batch_size, num_anchors, 4)
            # conf is reshaped to (batch_size, num_anchors, num_classes)

            # this part has not been modified for now
            output = self.detect(
                loc.view(loc.size(0), -1, 4),  # loc preds
                self.softmax(conf.view(conf.size(0), -1,
                                       self.num_classes)),  # conf preds
                self.priors)
        else:
            output = (loc.view(loc.size(0), -1,
                               4), conf.view(conf.size(0), -1,
                                             self.num_classes), self.priors)
        return output
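The top-down fusion in this forward pass adds a 1x1-projected, upsampled deeper map to a 1x1-projected lateral map, FPN-style. A standalone shape check on dummy tensors (channel/size numbers assume the standard SSD300 layout quoted in the comments above, and a fixed-size Upsample as sketched after Example #6):

import torch
import torch.nn as nn
import torch.nn.functional as F

conv4_3 = torch.zeros(1, 512, 38, 38)
fc7     = torch.zeros(1, 1024, 19, 19)
conv8_2 = torch.zeros(1, 512, 10, 10)
conv9_2 = torch.zeros(1, 256, 5, 5)

up = lambda t, s: F.interpolate(t, size=(s, s), mode='bilinear', align_corners=False)

conv8_fp = nn.Conv2d(256, 512, 1)(up(conv9_2, 10)) + nn.Conv2d(512, 512, 1)(conv8_2)
fc7_fp   = nn.Conv2d(512, 1024, 1)(up(conv8_fp, 19)) + nn.Conv2d(1024, 1024, 1)(fc7)
conv4_fp = nn.Conv2d(1024, 512, 1)(up(fc7_fp, 38)) + nn.Conv2d(512, 512, 1)(conv4_3)

print(conv8_fp.shape, fc7_fp.shape, conv4_fp.shape)
# torch.Size([1, 512, 10, 10]) torch.Size([1, 1024, 19, 19]) torch.Size([1, 512, 38, 38])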
Example #8
class SSD(nn.Module):
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())
            # self.priors = self.priorbox.forward()  # this line also runs fine when written this way

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        self.relu_list4cxq = nn.ModuleList([torch.nn.ReLU(True) for i in range(8)])  # my own modified approach
        self.feature_maps4cxq = None  # used for Grad-CAM
        self.scores4cxq = None  # used for Grad-CAM
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        sources = list()
        loc = list()
        conf = list()

        # get the conv4_3 output
        for k in range(23):
            x = self.vgg[k](x)

        s = self.L2Norm(x)
        sources.append(s)

        # get the fc7 output
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # get the remaining outputs from the extra layers
        for k, v in enumerate(self.extras):
            # x = F.relu(v(x), inplace=True)  # original implementation
            x = self.relu_list4cxq[k](v(x))  # modified approach
            if k % 2 == 1:
                sources.append(x)


        self.feature_maps4cxq = sources  # the 6 feature maps
        # apply the regression and classification layers
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        self.scores4cxq = conf  # store the per-class scores

        # reshape
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)  # torch.Size([4, 34928])
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)  # torch.Size([4, 26196])
        if self.phase == "test":
            # loc is reshaped to (batch_size, num_anchors, 4)
            # conf is reshaped to (batch_size, num_anchors, num_classes)
            # output = self.detect(
            output = self.detect.apply(
                loc.view(loc.size(0), -1, 4),                   # loc preds torch.Size([4, 8732, 4])
                self.softmax(conf.view(conf.size(0), -1,
                             self.num_classes)),                # conf preds # torch.Size([4, 8732, 3])
                self.priors              # torch.Size([8732, 4])
            )  # torch.Size([1, 3, 200, 5])  1 confidence score + 4 box coordinates
        else:
            output = (
                loc.view(loc.size(0), -1, 4),
                conf.view(conf.size(0), -1, self.num_classes),
                self.priors
            )  # torch.Size([4, 8732, 4]) torch.Size([4, 8732, 3]) torch.Size([8732, 4])
        return output
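The switch from self.detect(...) to self.detect.apply(...) matters on newer PyTorch, which no longer allows legacy torch.autograd.Function instances to be called directly; a toy illustration of the pattern (Double is a stand-in, not the real Detect):

import torch

class Double(torch.autograd.Function):
    # toy stand-in for a custom autograd Function such as Detect
    @staticmethod
    def forward(ctx, x):
        return x * 2

x = torch.ones(3)
print(Double.apply(x))  # tensor([2., 2., 2.]); calling Double()(x) is the legacy style newer PyTorch rejects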
Example #9
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

        self.DilationConv_128_128 = nn.Conv2d(in_channels=128,
                                              out_channels=128,
                                              kernel_size=3,
                                              padding=2,
                                              dilation=2,
                                              stride=2)
        self.conv_512_256 = nn.Conv2d(in_channels=512,
                                      out_channels=256,
                                      kernel_size=1,
                                      stride=1)
        self.upsample_1024_1024 = Upsample(38)
        self.conv_1024_128 = nn.Conv2d(in_channels=1024,
                                       out_channels=128,
                                       kernel_size=1,
                                       stride=1)

        self.DilationConv_512_256 = nn.Conv2d(in_channels=512,
                                              out_channels=256,
                                              kernel_size=3,
                                              padding=2,
                                              dilation=2,
                                              stride=2)

        self.conv_1024_512 = nn.Conv2d(in_channels=1024,
                                       out_channels=512,
                                       kernel_size=1,
                                       stride=1)

        self.upsample_512_512 = Upsample(19)
        self.conv_512_256_fc7 = nn.Conv2d(in_channels=512,
                                          out_channels=256,
                                          kernel_size=1,
                                          stride=1)

        self.DilationConv_512_128_2 = nn.Conv2d(in_channels=512,
                                                out_channels=128,
                                                kernel_size=3,
                                                padding=2,
                                                dilation=2,
                                                stride=2)

        self.conv_512_256_2 = nn.Conv2d(in_channels=512,
                                        out_channels=256,
                                        kernel_size=1,
                                        stride=1)

        self.upsample_256_256_2 = Upsample(10)
        self.conv_256_128_2 = nn.Conv2d(in_channels=256,
                                        out_channels=128,
                                        kernel_size=1,
                                        stride=1)

        self.smooth = nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1)
        self.smooth2 = nn.Conv2d(1024,
                                 1024,
                                 kernel_size=3,
                                 padding=1,
                                 stride=1)

        self.bn = nn.BatchNorm2d(128)
        self.bn1 = nn.BatchNorm2d(256)

        if USE_SE:
            self.SE1 = SEModule(512)
            self.SE2 = SEModule(512)
            self.SE3 = SEModule(512)
            self.SE4 = SEModule(256)
            self.SE5 = SEModule(256)
            self.SE6 = SEModule(256)

        if USE_ECA:
            self.ECA1 = ECAModule(512)
            self.ECA2 = ECAModule(1024)
            self.ECA3 = ECAModule(512)
            self.ECA4 = ECAModule(256)
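The dilated 3x3 convolutions above (kernel 3, dilation 2, padding 2, stride 2) roughly halve the spatial size (75 -> 38, 38 -> 19, 19 -> 10), which lets the shallower maps be fused with the deeper branches at matching resolution; a quick check of that arithmetic under the usual SSD300 feature-map sizes:

# standard conv output-size formula: out = floor((H + 2*pad - dilation*(kernel-1) - 1) / stride) + 1
def conv_out(h, kernel=3, stride=2, pad=2, dilation=2):
    return (h + 2 * pad - dilation * (kernel - 1) - 1) // stride + 1

print([conv_out(h) for h in (75, 38, 19)])  # [38, 19, 10], matching the deeper feature maps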
Example #10
class SSD(nn.Module):
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

        self.DilationConv_128_128 = nn.Conv2d(in_channels=128,
                                              out_channels=128,
                                              kernel_size=3,
                                              padding=2,
                                              dilation=2,
                                              stride=2)
        self.conv_512_256 = nn.Conv2d(in_channels=512,
                                      out_channels=256,
                                      kernel_size=1,
                                      stride=1)
        self.upsample_1024_1024 = Upsample(38)
        self.conv_1024_128 = nn.Conv2d(in_channels=1024,
                                       out_channels=128,
                                       kernel_size=1,
                                       stride=1)

        self.DilationConv_512_256 = nn.Conv2d(in_channels=512,
                                              out_channels=256,
                                              kernel_size=3,
                                              padding=2,
                                              dilation=2,
                                              stride=2)

        self.conv_1024_512 = nn.Conv2d(in_channels=1024,
                                       out_channels=512,
                                       kernel_size=1,
                                       stride=1)

        self.upsample_512_512 = Upsample(19)
        self.conv_512_256_fc7 = nn.Conv2d(in_channels=512,
                                          out_channels=256,
                                          kernel_size=1,
                                          stride=1)

        self.DilationConv_512_128_2 = nn.Conv2d(in_channels=512,
                                                out_channels=128,
                                                kernel_size=3,
                                                padding=2,
                                                dilation=2,
                                                stride=2)

        self.conv_512_256_2 = nn.Conv2d(in_channels=512,
                                        out_channels=256,
                                        kernel_size=1,
                                        stride=1)

        self.upsample_256_256_2 = Upsample(10)
        self.conv_256_128_2 = nn.Conv2d(in_channels=256,
                                        out_channels=128,
                                        kernel_size=1,
                                        stride=1)

        self.smooth = nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1)
        self.smooth2 = nn.Conv2d(1024,
                                 1024,
                                 kernel_size=3,
                                 padding=1,
                                 stride=1)

        self.bn = nn.BatchNorm2d(128)
        self.bn1 = nn.BatchNorm2d(256)

        if USE_SE:
            self.SE1 = SEModule(512)
            self.SE2 = SEModule(512)
            self.SE3 = SEModule(512)
            self.SE4 = SEModule(256)
            self.SE5 = SEModule(256)
            self.SE6 = SEModule(256)

        if USE_ECA:
            self.ECA1 = ECAModule(512)
            self.ECA2 = ECAModule(1024)
            self.ECA3 = ECAModule(512)
            self.ECA4 = ECAModule(256)

    def forward(self, x):
        sources = list()
        loc = list()
        conf = list()

        for k in range(10):
            x = self.vgg[k](x)
        sources.append(x)

        # get the conv4_3 output
        for k in range(10, 23):
            x = self.vgg[k](x)

        s = self.L2Norm(x)
        sources.append(s)

        # get the fc7 output
        # for k in range(23, len(self.vgg)):
        #     x = self.vgg[k](x)
        # sources.append(x)

        for k in range(23, 30):
            x = self.vgg[k](x)

        s = self.L2Norm(x)
        sources.append(s)

        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # get the remaining outputs from the extra layers
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        sources_final = list()
        sources_final1 = list()

        if USE_ECA:
            sources_final.append(self.ECA4(sources[5]))
        else:
            sources_final.append(sources[5])

        conv8_fp1 = torch.cat(
            (F.relu(self.bn(self.DilationConv_512_128_2(sources[2])),
                    inplace=True),
             F.relu(self.conv_512_256_2(sources[4]), inplace=True),
             F.relu(self.conv_256_128_2(self.upsample_256_256_2(sources[5])),
                    inplace=True)), 1)

        conv8_fp = F.relu(self.smooth(conv8_fp1), inplace=True)

        if USE_ECA:
            sources_final.append(self.ECA3(conv8_fp))
        else:
            sources_final.append(conv8_fp)

        # fc7_fp = torch.cat((F.relu(self.bn(self.DilationConv_512_256(sources[1])),inplace=True),
        #                     F.relu(self.conv_1024_512(sources[3]),inplace=True),
        #                     F.relu(self.conv512_256_fc7(self.upsample_512_512(sources[4])),inplace=True)),1)

        # fc7_fp1 = torch.cat((F.relu(self.bn1(self.DilationConv_512_256(sources[1])),inplace=True),
        #                     F.relu(self.conv_1024_512(sources[3]),inplace=True),
        #                     F.relu(self.conv_512_256_fc7(self.upsample_512_512(sources[4])),inplace=True)),1)

        fc7_fp1 = torch.cat(
            (F.relu(self.bn1(self.DilationConv_512_256(sources[1])),
                    inplace=True),
             F.relu(self.conv_1024_512(sources[3]), inplace=True),
             F.relu(self.conv_512_256_fc7(self.upsample_512_512(sources[4])),
                    inplace=True)), 1)

        fc7_fp = F.relu(self.smooth2(fc7_fp1), inplace=True)

        if USE_ECA:
            sources_final.append(self.ECA2(fc7_fp))
        else:
            sources_final.append(fc7_fp)

        conv4_fp = torch.cat(
            (F.relu(self.bn(self.DilationConv_128_128(sources[0])),
                    inplace=True),
             F.relu(self.conv_512_256(sources[1]), inplace=True),
             F.relu(self.conv_1024_128(self.upsample_1024_1024(sources[3])),
                    inplace=True)), 1)

        conv4_fp = F.relu(self.smooth(conv4_fp), inplace=True)
        if USE_ECA:
            sources_final.append(self.ECA1(conv4_fp))
        else:
            sources_final.append(conv4_fp)

        # apply the regression and classification layers
        # for (x, l, c) in zip(sources, self.loc, self.conf):
        #     loc.append(l(x).permute(0, 2, 3, 1).contiguous())
        #     conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        for (x, l, c) in zip(sources_final[::-1], self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        # reshape
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        if self.phase == "test":
            # loc is reshaped to (batch_size, num_anchors, 4)
            # conf is reshaped to (batch_size, num_anchors, num_classes)
            output = self.detect(
                loc.view(loc.size(0), -1, 4),  # loc preds
                self.softmax(conf.view(conf.size(0), -1,
                                       self.num_classes)),  # conf preds
                self.priors)
        else:
            output = (loc.view(loc.size(0), -1,
                               4), conf.view(conf.size(0), -1,
                                             self.num_classes), self.priors)
        return output