class SSD(nn.Module):
    """SSD detector assembled from pre-built backbone, extra and head layers.

    Args:
        phase: ``'test'`` adds a softmax and a ``Detect`` post-processing
            step; any other value makes ``forward`` return raw predictions.
        base: iterable of backbone layers, stored as ``self.vgg``.
        extras: iterable of extra feature layers applied after the backbone.
        head: pair ``(loc_layers, conf_layers)`` holding one prediction
            layer per feature map.
        num_classes: number of class scores predicted per prior box.
    """

    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config

        # Backbone, normalisation for the first feature map, extra layers.
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        # Prior boxes are generated once, outside of autograd tracking.
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = torch.tensor(self.priorbox.forward())

        # Regression (loc) and classification (conf) heads.
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        # Post-processing is only needed at inference time.
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        """Run the detector on a batch ``x``.

        Returns:
            In ``'test'`` phase, the result of ``self.detect`` applied to the
            reshaped predictions and the priors. Otherwise the tuple
            ``(loc, conf, priors)`` with ``loc`` viewed as ``(batch, -1, 4)``
            and ``conf`` viewed as ``(batch, -1, num_classes)``.
        """
        feature_maps = []

        # First 23 backbone layers (conv4_3 according to the original
        # comments); this map is L2-normalised before being used.
        for idx in range(23):
            x = self.vgg[idx](x)
        feature_maps.append(self.L2Norm(x))

        # Remaining backbone layers (fc7 according to the original comments).
        for idx in range(23, len(self.vgg)):
            x = self.vgg[idx](x)
        feature_maps.append(x)

        # Extra layers: ReLU after each, keep the output of every second one.
        for idx, layer in enumerate(self.extras):
            x = F.relu(layer(x), inplace=True)
            if idx % 2 == 1:
                feature_maps.append(x)

        # Apply both heads to every feature map, move channels last and
        # flatten each result to (batch, -1) so they can be concatenated.
        loc_parts = []
        conf_parts = []
        for feature, loc_layer, conf_layer in zip(feature_maps, self.loc,
                                                  self.conf):
            loc_map = loc_layer(feature).permute(0, 2, 3, 1).contiguous()
            conf_map = conf_layer(feature).permute(0, 2, 3, 1).contiguous()
            loc_parts.append(loc_map.view(loc_map.size(0), -1))
            conf_parts.append(conf_map.view(conf_map.size(0), -1))

        loc = torch.cat(loc_parts, 1)
        conf = torch.cat(conf_parts, 1)

        # loc -> (batch, num_anchors, 4); conf -> (batch, num_anchors, classes)
        loc = loc.view(loc.size(0), -1, 4)
        conf = conf.view(conf.size(0), -1, self.num_classes)

        if self.phase == "test":
            return self.detect(loc, self.softmax(conf), self.priors)
        return (loc, conf, self.priors)
def SSD300(input_shape, num_classes=21,
           anchors_size=(30, 60, 111, 162, 213, 264, 315)):
    """Build the SSD300 Keras model on top of the ``VGG16`` layer dictionary.

    Six feature layers each receive a 3x3 localisation convolution, a 3x3
    classification convolution and a ``PriorBox`` layer. The flattened
    outputs are concatenated, reshaped, and joined into one ``predictions``
    tensor.

    Args:
        input_shape: shape passed to ``Input`` and to every ``PriorBox``
            (the original comments give ``[300, 300, 3]`` as typical).
        num_classes: number of classes scored for each prior box.
        anchors_size: sequence of at least seven sizes. Feature layer ``i``
            passes ``anchors_size[i]`` as the second ``PriorBox`` argument
            and ``anchors_size[i + 1]`` as ``max_size``. The default is a
            tuple so that it cannot be mutated between calls.

    Returns:
        A ``Model`` mapping ``net['input']`` to ``net['predictions']``.
    """
    input_tensor = Input(shape=input_shape)

    # ``net`` holds the whole network; feature layers are looked up by name.
    net = VGG16(input_tensor)

    # conv4_3 is L2-normalised before predictions are attached to it.
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])

    # (feature layer key, prior boxes per grid cell, aspect ratios).
    # Every generated layer name / dict key is ``<key>_mbox_<suffix>``.
    head_specs = (
        ('conv4_3_norm', 4, [2]),
        ('fc7', 6, [2, 3]),
        ('conv6_2', 6, [2, 3]),
        ('conv7_2', 6, [2, 3]),
        ('conv8_2', 4, [2]),
        ('conv9_2', 4, [2]),
    )

    for index, (source, num_priors, aspect_ratios) in enumerate(head_specs):
        feature = net[source]
        prefix = source + '_mbox'

        # Box regression: 4 offsets for each prior box.
        net[prefix + '_loc'] = Conv2D(num_priors * 4,
                                      kernel_size=(3, 3),
                                      padding='same',
                                      name=prefix + '_loc')(feature)
        net[prefix + '_loc_flat'] = Flatten(
            name=prefix + '_loc_flat')(net[prefix + '_loc'])

        # Classification: num_classes scores for each prior box.
        net[prefix + '_conf'] = Conv2D(num_priors * num_classes,
                                       kernel_size=(3, 3),
                                       padding='same',
                                       name=prefix + '_conf')(feature)
        net[prefix + '_conf_flat'] = Flatten(
            name=prefix + '_conf_flat')(net[prefix + '_conf'])

        # Prior boxes for this feature layer.
        priorbox = PriorBox(input_shape,
                            anchors_size[index],
                            max_size=anchors_size[index + 1],
                            aspect_ratios=aspect_ratios,
                            variances=[0.1, 0.1, 0.2, 0.2],
                            name=prefix + '_priorbox')
        net[prefix + '_priorbox'] = priorbox(feature)

    prefixes = [source + '_mbox' for source, _, _ in head_specs]

    # Stack the per-layer results along axis 1.
    net['mbox_loc'] = concatenate(
        [net[p + '_loc_flat'] for p in prefixes], axis=1, name='mbox_loc')
    net['mbox_conf'] = concatenate(
        [net[p + '_conf_flat'] for p in prefixes], axis=1, name='mbox_conf')
    net['mbox_priorbox'] = concatenate(
        [net[p + '_priorbox'] for p in prefixes], axis=1,
        name='mbox_priorbox')

    # (num_boxes, 4) box offsets.
    net['mbox_loc'] = Reshape((-1, 4), name='mbox_loc_final')(net['mbox_loc'])
    # (num_boxes, num_classes) logits, then softmax over the classes.
    net['mbox_conf'] = Reshape((-1, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])

    # Offsets, class probabilities and priors side by side on the last axis
    # (the original comments give 8732 x 33 for the default configuration).
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2,
        name='predictions')

    model = Model(net['input'], net['predictions'])
    return model
def SSD300(input_shape, num_classes=21):
    """Build the SSD300 Keras model on top of the ``VGG16`` layer dictionary.

    Six feature layers each receive a 3x3 localisation convolution, a 3x3
    classification convolution and a ``PriorBox`` layer. The flattened
    outputs are concatenated, reshaped to one row per prior box, and joined
    into a single ``predictions`` tensor.

    Args:
        input_shape: shape passed to ``Input`` (``300, 300, 3`` according to
            the original comments). ``(input_shape[1], input_shape[0])`` is
            handed to every ``PriorBox`` as the image size.
        num_classes: number of classes scored for each prior box.

    Returns:
        A ``Model`` mapping ``net['input']`` to ``net['predictions']``.
    """
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])

    # ``net`` is the dictionary of named layers produced by the backbone.
    net = VGG16(input_tensor)

    # conv4_3 is L2-normalised before predictions are attached to it.
    net['conv4_3_norm'] = Normalize(20, name='conv4_3_norm')(net['conv4_3'])

    # Prior-box sizes: layer i uses sizes[i] and max_size=sizes[i + 1].
    sizes = (30.0, 60.0, 111.0, 162.0, 213.0, 264.0, 315.0)

    # (feature layer key, prior boxes per grid cell, aspect ratios).
    # Every generated layer name / dict key is ``<key>_mbox_<suffix>``.
    head_specs = (
        ('conv4_3_norm', 4, [2]),
        ('fc7', 6, [2, 3]),
        ('conv6_2', 6, [2, 3]),
        ('conv7_2', 6, [2, 3]),
        ('conv8_2', 4, [2]),
        ('conv9_2', 4, [2]),
    )

    for index, (source, num_priors, aspect_ratios) in enumerate(head_specs):
        feature = net[source]
        prefix = source + '_mbox'

        # Box regression: 4 offsets for each prior box.
        net[prefix + '_loc'] = Conv2D(num_priors * 4,
                                      kernel_size=(3, 3),
                                      padding='same',
                                      name=prefix + '_loc')(feature)
        net[prefix + '_loc_flat'] = Flatten(
            name=prefix + '_loc_flat')(net[prefix + '_loc'])

        # Classification: num_classes scores for each prior box.
        net[prefix + '_conf'] = Conv2D(num_priors * num_classes,
                                       kernel_size=(3, 3),
                                       padding='same',
                                       name=prefix + '_conf')(feature)
        net[prefix + '_conf_flat'] = Flatten(
            name=prefix + '_conf_flat')(net[prefix + '_conf'])

        # Prior boxes for this feature layer.
        priorbox = PriorBox(img_size,
                            sizes[index],
                            max_size=sizes[index + 1],
                            aspect_ratios=aspect_ratios,
                            variances=[0.1, 0.1, 0.2, 0.2],
                            name=prefix + '_priorbox')
        net[prefix + '_priorbox'] = priorbox(feature)

    prefixes = [source + '_mbox' for source, _, _ in head_specs]

    # Stack the per-layer results along axis 1.
    net['mbox_loc'] = concatenate(
        [net[p + '_loc_flat'] for p in prefixes], axis=1, name='mbox_loc')
    net['mbox_conf'] = concatenate(
        [net[p + '_conf_flat'] for p in prefixes], axis=1, name='mbox_conf')
    net['mbox_priorbox'] = concatenate(
        [net[p + '_priorbox'] for p in prefixes], axis=1,
        name='mbox_priorbox')

    # Each prior box contributes 4 values to the flattened loc tensor.
    num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4

    # (num_boxes, 4) box offsets.
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    # (num_boxes, num_classes) logits, then softmax over the classes.
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])

    # Offsets, class probabilities and priors side by side on the last axis.
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2,
        name='predictions')

    model = Model(net['input'], net['predictions'])
    return model
def SSD300(input_shape, num_classes=21):
    """Build an SSD300 Keras model on top of a ``MobileNetV3_Small`` backbone.

    Six feature layers taken from the backbone's layer dictionary each
    receive a 3x3 localisation convolution, a 3x3 classification convolution
    and a ``PriorBox`` layer. The flattened outputs are concatenated,
    reshaped to one row per prior box, and joined into a single
    ``predictions`` tensor.

    Args:
        input_shape: shape passed to ``Input`` and to the backbone
            (``300, 300, 3`` according to the original comments).
            ``(input_shape[1], input_shape[0])`` is handed to every
            ``PriorBox`` as the image size.
        num_classes: number of classes scored for each prior box.

    Returns:
        A ``Model`` mapping the input tensor to ``net['predictions']``.
    """
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])

    # The backbone's ``build`` result is used as a dict of named layers.
    net = MobileNetV3_Small(shape=input_shape,
                            n_class=num_classes,
                            include_top=False,
                            backbone=True,
                            input_tensor=input_tensor).build(plot=False)

    # (feature key, name prefix, priors per cell, aspect ratios, size, max).
    # The first head keeps its historical 'conv4_3_*' names (no '_mbox').
    head_specs = (
        ('conv4_3', 'conv4_3', 4, [2], 30.0, 60.0),
        ('fc7', 'fc7_mbox', 6, [2, 3], 60.0, 111.0),
        ('conv6_2', 'conv6_2_mbox', 6, [2, 3], 111.0, 162.0),
        ('conv7_2', 'conv7_2_mbox', 6, [2, 3], 162.0, 213.0),
        ('conv8_2', 'conv8_2_mbox', 4, [2], 213.0, 264.0),
        ('conv9_2', 'conv9_2_mbox', 4, [2], 264.0, 315.0),
    )

    for source, prefix, num_priors, aspect_ratios, size, max_size in head_specs:
        feature = net[source]

        # Box regression: 4 offsets for each prior box.
        net[prefix + '_loc'] = Conv2D(num_priors * 4,
                                      kernel_size=(3, 3),
                                      padding='same',
                                      name=prefix + '_loc')(feature)
        net[prefix + '_loc_flat'] = Flatten(
            name=prefix + '_loc_flat')(net[prefix + '_loc'])

        # Classification: num_classes scores for each prior box.
        net[prefix + '_conf'] = Conv2D(num_priors * num_classes,
                                       kernel_size=(3, 3),
                                       padding='same',
                                       name=prefix + '_conf')(feature)
        net[prefix + '_conf_flat'] = Flatten(
            name=prefix + '_conf_flat')(net[prefix + '_conf'])

        # Prior boxes for this feature layer.
        priorbox = PriorBox(img_size,
                            size,
                            max_size=max_size,
                            aspect_ratios=aspect_ratios,
                            variances=[0.1, 0.1, 0.2, 0.2],
                            name=prefix + '_priorbox')
        net[prefix + '_priorbox'] = priorbox(feature)

    prefixes = [spec[1] for spec in head_specs]

    # Stack the per-layer results along axis 1.
    net['mbox_loc'] = concatenate(
        [net[p + '_loc_flat'] for p in prefixes], axis=1, name='mbox_loc')
    net['mbox_conf'] = concatenate(
        [net[p + '_conf_flat'] for p in prefixes], axis=1, name='mbox_conf')
    net['mbox_priorbox'] = concatenate(
        [net[p + '_priorbox'] for p in prefixes], axis=1,
        name='mbox_priorbox')

    # Each prior box contributes 4 values to the flattened loc tensor.
    # Tensors that expose ``_keras_shape`` are read directly; everything
    # else goes through ``K.int_shape``. The previous ``elif hasattr(...,
    # 'int_shape')`` left ``num_boxes`` unbound when neither attribute was
    # present on the tensor.
    mbox_loc = net['mbox_loc']
    if hasattr(mbox_loc, '_keras_shape'):
        num_boxes = mbox_loc._keras_shape[-1] // 4
    else:
        num_boxes = K.int_shape(mbox_loc)[-1] // 4

    # (num_boxes, 4) box offsets.
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    # (num_boxes, num_classes) logits, then softmax over the classes.
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])

    # Offsets, class probabilities and priors side by side on the last axis.
    net['predictions'] = concatenate(
        [net['mbox_loc'], net['mbox_conf'], net['mbox_priorbox']],
        axis=2,
        name='predictions')

    model = Model(input_tensor, net['predictions'])
    return model
class SSD(nn.Module):
    """SSD detector with configurable detection thresholds.

    Args:
        phase: ``'test'`` adds a softmax and a ``Detect`` post-processing
            step; any other value makes ``forward`` return raw predictions.
        base: iterable of backbone layers, stored as ``self.vgg``.
        extras: iterable of extra feature layers applied after the backbone.
        head: pair ``(loc_layers, conf_layers)`` holding one prediction
            layer per feature map.
        num_classes: number of class scores predicted per prior box.
        confidence: forwarded to ``Detect`` as its fourth argument.
        nms_iou: forwarded to ``Detect`` as its fifth argument.
    """

    def __init__(self, phase, base, extras, head, num_classes, confidence,
                 nms_iou):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config

        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        # Prior boxes are generated once, outside of autograd tracking.
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        # Decoding / NMS is only attached for inference.
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, confidence, nms_iou)

    def forward(self, x):
        """Run the detector on a batch ``x``.

        Returns:
            In ``'test'`` phase, the result of ``self.detect`` applied to the
            reshaped predictions and the priors. Otherwise the tuple
            ``(loc, conf, priors)`` used for training, with ``loc`` viewed as
            ``(batch, -1, 4)`` and ``conf`` as ``(batch, -1, num_classes)``.
        """
        features = []

        # conv4_3 (38,38,512 per the original comments): the first 23
        # backbone layers, followed by L2 normalisation.
        for index in range(23):
            x = self.vgg[index](x)
        normalised = self.L2Norm(x)
        features.append(normalised)

        # conv7 (19,19,1024 per the original comments): the rest of the
        # backbone.
        for index in range(23, len(self.vgg)):
            x = self.vgg[index](x)
        features.append(x)

        # Extra layers: every second output (k = 1, 3, 5, 7, ...) is used for
        # prediction. The original comments list their shapes as
        # (10,10,512), (5,5,256), (3,3,256), (1,1,256).
        for index, extra in enumerate(self.extras):
            x = F.relu(extra(x), inplace=True)
            if index % 2 == 1:
                features.append(x)

        # Attach the regression and classification heads to each feature
        # map, channels-last, flattened to (batch, -1) for concatenation.
        flat_loc = []
        flat_conf = []
        for feature, reg_head, cls_head in zip(features, self.loc, self.conf):
            reg = reg_head(feature).permute(0, 2, 3, 1).contiguous()
            cls = cls_head(feature).permute(0, 2, 3, 1).contiguous()
            flat_loc.append(reg.view(reg.size(0), -1))
            flat_conf.append(cls.view(cls.size(0), -1))

        loc = torch.cat(flat_loc, 1)
        conf = torch.cat(flat_conf, 1)

        # loc  -> (batch_size, num_anchors, 4)
        # conf -> (batch_size, num_anchors, num_classes)
        loc = loc.view(loc.size(0), -1, 4)
        conf = conf.view(conf.size(0), -1, self.num_classes)

        # Inference decodes the priors through ``detect``; training gets the
        # raw regression and classification predictions.
        if self.phase == "test":
            output = self.detect(loc, self.softmax(conf), self.priors)
        else:
            output = (loc, conf, self.priors)
        return output
def __init__(self, phase, base, extras, head, num_classes):
    """Create the SSD layers plus the feature-fusion and attention modules.

    Args:
        phase: ``'test'`` additionally creates ``softmax`` and ``detect``.
        base: iterable of backbone layers, stored as ``self.vgg``.
        extras: iterable of extra feature layers.
        head: pair ``(loc_layers, conf_layers)`` of prediction layers.
        num_classes: number of class scores predicted per prior box.
    """
    super(SSD, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    self.cfg = Config

    self.vgg = nn.ModuleList(base)
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)

    # Prior boxes are generated once, outside of autograd tracking.
    self.priorbox = PriorBox(self.cfg)
    with torch.no_grad():
        self.priors = Variable(self.priorbox.forward())

    self.loc = nn.ModuleList(head[0])
    self.conf = nn.ModuleList(head[1])

    if phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def pointwise(channels_in, channels_out):
        # 1x1 convolution used to match channel counts before fusion.
        return nn.Conv2d(in_channels=channels_in,
                         out_channels=channels_out,
                         kernel_size=1,
                         stride=1)

    # Fusion path 1: 256-channel map -> Upsample(10) -> 512 channels, paired
    # with a 512 -> 512 lateral convolution.
    self.upsample_256_256 = Upsample(10)
    self.conv_256_512 = pointwise(256, 512)
    self.conv_512_512_1 = pointwise(512, 512)

    # Fusion path 2: 512-channel map -> Upsample(19) -> 1024 channels,
    # paired with a 1024 -> 1024 lateral convolution.
    self.upsample_512_512 = Upsample(19)
    self.conv_512_1024 = pointwise(512, 1024)
    self.conv_1024_1024 = pointwise(1024, 1024)

    # Fusion path 3: 1024-channel map -> Upsample(38) -> 512 channels,
    # paired with a 512 -> 512 lateral convolution.
    self.upsample_1024_1024 = Upsample(38)
    self.conv_1024_512 = pointwise(1024, 512)
    self.conv_512_512_2 = pointwise(512, 512)

    # 3x3 smoothing convolutions for 512- and 1024-channel maps.
    self.smooth = nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1)
    self.smooth1 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1, stride=1)

    # Channel width of the map each attention module 1..6 is built for.
    attention_channels = (512, 1024, 512, 256, 256, 256)

    if USE_CBAM:
        for number, channels in enumerate(attention_channels, start=1):
            setattr(self, 'CBAM%d' % number, Bottleneck(channels))

    if USE_SE:
        for number, channels in enumerate(attention_channels, start=1):
            setattr(self, 'SE%d' % number, SEModule(channels))
class SSD(nn.Module):
    """SSD detector with top-down feature fusion and optional attention.

    Higher-level feature maps are upsampled, channel-matched with 1x1
    convolutions, added to lower-level maps and smoothed with 3x3
    convolutions before the prediction heads are applied. The module-level
    flags ``USE_SE`` and ``USE_CBAM`` switch the attention modules on.

    Args:
        phase: ``'test'`` adds a softmax and a ``Detect`` post-processing
            step; any other value makes ``forward`` return raw predictions.
        base: iterable of backbone layers, stored as ``self.vgg``.
        extras: iterable of extra feature layers applied after the backbone.
        head: pair ``(loc_layers, conf_layers)`` of prediction layers.
        num_classes: number of class scores predicted per prior box.
    """

    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        # Prior boxes are generated once, outside of autograd tracking.
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

        # Fusion of sources[5] (256 ch) into sources[4] (512 ch).
        self.upsample_256_256 = Upsample(10)
        self.conv_256_512 = nn.Conv2d(in_channels=256, out_channels=512,
                                      kernel_size=1, stride=1)
        self.conv_512_512_1 = nn.Conv2d(in_channels=512, out_channels=512,
                                        kernel_size=1, stride=1)

        # Fusion of the result above (512 ch) into sources[3] (1024 ch).
        self.upsample_512_512 = Upsample(19)
        self.conv_512_1024 = nn.Conv2d(in_channels=512, out_channels=1024,
                                       kernel_size=1, stride=1)
        self.conv_1024_1024 = nn.Conv2d(in_channels=1024, out_channels=1024,
                                        kernel_size=1, stride=1)

        # Fusion of the result above (1024 ch) into sources[1] (512 ch).
        self.upsample_1024_1024 = Upsample(38)
        self.conv_1024_512 = nn.Conv2d(in_channels=1024, out_channels=512,
                                       kernel_size=1, stride=1)
        self.conv_512_512_2 = nn.Conv2d(in_channels=512, out_channels=512,
                                        kernel_size=1, stride=1)

        # 3x3 smoothing: ``smooth`` for 512-channel maps, ``smooth1`` for
        # 1024-channel maps.
        self.smooth = nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1)
        self.smooth1 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1,
                                 stride=1)

        if USE_CBAM:
            self.CBAM1 = Bottleneck(512)
            self.CBAM2 = Bottleneck(1024)
            self.CBAM3 = Bottleneck(512)
            self.CBAM4 = Bottleneck(256)
            self.CBAM5 = Bottleneck(256)
            self.CBAM6 = Bottleneck(256)

        if USE_SE:
            self.SE1 = SEModule(512)
            self.SE2 = SEModule(1024)
            self.SE3 = SEModule(512)
            self.SE4 = SEModule(256)
            self.SE5 = SEModule(256)
            self.SE6 = SEModule(256)

    def forward(self, x):
        """Run the detector on a batch ``x``.

        Returns:
            In ``'test'`` phase, the result of ``self.detect`` applied to the
            reshaped predictions and the priors. Otherwise the tuple
            ``(loc, conf, priors)`` with ``loc`` viewed as ``(batch, -1, 4)``
            and ``conf`` viewed as ``(batch, -1, num_classes)``.

        Raises:
            IndexError: if fewer than eight feature maps are collected from
                the backbone and the extra layers.
        """
        sources = list()
        loc = list()
        conf = list()

        # NOTE(review): backbone layers 10-22 are never applied here (the
        # loops cover 0-9, 23-29 and 30+), and the code below indexes
        # sources[0..7], so ``extras`` must contribute five feature maps.
        # Confirm these indices against the backbone this class is built
        # with before relying on them.
        for k in range(10):
            x = self.vgg[k](x)
        sources.append(x)

        for k in range(23, 30):
            x = self.vgg[k](x)
        s = self.L2Norm(x)
        sources.append(s)

        # Remaining backbone layers (fc7 according to the original comments).
        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # Extra layers: ReLU after each, keep the output of every second one.
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        if USE_SE:
            # SE attention on the maps that take part in fusion/prediction;
            # sources[0] and sources[2] are passed through unchanged.
            attention = [
                sources[0],
                self.SE1(sources[1]),
                sources[2],
                self.SE2(sources[3]),
                self.SE3(sources[4]),
                self.SE4(sources[5]),
                self.SE5(sources[6]),
                self.SE6(sources[7]),
            ]
        else:
            # Without SE the fusion below works on the raw feature maps;
            # previously ``attention`` stayed empty here and the first
            # ``attention[...]`` lookup raised IndexError.
            attention = sources

        # Top-down fusion: upsample the coarser map, match channels with a
        # 1x1 convolution, add the lateral map, then smooth with a 3x3 conv.
        conv8_fp1 = (self.conv_256_512(self.upsample_256_256(attention[5]))
                     + self.conv_512_512_1(attention[4]))
        conv8_fp = self.smooth(conv8_fp1)

        fc7_fp1 = (self.conv_512_1024(self.upsample_512_512(conv8_fp1))
                   + self.conv_1024_1024(attention[3]))
        # fc7_fp1 has 1024 channels (both addends come from convolutions with
        # out_channels=1024), so it needs the 1024-channel smoothing conv;
        # ``self.smooth`` only accepts 512 input channels.
        fc7_fp = self.smooth1(fc7_fp1)

        conv4_fp = (self.conv_1024_512(self.upsample_1024_1024(fc7_fp1))
                    + self.conv_512_512_2(attention[1]))
        conv4_fp = self.smooth(conv4_fp)

        # Six maps go to the prediction heads: three fused, three direct.
        if USE_CBAM:
            sources_final = [
                self.CBAM1(conv4_fp),
                self.CBAM2(fc7_fp),
                self.CBAM3(conv8_fp),
                self.CBAM4(sources[5]),
                self.CBAM5(sources[6]),
                self.CBAM6(sources[7]),
            ]
        else:
            sources_final = [
                conv4_fp,
                fc7_fp,
                conv8_fp,
                attention[5],
                attention[6],
                attention[7],
            ]

        # Regression and classification heads, channels moved last.
        for (x, l, c) in zip(sources_final, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        # Flatten every head output to (batch, -1) and concatenate.
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        # loc  -> (batch_size, num_anchors, 4)
        # conf -> (batch_size, num_anchors, num_classes)
        loc = loc.view(loc.size(0), -1, 4)
        conf = conf.view(conf.size(0), -1, self.num_classes)

        if self.phase == "test":
            output = self.detect(loc, self.softmax(conf), self.priors)
        else:
            output = (loc, conf, self.priors)
        return output
class SSD(nn.Module):
    """SSD detector that also records feature maps and scores for Grad-CAM.

    Compared with the plain detector, the extra layers use ReLU module
    instances instead of the functional in-place ReLU, and ``forward`` stores
    the prediction feature maps and per-map class scores on the instance.

    Args:
        phase: ``'test'`` adds a softmax and a ``Detect`` post-processing
            step; any other value makes ``forward`` return raw predictions.
        base: iterable of backbone layers, stored as ``self.vgg``.
        extras: iterable of extra feature layers applied after the backbone.
        head: pair ``(loc_layers, conf_layers)`` of prediction layers.
        num_classes: number of class scores predicted per prior box.
    """

    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config

        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        # Prior boxes are generated once, outside of autograd tracking.
        # (The original author notes that assigning the raw
        # ``self.priorbox.forward()`` result also runs correctly.)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        # One ReLU module per extra layer (eight are created), replacing the
        # functional ReLU of the original implementation.
        self.relu_list4cxq = nn.ModuleList(
            [torch.nn.ReLU(True) for _ in range(8)])

        # Filled in by ``forward``; used for Grad-CAM.
        self.feature_maps4cxq = None
        self.scores4cxq = None

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        """Run the detector on a batch ``x`` and record Grad-CAM inputs.

        Side effects:
            ``self.feature_maps4cxq`` is set to the list of feature maps fed
            to the heads and ``self.scores4cxq`` to the list of channels-last
            classification outputs (one entry per feature map).

        Returns:
            In ``'test'`` phase, the result of ``self.detect.apply`` on the
            reshaped predictions and the priors. Otherwise the tuple
            ``(loc, conf, priors)`` with ``loc`` viewed as ``(batch, -1, 4)``
            and ``conf`` viewed as ``(batch, -1, num_classes)``.
        """
        feature_maps = []

        # conv4_3 (per the original comments): first 23 backbone layers,
        # followed by L2 normalisation.
        for index in range(23):
            x = self.vgg[index](x)
        feature_maps.append(self.L2Norm(x))

        # fc7 (per the original comments): the rest of the backbone.
        for index in range(23, len(self.vgg)):
            x = self.vgg[index](x)
        feature_maps.append(x)

        # Extra layers, each followed by its own ReLU module; every second
        # output becomes a prediction feature map.
        for index, extra in enumerate(self.extras):
            x = self.relu_list4cxq[index](extra(x))
            if index % 2 == 1:
                feature_maps.append(x)

        # Keep the feature maps (six per the original comments) for Grad-CAM.
        self.feature_maps4cxq = feature_maps

        # Regression and classification heads, channels moved last.
        loc_maps = []
        score_maps = []
        for feature, reg_head, cls_head in zip(feature_maps, self.loc,
                                               self.conf):
            loc_maps.append(reg_head(feature).permute(0, 2, 3, 1).contiguous())
            score_maps.append(
                cls_head(feature).permute(0, 2, 3, 1).contiguous())

        # Keep the per-map class scores for Grad-CAM.
        self.scores4cxq = score_maps

        # Flatten every head output to (batch, -1) and concatenate.
        loc = torch.cat([m.view(m.size(0), -1) for m in loc_maps], 1)
        conf = torch.cat([m.view(m.size(0), -1) for m in score_maps], 1)

        # loc  -> (batch_size, num_anchors, 4)
        # conf -> (batch_size, num_anchors, num_classes)
        loc = loc.view(loc.size(0), -1, 4)
        conf = conf.view(conf.size(0), -1, self.num_classes)

        if self.phase == "test":
            # The original comments describe each detection as 1 confidence
            # value plus 4 position values.
            output = self.detect.apply(loc, self.softmax(conf), self.priors)
        else:
            output = (loc, conf, self.priors)
        return output
def __init__(self, phase, base, extras, head, num_classes):
    """Build the feature-fusion SSD variant.

    Besides the usual backbone, extra layers and prediction heads, this
    constructor creates dilated/1x1 convolutions, upsamplers, smoothing
    convolutions and batch-norm layers, plus optional SE / ECA attention
    modules controlled by the module-level ``USE_SE`` / ``USE_ECA`` flags.

    Args:
        phase: ``'test'`` additionally creates the softmax and ``Detect``
            post-processing objects.
        base: iterable of backbone layers (stored as ``self.vgg``).
        extras: iterable of extra feature layers.
        head: pair ``(loc_layers, conf_layers)``.
        num_classes: number of class scores predicted per anchor.
    """
    super(SSD, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    self.cfg = Config
    self.vgg = nn.ModuleList(base)
    # presumably (channels, initial scale) -- confirm against L2Norm.
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)
    self.priorbox = PriorBox(self.cfg)
    with torch.no_grad():
        # Prior boxes are constants.  ``Variable`` has been a deprecated
        # wrapper since PyTorch 0.4; ``detach()`` is its exact equivalent.
        self.priors = self.priorbox.forward().detach()
    self.loc = nn.ModuleList(head[0])
    self.conf = nn.ModuleList(head[1])
    if phase == 'test':
        self.softmax = nn.Softmax(dim=-1)
        # NOTE(review): arguments are presumably (num_classes, background
        # label, top_k, conf threshold, NMS threshold) -- confirm.
        self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    # NOTE: the creation order below is kept exactly as in the original so
    # that parameter order and seeded initialisation do not change.
    # The Upsample arguments (38, 19, 10) are presumably target spatial
    # sizes -- confirm against the Upsample definition in use.

    # First group: stride-2 dilated conv, 1x1 reduction, upsample + 1x1.
    self.DilationConv_128_128 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3,
                                          padding=2, dilation=2, stride=2)
    self.conv_512_256 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1)
    self.upsample_1024_1024 = Upsample(38)
    self.conv_1024_128 = nn.Conv2d(in_channels=1024, out_channels=128, kernel_size=1, stride=1)

    # Second group.
    self.DilationConv_512_256 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=3,
                                          padding=2, dilation=2, stride=2)
    self.conv_1024_512 = nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=1, stride=1)
    self.upsample_512_512 = Upsample(19)
    self.conv_512_256_fc7 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1)

    # Third group.
    self.DilationConv_512_128_2 = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3,
                                            padding=2, dilation=2, stride=2)
    self.conv_512_256_2 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1)
    self.upsample_256_256_2 = Upsample(10)
    self.conv_256_128_2 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1)

    # 3x3 smoothing convolutions for 512- and 1024-channel tensors.
    self.smooth = nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1)
    self.smooth2 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1, stride=1)

    # Batch norm for 128- and 256-channel tensors.
    self.bn = nn.BatchNorm2d(128)
    self.bn1 = nn.BatchNorm2d(256)

    # Optional attention modules, one per channel width listed.
    if USE_SE:
        self.SE1 = SEModule(512)
        self.SE2 = SEModule(512)
        self.SE3 = SEModule(512)
        self.SE4 = SEModule(256)
        self.SE5 = SEModule(256)
        self.SE6 = SEModule(256)

    if USE_ECA:
        self.ECA1 = ECAModule(512)
        self.ECA2 = ECAModule(1024)
        self.ECA3 = ECAModule(512)
        self.ECA4 = ECAModule(256)
class SSD(nn.Module):
    """SSD detector variant with multi-scale feature fusion and optional ECA.

    ``forward`` collects feature maps from the backbone and the extra
    layers, fuses three neighbouring scales at a time (a stride-2 dilated
    conv on the finer map, a 1x1 conv on the current map, an upsample + 1x1
    conv on the coarser map), smooths the concatenation with a 3x3 conv and
    feeds four resulting levels to the prediction heads.

    The module-level flags ``USE_SE`` / ``USE_ECA`` control which attention
    modules are created; only the ECA modules are used by ``forward``.

    Args:
        phase: ``'test'`` enables softmax + ``Detect`` post-processing; any
            other value makes ``forward`` return the raw predictions.
        base: iterable of backbone layers; ``forward`` indexes layers 0-29
            explicitly, so at least 30 layers are required.
        extras: iterable of extra layers; every second one yields a source
            feature map, and ``forward`` needs at least two such maps.
        head: pair ``(loc_layers, conf_layers)``.
        num_classes: number of class scores predicted per anchor.
    """

    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        # presumably (channels, initial scale) -- confirm against L2Norm.
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            # Prior boxes are constants.  ``Variable`` has been a deprecated
            # wrapper since PyTorch 0.4; ``detach()`` is its exact equivalent.
            self.priors = self.priorbox.forward().detach()
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            # NOTE(review): arguments are presumably (num_classes,
            # background label, top_k, conf threshold, NMS threshold).
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

        # NOTE: creation order below is kept exactly as in the original so
        # that parameter order and seeded initialisation do not change.
        # The Upsample arguments (38, 19, 10) are presumably target spatial
        # sizes -- confirm against the Upsample definition in use.

        # Layers producing ``conv4_fp`` in ``forward``.
        self.DilationConv_128_128 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3,
                                              padding=2, dilation=2, stride=2)
        self.conv_512_256 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1)
        self.upsample_1024_1024 = Upsample(38)
        self.conv_1024_128 = nn.Conv2d(in_channels=1024, out_channels=128, kernel_size=1, stride=1)

        # Layers producing ``fc7_fp`` in ``forward``.
        self.DilationConv_512_256 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=3,
                                              padding=2, dilation=2, stride=2)
        self.conv_1024_512 = nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=1, stride=1)
        self.upsample_512_512 = Upsample(19)
        self.conv_512_256_fc7 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1)

        # Layers producing ``conv8_fp`` in ``forward``.
        self.DilationConv_512_128_2 = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3,
                                                padding=2, dilation=2, stride=2)
        self.conv_512_256_2 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1)
        self.upsample_256_256_2 = Upsample(10)
        self.conv_256_128_2 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1)

        # 3x3 smoothing convs applied to the fused 512- / 1024-channel maps.
        self.smooth = nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1)
        self.smooth2 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1, stride=1)

        # Batch norm for the 128- and 256-channel dilated-conv branches.
        self.bn = nn.BatchNorm2d(128)
        self.bn1 = nn.BatchNorm2d(256)

        # SE modules are created when enabled but are not referenced by
        # ``forward`` in this class.
        if USE_SE:
            self.SE1 = SEModule(512)
            self.SE2 = SEModule(512)
            self.SE3 = SEModule(512)
            self.SE4 = SEModule(256)
            self.SE5 = SEModule(256)
            self.SE6 = SEModule(256)

        if USE_ECA:
            self.ECA1 = ECAModule(512)
            self.ECA2 = ECAModule(1024)
            self.ECA3 = ECAModule(512)
            self.ECA4 = ECAModule(256)

    def forward(self, x):
        """Run the detector on a batch of images.

        Args:
            x: input batch fed to the first backbone layer.

        Returns:
            In ``'test'`` phase, whatever ``self.detect`` returns for
            ``(loc, softmax(conf), priors)``.  Otherwise the tuple
            ``(loc, conf, priors)`` where ``loc`` is viewed as
            ``(batch, -1, 4)`` and ``conf`` as ``(batch, -1, num_classes)``.
        """
        sources = []
        loc = []
        conf = []

        # sources[0]: output of backbone layers 0-9 (fed to the
        # 128-input-channel dilated conv below).
        for k in range(10):
            x = self.vgg[k](x)
        sources.append(x)

        # sources[1]: conv4_3 output (layers 10-22), L2-normalised.
        for k in range(10, 23):
            x = self.vgg[k](x)
        sources.append(self.L2Norm(x))

        # sources[2]: output of layers 23-29, L2-normalised.
        # NOTE(review): this reuses the same L2Norm instance (and thus the
        # same parameters) as conv4_3 -- confirm that sharing is intended.
        for k in range(23, 30):
            x = self.vgg[k](x)
        sources.append(self.L2Norm(x))

        # sources[3]: fc7 output (the remaining backbone layers).
        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # sources[4:]: every second extra layer's activation.
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # Fused levels are collected coarse-to-fine and reversed for the
        # heads.  Only sources[0..5] are consumed; later ones are unused.
        sources_final = []

        # Coarsest level: sources[5] is passed through without fusion.
        sources_final.append(self.ECA4(sources[5]) if USE_ECA else sources[5])

        # conv8 level: sources[2] (downsampled) + sources[4] + sources[5]
        # (upsampled), concatenated along channels and smoothed.
        conv8_fp1 = torch.cat(
            (F.relu(self.bn(self.DilationConv_512_128_2(sources[2])), inplace=True),
             F.relu(self.conv_512_256_2(sources[4]), inplace=True),
             F.relu(self.conv_256_128_2(self.upsample_256_256_2(sources[5])), inplace=True)),
            1)
        conv8_fp = F.relu(self.smooth(conv8_fp1), inplace=True)
        sources_final.append(self.ECA3(conv8_fp) if USE_ECA else conv8_fp)

        # fc7 level: sources[1] (downsampled) + sources[3] + sources[4]
        # (upsampled).
        fc7_fp1 = torch.cat(
            (F.relu(self.bn1(self.DilationConv_512_256(sources[1])), inplace=True),
             F.relu(self.conv_1024_512(sources[3]), inplace=True),
             F.relu(self.conv_512_256_fc7(self.upsample_512_512(sources[4])), inplace=True)),
            1)
        fc7_fp = F.relu(self.smooth2(fc7_fp1), inplace=True)
        sources_final.append(self.ECA2(fc7_fp) if USE_ECA else fc7_fp)

        # conv4 level: sources[0] (downsampled) + sources[1] + sources[3]
        # (upsampled).
        # NOTE(review): ``self.bn`` and ``self.smooth`` are the same
        # instances already used for the conv8 level -- confirm that
        # sharing weights and running statistics is intended.
        conv4_fp = torch.cat(
            (F.relu(self.bn(self.DilationConv_128_128(sources[0])), inplace=True),
             F.relu(self.conv_512_256(sources[1]), inplace=True),
             F.relu(self.conv_1024_128(self.upsample_1024_1024(sources[3])), inplace=True)),
            1)
        conv4_fp = F.relu(self.smooth(conv4_fp), inplace=True)
        sources_final.append(self.ECA1(conv4_fp) if USE_ECA else conv4_fp)

        # Apply the regression and classification heads fine-to-coarse,
        # moving channels last so flattening keeps each location's values
        # contiguous.  ``zip`` stops at the shortest of the three sequences.
        for (x, loc_layer, conf_layer) in zip(sources_final[::-1], self.loc, self.conf):
            loc.append(loc_layer(x).permute(0, 2, 3, 1).contiguous())
            conf.append(conf_layer(x).permute(0, 2, 3, 1).contiguous())

        # Flatten each map and concatenate across maps.
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        # loc -> (batch, num_anchors, 4);
        # conf -> (batch, num_anchors, num_classes).
        loc = loc.view(loc.size(0), -1, 4)
        conf = conf.view(conf.size(0), -1, self.num_classes)

        if self.phase == "test":
            return self.detect(loc, self.softmax(conf), self.priors)
        return loc, conf, self.priors