# Variant 1: baseline SSD. Detect is constructed from the module itself and
# invoked via .apply (torch.autograd.Function style).
class SSD(nn.Module):
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(self)

    def forward(self, x):
        sources = list()
        loc = list()
        conf = list()

        # Run VGG up to conv4_3. ReLU layers count toward the index; pooling
        # layers have no ReLU. Indices 0-22 correspond to layers 1-23.
        for k in range(23):
            x = self.vgg[k](x)
        # L2-normalize conv4_3: it sits early in the network, so its
        # activation scale differs from the deeper feature maps.
        s = self.L2Norm(x)
        sources.append(s)

        # Run the rest of VGG (index 23 to the end) to get fc7.
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)  # fc7

        # Run the extra layers. ReLU is applied here, which is why it does not
        # appear in the extras module list; every second output is a source.
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)  # (batch_size, channels, h, w)

        # Apply the regression and classification heads; permute moves the
        # channel dimension to the end.
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        # Flatten and concatenate the predictions from all feature maps.
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        if self.phase == "test":
            output = self.detect.apply(
                loc.view(loc.size(0), -1, 4),
                self.softmax(conf.view(conf.size(0), -1, self.num_classes)),
                self.priors)
        else:
            output = (loc.view(loc.size(0), -1, 4),
                      conf.view(conf.size(0), -1, self.num_classes),
                      self.priors)
        return output
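# Every variant in this section instantiates L2Norm(512, 20) without showing
# it. For reference, a typical implementation (this sketch follows the widely
# used ssd.pytorch layout; the authors' exact module may differ): channel-wise
# L2 normalization followed by a learnable per-channel scale initialized to 20.
import torch
import torch.nn as nn

class L2Norm(nn.Module):
    def __init__(self, n_channels, scale):
        super(L2Norm, self).__init__()
        self.n_channels = n_channels
        self.eps = 1e-10
        self.weight = nn.Parameter(torch.Tensor(n_channels))
        nn.init.constant_(self.weight, scale)  # gamma initialized to `scale`

    def forward(self, x):
        # x: (batch, n_channels, h, w); normalize across the channel dimension
        norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps
        x = x / norm
        # broadcast the learned per-channel scale back over (h, w)
        return self.weight.view(1, -1, 1, 1) * x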
# Variant 2: priors wrapped in Variable (a deprecated no-op wrapper since
# PyTorch 0.4) and Detect built with explicit hyperparameters and called
# directly.
class SSD(nn.Module):
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        sources = list()
        loc = list()
        conf = list()

        # Get the conv4_3 output.
        for k in range(23):
            x = self.vgg[k](x)
        s = self.L2Norm(x)
        sources.append(s)

        # Get the fc7 output.
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # Run the extra layers.
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # Apply the regression and classification heads.
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        # Flatten and concatenate.
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        if self.phase == "test":
            # loc is reshaped to (batch_size, num_anchors, 4),
            # conf to (batch_size, num_anchors, num_classes).
            output = self.detect(
                loc.view(loc.size(0), -1, 4),                                  # loc preds
                self.softmax(conf.view(conf.size(0), -1, self.num_classes)),  # conf preds
                self.priors)
        else:
            output = (loc.view(loc.size(0), -1, 4),
                      conf.view(conf.size(0), -1, self.num_classes),
                      self.priors)
        return output
# Variant 3: same structure as Variant 2, with fuller commentary.
class SSD(nn.Module):
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        sources = list()
        loc = list()
        conf = list()

        # Get the conv4_3 output.
        for k in range(23):
            x = self.vgg[k](x)
        # Normalize: the network is not very deep at this point, and L2
        # normalization gives better results here.
        s = self.L2Norm(x)
        sources.append(s)

        # Get the fc7 output (this corresponds to conv7).
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # Run the extra layers.
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:  # append every second extra layer's output
                sources.append(x)

        # Apply the regression and classification heads to the sources
        # gathered above. In PyTorch the channel dimension is dim 1 (dim 0 is
        # the batch size); moving the channels to the last dimension makes the
        # later flattening straightforward.
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        # Flatten and concatenate (view acts like a resize).
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        if self.phase == "test":
            output = self.detect(
                # (batch_size, num_priors, 4): the prior-box adjustment parameters
                loc.view(loc.size(0), -1, 4),
                # (batch_size, num_priors, num_classes): softmax is applied at
                # inference time
                self.softmax(conf.view(conf.size(0), -1, self.num_classes)),
                self.priors
            )
        else:  # no softmax during training
            output = (
                loc.view(loc.size(0), -1, 4),                   # adjustment parameters per prior
                conf.view(conf.size(0), -1, self.num_classes),  # class scores per prior
                self.priors
            )
        return output
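# A minimal, self-contained sketch of the permute/view/cat bookkeeping above,
# using the standard SSD300 feature-map sizes and anchors-per-location counts
# (4, 6, 6, 6, 4, 4). The names here are illustrative, not from the code above.
import torch

batch, num_classes = 2, 21
specs = [(38, 4), (19, 6), (10, 6), (5, 6), (3, 4), (1, 4)]  # (grid size, anchors per cell)

loc = []
for size, anchors in specs:
    # one head output: (batch, anchors*4, h, w) -> channels-last -> flatten
    o = torch.randn(batch, anchors * 4, size, size)
    loc.append(o.permute(0, 2, 3, 1).contiguous())
loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)

num_anchors = sum(size * size * anchors for size, anchors in specs)
print(num_anchors)                   # 8732
print(loc.view(batch, -1, 4).shape)  # torch.Size([2, 8732, 4])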
# Variant 4: the confidence and NMS IoU thresholds are passed in as
# constructor arguments instead of being hard-coded.
class SSD(nn.Module):
    def __init__(self, phase, base, extras, head, num_classes, confidence, nms_iou):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, confidence, nms_iou)

    def forward(self, x):
        sources = list()
        loc = list()
        conf = list()
        #---------------------------#
        #   Get the conv4_3 output
        #   shape: 38,38,512
        #---------------------------#
        for k in range(23):
            x = self.vgg[k](x)
        #---------------------------#
        #   conv4_3's output needs
        #   L2 normalization
        #---------------------------#
        s = self.L2Norm(x)
        sources.append(s)
        #---------------------------#
        #   Get the conv7 output
        #   shape: 19,19,1024
        #---------------------------#
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)
        #-------------------------------------------------------------#
        #   Among the feature layers produced by add_extras, layers
        #   1, 3, 5 and 7 are used for regression and classification.
        #   Their shapes are (10,10,512), (5,5,256), (3,3,256), (1,1,256).
        #-------------------------------------------------------------#
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)
        #-------------------------------------------------------------#
        #   Apply the regression and classification heads to the six
        #   effective feature layers.
        #-------------------------------------------------------------#
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())
        #-------------------------------------------------------------#
        #   Reshape so the per-layer predictions can be concatenated.
        #-------------------------------------------------------------#
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        #-------------------------------------------------------------#
        #   loc is reshaped to (batch_size, num_anchors, 4) and conf to
        #   (batch_size, num_anchors, self.num_classes).
        #   At inference, Detect decodes the priors and returns the
        #   final detections; during training, the raw regression and
        #   classification outputs are returned for the loss.
        #-------------------------------------------------------------#
        if self.phase == "test":
            output = self.detect(
                loc.view(loc.size(0), -1, 4),
                self.softmax(conf.view(conf.size(0), -1, self.num_classes)),
                self.priors)
        else:
            output = (loc.view(loc.size(0), -1, 4),
                      conf.view(conf.size(0), -1, self.num_classes),
                      self.priors)
        return output
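# The Detect arguments above follow the common ssd.pytorch ordering:
# (num_classes, bkg_label, top_k, conf_thresh, nms_thresh). For reference,
# the usual box decoding that Detect performs: regression offsets are applied
# to the priors (center-size form) and the result is converted to corner
# form. A sketch assuming the conventional variances (0.1, 0.2):
import torch

def decode(loc, priors, variances=(0.1, 0.2)):
    # loc, priors: (num_priors, 4); priors are (cx, cy, w, h)
    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],  # new centers
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)   # new w, h
    boxes[:, :2] -= boxes[:, 2:] / 2  # (cx, cy) -> (xmin, ymin)
    boxes[:, 2:] += boxes[:, :2]      # (w, h)  -> (xmax, ymax)
    return boxes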
# Variant 5: FPN-style top-down fusion over conv4_3 / fc7 / conv8_2, with
# optional CBAM and SE attention.
class SSD(nn.Module):
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

        # Top-down path: upsample the smaller map, match channels, then add.
        self.upsample_256_256 = Upsample(10)
        self.conv_256_512 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=1, stride=1)
        # conv8_2 -> conv8_2
        self.conv_512_512_1 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, stride=1)
        self.upsample_512_512 = Upsample(19)
        self.conv_512_1024 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=1, stride=1)
        self.conv_1024_1024 = nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=1, stride=1)
        self.upsample_1024_1024 = Upsample(38)
        self.conv_1024_512 = nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=1, stride=1)
        self.conv_512_512_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=1, stride=1)
        self.smooth = nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1)
        self.smooth1 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1, stride=1)
        if USE_CBAM:
            self.CBAM1 = Bottleneck(512)
            self.CBAM2 = Bottleneck(1024)
            self.CBAM3 = Bottleneck(512)
            self.CBAM4 = Bottleneck(256)
            self.CBAM5 = Bottleneck(256)
            self.CBAM6 = Bottleneck(256)
        if USE_SE:
            self.SE1 = SEModule(512)
            self.SE2 = SEModule(1024)
            self.SE3 = SEModule(512)
            self.SE4 = SEModule(256)
            self.SE5 = SEModule(256)
            self.SE6 = SEModule(256)

    def forward(self, x):
        sources = list()
        attention = list()
        loc = list()
        conf = list()

        # Collect the backbone feature maps.
        for k in range(10):
            x = self.vgg[k](x)
        sources.append(x)
        # Layers 10-22 produce conv4_3. The original snippet skipped this
        # loop, which leaves `sources` one entry short for the indexing below
        # and breaks the layer chain; it is restored here to match the sibling
        # variant further down.
        for k in range(10, 23):
            x = self.vgg[k](x)
        s = self.L2Norm(x)
        sources.append(s)
        for k in range(23, 30):
            x = self.vgg[k](x)
        s = self.L2Norm(x)
        sources.append(s)

        # Get the fc7 output.
        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # Run the extra layers.
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        if USE_SE:
            attention.append(sources[0])
            attention.append(self.SE1(sources[1]))
            attention.append(sources[2])
            attention.append(self.SE2(sources[3]))
            attention.append(self.SE3(sources[4]))
            attention.append(self.SE4(sources[5]))
            attention.append(self.SE5(sources[6]))
            attention.append(self.SE6(sources[7]))
        else:
            # The original left `attention` empty when USE_SE was off, which
            # would crash in the fusion below; fall back to the raw sources.
            attention = list(sources)

        sources_final = list()
        # conv8_2 fused with the upsampled conv9_2 map.
        conv8_fp1 = self.conv_256_512(self.upsample_256_256(
            attention[5])) + self.conv_512_512_1(attention[4])
        conv8_fp = self.smooth(conv8_fp1)
        # fc7 fused with the upsampled conv8 map. The original applied
        # self.smooth (512 channels) to this 1024-channel map, which cannot
        # run; self.smooth1 has the matching channel count.
        fc7_fp1 = self.conv_512_1024(
            self.upsample_512_512(conv8_fp1)) + self.conv_1024_1024(
                attention[3])
        fc7_fp = self.smooth1(fc7_fp1)
        # conv4_3 fused with the upsampled fc7 map.
        conv4_fp = self.conv_1024_512(
            self.upsample_1024_1024(fc7_fp1)) + self.conv_512_512_2(
                attention[1])
        conv4_fp = self.smooth(conv4_fp)

        if USE_CBAM:
            sources_final.append(self.CBAM1(conv4_fp))
            sources_final.append(self.CBAM2(fc7_fp))
            sources_final.append(self.CBAM3(conv8_fp))
            sources_final.append(self.CBAM4(sources[5]))
            sources_final.append(self.CBAM5(sources[6]))
            sources_final.append(self.CBAM6(sources[7]))
        else:
            sources_final.append(conv4_fp)
            sources_final.append(fc7_fp)
            sources_final.append(conv8_fp)
            sources_final.append(attention[5])
            sources_final.append(attention[6])
            sources_final.append(attention[7])

        # Apply the regression and classification heads.
        # Original (pre-fusion) version:
        # for (x, l, c) in zip(sources, self.loc, self.conf):
        #     loc.append(l(x).permute(0, 2, 3, 1).contiguous())
        #     conf.append(c(x).permute(0, 2, 3, 1).contiguous())
        for (x, l, c) in zip(sources_final, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        # Flatten and concatenate.
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        if self.phase == "test":
            # loc is reshaped to (batch_size, num_anchors, 4), conf to
            # (batch_size, num_anchors, num_classes); this part is unchanged
            # from the baseline.
            output = self.detect(
                loc.view(loc.size(0), -1, 4),                                  # loc preds
                self.softmax(conf.view(conf.size(0), -1, self.num_classes)),  # conf preds
                self.priors)
        else:
            output = (loc.view(loc.size(0), -1, 4),
                      conf.view(conf.size(0), -1, self.num_classes),
                      self.priors)
        return output
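# The fusion variants in this section call Upsample(10/19/38) but never
# define it. A minimal sketch consistent with those call sites (a fixed-size
# bilinear resize; the authors' actual module may differ, e.g. nearest mode):
import torch.nn as nn
import torch.nn.functional as F

class Upsample(nn.Module):
    def __init__(self, size):
        super(Upsample, self).__init__()
        self.size = size  # target spatial size, e.g. 38 -> (38, 38)

    def forward(self, x):
        return F.interpolate(x, size=(self.size, self.size),
                             mode='bilinear', align_corners=False)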
# Variant 6: instrumented for Grad-CAM. Feature maps and class scores are
# cached on the module, and the inplace ReLUs in the extras are replaced by
# separate ReLU modules so hooks can be attached.
class SSD(nn.Module):
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())
            # self.priors = self.priorbox.forward()  # this also runs fine
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        # Modified: one ReLU module per extra layer.
        self.relu_list4cxq = nn.ModuleList([torch.nn.ReLU(True) for i in range(8)])
        self.feature_maps4cxq = None  # cached for Grad-CAM
        self.scores4cxq = None        # cached for Grad-CAM
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        sources = list()
        loc = list()
        conf = list()

        # Get the conv4_3 output.
        for k in range(23):
            x = self.vgg[k](x)
        s = self.L2Norm(x)
        sources.append(s)

        # Get the fc7 output.
        for k in range(23, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # Run the extra layers.
        for k, v in enumerate(self.extras):
            # x = F.relu(v(x), inplace=True)  # original implementation
            x = self.relu_list4cxq[k](v(x))   # modified version
            if k % 2 == 1:
                sources.append(x)

        self.feature_maps4cxq = sources  # the 6 feature maps

        # Apply the regression and classification heads.
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        self.scores4cxq = conf  # per-class scores, saved for Grad-CAM

        # Flatten and concatenate.
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)    # e.g. torch.Size([4, 34928])
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)  # e.g. torch.Size([4, 26196])

        if self.phase == "test":
            # loc is reshaped to (batch_size, num_anchors, 4),
            # conf to (batch_size, num_anchors, num_classes).
            # output = self.detect(
            output = self.detect.apply(
                loc.view(loc.size(0), -1, 4),                                  # loc preds, e.g. torch.Size([4, 8732, 4])
                self.softmax(conf.view(conf.size(0), -1, self.num_classes)),  # conf preds, e.g. torch.Size([4, 8732, 3])
                self.priors                                                    # torch.Size([8732, 4])
            )  # e.g. torch.Size([1, 3, 200, 5]): 1 confidence + 4 location values per detection
        else:
            output = (
                loc.view(loc.size(0), -1, 4),
                conf.view(conf.size(0), -1, self.num_classes),
                self.priors
            )  # e.g. torch.Size([4, 8732, 4]), torch.Size([4, 8732, 3]), torch.Size([8732, 4])
        return output
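# Note on self.detect.apply(...) vs self.detect(...): in recent PyTorch
# releases, subclasses of torch.autograd.Function must define static
# forward/backward methods and be invoked through .apply; older releases
# allowed instantiating the Function and calling it directly, which is why
# the variants here differ. A minimal sketch of the newer pattern
# (DetectSketch is a hypothetical stand-in, not the real Detect):
import torch

class DetectSketch(torch.autograd.Function):
    @staticmethod
    def forward(ctx, loc, conf, priors):
        # the real code would decode boxes, threshold by confidence, and run
        # NMS; here we just return a correctly shaped placeholder
        return loc.new_zeros(loc.size(0), conf.size(2), 200, 5)

# invoked as DetectSketch.apply(loc, conf, priors), never DetectSketch()(...)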
# Variant 7: multi-scale fusion via dilated convolutions, 1x1 convolutions
# and upsampling, with optional SE/ECA channel attention.
class SSD(nn.Module):
    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = Config
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.priorbox = PriorBox(self.cfg)
        with torch.no_grad():
            self.priors = Variable(self.priorbox.forward())
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

        # Fusion for conv4_3: dilated conv from below, 1x1 conv at the same
        # scale, and an upsampled map from above.
        self.DilationConv_128_128 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3,
                                              padding=2, dilation=2, stride=2)
        self.conv_512_256 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1)
        self.upsample_1024_1024 = Upsample(38)
        self.conv_1024_128 = nn.Conv2d(in_channels=1024, out_channels=128, kernel_size=1, stride=1)
        # Fusion for fc7.
        self.DilationConv_512_256 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=3,
                                              padding=2, dilation=2, stride=2)
        self.conv_1024_512 = nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=1, stride=1)
        self.upsample_512_512 = Upsample(19)
        self.conv_512_256_fc7 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1)
        # Fusion for conv8_2.
        self.DilationConv_512_128_2 = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=3,
                                                padding=2, dilation=2, stride=2)
        self.conv_512_256_2 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=1, stride=1)
        self.upsample_256_256_2 = Upsample(10)
        self.conv_256_128_2 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1, stride=1)
        self.smooth = nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=1)
        self.smooth2 = nn.Conv2d(1024, 1024, kernel_size=3, padding=1, stride=1)
        self.bn = nn.BatchNorm2d(128)
        self.bn1 = nn.BatchNorm2d(256)
        if USE_SE:
            self.SE1 = SEModule(512)
            self.SE2 = SEModule(512)
            self.SE3 = SEModule(512)
            self.SE4 = SEModule(256)
            self.SE5 = SEModule(256)
            self.SE6 = SEModule(256)
        if USE_ECA:
            self.ECA1 = ECAModule(512)
            self.ECA2 = ECAModule(1024)
            self.ECA3 = ECAModule(512)
            self.ECA4 = ECAModule(256)

    def forward(self, x):
        sources = list()
        loc = list()
        conf = list()

        for k in range(10):
            x = self.vgg[k](x)
        sources.append(x)

        # Get the conv4_3 output.
        for k in range(10, 23):
            x = self.vgg[k](x)
        s = self.L2Norm(x)
        sources.append(s)

        # Get the fc7 output; conv5_3 is also kept, L2-normalized.
        # Original:
        # for k in range(23, len(self.vgg)):
        #     x = self.vgg[k](x)
        # sources.append(x)
        for k in range(23, 30):
            x = self.vgg[k](x)
        s = self.L2Norm(x)
        sources.append(s)
        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # Run the extra layers.
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # Build the final source list back-to-front (it is reversed before
        # the heads are applied). conv10_2 and conv11_2 were missing from the
        # original snippet, leaving only four feature maps for six heads;
        # they are restored here unchanged.
        sources_final = list()
        sources_final.append(sources[7])
        sources_final.append(sources[6])
        if USE_ECA:
            sources_final.append(self.ECA4(sources[5]))
        else:
            sources_final.append(sources[5])

        # conv8_2 fusion: dilated conv on conv5_3 + 1x1 on conv8_2 + upsampled conv9_2.
        conv8_fp1 = torch.cat(
            (F.relu(self.bn(self.DilationConv_512_128_2(sources[2])), inplace=True),
             F.relu(self.conv_512_256_2(sources[4]), inplace=True),
             F.relu(self.conv_256_128_2(self.upsample_256_256_2(sources[5])), inplace=True)), 1)
        conv8_fp = F.relu(self.smooth(conv8_fp1), inplace=True)
        if USE_ECA:
            sources_final.append(self.ECA3(conv8_fp))
        else:
            sources_final.append(conv8_fp)

        # fc7 fusion: dilated conv on conv4_3 + 1x1 on fc7 + upsampled conv8_2.
        fc7_fp1 = torch.cat(
            (F.relu(self.bn1(self.DilationConv_512_256(sources[1])), inplace=True),
             F.relu(self.conv_1024_512(sources[3]), inplace=True),
             F.relu(self.conv_512_256_fc7(self.upsample_512_512(sources[4])), inplace=True)), 1)
        fc7_fp = F.relu(self.smooth2(fc7_fp1), inplace=True)
        if USE_ECA:
            sources_final.append(self.ECA2(fc7_fp))
        else:
            sources_final.append(fc7_fp)

        # conv4_3 fusion: dilated conv on the conv2 features + 1x1 on conv4_3 + upsampled fc7.
        conv4_fp = torch.cat(
            (F.relu(self.bn(self.DilationConv_128_128(sources[0])), inplace=True),
             F.relu(self.conv_512_256(sources[1]), inplace=True),
             F.relu(self.conv_1024_128(self.upsample_1024_1024(sources[3])), inplace=True)), 1)
        conv4_fp = F.relu(self.smooth(conv4_fp), inplace=True)
        if USE_ECA:
            sources_final.append(self.ECA1(conv4_fp))
        else:
            sources_final.append(conv4_fp)

        # Apply the regression and classification heads, largest map first.
        # Original (pre-fusion) version:
        # for (x, l, c) in zip(sources, self.loc, self.conf):
        #     loc.append(l(x).permute(0, 2, 3, 1).contiguous())
        #     conf.append(c(x).permute(0, 2, 3, 1).contiguous())
        for (x, l, c) in zip(sources_final[::-1], self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        # Flatten and concatenate.
        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        if self.phase == "test":
            # loc is reshaped to (batch_size, num_anchors, 4),
            # conf to (batch_size, num_anchors, num_classes).
            output = self.detect(
                loc.view(loc.size(0), -1, 4),                                  # loc preds
                self.softmax(conf.view(conf.size(0), -1, self.num_classes)),  # conf preds
                self.priors)
        else:
            output = (loc.view(loc.size(0), -1, 4),
                      conf.view(conf.size(0), -1, self.num_classes),
                      self.priors)
        return output
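# SEModule and ECAModule are used above but not defined in this section. For
# reference, minimal sketches of the standard squeeze-and-excitation (SENet)
# and efficient channel attention (ECA-Net) blocks; the authors' modules may
# differ in reduction ratio or kernel-size heuristic.
import torch
import torch.nn as nn

class SEModule(nn.Module):
    def __init__(self, channels, reduction=16):
        super(SEModule, self).__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)  # squeeze: global context per channel
        self.fc = nn.Sequential(
            nn.Linear(channels, channels // reduction),
            nn.ReLU(inplace=True),
            nn.Linear(channels // reduction, channels),
            nn.Sigmoid())                    # excitation: per-channel gates in (0, 1)

    def forward(self, x):
        b, c, _, _ = x.size()
        w = self.fc(self.pool(x).view(b, c)).view(b, c, 1, 1)
        return x * w

class ECAModule(nn.Module):
    def __init__(self, channels, k_size=3):
        super(ECAModule, self).__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)
        # a 1D conv across the channel dimension replaces the FC bottleneck
        self.conv = nn.Conv1d(1, 1, kernel_size=k_size, padding=(k_size - 1) // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        y = self.pool(x)                                # (b, c, 1, 1)
        y = self.conv(y.squeeze(-1).transpose(-1, -2))  # (b, 1, c)
        y = self.sigmoid(y.transpose(-1, -2).unsqueeze(-1))
        return x * y.expand_as(x)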