def __init__(self,
             num_classes,
             backbone_indices,
             backbone_channels,
             pp_out_channels,
             bin_sizes,
             enable_auxiliary_loss=True):
    super().__init__()
    self.backbone_indices = backbone_indices
    # Pyramid pooling over the high-level backbone feature.
    self.psp_module = layers.PPModule(
        in_channels=backbone_channels[1],
        out_channels=pp_out_channels,
        bin_sizes=bin_sizes)
    self.dropout = nn.Dropout(p=0.1)  # dropout_prob
    self.conv = nn.Conv2D(
        in_channels=pp_out_channels,
        out_channels=num_classes,
        kernel_size=1)
    if enable_auxiliary_loss:
        # Auxiliary head on the lower-level backbone feature.
        self.auxlayer = layers.AuxLayer(
            in_channels=backbone_channels[0],
            inter_channels=backbone_channels[0] // 4,
            out_channels=num_classes)
    self.enable_auxiliary_loss = enable_auxiliary_loss
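# Illustrative only: a minimal standalone use of PPModule with a dummy tensor,
# assuming the usual PaddleSeg import path (paddleseg.models.layers) and the
# keyword signature shown by the snippets in this section; not taken from the source.
import paddle
from paddleseg.models import layers

ppm = layers.PPModule(
    in_channels=2048,           # e.g. last-stage channels of a ResNet backbone
    out_channels=512,
    bin_sizes=(1, 2, 3, 6),     # pooling grid sizes, as in the heads here
    dim_reduction=True,
    align_corners=False)

feat = paddle.randn([2, 2048, 32, 32])    # dummy NCHW feature map
out = ppm(feat)                           # expected shape: [2, 512, 32, 32]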
def __init__(self,
             inplane,
             num_class,
             fpn_inplanes,
             fpn_dim=256,
             enable_auxiliary_loss=False):
    super(SFNetHead, self).__init__()
    # Pyramid pooling on the deepest feature before the FPN top-down path.
    self.ppm = layers.PPModule(
        in_channels=inplane,
        out_channels=fpn_dim,
        bin_sizes=(1, 2, 3, 6),
        dim_reduction=True,
        align_corners=True)
    self.enable_auxiliary_loss = enable_auxiliary_loss

    # 1x1 lateral convs that project each lower-level feature to fpn_dim.
    self.fpn_in = []
    for fpn_inplane in fpn_inplanes[:-1]:
        self.fpn_in.append(
            nn.Sequential(
                nn.Conv2D(fpn_inplane, fpn_dim, 1),
                layers.SyncBatchNorm(fpn_dim), nn.ReLU()))
    self.fpn_in = nn.LayerList(self.fpn_in)

    self.fpn_out = []
    self.fpn_out_align = []
    self.dsn = []
    for i in range(len(fpn_inplanes) - 1):
        self.fpn_out.append(
            nn.Sequential(
                layers.ConvBNReLU(fpn_dim, fpn_dim, 3, bias_attr=False)))
        # Flow-alignment module that warps the coarser feature before fusion.
        self.fpn_out_align.append(
            AlignedModule(inplane=fpn_dim, outplane=fpn_dim // 2))
        if self.enable_auxiliary_loss:
            self.dsn.append(
                nn.Sequential(layers.AuxLayer(fpn_dim, fpn_dim, num_class)))
    self.fpn_out = nn.LayerList(self.fpn_out)
    self.fpn_out_align = nn.LayerList(self.fpn_out_align)
    if self.enable_auxiliary_loss:
        self.dsn = nn.LayerList(self.dsn)

    self.conv_last = nn.Sequential(
        layers.ConvBNReLU(
            len(fpn_inplanes) * fpn_dim, fpn_dim, 3, bias_attr=False),
        nn.Conv2D(fpn_dim, num_class, kernel_size=1))
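# A rough, illustrative sketch (not the exact forward from the source) of how
# the LayerLists built above are typically consumed in a top-down pass:
# AlignedModule warps the coarser feature to the finer resolution before the
# element-wise fusion, and fpn_out refines each fused level.
def sfnet_topdown_sketch(head, conv_out):
    # 'head' is assumed to be an instance of the SFNetHead-style class above;
    # 'conv_out' is the list of backbone features, coarsest last.
    f = head.ppm(conv_out[-1])
    fpn_feature_list = [f]
    for i in reversed(range(len(conv_out) - 1)):
        conv_x = head.fpn_in[i](conv_out[i])      # lateral 1x1 projection
        f = head.fpn_out_align[i]([conv_x, f])    # flow-align the coarse feature
        f = conv_x + f                            # fuse at the finer resolution
        fpn_feature_list.append(head.fpn_out[i](f))
    # The levels would then be upsampled to a common size, concatenated, and
    # passed through head.conv_last to produce the final logits.
    return fpn_feature_list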
def __init__(self, in_channels, block_channels, out_channels, expansion,
             num_blocks, align_corners):
    super(GlobalFeatureExtractor, self).__init__()
    self.bottleneck1 = self._make_layer(InvertedBottleneck, in_channels,
                                        block_channels[0], num_blocks[0],
                                        expansion, 2)
    self.bottleneck2 = self._make_layer(InvertedBottleneck, block_channels[0],
                                        block_channels[1], num_blocks[1],
                                        expansion, 2)
    self.bottleneck3 = self._make_layer(InvertedBottleneck, block_channels[1],
                                        block_channels[2], num_blocks[2],
                                        expansion, 1)
    self.ppm = layers.PPModule(
        block_channels[2],
        out_channels,
        bin_sizes=(1, 2, 3, 6),
        dim_reduction=True,
        align_corners=align_corners)
def __init__(self,
             in_channels=64,
             block_channels=(64, 96, 128),
             out_channels=128,
             expansion=6,
             num_blocks=(3, 3, 3)):
    super(GlobalFeatureExtractor, self).__init__()
    self.bottleneck1 = self._make_layer(InvertedBottleneck, in_channels,
                                        block_channels[0], num_blocks[0],
                                        expansion, 2)
    self.bottleneck2 = self._make_layer(InvertedBottleneck, block_channels[0],
                                        block_channels[1], num_blocks[1],
                                        expansion, 2)
    self.bottleneck3 = self._make_layer(InvertedBottleneck, block_channels[1],
                                        block_channels[2], num_blocks[2],
                                        expansion, 1)
    self.ppm = layers.PPModule(
        block_channels[2], out_channels, dim_reduction=True)
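# Both GlobalFeatureExtractor variants above rely on self._make_layer, which is
# not part of this excerpt. A plausible sketch under the common convention that
# the first block applies the stride and the remaining blocks use stride 1; the
# InvertedBottleneck argument order (in_channels, out_channels, expansion,
# stride) is an assumption, and the real helper may differ.
import paddle.nn as nn

def make_layer_sketch(block, in_channels, out_channels, blocks, expansion,
                      stride=1):
    stages = [block(in_channels, out_channels, expansion, stride)]
    for _ in range(1, blocks):
        stages.append(block(out_channels, out_channels, expansion, 1))
    return nn.Sequential(*stages)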
def __init__(self,
             backbone,
             pretrained=None,
             backbone_scale=0.25,
             refine_kernel_size=3,
             if_refine=True):
    super().__init__()
    if if_refine:
        if backbone_scale > 0.5:
            raise ValueError(
                'Backbone_scale should not be greater than 1/2, but it is {}'
                .format(backbone_scale))
    else:
        backbone_scale = 1

    self.backbone = backbone
    self.backbone_scale = backbone_scale
    self.pretrained = pretrained
    self.if_refine = if_refine
    if if_refine:
        self.refiner = Refiner(kernel_size=refine_kernel_size)

    self.backbone_channels = backbone.feat_channels

    ######################
    ### Decoder part - Glance
    ######################
    self.psp_module = layers.PPModule(
        self.backbone_channels[-1],
        512,
        bin_sizes=(1, 3, 5),
        dim_reduction=False,
        align_corners=False)
    self.psp4 = conv_up_psp(512, 256, 2)
    self.psp3 = conv_up_psp(512, 128, 4)
    self.psp2 = conv_up_psp(512, 64, 8)
    self.psp1 = conv_up_psp(512, 64, 16)
    # stage 5g
    self.decoder5_g = nn.Sequential(
        layers.ConvBNReLU(
            512 + self.backbone_channels[-1], 512, 3, padding=1),
        layers.ConvBNReLU(512, 512, 3, padding=2, dilation=2),
        layers.ConvBNReLU(512, 256, 3, padding=2, dilation=2),
        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    # stage 4g
    self.decoder4_g = nn.Sequential(
        layers.ConvBNReLU(512, 256, 3, padding=1),
        layers.ConvBNReLU(256, 256, 3, padding=1),
        layers.ConvBNReLU(256, 128, 3, padding=1),
        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    # stage 3g
    self.decoder3_g = nn.Sequential(
        layers.ConvBNReLU(256, 128, 3, padding=1),
        layers.ConvBNReLU(128, 128, 3, padding=1),
        layers.ConvBNReLU(128, 64, 3, padding=1),
        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    # stage 2g
    self.decoder2_g = nn.Sequential(
        layers.ConvBNReLU(128, 128, 3, padding=1),
        layers.ConvBNReLU(128, 128, 3, padding=1),
        layers.ConvBNReLU(128, 64, 3, padding=1),
        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    # stage 1g
    self.decoder1_g = nn.Sequential(
        layers.ConvBNReLU(128, 64, 3, padding=1),
        layers.ConvBNReLU(64, 64, 3, padding=1),
        layers.ConvBNReLU(64, 64, 3, padding=1),
        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    # stage 0g
    self.decoder0_g = nn.Sequential(
        layers.ConvBNReLU(64, 64, 3, padding=1),
        layers.ConvBNReLU(64, 64, 3, padding=1),
        nn.Conv2D(64, 3, 3, padding=1))

    ##########################
    ### Decoder part - FOCUS
    ##########################
    self.bridge_block = nn.Sequential(
        layers.ConvBNReLU(
            self.backbone_channels[-1], 512, 3, dilation=2, padding=2),
        layers.ConvBNReLU(512, 512, 3, dilation=2, padding=2),
        layers.ConvBNReLU(512, 512, 3, dilation=2, padding=2))
    # stage 5f
    self.decoder5_f = nn.Sequential(
        layers.ConvBNReLU(
            512 + self.backbone_channels[-1], 512, 3, padding=1),
        layers.ConvBNReLU(512, 512, 3, padding=2, dilation=2),
        layers.ConvBNReLU(512, 256, 3, padding=2, dilation=2),
        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    # stage 4f
    self.decoder4_f = nn.Sequential(
        layers.ConvBNReLU(
            256 + self.backbone_channels[-2], 256, 3, padding=1),
        layers.ConvBNReLU(256, 256, 3, padding=1),
        layers.ConvBNReLU(256, 128, 3, padding=1),
        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    # stage 3f
    self.decoder3_f = nn.Sequential(
        layers.ConvBNReLU(
            128 + self.backbone_channels[-3], 128, 3, padding=1),
        layers.ConvBNReLU(128, 128, 3, padding=1),
        layers.ConvBNReLU(128, 64, 3, padding=1),
        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    # stage 2f
    self.decoder2_f = nn.Sequential(
        layers.ConvBNReLU(
            64 + self.backbone_channels[-4], 128, 3, padding=1),
        layers.ConvBNReLU(128, 128, 3, padding=1),
        layers.ConvBNReLU(128, 64, 3, padding=1),
        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    # stage 1f
    self.decoder1_f = nn.Sequential(
        layers.ConvBNReLU(
            64 + self.backbone_channels[-5], 64, 3, padding=1),
        layers.ConvBNReLU(64, 64, 3, padding=1),
        layers.ConvBNReLU(64, 64, 3, padding=1),
        nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    # stage 0f
    self.decoder0_f = nn.Sequential(
        layers.ConvBNReLU(64, 64, 3, padding=1),
        layers.ConvBNReLU(64, 64, 3, padding=1),
        nn.Conv2D(64, 1 + 1 + 32, 3, padding=1))

    self.init_weight()
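# conv_up_psp is called in the glance decoder above but not defined in this
# excerpt. A plausible sketch of such a helper (hypothetical; the real one may
# differ): a 3x3 ConvBNReLU followed by bilinear upsampling by the given factor.
import paddle.nn as nn
from paddleseg.models import layers  # assumed import path

def conv_up_psp_sketch(in_channels, out_channels, up_sample):
    return nn.Sequential(
        layers.ConvBNReLU(in_channels, out_channels, 3, padding=1),
        nn.Upsample(
            scale_factor=up_sample, mode='bilinear', align_corners=False))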