def _make_head(self, block, pre_stage_channels, outplanes=2048, conv='Conv2d', ctx=''):
    """Assemble the three module groups that form the classification head.

    Args:
        block: block class handed to ``_make_layer``; its ``expansion``
            attribute scales every head width.
        pre_stage_channels: input channel count of each branch, in branch order.
        outplanes: number of output channels of the final 1x1 convolution.
        conv: convolution type forwarded to ``_make_layer``.
        ctx: context spec forwarded to ``_make_layer``.

    Returns:
        Tuple ``(incre_modules, downsamp_modules, final_layer)``: one
        single-block layer per branch, one strided 3x3 conv stack per pair
        of neighbouring branches, and the closing 1x1 conv stack.

    Side effects:
        ``self.inplanes`` is overwritten for every branch before its layer
        is built.
    """
    plain_norm = self.norm.replace('Mix', '')
    expansion = block.expansion

    # Increasing the #channels on each resolution,
    # from C, 2C, 4C, 8C to 128, 256, 512, 1024
    incre_modules = nn.ModuleList()
    for idx, branch_channels in enumerate(pre_stage_channels):
        self.inplanes = branch_channels
        incre_modules.append(
            self._make_layer(block, self.head_dim[idx], 1, stride=1, dilation=1, conv=conv, ctx=ctx)
        )

    # downsampling modules: each one halves the resolution and moves to the
    # width of the next head level
    downsamp_modules = nn.ModuleList()
    for idx in range(len(pre_stage_channels) - 1):
        src_channels = self.head_dim[idx] * expansion
        dst_channels = self.head_dim[idx + 1] * expansion
        downsamp_modules.append(nn.Sequential(
            # official implementation forgets bias=False
            nn.Conv2d(src_channels, dst_channels, 3, 2, 1),
            make_norm(dst_channels, norm=plain_norm),
            nn.ReLU(inplace=True),
        ))

    final_layer = nn.Sequential(
        nn.Conv2d(self.head_dim[3] * expansion, outplanes, 1, 1, 0),
        make_norm(outplanes, norm=plain_norm),
        nn.ReLU(inplace=True),
    )

    return incre_modules, downsamp_modules, final_layer
def _make_layer(self, block, planes, blocks, stride=1, dilation=1, conv='Conv2d', ctx=''):
    """Stack ``blocks`` instances of ``block`` into one sequential layer.

    Args:
        block: block class; must expose an ``expansion`` attribute.
        planes: base width of the layer (output is ``planes * block.expansion``).
        blocks: number of blocks to stack.
        stride: stride applied by the first block only.
        dilation: dilation passed to every block.
        conv: convolution type passed to every block.
        ctx: context spec passed to every block.

    Returns:
        ``nn.Sequential`` holding the stacked blocks.

    Side effects:
        ``self.inplanes`` is updated to ``planes * block.expansion``.
    """
    out_channels = planes * block.expansion

    # A projection shortcut is only needed when the first block changes the
    # resolution or the channel count.
    downsample = None
    if stride != 1 or self.inplanes != out_channels:
        if self.avg_down:
            # pool first, then project with a stride-1 1x1 conv
            shortcut = [
                nn.AvgPool2d(kernel_size=stride, stride=stride),
                nn.Conv2d(self.inplanes, out_channels, kernel_size=1, stride=1, bias=False),
            ]
        else:
            shortcut = [
                nn.Conv2d(self.inplanes, out_channels, kernel_size=1, stride=stride, bias=False),
            ]
        shortcut.append(make_norm(out_channels, norm=self.norm.replace('Mix', '')))
        downsample = nn.Sequential(*shortcut)

    # Keyword arguments shared by every block of this layer.
    shared = dict(radix=self.radix, stride_3x3=True, conv=conv, norm=self.norm, ctx=ctx)

    stacked = [
        block(self.inplanes, planes, 64, 1, stride, dilation, downsample=downsample, **shared)
    ]
    self.inplanes = out_channels
    stacked.extend(
        block(self.inplanes, planes, 64, 1, 1, dilation, downsample=None, **shared)
        for _ in range(1, blocks)
    )

    return nn.Sequential(*stacked)
def __init__(self, cfg, dim_in, spatial_in):
    """Build the parsing-IoU head from ``cfg.PARSING.PARSINGIOU``.

    Args:
        cfg: config node; ``NUM_CONVS``, ``CONV_DIM`` and ``NORM`` of
            ``cfg.PARSING.PARSINGIOU`` are read.
        dim_in: sequence of input channel counts; only the last entry is used.
        spatial_in: sequence of input spatial descriptors; only the last
            entry is used.

    Attributes set:
        conv1x1: 1x1 conv that keeps the input channel count.
        conv_layers: ``NUM_CONVS`` stacked 1x1 convs of width ``CONV_DIM``.
        dim_out: single-element list with the channel count leaving
            ``conv_layers``.
        spatial_out: fixed to ``[(1, 1)]``.
    """
    super(ParsingIoUHead, self).__init__()
    self.dim_in = dim_in[-1]
    self.spatial_in = spatial_in[-1]

    num_convs = cfg.PARSING.PARSINGIOU.NUM_CONVS  # default = 2
    conv_dim = cfg.PARSING.PARSINGIOU.CONV_DIM
    norm = cfg.PARSING.PARSINGIOU.NORM

    self.conv1x1 = make_conv(self.dim_in, self.dim_in, kernel_size=1, stride=1,
                             norm=make_norm(self.dim_in, norm=norm), act=make_act())

    conv_layers = []
    for _ in range(num_convs):
        conv_layers.append(
            make_conv(self.dim_in, conv_dim, kernel_size=1, stride=1,
                      norm=make_norm(conv_dim, norm=norm), act=make_act())
        )
        # the next conv consumes what this one produced
        self.dim_in = conv_dim
    self.add_module('conv_layers', nn.Sequential(*conv_layers))

    # Report the width that actually leaves the stack: with NUM_CONVS == 0
    # the (empty) stack passes its input through, so the output width is the
    # input width rather than conv_dim. Identical to conv_dim otherwise.
    self.dim_out = [self.dim_in]
    self.spatial_out = [(1, 1), ]

    self._init_weights()
def panoptic_upsampler_block(dim_in, dim_out, expansion, norm=''):
    """Build a stack of 3x3 conv units, each followed by a 2x bilinear upsample.

    Args:
        dim_in: input channels of the first conv unit.
        dim_out: output channels of every conv unit.
        expansion: number of conv + upsample stages; ``0`` yields a single
            conv unit with no upsampling.
        norm: norm name forwarded to ``make_norm``.

    Returns:
        ``nn.Sequential`` containing the assembled modules.
    """

    def conv_unit(channels_in):
        # NOTE(review): this call passes ``kernel=`` while other make_conv
        # call sites use ``kernel_size=`` — confirm against make_conv's signature.
        return make_conv(
            channels_in, dim_out, kernel=3, dilation=1, stride=1,
            norm=make_norm(dim_out, norm=norm), act=make_act(),
        )

    stages = []
    if expansion == 0:
        # no upsample
        stages.append(conv_unit(dim_in))
    for step in range(expansion):
        # only the first unit sees the external input width
        stages.append(conv_unit(dim_in if step == 0 else dim_out))
        stages.append(nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    return nn.Sequential(*stages)
def __init__(self, block, planes, stage=2, output_branches=2, conv='Conv2d', norm='BN', ctx='',
             use_global=False):
    """Build the parallel branches and the cross-branch fusion layers.

    Args:
        block: block class used inside every branch.
        planes: width of branch 0; branch ``k`` has width ``planes * 2**k``.
        stage: number of input branches.
        output_branches: number of fused outputs to produce.
        conv: convolution type passed to every block.
        norm: norm name; blocks receive it as is, the fusion layers use it
            with ``'Mix'`` stripped.
        ctx: context spec passed to every block.
        use_global: when true, one extra 1x1 conv + norm + sigmoid stack per
            output branch is created in ``self.global_layers``.
    """
    super(StageModule, self).__init__()
    self.use_global = use_global

    def width(level):
        # channel count of the branch at the given level
        return planes * (2 ** level)

    def plain_norm(channels):
        return make_norm(channels, norm=norm.replace('Mix', ''))

    # Each branch is four blocks at constant width.
    self.branches = nn.ModuleList()
    for level in range(stage):
        w = width(level)
        self.branches.append(nn.Sequential(*[
            block(w, w, stride_3x3=True, conv=conv, norm=norm, ctx=ctx)
            for _ in range(4)
        ]))

    self.fuse_layers = nn.ModuleList()
    if self.use_global:
        self.global_layers = nn.ModuleList()

    # for each output_branches (i.e. each branch in all cases but the very last one)
    for out_idx in range(output_branches):
        fuse_row = nn.ModuleList()
        self.fuse_layers.append(fuse_row)

        for in_idx in range(stage):  # for each branch
            if out_idx == in_idx:
                # Used in place of "None" because it is callable
                fuse_row.append(nn.Sequential())
            elif out_idx < in_idx:
                # source branch is narrower in resolution: project, then upsample
                fuse_row.append(nn.Sequential(
                    nn.Conv2d(width(in_idx), width(out_idx), 1, 1, 0, bias=False),
                    plain_norm(width(out_idx)),
                    nn.Upsample(scale_factor=(2.0 ** (in_idx - out_idx)), mode='nearest'),
                ))
            else:
                # source branch is larger in resolution: chain stride-2 convs,
                # widening only in the last one (which has no ReLU)
                steps = [
                    nn.Sequential(
                        nn.Conv2d(width(in_idx), width(in_idx), 3, 2, 1, bias=False),
                        plain_norm(width(in_idx)),
                        nn.ReLU(inplace=True),
                    )
                    for _ in range(out_idx - in_idx - 1)
                ]
                steps.append(nn.Sequential(
                    nn.Conv2d(width(in_idx), width(out_idx), 3, 2, 1, bias=False),
                    plain_norm(width(out_idx)),
                ))
                fuse_row.append(nn.Sequential(*steps))

        if self.use_global:
            total_width = sum(width(level) for level in range(stage))
            self.global_layers.append(nn.Sequential(
                nn.Conv2d(total_width, width(out_idx), 1, 1, 0, bias=False),
                plain_norm(width(out_idx)),
                nn.Sigmoid(),
            ))

    self.relu = nn.ReLU(inplace=True)
def __init__(self, avg_down=False, use_global=False, base_width=32, radix=1,
             stage_with_conv=('Conv2d', 'Conv2d', 'Conv2d', 'Conv2d'), norm='BN',
             stage_with_ctx=('', '', '', ''), num_classes=1000):
    """Build the HRNet classification network.

    Args:
        avg_down: stored as ``self.avg_down``.
        use_global: passed to every stage-3 and stage-4 ``StageModule``
            (stage 2 always receives ``False``).
        base_width: width of the highest-resolution branch.
        radix: stored as ``self.radix``.
        stage_with_conv: convolution type per stage (four entries).
        norm: norm name; the stem uses it with ``'Mix'`` stripped.
        stage_with_ctx: context spec per stage (four entries).
        num_classes: number of classes
    """
    super(HRNet, self).__init__()
    block_1 = Bottleneck
    block_2 = BasicBlock

    self.avg_down = avg_down
    self.base_width = base_width
    self.radix = radix
    self.norm = norm
    self.head_dim = (32, 64, 128, 256)

    def build_stage(num_branches, repeats, idx, with_global):
        # ``repeats`` identical StageModules, each with ``num_branches``
        # inputs and outputs, configured from stage slot ``idx``
        return nn.Sequential(*[
            StageModule(block_2, base_width, num_branches, num_branches,
                        stage_with_conv[idx], norm, stage_with_ctx[idx], with_global)
            for _ in range(repeats)
        ])

    # Stem: two stride-2 3x3 convs.
    stem_norm = norm.replace('Mix', '')
    self.inplanes = 64  # default 64
    self.conv1 = nn.Conv2d(3, 64, 3, 2, 1, bias=False)
    self.bn1 = make_norm(64, norm=stem_norm)
    self.conv2 = nn.Conv2d(64, 64, 3, 2, 1, bias=False)
    self.bn2 = make_norm(64, norm=stem_norm)
    self.relu = nn.ReLU(inplace=True)

    self.layer1 = self._make_layer(block_1, 64, 4, 1, conv=stage_with_conv[0], ctx=stage_with_ctx[0])

    # Fusion layer 1: create full and 1/2 resolution
    self.transition1 = self._make_transition(index=1, stride=2)
    # Stage 2 with 1 group of block modules, which has 2 branches
    self.stage2 = build_stage(2, 1, 1, False)

    # Fusion layer 2: create 1/4 resolution
    self.transition2 = self._make_transition(index=2, stride=2)
    # Stage 3 with 4 groups of block modules, which has 3 branches
    self.stage3 = build_stage(3, 4, 2, use_global)

    # Fusion layer 3: create 1/8 resolution
    self.transition3 = self._make_transition(index=3, stride=2)
    # Stage 4 with 3 groups of block modules, which has 4 branches
    self.stage4 = build_stage(4, 3, 3, use_global)

    pre_stage_channels = [base_width * (2 ** level) for level in range(4)]
    head = self._make_head(block_1, pre_stage_channels, outplanes=2048,
                           conv=stage_with_conv[3], ctx=stage_with_ctx[3])
    self.incre_modules, self.downsamp_modules, self.final_layer = head

    self.avgpool = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(2048, num_classes)

    self._init_weights()
def __init__(self, bottleneck=True, aligned=False, use_3x3x3stem=False, stride_3x3=False,
             avg_down=False, stem_width=64, base_width=64, layers=(3, 4, 6, 3), radix=1,
             stage_with_conv=('Conv2d', 'Conv2d', 'Conv2d', 'Conv2d'), norm='BN',
             stage_with_ctx=('', '', '', ''), num_classes=1000):
    """Build the ResNet classification network.

    Args:
        bottleneck: use ``Bottleneck`` blocks (else ``BasicBlock``); ignored
            when ``aligned`` is true.
        aligned: use ``AlignedBottleneck`` blocks.
        use_3x3x3stem: replace the single 7x7 stem conv with three 3x3 convs.
        stride_3x3: stored as ``self.stride_3x3``.
        avg_down: stored as ``self.avg_down``.
        stem_width: output channels of the stem.
        base_width: stored as ``self.base_width``.
        layers: config of layers, e.g., (3, 4, 23, 3)
        radix: stored as ``self.radix``.
        stage_with_conv: convolution type per stage (four entries).
        norm: norm name; the stem uses it with ``'Mix'`` stripped.
        stage_with_ctx: context spec per stage (four entries).
        num_classes: number of classes
    """
    super(ResNet, self).__init__()

    if aligned:
        block = AlignedBottleneck
    elif bottleneck:
        block = Bottleneck
    else:
        block = BasicBlock
    self.expansion = block.expansion

    self.use_3x3x3stem = use_3x3x3stem
    self.stride_3x3 = stride_3x3
    self.avg_down = avg_down
    self.base_width = base_width
    self.radix = radix
    self.norm = norm

    # Stem
    stem_norm = norm.replace('Mix', '')
    self.inplanes = stem_width
    if self.use_3x3x3stem:
        half_width = self.inplanes // 2
        self.conv1 = nn.Conv2d(3, half_width, 3, 2, 1, bias=False)
        self.bn1 = make_norm(half_width, norm=stem_norm)
        self.conv2 = nn.Conv2d(half_width, half_width, 3, 1, 1, bias=False)
        self.bn2 = make_norm(half_width, norm=stem_norm)
        self.conv3 = nn.Conv2d(half_width, self.inplanes, 3, 1, 1, bias=False)
        self.bn3 = make_norm(self.inplanes, norm=stem_norm)
    else:
        self.conv1 = nn.Conv2d(3, self.inplanes, 7, 2, 3, bias=False)
        self.bn1 = make_norm(self.inplanes, norm=stem_norm)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages: width doubles and resolution halves from layer2 on.
    convs, ctxs = stage_with_conv, stage_with_ctx
    self.layer1 = self._make_layer(block, 64, layers[0], 1, conv=convs[0], ctx=ctxs[0])
    self.layer2 = self._make_layer(block, 128, layers[1], 2, conv=convs[1], ctx=ctxs[1])
    self.layer3 = self._make_layer(block, 256, layers[2], 2, conv=convs[2], ctx=ctxs[2])
    self.layer4 = self._make_layer(block, 512, layers[3], 2, conv=convs[3], ctx=ctxs[3])

    # Classifier
    self.avgpool = nn.AdaptiveAvgPool2d(1)
    self.fc = nn.Linear(512 * self.expansion, num_classes)

    self._init_weights()
def _make_transition(self, index=1, stride=1):
    """Build the transition that adds one more branch before a stage.

    Args:
        index: 1-based transition number; the new branch has width
            ``self.base_width * 2**index``.
        stride: stride of the 3x3 conv that creates the new branch.

    Returns:
        ``nn.ModuleList`` with one entry per resulting branch. For
        ``index == 1`` the first entry is a stride-1 conv stack from
        ``self.inplanes`` to ``self.base_width``; otherwise the first
        ``index`` entries are empty ``nn.Sequential`` placeholders. The last
        entry always creates the new branch.
    """
    plain_norm = self.norm.replace('Mix', '')
    new_width = self.base_width * (2 ** index)

    transition = nn.ModuleList()
    if index == 1:
        # first transition: both branches are derived from the layer1 output
        source_width = self.inplanes
        transition.append(nn.Sequential(
            nn.Conv2d(self.inplanes, self.base_width, kernel_size=3, stride=1, padding=1, bias=False),
            make_norm(self.base_width, norm=plain_norm),
            nn.ReLU(inplace=True),
        ))
    else:
        # later transitions: the new branch is derived from the previous
        # lowest-resolution branch
        source_width = self.base_width * (2 ** (index - 1))
        transition.extend([nn.Sequential() for _ in range(index)])

    new_branch = nn.Sequential(
        nn.Conv2d(source_width, new_width, kernel_size=3, stride=stride, padding=1, bias=False),
        make_norm(new_width, norm=plain_norm),
        nn.ReLU(inplace=True),
    )
    # Double Sequential to fit with official pre-trained weights
    transition.append(nn.Sequential(new_branch))

    return transition
def _make_layer(self, block, planes, blocks, stride=1, dilation=1, conv='Conv2d', ctx=''):
    """Build one residual stage as a sequence of ``blocks`` blocks.

    Args:
        block: block type used to construct ResNet
        planes: number of output channels (need to multiply by block.expansion)
        blocks: number of blocks to be built
        stride: factor to reduce the spatial dimensionality in the first
            block of the stage.
        dilation: dilation passed to every block.
        conv: convolution type passed to every block.
        ctx: context spec passed to every block.

    Returns:
        a Module consisting of n sequential blocks.

    Side effects:
        ``self.inplanes`` is updated to ``planes * block.expansion``.
    """
    stage_channels = planes * block.expansion

    # The identity shortcut must be replaced by a projection whenever the
    # first block changes the resolution or the channel count.
    needs_projection = stride != 1 or self.inplanes != stage_channels
    downsample = None
    if needs_projection:
        norm_layer = make_norm  # local alias; called once the convs exist
        if self.avg_down:
            # average-pool for the stride, then a stride-1 1x1 projection
            downsample = nn.Sequential(
                nn.AvgPool2d(kernel_size=stride, stride=stride),
                nn.Conv2d(self.inplanes, stage_channels, kernel_size=1, stride=1, bias=False),
                norm_layer(stage_channels, norm=self.norm.replace('Mix', '')),
            )
        else:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, stage_channels, kernel_size=1, stride=stride, bias=False),
                norm_layer(stage_channels, norm=self.norm.replace('Mix', '')),
            )

    # Keyword arguments common to every block of the stage.
    common = dict(radix=self.radix, stride_3x3=self.stride_3x3, conv=conv, norm=self.norm, ctx=ctx)

    # Only the first block carries the stride and the projection shortcut.
    stage = [
        block(self.inplanes, planes, self.base_width, 1, stride, dilation,
              downsample=downsample, **common)
    ]
    self.inplanes = stage_channels
    for _ in range(1, blocks):
        stage.append(
            block(self.inplanes, planes, self.base_width, 1, 1, dilation,
                  downsample=None, **common)
        )

    return nn.Sequential(*stage)
def __init__(self, cfg, dim_in, spatial_in):
    """Build a stack of stride-2 transposed convolutions from ``cfg.FPN.DECONVX``.

    Args:
        cfg: config node; ``HEAD_DIM``, ``HEAD_DECAY_FACTOR``, ``HEAD_KERNEL``,
            ``WITH_BIAS``, ``NUM_DECONVS`` and ``NORM`` of ``cfg.FPN.DECONVX``
            are read.
        dim_in: sequence of input channel counts; only the last entry is used.
        spatial_in: sequence of input spatial descriptors; only the last
            entry is used.

    Attributes set:
        deconv_module: the assembled ``nn.Sequential``.
        dim_out: single-element list with the channels of the last deconv.
        spatial_out: single-element list with ``spatial_in`` after one
            ``*= 2`` per deconv.
    """
    super().__init__()
    self.dim_in = dim_in[-1]
    self.spatial_in = spatial_in[-1]

    deconv_cfg = cfg.FPN.DECONVX
    hidden_dim = deconv_cfg.HEAD_DIM  # default: 256
    head_decay_factor = deconv_cfg.HEAD_DECAY_FACTOR  # default: 1
    # must be set before _get_deconv_param() is called
    self.deconv_kernel = deconv_cfg.HEAD_KERNEL  # default: 4
    padding, output_padding = self._get_deconv_param()
    deconv_with_bias = deconv_cfg.WITH_BIAS
    num_deconvs = deconv_cfg.NUM_DECONVS
    norm = deconv_cfg.NORM

    # deconv module
    stack = []
    for _ in range(num_deconvs):
        stack += [
            nn.ConvTranspose2d(self.dim_in, hidden_dim, kernel_size=self.deconv_kernel, stride=2,
                               padding=padding, output_padding=output_padding,
                               bias=deconv_with_bias),
            make_norm(hidden_dim, norm=norm),
            nn.ReLU(inplace=True),
        ]
        # next deconv consumes this one's output; its own width shrinks by
        # the decay factor
        self.dim_in = hidden_dim
        hidden_dim //= head_decay_factor
        self.spatial_in *= 2
    self.deconv_module = nn.Sequential(*stack)

    self.dim_out = [self.dim_in]
    self.spatial_out = [self.spatial_in]

    self._init_weights()
def __init__(self, cfg, dim_in, spatial_in):
    """Build the GCE head: convs -> ASPP -> 1x1 conv (+ optional non-local) -> convs.

    Args:
        cfg: config node; settings are read from ``cfg.MASK.GCE_HEAD``
            (plus ``cfg.KRCNN.GCE_HEAD.NL_RATIO`` when ``USE_NL`` is set).
        dim_in: sequence of input channel counts; only the last entry is used.
        spatial_in: stored unchanged as ``self.spatial_in``.

    Attributes set:
        conv_before_aspp: 3x3 conv stack, or ``None`` when it would be empty.
        aspp: the ``ASPP`` module.
        feat: 1x1 conv, optionally followed by ``NonLocal2d``.
        conv_after_aspp: 3x3 conv stack (at least one conv is required).
        dim_out / spatial_out: single-element lists describing the output.
    """
    super(GCEHead, self).__init__()
    self.dim_in = dim_in[-1]
    self.spatial_in = spatial_in

    head_cfg = cfg.MASK.GCE_HEAD
    use_nl = head_cfg.USE_NL
    norm = head_cfg.NORM
    conv_dim = head_cfg.CONV_DIM
    aspp_dim = head_cfg.ASPP_DIM
    num_convs_before_aspp = head_cfg.NUM_CONVS_BEFORE_ASPP
    aspp_dilation = head_cfg.ASPP_DILATION
    num_convs_after_aspp = head_cfg.NUM_CONVS_AFTER_ASPP

    def conv_unit(kernel_size):
        # conv + norm + activation from the current width to conv_dim
        return make_conv(self.dim_in, conv_dim, kernel_size=kernel_size,
                         norm=make_norm(conv_dim, norm=norm), act=make_act())

    # convx before aspp
    leading = []
    for _ in range(num_convs_before_aspp):
        leading.append(conv_unit(3))
        self.dim_in = conv_dim
    self.conv_before_aspp = nn.Sequential(*leading) if leading else None

    # aspp
    self.aspp = ASPP(self.dim_in, aspp_dim, dilations=aspp_dilation, norm=norm)
    self.dim_in = self.aspp.dim_out

    feat_modules = [conv_unit(1)]
    # non-local
    if use_nl:
        # NOTE(review): NL_RATIO is read from cfg.KRCNN while every other
        # setting comes from cfg.MASK — confirm this is intended.
        feat_modules.append(
            NonLocal2d(conv_dim, int(conv_dim * cfg.KRCNN.GCE_HEAD.NL_RATIO), conv_dim, use_gn=True)
        )
    self.feat = nn.Sequential(*feat_modules)
    self.dim_in = conv_dim

    # convx after aspp
    assert num_convs_after_aspp >= 1
    trailing = []
    for _ in range(num_convs_after_aspp):
        trailing.append(conv_unit(3))
        self.dim_in = conv_dim
    self.conv_after_aspp = nn.Sequential(*trailing) if trailing else None

    self.dim_out = [self.dim_in]
    self.spatial_out = [self.spatial_in]

    self._init_weights()
def __init__(self, cfg, stride=32):
    """Build the HRNet backbone from ``cfg.BACKBONE.HRNET``.

    Args:
        cfg: config node; ``WIDTH``, ``USE_GLOBAL``, ``STAGE_WITH_CONV``,
            ``NORM``, ``STAGE_WITH_CTX`` and ``AVG_DOWN`` of
            ``cfg.BACKBONE.HRNET`` are read.
        stride: output stride. When it is 4 the last stage-4 module emits a
            single branch, otherwise four; it also selects the slice of
            ``stage_out_dim`` / ``stage_out_spatial`` that is exposed.
    """
    super(HRNet, self).__init__()
    self.dim_in = 3
    self.spatial_in = [1]

    block_1 = Bottleneck
    block_2 = BasicBlock

    hr_cfg = cfg.BACKBONE.HRNET
    base_width = hr_cfg.WIDTH
    use_global = hr_cfg.USE_GLOBAL
    stage_with_conv = hr_cfg.STAGE_WITH_CONV
    norm = hr_cfg.NORM
    stage_with_ctx = hr_cfg.STAGE_WITH_CTX

    self.avg_down = hr_cfg.AVG_DOWN
    self.base_width = base_width
    self.norm = norm
    self.stride = stride

    multi_out = 1 if self.stride == 4 else 4

    def stage_module(num_in, num_out, idx, with_global):
        # one StageModule configured from stage slot ``idx``
        return hr.StageModule(block_2, base_width, num_in, num_out,
                              stage_with_conv[idx], norm, stage_with_ctx[idx], with_global)

    # Stem: two stride-2 3x3 convs.
    stem_norm = norm.replace('Mix', '')
    self.inplanes = 64  # default 64
    self.conv1 = nn.Conv2d(self.dim_in, 64, 3, 2, 1, bias=False)
    self.bn1 = make_norm(64, norm=stem_norm)
    self.conv2 = nn.Conv2d(64, 64, 3, 2, 1, bias=False)
    self.bn2 = make_norm(64, norm=stem_norm)
    self.relu = nn.ReLU(inplace=True)

    # 4 blocks
    self.layer1 = self._make_layer(block_1, 64, 4, 1, conv=stage_with_conv[0], ctx=stage_with_ctx[0])

    # Fusion layer 1: create full and 1/2 resolution
    self.transition1 = self._make_transition(index=1, stride=2)
    # Stage 2 with 1 group of block modules, which has 2 branches
    self.stage2 = nn.Sequential(stage_module(2, 2, 1, False))

    # Fusion layer 2: create 1/4 resolution
    self.transition2 = self._make_transition(index=2, stride=2)
    # Stage 3 with 4 groups of block modules, which has 3 branches
    self.stage3 = nn.Sequential(*[stage_module(3, 3, 2, use_global) for _ in range(4)])

    # Fusion layer 3: create 1/8 resolution
    self.transition3 = self._make_transition(index=3, stride=2)
    # Stage 4 with 3 groups of block modules, which has 4 branches;
    # only the last group may reduce the number of outputs
    self.stage4 = nn.Sequential(
        stage_module(4, 4, 3, use_global),
        stage_module(4, 4, 3, use_global),
        stage_module(4, multi_out, 3, use_global),
    )

    num_levels = int(math.log(self.stride, 2))
    self.dim_out = self.stage_out_dim[1:num_levels]
    self.spatial_out = self.stage_out_spatial[1:num_levels]

    # NOTE(review): none of these attributes is created in this method;
    # presumably the parent constructor builds the classification head and
    # it is dropped here — confirm against the base class.
    del self.incre_modules
    del self.downsamp_modules
    del self.final_layer
    del self.avgpool
    del self.classifier

    self._init_weights()
def __init__(self, cfg, stride=32):
    """Build the ResNet backbone from ``cfg.BACKBONE.RESNET``.

    Args:
        cfg: config node; ``USE_ALIGN``, ``BOTTLENECK``, ``STEM_WIDTH``,
            ``LAYERS``, ``STAGE_WITH_CONV``, ``NORM``, ``STAGE_WITH_CTX``,
            ``USE_3x3x3HEAD``, ``STRIDE_3X3``, ``AVG_DOWN``, ``WIDTH`` and
            ``RADIX`` of ``cfg.BACKBONE.RESNET`` are read.
        stride: output stride; it truncates ``LAYERS`` and selects the slice
            of ``stage_out_dim`` / ``stage_out_spatial`` that is exposed.
    """
    super(ResNet, self).__init__()
    self.dim_in = 3
    self.spatial_in = [1]

    res_cfg = cfg.BACKBONE.RESNET
    if res_cfg.USE_ALIGN:
        block = AlignedBottleneck
    elif res_cfg.BOTTLENECK:
        block = Bottleneck  # not use the original Bottleneck module
    else:
        block = BasicBlock

    stem_width = res_cfg.STEM_WIDTH
    # keep log2(stride) - 1 stage depths
    layers = res_cfg.LAYERS[:int(math.log(stride, 2)) - 1]
    convs = res_cfg.STAGE_WITH_CONV
    norm = res_cfg.NORM
    ctxs = res_cfg.STAGE_WITH_CTX

    self.expansion = block.expansion
    self.use_3x3x3stem = res_cfg.USE_3x3x3HEAD
    self.stride_3x3 = res_cfg.STRIDE_3X3
    self.avg_down = res_cfg.AVG_DOWN
    self.base_width = res_cfg.WIDTH
    self.radix = res_cfg.RADIX
    self.norm = norm
    self.stride = stride

    # Stem
    stem_norm = norm.replace('Mix', '')
    self.inplanes = stem_width
    if self.use_3x3x3stem:
        half_width = self.inplanes // 2
        self.conv1 = nn.Conv2d(self.dim_in, half_width, 3, 2, 1, bias=False)
        self.bn1 = make_norm(half_width, norm=stem_norm)
        self.conv2 = nn.Conv2d(half_width, half_width, 3, 1, 1, bias=False)
        self.bn2 = make_norm(half_width, norm=stem_norm)
        self.conv3 = nn.Conv2d(half_width, self.inplanes, 3, 1, 1, bias=False)
        self.bn3 = make_norm(self.inplanes, norm=stem_norm)
    else:
        self.conv1 = nn.Conv2d(self.dim_in, self.inplanes, 7, 2, 3, bias=False)
        self.bn1 = make_norm(self.inplanes, norm=stem_norm)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages.
    # NOTE(review): layers[3] is indexed unconditionally, but a stride below
    # 32 slices LAYERS to fewer than four entries, which would raise
    # IndexError here — confirm which strides are supported.
    self.layer1 = self._make_layer(block, 64, layers[0], 1, conv=convs[0], ctx=ctxs[0])
    self.layer2 = self._make_layer(block, 128, layers[1], 2, conv=convs[1], ctx=ctxs[1])
    self.layer3 = self._make_layer(block, 256, layers[2], 2, conv=convs[2], ctx=ctxs[2])
    self.layer4 = self._make_layer(block, 512, layers[3], 2, conv=convs[3], ctx=ctxs[3])

    num_levels = int(math.log(self.stride, 2))
    self.dim_out = self.stage_out_dim[1:num_levels]
    self.spatial_out = self.stage_out_spatial[1:num_levels]

    # NOTE(review): avgpool and fc are not created in this method;
    # presumably the parent constructor builds the classifier and it is
    # dropped here — confirm against the base class.
    del self.avgpool
    del self.fc

    self._init_weights()