def __init__(self, arch='b', img_size=224, patch_size=16, in_channels=3, ffn_ratio=4, qkv_bias=False, drop_rate=0., attn_drop_rate=0., drop_path_rate=0., act_cfg=dict(type='GELU'), norm_cfg=dict(type='LN'), first_stride=4, num_fcs=2, init_cfg=[ dict(type='TruncNormal', layer='Linear', std=.02), dict(type='Constant', layer='LayerNorm', val=1., bias=0.) ]): super(TNT, self).__init__(init_cfg=init_cfg) if isinstance(arch, str): arch = arch.lower() assert arch in set(self.arch_zoo), \ f'Arch {arch} is not in default archs {set(self.arch_zoo)}' self.arch_settings = self.arch_zoo[arch] else: essential_keys = { 'embed_dims_outer', 'embed_dims_inner', 'num_layers', 'num_heads_inner', 'num_heads_outer' } assert isinstance(arch, dict) and set(arch) == essential_keys, \ f'Custom arch needs a dict with keys {essential_keys}' self.arch_settings = arch self.embed_dims_inner = self.arch_settings['embed_dims_inner'] self.embed_dims_outer = self.arch_settings['embed_dims_outer'] # embed_dims for consistency with other models self.embed_dims = self.embed_dims_outer self.num_layers = self.arch_settings['num_layers'] self.num_heads_inner = self.arch_settings['num_heads_inner'] self.num_heads_outer = self.arch_settings['num_heads_outer'] self.pixel_embed = PixelEmbed(img_size=img_size, patch_size=patch_size, in_channels=in_channels, embed_dims_inner=self.embed_dims_inner, stride=first_stride) num_patches = self.pixel_embed.num_patches self.num_patches = num_patches new_patch_size = self.pixel_embed.new_patch_size num_pixel = new_patch_size[0] * new_patch_size[1] self.norm1_proj = build_norm_layer(norm_cfg, num_pixel * self.embed_dims_inner)[1] self.projection = nn.Linear(num_pixel * self.embed_dims_inner, self.embed_dims_outer) self.norm2_proj = build_norm_layer(norm_cfg, self.embed_dims_outer)[1] self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dims_outer)) self.patch_pos = nn.Parameter( torch.zeros(1, num_patches + 1, self.embed_dims_outer)) self.pixel_pos = nn.Parameter( torch.zeros(1, self.embed_dims_inner, new_patch_size[0], new_patch_size[1])) self.drop_after_pos = nn.Dropout(p=drop_rate) dpr = [ x.item() for x in torch.linspace(0, drop_path_rate, self.num_layers) ] # stochastic depth decay rule self.layers = ModuleList() for i in range(self.num_layers): block_cfg = dict(ffn_ratio=ffn_ratio, drop_rate=drop_rate, attn_drop_rate=attn_drop_rate, drop_path_rate=dpr[i], num_fcs=num_fcs, qkv_bias=qkv_bias, norm_cfg=norm_cfg, batch_first=True) self.layers.append( TnTLayer(num_pixel=num_pixel, embed_dims_inner=self.embed_dims_inner, embed_dims_outer=self.embed_dims_outer, num_heads_inner=self.num_heads_inner, num_heads_outer=self.num_heads_outer, inner_block_cfg=block_cfg, outer_block_cfg=block_cfg, norm_cfg=norm_cfg)) self.norm = build_norm_layer(norm_cfg, self.embed_dims_outer)[1] trunc_normal_(self.cls_token, std=.02) trunc_normal_(self.patch_pos, std=.02) trunc_normal_(self.pixel_pos, std=.02)
def __init__(self, block, num_blocks, in_channels, out_channels, expansion=None, stride=1, avg_down=False, conv_cfg=None, norm_cfg=dict(type='BN'), downsample_first=True, kernel_size=3, groups=1, attention=False, **kwargs): # Protect mutable default arguments norm_cfg = copy.deepcopy(norm_cfg) self.block = block self.expansion = get_expansion(block, expansion) downsample = None if stride != 1 or in_channels != out_channels: downsample = [] conv_stride = stride if avg_down and stride != 1: conv_stride = 1 downsample.append( nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False)) downsample.extend([ build_conv_layer(conv_cfg, in_channels, out_channels, kernel_size=1, stride=conv_stride, bias=False), build_norm_layer(norm_cfg, out_channels)[1] ]) downsample = nn.Sequential(*downsample) layers = [] if downsample_first: layers.append( block(in_channels=in_channels, out_channels=out_channels, expansion=self.expansion, stride=stride, downsample=downsample, conv_cfg=conv_cfg, norm_cfg=norm_cfg, kernel_size=kernel_size, groups=groups, attention=attention, **kwargs)) in_channels = out_channels for _ in range(1, num_blocks): layers.append( block(in_channels=in_channels, out_channels=out_channels, expansion=self.expansion, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, kernel_size=kernel_size, groups=groups, attention=attention, **kwargs)) else: # downsample_first=False is for HourglassModule for i in range(0, num_blocks - 1): layers.append( block(in_channels=in_channels, out_channels=in_channels, expansion=self.expansion, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, kernel_size=kernel_size, groups=groups, attention=attention, **kwargs)) layers.append( block(in_channels=in_channels, out_channels=out_channels, expansion=self.expansion, stride=stride, downsample=downsample, conv_cfg=conv_cfg, norm_cfg=norm_cfg, kernel_size=kernel_size, groups=groups, attention=attention, **kwargs)) super().__init__(*layers)
def __init__(self, inplanes, planes, scales=4, base_width=26, base_channels=64, stage_type='normal', sac=None, **kwargs): """Bottle2neck block for Res2Net. If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is "caffe", the stride-two layer is the first 1x1 conv layer. """ super(Bottle2neck, self).__init__(inplanes, planes, **kwargs) assert scales > 1, 'Res2Net degenerates to ResNet when scales = 1.' assert sac is None or isinstance(sac, dict) self.sac = sac self.with_sac = sac is not None width = int(math.floor(self.planes * (base_width / base_channels))) self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, width * scales, postfix=1) self.norm3_name, norm3 = build_norm_layer(self.norm_cfg, self.planes * self.expansion, postfix=3) self.conv1 = build_conv_layer(self.conv_cfg, self.inplanes, width * scales, kernel_size=1, stride=self.conv1_stride, bias=False) self.add_module(self.norm1_name, norm1) if stage_type == 'stage' and self.conv2_stride != 1: self.pool = nn.AvgPool2d(kernel_size=3, stride=self.conv2_stride, padding=1) convs = [] bns = [] fallback_on_stride = False if self.with_dcn: fallback_on_stride = self.dcn.pop('fallback_on_stride', False) if self.with_sac: for i in range(scales - 1): convs.append( build_conv_layer(self.sac, width, width, kernel_size=3, stride=self.conv2_stride, padding=self.dilation, dilation=self.dilation, bias=False)) bns.append( build_norm_layer(self.norm_cfg, width, postfix=i + 1)[1]) self.convs = nn.ModuleList(convs) self.bns = nn.ModuleList(bns) elif not self.with_dcn or fallback_on_stride: for i in range(scales - 1): convs.append( build_conv_layer(self.conv_cfg, width, width, kernel_size=3, stride=self.conv2_stride, padding=self.dilation, dilation=self.dilation, bias=False)) bns.append( build_norm_layer(self.norm_cfg, width, postfix=i + 1)[1]) self.convs = nn.ModuleList(convs) self.bns = nn.ModuleList(bns) else: assert self.conv_cfg is None, 'conv_cfg must be None for DCN' for i in range(scales - 1): convs.append( build_conv_layer(self.dcn, width, width, kernel_size=3, stride=self.conv2_stride, padding=self.dilation, dilation=self.dilation, bias=False)) bns.append( build_norm_layer(self.norm_cfg, width, postfix=i + 1)[1]) self.convs = nn.ModuleList(convs) self.bns = nn.ModuleList(bns) self.conv3 = build_conv_layer(self.conv_cfg, width * scales, self.planes * self.expansion, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3) self.stage_type = stage_type self.scales = scales self.width = width delattr(self, 'conv2') delattr(self, self.norm2_name)
def __init__(self, img_size=(224, 224), patch_size=16, in_channels=3, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, out_indices=11, qkv_bias=True, qk_scale=None, drop_rate=0., attn_drop_rate=0., drop_path_rate=0., norm_cfg=dict(type='LN', eps=1e-6, requires_grad=True), act_cfg=dict(type='GELU'), norm_eval=False, final_norm=False, out_shape='NCHW', with_cls_token=True, interpolate_mode='bicubic', with_cp=False): super(VisionTransformer, self).__init__() self.img_size = img_size self.patch_size = patch_size self.features = self.embed_dim = embed_dim self.patch_embed = PatchEmbed(img_size=img_size, patch_size=patch_size, in_channels=in_channels, embed_dim=embed_dim) self.with_cls_token = with_cls_token self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim)) self.pos_embed = nn.Parameter( torch.zeros(1, self.patch_embed.num_patches + 1, embed_dim)) self.pos_drop = nn.Dropout(p=drop_rate) if isinstance(out_indices, int): self.out_indices = [out_indices] elif isinstance(out_indices, list) or isinstance(out_indices, tuple): self.out_indices = out_indices else: raise TypeError('out_indices must be type of int, list or tuple') dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth) ] # stochastic depth decay rule self.blocks = nn.ModuleList([ Block(dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, drop=dpr[i], attn_drop=attn_drop_rate, act_cfg=act_cfg, norm_cfg=norm_cfg, with_cp=with_cp) for i in range(depth) ]) assert out_shape in ['NLC', 'NCHW'], 'output shape must be "NLC" or "NCHW".' self.out_shape = out_shape self.interpolate_mode = interpolate_mode self.final_norm = final_norm if final_norm: _, self.norm = build_norm_layer(norm_cfg, embed_dim) self.norm_eval = norm_eval self.with_cp = with_cp
def __init__(self, *args, norm_cfg=dict(type='LN'), **kwargs): super().__init__(*args, **kwargs) self.norm = build_norm_layer(norm_cfg, self.embed_dims)[1]
def _make_fuse_layers(self): """Make fuse layer.""" if self.num_branches == 1: return None num_branches = self.num_branches in_channels = self.in_channels fuse_layers = [] num_out_branches = num_branches if self.multiscale_output else 1 for i in range(num_out_branches): fuse_layer = [] for j in range(num_branches): if j > i: fuse_layer.append( nn.Sequential( build_conv_layer(self.conv_cfg, in_channels[j], in_channels[i], kernel_size=1, stride=1, padding=0, bias=False), build_norm_layer(self.norm_cfg, in_channels[i])[1], nn.Upsample(scale_factor=2**(j - i), mode='nearest'))) # GhostModule( # in_channels[j], # in_channels[i], # kernel_size=1, # ratio=2, # dw_size=3, # stride=1, # relu=False), ##已经包括了BN和relu(可选) # nn.Upsample( # scale_factor=2**(j - i), mode='nearest'))) elif j == i: fuse_layer.append(None) else: #j < i conv_downsamples = [] for k in range(i - j): if k == i - j - 1: conv_downsamples.append( nn.Sequential( build_conv_layer(self.conv_cfg, in_channels[j], in_channels[j], kernel_size=3, stride=2, padding=1, groups=in_channels[j], bias=False), build_norm_layer(self.norm_cfg, in_channels[j])[1], build_conv_layer(self.conv_cfg, in_channels[j], in_channels[i], kernel_size=1, stride=1, padding=0, bias=False), build_norm_layer(self.norm_cfg, in_channels[i])[1])) # GhostModule( # in_channels[j], # in_channels[i], # kernel_size=1, # ratio=2, # dw_size=3, # stride=1, # relu=False))) ##已经包括了BN和relu(可选) else: conv_downsamples.append( nn.Sequential( build_conv_layer(self.conv_cfg, in_channels[j], in_channels[j], kernel_size=3, stride=2, padding=1, groups=in_channels[j], bias=False), build_norm_layer(self.norm_cfg, in_channels[j])[1], build_conv_layer(self.conv_cfg, in_channels[j], in_channels[j], kernel_size=1, stride=1, padding=0, bias=False), build_norm_layer(self.norm_cfg, in_channels[j])[1], nn.ReLU(inplace=True))) # GhostModule( # in_channels[j], # in_channels[j], # kernel_size=1, # ratio=2, # dw_size=3, # stride=1, # relu=True))) ##已经包括了BN和relu(可选) fuse_layer.append(nn.Sequential(*conv_downsamples)) fuse_layers.append(nn.ModuleList(fuse_layer)) return nn.ModuleList(fuse_layers)
def _make_fuse_layers(self): """Make fuse layer.""" if self.num_branches == 1: return None num_branches = self.num_branches in_channels = self.in_channels fuse_layers = [] num_out_branches = num_branches if self.multiscale_output else 1 for i in range(num_out_branches): fuse_layer = [] for j in range(num_branches): if j > i: fuse_layer.append( nn.Sequential( build_conv_layer( self.conv_cfg, in_channels[j], in_channels[i], kernel_size=1, stride=1, padding=0, bias=False), build_norm_layer(self.norm_cfg, in_channels[i])[1], nn.Upsample( scale_factor=2**(j - i), mode=self.upsample_cfg['mode'], align_corners=self. upsample_cfg['align_corners']))) elif j == i: fuse_layer.append(None) else: conv_downsamples = [] for k in range(i - j): if k == i - j - 1: conv_downsamples.append( nn.Sequential( build_conv_layer( self.conv_cfg, in_channels[j], in_channels[i], kernel_size=3, stride=2, padding=1, bias=False), build_norm_layer(self.norm_cfg, in_channels[i])[1])) else: conv_downsamples.append( nn.Sequential( build_conv_layer( self.conv_cfg, in_channels[j], in_channels[j], kernel_size=3, stride=2, padding=1, bias=False), build_norm_layer(self.norm_cfg, in_channels[j])[1], nn.ReLU(inplace=True))) fuse_layer.append(nn.Sequential(*conv_downsamples)) fuse_layers.append(nn.ModuleList(fuse_layer)) return nn.ModuleList(fuse_layers)
def __init__(self, num_classes=80, num_ffn_fcs=2, num_heads=8, num_cls_fcs=1, num_reg_fcs=3, feedforward_channels=2048, in_channels=256, dropout=0.0, ffn_act_cfg=dict(type='ReLU', inplace=True), dynamic_conv_cfg=dict( type='DynamicConv', in_channels=256, feat_channels=64, out_channels=256, input_feat_shape=7, act_cfg=dict(type='ReLU', inplace=True), norm_cfg=dict(type='LN')), loss_iou=dict(type='GIoULoss', loss_weight=2.0), **kwargs): super(DIIHead, self).__init__( num_classes=num_classes, reg_decoded_bbox=True, reg_class_agnostic=True, **kwargs) self.loss_iou = build_loss(loss_iou) self.in_channels = in_channels self.fp16_enabled = False self.attention = MultiheadAttention(in_channels, num_heads, dropout) self.attention_norm = build_norm_layer(dict(type='LN'), in_channels)[1] self.instance_interactive_conv = build_transformer(dynamic_conv_cfg) self.instance_interactive_conv_dropout = nn.Dropout(dropout) self.instance_interactive_conv_norm = build_norm_layer( dict(type='LN'), in_channels)[1] self.ffn = FFN( in_channels, feedforward_channels, num_ffn_fcs, act_cfg=ffn_act_cfg, dropout=dropout) self.ffn_norm = build_norm_layer(dict(type='LN'), in_channels)[1] self.cls_fcs = nn.ModuleList() for _ in range(num_cls_fcs): self.cls_fcs.append( nn.Linear(in_channels, in_channels, bias=False)) self.cls_fcs.append( build_norm_layer(dict(type='LN'), in_channels)[1]) self.cls_fcs.append( build_activation_layer(dict(type='ReLU', inplace=True))) # over load the self.fc_cls in BBoxHead if self.loss_cls.use_sigmoid: self.fc_cls = nn.Linear(in_channels, self.num_classes) else: self.fc_cls = nn.Linear(in_channels, self.num_classes + 1) self.reg_fcs = nn.ModuleList() for _ in range(num_reg_fcs): self.reg_fcs.append( nn.Linear(in_channels, in_channels, bias=False)) self.reg_fcs.append( build_norm_layer(dict(type='LN'), in_channels)[1]) self.reg_fcs.append( build_activation_layer(dict(type='ReLU', inplace=True))) # over load the self.fc_cls in BBoxHead self.fc_reg = nn.Linear(in_channels, 4) assert self.reg_class_agnostic, 'DIIHead only ' \ 'suppport `reg_class_agnostic=True` ' assert self.reg_decoded_bbox, 'DIIHead only ' \ 'suppport `reg_decoded_bbox=True`'
def __init__(self, block, inplanes, planes, num_blocks, stride=1, dilation=1, avg_down=False, conv_cfg=None, norm_cfg=dict(type='BN'), downsample_first=True, **kwargs): self.block = block if stride != 1 or inplanes != planes * block.expansion: if stride == 1 and dilation == 1: kernel_size = 1 dd = 1 padding = 0 else: kernel_size = 3 if dilation > 1: dd = dilation // 2 padding = dd else: dd = 1 padding = 0 downsample = nn.Sequential( build_conv_layer( conv_cfg, inplanes, planes * block.expansion, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dd, bias=False), build_norm_layer(norm_cfg, planes * block.expansion)[1]) layers = [] if downsample_first: layers.append( block( inplanes=inplanes, planes=planes, stride=stride, dilation=dilation, downsample=downsample, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs)) inplanes = planes * block.expansion for _ in range(1, num_blocks): layers.append( block( inplanes=inplanes, planes=planes, stride=1, dilation=dilation, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs)) else: # downsample_first=False is for HourglassModule for _ in range(num_blocks - 1): layers.append( block( inplanes=inplanes, planes=inplanes, stride=1, dilation=dilation, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs)) layers.append( block( inplanes=inplanes, planes=planes, stride=stride, dilation=dilation, downsample=downsample, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs)) super(SOTResLayer, self).__init__(*layers)
def __init__(self, in_channels, out_channels, num_deconv_layers=3, num_deconv_filters=(256, 256, 256), num_deconv_kernels=(4, 4, 4), extra=None, in_index=0, input_transform=None, align_corners=False, loss_keypoint=None, train_cfg=None, test_cfg=None): super().__init__() self.in_channels = in_channels self.loss = build_loss(loss_keypoint) self.train_cfg = {} if train_cfg is None else train_cfg self.test_cfg = {} if test_cfg is None else test_cfg self.target_type = self.test_cfg.get('target_type', 'GaussianHeatMap') self._init_inputs(in_channels, in_index, input_transform) self.in_index = in_index self.align_corners = align_corners if extra is not None and not isinstance(extra, dict): raise TypeError('extra should be dict or None.') if num_deconv_layers > 0: self.deconv_layers = self._make_deconv_layer( num_deconv_layers, num_deconv_filters, num_deconv_kernels, ) elif num_deconv_layers == 0: self.deconv_layers = nn.Identity() else: raise ValueError( f'num_deconv_layers ({num_deconv_layers}) should >= 0.') identity_final_layer = False if extra is not None and 'final_conv_kernel' in extra: assert extra['final_conv_kernel'] in [0, 1, 3] if extra['final_conv_kernel'] == 3: padding = 1 elif extra['final_conv_kernel'] == 1: padding = 0 else: # 0 for Identity mapping. identity_final_layer = True kernel_size = extra['final_conv_kernel'] else: kernel_size = 1 padding = 0 if identity_final_layer: self.final_layer = nn.Identity() else: conv_channels = num_deconv_filters[ -1] if num_deconv_layers > 0 else self.in_channels layers = [] if extra is not None: num_conv_layers = extra.get('num_conv_layers', 0) num_conv_kernels = extra.get('num_conv_kernels', [1] * num_conv_layers) for i in range(num_conv_layers): layers.append( build_conv_layer( dict(type='Conv2d'), in_channels=conv_channels, out_channels=conv_channels, kernel_size=num_conv_kernels[i], stride=1, padding=(num_conv_kernels[i] - 1) // 2)) layers.append( build_norm_layer(dict(type='BN'), conv_channels)[1]) layers.append(nn.ReLU(inplace=True)) layers.append( build_conv_layer( cfg=dict(type='Conv2d'), in_channels=conv_channels, out_channels=out_channels, kernel_size=kernel_size, stride=1, padding=padding)) if len(layers) > 1: self.final_layer = nn.Sequential(*layers) else: self.final_layer = layers[0]
def __init__(self, extra, in_channels=3, conv_cfg=None, norm_cfg=dict(type='BN'), norm_eval=True, with_cp=False, zero_init_residual=False): super(HRNet, self).__init__() self.extra = extra self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.norm_eval = norm_eval self.with_cp = with_cp self.zero_init_residual = zero_init_residual # stem net self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1) self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2) self.conv1 = build_conv_layer( self.conv_cfg, in_channels, 64, kernel_size=3, stride=2, padding=1, bias=False) self.add_module(self.norm1_name, norm1) self.conv2 = build_conv_layer( self.conv_cfg, 64, 64, kernel_size=3, stride=2, padding=1, bias=False) self.add_module(self.norm2_name, norm2) self.relu = nn.ReLU(inplace=True) # stage 1 self.stage1_cfg = self.extra['stage1'] num_channels = self.stage1_cfg['num_channels'][0] block_type = self.stage1_cfg['block'] num_blocks = self.stage1_cfg['num_blocks'][0] block = self.blocks_dict[block_type] stage1_out_channels = num_channels * block.expansion self.layer1 = self._make_layer(block, 64, num_channels, num_blocks) # stage 2 self.stage2_cfg = self.extra['stage2'] num_channels = self.stage2_cfg['num_channels'] block_type = self.stage2_cfg['block'] block = self.blocks_dict[block_type] num_channels = [channel * block.expansion for channel in num_channels] self.transition1 = self._make_transition_layer([stage1_out_channels], num_channels) self.stage2, pre_stage_channels = self._make_stage( self.stage2_cfg, num_channels) # stage 3 self.stage3_cfg = self.extra['stage3'] num_channels = self.stage3_cfg['num_channels'] block_type = self.stage3_cfg['block'] block = self.blocks_dict[block_type] num_channels = [channel * block.expansion for channel in num_channels] self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels) self.stage3, pre_stage_channels = self._make_stage( self.stage3_cfg, num_channels) # stage 4 self.stage4_cfg = self.extra['stage4'] num_channels = self.stage4_cfg['num_channels'] block_type = self.stage4_cfg['block'] block = self.blocks_dict[block_type] num_channels = [channel * block.expansion for channel in num_channels] self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels) self.stage4, pre_stage_channels = self._make_stage( self.stage4_cfg, num_channels)
def __init__(self, inplanes, planes, groups=1, base_width=4, base_channels=64, radix=2, reduction_factor=4, avd=True, **kwargs): """Bottleneck block for ResNeSt. If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is "caffe", the stride-two layer is the first 1x1 conv layer. """ super(Bottleneck, self).__init__(inplanes, planes, **kwargs) if groups == 1: width = self.planes else: width = math.floor(self.planes * (base_width / base_channels)) * groups self.avd = avd and self.conv2_stride > 1 self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, width, postfix=1) self.norm3_name, norm3 = build_norm_layer(self.norm_cfg, self.planes * self.expansion, postfix=3) self.conv1 = build_conv_layer(self.conv_cfg, self.inplanes, width, kernel_size=1, stride=self.conv1_stride, bias=False) self.add_module(self.norm1_name, norm1) self.with_modulated_dcn = False self.conv2 = SplAtConv2d(width, width, kernel_size=3, stride=1 if self.avd else self.conv2_stride, padding=self.dilation, dilation=self.dilation, groups=groups, radix=radix, reduction_factor=reduction_factor, bias=False, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, dcn=self.dcn) delattr(self, self.norm2_name) if self.avd: self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1) self.conv3 = build_conv_layer(self.conv_cfg, width, self.planes * self.expansion, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3)
def __init__(self, inplanes, planes, groups=1, base_width=4, base_channels=64, **kwargs): super(Bottleneck, self).__init__(inplanes, planes, **kwargs) if groups == 1: width = self.planes else: width = math.floor(self.planes * (base_width / base_channels)) * groups self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, width, postfix=1) self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, width, postfix=2) self.norm3_name, norm3 = build_norm_layer(self.norm_cfg, self.planes * self.expansion, postfix=3) self.conv1 = build_conv_layer(self.conv_cfg, self.inplanes, width, kernel_size=1, stride=self.conv1_stride, bias=False) self.add_module(self.norm1_name, norm1) fallback_on_stride = False self.with_modulated_dcn = False if self.with_dcn: fallback_on_stride = self.dcn.pop('fallback_on_stride', False) if not self.with_dcn or fallback_on_stride: self.conv2 = build_conv_layer(self.conv_cfg, width, width, kernel_size=3, stride=self.conv2_stride, padding=self.dilation, dilation=self.dilation, groups=groups, bias=False) else: assert self.conv_cfg is None, 'conv_cfg must be None for DCN' self.conv2 = build_conv_layer(self.dcn, width, width, kernel_size=3, stride=self.conv2_stride, padding=self.dilation, dilation=self.dilation, groups=groups, bias=False) self.add_module(self.norm2_name, norm2) self.conv3 = build_conv_layer(self.conv_cfg, width, self.planes * self.expansion, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3)
def __init__(self, attn_cfgs=None, ffn_cfgs=dict( type='FFN', embed_dims=256, feedforward_channels=1024, num_fcs=2, ffn_drop=0., act_cfg=dict(type='ReLU', inplace=True), ), operation_order=None, norm_cfg=dict(type='LN'), init_cfg=None, batch_first=False, **kwargs): deprecated_args = dict(feedforward_channels='feedforward_channels', ffn_dropout='ffn_drop', ffn_num_fcs='num_fcs') for ori_name, new_name in deprecated_args.items(): if ori_name in kwargs: warnings.warn( f'The arguments `{ori_name}` in BaseTransformerLayer ' f'has been deprecated, now you should set `{new_name}` ' f'and other FFN related arguments ' f'to a dict named `ffn_cfgs`. ') ffn_cfgs[new_name] = kwargs[ori_name] super(BaseTransformerLayer, self).__init__(init_cfg) self.batch_first = batch_first assert set(operation_order) & set( ['self_attn', 'norm', 'ffn', 'cross_attn']) == \ set(operation_order), f'The operation_order of' \ f' {self.__class__.__name__} should ' \ f'contains all four operation type ' \ f"{['self_attn', 'norm', 'ffn', 'cross_attn']}" num_attn = operation_order.count('self_attn') + operation_order.count( 'cross_attn') if isinstance(attn_cfgs, dict): attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)] else: assert num_attn == len(attn_cfgs), f'The length ' \ f'of attn_cfg {num_attn} is ' \ f'not consistent with the number of attention' \ f'in operation_order {operation_order}.' self.num_attn = num_attn self.operation_order = operation_order self.norm_cfg = norm_cfg self.pre_norm = operation_order[0] == 'norm' self.attentions = ModuleList() index = 0 for operation_name in operation_order: if operation_name in ['self_attn', 'cross_attn']: if 'batch_first' in attn_cfgs[index]: assert self.batch_first == attn_cfgs[index]['batch_first'] else: attn_cfgs[index]['batch_first'] = self.batch_first attention = build_attention(attn_cfgs[index]) # Some custom attentions used as `self_attn` # or `cross_attn` can have different behavior. attention.operation_name = operation_name self.attentions.append(attention) index += 1 self.embed_dims = self.attentions[0].embed_dims self.ffns = ModuleList() num_ffns = operation_order.count('ffn') if isinstance(ffn_cfgs, dict): ffn_cfgs = ConfigDict(ffn_cfgs) if isinstance(ffn_cfgs, dict): ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)] assert len(ffn_cfgs) == num_ffns for ffn_index in range(num_ffns): if 'embed_dims' not in ffn_cfgs[ffn_index]: ffn_cfgs['embed_dims'] = self.embed_dims else: assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims self.ffns.append( build_feedforward_network(ffn_cfgs[ffn_index], dict(type='FFN'))) self.norms = ModuleList() num_norms = operation_order.count('norm') for _ in range(num_norms): self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])
def __init__(self, input_shapes, in_features, out_channels, norm="", top_block=None, fuse_type="sum"): """ Args: in_features (list[str]): names of the input feature maps coming from the backbone to which FPN is attached. For example, if the backbone produces ["res2", "res3", "res4"], any *contiguous* sublist of these may be used; order must be from high to low resolution. out_channels (int): number of channels in the output feature maps. norm (str, dict): dictionary to construct and config norm layer. Example: dict(type='BN') top_block (nn.Module or None): if provided, an extra operation will be performed on the output of the last (smallest resolution) FPN output, and the result will extend the result list. The top_block further downsamples the feature map. It must have an attribute "num_levels", meaning the number of extra FPN levels added by this block, and "in_feature", which is a string representing its input feature (e.g., p5). fuse_type (str): types for fusing the top down features and the lateral ones. It can be "sum" (default), which sums up element-wise; or "avg", which takes the element-wise mean of the two. """ super(FPN, self).__init__() # Feature map strides and channels from the bottom up network (e.g. ResNet) in_strides = [input_shapes[f].stride for f in in_features] in_channels = [input_shapes[f].channels for f in in_features] _assert_strides_are_log2_contiguous(in_strides) lateral_convs, lateral_norms = [], [] output_convs, output_norms = [], [] self.use_norm = norm != "" use_bias = norm == "" for idx, in_channels in enumerate(in_channels): lateral_conv = Conv2d(in_channels, out_channels, kernel_size=1, bias=use_bias) output_conv = Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=use_bias) stage = int(math.log2(in_strides[idx])) if self.use_norm: _, lateral_norm = build_norm_layer(norm, out_channels, stage) _, output_norm = build_norm_layer(norm, out_channels, stage) self.add_module("fpn_lateral_norm{}".format(stage), lateral_norm) self.add_module("fpn_output_norm{}".format(stage), output_norm) lateral_norms.append(lateral_norm) output_norms.append(output_norm) self.add_module("fpn_lateral{}".format(stage), lateral_conv) self.add_module("fpn_output{}".format(stage), output_conv) lateral_convs.append(lateral_conv) output_convs.append(output_conv) # Place convs into top-down order (from low to high resolution) # to make the top-down computation in forward clearer. self.lateral_convs = lateral_convs[::-1] self.output_convs = output_convs[::-1] if self.use_norm: self.lateral_norms = lateral_norms[::-1] self.output_norms = output_norms[::-1] self.top_block = top_block self.in_features = in_features # Return feature names are "p<stage>", like ["p2", "p3", ..., "p6"] self._out_feature_strides = { "p{}".format(int(math.log2(s))): s for s in in_strides } # top block output feature maps. if self.top_block is not None: for s in range(stage, stage + self.top_block.num_levels): self._out_feature_strides["p{}".format(s + 1)] = 2**(s + 1) self._out_features = list(self._out_feature_strides.keys()) self._out_feature_channels = { k: out_channels for k in self._out_features } self._size_divisibility = in_strides[-1] assert fuse_type in {"avg", "sum"} self._fuse_type = fuse_type
def __init__(self, inplanes, planes, groups=1, base_width=4, base_channels=64, **kwargs): """Bottleneck block for ResNeXt. If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is "caffe", the stride-two layer is the first 1x1 conv layer. """ super(Bottleneck, self).__init__(inplanes, planes, **kwargs) if groups == 1: width = self.planes else: width = math.floor(self.planes * (base_width / base_channels)) * groups self.relu1 = DyReLUB(width) self.relu2 = DyReLUB(width) self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, width, postfix=1) self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, width, postfix=2) self.norm3_name, norm3 = build_norm_layer(self.norm_cfg, self.planes * self.expansion, postfix=3) self.conv1 = build_conv_layer(self.conv_cfg, self.inplanes, width, kernel_size=1, stride=self.conv1_stride, bias=False) self.add_module(self.norm1_name, norm1) fallback_on_stride = False self.with_modulated_dcn = False if self.with_dcn: fallback_on_stride = self.dcn.pop('fallback_on_stride', False) if not self.with_dcn or fallback_on_stride: self.conv2 = build_conv_layer(self.conv_cfg, width, width, kernel_size=3, stride=self.conv2_stride, padding=self.dilation, dilation=self.dilation, groups=groups, bias=False) else: assert self.conv_cfg is None, 'conv_cfg must be None for DCN' self.conv2 = build_conv_layer(self.dcn, width, width, kernel_size=3, stride=self.conv2_stride, padding=self.dilation, dilation=self.dilation, groups=groups, bias=False) self.add_module(self.norm2_name, norm2) self.conv3 = build_conv_layer(self.conv_cfg, width, self.planes * self.expansion, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3)
def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer): """Make transition layer.""" num_branches_cur = len(num_channels_cur_layer) num_branches_pre = len(num_channels_pre_layer) transition_layers = [] for i in range(num_branches_cur): if i < num_branches_pre: if num_channels_cur_layer[i] != num_channels_pre_layer[i]: transition_layers.append( nn.Sequential( build_conv_layer(self.conv_cfg, num_channels_pre_layer[i], num_channels_pre_layer[i], kernel_size=3, stride=1, padding=1, groups=num_channels_pre_layer[i], bias=False), build_norm_layer(self.norm_cfg, num_channels_pre_layer[i])[1], build_conv_layer(self.conv_cfg, num_channels_pre_layer[i], num_channels_cur_layer[i], kernel_size=1, stride=1, padding=0, bias=False), build_norm_layer(self.norm_cfg, num_channels_cur_layer[i])[1], nn.ReLU())) else: transition_layers.append(None) else: conv_downsamples = [] for j in range(i + 1 - num_branches_pre): in_channels = num_channels_pre_layer[-1] out_channels = num_channels_cur_layer[i] \ if j == i - num_branches_pre else in_channels conv_downsamples.append( nn.Sequential( build_conv_layer(self.conv_cfg, in_channels, in_channels, kernel_size=3, stride=2, padding=1, groups=in_channels, bias=False), build_norm_layer(self.norm_cfg, in_channels)[1], build_conv_layer(self.conv_cfg, in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False), build_norm_layer(self.norm_cfg, out_channels)[1], nn.ReLU())) transition_layers.append(nn.Sequential(*conv_downsamples)) return nn.ModuleList(transition_layers)
def __init__(self, block, num_blocks, in_channels, out_channels, expansion=None, shortcut_type='B', stride=1, avg_down=False, conv_cfg=None, norm_cfg=dict(type='BN'), **kwargs): self.block = block self.expansion = get_expansion(block, expansion) downsample = None if isinstance(stride, int): stride_flag = stride != 1 elif isinstance(stride, tuple): stride_flag = stride[0] != 1 if stride_flag or in_channels != out_channels: downsample = [] conv_stride = stride if shortcut_type == 'A': downsample = partial(downsample_basic_block, planes=in_channels * expansion, stride=stride) else: if avg_down and stride != 1: conv_stride = 1 downsample.append( nn.AvgPool3d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False)) downsample.extend([ build_conv_layer(conv_cfg, in_channels, out_channels, kernel_size=1, stride=conv_stride, bias=False), build_norm_layer(norm_cfg, out_channels)[1] ]) downsample = nn.Sequential(*downsample) layers = [] layers.append( block(in_channels=in_channels, out_channels=out_channels, expansion=self.expansion, stride=stride, downsample=downsample, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs)) in_channels = out_channels for i in range(1, num_blocks): layers.append( block(in_channels=in_channels, out_channels=out_channels, expansion=self.expansion, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs)) super(ResLayer, self).__init__(*layers)
def __init__(self, img_size=224, patch_size=16, in_channels=3, embed_dims=768, num_layers=12, num_heads=12, mlp_ratio=4, out_indices=-1, qkv_bias=True, drop_rate=0., attn_drop_rate=0., drop_path_rate=0., with_cls_token=True, output_cls_token=False, norm_cfg=dict(type='LN'), act_cfg=dict(type='GELU'), patch_norm=False, final_norm=False, interpolate_mode='bicubic', num_fcs=2, norm_eval=False, with_cp=False, pretrain_style='timm', pretrained=None, init_cfg=None): super(VisionTransformer, self).__init__() if isinstance(img_size, int): img_size = to_2tuple(img_size) elif isinstance(img_size, tuple): if len(img_size) == 1: img_size = to_2tuple(img_size[0]) assert len(img_size) == 2, \ f'The size of image should have length 1 or 2, ' \ f'but got {len(img_size)}' assert pretrain_style in ['timm', 'mmcls'] if output_cls_token: assert with_cls_token is True, f'with_cls_token must be True if' \ f'set output_cls_token to True, but got {with_cls_token}' if isinstance(pretrained, str) or pretrained is None: warnings.warn('DeprecationWarning: pretrained is a deprecated, ' 'please use "init_cfg" instead') else: raise TypeError('pretrained must be a str or None') self.img_size = img_size self.patch_size = patch_size self.interpolate_mode = interpolate_mode self.norm_eval = norm_eval self.with_cp = with_cp self.pretrain_style = pretrain_style self.pretrained = pretrained self.init_cfg = init_cfg self.patch_embed = PatchEmbed( in_channels=in_channels, embed_dims=embed_dims, conv_type='Conv2d', kernel_size=patch_size, stride=patch_size, pad_to_patch_size=True, norm_cfg=norm_cfg if patch_norm else None, init_cfg=None, ) num_patches = (img_size[0] // patch_size) * \ (img_size[1] // patch_size) self.with_cls_token = with_cls_token self.output_cls_token = output_cls_token self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims)) self.pos_embed = nn.Parameter( torch.zeros(1, num_patches + 1, embed_dims)) self.drop_after_pos = nn.Dropout(p=drop_rate) if isinstance(out_indices, int): if out_indices == -1: out_indices = num_layers - 1 self.out_indices = [out_indices] elif isinstance(out_indices, list) or isinstance(out_indices, tuple): self.out_indices = out_indices else: raise TypeError('out_indices must be type of int, list or tuple') dpr = [ x.item() for x in torch.linspace(0, drop_path_rate, num_layers) ] # stochastic depth decay rule self.layers = ModuleList() for i in range(num_layers): self.layers.append( TransformerEncoderLayer(embed_dims=embed_dims, num_heads=num_heads, feedforward_channels=mlp_ratio * embed_dims, attn_drop_rate=attn_drop_rate, drop_rate=drop_rate, drop_path_rate=dpr[i], num_fcs=num_fcs, qkv_bias=qkv_bias, act_cfg=act_cfg, norm_cfg=norm_cfg, batch_first=True)) self.final_norm = final_norm if final_norm: self.norm1_name, norm1 = build_norm_layer(norm_cfg, embed_dims, postfix=1) self.add_module(self.norm1_name, norm1)
def __init__(self, in_channels, out_channels, groups=1, width_per_group=4, base_channels=64, radix=2, reduction_factor=4, avg_down_stride=True, **kwargs): super().__init__(in_channels, out_channels, **kwargs) self.groups = groups self.width_per_group = width_per_group # For ResNet bottleneck, middle channels are determined by expansion # and out_channels, but for ResNeXt bottleneck, it is determined by # groups and width_per_group and the stage it is located in. if groups != 1: assert self.mid_channels % base_channels == 0 self.mid_channels = (groups * width_per_group * self.mid_channels // base_channels) self.avg_down_stride = avg_down_stride and self.conv2_stride > 1 self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, self.mid_channels, postfix=1) self.norm3_name, norm3 = build_norm_layer(self.norm_cfg, self.out_channels, postfix=3) self.conv1 = build_conv_layer(self.conv_cfg, self.in_channels, self.mid_channels, kernel_size=1, stride=self.conv1_stride, bias=False) self.add_module(self.norm1_name, norm1) self.conv2 = SplitAttentionConv2d( self.mid_channels, self.mid_channels, kernel_size=3, stride=1 if self.avg_down_stride else self.conv2_stride, padding=self.dilation, dilation=self.dilation, groups=groups, radix=radix, reduction_factor=reduction_factor, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg) delattr(self, self.norm2_name) if self.avg_down_stride: self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1) self.conv3 = build_conv_layer(self.conv_cfg, self.mid_channels, self.out_channels, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3)
def __init__(self, extra, in_channels=3, conv_cfg=None, norm_cfg=dict(type='BN'), norm_eval=False, with_cp=False, zero_init_residual=False): # Protect mutable default arguments norm_cfg = copy.deepcopy(norm_cfg) super().__init__() self.extra = extra self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.norm_eval = norm_eval self.with_cp = with_cp self.zero_init_residual = zero_init_residual # stem net self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1) self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2) self.conv1 = build_conv_layer( self.conv_cfg, in_channels, 64, kernel_size=3, stride=2, padding=1, bias=False) self.add_module(self.norm1_name, norm1) self.conv2 = build_conv_layer( self.conv_cfg, 64, 64, kernel_size=3, stride=2, padding=1, bias=False) self.add_module(self.norm2_name, norm2) self.relu = nn.ReLU(inplace=True) self.upsample_cfg = self.extra.get('upsample', { 'mode': 'nearest', 'align_corners': None }) # stage 1 self.stage1_cfg = self.extra['stage1'] num_channels = self.stage1_cfg['num_channels'][0] block_type = self.stage1_cfg['block'] num_blocks = self.stage1_cfg['num_blocks'][0] block = self.blocks_dict[block_type] stage1_out_channels = num_channels * get_expansion(block) self.layer1 = self._make_layer(block, 64, stage1_out_channels, num_blocks) # stage 2 self.stage2_cfg = self.extra['stage2'] num_channels = self.stage2_cfg['num_channels'] block_type = self.stage2_cfg['block'] block = self.blocks_dict[block_type] num_channels = [ channel * get_expansion(block) for channel in num_channels ] self.transition1 = self._make_transition_layer([stage1_out_channels], num_channels) self.stage2, pre_stage_channels = self._make_stage( self.stage2_cfg, num_channels) # stage 3 self.stage3_cfg = self.extra['stage3'] num_channels = self.stage3_cfg['num_channels'] block_type = self.stage3_cfg['block'] block = self.blocks_dict[block_type] num_channels = [ channel * get_expansion(block) for channel in num_channels ] self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels) self.stage3, pre_stage_channels = self._make_stage( self.stage3_cfg, num_channels) # stage 4 self.stage4_cfg = self.extra['stage4'] num_channels = self.stage4_cfg['num_channels'] block_type = self.stage4_cfg['block'] block = self.blocks_dict[block_type] num_channels = [ channel * get_expansion(block) for channel in num_channels ] self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels) self.stage4, pre_stage_channels = self._make_stage( self.stage4_cfg, num_channels, multiscale_output=self.stage4_cfg.get('multiscale_output', False))
def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, style='pytorch', with_cp=False, conv_cfg=None, norm_cfg=dict(type='BN'), dcn=None, plugins=None): """Bottleneck block for ResNet. If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is "caffe", the stride-two layer is the first 1x1 conv layer. """ super(Bottleneck, self).__init__() assert style in ['pytorch', 'caffe'] assert dcn is None or isinstance(dcn, dict) assert plugins is None or isinstance(plugins, list) if plugins is not None: allowed_position = ['after_conv1', 'after_conv2', 'after_conv3'] assert all(p['position'] in allowed_position for p in plugins) self.inplanes = inplanes self.planes = planes self.stride = stride self.dilation = dilation self.style = style self.with_cp = with_cp self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.dcn = dcn self.with_dcn = dcn is not None self.plugins = plugins self.with_plugins = plugins is not None if self.with_plugins: # collect plugins for conv1/conv2/conv3 self.after_conv1_plugins = [ plugin['cfg'] for plugin in plugins if plugin['position'] == 'after_conv1' ] self.after_conv2_plugins = [ plugin['cfg'] for plugin in plugins if plugin['position'] == 'after_conv2' ] self.after_conv3_plugins = [ plugin['cfg'] for plugin in plugins if plugin['position'] == 'after_conv3' ] if self.style == 'pytorch': self.conv1_stride = 1 self.conv2_stride = stride else: self.conv1_stride = stride self.conv2_stride = 1 self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) self.norm3_name, norm3 = build_norm_layer(norm_cfg, planes * self.expansion, postfix=3) self.conv1 = build_conv_layer(conv_cfg, inplanes, planes, kernel_size=1, stride=self.conv1_stride, bias=False) self.add_module(self.norm1_name, norm1) fallback_on_stride = False if self.with_dcn: fallback_on_stride = dcn.pop('fallback_on_stride', False) if not self.with_dcn or fallback_on_stride: self.conv2 = build_conv_layer(conv_cfg, planes, planes, kernel_size=3, stride=self.conv2_stride, padding=dilation, dilation=dilation, bias=False) else: assert self.conv_cfg is None, 'conv_cfg must be None for DCN' self.conv2 = build_conv_layer(dcn, planes, planes, kernel_size=3, stride=self.conv2_stride, padding=dilation, dilation=dilation, bias=False) self.add_module(self.norm2_name, norm2) self.conv3 = build_conv_layer(conv_cfg, planes, planes * self.expansion, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3) self.relu = nn.ReLU(inplace=True) self.downsample = downsample if self.with_plugins: self.after_conv1_plugin_names = self.make_block_plugins( planes, self.after_conv1_plugins) self.after_conv2_plugin_names = self.make_block_plugins( planes, self.after_conv2_plugins) self.after_conv3_plugin_names = self.make_block_plugins( planes * self.expansion, self.after_conv3_plugins)
def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, style='pytorch', with_cp=False, conv_cfg=None, norm_cfg=dict(type='BN'), dcn=None, plugins=None): """Bottleneck block for ResNet. If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is "caffe", the stride-two layer is the first 1x1 conv layer. """ super(Bottleneck, self).__init__() assert style in ['pytorch', 'caffe'] self.inplanes = inplanes self.planes = planes self.stride = stride self.dilation = dilation self.style = style self.with_cp = with_cp self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg if self.style == 'pytorch': self.conv1_stride = 1 self.conv2_stride = stride else: self.conv1_stride = stride self.conv2_stride = 1 self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) self.norm3_name, norm3 = build_norm_layer(norm_cfg, planes * self.expansion, postfix=3) self.conv1 = build_conv_layer(conv_cfg, inplanes, planes, kernel_size=1, stride=self.conv1_stride, bias=False) self.add_module(self.norm1_name, norm1) self.conv2 = build_conv_layer(conv_cfg, planes, planes, kernel_size=3, stride=self.conv2_stride, padding=dilation, dilation=dilation, bias=False) self.add_module(self.norm2_name, norm2) self.conv3 = build_conv_layer(conv_cfg, planes, planes * self.expansion, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3) self.relu = nn.ReLU(inplace=True) self.downsample = downsample
def __init__(self, extra, in_channels=3, conv_cfg=None, norm_cfg=dict(type='BN'), norm_eval=True, with_cp=False, zero_init_residual=False, multiscale_output=True, pretrained=None, init_cfg=None): super(HRNet, self).__init__(init_cfg) self.pretrained = pretrained assert not (init_cfg and pretrained), \ 'init_cfg and pretrained cannot be specified at the same time' if isinstance(pretrained, str): warnings.warn('DeprecationWarning: pretrained is deprecated, ' 'please use "init_cfg" instead') self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) elif pretrained is None: if init_cfg is None: self.init_cfg = [ dict(type='Kaiming', layer='Conv2d'), dict(type='Constant', val=1, layer=['_BatchNorm', 'GroupNorm']) ] else: raise TypeError('pretrained must be a str or None') # Assert configurations of 4 stages are in extra assert 'stage1' in extra and 'stage2' in extra \ and 'stage3' in extra and 'stage4' in extra # Assert whether the length of `num_blocks` and `num_channels` are # equal to `num_branches` for i in range(4): cfg = extra[f'stage{i + 1}'] assert len(cfg['num_blocks']) == cfg['num_branches'] and \ len(cfg['num_channels']) == cfg['num_branches'] self.extra = extra self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.norm_eval = norm_eval self.with_cp = with_cp self.zero_init_residual = zero_init_residual # stem net self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1) self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2) self.conv1 = build_conv_layer(self.conv_cfg, in_channels, 64, kernel_size=3, stride=2, padding=1, bias=False) self.add_module(self.norm1_name, norm1) self.conv2 = build_conv_layer(self.conv_cfg, 64, 64, kernel_size=3, stride=2, padding=1, bias=False) self.add_module(self.norm2_name, norm2) self.relu = nn.ReLU(inplace=True) # stage 1 self.stage1_cfg = self.extra['stage1'] num_channels = self.stage1_cfg['num_channels'][0] block_type = self.stage1_cfg['block'] num_blocks = self.stage1_cfg['num_blocks'][0] block = self.blocks_dict[block_type] stage1_out_channels = num_channels * block.expansion self.layer1 = self._make_layer(block, 64, num_channels, num_blocks) # stage 2 self.stage2_cfg = self.extra['stage2'] num_channels = self.stage2_cfg['num_channels'] block_type = self.stage2_cfg['block'] block = self.blocks_dict[block_type] num_channels = [channel * block.expansion for channel in num_channels] self.transition1 = self._make_transition_layer([stage1_out_channels], num_channels) self.stage2, pre_stage_channels = self._make_stage( self.stage2_cfg, num_channels) # stage 3 self.stage3_cfg = self.extra['stage3'] num_channels = self.stage3_cfg['num_channels'] block_type = self.stage3_cfg['block'] block = self.blocks_dict[block_type] num_channels = [channel * block.expansion for channel in num_channels] self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels) self.stage3, pre_stage_channels = self._make_stage( self.stage3_cfg, num_channels) # stage 4 self.stage4_cfg = self.extra['stage4'] num_channels = self.stage4_cfg['num_channels'] block_type = self.stage4_cfg['block'] block = self.blocks_dict[block_type] num_channels = [channel * block.expansion for channel in num_channels] self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels) self.stage4, pre_stage_channels = self._make_stage( self.stage4_cfg, num_channels, multiscale_output=multiscale_output)
def __init__(self, block, inplanes, planes, num_blocks, stride=1, avg_down=True, conv_cfg=None, norm_cfg=dict(type='BN'), scales=4, base_width=26, **kwargs): self.block = block downsample = None if stride != 1 or inplanes != planes * block.expansion: downsample = [] conv_stride = stride # Res2Net-v1b pretrained models use AvgPool2d in downsample # even if stride == 1 if avg_down: # and stride != 1: conv_stride = 1 downsample.append( nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False)) downsample.extend([ build_conv_layer(conv_cfg, inplanes, planes * block.expansion, kernel_size=1, stride=conv_stride, bias=False), build_norm_layer(norm_cfg, planes * block.expansion)[1] ]) downsample = nn.Sequential(*downsample) layers = [] layers.append( block(inplanes=inplanes, planes=planes, stride=stride, downsample=downsample, conv_cfg=conv_cfg, norm_cfg=norm_cfg, scales=scales, base_width=base_width, stage_type='stage', **kwargs)) inplanes = planes * block.expansion for i in range(1, num_blocks): layers.append( block(inplanes=inplanes, planes=planes, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, scales=scales, base_width=base_width, **kwargs)) super(Res2Layer, self).__init__(*layers)
def __init__(self, in_channels, out_channels, expansion=4, stride=1, dilation=1, downsample=None, style='pytorch', with_cp=False, conv_cfg=None, norm_cfg=dict(type='BN')): super(Bottleneck, self).__init__() assert style in ['pytorch', 'caffe'] self.in_channels = in_channels self.out_channels = out_channels self.expansion = expansion assert out_channels % expansion == 0 self.mid_channels = out_channels // expansion self.stride = stride self.dilation = dilation self.style = style self.with_cp = with_cp self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg if self.style == 'pytorch': self.conv1_stride = 1 self.conv2_stride = stride else: self.conv1_stride = stride self.conv2_stride = 1 self.norm1_name, norm1 = build_norm_layer(norm_cfg, self.mid_channels, postfix=1) self.norm2_name, norm2 = build_norm_layer(norm_cfg, self.mid_channels, postfix=2) self.norm3_name, norm3 = build_norm_layer(norm_cfg, out_channels, postfix=3) self.conv1 = build_conv_layer(conv_cfg, in_channels, self.mid_channels, kernel_size=1, stride=self.conv1_stride, bias=False) self.add_module(self.norm1_name, norm1) self.conv2 = build_conv_layer(conv_cfg, self.mid_channels, self.mid_channels, kernel_size=3, stride=self.conv2_stride, padding=dilation, dilation=dilation, bias=False) self.add_module(self.norm2_name, norm2) self.conv3 = build_conv_layer(conv_cfg, self.mid_channels, out_channels, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3) self.relu = nn.ReLU(inplace=True) self.downsample = downsample
def __init__(self, in_channels, out_channels, expansion=4, stride=1, dilation=1, downsample=None, style='pytorch', with_cp=False, conv_cfg=None, norm_cfg=dict(type='BN'), kernel_size=3, groups=1, attention=False): # Protect mutable default arguments norm_cfg = copy.deepcopy(norm_cfg) super().__init__() assert style in ['pytorch', 'caffe'] self.in_channels = in_channels self.out_channels = out_channels self.expansion = expansion assert out_channels % expansion == 0 self.mid_channels = out_channels // expansion self.stride = stride self.dilation = dilation self.style = style self.with_cp = with_cp self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg if self.style == 'pytorch': self.conv1_stride = 1 self.conv2_stride = stride else: self.conv1_stride = stride self.conv2_stride = 1 self.norm1_name, norm1 = build_norm_layer(norm_cfg, self.mid_channels, postfix=1) self.norm2_name, norm2 = build_norm_layer(norm_cfg, self.mid_channels, postfix=2) self.norm3_name, norm3 = build_norm_layer(norm_cfg, out_channels, postfix=3) self.conv1 = build_conv_layer(conv_cfg, in_channels, self.mid_channels, kernel_size=1, stride=self.conv1_stride, bias=False) self.add_module(self.norm1_name, norm1) self.conv2 = build_conv_layer(conv_cfg, self.mid_channels, self.mid_channels, kernel_size=kernel_size, stride=self.conv2_stride, padding=kernel_size // 2, groups=groups, dilation=dilation, bias=False) self.add_module(self.norm2_name, norm2) self.conv3 = build_conv_layer(conv_cfg, self.mid_channels, out_channels, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3) if attention: self.attention = ContextBlock(out_channels, max(1.0 / 16, 16.0 / out_channels)) else: self.attention = None self.relu = nn.ReLU(inplace=True) self.downsample = downsample
def __init__(self, block, inplanes, planes, num_blocks, stride=1, avg_down=False, conv_cfg=None, norm_cfg=dict(type='BN'), downsample_first=True, **kwargs): self.block = block downsample = None if stride != 1 or inplanes != planes * block.expansion: downsample = [] conv_stride = stride if avg_down: conv_stride = 1 downsample.append( nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False)) downsample.extend([ build_conv_layer(conv_cfg, inplanes, planes * block.expansion, kernel_size=1, stride=conv_stride, bias=False), build_norm_layer(norm_cfg, planes * block.expansion)[1] ]) downsample = nn.Sequential(*downsample) layers = [] if downsample_first: layers.append( block(inplanes=inplanes, planes=planes, stride=stride, downsample=downsample, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs)) inplanes = planes * block.expansion for _ in range(1, num_blocks): layers.append( block(inplanes=inplanes, planes=planes, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs)) else: # downsample_first=False is for HourglassModule for _ in range(num_blocks - 1): layers.append( block(inplanes=inplanes, planes=inplanes, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs)) layers.append( block(inplanes=inplanes, planes=planes, stride=stride, downsample=downsample, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs)) super(ResLayer, self).__init__(*layers)
def __init__(self, depth, in_channels=3, stem_channels=64, base_channels=64, num_stages=4, strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1), out_indices=(0, 1, 2, 3), style='pytorch', deep_stem=False, avg_down=False, frozen_stages=-1, conv_cfg=None, norm_cfg=dict(type='BN', requires_grad=True), norm_eval=True, dcn=None, stage_with_dcn=(False, False, False, False), plugins=None, with_cp=False, zero_init_residual=True): super(DB_ResNet, self).__init__() if depth not in self.arch_settings: raise KeyError(f'invalid depth {depth} for resnet') self.depth = depth self.stem_channels = stem_channels self.base_channels = base_channels self.num_stages = num_stages assert num_stages >= 1 and num_stages <= 4 self.strides = strides self.dilations = dilations assert len(strides) == len(dilations) == num_stages self.out_indices = out_indices assert max(out_indices) < num_stages self.style = style self.deep_stem = deep_stem self.avg_down = avg_down self.frozen_stages = frozen_stages self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.with_cp = with_cp self.norm_eval = norm_eval self.dcn = dcn self.stage_with_dcn = stage_with_dcn if dcn is not None: assert len(stage_with_dcn) == num_stages self.plugins = plugins self.zero_init_residual = zero_init_residual self.block, stage_blocks = self.arch_settings[depth] self.stage_blocks = stage_blocks[:num_stages] self.inplanes = stem_channels self._make_stem_layer(in_channels, stem_channels) self.res_layers = [] ### add db modules self.second_res_layers = [] self.eb_second_conv1 = nn.Conv2d(256, 64, kernel_size=1, stride=1, padding=0, dilation=1, bias=False) self.eb_second_conv2 = nn.Conv2d(512, 256, kernel_size=1, stride=1, padding=0, dilation=1, bias=False) self.eb_second_conv3 = nn.Conv2d(1024, 512, kernel_size=1, stride=1, padding=0, dilation=1, bias=False) self.eb_second_conv4 = nn.Conv2d(2048, 1024, kernel_size=1, stride=1, padding=0, dilation=1, bias=False) ######### add bn for db normalize = self.norm_cfg self.eb_second_bn1_name, eb_second_bn1 = build_norm_layer( normalize, 64, postfix='eb_second_bn1') self.add_module(self.eb_second_bn1_name, eb_second_bn1) self.eb_second_bn2_name, eb_second_bn2 = build_norm_layer( normalize, 256, postfix='eb_second_bn2') self.add_module(self.eb_second_bn2_name, eb_second_bn2) self.eb_second_bn3_name, eb_second_bn3 = build_norm_layer( normalize, 512, postfix='eb_second_bn3') self.add_module(self.eb_second_bn3_name, eb_second_bn3) self.eb_second_bn4_name, eb_second_bn4 = build_norm_layer( normalize, 1024, postfix='eb_second_bn4') self.add_module(self.eb_second_bn4_name, eb_second_bn4) ###### upsample in db self.eb_upsample = nn.Upsample(scale_factor=2, mode='nearest') for i, num_blocks in enumerate(self.stage_blocks): stride = strides[i] dilation = dilations[i] dcn = self.dcn if self.stage_with_dcn[i] else None if plugins is not None: stage_plugins = self.make_stage_plugins(plugins, i) else: stage_plugins = None planes = base_channels * 2**i res_layer = self.make_res_layer(block=self.block, inplanes=self.inplanes, planes=planes, num_blocks=num_blocks, stride=stride, dilation=dilation, style=self.style, avg_down=self.avg_down, with_cp=with_cp, conv_cfg=conv_cfg, norm_cfg=norm_cfg, dcn=dcn, plugins=stage_plugins) second_res_layer = self.make_res_layer(block=self.block, inplanes=self.inplanes, planes=planes, num_blocks=num_blocks, stride=stride, dilation=dilation, style=self.style, avg_down=self.avg_down, with_cp=with_cp, conv_cfg=conv_cfg, norm_cfg=norm_cfg, dcn=dcn, plugins=stage_plugins) self.inplanes = planes * self.block.expansion layer_name = f'layer{i + 1}' self.add_module(layer_name, res_layer) self.res_layers.append(layer_name) #### add dual backbone second_layer_name = 'second_layer{}'.format(i + 1) self.add_module(second_layer_name, second_res_layer) self.second_res_layers.append(second_layer_name) self._freeze_stages() self.feat_dim = self.block.expansion * base_channels * 2**( len(self.stage_blocks) - 1)
def __init__(self, img_size=224, in_channels=3, embed_dims=384, num_layers=14, out_indices=-1, drop_rate=0., drop_path_rate=0., norm_cfg=dict(type='LN'), final_norm=True, with_cls_token=True, output_cls_token=True, interpolate_mode='bicubic', t2t_cfg=dict(), layer_cfgs=dict(), init_cfg=None): super(T2T_ViT, self).__init__(init_cfg) # Token-to-Token Module self.tokens_to_token = T2TModule( img_size=img_size, in_channels=in_channels, embed_dims=embed_dims, **t2t_cfg) self.patch_resolution = self.tokens_to_token.init_out_size num_patches = self.patch_resolution[0] * self.patch_resolution[1] # Set cls token if output_cls_token: assert with_cls_token is True, f'with_cls_token must be True if' \ f'set output_cls_token to True, but got {with_cls_token}' self.with_cls_token = with_cls_token self.output_cls_token = output_cls_token self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims)) # Set position embedding self.interpolate_mode = interpolate_mode sinusoid_table = get_sinusoid_encoding( num_patches + self.num_extra_tokens, embed_dims) self.register_buffer('pos_embed', sinusoid_table) self._register_load_state_dict_pre_hook(self._prepare_pos_embed) self.drop_after_pos = nn.Dropout(p=drop_rate) if isinstance(out_indices, int): out_indices = [out_indices] assert isinstance(out_indices, Sequence), \ f'"out_indices" must be a sequence or int, ' \ f'get {type(out_indices)} instead.' for i, index in enumerate(out_indices): if index < 0: out_indices[i] = num_layers + index assert 0 <= out_indices[i] <= num_layers, \ f'Invalid out_indices {index}' self.out_indices = out_indices # stochastic depth decay rule dpr = [x for x in np.linspace(0, drop_path_rate, num_layers)] self.encoder = ModuleList() for i in range(num_layers): if isinstance(layer_cfgs, Sequence): layer_cfg = layer_cfgs[i] else: layer_cfg = deepcopy(layer_cfgs) layer_cfg = { 'embed_dims': embed_dims, 'num_heads': 6, 'feedforward_channels': 3 * embed_dims, 'drop_path_rate': dpr[i], 'qkv_bias': False, 'norm_cfg': norm_cfg, **layer_cfg } layer = T2TTransformerLayer(**layer_cfg) self.encoder.append(layer) self.final_norm = final_norm if final_norm: self.norm = build_norm_layer(norm_cfg, embed_dims)[1] else: self.norm = nn.Identity()