Example 1
    def __init__(self,
                 arch='b',
                 img_size=224,
                 patch_size=16,
                 in_channels=3,
                 ffn_ratio=4,
                 qkv_bias=False,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 act_cfg=dict(type='GELU'),
                 norm_cfg=dict(type='LN'),
                 first_stride=4,
                 num_fcs=2,
                 init_cfg=[
                     dict(type='TruncNormal', layer='Linear', std=.02),
                     dict(type='Constant', layer='LayerNorm', val=1., bias=0.)
                 ]):
        super(TNT, self).__init__(init_cfg=init_cfg)

        if isinstance(arch, str):
            arch = arch.lower()
            assert arch in set(self.arch_zoo), \
                f'Arch {arch} is not in default archs {set(self.arch_zoo)}'
            self.arch_settings = self.arch_zoo[arch]
        else:
            essential_keys = {
                'embed_dims_outer', 'embed_dims_inner', 'num_layers',
                'num_heads_inner', 'num_heads_outer'
            }
            assert isinstance(arch, dict) and set(arch) == essential_keys, \
                f'Custom arch needs a dict with keys {essential_keys}'
            self.arch_settings = arch

        self.embed_dims_inner = self.arch_settings['embed_dims_inner']
        self.embed_dims_outer = self.arch_settings['embed_dims_outer']
        # embed_dims for consistency with other models
        self.embed_dims = self.embed_dims_outer
        self.num_layers = self.arch_settings['num_layers']
        self.num_heads_inner = self.arch_settings['num_heads_inner']
        self.num_heads_outer = self.arch_settings['num_heads_outer']

        self.pixel_embed = PixelEmbed(img_size=img_size,
                                      patch_size=patch_size,
                                      in_channels=in_channels,
                                      embed_dims_inner=self.embed_dims_inner,
                                      stride=first_stride)
        num_patches = self.pixel_embed.num_patches
        self.num_patches = num_patches
        new_patch_size = self.pixel_embed.new_patch_size
        num_pixel = new_patch_size[0] * new_patch_size[1]

        self.norm1_proj = build_norm_layer(norm_cfg, num_pixel *
                                           self.embed_dims_inner)[1]
        self.projection = nn.Linear(num_pixel * self.embed_dims_inner,
                                    self.embed_dims_outer)
        self.norm2_proj = build_norm_layer(norm_cfg, self.embed_dims_outer)[1]

        self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dims_outer))
        self.patch_pos = nn.Parameter(
            torch.zeros(1, num_patches + 1, self.embed_dims_outer))
        self.pixel_pos = nn.Parameter(
            torch.zeros(1, self.embed_dims_inner, new_patch_size[0],
                        new_patch_size[1]))
        self.drop_after_pos = nn.Dropout(p=drop_rate)

        dpr = [
            x.item()
            for x in torch.linspace(0, drop_path_rate, self.num_layers)
        ]  # stochastic depth decay rule
        self.layers = ModuleList()
        for i in range(self.num_layers):
            block_cfg = dict(ffn_ratio=ffn_ratio,
                             drop_rate=drop_rate,
                             attn_drop_rate=attn_drop_rate,
                             drop_path_rate=dpr[i],
                             num_fcs=num_fcs,
                             qkv_bias=qkv_bias,
                             norm_cfg=norm_cfg,
                             batch_first=True)
            self.layers.append(
                TnTLayer(num_pixel=num_pixel,
                         embed_dims_inner=self.embed_dims_inner,
                         embed_dims_outer=self.embed_dims_outer,
                         num_heads_inner=self.num_heads_inner,
                         num_heads_outer=self.num_heads_outer,
                         inner_block_cfg=block_cfg,
                         outer_block_cfg=block_cfg,
                         norm_cfg=norm_cfg))

        self.norm = build_norm_layer(norm_cfg, self.embed_dims_outer)[1]

        trunc_normal_(self.cls_token, std=.02)
        trunc_normal_(self.patch_pos, std=.02)
        trunc_normal_(self.pixel_pos, std=.02)
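
A minimal standalone sketch of the stochastic depth decay rule used above, in plain PyTorch (rate and depth are assumed values):

import torch

# Drop-path rates grow linearly with depth, so deeper layers are
# dropped more often during training.
drop_path_rate, num_layers = 0.1, 12
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, num_layers)]
print(dpr[0], dpr[-1])  # 0.0 0.1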
Example 2
    def __init__(self,
                 block,
                 num_blocks,
                 in_channels,
                 out_channels,
                 expansion=None,
                 stride=1,
                 avg_down=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 downsample_first=True,
                 kernel_size=3,
                 groups=1,
                 attention=False,
                 **kwargs):
        # Protect mutable default arguments
        norm_cfg = copy.deepcopy(norm_cfg)
        self.block = block
        self.expansion = get_expansion(block, expansion)

        downsample = None
        if stride != 1 or in_channels != out_channels:
            downsample = []
            conv_stride = stride
            if avg_down and stride != 1:
                conv_stride = 1
                downsample.append(
                    nn.AvgPool2d(kernel_size=stride,
                                 stride=stride,
                                 ceil_mode=True,
                                 count_include_pad=False))
            downsample.extend([
                build_conv_layer(conv_cfg,
                                 in_channels,
                                 out_channels,
                                 kernel_size=1,
                                 stride=conv_stride,
                                 bias=False),
                build_norm_layer(norm_cfg, out_channels)[1]
            ])
            downsample = nn.Sequential(*downsample)

        layers = []
        if downsample_first:
            layers.append(
                block(in_channels=in_channels,
                      out_channels=out_channels,
                      expansion=self.expansion,
                      stride=stride,
                      downsample=downsample,
                      conv_cfg=conv_cfg,
                      norm_cfg=norm_cfg,
                      kernel_size=kernel_size,
                      groups=groups,
                      attention=attention,
                      **kwargs))
            in_channels = out_channels
            for _ in range(1, num_blocks):
                layers.append(
                    block(in_channels=in_channels,
                          out_channels=out_channels,
                          expansion=self.expansion,
                          stride=1,
                          conv_cfg=conv_cfg,
                          norm_cfg=norm_cfg,
                          kernel_size=kernel_size,
                          groups=groups,
                          attention=attention,
                          **kwargs))
        else:  # downsample_first=False is for HourglassModule
            for i in range(0, num_blocks - 1):
                layers.append(
                    block(in_channels=in_channels,
                          out_channels=in_channels,
                          expansion=self.expansion,
                          stride=1,
                          conv_cfg=conv_cfg,
                          norm_cfg=norm_cfg,
                          kernel_size=kernel_size,
                          groups=groups,
                          attention=attention,
                          **kwargs))
            layers.append(
                block(in_channels=in_channels,
                      out_channels=out_channels,
                      expansion=self.expansion,
                      stride=stride,
                      downsample=downsample,
                      conv_cfg=conv_cfg,
                      norm_cfg=norm_cfg,
                      kernel_size=kernel_size,
                      groups=groups,
                      attention=attention,
                      **kwargs))

        super().__init__(*layers)
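
The `avg_down` shortcut assembled above can be sketched in plain PyTorch (channel counts and stride are assumed values): the average pool absorbs the stride, so the 1x1 projection runs at stride 1 and no activations are skipped over.

import torch
import torch.nn as nn

in_channels, out_channels, stride = 64, 128, 2
downsample = nn.Sequential(
    nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True,
                 count_include_pad=False),
    nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1,
              bias=False),
    nn.BatchNorm2d(out_channels))
print(downsample(torch.randn(1, in_channels, 56, 56)).shape)
# torch.Size([1, 128, 28, 28])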
Example 3
    def __init__(self,
                 inplanes,
                 planes,
                 scales=4,
                 base_width=26,
                 base_channels=64,
                 stage_type='normal',
                 sac=None,
                 **kwargs):
        """Bottle2neck block for Res2Net.

        If style is "pytorch", the stride-two layer is the 3x3 conv layer, if
        it is "caffe", the stride-two layer is the first 1x1 conv layer.
        """
        super(Bottle2neck, self).__init__(inplanes, planes, **kwargs)
        assert scales > 1, 'Res2Net degenerates to ResNet when scales = 1.'
        assert sac is None or isinstance(sac, dict)
        self.sac = sac
        self.with_sac = sac is not None

        width = int(math.floor(self.planes * (base_width / base_channels)))

        self.norm1_name, norm1 = build_norm_layer(self.norm_cfg,
                                                  width * scales,
                                                  postfix=1)
        self.norm3_name, norm3 = build_norm_layer(self.norm_cfg,
                                                  self.planes * self.expansion,
                                                  postfix=3)

        self.conv1 = build_conv_layer(self.conv_cfg,
                                      self.inplanes,
                                      width * scales,
                                      kernel_size=1,
                                      stride=self.conv1_stride,
                                      bias=False)
        self.add_module(self.norm1_name, norm1)

        if stage_type == 'stage' and self.conv2_stride != 1:
            self.pool = nn.AvgPool2d(kernel_size=3,
                                     stride=self.conv2_stride,
                                     padding=1)
        convs = []
        bns = []

        fallback_on_stride = False
        if self.with_dcn:
            fallback_on_stride = self.dcn.pop('fallback_on_stride', False)

        if self.with_sac:
            for i in range(scales - 1):
                convs.append(
                    build_conv_layer(self.sac,
                                     width,
                                     width,
                                     kernel_size=3,
                                     stride=self.conv2_stride,
                                     padding=self.dilation,
                                     dilation=self.dilation,
                                     bias=False))
                bns.append(
                    build_norm_layer(self.norm_cfg, width, postfix=i + 1)[1])
            self.convs = nn.ModuleList(convs)
            self.bns = nn.ModuleList(bns)
        elif not self.with_dcn or fallback_on_stride:
            for i in range(scales - 1):
                convs.append(
                    build_conv_layer(self.conv_cfg,
                                     width,
                                     width,
                                     kernel_size=3,
                                     stride=self.conv2_stride,
                                     padding=self.dilation,
                                     dilation=self.dilation,
                                     bias=False))
                bns.append(
                    build_norm_layer(self.norm_cfg, width, postfix=i + 1)[1])
            self.convs = nn.ModuleList(convs)
            self.bns = nn.ModuleList(bns)
        else:
            assert self.conv_cfg is None, 'conv_cfg must be None for DCN'
            for i in range(scales - 1):
                convs.append(
                    build_conv_layer(self.dcn,
                                     width,
                                     width,
                                     kernel_size=3,
                                     stride=self.conv2_stride,
                                     padding=self.dilation,
                                     dilation=self.dilation,
                                     bias=False))
                bns.append(
                    build_norm_layer(self.norm_cfg, width, postfix=i + 1)[1])
            self.convs = nn.ModuleList(convs)
            self.bns = nn.ModuleList(bns)

        self.conv3 = build_conv_layer(self.conv_cfg,
                                      width * scales,
                                      self.planes * self.expansion,
                                      kernel_size=1,
                                      bias=False)
        self.add_module(self.norm3_name, norm3)

        self.stage_type = stage_type
        self.scales = scales
        self.width = width
        delattr(self, 'conv2')
        delattr(self, self.norm2_name)
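
The Res2Net channel bookkeeping above is easier to see standalone (assumed values): conv1 expands to `width * scales` channels, which the 3x3 convs then consume as chunks of `width` channels each.

import math
import torch

planes, base_width, base_channels, scales = 64, 26, 64, 4
width = int(math.floor(planes * (base_width / base_channels)))
x = torch.randn(1, width * scales, 14, 14)
spx = torch.split(x, width, dim=1)  # `scales` chunks of `width` channels
print(width, len(spx), spx[0].shape)  # 26 4 torch.Size([1, 26, 14, 14])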
Example 4
    def __init__(self,
                 img_size=(224, 224),
                 patch_size=16,
                 in_channels=3,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
                 mlp_ratio=4,
                 out_indices=11,
                 qkv_bias=True,
                 qk_scale=None,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 norm_cfg=dict(type='LN', eps=1e-6, requires_grad=True),
                 act_cfg=dict(type='GELU'),
                 norm_eval=False,
                 final_norm=False,
                 out_shape='NCHW',
                 with_cls_token=True,
                 interpolate_mode='bicubic',
                 with_cp=False):
        super(VisionTransformer, self).__init__()
        self.img_size = img_size
        self.patch_size = patch_size
        self.features = self.embed_dim = embed_dim
        self.patch_embed = PatchEmbed(img_size=img_size,
                                      patch_size=patch_size,
                                      in_channels=in_channels,
                                      embed_dim=embed_dim)

        self.with_cls_token = with_cls_token
        self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim))
        self.pos_embed = nn.Parameter(
            torch.zeros(1, self.patch_embed.num_patches + 1, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        if isinstance(out_indices, int):
            self.out_indices = [out_indices]
        elif isinstance(out_indices, list) or isinstance(out_indices, tuple):
            self.out_indices = out_indices
        else:
            raise TypeError('out_indices must be type of int, list or tuple')

        # stochastic depth decay rule
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
        self.blocks = nn.ModuleList([
            Block(dim=embed_dim,
                  num_heads=num_heads,
                  mlp_ratio=mlp_ratio,
                  qkv_bias=qkv_bias,
                  qk_scale=qk_scale,
                  drop=dpr[i],
                  attn_drop=attn_drop_rate,
                  act_cfg=act_cfg,
                  norm_cfg=norm_cfg,
                  with_cp=with_cp) for i in range(depth)
        ])

        assert out_shape in ['NLC', 'NCHW'], \
            'output shape must be "NLC" or "NCHW".'

        self.out_shape = out_shape

        self.interpolate_mode = interpolate_mode
        self.final_norm = final_norm
        if final_norm:
            _, self.norm = build_norm_layer(norm_cfg, embed_dim)

        self.norm_eval = norm_eval
        self.with_cp = with_cp
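
A minimal sketch of the cls-token and position-embedding setup above (shapes assumed: 196 patches from a 224x224 image with 16x16 patches):

import torch
import torch.nn as nn

embed_dim, num_patches, batch = 768, 196, 2
cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
x = torch.randn(batch, num_patches, embed_dim)       # patch tokens
x = torch.cat([cls_token.expand(batch, -1, -1), x], dim=1)
x = x + pos_embed                                    # broadcasts over batch
print(x.shape)  # torch.Size([2, 197, 768])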
Example 5
    def __init__(self, *args, norm_cfg=dict(type='LN'), **kwargs):
        super().__init__(*args, **kwargs)
        self.norm = build_norm_layer(norm_cfg, self.embed_dims)[1]
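
`build_norm_layer` returns a (name, module) tuple, hence the trailing `[1]` throughout these examples; with norm_cfg=dict(type='LN') the kept module is a plain LayerNorm (sketch, embed_dims assumed):

import torch.nn as nn

norm = nn.LayerNorm(256)  # what build_norm_layer(dict(type='LN'), 256)[1] yields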
Example 6
    def _make_fuse_layers(self):
        """Make fuse layer."""
        if self.num_branches == 1:
            return None

        num_branches = self.num_branches
        in_channels = self.in_channels
        fuse_layers = []
        num_out_branches = num_branches if self.multiscale_output else 1
        for i in range(num_out_branches):
            fuse_layer = []
            for j in range(num_branches):
                if j > i:
                    fuse_layer.append(
                        nn.Sequential(
                            build_conv_layer(self.conv_cfg,
                                             in_channels[j],
                                             in_channels[i],
                                             kernel_size=1,
                                             stride=1,
                                             padding=0,
                                             bias=False),
                            build_norm_layer(self.norm_cfg, in_channels[i])[1],
                            nn.Upsample(scale_factor=2**(j - i),
                                        mode='nearest')))
                    # GhostModule(
                    #             in_channels[j],
                    #             in_channels[i],
                    #             kernel_size=1,
                    #             ratio=2,
                    #             dw_size=3,
                    #             stride=1,
                    #             relu=False), ## BN and (optional) ReLU already included
                    # nn.Upsample(
                    #         scale_factor=2**(j - i), mode='nearest')))
                elif j == i:
                    fuse_layer.append(None)
                else:  # j < i
                    conv_downsamples = []
                    for k in range(i - j):
                        if k == i - j - 1:
                            conv_downsamples.append(
                                nn.Sequential(
                                    build_conv_layer(self.conv_cfg,
                                                     in_channels[j],
                                                     in_channels[j],
                                                     kernel_size=3,
                                                     stride=2,
                                                     padding=1,
                                                     groups=in_channels[j],
                                                     bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[j])[1],
                                    build_conv_layer(self.conv_cfg,
                                                     in_channels[j],
                                                     in_channels[i],
                                                     kernel_size=1,
                                                     stride=1,
                                                     padding=0,
                                                     bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[i])[1]))
                            # GhostModule(
                            #     in_channels[j],
                            #     in_channels[i],
                            #     kernel_size=1,
                            #     ratio=2,
                            #     dw_size=3,
                            #     stride=1,
                            #     relu=False))) ## BN and (optional) ReLU already included
                        else:
                            conv_downsamples.append(
                                nn.Sequential(
                                    build_conv_layer(self.conv_cfg,
                                                     in_channels[j],
                                                     in_channels[j],
                                                     kernel_size=3,
                                                     stride=2,
                                                     padding=1,
                                                     groups=in_channels[j],
                                                     bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[j])[1],
                                    build_conv_layer(self.conv_cfg,
                                                     in_channels[j],
                                                     in_channels[j],
                                                     kernel_size=1,
                                                     stride=1,
                                                     padding=0,
                                                     bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[j])[1],
                                    nn.ReLU(inplace=True)))
                            # GhostModule(
                            #     in_channels[j],
                            #     in_channels[j],
                            #     kernel_size=1,
                            #     ratio=2,
                            #     dw_size=3,
                            #     stride=1,
                            #     relu=True))) ## BN and (optional) ReLU already included
                    fuse_layer.append(nn.Sequential(*conv_downsamples))
            fuse_layers.append(nn.ModuleList(fuse_layer))

        return nn.ModuleList(fuse_layers)
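
The j < i downsample branches above use a depthwise-separable pattern; a standalone sketch (channel counts assumed): a 3x3 stride-2 depthwise conv, then a 1x1 pointwise conv, each followed by BN.

import torch
import torch.nn as nn

c_in, c_out = 40, 80
down = nn.Sequential(
    nn.Conv2d(c_in, c_in, 3, stride=2, padding=1, groups=c_in, bias=False),
    nn.BatchNorm2d(c_in),
    nn.Conv2d(c_in, c_out, 1, bias=False),
    nn.BatchNorm2d(c_out))
print(down(torch.randn(1, c_in, 32, 32)).shape)  # torch.Size([1, 80, 16, 16])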
Example 7
    def _make_fuse_layers(self):
        """Make fuse layer."""
        if self.num_branches == 1:
            return None

        num_branches = self.num_branches
        in_channels = self.in_channels
        fuse_layers = []
        num_out_branches = num_branches if self.multiscale_output else 1

        for i in range(num_out_branches):
            fuse_layer = []
            for j in range(num_branches):
                if j > i:
                    fuse_layer.append(
                        nn.Sequential(
                            build_conv_layer(
                                self.conv_cfg,
                                in_channels[j],
                                in_channels[i],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False),
                            build_norm_layer(self.norm_cfg, in_channels[i])[1],
                            nn.Upsample(
                                scale_factor=2**(j - i),
                                mode=self.upsample_cfg['mode'],
                                align_corners=self.upsample_cfg[
                                    'align_corners'])))
                elif j == i:
                    fuse_layer.append(None)
                else:
                    conv_downsamples = []
                    for k in range(i - j):
                        if k == i - j - 1:
                            conv_downsamples.append(
                                nn.Sequential(
                                    build_conv_layer(
                                        self.conv_cfg,
                                        in_channels[j],
                                        in_channels[i],
                                        kernel_size=3,
                                        stride=2,
                                        padding=1,
                                        bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[i])[1]))
                        else:
                            conv_downsamples.append(
                                nn.Sequential(
                                    build_conv_layer(
                                        self.conv_cfg,
                                        in_channels[j],
                                        in_channels[j],
                                        kernel_size=3,
                                        stride=2,
                                        padding=1,
                                        bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[j])[1],
                                    nn.ReLU(inplace=True)))
                    fuse_layer.append(nn.Sequential(*conv_downsamples))
            fuse_layers.append(nn.ModuleList(fuse_layer))

        return nn.ModuleList(fuse_layers)
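
The j > i branch above in isolation (j - i = 2 and channel counts assumed): a 1x1 conv aligns channels, then nn.Upsample enlarges by 2**(j - i).

import torch
import torch.nn as nn

branch = nn.Sequential(
    nn.Conv2d(128, 32, 1, bias=False),
    nn.BatchNorm2d(32),
    nn.Upsample(scale_factor=4, mode='nearest'))
print(branch(torch.randn(1, 128, 8, 8)).shape)  # torch.Size([1, 32, 32, 32])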
Example 8
    def __init__(self,
                 num_classes=80,
                 num_ffn_fcs=2,
                 num_heads=8,
                 num_cls_fcs=1,
                 num_reg_fcs=3,
                 feedforward_channels=2048,
                 in_channels=256,
                 dropout=0.0,
                 ffn_act_cfg=dict(type='ReLU', inplace=True),
                 dynamic_conv_cfg=dict(
                     type='DynamicConv',
                     in_channels=256,
                     feat_channels=64,
                     out_channels=256,
                     input_feat_shape=7,
                     act_cfg=dict(type='ReLU', inplace=True),
                     norm_cfg=dict(type='LN')),
                 loss_iou=dict(type='GIoULoss', loss_weight=2.0),
                 **kwargs):
        super(DIIHead, self).__init__(
            num_classes=num_classes,
            reg_decoded_bbox=True,
            reg_class_agnostic=True,
            **kwargs)
        self.loss_iou = build_loss(loss_iou)
        self.in_channels = in_channels
        self.fp16_enabled = False
        self.attention = MultiheadAttention(in_channels, num_heads, dropout)
        self.attention_norm = build_norm_layer(dict(type='LN'), in_channels)[1]

        self.instance_interactive_conv = build_transformer(dynamic_conv_cfg)
        self.instance_interactive_conv_dropout = nn.Dropout(dropout)
        self.instance_interactive_conv_norm = build_norm_layer(
            dict(type='LN'), in_channels)[1]

        self.ffn = FFN(
            in_channels,
            feedforward_channels,
            num_ffn_fcs,
            act_cfg=ffn_act_cfg,
            dropout=dropout)
        self.ffn_norm = build_norm_layer(dict(type='LN'), in_channels)[1]

        self.cls_fcs = nn.ModuleList()
        for _ in range(num_cls_fcs):
            self.cls_fcs.append(
                nn.Linear(in_channels, in_channels, bias=False))
            self.cls_fcs.append(
                build_norm_layer(dict(type='LN'), in_channels)[1])
            self.cls_fcs.append(
                build_activation_layer(dict(type='ReLU', inplace=True)))

        # override the self.fc_cls in BBoxHead
        if self.loss_cls.use_sigmoid:
            self.fc_cls = nn.Linear(in_channels, self.num_classes)
        else:
            self.fc_cls = nn.Linear(in_channels, self.num_classes + 1)

        self.reg_fcs = nn.ModuleList()
        for _ in range(num_reg_fcs):
            self.reg_fcs.append(
                nn.Linear(in_channels, in_channels, bias=False))
            self.reg_fcs.append(
                build_norm_layer(dict(type='LN'), in_channels)[1])
            self.reg_fcs.append(
                build_activation_layer(dict(type='ReLU', inplace=True)))
        # override the self.fc_reg in BBoxHead
        self.fc_reg = nn.Linear(in_channels, 4)

        assert self.reg_class_agnostic, 'DIIHead only ' \
            'supports `reg_class_agnostic=True` '
        assert self.reg_decoded_bbox, 'DIIHead only ' \
            'supports `reg_decoded_bbox=True`'
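
The classifier width chosen above depends on the loss type; a sketch with assumed values: sigmoid-based losses predict `num_classes` logits, softmax-based ones add a background class.

import torch.nn as nn

num_classes, in_channels, use_sigmoid = 80, 256, True
fc_cls = nn.Linear(in_channels,
                   num_classes if use_sigmoid else num_classes + 1)
print(fc_cls)  # Linear(in_features=256, out_features=80, bias=True)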
Example 9
    def __init__(self,
                 block,
                 inplanes,
                 planes,
                 num_blocks,
                 stride=1,
                 dilation=1,
                 avg_down=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 downsample_first=True,
                 **kwargs):
        self.block = block

        downsample = None  # stays None when no projection shortcut is needed
        if stride != 1 or inplanes != planes * block.expansion:
            if stride == 1 and dilation == 1:
                kernel_size = 1
                dd = 1
                padding = 0
            else:
                kernel_size = 3
                if dilation > 1:
                    dd = dilation // 2
                    padding = dd
                else:
                    dd = 1
                    padding = 0
            downsample = nn.Sequential(
                build_conv_layer(
                    conv_cfg,
                    inplanes,
                    planes * block.expansion,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                    dilation=dd,
                    bias=False),
                build_norm_layer(norm_cfg, planes * block.expansion)[1])

        layers = []
        if downsample_first:
            layers.append(
                block(
                    inplanes=inplanes,
                    planes=planes,
                    stride=stride,
                    dilation=dilation,
                    downsample=downsample,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    **kwargs))
            inplanes = planes * block.expansion
            for _ in range(1, num_blocks):
                layers.append(
                    block(
                        inplanes=inplanes,
                        planes=planes,
                        stride=1,
                        dilation=dilation,
                        conv_cfg=conv_cfg,
                        norm_cfg=norm_cfg,
                        **kwargs))

        else:  # downsample_first=False is for HourglassModule
            for _ in range(num_blocks - 1):
                layers.append(
                    block(
                        inplanes=inplanes,
                        planes=inplanes,
                        stride=1,
                        dilation=dilation,
                        conv_cfg=conv_cfg,
                        norm_cfg=norm_cfg,
                        **kwargs))
            layers.append(
                block(
                    inplanes=inplanes,
                    planes=planes,
                    stride=stride,
                    dilation=dilation,
                    downsample=downsample,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    **kwargs))
        super(SOTResLayer, self).__init__(*layers)
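
The shortcut's kernel size, dilation, and padding above track the block's stride and dilation; the same rule as a standalone helper (sketch):

def shortcut_params(stride, dilation):
    # stride 1 and dilation 1: a plain 1x1 projection suffices
    if stride == 1 and dilation == 1:
        return dict(kernel_size=1, dilation=1, padding=0)
    # otherwise a 3x3 conv, padded only when dilated
    dd = dilation // 2 if dilation > 1 else 1
    return dict(kernel_size=3, dilation=dd,
                padding=dd if dilation > 1 else 0)

print(shortcut_params(2, 1))  # {'kernel_size': 3, 'dilation': 1, 'padding': 0}
print(shortcut_params(1, 2))  # {'kernel_size': 3, 'dilation': 1, 'padding': 1}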
Example 10
    def __init__(self,
                 in_channels,
                 out_channels,
                 num_deconv_layers=3,
                 num_deconv_filters=(256, 256, 256),
                 num_deconv_kernels=(4, 4, 4),
                 extra=None,
                 in_index=0,
                 input_transform=None,
                 align_corners=False,
                 loss_keypoint=None,
                 train_cfg=None,
                 test_cfg=None):
        super().__init__()

        self.in_channels = in_channels
        self.loss = build_loss(loss_keypoint)

        self.train_cfg = {} if train_cfg is None else train_cfg
        self.test_cfg = {} if test_cfg is None else test_cfg
        self.target_type = self.test_cfg.get('target_type', 'GaussianHeatMap')

        self._init_inputs(in_channels, in_index, input_transform)
        self.in_index = in_index
        self.align_corners = align_corners

        if extra is not None and not isinstance(extra, dict):
            raise TypeError('extra should be dict or None.')

        if num_deconv_layers > 0:
            self.deconv_layers = self._make_deconv_layer(
                num_deconv_layers,
                num_deconv_filters,
                num_deconv_kernels,
            )
        elif num_deconv_layers == 0:
            self.deconv_layers = nn.Identity()
        else:
            raise ValueError(
                f'num_deconv_layers ({num_deconv_layers}) should >= 0.')

        identity_final_layer = False
        if extra is not None and 'final_conv_kernel' in extra:
            assert extra['final_conv_kernel'] in [0, 1, 3]
            if extra['final_conv_kernel'] == 3:
                padding = 1
            elif extra['final_conv_kernel'] == 1:
                padding = 0
            else:
                # 0 for Identity mapping.
                identity_final_layer = True
            kernel_size = extra['final_conv_kernel']
        else:
            kernel_size = 1
            padding = 0

        if identity_final_layer:
            self.final_layer = nn.Identity()
        else:
            conv_channels = (num_deconv_filters[-1]
                             if num_deconv_layers > 0 else self.in_channels)

            layers = []
            if extra is not None:
                num_conv_layers = extra.get('num_conv_layers', 0)
                num_conv_kernels = extra.get('num_conv_kernels',
                                             [1] * num_conv_layers)

                for i in range(num_conv_layers):
                    layers.append(
                        build_conv_layer(
                            dict(type='Conv2d'),
                            in_channels=conv_channels,
                            out_channels=conv_channels,
                            kernel_size=num_conv_kernels[i],
                            stride=1,
                            padding=(num_conv_kernels[i] - 1) // 2))
                    layers.append(
                        build_norm_layer(dict(type='BN'), conv_channels)[1])
                    layers.append(nn.ReLU(inplace=True))

            layers.append(
                build_conv_layer(
                    cfg=dict(type='Conv2d'),
                    in_channels=conv_channels,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=1,
                    padding=padding))

            if len(layers) > 1:
                self.final_layer = nn.Sequential(*layers)
            else:
                self.final_layer = layers[0]
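
The `(kernel_size - 1) // 2` padding used for the extra convs above keeps the spatial size unchanged at stride 1 for odd kernels (sketch, channels assumed):

import torch
import torch.nn as nn

for k in (1, 3):
    conv = nn.Conv2d(256, 256, kernel_size=k, stride=1,
                     padding=(k - 1) // 2)
    print(k, conv(torch.randn(1, 256, 64, 48)).shape)  # size preserved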
Example 11
    def __init__(self,
                 extra,
                 in_channels=3,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 norm_eval=True,
                 with_cp=False,
                 zero_init_residual=False):
        super(HRNet, self).__init__()
        self.extra = extra
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp
        self.zero_init_residual = zero_init_residual

        # stem net
        self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1)
        self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2)

        self.conv1 = build_conv_layer(
            self.conv_cfg,
            in_channels,
            64,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False)

        self.add_module(self.norm1_name, norm1)
        self.conv2 = build_conv_layer(
            self.conv_cfg,
            64,
            64,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False)

        self.add_module(self.norm2_name, norm2)
        self.relu = nn.ReLU(inplace=True)

        # stage 1
        self.stage1_cfg = self.extra['stage1']
        num_channels = self.stage1_cfg['num_channels'][0]
        block_type = self.stage1_cfg['block']
        num_blocks = self.stage1_cfg['num_blocks'][0]

        block = self.blocks_dict[block_type]
        stage1_out_channels = num_channels * block.expansion
        self.layer1 = self._make_layer(block, 64, num_channels, num_blocks)

        # stage 2
        self.stage2_cfg = self.extra['stage2']
        num_channels = self.stage2_cfg['num_channels']
        block_type = self.stage2_cfg['block']

        block = self.blocks_dict[block_type]
        num_channels = [channel * block.expansion for channel in num_channels]
        self.transition1 = self._make_transition_layer([stage1_out_channels],
                                                       num_channels)
        self.stage2, pre_stage_channels = self._make_stage(
            self.stage2_cfg, num_channels)

        # stage 3
        self.stage3_cfg = self.extra['stage3']
        num_channels = self.stage3_cfg['num_channels']
        block_type = self.stage3_cfg['block']

        block = self.blocks_dict[block_type]
        num_channels = [channel * block.expansion for channel in num_channels]
        self.transition2 = self._make_transition_layer(pre_stage_channels,
                                                       num_channels)
        self.stage3, pre_stage_channels = self._make_stage(
            self.stage3_cfg, num_channels)

        # stage 4
        self.stage4_cfg = self.extra['stage4']
        num_channels = self.stage4_cfg['num_channels']
        block_type = self.stage4_cfg['block']

        block = self.blocks_dict[block_type]
        num_channels = [channel * block.expansion for channel in num_channels]
        self.transition3 = self._make_transition_layer(pre_stage_channels,
                                                       num_channels)
        self.stage4, pre_stage_channels = self._make_stage(
            self.stage4_cfg, num_channels)
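
The stem above in plain PyTorch (sketch; BN and ReLU applied after each conv as in the forward pass): two 3x3 stride-2 convs reduce the input resolution by 4x before stage 1.

import torch
import torch.nn as nn

stem = nn.Sequential(
    nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(64), nn.ReLU(inplace=True),
    nn.Conv2d(64, 64, 3, stride=2, padding=1, bias=False),
    nn.BatchNorm2d(64), nn.ReLU(inplace=True))
print(stem(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 64, 56, 56])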
Example 12
    def __init__(self,
                 inplanes,
                 planes,
                 groups=1,
                 base_width=4,
                 base_channels=64,
                 radix=2,
                 reduction_factor=4,
                 avd=True,
                 **kwargs):
        """Bottleneck block for ResNeSt.
        If style is "pytorch", the stride-two layer is the 3x3 conv layer, if
        it is "caffe", the stride-two layer is the first 1x1 conv layer.
        """
        super(Bottleneck, self).__init__(inplanes, planes, **kwargs)

        if groups == 1:
            width = self.planes
        else:
            width = math.floor(self.planes *
                               (base_width / base_channels)) * groups

        self.avd = avd and self.conv2_stride > 1

        self.norm1_name, norm1 = build_norm_layer(self.norm_cfg,
                                                  width,
                                                  postfix=1)
        self.norm3_name, norm3 = build_norm_layer(self.norm_cfg,
                                                  self.planes * self.expansion,
                                                  postfix=3)

        self.conv1 = build_conv_layer(self.conv_cfg,
                                      self.inplanes,
                                      width,
                                      kernel_size=1,
                                      stride=self.conv1_stride,
                                      bias=False)
        self.add_module(self.norm1_name, norm1)
        self.with_modulated_dcn = False
        self.conv2 = SplAtConv2d(width,
                                 width,
                                 kernel_size=3,
                                 stride=1 if self.avd else self.conv2_stride,
                                 padding=self.dilation,
                                 dilation=self.dilation,
                                 groups=groups,
                                 radix=radix,
                                 reduction_factor=reduction_factor,
                                 bias=False,
                                 conv_cfg=self.conv_cfg,
                                 norm_cfg=self.norm_cfg,
                                 dcn=self.dcn)
        delattr(self, self.norm2_name)

        if self.avd:
            self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1)

        self.conv3 = build_conv_layer(self.conv_cfg,
                                      width,
                                      self.planes * self.expansion,
                                      kernel_size=1,
                                      bias=False)
        self.add_module(self.norm3_name, norm3)
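
ResNeSt's `avd` trick sketched standalone (stride and channels assumed): when the block would stride, the 3x3 conv stays at stride 1 and a 3x3 average pool performs the downsampling instead.

import torch
import torch.nn as nn

conv2_stride = 2
avd_layer = nn.AvgPool2d(3, conv2_stride, padding=1)
print(avd_layer(torch.randn(1, 64, 28, 28)).shape)
# torch.Size([1, 64, 14, 14])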
Example 13
    def __init__(self,
                 inplanes,
                 planes,
                 groups=1,
                 base_width=4,
                 base_channels=64,
                 **kwargs):
        super(Bottleneck, self).__init__(inplanes, planes, **kwargs)

        if groups == 1:
            width = self.planes
        else:
            width = math.floor(self.planes *
                               (base_width / base_channels)) * groups

        self.norm1_name, norm1 = build_norm_layer(self.norm_cfg,
                                                  width,
                                                  postfix=1)
        self.norm2_name, norm2 = build_norm_layer(self.norm_cfg,
                                                  width,
                                                  postfix=2)
        self.norm3_name, norm3 = build_norm_layer(self.norm_cfg,
                                                  self.planes * self.expansion,
                                                  postfix=3)

        self.conv1 = build_conv_layer(self.conv_cfg,
                                      self.inplanes,
                                      width,
                                      kernel_size=1,
                                      stride=self.conv1_stride,
                                      bias=False)
        self.add_module(self.norm1_name, norm1)
        fallback_on_stride = False
        self.with_modulated_dcn = False
        if self.with_dcn:
            fallback_on_stride = self.dcn.pop('fallback_on_stride', False)
        if not self.with_dcn or fallback_on_stride:
            self.conv2 = build_conv_layer(self.conv_cfg,
                                          width,
                                          width,
                                          kernel_size=3,
                                          stride=self.conv2_stride,
                                          padding=self.dilation,
                                          dilation=self.dilation,
                                          groups=groups,
                                          bias=False)
        else:
            assert self.conv_cfg is None, 'conv_cfg must be None for DCN'
            self.conv2 = build_conv_layer(self.dcn,
                                          width,
                                          width,
                                          kernel_size=3,
                                          stride=self.conv2_stride,
                                          padding=self.dilation,
                                          dilation=self.dilation,
                                          groups=groups,
                                          bias=False)

        self.add_module(self.norm2_name, norm2)
        self.conv3 = build_conv_layer(self.conv_cfg,
                                      width,
                                      self.planes * self.expansion,
                                      kernel_size=1,
                                      bias=False)
        self.add_module(self.norm3_name, norm3)
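
The ResNeXt-style width rule shared by these bottlenecks, as a small helper (assumed values): groups=32 with base_width=4 on planes=256 gives width 512.

import math

def grouped_width(planes, groups, base_width, base_channels=64):
    if groups == 1:
        return planes
    return math.floor(planes * (base_width / base_channels)) * groups

print(grouped_width(256, 32, 4))  # 512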
Example 14
    def __init__(self,
                 attn_cfgs=None,
                 ffn_cfgs=dict(
                     type='FFN',
                     embed_dims=256,
                     feedforward_channels=1024,
                     num_fcs=2,
                     ffn_drop=0.,
                     act_cfg=dict(type='ReLU', inplace=True),
                 ),
                 operation_order=None,
                 norm_cfg=dict(type='LN'),
                 init_cfg=None,
                 batch_first=False,
                 **kwargs):

        # Protect the mutable default `ffn_cfgs` before mutating it below.
        ffn_cfgs = copy.deepcopy(ffn_cfgs)
        deprecated_args = dict(feedforward_channels='feedforward_channels',
                               ffn_dropout='ffn_drop',
                               ffn_num_fcs='num_fcs')
        for ori_name, new_name in deprecated_args.items():
            if ori_name in kwargs:
                warnings.warn(
                    f'The arguments `{ori_name}` in BaseTransformerLayer '
                    f'has been deprecated, now you should set `{new_name}` '
                    f'and other FFN related arguments '
                    f'to a dict named `ffn_cfgs`. ')
                ffn_cfgs[new_name] = kwargs[ori_name]

        super(BaseTransformerLayer, self).__init__(init_cfg)

        self.batch_first = batch_first

        assert set(operation_order) & set(
            ['self_attn', 'norm', 'ffn', 'cross_attn']) == \
            set(operation_order), f'The operation_order of' \
            f' {self.__class__.__name__} should only contain operations ' \
            f"in {['self_attn', 'norm', 'ffn', 'cross_attn']}"

        num_attn = operation_order.count('self_attn') + operation_order.count(
            'cross_attn')
        if isinstance(attn_cfgs, dict):
            attn_cfgs = [copy.deepcopy(attn_cfgs) for _ in range(num_attn)]
        else:
            assert num_attn == len(attn_cfgs), f'The length ' \
                f'of attn_cfgs {len(attn_cfgs)} is ' \
                f'not consistent with the number of attentions ' \
                f'{num_attn} in operation_order {operation_order}.'

        self.num_attn = num_attn
        self.operation_order = operation_order
        self.norm_cfg = norm_cfg
        self.pre_norm = operation_order[0] == 'norm'
        self.attentions = ModuleList()

        index = 0
        for operation_name in operation_order:
            if operation_name in ['self_attn', 'cross_attn']:
                if 'batch_first' in attn_cfgs[index]:
                    assert self.batch_first == attn_cfgs[index]['batch_first']
                else:
                    attn_cfgs[index]['batch_first'] = self.batch_first
                attention = build_attention(attn_cfgs[index])
                # Some custom attentions used as `self_attn`
                # or `cross_attn` can have different behavior.
                attention.operation_name = operation_name
                self.attentions.append(attention)
                index += 1

        self.embed_dims = self.attentions[0].embed_dims

        self.ffns = ModuleList()
        num_ffns = operation_order.count('ffn')
        if isinstance(ffn_cfgs, dict):
            ffn_cfgs = ConfigDict(ffn_cfgs)
        if isinstance(ffn_cfgs, dict):
            ffn_cfgs = [copy.deepcopy(ffn_cfgs) for _ in range(num_ffns)]
        assert len(ffn_cfgs) == num_ffns
        for ffn_index in range(num_ffns):
            if 'embed_dims' not in ffn_cfgs[ffn_index]:
                ffn_cfgs[ffn_index]['embed_dims'] = self.embed_dims
            else:
                assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims
            self.ffns.append(
                build_feedforward_network(ffn_cfgs[ffn_index],
                                          dict(type='FFN')))

        self.norms = ModuleList()
        num_norms = operation_order.count('norm')
        for _ in range(num_norms):
            self.norms.append(build_norm_layer(norm_cfg, self.embed_dims)[1])
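
A typical post-norm decoder `operation_order` and the counts that drive how many attention, FFN, and norm modules get built above (sketch):

operation_order = ('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')
num_attn = (operation_order.count('self_attn') +
            operation_order.count('cross_attn'))
print(num_attn, operation_order.count('ffn'),
      operation_order.count('norm'))  # 2 1 3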
Example 15
    def __init__(self,
                 input_shapes,
                 in_features,
                 out_channels,
                 norm="",
                 top_block=None,
                 fuse_type="sum"):
        """
        Args:
            input_shapes (dict): mapping from input feature name to a shape
                object exposing `.stride` and `.channels` for that feature.
            in_features (list[str]): names of the input feature maps coming
                from the backbone to which FPN is attached. For example, if the
                backbone produces ["res2", "res3", "res4"], any *contiguous*
                sublist of these may be used; order must be from high to low
                resolution.
            out_channels (int): number of channels in the output feature maps.
            norm (str or dict): config dict used to construct the norm layer,
                e.g. dict(type='BN'); the empty string (default) disables
                normalization.
            top_block (nn.Module or None): if provided, an extra operation will
                be performed on the output of the last (smallest resolution)
                FPN output, and the result will extend the result list. The top_block
                further downsamples the feature map. It must have an attribute
                "num_levels", meaning the number of extra FPN levels added by
                this block, and "in_feature", which is a string representing
                its input feature (e.g., p5).
            fuse_type (str): types for fusing the top down features and the lateral
                ones. It can be "sum" (default), which sums up element-wise; or "avg",
                which takes the element-wise mean of the two.
        """
        super(FPN, self).__init__()

        # Feature map strides and channels from the bottom up network (e.g. ResNet)
        in_strides = [input_shapes[f].stride for f in in_features]
        in_channels = [input_shapes[f].channels for f in in_features]

        _assert_strides_are_log2_contiguous(in_strides)
        lateral_convs, lateral_norms = [], []
        output_convs, output_norms = [], []
        self.use_norm = norm != ""

        use_bias = norm == ""
        for idx, in_channels in enumerate(in_channels):
            lateral_conv = Conv2d(in_channels,
                                  out_channels,
                                  kernel_size=1,
                                  bias=use_bias)

            output_conv = Conv2d(out_channels,
                                 out_channels,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1,
                                 bias=use_bias)

            stage = int(math.log2(in_strides[idx]))

            if self.use_norm:
                _, lateral_norm = build_norm_layer(norm, out_channels, stage)
                _, output_norm = build_norm_layer(norm, out_channels, stage)
                self.add_module("fpn_lateral_norm{}".format(stage),
                                lateral_norm)
                self.add_module("fpn_output_norm{}".format(stage), output_norm)
                lateral_norms.append(lateral_norm)
                output_norms.append(output_norm)

            self.add_module("fpn_lateral{}".format(stage), lateral_conv)
            self.add_module("fpn_output{}".format(stage), output_conv)

            lateral_convs.append(lateral_conv)
            output_convs.append(output_conv)
        # Place convs into top-down order (from low to high resolution)
        # to make the top-down computation in forward clearer.
        self.lateral_convs = lateral_convs[::-1]
        self.output_convs = output_convs[::-1]
        if self.use_norm:
            self.lateral_norms = lateral_norms[::-1]
            self.output_norms = output_norms[::-1]

        self.top_block = top_block
        self.in_features = in_features
        # Return feature names are "p<stage>", like ["p2", "p3", ..., "p6"]
        self._out_feature_strides = {
            "p{}".format(int(math.log2(s))): s
            for s in in_strides
        }
        # top block output feature maps.
        if self.top_block is not None:
            for s in range(stage, stage + self.top_block.num_levels):
                self._out_feature_strides["p{}".format(s + 1)] = 2**(s + 1)

        self._out_features = list(self._out_feature_strides.keys())
        self._out_feature_channels = {
            k: out_channels
            for k in self._out_features
        }
        self._size_divisibility = in_strides[-1]
        assert fuse_type in {"avg", "sum"}
        self._fuse_type = fuse_type
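
The output names above come from log2 of the input strides: stride 4 maps to "p2", stride 8 to "p3", and so on (strides assumed):

import math

in_strides = [4, 8, 16, 32]
print({'p{}'.format(int(math.log2(s))): s for s in in_strides})
# {'p2': 4, 'p3': 8, 'p4': 16, 'p5': 32}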
Example 16
    def __init__(self,
                 inplanes,
                 planes,
                 groups=1,
                 base_width=4,
                 base_channels=64,
                 **kwargs):
        """Bottleneck block for ResNeXt.

        If style is "pytorch", the stride-two layer is the 3x3 conv layer, if
        it is "caffe", the stride-two layer is the first 1x1 conv layer.
        """
        super(Bottleneck, self).__init__(inplanes, planes, **kwargs)

        if groups == 1:
            width = self.planes
        else:
            width = math.floor(self.planes *
                               (base_width / base_channels)) * groups

        self.relu1 = DyReLUB(width)
        self.relu2 = DyReLUB(width)

        self.norm1_name, norm1 = build_norm_layer(self.norm_cfg,
                                                  width,
                                                  postfix=1)
        self.norm2_name, norm2 = build_norm_layer(self.norm_cfg,
                                                  width,
                                                  postfix=2)
        self.norm3_name, norm3 = build_norm_layer(self.norm_cfg,
                                                  self.planes * self.expansion,
                                                  postfix=3)

        self.conv1 = build_conv_layer(self.conv_cfg,
                                      self.inplanes,
                                      width,
                                      kernel_size=1,
                                      stride=self.conv1_stride,
                                      bias=False)
        self.add_module(self.norm1_name, norm1)
        fallback_on_stride = False
        self.with_modulated_dcn = False
        if self.with_dcn:
            fallback_on_stride = self.dcn.pop('fallback_on_stride', False)
        if not self.with_dcn or fallback_on_stride:
            self.conv2 = build_conv_layer(self.conv_cfg,
                                          width,
                                          width,
                                          kernel_size=3,
                                          stride=self.conv2_stride,
                                          padding=self.dilation,
                                          dilation=self.dilation,
                                          groups=groups,
                                          bias=False)
        else:
            assert self.conv_cfg is None, 'conv_cfg must be None for DCN'
            self.conv2 = build_conv_layer(self.dcn,
                                          width,
                                          width,
                                          kernel_size=3,
                                          stride=self.conv2_stride,
                                          padding=self.dilation,
                                          dilation=self.dilation,
                                          groups=groups,
                                          bias=False)

        self.add_module(self.norm2_name, norm2)
        self.conv3 = build_conv_layer(self.conv_cfg,
                                      width,
                                      self.planes * self.expansion,
                                      kernel_size=1,
                                      bias=False)
        self.add_module(self.norm3_name, norm3)
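
A sketch of the `fallback_on_stride` switch used above (config values assumed): a DCN config may opt out of deformable conv on strided layers and fall back to a regular 3x3 conv; note the flag is popped so it never reaches the conv builder.

dcn = dict(type='DCN', fallback_on_stride=True)
fallback_on_stride = dcn.pop('fallback_on_stride', False)
print(fallback_on_stride, dcn)  # True {'type': 'DCN'}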
Example 17
    def _make_transition_layer(self, num_channels_pre_layer,
                               num_channels_cur_layer):
        """Make transition layer."""
        num_branches_cur = len(num_channels_cur_layer)
        num_branches_pre = len(num_channels_pre_layer)

        transition_layers = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
                    transition_layers.append(
                        nn.Sequential(
                            build_conv_layer(self.conv_cfg,
                                             num_channels_pre_layer[i],
                                             num_channels_pre_layer[i],
                                             kernel_size=3,
                                             stride=1,
                                             padding=1,
                                             groups=num_channels_pre_layer[i],
                                             bias=False),
                            build_norm_layer(self.norm_cfg,
                                             num_channels_pre_layer[i])[1],
                            build_conv_layer(self.conv_cfg,
                                             num_channels_pre_layer[i],
                                             num_channels_cur_layer[i],
                                             kernel_size=1,
                                             stride=1,
                                             padding=0,
                                             bias=False),
                            build_norm_layer(self.norm_cfg,
                                             num_channels_cur_layer[i])[1],
                            nn.ReLU()))
                else:
                    transition_layers.append(None)
            else:
                conv_downsamples = []
                for j in range(i + 1 - num_branches_pre):
                    in_channels = num_channels_pre_layer[-1]
                    out_channels = num_channels_cur_layer[i] \
                        if j == i - num_branches_pre else in_channels
                    conv_downsamples.append(
                        nn.Sequential(
                            build_conv_layer(self.conv_cfg,
                                             in_channels,
                                             in_channels,
                                             kernel_size=3,
                                             stride=2,
                                             padding=1,
                                             groups=in_channels,
                                             bias=False),
                            build_norm_layer(self.norm_cfg, in_channels)[1],
                            build_conv_layer(self.conv_cfg,
                                             in_channels,
                                             out_channels,
                                             kernel_size=1,
                                             stride=1,
                                             padding=0,
                                             bias=False),
                            build_norm_layer(self.norm_cfg, out_channels)[1],
                            nn.ReLU()))
                transition_layers.append(nn.Sequential(*conv_downsamples))

        return nn.ModuleList(transition_layers)
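Each branch built above is a depthwise 3x3 conv (stride 2 for the new, lower-resolution branches) followed by a pointwise 1x1 projection. A minimal stand-in in plain torch, assuming BatchNorm for the norm_cfg:

import torch
import torch.nn as nn

in_ch, out_ch = 32, 64
branch = nn.Sequential(
    # Depthwise 3x3 with stride 2 halves the spatial resolution.
    nn.Conv2d(in_ch, in_ch, 3, stride=2, padding=1, groups=in_ch,
              bias=False),
    nn.BatchNorm2d(in_ch),
    # Pointwise 1x1 changes the channel width.
    nn.Conv2d(in_ch, out_ch, 1, bias=False),
    nn.BatchNorm2d(out_ch),
    nn.ReLU())
print(branch(torch.randn(1, in_ch, 56, 56)).shape)
# torch.Size([1, 64, 28, 28])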
Example no. 18
    def __init__(self,
                 block,
                 num_blocks,
                 in_channels,
                 out_channels,
                 expansion=None,
                 shortcut_type='B',
                 stride=1,
                 avg_down=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 **kwargs):
        self.block = block
        self.expansion = get_expansion(block, expansion)

        downsample = None
        if isinstance(stride, int):
            stride_flag = stride != 1
        elif isinstance(stride, tuple):
            stride_flag = stride[0] != 1
        else:
            raise TypeError(f'stride must be int or tuple, got {type(stride)}')
        if stride_flag or in_channels != out_channels:
            downsample = []
            conv_stride = stride
            if shortcut_type == 'A':
                # Use self.expansion: the `expansion` argument may be None
                # and has been resolved by get_expansion() above.
                downsample = partial(downsample_basic_block,
                                     planes=in_channels * self.expansion,
                                     stride=stride)
            else:
                if avg_down and stride != 1:
                    conv_stride = 1
                    downsample.append(
                        nn.AvgPool3d(kernel_size=stride,
                                     stride=stride,
                                     ceil_mode=True,
                                     count_include_pad=False))
                downsample.extend([
                    build_conv_layer(conv_cfg,
                                     in_channels,
                                     out_channels,
                                     kernel_size=1,
                                     stride=conv_stride,
                                     bias=False),
                    build_norm_layer(norm_cfg, out_channels)[1]
                ])
                downsample = nn.Sequential(*downsample)

        layers = []
        layers.append(
            block(in_channels=in_channels,
                  out_channels=out_channels,
                  expansion=self.expansion,
                  stride=stride,
                  downsample=downsample,
                  conv_cfg=conv_cfg,
                  norm_cfg=norm_cfg,
                  **kwargs))
        in_channels = out_channels
        for i in range(1, num_blocks):
            layers.append(
                block(in_channels=in_channels,
                      out_channels=out_channels,
                      expansion=self.expansion,
                      stride=1,
                      conv_cfg=conv_cfg,
                      norm_cfg=norm_cfg,
                      **kwargs))
        super(ResLayer, self).__init__(*layers)
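When `avg_down` is set and the stage strides, the shortcut above lets an AvgPool3d perform the striding so the 1x1 projection can run at stride 1. A plain-torch sketch, assuming BatchNorm3d for the norm_cfg:

import torch
import torch.nn as nn

stride, in_ch, out_ch = 2, 64, 128
shortcut = nn.Sequential(
    nn.AvgPool3d(kernel_size=stride, stride=stride, ceil_mode=True,
                 count_include_pad=False),
    nn.Conv3d(in_ch, out_ch, kernel_size=1, stride=1, bias=False),
    nn.BatchNorm3d(out_ch))
print(shortcut(torch.randn(1, in_ch, 8, 56, 56)).shape)
# torch.Size([1, 128, 4, 28, 28])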
Example no. 19
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_channels=3,
                 embed_dims=768,
                 num_layers=12,
                 num_heads=12,
                 mlp_ratio=4,
                 out_indices=-1,
                 qkv_bias=True,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 with_cls_token=True,
                 output_cls_token=False,
                 norm_cfg=dict(type='LN'),
                 act_cfg=dict(type='GELU'),
                 patch_norm=False,
                 final_norm=False,
                 interpolate_mode='bicubic',
                 num_fcs=2,
                 norm_eval=False,
                 with_cp=False,
                 pretrain_style='timm',
                 pretrained=None,
                 init_cfg=None):
        super(VisionTransformer, self).__init__()

        if isinstance(img_size, int):
            img_size = to_2tuple(img_size)
        elif isinstance(img_size, tuple):
            if len(img_size) == 1:
                img_size = to_2tuple(img_size[0])
            assert len(img_size) == 2, \
                f'img_size should have length 1 or 2, ' \
                f'but got {len(img_size)}'

        assert pretrain_style in ['timm', 'mmcls']

        if output_cls_token:
            assert with_cls_token is True, 'with_cls_token must be True ' \
                f'if output_cls_token is set to True, but got {with_cls_token}'

        if isinstance(pretrained, str):
            warnings.warn('DeprecationWarning: pretrained is deprecated, '
                          'please use "init_cfg" instead')
        elif pretrained is not None:
            raise TypeError('pretrained must be a str or None')

        self.img_size = img_size
        self.patch_size = patch_size
        self.interpolate_mode = interpolate_mode
        self.norm_eval = norm_eval
        self.with_cp = with_cp
        self.pretrain_style = pretrain_style
        self.pretrained = pretrained
        self.init_cfg = init_cfg

        self.patch_embed = PatchEmbed(
            in_channels=in_channels,
            embed_dims=embed_dims,
            conv_type='Conv2d',
            kernel_size=patch_size,
            stride=patch_size,
            pad_to_patch_size=True,
            norm_cfg=norm_cfg if patch_norm else None,
            init_cfg=None,
        )

        num_patches = (img_size[0] // patch_size) * \
            (img_size[1] // patch_size)

        self.with_cls_token = with_cls_token
        self.output_cls_token = output_cls_token
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims))
        self.pos_embed = nn.Parameter(
            torch.zeros(1, num_patches + 1, embed_dims))
        self.drop_after_pos = nn.Dropout(p=drop_rate)

        if isinstance(out_indices, int):
            if out_indices == -1:
                out_indices = num_layers - 1
            self.out_indices = [out_indices]
        elif isinstance(out_indices, list) or isinstance(out_indices, tuple):
            self.out_indices = out_indices
        else:
            raise TypeError('out_indices must be type of int, list or tuple')

        dpr = [
            x.item() for x in torch.linspace(0, drop_path_rate, num_layers)
        ]  # stochastic depth decay rule

        self.layers = ModuleList()
        for i in range(num_layers):
            self.layers.append(
                TransformerEncoderLayer(embed_dims=embed_dims,
                                        num_heads=num_heads,
                                        feedforward_channels=mlp_ratio *
                                        embed_dims,
                                        attn_drop_rate=attn_drop_rate,
                                        drop_rate=drop_rate,
                                        drop_path_rate=dpr[i],
                                        num_fcs=num_fcs,
                                        qkv_bias=qkv_bias,
                                        act_cfg=act_cfg,
                                        norm_cfg=norm_cfg,
                                        batch_first=True))

        self.final_norm = final_norm
        if final_norm:
            self.norm1_name, norm1 = build_norm_layer(norm_cfg,
                                                      embed_dims,
                                                      postfix=1)
            self.add_module(self.norm1_name, norm1)
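The stochastic depth decay rule used above, in standalone form: the per-layer drop-path rate grows linearly from 0 at the first layer to `drop_path_rate` at the last.

import torch

num_layers, drop_path_rate = 12, 0.1
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, num_layers)]
print([round(r, 3) for r in dpr])
# [0.0, 0.009, 0.018, ..., 0.1]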
Example no. 20
    def __init__(self,
                 in_channels,
                 out_channels,
                 groups=1,
                 width_per_group=4,
                 base_channels=64,
                 radix=2,
                 reduction_factor=4,
                 avg_down_stride=True,
                 **kwargs):
        super().__init__(in_channels, out_channels, **kwargs)

        self.groups = groups
        self.width_per_group = width_per_group

        # For ResNet bottleneck, middle channels are determined by expansion
        # and out_channels, but for ResNeXt bottleneck, it is determined by
        # groups and width_per_group and the stage it is located in.
        if groups != 1:
            assert self.mid_channels % base_channels == 0
            self.mid_channels = (groups * width_per_group *
                                 self.mid_channels // base_channels)

        self.avg_down_stride = avg_down_stride and self.conv2_stride > 1

        self.norm1_name, norm1 = build_norm_layer(self.norm_cfg,
                                                  self.mid_channels,
                                                  postfix=1)
        self.norm3_name, norm3 = build_norm_layer(self.norm_cfg,
                                                  self.out_channels,
                                                  postfix=3)

        self.conv1 = build_conv_layer(self.conv_cfg,
                                      self.in_channels,
                                      self.mid_channels,
                                      kernel_size=1,
                                      stride=self.conv1_stride,
                                      bias=False)
        self.add_module(self.norm1_name, norm1)
        self.conv2 = SplitAttentionConv2d(
            self.mid_channels,
            self.mid_channels,
            kernel_size=3,
            stride=1 if self.avg_down_stride else self.conv2_stride,
            padding=self.dilation,
            dilation=self.dilation,
            groups=groups,
            radix=radix,
            reduction_factor=reduction_factor,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg)
        delattr(self, self.norm2_name)

        if self.avg_down_stride:
            self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1)

        self.conv3 = build_conv_layer(self.conv_cfg,
                                      self.mid_channels,
                                      self.out_channels,
                                      kernel_size=1,
                                      bias=False)
        self.add_module(self.norm3_name, norm3)
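With `avg_down_stride` the 3x3 split-attention conv above runs at stride 1, and the separate `avd_layer` pool performs the downsampling:

import torch
import torch.nn as nn

conv2_stride = 2
avd_layer = nn.AvgPool2d(3, conv2_stride, padding=1)
print(avd_layer(torch.randn(1, 64, 56, 56)).shape)
# torch.Size([1, 64, 28, 28])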
Example no. 21
    def __init__(self,
                 extra,
                 in_channels=3,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 norm_eval=False,
                 with_cp=False,
                 zero_init_residual=False):
        # Protect mutable default arguments
        norm_cfg = copy.deepcopy(norm_cfg)
        super().__init__()
        self.extra = extra
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp
        self.zero_init_residual = zero_init_residual

        # stem net
        self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1)
        self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2)

        self.conv1 = build_conv_layer(
            self.conv_cfg,
            in_channels,
            64,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False)

        self.add_module(self.norm1_name, norm1)
        self.conv2 = build_conv_layer(
            self.conv_cfg,
            64,
            64,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False)

        self.add_module(self.norm2_name, norm2)
        self.relu = nn.ReLU(inplace=True)

        self.upsample_cfg = self.extra.get('upsample', {
            'mode': 'nearest',
            'align_corners': None
        })

        # stage 1
        self.stage1_cfg = self.extra['stage1']
        num_channels = self.stage1_cfg['num_channels'][0]
        block_type = self.stage1_cfg['block']
        num_blocks = self.stage1_cfg['num_blocks'][0]

        block = self.blocks_dict[block_type]
        stage1_out_channels = num_channels * get_expansion(block)
        self.layer1 = self._make_layer(block, 64, stage1_out_channels,
                                       num_blocks)

        # stage 2
        self.stage2_cfg = self.extra['stage2']
        num_channels = self.stage2_cfg['num_channels']
        block_type = self.stage2_cfg['block']

        block = self.blocks_dict[block_type]
        num_channels = [
            channel * get_expansion(block) for channel in num_channels
        ]
        self.transition1 = self._make_transition_layer([stage1_out_channels],
                                                       num_channels)
        self.stage2, pre_stage_channels = self._make_stage(
            self.stage2_cfg, num_channels)

        # stage 3
        self.stage3_cfg = self.extra['stage3']
        num_channels = self.stage3_cfg['num_channels']
        block_type = self.stage3_cfg['block']

        block = self.blocks_dict[block_type]
        num_channels = [
            channel * get_expansion(block) for channel in num_channels
        ]
        self.transition2 = self._make_transition_layer(pre_stage_channels,
                                                       num_channels)
        self.stage3, pre_stage_channels = self._make_stage(
            self.stage3_cfg, num_channels)

        # stage 4
        self.stage4_cfg = self.extra['stage4']
        num_channels = self.stage4_cfg['num_channels']
        block_type = self.stage4_cfg['block']

        block = self.blocks_dict[block_type]
        num_channels = [
            channel * get_expansion(block) for channel in num_channels
        ]
        self.transition3 = self._make_transition_layer(pre_stage_channels,
                                                       num_channels)

        self.stage4, pre_stage_channels = self._make_stage(
            self.stage4_cfg,
            num_channels,
            multiscale_output=self.stage4_cfg.get('multiscale_output', False))
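For reference, a hypothetical `extra` dict in the shape this constructor reads; key names are inferred from the attribute accesses above, the block keys follow the usual HRNet convention, and the widths and depths are illustrative rather than a published configuration:

extra = dict(
    upsample=dict(mode='nearest', align_corners=None),
    stage1=dict(block='BOTTLENECK', num_blocks=(2, ),
                num_channels=(64, )),
    stage2=dict(block='BASIC', num_blocks=(2, 2),
                num_channels=(18, 36)),
    stage3=dict(block='BASIC', num_blocks=(2, 2, 2),
                num_channels=(18, 36, 72)),
    stage4=dict(block='BASIC', num_blocks=(2, 2, 2, 2),
                num_channels=(18, 36, 72, 144),
                multiscale_output=True))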
Example no. 22
    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 dcn=None,
                 plugins=None):
        """Bottleneck block for ResNet.

        If style is "pytorch", the stride-two layer is the 3x3 conv layer, if
        it is "caffe", the stride-two layer is the first 1x1 conv layer.
        """
        super(Bottleneck, self).__init__()
        assert style in ['pytorch', 'caffe']
        assert dcn is None or isinstance(dcn, dict)
        assert plugins is None or isinstance(plugins, list)
        if plugins is not None:
            allowed_position = ['after_conv1', 'after_conv2', 'after_conv3']
            assert all(p['position'] in allowed_position for p in plugins)

        self.inplanes = inplanes
        self.planes = planes
        self.stride = stride
        self.dilation = dilation
        self.style = style
        self.with_cp = with_cp
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.dcn = dcn
        self.with_dcn = dcn is not None
        self.plugins = plugins
        self.with_plugins = plugins is not None

        if self.with_plugins:
            # collect plugins for conv1/conv2/conv3
            self.after_conv1_plugins = [
                plugin['cfg'] for plugin in plugins
                if plugin['position'] == 'after_conv1'
            ]
            self.after_conv2_plugins = [
                plugin['cfg'] for plugin in plugins
                if plugin['position'] == 'after_conv2'
            ]
            self.after_conv3_plugins = [
                plugin['cfg'] for plugin in plugins
                if plugin['position'] == 'after_conv3'
            ]

        if self.style == 'pytorch':
            self.conv1_stride = 1
            self.conv2_stride = stride
        else:
            self.conv1_stride = stride
            self.conv2_stride = 1

        self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)
        self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)
        self.norm3_name, norm3 = build_norm_layer(norm_cfg,
                                                  planes * self.expansion,
                                                  postfix=3)

        self.conv1 = build_conv_layer(conv_cfg,
                                      inplanes,
                                      planes,
                                      kernel_size=1,
                                      stride=self.conv1_stride,
                                      bias=False)
        self.add_module(self.norm1_name, norm1)
        fallback_on_stride = False
        if self.with_dcn:
            fallback_on_stride = dcn.pop('fallback_on_stride', False)
        if not self.with_dcn or fallback_on_stride:
            self.conv2 = build_conv_layer(conv_cfg,
                                          planes,
                                          planes,
                                          kernel_size=3,
                                          stride=self.conv2_stride,
                                          padding=dilation,
                                          dilation=dilation,
                                          bias=False)
        else:
            assert self.conv_cfg is None, 'conv_cfg must be None for DCN'
            self.conv2 = build_conv_layer(dcn,
                                          planes,
                                          planes,
                                          kernel_size=3,
                                          stride=self.conv2_stride,
                                          padding=dilation,
                                          dilation=dilation,
                                          bias=False)

        self.add_module(self.norm2_name, norm2)
        self.conv3 = build_conv_layer(conv_cfg,
                                      planes,
                                      planes * self.expansion,
                                      kernel_size=1,
                                      bias=False)
        self.add_module(self.norm3_name, norm3)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

        if self.with_plugins:
            self.after_conv1_plugin_names = self.make_block_plugins(
                planes, self.after_conv1_plugins)
            self.after_conv2_plugin_names = self.make_block_plugins(
                planes, self.after_conv2_plugins)
            self.after_conv3_plugin_names = self.make_block_plugins(
                planes * self.expansion, self.after_conv3_plugins)
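The 'pytorch' vs 'caffe' style switch above, reduced to its effect on the two conv strides:

def conv_strides(style, stride):
    # 'pytorch' puts the stride on the 3x3 conv2, 'caffe' on the 1x1 conv1.
    return (1, stride) if style == 'pytorch' else (stride, 1)

print(conv_strides('pytorch', 2))  # conv1_stride=1, conv2_stride=2
print(conv_strides('caffe', 2))    # conv1_stride=2, conv2_stride=1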
Example no. 23
    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 dcn=None,
                 plugins=None):
        """Bottleneck block for ResNet.
        If style is "pytorch", the stride-two layer is the 3x3 conv layer,
        if it is "caffe", the stride-two layer is the first 1x1 conv layer.
        """
        super(Bottleneck, self).__init__()
        assert style in ['pytorch', 'caffe']

        self.inplanes = inplanes
        self.planes = planes
        self.stride = stride
        self.dilation = dilation
        self.style = style
        self.with_cp = with_cp
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        if self.style == 'pytorch':
            self.conv1_stride = 1
            self.conv2_stride = stride
        else:
            self.conv1_stride = stride
            self.conv2_stride = 1

        self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)
        self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)
        self.norm3_name, norm3 = build_norm_layer(norm_cfg,
                                                  planes * self.expansion,
                                                  postfix=3)

        self.conv1 = build_conv_layer(conv_cfg,
                                      inplanes,
                                      planes,
                                      kernel_size=1,
                                      stride=self.conv1_stride,
                                      bias=False)
        self.add_module(self.norm1_name, norm1)
        self.conv2 = build_conv_layer(conv_cfg,
                                      planes,
                                      planes,
                                      kernel_size=3,
                                      stride=self.conv2_stride,
                                      padding=dilation,
                                      dilation=dilation,
                                      bias=False)

        self.add_module(self.norm2_name, norm2)
        self.conv3 = build_conv_layer(conv_cfg,
                                      planes,
                                      planes * self.expansion,
                                      kernel_size=1,
                                      bias=False)
        self.add_module(self.norm3_name, norm3)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
Example no. 24
    def __init__(self,
                 extra,
                 in_channels=3,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 norm_eval=True,
                 with_cp=False,
                 zero_init_residual=False,
                 multiscale_output=True,
                 pretrained=None,
                 init_cfg=None):
        super(HRNet, self).__init__(init_cfg)

        self.pretrained = pretrained
        assert not (init_cfg and pretrained), \
            'init_cfg and pretrained cannot be specified at the same time'
        if isinstance(pretrained, str):
            warnings.warn('DeprecationWarning: pretrained is deprecated, '
                          'please use "init_cfg" instead')
            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
        elif pretrained is None:
            if init_cfg is None:
                self.init_cfg = [
                    dict(type='Kaiming', layer='Conv2d'),
                    dict(type='Constant',
                         val=1,
                         layer=['_BatchNorm', 'GroupNorm'])
                ]
        else:
            raise TypeError('pretrained must be a str or None')

        # Assert configurations of 4 stages are in extra
        assert 'stage1' in extra and 'stage2' in extra \
               and 'stage3' in extra and 'stage4' in extra
        # Assert whether the length of `num_blocks` and `num_channels` are
        # equal to `num_branches`
        for i in range(4):
            cfg = extra[f'stage{i + 1}']
            assert len(cfg['num_blocks']) == cfg['num_branches'] and \
                   len(cfg['num_channels']) == cfg['num_branches']

        self.extra = extra
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp
        self.zero_init_residual = zero_init_residual

        # stem net
        self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1)
        self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2)

        self.conv1 = build_conv_layer(self.conv_cfg,
                                      in_channels,
                                      64,
                                      kernel_size=3,
                                      stride=2,
                                      padding=1,
                                      bias=False)

        self.add_module(self.norm1_name, norm1)
        self.conv2 = build_conv_layer(self.conv_cfg,
                                      64,
                                      64,
                                      kernel_size=3,
                                      stride=2,
                                      padding=1,
                                      bias=False)

        self.add_module(self.norm2_name, norm2)
        self.relu = nn.ReLU(inplace=True)

        # stage 1
        self.stage1_cfg = self.extra['stage1']
        num_channels = self.stage1_cfg['num_channels'][0]
        block_type = self.stage1_cfg['block']
        num_blocks = self.stage1_cfg['num_blocks'][0]

        block = self.blocks_dict[block_type]
        stage1_out_channels = num_channels * block.expansion
        self.layer1 = self._make_layer(block, 64, num_channels, num_blocks)

        # stage 2
        self.stage2_cfg = self.extra['stage2']
        num_channels = self.stage2_cfg['num_channels']
        block_type = self.stage2_cfg['block']

        block = self.blocks_dict[block_type]
        num_channels = [channel * block.expansion for channel in num_channels]
        self.transition1 = self._make_transition_layer([stage1_out_channels],
                                                       num_channels)
        self.stage2, pre_stage_channels = self._make_stage(
            self.stage2_cfg, num_channels)

        # stage 3
        self.stage3_cfg = self.extra['stage3']
        num_channels = self.stage3_cfg['num_channels']
        block_type = self.stage3_cfg['block']

        block = self.blocks_dict[block_type]
        num_channels = [channel * block.expansion for channel in num_channels]
        self.transition2 = self._make_transition_layer(pre_stage_channels,
                                                       num_channels)
        self.stage3, pre_stage_channels = self._make_stage(
            self.stage3_cfg, num_channels)

        # stage 4
        self.stage4_cfg = self.extra['stage4']
        num_channels = self.stage4_cfg['num_channels']
        block_type = self.stage4_cfg['block']

        block = self.blocks_dict[block_type]
        num_channels = [channel * block.expansion for channel in num_channels]
        self.transition3 = self._make_transition_layer(pre_stage_channels,
                                                       num_channels)
        self.stage4, pre_stage_channels = self._make_stage(
            self.stage4_cfg, num_channels, multiscale_output=multiscale_output)
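The widths handed to each transition layer above are the configured `num_channels` scaled by `block.expansion`; with the standard Bottleneck expansion of 4:

expansion = 4  # Bottleneck.expansion; BasicBlock uses 1
num_channels = [32, 64]
print([c * expansion for c in num_channels])  # [128, 256]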
Example no. 25
    def __init__(self,
                 block,
                 inplanes,
                 planes,
                 num_blocks,
                 stride=1,
                 avg_down=True,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 scales=4,
                 base_width=26,
                 **kwargs):
        self.block = block

        downsample = None
        if stride != 1 or inplanes != planes * block.expansion:
            downsample = []
            conv_stride = stride
            # Res2Net-v1b pretrained models use AvgPool2d in downsample
            # even if stride == 1
            if avg_down:
                conv_stride = 1
                downsample.append(
                    nn.AvgPool2d(kernel_size=stride,
                                 stride=stride,
                                 ceil_mode=True,
                                 count_include_pad=False))
            downsample.extend([
                build_conv_layer(conv_cfg,
                                 inplanes,
                                 planes * block.expansion,
                                 kernel_size=1,
                                 stride=conv_stride,
                                 bias=False),
                build_norm_layer(norm_cfg, planes * block.expansion)[1]
            ])
            downsample = nn.Sequential(*downsample)

        layers = []
        layers.append(
            block(inplanes=inplanes,
                  planes=planes,
                  stride=stride,
                  downsample=downsample,
                  conv_cfg=conv_cfg,
                  norm_cfg=norm_cfg,
                  scales=scales,
                  base_width=base_width,
                  stage_type='stage',
                  **kwargs))
        inplanes = planes * block.expansion
        for i in range(1, num_blocks):
            layers.append(
                block(inplanes=inplanes,
                      planes=planes,
                      stride=1,
                      conv_cfg=conv_cfg,
                      norm_cfg=norm_cfg,
                      scales=scales,
                      base_width=base_width,
                      **kwargs))
        super(Res2Layer, self).__init__(*layers)
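A plain-torch sketch of the v1b shortcut assembled above; note that with `avg_down` the pooling layer is inserted even when stride == 1 (a kernel-size-1 pool is then a no-op, but it keeps the module layout aligned with the pretrained checkpoints):

import torch.nn as nn

stride, inplanes, out_planes = 1, 64, 256
downsample = nn.Sequential(
    nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True,
                 count_include_pad=False),
    nn.Conv2d(inplanes, out_planes, kernel_size=1, stride=1, bias=False),
    nn.BatchNorm2d(out_planes))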
Example no. 26
    def __init__(self,
                 in_channels,
                 out_channels,
                 expansion=4,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN')):
        super(Bottleneck, self).__init__()
        assert style in ['pytorch', 'caffe']

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.expansion = expansion
        assert out_channels % expansion == 0
        self.mid_channels = out_channels // expansion
        self.stride = stride
        self.dilation = dilation
        self.style = style
        self.with_cp = with_cp
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        if self.style == 'pytorch':
            self.conv1_stride = 1
            self.conv2_stride = stride
        else:
            self.conv1_stride = stride
            self.conv2_stride = 1

        self.norm1_name, norm1 = build_norm_layer(norm_cfg,
                                                  self.mid_channels,
                                                  postfix=1)
        self.norm2_name, norm2 = build_norm_layer(norm_cfg,
                                                  self.mid_channels,
                                                  postfix=2)
        self.norm3_name, norm3 = build_norm_layer(norm_cfg,
                                                  out_channels,
                                                  postfix=3)

        self.conv1 = build_conv_layer(conv_cfg,
                                      in_channels,
                                      self.mid_channels,
                                      kernel_size=1,
                                      stride=self.conv1_stride,
                                      bias=False)
        self.add_module(self.norm1_name, norm1)
        self.conv2 = build_conv_layer(conv_cfg,
                                      self.mid_channels,
                                      self.mid_channels,
                                      kernel_size=3,
                                      stride=self.conv2_stride,
                                      padding=dilation,
                                      dilation=dilation,
                                      bias=False)

        self.add_module(self.norm2_name, norm2)
        self.conv3 = build_conv_layer(conv_cfg,
                                      self.mid_channels,
                                      out_channels,
                                      kernel_size=1,
                                      bias=False)
        self.add_module(self.norm3_name, norm3)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
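The bottleneck bookkeeping above in isolation: `out_channels` must be divisible by `expansion`, and the two 1x1 convs narrow and widen around the squeezed middle width.

out_channels, expansion = 256, 4
assert out_channels % expansion == 0
mid_channels = out_channels // expansion
print(mid_channels)  # 64: conv1's output width and conv2's I/O width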
Example no. 27
    def __init__(self,
                 in_channels,
                 out_channels,
                 expansion=4,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 kernel_size=3,
                 groups=1,
                 attention=False):
        # Protect mutable default arguments
        norm_cfg = copy.deepcopy(norm_cfg)
        super().__init__()
        assert style in ['pytorch', 'caffe']

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.expansion = expansion
        assert out_channels % expansion == 0
        self.mid_channels = out_channels // expansion
        self.stride = stride
        self.dilation = dilation
        self.style = style
        self.with_cp = with_cp
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        if self.style == 'pytorch':
            self.conv1_stride = 1
            self.conv2_stride = stride
        else:
            self.conv1_stride = stride
            self.conv2_stride = 1

        self.norm1_name, norm1 = build_norm_layer(norm_cfg,
                                                  self.mid_channels,
                                                  postfix=1)
        self.norm2_name, norm2 = build_norm_layer(norm_cfg,
                                                  self.mid_channels,
                                                  postfix=2)
        self.norm3_name, norm3 = build_norm_layer(norm_cfg,
                                                  out_channels,
                                                  postfix=3)

        self.conv1 = build_conv_layer(conv_cfg,
                                      in_channels,
                                      self.mid_channels,
                                      kernel_size=1,
                                      stride=self.conv1_stride,
                                      bias=False)
        self.add_module(self.norm1_name, norm1)
        self.conv2 = build_conv_layer(conv_cfg,
                                      self.mid_channels,
                                      self.mid_channels,
                                      kernel_size=kernel_size,
                                      stride=self.conv2_stride,
                                      padding=kernel_size // 2,
                                      groups=groups,
                                      dilation=dilation,
                                      bias=False)

        self.add_module(self.norm2_name, norm2)
        self.conv3 = build_conv_layer(conv_cfg,
                                      self.mid_channels,
                                      out_channels,
                                      kernel_size=1,
                                      bias=False)
        self.add_module(self.norm3_name, norm3)

        if attention:
            self.attention = ContextBlock(out_channels,
                                          max(1.0 / 16, 16.0 / out_channels))
        else:
            self.attention = None

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
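The `ContextBlock` ratio above clamps the squeezed attention width: narrow inputs keep at least 16 channels, wide ones use 1/16 of the width. Evaluated for a few widths:

for out_channels in (64, 256, 1024):
    ratio = max(1.0 / 16, 16.0 / out_channels)
    print(out_channels, ratio, int(out_channels * ratio))
# 64   0.25    16
# 256  0.0625  16
# 1024 0.0625  64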
Example no. 28
    def __init__(self,
                 block,
                 inplanes,
                 planes,
                 num_blocks,
                 stride=1,
                 avg_down=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 downsample_first=True,
                 **kwargs):
        self.block = block

        downsample = None
        if stride != 1 or inplanes != planes * block.expansion:
            downsample = []
            conv_stride = stride
            if avg_down:
                conv_stride = 1
                downsample.append(
                    nn.AvgPool2d(kernel_size=stride,
                                 stride=stride,
                                 ceil_mode=True,
                                 count_include_pad=False))
            downsample.extend([
                build_conv_layer(conv_cfg,
                                 inplanes,
                                 planes * block.expansion,
                                 kernel_size=1,
                                 stride=conv_stride,
                                 bias=False),
                build_norm_layer(norm_cfg, planes * block.expansion)[1]
            ])
            downsample = nn.Sequential(*downsample)

        layers = []
        if downsample_first:
            layers.append(
                block(inplanes=inplanes,
                      planes=planes,
                      stride=stride,
                      downsample=downsample,
                      conv_cfg=conv_cfg,
                      norm_cfg=norm_cfg,
                      **kwargs))
            inplanes = planes * block.expansion
            for _ in range(1, num_blocks):
                layers.append(
                    block(inplanes=inplanes,
                          planes=planes,
                          stride=1,
                          conv_cfg=conv_cfg,
                          norm_cfg=norm_cfg,
                          **kwargs))

        else:  # downsample_first=False is for HourglassModule
            for _ in range(num_blocks - 1):
                layers.append(
                    block(inplanes=inplanes,
                          planes=inplanes,
                          stride=1,
                          conv_cfg=conv_cfg,
                          norm_cfg=norm_cfg,
                          **kwargs))
            layers.append(
                block(inplanes=inplanes,
                      planes=planes,
                      stride=stride,
                      downsample=downsample,
                      conv_cfg=conv_cfg,
                      norm_cfg=norm_cfg,
                      **kwargs))
        super(ResLayer, self).__init__(*layers)
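A small sketch of what `downsample_first` controls above, namely where the strided (downsampling) block sits inside the stage; this stand-in only mirrors the stride bookkeeping, not the real modules:

def stride_plan(num_blocks, stride, downsample_first=True):
    """Mirror the per-block strides assembled above."""
    if downsample_first:
        return [stride] + [1] * (num_blocks - 1)
    # downsample_first=False is the HourglassModule layout.
    return [1] * (num_blocks - 1) + [stride]

print(stride_plan(4, 2, True))   # [2, 1, 1, 1]
print(stride_plan(4, 2, False))  # [1, 1, 1, 2]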
Example no. 29
    def __init__(self,
                 depth,
                 in_channels=3,
                 stem_channels=64,
                 base_channels=64,
                 num_stages=4,
                 strides=(1, 2, 2, 2),
                 dilations=(1, 1, 1, 1),
                 out_indices=(0, 1, 2, 3),
                 style='pytorch',
                 deep_stem=False,
                 avg_down=False,
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 norm_eval=True,
                 dcn=None,
                 stage_with_dcn=(False, False, False, False),
                 plugins=None,
                 with_cp=False,
                 zero_init_residual=True):
        super(DB_ResNet, self).__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for resnet')
        self.depth = depth
        self.stem_channels = stem_channels
        self.base_channels = base_channels
        self.num_stages = num_stages
        assert 1 <= num_stages <= 4
        self.strides = strides
        self.dilations = dilations
        assert len(strides) == len(dilations) == num_stages
        self.out_indices = out_indices
        assert max(out_indices) < num_stages
        self.style = style
        self.deep_stem = deep_stem
        self.avg_down = avg_down
        self.frozen_stages = frozen_stages
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.with_cp = with_cp
        self.norm_eval = norm_eval
        self.dcn = dcn
        self.stage_with_dcn = stage_with_dcn
        if dcn is not None:
            assert len(stage_with_dcn) == num_stages
        self.plugins = plugins
        self.zero_init_residual = zero_init_residual
        self.block, stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        self.inplanes = stem_channels

        self._make_stem_layer(in_channels, stem_channels)

        self.res_layers = []

        # Dual-backbone (DB) modules
        self.second_res_layers = []
        self.eb_second_conv1 = nn.Conv2d(256,
                                         64,
                                         kernel_size=1,
                                         stride=1,
                                         padding=0,
                                         dilation=1,
                                         bias=False)
        self.eb_second_conv2 = nn.Conv2d(512,
                                         256,
                                         kernel_size=1,
                                         stride=1,
                                         padding=0,
                                         dilation=1,
                                         bias=False)
        self.eb_second_conv3 = nn.Conv2d(1024,
                                         512,
                                         kernel_size=1,
                                         stride=1,
                                         padding=0,
                                         dilation=1,
                                         bias=False)
        self.eb_second_conv4 = nn.Conv2d(2048,
                                         1024,
                                         kernel_size=1,
                                         stride=1,
                                         padding=0,
                                         dilation=1,
                                         bias=False)

        # Norm layers for the dual backbone
        normalize = self.norm_cfg

        self.eb_second_bn1_name, eb_second_bn1 = build_norm_layer(
            normalize, 64, postfix='eb_second_bn1')
        self.add_module(self.eb_second_bn1_name, eb_second_bn1)

        self.eb_second_bn2_name, eb_second_bn2 = build_norm_layer(
            normalize, 256, postfix='eb_second_bn2')
        self.add_module(self.eb_second_bn2_name, eb_second_bn2)

        self.eb_second_bn3_name, eb_second_bn3 = build_norm_layer(
            normalize, 512, postfix='eb_second_bn3')
        self.add_module(self.eb_second_bn3_name, eb_second_bn3)

        self.eb_second_bn4_name, eb_second_bn4 = build_norm_layer(
            normalize, 1024, postfix='eb_second_bn4')
        self.add_module(self.eb_second_bn4_name, eb_second_bn4)

        # Upsampling used by the dual backbone
        self.eb_upsample = nn.Upsample(scale_factor=2, mode='nearest')

        for i, num_blocks in enumerate(self.stage_blocks):
            stride = strides[i]
            dilation = dilations[i]
            dcn = self.dcn if self.stage_with_dcn[i] else None
            if plugins is not None:
                stage_plugins = self.make_stage_plugins(plugins, i)
            else:
                stage_plugins = None
            planes = base_channels * 2**i
            res_layer = self.make_res_layer(block=self.block,
                                            inplanes=self.inplanes,
                                            planes=planes,
                                            num_blocks=num_blocks,
                                            stride=stride,
                                            dilation=dilation,
                                            style=self.style,
                                            avg_down=self.avg_down,
                                            with_cp=with_cp,
                                            conv_cfg=conv_cfg,
                                            norm_cfg=norm_cfg,
                                            dcn=dcn,
                                            plugins=stage_plugins)
            second_res_layer = self.make_res_layer(block=self.block,
                                                   inplanes=self.inplanes,
                                                   planes=planes,
                                                   num_blocks=num_blocks,
                                                   stride=stride,
                                                   dilation=dilation,
                                                   style=self.style,
                                                   avg_down=self.avg_down,
                                                   with_cp=with_cp,
                                                   conv_cfg=conv_cfg,
                                                   norm_cfg=norm_cfg,
                                                   dcn=dcn,
                                                   plugins=stage_plugins)

            self.inplanes = planes * self.block.expansion
            layer_name = f'layer{i + 1}'
            self.add_module(layer_name, res_layer)
            self.res_layers.append(layer_name)
            # Register the dual-backbone twin of this stage
            second_layer_name = f'second_layer{i + 1}'
            self.add_module(second_layer_name, second_res_layer)
            self.second_res_layers.append(second_layer_name)

        self._freeze_stages()

        self.feat_dim = self.block.expansion * base_channels * 2**(
            len(self.stage_blocks) - 1)
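The closing `feat_dim` line computes the final channel width; for the ResNet-50 defaults used throughout this example:

# Bottleneck expansion 4, base_channels 64, four stages.
expansion, base_channels, num_stages = 4, 64, 4
feat_dim = expansion * base_channels * 2 ** (num_stages - 1)
print(feat_dim)  # 2048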
Example no. 30
    def __init__(self,
                 img_size=224,
                 in_channels=3,
                 embed_dims=384,
                 num_layers=14,
                 out_indices=-1,
                 drop_rate=0.,
                 drop_path_rate=0.,
                 norm_cfg=dict(type='LN'),
                 final_norm=True,
                 with_cls_token=True,
                 output_cls_token=True,
                 interpolate_mode='bicubic',
                 t2t_cfg=dict(),
                 layer_cfgs=dict(),
                 init_cfg=None):
        super(T2T_ViT, self).__init__(init_cfg)

        # Token-to-Token Module
        self.tokens_to_token = T2TModule(
            img_size=img_size,
            in_channels=in_channels,
            embed_dims=embed_dims,
            **t2t_cfg)
        self.patch_resolution = self.tokens_to_token.init_out_size
        num_patches = self.patch_resolution[0] * self.patch_resolution[1]

        # Set cls token
        if output_cls_token:
            assert with_cls_token is True, f'with_cls_token must be True if' \
                f'set output_cls_token to True, but got {with_cls_token}'
        self.with_cls_token = with_cls_token
        self.output_cls_token = output_cls_token
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims))

        # Set position embedding
        self.interpolate_mode = interpolate_mode
        sinusoid_table = get_sinusoid_encoding(
            num_patches + self.num_extra_tokens, embed_dims)
        self.register_buffer('pos_embed', sinusoid_table)
        self._register_load_state_dict_pre_hook(self._prepare_pos_embed)

        self.drop_after_pos = nn.Dropout(p=drop_rate)

        if isinstance(out_indices, int):
            out_indices = [out_indices]
        assert isinstance(out_indices, Sequence), \
            f'"out_indices" must be a sequence or int, ' \
            f'got {type(out_indices)} instead.'
        # Work on a list so negative indices can be rewritten in place
        # even when a tuple is passed.
        out_indices = list(out_indices)
        for i, index in enumerate(out_indices):
            if index < 0:
                out_indices[i] = num_layers + index
            assert 0 <= out_indices[i] < num_layers, \
                f'Invalid out_indices {index}'
        self.out_indices = out_indices

        # stochastic depth decay rule
        dpr = np.linspace(0, drop_path_rate, num_layers).tolist()

        self.encoder = ModuleList()
        for i in range(num_layers):
            if isinstance(layer_cfgs, Sequence):
                layer_cfg = layer_cfgs[i]
            else:
                layer_cfg = deepcopy(layer_cfgs)
            layer_cfg = {
                'embed_dims': embed_dims,
                'num_heads': 6,
                'feedforward_channels': 3 * embed_dims,
                'drop_path_rate': dpr[i],
                'qkv_bias': False,
                'norm_cfg': norm_cfg,
                **layer_cfg
            }

            layer = T2TTransformerLayer(**layer_cfg)
            self.encoder.append(layer)

        self.final_norm = final_norm
        if final_norm:
            self.norm = build_norm_layer(norm_cfg, embed_dims)[1]
        else:
            self.norm = nn.Identity()
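How `layer_cfgs` interacts with the defaults above: a single dict is deep-copied and applied to every layer, and its keys override the defaults via dict unpacking. A small sketch with a hypothetical override:

from copy import deepcopy

defaults = dict(embed_dims=384, num_heads=6, qkv_bias=False)
layer_cfgs = dict(num_heads=12)  # hypothetical per-model override
layer_cfg = {**defaults, **deepcopy(layer_cfgs)}
print(layer_cfg)
# {'embed_dims': 384, 'num_heads': 12, 'qkv_bias': False}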