Example 1
    def __init__(self,
                 in_channels=[128],
                 tasks=None,
                 train_cfg=None,
                 test_cfg=None,
                 bbox_coder=None,
                 common_heads=dict(),
                 loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
                 loss_bbox=dict(type='L1Loss',
                                reduction='none',
                                loss_weight=0.25),
                 separate_head=dict(type='SeparateHead',
                                    init_bias=-2.19,
                                    final_kernel=3),
                 feature_channel=64,
                 num_heatmap_convs=2,
                 conv_cfg=dict(type='Conv2d'),
                 norm_cfg=dict(type='BN2d'),
                 bias='auto',
                 norm_bbox=True,
                 pred_mode=['voxel'],
                 num_head_convs=2,
                 num_classes=17,
                 ignore_label=0,
                 grid_size=[480, 360, 32]):
        super(SelectivesegHead, self).__init__()

        self.class_names = [t['class_names'] for t in tasks]
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.norm_bbox = norm_bbox

        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.fp16_enabled = False

        self.pred_mode = pred_mode
        assert ('voxel' in self.pred_mode) or ('point' in self.pred_mode) or (
            'pillar' in self.pred_mode)
        self.ignore_label = ignore_label
        self.grid_size = grid_size

        semantic_conv = []
        for i in range(num_head_convs):
            if i > 0:
                in_channels = feature_channel
            conv = ConvModule(in_channels,
                              feature_channel,
                              kernel_size=1,
                              conv_cfg=conv_cfg,
                              norm_cfg=norm_cfg,
                              bias=False)
            semantic_conv.append(conv)
        self.semantic_conv = nn.Sequential(*semantic_conv)

        self.semantic_cls = nn.Conv1d(feature_channel, self.num_classes, 1)

        self.semantic_CE_loss = torch.nn.CrossEntropyLoss(
            ignore_index=ignore_label)
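
# A quick sketch of how the ignore-label cross-entropy above behaves; the
# shapes and label layout are illustrative assumptions, not values taken
# from SelectivesegHead.
import torch

logits = torch.randn(2, 17, 100)          # (batch, num_classes, num_points)
labels = torch.randint(0, 17, (2, 100))   # (batch, num_points)
labels[:, :10] = 0   # points tagged with ignore_label=0 contribute no loss
loss = torch.nn.CrossEntropyLoss(ignore_index=0)(logits, labels)
print(loss)
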
Example 2
class MobileNetV2(nn.Module):
    """MobileNetV2 backbone.

    Args:
        widen_factor (float): Width multiplier, multiply number of
            channels in each layer by this amount. Default: 1.0.
        strides (Sequence[int]): Strides of the first block of each layer.
            Default: (1, 2, 2, 2, 1, 2, 1).
        dilations (Sequence[int]): Dilation of each layer.
        out_indices (None or Sequence[int]): Output from which stages.
            Default: (1, 2, 4, 6).
        frozen_stages (int): Stages to be frozen (all param fixed).
            Default: -1, which means not freezing any parameters.
        conv_cfg (dict): Config dict for convolution layer.
            Default: None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU6').
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
    """

    # Parameters to build layers. 3 parameters are needed to construct a
    # layer, from left to right: expand_ratio, channel, num_blocks.
    arch_settings = [[1, 16, 1], [6, 24, 2], [6, 32, 3], [6, 64, 4],
                     [6, 96, 3], [6, 160, 3], [6, 320, 1]]

    def __init__(self,
                 widen_factor=1.,
                 strides=(1, 2, 2, 2, 1, 2, 1),
                 dilations=(1, 1, 1, 1, 1, 1, 1),
                 out_indices=(1, 2, 4, 6),
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU6'),
                 norm_eval=False,
                 with_cp=False):
        super(MobileNetV2, self).__init__()
        self.widen_factor = widen_factor
        self.strides = strides
        self.dilations = dilations
        assert len(strides) == len(dilations) == len(self.arch_settings)
        self.out_indices = out_indices
        for index in out_indices:
            if index not in range(0, 7):
                raise ValueError('the item in out_indices must be in '
                                 f'range(0, 7). But received {index}')

        if frozen_stages not in range(-1, 7):
            raise ValueError('frozen_stages must be in range(-1, 7). '
                             f'But received {frozen_stages}')
        self.frozen_stages = frozen_stages
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp

        self.in_channels = make_divisible(32 * widen_factor, 8)

        self.conv1 = ConvModule(in_channels=3,
                                out_channels=self.in_channels,
                                kernel_size=3,
                                stride=2,
                                padding=1,
                                conv_cfg=self.conv_cfg,
                                norm_cfg=self.norm_cfg,
                                act_cfg=self.act_cfg)

        self.layers = []

        for i, layer_cfg in enumerate(self.arch_settings):
            expand_ratio, channel, num_blocks = layer_cfg
            stride = self.strides[i]
            dilation = self.dilations[i]
            out_channels = make_divisible(channel * widen_factor, 8)
            inverted_res_layer = self.make_layer(out_channels=out_channels,
                                                 num_blocks=num_blocks,
                                                 stride=stride,
                                                 dilation=dilation,
                                                 expand_ratio=expand_ratio)
            layer_name = f'layer{i + 1}'
            self.add_module(layer_name, inverted_res_layer)
            self.layers.append(layer_name)

    def make_layer(self, out_channels, num_blocks, stride, dilation,
                   expand_ratio):
        """Stack InvertedResidual blocks to build a layer for MobileNetV2.

        Args:
            out_channels (int): out_channels of block.
            num_blocks (int): Number of blocks.
            stride (int): Stride of the first block.
            dilation (int): Dilation of the first block.
            expand_ratio (int): Expand the number of channels of the
                hidden layer in InvertedResidual by this ratio.
        """
        layers = []
        for i in range(num_blocks):
            layers.append(
                InvertedResidual(self.in_channels,
                                 out_channels,
                                 stride if i == 0 else 1,
                                 expand_ratio=expand_ratio,
                                 dilation=dilation if i == 0 else 1,
                                 conv_cfg=self.conv_cfg,
                                 norm_cfg=self.norm_cfg,
                                 act_cfg=self.act_cfg,
                                 with_cp=self.with_cp))
            self.in_channels = out_channels

        return nn.Sequential(*layers)

    def init_weights(self, pretrained=None):
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                    constant_init(m, 1)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        x = self.conv1(x)

        outs = []
        for i, layer_name in enumerate(self.layers):
            layer = getattr(self, layer_name)
            x = layer(x)
            if i in self.out_indices:
                outs.append(x)

        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def _freeze_stages(self):
        if self.frozen_stages >= 0:
            for param in self.conv1.parameters():
                param.requires_grad = False
        for i in range(1, self.frozen_stages + 1):
            layer = getattr(self, f'layer{i}')
            layer.eval()
            for param in layer.parameters():
                param.requires_grad = False

    def train(self, mode=True):
        super(MobileNetV2, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    m.eval()
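
# A minimal usage sketch for the MobileNetV2 backbone above; assumes
# ConvModule, InvertedResidual and make_divisible are importable as in the
# class body, and the input size is illustrative.
import torch

model = MobileNetV2(widen_factor=1.0, out_indices=(1, 2, 4, 6))
model.init_weights()
feats = model(torch.randn(1, 3, 224, 224))
for f in feats:
    print(f.shape)   # feature maps at strides 4, 8, 16 and 32
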
Example 3
    def __init__(self,
                 in_channels,
                 out_channels,
                 num_outs,
                 start_level=0,
                 end_level=-1,
                 add_extra_convs=False,
                 extra_convs_on_inputs=False,
                 relu_before_extra_convs=False,
                 no_norm_on_lateral=False,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None,
                 upsample_cfg=dict(mode='nearest')):
        super(FPN_involution, self).__init__()
        assert isinstance(in_channels, list)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.num_outs = num_outs
        self.relu_before_extra_convs = relu_before_extra_convs
        self.no_norm_on_lateral = no_norm_on_lateral
        self.fp16_enabled = False
        self.upsample_cfg = upsample_cfg.copy()

        if end_level == -1:
            self.backbone_end_level = self.num_ins
            assert num_outs >= self.num_ins - start_level
        else:
            # if end_level < inputs, no extra level is allowed
            self.backbone_end_level = end_level
            assert end_level <= len(in_channels)
            assert num_outs == end_level - start_level
        self.start_level = start_level
        self.end_level = end_level
        self.add_extra_convs = add_extra_convs
        assert isinstance(add_extra_convs, (str, bool))
        if isinstance(add_extra_convs, str):
            # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'
            assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
        elif add_extra_convs:  # True
            if extra_convs_on_inputs:
                # For compatibility with previous release
                # TODO: deprecate `extra_convs_on_inputs`
                self.add_extra_convs = 'on_input'
            else:
                self.add_extra_convs = 'on_output'

        self.lateral_convs = nn.ModuleList()
        self.fpn_convs = nn.ModuleList()

        for i in range(self.start_level, self.backbone_end_level):
            l_conv = ConvModule(
                in_channels[i],
                out_channels,
                1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)
            # The standard 3x3 FPN ConvModule is swapped for an involution
            # layer (kernel size 7, stride 1); the original conv is kept
            # commented out for reference.
            fpn_conv = involution(out_channels, 7, 1)
            # ConvModule(out_channels, out_channels, 3, padding=1,
            #            conv_cfg=conv_cfg, norm_cfg=norm_cfg,
            #            act_cfg=act_cfg, inplace=False)

            self.lateral_convs.append(l_conv)
            self.fpn_convs.append(fpn_conv)

        # add extra conv layers (e.g., RetinaNet)
        extra_levels = num_outs - self.backbone_end_level + self.start_level
        if self.add_extra_convs and extra_levels >= 1:
            for i in range(extra_levels):
                if i == 0 and self.add_extra_convs == 'on_input':
                    in_channels = self.in_channels[self.backbone_end_level - 1]
                else:
                    in_channels = out_channels
                extra_fpn_conv = ConvModule(in_channels,
                                            out_channels,
                                            3,
                                            stride=2,
                                            padding=1,
                                            conv_cfg=conv_cfg,
                                            norm_cfg=norm_cfg,
                                            act_cfg=act_cfg,
                                            inplace=False)
                self.fpn_convs.append(extra_fpn_conv)
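
# A worked example of the extra-level arithmetic above, with illustrative
# values rather than any original config.
in_channels, num_outs, start_level = [256, 512, 1024, 2048], 5, 1
backbone_end_level = len(in_channels)   # the end_level == -1 branch
extra_levels = num_outs - backbone_end_level + start_level
print(extra_levels)   # 2 -> two stride-2 extra convs are appended
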
Example 4
class X3D(nn.Module):
    """X3D backbone. https://arxiv.org/pdf/2004.04730.pdf.

    Args:
        gamma_w (float): Global channel width expansion factor. Default: 1.
        gamma_b (float): Bottleneck channel width expansion factor. Default: 1.
        gamma_d (float): Network depth expansion factor. Default: 1.
        pretrained (str | None): Name of pretrained model. Default: None.
        in_channels (int): Channel num of input features. Default: 3.
        num_stages (int): Resnet stages. Default: 4.
        spatial_strides (Sequence[int]):
            Spatial strides of residual blocks of each stage.
            Default: ``(1, 2, 2, 2)``.
        frozen_stages (int): Stages to be frozen (all param fixed). If set to
            -1, it means not freezing any parameters. Default: -1.
        se_style (str): The style of inserting SE modules into BlockX3D:
            'half' denotes inserting them into half of the blocks, while
            'all' denotes inserting them into all blocks. Default: 'half'.
        se_ratio (float | None): The reduction ratio of squeeze and excitation
            unit. If set as None, it means not using SE unit. Default: 1 / 16.
        use_swish (bool): Whether to use swish as the activation function
            before and after the 3x3x3 conv. Default: True.
        conv_cfg (dict): Config for conv layers. Required keys are ``type``.
            Default: ``dict(type='Conv3d')``.
        norm_cfg (dict): Config for norm layers. Required keys are ``type``
            and ``requires_grad``.
            Default: ``dict(type='BN3d', requires_grad=True)``.
        act_cfg (dict): Config dict for activation layer.
            Default: ``dict(type='ReLU', inplace=True)``.
        norm_eval (bool): Whether to set BN layers to eval mode, namely, freeze
            running stats (mean and var). Default: False.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
        zero_init_residual (bool): Whether to use zero initialization for
            the residual blocks. Default: True.
        kwargs (dict, optional): Keyword arguments for ``make_res_layer``.
    """
    def __init__(self,
                 gamma_w=1.0,
                 gamma_b=1.0,
                 gamma_d=1.0,
                 pretrained=None,
                 in_channels=3,
                 num_stages=4,
                 spatial_strides=(2, 2, 2, 2),
                 frozen_stages=-1,
                 se_style='half',
                 se_ratio=1 / 16,
                 use_swish=True,
                 conv_cfg=dict(type='Conv3d'),
                 norm_cfg=dict(type='BN3d', requires_grad=True),
                 act_cfg=dict(type='ReLU', inplace=True),
                 norm_eval=False,
                 with_cp=False,
                 zero_init_residual=True,
                 **kwargs):
        super().__init__()
        self.gamma_w = gamma_w
        self.gamma_b = gamma_b
        self.gamma_d = gamma_d

        self.pretrained = pretrained
        self.in_channels = in_channels
        # Hard coded, can be changed by gamma_w
        self.base_channels = 24
        self.stage_blocks = [1, 2, 5, 3]

        # apply parameters gamma_w and gamma_d
        self.base_channels = self._round_width(self.base_channels,
                                               self.gamma_w)

        self.stage_blocks = [
            self._round_repeats(x, self.gamma_d) for x in self.stage_blocks
        ]

        self.num_stages = num_stages
        assert 1 <= num_stages <= 4
        self.spatial_strides = spatial_strides
        assert len(spatial_strides) == num_stages
        self.frozen_stages = frozen_stages

        self.se_style = se_style
        assert self.se_style in ['all', 'half']
        self.se_ratio = se_ratio
        assert (self.se_ratio is None) or (self.se_ratio > 0)
        self.use_swish = use_swish

        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp
        self.zero_init_residual = zero_init_residual

        self.block = BlockX3D
        self.stage_blocks = self.stage_blocks[:num_stages]
        self.layer_inplanes = self.base_channels
        self._make_stem_layer()

        self.res_layers = []
        for i, num_blocks in enumerate(self.stage_blocks):
            spatial_stride = spatial_strides[i]
            inplanes = self.base_channels * 2**i
            planes = int(inplanes * self.gamma_b)

            res_layer = self.make_res_layer(self.block,
                                            self.layer_inplanes,
                                            inplanes,
                                            planes,
                                            num_blocks,
                                            spatial_stride=spatial_stride,
                                            se_style=self.se_style,
                                            se_ratio=self.se_ratio,
                                            use_swish=self.use_swish,
                                            norm_cfg=self.norm_cfg,
                                            conv_cfg=self.conv_cfg,
                                            act_cfg=self.act_cfg,
                                            with_cp=with_cp,
                                            **kwargs)
            self.layer_inplanes = inplanes
            layer_name = f'layer{i + 1}'
            self.add_module(layer_name, res_layer)
            self.res_layers.append(layer_name)

        self.feat_dim = self.base_channels * 2**(len(self.stage_blocks) - 1)
        self.conv5 = ConvModule(self.feat_dim,
                                int(self.feat_dim * self.gamma_b),
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False,
                                conv_cfg=self.conv_cfg,
                                norm_cfg=self.norm_cfg,
                                act_cfg=self.act_cfg)
        self.feat_dim = int(self.feat_dim * self.gamma_b)

    @staticmethod
    def _round_width(width, multiplier, min_depth=8, divisor=8):
        """Round width of filters based on width multiplier."""
        if not multiplier:
            return width

        width *= multiplier
        min_depth = min_depth or divisor
        new_filters = max(min_depth,
                          int(width + divisor / 2) // divisor * divisor)
        if new_filters < 0.9 * width:
            new_filters += divisor
        return int(new_filters)

    @staticmethod
    def _round_repeats(repeats, multiplier):
        """Round number of layers based on depth multiplier."""
        if not multiplier:
            return repeats
        return int(math.ceil(multiplier * repeats))

    # the module is parameterized with gamma_b
    # no temporal_stride
    def make_res_layer(self,
                       block,
                       layer_inplanes,
                       inplanes,
                       planes,
                       blocks,
                       spatial_stride=1,
                       se_style='half',
                       se_ratio=None,
                       use_swish=True,
                       norm_cfg=None,
                       act_cfg=None,
                       conv_cfg=None,
                       with_cp=False,
                       **kwargs):
        """Build residual layer for ResNet3D.

        Args:
            block (nn.Module): Residual module to be built.
            layer_inplanes (int): Number of channels for the input feature
                of the res layer.
            inplanes (int): Number of channels for the input feature in each
                block, which equals base_channels * gamma_w.
            planes (int): Number of channels for the output feature in each
                block, which equals base_channels * gamma_w * gamma_b.
            blocks (int): Number of residual blocks.
            spatial_stride (int): Spatial strides in residual and conv layers.
                Default: 1.
            se_style (str): The style of inserting SE modules into BlockX3D:
                'half' denotes inserting them into half of the blocks, while
                'all' denotes inserting them into all blocks. Default: 'half'.
            se_ratio (float | None): The reduction ratio of squeeze and
                excitation unit. If set as None, it means not using SE unit.
                Default: None.
            use_swish (bool): Whether to use swish as the activation function
                before and after the 3x3x3 conv. Default: True.
            conv_cfg (dict | None): Config for conv layers. Default: None.
            norm_cfg (dict | None): Config for norm layers. Default: None.
            act_cfg (dict | None): Config for activation layers. Default: None.
            with_cp (bool | None): Use checkpoint or not. Using checkpoint
                will save some memory while slowing down the training speed.
                Default: False.

        Returns:
            nn.Module: A residual layer for the given config.
        """
        downsample = None
        if spatial_stride != 1 or layer_inplanes != inplanes:
            downsample = ConvModule(layer_inplanes,
                                    inplanes,
                                    kernel_size=1,
                                    stride=(1, spatial_stride, spatial_stride),
                                    padding=0,
                                    bias=False,
                                    conv_cfg=conv_cfg,
                                    norm_cfg=norm_cfg,
                                    act_cfg=None)

        use_se = [False] * blocks
        if self.se_style == 'all':
            use_se = [True] * blocks
        elif self.se_style == 'half':
            use_se = [i % 2 == 0 for i in range(blocks)]
        else:
            raise NotImplementedError

        layers = []
        layers.append(
            block(layer_inplanes,
                  planes,
                  inplanes,
                  spatial_stride=spatial_stride,
                  downsample=downsample,
                  se_ratio=se_ratio if use_se[0] else None,
                  use_swish=use_swish,
                  norm_cfg=norm_cfg,
                  conv_cfg=conv_cfg,
                  act_cfg=act_cfg,
                  with_cp=with_cp,
                  **kwargs))

        for i in range(1, blocks):
            layers.append(
                block(inplanes,
                      planes,
                      inplanes,
                      spatial_stride=1,
                      se_ratio=se_ratio if use_se[i] else None,
                      use_swish=use_swish,
                      norm_cfg=norm_cfg,
                      conv_cfg=conv_cfg,
                      act_cfg=act_cfg,
                      with_cp=with_cp,
                      **kwargs))

        return nn.Sequential(*layers)

    def _make_stem_layer(self):
        """Construct the stem layers consists of a conv+norm+act module and a
        pooling layer."""
        self.conv1_s = ConvModule(self.in_channels,
                                  self.base_channels,
                                  kernel_size=(1, 3, 3),
                                  stride=(1, 2, 2),
                                  padding=(0, 1, 1),
                                  bias=False,
                                  conv_cfg=self.conv_cfg,
                                  norm_cfg=None,
                                  act_cfg=None)
        self.conv1_t = ConvModule(self.base_channels,
                                  self.base_channels,
                                  kernel_size=(5, 1, 1),
                                  stride=(1, 1, 1),
                                  padding=(2, 0, 0),
                                  groups=self.base_channels,
                                  bias=False,
                                  conv_cfg=self.conv_cfg,
                                  norm_cfg=self.norm_cfg,
                                  act_cfg=self.act_cfg)

    def _freeze_stages(self):
        """Prevent all the parameters from being optimized before
        ``self.frozen_stages``."""
        if self.frozen_stages >= 0:
            self.conv1_s.eval()
            self.conv1_t.eval()
            for param in self.conv1_s.parameters():
                param.requires_grad = False
            for param in self.conv1_t.parameters():
                param.requires_grad = False

        for i in range(1, self.frozen_stages + 1):
            m = getattr(self, f'layer{i}')
            m.eval()
            for param in m.parameters():
                param.requires_grad = False

    def init_weights(self):
        """Initiate the parameters either from existing checkpoint or from
        scratch."""
        if isinstance(self.pretrained, str):
            logger = get_root_logger()
            logger.info(f'load model from: {self.pretrained}')

            load_checkpoint(self, self.pretrained, strict=False, logger=logger)

        elif self.pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv3d):
                    kaiming_init(m)
                elif isinstance(m, _BatchNorm):
                    constant_init(m, 1)

            if self.zero_init_residual:
                for m in self.modules():
                    if isinstance(m, BlockX3D):
                        constant_init(m.conv3.bn, 0)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Defines the computation performed at every call.

        Args:
            x (torch.Tensor): The input data.

        Returns:
            torch.Tensor: The feature of the input
            samples extracted by the backbone.
        """
        x = self.conv1_s(x)
        x = self.conv1_t(x)
        for layer_name in self.res_layers:
            res_layer = getattr(self, layer_name)
            x = res_layer(x)
        x = self.conv5(x)
        return x

    def train(self, mode=True):
        """Set the optimization status when training."""
        super().train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    m.eval()
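
# A minimal usage sketch for the X3D backbone above; the X3D-M-like
# expansion factors are an assumption, and BlockX3D plus the helper
# imports used in the class must be available.
import torch

model = X3D(gamma_w=1.0, gamma_b=2.25, gamma_d=2.2)
model.init_weights()
clip = torch.randn(1, 3, 16, 224, 224)   # (N, C, T, H, W)
feat = model(clip)
print(feat.shape)   # (1, 432, 16, 7, 7): stride 32 in space, stride 1 in time
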
Example 6
def make_res_layer(block,
                   inplanes,
                   planes,
                   blocks,
                   stride=1,
                   dilation=1,
                   style='pytorch',
                   conv_cfg=None,
                   norm_cfg=None,
                   act_cfg=None,
                   with_cp=False):
    """Build residual layer for ResNet.

    Args:
        block (nn.Module): Residual module to be built.
        inplanes (int): Number of channels for the input feature in each block.
        planes (int): Number of channels for the output feature in each block.
        blocks (int): Number of residual blocks.
        stride (int): Stride in the conv layer. Default: 1.
        dilation (int): Spacing between kernel elements. Default: 1.
        style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two
            layer is the 3x3 conv layer, otherwise the stride-two layer is
            the first 1x1 conv layer. Default: 'pytorch'.
        conv_cfg (dict | None): Config for conv layers. Default: None.
        norm_cfg (dict | None): Config for norm layers. Default: None.
        act_cfg (dict | None): Config for activation layers. Default: None.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.

    Returns:
        nn.Module: A residual layer for the given config.
    """
    downsample = None
    if stride != 1 or inplanes != planes * block.expansion:
        downsample = ConvModule(
            inplanes,
            planes * block.expansion,
            kernel_size=1,
            stride=stride,
            bias=False,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

    layers = []
    layers.append(
        block(
            inplanes,
            planes,
            stride,
            dilation,
            downsample,
            style=style,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            with_cp=with_cp))
    inplanes = planes * block.expansion
    for _ in range(1, blocks):
        layers.append(
            block(
                inplanes,
                planes,
                1,
                dilation,
                style=style,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                with_cp=with_cp))

    return nn.Sequential(*layers)
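
# A hedged usage sketch for make_res_layer; Bottleneck is an assumed block
# class with expansion = 4 (e.g. from an mmcv/mmaction-style ResNet).
layer2 = make_res_layer(Bottleneck, inplanes=256, planes=128, blocks=4,
                        stride=2, norm_cfg=dict(type='BN'))
# -> nn.Sequential of 4 blocks; the first downsamples 256 -> 512 channels
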
Example 7
    def __init__(self,
                 img_channels,
                 pts_channels,
                 mid_channels,
                 out_channels,
                 img_levels=3,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None,
                 activate_out=True,
                 fuse_out=False,
                 dropout_ratio=0,
                 aligned=True,
                 align_corners=True,
                 padding_mode='zeros',
                 lateral_conv=True):
        super(PointFusion, self).__init__()
        if isinstance(img_levels, int):
            img_levels = [img_levels]
        if isinstance(img_channels, int):
            img_channels = [img_channels] * len(img_levels)
        assert isinstance(img_levels, list)
        assert isinstance(img_channels, list)
        assert len(img_channels) == len(img_levels)

        self.img_levels = img_levels
        self.act_cfg = act_cfg
        self.activate_out = activate_out
        self.fuse_out = fuse_out
        self.dropout_ratio = dropout_ratio
        self.img_channels = img_channels
        self.aligned = aligned
        self.align_corners = align_corners
        self.padding_mode = padding_mode

        self.lateral_convs = None
        if lateral_conv:
            self.lateral_convs = nn.ModuleList()
            for i in range(len(img_channels)):
                l_conv = ConvModule(img_channels[i],
                                    mid_channels,
                                    3,
                                    padding=1,
                                    conv_cfg=conv_cfg,
                                    norm_cfg=norm_cfg,
                                    act_cfg=self.act_cfg,
                                    inplace=False)
                self.lateral_convs.append(l_conv)
            self.img_transform = nn.Sequential(
                nn.Linear(mid_channels * len(img_channels), out_channels),
                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
            )
        else:
            self.img_transform = nn.Sequential(
                nn.Linear(sum(img_channels), out_channels),
                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
            )
        self.pts_transform = nn.Sequential(
            nn.Linear(pts_channels, out_channels),
            nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
        )

        if self.fuse_out:
            self.fuse_conv = nn.Sequential(
                nn.Linear(mid_channels, out_channels),
                # For pts the BN is initialized differently by default
                # TODO: check whether this is necessary
                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
                nn.ReLU(inplace=False))

        self.init_weights()
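
# A small shape sketch of the image-feature transform built above; the
# channel counts are illustrative, not the original config.
import torch
import torch.nn as nn

mid_channels, out_channels, num_levels = 128, 128, 3
img_transform = nn.Sequential(
    nn.Linear(mid_channels * num_levels, out_channels),
    nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01))
per_point = torch.randn(1024, mid_channels * num_levels)
print(img_transform(per_point).shape)   # torch.Size([1024, 128])
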
Example 8
    def __init__(self,
                 in_channels,
                 out_channels,
                 num_outs,
                 start_level=0,
                 end_level=-1,
                 add_extra_convs=False,
                 extra_convs_on_inputs=True,
                 relu_before_extra_convs=False,
                 no_norm_on_lateral=False,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None,
                 upsample_cfg=dict(mode='nearest')):
        super(DyFPN_B_CNNGate, self).__init__(in_channels, out_channels,
                                              num_outs)
        assert isinstance(in_channels, list)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.num_outs = num_outs
        self.relu_before_extra_convs = relu_before_extra_convs
        self.no_norm_on_lateral = no_norm_on_lateral
        self.fp16_enabled = False
        self.upsample_cfg = upsample_cfg.copy()

        if end_level == -1:
            self.backbone_end_level = self.num_ins
            assert num_outs >= self.num_ins - start_level
        else:
            # if end_level < inputs, no extra level is allowed
            self.backbone_end_level = end_level
            assert end_level <= len(in_channels)
            assert num_outs == end_level - start_level
        self.start_level = start_level
        self.end_level = end_level
        self.add_extra_convs = add_extra_convs
        self.gate_0 = Attention_SEblock(channels=in_channels[0],
                                        reduction=4,
                                        temperature=1)
        self.gate_1 = Attention_SEblock(channels=in_channels[1],
                                        reduction=4,
                                        temperature=1)
        self.gate_2 = Attention_SEblock(channels=in_channels[2],
                                        reduction=4,
                                        temperature=1)
        self.gate_3 = Attention_SEblock(channels=in_channels[3],
                                        reduction=4,
                                        temperature=1)

        assert isinstance(add_extra_convs, (str, bool))
        if isinstance(add_extra_convs, str):
            # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'
            assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
        elif add_extra_convs:  # True
            if extra_convs_on_inputs:
                # For compatibility with previous release
                # TODO: deprecate `extra_convs_on_inputs`
                self.add_extra_convs = 'on_input'
            else:
                self.add_extra_convs = 'on_output'

        self.lateral_convs = nn.ModuleList()
        self.fpn_convs = nn.ModuleList()
        self.skip_lateral_convs = nn.ModuleList()
        for i in range(self.start_level, self.backbone_end_level):
            l_conv_11 = ConvModule(
                in_channels[i],
                out_channels,
                1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)
            l_conv_33 = ConvModule(
                in_channels[i],
                out_channels,
                3,
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)
            l_conv_33_dilation_2 = ConvModule(
                in_channels[i],
                out_channels,
                3,
                padding=2,
                dilation=2,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)
            l_conv_33_dilation_3 = ConvModule(
                in_channels[i],
                out_channels,
                3,
                padding=3,
                dilation=3,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)

            l_conv_55 = ConvModule(
                in_channels[i],
                out_channels,
                5,
                padding=2,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)
            l_conv_55_dilation_2 = ConvModule(
                in_channels[i],
                out_channels,
                5,
                padding=4,
                dilation=2,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)
            l_conv_55_dilation_3 = ConvModule(
                in_channels[i],
                out_channels,
                5,
                padding=6,
                dilation=3,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)
            skip_l_conv_11 = ConvModule(
                in_channels[i],
                out_channels,
                1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)
            self.lateral_convs.append(l_conv_11)
            self.lateral_convs.append(l_conv_33)
            self.lateral_convs.append(l_conv_33_dilation_2)
            self.lateral_convs.append(l_conv_33_dilation_3)
            self.lateral_convs.append(l_conv_55)
            self.lateral_convs.append(l_conv_55_dilation_2)
            self.lateral_convs.append(l_conv_55_dilation_3)
            self.skip_lateral_convs.append(skip_l_conv_11)

            fpn_conv = ConvModule(out_channels,
                                  out_channels,
                                  3,
                                  padding=1,
                                  conv_cfg=conv_cfg,
                                  norm_cfg=norm_cfg,
                                  act_cfg=act_cfg,
                                  inplace=False)
            self.fpn_convs.append(fpn_conv)

        # add extra conv layers (e.g., RetinaNet)
        extra_levels = num_outs - self.backbone_end_level + self.start_level
        if self.add_extra_convs and extra_levels >= 1:
            for i in range(extra_levels):
                if i == 0 and self.add_extra_convs == 'on_input':
                    in_channels = self.in_channels[self.backbone_end_level - 1]
                else:
                    in_channels = out_channels
                extra_fpn_conv = ConvModule(in_channels,
                                            out_channels,
                                            3,
                                            stride=2,
                                            padding=1,
                                            conv_cfg=conv_cfg,
                                            norm_cfg=norm_cfg,
                                            act_cfg=act_cfg,
                                            inplace=False)
                self.fpn_convs.append(extra_fpn_conv)
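
# A quick standalone check of the kernel/dilation pairs used in the lateral
# conv bank above: every branch preserves spatial size because
# padding = dilation * (kernel - 1) // 2 (channel counts are illustrative).
import torch
import torch.nn as nn

x = torch.randn(1, 8, 32, 32)
for k, d in [(1, 1), (3, 1), (3, 2), (3, 3), (5, 1), (5, 2), (5, 3)]:
    conv = nn.Conv2d(8, 16, k, padding=d * (k - 1) // 2, dilation=d)
    assert conv(x).shape[-2:] == x.shape[-2:]
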
Example 9
    def __init__(self,
                 num_dims,
                 num_classes,
                 primitive_mode,
                 train_cfg=None,
                 test_cfg=None,
                 vote_module_cfg=None,
                 vote_aggregation_cfg=None,
                 feat_channels=(128, 128),
                 upper_thresh=100.0,
                 surface_thresh=0.5,
                 conv_cfg=dict(type='Conv1d'),
                 norm_cfg=dict(type='BN1d'),
                 objectness_loss=None,
                 center_loss=None,
                 semantic_reg_loss=None,
                 semantic_cls_loss=None):
        super(PrimitiveHead, self).__init__()
        assert primitive_mode in ['z', 'xy', 'line']
        # The dimension of primitive semantic information.
        self.num_dims = num_dims
        self.num_classes = num_classes
        self.primitive_mode = primitive_mode
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.gt_per_seed = vote_module_cfg['gt_per_seed']
        self.num_proposal = vote_aggregation_cfg['num_point']
        self.upper_thresh = upper_thresh
        self.surface_thresh = surface_thresh

        self.objectness_loss = build_loss(objectness_loss)
        self.center_loss = build_loss(center_loss)
        self.semantic_reg_loss = build_loss(semantic_reg_loss)
        self.semantic_cls_loss = build_loss(semantic_cls_loss)

        assert vote_aggregation_cfg['mlp_channels'][0] == vote_module_cfg[
            'in_channels']

        # Primitive existence flag prediction
        self.flag_conv = ConvModule(vote_module_cfg['conv_channels'][-1],
                                    vote_module_cfg['conv_channels'][-1] // 2,
                                    1,
                                    padding=0,
                                    conv_cfg=conv_cfg,
                                    norm_cfg=norm_cfg,
                                    bias=True,
                                    inplace=True)
        self.flag_pred = torch.nn.Conv1d(
            vote_module_cfg['conv_channels'][-1] // 2, 2, 1)

        self.vote_module = VoteModule(**vote_module_cfg)
        self.vote_aggregation = PointSAModule(**vote_aggregation_cfg)

        prev_channel = vote_aggregation_cfg['mlp_channels'][-1]
        conv_pred_list = list()
        for k in range(len(feat_channels)):
            conv_pred_list.append(
                ConvModule(prev_channel,
                           feat_channels[k],
                           1,
                           padding=0,
                           conv_cfg=conv_cfg,
                           norm_cfg=norm_cfg,
                           bias=True,
                           inplace=True))
            prev_channel = feat_channels[k]
        self.conv_pred = nn.Sequential(*conv_pred_list)

        conv_out_channel = 3 + num_dims + num_classes
        self.conv_pred.add_module('conv_out',
                                  nn.Conv1d(prev_channel, conv_out_channel, 1))
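
# A shape sketch of the prediction tail above; the channel counts are
# illustrative assumptions, not the original config.
import torch
import torch.nn as nn

prev_channel, num_dims, num_classes = 128, 2, 18
conv_out = nn.Conv1d(prev_channel, 3 + num_dims + num_classes, 1)
feats = torch.randn(1, prev_channel, 256)   # (B, C, num_proposal)
print(conv_out(feats).shape)   # (1, 23, 256): center + dims + class scores
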
Example 10
    def __init__(self,
                 in_channels,
                 stem_channels=1024,
                 num_blocks=2,
                 kernel_sizes=(3, 3, 3),
                 dropout=0.25,
                 causal=False,
                 residual=True,
                 use_stride_conv=False,
                 conv_cfg=dict(type='Conv1d'),
                 norm_cfg=dict(type='BN1d'),
                 max_norm=None):
        # Protect mutable default arguments
        conv_cfg = copy.deepcopy(conv_cfg)
        norm_cfg = copy.deepcopy(norm_cfg)
        super().__init__()
        self.in_channels = in_channels
        self.stem_channels = stem_channels
        self.num_blocks = num_blocks
        self.kernel_sizes = kernel_sizes
        self.dropout = dropout
        self.causal = causal
        self.residual = residual
        self.use_stride_conv = use_stride_conv
        self.max_norm = max_norm

        assert num_blocks == len(kernel_sizes) - 1
        for ks in kernel_sizes:
            assert ks % 2 == 1, 'Only odd filter widths are supported.'

        self.expand_conv = ConvModule(
            in_channels,
            stem_channels,
            kernel_size=kernel_sizes[0],
            stride=kernel_sizes[0] if use_stride_conv else 1,
            bias='auto',
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg)

        dilation = kernel_sizes[0]
        self.tcn_blocks = nn.ModuleList()
        for i in range(1, num_blocks + 1):
            self.tcn_blocks.append(
                BasicTemporalBlock(in_channels=stem_channels,
                                   out_channels=stem_channels,
                                   mid_channels=stem_channels,
                                   kernel_size=kernel_sizes[i],
                                   dilation=dilation,
                                   dropout=dropout,
                                   causal=causal,
                                   residual=residual,
                                   use_stride_conv=use_stride_conv,
                                   conv_cfg=conv_cfg,
                                   norm_cfg=norm_cfg))
            dilation *= kernel_sizes[i]

        if self.max_norm is not None:
            # Apply weight norm clip to conv layers
            weight_clip = WeightNormClipHook(self.max_norm)
            for module in self.modules():
                if isinstance(module, nn.modules.conv._ConvNd):
                    weight_clip.register(module)

        self.dropout = nn.Dropout(dropout) if dropout > 0 else None
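
# The dilation schedule above multiplies the temporal receptive field by
# each kernel size, so the default kernel_sizes=(3, 3, 3) covers
# 3 * 3 * 3 = 27 input frames per output frame.
kernel_sizes = (3, 3, 3)
receptive_field = 1
for ks in kernel_sizes:
    receptive_field *= ks
print(receptive_field)   # 27
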
Example 11
    def __init__(self,
                 num_classes,
                 seg_in_channels,
                 part_in_channels,
                 seg_conv_channels=None,
                 part_conv_channels=None,
                 merge_conv_channels=None,
                 down_conv_channels=None,
                 shared_fc_channels=None,
                 cls_channels=None,
                 reg_channels=None,
                 dropout_ratio=0.1,
                 roi_feat_size=14,
                 with_corner_loss=True,
                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
                 conv_cfg=dict(type='Conv1d'),
                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                 loss_bbox=dict(type='SmoothL1Loss',
                                beta=1.0 / 9.0,
                                loss_weight=2.0),
                 loss_cls=dict(type='CrossEntropyLoss',
                               use_sigmoid=True,
                               reduction='none',
                               loss_weight=1.0),
                 init_cfg=None):
        super(PartA2BboxHead, self).__init__(init_cfg=init_cfg)
        self.num_classes = num_classes
        self.with_corner_loss = with_corner_loss
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_bbox = build_loss(loss_bbox)
        self.loss_cls = build_loss(loss_cls)
        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)

        assert down_conv_channels[-1] == shared_fc_channels[0]

        # init layers
        part_channel_last = part_in_channels
        part_conv = []
        for i, channel in enumerate(part_conv_channels):
            part_conv.append(
                make_sparse_convmodule(part_channel_last,
                                       channel,
                                       3,
                                       padding=1,
                                       norm_cfg=norm_cfg,
                                       indice_key=f'rcnn_part{i}',
                                       conv_type='SubMConv3d'))
            part_channel_last = channel
        self.part_conv = spconv.SparseSequential(*part_conv)

        seg_channel_last = seg_in_channels
        seg_conv = []
        for i, channel in enumerate(seg_conv_channels):
            seg_conv.append(
                make_sparse_convmodule(seg_channel_last,
                                       channel,
                                       3,
                                       padding=1,
                                       norm_cfg=norm_cfg,
                                       indice_key=f'rcnn_seg{i}',
                                       conv_type='SubMConv3d'))
            seg_channel_last = channel
        self.seg_conv = spconv.SparseSequential(*seg_conv)

        self.conv_down = spconv.SparseSequential()

        merge_conv_channel_last = part_channel_last + seg_channel_last
        merge_conv = []
        for i, channel in enumerate(merge_conv_channels):
            merge_conv.append(
                make_sparse_convmodule(merge_conv_channel_last,
                                       channel,
                                       3,
                                       padding=1,
                                       norm_cfg=norm_cfg,
                                       indice_key='rcnn_down0'))
            merge_conv_channel_last = channel

        down_conv_channel_last = merge_conv_channel_last
        conv_down = []
        for i, channel in enumerate(down_conv_channels):
            conv_down.append(
                make_sparse_convmodule(down_conv_channel_last,
                                       channel,
                                       3,
                                       padding=1,
                                       norm_cfg=norm_cfg,
                                       indice_key='rcnn_down1'))
            down_conv_channel_last = channel

        self.conv_down.add_module('merge_conv',
                                  spconv.SparseSequential(*merge_conv))
        self.conv_down.add_module(
            'max_pool3d', spconv.SparseMaxPool3d(kernel_size=2, stride=2))
        self.conv_down.add_module('down_conv',
                                  spconv.SparseSequential(*conv_down))

        shared_fc_list = []
        pool_size = roi_feat_size // 2
        pre_channel = shared_fc_channels[0] * pool_size**3
        for k in range(1, len(shared_fc_channels)):
            shared_fc_list.append(
                ConvModule(pre_channel,
                           shared_fc_channels[k],
                           1,
                           padding=0,
                           conv_cfg=conv_cfg,
                           norm_cfg=norm_cfg,
                           inplace=True))
            pre_channel = shared_fc_channels[k]

            if k != len(shared_fc_channels) - 1 and dropout_ratio > 0:
                shared_fc_list.append(nn.Dropout(dropout_ratio))

        self.shared_fc = nn.Sequential(*shared_fc_list)

        # Classification layer
        channel_in = shared_fc_channels[-1]
        cls_channel = 1
        cls_layers = []
        pre_channel = channel_in
        for k in range(0, len(cls_channels)):
            cls_layers.append(
                ConvModule(pre_channel,
                           cls_channels[k],
                           1,
                           padding=0,
                           conv_cfg=conv_cfg,
                           norm_cfg=norm_cfg,
                           inplace=True))
            pre_channel = cls_channels[k]
        cls_layers.append(
            ConvModule(pre_channel,
                       cls_channel,
                       1,
                       padding=0,
                       conv_cfg=conv_cfg,
                       act_cfg=None))
        if dropout_ratio >= 0:
            cls_layers.insert(1, nn.Dropout(dropout_ratio))

        self.conv_cls = nn.Sequential(*cls_layers)

        # Regression layer
        reg_layers = []
        pre_channel = channel_in
        for k in range(0, len(reg_channels)):
            reg_layers.append(
                ConvModule(pre_channel,
                           reg_channels[k],
                           1,
                           padding=0,
                           conv_cfg=conv_cfg,
                           norm_cfg=norm_cfg,
                           inplace=True))
            pre_channel = reg_channels[k]
        reg_layers.append(
            ConvModule(pre_channel,
                       self.bbox_coder.code_size,
                       1,
                       padding=0,
                       conv_cfg=conv_cfg,
                       act_cfg=None))
        if dropout_ratio >= 0:
            reg_layers.insert(1, nn.Dropout(dropout_ratio))

        self.conv_reg = nn.Sequential(*reg_layers)

        if init_cfg is None:
            self.init_cfg = dict(type='Xavier',
                                 layer=['Conv2d', 'Conv1d'],
                                 distribution='uniform')
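# A hedged walk-through of the shared-FC sizing above: after the 2x
# SparseMaxPool3d, each RoI grid shrinks from roi_feat_size to
# roi_feat_size // 2 per axis, so the first 1x1 ConvModule sees the flattened
# volume shared_fc_channels[0] * pool_size**3. The numbers below are
# illustrative, not taken from any particular config.
roi_feat_size = 14
shared_fc_channels = [256, 512, 512]
pool_size = roi_feat_size // 2                        # 7
pre_channel = shared_fc_channels[0] * pool_size ** 3  # 256 * 343 = 87808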
Example No. 12
    def __init__(self,
                 out_channels=128,
                 align_corners=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super(BGALayer, self).__init__(init_cfg=init_cfg)
        self.out_channels = out_channels
        self.align_corners = align_corners
        self.detail_dwconv = nn.Sequential(
            DepthwiseSeparableConvModule(
                in_channels=self.out_channels,
                out_channels=self.out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                dw_norm_cfg=norm_cfg,
                dw_act_cfg=None,
                pw_norm_cfg=None,
                pw_act_cfg=None,
            ))
        self.detail_down = nn.Sequential(
            ConvModule(
                in_channels=self.out_channels,
                out_channels=self.out_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                bias=False,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None),
            nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False))
        self.semantic_conv = nn.Sequential(
            ConvModule(
                in_channels=self.out_channels,
                out_channels=self.out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=False,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None))
        self.semantic_dwconv = nn.Sequential(
            DepthwiseSeparableConvModule(
                in_channels=self.out_channels,
                out_channels=self.out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                dw_norm_cfg=norm_cfg,
                dw_act_cfg=None,
                pw_norm_cfg=None,
                pw_act_cfg=None,
            ))
        self.conv = ConvModule(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            inplace=True,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
        )
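# Hedged smoke test for the layer above. In BiSeNetV2 the BGA layer fuses a
# detail-branch map with a (typically 4x smaller) semantic-branch map; the
# forward signature forward(x_d, x_s) is an assumption here.
import torch

bga = BGALayer(out_channels=128)
x_detail = torch.randn(1, 128, 64, 64)
x_semantic = torch.randn(1, 128, 16, 16)
out = bga(x_detail, x_semantic)  # expected shape: (1, 128, 64, 64)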
Example No. 13
    def __init__(self,
                 detail_channels=(64, 64, 128),
                 in_channels=3,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super(DetailBranch, self).__init__(init_cfg=init_cfg)
        detail_branch = []
        for i in range(len(detail_channels)):
            if i == 0:
                detail_branch.append(
                    nn.Sequential(
                        ConvModule(
                            in_channels=in_channels,
                            out_channels=detail_channels[i],
                            kernel_size=3,
                            stride=2,
                            padding=1,
                            conv_cfg=conv_cfg,
                            norm_cfg=norm_cfg,
                            act_cfg=act_cfg),
                        ConvModule(
                            in_channels=detail_channels[i],
                            out_channels=detail_channels[i],
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            conv_cfg=conv_cfg,
                            norm_cfg=norm_cfg,
                            act_cfg=act_cfg)))
            else:
                detail_branch.append(
                    nn.Sequential(
                        ConvModule(
                            in_channels=detail_channels[i - 1],
                            out_channels=detail_channels[i],
                            kernel_size=3,
                            stride=2,
                            padding=1,
                            conv_cfg=conv_cfg,
                            norm_cfg=norm_cfg,
                            act_cfg=act_cfg),
                        ConvModule(
                            in_channels=detail_channels[i],
                            out_channels=detail_channels[i],
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            conv_cfg=conv_cfg,
                            norm_cfg=norm_cfg,
                            act_cfg=act_cfg),
                        ConvModule(
                            in_channels=detail_channels[i],
                            out_channels=detail_channels[i],
                            kernel_size=3,
                            stride=1,
                            padding=1,
                            conv_cfg=conv_cfg,
                            norm_cfg=norm_cfg,
                            act_cfg=act_cfg)))
        self.detail_branch = nn.ModuleList(detail_branch)
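# Quick shape check (hedged): each stage's first ConvModule uses stride=2, so
# the three stages above downsample by 8 overall.
import torch

branch = DetailBranch(detail_channels=(64, 64, 128), in_channels=3)
x = torch.randn(1, 3, 256, 256)
for stage in branch.detail_branch:
    x = stage(x)
print(x.shape)  # torch.Size([1, 128, 32, 32])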
Example No. 14
    def __init__(self,
                 in_channels,
                 out_channels,
                 exp_ratio=6,
                 stride=1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        super(GELayer, self).__init__(init_cfg=init_cfg)
        mid_channel = in_channels * exp_ratio
        self.conv1 = ConvModule(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)
        if stride == 1:
            self.dwconv = nn.Sequential(
                # ReLU in ConvModule not shown in paper
                ConvModule(
                    in_channels=in_channels,
                    out_channels=mid_channel,
                    kernel_size=3,
                    stride=stride,
                    padding=1,
                    groups=in_channels,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg))
            self.shortcut = None
        else:
            self.dwconv = nn.Sequential(
                ConvModule(
                    in_channels=in_channels,
                    out_channels=mid_channel,
                    kernel_size=3,
                    stride=stride,
                    padding=1,
                    groups=in_channels,
                    bias=False,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=None),
                # ReLU in ConvModule not shown in paper
                ConvModule(
                    in_channels=mid_channel,
                    out_channels=mid_channel,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    groups=mid_channel,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg),
            )
            self.shortcut = nn.Sequential(
                DepthwiseSeparableConvModule(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    stride=stride,
                    padding=1,
                    dw_norm_cfg=norm_cfg,
                    dw_act_cfg=None,
                    pw_norm_cfg=norm_cfg,
                    pw_act_cfg=None,
                ))

        self.conv2 = nn.Sequential(
            ConvModule(
                in_channels=mid_channel,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                bias=False,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None,
            ))

        self.act = build_activation_layer(act_cfg)
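    # A plausible forward pass for this block (a hedged sketch, not
    # necessarily the exact upstream implementation): expand via conv1 and
    # the depthwise stack, project back with conv2, then add the residual
    # (through self.shortcut when stride > 1) and apply the final activation.
    def forward(self, x):
        identity = x
        x = self.conv1(x)
        x = self.dwconv(x)
        x = self.conv2(x)
        if self.shortcut is not None:
            identity = self.shortcut(identity)
        return self.act(x + identity)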
Example No. 15
    def __init__(self,
                 in_channels,
                 out_channels,
                 num_outs,
                 start_level=0,
                 end_level=-1,
                 norm_cfg=None,
                 act_cfg=None,
                 order=('conv', 'norm', 'act'),
                 upsample_cfg=dict(type='carafe',
                                   up_kernel=5,
                                   up_group=1,
                                   encoder_kernel=3,
                                   encoder_dilation=1)):
        super(FPN_CARAFE_3_3_pow_norm, self).__init__()
        assert isinstance(in_channels, list)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.num_outs = num_outs
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.with_bias = norm_cfg is None
        self.upsample_cfg = upsample_cfg.copy()
        self.upsample = self.upsample_cfg.get('type')
        self.relu = nn.ReLU(inplace=False)

        self.order = order
        assert order in [('conv', 'norm', 'act'), ('act', 'conv', 'norm')]

        assert self.upsample in [
            'nearest', 'bilinear', 'deconv', 'pixel_shuffle', 'carafe', None
        ]
        if self.upsample in ['deconv', 'pixel_shuffle']:
            assert hasattr(
                self.upsample_cfg,
                'upsample_kernel') and self.upsample_cfg.upsample_kernel > 0
            self.upsample_kernel = self.upsample_cfg.pop('upsample_kernel')

        if end_level == -1:
            self.backbone_end_level = self.num_ins
            assert num_outs >= self.num_ins - start_level
        else:
            # if end_level < inputs, no extra level is allowed
            self.backbone_end_level = end_level
            assert end_level <= len(in_channels)
            assert num_outs == end_level - start_level
        self.start_level = start_level
        self.end_level = end_level

        self.lateral_convs = nn.ModuleList()
        self.fpn_convs = nn.ModuleList()
        self.upsample_modules = nn.ModuleList()

        for i in range(self.start_level, self.backbone_end_level):
            l_conv = ConvModule(in_channels[i],
                                out_channels,
                                1,
                                norm_cfg=norm_cfg,
                                bias=self.with_bias,
                                act_cfg=act_cfg,
                                inplace=False,
                                order=self.order)
            fpn_conv = ConvModule(out_channels,
                                  out_channels,
                                  3,
                                  padding=1,
                                  norm_cfg=self.norm_cfg,
                                  bias=self.with_bias,
                                  act_cfg=act_cfg,
                                  inplace=False,
                                  order=self.order)
            if i != self.backbone_end_level - 1:
                upsample_cfg_ = self.upsample_cfg.copy()
                if self.upsample == 'deconv':
                    upsample_cfg_.update(
                        in_channels=out_channels,
                        out_channels=out_channels,
                        kernel_size=self.upsample_kernel,
                        stride=2,
                        padding=(self.upsample_kernel - 1) // 2,
                        output_padding=(self.upsample_kernel - 1) // 2)
                elif self.upsample == 'pixel_shuffle':
                    upsample_cfg_.update(in_channels=out_channels,
                                         out_channels=out_channels,
                                         scale_factor=2,
                                         upsample_kernel=self.upsample_kernel)
                elif self.upsample == 'carafe':
                    upsample_cfg_.update(channels=out_channels, scale_factor=2)
                else:
                    # suppress warnings
                    align_corners = (None
                                     if self.upsample == 'nearest' else False)
                    upsample_cfg_.update(scale_factor=2,
                                         mode=self.upsample,
                                         align_corners=align_corners)
                upsample_module = build_upsample_layer(upsample_cfg_)
                self.upsample_modules.append(upsample_module)
            self.lateral_convs.append(l_conv)
            self.fpn_convs.append(fpn_conv)

        # add extra conv layers (e.g., RetinaNet)
        extra_out_levels = (num_outs - self.backbone_end_level +
                            self.start_level)
        if extra_out_levels >= 1:
            for i in range(extra_out_levels):
                in_channels = (self.in_channels[self.backbone_end_level -
                                                1] if i == 0 else out_channels)
                extra_l_conv = ConvModule(in_channels,
                                          out_channels,
                                          3,
                                          stride=2,
                                          padding=1,
                                          norm_cfg=norm_cfg,
                                          bias=self.with_bias,
                                          act_cfg=act_cfg,
                                          inplace=False,
                                          order=self.order)
                if self.upsample == 'deconv':
                    upsampler_cfg_ = dict(
                        in_channels=out_channels,
                        out_channels=out_channels,
                        kernel_size=self.upsample_kernel,
                        stride=2,
                        padding=(self.upsample_kernel - 1) // 2,
                        output_padding=(self.upsample_kernel - 1) // 2)
                elif self.upsample == 'pixel_shuffle':
                    upsampler_cfg_ = dict(in_channels=out_channels,
                                          out_channels=out_channels,
                                          scale_factor=2,
                                          upsample_kernel=self.upsample_kernel)
                elif self.upsample == 'carafe':
                    upsampler_cfg_ = dict(channels=out_channels,
                                          scale_factor=2,
                                          **self.upsample_cfg)
                else:
                    # suppress warnings
                    align_corners = (None
                                     if self.upsample == 'nearest' else False)
                    upsampler_cfg_ = dict(scale_factor=2,
                                          mode=self.upsample,
                                          align_corners=align_corners)
                upsampler_cfg_['type'] = self.upsample
                upsample_module = build_upsample_layer(upsampler_cfg_)
                extra_fpn_conv = ConvModule(out_channels,
                                            out_channels,
                                            3,
                                            padding=1,
                                            norm_cfg=self.norm_cfg,
                                            bias=self.with_bias,
                                            act_cfg=act_cfg,
                                            inplace=False,
                                            order=self.order)
                self.upsample_modules.append(upsample_module)
                self.fpn_convs.append(extra_fpn_conv)
                self.lateral_convs.append(extra_l_conv)
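# For reference, the upsample_cfg shapes this neck accepts (hedged; these
# mirror the branches above rather than any single upstream config):
upsample_cfg_examples = [
    dict(type='carafe', up_kernel=5, up_group=1,
         encoder_kernel=3, encoder_dilation=1),     # CARAFE (the default)
    dict(type='deconv', upsample_kernel=4),         # transposed convolution
    dict(type='pixel_shuffle', upsample_kernel=3),  # sub-pixel convolution
    dict(type='nearest'),                           # plain interpolation
]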
Example No. 16
    def __init__(self,
                 ind,
                 num_units,
                 in_channels,
                 unit_channels=256,
                 gen_skip=False,
                 gen_cross_conv=False,
                 norm_cfg=dict(type='BN'),
                 out_channels=64):
        # Protect mutable default arguments
        norm_cfg = cp.deepcopy(norm_cfg)
        super().__init__()
        self.num_units = num_units
        self.norm_cfg = norm_cfg
        self.in_skip = ConvModule(
            in_channels,
            unit_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            norm_cfg=self.norm_cfg,
            act_cfg=None,
            inplace=True)
        self.relu = nn.ReLU(inplace=True)

        self.ind = ind
        if self.ind > 0:
            self.up_conv = ConvModule(
                unit_channels,
                unit_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                norm_cfg=self.norm_cfg,
                act_cfg=None,
                inplace=True)

        self.gen_skip = gen_skip
        if self.gen_skip:
            self.out_skip1 = ConvModule(
                in_channels,
                in_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                norm_cfg=self.norm_cfg,
                inplace=True)

            self.out_skip2 = ConvModule(
                unit_channels,
                in_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                norm_cfg=self.norm_cfg,
                inplace=True)

        self.gen_cross_conv = gen_cross_conv
        if self.ind == num_units - 1 and self.gen_cross_conv:
            self.cross_conv = ConvModule(
                unit_channels,
                out_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                norm_cfg=self.norm_cfg,
                inplace=True)
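    # A hedged sketch of how these pieces combine at call time (the real unit
    # also returns skip and cross-conv outputs; the signature below is an
    # assumption for illustration):
    def forward(self, x, up_x):
        import torch.nn.functional as F  # local import keeps the sketch self-contained
        out = self.in_skip(x)
        if self.ind > 0:
            up_x = F.interpolate(up_x, size=x.shape[-2:],
                                 mode='bilinear', align_corners=True)
            out = out + self.up_conv(up_x)
        return self.relu(out)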
Example No. 17
    def __init__(self,
                 in_channels,
                 out_channels,
                 num_outs,
                 stack_times,
                 start_level=0,
                 end_level=-1,
                 add_extra_convs=False,
                 norm_cfg=None):
        super(NASFPN, self).__init__()
        assert isinstance(in_channels, list)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)  # num of input feature levels
        self.num_outs = num_outs  # num of output feature levels
        self.stack_times = stack_times
        self.norm_cfg = norm_cfg

        if end_level == -1:
            self.backbone_end_level = self.num_ins
            assert num_outs >= self.num_ins - start_level
        else:
            # if end_level < inputs, no extra level is allowed
            self.backbone_end_level = end_level
            assert end_level <= len(in_channels)
            assert num_outs == end_level - start_level
        self.start_level = start_level
        self.end_level = end_level
        self.add_extra_convs = add_extra_convs

        # add lateral connections
        self.lateral_convs = nn.ModuleList()
        for i in range(self.start_level, self.backbone_end_level):
            l_conv = ConvModule(in_channels[i],
                                out_channels,
                                1,
                                norm_cfg=norm_cfg,
                                act_cfg=None)
            self.lateral_convs.append(l_conv)

        # add extra downsample layers (stride-2 pooling or conv)
        extra_levels = num_outs - self.backbone_end_level + self.start_level
        self.extra_downsamples = nn.ModuleList()
        for i in range(extra_levels):
            extra_conv = ConvModule(out_channels,
                                    out_channels,
                                    1,
                                    norm_cfg=norm_cfg,
                                    act_cfg=None)
            self.extra_downsamples.append(
                nn.Sequential(extra_conv, nn.MaxPool2d(2, 2)))

        # add NAS FPN connections
        self.fpn_stages = nn.ModuleList()
        for _ in range(self.stack_times):
            stage = nn.ModuleDict()
            # gp(p6, p4) -> p4_1
            stage['gp_64_4'] = GlobalPoolingCell(in_channels=out_channels,
                                                 out_channels=out_channels,
                                                 out_norm_cfg=norm_cfg)
            # sum(p4_1, p4) -> p4_2
            stage['sum_44_4'] = SumCell(in_channels=out_channels,
                                        out_channels=out_channels,
                                        out_norm_cfg=norm_cfg)
            # sum(p4_2, p3) -> p3_out
            stage['sum_43_3'] = SumCell(in_channels=out_channels,
                                        out_channels=out_channels,
                                        out_norm_cfg=norm_cfg)
            # sum(p3_out, p4_2) -> p4_out
            stage['sum_34_4'] = SumCell(in_channels=out_channels,
                                        out_channels=out_channels,
                                        out_norm_cfg=norm_cfg)
            # sum(p5, gp(p4_out, p3_out)) -> p5_out
            stage['gp_43_5'] = GlobalPoolingCell(with_out_conv=False)
            stage['sum_55_5'] = SumCell(in_channels=out_channels,
                                        out_channels=out_channels,
                                        out_norm_cfg=norm_cfg)
            # sum(p7, gp(p5_out, p4_2)) -> p7_out
            stage['gp_54_7'] = GlobalPoolingCell(with_out_conv=False)
            stage['sum_77_7'] = SumCell(in_channels=out_channels,
                                        out_channels=out_channels,
                                        out_norm_cfg=norm_cfg)
            # gp(p7_out, p5_out) -> p6_out
            stage['gp_75_6'] = GlobalPoolingCell(in_channels=out_channels,
                                                 out_channels=out_channels,
                                                 out_norm_cfg=norm_cfg)
            self.fpn_stages.append(stage)
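# The dict above wires NAS-FPN "merging cells": a SumCell resizes one input to
# the other's resolution, adds them and applies a post-conv, while a
# GlobalPoolingCell reweights one input by global-pooled attention from the
# other. A minimal SumCell sketch (assumed from the paper, not mmdet's code):
import torch.nn.functional as F

def sum_cell(x1, x2, out_conv):
    x2 = F.interpolate(x2, size=x1.shape[-2:], mode='nearest')
    return out_conv(x1 + x2)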
Example No. 18
    def __init__(self,
                 st_feat_channels,
                 lt_feat_channels,
                 latent_channels,
                 num_st_feat,
                 num_lt_feat,
                 use_scale=True,
                 pre_activate=True,
                 pre_activate_with_ln=True,
                 conv_cfg=None,
                 norm_cfg=None,
                 dropout_ratio=0.2,
                 zero_init_out_conv=False):
        super().__init__()
        if conv_cfg is None:
            conv_cfg = dict(type='Conv3d')
        self.st_feat_channels = st_feat_channels
        self.lt_feat_channels = lt_feat_channels
        self.latent_channels = latent_channels
        self.num_st_feat = num_st_feat
        self.num_lt_feat = num_lt_feat
        self.use_scale = use_scale
        self.pre_activate = pre_activate
        self.pre_activate_with_ln = pre_activate_with_ln
        self.dropout_ratio = dropout_ratio
        self.zero_init_out_conv = zero_init_out_conv

        self.st_feat_conv = ConvModule(self.st_feat_channels,
                                       self.latent_channels,
                                       kernel_size=1,
                                       conv_cfg=conv_cfg,
                                       norm_cfg=norm_cfg,
                                       act_cfg=None)

        self.lt_feat_conv = ConvModule(self.lt_feat_channels,
                                       self.latent_channels,
                                       kernel_size=1,
                                       conv_cfg=conv_cfg,
                                       norm_cfg=norm_cfg,
                                       act_cfg=None)

        self.global_conv = ConvModule(self.lt_feat_channels,
                                      self.latent_channels,
                                      kernel_size=1,
                                      conv_cfg=conv_cfg,
                                      norm_cfg=norm_cfg,
                                      act_cfg=None)

        if pre_activate:
            self.ln = nn.LayerNorm([latent_channels, num_st_feat, 1, 1])
        else:
            self.ln = nn.LayerNorm([st_feat_channels, num_st_feat, 1, 1])

        self.relu = nn.ReLU()

        self.out_conv = ConvModule(self.latent_channels,
                                   self.st_feat_channels,
                                   kernel_size=1,
                                   conv_cfg=conv_cfg,
                                   norm_cfg=norm_cfg,
                                   act_cfg=None)

        if self.dropout_ratio > 0:
            self.dropout = nn.Dropout(self.dropout_ratio)
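# A hedged sketch of the non-local step these 1x1 convs support: st_feat_conv
# yields the query, lt_feat_conv the key, and global_conv the value (the role
# names are assumptions for illustration). Inputs are assumed flattened to
# (N, C, num_feats).
import torch

def non_local(query, key, value, use_scale=True):
    attn = torch.einsum('nct,ncs->nts', query, key)
    if use_scale:
        attn = attn / query.shape[1] ** 0.5  # scale by sqrt(latent_channels)
    attn = attn.softmax(dim=-1)
    return torch.einsum('nts,ncs->nct', attn, value)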
Example No. 19
class ResNet(nn.Module):
    """ResNet backbone.

    Args:
        depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.
        pretrained (str | None): Name of pretrained model. Default: None.
        in_channels (int): Channel num of input features. Default: 3.
        num_stages (int): Resnet stages. Default: 4.
        strides (Sequence[int]): Strides of the first block of each stage.
        out_indices (Sequence[int]): Indices of output feature. Default: (3, ).
        dilations (Sequence[int]): Dilation of each stage.
        style (str): ``pytorch`` or ``caffe``. If set to "pytorch", the
            stride-two layer is the 3x3 conv layer, otherwise the stride-two
            layer is the first 1x1 conv layer. Default: ``pytorch``.
        frozen_stages (int): Stages to be frozen (all param fixed). -1 means
            not freezing any parameters. Default: -1.
        conv_cfg (dict): Config for conv layers. Default: dict(type='Conv').
        norm_cfg (dict):
            Config for norm layers. required keys are `type` and
            `requires_grad`. Default: dict(type='BN2d', requires_grad=True).
        act_cfg (dict): Config for activate layers.
            Default: dict(type='ReLU', inplace=True).
        norm_eval (bool): Whether to set BN layers to eval mode, namely, freeze
            running stats (mean and var). Default: False.
        partial_bn (bool): Whether to use partial bn. Default: False.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
    """

    arch_settings = {
        18: (BasicBlock, (2, 2, 2, 2)),
        34: (BasicBlock, (3, 4, 6, 3)),
        50: (Bottleneck, (3, 4, 6, 3)),
        101: (Bottleneck, (3, 4, 23, 3)),
        152: (Bottleneck, (3, 8, 36, 3))
    }

    def __init__(self,
                 depth,
                 pretrained=None,
                 torchvision_pretrain=True,
                 in_channels=3,
                 num_stages=4,
                 out_indices=(3, ),
                 strides=(1, 2, 2, 2),
                 dilations=(1, 1, 1, 1),
                 style='pytorch',
                 frozen_stages=-1,
                 conv_cfg=dict(type='Conv'),
                 norm_cfg=dict(type='BN2d', requires_grad=True),
                 act_cfg=dict(type='ReLU', inplace=True),
                 norm_eval=False,
                 partial_bn=False,
                 with_cp=False):
        super().__init__()
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for resnet')
        self.depth = depth
        self.in_channels = in_channels
        self.pretrained = pretrained
        self.torchvision_pretrain = torchvision_pretrain
        self.num_stages = num_stages
        assert 1 <= num_stages <= 4
        self.out_indices = out_indices
        assert max(out_indices) < num_stages
        self.strides = strides
        self.dilations = dilations
        assert len(strides) == len(dilations) == num_stages
        self.style = style
        self.frozen_stages = frozen_stages
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.norm_eval = norm_eval
        self.partial_bn = partial_bn
        self.with_cp = with_cp

        self.block, stage_blocks = self.arch_settings[depth]
        self.stage_blocks = stage_blocks[:num_stages]
        self.inplanes = 64

        self._make_stem_layer()

        self.res_layers = []
        for i, num_blocks in enumerate(self.stage_blocks):
            stride = strides[i]
            dilation = dilations[i]
            planes = 64 * 2**i
            res_layer = make_res_layer(
                self.block,
                self.inplanes,
                planes,
                num_blocks,
                stride=stride,
                dilation=dilation,
                style=self.style,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                with_cp=with_cp)
            self.inplanes = planes * self.block.expansion
            layer_name = f'layer{i + 1}'
            self.add_module(layer_name, res_layer)
            self.res_layers.append(layer_name)

        self.feat_dim = self.block.expansion * 64 * 2**(
            len(self.stage_blocks) - 1)

    def _make_stem_layer(self):
        """Construct the stem layers consists of a conv+norm+act module and a
        pooling layer."""
        self.conv1 = ConvModule(
            self.in_channels,
            64,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    @staticmethod
    def _load_conv_params(conv, state_dict_tv, module_name_tv,
                          loaded_param_names):
        """Load the conv parameters of resnet from torchvision.

        Args:
            conv (nn.Module): The destination conv module.
            state_dict_tv (OrderedDict): The state dict of pretrained
                torchvision model.
            module_name_tv (str): The name of corresponding conv module in the
                torchvision model.
            loaded_param_names (list[str]): List of parameters that have been
                loaded.
        """

        weight_tv_name = module_name_tv + '.weight'
        if conv.weight.data.shape == state_dict_tv[weight_tv_name].shape:
            conv.weight.data.copy_(state_dict_tv[weight_tv_name])
            loaded_param_names.append(weight_tv_name)

        if getattr(conv, 'bias') is not None:
            bias_tv_name = module_name_tv + '.bias'
            if conv.bias.data.shape == state_dict_tv[bias_tv_name].shape:
                conv.bias.data.copy_(state_dict_tv[bias_tv_name])
                loaded_param_names.append(bias_tv_name)

    @staticmethod
    def _load_bn_params(bn, state_dict_tv, module_name_tv, loaded_param_names):
        """Load the bn parameters of resnet from torchvision.

        Args:
            bn (nn.Module): The destination bn module.
            state_dict_tv (OrderedDict): The state dict of pretrained
                torchvision model.
            module_name_tv (str): The name of corresponding bn module in the
                torchvision model.
            loaded_param_names (list[str]): List of parameters that have been
                loaded.
        """

        for param_name, param in bn.named_parameters():
            param_tv_name = f'{module_name_tv}.{param_name}'
            param_tv = state_dict_tv[param_tv_name]
            if param.data.shape == param_tv.shape:
                param.data.copy_(param_tv)
                loaded_param_names.append(param_tv_name)

        for param_name, param in bn.named_buffers():
            param_tv_name = f'{module_name_tv}.{param_name}'
            # some buffers like num_batches_tracked may not exist
            if param_tv_name in state_dict_tv:
                param_tv = state_dict_tv[param_tv_name]
                if param.data.shape == param_tv.shape:
                    param.data.copy_(param_tv)
                    loaded_param_names.append(param_tv_name)

    def _load_torchvision_checkpoint(self, logger=None):
        """Initiate the parameters from torchvision pretrained checkpoint."""
        state_dict_torchvision = _load_checkpoint(self.pretrained)
        if 'state_dict' in state_dict_torchvision:
            state_dict_torchvision = state_dict_torchvision['state_dict']

        loaded_param_names = []
        for name, module in self.named_modules():
            if isinstance(module, ConvModule):
                # we use a ConvModule to wrap conv+bn+relu layers, thus the
                # name mapping is needed
                if 'downsample' in name:
                    # layer{X}.{Y}.downsample.conv->layer{X}.{Y}.downsample.0
                    original_conv_name = name + '.0'
                    # layer{X}.{Y}.downsample.bn->layer{X}.{Y}.downsample.1
                    original_bn_name = name + '.1'
                else:
                    # layer{X}.{Y}.conv{n}.conv->layer{X}.{Y}.conv{n}
                    original_conv_name = name
                    # layer{X}.{Y}.conv{n}.bn->layer{X}.{Y}.bn{n}
                    original_bn_name = name.replace('conv', 'bn')
                self._load_conv_params(module.conv, state_dict_torchvision,
                                       original_conv_name, loaded_param_names)
                self._load_bn_params(module.bn, state_dict_torchvision,
                                     original_bn_name, loaded_param_names)
        # check if any parameters in the 2d checkpoint are not loaded
        remaining_names = set(
            state_dict_torchvision.keys()) - set(loaded_param_names)
        if remaining_names:
            logger.info('These parameters in the pretrained checkpoint are '
                        f'not loaded: {remaining_names}')

    def init_weights(self):
        """Initiate the parameters either from existing checkpoint or from
        scratch."""
        if isinstance(self.pretrained, str):
            logger = get_root_logger()
            if self.torchvision_pretrain:
                # torchvision's
                self._load_torchvision_checkpoint(logger)
            else:
                # ours
                load_checkpoint(
                    self, self.pretrained, strict=False, logger=logger)
        elif self.pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Defines the computation performed at every call.

        Args:
            x (torch.Tensor): The input data.

        Returns:
            torch.Tensor: The feature of the input samples extracted
            by the backbone.
        """
        x = self.conv1(x)
        x = self.maxpool(x)
        outs = []
        for i, layer_name in enumerate(self.res_layers):
            res_layer = getattr(self, layer_name)
            x = res_layer(x)
            if i in self.out_indices:
                outs.append(x)
        if len(outs) == 1:
            return outs[0]

        return tuple(outs)

    def _freeze_stages(self):
        """Prevent all the parameters from being optimized before
        ``self.frozen_stages``."""
        if self.frozen_stages >= 0:
            self.conv1.bn.eval()
            for m in self.conv1.modules():
                for param in m.parameters():
                    param.requires_grad = False

        for i in range(1, self.frozen_stages + 1):
            m = getattr(self, f'layer{i}')
            m.eval()
            for param in m.parameters():
                param.requires_grad = False

    def _partial_bn(self):
        logger = get_root_logger()
        logger.info('Freezing BatchNorm2D except the first one.')
        count_bn = 0
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                count_bn += 1
                if count_bn >= 2:
                    m.eval()
                    # shutdown update in frozen mode
                    m.weight.requires_grad = False
                    m.bias.requires_grad = False

    def train(self, mode=True):
        """Set the optimization status when training."""
        super().train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    m.eval()
        if mode and self.partial_bn:
            self._partial_bn()
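# Hedged smoke test for the backbone above (assumes the mmaction-style
# imports used in this file are available):
import torch

backbone = ResNet(depth=50, pretrained=None, out_indices=(3, ))
backbone.init_weights()
feat = backbone(torch.randn(1, 3, 224, 224))
print(feat.shape)  # torch.Size([1, 2048, 7, 7])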
Example No. 20
class ShuffleNetV1(BaseBackbone):
    """ShuffleNetV1 backbone.

    Args:
        groups (int): The number of groups to be used in grouped 1x1
            convolutions in each ShuffleUnit. Default: 3.
        widen_factor (float): Width multiplier - adjusts the number
            of channels in each layer by this amount. Default: 1.0.
        out_indices (Sequence[int]): Output from which stages.
            Default: (2, )
        frozen_stages (int): Stages to be frozen (all param fixed).
            Default: -1, which means not freezing any parameters.
        conv_cfg (dict, optional): Config dict for convolution layer.
            Default: None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False.
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
    """
    def __init__(self,
                 groups=3,
                 widen_factor=1.0,
                 out_indices=(2, ),
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 norm_eval=False,
                 with_cp=False):
        super(ShuffleNetV1, self).__init__()
        self.stage_blocks = [4, 8, 4]
        self.groups = groups

        for index in out_indices:
            if index not in range(0, 3):
                raise ValueError('the item in out_indices must be in '
                                 f'range(0, 3). But received {index}')

        if frozen_stages not in range(-1, 3):
            raise ValueError('frozen_stages must be in range(-1, 3). '
                             f'But received {frozen_stages}')
        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp

        if groups == 1:
            channels = (144, 288, 576)
        elif groups == 2:
            channels = (200, 400, 800)
        elif groups == 3:
            channels = (240, 480, 960)
        elif groups == 4:
            channels = (272, 544, 1088)
        elif groups == 8:
            channels = (384, 768, 1536)
        else:
            raise ValueError(f'{groups} groups is not supported for 1x1 '
                             'Grouped Convolutions')

        channels = [make_divisible(ch * widen_factor, 8) for ch in channels]

        self.in_channels = int(24 * widen_factor)

        self.conv1 = ConvModule(in_channels=3,
                                out_channels=self.in_channels,
                                kernel_size=3,
                                stride=2,
                                padding=1,
                                conv_cfg=conv_cfg,
                                norm_cfg=norm_cfg,
                                act_cfg=act_cfg)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layers = nn.ModuleList()
        for i, num_blocks in enumerate(self.stage_blocks):
            first_block = True if i == 0 else False
            layer = self.make_layer(channels[i], num_blocks, first_block)
            self.layers.append(layer)

    def _freeze_stages(self):
        if self.frozen_stages >= 0:
            for param in self.conv1.parameters():
                param.requires_grad = False
        for i in range(self.frozen_stages):
            layer = self.layers[i]
            layer.eval()
            for param in layer.parameters():
                param.requires_grad = False

    def init_weights(self, pretrained=None):
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for name, m in self.named_modules():
                if isinstance(m, nn.Conv2d):
                    if 'conv1' in name:
                        normal_init(m, mean=0, std=0.01)
                    else:
                        normal_init(m, mean=0, std=1.0 / m.weight.shape[1])
                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                    constant_init(m, val=1, bias=0.0001)
                    if isinstance(m, _BatchNorm):
                        if m.running_mean is not None:
                            nn.init.constant_(m.running_mean, 0)
        else:
            raise TypeError('pretrained must be a str or None. But received '
                            f'{type(pretrained)}')

    def make_layer(self, out_channels, num_blocks, first_block=False):
        """ Stack ShuffleUnit blocks to make a layer.

        Args:
            out_channels (int): out_channels of the block.
            num_blocks (int): Number of blocks.
            first_block (bool): Whether is the first ShuffleUnit of a
                sequential ShuffleUnits. Default: False, which means not using
                the grouped 1x1 convolution.
        """
        layers = []
        for i in range(num_blocks):
            first_block = first_block if i == 0 else False
            combine_mode = 'concat' if i == 0 else 'add'
            layers.append(
                ShuffleUnit(self.in_channels,
                            out_channels,
                            groups=self.groups,
                            first_block=first_block,
                            combine=combine_mode,
                            conv_cfg=self.conv_cfg,
                            norm_cfg=self.norm_cfg,
                            act_cfg=self.act_cfg,
                            with_cp=self.with_cp))
            self.in_channels = out_channels

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)

        outs = []
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i in self.out_indices:
                outs.append(x)

        if len(outs) == 1:
            return outs[0]
        else:
            return tuple(outs)

    def train(self, mode=True):
        super(ShuffleNetV1, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    m.eval()
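# Hedged smoke test: with groups=3 the stage channels are (240, 480, 960) and
# the overall stride is 32 (stem conv and maxpool at stride 2, plus a
# stride-2 unit opening each of the three stages).
import torch

model = ShuffleNetV1(groups=3, out_indices=(2, ))
model.init_weights()
out = model(torch.randn(1, 3, 224, 224))
print(out.shape)  # torch.Size([1, 960, 7, 7])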
Example No. 21
    def __init__(self,
                 in_channels,
                 max_channels,
                 num_convs=5,
                 fc_in_channels=None,
                 fc_out_channels=1024,
                 kernel_size=5,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=dict(type='ReLU'),
                 out_act_cfg=dict(type='ReLU'),
                 with_input_norm=True,
                 with_out_convs=False,
                 with_spectral_norm=False,
                 **kwargs):
        super().__init__()
        if fc_in_channels is not None:
            assert fc_in_channels > 0

        self.max_channels = max_channels
        self.with_fc = fc_in_channels is not None
        self.num_convs = num_convs
        self.with_out_act = out_act_cfg is not None
        self.with_out_convs = with_out_convs

        cur_channels = in_channels
        for i in range(num_convs):
            out_ch = min(64 * 2**i, max_channels)
            norm_cfg_ = norm_cfg
            act_cfg_ = act_cfg
            if i == 0 and not with_input_norm:
                norm_cfg_ = None
            elif (i == num_convs - 1 and not self.with_fc
                  and not self.with_out_convs):
                norm_cfg_ = None
                act_cfg_ = out_act_cfg
            self.add_module(
                f'conv{i + 1}',
                ConvModule(cur_channels,
                           out_ch,
                           kernel_size=kernel_size,
                           stride=2,
                           padding=kernel_size // 2,
                           norm_cfg=norm_cfg_,
                           act_cfg=act_cfg_,
                           with_spectral_norm=with_spectral_norm,
                           **kwargs))
            cur_channels = out_ch

        if self.with_out_convs:
            cur_channels = min(64 * 2**(num_convs - 1), max_channels)
            out_ch = min(64 * 2**num_convs, max_channels)
            self.add_module(
                f'conv{num_convs + 1}',
                ConvModule(cur_channels,
                           out_ch,
                           kernel_size,
                           stride=1,
                           padding=kernel_size // 2,
                           norm_cfg=norm_cfg,
                           act_cfg=act_cfg,
                           with_spectral_norm=with_spectral_norm,
                           **kwargs))
            self.add_module(
                f'conv{num_convs + 2}',
                ConvModule(out_ch,
                           1,
                           kernel_size,
                           stride=1,
                           padding=kernel_size // 2,
                           act_cfg=None,
                           with_spectral_norm=with_spectral_norm,
                           **kwargs))

        if self.with_fc:
            self.fc = LinearModule(fc_in_channels,
                                   fc_out_channels,
                                   bias=True,
                                   act_cfg=out_act_cfg,
                                   with_spectral_norm=with_spectral_norm)
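# Channel progression of the conv stack above (hedged, illustrative): each
# stride-2 conv doubles the width until max_channels caps it.
max_channels = 512
widths = [min(64 * 2 ** i, max_channels) for i in range(5)]
print(widths)  # [64, 128, 256, 512, 512]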
Example No. 23
    def __init__(self,
                 gamma_w=1.0,
                 gamma_b=1.0,
                 gamma_d=1.0,
                 pretrained=None,
                 in_channels=3,
                 num_stages=4,
                 spatial_strides=(2, 2, 2, 2),
                 frozen_stages=-1,
                 se_style='half',
                 se_ratio=1 / 16,
                 use_swish=True,
                 conv_cfg=dict(type='Conv3d'),
                 norm_cfg=dict(type='BN3d', requires_grad=True),
                 act_cfg=dict(type='ReLU', inplace=True),
                 norm_eval=False,
                 with_cp=False,
                 zero_init_residual=True,
                 **kwargs):
        super().__init__()
        self.gamma_w = gamma_w
        self.gamma_b = gamma_b
        self.gamma_d = gamma_d

        self.pretrained = pretrained
        self.in_channels = in_channels
        # Hard coded, can be changed by gamma_w
        self.base_channels = 24
        self.stage_blocks = [1, 2, 5, 3]

        # apply parameters gamma_w and gamma_d
        self.base_channels = self._round_width(self.base_channels,
                                               self.gamma_w)

        self.stage_blocks = [
            self._round_repeats(x, self.gamma_d) for x in self.stage_blocks
        ]

        self.num_stages = num_stages
        assert 1 <= num_stages <= 4
        self.spatial_strides = spatial_strides
        assert len(spatial_strides) == num_stages
        self.frozen_stages = frozen_stages

        self.se_style = se_style
        assert self.se_style in ['all', 'half']
        self.se_ratio = se_ratio
        assert (self.se_ratio is None) or (self.se_ratio > 0)
        self.use_swish = use_swish

        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp
        self.zero_init_residual = zero_init_residual

        self.block = BlockX3D
        self.stage_blocks = self.stage_blocks[:num_stages]
        self.layer_inplanes = self.base_channels
        self._make_stem_layer()

        self.res_layers = []
        for i, num_blocks in enumerate(self.stage_blocks):
            spatial_stride = spatial_strides[i]
            inplanes = self.base_channels * 2**i
            planes = int(inplanes * self.gamma_b)

            res_layer = self.make_res_layer(self.block,
                                            self.layer_inplanes,
                                            inplanes,
                                            planes,
                                            num_blocks,
                                            spatial_stride=spatial_stride,
                                            se_style=self.se_style,
                                            se_ratio=self.se_ratio,
                                            use_swish=self.use_swish,
                                            norm_cfg=self.norm_cfg,
                                            conv_cfg=self.conv_cfg,
                                            act_cfg=self.act_cfg,
                                            with_cp=with_cp,
                                            **kwargs)
            self.layer_inplanes = inplanes
            layer_name = f'layer{i + 1}'
            self.add_module(layer_name, res_layer)
            self.res_layers.append(layer_name)

        self.feat_dim = self.base_channels * 2**(len(self.stage_blocks) - 1)
        self.conv5 = ConvModule(self.feat_dim,
                                int(self.feat_dim * self.gamma_b),
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False,
                                conv_cfg=self.conv_cfg,
                                norm_cfg=self.norm_cfg,
                                act_cfg=self.act_cfg)
        self.feat_dim = int(self.feat_dim * self.gamma_b)
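# Hedged arithmetic for the stage widths above with the default gammas:
# base_channels doubles each stage, and gamma_b widens the inner path.
base_channels, gamma_b = 24, 1.0
inplanes_per_stage = [base_channels * 2 ** i for i in range(4)]  # [24, 48, 96, 192]
planes_per_stage = [int(c * gamma_b) for c in inplanes_per_stage]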
Example No. 24
    def __init__(self,
                 in_channels,
                 out_channels,
                 groups=3,
                 first_block=True,
                 combine='add',
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 with_cp=False):
        super(ShuffleUnit, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.first_block = first_block
        self.combine = combine
        self.groups = groups
        self.bottleneck_channels = self.out_channels // 4
        self.with_cp = with_cp

        if self.combine == 'add':
            self.depthwise_stride = 1
            self._combine_func = self._add
            assert in_channels == out_channels, (
                'in_channels must be equal to out_channels when combine '
                'is add')
        elif self.combine == 'concat':
            self.depthwise_stride = 2
            self._combine_func = self._concat
            self.out_channels -= self.in_channels
            self.avgpool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)
        else:
            raise ValueError(f'Cannot combine tensors with {self.combine}. '
                             'Only "add" and "concat" are supported')

        self.first_1x1_groups = 1 if first_block else self.groups
        self.g_conv_1x1_compress = ConvModule(
            in_channels=self.in_channels,
            out_channels=self.bottleneck_channels,
            kernel_size=1,
            groups=self.first_1x1_groups,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        self.depthwise_conv3x3_bn = ConvModule(
            in_channels=self.bottleneck_channels,
            out_channels=self.bottleneck_channels,
            kernel_size=3,
            stride=self.depthwise_stride,
            padding=1,
            groups=self.bottleneck_channels,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

        self.g_conv_1x1_expand = ConvModule(
            in_channels=self.bottleneck_channels,
            out_channels=self.out_channels,
            kernel_size=1,
            groups=self.groups,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

        self.act = build_activation_layer(act_cfg)
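# The unit applies a channel shuffle between its grouped 1x1 convs; the
# standard operation looks like this (a sketch, independent of the actual
# helper imported alongside ShuffleUnit):
def channel_shuffle(x, groups):
    n, c, h, w = x.size()
    x = x.view(n, groups, c // groups, h, w)
    x = x.transpose(1, 2).contiguous()  # interleave channels across groups
    return x.view(n, c, h, w)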
Example No. 25
    def __init__(self,
                 inplanes,
                 planes,
                 outplanes,
                 spatial_stride=1,
                 downsample=None,
                 se_ratio=None,
                 use_swish=True,
                 conv_cfg=dict(type='Conv3d'),
                 norm_cfg=dict(type='BN3d'),
                 act_cfg=dict(type='ReLU'),
                 with_cp=False):
        super().__init__()

        self.inplanes = inplanes
        self.planes = planes
        self.outplanes = outplanes
        self.spatial_stride = spatial_stride
        self.downsample = downsample
        self.se_ratio = se_ratio
        self.use_swish = use_swish
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.act_cfg_swish = dict(type='Swish')
        self.with_cp = with_cp

        self.conv1 = ConvModule(in_channels=inplanes,
                                out_channels=planes,
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False,
                                conv_cfg=self.conv_cfg,
                                norm_cfg=self.norm_cfg,
                                act_cfg=self.act_cfg)
        # Here we use a channel-wise (depthwise) conv: groups == planes
        self.conv2 = ConvModule(in_channels=planes,
                                out_channels=planes,
                                kernel_size=3,
                                stride=(1, self.spatial_stride,
                                        self.spatial_stride),
                                padding=1,
                                groups=planes,
                                bias=False,
                                conv_cfg=self.conv_cfg,
                                norm_cfg=self.norm_cfg,
                                act_cfg=None)

        self.swish = Swish()

        self.conv3 = ConvModule(in_channels=planes,
                                out_channels=outplanes,
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False,
                                conv_cfg=self.conv_cfg,
                                norm_cfg=self.norm_cfg,
                                act_cfg=None)

        if self.se_ratio is not None:
            self.se_module = SEModule(planes, self.se_ratio)

        self.relu = build_activation_layer(self.act_cfg)
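
A shape-check sketch for this block, which resembles mmaction2's BlockX3D (1x1x1 expand, depthwise 3x3x3, optional SE, Swish, 1x1x1 project); the class name and the residual forward pass are assumptions here:

import torch

# BlockX3D is a hypothetical import; with inplanes == outplanes and
# spatial_stride=1 the identity shortcut preserves the input shape.
block = BlockX3D(inplanes=24, planes=54, outplanes=24,
                 spatial_stride=1, se_ratio=0.0625)
x = torch.randn(1, 24, 4, 56, 56)  # (N, C, T, H, W)
print(block(x).shape)  # expected: torch.Size([1, 24, 4, 56, 56])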
Example n. 26
    def __init__(self,
                 in_channels,
                 out_channels,
                 num_outs,
                 start_level=0,
                 end_level=-1,
                 add_extra_convs=False,
                 extra_convs_on_inputs=True,
                 relu_before_extra_convs=False,
                 no_norm_on_lateral=False,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None,
                 upsample_cfg=dict(mode='nearest')):
        super(NoFPN, self).__init__()
        assert isinstance(in_channels, list)
        self.add_extra_convs = add_extra_convs
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.num_outs = num_outs
        self.fp16_enabled = False
        self.upsample_cfg = upsample_cfg.copy()

        if end_level == -1:
            self.backbone_end_level = self.num_ins
            assert num_outs >= self.num_ins - start_level
        else:
            # if end_level < inputs, no extra level is allowed
            self.backbone_end_level = end_level
            assert end_level <= len(in_channels)
            assert num_outs == end_level - start_level
        self.start_level = start_level
        self.end_level = end_level

        self.lateral_convs = nn.ModuleList()
        self.fpn_convs = nn.ModuleList()
        self.spatial_gates = nn.ModuleList()
        self.downsample_convs = nn.ModuleList()
        for i in range(self.start_level, self.backbone_end_level):
            d_conv = nn.ModuleList()
            for j in range(self.start_level, self.backbone_end_level):
                if i > j:
                    d_conv.append(
                        ConvModule(out_channels,
                                   out_channels,
                                   1 + 2 * (i - j),
                                   stride=2**(i - j),
                                   padding=1,
                                   inplace=False))
            self.spatial_gates.append(
                SpatialGate(levels=self.num_ins - start_level))
            self.downsample_convs.append(d_conv)
        # build a lateral 1x1 conv and a 3x3 output conv for each level
        for i in range(self.start_level, self.backbone_end_level):
            l_conv = ConvModule(in_channels[i], out_channels, 1, inplace=False)
            fpn_conv = ConvModule(out_channels,
                                  out_channels,
                                  3,
                                  padding=1,
                                  inplace=False)

            self.lateral_convs.append(l_conv)
            self.fpn_convs.append(fpn_conv)

        # add extra conv layers (e.g., RetinaNet)
        extra_levels = num_outs - self.backbone_end_level + self.start_level
        if self.add_extra_convs and extra_levels >= 1:
            for i in range(extra_levels):
                if i == 0 and self.add_extra_convs == 'on_input':
                    # last backbone level, e.g. 2048 for ResNet C5
                    in_channels = self.in_channels[self.backbone_end_level - 1]
                else:
                    # subsequent levels take out_channels (e.g. 256)
                    in_channels = out_channels
                extra_fpn_conv = ConvModule(in_channels,
                                            out_channels,
                                            3,
                                            stride=2,
                                            padding=1,
                                            inplace=False)
                self.fpn_convs.append(extra_fpn_conv)
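
A hypothetical instantiation of NoFPN, assuming its forward follows the usual mmdet neck contract (a list of multi-scale backbone features in, num_outs maps out); the channel list matches ResNet-50 C2-C5:

import torch

# forward() is not shown above, so the output contract is assumed.
neck = NoFPN(in_channels=[256, 512, 1024, 2048],
             out_channels=256,
             num_outs=5,
             add_extra_convs='on_input')
feats = [torch.randn(1, c, 2 ** (6 - i), 2 ** (6 - i))
         for i, c in enumerate([256, 512, 1024, 2048])]
outs = neck(feats)  # assumed: 5 levels, each with 256 channels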
Example n. 27
    def __init__(self,
                 in_channels,
                 out_channels,
                 num_outs,
                 start_level=0,
                 end_level=-1,
                 add_extra_convs=False,
                 extra_convs_on_inputs=True,
                 relu_before_extra_convs=False,
                 no_norm_on_lateral=False,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None,
                 upsample_cfg=dict(mode='nearest')):
        super().__init__()
        assert isinstance(in_channels, list)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.num_outs = num_outs
        self.relu_before_extra_convs = relu_before_extra_convs
        self.no_norm_on_lateral = no_norm_on_lateral
        self.fp16_enabled = False
        self.upsample_cfg = upsample_cfg.copy()

        if end_level == -1:
            self.backbone_end_level = self.num_ins
            assert num_outs >= self.num_ins - start_level
        else:
            # if end_level < inputs, no extra level is allowed
            self.backbone_end_level = end_level
            assert end_level <= len(in_channels)
            assert num_outs == end_level - start_level
        self.start_level = start_level
        self.end_level = end_level
        self.add_extra_convs = add_extra_convs
        assert isinstance(add_extra_convs, (str, bool))
        if isinstance(add_extra_convs, str):
            # Valid extra-conv sources: 'on_input', 'on_lateral', 'on_output'
            assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
        elif add_extra_convs:  # True
            if extra_convs_on_inputs:
                # TODO: deprecate `extra_convs_on_inputs`
                warnings.simplefilter('once')
                warnings.warn(
                    '"extra_convs_on_inputs" will be deprecated in v2.9.0,'
                    'Please use "add_extra_convs"', DeprecationWarning)
                self.add_extra_convs = 'on_input'
            else:
                self.add_extra_convs = 'on_output'

        self.lateral_convs = nn.ModuleList()
        self.fpn_convs = nn.ModuleList()

        concat_in_channels = []
        for i in range(self.start_level, self.backbone_end_level):
            if i == self.backbone_end_level - 1:
                concat_in_channels.append(in_channels[i])
            else:
                concat_in_channels.append(in_channels[i] + out_channels[i + 1])

        for i in range(self.start_level, self.backbone_end_level):
            l_conv = ConvModule(
                concat_in_channels[i],
                out_channels[i],
                1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)
            fpn_conv = ConvModule(out_channels[i],
                                  out_channels[i],
                                  3,
                                  padding=1,
                                  conv_cfg=conv_cfg,
                                  norm_cfg=norm_cfg,
                                  act_cfg=act_cfg,
                                  inplace=False)

            self.lateral_convs.append(l_conv)
            self.fpn_convs.append(fpn_conv)

        # add extra conv layers (e.g., RetinaNet)
        extra_levels = num_outs - self.backbone_end_level + self.start_level
        if self.add_extra_convs and extra_levels >= 1:
            for i in range(extra_levels):
                if i == 0 and self.add_extra_convs == 'on_input':
                    in_channels = self.in_channels[self.backbone_end_level - 1]
                else:
                    in_channels = out_channels
                extra_fpn_conv = ConvModule(in_channels,
                                            out_channels,
                                            3,
                                            stride=2,
                                            padding=1,
                                            conv_cfg=conv_cfg,
                                            norm_cfg=norm_cfg,
                                            act_cfg=act_cfg,
                                            inplace=False)
                self.fpn_convs.append(extra_fpn_conv)
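
Unlike a standard FPN, out_channels here is a per-level list: each lateral concatenates in_channels[i] with the upsampled out_channels[i + 1] features before the 1x1 projection. A config sketch under that assumption (the class name is not shown above, so ConcatFPN is hypothetical):

# Hypothetical class name; the arguments follow the constructor above.
neck = ConcatFPN(in_channels=[256, 512, 1024, 2048],
                 out_channels=[64, 128, 256, 512],
                 num_outs=4)
# concat_in_channels becomes [256 + 128, 512 + 256, 1024 + 512, 2048],
# i.e. [384, 768, 1536, 2048].

Note that the extra-conv branch would pass the whole out_channels list to ConvModule, so this variant is only well-defined when num_outs equals the number of selected backbone levels.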
Example n. 28
    def __init__(
            self,
            in_channels=[64, 128, 256, 512],
            out_channels=128,
            num_outs=4,
            start_level=0,
            end_level=-1,
            add_extra_convs=False,
            relu_before_extra_convs=False,
            no_norm_on_lateral=False,
            conv_cfg=None,
            norm_cfg=None,
            act_cfg=None,
            num_heads=[2, 2, 2, 2],
            mlp_ratios=[4, 4, 4, 4],
            sr_ratios=[8, 4, 2, 1],
            qkv_bias=True,
            qk_scale=None,
            drop_rate=0.,
            attn_drop_rate=0.,
            drop_path_rate=0.,
            transformer_norm_cfg=dict(type='LN'),
            use_sr_conv=False,
    ):
        super().__init__()
        assert isinstance(in_channels, list)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_ins = len(in_channels)
        self.num_outs = num_outs
        self.no_norm_on_lateral = no_norm_on_lateral
        self.fp16_enabled = False
        self.norm_cfg = norm_cfg
        self.conv_cfg = conv_cfg
        self.act_cfg = act_cfg
        self.mlp_ratios = mlp_ratios

        if end_level == -1 or end_level == self.num_ins - 1:
            self.backbone_end_level = self.num_ins
            assert num_outs >= self.num_ins - start_level
        else:
            # if end_level is not the last level, no extra level is allowed
            self.backbone_end_level = end_level + 1
            assert end_level < self.num_ins
            assert num_outs == end_level - start_level + 1
        self.start_level = start_level
        self.end_level = end_level

        self.lateral_convs = nn.ModuleList()
        self.merge_blocks = nn.ModuleList()

        for i in range(self.start_level, self.backbone_end_level):
            l_conv = ConvModule(
                in_channels[i],
                out_channels,
                1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,
                act_cfg=act_cfg,
                inplace=False)
            self.lateral_convs.append(l_conv)

        for i in range(self.start_level, self.backbone_end_level - 1):
            merge_block = TCFormerDynamicBlock(
                dim=out_channels,
                num_heads=num_heads[i],
                mlp_ratio=mlp_ratios[i],
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=drop_path_rate,
                norm_cfg=transformer_norm_cfg,
                sr_ratio=sr_ratios[i],
                use_sr_conv=use_sr_conv)
            self.merge_blocks.append(merge_block)

        # add extra conv layers (e.g., RetinaNet)
        self.relu_before_extra_convs = relu_before_extra_convs

        self.add_extra_convs = add_extra_convs
        assert isinstance(add_extra_convs, (str, bool))
        if isinstance(add_extra_convs, str):
            # Valid extra-conv sources: 'on_input', 'on_output'
            assert add_extra_convs in ('on_input', 'on_output')
        elif add_extra_convs:  # True
            self.add_extra_convs = 'on_input'

        self.extra_convs = nn.ModuleList()
        extra_levels = num_outs - self.backbone_end_level + self.start_level
        if self.add_extra_convs and extra_levels >= 1:
            for i in range(extra_levels):
                if i == 0 and self.add_extra_convs == 'on_input':
                    in_channels = self.in_channels[self.end_level]
                else:
                    in_channels = out_channels
                extra_fpn_conv = ConvModule(
                    in_channels,
                    out_channels,
                    3,
                    stride=2,
                    padding=1,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg,
                    inplace=False)
                self.extra_convs.append(extra_fpn_conv)
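
This constructor matches the multi-scale token aggregation neck used with TCFormer: laterals project each stage to out_channels, and a TCFormerDynamicBlock merges each pair of adjacent stages. A config-style sketch using the defaults shown above (the 'MTA' registry name is an assumption):

# All values below are the constructor defaults from the code above.
neck_cfg = dict(
    type='MTA',  # assumed registry name
    in_channels=[64, 128, 256, 512],
    out_channels=128,
    num_outs=4,
    num_heads=[2, 2, 2, 2],
    mlp_ratios=[4, 4, 4, 4],
    sr_ratios=[8, 4, 2, 1],
    use_sr_conv=False)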
Example n. 29
    def __init__(self,
                 num_point: int,
                 radii: List[float],
                 sample_nums: List[int],
                 mlp_channels: List[List[int]],
                 fps_mod: List[str] = ['D-FPS'],
                 fps_sample_range_list: List[int] = [-1],
                 dilated_group: bool = False,
                 norm_cfg: dict = dict(type='BN2d'),
                 use_xyz: bool = True,
                 pool_mod='max',
                 normalize_xyz: bool = False,
                 bias='auto'):
        super().__init__()

        assert len(radii) == len(sample_nums) == len(mlp_channels)
        assert pool_mod in ['max', 'avg']
        assert isinstance(fps_mod, (list, tuple))
        assert isinstance(fps_sample_range_list, (list, tuple))
        assert len(fps_mod) == len(fps_sample_range_list)

        if isinstance(mlp_channels, tuple):
            mlp_channels = list(map(list, mlp_channels))

        if isinstance(num_point, int):
            self.num_point = [num_point]
        elif isinstance(num_point, (list, tuple)):
            self.num_point = num_point
        else:
            raise NotImplementedError(
                f'Unsupported num_point type: {type(num_point)}')

        self.pool_mod = pool_mod
        self.groupers = nn.ModuleList()
        self.mlps = nn.ModuleList()
        self.fps_mod_list = fps_mod
        self.fps_sample_range_list = fps_sample_range_list

        self.points_sampler = Points_Sampler(self.num_point, self.fps_mod_list,
                                             self.fps_sample_range_list)

        for i in range(len(radii)):
            radius = radii[i]
            sample_num = sample_nums[i]
            if num_point is not None:
                if dilated_group and i != 0:
                    min_radius = radii[i - 1]
                else:
                    min_radius = 0
                grouper = QueryAndGroup(
                    radius,
                    sample_num,
                    min_radius=min_radius,
                    use_xyz=use_xyz,
                    normalize_xyz=normalize_xyz)
            else:
                grouper = GroupAll(use_xyz)
            self.groupers.append(grouper)

            mlp_spec = mlp_channels[i]
            if use_xyz:
                mlp_spec[0] += 3

            mlp = nn.Sequential()
            # use `j` to avoid shadowing the outer loop variable `i`
            for j in range(len(mlp_spec) - 1):
                mlp.add_module(
                    f'layer{j}',
                    ConvModule(
                        mlp_spec[j],
                        mlp_spec[j + 1],
                        kernel_size=(1, 1),
                        stride=(1, 1),
                        conv_cfg=dict(type='Conv2d'),
                        norm_cfg=norm_cfg,
                        bias=bias))
            self.mlps.append(mlp)
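
A multi-scale-grouping sketch in the PointNet++ style (the class resembles mmdet3d's PointSAModuleMSG and is assumed importable; note that with use_xyz=True the first entry of each MLP spec is widened by 3 in place):

# One grouper and one shared MLP per radius; values are illustrative.
sa = PointSAModuleMSG(
    num_point=512,
    radii=[0.2, 0.4],
    sample_nums=[16, 32],
    mlp_channels=[[64, 64, 128], [64, 128, 128]],
    use_xyz=True)  # each leading channel becomes 64 + 3 = 67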
Example n. 30
    def __init__(self,
                 in_channels,
                 out_channels,
                 base_channels=64,
                 norm_cfg=dict(type='IN'),
                 use_dropout=False,
                 num_blocks=9,
                 padding_mode='reflect',
                 init_cfg=dict(type='normal', gain=0.02)):
        super().__init__()
        assert num_blocks >= 0, ('Number of residual blocks must be '
                                 f'non-negative, but got {num_blocks}.')
        assert isinstance(norm_cfg, dict), ("'norm_cfg' should be dict, but "
                                            f'got {type(norm_cfg)}')
        assert 'type' in norm_cfg, "'norm_cfg' must have key 'type'"
        # We use norm layers in the resnet generator.
        # Use bias only with IN, since IN has no affine parameters here.
        use_bias = norm_cfg['type'] == 'IN'

        model = []
        model += [
            ConvModule(in_channels=in_channels,
                       out_channels=base_channels,
                       kernel_size=7,
                       padding=3,
                       bias=use_bias,
                       norm_cfg=norm_cfg,
                       padding_mode=padding_mode)
        ]

        num_down = 2
        # add downsampling layers
        for i in range(num_down):
            multiple = 2**i
            model += [
                ConvModule(in_channels=base_channels * multiple,
                           out_channels=base_channels * multiple * 2,
                           kernel_size=3,
                           stride=2,
                           padding=1,
                           bias=use_bias,
                           norm_cfg=norm_cfg)
            ]

        # add residual blocks
        multiple = 2**num_down
        for i in range(num_blocks):
            model += [
                ResidualBlockWithDropout(base_channels * multiple,
                                         padding_mode=padding_mode,
                                         norm_cfg=norm_cfg,
                                         use_dropout=use_dropout)
            ]

        # add upsampling layers
        for i in range(num_down):
            multiple = 2**(num_down - i)
            model += [
                ConvModule(in_channels=base_channels * multiple,
                           out_channels=base_channels * multiple // 2,
                           kernel_size=3,
                           stride=2,
                           padding=1,
                           bias=use_bias,
                           conv_cfg=dict(type='Deconv', output_padding=1),
                           norm_cfg=norm_cfg)
            ]

        model += [
            ConvModule(in_channels=base_channels,
                       out_channels=out_channels,
                       kernel_size=7,
                       padding=3,
                       bias=True,
                       norm_cfg=None,
                       act_cfg=dict(type='Tanh'),
                       padding_mode=padding_mode)
        ]

        self.model = nn.Sequential(*model)
        self.init_type = 'normal' if init_cfg is None else init_cfg.get(
            'type', 'normal')
        self.init_gain = 0.02 if init_cfg is None else init_cfg.get(
            'gain', 0.02)
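
A shape check for the full generator, which resembles mmediting's CycleGAN ResnetGenerator (the class name is an assumption): two stride-2 downsamplings, num_blocks residual blocks at 1/4 resolution, then two transposed convs restore the input size and a Tanh maps the output to [-1, 1]:

import torch

# ResnetGenerator is a hypothetical name for the class defined above.
gen = ResnetGenerator(in_channels=3, out_channels=3,
                      base_channels=64, num_blocks=9)
x = torch.randn(1, 3, 256, 256)
print(gen(x).shape)  # expected: torch.Size([1, 3, 256, 256])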