Ejemplo n.º 1
0
    def __init__(self,
                 in_features,
                 out_features,
                 bias=True,
                 act_cfg=dict(type='ReLU'),
                 inplace=True,
                 with_spectral_norm=False,
                 order=('linear', 'act')):
        """Assemble a linear layer with an optional activation layer.

        Args:
            in_features: Forwarded to ``nn.Linear`` as its input size.
            out_features: Forwarded to ``nn.Linear`` as its output size.
            bias: Forwarded to ``nn.Linear`` as ``bias``.
            act_cfg: Activation config dict, or ``None`` to build no
                activation layer. The dict itself is never modified; a copy
                is handed to ``build_activation_layer``.
            inplace: Value used for the ``'inplace'`` key of the activation
                config when ``act_cfg`` does not already provide one.
            with_spectral_norm: When truthy, the linear layer is wrapped with
                ``nn.utils.spectral_norm``.
            order: A 2-tuple that must contain exactly ``'linear'`` and
                ``'act'``.

        Raises:
            AssertionError: If ``act_cfg`` is neither ``None`` nor a dict, or
                if ``order`` is not a valid 2-tuple.
        """
        super().__init__()

        assert act_cfg is None or isinstance(act_cfg, dict)
        self.act_cfg = act_cfg
        self.inplace = inplace
        self.with_spectral_norm = with_spectral_norm
        self.order = order
        assert isinstance(order, tuple) and len(order) == 2
        assert set(order) == {'linear', 'act'}

        self.with_activation = act_cfg is not None
        self.with_bias = bias

        # Linear layer; its feature sizes are mirrored on this module so
        # callers do not have to reach into ``self.linear``.
        linear = nn.Linear(in_features, out_features, bias=bias)
        self.linear = linear
        self.in_features = linear.in_features
        self.out_features = linear.out_features

        if with_spectral_norm:
            self.linear = nn.utils.spectral_norm(linear)

        # Activation layer, built from a private copy of the config.
        if act_cfg is not None:
            activation_cfg = act_cfg.copy()
            if 'inplace' not in activation_cfg:
                activation_cfg['inplace'] = inplace
            self.activate = build_activation_layer(activation_cfg)

        # Weight initialisation is delegated to ``init_weights`` (defined
        # elsewhere on the class; the original note says MSRA init is used).
        self.init_weights()
Ejemplo n.º 2
0
    def __init__(self,
                 embed_dims,
                 feedforward_channels,
                 act_cfg=dict(type='GELU'),
                 ffn_drop=0.,
                 dropout_layer=None,
                 init_cfg=None):
        """Build the convolutional feed-forward stack.

        The stack is: 1x1 conv (expand) -> 3x3 depthwise conv -> activation
        -> dropout -> 1x1 conv (project back) -> dropout.

        Args:
            embed_dims: Channel count at the input and output of the stack.
            feedforward_channels: Channel count of the hidden convolutions.
            act_cfg: Config passed to ``build_activation_layer``.
            ffn_drop: Drop probability given to ``nn.Dropout``.
            dropout_layer: Config passed to ``build_dropout``; when falsy an
                identity module is used instead.
            init_cfg: Forwarded to the parent class constructor.
        """
        super(MixFFN, self).__init__(init_cfg)

        self.embed_dims = embed_dims
        self.feedforward_channels = feedforward_channels
        self.act_cfg = act_cfg
        self.activate = build_activation_layer(act_cfg)

        hidden = feedforward_channels
        pe_kernel = 3

        expand = Conv2d(
            in_channels=embed_dims,
            out_channels=hidden,
            kernel_size=1,
            stride=1,
            bias=True)
        # Depthwise conv (one group per channel) that supplies positional
        # information; padding keeps the spatial size unchanged.
        positional = Conv2d(
            in_channels=hidden,
            out_channels=hidden,
            kernel_size=pe_kernel,
            stride=1,
            padding=(pe_kernel - 1) // 2,
            bias=True,
            groups=hidden)
        project = Conv2d(
            in_channels=hidden,
            out_channels=embed_dims,
            kernel_size=1,
            stride=1,
            bias=True)
        # A single dropout module is deliberately placed at two positions.
        dropout = nn.Dropout(ffn_drop)
        self.layers = Sequential(expand, positional, self.activate, dropout,
                                 project, dropout)

        if dropout_layer:
            self.dropout_layer = build_dropout(dropout_layer)
        else:
            self.dropout_layer = torch.nn.Identity()
Ejemplo n.º 3
0
    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU', inplace=True)):
        """Build the two 3x3 conv + norm pairs of a basic residual block.

        Args:
            inplanes: Input channel count of the first convolution.
            planes: Output channel count of both convolutions and the
                feature size given to both norm layers.
            stride: Stride of the first convolution.
            dilation: Dilation (and padding) of the first convolution.
            downsample: Stored as ``self.downsample`` without modification.
            style: Accepted but not used in this constructor.
            with_cp: Must be falsy; the constructor asserts on it.
            conv_cfg: Config forwarded to ``build_conv_layer``.
            norm_cfg: Config forwarded to ``build_norm_layer``.
            act_cfg: Config forwarded to ``build_activation_layer``.

        Raises:
            AssertionError: If ``with_cp`` is truthy.
        """
        super(BasicBlock, self).__init__()

        kernel_size = 3

        # Norm layers are created first; they are registered under the names
        # returned by ``build_norm_layer`` right after their convolution.
        self.norm1_name, first_norm = build_norm_layer(
            norm_cfg, planes, postfix=1)
        self.norm2_name, second_norm = build_norm_layer(
            norm_cfg, planes, postfix=2)

        self.conv1 = build_conv_layer(
            conv_cfg, inplanes, planes, kernel_size,
            stride=stride, padding=dilation, dilation=dilation, bias=False)
        self.add_module(self.norm1_name, first_norm)

        self.conv2 = build_conv_layer(
            conv_cfg, planes, planes, kernel_size, padding=1, bias=False)
        self.add_module(self.norm2_name, second_norm)

        self.relu = build_activation_layer(act_cfg)
        self.downsample = downsample
        self.stride = stride
        self.dilation = dilation

        # Checkpointing is not supported by this block.
        assert not with_cp
Ejemplo n.º 4
0
    def __init__(self,
                 input_dim,
                 output_dim,
                 kernel_size,
                 stride=1,
                 with_se=False,
                 se_reduction=16,
                 se_bias=False,
                 dilation=1,
                 batch_norm=True,
                 pad=True,
                 nonlinearity="LeakyReLU"):
        """Build a 1-D convolution layer with optional SE and batch norm.

        Args:
            input_dim: Input channel count of the ``nn.Conv1d`` kernel.
            output_dim: Output channel count of the kernel; also the size
                given to the SE layer and to ``nn.BatchNorm1d``.
            kernel_size: Kernel width; stored as ``self.context_size``.
            stride: Stride of the convolution.
            with_se: When truthy, an ``SELayer`` is created as ``self.se``.
            se_reduction: ``reduction`` argument of the ``SELayer``.
            se_bias: ``bias`` argument of the ``SELayer``.
            dilation: Dilation of the convolution.
            batch_norm: When truthy, ``self.bn`` is created.
            pad: When truthy, ``self.pad_length`` is computed; otherwise that
                attribute is not set at all.
            nonlinearity: Activation type name used for both the SE layer
                and ``self.nonlinearity``.
        """
        super(TDNN_pad, self).__init__()

        self.context_size = kernel_size
        self.stride = stride
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.dilation = dilation

        self.pad = pad
        if pad:
            # Half of the span covered by the dilated kernel.
            dilated_span = (kernel_size - 1) * dilation
            self.pad_length = dilated_span // 2

        self.with_se = with_se
        if with_se:
            self.se = SELayer(
                output_dim,
                reduction=se_reduction,
                nonlinearity=nonlinearity,
                bias=se_bias)

        self.kernel = nn.Conv1d(
            input_dim,
            output_dim,
            kernel_size,
            stride=stride,
            dilation=dilation)
        self.nonlinearity = build_activation_layer(
            {'type': nonlinearity, 'inplace': True})

        self.batch_norm = batch_norm
        if batch_norm:
            self.bn = nn.BatchNorm1d(output_dim)
Ejemplo n.º 5
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 zero_init_offset=True,
                 act_cfg=dict(type='HSigmoid', bias=3.0, divisor=6.0)):
        """Build the spatial, scale and task attention sub-modules.

        Args:
            in_channels: Input channel count of the three ``DyDCNv2`` convs
                and of the offset/mask convolution.
            out_channels: Output channel count of the ``DyDCNv2`` convs and
                input size of the scale and task attention modules.
            zero_init_offset: Stored as ``self.zero_init_offset``; how it is
                used is decided by ``_init_weights`` (not shown here).
            act_cfg: Config of the last activation in the scale attention
                module, passed to ``build_activation_layer``.
        """
        super().__init__()
        self.zero_init_offset = zero_init_offset

        # Per kernel position (3x3) there are two offsets (x, y) and one
        # mask value.
        kernel_area = 3 * 3
        self.offset_and_mask_dim = 3 * kernel_area
        self.offset_dim = 2 * kernel_area

        self.spatial_conv_high = DyDCNv2(in_channels, out_channels)
        self.spatial_conv_mid = DyDCNv2(in_channels, out_channels)
        self.spatial_conv_low = DyDCNv2(in_channels, out_channels, stride=2)
        self.spatial_conv_offset = nn.Conv2d(
            in_channels, self.offset_and_mask_dim, kernel_size=3, padding=1)

        scale_attn_layers = [
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(out_channels, 1, kernel_size=1),
            nn.ReLU(inplace=True),
            build_activation_layer(act_cfg),
        ]
        self.scale_attn_module = nn.Sequential(*scale_attn_layers)

        self.task_attn_module = DyReLU(out_channels)
        self._init_weights()
Ejemplo n.º 6
0
 def __init__(self,
              in_channels,
              hidden_channels=None,
              out_channels=None,
              norm_cfg=dict(type='BN', requires_grad=True),
              act_cfg=dict(type='GELU')):
     """Build two 1x1 ``ConvModule`` layers and a separate activation.

     Args:
         in_channels: Input channel count of the first 1x1 conv.
         hidden_channels: Output channel count of the first conv; falls
             back to ``in_channels`` when falsy.
         out_channels: Output channel count of the second conv; falls back
             to ``in_channels`` when falsy.
         norm_cfg: Norm config shared by both ``ConvModule`` layers.
         act_cfg: Config passed to ``build_activation_layer``; the
             ``ConvModule`` layers themselves are built without activation.
     """
     super().__init__()

     out_features = out_channels if out_channels else in_channels
     hidden_features = hidden_channels if hidden_channels else in_channels

     # Both layers are pointwise convs with a norm but no activation.
     pointwise = dict(
         kernel_size=1,
         stride=1,
         padding=0,
         norm_cfg=norm_cfg,
         act_cfg=None)

     self.ffn_fc1 = ConvModule(
         in_channels=in_channels, out_channels=hidden_features, **pointwise)
     self.ffn_fc2 = ConvModule(
         in_channels=hidden_features, out_channels=out_features, **pointwise)
     self.act = build_activation_layer(act_cfg)
Ejemplo n.º 7
0
    def __init__(self,
                 inplanes,
                 planes,
                 spatial_stride=1,
                 temporal_stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 inflate=True,
                 inflate_style='3x1x1',
                 non_local=False,
                 non_local_cfg=dict(),
                 conv_cfg=dict(type='Conv3d'),
                 norm_cfg=dict(type='BN3d'),
                 act_cfg=dict(type='ReLU'),
                 with_cp=False):
        """Build the two 3-D conv modules of a basic residual block.

        Args:
            inplanes: Input channel count of ``conv1``.
            planes: Output channel count of ``conv1``; ``conv2`` outputs
                ``planes * self.expansion`` channels (``expansion`` is a
                class attribute defined outside this method).
            spatial_stride: Spatial stride, applied in ``conv1``.
            temporal_stride: Temporal stride, applied in ``conv1``.
            dilation: Spatial dilation (and spatial padding) of ``conv1``.
            downsample: Stored as ``self.downsample`` without modification.
            style: Must be ``'pytorch'`` or ``'caffe'``; only stored here.
            inflate: When truthy both convs use a temporal kernel of 3 with
                temporal padding 1; otherwise temporal kernel 1, padding 0.
            inflate_style: Must be ``'3x1x1'`` or ``'3x3x3'``; validated and
                stored but not consulted when choosing kernels here.
            non_local: When truthy a ``NonLocal3d`` block is appended.
            non_local_cfg: Keyword arguments for ``NonLocal3d``.
            conv_cfg: Conv config for both ``ConvModule`` layers.
            norm_cfg: Norm config for both ``ConvModule`` layers.
            act_cfg: Activation config for ``conv1`` and ``self.relu``;
                ``conv2`` is built without activation.
            with_cp: Stored as ``self.with_cp``.

        Raises:
            AssertionError: If ``style`` or ``inflate_style`` is not one of
                the accepted values.
        """
        super().__init__()
        assert style in ['pytorch', 'caffe']
        assert inflate_style in ['3x1x1', '3x3x3']

        self.inplanes = inplanes
        self.planes = planes
        self.spatial_stride = spatial_stride
        self.temporal_stride = temporal_stride
        self.dilation = dilation
        self.style = style
        self.inflate = inflate
        self.inflate_style = inflate_style
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.with_cp = with_cp
        self.non_local = non_local
        self.non_local_cfg = non_local_cfg

        # All striding happens in the first convolution.
        self.conv1_stride_s = spatial_stride
        self.conv2_stride_s = 1
        self.conv1_stride_t = temporal_stride
        self.conv2_stride_t = 1

        # Inflation only changes the temporal extent of the kernels.
        if inflate:
            temporal_kernel, temporal_pad = 3, 1
        else:
            temporal_kernel, temporal_pad = 1, 0
        conv1_kernel_size = (temporal_kernel, 3, 3)
        conv1_padding = (temporal_pad, dilation, dilation)
        conv2_kernel_size = (temporal_kernel, 3, 3)
        conv2_padding = (temporal_pad, 1, 1)

        conv1_stride = (self.conv1_stride_t, self.conv1_stride_s,
                        self.conv1_stride_s)
        conv2_stride = (self.conv2_stride_t, self.conv2_stride_s,
                        self.conv2_stride_s)

        self.conv1 = ConvModule(
            inplanes,
            planes,
            conv1_kernel_size,
            stride=conv1_stride,
            padding=conv1_padding,
            dilation=(1, dilation, dilation),
            bias=False,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        # The activation after conv2 is kept separately as ``self.relu``.
        self.conv2 = ConvModule(
            planes,
            planes * self.expansion,
            conv2_kernel_size,
            stride=conv2_stride,
            padding=conv2_padding,
            bias=False,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

        self.downsample = downsample
        self.relu = build_activation_layer(act_cfg)

        if non_local:
            # Channel count is read back from conv2's norm layer.
            self.non_local_block = NonLocal3d(self.conv2.norm.num_features,
                                              **non_local_cfg)
Ejemplo n.º 8
0
    def __init__(self,
                 inplanes,
                 planes,
                 spatial_stride=1,
                 temporal_stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 inflate=True,
                 inflate_style='3x1x1',
                 conv_cfg=dict(type='Conv3d'),
                 norm_cfg=dict(type='BN3d'),
                 act_cfg=dict(type='ReLU'),
                 with_cp=False):
        """Build the three 3-D conv modules of a bottleneck block.

        Args:
            inplanes: Input channel count of ``conv1``.
            planes: Channel count of ``conv1``/``conv2``; ``conv3`` outputs
                ``planes * self.expansion`` channels (``expansion`` is a
                class attribute defined outside this method).
            spatial_stride: Spatial stride of the block.
            temporal_stride: Temporal stride of the block.
            dilation: Spatial dilation (and spatial padding) of ``conv2``.
            downsample: Stored as ``self.downsample`` without modification.
            style: ``'pytorch'`` puts the strides on ``conv2``; ``'caffe'``
                puts them on ``conv1``.
            inflate: Whether any convolution gets a temporal kernel of 3.
            inflate_style: With ``inflate`` truthy, ``'3x1x1'`` inflates
                ``conv1`` and ``'3x3x3'`` inflates ``conv2``.
            conv_cfg: Conv config for all ``ConvModule`` layers.
            norm_cfg: Norm config for all ``ConvModule`` layers.
            act_cfg: Activation config for ``conv1``, ``conv2`` and
                ``self.relu``; ``conv3`` is built without activation.
            with_cp: Stored as ``self.with_cp``.

        Raises:
            AssertionError: If ``style`` or ``inflate_style`` is not one of
                the accepted values.
        """
        super().__init__()
        assert style in ['pytorch', 'caffe']
        assert inflate_style in ['3x1x1', '3x3x3']

        self.inplanes = inplanes
        self.planes = planes
        self.spatial_stride = spatial_stride
        self.temporal_stride = temporal_stride
        self.dilation = dilation
        self.style = style
        self.inflate = inflate
        self.inflate_style = inflate_style
        self.norm_cfg = norm_cfg
        self.conv_cfg = conv_cfg
        self.act_cfg = act_cfg
        self.with_cp = with_cp

        # Exactly one of conv1/conv2 carries the strides, chosen by style.
        stride_on_conv2 = style == 'pytorch'
        self.conv1_stride_s = 1 if stride_on_conv2 else spatial_stride
        self.conv2_stride_s = spatial_stride if stride_on_conv2 else 1
        self.conv1_stride_t = 1 if stride_on_conv2 else temporal_stride
        self.conv2_stride_t = temporal_stride if stride_on_conv2 else 1

        # Temporal kernel extent of each conv: at most one of them is 3.
        conv1_temporal = 3 if inflate and inflate_style == '3x1x1' else 1
        conv2_temporal = 3 if inflate and inflate_style != '3x1x1' else 1

        conv1_kernel_size = (conv1_temporal, 1, 1)
        conv1_padding = (conv1_temporal // 2, 0, 0)
        conv2_kernel_size = (conv2_temporal, 3, 3)
        conv2_padding = (conv2_temporal // 2, dilation, dilation)

        conv1_stride = (self.conv1_stride_t, self.conv1_stride_s,
                        self.conv1_stride_s)
        conv2_stride = (self.conv2_stride_t, self.conv2_stride_s,
                        self.conv2_stride_s)

        self.conv1 = ConvModule(
            inplanes,
            planes,
            conv1_kernel_size,
            stride=conv1_stride,
            padding=conv1_padding,
            bias=False,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        self.conv2 = ConvModule(
            planes,
            planes,
            conv2_kernel_size,
            stride=conv2_stride,
            padding=conv2_padding,
            dilation=(1, dilation, dilation),
            bias=False,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg)

        # The third conv has no activation of its own; ``self.relu`` is
        # kept separately.
        self.conv3 = ConvModule(
            planes,
            planes * self.expansion,
            1,
            bias=False,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            act_cfg=None)

        self.downsample = downsample
        self.relu = build_activation_layer(act_cfg)
Ejemplo n.º 9
0
    def __init__(
            self,
            num_classes,
            in_channels,
            num_query=100,
            num_reg_fcs=2,
            transformer=None,
            sync_cls_avg_factor=False,
            positional_encoding=dict(type='SinePositionalEncoding',
                                     num_feats=128,
                                     normalize=True),
            loss_cls=dict(type='CrossEntropyLoss',
                          bg_cls_weight=0.1,
                          use_sigmoid=False,
                          loss_weight=1.0,
                          class_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=5.0),
            loss_iou=dict(type='GIoULoss', loss_weight=2.0),
            train_cfg=dict(assigner=dict(
                type='HungarianAssigner',
                cls_cost=dict(type='ClassificationCost', weight=1.),
                reg_cost=dict(type='BBoxL1Cost', weight=5.0),
                iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
            test_cfg=dict(max_per_img=100),
            init_cfg=None,
            **kwargs):
        """Configure losses, assigner, transformer and positional encoding.

        Args:
            num_classes: Number of foreground classes.
            in_channels: Stored as ``self.in_channels``.
            num_query: Stored as ``self.num_query``.
            num_reg_fcs: Stored as ``self.num_reg_fcs``.
            transformer: Transformer config. Despite the ``None`` default a
                dict-like value is required, because ``.get`` is called on
                it and it is passed to ``build_transformer``.
            sync_cls_avg_factor: Stored as ``self.sync_cls_avg_factor``.
            positional_encoding: Config for ``build_positional_encoding``;
                must contain ``'num_feats'``, and ``num_feats * 2`` must
                equal the transformer's ``embed_dims``.
            loss_cls: Classification loss config. The caller's dict is left
                untouched; a copy is rewritten and passed to ``build_loss``.
            loss_bbox: Box regression loss config.
            loss_iou: IoU loss config.
            train_cfg: Training config; when truthy it must contain an
                ``'assigner'`` whose cost weights equal the matching loss
                weights.
            test_cfg: Stored as ``self.test_cfg``.
            init_cfg: Forwarded to the parent initialiser.
            **kwargs: Accepted and ignored by this constructor.

        Raises:
            AssertionError: On wrongly typed class weights, inconsistent
                loss/assigner weights, a missing assigner, or a mismatch
                between ``num_feats`` and ``embed_dims``.
        """
        # NOTE here use `AnchorFreeHead` instead of `TransformerHead`,
        # since it brings inconvenience when the initialization of
        # `AnchorFreeHead` is called.
        super(AnchorFreeHead, self).__init__(init_cfg)
        self.bg_cls_weight = 0
        self.sync_cls_avg_factor = sync_cls_avg_factor

        # Work on a shallow copy. The config is rewritten below, and editing
        # the argument in place would corrupt the shared default dict (a
        # second default construction would then see a tensor where a float
        # is expected) as well as any config object owned by the caller.
        loss_cls = loss_cls.copy()
        class_weight = loss_cls.get('class_weight', None)
        if class_weight is not None and (self.__class__ is DETRHead):
            assert isinstance(class_weight, float), 'Expected ' \
                'class_weight to have type float. Found ' \
                f'{type(class_weight)}.'
            # NOTE following the official DETR repo, bg_cls_weight means
            # relative classification weight of the no-object class.
            bg_cls_weight = loss_cls.get('bg_cls_weight', class_weight)
            assert isinstance(bg_cls_weight, float), 'Expected ' \
                'bg_cls_weight to have type float. Found ' \
                f'{type(bg_cls_weight)}.'
            class_weight = torch.ones(num_classes + 1) * class_weight
            # set background class as the last index
            class_weight[num_classes] = bg_cls_weight
            loss_cls['class_weight'] = class_weight
            # ``bg_cls_weight`` is now folded into ``class_weight`` and is
            # removed before the config reaches ``build_loss``.
            loss_cls.pop('bg_cls_weight', None)
            self.bg_cls_weight = bg_cls_weight

        if train_cfg:
            assert 'assigner' in train_cfg, 'assigner should be provided '\
                'when train_cfg is set.'
            assigner = train_cfg['assigner']
            assert loss_cls['loss_weight'] == assigner['cls_cost']['weight'], \
                'The classification weight for loss and matcher should be ' \
                'exactly the same.'
            assert loss_bbox['loss_weight'] == assigner['reg_cost'][
                'weight'], 'The regression L1 weight for loss and matcher ' \
                'should be exactly the same.'
            assert loss_iou['loss_weight'] == assigner['iou_cost']['weight'], \
                'The regression iou weight for loss and matcher should be ' \
                'exactly the same.'
            self.assigner = build_assigner(assigner)
            # DETR sampling=False, so use PseudoSampler
            sampler_cfg = dict(type='PseudoSampler')
            self.sampler = build_sampler(sampler_cfg, context=self)
        self.num_query = num_query
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.num_reg_fcs = num_reg_fcs
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.fp16_enabled = False
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)
        self.loss_iou = build_loss(loss_iou)

        # Without sigmoid an extra output channel is reserved for the
        # background (no-object) class.
        if self.loss_cls.use_sigmoid:
            self.cls_out_channels = num_classes
        else:
            self.cls_out_channels = num_classes + 1
        self.act_cfg = transformer.get('act_cfg',
                                       dict(type='ReLU', inplace=True))
        self.activate = build_activation_layer(self.act_cfg)
        self.positional_encoding = build_positional_encoding(
            positional_encoding)
        self.transformer = build_transformer(transformer)
        self.embed_dims = self.transformer.embed_dims
        assert 'num_feats' in positional_encoding
        num_feats = positional_encoding['num_feats']
        assert num_feats * 2 == self.embed_dims, 'embed_dims should' \
            f' be exactly 2 times of num_feats. Found {self.embed_dims}' \
            f' and {num_feats}.'
        self._init_layers()
Ejemplo n.º 10
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 groups=3,
                 first_block=True,
                 combine='add',
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 with_cp=False):
        """Build the compress / depthwise / expand convolutions of the unit.

        Args:
            in_channels: Input channel count of the unit.
            out_channels: Output channel count of the unit. With
                ``combine='concat'`` the expand conv produces
                ``out_channels - in_channels`` channels.
            groups: Group count of the 1x1 expand conv, and of the 1x1
                compress conv unless ``first_block`` is truthy.
            first_block: When truthy the compress conv uses a single group.
            combine: ``'add'`` (stride 1, requires equal in/out channels) or
                ``'concat'`` (stride 2, adds an average-pool branch).
            conv_cfg: Conv config for all ``ConvModule`` layers.
            norm_cfg: Norm config for all ``ConvModule`` layers.
            act_cfg: Activation config for the compress conv and for
                ``self.act``.
            with_cp: Stored as ``self.with_cp``.

        Raises:
            ValueError: If ``combine`` is neither ``'add'`` nor ``'concat'``.
            AssertionError: If ``combine='add'`` and the channel counts
                differ.
        """
        # Private copies, so the shared default dicts are never touched.
        norm_cfg = copy.deepcopy(norm_cfg)
        act_cfg = copy.deepcopy(act_cfg)
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.first_block = first_block
        self.combine = combine
        self.groups = groups
        self.bottleneck_channels = self.out_channels // 4
        self.with_cp = with_cp

        if self.combine not in ('add', 'concat'):
            raise ValueError(f'Cannot combine tensors with {self.combine}. '
                             'Only "add" and "concat" are supported')

        if self.combine == 'add':
            self.depthwise_stride = 1
            self._combine_func = self._add
            assert in_channels == out_channels, (
                'in_channels must be equal to out_channels when combine '
                'is add')
        else:
            self.depthwise_stride = 2
            self._combine_func = self._concat
            # The remaining channels come from the pooled shortcut branch.
            self.out_channels -= self.in_channels
            self.avgpool = nn.AvgPool2d(kernel_size=3, stride=2, padding=1)

        if first_block:
            self.first_1x1_groups = 1
        else:
            self.first_1x1_groups = self.groups

        shared_cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg)

        self.g_conv_1x1_compress = ConvModule(
            in_channels=self.in_channels,
            out_channels=self.bottleneck_channels,
            kernel_size=1,
            groups=self.first_1x1_groups,
            act_cfg=act_cfg,
            **shared_cfg)

        self.depthwise_conv3x3_bn = ConvModule(
            in_channels=self.bottleneck_channels,
            out_channels=self.bottleneck_channels,
            kernel_size=3,
            stride=self.depthwise_stride,
            padding=1,
            groups=self.bottleneck_channels,
            act_cfg=None,
            **shared_cfg)

        self.g_conv_1x1_expand = ConvModule(
            in_channels=self.bottleneck_channels,
            out_channels=self.out_channels,
            kernel_size=1,
            groups=self.groups,
            act_cfg=None,
            **shared_cfg)

        self.act = build_activation_layer(act_cfg)
Ejemplo n.º 11
0
    def __init__(self,
                 num_classes=150,
                 num_ffn_fcs=2,
                 num_heads=8,
                 num_mask_fcs=3,
                 feedforward_channels=2048,
                 in_channels=256,
                 out_channels=256,
                 dropout=0.0,
                 act_cfg=dict(type='ReLU', inplace=True),
                 ffn_act_cfg=dict(type='ReLU', inplace=True),
                 conv_kernel_size=1,
                 feat_transform_cfg=None,
                 kernel_init=False,
                 with_ffn=True,
                 feat_gather_stride=1,
                 mask_transform_stride=1,
                 kernel_updator_cfg=dict(type='DynamicConv',
                                         in_channels=256,
                                         feat_channels=64,
                                         out_channels=256,
                                         act_cfg=dict(type='ReLU',
                                                      inplace=True),
                                         norm_cfg=dict(type='LN'))):
        """Build attention, kernel-update, FFN and mask branches of the head.

        Args:
            num_classes: Stored as ``self.num_classes``.
            num_ffn_fcs: Third positional argument of ``FFN``.
            num_heads: Head count of the ``MultiheadAttention`` layer.
            num_mask_fcs: Number of Linear + LN + activation triples in
                ``self.mask_fcs``.
            feedforward_channels: Second positional argument of ``FFN``.
            in_channels: Base channel count of the head.
            out_channels: Output size of ``self.fc_mask``.
            dropout: Dropout value for the attention layer and the FFN.
            act_cfg: Activation config for the mask branch.
            ffn_act_cfg: Activation config for the FFN.
            conv_kernel_size: The attention and its norm operate on
                ``in_channels * conv_kernel_size**2`` features.
            feat_transform_cfg: Optional keyword arguments for the feature
                transform ``ConvModule``; a ``'kernel_size'`` entry
                (default 1) is extracted and passed positionally. The
                caller's dict is not modified.
            kernel_init: Stored as ``self.kernel_init``.
            with_ffn: When truthy the FFN and its norm are created.
            feat_gather_stride: Stride of the feature transform conv; its
                padding is ``int(feat_gather_stride // 2)``.
            mask_transform_stride: Stored as ``self.mask_transform_stride``.
            kernel_updator_cfg: Config passed to
                ``build_transformer_layer``.
        """
        super(KernelUpdateHead, self).__init__()
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.fp16_enabled = False
        self.dropout = dropout
        self.num_heads = num_heads
        self.kernel_init = kernel_init
        self.with_ffn = with_ffn
        self.conv_kernel_size = conv_kernel_size
        self.feat_gather_stride = feat_gather_stride
        self.mask_transform_stride = mask_transform_stride

        self.attention = MultiheadAttention(in_channels * conv_kernel_size**2,
                                            num_heads, dropout)
        self.attention_norm = build_norm_layer(
            dict(type='LN'), in_channels * conv_kernel_size**2)[1]
        self.kernel_update_conv = build_transformer_layer(kernel_updator_cfg)

        if feat_transform_cfg is not None:
            # Pop from a copy: removing 'kernel_size' from the caller's dict
            # would make a later head built from the same config silently
            # fall back to kernel_size=1.
            feat_transform_cfg = feat_transform_cfg.copy()
            kernel_size = feat_transform_cfg.pop('kernel_size', 1)
            transform_channels = in_channels
            self.feat_transform = ConvModule(transform_channels,
                                             in_channels,
                                             kernel_size,
                                             stride=feat_gather_stride,
                                             padding=int(feat_gather_stride //
                                                         2),
                                             **feat_transform_cfg)
        else:
            self.feat_transform = None

        if self.with_ffn:
            self.ffn = FFN(in_channels,
                           feedforward_channels,
                           num_ffn_fcs,
                           act_cfg=ffn_act_cfg,
                           dropout=dropout)
            self.ffn_norm = build_norm_layer(dict(type='LN'), in_channels)[1]

        # Mask branch: repeated (Linear without bias, LN, activation).
        self.mask_fcs = nn.ModuleList()
        for _ in range(num_mask_fcs):
            self.mask_fcs.append(
                nn.Linear(in_channels, in_channels, bias=False))
            self.mask_fcs.append(
                build_norm_layer(dict(type='LN'), in_channels)[1])
            self.mask_fcs.append(build_activation_layer(act_cfg))

        self.fc_mask = nn.Linear(in_channels, out_channels)
Ejemplo n.º 12
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 num_kernels,
                 norm_cfg=dict(type='BN2d', momentum=0.1),
                 act_cfg=dict(type='ReLU', inplace=True),
                 scorenet_input='w_neighbor_dist',
                 weight_bank_init='kaiming',
                 kernel_input='w_neighbor',
                 scorenet_cfg=dict(mlp_channels=[8, 16, 16],
                                   score_norm='softmax',
                                   temp_factor=1.0,
                                   last_bn=False)):
        """Build the weight bank, the ScoreNet and the norm/activation.

        Args:
            in_channels: Input feature channels.
            out_channels: Output feature channels.
            num_kernels: Number of kernels in the weight bank (``self.m``).
            norm_cfg: Norm config, or ``None`` to set ``self.bn`` to
                ``None``.
            act_cfg: Activation config, or ``None`` to set ``self.activate``
                to ``None``.
            scorenet_input: ``'identity'``, ``'w_neighbor'`` or
                ``'w_neighbor_dist'``; fixes the ScoreNet input width to
                3, 6 or 7 respectively.
            weight_bank_init: ``'kaiming'`` or ``'xavier'``.
            kernel_input: ``'identity'`` or ``'w_neighbor'``; the latter
                doubles the input width of the weight bank.
            scorenet_cfg: Keyword arguments for ``ScoreNet``. A deep copy is
                extended with the input width and ``num_kernels``; the
                argument itself is not modified.

        Raises:
            NotImplementedError: For an unsupported ``kernel_input``,
                ``scorenet_input`` or ``weight_bank_init``.
        """
        super(PAConv, self).__init__()

        # Width multiplier of the weight bank input: grouped features only,
        # or the concat of (grouped - center, grouped) features.
        for option, multiplier in (('identity', 1), ('w_neighbor', 2)):
            if kernel_input == option:
                self.kernel_mul = multiplier
                break
        else:
            raise NotImplementedError(
                f'unsupported kernel_input {kernel_input}')
        self.kernel_input = kernel_input

        # ScoreNet input width by xyz feature set:
        #   identity        -> relative position only
        #   w_neighbor      -> relative position + grouped xyz
        #   w_neighbor_dist -> center xyz + relative position + distance
        scorenet_widths = (
            ('identity', 3),
            ('w_neighbor', 6),
            ('w_neighbor_dist', 7),
        )
        for option, width in scorenet_widths:
            if scorenet_input == option:
                self.scorenet_in_channels = width
                break
        else:
            raise NotImplementedError(
                f'unsupported scorenet_input {scorenet_input}')
        self.scorenet_input = scorenet_input

        # Initialiser for the weight bank.
        if weight_bank_init == 'kaiming':
            weight_init = nn.init.kaiming_normal_
        elif weight_bank_init == 'xavier':
            weight_init = nn.init.xavier_normal_
        else:
            raise NotImplementedError(
                f'unsupported weight bank init method {weight_bank_init}')

        # The bank is initialised as [m, C, out_c] and stored flattened as
        # [C, m * out_c], where C is in_c or 2 * in_c.
        self.m = num_kernels
        bank_in_channels = in_channels * self.kernel_mul
        bank = weight_init(
            torch.empty(self.m, bank_in_channels, out_channels))
        bank = bank.permute(1, 0, 2)
        bank = bank.reshape(bank_in_channels, self.m * out_channels)
        self.weight_bank = nn.Parameter(bank.contiguous(), requires_grad=True)

        # ScoreNet: prepend its input width and append the kernel count to
        # a private copy of the configured MLP channels.
        scorenet_kwargs = copy.deepcopy(scorenet_cfg)
        mlp_channels = scorenet_kwargs['mlp_channels']
        mlp_channels.insert(0, self.scorenet_in_channels)
        mlp_channels.append(self.m)
        self.scorenet = ScoreNet(**scorenet_kwargs)

        if norm_cfg is not None:
            self.bn = build_norm_layer(norm_cfg, out_channels)[1]
        else:
            self.bn = None

        if act_cfg is not None:
            self.activate = build_activation_layer(act_cfg)
        else:
            self.activate = None

        self.init_weights()
Ejemplo n.º 13
0
    def __init__(self,
                 input_scale,
                 num_classes=0,
                 base_channels=128,
                 input_channels=3,
                 attention_cfg=dict(type='SelfAttentionBlock'),
                 attention_after_nth_block=-1,
                 channels_cfg=None,
                 downsample_cfg=None,
                 from_rgb_cfg=dict(type='SNGANDiscHeadResBlock'),
                 blocks_cfg=dict(type='SNGANDiscResBlock'),
                 act_cfg=dict(type='ReLU'),
                 with_spectral_norm=True,
                 sn_eps=1e-12,
                 init_cfg=dict(type='BigGAN'),
                 pretrained=None):

        super().__init__()

        self.init_type = init_cfg.get('type', None)

        # add SN options and activation function options to cfg
        self.from_rgb_cfg = deepcopy(from_rgb_cfg)
        self.from_rgb_cfg.setdefault('act_cfg', act_cfg)
        self.from_rgb_cfg.setdefault('with_spectral_norm', with_spectral_norm)
        self.from_rgb_cfg.setdefault('init_cfg', init_cfg)

        # add SN options and activation function options to cfg
        self.blocks_cfg = deepcopy(blocks_cfg)
        self.blocks_cfg.setdefault('act_cfg', act_cfg)
        self.blocks_cfg.setdefault('with_spectral_norm', with_spectral_norm)
        self.blocks_cfg.setdefault('sn_eps', sn_eps)
        self.blocks_cfg.setdefault('init_cfg', init_cfg)

        channels_cfg = deepcopy(self._defualt_channels_cfg) \
            if channels_cfg is None else deepcopy(channels_cfg)
        if isinstance(channels_cfg, dict):
            if input_scale not in channels_cfg:
                raise KeyError(f'`input_scale={input_scale} is not found in '
                               '`channel_cfg`, only support configs for '
                               f'{[chn for chn in channels_cfg.keys()]}')
            self.channel_factor_list = channels_cfg[input_scale]
        elif isinstance(channels_cfg, list):
            self.channel_factor_list = channels_cfg
        else:
            raise ValueError('Only support list or dict for `channel_cfg`, '
                             f'receive {type(channels_cfg)}')

        downsample_cfg = deepcopy(self._defualt_downsample_cfg) \
            if downsample_cfg is None else deepcopy(downsample_cfg)
        if isinstance(downsample_cfg, dict):
            if input_scale not in downsample_cfg:
                raise KeyError(f'`output_scale={input_scale} is not found in '
                               '`downsample_cfg`, only support configs for '
                               f'{[chn for chn in downsample_cfg.keys()]}')
            self.downsample_list = downsample_cfg[input_scale]
        elif isinstance(downsample_cfg, list):
            self.downsample_list = downsample_cfg
        else:
            raise ValueError('Only support list or dict for `channel_cfg`, '
                             f'receive {type(downsample_cfg)}')

        if len(self.downsample_list) != len(self.channel_factor_list):
            raise ValueError('`downsample_cfg` should have same length with '
                             '`channels_cfg`, but receive '
                             f'{len(self.downsample_list)} and '
                             f'{len(self.channel_factor_list)}.')

        # check `attention_after_nth_block`
        if not isinstance(attention_after_nth_block, list):
            attention_after_nth_block = [attention_after_nth_block]
        if not all([isinstance(idx, int)
                    for idx in attention_after_nth_block]):
            raise ValueError('`attention_after_nth_block` only support int or '
                             'a list of int. Please check your input type.')

        self.from_rgb = build_module(
            self.from_rgb_cfg,
            dict(in_channels=input_channels, out_channels=base_channels))

        self.conv_blocks = nn.ModuleList()
        # add self-attention block after the first block
        if 1 in attention_after_nth_block:
            attn_cfg_ = deepcopy(attention_cfg)
            attn_cfg_['in_channels'] = base_channels
            self.conv_blocks.append(build_module(attn_cfg_))

        for idx in range(len(self.downsample_list)):
            factor_input = 1 if idx == 0 else self.channel_factor_list[idx - 1]
            factor_output = self.channel_factor_list[idx]

            # get block-specific config
            block_cfg_ = deepcopy(self.blocks_cfg)
            block_cfg_['downsample'] = self.downsample_list[idx]
            block_cfg_['in_channels'] = factor_input * base_channels
            block_cfg_['out_channels'] = factor_output * base_channels
            self.conv_blocks.append(build_module(block_cfg_))

            # build self-attention block
            # the first ConvBlock is `from_rgb` block,
            # add 2 to get the index of the ConvBlocks
            if idx + 2 in attention_after_nth_block:
                attn_cfg_ = deepcopy(attention_cfg)
                attn_cfg_['in_channels'] = factor_output * base_channels
                self.conv_blocks.append(build_module(attn_cfg_))

        self.decision = nn.Linear(factor_output * base_channels, 1)

        if with_spectral_norm:
            self.decision = spectral_norm(self.decision)

        self.num_classes = num_classes

        # In this case, discriminator is designed for conditional synthesis.
        if num_classes > 0:
            self.proj_y = nn.Embedding(num_classes,
                                       factor_output * base_channels)
            if with_spectral_norm:
                self.proj_y = spectral_norm(self.proj_y)

        self.activate = build_activation_layer(act_cfg)
        self.init_weights(pretrained)
Ejemplo n.º 14
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 padding=1,
                 dilation=1,
                 groups=1,
                 padding_mode='zeros',
                 se_cfg=None,
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 deploy=False,
                 init_cfg=None):
        """Build the block in multi-branch or re-parameterized form.

        With ``deploy=True`` a single 3x3 convolution with bias
        (``branch_reparam``) is created. Otherwise a 3x3 branch and a 1x1
        branch are created through ``self.create_conv_bn`` and, when
        ``out_channels == in_channels``, ``stride == 1`` and
        ``padding == dilation``, an additional norm-only branch is built
        (``branch_norm`` is ``None`` in every other case). The optional
        ``SELayer`` and the activation layer are built in both modes.

        Args:
            in_channels (int): Input channels of the block.
            out_channels (int): Output channels of the block.
            stride (int): Stride, forwarded to the deploy convolution.
            padding (int): Padding of the 3x3 convolution.
            dilation (int): Dilation of the 3x3 convolution.
            groups (int): Groups, forwarded to the deploy convolution.
            padding_mode (str): Only forwarded to the deploy convolution
                in this method.
            se_cfg (dict | None): Extra keyword arguments for ``SELayer``;
                ``None`` disables the SE layer.
            with_cp (bool): Stored on the instance; not used here.
            conv_cfg (dict | None): Config passed to ``build_conv_layer``.
            norm_cfg (dict): Config passed to ``build_norm_layer``.
            act_cfg (dict): Config passed to ``build_activation_layer``.
            deploy (bool): Selects the single-convolution form.
            init_cfg (dict | None): Passed to the parent initializer.
        """
        super(RepVGGBlock, self).__init__(init_cfg)

        assert se_cfg is None or isinstance(se_cfg, dict)

        # Mirror the constructor arguments as plain attributes.
        self.in_channels, self.out_channels = in_channels, out_channels
        self.stride, self.padding = stride, padding
        self.dilation, self.groups = dilation, groups
        self.se_cfg, self.with_cp = se_cfg, with_cp
        self.conv_cfg, self.norm_cfg = conv_cfg, norm_cfg
        self.act_cfg, self.deploy = act_cfg, deploy

        if not deploy:
            # The normalized identity shortcut is only added when the block
            # keeps the input shape.
            keeps_shape = (out_channels == in_channels and stride == 1
                           and padding == dilation)
            self.branch_norm = build_norm_layer(
                norm_cfg, in_channels)[1] if keeps_shape else None

            self.branch_3x3 = self.create_conv_bn(
                kernel_size=3, dilation=dilation, padding=padding)
            self.branch_1x1 = self.create_conv_bn(kernel_size=1)
        else:
            reparam_kwargs = dict(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
                stride=stride,
                padding=padding,
                dilation=dilation,
                groups=groups,
                bias=True,
                padding_mode=padding_mode)
            self.branch_reparam = build_conv_layer(conv_cfg, **reparam_kwargs)

        self.se_layer = None if se_cfg is None else SELayer(
            channels=out_channels, **se_cfg)

        self.act = build_activation_layer(act_cfg)
Ejemplo n.º 15
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 hidden_channels=None,
                 num_classes=0,
                 use_cbn=True,
                 use_norm_affine=False,
                 act_cfg=dict(type='ReLU'),
                 norm_cfg=dict(type='BN'),
                 upsample_cfg=dict(type='nearest', scale_factor=2),
                 upsample=True,
                 auto_sync_bn=True,
                 conv_cfg=None,
                 with_spectral_norm=False,
                 with_embedding_spectral_norm=None,
                 sn_style='torch',
                 norm_eps=1e-4,
                 sn_eps=1e-12,
                 init_cfg=dict(type='BigGAN')):
        """Create the layers of the residual block.

        Builds the activation, an optional upsample layer, two
        ``SNConvModule`` convolutions (``in -> hidden -> out``), two
        ``SNConditionNorm`` layers (for ``in_channels`` and
        ``hidden_channels``) and, when ``learnable_sc`` is truthy, a 1x1
        ``SNConvModule`` shortcut. Finally calls ``self.init_weights()``.

        Args:
            in_channels (int): Input channels of the block.
            out_channels (int): Output channels of the block.
            hidden_channels (int | None): Channels between the two
                convolutions; falls back to ``out_channels`` when ``None``.
            num_classes, use_cbn, norm_cfg, use_norm_affine, auto_sync_bn,
            norm_eps: Forwarded positionally to ``SNConditionNorm``.
            act_cfg (dict): Config for ``build_activation_layer``.
            upsample_cfg (dict): Config for ``build_upsample_layer``.
            upsample (bool): Whether to build the upsample layer.
            conv_cfg (dict | None): Overrides merged into a copy of
                ``self._default_conv_cfg``.
            with_spectral_norm (bool): Forwarded to every ``SNConvModule``.
            with_embedding_spectral_norm (bool | None): Forwarded to the
                norm layers; ``None`` means "same as
                ``with_spectral_norm``".
            sn_style (str), sn_eps (float): Spectral norm settings, passed
                to both the convolutions and the norm layers.
            init_cfg (dict): Its ``'type'`` entry is stored as
                ``self.init_type``; the dict is also forwarded to the norm
                layers.
        """
        super().__init__()
        self.learnable_sc = in_channels != out_channels or upsample
        self.with_upsample = upsample
        self.init_type = init_cfg.get('type', None)

        self.activate = build_activation_layer(act_cfg)
        if hidden_channels is None:
            hidden_channels = out_channels

        if self.with_upsample:
            self.upsample = build_upsample_layer(upsample_cfg)

        # Start from the class-level defaults and let the caller override.
        merged_conv_cfg = deepcopy(self._default_conv_cfg)
        if conv_cfg is not None:
            merged_conv_cfg.update(conv_cfg)
        self.conv_cfg = merged_conv_cfg

        # Embedding spectral norm follows `with_spectral_norm` unless set.
        if with_embedding_spectral_norm is None:
            with_embedding_spectral_norm = with_spectral_norm

        sn_cfg = dict(eps=sn_eps, sn_style=sn_style)

        def _make_conv(num_in, num_out):
            # Both main-path convolutions share every setting but the widths.
            return SNConvModule(
                num_in,
                num_out,
                with_spectral_norm=with_spectral_norm,
                spectral_norm_cfg=sn_cfg,
                **self.conv_cfg)

        self.conv_1 = _make_conv(in_channels, hidden_channels)
        self.conv_2 = _make_conv(hidden_channels, out_channels)

        # Everything after the channel count is identical for both norms.
        shared_norm_args = (num_classes, use_cbn, norm_cfg, use_norm_affine,
                            auto_sync_bn, with_embedding_spectral_norm,
                            sn_style, norm_eps, sn_eps, init_cfg)
        self.norm_1 = SNConditionNorm(in_channels, *shared_norm_args)
        self.norm_2 = SNConditionNorm(hidden_channels, *shared_norm_args)

        if self.learnable_sc:
            # The shortcut uses fixed hyperparameters (1x1, stride 1).
            self.shortcut = SNConvModule(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                act_cfg=None,
                with_spectral_norm=with_spectral_norm,
                spectral_norm_cfg=sn_cfg)
        self.init_weights()
Ejemplo n.º 16
0
    def __init__(self,
                 output_scale,
                 num_classes=0,
                 base_channels=64,
                 out_channels=3,
                 input_scale=4,
                 noise_size=128,
                 attention_cfg=dict(type='SelfAttentionBlock'),
                 attention_after_nth_block=0,
                 channels_cfg=None,
                 blocks_cfg=dict(type='SNGANGenResBlock'),
                 act_cfg=dict(type='ReLU'),
                 use_cbn=True,
                 auto_sync_bn=True,
                 with_spectral_norm=False,
                 with_embedding_spectral_norm=None,
                 norm_eps=1e-4,
                 sn_eps=1e-12,
                 init_cfg=dict(type='BigGAN'),
                 pretrained=None):
        """Build the noise projection, the conv blocks and the RGB head.

        Args:
            output_scale (int): Key used to select the channel factor list
                when ``channels_cfg`` is a dict.
            num_classes (int): Default ``num_classes`` for every block
                config. Defaults to 0.
            base_channels (int): Multiplied with each channel factor to get
                the block widths. Defaults to 64.
            out_channels (int): Output channels of ``to_rgb``.
                Defaults to 3.
            input_scale (int): ``noise2feat`` outputs
                ``input_scale**2 * base_channels * channel_factor_list[0]``
                features. Defaults to 4.
            noise_size (int): Input features of ``noise2feat``.
                Defaults to 128.
            attention_cfg (dict): Config of the attention block;
                ``in_channels`` is filled in here.
            attention_after_nth_block (int | list[int]): 1-based indices of
                the conv blocks after which an attention block is appended.
                Defaults to 0, which matches no block.
            channels_cfg (dict | list | None): Channel factors, either a
                list or a dict keyed by ``output_scale``. ``None`` uses
                ``self._default_channels_cfg``.
            blocks_cfg (dict): Base config of each conv block. The options
                below are only filled in when the key is absent.
            act_cfg (dict): Default block activation and the activation of
                ``to_rgb``.
            use_cbn (bool): Default ``use_cbn`` for the blocks.
            auto_sync_bn (bool): Default for the blocks; also switches the
                ``to_rgb`` norm to ``SyncBN`` when ``check_dist_init()`` is
                true.
            with_spectral_norm (bool): Applied to ``noise2feat``, ``to_rgb``
                and, by default, the blocks.
            with_embedding_spectral_norm (bool | None): Default for the
                blocks; ``None`` means "same as ``with_spectral_norm``".
            norm_eps (float): Default for the blocks and ``eps`` of the
                ``to_rgb`` norm.
            sn_eps (float): Default ``sn_eps`` for the blocks.
            init_cfg (dict): Its ``'type'`` entry is stored as
                ``self.init_type``; also the default ``init_cfg`` of the
                blocks.
            pretrained: Passed to ``self.init_weights``.

        Raises:
            KeyError: If ``channels_cfg`` is a dict without an entry for
                ``output_scale``.
            ValueError: If ``channels_cfg`` is neither dict nor list, or if
                ``attention_after_nth_block`` is not an int / list of int.
        """

        super().__init__()

        self.input_scale = input_scale
        self.output_scale = output_scale
        self.noise_size = noise_size
        self.num_classes = num_classes
        self.init_type = init_cfg.get('type', None)

        # Work on a copy so the caller's (or the default) dict is untouched.
        self.blocks_cfg = deepcopy(blocks_cfg)

        self.blocks_cfg.setdefault('num_classes', num_classes)
        self.blocks_cfg.setdefault('act_cfg', act_cfg)
        self.blocks_cfg.setdefault('use_cbn', use_cbn)
        self.blocks_cfg.setdefault('auto_sync_bn', auto_sync_bn)
        self.blocks_cfg.setdefault('with_spectral_norm', with_spectral_norm)

        # set `with_embedding_spectral_norm` as `with_spectral_norm` if not
        # defined
        with_embedding_spectral_norm = with_embedding_spectral_norm \
            if with_embedding_spectral_norm is not None else with_spectral_norm
        self.blocks_cfg.setdefault('with_embedding_spectral_norm',
                                   with_embedding_spectral_norm)
        self.blocks_cfg.setdefault('init_cfg', init_cfg)
        self.blocks_cfg.setdefault('norm_eps', norm_eps)
        self.blocks_cfg.setdefault('sn_eps', sn_eps)

        channels_cfg = deepcopy(self._default_channels_cfg) \
            if channels_cfg is None else deepcopy(channels_cfg)
        if isinstance(channels_cfg, dict):
            if output_scale not in channels_cfg:
                raise KeyError(f'`output_scale={output_scale}` is not found '
                               'in `channels_cfg`, only support configs for '
                               f'{list(channels_cfg)}')
            self.channel_factor_list = channels_cfg[output_scale]
        elif isinstance(channels_cfg, list):
            self.channel_factor_list = channels_cfg
        else:
            raise ValueError('Only support list or dict for `channels_cfg`, '
                             f'receive {type(channels_cfg)}')

        self.noise2feat = nn.Linear(
            noise_size,
            input_scale**2 * base_channels * self.channel_factor_list[0])
        if with_spectral_norm:
            self.noise2feat = spectral_norm(self.noise2feat)

        # check `attention_after_nth_block`
        if not isinstance(attention_after_nth_block, list):
            attention_after_nth_block = [attention_after_nth_block]
        if not is_list_of(attention_after_nth_block, int):
            raise ValueError('`attention_after_nth_block` only support int or '
                             'a list of int. Please check your input type.')

        self.conv_blocks = nn.ModuleList()
        self.attention_block_idx = []
        num_blocks = len(self.channel_factor_list)
        for idx in range(num_blocks):
            factor_input = self.channel_factor_list[idx]
            # the last block always ends with `base_channels` channels
            factor_output = self.channel_factor_list[idx + 1] \
                if idx < num_blocks - 1 else 1

            # get block-specific config
            block_cfg_ = deepcopy(self.blocks_cfg)
            block_cfg_['in_channels'] = factor_input * base_channels
            block_cfg_['out_channels'] = factor_output * base_channels
            self.conv_blocks.append(build_module(block_cfg_))

            # build self-attention block
            # `idx` is start from 0, add 1 to get the index
            if idx + 1 in attention_after_nth_block:
                # remember where the attention block sits in `conv_blocks`
                self.attention_block_idx.append(len(self.conv_blocks))
                attn_cfg_ = deepcopy(attention_cfg)
                attn_cfg_['in_channels'] = factor_output * base_channels
                self.conv_blocks.append(build_module(attn_cfg_))

        to_rgb_norm_cfg = dict(type='BN', eps=norm_eps)
        if check_dist_init() and auto_sync_bn:
            to_rgb_norm_cfg['type'] = 'SyncBN'

        # `factor_output` is the output factor of the last conv block
        self.to_rgb = ConvModule(factor_output * base_channels,
                                 out_channels,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1,
                                 bias=True,
                                 norm_cfg=to_rgb_norm_cfg,
                                 act_cfg=act_cfg,
                                 order=('norm', 'act', 'conv'),
                                 with_spectral_norm=with_spectral_norm)
        self.final_act = build_activation_layer(dict(type='Tanh'))

        self.init_weights(pretrained)
Ejemplo n.º 17
0
    def __init__(self,
                 num_classes,
                 in_channels,
                 num_fcs=2,
                 transformer=dict(
                     type='Transformer',
                     embed_dims=256,
                     num_heads=8,
                     num_encoder_layers=6,
                     num_decoder_layers=6,
                     feedforward_channels=2048,
                     dropout=0.1,
                     act_cfg=dict(type='ReLU', inplace=True),
                     norm_cfg=dict(type='LN'),
                     num_fcs=2,
                     pre_norm=False,
                     return_intermediate_dec=True),
                 positional_encoding=dict(
                     type='SinePositionalEncoding',
                     num_feats=128,
                     normalize=True),
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     bg_cls_weight=0.1,
                     use_sigmoid=False,
                     loss_weight=1.0,
                     class_weight=1.0),
                 loss_bbox=dict(type='L1Loss', loss_weight=5.0),
                 loss_iou=dict(type='GIoULoss', loss_weight=2.0),
                 train_cfg=dict(
                     assigner=dict(
                         type='HungarianAssigner',
                         cls_weight=1.,
                         bbox_weight=5.,
                         iou_weight=2.,
                         iou_calculator=dict(type='BboxOverlaps2D'),
                         iou_mode='giou')),
                 test_cfg=dict(max_per_img=100),
                 **kwargs):
        """Validate the configs and build losses, assigner and transformer.

        Args:
            num_classes (int): Number of foreground classes; one extra
                output channel is reserved for the background class.
            in_channels (int): Stored as ``self.in_channels``.
            num_fcs (int): Stored as ``self.num_fcs``. Defaults to 2.
            transformer (dict): Config for ``build_transformer``; must
                contain ``embed_dims``.
            positional_encoding (dict): Config for
                ``build_positional_encoding``; must contain ``num_feats``
                with ``num_feats * 2 == embed_dims``.
            loss_cls (dict): Classification loss config. A float
                ``class_weight`` is expanded to a per-class tensor whose
                last entry is ``bg_cls_weight``. The expansion is done on
                a copy; the passed dict is left unmodified.
            loss_bbox (dict): Config of the box regression loss.
            loss_iou (dict): Config of the IoU loss.
            train_cfg (dict | None): When truthy, must contain ``assigner``
                whose weights match the loss weights.
            test_cfg (dict): Must contain ``max_per_img``, which is used as
                the number of queries.
            **kwargs: Accepted but not used by this method.
        """
        # NOTE here use `AnchorFreeHead` instead of `TransformerHead`,
        # since it brings inconvenience when the initialization of
        # `AnchorFreeHead` is called.
        super(AnchorFreeHead, self).__init__()
        # `loss_cls` is updated below. Work on a shallow copy so neither the
        # caller's dict nor the shared default argument is modified;
        # otherwise a second construction with the defaults would see a
        # tensor `class_weight` and fail the float check.
        loss_cls = loss_cls.copy()
        use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
        assert not use_sigmoid_cls, 'setting use_sigmoid_cls as True is ' \
            'not supported in DETR, since background is needed for the ' \
            'matching process.'
        assert 'embed_dims' in transformer \
            and 'num_feats' in positional_encoding
        num_feats = positional_encoding['num_feats']
        embed_dims = transformer['embed_dims']
        assert num_feats * 2 == embed_dims, 'embed_dims should' \
            f' be exactly 2 times of num_feats. Found {embed_dims}' \
            f' and {num_feats}.'
        assert test_cfg is not None and 'max_per_img' in test_cfg

        class_weight = loss_cls.get('class_weight', None)
        if class_weight is not None:
            assert isinstance(class_weight, float), 'Expected ' \
                'class_weight to have type float. Found ' \
                f'{type(class_weight)}.'
            # NOTE following the official DETR repo, bg_cls_weight means
            # relative classification weight of the no-object class.
            bg_cls_weight = loss_cls.get('bg_cls_weight', class_weight)
            assert isinstance(bg_cls_weight, float), 'Expected ' \
                'bg_cls_weight to have type float. Found ' \
                f'{type(bg_cls_weight)}.'
            class_weight = torch.ones(num_classes + 1) * class_weight
            # set background class as the last index
            class_weight[num_classes] = bg_cls_weight
            loss_cls.update({'class_weight': class_weight})
            # `bg_cls_weight` is removed before the config is passed to
            # `build_loss`.
            if 'bg_cls_weight' in loss_cls:
                loss_cls.pop('bg_cls_weight')
            self.bg_cls_weight = bg_cls_weight

        if train_cfg:
            assert 'assigner' in train_cfg, 'assigner should be provided '\
                'when train_cfg is set.'
            assigner = train_cfg['assigner']
            assert loss_cls['loss_weight'] == assigner['cls_weight'], \
                'The classification weight for loss and matcher should be ' \
                'exactly the same.'
            assert loss_bbox['loss_weight'] == assigner['bbox_weight'], \
                'The regression L1 weight for loss and matcher should be ' \
                'exactly the same.'
            assert loss_iou['loss_weight'] == assigner['iou_weight'], \
                'The regression iou weight for loss and matcher should be ' \
                'exactly the same.'
            self.assigner = build_assigner(assigner)
            # DETR sampling=False, so use PseudoSampler
            sampler_cfg = dict(type='PseudoSampler')
            self.sampler = build_sampler(sampler_cfg, context=self)
        self.num_classes = num_classes
        self.cls_out_channels = num_classes + 1
        self.in_channels = in_channels
        self.num_fcs = num_fcs
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.use_sigmoid_cls = use_sigmoid_cls
        self.embed_dims = embed_dims
        self.num_query = test_cfg['max_per_img']
        self.fp16_enabled = False
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)
        self.loss_iou = build_loss(loss_iou)
        self.act_cfg = transformer.get('act_cfg',
                                       dict(type='ReLU', inplace=True))
        self.activate = build_activation_layer(self.act_cfg)
        self.positional_encoding = build_positional_encoding(
            positional_encoding)
        self.transformer = build_transformer(transformer)
        self._init_layers()
Ejemplo n.º 18
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 expansion=4,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU', inplace=True),
                 drop_path_rate=0.0,
                 init_cfg=None):
        """Build the 1x1 -> 3x3 -> 1x1 bottleneck layers.

        Args:
            in_channels (int): Input channels of the first convolution.
            out_channels (int): Output channels of the last convolution;
                must be divisible by ``expansion``.
            expansion (int): ``mid_channels = out_channels // expansion``.
                Defaults to 4.
            stride (int): Applied to the 3x3 convolution for the
                ``'pytorch'`` style and to the first 1x1 convolution for
                the ``'caffe'`` style.
            dilation (int): Dilation and padding of the 3x3 convolution.
            downsample: Stored as ``self.downsample``.
            style (str): ``'pytorch'`` or ``'caffe'``.
            with_cp (bool): Stored on the instance; not used here.
            conv_cfg (dict | None): Config passed to ``build_conv_layer``.
            norm_cfg (dict): Config passed to ``build_norm_layer``.
            act_cfg (dict): Config passed to ``build_activation_layer``.
            drop_path_rate (float): ``DropPath`` is used when this exceeds
                ``eps``; otherwise ``nn.Identity``.
            init_cfg (dict | None): Passed to the parent initializer.
        """
        super(Bottleneck, self).__init__(init_cfg=init_cfg)
        assert style in ['pytorch', 'caffe']

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.expansion = expansion
        assert out_channels % expansion == 0
        self.mid_channels = out_channels // expansion
        self.stride = stride
        self.dilation = dilation
        self.style = style
        self.with_cp = with_cp
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        # The style decides which convolution carries the stride.
        self.conv1_stride, self.conv2_stride = \
            (1, stride) if self.style == 'pytorch' else (stride, 1)

        # Create the three norm layers first and remember their names; they
        # are registered below, right after their convolutions.
        norm_widths = (self.mid_channels, self.mid_channels, out_channels)
        norm_layers = []
        for postfix, num_features in enumerate(norm_widths, start=1):
            norm_name, norm_layer = build_norm_layer(
                norm_cfg, num_features, postfix=postfix)
            setattr(self, f'norm{postfix}_name', norm_name)
            norm_layers.append(norm_layer)

        self.conv1 = build_conv_layer(
            conv_cfg,
            in_channels,
            self.mid_channels,
            kernel_size=1,
            stride=self.conv1_stride,
            bias=False)
        self.add_module(self.norm1_name, norm_layers[0])

        self.conv2 = build_conv_layer(
            conv_cfg,
            self.mid_channels,
            self.mid_channels,
            kernel_size=3,
            stride=self.conv2_stride,
            padding=dilation,
            dilation=dilation,
            bias=False)
        self.add_module(self.norm2_name, norm_layers[1])

        self.conv3 = build_conv_layer(
            conv_cfg,
            self.mid_channels,
            out_channels,
            kernel_size=1,
            bias=False)
        self.add_module(self.norm3_name, norm_layers[2])

        self.relu = build_activation_layer(act_cfg)
        self.downsample = downsample
        if drop_path_rate > eps:
            self.drop_path = DropPath(drop_prob=drop_path_rate)
        else:
            self.drop_path = nn.Identity()
Ejemplo n.º 19
0
    def __init__(self,
                 inplanes,
                 planes,
                 outplanes,
                 spatial_stride=1,
                 downsample=None,
                 se_ratio=None,
                 use_swish=True,
                 conv_cfg=dict(type='Conv3d'),
                 norm_cfg=dict(type='BN3d'),
                 act_cfg=dict(type='ReLU'),
                 with_cp=False):
        """Build the three ``ConvModule`` layers of the block.

        Args:
            inplanes (int): Input channels of the first 1x1 convolution.
            planes (int): Channels of the channel-wise 3x3 convolution.
            outplanes (int): Output channels of the last 1x1 convolution.
            spatial_stride (int): Stride of the 3x3 convolution on its
                last two dimensions. Defaults to 1.
            downsample: Stored as ``self.downsample``.
            se_ratio: When not ``None``, an ``SEModule(planes, se_ratio)``
                is created.
            use_swish (bool): Stored on the instance; the ``Swish`` layer
                is created regardless of this flag.
            conv_cfg (dict): Conv config shared by all three layers.
            norm_cfg (dict): Norm config shared by all three layers.
            act_cfg (dict): Activation of the first layer and of
                ``self.relu``.
            with_cp (bool): Stored on the instance; not used here.
        """
        super().__init__()

        self.inplanes, self.planes, self.outplanes = \
            inplanes, planes, outplanes
        self.spatial_stride = spatial_stride
        self.downsample = downsample
        self.se_ratio = se_ratio
        self.use_swish = use_swish
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg
        self.act_cfg_swish = dict(type='Swish')
        self.with_cp = with_cp

        # Settings common to all three convolution modules.
        shared = dict(
            bias=False, conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg)

        self.conv1 = ConvModule(
            in_channels=inplanes,
            out_channels=planes,
            kernel_size=1,
            stride=1,
            padding=0,
            act_cfg=self.act_cfg,
            **shared)

        # Channel-wise convolution: one group per channel.
        stride_3d = (1, self.spatial_stride, self.spatial_stride)
        self.conv2 = ConvModule(
            in_channels=planes,
            out_channels=planes,
            kernel_size=3,
            stride=stride_3d,
            padding=1,
            groups=planes,
            act_cfg=None,
            **shared)

        self.swish = Swish()

        self.conv3 = ConvModule(
            in_channels=planes,
            out_channels=outplanes,
            kernel_size=1,
            stride=1,
            padding=0,
            act_cfg=None,
            **shared)

        if self.se_ratio is not None:
            self.se_module = SEModule(planes, self.se_ratio)

        self.relu = build_activation_layer(self.act_cfg)
Ejemplo n.º 20
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 exp_ratio=6,
                 stride=1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 init_cfg=None):
        """Build the 3x3 conv, depthwise stack, projection and shortcut.

        Args:
            in_channels (int): Input channels.
            out_channels (int): Output channels of ``conv2`` and of the
                shortcut.
            exp_ratio (int): The depthwise stack works on
                ``in_channels * exp_ratio`` channels. Defaults to 6.
            stride (int): With ``stride == 1`` a single depthwise conv is
                used and ``self.shortcut`` is ``None``; otherwise two
                depthwise convs and a
                ``DepthwiseSeparableConvModule`` shortcut are built.
            conv_cfg (dict | None): Conv config of every ``ConvModule``.
            norm_cfg (dict): Norm config of every layer.
            act_cfg (dict): Activation config.
            init_cfg (dict | None): Passed to the parent initializer.
        """
        super(GELayer, self).__init__(init_cfg=init_cfg)
        mid_channel = in_channels * exp_ratio
        # conv/norm settings shared by every ConvModule below
        base = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg)

        self.conv1 = ConvModule(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            act_cfg=act_cfg,
            **base)

        if stride == 1:
            dw_layers = [
                # ReLU in ConvModule not shown in paper
                ConvModule(
                    in_channels=in_channels,
                    out_channels=mid_channel,
                    kernel_size=3,
                    stride=stride,
                    padding=1,
                    groups=in_channels,
                    act_cfg=act_cfg,
                    **base),
            ]
            shortcut = None
        else:
            dw_layers = [
                ConvModule(
                    in_channels=in_channels,
                    out_channels=mid_channel,
                    kernel_size=3,
                    stride=stride,
                    padding=1,
                    groups=in_channels,
                    bias=False,
                    act_cfg=None,
                    **base),
                # ReLU in ConvModule not shown in paper
                ConvModule(
                    in_channels=mid_channel,
                    out_channels=mid_channel,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    groups=mid_channel,
                    act_cfg=act_cfg,
                    **base),
            ]
            shortcut = nn.Sequential(
                DepthwiseSeparableConvModule(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    stride=stride,
                    padding=1,
                    dw_norm_cfg=norm_cfg,
                    dw_act_cfg=None,
                    pw_norm_cfg=norm_cfg,
                    pw_act_cfg=None,
                ))
        self.dwconv = nn.Sequential(*dw_layers)
        self.shortcut = shortcut

        # 1x1 projection back to `out_channels`, without activation
        projection = ConvModule(
            in_channels=mid_channel,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
            act_cfg=None,
            **base)
        self.conv2 = nn.Sequential(projection)

        self.act = build_activation_layer(act_cfg)
Ejemplo n.º 21
0
    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 dilation=1,
                 downsample=None,
                 style='pytorch',
                 with_cp=False,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU', inplace=True)):
        """Bottleneck block for ResNet.

        With the "pytorch" style the stride is applied by the 3x3
        convolution; with the "caffe" style it is applied by the first 1x1
        convolution.

        Args:
            inplanes (int): Input channels of the first convolution.
            planes (int): Channels of the first two convolutions; the
                last one outputs ``planes * self.expansion`` channels.
            stride (int): Stride of the block. Defaults to 1.
            dilation (int): Dilation and padding of the 3x3 convolution.
            downsample: Stored as ``self.downsample``.
            style (str): ``'pytorch'`` or ``'caffe'``.
            with_cp (bool): Stored on the instance; not used here.
            conv_cfg (dict | None): Config passed to ``build_conv_layer``.
            norm_cfg (dict): Config passed to ``build_norm_layer``.
            act_cfg (dict): Config passed to ``build_activation_layer``.
        """
        super(Bottleneck, self).__init__()
        assert style in ['pytorch', 'caffe']

        self.inplanes, self.planes = inplanes, planes
        self.stride, self.dilation = stride, dilation
        self.style, self.with_cp = style, with_cp
        self.conv_cfg, self.norm_cfg = conv_cfg, norm_cfg

        if self.style != 'pytorch':
            # caffe: stride on the first 1x1 convolution
            self.conv1_stride, self.conv2_stride = stride, 1
        else:
            # pytorch: stride on the 3x3 convolution
            self.conv1_stride, self.conv2_stride = 1, stride

        expanded_planes = planes * self.expansion

        # The norm layers are created up front and registered after their
        # respective convolutions.
        self.norm1_name, first_norm = build_norm_layer(
            norm_cfg, planes, postfix=1)
        self.norm2_name, second_norm = build_norm_layer(
            norm_cfg, planes, postfix=2)
        self.norm3_name, third_norm = build_norm_layer(
            norm_cfg, expanded_planes, postfix=3)

        self.conv1 = build_conv_layer(
            conv_cfg,
            inplanes,
            planes,
            kernel_size=1,
            stride=self.conv1_stride,
            bias=False)
        self.add_module(self.norm1_name, first_norm)

        self.conv2 = build_conv_layer(
            conv_cfg,
            planes,
            planes,
            kernel_size=3,
            stride=self.conv2_stride,
            padding=dilation,
            dilation=dilation,
            bias=False)
        self.add_module(self.norm2_name, second_norm)

        self.conv3 = build_conv_layer(
            conv_cfg,
            planes,
            expanded_planes,
            kernel_size=1,
            bias=False)
        self.add_module(self.norm3_name, third_norm)

        self.relu = build_activation_layer(act_cfg)
        self.downsample = downsample
Ejemplo n.º 22
0
    def __init__(self,
                 num_classes=80,
                 num_ffn_fcs=2,
                 num_heads=8,
                 num_cls_fcs=1,
                 num_reg_fcs=3,
                 feedforward_channels=2048,
                 in_channels=256,
                 dropout=0.0,
                 ffn_act_cfg=dict(type='ReLU', inplace=True),
                 dynamic_conv_cfg=dict(type='DynamicConv',
                                       in_channels=256,
                                       feat_channels=64,
                                       out_channels=256,
                                       input_feat_shape=7,
                                       act_cfg=dict(type='ReLU', inplace=True),
                                       norm_cfg=dict(type='LN')),
                 loss_iou=dict(type='GIoULoss', loss_weight=2.0),
                 init_cfg=None,
                 **kwargs):
        """Build attention, dynamic conv, FFN and the cls/reg branches.

        Args:
            num_classes (int): Forwarded to the parent head.
            num_ffn_fcs (int): Third positional argument of ``FFN``.
            num_heads (int): Heads of the ``MultiheadAttention`` layer.
            num_cls_fcs (int): Number of Linear-LN-ReLU groups in
                ``cls_fcs``. Defaults to 1.
            num_reg_fcs (int): Number of Linear-LN-ReLU groups in
                ``reg_fcs``. Defaults to 3.
            feedforward_channels (int): Second positional argument of
                ``FFN``.
            in_channels (int): Feature width used by every layer here.
            dropout (float): Dropout for attention, dynamic conv and FFN.
            ffn_act_cfg (dict): Activation config of the FFN.
            dynamic_conv_cfg (dict): Config passed to
                ``build_transformer``.
            loss_iou (dict): Config of the IoU loss.
            init_cfg (None): Must be ``None``.
            **kwargs: Forwarded to the parent head.
        """
        assert init_cfg is None, 'To prevent abnormal initialization ' \
                                 'behavior, init_cfg is not allowed to be set'
        super(DIIHead, self).__init__(num_classes=num_classes,
                                      reg_decoded_bbox=True,
                                      reg_class_agnostic=True,
                                      init_cfg=init_cfg,
                                      **kwargs)

        def _layer_norm():
            # every norm in this head is a LayerNorm over `in_channels`
            return build_norm_layer(dict(type='LN'), in_channels)[1]

        def _fc_group():
            # one Linear (no bias) -> LN -> ReLU group of a branch
            return [
                nn.Linear(in_channels, in_channels, bias=False),
                _layer_norm(),
                build_activation_layer(dict(type='ReLU', inplace=True)),
            ]

        self.loss_iou = build_loss(loss_iou)
        self.in_channels = in_channels
        self.fp16_enabled = False
        self.attention = MultiheadAttention(in_channels, num_heads, dropout)
        self.attention_norm = _layer_norm()

        self.instance_interactive_conv = build_transformer(dynamic_conv_cfg)
        self.instance_interactive_conv_dropout = nn.Dropout(dropout)
        self.instance_interactive_conv_norm = _layer_norm()

        self.ffn = FFN(in_channels,
                       feedforward_channels,
                       num_ffn_fcs,
                       act_cfg=ffn_act_cfg,
                       dropout=dropout)
        self.ffn_norm = _layer_norm()

        self.cls_fcs = nn.ModuleList()
        for _ in range(num_cls_fcs):
            self.cls_fcs.extend(_fc_group())

        # overload the self.fc_cls in BBoxHead; one extra output is added
        # when the classification loss does not use sigmoid
        num_cls_out = self.num_classes if self.loss_cls.use_sigmoid \
            else self.num_classes + 1
        self.fc_cls = nn.Linear(in_channels, num_cls_out)

        self.reg_fcs = nn.ModuleList()
        for _ in range(num_reg_fcs):
            self.reg_fcs.extend(_fc_group())
        # overload the self.fc_reg in BBoxHead
        self.fc_reg = nn.Linear(in_channels, 4)

        assert self.reg_class_agnostic, 'DIIHead only ' \
            'suppport `reg_class_agnostic=True` '
        assert self.reg_decoded_bbox, 'DIIHead only ' \
            'suppport `reg_decoded_bbox=True`'
Ejemplo n.º 23
0
 def _extra_norm_ac(self, norm_cfg, num_features):
     """Return a norm layer followed by the activation from ``self.act_cfg``.

     Args:
         norm_cfg (dict): Config passed to ``build_norm_layer``.
         num_features (int): Feature count passed to ``build_norm_layer``.

     Returns:
         nn.Sequential: ``[norm, activation]``.
     """
     norm_layer = build_norm_layer(norm_cfg, num_features)[1]
     act_layer = build_activation_layer(self.act_cfg)
     return nn.Sequential(norm_layer, act_layer)