Example #1
    def __init__(self,
                 embed_dims,
                 num_heads,
                 ffn_ratio=4,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 num_fcs=2,
                 qkv_bias=False,
                 act_cfg=dict(type='GELU'),
                 norm_cfg=dict(type='LN'),
                 batch_first=True,
                 init_cfg=None):
        super(TransformerBlock, self).__init__(init_cfg=init_cfg)

        self.norm_attn = build_norm_layer(norm_cfg, embed_dims)[1]
        self.attn = MultiheadAttention(
            embed_dims=embed_dims,
            num_heads=num_heads,
            attn_drop=attn_drop_rate,
            proj_drop=drop_rate,
            dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate),
            batch_first=batch_first)

        self.norm_ffn = build_norm_layer(norm_cfg, embed_dims)[1]
        self.ffn = FFN(
            embed_dims=embed_dims,
            feedforward_channels=embed_dims * ffn_ratio,
            num_fcs=num_fcs,
            ffn_drop=drop_rate,
            dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate),
            act_cfg=act_cfg)

        if not qkv_bias:
            self.attn.attn.in_proj_bias = None
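
A minimal instantiation sketch for the block above. The class name TransformerBlock and its argument names come from the snippet; the surrounding imports (mmcv's build_norm_layer, MultiheadAttention, FFN and a BaseModule parent) are assumed rather than shown, and the sizes are hypothetical:

block = TransformerBlock(embed_dims=384, num_heads=6, ffn_ratio=4,
                         drop_path_rate=0.1)
# With the default qkv_bias=False, the constructor strips the qkv projection
# bias of the wrapped attention by setting self.attn.attn.in_proj_bias = None.
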
Example #2
    def __init__(self,
                 embed_dims,
                 num_heads,
                 feedforward_channels,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 num_fcs=2,
                 qkv_bias=True,
                 act_cfg=dict(type='GELU'),
                 norm_cfg=dict(type='LN'),
                 batch_first=True):
        super(TransformerEncoderLayer, self).__init__()

        self.norm1_name, norm1 = build_norm_layer(norm_cfg,
                                                  embed_dims,
                                                  postfix=1)
        self.add_module(self.norm1_name, norm1)

        self.attn = MultiheadAttention(embed_dims=embed_dims,
                                       num_heads=num_heads,
                                       attn_drop=attn_drop_rate,
                                       proj_drop=drop_rate,
                                       dropout_layer=dict(
                                           type='DropPath',
                                           drop_prob=drop_path_rate),
                                       batch_first=batch_first,
                                       bias=qkv_bias)

        self.norm2_name, norm2 = build_norm_layer(norm_cfg,
                                                  embed_dims,
                                                  postfix=2)
        self.add_module(self.norm2_name, norm2)

        self.ffn = FFN(embed_dims=embed_dims,
                       feedforward_channels=feedforward_channels,
                       num_fcs=num_fcs,
                       ffn_drop=drop_rate,
                       dropout_layer=dict(type='DropPath',
                                          drop_prob=drop_path_rate),
                       act_cfg=act_cfg)
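
Compared with Example #1, this layer takes an absolute feedforward_channels instead of a ratio and registers its norm layers by name through build_norm_layer(..., postfix=N). A short usage sketch with ViT-Base-like sizes, chosen purely for illustration:

layer = TransformerEncoderLayer(embed_dims=768, num_heads=12,
                                feedforward_channels=3072)
# mmcv's build_norm_layer returns a (name, module) pair; with LN and postfix=1
# the registered name is 'ln1', so the norm can be fetched back by that name.
norm1 = getattr(layer, layer.norm1_name)
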
Example #3
    def __init__(self,
                 num_classes=80,
                 num_ffn_fcs=2,
                 num_heads=8,
                 num_cls_fcs=1,
                 num_reg_fcs=3,
                 feedforward_channels=2048,
                 in_channels=256,
                 dropout=0.0,
                 ffn_act_cfg=dict(type='ReLU', inplace=True),
                 dynamic_conv_cfg=dict(type='DynamicConv',
                                       in_channels=256,
                                       feat_channels=64,
                                       out_channels=256,
                                       input_feat_shape=7,
                                       act_cfg=dict(type='ReLU', inplace=True),
                                       norm_cfg=dict(type='LN')),
                 loss_iou=dict(type='GIoULoss', loss_weight=2.0),
                 init_cfg=None,
                 **kwargs):
        assert init_cfg is None, 'To prevent abnormal initialization ' \
                                 'behavior, init_cfg is not allowed to be set'
        super(DIIHead, self).__init__(num_classes=num_classes,
                                      reg_decoded_bbox=True,
                                      reg_class_agnostic=True,
                                      init_cfg=init_cfg,
                                      **kwargs)
        self.loss_iou = build_loss(loss_iou)
        self.in_channels = in_channels
        self.fp16_enabled = False
        self.attention = MultiheadAttention(in_channels, num_heads, dropout)
        self.attention_norm = build_norm_layer(dict(type='LN'), in_channels)[1]

        self.instance_interactive_conv = build_transformer(dynamic_conv_cfg)
        self.instance_interactive_conv_dropout = nn.Dropout(dropout)
        self.instance_interactive_conv_norm = build_norm_layer(
            dict(type='LN'), in_channels)[1]

        self.ffn = FFN(in_channels,
                       feedforward_channels,
                       num_ffn_fcs,
                       act_cfg=ffn_act_cfg,
                       dropout=dropout)
        self.ffn_norm = build_norm_layer(dict(type='LN'), in_channels)[1]

        self.cls_fcs = nn.ModuleList()
        for _ in range(num_cls_fcs):
            self.cls_fcs.append(nn.Linear(in_channels, in_channels,
                                          bias=False))
            self.cls_fcs.append(
                build_norm_layer(dict(type='LN'), in_channels)[1])
            self.cls_fcs.append(
                build_activation_layer(dict(type='ReLU', inplace=True)))

        # override the self.fc_cls defined in BBoxHead
        if self.loss_cls.use_sigmoid:
            self.fc_cls = nn.Linear(in_channels, self.num_classes)
        else:
            self.fc_cls = nn.Linear(in_channels, self.num_classes + 1)

        self.reg_fcs = nn.ModuleList()
        for _ in range(num_reg_fcs):
            self.reg_fcs.append(nn.Linear(in_channels, in_channels,
                                          bias=False))
            self.reg_fcs.append(
                build_norm_layer(dict(type='LN'), in_channels)[1])
            self.reg_fcs.append(
                build_activation_layer(dict(type='ReLU', inplace=True)))
        # override the self.fc_reg defined in BBoxHead
        self.fc_reg = nn.Linear(in_channels, 4)

        assert self.reg_class_agnostic, 'DIIHead only ' \
            'supports `reg_class_agnostic=True`'
        assert self.reg_decoded_bbox, 'DIIHead only ' \
            'supports `reg_decoded_bbox=True`'
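
The width of the classification branch above depends on the loss_cls config inherited through **kwargs: a sigmoid-based loss predicts num_classes logits, while a softmax-based loss adds one background slot. A quick check of that arithmetic (use_sigmoid is hypothetical here, set by whichever loss_cls is passed in):

num_classes = 80
use_sigmoid = False     # taken from the loss_cls config, not set in this snippet
cls_out_channels = num_classes if use_sigmoid else num_classes + 1   # 81
reg_out_channels = 4    # class-agnostic box regression, see fc_reg above
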
Example #4
def test_multiheadattention():
    MultiheadAttention(embed_dims=5,
                       num_heads=5,
                       attn_drop=0,
                       proj_drop=0,
                       dropout_layer=dict(type='Dropout', drop_prob=0.),
                       batch_first=True)
    batch_dim = 2
    embed_dim = 5
    num_query = 100
    attn_batch_first = MultiheadAttention(embed_dims=5,
                                          num_heads=5,
                                          attn_drop=0,
                                          proj_drop=0,
                                          dropout_layer=dict(type='DropPath',
                                                             drop_prob=0.),
                                          batch_first=True)

    attn_query_first = MultiheadAttention(embed_dims=5,
                                          num_heads=5,
                                          attn_drop=0,
                                          proj_drop=0,
                                          dropout_layer=dict(type='DropPath',
                                                             drop_prob=0.),
                                          batch_first=False)

    param_dict = dict(attn_query_first.named_parameters())
    for n, v in attn_batch_first.named_parameters():
        param_dict[n].data = v.data

    input_batch_first = torch.rand(batch_dim, num_query, embed_dim)
    input_query_first = input_batch_first.transpose(0, 1)

    assert torch.allclose(
        attn_query_first(input_query_first).sum(),
        attn_batch_first(input_batch_first).sum())

    key_batch_first = torch.rand(batch_dim, num_query, embed_dim)
    key_query_first = key_batch_first.transpose(0, 1)

    assert torch.allclose(
        attn_query_first(input_query_first, key_query_first).sum(),
        attn_batch_first(input_batch_first, key_batch_first).sum())

    identity = torch.ones_like(input_query_first)

    # check deprecated arguments can be used normally

    assert torch.allclose(
        attn_query_first(input_query_first, key_query_first,
                         residual=identity).sum(),
        attn_batch_first(input_batch_first, key_batch_first).sum() +
        identity.sum() - input_batch_first.sum())

    assert torch.allclose(
        attn_query_first(input_query_first, key_query_first,
                         identity=identity).sum(),
        attn_batch_first(input_batch_first, key_batch_first).sum() +
        identity.sum() - input_batch_first.sum())

    # smoke check: the call with an explicit identity runs without error
    attn_query_first(input_query_first, key_query_first, identity=identity)
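
The test above exercises the two input layouts of mmcv's MultiheadAttention: with batch_first=True inputs are shaped (batch, num_query, embed_dims), with batch_first=False they are shaped (num_query, batch, embed_dims), and both give identical results once the weights are shared. A condensed, self-contained sketch of the batch-first convention (assuming the mmcv 1.x import path):

import torch
from mmcv.cnn.bricks.transformer import MultiheadAttention

attn = MultiheadAttention(embed_dims=5, num_heads=5, batch_first=True)
x = torch.rand(2, 100, 5)    # (batch, num_query, embed_dims)
out = attn(x)                # self-attention; the output keeps the input shape
assert out.shape == x.shape
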
Example #5
    def __init__(self,
                 num_classes=150,
                 num_ffn_fcs=2,
                 num_heads=8,
                 num_mask_fcs=3,
                 feedforward_channels=2048,
                 in_channels=256,
                 out_channels=256,
                 dropout=0.0,
                 act_cfg=dict(type='ReLU', inplace=True),
                 ffn_act_cfg=dict(type='ReLU', inplace=True),
                 conv_kernel_size=1,
                 feat_transform_cfg=None,
                 kernel_init=False,
                 with_ffn=True,
                 feat_gather_stride=1,
                 mask_transform_stride=1,
                 kernel_updator_cfg=dict(type='DynamicConv',
                                         in_channels=256,
                                         feat_channels=64,
                                         out_channels=256,
                                         act_cfg=dict(type='ReLU',
                                                      inplace=True),
                                         norm_cfg=dict(type='LN'))):
        super(KernelUpdateHead, self).__init__()
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.fp16_enabled = False
        self.dropout = dropout
        self.num_heads = num_heads
        self.kernel_init = kernel_init
        self.with_ffn = with_ffn
        self.conv_kernel_size = conv_kernel_size
        self.feat_gather_stride = feat_gather_stride
        self.mask_transform_stride = mask_transform_stride

        self.attention = MultiheadAttention(in_channels * conv_kernel_size**2,
                                            num_heads, dropout)
        self.attention_norm = build_norm_layer(
            dict(type='LN'), in_channels * conv_kernel_size**2)[1]
        self.kernel_update_conv = build_transformer_layer(kernel_updator_cfg)

        if feat_transform_cfg is not None:
            kernel_size = feat_transform_cfg.pop('kernel_size', 1)
            transform_channels = in_channels
            self.feat_transform = ConvModule(transform_channels,
                                             in_channels,
                                             kernel_size,
                                             stride=feat_gather_stride,
                                             padding=int(feat_gather_stride //
                                                         2),
                                             **feat_transform_cfg)
        else:
            self.feat_transform = None

        if self.with_ffn:
            self.ffn = FFN(in_channels,
                           feedforward_channels,
                           num_ffn_fcs,
                           act_cfg=ffn_act_cfg,
                           dropout=dropout)
            self.ffn_norm = build_norm_layer(dict(type='LN'), in_channels)[1]

        self.mask_fcs = nn.ModuleList()
        for _ in range(num_mask_fcs):
            self.mask_fcs.append(
                nn.Linear(in_channels, in_channels, bias=False))
            self.mask_fcs.append(
                build_norm_layer(dict(type='LN'), in_channels)[1])
            self.mask_fcs.append(build_activation_layer(act_cfg))

        self.fc_mask = nn.Linear(in_channels, out_channels)
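
Note that the attention in this head runs over flattened convolution kernels, so its embedding width is in_channels * conv_kernel_size**2 rather than in_channels. With the defaults in the snippet that product is simply 256; a 3x3 kernel would give 2304:

in_channels = 256
conv_kernel_size = 1
attn_dims = in_channels * conv_kernel_size**2   # 256 by default, 2304 for a 3x3 kernel
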
Example #6
    def build_attn(self, attn_cfg):
        self.attn = MultiheadAttention(**attn_cfg)
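
Because the config dict is unpacked straight into the constructor, its keys must match the MultiheadAttention signature used in the earlier examples. A hypothetical config that this helper would accept:

attn_cfg = dict(embed_dims=256, num_heads=8, attn_drop=0.1, proj_drop=0.1,
                batch_first=True)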