Example #1
    def __init__(self,
                 embed_dims,
                 num_heads,
                 num_frames,
                 attn_drop=0.,
                 proj_drop=0.,
                 dropout_layer=dict(type='DropPath', drop_prob=0.1),
                 norm_cfg=dict(type='LN'),
                 init_cfg=None,
                 **kwargs):
        super().__init__(init_cfg)
        self.embed_dims = embed_dims
        self.num_heads = num_heads
        self.num_frames = num_frames
        self.norm = build_norm_layer(norm_cfg, self.embed_dims)[1]

        if digit_version(torch.__version__) < digit_version('1.9.0'):
            kwargs.pop('batch_first', None)
        self.attn = nn.MultiheadAttention(embed_dims, num_heads, attn_drop,
                                          **kwargs)
        self.proj_drop = nn.Dropout(proj_drop)
        self.dropout_layer = build_dropout(
            dropout_layer) if dropout_layer else nn.Identity()
        self.temporal_fc = nn.Linear(self.embed_dims, self.embed_dims)

        self.init_weights()
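
For reference, a self-contained sketch of the `batch_first` version gate used above, with `packaging.version` standing in for mmcv's `digit_version` (the helper name `build_mha` is illustrative, not from the excerpt): `nn.MultiheadAttention` only accepts `batch_first` from PyTorch 1.9 onwards, so the kwarg is dropped on older versions instead of raising a TypeError.

import torch
import torch.nn as nn
from packaging import version


def build_mha(embed_dims, num_heads, attn_drop=0., **kwargs):
    # `batch_first` was added to nn.MultiheadAttention in PyTorch 1.9;
    # silently drop it on older versions.
    if version.parse(torch.__version__) < version.parse('1.9.0'):
        kwargs.pop('batch_first', None)
    return nn.MultiheadAttention(embed_dims, num_heads, attn_drop, **kwargs)


mha = build_mha(96, 3, batch_first=True)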
Example #2
    def __init__(self,
                 embed_dims,
                 num_heads,
                 window_size,
                 shift_size=0,
                 qkv_bias=True,
                 qk_scale=None,
                 attn_drop_rate=0,
                 proj_drop_rate=0,
                 dropout_layer=dict(type='DropPath', drop_prob=0.),
                 init_cfg=None):
        super().__init__(init_cfg)

        self.window_size = window_size
        self.shift_size = shift_size
        assert 0 <= self.shift_size < self.window_size

        self.w_msa = WindowMSA(embed_dims=embed_dims,
                               num_heads=num_heads,
                               window_size=to_2tuple(window_size),
                               qkv_bias=qkv_bias,
                               qk_scale=qk_scale,
                               attn_drop_rate=attn_drop_rate,
                               proj_drop_rate=proj_drop_rate,
                               init_cfg=None)

        self.drop = build_dropout(dropout_layer)
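
The `WindowMSA` built here attends within non-overlapping `window_size x window_size` patches. A minimal sketch of that partition step (the helper `window_partition` is assumed; it is not part of the excerpt):

import torch


def window_partition(x, window_size):
    """Cut a (B, H, W, C) map into (num_windows*B, ws, ws, C) patches."""
    B, H, W, C = x.shape
    x = x.view(B, H // window_size, window_size, W // window_size,
               window_size, C)
    return x.permute(0, 1, 3, 2, 4, 5).reshape(-1, window_size, window_size, C)


x = torch.randn(2, 8, 8, 96)
print(window_partition(x, 4).shape)  # torch.Size([8, 4, 4, 96])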
Example #3
    def __init__(self,
                 dim,
                 num_heads,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 qk_scale=None,
                 drop=0.,
                 attn_drop=0.,
                 drop_path=0.,
                 act_layer=nn.GELU,
                 norm_cfg=dict(type='LN'),
                 sr_ratio=1,
                 use_sr_conv=True):
        super(TCFormerRegularBlock, self).__init__()
        self.norm1 = build_norm_layer(norm_cfg, dim)[1]

        self.attn = TCFormerDynamicAttention(dim,
                                             num_heads=num_heads,
                                             qkv_bias=qkv_bias,
                                             qk_scale=qk_scale,
                                             attn_drop=attn_drop,
                                             proj_drop=drop,
                                             sr_ratio=sr_ratio,
                                             use_sr_conv=use_sr_conv)
        self.drop_path = build_dropout(
            dict(type='DropPath', drop_prob=drop_path))

        self.norm2 = build_norm_layer(norm_cfg, dim)[1]
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = TCMLP(in_features=dim,
                         hidden_features=mlp_hidden_dim,
                         act_layer=act_layer,
                         drop=drop)
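
Structurally, these pieces compose the usual pre-norm residual block: `x + drop_path(attn(norm1(x)))` followed by `x + drop_path(mlp(norm2(x)))`. Below is a hedged, self-contained skeleton of that pattern with plain PyTorch stand-ins; the real TCFormer forward also carries token dictionaries and spatial metadata, which are omitted here.

import torch
import torch.nn as nn


class PreNormBlock(nn.Module):
    """Structural stand-in: attention and MLP, each behind a pre-norm residual."""

    def __init__(self, dim, num_heads, mlp_ratio=4.):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)
        self.attn = nn.MultiheadAttention(dim, num_heads, batch_first=True)
        self.norm2 = nn.LayerNorm(dim)
        hidden = int(dim * mlp_ratio)
        self.mlp = nn.Sequential(nn.Linear(dim, hidden), nn.GELU(),
                                 nn.Linear(hidden, dim))
        self.drop_path = nn.Identity()  # stand-in for DropPath

    def forward(self, x):
        y = self.norm1(x)
        x = x + self.drop_path(self.attn(y, y, y, need_weights=False)[0])
        return x + self.drop_path(self.mlp(self.norm2(x)))


x = torch.randn(2, 16, 64)
print(PreNormBlock(64, 4)(x).shape)  # torch.Size([2, 16, 64])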
Example #4
    def __init__(self,
                 embed_dims,
                 num_heads,
                 window_size,
                 shift_size=0,
                 qkv_bias=True,
                 qk_scale=None,
                 attn_drop=0,
                 proj_drop=0,
                 dropout_layer=dict(type='DropPath', drop_prob=0.),
                 pad_small_map=False,
                 input_resolution=None,
                 auto_pad=None,
                 init_cfg=None):
        super().__init__(init_cfg)

        if input_resolution is not None or auto_pad is not None:
            warnings.warn(
                'The new version of ShiftWindowMSA supports auto padding and '
                'dynamic input shapes in all conditions, so the arguments '
                '`auto_pad` and `input_resolution` have been deprecated.',
                DeprecationWarning)

        self.shift_size = shift_size
        self.window_size = window_size
        assert 0 <= self.shift_size < self.window_size

        self.w_msa = WindowMSA(
            embed_dims=embed_dims,
            window_size=to_2tuple(self.window_size),
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=proj_drop,
        )

        self.drop = build_dropout(dropout_layer)
        self.pad_small_map = pad_small_map
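
A sketch of the "auto padding" the deprecation warning above refers to: at forward time the feature map is padded on the right and bottom so height and width become multiples of `window_size`. The helper name and shapes here are illustrative only.

import torch
import torch.nn.functional as F


def pad_to_window(x, window_size):
    """Pad a (B, H, W, C) map so H and W are multiples of window_size."""
    B, H, W, C = x.shape
    pad_r = (window_size - W % window_size) % window_size
    pad_b = (window_size - H % window_size) % window_size
    # F.pad pads trailing dims: (C_left, C_right, W_left, W_right, H_left, H_right)
    return F.pad(x, (0, 0, 0, pad_r, 0, pad_b))


x = torch.randn(1, 7, 9, 32)
print(pad_to_window(x, 4).shape)  # torch.Size([1, 8, 12, 32])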
Example #5
    def __init__(self,
                 embed_dims,
                 input_resolution,
                 num_heads,
                 window_size,
                 shift_size=0,
                 qkv_bias=True,
                 qk_scale=None,
                 attn_drop=0,
                 proj_drop=0,
                 dropout_layer=dict(type='DropPath', drop_prob=0.),
                 auto_pad=False,
                 init_cfg=None):
        super().__init__(init_cfg)

        self.embed_dims = embed_dims
        self.input_resolution = input_resolution
        self.shift_size = shift_size
        self.window_size = window_size
        if min(self.input_resolution) <= self.window_size:
            # if the window is at least as large as the input, clamp the
            # window to the input size and skip the shift
            self.shift_size = 0
            self.window_size = min(self.input_resolution)

        self.w_msa = WindowMSA(embed_dims, to_2tuple(self.window_size),
                               num_heads, qkv_bias, qk_scale, attn_drop,
                               proj_drop)

        self.drop = build_dropout(dropout_layer)

        H, W = self.input_resolution
        # Handle auto padding
        self.auto_pad = auto_pad
        if self.auto_pad:
            self.pad_r = (self.window_size -
                          W % self.window_size) % self.window_size
            self.pad_b = (self.window_size -
                          H % self.window_size) % self.window_size
            self.H_pad = H + self.pad_b
            self.W_pad = W + self.pad_r
        else:
            H_pad, W_pad = self.input_resolution
            assert H_pad % self.window_size + W_pad % self.window_size == 0,\
                f'input_resolution({self.input_resolution}) is not divisible '\
                f'by window_size({self.window_size}). Please check feature '\
                f'map shape or set `auto_pad=True`.'
            self.H_pad, self.W_pad = H_pad, W_pad
            self.pad_r, self.pad_b = 0, 0

        if self.shift_size > 0:
            # calculate attention mask for SW-MSA
            img_mask = torch.zeros((1, self.H_pad, self.W_pad, 1))  # 1 H W 1
            h_slices = (slice(0, -self.window_size),
                        slice(-self.window_size, -self.shift_size),
                        slice(-self.shift_size, None))
            w_slices = (slice(0, -self.window_size),
                        slice(-self.window_size, -self.shift_size),
                        slice(-self.shift_size, None))
            cnt = 0
            for h in h_slices:
                for w in w_slices:
                    img_mask[:, h, w, :] = cnt
                    cnt += 1

            # nW, window_size, window_size, 1
            mask_windows = self.window_partition(img_mask)
            mask_windows = mask_windows.view(
                -1, self.window_size * self.window_size)
            attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
            attn_mask = attn_mask.masked_fill(attn_mask != 0,
                                              float(-100.0)).masked_fill(
                                                  attn_mask == 0, float(0.0))
        else:
            attn_mask = None

        self.register_buffer('attn_mask', attn_mask)
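
For intuition, here is a standalone reproduction of the SW-MSA mask construction above on a tiny 4x4 map with `window_size=2`, `shift_size=1` (values chosen only for illustration). Pixels from different shifted regions get different labels, and pairs with unequal labels are filled with -100 so the softmax suppresses them.

import torch

H = W = 4
window_size, shift_size = 2, 1

img_mask = torch.zeros((1, H, W, 1))
slices = (slice(0, -window_size), slice(-window_size, -shift_size),
          slice(-shift_size, None))
cnt = 0
for h in slices:
    for w in slices:
        img_mask[:, h, w, :] = cnt
        cnt += 1

# (nW, window_size*window_size) region labels per window
mask_windows = img_mask.view(1, H // window_size, window_size,
                             W // window_size, window_size, 1)
mask_windows = mask_windows.permute(0, 1, 3, 2, 4, 5).reshape(
    -1, window_size * window_size)
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0))
print(attn_mask.shape)  # torch.Size([4, 4, 4]); cross-region pairs are -100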
Example #6
def build_drop_path(drop_path_rate):
    """Build drop path layer."""
    return build_dropout(dict(type='DropPath', drop_prob=drop_path_rate))
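
A minimal reference sketch of what the built `DropPath` layer does (stochastic depth), assuming the standard formulation: with probability `drop_prob`, a sample's entire residual branch is zeroed during training, and surviving samples are rescaled by `1 / (1 - drop_prob)` so the expected value is preserved.

import torch


def drop_path(x, drop_prob=0.1, training=True):
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # one Bernoulli draw per sample, broadcast over all remaining dims
    mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    mask = x.new_empty(mask_shape).bernoulli_(keep_prob)
    return x * mask / keep_prob


x = torch.ones(8, 3, 4)
out = drop_path(x, drop_prob=0.5)
print(out.sum(dim=(1, 2)))  # roughly half the rows are zero, the rest are 24.0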