Code Example #1
 def __init__(
     self,
     dim,
     num_heads,
     mlp_ratio=4.0,
     qkv_bias=False,
     qk_scale=None,
     drop=0.0,
     attn_drop=0.0,
     drop_path=0.0,
     act_layer=nn.GELU,
     norm_layer=nn.LayerNorm,
     epsilon=1e-5,
 ):
     super().__init__()
     self.norm1 = norm_layer(dim, epsilon=epsilon)
     self.attn = Attention(
         dim,
         num_heads=num_heads,
         qkv_bias=qkv_bias,
         qk_scale=qk_scale,
         attn_drop=attn_drop,
         proj_drop=drop,
     )
     # NOTE: drop path for stochastic depth; we shall see if this is better than dropout here
     self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()
     self.norm2 = norm_layer(dim, epsilon=epsilon)
     mlp_hidden_dim = int(dim * mlp_ratio)
     self.mlp = Mlp(
         in_features=dim,
         hidden_features=mlp_hidden_dim,
         act_layer=act_layer,
         drop=drop,
     )
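
The constructor above assembles a standard pre-norm vision-transformer block: LayerNorm → multi-head attention → residual, then LayerNorm → MLP → residual, with stochastic depth on both branches. A minimal forward() consistent with these modules is sketched below; it is illustrative only and not taken from the source.

def forward(self, x):
    # Pre-norm attention branch with stochastic-depth residual
    x = x + self.drop_path(self.attn(self.norm1(x)))
    # Pre-norm MLP branch with stochastic-depth residual
    x = x + self.drop_path(self.mlp(self.norm2(x)))
    return x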
Code Example #2
    def __init__(
        self,
        dim,
        num_heads,
        head_dim=None,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        group=1,
        skip_lam=1.0,
    ):
        super().__init__()
        self.skip_lam = skip_lam
        self.dim = dim
        self.mlp_hidden_dim = int(dim * mlp_ratio)

        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()
        self.norm2 = norm_layer(dim)
        self.mlp = Mlp(
            in_features=dim,
            hidden_features=self.mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
            group=group,
        )
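
This block builds only the MLP branch (there is no attention), plus a skip_lam factor. A plausible forward(), assuming the common convention of dividing the residual update by skip_lam (an assumption, not confirmed by the source):

def forward(self, x):
    # FFN branch only; the residual update is scaled by 1 / skip_lam (assumed convention)
    x = x + self.drop_path(self.mlp(self.norm2(x))) / self.skip_lam
    return x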
Code Example #3
 def __init__(
     self,
     dim,
     num_heads,
     head_dim=None,
     mlp_ratio=4.0,
     qkv_bias=False,
     qk_scale=None,
     drop=0.0,
     attn_drop=0.0,
     drop_path=0.0,
     act_layer=nn.GELU,
     norm_layer=nn.LayerNorm,
     group=1,
     skip_lam=1.0,
 ):
     super().__init__()
     self.dim = dim
     self.norm1 = norm_layer(dim)
     self.skip_lam = skip_lam
     self.attn = Attention(
         dim,
         num_heads=num_heads,
         head_dim=head_dim,
         qkv_bias=qkv_bias,
         qk_scale=qk_scale,
         attn_drop=attn_drop,
         proj_drop=drop,
     )
     self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()
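
This is the counterpart of the FFN-only block in Code Example #2: only the self-attention branch is constructed. Assuming the same skip_lam convention, a sketch of forward() (not from the source):

def forward(self, x):
    # Attention branch only; residual update scaled by 1 / skip_lam (assumed convention)
    x = x + self.drop_path(self.attn(self.norm1(x))) / self.skip_lam
    return x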
Code Example #4
    def __init__(
        self,
        dim,
        in_dim,
        num_pixel,
        num_heads=12,
        in_num_head=4,
        mlp_ratio=4.0,
        qkv_bias=False,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
    ):
        super().__init__()
        # Inner transformer
        self.norm_in = norm_layer(in_dim)
        self.attn_in = Attention(
            in_dim,
            in_dim,
            num_heads=in_num_head,
            qkv_bias=qkv_bias,
            attn_drop=attn_drop,
            proj_drop=drop,
        )

        self.norm_mlp_in = norm_layer(in_dim)
        self.mlp_in = Mlp(
            in_features=in_dim,
            hidden_features=int(in_dim * 4),
            out_features=in_dim,
            act_layer=act_layer,
            drop=drop,
        )

        self.norm1_proj = norm_layer(in_dim)
        self.proj = nn.Linear(in_dim * num_pixel, dim)
        # Outer transformer
        self.norm_out = norm_layer(dim)
        self.attn_out = Attention(
            dim,
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()

        self.norm_mlp = norm_layer(dim)
        self.mlp = Mlp(
            in_features=dim,
            hidden_features=int(dim * mlp_ratio),
            out_features=dim,
            act_layer=act_layer,
            drop=drop,
        )
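
This is a TNT-style block: an inner transformer refines pixel embeddings, which are then projected and fused into the patch embeddings processed by the outer transformer. The sketch below shows one way the two streams are typically combined; the argument names pixel_embed / patch_embed and the class-token handling are assumptions, not taken from the source.

def forward(self, pixel_embed, patch_embed):
    # Inner transformer on pixel embeddings
    pixel_embed = pixel_embed + self.drop_path(self.attn_in(self.norm_in(pixel_embed)))
    pixel_embed = pixel_embed + self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed)))
    # Fuse pixel embeddings into patch embeddings (class token at index 0 left untouched)
    B, N, _ = patch_embed.shape
    patch_embed = paddle.concat(
        [
            patch_embed[:, 0:1],
            patch_embed[:, 1:]
            + self.proj(self.norm1_proj(pixel_embed).reshape((B, N - 1, -1))),
        ],
        axis=1,
    )
    # Outer transformer on patch embeddings
    patch_embed = patch_embed + self.drop_path(self.attn_out(self.norm_out(patch_embed)))
    patch_embed = patch_embed + self.drop_path(self.mlp(self.norm_mlp(patch_embed)))
    return pixel_embed, patch_embed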
Code Example #5
    def __init__(
        self,
        dim,
        num_heads,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        epsilon=1e-6,
        shared_cpe=None,
        shared_crpe=None,
    ):
        super().__init__()

        # Conv-Attention.
        self.cpe = shared_cpe

        self.norm1 = norm_layer(dim, epsilon=epsilon)
        self.factoratt_crpe = FactorAtt_ConvRelPosEnc(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
            shared_crpe=shared_crpe,
        )
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()

        # MLP.
        self.norm2 = norm_layer(dim, epsilon=epsilon)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )
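
Here the block combines a shared convolutional position encoding (cpe), factorized attention with a shared convolutional relative position encoding (crpe), and an MLP, in the CoaT style. A plausible forward(), assuming the spatial size is passed alongside the tokens (the size argument is an assumption):

def forward(self, x, size):
    # Shared conv position encoding (if provided), then factorized attention
    if self.cpe is not None:
        x = self.cpe(x, size)
    x = x + self.drop_path(self.factoratt_crpe(self.norm1(x), size))
    # MLP branch with stochastic-depth residual
    x = x + self.drop_path(self.mlp(self.norm2(x)))
    return x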
Code Example #6
File: t2t.py  Project: AgentMaker/Paddle-Image-Models
    def __init__(
        self,
        dim,
        in_dim,
        num_heads,
        mlp_ratio=1.0,
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
    ):
        super().__init__()
        self.norm1 = norm_layer(dim)

        self.attn = Attention(
            dim,
            in_dim=in_dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )

        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()

        self.norm2 = norm_layer(in_dim)

        self.mlp = Mlp(
            in_features=in_dim,
            hidden_features=int(in_dim * mlp_ratio),
            out_features=in_dim,
            act_layer=act_layer,
            drop=drop,
        )
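
In this T2T-style block the attention maps tokens from dim to in_dim, so its output cannot be added back to the input; only the MLP branch carries a residual connection. A sketch of the usual forward() for such a block (not taken from the source):

def forward(self, x):
    # Attention changes the channel dimension (dim -> in_dim), so no residual here
    x = self.attn(self.norm1(x))
    # Residual MLP branch operating in in_dim
    x = x + self.drop_path(self.mlp(self.norm2(x)))
    return x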
Code Example #7
 def __init__(
     self,
     dim,
     num_heads,
     mlp_ratio=4.0,
     qkv_bias=False,
     qk_scale=None,
     drop=0.0,
     attn_drop=0.0,
     drop_path=0.0,
     act_layer=nn.GELU,
     norm_layer=nn.LayerNorm,
     epsilon=1e-6,
     Attention_block=Attention_talking_head,
     Mlp_block=Mlp,
     init_values=1e-4,
 ):
     super().__init__()
     self.norm1 = norm_layer(dim, epsilon=epsilon)
     self.attn = Attention_block(
         dim,
         num_heads=num_heads,
         qkv_bias=qkv_bias,
         qk_scale=qk_scale,
         attn_drop=attn_drop,
         proj_drop=drop,
     )
     self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()
     self.norm2 = norm_layer(dim, epsilon=epsilon)
     mlp_hidden_dim = int(dim * mlp_ratio)
     self.mlp = Mlp_block(
         in_features=dim,
         hidden_features=mlp_hidden_dim,
         act_layer=act_layer,
         drop=drop,
     )
     self.gamma_1 = add_parameter(self, init_values * paddle.ones((dim, )))
     self.gamma_2 = add_parameter(self, init_values * paddle.ones((dim, )))
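
The learnable vectors gamma_1 and gamma_2 implement LayerScale: each residual branch is multiplied element-wise by a small per-channel factor initialized to init_values. A forward() sketch consistent with that (an assumption, not copied from the source):

def forward(self, x):
    # LayerScale: per-channel scaling of each residual branch before the skip connection
    x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x)))
    x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x)))
    return x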
Code Example #8
File: dla.py  Project: AgentMaker/Paddle-Image-Models
 def __init__(
     self,
     levels,
     block,
     in_channels,
     out_channels,
     stride=1,
     dilation=1,
     cardinality=1,
     base_width=64,
     level_root=False,
     root_dim=0,
     root_kernel_size=1,
     root_residual=False,
 ):
     super(DlaTree, self).__init__()
     if root_dim == 0:
         root_dim = 2 * out_channels
     if level_root:
         root_dim += in_channels
     self.downsample = (nn.MaxPool2D(stride, stride=stride)
                        if stride > 1 else Identity())
     self.project = Identity()
     cargs = dict(dilation=dilation,
                  cardinality=cardinality,
                  base_width=base_width)
     if levels == 1:
         self.tree1 = block(in_channels, out_channels, stride, **cargs)
         self.tree2 = block(out_channels, out_channels, 1, **cargs)
         if in_channels != out_channels:
             self.project = nn.Sequential(
                 nn.Conv2D(
                     in_channels,
                     out_channels,
                     kernel_size=1,
                     stride=1,
                     bias_attr=False,
                 ),
                 nn.BatchNorm2D(out_channels),
             )
     else:
         cargs.update(
             dict(root_kernel_size=root_kernel_size,
                  root_residual=root_residual))
         self.tree1 = DlaTree(levels - 1,
                              block,
                              in_channels,
                              out_channels,
                              stride,
                              root_dim=0,
                              **cargs)
         self.tree2 = DlaTree(levels - 1,
                              block,
                              out_channels,
                              out_channels,
                              root_dim=root_dim + out_channels,
                              **cargs)
     if levels == 1:
         self.root = DlaRoot(root_dim, out_channels, root_kernel_size,
                             root_residual)
     self.level_root = level_root
     self.root_dim = root_dim
     self.levels = levels
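
The constructor builds a recursive DLA tree: at the leaves (levels == 1) two blocks feed a DlaRoot aggregation node, while at higher levels the children are themselves DlaTree modules. Below is a hedged sketch of the usual recursive forward(); the argument names residual and children are assumptions.

def forward(self, x, residual=None, children=None):
    children = [] if children is None else children
    bottom = self.downsample(x)      # stride-s max pooling, or identity
    residual = self.project(bottom)  # 1x1 conv projection, or identity
    if self.level_root:
        children.append(bottom)
    x1 = self.tree1(x, residual)
    if self.levels == 1:
        x2 = self.tree2(x1)
        x = self.root(x2, x1, *children)       # aggregate children at the root node
    else:
        children.append(x1)
        x = self.tree2(x1, children=children)  # recurse into the deeper subtree
    return x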
Code Example #9
    def __init__(
        self,
        dim,
        input_resolution,
        num_heads,
        window_size=7,
        shift_size=0,
        mlp_ratio=4.0,
        qkv_bias=True,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
    ):
        super().__init__()
        self.dim = dim
        self.input_resolution = input_resolution
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.mlp_ratio = mlp_ratio
        if min(self.input_resolution) <= self.window_size:
            # if window size is larger than input resolution, we don't partition windows
            self.shift_size = 0
            self.window_size = min(self.input_resolution)
        assert (
            0 <= self.shift_size < self.window_size
        ), "shift_size must in 0-window_size"

        self.norm1 = norm_layer(dim)
        self.attn = WindowAttention(
            dim,
            window_size=to_2tuple(self.window_size),
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
        )

        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )

        if self.shift_size > 0:
            # calculate attention mask for SW-MSA
            H, W = self.input_resolution
            img_mask = paddle.zeros((1, H, W, 1))  # 1 H W 1

            h_slices = (
                slice(0, -self.window_size),
                slice(-self.window_size, -self.shift_size),
                slice(-self.shift_size, None),
            )
            w_slices = (
                slice(0, -self.window_size),
                slice(-self.window_size, -self.shift_size),
                slice(-self.shift_size, None),
            )
            cnt = 0
            for h in h_slices:
                for w in w_slices:
                    img_mask[:, h, w, :] = cnt
                    cnt += 1

            # nW, window_size, window_size, 1
            mask_windows = window_partition(img_mask, self.window_size)
            mask_windows = mask_windows.reshape(
                (-1, self.window_size * self.window_size)
            )
            attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)

            _h = paddle.full_like(attn_mask, -100.0, dtype="float32")
            _z = paddle.full_like(attn_mask, 0.0, dtype="float32")
            attn_mask = paddle.where(attn_mask != 0, _h, _z)

        else:
            attn_mask = None

        self.register_buffer("attn_mask", attn_mask)
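
With a non-zero shift_size, the precomputed attn_mask marks window positions that originate from different image regions after the cyclic shift; those pairs receive a large negative bias (-100.0) so the softmax suppresses attention between them. The sketch below shows how a forward() would typically consume the mask; it assumes a window_reverse helper complementary to the window_partition used above and is not copied from the source.

def forward(self, x):
    H, W = self.input_resolution
    B, _, C = x.shape
    shortcut = x
    x = self.norm1(x).reshape((B, H, W, C))

    # Cyclic shift (only when shift_size > 0)
    if self.shift_size > 0:
        x = paddle.roll(x, shifts=(-self.shift_size, -self.shift_size), axis=(1, 2))

    # Partition into windows and run (shifted) window attention with the mask
    x_windows = window_partition(x, self.window_size)
    x_windows = x_windows.reshape((-1, self.window_size * self.window_size, C))
    attn_windows = self.attn(x_windows, mask=self.attn_mask)

    # Merge windows and reverse the cyclic shift (window_reverse is assumed here)
    attn_windows = attn_windows.reshape((-1, self.window_size, self.window_size, C))
    x = window_reverse(attn_windows, self.window_size, H, W)
    if self.shift_size > 0:
        x = paddle.roll(x, shifts=(self.shift_size, self.shift_size), axis=(1, 2))
    x = x.reshape((B, H * W, C))

    # Residual connections with stochastic depth
    x = shortcut + self.drop_path(x)
    x = x + self.drop_path(self.mlp(self.norm2(x)))
    return x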
Code Example #10
    def __init__(
        self,
        dims,
        num_heads,
        mlp_ratios=[],
        qkv_bias=False,
        qk_scale=None,
        drop=0.0,
        attn_drop=0.0,
        drop_path=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
        epsilon=1e-6,
        shared_cpes=None,
        shared_crpes=None,
    ):
        super().__init__()

        # Conv-Attention.
        self.cpes = shared_cpes

        self.norm12 = norm_layer(dims[1], epsilon=epsilon)
        self.norm13 = norm_layer(dims[2], epsilon=epsilon)
        self.norm14 = norm_layer(dims[3], epsilon=epsilon)
        self.factoratt_crpe2 = FactorAtt_ConvRelPosEnc(
            dims[1],
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
            shared_crpe=shared_crpes[1],
        )
        self.factoratt_crpe3 = FactorAtt_ConvRelPosEnc(
            dims[2],
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
            shared_crpe=shared_crpes[2],
        )
        self.factoratt_crpe4 = FactorAtt_ConvRelPosEnc(
            dims[3],
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            qk_scale=qk_scale,
            attn_drop=attn_drop,
            proj_drop=drop,
            shared_crpe=shared_crpes[3],
        )
        self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity()

        # MLP.
        self.norm22 = norm_layer(dims[1], epsilon=epsilon)
        self.norm23 = norm_layer(dims[2], epsilon=epsilon)
        self.norm24 = norm_layer(dims[3], epsilon=epsilon)
        # In parallel block, we assume dimensions are the same and share the linear transformation.
        assert dims[1] == dims[2] == dims[3]
        assert mlp_ratios[1] == mlp_ratios[2] == mlp_ratios[3]
        mlp_hidden_dim = int(dims[1] * mlp_ratios[1])
        self.mlp2 = self.mlp3 = self.mlp4 = Mlp(
            in_features=dims[1],
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )