Example #1
    def __init__(self,
                 dim,
                 window_size,
                 shifts=None,
                 input_resolution=None,
                 ff_dim=None,
                 use_pre_norm=False,
                 **kwargs):
        super().__init__()

        # Use shifted-window attention when shifts are given, plain window attention otherwise.
        attention = (ShiftWindowAttention(dim=dim,
                                          window_size=window_size,
                                          shifts=shifts,
                                          input_resolution=input_resolution,
                                          **kwargs)
                     if shifts is not None else
                     WindowAttention(dim=dim, window_size=window_size, **kwargs))
        self.attention_block = LayerNorm(attention,
                                         dim=dim,
                                         use_pre_norm=use_pre_norm)
        self.attention_path_dropout = PathDropout(kwargs.get("path_dropout", 0.))

        self.ff_block = LayerNorm(FeedForward(
            dim=dim,
            expand_dim=ff_dim if ff_dim is not None else 4 * dim,
            **kwargs),
                                  dim=dim,
                                  use_pre_norm=use_pre_norm)
        self.ff_path_dropout = PathDropout(kwargs.get("path_dropout", 0.))
Example #2
    def __init__(
        self,
        dim: int,
        ff_expand_scale: int = 4,
        path_dropout: float = 0.,
        conv_position_encoder: Optional[nn.Module] = None,
        use_cls: bool = True,
        **kwargs,
    ) -> None:
        super().__init__()

        self.conv_position_encoder = ConvolutionalPositionEncoding(
            dim, use_cls=use_cls
        ) if conv_position_encoder is None else conv_position_encoder

        self.norm_0 = nn.LayerNorm(dim)
        self.conv_attn_module = ConvAttentionalModule(
            dim,
            use_cls=use_cls,
            use_conv_position_encoder=False,
            conv_position_encoder=None,
            **kwargs,
        )
        self.path_dropout_0 = PathDropout(path_dropout)

        self.norm_1 = nn.LayerNorm(dim)
        self.ff_block = FeedForward(
            dim,
            ff_expand_scale=ff_expand_scale,
            ff_dropout=kwargs.get("ff_dropout", 0.),  # default to no dropout instead of raising a KeyError
        )
        self.path_dropout_1 = PathDropout(path_dropout)
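`ConvolutionalPositionEncoding` itself is not included in this excerpt. A minimal sketch of the usual convolutional position encoding, assuming a depth-wise convolution over the reshaped patch grid added back as a residual, with the class token passed through untouched (class name, kernel size, and interface are assumptions):

import torch
from torch import nn

class ConvPositionEncodingSketch(nn.Module):
    def __init__(self, dim: int, kernel_size: int = 3, use_cls: bool = True):
        super().__init__()
        self.use_cls = use_cls
        self.proj = nn.Conv2d(dim, dim, kernel_size,
                              padding=kernel_size // 2, groups=dim)  # depth-wise conv

    def forward(self, x: torch.Tensor, height: int, width: int) -> torch.Tensor:
        cls_token, tokens = (x[:, :1], x[:, 1:]) if self.use_cls else (None, x)
        b, n, d = tokens.shape
        feature_map = tokens.transpose(1, 2).reshape(b, d, height, width)
        tokens = tokens + self.proj(feature_map).flatten(2).transpose(1, 2)
        return tokens if cls_token is None else torch.cat([cls_token, tokens], dim=1)

x = torch.randn(2, 1 + 14 * 14, 64)                       # class token + 14x14 patch tokens
print(ConvPositionEncodingSketch(64)(x, 14, 14).shape)    # torch.Size([2, 197, 64])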
Example #3
    def __init__(self,
                 input_height: int,
                 input_width: int,
                 in_dim: int,
                 out_dim: int,
                 expand_scale: int = 4,
                 use_downsampling: bool = False,
                 **kwargs):
        super().__init__()
        path_dropout = kwargs.pop("path_dropout", 0.)  # consume it so it isn't forwarded to the attention module

        self.norm = nn.Sequential(
            Rearrange("b c h w -> b h w c"),
            nn.LayerNorm(in_dim),
            Rearrange("b h w c -> b c h w"),
        )
        self.attention_block = CoAtNetRelativeAttention(
            input_height, input_width, in_dim, out_dim, **kwargs)
        self.attention_path_dropout = PathDropout(path_dropout)
        self.pool = nn.MaxPool2d((2, 2)) if use_downsampling else nn.Identity()
        self.skip = nn.Conv2d(
            in_dim, out_dim,
            kernel_size=1) if use_downsampling else nn.Identity()

        self.ff_block = nn.Sequential(
            nn.Conv2d(out_dim, out_dim * expand_scale, kernel_size=1),
            nn.GELU(),
            nn.Conv2d(out_dim * expand_scale, out_dim, kernel_size=1),
        )
        self.ff_path_dropout = PathDropout(path_dropout)
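The `Rearrange -> LayerNorm -> Rearrange` sandwich in `self.norm` is there because `nn.LayerNorm` normalizes over the last dimension, so a channels-first feature map has to be permuted to channels-last and back. A quick shape check (assuming `einops` is installed):

import torch
from torch import nn
from einops.layers.torch import Rearrange

channel_last_layer_norm = nn.Sequential(
    Rearrange("b c h w -> b h w c"),   # move channels to the last axis for LayerNorm
    nn.LayerNorm(64),
    Rearrange("b h w c -> b c h w"),   # restore the channels-first layout
)
print(channel_last_layer_norm(torch.randn(2, 64, 8, 8)).shape)  # torch.Size([2, 64, 8, 8])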
Example #4
    def __init__(self,
                 input_height: int,
                 input_width: int,
                 in_dim: int,
                 out_dim: int,
                 expand_scale: int = 4,
                 use_downsampling: bool = False,
                 **kwargs):
        super().__init__()

        self.norm = nn.BatchNorm2d(in_dim)
        self.mb_conv = MBConvXd(in_dim,
                                out_dim,
                                expand_scale=expand_scale,
                                stride=2 if use_downsampling else 1,
                                **kwargs)
        self.path_dropout = PathDropout(kwargs.get("path_dropout", 0.))

        self.skip = nn.Sequential(
            nn.MaxPool2d((2, 2)),
            nn.Conv2d(in_dim, out_dim, kernel_size=1),
        ) if use_downsampling else nn.Identity()
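When `use_downsampling` is set, the main branch halves the spatial resolution via the stride-2 `MBConvXd`, so the skip branch must match it: max pooling handles the resolution and the 1x1 convolution handles the channel count. A quick shape check with a stride-2 stand-in for `MBConvXd` (the stand-in is an assumption):

import torch
from torch import nn

x = torch.randn(2, 64, 32, 32)
main_branch = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)                   # stand-in for a stride-2 MBConvXd
skip_branch = nn.Sequential(nn.MaxPool2d((2, 2)), nn.Conv2d(64, 128, kernel_size=1))
print(main_branch(x).shape, skip_branch(x).shape)  # both torch.Size([2, 128, 16, 16])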
Example #5
    def __init__(self,
                 dim: int,
                 num_patches: int,
                 alpha: float,
                 path_dropout=0.,
                 ff_dropout=0.):
        super().__init__()

        self.token_mixer = nn.Sequential(
            OrderedDict([("aff_pre_norm", AffineTransform(dim)),
                         ("transpose_0", Rearrange("b n d -> b d n")),
                         ("linear", nn.Linear(num_patches, num_patches)),
                         ("transpose_1", Rearrange("b d n -> b n d")),
                         ("aff_post_norm",
                          AffineTransform(dim, alpha=alpha, beta=None)),
                         ("path_dropout", PathDropout(path_dropout))]))
        self.channel_mixer = LayerScale(dim,
                                        core_block=MLP,
                                        pre_norm=AffineTransform,
                                        alpha=alpha,
                                        path_dropout=path_dropout,
                                        expand_scale=4,
                                        ff_dropout=ff_dropout)
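`AffineTransform` is not defined in this excerpt. In ResMLP-style blocks the affine layer is a per-channel rescale and shift, `x -> alpha * x + beta`, with `beta=None` dropping the shift on the post-norm; a minimal sketch under those assumptions (class name and defaults are illustrative):

import torch
from torch import nn

class AffineTransformSketch(nn.Module):
    def __init__(self, dim: int, alpha: float = 1., beta: float = 0.):
        super().__init__()
        self.alpha = nn.Parameter(alpha * torch.ones(dim))          # per-channel scale
        self.beta = nn.Parameter(beta * torch.ones(dim)) if beta is not None else None

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out = self.alpha * x
        return out if self.beta is None else out + self.beta

x = torch.randn(2, 196, 384)                  # (batch, patches, dim)
print(AffineTransformSketch(384)(x).shape)    # torch.Size([2, 196, 384])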
Example #6
    def __init__(
        self,
        in_channel: int,
        out_channel: Optional[int] = None,
        expand_channel: Optional[int] = None,
        expand_scale: Optional[int] = None,
        kernel_size: int = 3,
        stride: int = 1,
        padding: int = 1,
        norm_layer_name: str = "BatchNorm2d",
        act_fnc_name: str = "SiLU",
        se_scale: Optional[float] = None,
        se_act_fnc_name: str = "SiLU",
        dimension: int = 2,
        path_dropout: float = 0.,
        expansion_head_type: Literal["pixel_depth", "fused"] = "pixel_depth",
        **kwargs  # e.g. `eps` and `elementwise_affine` for `nn.LayerNorm`
    ):
        super().__init__(in_channel, out_channel, dimension=dimension)

        assert (
            expand_channel is not None or expand_scale is not None
        ), name_with_msg(
            self,
            "Either `expand_channel` or `expand_scale` should be specified")
        expand_channel = expand_channel if expand_channel is not None else in_channel * expand_scale

        assert (
            isinstance(expansion_head_type, str)
            and expansion_head_type in ["pixel_depth", "fused"]
        ), name_with_msg(
            self,
            f"The specified `expansion_head_type` - {expansion_head_type} "
            f"({type(expansion_head_type)}) doesn't exist. "
            "Please choose from: ['pixel_depth', 'fused']")

        # Expansion Head
        if expansion_head_type == "pixel_depth":
            pixel_wise_conv_0 = nn.Sequential(
                self.conv(self.in_channel,
                          expand_channel,
                          kernel_size=1,
                          bias=False),
                get_attr_if_exists(nn, norm_layer_name)(expand_channel,
                                                        **kwargs),
                get_attr_if_exists(nn, act_fnc_name)())

            depth_wise_conv = nn.Sequential(
                self.conv(expand_channel,
                          expand_channel,
                          kernel_size,
                          stride=stride,
                          padding=padding,
                          groups=expand_channel,
                          bias=False),
                get_attr_if_exists(nn, norm_layer_name)(expand_channel,
                                                        **kwargs),
                get_attr_if_exists(nn, act_fnc_name)())

            self.expansion_head = nn.Sequential(pixel_wise_conv_0,
                                                depth_wise_conv)
        else:
            # "fused" expansion: a single full convolution replaces the
            # pixel-wise + depth-wise pair (use the dimension-generic conv).
            self.expansion_head = nn.Sequential(
                self.conv(self.in_channel,
                          expand_channel,
                          kernel_size,
                          stride=stride,
                          padding=padding,
                          bias=False),
                get_attr_if_exists(nn, norm_layer_name)(expand_channel,
                                                        **kwargs),
                get_attr_if_exists(nn, act_fnc_name)())

        # Optional squeeze-and-excitation on the expanded channels
        self.se_block = None
        if se_scale is not None:
            bottleneck_channel = int(expand_channel * se_scale)

            self.se_block = SEConvXd(
                expand_channel,
                bottleneck_channel,
                se_act_fnc_name=se_act_fnc_name,
            )

        # Pixel-wise (1x1) projection back to the output channel count
        self.pixel_wise_conv_1 = nn.Sequential(
            self.conv(
                expand_channel,
                self.out_channel,
                kernel_size=1,
                bias=False,
            ),
            get_attr_if_exists(nn, norm_layer_name)(self.out_channel,
                                                    **kwargs))

        # From: https://github.com/tensorflow/tpu/blob/3679ca6b979349dde6da7156be2528428b000c7c/models/official/efficientnet/utils.py#L276
        # It's a batch-wise dropout
        self.path_dropout = PathDropout(path_dropout)
        # Residual connection is only possible when the shapes are preserved
        self.skip = (self.in_channel == self.out_channel and stride == 1)
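`PathDropout` is not included in these excerpts. Following the batch-wise dropout described in the linked TensorFlow reference (stochastic depth / drop-connect: zero a sample's whole residual branch with probability `p` and rescale the survivors), a minimal sketch could look like this; the class name and interface are assumptions:

import torch
from torch import nn

class PathDropoutSketch(nn.Module):
    """Stochastic depth: randomly drop the whole residual branch per sample."""

    def __init__(self, p: float = 0.):
        super().__init__()
        assert 0. <= p < 1., "drop probability must be in [0, 1)"
        self.p = p

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if not self.training or self.p == 0.:
            return x
        keep_prob = 1. - self.p
        # One Bernoulli draw per sample, broadcast over the remaining dims.
        shape = (x.shape[0],) + (1,) * (x.ndim - 1)
        mask = torch.bernoulli(torch.full(shape, keep_prob, device=x.device, dtype=x.dtype))
        return x * mask / keep_prob  # rescale so the expected value is unchanged

block = PathDropoutSketch(0.2).train()
print(block(torch.randn(4, 8, 16)).shape)   # torch.Size([4, 8, 16])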