Example #1
    def __init__(self, pool_spatial="mean", pool_temporal="mean"):
        super().__init__()

        self.model = r2plus1d_34_32_ig65m(num_classes=359, pretrained=True, progress=True)

        self.pool_spatial = Reduce("n c t h w -> n c t", reduction=pool_spatial)
        self.pool_temporal = Reduce("n c t -> n c", reduction=pool_temporal)
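A minimal usage sketch (assuming only torch and einops are installed; the pretrained r2plus1d model is not needed to see the shape flow):

import torch
from einops.layers.torch import Reduce

pool_spatial = Reduce("n c t h w -> n c t", reduction="mean")   # average over H, W
pool_temporal = Reduce("n c t -> n c", reduction="mean")        # then average over time

features = torch.randn(2, 512, 4, 7, 7)   # (batch, channels, time, height, width)
print(pool_temporal(pool_spatial(features)).shape)   # torch.Size([2, 512])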
Example #2
    def __init__(self, block, conv_makers, layers, stem):
        super().__init__(block=block,
                         conv_makers=conv_makers,
                         layers=layers,
                         stem=stem)
        self.pool_spatial = Reduce("n c t h w -> n c t", reduction="mean")
        self.pool_temporal = Reduce("n c t -> n c", reduction="mean")
Example #3
def create_model(use_reduce=False):
    return Sequential(
        Conv2d(3, 6, kernel_size=5),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2) if use_reduce else MaxPool2d(kernel_size=2),
        Conv2d(6, 16, kernel_size=5),
        Reduce('b c (h h2) (w w2) -> b (c h w)', 'max', h2=2, w2=2),
        Linear(16 * 5 * 5, 120),
        ReLU(),
        Linear(120, 84),
        ReLU(),
        Linear(84, 10),
    )
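A quick sanity check (a sketch, not part of the original snippet) that the Reduce-based pooling matches MaxPool2d(kernel_size=2):

import torch
from torch.nn import MaxPool2d
from einops.layers.torch import Reduce

x = torch.randn(1, 6, 28, 28)
reduce_pool = Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2)
assert torch.equal(reduce_pool(x), MaxPool2d(kernel_size=2)(x))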
Example #4
def create_model():
    return Sequential([
        Conv2d(6, kernel_size=5, input_shape=[32, 32, 3]),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Conv2d(16, kernel_size=5),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Rearrange('b c h w -> b (c h w)'),
        Linear(120),
        ReLU(),
        Linear(84),
        ReLU(),
        Linear(10),
    ])
Example #5
def create_model():
    return chainer.Sequential(
        L.Convolution2D(3, 6, ksize=(5, 5)),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        L.Convolution2D(6, 16, ksize=(5, 5)),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Rearrange('b c h w -> b (c h w)'),
        L.Linear(16 * 5 * 5, 120),
        L.Linear(120, 84),
        F.relu,
        EinMix('b c1 -> (b c2)', weight_shape='c1 c2', bias_shape='c2', c1=84, c2=84),
        EinMix('(b c2) -> b c3', weight_shape='c2 c3', bias_shape='c3', c2=84, c3=84),
        L.Linear(84, 10),
    )
Example #6
    def __init__(
        self,
        kernel_size: int = 7,
    ):
        super().__init__()

        self.avg_pool = Reduce("b c h w -> b 1 h w", reduction="mean")
        self.max_pool = Reduce("b c h w -> b 1 h w", reduction="max")
        self.att = nn.Sequential(
            nn.Conv2d(2,
                      1,
                      kernel_size=kernel_size,
                      padding=kernel_size // 2,
                      bias=False))
        self.gate = nn.Sigmoid()
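The forward pass is not shown above; a hedged sketch of how these pieces usually combine in CBAM-style spatial attention (the method body and shapes are assumptions):

    # hypothetical forward, assuming the fields defined in __init__ above
    def forward(self, x):                                            # x: (b, c, h, w)
        y = torch.cat([self.avg_pool(x), self.max_pool(x)], dim=1)   # (b, 2, h, w)
        return x * self.gate(self.att(y))                            # (b, 1, h, w) gate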
Example #7
def create_keras_model():
    return Sequential([
        Conv2d(6, kernel_size=5, input_shape=[32, 32, 3]),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Conv2d(16, kernel_size=5),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Rearrange('b c h w -> b (c h w)'),
        Linear(120),
        ReLU(),
        Linear(84),
        ReLU(),
        EinMix('b c1 -> (b c2)', weight_shape='c1 c2', bias_shape='c2', c1=84, c2=84),
        EinMix('(b c2) -> b c3', weight_shape='c2 c3', bias_shape='c3', c2=84, c3=84),
        Linear(10),
    ])
Example #8
    def __init__(
        self,
        image_size,
        image_channel,
        patch_size,
        depth,
        dim,
        path_dropout=0.,
        token_dropout=0.,
        ff_dropout=0.,
    ):
        super().__init__(image_size, image_channel, patch_size)

        self.linear_proj = nn.Linear(self.patch_dim, dim, bias=False)
        self.token_dropout = TokenDropout(token_dropout)

        self.encoder = nn.Sequential(
            OrderedDict([(f"layer_{idx}",
                          ResMLPLayer(dim,
                                      num_patches=self.num_patches,
                                      alpha=self._get_alpha(idx),
                                      path_dropout=path_dropout,
                                      ff_dropout=ff_dropout))
                         for idx in range(depth)]))

        self.pooler = Reduce("b n d -> b d", "mean")
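The base class handles patch extraction and is not shown; a hedged sketch of the remaining forward pass (self.patchify is a hypothetical name for whatever the base class provides):

    # hypothetical forward; assumes the base class yields patches of shape (b, n, patch_dim)
    def forward(self, x):
        x = self.token_dropout(self.linear_proj(self.patchify(x)))
        return self.pooler(self.encoder(x))   # (b, dim), mean-pooled over patches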
Example #9
    def __init__(
        self,
        features: int,
        reduction: int = 16,
        reduced_features: int = None,
        activation: nn.Module = ReLUInPlace,
    ):
        super().__init__()
        self.reduced_features = (features //
                                 reduction if reduced_features is None else
                                 reduced_features)

        self.pool = Reduce("b c h w -> b c 1 1", reduction="mean")
        self.att = nn.Sequential(
            OrderedDict({
                "fc1":
                nn.Conv2d(
                    features,
                    self.reduced_features,
                    kernel_size=1,
                ),
                "act1":
                activation(),
                "fc2":
                nn.Conv2d(
                    self.reduced_features,
                    features,
                    kernel_size=1,
                ),
                "act2":
                nn.Sigmoid(),
                "proj":
                nn.Identity(),
            }))
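Squeeze-and-excitation gates the input with the computed attention; a sketch assuming the fields above (the forward body is an assumption):

    # hypothetical forward for the squeeze-and-excitation block above
    def forward(self, x):                     # x: (b, features, h, w)
        return x * self.att(self.pool(x))     # (b, c, 1, 1) gate broadcast over h, w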
Example #10
def MLPMixer(*,
             image_size,
             patch_size,
             dim,
             depth,
             num_classes,
             expansion_factor=4,
             dropout=0.):
    assert (image_size %
            patch_size) == 0, 'image must be divisible by patch size'
    num_patches = (image_size // patch_size)**2
    chan_first, chan_last = partial(nn.Conv1d, kernel_size=1), nn.Linear

    return nn.Sequential(
        Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)',
                  p1=patch_size,
                  p2=patch_size), nn.Linear((patch_size**2) * 3, dim), *[
                      nn.Sequential(
                          PreNormResidual(
                              dim,
                              FeedForward(num_patches, expansion_factor,
                                          dropout, chan_first)),
                          PreNormResidual(
                              dim,
                              FeedForward(dim, expansion_factor, dropout,
                                          chan_last))) for _ in range(depth)
                  ], nn.LayerNorm(dim), Reduce('b n c -> b c', 'mean'),
        nn.Linear(dim, num_classes))
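Usage sketch (assumes PreNormResidual and FeedForward from the same file, as in lucidrains' mlp-mixer-pytorch):

model = MLPMixer(image_size=224, patch_size=16, dim=512, depth=12, num_classes=1000)
logits = model(torch.randn(1, 3, 224, 224))   # torch.Size([1, 1000])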
Example #11
    def __init__(self, in_features: int, features: int, radix: int, groups: int):
        """Implementation of Split Attention proposed in `"ResNeSt: Split-Attention Networks" <https://arxiv.org/abs/2004.08955>`_

        Grouped convolution have been proved to be impirically better (ResNetXt). The main idea is to apply an attention group-wise.

        `Einops <https://github.com/arogozhnikov/einops>`_ is used to improve the readibility of this module

        Args:
            in_features (int): number of input features
            features (int): attention's features
            radix (int): number of subgroups (`radix`) in the groups
            groups (int): number of groups, each group contains `radix` subgroups
        """
        super().__init__()
        self.radix, self.groups = radix, groups
        self.att = nn.Sequential(
            # this produces U-hat: the element-wise sum over the radix splits
            Reduce("b r (k c) h w -> b (k c) h w", reduction="sum", r=radix, k=groups),
            # eq 1
            nn.AdaptiveAvgPool2d(1),
            # the two following conv layers are G in the paper
            ConvBnAct(
                in_features,
                features,
                kernel_size=1,
                groups=groups,
                activation=ReLUInPlace,
                bias=True,
            ),
            nn.Conv2d(features, in_features * radix, kernel_size=1, groups=groups),
            Rearrange("b (r k c) h w -> b r k c h w", r=radix, k=groups),
            nn.Softmax(dim=1) if radix > 1 else nn.Sigmoid(),
            Rearrange("b r k c h w -> b r (k c) h w", r=radix, k=groups),
        )
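A hedged sketch of how the attention might be applied by the enclosing module (per the ResNeSt paper, the radix splits are weighted and summed; the input layout and method body are assumptions):

    # hypothetical forward: x arrives already split as (b, radix, in_features, h, w)
    def forward(self, x):
        att = self.att(x)            # (b, radix, in_features, 1, 1), softmax over radix
        return (x * att).sum(dim=1)  # attention-weighted sum of the radix splits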
Example #12
    def __init__(self,
                 image_size,
                 patch_size,
                 dim,
                 depth,
                 num_classes,
                 expansion_factor=4,
                 dropout=0.):
        super().__init__()
        assert (image_size %
                patch_size) == 0, 'image must be divisible by patch size'
        num_patches = (image_size // patch_size)**2
        chan_first, chan_last = partial(
            nn.Conv1d, kernel_size=1), nn.Linear  # partial fixes some of a function's arguments

        self.rearrange = Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)',
                                   p1=patch_size,
                                   p2=patch_size)
        self.per_patch_fc = nn.Linear((patch_size**2) * 3, dim)
        self.mixer_layer = nn.Sequential(
            PreNormResidual(
                dim,
                FeedForward(num_patches, expansion_factor, dropout,
                            chan_first)),
            PreNormResidual(
                dim, FeedForward(dim, expansion_factor, dropout, chan_last)))
        self.norm = nn.LayerNorm(dim)
        self.reduce = Reduce('b n c -> b c', 'mean')
        self.out = nn.Linear(dim, num_classes)
Example #13
    def __init__(self,
                 emb_size: int = 768,
                 n_classes: int = 1000,
                 policy: str = 'token'):
        """
        ViT Classification Head

        Args:
            emb_size (int, optional):  Embedding dimensions Defaults to 768.
            n_classes (int, optional): [description]. Defaults to 1000.
            policy (str, optional): Pooling policy, can be token or mean. Defaults to 'token'.
        """

        assert policy in self.POLICIES, f"Only policies {','.join(self.POLICIES)} are supported"

        super().__init__(
            OrderedDict({
                'pool':
                Reduce('b n e -> b e', reduction='mean')
                if policy == 'mean' else Lambda(lambda x: x[:, 0]),
                'norm':
                nn.LayerNorm(emb_size),
                'fc':
                nn.Linear(emb_size, n_classes)
            }))
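The two pooling policies, demonstrated on a dummy token tensor (a standalone sketch; only torch and einops are needed):

import torch
from einops.layers.torch import Reduce

tokens = torch.randn(2, 197, 768)   # (batch, 1 cls token + 196 patches, emb_size)
mean_pooled = Reduce('b n e -> b e', reduction='mean')(tokens)   # policy='mean'
cls_token = tokens[:, 0]                                         # policy='token'
print(mean_pooled.shape, cls_token.shape)   # both torch.Size([2, 768])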
Example #14
def ResMLP(*,
           image_size,
           patch_size,
           dim,
           depth,
           num_classes,
           expansion_factor=4):
    assert (image_size %
            patch_size) == 0, 'image must be divisible by patch size'
    num_patches = (image_size // patch_size)**2
    wrapper = lambda i, fn: PreAffinePostLayerScale(dim, i + 1, fn)

    return nn.Sequential(
        Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)',
                  p1=patch_size,
                  p2=patch_size), nn.Linear((patch_size**2) * 3, dim),
        *[
            nn.Sequential(
                wrapper(i, nn.Conv1d(num_patches, num_patches, 1)),
                wrapper(
                    i,
                    nn.Sequential(nn.Linear(dim, dim * expansion_factor),
                                  nn.GELU(),
                                  nn.Linear(dim * expansion_factor, dim))))
            for i in range(depth)
        ], Affine(dim), Reduce('b n c -> b c', 'mean'),
        nn.Linear(dim, num_classes))
Example #15
    def __init__(self,
                 num_classes,
                 num_blocks,
                 patch_size,
                 hidden_dim,
                 ffn_dim,
                 attn_dim=None,
                 prob_survival=1.,
                 image_size=224):
        super().__init__()
        assert (
            image_size %
            patch_size) == 0, 'image size must be divisible by the patch size'

        num_patches = (image_size // patch_size)**2

        self.patch_embedding = nn.Conv2d(3,
                                         hidden_dim,
                                         kernel_size=patch_size,
                                         stride=patch_size)

        self.prob_survival = prob_survival

        self.gmlp_layers = nn.ModuleList([
            gMLPBlock(hidden_dim, ffn_dim, num_patches, attn_dim)
            for _ in range(num_blocks)
        ])

        self.to_logits = nn.Sequential(nn.LayerNorm(hidden_dim),
                                       Reduce('b n c -> b c', 'mean'),
                                       nn.Linear(hidden_dim, num_classes))
Example #16
def create_torch_model(use_reduce=False, add_scripted_layer=False):
    from torch.nn import Sequential, Conv2d, MaxPool2d, Linear, ReLU
    from einops.layers.torch import Rearrange, Reduce, EinMix
    return Sequential(
        Conv2d(3, 6, kernel_size=(5, 5)),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2) if use_reduce else MaxPool2d(kernel_size=2),
        Conv2d(6, 16, kernel_size=(5, 5)),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        torch.jit.script(Rearrange('b c h w -> b (c h w)'))
        if add_scripted_layer else Rearrange('b c h w -> b (c h w)'),
        Linear(16 * 5 * 5, 120),
        ReLU(),
        Linear(120, 84),
        ReLU(),
        EinMix('b c1 -> (b c2)', weight_shape='c1 c2', bias_shape='c2', c1=84, c2=84),
        EinMix('(b c2) -> b c3', weight_shape='c2 c3', bias_shape='c3', c2=84, c3=84),
        Linear(84, 10),
    )
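A quick shape check for the model above (a sketch; the 32x32 input matches the convolution arithmetic: 32 -> 28 -> 14 -> 10 -> 5, so the flattened size is 16 * 5 * 5):

import torch

model = create_torch_model()
print(model(torch.randn(2, 3, 32, 32)).shape)   # expected: torch.Size([2, 10])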
Example #17
def create_model():
    model = HybridSequential()
    layers = [
        Conv2D(6, kernel_size=5),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Conv2D(16, kernel_size=5),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Rearrange('b c h w -> b (c h w)'),
        Dense(120),
        LeakyReLU(alpha=0.0),
        Dense(84),
        LeakyReLU(alpha=0.0),
        Dense(10),
    ]
    for layer in layers:
        model.add(layer)
    model.initialize(mxnet.init.Xavier(), ctx=mxnet.cpu())
    return model
Example #18
    def __init__(
        self,
        features: int,
        reduction: int = 16,
        reduced_features: int = None,
        activation: nn.Module = nn.ReLU,
    ):
        super().__init__()
        self.reduced_features = (features //
                                 reduction if reduced_features is None else
                                 reduced_features)
        self.avg_pool = Reduce("b c h w -> b c 1 1", reduction="mean")
        self.max_pool = Reduce("b c h w -> b c 1 1", reduction="max")
        self.att = nn.Sequential(
            nn.Conv2d(features, self.reduced_features, kernel_size=1),
            activation(),
            nn.Conv2d(self.reduced_features, features, kernel_size=1),
        )
        self.gate = nn.Sigmoid()
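A hedged sketch of the usual CBAM-style forward for this channel attention (the shared MLP is applied to both pooled vectors, then summed and gated; the method body is an assumption):

    # hypothetical forward, assuming the fields defined in __init__ above
    def forward(self, x):                                              # x: (b, c, h, w)
        y = self.att(self.avg_pool(x)) + self.att(self.max_pool(x))    # (b, c, 1, 1)
        return x * self.gate(y)                                        # gate broadcast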
Example #19
    def __init__(
        self,
        *,
        image_size,
        patch_size,
        num_classes,
        dim,
        heads,
        num_hierarchies,
        block_repeats,
        mlp_mult = 4,
        channels = 3,
        dim_head = 64,
        dropout = 0.
    ):
        super().__init__()
        assert (image_size % patch_size) == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (image_size // patch_size) ** 2
        patch_dim = channels * patch_size ** 2
        fmap_size = image_size // patch_size
        blocks = 2 ** (num_hierarchies - 1)

        seq_len = (fmap_size // blocks) ** 2   # sequence length is held constant across the hierarchy
        hierarchies = list(reversed(range(num_hierarchies)))
        mults = [2 ** i for i in hierarchies]

        layer_heads = list(map(lambda t: t * heads, mults))
        layer_dims = list(map(lambda t: t * dim, mults))

        layer_dims = [*layer_dims, layer_dims[-1]]
        dim_pairs = zip(layer_dims[:-1], layer_dims[1:])

        self.to_patch_embedding = nn.Sequential(
            Rearrange('b c (h p1) (w p2) -> b (p1 p2 c) h w', p1 = patch_size, p2 = patch_size),
            nn.Conv2d(patch_dim, layer_dims[0], 1),
        )

        block_repeats = cast_tuple(block_repeats, num_hierarchies)

        self.layers = nn.ModuleList([])

        for level, heads, (dim_in, dim_out), block_repeat in zip(hierarchies, layer_heads, dim_pairs, block_repeats):
            is_last = level == 0
            depth = block_repeat

            self.layers.append(nn.ModuleList([
                Transformer(dim_in, seq_len, depth, heads, mlp_mult, dropout),
                Aggregate(dim_in, dim_out) if not is_last else nn.Identity()
            ]))

        self.mlp_head = nn.Sequential(
            LayerNorm(dim),
            Reduce('b c h w -> b c', 'mean'),
            nn.Linear(dim, num_classes)
        )
Example #20
    def __init__(self,
                 num_classes,
                 config=[2, 2, 6, 2],
                 dim=96,
                 drop_path_rate=0.2,
                 input_resolution=[224, 448]):
        super(SwinTransformer, self).__init__()
        self.config = config
        self.dim = dim
        self.head_dim = 32
        self.window_size = (7, 14)
        # self.patch_partition = Rearrange('b c (h1 sub_h) (w1 sub_w) -> b h1 w1 (c sub_h sub_w)', sub_h=4, sub_w=4)

        # drop path rate for each layer
        dpr = [
            x.item() for x in torch.linspace(0, drop_path_rate, sum(config))
        ]

        begin = 0
        self.stage1 = [nn.Conv2d(3, dim, kernel_size=4, stride=4),
                       Rearrange('b c h w -> b h w c'),
                       nn.LayerNorm(dim),] + \
                      [Block(dim, dim, self.head_dim, self.window_size, dpr[i+begin], 'W' if not i%2 else 'SW', input_resolution[0]//4)
                      for i in range(config[0])]
        begin += config[0]
        self.stage2 = [Rearrange('b (h neih) (w neiw) c -> b h w (neiw neih c)', neih=2, neiw=2),
                       nn.LayerNorm(4*dim), nn.Linear(4*dim, 2*dim, bias=False),] + \
                      [Block(2*dim, 2*dim, self.head_dim, self.window_size, dpr[i+begin], 'W' if not i%2 else 'SW', input_resolution[0]//8)
                      for i in range(config[1])]

        begin += config[1]
        self.stage3 = [Rearrange('b (h neih) (w neiw) c -> b h w (neiw neih c)', neih=2, neiw=2),
                       nn.LayerNorm(8*dim), nn.Linear(8*dim, 4*dim, bias=False),] + \
                      [Block(4*dim, 4*dim, self.head_dim, self.window_size, dpr[i+begin], 'W' if not i%2 else 'SW',input_resolution[0]//16)
                      for i in range(config[2])]

        begin += config[2]
        self.stage4 = [Rearrange('b (h neih) (w neiw) c -> b h w (neiw neih c)', neih=2, neiw=2),
                       nn.LayerNorm(16*dim), nn.Linear(16*dim, 8*dim, bias=False),] + \
                      [Block(8*dim, 8*dim, self.head_dim, self.window_size, dpr[i+begin], 'W' if not i%2 else 'SW', input_resolution[0]//32)
                      for i in range(config[3])]

        self.stage1 = nn.Sequential(*self.stage1)
        self.stage2 = nn.Sequential(*self.stage2)
        self.stage3 = nn.Sequential(*self.stage3)
        self.stage4 = nn.Sequential(*self.stage4)

        self.norm_last = nn.LayerNorm(dim * 8)
        self.mean_pool = Reduce('b h w c -> b c', reduction='mean')
        self.classifier = nn.Linear(
            8 * dim, num_classes) if num_classes > 0 else nn.Identity()

        self.apply(self._init_weights)
Example #21
    def __init__(self,
                 image_size,
                 dims,
                 channels,
                 num_classes,
                 expansion=4,
                 kernel_size=3,
                 patch_size=(2, 2),
                 depths=(2, 4, 3)):
        super().__init__()
        assert len(dims) == 3, 'dims must be a tuple of 3'
        assert len(depths) == 3, 'depths must be a tuple of 3'

        ih, iw = image_size
        ph, pw = patch_size
        assert ih % ph == 0 and iw % pw == 0

        init_dim, *_, last_dim = channels

        self.conv1 = conv_nxn_bn(3, init_dim, stride=2)

        self.stem = nn.ModuleList([])
        self.stem.append(MV2Block(channels[0], channels[1], 1, expansion))
        self.stem.append(MV2Block(channels[1], channels[2], 2, expansion))
        self.stem.append(MV2Block(channels[2], channels[3], 1, expansion))
        self.stem.append(MV2Block(channels[2], channels[3], 1, expansion))

        self.trunk = nn.ModuleList([])
        self.trunk.append(
            nn.ModuleList([
                MV2Block(channels[3], channels[4], 2, expansion),
                MobileViTBlock(dims[0], depths[0], channels[5], kernel_size,
                               patch_size, int(dims[0] * 2))
            ]))

        self.trunk.append(
            nn.ModuleList([
                MV2Block(channels[5], channels[6], 2, expansion),
                MobileViTBlock(dims[1], depths[1], channels[7], kernel_size,
                               patch_size, int(dims[1] * 4))
            ]))

        self.trunk.append(
            nn.ModuleList([
                MV2Block(channels[7], channels[8], 2, expansion),
                MobileViTBlock(dims[2], depths[2], channels[9], kernel_size,
                               patch_size, int(dims[2] * 4))
            ]))

        self.to_logits = nn.Sequential(
            conv_1x1_bn(channels[-2], last_dim),
            Reduce('b c h w -> b c', 'mean'),
            nn.Linear(channels[-1], num_classes, bias=False))
Example #22
    def __init__(self,
                 *,
                 image_size,
                 patch_size,
                 num_classes,
                 dim,
                 depth,
                 heads=1,
                 ff_mult=4,
                 channels=3,
                 attn_dim=None,
                 prob_survival=1.):
        super().__init__()
        assert (dim %
                heads) == 0, 'dimension must be divisible by number of heads'

        image_height, image_width = pair(image_size)
        patch_height, patch_width = pair(patch_size)
        assert (image_height % patch_height) == 0 and (
            image_width % patch_width
        ) == 0, 'image height and width must be divisible by patch size'
        num_patches = (image_height // patch_height) * (image_width //
                                                        patch_width)

        dim_ff = dim * ff_mult

        self.to_patch_embed = nn.Sequential(
            Rearrange('b c (h p1) (w p2) -> b (h w) (c p1 p2)',
                      p1=patch_height,
                      p2=patch_width),
            nn.Linear(channels * patch_height * patch_width, dim))

        self.prob_survival = prob_survival

        self.layers = nn.ModuleList([
            Residual(
                PreNorm(
                    dim,
                    gMLPBlock(dim=dim,
                              heads=heads,
                              dim_ff=dim_ff,
                              seq_len=num_patches,
                              attn_dim=attn_dim))) for i in range(depth)
        ])

        self.to_logits = nn.Sequential(nn.LayerNorm(dim),
                                       Reduce('b n d -> b d', 'mean'),
                                       nn.Linear(dim, num_classes))
Example #23
def MLPMixer(*,
             image_size,
             channels,
             patch_size,
             dim,
             depth,
             num_classes,
             expansion_factor=4,
             dropout=0.0):
    """The implementation of MLPMixer

    Based on the paper:

        Tolstikhin, I., Houlsby, N., Kolesnikov, A., Beyer, L., Zhai, X., Unterthiner, T., ... & Dosovitskiy, A.
        (2021). Mlp-mixer: An all-mlp architecture for vision. arXiv preprint arXiv:2105.01601.

    This implementation is modified from the original to support only
    multivariate time series classification data. Original reference:

        @misc{tolstikhin2021mlpmixer,
            title   = {MLP-Mixer: An all-MLP Architecture for Vision},
            author  = {Ilya Tolstikhin and Neil Houlsby and Alexander Kolesnikov and Lucas Beyer and Xiaohua Zhai and Thomas Unterthiner and Jessica Yung and Daniel Keysers and Jakob Uszkoreit and Mario Lucic and Alexey Dosovitskiy},
            year    = {2021},
            eprint  = {2105.01601},
            archivePrefix = {arXiv},
            primaryClass = {cs.CV}
        }

    """
    assert (image_size %
            patch_size) == 0, "image must be divisible by patch size"
    num_patches = image_size // patch_size
    chan_first, chan_last = partial(nn.Conv1d, kernel_size=1), nn.Linear

    return nn.Sequential(
        Rearrange("b c (h p) w -> b h (p w c)", p=patch_size),
        nn.Linear((patch_size) * channels, dim), *[
            nn.Sequential(
                PreNormResidual(
                    dim,
                    FeedForward(num_patches, expansion_factor, dropout,
                                chan_first)),
                PreNormResidual(
                    dim, FeedForward(dim, expansion_factor, dropout,
                                     chan_last)),
            ) for _ in range(depth)
        ], nn.LayerNorm(dim), Reduce("b n c -> b c", "mean"),
        nn.Linear(dim, num_classes))
Example #24
def Permutator(*,
               image_size,
               patch_size,
               dim,
               depth,
               num_classes,
               segments,
               expansion_factor=4,
               dropout=0.):
    assert (image_size %
            patch_size) == 0, 'image must be divisible by patch size'
    assert (
        dim %
        segments) == 0, 'dimension must be divisible by the number of segments'
    height = width = image_size // patch_size
    s = segments

    return nn.Sequential(
        Rearrange('b c (h p1) (w p2) -> b h w (p1 p2 c)',
                  p1=patch_size,
                  p2=patch_size), nn.Linear((patch_size**2) * 3, dim),
        *[
            nn.Sequential(
                PreNormResidual(
                    dim,
                    nn.Sequential(
                        ParallelSum(
                            nn.Sequential(
                                Rearrange('b h w (c s) -> b w c (h s)', s=s),
                                nn.Linear(height * s, height * s),
                                Rearrange('b w c (h s) -> b h w (c s)', s=s),
                            ),
                            nn.Sequential(
                                Rearrange('b h w (c s) -> b h c (w s)', s=s),
                                nn.Linear(width * s, width * s),
                                Rearrange('b h c (w s) -> b h w (c s)', s=s),
                            ), nn.Linear(dim, dim)), nn.Linear(dim, dim))),
                PreNormResidual(
                    dim,
                    nn.Sequential(nn.Linear(dim, dim * expansion_factor),
                                  nn.GELU(), nn.Dropout(dropout),
                                  nn.Linear(dim * expansion_factor, dim),
                                  nn.Dropout(dropout)))) for _ in range(depth)
        ], nn.LayerNorm(dim), Reduce('b h w c -> b c', 'mean'),
        nn.Linear(dim, num_classes))
Example #25
    def __init__(
        self,
        features: int,
        *args,
        activation: nn.Module = ReLUInPlace,
        **kwargs,
    ):

        super().__init__(features, *args, **kwargs)
        self.pool = Reduce("b c h w -> b c", reduction="mean")
        self.att = nn.Sequential(
            OrderedDict({
                "fc1": nn.Linear(features, self.reduced_features),
                "act1": activation(),
                "fc2": nn.Linear(self.reduced_features, features),
                "act2": nn.Sigmoid(),
                "proj": Rearrange("b c -> b c 1 1"),
            }))
Example #26
    def __init__(self,
                 features: int,
                 kernel_size: int = 3,
                 gamma: int = 2,
                 beta: int = 1):

        super().__init__()
        assert kernel_size % 2 == 1

        t = int(abs(math.log(features, 2) + beta) / gamma)
        k = t if t % 2 else t + 1

        self.att = nn.Sequential(
            Reduce("b c h w -> b 1 c", reduction="mean"),
            nn.Conv1d(1, 1, kernel_size=k, padding=k // 2, bias=False),
            Rearrange("b 1 c -> b c 1 1"),
            nn.Sigmoid(),
        )
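ECA applies the gate directly to its input; a sketch assuming the field above (the method body is an assumption):

    # hypothetical forward for the ECA block above
    def forward(self, x):          # x: (b, c, h, w)
        return x * self.att(x)     # self.att yields a (b, c, 1, 1) sigmoid gate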
Example #27
    def __init__(self,
                 *,
                 image_size,
                 patch_size,
                 num_classes,
                 dim,
                 depth,
                 heads,
                 mlp_dim,
                 patch_merge_layer=None,
                 patch_merge_num_tokens=8,
                 channels=3,
                 dim_head=64,
                 dropout=0.,
                 emb_dropout=0.):
        super().__init__()
        image_height, image_width = pair(image_size)
        patch_height, patch_width = pair(patch_size)

        assert image_height % patch_height == 0 and image_width % patch_width == 0, 'Image dimensions must be divisible by the patch size.'

        num_patches = (image_height // patch_height) * (image_width //
                                                        patch_width)
        patch_dim = channels * patch_height * patch_width

        self.to_patch_embedding = nn.Sequential(
            Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)',
                      p1=patch_height,
                      p2=patch_width),
            nn.Linear(patch_dim, dim),
        )

        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim,
                                       dropout, patch_merge_layer,
                                       patch_merge_num_tokens)

        self.mlp_head = nn.Sequential(Reduce('b n d -> b d', 'mean'),
                                      nn.LayerNorm(dim),
                                      nn.Linear(dim, num_classes))
Example #28
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
                 embed_dim=384,
                 depth=36,
                 num_classes=1000,
                 expand_ratio=4,
                 mlp_bias=False):
        super().__init__()
        self.patch_emb = PatchEmbed(img_size,
                                    patch_size,
                                    in_chans,
                                    embed_dim,
                                    flatten=False)
        self.stages = nn.Sequential(
            *[S2Block(embed_dim, expand_ratio, mlp_bias) for _ in range(depth)])
        self.mlp_head = nn.Sequential(Reduce('b c h w -> b c', 'mean'),
                                      nn.Linear(embed_dim, num_classes))
Example #29
    def __init__(self,
                 features: int,
                 *args,
                 activation: nn.Module = ReLUInPlace,
                 **kwargs):
        super().__init__(features, *args, activation=activation, **kwargs)
        self.pool = Reduce("b c h w -> b 1 h w", reduction="mean")
        self.att = nn.Sequential(
            OrderedDict({
                "conv1":
                nn.Conv2d(1, self.reduced_features, kernel_size=1),
                "act1":
                activation(),
                "conv2":
                nn.Conv2d(self.reduced_features, 1, kernel_size=1),
                "act2":
                nn.Sigmoid(),
                "proj":
                nn.Identity(),
            }))
Example #30
    def __init__(
            self,
            image_channel,
            image_size,  # side length of a square image
            patch_size,  # side length of a square patch
            *,
            depth,
            token_mlp_dim,
            channel_mlp_dim,
            ff_dropout=0.0,
            **kwargs):
        super().__init__(image_size, image_channel, patch_size)

        self.linear_proj = nn.Linear(self.patch_dim, self.patch_dim)
        self.encoder = nn.Sequential(
            OrderedDict([(f"layer_{idx}",
                          MLPMixerLayer(self.num_patches, self.patch_dim,
                                        token_mlp_dim, channel_mlp_dim,
                                        ff_dropout)) for idx in range(depth)]))
        self.pooler = nn.Sequential(nn.LayerNorm(self.patch_dim),
                                    Reduce("b s c -> b c", "mean"))