def __init__(self, pool_spatial="mean", pool_temporal="mean"):
    super().__init__()
    self.model = r2plus1d_34_32_ig65m(num_classes=359, pretrained=True, progress=True)
    self.pool_spatial = Reduce("n c t h w -> n c t", reduction=pool_spatial)
    self.pool_temporal = Reduce("n c t -> n c", reduction=pool_temporal)
def __init__(self, block, conv_makers, layers, stem):
    super().__init__(block=block, conv_makers=conv_makers, layers=layers, stem=stem)
    self.pool_spatial = Reduce("n c t h w -> n c t", reduction="mean")
    self.pool_temporal = Reduce("n c t -> n c", reduction="mean")
def create_model(use_reduce=False):
    return Sequential(
        Conv2d(3, 6, kernel_size=5),
        # Reduce performs the same 2x2 max-pooling as MaxPool2d, with the axes spelled out
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2)
        if use_reduce else MaxPool2d(kernel_size=2),
        Conv2d(6, 16, kernel_size=5),
        # pooling and flattening combined in a single einops layer
        Reduce('b c (h h2) (w w2) -> b (c h w)', 'max', h2=2, w2=2),
        Linear(16 * 5 * 5, 120),
        ReLU(),
        Linear(120, 84),
        ReLU(),
        Linear(84, 10),
    )
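# A quick smoke test for the factory above (a sketch assuming torch, einops and the
# names used in the snippet -- Sequential, Conv2d, MaxPool2d, Linear, ReLU, Reduce --
# are importable). With use_reduce=True the two Reduce layers stand in for MaxPool2d
# and the usual flatten: (N, 3, 32, 32) -> conv 5x5 -> (N, 6, 28, 28) -> 2x2 max-pool
# -> (N, 6, 14, 14) -> conv 5x5 -> (N, 16, 10, 10) -> 2x2 max-pool + flatten -> (N, 400).
import torch

model = create_model(use_reduce=True)
dummy = torch.randn(4, 3, 32, 32)   # CIFAR-sized batch
assert model(dummy).shape == (4, 10)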
def create_model():
    return Sequential([
        Conv2d(6, kernel_size=5, input_shape=[32, 32, 3]),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Conv2d(16, kernel_size=5),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Rearrange('b c h w -> b (c h w)'),
        Linear(120),
        ReLU(),
        Linear(84),
        ReLU(),
        Linear(10),
    ])
def create_model():
    return chainer.Sequential(
        L.Convolution2D(3, 6, ksize=(5, 5)),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        L.Convolution2D(6, 16, ksize=(5, 5)),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Rearrange('b c h w -> b (c h w)'),
        L.Linear(16 * 5 * 5, 120),
        L.Linear(120, 84),
        F.relu,
        EinMix('b c1 -> (b c2)', weight_shape='c1 c2', bias_shape='c2', c1=84, c2=84),
        EinMix('(b c2) -> b c3', weight_shape='c2 c3', bias_shape='c3', c2=84, c3=84),
        L.Linear(84, 10),
    )
def __init__(
    self,
    kernel_size: int = 7,
):
    super().__init__()
    self.avg_pool = Reduce("b c h w -> b 1 h w", reduction="mean")
    self.max_pool = Reduce("b c h w -> b 1 h w", reduction="max")
    self.att = nn.Sequential(
        nn.Conv2d(2, 1, kernel_size=kernel_size, padding=kernel_size // 2, bias=False))
    self.gate = nn.Sigmoid()
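# The forward pass is not included in the snippet above; a CBAM-style spatial gate
# (a hedged sketch of a plausible pairing, assuming `import torch`, not necessarily
# the author's exact code) would concatenate the mean- and max-pooled maps, squeeze
# them to a single-channel attention map, and rescale the input with it:
def forward(self, x):
    pooled = torch.cat([self.avg_pool(x), self.max_pool(x)], dim=1)  # (b, 2, h, w)
    scale = self.gate(self.att(pooled))                              # (b, 1, h, w)
    return x * scale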
def create_keras_model():
    return Sequential([
        Conv2d(6, kernel_size=5, input_shape=[32, 32, 3]),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Conv2d(16, kernel_size=5),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Rearrange('b c h w -> b (c h w)'),
        Linear(120),
        ReLU(),
        Linear(84),
        ReLU(),
        EinMix('b c1 -> (b c2)', weight_shape='c1 c2', bias_shape='c2', c1=84, c2=84),
        EinMix('(b c2) -> b c3', weight_shape='c2 c3', bias_shape='c3', c2=84, c3=84),
        Linear(10),
    ])
def __init__(
    self,
    image_size,
    image_channel,
    patch_size,
    depth,
    dim,
    path_dropout=0.,
    token_dropout=0.,
    ff_dropout=0.,
):
    super().__init__(image_size, image_channel, patch_size)
    self.linear_proj = nn.Linear(self.patch_dim, dim, bias=False)
    self.token_dropout = TokenDropout(token_dropout)
    self.encoder = nn.Sequential(
        OrderedDict([
            (f"layer_{idx}",
             ResMLPLayer(dim,
                         num_patches=self.num_patches,
                         alpha=self._get_alpha(idx),
                         path_dropout=path_dropout,
                         ff_dropout=ff_dropout)) for idx in range(depth)
        ]))
    self.pooler = Reduce("b n d -> b d", "mean")
def __init__(
    self,
    features: int,
    reduction: int = 16,
    reduced_features: int = None,
    activation: nn.Module = ReLUInPlace,
):
    super().__init__()
    self.reduced_features = (features // reduction
                             if reduced_features is None else reduced_features)
    self.pool = Reduce("b c h w -> b c 1 1", reduction="mean")
    self.att = nn.Sequential(
        OrderedDict({
            "fc1": nn.Conv2d(features, self.reduced_features, kernel_size=1),
            "act1": activation(),
            "fc2": nn.Conv2d(self.reduced_features, features, kernel_size=1),
            "act2": nn.Sigmoid(),
            "proj": nn.Identity(),
        }))
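# Note on the pooling layer above (a standalone check, assuming torch and einops are
# installed): Reduce("b c h w -> b c 1 1", reduction="mean") is numerically identical
# to nn.AdaptiveAvgPool2d(1), so the einops layer is a self-documenting drop-in for
# the usual squeeze step of Squeeze-and-Excitation.
import torch
from torch import nn
from einops.layers.torch import Reduce

x = torch.randn(2, 64, 7, 7)
assert torch.allclose(Reduce("b c h w -> b c 1 1", reduction="mean")(x),
                      nn.AdaptiveAvgPool2d(1)(x), atol=1e-6)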
def MLPMixer(*,
             image_size,
             patch_size,
             dim,
             depth,
             num_classes,
             expansion_factor=4,
             dropout=0.):
    assert (image_size % patch_size) == 0, 'image must be divisible by patch size'
    num_patches = (image_size // patch_size)**2
    chan_first, chan_last = partial(nn.Conv1d, kernel_size=1), nn.Linear

    return nn.Sequential(
        Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=patch_size, p2=patch_size),
        nn.Linear((patch_size**2) * 3, dim),
        *[
            nn.Sequential(
                PreNormResidual(dim, FeedForward(num_patches, expansion_factor, dropout, chan_first)),
                PreNormResidual(dim, FeedForward(dim, expansion_factor, dropout, chan_last)))
            for _ in range(depth)
        ],
        nn.LayerNorm(dim),
        Reduce('b n c -> b c', 'mean'),
        nn.Linear(dim, num_classes))
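# Hypothetical usage of the MLPMixer factory above (assumes the surrounding module
# also provides PreNormResidual and FeedForward, and that torch is installed):
import torch

mixer = MLPMixer(image_size=224, patch_size=16, dim=512, depth=8, num_classes=1000)
images = torch.randn(2, 3, 224, 224)
logits = mixer(images)   # (2, 1000): tokens are mean-pooled by Reduce('b n c -> b c')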
def __init__(self, in_features: int, features: int, radix: int, groups: int):
    """Implementation of Split Attention proposed in `"ResNeSt: Split-Attention Networks"
    <https://arxiv.org/abs/2004.08955>`_

    Grouped convolutions have been shown to be empirically better (ResNeXt).
    The main idea is to apply attention group-wise.

    `Einops <https://github.com/arogozhnikov/einops>`_ is used to improve the readability of this module.

    Args:
        in_features (int): number of input features
        features (int): attention's features
        radix (int): number of subgroups (`radix`) in the groups
        groups (int): number of groups, each group contains `radix` subgroups
    """
    super().__init__()
    self.radix, self.groups = radix, groups
    self.att = nn.Sequential(
        # this produces U^{hat}
        Reduce("b r (k c) h w -> b (k c) h w", reduction="sum", r=radix, k=groups),
        # eq 1
        nn.AdaptiveAvgPool2d(1),
        # the two following conv layers are G in the paper
        ConvBnAct(
            in_features,
            features,
            kernel_size=1,
            groups=groups,
            activation=ReLUInPlace,
            bias=True,
        ),
        nn.Conv2d(features, in_features * radix, kernel_size=1, groups=groups),
        Rearrange("b (r k c) h w -> b r k c h w", r=radix, k=groups),
        nn.Softmax(dim=1) if radix > 1 else nn.Sigmoid(),
        Rearrange("b r k c h w -> b r (k c) h w", r=radix, k=groups),
    )
def __init__(self,
             image_size,
             patch_size,
             dim,
             depth,
             num_classes,
             expansion_factor=4,
             dropout=0.):
    super().__init__()
    assert (image_size % patch_size) == 0, 'image must be divisible by patch size'
    num_patches = (image_size // patch_size)**2
    # partial is used to fix some of a function's arguments
    chan_first, chan_last = partial(nn.Conv1d, kernel_size=1), nn.Linear
    self.rearrange = Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)',
                               p1=patch_size, p2=patch_size)
    self.per_patch_fc = nn.Linear((patch_size**2) * 3, dim)
    self.mixer_layer = nn.Sequential(
        PreNormResidual(dim, FeedForward(num_patches, expansion_factor, dropout, chan_first)),
        PreNormResidual(dim, FeedForward(dim, expansion_factor, dropout, chan_last)))
    self.norm = nn.LayerNorm(dim)
    self.reduce = Reduce('b n c -> b c', 'mean')
    self.out = nn.Linear(dim, num_classes)
def __init__(self,
             emb_size: int = 768,
             n_classes: int = 1000,
             policy: str = 'token'):
    """
    ViT Classification Head

    Args:
        emb_size (int, optional): Embedding dimension. Defaults to 768.
        n_classes (int, optional): Number of output classes. Defaults to 1000.
        policy (str, optional): Pooling policy, can be 'token' or 'mean'. Defaults to 'token'.
    """
    assert policy in self.POLICIES, f"Only policies {','.join(self.POLICIES)} are supported"
    super().__init__(
        OrderedDict({
            'pool': Reduce('b n e -> b e', reduction='mean')
                    if policy == 'mean' else Lambda(lambda x: x[:, 0]),
            'norm': nn.LayerNorm(emb_size),
            'fc': nn.Linear(emb_size, n_classes)
        }))
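# The two pooling policies differ only in how the token axis n is collapsed
# (a small standalone illustration, assuming a torch tensor of ViT outputs):
import torch

tokens = torch.randn(2, 197, 768)   # [CLS] token + 196 patch tokens
cls_pooled = tokens[:, 0]           # policy='token': keep the [CLS] token, shape (2, 768)
mean_pooled = tokens.mean(dim=1)    # policy='mean': what Reduce('b n e -> b e', 'mean') computes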
def ResMLP(*, image_size, patch_size, dim, depth, num_classes, expansion_factor=4):
    assert (image_size % patch_size) == 0, 'image must be divisible by patch size'
    num_patches = (image_size // patch_size)**2
    wrapper = lambda i, fn: PreAffinePostLayerScale(dim, i + 1, fn)

    return nn.Sequential(
        Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=patch_size, p2=patch_size),
        nn.Linear((patch_size**2) * 3, dim),
        *[
            nn.Sequential(
                wrapper(i, nn.Conv1d(num_patches, num_patches, 1)),
                wrapper(i, nn.Sequential(
                    nn.Linear(dim, dim * expansion_factor),
                    nn.GELU(),
                    nn.Linear(dim * expansion_factor, dim))))
            for i in range(depth)
        ],
        Affine(dim),
        Reduce('b n c -> b c', 'mean'),
        nn.Linear(dim, num_classes))
def __init__(self,
             num_classes,
             num_blocks,
             patch_size,
             hidden_dim,
             ffn_dim,
             attn_dim=None,
             prob_survival=1.,
             image_size=224):
    super().__init__()
    assert (image_size % patch_size) == 0, 'image size must be divisible by the patch size'
    num_patches = (image_size // patch_size)**2
    self.patch_embedding = nn.Conv2d(3, hidden_dim, kernel_size=patch_size, stride=patch_size)
    self.prob_survival = prob_survival
    self.gmlp_layers = nn.ModuleList([
        gMLPBlock(hidden_dim, ffn_dim, num_patches, attn_dim) for _ in range(num_blocks)
    ])
    self.to_logits = nn.Sequential(nn.LayerNorm(hidden_dim),
                                   Reduce('b n c -> b c', 'mean'),
                                   nn.Linear(hidden_dim, num_classes))
def create_torch_model(use_reduce=False, add_scripted_layer=False):
    from torch.nn import Sequential, Conv2d, MaxPool2d, Linear, ReLU
    from einops.layers.torch import Rearrange, Reduce, EinMix
    return Sequential(
        Conv2d(3, 6, kernel_size=(5, 5)),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2)
        if use_reduce else MaxPool2d(kernel_size=2),
        Conv2d(6, 16, kernel_size=(5, 5)),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        torch.jit.script(Rearrange('b c h w -> b (c h w)'))
        if add_scripted_layer else Rearrange('b c h w -> b (c h w)'),
        Linear(16 * 5 * 5, 120),
        ReLU(),
        Linear(120, 84),
        ReLU(),
        EinMix('b c1 -> (b c2)', weight_shape='c1 c2', bias_shape='c2', c1=84, c2=84),
        EinMix('(b c2) -> b c3', weight_shape='c2 c3', bias_shape='c3', c2=84, c3=84),
        Linear(84, 10),
    )
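# EinMix is einops' einsum-backed dense layer: the pattern names the axes mixed by the
# weight. As a standalone illustration (not tied to the unusual patterns above), the
# layer below is functionally equivalent to nn.Linear(84, 84) applied over the last axis:
import torch
from einops.layers.torch import EinMix

mix = EinMix('b c1 -> b c2', weight_shape='c1 c2', bias_shape='c2', c1=84, c2=84)
out = mix(torch.randn(4, 84))   # (4, 84)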
def create_model():
    model = HybridSequential()
    layers = [
        Conv2D(6, kernel_size=5),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Conv2D(16, kernel_size=5),
        Reduce('b c (h h2) (w w2) -> b c h w', 'max', h2=2, w2=2),
        Rearrange('b c h w -> b (c h w)'),
        Dense(120),
        LeakyReLU(alpha=0.0),
        Dense(84),
        LeakyReLU(alpha=0.0),
        Dense(10),
    ]
    for layer in layers:
        model.add(layer)
    model.initialize(mxnet.init.Xavier(), ctx=mxnet.cpu())
    return model
def __init__(
    self,
    features: int,
    reduction: int = 16,
    reduced_features: int = None,
    activation: nn.Module = nn.ReLU,
):
    super().__init__()
    self.reduced_features = (features // reduction
                             if reduced_features is None else reduced_features)
    self.avg_pool = Reduce("b c h w -> b c 1 1", reduction="mean")
    self.max_pool = Reduce("b c h w -> b c 1 1", reduction="max")
    self.att = nn.Sequential(
        nn.Conv2d(features, self.reduced_features, kernel_size=1),
        activation(),
        nn.Conv2d(self.reduced_features, features, kernel_size=1),
    )
    self.gate = nn.Sigmoid()
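# The forward pass is omitted above; in a CBAM-style channel gate both pooled
# descriptors pass through the shared bottleneck (self.att) and are summed before
# the sigmoid -- sketched here as an assumption, not verified author code:
def forward(self, x):
    avg_branch = self.att(self.avg_pool(x))   # (b, c, 1, 1)
    max_branch = self.att(self.max_pool(x))   # (b, c, 1, 1)
    return x * self.gate(avg_branch + max_branch)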
def __init__(
    self,
    *,
    image_size,
    patch_size,
    num_classes,
    dim,
    heads,
    num_hierarchies,
    block_repeats,
    mlp_mult = 4,
    channels = 3,
    dim_head = 64,
    dropout = 0.
):
    super().__init__()
    assert (image_size % patch_size) == 0, 'Image dimensions must be divisible by the patch size.'
    num_patches = (image_size // patch_size) ** 2
    patch_dim = channels * patch_size ** 2
    fmap_size = image_size // patch_size
    blocks = 2 ** (num_hierarchies - 1)

    seq_len = (fmap_size // blocks) ** 2  # sequence length is held constant across the hierarchy
    hierarchies = list(reversed(range(num_hierarchies)))
    mults = [2 ** i for i in hierarchies]

    layer_heads = list(map(lambda t: t * heads, mults))
    layer_dims = list(map(lambda t: t * dim, mults))

    layer_dims = [*layer_dims, layer_dims[-1]]
    dim_pairs = zip(layer_dims[:-1], layer_dims[1:])

    self.to_patch_embedding = nn.Sequential(
        Rearrange('b c (h p1) (w p2) -> b (p1 p2 c) h w', p1 = patch_size, p2 = patch_size),
        nn.Conv2d(patch_dim, layer_dims[0], 1),
    )

    block_repeats = cast_tuple(block_repeats, num_hierarchies)

    self.layers = nn.ModuleList([])

    for level, heads, (dim_in, dim_out), block_repeat in zip(hierarchies, layer_heads, dim_pairs, block_repeats):
        is_last = level == 0
        depth = block_repeat

        self.layers.append(nn.ModuleList([
            Transformer(dim_in, seq_len, depth, heads, mlp_mult, dropout),
            Aggregate(dim_in, dim_out) if not is_last else nn.Identity()
        ]))

    self.mlp_head = nn.Sequential(
        LayerNorm(dim),
        Reduce('b c h w -> b c', 'mean'),
        nn.Linear(dim, num_classes)
    )
def __init__(self,
             num_classes,
             config=[2, 2, 6, 2],
             dim=96,
             drop_path_rate=0.2,
             input_resolution=[224, 448]):
    super(SwinTransformer, self).__init__()
    self.config = config
    self.dim = dim
    self.head_dim = 32
    self.window_size = (7, 14)

    # self.patch_partition = Rearrange('b c (h1 sub_h) (w1 sub_w) -> b h1 w1 (c sub_h sub_w)', sub_h=4, sub_w=4)

    # drop path rate for each layer
    dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(config))]

    begin = 0
    self.stage1 = [nn.Conv2d(3, dim, kernel_size=4, stride=4),
                   Rearrange('b c h w -> b h w c'),
                   nn.LayerNorm(dim)] + \
        [Block(dim, dim, self.head_dim, self.window_size, dpr[i + begin],
               'W' if not i % 2 else 'SW', input_resolution[0] // 4)
         for i in range(config[0])]
    begin += config[0]
    self.stage2 = [Rearrange('b (h neih) (w neiw) c -> b h w (neiw neih c)', neih=2, neiw=2),
                   nn.LayerNorm(4 * dim),
                   nn.Linear(4 * dim, 2 * dim, bias=False)] + \
        [Block(2 * dim, 2 * dim, self.head_dim, self.window_size, dpr[i + begin],
               'W' if not i % 2 else 'SW', input_resolution[0] // 8)
         for i in range(config[1])]
    begin += config[1]
    self.stage3 = [Rearrange('b (h neih) (w neiw) c -> b h w (neiw neih c)', neih=2, neiw=2),
                   nn.LayerNorm(8 * dim),
                   nn.Linear(8 * dim, 4 * dim, bias=False)] + \
        [Block(4 * dim, 4 * dim, self.head_dim, self.window_size, dpr[i + begin],
               'W' if not i % 2 else 'SW', input_resolution[0] // 16)
         for i in range(config[2])]
    begin += config[2]
    self.stage4 = [Rearrange('b (h neih) (w neiw) c -> b h w (neiw neih c)', neih=2, neiw=2),
                   nn.LayerNorm(16 * dim),
                   nn.Linear(16 * dim, 8 * dim, bias=False)] + \
        [Block(8 * dim, 8 * dim, self.head_dim, self.window_size, dpr[i + begin],
               'W' if not i % 2 else 'SW', input_resolution[0] // 32)
         for i in range(config[3])]

    self.stage1 = nn.Sequential(*self.stage1)
    self.stage2 = nn.Sequential(*self.stage2)
    self.stage3 = nn.Sequential(*self.stage3)
    self.stage4 = nn.Sequential(*self.stage4)

    self.norm_last = nn.LayerNorm(dim * 8)
    self.mean_pool = Reduce('b h w c -> b c', reduction='mean')
    self.classifier = nn.Linear(8 * dim, num_classes) if num_classes > 0 else nn.Identity()

    self.apply(self._init_weights)
def __init__(self,
             image_size,
             dims,
             channels,
             num_classes,
             expansion=4,
             kernel_size=3,
             patch_size=(2, 2),
             depths=(2, 4, 3)):
    super().__init__()
    assert len(dims) == 3, 'dims must be a tuple of 3'
    assert len(depths) == 3, 'depths must be a tuple of 3'

    ih, iw = image_size
    ph, pw = patch_size
    assert ih % ph == 0 and iw % pw == 0

    init_dim, *_, last_dim = channels

    self.conv1 = conv_nxn_bn(3, init_dim, stride=2)

    self.stem = nn.ModuleList([])
    self.stem.append(MV2Block(channels[0], channels[1], 1, expansion))
    self.stem.append(MV2Block(channels[1], channels[2], 2, expansion))
    self.stem.append(MV2Block(channels[2], channels[3], 1, expansion))
    self.stem.append(MV2Block(channels[2], channels[3], 1, expansion))

    self.trunk = nn.ModuleList([])
    self.trunk.append(
        nn.ModuleList([
            MV2Block(channels[3], channels[4], 2, expansion),
            MobileViTBlock(dims[0], depths[0], channels[5], kernel_size,
                           patch_size, int(dims[0] * 2))
        ]))
    self.trunk.append(
        nn.ModuleList([
            MV2Block(channels[5], channels[6], 2, expansion),
            MobileViTBlock(dims[1], depths[1], channels[7], kernel_size,
                           patch_size, int(dims[1] * 4))
        ]))
    self.trunk.append(
        nn.ModuleList([
            MV2Block(channels[7], channels[8], 2, expansion),
            MobileViTBlock(dims[2], depths[2], channels[9], kernel_size,
                           patch_size, int(dims[2] * 4))
        ]))

    self.to_logits = nn.Sequential(
        conv_1x1_bn(channels[-2], last_dim),
        Reduce('b c h w -> b c', 'mean'),
        nn.Linear(channels[-1], num_classes, bias=False))
def __init__(self,
             *,
             image_size,
             patch_size,
             num_classes,
             dim,
             depth,
             heads=1,
             ff_mult=4,
             channels=3,
             attn_dim=None,
             prob_survival=1.):
    super().__init__()
    assert (dim % heads) == 0, 'dimension must be divisible by number of heads'
    image_height, image_width = pair(image_size)
    patch_height, patch_width = pair(patch_size)
    assert (image_height % patch_height) == 0 and (image_width % patch_width) == 0, \
        'image height and width must be divisible by patch size'
    num_patches = (image_height // patch_height) * (image_width // patch_width)

    dim_ff = dim * ff_mult

    self.to_patch_embed = nn.Sequential(
        Rearrange('b c (h p1) (w p2) -> b (h w) (c p1 p2)',
                  p1=patch_height, p2=patch_width),
        nn.Linear(channels * patch_height * patch_width, dim))

    self.prob_survival = prob_survival

    self.layers = nn.ModuleList([
        Residual(
            PreNorm(
                dim,
                gMLPBlock(dim=dim,
                          heads=heads,
                          dim_ff=dim_ff,
                          seq_len=num_patches,
                          attn_dim=attn_dim))) for i in range(depth)
    ])

    self.to_logits = nn.Sequential(nn.LayerNorm(dim),
                                   Reduce('b n d -> b d', 'mean'),
                                   nn.Linear(dim, num_classes))
def MLPMixer(*,
             image_size,
             channels,
             patch_size,
             dim,
             depth,
             num_classes,
             expansion_factor=4,
             dropout=0.0):
    """Implementation of MLP-Mixer.

    Based on the paper: Tolstikhin, I., Houlsby, N., Kolesnikov, A., Beyer, L., Zhai, X.,
    Unterthiner, T., ... & Dosovitskiy, A. (2021). MLP-Mixer: An all-MLP architecture
    for vision. arXiv preprint arXiv:2105.01601.

    This implementation is modified from the original to support only multivariate
    time series classification data. Original reference:

    @misc{tolstikhin2021mlpmixer,
        title = {MLP-Mixer: An all-MLP Architecture for Vision},
        author = {Ilya Tolstikhin and Neil Houlsby and Alexander Kolesnikov and Lucas Beyer
                  and Xiaohua Zhai and Thomas Unterthiner and Jessica Yung and Daniel Keysers
                  and Jakob Uszkoreit and Mario Lucic and Alexey Dosovitskiy},
        year = {2021},
        eprint = {2105.01601},
        archivePrefix = {arXiv},
        primaryClass = {cs.CV}
    }
    """
    assert (image_size % patch_size) == 0, "image must be divisible by patch size"
    num_patches = image_size // patch_size
    chan_first, chan_last = partial(nn.Conv1d, kernel_size=1), nn.Linear

    return nn.Sequential(
        Rearrange("b c (h p) w -> b h (p w c)", p=patch_size),
        nn.Linear((patch_size) * channels, dim),
        *[
            nn.Sequential(
                PreNormResidual(dim, FeedForward(num_patches, expansion_factor, dropout, chan_first)),
                PreNormResidual(dim, FeedForward(dim, expansion_factor, dropout, chan_last)),
            ) for _ in range(depth)
        ],
        nn.LayerNorm(dim),
        Reduce("b n c -> b c", "mean"),
        nn.Linear(dim, num_classes))
def Permutator(*,
               image_size,
               patch_size,
               dim,
               depth,
               num_classes,
               segments,
               expansion_factor=4,
               dropout=0.):
    assert (image_size % patch_size) == 0, 'image must be divisible by patch size'
    assert (dim % segments) == 0, 'dimension must be divisible by the number of segments'
    height = width = image_size // patch_size
    s = segments

    return nn.Sequential(
        Rearrange('b c (h p1) (w p2) -> b h w (p1 p2 c)', p1=patch_size, p2=patch_size),
        nn.Linear((patch_size**2) * 3, dim),
        *[
            nn.Sequential(
                PreNormResidual(
                    dim,
                    nn.Sequential(
                        ParallelSum(
                            nn.Sequential(
                                Rearrange('b h w (c s) -> b w c (h s)', s=s),
                                nn.Linear(height * s, height * s),
                                Rearrange('b w c (h s) -> b h w (c s)', s=s),
                            ),
                            nn.Sequential(
                                Rearrange('b h w (c s) -> b h c (w s)', s=s),
                                nn.Linear(width * s, width * s),
                                Rearrange('b h c (w s) -> b h w (c s)', s=s),
                            ),
                            nn.Linear(dim, dim)),
                        nn.Linear(dim, dim))),
                PreNormResidual(
                    dim,
                    nn.Sequential(nn.Linear(dim, dim * expansion_factor),
                                  nn.GELU(),
                                  nn.Dropout(dropout),
                                  nn.Linear(dim * expansion_factor, dim),
                                  nn.Dropout(dropout)))) for _ in range(depth)
        ],
        nn.LayerNorm(dim),
        Reduce('b h w c -> b c', 'mean'),
        nn.Linear(dim, num_classes))
def __init__(
    self,
    features: int,
    *args,
    activation: nn.Module = ReLUInPlace,
    **kwargs,
):
    super().__init__(features, *args, **kwargs)
    self.pool = Reduce("b c h w -> b c", reduction="mean")
    self.att = nn.Sequential(
        OrderedDict({
            "fc1": nn.Linear(features, self.reduced_features),
            "act1": activation(),
            "fc2": nn.Linear(self.reduced_features, features),
            "act2": nn.Sigmoid(),
            "proj": Rearrange("b c -> b c 1 1"),
        }))
def __init__(self,
             features: int,
             kernel_size: int = 3,
             gamma: int = 2,
             beta: int = 1):
    super().__init__()
    assert kernel_size % 2 == 1
    # adaptively choose an odd 1-D kernel size from the number of channels
    t = int(abs(math.log(features, 2) + beta) / gamma)
    k = t if t % 2 else t + 1
    self.att = nn.Sequential(
        Reduce("b c h w -> b 1 c", reduction="mean"),
        nn.Conv1d(1, 1, kernel_size=k, padding=k // 2, bias=False),
        Rearrange("b 1 c -> b c 1 1"),
        nn.Sigmoid(),
    )
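# In ECA-style attention the whole gate lives in self.att: global average pooling to
# (b, 1, c), a 1-D convolution across channels with the adaptively sized kernel k, and
# a sigmoid reshaped back to (b, c, 1, 1). The forward pass is not shown above; a
# plausible version (an assumption, not the author's verified code) simply rescales
# the input:
def forward(self, x):
    return x * self.att(x)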
def __init__(self,
             *,
             image_size,
             patch_size,
             num_classes,
             dim,
             depth,
             heads,
             mlp_dim,
             patch_merge_layer=None,
             patch_merge_num_tokens=8,
             channels=3,
             dim_head=64,
             dropout=0.,
             emb_dropout=0.):
    super().__init__()
    image_height, image_width = pair(image_size)
    patch_height, patch_width = pair(patch_size)

    assert image_height % patch_height == 0 and image_width % patch_width == 0, \
        'Image dimensions must be divisible by the patch size.'

    num_patches = (image_height // patch_height) * (image_width // patch_width)
    patch_dim = channels * patch_height * patch_width

    self.to_patch_embedding = nn.Sequential(
        Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)',
                  p1=patch_height, p2=patch_width),
        nn.Linear(patch_dim, dim),
    )

    self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
    self.dropout = nn.Dropout(emb_dropout)

    self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout,
                                   patch_merge_layer, patch_merge_num_tokens)

    self.mlp_head = nn.Sequential(Reduce('b n d -> b d', 'mean'),
                                  nn.LayerNorm(dim),
                                  nn.Linear(dim, num_classes))
def __init__(self,
             img_size=224,
             patch_size=16,
             in_chans=3,
             embed_dim=384,
             depth=36,
             num_classes=1000,
             expand_ratio=4,
             mlp_bias=False):
    super().__init__()
    self.patch_emb = PatchEmbed(img_size, patch_size, in_chans, embed_dim, flatten=False)
    self.stages = nn.Sequential(
        *[S2Block(embed_dim, expand_ratio, mlp_bias) for i in range(depth)])
    self.mlp_head = nn.Sequential(Reduce('b c h w -> b c', 'mean'),
                                  nn.Linear(embed_dim, num_classes))
def __init__(self,
             features: int,
             *args,
             activation: nn.Module = ReLUInPlace,
             **kwargs):
    super().__init__(features, *args, activation=activation, **kwargs)
    self.pool = Reduce("b c h w -> b 1 h w", reduction="mean")
    self.att = nn.Sequential(
        OrderedDict({
            "conv1": nn.Conv2d(1, self.reduced_features, kernel_size=1),
            "act1": activation(),
            "conv2": nn.Conv2d(self.reduced_features, 1, kernel_size=1),
            "act2": nn.Sigmoid(),
            "proj": nn.Identity(),
        }))
def __init__(
    self,
    image_channel,
    image_size,   # side length of a square image
    patch_size,   # side length of a square patch
    *,
    depth,
    token_mlp_dim,
    channel_mlp_dim,
    ff_dropout=0.0,
    **kwargs):
    super().__init__(image_size, image_channel, patch_size)
    self.linear_proj = nn.Linear(self.patch_dim, self.patch_dim)
    self.encoder = nn.Sequential(
        OrderedDict([
            (f"layer_{idx}",
             MLPMixerLayer(self.num_patches, self.patch_dim, token_mlp_dim,
                           channel_mlp_dim, ff_dropout)) for idx in range(depth)
        ]))
    self.pooler = nn.Sequential(nn.LayerNorm(self.patch_dim),
                                Reduce("b s c -> b c", "mean"))