def __init__(self, dim, pool_size=3, mlp_ratio=4., norm_cfg=dict(type='GN', num_groups=1), act_cfg=dict(type='GELU'), drop=0., drop_path=0., layer_scale_init_value=1e-5): super().__init__() self.norm1 = build_norm_layer(norm_cfg, dim)[1] self.token_mixer = Pooling(pool_size=pool_size) self.norm2 = build_norm_layer(norm_cfg, dim)[1] mlp_hidden_dim = int(dim * mlp_ratio) self.mlp = Mlp( in_features=dim, hidden_features=mlp_hidden_dim, act_cfg=act_cfg, drop=drop) # The following two techniques are useful to train deep PoolFormers. self.drop_path = DropPath(drop_path) if drop_path > 0. \ else nn.Identity() self.layer_scale_1 = nn.Parameter( layer_scale_init_value * torch.ones((dim)), requires_grad=True) self.layer_scale_2 = nn.Parameter( layer_scale_init_value * torch.ones((dim)), requires_grad=True)
def __init__(self, in_channels, growth_rate, bn_size, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'), drop_rate=0., memory_efficient=False): super(DenseLayer, self).__init__() self.norm1 = build_norm_layer(norm_cfg, in_channels)[1] self.conv1 = nn.Conv2d(in_channels, bn_size * growth_rate, kernel_size=1, stride=1, bias=False) self.act = build_activation_layer(act_cfg) self.norm2 = build_norm_layer(norm_cfg, bn_size * growth_rate)[1] self.conv2 = nn.Conv2d(bn_size * growth_rate, growth_rate, kernel_size=3, stride=1, padding=1, bias=False) self.drop_rate = float(drop_rate) self.memory_efficient = memory_efficient
def __init__(self, in_channels, norm_cfg=dict(type='LN2d', eps=1e-6), act_cfg=dict(type='GELU'), mlp_ratio=4., linear_pw_conv=True, drop_path_rate=0., layer_scale_init_value=1e-6): super().__init__() self.depthwise_conv = nn.Conv2d(in_channels, in_channels, kernel_size=7, padding=3, groups=in_channels) self.linear_pw_conv = linear_pw_conv self.norm = build_norm_layer(norm_cfg, in_channels)[1] mid_channels = int(mlp_ratio * in_channels) if self.linear_pw_conv: # Use linear layer to do pointwise conv. pw_conv = nn.Linear else: pw_conv = partial(nn.Conv2d, kernel_size=1) self.pointwise_conv1 = pw_conv(in_channels, mid_channels) self.act = build_activation_layer(act_cfg) self.pointwise_conv2 = pw_conv(mid_channels, in_channels) self.gamma = nn.Parameter( layer_scale_init_value * torch.ones((in_channels)), requires_grad=True) if layer_scale_init_value > 0 else None self.drop_path = DropPath( drop_path_rate) if drop_path_rate > 0. else nn.Identity()
def __init__(self, noise_size, out_channels, act_cfg=dict(type='LeakyReLU', negative_slope=0.2), norm_cfg=dict(type='PixelNorm'), normalize_latent=True, order=('linear', 'act', 'norm')): super().__init__() self.noise_size = noise_size self.out_channels = out_channels self.normalize_latent = normalize_latent self.with_activation = act_cfg is not None self.with_norm = norm_cfg is not None self.order = order assert len(order) == 3 and set(order) == set(['linear', 'act', 'norm']) # w/o bias, because the bias is added after reshaping the tensor to # 2D feature self.linear = EqualizedLRLinearModule( noise_size, out_channels * 16, equalized_lr_cfg=dict(gain=np.sqrt(2) / 4), bias=False) if self.with_activation: self.activation = build_activation_layer(act_cfg) # add bias for reshaped 2D feature. self.register_parameter( 'bias', nn.Parameter(torch.zeros(1, out_channels, 1, 1))) if self.with_norm: _, self.norm = build_norm_layer(norm_cfg, out_channels)
def __init__(self, in_channels, num_heads=1, norm_cfg=dict(type='GN', num_groups=32)): super().__init__() self.num_heads = num_heads _, self.norm = build_norm_layer(norm_cfg, in_channels) self.qkv = nn.Conv1d(in_channels, in_channels * 3, 1) self.proj = nn.Conv1d(in_channels, in_channels, 1) self.init_weights()
def __init__(self, in_channels, embedding_channels, use_scale_shift_norm, dropout, out_channels=None, norm_cfg=dict(type='GN', num_groups=32), act_cfg=dict(type='SiLU', inplace=False), shortcut_kernel_size=1): super().__init__() out_channels = in_channels if out_channels is None else out_channels _norm_cfg = deepcopy(norm_cfg) _, norm_1 = build_norm_layer(_norm_cfg, in_channels) conv_1 = [ norm_1, build_activation_layer(act_cfg), nn.Conv2d(in_channels, out_channels, 3, padding=1) ] self.conv_1 = nn.Sequential(*conv_1) norm_with_embedding_cfg = dict( in_channels=out_channels, embedding_channels=embedding_channels, use_scale_shift=use_scale_shift_norm, norm_cfg=_norm_cfg) self.norm_with_embedding = build_module( dict(type='NormWithEmbedding'), default_args=norm_with_embedding_cfg) conv_2 = [ build_activation_layer(act_cfg), nn.Dropout(dropout), nn.Conv2d(out_channels, out_channels, 3, padding=1) ] self.conv_2 = nn.Sequential(*conv_2) assert shortcut_kernel_size in [ 1, 3 ], ('Only support `1` and `3` for `shortcut_kernel_size`, but ' f'receive {shortcut_kernel_size}.') self.learnable_shortcut = out_channels != in_channels if self.learnable_shortcut: shortcut_padding = 1 if shortcut_kernel_size == 3 else 0 self.shortcut = nn.Conv2d( in_channels, out_channels, shortcut_kernel_size, padding=shortcut_padding) self.init_weights()
def __init__(self, in_channels, embedding_channels, norm_cfg=dict(type='GN', num_groups=32), act_cfg=dict(type='SiLU', inplace=False), use_scale_shift=True): super().__init__() self.use_scale_shift = use_scale_shift _, self.norm = build_norm_layer(norm_cfg, in_channels) embedding_output = in_channels * 2 if use_scale_shift else in_channels self.embedding_layer = nn.Sequential( build_activation_layer(act_cfg), nn.Linear(embedding_channels, embedding_output))
def __init__(self, in_channels, out_channels, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU')): super(DenseTransition, self).__init__() self.add_module('norm', build_norm_layer(norm_cfg, in_channels)[1]) self.add_module('act', build_activation_layer(act_cfg)) self.add_module( 'conv', nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False)) self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))
def test_build_norm_layer(): with pytest.raises(TypeError): # cfg must be a dict cfg = 'BN' build_norm_layer(cfg, 3) with pytest.raises(KeyError): # `type` must be in cfg cfg = dict() build_norm_layer(cfg, 3) with pytest.raises(KeyError): # unsupported norm type cfg = dict(type='FancyNorm') build_norm_layer(cfg, 3) with pytest.raises(AssertionError): # postfix must be int or str cfg = dict(type='BN') build_norm_layer(cfg, 3, postfix=[1, 2]) with pytest.raises(AssertionError): # `num_groups` must be in cfg when using 'GN' cfg = dict(type='GN') build_norm_layer(cfg, 3) # test each type of norm layer in norm_cfg abbr_mapping = { 'BN': 'bn', 'BN1d': 'bn', 'BN2d': 'bn', 'BN3d': 'bn', 'SyncBN': 'bn', 'GN': 'gn', 'LN': 'ln', 'IN': 'in', 'IN1d': 'in', 'IN2d': 'in', 'IN3d': 'in', } for type_name, module in NORM_LAYERS.module_dict.items(): if type_name == 'MMSyncBN': # skip MMSyncBN continue for postfix in ['_test', 1]: cfg = dict(type=type_name) if type_name == 'GN': cfg['num_groups'] = 2 name, layer = build_norm_layer(cfg, 3, postfix=postfix) assert name == abbr_mapping[type_name] + str(postfix) assert isinstance(layer, module) if type_name == 'GN': assert layer.num_channels == 3 assert layer.num_groups == cfg['num_groups'] elif type_name != 'LN': assert layer.num_features == 3
def __init__(self, arch='121', in_channels=3, bn_size=4, drop_rate=0, compression_factor=0.5, memory_efficient=False, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'), out_indices=-1, frozen_stages=0, init_cfg=None): super().__init__(init_cfg=init_cfg) if isinstance(arch, str): assert arch in self.arch_settings, \ f'Unavailable arch, please choose from ' \ f'({set(self.arch_settings)}) or pass a dict.' arch = self.arch_settings[arch] elif isinstance(arch, dict): essential_keys = {'growth_rate', 'depths', 'init_channels'} assert isinstance(arch, dict) and essential_keys <= set(arch), \ f'Custom arch needs a dict with keys {essential_keys}' self.growth_rate = arch['growth_rate'] self.depths = arch['depths'] self.init_channels = arch['init_channels'] self.act = build_activation_layer(act_cfg) self.num_stages = len(self.depths) # check out indices and frozen stages if isinstance(out_indices, int): out_indices = [out_indices] assert isinstance(out_indices, Sequence), \ f'"out_indices" must by a sequence or int, ' \ f'get {type(out_indices)} instead.' for i, index in enumerate(out_indices): if index < 0: out_indices[i] = self.num_stages + index assert out_indices[i] >= 0, f'Invalid out_indices {index}' self.out_indices = out_indices self.frozen_stages = frozen_stages # Set stem layers self.stem = nn.Sequential( nn.Conv2d(in_channels, self.init_channels, kernel_size=7, stride=2, padding=3, bias=False), build_norm_layer(norm_cfg, self.init_channels)[1], self.act, nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) # Repetitions of DenseNet Blocks self.stages = nn.ModuleList() self.transitions = nn.ModuleList() channels = self.init_channels for i in range(self.num_stages): depth = self.depths[i] stage = DenseBlock(num_layers=depth, in_channels=channels, bn_size=bn_size, growth_rate=self.growth_rate, norm_cfg=norm_cfg, act_cfg=act_cfg, drop_rate=drop_rate, memory_efficient=memory_efficient) self.stages.append(stage) channels += depth * self.growth_rate if i != self.num_stages - 1: transition = DenseTransition( in_channels=channels, out_channels=math.floor(channels * compression_factor), norm_cfg=norm_cfg, act_cfg=act_cfg, ) channels = math.floor(channels * compression_factor) else: # Final layers after dense block is just bn with act. # Unlike the paper, the original repo also put this in # transition layer, whereas torchvision take this out. # We reckon this as transition layer here. transition = nn.Sequential( build_norm_layer(norm_cfg, channels)[1], self.act, ) self.transitions.append(transition) self._freeze_stages()
def __init__(self, arch='768/32', in_channels=3, norm_cfg=dict(type='BN'), act_cfg=dict(type='GELU'), out_indices=-1, frozen_stages=0, init_cfg=None): super().__init__(init_cfg=init_cfg) if isinstance(arch, str): assert arch in self.arch_settings, \ f'Unavailable arch, please choose from ' \ f'({set(self.arch_settings)}) or pass a dict.' arch = self.arch_settings[arch] elif isinstance(arch, dict): essential_keys = { 'embed_dims', 'depth', 'patch_size', 'kernel_size' } assert isinstance(arch, dict) and essential_keys <= set(arch), \ f'Custom arch needs a dict with keys {essential_keys}' self.embed_dims = arch['embed_dims'] self.depth = arch['depth'] self.patch_size = arch['patch_size'] self.kernel_size = arch['kernel_size'] self.act = build_activation_layer(act_cfg) # check out indices and frozen stages if isinstance(out_indices, int): out_indices = [out_indices] assert isinstance(out_indices, Sequence), \ f'"out_indices" must by a sequence or int, ' \ f'get {type(out_indices)} instead.' for i, index in enumerate(out_indices): if index < 0: out_indices[i] = self.depth + index assert out_indices[i] >= 0, f'Invalid out_indices {index}' self.out_indices = out_indices self.frozen_stages = frozen_stages # Set stem layers self.stem = nn.Sequential( nn.Conv2d(in_channels, self.embed_dims, kernel_size=self.patch_size, stride=self.patch_size), self.act, build_norm_layer(norm_cfg, self.embed_dims)[1]) # Set conv2d according to torch version convfunc = nn.Conv2d if digit_version(torch.__version__) < digit_version('1.9.0'): convfunc = Conv2dAdaptivePadding # Repetitions of ConvMixer Layer self.stages = nn.Sequential(*[ nn.Sequential( Residual( nn.Sequential( convfunc(self.embed_dims, self.embed_dims, self.kernel_size, groups=self.embed_dims, padding='same'), self.act, build_norm_layer(norm_cfg, self.embed_dims)[1])), nn.Conv2d(self.embed_dims, self.embed_dims, kernel_size=1), self.act, build_norm_layer(norm_cfg, self.embed_dims)[1]) for _ in range(self.depth) ]) self._freeze_stages()
def __init__(self, arch='tiny', in_channels=3, stem_patch_size=4, norm_cfg=dict(type='LN2d', eps=1e-6), act_cfg=dict(type='GELU'), linear_pw_conv=True, drop_path_rate=0., layer_scale_init_value=1e-6, out_indices=-1, frozen_stages=0, gap_before_final_norm=True, init_cfg=None): super().__init__(init_cfg=init_cfg) if isinstance(arch, str): assert arch in self.arch_settings, \ f'Unavailable arch, please choose from ' \ f'({set(self.arch_settings)}) or pass a dict.' arch = self.arch_settings[arch] elif isinstance(arch, dict): assert 'depths' in arch and 'channels' in arch, \ f'The arch dict must have "depths" and "channels", ' \ f'but got {list(arch.keys())}.' self.depths = arch['depths'] self.channels = arch['channels'] assert (isinstance(self.depths, Sequence) and isinstance(self.channels, Sequence) and len(self.depths) == len(self.channels)), \ f'The "depths" ({self.depths}) and "channels" ({self.channels}) ' \ 'should be both sequence with the same length.' self.num_stages = len(self.depths) if isinstance(out_indices, int): out_indices = [out_indices] assert isinstance(out_indices, Sequence), \ f'"out_indices" must by a sequence or int, ' \ f'get {type(out_indices)} instead.' for i, index in enumerate(out_indices): if index < 0: out_indices[i] = 4 + index assert out_indices[i] >= 0, f'Invalid out_indices {index}' self.out_indices = out_indices self.frozen_stages = frozen_stages self.gap_before_final_norm = gap_before_final_norm # stochastic depth decay rule dpr = [ x.item() for x in torch.linspace(0, drop_path_rate, sum(self.depths)) ] block_idx = 0 # 4 downsample layers between stages, including the stem layer. self.downsample_layers = ModuleList() stem = nn.Sequential( nn.Conv2d( in_channels, self.channels[0], kernel_size=stem_patch_size, stride=stem_patch_size), build_norm_layer(norm_cfg, self.channels[0])[1], ) self.downsample_layers.append(stem) # 4 feature resolution stages, each consisting of multiple residual # blocks self.stages = nn.ModuleList() for i in range(self.num_stages): depth = self.depths[i] channels = self.channels[i] if i >= 1: downsample_layer = nn.Sequential( LayerNorm2d(self.channels[i - 1]), nn.Conv2d( self.channels[i - 1], channels, kernel_size=2, stride=2), ) self.downsample_layers.append(downsample_layer) stage = Sequential(*[ ConvNeXtBlock( in_channels=channels, drop_path_rate=dpr[block_idx + j], norm_cfg=norm_cfg, act_cfg=act_cfg, linear_pw_conv=linear_pw_conv, layer_scale_init_value=layer_scale_init_value) for j in range(depth) ]) block_idx += depth self.stages.append(stage) if i in self.out_indices: norm_layer = build_norm_layer(norm_cfg, channels)[1] self.add_module(f'norm{i}', norm_layer) self._freeze_stages()
def __init__(self, arch='s12', pool_size=3, norm_cfg=dict(type='GN', num_groups=1), act_cfg=dict(type='GELU'), in_patch_size=7, in_stride=4, in_pad=2, down_patch_size=3, down_stride=2, down_pad=1, drop_rate=0., drop_path_rate=0., out_indices=-1, frozen_stages=0, init_cfg=None): super().__init__(init_cfg=init_cfg) if isinstance(arch, str): assert arch in self.arch_settings, \ f'Unavailable arch, please choose from ' \ f'({set(self.arch_settings)}) or pass a dict.' arch = self.arch_settings[arch] elif isinstance(arch, dict): assert 'layers' in arch and 'embed_dims' in arch, \ f'The arch dict must have "layers" and "embed_dims", ' \ f'but got {list(arch.keys())}.' layers = arch['layers'] embed_dims = arch['embed_dims'] mlp_ratios = arch['mlp_ratios'] \ if 'mlp_ratios' in arch else [4, 4, 4, 4] layer_scale_init_value = arch['layer_scale_init_value'] \ if 'layer_scale_init_value' in arch else 1e-5 self.patch_embed = PatchEmbed( patch_size=in_patch_size, stride=in_stride, padding=in_pad, in_chans=3, embed_dim=embed_dims[0]) # set the main block in network network = [] for i in range(len(layers)): stage = basic_blocks( embed_dims[i], i, layers, pool_size=pool_size, mlp_ratio=mlp_ratios[i], norm_cfg=norm_cfg, act_cfg=act_cfg, drop_rate=drop_rate, drop_path_rate=drop_path_rate, layer_scale_init_value=layer_scale_init_value) network.append(stage) if i >= len(layers) - 1: break if embed_dims[i] != embed_dims[i + 1]: # downsampling between two stages network.append( PatchEmbed( patch_size=down_patch_size, stride=down_stride, padding=down_pad, in_chans=embed_dims[i], embed_dim=embed_dims[i + 1])) self.network = nn.ModuleList(network) if isinstance(out_indices, int): out_indices = [out_indices] assert isinstance(out_indices, Sequence), \ f'"out_indices" must by a sequence or int, ' \ f'get {type(out_indices)} instead.' for i, index in enumerate(out_indices): if index < 0: out_indices[i] = 7 + index assert out_indices[i] >= 0, f'Invalid out_indices {index}' self.out_indices = out_indices if self.out_indices: for i_layer in self.out_indices: layer = build_norm_layer(norm_cfg, embed_dims[(i_layer + 1) // 2])[1] layer_name = f'norm{i_layer}' self.add_module(layer_name, layer) self.frozen_stages = frozen_stages self._freeze_stages()