def __init__(self, dim, pool_size=3, mlp_ratio=4., act_layer=nn.GELU,
             norm_layer=GroupNorm, drop_rate=0., drop_path_rate=0.,
             use_layer_scale=True, layer_scale_init_value=1e-5):
    """Create the sub-layers of a pooling-based mixer block.

    Args:
        dim: Size passed to both norm layers, used as the MLP input width
            and as the length of the layer-scale vectors.
        pool_size: Forwarded to ``Pooling``.
        mlp_ratio: The MLP hidden width is ``int(dim * mlp_ratio)``.
        act_layer: Forwarded to ``Mlp`` as ``act_layer``.
        norm_layer: Callable invoked as ``norm_layer(dim)`` for each norm.
        drop_rate: Forwarded to ``Mlp`` as ``drop_rate``.
        drop_path_rate: ``DropPath`` is built when this is > 0; otherwise
            ``nn.Identity`` is used.
        use_layer_scale: When truthy, two learnable scale vectors are
            registered as parameters.
        layer_scale_init_value: Initial value of every layer-scale entry.
    """
    super().__init__()

    self.norm1 = norm_layer(dim)
    self.token_mixer = Pooling(pool_size=pool_size)

    self.norm2 = norm_layer(dim)
    hidden_features = int(dim * mlp_ratio)
    self.mlp = Mlp(
        in_features=dim,
        hidden_features=hidden_features,
        act_layer=act_layer,
        drop_rate=drop_rate,
    )

    self.drop_path = (
        DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
    )

    self.use_layer_scale = use_layer_scale
    if use_layer_scale:
        # Each parameter gets its own freshly allocated tensor so the two
        # scales never share storage.
        self.layer_scale_1 = nn.Parameter(
            layer_scale_init_value * torch.ones((dim)),
            requires_grad=True,
        )
        self.layer_scale_2 = nn.Parameter(
            layer_scale_init_value * torch.ones((dim)),
            requires_grad=True,
        )
def __init__(self,
             embed_dims,
             ffn_ratio=4.,
             drop_rate=0.,
             drop_path_rate=0.,
             act_cfg=dict(type='GELU'),
             norm_cfg=dict(type='BN', eps=1e-5),
             layer_scale_init_value=1e-2,
             init_cfg=None):
    """Create the sub-layers of a VAN block.

    Args:
        embed_dims: Channel count used for the norms, the attention
            module, the FFN and the layer-scale vectors. Also stored as
            ``self.out_channels``.
        ffn_ratio: The FFN hidden width is ``int(embed_dims * ffn_ratio)``.
        drop_rate: Forwarded to ``MixFFN`` as ``ffn_drop``.
        drop_path_rate: ``DropPath`` is built when this is > 0; otherwise
            ``nn.Identity`` is used.
        act_cfg: Forwarded to ``SpatialAttention`` and ``MixFFN``.
        norm_cfg: Forwarded to ``build_norm_layer`` for both norms.
        layer_scale_init_value: When > 0, two learnable scale vectors
            filled with this value are created; otherwise both
            attributes are set to ``None``.
        init_cfg: Forwarded to the parent constructor.
    """
    super(VANBlock, self).__init__(init_cfg=init_cfg)
    self.out_channels = embed_dims

    self.norm1 = build_norm_layer(norm_cfg, embed_dims)[1]
    self.attn = SpatialAttention(embed_dims, act_cfg=act_cfg)
    if drop_path_rate > 0.:
        self.drop_path = DropPath(drop_path_rate)
    else:
        self.drop_path = nn.Identity()

    self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1]
    ffn_channels = int(embed_dims * ffn_ratio)
    self.mlp = MixFFN(
        embed_dims=embed_dims,
        feedforward_channels=ffn_channels,
        act_cfg=act_cfg,
        ffn_drop=drop_rate,
    )

    if layer_scale_init_value > 0:
        self.layer_scale_1 = nn.Parameter(
            layer_scale_init_value * torch.ones((embed_dims)),
            requires_grad=True,
        )
        self.layer_scale_2 = nn.Parameter(
            layer_scale_init_value * torch.ones((embed_dims)),
            requires_grad=True,
        )
    else:
        # Layer scale disabled: keep the attributes present but empty.
        self.layer_scale_1 = None
        self.layer_scale_2 = None
def __init__(self,
             in_channels,
             norm_cfg=dict(type='LN2d', eps=1e-6),
             act_cfg=dict(type='GELU'),
             mlp_ratio=4.,
             linear_pw_conv=True,
             drop_path_rate=0.,
             layer_scale_init_value=1e-6):
    """Create the layers of a depthwise-conv + pointwise-MLP block.

    Args:
        in_channels: Channel count of the depthwise conv, the norm and
            the block output.
        norm_cfg: Forwarded to ``build_norm_layer``.
        act_cfg: Forwarded to ``build_activation_layer``.
        mlp_ratio: The intermediate width is
            ``int(mlp_ratio * in_channels)``.
        linear_pw_conv: When truthy the two pointwise layers are
            ``nn.Linear``; otherwise they are 1x1 ``nn.Conv2d``.
        drop_path_rate: ``DropPath`` is built when this is > 0; otherwise
            ``nn.Identity`` is used.
        layer_scale_init_value: When > 0, ``self.gamma`` is a learnable
            vector filled with this value; otherwise it is ``None``.
    """
    super().__init__()

    self.depthwise_conv = nn.Conv2d(
        in_channels,
        in_channels,
        kernel_size=7,
        padding=3,
        groups=in_channels,
    )

    self.linear_pw_conv = linear_pw_conv
    self.norm = build_norm_layer(norm_cfg, in_channels)[1]

    mid_channels = int(mlp_ratio * in_channels)
    # Use linear layer to do pointwise conv when requested, else 1x1 conv.
    pw_conv = (
        nn.Linear if self.linear_pw_conv
        else partial(nn.Conv2d, kernel_size=1)
    )

    self.pointwise_conv1 = pw_conv(in_channels, mid_channels)
    self.act = build_activation_layer(act_cfg)
    self.pointwise_conv2 = pw_conv(mid_channels, in_channels)

    if layer_scale_init_value > 0:
        self.gamma = nn.Parameter(
            layer_scale_init_value * torch.ones((in_channels)),
            requires_grad=True,
        )
    else:
        self.gamma = None

    if drop_path_rate > 0.:
        self.drop_path = DropPath(drop_path_rate)
    else:
        self.drop_path = nn.Identity()
def __init__(self,
             in_channels,
             out_channels,
             expansion=2,
             add_identity=True,
             use_depthwise=False,
             conv_cfg=None,
             drop_path_rate=0,
             norm_cfg=dict(type='BN', eps=1e-5),
             act_cfg=dict(type='LeakyReLU', inplace=True),
             init_cfg=None):
    """Create the layers of a two-conv bottleneck.

    Args:
        in_channels: Input channels of the first (1x1) conv.
        out_channels: Output channels of the second (3x3) conv.
        expansion: The hidden width is ``int(out_channels / expansion)``.
        add_identity: Requested identity shortcut; only kept when
            ``in_channels == out_channels``.
        use_depthwise: Selects ``DepthwiseSeparableConvModule`` instead
            of ``ConvModule`` for the second conv.
        conv_cfg: Forwarded to both conv modules.
        drop_path_rate: ``DropPath`` is built when this exceeds ``eps``
            (a name resolved outside this method); otherwise
            ``nn.Identity`` is used.
        norm_cfg: Forwarded to both conv modules.
        act_cfg: Forwarded to both conv modules.
        init_cfg: Forwarded to the parent constructor.
    """
    super().__init__(init_cfg)

    hidden_channels = int(out_channels / expansion)
    if use_depthwise:
        conv = DepthwiseSeparableConvModule
    else:
        conv = ConvModule

    # Configuration shared by both convolutions.
    shared_cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

    self.conv1 = ConvModule(in_channels, hidden_channels, 1, **shared_cfg)
    self.conv2 = conv(
        hidden_channels,
        out_channels,
        3,
        stride=1,
        padding=1,
        **shared_cfg,
    )

    self.add_identity = add_identity and in_channels == out_channels

    if drop_path_rate > eps:
        self.drop_path = DropPath(drop_prob=drop_path_rate)
    else:
        self.drop_path = nn.Identity()
def __init__(self,
             dim,
             pool_size=3,
             mlp_ratio=4.,
             norm_cfg=dict(type='GN', num_groups=1),
             act_cfg=dict(type='GELU'),
             drop=0.,
             drop_path=0.,
             layer_scale_init_value=1e-5):
    """Create the sub-layers of a config-driven pooling mixer block.

    Args:
        dim: Channel count used for both norms, the MLP input and the
            layer-scale vectors.
        pool_size: Forwarded to ``Pooling``.
        mlp_ratio: The MLP hidden width is ``int(dim * mlp_ratio)``.
        norm_cfg: Forwarded to ``build_norm_layer`` for both norms.
        act_cfg: Forwarded to ``Mlp``.
        drop: Forwarded to ``Mlp`` as ``drop``.
        drop_path: ``DropPath`` is built when this is > 0; otherwise
            ``nn.Identity`` is used.
        layer_scale_init_value: Initial value of every entry of the two
            learnable layer-scale vectors (always created here).
    """
    super().__init__()

    self.norm1 = build_norm_layer(norm_cfg, dim)[1]
    self.token_mixer = Pooling(pool_size=pool_size)

    self.norm2 = build_norm_layer(norm_cfg, dim)[1]
    hidden_features = int(dim * mlp_ratio)
    self.mlp = Mlp(
        in_features=dim,
        hidden_features=hidden_features,
        act_cfg=act_cfg,
        drop=drop,
    )

    # The following two techniques are useful to train deep PoolFormers.
    if drop_path > 0.:
        self.drop_path = DropPath(drop_path)
    else:
        self.drop_path = nn.Identity()

    self.layer_scale_1 = nn.Parameter(
        layer_scale_init_value * torch.ones((dim)),
        requires_grad=True,
    )
    self.layer_scale_2 = nn.Parameter(
        layer_scale_init_value * torch.ones((dim)),
        requires_grad=True,
    )
def __init__(self,
             in_channels,
             out_channels,
             expansion=1,
             stride=1,
             dilation=1,
             downsample=None,
             style='pytorch',
             with_cp=False,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             drop_path_rate=0.0,
             act_cfg=dict(type='ReLU', inplace=True),
             init_cfg=None):
    """Create the layers of a two-conv basic residual block.

    Args:
        in_channels: Input channels of the first 3x3 conv.
        out_channels: Output channels of the second 3x3 conv.
        expansion: Must equal 1 (asserted); the intermediate width is
            ``out_channels // expansion``.
        stride: Stride of the first conv.
        dilation: Dilation and padding of the first conv.
        downsample: Stored unchanged as ``self.downsample``.
        style: Stored unchanged as ``self.style``.
        with_cp: Stored unchanged as ``self.with_cp``.
        conv_cfg: Forwarded to ``build_conv_layer``.
        norm_cfg: Forwarded to ``build_norm_layer``.
        drop_path_rate: ``DropPath`` is built when this exceeds ``eps``
            (a name resolved outside this method); otherwise
            ``nn.Identity`` is used.
        act_cfg: Forwarded to ``build_activation_layer``.
        init_cfg: Forwarded to the parent constructor.
    """
    super(BasicBlock, self).__init__(init_cfg=init_cfg)

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.expansion = expansion
    assert self.expansion == 1
    assert out_channels % expansion == 0
    self.mid_channels = out_channels // expansion

    self.stride = stride
    self.dilation = dilation
    self.style = style
    self.with_cp = with_cp
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg

    # Norm layers are built up front and registered right after the conv
    # they are paired with, under the names returned by the builder.
    self.norm1_name, first_norm = build_norm_layer(
        norm_cfg, self.mid_channels, postfix=1)
    self.norm2_name, second_norm = build_norm_layer(
        norm_cfg, out_channels, postfix=2)

    self.conv1 = build_conv_layer(
        conv_cfg,
        in_channels,
        self.mid_channels,
        3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        bias=False,
    )
    self.add_module(self.norm1_name, first_norm)

    self.conv2 = build_conv_layer(
        conv_cfg,
        self.mid_channels,
        out_channels,
        3,
        padding=1,
        bias=False,
    )
    self.add_module(self.norm2_name, second_norm)

    self.relu = build_activation_layer(act_cfg)
    self.downsample = downsample

    if drop_path_rate > eps:
        self.drop_path = DropPath(drop_prob=drop_path_rate)
    else:
        self.drop_path = nn.Identity()
def __init__(self,
             in_channels,
             out_channels,
             mid_channels,
             kernel_size=3,
             stride=1,
             se_cfg=None,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU'),
             drop_path_rate=0.,
             with_cp=False,
             init_cfg=None):
    """Create the layers of an inverted residual block.

    Args:
        in_channels: Input channels of the block.
        out_channels: Output channels of the final 1x1 conv.
        mid_channels: Channels of the depthwise conv; an expand conv is
            only built when this differs from ``in_channels``.
        kernel_size: Kernel size of the depthwise conv; its padding is
            ``kernel_size // 2``.
        stride: Stride of the depthwise conv; must be 1 or 2 (asserted).
        se_cfg: When not ``None`` it must be a dict (asserted) and is
            expanded into ``SELayer(**se_cfg)``.
        conv_cfg: Forwarded to every ``ConvModule``.
        norm_cfg: Forwarded to every ``ConvModule``.
        act_cfg: Forwarded to the expand and depthwise convs; the final
            conv is built with ``act_cfg=None``.
        drop_path_rate: ``DropPath`` is built when this is > 0; otherwise
            ``nn.Identity`` is used.
        with_cp: Stored unchanged as ``self.with_cp``.
        init_cfg: Forwarded to the parent constructor.
    """
    super(InvertedResidual, self).__init__(init_cfg)

    self.with_res_shortcut = (stride == 1 and in_channels == out_channels)
    assert stride in [1, 2]
    self.with_cp = with_cp

    if drop_path_rate > 0:
        self.drop_path = DropPath(drop_path_rate)
    else:
        self.drop_path = nn.Identity()

    self.with_se = se_cfg is not None
    self.with_expand_conv = (mid_channels != in_channels)

    if self.with_se:
        assert isinstance(se_cfg, dict)

    # Settings common to every conv module in this block.
    common_cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg)

    if self.with_expand_conv:
        self.expand_conv = ConvModule(
            in_channels=in_channels,
            out_channels=mid_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            act_cfg=act_cfg,
            **common_cfg,
        )

    self.depthwise_conv = ConvModule(
        in_channels=mid_channels,
        out_channels=mid_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=kernel_size // 2,
        groups=mid_channels,
        act_cfg=act_cfg,
        **common_cfg,
    )

    if self.with_se:
        self.se = SELayer(**se_cfg)

    self.linear_conv = ConvModule(
        in_channels=mid_channels,
        out_channels=out_channels,
        kernel_size=1,
        stride=1,
        padding=0,
        act_cfg=None,
        **common_cfg,
    )
def __init__(self,
             in_channels,
             out_channels,
             mid_channels,
             kernel_size=3,
             stride=1,
             se_cfg=None,
             with_residual=True,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU'),
             drop_path_rate=0.,
             with_cp=False,
             init_cfg=None,
             **kwargs):
    """Create the layers of an edge residual block.

    Args:
        in_channels: Input channels of the first conv.
        out_channels: Output channels of the second (1x1) conv.
        mid_channels: Output channels of the first conv.
        kernel_size: Kernel size of the first conv; its padding is
            ``kernel_size // 2``.
        stride: Stride of the second conv; must be 1 or 2 (asserted).
        se_cfg: When not ``None`` it must be a dict (asserted) and is
            expanded into ``SELayer(**se_cfg)``.
        with_residual: Requested residual shortcut; only kept when
            ``stride == 1`` and ``in_channels == out_channels``.
        conv_cfg: Forwarded to both ``ConvModule`` instances.
        norm_cfg: Forwarded to both ``ConvModule`` instances.
        act_cfg: Forwarded to the first conv; the second conv is built
            with ``act_cfg=None``.
        drop_path_rate: ``DropPath`` is built when this is > 0; otherwise
            ``nn.Identity`` is used.
        with_cp: Stored unchanged as ``self.with_cp``.
        init_cfg: Forwarded to the parent constructor.
        **kwargs: Accepted but not used by this constructor.
    """
    super(EdgeResidual, self).__init__(init_cfg=init_cfg)

    assert stride in [1, 2]
    self.with_cp = with_cp

    if drop_path_rate > 0:
        self.drop_path = DropPath(drop_path_rate)
    else:
        self.drop_path = nn.Identity()

    self.with_se = se_cfg is not None
    self.with_residual = (
        stride == 1 and in_channels == out_channels and with_residual)

    if self.with_se:
        assert isinstance(se_cfg, dict)

    # Settings common to both conv modules.
    common_cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg)

    self.conv1 = ConvModule(
        in_channels=in_channels,
        out_channels=mid_channels,
        kernel_size=kernel_size,
        stride=1,
        padding=kernel_size // 2,
        act_cfg=act_cfg,
        **common_cfg,
    )

    if self.with_se:
        self.se = SELayer(**se_cfg)

    self.conv2 = ConvModule(
        in_channels=mid_channels,
        out_channels=out_channels,
        kernel_size=1,
        stride=stride,
        padding=0,
        act_cfg=None,
        **common_cfg,
    )
def __init__(self,
             dim,
             mlp_ratio=4.,
             drop=0.,
             drop_path=0.,
             act_layer=nn.GELU,
             norm_layer=nn.LayerNorm,
             focal_level=2,
             focal_window=9,
             use_layerscale=False,
             layerscale_value=1e-4):
    """Create the sub-layers of a focal-modulation block.

    Args:
        dim: Feature width used for the norms, the modulation module,
            the MLP input and the optional layer-scale vectors.
        mlp_ratio: The MLP hidden width is ``int(dim * mlp_ratio)``.
        drop: Forwarded to ``FocalModulation`` as ``proj_drop`` and to
            ``Mlp`` as ``drop``.
        drop_path: ``DropPath`` is built when this is > 0; otherwise
            ``nn.Identity`` is used.
        act_layer: Forwarded to ``Mlp``.
        norm_layer: Callable invoked as ``norm_layer(dim)`` for each norm.
        focal_level: Forwarded to ``FocalModulation``.
        focal_window: Forwarded to ``FocalModulation``.
        use_layerscale: When truthy, ``gamma_1`` and ``gamma_2`` become
            learnable vectors; otherwise they stay the float ``1.0``.
        layerscale_value: Initial value of every layer-scale entry.
    """
    super().__init__()

    self.dim = dim
    self.mlp_ratio = mlp_ratio
    self.focal_window = focal_window
    self.focal_level = focal_level
    self.use_layerscale = use_layerscale

    self.norm1 = norm_layer(dim)
    self.modulation = FocalModulation(
        dim,
        focal_window=self.focal_window,
        focal_level=self.focal_level,
        proj_drop=drop,
    )

    if drop_path > 0.:
        self.drop_path = DropPath(drop_path)
    else:
        self.drop_path = nn.Identity()

    self.norm2 = norm_layer(dim)
    hidden_features = int(dim * mlp_ratio)
    self.mlp = Mlp(
        in_features=dim,
        hidden_features=hidden_features,
        act_layer=act_layer,
        drop=drop,
    )

    # Both attributes start as None in the constructor.
    self.H = None
    self.W = None

    # Plain-float scales by default; replaced with learnable vectors below
    # when layer scale is enabled.
    self.gamma_1 = 1.0
    self.gamma_2 = 1.0
    if self.use_layerscale:
        self.gamma_1 = nn.Parameter(
            layerscale_value * torch.ones((dim)),
            requires_grad=True,
        )
        self.gamma_2 = nn.Parameter(
            layerscale_value * torch.ones((dim)),
            requires_grad=True,
        )
def __init__(self,
             dim,
             num_heads,
             window_size=7,
             shift_size=0,
             mlp_ratio=4.,
             qkv_bias=True,
             drop=0.,
             attn_drop=0.,
             drop_path=0.,
             act_layer=nn.GELU,
             norm_layer=nn.LayerNorm,
             pretrained_window_size=0):
    """Create the sub-layers of a window-attention block.

    Args:
        dim: Feature width used for the norms, the attention module and
            the MLP input.
        num_heads: Forwarded to ``WindowAttention``.
        window_size: Stored on the instance and passed to
            ``WindowAttention`` after ``to_2tuple``.
        shift_size: Must satisfy ``0 <= shift_size < window_size``
            (asserted).
        mlp_ratio: The MLP hidden width is ``int(dim * mlp_ratio)``.
        qkv_bias: Forwarded to ``WindowAttention``.
        drop: Forwarded to ``WindowAttention`` as ``proj_drop`` and to
            ``Mlp`` as ``drop``.
        attn_drop: Forwarded to ``WindowAttention``.
        drop_path: ``DropPath`` is built when this is > 0; otherwise
            ``nn.Identity`` is used.
        act_layer: Forwarded to ``Mlp``.
        norm_layer: Callable invoked as ``norm_layer(dim)`` for each norm.
        pretrained_window_size: Passed to ``WindowAttention`` after
            ``to_2tuple``.
    """
    super().__init__()

    self.dim = dim
    self.num_heads = num_heads
    self.window_size = window_size
    self.shift_size = shift_size
    self.mlp_ratio = mlp_ratio
    assert 0 <= self.shift_size < self.window_size, \
        'shift_size must in 0-window_size'

    self.norm1 = norm_layer(dim)
    self.attn = WindowAttention(
        dim,
        window_size=to_2tuple(self.window_size),
        num_heads=num_heads,
        qkv_bias=qkv_bias,
        attn_drop=attn_drop,
        proj_drop=drop,
        pretrained_window_size=to_2tuple(pretrained_window_size),
    )

    if drop_path > 0.:
        self.drop_path = DropPath(drop_path)
    else:
        self.drop_path = nn.Identity()

    self.norm2 = norm_layer(dim)
    hidden_features = int(dim * mlp_ratio)
    self.mlp = Mlp(
        in_features=dim,
        hidden_features=hidden_features,
        act_layer=act_layer,
        drop=drop,
    )

    # Both attributes start as None in the constructor.
    self.H = None
    self.W = None
def __init__(self, embed_dim, dim_feedforward=2048, drop_path_rate=0.1):
    """Create the layers of a ConvMLP block.

    Args:
        embed_dim: Feature width used by the three layer norms, both
            channel MLPs and the depthwise 3x3 conv.
        dim_feedforward: Forwarded to both ``Mlp`` instances as
            ``hidden_dim``.
        drop_path_rate: ``DropPath`` is built when this is > 0; otherwise
            ``nn.Identity`` is used.
    """
    super(ConvMLPBlock, self).__init__()

    self.norm1 = nn.LayerNorm(embed_dim)
    self.channel_mlp1 = Mlp(
        embed_dim_in=embed_dim,
        hidden_dim=dim_feedforward,
    )

    self.norm2 = nn.LayerNorm(embed_dim)
    # Depthwise (groups == channels), bias-free 3x3 conv.
    self.connect = nn.Conv2d(
        embed_dim,
        embed_dim,
        3,
        stride=1,
        padding=1,
        groups=embed_dim,
        bias=False,
    )
    self.connect_norm = nn.LayerNorm(embed_dim)

    self.channel_mlp2 = Mlp(
        embed_dim_in=embed_dim,
        hidden_dim=dim_feedforward,
    )

    if drop_path_rate > 0:
        self.drop_path = DropPath(drop_path_rate)
    else:
        self.drop_path = nn.Identity()
def __init__(self,
             dim,
             num_heads,
             mlp_ratio=4.,
             qkv_bias=False,
             qk_scale=None,
             drop_rate=0.,
             attn_drop_rate=0.,
             drop_path_rate=0.,
             act_layer=nn.GELU,
             norm_layer=nn.LayerNorm,
             sr_ratio=1,
             linear=False):
    """Create the sub-layers of an attention + MLP block.

    Args:
        dim: Feature width used for the norms, the attention module and
            the MLP input.
        num_heads: Forwarded to ``Attention``.
        mlp_ratio: The MLP hidden width is ``int(dim * mlp_ratio)``.
        qkv_bias: Forwarded to ``Attention``.
        qk_scale: Forwarded to ``Attention``.
        drop_rate: Forwarded to ``Attention`` as ``proj_drop_rate`` and
            to ``Mlp`` as ``drop_rate``.
        attn_drop_rate: Forwarded to ``Attention``.
        drop_path_rate: ``DropPath`` is built when this is > 0; otherwise
            ``nn.Identity`` is used.
        act_layer: Forwarded to ``Mlp``.
        norm_layer: Callable invoked as ``norm_layer(dim)`` for each norm.
        sr_ratio: Forwarded to ``Attention``.
        linear: Forwarded to both ``Attention`` and ``Mlp``.
    """
    super().__init__()

    self.norm1 = norm_layer(dim)
    self.attn = Attention(
        dim,
        num_heads=num_heads,
        qkv_bias=qkv_bias,
        qk_scale=qk_scale,
        attn_drop_rate=attn_drop_rate,
        proj_drop_rate=drop_rate,
        sr_ratio=sr_ratio,
        linear=linear,
    )

    # NOTE: drop path for stochastic depth,
    # we shall see if this is better than dropout here
    self.drop_path = (
        DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
    )

    self.norm2 = norm_layer(dim)
    hidden_features = int(dim * mlp_ratio)
    self.mlp = Mlp(
        in_features=dim,
        hidden_features=hidden_features,
        act_layer=act_layer,
        drop_rate=drop_rate,
        linear=linear,
    )
def __init__(self,
             in_channels,
             out_channels,
             expansion=4,
             stride=1,
             dilation=1,
             downsample=None,
             style='pytorch',
             with_cp=False,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             drop_path_rate=0.0,
             init_cfg=None):
    """Create the layers of a 1x1 -> 3x3 -> 1x1 bottleneck block.

    Args:
        in_channels: Input channels of the first 1x1 conv.
        out_channels: Output channels of the last 1x1 conv; must be
            divisible by ``expansion`` (asserted).
        expansion: The intermediate width is
            ``out_channels // expansion``.
        stride: Applied to the 3x3 conv when ``style`` is ``'pytorch'``
            and to the first 1x1 conv otherwise.
        dilation: Dilation and padding of the 3x3 conv.
        downsample: Stored unchanged as ``self.downsample``.
        style: Must be ``'pytorch'`` or ``'caffe'`` (asserted).
        with_cp: Stored unchanged as ``self.with_cp``.
        conv_cfg: Forwarded to ``build_conv_layer``.
        norm_cfg: Forwarded to ``build_norm_layer``.
        drop_path_rate: ``DropPath`` is built when this exceeds ``eps``
            (a name resolved outside this method); otherwise
            ``nn.Identity`` is used.
        init_cfg: Forwarded to the parent constructor.
    """
    super(Bottleneck, self).__init__(init_cfg=init_cfg)
    assert style in ['pytorch', 'caffe']

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.expansion = expansion
    assert out_channels % expansion == 0
    self.mid_channels = out_channels // expansion

    self.stride = stride
    self.dilation = dilation
    self.style = style
    self.with_cp = with_cp
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg

    # The style decides which conv carries the stride.
    if self.style == 'pytorch':
        self.conv1_stride, self.conv2_stride = 1, stride
    else:
        self.conv1_stride, self.conv2_stride = stride, 1

    # Norm layers are built up front and registered right after the conv
    # they are paired with, under the names returned by the builder.
    self.norm1_name, first_norm = build_norm_layer(
        norm_cfg, self.mid_channels, postfix=1)
    self.norm2_name, second_norm = build_norm_layer(
        norm_cfg, self.mid_channels, postfix=2)
    self.norm3_name, third_norm = build_norm_layer(
        norm_cfg, out_channels, postfix=3)

    self.conv1 = build_conv_layer(
        conv_cfg,
        in_channels,
        self.mid_channels,
        kernel_size=1,
        stride=self.conv1_stride,
        bias=False,
    )
    self.add_module(self.norm1_name, first_norm)

    self.conv2 = build_conv_layer(
        conv_cfg,
        self.mid_channels,
        self.mid_channels,
        kernel_size=3,
        stride=self.conv2_stride,
        padding=dilation,
        dilation=dilation,
        bias=False,
    )
    self.add_module(self.norm2_name, second_norm)

    self.conv3 = build_conv_layer(
        conv_cfg,
        self.mid_channels,
        out_channels,
        kernel_size=1,
        bias=False,
    )
    self.add_module(self.norm3_name, third_norm)

    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample

    if drop_path_rate > eps:
        self.drop_path = DropPath(drop_prob=drop_path_rate)
    else:
        self.drop_path = nn.Identity()