def __init__(
    self,
    in_channels: nn.MaybeChoice[int],
    out_channels: nn.MaybeChoice[int],
    expand_ratio: nn.MaybeChoice[float],
    kernel_size: nn.MaybeChoice[int] = 3,
    stride: int = 1,
    squeeze_excite: Optional[Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module]] = None,
    norm_layer: Optional[Callable[[int], nn.Module]] = None,
    activation_layer: Optional[Callable[..., nn.Module]] = None,
) -> None:
    """Inverted-residual block: point-wise expand -> depth-wise -> optional SE -> point-wise linear.

    ``in_channels``/``out_channels``/``expand_ratio``/``kernel_size`` may be value
    choices (searchable); ``squeeze_excite``, ``norm_layer`` and ``activation_layer``
    are factory callables used to build sub-modules.
    """
    super().__init__()
    self.stride = stride
    self.out_channels = out_channels
    assert stride in [1, 2]

    # Hidden width of the expansion phase, rounded to a multiple of 8.
    hidden_ch = cast(int, make_divisible(in_channels * expand_ratio, 8))

    # NOTE: this equivalence check should also work for ValueChoice
    self.has_skip = stride == 1 and in_channels == out_channels

    # NOTE: some papers omit this point-wise convolution when stride = 1.
    # In our implementation, if this pw convolution is intended to be omitted,
    # please use SepConv instead.
    pw_expand = ConvBNReLU(in_channels, hidden_ch, kernel_size=1,
                           norm_layer=norm_layer, activation_layer=activation_layer)
    # Depth-wise convolution (groups == channels).
    dw = ConvBNReLU(hidden_ch, hidden_ch, stride=stride, kernel_size=kernel_size, groups=hidden_ch,
                    norm_layer=norm_layer, activation_layer=activation_layer)
    # Optional squeeze-and-excitation; identity placeholder when absent.
    se = nn.Identity() if squeeze_excite is None else squeeze_excite(
        cast(int, hidden_ch), cast(int, in_channels)
    )
    # Point-wise linear projection (no activation).
    pw_linear = ConvBNReLU(hidden_ch, out_channels, kernel_size=1,
                           norm_layer=norm_layer, activation_layer=nn.Identity)

    # Re-initialize the Sequential base with the simplified layer list.
    super().__init__(*simplify_sequential([pw_expand, dw, se, pw_linear]))
def __init__(
    self,
    in_channels: nn.MaybeChoice[int],
    out_channels: nn.MaybeChoice[int],
    kernel_size: nn.MaybeChoice[int] = 3,
    stride: int = 1,
    squeeze_excite: Optional[Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module]] = None,
    norm_layer: Optional[Callable[[int], nn.Module]] = None,
    activation_layer: Optional[Callable[..., nn.Module]] = None,
) -> None:
    """Depth-wise separable convolution: dw conv -> optional SE -> pw-linear projection.

    Unlike the inverted-residual block there is no expansion phase; the
    depth-wise conv runs at ``in_channels`` width.
    """
    # Depth-wise convolution (groups == channels).
    depthwise = ConvBNReLU(in_channels, in_channels, stride=stride, kernel_size=kernel_size,
                           groups=in_channels, norm_layer=norm_layer, activation_layer=activation_layer)
    # Optional squeeze-and-excitation; identity placeholder when absent.
    se = squeeze_excite(in_channels, in_channels) if squeeze_excite else nn.Identity()
    # Point-wise linear projection (no activation).
    pointwise = ConvBNReLU(in_channels, out_channels, kernel_size=1,
                           norm_layer=norm_layer, activation_layer=nn.Identity)

    super().__init__(*simplify_sequential([depthwise, se, pointwise]))
    # Residual skip is only valid when spatial size and width are preserved.
    self.has_skip = stride == 1 and in_channels == out_channels
def __init__(self, node_id, num_prev_nodes, channels, num_downsample_connect):
    """DARTS cell node.

    Builds one mixed operation (``nn.LayerChoice`` over pooling/identity/
    sep-conv/dil-conv candidates) per predecessor node, plus an input switch
    that selects 2 of the ``num_prev_nodes`` incoming edges.

    ``node_id`` identifies this node; ``num_downsample_connect`` is the number
    of leading predecessors that require stride-2 (reduction) operations.
    """
    super().__init__()
    self.ops = nn.ModuleList()
    # FIX: removed the dead local `choice_keys` — it accumulated
    # "{node_id}_p{i}" strings but was never read afterwards.
    for i in range(num_prev_nodes):
        # The first `num_downsample_connect` inputs come from a higher
        # resolution and must be downsampled.
        stride = 2 if i < num_downsample_connect else 1
        candidates = [
            ops.PoolBN('max', channels, 3, stride, 1, affine=False),
            ops.PoolBN('avg', channels, 3, stride, 1, affine=False),
            # A plain skip only works when no downsampling is needed.
            nn.Identity() if stride == 1 else ops.FactorizedReduce(channels, channels, affine=False),
            ops.SepConv(channels, channels, 3, stride, 1, affine=False),
            ops.SepConv(channels, channels, 5, stride, 2, affine=False),
            ops.DilConv(channels, channels, 3, stride, 2, 2, affine=False),
            ops.DilConv(channels, channels, 5, stride, 4, 2, affine=False),
        ]
        self.ops.append(nn.LayerChoice(candidates))
    self.drop_path = ops.DropPath()
    self.input_switch = nn.InputChoice(n_candidates=num_prev_nodes, n_chosen=2)
# Candidate operation factories: each maps (C, stride, affine) -> nn.Module.
# `C` is the channel count, `stride` the spatial stride, `affine` whether
# BatchNorm layers learn affine parameters.
'avg_pool_3x3': lambda C, stride, affine: nn.AvgPool2d(
    3, stride=stride, padding=1, count_include_pad=False),
'avg_pool_5x5': lambda C, stride, affine: nn.AvgPool2d(
    5, stride=stride, padding=2, count_include_pad=False),
# Max pools at various kernel sizes; padding keeps spatial size at stride 1.
'max_pool_2x2': lambda C, stride, affine: nn.MaxPool2d(2, stride=stride, padding=0),
'max_pool_3x3': lambda C, stride, affine: nn.MaxPool2d(3, stride=stride, padding=1),
'max_pool_5x5': lambda C, stride, affine: nn.MaxPool2d(5, stride=stride, padding=2),
'max_pool_7x7': lambda C, stride, affine: nn.MaxPool2d(7, stride=stride, padding=3),
# Identity when stride is 1; otherwise a FactorizedReduce performs the downsample.
'skip_connect': lambda C, stride, affine: nn.Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine),
# ReLU -> Conv -> BN stacks (channel-preserving).
'conv_1x1': lambda C, stride, affine: nn.Sequential(
    nn.ReLU(inplace=False),
    nn.Conv2d(C, C, 1, stride=stride, padding=0, bias=False),
    nn.BatchNorm2d(C, affine=affine)),
'conv_3x3': lambda C, stride, affine: nn.Sequential(
    nn.ReLU(inplace=False),
    nn.Conv2d(C, C, 3, stride=stride, padding=1, bias=False),
    nn.BatchNorm2d(C, affine=affine)),
# Separable convolutions (project-defined SepConv).
'sep_conv_3x3': lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine),
'sep_conv_5x5': lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine),
def __init__(self):
    """Model whose body repeats 2-3 blocks, each with an independent layer choice."""
    super().__init__()

    def candidate(index):
        # A fresh, unlabeled LayerChoice per repetition index, so every
        # repetition is chosen independently.
        return nn.LayerChoice([AddOne(), nn.Identity()])

    self.block = nn.Repeat(candidate, (2, 3), label='rep')
def __init__(self):
    """Model whose body repeats 3-5 blocks sharing one labeled layer choice."""
    super().__init__()
    # A single LayerChoice labeled 'lc' is handed to Repeat, so all
    # repetitions share the same choice decision.
    shared_choice = nn.LayerChoice([AddOne(), nn.Identity()], label='lc')
    self.block = nn.Repeat(shared_choice, (3, 5), label='rep')
def __init__(
    self,
    search_embed_dim: Tuple[int, ...] = (192, 216, 240),
    search_mlp_ratio: Tuple[float, ...] = (3.5, 4.0),
    search_num_heads: Tuple[int, ...] = (3, 4),
    search_depth: Tuple[int, ...] = (12, 13, 14),
    img_size: int = 224,
    patch_size: int = 16,
    in_chans: int = 3,
    num_classes: int = 1000,
    qkv_bias: bool = False,
    drop_rate: float = 0.,
    attn_drop_rate: float = 0.,
    drop_path_rate: float = 0.,
    pre_norm: bool = True,
    global_pool: bool = False,
    abs_pos: bool = True,
    qk_scale: Optional[float] = None,
    rpe: bool = True,
):
    """Searchable vision-transformer supernet.

    Embedding dimension, MLP ratio, head count and depth are all value/layer
    choices; everything else is a fixed hyper-parameter.
    """
    super().__init__()

    # Searchable dimensions. `embed_dim` feeds modules; `fixed_embed_dim`
    # (same label, so the same decision) sizes raw nn.Parameter tensors.
    embed_dim = nn.ValueChoice(list(search_embed_dim), label="embed_dim")
    fixed_embed_dim = nn.ModelParameterChoice(list(search_embed_dim), label="embed_dim")
    depth = nn.ValueChoice(list(search_depth), label="depth")

    # Patchify with a strided convolution.
    self.patch_embed = nn.Conv2d(in_chans, cast(int, embed_dim),
                                 kernel_size=patch_size, stride=patch_size)
    self.patches_num = int((img_size // patch_size) ** 2)
    self.global_pool = global_pool

    # Class token, initialized with truncated normal noise.
    self.cls_token = nn.Parameter(torch.zeros(1, 1, cast(int, fixed_embed_dim)))
    trunc_normal_(self.cls_token, std=.02)

    # Stochastic depth decay rule: drop-path rate grows linearly with depth.
    drop_path_schedule = [x.item() for x in torch.linspace(0, drop_path_rate, max(search_depth))]

    self.abs_pos = abs_pos
    if self.abs_pos:
        # Learnable absolute position embedding (+1 for the class token).
        self.pos_embed = nn.Parameter(
            torch.zeros(1, self.patches_num + 1, cast(int, fixed_embed_dim)))
        trunc_normal_(self.pos_embed, std=.02)

    def make_layer(index):
        # One candidate encoder layer per (mlp_ratio, num_heads) combination;
        # the per-position label makes each depth slot its own choice.
        candidates = [
            TransformerEncoderLayer(
                embed_dim=embed_dim,
                fixed_embed_dim=fixed_embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                drop_rate=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=drop_path_schedule[index],
                rpe_length=img_size // patch_size,
                qk_scale=qk_scale,
                rpe=rpe,
                pre_norm=pre_norm,
            )
            for mlp_ratio, num_heads in itertools.product(search_mlp_ratio, search_num_heads)
        ]
        return nn.LayerChoice(candidates, label=f'layer{index}')

    # The number of stacked layers is itself searchable.
    self.blocks = nn.Repeat(make_layer, depth)

    self.pre_norm = pre_norm
    if self.pre_norm:
        self.norm = nn.LayerNorm(cast(int, embed_dim))

    # Classification head; identity when num_classes == 0.
    if num_classes > 0:
        self.head = nn.Linear(cast(int, embed_dim), num_classes)
    else:
        self.head = nn.Identity()