Example #1
    def __init__(
        self,
        in_channels: nn.MaybeChoice[int],
        out_channels: nn.MaybeChoice[int],
        expand_ratio: nn.MaybeChoice[float],
        kernel_size: nn.MaybeChoice[int] = 3,
        stride: int = 1,
        squeeze_excite: Optional[Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module]] = None,
        norm_layer: Optional[Callable[[int], nn.Module]] = None,
        activation_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        self.stride = stride
        self.out_channels = out_channels
        assert stride in [1, 2]

        hidden_ch = cast(int, make_divisible(in_channels * expand_ratio, 8))

        # NOTE: this equivalence check should also work for ValueChoice
        self.has_skip = stride == 1 and in_channels == out_channels

        layers: List[nn.Module] = [
            # point-wise convolution
            # NOTE: some papers omit this point-wise convolution when stride = 1.
            # In our implementation, if this pw convolution is intended to be omitted,
            # please use SepConv instead.
            ConvBNReLU(in_channels, hidden_ch, kernel_size=1,
                       norm_layer=norm_layer, activation_layer=activation_layer),
            # depth-wise
            ConvBNReLU(hidden_ch, hidden_ch, stride=stride, kernel_size=kernel_size, groups=hidden_ch,
                       norm_layer=norm_layer, activation_layer=activation_layer),
            # SE
            squeeze_excite(
                cast(int, hidden_ch),
                cast(int, in_channels)
            ) if squeeze_excite is not None else nn.Identity(),
            # pw-linear
            ConvBNReLU(hidden_ch, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity),
        ]

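        # Re-initialize the Sequential base with the assembled layers;
        # simplify_sequential presumably flattens nested blocks and drops Identity placeholders.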
        super().__init__(*simplify_sequential(layers))
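
A minimal usage sketch for the block above, assuming the constructor belongs to an InvertedResidual(nn.Sequential) class and that nn is nni.retiarii.nn.pytorch; every value and label below is illustrative:

block = InvertedResidual(
    in_channels=nn.ValueChoice([16, 24, 32], label='width'),
    # Reusing the label ties out_channels to the same decision as in_channels,
    # so in_channels == out_channels holds and the skip path stays enabled.
    out_channels=nn.ValueChoice([16, 24, 32], label='width'),
    expand_ratio=nn.ValueChoice([3.0, 6.0], label='expand'),
    kernel_size=nn.ValueChoice([3, 5, 7], label='ks'),
    stride=1,
)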
Example #2
    def __init__(
        self,
        in_channels: nn.MaybeChoice[int],
        out_channels: nn.MaybeChoice[int],
        kernel_size: nn.MaybeChoice[int] = 3,
        stride: int = 1,
        squeeze_excite: Optional[Callable[[nn.MaybeChoice[int], nn.MaybeChoice[int]], nn.Module]] = None,
        norm_layer: Optional[Callable[[int], nn.Module]] = None,
        activation_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        blocks = [
            # depth-wise
            ConvBNReLU(in_channels, in_channels, stride=stride, kernel_size=kernel_size, groups=in_channels,
                       norm_layer=norm_layer, activation_layer=activation_layer),
            # optional squeeze-and-excite
            squeeze_excite(in_channels, in_channels) if squeeze_excite is not None else nn.Identity(),
            # pw-linear
            ConvBNReLU(in_channels, out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.Identity),
        ]
        super().__init__(*simplify_sequential(blocks))
        self.has_skip = stride == 1 and in_channels == out_channels
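
For contrast with Example #1, a hypothetical instantiation of this separable-convolution variant; the class name SepConv is inferred from Example #1's comment and treated as an assumption:

conv = SepConv(
    in_channels=32,
    out_channels=32,   # equal widths with stride 1 => has_skip is True
    kernel_size=nn.ValueChoice([3, 5], label='dw_ks'),
    stride=1,
)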
Example #3
    def __init__(self, node_id, num_prev_nodes, channels, num_downsample_connect):
        super().__init__()
        self.ops = nn.ModuleList()
        choice_keys = []
        for i in range(num_prev_nodes):
            # The first `num_downsample_connect` predecessors come from a lower
            # resolution, so their candidate ops downsample with stride 2.
            stride = 2 if i < num_downsample_connect else 1
            choice_keys.append("{}_p{}".format(node_id, i))
            self.ops.append(
                nn.LayerChoice([
                    ops.PoolBN('max', channels, 3, stride, 1, affine=False),
                    ops.PoolBN('avg', channels, 3, stride, 1, affine=False),
                    nn.Identity() if stride == 1 else ops.FactorizedReduce(channels, channels, affine=False),
                    ops.SepConv(channels, channels, 3, stride, 1, affine=False),
                    ops.SepConv(channels, channels, 5, stride, 2, affine=False),
                    ops.DilConv(channels, channels, 3, stride, 2, 2, affine=False),
                    ops.DilConv(channels, channels, 5, stride, 4, 2, affine=False),
                ]))
        self.drop_path = ops.DropPath()
        self.input_switch = nn.InputChoice(n_candidates=num_prev_nodes, n_chosen=2)
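
A sketch of the forward pass that typically pairs with this constructor in DARTS-style cells: one mixed op per predecessor, drop-path on every edge, and input_switch summing the two selected inputs. This is a hedged reconstruction, not verbatim library code:

    def forward(self, prev_nodes):
        assert len(self.ops) == len(prev_nodes)
        # One candidate output per predecessor node.
        out = [op(node) for op, node in zip(self.ops, prev_nodes)]
        out = [self.drop_path(o) for o in out]
        # InputChoice selects n_chosen=2 of the outputs and reduces them (sum).
        return self.input_switch(out)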
Example #4
    'avg_pool_3x3': lambda C, stride, affine:
        nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False),
    'avg_pool_5x5': lambda C, stride, affine:
        nn.AvgPool2d(5, stride=stride, padding=2, count_include_pad=False),
    'max_pool_2x2': lambda C, stride, affine: nn.MaxPool2d(2, stride=stride, padding=0),
    'max_pool_3x3': lambda C, stride, affine: nn.MaxPool2d(3, stride=stride, padding=1),
    'max_pool_5x5': lambda C, stride, affine: nn.MaxPool2d(5, stride=stride, padding=2),
    'max_pool_7x7': lambda C, stride, affine: nn.MaxPool2d(7, stride=stride, padding=3),
    'skip_connect': lambda C, stride, affine:
        nn.Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine),
    'conv_1x1': lambda C, stride, affine: nn.Sequential(
        nn.ReLU(inplace=False),
        nn.Conv2d(C, C, 1, stride=stride, padding=0, bias=False),
        nn.BatchNorm2d(C, affine=affine)),
    'conv_3x3': lambda C, stride, affine: nn.Sequential(
        nn.ReLU(inplace=False),
        nn.Conv2d(C, C, 3, stride=stride, padding=1, bias=False),
        nn.BatchNorm2d(C, affine=affine)),
    'sep_conv_3x3': lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine),
    'sep_conv_5x5': lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine),
Example #5
    def __init__(self):
        super().__init__()
        self.block = nn.Repeat(
            lambda index: nn.LayerChoice([AddOne(), nn.Identity()]),
            (2, 3), label='rep')
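
AddOne is referenced but not defined in the snippet; a plausible stand-in, with the search behavior this Repeat encodes spelled out (both are assumptions consistent with the code above):

class AddOne(nn.Module):
    # Assumed helper: adds one element-wise so each choice is observable.
    def forward(self, x):
        return x + 1

# The lambda builds a fresh, unlabeled LayerChoice per index, so each of the
# 2 to 3 repetitions chooses between AddOne and Identity independently.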
Example #6
    def __init__(self):
        super().__init__()
        self.block = nn.Repeat(
            nn.LayerChoice([AddOne(), nn.Identity()], label='lc'),
            (3, 5), label='rep')
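
Unlike Example #5, the LayerChoice here is built once with the explicit label 'lc', and NNI ties same-labeled choices to a single decision: all 3 to 5 repetitions therefore share one pick. Reusing the AddOne stand-in from Example #5, the frozen behaviors would be:

# 'lc' -> AddOne with depth d in {3, 4, 5}:  block(x) == x + d
# 'lc' -> Identity with any depth:           block(x) == x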
Example #7
    def __init__(
            self,
            search_embed_dim: Tuple[int, ...] = (192, 216, 240),
            search_mlp_ratio: Tuple[float, ...] = (3.5, 4.0),
            search_num_heads: Tuple[int, ...] = (3, 4),
            search_depth: Tuple[int, ...] = (12, 13, 14),
            img_size: int = 224,
            patch_size: int = 16,
            in_chans: int = 3,
            num_classes: int = 1000,
            qkv_bias: bool = False,
            drop_rate: float = 0.,
            attn_drop_rate: float = 0.,
            drop_path_rate: float = 0.,
            pre_norm: bool = True,
            global_pool: bool = False,
            abs_pos: bool = True,
            qk_scale: Optional[float] = None,
            rpe: bool = True,
    ):
        super().__init__()

        embed_dim = nn.ValueChoice(list(search_embed_dim), label="embed_dim")
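        # ModelParameterChoice below reuses the "embed_dim" label, so it follows
        # the same sampled width as the ValueChoice above; this variant is usable
        # for sizing nn.Parameter tensors such as cls_token and pos_embed.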
        fixed_embed_dim = nn.ModelParameterChoice(
            list(search_embed_dim), label="embed_dim")
        depth = nn.ValueChoice(list(search_depth), label="depth")
        self.patch_embed = nn.Conv2d(
            in_chans,
            cast(int, embed_dim),
            kernel_size=patch_size,
            stride=patch_size)
        self.patches_num = int((img_size // patch_size) ** 2)
        self.global_pool = global_pool
        self.cls_token = nn.Parameter(torch.zeros(1, 1, cast(int, fixed_embed_dim)))
        trunc_normal_(self.cls_token, std=.02)

        dpr = [
            x.item() for x in torch.linspace(
                0,
                drop_path_rate,
                max(search_depth))]  # stochastic depth decay rule

        self.abs_pos = abs_pos
        if self.abs_pos:
            self.pos_embed = nn.Parameter(torch.zeros(
                1, self.patches_num + 1, cast(int, fixed_embed_dim)))
            trunc_normal_(self.pos_embed, std=.02)

        self.blocks = nn.Repeat(lambda index: nn.LayerChoice([
            TransformerEncoderLayer(embed_dim=embed_dim,
                                    fixed_embed_dim=fixed_embed_dim,
                                    num_heads=num_heads, mlp_ratio=mlp_ratio,
                                    qkv_bias=qkv_bias, drop_rate=drop_rate,
                                    attn_drop=attn_drop_rate,
                                    drop_path=dpr[index],
                                    rpe_length=img_size // patch_size,
                                    qk_scale=qk_scale, rpe=rpe,
                                    pre_norm=pre_norm,)
            for mlp_ratio, num_heads in itertools.product(search_mlp_ratio, search_num_heads)
        ], label=f'layer{index}'), depth)
        self.pre_norm = pre_norm
        if self.pre_norm:
            self.norm = nn.LayerNorm(cast(int, embed_dim))
        self.head = nn.Linear(
            cast(int, embed_dim),
            num_classes) if num_classes > 0 else nn.Identity()
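
A hedged instantiation sketch for this constructor; the class name AutoFormer follows NNI's model-space hub naming and is an assumption, since the snippet shows only the __init__ signature:

space = AutoFormer(
    search_embed_dim=(192, 216, 240),
    search_mlp_ratio=(3.5, 4.0),
    search_num_heads=(3, 4),
    search_depth=(12, 13, 14),
    num_classes=1000,
)
# Each sampled model fixes one embed_dim and one depth globally (shared labels),
# while every transformer layer picks its (mlp_ratio, num_heads) pair
# independently via its own f'layer{index}' label.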