Example #1
    def _build(self, s_in: Shape, c_out: int) -> Shape:
        c_in = s_in.num_features()
        max_exp = max(self.expansions)
        exp_mults = [e / max_exp for e in self.expansions]
        c_mid = make_divisible(int(c_in * max_exp), divisible=8)
        self.has_skip = self.stride == 1 and c_in == c_out
        ops = []

        self.conv = SuperKernelConv(c_mid,
                                    c_mid,
                                    self.name,
                                    self.strategy_name,
                                    self.k_sizes,
                                    exp_mults,
                                    self.dilation,
                                    self.stride,
                                    self.padding,
                                    c_mid,
                                    bias=False)

        if max_exp > 1:
            # pw
            ops.extend([
                nn.Conv2d(c_in, c_mid, 1, 1, 0, bias=False),
                nn.BatchNorm2d(c_mid, affine=self.bn_affine),
                Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            ])
        # dw
        ops.extend([
            self.conv,
            nn.BatchNorm2d(c_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
        # optional attention module
        if isinstance(self.att_dict, dict):
            ops.append(
                AbstractAttentionModule.module_from_dict(c_mid,
                                                         c_substitute=c_in,
                                                         **self.att_dict))
        # pw
        ops.extend([
            nn.Conv2d(c_mid, c_out, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_out, affine=self.bn_affine),
        ])
        self.block = nn.Sequential(*ops)
        if self.has_skip:
            self.block = DropPathModule(self.block)
        return self.probe_outputs(s_in)
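
The channel arithmetic above can be illustrated standalone. The make_divisible below is a simplified stand-in for the project's helper, and the example values (expansions=(3, 6), 32 input channels) are assumptions, not taken from these examples.

def make_divisible(v: int, divisible: int = 8) -> int:
    # round to the nearest multiple of `divisible`, never going below it
    return max(divisible, int(v + divisible / 2) // divisible * divisible)

expansions = (3, 6)                             # assumed search space
c_in = 32                                       # assumed input channel count
max_exp = max(expansions)                       # 6
exp_mults = [e / max_exp for e in expansions]   # [0.5, 1.0]
c_mid = make_divisible(int(c_in * max_exp))     # 192 shared inner channels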
Example #2
    def _build(self, s_in: Shape, c_out: int) -> Shape:
        c_in = s_in.num_features()
        c_inner = int(c_out * self.expansion)
        self.block = self._build_block(c_in, c_inner, c_out,
                                       self.has_first_act)
        if self.shortcut_type in [None, 'None']:
            pass
        elif self.shortcut_type == 'id':
            self.shortcut = nn.Identity()
        elif self.shortcut_type == 'conv1x1':
            self.shortcut = nn.Sequential(*[
                nn.Conv2d(c_in, c_out, 1, self.stride, 0, bias=False),
                nn.BatchNorm2d(c_out, affine=self.bn_affine),
            ])
        elif self.shortcut_type == 'avg_conv':
            self.shortcut = nn.Sequential(*[
                nn.AvgPool2d(kernel_size=2, stride=self.stride, padding=0),
                nn.Conv2d(c_in, c_out, 1, 1, 0, bias=False),
            ])
        else:
            raise NotImplementedError('shortcut type "%s" is not implemented' %
                                      self.shortcut_type)
        self.has_shortcut = isinstance(self.shortcut, nn.Module)
        if self.has_shortcut:
            self.block = DropPathModule(self.block)
        return self.probe_outputs(s_in)
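
The forward pass is not part of this example. A plausible sketch, assuming the block adds the shortcut output whenever one was built (an assumption, not the project's actual code):

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out = self.block(x)
        if self.has_shortcut:
            # residual connection through the configured shortcut type
            out = out + self.shortcut(x)
        return out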
Example #3
    def _build(self, s_in: Shape, c_out: int) -> Shape:
        c_in = s_in.num_features()
        c_mid = make_divisible(int(c_in * self.expansion), divisible=8)
        self.has_skip = self.stride == 1 and c_in == c_out
        ops = []
        conv_kwargs = dict(dilation=self.dilation, padding=self.padding)

        if self.expansion > 1:
            # pw
            ops.extend([
                get_conv2d(c_in, c_mid, k_size=self.k_size_in, groups=1, **conv_kwargs),
                nn.BatchNorm2d(c_mid, affine=self.bn_affine),
                Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            ])
        # dw
        ops.extend([
            get_conv2d(c_mid, c_mid, k_size=self.k_size, stride=self.stride, groups=-1, **conv_kwargs),
            nn.BatchNorm2d(c_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
        # optional squeeze+excitation module
        if isinstance(self.att_dict, dict):
            ops.append(AbstractAttentionModule.module_from_dict(c_mid, c_substitute=c_in, att_dict=self.att_dict))
        # pw
        ops.extend([
            get_conv2d(c_mid, c_out, k_size=self.k_size_out, groups=1, **conv_kwargs),
            nn.BatchNorm2d(c_out, affine=self.bn_affine),
        ])
        self.block = nn.Sequential(*ops)
        if self.has_skip:
            self.block = DropPathModule(self.block)
        return self.probe_outputs(s_in)
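
For reference, the composed pw -> dw -> pw structure is a standard mobile inverted bottleneck. A self-contained sketch with plain torch modules and assumed values (c_in=32, expansion=6, k=3, stride=1); this is an illustration, not the project's get_conv2d:

import torch
import torch.nn as nn

c_in, c_out, expansion, k, stride = 32, 32, 6, 3, 1
c_mid = c_in * expansion
mbconv = nn.Sequential(
    nn.Conv2d(c_in, c_mid, 1, 1, 0, bias=False),                           # pw expand
    nn.BatchNorm2d(c_mid),
    nn.ReLU6(inplace=True),
    nn.Conv2d(c_mid, c_mid, k, stride, k // 2, groups=c_mid, bias=False),  # dw
    nn.BatchNorm2d(c_mid),
    nn.ReLU6(inplace=True),
    nn.Conv2d(c_mid, c_out, 1, 1, 0, bias=False),                          # pw project, no activation
    nn.BatchNorm2d(c_out),
)
x = torch.randn(2, c_in, 16, 16)
print(mbconv(x).shape)  # torch.Size([2, 32, 16, 16])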
Example #4
    def __init__(self, ops: nn.ModuleList, **stored_kwargs):
        super().__init__(**stored_kwargs)
        ops = nn.ModuleList([
            DropPathModule(op,
                           op.is_layer(SkipLayer),
                           drop_p=0.0,
                           drop_ids=True) for op in ops
        ])
        self._add_to_submodule_lists(ops=ops)
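
DropPathModule itself is not shown in these examples. A minimal stand-in for the underlying stochastic-depth idea (illustration only, assumed behavior):

import torch
import torch.nn as nn

class DropPathSketch(nn.Module):
    def __init__(self, module: nn.Module, drop_p: float = 0.0):
        super().__init__()
        self.module, self.drop_p = module, drop_p

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out = self.module(x)
        if self.training and self.drop_p > 0.0:
            # drop the whole path per sample, rescale the surviving ones
            keep = torch.rand(x.size(0), 1, 1, 1, device=x.device) >= self.drop_p
            out = out * keep / (1.0 - self.drop_p)
        return out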
Example #5
    def _build(self, s_in: Shape, c_out: int) -> Shape:
        assert not (c_out <= s_in.num_features() and self.stride > 1), "must increase num features when stride is >1"
        assert s_in.num_features() % 4 == 0 and c_out % 2 == 0, "num input features must be divisible by 4, num output features by 2"

        padding = get_padding(self.padding, self.k_size, self.stride, self.dilation)
        padding2 = get_padding(self.padding, self.k_size, 1, self.dilation)

        if self.stride >= 2:
            c_side = c_main_in = s_in.num_features()

            self.branch_proj = nn.Sequential(*[
                # dw
                nn.Conv2d(c_side, c_side, self.k_size, self.stride, padding, groups=c_side, bias=False),
                nn.BatchNorm2d(c_side, affine=self.bn_affine),
                # pw
                nn.Conv2d(c_side, c_side, 1, 1, 0, bias=False),
                nn.BatchNorm2d(c_side, affine=self.bn_affine),
                Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            ])
        else:
            c_side = c_main_in = s_in.num_features() // 2
        c_main_out = c_out - c_side
        c_main_mid = int(c_out // 2 * self.expansion)

        bm = [
            # dw 1
            nn.Conv2d(c_main_in, c_main_in, self.k_size, self.stride, padding, groups=c_main_in, bias=False),
            nn.BatchNorm2d(c_main_in, affine=self.bn_affine),
            # pw 1
            nn.Conv2d(c_main_in, c_main_mid, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            # dw 2
            nn.Conv2d(c_main_mid, c_main_mid, self.k_size, 1, padding2, groups=c_main_mid, bias=False),
            nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
            # pw 2
            nn.Conv2d(c_main_mid, c_main_mid, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            # dw 3
            nn.Conv2d(c_main_mid, c_main_mid, self.k_size, 1, padding2, groups=c_main_mid, bias=False),
            nn.BatchNorm2d(c_main_mid, affine=self.bn_affine),
            # pw 3
            nn.Conv2d(c_main_mid, c_main_out, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_main_out, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ]
        # optional attention module
        if isinstance(self.att_dict, dict):
            bm.append(AbstractAttentionModule.module_from_dict(c_main_out, c_substitute=c_main_in,
                                                               att_dict=self.att_dict))

        self.branch_main = DropPathModule(nn.Sequential(*bm))
        return self.probe_outputs(s_in)
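
The forward pass is not shown; ShuffleNet V2-style blocks like this one typically split or concatenate the two branches along the channel dimension and finish with a channel shuffle. A standalone sketch of the shuffle step (illustration, not the project's code):

import torch

def channel_shuffle(x: torch.Tensor, groups: int = 2) -> torch.Tensor:
    n, c, h, w = x.shape
    # reshape to (n, groups, c//groups, h, w), swap the two channel axes, flatten back
    return x.view(n, groups, c // groups, h, w).transpose(1, 2).reshape(n, c, h, w)

x = torch.arange(8.0).view(1, 8, 1, 1)
print(channel_shuffle(x).flatten().tolist())  # [0.0, 4.0, 1.0, 5.0, 2.0, 6.0, 3.0, 7.0]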
Example #6
    def __init__(self, name: str, strategy_name='default', skip_op: str = None, k_size_in=1, k_size_out=1,
                 k_sizes=(3, 5, 7), stride=1, padding='same', expansions=(3, 6),
                 dilation=1, bn_affine=True, act_fun='relu6', act_inplace=True, att_dict: dict = None):
        """
        A fused layer for several kernel sizes and expansion sizes, to share the 1x1 conv weights.
        Currently designed for only a single kernel+expansion choice per forward pass, and likewise for the final config.

        :param name: name under which to register architecture weights
        :param strategy_name: name of the strategy for architecture weights
        :param skip_op: optional layer name, adds an op that enables skipping the entire block, e.g. "SkipLayer"
        :param k_size_in: kernel size(s) for the first conv kernel (expanding)
        :param k_size_out: kernel size(s) for the last conv kernel (projecting)
        :param k_sizes: kernel sizes for the spatial kernel
        :param stride: stride for the spatial kernel
        :param padding: 'same' or number
        :param expansions: multipliers for inner channels, based on input channels
        :param dilation: dilation for the spatial kernel
        :param bn_affine: affine batch norm
        :param act_fun: activation function
        :param act_inplace: whether to use the activation function in-place if possible (e.g. ReLU)
        :param att_dict: None to disable attention modules, otherwise a dict with respective kwargs
        """
        super().__init__()
        self._add_to_kwargs(name=name, strategy_name=strategy_name, skip_op=skip_op,
                            k_size_in=k_size_in, k_size_out=k_size_out,
                            k_sizes=k_sizes, stride=stride, expansions=expansions,
                            padding=padding, dilation=dilation, bn_affine=bn_affine,
                            act_fun=act_fun, act_inplace=act_inplace, att_dict=att_dict)
        self._add_to_print_kwargs(has_skip=False, has_att=isinstance(self.att_dict, dict))
        self.ws = None
        self.skip = None
        self.pw_in = nn.ModuleList([])
        self.dw_conv = nn.ModuleList([])
        self.dw_att = nn.ModuleList([])
        self.pw_out = nn.ModuleList([])
        self.drop_path = DropPathModule()

        self._choices_by_idx = []
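
The owning class is not named in this example. For reference, the documented parameters collected into a hypothetical kwargs dict (all values here are assumptions or the documented defaults):

layer_kwargs = dict(
    name='block0/fused',        # placeholder name for registering architecture weights
    strategy_name='default',
    skip_op=None,               # or e.g. 'SkipLayer' to make the whole block skippable
    k_size_in=1, k_size_out=1,  # first (expanding) and last (projecting) conv kernels
    k_sizes=(3, 5, 7),          # candidate spatial kernel sizes
    stride=1, padding='same',
    expansions=(3, 6),          # candidate inner-channel multipliers
    dilation=1, bn_affine=True,
    act_fun='relu6', act_inplace=True,
    att_dict=None,              # or a dict of attention-module kwargs
)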
Example #7
    def _build(self, s_in: Shape, c_out: int) -> Shape:
        c_in = s_in.num_features()
        c_mid = int(c_in * max(self.expansions))
        self.has_skip = self.stride == 1 and c_in == c_out
        max_exp = max(self.expansions)
        exp_mults = [e / max_exp for e in self.expansions]
        ops = []

        if max_exp > 1:
            # pw
            ops.extend([
                nn.Conv2d(c_in, c_mid, 1, 1, 0, groups=1, bias=False),
                nn.BatchNorm2d(c_mid, affine=self.bn_affine),
                Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            ])
        # dw
        self.conv = SuperKernelThresholdConv(c_mid, c_mid, self.k_sizes, exp_mults, self.dilation, self.stride,
                                             self.padding, -1, bias=False)
        ops.extend([
            self.conv,
            nn.BatchNorm2d(c_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
        # optional squeeze+excitation module with searchable width
        if isinstance(self.sse_dict, dict):
            self.learned_se = SuperSqueezeExcitationChannelThresholdModule(c_mid, c_substitute=c_in, **self.sse_dict)
            ops.append(self.learned_se)
        else:
            self.learned_se = None
        # pw
        ops.extend([
            nn.Conv2d(c_mid, c_out, 1, 1, 0, groups=1, bias=False),
            nn.BatchNorm2d(c_out, affine=self.bn_affine),
        ])
        self.block = nn.Sequential(*ops)
        if self.has_skip:
            self.block = DropPathModule(self.block)
        return self.probe_outputs(s_in)
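
SuperSqueezeExcitationChannelThresholdModule is not shown in these examples. For orientation, a plain squeeze-and-excitation sketch without the searchable width (illustration only; the reduction factor of 4 is an assumption):

import torch
import torch.nn as nn

class SqueezeExcitationSketch(nn.Module):
    def __init__(self, channels: int, reduction: int = 4):
        super().__init__()
        self.gate = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),                        # squeeze: global average pool
            nn.Conv2d(channels, channels // reduction, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels // reduction, channels, 1),
            nn.Sigmoid(),                                   # per-channel gates in (0, 1)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x * self.gate(x)                             # excite: rescale the channels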
Example #8
class SuperMobileInvertedConvLayer(AbstractLayer):
    def __init__(self,
                 name: str,
                 strategy_name='default',
                 k_sizes=(3, 5, 7),
                 stride=1,
                 padding='same',
                 expansions=(3, 6),
                 dilation=1,
                 bn_affine=True,
                 act_fun='relu6',
                 act_inplace=True,
                 att_dict: dict = None):
        """
        A super kernel layer for several kernel sizes and expansion sizes, to share as many weights as possible.

        :param name: name under which to register architecture weights
        :param strategy_name: name of the strategy for architecture weights
        :param k_sizes: kernel sizes for the spatial kernel
        :param stride: stride for the spatial kernel
        :param padding: 'same' or number
        :param expansions: multipliers for inner channels, based on input channels
        :param dilation: dilation for the spatial kernel
        :param bn_affine: affine batch norm
        :param act_fun: activation function
        :param act_inplace: whether to use the activation function in-place if possible (e.g. ReLU)
        :param att_dict: None to disable attention modules, otherwise a dict with respective kwargs
        """
        super().__init__()
        self._add_to_kwargs(name=name,
                            strategy_name=strategy_name,
                            k_sizes=k_sizes,
                            stride=stride,
                            expansions=sorted(expansions),
                            padding=padding,
                            dilation=dilation,
                            bn_affine=bn_affine,
                            act_fun=act_fun,
                            act_inplace=act_inplace,
                            att_dict=att_dict)
        self._add_to_print_kwargs(has_skip=False)
        self.conv = None
        self.block = None

    def _build(self, s_in: Shape, c_out: int) -> Shape:
        c_in = s_in.num_features()
        max_exp = max(self.expansions)
        exp_mults = [e / max_exp for e in self.expansions]
        c_mid = make_divisible(int(c_in * max_exp), divisible=8)
        self.has_skip = self.stride == 1 and c_in == c_out
        ops = []

        self.conv = SuperKernelConv(c_mid,
                                    c_mid,
                                    self.name,
                                    self.strategy_name,
                                    self.k_sizes,
                                    exp_mults,
                                    self.dilation,
                                    self.stride,
                                    self.padding,
                                    c_mid,
                                    bias=False)

        if max_exp > 1:
            # pw
            ops.extend([
                nn.Conv2d(c_in, c_mid, 1, 1, 0, bias=False),
                nn.BatchNorm2d(c_mid, affine=self.bn_affine),
                Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
            ])
        # dw
        ops.extend([
            self.conv,
            nn.BatchNorm2d(c_mid, affine=self.bn_affine),
            Register.act_funs.get(self.act_fun)(inplace=self.act_inplace),
        ])
        # optional attention module
        if isinstance(self.att_dict, dict):
            ops.append(
                AbstractAttentionModule.module_from_dict(c_mid,
                                                         c_substitute=c_in,
                                                         **self.att_dict))
        # pw
        ops.extend([
            nn.Conv2d(c_mid, c_out, 1, 1, 0, bias=False),
            nn.BatchNorm2d(c_out, affine=self.bn_affine),
        ])
        self.block = nn.Sequential(*ops)
        if self.has_skip:
            self.block = DropPathModule(self.block)
        return self.probe_outputs(s_in)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.has_skip:
            return x + self.block.forward(x)
        return self.block.forward(x)

    def config(self, finalize=False, **__):
        cfg = super().config(finalize=finalize, **__)
        if finalize:
            cfg['name'] = MobileInvertedConvLayer.__name__
            kwargs = cfg['kwargs']
            kwargs.pop('name')
            kwargs.pop('strategy_name')
            kwargs.pop('k_sizes')
            kwargs.pop('expansions')
            ks = self.conv.get_finalized_kernel()
            es = self.conv.get_finalized_channel_mult()
            kwargs['k_size'] = ks[1]
            kwargs['expansion'] = self.expansions[es[0]]
            cfg['kwargs'] = kwargs
        return cfg
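
A hypothetical instantiation of the layer above; the surrounding framework (Shape, the weight strategy, the caller of _build) is assumed rather than shown in these examples:

layer = SuperMobileInvertedConvLayer(
    name='block0',              # placeholder name for the architecture weights
    strategy_name='default',
    k_sizes=(3, 5, 7),
    expansions=(3, 6),
    stride=1,
    att_dict=None,
)
# the framework is expected to call layer._build(s_in, c_out) with the input Shape,
# and layer.config(finalize=True) later yields a MobileInvertedConvLayer config
# with the single chosen 'k_size' and 'expansion'.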