Example #1
    def build_net_via_cfg(cfg, input_channel, last_channel, n_classes, dropout_rate):
        # first conv layer
        first_conv = ConvLayer(
            3, input_channel, kernel_size=3, stride=2, use_bn=True, act_func='h_swish', ops_order='weight_bn_act'
        )
        # build mobile blocks
        feature_dim = input_channel
        blocks = []
        for stage_id, block_config_list in cfg.items():
            for k, mid_channel, out_channel, use_se, act_func, stride, expand_ratio in block_config_list:
                mb_conv = MBInvertedConvLayer(
                    feature_dim, out_channel, k, stride, expand_ratio, mid_channel, act_func, use_se
                )
                if stride == 1 and out_channel == feature_dim:
                    shortcut = IdentityLayer(out_channel, out_channel)
                else:
                    shortcut = None
                blocks.append(MobileInvertedResidualBlock(mb_conv, shortcut))
                feature_dim = out_channel
        # final expand layer
        final_expand_layer = ConvLayer(
            feature_dim, feature_dim * 6, kernel_size=1, use_bn=True, act_func='h_swish', ops_order='weight_bn_act',
        )
        feature_dim = feature_dim * 6
        # feature mix layer
        feature_mix_layer = ConvLayer(
            feature_dim, last_channel, kernel_size=1, bias=False, use_bn=False, act_func='h_swish',
        )
        # classifier
        classifier = LinearLayer(last_channel, n_classes, dropout_rate=dropout_rate)

        return first_conv, blocks, final_expand_layer, feature_mix_layer, classifier
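For reference, a minimal sketch of the cfg format this function expects, inferred from the tuple unpacking in the loop above; the stage keys and block settings are illustrative, not taken from a real search result:

    example_cfg = {
        'stage_0': [
            # (kernel_size, mid_channel, out_channel, use_se, act_func, stride, expand_ratio)
            (3, 64, 24, False, 'relu', 2, 4),
            (3, 96, 24, False, 'relu', 1, 4),
        ],
        'stage_1': [
            (5, 144, 40, True, 'h_swish', 2, 6),
        ],
    }
    first_conv, blocks, final_expand_layer, feature_mix_layer, classifier = build_net_via_cfg(
        example_cfg, input_channel=16, last_channel=1280, n_classes=1000, dropout_rate=0.1)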
Example #2
    def get_active_subnet(self, in_channel, preserve_weight=True):
        sub_layer = ConvLayer(in_channel,
                              self.active_out_channel,
                              self.kernel_size,
                              self.stride,
                              self.dilation,
                              use_bn=self.use_bn,
                              act_func=self.act_func)
        sub_layer = sub_layer.to(get_net_device(self))

        if not preserve_weight:
            return sub_layer

        sub_layer.conv.weight.data.copy_(
            self.conv.conv.weight.data[:self.active_out_channel, :in_channel, :, :])
        if self.use_bn:
            copy_bn(sub_layer.bn, self.bn.bn)

        return sub_layer
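The weight copy above slices the parent conv kernel down to the active output width; copy_bn presumably does the same for the batch-norm parameters. A minimal sketch of such a helper, assuming standard torch.nn.BatchNorm2d modules (the actual helper may differ):

    import torch.nn as nn

    def copy_bn_sketch(target_bn: nn.BatchNorm2d, src_bn: nn.BatchNorm2d):
        # Copy affine parameters and running statistics, truncated to the
        # (possibly smaller) feature count of the target layer.
        feature_dim = target_bn.num_features
        target_bn.weight.data.copy_(src_bn.weight.data[:feature_dim])
        target_bn.bias.data.copy_(src_bn.bias.data[:feature_dim])
        target_bn.running_mean.data.copy_(src_bn.running_mean.data[:feature_dim])
        target_bn.running_var.data.copy_(src_bn.running_var.data[:feature_dim])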
Example #3
    def __init__(self,
                 bn_param=(0.1, 1e-5),
                 dropout_rate=0.1,
                 base_stage_width=None,
                 width_mult_list=1.0,
                 ks_list=3,
                 expand_ratio_list=6,
                 depth_list=4,
                 pixelshuffle_depth_list=2):

        self.width_mult_list = int2list(width_mult_list, 1)  # this controls the output width
        self.ks_list = int2list(ks_list, 1)
        self.expand_ratio_list = int2list(expand_ratio_list, 1)
        self.depth_list = int2list(depth_list, 1)
        self.pixelshuffle_depth_list = int2list(pixelshuffle_depth_list, 1)
        self.base_stage_width = base_stage_width

        self.width_mult_list.sort()
        self.ks_list.sort()
        self.expand_ratio_list.sort()
        self.depth_list.sort()
        self.pixelshuffle_depth_list.sort()
        # FROM [3, 64,  64, 64, 64, 64,  64, 64,  64, 64,  64, 64, 64, 64,  64, 64,  64, 64]
        base_stage_width = [
            16, 64, 64, 64, 64, 64, 64, 3, 64, 64, 64, 64, 64, 64, 64, 256, 3
        ]
        # Stage layout:    [Unshuffle | ResBlock x4 | ResConv x2 | x2 | ResBlock x4 | ResConv x2 | Shuffle | Out]
        # Per-stage depth: [2, 4, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 4, 1, 1, 2, 1]
        # Shortcut types:  skip over the ResBlock groups, conv over the ResConv groups

        # final_expand_width = [
        #     make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8) for _ in self.width_mult_list
        # ]
        # last_channel = [
        #     make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8) for _ in self.width_mult_list
        # ]

        stride_stages = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
        act_stages = [
            'pixelunshuffle', 'relu6', 'relu6', 'relu6', 'relu6', None, None,
            None, None, 'relu6', 'relu6', 'relu6', 'relu6', None, None,
            'pixelshuffle', None
        ]
        se_stages = [
            False, False, False, False, False, False, False, False, False,
            False, False, False, False, False, False, False, False
        ]
        if depth_list is None:
            n_block_list = [1, 2, 3, 4, 2, 3]
            self.depth_list = [4, 4]
            print('Use MobileNetV3 Depth Setting')
        else:
            n_block_list = ([max(self.pixelshuffle_depth_list)] +
                            [max(self.depth_list)] * 4 + [1] * 4 +
                            [max(self.depth_list)] * 4 + [1] * 2 +
                            [max(self.pixelshuffle_depth_list)] +
                            [1])  # 2 is the depth of pixelshuffle/pixelunshuffle
            # -> [2, 4, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 4, 1, 1, 2, 1]
        width_list = []
        for base_width in base_stage_width:
            # width = [make_divisible(base_width * width_mult, 8) for width_mult in self.width_mult_list]
            width = [
                make_divisible(base_width * width_mult, 1)
                for width_mult in self.width_mult_list
            ]
            width_list.append(width)

        #################################################################################################### encoder unshuffle
        input_channel = width_list[0]
        enc_first_pixelunshuffle = ConvLayer(3,
                                             max(input_channel),
                                             kernel_size=3,
                                             stride=stride_stages[0],
                                             act_func=act_stages[0],
                                             use_bn=True)
        enc_second_pixelunshuffle = ConvLayer(max(input_channel) * 4,
                                              max(input_channel),
                                              kernel_size=3,
                                              stride=stride_stages[0],
                                              act_func=act_stages[0],
                                              use_bn=True)

        #################################################################################################### encoder inverted residual blocks
        self.block_group_info = [[0, 1]]
        blocks = [enc_first_pixelunshuffle, enc_second_pixelunshuffle]
        _block_index = 2
        feature_dim = width_list[1]  # pixelunshuffle multiplies channels by 4, so the width is set like this directly

        for width, n_block, s, act_func, use_se in zip(width_list[1:5],
                                                       n_block_list[1:5],
                                                       stride_stages[1:5],
                                                       act_stages[1:5],
                                                       se_stages[1:5]):
            self.block_group_info.append(
                [_block_index + i for i in range(n_block)])
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                mobile_inverted_conv = DynamicMBConvLayer(
                    in_channel_list=feature_dim,
                    out_channel_list=output_channel,
                    kernel_size_list=ks_list,
                    expand_ratio_list=expand_ratio_list,
                    stride=stride,
                    act_func=act_func,
                    use_se=use_se,
                )
                shortcut = IdentityLayer(feature_dim, feature_dim)
                blocks.append(
                    MobileInvertedResidualBlock(mobile_inverted_conv,
                                                shortcut))
                feature_dim = output_channel

        #################################################################################################### encoder final conv blocks
        enc_final_conv_blocks = []
        for width, n_block, s, act_func, use_se in zip(width_list[5:8],
                                                       n_block_list[5:8],
                                                       stride_stages[5:8],
                                                       act_stages[5:8],
                                                       se_stages[5:8]):
            # self.block_group_info.append([_block_index + i for i in range(n_block)])
            # _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                enc_final_conv_blocks.append(
                    ConvLayer(max(feature_dim),
                              max(output_channel),
                              kernel_size=3,
                              stride=stride,  # per-block stride (1 after the first block)
                              act_func=act_func,
                              use_bn=True))
                feature_dim = output_channel

        #################################################################################################### decoder first conv block
        dec_first_conv_block = ConvLayer(max(feature_dim),
                                         max(width_list[8]),
                                         kernel_size=3,
                                         stride=stride_stages[8],
                                         act_func=act_stages[8],
                                         use_bn=True)

        #################################################################################################### decoder inverted residual blocks
        feature_dim = width_list[8]  # matches the output width of dec_first_conv_block

        for width, n_block, s, act_func, use_se in zip(width_list[9:13],
                                                       n_block_list[9:13],
                                                       stride_stages[9:13],
                                                       act_stages[9:13],
                                                       se_stages[9:13]):
            self.block_group_info.append(
                [_block_index + i for i in range(n_block)])
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                mobile_inverted_conv = DynamicMBConvLayer(
                    in_channel_list=feature_dim,
                    out_channel_list=output_channel,
                    kernel_size_list=ks_list,
                    expand_ratio_list=expand_ratio_list,
                    stride=stride,
                    act_func=act_func,
                    use_se=use_se,
                )
                shortcut = IdentityLayer(feature_dim, feature_dim)
                blocks.append(
                    MobileInvertedResidualBlock(mobile_inverted_conv,
                                                shortcut))
                feature_dim = output_channel

        #################################################################################################### decoder final conv blocks
        dec_final_conv_blocks = []
        for width, n_block, s, act_func, use_se in zip(width_list[13:15],
                                                       n_block_list[13:15],
                                                       stride_stages[13:15],
                                                       act_stages[13:15],
                                                       se_stages[13:15]):
            # self.block_group_info.append([_block_index + i for i in range(n_block)])
            # _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                dec_final_conv_blocks.append(
                    ConvLayer(max(feature_dim),
                              max(output_channel),
                              kernel_size=3,
                              stride=stride,  # per-block stride (1 after the first block)
                              act_func=act_func,
                              use_bn=True))
                feature_dim = output_channel

        #################################################################################################### decoder shuffle

        # for width, n_block, s, act_func, use_se in zip(width_list[11], n_block_list[11],
        #                                                stride_stages[11], act_stages[11], se_stages[11]):
        self.block_group_info.append(
            [_block_index + i for i in range(n_block_list[15])])
        _block_index += n_block_list[15]

        output_channel = width_list[15]
        for i in range(n_block_list[15]):
            if i == 0:
                stride = stride_stages[15]
            else:
                stride = 1
            blocks.append(
                ConvLayer(max(feature_dim),
                          max(output_channel),
                          kernel_size=3,
                          stride=stride,  # per-block stride (1 after the first block)
                          act_func=act_stages[15],
                          use_bn=True))
        # feature_dim is intentionally not updated: the pixelshuffle activation
        # presumably divides the 256 output channels by 4, back to max(feature_dim).

        #################################################################################################### decoder final output conv block
        dec_final_output_conv_block = ConvLayer(max(feature_dim),
                                                max(width_list[16]),
                                                kernel_size=3,
                                                stride=stride_stages[16],
                                                act_func=act_stages[16],
                                                use_bn=True)

        ####################################################################################################
        # runtime_depth
        self.runtime_depth = [
            len(block_idx) for block_idx in self.block_group_info
        ]

        super(OFAMobileNetX4,
              self).__init__(blocks, enc_final_conv_blocks,
                             dec_first_conv_block, dec_final_conv_blocks,
                             dec_final_output_conv_block, self.runtime_depth)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])
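Every elastic hyperparameter above is normalized through int2list. A minimal sketch of what that helper presumably does (the name and signature come from the calls above; the body is an assumption):

    def int2list(val, repeat_time=1):
        # Normalize a scalar or a sequence into a plain list so that elastic
        # dimensions (width mult, kernel size, expand ratio, depth) can be
        # handled uniformly, whether given as a single value or a list.
        if isinstance(val, (list, tuple)):
            return list(val)
        return [val] * repeat_time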
Example #4
    def __init__(self, n_classes=1000, bn_param=(0.1, 1e-5), dropout_rate=0.1, base_stage_width=None,
                 width_mult_list=1.0, ks_list=3, expand_ratio_list=6, depth_list=4):

        self.width_mult_list = int2list(width_mult_list, 1)
        self.ks_list = int2list(ks_list, 1)
        self.expand_ratio_list = int2list(expand_ratio_list, 1)
        self.depth_list = int2list(depth_list, 1)
        self.base_stage_width = base_stage_width

        self.width_mult_list.sort()
        self.ks_list.sort()
        self.expand_ratio_list.sort()
        self.depth_list.sort()

        # base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]
        base_stage_width = [16,    24, 40, 80, 112, 160,    192, 224, 256, 320, 480,   960, 1280]
        stride_stages = [1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2]
        act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish', 'relu', 'relu', 'h_swish', 'h_swish',
                      'h_swish']
        se_stages = [False, False, True, False, True, True, False, True, False, True, True]

        final_expand_width = [
            make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8) for _ in self.width_mult_list
        ]
        last_channel = [
            make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8) for _ in self.width_mult_list
        ]

        # stride_stages = [1, 2, 2, 2, 1, 2]
        # act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
        # se_stages = [False, False, True, False, True, True]

        if depth_list is None:
            n_block_list = [1, 2, 3, 4, 2, 3]
            self.depth_list = [4, 4]
            print('Use MobileNetV3 Depth Setting')
        else:
            n_block_list = [1] + [max(self.depth_list)] * 10  # e.g., depth_list = [1, 2, 3, 4]
            # -> [1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
        width_list = []
        for base_width in base_stage_width[:-2]:
            width = [make_divisible(base_width * width_mult, 8) for width_mult in self.width_mult_list]
            width_list.append(width)

        input_channel = width_list[0]
        # first conv layer
        if len(set(input_channel)) == 1:
            first_conv = ConvLayer(3, max(input_channel), kernel_size=3, stride=2, act_func='h_swish')
            first_block_conv = MBInvertedConvLayer(
                in_channels=max(input_channel), out_channels=max(input_channel), kernel_size=3, stride=stride_stages[0],
                expand_ratio=1, act_func=act_stages[0], use_se=se_stages[0],
            )
        else:
            first_conv = DynamicConvLayer(
                in_channel_list=int2list(3, len(input_channel)), out_channel_list=input_channel, kernel_size=3,
                stride=2, act_func='h_swish',
            )
            first_block_conv = DynamicMBConvLayer(
                in_channel_list=input_channel, out_channel_list=input_channel, kernel_size_list=3, expand_ratio_list=1,
                stride=stride_stages[0], act_func=act_stages[0], use_se=se_stages[0],
            )
        first_block = MobileInvertedResidualBlock(first_block_conv, IdentityLayer(input_channel, input_channel))

        # inverted residual blocks
        self.block_group_info = []
        blocks = [first_block]
        _block_index = 1
        feature_dim = input_channel

        for width, n_block, s, act_func, use_se in zip(width_list[1:], n_block_list[1:],
                                                       stride_stages[1:], act_stages[1:], se_stages[1:]):
            self.block_group_info.append([_block_index + i for i in range(n_block)])
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                mobile_inverted_conv = DynamicMBConvLayer(
                    in_channel_list=feature_dim, out_channel_list=output_channel, kernel_size_list=ks_list,
                    expand_ratio_list=expand_ratio_list, stride=stride, act_func=act_func, use_se=use_se,
                )
                if stride == 1 and feature_dim == output_channel:
                    shortcut = IdentityLayer(feature_dim, feature_dim)
                else:
                    shortcut = None
                blocks.append(MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
                feature_dim = output_channel
        # final expand layer, feature mix layer & classifier
        if len(final_expand_width) == 1:
            final_expand_layer = ConvLayer(max(feature_dim), max(final_expand_width), kernel_size=1, act_func='h_swish')
            feature_mix_layer = ConvLayer(
                max(final_expand_width), max(last_channel), kernel_size=1, bias=False, use_bn=False, act_func='h_swish',
            )
        else:
            final_expand_layer = DynamicConvLayer(
                in_channel_list=feature_dim, out_channel_list=final_expand_width, kernel_size=1, act_func='h_swish'
            )
            feature_mix_layer = DynamicConvLayer(
                in_channel_list=final_expand_width, out_channel_list=last_channel, kernel_size=1,
                use_bn=False, act_func='h_swish',
            )
        if len(set(last_channel)) == 1:
            classifier = LinearLayer(max(last_channel), n_classes, dropout_rate=dropout_rate)
        else:
            classifier = DynamicLinearLayer(
                in_features_list=last_channel, out_features=n_classes, bias=True, dropout_rate=dropout_rate
            )
        super(OFAMobileNetV3_depth, self).__init__(first_conv, blocks, final_expand_layer, feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

        # runtime_depth
        self.runtime_depth = [len(block_idx) for block_idx in self.block_group_info]
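runtime_depth is what makes the depth elastic at run time. A plausible sketch of how the superclass forward pass would consume block_group_info together with runtime_depth (an assumption about the base class, which is not shown here):

    def forward_blocks(self, x):
        # Within each stage, run only the first `depth` blocks of the group and
        # skip the rest; shrinking runtime_depth[stage_id] shortens that stage.
        for stage_id, block_idx in enumerate(self.block_group_info):
            depth = self.runtime_depth[stage_id]
            for idx in block_idx[:depth]:
                x = self.blocks[idx](x)
        return x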
Example #5
    def __init__(self,
                 n_classes=1000,
                 bn_param=(0.1, 1e-3),
                 dropout_rate=0.1,
                 base_stage_width=None,
                 width_mult_list=1.0,
                 ks_list=3,
                 expand_ratio_list=6,
                 depth_list=4):

        self.width_mult_list = int2list(width_mult_list, 1)
        self.ks_list = int2list(ks_list, 1)
        self.expand_ratio_list = int2list(expand_ratio_list, 1)
        self.depth_list = int2list(depth_list, 1)
        self.base_stage_width = base_stage_width

        self.width_mult_list.sort()
        self.ks_list.sort()
        self.expand_ratio_list.sort()
        self.depth_list.sort()

        if base_stage_width == 'google':
            base_stage_width = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
        else:
            # ProxylessNAS Stage Width
            base_stage_width = [32, 16, 24, 40, 80, 96, 192, 320, 1280]

        input_channel = [
            make_divisible(base_stage_width[0] * width_mult, 8)
            for width_mult in self.width_mult_list
        ]
        first_block_width = [
            make_divisible(base_stage_width[1] * width_mult, 8)
            for width_mult in self.width_mult_list
        ]
        last_channel = [
            make_divisible(base_stage_width[-1] * width_mult, 8)
            if width_mult > 1.0 else base_stage_width[-1]
            for width_mult in self.width_mult_list
        ]

        # first conv layer
        if len(input_channel) == 1:
            first_conv = ConvLayer(3,
                                   max(input_channel),
                                   kernel_size=3,
                                   stride=2,
                                   use_bn=True,
                                   act_func='relu6',
                                   ops_order='weight_bn_act')
        else:
            first_conv = DynamicConvLayer(
                in_channel_list=int2list(3, len(input_channel)),
                out_channel_list=input_channel,
                kernel_size=3,
                stride=2,
                act_func='relu6')
        # first block
        if len(first_block_width) == 1:
            first_block_conv = MBInvertedConvLayer(
                in_channels=max(input_channel),
                out_channels=max(first_block_width),
                kernel_size=3,
                stride=1,
                expand_ratio=1,
                act_func='relu6',
            )
        else:
            first_block_conv = DynamicMBConvLayer(
                in_channel_list=input_channel,
                out_channel_list=first_block_width,
                kernel_size_list=3,
                expand_ratio_list=1,
                stride=1,
                act_func='relu6',
            )
        first_block = MobileInvertedResidualBlock(first_block_conv, None)

        input_channel = first_block_width

        # inverted residual blocks
        self.block_group_info = []
        blocks = [first_block]
        _block_index = 1

        stride_stages = [2, 2, 2, 1, 2, 1]
        if depth_list is None:
            n_block_list = [2, 3, 4, 3, 3, 1]
            self.depth_list = [4, 4]
            print('Use MobileNetV2 Depth Setting')
        else:
            n_block_list = [max(self.depth_list)] * 5 + [1]

        width_list = []
        for base_width in base_stage_width[2:-1]:
            width = [
                make_divisible(base_width * width_mult, 8)
                for width_mult in self.width_mult_list
            ]
            width_list.append(width)

        for width, n_block, s in zip(width_list, n_block_list, stride_stages):
            self.block_group_info.append(
                [_block_index + i for i in range(n_block)])
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1

                mobile_inverted_conv = DynamicMBConvLayer(
                    in_channel_list=int2list(input_channel, 1),
                    out_channel_list=int2list(output_channel, 1),
                    kernel_size_list=ks_list,
                    expand_ratio_list=expand_ratio_list,
                    stride=stride,
                    act_func='relu6',
                )

                if stride == 1 and input_channel == output_channel:
                    shortcut = IdentityLayer(input_channel, input_channel)
                else:
                    shortcut = None

                mb_inverted_block = MobileInvertedResidualBlock(
                    mobile_inverted_conv, shortcut)

                blocks.append(mb_inverted_block)
                input_channel = output_channel
        # 1x1_conv before global average pooling
        if len(last_channel) == 1:
            feature_mix_layer = ConvLayer(
                max(input_channel),
                max(last_channel),
                kernel_size=1,
                use_bn=True,
                act_func='relu6',
            )
            classifier = LinearLayer(max(last_channel),
                                     n_classes,
                                     dropout_rate=dropout_rate)
        else:
            feature_mix_layer = DynamicConvLayer(
                in_channel_list=input_channel,
                out_channel_list=last_channel,
                kernel_size=1,
                stride=1,
                act_func='relu6',
            )
            classifier = DynamicLinearLayer(in_features_list=last_channel,
                                            out_features=n_classes,
                                            bias=True,
                                            dropout_rate=dropout_rate)

        super(OFAProxylessNASNets,
              self).__init__(first_conv, blocks, feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

        # runtime_depth
        self.runtime_depth = [
            len(block_idx) for block_idx in self.block_group_info
        ]
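make_divisible, used for every width above, is the standard MobileNet channel-rounding helper. A sketch of the usual implementation (assumed, since the helper itself is not shown here):

    def make_divisible(v, divisor, min_val=None):
        # Round v to the nearest multiple of divisor, never going below
        # min_val and never dropping more than 10% of the original value.
        if min_val is None:
            min_val = divisor
        new_v = max(min_val, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    # e.g., make_divisible(32 * 0.75, 8) -> 24; make_divisible(96 * 1.3, 8) -> 128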
Example #6
    def __init__(self,
                 n_classes=1000,
                 width_mult=1,
                 bn_param=(0.1, 1e-3),
                 dropout_rate=0.2,
                 ks=None,
                 expand_ratio=None,
                 depth_param=None,
                 stage_width_list=None):

        if ks is None:
            ks = 3
        if expand_ratio is None:
            expand_ratio = 6

        input_channel = 32
        last_channel = 1280

        input_channel = make_divisible(input_channel * width_mult, 8)
        last_channel = make_divisible(last_channel * width_mult,
                                      8) if width_mult > 1.0 else last_channel

        inverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [expand_ratio, 24, 2, 2],
            [expand_ratio, 32, 3, 2],
            [expand_ratio, 64, 4, 2],
            [expand_ratio, 96, 3, 1],
            [expand_ratio, 160, 3, 2],
            [expand_ratio, 320, 1, 1],
        ]

        if depth_param is not None:
            assert isinstance(depth_param, int)
            for i in range(1, len(inverted_residual_setting) - 1):
                inverted_residual_setting[i][2] = depth_param

        if stage_width_list is not None:
            for i in range(len(inverted_residual_setting)):
                inverted_residual_setting[i][1] = stage_width_list[i]

        ks = int2list(ks,
                      sum([n for _, _, n, _ in inverted_residual_setting]) - 1)
        _pt = 0

        # first conv layer
        first_conv = ConvLayer(3,
                               input_channel,
                               kernel_size=3,
                               stride=2,
                               use_bn=True,
                               act_func='relu6',
                               ops_order='weight_bn_act')
        # inverted residual blocks
        blocks = []
        for t, c, n, s in inverted_residual_setting:
            output_channel = make_divisible(c * width_mult, 8)
            for i in range(n):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                if t == 1:
                    kernel_size = 3
                else:
                    kernel_size = ks[_pt]
                    _pt += 1
                mobile_inverted_conv = MBInvertedConvLayer(
                    in_channels=input_channel,
                    out_channels=output_channel,
                    kernel_size=kernel_size,
                    stride=stride,
                    expand_ratio=t,
                )
                if stride == 1 and input_channel == output_channel:
                    shortcut = IdentityLayer(input_channel, input_channel)
                else:
                    shortcut = None
                blocks.append(
                    MobileInvertedResidualBlock(mobile_inverted_conv,
                                                shortcut))
                input_channel = output_channel
        # 1x1_conv before global average pooling
        feature_mix_layer = ConvLayer(
            input_channel,
            last_channel,
            kernel_size=1,
            use_bn=True,
            act_func='relu6',
            ops_order='weight_bn_act',
        )

        classifier = LinearLayer(last_channel,
                                 n_classes,
                                 dropout_rate=dropout_rate)

        super(MobileNetV2, self).__init__(first_conv, blocks,
                                          feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])
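A hypothetical call to the constructor above: a width-0.75 MobileNetV2 with every middle stage deepened to three blocks and 5x5 kernels throughout (the argument values are illustrative):

    net = MobileNetV2(
        n_classes=1000,
        width_mult=0.75,
        bn_param=(0.1, 1e-3),
        dropout_rate=0.2,
        ks=5,
        expand_ratio=6,
        depth_param=3,
    )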