def __init__(self, n_classes=1000, width_mult=1, bn_param=(0.1, 1e-3), dropout_rate=0.2,
             ks=None, expand_ratio=None, depth_param=None, stage_width_list=None):
    input_channel = 16
    last_channel = 1280

    input_channel = make_divisible(input_channel * width_mult, 8)
    last_channel = make_divisible(last_channel * width_mult, 8) if width_mult > 1.0 else last_channel

    cfg = {
        # k, exp, c, se, nl, s, e  (trailing comments give the original expand ratios)
        '0': [
            [3, 16, 16, False, 'relu', 1, 1],
        ],
        '1': [
            [3, 64, 24, False, 'relu', 2, None],  # 4
            [3, 72, 24, False, 'relu', 1, None],  # 3
        ],
        '2': [
            [5, 72, 40, True, 'relu', 2, None],   # 3
            [5, 120, 40, True, 'relu', 1, None],  # 3
            [5, 120, 40, True, 'relu', 1, None],  # 3
        ],
        '3': [
            [3, 240, 80, False, 'h_swish', 2, None],  # 6
            [3, 200, 80, False, 'h_swish', 1, None],  # 2.5
            [3, 184, 80, False, 'h_swish', 1, None],  # 2.3
            [3, 184, 80, False, 'h_swish', 1, None],  # 2.3
        ],
        '4': [
            [3, 480, 112, True, 'h_swish', 1, None],  # 6
            [3, 672, 112, True, 'h_swish', 1, None],  # 6
        ],
        '5': [
            [5, 672, 160, True, 'h_swish', 2, None],  # 6
            [5, 960, 160, True, 'h_swish', 1, None],  # 6
            [5, 960, 160, True, 'h_swish', 1, None],  # 6
        ],
    }

    cfg = self.adjust_cfg(cfg, ks, expand_ratio, depth_param, stage_width_list)
    # apply the width multiplier on the mobile setting: scale `exp` (index 1) and `c` (index 2)
    for stage_id, block_config_list in cfg.items():
        for block_config in block_config_list:
            if block_config[1] is not None:
                block_config[1] = make_divisible(block_config[1] * width_mult, 8)
            block_config[2] = make_divisible(block_config[2] * width_mult, 8)

    first_conv, blocks, final_expand_layer, feature_mix_layer, classifier = self.build_net_via_cfg(
        cfg, input_channel, last_channel, n_classes, dropout_rate
    )
    super(MobileNetV3Large, self).__init__(first_conv, blocks, final_expand_layer, feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])
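# A minimal usage sketch for the static MobileNetV3-Large above (hedged: it assumes
# this class is importable as MobileNetV3Large and that the inherited forward maps a
# 224x224 RGB batch to class logits, as in the standard OFA codebase):
#
#   import torch
#
#   net = MobileNetV3Large(n_classes=1000, width_mult=1.0, dropout_rate=0.2)
#   x = torch.randn(1, 3, 224, 224)
#   logits = net(x)  # expected shape: (1, 1000)
#
# With the common make_divisible implementation, channel counts are rounded to the
# nearest multiple of 8 but never below 90% of the scaled value, e.g.
# make_divisible(16 * 1.2, 8) == 24 rather than 16.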
def forward(self, x):
    in_channel = x.size(1)
    num_mid = make_divisible(in_channel // self.reduction, divisor=8)

    y = x.mean(3, keepdim=True).mean(2, keepdim=True)  # global average pooling
    # reduce: slice the squeeze conv down to the active number of channels
    reduce_conv = self.fc.reduce
    reduce_filter = reduce_conv.weight[:num_mid, :in_channel, :, :].contiguous()
    reduce_bias = reduce_conv.bias[:num_mid] if reduce_conv.bias is not None else None
    y = F.conv2d(y, reduce_filter, reduce_bias, 1, 0, 1, 1)
    # relu
    y = self.fc.relu(y)
    # expand: slice the excitation conv back to the active input width
    expand_conv = self.fc.expand
    expand_filter = expand_conv.weight[:in_channel, :num_mid, :, :].contiguous()
    expand_bias = expand_conv.bias[:in_channel] if expand_conv.bias is not None else None
    y = F.conv2d(y, expand_filter, expand_bias, 1, 0, 1, 1)
    # hard sigmoid
    y = self.fc.h_sigmoid(y)

    return x * y
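# Why the slicing above works (a sanity sketch, not part of the model): a 1x1 conv
# restricted to its first num_mid output filters and first in_channel input channels
# behaves exactly like a smaller conv built from those weights. Hypothetical shapes,
# assuming reduction=4 and a 32-channel active input:
#
#   import torch
#   import torch.nn.functional as F
#
#   w = torch.randn(64, 64, 1, 1)               # full (max-width) squeeze weight
#   y = torch.randn(1, 32, 1, 1)                # pooled features, 32 active channels
#   out = F.conv2d(y, w[:8, :32].contiguous())  # num_mid = make_divisible(32 // 4, 8) == 8
#   assert out.shape == (1, 8, 1, 1)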
def get_active_net_config(self):
    # first conv
    first_conv_config = self.first_conv.config
    first_block_config = self.blocks[0].config
    if isinstance(self.first_conv, DynamicConvLayer):
        first_conv_config = self.first_conv.get_active_subnet_config(3)
        first_block_config = {
            'name': MobileInvertedResidualBlock.__name__,
            'mobile_inverted_conv': self.blocks[0].mobile_inverted_conv.get_active_subnet_config(
                first_conv_config['out_channels']
            ),
            'shortcut': self.blocks[0].shortcut.config if self.blocks[0].shortcut is not None else None,
        }

    final_expand_config = self.final_expand_layer.config
    feature_mix_layer_config = self.feature_mix_layer.config
    if isinstance(self.final_expand_layer, DynamicConvLayer):
        final_expand_config = self.final_expand_layer.get_active_subnet_config(
            self.blocks[-1].mobile_inverted_conv.active_out_channel)
        feature_mix_layer_config = self.feature_mix_layer.get_active_subnet_config(
            final_expand_config['out_channels'])

    classifier_config = self.classifier.config
    if isinstance(self.classifier, DynamicLinearLayer):
        classifier_config = self.classifier.get_active_subnet_config(self.feature_mix_layer.active_out_channel)

    block_config_list = [first_block_config]
    input_channel = first_block_config['mobile_inverted_conv']['out_channels']
    for stage_id, block_idx in enumerate(self.block_group_info):
        depth = self.runtime_depth[stage_id]
        active_idx = block_idx[:depth]
        stage_blocks = []
        for idx in active_idx:
            middle_channel = make_divisible(
                round(input_channel * self.blocks[idx].mobile_inverted_conv.active_expand_ratio), 8)
            stage_blocks.append({
                'name': MobileInvertedResidualBlock.__name__,
                'mobile_inverted_conv': {
                    'name': MBInvertedConvLayer.__name__,
                    'in_channels': input_channel,
                    'out_channels': self.blocks[idx].mobile_inverted_conv.active_out_channel,
                    'kernel_size': self.blocks[idx].mobile_inverted_conv.active_kernel_size,
                    'stride': self.blocks[idx].mobile_inverted_conv.stride,
                    'expand_ratio': self.blocks[idx].mobile_inverted_conv.active_expand_ratio,
                    'mid_channels': middle_channel,
                    'act_func': self.blocks[idx].mobile_inverted_conv.act_func,
                    'use_se': self.blocks[idx].mobile_inverted_conv.use_se,
                },
                'shortcut': self.blocks[idx].shortcut.config if self.blocks[idx].shortcut is not None else None,
            })
            input_channel = self.blocks[idx].mobile_inverted_conv.active_out_channel
        block_config_list += stage_blocks

    return {
        'name': MobileNetV3.__name__,
        'bn': self.get_bn_param(),
        'first_conv': first_conv_config,
        'blocks': block_config_list,
        'final_expand_layer': final_expand_config,
        'feature_mix_layer': feature_mix_layer_config,
        'classifier': classifier_config,
    }
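# The dict returned above mirrors the repo's layer-config format, so the currently
# active subnet can be re-instantiated as a static network. A hedged sketch — it
# assumes the usual OFA helpers set_active_subnet(...) and
# MobileNetV3.build_from_config(...) exist with these signatures in this codebase,
# and `ofa_net` is a hypothetical instance:
#
#   ofa_net.set_active_subnet(ks=7, e=6, d=4)
#   config = ofa_net.get_active_net_config()
#   subnet = MobileNetV3.build_from_config(config)  # static copy of the active subnet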
def __init__(self, bn_param=(0.1, 1e-5), dropout_rate=0.1, base_stage_width=None, width_mult_list=1.0,
             ks_list=3, expand_ratio_list=6, depth_list=4, pixelshuffle_depth_list=2):
    self.width_mult_list = int2list(width_mult_list, 1)  # the variable that controls the output widths
    self.ks_list = int2list(ks_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)
    self.depth_list = int2list(depth_list, 1)
    self.pixelshuffle_depth_list = int2list(pixelshuffle_depth_list, 1)
    self.base_stage_width = base_stage_width

    self.width_mult_list.sort()
    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()
    self.pixelshuffle_depth_list.sort()

    # FROM [3, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64]
    base_stage_width = [16, 64, 64, 64, 64, 64, 64, 3, 64, 64, 64, 64, 64, 64, 64, 256, 3]
    # stage layout: [Unshuffle | ResBlocks | ResConvs | ResBlocks | ResConvs | Shuffle]
    # n_block:      [2, 4, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 4, 1, 1, 2, 1]
    # shortcuts:    [Skip, Conv, Skip, Conv]
    # final_expand_width = [
    #     make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8) for _ in self.width_mult_list
    # ]
    # last_channel = [
    #     make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8) for _ in self.width_mult_list
    # ]
    stride_stages = [1] * 17
    act_stages = ['pixelunshuffle', 'relu6', 'relu6', 'relu6', 'relu6', None, None, None, None,
                  'relu6', 'relu6', 'relu6', 'relu6', None, None, 'pixelshuffle', None]
    se_stages = [False] * 17

    if depth_list is None:
        n_block_list = [1, 2, 3, 4, 2, 3]
        self.depth_list = [4, 4]
        print('Use MobileNetV3 Depth Setting')
    else:
        # the first and second-to-last entries are the pixelshuffle / pixelunshuffle depths (e.g. 2)
        n_block_list = [max(self.pixelshuffle_depth_list)] + [max(self.depth_list)] * 4 + [1] * 4 \
            + [max(self.depth_list)] * 4 + [1] * 2 + [max(self.pixelshuffle_depth_list)] + [1]
        # e.g. [2, 4, 4, 4, 4, 1, 1, 1, 1, 4, 4, 4, 4, 1, 1, 2, 1]

    width_list = []
    for base_width in base_stage_width:
        # width = [make_divisible(base_width * width_mult, 8) for width_mult in self.width_mult_list]
        width = [make_divisible(base_width * width_mult, 1) for width_mult in self.width_mult_list]
        width_list.append(width)

    #################################################################################################### encoder unshuffle
    input_channel = width_list[0]
    enc_first_pixelunshuffle = ConvLayer(3, max(input_channel), kernel_size=3, stride=stride_stages[0],
                                         act_func=act_stages[0], use_bn=True)
    enc_second_pixelunshuffle = ConvLayer(max(input_channel) * 4, max(input_channel), kernel_size=3,
                                          stride=stride_stages[0], act_func=act_stages[0], use_bn=True)

    #################################################################################################### encoder inverted residual blocks
    self.block_group_info = [[0, 1]]
    blocks = [enc_first_pixelunshuffle, enc_second_pixelunshuffle]
    _block_index = 2
    feature_dim = width_list[1]  # pixel-unshuffle multiplies the channels by 4, so we simply start from width_list[1]
    for width, n_block, s, act_func, use_se in zip(width_list[1:5], n_block_list[1:5], stride_stages[1:5],
                                                   act_stages[1:5], se_stages[1:5]):
        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=feature_dim,
                out_channel_list=output_channel,
                kernel_size_list=ks_list,
                expand_ratio_list=expand_ratio_list,
                stride=stride,
                act_func=act_func,
                use_se=use_se,
            )
            shortcut = IdentityLayer(feature_dim, feature_dim)
            blocks.append(MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            feature_dim = output_channel

    #################################################################################################### encoder final conv blocks
    enc_final_conv_blocks = []
    for width, n_block, s, act_func, use_se in zip(width_list[5:8], n_block_list[5:8], stride_stages[5:8],
                                                   act_stages[5:8], se_stages[5:8]):
        # self.block_group_info.append([_block_index + i for i in range(n_block)])
        # _block_index += n_block
        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            enc_final_conv_blocks.append(
                ConvLayer(max(feature_dim), max(output_channel), kernel_size=3, stride=stride,
                          act_func=act_func, use_bn=True))
            feature_dim = output_channel

    #################################################################################################### decoder first conv block
    dec_first_conv_block = ConvLayer(max(feature_dim), max(width_list[8]), kernel_size=3,
                                     stride=stride_stages[8], act_func=act_stages[8], use_bn=True)

    #################################################################################################### decoder inverted residual blocks
    feature_dim = width_list[6]
    for width, n_block, s, act_func, use_se in zip(width_list[9:13], n_block_list[9:13], stride_stages[9:13],
                                                   act_stages[9:13], se_stages[9:13]):
        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=feature_dim,
                out_channel_list=output_channel,
                kernel_size_list=ks_list,
                expand_ratio_list=expand_ratio_list,
                stride=stride,
                act_func=act_func,
                use_se=use_se,
            )
            shortcut = IdentityLayer(feature_dim, feature_dim)
            blocks.append(MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            feature_dim = output_channel

    #################################################################################################### decoder final conv blocks
    dec_final_conv_blocks = []
    for width, n_block, s, act_func, use_se in zip(width_list[13:15], n_block_list[13:15], stride_stages[13:15],
                                                   act_stages[13:15], se_stages[13:15]):
        # self.block_group_info.append([_block_index + i for i in range(n_block)])
        # _block_index += n_block
        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            dec_final_conv_blocks.append(
                ConvLayer(max(feature_dim), max(output_channel), kernel_size=3, stride=stride,
                          act_func=act_func, use_bn=True))
            feature_dim = output_channel

    #################################################################################################### decoder shuffle
    # for width, n_block, s, act_func, use_se in zip(width_list[11], n_block_list[11],
    #                                                stride_stages[11], act_stages[11], se_stages[11]):
    self.block_group_info.append([_block_index + i for i in range(n_block_list[15])])
    _block_index += n_block_list[15]
    output_channel = width_list[15]
    for i in range(n_block_list[15]):
        if i == 0:
            stride = stride_stages[15]
        else:
            stride = 1
        blocks.append(ConvLayer(max(feature_dim), max(output_channel), kernel_size=3, stride=stride,
                                act_func=act_stages[15], use_bn=True))

    #################################################################################################### decoder final output conv block
    dec_final_output_conv_block = ConvLayer(max(feature_dim), max(width_list[16]), kernel_size=3,
                                            stride=stride_stages[16], act_func=act_stages[16], use_bn=True)

    ####################################################################################################
    # runtime_depth
    self.runtime_depth = [len(block_idx) for block_idx in self.block_group_info]

    super(OFAMobileNetX4, self).__init__(blocks, enc_final_conv_blocks, dec_first_conv_block,
                                         dec_final_conv_blocks, dec_final_output_conv_block, self.runtime_depth)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])
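# Hedged construction sketch for the X4 super-network above. The keyword names come
# from the signature; the value ranges are purely illustrative, not a tested config:
#
#   net = OFAMobileNetX4(
#       width_mult_list=1.0,
#       ks_list=[3, 5],             # per-block kernel-size choices
#       expand_ratio_list=[2, 4, 6],
#       depth_list=[2, 3, 4],       # depth choices for the residual stage groups
#       pixelshuffle_depth_list=2,  # depth of the pixel-(un)shuffle stages
#   )
#   print(net.runtime_depth)        # initialized to the maximum depth per stage group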
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-5), dropout_rate=0.1, base_stage_width=None,
             width_mult_list=1.0, ks_list=3, expand_ratio_list=6, depth_list=4):
    self.width_mult_list = int2list(width_mult_list, 1)
    self.ks_list = int2list(ks_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)
    self.depth_list = int2list(depth_list, 1)
    self.base_stage_width = base_stage_width

    self.width_mult_list.sort()
    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()

    # base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]
    base_stage_width = [16, 24, 40, 80, 112, 160, 192, 224, 256, 320, 480, 960, 1280]
    stride_stages = [1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2]
    act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish',
                  'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
    se_stages = [False, False, True, False, True, True, False, True, False, True, True]

    final_expand_width = [
        make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8) for _ in self.width_mult_list
    ]
    last_channel = [
        make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8) for _ in self.width_mult_list
    ]

    # stride_stages = [1, 2, 2, 2, 1, 2]
    # act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
    # se_stages = [False, False, True, False, True, True]

    if depth_list is None:
        n_block_list = [1, 2, 3, 4, 2, 3]
        self.depth_list = [4, 4]
        print('Use MobileNetV3 Depth Setting')
    else:
        n_block_list = [1] + [max(self.depth_list)] * 10
        # e.g. depth_list = [1, 2, 3, 4] -> n_block_list = [1, 4, 4, 4, 4, ...]

    width_list = []
    for base_width in base_stage_width[:-2]:
        width = [make_divisible(base_width * width_mult, 8) for width_mult in self.width_mult_list]
        width_list.append(width)

    input_channel = width_list[0]
    # first conv layer
    if len(set(input_channel)) == 1:
        first_conv = ConvLayer(3, max(input_channel), kernel_size=3, stride=2, act_func='h_swish')
        first_block_conv = MBInvertedConvLayer(
            in_channels=max(input_channel), out_channels=max(input_channel), kernel_size=3,
            stride=stride_stages[0], expand_ratio=1, act_func=act_stages[0], use_se=se_stages[0],
        )
    else:
        first_conv = DynamicConvLayer(
            in_channel_list=int2list(3, len(input_channel)), out_channel_list=input_channel,
            kernel_size=3, stride=2, act_func='h_swish',
        )
        first_block_conv = DynamicMBConvLayer(
            in_channel_list=input_channel, out_channel_list=input_channel, kernel_size_list=3,
            expand_ratio_list=1, stride=stride_stages[0], act_func=act_stages[0], use_se=se_stages[0],
        )
    first_block = MobileInvertedResidualBlock(first_block_conv, IdentityLayer(input_channel, input_channel))

    # inverted residual blocks
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1
    feature_dim = input_channel

    for width, n_block, s, act_func, use_se in zip(width_list[1:], n_block_list[1:], stride_stages[1:],
                                                   act_stages[1:], se_stages[1:]):
        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=feature_dim, out_channel_list=output_channel, kernel_size_list=ks_list,
                expand_ratio_list=expand_ratio_list, stride=stride, act_func=act_func, use_se=use_se,
            )
            if stride == 1 and feature_dim == output_channel:
                shortcut = IdentityLayer(feature_dim, feature_dim)
            else:
                shortcut = None
            blocks.append(MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            feature_dim = output_channel

    # final expand layer, feature mix layer & classifier
    if len(final_expand_width) == 1:
        final_expand_layer = ConvLayer(max(feature_dim), max(final_expand_width), kernel_size=1, act_func='h_swish')
        feature_mix_layer = ConvLayer(
            max(final_expand_width), max(last_channel), kernel_size=1, bias=False, use_bn=False, act_func='h_swish',
        )
    else:
        final_expand_layer = DynamicConvLayer(
            in_channel_list=feature_dim, out_channel_list=final_expand_width, kernel_size=1, act_func='h_swish'
        )
        feature_mix_layer = DynamicConvLayer(
            in_channel_list=final_expand_width, out_channel_list=last_channel,
            kernel_size=1, use_bn=False, act_func='h_swish',
        )
    if len(set(last_channel)) == 1:
        classifier = LinearLayer(max(last_channel), n_classes, dropout_rate=dropout_rate)
    else:
        classifier = DynamicLinearLayer(
            in_features_list=last_channel, out_features=n_classes, bias=True, dropout_rate=dropout_rate
        )

    super(OFAMobileNetV3_depth, self).__init__(first_conv, blocks, final_expand_layer, feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    # runtime_depth
    self.runtime_depth = [len(block_idx) for block_idx in self.block_group_info]
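# Hedged sketch: shrinking the runtime depth of one stage. runtime_depth is
# initialized above to the maximum depth per block group; truncating an entry to d
# keeps only the first d blocks of that group at inference (assuming forward
# consults runtime_depth, as elsewhere in this codebase):
#
#   net = OFAMobileNetV3_depth(ks_list=[3, 5, 7], expand_ratio_list=[3, 4, 6], depth_list=[2, 3, 4])
#   net.runtime_depth[0] = 2  # use only the first 2 blocks of stage group 0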
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-3), dropout_rate=0.1, base_stage_width=None,
             width_mult_list=1.0, ks_list=3, expand_ratio_list=6, depth_list=4):
    self.width_mult_list = int2list(width_mult_list, 1)
    self.ks_list = int2list(ks_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)
    self.depth_list = int2list(depth_list, 1)
    self.base_stage_width = base_stage_width

    self.width_mult_list.sort()
    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()

    if base_stage_width == 'google':
        base_stage_width = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
    else:
        # ProxylessNAS Stage Width
        base_stage_width = [32, 16, 24, 40, 80, 96, 192, 320, 1280]

    input_channel = [make_divisible(base_stage_width[0] * width_mult, 8) for width_mult in self.width_mult_list]
    first_block_width = [make_divisible(base_stage_width[1] * width_mult, 8) for width_mult in self.width_mult_list]
    last_channel = [
        make_divisible(base_stage_width[-1] * width_mult, 8) if width_mult > 1.0 else base_stage_width[-1]
        for width_mult in self.width_mult_list
    ]

    # first conv layer
    if len(input_channel) == 1:
        first_conv = ConvLayer(3, max(input_channel), kernel_size=3, stride=2, use_bn=True,
                               act_func='relu6', ops_order='weight_bn_act')
    else:
        first_conv = DynamicConvLayer(
            in_channel_list=int2list(3, len(input_channel)), out_channel_list=input_channel,
            kernel_size=3, stride=2, act_func='relu6',
        )
    # first block
    if len(first_block_width) == 1:
        first_block_conv = MBInvertedConvLayer(
            in_channels=max(input_channel), out_channels=max(first_block_width), kernel_size=3,
            stride=1, expand_ratio=1, act_func='relu6',
        )
    else:
        first_block_conv = DynamicMBConvLayer(
            in_channel_list=input_channel, out_channel_list=first_block_width, kernel_size_list=3,
            expand_ratio_list=1, stride=1, act_func='relu6',
        )
    first_block = MobileInvertedResidualBlock(first_block_conv, None)

    input_channel = first_block_width

    # inverted residual blocks
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1

    stride_stages = [2, 2, 2, 1, 2, 1]
    if depth_list is None:
        n_block_list = [2, 3, 4, 3, 3, 1]
        self.depth_list = [4, 4]
        print('Use MobileNetV2 Depth Setting')
    else:
        n_block_list = [max(self.depth_list)] * 5 + [1]

    width_list = []
    for base_width in base_stage_width[2:-1]:
        width = [make_divisible(base_width * width_mult, 8) for width_mult in self.width_mult_list]
        width_list.append(width)

    for width, n_block, s in zip(width_list, n_block_list, stride_stages):
        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1

            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=int2list(input_channel, 1), out_channel_list=int2list(output_channel, 1),
                kernel_size_list=ks_list, expand_ratio_list=expand_ratio_list, stride=stride, act_func='relu6',
            )

            if stride == 1 and input_channel == output_channel:
                shortcut = IdentityLayer(input_channel, input_channel)
            else:
                shortcut = None

            mb_inverted_block = MobileInvertedResidualBlock(mobile_inverted_conv, shortcut)
            blocks.append(mb_inverted_block)
            input_channel = output_channel

    # 1x1_conv before global average pooling
    if len(last_channel) == 1:
        feature_mix_layer = ConvLayer(
            max(input_channel), max(last_channel), kernel_size=1, use_bn=True, act_func='relu6',
        )
        classifier = LinearLayer(max(last_channel), n_classes, dropout_rate=dropout_rate)
    else:
        feature_mix_layer = DynamicConvLayer(
            in_channel_list=input_channel, out_channel_list=last_channel, kernel_size=1,
            stride=1, act_func='relu6',
        )
        classifier = DynamicLinearLayer(
            in_features_list=last_channel, out_features=n_classes, bias=True, dropout_rate=dropout_rate
        )

    super(OFAProxylessNASNets, self).__init__(first_conv, blocks, feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    # runtime_depth
    self.runtime_depth = [len(block_idx) for block_idx in self.block_group_info]
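# Hedged usage sketch for the ProxylessNAS-style super-network above (values are
# illustrative; base_stage_width='google' selects the MobileNetV2 stage widths,
# anything else the ProxylessNAS widths, per the branch at the top of __init__):
#
#   net = OFAProxylessNASNets(
#       base_stage_width='google',  # -> [32, 16, 24, 32, 64, 96, 160, 320, 1280]
#       width_mult_list=[1.0],
#       ks_list=[3, 5, 7],
#       expand_ratio_list=[3, 6],
#       depth_list=[2, 3, 4],
#   )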
def __init__(self, n_classes=1000, width_mult=1, bn_param=(0.1, 1e-3), dropout_rate=0.2,
             ks=None, expand_ratio=None, depth_param=None, stage_width_list=None):
    if ks is None:
        ks = 3
    if expand_ratio is None:
        expand_ratio = 6

    input_channel = 32
    last_channel = 1280

    input_channel = make_divisible(input_channel * width_mult, 8)
    last_channel = make_divisible(last_channel * width_mult, 8) if width_mult > 1.0 else last_channel

    inverted_residual_setting = [
        # t, c, n, s
        [1, 16, 1, 1],
        [expand_ratio, 24, 2, 2],
        [expand_ratio, 32, 3, 2],
        [expand_ratio, 64, 4, 2],
        [expand_ratio, 96, 3, 1],
        [expand_ratio, 160, 3, 2],
        [expand_ratio, 320, 1, 1],
    ]

    if depth_param is not None:
        assert isinstance(depth_param, int)
        for i in range(1, len(inverted_residual_setting) - 1):
            inverted_residual_setting[i][2] = depth_param

    if stage_width_list is not None:
        for i in range(len(inverted_residual_setting)):
            inverted_residual_setting[i][1] = stage_width_list[i]

    ks = int2list(ks, sum([n for _, _, n, _ in inverted_residual_setting]) - 1)
    _pt = 0

    # first conv layer
    first_conv = ConvLayer(3, input_channel, kernel_size=3, stride=2, use_bn=True,
                           act_func='relu6', ops_order='weight_bn_act')
    # inverted residual blocks
    blocks = []
    for t, c, n, s in inverted_residual_setting:
        output_channel = make_divisible(c * width_mult, 8)
        for i in range(n):
            if i == 0:
                stride = s
            else:
                stride = 1
            if t == 1:
                kernel_size = 3
            else:
                kernel_size = ks[_pt]
                _pt += 1
            mobile_inverted_conv = MBInvertedConvLayer(
                in_channels=input_channel, out_channels=output_channel,
                kernel_size=kernel_size, stride=stride, expand_ratio=t,
            )
            if stride == 1 and input_channel == output_channel:
                shortcut = IdentityLayer(input_channel, input_channel)
            else:
                shortcut = None
            blocks.append(MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            input_channel = output_channel

    # 1x1_conv before global average pooling
    feature_mix_layer = ConvLayer(
        input_channel, last_channel, kernel_size=1, use_bn=True, act_func='relu6', ops_order='weight_bn_act',
    )

    classifier = LinearLayer(last_channel, n_classes, dropout_rate=dropout_rate)

    super(MobileNetV2, self).__init__(first_conv, blocks, feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])
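# Hedged sketch: the ks argument can be a single int (broadcast via int2list to every
# searchable block) or a per-block list; its length must equal the total block count
# minus the first t=1 block, matching the int2list call above. With the default
# setting that is 1 + 2 + 3 + 4 + 3 + 3 + 1 - 1 == 16 kernel sizes:
#
#   net = MobileNetV2(n_classes=1000, width_mult=1.0, ks=[3, 5] * 8)  # 16 per-block kernel sizes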