def set_active_subnet(self, wid=None, ks=None, e=None, d=None):
    width_mult_id = int2list(wid, 3 + len(self.blocks) - 1)
    ks = int2list(ks, len(self.blocks) - 1)
    expand_ratio = int2list(e, len(self.blocks) - 1)
    depth = int2list(d, len(self.block_group_info))

    if len(self.width_mult_list) > 1 and width_mult_id[0] is not None:
        # active_out_channel
        self.first_conv.active_out_channel = self.first_conv.out_channel_list[width_mult_id[0]]
        self.blocks[0].mobile_inverted_conv.active_out_channel = \
            self.blocks[0].mobile_inverted_conv.out_channel_list[width_mult_id[1]]
        self.feature_mix_layer.active_out_channel = self.feature_mix_layer.out_channel_list[width_mult_id[2]]

    for block, w, k, e in zip(self.blocks[1:], width_mult_id[3:], ks, expand_ratio):
        if w is not None:
            block.mobile_inverted_conv.active_out_channel = block.mobile_inverted_conv.out_channel_list[w]
        if k is not None:
            block.mobile_inverted_conv.active_kernel_size = k
        if e is not None:
            block.mobile_inverted_conv.active_expand_ratio = e

    for i, d in enumerate(depth):
        if d is not None:
            self.runtime_depth[i] = min(len(self.block_group_info[i][0]), d)
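# The setter above relies on the repo's int2list helper to broadcast a scalar setting to
# every block. A minimal, self-contained sketch of that behavior (an assumption based on
# how int2list is called here, not the repo's actual implementation):
def int2list_sketch(val, repeat_time=1):
    # lists / tuples are treated as per-block settings and passed through
    if isinstance(val, (list, tuple)):
        return list(val)
    # a scalar (or None) is repeated so every block receives the same value
    return [val] * repeat_time

assert int2list_sketch(3, 4) == [3, 3, 3, 3]
assert int2list_sketch([3, 5, 7], 3) == [3, 5, 7]
assert int2list_sketch(None, 2) == [None, None]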
def __init__(self, in_channel_list, out_channel_list,
             kernel_size_list=3, expand_ratio_list=6, stride=1, act_func='relu6', use_se=False):
    super(DynamicMBConvLayer, self).__init__()

    self.in_channel_list = in_channel_list
    self.out_channel_list = out_channel_list

    self.kernel_size_list = int2list(kernel_size_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)

    self.stride = stride
    self.act_func = act_func
    self.use_se = use_se

    # build modules
    max_middle_channel = round(max(self.in_channel_list) * max(self.expand_ratio_list))
    if max(self.expand_ratio_list) == 1:
        self.inverted_bottleneck = None
    else:
        self.inverted_bottleneck = nn.Sequential(OrderedDict([
            ('conv', DynamicPointConv2d(max(self.in_channel_list), max_middle_channel)),
            ('bn', DynamicBatchNorm2d(max_middle_channel)),
            ('act', build_activation(self.act_func, inplace=True)),
        ]))

    self.depth_conv = nn.Sequential(OrderedDict([
        ('conv', DynamicSeparableConv2d(max_middle_channel, self.kernel_size_list, self.stride)),
        ('bn', DynamicBatchNorm2d(max_middle_channel)),
        ('act', build_activation(self.act_func, inplace=True)),
    ]))
    if self.use_se:
        self.depth_conv.add_module('se', DynamicSE(max_middle_channel))

    self.point_linear = nn.Sequential(OrderedDict([
        ('conv', DynamicPointConv2d(max_middle_channel, max(self.out_channel_list))),
        ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
    ]))

    self.active_kernel_size = max(self.kernel_size_list)
    self.active_expand_ratio = max(self.expand_ratio_list)
    self.active_out_channel = max(self.out_channel_list)
def set_quantization_policy(self, pw_w_bits_setting=None, pw_a_bits_setting=None,
                            dw_w_bits_setting=None, dw_a_bits_setting=None):
    pw_w_bits_setting = int2list(pw_w_bits_setting, len(self.blocks) - 1)
    pw_a_bits_setting = int2list(pw_a_bits_setting, len(self.blocks) - 1)
    dw_w_bits_setting = int2list(dw_w_bits_setting, len(self.blocks) - 1)
    dw_a_bits_setting = int2list(dw_a_bits_setting, len(self.blocks) - 1)

    for block, pw_w, pw_a, dw_w, dw_a in zip(self.blocks[1:], pw_w_bits_setting, pw_a_bits_setting,
                                             dw_w_bits_setting, dw_a_bits_setting):
        block.mobile_inverted_conv.set_quantization_policy(
            pw_w_bit=pw_w, pw_a_bit=pw_a, dw_w_bit=dw_w, dw_a_bit=dw_a)
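# Usage sketch (assumption: `net` is a constructed instance of the quantized dynamic
# network defined below). A scalar applies the same bit-width to every block; a list of
# length len(net.blocks) - 1 gives a per-block mixed-precision policy.
net.set_quantization_policy(pw_w_bits_setting=4, pw_a_bits_setting=4,
                            dw_w_bits_setting=8, dw_a_bits_setting=8)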
def set_active_subnet(self, wid=None, ks=None, e=None, d=None):
    width_mult_id = int2list(wid, 4 + len(self.block_group_info))
    ks = int2list(ks, len(self.blocks) - 1)
    expand_ratio = int2list(e, len(self.blocks) - 1)
    depth = int2list(d, len(self.block_group_info))

    for block, k, e in zip(self.blocks[1:], ks, expand_ratio):
        if k is not None:
            block.mobile_inverted_conv.active_kernel_size = k
        if e is not None:
            block.mobile_inverted_conv.active_expand_ratio = e

    for i, d in enumerate(depth):
        if d is not None:
            self.runtime_depth[i] = min(len(self.block_group_info[i]), d)
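# Usage sketch (assumption: `net` is a constructed OFA-style dynamic network with, e.g.,
# ks_list=[3, 5, 7], expand_ratio_list=[3, 4, 6], depth_list=[2, 3, 4]).
net.set_active_subnet(ks=7, e=6, d=4)   # largest sub-network: scalars are broadcast to all blocks
net.set_active_subnet(ks=3, e=3, d=2)   # smallest sub-network
# Per-block lists are also accepted, and None leaves a block's current setting unchanged.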
def validate(run_manager, epoch=0, is_test=True, image_size_list=None, width_mult_list=None,
             ks_list=None, expand_ratio_list=None, depth_list=None, additional_setting=None):
    dynamic_net = run_manager.net
    if isinstance(dynamic_net, nn.DataParallel):
        dynamic_net = dynamic_net.module

    dynamic_net.eval()

    if image_size_list is None:
        image_size_list = int2list(run_manager.run_config.data_provider.image_size, 1)
    if width_mult_list is None:
        width_mult_list = [i for i in range(len(dynamic_net.width_mult_list))]
    if ks_list is None:
        ks_list = dynamic_net.ks_list
    if expand_ratio_list is None:
        expand_ratio_list = dynamic_net.expand_ratio_list
    if depth_list is None:
        depth_list = dynamic_net.depth_list

    subnet_settings = []
    for w in width_mult_list:
        for d in depth_list:
            for e in expand_ratio_list:
                for k in ks_list:
                    for img_size in image_size_list:
                        subnet_settings.append([{
                            'image_size': img_size,
                            'wid': w,
                            'd': d,
                            'e': e,
                            'ks': k,
                        }, 'R%s-W%s-D%s-E%s-K%s' % (img_size, w, d, e, k)])
    if additional_setting is not None:
        subnet_settings += additional_setting

    losses_of_subnets, top1_of_subnets, top5_of_subnets = [], [], []

    valid_log = ''
    for setting, name in subnet_settings:
        run_manager.write_log('-' * 30 + ' Validate %s ' % name + '-' * 30, 'train', should_print=False)
        run_manager.run_config.data_provider.assign_active_img_size(setting.pop('image_size'))
        dynamic_net.set_active_subnet(**setting)
        run_manager.write_log(dynamic_net.module_str, 'train', should_print=False)

        run_manager.reset_running_statistics(dynamic_net)
        loss, top1, top5 = run_manager.validate(epoch=epoch, is_test=is_test, run_str=name, net=dynamic_net)
        losses_of_subnets.append(loss)
        top1_of_subnets.append(top1)
        top5_of_subnets.append(top5)
        valid_log += '%s (%.3f), ' % (name, top1)

    return list_mean(losses_of_subnets), list_mean(top1_of_subnets), list_mean(top5_of_subnets), valid_log
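# validate() enumerates the Cartesian product of the candidate lists, so its cost grows
# multiplicatively with the search space. A small self-contained example with assumed
# candidate lists: 1 * 3 * 3 * 3 * 1 = 27 sub-networks would be evaluated, each named
# like 'R224-W0-D4-E6-K7'.
width_mult_list, depth_list, expand_ratio_list = [0], [2, 3, 4], [3, 4, 6]
ks_list, image_size_list = [3, 5, 7], [224]
n_subnets = (len(width_mult_list) * len(depth_list) * len(expand_ratio_list)
             * len(ks_list) * len(image_size_list))
print(n_subnets)  # 27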
def __init__(self, n_classes=1000, width_mult=1, bn_param=(0.1, 1e-3), dropout_rate=0.2,
             ks=None, expand_ratio=None, depth_param=None, stage_width_list=None):
    if ks is None:
        ks = 3
    if expand_ratio is None:
        expand_ratio = 6

    input_channel = 32
    last_channel = 1280

    input_channel = make_divisible(input_channel * width_mult, 8)
    last_channel = make_divisible(last_channel * width_mult, 8) if width_mult > 1.0 else last_channel

    inverted_residual_setting = [
        # t, c, n, s
        [1, 16, 1, 1],
        [expand_ratio, 24, 2, 2],
        [expand_ratio, 32, 3, 2],
        [expand_ratio, 64, 4, 2],
        [expand_ratio, 96, 3, 1],
        [expand_ratio, 160, 3, 2],
        [expand_ratio, 320, 1, 1],
    ]

    if depth_param is not None:
        assert isinstance(depth_param, int)
        for i in range(1, len(inverted_residual_setting) - 1):
            inverted_residual_setting[i][2] = depth_param

    if stage_width_list is not None:
        for i in range(len(inverted_residual_setting)):
            inverted_residual_setting[i][1] = stage_width_list[i]

    ks = int2list(ks, sum([n for _, _, n, _ in inverted_residual_setting]) - 1)
    _pt = 0

    # first conv layer
    first_conv = ConvLayer(3, input_channel, kernel_size=3, stride=2, use_bn=True,
                           act_func='relu6', ops_order='weight_bn_act')
    # inverted residual blocks
    blocks = []
    for t, c, n, s in inverted_residual_setting:
        output_channel = make_divisible(c * width_mult, 8)
        for i in range(n):
            if i == 0:
                stride = s
            else:
                stride = 1
            if t == 1:
                kernel_size = 3
            else:
                kernel_size = ks[_pt]
                _pt += 1
            mobile_inverted_conv = MBInvertedConvLayer(
                in_channels=input_channel, out_channels=output_channel,
                kernel_size=kernel_size, stride=stride, expand_ratio=t,
            )
            if stride == 1:
                if input_channel == output_channel:
                    shortcut = IdentityLayer(input_channel, input_channel)
                else:
                    shortcut = None
            else:
                shortcut = None
            blocks.append(MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            input_channel = output_channel

    # 1x1_conv before global average pooling
    feature_mix_layer = ConvLayer(
        input_channel, last_channel, kernel_size=1, use_bn=True,
        act_func='relu6', ops_order='weight_bn_act',
    )

    classifier = LinearLayer(last_channel, n_classes, dropout_rate=dropout_rate)

    super(MobileNetV2, self).__init__(first_conv, blocks, feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-3), dropout_rate=0.1, base_stage_width=None,
             width_mult_list=1.0, ks_list=3, expand_ratio_list=6, depth_list=4,
             depth_ensemble_list=None, depth_ensemble_mode='avg'):
    self.width_mult_list = int2list(width_mult_list, 1)
    self.ks_list = int2list(ks_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)
    self.depth_list = int2list(depth_list, 1)

    self.depth_ensemble_list = depth_ensemble_list
    self.depth_ensemble_mode = depth_ensemble_mode

    self.width_mult_list.sort()
    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()

    if base_stage_width == 'v2':
        base_stage_width = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
    elif base_stage_width == 'old':
        base_stage_width = [32, 16, 32, 40, 80, 96, 192, 320, 1280]
    else:
        # ProxylessNAS Stage Width
        base_stage_width = [32, 16, 24, 40, 80, 96, 192, 320, 1280]

    input_channel = [make_divisible(base_stage_width[0] * width_mult, 8)
                     for width_mult in self.width_mult_list]
    first_block_width = [make_divisible(base_stage_width[1] * width_mult, 8)
                         for width_mult in self.width_mult_list]
    last_channel = [
        make_divisible(base_stage_width[-1] * width_mult, 8) if width_mult > 1.0 else base_stage_width[-1]
        for width_mult in self.width_mult_list
    ]

    # first conv layer
    if len(input_channel) == 1:
        first_conv = QConvLayer(
            3, max(input_channel), kernel_size=3, stride=2, use_bn=True,
            act_func='relu6', ops_order='weight_bn_act',
            w_bit=8, a_bit=-1, half_wave=False
        )
    else:
        first_conv = DynamicQConvLayer(
            in_channel_list=int2list(3, len(input_channel)), out_channel_list=input_channel,
            kernel_size=3, stride=2, act_func='relu6',
            w_bit=8, a_bit=8, half_wave=False
        )
    # first block
    if len(first_block_width) == 1:
        first_block_conv = MBInvertedQConvLayer(
            in_channels=max(input_channel), out_channels=max(first_block_width),
            kernel_size=3, stride=1, expand_ratio=1, act_func='relu6',
            pw_w_bit=8, pw_a_bit=8, dw_w_bit=8, dw_a_bit=8
        )
    else:
        first_block_conv = DynamicMBQConvLayer(
            in_channel_list=input_channel, out_channel_list=first_block_width,
            kernel_size_list=3, expand_ratio_list=1, stride=1, act_func='relu6',
            # pw_w_bit=4, pw_a_bit=4, dw_w_bit=4, dw_a_bit=4
            pw_w_bit=8, pw_a_bit=8, dw_w_bit=8, dw_a_bit=8
        )
    first_block = MobileInvertedResidualBlock(first_block_conv, None)

    input_channel = first_block_width

    # inverted residual blocks
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1

    stride_stages = [2, 2, 2, 1, 2, 1]
    if depth_list is None:
        n_block_list = [2, 3, 4, 3, 3, 1]
        self.depth_list = [4]
    else:
        n_block_list = [max(self.depth_list)] * 5 + [1]

    width_list = []
    for base_width in base_stage_width[2:-1]:
        width = [make_divisible(base_width * width_mult, 8) for width_mult in self.width_mult_list]
        width_list.append(width)

    for width, n_block, s in zip(width_list, n_block_list, stride_stages):
        self.block_group_info.append(
            ([_block_index + i for i in range(n_block)], width)
        )
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBQConvLayer(
                in_channel_list=int2list(input_channel, 1), out_channel_list=int2list(output_channel, 1),
                kernel_size_list=ks_list, expand_ratio_list=expand_ratio_list,
                stride=stride, act_func='relu6',
                # pw_w_bit=4, pw_a_bit=4, dw_w_bit=4, dw_a_bit=4
                pw_w_bit=8, pw_a_bit=8, dw_w_bit=8, dw_a_bit=8
            )
            if stride == 1 and input_channel == output_channel:
                shortcut = IdentityLayer(input_channel, input_channel)
            else:
                shortcut = None
            mb_inverted_block = MobileInvertedResidualBlock(mobile_inverted_conv, shortcut)
            blocks.append(mb_inverted_block)
            input_channel = output_channel

    # 1x1_conv before global average pooling
    if len(last_channel) == 1:
        feature_mix_layer = QConvLayer(
            max(input_channel), max(last_channel), kernel_size=1, use_bn=True, act_func='relu6',
            w_bit=8, a_bit=8, half_wave=False
        )
        classifier = QLinearLayer(max(last_channel), n_classes, dropout_rate=dropout_rate, w_bit=8, a_bit=8)
    else:
        feature_mix_layer = DynamicMBQConvLayer(
            in_channel_list=input_channel, out_channel_list=last_channel,
            kernel_size=1, stride=1, act_func='relu6',
            # pw_w_bit=4, pw_a_bit=4, dw_w_bit=4, dw_a_bit=4
            pw_w_bit=8, pw_a_bit=8, dw_w_bit=8, dw_a_bit=8, half_wave=False
        )
        classifier = DynamicQLinearLayer(
            in_features_list=last_channel, out_features=n_classes, bias=True,
            dropout_rate=dropout_rate, w_bit=8, a_bit=8
        )

    super(DynamicQuantizedProxylessNASNets, self).__init__(first_conv, blocks, feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    # runtime_depth
    self.runtime_depth = [
        len(block_idx) for block_idx, _ in self.block_group_info
    ]

    if self.depth_ensemble_list is not None:
        self.depth_ensemble_list.sort()
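# Construction sketch with hypothetical hyper-parameters (not from the source): the elastic
# dimensions are passed as lists, while the 8-bit weight/activation settings are hard-coded
# above. set_quantization_policy (shown earlier) is assumed to be a method of this class.
net = DynamicQuantizedProxylessNASNets(
    n_classes=1000, width_mult_list=[0.65, 1.0],
    ks_list=[3, 5, 7], expand_ratio_list=[3, 4, 6], depth_list=[2, 3, 4],
)
net.set_quantization_policy(pw_w_bits_setting=4, pw_a_bits_setting=4,
                            dw_w_bits_setting=4, dw_a_bits_setting=4)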
def __init__(self, in_channel_list, out_channel_list,
             kernel_size_list=3, expand_ratio_list=6, stride=1, act_func='relu6',
             pw_w_bit=-1, pw_a_bit=-1, dw_w_bit=-1, dw_a_bit=-1, **kwargs):
    super(DynamicMBQConvLayer, self).__init__()

    self.in_channel_list = in_channel_list
    self.out_channel_list = out_channel_list

    self.kernel_size_list = int2list(kernel_size_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)

    self.stride = stride
    self.act_func = act_func

    # build modules
    max_middle_channel = round(max(self.in_channel_list) * max(self.expand_ratio_list))
    if max(self.expand_ratio_list) == 1:
        self.inverted_bottleneck = None
    else:
        self.inverted_bottleneck = nn.Sequential(OrderedDict([
            ('conv', DynamicPointQConv2d(max(self.in_channel_list), max_middle_channel,
                                         w_bit=pw_w_bit, a_bit=pw_a_bit, half_wave=False)),
            ('bn', DynamicBatchNorm2d(max_middle_channel)),
            ('act', build_activation(self.act_func, inplace=True)),
        ]))

    self.depth_conv = nn.Sequential(OrderedDict([
        ('conv', DynamicSeparableQConv2d(max_middle_channel, self.kernel_size_list, self.stride,
                                         w_bit=dw_w_bit, a_bit=dw_a_bit)),
        ('bn', DynamicBatchNorm2d(max_middle_channel)),
        ('act', build_activation(self.act_func, inplace=True)),
    ]))

    self.point_linear = nn.Sequential(OrderedDict([
        ('conv', DynamicPointQConv2d(max_middle_channel, max(self.out_channel_list),
                                     w_bit=pw_w_bit, a_bit=pw_a_bit)),
        ('bn', DynamicBatchNorm2d(max(self.out_channel_list))),
    ]))

    self.active_kernel_size = max(self.kernel_size_list)
    self.active_expand_ratio = max(self.expand_ratio_list)
    self.active_out_channel = max(self.out_channel_list)
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-5), dropout_rate=0.1, base_stage_width=None,
             width_mult_list=1.0, ks_list=3, expand_ratio_list=6, depth_list=4):
    self.width_mult_list = int2list(width_mult_list, 1)
    self.ks_list = int2list(ks_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)
    self.depth_list = int2list(depth_list, 1)
    self.base_stage_width = base_stage_width

    self.width_mult_list.sort()
    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()

    base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]

    final_expand_width = [
        make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8) for _ in self.width_mult_list
    ]
    self.final_expand_width = final_expand_width

    last_channel = [
        make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8) for _ in self.width_mult_list
    ]
    self.last_channel = last_channel

    # stride_stages = [1, 2, 2, 2, 1, 2]
    stride_stages = [1, 2, 2, 2, 1, 1]
    act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
    se_stages = [False, False, True, False, True, True]

    if depth_list is None:
        n_block_list = [1, 2, 3, 4, 2, 3]
        self.depth_list = [4, 4]
        print('Use MobileNetV3 Depth Setting')
    else:
        n_block_list = [1] + [max(self.depth_list)] * 5

    width_list = []
    for base_width in base_stage_width[:-2]:
        width = [
            make_divisible(base_width * width_mult, 8) for width_mult in self.width_mult_list
        ]
        width_list.append(width)

    input_channel = width_list[0]
    # first conv layer
    # if width_mult_list has only one elem
    if len(set(input_channel)) == 1:
        first_conv = ConvLayer(3, max(input_channel), kernel_size=3, stride=2, act_func='h_swish')
        first_block_conv = MBInvertedConvLayer(
            in_channels=max(input_channel), out_channels=max(input_channel),
            kernel_size=3, stride=stride_stages[0], expand_ratio=1,
            act_func=act_stages[0], use_se=se_stages[0],
        )
    else:
        first_conv = DynamicConvLayer(
            in_channel_list=int2list(3, len(input_channel)), out_channel_list=input_channel,
            kernel_size=3, stride=2, act_func='h_swish',
        )
        first_block_conv = DynamicMBConvLayer(
            in_channel_list=input_channel, out_channel_list=input_channel,
            kernel_size_list=3, expand_ratio_list=1,
            stride=stride_stages[0], act_func=act_stages[0], use_se=se_stages[0],
        )
    first_block = MobileInvertedResidualBlock(
        first_block_conv, IdentityLayer(input_channel, input_channel))

    # inverted residual blocks
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1
    feature_dim = input_channel

    for width, n_block, s, act_func, use_se in zip(width_list[1:], n_block_list[1:],
                                                   stride_stages[1:], act_stages[1:], se_stages[1:]):
        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=feature_dim, out_channel_list=output_channel,
                kernel_size_list=ks_list, expand_ratio_list=expand_ratio_list,
                stride=stride, act_func=act_func, use_se=use_se,
            )
            if stride == 1 and feature_dim == output_channel:
                shortcut = IdentityLayer(feature_dim, feature_dim)
            else:
                shortcut = None
            blocks.append(MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            feature_dim = output_channel

    # final expand layer, feature mix layer & classifier
    if len(final_expand_width) == 1:
        final_expand_layer = ConvLayer(max(feature_dim), max(final_expand_width),
                                       kernel_size=1, act_func='h_swish')
        feature_mix_layer = ConvLayer(
            max(final_expand_width), max(last_channel), kernel_size=1,
            bias=False, use_bn=False, act_func='h_swish',
        )
    else:
        final_expand_layer = DynamicConvLayer(
            in_channel_list=feature_dim, out_channel_list=final_expand_width,
            kernel_size=1, act_func='h_swish')
        feature_mix_layer = DynamicConvLayer(
            in_channel_list=final_expand_width, out_channel_list=last_channel,
            kernel_size=1, use_bn=False, act_func='h_swish',
        )
    if len(set(last_channel)) == 1:
        classifier = LinearLayer(max(last_channel), n_classes, dropout_rate=dropout_rate)
    else:
        classifier = DynamicLinearLayer(in_features_list=last_channel, out_features=n_classes,
                                        bias=True, dropout_rate=dropout_rate)

    super(OFAMobileNetV3, self).__init__(first_conv, blocks, final_expand_layer,
                                         feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    # runtime_depth
    self.runtime_depth = [
        len(block_idx) for block_idx in self.block_group_info
    ]
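# Construction sketch with hypothetical hyper-parameters: an OFA-style elastic space over
# kernel size, expand ratio, and depth, with a single width multiplier. set_active_subnet
# (shown earlier) is assumed to be a method of this class.
ofa_net = OFAMobileNetV3(
    n_classes=1000, dropout_rate=0.1, width_mult_list=1.0,
    ks_list=[3, 5, 7], expand_ratio_list=[3, 4, 6], depth_list=[2, 3, 4],
)
ofa_net.set_active_subnet(ks=7, e=6, d=4)   # activate the largest sub-network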
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-5), dropout_rate=0.1, base_stage_width=None,
             width_mult_list=1.0, ks_list=3, expand_ratio_list=6, depth_list=4):
    """
    Args:
        n_classes: number of classification classes
        bn_param: BatchNorm parameters (momentum, eps)
        dropout_rate: dropout rate (applied in the classifier)
        width_mult_list: candidate width multipliers (channel-width scaling factors)
        ks_list: candidate kernel sizes for the depthwise convolutions
        expand_ratio_list: candidate expand ratios (channel expansion inside each block)
        depth_list: candidate depths (number of blocks stacked per stage)
    """
    # int2list turns a list, tuple, or int into a list
    self.width_mult_list = int2list(width_mult_list, 1)
    self.ks_list = int2list(ks_list, 1)
    self.expand_ratio_list = int2list(expand_ratio_list, 1)
    self.depth_list = int2list(depth_list, 1)
    self.base_stage_width = base_stage_width

    self.width_mult_list.sort()
    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()

    base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]

    # make_divisible rounds the channel count to the nearest multiple of 8
    final_expand_width = [
        make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8) for _ in self.width_mult_list
    ]
    last_channel = [
        make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8) for _ in self.width_mult_list
    ]

    # per-stage strides (control downsampling), activation functions,
    # and whether to use Squeeze-and-Excitation (SE)
    stride_stages = [1, 2, 2, 2, 1, 2]
    act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
    se_stages = [False, False, True, False, True, True]

    # depth setting: every stage except the first block can be deepened (blocks repeated)
    if depth_list is None:
        n_block_list = [1, 2, 3, 4, 2, 3]
        self.depth_list = [4, 4]
        print('Use MobileNetV3 Depth Setting')
    else:
        n_block_list = [1] + [max(self.depth_list)] * 5

    # width / channel configuration
    width_list = []
    for base_width in base_stage_width[:-2]:
        width = [
            make_divisible(base_width * width_mult, 8) for width_mult in self.width_mult_list
        ]
        width_list.append(width)

    # width_list holds the candidate channel counts per stage
    # (not an expansion multiple of the initial channels)
    input_channel = width_list[0]
    # first conv layer
    if len(set(input_channel)) == 1:
        first_conv = ConvLayer(3, max(input_channel), kernel_size=3, stride=2, act_func='h_swish')
        first_block_conv = MBInvertedConvLayer(
            in_channels=max(input_channel), out_channels=max(input_channel),
            kernel_size=3, stride=stride_stages[0], expand_ratio=1,
            act_func=act_stages[0], use_se=se_stages[0],
        )
    else:
        first_conv = DynamicConvLayer(
            in_channel_list=int2list(3, len(input_channel)), out_channel_list=input_channel,
            kernel_size=3, stride=2, act_func='h_swish',
        )
        first_block_conv = DynamicMBConvLayer(
            in_channel_list=input_channel, out_channel_list=input_channel,
            kernel_size_list=3, expand_ratio_list=1,
            stride=stride_stages[0], act_func=act_stages[0], use_se=se_stages[0],
        )
    first_block = MobileInvertedResidualBlock(
        first_block_conv, IdentityLayer(input_channel, input_channel))

    # inverted residual blocks
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1
    feature_dim = input_channel

    for width, n_block, s, act_func, use_se in zip(width_list[1:], n_block_list[1:],
                                                   stride_stages[1:], act_stages[1:], se_stages[1:]):
        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=feature_dim, out_channel_list=output_channel,
                kernel_size_list=ks_list, expand_ratio_list=expand_ratio_list,
                stride=stride, act_func=act_func, use_se=use_se,
            )
            if stride == 1 and feature_dim == output_channel:
                shortcut = IdentityLayer(feature_dim, feature_dim)
            else:
                shortcut = None
            blocks.append(MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            feature_dim = output_channel

    # final expand layer, feature mix layer & classifier
    if len(final_expand_width) == 1:
        final_expand_layer = ConvLayer(max(feature_dim), max(final_expand_width),
                                       kernel_size=1, act_func='h_swish')
        feature_mix_layer = ConvLayer(
            max(final_expand_width), max(last_channel), kernel_size=1,
            bias=False, use_bn=False, act_func='h_swish',
        )
    else:
        final_expand_layer = DynamicConvLayer(
            in_channel_list=feature_dim, out_channel_list=final_expand_width,
            kernel_size=1, act_func='h_swish')
        feature_mix_layer = DynamicConvLayer(
            in_channel_list=final_expand_width, out_channel_list=last_channel,
            kernel_size=1, use_bn=False, act_func='h_swish',
        )
    if len(set(last_channel)) == 1:
        classifier = LinearLayer(max(last_channel), n_classes, dropout_rate=dropout_rate)
    else:
        classifier = DynamicLinearLayer(in_features_list=last_channel, out_features=n_classes,
                                        bias=True, dropout_rate=dropout_rate)

    super(OFAMobileNetV3, self).__init__(first_conv, blocks, final_expand_layer,
                                         feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    # runtime_depth
    self.runtime_depth = [
        len(block_idx) for block_idx in self.block_group_info
    ]