def get_efficientnet_v2s_kwargs(channel_multiplier=1.0, depth_multiplier=1.0, **kwargs):
    """Build the keyword arguments describing an EfficientNet-V2 Small model.

    NOTE: this is a preliminary definition based on the paper; it was written
    before official code and weights were available.

    Paper: `EfficientNetV2: Smaller Models and Faster Training`
        - https://arxiv.org/abs/2104.00298

    Args:
        channel_multiplier: passed to ``round_channels`` when sizing
            ``num_features`` and also stored in the result as-is.
        depth_multiplier: passed to ``decode_arch_def`` together with the
            stage definitions.
        **kwargs: extra options. They are handed to ``resolve_bn_args`` and
            ``resolve_act_layer`` first and then merged into the result.

    Returns:
        dict with ``block_args``, ``num_features``, ``stem_size``,
        ``channel_multiplier``, ``norm_kwargs`` and ``act_layer`` plus
        whatever is in ``kwargs`` after the resolve helpers have run.
    """
    # FIXME it's not clear whether the FusedMBConv ('er') stages have SE
    # enabled for the Small variant; Table 4 suggests no. 23.94M params
    # without SE vs 23.98M with, which is closer to the quoted 24M. The SE
    # alternative would append '_se0.25' to each of the three 'er' strings.
    er_stages = [
        ['er_r2_k3_s1_e1_c24'],
        ['er_r4_k3_s2_e4_c48'],
        ['er_r4_k3_s2_e4_c64'],
    ]
    ir_stages = [
        ['ir_r6_k3_s2_e4_c128_se0.25'],
        ['ir_r9_k3_s1_e6_c160_se0.25'],
        ['ir_r15_k3_s2_e6_c272_se0.25'],
    ]
    stage_defs = er_stages + ir_stages

    # Keep this call order: the resolve helpers receive the live ``kwargs``
    # dict before it is unpacked into the result below.
    decoded_blocks = decode_arch_def(stage_defs, depth_multiplier)
    head_width = round_channels(1792, channel_multiplier, 8, None)
    bn_args = resolve_bn_args(kwargs)
    # FIXME 'silu' is an assumption, the paper does not mention the activation.
    activation = resolve_act_layer(kwargs, 'silu')

    return dict(
        block_args=decoded_blocks,
        num_features=head_width,
        stem_size=24,
        channel_multiplier=channel_multiplier,
        norm_kwargs=bn_args,
        act_layer=activation,
        **kwargs,
    )
def get_efficientnet_kwargs(channel_multiplier=1.0, depth_multiplier=1.0, drop_rate=0.2, drop_path_rate=0.2):
    """Build the keyword arguments describing an EfficientNet model.

    Ref impl: https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/efficientnet_model.py
    Paper: https://arxiv.org/abs/1905.11946

    EfficientNet params
    name: (channel_multiplier, depth_multiplier, resolution, dropout_rate)
    'efficientnet-b0': (1.0, 1.0, 224, 0.2),
    'efficientnet-b1': (1.0, 1.1, 240, 0.2),
    'efficientnet-b2': (1.1, 1.2, 260, 0.3),
    'efficientnet-b3': (1.2, 1.4, 300, 0.3),
    'efficientnet-b4': (1.4, 1.8, 380, 0.4),
    'efficientnet-b5': (1.6, 2.2, 456, 0.4),
    'efficientnet-b6': (1.8, 2.6, 528, 0.5),
    'efficientnet-b7': (2.0, 3.1, 600, 0.5),
    'efficientnet-b8': (2.2, 3.6, 672, 0.5),
    'efficientnet-l2': (4.3, 5.3, 800, 0.5),

    Args:
        channel_multiplier: multiplier to number of channels per layer
        depth_multiplier: multiplier to number of repeats per stage
        drop_rate: value stored under ``drop_rate`` in the result
            (the ``dropout_rate`` column of the table above).
        drop_path_rate: value stored under ``drop_path_rate`` in the result.
            Defaults to 0.2, the value that used to be hard-coded here.

    Returns:
        dict with ``block_args``, ``num_features``, ``stem_size``,
        ``channel_multiplier``, ``act_layer``, ``norm_kwargs``, ``drop_rate``
        and ``drop_path_rate``.
    """
    arch_def = [
        ['ds_r1_k3_s1_e1_c16_se0.25'],
        ['ir_r2_k3_s2_e6_c24_se0.25'],
        ['ir_r2_k5_s2_e6_c40_se0.25'],
        ['ir_r3_k3_s2_e6_c80_se0.25'],
        ['ir_r3_k5_s1_e6_c112_se0.25'],
        ['ir_r4_k5_s2_e6_c192_se0.25'],
        ['ir_r1_k3_s1_e6_c320_se0.25'],
    ]
    model_kwargs = dict(
        block_args=decode_arch_def(arch_def, depth_multiplier),
        num_features=round_channels(1280, channel_multiplier, 8, None),
        stem_size=32,
        channel_multiplier=channel_multiplier,
        act_layer=Swish,
        norm_kwargs={},  # TODO: check
        drop_rate=drop_rate,
        # Previously fixed at 0.2; now caller-configurable with the same default.
        drop_path_rate=drop_path_rate,
    )
    return model_kwargs