def scale_conv_tower_spec(model_spec, multipliers, base=None):
    """Scale all the filters in `model_spec`, rounding to multiples of `base`.

    Only `OneOf` nodes tagged with `basic_specs.FILTERS_TAG` are rewritten;
    every other node is passed through untouched. Within a filters node,
    `FilterMultiplier` choices are kept as-is and every other choice is
    replaced by one `scale_filters` result per multiplier. The resulting
    choices are de-duplicated and sorted.

    Args:
      model_spec: A ConvTowerSpec namedtuple.
      multipliers: float or list/tuple of floats, the possible filter
        multipliers.
      base: Positive integer, all filter sizes must be a multiple of this
        value. Defaults to `model_spec.filters_base` when None.

    Returns:
      A new basic_specs.ConvTowerSpec.
    """
    filters_base = model_spec.filters_base if base is None else base

    # A bare number is shorthand for a single-element collection.
    if isinstance(multipliers, (int, float)):
        candidate_multipliers = (multipliers,)
    else:
        candidate_multipliers = multipliers

    def rescale(oneof):
        """Return `oneof` with its absolute filter sizes scaled up/down."""
        if oneof.tag != basic_specs.FILTERS_TAG:
            return oneof

        scaled_choices = set()
        for choice in oneof.choices:
            if isinstance(choice, basic_specs.FilterMultiplier):
                # Relative filter sizes are not scaled; only absolute ones are.
                scaled_choices.add(choice)
                continue
            scaled_choices.update(
                scale_filters(choice, multiplier, filters_base)
                for multiplier in candidate_multipliers)

        return schema.OneOf(sorted(scaled_choices), basic_specs.FILTERS_TAG)

    scaled_spec = schema.map_oneofs(rescale, model_spec)
    return basic_specs.ConvTowerSpec(scaled_spec.blocks, filters_base)
def test_scale_conv_tower_spec_filter_multipliers(self):
    """Relative filter choices survive scaling; absolute ones are scaled."""

    def relative_choices():
        # Build fresh objects on every call so the comparison below relies on
        # FilterMultiplier equality rather than on object identity.
        return [
            basic_specs.FilterMultiplier(3.0),
            basic_specs.FilterMultiplier(6.0),
        ]

    layer = schema.OneOf(relative_choices(), basic_specs.FILTERS_TAG)
    tower_block = basic_specs.block(layers=[layer], filters=48)
    model_spec = basic_specs.ConvTowerSpec(
        blocks=[tower_block], filters_base=8)

    scaled_spec = search_space_utils.scale_conv_tower_spec(
        model_spec, multipliers=(0.5, 1, 2), base=8)
    scaled_block = scaled_spec.blocks[0]

    # FilterMultiplier objects should not be affected by the scaling function.
    self.assertEqual(scaled_block.layers[0].choices, relative_choices())

    # However, absolute filter sizes should still be scaled.
    self.assertEqual(scaled_block.filters.choices, [24, 48, 96])
def _mobilenet_v3_large_search_base(block_filters_multipliers,
                                    expansion_multipliers,
                                    search_squeeze_and_excite=False,
                                    always_use_relu=False,
                                    use_relative_expansion_filters=False,
                                    base_filters=(16, 24, 40, 80, 112, 160,
                                                  960, 1280)):
    """Experimental search space built around MobileNet V3 - Large model.

    Args:
      block_filters_multipliers: Iterable of multipliers. Each block's base
        filter size is passed through `search_space_utils.scale_filters` (with
        base=8) once per multiplier to form that block's filter choices.
      expansion_multipliers: Iterable of multipliers used to build the
        expansion filter choices of every bottleneck layer.
      search_squeeze_and_excite: If true, each bottleneck's
        `use_squeeze_and_excite` is a `OneOf([False, True])` instead of the
        fixed per-layer value.
      always_use_relu: If true, RELU is used wherever SWISH6 would be used.
      use_relative_expansion_filters: If true, expansion filter choices are
        `basic_specs.FilterMultiplier` objects; otherwise they are sizes
        computed by `scale_filters` from the layer's input size.
      base_filters: Sequence of base filter sizes; indices 0 through 7 are
        read, one per block.

    Returns:
      A basic_specs.ConvTowerSpec with `filters_base=8`.
    """
    if always_use_relu:
        default_act = RELU
    else:
        default_act = SWISH6

    def kernel_choices(*sizes):
        """Build an OP-tagged OneOf over the given kernel sizes."""
        return schema.OneOf(list(sizes), basic_specs.OP_TAG)

    def optional(layer):
        """Let the search choose between `layer` and a ZeroSpec."""
        return schema.OneOf([layer, basic_specs.ZeroSpec()],
                            basic_specs.OP_TAG)

    def block(layers, filters):
        """Build a Block whose filters are the scaled variants of `filters`."""
        scaled = sorted({
            search_space_utils.scale_filters(filters, multiplier, base=8)
            for multiplier in block_filters_multipliers
        })
        return basic_specs.Block(layers=layers,
                                 filters=choose_filters(scaled))

    def initial_conv(s, bn=True):
        return ConvSpec(kernel_size=kernel_choices(3, 5),
                        strides=s,
                        use_batch_norm=bn)

    def sepconv(s, act):
        return SeparableConvSpec(kernel_size=kernel_choices(3, 5, 7),
                                 strides=s,
                                 activation=act)

    def bneck(input_size, se, s, act):
        """Construct a DepthwiseBottleneckSpec namedtuple."""
        if use_relative_expansion_filters:
            expansions = sorted({
                basic_specs.FilterMultiplier(expansion)
                for expansion in expansion_multipliers
            })
        else:
            expansions = sorted({
                search_space_utils.scale_filters(input_size, expansion, base=8)
                for expansion in expansion_multipliers
            })

        if search_squeeze_and_excite:
            # The caller-supplied `se` is ignored; the search decides instead.
            use_se = schema.OneOf([False, True], basic_specs.OP_TAG)
        else:
            use_se = se

        return DepthwiseBottleneckSpec(
            kernel_size=kernel_choices(3, 5, 7),
            expansion_filters=choose_filters(expansions),
            use_squeeze_and_excite=use_se,
            strides=s,
            activation=act)

    def body_block(stage, se, s, act, endpoint=True):
        """Build body block `stage`.

        The block starts with one bottleneck that reads
        `base_filters[stage - 1]` and uses stride `s`. It is followed by three
        optional residual stride-1 bottlenecks that read `base_filters[stage]`,
        and by a detection endpoint when `endpoint` is true.
        """
        layers = [
            bneck(input_size=base_filters[stage - 1], se=se, s=s, act=act)
        ]
        for _ in range(3):
            skippable = optional(
                bneck(input_size=base_filters[stage], se=se, s=1, act=act))
            layers.append(ResidualSpec(skippable))
        if endpoint:
            layers.append(DetectionEndpointSpec())
        return block(layers, base_filters[stage])

    blocks = [
        # Stem
        block([
            initial_conv(s=2),
            default_act,
            ResidualSpec(optional(sepconv(s=1, act=RELU))),
            DetectionEndpointSpec(),
        ], filters=base_filters[0]),
        # Body
        body_block(1, se=False, s=2, act=RELU),
        body_block(2, se=True, s=2, act=RELU),
        body_block(3, se=False, s=2, act=default_act, endpoint=False),
        body_block(4, se=True, s=1, act=default_act),
        body_block(5, se=True, s=2, act=default_act),
        # Head
        block([
            ConvSpec(kernel_size=1, strides=1, use_batch_norm=True),
            default_act,
            GlobalAveragePoolSpec(),
        ], base_filters[6]),
        block([
            ConvSpec(kernel_size=1, strides=1, use_batch_norm=False),
            default_act,
        ], base_filters[7]),
    ]
    return basic_specs.ConvTowerSpec(blocks=blocks, filters_base=8)
def mobilenet_multi_max():
    """Specification for MobileNet Multi-MAX model.

    From the paper: "Discovering Multi-Hardware Mobile Models via Architecture
    Search"

    Returns:
      A ConvTowerSpec namedtuple for the Mobilenet Multi-MAX model.
    """

    def conv(kernel, s, bn=True):
        return ConvSpec(kernel_size=kernel, strides=s, use_batch_norm=bn)

    def bneck(kernel, exp_size, s):
        """Bottleneck with one fixed expansion size, RELU and no SE."""
        return DepthwiseBottleneckSpec(
            kernel_size=kernel,
            expansion_filters=choose_filters([exp_size]),
            use_squeeze_and_excite=False,
            strides=s,
            activation=RELU)

    # One row per body block:
    #   (output filters,
    #    leading bottleneck as (kernel, expansion size, stride),
    #    residual stride-1 bottlenecks as (kernel, expansion size),
    #    whether the block ends with a detection endpoint)
    body_stages = [
        (32, (3, 96, 2), [], True),
        (64, (5, 192, 2), [(3, 128), (3, 128)], True),
        (128, (5, 384, 2), [(3, 512), (3, 384), (3, 384)], False),
        (128, (3, 768, 1), [(3, 384)], True),
        (160, (3, 768, 2), [(5, 640), (3, 800), (5, 640)], True),
    ]

    # Stem
    blocks = [
        basic_specs.block([
            conv(kernel=3, s=2),
            RELU,
            DetectionEndpointSpec(),
        ], filters=32),
    ]

    # Body
    for filters, leading, residuals, has_endpoint in body_stages:
        lead_kernel, lead_exp_size, lead_stride = leading
        layers = [
            bneck(kernel=lead_kernel, exp_size=lead_exp_size, s=lead_stride)
        ]
        for kernel, exp_size in residuals:
            layers.append(
                ResidualSpec(bneck(kernel=kernel, exp_size=exp_size, s=1)))
        if has_endpoint:
            layers.append(DetectionEndpointSpec())
        blocks.append(basic_specs.block(layers, filters=filters))

    # Head
    blocks.append(
        basic_specs.block([
            conv(kernel=1, s=1),
            RELU,
            GlobalAveragePoolSpec(),
        ], filters=960))
    blocks.append(
        basic_specs.block([
            conv(kernel=1, s=1, bn=False),
            RELU,
        ], filters=1280))

    return basic_specs.ConvTowerSpec(blocks=blocks, filters_base=32)
def _mobilenet_v3_large_base(use_relative_filter_sizes):
    """Specification for MobileNet V3 - Large model.

    Args:
      use_relative_filter_sizes: If true, each bottleneck's expansion filters
        are expressed as a `basic_specs.FilterMultiplier` of
        `exp_size / input_size`; otherwise the absolute `exp_size` is used.

    Returns:
      A basic_specs.ConvTowerSpec with `filters_base=8`.
    """

    def conv(kernel, s, bn=True):
        return ConvSpec(kernel_size=kernel, strides=s, use_batch_norm=bn)

    def sepconv(kernel, s, act):
        return SeparableConvSpec(kernel_size=kernel, strides=s, activation=act)

    def bneck(kernel, input_size, exp_size, se, s, act):
        if not use_relative_filter_sizes:
            expansion = exp_size
        else:
            # The expanded filter size will be computed relative to the input
            # filter size. Separate logic in the model builder code ensures
            # that the expanded filter size will be an integer multiple of
            # `model_spec.filters_base`.
            expansion = basic_specs.FilterMultiplier(exp_size / input_size)
        return DepthwiseBottleneckSpec(
            kernel_size=kernel,
            expansion_filters=choose_filters([expansion]),
            use_squeeze_and_excite=se,
            strides=s,
            activation=act)

    # One row per body block. Kernel size, squeeze-and-excite and activation
    # are shared by every bottleneck in a block:
    #   (output filters, kernel, se, activation,
    #    leading bottleneck as (input size, expansion size, stride),
    #    expansion sizes of the residual stride-1 bottlenecks, whose input
    #    size is the block's output filters,
    #    whether the block ends with a detection endpoint)
    body_stages = [
        (24, 3, False, RELU, (16, 64, 2), [72], True),
        (40, 5, True, RELU, (24, 72, 2), [120, 120], True),
        (80, 3, False, SWISH6, (40, 240, 2), [200, 184, 184], False),
        (112, 3, True, SWISH6, (80, 480, 1), [672], True),
        (160, 5, True, SWISH6, (112, 672, 2), [960, 960], True),
    ]

    # Stem
    blocks = [
        basic_specs.block([
            conv(kernel=3, s=2),
            SWISH6,
            ResidualSpec(sepconv(kernel=3, s=1, act=RELU)),
            DetectionEndpointSpec(),
        ], filters=16),
    ]

    # Body
    for filters, kernel, se, act, leading, residual_exps, has_endpoint in (
            body_stages):
        lead_input, lead_exp, lead_stride = leading
        layers = [
            bneck(kernel=kernel, input_size=lead_input, exp_size=lead_exp,
                  se=se, s=lead_stride, act=act)
        ]
        for exp_size in residual_exps:
            layers.append(
                ResidualSpec(
                    bneck(kernel=kernel, input_size=filters,
                          exp_size=exp_size, se=se, s=1, act=act)))
        if has_endpoint:
            layers.append(DetectionEndpointSpec())
        blocks.append(basic_specs.block(layers, filters=filters))

    # Head
    blocks.append(
        basic_specs.block([
            conv(kernel=1, s=1),
            SWISH6,
            GlobalAveragePoolSpec(),
        ], filters=960))
    blocks.append(
        basic_specs.block([
            conv(kernel=1, s=1, bn=False),
            SWISH6,
        ], filters=1280))

    return basic_specs.ConvTowerSpec(blocks=blocks, filters_base=8)
def _proxylessnas_search_base(base_filters, collapse_shared_ops=False):
    """Reproduction of ProxylessNAS search space with custom output filters.

    Args:
      base_filters: Sequence of output filter sizes; indices 0 through 8 are
        read, one per block.
      collapse_shared_ops: If true, each bottleneck is a single
        DepthwiseBottleneckSpec whose kernel size and expansion filters are
        themselves OneOf nodes. If false, every (multiplier, kernel size)
        combination is listed as a separate choice.

    Returns:
      A basic_specs.ConvTowerSpec with `filters_base=8`.
    """

    def conv(kernel, s, bn=True):
        return ConvSpec(kernel_size=kernel, strides=s, use_batch_norm=bn)

    def sepconv(s):
        """Choice between separable convolutions with kernel size 3, 5 or 7."""
        candidates = [
            SeparableConvSpec(kernel_size=kernel_size, strides=s,
                              activation=RELU)
            for kernel_size in (3, 5, 7)
        ]
        return schema.OneOf(candidates, basic_specs.OP_TAG)

    def bneck(s, skippable):
        """Construct a spec for an inverted bottleneck layer."""
        multipliers = [3.0, 6.0]
        kernel_sizes = [3, 5, 7]

        if collapse_shared_ops:
            shared_kernel = schema.OneOf(kernel_sizes, basic_specs.OP_TAG)
            shared_expansion = schema.OneOf(
                [basic_specs.FilterMultiplier(m) for m in multipliers],
                basic_specs.FILTERS_TAG)
            candidates = [
                DepthwiseBottleneckSpec(kernel_size=shared_kernel,
                                        expansion_filters=shared_expansion,
                                        use_squeeze_and_excite=False,
                                        strides=s,
                                        activation=RELU)
            ]
        else:
            # Multiplier varies slowest, kernel size fastest.
            candidates = [
                DepthwiseBottleneckSpec(
                    kernel_size=kernel_size,
                    expansion_filters=basic_specs.FilterMultiplier(multiplier),
                    use_squeeze_and_excite=False,
                    strides=s,
                    activation=RELU)
                for multiplier in multipliers
                for kernel_size in kernel_sizes
            ]

        if skippable:
            candidates.append(basic_specs.ZeroSpec())
        return schema.OneOf(candidates, basic_specs.OP_TAG)

    def body_block(index, s, num_residual=3, endpoint=True):
        """Build the body block whose output filters are base_filters[index].

        The block is one non-skippable bottleneck with stride `s`, followed by
        `num_residual` skippable residual stride-1 bottlenecks, and a detection
        endpoint when `endpoint` is true.
        """
        layers = [bneck(s=s, skippable=False)]
        layers.extend(
            ResidualSpec(bneck(s=1, skippable=True))
            for _ in range(num_residual))
        if endpoint:
            layers.append(DetectionEndpointSpec())
        return basic_specs.block(layers, filters=base_filters[index])

    blocks = [
        # Stem
        basic_specs.block([
            conv(kernel=3, s=2),
            RELU,
        ], filters=base_filters[0]),
        basic_specs.block(
            [
                # NOTE: The original MobileNet V2 paper used an inverted
                # bottleneck layer with an expansion factor of 1 here. Under
                # the definition used by the paper, an inverted bottleneck
                # layer with an expansion factor of 1 was equivalent to a
                # depthwise separable convolution, which is what we use. (Our
                # definition of an inverted bottleneck layer with an expansion
                # factor of 1 is slightly different from the one used in the
                # MobileNet paper.)
                sepconv(s=1),
                DetectionEndpointSpec(),
            ],
            filters=base_filters[1]),
        # Body
        body_block(2, s=2),
        body_block(3, s=2),
        body_block(4, s=2, endpoint=False),
        body_block(5, s=1),
        body_block(6, s=2),
        body_block(7, s=1, num_residual=0),
        # Head
        basic_specs.block([
            conv(kernel=1, s=1),
            RELU,
            GlobalAveragePoolSpec(),
        ], filters=base_filters[8]),
    ]
    return basic_specs.ConvTowerSpec(blocks=blocks, filters_base=8)
def _make_single_layer_model(layer):
    """Wrap `layer` in a ConvTowerSpec holding a single one-layer block.

    The block is built with `filters=32` and the tower with `filters_base=8`.
    """
    only_block = basic_specs.Block(layers=[layer], filters=32)
    return basic_specs.ConvTowerSpec(blocks=[only_block], filters_base=8)