def _maybe_squeeze_and_excite(
    params, input_filters_or_mask, inner_activation, gating_activation,
    enabled):
  """Generates a squeeze-and-excite layer, or an identity layer if disabled."""
  def make_squeeze_and_excite():
    return _squeeze_and_excite(
        params=params,
        input_filters_or_mask=input_filters_or_mask,
        inner_activation=inner_activation,
        gating_activation=gating_activation)

  # We use explicit bool comparisons to make sure a user doesn't pass in a
  # bad configuration like enabled=OneOf([False, True, 42]).
  if isinstance(enabled, bool):
    return make_squeeze_and_excite() if enabled else layers.Identity()
  elif isinstance(enabled, schema.OneOf):
    options = []
    for choice in enabled.choices:
      if choice is True:
        options.append(make_squeeze_and_excite())
      elif choice is False:
        options.append(layers.Identity())
      else:
        raise ValueError(
            'Unsupported choice for "enabled": {}'.format(choice))
    return layers.maybe_switch_v2(enabled.mask, options)
  else:
    raise ValueError('Unsupported value for "enabled": {}'.format(enabled))
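

# Usage sketch (illustrative only, not part of the original code; `inner_act`
# and `gating_act` stand in for whatever activation layers the caller passes,
# and the OneOf construction is shown schematically):
#
#   # Stand-alone training: `enabled` is a plain bool, so the result is either
#   # a squeeze-and-excite block or layers.Identity().
#   layer = _maybe_squeeze_and_excite(
#       params, input_filters_or_mask=32,
#       inner_activation=inner_act, gating_activation=gating_act,
#       enabled=True)
#
#   # Architecture search: `enabled` is a schema.OneOf over {False, True}; the
#   # returned layer switches between Identity and squeeze-and-excite according
#   # to `enabled.mask` at run time.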


def _build_residual_spec(params, layer_spec, input_filters, output_filters,
                         filters_base):
  """Builds a layers.Layer implementation for a ResidualSpec specification."""
  def can_optimize_residual_spec(layer):
    """Returns true if we can replace the residual layer with an identity."""
    can_optimize_oneof = (
        isinstance(layer, schema.OneOf) and
        len(layer.choices) == 1 and
        isinstance(layer.choices[0], basic_specs.ZeroSpec))
    can_optimize_zerospec = isinstance(layer, basic_specs.ZeroSpec)
    return can_optimize_oneof or can_optimize_zerospec

  if can_optimize_residual_spec(layer_spec.layer):
    return layers.Identity()

  layer = _build_layer(params, layer_spec.layer, input_filters, output_filters,
                       filters_base)
  return layers.ParallelSum([layer, layers.Identity()])
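

# Illustrative note (not part of the original code): the residual wrapper built
# above computes output = layer(x) + x via layers.ParallelSum. When the only
# available choice for the branch is ZeroSpec, the sum degenerates to
# x + 0 == x, which is why the function returns layers.Identity() directly in
# that case instead of building the heavier ParallelSum structure.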


def _squeeze_and_excite(params, input_filters_or_mask, inner_activation,
                        gating_activation):
  """Generates a squeeze-and-excite layer."""
  # We provide two code paths:
  # 1. For the case where the number of input filters is known at graph
  #    construction time, and input_filters_or_mask is an int. This typically
  #    happens during stand-alone model training.
  # 2. For the case where the number of input filters is not known until
  #    runtime, and input_filters_or_mask is a 1D float tensor. This often
  #    happens during an architecture search.
  if isinstance(input_filters_or_mask, int):
    input_filters = input_filters_or_mask
    hidden_filters = search_space_utils.make_divisible(
        input_filters * _SQUEEZE_AND_EXCITE_RATIO,
        divisor=params['filters_base'])
    return layers.ParallelProduct([
        layers.Identity(),
        layers.Sequential([
            layers.GlobalAveragePool(keepdims=True),
            layers.Conv2D(
                filters=hidden_filters,
                kernel_size=(1, 1),
                kernel_initializer=params['kernel_initializer'],
                kernel_regularizer=params['kernel_regularizer'],
                use_bias=True),
            inner_activation,
            layers.Conv2D(
                filters=input_filters,
                kernel_size=(1, 1),
                kernel_initializer=params['kernel_initializer'],
                kernel_regularizer=params['kernel_regularizer'],
                use_bias=True),
            gating_activation,
        ]),
    ])
  else:
    input_mask = input_filters_or_mask
    input_filters = tf.reduce_sum(input_mask)
    hidden_filters = search_space_utils.tf_make_divisible(
        input_filters * _SQUEEZE_AND_EXCITE_RATIO,
        divisor=params['filters_base'])

    max_input_filters = int(input_mask.shape[0])
    max_hidden_filters = search_space_utils.make_divisible(
        max_input_filters * _SQUEEZE_AND_EXCITE_RATIO,
        divisor=params['filters_base'])
    hidden_mask = tf.sequence_mask(
        hidden_filters, max_hidden_filters, dtype=tf.float32)

    return layers.ParallelProduct([
        layers.Identity(),
        layers.Sequential([
            layers.GlobalAveragePool(keepdims=True),
            layers.MaskedConv2D(
                input_mask=input_mask,
                output_mask=hidden_mask,
                kernel_size=(1, 1),
                kernel_initializer=params['kernel_initializer'],
                kernel_regularizer=params['kernel_regularizer'],
                use_bias=True),
            inner_activation,
            layers.MaskedConv2D(
                input_mask=hidden_mask,
                output_mask=input_mask,
                kernel_size=(1, 1),
                kernel_initializer=params['kernel_initializer'],
                kernel_regularizer=params['kernel_regularizer'],
                use_bias=True),
            gating_activation,
        ]),
    ])
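

# Reference-only sketch (not part of the original code and not used by it): a
# plain tf.keras version of the squeeze-and-excite computation above, included
# only to document the math. It assumes a standard TensorFlow environment and
# a channels-last NHWC input with a statically known channel count. As a worked
# example of the hidden width: if _SQUEEZE_AND_EXCITE_RATIO were 0.25 and
# filters_base were 8, then 32 input filters would give
# make_divisible(32 * 0.25, 8) == 8 hidden filters.
def _squeeze_and_excite_reference_sketch(inputs, hidden_filters):
  """Reference sketch: global pool -> 1x1 conv -> ReLU -> 1x1 conv -> sigmoid gate."""
  # Squeeze: average over the spatial dimensions, keeping a 1x1 spatial map.
  pooled = tf.reduce_mean(inputs, axis=[1, 2], keepdims=True)
  # Excite: bottleneck projection followed by a per-channel sigmoid gate.
  hidden = tf.keras.layers.Conv2D(hidden_filters, 1, activation='relu')(pooled)
  gate = tf.keras.layers.Conv2D(
      inputs.shape[-1], 1, activation='sigmoid')(hidden)
  # Scale: broadcast the 1x1 gate over the spatial dimensions of the input.
  return inputs * gate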