Example #1
def __init__(self, block_fn, block_repeats, kernel_size, strides,
             expand_ratio, in_filters, out_filters, is_output, width_scale,
             depth_scale):
    self.block_fn = block_fn
    self.block_repeats = round_repeats(block_repeats, depth_scale)
    self.kernel_size = kernel_size
    self.strides = strides
    self.expand_ratio = expand_ratio
    self.in_filters = nn_layers.round_filters(in_filters, width_scale)
    self.out_filters = nn_layers.round_filters(out_filters, width_scale)
    self.is_output = is_output
Example #2
def __init__(self, block_fn: str, block_repeats: int, kernel_size: int,
             strides: int, expand_ratio: float, in_filters: int,
             out_filters: int, is_output: bool, width_scale: float,
             depth_scale: float):
    self.block_fn = block_fn
    self.block_repeats = round_repeats(block_repeats, depth_scale)
    self.kernel_size = kernel_size
    self.strides = strides
    self.expand_ratio = expand_ratio
    self.in_filters = nn_layers.round_filters(in_filters, width_scale)
    self.out_filters = nn_layers.round_filters(out_filters, width_scale)
    self.is_output = is_output
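
Both constructors lean on two scaling helpers. Below is a minimal sketch of what they compute, assuming the standard EfficientNet compound-scaling convention; the real implementations live in the surrounding model code and `nn_layers`.

import math

def round_repeats(repeats, multiplier):
    # Scale the number of block repeats by the depth multiplier, rounding
    # up so every stage keeps at least one block.
    if not multiplier:
        return repeats
    return int(math.ceil(multiplier * repeats))

def round_filters(filters, multiplier, divisor=8, min_depth=None,
                  round_down_protect=True):
    # Scale the channel count by the width multiplier, then round to the
    # nearest multiple of `divisor`; optionally bump back up one step if
    # rounding lost more than 10% of the scaled target.
    if not multiplier:
        return filters
    filters *= multiplier
    min_depth = min_depth or divisor
    new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
    if round_down_protect and new_filters < 0.9 * filters:
        new_filters += divisor
    return int(new_filters)

print(round_repeats(3, 1.8))   # ceil(5.4) -> 6
print(round_filters(32, 1.2))  # 38.4 rounds to 40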
Example #3
def block_spec_decoder(
        specs: Dict[Any, Any],
        filter_size_scale: float,
        # Set to 1 for MobileNetV1.
        divisible_by: int = 8,
        finegrain_classification_mode: bool = True):
    """Decodes specs for a block.

  Args:
    specs: A `dict` specification of block specs of a mobilenet version.
    filter_size_scale: A `float` multiplier for the filter size for all
      convolution ops. The value must be greater than zero. Typical usage will
      be to set this value in (0, 1) to reduce the number of parameters or
      computation cost of the model.
    divisible_by: An `int` that ensures all inner dimensions are divisible by
      this number.
    finegrain_classification_mode: If True, the model will keep the last layer
      large even for small multipliers, following
      https://arxiv.org/abs/1801.04381.

  Returns:
    A list of `BlockSpec` that defines structure of the base network.
  """

    spec_name = specs['spec_name']
    block_spec_schema = specs['block_spec_schema']
    block_specs = specs['block_specs']

    if not block_specs:
        raise ValueError(
            'The block spec cannot be empty for {}!'.format(spec_name))

    if len(block_specs[0]) != len(block_spec_schema):
        raise ValueError('The block spec values {} do not match '
                         'the schema {}'.format(block_specs[0],
                                                block_spec_schema))

    decoded_specs = []

    for s in block_specs:
        kw_s = dict(zip(block_spec_schema, s))
        decoded_specs.append(BlockSpec(**kw_s))

    # This adjustment applies to MobileNetV2 and MobileNetV3.
    if (spec_name != 'MobileNetV1' and finegrain_classification_mode
            and filter_size_scale < 1.0):
        decoded_specs[-1].filters /= filter_size_scale

    for ds in decoded_specs:
        if ds.filters:
            ds.filters = nn_layers.round_filters(filters=ds.filters,
                                                 multiplier=filter_size_scale,
                                                 divisor=divisible_by,
                                                 min_depth=8)

    return decoded_specs
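
To make the decode loop concrete, here is a self-contained sketch with a hypothetical, stripped-down `BlockSpec` and specs dict; the field names and rows are illustrative, not the real MobileNet schema.

from dataclasses import dataclass
from typing import Optional

@dataclass
class BlockSpec:
    block_fn: str
    kernel_size: int
    strides: int
    filters: Optional[int]

specs = {
    'spec_name': 'MobileNetV2',
    'block_spec_schema': ['block_fn', 'kernel_size', 'strides', 'filters'],
    'block_specs': [
        ('convbn', 3, 2, 32),
        ('invertedbottleneck', 3, 1, 16),
    ],
}

# Each row of `block_specs` is zipped against the schema to build the
# keyword arguments for one BlockSpec, exactly as in the loop above.
decoded = [BlockSpec(**dict(zip(specs['block_spec_schema'], row)))
           for row in specs['block_specs']]
print(decoded[1])  # BlockSpec(block_fn='invertedbottleneck', ...)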
Example #4
def block_spec_decoder(
    specs: Dict[Any, Any],
    filter_size_scale: float,
    divisible_by: int = 8) -> List[BlockSpec]:
  """Decodes specs for a block.

  Args:
    specs: A `dict` specification of block specs of a MobileDet version.
    filter_size_scale: A `float` multiplier for the filter size of all
      convolution ops. The value must be greater than zero. Typical usage is
      to set this value in (0, 1) to reduce the number of parameters or the
      computation cost of the model.
    divisible_by: An `int` that ensures all inner dimensions are divisible by
      this number.

  Returns:
    A list of `BlockSpec` that defines the structure of the base network.
  """

  spec_name = specs['spec_name']
  block_spec_schema = specs['block_spec_schema']
  block_specs = specs['block_specs']

  if not block_specs:
    raise ValueError(
        'The block spec cannot be empty for {}!'.format(spec_name))

  if len(block_specs[0]) != len(block_spec_schema):
    raise ValueError('The block spec values {} do not match '
                     'the schema {}'.format(block_specs[0], block_spec_schema))

  decoded_specs = []

  for s in block_specs:
    kw_s = dict(zip(block_spec_schema, s))
    decoded_specs.append(BlockSpec(**kw_s))

  for ds in decoded_specs:
    if ds.filters:
      ds.filters = nn_layers.round_filters(filters=ds.filters,
                                           multiplier=filter_size_scale,
                                           divisor=divisible_by,
                                           round_down_protect=False,
                                           min_depth=8)

  return decoded_specs
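
The substantive difference from the MobileNet decoder above is `round_down_protect=False`: this variant accepts a rounded filter count even when it falls below 90% of the scaled target. Using the `round_filters` sketch from Example #2 (an assumption about the real `nn_layers` behavior):

# 36 filters at a 0.25 multiplier scale to 9; rounding to a multiple of 8
# gives 8, which is below 90% of 9 (= 8.1).
print(round_filters(36, 0.25))                            # protected -> 16
print(round_filters(36, 0.25, round_down_protect=False))  # accepted  -> 8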
Example #5
    def __init__(self,
                 model_id,
                 input_specs=layers.InputSpec(shape=[None, None, None, 3]),
                 se_ratio=0.0,
                 stochastic_depth_drop_rate=0.0,
                 kernel_initializer='VarianceScaling',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activation='relu',
                 use_sync_bn=False,
                 norm_momentum=0.99,
                 norm_epsilon=0.001,
                 **kwargs):
        """Initializes an EfficientNet model.

    Args:
      model_id: A `str` of model ID of EfficientNet.
      input_specs: A `tf.keras.layers.InputSpec` of the input tensor.
      se_ratio: A `float` of squeeze and excitation ratio for inverted
        bottleneck blocks.
      stochastic_depth_drop_rate: A `float` of drop rate for drop connect layer.
      kernel_initializer: A `str` for kernel initializer of convolutional
        layers.
      kernel_regularizer: A `tf.keras.regularizers.Regularizer` object for
        Conv2D. Default to None.
      bias_regularizer: A `tf.keras.regularizers.Regularizer` object for Conv2D.
        Default to None.
      activation: A `str` of name of the activation function.
      use_sync_bn: If True, use synchronized batch normalization.
      norm_momentum: A `float` of normalization momentum for the moving average.
      norm_epsilon: A `float` added to variance to avoid dividing by zero.
      **kwargs: Additional keyword arguments to be passed.
    """
        self._model_id = model_id
        self._input_specs = input_specs
        self._se_ratio = se_ratio
        self._stochastic_depth_drop_rate = stochastic_depth_drop_rate
        self._use_sync_bn = use_sync_bn
        self._activation = activation
        self._kernel_initializer = kernel_initializer
        self._norm_momentum = norm_momentum
        self._norm_epsilon = norm_epsilon
        self._kernel_regularizer = kernel_regularizer
        self._bias_regularizer = bias_regularizer
        if use_sync_bn:
            self._norm = layers.experimental.SyncBatchNormalization
        else:
            self._norm = layers.BatchNormalization

        if tf.keras.backend.image_data_format() == 'channels_last':
            bn_axis = -1
        else:
            bn_axis = 1

        # Build EfficientNet.
        inputs = tf.keras.Input(shape=input_specs.shape[1:])
        width_scale = SCALING_MAP[model_id]['width_scale']
        depth_scale = SCALING_MAP[model_id]['depth_scale']

        # Build stem.
        x = layers.Conv2D(filters=nn_layers.round_filters(32, width_scale),
                          kernel_size=3,
                          strides=2,
                          use_bias=False,
                          padding='same',
                          kernel_initializer=self._kernel_initializer,
                          kernel_regularizer=self._kernel_regularizer,
                          bias_regularizer=self._bias_regularizer)(inputs)
        x = self._norm(axis=bn_axis,
                       momentum=norm_momentum,
                       epsilon=norm_epsilon)(x)
        x = tf_utils.get_activation(activation)(x)

        # Build intermediate blocks.
        endpoints = {}
        endpoint_level = 2
        decoded_specs = block_spec_decoder(EN_B0_BLOCK_SPECS, width_scale,
                                           depth_scale)

        for i, specs in enumerate(decoded_specs):
            x = self._block_group(inputs=x,
                                  specs=specs,
                                  name='block_group_{}'.format(i))
            if specs.is_output:
                endpoints[str(endpoint_level)] = x
                endpoint_level += 1

        # Build output specs for downstream tasks.
        self._output_specs = {l: endpoints[l].get_shape() for l in endpoints}

        # Build the final conv for classification.
        x = layers.Conv2D(filters=nn_layers.round_filters(1280, width_scale),
                          kernel_size=1,
                          strides=1,
                          use_bias=False,
                          padding='same',
                          kernel_initializer=self._kernel_initializer,
                          kernel_regularizer=self._kernel_regularizer,
                          bias_regularizer=self._bias_regularizer)(x)
        x = self._norm(axis=bn_axis,
                       momentum=norm_momentum,
                       epsilon=norm_epsilon)(x)
        endpoints[str(endpoint_level)] = tf_utils.get_activation(activation)(x)

        super(EfficientNet, self).__init__(inputs=inputs,
                                           outputs=endpoints,
                                           **kwargs)
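
A hedged usage sketch: it assumes `SCALING_MAP` is keyed by IDs such as 'b0' and that the class exposes the `output_specs` property common to Model Garden backbones; neither is shown in the snippet above.

backbone = EfficientNet(model_id='b0',
                        se_ratio=0.25,
                        stochastic_depth_drop_rate=0.1)
# `_output_specs` was recorded before the final 1x1 conv, so the exposed
# specs map intermediate endpoint levels to shapes for downstream tasks.
print(backbone.output_specs)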