Exemple #1
0
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

    in_channels = backend.int_shape(inputs)[channel_axis]
    pointwise_conv_filters = int(filters * alpha)
    pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
    x = inputs
    prefix = 'block_{}_'.format(block_id)

    if block_id:
        # Expand
        x = layers.Conv2D(round(expansion * in_channels),
                          kernel_size=1,
                          padding='same',
                          use_bias=False,
                          activation=None,
                          kernel_regularizer=regularizers.l2(l2_reg),
                          name=prefix + 'expand')(x)
        x = layers.BatchNormalization(axis=channel_axis,
                                      epsilon=1e-3,
                                      momentum=0.999,
                                      name=prefix + 'expand_BN')(x)
        x = layers.ReLU(6., name=prefix + 'expand_relu')(x)
    else:
        prefix = 'expanded_conv_'

    # Depthwise
    if stride == 2:
        x = layers.ZeroPadding2D(padding=correct_pad(backend, x, 3),
                                 name=prefix + 'pad')(x)
    x = layers.DepthwiseConv2D(kernel_size=3,
                               strides=stride,
                               activation=None,
                               use_bias=False,
                               depthwise_regularizer=regularizers.l2(l2_reg),
                               padding='same' if stride == 1 else 'valid',
                               name=prefix + 'depthwise')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name=prefix + 'depthwise_BN')(x)

    x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x)

    # Project
    x = layers.Conv2D(pointwise_filters,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      activation=None,
                      kernel_regularizer=regularizers.l2(l2_reg),
                      name=prefix + 'project')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name=prefix + 'project_BN')(x)

    if in_channels == pointwise_filters and stride == 1:
        return layers.Add(name=prefix + 'add')([inputs, x])
    return x
def _separable_conv_block(ip,
                          filters,
                          kernel_size=(3, 3),
                          strides=(1, 1),
                          block_id=None):
    '''Adds 2 blocks of [relu-separable conv-batchnorm].

    # Arguments
        ip: Input tensor
        filters: Number of output filters per layer
        kernel_size: Kernel size of separable convolutions
        strides: Strided convolution for downsampling
        block_id: String block_id

    # Returns
        A Keras tensor
    '''
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1

    with backend.name_scope('separable_conv_block_%s' % block_id):
        x = layers.Activation('relu')(ip)
        if strides == (2, 2):
            x = layers.ZeroPadding2D(
                padding=correct_pad(backend, x, kernel_size),
                name='separable_conv_1_pad_%s' % block_id)(x)
            conv_pad = 'valid'
        else:
            conv_pad = 'same'
        x = layers.SeparableConv2D(filters,
                                   kernel_size,
                                   strides=strides,
                                   name='separable_conv_1_%s' % block_id,
                                   padding=conv_pad,
                                   use_bias=False,
                                   depthwise_regularizer=l2(weight_decay),
                                   pointwise_regularizer=l2(weight_decay),
                                   kernel_initializer='he_normal')(x)
        if use_bn:
            x = layers.BatchNormalization(axis=channel_dim,
                                          momentum=bn_momentum,
                                          epsilon=1e-3,
                                          name='separable_conv_1_bn_%s' %
                                          (block_id))(x)
        x = layers.Activation('relu')(x)
        x = layers.SeparableConv2D(filters,
                                   kernel_size,
                                   name='separable_conv_2_%s' % block_id,
                                   padding='same',
                                   use_bias=False,
                                   depthwise_regularizer=l2(weight_decay),
                                   pointwise_regularizer=l2(weight_decay),
                                   kernel_initializer='he_normal')(x)
        if use_bn:
            x = layers.BatchNormalization(axis=channel_dim,
                                          momentum=bn_momentum,
                                          epsilon=1e-3,
                                          name='separable_conv_2_bn_%s' %
                                          (block_id))(x)
    return x
Exemple #3
0
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id,
                        leaky_relu):
    """Utility function used in MobileNetV2."""
    in_channels = backend.int_shape(inputs)[-1]
    pointwise_conv_filters = int(filters * alpha)
    pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
    x = inputs
    prefix = 'block_{}_'.format(block_id)

    if block_id:
        # Expand
        x = layers.Conv2D(expansion * in_channels,
                          kernel_size=1,
                          padding='same',
                          use_bias=False,
                          activation=None,
                          name=prefix + 'expand')(x)
        x = layers.BatchNormalization(epsilon=1e-3,
                                      momentum=0.999,
                                      name=prefix + 'expand_BN')(x)
        x = _mobilenetv2_relu_activate(name=prefix + 'expand_relu',
                                       leaky_relu=leaky_relu)(x)
    else:
        prefix = 'expanded_conv_'

    # Depthwise
    if stride == 2:
        x = layers.ZeroPadding2D(padding=correct_pad(backend, x, 3),
                                 name=prefix + 'pad')(x)
    x = layers.DepthwiseConv2D(kernel_size=3,
                               strides=stride,
                               activation=None,
                               use_bias=False,
                               padding='same' if stride == 1 else 'valid',
                               name=prefix + 'depthwise')(x)
    x = layers.BatchNormalization(epsilon=1e-3,
                                  momentum=0.999,
                                  name=prefix + 'depthwise_BN')(x)

    x = _mobilenetv2_relu_activate(name=prefix + 'depthwise_relu',
                                   leaky_relu=leaky_relu)(x)

    # Project
    x = layers.Conv2D(pointwise_filters,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      activation=None,
                      name=prefix + 'project')(x)
    x = layers.BatchNormalization(epsilon=1e-3,
                                  momentum=0.999,
                                  name=prefix + 'project_BN')(x)

    if in_channels == pointwise_filters and stride == 1:
        return layers.Add(name=prefix + 'add')([inputs, x])
    return x
Exemple #4
0
def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio,
                        activation, block_id):
    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1
    shortcut = x
    prefix = 'expanded_conv/'
    infilters = backend.int_shape(x)[channel_axis]
    if block_id:
        # Expand
        prefix = 'expanded_conv_{}/'.format(block_id)
        x = layers.Conv2D(_depth(infilters * expansion),
                          kernel_size=1,
                          padding='same',
                          use_bias=False,
                          name=prefix + 'expand')(x)
        x = layers.BatchNormalization(axis=channel_axis,
                                      epsilon=1e-3,
                                      momentum=0.999,
                                      name=prefix + 'expand/BatchNorm')(x)
        x = layers.Activation(activation)(x)

    if stride == 2:
        x = layers.ZeroPadding2D(padding=correct_pad(backend, x, kernel_size),
                                 name=prefix + 'depthwise/pad')(x)
    x = layers.DepthwiseConv2D(kernel_size,
                               strides=stride,
                               padding='same' if stride == 1 else 'valid',
                               use_bias=False,
                               name=prefix + 'depthwise')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name=prefix + 'depthwise/BatchNorm')(x)
    x = layers.Activation(activation)(x)

    if se_ratio:
        x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)

    x = layers.Conv2D(filters,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name=prefix + 'project')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name=prefix + 'project/BatchNorm')(x)

    if stride == 1 and infilters == filters:
        x = layers.Add(name=prefix + 'Add')([shortcut, x])
    return x
Exemple #5
0
def custom_backbone(input_tensor):

    # input layer
    x = ZeroPadding2D(padding=correct_pad(K, input_tensor, 3))(input_tensor)
    x = Conv2D(32, (3, 3), strides=(2, 2))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # blocks
    x = twoblocks(x, 128, 192, 64)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    x = twoblocks(x, 192, 256, 96)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    x = twoblocks(x, 256, 384, 128)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    x = twoblocks(x, 384, 512, 384)
    x = MaxPooling2D((2, 2), strides=(2, 2))(x)
    x = twoblocks(x, 512, 768, 256)

    return x
def MobileNetV2(input_shape=None,
                alpha=1.0,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                pooling=None,
                classes=1000,
                **kwargs):
    """Instantiates the MobileNetV2 architecture.
    # Arguments
        input_shape: optional shape tuple, to be specified if you would
            like to use a model with an input img resolution that is not
            (224, 224, 3).
            It should have exactly 3 inputs channels (224, 224, 3).
            You can also omit this option if you would like
            to infer input_shape from an input_tensor.
            If you choose to include both input_tensor and input_shape then
            input_shape will be used if they match, if the shapes
            do not match then we will throw an error.
            E.g. `(160, 160, 3)` would be one valid value.
        alpha: controls the width of the network. This is known as the
        width multiplier in the MobileNetV2 paper, but the name is kept for
        consistency with MobileNetV1 in Keras.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape or invalid alpha, rows when
            weights='imagenet'
    """

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match
    if input_shape is not None and input_tensor is not None:
        try:
            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
        except ValueError:
            try:
                is_input_t_tensor = backend.is_keras_tensor(
                    keras_utils.get_source_inputs(input_tensor))
            except ValueError:
                raise ValueError('input_tensor: ', input_tensor,
                                 'is not type input_tensor')
        if is_input_t_tensor:
            if backend.image_data_format == 'channels_first':
                if backend.int_shape(input_tensor)[1] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
            else:
                if backend.int_shape(input_tensor)[2] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
        else:
            raise ValueError('input_tensor specified: ', input_tensor,
                             'is not a keras tensor')

    # If input_shape is None, infer shape from input_tensor
    if input_shape is None and input_tensor is not None:

        try:
            backend.is_keras_tensor(input_tensor)
        except ValueError:
            raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                             type(input_tensor), 'which is not a valid type')

        if input_shape is None and not backend.is_keras_tensor(input_tensor):
            default_size = 224
        elif input_shape is None and backend.is_keras_tensor(input_tensor):
            if backend.image_data_format() == 'channels_first':
                rows = backend.int_shape(input_tensor)[2]
                cols = backend.int_shape(input_tensor)[3]
            else:
                rows = backend.int_shape(input_tensor)[1]
                cols = backend.int_shape(input_tensor)[2]

            if rows == cols and rows in [96, 128, 160, 192, 224]:
                default_size = rows
            else:
                default_size = 224

    # If input_shape is None and no input_tensor
    elif input_shape is None:
        default_size = 224

    # If input_shape is not None, assume default size
    else:
        if backend.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [96, 128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == 'imagenet':
        if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of `0.35`, `0.50`, `0.75`, '
                             '`1.0`, `1.3` or `1.4` only.')

        if rows != cols or rows not in [96, 128, 160, 192, 224]:
            rows = 224
            warnings.warn('MobileNet shape is undefined.'
                          ' Weights for input shape'
                          '(224, 224) will be loaded.')

    if backend.image_data_format() != 'channels_last':
        warnings.warn('The MobileNet family of models is only available '
                      'for the input data format "channels_last" '
                      '(width, height, channels). '
                      'However your settings specify the default '
                      'data format "channels_first" (channels, width, height).'
                      ' You should set `image_data_format="channels_last"` '
                      'in your Keras config located at ~/.keras/keras.json. '
                      'The model being returned right now will expect inputs '
                      'to follow the "channels_last" data format.')
        backend.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    first_block_filters = _make_divisible(32 * alpha, 8)
    x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3),
                             name='Conv1_pad')(img_input)
    x = layers.Conv2D(first_block_filters,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      name='Conv1')(x)
    x = layers.BatchNormalization(epsilon=1e-3,
                                  momentum=0.999,
                                  name='bn_Conv1')(x)
    x = layers.ReLU(6., name='Conv1_relu')(x)

    x = _inverted_res_block(x,
                            filters=16,
                            alpha=alpha,
                            stride=1,
                            expansion=1,
                            block_id=0)

    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=1)
    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=2)

    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=3)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=4)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=5)

    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=6)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=7)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=8)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=9)

    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=10)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=11)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=12)

    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=13)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=14)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=15)

    x = _inverted_res_block(x,
                            filters=320,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=16)

    # no alpha applied to last conv as stated in the paper:
    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    x = layers.Conv2D(last_block_filters,
                      kernel_size=1,
                      use_bias=False,
                      name='Conv_1')(x)
    x = layers.BatchNormalization(epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv_1_bn')(x)
    x = layers.ReLU(6., name='out_relu')(x)

    if include_top:
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dense(classes,
                         activation='softmax',
                         use_bias=True,
                         name='Logits')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs,
                         x,
                         name='mobilenetv2_%0.2f_%s' % (alpha, rows))

    # Load weights.
    if weights == 'imagenet':
        if backend.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format '
                             'are not available.')

        if include_top:
            model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                          str(alpha) + '_' + str(rows) + '.h5')
            weigh_path = BASE_WEIGHT_PATH + model_name
            weights_path = keras_utils.get_file(model_name,
                                                weigh_path,
                                                cache_subdir='models')
        else:
            model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                          str(alpha) + '_' + str(rows) + '_no_top' + '.h5')
            weigh_path = BASE_WEIGHT_PATH + model_name
            weights_path = keras_utils.get_file(model_name,
                                                weigh_path,
                                                cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    if old_data_format:
        backend.set_image_data_format(old_data_format)
    return model
def MobileNetV2(input_shape=None,
                alpha=1.0,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                pooling=None,
                classes=1000,
                output_stride=None,
                seperate_softmax=True,
                **kwargs):
    """Instantiates the MobileNetV2 architecture.

    # Arguments
        input_shape: optional shape tuple, to be specified if you would
            like to use a model with an input img resolution that is not
            (224, 224, 3).
            It should have exactly 3 inputs channels (224, 224, 3).
            You can also omit this option if you would like
            to infer input_shape from an input_tensor.
            If you choose to include both input_tensor and input_shape then
            input_shape will be used if they match, if the shapes
            do not match then we will throw an error.
            E.g. `(160, 160, 3)` would be one valid value.
        alpha: controls the width of the network. This is known as the
        width multiplier in the MobileNetV2 paper, but the name is kept for
        consistency with MobileNetV1 in Keras.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        output_stride: An integer that specifies the requested ratio of input to
            output spatial resolution. If not None, then we invoke atrous convolution
            if necessary to prevent the network from reducing the spatial resolution
            of the activation maps. Allowed values are 1 or any even number, excluding
            zero. Typical values are 8 (accurate fully convolutional mode), 16
            (fast fully convolutional mode), and 32 (classification mode).

            NOTE- output_stride relies on all consequent operators to support dilated
            operators via "rate" parameter. This might require wrapping non-conv
            operators to operate properly.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape or invalid alpha, rows when
            weights='imagenet'
    """
    global backend, layers, models, keras_utils
    backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match
    if input_shape is not None and input_tensor is not None:
        try:
            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
        except ValueError:
            try:
                is_input_t_tensor = backend.is_keras_tensor(
                    keras_utils.get_source_inputs(input_tensor))
            except ValueError:
                raise ValueError('input_tensor: ', input_tensor,
                                 'is not type input_tensor')
        if is_input_t_tensor:
            if backend.image_data_format == 'channels_first':
                if backend.int_shape(input_tensor)[1] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
            else:
                if backend.int_shape(input_tensor)[2] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
        else:
            raise ValueError('input_tensor specified: ', input_tensor,
                             'is not a keras tensor')

    # If input_shape is None, infer shape from input_tensor
    if input_shape is None and input_tensor is not None:

        try:
            backend.is_keras_tensor(input_tensor)
        except ValueError:
            raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                             type(input_tensor), 'which is not a valid type')

        if input_shape is None and not backend.is_keras_tensor(input_tensor):
            default_size = 224
        elif input_shape is None and backend.is_keras_tensor(input_tensor):
            if backend.image_data_format() == 'channels_first':
                rows = backend.int_shape(input_tensor)[2]
                cols = backend.int_shape(input_tensor)[3]
            else:
                rows = backend.int_shape(input_tensor)[1]
                cols = backend.int_shape(input_tensor)[2]

            if rows == cols and rows in [96, 128, 160, 192, 224]:
                default_size = rows
            else:
                default_size = 224

    # If input_shape is None and no input_tensor
    elif input_shape is None:
        default_size = 224

    # If input_shape is not None, assume default size
    else:
        if backend.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [96, 128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == 'imagenet':
        if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of `0.35`, `0.50`, `0.75`, '
                             '`1.0`, `1.3` or `1.4` only.')

        if rows != cols or rows not in [96, 128, 160, 192, 224]:
            rows = 224
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not in [96, 128, 160, 192, 224].'
                          ' Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

    first_block_filters = _make_divisible(32 * alpha, 8)
    x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3),
                             name='Conv_pad')(img_input)
    x = layers.Conv2D(first_block_filters,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      name='Conv')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv_BN')(x)
    x = layers.ReLU(6., name='Conv_relu')(x)

    rate = 1

    x = _inverted_res_block(x,
                            filters=16,
                            alpha=alpha,
                            stride=1,
                            expansion=1,
                            block_id=0,
                            rate=rate)

    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=1,
                            rate=rate)
    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=2,
                            rate=rate)

    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=3,
                            rate=rate)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=4,
                            rate=rate)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=5,
                            rate=rate)

    if output_stride == 8 or output_stride == 16:
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=6,
                                rate=rate)
        rate = 2
    else:
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=2,
                                expansion=6,
                                block_id=6,
                                rate=rate)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=7,
                            rate=rate)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=8,
                            rate=rate)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=9,
                            rate=rate)

    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=10,
                            rate=rate)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=11,
                            rate=rate)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=12,
                            rate=rate)
    if output_stride == 8:
        x = _inverted_res_block(x,
                                filters=160,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=13,
                                rate=rate)
        rate += 2
    else:
        x = _inverted_res_block(x,
                                filters=160,
                                alpha=alpha,
                                stride=2,
                                expansion=6,
                                block_id=13,
                                rate=rate)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=14,
                            rate=rate)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=15,
                            rate=rate)

    x = _inverted_res_block(x,
                            filters=320,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=16,
                            rate=rate)

    # no alpha applied to last conv as stated in the paper:
    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    if not output_stride:

        x = layers.Conv2D(last_block_filters,
                          kernel_size=1,
                          use_bias=False,
                          name='Conv_1')(x)
        x = layers.BatchNormalization(axis=channel_axis,
                                      epsilon=1e-3,
                                      momentum=0.999,
                                      name='Conv_1_bn')(x)
        x = layers.ReLU(6., name='out_relu')(x)

        if include_top:
            x = layers.GlobalAveragePooling2D()(x)
            if seperate_softmax:
                x = layers.Dense(classes,
                                 activation=None,
                                 use_bias=True,
                                 name='Logits')(x)
                x = layers.Activation('softmax', name='softmax')(x)
            else:
                x = layers.Dense(classes,
                                 activation='softmax',
                                 use_bias=True,
                                 name='Logits')(x)
        else:
            if pooling == 'avg':
                x = layers.GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs,
                         x,
                         name='mobilenetv2_%0.2f_%s' % (alpha, rows))

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                          str(alpha) + '_' + str(rows) + '.h5')
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = keras_utils.get_file(model_name,
                                                weight_path,
                                                cache_subdir='models')
        else:
            model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                          str(alpha) + '_' + str(rows) + '_no_top' + '.h5')
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = keras_utils.get_file(model_name,
                                                weight_path,
                                                cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def MobileNetV2(input_shape=None,
                alpha=1.0,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                pooling=None,
                classes=1000,
                **kwargs):
    """Instantiates the MobileNetV2 architecture.

    # Arguments
        input_shape: optional shape tuple, to be specified if you would
            like to use a model with an input img resolution that is not
            (224, 224, 3).
            It should have exactly 3 inputs channels (224, 224, 3).
            You can also omit this option if you would like
            to infer input_shape from an input_tensor.
            If you choose to include both input_tensor and input_shape then
            input_shape will be used if they match, if the shapes
            do not match then we will throw an error.
            E.g. `(160, 160, 3)` would be one valid value.
        alpha: controls the width of the network. This is known as the
        width multiplier in the MobileNetV2 paper, but the name is kept for
        consistency with MobileNetV1 in Keras.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape or invalid alpha, rows when
            weights='imagenet'
    """
    global backend, layers, models, keras_utils
    #  backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match
    if input_shape is not None and input_tensor is not None:
        try:
            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
        except ValueError:
            try:
                is_input_t_tensor = backend.is_keras_tensor(
                    keras_utils.get_source_inputs(input_tensor))
            except ValueError:
                raise ValueError('input_tensor: ', input_tensor,
                                 'is not type input_tensor')
        if is_input_t_tensor:
            if backend.image_data_format == 'channels_first':
                if backend.int_shape(input_tensor)[1] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
            else:
                if backend.int_shape(input_tensor)[2] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
        else:
            raise ValueError('input_tensor specified: ', input_tensor,
                             'is not a keras tensor')

    # If input_shape is None, infer shape from input_tensor
    if input_shape is None and input_tensor is not None:

        try:
            backend.is_keras_tensor(input_tensor)
        except ValueError:
            raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                             type(input_tensor), 'which is not a valid type')

        if input_shape is None and not backend.is_keras_tensor(input_tensor):
            default_size = 224
        elif input_shape is None and backend.is_keras_tensor(input_tensor):
            if backend.image_data_format() == 'channels_first':
                rows = backend.int_shape(input_tensor)[2]
                cols = backend.int_shape(input_tensor)[3]
            else:
                rows = backend.int_shape(input_tensor)[1]
                cols = backend.int_shape(input_tensor)[2]

            if rows == cols and rows in [96, 128, 160, 192, 224]:
                default_size = rows
            else:
                default_size = 224

    # If input_shape is None and no input_tensor
    elif input_shape is None:
        default_size = 224

    # If input_shape is not None, assume default size
    else:
        if backend.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [96, 128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

    first_block_filters = _make_divisible(32 * alpha, 8)
    x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3),
                             name='Conv1_pad')(img_input)
    x = layers.Conv2D(first_block_filters,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      name='Conv1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='bn_Conv1')(x)
    #  x = layers.ReLU(6., name='Conv1_relu')(x)

    x = layers.PReLU(name='Conv1_relu')(x)
    block_id = 0
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=1,
                            stride=2,
                            expansion=2,
                            block_id=block_id)
    for i in range(0, 4):
        block_id = block_id + 1
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=1,
                                stride=1,
                                expansion=2,
                                block_id=block_id)

    block_id = block_id + 1
    x = _inverted_res_block(x,
                            filters=128,
                            alpha=1,
                            stride=2,
                            expansion=4,
                            block_id=block_id)

    for i in range(0, 6):
        block_id = block_id + 1
        x = _inverted_res_block(x,
                                filters=128,
                                alpha=1,
                                stride=1,
                                expansion=2,
                                block_id=block_id)

    block_id = block_id + 1
    x = _inverted_res_block(x,
                            filters=128,
                            alpha=1,
                            stride=2,
                            expansion=4,
                            block_id=block_id)

    for i in range(0, 2):
        block_id = block_id + 1
        x = _inverted_res_block(x,
                                filters=128,
                                alpha=1,
                                stride=1,
                                expansion=2,
                                block_id=block_id)

    x = layers.Conv2D(512, kernel_size=1, use_bias=False, name='Conv_1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv_1_bn')(x)
    #  x = layers.ReLU(6., name='out_relu')(x)
    x = layers.PReLU(name='out_relu')(x)

    inputs = img_input

    # Create model.
    model = models.Model(inputs,
                         x,
                         name='mobilenetv2_%0.2f_%s' % (alpha, rows))
    return model
def _reduction_a_cell(ip, p, filters, block_id=None):
    '''Adds a Reduction cell for NASNet-A (Fig. 4 in the paper).

    # Arguments
        ip: Input tensor `x`
        p: Input tensor `p`
        filters: Number of output filters
        block_id: String block_id

    # Returns
        A Keras tensor
    '''
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1

    with backend.name_scope('reduction_A_block_%s' % block_id):
        p = _adjust_block(p, ip, filters, block_id)

        h = layers.Activation('relu')(ip)
        h = layers.Conv2D(filters, (1, 1),
                          strides=(1, 1),
                          padding='same',
                          kernel_regularizer=l2(weight_decay),
                          name='reduction_conv_1_%s' % block_id,
                          use_bias=False,
                          kernel_initializer='he_normal')(h)
        if use_bn:
            h = layers.BatchNormalization(axis=channel_dim,
                                          momentum=bn_momentum,
                                          epsilon=1e-3,
                                          name='reduction_bn_1_%s' %
                                          block_id)(h)
        h3 = layers.ZeroPadding2D(padding=correct_pad(backend, h, 3),
                                  name='reduction_pad_1_%s' % block_id)(h)

        with backend.name_scope('block_1'):
            x1_1 = _separable_conv_block(h,
                                         filters, (5, 5),
                                         strides=(2, 2),
                                         block_id='reduction_left1_%s' %
                                         block_id)
            x1_2 = _separable_conv_block(p,
                                         filters, (7, 7),
                                         strides=(2, 2),
                                         block_id='reduction_right1_%s' %
                                         block_id)
            x1 = layers.add([x1_1, x1_2], name='reduction_add_1_%s' % block_id)

        with backend.name_scope('block_2'):
            x2_1 = layers.MaxPooling2D(
                (3, 3),
                strides=(2, 2),
                padding='valid',
                name='reduction_left2_%s' % block_id)(h3)
            x2_2 = _separable_conv_block(p,
                                         filters, (7, 7),
                                         strides=(2, 2),
                                         block_id='reduction_right2_%s' %
                                         block_id)
            x2 = layers.add([x2_1, x2_2], name='reduction_add_2_%s' % block_id)

        with backend.name_scope('block_3'):
            x3_1 = layers.AveragePooling2D(
                (3, 3),
                strides=(2, 2),
                padding='valid',
                name='reduction_left3_%s' % block_id)(h3)
            x3_2 = _separable_conv_block(p,
                                         filters, (5, 5),
                                         strides=(2, 2),
                                         block_id='reduction_right3_%s' %
                                         block_id)
            x3 = layers.add([x3_1, x3_2], name='reduction_add3_%s' % block_id)

        with backend.name_scope('block_4'):
            x4 = layers.AveragePooling2D(
                (3, 3),
                strides=(1, 1),
                padding='same',
                name='reduction_left4_%s' % block_id)(x1)
            x4 = layers.add([x2, x4])

        with backend.name_scope('block_5'):
            x5_1 = _separable_conv_block(x1,
                                         filters, (3, 3),
                                         block_id='reduction_left4_%s' %
                                         block_id)
            x5_2 = layers.MaxPooling2D(
                (3, 3),
                strides=(2, 2),
                padding='valid',
                name='reduction_right5_%s' % block_id)(h3)
            x5 = layers.add([x5_1, x5_2], name='reduction_add4_%s' % block_id)

        x = layers.concatenate([x2, x3, x4, x5],
                               axis=channel_dim,
                               name='reduction_concat_%s' % block_id)
        return x, ip
Exemple #10
0
def mobilenet_v2_base(img_input, alpha=1.0, **kwargs):
    """

    :param img_input:
    :param alpha:
    :param kwargs:
    :return feature1:
    :return feature2:
    """
    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

    first_block_filters = _make_divisible(32 * alpha, 8)
    x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3),
                             name='Conv1_pad')(img_input)
    x = layers.Conv2D(first_block_filters,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      kernel_regularizer=regularizers.l2(l2_reg),
                      name='Conv1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='bn_Conv1')(x)
    x = layers.ReLU(6., name='Conv1_relu')(x)

    x = _inverted_res_block(x,
                            filters=16,
                            alpha=alpha,
                            stride=1,
                            expansion=1,
                            block_id=0)

    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=1)
    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=2)

    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=3)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=4)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=5)

    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=6)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=7)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=8)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=9)

    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=10)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=11)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=12)
    feature1 = x
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=13)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=14)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=15)

    x = _inverted_res_block(x,
                            filters=320,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=16)
    feature2 = x

    # m = Model(img_input, x)
    # for l in m.layers:
    #     l.trainable = False

    return feature1, feature2
    def build_v2(input_shape, alpha, depth_multiplier, dropout, include_top,
                 input_tensor, pooling, classes):
        channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

        img_input = Input(shape=input_shape)
        first_block_filters = make_divisible(32 * alpha, 8)

        # If input_shape is None, infer shape from input_tensor
        if input_shape is None and input_tensor is not None:

            try:
                backend.is_keras_tensor(input_tensor)
            except ValueError:
                raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                                 type(input_tensor),
                                 'which is not a valid type')

            if input_shape is None and not backend.is_keras_tensor(
                    input_tensor):
                default_size = 224
            elif input_shape is None and backend.is_keras_tensor(input_tensor):
                if backend.image_data_format() == 'channels_first':
                    rows = backend.int_shape(input_tensor)[2]
                    cols = backend.int_shape(input_tensor)[3]
                else:
                    rows = backend.int_shape(input_tensor)[1]
                    cols = backend.int_shape(input_tensor)[2]

                if rows == cols and rows in [96, 128, 160, 192, 224]:
                    default_size = rows
                else:
                    default_size = 224

        # If input_shape is None and no input_tensor
        elif input_shape is None:
            default_size = 224

        # If input_shape is not None, assume default size
        else:
            if K.image_data_format() == 'channels_first':
                rows = input_shape[1]
                cols = input_shape[2]
            else:
                rows = input_shape[0]
                cols = input_shape[1]

            if rows == cols and rows in [96, 128, 160, 192, 224]:
                default_size = rows
            else:
                default_size = 224

        x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                          name='Conv1_pad')(img_input)
        x = Conv2D(first_block_filters,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='valid',
                   use_bias=False,
                   name='Conv1')(x)
        x = BatchNormalization(axis=channel_axis,
                               epsilon=1e-3,
                               momentum=0.999,
                               name='bn_Conv1')(x)
        x = Activation('relu', name='Conv1_relu')(x)

        x = inverted_res_block(x,
                               filters=16,
                               alpha=alpha,
                               stride=1,
                               expansion=1,
                               block_id=0)

        x = inverted_res_block(x,
                               filters=24,
                               alpha=alpha,
                               stride=2,
                               expansion=6,
                               block_id=1)
        # x = inverted_res_block(x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2)

        x = inverted_res_block(x,
                               filters=32,
                               alpha=alpha,
                               stride=2,
                               expansion=6,
                               block_id=3)
        # x = inverted_res_block(x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=4)
        # x = inverted_res_block(x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5)

        x = inverted_res_block(x,
                               filters=64,
                               alpha=alpha,
                               stride=2,
                               expansion=6,
                               block_id=6)
        # x = inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7)
        # x = inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8)
        # x = inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9)
        #
        # x = inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10)
        # x = inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11)
        # x = inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12)

        x = inverted_res_block(x,
                               filters=160,
                               alpha=alpha,
                               stride=2,
                               expansion=6,
                               block_id=13)
        # x = inverted_res_block(x, filters=160, alpha=alpha, stride=1,  expansion=6, block_id=14)
        # x = inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15)

        x = inverted_res_block(x,
                               filters=320,
                               alpha=alpha,
                               stride=1,
                               expansion=6,
                               block_id=16)

        # no alpha applied to last conv as stated in the paper:
        # if the width multiplier is greater than 1 we
        # increase the number of output channels
        if alpha > 1.0:
            last_block_filters = make_divisible(1280 * alpha, 8)
        else:
            last_block_filters = 1280

        x = Conv2D(last_block_filters,
                   kernel_size=1,
                   use_bias=False,
                   name='Conv_1')(x)
        x = BatchNormalization(axis=channel_axis,
                               epsilon=1e-3,
                               momentum=0.999,
                               name='Conv_1_bn')(x)
        x = Activation('relu', name='out_relu')(x)

        if include_top:
            x = GlobalAveragePooling2D()(x)
            x = Dense(classes,
                      activation='softmax',
                      use_bias=True,
                      name='Logits')(x)
        else:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)

        if input_tensor is not None:
            inputs = keras_utils.get_source_inputs(input_tensor)
        else:
            inputs = img_input
        # Create model.
        model = Model(inputs, outputs=x)

        return model
Exemple #12
0
def MobileNetV2(input_shape=None,
                alpha=1.0,
                include_top=True,
                input_tensor=None,
                pooling=None,
                classes=1000):
    """Instantiates the MobileNetV2 architecture.

  Args:
      input_shape: optional shape tuple, to be specified if you would
          like to use a model with an input img resolution that is not
          (224, 224, 3).
          It should have exactly 3 inputs channels (224, 224, 3).
          You can also omit this option if you would like
          to infer input_shape from an input_tensor.
          If you choose to include both input_tensor and input_shape then
          input_shape will be used if they match, if the shapes
          do not match then we will throw an error.
          E.g. `(160, 160, 3)` would be one valid value.
      alpha: controls the width of the network. This is known as the
      width multiplier in the MobileNetV2 paper, but the name is kept for
      consistency with MobileNetV1 in Keras.
          - If `alpha` < 1.0, proportionally decreases the number
              of filters in each layer.
          - If `alpha` > 1.0, proportionally increases the number
              of filters in each layer.
          - If `alpha` = 1, default number of filters from the paper
               are used at each layer.
      include_top: whether to include the fully-connected
          layer at the top of the network.
      input_tensor: optional Keras tensor (i.e. output of
          `layers.Input()`)
          to use as image input for the model.
      pooling: Optional pooling mode for feature extraction
          when `include_top` is `False`.
          - `None` means that the output of the model
              will be the 4D tensor output of the
              last convolutional block.
          - `avg` means that global average pooling
              will be applied to the output of the
              last convolutional block, and thus
              the output of the model will be a
              2D tensor.
          - `max` means that global max pooling will
              be applied.
      classes: optional number of classes to classify images
          into, only to be specified if `include_top` is True, and
          if no `weights` argument is specified.

  Returns:
      A Keras model instance.

  Raises:
      ValueError: in case of invalid argument for `weights`,
          or invalid input shape or invalid alpha, rows when
          weights='imagenet'
  """
    if classes == 1000:
        raise ValueError('If use dataset is `imagenet`, please use it,'
                         'otherwise please use classifier images classes.')

    if input_shape == (32, 32, 1):
        raise ValueError(
            'If use input shape is `32 * 32 * 1`, please don`t use it! '
            'So you should change network architecture '
            'or use input shape is `224 * 224 * 3`.')

    # Determine proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match
    if input_shape is not None and input_tensor is not None:
        try:
            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
        except ValueError:
            try:
                is_input_t_tensor = backend.is_keras_tensor(
                    utils.get_source_inputs(input_tensor))
            except ValueError:
                raise ValueError('input_tensor: ', input_tensor,
                                 'is not type input_tensor')
        if is_input_t_tensor:
            if backend.image_data_format == 'channels_first':
                if backend.int_shape(input_tensor)[1] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
            else:
                if backend.int_shape(input_tensor)[2] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
        else:
            raise ValueError('input_tensor specified: ', input_tensor,
                             'is not a keras tensor')

    # If input_shape is None, infer shape from input_tensor
    if input_shape is None and input_tensor is not None:

        try:
            backend.is_keras_tensor(input_tensor)
        except ValueError:
            raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                             type(input_tensor), 'which is not a valid type')

        if input_shape is None and not backend.is_keras_tensor(input_tensor):
            pass
        elif input_shape is None and backend.is_keras_tensor(input_tensor):
            if backend.image_data_format() == 'channels_first':
                rows = backend.int_shape(input_tensor)[2]
                cols = backend.int_shape(input_tensor)[3]
            else:
                rows = backend.int_shape(input_tensor)[1]
                cols = backend.int_shape(input_tensor)[2]

            if rows == cols and rows in [96, 128, 160, 192, 224]:
                pass
            else:
                pass

    # If input_shape is None and no input_tensor
    elif input_shape is None:
        pass

    # If input_shape is not None, assume default size
    else:
        if backend.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [96, 128, 160, 192, 224]:
            pass
        else:
            pass

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    first_block_filters = _make_divisible(32 * alpha, 8)
    x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3),
                             name='Conv1_pad')(img_input)
    x = layers.Conv2D(first_block_filters,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      name='Conv1')(x)
    x = layers.BatchNormalization(epsilon=1e-3,
                                  momentum=0.999,
                                  name='bn_Conv1')(x)
    x = layers.ReLU(6., name='Conv1_relu')(x)

    x = _inverted_res_block(x,
                            filters=16,
                            alpha=alpha,
                            stride=1,
                            expansion=1,
                            block_id=0)

    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=1)
    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=2)

    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=3)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=4)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=5)

    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=6)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=7)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=8)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=9)

    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=10)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=11)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=12)

    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=13)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=14)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=15)

    x = _inverted_res_block(x,
                            filters=320,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=16)

    # no alpha applied to last conv as stated in the paper:
    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    x = layers.Conv2D(last_block_filters,
                      kernel_size=1,
                      use_bias=False,
                      name='Conv_1')(x)
    x = layers.BatchNormalization(epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv_1_bn')(x)
    x = layers.ReLU(6., name='out_relu')(x)

    if include_top:
        x = layers.GlobalAveragePooling2D()(x)
        x = layers.Dense(classes,
                         activation='softmax',
                         use_bias=True,
                         name='Logits')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs,
                         x,
                         name='mobilenetv2_%0.2f_%s' % (alpha, rows))

    return model
Exemple #13
0
def MobileNetV3(stack_fn,
                last_point_ch,
                input_shape=None,
                alpha=1.0,
                model_type='large',
                minimalistic=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                classes=1000,
                pooling=None,
                dropout_rate=0.2,
                **kwargs):
    """Instantiates the MobileNetV3 architecture.

    # Arguments
        stack_fn: a function that returns output tensor for the
            stacked residual blocks.
        last_point_ch: number channels at the last layer (before top)
        input_shape: optional shape tuple, to be specified if you would
            like to use a model with an input img resolution that is not
            (224, 224, 3).
            It should have exactly 3 inputs channels (224, 224, 3).
            You can also omit this option if you would like
            to infer input_shape from an input_tensor.
            If you choose to include both input_tensor and input_shape then
            input_shape will be used if they match, if the shapes
            do not match then we will throw an error.
            E.g. `(160, 160, 3)` would be one valid value.
        alpha: controls the width of the network. This is known as the
            depth multiplier in the MobileNetV3 paper, but the name is kept for
            consistency with MobileNetV1 in Keras.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
        model_type: MobileNetV3 is defined as two models: large and small. These
        models are targeted at high and low resource use cases respectively.
        minimalistic: In addition to large and small models this module also contains
            so-called minimalistic models, these models have the same per-layer
            dimensions characteristic as MobilenetV3 however, they don't utilize any
            of the advanced blocks (squeeze-and-excite units, hard-swish, and 5x5
            convolutions). While these models are less efficient on CPU, they are
            much more performant on GPU/DSP.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        dropout_rate: fraction of the input units to drop on the last layer
    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid model type, argument for `weights`,
            or invalid input shape when weights='imagenet'
    """
    # global backend, layers, models, keras_utils
    # backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match
    if input_shape is not None and input_tensor is not None:
        try:
            is_input_t_tensor = backend.is_keras_tensor(input_tensor)
        except ValueError:
            try:
                is_input_t_tensor = backend.is_keras_tensor(
                    keras_utils.get_source_inputs(input_tensor))
            except ValueError:
                raise ValueError('input_tensor: ', input_tensor,
                                 'is not type input_tensor')
        if is_input_t_tensor:
            if backend.image_data_format == 'channels_first':
                if backend.int_shape(input_tensor)[1] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
            else:
                if backend.int_shape(input_tensor)[2] != input_shape[1]:
                    raise ValueError(
                        'input_shape: ', input_shape, 'and input_tensor: ',
                        input_tensor,
                        'do not meet the same shape requirements')
        else:
            raise ValueError('input_tensor specified: ', input_tensor,
                             'is not a keras tensor')

    # If input_shape is None, infer shape from input_tensor
    if input_shape is None and input_tensor is not None:

        try:
            backend.is_keras_tensor(input_tensor)
        except ValueError:
            raise ValueError('input_tensor: ', input_tensor, 'is type: ',
                             type(input_tensor), 'which is not a valid type')

        if backend.is_keras_tensor(input_tensor):
            if backend.image_data_format() == 'channels_first':
                rows = backend.int_shape(input_tensor)[2]
                cols = backend.int_shape(input_tensor)[3]
                input_shape = (3, cols, rows)
            else:
                rows = backend.int_shape(input_tensor)[1]
                cols = backend.int_shape(input_tensor)[2]
                input_shape = (cols, rows, 3)
    # If input_shape is None and input_tensor is None using standart shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]
    if rows and cols and (rows < 32 or cols < 32):
        raise ValueError(
            'Input size must be at least 32x32; got `input_shape=' +
            str(input_shape) + '`')
    if weights == 'imagenet':
        if minimalistic is False and alpha not in [0.75, 1.0] \
                or minimalistic is True and alpha != 1.0:
            raise ValueError(
                'If imagenet weights are being loaded, '
                'alpha can be one of `0.75`, `1.0` for non minimalistic'
                ' or `1.0` for minimalistic only.')

        if rows != cols or rows != 224:
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not 224.'
                          ' Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1

    if minimalistic:
        kernel = 3
        activation = relu
        se_ratio = None
    else:
        kernel = 5
        activation = hard_swish
        se_ratio = 0.25

    x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3),
                             name='Conv_pad')(img_input)
    x = layers.Conv2D(16,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      name='Conv')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv/BatchNorm')(x)
    x = layers.Activation(activation)(x)

    x = stack_fn(x, kernel, activation, se_ratio)

    last_conv_ch = _depth(backend.int_shape(x)[channel_axis] * 6)

    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_point_ch = _depth(last_point_ch * alpha)

    x = layers.Conv2D(last_conv_ch,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name='Conv_1')(x)
    x = layers.BatchNormalization(axis=channel_axis,
                                  epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv_1/BatchNorm')(x)
    x = layers.Activation(activation)(x)

    if include_top:
        x = layers.GlobalAveragePooling2D()(x)
        if channel_axis == 1:
            x = layers.Reshape((last_conv_ch, 1, 1))(x)
        else:
            x = layers.Reshape((1, 1, last_conv_ch))(x)
        x = layers.Conv2D(last_point_ch,
                          kernel_size=1,
                          padding='same',
                          name='Conv_2')(x)
        x = layers.Activation(activation)(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate)(x)
        x = layers.Conv2D(classes,
                          kernel_size=1,
                          padding='same',
                          name='Logits')(x)
        x = layers.Flatten()(x)
        x = layers.Softmax(name='Predictions/Softmax')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D(name='max_pool')(x)
    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = tf.keras.Model(inputs, x, name='MobilenetV3' + model_type)

    # Load weights.
    if weights == 'imagenet':
        model_name = "{}{}_224_{}_float".format(
            model_type, '_minimalistic' if minimalistic else '', str(alpha))
        if include_top:
            file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
            file_hash = WEIGHTS_HASHES[model_name][0]
        else:
            file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
            file_hash = WEIGHTS_HASHES[model_name][1]
        weights_path = keras_utils.get_file(file_name,
                                            BASE_WEIGHT_PATH + file_name,
                                            cache_subdir='models',
                                            file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
Exemple #14
0
def MobileNetV2(alpha=1.0, input_tensor=None, **kwargs):

    img_input = input_tensor if input_tensor is not None else layers.Input(
        shape=(None, None, 3))
    first_block_filters = _make_divisible(32 * alpha, 8)
    x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3),
                             name='Conv1_pad')(img_input)
    x = layers.Conv2D(first_block_filters,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      name='Conv1')(x)
    x = layers.BatchNormalization(epsilon=1e-3,
                                  momentum=0.999,
                                  name='bn_Conv1')(x)
    x = layers.ReLU(6., name='Conv1_relu')(x)

    x = _inverted_res_block(x,
                            filters=16,
                            alpha=alpha,
                            stride=1,
                            expansion=1,
                            block_id=0)

    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=1)
    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=2)

    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=3)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=4)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=5)

    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=6)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=7)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=8)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=9)

    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=10)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=11)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=12)

    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=13)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=14)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=15)

    x = _inverted_res_block(x,
                            filters=320,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=16)

    # no alpha applied to last conv as stated in the paper:
    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    x = layers.Conv2D(last_block_filters,
                      kernel_size=1,
                      use_bias=False,
                      name='Conv_1')(x)
    x = layers.BatchNormalization(epsilon=1e-3,
                                  momentum=0.999,
                                  name='Conv_1_bn')(x)
    x = layers.ReLU(6., name='out_relu')(x)

    return Model(input_tensor, x)
Exemple #15
0
def EfficientNet(width_coefficient,
                 depth_coefficient,
                 default_size,
                 dropout_rate=0.2,
                 drop_connect_rate=0.2,
                 depth_divisor=8,
                 activation_fn=swish,
                 blocks_args=DEFAULT_BLOCKS_ARGS,
                 model_name='efficientnet',
                 include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 input_shape=None,
                 pooling=None,
                 classes=1000,
                 **kwargs):
    """Instantiates the EfficientNet architecture using given scaling coefficients.
    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.
    # Arguments
        width_coefficient: float, scaling coefficient for network width.
        depth_coefficient: float, scaling coefficient for network depth.
        default_size: integer, default input image size.
        dropout_rate: float, dropout rate before final classifier layer.
        drop_connect_rate: float, dropout rate at skip connections.
        depth_divisor: integer, a unit of network width.
        activation_fn: activation function.
        blocks_args: list of dicts, parameters to construct block modules.
        model_name: string, model name.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor
            (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False.
            It should have exactly 3 inputs channels.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
    global backend, layers, models, keras_utils
    backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        if not backend.is_keras_tensor(input_tensor):
            img_input = layers.Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    def round_filters(filters, divisor=depth_divisor):
        """Round number of filters based on depth multiplier."""
        filters *= width_coefficient
        new_filters = max(divisor,
                          int(filters + divisor / 2) // divisor * divisor)
        # Make sure that round down does not go down by more than 10%.
        if new_filters < 0.9 * filters:
            new_filters += divisor
        return int(new_filters)

    def round_repeats(repeats):
        """Round number of repeats based on depth multiplier."""
        return int(math.ceil(depth_coefficient * repeats))

    # Build stem
    x = img_input
    x = layers.ZeroPadding2D(padding=correct_pad(backend, x, 3),
                             name='stem_conv_pad')(x)
    x = layers.Conv2D(round_filters(32),
                      3,
                      strides=2,
                      padding='valid',
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name='stem_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis, name='stem_bn')(x)
    x = layers.Activation(activation_fn, name='stem_activation')(x)

    # Build blocks
    from copy import deepcopy
    blocks_args = deepcopy(blocks_args)

    b = 0
    blocks = float(sum(args['repeats'] for args in blocks_args))
    for (i, args) in enumerate(blocks_args):
        assert args['repeats'] > 0
        # Update block input and output filters based on depth multiplier.
        args['filters_in'] = round_filters(args['filters_in'])
        args['filters_out'] = round_filters(args['filters_out'])

        for j in range(round_repeats(args.pop('repeats'))):
            # The first block needs to take care of stride and filter size increase.
            if j > 0:
                args['strides'] = 1
                args['filters_in'] = args['filters_out']
            x = block(x,
                      activation_fn,
                      drop_connect_rate * b / blocks,
                      name='block{}{}_'.format(i + 1, chr(j + 97)),
                      **args)
            b += 1

    # Build top
    x = layers.Conv2D(round_filters(1280),
                      1,
                      padding='same',
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name='top_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis, name='top_bn')(x)
    x = layers.Activation(activation_fn, name='top_activation')(x)
    if include_top:
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        if dropout_rate > 0:
            x = layers.Dropout(dropout_rate, name='top_dropout')(x)
        x = layers.Dense(classes,
                         activation='softmax',
                         kernel_initializer=DENSE_KERNEL_INITIALIZER,
                         name='probs')(x)
    else:
        if pooling == 'avg':
            x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = layers.GlobalMaxPooling2D(name='max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = keras_utils.get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = models.Model(inputs, x, name=model_name)

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            file_suff = '_weights_tf_dim_ordering_tf_kernels_autoaugment.h5'
            file_hash = WEIGHTS_HASHES[model_name[-2:]][0]
        else:
            file_suff = '_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'
            file_hash = WEIGHTS_HASHES[model_name[-2:]][1]
        file_name = model_name + file_suff

        # https://github.com/Callidior/keras-applications/releases/download/efficientnet/efficientnet-b3_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5

        # weights_path = keras_utils.get_file(file_name,
        #                                     BASE_WEIGHTS_PATH + file_name,
        #                                     cache_subdir='./models',
        #                                     file_hash=file_hash)

        # manually download the weights then load,
        # this is for when run code on a gpu cluster that may not have internet access
        weights_path = 'models/' + file_name
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
Exemple #16
0
def block(inputs,
          activation_fn=swish,
          drop_rate=0.,
          name='',
          filters_in=32,
          filters_out=16,
          kernel_size=3,
          strides=1,
          expand_ratio=1,
          se_ratio=0.,
          id_skip=True):
    """A mobile inverted residual block.
    # Arguments
        inputs: input tensor.
        activation_fn: activation function.
        drop_rate: float between 0 and 1, fraction of the input units to drop.
        name: string, block label.
        filters_in: integer, the number of input filters.
        filters_out: integer, the number of output filters.
        kernel_size: integer, the dimension of the convolution window.
        strides: integer, the stride of the convolution.
        expand_ratio: integer, scaling coefficient for the input filters.
        se_ratio: float between 0 and 1, fraction to squeeze the input filters.
        id_skip: boolean.
    # Returns
        output tensor for the block.
    """
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1

    # Expansion phase
    filters = filters_in * expand_ratio
    if expand_ratio != 1:
        x = layers.Conv2D(filters,
                          1,
                          padding='same',
                          use_bias=False,
                          kernel_initializer=CONV_KERNEL_INITIALIZER,
                          name=name + 'expand_conv')(inputs)
        x = layers.BatchNormalization(axis=bn_axis, name=name + 'expand_bn')(x)
        x = layers.Activation(activation_fn,
                              name=name + 'expand_activation')(x)
    else:
        x = inputs

    # Depthwise Convolution
    if strides == 2:
        x = layers.ZeroPadding2D(padding=correct_pad(backend, x, kernel_size),
                                 name=name + 'dwconv_pad')(x)
        conv_pad = 'valid'
    else:
        conv_pad = 'same'
    x = layers.DepthwiseConv2D(kernel_size,
                               strides=strides,
                               padding=conv_pad,
                               use_bias=False,
                               depthwise_initializer=CONV_KERNEL_INITIALIZER,
                               name=name + 'dwconv')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=name + 'bn')(x)
    x = layers.Activation(activation_fn, name=name + 'activation')(x)

    # Squeeze and Excitation phase
    if 0 < se_ratio <= 1:
        filters_se = max(1, int(filters_in * se_ratio))
        se = layers.GlobalAveragePooling2D(name=name + 'se_squeeze')(x)
        if bn_axis == 1:
            se = layers.Reshape((filters, 1, 1), name=name + 'se_reshape')(se)
        else:
            se = layers.Reshape((1, 1, filters), name=name + 'se_reshape')(se)
        se = layers.Conv2D(filters_se,
                           1,
                           padding='same',
                           activation=activation_fn,
                           kernel_initializer=CONV_KERNEL_INITIALIZER,
                           name=name + 'se_reduce')(se)
        se = layers.Conv2D(filters,
                           1,
                           padding='same',
                           activation='sigmoid',
                           kernel_initializer=CONV_KERNEL_INITIALIZER,
                           name=name + 'se_expand')(se)
        if backend.backend() == 'theano':
            # For the Theano backend, we have to explicitly make
            # the excitation weights broadcastable.
            se = layers.Lambda(lambda x: backend.pattern_broadcast(
                x, [True, True, True, False]),
                               output_shape=lambda input_shape: input_shape,
                               name=name + 'se_broadcast')(se)
        x = layers.multiply([x, se], name=name + 'se_excite')

    # Output phase
    x = layers.Conv2D(filters_out,
                      1,
                      padding='same',
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name=name + 'project_conv')(x)
    x = layers.BatchNormalization(axis=bn_axis, name=name + 'project_bn')(x)
    if (id_skip is True and strides == 1 and filters_in == filters_out):
        if drop_rate > 0:
            x = layers.Dropout(drop_rate,
                               noise_shape=(None, 1, 1, 1),
                               name=name + 'drop')(x)
        x = layers.add([x, inputs], name=name + 'add')

    return x