def _se_block(inputs, filters, se_ratio, prefix):
    """Squeeze-and-excitation gate applied to `inputs`.

    # Arguments
        inputs: input feature tensor to be re-weighted.
        filters: channel count used for the reshaped pooled tensor and
            for the second 1x1 convolution.
        se_ratio: multiplier applied to `filters` (then passed through
            `_depth`) to size the first 1x1 convolution.
        prefix: string prepended to every named layer of the block.

    # Returns
        `inputs` multiplied element-wise by the computed gate.
    """
    se_prefix = prefix + 'squeeze_excite/'

    # Squeeze: global average pool, then restore two unit spatial axes so
    # the 1x1 convolutions below can operate on the pooled vector.
    gate = GlobalAveragePooling2D(name=se_prefix + 'AvgPool')(inputs)
    if K.image_data_format() == 'channels_first':
        pooled_shape = (filters, 1, 1)
    else:
        pooled_shape = (1, 1, filters)
    gate = Reshape(pooled_shape)(gate)

    # Excite: reduce -> ReLU -> expand -> hard sigmoid.
    reduced_channels = _depth(filters * se_ratio)
    gate = DeeplabConv2D(reduced_channels,
                         kernel_size=1,
                         padding='same',
                         name=se_prefix + 'Conv')(gate)
    gate = ReLU(name=se_prefix + 'Relu')(gate)
    gate = DeeplabConv2D(filters,
                         kernel_size=1,
                         padding='same',
                         name=se_prefix + 'Conv_1')(gate)
    gate = Activation(hard_sigmoid)(gate)

    # NOTE: the Theano-specific `K.pattern_broadcast` step is not applied;
    # the gate is multiplied with `inputs` directly.
    return Multiply(name=se_prefix + 'Mul')([inputs, gate])
def _conv2d_same(x, filters, prefix, stride=1, kernel_size=3, rate=1):
    """Convolution with explicit 'same' padding that is correct for stride > 1.

    With `stride == 1` the layer's own `padding='same'` is used. Otherwise
    the input is zero-padded explicitly and the convolution runs with
    `padding='valid'`; the padding is computed from the effective (dilated)
    kernel size so that even kernel sizes do not cause a 1 pixel drift.

    # Arguments
        x: input tensor
        filters: number of output filters of the convolution
        prefix: name given to the convolution layer
        stride: stride of the convolution (applied to both spatial axes)
        kernel_size: kernel size of the convolution (square kernel)
        rate: atrous (dilation) rate of the convolution

    # Returns
        Output tensor of the convolution.
    """
    if stride == 1:
        return DeeplabConv2D(filters, (kernel_size, kernel_size),
                             strides=(stride, stride),
                             padding='same',
                             use_bias=False,
                             dilation_rate=(rate, rate),
                             name=prefix)(x)

    kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
    pad_total = kernel_size_effective - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    # ZeroPadding2D reads a flat 2-tuple as (symmetric_height_pad,
    # symmetric_width_pad). The asymmetric begin/end padding wanted here
    # must be spelled as ((top, bottom), (left, right)); the two forms only
    # coincide when pad_beg == pad_end (odd effective kernel sizes).
    x = ZeroPadding2D(((pad_beg, pad_end), (pad_beg, pad_end)))(x)
    return DeeplabConv2D(filters, (kernel_size, kernel_size),
                         strides=(stride, stride),
                         padding='valid',
                         use_bias=False,
                         dilation_rate=(rate, rate),
                         name=prefix)(x)
def _inverted_res_block(inputs,
                        expansion,
                        stride,
                        alpha,
                        filters,
                        block_id,
                        skip_connection,
                        rate=1):
    """Inverted residual block: optional expand -> depthwise -> project.

    # Arguments
        inputs: input tensor; its last axis is read as the channel count.
        expansion: multiplier on the input channels for the expand conv.
        stride: stride of the 3x3 depthwise convolution.
        alpha: width multiplier applied to `filters`.
        filters: nominal number of output filters; scaled by `alpha` and
            passed through `_make_divisible(..., 8)`.
        block_id: block index used in layer names. A falsy value (e.g. 0)
            skips the expand stage and uses the un-numbered prefix.
        skip_connection: if truthy, `inputs` is added to the block output.
        rate: dilation rate of the depthwise convolution.

    # Returns
        Output tensor of the block.
    """
    in_channels = inputs.shape.as_list()[-1]
    pointwise_filters = _make_divisible(int(filters * alpha), 8)
    bn_args = {'epsilon': 1e-3, 'momentum': 0.999}

    if block_id:
        prefix = 'expanded_conv_{}_'.format(block_id)
        # Expand: 1x1 conv widening the channels, then BN and ReLU6.
        out = DeeplabConv2D(expansion * in_channels,
                            kernel_size=1,
                            padding='same',
                            use_bias=False,
                            activation=None,
                            name=prefix + 'expand')(inputs)
        out = CustomBatchNormalization(name=prefix + 'expand_BN',
                                       **bn_args)(out)
        out = ReLU(max_value=6.)(out)
    else:
        prefix = 'expanded_conv_'
        out = inputs

    # Depthwise 3x3 conv, BN, ReLU6.
    out = DeeplabDepthwiseConv2D(kernel_size=3,
                                 strides=stride,
                                 activation=None,
                                 use_bias=False,
                                 padding='same',
                                 dilation_rate=(rate, rate),
                                 name=prefix + 'depthwise')(out)
    out = CustomBatchNormalization(name=prefix + 'depthwise_BN',
                                   **bn_args)(out)
    out = ReLU(max_value=6., name=prefix + 'depthwise_relu')(out)

    # Project: linear 1x1 conv (no activation after BN).
    out = DeeplabConv2D(pointwise_filters,
                        kernel_size=1,
                        padding='same',
                        use_bias=False,
                        activation=None,
                        name=prefix + 'project')(out)
    out = CustomBatchNormalization(name=prefix + 'project_BN',
                                   **bn_args)(out)

    # The residual add is decided by the caller, not inferred from shapes.
    if skip_connection:
        return Add(name=prefix + 'add')([inputs, out])
    return out
Beispiel #4
0
def identity_block(input_tensor, kernel_size, filters, stage, block, rate=1):
    """Residual block whose shortcut is the unmodified input tensor.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer of the main path
        filters: sequence of 3 integers, the filters of the 3 conv layers
            of the main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names
        rate: dilation rate passed to every conv layer of the main path

    # Returns
        Output tensor for the block.
    """
    filters1, filters2, filters3 = filters
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    def conv_bn(tensor, n_filters, kernel, suffix, **conv_kwargs):
        # One conv + batch-norm pair of the main path, named by `suffix`.
        tensor = DeeplabConv2D(n_filters, kernel,
                               kernel_initializer='he_normal',
                               dilation_rate=(rate, rate),
                               name=conv_name_base + suffix,
                               **conv_kwargs)(tensor)
        return CustomBatchNormalization(axis=bn_axis,
                                        name=bn_name_base + suffix)(tensor)

    out = conv_bn(input_tensor, filters1, (1, 1), '2a')
    out = ReLU()(out)

    out = conv_bn(out, filters2, kernel_size, '2b', padding='same')
    out = ReLU()(out)

    out = conv_bn(out, filters3, (1, 1), '2c')

    # Residual sum with the untouched input, then the final activation.
    out = add([out, input_tensor])
    return ReLU()(out)
Beispiel #5
0
def Deeplabv3pResNet50(input_shape=(512, 512, 3),
                       weights=None,
                       input_tensor=None,
                       classes=21,
                       OS=8,
                       **kwargs):
    """ Instantiates the Deeplabv3+ architecture on a ResNet50 backbone
    # Arguments
        input_shape: shape of input image. format HxWxC. Its first two
            entries also set the size the prediction is resized to.
        weights: one of 'pascal_voc' or None. The value is only validated
            here; the backbone is always built with weights='imagenet'.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        classes: number of output classes of the final prediction layer.
        OS: output stride forwarded to the ResNet50 backbone and to the
            ASPP block.
        **kwargs: accepted but not used.

    # Returns
        A tuple `(model, backbone_len)`: the Keras model and the number of
        backbone layers reported by `ResNet50`.

    # Raises
        ValueError: if `weights` is neither 'pascal_voc' nor None.
    """
    if weights not in {'pascal_voc', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    img_input = (Input(shape=input_shape, name='image_input')
                 if input_tensor is None else input_tensor)

    # Input normalization happens inside the graph.
    img_norm = Lambda(normalize, name='input_normalize')(img_input)

    # Backbone: final feature map plus the skip-level feature.
    features, skip_feature, backbone_len = ResNet50(include_top=False,
                                                    input_tensor=img_norm,
                                                    weights='imagenet',
                                                    OS=OS)

    # ASPP, then the Deeplabv3+ decoder that merges in the skip feature.
    features = ASPP_block(features, OS)
    features = Decoder_block(features, skip_feature)

    # Per-pixel class scores, resized to the input resolution and
    # flattened over the spatial axes before the softmax.
    logits = DeeplabConv2D(classes, (1, 1), padding='same',
                           name='logits_semantic')(features)
    out_height, out_width = input_shape[0], input_shape[1]
    logits = Lambda(img_resize,
                    arguments={'size': (out_height, out_width),
                               'mode': 'bilinear'},
                    name='pred_resize')(logits)
    logits = Reshape((input_shape[0] * input_shape[1], classes))(logits)
    probabilities = Softmax(name='Predictions/Softmax')(logits)

    # The model is built directly on `img_input`; source inputs of a
    # user-provided `input_tensor` are not resolved here.
    model = Model(img_input, probabilities, name='deeplabv3p_resnet50')

    return model, backbone_len
Beispiel #6
0
def conv_block(input_tensor,
               kernel_size,
               filters,
               stage,
               block,
               strides=(2, 2),
               rate=1):
    """Residual block whose shortcut goes through its own conv layer.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer of the main path
        filters: sequence of 3 integers, the filters of the 3 conv layers
            of the main path
        stage: integer, current stage label, used for generating layer names
        block: 'a','b'..., current block label, used for generating layer names
        strides: strides of the first main-path conv layer and of the
            shortcut conv layer.
        rate: dilation rate passed to every conv layer in the block

    # Returns
        Output tensor for the block.
    """
    filters1, filters2, filters3 = filters
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    def conv_bn(tensor, n_filters, kernel, suffix, **conv_kwargs):
        # One conv + batch-norm pair, named by `suffix`.
        tensor = DeeplabConv2D(n_filters, kernel,
                               kernel_initializer='he_normal',
                               dilation_rate=(rate, rate),
                               name=conv_name_base + suffix,
                               **conv_kwargs)(tensor)
        return CustomBatchNormalization(axis=bn_axis,
                                        name=bn_name_base + suffix)(tensor)

    # Main path.
    out = conv_bn(input_tensor, filters1, (1, 1), '2a', strides=strides)
    out = ReLU()(out)

    out = conv_bn(out, filters2, kernel_size, '2b', padding='same')
    out = ReLU()(out)

    out = conv_bn(out, filters3, (1, 1), '2c')

    # Shortcut path: uses the same strides and the last filter count so
    # its output can be added to the main path.
    shortcut = conv_bn(input_tensor, filters3, (1, 1), '1', strides=strides)

    out = add([out, shortcut])
    return ReLU()(out)
Beispiel #7
0
def ResNet50(include_top=True,
             OS=8,
             weights='imagenet',
             input_tensor=None,
             input_shape=None,
             pooling=None,
             classes=1000,
             **kwargs):
    """Builds a ResNet50 feature extractor with a configurable output stride.

    This is a modified ResNet50 body: stages 4 and 5 swap their stride for
    dilation depending on `OS`, and the stage-2 output is exposed as a skip
    level feature. The data format convention is the one specified in the
    Keras config at `~/.keras/keras.json`.

    # Arguments
        include_top: whether to include the fully-connected
            layer at the top of the network.
        OS: output stride, one of 8, 16 or 32. Selects the strides and
            dilation rates used in stages 4 and 5.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, forwarded to
            `_obtain_input_shape` (default size 224, minimum size 32).
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` leaves the output of the last convolutional block
                unchanged (a warning is emitted).
            - `avg` applies global average pooling.
            - `max` applies global max pooling.
        classes: number of units of the top `Dense` layer, used only
            if `include_top` is True.
        **kwargs: accepted but not used.

    # Returns
        A tuple `(x, skip, backbone_len)`: the final output tensor, the
        stage-2 skip feature tensor, and the number of layers of the
        internally built model. The Keras model itself is not returned.

    # Raises
        ValueError: in case of an unsupported `OS`, an invalid argument
            for `weights`, or `classes != 1000` together with ImageNet
            weights and `include_top`.
    """
    # Stride / dilation settings for the stages whose original output
    # strides are 16 (stage 4) and 32 (stage 5).
    if OS == 8:
        stage4_stride, stage4_rate = (1, 1), 2
        stage5_stride, stage5_rate = (1, 1), 4
    elif OS == 16:
        stage4_stride, stage4_rate = (2, 2), 1
        stage5_stride, stage5_rate = (1, 1), 2
    elif OS == 32:
        stage4_stride, stage4_rate = (2, 2), 1
        stage5_stride, stage5_rate = (2, 2), 1
    else:
        raise ValueError('invalid output stride', OS)

    is_known_weights = weights in {'imagenet', None}
    if not is_known_weights and not os.path.exists(weights):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # A provided tensor is used as-is (no `Input(tensor=...)` wrapping).
    img_input = Input(shape=input_shape) if input_tensor is None else input_tensor

    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    # Stem: 7x7 stride-2 conv followed by a 3x3 stride-2 max pool.
    x = ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    x = DeeplabConv2D(64, (7, 7),
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      name='conv1')(x)
    x = CustomBatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = ReLU()(x)
    x = ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Stage 2. The block order below must not change: weights are loaded
    # without `by_name`, so layer creation order matters.
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    for label in 'bc':
        x = identity_block(x, 3, [64, 64, 256], stage=2, block=label)
    # skip level feature, with output stride = 4
    skip = x

    # Stage 3.
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    for label in 'bcd':
        x = identity_block(x, 3, [128, 128, 512], stage=3, block=label)

    # Stage 4: the original output stride becomes 16 here, so stride and
    # dilation rate are controlled by `OS` from this point on.
    x = conv_block(x,
                   3, [256, 256, 1024],
                   stage=4,
                   block='a',
                   strides=stage4_stride)  # origin: stride=(2, 2)
    for label in 'bcdef':
        x = identity_block(x,
                           3, [256, 256, 1024],
                           stage=4,
                           block=label,
                           rate=stage4_rate)

    # Stage 5: the original output stride becomes 32 here. The first block
    # still uses the stage-4 rate; the following blocks use the stage-5 rate.
    x = conv_block(x,
                   3, [512, 512, 2048],
                   stage=5,
                   block='a',
                   strides=stage5_stride,
                   rate=stage4_rate)  # origin: stride=(2, 2)
    for label in 'bc':
        x = identity_block(x,
                           3, [512, 512, 2048],
                           stage=5,
                           block=label,
                           rate=stage5_rate)

    if include_top:
        x = GlobalAveragePooling2D(name='avg_pool')(x)
        x = Dense(classes, activation='softmax', name='fc1000')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D()(x)
    else:
        warnings.warn('The output shape of `ResNet50(include_top=False)` '
                      'has been changed since Keras 2.2.0.')

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    inputs = img_input if input_tensor is None else get_source_inputs(input_tensor)

    # The model is built only so weights can be loaded and layers counted.
    model = Model(inputs, x, name='resnet50')

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            weights_path = get_file(
                'resnet50_weights_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH,
                cache_subdir='models',
                md5_hash='a7b3fe01876f51b976af0dea6bc144eb')
        else:
            weights_path = get_file(
                'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
                WEIGHTS_PATH_NO_TOP,
                cache_subdir='models',
                md5_hash='a268eb855778b3df3c7506639542a6af')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    # Return the feature map and the skip connection rather than the
    # whole "no top" model.
    backbone_len = len(model.layers)
    return x, skip, backbone_len
def Deeplabv3pLiteMobileNetV3Large(input_shape=(512, 512, 3),
                                   alpha=1.0,
                                   weights=None,
                                   input_tensor=None,
                                   classes=21,
                                   OS=8,
                                   **kwargs):
    """ Instantiates the Deeplabv3+ MobileNetV3LargeLite architecture
    # Arguments
        input_shape: shape of input image. format HxWxC. Its first two
            entries also set the size the prediction is resized to.
        alpha: width multiplier forwarded to the MobileNetV3Large backbone.
                - If `alpha` < 1.0, proportionally decreases the number
                    of filters in each layer.
                - If `alpha` > 1.0, proportionally increases the number
                    of filters in each layer.
                - If `alpha` = 1, default number of filters from the paper
                    are used at each layer.
            The backbone is built with ImageNet weights, so it may reject
            `alpha` values for which no pre-trained weights exist.
        weights: one of 'pascal_voc' or None. The value is only validated
            here; the backbone is always built with weights='imagenet'.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        classes: number of output classes of the final prediction layer.
        OS: output stride forwarded to the backbone.
        **kwargs: accepted but not used.

    # Returns
        A tuple `(model, backbone_len)`: the Keras model and the backbone
        layer count reported by `MobileNetV3Large`.
    # Raises
        ValueError: in case of invalid argument for `weights`
    """

    if not (weights in {'pascal_voc', None}):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    if input_tensor is None:
        img_input = Input(shape=input_shape, name='image_input')
    else:
        img_input = input_tensor

    # normalize input image
    img_norm = Lambda(normalize, name='input_normalize')(img_input)

    # backbone body for feature extract; the skip feature is not needed
    # because this variant has no decoder block.
    # `alpha` is forwarded so the documented width multiplier actually
    # takes effect (it used to be hard-coded to 1.0 here).
    x, _, backbone_len = MobileNetV3Large(include_top=False,
                                          input_tensor=img_norm,
                                          weights='imagenet',
                                          OS=OS,
                                          alpha=alpha)

    # use ASPP Lite block & no decode block
    x = ASPP_Lite_block(x)

    # Final prediction conv block: per-pixel class scores, resized to the
    # input resolution and flattened over the spatial axes before softmax.
    x = DeeplabConv2D(classes, (1, 1), padding='same',
                      name='logits_semantic')(x)
    x = Lambda(img_resize,
               arguments={
                   'size': (input_shape[0], input_shape[1]),
                   'mode': 'bilinear'
               },
               name='pred_resize')(x)
    x = Reshape((input_shape[0] * input_shape[1], classes))(x)
    x = Softmax(name='Predictions/Softmax')(x)

    # The model is built directly on `img_input`; source inputs of a
    # user-provided `input_tensor` are not resolved here.
    model = Model(img_input, x, name='deeplabv3p_mobilenetv3large_lite')

    return model, backbone_len
def MobileNetV3(stack_fn,
                last_point_ch,
                input_shape=None,
                alpha=1.0,
                model_type='large',
                minimalistic=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                classes=1000,
                pooling=None,
                dropout_rate=0.2,
                **kwargs):
    """Builds the MobileNetV3 body and returns its feature tensors.

    # Arguments
        stack_fn: a function called as
            `stack_fn(x, kernel, activation, se_ratio)` that returns the
            output tensor of the stacked residual blocks together with a
            skip feature tensor.
        last_point_ch: number channels at the last layer (before top)
        input_shape: optional shape tuple, forwarded to
            `_obtain_input_shape` (default size 224, minimum size 32).
            E.g. `(160, 160, 3)` would be one valid value.
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
            Inside this function `alpha` scales `last_point_ch` when it
            is greater than 1 and selects the pre-trained weight file.
        model_type: string used in the model name and in the name of the
            pre-trained weight file (e.g. 'large').
        minimalistic: if truthy, 3x3 kernels, `relu` and no
            squeeze-and-excite ratio are handed to `stack_fn`; otherwise
            5x5 kernels, `hard_swish` and an SE ratio of 0.25.
        include_top: whether to include the classification
            layers at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        classes: number of output classes of the top, used only
            if `include_top` is True.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` leaves the output of the last convolutional layer
                unchanged.
            - `avg` applies global average pooling.
            - `max` applies global max pooling.
        dropout_rate: fraction of the input units to drop on the last layer
        **kwargs: accepted but not used.
    # Returns
        A tuple `(final_feature, skip_feature, backbone_len)`:
        the output tensor of `stack_fn`, the skip feature returned by
        `stack_fn`, and `len(model.layers) - 3` for the internally built
        model. The Keras model itself is not returned.
    # Raises
        ValueError: in case of invalid argument for `weights`,
            an input smaller than 32x32, or an `alpha` unsupported by
            the ImageNet weights.
    """
    is_known_weights = weights in {'imagenet', None}
    if not is_known_weights and not os.path.exists(weights):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # Fall back to the standard shape when neither a shape nor a tensor
    # is available.
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    # Locate the spatial axes of `input_shape` for the size checks.
    spatial_axes = (0, 1) if K.image_data_format() == 'channels_last' else (1, 2)
    rows = input_shape[spatial_axes[0]]
    cols = input_shape[spatial_axes[1]]
    if rows and cols and (rows < 32 or cols < 32):
        raise ValueError(
            'Input size must be at least 32x32; got `input_shape=' +
            str(input_shape) + '`')

    if weights == 'imagenet':
        unsupported_alpha = (
            (minimalistic is False and alpha not in [0.75, 1.0])
            or (minimalistic is True and alpha != 1.0))
        if unsupported_alpha:
            raise ValueError(
                'If imagenet weights are being loaded, '
                'alpha can be one of `0.75`, `1.0` for non minimalistic'
                ' or `1.0` for minimalistic only.')

        if rows != cols or rows != 224:
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not 224.'
                          ' Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    # A provided tensor is used as-is (no `Input(tensor=...)` wrapping).
    img_input = Input(shape=input_shape) if input_tensor is None else input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    bn_args = {'axis': channel_axis, 'epsilon': 1e-3, 'momentum': 0.999}

    # Block settings handed to `stack_fn`.
    if minimalistic:
        kernel, activation, se_ratio = 3, relu, None
    else:
        kernel, activation, se_ratio = 5, hard_swish, 0.25

    # Stem: padded 3x3 stride-2 conv, BN and activation.
    x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                      name='Conv_pad')(img_input)
    x = DeeplabConv2D(16,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      name='Conv')(x)
    x = CustomBatchNormalization(name='Conv/BatchNorm', **bn_args)(x)
    x = Activation(activation)(x)

    x, skip_feature = stack_fn(x, kernel, activation, se_ratio)
    # Keep the end of the feature extractor as the final feature map; the
    # layers added below are not part of the returned features.
    final_feature = x

    last_conv_ch = _depth(K.int_shape(x)[channel_axis] * 6)

    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_point_ch = _depth(last_point_ch * alpha)

    x = DeeplabConv2D(last_conv_ch,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name='Conv_1')(x)
    x = CustomBatchNormalization(name='Conv_1/BatchNorm', **bn_args)(x)
    x = Activation(activation)(x)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        pooled_shape = ((last_conv_ch, 1, 1) if channel_axis == 1
                        else (1, 1, last_conv_ch))
        x = Reshape(pooled_shape)(x)
        x = DeeplabConv2D(last_point_ch,
                          kernel_size=1,
                          padding='same',
                          name='Conv_2')(x)
        x = Activation(activation)(x)
        if dropout_rate > 0:
            x = Dropout(dropout_rate)(x)
        x = DeeplabConv2D(classes,
                          kernel_size=1,
                          padding='same',
                          name='Logits')(x)
        x = Flatten()(x)
        x = Softmax(name='Predictions/Softmax')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D(name='avg_pool')(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D(name='max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    inputs = img_input if input_tensor is None else get_source_inputs(input_tensor)

    # The model is built only so weights can be loaded and layers counted.
    model = Model(inputs, x, name='MobilenetV3' + model_type)

    # Load weights.
    if weights == 'imagenet':
        model_name = "{}{}_224_{}_float".format(
            model_type, '_minimalistic' if minimalistic else '', str(alpha))
        if include_top:
            suffix, hash_index = '.h5', 0
        else:
            suffix, hash_index = '_no_top.h5', 1
        file_name = 'weights_mobilenet_v3_' + model_name + suffix
        file_hash = WEIGHTS_HASHES[model_name][hash_index]
        weights_path = get_file(file_name,
                                BASE_WEIGHT_PATH + file_name,
                                cache_subdir='models',
                                file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    # NOTE(review): the "- 3" presumably discounts the Conv_1 / BatchNorm /
    # Activation layers added after `final_feature`; that only matches
    # when `include_top` is False and `pooling` is None. Confirm.
    return final_feature, skip_feature, len(model.layers) - 3
def _inverted_res_block(x,
                        expansion,
                        filters,
                        kernel_size,
                        stride,
                        se_ratio,
                        activation,
                        block_id,
                        skip_connection=False,
                        rate=1):
    """Stack one inverted residual (bottleneck) block on top of `x`.

    Layer sequence: optional 1x1 expand conv -> depthwise conv ->
    optional squeeze-and-excite -> 1x1 project conv. Every conv is
    followed by batch normalization; the expand and depthwise stages are
    additionally followed by `activation`.

    # Arguments
        x: input tensor.
        expansion: multiplier applied to the input channel count; the
            expand conv (and the SE block) use
            `_depth(infilters * expansion)` filters.
        filters: number of output filters of the 1x1 project conv.
        kernel_size: kernel size of the depthwise conv.
        stride: stride of the depthwise conv.
        se_ratio: if truthy, a `_se_block` with this ratio is inserted
            after the depthwise stage.
        activation: passed to `Activation(...)` after the expand and
            depthwise batch norms.
        block_id: if falsy (e.g. 0) the expand stage is skipped and
            layers are prefixed with 'expanded_conv/'; otherwise the
            prefix is 'expanded_conv_{block_id}/'.
        skip_connection: if truthy, the block input is added to the
            projected output. No stride/channel check is performed here,
            so the caller has to make sure both shapes match.
        rate: dilation rate of the depthwise conv.

    # Returns
        Output tensor of the block.
    """
    bn_axis = 1 if K.image_data_format() == 'channels_first' else -1
    block_input = x
    in_channels = K.int_shape(x)[bn_axis]
    prefix = ('expanded_conv_{}/'.format(block_id)
              if block_id else 'expanded_conv/')

    def _batch_norm(tensor, suffix):
        # All three batch norms of the block share the same settings.
        return CustomBatchNormalization(axis=bn_axis,
                                        epsilon=1e-3,
                                        momentum=0.999,
                                        name=prefix + suffix)(tensor)

    if block_id:
        # Expand: widen the channels with a pointwise conv.
        x = DeeplabConv2D(_depth(in_channels * expansion),
                          kernel_size=1,
                          padding='same',
                          use_bias=False,
                          name=prefix + 'expand')(x)
        x = _batch_norm(x, 'expand/BatchNorm')
        x = Activation(activation)(x)

    # Depthwise: always 'same' padding (no explicit zero padding for
    # stride 2), with optional dilation.
    x = DeeplabDepthwiseConv2D(kernel_size,
                               strides=stride,
                               padding='same',
                               dilation_rate=(rate, rate),
                               use_bias=False,
                               name=prefix + 'depthwise/Conv')(x)
    x = _batch_norm(x, 'depthwise/BatchNorm')
    x = Activation(activation)(x)

    if se_ratio:
        x = _se_block(x, _depth(in_channels * expansion), se_ratio, prefix)

    # Project: linear bottleneck, no activation after the batch norm.
    x = DeeplabConv2D(filters,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name=prefix + 'project')(x)
    x = _batch_norm(x, 'project/BatchNorm')

    # The residual is controlled explicitly by the caller instead of
    # being inferred from `stride == 1 and infilters == filters`.
    if not skip_connection:
        return x
    return Add(name=prefix + 'Add')([block_input, x])
def MobileNetV2_body(input_tensor, OS, alpha, weights='imagenet'):
    """
    Modified MobileNetV2 feature extractor body
    with specified output stride and skip level feature

    # Arguments
        input_tensor: Keras tensor the backbone is built on.
        OS: output stride of the feature extractor, one of 8, 16 or 32.
            Smaller values replace striding by dilation in the late blocks.
        alpha: MobileNetV2 width multiplier.
        weights: when equal to 'imagenet', the "no top" ImageNet weights
            file for this `alpha` is fetched with `get_file` and loaded;
            any other value skips weight loading.

    # Returns
        Tuple `(x, skip, backbone_len)`: the output of the last inverted
        residual block, the output of block 2 (the skip level feature)
        and `len(model.layers) - 3` of the temporary "no top" model,
        which discounts the three `Conv_1` head layers that are only
        added so the pretrained weights can be loaded.

    # Raises
        ValueError: if `OS` is not 8, 16 or 32.
    """
    # (stride, dilation rate) for the stages whose original output
    # stride is 16 and 32 respectively.
    if OS == 8:
        os16_stride, os16_rate, os32_stride, os32_rate = 1, 2, 1, 4
    elif OS == 16:
        os16_stride, os16_rate, os32_stride, os32_rate = 2, 1, 1, 2
    elif OS == 32:
        os16_stride, os16_rate, os32_stride, os32_rate = 2, 1, 2, 1
    else:
        raise ValueError('invalid output stride', OS)

    stem_filters = _make_divisible(32 * alpha, 8)
    x = DeeplabConv2D(stem_filters,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='same',
                      use_bias=False,
                      name='Conv')(input_tensor)
    x = CustomBatchNormalization(epsilon=1e-3, momentum=0.999,
                                 name='Conv_BN')(x)
    x = ReLU(6.)(x)

    # One entry per inverted residual block, in order; the list index is
    # the block_id. `rate` is only passed where dilation is controlled.
    block_specs = [
        dict(filters=16, stride=1, expansion=1, skip_connection=False),

        dict(filters=24, stride=2, expansion=6, skip_connection=False),
        dict(filters=24, stride=1, expansion=6, skip_connection=True),

        dict(filters=32, stride=2, expansion=6, skip_connection=False),
        dict(filters=32, stride=1, expansion=6, skip_connection=True),
        dict(filters=32, stride=1, expansion=6, skip_connection=True),

        # original output stride changes to 16 from here, so we start to
        # control block stride and dilation rate (origin: stride=2)
        dict(filters=64, stride=os16_stride, expansion=6,
             skip_connection=False),
        dict(filters=64, stride=1, rate=os16_rate, expansion=6,
             skip_connection=True),
        dict(filters=64, stride=1, rate=os16_rate, expansion=6,
             skip_connection=True),
        dict(filters=64, stride=1, rate=os16_rate, expansion=6,
             skip_connection=True),

        dict(filters=96, stride=1, rate=os16_rate, expansion=6,
             skip_connection=False),
        dict(filters=96, stride=1, rate=os16_rate, expansion=6,
             skip_connection=True),
        dict(filters=96, stride=1, rate=os16_rate, expansion=6,
             skip_connection=True),

        # original output stride changes to 32 from here (origin:
        # stride=2); this first block still uses the previous stage's rate
        dict(filters=160, stride=os32_stride, rate=os16_rate, expansion=6,
             skip_connection=False),
        dict(filters=160, stride=1, rate=os32_rate, expansion=6,
             skip_connection=True),
        dict(filters=160, stride=1, rate=os32_rate, expansion=6,
             skip_connection=True),

        dict(filters=320, stride=1, rate=os32_rate, expansion=6,
             skip_connection=False),
    ]
    # skip level feature is taken after this block (output stride = 4)
    skip_block_id = 2

    # NOTE(review): these calls pass `alpha` and omit `kernel_size`,
    # `se_ratio` and `activation`, so they need a MobileNetV2-style
    # `_inverted_res_block`; the definition directly above this function
    # does not accept them - confirm which definition is bound at call time.
    for block_id, spec in enumerate(block_specs):
        x = _inverted_res_block(x, alpha=alpha, block_id=block_id, **spec)
        if block_id == skip_block_id:
            skip = x
    # end of feature extractor

    # expand the model structure to MobileNetV2 no top, so
    # that we can load official imagenet pretrained weights

    # no alpha applied to last conv as stated in the paper:
    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    last_block_filters = (_make_divisible(1280 * alpha, 8)
                          if alpha > 1.0 else 1280)

    y = DeeplabConv2D(last_block_filters,
                      kernel_size=1,
                      use_bias=False,
                      name='Conv_1')(x)
    y = CustomBatchNormalization(epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv_1_bn')(y)
    y = ReLU(6., name='out_relu')(y)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    # (`inputs` is only bound when `input_tensor` is not None.)
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)

    # hardcode row=224: only used for the model / weight file names
    rows = 224
    model = Model(inputs, y, name='mobilenetv2_%0.2f_%s' % (alpha, rows))

    # Load weights.
    if weights == 'imagenet':
        model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                      str(alpha) + '_' + str(rows) + '_no_top' + '.h5')
        weights_path = get_file(model_name,
                                BACKBONE_WEIGHT_PATH + model_name,
                                cache_subdir='models')
        model.load_weights(weights_path)

    # need to return feature map and skip connection,
    # not the whole "no top" model
    backbone_len = len(model.layers) - 3
    return x, skip, backbone_len
def Deeplabv3pMobileNetV2(input_shape=(512, 512, 3),
                          alpha=1.0,
                          weights=None,
                          input_tensor=None,
                          classes=21,
                          OS=8,
                          **kwargs):
    """ Instantiates the Deeplabv3+ MobileNetV2 architecture
    # Arguments
        input_shape: shape of input image. format HxWxC.
            Its first two entries also define the size the logits are
            resized to and the length of the flattened output, so it has
            to agree with `input_tensor` when one is supplied.
        alpha: controls the width of the MobileNetV2 network. This is known as the
            width multiplier in the MobileNetV2 paper.
                - If `alpha` < 1.0, proportionally decreases the number
                    of filters in each layer.
                - If `alpha` > 1.0, proportionally increases the number
                    of filters in each layer.
                - If `alpha` = 1, default number of filters from the paper
                    are used at each layer.
        weights: one of 'pascal_voc' or None. The value is only
            validated here: the PASCAL VOC loading step is disabled, and
            the backbone is built with `MobileNetV2_body`'s own default
            for its `weights` argument.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        classes: number of output classes (filters of the final 1x1 conv).
        OS: output stride, forwarded to `MobileNetV2_body` and `ASPP_block`.
        **kwargs: accepted but not used.

    # Returns
        Tuple `(model, backbone_len)`: the Keras model, whose output is
        reshaped to `(H * W, classes)` and passed through Softmax, and
        the backbone layer count reported by `MobileNetV2_body`.

    # Raises
        ValueError: in case of invalid argument for `weights`.
    """

    if weights not in {'pascal_voc', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    img_input = (Input(shape=input_shape, name='image_input')
                 if input_tensor is None else input_tensor)

    # normalize input image
    img_norm = Lambda(normalize, name='input_normalize')(img_input)

    # backbone -> ASPP -> decoder
    features, skip_feature, backbone_len = MobileNetV2_body(
        img_norm, OS, alpha)
    features = ASPP_block(features, OS)
    features = Decoder_block(features, skip_feature)

    # Final prediction conv block
    logits = DeeplabConv2D(classes, (1, 1),
                           padding='same',
                           name='logits_semantic')(features)

    # Bring the logits back to the input resolution, then flatten the
    # spatial dimensions so the output is (pixels, classes).
    height, width = input_shape[0], input_shape[1]
    resize_args = {'size': (height, width), 'mode': 'bilinear'}
    logits = Lambda(img_resize, arguments=resize_args,
                    name='pred_resize')(logits)
    logits = Reshape((height * width, classes))(logits)
    probs = Softmax(name='Predictions/Softmax')(logits)

    model = Model(img_input, probs, name='deeplabv3p_mobilenetv2')
    return model, backbone_len
def Deeplabv3pXception(input_shape=(512, 512, 3),
                       weights='pascal_voc',
                       input_tensor=None,
                       classes=21,
                       OS=16,
                       **kwargs):
    """ Instantiates the Deeplabv3+ architecture with an Xception backbone
    Optionally loads weights pre-trained on PASCAL VOC.
    # Arguments
        input_shape: shape of input image. format HxWxC.
            Its first two entries also define the size the logits are
            resized to and the length of the flattened output, so it has
            to agree with `input_tensor` when one is supplied.
        weights: one of 'pascal_voc' (pre-trained on pascal voc)
            or None (random initialization). With 'pascal_voc' the
            weight file is fetched with `get_file` and loaded by layer name.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        classes: number of output classes (filters of the final 1x1 conv).
        OS: output stride, forwarded to `Xception_body` and `ASPP_block`.
        **kwargs: accepted but not used.
    # Returns
        Tuple `(model, backbone_len)`: the Keras model, whose output is
        reshaped to `(H * W, classes)` and passed through Softmax, and
        the backbone layer count reported by `Xception_body`.
    # Raises
        ValueError: in case of invalid argument for `weights`.
    """

    if weights not in {'pascal_voc', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    img_input = (Input(shape=input_shape, name='image_input')
                 if input_tensor is None else input_tensor)

    # normalize input image
    img_norm = Lambda(normalize, name='input_normalize')(img_input)

    # backbone -> ASPP -> decoder
    features, skip_feature, backbone_len = Xception_body(img_norm, OS)
    features = ASPP_block(features, OS)
    features = Decoder_block(features, skip_feature)

    # Final prediction conv block
    logits = DeeplabConv2D(classes, (1, 1),
                           padding='same',
                           name='logits_semantic')(features)

    # Bring the logits back to the input resolution, then flatten the
    # spatial dimensions so the output is (pixels, classes).
    height, width = input_shape[0], input_shape[1]
    resize_args = {'size': (height, width), 'mode': 'bilinear'}
    logits = Lambda(img_resize, arguments=resize_args,
                    name='pred_resize')(logits)
    logits = Reshape((height * width, classes))(logits)
    probs = Softmax(name='Predictions/Softmax')(logits)

    model = Model(img_input, probs, name='deeplabv3p_xception')

    # load weights
    if weights == 'pascal_voc':
        pretrained_path = get_file(
            'deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
            WEIGHTS_PATH_X,
            cache_subdir='models')
        # by_name: layers are matched through their names
        model.load_weights(pretrained_path, by_name=True)

    return model, backbone_len
def Xception_body(input_tensor, OS):
    """
    Modified Aligned Xception feature extractor body
    with specified output stride and skip level feature

    # Arguments
        input_tensor: Keras tensor the backbone is built on.
        OS: output stride of the feature extractor, one of 8, 16 or 32.
            Smaller values replace striding by dilation in the late blocks.

    # Returns
        Tuple `(x, skip, backbone_len)`: the output of the exit flow, the
        skip tensor returned by 'entry_flow_block2' and the number of
        layers of a model spanning the inputs up to `x`.

    # Raises
        ValueError: if `OS` is not 8, 16 or 32.
    """
    # (stride, dilation rate) for the stages whose original output
    # stride is 16 and 32 respectively.
    if OS == 8:
        os16_stride, os16_rate, os32_stride, os32_rate = 1, 2, 1, 4
    elif OS == 16:
        os16_stride, os16_rate, os32_stride, os32_rate = 2, 1, 1, 2
    elif OS == 32:
        os16_stride, os16_rate, os32_stride, os32_rate = 2, 1, 2, 1
    else:
        raise ValueError('invalid output stride', OS)

    # Entry flow stem: two 3x3 convs, each followed by BN + ReLU.
    x = DeeplabConv2D(32, (3, 3),
                      strides=(2, 2),
                      name='entry_flow_conv1_1',
                      use_bias=False,
                      padding='same')(input_tensor)
    x = CustomBatchNormalization(name='entry_flow_conv1_1_BN')(x)
    x = ReLU()(x)

    x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
    x = CustomBatchNormalization(name='entry_flow_conv1_2_BN')(x)
    x = ReLU()(x)

    x = _xception_block(x, [128, 128, 128],
                        'entry_flow_block1',
                        skip_connection_type='conv',
                        stride=2,
                        depth_activation=False)

    # skip level feature, with output stride = 4
    x, skip = _xception_block(x, [256, 256, 256],
                              'entry_flow_block2',
                              skip_connection_type='conv',
                              stride=2,
                              depth_activation=False,
                              return_skip=True)

    # original output stride changes to 16 from here, so we start to
    # control block stride and dilation rate
    x = _xception_block(x, [728, 728, 728],
                        'entry_flow_block3',
                        skip_connection_type='conv',
                        stride=os16_stride,
                        depth_activation=False)

    # Middle flow: 16 identical residual units, named 1..16.
    for unit in range(1, 17):
        x = _xception_block(x, [728, 728, 728],
                            'middle_flow_unit_{}'.format(unit),
                            skip_connection_type='sum',
                            stride=1,
                            rate=os16_rate,
                            depth_activation=False)

    # original output stride changes to 32 from here; the first exit
    # block still uses the previous stage's dilation rate
    x = _xception_block(x, [728, 1024, 1024],
                        'exit_flow_block1',
                        skip_connection_type='conv',
                        stride=os32_stride,
                        rate=os16_rate,
                        depth_activation=False)
    x = _xception_block(x, [1536, 1536, 2048],
                        'exit_flow_block2',
                        skip_connection_type='none',
                        stride=1,
                        rate=os32_rate,
                        depth_activation=True)
    # end of feature extractor

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    # (`inputs` is only bound when `input_tensor` is not None.)
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)

    # A throwaway model is built only to count the backbone layers.
    backbone_model = Model(inputs, x)
    backbone_len = len(backbone_model.layers)
    return x, skip, backbone_len