Esempio n. 1
0
def _se_block(inputs, filters, se_ratio, prefix):
    x = GlobalAveragePooling2D(name=prefix + 'squeeze_excite/AvgPool')(inputs)
    if K.image_data_format() == 'channels_first':
        x = Reshape((filters, 1, 1))(x)
    else:
        x = Reshape((1, 1, filters))(x)
    x = YoloConv2D(_depth(filters * se_ratio),
                   kernel_size=1,
                   padding='same',
                   name=prefix + 'squeeze_excite/Conv')(x)
    x = ReLU(name=prefix + 'squeeze_excite/Relu')(x)
    x = YoloConv2D(filters,
                   kernel_size=1,
                   padding='same',
                   name=prefix + 'squeeze_excite/Conv_1')(x)
    x = Activation(hard_sigmoid)(x)
    #if K.backend() == 'theano':
    ## For the Theano backend, we have to explicitly make
    ## the excitation weights broadcastable.
    #x = Lambda(
    #lambda br: K.pattern_broadcast(br, [True, True, True, False]),
    #output_shape=lambda input_shape: input_shape,
    #name=prefix + 'squeeze_excite/broadcast')(x)
    x = Multiply(name=prefix + 'squeeze_excite/Mul')([inputs, x])
    return x
Esempio n. 2
0
def _group_conv(x,
                in_channels,
                out_channels,
                groups,
                kernel=1,
                stride=1,
                name=''):
    """
    grouped convolution
    Parameters
    ----------
    x:
        Input tensor of with `channels_last` data format
    in_channels:
        number of input channels
    out_channels:
        number of output channels
    groups:
        number of groups per channel
    kernel: int(1)
        An integer or tuple/list of 2 integers, specifying the
        width and height of the 2D convolution window.
        Can be a single integer to specify the same value for
        all spatial dimensions.
    stride: int(1)
        An integer or tuple/list of 2 integers,
        specifying the strides of the convolution along the width and height.
        Can be a single integer to specify the same value for all spatial dimensions.
    name: str
        A string to specifies the layer name
    Returns
    -------
    """
    if groups == 1:
        return YoloConv2D(filters=out_channels,
                          kernel_size=kernel,
                          padding='same',
                          use_bias=False,
                          strides=stride,
                          name=name)(x)

    # number of intput channels per group
    ig = in_channels // groups
    group_list = []

    assert out_channels % groups == 0

    for i in range(groups):
        offset = i * ig
        group = Lambda(lambda z: z[:, :, :, offset:offset + ig],
                       name='%s/g%d_slice' % (name, i))(x)
        group_list.append(
            YoloConv2D(int(0.5 + out_channels / groups),
                       kernel_size=kernel,
                       strides=stride,
                       use_bias=False,
                       padding='same',
                       name='%s_/g%d' % (name, i))(group))
    return Concatenate(name='%s/concat' % name)(group_list)
Esempio n. 3
0
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    in_channels = K.int_shape(inputs)[channel_axis]
    pointwise_conv_filters = int(filters * alpha)
    pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
    x = inputs
    prefix = 'block_{}_'.format(block_id)

    if block_id:
        # Expand
        x = YoloConv2D(expansion * in_channels,
                       kernel_size=1,
                       padding='same',
                       use_bias=False,
                       activation=None,
                       name=prefix + 'expand')(x)
        x = CustomBatchNormalization(axis=channel_axis,
                                     epsilon=1e-3,
                                     momentum=0.999,
                                     name=prefix + 'expand_BN')(x)
        x = ReLU(6., name=prefix + 'expand_relu')(x)
    else:
        prefix = 'expanded_conv_'

    # Depthwise
    if stride == 2:
        x = ZeroPadding2D(padding=correct_pad(K, x, 3), name=prefix + 'pad')(x)
    x = YoloDepthwiseConv2D(kernel_size=3,
                            strides=stride,
                            activation=None,
                            use_bias=False,
                            padding='same' if stride == 1 else 'valid',
                            name=prefix + 'depthwise')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name=prefix + 'depthwise_BN')(x)

    x = ReLU(6., name=prefix + 'depthwise_relu')(x)

    # Project
    x = YoloConv2D(pointwise_filters,
                   kernel_size=1,
                   padding='same',
                   use_bias=False,
                   activation=None,
                   name=prefix + 'project')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name=prefix + 'project_BN')(x)

    if in_channels == pointwise_filters and stride == 1:
        return Add(name=prefix + 'add')([inputs, x])
    return x
Esempio n. 4
0
def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio,
                        activation, block_id):
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    shortcut = x
    prefix = 'expanded_conv/'
    infilters = K.int_shape(x)[channel_axis]
    if block_id:
        # Expand
        prefix = 'expanded_conv_{}/'.format(block_id)
        x = YoloConv2D(_depth(infilters * expansion),
                       kernel_size=1,
                       padding='same',
                       use_bias=False,
                       name=prefix + 'expand')(x)
        x = CustomBatchNormalization(axis=channel_axis,
                                     epsilon=1e-3,
                                     momentum=0.999,
                                     name=prefix + 'expand/BatchNorm')(x)
        x = Activation(activation)(x)

    if stride == 2:
        x = ZeroPadding2D(padding=correct_pad(K, x, kernel_size),
                          name=prefix + 'depthwise/pad')(x)
    x = YoloDepthwiseConv2D(kernel_size,
                            strides=stride,
                            padding='same' if stride == 1 else 'valid',
                            use_bias=False,
                            name=prefix + 'depthwise/Conv')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name=prefix + 'depthwise/BatchNorm')(x)
    x = Activation(activation)(x)

    if se_ratio:
        x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)

    x = YoloConv2D(filters,
                   kernel_size=1,
                   padding='same',
                   use_bias=False,
                   name=prefix + 'project')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name=prefix + 'project/BatchNorm')(x)

    if stride == 1 and infilters == filters:
        x = Add(name=prefix + 'Add')([shortcut, x])
    return x
Esempio n. 5
0
def _ep_block(inputs, filters, stride, expansion, block_id):
    #in_channels = backend.int_shape(inputs)[-1]
    in_channels = inputs.shape.as_list()[-1]

    pointwise_conv_filters = int(filters)
    x = inputs
    prefix = 'ep_block_{}_'.format(block_id)

    # Expand
    x = YoloConv2D(int(expansion * in_channels),
                   kernel_size=1,
                   padding='same',
                   use_bias=False,
                   activation=None,
                   name=prefix + 'expand')(x)
    x = CustomBatchNormalization(epsilon=1e-3,
                                 momentum=0.999,
                                 name=prefix + 'expand_BN')(x)
    x = ReLU(6., name=prefix + 'expand_relu')(x)

    # Depthwise
    if stride == 2:
        x = ZeroPadding2D(padding=correct_pad(K, x, 3), name=prefix + 'pad')(x)

    x = YoloDepthwiseConv2D(kernel_size=3,
                            strides=stride,
                            activation=None,
                            use_bias=False,
                            padding='same' if stride == 1 else 'valid',
                            name=prefix + 'depthwise')(x)
    x = CustomBatchNormalization(epsilon=1e-3,
                                 momentum=0.999,
                                 name=prefix + 'depthwise_BN')(x)
    x = ReLU(6., name=prefix + 'depthwise_relu')(x)

    # Project
    x = YoloConv2D(pointwise_conv_filters,
                   kernel_size=1,
                   padding='same',
                   use_bias=False,
                   activation=None,
                   name=prefix + 'project')(x)
    x = CustomBatchNormalization(epsilon=1e-3,
                                 momentum=0.999,
                                 name=prefix + 'project_BN')(x)

    if in_channels == pointwise_conv_filters and stride == 1:
        return Add(name=prefix + 'add')([inputs, x])
    return x
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    """Adds an initial convolution layer (with batch normalization and relu6).

    # Arguments
        inputs: Input tensor of shape `(rows, cols, 3)`
            (with `channels_last` data format) or
            (3, rows, cols) (with `channels_first` data format).
            It should have exactly 3 inputs channels,
            and width and height should be no smaller than 32.
            E.g. `(224, 224, 3)` would be one valid value.
        filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the convolution).
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        kernel: An integer or tuple/list of 2 integers, specifying the
            width and height of the 2D convolution window.
            Can be a single integer to specify the same value for
            all spatial dimensions.
        strides: An integer or tuple/list of 2 integers,
            specifying the strides of the convolution
            along the width and height.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Specifying any stride value != 1 is incompatible with specifying
            any `dilation_rate` value != 1.

    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(samples, filters, new_rows, new_cols)`
        if data_format='channels_first'
        or 4D tensor with shape:
        `(samples, new_rows, new_cols, filters)`
        if data_format='channels_last'.
        `rows` and `cols` values might have changed due to stride.

    # Returns
        Output tensor of block.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    filters = int(filters * alpha)
    x = ZeroPadding2D(padding=((0, 1), (0, 1)), name='conv1_pad')(inputs)
    x = YoloConv2D(filters,
                   kernel,
                   padding='valid',
                   use_bias=False,
                   strides=strides,
                   name='conv1')(x)
    x = CustomBatchNormalization(axis=channel_axis, name='conv1_bn')(x)
    return ReLU(6., name='conv1_relu')(x)
def DarknetConv2D(*args, **kwargs):
    """Wrapper to set Darknet parameters for YoloConv2D."""
    #darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
    #darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
    darknet_conv_kwargs = {'padding': 'valid' if kwargs.get('strides')==(2,2) else 'same'}
    darknet_conv_kwargs.update(kwargs)
    return YoloConv2D(*args, **darknet_conv_kwargs)
Esempio n. 8
0
def SqueezeExcite(input_x, se_ratio=0.25, reduced_base_chs=None, divisor=4, name=None):
    reduce_chs =_make_divisible((reduced_base_chs or int(input_x.shape[-1]))*se_ratio, divisor)

    x = GlobalAveragePooling2D(name=name+'_avg_pool2d')(input_x)
    if K.image_data_format() == 'channels_first':
        x = Reshape((int(input_x.shape[-1]), 1, 1))(x)
    else:
        x = Reshape((1, 1, int(input_x.shape[-1])))(x)

    x = YoloConv2D(filters=reduce_chs, kernel_size=1, use_bias=True, name=name+'_conv_reduce')(x)
    x = ReLU(name=name+'_act')(x)
    x = YoloConv2D(filters=int(input_x.shape[-1]), kernel_size=1, use_bias=True, name=name+'_conv_expand')(x)

    x = Activation(hard_sigmoid, name=name+'_hard_sigmoid')(x)
    x = Multiply()([input_x, x])

    return x
def basic_conv2d_graph(x, out_channels, kernel_size, strides, padding, activation=True, name=''):
    x = YoloConv2D(
        out_channels, kernel_size=kernel_size, strides=strides,
        padding=padding, use_bias=False, name=name + '_conv')(x)
    x = CustomBatchNormalization(name=name + '_norm')(x)
    if activation:
        x = ReLU()(x)

    return x
Esempio n. 10
0
def ConvBnAct(input_x, out_chs, kernel_size, stride=(1,1), name=None):
    x = YoloConv2D(filters=out_chs,
               kernel_size=kernel_size,
               strides=stride,
               padding='valid',
               use_bias=False,
               name=name+'_conv')(input_x)
    x = CustomBatchNormalization(name=name+'_bn1')(x)
    x = ReLU(name=name+'_relu')(x)
    return x
Esempio n. 11
0
def primary_conv(x, output_filters, kernel_size, strides=(1,1), padding='same', act=True, use_bias=False, name=None):
    x = YoloConv2D(filters=output_filters,
               kernel_size=kernel_size,
               strides=strides,
               padding=padding,
               use_bias=use_bias,
               name=name + '_0')(x)
    x = CustomBatchNormalization(name=name+'_1')(x)
    x = ReLU(name=name+'_relu')(x) if act else x
    return x
def Depthwise_Separable_Conv2D_BN_Leaky(filters, kernel_size=(3, 3), block_id_str=None):
    """Depthwise Separable Convolution2D."""
    if not block_id_str:
        block_id_str = str(K.get_uid())
    return compose(
        YoloDepthwiseConv2D(kernel_size, padding='same', name='conv_dw_' + block_id_str),
        CustomBatchNormalization(name='conv_dw_%s_bn' % block_id_str),
        LeakyReLU(alpha=0.1, name='conv_dw_%s_leaky_relu' % block_id_str),
        YoloConv2D(filters, (1,1), padding='same', use_bias=False, strides=(1, 1), name='conv_pw_%s' % block_id_str),
        CustomBatchNormalization(name='conv_pw_%s_bn' % block_id_str),
        LeakyReLU(alpha=0.1, name='conv_pw_%s_leaky_relu' % block_id_str))
def tiny_yolo3_vgg16_body(inputs, num_anchors, num_classes):
    '''Create Tiny YOLO_v3 VGG16 model CNN body in keras.'''
    vgg16 = VGG16(input_tensor=inputs, weights='imagenet', include_top=False)
    x = vgg16.get_layer('block5_pool').output
    x = YoloConv2D(512, (3, 3),
                   activation='relu',
                   padding='same',
                   name='block6_conv1')(x)
    x = YoloConv2D(512, (3, 3),
                   activation='relu',
                   padding='same',
                   name='block6_conv2')(x)
    x = YoloConv2D(512, (3, 3),
                   activation='relu',
                   padding='same',
                   name='block6_conv3')(x)
    #x = YoloConv2D(512, (3, 3), activation='relu', padding='same', name='block6_conv4')(x)

    # input: 416 x 416 x 3
    # block6_conv3 :13 x 13 x 512
    # block5_conv3 :26 x 26 x 512
    # block4_conv3 : 52 x 52 x 512

    x1 = vgg16.get_layer('block5_conv3').output
    x2 = x

    x2 = DarknetConv2D_BN_Leaky(512, (1, 1))(x2)

    y1 = compose(
        DarknetConv2D_BN_Leaky(1024, (3, 3)),
        #Depthwise_Separable_Conv2D_BN_Leaky(filters=1024, kernel_size=(3, 3), block_id_str='14'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)

    x2 = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(x2)
    y2 = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        #Depthwise_Separable_Conv2D_BN_Leaky(filters=512, kernel_size=(3, 3), block_id_str='15'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))([x2, x1])

    return Model(inputs, [y1, y2])
Esempio n. 14
0
def Darknet_Depthwise_Separable_Conv2D_BN_Swish(filters, kernel_size=(3, 3), block_id_str=None, **kwargs):
    """Depthwise Separable Convolution2D."""
    if not block_id_str:
        block_id_str = str(K.get_uid())
    no_bias_kwargs = {'use_bias': False}
    no_bias_kwargs.update(kwargs)
    return compose(
        DarknetDepthwiseConv2D(kernel_size, name='conv_dw_' + block_id_str, **no_bias_kwargs),
        CustomBatchNormalization(name='conv_dw_%s_bn' % block_id_str),
        Activation(swish, name='conv_dw_%s_swish' % block_id_str),
        YoloConv2D(filters, (1,1), padding='same', use_bias=False, strides=(1, 1), name='conv_pw_%s' % block_id_str),
        CustomBatchNormalization(name='conv_pw_%s_bn' % block_id_str),
        Activation(swish, name='conv_pw_%s_swish' % block_id_str))
Esempio n. 15
0
def GhostBottleneck(input_x, mid_chs, out_chs, dw_kernel_size=3, stride=(1,1), se_ratio=0., name=None):
    '''ghostnet bottleneck w/optional se'''
    has_se = se_ratio is not None and se_ratio > 0.

    #1st ghost bottleneck
    x = GhostModule(input_x, mid_chs, act=True, name=name+'_ghost1')

    #depth_with convolution
    if stride[0] > 1:
        x = YoloDepthwiseConv2D(kernel_size=dw_kernel_size,
                            strides=stride,
                            padding='same',
                            use_bias=False,
                            name=name+'_conv_dw')(x)
        x = CustomBatchNormalization(name=name+'_bn_dw')(x)

    #Squeeze_and_excitation
    if has_se:
        x = SqueezeExcite(x, se_ratio=se_ratio, name=name+'_se')

    #2nd ghost bottleneck
    x = GhostModule(x, out_chs, act=False, name=name+'_ghost2')

    #short cut
    if (input_x.shape[-1] == out_chs and stride[0] == 1):
        sc = input_x
    else:
        name1 = name + '_shortcut'
        sc = YoloDepthwiseConv2D(kernel_size=dw_kernel_size,
                             strides=stride,
                             padding='same',
                             use_bias=False,
                             name=name1+'_0')(input_x)
        sc = CustomBatchNormalization(name=name1+'_1')(sc)
        sc = YoloConv2D(filters=out_chs,
                    kernel_size=1,
                    strides=(1,1),
                    padding='valid',
                    use_bias=False,
                    name=name1+'_2')(sc)
        sc = CustomBatchNormalization(name=name1+'_3')(sc)

    x = Add(name=name+'_add')([x, sc])
    return x
Esempio n. 16
0
def ShuffleNetV2(input_shape=None,
                 include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 scale_factor=1.0,
                 pooling=None,
                 num_shuffle_units=[3, 7, 3],
                 bottleneck_ratio=1,
                 classes=1000,
                 **kwargs):
    """Instantiates the ShuffleNetV2 architecture.

    # Arguments
        input_shape: optional shape tuple, to be specified if you would
            like to use a model with an input img resolution that is not
            (224, 224, 3).
            It should have exactly 3 inputs channels (224, 224, 3).
            You can also omit this option if you would like
            to infer input_shape from an input_tensor.
            If you choose to include both input_tensor and input_shape then
            input_shape will be used if they match, if the shapes
            do not match then we will throw an error.
            E.g. `(160, 160, 3)` would be one valid value.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape or invalid alpha, rows when
            weights='imagenet'
    """
    if K.backend() != 'tensorflow':
        raise RuntimeError('Only tensorflow supported for now')

    name = 'ShuffleNetV2_{}_{}_{}'.format(
        scale_factor, bottleneck_ratio,
        "".join([str(x) for x in num_shuffle_units]))

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=28,
                                      require_flatten=include_top,
                                      data_format=K.image_data_format())
    out_dim_stage_two = {0.5: 48, 1: 116, 1.5: 176, 2: 244}

    if pooling not in ['max', 'avg', None]:
        raise ValueError('Invalid value for pooling')
    if not (float(scale_factor) * 4).is_integer():
        raise ValueError('Invalid value for scale_factor, should be x over 4')
    exp = np.insert(np.arange(len(num_shuffle_units), dtype=np.float32), 0,
                    0)  # [0., 0., 1., 2.]
    out_channels_in_stage = 2**exp
    out_channels_in_stage *= out_dim_stage_two[
        bottleneck_ratio]  #  calculate output channels for each stage
    out_channels_in_stage[0] = 24  # first stage has always 24 output channels
    out_channels_in_stage *= scale_factor
    out_channels_in_stage = out_channels_in_stage.astype(int)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        #if not K.is_keras_tensor(input_tensor):
        #img_input = Input(tensor=input_tensor, shape=input_shape)
        #else:
        #img_input = input_tensor
        img_input = input_tensor

    # create shufflenet architecture
    x = YoloConv2D(filters=out_channels_in_stage[0],
                   kernel_size=(3, 3),
                   padding='same',
                   use_bias=False,
                   strides=(2, 2),
                   activation='relu',
                   name='conv1')(img_input)
    x = MaxPool2D(pool_size=(3, 3),
                  strides=(2, 2),
                  padding='same',
                  name='maxpool1')(x)

    # create stages containing shufflenet units beginning at stage 2
    for stage in range(len(num_shuffle_units)):
        repeat = num_shuffle_units[stage]
        x = block(x,
                  out_channels_in_stage,
                  repeat=repeat,
                  bottleneck_ratio=bottleneck_ratio,
                  stage=stage + 2)

    if bottleneck_ratio < 2:
        k = 1024
    else:
        k = 2048
    x = YoloConv2D(k,
                   kernel_size=1,
                   padding='same',
                   strides=1,
                   name='1x1conv5_out',
                   activation='relu')(x)

    if include_top:
        x = GlobalAveragePooling2D(name='global_avg_pool')(x)
        x = Dense(classes, activation='softmax', use_bias=True,
                  name='Logits')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D(name='global_avg_pool')(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D(name='global_max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)

    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name=name)

    # Load weights.
    if weights == 'imagenet':
        if K.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format '
                             'are not available.')

        if include_top:
            model_name = ('shufflenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                          str(alpha) + '_' + str(rows) + '.h5')
            weigh_path = BASE_WEIGHT_PATH + model_name
            weights_path = get_file(model_name,
                                    weigh_path,
                                    cache_subdir='models')
        else:
            model_name = ('shufflenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                          str(alpha) + '_' + str(rows) + '_no_top' + '.h5')
            weigh_path = BASE_WEIGHT_PATH + model_name
            weights_path = get_file(model_name,
                                    weigh_path,
                                    cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def block(inputs,
          activation_fn=swish,
          drop_rate=0.,
          name='',
          filters_in=32,
          filters_out=16,
          kernel_size=3,
          strides=1,
          expand_ratio=1,
          se_ratio=0.,
          id_skip=True):
    """A mobile inverted residual block.
    # Arguments
        inputs: input tensor.
        activation_fn: activation function.
        drop_rate: float between 0 and 1, fraction of the input units to drop.
        name: string, block label.
        filters_in: integer, the number of input filters.
        filters_out: integer, the number of output filters.
        kernel_size: integer, the dimension of the convolution window.
        strides: integer, the stride of the convolution.
        expand_ratio: integer, scaling coefficient for the input filters.
        se_ratio: float between 0 and 1, fraction to squeeze the input filters.
        id_skip: boolean.
    # Returns
        output tensor for the block.
    """
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    # Expansion phase
    filters = filters_in * expand_ratio
    if expand_ratio != 1:
        x = YoloConv2D(filters,
                       1,
                       padding='same',
                       use_bias=False,
                       kernel_initializer=CONV_KERNEL_INITIALIZER,
                       name=name + 'expand_conv')(inputs)
        x = CustomBatchNormalization(axis=bn_axis, name=name + 'expand_bn')(x)
        x = Activation(activation_fn, name=name + 'expand_activation')(x)
    else:
        x = inputs

    # Depthwise Convolution
    if strides == 2:
        x = ZeroPadding2D(padding=correct_pad(K, x, kernel_size),
                          name=name + 'dwconv_pad')(x)
        conv_pad = 'valid'
    else:
        conv_pad = 'same'
    x = YoloDepthwiseConv2D(kernel_size,
                            strides=strides,
                            padding=conv_pad,
                            use_bias=False,
                            depthwise_initializer=CONV_KERNEL_INITIALIZER,
                            name=name + 'dwconv')(x)
    x = CustomBatchNormalization(axis=bn_axis, name=name + 'bn')(x)
    x = Activation(activation_fn, name=name + 'activation')(x)

    # Squeeze and Excitation phase
    if 0 < se_ratio <= 1:
        filters_se = max(1, int(filters_in * se_ratio))
        se = GlobalAveragePooling2D(name=name + 'se_squeeze')(x)
        se = Reshape((1, 1, filters), name=name + 'se_reshape')(se)
        se = YoloConv2D(filters_se,
                        1,
                        padding='same',
                        activation=activation_fn,
                        kernel_initializer=CONV_KERNEL_INITIALIZER,
                        name=name + 'se_reduce')(se)
        se = YoloConv2D(filters,
                        1,
                        padding='same',
                        activation='sigmoid',
                        kernel_initializer=CONV_KERNEL_INITIALIZER,
                        name=name + 'se_expand')(se)
        if K.backend() == 'theano':
            # For the Theano backend, we have to explicitly make
            # the excitation weights broadcastable.
            se = Lambda(
                lambda x: K.pattern_broadcast(x, [True, True, True, False]),
                output_shape=lambda input_shape: input_shape,
                name=name + 'se_broadcast')(se)
        x = multiply([x, se], name=name + 'se_excite')

    # Output phase
    x = YoloConv2D(filters_out,
                   1,
                   padding='same',
                   use_bias=False,
                   kernel_initializer=CONV_KERNEL_INITIALIZER,
                   name=name + 'project_conv')(x)
    x = CustomBatchNormalization(axis=bn_axis, name=name + 'project_bn')(x)
    if (id_skip is True and strides == 1 and filters_in == filters_out):
        if drop_rate > 0:
            if tf2.enabled():
                x = Dropout(drop_rate,
                            noise_shape=(None, 1, 1, 1),
                            name=name + 'drop')(x)
            else:
                x = Dropout(
                    drop_rate,
                    #noise_shape=(None, 1, 1, 1),
                    name=name + 'drop')(x)
        x = add([x, inputs], name=name + 'add')

    return x
def EfficientNet(width_coefficient,
                 depth_coefficient,
                 default_size,
                 dropout_rate=0.2,
                 drop_connect_rate=0.2,
                 depth_divisor=8,
                 activation_fn=swish,
                 blocks_args=DEFAULT_BLOCKS_ARGS,
                 model_name='efficientnet',
                 include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 input_shape=None,
                 pooling=None,
                 classes=1000,
                 **kwargs):
    """Instantiates the EfficientNet architecture using given scaling coefficients.
    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one specified in your Keras config at `~/.keras/keras.json`.
    # Arguments
        width_coefficient: float, scaling coefficient for network width.
        depth_coefficient: float, scaling coefficient for network depth.
        default_size: integer, default input image size.
        dropout_rate: float, dropout rate before final classifier layer.
        drop_connect_rate: float, dropout rate at skip connections.
        depth_divisor: integer, a unit of network width.
        activation_fn: activation function.
        blocks_args: list of dicts, parameters to construct block modules.
        model_name: string, model name.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor
            (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False.
            It should have exactly 3 inputs channels.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
    #global backend, layers, models, keras_utils
    #backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        #if not K.is_keras_tensor(input_tensor):
        #img_input = Input(tensor=input_tensor, shape=input_shape)
        #else:
        #img_input = input_tensor
        img_input = input_tensor

    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    def round_filters(filters, divisor=depth_divisor):
        """Round number of filters based on depth multiplier."""
        filters *= width_coefficient
        new_filters = max(divisor,
                          int(filters + divisor / 2) // divisor * divisor)
        # Make sure that round down does not go down by more than 10%.
        if new_filters < 0.9 * filters:
            new_filters += divisor
        return int(new_filters)

    def round_repeats(repeats):
        """Round number of repeats based on depth multiplier."""
        return int(math.ceil(depth_coefficient * repeats))

    # Build stem
    x = img_input
    x = ZeroPadding2D(padding=correct_pad(K, x, 3), name='stem_conv_pad')(x)
    x = YoloConv2D(round_filters(32),
                   3,
                   strides=2,
                   padding='valid',
                   use_bias=False,
                   kernel_initializer=CONV_KERNEL_INITIALIZER,
                   name='stem_conv')(x)
    x = CustomBatchNormalization(axis=bn_axis, name='stem_bn')(x)
    x = Activation(activation_fn, name='stem_activation')(x)

    # Build blocks
    from copy import deepcopy
    blocks_args = deepcopy(blocks_args)

    b = 0
    blocks = float(sum(args['repeats'] for args in blocks_args))
    for (i, args) in enumerate(blocks_args):
        assert args['repeats'] > 0
        # Update block input and output filters based on depth multiplier.
        args['filters_in'] = round_filters(args['filters_in'])
        args['filters_out'] = round_filters(args['filters_out'])

        for j in range(round_repeats(args.pop('repeats'))):
            # The first block needs to take care of stride and filter size increase.
            if j > 0:
                args['strides'] = 1
                args['filters_in'] = args['filters_out']
            x = block(x,
                      activation_fn,
                      drop_connect_rate * b / blocks,
                      name='block{}{}_'.format(i + 1, chr(j + 97)),
                      **args)
            b += 1

    # Build top
    x = YoloConv2D(round_filters(1280),
                   1,
                   padding='same',
                   use_bias=False,
                   kernel_initializer=CONV_KERNEL_INITIALIZER,
                   name='top_conv')(x)
    x = CustomBatchNormalization(axis=bn_axis, name='top_bn')(x)
    x = Activation(activation_fn, name='top_activation')(x)
    if include_top:
        x = GlobalAveragePooling2D(name='avg_pool')(x)
        if dropout_rate > 0:
            x = Dropout(dropout_rate, name='top_dropout')(x)
        x = Dense(classes,
                  activation='softmax',
                  kernel_initializer=DENSE_KERNEL_INITIALIZER,
                  name='probs')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D(name='max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name=model_name)

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            file_suff = '_weights_tf_dim_ordering_tf_kernels_autoaugment.h5'
            file_hash = WEIGHTS_HASHES[model_name[-2:]][0]
        else:
            file_suff = '_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'
            file_hash = WEIGHTS_HASHES[model_name[-2:]][1]
        file_name = model_name + file_suff
        weights_path = get_file(file_name,
                                BASE_WEIGHTS_PATH + file_name,
                                cache_subdir='models',
                                file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def yolo3_vgg16_body(inputs, num_anchors, num_classes):
    """Create YOLO_V3 model CNN body in Keras."""
    '''
    Layer Name input_1 Output: Tensor("input_1:0", shape=(?, 416, 416, 3), dtype=float32)
    Layer Name block1_conv1 Output: Tensor("block1_conv1/Relu:0", shape=(?, 416, 416, 64), dtype=float32)
    Layer Name block1_conv2 Output: Tensor("block1_conv2/Relu:0", shape=(?, 416, 416, 64), dtype=float32)
    Layer Name block1_pool Output: Tensor("block1_pool/MaxPool:0", shape=(?, 208, 208, 64), dtype=float32)
    Layer Name block2_conv1 Output: Tensor("block2_conv1/Relu:0", shape=(?, 208, 208, 128), dtype=float32)
    Layer Name block2_conv2 Output: Tensor("block2_conv2/Relu:0", shape=(?, 208, 208, 128), dtype=float32)
    Layer Name block2_pool Output: Tensor("block2_pool/MaxPool:0", shape=(?, 104, 104, 128), dtype=float32)
    Layer Name block3_conv1 Output: Tensor("block3_conv1/Relu:0", shape=(?, 104, 104, 256), dtype=float32)
    Layer Name block3_conv2 Output: Tensor("block3_conv2/Relu:0", shape=(?, 104, 104, 256), dtype=float32)
    Layer Name block3_conv3 Output: Tensor("block3_conv3/Relu:0", shape=(?, 104, 104, 256), dtype=float32)
    Layer Name block3_pool Output: Tensor("block3_pool/MaxPool:0", shape=(?, 52, 52, 256), dtype=float32)
    Layer Name block4_conv1 Output: Tensor("block4_conv1/Relu:0", shape=(?, 52, 52, 512), dtype=float32)
    Layer Name block4_conv2 Output: Tensor("block4_conv2/Relu:0", shape=(?, 52, 52, 512), dtype=float32)
    Layer Name block4_conv3 Output: Tensor("block4_conv3/Relu:0", shape=(?, 52, 52, 512), dtype=float32)
    Layer Name block4_pool Output: Tensor("block4_pool/MaxPool:0", shape=(?, 26, 26, 512), dtype=float32)
    Layer Name block5_conv1 Output: Tensor("block5_conv1/Relu:0", shape=(?, 26, 26, 512), dtype=float32)
    Layer Name block5_conv2 Output: Tensor("block5_conv2/Relu:0", shape=(?, 26, 26, 512), dtype=float32)
    Layer Name block5_conv3 Output: Tensor("block5_conv3/Relu:0", shape=(?, 26, 26, 512), dtype=float32)
    Layer Name block5_pool Output: Tensor("block5_pool/MaxPool:0", shape=(?, 13, 13, 512), dtype=float32)
    '''

    #net, endpoint = inception_v2.inception_v2(inputs)
    vgg16 = VGG16(input_tensor=inputs, weights='imagenet', include_top=False)
    x = vgg16.get_layer('block5_pool').output
    x = YoloConv2D(512, (3, 3),
                   activation='relu',
                   padding='same',
                   name='block6_conv1')(x)
    x = YoloConv2D(512, (3, 3),
                   activation='relu',
                   padding='same',
                   name='block6_conv2')(x)
    x = YoloConv2D(512, (3, 3),
                   activation='relu',
                   padding='same',
                   name='block6_conv3')(x)
    x = YoloConv2D(512, (3, 3),
                   activation='relu',
                   padding='same',
                   name='block6_conv4')(x)

    # input: 416 x 416 x 3
    # block6_conv3 :13 x 13 x 512
    # block5_conv3 :26 x 26 x 512
    # block4_conv3 : 52 x 52 x 512

    # f1 :13 x 13 x 1024 13 x 13 x 512
    x, y1 = make_last_layers(x,
                             512,
                             num_anchors * (num_classes + 5),
                             predict_id='1')

    x = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(x)

    f2 = vgg16.get_layer('block5_conv3').output
    # f2: 26 x 26 x 512
    x = Concatenate()([x, f2])

    x, y2 = make_last_layers(x,
                             256,
                             num_anchors * (num_classes + 5),
                             predict_id='2')

    x = compose(DarknetConv2D_BN_Leaky(128, (1, 1)), UpSampling2D(2))(x)

    f3 = vgg16.get_layer('block4_conv3').output
    # f3 : 52 x 52 x 256
    x = Concatenate()([x, f3])
    x, y3 = make_last_layers(x,
                             128,
                             num_anchors * (num_classes + 5),
                             predict_id='3')

    return Model(inputs=inputs, outputs=[y1, y2, y3])
Esempio n. 20
0
def MobileNetV2(input_shape=None,
                alpha=1.0,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                pooling=None,
                classes=1000,
                **kwargs):
    """Instantiates the MobileNetV2 architecture.

    # Arguments
        input_shape: optional shape tuple, to be specified if you would
            like to use a model with an input img resolution that is not
            (224, 224, 3).
            It should have exactly 3 inputs channels (224, 224, 3).
            You can also omit this option if you would like
            to infer input_shape from an input_tensor.
            If you choose to include both input_tensor and input_shape then
            input_shape will be used if they match, if the shapes
            do not match then we will throw an error.
            E.g. `(160, 160, 3)` would be one valid value.
        alpha: controls the width of the network. This is known as the
        width multiplier in the MobileNetV2 paper, but the name is kept for
        consistency with MobileNetV1 in Keras.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape or invalid alpha, rows when
            weights='imagenet'
    """
    #global backend, layers, models, keras_utils
    #backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # Determine proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match
    #if input_shape is not None and input_tensor is not None:
    #try:
    #is_input_t_tensor = K.is_keras_tensor(input_tensor)
    #except ValueError:
    #try:
    #is_input_t_tensor = K.is_keras_tensor(
    #get_source_inputs(input_tensor))
    #except ValueError:
    #raise ValueError('input_tensor: ', input_tensor,
    #'is not type input_tensor')
    #if is_input_t_tensor:
    #if K.image_data_format == 'channels_first':
    #if K.int_shape(input_tensor)[1] != input_shape[1]:
    #raise ValueError('input_shape: ', input_shape,
    #'and input_tensor: ', input_tensor,
    #'do not meet the same shape requirements')
    #else:
    #if K.int_shape(input_tensor)[2] != input_shape[1]:
    #raise ValueError('input_shape: ', input_shape,
    #'and input_tensor: ', input_tensor,
    #'do not meet the same shape requirements')
    #else:
    #raise ValueError('input_tensor specified: ', input_tensor,
    #'is not a keras tensor')

    # If input_shape is None, infer shape from input_tensor
    #if input_shape is None and input_tensor is not None:

    #try:
    #K.is_keras_tensor(input_tensor)
    #except ValueError:
    #raise ValueError('input_tensor: ', input_tensor,
    #'is type: ', type(input_tensor),
    #'which is not a valid type')

    #if input_shape is None and not K.is_keras_tensor(input_tensor):
    #default_size = 224
    #elif input_shape is None and K.is_keras_tensor(input_tensor):
    #if K.image_data_format() == 'channels_first':
    #rows = K.int_shape(input_tensor)[2]
    #cols = K.int_shape(input_tensor)[3]
    #else:
    #rows = K.int_shape(input_tensor)[1]
    #cols = K.int_shape(input_tensor)[2]

    #if rows == cols and rows in [96, 128, 160, 192, 224]:
    #default_size = rows
    #else:
    #default_size = 224

    # If input_shape is None and no input_tensor
    #elif input_shape is None:
    #default_size = 224

    # If input_shape is not None, assume default size
    #else:
    #if K.image_data_format() == 'channels_first':
    #rows = input_shape[1]
    #cols = input_shape[2]
    #else:
    #rows = input_shape[0]
    #cols = input_shape[1]

    #if rows == cols and rows in [96, 128, 160, 192, 224]:
    #default_size = rows
    #else:
    #default_size = 224

    # If input_shape is None and input_tensor is None using standard shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    if K.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == 'imagenet':
        if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of `0.35`, `0.50`, `0.75`, '
                             '`1.0`, `1.3` or `1.4` only.')

        if rows != cols or rows not in [96, 128, 160, 192, 224]:
            rows = 224
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not in [96, 128, 160, 192, 224].'
                          ' Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        #if not K.is_keras_tensor(input_tensor):
        #img_input = Input(tensor=input_tensor, shape=input_shape)
        #else:
        #img_input = input_tensor
        img_input = input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    first_block_filters = _make_divisible(32 * alpha, 8)
    x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                      name='Conv1_pad')(img_input)
    x = YoloConv2D(first_block_filters,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='valid',
                   use_bias=False,
                   name='Conv1')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='bn_Conv1')(x)
    x = ReLU(6., name='Conv1_relu')(x)

    x = _inverted_res_block(x,
                            filters=16,
                            alpha=alpha,
                            stride=1,
                            expansion=1,
                            block_id=0)

    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=1)
    x = _inverted_res_block(x,
                            filters=24,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=2)

    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=3)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=4)
    x = _inverted_res_block(x,
                            filters=32,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=5)

    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=6)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=7)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=8)
    x = _inverted_res_block(x,
                            filters=64,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=9)

    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=10)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=11)
    x = _inverted_res_block(x,
                            filters=96,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=12)

    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=2,
                            expansion=6,
                            block_id=13)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=14)
    x = _inverted_res_block(x,
                            filters=160,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=15)

    x = _inverted_res_block(x,
                            filters=320,
                            alpha=alpha,
                            stride=1,
                            expansion=6,
                            block_id=16)

    # no alpha applied to last conv as stated in the paper:
    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)
    else:
        last_block_filters = 1280

    x = YoloConv2D(last_block_filters,
                   kernel_size=1,
                   use_bias=False,
                   name='Conv_1')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv_1_bn')(x)
    x = ReLU(6., name='out_relu')(x)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        x = Dense(classes, activation='softmax', use_bias=True,
                  name='Logits')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows))

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                          str(alpha) + '_' + str(rows) + '.h5')
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = get_file(model_name,
                                    weight_path,
                                    cache_subdir='models')
        else:
            model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                          str(alpha) + '_' + str(rows) + '_no_top' + '.h5')
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = get_file(model_name,
                                    weight_path,
                                    cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def _depthwise_conv_block(inputs,
                          pointwise_conv_filters,
                          alpha,
                          depth_multiplier=1,
                          strides=(1, 1),
                          block_id=1):
    """Adds a depthwise convolution block.

    A depthwise convolution block consists of a depthwise conv,
    batch normalization, relu6, pointwise convolution,
    batch normalization and relu6 activation.

    # Arguments
        inputs: Input tensor of shape `(rows, cols, channels)`
            (with `channels_last` data format) or
            (channels, rows, cols) (with `channels_first` data format).
        pointwise_conv_filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the pointwise convolution).
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel.
            The total number of depthwise convolution output
            channels will be equal to `filters_in * depth_multiplier`.
        strides: An integer or tuple/list of 2 integers,
            specifying the strides of the convolution
            along the width and height.
            Can be a single integer to specify the same value for
            all spatial dimensions.
            Specifying any stride value != 1 is incompatible with specifying
            any `dilation_rate` value != 1.
        block_id: Integer, a unique identification designating
            the block number.

    # Input shape
        4D tensor with shape:
        `(batch, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(batch, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(batch, filters, new_rows, new_cols)`
        if data_format='channels_first'
        or 4D tensor with shape:
        `(batch, new_rows, new_cols, filters)`
        if data_format='channels_last'.
        `rows` and `cols` values might have changed due to stride.

    # Returns
        Output tensor of block.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    pointwise_conv_filters = int(pointwise_conv_filters * alpha)

    if strides == (1, 1):
        x = inputs
    else:
        x = ZeroPadding2D(((0, 1), (0, 1)),
                          name='conv_pad_%d' % block_id)(inputs)
    x = YoloDepthwiseConv2D((3, 3),
                            padding='same' if strides == (1, 1) else 'valid',
                            depth_multiplier=depth_multiplier,
                            strides=strides,
                            use_bias=False,
                            name='conv_dw_%d' % block_id)(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 name='conv_dw_%d_bn' % block_id)(x)
    x = ReLU(6., name='conv_dw_%d_relu' % block_id)(x)

    x = YoloConv2D(pointwise_conv_filters, (1, 1),
                   padding='same',
                   use_bias=False,
                   strides=(1, 1),
                   name='conv_pw_%d' % block_id)(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 name='conv_pw_%d_bn' % block_id)(x)
    return ReLU(6., name='conv_pw_%d_relu' % block_id)(x)
Esempio n. 22
0
def GhostNet(input_shape=None,
             include_top=True,
             weights='imagenet',
             input_tensor=None,
             cfgs=DEFAULT_CFGS,
             width=1.0,
             dropout_rate=0.2,
             pooling=None,
             classes=1000,
             **kwargs):
    """Instantiates the GhostNet architecture.

    # Arguments
        input_shape: optional shape tuple, to be specified if you would
            like to use a model with an input img resolution that is not
            (224, 224, 3).
            It should have exactly 3 inputs channels (224, 224, 3).
            You can also omit this option if you would like
            to infer input_shape from an input_tensor.
            If you choose to include both input_tensor and input_shape then
            input_shape will be used if they match, if the shapes
            do not match then we will throw an error.
            E.g. `(160, 160, 3)` would be one valid value.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        cfgs: model structure config list
        width: controls the width of the network
        dropout_rate: fraction of the input units to drop on the last layer
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape or invalid alpha, rows when
            weights='imagenet'
    """
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                         'as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)


    # If input_shape is None and input_tensor is None using standard shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        #if not K.is_keras_tensor(input_tensor):
            #img_input = Input(tensor=input_tensor, shape=input_shape)
        #else:
            #img_input = input_tensor
        img_input = input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # building first layer
    output_channel = int(_make_divisible(16 * width, 4))
    x = YoloConv2D(filters=output_channel,
               kernel_size=3,
               strides=(2, 2),
               padding='same',
               use_bias=False,
               name='conv_stem')(img_input)
    x = CustomBatchNormalization(name='bn1')(x)
    x = ReLU(name='Conv2D_1_act')(x)

    # building inverted residual blocks
    for index, cfg in enumerate(cfgs):
        sub_index = 0
        for k,exp_size,c,se_ratio,s in cfg:
            output_channel = int(_make_divisible(c * width, 4))
            hidden_channel = int(_make_divisible(exp_size * width, 4))
            x = GhostBottleneck(x, hidden_channel, output_channel, k, (s,s),
                                se_ratio=se_ratio,
                                name='blocks_'+str(index)+'_'+str(sub_index))
            sub_index += 1

    output_channel = _make_divisible(exp_size * width, 4)
    x = ConvBnAct(x, output_channel, kernel_size=1, name='blocks_9_0')

    if include_top:
        x = GlobalAveragePooling2D(name='global_avg_pooling2D')(x)
        if K.image_data_format() == 'channels_first':
            x = Reshape((output_channel, 1, 1))(x)
        else:
            x = Reshape((1, 1, output_channel))(x)

        # building last several layers
        output_channel = 1280
        x = YoloConv2D(filters=output_channel,
                   kernel_size=1,
                   strides=(1,1),
                   padding='valid',
                   use_bias=True,
                   name='conv_head')(x)
        x = ReLU(name='relu_head')(x)

        if dropout_rate > 0.:
            x = Dropout(dropout_rate, name='dropout_1')(x)
        x = Flatten()(x)
        x = Dense(units=classes, activation='softmax',
                         use_bias=True, name='classifier')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='ghostnet_%0.2f' % (width))

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            file_name = 'ghostnet_weights_tf_dim_ordering_tf_kernels_224.h5'
            weight_path = BASE_WEIGHT_PATH + file_name
        else:
            file_name = 'ghostnet_weights_tf_dim_ordering_tf_kernels_224_no_top.h5'
            weight_path = BASE_WEIGHT_PATH + file_name

        weights_path = get_file(file_name, weight_path, cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def MobileNet(input_shape=None,
              alpha=1.0,
              depth_multiplier=1,
              dropout=1e-3,
              include_top=True,
              weights='imagenet',
              input_tensor=None,
              pooling=None,
              classes=1000,
              **kwargs):
    """Instantiates the MobileNet architecture.

    # Arguments
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)`
            (with `channels_last` data format)
            or (3, 224, 224) (with `channels_first` data format).
            It should have exactly 3 inputs channels,
            and width and height should be no smaller than 32.
            E.g. `(200, 200, 3)` would be one valid value.
        alpha: controls the width of the network. This is known as the
            width multiplier in the MobileNet paper.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        depth_multiplier: depth multiplier for depthwise convolution. This
            is called the resolution multiplier in the MobileNet paper.
        dropout: dropout rate
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
    """
    #global backend, layers, models, keras_utils
    #backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    if input_shape is None:
        default_size = 224
    else:
        if K.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if K.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == 'imagenet':
        if depth_multiplier != 1:
            raise ValueError('If imagenet weights are being loaded, '
                             'depth multiplier must be 1')

        if alpha not in [0.25, 0.50, 0.75, 1.0]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of'
                             '`0.25`, `0.50`, `0.75` or `1.0` only.')

        if rows != cols or rows not in [128, 160, 192, 224]:
            rows = 224
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not in [128, 160, 192, 224]. '
                          'Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        #if not K.is_keras_tensor(input_tensor):
        #img_input = Input(tensor=input_tensor, shape=input_shape)
        #else:
        #img_input = input_tensor
        img_input = input_tensor

    x = _conv_block(img_input, 32, alpha, strides=(2, 2))
    x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)

    x = _depthwise_conv_block(x,
                              128,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=2)
    x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)

    x = _depthwise_conv_block(x,
                              256,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=4)
    x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)

    x = _depthwise_conv_block(x,
                              512,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=6)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
    x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)

    x = _depthwise_conv_block(x,
                              1024,
                              alpha,
                              depth_multiplier,
                              strides=(2, 2),
                              block_id=12)
    x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)

    if include_top:
        if K.image_data_format() == 'channels_first':
            shape = (int(1024 * alpha), 1, 1)
        else:
            shape = (1, 1, int(1024 * alpha))

        x = GlobalAveragePooling2D()(x)
        x = Reshape(shape, name='reshape_1')(x)
        x = Dropout(dropout, name='dropout')(x)
        x = YoloConv2D(classes, (1, 1), padding='same', name='conv_preds')(x)
        x = Reshape((classes, ), name='reshape_2')(x)
        x = Activation('softmax', name='act_softmax')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows))

    # Load weights.
    if weights == 'imagenet':
        if alpha == 1.0:
            alpha_text = '1_0'
        elif alpha == 0.75:
            alpha_text = '7_5'
        elif alpha == 0.50:
            alpha_text = '5_0'
        else:
            alpha_text = '2_5'

        if include_top:
            model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows)
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = get_file(model_name,
                                    weight_path,
                                    cache_subdir='models')
        else:
            model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows)
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = get_file(model_name,
                                    weight_path,
                                    cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
Esempio n. 24
0
def MobileNetV3(stack_fn,
                last_point_ch,
                input_shape=None,
                alpha=1.0,
                model_type='large',
                minimalistic=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                classes=1000,
                pooling=None,
                dropout_rate=0.2,
                **kwargs):
    """Instantiates the MobileNetV3 architecture.
    # Arguments
        stack_fn: a function that returns output tensor for the
            stacked residual blocks.
        last_point_ch: number channels at the last layer (before top)
        input_shape: optional shape tuple, to be specified if you would
            like to use a model with an input img resolution that is not
            (224, 224, 3).
            It should have exactly 3 inputs channels (224, 224, 3).
            You can also omit this option if you would like
            to infer input_shape from an input_tensor.
            If you choose to include both input_tensor and input_shape then
            input_shape will be used if they match, if the shapes
            do not match then we will throw an error.
            E.g. `(160, 160, 3)` would be one valid value.
        alpha: controls the width of the network. This is known as the
            depth multiplier in the MobileNetV3 paper, but the name is kept for
            consistency with MobileNetV1 in Keras.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
        model_type: MobileNetV3 is defined as two models: large and small. These
        models are targeted at high and low resource use cases respectively.
        minimalistic: In addition to large and small models this module also contains
            so-called minimalistic models, these models have the same per-layer
            dimensions characteristic as MobilenetV3 however, they don't utilize any
            of the advanced blocks (squeeze-and-excite units, hard-swish, and 5x5
            convolutions). While these models are less efficient on CPU, they are
            much more performant on GPU/DSP.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        dropout_rate: fraction of the input units to drop on the last layer
    # Returns
        A Keras model instance.
    # Raises
        ValueError: in case of invalid model type, argument for `weights`,
            or invalid input shape when weights='imagenet'
    """
    #global backend, layers, models, keras_utils
    #backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # Determine proper input shape and default size.
    # If both input_shape and input_tensor are used, they should match
    #if input_shape is not None and input_tensor is not None:
    #try:
    #is_input_t_tensor = K.is_keras_tensor(input_tensor)
    #except ValueError:
    #try:
    #is_input_t_tensor = K.is_keras_tensor(
    #get_source_inputs(input_tensor))
    #except ValueError:
    #raise ValueError('input_tensor: ', input_tensor,
    #'is not type input_tensor')
    #if is_input_t_tensor:
    #if K.image_data_format == 'channels_first':
    #if K.int_shape(input_tensor)[1] != input_shape[1]:
    #raise ValueError('input_shape: ', input_shape,
    #'and input_tensor: ', input_tensor,
    #'do not meet the same shape requirements')
    #else:
    #if K.int_shape(input_tensor)[2] != input_shape[1]:
    #raise ValueError('input_shape: ', input_shape,
    #'and input_tensor: ', input_tensor,
    #'do not meet the same shape requirements')
    #else:
    #raise ValueError('input_tensor specified: ', input_tensor,
    #'is not a keras tensor')

    # If input_shape is None, infer shape from input_tensor
    #if input_shape is None and input_tensor is not None:

    #try:
    #K.is_keras_tensor(input_tensor)
    #except ValueError:
    #raise ValueError('input_tensor: ', input_tensor,
    #'is type: ', type(input_tensor),
    #'which is not a valid type')

    #if K.is_keras_tensor(input_tensor):
    #if K.image_data_format() == 'channels_first':
    #rows = K.int_shape(input_tensor)[2]
    #cols = K.int_shape(input_tensor)[3]
    #input_shape = (3, cols, rows)
    #else:
    #rows = K.int_shape(input_tensor)[1]
    #cols = K.int_shape(input_tensor)[2]
    #input_shape = (cols, rows, 3)

    # If input_shape is None and input_tensor is None using standard shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    if K.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]
    if rows and cols and (rows < 32 or cols < 32):
        raise ValueError(
            'Input size must be at least 32x32; got `input_shape=' +
            str(input_shape) + '`')
    if weights == 'imagenet':
        if minimalistic is False and alpha not in [0.75, 1.0] \
                or minimalistic is True and alpha != 1.0:
            raise ValueError(
                'If imagenet weights are being loaded, '
                'alpha can be one of `0.75`, `1.0` for non minimalistic'
                ' or `1.0` for minimalistic only.')

        if rows != cols or rows != 224:
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not 224.'
                          ' Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        #if not K.is_keras_tensor(input_tensor):
        #img_input = Input(tensor=input_tensor, shape=input_shape)
        #else:
        #img_input = input_tensor
        img_input = input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if minimalistic:
        kernel = 3
        activation = relu
        se_ratio = None
    else:
        kernel = 5
        activation = hard_swish
        se_ratio = 0.25

    x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                      name='Conv_pad')(img_input)
    x = YoloConv2D(16,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='valid',
                   use_bias=False,
                   name='Conv')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv/BatchNorm')(x)
    x = Activation(activation)(x)

    x = stack_fn(x, kernel, activation, se_ratio)

    last_conv_ch = _depth(K.int_shape(x)[channel_axis] * 6)

    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_point_ch = _depth(last_point_ch * alpha)

    x = YoloConv2D(last_conv_ch,
                   kernel_size=1,
                   padding='same',
                   use_bias=False,
                   name='Conv_1')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv_1/BatchNorm')(x)
    x = Activation(activation)(x)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        if channel_axis == 1:
            x = Reshape((last_conv_ch, 1, 1))(x)
        else:
            x = Reshape((1, 1, last_conv_ch))(x)
        x = YoloConv2D(last_point_ch,
                       kernel_size=1,
                       padding='same',
                       name='Conv_2')(x)
        x = Activation(activation)(x)
        if dropout_rate > 0:
            x = Dropout(dropout_rate)(x)
        x = YoloConv2D(classes, kernel_size=1, padding='same',
                       name='Logits')(x)
        x = Flatten()(x)
        x = Softmax(name='Predictions/Softmax')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D(name='max_pool')(x)
    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='MobilenetV3' + model_type)

    # Load weights.
    if weights == 'imagenet':
        model_name = "{}{}_224_{}_float".format(
            model_type, '_minimalistic' if minimalistic else '', str(alpha))
        if include_top:
            file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
            file_hash = WEIGHTS_HASHES[model_name][0]
        else:
            file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
            file_hash = WEIGHTS_HASHES[model_name][1]
        weights_path = get_file(file_name,
                                BASE_WEIGHT_PATH + file_name,
                                cache_subdir='models',
                                file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
Esempio n. 25
0
def ShuffleNet(include_top=True,
               input_tensor=None,
               scale_factor=1.0,
               pooling=None,
               input_shape=None,
               groups=1,
               weights='imagenet',
               num_shuffle_units=[3, 7, 3],
               bottleneck_ratio=0.25,
               classes=1000,
               **kwargs):
    """
    ShuffleNet implementation for Keras 2
    ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices
    Xiangyu Zhang, Xinyu Zhou, Mengxiao Lin, Jian Sun
    https://arxiv.org/pdf/1707.01083.pdf
    Note that only TensorFlow is supported for now, therefore it only works
    with the data format `image_data_format='channels_last'` in your Keras
    config at `~/.keras/keras.json`.
    Parameters
    ----------
    include_top: bool(True)
         whether to include the fully-connected layer at the top of the network.
    input_tensor:
        optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
    scale_factor:
        scales the number of output channels
    input_shape:
    pooling:
        Optional pooling mode for feature extraction
        when `include_top` is `False`.
        - `None` means that the output of the model
            will be the 4D tensor output of the
            last convolutional layer.
        - `avg` means that global average pooling
            will be applied to the output of the
            last convolutional layer, and thus
            the output of the model will be a
            2D tensor.
        - `max` means that global max pooling will
            be applied.
    groups: int
        number of groups per channel
    num_shuffle_units: list([3,7,3])
        number of stages (list length) and the number of shufflenet units in a
        stage beginning with stage 2 because stage 1 is fixed
        e.g. idx 0 contains 3 + 1 (first shuffle unit in each stage differs) shufflenet units for stage 2
        idx 1 contains 7 + 1 Shufflenet Units for stage 3 and
        idx 2 contains 3 + 1 Shufflenet Units
    bottleneck_ratio:
        bottleneck ratio implies the ratio of bottleneck channels to output channels.
        For example, bottleneck ratio = 1 : 4 means the output feature map is 4 times
        the width of the bottleneck feature map.
    classes: int(1000)
        number of classes to predict
    Returns
    -------
        A Keras model instance
    References
    ----------
    - [ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices]
      (http://www.arxiv.org/pdf/1707.01083.pdf)
    """

    if K.backend() != 'tensorflow':
        raise RuntimeError('Only TensorFlow backend is currently supported, '
                           'as other backends do not support ')

    name = "ShuffleNet_%.2gX_g%d_br_%.2g_%s" % (
        scale_factor, groups, bottleneck_ratio, "".join(
            [str(x) for x in num_shuffle_units]))

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=28,
                                      require_flatten=include_top,
                                      data_format=K.image_data_format())

    out_dim_stage_two = {1: 144, 2: 200, 3: 240, 4: 272, 8: 384}
    if groups not in out_dim_stage_two:
        raise ValueError("Invalid number of groups.")

    if pooling not in ['max', 'avg', None]:
        raise ValueError("Invalid value for pooling.")

    if not (float(scale_factor) * 4).is_integer():
        raise ValueError("Invalid value for scale_factor. Should be x over 4.")

    exp = np.insert(np.arange(0, len(num_shuffle_units), dtype=np.float32), 0,
                    0)
    out_channels_in_stage = 2**exp
    out_channels_in_stage *= out_dim_stage_two[
        groups]  # calculate output channels for each stage
    out_channels_in_stage[0] = 24  # first stage has always 24 output channels
    out_channels_in_stage *= scale_factor
    out_channels_in_stage = out_channels_in_stage.astype(int)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        #if not K.is_keras_tensor(input_tensor):
        #img_input = Input(tensor=input_tensor, shape=input_shape)
        #else:
        #img_input = input_tensor
        img_input = input_tensor

    # create shufflenet architecture
    x = YoloConv2D(filters=out_channels_in_stage[0],
                   kernel_size=(3, 3),
                   padding='same',
                   use_bias=False,
                   strides=(2, 2),
                   activation="relu",
                   name="conv1")(img_input)
    x = MaxPooling2D(pool_size=(3, 3),
                     strides=(2, 2),
                     padding='same',
                     name="maxpool1")(x)

    # create stages containing shufflenet units beginning at stage 2
    for stage in range(0, len(num_shuffle_units)):
        repeat = num_shuffle_units[stage]
        x = _block(x,
                   out_channels_in_stage,
                   repeat=repeat,
                   bottleneck_ratio=bottleneck_ratio,
                   groups=groups,
                   stage=stage + 2)

    if include_top:
        #x = Dense(units=classes, name="fc")(x)
        #x = Activation('softmax', name='softmax')(x)
        x = GlobalAveragePooling2D(name='global_avg_pool')(x)
        x = Dense(units=classes,
                  activation='softmax',
                  use_bias=True,
                  name='Logits')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D(name='global_avg_pool')(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D(name='global_max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs=inputs, outputs=x, name=name)

    # Load weights.
    if weights == 'imagenet':
        if K.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format '
                             'are not available.')

        if include_top:
            model_name = ('shufflenet_weights_tf_dim_ordering_tf_kernels_' +
                          str(alpha) + '_' + str(rows) + '.h5')
            weigh_path = BASE_WEIGHT_PATH + model_name
            weights_path = get_file(model_name,
                                    weigh_path,
                                    cache_subdir='models')
        else:
            model_name = ('shufflenet_weights_tf_dim_ordering_tf_kernels_' +
                          str(alpha) + '_' + str(rows) + '_no_top' + '.h5')
            weigh_path = BASE_WEIGHT_PATH + model_name
            weights_path = get_file(model_name,
                                    weigh_path,
                                    cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
Esempio n. 26
0
def shuffle_unit(inputs,
                 out_channels,
                 bottleneck_ratio,
                 strides=2,
                 stage=1,
                 block=1):
    if K.image_data_format() == 'channels_last':
        bn_axis = -1
    else:
        raise ValueError('Only channels last supported')

    prefix = 'stage{}/block{}'.format(stage, block)
    bottleneck_channels = int(out_channels * bottleneck_ratio)
    if strides < 2:
        c_hat, c = channel_split(inputs, '{}/spl'.format(prefix))
        inputs = c

    x = YoloConv2D(bottleneck_channels,
                   kernel_size=(1, 1),
                   strides=1,
                   padding='same',
                   name='{}/1x1conv_1'.format(prefix))(inputs)
    x = CustomBatchNormalization(axis=bn_axis,
                                 name='{}/bn_1x1conv_1'.format(prefix))(x)
    x = Activation('relu', name='{}/relu_1x1conv_1'.format(prefix))(x)
    x = YoloDepthwiseConv2D(kernel_size=3,
                            strides=strides,
                            padding='same',
                            name='{}/3x3dwconv'.format(prefix))(x)
    x = CustomBatchNormalization(axis=bn_axis,
                                 name='{}/bn_3x3dwconv'.format(prefix))(x)
    x = YoloConv2D(bottleneck_channels,
                   kernel_size=1,
                   strides=1,
                   padding='same',
                   name='{}/1x1conv_2'.format(prefix))(x)
    x = CustomBatchNormalization(axis=bn_axis,
                                 name='{}/bn_1x1conv_2'.format(prefix))(x)
    x = Activation('relu', name='{}/relu_1x1conv_2'.format(prefix))(x)

    if strides < 2:
        ret = Concatenate(axis=bn_axis,
                          name='{}/concat_1'.format(prefix))([x, c_hat])
    else:
        s2 = YoloDepthwiseConv2D(kernel_size=3,
                                 strides=2,
                                 padding='same',
                                 name='{}/3x3dwconv_2'.format(prefix))(inputs)
        s2 = CustomBatchNormalization(
            axis=bn_axis, name='{}/bn_3x3dwconv_2'.format(prefix))(s2)
        s2 = YoloConv2D(bottleneck_channels,
                        kernel_size=1,
                        strides=1,
                        padding='same',
                        name='{}/1x1_conv_3'.format(prefix))(s2)
        s2 = CustomBatchNormalization(
            axis=bn_axis, name='{}/bn_1x1conv_3'.format(prefix))(s2)
        s2 = Activation('relu', name='{}/relu_1x1conv_3'.format(prefix))(s2)
        ret = Concatenate(axis=bn_axis,
                          name='{}/concat_2'.format(prefix))([x, s2])

    ret = Lambda(channel_shuffle,
                 name='{}/channel_shuffle'.format(prefix))(ret)

    return ret