Example 1
def tiny_yolo4lite_mobilenet_body(inputs,
                                  num_anchors,
                                  num_classes,
                                  alpha=1.0,
                                  use_spp=True):
    '''Create Tiny YOLO_v4 Lite MobileNet model CNN body in Keras.'''
    mobilenet = MobileNet(input_tensor=inputs,
                          weights='imagenet',
                          include_top=False,
                          alpha=alpha)

    # input: 416 x 416 x 3
    # conv_pw_13_relu :13 x 13 x (1024*alpha)
    # conv_pw_11_relu :26 x 26 x (512*alpha)
    # conv_pw_5_relu : 52 x 52 x (256*alpha)

    # f1 :13 x 13 x (1024*alpha) for 416 input
    f1 = mobilenet.get_layer('conv_pw_13_relu').output
    # f2: 26 x 26 x (512*alpha) for 416 input
    f2 = mobilenet.get_layer('conv_pw_11_relu').output

    #feature map 1 head (13 x 13 x (512*alpha) for 416 input)
    x1 = DarknetConv2D_BN_Leaky(int(512 * alpha), (1, 1))(f1)
    if use_spp:
        x1 = Spp_Conv2D_BN_Leaky(x1, int(512 * alpha))

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Leaky(int(256 * alpha), (1, 1)),
                          UpSampling2D(2))(x1)
    x2 = compose(
        Concatenate(),
        #DarknetConv2D_BN_Leaky(int(512*alpha), (3,3)),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=int(512 * alpha),
                                            kernel_size=(3, 3),
                                            block_id_str='15'))(
                                                [x1_upsample, f2])

    #feature map 2 output (26 x 26 x (512*alpha) for 416 input)
    y2 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        #DarknetConv2D_BN_Leaky(int(512*alpha), (3,3), strides=(2,2)),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(int(512 * alpha), (3, 3),
                                                    strides=(2, 2),
                                                    block_id_str='16'))(x2)
    x1 = compose(
        Concatenate(),
        #DarknetConv2D_BN_Leaky(int(1024*alpha), (3,3)),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=int(1024 * alpha),
                                            kernel_size=(3, 3),
                                            block_id_str='17'))(
                                                [x2_downsample, x1])

    #feature map 1 output (13 x 13 x (1024*alpha) for 416 input)
    y1 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x1)

    return Model(inputs, [y1, y2])
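
A minimal usage sketch for the body above, assuming tiny_yolo4lite_mobilenet_body and the helper layers it calls (DarknetConv2D_BN_Leaky, compose, etc.) are importable from this repository; the anchor and class counts are illustrative values, not from the source:

from tensorflow.keras.layers import Input

image_input = Input(shape=(416, 416, 3))
# 3 anchors per output scale, 20 classes (e.g. Pascal VOC) -- example values only
model = tiny_yolo4lite_mobilenet_body(image_input,
                                      num_anchors=3,
                                      num_classes=20,
                                      alpha=0.75,
                                      use_spp=True)
model.summary()
# For a 416 x 416 input the two outputs are
#   y1: (None, 13, 13, 3 * (20 + 5)) and y2: (None, 26, 26, 3 * (20 + 5))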
Example 2
def tiny_yolo4_mobilenetv3large_body(inputs,
                                     num_anchors,
                                     num_classes,
                                     alpha=1.0,
                                     spp=True):
    '''Create Tiny YOLO_v4 MobileNetV3Large model CNN body in keras.'''
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs,
                                        weights='imagenet',
                                        include_top=False,
                                        alpha=alpha)

    # input: 416 x 416 x 3
    # activation_38(layer 194, final feature map): 13 x 13 x (960*alpha)
    # expanded_conv_14/Add(layer 191, end of block14): 13 x 13 x (160*alpha)

    # activation_29(layer 146, middle in block12) : 26 x 26 x (672*alpha)
    # expanded_conv_11/Add(layer 143, end of block11) : 26 x 26 x (112*alpha)

    # activation_15(layer 79, middle in block6) : 52 x 52 x (240*alpha)
    # expanded_conv_5/Add(layer 76, end of block5): 52 x 52 x (40*alpha)

    # f1 :13 x 13 x (960*alpha)
    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature map
    f1 = mobilenetv3large.layers[194].output
    # f2: 26 x 26 x (672*alpha) for 416 input
    f2 = mobilenetv3large.layers[146].output

    #feature map 1 head (13 x 13 x (480*alpha) for 416 input)
    x1 = DarknetConv2D_BN_Leaky(int(480 * alpha), (1, 1))(f1)
    if spp:
        x1 = Spp_Conv2D_BN_Leaky(x1, int(480 * alpha))

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Leaky(int(336 * alpha), (1, 1)),
                          UpSampling2D(2))(x1)
    x2 = compose(
        Concatenate(),
        #Depthwise_Separable_Conv2D_BN_Leaky(filters=int(672*alpha), kernel_size=(3, 3), block_id_str='15'),
        DarknetConv2D_BN_Leaky(int(672 * alpha), (3, 3)))([x1_upsample, f2])

    #feature map 2 output (26 x 26 x (672*alpha) for 416 input)
    y2 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        #Darknet_Depthwise_Separable_Conv2D_BN_Leaky(int(480*alpha), (3,3), strides=(2,2), block_id_str='16'),
        DarknetConv2D_BN_Leaky(int(480 * alpha), (3, 3), strides=(2, 2)))(x2)
    x1 = compose(
        Concatenate(),
        #Depthwise_Separable_Conv2D_BN_Leaky(filters=int(960*alpha), kernel_size=(3, 3), block_id_str='17'),
        DarknetConv2D_BN_Leaky(int(960 * alpha), (3, 3)))([x2_downsample, x1])

    #feature map 1 output (13 x 13 x (960*alpha) for 416 input)
    y1 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x1)

    return Model(inputs, [y1, y2])
Example 3
def tiny_yolo4lite_efficientnet_body(inputs,
                                     num_anchors,
                                     num_classes,
                                     level=0,
                                     use_spp=True):
    '''
    Create Tiny YOLO_v4 Lite EfficientNet model CNN body in Keras.
    # Arguments
        level: EfficientNet level number.
            By default the basic EfficientNetB0 is used as backbone.
    '''
    efficientnet, feature_map_info = get_efficientnet_backbone_info(
        inputs, level=level)

    f1 = efficientnet.get_layer('top_activation').output
    f2 = efficientnet.get_layer('block6a_expand_activation').output
    f1_channel_num = feature_map_info['f1_channel_num']
    f2_channel_num = feature_map_info['f2_channel_num']

    #feature map 1 head (13 x 13 x (f1_channel_num//2) for 416 input)
    x1 = DarknetConv2D_BN_Leaky(f1_channel_num // 2, (1, 1))(f1)
    if use_spp:
        x1 = Spp_Conv2D_BN_Leaky(x1, f1_channel_num // 2)

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
                          UpSampling2D(2))(x1)
    x2 = compose(
        Concatenate(),
        #DarknetConv2D_BN_Leaky(f2_channel_num, (3,3)),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=f2_channel_num,
                                            kernel_size=(3, 3),
                                            block_id_str='8'))(
                                                [x1_upsample, f2])

    #feature map 2 output (26 x 26 x f2_channel_num for 416 input)
    y2 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        #DarknetConv2D_BN_Leaky(f1_channel_num//2, (3,3), strides=(2,2)),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(f1_channel_num // 2,
                                                    (3, 3),
                                                    strides=(2, 2),
                                                    block_id_str='9'))(x2)
    x1 = compose(
        Concatenate(),
        #DarknetConv2D_BN_Leaky(f1_channel_num, (3,3)),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=f1_channel_num,
                                            kernel_size=(3, 3),
                                            block_id_str='10'))(
                                                [x2_downsample, x1])

    #feature map 1 output (13 x 13 x f1_channel_num for 416 input)
    y1 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x1)

    return Model(inputs, [y1, y2])
Example 4
def tiny_yolo4_mobilenetv3small_body(inputs,
                                     num_anchors,
                                     num_classes,
                                     alpha=1.0,
                                     use_spp=True):
    '''Create Tiny YOLO_v4 MobileNetV3Small model CNN body in keras.'''
    mobilenetv3small = MobileNetV3Small(input_tensor=inputs,
                                        weights='imagenet',
                                        include_top=False,
                                        alpha=alpha)

    # input: 416 x 416 x 3
    # activation_31(layer 165, final feature map): 13 x 13 x (576*alpha)
    # expanded_conv_10/Add(layer 162, end of block10): 13 x 13 x (96*alpha)

    # activation_22(layer 117, middle in block8) : 26 x 26 x (288*alpha)
    # expanded_conv_7/Add(layer 114, end of block7) : 26 x 26 x (48*alpha)

    # activation_7(layer 38, middle in block3) : 52 x 52 x (96*alpha)
    # expanded_conv_2/Add(layer 35, end of block2): 52 x 52 x (24*alpha)

    # f1 :13 x 13 x (576*alpha)
    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature map
    f1 = mobilenetv3small.layers[165].output
    # f2: 26 x 26 x (288*alpha) for 416 input
    f2 = mobilenetv3small.layers[117].output

    #feature map 1 head (13 x 13 x (288*alpha) for 416 input)
    x1 = DarknetConv2D_BN_Leaky(int(288 * alpha), (1, 1))(f1)
    if use_spp:
        x1 = Spp_Conv2D_BN_Leaky(x1, int(288 * alpha))

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Leaky(int(144 * alpha), (1, 1)),
                          UpSampling2D(2))(x1)
    x2 = compose(
        Concatenate(),
        #Depthwise_Separable_Conv2D_BN_Leaky(filters=int(288*alpha), kernel_size=(3, 3), block_id_str='11'),
        DarknetConv2D_BN_Leaky(int(288 * alpha), (3, 3)))([x1_upsample, f2])

    #feature map 2 output (26 x 26 x (288*alpha) for 416 input)
    y2 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        #Darknet_Depthwise_Separable_Conv2D_BN_Leaky(int(288*alpha), (3,3), strides=(2,2), block_id_str='12'),
        DarknetConv2D_BN_Leaky(int(288 * alpha), (3, 3), strides=(2, 2)))(x2)
    x1 = compose(
        Concatenate(),
        #Depthwise_Separable_Conv2D_BN_Leaky(filters=int(576*alpha), kernel_size=(3, 3), block_id_str='13'),
        DarknetConv2D_BN_Leaky(int(576 * alpha), (3, 3)))([x2_downsample, x1])

    #feature map 1 output (13 x 13 x (576*alpha) for 416 input)
    y1 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x1)

    return Model(inputs, [y1, y2])
def yolo5lite_predictions(feature_maps, feature_channel_nums, num_anchors, num_classes, depth_multiple, width_multiple):
    f1, f2, f3 = feature_maps
    f1_channel_num, f2_channel_num, f3_channel_num = feature_channel_nums

    # SPP & BottleneckCSP block, in ultralytics PyTorch version
    # they're defined in backbone
    x1 = make_yolo5_spp_neck(f1, f1_channel_num)
    x1 = bottleneck_csp_lite_block(x1, f1_channel_num, 3, depth_multiple, width_multiple, shortcut=False, block_id_str='pred_1')

    #feature map 1 head (19x19 for 608 input)
    x1 = DarknetConv2D_BN_Swish(f2_channel_num, (1,1))(x1)

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = UpSampling2D(2)(x1)
    x2 = Concatenate()([f2, x1_upsample])

    x2 = bottleneck_csp_lite_block(x2, f2_channel_num, 3, depth_multiple, width_multiple, shortcut=False, block_id_str='pred_2')
    #feature map 2 head (38x38 for 608 input)
    x2 = DarknetConv2D_BN_Swish(f3_channel_num, (1,1))(x2)

    #upsample fpn merge for feature map 2 & 3
    x2_upsample = UpSampling2D(2)(x2)
    x3 = Concatenate()([f3, x2_upsample])

    #feature map 3 head & output (76x76 for 608 input)
    x3 = bottleneck_csp_lite_block(x3, f3_channel_num, 3, depth_multiple, width_multiple, shortcut=False, block_id_str='pred_3')
    y3 = DarknetConv2D(num_anchors*(num_classes+5), (1,1), name='predict_conv_3')(x3)

    #downsample fpn merge for feature map 3 & 2
    x3_downsample = compose(
            ZeroPadding2D(((1,0),(1,0))),
            #DarknetConv2D_BN_Swish(f3_channel_num, (3,3), strides=(2,2)))(x3)
            Darknet_Depthwise_Separable_Conv2D_BN_Swish(f3_channel_num, (3,3), strides=(2,2), block_id_str='pred_3_2'))(x3)

    x2 = Concatenate()([x3_downsample, x2])

    #feature map 2 output (38x38 for 608 input)
    x2 = bottleneck_csp_lite_block(x2, f2_channel_num, 3, depth_multiple, width_multiple, shortcut=False, block_id_str='pred_4')

    y2 = DarknetConv2D(num_anchors*(num_classes+5), (1,1), name='predict_conv_2')(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
            ZeroPadding2D(((1,0),(1,0))),
            #DarknetConv2D_BN_Swish(f2_channel_num, (3,3), strides=(2,2)))(x2)
            Darknet_Depthwise_Separable_Conv2D_BN_Swish(f2_channel_num, (3,3), strides=(2,2), block_id_str='pred_4_2'))(x2)

    x1 = Concatenate()([x2_downsample, x1])

    #feature map 1 output (19x19 for 608 input)
    x1 = bottleneck_csp_lite_block(x1, f1_channel_num, 3, depth_multiple, width_multiple, shortcut=False, block_id_str='pred_5')

    y1 = DarknetConv2D(num_anchors*(num_classes+5), (1,1), name='predict_conv_1')(x1)

    return y1, y2, y3
def bottleneck_csp_c3_lite_block(x,
                                 num_filters,
                                 num_blocks,
                                 depth_multiple,
                                 width_multiple,
                                 shortcut=False,
                                 block_id_str=None):
    '''CSP Bottleneck with 3 convolutions'''
    num_filters = make_divisible(num_filters * width_multiple, 8)
    num_blocks = max(round(num_blocks * depth_multiple),
                     1) if num_blocks > 1 else num_blocks  # depth gain

    res_connection = DarknetConv2D_BN_Swish(num_filters // 2, (1, 1))(x)
    x = DarknetConv2D_BN_Swish(num_filters // 2, (1, 1))(x)

    # Bottleneck block stack
    for i in range(num_blocks):
        y = compose(
            DarknetConv2D_BN_Swish(num_filters // 2, (1, 1)),
            #DarknetConv2D_BN_Swish(num_filters//2, (3,3)))(x)
            # per-iteration suffix so stacked blocks get unique layer names
            Depthwise_Separable_Conv2D_BN_Swish(filters=num_filters // 2,
                                                kernel_size=(3, 3),
                                                block_id_str='%s_%d' %
                                                (block_id_str, i + 1)))(x)
        x = Add()([x, y]) if shortcut else y

    #x = DarknetConv2D(num_filters//2, (1,1))(x)
    x = Concatenate()([x, res_connection])

    return DarknetConv2D_BN_Swish(num_filters, (1, 1))(x)
def bottleneck_csp_block(x,
                         num_filters,
                         num_blocks,
                         depth_multiple,
                         width_multiple,
                         shortcut=False):
    '''CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks'''
    num_filters = make_divisible(num_filters * width_multiple, 8)
    num_blocks = max(round(num_blocks * depth_multiple),
                     1) if num_blocks > 1 else num_blocks  # depth gain

    res_connection = DarknetConv2D(num_filters // 2, (1, 1))(x)
    x = DarknetConv2D_BN_Swish(num_filters // 2, (1, 1))(x)

    # Bottleneck block stack
    for i in range(num_blocks):
        y = compose(DarknetConv2D_BN_Swish(num_filters // 2, (1, 1)),
                    DarknetConv2D_BN_Swish(num_filters // 2, (3, 3)))(x)
        x = Add()([x, y]) if shortcut else y

    x = DarknetConv2D(num_filters // 2, (1, 1))(x)
    x = Concatenate()([x, res_connection])

    x = CustomBatchNormalization()(x)
    x = Activation(swish)(x)
    return DarknetConv2D_BN_Swish(num_filters, (1, 1))(x)
def DarknetConv2D_BN_Swish(*args, **kwargs):
    """Darknet Convolution2D followed by CustomBatchNormalization and Swish."""
    no_bias_kwargs = {'use_bias': False}
    no_bias_kwargs.update(kwargs)
    return compose(
        DarknetConv2D(*args, **no_bias_kwargs),
        CustomBatchNormalization(),
        Activation(swish))
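
These examples chain layers with a compose() helper. Below is a minimal sketch of the conventional left-to-right functools.reduce implementation used in keras-yolo3-style codebases; the repository's own definition is assumed to be equivalent but may differ in detail:

from functools import reduce

def compose(*funcs):
    """Compose arbitrarily many functions, evaluated left to right.
    compose(f, g)(x) is g(f(x)); raises on an empty sequence."""
    if funcs:
        return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
    raise ValueError('Composition of empty sequence not supported.')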
def Spp_Conv2D_BN_Swish(x, num_filters):
    """SPP block: parallel 5x5/9x9/13x13 max poolings concatenated with the
    input, followed by a 1x1 DarknetConv2D_BN_Swish."""
    y1 = MaxPooling2D(pool_size=(5, 5), strides=(1, 1), padding='same')(x)
    y2 = MaxPooling2D(pool_size=(9, 9), strides=(1, 1), padding='same')(x)
    y3 = MaxPooling2D(pool_size=(13, 13), strides=(1, 1), padding='same')(x)

    y = compose(Concatenate(), DarknetConv2D_BN_Swish(num_filters,
                                                      (1, 1)))([y3, y2, y1, x])
    return y
Example 10
def tiny_yolo5lite_predictions(feature_maps, feature_channel_nums, num_anchors,
                               num_classes, use_spp):
    f1, f2 = feature_maps
    f1_channel_num, f2_channel_num = feature_channel_nums

    #feature map 1 head (13 x 13 x f1_channel_num//2 for 416 input)
    x1 = DarknetConv2D_BN_Mish(f1_channel_num // 2, (1, 1))(f1)
    if use_spp:
        x1 = Spp_Conv2D_BN_Mish(x1, f1_channel_num // 2)

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Mish(f2_channel_num // 2, (1, 1)),
                          UpSampling2D(2))(x1)
    x2 = compose(
        Concatenate(),
        #DarknetConv2D_BN_Mish(f2_channel_num, (3,3)),
        Depthwise_Separable_Conv2D_BN_Mish(filters=f2_channel_num,
                                           kernel_size=(3, 3),
                                           block_id_str='pred_1'))(
                                               [x1_upsample, f2])

    #feature map 2 output (26 x 26 x f2_channel_num for 416 input)
    y2 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1),
                       name='predict_conv_2')(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        #DarknetConv2D_BN_Mish(f1_channel_num//2, (3,3), strides=(2,2)),
        Darknet_Depthwise_Separable_Conv2D_BN_Mish(f1_channel_num // 2, (3, 3),
                                                   strides=(2, 2),
                                                   block_id_str='pred_2'))(x2)
    x1 = compose(
        Concatenate(),
        #DarknetConv2D_BN_Mish(f1_channel_num, (3,3)),
        Depthwise_Separable_Conv2D_BN_Mish(filters=f1_channel_num,
                                           kernel_size=(3, 3),
                                           block_id_str='pred_3'))(
                                               [x2_downsample, x1])

    #feature map 1 output (13 x 13 x f1_channel_num for 416 input)
    y1 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1),
                       name='predict_conv_1')(x1)

    return y1, y2
Example 11
def yolo4_body(inputs, num_anchors, num_classes, weights_path=None):
    """Create YOLO_V4 model CNN body in Keras."""
    darknet = Model(inputs, csp_darknet53_body(inputs))
    if weights_path is not None:
        darknet.load_weights(weights_path, by_name=True)
        print('Load weights {}.'.format(weights_path))

    #feature map 1 head (19x19 for 608 input)
    x1 = make_yolo_spp_head(darknet.output, 512)

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Leaky(256, (1, 1)),
                          UpSampling2D(2))(x1)

    x2 = DarknetConv2D_BN_Leaky(256, (1, 1))(darknet.layers[204].output)
    x2 = Concatenate()([x2, x1_upsample])

    #feature map 2 head (38x38 for 608 input)
    x2 = make_yolo_head(x2, 256)

    #upsample fpn merge for feature map 2 & 3
    x2_upsample = compose(DarknetConv2D_BN_Leaky(128, (1, 1)),
                          UpSampling2D(2))(x2)

    x3 = DarknetConv2D_BN_Leaky(128, (1, 1))(darknet.layers[131].output)
    x3 = Concatenate()([x3, x2_upsample])

    #feature map 3 head & output (76x76 for 608 input)
    #x3, y3 = make_last_layers(x3, 128, num_anchors*(num_classes+5))
    x3 = make_yolo_head(x3, 128)
    y3 = compose(DarknetConv2D_BN_Leaky(256, (3, 3)),
                 DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x3)

    #downsample fpn merge for feature map 3 & 2
    x3_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        DarknetConv2D_BN_Leaky(256, (3, 3), strides=(2, 2)))(x3)

    x2 = Concatenate()([x3_downsample, x2])

    #feature map 2 output (38x38 for 608 input)
    #x2, y2 = make_last_layers(x2, 256, num_anchors*(num_classes+5))
    x2 = make_yolo_head(x2, 256)
    y2 = compose(DarknetConv2D_BN_Leaky(512, (3, 3)),
                 DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        DarknetConv2D_BN_Leaky(512, (3, 3), strides=(2, 2)))(x2)

    x1 = Concatenate()([x2_downsample, x1])

    #feature map 1 output (19x19 for 608 input)
    #x1, y1 = make_last_layers(x1, 512, num_anchors*(num_classes+5))
    x1 = make_yolo_head(x1, 512)
    y1 = compose(DarknetConv2D_BN_Leaky(1024, (3, 3)),
                 DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x1)

    return Model(inputs, [y1, y2, y3])
Example 12
def resblock_body(x, num_filters, num_blocks):
    '''A series of resblocks starting with a downsampling Convolution2D'''
    # Darknet uses left and top padding instead of 'same' mode
    x = ZeroPadding2D(((1, 0), (1, 0)))(x)
    x = DarknetConv2D_BN_Mish(num_filters, (3, 3), strides=(2, 2))(x)
    for i in range(num_blocks):
        y = compose(DarknetConv2D_BN_Mish(num_filters // 2, (1, 1)),
                    DarknetConv2D_BN_Mish(num_filters, (3, 3)))(x)
        x = Add()([x, y])
    return x
Example 13
def Depthwise_Separable_Conv2D_BN_Swish(filters, kernel_size=(3, 3), block_id_str=None):
    """Depthwise Separable Convolution2D."""
    if not block_id_str:
        block_id_str = str(K.get_uid())
    return compose(
        YoloDepthwiseConv2D(kernel_size, padding='same', name='conv_dw_' + block_id_str),
        CustomBatchNormalization(name='conv_dw_%s_bn' % block_id_str),
        Activation(swish, name='conv_dw_%s_swish' % block_id_str),
        YoloConv2D(filters, (1,1), padding='same', use_bias=False, strides=(1, 1), name='conv_pw_%s' % block_id_str),
        CustomBatchNormalization(name='conv_pw_%s_bn' % block_id_str),
        Activation(swish, name='conv_pw_%s_swish' % block_id_str))
Example 14
def Spp_Conv2D_BN_Swish_Fast(x, num_filters):
    """
    An optimized SPP block using smaller size pooling layer,
    which would be more friendly to some edge inference device (NPU).
    """
    y1 = MaxPooling2D(pool_size=(5, 5), strides=(1, 1), padding='same')(x)
    y2 = MaxPooling2D(pool_size=(5, 5), strides=(1, 1), padding='same')(y1)
    y3 = MaxPooling2D(pool_size=(5, 5), strides=(1, 1), padding='same')(y2)

    y = compose(Concatenate(), DarknetConv2D_BN_Swish(num_filters,
                                                      (1, 1)))([y3, y2, y1, x])
    return y
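
For reference, the cascaded 5x5 poolings above reproduce the receptive fields of the single 5/9/13 poolings in the regular SPP block: n stacked stride-1 k x k windows cover an effective n*(k-1)+1 region. A quick check:

k = 5
for n in (1, 2, 3):
    print('poolings: %d -> effective window: %d' % (n, n * (k - 1) + 1))
# poolings: 1 -> effective window: 5
# poolings: 2 -> effective window: 9
# poolings: 3 -> effective window: 13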
Example 15
def Darknet_Depthwise_Separable_Conv2D_BN_Swish(filters, kernel_size=(3, 3), block_id_str=None, **kwargs):
    """Depthwise Separable Convolution2D."""
    if not block_id_str:
        block_id_str = str(K.get_uid())
    no_bias_kwargs = {'use_bias': False}
    no_bias_kwargs.update(kwargs)
    return compose(
        DarknetDepthwiseConv2D(kernel_size, name='conv_dw_' + block_id_str, **no_bias_kwargs),
        CustomBatchNormalization(name='conv_dw_%s_bn' % block_id_str),
        Activation(swish, name='conv_dw_%s_swish' % block_id_str),
        YoloConv2D(filters, (1,1), padding='same', use_bias=False, strides=(1, 1), name='conv_pw_%s' % block_id_str),
        CustomBatchNormalization(name='conv_pw_%s_bn' % block_id_str),
        Activation(swish, name='conv_pw_%s_swish' % block_id_str))
Example 16
def make_csp_yolo_head(x, num_filters):
    '''CSP YOLO head: a 1x1 residual branch plus a stack of Conv2D_BN_Mish layers, merged by concatenation and a final 1x1 conv'''
    x = DarknetConv2D_BN_Mish(num_filters, (1, 1))(x)
    res_connection = DarknetConv2D_BN_Mish(num_filters, (1, 1))(x)

    x = compose(DarknetConv2D_BN_Mish(num_filters, (1, 1)),
                DarknetConv2D_BN_Mish(num_filters, (3, 3)),
                DarknetConv2D_BN_Mish(num_filters, (1, 1)),
                DarknetConv2D_BN_Mish(num_filters, (3, 3)))(x)

    x = Concatenate()([x, res_connection])
    x = DarknetConv2D_BN_Mish(num_filters, (1, 1))(x)

    return x
Example 17
def csp_resblock_body(x, num_filters, num_blocks, all_narrow=True):
    '''A series of resblocks starting with a downsampling Convolution2D'''
    # Darknet uses left and top padding instead of 'same' mode
    x = ZeroPadding2D(((1, 0), (1, 0)))(x)
    x = DarknetConv2D_BN_Mish(num_filters, (3, 3), strides=(2, 2))(x)

    res_connection = DarknetConv2D_BN_Mish(
        num_filters // 2 if all_narrow else num_filters, (1, 1))(x)
    x = DarknetConv2D_BN_Mish(num_filters // 2 if all_narrow else num_filters,
                              (1, 1))(x)

    for i in range(num_blocks):
        y = compose(
            DarknetConv2D_BN_Mish(num_filters // 2, (1, 1)),
            DarknetConv2D_BN_Mish(
                num_filters // 2 if all_narrow else num_filters, (3, 3)))(x)
        x = Add()([x, y])

    x = DarknetConv2D_BN_Mish(num_filters // 2 if all_narrow else num_filters,
                              (1, 1))(x)
    x = Concatenate()([x, res_connection])

    return DarknetConv2D_BN_Mish(num_filters, (1, 1))(x)
Example 18
def make_csp_yolo_depthwise_separable_head(x, num_filters, block_id_str=None):
    '''CSP YOLO head with depthwise-separable 3x3 blocks: a 1x1 residual branch plus a conv stack, merged by concatenation and a final 1x1 conv'''
    if not block_id_str:
        block_id_str = str(K.get_uid())

    x = DarknetConv2D_BN_Mish(num_filters, (1, 1))(x)
    res_connection = DarknetConv2D_BN_Mish(num_filters, (1, 1))(x)

    x = compose(
        DarknetConv2D_BN_Mish(num_filters, (1, 1)),
        Depthwise_Separable_Conv2D_BN_Mish(filters=num_filters,
                                           kernel_size=(3, 3),
                                           block_id_str=block_id_str + '_1'),
        DarknetConv2D_BN_Mish(num_filters, (1, 1)),
        Depthwise_Separable_Conv2D_BN_Mish(filters=num_filters,
                                           kernel_size=(3, 3),
                                           block_id_str=block_id_str +
                                           '_2'))(x)

    x = Concatenate()([x, res_connection])
    x = DarknetConv2D_BN_Mish(num_filters, (1, 1))(x)

    return x
Example 19
def yolo4lite_mobilenetv3small_body(inputs,
                                    num_anchors,
                                    num_classes,
                                    alpha=1.0):
    '''Create YOLO_v4 Lite MobileNetV3Small model CNN body in keras.'''
    mobilenetv3small = MobileNetV3Small(input_tensor=inputs,
                                        weights='imagenet',
                                        include_top=False,
                                        alpha=alpha)

    # input: 416 x 416 x 3
    # activation_31(layer 165, final feature map): 13 x 13 x (576*alpha)
    # expanded_conv_10/Add(layer 162, end of block10): 13 x 13 x (96*alpha)

    # activation_22(layer 117, middle in block8) : 26 x 26 x (288*alpha)
    # expanded_conv_7/Add(layer 114, end of block7) : 26 x 26 x (48*alpha)

    # activation_7(layer 38, middle in block3) : 52 x 52 x (96*alpha)
    # expanded_conv_2/Add(layer 35, end of block2): 52 x 52 x (24*alpha)

    # f1 :13 x 13 x (576*alpha)
    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature map
    f1 = mobilenetv3small.layers[165].output
    #feature map 1 head (13 x 13 x (288*alpha) for 416 input)
    x1 = make_yolo_spp_depthwise_separable_head(f1,
                                                int(288 * alpha),
                                                block_id_str='11')

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Leaky(int(144 * alpha), (1, 1)),
                          UpSampling2D(2))(x1)

    f2 = mobilenetv3small.layers[117].output
    # f2: 26 x 26 x (288*alpha) for 416 input
    x2 = DarknetConv2D_BN_Leaky(int(144 * alpha), (1, 1))(f2)
    x2 = Concatenate()([x2, x1_upsample])

    #feature map 2 head (26 x 26 x (144*alpha) for 416 input)
    x2 = make_yolo_depthwise_separable_head(x2,
                                            int(144 * alpha),
                                            block_id_str='12')

    #upsample fpn merge for feature map 2 & 3
    x2_upsample = compose(DarknetConv2D_BN_Leaky(int(48 * alpha), (1, 1)),
                          UpSampling2D(2))(x2)

    f3 = mobilenetv3small.layers[38].output
    # f3 : 52 x 52 x (96*alpha)

    x3 = DarknetConv2D_BN_Leaky(int(48 * alpha), (1, 1))(f3)
    x3 = Concatenate()([x3, x2_upsample])

    #feature map 3 head & output (52 x 52 x (96*alpha) for 416 input)
    #x3, y3 = make_depthwise_separable_last_layers(x3, int(48*alpha), num_anchors*(num_classes+5), block_id_str='13')
    x3 = make_yolo_depthwise_separable_head(x3,
                                            int(48 * alpha),
                                            block_id_str='13')
    y3 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(int(96 * alpha), (3, 3),
                                            block_id_str='13_3'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x3)

    #downsample fpn merge for feature map 3 & 2
    x3_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(int(144 * alpha), (3, 3),
                                                    strides=(2, 2),
                                                    block_id_str='13_4'))(x3)

    x2 = Concatenate()([x3_downsample, x2])

    #feature map 2 output (26 x 26 x (288*alpha) for 416 input)
    #x2, y2 = make_depthwise_separable_last_layers(x2, int(144*alpha), num_anchors*(num_classes+5), block_id_str='14')
    x2 = make_yolo_depthwise_separable_head(x2,
                                            int(144 * alpha),
                                            block_id_str='14')
    y2 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(int(288 * alpha), (3, 3),
                                            block_id_str='14_3'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(int(288 * alpha), (3, 3),
                                                    strides=(2, 2),
                                                    block_id_str='14_4'))(x2)

    x1 = Concatenate()([x2_downsample, x1])

    #feature map 1 output (13 x 13 x (576*alpha) for 416 input)
    #x1, y1 = make_depthwise_separable_last_layers(x1, int(288*alpha), num_anchors*(num_classes+5), block_id_str='15')
    x1 = make_yolo_depthwise_separable_head(x1,
                                            int(288 * alpha),
                                            block_id_str='15')
    y1 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(int(576 * alpha), (3, 3),
                                            block_id_str='15_3'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x1)

    return Model(inputs, [y1, y2, y3])
Example 20
def yolo4lite_efficientnet_body(inputs, num_anchors, num_classes, level=1):
    '''
    Create YOLO_v4 Lite EfficientNet model CNN body in Keras.
    # Arguments
        level: EfficientNet level number.
            By default the basic EfficientNetB1 is used as backbone.
    '''
    efficientnet, feature_map_info = get_efficientnet_backbone_info(
        inputs, level=level)

    f1 = efficientnet.get_layer('top_activation').output
    f1_channel_num = feature_map_info['f1_channel_num']

    f2 = efficientnet.get_layer('block6a_expand_activation').output
    f2_channel_num = feature_map_info['f2_channel_num']

    f3 = efficientnet.get_layer('block4a_expand_activation').output
    f3_channel_num = feature_map_info['f3_channel_num']

    #feature map 1 head (13x13x(f1_channel_num//2) for 416 input)
    x1 = make_yolo_spp_depthwise_separable_head(f1,
                                                f1_channel_num // 2,
                                                block_id_str='8')

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
                          UpSampling2D(2))(x1)

    x2 = DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1))(f2)
    x2 = Concatenate()([x2, x1_upsample])

    #feature map 2 head (26x26x(f2_channel_num//2) for 416 input)
    x2 = make_yolo_depthwise_separable_head(x2,
                                            f2_channel_num // 2,
                                            block_id_str='9')

    #upsample fpn merge for feature map 2 & 3
    x2_upsample = compose(DarknetConv2D_BN_Leaky(f3_channel_num // 2, (1, 1)),
                          UpSampling2D(2))(x2)

    x3 = DarknetConv2D_BN_Leaky(f3_channel_num // 2, (1, 1))(f3)
    x3 = Concatenate()([x3, x2_upsample])

    #feature map 3 head & output (52x52xf3_channel_num for 416 input)
    #x3, y3 = make_depthwise_separable_last_layers(x3, f3_channel_num//2, num_anchors*(num_classes+5), block_id_str='10')
    x3 = make_yolo_depthwise_separable_head(x3,
                                            f3_channel_num // 2,
                                            block_id_str='10')
    y3 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(f3_channel_num, (3, 3),
                                            block_id_str='10_3'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x3)

    #downsample fpn merge for feature map 3 & 2
    x3_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(f2_channel_num // 2,
                                                    (3, 3),
                                                    strides=(2, 2),
                                                    block_id_str='10_4'))(x3)

    x2 = Concatenate()([x3_downsample, x2])

    #feature map 2 output (26x26xf2_channel_num for 416 input)
    #x2, y2 = make_depthwise_separable_last_layers(x2, f2_channel_num//2, num_anchors*(num_classes+5), block_id_str='11')
    x2 = make_yolo_depthwise_separable_head(x2,
                                            f2_channel_num // 2,
                                            block_id_str='11')
    y2 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(f2_channel_num, (3, 3),
                                            block_id_str='11_3'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(f1_channel_num // 2,
                                                    (3, 3),
                                                    strides=(2, 2),
                                                    block_id_str='11_4'))(x2)

    x1 = Concatenate()([x2_downsample, x1])

    #feature map 1 output (13x13xf1_channel_num for 416 input)
    #x1, y1 = make_depthwise_separable_last_layers(x1, f1_channel_num//2, num_anchors*(num_classes+5), block_id_str='12')
    x1 = make_yolo_depthwise_separable_head(x1,
                                            f1_channel_num // 2,
                                            block_id_str='12')
    y1 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(f1_channel_num, (3, 3),
                                            block_id_str='12_3'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x1)

    return Model(inputs, [y1, y2, y3])
Example 21
def yolo4_mobilenet_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Create YOLO_V4 MobileNet model CNN body in Keras."""
    mobilenet = MobileNet(input_tensor=inputs,
                          weights='imagenet',
                          include_top=False,
                          alpha=alpha)

    # input: 416 x 416 x 3
    # conv_pw_13_relu :13 x 13 x (1024*alpha)
    # conv_pw_11_relu :26 x 26 x (512*alpha)
    # conv_pw_5_relu : 52 x 52 x (256*alpha)

    # f1: 13 x 13 x (1024*alpha) for 416 input
    f1 = mobilenet.get_layer('conv_pw_13_relu').output
    # f2: 26 x 26 x (512*alpha) for 416 input
    f2 = mobilenet.get_layer('conv_pw_11_relu').output
    # f3: 52 x 52 x (256*alpha) for 416 input
    f3 = mobilenet.get_layer('conv_pw_5_relu').output

    f1_channel_num = int(1024 * alpha)
    f2_channel_num = int(512 * alpha)
    f3_channel_num = int(256 * alpha)

    #feature map 1 head (13 x 13 x (512*alpha) for 416 input)
    x1 = make_yolo_spp_head(f1, f1_channel_num // 2)

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
                          UpSampling2D(2))(x1)

    x2 = DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1))(f2)
    x2 = Concatenate()([x2, x1_upsample])

    #feature map 2 head (26 x 26 x (256*alpha) for 416 input)
    x2 = make_yolo_head(x2, f2_channel_num // 2)

    #upsample fpn merge for feature map 2 & 3
    x2_upsample = compose(DarknetConv2D_BN_Leaky(f3_channel_num // 2, (1, 1)),
                          UpSampling2D(2))(x2)

    x3 = DarknetConv2D_BN_Leaky(f3_channel_num // 2, (1, 1))(f3)
    x3 = Concatenate()([x3, x2_upsample])

    #feature map 3 head & output (52 x 52 x (256*alpha) for 416 input)
    #x3, y3 = make_last_layers(x3, f3_channel_num//2, num_anchors*(num_classes+5))
    x3 = make_yolo_head(x3, f3_channel_num // 2)
    y3 = compose(DarknetConv2D_BN_Leaky(f3_channel_num, (3, 3)),
                 DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x3)

    #downsample fpn merge for feature map 3 & 2
    x3_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        DarknetConv2D_BN_Leaky(f2_channel_num // 2, (3, 3),
                               strides=(2, 2)))(x3)

    x2 = Concatenate()([x3_downsample, x2])

    #feature map 2 output (26 x 26 x (512*alpha) for 416 input)
    #x2, y2 = make_last_layers(x2, f2_channel_num//2, num_anchors*(num_classes+5))
    x2 = make_yolo_head(x2, f2_channel_num // 2)
    y2 = compose(DarknetConv2D_BN_Leaky(f2_channel_num, (3, 3)),
                 DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        DarknetConv2D_BN_Leaky(f1_channel_num // 2, (3, 3),
                               strides=(2, 2)))(x2)

    x1 = Concatenate()([x2_downsample, x1])

    #feature map 1 output (13 x 13 x (1024*alpha) for 416 input)
    #x1, y1 = make_last_layers(x1, f1_channel_num//2, num_anchors*(num_classes+5))
    x1 = make_yolo_head(x1, f1_channel_num // 2)
    y1 = compose(DarknetConv2D_BN_Leaky(f1_channel_num, (3, 3)),
                 DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x1)

    return Model(inputs, [y1, y2, y3])
Example 22
def yolo5lite_predictions(feature_maps, feature_channel_nums, num_anchors,
                          num_classes):
    f1, f2, f3 = feature_maps
    f1_channel_num, f2_channel_num, f3_channel_num = feature_channel_nums

    #feature map 1 head (13 x 13 x f1_channel_num//2 for 416 input)
    x1 = make_csp_yolo_spp_depthwise_separable_head(f1,
                                                    f1_channel_num // 2,
                                                    block_id_str='pred_1')

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Mish(f2_channel_num // 2, (1, 1)),
                          UpSampling2D(2))(x1)

    x2 = DarknetConv2D_BN_Mish(f2_channel_num // 2, (1, 1))(f2)
    x2 = Concatenate()([x2, x1_upsample])

    #feature map 2 head (26 x 26 x f2_channel_num//2 for 416 input)
    x2 = make_csp_yolo_depthwise_separable_head(x2,
                                                f2_channel_num // 2,
                                                block_id_str='pred_2')

    #upsample fpn merge for feature map 2 & 3
    x2_upsample = compose(DarknetConv2D_BN_Mish(f3_channel_num // 2, (1, 1)),
                          UpSampling2D(2))(x2)

    x3 = DarknetConv2D_BN_Mish(f3_channel_num // 2, (1, 1))(f3)
    x3 = Concatenate()([x3, x2_upsample])

    #feature map 3 head & output (52 x 52 x f3_channel_num for 416 input)
    x3 = make_csp_yolo_depthwise_separable_head(x3,
                                                f3_channel_num // 2,
                                                block_id_str='pred_3')
    y3 = compose(
        Depthwise_Separable_Conv2D_BN_Mish(f3_channel_num, (3, 3),
                                           block_id_str='pred_3_3'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1),
                      name='predict_conv_3'))(x3)

    #downsample fpn merge for feature map 3 & 2
    x3_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Mish(
            f2_channel_num // 2, (3, 3),
            strides=(2, 2),
            block_id_str='pred_3_4'))(x3)

    x2 = Concatenate()([x3_downsample, x2])

    #feature map 2 output (26 x 26 x f2_channel_num for 416 input)
    x2 = make_csp_yolo_depthwise_separable_head(x2,
                                                f2_channel_num // 2,
                                                block_id_str='pred_4')
    y2 = compose(
        Depthwise_Separable_Conv2D_BN_Mish(f2_channel_num, (3, 3),
                                           block_id_str='pred_4_3'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1),
                      name='predict_conv_2'))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Mish(
            f1_channel_num // 2, (3, 3),
            strides=(2, 2),
            block_id_str='pred_4_4'))(x2)

    x1 = Concatenate()([x2_downsample, x1])

    #feature map 1 output (13 x 13 x f1_channel_num for 416 input)
    x1 = make_csp_yolo_depthwise_separable_head(x1,
                                                f1_channel_num // 2,
                                                block_id_str='pred_5')
    y1 = compose(
        Depthwise_Separable_Conv2D_BN_Mish(f1_channel_num, (3, 3),
                                           block_id_str='pred_5_3'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1),
                      name='predict_conv_1'))(x1)

    return y1, y2, y3
Example 23
def yolo5_predictions(feature_maps, feature_channel_nums, num_anchors,
                      num_classes):
    f1, f2, f3 = feature_maps
    f1_channel_num, f2_channel_num, f3_channel_num = feature_channel_nums

    #feature map 1 head (19x19 for 608 input)
    x1 = make_csp_yolo_spp_head(f1, f1_channel_num // 2)

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Mish(f2_channel_num // 2, (1, 1)),
                          UpSampling2D(2))(x1)

    x2 = DarknetConv2D_BN_Mish(f2_channel_num // 2, (1, 1))(f2)
    x2 = Concatenate()([x2, x1_upsample])

    #feature map 2 head (38x38 for 608 input)
    x2 = make_csp_yolo_head(x2, f2_channel_num // 2)

    #upsample fpn merge for feature map 2 & 3
    x2_upsample = compose(DarknetConv2D_BN_Mish(f3_channel_num // 2, (1, 1)),
                          UpSampling2D(2))(x2)

    x3 = DarknetConv2D_BN_Mish(f3_channel_num // 2, (1, 1))(f3)
    x3 = Concatenate()([x3, x2_upsample])

    #feature map 3 head & output (76x76 for 608 input)
    #x3, y3 = make_last_layers(x3, f3_channel_num//2, num_anchors*(num_classes+5))
    x3 = make_csp_yolo_head(x3, f3_channel_num // 2)
    y3 = compose(
        DarknetConv2D_BN_Mish(f3_channel_num, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1),
                      name='predict_conv_3'))(x3)

    #downsample fpn merge for feature map 3 & 2
    x3_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        DarknetConv2D_BN_Mish(f2_channel_num // 2, (3, 3), strides=(2, 2)))(x3)

    x2 = Concatenate()([x3_downsample, x2])

    #feature map 2 output (38x38 for 608 input)
    #x2, y2 = make_last_layers(x2, 256, num_anchors*(num_classes+5))
    x2 = make_csp_yolo_head(x2, f2_channel_num // 2)
    y2 = compose(
        DarknetConv2D_BN_Mish(f2_channel_num, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1),
                      name='predict_conv_2'))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        DarknetConv2D_BN_Mish(f1_channel_num // 2, (3, 3), strides=(2, 2)))(x2)

    x1 = Concatenate()([x2_downsample, x1])

    #feature map 1 output (19x19 for 608 input)
    #x1, y1 = make_last_layers(x1, f1_channel_num//2, num_anchors*(num_classes+5))
    x1 = make_csp_yolo_head(x1, f1_channel_num // 2)
    y1 = compose(
        DarknetConv2D_BN_Mish(f1_channel_num, (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1),
                      name='predict_conv_1'))(x1)

    return y1, y2, y3
Example 24
def yolo4lite_mobilenet_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Create YOLO_v4 Lite MobileNet model CNN body in keras.'''
    mobilenet = MobileNet(input_tensor=inputs,
                          weights='imagenet',
                          include_top=False,
                          alpha=alpha)

    # input: 416 x 416 x 3
    # conv_pw_13_relu :13 x 13 x (1024*alpha)
    # conv_pw_11_relu :26 x 26 x (512*alpha)
    # conv_pw_5_relu : 52 x 52 x (256*alpha)

    f1 = mobilenet.get_layer('conv_pw_13_relu').output
    # f1 :13 x 13 x (1024*alpha) for 416 input
    #feature map 1 head (13 x 13 x (512*alpha) for 416 input)
    x1 = make_yolo_spp_depthwise_separable_head(f1,
                                                int(512 * alpha),
                                                block_id_str='14')

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Leaky(int(256 * alpha), (1, 1)),
                          UpSampling2D(2))(x1)

    f2 = mobilenet.get_layer('conv_pw_11_relu').output
    # f2: 26 x 26 x (512*alpha) for 416 input
    x2 = DarknetConv2D_BN_Leaky(int(256 * alpha), (1, 1))(f2)
    x2 = Concatenate()([x2, x1_upsample])

    #feature map 2 head (26 x 26 x (256*alpha) for 416 input)
    x2 = make_yolo_depthwise_separable_head(x2,
                                            int(256 * alpha),
                                            block_id_str='15')

    #upsample fpn merge for feature map 2 & 3
    x2_upsample = compose(DarknetConv2D_BN_Leaky(int(128 * alpha), (1, 1)),
                          UpSampling2D(2))(x2)

    f3 = mobilenet.get_layer('conv_pw_5_relu').output
    # f3 : 52 x 52 x  (256*alpha) for 416 input
    x3 = DarknetConv2D_BN_Leaky(int(128 * alpha), (1, 1))(f3)
    x3 = Concatenate()([x3, x2_upsample])

    #feature map 3 head & output (52 x 52 x (256*alpha) for 416 input)
    #x3, y3 = make_depthwise_separable_last_layers(x3, int(128*alpha), num_anchors*(num_classes+5), block_id_str='16')
    x3 = make_yolo_depthwise_separable_head(x3,
                                            int(128 * alpha),
                                            block_id_str='16')
    y3 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(int(256 * alpha), (3, 3),
                                            block_id_str='16_3'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x3)

    #downsample fpn merge for feature map 3 & 2
    x3_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(int(256 * alpha), (3, 3),
                                                    strides=(2, 2),
                                                    block_id_str='16_4'))(x3)

    x2 = Concatenate()([x3_downsample, x2])

    #feature map 2 output (26 x 26 x (512*alpha) for 416 input)
    #x2, y2 = make_depthwise_separable_last_layers(x2, int(256*alpha), num_anchors*(num_classes+5), block_id_str='17')
    x2 = make_yolo_depthwise_separable_head(x2,
                                            int(256 * alpha),
                                            block_id_str='17')
    y2 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(int(512 * alpha), (3, 3),
                                            block_id_str='17_3'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(int(512 * alpha), (3, 3),
                                                    strides=(2, 2),
                                                    block_id_str='17_4'))(x2)

    x1 = Concatenate()([x2_downsample, x1])

    #feature map 1 output (13 x 13 x (1024*alpha) for 416 input)
    #x1, y1 = make_depthwise_separable_last_layers(x1, int(512*alpha), num_anchors*(num_classes+5), block_id_str='18')
    x1 = make_yolo_depthwise_separable_head(x1,
                                            int(512 * alpha),
                                            block_id_str='18')
    y1 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(int(1024 * alpha), (3, 3),
                                            block_id_str='18_3'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x1)

    return Model(inputs, [y1, y2, y3])
Example 25
def yolo4lite_mobilenetv3large_body(inputs,
                                    num_anchors,
                                    num_classes,
                                    alpha=1.0):
    '''Create YOLO_v4 Lite MobileNetV3Large model CNN body in keras.'''
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs,
                                        weights='imagenet',
                                        include_top=False,
                                        alpha=alpha)

    # input: 416 x 416 x 3
    # activation_38(layer 194, final feature map): 13 x 13 x (960*alpha)
    # expanded_conv_14/Add(layer 191, end of block14): 13 x 13 x (160*alpha)

    # activation_29(layer 146, middle in block12) : 26 x 26 x (672*alpha)
    # expanded_conv_11/Add(layer 143, end of block11) : 26 x 26 x (112*alpha)

    # activation_15(layer 79, middle in block6) : 52 x 52 x (240*alpha)
    # expanded_conv_5/Add(layer 76, end of block5): 52 x 52 x (40*alpha)

    # f1 :13 x 13 x (960*alpha)
    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature map
    f1 = mobilenetv3large.layers[194].output
    #feature map 1 head (13 x 13 x (480*alpha) for 416 input)
    x1 = make_yolo_spp_depthwise_separable_head(f1, int(480 * alpha))

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Leaky(int(336 * alpha), (1, 1)),
                          UpSampling2D(2))(x1)

    f2 = mobilenetv3large.layers[146].output
    # f2: 26 x 26 x (672*alpha) for 416 input
    x2 = DarknetConv2D_BN_Leaky(int(336 * alpha), (1, 1))(f2)
    x2 = Concatenate()([x2, x1_upsample])

    #feature map 2 head (26 x 26 x (336*alpha) for 416 input)
    x2 = make_yolo_depthwise_separable_head(x2, int(336 * alpha))

    #upsample fpn merge for feature map 2 & 3
    x2_upsample = compose(DarknetConv2D_BN_Leaky(int(120 * alpha), (1, 1)),
                          UpSampling2D(2))(x2)

    f3 = mobilenetv3large.layers[79].output
    # f3 : 52 x 52 x (240*alpha) for 416 input
    x3 = DarknetConv2D_BN_Leaky(int(120 * alpha), (1, 1))(f3)
    x3 = Concatenate()([x3, x2_upsample])

    #feature map 3 head & output (52 x 52 x (240*alpha) for 416 input)
    #x3, y3 = make_depthwise_separable_last_layers(x3, int(120*alpha), num_anchors*(num_classes+5))
    x3 = make_yolo_depthwise_separable_head(x3, int(120 * alpha))
    y3 = compose(Depthwise_Separable_Conv2D_BN_Leaky(int(240 * alpha), (3, 3)),
                 DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x3)

    #downsample fpn merge for feature map 3 & 2
    x3_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(int(336 * alpha), (3, 3),
                                                    strides=(2, 2)))(x3)

    x2 = Concatenate()([x3_downsample, x2])

    #feature map 2 output (26 x 26 x (672*alpha) for 416 input)
    #x2, y2 = make_depthwise_separable_last_layers(x2, int(336*alpha), num_anchors*(num_classes+5))
    x2 = make_yolo_depthwise_separable_head(x2, int(336 * alpha))
    y2 = compose(Depthwise_Separable_Conv2D_BN_Leaky(int(672 * alpha), (3, 3)),
                 DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(int(480 * alpha), (3, 3),
                                                    strides=(2, 2)))(x2)

    x1 = Concatenate()([x2_downsample, x1])

    #feature map 1 output (13 x 13 x (960*alpha) for 416 input)
    #x1, y1 = make_depthwise_separable_last_layers(x1, int(480*alpha), num_anchors*(num_classes+5))
    x1 = make_yolo_depthwise_separable_head(x1, int(480 * alpha))
    y1 = compose(Depthwise_Separable_Conv2D_BN_Leaky(int(960 * alpha), (3, 3)),
                 DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x1)

    return Model(inputs, [y1, y2, y3])
Example 26
def yolo4_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Create YOLO_V4 MobileNetV3Small model CNN body in Keras."""
    mobilenetv3small = MobileNetV3Small(input_tensor=inputs,
                                        weights='imagenet',
                                        include_top=False,
                                        alpha=alpha)

    # input: 416 x 416 x 3
    # activation_31(layer 165, final feature map): 13 x 13 x (576*alpha)
    # expanded_conv_10/Add(layer 162, end of block10): 13 x 13 x (96*alpha)

    # activation_22(layer 117, middle in block8) : 26 x 26 x (288*alpha)
    # expanded_conv_7/Add(layer 114, end of block7) : 26 x 26 x (48*alpha)

    # activation_7(layer 38, middle in block3) : 52 x 52 x (96*alpha)
    # expanded_conv_2/Add(layer 35, end of block2): 52 x 52 x (24*alpha)

    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature map
    # f1: 13 x 13 x (576*alpha)
    f1 = mobilenetv3small.layers[165].output
    # f2: 26 x 26 x (288*alpha) for 416 input
    f2 = mobilenetv3small.layers[117].output
    # f3: 52 x 52 x (96*alpha)
    f3 = mobilenetv3small.layers[38].output

    f1_channel_num = int(576 * alpha)
    f2_channel_num = int(288 * alpha)
    f3_channel_num = int(96 * alpha)
    #f1_channel_num = 1024
    #f2_channel_num = 512
    #f3_channel_num = 256

    #feature map 1 head (13 x 13 x f1_channel_num//2 for 416 input)
    x1 = make_yolo_spp_head(f1, f1_channel_num // 2)

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
                          UpSampling2D(2))(x1)

    x2 = DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1))(f2)
    x2 = Concatenate()([x2, x1_upsample])

    #feature map 2 head (26 x 26 x f2_channel_num//2 for 416 input)
    x2 = make_yolo_head(x2, f2_channel_num // 2)

    #upsample fpn merge for feature map 2 & 3
    x2_upsample = compose(DarknetConv2D_BN_Leaky(f3_channel_num // 2, (1, 1)),
                          UpSampling2D(2))(x2)

    x3 = DarknetConv2D_BN_Leaky(f3_channel_num // 2, (1, 1))(f3)
    x3 = Concatenate()([x3, x2_upsample])

    #feature map 3 head & output (52 x 52 x f3_channel_num for 416 input)
    #x3, y3 = make_last_layers(x3, f3_channel_num//2, num_anchors*(num_classes+5))
    x3 = make_yolo_head(x3, f3_channel_num // 2)
    y3 = compose(DarknetConv2D_BN_Leaky(f3_channel_num, (3, 3)),
                 DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x3)

    #downsample fpn merge for feature map 3 & 2
    x3_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        DarknetConv2D_BN_Leaky(f2_channel_num // 2, (3, 3),
                               strides=(2, 2)))(x3)

    x2 = Concatenate()([x3_downsample, x2])

    #feature map 2 output (26 x 26 x f2_channel_num for 416 input)
    #x2, y2 = make_last_layers(x2, f2_channel_num//2, num_anchors*(num_classes+5))
    x2 = make_yolo_head(x2, f2_channel_num // 2)
    y2 = compose(DarknetConv2D_BN_Leaky(f2_channel_num, (3, 3)),
                 DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        DarknetConv2D_BN_Leaky(f1_channel_num // 2, (3, 3),
                               strides=(2, 2)))(x2)

    x1 = Concatenate()([x2_downsample, x1])

    #feature map 1 output (13 x 13 x f1_channel_num for 416 input)
    #x1, y1 = make_last_layers(x1, f1_channel_num//2, num_anchors*(num_classes+5))
    x1 = make_yolo_head(x1, f1_channel_num // 2)
    y1 = compose(DarknetConv2D_BN_Leaky(f1_channel_num, (3, 3)),
                 DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x1)

    return Model(inputs, [y1, y2, y3])
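
A minimal usage sketch for the three-scale bodies, assuming yolo4_mobilenetv3small_body and the backbone/helper layers it references are importable from this repository; anchor and class counts are illustrative:

from tensorflow.keras.layers import Input

image_input = Input(shape=(416, 416, 3))
model = yolo4_mobilenetv3small_body(image_input, num_anchors=3, num_classes=80)
# Three prediction scales for a 416 x 416 input, each with 3 * (80 + 5) channels:
#   y1: 13 x 13, y2: 26 x 26, y3: 52 x 52
print([tuple(out.shape) for out in model.outputs])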