Example #1
0
def yolo3_spp_xception_body(inputs, num_anchors, num_classes):
    """Build the YOLOv3-SPP network on top of an Xception backbone.

    Args:
        inputs: Keras input tensor handed to the backbone.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``; ``y1`` comes
        from the deepest backbone tap, ``y3`` from the shallowest.
    """
    backbone = Xception(input_tensor=inputs,
                        weights='imagenet',
                        include_top=False)

    # Backbone taps (shapes quoted for a 416 x 416 x 3 input):
    #   block14_sepconv2_act                  : 13 x 13 x 2048
    #   block13_sepconv2_bn (middle, block13) : 26 x 26 x 1024
    #   block4_sepconv2_bn  (middle, block4)  : 52 x 52 x 728
    # Other candidate taps, not used here:
    #   add_46 (end of block12) : 26 x 26 x 728
    #   add_37 (end of block3)  : 52 x 52 x 256
    deep_tap = backbone.get_layer('block14_sepconv2_act').output
    mid_tap = backbone.get_layer('block13_sepconv2_bn').output
    shallow_tap = backbone.get_layer('block4_sepconv2_bn').output

    # Same output size is requested from all three heads.
    out_filters = num_anchors * (num_classes + 5)

    # Scale 1: SPP head on the deepest feature map.
    x, y1 = make_spp_last_layers(deep_tap, 1024, out_filters)

    # Scale 2: 1x1 conv + 2x upsample, merge with the middle tap, head.
    x = compose(DarknetConv2D_BN_Leaky(512, (1, 1)), UpSampling2D(2))(x)
    x = Concatenate()([x, mid_tap])
    x, y2 = make_last_layers(x, 512, out_filters)

    # Scale 3: same pattern against the shallow tap.
    x = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(x)
    x = Concatenate()([x, shallow_tap])
    _, y3 = make_last_layers(x, 256, out_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3_shufflenetv2_body(inputs, num_anchors, num_classes):
    """Build the YOLOv3 network on top of a ShuffleNetV2 backbone.

    The backbone is created with ``weights=None`` (no pretrained weights).

    Args:
        inputs: Keras input tensor handed to the backbone.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``.
    """
    backbone = ShuffleNetV2(input_tensor=inputs,
                            weights=None,
                            include_top=False)

    # Backbone taps (shapes quoted for a 416 x 416 x 3 input):
    #   1x1conv5_out                 : 13 x 13 x 1024
    #   stage4/block1/relu_1x1conv_1 : 26 x 26 x 464
    #   stage3/block1/relu_1x1conv_1 : 52 x 52 x 232
    deep_tap = backbone.get_layer('1x1conv5_out').output
    mid_tap = backbone.get_layer('stage4/block1/relu_1x1conv_1').output
    shallow_tap = backbone.get_layer('stage3/block1/relu_1x1conv_1').output

    # Same output size is requested from all three heads.
    out_filters = num_anchors * (num_classes + 5)

    # Scale 1 head on the deepest feature map.
    x, y1 = make_last_layers(deep_tap, 464, out_filters)

    # Scale 2: 1x1 conv + 2x upsample, merge with the middle tap, head.
    x = compose(DarknetConv2D_BN_Leaky(232, (1, 1)), UpSampling2D(2))(x)
    x = Concatenate()([x, mid_tap])
    x, y2 = make_last_layers(x, 232, out_filters)

    # Scale 3: same pattern against the shallow tap.
    x = compose(DarknetConv2D_BN_Leaky(116, (1, 1)), UpSampling2D(2))(x)
    x = Concatenate()([x, shallow_tap])
    _, y3 = make_last_layers(x, 116, out_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the YOLOv3 network on top of a MobileNetV3Large backbone.

    Args:
        inputs: Keras input tensor handed to the backbone.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.
        alpha: value forwarded to the backbone's ``alpha`` argument and used
            to scale the head widths.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``.
    """
    backbone = MobileNetV3Large(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)

    # Backbone layers of interest (shapes quoted for a 416 x 416 x 3 input):
    #   layer 194  activation_38 (final feature map)        : 13 x 13 x (960*alpha)
    #   layer 191  expanded_conv_14/Add (end of block14)    : 13 x 13 x (160*alpha)
    #   layer 146  activation_29 (middle in block12)        : 26 x 26 x (672*alpha)
    #   layer 143  expanded_conv_11/Add (end of block11)    : 26 x 26 x (112*alpha)
    #   layer 79   activation_15 (middle in block6)         : 52 x 52 x (240*alpha)
    #   layer 76   expanded_conv_5/Add (end of block5)      : 52 x 52 x (40*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so
    # the taps are fetched by index rather than by name.
    layers = backbone.layers
    f1 = layers[194].output
    f2 = layers[146].output
    f3 = layers[79].output

    # Nominal channel counts of the taps; each stage uses half of its value.
    # (A fixed 1024 / 512 / 256 scheme with an explicit predict_filters
    # argument was an alternative left disabled in the original code.)
    f1_channel_num = int(960 * alpha)
    f2_channel_num = int(672 * alpha)
    f3_channel_num = int(240 * alpha)

    out_filters = num_anchors * (num_classes + 5)

    # Feature map 1 head & output (13x13 for 416 input).
    x, y1 = make_last_layers(f1, f1_channel_num // 2, out_filters)
    outputs = [y1]

    # Feature maps 2 and 3 (26x26 and 52x52 for 416 input): 1x1 conv +
    # 2x upsample, merge with the backbone tap, then run the head.
    for skip, channel_num in ((f2, f2_channel_num), (f3, f3_channel_num)):
        x = compose(DarknetConv2D_BN_Leaky(channel_num // 2, (1, 1)),
                    UpSampling2D(2))(x)
        x = Concatenate()([x, skip])
        x, y = make_last_layers(x, channel_num // 2, out_filters)
        outputs.append(y)

    return Model(inputs=inputs, outputs=outputs)
def yolo3_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the YOLOv3 network on top of a MobileNetV3Small backbone.

    Args:
        inputs: Keras input tensor handed to the backbone.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.
        alpha: value forwarded to the backbone's ``alpha`` argument and used
            to scale the head widths.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``.
    """
    backbone = MobileNetV3Small(input_tensor=inputs,
                                weights='imagenet',
                                include_top=False,
                                alpha=alpha)

    # Backbone layers of interest (shapes quoted for a 416 x 416 x 3 input):
    #   layer 165  activation_31 (final feature map)        : 13 x 13 x (576*alpha)
    #   layer 162  expanded_conv_10/Add (end of block10)    : 13 x 13 x (96*alpha)
    #   layer 117  activation_22 (middle in block8)         : 26 x 26 x (288*alpha)
    #   layer 114  expanded_conv_7/Add (end of block7)      : 26 x 26 x (48*alpha)
    #   layer 38   activation_7 (middle in block3)          : 52 x 52 x (96*alpha)
    #   layer 35   expanded_conv_2/Add (end of block2)      : 52 x 52 x (24*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so
    # the taps are fetched by index rather than by name.
    f1 = backbone.layers[165].output
    f2 = backbone.layers[117].output
    f3 = backbone.layers[38].output

    # Half of each tap's nominal channel count drives the matching stage.
    # (A fixed 1024 / 512 / 256 scheme with an explicit predict_filters
    # argument was an alternative left disabled in the original code.)
    half_f1 = int(576 * alpha) // 2
    half_f2 = int(288 * alpha) // 2
    half_f3 = int(96 * alpha) // 2

    out_filters = num_anchors * (num_classes + 5)

    def upsample_and_merge(tensor, skip, filters):
        # 1x1 conv + 2x upsample, then concatenate with the backbone tap.
        tensor = compose(DarknetConv2D_BN_Leaky(filters, (1, 1)),
                         UpSampling2D(2))(tensor)
        return Concatenate()([tensor, skip])

    # Feature map 1 head & output (13x13 for 416 input).
    x, y1 = make_last_layers(f1, half_f1, out_filters)

    # Feature map 2 head & output (26x26 for 416 input).
    x = upsample_and_merge(x, f2, half_f2)
    x, y2 = make_last_layers(x, half_f2, out_filters)

    # Feature map 3 head & output (52x52 for 416 input).
    x = upsample_and_merge(x, f3, half_f3)
    _, y3 = make_last_layers(x, half_f3, out_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3_mobilenetv2_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the YOLOv3 network on top of a MobileNetV2 backbone.

    Args:
        inputs: Keras input tensor handed to the backbone.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.
        alpha: value forwarded to the backbone's ``alpha`` argument and used
            to scale the head widths.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``.
    """
    backbone = MobileNetV2(input_tensor=inputs,
                           weights='imagenet',
                           include_top=False,
                           alpha=alpha)

    # Backbone taps (shapes quoted for a 416 x 416 x 3 input):
    #   out_relu             : 13 x 13 x 1280
    #   block_13_expand_relu : 26 x 26 x (576*alpha)
    #   block_6_expand_relu  : 52 x 52 x (192*alpha)
    f1, f2, f3 = (backbone.get_layer(name).output
                  for name in ('out_relu',
                               'block_13_expand_relu',
                               'block_6_expand_relu'))

    # Width budget per scale; each stage uses half of its value.
    # (A fixed 1024 / 512 / 256 scheme with an explicit predict_filters
    # argument was an alternative left disabled in the original code.)
    f1_channel_num = int(1280 * alpha)
    f2_channel_num = int(576 * alpha)
    f3_channel_num = int(192 * alpha)

    out_filters = num_anchors * (num_classes + 5)

    # Feature map 1 head & output (13x13 for 416 input).
    x, y1 = make_last_layers(f1, f1_channel_num // 2, out_filters)
    outputs = [y1]

    # Feature maps 2 and 3 (26x26 and 52x52 for 416 input): 1x1 conv +
    # 2x upsample, merge with the backbone tap, then run the head.
    for skip, channel_num in ((f2, f2_channel_num), (f3, f3_channel_num)):
        x = compose(DarknetConv2D_BN_Leaky(channel_num // 2, (1, 1)),
                    UpSampling2D(2))(x)
        x = Concatenate()([x, skip])
        x, y = make_last_layers(x, channel_num // 2, out_filters)
        outputs.append(y)

    return Model(inputs=inputs, outputs=outputs)
Example #6
0
def yolo3_spp_xception_body(inputs, num_anchors, num_classes):
    """Build the YOLOv3-SPP network on top of an Xception backbone.

    Args:
        inputs: Keras input tensor handed to the backbone.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``.
    """
    backbone = Xception(input_tensor=inputs,
                        weights='imagenet',
                        include_top=False)

    # Backbone taps (shapes quoted for a 416 x 416 x 3 input):
    #   block14_sepconv2_act                  : 13 x 13 x 2048
    #   block13_sepconv2_bn (middle, block13) : 26 x 26 x 1024
    #   block4_sepconv2_bn  (middle, block4)  : 52 x 52 x 728
    # Other candidate taps, not used here:
    #   add_46 (end of block12) : 26 x 26 x 728
    #   add_37 (end of block3)  : 52 x 52 x 256
    f1 = backbone.get_layer('block14_sepconv2_act').output
    f2 = backbone.get_layer('block13_sepconv2_bn').output
    f3 = backbone.get_layer('block4_sepconv2_bn').output

    # Head width budgets are fixed at 1024 / 512 / 256 rather than the
    # taps' own channel counts (2048 / 1024 / 728); each stage uses half.
    f1_channel_num, f2_channel_num, f3_channel_num = 1024, 512, 256

    out_filters = num_anchors * (num_classes + 5)

    # Feature map 1: SPP head & output (13x13 for 416 input).
    x, y1 = make_spp_last_layers(f1, f1_channel_num // 2, out_filters)
    outputs = [y1]

    # Feature maps 2 and 3 (26x26 and 52x52 for 416 input): 1x1 conv +
    # 2x upsample, merge with the backbone tap, then run the plain head.
    for skip, channel_num in ((f2, f2_channel_num), (f3, f3_channel_num)):
        x = compose(DarknetConv2D_BN_Leaky(channel_num // 2, (1, 1)),
                    UpSampling2D(2))(x)
        x = Concatenate()([x, skip])
        x, y = make_last_layers(x, channel_num // 2, out_filters)
        outputs.append(y)

    return Model(inputs=inputs, outputs=outputs)
def yolo3_efficientnet_body(inputs, num_anchors, num_classes, level=3):
    '''
    Create YOLO_v3 EfficientNet model CNN body in keras.

    # Arguments
        inputs: Keras input tensor handed to the backbone factory.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.
        level: EfficientNet level number, forwarded to
            get_efficientnet_backbone_info. The default (3) selects
            EfficientNetB3 as backbone.

    # Returns
        Keras Model from `inputs` to `[y1, y2, y3]`.
    '''
    backbone, feature_map_info = get_efficientnet_backbone_info(
        inputs, level=level)

    # Backbone taps, deepest first.
    f1 = backbone.get_layer('top_activation').output
    f2 = backbone.get_layer('block6a_expand_activation').output
    f3 = backbone.get_layer('block4a_expand_activation').output

    # Each stage runs at half the channel count reported for its tap.
    # (Hard-coded widths 672 / 336 / 240 / 120 were earlier alternatives
    # left disabled in the original code.)
    half_f1 = feature_map_info['f1_channel_num'] // 2
    half_f2 = feature_map_info['f2_channel_num'] // 2
    half_f3 = feature_map_info['f3_channel_num'] // 2

    out_filters = num_anchors * (num_classes + 5)

    def upsample_and_merge(tensor, skip, filters):
        # 1x1 conv + 2x upsample, then concatenate with the backbone tap.
        tensor = compose(DarknetConv2D_BN_Leaky(filters, (1, 1)),
                         UpSampling2D(2))(tensor)
        return Concatenate()([tensor, skip])

    # Feature map 1 head & output (19x19 for 608 input).
    x, y1 = make_last_layers(f1, half_f1, out_filters)

    # Feature map 2 head & output (38x38 for 608 input).
    x = upsample_and_merge(x, f2, half_f2)
    x, y2 = make_last_layers(x, half_f2, out_filters)

    # Feature map 3 head & output (76x76 for 608 input).
    x = upsample_and_merge(x, f3, half_f3)
    _, y3 = make_last_layers(x, half_f3, out_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3_shufflenetv2_body(inputs, num_anchors, num_classes):
    """Build the YOLOv3 network on top of a ShuffleNetV2 backbone.

    The backbone is created with ``weights=None`` (no pretrained weights).

    Args:
        inputs: Keras input tensor handed to the backbone.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``.
    """
    backbone = ShuffleNetV2(input_tensor=inputs,
                            weights=None,
                            include_top=False)

    # (layer name, channel count) per scale, deepest first. Shapes quoted
    # for a 416 x 416 x 3 input: 13x13x1024, 26x26x464, 52x52x232.
    # (A fixed 1024 / 512 / 256 width scheme was an alternative left
    # disabled in the original code.)
    f1 = backbone.get_layer('1x1conv5_out').output
    f2 = backbone.get_layer('stage4/block1/relu_1x1conv_1').output
    f3 = backbone.get_layer('stage3/block1/relu_1x1conv_1').output
    f1_channel_num, f2_channel_num, f3_channel_num = 1024, 464, 232

    out_filters = num_anchors * (num_classes + 5)

    # Feature map 1 head & output (13x13 for 416 input).
    x, y1 = make_last_layers(f1, f1_channel_num // 2, out_filters)
    outputs = [y1]

    # Feature maps 2 and 3 (26x26 and 52x52 for 416 input): 1x1 conv +
    # 2x upsample, merge with the backbone tap, then run the head.
    for skip, channel_num in ((f2, f2_channel_num), (f3, f3_channel_num)):
        x = compose(DarknetConv2D_BN_Leaky(channel_num // 2, (1, 1)),
                    UpSampling2D(2))(x)
        x = Concatenate()([x, skip])
        x, y = make_last_layers(x, channel_num // 2, out_filters)
        outputs.append(y)

    return Model(inputs=inputs, outputs=outputs)
def yolo3_body(inputs, num_anchors, num_classes, weights_path=None):
    """Build the YOLOv3 network on top of the Darknet-53 body.

    Args:
        inputs: Keras input tensor handed to ``darknet53_body``.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.
        weights_path: optional path to backbone weights; when given, they
            are loaded into the backbone by layer name before the heads
            are attached.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``.
    """
    backbone = Model(inputs, darknet53_body(inputs))
    if weights_path is not None:
        backbone.load_weights(weights_path, by_name=True)
        print('Load weights {}.'.format(weights_path))

    # Backbone taps: final output plus two intermediate layers by index.
    deep_tap = backbone.output
    mid_tap = backbone.layers[152].output
    shallow_tap = backbone.layers[92].output

    out_filters = num_anchors * (num_classes + 5)

    # Scale 1 head on the backbone output.
    x, y1 = make_last_layers(deep_tap, 512, out_filters)

    # Scale 2: 1x1 conv + 2x upsample, merge with layer 152, head.
    x = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(x)
    x = Concatenate()([x, mid_tap])
    x, y2 = make_last_layers(x, 256, out_filters)

    # Scale 3: same pattern against layer 92.
    x = compose(DarknetConv2D_BN_Leaky(128, (1, 1)), UpSampling2D(2))(x)
    x = Concatenate()([x, shallow_tap])
    _, y3 = make_last_layers(x, 128, out_filters)

    return Model(inputs, [y1, y2, y3])
Example #10
0
def yolo3_efficientnet_body(inputs, num_anchors, num_classes, level=0):
    '''
    Create YOLO_v3 EfficientNet model CNN body in keras.

    # Arguments
        inputs: Keras input tensor handed to the backbone factory.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.
        level: EfficientNet level number, forwarded to
            get_efficientnet_backbone_info. The default (0) selects
            EfficientNetB0 as backbone.

    # Returns
        Keras Model from `inputs` to `[y1, y2, y3]`.
    '''
    backbone, feature_map_info = get_efficientnet_backbone_info(
        inputs, level=level)

    # Backbone taps, deepest first.
    f1 = backbone.get_layer('top_activation').output
    f2 = backbone.get_layer('block6a_expand_activation').output
    f3 = backbone.get_layer('block4a_expand_activation').output

    # Widths come from the channel counts reported for the 2nd and 3rd
    # taps: head 1 runs at the full f2 count, head 2 at the full f3 count,
    # and head 3 plus both upsample branches at half of their counts.
    # (Hard-coded widths 672 / 336 / 240 / 120 were earlier alternatives
    # left disabled in the original code.)
    f2_channel_num = feature_map_info['f2_channel_num']
    f3_channel_num = feature_map_info['f3_channel_num']

    out_filters = num_anchors * (num_classes + 5)

    # Scale 1 head on the deepest feature map.
    x, y1 = make_last_layers(f1, f2_channel_num, out_filters)

    # Scale 2: 1x1 conv + 2x upsample, merge with f2, head.
    x = compose(DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
                UpSampling2D(2))(x)
    x = Concatenate()([x, f2])
    x, y2 = make_last_layers(x, f3_channel_num, out_filters)

    # Scale 3: same pattern against f3.
    x = compose(DarknetConv2D_BN_Leaky(f3_channel_num // 2, (1, 1)),
                UpSampling2D(2))(x)
    x = Concatenate()([x, f3])
    _, y3 = make_last_layers(x, f3_channel_num // 2, out_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3_mobilenetv2_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the YOLOv3 network on top of a MobileNetV2 backbone.

    Args:
        inputs: Keras input tensor handed to the backbone.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.
        alpha: value forwarded to the backbone's ``alpha`` argument and used
            to scale every head / upsample width.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``.
    """
    backbone = MobileNetV2(input_tensor=inputs,
                           weights='imagenet',
                           include_top=False,
                           alpha=alpha)

    # Backbone taps (shapes quoted for a 416 x 416 x 3 input):
    #   out_relu             : 13 x 13 x 1280
    #   block_13_expand_relu : 26 x 26 x (576*alpha)
    #   block_6_expand_relu  : 52 x 52 x (192*alpha)
    f1 = backbone.get_layer('out_relu').output
    f2 = backbone.get_layer('block_13_expand_relu').output
    f3 = backbone.get_layer('block_6_expand_relu').output

    out_filters = num_anchors * (num_classes + 5)

    # Scale 1 head on the deepest feature map.
    x, y1 = make_last_layers(f1,
                             int(576 * alpha),
                             out_filters,
                             predict_filters=int(1024 * alpha))

    # Scale 2: 1x1 conv + 2x upsample, merge with f2, head.
    reduce_2 = DarknetConv2D_BN_Leaky(int(288 * alpha), (1, 1))
    x = compose(reduce_2, UpSampling2D(2))(x)
    x = Concatenate()([x, f2])
    x, y2 = make_last_layers(x,
                             int(192 * alpha),
                             out_filters,
                             predict_filters=int(512 * alpha))

    # Scale 3: same pattern against f3.
    reduce_3 = DarknetConv2D_BN_Leaky(int(96 * alpha), (1, 1))
    x = compose(reduce_3, UpSampling2D(2))(x)
    x = Concatenate()([x, f3])
    _, y3 = make_last_layers(x,
                             int(96 * alpha),
                             out_filters,
                             predict_filters=int(256 * alpha))

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3_spp_body(inputs, num_anchors, num_classes, weights_path=None):
    """Build the YOLOv3-SPP network on top of the Darknet-53 body.

    Args:
        inputs: Keras input tensor handed to ``darknet53_body``.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.
        weights_path: optional path to backbone weights; when given, they
            are loaded into the backbone by layer name before the heads
            are attached.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``.
    """
    backbone = Model(inputs, darknet53_body(inputs))
    if weights_path is not None:
        backbone.load_weights(weights_path, by_name=True)
        print('Load weights {}.'.format(weights_path))

    # Backbone taps with the channel counts noted in the original code:
    #   f1: 13 x 13 x 1024, f2: 26 x 26 x 512, f3: 52 x 52 x 256
    f1 = backbone.output
    f2 = backbone.layers[152].output
    f3 = backbone.layers[92].output
    f1_channel_num, f2_channel_num, f3_channel_num = 1024, 512, 256

    out_filters = num_anchors * (num_classes + 5)

    # Feature map 1: SPP head & output (19x19 for 608 input).
    x, y1 = make_spp_last_layers(f1, f1_channel_num // 2, out_filters)
    outputs = [y1]

    # Feature maps 2 and 3 (38x38 and 76x76 for 608 input): 1x1 conv +
    # 2x upsample, merge with the backbone tap, then run the plain head.
    for skip, channel_num in ((f2, f2_channel_num), (f3, f3_channel_num)):
        x = compose(DarknetConv2D_BN_Leaky(channel_num // 2, (1, 1)),
                    UpSampling2D(2))(x)
        x = Concatenate()([x, skip])
        x, y = make_last_layers(x, channel_num // 2, out_filters)
        outputs.append(y)

    return Model(inputs, outputs)
Example #13
0
def yolo3_mobilenet_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Build the YOLOv3 network on top of a MobileNet (v1) backbone.

    Args:
        inputs: Keras input tensor handed to the backbone.
        num_anchors: anchor count used to size every head's output.
        num_classes: class count used to size every head's output.
        alpha: value forwarded to the backbone's ``alpha`` argument and used
            to scale every head / upsample width.

    Returns:
        A Keras ``Model`` from ``inputs`` to ``[y1, y2, y3]``.
    """
    # Reference: output shapes of the backbone's pointwise stages as
    # recorded from a full (include_top) MobileNet on a 416 x 416 x 3 input
    # with alpha = 1.0. Each stage is conv_pad/conv_dw/conv_pw with BN+ReLU.
    #   conv1_relu              : 208 x 208 x 32
    #   conv_pw_1_relu          : 208 x 208 x 64
    #   conv_pw_2_relu          : 104 x 104 x 128
    #   conv_pw_3_relu          : 104 x 104 x 128
    #   conv_pw_4_relu          :  52 x  52 x 256
    #   conv_pw_5_relu          :  52 x  52 x 256   <- f3
    #   conv_pw_6_relu          :  26 x  26 x 512
    #   conv_pw_7..10_relu      :  26 x  26 x 512
    #   conv_pw_11_relu         :  26 x  26 x 512   <- f2
    #   conv_pw_12_relu         :  13 x  13 x 1024
    #   conv_pw_13_relu         :  13 x  13 x 1024  <- f1
    # The classifier top (global pooling, reshape, dropout, conv_preds,
    # softmax) follows in the full model but is excluded here.
    backbone = MobileNet(input_tensor=inputs,
                         weights='imagenet',
                         include_top=False,
                         alpha=alpha)

    # Backbone taps (shapes quoted for a 416 x 416 x 3 input):
    #   conv_pw_13_relu : 13 x 13 x (1024*alpha)
    #   conv_pw_11_relu : 26 x 26 x (512*alpha)
    #   conv_pw_5_relu  : 52 x 52 x (256*alpha)
    f1 = backbone.get_layer('conv_pw_13_relu').output
    f2 = backbone.get_layer('conv_pw_11_relu').output
    f3 = backbone.get_layer('conv_pw_5_relu').output

    # Alpha-scaled widths shared by the heads and the upsample branches.
    width_512 = int(512 * alpha)
    width_256 = int(256 * alpha)
    width_128 = int(128 * alpha)

    out_filters = num_anchors * (num_classes + 5)

    # Scale 1 head on the deepest feature map.
    x, y1 = make_last_layers(f1, width_512, out_filters)

    # Scale 2: 1x1 conv + 2x upsample, merge with f2, head.
    x = compose(DarknetConv2D_BN_Leaky(width_256, (1, 1)),
                UpSampling2D(2))(x)
    x = Concatenate()([x, f2])
    x, y2 = make_last_layers(x, width_256, out_filters)

    # Scale 3: same pattern against f3.
    x = compose(DarknetConv2D_BN_Leaky(width_128, (1, 1)),
                UpSampling2D(2))(x)
    x = Concatenate()([x, f3])
    _, y3 = make_last_layers(x, width_128, out_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3_vgg16_body(inputs, num_anchors, num_classes):
    """Create the YOLO_V3 VGG16 model CNN body in Keras.

    A VGG16 backbone (``weights='imagenet'``, ``include_top=False``) is
    extended with four extra 3x3 ReLU convolutions, ``block6_conv1`` to
    ``block6_conv4``, stacked on ``block5_pool``. Three prediction heads
    are then built: the first on the ``block6_conv4`` output, the other
    two after upsampling by 2 and concatenating with ``block5_conv3`` and
    ``block4_conv3`` respectively.

    Reference VGG16 feature shapes for a 416 x 416 x 3 input:
        block1_conv1..2: 416 x 416 x 64     block1_pool: 208 x 208 x 64
        block2_conv1..2: 208 x 208 x 128    block2_pool: 104 x 104 x 128
        block3_conv1..3: 104 x 104 x 256    block3_pool:  52 x  52 x 256
        block4_conv1..3:  52 x  52 x 512    block4_pool:  26 x  26 x 512
        block5_conv1..3:  26 x  26 x 512    block5_pool:  13 x  13 x 512

    Args:
        inputs: tensor passed to VGG16 as ``input_tensor``; also the input
            of the returned model.
        num_anchors: number of anchors predicted by each head.
        num_classes: number of object classes.

    Returns:
        A ``Model`` mapping ``inputs`` to the three head outputs
        ``[y1, y2, y3]``, in the order the heads are built.
    """
    # Every head emits this many channels. The "+ 5" is presumably the
    # 4 box coordinates plus 1 objectness score per anchor -- TODO confirm
    # against make_last_layers / the loss.
    head_channels = num_anchors * (num_classes + 5)

    backbone = VGG16(input_tensor=inputs, weights='imagenet', include_top=False)

    # Extra convolution stack on top of block5_pool (13 x 13 x 512 for a
    # 416 input). Presumably keeps 13 x 13 x 512, assuming YoloConv2D
    # behaves like a stride-1 Conv2D -- TODO confirm.
    features = backbone.get_layer('block5_pool').output
    for conv_name in ('block6_conv1', 'block6_conv2',
                      'block6_conv3', 'block6_conv4'):
        features = YoloConv2D(512, (3, 3),
                              activation='relu',
                              padding='same',
                              name=conv_name)(features)

    # First head, fed directly by the block6_conv4 output.
    features, prediction = make_last_layers(features,
                                            512,
                                            head_channels,
                                            predict_id='1')
    predictions = [prediction]

    # Remaining heads: 1x1 conv + 2x upsample, merge with a backbone skip
    # connection, then predict. For a 416 input the skips are
    # block5_conv3 (26 x 26 x 512) and block4_conv3 (52 x 52 x 512).
    later_heads = (
        (256, 'block5_conv3', '2'),
        (128, 'block4_conv3', '3'),
    )
    for width, skip_name, head_id in later_heads:
        features = compose(DarknetConv2D_BN_Leaky(width, (1, 1)),
                           UpSampling2D(2))(features)
        skip = backbone.get_layer(skip_name).output
        features = Concatenate()([features, skip])
        features, prediction = make_last_layers(features,
                                                width,
                                                head_channels,
                                                predict_id=head_id)
        predictions.append(prediction)

    return Model(inputs=inputs, outputs=predictions)