Esempio n. 1
0
def yolo3_shufflenetv2_body(inputs, num_anchors, num_classes):
    """Assemble the Keras YOLOv3 model that uses ShuffleNetV2 as backbone.

    The backbone is instantiated on `inputs` with `weights=None` and
    `include_top=False`; three of its layers are looked up by name and
    their outputs are handed to `yolo3_predictions`.

    # Arguments
        inputs: input tensor shared by the backbone and the returned model.
        num_anchors: passed through to `yolo3_predictions`.
        num_classes: passed through to `yolo3_predictions`.

    # Returns
        `Model` from `inputs` to the three outputs of `yolo3_predictions`.
    """
    backbone = ShuffleNetV2(
        input_tensor=inputs, weights=None, include_top=False)
    print('backbone layers number: {}'.format(len(backbone.layers)))

    # Feature shapes as noted by the original author for a 416 x 416 x 3 input.
    tap_names = (
        '1x1conv5_out',                  # 13 x 13 x 1024
        'stage4/block1/relu_1x1conv_1',  # 26 x 26 x 464
        'stage3/block1/relu_1x1conv_1',  # 52 x 52 x 232
    )
    features = tuple(backbone.get_layer(name).output for name in tap_names)

    # One channel number per feature map, in the same order as `features`.
    channel_nums = (1024, 464, 232)

    out_1, out_2, out_3 = yolo3_predictions(
        features, channel_nums, num_anchors, num_classes)
    return Model(inputs=inputs, outputs=[out_1, out_2, out_3])
def yolo3_spp_xception_body(inputs, num_anchors, num_classes):
    """Assemble the Keras YOLOv3 SPP model that uses Xception as backbone.

    The backbone is instantiated on `inputs` with `weights='imagenet'` and
    `include_top=False`. Three named layer outputs are forwarded to
    `yolo3_predictions`, which is called with `use_spp=True`.

    # Arguments
        inputs: input tensor shared by the backbone and the returned model.
        num_anchors: passed through to `yolo3_predictions`.
        num_classes: passed through to `yolo3_predictions`.

    # Returns
        `Model` from `inputs` to the three outputs of `yolo3_predictions`.
    """
    backbone = Xception(
        input_tensor=inputs, weights='imagenet', include_top=False)
    print('backbone layers number: {}'.format(len(backbone.layers)))

    # Shapes below are the original author's notes for a 416 x 416 x 3 input.
    # Other candidate taps mentioned there:
    #   add_46 (end of block12): 26 x 26 x 728
    #   add_37 (end of block3):  52 x 52 x 256
    feat_1 = backbone.get_layer('block14_sepconv2_act').output  # 13 x 13 x 2048
    feat_2 = backbone.get_layer('block13_sepconv2_bn').output   # 26 x 26 x 1024
    feat_3 = backbone.get_layer('block4_sepconv2_bn').output    # 52 x 52 x 728

    # NOTE: the channel numbers given to the head are 1024/512/256, not the
    # 2048/1024/728 listed for the tapped feature maps above.
    channel_nums = (1024, 512, 256)

    out_1, out_2, out_3 = yolo3_predictions(
        (feat_1, feat_2, feat_3),
        channel_nums,
        num_anchors,
        num_classes,
        use_spp=True)
    return Model(inputs=inputs, outputs=[out_1, out_2, out_3])
def yolo3_mobilenetv2_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Assemble the Keras YOLOv3 model that uses MobileNetV2 as backbone.

    The backbone is instantiated on `inputs` with `weights='imagenet'`,
    `include_top=False` and the given `alpha`. Three named layer outputs
    are forwarded to `yolo3_predictions`.

    # Arguments
        inputs: input tensor shared by the backbone and the returned model.
        num_anchors: passed through to `yolo3_predictions`.
        num_classes: passed through to `yolo3_predictions`.
        alpha: forwarded to `MobileNetV2`; also scales the channel numbers
            handed to `yolo3_predictions`.

    # Returns
        `Model` from `inputs` to the three outputs of `yolo3_predictions`.
    """
    backbone = MobileNetV2(
        input_tensor=inputs,
        weights='imagenet',
        include_top=False,
        alpha=alpha)
    print('backbone layers number: {}'.format(len(backbone.layers)))

    # Feature shapes as noted by the original author for a 416 x 416 x 3 input.
    tap_names = (
        'out_relu',              # 13 x 13 x 1280
        'block_13_expand_relu',  # 26 x 26 x (576*alpha)
        'block_6_expand_relu',   # 52 x 52 x (192*alpha)
    )
    features = tuple(backbone.get_layer(name).output for name in tap_names)

    # Base widths are scaled by alpha and truncated to int.
    channel_nums = tuple(int(base * alpha) for base in (1280, 576, 192))

    out_1, out_2, out_3 = yolo3_predictions(
        features, channel_nums, num_anchors, num_classes)
    return Model(inputs=inputs, outputs=[out_1, out_2, out_3])
Esempio n. 4
0
def yolo3_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Assemble the Keras YOLOv3 model that uses MobileNetV3Small as backbone.

    The backbone is instantiated on `inputs` with `weights='imagenet'`,
    `include_top=False` and the given `alpha`. Three layer outputs, fetched
    by position in `backbone.layers`, are forwarded to `yolo3_predictions`.

    # Arguments
        inputs: input tensor shared by the backbone and the returned model.
        num_anchors: passed through to `yolo3_predictions`.
        num_classes: passed through to `yolo3_predictions`.
        alpha: forwarded to `MobileNetV3Small`; also scales the channel
            numbers handed to `yolo3_predictions`.

    # Returns
        `Model` from `inputs` to the three outputs of `yolo3_predictions`.
    """
    backbone = MobileNetV3Small(
        input_tensor=inputs,
        weights='imagenet',
        include_top=False,
        alpha=alpha)
    print('backbone layers number: {}'.format(len(backbone.layers)))

    # Original author's layer notes for a 416 x 416 x 3 input:
    #   layer 165  activation_31 (final feature map)    13 x 13 x (576*alpha)
    #   layer 162  expanded_conv_10/Add (end of block10) 13 x 13 x (96*alpha)
    #   layer 117  activation_22 (middle in block8)     26 x 26 x (288*alpha)
    #   layer 114  expanded_conv_7/Add (end of block7)  26 x 26 x (48*alpha)
    #   layer 38   activation_7 (middle in block3)      52 x 52 x (96*alpha)
    #   layer 35   expanded_conv_2/Add (end of block2)  52 x 52 x (24*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so
    # the layers are fetched by index rather than by name.
    tap_indices = (165, 117, 38)
    features = tuple(backbone.layers[index].output for index in tap_indices)

    # Base widths are scaled by alpha and truncated to int.
    channel_nums = tuple(int(base * alpha) for base in (576, 288, 96))

    out_1, out_2, out_3 = yolo3_predictions(
        features, channel_nums, num_anchors, num_classes)
    return Model(inputs=inputs, outputs=[out_1, out_2, out_3])
def yolo3_spp_body(inputs, num_anchors, num_classes, weights_path=None):
    """Assemble the Keras YOLOv3 SPP model on top of `darknet53_body`.

    A backbone `Model` is built from `inputs` and `darknet53_body(inputs)`.
    When `weights_path` is given, weights are loaded into that backbone
    with `by_name=True` and a message is printed.

    # Arguments
        inputs: input tensor shared by the backbone and the returned model.
        num_anchors: passed through to `yolo3_predictions`.
        num_classes: passed through to `yolo3_predictions`.
        weights_path: optional path given to `load_weights`; skipped when
            it is None.

    # Returns
        `Model` from `inputs` to the three outputs of `yolo3_predictions`.
    """
    backbone = Model(inputs, darknet53_body(inputs))
    if weights_path is not None:
        backbone.load_weights(weights_path, by_name=True)
        print('Load weights {}.'.format(weights_path))

    # Feature shapes as noted by the original author.
    features = (
        backbone.output,              # 13 x 13 x 1024
        backbone.layers[152].output,  # 26 x 26 x 512
        backbone.layers[92].output,   # 52 x 52 x 256
    )
    channel_nums = (1024, 512, 256)

    out_1, out_2, out_3 = yolo3_predictions(
        features, channel_nums, num_anchors, num_classes, use_spp=True)
    return Model(inputs, [out_1, out_2, out_3])
def yolo3_resnet50v2_body(inputs, num_anchors, num_classes):
    """Assemble the Keras YOLOv3 model that uses ResNet50V2 as backbone.

    The backbone is instantiated on `inputs` with `weights='imagenet'` and
    `include_top=False`. Three named layer outputs are forwarded to
    `yolo3_predictions`.

    # Arguments
        inputs: input tensor shared by the backbone and the returned model.
        num_anchors: passed through to `yolo3_predictions`.
        num_classes: passed through to `yolo3_predictions`.

    # Returns
        `Model` from `inputs` to the three outputs of `yolo3_predictions`.
    """
    backbone = ResNet50V2(
        input_tensor=inputs, weights='imagenet', include_top=False)
    print('backbone layers number: {}'.format(len(backbone.layers)))

    # Feature shapes as noted by the original author for a 416 x 416 x 3 input.
    feat_1 = backbone.get_layer('post_relu').output         # 13 x 13 x 2048
    feat_2 = backbone.get_layer('conv4_block5_out').output  # 26 x 26 x 1024
    feat_3 = backbone.get_layer('conv3_block3_out').output  # 52 x 52 x 512

    # NOTE: the channel numbers given to the head are half of the widths
    # listed for the tapped feature maps above.
    channel_nums = (1024, 512, 256)

    out_1, out_2, out_3 = yolo3_predictions(
        (feat_1, feat_2, feat_3), channel_nums, num_anchors, num_classes)
    return Model(inputs=inputs, outputs=[out_1, out_2, out_3])
def yolo3_mobilenet_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Assemble the Keras YOLOv3 model that uses MobileNet as backbone.

    The backbone is instantiated on `inputs` with `weights='imagenet'`,
    `include_top=False` and the given `alpha`. Three named layer outputs
    are forwarded to `yolo3_predictions`.

    # Arguments
        inputs: input tensor shared by the backbone and the returned model.
        num_anchors: passed through to `yolo3_predictions`.
        num_classes: passed through to `yolo3_predictions`.
        alpha: forwarded to `MobileNet`; also scales the channel numbers
            handed to `yolo3_predictions`.

    # Returns
        `Model` from `inputs` to the three outputs of `yolo3_predictions`.
    """
    backbone = MobileNet(
        input_tensor=inputs,
        weights='imagenet',
        include_top=False,
        alpha=alpha)

    # Feature shapes as noted by the original author for a 416 x 416 x 3 input.
    tap_names = (
        'conv_pw_13_relu',  # 13 x 13 x (1024*alpha)
        'conv_pw_11_relu',  # 26 x 26 x (512*alpha)
        'conv_pw_5_relu',   # 52 x 52 x (256*alpha)
    )
    features = tuple(backbone.get_layer(name).output for name in tap_names)

    # Base widths are scaled by alpha and truncated to int.
    channel_nums = tuple(int(base * alpha) for base in (1024, 512, 256))

    out_1, out_2, out_3 = yolo3_predictions(
        features, channel_nums, num_anchors, num_classes)
    return Model(inputs=inputs, outputs=[out_1, out_2, out_3])
def yolo3_peleenet_body(inputs, num_anchors, num_classes):
    """Assemble the Keras YOLOv3 model that uses PeleeNet as backbone.

    The backbone is instantiated on `inputs` with `weights='imagenet'` and
    `include_top=False`. Three layer outputs, fetched by position in
    `backbone.layers`, are forwarded to `yolo3_predictions`.

    # Arguments
        inputs: input tensor shared by the backbone and the returned model.
        num_anchors: passed through to `yolo3_predictions`.
        num_classes: passed through to `yolo3_predictions`.

    # Returns
        `Model` from `inputs` to the three outputs of `yolo3_predictions`.
    """
    backbone = PeleeNet(
        input_tensor=inputs, weights='imagenet', include_top=False)
    print('backbone layers number: {}'.format(len(backbone.layers)))

    # Original author's layer notes for a 416 x 416 x 3 input:
    #   layer 365  re_lu_338 (final feature map)  13 x 13 x 704
    #   layer 265  re_lu_307 (end of stride 16)   26 x 26 x 512
    #   layer 133  re_lu_266 (end of stride 8)    52 x 52 x 256
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so
    # the layers are fetched by index rather than by name.
    all_layers = backbone.layers
    features = (
        all_layers[365].output,
        all_layers[265].output,
        all_layers[133].output,
    )
    channel_nums = (704, 512, 256)

    out_1, out_2, out_3 = yolo3_predictions(
        features, channel_nums, num_anchors, num_classes)
    return Model(inputs=inputs, outputs=[out_1, out_2, out_3])
Esempio n. 9
0
def yolo3_ghostnet_body(inputs, num_anchors, num_classes):
    """Assemble the Keras YOLOv3 model that uses GhostNet as backbone.

    The backbone is instantiated on `inputs` with `weights='imagenet'` and
    `include_top=False`. Three layer outputs, fetched by position in
    `backbone.layers`, are forwarded to `yolo3_predictions`.

    # Arguments
        inputs: input tensor shared by the backbone and the returned model.
        num_anchors: passed through to `yolo3_predictions`.
        num_classes: passed through to `yolo3_predictions`.

    # Returns
        `Model` from `inputs` to the three outputs of `yolo3_predictions`.
    """
    backbone = GhostNet(
        input_tensor=inputs, weights='imagenet', include_top=False)
    print('backbone layers number: {}'.format(len(backbone.layers)))

    # Original author's layer notes for a 416 x 416 x 3 input:
    #   layer 291  blocks_9_0_relu (final feature map)          13 x 13 x 960
    #   layer 288  blocks_8_3_add (end of block8)               13 x 13 x 160
    #   layer 203  blocks_7_0_ghost1_concat (middle in block7)  26 x 26 x 672
    #   layer 196  blocks_6_4_add (end of block6)               26 x 26 x 112
    #   layer 101  blocks_5_0_ghost1_concat (middle in block5)  52 x 52 x 240
    #   layer 94   blocks_4_0_add (end of block4)               52 x 52 x 40
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so
    # the layers are fetched by index rather than by name.
    tap_indices = (291, 203, 101)
    features = tuple(backbone.layers[index].output for index in tap_indices)
    channel_nums = (960, 672, 240)

    out_1, out_2, out_3 = yolo3_predictions(
        features, channel_nums, num_anchors, num_classes)
    return Model(inputs=inputs, outputs=[out_1, out_2, out_3])
def yolo3_efficientnet_body(inputs, num_anchors, num_classes, level=3):
    '''
    Assemble the Keras YOLOv3 model that uses an EfficientNet backbone.

    The backbone and a dict of channel numbers both come from
    `get_efficientnet_backbone_info(inputs, level=level)`. Three named layer
    outputs, each paired with its entry in that dict, are forwarded to
    `yolo3_predictions`.

    # Arguments
        inputs: input tensor shared by the backbone and the returned model.
        num_anchors: passed through to `yolo3_predictions`.
        num_classes: passed through to `yolo3_predictions`.
        level: EfficientNet level number.
            by default we use EfficientNetB3 as backbone

    # Returns
        `Model` from `inputs` to the three outputs of `yolo3_predictions`.
    '''
    backbone, feature_map_info = get_efficientnet_backbone_info(
        inputs, level=level)
    print('backbone layers number: {}'.format(len(backbone.layers)))

    # (layer name, key into feature_map_info) for each tapped feature map.
    taps = (
        ('top_activation', 'f1_channel_num'),
        ('block6a_expand_activation', 'f2_channel_num'),
        ('block4a_expand_activation', 'f3_channel_num'),
    )

    features = []
    channel_nums = []
    for layer_name, info_key in taps:
        features.append(backbone.get_layer(layer_name).output)
        channel_nums.append(feature_map_info[info_key])

    out_1, out_2, out_3 = yolo3_predictions(
        tuple(features), tuple(channel_nums), num_anchors, num_classes)
    return Model(inputs=inputs, outputs=[out_1, out_2, out_3])
def yolo3_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Assemble the Keras YOLOv3 model that uses MobileNetV3Large as backbone.

    The backbone is instantiated on `inputs` with `weights='imagenet'`,
    `include_top=False` and the given `alpha`. Three layer outputs, fetched
    by position in `backbone.layers`, are forwarded to `yolo3_predictions`.

    # Arguments
        inputs: input tensor shared by the backbone and the returned model.
        num_anchors: passed through to `yolo3_predictions`.
        num_classes: passed through to `yolo3_predictions`.
        alpha: forwarded to `MobileNetV3Large`; also scales the channel
            numbers handed to `yolo3_predictions`.

    # Returns
        `Model` from `inputs` to the three outputs of `yolo3_predictions`.
    """
    backbone = MobileNetV3Large(
        input_tensor=inputs,
        weights='imagenet',
        include_top=False,
        alpha=alpha)
    print('backbone layers number: {}'.format(len(backbone.layers)))

    # Original author's layer notes for a 416 x 416 x 3 input:
    #   layer 194  activation_38 (final feature map)     13 x 13 x (960*alpha)
    #   layer 191  expanded_conv_14/Add (end of block14) 13 x 13 x (160*alpha)
    #   layer 146  activation_29 (middle in block12)     26 x 26 x (672*alpha)
    #   layer 143  expanded_conv_11/Add (end of block11) 26 x 26 x (112*alpha)
    #   layer 79   activation_15 (middle in block6)      52 x 52 x (240*alpha)
    #   layer 76   expanded_conv_5/Add (end of block5)   52 x 52 x (40*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so
    # the layers are fetched by index rather than by name.
    all_layers = backbone.layers
    features = (
        all_layers[194].output,
        all_layers[146].output,
        all_layers[79].output,
    )

    # Base widths are scaled by alpha and truncated to int.
    channel_nums = tuple(int(base * alpha) for base in (960, 672, 240))

    out_1, out_2, out_3 = yolo3_predictions(
        features, channel_nums, num_anchors, num_classes)
    return Model(inputs=inputs, outputs=[out_1, out_2, out_3])