def yolo4lite_resnet50v2_body(inputs, num_anchors, num_classes):
    '''
    Build the YOLO_v4 Lite network on top of a ResNet50V2 backbone.

    # Arguments
        inputs: tensor handed to ResNet50V2 as `input_tensor`.
        num_anchors: forwarded unchanged to `yolo4lite_predictions`.
        num_classes: forwarded unchanged to `yolo4lite_predictions`.

    # Returns
        A `Model` mapping `inputs` to the three outputs produced by
        `yolo4lite_predictions`.
    '''
    backbone = ResNet50V2(input_tensor=inputs, weights='imagenet', include_top=False)
    layer_count = len(backbone.layers)
    print('backbone layers number: {}'.format(layer_count))

    # Backbone taps in f1, f2, f3 order. Sizes noted for a 416 x 416 x 3 input:
    #   post_relu        -> 13 x 13 x 2048
    #   conv4_block5_out -> 26 x 26 x 1024
    #   conv3_block3_out -> 52 x 52 x 512
    tap_names = ('post_relu', 'conv4_block5_out', 'conv3_block3_out')
    feature_maps = tuple(backbone.get_layer(tap).output for tap in tap_names)

    # Channel numbers handed to the prediction head for f1, f2, f3. These are
    # fixed values, not the tap depths listed above.
    head_channels = (1024, 512, 256)

    pred_f1, pred_f2, pred_f3 = yolo4lite_predictions(
        feature_maps, head_channels, num_anchors, num_classes)

    return Model(inputs=inputs, outputs=[pred_f1, pred_f2, pred_f3])
def yolo4lite_mobilenet_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''
    Build the YOLO_v4 Lite network on top of a MobileNet backbone.

    # Arguments
        inputs: tensor handed to MobileNet as `input_tensor`.
        num_anchors: forwarded unchanged to `yolo4lite_predictions`.
        num_classes: forwarded unchanged to `yolo4lite_predictions`.
        alpha: passed to MobileNet as `alpha`; also scales the channel
            numbers given to the prediction head.

    # Returns
        A `Model` mapping `inputs` to the three outputs produced by
        `yolo4lite_predictions`.
    '''
    backbone = MobileNet(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)
    layer_count = len(backbone.layers)
    print('backbone layers number: {}'.format(layer_count))

    # Backbone taps in f1, f2, f3 order. Sizes noted for a 416 x 416 x 3 input:
    #   conv_pw_13_relu -> 13 x 13 x (1024*alpha)
    #   conv_pw_11_relu -> 26 x 26 x (512*alpha)
    #   conv_pw_5_relu  -> 52 x 52 x (256*alpha)
    tap_names = ('conv_pw_13_relu', 'conv_pw_11_relu', 'conv_pw_5_relu')
    feature_maps = tuple(backbone.get_layer(tap).output for tap in tap_names)

    # Head channel numbers: base widths scaled by alpha, truncated to int.
    head_channels = tuple(int(base * alpha) for base in (1024, 512, 256))

    pred_f1, pred_f2, pred_f3 = yolo4lite_predictions(
        feature_maps, head_channels, num_anchors, num_classes)

    return Model(inputs, [pred_f1, pred_f2, pred_f3])
def yolo4lite_mobilenetv2_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''
    Build the YOLO_v4 Lite network on top of a MobileNetV2 backbone.

    # Arguments
        inputs: tensor handed to MobileNetV2 as `input_tensor`.
        num_anchors: forwarded unchanged to `yolo4lite_predictions`.
        num_classes: forwarded unchanged to `yolo4lite_predictions`.
        alpha: passed to MobileNetV2 as `alpha`; also scales the channel
            numbers given to the prediction head.

    # Returns
        A `Model` mapping `inputs` to the three outputs produced by
        `yolo4lite_predictions`.
    '''
    backbone = MobileNetV2(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)
    layer_count = len(backbone.layers)
    print('backbone layers number: {}'.format(layer_count))

    # Backbone taps in f1, f2, f3 order. Sizes noted for a 416 x 416 x 3 input:
    #   out_relu             -> 13 x 13 x 1280
    #   block_13_expand_relu -> 26 x 26 x (576*alpha)
    #   block_6_expand_relu  -> 52 x 52 x (192*alpha)
    tap_names = ('out_relu', 'block_13_expand_relu', 'block_6_expand_relu')
    feature_maps = tuple(backbone.get_layer(tap).output for tap in tap_names)

    # Head channel numbers: base widths scaled by alpha, truncated to int.
    # NOTE: a fixed 1024 / 512 / 256 setting was left commented out in the
    # original code as an alternative.
    head_channels = tuple(int(base * alpha) for base in (1280, 576, 192))

    pred_f1, pred_f2, pred_f3 = yolo4lite_predictions(
        feature_maps, head_channels, num_anchors, num_classes)

    return Model(inputs=inputs, outputs=[pred_f1, pred_f2, pred_f3])
def yolo4lite_efficientnet_body(inputs, num_anchors, num_classes, level=1):
    '''
    Build the YOLO_v4 Lite network on top of an EfficientNet backbone.

    # Arguments
        inputs: tensor forwarded to `get_efficientnet_backbone_info`.
        num_anchors: forwarded unchanged to `yolo4lite_predictions`.
        num_classes: forwarded unchanged to `yolo4lite_predictions`.
        level: EfficientNet level number. The default of 1 selects
            EfficientNetB1 as backbone.

    # Returns
        A `Model` mapping `inputs` to the three outputs produced by
        `yolo4lite_predictions`.
    '''
    backbone, channel_info = get_efficientnet_backbone_info(inputs, level=level)
    layer_count = len(backbone.layers)
    print('backbone layers number: {}'.format(layer_count))

    # (backbone layer name, key into the channel info mapping), f1..f3 order.
    taps = (
        ('top_activation', 'f1_channel_num'),
        ('block6a_expand_activation', 'f2_channel_num'),
        ('block4a_expand_activation', 'f3_channel_num'),
    )

    feature_maps = []
    head_channels = []
    # Resolve each tap's layer output and then its channel number, one tap
    # at a time, so lookups happen in the same order as before.
    for layer_name, channel_key in taps:
        feature_maps.append(backbone.get_layer(layer_name).output)
        head_channels.append(channel_info[channel_key])

    pred_f1, pred_f2, pred_f3 = yolo4lite_predictions(
        tuple(feature_maps), tuple(head_channels), num_anchors, num_classes)

    return Model(inputs, [pred_f1, pred_f2, pred_f3])
def yolo4lite_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''
    Build the YOLO_v4 Lite network on top of a MobileNetV3Large backbone.

    # Arguments
        inputs: tensor handed to MobileNetV3Large as `input_tensor`.
        num_anchors: forwarded unchanged to `yolo4lite_predictions`.
        num_classes: forwarded unchanged to `yolo4lite_predictions`.
        alpha: passed to MobileNetV3Large as `alpha`; also scales the
            channel numbers given to the prediction head.

    # Returns
        A `Model` mapping `inputs` to the three outputs produced by
        `yolo4lite_predictions`.
    '''
    backbone = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)
    layer_count = len(backbone.layers)
    print('backbone layers number: {}'.format(layer_count))

    # Layer map noted for a 416 x 416 x 3 input:
    #   activation_38 (layer 194, final feature map)     : 13 x 13 x (960*alpha)
    #   expanded_conv_14/Add (layer 191, end of block14) : 13 x 13 x (160*alpha)
    #   activation_29 (layer 146, middle in block12)     : 26 x 26 x (672*alpha)
    #   expanded_conv_11/Add (layer 143, end of block11) : 26 x 26 x (112*alpha)
    #   activation_15 (layer 79, middle in block6)       : 52 x 52 x (240*alpha)
    #   expanded_conv_5/Add (layer 76, end of block5)    : 52 x 52 x (40*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so the
    # taps are fetched by layer index instead of by name.
    tap_indices = (194, 146, 79)  # f1, f2, f3
    feature_maps = tuple(backbone.layers[idx].output for idx in tap_indices)

    # Head channel numbers: base widths scaled by alpha, truncated to int.
    # NOTE: a fixed 1024 / 512 / 256 setting was left commented out in the
    # original code as an alternative.
    head_channels = tuple(int(base * alpha) for base in (960, 672, 240))

    pred_f1, pred_f2, pred_f3 = yolo4lite_predictions(
        feature_maps, head_channels, num_anchors, num_classes)

    return Model(inputs, [pred_f1, pred_f2, pred_f3])
def yolo4lite_mobilenetv3small_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''
    Build the YOLO_v4 Lite network on top of a MobileNetV3Small backbone.

    # Arguments
        inputs: tensor handed to MobileNetV3Small as `input_tensor`.
        num_anchors: forwarded unchanged to `yolo4lite_predictions`.
        num_classes: forwarded unchanged to `yolo4lite_predictions`.
        alpha: passed to MobileNetV3Small as `alpha`; also scales the
            channel numbers given to the prediction head.

    # Returns
        A `Model` mapping `inputs` to the three outputs produced by
        `yolo4lite_predictions`.
    '''
    backbone = MobileNetV3Small(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)
    layer_count = len(backbone.layers)
    print('backbone layers number: {}'.format(layer_count))

    # Layer map noted for a 416 x 416 x 3 input:
    #   activation_31 (layer 165, final feature map)     : 13 x 13 x (576*alpha)
    #   expanded_conv_10/Add (layer 162, end of block10) : 13 x 13 x (96*alpha)
    #   activation_22 (layer 117, middle in block8)      : 26 x 26 x (288*alpha)
    #   expanded_conv_7/Add (layer 114, end of block7)   : 26 x 26 x (48*alpha)
    #   activation_7 (layer 38, middle in block3)        : 52 x 52 x (96*alpha)
    #   expanded_conv_2/Add (layer 35, end of block2)    : 52 x 52 x (24*alpha)
    #
    # NOTE: activation layer names may differ between TF1.x and TF2.x, so the
    # taps are fetched by layer index instead of by name.
    tap_indices = (165, 117, 38)  # f1, f2, f3
    feature_maps = tuple(backbone.layers[idx].output for idx in tap_indices)

    # Head channel numbers: base widths scaled by alpha, truncated to int.
    # NOTE: a fixed 1024 / 512 / 256 setting was left commented out in the
    # original code as an alternative.
    head_channels = tuple(int(base * alpha) for base in (576, 288, 96))

    pred_f1, pred_f2, pred_f3 = yolo4lite_predictions(
        feature_maps, head_channels, num_anchors, num_classes)

    return Model(inputs, [pred_f1, pred_f2, pred_f3])