def yolo2_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Create YOLO_V2 MobileNetV3Large model CNN body in Keras."""
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)

    # input: 416 x 416 x 3
    # mobilenetv3large.output(layer 194, final feature map): 13 x 13 x (960*alpha)
    # expanded_conv_14/Add(layer 191, end of block14): 13 x 13 x (160*alpha)
    # activation_29(layer 146, middle in block12): 26 x 26 x (672*alpha)
    # expanded_conv_11/Add(layer 143, end of block11): 26 x 26 x (112*alpha)
    conv_head1 = compose(
        DarknetConv2D_BN_Leaky(int(960 * alpha), (3, 3)),
        DarknetConv2D_BN_Leaky(int(960 * alpha), (3, 3)))(mobilenetv3large.output)

    # activation_29(layer 146) output shape: 26 x 26 x (672*alpha)
    activation_29 = mobilenetv3large.layers[146].output
    conv_head2 = DarknetConv2D_BN_Leaky(int(64 * alpha), (1, 1))(activation_29)
    # TODO: Allow Keras Lambda to use func arguments for output_shape?
    conv_head2_reshaped = Lambda(
        space_to_depth_x2,
        output_shape=space_to_depth_x2_output_shape,
        name='space_to_depth')(conv_head2)

    x = Concatenate()([conv_head2_reshaped, conv_head1])
    x = DarknetConv2D_BN_Leaky(int(960 * alpha), (3, 3))(x)
    x = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1), name='predict_conv')(x)
    return Model(inputs, x)
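# Usage sketch (illustrative only, not part of the original module): how a YOLOv2
# MobileNetV3Large body is typically instantiated. The 416x416 input size, 5 anchors
# and 20 classes below are hypothetical example values (e.g. a Pascal VOC style setup).
def _demo_yolo2_mobilenetv3large_body():
    from tensorflow.keras.layers import Input
    image_input = Input(shape=(416, 416, 3), name='image_input')
    model = yolo2_mobilenetv3large_body(image_input, num_anchors=5, num_classes=20, alpha=1.0)
    # single prediction tensor: 13 x 13 x (num_anchors * (num_classes + 5))
    model.summary()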
def tiny_yolo3_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Create Tiny YOLO_v3 MobileNetV3Large model CNN body in Keras.'''
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)
    print('backbone layers number: {}'.format(len(mobilenetv3large.layers)))

    # input: 416 x 416 x 3
    # activation_38(layer 194, final feature map): 13 x 13 x (960*alpha)
    # expanded_conv_14/Add(layer 191, end of block14): 13 x 13 x (160*alpha)
    # activation_29(layer 146, middle in block12): 26 x 26 x (672*alpha)
    # expanded_conv_11/Add(layer 143, end of block11): 26 x 26 x (112*alpha)
    # activation_15(layer 79, middle in block6): 52 x 52 x (240*alpha)
    # expanded_conv_5/Add(layer 76, end of block5): 52 x 52 x (40*alpha)

    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature maps

    # f1: 13 x 13 x (960*alpha)
    f1 = mobilenetv3large.layers[194].output
    # f2: 26 x 26 x (672*alpha)
    f2 = mobilenetv3large.layers[146].output

    f1_channel_num = int(960*alpha)
    f2_channel_num = int(672*alpha)
    #f1_channel_num = 1024
    #f2_channel_num = 512

    y1, y2 = tiny_yolo3_predictions((f1, f2), (f1_channel_num, f2_channel_num), num_anchors, num_classes)

    return Model(inputs, [y1, y2])
def yolo2lite_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Create YOLO_V2 Lite MobileNetV3Large model CNN body in Keras."""
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)
    print('backbone layers number: {}'.format(len(mobilenetv3large.layers)))

    # input: 416 x 416 x 3
    # mobilenetv3large.output(layer 194, final feature map): 13 x 13 x (960*alpha)
    # expanded_conv_14/Add(layer 191, end of block14): 13 x 13 x (160*alpha)
    # activation_29(layer 146, middle in block12): 26 x 26 x (672*alpha)
    # expanded_conv_11/Add(layer 143, end of block11): 26 x 26 x (112*alpha)

    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature maps

    # f1: 13 x 13 x (960*alpha)
    f1 = mobilenetv3large.output
    # f2: 26 x 26 x (672*alpha)
    f2 = mobilenetv3large.layers[146].output

    f1_channel_num = int(960 * alpha)
    f2_channel_num = int(672 * alpha)

    y = yolo2lite_predictions((f1, f2), (f1_channel_num, f2_channel_num), num_anchors, num_classes)

    return Model(inputs, y)
def tiny_yolo2lite_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Create Tiny YOLO_V2 Lite MobileNetV3Large model CNN body in Keras."""
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)
    print('backbone layers number: {}'.format(len(mobilenetv3large.layers)))

    # input: 416 x 416 x 3
    # mobilenetv3large.output(layer 194, final feature map): 13 x 13 x (960*alpha)

    # f1: 13 x 13 x (960*alpha)
    f1 = mobilenetv3large.output
    f1_channel_num = int(960 * alpha)

    y = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(f1_channel_num, (3, 3), block_id_str='pred_1'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1), name='predict_conv'))(f1)

    return Model(inputs, y)
def yolo3_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Create YOLO_V3 MobileNetV3Large model CNN body in Keras."""
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)

    # input: 416 x 416 x 3
    # activation_38(layer 194, final feature map): 13 x 13 x (960*alpha)
    # expanded_conv_14/Add(layer 191, end of block14): 13 x 13 x (160*alpha)
    # activation_29(layer 146, middle in block12): 26 x 26 x (672*alpha)
    # expanded_conv_11/Add(layer 143, end of block11): 26 x 26 x (112*alpha)
    # activation_15(layer 79, middle in block6): 52 x 52 x (240*alpha)
    # expanded_conv_5/Add(layer 76, end of block5): 52 x 52 x (40*alpha)

    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature maps

    # f1: 13 x 13 x (960*alpha)
    f1 = mobilenetv3large.layers[194].output
    # f2: 26 x 26 x (672*alpha)
    f2 = mobilenetv3large.layers[146].output
    # f3: 52 x 52 x (240*alpha)
    f3 = mobilenetv3large.layers[79].output

    f1_channel_num = int(960 * alpha)
    f2_channel_num = int(672 * alpha)
    f3_channel_num = int(240 * alpha)
    #f1_channel_num = 1024
    #f2_channel_num = 512
    #f3_channel_num = 256

    #feature map 1 head & output (13x13 for 416 input)
    x, y1 = make_last_layers(f1, f1_channel_num // 2, num_anchors * (num_classes + 5))
    #x, y1 = make_last_layers(f1, f1_channel_num//2, num_anchors * (num_classes + 5), predict_filters=int(1024*alpha))

    #upsample fpn merge for feature map 1 & 2
    x = compose(
        DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
        UpSampling2D(2))(x)
    x = Concatenate()([x, f2])

    #feature map 2 head & output (26x26 for 416 input)
    x, y2 = make_last_layers(x, f2_channel_num // 2, num_anchors * (num_classes + 5))
    #x, y2 = make_last_layers(x, f2_channel_num//2, num_anchors*(num_classes+5), predict_filters=int(512*alpha))

    #upsample fpn merge for feature map 2 & 3
    x = compose(
        DarknetConv2D_BN_Leaky(f3_channel_num // 2, (1, 1)),
        UpSampling2D(2))(x)
    x = Concatenate()([x, f3])

    #feature map 3 head & output (52x52 for 416 input)
    x, y3 = make_last_layers(x, f3_channel_num // 2, num_anchors * (num_classes + 5))
    #x, y3 = make_last_layers(x, f3_channel_num//2, num_anchors*(num_classes+5), predict_filters=int(256*alpha))

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def tiny_yolo4_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0, spp=True):
    '''Create Tiny YOLO_v4 MobileNetV3Large model CNN body in Keras.'''
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)

    # input: 416 x 416 x 3
    # activation_38(layer 194, final feature map): 13 x 13 x (960*alpha)
    # expanded_conv_14/Add(layer 191, end of block14): 13 x 13 x (160*alpha)
    # activation_29(layer 146, middle in block12): 26 x 26 x (672*alpha)
    # expanded_conv_11/Add(layer 143, end of block11): 26 x 26 x (112*alpha)
    # activation_15(layer 79, middle in block6): 52 x 52 x (240*alpha)
    # expanded_conv_5/Add(layer 76, end of block5): 52 x 52 x (40*alpha)

    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature maps

    # f1: 13 x 13 x (960*alpha)
    f1 = mobilenetv3large.layers[194].output
    # f2: 26 x 26 x (672*alpha) for 416 input
    f2 = mobilenetv3large.layers[146].output

    #feature map 1 head (13 x 13 x (480*alpha) for 416 input)
    x1 = DarknetConv2D_BN_Leaky(int(480 * alpha), (1, 1))(f1)
    if spp:
        x1 = Spp_Conv2D_BN_Leaky(x1, int(480 * alpha))

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(
        DarknetConv2D_BN_Leaky(int(336 * alpha), (1, 1)),
        UpSampling2D(2))(x1)
    x2 = compose(
        Concatenate(),
        #Depthwise_Separable_Conv2D_BN_Leaky(filters=int(672*alpha), kernel_size=(3, 3), block_id_str='15'),
        DarknetConv2D_BN_Leaky(int(672 * alpha), (3, 3)))([x1_upsample, f2])

    #feature map 2 output (26 x 26 x (672*alpha) for 416 input)
    y2 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        #Darknet_Depthwise_Separable_Conv2D_BN_Leaky(int(480*alpha), (3,3), strides=(2,2), block_id_str='16'),
        DarknetConv2D_BN_Leaky(int(480 * alpha), (3, 3), strides=(2, 2)))(x2)
    x1 = compose(
        Concatenate(),
        #Depthwise_Separable_Conv2D_BN_Leaky(filters=int(960*alpha), kernel_size=(3, 3), block_id_str='17'),
        DarknetConv2D_BN_Leaky(int(960 * alpha), (3, 3)))([x2_downsample, x1])

    #feature map 1 output (13 x 13 x (960*alpha) for 416 input)
    y1 = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x1)

    return Model(inputs, [y1, y2])
def tiny_yolo3lite_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Create Tiny YOLO_v3 Lite MobileNetV3Large model CNN body in Keras.'''
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)

    # input: 416 x 416 x 3
    # activation_38(layer 194, final feature map): 13 x 13 x (960*alpha)
    # expanded_conv_14/Add(layer 191, end of block14): 13 x 13 x (160*alpha)
    # activation_29(layer 146, middle in block12): 26 x 26 x (672*alpha)
    # expanded_conv_11/Add(layer 143, end of block11): 26 x 26 x (112*alpha)
    # activation_15(layer 79, middle in block6): 52 x 52 x (240*alpha)
    # expanded_conv_5/Add(layer 76, end of block5): 52 x 52 x (40*alpha)

    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature maps

    # f1: 13 x 13 x (960*alpha)
    f1 = mobilenetv3large.layers[194].output
    # f2: 26 x 26 x (672*alpha)
    f2 = mobilenetv3large.layers[146].output

    f1_channel_num = int(960 * alpha)
    f2_channel_num = int(672 * alpha)
    #f1_channel_num = 1024
    #f2_channel_num = 512

    #feature map 1 transform
    x1 = DarknetConv2D_BN_Leaky(f1_channel_num // 2, (1, 1))(f1)

    #feature map 1 output (13x13 for 416 input)
    y1 = compose(
        #DarknetConv2D_BN_Leaky(f1_channel_num, (3,3)),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=f1_channel_num, kernel_size=(3, 3), block_id_str='15'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x1)

    #upsample fpn merge for feature map 1 & 2
    x2 = compose(
        DarknetConv2D_BN_Leaky(f2_channel_num // 2, (1, 1)),
        UpSampling2D(2))(x1)

    #feature map 2 output (26x26 for 416 input)
    y2 = compose(
        Concatenate(),
        #DarknetConv2D_BN_Leaky(f2_channel_num, (3,3)),
        Depthwise_Separable_Conv2D_BN_Leaky(filters=f2_channel_num, kernel_size=(3, 3), block_id_str='16'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))([x2, f2])

    return Model(inputs, [y1, y2])
def tiny_yolo2_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Create Tiny YOLO_V2 MobileNetV3Large model CNN body in Keras."""
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)

    # input: 416 x 416 x 3
    # mobilenetv3large.output(layer 194, final feature map): 13 x 13 x (960*alpha)

    y = compose(
        DarknetConv2D_BN_Leaky(int(960 * alpha), (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1), name='predict_conv'))(mobilenetv3large.output)

    return Model(inputs, y)
def yolo3lite_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Create YOLO_v3 Lite MobileNetV3Large model CNN body in Keras.'''
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)

    # input: 416 x 416 x 3
    # activation_38(layer 194, final feature map): 13 x 13 x (960*alpha)
    # expanded_conv_14/Add(layer 191, end of block14): 13 x 13 x (160*alpha)
    # activation_29(layer 146, middle in block12): 26 x 26 x (672*alpha)
    # expanded_conv_11/Add(layer 143, end of block11): 26 x 26 x (112*alpha)
    # activation_15(layer 79, middle in block6): 52 x 52 x (240*alpha)
    # expanded_conv_5/Add(layer 76, end of block5): 52 x 52 x (40*alpha)

    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature maps

    # f1: 13 x 13 x (960*alpha)
    f1 = mobilenetv3large.layers[194].output
    x, y1 = make_depthwise_separable_last_layers(f1, int(672*alpha), num_anchors * (num_classes + 5), block_id_str='15')
    #x, y1 = make_depthwise_separable_last_layers(f1, int(672*alpha), num_anchors * (num_classes + 5), block_id_str='15', predict_filters=int(1024*alpha))

    x = compose(
        DarknetConv2D_BN_Leaky(int(336*alpha), (1, 1)),
        UpSampling2D(2))(x)

    # f2: 26 x 26 x (672*alpha)
    f2 = mobilenetv3large.layers[146].output
    x = Concatenate()([x, f2])

    x, y2 = make_depthwise_separable_last_layers(x, int(240*alpha), num_anchors * (num_classes + 5), block_id_str='16')
    #x, y2 = make_depthwise_separable_last_layers(x, int(240*alpha), num_anchors*(num_classes+5), block_id_str='16', predict_filters=int(512*alpha))

    x = compose(
        DarknetConv2D_BN_Leaky(int(120*alpha), (1, 1)),
        UpSampling2D(2))(x)

    # f3: 52 x 52 x (240*alpha)
    f3 = mobilenetv3large.layers[79].output
    x = Concatenate()([x, f3])

    x, y3 = make_depthwise_separable_last_layers(x, int(120*alpha), num_anchors * (num_classes + 5), block_id_str='17')
    #x, y3 = make_depthwise_separable_last_layers(x, int(120*alpha), num_anchors*(num_classes+5), block_id_str='17', predict_filters=int(256*alpha))

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def yolo3_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    """Create YOLO_V3 MobileNetV3Large model CNN body in Keras."""
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)

    # input: 416 x 416 x 3
    # activation_38(layer 194, final feature map): 13 x 13 x (960*alpha)
    # expanded_conv_14/Add(layer 191, end of block14): 13 x 13 x (160*alpha)
    # activation_29(layer 146, middle in block12): 26 x 26 x (672*alpha)
    # expanded_conv_11/Add(layer 143, end of block11): 26 x 26 x (112*alpha)
    # activation_15(layer 79, middle in block6): 52 x 52 x (240*alpha)
    # expanded_conv_5/Add(layer 76, end of block5): 52 x 52 x (40*alpha)

    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature maps

    # f1: 13 x 13 x (960*alpha)
    f1 = mobilenetv3large.layers[194].output
    # f2: 26 x 26 x (672*alpha)
    f2 = mobilenetv3large.layers[146].output
    # f3: 52 x 52 x (240*alpha)
    f3 = mobilenetv3large.layers[79].output

    f1_channel_num = int(960 * alpha)
    f2_channel_num = int(672 * alpha)
    f3_channel_num = int(240 * alpha)
    #f1_channel_num = 1024
    #f2_channel_num = 512
    #f3_channel_num = 256

    y1, y2, y3 = yolo3_predictions(
        (f1, f2, f3), (f1_channel_num, f2_channel_num, f3_channel_num), num_anchors, num_classes)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
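# Usage sketch (illustrative only, not part of the original module): the 3-scale YOLOv3
# MobileNetV3Large body predicts at 13x13, 26x26 and 52x52 for a 416x416 input. The
# 3 anchors per scale and 80 classes below are hypothetical example values (e.g. an
# MS COCO style setup).
def _demo_yolo3_mobilenetv3large_body():
    from tensorflow.keras.layers import Input
    image_input = Input(shape=(416, 416, 3), name='image_input')
    model = yolo3_mobilenetv3large_body(image_input, num_anchors=3, num_classes=80, alpha=0.75)
    # each output tensor has num_anchors * (num_classes + 5) channels
    for output in model.outputs:
        print(output.shape)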
def tiny_yolo3_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Create Tiny YOLO_v3 MobileNetV3Large model CNN body in Keras.'''
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)

    # input: 416 x 416 x 3
    # activation_38(layer 194, final feature map): 13 x 13 x (960*alpha)
    # expanded_conv_14/Add(layer 191, end of block14): 13 x 13 x (160*alpha)
    # activation_29(layer 146, middle in block12): 26 x 26 x (672*alpha)
    # expanded_conv_11/Add(layer 143, end of block11): 26 x 26 x (112*alpha)
    # activation_15(layer 79, middle in block6): 52 x 52 x (240*alpha)
    # expanded_conv_5/Add(layer 76, end of block5): 52 x 52 x (40*alpha)

    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature maps

    x1 = mobilenetv3large.layers[146].output
    x2 = mobilenetv3large.layers[194].output
    x2 = DarknetConv2D_BN_Leaky(int(672*alpha), (1, 1))(x2)

    y1 = compose(
        DarknetConv2D_BN_Leaky(int(960*alpha), (3, 3)),
        #Depthwise_Separable_Conv2D_BN_Leaky(filters=int(960*alpha), kernel_size=(3, 3), block_id_str='15'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)

    x2 = compose(
        DarknetConv2D_BN_Leaky(int(336*alpha), (1, 1)),
        UpSampling2D(2))(x2)

    y2 = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(int(672*alpha), (3, 3)),
        #Depthwise_Separable_Conv2D_BN_Leaky(filters=int(672*alpha), kernel_size=(3, 3), block_id_str='16'),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))([x2, x1])

    return Model(inputs, [y1, y2])
def yolo4lite_mobilenetv3large_body(inputs, num_anchors, num_classes, alpha=1.0):
    '''Create YOLO_v4 Lite MobileNetV3Large model CNN body in Keras.'''
    mobilenetv3large = MobileNetV3Large(input_tensor=inputs, weights='imagenet', include_top=False, alpha=alpha)

    # input: 416 x 416 x 3
    # activation_38(layer 194, final feature map): 13 x 13 x (960*alpha)
    # expanded_conv_14/Add(layer 191, end of block14): 13 x 13 x (160*alpha)
    # activation_29(layer 146, middle in block12): 26 x 26 x (672*alpha)
    # expanded_conv_11/Add(layer 143, end of block11): 26 x 26 x (112*alpha)
    # activation_15(layer 79, middle in block6): 52 x 52 x (240*alpha)
    # expanded_conv_5/Add(layer 76, end of block5): 52 x 52 x (40*alpha)

    # NOTE: activation layer names may differ between TF 1.x and 2.x,
    # so we use the layer index to fetch the feature maps

    # f1: 13 x 13 x (960*alpha)
    f1 = mobilenetv3large.layers[194].output

    #feature map 1 head (13 x 13 x (480*alpha) for 416 input)
    x1 = make_yolo_spp_depthwise_separable_head(f1, int(480 * alpha))

    #upsample fpn merge for feature map 1 & 2
    x1_upsample = compose(
        DarknetConv2D_BN_Leaky(int(336 * alpha), (1, 1)),
        UpSampling2D(2))(x1)

    # f2: 26 x 26 x (672*alpha) for 416 input
    f2 = mobilenetv3large.layers[146].output
    x2 = DarknetConv2D_BN_Leaky(int(336 * alpha), (1, 1))(f2)
    x2 = Concatenate()([x2, x1_upsample])

    #feature map 2 head (26 x 26 x (336*alpha) for 416 input)
    x2 = make_yolo_depthwise_separable_head(x2, int(336 * alpha))

    #upsample fpn merge for feature map 2 & 3
    x2_upsample = compose(
        DarknetConv2D_BN_Leaky(int(120 * alpha), (1, 1)),
        UpSampling2D(2))(x2)

    # f3: 52 x 52 x (240*alpha) for 416 input
    f3 = mobilenetv3large.layers[79].output
    x3 = DarknetConv2D_BN_Leaky(int(120 * alpha), (1, 1))(f3)
    x3 = Concatenate()([x3, x2_upsample])

    #feature map 3 head & output (52 x 52 x (240*alpha) for 416 input)
    #x3, y3 = make_depthwise_separable_last_layers(x3, int(120*alpha), num_anchors*(num_classes+5))
    x3 = make_yolo_depthwise_separable_head(x3, int(120 * alpha))
    y3 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(int(240 * alpha), (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x3)

    #downsample fpn merge for feature map 3 & 2
    x3_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(int(336 * alpha), (3, 3), strides=(2, 2)))(x3)
    x2 = Concatenate()([x3_downsample, x2])

    #feature map 2 output (26 x 26 x (672*alpha) for 416 input)
    #x2, y2 = make_depthwise_separable_last_layers(x2, int(336*alpha), num_anchors*(num_classes+5))
    x2 = make_yolo_depthwise_separable_head(x2, int(336 * alpha))
    y2 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(int(672 * alpha), (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)

    #downsample fpn merge for feature map 2 & 1
    x2_downsample = compose(
        ZeroPadding2D(((1, 0), (1, 0))),
        Darknet_Depthwise_Separable_Conv2D_BN_Leaky(int(480 * alpha), (3, 3), strides=(2, 2)))(x2)
    x1 = Concatenate()([x2_downsample, x1])

    #feature map 1 output (13 x 13 x (960*alpha) for 416 input)
    #x1, y1 = make_depthwise_separable_last_layers(x1, int(480*alpha), num_anchors*(num_classes+5))
    x1 = make_yolo_depthwise_separable_head(x1, int(480 * alpha))
    y1 = compose(
        Depthwise_Separable_Conv2D_BN_Leaky(int(960 * alpha), (3, 3)),
        DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x1)

    return Model(inputs, [y1, y2, y3])
def get_base_model(model_type, model_input_shape, weights='imagenet'):
    input_tensor = Input(shape=model_input_shape + (3,), name='image_input')

    if model_type == 'mobilenet':
        model = MobileNet(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=weights, pooling=None, include_top=False, alpha=0.5)
    elif model_type == 'mobilenetv2':
        model = MobileNetV2(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=weights, pooling=None, include_top=False, alpha=0.5)
    elif model_type == 'mobilenetv3large':
        model = MobileNetV3Large(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=weights, pooling=None, include_top=False, alpha=0.75)
    elif model_type == 'mobilenetv3small':
        model = MobileNetV3Small(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=weights, pooling=None, include_top=False, alpha=0.75)
    elif model_type == 'peleenet':
        model = PeleeNet(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=weights, pooling=None, include_top=False)
    elif model_type == 'ghostnet':
        model = GhostNet(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=weights, pooling=None, include_top=False)
    elif model_type == 'squeezenet':
        model = SqueezeNet(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=weights, pooling=None, include_top=False)
    elif model_type == 'mobilevit_s':
        model = MobileViT_S(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=weights, pooling=None, include_top=False)
    elif model_type == 'mobilevit_xs':
        model = MobileViT_XS(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=weights, pooling=None, include_top=False)
    elif model_type == 'mobilevit_xxs':
        model = MobileViT_XXS(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=weights, pooling=None, include_top=False)
    elif model_type == 'resnet50':
        model = ResNet50(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=weights, pooling=None, include_top=False)
    elif model_type == 'simple_cnn':
        model = SimpleCNN(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=None, pooling=None, include_top=False)
    elif model_type == 'simple_cnn_lite':
        model = SimpleCNNLite(input_tensor=input_tensor, input_shape=model_input_shape + (3,), weights=None, pooling=None, include_top=False)
    else:
        raise ValueError('Unsupported model type')

    return model
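# Usage sketch (illustrative only, not part of the original module): build a backbone
# feature extractor via the factory above. The 'mobilenetv3large' model type and the
# 224x224 input shape are hypothetical example values.
def _demo_get_base_model():
    base_model = get_base_model('mobilenetv3large', (224, 224))
    # headless backbone: final feature map, no pooling or classifier on top
    print('backbone output shape:', base_model.output.shape)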