def tiny_yolo_body(inputs, num_anchors, num_classes):
    """Build the Tiny YOLOv3 network body as a Keras ``Model``.

    A shared convolution/pooling stem feeds two detection heads. The
    second head works on the concatenation of an upsampled copy of the
    deep features with the stem output.

    :param inputs: input tensor; also used as the ``Model`` input.
    :param num_anchors: anchor count used to size each head's output.
    :param num_classes: class count used to size each head's output.
    :return: ``Model(inputs, [y1, y2])`` where each output has
        ``num_anchors * (num_classes + 5)`` filters.
    """
    head_filters = num_anchors * (num_classes + 5)

    # Stem: four (3x3 conv -> stride-2 pool) stages, then a 256-filter conv.
    stem_layers = []
    for width in (16, 32, 64, 128):
        stem_layers.append(DarknetConv2D_BN_Leaky(width, (3, 3)))
        stem_layers.append(
            MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
    stem_layers.append(DarknetConv2D_BN_Leaky(256, (3, 3)))
    stem_out = compose(*stem_layers)(inputs)

    # Deeper branch; note the second pooling layer uses stride 1.
    deep = compose(
        MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'),
        DarknetConv2D_BN_Leaky(1024, (3, 3)),
        DarknetConv2D_BN_Leaky(256, (1, 1)))(stem_out)

    # First head, taken directly from the deep branch.
    y1 = compose(
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        DarknetConv2D(head_filters, (1, 1)))(deep)

    # Second head: upsample the deep branch and merge it with the stem output.
    upsampled = compose(
        DarknetConv2D_BN_Leaky(128, (1, 1)),
        UpSampling2D(2))(deep)
    y2 = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(256, (3, 3)),
        DarknetConv2D(head_filters, (1, 1)))([upsampled, stem_out])

    return Model(inputs, [y1, y2])
def mobilenetv2_yolo_body(inputs, num_anchors, num_classes):
    """Build a YOLOv3 body on top of an ImageNet-pretrained MobileNetV2.

    Three backbone activations are tapped by layer name and passed through
    ``make_last_layers``; the two shallower scales are first concatenated
    with an upsampled copy of the previous scale's features.

    :param inputs: input tensor, passed to MobileNetV2 as ``input_tensor``
        and used as the ``Model`` input.
    :param num_anchors: anchor count used to size each output.
    :param num_classes: class count used to size each output.
    :return: ``Model(inputs=inputs, outputs=[y1, y2, y3])`` where each
        output has ``num_anchors * (num_classes + 5)`` filters.
    """
    out_filters = num_anchors * (num_classes + 5)

    # Only intermediate activations are used, so skip the ImageNet
    # classifier head instead of building it and loading its weights.
    backbone = MobileNetV2(
        input_tensor=inputs, weights='imagenet', include_top=False)

    # NOTE(review): for a 416x416x3 input and the default alpha these taps
    # are expected to be 13x13x1280, 26x26x576 and 52x52x192 respectively;
    # the earlier comments (1024/512/256 channels, conv_pw_* names) looked
    # inherited from a MobileNet-v1 variant. Confirm with model.summary().
    f1 = backbone.get_layer('out_relu').output
    x, y1 = make_last_layers(f1, 512, out_filters)

    x = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(x)
    f2 = backbone.get_layer('block_13_expand_relu').output
    x = Concatenate()([x, f2])
    x, y2 = make_last_layers(x, 256, out_filters)

    x = compose(DarknetConv2D_BN_Leaky(128, (1, 1)), UpSampling2D(2))(x)
    f3 = backbone.get_layer('block_6_expand_relu').output
    x = Concatenate()([x, f3])
    x, y3 = make_last_layers(x, 128, out_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def squeezenet_yolo_body(inputs, num_anchors, num_classes):
    """Build a YOLOv3 body on top of the SqueezeNet backbone.

    Three backbone activations are tapped by layer name and passed through
    ``make_last_layers``; the two shallower scales are first concatenated
    with an upsampled copy of the previous scale's features.

    :param inputs: input tensor, passed to ``squeezenet_body`` as
        ``input_tensor`` and used as the ``Model`` input.
    :param num_anchors: anchor count used to size each output.
    :param num_classes: class count used to size each output.
    :return: ``Model(inputs=inputs, outputs=[y1, y2, y3])`` where each
        output has ``num_anchors * (num_classes + 5)`` filters.
    """
    backbone = squeezenet_body(input_tensor=inputs)
    out_filters = num_anchors * (num_classes + 5)

    def tap(layer_name):
        # Output tensor of the named backbone layer.
        return backbone.get_layer(layer_name).output

    # NOTE(review): the taps use auto-generated 'concatenate_N' names, which
    # presumably depend on how many Concatenate layers were created earlier
    # in the process -- confirm they resolve to the intended layers.
    # NOTE(review): the earlier comments disagreed on the tap shapes for a
    # 416x416x3 input (12x12x640 / 25x25x384 / 51x51x256 versus
    # 13x13x1024 / 26x26x512 / 52x52x256); the spatial sizes must line up
    # with the 2x upsampling below -- verify with model.summary().
    x, y1 = make_last_layers(tap('concatenate_10'), 512, out_filters)

    x = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(x)
    x = Concatenate()([x, tap('concatenate_6')])
    x, y2 = make_last_layers(x, 256, out_filters)

    x = compose(DarknetConv2D_BN_Leaky(128, (1, 1)), UpSampling2D(2))(x)
    x = Concatenate()([x, tap('concatenate_4')])
    x, y3 = make_last_layers(x, 128, out_filters)

    return Model(inputs=inputs, outputs=[y1, y2, y3])
def make_last_layers(x, num_filters, out_filters):
    """Apply the detection-head stack: five conv blocks, then the output pair.

    :param x: input tensor.
    :param num_filters: filter count of the 1x1 blocks; the 3x3 blocks use
        twice as many.
    :param out_filters: filter count of the final plain 1x1 convolution.
    :return: ``(x, y)`` -- ``x`` is the output of the five-block trunk and
        ``y`` is the result of one more 3x3 block plus the plain 1x1
        convolution applied to ``x``.
    """
    # Alternating 1x1 (narrow) and 3x3 (wide) Conv-BN-Leaky blocks.
    trunk_spec = (
        (num_filters, (1, 1)),
        (num_filters * 2, (3, 3)),
        (num_filters, (1, 1)),
        (num_filters * 2, (3, 3)),
        (num_filters, (1, 1)),
    )
    trunk = compose(*[DarknetConv2D_BN_Leaky(width, kernel)
                      for width, kernel in trunk_spec])
    x = trunk(x)

    head = compose(
        DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)),
        DarknetConv2D(out_filters, (1, 1)))
    y = head(x)
    return x, y
def make_last_layers(x, filters, out_filters):
    """Run six Conv-BN-LeakyReLU blocks followed by a plain 1x1 convolution.

    :param x: input tensor.
    :param filters: filter count of the 1x1 blocks; the 3x3 blocks use
        ``filters * 2``.
    :param out_filters: filter count of the final plain convolution.
    :return: ``(x, y)`` -- the output after the first five blocks, and the
        output after the sixth block plus the plain convolution.
    """
    narrow = filters
    wide = filters * 2

    trunk_layers = [
        DarknetConv2D_BN_Leaky(narrow, (1, 1)),
        DarknetConv2D_BN_Leaky(wide, (3, 3)),
        DarknetConv2D_BN_Leaky(narrow, (1, 1)),
        DarknetConv2D_BN_Leaky(wide, (3, 3)),
        DarknetConv2D_BN_Leaky(narrow, (1, 1)),
    ]
    x = compose(*trunk_layers)(x)

    output_layers = [
        DarknetConv2D_BN_Leaky(wide, (3, 3)),
        DarknetConv2D(out_filters, (1, 1)),
    ]
    y = compose(*output_layers)(x)
    return x, y
def yolo_body(inputs, num_anchors, num_classes):
    """Create the YOLOv3 model CNN body in Keras.

    The Darknet backbone output feeds the first detection stack; each of
    the next two stacks works on the previous stack's features, upsampled
    and concatenated with an intermediate backbone layer output.

    :param inputs: input tensor; also used as the ``Model`` input.
    :param num_anchors: anchor count used to size each output.
    :param num_classes: class count used to size each output.
    :return: ``Model(inputs, [y1, y2, y3])`` where each output has
        ``num_anchors * (num_classes + 5)`` filters.
    """
    out_filters = num_anchors * (num_classes + 5)
    darknet = Model(inputs, darknet_body(inputs))

    def upsample_and_merge(features, width, skip_index):
        # 1x1 conv block + 2x upsampling, then concatenation with the
        # backbone layer at the given position in ``darknet.layers``.
        upsampled = compose(
            DarknetConv2D_BN_Leaky(width, (1, 1)),
            UpSampling2D(2))(features)
        return Concatenate()([upsampled, darknet.layers[skip_index].output])

    x, y1 = make_last_layers(darknet.output, 512, out_filters)

    # The hard-coded indices 152 and 92 select the skip connections by
    # position in the backbone's layer list.
    x = upsample_and_merge(x, 256, 152)
    x, y2 = make_last_layers(x, 256, out_filters)

    x = upsample_and_merge(x, 128, 92)
    x, y3 = make_last_layers(x, 128, out_filters)

    return Model(inputs, [y1, y2, y3])
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Compose a Darknet convolution, batch normalization and LeakyReLU.

    :param args: positional arguments forwarded to ``DarknetConv2D``.
    :param kwargs: keyword arguments forwarded to ``DarknetConv2D``;
        ``use_bias`` defaults to ``False`` unless the caller supplies it.
    :return: the composed callable produced by ``compose``.
    """
    conv_kwargs = dict(kwargs)
    # Bias is off by default, but an explicit caller value still wins.
    conv_kwargs.setdefault('use_bias', False)

    conv = DarknetConv2D(*args, **conv_kwargs)
    norm = BatchNormalization()
    activation = LeakyReLU(alpha=0.1)
    return compose(conv, norm, activation)
def make_last_layers(input_layer, num_filters, out_filters):
    """
    Output stage of the YOLOv3 network: five convolution blocks, followed
    by one more convolution block and a linear output convolution. The
    output of the first five blocks is also returned so that callers can
    pass it on to the next output stage.

    :param input_layer: input tensor
    :param num_filters: filter count of the intermediate 1x1 blocks (the
        3x3 blocks use twice as many)
    :param out_filters: filter count of the output convolution
    :return: (x, y) -- x is the output of the five-block trunk, y is the
        output of the final linear convolution
    """
    trunk_layers = []
    for position in range(5):
        if position % 2 == 0:
            # Even positions: 1x1 block at the base width.
            trunk_layers.append(DarknetConv2D_BN_Leaky(num_filters, (1, 1)))
        else:
            # Odd positions: 3x3 block at twice the base width.
            trunk_layers.append(
                DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)))
    x = compose(*trunk_layers)(input_layer)

    expand = DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3))
    project = DarknetConv2D(out_filters, (1, 1))
    y = compose(expand, project)(x)
    return x, y
def resblock_body(x, num_filters, num_blocks):
    """Downsample with a strided convolution, then apply residual blocks.

    :param x: input tensor.
    :param num_filters: filter count of the downsampling convolution and of
        each block's 3x3 convolution; the 1x1 convolution uses half.
    :param num_blocks: number of residual blocks to apply.
    :return: the tensor after the last residual addition.
    """
    # Darknet pads only on the top and left rather than using 'same' mode.
    padded = ZeroPadding2D(((1, 0), (1, 0)))(x)
    x = DarknetConv2D_BN_Leaky(num_filters, (3, 3), strides=(2, 2))(padded)

    bottleneck_filters = num_filters // 2
    for _ in range(num_blocks):
        residual = compose(
            DarknetConv2D_BN_Leaky(bottleneck_filters, (1, 1)),
            DarknetConv2D_BN_Leaky(num_filters, (3, 3)))(x)
        x = Add()([x, residual])
    return x
def resblock_body(x, filters, num_blocks):
    """Residual stage for Darknet: strided convolution plus residual blocks.

    :param x: input tensor.
    :param filters: filter count of the strided convolution and of each
        block's 3x3 convolution; the 1x1 convolution uses ``filters // 2``.
    :param num_blocks: number of residual blocks.
    :return: the tensor after the final residual addition.
    """
    # Pad one row on top and one column on the left before the
    # stride-2 convolution.
    x = ZeroPadding2D(((1, 0), (1, 0)))(x)
    downsample = DarknetConv2D_BN_Leaky(filters, (3, 3), strides=(2, 2))
    x = downsample(x)

    block_index = 0
    while block_index < num_blocks:
        squeeze = DarknetConv2D_BN_Leaky(filters // 2, (1, 1))
        expand = DarknetConv2D_BN_Leaky(filters, (3, 3))
        branch_out = compose(squeeze, expand)(x)
        x = Add()([x, branch_out])
        block_index += 1
    return x
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """
    The "convolution + normalization + activation" unit of the YOLOv3
    network: conv -> BN -> activation.

    :param args: positional arguments forwarded to ``DarknetConv2D``
    :param kwargs: keyword arguments forwarded to ``DarknetConv2D``;
        ``use_bias`` is ``False`` unless given explicitly
    :return: the composed callable produced by ``compose``
    """
    # Caller-supplied keywords take precedence over the no-bias default.
    conv_kwargs = dict({'use_bias': False}, **kwargs)
    stages = (
        DarknetConv2D(*args, **conv_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1),
    )
    return compose(*stages)
def yolo_body(inputs, num_anchors, num_classes):
    """
    Main body of the YOLOv3 convolutional network.

    :param inputs: input tensor
    :param num_anchors: number of anchor boxes
    :param num_classes: number of classes
    :return: Model with three outputs, each having
        ``num_anchors * (num_classes + 5)`` filters
    """
    out_filters = num_anchors * (num_classes + 5)
    darknet = Model(inputs, darknet_body(inputs))

    # First output: per the original notes, the 13*13 small-scale feature
    # map, used to detect large objects.
    x, y = make_last_layers(darknet.output, 512, out_filters)
    outputs = [y]

    # Second and third outputs: per the original notes, the 26*26
    # medium-scale map (medium objects) and the 52*52 large-scale map
    # (small objects). Each pair is (filter count, index of the backbone
    # layer whose output is concatenated in).
    for width, skip_index in ((256, 152), (128, 92)):
        x = compose(DarknetConv2D_BN_Leaky(width, (1, 1)),
                    UpSampling2D(2))(x)
        x = Concatenate()([x, darknet.layers[skip_index].output])
        x, y = make_last_layers(x, width, out_filters)
        outputs.append(y)

    return Model(inputs, outputs)
def resblock_body(input_layer, num_filters, num_blocks):
    """
    Residual stage of the YOLOv3 network: conv -> residual blocks -> output.

    :param input_layer: input tensor
    :param num_filters: filter count of the strided convolution and of each
        block's 3x3 convolution (the 1x1 convolution uses half)
    :param num_blocks: number of residual blocks
    :return: output tensor
    """
    # Pad top/left by one, then downsample with a stride-2 convolution.
    padded = ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
    features = DarknetConv2D_BN_Leaky(
        num_filters, (3, 3), strides=(2, 2))(padded)

    half_filters = num_filters // 2
    for _ in range(num_blocks):
        shortcut = features
        branch = compose(
            DarknetConv2D_BN_Leaky(half_filters, (1, 1)),
            DarknetConv2D_BN_Leaky(num_filters, (3, 3)))(shortcut)
        # Residual addition.
        features = Add()([shortcut, branch])
    return features