def yolo_body(inputs, num_anchors, num_classes):
    """Build the detection model: backbone, SPP block, feature fusion, three heads.

    Args:
        inputs: Input tensor passed to ``darknet_body`` and to ``Model``.
        num_anchors: Multiplier for the head width (anchors per scale).
        num_classes: Number of classes; each anchor predicts ``num_classes + 5`` values.

    Returns:
        ``Model(inputs, [P5_output, P4_output, P3_output])``; each output has
        ``num_anchors * (num_classes + 5)`` channels.
    """
    head_channels = num_anchors * (num_classes + 5)

    def conv_triple(x):
        # 1x1 (512) -> 3x3 (1024) -> 1x1 (512); each layer is built, then applied.
        for filters, kernel in ((512, (1, 1)), (1024, (3, 3)), (512, (1, 1))):
            x = DarknetConv2D_BN_Leaky(filters, kernel)(x)
        return x

    def head(x, filters):
        # 3x3 conv block followed by the plain 1x1 prediction conv.
        x = DarknetConv2D_BN_Leaky(filters, (3, 3))(x)
        return DarknetConv2D(head_channels, (1, 1))(x)

    def downsample(x, filters):
        # Pad top/left by one, then a stride-2 3x3 conv block.
        x = ZeroPadding2D(((1, 0), (1, 0)))(x)
        return DarknetConv2D_BN_Leaky(filters, (3, 3), strides=(2, 2))(x)

    feat1, feat2, feat3 = darknet_body(inputs)

    # Deepest feature map; the original note reads y1=(batch_size,13,13,3,85).
    P5 = conv_triple(feat3)

    # SPP: stride-1 max pools with windows 13, 9, 5, stacked with the unpooled map.
    pooled = [
        MaxPooling2D(pool_size=(k, k), strides=(1, 1), padding='same')(P5)
        for k in (13, 9, 5)
    ]
    P5 = Concatenate()(pooled + [P5])
    P5 = conv_triple(P5)

    # Top-down path: reduce channels, upsample x2, merge with the shallower feature.
    P5_upsample = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(P5)
    P4 = DarknetConv2D_BN_Leaky(256, (1, 1))(feat2)
    P4 = Concatenate()([P4, P5_upsample])
    P4 = make_five_convs(P4, 256)

    P4_upsample = compose(DarknetConv2D_BN_Leaky(128, (1, 1)), UpSampling2D(2))(P4)
    P3 = DarknetConv2D_BN_Leaky(128, (1, 1))(feat1)
    P3 = Concatenate()([P3, P4_upsample])
    P3 = make_five_convs(P3, 128)

    P3_output = head(P3, 256)

    # Bottom-up path; the original note labels this step "26,26 output".
    P3_downsample = downsample(P3, 256)
    P4 = Concatenate()([P3_downsample, P4])
    P4 = make_five_convs(P4, 256)

    P4_output = head(P4, 512)

    # The original note labels this step "13,13 output".
    P4_downsample = downsample(P4, 512)
    P5 = Concatenate()([P4_downsample, P5])
    P5 = make_five_convs(P5, 512)

    P5_output = head(P5, 1024)

    return Model(inputs, [P5_output, P4_output, P3_output])
def yolo_body(inputs, num_anchors, num_classes):
    """Wire the backbone features through SPP and two fusion passes into three heads.

    Args:
        inputs: Input tensor passed to ``darknet_body`` and to ``Model``.
        num_anchors: Anchors per scale (the original note says 3).
        num_classes: Number of classes. Each anchor predicts ``num_classes + 5``
            values: class scores, 1 objectness flag and 4 box parameters,
            per the original note.

    Returns:
        ``Model`` whose outputs are ordered deepest, middle, shallowest.
    """
    out_channels = num_anchors * (num_classes + 5)
    triple = ((512, (1, 1)), (1024, (3, 3)), (512, (1, 1)))

    # Backbone (the original note calls it darknet53).
    feat1, feat2, feat3 = darknet_body(inputs)

    # Three convolutions on feat3 before the SPP block.
    deep = feat3
    for width, ksize in triple:
        deep = DarknetConv2D_BN_Leaky(width, ksize)(deep)

    # SPP: max pooling at several window sizes, stacked with the unpooled map.
    spp_branches = []
    for window in (13, 9, 5):
        pool = MaxPooling2D(pool_size=(window, window), strides=(1, 1), padding='same')
        spp_branches.append(pool(deep))
    spp_branches.append(deep)
    deep = Concatenate()(spp_branches)

    # Three convolutions after the SPP aggregation.
    for width, ksize in triple:
        deep = DarknetConv2D_BN_Leaky(width, ksize)(deep)

    # Convolution + upsampling, merged with the middle backbone feature.
    deep_up = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(deep)
    mid = DarknetConv2D_BN_Leaky(256, (1, 1))(feat2)
    mid = make_five_convs(Concatenate()([mid, deep_up]), 256)

    mid_up = compose(DarknetConv2D_BN_Leaky(128, (1, 1)), UpSampling2D(2))(mid)
    shallow = DarknetConv2D_BN_Leaky(128, (1, 1))(feat1)
    shallow = make_five_convs(Concatenate()([shallow, mid_up]), 128)

    # Head on the shallowest level.
    shallow_out = DarknetConv2D_BN_Leaky(256, (3, 3))(shallow)
    shallow_out = DarknetConv2D(out_channels, (1, 1))(shallow_out)

    # Downsample and fuse into the middle level (original note: "38x38 output").
    shallow_down = ZeroPadding2D(((1, 0), (1, 0)))(shallow)
    shallow_down = DarknetConv2D_BN_Leaky(256, (3, 3), strides=(2, 2))(shallow_down)
    mid = make_five_convs(Concatenate()([shallow_down, mid]), 256)

    mid_out = DarknetConv2D_BN_Leaky(512, (3, 3))(mid)
    mid_out = DarknetConv2D(out_channels, (1, 1))(mid_out)

    # Downsample and fuse into the deepest level (original note: "19x19 output").
    mid_down = ZeroPadding2D(((1, 0), (1, 0)))(mid)
    mid_down = DarknetConv2D_BN_Leaky(512, (3, 3), strides=(2, 2))(mid_down)
    deep = make_five_convs(Concatenate()([mid_down, deep]), 512)

    deep_out = DarknetConv2D_BN_Leaky(1024, (3, 3))(deep)
    deep_out = DarknetConv2D(out_channels, (1, 1))(deep_out)

    return Model(inputs, [deep_out, mid_out, shallow_out])
def yolo_body(inputs, num_anchors, num_classes):
    """Construct the full detector graph from backbone features to three outputs.

    Shape annotations in the comments are carried over from the original
    notes and presumably assume a 416x416 input (TODO confirm); nothing in
    this function fixes the input resolution.

    Args:
        inputs: Input tensor passed to ``darknet_body`` and to ``Model``.
        num_anchors: Anchors per scale.
        num_classes: Number of classes; each anchor predicts ``num_classes + 5`` values.

    Returns:
        ``Model(inputs, [P5_output, P4_output, P3_output])``.
    """
    n_out = num_anchors * (num_classes + 5)

    def _three_convs(tensor):
        # 512 (1x1) -> 1024 (3x3) -> 512 (1x1)
        for n_filters, k in ((512, (1, 1)), (1024, (3, 3)), (512, (1, 1))):
            tensor = DarknetConv2D_BN_Leaky(n_filters, k)(tensor)
        return tensor

    def _predict(tensor, n_filters):
        # 3x3 conv block, then a plain 1x1 conv producing n_out channels.
        tensor = DarknetConv2D_BN_Leaky(n_filters, (3, 3))(tensor)
        return DarknetConv2D(n_out, (1, 1))(tensor)

    def _halve(tensor, n_filters):
        # Asymmetric zero padding (top/left) followed by a stride-2 3x3 conv block.
        tensor = ZeroPadding2D(((1, 0), (1, 0)))(tensor)
        return DarknetConv2D_BN_Leaky(n_filters, (3, 3), strides=(2, 2))(tensor)

    # ---------------------------------------------------------------- #
    #   Backbone (the original note calls it CSPdarknet53).
    #   Three feature maps, noted as 52,52,256 / 26,26,512 / 13,13,1024.
    # ---------------------------------------------------------------- #
    feat1, feat2, feat3 = darknet_body(inputs)

    # 13,13,1024 -> 512 -> 1024 -> 512 -> (SPP) 2048 -> 512 -> 1024 -> 512
    P5 = _three_convs(feat3)
    # SPP: max pooling at several window sizes, stacked with the unpooled map.
    spp = [
        MaxPooling2D(pool_size=(size, size), strides=(1, 1), padding='same')(P5)
        for size in (13, 9, 5)
    ]
    spp.append(P5)
    P5 = Concatenate()(spp)
    P5 = _three_convs(P5)

    # 13,13,512 -> 13,13,256 -> 26,26,256
    P5_upsample = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(P5)
    # 26,26,512 -> 26,26,256
    P4 = DarknetConv2D_BN_Leaky(256, (1, 1))(feat2)
    # 26,26,256 + 26,26,256 -> 26,26,512
    P4 = Concatenate()([P4, P5_upsample])
    # five convs ending at 26,26,256
    P4 = make_five_convs(P4, 256)

    # 26,26,256 -> 26,26,128 -> 52,52,128
    P4_upsample = compose(DarknetConv2D_BN_Leaky(128, (1, 1)), UpSampling2D(2))(P4)
    # 52,52,256 -> 52,52,128
    P3 = DarknetConv2D_BN_Leaky(128, (1, 1))(feat1)
    # 52,52,128 + 52,52,128 -> 52,52,256
    P3 = Concatenate()([P3, P4_upsample])
    # five convs ending at 52,52,128
    P3 = make_five_convs(P3, 128)

    # Third feature layer: y3=(batch_size,52,52,3,85)
    P3_output = _predict(P3, 256)

    # 52,52,128 -> 26,26,256
    P3_downsample = _halve(P3, 256)
    # 26,26,256 + 26,26,256 -> 26,26,512
    P4 = Concatenate()([P3_downsample, P4])
    # five convs ending at 26,26,256
    P4 = make_five_convs(P4, 256)

    # Second feature layer: y2=(batch_size,26,26,3,85)
    P4_output = _predict(P4, 512)

    # 26,26,256 -> 13,13,512
    P4_downsample = _halve(P4, 512)
    # 13,13,512 + 13,13,512 -> 13,13,1024
    P5 = Concatenate()([P4_downsample, P5])
    # five convs ending at 13,13,512
    P5 = make_five_convs(P5, 512)

    # First feature layer: y1=(batch_size,13,13,3,85)
    P5_output = _predict(P5, 1024)

    return Model(inputs, [P5_output, P4_output, P3_output])
def yolo_body(inputs, num_anchors, num_classes):
    """Create the detection model on top of the ``darknet_body`` feature maps.

    Args:
        inputs: Input tensor passed to ``darknet_body`` and to ``Model``.
        num_anchors: Anchors per scale.
        num_classes: Number of classes; each anchor predicts ``num_classes + 5`` values.

    Returns:
        ``Model`` with three outputs, ordered from the deepest feature level
        to the shallowest.
    """
    # Short aliases for the two conv builders used throughout.
    cbl = DarknetConv2D_BN_Leaky
    conv = DarknetConv2D
    pad_top_left = ((1, 0), (1, 0))
    pred_channels = num_anchors * (num_classes + 5)

    # Backbone features (the original note calls the backbone darknet53).
    feat1, feat2, feat3 = darknet_body(inputs)

    # First feature layer; the original note reads y1=(batch_size,13,13,3,85).
    p5 = cbl(512, (1, 1))(feat3)
    p5 = cbl(1024, (3, 3))(p5)
    p5 = cbl(512, (1, 1))(p5)

    # SPP structure: max pooling at different scales, stacked together.
    pool13 = MaxPooling2D(pool_size=(13, 13), strides=(1, 1), padding='same')(p5)
    pool9 = MaxPooling2D(pool_size=(9, 9), strides=(1, 1), padding='same')(p5)
    pool5 = MaxPooling2D(pool_size=(5, 5), strides=(1, 1), padding='same')(p5)
    p5 = Concatenate()([pool13, pool9, pool5, p5])
    p5 = cbl(512, (1, 1))(p5)
    p5 = cbl(1024, (3, 3))(p5)
    p5 = cbl(512, (1, 1))(p5)

    # Top-down fusion into the middle level.
    p5_up = compose(cbl(256, (1, 1)), UpSampling2D(2))(p5)
    p4 = cbl(256, (1, 1))(feat2)
    p4 = Concatenate()([p4, p5_up])
    p4 = make_five_convs(p4, 256)

    # Top-down fusion into the shallowest level.
    p4_up = compose(cbl(128, (1, 1)), UpSampling2D(2))(p4)
    p3 = cbl(128, (1, 1))(feat1)
    p3 = Concatenate()([p3, p4_up])
    p3 = make_five_convs(p3, 128)

    p3_out = cbl(256, (3, 3))(p3)
    p3_out = conv(pred_channels, (1, 1))(p3_out)

    # Original note: "26,26 output".
    p3_down = ZeroPadding2D(pad_top_left)(p3)
    p3_down = cbl(256, (3, 3), strides=(2, 2))(p3_down)
    p4 = Concatenate()([p3_down, p4])
    p4 = make_five_convs(p4, 256)

    p4_out = cbl(512, (3, 3))(p4)
    p4_out = conv(pred_channels, (1, 1))(p4_out)

    # Original note: "13,13 output".
    p4_down = ZeroPadding2D(pad_top_left)(p4)
    p4_down = cbl(512, (3, 3), strides=(2, 2))(p4_down)
    p5 = Concatenate()([p4_down, p5])
    p5 = make_five_convs(p5, 512)

    p5_out = cbl(1024, (3, 3))(p5)
    p5_out = conv(pred_channels, (1, 1))(p5_out)

    return Model(inputs, [p5_out, p4_out, p3_out])
def yolo_body(inputs, num_anchors, num_classes):
    """Build the detector: backbone, SPP, PANet-style fusion and three YOLO heads.

    Args:
        inputs: Input tensor passed to ``darknet_body`` and to ``Model``.
        num_anchors: Prior boxes per scale (the original note says 3).
        num_classes: Number of classes. Each prior box predicts
            ``num_classes + 5`` values: class scores, 1 objectness value and
            4 box-adjustment parameters, per the original note.

    Returns:
        ``Model(inputs, [P5_output, P4_output, P3_output])``.
    """
    channels_per_head = num_anchors * (num_classes + 5)

    def squeeze_expand_squeeze(feature):
        # Three convolutions: 1x1 (512), 3x3 (1024), 1x1 (512).
        for filters, kernel in ((512, (1, 1)), (1024, (3, 3)), (512, (1, 1))):
            feature = DarknetConv2D_BN_Leaky(filters, kernel)(feature)
        return feature

    def yolo_head(feature, filters):
        # The 3x3 conv integrates the fused features; the 1x1 conv turns them
        # into the final prediction.
        feature = DarknetConv2D_BN_Leaky(filters, (3, 3))(feature)
        return DarknetConv2D(channels_per_head, (1, 1))(feature)

    def strided_downsample(feature, filters):
        # Downsample with a stride-(2, 2) convolution after top/left zero padding.
        feature = ZeroPadding2D(((1, 0), (1, 0)))(feature)
        return DarknetConv2D_BN_Leaky(filters, (3, 3), strides=(2, 2))(feature)

    # Backbone (the original note calls it darknet53).
    feat1, feat2, feat3 = darknet_body(inputs)

    # First feature layer, y1=(batch_size,13,13,3,85) per the original note.
    P5 = squeeze_expand_squeeze(feat3)

    # SPP: three max poolings at different scales, stacked together with the
    # one feature map that was not pooled.
    stacked = [
        MaxPooling2D(pool_size=(window, window), strides=(1, 1), padding='same')(P5)
        for window in (13, 9, 5)
    ]
    stacked.append(P5)
    P5 = Concatenate()(stacked)

    # Three more convolutions after the stack.
    P5 = squeeze_expand_squeeze(P5)

    # PANet part: fuses the backbone feature layers with the SPP output.
    # Upsample P5, change feat2's channel count with a 1x1 conv, then stack.
    P5_upsample = compose(DarknetConv2D_BN_Leaky(256, (1, 1)), UpSampling2D(2))(P5)
    P4 = DarknetConv2D_BN_Leaky(256, (1, 1))(feat2)
    P4 = Concatenate()([P4, P5_upsample])
    P4 = make_five_convs(P4, 256)

    # Upsample P4 and stack it with the reduced feat1.
    P4_upsample = compose(DarknetConv2D_BN_Leaky(128, (1, 1)), UpSampling2D(2))(P4)
    P3 = DarknetConv2D_BN_Leaky(128, (1, 1))(feat1)
    P3 = Concatenate()([P3, P4_upsample])
    P3 = make_five_convs(P3, 128)

    # The feature pyramid is complete here. The downsampling passes below
    # strengthen the feature fusion further.
    # YOLO head for the 52x52 level (size per the original note).
    P3_output = yolo_head(P3, 256)

    # "26,26 output" per the original note.
    P3_downsample = strided_downsample(P3, 256)
    P4 = Concatenate()([P3_downsample, P4])
    P4 = make_five_convs(P4, 256)

    # YOLO head for the 26x26 level.
    P4_output = yolo_head(P4, 512)

    # "13,13 output" per the original note: downsample once more.
    P4_downsample = strided_downsample(P4, 512)
    P5 = Concatenate()([P4_downsample, P5])
    P5 = make_five_convs(P5, 512)

    # YOLO head for the 13x13 level.
    P5_output = yolo_head(P5, 1024)

    return Model(inputs, [P5_output, P4_output, P3_output])