Example #1
0
def csp_block(inputs, filters, num_blocks):
    """
    Build a CSPBlock on top of a 4D (N, H, W, C) tensor.

    The input first goes through a strided 3x3 convolution with `filters` channels.
    Two 1x1 convolutions with `filters // 2` channels then branch off that result:
        - the main branch goes through `num_blocks` residual blocks followed by a 1x1 convolution
        - the route branch is left as is and concatenated to the output of the main branch
    A final 1x1 convolution with `filters` channels is applied to the concatenation.

    Args:
        inputs (tf.Tensor): 4D (N,H,W,C) input tensor
        filters (int): Number of filters to use
        num_blocks (int): Number of residual blocks to apply

    Returns:
        tf.Tensor: 4D (N,H/2,W/2,filters) output tensor
    """

    def pointwise(tensor, num_filters):
        # 1x1, stride-1 conv_bn with mish activation, the only non-strided conv used here
        return conv_bn(
            tensor,
            filters=num_filters,
            kernel_size=1,
            strides=1,
            activation="mish",
        )

    branch_filters = filters // 2

    downsampled = conv_bn(
        inputs,
        filters=filters,
        kernel_size=3,
        strides=2,
        zero_pad=True,
        padding="valid",
        activation="mish",
    )

    # The route branch is created before the main branch: Keras numbers auto-named
    # layers in creation order, so this order is kept on purpose.
    route = pointwise(downsampled, branch_filters)
    main = pointwise(downsampled, branch_filters)

    main = residual_block(main, num_blocks=num_blocks)
    main = pointwise(main, branch_filters)

    merged = tf.keras.layers.Concatenate()([main, route])
    return pointwise(merged, filters)
Example #2
0
def csp_darknet53(input_shape):
    """
    CSPDarknet53 implementation based on AlexeyAB/darknet config

    https://github.com/AlexeyAB/darknet/blob/master/cfg/yolov4.cfg

    Args:
        input_shape: Shape forwarded to `tf.keras.Input(shape=...)`, batch dimension excluded.

    Returns:
        tf.keras.Model: Model named "CSPDarknet53" whose three outputs are the results of the
            third, fourth and fifth downsampling stages (256, 512 and 1024 filters).
    """

    def mish_conv(tensor, num_filters, kernel_size):
        # Stride-1 conv_bn with mish activation
        return conv_bn(
            tensor,
            filters=num_filters,
            kernel_size=kernel_size,
            strides=1,
            activation="mish",
        )

    inputs = tf.keras.Input(shape=input_shape)

    # The "Lxx -> Lyy" ranges below presumably point at line numbers of the yolov4.cfg linked above.
    # First downsampling: L29 -> L103
    stem = mish_conv(inputs, 32, 3)

    # This stage could be written as a CSPBlock if the number of filters could change in the middle.
    # For readability, the CSPBlock is kept as simple as possible at the cost of a little redundancy.
    downsampled = conv_bn(
        stem,
        filters=64,
        kernel_size=3,
        strides=2,
        zero_pad=True,
        padding="valid",
        activation="mish",
    )
    route = mish_conv(downsampled, 64, 1)

    shortcut = mish_conv(downsampled, 64, 1)
    residual = mish_conv(shortcut, 32, 1)
    residual = mish_conv(residual, 64, 3)

    stage_1 = residual + shortcut
    stage_1 = mish_conv(stage_1, 64, 1)
    stage_1 = tf.keras.layers.Concatenate()([stage_1, route])
    stage_1 = mish_conv(stage_1, 64, 1)

    # Second downsampling: L105 -> L191
    stage_2 = csp_block(stage_1, filters=128, num_blocks=2)

    # Third downsampling: L193 -> L400
    output_1 = csp_block(stage_2, filters=256, num_blocks=8)

    # Fourth downsampling: L402 -> L614
    output_2 = csp_block(output_1, filters=512, num_blocks=8)

    # Fifth downsampling: L616 -> L744
    output_3 = csp_block(output_2, filters=1024, num_blocks=4)

    backbone_outputs = [output_1, output_2, output_3]
    return tf.keras.Model(inputs, backbone_outputs, name="CSPDarknet53")
Example #3
0
def residual_block(inputs, num_blocks):
    """
    Chain `num_blocks` residual connections.

    Each iteration applies a 1x1 then a 3x3 convolution (stride 1, mish activation, same number
    of filters as the input channels) and adds the tensor that entered the iteration to the result.

    Args:
        inputs (tf.Tensor): 4D (N,H,W,C) input tensor
        num_blocks (int): Number of residual blocks

    Returns:
        tf.Tensor: 4D (N,H,W,C) output Tensor
    """
    # Unpacking into four names requires a rank-4 shape; only the channel count is used
    _batch, _height, _width, channels = inputs.shape

    outputs = inputs
    for _ in range(num_blocks):
        residual = conv_bn(
            outputs, channels, kernel_size=1, strides=1, activation="mish"
        )
        residual = conv_bn(
            residual, channels, kernel_size=3, strides=1, activation="mish"
        )

        # Skip connection: convolution result + tensor that entered this iteration
        outputs = residual + outputs

    return outputs
Example #4
0
def yolov4_neck(input_shapes):
    """
    Implements the neck of YOLOv4, including the SPP and the modified PAN.

    Args:
        input_shapes (List[Tuple[int]]): List of 3 tuples, which are the output shapes of the backbone.
            For CSPDarknet53, those are: [(52, 52, 256), (26, 26, 512), (13, 13, 1024)] for a (416, 416) input.
            Each shape may also carry a leading batch dimension, e.g. (None, 52, 52, 256): only the
            trailing three dimensions are used. None entries among those three are kept, so shapes
            with unknown spatial size such as (None, None, None, 256) are passed on as (None, None, 256).

    Returns:
        tf.keras.Model: Neck model named "YOLOv4_neck". It takes the three backbone feature maps as
            inputs and returns three feature maps with 128, 256 and 512 filters, in that order.
    """

    def leaky_conv(tensor, num_filters, kernel_size):
        # Stride-1 conv_bn with leaky_relu activation, the only convolution used in the neck
        return conv_bn(
            tensor,
            filters=num_filters,
            kernel_size=kernel_size,
            strides=1,
            activation="leaky_relu",
        )

    # Keep the trailing (H, W, C) dimensions only. `filter(None, shape)` would also drop unknown
    # spatial dimensions and hand Keras a shape of the wrong rank.
    input_1 = tf.keras.Input(shape=tuple(input_shapes[0])[-3:])
    input_2 = tf.keras.Input(shape=tuple(input_shapes[1])[-3:])
    input_3 = tf.keras.Input(shape=tuple(input_shapes[2])[-3:])

    x = leaky_conv(input_3, 512, 1)
    x = leaky_conv(x, 1024, 3)
    x = leaky_conv(x, 512, 1)

    # SPP: stride-1 max-poolings with growing windows, concatenated with the un-pooled features
    maxpool_1 = tf.keras.layers.MaxPool2D((5, 5), strides=1, padding="same")(x)
    maxpool_2 = tf.keras.layers.MaxPool2D((9, 9), strides=1, padding="same")(x)
    maxpool_3 = tf.keras.layers.MaxPool2D((13, 13), strides=1, padding="same")(x)

    spp = tf.keras.layers.Concatenate()([maxpool_3, maxpool_2, maxpool_1, x])

    x = leaky_conv(spp, 512, 1)
    x = leaky_conv(x, 1024, 3)
    output_3 = leaky_conv(x, 512, 1)
    x = leaky_conv(output_3, 256, 1)

    # Upsample the deepest features and merge them with the second backbone output
    upsampled = tf.keras.layers.UpSampling2D()(x)

    x = leaky_conv(input_2, 256, 1)
    x = tf.keras.layers.Concatenate()([x, upsampled])

    x = leaky_conv(x, 256, 1)
    x = leaky_conv(x, 512, 3)
    x = leaky_conv(x, 256, 1)
    x = leaky_conv(x, 512, 3)
    output_2 = leaky_conv(x, 256, 1)
    x = leaky_conv(output_2, 128, 1)

    # Same merge, one level up, with the first backbone output
    upsampled = tf.keras.layers.UpSampling2D()(x)

    x = leaky_conv(input_1, 128, 1)
    x = tf.keras.layers.Concatenate()([x, upsampled])

    x = leaky_conv(x, 128, 1)
    x = leaky_conv(x, 256, 3)
    x = leaky_conv(x, 128, 1)
    x = leaky_conv(x, 256, 3)
    output_1 = leaky_conv(x, 128, 1)

    return tf.keras.Model(
        [input_1, input_2, input_3], [output_1, output_2, output_3], name="YOLOv4_neck"
    )
def yolov3_head(
    input_shapes,
    anchors,
    num_classes,
    training,
    yolo_max_boxes,
    yolo_iou_threshold,
    yolo_score_threshold,
):
    """
    Returns the YOLOv3 head, which is used in YOLOv4

    Args:
        input_shapes (List[Tuple[int]]): List of 3 tuples, which are the output shapes of the neck.
            A leading batch dimension, if present, is dropped: only the trailing three dimensions
            of each shape are used, and None entries among those three are kept.
            For CSPDarknet53+YOLOv4_neck, those are: [ (52, 52, 128), (26, 26, 256), (13, 13, 512)] for a (416,
            416) input.
        anchors (List[numpy.array[int, 2]]): List of 3 numpy arrays containing the anchor sizes used for each stage.
            The first and second columns of the numpy arrays respectively contain the anchors width and height.
        num_classes (int): Number of classes.
        training (boolean): If False, will output boxes computed through YOLO regression and NMS, and YOLO features
            otherwise. Set it True for training, and False for inferences.
        yolo_max_boxes (int): Maximum number of boxes predicted on each image (across all anchors/stages)
        yolo_iou_threshold (float between 0. and 1.): IOU threshold defining whether close boxes will be merged
            during non max regression.
        yolo_score_threshold (float between 0. and 1.): Boxes with score lower than this threshold will be filtered
            out during non max regression.
    Returns:
        tf.keras.Model: Head model named "YOLOv3_head". When `training` is truthy its outputs are the three
            per-stage outputs of `conv_classes_anchors`; otherwise its output is the result of `yolo_nms`
            applied to the three per-stage `yolov3_boxes_regression` results.
    """

    def leaky_conv(tensor, num_filters, kernel_size):
        # Stride-1 conv_bn with leaky_relu activation
        return conv_bn(
            tensor,
            filters=num_filters,
            kernel_size=kernel_size,
            strides=1,
            activation="leaky_relu",
        )

    def downsample_conv(tensor, num_filters):
        # Stride-2 3x3 conv_bn (zero-padded, "valid") used to bring a stage to the next stage's grid
        return conv_bn(
            tensor,
            filters=num_filters,
            kernel_size=3,
            strides=2,
            zero_pad=True,
            padding="valid",
            activation="leaky_relu",
        )

    # Keep the trailing (H, W, C) dimensions only. `filter(None, shape)` would also drop unknown
    # spatial dimensions and hand Keras a shape of the wrong rank.
    input_1 = tf.keras.Input(shape=tuple(input_shapes[0])[-3:])
    input_2 = tf.keras.Input(shape=tuple(input_shapes[1])[-3:])
    input_3 = tf.keras.Input(shape=tuple(input_shapes[2])[-3:])

    # First stage: predictions computed directly from the first input
    x = leaky_conv(input_1, 256, 3)
    output_1 = conv_classes_anchors(
        x, num_anchors_stage=len(anchors[0]), num_classes=num_classes
    )

    # Second stage: downsampled first input merged with the second input
    x = downsample_conv(input_1, 256)
    x = tf.keras.layers.Concatenate()([x, input_2])
    x = leaky_conv(x, 256, 1)
    x = leaky_conv(x, 512, 3)
    x = leaky_conv(x, 256, 1)
    x = leaky_conv(x, 512, 3)
    # `connection` feeds both the second-stage prediction and the third stage
    connection = leaky_conv(x, 256, 1)
    x = leaky_conv(connection, 512, 3)
    output_2 = conv_classes_anchors(
        x, num_anchors_stage=len(anchors[1]), num_classes=num_classes
    )

    # Third stage: downsampled `connection` merged with the third input
    x = downsample_conv(connection, 512)
    x = tf.keras.layers.Concatenate()([x, input_3])
    x = leaky_conv(x, 512, 1)
    x = leaky_conv(x, 1024, 3)
    x = leaky_conv(x, 512, 1)
    x = leaky_conv(x, 1024, 3)
    x = leaky_conv(x, 512, 1)
    x = leaky_conv(x, 1024, 3)
    output_3 = conv_classes_anchors(
        x, num_anchors_stage=len(anchors[2]), num_classes=num_classes
    )

    if training:
        # Training mode: expose the raw per-stage outputs
        return tf.keras.Model(
            [input_1, input_2, input_3],
            [output_1, output_2, output_3],
            name="YOLOv3_head",
        )

    # Inference mode: per-stage box regression followed by a single NMS over all stages
    predictions_1 = tf.keras.layers.Lambda(
        lambda x_input: yolov3_boxes_regression(x_input, anchors[0]),
        name="yolov3_boxes_regression_small_scale",
    )(output_1)
    predictions_2 = tf.keras.layers.Lambda(
        lambda x_input: yolov3_boxes_regression(x_input, anchors[1]),
        name="yolov3_boxes_regression_medium_scale",
    )(output_2)
    predictions_3 = tf.keras.layers.Lambda(
        lambda x_input: yolov3_boxes_regression(x_input, anchors[2]),
        name="yolov3_boxes_regression_large_scale",
    )(output_3)

    output = tf.keras.layers.Lambda(
        lambda x_input: yolo_nms(
            x_input,
            yolo_max_boxes=yolo_max_boxes,
            yolo_iou_threshold=yolo_iou_threshold,
            yolo_score_threshold=yolo_score_threshold,
        ),
        name="yolov4_nms",
    )([predictions_1, predictions_2, predictions_3])

    return tf.keras.Model([input_1, input_2, input_3], output, name="YOLOv3_head")