Example #1
def efficientdet(phi, num_classes=20, num_anchors=9, weighted_bifpn=False, freeze_bn=False,
                 score_threshold=0.01, detect_quadrangle=False, anchor_parameters=None, separable_conv=True):
    assert phi in range(7)
    input_size = image_sizes[phi]
    input_shape = (input_size, input_size, 3)
    image_input = layers.Input(input_shape)
    w_bifpn = w_bifpns[phi]
    d_bifpn = d_bifpns[phi]
    w_head = w_bifpn
    d_head = d_heads[phi]
    backbone_cls = backbones[phi]
    features = backbone_cls(input_tensor=image_input, freeze_bn=freeze_bn)
    if weighted_bifpn:
        fpn_features = features
        for i in range(d_bifpn):
            fpn_features = build_wBiFPN(fpn_features, w_bifpn, i, freeze_bn=freeze_bn)
    else:
        fpn_features = features
        for i in range(d_bifpn):
            fpn_features = build_BiFPN(fpn_features, w_bifpn, i, freeze_bn=freeze_bn)
    box_net = BoxNet(w_head, d_head, num_anchors=num_anchors, separable_conv=separable_conv, freeze_bn=freeze_bn,
                     detect_quadrangle=detect_quadrangle, name='box_net')
    class_net = ClassNet(w_head, d_head, num_classes=num_classes, num_anchors=num_anchors,
                         separable_conv=separable_conv, freeze_bn=freeze_bn, name='class_net')
    classification = [class_net([feature, i]) for i, feature in enumerate(fpn_features)]
    classification = layers.Concatenate(axis=1, name='classification')(classification)
    regression = [box_net([feature, i]) for i, feature in enumerate(fpn_features)]
    regression = layers.Concatenate(axis=1, name='regression')(regression)

    model = models.Model(inputs=[image_input], outputs=[classification, regression], name='efficientdet')

    # apply predicted regression to anchors
    anchors = anchors_for_shape((input_size, input_size), anchor_params=anchor_parameters)
    anchors_input = np.expand_dims(anchors, axis=0)
    boxes = RegressBoxes(name='boxes')([anchors_input, regression[..., :4]])
    boxes = ClipBoxes(name='clipped_boxes')([image_input, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    if detect_quadrangle:
        detections = FilterDetections(
            name='filtered_detections',
            score_threshold=score_threshold,
            detect_quadrangle=True
        )([boxes, classification, regression[..., 4:8], regression[..., 8]])
    else:
        detections = FilterDetections(
            name='filtered_detections',
            score_threshold=score_threshold
        )([boxes, classification])

    prediction_model = models.Model(inputs=[image_input], outputs=detections, name='efficientdet_p')
    return model, prediction_model
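A minimal usage sketch for this builder, assuming it is imported from the repository module that also defines image_sizes, the backbone classes and the anchor utilities; the loss strings below are placeholders rather than the repository's actual training losses, and batch_of_images is a hypothetical input batch.

# Hypothetical usage; assumes efficientdet() and its module-level globals are importable.
model, prediction_model = efficientdet(phi=0, num_classes=20, weighted_bifpn=True)
model.compile(optimizer='adam',
              loss={'classification': 'binary_crossentropy',  # placeholder; a focal loss is typical here
                    'regression': 'mae'})                      # placeholder; a smooth-L1 style loss is typical here
# Training updates the shared weights; prediction_model reuses them and additionally
# decodes the anchors, clips boxes to the image, and applies NMS:
# boxes, scores, labels = prediction_model.predict(batch_of_images)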
Example #2
def freeze_model(model, config, score_threshold=0.5):

    classification, regression = model.outputs

    anchors = anchors_for_shape(
        image_shape=config.input_shape,
        sizes=config.sizes,
        ratios=config.ratios,
        scales=config.scales,
        strides=config.strides,
        pyramid_levels=[3, 4, 5, 6, 7],
        shapes_callback=None,
    )
    # apply predicted regression to anchors
    anchors = tf.convert_to_tensor(anchors)
    anchors_input = tf.expand_dims(anchors, axis=0)
    boxes = RegressBoxes(name='boxes')([anchors_input, regression[..., :4]])
    boxes = ClipBoxes(name='clipped_boxes')([model.input, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    # if detect_quadrangle:
    #     detections = FilterDetections(
    #         name='filtered_detections',
    #         score_threshold=score_threshold,
    #         detect_quadrangle=True
    #     )([boxes, classification, regression[..., 4:8], regression[..., 8]])
    # else:
    detections = FilterDetections(name='filtered_detections',
                                  score_threshold=score_threshold)(
                                      [boxes, classification])

    prediction_model = models.Model(inputs=model.input,
                                    outputs=detections,
                                    name='efficientdet_p')
    return prediction_model
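A sketch of how this helper might be called, assuming config is an object exposing the input_shape, sizes, ratios, scales and strides attributes read above, and trained_model is a hypothetical trained graph whose outputs are [classification, regression].

# Hypothetical usage.
prediction_model = freeze_model(trained_model, config, score_threshold=0.5)
# The anchors are baked into the graph as constants, so inference needs only the images:
# boxes, scores, labels = prediction_model.predict(images)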
Example #3
def efficientdet(phi,
                 num_classes=20,
                 weighted_bifpn=False,
                 freeze_bn=False,
                 score_threshold=0.01):
    assert phi in range(7)
    input_size = image_sizes[phi]
    input_shape = (input_size, input_size, 3)
    # input_shape = (None, None, 3)
    image_input = layers.Input(input_shape)
    w_bifpn = w_bifpns[phi]
    d_bifpn = 2 + phi
    w_head = w_bifpn
    d_head = 3 + int(phi / 3)
    backbone_cls = backbones[phi]
    # features = backbone_cls(include_top=False, input_shape=input_shape, weights=weights)(image_input)
    features = backbone_cls(input_tensor=image_input, freeze_bn=freeze_bn)
    if weighted_bifpn:
        for i in range(d_bifpn):
            features = build_wBiFPN(features, w_bifpn, i, freeze_bn=freeze_bn)
    else:
        for i in range(d_bifpn):
            features = build_BiFPN(features, w_bifpn, i, freeze_bn=freeze_bn)
    regress_head = build_regress_head(w_head, d_head)
    class_head = build_class_head(w_head, d_head, num_classes=num_classes)
    regression = [regress_head(feature) for feature in features]
    regression = layers.Concatenate(axis=1, name='regression')(regression)
    classification = [class_head(feature) for feature in features]
    classification = layers.Concatenate(axis=1,
                                        name='classification')(classification)

    model = models.Model(inputs=[image_input],
                         outputs=[regression, classification],
                         name='efficientdet')

    # apply predicted regression to anchors
    # anchors = tf.tile(tf.expand_dims(tf.constant(anchors), axis=0), (tf.shape(regression)[0], 1, 1))
    anchors_input = layers.Input((None, 4))
    boxes = RegressBoxes(name='boxes')([anchors_input, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image_input, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(name='filtered_detections',
                                  score_threshold=score_threshold)(
                                      [boxes, classification])
    prediction_model = models.Model(inputs=[image_input, anchors_input],
                                    outputs=detections,
                                    name='efficientdet_p')
    return model, prediction_model
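Unlike Example #1, this older variant leaves the anchors as a second input of the prediction model. A minimal sketch under the assumption that anchors_for_shape follows the call convention used in Example #1 and that 512 is the phi=0 input size; images is a hypothetical input batch.

import numpy as np

# Hypothetical usage.
model, prediction_model = efficientdet(phi=0, num_classes=20)
anchors = np.expand_dims(anchors_for_shape((512, 512)), axis=0)  # add a batch dimension
# boxes, scores, labels = prediction_model.predict([images, anchors])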
Example #4
def efficientdet(num_anchors, num_classes, num_properties, w_bifpn, d_bifpn,
                 d_head, score_threshold, nms_threshold):
    image_input = layers.Input(shape=(None, None, 3))
    w_head = w_bifpn
    backbone_cls = backbones[0]
    # [(?, 256, 256, 16), (?, 128, 128, 24), (?, 64, 64, 24), (?, 32, 32, 24), (?, 16, 16, 24)]
    features = backbone_cls(input_tensor=image_input)

    fpn_features = features
    for i in range(d_bifpn):
        fpn_features = build_wBiFPN(fpn_features, w_bifpn, i)
    reg = regression_coco(fpn_features, w_head, d_head, num_anchors)
    cls = classification_coco(fpn_features, w_head, d_head, num_anchors,
                              num_classes)
    pro = properties_sand(fpn_features, w_head, d_head, num_anchors,
                          num_properties)
    model = models.Model(inputs=[image_input],
                         outputs=[reg, cls, pro],
                         name='efficientdet')

    anchors_input = layers.Input((None, 4), name='anchors_input')
    boxes = RegressBoxes(name='boxes')([anchors_input, reg])
    boxes = ClipBoxes(name='clipped_boxes')([image_input, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    # boxes (?, 49104, 4) (?, 49104, 1) (?, 49104, 3)
    detections = FilterDetections(name='filtered_detections',
                                  score_threshold=score_threshold,
                                  nms_threshold=nms_threshold,
                                  class_specific_filter=True,
                                  max_detections=100)([boxes, cls, pro])

    prediction_model = models.Model(inputs=[image_input, anchors_input],
                                    outputs=detections,
                                    name='efficientdet_p')
    return model, prediction_model
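A sketch of calling this three-headed (regression / classification / properties) variant; every hyperparameter value below is illustrative, not taken from the original code, and images / anchors are hypothetical inputs.

# Hypothetical usage.
model, prediction_model = efficientdet(num_anchors=9, num_classes=1, num_properties=3,
                                       w_bifpn=64, d_bifpn=3, d_head=3,
                                       score_threshold=0.3, nms_threshold=0.5)
# The prediction model expects precomputed anchors as a second input:
# detections = prediction_model.predict([images, anchors])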
Example #5
def efficientdet_sand(num_anchors, num_classes, num_properties, w_bifpn,
                      d_bifpn, d_head, score_threshold, nms_threshold):
    image_input = layers.Input(shape=(None, None, 3))
    w_head = w_bifpn
    backbone_cls = backbones[0]
    # [(?, 256, 256, 16), (?, 128, 128, 24), (?, 64, 64, 24), (?, 32, 32, 24), (?, 16, 16, 24)]
    features = backbone_cls(input_tensor=image_input)

    fpn_features = features
    for i in range(d_bifpn):
        fpn_features = build_wBiFPN(fpn_features, w_bifpn, i)
    reg = regression_coco(fpn_features, w_head, d_head, num_anchors)
    cls = classification_coco(fpn_features, w_head, d_head, num_anchors, 90)
    coco_model = models.Model(inputs=[image_input],
                              outputs=[reg, cls],
                              name='efficientdet_coco')
    path = os.path.join(os.path.dirname(__file__),
                        'weights/efficientdet-d0.h5')
    coco_model.load_weights(path, by_name=True)
    # for i in range(1, 227):  # 321
    #     coco_model.layers[i].trainable = False
    # coco_model.layers[i].training = False

    P3_out = coco_model.get_layer(
        name='fpn_cells/cell_2/fnode3/op_after_combine8/bn').output
    P4_td = coco_model.get_layer(
        name='fpn_cells/cell_2/fnode2/op_after_combine7/bn').output
    P5_td = coco_model.get_layer(
        name='fpn_cells/cell_2/fnode1/op_after_combine6/bn').output
    P6_td = coco_model.get_layer(
        name='fpn_cells/cell_2/fnode0/op_after_combine5/bn').output
    P7_out = coco_model.get_layer(
        name='fpn_cells/cell_2/fnode7/op_after_combine12/bn').output

    tmp_fpn_features = [P3_out, P4_td, P5_td, P6_td, P7_out]
    sand_reg = regression_sand(tmp_fpn_features, w_head, d_head, num_anchors)
    sand_cls = classification_sand(tmp_fpn_features, w_head, d_head,
                                   num_anchors, num_classes)
    sand_pro = properties_sand(tmp_fpn_features, w_head, d_head, num_anchors,
                               num_properties)
    sand_model = models.Model(inputs=[image_input],
                              outputs=[sand_reg, sand_cls, sand_pro],
                              name='efficientdet_sand')

    anchors_input = layers.Input((None, 4), name='anchors_input')
    boxes = RegressBoxes(name='boxes')([anchors_input, sand_reg])
    boxes = ClipBoxes(name='clipped_boxes')([image_input, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    # boxes (?, 49104, 4) (?, 49104, 1) (?, 49104, 3)
    detections = FilterDetections(
        name='filtered_detections',
        score_threshold=score_threshold,
        nms_threshold=nms_threshold,
        class_specific_filter=True,
        max_detections=100)([boxes, sand_cls, sand_pro])

    prediction_model = models.Model(inputs=[image_input, anchors_input],
                                    outputs=detections,
                                    name='efficientdet_p')

    return sand_model, prediction_model
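This variant initialises a COCO-trained d0 graph from weights/efficientdet-d0.h5, taps the cell_2 BiFPN layers by name, and attaches new regression/classification/properties heads. A usage sketch, with illustrative hyperparameters as in Example #4:

# Hypothetical usage; requires the pretrained weights file referenced in the function.
sand_model, prediction_model = efficientdet_sand(num_anchors=9, num_classes=1, num_properties=3,
                                                 w_bifpn=64, d_bifpn=3, d_head=3,
                                                 score_threshold=0.3, nms_threshold=0.5)
# Backbone and BiFPN start from the COCO weights; only the new sand heads are randomly initialised.
# detections = prediction_model.predict([images, anchors])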
Example #6
def sapd(
    phi,
    soft_select=False,
    num_classes=20,
    freeze_bn=False,
    max_gt_boxes=100,
    batch_size=32,
    score_threshold=0.01,
):
    assert phi in range(7)
    image_size = image_sizes[phi]
    input_shape = (image_size, image_size, 3)
    # input_shape = (None, None, 3)
    image_input = layers.Input(input_shape)
    gt_boxes_input = layers.Input((max_gt_boxes, 5))
    num_gt_boxes_input = layers.Input((1, ), dtype='int32')
    fm_shapes_input = layers.Input((5, 2), dtype='int32')

    backbone_cls = backbones[phi]
    # (C1, C2, C3, C4, C5)
    features = backbone_cls(input_tensor=image_input, freeze_bn=freeze_bn)
    w_bifpn = w_bifpns[phi]
    d_bifpn = 2 + phi
    w_head = w_bifpn
    d_head = 3 + int(phi / 3)
    for i in range(d_bifpn):
        features = build_BiFPN(features, w_bifpn, i, freeze_bn=freeze_bn)
    regr_head = build_regress_head(w_head, d_head)
    cls_head = build_class_head(w_head, d_head, num_classes=num_classes)
    pyramid_features = features
    fpn_width = w_head
    cls_pred = [cls_head(feature) for feature in pyramid_features]
    cls_pred = layers.Concatenate(axis=1, name='classification')(cls_pred)
    regr_pred = [regr_head(feature) for feature in pyramid_features]
    regr_pred = layers.Concatenate(axis=1, name='regression')(regr_pred)

    # meta select net
    meta_select_net = build_meta_select_net(width=fpn_width)
    meta_select_input, gt_boxes_batch_ids = MetaSelectInput()(
        [gt_boxes_input, *pyramid_features])
    meta_select_pred = meta_select_net(meta_select_input)
    meta_select_target = MetaSelectTarget()(
        [cls_pred, regr_pred, fm_shapes_input, gt_boxes_input])
    # lambda = 0.1 in the paper
    meta_select_loss = layers.Lambda(
        lambda x: 0.1 * losses.sparse_categorical_crossentropy(x[0], x[1]),
        output_shape=(1, ),
        name="meta_select_loss")([meta_select_target, meta_select_pred])

    if soft_select:
        meta_select_weight = MetaSelectWeight(
            max_gt_boxes=max_gt_boxes,
            soft_select=soft_select,
            batch_size=batch_size,
        )([meta_select_pred, gt_boxes_batch_ids, num_gt_boxes_input])
    else:
        meta_select_weight = MetaSelectWeight(
            max_gt_boxes=max_gt_boxes,
            soft_select=soft_select,
            batch_size=batch_size,
        )([meta_select_target, gt_boxes_batch_ids, num_gt_boxes_input])

    cls_target, regr_target = SAPDTarget(num_classes=num_classes)(
        [fm_shapes_input, gt_boxes_input, meta_select_weight])

    focal_loss = focal_with_weight_and_mask()
    iou_loss = iou_with_weight_and_mask()
    cls_loss = layers.Lambda(focal_loss, output_shape=(1, ),
                             name="cls_loss")([cls_target, cls_pred])
    regr_loss = layers.Lambda(iou_loss, output_shape=(1, ),
                              name="regr_loss")([regr_target, regr_pred])

    model = models.Model(inputs=[
        image_input, gt_boxes_input, num_gt_boxes_input, fm_shapes_input
    ],
                         outputs=[
                             cls_loss, regr_loss, meta_select_loss, cls_pred,
                             regr_pred, cls_target, regr_target
                         ],
                         name='sapd')

    locations, strides = Locations()(pyramid_features)

    # apply predicted regression to anchors
    boxes = RegressBoxes(name='boxes')([locations, strides, regr_pred])
    boxes = ClipBoxes(name='clipped_boxes')([image_input, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(name='filtered_detections',
                                  score_threshold=score_threshold)(
                                      [boxes, cls_pred])

    prediction_model = models.Model(inputs=[image_input],
                                    outputs=detections,
                                    name='sapd_p')

    return model, prediction_model
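Because the SAPD training graph emits its losses as outputs, one common pattern is to compile it with pass-through losses keyed by the loss-layer names; a sketch under that assumption (outputs without an entry simply contribute no loss), with images as a hypothetical input batch.

# Hypothetical training setup.
model, prediction_model = sapd(phi=0, soft_select=True, num_classes=20, batch_size=8)
model.compile(optimizer='adam',
              loss={'cls_loss': lambda y_true, y_pred: y_pred,
                    'regr_loss': lambda y_true, y_pred: y_pred,
                    'meta_select_loss': lambda y_true, y_pred: y_pred})
# Inference needs only the image; locations and strides are generated inside prediction_model:
# boxes, scores, labels = prediction_model.predict(images)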
Example #7
def build_EfficientPose(phi,
                        num_classes=8,
                        num_anchors=9,
                        freeze_bn=False,
                        score_threshold=0.5,
                        anchor_parameters=None,
                        num_rotation_parameters=3,
                        print_architecture=True):
    """
    Builds an EfficientPose model
    Args:
        phi: EfficientPose scaling hyperparameter phi
        num_classes: Number of classes,
        num_anchors: The number of anchors, usually 3 scales and 3 aspect ratios resulting in 3 * 3 = 9 anchors
        freeze_bn: Boolean indicating if the batch norm layers should be freezed during training or not.
        score_threshold: Minimum score threshold at which a prediction is not filtered out
        anchor_parameters: Struct containing anchor parameters. If None, default values are used.
        num_rotation_parameters: Number of rotation parameters, e.g. 3 for axis angle representation
        print_architecture: Boolean indicating if the model architecture should be printed or not
    
    Returns:
        efficientpose_train: EfficientPose model without NMS used for training
        efficientpose_prediction: EfficientPose model including NMS used for evaluating and inferencing
        all_layers: List of all layers in the EfficientPose model to load weights. Otherwise it can happen that a subnet is considered as a single unit when loading weights and if the output dimension doesn't match with the weight file, the whole subnet weight loading is skipped
    """

    #select parameters according to the given phi
    assert phi in range(7)
    scaled_parameters = get_scaled_parameters(phi)

    input_size = scaled_parameters["input_size"]
    input_shape = (input_size, input_size, 3)
    bifpn_width = subnet_width = scaled_parameters["bifpn_width"]
    bifpn_depth = scaled_parameters["bifpn_depth"]
    subnet_depth = scaled_parameters["subnet_depth"]
    subnet_num_iteration_steps = scaled_parameters[
        "subnet_num_iteration_steps"]
    num_groups_gn = scaled_parameters["num_groups_gn"]
    backbone_class = scaled_parameters["backbone_class"]

    #input layers
    image_input = layers.Input(input_shape)
    camera_parameters_input = layers.Input(
        (6, )
    )  #camera parameters and image scale for calculating the translation vector from 2D x-, y-coordinates

    #build EfficientNet backbone
    backbone_feature_maps = backbone_class(input_tensor=image_input,
                                           freeze_bn=freeze_bn)

    #build BiFPN
    fpn_feature_maps = build_BiFPN(backbone_feature_maps, bifpn_depth,
                                   bifpn_width, freeze_bn)

    #build subnets
    box_net, class_net, rotation_net, translation_net = build_subnets(
        num_classes, subnet_width, subnet_depth, subnet_num_iteration_steps,
        num_groups_gn, num_rotation_parameters, freeze_bn, num_anchors)

    #apply subnets to feature maps
    classification, bbox_regression, rotation, translation, transformation, bboxes = apply_subnets_to_feature_maps(
        box_net, class_net, rotation_net, translation_net, fpn_feature_maps,
        image_input, camera_parameters_input, input_size, anchor_parameters)

    #EfficientPose training model: no NMS; rotation and translation are combined into the transformation output for the loss calculation
    efficientpose_train = models.Model(
        inputs=[image_input, camera_parameters_input],
        outputs=[classification, bbox_regression, transformation],
        name='efficientpose')

    # filter detections (apply NMS / score threshold / select top-k)
    filtered_detections = FilterDetections(
        num_rotation_parameters=num_rotation_parameters,
        num_translation_parameters=3,
        name='filtered_detections',
        score_threshold=score_threshold)(
            [bboxes, classification, rotation, translation])

    efficientpose_prediction = models.Model(
        inputs=[image_input, camera_parameters_input],
        outputs=filtered_detections,
        name='efficientpose_prediction')

    if print_architecture:
        print_models(efficientpose_train, box_net, class_net, rotation_net,
                     translation_net)

    #collect all layers so weights can be loaded layer by layer; otherwise a whole subnet may be skipped when its output shape does not match the weight file, instead of just its output layer
    all_layers = list(
        set(efficientpose_train.layers + box_net.layers + class_net.layers +
            rotation_net.layers + translation_net.layers))

    return efficientpose_train, efficientpose_prediction, all_layers
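A minimal sketch of building the EfficientPose graphs; camera_parameters is a hypothetical (batch, 6) array holding the camera intrinsics and image scale mentioned above, and images is a hypothetical input batch.

# Hypothetical usage.
train_model, prediction_model, all_layers = build_EfficientPose(phi=0,
                                                                num_classes=8,
                                                                score_threshold=0.5,
                                                                print_architecture=False)
# all_layers exists so weights can be loaded layer by layer instead of per subnet.
# Inference takes the image plus the 6 camera parameters / image scale:
# detections = prediction_model.predict([images, camera_parameters])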
Example #8
def yolo_body(num_classes=20, score_threshold=0.01):
    """
    Create YOLO_V3 model CNN body in Keras.

    Args:
        num_classes:
        score_threshold:

    Returns:

    """
    image_input = Input(shape=(None, None, 3), name='image_input')
    darknet = Model([image_input], darknet_body(image_input))
    ##################################################
    # build fsaf head
    ##################################################
    x, y1 = make_last_layers(darknet.output, 512, 4 + num_classes)

    x = compose(darknet_conv2d_bn_leaky(256, (1, 1)), UpSampling2D(2))(x)
    x = Concatenate()([x, darknet.layers[152].output])
    x, y2 = make_last_layers(x, 256, 4 + num_classes)
    x = compose(darknet_conv2d_bn_leaky(128, (1, 1)), UpSampling2D(2))(x)
    x = Concatenate()([x, darknet.layers[92].output])
    x, y3 = make_last_layers(x, 128, 4 + num_classes)
    y1_ = Reshape((-1, 4 + num_classes))(y1)
    y2_ = Reshape((-1, 4 + num_classes))(y2)
    y3_ = Reshape((-1, 4 + num_classes))(y3)
    y = Concatenate(axis=1)([y1_, y2_, y3_])
    batch_cls_pred = Lambda(lambda x: x[..., 4:])(y)
    batch_regr_pred = Lambda(lambda x: x[..., :4])(y)
    batch_cls_pred = Activation('sigmoid')(batch_cls_pred)
    batch_regr_pred = Activation('relu')(batch_regr_pred)

    gt_boxes_input = Input(shape=(config.MAX_NUM_GT_BOXES, 5),
                           name='gt_boxes_input')
    grid_shapes_input = Input((len(config.STRIDES), 2),
                              dtype='int32',
                              name='grid_shapes_input')
    batch_gt_box_levels = LevelSelect(name='level_select')(
        [batch_cls_pred, batch_regr_pred, grid_shapes_input, gt_boxes_input])
    batch_cls_target, batch_cls_mask, batch_cls_num_pos, batch_regr_target, batch_regr_mask = FSAFTarget(
        num_classes=num_classes, name='fsaf_target')(
            [batch_gt_box_levels, grid_shapes_input, gt_boxes_input])
    focal_loss_graph = focal_with_mask()
    iou_loss_graph = iou_with_mask()
    cls_loss = Lambda(focal_loss_graph, output_shape=(1, ), name="cls_loss")(
        [batch_cls_target, batch_cls_pred, batch_cls_mask, batch_cls_num_pos])
    regr_loss = Lambda(iou_loss_graph, output_shape=(1, ), name="regr_loss")(
        [batch_regr_target, batch_regr_pred, batch_regr_mask])
    model = Model(inputs=[image_input, gt_boxes_input, grid_shapes_input],
                  outputs=[cls_loss, regr_loss],
                  name='fsaf')

    # compute the anchors
    features = [y1, y2, y3]

    locations, strides = Locations(strides=config.STRIDES)(features)

    # apply predicted regression to anchors
    boxes = RegressBoxes(name='boxes')([locations, strides, batch_regr_pred])
    boxes = ClipBoxes(name='clipped_boxes')([image_input, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=True,
        class_specific_filter=True,
        name='filtered_detections',
        score_threshold=score_threshold)([boxes, batch_cls_pred])

    prediction_model = Model(inputs=image_input,
                             outputs=detections,
                             name='fsaf_detection')
    return model, prediction_model
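A sketch of using this FSAF-on-Darknet builder, assuming the config module referenced above (config.MAX_NUM_GT_BOXES, config.STRIDES) is importable, and using the same pass-through-loss pattern as in Example #6; images is a hypothetical input batch.

# Hypothetical training/inference setup.
model, prediction_model = yolo_body(num_classes=20, score_threshold=0.01)
model.compile(optimizer='adam',
              loss={'cls_loss': lambda y_true, y_pred: y_pred,
                    'regr_loss': lambda y_true, y_pred: y_pred})
# Training feeds images, padded ground-truth boxes and per-level grid shapes;
# inference needs only the images:
# boxes, scores, labels = prediction_model.predict(images)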