Esempio n. 1
0
def retinanet_mask(inputs,
                   backbone_dict,
                   num_classes,
                   frames_per_batch=1,
                   backbone_levels=['C3', 'C4', 'C5'],
                   pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                   retinanet_model=None,
                   anchor_params=None,
                   nms=True,
                   panoptic=False,
                   class_specific_filter=True,
                   crop_size=(14, 14),
                   mask_size=(28, 28),
                   name='retinanet-mask',
                   roi_submodels=None,
                   max_detections=100,
                   score_threshold=0.05,
                   nms_threshold=0.5,
                   mask_dtype=K.floatx(),
                   **kwargs):
    """Construct a RetinaNet mask model on top of a retinanet bbox model.
    Uses the retinanet bbox model and appends layers to compute masks.

    Args:
        inputs (tensor): List of tensorflow.keras.layers.Input.
            The first input is the image, the second input the blob of masks.
        backbone_dict (dict): A dictionary with the backbone layers.
        num_classes (int): Integer, number of classes to classify.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        backbone_levels (list): The backbone levels to be used.
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to. Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        retinanet_model (tensorflow.keras.Model): RetinaNet model that predicts
            regression and classification values.
        anchor_params (AnchorParameters): Struct containing anchor parameters.
        nms (bool): Whether to use non-maximum suppression
            for the filtering step.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks. Defaults to false.
        class_specific_filter (bool): Use class specific filtering.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        mask_size (tuple): 2-length tuple for the x-y size of the masks.
            Used to create default roi_submodels.
        name (str): Name of the model.
        roi_submodels (list): Submodels for processing ROIs.
        max_detections (int): The maximum number of detections allowed.
        score_threshold (float): Minimum score for the FilterDetections layer.
        nms_threshold (float): Minimimum NMS for the FilterDetections layer.
        mask_dtype (str): Dtype to use for mask tensors.
        kwargs (dict): Additional kwargs to pass to the retinanet bbox model.

    Returns:
        tensorflow.keras.Model: Model with inputs as input and as output
            the output of each submodel for each pyramid level and the
            detections. The order is as defined in submodels.

            ```
            [
                regression, classification, other[0], ...,
                boxes_masks, boxes, scores, labels, masks, other[0], ...
            ]
            ```

    """
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if roi_submodels is None:
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        roi_submodels = default_roi_submodels(num_classes, crop_size,
                                              mask_size, frames_per_batch,
                                              mask_dtype, retinanet_dtype)
        K.set_floatx(retinanet_dtype)

    image = inputs
    image_shape = Shape()(image)

    if retinanet_model is None:
        retinanet_model = retinanet(inputs=image,
                                    backbone_dict=backbone_dict,
                                    num_classes=num_classes,
                                    backbone_levels=backbone_levels,
                                    pyramid_levels=pyramid_levels,
                                    panoptic=panoptic,
                                    num_anchors=anchor_params.num_anchors(),
                                    frames_per_batch=frames_per_batch,
                                    **kwargs)

    # parse outputs
    regression = retinanet_model.outputs[0]
    classification = retinanet_model.outputs[1]

    if panoptic:
        # Determine the number of semantic heads
        n_semantic_heads = len([
            1 for layer in retinanet_model.layers if 'semantic' in layer.name
        ])

        # The  panoptic output should not be sent to filter detections
        other = retinanet_model.outputs[2:-n_semantic_heads]
        semantic = retinanet_model.outputs[-n_semantic_heads:]
    else:
        other = retinanet_model.outputs[2:]

    features = [
        retinanet_model.get_layer(name).output for name in pyramid_levels
    ]

    # build boxes
    anchors = __build_anchors(anchor_params,
                              features,
                              frames_per_batch=frames_per_batch)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        nms_threshold=nms_threshold,
        score_threshold=score_threshold,
        class_specific_filter=class_specific_filter,
        max_detections=max_detections,
        name='filtered_detections')([boxes, classification] + other)

    # split up in known outputs and "other"
    boxes = detections[0]
    scores = detections[1]

    # get the region of interest features
    #
    # roi_input = [image_shape, boxes, classification] + features
    # rois = _RoiAlign(crop_size=crop_size)(roi_input)

    fpn = features[0]
    fpn = UpsampleLike()([fpn, image])
    rois = RoiAlign(crop_size=crop_size)([boxes, fpn])

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    trainable_outputs = [
        ConcatenateBoxes(name=name)([boxes, output])
        for (name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs

    if panoptic:
        outputs += list(semantic)

    model = Model(inputs=inputs, outputs=outputs, name=name)
    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels

    return model
Esempio n. 2
0
def retinanet_mask(inputs,
                   num_classes,
                   retinanet_model=None,
                   anchor_params=None,
                   nms=True,
                   class_specific_filter=True,
                   crop_size=(14, 14),
                   mask_size=(28, 28),
                   name='retinanet-mask',
                   roi_submodels=None,
                   mask_dtype=K.floatx(),
                   **kwargs):
    """Construct a RetinaNet mask model on top of a retinanet bbox model.
    Uses the retinanet bbox model and appends layers to compute masks.
    Args:
        inputs: List of tensorflow.keras.layers.Input.
            The first input is the image, the second input the blob of masks.
        num_classes: Integer, number of classes to classify.
        retinanet_model: deepcell.model_zoo.retinanet.retinanet model,
            returning regression and classification values.
        anchor_params: Struct containing anchor parameters.
        nms: Boolean, whether to use NMS.
        class_specific_filter: Boolean, use class specific filtering.
        roi_submodels: Submodels for processing ROIs.
        mask_dtype: Data type of the masks, can be different from the main one.
        name: Name of the model.
        **kwargs: Additional kwargs to pass to the retinanet bbox model.
    Returns:
        Model with inputs as input and as output the output of each submodel
        for each pyramid level and the detections. The order is as defined in
        submodels.
        ```
        [
            regression, classification, other[0], other[1], ...,
            boxes_masks, boxes, scores, labels, masks, other[0], other[1], ...
        ]
        ```
    """
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if roi_submodels is None:
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        roi_submodels = default_roi_submodels(num_classes, crop_size,
                                              mask_size, mask_dtype,
                                              retinanet_dtype)
        K.set_floatx(retinanet_dtype)

    image = inputs
    image_shape = Shape()(image)

    if retinanet_model is None:
        retinanet_model = retinanet(inputs=image,
                                    num_classes=num_classes,
                                    num_anchors=anchor_params.num_anchors(),
                                    **kwargs)

    # parse outputs
    regression = retinanet_model.outputs[0]
    classification = retinanet_model.outputs[1]
    other = retinanet_model.outputs[2:]
    features = [
        retinanet_model.get_layer(name).output
        for name in ['P3', 'P4', 'P5', 'P6', 'P7']
    ]

    # build boxes
    anchors = __build_anchors(anchor_params, features)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        class_specific_filter=class_specific_filter,
        max_detections=100,
        name='filtered_detections')([boxes, classification] + other)

    # split up in known outputs and "other"
    boxes = detections[0]
    scores = detections[1]

    # get the region of interest features
    roi_input = [image_shape, boxes, classification] + features
    rois = RoiAlign(crop_size=crop_size)(roi_input)

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    trainable_outputs = [
        ConcatenateBoxes(name=name)([boxes, output])
        for (name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs

    return Model(inputs=inputs, outputs=outputs, name=name)