예제 #1
0
def retinanet(inputs,
              backbone_dict,
              num_classes,
              backbone_levels=['C3', 'C4', 'C5'],
              pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
              num_anchors=None,
              create_pyramid_features=__create_pyramid_features,
              create_semantic_head=__create_semantic_head,
              panoptic=False,
              num_semantic_heads=1,
              num_semantic_classes=[3],
              submodels=None,
              name='retinanet'):
    """Construct a RetinaNet model on top of a backbone.

    This model is the minimum model necessary for training
    (with the unfortunate exception of anchors as output).

    Args:
        inputs (tensor): The inputs to the network.
        backbone_dict (dict): A dictionary with the backbone layers,
            keyed by level name.
        num_classes (int): Number of classes to classify.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to.
            Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        num_anchors (int): Number of base anchors. Defaults to
            ``AnchorParameters.default.num_anchors()``.
        create_pyramid_features (function): Function to create pyramid
            features. Called with the reduced backbone dictionary and must
            return a dictionary keyed by pyramid level name.
        create_semantic_head (function): Function for creating a semantic
            head, which can be used for panoptic segmentation tasks.
        panoptic (bool): Flag for adding the semantic head(s) for panoptic
            segmentation tasks. Defaults to False.
        num_semantic_heads (int): Number of semantic heads to create when
            ``panoptic`` is True. Defaults to 1.
        num_semantic_classes (list or int): Number of classes for each
            semantic head, indexed by head. A single scalar is treated as a
            one-element list. Defaults to [3].
        submodels (list): Submodels to run on each feature map
            (default is regression and classification submodels).
        name (str): Name of the model.

    Returns:
        tensorflow.keras.Model: A Model which takes an image as input
            and outputs generated anchors and the result from each submodel on
            every pyramid level.

            The order of the outputs is as defined in submodels, followed by
            one output per semantic head when ``panoptic`` is True:

            ```
            [
                regression, classification, other[0], other[1], ...
            ]
            ```

    Raises:
        ValueError: If ``panoptic`` is True and ``num_semantic_classes`` has
            fewer entries than ``num_semantic_heads``.
    """
    # The level lists are stored on the returned model; copy them so that
    # mutating ``model.backbone_levels`` can never alter the shared default
    # argument objects (or the caller's own list).
    backbone_levels = list(backbone_levels)
    pyramid_levels = list(pyramid_levels)

    if num_anchors is None:
        num_anchors = AnchorParameters.default.num_anchors()

    if submodels is None:
        submodels = default_submodels(num_classes, num_anchors)

    # Accept either an iterable of class counts or a single scalar.
    # ``list(3)`` raises TypeError, so wrap non-iterables instead.
    try:
        num_semantic_classes = list(num_semantic_classes)
    except TypeError:
        num_semantic_classes = [num_semantic_classes]

    # compute pyramid features as per https://arxiv.org/abs/1708.02002

    # Use only the desired backbone levels to create the feature pyramid
    backbone_dict_reduced = {
        level: layer
        for level, layer in backbone_dict.items()
        if level in backbone_levels
    }
    pyramid_dict = create_pyramid_features(backbone_dict_reduced)

    # for the desired pyramid levels, run available submodels
    features = [pyramid_dict[level] for level in pyramid_levels]
    object_head = __build_pyramid(submodels, features)

    if panoptic:
        if num_semantic_heads > len(num_semantic_classes):
            raise ValueError(
                'num_semantic_classes must provide a class count for each '
                'semantic head: got {} entries for {} heads.'.format(
                    len(num_semantic_classes), num_semantic_heads))

        # The semantic heads target the lowest-numbered pyramid level,
        # parsed from the first run of digits in each pyramid key.
        target_level = min(
            int(re.findall(r'\d+', level)[0]) for level in pyramid_dict)

        semantic_head_list = [
            create_semantic_head(pyramid_dict,
                                 n_classes=num_semantic_classes[i],
                                 input_target=inputs,
                                 target_level=target_level,
                                 semantic_id=i)
            for i in range(num_semantic_heads)
        ]

        outputs = object_head + semantic_head_list
    else:
        outputs = object_head

    model = Model(inputs=inputs, outputs=outputs, name=name)
    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels

    return model
예제 #2
0
def retinanet_mask(inputs,
                   backbone_dict,
                   num_classes,
                   frames_per_batch=1,
                   backbone_levels=['C3', 'C4', 'C5'],
                   pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                   retinanet_model=None,
                   anchor_params=None,
                   nms=True,
                   panoptic=False,
                   class_specific_filter=True,
                   crop_size=(14, 14),
                   mask_size=(28, 28),
                   name='retinanet-mask',
                   roi_submodels=None,
                   max_detections=100,
                   score_threshold=0.05,
                   nms_threshold=0.5,
                   mask_dtype=K.floatx(),
                   **kwargs):
    """Construct a RetinaNet mask model on top of a retinanet bbox model.
    Uses the retinanet bbox model and appends layers to compute masks.

    Args:
        inputs (tensor): Input tensor. It is used directly as the image fed
            to the bbox model, to ClipBoxes and to UpsampleLike.
        backbone_dict (dict): A dictionary with the backbone layers.
        num_classes (int): Integer, number of classes to classify.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to.
            Defaults to ['P3', 'P4', 'P5', 'P6', 'P7']. Each entry must be
            the name of a layer in the bbox model.
        retinanet_model (tensorflow.keras.Model): RetinaNet model that predicts
            regression and classification values. Built with ``retinanet``
            when None.
        anchor_params (AnchorParameters): Struct containing anchor parameters.
            Defaults to ``AnchorParameters.default``.
        nms (bool): Whether to use non-maximum suppression
            for the filtering step.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks. Defaults to False.
        class_specific_filter (bool): Use class specific filtering.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        mask_size (tuple): 2-length tuple for the x-y size of the masks.
            Used to create default roi_submodels.
        name (str): Name of the model.
        roi_submodels (list): ``(name, submodel)`` pairs for processing ROIs.
            Built with ``default_roi_submodels`` when None.
        max_detections (int): The maximum number of detections allowed.
        score_threshold (float): Score threshold passed to the
            FilterDetections layer.
        nms_threshold (float): NMS threshold passed to the
            FilterDetections layer.
        mask_dtype (str): Dtype to use for mask tensors. Note the default is
            the Keras floatx that was active when this function was defined.
        kwargs (dict): Additional kwargs to pass to the retinanet bbox model.

    Returns:
        tensorflow.keras.Model: Model with inputs as input and as output
            the output of each submodel for each pyramid level and the
            detections. The order is as defined in submodels; semantic
            outputs are appended last when ``panoptic`` is True.

            ```
            [
                regression, classification, other[0], ...,
                boxes_masks, boxes, scores, labels, masks, other[0], ...
            ]
            ```

    """
    # The level lists are stored on the returned model; copy them so that
    # mutating ``model.pyramid_levels`` can never alter the shared default
    # argument objects (or the caller's own list).
    backbone_levels = list(backbone_levels)
    pyramid_levels = list(pyramid_levels)

    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if roi_submodels is None:
        # Temporarily switch the global Keras float type while the ROI
        # submodels are built; always restore it, even if building fails.
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        try:
            roi_submodels = default_roi_submodels(num_classes, crop_size,
                                                  mask_size, frames_per_batch,
                                                  mask_dtype, retinanet_dtype)
        finally:
            K.set_floatx(retinanet_dtype)

    image = inputs

    if retinanet_model is None:
        retinanet_model = retinanet(inputs=image,
                                    backbone_dict=backbone_dict,
                                    num_classes=num_classes,
                                    backbone_levels=backbone_levels,
                                    pyramid_levels=pyramid_levels,
                                    panoptic=panoptic,
                                    num_anchors=anchor_params.num_anchors(),
                                    frames_per_batch=frames_per_batch,
                                    **kwargs)

    # parse outputs
    retinanet_outputs = list(retinanet_model.outputs)
    regression = retinanet_outputs[0]
    classification = retinanet_outputs[1]

    if panoptic:
        # Determine the number of semantic heads
        n_semantic_heads = sum(
            1 for layer in retinanet_model.layers if 'semantic' in layer.name)

        # The panoptic output should not be sent to filter detections.
        # Use an explicit split index: slicing with ``-0`` would yield an
        # empty ``other`` and treat every output as semantic. The index is
        # clamped so regression/classification are never counted as semantic.
        split = max(2, len(retinanet_outputs) - n_semantic_heads)
        other = retinanet_outputs[2:split]
        semantic = retinanet_outputs[split:]
    else:
        other = retinanet_outputs[2:]
        semantic = []

    features = [
        retinanet_model.get_layer(level).output for level in pyramid_levels
    ]

    # build boxes
    anchors = __build_anchors(anchor_params,
                              features,
                              frames_per_batch=frames_per_batch)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        nms_threshold=nms_threshold,
        score_threshold=score_threshold,
        class_specific_filter=class_specific_filter,
        max_detections=max_detections,
        name='filtered_detections')([boxes, classification] + other)

    # the first detection output holds the filtered boxes
    boxes = detections[0]

    # get the region of interest features: the first requested pyramid
    # level is upsampled to the image and cropped at the filtered boxes
    fpn = features[0]
    fpn = UpsampleLike()([fpn, image])
    rois = RoiAlign(crop_size=crop_size)([boxes, fpn])

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    # (loop variable deliberately does not reuse the ``name`` parameter)
    trainable_outputs = [
        ConcatenateBoxes(name=submodel_name)([boxes, output])
        for (submodel_name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs + semantic

    model = Model(inputs=inputs, outputs=outputs, name=name)
    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels

    return model
예제 #3
0
def retinanet_mask(inputs,
                   backbone_dict,
                   num_classes,
                   backbone_levels=['C3', 'C4', 'C5'],
                   pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                   retinanet_model=None,
                   anchor_params=None,
                   nms=True,
                   panoptic=False,
                   class_specific_filter=True,
                   crop_size=(14, 14),
                   mask_size=(28, 28),
                   name='retinanet-mask',
                   roi_submodels=None,
                   max_detections=100,
                   mask_dtype=K.floatx(),
                   **kwargs):
    """Construct a RetinaNet mask model on top of a retinanet bbox model.
    Uses the retinanet bbox model and appends layers to compute masks.

    Args:
        inputs: Input tensor. It is used directly as the image fed to the
            bbox model, to the Shape layer and to ClipBoxes.
        backbone_dict: Dictionary with the backbone layers, forwarded to
            the retinanet bbox model.
        num_classes: Integer, number of classes to classify.
        backbone_levels: List of backbone levels forwarded to the retinanet
            bbox model. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels: List of pyramid levels. Each entry must be the name
            of a layer in the bbox model; their outputs are used to build
            anchors and ROIs. Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        retinanet_model: deepcell.model_zoo.retinanet.retinanet model,
            returning regression and classification values. Built with
            ``retinanet`` when None.
        anchor_params: Struct containing anchor parameters. Defaults to
            ``AnchorParameters.default``.
        nms: Boolean, whether to use NMS.
        panoptic: Boolean, whether the bbox model carries semantic heads.
            Their outputs bypass detection filtering and are appended last.
        class_specific_filter: Boolean, use class specific filtering.
        crop_size: Tuple passed to RoiAlign and used to create the default
            roi_submodels.
        mask_size: Tuple used to create the default roi_submodels.
        name: Name of the model.
        roi_submodels: ``(name, submodel)`` pairs for processing ROIs.
            Built with ``default_roi_submodels`` when None.
        max_detections: Integer passed to the FilterDetections layer.
        mask_dtype: Data type of the masks, can be different from the main one.
            Note the default is the Keras floatx that was active when this
            function was defined.
        **kwargs: Additional kwargs to pass to the retinanet bbox model.

    Returns:
        Model with inputs as input and as output the output of each submodel
        for each pyramid level and the detections. The order is as defined in
        submodels; semantic outputs are appended last when ``panoptic`` is
        True.
        ```
        [
            regression, classification, other[0], other[1], ...,
            boxes_masks, boxes, scores, labels, masks, other[0], other[1], ...
        ]
        ```
    """
    # The level lists are stored on the returned model; copy them so that
    # mutating ``model.pyramid_levels`` can never alter the shared default
    # argument objects (or the caller's own list).
    backbone_levels = list(backbone_levels)
    pyramid_levels = list(pyramid_levels)

    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if roi_submodels is None:
        # Temporarily switch the global Keras float type while the ROI
        # submodels are built; always restore it, even if building fails.
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        try:
            roi_submodels = default_roi_submodels(num_classes, crop_size,
                                                  mask_size, mask_dtype,
                                                  retinanet_dtype)
        finally:
            K.set_floatx(retinanet_dtype)

    image = inputs
    image_shape = Shape()(image)

    if retinanet_model is None:
        retinanet_model = retinanet(inputs=image,
                                    backbone_dict=backbone_dict,
                                    num_classes=num_classes,
                                    backbone_levels=backbone_levels,
                                    pyramid_levels=pyramid_levels,
                                    panoptic=panoptic,
                                    num_anchors=anchor_params.num_anchors(),
                                    **kwargs)

    # parse outputs
    retinanet_outputs = list(retinanet_model.outputs)
    regression = retinanet_outputs[0]
    classification = retinanet_outputs[1]

    if panoptic:
        # Determine the number of semantic heads
        n_semantic_heads = sum(
            1 for layer in retinanet_model.layers if 'semantic' in layer.name)

        # The panoptic output should not be sent to filter detections.
        # Use an explicit split index: slicing with ``-0`` would yield an
        # empty ``other`` and treat every output as semantic. The index is
        # clamped so regression/classification are never counted as semantic.
        split = max(2, len(retinanet_outputs) - n_semantic_heads)
        other = retinanet_outputs[2:split]
        semantic = retinanet_outputs[split:]
    else:
        other = retinanet_outputs[2:]
        semantic = []

    features = [
        retinanet_model.get_layer(level).output for level in pyramid_levels
    ]

    # build boxes
    anchors = __build_anchors(anchor_params, features)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        class_specific_filter=class_specific_filter,
        max_detections=max_detections,
        name='filtered_detections')([boxes, classification] + other)

    # the first detection output holds the filtered boxes
    boxes = detections[0]

    # get the region of interest features
    roi_input = [image_shape, boxes, classification] + features
    rois = RoiAlign(crop_size=crop_size)(roi_input)

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    # (loop variable deliberately does not reuse the ``name`` parameter)
    trainable_outputs = [
        ConcatenateBoxes(name=submodel_name)([boxes, output])
        for (submodel_name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs + semantic

    model = Model(inputs=inputs, outputs=outputs, name=name)
    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels

    return model