def retinanet_mask_3D(inputs,
                      num_classes,
                      retinanet_model=None,
                      anchor_params=None,
                      nms=True,
                      class_specific_filter=True,
                      crop_size=(14, 14),
                      mask_size=(28, 28, 28),
                      name='retinanet-mask-3D',
                      roi_submodels=None,
                      mask_dtype=K.floatx(),
                      **kwargs):
    """Construct a RetinaNet mask model on top of a retinanet bbox model.

    Uses the retinanet bbox model and appends layers to compute masks.

    Args:
        inputs: List of tensorflow.keras.layers.Input.
            The first input is the image, the second input the blob of masks.
        num_classes: Integer, number of classes to classify.
        retinanet_model: deepcell.model_zoo.retinanet.retinanet model,
            returning regression and classification values.
        anchor_params: Struct containing anchor parameters.
        nms: Boolean, whether to use NMS.
        class_specific_filter: Boolean, use class specific filtering.
        roi_submodels: Submodels for processing ROIs.
        mask_dtype: Data type of the masks, can be different from the main one.
        name: Name of the model.
        **kwargs: Additional kwargs to pass to the retinanet bbox model.

    Returns:
        Model with inputs as input and as output the output of each submodel
        for each pyramid level and the detections. The order is as defined in
        submodels.

        ```
        [
            regression, classification, other[0], other[1], ...,
            boxes_masks, boxes, scores, labels, masks, other[0], other[1], ...
        ]
        ```
    """
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if roi_submodels is None:
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        roi_submodels = default_roi_submodels(num_classes, crop_size, mask_size,
                                              mask_dtype, retinanet_dtype)
        K.set_floatx(retinanet_dtype)

    image = inputs
    image_shape = Shape()(image)

    if retinanet_model is None:
        retinanet_model = retinanet(
            inputs=image,
            num_classes=num_classes,
            num_anchors=anchor_params.num_anchors(),
            **kwargs)

    # parse outputs
    regression = retinanet_model.outputs[0]
    classification = retinanet_model.outputs[1]
    other = retinanet_model.outputs[2:]

    features = [
        retinanet_model.get_layer(name).output
        for name in ['P3', 'P4', 'P5', 'P6', 'P7']
    ]

    # build boxes
    anchors = __build_anchors(anchor_params, features)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        class_specific_filter=class_specific_filter,
        max_detections=100,
        name='filtered_detections')([boxes, classification] + other)

    # split up in known outputs and "other"
    boxes = detections[0]
    scores = detections[1]

    # get the region of interest features
    roi_input = [image_shape, boxes, classification] + features
    rois = RoiAlign(crop_size=crop_size)(roi_input)

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    trainable_outputs = [
        ConcatenateBoxes(name=name)([boxes, output])
        for (name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs

    return Model(inputs=inputs, outputs=outputs, name=name)

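
# Illustrative sketch (not part of the original module): the temporary
# K.set_floatx swap used above is what lets the ROI/mask submodels be built
# in a different precision than the backbone. The helper below shows the
# same pattern in isolation; `build_fn` and `dtype` are assumed arguments.
def _build_submodels_in_dtype(build_fn, dtype):
    original_dtype = K.floatx()       # dtype used by the rest of the graph
    K.set_floatx(dtype)               # e.g. keep mask heads in 'float32'
    try:
        submodels = build_fn()        # layers created here use `dtype`
    finally:
        K.set_floatx(original_dtype)  # always restore the global setting
    return submodels
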
def retinanet_mask(inputs,
                   backbone_dict,
                   num_classes,
                   frames_per_batch=1,
                   backbone_levels=['C3', 'C4', 'C5'],
                   pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                   retinanet_model=None,
                   anchor_params=None,
                   nms=True,
                   panoptic=False,
                   class_specific_filter=True,
                   crop_size=(14, 14),
                   mask_size=(28, 28),
                   name='retinanet-mask',
                   roi_submodels=None,
                   max_detections=100,
                   score_threshold=0.05,
                   nms_threshold=0.5,
                   mask_dtype=K.floatx(),
                   **kwargs):
    """Construct a RetinaNet mask model on top of a retinanet bbox model.

    Uses the retinanet bbox model and appends layers to compute masks.

    Args:
        inputs (tensor): List of tensorflow.keras.layers.Input.
            The first input is the image, the second input the blob of masks.
        backbone_dict (dict): A dictionary with the backbone layers.
        num_classes (int): Number of classes to classify.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        backbone_levels (list): The backbone levels to be used to create the
            feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to. Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        retinanet_model (tensorflow.keras.Model): RetinaNet model that predicts
            regression and classification values.
        anchor_params (AnchorParameters): Struct containing anchor parameters.
        nms (bool): Whether to use non-maximum suppression
            for the filtering step.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks. Defaults to False.
        class_specific_filter (bool): Use class specific filtering.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        mask_size (tuple): 2-length tuple for the x-y size of the masks.
            Used to create default roi_submodels.
        name (str): Name of the model.
        roi_submodels (list): Submodels for processing ROIs.
        max_detections (int): The maximum number of detections allowed.
        score_threshold (float): Minimum score for the FilterDetections layer.
        nms_threshold (float): IoU threshold used by NMS in the
            FilterDetections layer.
        mask_dtype (str): Dtype to use for mask tensors.
        kwargs (dict): Additional kwargs to pass to the retinanet bbox model.

    Returns:
        tensorflow.keras.Model: Model with inputs as input and as output
        the output of each submodel for each pyramid level and the detections.
        The order is as defined in submodels.

        ```
        [
            regression, classification, other[0], ...,
            boxes_masks, boxes, scores, labels, masks, other[0], ...
        ]
        ```
    """
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if roi_submodels is None:
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        roi_submodels = default_roi_submodels(num_classes, crop_size, mask_size,
                                              frames_per_batch,
                                              mask_dtype, retinanet_dtype)
        K.set_floatx(retinanet_dtype)

    image = inputs
    image_shape = Shape()(image)

    if retinanet_model is None:
        retinanet_model = retinanet(
            inputs=image,
            backbone_dict=backbone_dict,
            num_classes=num_classes,
            backbone_levels=backbone_levels,
            pyramid_levels=pyramid_levels,
            panoptic=panoptic,
            num_anchors=anchor_params.num_anchors(),
            frames_per_batch=frames_per_batch,
            **kwargs)

    # parse outputs
    regression = retinanet_model.outputs[0]
    classification = retinanet_model.outputs[1]

    if panoptic:
        # Determine the number of semantic heads
        n_semantic_heads = len([
            1 for layer in retinanet_model.layers if 'semantic' in layer.name
        ])

        # The panoptic output should not be sent to filter detections
        other = retinanet_model.outputs[2:-n_semantic_heads]
        semantic = retinanet_model.outputs[-n_semantic_heads:]
    else:
        other = retinanet_model.outputs[2:]

    features = [
        retinanet_model.get_layer(name).output for name in pyramid_levels
    ]

    # build boxes
    anchors = __build_anchors(anchor_params, features,
                              frames_per_batch=frames_per_batch)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        nms_threshold=nms_threshold,
        score_threshold=score_threshold,
        class_specific_filter=class_specific_filter,
        max_detections=max_detections,
        name='filtered_detections')([boxes, classification] + other)

    # split up in known outputs and "other"
    boxes = detections[0]
    scores = detections[1]

    # get the region of interest features
    # roi_input = [image_shape, boxes, classification] + features
    # rois = _RoiAlign(crop_size=crop_size)(roi_input)
    fpn = features[0]
    fpn = UpsampleLike()([fpn, image])
    rois = RoiAlign(crop_size=crop_size)([boxes, fpn])

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    trainable_outputs = [
        ConcatenateBoxes(name=name)([boxes, output])
        for (name, _), output in zip(roi_submodels, maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs

    if panoptic:
        outputs += list(semantic)

    model = Model(inputs=inputs, outputs=outputs, name=name)
    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels

    return model

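
# Illustrative sketch (not part of the original module): unpack the training
# model's outputs in the order documented in retinanet_mask, assuming
# panoptic=False, no extra "other" outputs, and the single default mask
# roi_submodel.
def _split_retinanet_mask_outputs(model):
    outputs = model.outputs
    regression = outputs[0]      # anchor regression head
    classification = outputs[1]  # anchor classification head
    boxes_masks = outputs[2]     # ConcatenateBoxes output used by the mask loss
    boxes, scores, labels = outputs[3], outputs[4], outputs[5]
    masks = outputs[6]           # raw mask submodel output on the ROIs
    return regression, classification, boxes_masks, boxes, scores, labels, masks
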
def retinanet_bbox(model=None,
                   nms=True,
                   panoptic=False,
                   num_semantic_heads=1,
                   class_specific_filter=True,
                   name='retinanet-bbox',
                   anchor_params=None,
                   **kwargs):
    """Construct a RetinaNet model on top of a backbone and adds convenience
    functions to output boxes directly.

    This model uses the minimum retinanet model and appends a few layers
    to compute boxes within the graph. These layers include applying the
    regression values to the anchors and performing NMS.

    Args:
        model (tensorflow.keras.Model): RetinaNet model to append bbox
            layers to. If None, it will create a RetinaNet model using kwargs.
        nms (bool): Whether to use non-maximum suppression
            for the filtering step.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks.
        num_semantic_heads (int): Total number of semantic heads to build.
        class_specific_filter (bool): Whether to use class specific filtering
            or filter for the best scoring class only.
        name (str): Name of the model.
        anchor_params (AnchorParameters): Struct containing anchor parameters.
            If None, default values are used.
        kwargs (dict): Additional kwargs to pass to the minimal retinanet model.

    Returns:
        tensorflow.keras.Model: A Model which takes an image as input and
        outputs the detections on the image.

        The order is defined as follows:

        ```
        [
            boxes, scores, labels, other[0], other[1], ...
        ]
        ```

    Raises:
        ValueError: the given model does not have a regression or
            classification submodel.
    """
    # if no anchor parameters are passed, use default values
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    # create RetinaNet model
    if model is None:
        model = retinanet(num_anchors=anchor_params.num_anchors(), **kwargs)
    else:
        names = ('regression', 'classification')
        if not all(output in model.output_names for output in names):
            raise ValueError('Input is not a training model (no `regression` '
                             'and `classification` outputs were found, '
                             'outputs are: {}).'.format(model.output_names))

    # compute the anchors
    features = [model.get_layer(l).output for l in model.pyramid_levels]
    anchors = __build_anchors(anchor_params, features)

    # we expect anchors, regression and classification values as first outputs
    regression = model.outputs[0]
    classification = model.outputs[1]

    # "other" can be any additional output from custom submodels, by default []
    if panoptic:
        # The last outputs are the semantic heads, which should not be
        # sent to filter detections
        other = model.outputs[2:-num_semantic_heads]
        semantic = model.outputs[-num_semantic_heads:]
    else:
        other = model.outputs[2:]

    # apply predicted regression to anchors
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([model.inputs[0], boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        class_specific_filter=class_specific_filter,
        name='filtered_detections')([boxes, classification] + other)

    # add the semantic head's output if needed
    if panoptic:
        outputs = detections + list(semantic)
    else:
        outputs = detections

    # construct the model
    return Model(inputs=model.inputs, outputs=outputs, name=name)

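
# Illustrative inference sketch (not part of the original module), assuming
# `prediction_model` was returned by retinanet_bbox with panoptic=False and
# `image` is a single preprocessed frame; the 0.5 score cutoff is arbitrary.
def _detect(prediction_model, image, score_threshold=0.5):
    import numpy as np

    # add a batch dimension and read the first three outputs:
    # boxes, scores, labels
    boxes, scores, labels = prediction_model.predict(
        np.expand_dims(image, axis=0))[:3]

    # FilterDetections returns fixed-size, padded arrays, so keep only
    # detections above the score cutoff.
    keep = scores[0] > score_threshold
    return boxes[0][keep], scores[0][keep], labels[0][keep]
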
def retinamask_bbox(model,
                    nms=True,
                    panoptic=False,
                    num_semantic_heads=1,
                    class_specific_filter=True,
                    name='retinanet-bbox',
                    anchor_params=None,
                    max_detections=300,
                    frames_per_batch=1,
                    crop_size=(14, 14),
                    **kwargs):
    """Construct a RetinaNet model on top of a backbone and adds convenience
    functions to output boxes directly.

    This model uses the minimum retinanet model and appends a few layers
    to compute boxes within the graph. These layers include applying the
    regression values to the anchors and performing NMS.

    Args:
        model (tensorflow.keras.Model): RetinaNet training model to append
            bbox and mask layers to. Must have ``regression`` and
            ``classification`` outputs.
        nms (bool): Whether to use non-maximum suppression
            for the filtering step.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks.
        num_semantic_heads (int): Total number of semantic heads to build.
        class_specific_filter (bool): Whether to use class specific filtering
            or filter for the best scoring class only.
        name (str): Name of the model.
        anchor_params (AnchorParameters): Struct containing anchor parameters.
        max_detections (int): The maximum number of detections allowed.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default ``roi_submodels``.
        kwargs (dict): Additional kwargs to pass to the
            :mod:`deepcell.model_zoo.retinanet.retinanet` model.

    Returns:
        tensorflow.keras.Model: A Model which takes an image as input and
        outputs the detections on the image.

        The order is defined as follows:

        .. code-block:: python

            [
                boxes, scores, labels, masks, semantic[0], semantic[1], ...
            ]

    Raises:
        ValueError: the given model does not have a regression or
            classification submodel.
    """
    # if no anchor parameters are passed, use default values
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    # the given model must already be a training model
    names = ('regression', 'classification')
    if not all(output in model.output_names for output in names):
        raise ValueError('Input is not a training model (no `regression` '
                         'and `classification` outputs were found, '
                         'outputs are: {}).'.format(model.output_names))

    # compute the anchors
    features = [model.get_layer(l).output for l in model.pyramid_levels]
    anchors = __build_anchors(anchor_params, features,
                              frames_per_batch=frames_per_batch)

    # we expect anchors, regression and classification values as first outputs
    regression = model.outputs[0]
    classification = model.outputs[1]

    semantic_classes = [
        1 for layer in model.layers if layer.name.startswith('semantic')
    ]

    # "other" can be any additional output from custom submodels, by default []
    if panoptic:
        # The last outputs are the semantic heads, which should not be
        # sent to filter detections
        num_semantic_heads = len(semantic_classes)
        other = model.outputs[2:-num_semantic_heads]
        semantic = model.outputs[-num_semantic_heads:]
    else:
        other = model.outputs[2:]
        semantic = []

    # apply predicted regression to anchors
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([model.inputs[0], boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        class_specific_filter=class_specific_filter,
        max_detections=max_detections,
        name='filtered_detections')([boxes, classification])

    # apply submodels to detections
    image = model.layers[0].output
    boxes = detections[0]

    fpn = features[0]
    fpn = UpsampleLike()([fpn, image])
    rois = RoiAlign(crop_size=crop_size)([boxes, fpn])

    mask_submodel = model.get_layer('mask_submodel')
    masks = [mask_submodel(rois)]

    # collect detections, masks, and (optionally) semantic outputs
    outputs = detections + list(masks) + list(semantic)

    # construct the model
    new_model = Model(inputs=model.inputs, outputs=outputs, name=name)

    # call the model on the image plus a dummy boxes tensor so that the
    # final model only requires the image as input
    image_input = model.inputs[0]
    shape = (1, 1, 4) if frames_per_batch == 1 else (1, 1, 1, 4)
    temp_boxes = K.zeros(shape, name='temp_boxes')

    new_inputs = [image_input, temp_boxes]
    final_model = new_model(new_inputs)

    return Model(inputs=image_input, outputs=final_model)

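
# Illustrative usage sketch (not part of the original module), assuming
# `training_model` was built by retinanet_mask above (so it exposes
# `pyramid_levels` and a layer named 'mask_submodel') and `batch_of_images`
# is a preprocessed image batch.
def _predict_with_masks(training_model, batch_of_images):
    prediction_model = retinamask_bbox(training_model,
                                       nms=True,
                                       class_specific_filter=False,
                                       max_detections=300)

    # outputs follow [boxes, scores, labels, masks] when panoptic=False
    boxes, scores, labels, masks = prediction_model.predict(batch_of_images)[:4]
    return boxes, scores, labels, masks
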
def retinanet_bbox(model=None,
                   nms=True,
                   class_specific_filter=True,
                   name='retinanet-bbox',
                   anchor_params=None,
                   **kwargs):
    """Construct a RetinaNet model on top of a backbone and adds convenience
    functions to output boxes directly.

    This model uses the minimum retinanet model and appends a few layers
    to compute boxes within the graph. These layers include applying the
    regression values to the anchors and performing NMS.

    Args:
        model: RetinaNet model to append bbox layers to.
            If None, it will create a RetinaNet model using **kwargs.
        nms: Whether to use non-maximum suppression for the filtering step.
        class_specific_filter: Whether to use class specific filtering or
            filter for the best scoring class only.
        name: Name of the model.
        anchor_params: Struct containing anchor parameters.
            If None, default values are used.
        **kwargs: Additional kwargs to pass to the minimal retinanet model.

    Returns:
        A Model which takes an image as input and
        outputs the detections on the image.

        The order is defined as follows:

        ```
        [
            boxes, scores, labels, other[0], other[1], ...
        ]
        ```
    """
    # if no anchor parameters are passed, use default values
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    # create RetinaNet model
    if model is None:
        model = retinanet(num_anchors=anchor_params.num_anchors(), **kwargs)
    else:
        names = ('regression', 'classification')
        if not all(output in model.output_names for output in names):
            raise ValueError('Input is not a training model (no `regression` '
                             'and `classification` outputs were found, '
                             'outputs are: {}).'.format(model.output_names))

    # compute the anchors
    p_names = ['P3', 'P4', 'P5', 'P6', 'P7']
    features = [model.get_layer(p_name).output for p_name in p_names]
    anchors = __build_anchors(anchor_params, features)

    # we expect the anchors, regression and classification values as first outputs
    regression = model.outputs[0]
    classification = model.outputs[1]

    # "other" can be any additional output from custom submodels, by default []
    other = model.outputs[2:]

    # apply predicted regression to anchors
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([model.inputs[0], boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    detections = FilterDetections(
        nms=nms,
        class_specific_filter=class_specific_filter,
        name='filtered_detections')([boxes, classification] + other)

    # construct the model
    return Model(inputs=model.inputs, outputs=detections, name=name)

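
# Illustrative sketch of the intended train/predict split for this variant
# (not part of the original module), assuming `training_model` was built by
# the retinanet constructor in this module, so it has 'regression' and
# 'classification' outputs and layers named 'P3' through 'P7'.
def _make_prediction_model(training_model):
    prediction_model = retinanet_bbox(model=training_model,
                                      nms=True,
                                      class_specific_filter=True)

    # the wrapper is built from training_model's own layers, so weights
    # learned during training are shared with, not copied into, the
    # prediction model.
    return prediction_model
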