def retinanet(inputs,
              backbone_dict,
              num_classes,
              backbone_levels=['C3', 'C4', 'C5'],
              pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
              num_anchors=None,
              create_pyramid_features=__create_pyramid_features,
              create_semantic_head=__create_semantic_head,
              panoptic=False,
              num_semantic_heads=1,
              num_semantic_classes=[3],
              submodels=None,
              frames_per_batch=1,
              semantic_only=False,
              name='retinanet'):
    """Construct a ``RetinaNet`` model on top of a backbone.

    This model is the minimum model necessary for training
    (with the unfortunate exception of anchors as output).

    Args:
        inputs (tensor): The inputs to the network.
        backbone_dict (dict): A dictionary with the backbone layers.
        num_classes (int): Number of classes to classify.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid.
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads.
        num_anchors (int): Number of base anchors. Defaults to
            ``AnchorParameters.default.num_anchors()``.
        create_pyramid_features (function): Function to create pyramid
            features.
        create_semantic_head (function): Function for creating a semantic
            head, which can be used for panoptic segmentation tasks.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks.
        num_semantic_heads (int): The number of semantic segmentation heads.
        num_semantic_classes (list): The number of classes for the semantic
            segmentation part of panoptic segmentation tasks, one entry per
            semantic head. A single int is treated as a one-element list.
        submodels (list): Submodels to run on each feature map
            (default is regression and classification submodels).
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        semantic_only (bool): If True, the model outputs only the semantic
            heads. The semantic heads are built in this case even when
            ``panoptic`` is False.
        name (str): Name of the model.

    Returns:
        tensorflow.keras.Model: A Model which takes an image as input and
        outputs generated anchors and the result from each submodel on
        every pyramid level.

        The order of the outputs is as defined in submodels:

        .. code-block:: python

            [
                regression, classification, other[0], other[1], ...
            ]
    """
    if num_anchors is None:
        num_anchors = AnchorParameters.default.num_anchors()

    if submodels is None:
        submodels = default_submodels(num_classes, num_anchors,
                                      frames_per_batch=frames_per_batch)

    # Normalize num_semantic_classes to a list. A bare int cannot be passed
    # to list(), so wrap it instead of letting list(int) raise TypeError.
    if isinstance(num_semantic_classes, int):
        num_semantic_classes = [num_semantic_classes]
    elif not isinstance(num_semantic_classes, list):
        num_semantic_classes = list(num_semantic_classes)

    # 3D layers are used when more than one frame is in each batch.
    ndim = 3 if frames_per_batch > 1 else 2

    # compute pyramid features as per https://arxiv.org/abs/1708.02002
    # Use only the desired backbone levels to create the feature pyramid
    backbone_dict_reduced = {
        k: backbone_dict[k] for k in backbone_dict if k in backbone_levels
    }
    pyramid_dict = create_pyramid_features(backbone_dict_reduced, ndim=ndim)

    # for the desired pyramid levels, run available submodels
    features = [pyramid_dict[key] for key in pyramid_levels]
    object_head = __build_pyramid(submodels, features)

    # The semantic heads are required both for panoptic models and for
    # semantic-only models. Building them for either flag avoids reading an
    # unassigned ``semantic_head_list`` when semantic_only=True is passed
    # without panoptic=True.
    semantic_head_list = []
    if panoptic or semantic_only:
        # Pyramid keys look like 'P3'; the numeric part is the level.
        semantic_levels = [int(re.findall(r'\d+', k)[0])
                           for k in pyramid_dict]
        target_level = min(semantic_levels)

        for i in range(num_semantic_heads):
            semantic_head_list.append(
                create_semantic_head(pyramid_dict,
                                     n_classes=num_semantic_classes[i],
                                     input_target=inputs,
                                     target_level=target_level,
                                     semantic_id=i,
                                     ndim=ndim))

    if semantic_only:
        outputs = semantic_head_list
    elif panoptic:
        outputs = object_head + semantic_head_list
    else:
        outputs = object_head

    model = Model(inputs=inputs, outputs=outputs, name=name)
    # Record the levels on the model so downstream code can look them up.
    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels

    return model
def retinamask(inputs,
               backbone_dict,
               num_classes,
               frames_per_batch=1,
               backbone_levels=['C3', 'C4', 'C5'],
               pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
               retinanet_model=None,
               anchor_params=None,
               nms=True,
               training=True,
               panoptic=False,
               class_specific_filter=True,
               crop_size=(14, 14),
               mask_size=(28, 28),
               name='retinanet-mask',
               roi_submodels=None,
               max_detections=100,
               score_threshold=0.05,
               nms_threshold=0.5,
               mask_dtype=K.floatx(),
               **kwargs):
    """Construct a masking model by appending layers to compute masks to a
    :mod:`deepcell.model_zoo.retinanet.retinanet` model.

    Args:
        inputs (tensor): The image input tensor. When ``training`` is True,
            a second ``boxes_input`` Input is created internally and the
            returned model takes ``[image, boxes]`` as inputs.
        backbone_dict (dict): A dictionary with the backbone layers.
        num_classes (int): Integer, number of classes to classify.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid.
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to.
        retinanet_model (tensorflow.keras.Model):
            :mod:`deepcell.model_zoo.retinanet.retinanet` model that predicts
            regression and classification values. Built from the other
            arguments when None.
        anchor_params (AnchorParameters): Struct containing anchor
            parameters. Defaults to ``AnchorParameters.default``.
        nms (bool): Whether to use non-maximum suppression
            for the filtering step.
        training (bool): Whether to use the bounding boxes as the detections,
            during training or to use the
            :mod:`deepcell.layers.filter_detections.FilterDetections`
            during inference.
        panoptic (bool): Flag for adding the semantic head for panoptic
            segmentation tasks.
        class_specific_filter (bool): Use class specific filtering.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default ``roi_submodels``.
        mask_size (tuple): 2-length tuple for the x-y size of the masks.
            Used to create default ``roi_submodels``.
        name (str): Name of the model.
        roi_submodels (list): Submodels for processing ROIs, as
            ``(name, submodel)`` pairs.
        max_detections (int): The maximum number of detections allowed.
        score_threshold (float): Minimum score for the
            :mod:`deepcell.layers.filter_detections.FilterDetections` layer.
        nms_threshold (float): Minimum NMS for the
            :mod:`deepcell.layers.filter_detections.FilterDetections` layer.
        mask_dtype (str): ``dtype`` to use for mask tensors.
        kwargs (dict): Additional kwargs to pass to the
            :mod:`deepcell.model_zoo.retinanet.retinanet` model.

    Returns:
        tensorflow.keras.Model: Model with inputs as input and as output
        the output of each submodel for each pyramid level and the
        detections. The order is as defined in submodels.

        .. code-block:: python

            [
                regression, classification, other[0], ...,
                boxes_masks, boxes, scores, labels, masks, other[0], ...
            ]
    """
    if anchor_params is None:
        anchor_params = AnchorParameters.default

    if roi_submodels is None:
        # The default ROI submodels are built under ``mask_dtype``.
        # Restore the previous global floatx even if construction fails so
        # the rest of the process is not left with the wrong default dtype.
        retinanet_dtype = K.floatx()
        K.set_floatx(mask_dtype)
        try:
            roi_submodels = default_roi_submodels(
                num_classes, crop_size, mask_size,
                frames_per_batch, mask_dtype, retinanet_dtype)
        finally:
            K.set_floatx(retinanet_dtype)

    image = inputs

    if retinanet_model is None:
        retinanet_model = retinanet(
            inputs=image,
            backbone_dict=backbone_dict,
            num_classes=num_classes,
            backbone_levels=backbone_levels,
            pyramid_levels=pyramid_levels,
            panoptic=panoptic,
            num_anchors=anchor_params.num_anchors(),
            frames_per_batch=frames_per_batch,
            **kwargs)

    # parse outputs
    regression = retinanet_model.outputs[0]
    classification = retinanet_model.outputs[1]

    # Semantic heads are counted by layer name prefix.
    n_semantic_heads = sum(
        1 for layer in retinanet_model.layers
        if layer.name.startswith('semantic'))

    if panoptic and n_semantic_heads > 0:
        # The panoptic output should not be sent to filter detections
        other = retinanet_model.outputs[2:-n_semantic_heads]
        semantic = retinanet_model.outputs[-n_semantic_heads:]
    else:
        # Also covers panoptic=True with no semantic layers: slicing with
        # -0 would yield an empty ``other`` and treat every output as
        # semantic, so fall back to the non-panoptic split.
        other = retinanet_model.outputs[2:]
        semantic = []

    features = [
        retinanet_model.get_layer(level).output for level in pyramid_levels
    ]

    # build boxes
    anchors = __build_anchors(anchor_params, features,
                              frames_per_batch=frames_per_batch)
    boxes = RegressBoxes(name='boxes')([anchors, regression])
    boxes = ClipBoxes(name='clipped_boxes')([image, boxes])

    # filter detections (apply NMS / score threshold / select top-k)
    if training:
        # During training the boxes are supplied as a model input instead
        # of being taken from the filtered detections.
        if frames_per_batch == 1:
            boxes = Input(shape=(None, 4), name='boxes_input')
        else:
            boxes = Input(shape=(None, None, 4), name='boxes_input')
        detections = []
    else:
        detections = FilterDetections(
            nms=nms,
            nms_threshold=nms_threshold,
            score_threshold=score_threshold,
            class_specific_filter=class_specific_filter,
            max_detections=max_detections,
            name='filtered_detections'
        )([boxes, classification] + other)

        # split up in known outputs and "other"
        boxes = detections[0]

    # ROIs are cropped from the first requested pyramid level, upsampled
    # to match the image.
    fpn = features[0]
    fpn = UpsampleLike()([fpn, image])
    rois = RoiAlign(crop_size=crop_size)([boxes, fpn])

    # execute maskrcnn submodels
    maskrcnn_outputs = [submodel(rois) for _, submodel in roi_submodels]

    # concatenate boxes for loss computation
    # (loop variable deliberately not called ``name`` so the model name
    # parameter is never shadowed)
    trainable_outputs = [
        ConcatenateBoxes(name=submodel_name)([boxes, output])
        for (submodel_name, _), output in zip(roi_submodels,
                                              maskrcnn_outputs)
    ]

    # reconstruct the new output
    outputs = [regression, classification] + other + trainable_outputs + \
        detections + maskrcnn_outputs + list(semantic)

    inputs = [image, boxes] if training else image
    model = Model(inputs=inputs, outputs=outputs, name=name)
    # Record the levels on the model so downstream code can look them up.
    model.backbone_levels = backbone_levels
    model.pyramid_levels = pyramid_levels

    return model