def LabelDetectionModel(input_shape=(None, None, 1),
                        inputs=None,
                        backbone='mobilenetv2',
                        use_pretrained_weights=True):
    """Classify a microscopy image as Nuclear, Cytoplasm, or Phase.

    This can be helpful in determining the type of data (nuclear,
    cytoplasm, etc.) so that this data can be forwarded to the correct
    segmentation model.

    Args:
        input_shape (tuple): a 3-length tuple of the input data shape.
        inputs (tensor): Optional input tensor. If not provided, one is
            created from ``input_shape``.
        backbone (str): name of the backbone to use for the model.
        use_pretrained_weights (bool): whether to load pre-trained weights.
            Only supported for the MobileNetV2 backbone.

    Raises:
        ValueError: ``use_pretrained_weights`` is ``True`` but no weights
            file exists for ``backbone``.
    """
    required_channels = 3  # required for most backbones

    if inputs is None:
        inputs = keras.layers.Input(shape=input_shape)

    if keras.backend.image_data_format() == 'channels_first':
        channel_axis = 0
    else:
        channel_axis = -1

    norm = ImageNormalization2D(norm_method='whole_image')(inputs)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    fixed_input_shape = list(input_shape)
    fixed_input_shape[channel_axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    backbone_model = get_backbone(
        backbone,
        fixed_inputs,
        use_imagenet=False,
        return_dict=False,
        include_top=False,
        weights=None,
        input_shape=fixed_input_shape,
        pooling=None)

    x = keras.layers.AveragePooling2D(4)(backbone_model.outputs[0])
    x = TensorProduct(256)(x)
    x = TensorProduct(3)(x)
    x = keras.layers.Flatten()(x)
    outputs = keras.layers.Activation('softmax')(x)

    model = keras.Model(inputs=backbone_model.inputs, outputs=outputs)

    if use_pretrained_weights:
        local_name = 'LabelDetectionModel_{}.h5'.format(backbone)
        # BUG FIX: the original tested membership in
        # ``{'mobilenetv2' or 'mobilenet_v2'}``, which the ``or`` collapses
        # to the one-element set {'mobilenetv2'}, silently rejecting the
        # 'mobilenet_v2' spelling.
        if backbone.lower() in ('mobilenetv2', 'mobilenet_v2'):
            weights_path = get_file(
                local_name,
                MOBILENETV2_WEIGHTS_PATH,
                cache_subdir='models',
                md5_hash='b8231f32f01c1cd6448d06e276dd5949')
        else:
            raise ValueError('Backbone %s does not have a weights file.'
                             % backbone)

        model.load_weights(weights_path)

    return model
def semantic_prediction(semantic_names,
                        semantic_features,
                        target_level=0,
                        input_target=None,
                        n_filters=64,
                        n_dense=64,
                        ndim=2,
                        n_classes=3,
                        semantic_id=0):
    """Creates the prediction head from a list of semantic features.

    Args:
        semantic_names (list): Names of the semantic feature layers.
        semantic_features (list): Semantic feature tensors. Both lists
            must be in decreasing order, e.g. ``[Q6, Q5, Q4, ...]``.
        target_level (int): The level to reach; 2x upsampling is applied
            until the target level is reached. Defaults to 0.
        input_target (tensor): Optional tensor with the input image.
        n_filters (int): Number of convolutional filters. Defaults to 64.
        n_dense (int): Number of filters for dense layers. Defaults to 64.
        ndim (int): Spatial dimensionality, 2 or 3. Defaults to 2.
        n_classes (int): Number of classes to predict. Defaults to 3.
        semantic_id (int): Number used to name the final layer, allowing
            multiple semantic heads. Defaults to 0.

    Returns:
        tensor: The softmax prediction for the semantic segmentation head.

    Raises:
        ValueError: ``ndim`` is not 2 or 3.
    """
    if ndim not in (2, 3):
        raise ValueError('Only 2 and 3 dimensional networks are supported')

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # Sum all semantic feature maps together.
    summed = semantic_features[0]
    for feature in semantic_features[1:]:
        summed = Add()([summed, feature])

    # Upsample from the deepest remaining level back to the target level.
    min_level = int(re.findall(r'\d+', semantic_names[-1])[0])
    n_upsample = min_level - target_level
    x = semantic_upsample(summed, n_upsample, target=input_target)

    # Dense block: tensor product + batch norm + relu.
    x = TensorProduct(n_dense)(x)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Activation('relu')(x)

    # Per-pixel classification with a named softmax output.
    x = TensorProduct(n_classes)(x)
    return Softmax(axis=channel_axis, name='semantic_' + str(semantic_id))(x)
def ScaleDetectionModel(input_shape=(None, None, 1),
                        inputs=None,
                        backbone='mobilenetv2',
                        use_pretrained_weights=True):
    """Create a ScaleDetectionModel for detecting scales of input data.

    This enables data to be scaled appropriately for other segmentation
    models which may not be resolution tolerant.

    Args:
        input_shape (tuple): a 3-length tuple of the input data shape.
        inputs (tensor): Optional input tensor. If not provided, one is
            created from ``input_shape``.
        backbone (str): name of the backbone to use for the model.
        use_pretrained_weights (bool): whether to load pre-trained weights.
            Only supported for the MobileNetV2 backbone.

    Raises:
        ValueError: ``use_pretrained_weights`` is ``True`` but no weights
            file exists for ``backbone``.
    """
    required_channels = 3  # required for most backbones

    if inputs is None:
        inputs = keras.layers.Input(shape=input_shape)

    if keras.backend.image_data_format() == 'channels_first':
        channel_axis = 0
    else:
        channel_axis = -1

    norm = ImageNormalization2D(norm_method='whole_image')(inputs)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    fixed_input_shape = list(input_shape)
    fixed_input_shape[channel_axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    backbone_model = get_backbone(
        backbone,
        fixed_inputs,
        use_imagenet=False,
        return_dict=False,
        include_top=False,
        weights=None,
        input_shape=fixed_input_shape,
        pooling=None)

    x = keras.layers.AveragePooling2D(4)(backbone_model.outputs[0])
    x = TensorProduct(256, activation='relu')(x)
    x = TensorProduct(1)(x)
    outputs = keras.layers.Flatten()(x)

    model = keras.Model(inputs=backbone_model.inputs, outputs=outputs)

    if use_pretrained_weights:
        local_name = 'ScaleDetectionModel_{}.h5'.format(backbone)
        # BUG FIX: the original tested membership in
        # ``{'mobilenetv2' or 'mobilenet_v2'}``, which the ``or`` collapses
        # to the one-element set {'mobilenetv2'}, silently rejecting the
        # 'mobilenet_v2' spelling.
        if backbone.lower() in ('mobilenetv2', 'mobilenet_v2'):
            weights_path = get_file(
                local_name,
                MOBILENETV2_WEIGHTS_PATH,
                cache_subdir='models',
                md5_hash='b9943554a86096fb66608ec66078aa46')
        else:
            raise ValueError('Backbone %s does not have a weights file.'
                             % backbone)

        model.load_weights(weights_path)

    return model
def LabelDetectionModel(input_shape=(None, None, 1),
                        inputs=None,
                        backbone='mobilenetv2',
                        num_classes=3):
    """Classify a microscopy image as Nuclear, Cytoplasm, or Phase.

    This can be helpful in determining the type of data (nuclear,
    cytoplasm, etc.) so that this data can be forwarded to the correct
    segmentation model.

    Built on a standard backbone preceded by an ``ImageNormalization2D``
    layer and followed by ``AveragePooling2D``, ``TensorProduct``, and
    ``Softmax`` layers.

    Args:
        input_shape (tuple): a 3-length tuple of the input data shape.
        inputs (tensorflow.keras.Layer): Optional input layer of the model.
            If not provided, creates a ``Layer`` based on ``input_shape``.
        backbone (str): name of the backbone to use for the model.
        num_classes (int): The number of labels to detect.
    """
    required_channels = 3  # required for most backbones

    if inputs is None:
        inputs = tf.keras.layers.Input(shape=input_shape)

    is_channels_first = tf.keras.backend.image_data_format() == 'channels_first'
    channel_axis = 0 if is_channels_first else -1

    normed = ImageNormalization2D(norm_method='whole_image')(inputs)
    backbone_inputs = TensorProduct(required_channels)(normed)

    # Force the backbone input shape to carry `required_channels` channels.
    shape = list(input_shape)
    shape[channel_axis] = required_channels

    backbone_model = get_backbone(
        backbone,
        backbone_inputs,
        use_imagenet=False,
        return_dict=False,
        include_top=False,
        weights=None,
        input_shape=tuple(shape),
        pooling=None)

    # Classification head: pool, flatten, two dense products, softmax.
    head = tf.keras.layers.AveragePooling2D(4)(backbone_model.outputs[0])
    head = tf.keras.layers.Flatten()(head)
    head = TensorProduct(256)(head)
    head = TensorProduct(num_classes)(head)
    outputs = tf.keras.layers.Softmax(dtype=tf.keras.backend.floatx())(head)

    return tf.keras.Model(inputs=backbone_model.inputs, outputs=outputs)
def ScaleDetectionModel(input_shape=(None, None, 1),
                        inputs=None,
                        backbone='mobilenetv2'):
    """Create a ``ScaleDetectionModel`` for detecting scales of input data.

    This enables data to be scaled appropriately for other segmentation
    models which may not be resolution tolerant.

    Built on a standard backbone preceded by an ``ImageNormalization2D``
    layer and followed by ``AveragePooling2D`` and ``TensorProduct`` layers.

    Args:
        input_shape (tuple): a 3-length tuple of the input data shape.
        inputs (tensorflow.keras.Layer): Optional input layer of the model.
            If not provided, creates a ``Layer`` based on ``input_shape``.
        backbone (str): name of the backbone to use for the model.
    """
    required_channels = 3  # required for most backbones

    if inputs is None:
        inputs = tf.keras.layers.Input(shape=input_shape)

    is_channels_first = tf.keras.backend.image_data_format() == 'channels_first'
    channel_axis = 0 if is_channels_first else -1

    normed = ImageNormalization2D(norm_method='whole_image')(inputs)
    backbone_inputs = TensorProduct(required_channels)(normed)

    # Force the backbone input shape to carry `required_channels` channels.
    shape = list(input_shape)
    shape[channel_axis] = required_channels

    backbone_model = get_backbone(
        backbone,
        backbone_inputs,
        use_imagenet=False,
        return_dict=False,
        include_top=False,
        weights=None,
        input_shape=tuple(shape),
        pooling=None)

    # Regression head: pool, dense relu, single-value product, flatten.
    head = tf.keras.layers.AveragePooling2D(4)(backbone_model.outputs[0])
    head = TensorProduct(256, activation='relu')(head)
    head = TensorProduct(1)(head)
    outputs = tf.keras.layers.Flatten()(head)

    return tf.keras.Model(inputs=backbone_model.inputs, outputs=outputs)
def MaskRCNN_3D(backbone,
                num_classes,
                input_shape,
                norm_method='whole_image',
                crop_size=(14, 14, 14),
                weights=None,
                pooling=None,
                mask_dtype=K.floatx(),
                required_channels=3,
                **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone (str): name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        norm_method (str): ImageNormalization mode to use.
        crop_size (tuple): 3-length tuple for the size of the crops.
        weights: one of ``None`` (random initialization), 'imagenet'
            (pre-training on ImageNet), or the path to the weights file
            to be loaded.
        pooling (str): optional pooling mode for feature extraction when
            ``include_top`` is ``False``.

            - ``None`` means that the output of the model will be the 4D
              tensor output of the last convolutional layer.
            - ``avg`` means that global average pooling will be applied to
              the output of the last convolutional layer, and thus the
              output of the model will be a 2D tensor.
            - ``max`` means that global max pooling will be applied.
        mask_dtype: Dtype to use for mask tensors.
        required_channels (int): the required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for ``retinanet_mask_3D``.

    Returns:
        RetinaNet model with a backbone.
    """
    inputs = Input(shape=input_shape)

    # Coerce the backbone input to `required_channels` channels.
    normed = ImageNormalization3D(norm_method=norm_method)(inputs)
    fixed_inputs = TensorProduct(required_channels)(normed)

    # NOTE(review): the original passes `inputs` (not `fixed_inputs`) as the
    # positional argument while supplying `fixed_inputs` via `input_tensor`
    # — preserved as-is; confirm this is intentional.
    layer_outputs = get_pyramid_layer_outputs(
        backbone,
        inputs,
        include_top=False,
        input_tensor=fixed_inputs,
        weights=weights,
        pooling=pooling)

    kwargs['backbone_layers'] = layer_outputs

    # Create the full model.
    return retinanet_mask_3D(
        inputs=inputs,
        num_classes=num_classes,
        crop_size=crop_size,
        name='{}_retinanet_mask_3D'.format(backbone),
        mask_dtype=mask_dtype,
        **kwargs)
def PanopticNet(backbone,
                input_shape,
                inputs=None,
                backbone_levels=None,
                pyramid_levels=None,
                create_pyramid_features=__create_pyramid_features,
                create_semantic_head=__create_semantic_head,
                frames_per_batch=1,
                temporal_mode=None,
                num_semantic_heads=1,
                num_semantic_classes=None,
                required_channels=3,
                norm_method='whole_image',
                pooling=None,
                location=True,
                use_imagenet=True,
                name='panopticnet',
                **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides ``input_shape``.
        backbone_levels (list): The backbone levels to be used to create
            the feature pyramid. Defaults to ``['C3', 'C4', 'C5']``.
        pyramid_levels (list): Pyramid levels to use.
            Defaults to ``['P3', 'P4', 'P5', 'P6', 'P7']``.
        create_pyramid_features (function): Function to get the pyramid
            features from the backbone.
        create_semantic_head (function): Function to build a semantic head
            submodel.
        frames_per_batch (int): Defaults to 1.
        temporal_mode: Mode of temporal convolution. Choose from
            ``{'conv', 'lstm', 'gru', None}``. Defaults to ``None``.
        num_semantic_heads (int): Defaults to 1.
        num_semantic_classes (list): Defaults to ``[3]``.
        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        norm_method (str): ImageNormalization mode to use.
            Defaults to 'whole_image'.
        pooling (str): optional pooling mode for feature extraction when
            ``include_top`` is ``False``.

            - ``None`` means that the output of the model will be the 4D
              tensor output of the last convolutional layer.
            - ``'avg'`` means that global average pooling will be applied
              to the output of the last convolutional layer, and thus the
              output of the model will be a 2D tensor.
            - ``'max'`` means that global max pooling will be applied.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained
            weights.
        name (str): Name of the model.
        kwargs (dict): Other standard inputs for ``retinanet_mask``.

    Raises:
        ValueError: ``temporal_mode`` not 'conv', 'lstm', 'gru' or None.

    Returns:
        tensorflow.keras.Model: Panoptic model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # FIX: previously these were mutable (list) default arguments, which
    # are shared between calls; resolve None sentinels to the defaults here.
    if backbone_levels is None:
        backbone_levels = ['C3', 'C4', 'C5']
    if pyramid_levels is None:
        pyramid_levels = ['P3', 'P4', 'P5', 'P6', 'P7']
    if num_semantic_classes is None:
        num_semantic_classes = [3]

    # Check input to __merge_temporal_features
    acceptable_modes = {'conv', 'lstm', 'gru', None}
    if temporal_mode is not None:
        temporal_mode = str(temporal_mode).lower()
        if temporal_mode not in acceptable_modes:
            raise ValueError('Mode {} not supported. Please choose from {}.'.format(
                temporal_mode, str(acceptable_modes)))

    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time)
        else:
            inputs = Input(shape=input_shape)

    # force the channel size for backbone input to be `required_channels`
    if norm_method is None:
        norm = inputs
    else:
        if frames_per_batch > 1:
            norm = TimeDistributed(
                ImageNormalization2D(norm_method=norm_method))(inputs)
        else:
            norm = ImageNormalization2D(norm_method=norm_method)(inputs)

    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(norm)
        else:
            loc = Location2D(in_shape=input_shape)(norm)
        concat = Concatenate(axis=channel_axis)([norm, loc])
    else:
        concat = norm

    if frames_per_batch > 1:
        fixed_inputs = TimeDistributed(TensorProduct(required_channels))(concat)
    else:
        fixed_inputs = TensorProduct(required_channels)(concat)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone,
                                    fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    backbone_dict_reduced = {k: backbone_dict[k] for k in backbone_dict
                             if k in backbone_levels}

    ndim = 2 if frames_per_batch == 1 else 3

    pyramid_dict = create_pyramid_features(backbone_dict_reduced, ndim=ndim)

    features = [pyramid_dict[key] for key in pyramid_levels]

    if frames_per_batch > 1:
        temporal_features = [__merge_temporal_features(feature, mode=temporal_mode)
                             for feature in features]
        # BUG FIX: features were gathered in `pyramid_levels` order, so zip
        # against `pyramid_levels` — not `pyramid_dict.keys()`, whose order
        # may differ and would assign temporal features to the wrong keys.
        for f, k in zip(temporal_features, pyramid_levels):
            pyramid_dict[k] = f

    semantic_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(semantic_levels)

    semantic_head_list = []
    for i in range(num_semantic_heads):
        semantic_head_list.append(create_semantic_head(
            pyramid_dict, n_classes=num_semantic_classes[i],
            input_target=inputs, target_level=target_level,
            semantic_id=i, ndim=ndim, **kwargs))

    outputs = semantic_head_list

    model = Model(inputs=inputs, outputs=outputs, name=name)
    return model
def RetinaNet(backbone,
              num_classes,
              input_shape,
              inputs=None,
              norm_method='whole_image',
              location=False,
              use_imagenet=False,
              pooling=None,
              required_channels=3,
              **kwargs):
    """Constructs a retinanet model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides ``input_shape``.
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained
            weights.
        pooling (str): optional pooling mode for feature extraction when
            ``include_top`` is ``False``.

            - ``None`` means that the output of the model will be the 4D
              tensor output of the last convolutional layer.
            - ``'avg'`` means that global average pooling will be applied
              to the output of the last convolutional layer, and thus the
              output of the model will be a 2D tensor.
            - ``'max'`` means that global max pooling will be applied.
        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for ``retinanet``.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    if inputs is None:
        inputs = Input(shape=input_shape)

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # Optionally append a location layer to the inputs.
    if location:
        loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # Force the channel size for backbone input to be `required_channels`.
    normed = ImageNormalization2D(norm_method=norm_method)(concat)
    fixed_inputs = TensorProduct(required_channels)(normed)

    # Force the input shape to match.
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels

    backbone_dict = get_backbone(backbone,
                                 fixed_inputs,
                                 use_imagenet=use_imagenet,
                                 include_top=False,
                                 weights=None,
                                 input_shape=tuple(fixed_input_shape),
                                 pooling=pooling)

    # Create the full model.
    return retinanet(inputs=inputs,
                     num_classes=num_classes,
                     backbone_dict=backbone_dict,
                     name='{}_retinanet'.format(backbone),
                     **kwargs)
def RetinaMask(backbone,
               num_classes,
               input_shape,
               inputs=None,
               backbone_levels=None,
               pyramid_levels=None,
               norm_method='whole_image',
               location=False,
               use_imagenet=False,
               crop_size=(14, 14),
               pooling=None,
               mask_dtype=K.floatx(),
               required_channels=3,
               frames_per_batch=1,
               **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides input_shape.
        backbone_levels (list): The backbone levels to be used to create
            the feature pyramid. Defaults to ``['C3', 'C4', 'C5']``.
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to.
            Defaults to ``['P3', 'P4', 'P5', 'P6', 'P7']``.
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained
            weights.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        pooling (str): optional pooling mode for feature extraction when
            ``include_top`` is ``False``.

            - ``None`` means that the output of the model will be the 4D
              tensor output of the last convolutional layer.
            - ``'avg'`` means that global average pooling will be applied
              to the output of the last convolutional layer, and thus the
              output of the model will be a 2D tensor.
            - ``'max'`` means that global max pooling will be applied.
        mask_dtype (str): Dtype to use for mask tensors.
        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        kwargs (dict): Other standard inputs for retinanet_mask.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # FIX: previously these were mutable (list) default arguments, which
    # are shared between calls; resolve None sentinels to the defaults here.
    if backbone_levels is None:
        backbone_levels = ['C3', 'C4', 'C5']
    if pyramid_levels is None:
        pyramid_levels = ['P3', 'P4', 'P5', 'P6', 'P7']

    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time)
        else:
            inputs = Input(shape=input_shape)

    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(inputs)
        else:
            loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    if frames_per_batch > 1:
        norm = TimeDistributed(
            ImageNormalization2D(norm_method=norm_method))(concat)
        fixed_inputs = TimeDistributed(TensorProduct(required_channels))(norm)
    else:
        norm = ImageNormalization2D(norm_method=norm_method)(concat)
        fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone,
                                    fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    # create the full model
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          backbone_dict=backbone_dict,
                          crop_size=crop_size,
                          backbone_levels=backbone_levels,
                          pyramid_levels=pyramid_levels,
                          name='{}_retinanet_mask'.format(backbone),
                          mask_dtype=mask_dtype,
                          frames_per_batch=frames_per_batch,
                          **kwargs)
def __create_semantic_head(pyramid_dict,
                           n_classes=3,
                           n_filters=64,
                           n_dense=128,
                           semantic_id=0,
                           ndim=2,
                           include_top=False,
                           target_level=2,
                           **kwargs):
    """Creates a semantic head from a feature pyramid network.

    Args:
        pyramid_dict (dict): dict of pyramid names and features.
        n_classes (int): The number of classes to be predicted.
            Defaults to 3.
        n_filters (int): The number of convolutional filters.
            Defaults to 64.
        n_dense (int): Number of dense filters. Defaults to 128.
        semantic_id (int): Defaults to 0.
        ndim (int): Defaults to 2, 3d supported.
        include_top (bool): Whether to apply a final softmax.
            Defaults to False.
        target_level (int): The level we need to reach; performs 2x
            upsampling until we're at the target level. Defaults to 2.

    Returns:
        keras.layers.Layer: The semantic segmentation head.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # A single-class head never gets a softmax top.
    if n_classes == 1:
        include_top = False

    # The first key in sorted order is the lowest pyramid level, i.e. the
    # highest-resolution feature map; upsample it back to full resolution.
    # (The original reversed both name/feature lists and indexed from the
    # end — this selects the same level directly.)
    pyramid_names = get_sorted_keys(pyramid_dict)
    lowest_name = pyramid_names[0]
    n_upsample = int(re.findall(r'\d+', lowest_name[-1])[0])
    x = semantic_upsample(pyramid_dict[lowest_name], n_upsample, ndim=ndim)

    # First tensor product
    x = TensorProduct(n_dense)(x)
    x = BatchNormalization(axis=channel_axis)(x)
    x = Activation('relu')(x)

    # Apply tensor product and final activation layer
    x = TensorProduct(n_classes)(x)

    layer_name = 'semantic_{}'.format(semantic_id)
    if include_top:
        x = Softmax(axis=channel_axis, name=layer_name)(x)
    else:
        x = Activation('relu', name=layer_name)(x)

    return x
def LabelDetectionModel(input_shape=(None, None, 1),
                        inputs=None,
                        backbone='mobilenetv2',
                        use_pretrained_weights=True):
    """Classify a microscopy image as Nuclear, Cytoplasm, or Phase.

    This can be helpful in determining the type of data (nuclear,
    cytoplasm, etc.) so that this data can be forwarded to the correct
    segmentation model.

    Based on a standard backbone with an initial ImageNormalization2D and
    final AveragePooling2D, TensorProduct, and Softmax layers.

    Args:
        input_shape (tuple): a 3-length tuple of the input data shape.
        inputs (tensorflow.keras.Layer): Optional input layer of the model.
            If not provided, creates a Layer based on input_shape.
        backbone (str): name of the backbone to use for the model.
        use_pretrained_weights (bool): whether to load pre-trained weights.
            Only supports the MobileNetV2 backbone.

    Raises:
        ValueError: ``use_pretrained_weights`` is ``True`` but no weights
            file exists for ``backbone``.
    """
    required_channels = 3  # required for most backbones

    if inputs is None:
        inputs = keras.layers.Input(shape=input_shape)

    if keras.backend.image_data_format() == 'channels_first':
        channel_axis = 0
    else:
        channel_axis = -1

    norm = ImageNormalization2D(norm_method='whole_image')(inputs)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    fixed_input_shape = list(input_shape)
    fixed_input_shape[channel_axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    backbone_model = get_backbone(
        backbone,
        fixed_inputs,
        use_imagenet=False,
        return_dict=False,
        include_top=False,
        weights=None,
        input_shape=fixed_input_shape,
        pooling=None)

    x = keras.layers.AveragePooling2D(4)(backbone_model.outputs[0])
    x = TensorProduct(256)(x)
    x = TensorProduct(3)(x)
    x = keras.layers.Flatten()(x)
    outputs = keras.layers.Activation('softmax')(x)

    model = keras.Model(inputs=backbone_model.inputs, outputs=outputs)

    if use_pretrained_weights:
        local_name = 'LabelDetectionModel_{}.h5'.format(backbone)
        # BUG FIX: the original tested membership in
        # ``{'mobilenetv2' or 'mobilenet_v2'}``, which the ``or`` collapses
        # to the one-element set {'mobilenetv2'}, silently rejecting the
        # 'mobilenet_v2' spelling.
        if backbone.lower() in ('mobilenetv2', 'mobilenet_v2'):
            weights_path = get_file(
                local_name,
                MOBILENETV2_WEIGHTS_PATH,
                cache_subdir='models',
                file_hash='14d4b2f7c77d334c958d2dde79972e6e')
        else:
            raise ValueError('Backbone %s does not have a weights file.'
                             % backbone)

        model.load_weights(weights_path)

    return model
def bn_feature_net_2D(receptive_field=61,
                      input_shape=(256, 256, 1),
                      inputs=None,
                      n_features=3,
                      n_channels=1,
                      reg=1e-5,
                      n_conv_filters=64,
                      n_dense_filters=200,
                      VGG_mode=False,
                      init='he_normal',
                      norm_method='std',
                      location=False,
                      dilated=False,
                      padding=False,
                      padding_mode='reflect',
                      multires=False,
                      include_top=True):
    """Creates a 2D featurenet.

    Args:
        receptive_field (int): the receptive field of the neural network.
        input_shape (tuple): If no input tensor, create one with this shape.
        inputs (tensor): optional input tensor
        n_features (int): Number of output features
        n_channels (int): number of input channels
        reg (int): regularization value
        n_conv_filters (int): number of convolutional filters
        n_dense_filters (int): number of dense filters
        VGG_mode (bool): If multires, uses VGG_mode for multiresolution
        init (str): Method for initalizing weights.
        norm_method (str): ImageNormalization mode to use
        location (bool): Whether to include location data
        dilated (bool): Whether to use dilated pooling.
        padding (bool): Whether to use padding.
        padding_mode (str): Type of padding, one of 'reflect' or 'zero'
        multires (bool): Enables multi-resolution mode
        include_top (bool): Whether to include the final layer of the model

    Returns:
        tensorflow.keras.Model: 2D FeatureNet
    """
    # Create layers list (x) to store all of the layers.
    # We need to use the functional API to enable the multiresolution mode
    x = []

    win = (receptive_field - 1) // 2

    # Dilated mode processes full images, so padding is required to keep
    # the output the same size as the input.
    if dilated:
        padding = True

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
        row_axis = 2
        col_axis = 3

        # Non-dilated mode classifies fixed receptive-field patches.
        if not dilated:
            input_shape = (n_channels, receptive_field, receptive_field)
    else:
        row_axis = 1
        col_axis = 2
        channel_axis = -1
        if not dilated:
            input_shape = (receptive_field, receptive_field, n_channels)

    # Use the provided tensor (wrapping non-Keras tensors) or build an Input.
    if inputs is not None:
        if not K.is_keras_tensor(inputs):
            img_input = Input(tensor=inputs, shape=input_shape)
        else:
            img_input = inputs
        x.append(img_input)
    else:
        x.append(Input(shape=input_shape))

    x.append(ImageNormalization2D(norm_method=norm_method,
                                  filter_size=receptive_field)(x[-1]))

    if padding:
        if padding_mode == 'reflect':
            x.append(ReflectionPadding2D(padding=(win, win))(x[-1]))
        elif padding_mode == 'zero':
            x.append(ZeroPadding2D(padding=(win, win))(x[-1]))

    # Optionally concatenate pixel-coordinate features onto the image.
    if location:
        x.append(Location2D(in_shape=tuple(x[-1].shape.as_list()[1:]))(x[-1]))
        x.append(Concatenate(axis=channel_axis)([x[-2], x[-1]]))

    layers_to_concat = []

    rf_counter = receptive_field
    block_counter = 0
    d = 1  # dilation rate; doubles after each (dilated) pooling step

    # Stack conv blocks until the remaining receptive field is consumed.
    # Filter size alternates 3/4 so the receptive field shrinks evenly.
    while rf_counter > 4:
        filter_size = 3 if rf_counter % 2 == 0 else 4
        x.append(Conv2D(n_conv_filters, filter_size, dilation_rate=d,
                        kernel_initializer=init, padding='valid',
                        kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        block_counter += 1
        rf_counter -= filter_size - 1

        # Pool after every second conv block.
        if block_counter % 2 == 0:
            if dilated:
                x.append(DilatedMaxPool2D(dilation_rate=d,
                                          pool_size=(2, 2))(x[-1]))
                d *= 2
            else:
                x.append(MaxPool2D(pool_size=(2, 2))(x[-1]))

            if VGG_mode:
                n_conv_filters *= 2

            rf_counter = rf_counter // 2

            # Remember post-pooling layers for multiresolution concat.
            if multires:
                layers_to_concat.append(len(x) - 1)

    if multires:
        c = []
        # Center-crop each saved feature map down to the final map's size
        # so they can be concatenated along the channel axis.
        for l in layers_to_concat:
            output_shape = x[l].get_shape().as_list()
            target_shape = x[-1].get_shape().as_list()

            row_crop = int(output_shape[row_axis] - target_shape[row_axis])
            if row_crop % 2 == 0:
                row_crop = (row_crop // 2, row_crop // 2)
            else:
                row_crop = (row_crop // 2, row_crop // 2 + 1)

            col_crop = int(output_shape[col_axis] - target_shape[col_axis])
            if col_crop % 2 == 0:
                col_crop = (col_crop // 2, col_crop // 2)
            else:
                col_crop = (col_crop // 2, col_crop // 2 + 1)

            cropping = (row_crop, col_crop)

            c.append(Cropping2D(cropping=cropping)(x[l]))

    if multires:
        x.append(Concatenate(axis=channel_axis)(c))

    # Final conv consumes the remaining receptive field in one kernel.
    x.append(Conv2D(n_dense_filters, (rf_counter, rf_counter),
                    dilation_rate=d, kernel_initializer=init,
                    padding='valid', kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    if include_top:
        x.append(TensorProduct(n_dense_filters, kernel_initializer=init,
                               kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        x.append(TensorProduct(n_features, kernel_initializer=init,
                               kernel_regularizer=l2(reg))(x[-1]))

        if not dilated:
            x.append(Flatten()(x[-1]))

        x.append(Softmax(axis=channel_axis)(x[-1]))

    # If a raw tensor was passed in, recover its true source inputs.
    if inputs is not None:
        real_inputs = keras_utils.get_source_inputs(x[0])
    else:
        real_inputs = x[0]

    model = Model(inputs=real_inputs, outputs=x[-1])

    return model
def RetinaNet(backbone,
              num_classes,
              input_shape,
              inputs=None,
              norm_method='whole_image',
              location=False,
              use_imagenet=False,
              pooling=None,
              required_channels=3,
              frames_per_batch=1,
              **kwargs):
    """Constructs a RetinaNet model using a backbone from ``keras-applications``.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides ``input_shape``.
        norm_method (str): Normalization method to use with the
            :mod:`deepcell.layers.normalization.ImageNormalization2D` layer.
        location (bool): Whether to include a
            :mod:`deepcell.layers.location.Location2D` layer.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        pooling (str): Pooling mode for feature extraction
            when ``include_top`` is ``False``.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.
        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        kwargs (dict): Other standard inputs for `~retinanet`.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # Build the input layer, inserting a time axis when batching frames.
    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time, name='input')
        else:
            inputs = Input(shape=input_shape, name='input')

    # Optionally concatenate pixel-coordinate features onto the inputs.
    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(inputs)
        else:
            loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    if frames_per_batch > 1:
        norm = TimeDistributed(
            ImageNormalization2D(norm_method=norm_method))(concat)
        fixed_inputs = TimeDistributed(TensorProduct(required_channels))(norm)
    else:
        norm = ImageNormalization2D(norm_method=norm_method)(concat)
        fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone,
                                    fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    # create the full model
    return retinanet(inputs=inputs,
                     num_classes=num_classes,
                     backbone_dict=backbone_dict,
                     frames_per_batch=frames_per_batch,
                     name='{}_retinanet'.format(backbone),
                     **kwargs)
def bn_feature_net_3D(receptive_field=61,
                      n_frames=5,
                      input_shape=(5, 256, 256, 1),
                      n_features=3,
                      n_channels=1,
                      reg=1e-5,
                      n_conv_filters=64,
                      n_dense_filters=200,
                      VGG_mode=False,
                      init='he_normal',
                      norm_method='std',
                      location=False,
                      dilated=False,
                      padding=False,
                      padding_mode='reflect',
                      multires=False,
                      include_top=True,
                      temporal=None,
                      residual=False,
                      temporal_kernel_size=3):
    """Creates a 3D featurenet.

    Args:
        receptive_field (int): the receptive field of the neural network.
        n_frames (int): Number of frames.
        input_shape (tuple): If no input tensor, create one with this shape.
        n_features (int): Number of output features
        n_channels (int): number of input channels
        reg (int): regularization value
        n_conv_filters (int): number of convolutional filters
        n_dense_filters (int): number of dense filters
        VGG_mode (bool): If ``multires``, uses ``VGG_mode``
            for multiresolution
        init (str): Method for initalizing weights.
        norm_method (str): Normalization method to use with the
            :mod:`deepcell.layers.normalization.ImageNormalization3D` layer.
        location (bool): Whether to include a
            :mod:`deepcell.layers.location.Location3D` layer.
        dilated (bool): Whether to use dilated pooling.
        padding (bool): Whether to use padding.
        padding_mode (str): Type of padding, one of 'reflect' or 'zero'
        multires (bool): Enables multi-resolution mode
        include_top (bool): Whether to include the final layer of the model
        temporal (str): Type of temporal operation
        residual (bool): Whether to use temporal information as a residual
        temporal_kernel_size (int): size of 2D kernel used in temporal
            convolutions

    Returns:
        tensorflow.keras.Model: 3D FeatureNet
    """
    # Create layers list (x) to store all of the layers.
    # We need to use the functional API to enable the multiresolution mode
    x = []

    # half-window sizes used for spatial (win) and temporal (win_z) padding
    win = (receptive_field - 1) // 2
    win_z = (n_frames - 1) // 2

    # dilated mode keeps full resolution, so padding is required
    if dilated:
        padding = True

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
        time_axis = 2
        row_axis = 3
        col_axis = 4
        if not dilated:
            # non-dilated mode classifies fixed-size patches
            input_shape = (n_channels, n_frames,
                           receptive_field, receptive_field)
    else:
        channel_axis = -1
        time_axis = 1
        row_axis = 2
        col_axis = 3
        if not dilated:
            input_shape = (n_frames, receptive_field,
                           receptive_field, n_channels)

    x.append(Input(shape=input_shape))
    x.append(ImageNormalization3D(norm_method=norm_method,
                                  filter_size=receptive_field)(x[-1]))

    if padding:
        if padding_mode == 'reflect':
            x.append(ReflectionPadding3D(padding=(win_z, win, win))(x[-1]))
        elif padding_mode == 'zero':
            x.append(ZeroPadding3D(padding=(win_z, win, win))(x[-1]))

    if location:
        # append coordinate channels and concatenate onto the features
        x.append(Location3D()(x[-1]))
        x.append(Concatenate(axis=channel_axis)([x[-2], x[-1]]))

    layers_to_concat = []

    rf_counter = receptive_field
    block_counter = 0
    d = 1  # dilation rate, doubled after each (dilated) pooling step

    # stack conv blocks until the remaining receptive field is consumed;
    # alternating 3x3 / 4x4 kernels keeps rf_counter parity in step
    while rf_counter > 4:
        filter_size = 3 if rf_counter % 2 == 0 else 4
        x.append(Conv3D(n_conv_filters, (1, filter_size, filter_size),
                        dilation_rate=(1, d, d),
                        kernel_initializer=init,
                        padding='valid',
                        kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        block_counter += 1
        rf_counter -= filter_size - 1

        # pool (or dilate) every second conv block
        if block_counter % 2 == 0:
            if dilated:
                x.append(DilatedMaxPool3D(dilation_rate=(1, d, d),
                                          pool_size=(1, 2, 2))(x[-1]))
                d *= 2
            else:
                x.append(MaxPool3D(pool_size=(1, 2, 2))(x[-1]))

            if VGG_mode:
                n_conv_filters *= 2

            rf_counter = rf_counter // 2

            if multires:
                layers_to_concat.append(len(x) - 1)

    if multires:
        # crop each saved intermediate to the final feature-map size
        # and concatenate them along the channel axis
        c = []
        for l in layers_to_concat:
            output_shape = x[l].get_shape().as_list()
            target_shape = x[-1].get_shape().as_list()
            time_crop = (0, 0)

            row_crop = int(output_shape[row_axis] - target_shape[row_axis])
            if row_crop % 2 == 0:
                row_crop = (row_crop // 2, row_crop // 2)
            else:
                row_crop = (row_crop // 2, row_crop // 2 + 1)

            col_crop = int(output_shape[col_axis] - target_shape[col_axis])
            if col_crop % 2 == 0:
                col_crop = (col_crop // 2, col_crop // 2)
            else:
                col_crop = (col_crop // 2, col_crop // 2 + 1)

            cropping = (time_crop, row_crop, col_crop)
            c.append(Cropping3D(cropping=cropping)(x[l]))
        x.append(Concatenate(axis=channel_axis)(c))

    # dense-equivalent spatial conv over the remaining receptive field
    x.append(Conv3D(n_dense_filters, (1, rf_counter, rf_counter),
                    dilation_rate=(1, d, d),
                    kernel_initializer=init,
                    padding='valid',
                    kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    # collapse the time dimension with an (n_frames, 1, 1) conv
    x.append(Conv3D(n_dense_filters, (n_frames, 1, 1),
                    dilation_rate=(1, d, d),
                    kernel_initializer=init,
                    padding='valid',
                    kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))

    feature = Activation('relu')(x[-1])

    def __merge_temporal_features(feature, mode='conv', residual=False,
                                  n_filters=256, n_frames=3,
                                  padding=True,
                                  temporal_kernel_size=3):
        # Merge frame-wise features with a temporal op ('conv', 'lstm',
        # 'gru'); mode=None is a pass-through.  Uses `init`, `reg` and
        # `channel_axis` from the enclosing scope.  NOTE: the `padding`
        # parameter is accepted but never read inside this closure.
        if mode is None:
            return feature

        mode = str(mode).lower()
        if mode == 'conv':
            x = Conv3D(n_filters,
                       (n_frames, temporal_kernel_size, temporal_kernel_size),
                       kernel_initializer=init,
                       padding='same',
                       activation='relu',
                       kernel_regularizer=l2(reg))(feature)
        elif mode == 'lstm':
            x = ConvLSTM2D(filters=n_filters,
                           kernel_size=temporal_kernel_size,
                           padding='same',
                           kernel_initializer=init,
                           activation='relu',
                           kernel_regularizer=l2(reg),
                           return_sequences=True)(feature)
        elif mode == 'gru':
            x = ConvGRU2D(filters=n_filters,
                          kernel_size=temporal_kernel_size,
                          padding='same',
                          kernel_initializer=init,
                          activation='relu',
                          kernel_regularizer=l2(reg),
                          return_sequences=True)(feature)
        else:
            raise ValueError(
                '`temporal` must be one of "conv", "lstm", "gru" or None')

        if residual is True:
            # skip connection: add temporal output back onto the input
            temporal_feature = Add()([feature, x])
        else:
            temporal_feature = x

        temporal_feature_normed = BatchNormalization(
            axis=channel_axis)(temporal_feature)
        return temporal_feature_normed

    temporal_feature = __merge_temporal_features(
        feature, mode=temporal, residual=residual,
        n_filters=n_dense_filters, n_frames=n_frames,
        padding=padding, temporal_kernel_size=temporal_kernel_size)
    x.append(temporal_feature)

    # classification head: two 1x1 tensor products
    x.append(TensorProduct(n_dense_filters, kernel_initializer=init,
                           kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    x.append(TensorProduct(n_features, kernel_initializer=init,
                           kernel_regularizer=l2(reg))(x[-1]))

    if not dilated:
        x.append(Flatten()(x[-1]))

    if include_top:
        x.append(Softmax(axis=channel_axis, dtype=K.floatx())(x[-1]))

    model = Model(inputs=x[0], outputs=x[-1])

    return model
def FPNet(backbone,
          input_shape,
          inputs=None,
          norm_method='whole_image',
          use_imagenet=False,
          pooling=None,
          required_channels=3,
          n_classes=3,
          name='fpnet',
          frames_per_batch=1,
          **kwargs):
    """Creates a Feature Pyramid Network with a semantic segmentation head.

    Args:
        backbone (str): A name of a supported backbone from
            [deepcell, resnet50]
        input_shape (tuple): Shape of the input image.
        inputs (keras.Layer): Optional preexisting layers.
        norm_method (str): Normalization method, defaults to 'whole_image'
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        pooling (str): Optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        n_classes (int): The number of classes to be predicted
        name (str): Name to use for the model.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.

    Returns:
        tensorflow.keras.models.Model: Feature pyramid network with a
        semantic segmentation head as the output
    """
    if inputs is None:
        inputs = Input(shape=input_shape)

    # normalize, then project onto the channel count the backbone expects
    normed = ImageNormalization2D(norm_method=norm_method)(inputs)
    backbone_inputs = TensorProduct(required_channels)(normed)

    backbone_shape = list(input_shape)
    backbone_shape[-1] = required_channels
    backbone_shape = tuple(backbone_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': backbone_shape,
        'pooling': pooling
    }

    # Get backbone outputs
    _, backbone_dict = get_backbone(backbone, backbone_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    # Construct feature pyramid network
    pyramid_dict = __create_pyramid_features(backbone_dict)

    # upsample to the finest pyramid level available
    pyramid_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(pyramid_levels)

    x = __create_semantic_head(pyramid_dict, n_classes=n_classes,
                               input_target=inputs,
                               target_level=target_level,
                               ndim=len(input_shape) - 1)

    return Model(inputs=inputs, outputs=x, name=name)
def PanopticNet(backbone,
                input_shape,
                backbone_levels=['C3', 'C4', 'C5'],
                create_pyramid_features=__create_pyramid_features,
                create_semantic_head=__create_semantic_head,
                num_semantic_heads=1,
                num_semantic_classes=[3],
                required_channels=3,
                norm_method='whole_image',
                pooling=None,
                location=True,
                use_imagenet=True,
                name='panopticnet',
                **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        input_shape (tuple): The shape of the input data.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        create_pyramid_features (function): Function to get the pyramid
            features from the backbone.
        create_semantic_head (function): Function to build a semantic head
            submodel.
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for retinanet_mask.

    Returns:
        tensorflow.keras.Model: Panoptic model with a backbone.
    """
    inputs = Input(shape=input_shape)
    normed = ImageNormalization2D(norm_method=norm_method)(inputs)

    # optionally append pixel-coordinate channels to the normalized image
    if not location:
        concat = normed
    else:
        loc = Location2D(in_shape=input_shape)(normed)
        concat = Concatenate(axis=-1)([normed, loc])

    # project to the channel count the backbone expects
    fixed_inputs = TensorProduct(required_channels)(concat)

    fixed_input_shape = list(input_shape)
    fixed_input_shape[-1] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone, fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=1,
                                    return_dict=True,
                                    **model_kwargs)

    # keep only the requested backbone levels for the pyramid
    backbone_dict_reduced = {k: v for k, v in backbone_dict.items()
                             if k in backbone_levels}

    pyramid_dict = create_pyramid_features(backbone_dict_reduced, ndim=2)

    # the semantic heads upsample to the finest pyramid level present
    semantic_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(semantic_levels)

    semantic_head_list = [
        create_semantic_head(pyramid_dict,
                             n_classes=num_semantic_classes[i],
                             input_target=inputs,
                             target_level=target_level,
                             semantic_id=i,
                             ndim=2,
                             **kwargs)
        for i in range(num_semantic_heads)
    ]

    return Model(inputs=inputs, outputs=semantic_head_list, name=name)
def RetinaMask(backbone,
               num_classes,
               input_shape,
               inputs=None,
               backbone_levels=['C3', 'C4', 'C5'],
               pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
               norm_method='whole_image',
               location=False,
               use_imagenet=False,
               crop_size=(14, 14),
               pooling=None,
               mask_dtype=K.floatx(),
               required_channels=3,
               frames_per_batch=1,
               **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        weights (str): one of None (random initialization),
            'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if inputs is None:
        if frames_per_batch > 1:
            # add a time dimension next to the channel dimension
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] +
                    list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time)
        else:
            inputs = Input(shape=input_shape)

    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(inputs)
        else:
            loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    if frames_per_batch > 1:
        norm = TimeDistributed(
            ImageNormalization2D(norm_method=norm_method))(concat)
        fixed_inputs = TimeDistributed(TensorProduct(required_channels))(norm)
    else:
        norm = ImageNormalization2D(norm_method=norm_method)(concat)
        fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape (the backbone is built on the fixed channels)
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone, fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    # create the full model
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          backbone_dict=backbone_dict,
                          crop_size=crop_size,
                          backbone_levels=backbone_levels,
                          pyramid_levels=pyramid_levels,
                          name='{}_retinanet_mask'.format(backbone),
                          mask_dtype=mask_dtype,
                          frames_per_batch=frames_per_batch,
                          **kwargs)
def ScaleDetectionModel(input_shape=(None, None, 1),
                        inputs=None,
                        backbone='mobilenetv2',
                        use_pretrained_weights=True):
    """Create a ScaleDetectionModel for detecting scales of input data.

    This enables data to be scaled appropriately for other segmentation models
    which may not be resolution tolerant.

    Based on a standard backbone with an intiial ImageNormalization2D and
    final AveragePooling2D and TensorProduct layers.

    Args:
        input_shape (tuple): a 3-length tuple of the input data shape.
        inputs (tensorflow.keras.Layer): Optional input layer of the model.
            If not provided, creates a Layer based on input_shape.
        backbone (str): name of the backbone to use for the model.
        use_pretrained_weights (bool): whether to load pre-trained weights.
            Only supports the MobileNetV2 backbone.

    Returns:
        tensorflow.keras.Model: model that regresses a single scale value.

    Raises:
        ValueError: ``use_pretrained_weights`` is True but ``backbone`` has
            no published weights file.
    """
    required_channels = 3  # required for most backbones

    if inputs is None:
        inputs = keras.layers.Input(shape=input_shape)

    if keras.backend.image_data_format() == 'channels_first':
        channel_axis = 0
    else:
        channel_axis = -1

    norm = ImageNormalization2D(norm_method='whole_image')(inputs)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape to match the backbone's required channels
    fixed_input_shape = list(input_shape)
    fixed_input_shape[channel_axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    backbone_model = get_backbone(
        backbone,
        fixed_inputs,
        use_imagenet=False,
        return_dict=False,
        include_top=False,
        weights=None,
        input_shape=fixed_input_shape,
        pooling=None)

    # regression head: pool, project, and flatten to a single scale value
    x = keras.layers.AveragePooling2D(4)(backbone_model.outputs[0])
    x = TensorProduct(256, activation='relu')(x)
    x = TensorProduct(1)(x)
    outputs = keras.layers.Flatten()(x)

    model = keras.Model(inputs=backbone_model.inputs, outputs=outputs)

    if use_pretrained_weights:
        local_name = 'ScaleDetectionModel_{}.h5'.format(backbone)
        # BUG FIX: the original test was `in {'mobilenetv2' or
        # 'mobilenet_v2'}`; the `or` evaluates first, yielding the
        # single-element set {'mobilenetv2'} and wrongly rejecting the
        # 'mobilenet_v2' spelling.  Use a proper two-element set literal.
        if backbone.lower() in {'mobilenetv2', 'mobilenet_v2'}:
            weights_path = get_file(
                local_name,
                MOBILENETV2_WEIGHTS_PATH,
                cache_subdir='models',
                file_hash='aa78e6b9a4551289dd967f1f5ca83fed')
        else:
            raise ValueError('Backbone %s does not have a weights file.'
                             % backbone)

        model.load_weights(weights_path)

    return model
def FPNet(backbone,
          input_shape,
          inputs=None,
          norm_method='whole_image',
          use_imagenet=False,
          pooling=None,
          required_channels=3,
          n_classes=3,
          name='fpnet',
          **kwargs):
    """Creates a Feature Pyramid Network with a semantic segmentation head.

    NOTE(review): this duplicates the earlier ``FPNet`` definition in this
    module (which passes ``return_dict=True`` and ``frames_per_batch`` to
    ``get_backbone``); at import time the later definition shadows the
    earlier one — confirm which variant is intended to survive.

    Args:
        backbone (str): A name of a supported backbone from
            [deepcell, resnet50]
        input_shape (tuple): Shape of the input image
        inputs (keras layer, optional): Defaults to None.
            Method to pass in preexisting layers
        norm_method (str, optional): Defaults to 'whole_image'.
            Normalization method
        use_imagenet (bool, optional): Whether to load
            imagenet-based pretrained weights.
        pooling (str, optional): Defaults to None. optional pooling mode
            for feature extraction when `include_top` is `False`.

            - `None` means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - `avg` means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.

        required_channels (int, optional): Defaults to 3.
            The required number of channels of the backbone.
            3 is the default for all current backbones.
        n_classes (int, optional): Defaults to 3.
            The number of classes to be predicted
        name (str, optional): Defaults to 'fpnet'. Name to use for the model.

    Returns:
        Model with a feature pyramid network with a semantic
        segmentation head as the output
    """
    if inputs is None:
        inputs = Input(shape=input_shape)

    # normalize, then project to the backbone's required channel count
    normed = ImageNormalization2D(norm_method=norm_method)(inputs)
    backbone_inputs = TensorProduct(required_channels)(normed)

    backbone_shape = list(input_shape)
    backbone_shape[-1] = required_channels
    backbone_shape = tuple(backbone_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': backbone_shape,
        'pooling': pooling
    }

    # Get backbone outputs
    backbone_dict = get_backbone(
        backbone, backbone_inputs, use_imagenet=use_imagenet, **model_kwargs)

    # Construct feature pyramid network
    pyramid_dict = __create_pyramid_features(backbone_dict)

    # upsample to the finest available pyramid level
    pyramid_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(pyramid_levels)

    x = __create_semantic_head(pyramid_dict, n_classes=n_classes,
                               input_target=inputs,
                               target_level=target_level)

    return Model(inputs=inputs, outputs=x, name=name)
def bn_feature_net_2D(receptive_field=61,
                      input_shape=(256, 256, 1),
                      n_features=3,
                      n_channels=1,
                      reg=1e-5,
                      n_conv_filters=64,
                      n_dense_filters=200,
                      VGG_mode=False,
                      init='he_normal',
                      norm_method='std',
                      location=False,
                      dilated=False,
                      padding=False,
                      padding_mode='reflect',
                      multires=False,
                      include_top=True):
    """Creates a 2D featurenet.

    Args:
        receptive_field (int): the receptive field of the neural network.
        input_shape (tuple): If no input tensor, create one with this shape.
        n_features (int): Number of output features
        n_channels (int): number of input channels
        reg (int): regularization value
        n_conv_filters (int): number of convolutional filters
        n_dense_filters (int): number of dense filters
        VGG_mode (bool): If ``multires``, uses ``VGG_mode``
            for multiresolution
        init (str): Method for initalizing weights.
        norm_method (str): Normalization method to use with the
            :mod:`deepcell.layers.normalization.ImageNormalization2D` layer.
        location (bool): Whether to include a
            :mod:`deepcell.layers.location.Location2D` layer.
        dilated (bool): Whether to use dilated pooling.
        padding (bool): Whether to use padding.
        padding_mode (str): Type of padding, one of 'reflect' or 'zero'
        multires (bool): Enables multi-resolution mode
        include_top (bool): Whether to include the final layer of the model

    Returns:
        tensorflow.keras.Model: 2D FeatureNet
    """
    # Create layers list (x) to store all of the layers.
    # We need to use the functional API to enable the multiresolution mode
    x = []

    # half-window used for spatial padding
    win = (receptive_field - 1) // 2

    # dilated mode keeps full resolution, so padding is required
    if dilated:
        padding = True

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
        row_axis = 2
        col_axis = 3
        if not dilated:
            # non-dilated mode classifies fixed-size patches
            input_shape = (n_channels, receptive_field, receptive_field)
    else:
        row_axis = 1
        col_axis = 2
        channel_axis = -1
        if not dilated:
            input_shape = (receptive_field, receptive_field, n_channels)

    x.append(Input(shape=input_shape))
    x.append(ImageNormalization2D(norm_method=norm_method,
                                  filter_size=receptive_field)(x[-1]))

    if padding:
        if padding_mode == 'reflect':
            x.append(ReflectionPadding2D(padding=(win, win))(x[-1]))
        elif padding_mode == 'zero':
            x.append(ZeroPadding2D(padding=(win, win))(x[-1]))

    if location:
        # append coordinate channels and concatenate onto the features
        x.append(Location2D(in_shape=tuple(x[-1].shape.as_list()[1:]))(x[-1]))
        x.append(Concatenate(axis=channel_axis)([x[-2], x[-1]]))

    if multires:
        layers_to_concat = []

    rf_counter = receptive_field
    block_counter = 0
    d = 1  # dilation rate, doubled after each (dilated) pooling step

    # stack conv blocks until the remaining receptive field is consumed;
    # alternating 3x3 / 4x4 kernels keeps rf_counter parity in step
    while rf_counter > 4:
        filter_size = 3 if rf_counter % 2 == 0 else 4
        x.append(Conv2D(n_conv_filters, (filter_size, filter_size),
                        dilation_rate=d,
                        kernel_initializer=init,
                        padding='valid',
                        kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        block_counter += 1
        rf_counter -= filter_size - 1

        # pool (or dilate) every second conv block
        if block_counter % 2 == 0:
            if dilated:
                x.append(DilatedMaxPool2D(dilation_rate=d,
                                          pool_size=(2, 2))(x[-1]))
                d *= 2
            else:
                x.append(MaxPool2D(pool_size=(2, 2))(x[-1]))

            if VGG_mode:
                n_conv_filters *= 2

            rf_counter = rf_counter // 2

            if multires:
                layers_to_concat.append(len(x) - 1)

    if multires:
        # crop each saved intermediate to the final feature-map size
        # and concatenate them along the channel axis
        c = []
        for l in layers_to_concat:
            output_shape = x[l].get_shape().as_list()
            target_shape = x[-1].get_shape().as_list()

            row_crop = int(output_shape[row_axis] - target_shape[row_axis])
            if row_crop % 2 == 0:
                row_crop = (row_crop // 2, row_crop // 2)
            else:
                row_crop = (row_crop // 2, row_crop // 2 + 1)

            col_crop = int(output_shape[col_axis] - target_shape[col_axis])
            if col_crop % 2 == 0:
                col_crop = (col_crop // 2, col_crop // 2)
            else:
                col_crop = (col_crop // 2, col_crop // 2 + 1)

            cropping = (row_crop, col_crop)
            c.append(Cropping2D(cropping=cropping)(x[l]))
        x.append(Concatenate(axis=channel_axis)(c))

    # dense-equivalent conv over the remaining receptive field
    x.append(Conv2D(n_dense_filters, (rf_counter, rf_counter),
                    dilation_rate=d,
                    kernel_initializer=init,
                    padding='valid',
                    kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    # classification head: two 1x1 tensor products
    x.append(TensorProduct(n_dense_filters, kernel_initializer=init,
                           kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    x.append(TensorProduct(n_features, kernel_initializer=init,
                           kernel_regularizer=l2(reg))(x[-1]))

    if not dilated:
        x.append(Flatten()(x[-1]))

    if include_top:
        x.append(Softmax(axis=channel_axis)(x[-1]))

    model = Model(inputs=x[0], outputs=x[-1])

    return model
def MaskRCNN(backbone,
             num_classes,
             input_shape,
             backbone_levels=['C3', 'C4', 'C5'],
             pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
             norm_method='whole_image',
             location=False,
             use_imagenet=False,
             crop_size=(14, 14),
             pooling=None,
             mask_dtype=K.floatx(),
             required_channels=3,
             **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone: string, name of backbone to use.
        num_classes: Number of classes to classify.
        input_shape: The shape of the input data.
        weights: one of `None` (random initialization),
            'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.

            - `None` means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - `avg` means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.

        required_channels: integer, the required number of channels of the
            backbone.  3 is the default for all current backbones.

    Returns:
        RetinaNet model with a backbone.
    """
    inputs = Input(shape=input_shape)
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if location:
        # NOTE(review): this rebinds `location` (the bool flag) to a layer
        # output and rebinds `inputs` to the Concatenate tensor, so the
        # `inputs=` passed to retinanet_mask below is no longer the Input
        # layer — confirm this is intended (the sibling RetinaMask keeps
        # the original `inputs` and concatenates into a separate tensor).
        location = Location2D(in_shape=input_shape)(inputs)
        inputs = Concatenate(axis=channel_axis)([inputs, location])

    # force the channel size for backbone input to be `required_channels`
    norm = ImageNormalization2D(norm_method=norm_method)(inputs)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape (the backbone is built on the fixed channels)
    fixed_input_shape = list(input_shape)
    fixed_input_shape[-1] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    # NOTE(review): called without `return_dict=True`, unlike the other
    # constructors in this module — verify against get_backbone's signature.
    backbone_dict = get_backbone(backbone, fixed_inputs,
                                 use_imagenet=use_imagenet,
                                 **model_kwargs)

    # create the full model
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          backbone_dict=backbone_dict,
                          crop_size=crop_size,
                          backbone_levels=backbone_levels,
                          pyramid_levels=pyramid_levels,
                          name='{}_retinanet_mask'.format(backbone),
                          mask_dtype=mask_dtype,
                          **kwargs)