def LabelDetectionModel(input_shape=(None, None, 1),
                        inputs=None,
                        backbone='mobilenetv2',
                        use_pretrained_weights=True):
    """Classify a microscopy image as Nuclear, Cytoplasm, or Phase.

    This can be helpful in determining the type of data (nuclear, cytoplasm,
    etc.) so that this data can be forwarded to the correct segmentation
    model.

    The input is normalized with ``ImageNormalization2D`` and mapped to 3
    channels with a ``TensorProduct`` before being fed to the backbone. The
    first backbone output is then passed through ``AveragePooling2D``, two
    ``TensorProduct`` layers (256 and 3 units), ``Flatten`` and a softmax.

    Args:
        input_shape (tuple): Shape used to create the input layer and to
            derive the shape handed to the backbone.
        inputs (tensor): Optional input tensor. If None, one is created
            from ``input_shape``.
        backbone (str): Name of the backbone, forwarded to ``get_backbone``.
        use_pretrained_weights (bool): Whether to fetch and load the
            pre-trained weights file. Only 'mobilenetv2' / 'mobilenet_v2'
            (case-insensitive) have a weights file.

    Returns:
        keras.Model: The label detection model.

    Raises:
        ValueError: If ``use_pretrained_weights`` is True and ``backbone``
            has no associated weights file.
    """
    required_channels = 3  # required for most backbones

    if inputs is None:
        inputs = keras.layers.Input(shape=input_shape)

    # ``input_shape`` is indexed directly below, so the channel position is
    # 0 for channels_first and the last entry otherwise.
    if keras.backend.image_data_format() == 'channels_first':
        channel_axis = 0
    else:
        channel_axis = -1

    norm = ImageNormalization2D(norm_method='whole_image')(inputs)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    fixed_input_shape = list(input_shape)
    fixed_input_shape[channel_axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    backbone_model = get_backbone(backbone,
                                  fixed_inputs,
                                  use_imagenet=False,
                                  return_dict=False,
                                  include_top=False,
                                  weights=None,
                                  input_shape=fixed_input_shape,
                                  pooling=None)

    x = keras.layers.AveragePooling2D(4)(backbone_model.outputs[0])
    x = TensorProduct(256)(x)
    x = TensorProduct(3)(x)
    x = keras.layers.Flatten()(x)
    outputs = keras.layers.Activation('softmax')(x)

    model = keras.Model(inputs=backbone_model.inputs, outputs=outputs)

    if use_pretrained_weights:
        local_name = 'LabelDetectionModel_{}.h5'.format(backbone)
        # Both spellings must be listed as separate set members; writing
        # ``{'a' or 'b'}`` collapses to ``{'a'}`` and rejects the alias.
        if backbone.lower() in {'mobilenetv2', 'mobilenet_v2'}:
            weights_path = get_file(
                local_name,
                MOBILENETV2_WEIGHTS_PATH,
                cache_subdir='models',
                md5_hash='b8231f32f01c1cd6448d06e276dd5949')
        else:
            raise ValueError('Backbone %s does not have a weights file.' %
                             backbone)

        model.load_weights(weights_path)

    return model
Beispiel #2
0
def bn_feature_net_skip_2D(receptive_field=61,
                           input_shape=(256, 256, 1),
                           inputs=None,
                           fgbg_model=None,
                           n_skips=2,
                           last_only=True,
                           norm_method='std',
                           padding_mode='reflect',
                           **kwargs):
    """Create a 2D featurenet with skip-connections.

    ``n_skips + 1`` ``bn_feature_net_2D`` models are chained. Every model
    after the first receives the normalized image concatenated (along the
    channel axis) with the previous model's output. When ``fgbg_model`` is
    given, its output is concatenated into the first model's input as well.

    Args:
        receptive_field (int): Forwarded to ``bn_feature_net_2D`` and used
            as the ``filter_size`` of the ``ImageNormalization2D`` layer.
        input_shape (tuple): Shape of the input layer created when
            ``inputs`` is None.
        inputs (tensor): Optional input tensor. If None, one is created
            from ``input_shape``.
        fgbg_model (Model): Optional model whose output is concatenated
            with the image as the first skip-connection. All of its layers
            are set to non-trainable (this mutates the passed model).
        n_skips (int): The number of skip-connections.
        last_only (bool): If True, only the final prediction is returned
            as the model output.
        norm_method (str): ``norm_method`` of the ``ImageNormalization2D``
            layer.
        padding_mode (str): Forwarded to ``bn_feature_net_2D``.
        kwargs (dict): Extra keyword arguments for ``bn_feature_net_2D``.

    Returns:
        Model: With ``last_only`` the model has a single output; otherwise
        it outputs every featurenet prediction (the ``fgbg_model`` output
        itself is excluded).
    """
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    # Honor a caller-supplied tensor instead of silently replacing it.
    if inputs is None:
        inputs = Input(shape=input_shape)
    img = ImageNormalization2D(norm_method=norm_method,
                               filter_size=receptive_field)(inputs)

    models = []
    model_outputs = []

    if fgbg_model is not None:
        for layer in fgbg_model.layers:
            layer.trainable = False

        models.append(fgbg_model)
        fgbg_output = fgbg_model(inputs)
        if isinstance(fgbg_output, list):
            fgbg_output = fgbg_output[-1]
        model_outputs.append(fgbg_output)

    for _ in range(n_skips + 1):
        if model_outputs:
            model_input = Concatenate(axis=channel_axis)(
                [img, model_outputs[-1]])
        else:
            model_input = img

        # Drop the batch axis: the sub-model builds its own input layer.
        new_input_shape = model_input.get_shape().as_list()[1:]
        models.append(
            bn_feature_net_2D(receptive_field=receptive_field,
                              input_shape=new_input_shape,
                              norm_method=None,
                              dilated=True,
                              padding=True,
                              padding_mode=padding_mode,
                              **kwargs))
        model_outputs.append(models[-1](model_input))

    if last_only:
        model = Model(inputs=inputs, outputs=model_outputs[-1])
    elif fgbg_model is None:
        model = Model(inputs=inputs, outputs=model_outputs)
    else:
        # The first entry is the fgbg_model output, not a featurenet output.
        model = Model(inputs=inputs, outputs=model_outputs[1:])

    return model
Beispiel #3
0
def ScaleDetectionModel(input_shape=(None, None, 1),
                        inputs=None,
                        backbone='mobilenetv2',
                        use_pretrained_weights=True):
    """Create a ScaleDetectionModel for detecting scales of input data.

    This enables data to be scaled appropriately for other segmentation models
    which may not be resolution tolerant.

    The input is normalized with ``ImageNormalization2D`` and mapped to 3
    channels with a ``TensorProduct`` before being fed to the backbone. The
    first backbone output is then passed through ``AveragePooling2D``, a
    256-unit ReLU ``TensorProduct``, a 1-unit ``TensorProduct`` and
    ``Flatten``.

    Args:
        input_shape (tuple): Shape used to create the input layer and to
            derive the shape handed to the backbone.
        inputs (tensor): Optional input tensor. If None, one is created
            from ``input_shape``.
        backbone (str): Name of the backbone, forwarded to ``get_backbone``.
        use_pretrained_weights (bool): Whether to fetch and load the
            pre-trained weights file. Only 'mobilenetv2' / 'mobilenet_v2'
            (case-insensitive) have a weights file.

    Returns:
        keras.Model: The scale detection model.

    Raises:
        ValueError: If ``use_pretrained_weights`` is True and ``backbone``
            has no associated weights file.
    """
    required_channels = 3  # required for most backbones

    if inputs is None:
        inputs = keras.layers.Input(shape=input_shape)

    # ``input_shape`` is indexed directly below, so the channel position is
    # 0 for channels_first and the last entry otherwise.
    if keras.backend.image_data_format() == 'channels_first':
        channel_axis = 0
    else:
        channel_axis = -1

    norm = ImageNormalization2D(norm_method='whole_image')(inputs)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    fixed_input_shape = list(input_shape)
    fixed_input_shape[channel_axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    backbone_model = get_backbone(backbone,
                                  fixed_inputs,
                                  use_imagenet=False,
                                  return_dict=False,
                                  include_top=False,
                                  weights=None,
                                  input_shape=fixed_input_shape,
                                  pooling=None)

    x = keras.layers.AveragePooling2D(4)(backbone_model.outputs[0])
    x = TensorProduct(256, activation='relu')(x)
    x = TensorProduct(1)(x)
    outputs = keras.layers.Flatten()(x)

    model = keras.Model(inputs=backbone_model.inputs, outputs=outputs)

    if use_pretrained_weights:
        local_name = 'ScaleDetectionModel_{}.h5'.format(backbone)
        # Both spellings must be listed as separate set members; writing
        # ``{'a' or 'b'}`` collapses to ``{'a'}`` and rejects the alias.
        if backbone.lower() in {'mobilenetv2', 'mobilenet_v2'}:
            weights_path = get_file(
                local_name,
                MOBILENETV2_WEIGHTS_PATH,
                cache_subdir='models',
                md5_hash='b9943554a86096fb66608ec66078aa46')
        else:
            raise ValueError('Backbone %s does not have a weights file.' %
                             backbone)

        model.load_weights(weights_path)

    return model
Beispiel #4
0
def MaskRCNN(backbone,
             num_classes,
             input_shape,
             norm_method='whole_image',
             crop_size=(14, 14),
             weights=None,
             pooling=None,
             mask_dtype=K.floatx(),
             required_channels=3,
             **kwargs):
    """Build a ``retinanet_mask`` model on top of a named backbone.

    The input is normalized with ``ImageNormalization2D`` and mapped to
    ``required_channels`` channels with a ``TensorProduct``; that tensor is
    handed to ``get_backbone`` as ``input_tensor``. The value returned by
    ``get_backbone`` is passed to ``retinanet_mask`` as ``backbone_layers``.

    Args:
        backbone: string, name of backbone to use.
        num_classes: Number of classes to classify.
        input_shape: The shape of the input data.
        norm_method: ``norm_method`` of the ``ImageNormalization2D`` layer.
        crop_size: Forwarded unchanged to ``retinanet_mask``.
        weights: Forwarded to ``get_backbone`` as ``weights``.
        pooling: Forwarded to ``get_backbone`` as ``pooling``.
        mask_dtype: Forwarded unchanged to ``retinanet_mask``. The default
            is computed once, when this function is defined.
        required_channels: integer, number of channels the backbone input
            is forced to.
        kwargs: Additional keyword arguments for ``retinanet_mask``. Any
            ``backbone_layers`` entry is overwritten.

    Returns:
        The model returned by ``retinanet_mask``.
    """
    inputs = Input(shape=input_shape)

    # Normalize, then coerce the channel count expected by the backbone.
    normalized = ImageNormalization2D(norm_method=norm_method)(inputs)
    backbone_input = TensorProduct(required_channels)(normalized)

    kwargs['backbone_layers'] = get_backbone(
        backbone,
        inputs,
        include_top=False,
        input_tensor=backbone_input,
        weights=weights,
        pooling=pooling)

    model_name = '{}_retinanet_mask'.format(backbone)
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          crop_size=crop_size,
                          name=model_name,
                          mask_dtype=mask_dtype,
                          **kwargs)
Beispiel #5
0
def LabelDetectionModel(input_shape=(None, None, 1),
                        inputs=None,
                        backbone='mobilenetv2',
                        num_classes=3):
    """Build a model that classifies the type of a microscopy image.

    Knowing the type of data (nuclear, cytoplasm, etc.) allows it to be
    routed to the matching segmentation model.

    The network is a standard backbone preceded by ``ImageNormalization2D``
    and a 3-channel ``TensorProduct``, and followed by ``AveragePooling2D``,
    ``Flatten``, two ``TensorProduct`` layers and a ``Softmax``.

    Args:
        input_shape (tuple): a 3-length tuple of the input data shape.
        inputs (tensorflow.keras.Layer): Optional input layer of the model.
            If not provided, one is created from ``input_shape``.
        backbone (str): name of the backbone to use for the model.
        num_classes (int): The number of labels to detect.

    Returns:
        tensorflow.keras.Model: The label detection model.
    """
    n_backbone_channels = 3  # required for most backbones

    if inputs is None:
        inputs = tf.keras.layers.Input(shape=input_shape)

    data_format = tf.keras.backend.image_data_format()
    channel_index = 0 if data_format == 'channels_first' else -1

    normalized = ImageNormalization2D(norm_method='whole_image')(inputs)
    backbone_inputs = TensorProduct(n_backbone_channels)(normalized)

    # Same shape as the input, except for the forced channel count.
    backbone_shape = list(input_shape)
    backbone_shape[channel_index] = n_backbone_channels

    backbone_model = get_backbone(
        backbone,
        backbone_inputs,
        use_imagenet=False,
        return_dict=False,
        include_top=False,
        weights=None,
        input_shape=tuple(backbone_shape),
        pooling=None)

    features = backbone_model.outputs[0]
    pooled = tf.keras.layers.AveragePooling2D(4)(features)
    flattened = tf.keras.layers.Flatten()(pooled)
    hidden = TensorProduct(256)(flattened)
    logits = TensorProduct(num_classes)(hidden)
    softmax = tf.keras.layers.Softmax(dtype=tf.keras.backend.floatx())
    probabilities = softmax(logits)

    return tf.keras.Model(inputs=backbone_model.inputs,
                          outputs=probabilities)
Beispiel #6
0
def ScaleDetectionModel(input_shape=(None, None, 1),
                        inputs=None,
                        backbone='mobilenetv2'):
    """Build a model that estimates the scale of its input data.

    The prediction lets data be rescaled before it reaches segmentation
    models that may not be resolution tolerant.

    The network is a standard backbone preceded by ``ImageNormalization2D``
    and a 3-channel ``TensorProduct``, and followed by ``AveragePooling2D``,
    a 256-unit ReLU ``TensorProduct``, a 1-unit ``TensorProduct`` and
    ``Flatten``.

    Args:
        input_shape (tuple): a 3-length tuple of the input data shape.
        inputs (tensorflow.keras.Layer): Optional input layer of the model.
            If not provided, one is created from ``input_shape``.
        backbone (str): name of the backbone to use for the model.

    Returns:
        tensorflow.keras.Model: The scale detection model.
    """
    n_backbone_channels = 3  # required for most backbones

    if inputs is None:
        inputs = tf.keras.layers.Input(shape=input_shape)

    data_format = tf.keras.backend.image_data_format()
    channel_index = 0 if data_format == 'channels_first' else -1

    normalized = ImageNormalization2D(norm_method='whole_image')(inputs)
    backbone_inputs = TensorProduct(n_backbone_channels)(normalized)

    # Same shape as the input, except for the forced channel count.
    backbone_shape = list(input_shape)
    backbone_shape[channel_index] = n_backbone_channels

    backbone_model = get_backbone(
        backbone,
        backbone_inputs,
        use_imagenet=False,
        return_dict=False,
        include_top=False,
        weights=None,
        input_shape=tuple(backbone_shape),
        pooling=None)

    features = backbone_model.outputs[0]
    pooled = tf.keras.layers.AveragePooling2D(4)(features)
    hidden = TensorProduct(256, activation='relu')(pooled)
    scale = TensorProduct(1)(hidden)
    flat_scale = tf.keras.layers.Flatten()(scale)

    return tf.keras.Model(inputs=backbone_model.inputs, outputs=flat_scale)
Beispiel #7
0
    def get_appearance_encoder(self):
        """Build the encoder model for the appearance input.

        The input shape is ``None`` followed by ``self.appearance_shape``
        with its first two entries dropped. After a time-distributed
        ``ImageNormalization2D``, ``int(log2(app_shape[1]))`` blocks of
        Conv3D (1x3x3) -> BatchNormalization -> ReLU -> MaxPool3D (1x2x2)
        are applied. Axes 2 and 3 are then squeezed away and the result
        goes through Dense(``self.encoder_dim``) -> BatchNormalization ->
        ReLU (the layer named 'appearance_embedding').

        NOTE(review): the squeeze presumably relies on the pooled axes
        having been reduced to size 1 by the pooling blocks -- confirm
        against the shapes used by callers.

        Returns:
            Model: maps the appearance input to its embedding.
        """
        app_shape = tuple([None] + list(self.appearance_shape)[2:])
        inputs = Input(shape=app_shape, name='encoder_app_input')

        net = TimeDistributed(
            ImageNormalization2D(norm_method='whole_image',
                                 name='imgnrm_ae'))(inputs)

        n_blocks = int(math.log(app_shape[1], 2))
        for block in range(n_blocks):
            conv = Conv3D(self.n_filters, (1, 3, 3),
                          strides=1,
                          padding='same',
                          use_bias=False,
                          name='conv3d_ae{}'.format(block))
            net = conv(net)
            net = BatchNormalization(axis=-1,
                                     name='bn_ae{}'.format(block))(net)
            net = Activation('relu', name='relu_ae{}'.format(block))(net)
            net = MaxPool3D(pool_size=(1, 2, 2))(net)

        net = Lambda(lambda t: tf.squeeze(t, axis=(2, 3)))(net)
        net = Dense(self.encoder_dim, name='dense_aeout')(net)
        net = BatchNormalization(axis=-1, name='bn_aeout')(net)
        embedding = Activation('relu', name='appearance_embedding')(net)
        return Model(inputs=inputs, outputs=embedding)
Beispiel #8
0
def LabelDetectionModel(input_shape=(None, None, 1),
                        inputs=None,
                        backbone='mobilenetv2',
                        use_pretrained_weights=True):
    """Classify a microscopy image as Nuclear, Cytoplasm, or Phase.

    This can be helpful in determining the type of data (nuclear, cytoplasm,
    etc.) so that this data can be forwarded to the correct segmentation
    model.

    Based on a standard backbone with an initial ImageNormalization2D and
    final AveragePooling2D, TensorProduct, and Softmax layers.

    Args:
        input_shape (tuple): a 3-length tuple of the input data shape.
        inputs (tensorflow.keras.Layer): Optional input layer of the model.
            If not provided, creates a Layer based on input_shape.
        backbone (str): name of the backbone to use for the model.
        use_pretrained_weights (bool): whether to load pre-trained weights.
            Only supports the MobileNetV2 backbone, spelled 'mobilenetv2'
            or 'mobilenet_v2' (case-insensitive).

    Returns:
        keras.Model: The label detection model.

    Raises:
        ValueError: If ``use_pretrained_weights`` is True and ``backbone``
            has no associated weights file.
    """
    required_channels = 3  # required for most backbones

    if inputs is None:
        inputs = keras.layers.Input(shape=input_shape)

    # ``input_shape`` is indexed directly below, so the channel position is
    # 0 for channels_first and the last entry otherwise.
    if keras.backend.image_data_format() == 'channels_first':
        channel_axis = 0
    else:
        channel_axis = -1

    norm = ImageNormalization2D(norm_method='whole_image')(inputs)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    fixed_input_shape = list(input_shape)
    fixed_input_shape[channel_axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    backbone_model = get_backbone(
        backbone,
        fixed_inputs,
        use_imagenet=False,
        return_dict=False,
        include_top=False,
        weights=None,
        input_shape=fixed_input_shape,
        pooling=None)

    x = keras.layers.AveragePooling2D(4)(backbone_model.outputs[0])
    x = TensorProduct(256)(x)
    x = TensorProduct(3)(x)
    x = keras.layers.Flatten()(x)
    outputs = keras.layers.Activation('softmax')(x)

    model = keras.Model(inputs=backbone_model.inputs, outputs=outputs)

    if use_pretrained_weights:
        local_name = 'LabelDetectionModel_{}.h5'.format(backbone)
        # Both spellings must be listed as separate set members; writing
        # ``{'a' or 'b'}`` collapses to ``{'a'}`` and rejects the alias.
        if backbone.lower() in {'mobilenetv2', 'mobilenet_v2'}:
            weights_path = get_file(
                local_name,
                MOBILENETV2_WEIGHTS_PATH,
                cache_subdir='models',
                file_hash='14d4b2f7c77d334c958d2dde79972e6e')
        else:
            raise ValueError('Backbone %s does not have a weights file.' %
                             backbone)

        model.load_weights(weights_path)

    return model
Beispiel #9
0
def RetinaNet(backbone,
              num_classes,
              input_shape,
              inputs=None,
              norm_method='whole_image',
              location=False,
              use_imagenet=False,
              pooling=None,
              required_channels=3,
              frames_per_batch=1,
              **kwargs):
    """Build a ``retinanet`` model on top of a named backbone.

    The (optionally location-augmented) input is normalized with
    ``ImageNormalization2D`` and mapped to ``required_channels`` channels
    with a ``TensorProduct`` before being handed to ``get_backbone``. When
    ``frames_per_batch > 1`` these layers are wrapped in
    ``TimeDistributed`` and a frame axis is added to the created input.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides ``input_shape``
            for the creation of the input layer.
        norm_method (str): ``norm_method`` of the ``ImageNormalization2D``
            layer.
        location (bool): Whether to concatenate the output of a
            ``Location2D`` layer to the input along the channel axis.
        use_imagenet (bool): Forwarded to ``get_backbone``.
        pooling (str): Forwarded to ``get_backbone`` as ``pooling``.
        required_channels (int): Number of channels the backbone input is
            forced to.
        frames_per_batch (int): Number of frames per batch. Values above 1
            enable the ``TimeDistributed`` code path.
        kwargs (dict): Additional keyword arguments for ``retinanet``.

    Returns:
        The model returned by ``retinanet``.
    """
    channels_first = K.image_data_format() == 'channels_first'
    channel_axis = 1 if channels_first else -1
    multi_frame = frames_per_batch > 1

    if inputs is None:
        layer_shape = input_shape
        if multi_frame:
            # Insert the frame axis right after the channels for
            # channels_first data, otherwise in front of everything.
            if channel_axis == 1:
                layer_shape = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                layer_shape = tuple([frames_per_batch] + list(input_shape))
        inputs = Input(shape=layer_shape, name='input')

    if location:
        location_layer = Location2D(in_shape=input_shape)
        if multi_frame:
            # TODO: TimeDistributed is incompatible with channels_first
            location_layer = TimeDistributed(location_layer)
        loc = location_layer(inputs)
        backbone_source = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        backbone_source = inputs

    # force the channel size for backbone input to be `required_channels`
    norm_layer = ImageNormalization2D(norm_method=norm_method)
    if multi_frame:
        norm_layer = TimeDistributed(norm_layer)
    normalized = norm_layer(backbone_source)

    channel_layer = TensorProduct(required_channels)
    if multi_frame:
        channel_layer = TimeDistributed(channel_layer)
    fixed_inputs = channel_layer(normalized)

    # ``input_shape`` is indexed directly, so channels_first means index 0.
    backbone_shape = list(input_shape)
    backbone_shape[0 if channels_first else -1] = required_channels

    _, backbone_dict = get_backbone(backbone,
                                    fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    include_top=False,
                                    weights=None,
                                    input_shape=tuple(backbone_shape),
                                    pooling=pooling)

    model_name = '{}_retinanet'.format(backbone)
    return retinanet(inputs=inputs,
                     num_classes=num_classes,
                     backbone_dict=backbone_dict,
                     frames_per_batch=frames_per_batch,
                     name=model_name,
                     **kwargs)
Beispiel #10
0
def PanopticNet(backbone,
                input_shape,
                backbone_levels=['C3', 'C4', 'C5'],
                create_pyramid_features=__create_pyramid_features,
                create_semantic_head=__create_semantic_head,
                num_semantic_heads=1,
                num_semantic_classes=[3],
                required_channels=3,
                norm_method='whole_image',
                pooling=None,
                location=True,
                use_imagenet=True,
                name='panopticnet',
                **kwargs):
    """Build a feature-pyramid model with one or more semantic heads.

    The input is normalized, optionally concatenated with a ``Location2D``
    output, and mapped to ``required_channels`` channels before being
    handed to ``get_backbone``. The selected backbone levels are turned
    into pyramid features, and each semantic head is attached at the
    lowest-numbered pyramid level. Channels are assumed to be on the last
    axis.

    Args:
        backbone (str): Name of backbone to use.
        input_shape (tuple): The shape of the input data.
        backbone_levels (list): Keys of the backbone outputs used to create
            the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        create_pyramid_features (function): Called with the selected
            backbone outputs and ``ndim=2``; returns the pyramid features.
        create_semantic_head (function): Called once per semantic head to
            build that head's output.
        num_semantic_heads (int): Number of semantic heads to create.
        num_semantic_classes (list): ``n_classes`` for each head; indexed
            by head number, so it needs at least ``num_semantic_heads``
            entries.
        required_channels (int): Number of channels the backbone input is
            forced to.
        norm_method (str): ``norm_method`` of the ``ImageNormalization2D``
            layer.
        pooling (str): Forwarded to ``get_backbone`` as ``pooling``.
        location (bool): Whether to include location data.
        use_imagenet (bool): Forwarded to ``get_backbone``.
        name (str): Name of the returned model.
        kwargs (dict): Additional keyword arguments for
            ``create_semantic_head``.

    Returns:
        Model: Model whose outputs are the semantic heads, in head order.
    """
    inputs = Input(shape=input_shape)
    normalized = ImageNormalization2D(norm_method=norm_method)(inputs)

    backbone_source = normalized
    if location:
        loc = Location2D(in_shape=input_shape)(normalized)
        backbone_source = Concatenate(axis=-1)([normalized, loc])

    fixed_inputs = TensorProduct(required_channels)(backbone_source)

    # Same shape as the input, except for the forced channel count.
    backbone_shape = list(input_shape)
    backbone_shape[-1] = required_channels

    _, backbone_dict = get_backbone(backbone,
                                    fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=1,
                                    return_dict=True,
                                    include_top=False,
                                    weights=None,
                                    input_shape=tuple(backbone_shape),
                                    pooling=pooling)

    selected_levels = {
        level: features
        for level, features in backbone_dict.items()
        if level in backbone_levels
    }
    pyramid_dict = create_pyramid_features(selected_levels, ndim=2)

    # Pyramid keys carry their level number; heads target the lowest one.
    level_numbers = [int(re.findall(r'\d+', key)[0]) for key in pyramid_dict]
    target_level = min(level_numbers)

    semantic_heads = [
        create_semantic_head(pyramid_dict,
                             n_classes=num_semantic_classes[head_id],
                             input_target=inputs,
                             target_level=target_level,
                             semantic_id=head_id,
                             ndim=2,
                             **kwargs)
        for head_id in range(num_semantic_heads)
    ]

    return Model(inputs=inputs, outputs=semantic_heads, name=name)
Beispiel #11
0
def FPNet(backbone,
          input_shape,
          inputs=None,
          norm_method='whole_image',
          use_imagenet=False,
          pooling=None,
          required_channels=3,
          n_classes=3,
          name='fpnet',
          frames_per_batch=1,
          **kwargs):
    """Build a Feature Pyramid Network with a semantic segmentation head.

    The input is normalized with ``ImageNormalization2D`` and mapped to
    ``required_channels`` channels with a ``TensorProduct`` before being
    handed to ``get_backbone``. All backbone outputs are turned into
    pyramid features, and the semantic head is attached at the
    lowest-numbered pyramid level. Channels are assumed to be on the last
    axis.

    Args:
        backbone (str): Name of the backbone to use.
        input_shape (tuple): Shape of the input image.
        inputs (keras.Layer): Optional input tensor. If None, one is
            created from ``input_shape``.
        norm_method (str): Normalization method, defaults to 'whole_image'.
        use_imagenet (bool): Forwarded to ``get_backbone``.
        pooling (str): Forwarded to ``get_backbone`` as ``pooling``.
        required_channels (int): Number of channels the backbone input is
            forced to.
        n_classes (int): The number of classes to be predicted.
        name (str): Name to use for the model.
        frames_per_batch (int): Forwarded to ``get_backbone``.
        kwargs (dict): Accepted but not used by this function.

    Returns:
        Model: Feature pyramid network whose output is the semantic head.
    """
    if inputs is None:
        inputs = Input(shape=input_shape)

    # force the channel size for backbone input to be required_channels
    # NOTE(review): these layers are not wrapped in TimeDistributed even
    # when frames_per_batch > 1 -- confirm that is intended.
    normalized = ImageNormalization2D(norm_method=norm_method)(inputs)
    fixed_inputs = TensorProduct(required_channels)(normalized)

    # Same shape as the input, except for the forced channel count.
    backbone_shape = list(input_shape)
    backbone_shape[-1] = required_channels

    _, backbone_dict = get_backbone(backbone,
                                    fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    include_top=False,
                                    weights=None,
                                    input_shape=tuple(backbone_shape),
                                    pooling=pooling)

    pyramid_dict = __create_pyramid_features(backbone_dict)

    # Pyramid keys carry their level number; the head targets the lowest.
    level_numbers = [int(re.findall(r'\d+', key)[0]) for key in pyramid_dict]
    lowest_level = min(level_numbers)

    n_spatial_dims = len(input_shape) - 1
    head = __create_semantic_head(pyramid_dict,
                                  n_classes=n_classes,
                                  input_target=inputs,
                                  target_level=lowest_level,
                                  ndim=n_spatial_dims)

    return Model(inputs=inputs, outputs=head, name=name)
Beispiel #12
0
def bn_feature_net_skip_2D(receptive_field=61,
                           input_shape=(256, 256, 1),
                           inputs=None,
                           fgbg_model=None,
                           n_skips=2,
                           last_only=True,
                           norm_method='std',
                           padding_mode='reflect',
                           **kwargs):
    """Creates a 2D featurenet with skip-connections.

    ``n_skips + 1`` ``bn_feature_net_2D`` models are chained. Every model
    after the first receives the normalized image concatenated (along the
    channel axis) with the previous model's output.

    Args:
        receptive_field (int): the receptive field of the neural network.
        input_shape (tuple): If no input tensor, create one with this shape.
        inputs (tensor): optional input tensor. If None, one is created
            from ``input_shape``.
        fgbg_model (tensorflow.keras.Model): Concatenate output of this model
            with the inputs as a skip-connection. All of its layers are set
            to non-trainable (this mutates the passed model).
        n_skips (int): The number of skip-connections
        last_only (bool): Model will only output the final prediction,
            and not return any of the underlying model predictions.
        norm_method (str): The type of ImageNormalization to use
        padding_mode (str): Type of padding, one of 'reflect' or 'zero'
        kwargs (dict): Other model options defined in bn_feature_net_2D

    Returns:
        tensorflow.keras.Model: 2D FeatureNet with skip-connections. Unless
        ``last_only`` is set, it outputs every featurenet prediction (the
        ``fgbg_model`` output itself is excluded).
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # Honor a caller-supplied tensor instead of silently replacing it.
    if inputs is None:
        inputs = Input(shape=input_shape)
    img = ImageNormalization2D(norm_method=norm_method,
                               filter_size=receptive_field)(inputs)

    models = []
    model_outputs = []

    if fgbg_model is not None:
        for layer in fgbg_model.layers:
            layer.trainable = False

        models.append(fgbg_model)
        fgbg_output = fgbg_model(inputs)
        if isinstance(fgbg_output, list):
            fgbg_output = fgbg_output[-1]
        model_outputs.append(fgbg_output)

    for _ in range(n_skips + 1):
        if model_outputs:
            model_input = Concatenate(axis=channel_axis)(
                [img, model_outputs[-1]])
        else:
            model_input = img

        # Drop the batch axis: the sub-model builds its own input layer.
        new_input_shape = model_input.get_shape().as_list()[1:]
        models.append(
            bn_feature_net_2D(receptive_field=receptive_field,
                              input_shape=new_input_shape,
                              norm_method=None,
                              dilated=True,
                              padding=True,
                              padding_mode=padding_mode,
                              **kwargs))
        model_outputs.append(models[-1](model_input))

    if last_only:
        model = Model(inputs=inputs, outputs=model_outputs[-1])
    elif fgbg_model is None:
        model = Model(inputs=inputs, outputs=model_outputs)
    else:
        # The first entry is the fgbg_model output, not a featurenet output.
        model = Model(inputs=inputs, outputs=model_outputs[1:])

    return model
Beispiel #13
0
def RetinaMask(backbone,
               num_classes,
               input_shape,
               inputs=None,
               backbone_levels=['C3', 'C4', 'C5'],
               pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
               norm_method='whole_image',
               location=False,
               use_imagenet=False,
               crop_size=(14, 14),
               pooling=None,
               mask_dtype=K.floatx(),
               required_channels=3,
               frames_per_batch=1,
               **kwargs):
    """Assemble a RetinaMask model around a named backbone.

    The input is optionally concatenated with the output of ``Location2D``,
    normalized with ``ImageNormalization2D`` and projected to
    ``required_channels`` channels by a ``TensorProduct`` layer before being
    handed to ``get_backbone``. The backbone feature dictionary is then
    passed to ``retinanet_mask``. When ``frames_per_batch`` is greater than 1
    each of these per-image layers is wrapped in ``TimeDistributed``.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of a single image (no batch axis and
            no time axis).
        inputs (tensor): Optional input tensor. When given, no ``Input``
            layer is created from ``input_shape``.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to. Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - 'avg' means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will
                be applied.

        mask_dtype (str): Dtype to use for mask tensors.
        required_channels (int): The required number of channels of the
            backbone.  3 is the default for all current backbones.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        kwargs (dict): Other standard inputs for retinanet_mask.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    channels_first = K.image_data_format() == 'channels_first'
    channel_axis = 1 if channels_first else -1
    is_temporal = frames_per_batch > 1

    def per_frame(layer):
        # Apply the 2D layer to every frame when the data has a time axis.
        return TimeDistributed(layer) if is_temporal else layer

    if inputs is None:
        if is_temporal:
            # The time axis goes right after the channels for channels_first
            # data and in front of everything for channels_last data.
            shape_with_time = list(input_shape)
            shape_with_time.insert(1 if channels_first else 0,
                                   frames_per_batch)
            inputs = Input(shape=tuple(shape_with_time))
        else:
            inputs = Input(shape=input_shape)

    backbone_input = inputs
    if location:
        # TODO: TimeDistributed is incompatible with channels_first
        coords = per_frame(Location2D(in_shape=input_shape))(inputs)
        backbone_input = Concatenate(axis=channel_axis)([inputs, coords])

    # force the channel size for backbone input to be `required_channels`
    normalized = per_frame(
        ImageNormalization2D(norm_method=norm_method))(backbone_input)
    fixed_inputs = per_frame(TensorProduct(required_channels))(normalized)

    # force the input shape; `input_shape` has no batch axis, so the channel
    # entry is index 0 for channels_first data and the last entry otherwise.
    fixed_input_shape = list(input_shape)
    fixed_input_shape[0 if channels_first else -1] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    _, backbone_dict = get_backbone(backbone,
                                    fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    include_top=False,
                                    weights=None,
                                    input_shape=fixed_input_shape,
                                    pooling=pooling)

    # create the full model
    model_name = '{}_retinanet_mask'.format(backbone)
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          backbone_dict=backbone_dict,
                          crop_size=crop_size,
                          backbone_levels=backbone_levels,
                          pyramid_levels=pyramid_levels,
                          name=model_name,
                          mask_dtype=mask_dtype,
                          frames_per_batch=frames_per_batch,
                          **kwargs)
Beispiel #14
0
def bn_feature_net_2D(receptive_field=61,
                      input_shape=(256, 256, 1),
                      n_features=3,
                      n_channels=1,
                      reg=1e-5,
                      n_conv_filters=64,
                      n_dense_filters=200,
                      VGG_mode=False,
                      init='he_normal',
                      norm_method='std',
                      location=False,
                      dilated=False,
                      padding=False,
                      padding_mode='reflect',
                      multires=False,
                      include_top=True):
    """Creates a 2D featurenet.

    Args:
        receptive_field (int): the receptive field of the neural network.
        input_shape (tuple): Shape of the input (without the batch axis).
            Only used when ``dilated`` is True; otherwise it is replaced by
            a ``receptive_field`` x ``receptive_field`` patch with
            ``n_channels`` channels.
        n_features (int): Number of output features.
        n_channels (int): Number of input channels (used when not dilated).
        reg (float): l2 regularization value for the kernels.
        n_conv_filters (int): number of convolutional filters.
        n_dense_filters (int): number of dense filters.
        VGG_mode (bool): If True, doubles ``n_conv_filters`` after every
            pooling step.
        init (str): Method for initializing weights.
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        dilated (bool): Whether to use dilated pooling. Forces ``padding``
            to True.
        padding (bool): Whether to pad the normalized input.
        padding_mode (str): Type of padding, one of 'reflect' or 'zero'.
        multires (bool): Enables multi-resolution mode, in which the output
            of every pooling step is cropped and concatenated.
        include_top (bool): Whether to apply the final Softmax.

    Returns:
        Model: the 2D feature net.

    Raises:
        ValueError: If padding is enabled and ``padding_mode`` is not
            'reflect' or 'zero'.
    """
    # Create layers list (x) to store all of the layers.
    # We need to use the functional API to enable the multiresolution mode
    x = []

    win = (receptive_field - 1) // 2

    if dilated:
        padding = True

    # Fail early: an unknown mode used to skip the padding layer silently,
    # which produces a model with the wrong output size.
    if padding and padding_mode not in ('reflect', 'zero'):
        raise ValueError(
            'Invalid padding_mode `{}`: must be one of "reflect" or '
            '"zero".'.format(padding_mode))

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
        row_axis = 2
        col_axis = 3

        if not dilated:
            input_shape = (n_channels, receptive_field, receptive_field)

    else:
        row_axis = 1
        col_axis = 2
        channel_axis = -1
        if not dilated:
            input_shape = (receptive_field, receptive_field, n_channels)

    x.append(Input(shape=input_shape))
    x.append(
        ImageNormalization2D(norm_method=norm_method,
                             filter_size=receptive_field)(x[-1]))

    if padding:
        if padding_mode == 'reflect':
            x.append(ReflectionPadding2D(padding=(win, win))(x[-1]))
        elif padding_mode == 'zero':
            x.append(ZeroPadding2D(padding=(win, win))(x[-1]))

    if location:
        x.append(Location(in_shape=tuple(x[-1].shape.as_list()[1:]))(x[-1]))
        x.append(Concatenate(axis=channel_axis)([x[-2], x[-1]]))

    # Indices into `x` of the pooling outputs used by the multires mode.
    layers_to_concat = []

    rf_counter = receptive_field
    block_counter = 0
    d = 1

    while rf_counter > 4:
        filter_size = 3 if rf_counter % 2 == 0 else 4
        x.append(
            Conv2D(n_conv_filters, (filter_size, filter_size),
                   dilation_rate=d,
                   kernel_initializer=init,
                   padding='valid',
                   kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        block_counter += 1
        rf_counter -= filter_size - 1

        # Pool after every second conv block.
        if block_counter % 2 == 0:
            if dilated:
                x.append(
                    DilatedMaxPool2D(dilation_rate=d, pool_size=(2, 2))(x[-1]))
                d *= 2
            else:
                x.append(MaxPool2D(pool_size=(2, 2))(x[-1]))

            if VGG_mode:
                n_conv_filters *= 2

            rf_counter = rf_counter // 2

            if multires:
                layers_to_concat.append(len(x) - 1)

    if multires:
        # Every tap is cropped to the spatial size of the last layer, which
        # does not change while the crops are being built.
        target_shape = x[-1].get_shape().as_list()
        c = []
        for layer_index in layers_to_concat:
            output_shape = x[layer_index].get_shape().as_list()

            row_crop = int(output_shape[row_axis] - target_shape[row_axis])
            col_crop = int(output_shape[col_axis] - target_shape[col_axis])

            # For an odd difference the extra pixel is removed from the
            # trailing side.
            cropping = ((row_crop // 2, row_crop - row_crop // 2),
                        (col_crop // 2, col_crop - col_crop // 2))

            c.append(Cropping2D(cropping=cropping)(x[layer_index]))
        x.append(Concatenate(axis=channel_axis)(c))

    x.append(
        Conv2D(n_dense_filters, (rf_counter, rf_counter),
               dilation_rate=d,
               kernel_initializer=init,
               padding='valid',
               kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    x.append(
        TensorProd2D(n_dense_filters, n_dense_filters,
                     kernel_initializer=init,
                     kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    x.append(
        TensorProd2D(n_dense_filters, n_features,
                     kernel_initializer=init,
                     kernel_regularizer=l2(reg))(x[-1]))

    if not dilated:
        x.append(Flatten()(x[-1]))

    if include_top:
        x.append(Softmax(axis=channel_axis)(x[-1]))

    model = Model(inputs=x[0], outputs=x[-1])

    return model
Beispiel #15
0
def FPNet(backbone,
          input_shape,
          inputs=None,
          norm_method='whole_image',
          use_imagenet=False,
          pooling=None,
          required_channels=3,
          n_classes=3,
          name='fpnet',
          **kwargs):
    """Creates a Feature Pyramid Network with a semantic segmentation head.

    Args:
        backbone (str): A name of a supported backbone from [deepcell, resnet50]
        input_shape (tuple): Shape of the input image (without the batch axis).
        inputs (keras layer, optional): Defaults to None. Method to pass in
            preexisting layers. If None, an Input layer is created from
            ``input_shape``.
        norm_method (str, optional): Defaults to 'whole_image'.
            Normalization method passed to ImageNormalization2D.
        use_imagenet (bool, optional): Defaults to False. Whether to load
            imagenet-based pretrained weights; forwarded to ``get_backbone``.
        pooling (str, optional): Defaults to None. optional pooling mode for
            feature extraction when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        required_channels (int, optional): Defaults to 3. The required number
            of channels of the backbone.  3 is the default for all current
            backbones.
        n_classes (int, optional): Defaults to 3.  The number of classes to be
            predicted.
        name (str, optional): Defaults to 'fpnet'. Name to use for the model.
        kwargs (dict): Accepted for call compatibility; not used by this
            function.

    Returns:
        Model with a feature pyramid network with a semantic segmentation
        head as the output
    """

    if inputs is None:
        inputs = Input(shape=input_shape)

    # force the channel size for backbone input to be `required_channels`
    norm = ImageNormalization2D(norm_method=norm_method)(inputs)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape; `input_shape` has no batch axis, so the channel
    # entry is index 0 for channels_first data and the last entry otherwise.
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    # Get backbone outputs
    backbone_dict = get_backbone(
        backbone, fixed_inputs, use_imagenet=use_imagenet, **model_kwargs)

    # Construct feature pyramid network
    pyramid_dict = __create_pyramid_features(backbone_dict)

    # The semantic head targets the pyramid level with the smallest number
    # in its name.
    levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(levels)

    x = __create_semantic_head(pyramid_dict, n_classes=n_classes,
                               input_target=inputs, target_level=target_level)

    return Model(inputs=inputs, outputs=x, name=name)
Beispiel #16
0
def MaskRCNN(backbone,
             num_classes,
             input_shape,
             backbone_levels=['C3', 'C4', 'C5'],
             pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
             norm_method='whole_image',
             location=False,
             use_imagenet=False,
             crop_size=(14, 14),
             pooling=None,
             mask_dtype=K.floatx(),
             required_channels=3,
             **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone: string, name of backbone to use.
        num_classes: Number of classes to classify.
        input_shape: The shape of the input data (without the batch axis).
        backbone_levels: list, the backbone levels passed to retinanet_mask.
            Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels: list, the pyramid levels passed to retinanet_mask.
            Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        norm_method: string, ImageNormalization mode to use.
        location: boolean, whether to concatenate Location2D output to the
            image before normalization.
        use_imagenet: boolean, whether to load imagenet-based pretrained
            weights; forwarded to get_backbone.
        crop_size: 2-length tuple passed to retinanet_mask.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        mask_dtype: Dtype to use for mask tensors.
        required_channels: integer, the required number of channels of the
            backbone.  3 is the default for all current backbones.
        kwargs: Other options passed through to retinanet_mask.

    Returns:
        RetinaNet model with a backbone.
    """
    inputs = Input(shape=input_shape)
    channels_first = K.image_data_format() == 'channels_first'

    # Keep `inputs` bound to the Input layer: it is the model input handed
    # to retinanet_mask. The location-augmented tensor only feeds the
    # backbone.
    if location:
        loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=1 if channels_first else -1)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    norm = ImageNormalization2D(norm_method=norm_method)(concat)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape; `input_shape` has no batch axis, so the channel
    # entry is index 0 for channels_first data and the last entry otherwise.
    axis = 0 if channels_first else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    backbone_dict = get_backbone(backbone,
                                 fixed_inputs,
                                 use_imagenet=use_imagenet,
                                 **model_kwargs)

    # create the full model
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          backbone_dict=backbone_dict,
                          crop_size=crop_size,
                          backbone_levels=backbone_levels,
                          pyramid_levels=pyramid_levels,
                          name='{}_retinanet_mask'.format(backbone),
                          mask_dtype=mask_dtype,
                          **kwargs)
Beispiel #17
0
def RetinaMask(backbone,
               num_classes,
               input_shape,
               inputs=None,
               backbone_levels=['C3', 'C4', 'C5'],
               pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
               norm_method='whole_image',
               location=False,
               use_imagenet=False,
               crop_size=(14, 14),
               pooling=None,
               mask_dtype=K.floatx(),
               required_channels=3,
               frames_per_batch=1,
               **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of a single image (no batch axis and
            no time axis).
        inputs (tensor): Optional input tensor; when given, ``input_shape``
            is not used to create an Input layer.
        backbone_levels (list): Backbone levels passed to retinanet_mask.
            Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): Pyramid levels passed to retinanet_mask.
            Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to concatenate Location2D output to the
            input before normalization.
        use_imagenet (bool): Forwarded to get_backbone.
        crop_size (tuple): 2-length tuple passed to retinanet_mask.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.
            - None means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - 'avg' means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will
                be applied.
        mask_dtype (str): Dtype to use for mask tensors.
        required_channels (int): The required number of channels of the
            backbone.  3 is the default for all current backbones.
        frames_per_batch (int): Number of frames per sample. Values greater
            than 1 add a time axis to the input and wrap the per-image
            layers in TimeDistributed.
        kwargs (dict): Other options passed through to retinanet_mask.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    data_format = K.image_data_format()
    channel_axis = 1 if data_format == 'channels_first' else -1
    single_frame = frames_per_batch <= 1

    if inputs is None:
        if single_frame:
            inputs = Input(shape=input_shape)
        else:
            # Add the time axis: after the channels for channels_first data,
            # in front of everything otherwise.
            dims = list(input_shape)
            if channel_axis == 1:
                dims = dims[:1] + [frames_per_batch] + dims[1:]
            else:
                dims = [frames_per_batch] + dims
            inputs = Input(shape=tuple(dims))

    concat = inputs
    if location:
        loc_layer = Location2D(in_shape=input_shape)
        if not single_frame:
            # TODO: TimeDistributed is incompatible with channels_first
            loc_layer = TimeDistributed(loc_layer)
        loc = loc_layer(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])

    # force the channel size for backbone input to be `required_channels`
    norm_layer = ImageNormalization2D(norm_method=norm_method)
    if not single_frame:
        norm_layer = TimeDistributed(norm_layer)
    norm = norm_layer(concat)

    channel_layer = TensorProduct(required_channels)
    if not single_frame:
        channel_layer = TimeDistributed(channel_layer)
    fixed_inputs = channel_layer(norm)

    # force the input shape (`input_shape` carries no batch axis, hence
    # index 0 for channels_first data)
    shape_axis = 0 if data_format == 'channels_first' else -1
    shape_entries = list(input_shape)
    shape_entries[shape_axis] = required_channels
    fixed_input_shape = tuple(shape_entries)

    backbone_options = dict(include_top=False,
                            weights=None,
                            input_shape=fixed_input_shape,
                            pooling=pooling)

    _, backbone_dict = get_backbone(backbone,
                                    fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **backbone_options)

    # create the full model
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          backbone_dict=backbone_dict,
                          crop_size=crop_size,
                          backbone_levels=backbone_levels,
                          pyramid_levels=pyramid_levels,
                          name='{}_retinanet_mask'.format(backbone),
                          mask_dtype=mask_dtype,
                          frames_per_batch=frames_per_batch,
                          **kwargs)
def ScaleDetectionModel(input_shape=(None, None, 1),
                        inputs=None,
                        backbone='mobilenetv2',
                        use_pretrained_weights=True):
    """Create a ScaleDetectionModel for detecting scales of input data.

    This enables data to be scaled appropriately for other segmentation models
    which may not be resolution tolerant.

    Based on a standard backbone with an initial ImageNormalization2D and final
    AveragePooling2D and TensorProduct layers.

    Args:
        input_shape (tuple): a 3-length tuple of the input data shape.
        inputs (tensorflow.keras.Layer): Optional input layer of the model.
            If not provided, creates a Layer based on input_shape.
        backbone (str): name of the backbone to use for the model.
        use_pretrained_weights (bool): whether to load pre-trained weights.
            Only supports the MobileNetV2 backbone
            ('mobilenetv2' or 'mobilenet_v2', case-insensitive).

    Returns:
        tensorflow.keras.Model: the scale detection model, with pre-trained
        weights loaded when ``use_pretrained_weights`` is True.

    Raises:
        ValueError: If ``use_pretrained_weights`` is True and ``backbone``
            has no weights file.
    """
    required_channels = 3  # required for most backbones

    if inputs is None:
        inputs = keras.layers.Input(shape=input_shape)

    # Index of the channel entry in `input_shape`, which has no batch axis.
    if keras.backend.image_data_format() == 'channels_first':
        channel_axis = 0
    else:
        channel_axis = -1

    norm = ImageNormalization2D(norm_method='whole_image')(inputs)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    fixed_input_shape = list(input_shape)
    fixed_input_shape[channel_axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    backbone_model = get_backbone(
        backbone,
        fixed_inputs,
        use_imagenet=False,
        return_dict=False,
        include_top=False,
        weights=None,
        input_shape=fixed_input_shape,
        pooling=None)

    x = keras.layers.AveragePooling2D(4)(backbone_model.outputs[0])
    x = TensorProduct(256, activation='relu')(x)
    x = TensorProduct(1)(x)
    outputs = keras.layers.Flatten()(x)

    model = keras.Model(inputs=backbone_model.inputs, outputs=outputs)

    if use_pretrained_weights:
        local_name = 'ScaleDetectionModel_{}.h5'.format(backbone)
        # Both spellings must be separate set members: the expression
        # `'mobilenetv2' or 'mobilenet_v2'` collapses to 'mobilenetv2' only.
        if backbone.lower() in {'mobilenetv2', 'mobilenet_v2'}:
            weights_path = get_file(
                local_name,
                MOBILENETV2_WEIGHTS_PATH,
                cache_subdir='models',
                file_hash='aa78e6b9a4551289dd967f1f5ca83fed')
        else:
            raise ValueError('Backbone %s does not have a weights file.' %
                             backbone)

        model.load_weights(weights_path)

    return model
Beispiel #19
0
def bn_feature_net_2D(receptive_field=61,
                      input_shape=(256, 256, 1),
                      inputs=None,
                      n_features=3,
                      n_channels=1,
                      reg=1e-5,
                      n_conv_filters=64,
                      n_dense_filters=200,
                      VGG_mode=False,
                      init='he_normal',
                      norm_method='std',
                      location=False,
                      dilated=False,
                      padding=False,
                      padding_mode='reflect',
                      multires=False,
                      include_top=True):
    """Creates a 2D featurenet.

    Args:
        receptive_field (int): the receptive field of the neural network.
        input_shape (tuple): If no input tensor, create one with this shape.
            Only honoured when ``dilated`` is True; otherwise it is replaced
            by a ``receptive_field`` x ``receptive_field`` patch with
            ``n_channels`` channels.
        inputs (tensor): optional input tensor
        n_features (int): Number of output features
        n_channels (int): number of input channels (used when not dilated)
        reg (float): l2 regularization value for the kernels
        n_conv_filters (int): number of convolutional filters
        n_dense_filters (int): number of dense filters
        VGG_mode (bool): If True, doubles ``n_conv_filters`` after every
            pooling step.
        init (str): Method for initializing weights.
        norm_method (str): ImageNormalization mode to use
        location (bool): Whether to include location data
        dilated (bool): Whether to use dilated pooling. Forces ``padding``
            to True.
        padding (bool): Whether to use padding.
        padding_mode (str): Type of padding, one of 'reflect' or 'zero'
        multires (bool): Enables multi-resolution mode
        include_top (bool): Whether to append the classification head (two
            TensorProduct layers followed by a Softmax). If False, the model
            ends at the ReLU after the last convolution.

    Returns:
        tensorflow.keras.Model: 2D FeatureNet

    Raises:
        ValueError: If padding is enabled and ``padding_mode`` is not
            'reflect' or 'zero'.
    """
    # Create layers list (x) to store all of the layers.
    # We need to use the functional API to enable the multiresolution mode
    x = []

    win = (receptive_field - 1) // 2

    if dilated:
        padding = True

    # Fail early: an unknown mode used to skip the padding layer silently,
    # which produces a model with the wrong output size.
    if padding and padding_mode not in ('reflect', 'zero'):
        raise ValueError(
            'Invalid padding_mode `{}`: must be one of "reflect" or '
            '"zero".'.format(padding_mode))

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
        row_axis = 2
        col_axis = 3

        if not dilated:
            input_shape = (n_channels, receptive_field, receptive_field)

    else:
        row_axis = 1
        col_axis = 2
        channel_axis = -1
        if not dilated:
            input_shape = (receptive_field, receptive_field, n_channels)

    if inputs is not None:
        # Wrap raw tensors in an Input layer; Keras tensors are used as-is.
        if not K.is_keras_tensor(inputs):
            img_input = Input(tensor=inputs, shape=input_shape)
        else:
            img_input = inputs
        x.append(img_input)
    else:
        x.append(Input(shape=input_shape))

    x.append(
        ImageNormalization2D(norm_method=norm_method,
                             filter_size=receptive_field)(x[-1]))

    if padding:
        if padding_mode == 'reflect':
            x.append(ReflectionPadding2D(padding=(win, win))(x[-1]))
        elif padding_mode == 'zero':
            x.append(ZeroPadding2D(padding=(win, win))(x[-1]))

    if location:
        x.append(Location2D(in_shape=tuple(x[-1].shape.as_list()[1:]))(x[-1]))
        x.append(Concatenate(axis=channel_axis)([x[-2], x[-1]]))

    # Indices into `x` of the pooling outputs used by the multires mode.
    layers_to_concat = []

    rf_counter = receptive_field
    block_counter = 0
    d = 1

    while rf_counter > 4:
        filter_size = 3 if rf_counter % 2 == 0 else 4
        x.append(
            Conv2D(n_conv_filters,
                   filter_size,
                   dilation_rate=d,
                   kernel_initializer=init,
                   padding='valid',
                   kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        block_counter += 1
        rf_counter -= filter_size - 1

        # Pool after every second conv block.
        if block_counter % 2 == 0:
            if dilated:
                x.append(
                    DilatedMaxPool2D(dilation_rate=d, pool_size=(2, 2))(x[-1]))
                d *= 2
            else:
                x.append(MaxPool2D(pool_size=(2, 2))(x[-1]))

            if VGG_mode:
                n_conv_filters *= 2

            rf_counter = rf_counter // 2

            if multires:
                layers_to_concat.append(len(x) - 1)

    if multires:
        # Every tap is cropped to the spatial size of the last layer, which
        # does not change while the crops are being built.
        target_shape = x[-1].get_shape().as_list()
        c = []
        for layer_index in layers_to_concat:
            output_shape = x[layer_index].get_shape().as_list()

            row_crop = int(output_shape[row_axis] - target_shape[row_axis])
            col_crop = int(output_shape[col_axis] - target_shape[col_axis])

            # For an odd difference the extra pixel is removed from the
            # trailing side.
            cropping = ((row_crop // 2, row_crop - row_crop // 2),
                        (col_crop // 2, col_crop - col_crop // 2))

            c.append(Cropping2D(cropping=cropping)(x[layer_index]))

        x.append(Concatenate(axis=channel_axis)(c))

    x.append(
        Conv2D(n_dense_filters, (rf_counter, rf_counter),
               dilation_rate=d,
               kernel_initializer=init,
               padding='valid',
               kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    if include_top:
        x.append(
            TensorProduct(n_dense_filters,
                          kernel_initializer=init,
                          kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        x.append(
            TensorProduct(n_features,
                          kernel_initializer=init,
                          kernel_regularizer=l2(reg))(x[-1]))

        if not dilated:
            x.append(Flatten()(x[-1]))

        x.append(Softmax(axis=channel_axis)(x[-1]))

    # When the caller supplied a tensor, the model inputs are the source
    # inputs that tensor was derived from.
    if inputs is not None:
        real_inputs = keras_utils.get_source_inputs(x[0])
    else:
        real_inputs = x[0]

    model = Model(inputs=real_inputs, outputs=x[-1])

    return model
Beispiel #20
0
def RetinaNet(backbone,
              num_classes,
              input_shape,
              inputs=None,
              norm_method='whole_image',
              location=False,
              use_imagenet=False,
              pooling=None,
              required_channels=3,
              **kwargs):
    """Constructs a retinanet model using a backbone from keras-applications.

    The input is optionally concatenated with Location2D output, normalized
    with ImageNormalization2D and projected to ``required_channels`` channels
    by a TensorProduct layer before being handed to ``get_backbone``. The
    backbone output is then passed to ``retinanet``.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data (without the batch
            axis).
        inputs (tensor): Optional input tensor; when given, no Input layer is
            created from ``input_shape``.
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Forwarded to get_backbone.
        pooling (str): optional pooling mode for feature extraction
            when 'include_top' is False.
            - None means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - 'avg' means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will
                be applied.
        required_channels (int): The required number of channels of the
            backbone.  3 is the default for all current backbones.
        kwargs (dict): Other options passed through to retinanet.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    if inputs is None:
        inputs = Input(shape=input_shape)

    channels_first = K.image_data_format() == 'channels_first'

    backbone_input = inputs
    if location:
        coords = Location2D(in_shape=input_shape)(inputs)
        backbone_input = Concatenate(
            axis=1 if channels_first else -1)([inputs, coords])

    # force the channel size for backbone input to be `required_channels`
    normalized = ImageNormalization2D(norm_method=norm_method)(backbone_input)
    fixed_inputs = TensorProduct(required_channels)(normalized)

    # force the input shape; `input_shape` has no batch axis, so the channel
    # entry is index 0 for channels_first data and the last entry otherwise.
    shape_entries = list(input_shape)
    shape_entries[0 if channels_first else -1] = required_channels
    fixed_input_shape = tuple(shape_entries)

    backbone_dict = get_backbone(backbone,
                                 fixed_inputs,
                                 use_imagenet=use_imagenet,
                                 include_top=False,
                                 weights=None,
                                 input_shape=fixed_input_shape,
                                 pooling=pooling)

    # create the full model
    model_name = '{}_retinanet'.format(backbone)
    return retinanet(inputs=inputs,
                     num_classes=num_classes,
                     backbone_dict=backbone_dict,
                     name=model_name,
                     **kwargs)
Beispiel #21
0
def PanopticNet(backbone,
                input_shape,
                inputs=None,
                backbone_levels=['C3', 'C4', 'C5'],
                pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                create_pyramid_features=__create_pyramid_features,
                create_semantic_head=__create_semantic_head,
                frames_per_batch=1,
                temporal_mode=None,
                num_semantic_heads=1,
                num_semantic_classes=[3],
                required_channels=3,
                norm_method='whole_image',
                pooling=None,
                location=True,
                use_imagenet=True,
                name='panopticnet',
                **kwargs):
    """Constructs a panoptic model using a backbone from keras-applications.

    The input is optionally normalized, optionally concatenated with a
    location layer, projected to ``required_channels`` channels and fed to
    the backbone.  The selected backbone levels are turned into a feature
    pyramid and one semantic head is built per requested head.

    Args:
        backbone (str): Name of backbone to use.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor. If None, a new ``Input`` is
            created from ``input_shape`` (with a leading/second time axis of
            size ``frames_per_batch`` when ``frames_per_batch > 1``).
        backbone_levels (list): The backbone levels to be used.
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): Pyramid levels to use. Defaults to
            ['P3','P4','P5','P6','P7']
        create_pyramid_features (function): Function to get the pyramid
            features from the backbone.
        create_semantic_head (function): Function to build a semantic head
            submodel.
        frames_per_batch (int): Defaults to 1.
        temporal_mode: Mode of temporal convolution. Choose from
            {'conv','lstm','gru', None}. Defaults to None.
        num_semantic_heads (int): Defaults to 1.
        num_semantic_classes (list): Number of classes for each semantic
            head, indexed by head. Defaults to [3].
        norm_method (str): ImageNormalization mode to use. If None, the
            input is not normalized. Defaults to 'whole_image'.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - 'avg' means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will
                be applied.

        required_channels (int): The required number of channels of the
            backbone.  3 is the default for all current backbones.
        name (str): Name of the returned model. Defaults to 'panopticnet'.
        kwargs (dict): Extra keyword arguments forwarded to
            ``create_semantic_head``.

    Raises:
        ValueError: temporal_mode not 'conv', 'lstm', 'gru'  or None

    Returns:
        tensorflow.keras.Model: Panoptic model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # Check input to __merge_temporal_features
    acceptable_modes = {'conv', 'lstm', 'gru', None}
    if temporal_mode is not None:
        temporal_mode = str(temporal_mode).lower()
        if temporal_mode not in acceptable_modes:
            raise ValueError('Mode {} not supported. Please choose from {}.'.format(
                temporal_mode, str(acceptable_modes)))

    if inputs is None:
        if frames_per_batch > 1:
            # Insert the time axis next to the channel axis (channels_first)
            # or in front of the spatial axes (channels_last).
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time)
        else:
            inputs = Input(shape=input_shape)

    # Normalize the input images (skipped when norm_method is None)
    if norm_method is None:
        norm = inputs
    else:
        if frames_per_batch > 1:
            norm = TimeDistributed(ImageNormalization2D(norm_method=norm_method))(inputs)
        else:
            norm = ImageNormalization2D(norm_method=norm_method)(inputs)

    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(norm)
        else:
            loc = Location2D(in_shape=input_shape)(norm)
        concat = Concatenate(axis=channel_axis)([norm, loc])
    else:
        concat = norm

    # force the channel size for backbone input to be `required_channels`
    if frames_per_batch > 1:
        fixed_inputs = TimeDistributed(TensorProduct(required_channels))(concat)
    else:
        fixed_inputs = TensorProduct(required_channels)(concat)

    # force the input shape
    # (`input_shape` has no batch axis, so the channel axis is 0 or -1 here)
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone, fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True, **model_kwargs)

    # Keep only the backbone levels requested for the feature pyramid
    backbone_dict_reduced = {k: backbone_dict[k] for k in backbone_dict
                             if k in backbone_levels}
    ndim = 2 if frames_per_batch == 1 else 3
    pyramid_dict = create_pyramid_features(backbone_dict_reduced, ndim=ndim)

    features = [pyramid_dict[key] for key in pyramid_levels]

    if frames_per_batch > 1:
        temporal_features = [__merge_temporal_features(
            feature, mode=temporal_mode) for feature in features]
        # `features` is ordered by `pyramid_levels`, so the merged features
        # must be written back under those same keys.  Zipping against
        # `pyramid_dict.keys()` would mismatch features and levels whenever
        # the dict order differs from `pyramid_levels`.
        for f, k in zip(temporal_features, pyramid_levels):
            pyramid_dict[k] = f

    # The numeric suffix of each pyramid key (e.g. 'P3' -> 3) is its level;
    # the semantic heads target the lowest level present.
    semantic_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(semantic_levels)

    semantic_head_list = []
    for i in range(num_semantic_heads):
        semantic_head_list.append(create_semantic_head(
            pyramid_dict, n_classes=num_semantic_classes[i],
            input_target=inputs, target_level=target_level,
            semantic_id=i, ndim=ndim, **kwargs))

    outputs = semantic_head_list

    model = Model(inputs=inputs, outputs=outputs, name=name)
    return model
Beispiel #22
0
def PanopticNet(backbone,
                input_shape,
                inputs=None,
                backbone_levels=['C3', 'C4', 'C5'],
                pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                create_pyramid_features=__create_pyramid_features,
                create_semantic_head=__create_semantic_head,
                frames_per_batch=1,
                temporal_mode=None,
                num_semantic_heads=1,
                num_semantic_classes=[3],
                required_channels=3,
                norm_method='whole_image',
                pooling=None,
                location=True,
                use_imagenet=True,
                lite=False,
                upsample_type='upsampling2d',
                interpolation='bilinear',
                name='panopticnet',
                **kwargs):
    """Constructs a panoptic model using a backbone from keras-applications.

    The input is optionally normalized, optionally concatenated with a
    location layer, projected to ``required_channels`` channels with a 1x1
    convolution and fed to the backbone.  The selected backbone levels are
    turned into a feature pyramid and one semantic head is built per
    requested head.

    Args:
        backbone (str): Name of backbone to use.
        input_shape (tuple): The shape of the input data. The spatial
            dimensions must be square and a power of 2.
        inputs (tensor): Optional input tensor. If None, a new ``Input``
            named 'input_0' is created from ``input_shape`` (with an extra
            time axis of size ``frames_per_batch`` when
            ``frames_per_batch > 1``).
        backbone_levels (list): The backbone levels to be used.
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): Pyramid levels to use. Defaults to
            ['P3','P4','P5','P6','P7']
        create_pyramid_features (function): Function to get the pyramid
            features from the backbone.
        create_semantic_head (function): Function to build a semantic head
            submodel.
        frames_per_batch (int): Defaults to 1.
        temporal_mode: Mode of temporal convolution. Choose from
            {'conv','lstm','gru', None}. Defaults to None.
        num_semantic_heads (int): Defaults to 1.
        num_semantic_classes (list): Number of classes for each semantic
            head, indexed by head. Defaults to [3].
        norm_method (str): ImageNormalization mode to use. If None, the
            input is not normalized. Defaults to 'whole_image'.
        location (bool): Whether to include location data. Defaults to True
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        lite (bool): Whether to use a depthwise conv in the feature pyramid
            rather than regular conv. Defaults to False.
        upsample_type (str): Choice of upsampling layer to use from
            ['upsamplelike', 'upsampling2d', 'upsampling3d']. Defaults to
            'upsampling2d'.
        interpolation (str): Choice of interpolation mode for upsampling
            layers from ['bilinear', 'nearest']. Defaults to bilinear.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - 'avg' means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will
                be applied.

        required_channels (int): The required number of channels of the
            backbone.  3 is the default for all current backbones.
        name (str): Name of the returned model. Defaults to 'panopticnet'.
        kwargs (dict): Extra keyword arguments forwarded to
            ``create_semantic_head``.

    Raises:
        ValueError: temporal_mode not 'conv', 'lstm', 'gru'  or None
        ValueError: the spatial dimensions of input_shape are not square.
        ValueError: the spatial dimension is not a power of 2.
        ValueError: interpolation not 'bilinear' or 'nearest'.

    Returns:
        tensorflow.keras.Model: Panoptic model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    conv = Conv3D if frames_per_batch > 1 else Conv2D
    conv_kernel = (1, 1, 1) if frames_per_batch > 1 else (1, 1)

    # Check input to __merge_temporal_features
    acceptable_modes = {'conv', 'lstm', 'gru', None}
    if temporal_mode is not None:
        temporal_mode = str(temporal_mode).lower()
        if temporal_mode not in acceptable_modes:
            raise ValueError('temporal_mode {} not supported. Please choose '
                             'from {}.'.format(temporal_mode, acceptable_modes))

    # TODO only works for 2D: do we check for 3D as well?
    # What are the requirements for 3D data?
    img_shape = input_shape[1:] if channel_axis == 1 else input_shape[:-1]
    if img_shape[0] != img_shape[1]:
        raise ValueError('Input data must be square, got dimensions {}'.format(
            img_shape))

    # Exact integer test: a floating point logarithm is subject to rounding
    # and raises unrelated errors for sizes such as 0 or None.
    if not _is_power_of_two(img_shape[0]):
        raise ValueError('Input data dimensions must be a power of 2, '
                         'got {}'.format(img_shape[0]))

    # Check input to interpolation
    acceptable_interpolation = {'bilinear', 'nearest'}
    if interpolation not in acceptable_interpolation:
        raise ValueError('Interpolation mode "{}" not supported. '
                         'Choose from {}.'.format(
                             interpolation, list(acceptable_interpolation)))

    if inputs is None:
        if frames_per_batch > 1:
            # Insert the time axis next to the channel axis (channels_first)
            # or in front of the spatial axes (channels_last).
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time, name='input_0')
        else:
            inputs = Input(shape=input_shape, name='input_0')

    # Normalize input images
    if norm_method is None:
        norm = inputs
    else:
        if frames_per_batch > 1:
            norm = TimeDistributed(ImageNormalization2D(
                norm_method=norm_method, name='norm'), name='td_norm')(inputs)
        else:
            norm = ImageNormalization2D(norm_method=norm_method,
                                        name='norm')(inputs)

    # Add location layer
    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape,
                                             name='location'), name='td_location')(norm)
        else:
            loc = Location2D(in_shape=input_shape, name='location')(norm)
        concat = Concatenate(axis=channel_axis,
                             name='concatenate_location')([norm, loc])
    else:
        concat = norm

    # Force the channel size for backbone input to be `required_channels`
    fixed_inputs = conv(required_channels, conv_kernel, strides=1,
                        padding='same', name='conv_channels')(concat)

    # Force the input shape
    # (`input_shape` has no batch axis, so the channel axis is 0 or -1 here)
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone, fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    # Keep only the backbone levels requested for the feature pyramid
    backbone_dict_reduced = {k: backbone_dict[k] for k in backbone_dict
                             if k in backbone_levels}

    ndim = 2 if frames_per_batch == 1 else 3

    pyramid_dict = create_pyramid_features(backbone_dict_reduced,
                                           ndim=ndim,
                                           lite=lite,
                                           interpolation=interpolation,
                                           upsample_type=upsample_type)

    features = [pyramid_dict[key] for key in pyramid_levels]

    if frames_per_batch > 1:
        temporal_features = [__merge_temporal_features(f, mode=temporal_mode,
                                                       frames_per_batch=frames_per_batch)
                             for f in features]
        # `features` follows the order of `pyramid_levels`, so write the
        # merged features back under those same keys.
        for f, k in zip(temporal_features, pyramid_levels):
            pyramid_dict[k] = f

    # The numeric suffix of each pyramid key (e.g. 'P3' -> 3) is its level;
    # the semantic heads target the lowest level present.
    semantic_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(semantic_levels)

    semantic_head_list = []
    for i in range(num_semantic_heads):
        semantic_head_list.append(create_semantic_head(
            pyramid_dict, n_classes=num_semantic_classes[i],
            input_target=inputs, target_level=target_level,
            semantic_id=i, ndim=ndim, upsample_type=upsample_type,
            interpolation=interpolation, **kwargs))

    outputs = semantic_head_list

    model = Model(inputs=inputs, outputs=outputs, name=name)
    return model


def _is_power_of_two(value):
    """Return True if ``value`` is a positive, integral power of two.

    Uses exact integer arithmetic instead of a floating point logarithm, so
    the result does not depend on rounding.  Values that cannot be converted
    to an integer (e.g. ``None``) are reported as not being a power of two.
    """
    try:
        as_int = int(value)
    except (TypeError, ValueError, OverflowError):
        return False
    # A power of two has exactly one bit set, so clearing the lowest set bit
    # must leave zero.
    return as_int == value and as_int > 0 and (as_int & (as_int - 1)) == 0