Example #1
def PanopticNet(backbone,
                input_shape,
                inputs=None,
                backbone_levels=['C3', 'C4', 'C5'],
                pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                create_pyramid_features=__create_pyramid_features,
                create_semantic_head=__create_semantic_head,
                frames_per_batch=1,
                temporal_mode=None,
                num_semantic_heads=1,
                num_semantic_classes=[3],
                required_channels=3,
                norm_method='whole_image',
                pooling=None,
                location=True,
                use_imagenet=True,
                name='panopticnet',
                **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides input_shape.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to use. Defaults to
            ['P3', 'P4', 'P5', 'P6', 'P7'].
        create_pyramid_features (function): Function to get the pyramid
            features from the backbone.
        create_semantic_head (function): Function to build a
            semantic head submodel.
        frames_per_batch (int): Size of z axis in generated batches.
            Defaults to 1.
        temporal_mode (str): Mode of temporal convolution. Choose from
            {'conv', 'lstm', 'gru', None}. Defaults to None.
        num_semantic_heads (int): Number of semantic heads to build.
            Defaults to 1.
        num_semantic_classes (list): Number of classes per semantic head.
            Defaults to [3].
        norm_method (str): ImageNormalization mode to use.
            Defaults to 'whole_image'.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - 'avg' means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will
                be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for create_semantic_head.

    Raises:
        ValueError: temporal_mode is not one of 'conv', 'lstm', 'gru', or None.

    Returns:
        tensorflow.keras.Model: Panoptic model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # Check input to __merge_temporal_features
    acceptable_modes = {'conv', 'lstm', 'gru', None}
    if temporal_mode is not None:
        temporal_mode = str(temporal_mode).lower()
        if temporal_mode not in acceptable_modes:
            raise ValueError('Mode {} not supported. Please choose from {}.'.format(
                temporal_mode, str(acceptable_modes)))

    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time)
        else:
            inputs = Input(shape=input_shape)

    # force the channel size for backbone input to be `required_channels`
    if norm_method is None:
        norm = inputs
    else:
        if frames_per_batch > 1:
            norm = TimeDistributed(ImageNormalization2D(norm_method=norm_method))(inputs)
        else:
            norm = ImageNormalization2D(norm_method=norm_method)(inputs)

    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(norm)
        else:
            loc = Location2D(in_shape=input_shape)(norm)
        concat = Concatenate(axis=channel_axis)([norm, loc])
    else:
        concat = norm

    if frames_per_batch > 1:
        fixed_inputs = TimeDistributed(TensorProduct(required_channels))(concat)
    else:
        fixed_inputs = TensorProduct(required_channels)(concat)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone, fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True, **model_kwargs)

    backbone_dict_reduced = {k: backbone_dict[k] for k in backbone_dict
                             if k in backbone_levels}
    ndim = 2 if frames_per_batch == 1 else 3
    pyramid_dict = create_pyramid_features(backbone_dict_reduced, ndim=ndim)

    features = [pyramid_dict[key] for key in pyramid_levels]

    if frames_per_batch > 1:
        temporal_features = [__merge_temporal_features(
            feature, mode=temporal_mode) for feature in features]
        for f, k in zip(temporal_features, pyramid_levels):
            pyramid_dict[k] = f

    semantic_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(semantic_levels)

    semantic_head_list = []
    for i in range(num_semantic_heads):
        semantic_head_list.append(create_semantic_head(
            pyramid_dict, n_classes=num_semantic_classes[i],
            input_target=inputs, target_level=target_level,
            semantic_id=i, ndim=ndim, **kwargs))

    outputs = semantic_head_list

    model = Model(inputs=inputs, outputs=outputs, name=name)
    return model
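
A quick usage sketch for the function above. This is illustrative only: the import path and the 'resnet50' backbone name are assumptions based on typical deepcell-tf layouts, not verified against a specific release.

# Hypothetical usage sketch; import path and backbone name are assumed.
from deepcell.model_zoo.panopticnet import PanopticNet

# Two semantic heads, e.g. a 3-class pixelwise head and a 1-class
# foreground head; num_semantic_classes needs one entry per head.
model = PanopticNet(backbone='resnet50',
                    input_shape=(128, 128, 1),   # channels_last, 1 channel
                    num_semantic_heads=2,
                    num_semantic_classes=[3, 1],
                    use_imagenet=False)          # skip pretrained weights
model.summary()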
Example #2
def bn_feature_net_2D(receptive_field=61,
                      input_shape=(256, 256, 1),
                      inputs=None,
                      n_features=3,
                      n_channels=1,
                      reg=1e-5,
                      n_conv_filters=64,
                      n_dense_filters=200,
                      VGG_mode=False,
                      init='he_normal',
                      norm_method='std',
                      location=False,
                      dilated=False,
                      padding=False,
                      padding_mode='reflect',
                      multires=False,
                      include_top=True):
    """Creates a 2D featurenet.

    Args:
        receptive_field (int): the receptive field of the neural network.
        input_shape (tuple): If no input tensor, create one with this shape.
        inputs (tensor): optional input tensor
        n_features (int): Number of output features
        n_channels (int): number of input channels
        reg (float): regularization value
        n_conv_filters (int): number of convolutional filters
        n_dense_filters (int): number of dense filters
        VGG_mode (bool): If True, double the number of convolutional
            filters after each pooling step (VGG-style).
        init (str): Method for initializing weights.
        norm_method (str): ImageNormalization mode to use
        location (bool): Whether to include location data
        dilated (bool): Whether to use dilated pooling.
        padding (bool): Whether to use padding.
        padding_mode (str): Type of padding, one of 'reflect' or 'zero'
        multires (bool): Enables multi-resolution mode
        include_top (bool): Whether to include the final layer of the model

    Returns:
        tensorflow.keras.Model: 2D FeatureNet
    """
    # Create layers list (x) to store all of the layers.
    # We need to use the functional API to enable the multiresolution mode
    x = []

    win = (receptive_field - 1) // 2

    if dilated:
        padding = True

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
        row_axis = 2
        col_axis = 3

        if not dilated:
            input_shape = (n_channels, receptive_field, receptive_field)

    else:
        row_axis = 1
        col_axis = 2
        channel_axis = -1
        if not dilated:
            input_shape = (receptive_field, receptive_field, n_channels)

    if inputs is not None:
        if not K.is_keras_tensor(inputs):
            img_input = Input(tensor=inputs, shape=input_shape)
        else:
            img_input = inputs
        x.append(img_input)
    else:
        x.append(Input(shape=input_shape))

    x.append(
        ImageNormalization2D(norm_method=norm_method,
                             filter_size=receptive_field)(x[-1]))

    if padding:
        if padding_mode == 'reflect':
            x.append(ReflectionPadding2D(padding=(win, win))(x[-1]))
        elif padding_mode == 'zero':
            x.append(ZeroPadding2D(padding=(win, win))(x[-1]))

    if location:
        x.append(Location2D(in_shape=tuple(x[-1].shape.as_list()[1:]))(x[-1]))
        x.append(Concatenate(axis=channel_axis)([x[-2], x[-1]]))

    layers_to_concat = []

    rf_counter = receptive_field
    block_counter = 0
    d = 1

    while rf_counter > 4:
        filter_size = 3 if rf_counter % 2 == 0 else 4
        x.append(
            Conv2D(n_conv_filters,
                   filter_size,
                   dilation_rate=d,
                   kernel_initializer=init,
                   padding='valid',
                   kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        block_counter += 1
        rf_counter -= filter_size - 1

        if block_counter % 2 == 0:
            if dilated:
                x.append(
                    DilatedMaxPool2D(dilation_rate=d, pool_size=(2, 2))(x[-1]))
                d *= 2
            else:
                x.append(MaxPool2D(pool_size=(2, 2))(x[-1]))

            if VGG_mode:
                n_conv_filters *= 2

            rf_counter = rf_counter // 2

            if multires:
                layers_to_concat.append(len(x) - 1)

    if multires:
        c = []
        for l in layers_to_concat:
            output_shape = x[l].get_shape().as_list()
            target_shape = x[-1].get_shape().as_list()

            row_crop = int(output_shape[row_axis] - target_shape[row_axis])
            if row_crop % 2 == 0:
                row_crop = (row_crop // 2, row_crop // 2)
            else:
                row_crop = (row_crop // 2, row_crop // 2 + 1)

            col_crop = int(output_shape[col_axis] - target_shape[col_axis])
            if col_crop % 2 == 0:
                col_crop = (col_crop // 2, col_crop // 2)
            else:
                col_crop = (col_crop // 2, col_crop // 2 + 1)

            cropping = (row_crop, col_crop)

            c.append(Cropping2D(cropping=cropping)(x[l]))

        x.append(Concatenate(axis=channel_axis)(c))

    x.append(
        Conv2D(n_dense_filters, (rf_counter, rf_counter),
               dilation_rate=d,
               kernel_initializer=init,
               padding='valid',
               kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    if include_top:
        x.append(
            TensorProduct(n_dense_filters,
                          kernel_initializer=init,
                          kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        x.append(
            TensorProduct(n_features,
                          kernel_initializer=init,
                          kernel_regularizer=l2(reg))(x[-1]))

        if not dilated:
            x.append(Flatten()(x[-1]))

        x.append(Softmax(axis=channel_axis)(x[-1]))

    if inputs is not None:
        real_inputs = keras_utils.get_source_inputs(x[0])
    else:
        real_inputs = x[0]

    model = Model(inputs=real_inputs, outputs=x[-1])

    return model
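
To make the dilated/non-dilated distinction above concrete, here is a hedged usage sketch; the import path is an assumption. Per the code above, with dilated=False the input_shape is overridden to a receptive-field-sized patch (the model classifies a patch's center pixel), while dilated=True keeps input_shape and forces padding on for dense per-pixel output.

# Usage sketch; the module path is assumed, not verified.
from deepcell.model_zoo.featurenet import bn_feature_net_2D

# Patch classifier: input becomes (61, 61, 1) regardless of input_shape.
patch_model = bn_feature_net_2D(receptive_field=61, n_features=3)

# Dense predictor over the full image (dilated pooling, padding forced on).
dense_model = bn_feature_net_2D(receptive_field=61,
                                input_shape=(256, 256, 1),
                                dilated=True)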
Example #3
def RetinaMask(backbone,
               num_classes,
               input_shape,
               inputs=None,
               backbone_levels=['C3', 'C4', 'C5'],
               pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
               norm_method='whole_image',
               location=False,
               use_imagenet=False,
               crop_size=(14, 14),
               pooling=None,
               mask_dtype=K.floatx(),
               required_channels=3,
               frames_per_batch=1,
               **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides input_shape.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to. Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - 'avg' means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will
                be applied.

        mask_dtype (str): Dtype to use for mask tensors.
        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        kwargs (dict): Other standard inputs for retinanet_mask.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple([frames_per_batch] +
                                              list(input_shape))
            inputs = Input(shape=input_shape_with_time)
        else:
            inputs = Input(shape=input_shape)

    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(inputs)
        else:
            loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    if frames_per_batch > 1:
        norm = TimeDistributed(
            ImageNormalization2D(norm_method=norm_method))(concat)
        fixed_inputs = TimeDistributed(TensorProduct(required_channels))(norm)
    else:
        norm = ImageNormalization2D(norm_method=norm_method)(concat)
        fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone,
                                    fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    # create the full model
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          backbone_dict=backbone_dict,
                          crop_size=crop_size,
                          backbone_levels=backbone_levels,
                          pyramid_levels=pyramid_levels,
                          name='{}_retinanet_mask'.format(backbone),
                          mask_dtype=mask_dtype,
                          frames_per_batch=frames_per_batch,
                          **kwargs)
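
A minimal call for the function above might look like the following; the import path and backbone name are assumptions for illustration.

# Hypothetical usage sketch for RetinaMask (2D data).
from deepcell.model_zoo.retinamask import RetinaMask

model = RetinaMask(backbone='resnet50',
                   num_classes=1,              # e.g. a single 'cell' class
                   input_shape=(256, 256, 1),
                   crop_size=(14, 14),         # roi_submodels crop size
                   use_imagenet=False)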
Example #4
def PanopticNet(backbone,
                input_shape,
                backbone_levels=['C3', 'C4', 'C5'],
                create_pyramid_features=__create_pyramid_features,
                create_semantic_head=__create_semantic_head,
                num_semantic_heads=1,
                num_semantic_classes=[3],
                required_channels=3,
                norm_method='whole_image',
                pooling=None,
                location=True,
                use_imagenet=True,
                name='panopticnet',
                **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        input_shape (tuple): The shape of the input data.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        create_pyramid_features (function): Function to get the pyramid
            features from the backbone.
        create_semantic_head (function): Function to build a
            semantic head submodel.
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - 'avg' means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will
                be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for create_semantic_head.

    Returns:
        tensorflow.keras.Model: Panoptic model with a backbone.
    """
    inputs = Input(shape=input_shape)
    norm = ImageNormalization2D(norm_method=norm_method)(inputs)
    if location:
        loc = Location2D(in_shape=input_shape)(norm)
        concat = Concatenate(axis=-1)([norm, loc])
    else:
        concat = norm

    fixed_inputs = TensorProduct(required_channels)(concat)

    # force the input shape
    fixed_input_shape = list(input_shape)
    fixed_input_shape[-1] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone,
                                    fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=1,
                                    return_dict=True,
                                    **model_kwargs)
    backbone_dict_reduced = {
        k: backbone_dict[k]
        for k in backbone_dict if k in backbone_levels
    }

    pyramid_dict = create_pyramid_features(backbone_dict_reduced, ndim=2)

    semantic_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(semantic_levels)

    semantic_head_list = []
    for i in range(num_semantic_heads):
        semantic_head_list.append(
            create_semantic_head(pyramid_dict,
                                 n_classes=num_semantic_classes[i],
                                 input_target=inputs,
                                 target_level=target_level,
                                 semantic_id=i,
                                 ndim=2,
                                 **kwargs))

    model = Model(inputs=inputs, outputs=semantic_head_list, name=name)
    return model
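
The channel-forcing step above (TensorProduct into required_channels) is the piece that lets a single-channel microscopy image drive an ImageNet backbone that expects 3 channels. Below is a minimal sketch of the same idea using stock Keras layers, with Dense standing in for deepcell's TensorProduct (in channels_last mode both act on the last axis); this is an illustration, not deepcell's implementation.

import tensorflow as tf
from tensorflow.keras.layers import Dense, Input

# Project a 1-channel image to the 3 channels the backbone expects.
# Dense applied to a 4D tensor acts along the last (channel) axis.
inputs = Input(shape=(128, 128, 1))
fixed = Dense(3)(inputs)  # shape -> (None, 128, 128, 3)

backbone = tf.keras.applications.ResNet50(include_top=False,
                                          weights=None,
                                          input_shape=(128, 128, 3))
features = backbone(fixed)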
Example #5
def RetinaNet(backbone,
              num_classes,
              input_shape,
              inputs=None,
              norm_method='whole_image',
              location=False,
              use_imagenet=False,
              pooling=None,
              required_channels=3,
              frames_per_batch=1,
              **kwargs):
    """Constructs a RetinaNet model using a backbone from
    ``keras-applications``.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides ``input_shape``.
        norm_method (str): Normalization method to use with the
            :mod:`deepcell.layers.normalization.ImageNormalization2D` layer.
        location (bool): Whether to include a
            :mod:`deepcell.layers.location.Location2D` layer.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        pooling (str): Pooling mode for feature extraction
            when ``include_top`` is ``False``.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        kwargs (dict): Other standard inputs for ``retinanet``.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple([frames_per_batch] +
                                              list(input_shape))
            inputs = Input(shape=input_shape_with_time, name='input')
        else:
            inputs = Input(shape=input_shape, name='input')

    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(inputs)
        else:
            loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    if frames_per_batch > 1:
        norm = TimeDistributed(
            ImageNormalization2D(norm_method=norm_method))(concat)
        fixed_inputs = TimeDistributed(TensorProduct(required_channels))(norm)
    else:
        norm = ImageNormalization2D(norm_method=norm_method)(concat)
        fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone,
                                    fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    # create the full model
    return retinanet(inputs=inputs,
                     num_classes=num_classes,
                     backbone_dict=backbone_dict,
                     frames_per_batch=frames_per_batch,
                     name='{}_retinanet'.format(backbone),
                     **kwargs)
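
For movie or volumetric data, frames_per_batch > 1 prepends a frames axis to input_shape and wraps the normalization and channel-forcing layers in TimeDistributed. A hedged usage sketch (import path and backbone name assumed):

# Usage sketch; module path is an assumption.
from deepcell.model_zoo.retinanet import RetinaNet

# Still images: batch elements are (256, 256, 1).
model_2d = RetinaNet(backbone='resnet50', num_classes=1,
                     input_shape=(256, 256, 1))

# Movies: batch elements become (3, 256, 256, 1) internally.
model_3d = RetinaNet(backbone='resnet50', num_classes=1,
                     input_shape=(256, 256, 1),
                     frames_per_batch=3)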
Example #6
def PanopticNet(backbone,
                input_shape,
                inputs=None,
                backbone_levels=['C3', 'C4', 'C5'],
                pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                create_pyramid_features=__create_pyramid_features,
                create_semantic_head=__create_semantic_head,
                frames_per_batch=1,
                temporal_mode=None,
                num_semantic_heads=1,
                num_semantic_classes=[3],
                required_channels=3,
                norm_method='whole_image',
                pooling=None,
                location=True,
                use_imagenet=True,
                lite=False,
                upsample_type='upsampling2d',
                interpolation='bilinear',
                name='panopticnet',
                **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides input_shape.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to use. Defaults to
            ['P3', 'P4', 'P5', 'P6', 'P7'].
        create_pyramid_features (function): Function to get the pyramid
            features from the backbone.
        create_semantic_head (function): Function to build a semantic head
            submodel.
        frames_per_batch (int): Size of z axis in generated batches.
            Defaults to 1.
        temporal_mode (str): Mode of temporal convolution. Choose from
            {'conv', 'lstm', 'gru', None}. Defaults to None.
        num_semantic_heads (int): Number of semantic heads to build.
            Defaults to 1.
        num_semantic_classes (list): Number of classes per semantic head.
            Defaults to [3].
        norm_method (str): ImageNormalization mode to use.
            Defaults to 'whole_image'.
        location (bool): Whether to include location data. Defaults to True.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        lite (bool): Whether to use a depthwise conv in the feature pyramid
            rather than regular conv. Defaults to False.
        upsample_type (str): Choice of upsampling layer to use from
            ['upsamplelike', 'upsampling2d', 'upsampling3d']. Defaults to
            'upsampling2d'.
        interpolation (str): Choice of interpolation mode for upsampling
            layers from ['bilinear', 'nearest']. Defaults to 'bilinear'.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - 'avg' means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will
                be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for create_semantic_head.

    Raises:
        ValueError: temporal_mode is not one of 'conv', 'lstm', 'gru', or None.

    Returns:
        tensorflow.keras.Model: Panoptic model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    conv = Conv3D if frames_per_batch > 1 else Conv2D
    conv_kernel = (1, 1, 1) if frames_per_batch > 1 else (1, 1)

    # Check input to __merge_temporal_features
    acceptable_modes = {'conv', 'lstm', 'gru', None}
    if temporal_mode is not None:
        temporal_mode = str(temporal_mode).lower()
        if temporal_mode not in acceptable_modes:
            raise ValueError('temporal_mode {} not supported. Please choose '
                             'from {}.'.format(temporal_mode, acceptable_modes))

    # TODO only works for 2D: do we check for 3D as well?
    # What are the requirements for 3D data?
    img_shape = input_shape[1:] if channel_axis == 1 else input_shape[:-1]
    if img_shape[0] != img_shape[1]:
        raise ValueError('Input data must be square, got dimensions {}'.format(
            img_shape))

    if not math.log(img_shape[0], 2).is_integer():
        raise ValueError('Input data dimensions must be a power of 2, '
                         'got {}'.format(img_shape[0]))

    # Check input to interpolation
    acceptable_interpolation = {'bilinear', 'nearest'}
    if interpolation not in acceptable_interpolation:
        raise ValueError('Interpolation mode "{}" not supported. '
                         'Choose from {}.'.format(
                             interpolation, list(acceptable_interpolation)))

    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time, name='input_0')
        else:
            inputs = Input(shape=input_shape, name='input_0')

    # Normalize input images
    if norm_method is None:
        norm = inputs
    else:
        if frames_per_batch > 1:
            norm = TimeDistributed(ImageNormalization2D(
                norm_method=norm_method, name='norm'), name='td_norm')(inputs)
        else:
            norm = ImageNormalization2D(norm_method=norm_method,
                                        name='norm')(inputs)

    # Add location layer
    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape,
                                             name='location'), name='td_location')(norm)
        else:
            loc = Location2D(in_shape=input_shape, name='location')(norm)
        concat = Concatenate(axis=channel_axis,
                             name='concatenate_location')([norm, loc])
    else:
        concat = norm

    # Force the channel size for backbone input to be `required_channels`
    fixed_inputs = conv(required_channels, conv_kernel, strides=1,
                        padding='same', name='conv_channels')(concat)

    # Force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone, fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    backbone_dict_reduced = {k: backbone_dict[k] for k in backbone_dict
                             if k in backbone_levels}

    ndim = 2 if frames_per_batch == 1 else 3

    pyramid_dict = create_pyramid_features(backbone_dict_reduced,
                                           ndim=ndim,
                                           lite=lite,
                                           interpolation=interpolation,
                                           upsample_type=upsample_type)

    features = [pyramid_dict[key] for key in pyramid_levels]

    if frames_per_batch > 1:
        temporal_features = [
            __merge_temporal_features(f, mode=temporal_mode,
                                      frames_per_batch=frames_per_batch)
            for f in features]
        for f, k in zip(temporal_features, pyramid_levels):
            pyramid_dict[k] = f

    semantic_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(semantic_levels)

    semantic_head_list = []
    for i in range(num_semantic_heads):
        semantic_head_list.append(create_semantic_head(
            pyramid_dict, n_classes=num_semantic_classes[i],
            input_target=inputs, target_level=target_level,
            semantic_id=i, ndim=ndim, upsample_type=upsample_type,
            interpolation=interpolation, **kwargs))

    outputs = semantic_head_list

    model = Model(inputs=inputs, outputs=outputs, name=name)
    return model
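
Unlike Example #1, this variant rejects inputs that are not square with a power-of-2 side length, so every pyramid downsample and upsample lands on integer sizes. The check reduces to a few lines; this standalone helper mirrors the logic above for illustration (it is not part of the deepcell API):

import math

def validate_panoptic_shape(input_shape, channels_first=False):
    # Drop the channel axis to get the spatial dimensions.
    img_shape = input_shape[1:] if channels_first else input_shape[:-1]
    if img_shape[0] != img_shape[1]:
        raise ValueError('Input data must be square, got {}'.format(img_shape))
    if not math.log(img_shape[0], 2).is_integer():
        raise ValueError('Input dimensions must be a power of 2, '
                         'got {}'.format(img_shape[0]))

validate_panoptic_shape((256, 256, 1))    # OK
# validate_panoptic_shape((256, 320, 1))  # raises ValueError: not square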
Example #7
def RetinaNet(backbone,
              num_classes,
              input_shape,
              inputs=None,
              norm_method='whole_image',
              location=False,
              use_imagenet=False,
              pooling=None,
              required_channels=3,
              **kwargs):
    """Constructs a retinanet model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides input_shape.
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - 'avg' means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will
                be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for retinanet.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    if inputs is None:
        inputs = Input(shape=input_shape)

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if location:
        loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    norm = ImageNormalization2D(norm_method=norm_method)(concat)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    backbone_dict = get_backbone(backbone,
                                 fixed_inputs,
                                 use_imagenet=use_imagenet,
                                 **model_kwargs)

    # create the full model
    return retinanet(inputs=inputs,
                     num_classes=num_classes,
                     backbone_dict=backbone_dict,
                     name='{}_retinanet'.format(backbone),
                     **kwargs)
Example #8
def bn_feature_net_2D(receptive_field=61,
                      input_shape=(256, 256, 1),
                      n_features=3,
                      n_channels=1,
                      reg=1e-5,
                      n_conv_filters=64,
                      n_dense_filters=200,
                      VGG_mode=False,
                      init='he_normal',
                      norm_method='std',
                      location=False,
                      dilated=False,
                      padding=False,
                      padding_mode='reflect',
                      multires=False,
                      include_top=True):
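    """Creates a 2D featurenet. See Example #2 above for full parameter docs."""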
    # Create layers list (x) to store all of the layers.
    # We need to use the functional API to enable the multiresolution mode
    x = []

    win = (receptive_field - 1) // 2

    if dilated:
        padding = True

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
        row_axis = 2
        col_axis = 3

        if not dilated:
            input_shape = (n_channels, receptive_field, receptive_field)

    else:
        row_axis = 1
        col_axis = 2
        channel_axis = -1
        if not dilated:
            input_shape = (receptive_field, receptive_field, n_channels)

    x.append(Input(shape=input_shape))
    x.append(
        ImageNormalization2D(norm_method=norm_method,
                             filter_size=receptive_field)(x[-1]))

    if padding:
        if padding_mode == 'reflect':
            x.append(ReflectionPadding2D(padding=(win, win))(x[-1]))
        elif padding_mode == 'zero':
            x.append(ZeroPadding2D(padding=(win, win))(x[-1]))

    if location:
        x.append(Location2D(in_shape=tuple(x[-1].shape.as_list()[1:]))(x[-1]))
        x.append(Concatenate(axis=channel_axis)([x[-2], x[-1]]))

    if multires:
        layers_to_concat = []

    rf_counter = receptive_field
    block_counter = 0
    d = 1

    while rf_counter > 4:
        filter_size = 3 if rf_counter % 2 == 0 else 4
        x.append(
            Conv2D(n_conv_filters, (filter_size, filter_size),
                   dilation_rate=d,
                   kernel_initializer=init,
                   padding='valid',
                   kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        block_counter += 1
        rf_counter -= filter_size - 1

        if block_counter % 2 == 0:
            if dilated:
                x.append(DilatedMaxPool2D(dilation_rate=d, pool_size=(2, 2))(x[-1]))
                d *= 2
            else:
                x.append(MaxPool2D(pool_size=(2, 2))(x[-1]))

            if VGG_mode:
                n_conv_filters *= 2

            rf_counter = rf_counter // 2

            if multires:
                layers_to_concat.append(len(x) - 1)

    if multires:
        c = []
        for l in layers_to_concat:
            output_shape = x[l].get_shape().as_list()
            target_shape = x[-1].get_shape().as_list()

            row_crop = int(output_shape[row_axis] - target_shape[row_axis])
            if row_crop % 2 == 0:
                row_crop = (row_crop // 2, row_crop // 2)
            else:
                row_crop = (row_crop // 2, row_crop // 2 + 1)

            col_crop = int(output_shape[col_axis] - target_shape[col_axis])
            if col_crop % 2 == 0:
                col_crop = (col_crop // 2, col_crop // 2)
            else:
                col_crop = (col_crop // 2, col_crop // 2 + 1)

            cropping = (row_crop, col_crop)

            c.append(Cropping2D(cropping=cropping)(x[l]))
        x.append(Concatenate(axis=channel_axis)(c))

    x.append(
        Conv2D(n_dense_filters, (rf_counter, rf_counter),
               dilation_rate=d,
               kernel_initializer=init,
               padding='valid',
               kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    x.append(
        TensorProduct(n_dense_filters,
                      kernel_initializer=init,
                      kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    x.append(
        TensorProduct(n_features,
                      kernel_initializer=init,
                      kernel_regularizer=l2(reg))(x[-1]))

    if not dilated:
        x.append(Flatten()(x[-1]))

    if include_top:
        x.append(Softmax(axis=channel_axis)(x[-1]))

    model = Model(inputs=x[0], outputs=x[-1])

    return model
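
The while loop in this variant (and in Example #2) is easiest to read as bookkeeping on rf_counter: a filter of size f consumes f - 1 pixels of receptive field, and every second block halves what remains via pooling. A standalone trace of that arithmetic for the default receptive_field=61:

# Trace of the receptive-field loop; mirrors the logic above.
rf_counter, block_counter = 61, 0
while rf_counter > 4:
    filter_size = 3 if rf_counter % 2 == 0 else 4
    block_counter += 1
    rf_counter -= filter_size - 1
    if block_counter % 2 == 0:
        rf_counter //= 2
    print(block_counter, filter_size, rf_counter)
# 1 4 58 / 2 3 28 / 3 3 26 / 4 3 12 / 5 3 10 / 6 3 4
# The final rf_counter (4) becomes the kernel size of the dense conv layer.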
Example #9
def MaskRCNN(backbone,
             num_classes,
             input_shape,
             backbone_levels=['C3', 'C4', 'C5'],
             pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
             norm_method='whole_image',
             location=False,
             use_imagenet=False,
             crop_size=(14, 14),
             pooling=None,
             mask_dtype=K.floatx(),
             required_channels=3,
             **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to. Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - 'avg' means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will
                be applied.

        mask_dtype (str): Dtype to use for mask tensors.
        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for retinanet_mask.

    Returns:
        tensorflow.keras.Model: MaskRCNN model with a backbone.
    """
    inputs = Input(shape=input_shape)
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    if location:
        loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    norm = ImageNormalization2D(norm_method=norm_method)(concat)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    fixed_input_shape = list(input_shape)
    fixed_input_shape[-1] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    backbone_dict = get_backbone(backbone,
                                 fixed_inputs,
                                 use_imagenet=use_imagenet,
                                 **model_kwargs)

    # create the full model
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          backbone_dict=backbone_dict,
                          crop_size=crop_size,
                          backbone_levels=backbone_levels,
                          pyramid_levels=pyramid_levels,
                          name='{}_retinanet_mask'.format(backbone),
                          mask_dtype=mask_dtype,
                          **kwargs)
Example #10
def RetinaMask(backbone,
               num_classes,
               input_shape,
               inputs=None,
               backbone_levels=['C3', 'C4', 'C5'],
               pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
               norm_method='whole_image',
               location=False,
               use_imagenet=False,
               crop_size=(14, 14),
               pooling=None,
               mask_dtype=K.floatx(),
               required_channels=3,
               frames_per_batch=1,
               **kwargs):
    """Constructs a mrcnn model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides input_shape.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to. Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - 'avg' means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will
                be applied.

        mask_dtype (str): Dtype to use for mask tensors.
        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        kwargs (dict): Other standard inputs for retinanet_mask.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple([frames_per_batch] +
                                              list(input_shape))
            inputs = Input(shape=input_shape_with_time)
        else:
            inputs = Input(shape=input_shape)

    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(inputs)
        else:
            loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    if frames_per_batch > 1:
        norm = TimeDistributed(
            ImageNormalization2D(norm_method=norm_method))(concat)
        fixed_inputs = TimeDistributed(TensorProduct(required_channels))(norm)
    else:
        norm = ImageNormalization2D(norm_method=norm_method)(concat)
        fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone,
                                    fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    # create the full model
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          backbone_dict=backbone_dict,
                          crop_size=crop_size,
                          backbone_levels=backbone_levels,
                          pyramid_levels=pyramid_levels,
                          name='{}_retinanet_mask'.format(backbone),
                          mask_dtype=mask_dtype,
                          frames_per_batch=frames_per_batch,
                          **kwargs)
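
And a hedged sketch of the 3D case for the function above: with frames_per_batch > 1 the model consumes short movies instead of still frames (import path and backbone name are assumptions, as in Example #3).

# Hypothetical usage sketch for RetinaMask on movie data.
from deepcell.model_zoo.retinamask import RetinaMask

model_3d = RetinaMask(backbone='resnet50',
                      num_classes=1,
                      input_shape=(256, 256, 1),
                      frames_per_batch=3)   # batch elements: (3, 256, 256, 1)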