def PanopticNet(backbone,
                input_shape,
                inputs=None,
                backbone_levels=['C3', 'C4', 'C5'],
                pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                create_pyramid_features=__create_pyramid_features,
                create_semantic_head=__create_semantic_head,
                frames_per_batch=1,
                temporal_mode=None,
                num_semantic_heads=1,
                num_semantic_classes=[3],
                required_channels=3,
                norm_method='whole_image',
                pooling=None,
                location=True,
                use_imagenet=True,
                name='panopticnet',
                **kwargs):
    """Constructs a PanopticNet model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides input_shape.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): Pyramid levels to use.
            Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        create_pyramid_features (function): Function to get the pyramid
            features from the backbone.
        create_semantic_head (function): Function to build a semantic head
            submodel.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data. Defaults to 1.
        temporal_mode: Mode of temporal convolution. Choose from
            {'conv', 'lstm', 'gru', None}. Defaults to None.
        num_semantic_heads (int): Number of semantic heads to build.
            Defaults to 1.
        num_semantic_classes (list): Number of classes for each semantic
            head. Defaults to [3].
        norm_method (str): ImageNormalization mode to use.
            Defaults to 'whole_image'.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for create_semantic_head.

    Raises:
        ValueError: temporal_mode is not one of 'conv', 'lstm', 'gru' or None

    Returns:
        tensorflow.keras.Model: Panoptic model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # Check input to __merge_temporal_features
    acceptable_modes = {'conv', 'lstm', 'gru', None}
    if temporal_mode is not None:
        temporal_mode = str(temporal_mode).lower()
        if temporal_mode not in acceptable_modes:
            raise ValueError('Mode {} not supported. Please choose '
                             'from {}.'.format(temporal_mode,
                                               str(acceptable_modes)))

    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time)
        else:
            inputs = Input(shape=input_shape)

    # force the channel size for backbone input to be `required_channels`
    if norm_method is None:
        norm = inputs
    else:
        if frames_per_batch > 1:
            norm = TimeDistributed(
                ImageNormalization2D(norm_method=norm_method))(inputs)
        else:
            norm = ImageNormalization2D(norm_method=norm_method)(inputs)

    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(norm)
        else:
            loc = Location2D(in_shape=input_shape)(norm)
        concat = Concatenate(axis=channel_axis)([norm, loc])
    else:
        concat = norm

    if frames_per_batch > 1:
        fixed_inputs = TimeDistributed(
            TensorProduct(required_channels))(concat)
    else:
        fixed_inputs = TensorProduct(required_channels)(concat)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone, fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    backbone_dict_reduced = {k: backbone_dict[k] for k in backbone_dict
                             if k in backbone_levels}

    ndim = 2 if frames_per_batch == 1 else 3

    pyramid_dict = create_pyramid_features(backbone_dict_reduced, ndim=ndim)

    features = [pyramid_dict[key] for key in pyramid_levels]

    if frames_per_batch > 1:
        temporal_features = [__merge_temporal_features(feature,
                                                       mode=temporal_mode)
                             for feature in features]
        # map merged features back onto the pyramid levels they came from
        for f, k in zip(temporal_features, pyramid_levels):
            pyramid_dict[k] = f

    semantic_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(semantic_levels)

    semantic_head_list = []
    for i in range(num_semantic_heads):
        semantic_head_list.append(create_semantic_head(
            pyramid_dict, n_classes=num_semantic_classes[i],
            input_target=inputs, target_level=target_level,
            semantic_id=i, ndim=ndim, **kwargs))

    outputs = semantic_head_list

    model = Model(inputs=inputs, outputs=outputs, name=name)
    return model
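
# --- Usage sketch (illustrative; not part of the library source) ---
# A minimal, hypothetical construction of the PanopticNet defined above.
# The backbone name 'resnet50' and all shapes/class counts are assumptions;
# any backbone accepted by get_backbone should work the same way.
panoptic_model = PanopticNet(
    backbone='resnet50',
    input_shape=(128, 128, 1),    # single-channel 2D images
    num_semantic_heads=2,
    num_semantic_classes=[1, 3],  # one entry per semantic head
    norm_method='whole_image',
    location=True)
panoptic_model.summary()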
def bn_feature_net_2D(receptive_field=61,
                      input_shape=(256, 256, 1),
                      inputs=None,
                      n_features=3,
                      n_channels=1,
                      reg=1e-5,
                      n_conv_filters=64,
                      n_dense_filters=200,
                      VGG_mode=False,
                      init='he_normal',
                      norm_method='std',
                      location=False,
                      dilated=False,
                      padding=False,
                      padding_mode='reflect',
                      multires=False,
                      include_top=True):
    """Creates a 2D featurenet.

    Args:
        receptive_field (int): the receptive field of the neural network.
        input_shape (tuple): If no input tensor, create one with this shape.
        inputs (tensor): optional input tensor
        n_features (int): Number of output features
        n_channels (int): number of input channels
        reg (float): regularization value
        n_conv_filters (int): number of convolutional filters
        n_dense_filters (int): number of dense filters
        VGG_mode (bool): If multires, uses VGG_mode for multiresolution
        init (str): Method for initializing weights.
        norm_method (str): ImageNormalization mode to use
        location (bool): Whether to include location data
        dilated (bool): Whether to use dilated pooling.
        padding (bool): Whether to use padding.
        padding_mode (str): Type of padding, one of 'reflect' or 'zero'
        multires (bool): Enables multi-resolution mode
        include_top (bool): Whether to include the final layer of the model

    Returns:
        tensorflow.keras.Model: 2D FeatureNet
    """
    # Create layers list (x) to store all of the layers.
    # We need to use the functional API to enable the multiresolution mode
    x = []

    win = (receptive_field - 1) // 2

    if dilated:
        padding = True

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
        row_axis = 2
        col_axis = 3
        if not dilated:
            input_shape = (n_channels, receptive_field, receptive_field)
    else:
        row_axis = 1
        col_axis = 2
        channel_axis = -1
        if not dilated:
            input_shape = (receptive_field, receptive_field, n_channels)

    if inputs is not None:
        if not K.is_keras_tensor(inputs):
            img_input = Input(tensor=inputs, shape=input_shape)
        else:
            img_input = inputs
        x.append(img_input)
    else:
        x.append(Input(shape=input_shape))

    x.append(ImageNormalization2D(norm_method=norm_method,
                                  filter_size=receptive_field)(x[-1]))

    if padding:
        if padding_mode == 'reflect':
            x.append(ReflectionPadding2D(padding=(win, win))(x[-1]))
        elif padding_mode == 'zero':
            x.append(ZeroPadding2D(padding=(win, win))(x[-1]))

    if location:
        x.append(Location2D(in_shape=tuple(x[-1].shape.as_list()[1:]))(x[-1]))
        x.append(Concatenate(axis=channel_axis)([x[-2], x[-1]]))

    layers_to_concat = []

    rf_counter = receptive_field
    block_counter = 0
    d = 1

    while rf_counter > 4:
        filter_size = 3 if rf_counter % 2 == 0 else 4
        x.append(Conv2D(n_conv_filters, filter_size, dilation_rate=d,
                        kernel_initializer=init, padding='valid',
                        kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        block_counter += 1
        rf_counter -= filter_size - 1

        if block_counter % 2 == 0:
            if dilated:
                x.append(DilatedMaxPool2D(dilation_rate=d,
                                          pool_size=(2, 2))(x[-1]))
                d *= 2
            else:
                x.append(MaxPool2D(pool_size=(2, 2))(x[-1]))

            if VGG_mode:
                n_conv_filters *= 2

            rf_counter = rf_counter // 2

            if multires:
                layers_to_concat.append(len(x) - 1)

    if multires:
        c = []
        for l in layers_to_concat:
            output_shape = x[l].get_shape().as_list()
            target_shape = x[-1].get_shape().as_list()

            row_crop = int(output_shape[row_axis] - target_shape[row_axis])
            if row_crop % 2 == 0:
                row_crop = (row_crop // 2, row_crop // 2)
            else:
                row_crop = (row_crop // 2, row_crop // 2 + 1)

            col_crop = int(output_shape[col_axis] - target_shape[col_axis])
            if col_crop % 2 == 0:
                col_crop = (col_crop // 2, col_crop // 2)
            else:
                col_crop = (col_crop // 2, col_crop // 2 + 1)

            cropping = (row_crop, col_crop)
            c.append(Cropping2D(cropping=cropping)(x[l]))

        x.append(Concatenate(axis=channel_axis)(c))

    x.append(Conv2D(n_dense_filters, (rf_counter, rf_counter),
                    dilation_rate=d, kernel_initializer=init,
                    padding='valid', kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    if include_top:
        x.append(TensorProduct(n_dense_filters, kernel_initializer=init,
                               kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        x.append(TensorProduct(n_features, kernel_initializer=init,
                               kernel_regularizer=l2(reg))(x[-1]))

        if not dilated:
            x.append(Flatten()(x[-1]))

        x.append(Softmax(axis=channel_axis)(x[-1]))

    if inputs is not None:
        real_inputs = keras_utils.get_source_inputs(x[0])
    else:
        real_inputs = x[0]

    model = Model(inputs=real_inputs, outputs=x[-1])

    return model
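
# --- Usage sketch (illustrative; not part of the library source) ---
# Two hypothetical ways to build the featurenet above. Argument values
# are assumptions for demonstration only.
patch_model = bn_feature_net_2D(receptive_field=61,
                                n_channels=1,
                                n_features=3,
                                dilated=False)  # classifies 61x61 patches
dense_model = bn_feature_net_2D(receptive_field=61,
                                input_shape=(256, 256, 1),
                                n_features=3,
                                dilated=True)   # dilated=True forces padding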
def RetinaMask(backbone,
               num_classes,
               input_shape,
               inputs=None,
               backbone_levels=['C3', 'C4', 'C5'],
               pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
               norm_method='whole_image',
               location=False,
               use_imagenet=False,
               crop_size=(14, 14),
               pooling=None,
               mask_dtype=K.floatx(),
               required_channels=3,
               frames_per_batch=1,
               **kwargs):
    """Constructs a RetinaMask model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides input_shape.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to. Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.

        mask_dtype (str): Dtype to use for mask tensors.
        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        kwargs (dict): Other standard inputs for retinanet_mask.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time)
        else:
            inputs = Input(shape=input_shape)

    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(inputs)
        else:
            loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    if frames_per_batch > 1:
        norm = TimeDistributed(
            ImageNormalization2D(norm_method=norm_method))(concat)
        fixed_inputs = TimeDistributed(TensorProduct(required_channels))(norm)
    else:
        norm = ImageNormalization2D(norm_method=norm_method)(concat)
        fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone, fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    # create the full model
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          backbone_dict=backbone_dict,
                          crop_size=crop_size,
                          backbone_levels=backbone_levels,
                          pyramid_levels=pyramid_levels,
                          name='{}_retinanet_mask'.format(backbone),
                          mask_dtype=mask_dtype,
                          frames_per_batch=frames_per_batch,
                          **kwargs)
def PanopticNet(backbone,
                input_shape,
                backbone_levels=['C3', 'C4', 'C5'],
                create_pyramid_features=__create_pyramid_features,
                create_semantic_head=__create_semantic_head,
                num_semantic_heads=1,
                num_semantic_classes=[3],
                required_channels=3,
                norm_method='whole_image',
                pooling=None,
                location=True,
                use_imagenet=True,
                name='panopticnet',
                **kwargs):
    """Constructs a PanopticNet model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        input_shape (tuple): The shape of the input data.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        create_pyramid_features (function): Function to get the pyramid
            features from the backbone.
        create_semantic_head (function): Function to build a semantic head
            submodel.
        num_semantic_heads (int): Number of semantic heads to build.
            Defaults to 1.
        num_semantic_classes (list): Number of classes for each semantic
            head. Defaults to [3].
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for create_semantic_head.

    Returns:
        tensorflow.keras.Model: Panoptic model with a backbone.
    """
    inputs = Input(shape=input_shape)

    norm = ImageNormalization2D(norm_method=norm_method)(inputs)

    if location:
        loc = Location2D(in_shape=input_shape)(norm)
        concat = Concatenate(axis=-1)([norm, loc])
    else:
        concat = norm

    fixed_inputs = TensorProduct(required_channels)(concat)

    # force the input shape
    fixed_input_shape = list(input_shape)
    fixed_input_shape[-1] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone, fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=1,
                                    return_dict=True,
                                    **model_kwargs)

    backbone_dict_reduced = {
        k: backbone_dict[k] for k in backbone_dict if k in backbone_levels
    }

    pyramid_dict = create_pyramid_features(backbone_dict_reduced, ndim=2)

    semantic_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(semantic_levels)

    semantic_head_list = []
    for i in range(num_semantic_heads):
        semantic_head_list.append(
            create_semantic_head(pyramid_dict,
                                 n_classes=num_semantic_classes[i],
                                 input_target=inputs,
                                 target_level=target_level,
                                 semantic_id=i,
                                 ndim=2,
                                 **kwargs))

    model = Model(inputs=inputs, outputs=semantic_head_list, name=name)
    return model
def RetinaNet(backbone,
              num_classes,
              input_shape,
              inputs=None,
              norm_method='whole_image',
              location=False,
              use_imagenet=False,
              pooling=None,
              required_channels=3,
              frames_per_batch=1,
              **kwargs):
    """Constructs a RetinaNet model using a backbone from ``keras-applications``.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides ``input_shape``.
        norm_method (str): Normalization method to use with the
            :class:`deepcell.layers.normalization.ImageNormalization2D` layer.
        location (bool): Whether to include a
            :class:`deepcell.layers.location.Location2D` layer.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        pooling (str): Pooling mode for feature extraction
            when ``include_top`` is ``False``.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        kwargs (dict): Other standard inputs for ``retinanet``.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time, name='input')
        else:
            inputs = Input(shape=input_shape, name='input')

    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(inputs)
        else:
            loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    if frames_per_batch > 1:
        norm = TimeDistributed(
            ImageNormalization2D(norm_method=norm_method))(concat)
        fixed_inputs = TimeDistributed(TensorProduct(required_channels))(norm)
    else:
        norm = ImageNormalization2D(norm_method=norm_method)(concat)
        fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone, fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    # create the full model
    return retinanet(inputs=inputs,
                     num_classes=num_classes,
                     backbone_dict=backbone_dict,
                     frames_per_batch=frames_per_batch,
                     name='{}_retinanet'.format(backbone),
                     **kwargs)
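
# --- Usage sketch (illustrative; not part of the library source) ---
# A hypothetical RetinaNet over a 3-frame movie; with frames_per_batch > 1
# the input gains a time axis and the normalization and channel-fixing
# layers are wrapped in TimeDistributed. Backbone and shapes are assumptions.
movie_model = RetinaNet(backbone='resnet50',
                        num_classes=2,
                        input_shape=(256, 256, 1),
                        frames_per_batch=3)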
def PanopticNet(backbone,
                input_shape,
                inputs=None,
                backbone_levels=['C3', 'C4', 'C5'],
                pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
                create_pyramid_features=__create_pyramid_features,
                create_semantic_head=__create_semantic_head,
                frames_per_batch=1,
                temporal_mode=None,
                num_semantic_heads=1,
                num_semantic_classes=[3],
                required_channels=3,
                norm_method='whole_image',
                pooling=None,
                location=True,
                use_imagenet=True,
                lite=False,
                upsample_type='upsampling2d',
                interpolation='bilinear',
                name='panopticnet',
                **kwargs):
    """Constructs a PanopticNet model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides input_shape.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): Pyramid levels to use.
            Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        create_pyramid_features (function): Function to get the pyramid
            features from the backbone.
        create_semantic_head (function): Function to build a semantic head
            submodel.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data. Defaults to 1.
        temporal_mode: Mode of temporal convolution. Choose from
            {'conv', 'lstm', 'gru', None}. Defaults to None.
        num_semantic_heads (int): Number of semantic heads to build.
            Defaults to 1.
        num_semantic_classes (list): Number of classes for each semantic
            head. Defaults to [3].
        norm_method (str): ImageNormalization mode to use.
            Defaults to 'whole_image'.
        location (bool): Whether to include location data. Defaults to True.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        lite (bool): Whether to use a depthwise conv in the feature pyramid
            rather than regular conv. Defaults to False.
        upsample_type (str): Choice of upsampling layer to use from
            ['upsamplelike', 'upsampling2d', 'upsampling3d'].
            Defaults to 'upsampling2d'.
        interpolation (str): Choice of interpolation mode for upsampling
            layers from ['bilinear', 'nearest']. Defaults to 'bilinear'.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for create_semantic_head.

    Raises:
        ValueError: temporal_mode is not one of 'conv', 'lstm', 'gru' or None

    Returns:
        tensorflow.keras.Model: Panoptic model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    conv = Conv3D if frames_per_batch > 1 else Conv2D
    conv_kernel = (1, 1, 1) if frames_per_batch > 1 else (1, 1)

    # Check input to __merge_temporal_features
    acceptable_modes = {'conv', 'lstm', 'gru', None}
    if temporal_mode is not None:
        temporal_mode = str(temporal_mode).lower()
        if temporal_mode not in acceptable_modes:
            raise ValueError('temporal_mode {} not supported. Please choose '
                             'from {}.'.format(temporal_mode, acceptable_modes))

    # TODO only works for 2D: do we check for 3D as well?
    # What are the requirements for 3D data?
    img_shape = input_shape[1:] if channel_axis == 1 else input_shape[:-1]
    if img_shape[0] != img_shape[1]:
        raise ValueError('Input data must be square, got dimensions {}'.format(
            img_shape))

    if not math.log(img_shape[0], 2).is_integer():
        raise ValueError('Input data dimensions must be a power of 2, '
                         'got {}'.format(img_shape[0]))

    # Check input to interpolation
    acceptable_interpolation = {'bilinear', 'nearest'}
    if interpolation not in acceptable_interpolation:
        raise ValueError('Interpolation mode "{}" not supported. '
                         'Choose from {}.'.format(
                             interpolation, list(acceptable_interpolation)))

    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time, name='input_0')
        else:
            inputs = Input(shape=input_shape, name='input_0')

    # Normalize input images
    if norm_method is None:
        norm = inputs
    else:
        if frames_per_batch > 1:
            norm = TimeDistributed(ImageNormalization2D(
                norm_method=norm_method, name='norm'), name='td_norm')(inputs)
        else:
            norm = ImageNormalization2D(norm_method=norm_method,
                                        name='norm')(inputs)

    # Add location layer
    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape,
                                             name='location'),
                                  name='td_location')(norm)
        else:
            loc = Location2D(in_shape=input_shape, name='location')(norm)
        concat = Concatenate(axis=channel_axis,
                             name='concatenate_location')([norm, loc])
    else:
        concat = norm

    # Force the channel size for backbone input to be `required_channels`
    fixed_inputs = conv(required_channels, conv_kernel, strides=1,
                        padding='same', name='conv_channels')(concat)

    # Force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone, fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    backbone_dict_reduced = {k: backbone_dict[k] for k in backbone_dict
                             if k in backbone_levels}

    ndim = 2 if frames_per_batch == 1 else 3

    pyramid_dict = create_pyramid_features(backbone_dict_reduced,
                                           ndim=ndim,
                                           lite=lite,
                                           interpolation=interpolation,
                                           upsample_type=upsample_type)

    features = [pyramid_dict[key] for key in pyramid_levels]

    if frames_per_batch > 1:
        temporal_features = [
            __merge_temporal_features(f, mode=temporal_mode,
                                      frames_per_batch=frames_per_batch)
            for f in features]
        for f, k in zip(temporal_features, pyramid_levels):
            pyramid_dict[k] = f

    semantic_levels = [int(re.findall(r'\d+', k)[0]) for k in pyramid_dict]
    target_level = min(semantic_levels)

    semantic_head_list = []
    for i in range(num_semantic_heads):
        semantic_head_list.append(create_semantic_head(
            pyramid_dict, n_classes=num_semantic_classes[i],
            input_target=inputs, target_level=target_level,
            semantic_id=i, ndim=ndim, upsample_type=upsample_type,
            interpolation=interpolation, **kwargs))

    outputs = semantic_head_list

    model = Model(inputs=inputs, outputs=outputs, name=name)
    return model
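
# --- Usage sketch (illustrative; not part of the library source) ---
# This PanopticNet variant validates its input: spatial dimensions must be
# square and a power of 2, otherwise a ValueError is raised. Values below
# are assumptions.
pn_model = PanopticNet(backbone='resnet50',
                       input_shape=(128, 128, 1),  # 128 == 2 ** 7, square: OK
                       num_semantic_classes=[3],
                       upsample_type='upsampling2d',
                       interpolation='bilinear')
# PanopticNet(backbone='resnet50', input_shape=(100, 120, 1))  # would raise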
def RetinaNet(backbone,
              num_classes,
              input_shape,
              inputs=None,
              norm_method='whole_image',
              location=False,
              use_imagenet=False,
              pooling=None,
              required_channels=3,
              **kwargs):
    """Constructs a retinanet model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides input_shape.
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        pooling (str): optional pooling mode for feature extraction
            when 'include_top' is False.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.

        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for retinanet.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    if inputs is None:
        inputs = Input(shape=input_shape)

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if location:
        loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    norm = ImageNormalization2D(norm_method=norm_method)(concat)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    backbone_dict = get_backbone(backbone, fixed_inputs,
                                 use_imagenet=use_imagenet,
                                 **model_kwargs)

    # create the full model
    return retinanet(inputs=inputs,
                     num_classes=num_classes,
                     backbone_dict=backbone_dict,
                     name='{}_retinanet'.format(backbone),
                     **kwargs)
def bn_feature_net_2D(receptive_field=61,
                      input_shape=(256, 256, 1),
                      n_features=3,
                      n_channels=1,
                      reg=1e-5,
                      n_conv_filters=64,
                      n_dense_filters=200,
                      VGG_mode=False,
                      init='he_normal',
                      norm_method='std',
                      location=False,
                      dilated=False,
                      padding=False,
                      padding_mode='reflect',
                      multires=False,
                      include_top=True):
    """Creates a 2D featurenet."""
    # Create layers list (x) to store all of the layers.
    # We need to use the functional API to enable the multiresolution mode
    x = []

    win = (receptive_field - 1) // 2

    if dilated:
        padding = True

    if K.image_data_format() == 'channels_first':
        channel_axis = 1
        row_axis = 2
        col_axis = 3
        if not dilated:
            input_shape = (n_channels, receptive_field, receptive_field)
    else:
        row_axis = 1
        col_axis = 2
        channel_axis = -1
        if not dilated:
            input_shape = (receptive_field, receptive_field, n_channels)

    x.append(Input(shape=input_shape))
    x.append(ImageNormalization2D(norm_method=norm_method,
                                  filter_size=receptive_field)(x[-1]))

    if padding:
        if padding_mode == 'reflect':
            x.append(ReflectionPadding2D(padding=(win, win))(x[-1]))
        elif padding_mode == 'zero':
            x.append(ZeroPadding2D(padding=(win, win))(x[-1]))

    if location:
        x.append(Location2D(in_shape=tuple(x[-1].shape.as_list()[1:]))(x[-1]))
        x.append(Concatenate(axis=channel_axis)([x[-2], x[-1]]))

    if multires:
        layers_to_concat = []

    rf_counter = receptive_field
    block_counter = 0
    d = 1

    while rf_counter > 4:
        filter_size = 3 if rf_counter % 2 == 0 else 4
        x.append(Conv2D(n_conv_filters, (filter_size, filter_size),
                        dilation_rate=d, kernel_initializer=init,
                        padding='valid', kernel_regularizer=l2(reg))(x[-1]))
        x.append(BatchNormalization(axis=channel_axis)(x[-1]))
        x.append(Activation('relu')(x[-1]))

        block_counter += 1
        rf_counter -= filter_size - 1

        if block_counter % 2 == 0:
            if dilated:
                x.append(DilatedMaxPool2D(dilation_rate=d,
                                          pool_size=(2, 2))(x[-1]))
                d *= 2
            else:
                x.append(MaxPool2D(pool_size=(2, 2))(x[-1]))

            if VGG_mode:
                n_conv_filters *= 2

            rf_counter = rf_counter // 2

            if multires:
                layers_to_concat.append(len(x) - 1)

    if multires:
        c = []
        for l in layers_to_concat:
            output_shape = x[l].get_shape().as_list()
            target_shape = x[-1].get_shape().as_list()

            row_crop = int(output_shape[row_axis] - target_shape[row_axis])
            if row_crop % 2 == 0:
                row_crop = (row_crop // 2, row_crop // 2)
            else:
                row_crop = (row_crop // 2, row_crop // 2 + 1)

            col_crop = int(output_shape[col_axis] - target_shape[col_axis])
            if col_crop % 2 == 0:
                col_crop = (col_crop // 2, col_crop // 2)
            else:
                col_crop = (col_crop // 2, col_crop // 2 + 1)

            cropping = (row_crop, col_crop)
            c.append(Cropping2D(cropping=cropping)(x[l]))

        x.append(Concatenate(axis=channel_axis)(c))

    x.append(Conv2D(n_dense_filters, (rf_counter, rf_counter),
                    dilation_rate=d, kernel_initializer=init,
                    padding='valid', kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    x.append(TensorProduct(n_dense_filters, kernel_initializer=init,
                           kernel_regularizer=l2(reg))(x[-1]))
    x.append(BatchNormalization(axis=channel_axis)(x[-1]))
    x.append(Activation('relu')(x[-1]))

    x.append(TensorProduct(n_features, kernel_initializer=init,
                           kernel_regularizer=l2(reg))(x[-1]))

    if not dilated:
        x.append(Flatten()(x[-1]))

    if include_top:
        x.append(Softmax(axis=channel_axis)(x[-1]))

    model = Model(inputs=x[0], outputs=x[-1])

    return model
def MaskRCNN(backbone,
             num_classes,
             input_shape,
             backbone_levels=['C3', 'C4', 'C5'],
             pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
             norm_method='whole_image',
             location=False,
             use_imagenet=False,
             crop_size=(14, 14),
             pooling=None,
             mask_dtype=K.floatx(),
             required_channels=3,
             **kwargs):
    """Constructs a MaskRCNN model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to. Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.

        mask_dtype (str): Dtype to use for mask tensors.
        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        kwargs (dict): Other standard inputs for retinanet_mask.

    Returns:
        tensorflow.keras.Model: MaskRCNN model with a backbone.
    """
    inputs = Input(shape=input_shape)

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if location:
        loc = Location2D(in_shape=input_shape)(inputs)
        inputs = Concatenate(axis=channel_axis)([inputs, loc])

    # force the channel size for backbone input to be `required_channels`
    norm = ImageNormalization2D(norm_method=norm_method)(inputs)
    fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    fixed_input_shape = list(input_shape)
    fixed_input_shape[-1] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    backbone_dict = get_backbone(backbone, fixed_inputs,
                                 use_imagenet=use_imagenet,
                                 **model_kwargs)

    # create the full model
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          backbone_dict=backbone_dict,
                          crop_size=crop_size,
                          backbone_levels=backbone_levels,
                          pyramid_levels=pyramid_levels,
                          name='{}_retinanet_mask'.format(backbone),
                          mask_dtype=mask_dtype,
                          **kwargs)
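
# --- Usage sketch (illustrative; not part of the library source) ---
# Hypothetical MaskRCNN construction; the backbone name, class count, and
# shapes are assumptions. crop_size sets the ROI crop used by the default
# mask submodels.
maskrcnn_model = MaskRCNN(backbone='resnet50',
                          num_classes=1,
                          input_shape=(256, 256, 1),
                          crop_size=(14, 14))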
def RetinaMask(backbone,
               num_classes,
               input_shape,
               inputs=None,
               backbone_levels=['C3', 'C4', 'C5'],
               pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'],
               norm_method='whole_image',
               location=False,
               use_imagenet=False,
               crop_size=(14, 14),
               pooling=None,
               mask_dtype=K.floatx(),
               required_channels=3,
               frames_per_batch=1,
               **kwargs):
    """Constructs a RetinaMask model using a backbone from keras-applications.

    Args:
        backbone (str): Name of backbone to use.
        num_classes (int): Number of classes to classify.
        input_shape (tuple): The shape of the input data.
        inputs (tensor): Optional input tensor, overrides input_shape.
        backbone_levels (list): The backbone levels to be used
            to create the feature pyramid. Defaults to ['C3', 'C4', 'C5'].
        pyramid_levels (list): The pyramid levels to attach regression and
            classification heads to. Defaults to ['P3', 'P4', 'P5', 'P6', 'P7'].
        norm_method (str): ImageNormalization mode to use.
        location (bool): Whether to include location data.
        use_imagenet (bool): Whether to load imagenet-based pretrained weights.
        crop_size (tuple): 2-length tuple for the x-y size of the crops.
            Used to create default roi_submodels.
        pooling (str): optional pooling mode for feature extraction
            when include_top is False.

            - None means that the output of the model will be
              the 4D tensor output of the last convolutional layer.
            - 'avg' means that global average pooling will be applied to
              the output of the last convolutional layer, and thus
              the output of the model will be a 2D tensor.
            - 'max' means that global max pooling will be applied.

        mask_dtype (str): Dtype to use for mask tensors.
        required_channels (int): The required number of channels of the
            backbone. 3 is the default for all current backbones.
        frames_per_batch (int): Size of z axis in generated batches.
            If equal to 1, assumes 2D data.
        kwargs (dict): Other standard inputs for retinanet_mask.

    Returns:
        tensorflow.keras.Model: RetinaNet model with a backbone.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if inputs is None:
        if frames_per_batch > 1:
            if channel_axis == 1:
                input_shape_with_time = tuple(
                    [input_shape[0], frames_per_batch] + list(input_shape)[1:])
            else:
                input_shape_with_time = tuple(
                    [frames_per_batch] + list(input_shape))
            inputs = Input(shape=input_shape_with_time)
        else:
            inputs = Input(shape=input_shape)

    if location:
        if frames_per_batch > 1:
            # TODO: TimeDistributed is incompatible with channels_first
            loc = TimeDistributed(Location2D(in_shape=input_shape))(inputs)
        else:
            loc = Location2D(in_shape=input_shape)(inputs)
        concat = Concatenate(axis=channel_axis)([inputs, loc])
    else:
        concat = inputs

    # force the channel size for backbone input to be `required_channels`
    if frames_per_batch > 1:
        norm = TimeDistributed(
            ImageNormalization2D(norm_method=norm_method))(concat)
        fixed_inputs = TimeDistributed(TensorProduct(required_channels))(norm)
    else:
        norm = ImageNormalization2D(norm_method=norm_method)(concat)
        fixed_inputs = TensorProduct(required_channels)(norm)

    # force the input shape
    axis = 0 if K.image_data_format() == 'channels_first' else -1
    fixed_input_shape = list(input_shape)
    fixed_input_shape[axis] = required_channels
    fixed_input_shape = tuple(fixed_input_shape)

    model_kwargs = {
        'include_top': False,
        'weights': None,
        'input_shape': fixed_input_shape,
        'pooling': pooling
    }

    _, backbone_dict = get_backbone(backbone, fixed_inputs,
                                    use_imagenet=use_imagenet,
                                    frames_per_batch=frames_per_batch,
                                    return_dict=True,
                                    **model_kwargs)

    # create the full model
    return retinanet_mask(inputs=inputs,
                          num_classes=num_classes,
                          backbone_dict=backbone_dict,
                          crop_size=crop_size,
                          backbone_levels=backbone_levels,
                          pyramid_levels=pyramid_levels,
                          name='{}_retinanet_mask'.format(backbone),
                          mask_dtype=mask_dtype,
                          frames_per_batch=frames_per_batch,
                          **kwargs)