def _se_block(inputs, filters, se_ratio, prefix):
    """Squeeze-and-excitation gate: rescale `inputs` with a learned gate.

    # Arguments
        inputs: feature map tensor to be gated.
        filters: channel count used for the pooled reshape and for the
            second 1x1 convolution. Presumably the channel count of
            `inputs` -- confirm at the call site.
        se_ratio: factor applied to `filters` (rounded through `_depth`)
            to size the bottleneck 1x1 convolution.
        prefix: layer-name prefix; named layers here are called
            `prefix + 'squeeze_excite/<suffix>'`.

    # Returns
        Output of the `Multiply` layer applied to `[inputs, gate]`.
    """
    channels_first = K.image_data_format() == 'channels_first'
    pooled_shape = (filters, 1, 1) if channels_first else (1, 1, filters)

    # Squeeze: global average pool, then restore two singleton spatial axes
    # so the 1x1 convolutions below can consume the result.
    gate = GlobalAveragePooling2D(
        name=prefix + 'squeeze_excite/AvgPool')(inputs)
    gate = Reshape(pooled_shape)(gate)

    # Excite: bottleneck conv + ReLU, then expand back to `filters` channels.
    gate = DeeplabConv2D(_depth(filters * se_ratio),
                         kernel_size=1,
                         padding='same',
                         name=prefix + 'squeeze_excite/Conv')(gate)
    gate = ReLU(name=prefix + 'squeeze_excite/Relu')(gate)
    gate = DeeplabConv2D(filters,
                         kernel_size=1,
                         padding='same',
                         name=prefix + 'squeeze_excite/Conv_1')(gate)
    gate = Activation(hard_sigmoid)(gate)

    return Multiply(name=prefix + 'squeeze_excite/Mul')([inputs, gate])
def _conv2d_same(x, filters, prefix, stride=1, kernel_size=3, rate=1):
    """2D convolution with explicit 'same'-style padding when strided.

    With `stride == 1` this is a plain `padding='same'` convolution.
    Otherwise the input is zero-padded explicitly and convolved with
    `padding='valid'`; the original note on this function says that without
    this there is a 1 pixel drift when stride = 2.

    # Arguments
        x: input tensor
        filters: number of filters of the convolution
        prefix: name given to the convolution layer
        stride: stride of the convolution (used for both spatial axes)
        kernel_size: kernel size of the convolution (square kernel)
        rate: atrous (dilation) rate of the convolution

    # Returns
        Output tensor of the convolution.
    """
    if stride == 1:
        return DeeplabConv2D(filters,
                             (kernel_size, kernel_size),
                             strides=(stride, stride),
                             padding='same',
                             use_bias=False,
                             dilation_rate=(rate, rate),
                             name=prefix)(x)

    # Size of the dilated kernel's footprint on the input.
    kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
    pad_total = kernel_size_effective - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg

    # A flat 2-tuple is read by ZeroPadding2D as (symmetric_height_pad,
    # symmetric_width_pad), which is wrong whenever pad_beg != pad_end
    # (even effective kernel sizes). Spell out (begin, end) for both
    # spatial axes instead; for odd kernels this is identical to before.
    x = ZeroPadding2D(((pad_beg, pad_end), (pad_beg, pad_end)))(x)
    return DeeplabConv2D(filters,
                         (kernel_size, kernel_size),
                         strides=(stride, stride),
                         padding='valid',
                         use_bias=False,
                         dilation_rate=(rate, rate),
                         name=prefix)(x)
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id,
                        skip_connection, rate=1):
    """Inverted residual block: (expand) -> depthwise -> project.

    # Arguments
        inputs: input tensor; its last axis is read as the channel count.
        expansion: multiplier on the input channels for the 1x1 expand conv.
        stride: stride of the 3x3 depthwise convolution.
        alpha: width multiplier applied to `filters`.
        filters: nominal number of output channels; the real count is
            `_make_divisible(int(filters * alpha), 8)`.
        block_id: block index used in layer names. A falsy value (block 0)
            skips the expand stage and uses the bare 'expanded_conv_' prefix.
        skip_connection: if truthy, add `inputs` to the block output.
        rate: dilation rate of the depthwise convolution.

    # Returns
        Output tensor of the block.
    """
    in_channels = inputs.shape.as_list()[-1]
    pointwise_filters = _make_divisible(int(filters * alpha), 8)
    bn_kwargs = dict(epsilon=1e-3, momentum=0.999)

    has_expand = bool(block_id)
    if has_expand:
        prefix = 'expanded_conv_{}_'.format(block_id)
    else:
        prefix = 'expanded_conv_'

    y = inputs
    if has_expand:
        # Expand
        y = DeeplabConv2D(expansion * in_channels,
                          kernel_size=1,
                          padding='same',
                          use_bias=False,
                          activation=None,
                          name=prefix + 'expand')(y)
        y = CustomBatchNormalization(name=prefix + 'expand_BN',
                                     **bn_kwargs)(y)
        y = ReLU(max_value=6.)(y)

    # Depthwise
    y = DeeplabDepthwiseConv2D(kernel_size=3,
                               strides=stride,
                               activation=None,
                               use_bias=False,
                               padding='same',
                               dilation_rate=(rate, rate),
                               name=prefix + 'depthwise')(y)
    y = CustomBatchNormalization(name=prefix + 'depthwise_BN', **bn_kwargs)(y)
    y = ReLU(max_value=6., name=prefix + 'depthwise_relu')(y)

    # Project (linear: no activation after the batch norm)
    y = DeeplabConv2D(pointwise_filters,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      activation=None,
                      name=prefix + 'project')(y)
    y = CustomBatchNormalization(name=prefix + 'project_BN', **bn_kwargs)(y)

    if not skip_connection:
        return y
    return Add(name=prefix + 'add')([inputs, y])
def identity_block(input_tensor, kernel_size, filters, stage, block, rate=1):
    """Residual block whose shortcut is the unmodified input tensor.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer of the main path
        filters: sequence of exactly 3 integers, the filters of the three
            conv layers of the main path
        stage: current stage label, used for generating layer names
        block: 'a','b'..., current block label (a string), used for
            generating layer names
        rate: dilation rate passed to all three conv layers

    # Returns
        Output tensor for the block.
    """
    n_reduce, n_middle, n_expand = filters
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    label = str(stage) + block
    conv_name_base = 'res' + label + '_branch'
    bn_name_base = 'bn' + label + '_branch'
    dilation = (rate, rate)

    # 1x1 reduce
    out = DeeplabConv2D(n_reduce, (1, 1),
                        kernel_initializer='he_normal',
                        dilation_rate=dilation,
                        name=conv_name_base + '2a')(input_tensor)
    out = CustomBatchNormalization(axis=bn_axis,
                                   name=bn_name_base + '2a')(out)
    out = ReLU()(out)

    # kxk middle conv
    out = DeeplabConv2D(n_middle, kernel_size,
                        padding='same',
                        kernel_initializer='he_normal',
                        dilation_rate=dilation,
                        name=conv_name_base + '2b')(out)
    out = CustomBatchNormalization(axis=bn_axis,
                                   name=bn_name_base + '2b')(out)
    out = ReLU()(out)

    # 1x1 expand
    out = DeeplabConv2D(n_expand, (1, 1),
                        kernel_initializer='he_normal',
                        dilation_rate=dilation,
                        name=conv_name_base + '2c')(out)
    out = CustomBatchNormalization(axis=bn_axis,
                                   name=bn_name_base + '2c')(out)

    # Residual sum with the untouched input, then the final activation.
    out = add([out, input_tensor])
    return ReLU()(out)
def Deeplabv3pResNet50(input_shape=(512, 512, 3),
                       weights=None,
                       input_tensor=None,
                       classes=21,
                       OS=8,
                       **kwargs):
    """ Instantiates the Deeplabv3+ ResNet50 architecture

    # Arguments
        input_shape: shape of input image. format HxWxC.
            Its first two entries also size the final resize and reshape,
            even when `input_tensor` is given.
        weights: one of 'pascal_voc' or None. The value is only validated
            here; the backbone is always built with `weights='imagenet'`.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        classes: number of output classes (filters of the logits conv).
        OS: output stride, forwarded to `ResNet50` and `ASPP_block`.

    # Returns
        A tuple `(model, backbone_len)`: the Keras model, and the backbone
        length value returned by `ResNet50`.

    # Raises
        ValueError: in case of invalid argument for `weights`
    """
    if weights not in {'pascal_voc', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    img_input = input_tensor
    if img_input is None:
        img_input = Input(shape=input_shape, name='image_input')

    # normalize input image
    img_norm = Lambda(normalize, name='input_normalize')(img_input)

    # backbone body for feature extract
    features, skip_feature, backbone_len = ResNet50(include_top=False,
                                                    input_tensor=img_norm,
                                                    weights='imagenet',
                                                    OS=OS)

    # ASPP block
    features = ASPP_block(features, OS)

    # Deeplabv3+ decoder for feature projection
    features = Decoder_block(features, skip_feature)

    # Final prediction conv block
    logits = DeeplabConv2D(classes, (1, 1),
                           padding='same',
                           name='logits_semantic')(features)
    resize_args = {
        'size': (input_shape[0], input_shape[1]),
        'mode': 'bilinear',
    }
    logits = Lambda(img_resize, arguments=resize_args,
                    name='pred_resize')(logits)
    # Flatten the spatial grid so the softmax output is (pixels, classes).
    logits = Reshape((input_shape[0] * input_shape[1], classes))(logits)
    probabilities = Softmax(name='Predictions/Softmax')(logits)

    model = Model(img_input, probabilities, name='deeplabv3p_resnet50')
    return model, backbone_len
def conv_block(input_tensor, kernel_size, filters, stage, block,
               strides=(2, 2), rate=1):
    """Residual block with a projection (conv + batch norm) shortcut.

    # Arguments
        input_tensor: input tensor
        kernel_size: kernel size of the middle conv layer of the main path
        filters: sequence of exactly 3 integers, the filters of the three
            conv layers of the main path
        stage: current stage label, used for generating layer names
        block: 'a','b'..., current block label (a string), used for
            generating layer names
        strides: strides of the first conv layer of the main path and of
            the shortcut conv layer
        rate: dilation rate passed to every conv layer of the block

    # Returns
        Output tensor for the block.
    """
    n_reduce, n_middle, n_expand = filters
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    label = str(stage) + block
    conv_name_base = 'res' + label + '_branch'
    bn_name_base = 'bn' + label + '_branch'
    dilation = (rate, rate)

    # Main path: strided 1x1 reduce -> kxk conv -> 1x1 expand.
    main = DeeplabConv2D(n_reduce, (1, 1),
                         strides=strides,
                         kernel_initializer='he_normal',
                         dilation_rate=dilation,
                         name=conv_name_base + '2a')(input_tensor)
    main = CustomBatchNormalization(axis=bn_axis,
                                    name=bn_name_base + '2a')(main)
    main = ReLU()(main)

    main = DeeplabConv2D(n_middle, kernel_size,
                         padding='same',
                         kernel_initializer='he_normal',
                         dilation_rate=dilation,
                         name=conv_name_base + '2b')(main)
    main = CustomBatchNormalization(axis=bn_axis,
                                    name=bn_name_base + '2b')(main)
    main = ReLU()(main)

    main = DeeplabConv2D(n_expand, (1, 1),
                         kernel_initializer='he_normal',
                         dilation_rate=dilation,
                         name=conv_name_base + '2c')(main)
    main = CustomBatchNormalization(axis=bn_axis,
                                    name=bn_name_base + '2c')(main)

    # Shortcut path: projected with the same strides and filter count so
    # that it can be summed with the main path.
    shortcut = DeeplabConv2D(n_expand, (1, 1),
                             strides=strides,
                             kernel_initializer='he_normal',
                             dilation_rate=dilation,
                             name=conv_name_base + '1')(input_tensor)
    shortcut = CustomBatchNormalization(axis=bn_axis,
                                        name=bn_name_base + '1')(shortcut)

    merged = add([main, shortcut])
    return ReLU()(merged)
def ResNet50(include_top=True,
             OS=8,
             weights='imagenet',
             input_tensor=None,
             input_shape=None,
             pooling=None,
             classes=1000,
             **kwargs):
    """Modified ResNet50 feature extractor body with specified output
    stride and skip level feature.

    Optionally loads weights pre-trained on ImageNet. The data format
    convention used is the one reported by `K.image_data_format()`.

    # Arguments
        include_top: whether to include the fully-connected
            layer at the top of the network.
        OS: output stride, one of 8, 16 or 32. Selects the strides of the
            first block of stages 4 and 5 and the dilation rates used in
            those stages.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor
            (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, forwarded to
            `_obtain_input_shape` (default size 224, minimum size 32).
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None`: no pooling layer is added (a warning is emitted).
            - `avg`: global average pooling is added to the internal model.
            - `max`: global max pooling is added to the internal model.
        classes: number of units of the final Dense layer, only used
            if `include_top` is True.

    # Returns
        A tuple `(x, skip, backbone_len)`: the output tensor of the
        internal model, the stage-2 output tensor used as skip level
        feature, and the number of layers of the internal model.

    # Raises
        ValueError: in case of invalid argument for `OS` or `weights`.
    """
    # OS -> (stage-4 stride, stage-4 rate, stage-5 stride, stage-5 rate)
    os_settings = {
        8: ((1, 1), 2, (1, 1), 4),
        16: ((2, 2), 1, (1, 1), 2),
        32: ((2, 2), 1, (2, 2), 1),
    }
    if OS not in os_settings:
        raise ValueError('invalid output stride', OS)
    (origin_os16_stride, origin_os16_block_rate,
     origin_os32_stride, origin_os32_block_rate) = os_settings[OS]

    weights_is_builtin = weights in {'imagenet', None}
    if not weights_is_builtin and not os.path.exists(weights):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    img_input = input_tensor
    if img_input is None:
        img_input = Input(shape=input_shape)

    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    # Stem: 7x7/2 conv followed by 3x3/2 max pooling.
    x = ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input)
    x = DeeplabConv2D(64, (7, 7),
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      name='conv1')(x)
    x = CustomBatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = ReLU()(x)
    x = ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Stage 2
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
    for label in ('b', 'c'):
        x = identity_block(x, 3, [64, 64, 256], stage=2, block=label)

    # skip level feature, with output stride = 4
    skip = x

    # Stage 3
    x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
    for label in ('b', 'c', 'd'):
        x = identity_block(x, 3, [128, 128, 512], stage=3, block=label)

    # Stage 4: original output stride changes to 16 from here, so we start
    # to control block stride and dilation rate (origin: stride=(2, 2)).
    x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a',
                   strides=origin_os16_stride)
    for label in ('b', 'c', 'd', 'e', 'f'):
        x = identity_block(x, 3, [256, 256, 1024], stage=4, block=label,
                           rate=origin_os16_block_rate)

    # Stage 5: original output stride changes to 32 from here
    # (origin: stride=(2, 2)). The first block still uses the stage-4 rate.
    x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a',
                   strides=origin_os32_stride,
                   rate=origin_os16_block_rate)
    for label in ('b', 'c'):
        x = identity_block(x, 3, [512, 512, 2048], stage=5, block=label,
                           rate=origin_os32_block_rate)

    if include_top:
        x = GlobalAveragePooling2D(name='avg_pool')(x)
        x = Dense(classes, activation='softmax', name='fc1000')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D()(x)
    else:
        warnings.warn('The output shape of `ResNet50(include_top=False)` '
                      'has been changed since Keras 2.2.0.')

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is None:
        inputs = img_input
    else:
        inputs = get_source_inputs(input_tensor)

    # Create model.
    model = Model(inputs, x, name='resnet50')

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            weights_file = 'resnet50_weights_tf_dim_ordering_tf_kernels.h5'
            weights_url = WEIGHTS_PATH
            weights_md5 = 'a7b3fe01876f51b976af0dea6bc144eb'
        else:
            weights_file = (
                'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5')
            weights_url = WEIGHTS_PATH_NO_TOP
            weights_md5 = 'a268eb855778b3df3c7506639542a6af'
        weights_path = get_file(weights_file,
                                weights_url,
                                cache_subdir='models',
                                md5_hash=weights_md5)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    backbone_len = len(model.layers)
    # need to return feature map and skip connection,
    # not the whole "no top" model
    return x, skip, backbone_len
def Deeplabv3pLiteMobileNetV3Large(input_shape=(512, 512, 3),
                                   alpha=1.0,
                                   weights=None,
                                   input_tensor=None,
                                   classes=21,
                                   OS=8,
                                   **kwargs):
    """ Instantiates the Deeplabv3+ MobileNetV3LargeLite architecture

    # Arguments
        input_shape: shape of input image. format HxWxC.
            Its first two entries also size the final resize and reshape,
            even when `input_tensor` is given.
        alpha: width multiplier, forwarded to the MobileNetV3Large backbone.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
            NOTE(review): the backbone is always requested with
            `weights='imagenet'`, so the backbone builder may reject
            values for which no pretrained weights exist -- confirm
            against `MobileNetV3Large`.
        weights: one of 'pascal_voc' or None. The value is only validated
            here; no DeepLab weights are loaded by this function.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        classes: number of output classes (filters of the logits conv).
        OS: output stride, forwarded to the backbone.

    # Returns
        A tuple `(model, backbone_len)`: the Keras model, and the backbone
        length value returned by `MobileNetV3Large`.

    # Raises
        ValueError: in case of invalid argument for `weights`
    """
    if weights not in {'pascal_voc', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    if input_tensor is None:
        img_input = Input(shape=input_shape, name='image_input')
    else:
        img_input = input_tensor

    # normalize input image
    img_norm = Lambda(normalize, name='input_normalize')(img_input)

    # backbone body for feature extract
    # `alpha` used to be hard-coded to 1.0 here, which silently ignored
    # the caller's width multiplier; forward it instead.
    x, _, backbone_len = MobileNetV3Large(include_top=False,
                                          input_tensor=img_norm,
                                          weights='imagenet',
                                          OS=OS,
                                          alpha=alpha)

    # use ASPP Lite block & no decode block
    x = ASPP_Lite_block(x)

    # Final prediction conv block
    x = DeeplabConv2D(classes, (1, 1),
                      padding='same',
                      name='logits_semantic')(x)
    x = Lambda(img_resize,
               arguments={
                   'size': (input_shape[0], input_shape[1]),
                   'mode': 'bilinear'
               },
               name='pred_resize')(x)
    # Flatten the spatial grid so the softmax output is (pixels, classes).
    x = Reshape((input_shape[0] * input_shape[1], classes))(x)
    x = Softmax(name='Predictions/Softmax')(x)

    model = Model(img_input, x, name='deeplabv3p_mobilenetv3large_lite')
    return model, backbone_len
def MobileNetV3(stack_fn,
                last_point_ch,
                input_shape=None,
                alpha=1.0,
                model_type='large',
                minimalistic=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                classes=1000,
                pooling=None,
                dropout_rate=0.2,
                **kwargs):
    """Builds a MobileNetV3 network and returns its feature tensors.

    # Arguments
        stack_fn: callable invoked as
            `stack_fn(x, kernel, activation, se_ratio)`; must return the
            pair `(output tensor, skip feature tensor)` for the stacked
            residual blocks.
        last_point_ch: number of channels of the 'Conv_2' layer of the
            classification head (scaled by `alpha` when `alpha` > 1.0).
        input_shape: optional shape tuple, forwarded to
            `_obtain_input_shape` (default size 224, minimum size 32).
            E.g. `(160, 160, 3)` would be one valid value.
        alpha: controls the width of the network.
            When `weights='imagenet'`, only `0.75` and `1.0` are accepted
            for non minimalistic models and only `1.0` for minimalistic.
        model_type: model flavour name; used in the model name and in the
            pretrained weight file name.
        minimalistic: if truthy, use kernel size 3, `relu` and no
            squeeze-and-excite; otherwise kernel size 5, `hard_swish`
            and an SE ratio of 0.25. These are handed to `stack_fn`.
        include_top: whether to include the classification head
            at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`) to use as image input for the model.
        classes: number of filters of the 'Logits' layer, only used
            if `include_top` is True.
        pooling: optional pooling mode when `include_top` is `False`:
            `None` (no pooling), `avg` or `max` (global pooling).
        dropout_rate: fraction of the input units to drop before the
            'Logits' layer; no Dropout layer is added unless it is > 0.

    # Returns
        A tuple `(final_feature, skip_feature, n_layers)`:
        the tensor returned by `stack_fn`, the skip feature returned by
        `stack_fn`, and `len(model.layers) - 3` of the internal model.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape or alpha when weights='imagenet'
    """
    weights_is_builtin = weights in {'imagenet', None}
    if not weights_is_builtin and not os.path.exists(weights):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # If input_shape is None and input_tensor is None, use the standard
    # shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    channels_last = K.image_data_format() == 'channels_last'
    row_axis, col_axis = (0, 1) if channels_last else (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if rows and cols and (rows < 32 or cols < 32):
        raise ValueError(
            'Input size must be at least 32x32; got `input_shape=' +
            str(input_shape) + '`')

    if weights == 'imagenet':
        unsupported_alpha = (
            (minimalistic is False and alpha not in [0.75, 1.0])
            or (minimalistic is True and alpha != 1.0))
        if unsupported_alpha:
            raise ValueError(
                'If imagenet weights are being loaded, '
                'alpha can be one of `0.75`, `1.0` for non minimalistic'
                ' or `1.0` for minimalistic only.')

        if rows != cols or rows != 224:
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not 224.'
                          ' Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    img_input = input_tensor
    if img_input is None:
        img_input = Input(shape=input_shape)

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    bn_kwargs = dict(axis=channel_axis, epsilon=1e-3, momentum=0.999)

    if minimalistic:
        kernel, activation, se_ratio = 3, relu, None
    else:
        kernel, activation, se_ratio = 5, hard_swish, 0.25

    # Stem
    x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                      name='Conv_pad')(img_input)
    x = DeeplabConv2D(16,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='valid',
                      use_bias=False,
                      name='Conv')(x)
    x = CustomBatchNormalization(name='Conv/BatchNorm', **bn_kwargs)(x)
    x = Activation(activation)(x)

    x, skip_feature = stack_fn(x, kernel, activation, se_ratio)
    # keep end of the feature extractor as final feature map
    final_feature = x

    last_conv_ch = _depth(K.int_shape(x)[channel_axis] * 6)

    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_point_ch = _depth(last_point_ch * alpha)

    x = DeeplabConv2D(last_conv_ch,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name='Conv_1')(x)
    x = CustomBatchNormalization(name='Conv_1/BatchNorm', **bn_kwargs)(x)
    x = Activation(activation)(x)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        if channel_axis == 1:
            pooled_shape = (last_conv_ch, 1, 1)
        else:
            pooled_shape = (1, 1, last_conv_ch)
        x = Reshape(pooled_shape)(x)
        x = DeeplabConv2D(last_point_ch,
                          kernel_size=1,
                          padding='same',
                          name='Conv_2')(x)
        x = Activation(activation)(x)
        if dropout_rate > 0:
            x = Dropout(dropout_rate)(x)
        x = DeeplabConv2D(classes,
                          kernel_size=1,
                          padding='same',
                          name='Logits')(x)
        x = Flatten()(x)
        x = Softmax(name='Predictions/Softmax')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D(name='avg_pool')(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D(name='max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is None:
        inputs = img_input
    else:
        inputs = get_source_inputs(input_tensor)

    # Create model.
    model = Model(inputs, x, name='MobilenetV3' + model_type)

    # Load weights.
    if weights == 'imagenet':
        model_name = "{}{}_224_{}_float".format(
            model_type, '_minimalistic' if minimalistic else '', str(alpha))
        suffix = '.h5' if include_top else '_no_top.h5'
        file_name = 'weights_mobilenet_v3_' + model_name + suffix
        # Each hash entry holds (with-top hash, no-top hash).
        file_hash = WEIGHTS_HASHES[model_name][0 if include_top else 1]
        weights_path = get_file(file_name,
                                BASE_WEIGHT_PATH + file_name,
                                cache_subdir='models',
                                file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    # Hand back the feature tensors rather than the model itself.
    return final_feature, skip_feature, len(model.layers) - 3
def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio,
                        activation, block_id, skip_connection=False, rate=1):
    """MobileNetV3 inverted residual block with optional SE and skip add.

    # Arguments
        x: input tensor.
        expansion: multiplier on the input channels; the expanded channel
            count is `_depth(infilters * expansion)`.
        filters: number of filters of the 1x1 projection convolution.
        kernel_size: kernel size of the depthwise convolution.
        stride: stride of the depthwise convolution.
        se_ratio: squeeze-and-excite ratio; a falsy value disables the
            SE block.
        activation: activation applied after the expand and depthwise
            batch norms.
        block_id: block index used in layer names. A falsy value (block 0)
            skips the expand stage and uses the 'expanded_conv/' prefix.
        skip_connection: if truthy, add the block input to its output.
        rate: dilation rate of the depthwise convolution.

    # Returns
        Output tensor of the block.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    bn_kwargs = dict(axis=channel_axis, epsilon=1e-3, momentum=0.999)
    infilters = K.int_shape(x)[channel_axis]
    block_input = x

    if block_id:
        prefix = 'expanded_conv_{}/'.format(block_id)
        # Expand
        x = DeeplabConv2D(_depth(infilters * expansion),
                          kernel_size=1,
                          padding='same',
                          use_bias=False,
                          name=prefix + 'expand')(x)
        x = CustomBatchNormalization(name=prefix + 'expand/BatchNorm',
                                     **bn_kwargs)(x)
        x = Activation(activation)(x)
    else:
        prefix = 'expanded_conv/'

    # Depthwise: always 'same' padding, whatever the stride.
    x = DeeplabDepthwiseConv2D(kernel_size,
                               strides=stride,
                               padding='same',
                               dilation_rate=(rate, rate),
                               use_bias=False,
                               name=prefix + 'depthwise/Conv')(x)
    x = CustomBatchNormalization(name=prefix + 'depthwise/BatchNorm',
                                 **bn_kwargs)(x)
    x = Activation(activation)(x)

    if se_ratio:
        x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)

    # Project (linear: no activation after the batch norm)
    x = DeeplabConv2D(filters,
                      kernel_size=1,
                      padding='same',
                      use_bias=False,
                      name=prefix + 'project')(x)
    x = CustomBatchNormalization(name=prefix + 'project/BatchNorm',
                                 **bn_kwargs)(x)

    if not skip_connection:
        return x
    return Add(name=prefix + 'Add')([block_input, x])
def MobileNetV2_body(input_tensor, OS, alpha, weights='imagenet'):
    """
    Modified MobileNetV2 feature extractor body
    with specified output stride and skip level feature

    # Arguments
        input_tensor: Keras tensor used as input of the backbone.
        OS: output stride, one of 8, 16 or 32. Selects the strides of
            blocks 6 and 13 and the dilation rates of blocks 7-16.
        alpha: width multiplier forwarded to every inverted residual block.
        weights: if 'imagenet', the no-top pretrained weights are
            downloaded and loaded into the internal model.

    # Returns
        A tuple `(x, skip, backbone_len)`: output tensor of block 16,
        output tensor of block 2 (skip level feature), and
        `len(model.layers) - 3` of the internal "no top" model.

    # Raises
        ValueError: in case of invalid argument for `OS`.
    """
    # OS -> (block-6 stride, blocks 7-13 rate, block-13 stride,
    #        blocks 14-16 rate)
    os_settings = {
        8: (1, 2, 1, 4),
        16: (2, 1, 1, 2),
        32: (2, 1, 2, 1),
    }
    if OS not in os_settings:
        raise ValueError('invalid output stride', OS)
    (origin_os16_stride, origin_os16_block_rate,
     origin_os32_stride, origin_os32_block_rate) = os_settings[OS]

    first_block_filters = _make_divisible(32 * alpha, 8)
    x = DeeplabConv2D(first_block_filters,
                      kernel_size=3,
                      strides=(2, 2),
                      padding='same',
                      use_bias=False,
                      name='Conv')(input_tensor)
    x = CustomBatchNormalization(epsilon=1e-3, momentum=0.999,
                                 name='Conv_BN')(x)
    x = ReLU(6.)(x)

    # One row per inverted residual block, in block_id order:
    # (filters, stride, rate, expansion, skip_connection)
    block_specs = (
        (16, 1, 1, 1, False),
        (24, 2, 1, 6, False),
        (24, 1, 1, 6, True),
        (32, 2, 1, 6, False),
        (32, 1, 1, 6, True),
        (32, 1, 1, 6, True),
        # original output stride changes to 16 from here, so we start to
        # control block stride and dilation rate (origin: stride=2!)
        (64, origin_os16_stride, 1, 6, False),
        (64, 1, origin_os16_block_rate, 6, True),
        (64, 1, origin_os16_block_rate, 6, True),
        (64, 1, origin_os16_block_rate, 6, True),
        (96, 1, origin_os16_block_rate, 6, False),
        (96, 1, origin_os16_block_rate, 6, True),
        (96, 1, origin_os16_block_rate, 6, True),
        # original output stride changes to 32 from here
        # (origin: stride=2!); this block still uses the os16 rate
        (160, origin_os32_stride, origin_os16_block_rate, 6, False),
        (160, 1, origin_os32_block_rate, 6, True),
        (160, 1, origin_os32_block_rate, 6, True),
        (320, 1, origin_os32_block_rate, 6, False),
    )

    skip = None
    for block_id, spec in enumerate(block_specs):
        n_filters, stride, rate, expansion, use_skip = spec
        x = _inverted_res_block(x,
                                filters=n_filters,
                                alpha=alpha,
                                stride=stride,
                                rate=rate,
                                expansion=expansion,
                                block_id=block_id,
                                skip_connection=use_skip)
        if block_id == 2:
            # skip level feature, with output stride = 4
            skip = x
    # end of feature extractor

    # expand the model structure to MobileNetV2 no top, so
    # that we can load official imagenet pretrained weights

    # no alpha applied to last conv as stated in the paper:
    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    last_block_filters = 1280
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)

    y = DeeplabConv2D(last_block_filters,
                      kernel_size=1,
                      use_bias=False,
                      name='Conv_1')(x)
    y = CustomBatchNormalization(epsilon=1e-3, momentum=0.999,
                                 name='Conv_1_bn')(y)
    y = ReLU(6., name='out_relu')(y)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)

    # hardcode row=224
    rows = 224

    model = Model(inputs, y, name='mobilenetv2_%0.2f_%s' % (alpha, rows))

    # Load weights.
    if weights == 'imagenet':
        model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                      str(alpha) + '_' + str(rows) + '_no_top' + '.h5')
        weights_path = get_file(model_name,
                                BACKBONE_WEIGHT_PATH + model_name,
                                cache_subdir='models')
        model.load_weights(weights_path)

    # The 3 trailing layers (Conv_1, Conv_1_bn, out_relu) only exist so the
    # pretrained weights can be loaded; they are not part of the backbone.
    backbone_len = len(model.layers) - 3
    # need to return feature map and skip connection,
    # not the whole "no top" model
    return x, skip, backbone_len
def Deeplabv3pMobileNetV2(input_shape=(512, 512, 3),
                          alpha=1.0,
                          weights=None,
                          input_tensor=None,
                          classes=21,
                          OS=8,
                          **kwargs):
    """ Instantiates the Deeplabv3+ MobileNetV2 architecture

    # Arguments
        input_shape: shape of input image. format HxWxC.
            Its first two entries also size the final resize and reshape,
            even when `input_tensor` is given.
        alpha: width multiplier, forwarded to `MobileNetV2_body`.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
        weights: one of 'pascal_voc' or None. The value is only validated
            here; no DeepLab weights are loaded by this function.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        classes: number of output classes (filters of the logits conv).
        OS: output stride, forwarded to `MobileNetV2_body` and `ASPP_block`.

    # Returns
        A tuple `(model, backbone_len)`: the Keras model, and the backbone
        length value returned by `MobileNetV2_body`.

    # Raises
        ValueError: in case of invalid argument for `weights`
    """
    if weights not in {'pascal_voc', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    img_input = input_tensor
    if img_input is None:
        img_input = Input(shape=input_shape, name='image_input')

    # normalize input image
    img_norm = Lambda(normalize, name='input_normalize')(img_input)

    # backbone body for feature extract
    features, skip_feature, backbone_len = MobileNetV2_body(img_norm, OS,
                                                            alpha)

    # ASPP block
    features = ASPP_block(features, OS)

    # Deeplabv3+ decoder for feature projection
    features = Decoder_block(features, skip_feature)

    # Final prediction conv block
    logits = DeeplabConv2D(classes, (1, 1),
                           padding='same',
                           name='logits_semantic')(features)
    resize_args = {
        'size': (input_shape[0], input_shape[1]),
        'mode': 'bilinear'
    }
    logits = Lambda(img_resize, arguments=resize_args,
                    name='pred_resize')(logits)
    # Flatten the spatial grid so the softmax output is (pixels, classes).
    logits = Reshape((input_shape[0] * input_shape[1], classes))(logits)
    probabilities = Softmax(name='Predictions/Softmax')(logits)

    model = Model(img_input, probabilities, name='deeplabv3p_mobilenetv2')
    return model, backbone_len
def Deeplabv3pXception(input_shape=(512, 512, 3),
                       weights='pascal_voc',
                       input_tensor=None,
                       classes=21,
                       OS=16,
                       **kwargs):
    """Build the Deeplabv3+ segmentation model on an Xception backbone.

    Optionally loads weights pre-trained on PASCAL VOC.

    # Arguments
        input_shape: shape of the input image, format HxWxC. Used to create
            the input layer when `input_tensor` is None; its first two
            entries are also the target size of the final bilinear resize.
            PASCAL VOC model was trained on (512,512,3) images.
        weights: one of 'pascal_voc' (fetch the pre-trained weight file and
            load it by layer name) or None (random initialization).
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`) to use as image input for the model.
        classes: number of output channels of the final 1x1 conv.
        OS: output stride, forwarded to `Xception_body` and `ASPP_block`.
        **kwargs: accepted and ignored.

    # Returns
        A tuple `(model, backbone_len)`: the Keras model, whose output is a
        softmax over a tensor reshaped to (H * W, classes), and the
        backbone length value reported by `Xception_body`.

    # Raises
        ValueError: in case of invalid argument for `weights`.
    """
    if weights not in {'pascal_voc', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `pascal_voc` '
                         '(pre-trained on PASCAL VOC)')

    img_input = (Input(shape=input_shape, name='image_input')
                 if input_tensor is None else input_tensor)

    # Normalize the input image before it enters the backbone.
    normalized = Lambda(normalize, name='input_normalize')(img_input)

    # Backbone -> ASPP -> Deeplabv3+ decoder.
    features, skip_feature, backbone_len = Xception_body(normalized, OS)
    features = ASPP_block(features, OS)
    decoded = Decoder_block(features, skip_feature)

    # Final prediction conv block.
    logits = DeeplabConv2D(classes, (1, 1),
                           padding='same',
                           name='logits_semantic')(decoded)

    # Resize the logits to the requested image size, then flatten the
    # spatial dimensions so the softmax runs over one row per pixel.
    height, width = input_shape[0], input_shape[1]
    resize_args = {'size': (height, width), 'mode': 'bilinear'}
    upsampled = Lambda(img_resize,
                       arguments=resize_args,
                       name='pred_resize')(logits)
    flattened = Reshape((height * width, classes))(upsampled)
    probabilities = Softmax(name='Predictions/Softmax')(flattened)

    model = Model(img_input, probabilities, name='deeplabv3p_xception')

    # NOTE(review): the logits layer is always named 'logits_semantic', so
    # with classes != 21 the by-name load below may hit a shape mismatch
    # against the pre-trained file -- confirm against the Keras version used.
    if weights == 'pascal_voc':
        weights_path = get_file(
            'deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
            WEIGHTS_PATH_X,
            cache_subdir='models')
        model.load_weights(weights_path, by_name=True)

    return model, backbone_len
def Xception_body(input_tensor, OS):
    """
    Modified Aligned Xception feature extractor body
    with specified output stride and skip level feature

    # Arguments
        input_tensor: Keras tensor to extract features from. Must not be
            None.
        OS: output stride of the extractor, one of {8, 16, 32}. It selects
            the strides and dilation rates of the later blocks.

    # Returns
        A tuple `(x, skip, backbone_len)`: the final feature tensor, the
        skip level feature taken from 'entry_flow_block2', and the number
        of layers in a model spanning the source inputs of `input_tensor`
        up to `x`.

    # Raises
        ValueError: if `OS` is not one of {8, 16, 32}, or if `input_tensor`
            is None.
    """
    if OS == 8:
        origin_os16_stride, origin_os16_block_rate = 1, 2
        origin_os32_stride, origin_os32_block_rate = 1, 4
    elif OS == 16:
        origin_os16_stride, origin_os16_block_rate = 2, 1
        origin_os32_stride, origin_os32_block_rate = 1, 2
    elif OS == 32:
        origin_os16_stride, origin_os16_block_rate = 2, 1
        origin_os32_stride, origin_os32_block_rate = 2, 1
    else:
        raise ValueError('invalid output stride', OS)

    # Fail early and clearly: the layer-count computation at the end needs
    # the source inputs of a real tensor.
    if input_tensor is None:
        raise ValueError('`input_tensor` must be a Keras tensor, got None')

    x = DeeplabConv2D(32, (3, 3),
                      strides=(2, 2),
                      name='entry_flow_conv1_1',
                      use_bias=False,
                      padding='same')(input_tensor)
    x = CustomBatchNormalization(name='entry_flow_conv1_1_BN')(x)
    x = ReLU()(x)

    x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
    x = CustomBatchNormalization(name='entry_flow_conv1_2_BN')(x)
    x = ReLU()(x)

    x = _xception_block(x, [128, 128, 128], 'entry_flow_block1',
                        skip_connection_type='conv',
                        stride=2,
                        depth_activation=False)

    # skip level feature, with output stride = 4
    x, skip = _xception_block(x, [256, 256, 256], 'entry_flow_block2',
                              skip_connection_type='conv',
                              stride=2,
                              depth_activation=False,
                              return_skip=True)

    # original output stride changes to 16 from here, so we start to
    # control block stride and dilation rate
    x = _xception_block(x, [728, 728, 728], 'entry_flow_block3',
                        skip_connection_type='conv',
                        stride=origin_os16_stride,
                        depth_activation=False)

    for i in range(16):
        x = _xception_block(x, [728, 728, 728],
                            'middle_flow_unit_{}'.format(i + 1),
                            skip_connection_type='sum',
                            stride=1,
                            rate=origin_os16_block_rate,
                            depth_activation=False)

    # original output stride changes to 32 from here; the first exit block
    # keeps the middle-flow dilation rate and only its stride is switched
    x = _xception_block(x, [728, 1024, 1024], 'exit_flow_block1',
                        skip_connection_type='conv',
                        stride=origin_os32_stride,
                        rate=origin_os16_block_rate,
                        depth_activation=False)
    x = _xception_block(x, [1536, 1536, 2048], 'exit_flow_block2',
                        skip_connection_type='none',
                        stride=1,
                        rate=origin_os32_block_rate,
                        depth_activation=True)
    # end of feature extractor

    # Ensure that the layer count takes into account
    # any potential predecessors of `input_tensor`.
    inputs = get_source_inputs(input_tensor)
    backbone_len = len(Model(inputs, x).layers)

    return x, skip, backbone_len