def _pep_block(inputs, proj_filters, filters, stride, expansion, block_id):
    """Stack pre-project, expand, depthwise and project stages on `inputs`.

    Every stage is a convolution followed by CustomBatchNormalization; all
    stages except the final projection are also followed by ReLU6. Layer
    names are prefixed with 'pep_block_<block_id>_'.

    # Arguments
        inputs: input tensor; its last axis is read as the channel count.
        proj_filters: number of filters of the 1x1 pre-projection conv.
        filters: number of filters of the final 1x1 projection
            (converted with `int`).
        stride: stride of the 3x3 depthwise convolution. When it equals 2
            the tensor is zero-padded first; 'same' padding is used only
            when it equals 1, 'valid' otherwise.
        expansion: multiplier applied to `proj_filters` for the expand conv.
        block_id: value formatted into the layer-name prefix.

    # Returns
        The sum of `inputs` and the projected tensor when `stride` is 1 and
        the input channel count equals `filters`; otherwise the projected
        tensor alone.
    """
    input_depth = inputs.shape.as_list()[-1]
    output_depth = int(filters)
    prefix = 'pep_block_{}_'.format(block_id)

    # Settings shared by every batch-norm / 1x1 convolution in the block.
    bn_kwargs = dict(epsilon=1e-3, momentum=0.999)
    pointwise_kwargs = dict(kernel_size=1,
                            padding='same',
                            use_bias=False,
                            activation=None)

    # Pre-project
    net = Conv2D(proj_filters,
                 name=prefix + 'preproject',
                 **pointwise_kwargs)(inputs)
    net = CustomBatchNormalization(name=prefix + 'preproject_BN',
                                   **bn_kwargs)(net)
    net = ReLU(6., name=prefix + 'preproject_relu')(net)

    # Expand
    net = Conv2D(int(expansion * proj_filters),
                 name=prefix + 'expand',
                 **pointwise_kwargs)(net)
    net = CustomBatchNormalization(name=prefix + 'expand_BN',
                                   **bn_kwargs)(net)
    net = ReLU(6., name=prefix + 'expand_relu')(net)

    # Depthwise
    if stride == 2:
        net = ZeroPadding2D(padding=correct_pad(K, net, 3),
                            name=prefix + 'pad')(net)
    depthwise_padding = 'same' if stride == 1 else 'valid'
    net = DepthwiseConv2D(kernel_size=3,
                          strides=stride,
                          activation=None,
                          use_bias=False,
                          padding=depthwise_padding,
                          name=prefix + 'depthwise')(net)
    net = CustomBatchNormalization(name=prefix + 'depthwise_BN',
                                   **bn_kwargs)(net)
    net = ReLU(6., name=prefix + 'depthwise_relu')(net)

    # Project (no activation after the final batch norm)
    net = Conv2D(output_depth,
                 name=prefix + 'project',
                 **pointwise_kwargs)(net)
    net = CustomBatchNormalization(name=prefix + 'project_BN',
                                   **bn_kwargs)(net)

    # Residual connection only when shapes are guaranteed to match.
    if input_depth == output_depth and stride == 1:
        return Add(name=prefix + 'add')([inputs, net])
    return net
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
    """Build an expand -> depthwise -> project block with optional residual.

    # Arguments
        inputs: input tensor.
        expansion: multiplier applied to the input channel count for the
            1x1 expand convolution (skipped when `block_id` is falsy).
        stride: stride of the 3x3 depthwise convolution. When it equals 2
            the tensor is zero-padded first; 'same' padding is used only
            when it equals 1, 'valid' otherwise.
        alpha: width multiplier applied to `filters`.
        filters: base number of output filters; `int(filters * alpha)` is
            passed through `_make_divisible(..., 8)`.
        block_id: block index. A falsy value skips the expand stage and
            uses the layer-name prefix 'expanded_conv_'; otherwise the
            prefix is 'block_<block_id>_'.

    # Returns
        `inputs` added to the projected tensor when `stride` is 1 and the
        input channel count equals the projected filter count; otherwise
        the projected tensor alone.
    """
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    input_depth = K.int_shape(inputs)[channel_axis]
    output_depth = _make_divisible(int(filters * alpha), 8)
    bn_kwargs = dict(axis=channel_axis, epsilon=1e-3, momentum=0.999)

    net = inputs
    if not block_id:
        # First block of the network: no expansion stage.
        prefix = 'expanded_conv_'
    else:
        prefix = 'block_{}_'.format(block_id)
        # Expand
        net = YoloConv2D(expansion * input_depth,
                         kernel_size=1,
                         padding='same',
                         use_bias=False,
                         activation=None,
                         name=prefix + 'expand')(net)
        net = CustomBatchNormalization(name=prefix + 'expand_BN',
                                       **bn_kwargs)(net)
        net = ReLU(6., name=prefix + 'expand_relu')(net)

    # Depthwise
    if stride == 2:
        net = ZeroPadding2D(padding=correct_pad(K, net, 3),
                            name=prefix + 'pad')(net)
    depthwise_padding = 'same' if stride == 1 else 'valid'
    net = YoloDepthwiseConv2D(kernel_size=3,
                              strides=stride,
                              activation=None,
                              use_bias=False,
                              padding=depthwise_padding,
                              name=prefix + 'depthwise')(net)
    net = CustomBatchNormalization(name=prefix + 'depthwise_BN',
                                   **bn_kwargs)(net)
    net = ReLU(6., name=prefix + 'depthwise_relu')(net)

    # Project (linear: batch norm only, no activation)
    net = YoloConv2D(output_depth,
                     kernel_size=1,
                     padding='same',
                     use_bias=False,
                     activation=None,
                     name=prefix + 'project')(net)
    net = CustomBatchNormalization(name=prefix + 'project_BN',
                                   **bn_kwargs)(net)

    if input_depth == output_depth and stride == 1:
        return Add(name=prefix + 'add')([inputs, net])
    return net
def Depthwise_Separable_Conv2D_BN_Leaky(filters, kernel_size=(3, 3), block_id_str=None):
    """Compose a depthwise conv and a 1x1 pointwise conv, each with BN + LeakyReLU.

    # Arguments
        filters: number of filters of the 1x1 pointwise convolution.
        kernel_size: kernel size of the depthwise convolution.
        block_id_str: string embedded in every layer name. When it is
            empty or None, `str(K.get_uid())` is used instead.

    # Returns
        The callable produced by `compose` from the six layers, applied in
        order: depthwise conv, BN, LeakyReLU, pointwise conv, BN, LeakyReLU.
    """
    block_id_str = block_id_str or str(K.get_uid())

    depthwise_stage = [
        YoloDepthwiseConv2D(kernel_size,
                            padding='same',
                            name='conv_dw_' + block_id_str),
        CustomBatchNormalization(name='conv_dw_%s_bn' % block_id_str),
        LeakyReLU(alpha=0.1, name='conv_dw_%s_leaky_relu' % block_id_str),
    ]
    pointwise_stage = [
        YoloConv2D(filters,
                   (1,1),
                   padding='same',
                   use_bias=False,
                   strides=(1, 1),
                   name='conv_pw_%s' % block_id_str),
        CustomBatchNormalization(name='conv_pw_%s_bn' % block_id_str),
        LeakyReLU(alpha=0.1, name='conv_pw_%s_leaky_relu' % block_id_str),
    ]
    return compose(*(depthwise_stage + pointwise_stage))
def Darknet_Depthwise_Separable_Conv2D_BN_Swish(filters, kernel_size=(3, 3), block_id_str=None, **kwargs):
    """Compose a depthwise conv and a 1x1 pointwise conv, each with BN + swish.

    # Arguments
        filters: number of filters of the 1x1 pointwise convolution.
        kernel_size: kernel size of the depthwise convolution.
        block_id_str: string embedded in every layer name. When it is
            empty or None, `str(K.get_uid())` is used instead.
        **kwargs: extra keyword arguments forwarded to
            `DarknetDepthwiseConv2D`; they are merged over the default
            `use_bias=False`.

    # Returns
        The callable produced by `compose` from the six layers, applied in
        order: depthwise conv, BN, swish, pointwise conv, BN, swish.
    """
    block_id_str = block_id_str or str(K.get_uid())

    # Bias is off by default, but a caller-supplied value takes precedence.
    depthwise_kwargs = dict({'use_bias': False}, **kwargs)

    depthwise_stage = [
        DarknetDepthwiseConv2D(kernel_size,
                               name='conv_dw_' + block_id_str,
                               **depthwise_kwargs),
        CustomBatchNormalization(name='conv_dw_%s_bn' % block_id_str),
        Activation(swish, name='conv_dw_%s_swish' % block_id_str),
    ]
    pointwise_stage = [
        YoloConv2D(filters,
                   (1,1),
                   padding='same',
                   use_bias=False,
                   strides=(1, 1),
                   name='conv_pw_%s' % block_id_str),
        CustomBatchNormalization(name='conv_pw_%s_bn' % block_id_str),
        Activation(swish, name='conv_pw_%s_swish' % block_id_str),
    ]
    return compose(*(depthwise_stage + pointwise_stage))
def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio, activation, block_id):
    """Build an expand -> depthwise -> (SE) -> project block with optional residual.

    # Arguments
        x: input tensor.
        expansion: multiplier applied to the input channel count; the
            product is passed through `_depth` for the expand convolution.
        filters: number of filters of the 1x1 projection convolution.
        kernel_size: kernel size of the depthwise convolution.
        stride: stride of the depthwise convolution. When it equals 2 the
            tensor is zero-padded first; 'same' padding is used only when
            it equals 1, 'valid' otherwise.
        se_ratio: when truthy, `_se_block` is applied after the depthwise
            stage with this ratio.
        activation: activation passed to `Activation` after the expand and
            depthwise stages.
        block_id: block index. A falsy value skips the expand stage and
            uses the prefix 'expanded_conv/'; otherwise the prefix is
            'expanded_conv_<block_id>/'.

    # Returns
        The input added to the projected tensor when `stride` is 1 and the
        input channel count equals `filters`; otherwise the projected
        tensor alone.
    """
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1

    block_input = x
    input_depth = K.int_shape(x)[channel_axis]
    bn_kwargs = dict(axis=channel_axis, epsilon=1e-3, momentum=0.999)

    net = x
    if not block_id:
        prefix = 'expanded_conv/'
    else:
        # Expand
        prefix = 'expanded_conv_{}/'.format(block_id)
        net = Conv2D(_depth(input_depth * expansion),
                     kernel_size=1,
                     padding='same',
                     use_bias=False,
                     name=prefix + 'expand')(net)
        net = CustomBatchNormalization(name=prefix + 'expand/BatchNorm',
                                       **bn_kwargs)(net)
        net = Activation(activation)(net)

    # Depthwise
    if stride == 2:
        net = ZeroPadding2D(padding=correct_pad(K, net, kernel_size),
                            name=prefix + 'depthwise/pad')(net)
    depthwise_padding = 'same' if stride == 1 else 'valid'
    net = DepthwiseConv2D(kernel_size,
                          strides=stride,
                          padding=depthwise_padding,
                          use_bias=False,
                          name=prefix + 'depthwise/Conv')(net)
    net = CustomBatchNormalization(name=prefix + 'depthwise/BatchNorm',
                                   **bn_kwargs)(net)
    net = Activation(activation)(net)

    # Optional squeeze-and-excite stage
    if se_ratio:
        net = _se_block(net, _depth(input_depth * expansion), se_ratio, prefix)

    # Project (batch norm only, no activation)
    net = Conv2D(filters,
                 kernel_size=1,
                 padding='same',
                 use_bias=False,
                 name=prefix + 'project')(net)
    net = CustomBatchNormalization(name=prefix + 'project/BatchNorm',
                                   **bn_kwargs)(net)

    if stride == 1 and input_depth == filters:
        net = Add(name=prefix + 'Add')([block_input, net])
    return net
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    """Add the initial convolution layer (zero pad, conv, batch norm, ReLU6).

    # Arguments
        inputs: input tensor.
        filters: base number of output filters of the convolution; the
            value actually used is `int(filters * alpha)`.
        alpha: width multiplier applied to `filters`.
        kernel: kernel size passed to the convolution.
        strides: strides passed to the convolution.

    # Returns
        Output tensor of the block. The layers are named 'conv1_pad',
        'conv1', 'conv1_bn' and 'conv1_relu'.
    """
    if K.image_data_format() == 'channels_first':
        bn_axis = 1
    else:
        bn_axis = -1
    scaled_filters = int(filters * alpha)

    # Explicit ((0, 1), (0, 1)) zero padding, then a 'valid' convolution.
    padded = ZeroPadding2D(padding=((0, 1), (0, 1)), name='conv1_pad')(inputs)
    conv = YoloConv2D(scaled_filters,
                      kernel,
                      padding='valid',
                      use_bias=False,
                      strides=strides,
                      name='conv1')(padded)
    normalized = CustomBatchNormalization(axis=bn_axis, name='conv1_bn')(conv)
    return ReLU(6., name='conv1_relu')(normalized)
def bottleneck_csp_block(x, num_filters, num_blocks, depth_multiple, width_multiple, shortcut=False):
    """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks

    # Arguments
        x: input tensor.
        num_filters: base number of output filters; scaled by
            `width_multiple` and passed through `make_divisible(..., 8)`.
        num_blocks: base number of stacked bottleneck blocks. Values
            greater than 1 are scaled by `depth_multiple`, rounded, and
            clamped to at least 1; other values are used unchanged.
        depth_multiple: depth scaling factor.
        width_multiple: width scaling factor.
        shortcut: when True, each bottleneck output is added to its input.

    # Returns
        Output tensor of the final 1x1 conv + BN + swish.
    """
    num_filters = make_divisible(num_filters * width_multiple, 8)
    if num_blocks > 1:
        num_blocks = max(round(num_blocks * depth_multiple), 1)  # depth gain
    half_filters = num_filters // 2

    # Side branch: plain 1x1 conv on the block input, concatenated later.
    res_connection = DarknetConv2D(half_filters, (1, 1))(x)

    # Main branch: 1x1 conv, then the bottleneck stack.
    main = DarknetConv2D_BN_Swish(half_filters, (1, 1))(x)
    for _ in range(num_blocks):
        bottleneck = compose(
            DarknetConv2D_BN_Swish(half_filters, (1, 1)),
            DarknetConv2D_BN_Swish(half_filters, (3, 3)))(main)
        if shortcut:
            main = Add()([main, bottleneck])
        else:
            main = bottleneck
    main = DarknetConv2D(half_filters, (1, 1))(main)

    # Merge both branches, then BN + swish and the output 1x1 conv.
    merged = Concatenate()([main, res_connection])
    merged = CustomBatchNormalization()(merged)
    merged = Activation(swish)(merged)
    return DarknetConv2D_BN_Swish(num_filters, (1, 1))(merged)
def DarknetConv2D_BN_Swish(*args, **kwargs):
    """Darknet Convolution2D followed by CustomBatchNormalization and Swish.

    Positional and keyword arguments are forwarded to `DarknetConv2D`.
    `use_bias` defaults to False but can be overridden through `kwargs`.
    """
    conv_kwargs = dict({'use_bias': False}, **kwargs)
    layers = (
        DarknetConv2D(*args, **conv_kwargs),
        CustomBatchNormalization(),
        Activation(swish),
    )
    return compose(*layers)
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Darknet Convolution2D followed by CustomBatchNormalization and LeakyReLU.

    Positional and keyword arguments are forwarded to `DarknetConv2D`.
    `use_bias` defaults to False but can be overridden through `kwargs`.
    The LeakyReLU uses `alpha=0.1`.
    """
    conv_kwargs = dict({'use_bias': False}, **kwargs)
    layers = (
        DarknetConv2D(*args, **conv_kwargs),
        CustomBatchNormalization(),
        LeakyReLU(alpha=0.1),
    )
    return compose(*layers)
def cheap_operations(x, output_filters, kernel_size, strides=(1,1), padding='same', act=True, use_bias=False, name=None):
    """Apply a depthwise conv and batch norm, optionally followed by ReLU.

    # Arguments
        x: input tensor.
        output_filters: accepted for signature parity; not used here.
        kernel_size, strides, padding, use_bias: forwarded to the
            depthwise convolution.
        act: when truthy, a ReLU is appended.
        name: layer-name prefix; suffixes '_0', '_1' and '_relu' are
            appended, so it must be a string even though it defaults to
            None.

    # Returns
        Output tensor.
    """
    depthwise = YoloDepthwiseConv2D(kernel_size=kernel_size,
                                    strides=strides,
                                    padding=padding,
                                    use_bias=use_bias,
                                    name=name+'_0')
    out = depthwise(x)
    out = CustomBatchNormalization(name=name+'_1')(out)
    if not act:
        return out
    return ReLU(name=name+'_relu')(out)
def basic_conv2d_graph(x, out_channels, kernel_size, strides, padding, activation=True, name=''):
    """Apply a bias-free conv and batch norm, optionally followed by ReLU.

    # Arguments
        x: input tensor.
        out_channels: number of convolution filters.
        kernel_size, strides, padding: forwarded to the convolution.
        activation: when truthy, an (unnamed) ReLU is appended.
        name: layer-name prefix; '_conv' and '_norm' are appended.

    # Returns
        Output tensor.
    """
    conv = YoloConv2D(out_channels,
                      kernel_size=kernel_size,
                      strides=strides,
                      padding=padding,
                      use_bias=False,
                      name=name + '_conv')
    out = CustomBatchNormalization(name=name + '_norm')(conv(x))
    if not activation:
        return out
    return ReLU()(out)
def GhostBottleneck(input_x, mid_chs, out_chs, dw_kernel_size=3, stride=(1,1), se_ratio=0., name=None):
    """GhostNet bottleneck with optional squeeze-and-excite.

    # Arguments
        input_x: input tensor; its last axis is compared with `out_chs`
            to decide whether the shortcut can be the identity.
        mid_chs: channel count passed to the first GhostModule.
        out_chs: channel count passed to the second GhostModule and to
            the shortcut projection.
        dw_kernel_size: kernel size of the depthwise convolutions.
        stride: 2-element stride; only `stride[0]` is inspected for the
            branching decisions, the full value is passed to the layers.
        se_ratio: squeeze-and-excite ratio; SE is applied only when it is
            not None and greater than 0.
        name: layer-name prefix; suffixes are appended, so it must be a
            string even though it defaults to None.

    # Returns
        Sum of the main branch and the shortcut branch.
    """
    use_se = se_ratio is not None and se_ratio > 0.

    # 1st ghost module
    main = GhostModule(input_x, mid_chs, act=True, name=name+'_ghost1')

    # Depthwise convolution, only when downsampling
    if stride[0] > 1:
        main = YoloDepthwiseConv2D(kernel_size=dw_kernel_size,
                                   strides=stride,
                                   padding='same',
                                   use_bias=False,
                                   name=name+'_conv_dw')(main)
        main = CustomBatchNormalization(name=name+'_bn_dw')(main)

    # Squeeze-and-excitation
    if use_se:
        main = SqueezeExcite(main, se_ratio=se_ratio, name=name+'_se')

    # 2nd ghost module
    main = GhostModule(main, out_chs, act=False, name=name+'_ghost2')

    # Shortcut: identity when shape is unchanged, otherwise a
    # depthwise conv + 1x1 conv projection.
    identity_shortcut = (input_x.shape[-1] == out_chs and stride[0] == 1)
    if not identity_shortcut:
        shortcut_name = name + '_shortcut'
        shortcut = YoloDepthwiseConv2D(kernel_size=dw_kernel_size,
                                       strides=stride,
                                       padding='same',
                                       use_bias=False,
                                       name=shortcut_name+'_0')(input_x)
        shortcut = CustomBatchNormalization(name=shortcut_name+'_1')(shortcut)
        shortcut = YoloConv2D(filters=out_chs,
                              kernel_size=1,
                              strides=(1,1),
                              padding='valid',
                              use_bias=False,
                              name=shortcut_name+'_2')(shortcut)
        shortcut = CustomBatchNormalization(name=shortcut_name+'_3')(shortcut)
    else:
        shortcut = input_x

    return Add(name=name+'_add')([main, shortcut])
def primary_conv(x, output_filters, kernel_size, strides=(1,1), padding='same', act=True, use_bias=False, name=None):
    """Apply a regular conv and batch norm, optionally followed by ReLU.

    # Arguments
        x: input tensor.
        output_filters: number of convolution filters.
        kernel_size, strides, padding, use_bias: forwarded to the
            convolution.
        act: when truthy, a ReLU is appended.
        name: layer-name prefix; suffixes '_0', '_1' and '_relu' are
            appended, so it must be a string even though it defaults to
            None.

    # Returns
        Output tensor.
    """
    conv = YoloConv2D(filters=output_filters,
                      kernel_size=kernel_size,
                      strides=strides,
                      padding=padding,
                      use_bias=use_bias,
                      name=name + '_0')
    out = conv(x)
    out = CustomBatchNormalization(name=name+'_1')(out)
    if not act:
        return out
    return ReLU(name=name+'_relu')(out)
def ConvBnAct(input_x, out_chs, kernel_size, stride=(1,1), name=None):
    """Apply a bias-free 'valid' conv, batch norm and ReLU.

    # Arguments
        input_x: input tensor.
        out_chs: number of convolution filters.
        kernel_size: convolution kernel size.
        stride: convolution strides.
        name: layer-name prefix; '_conv', '_bn1' and '_relu' are appended,
            so it must be a string even though it defaults to None.

    # Returns
        Output tensor.
    """
    conv = YoloConv2D(filters=out_chs,
                      kernel_size=kernel_size,
                      strides=stride,
                      padding='valid',
                      use_bias=False,
                      name=name+'_conv')
    out = conv(input_x)
    out = CustomBatchNormalization(name=name+'_bn1')(out)
    return ReLU(name=name+'_relu')(out)
def Depthwise_Conv2D_BN_Leaky(kernel_size=(3, 3), block_id_str=None):
    """Compose a depthwise conv with batch norm and LeakyReLU.

    # Arguments
        kernel_size: kernel size of the depthwise convolution.
        block_id_str: string embedded in every layer name. When it is
            empty or None, `str(K.get_uid())` is used instead.

    # Returns
        The callable produced by `compose` from the three layers.
    """
    block_id_str = block_id_str or str(K.get_uid())

    layers = [
        YoloDepthwiseConv2D(kernel_size,
                            padding='same',
                            name='conv_dw_' + block_id_str),
        CustomBatchNormalization(name='conv_dw_%s_bn' % block_id_str),
        LeakyReLU(alpha=0.1, name='conv_dw_%s_leaky_relu' % block_id_str),
    ]
    return compose(*layers)
def NanoConv2D_BN_Relu6(*args, **kwargs):
    """Darknet Convolution2D followed by CustomBatchNormalization and ReLU6.

    Positional and keyword arguments are forwarded to `DarknetConv2D`.
    `use_bias` defaults to False but can be overridden through `kwargs`.
    When a truthy `name` keyword is given, the batch-norm and ReLU layers
    are named '<name>_BN' and '<name>_relu'; otherwise they are unnamed.
    """
    base_name = kwargs.get('name')

    conv_kwargs = {'use_bias': False}
    if base_name:
        conv_kwargs['name'] = base_name + '_conv2d'
        bn_name = base_name + '_BN'
        relu_name = base_name + '_relu'
    else:
        bn_name = relu_name = None

    # NOTE(review): kwargs is merged last, so the caller's 'name' replaces
    # the '_conv2d' name set above and the convolution ends up named
    # exactly `name`. Kept as-is; changing it would rename existing layers.
    conv_kwargs.update(kwargs)

    return compose(
        DarknetConv2D(*args, **conv_kwargs),
        CustomBatchNormalization(name=bn_name),
        ReLU(6., name=relu_name))
def shuffle_unit(inputs, out_channels, bottleneck_ratio, strides=2, stage=1, block=1):
    """Build one shuffle unit (two-branch block ending in a channel shuffle).

    # Arguments
        inputs: input tensor (`channels_last` data format only).
        out_channels: base channel count; multiplied by `bottleneck_ratio`
            to get the filter count of every 1x1 convolution in the unit.
        bottleneck_ratio: multiplier applied to `out_channels`.
        strides: stride of the 3x3 depthwise convolution on the main
            branch. When it is below 2 the input is split with
            `channel_split` and one part bypasses the convolutions;
            otherwise a second, stride-2 convolutional branch is built
            from the full input.
        stage: stage number, used in layer names.
        block: block number, used in layer names.

    # Returns
        Concatenation of both branches passed through `channel_shuffle`.

    # Raises
        ValueError: if the Keras image data format is not 'channels_last'.
    """
    if K.image_data_format() != 'channels_last':
        raise ValueError('Only channels last supported')
    bn_axis = -1

    prefix = 'stage{}/block{}'.format(stage, block)
    bottleneck_channels = int(out_channels * bottleneck_ratio)

    def layer_name(template):
        """Fill the stage/block prefix into a layer-name template."""
        return template.format(prefix)

    split_input = strides < 2
    if split_input:
        c_hat, c = channel_split(inputs, layer_name('{}/spl'))
        inputs = c

    # Main branch: 1x1 conv -> 3x3 depthwise conv -> 1x1 conv
    branch = Conv2D(bottleneck_channels,
                    kernel_size=(1, 1),
                    strides=1,
                    padding='same',
                    name=layer_name('{}/1x1conv_1'))(inputs)
    branch = CustomBatchNormalization(axis=bn_axis,
                                      name=layer_name('{}/bn_1x1conv_1'))(branch)
    branch = Activation('relu', name=layer_name('{}/relu_1x1conv_1'))(branch)
    branch = DepthwiseConv2D(kernel_size=3,
                             strides=strides,
                             padding='same',
                             name=layer_name('{}/3x3dwconv'))(branch)
    branch = CustomBatchNormalization(axis=bn_axis,
                                      name=layer_name('{}/bn_3x3dwconv'))(branch)
    branch = Conv2D(bottleneck_channels,
                    kernel_size=1,
                    strides=1,
                    padding='same',
                    name=layer_name('{}/1x1conv_2'))(branch)
    branch = CustomBatchNormalization(axis=bn_axis,
                                      name=layer_name('{}/bn_1x1conv_2'))(branch)
    branch = Activation('relu', name=layer_name('{}/relu_1x1conv_2'))(branch)

    if split_input:
        # The untouched half of the split rejoins the main branch.
        merged = Concatenate(axis=bn_axis,
                             name=layer_name('{}/concat_1'))([branch, c_hat])
    else:
        # Downsampling unit: second branch is depthwise conv -> 1x1 conv.
        side = DepthwiseConv2D(kernel_size=3,
                               strides=2,
                               padding='same',
                               name=layer_name('{}/3x3dwconv_2'))(inputs)
        side = CustomBatchNormalization(axis=bn_axis,
                                        name=layer_name('{}/bn_3x3dwconv_2'))(side)
        side = Conv2D(bottleneck_channels,
                      kernel_size=1,
                      strides=1,
                      padding='same',
                      name=layer_name('{}/1x1_conv_3'))(side)
        side = CustomBatchNormalization(axis=bn_axis,
                                        name=layer_name('{}/bn_1x1conv_3'))(side)
        side = Activation('relu', name=layer_name('{}/relu_1x1conv_3'))(side)
        merged = Concatenate(axis=bn_axis,
                             name=layer_name('{}/concat_2'))([branch, side])

    return Lambda(channel_shuffle,
                  name=layer_name('{}/channel_shuffle'))(merged)
def MobileNetV2(input_shape=None, alpha=1.0, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000, **kwargs):
    """Instantiates the MobileNetV2 architecture.

    # Arguments
        input_shape: optional shape tuple; it is normalised by
            `_obtain_input_shape` (default size 224, minimum size 32).
        alpha: width multiplier applied to the filter counts of the
            blocks. The last convolution keeps 1280 filters unless
            `alpha` > 1.0, in which case it is scaled as well.
        include_top: whether to append global average pooling and the
            softmax `Dense` classifier.
        weights: one of `None` (random initialization), 'imagenet'
            (pre-trained weights are downloaded), or the path to a
            weights file to be loaded.
        input_tensor: optional tensor to use as the model input. It is
            used as-is; it is not validated against `input_shape`.
        pooling: only used when `include_top` is False. 'avg' applies
            global average pooling, 'max' applies global max pooling,
            any other value leaves the last feature map unchanged.
        classes: number of output classes when `include_top` is True.
        **kwargs: accepted but not used by this function.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: if `weights` is not None, 'imagenet' or an existing
            path; if 'imagenet' weights are requested with `include_top`
            and `classes` != 1000; or if 'imagenet' weights are requested
            with an unsupported `alpha`.
    """
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    use_imagenet = weights == 'imagenet'
    if use_imagenet and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # If input_shape is None and input_tensor is None using standard shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    if K.image_data_format() == 'channels_last':
        row_axis, col_axis = 0, 1
    else:
        row_axis, col_axis = 1, 2
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if use_imagenet:
        if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of `0.35`, `0.50`, `0.75`, '
                             '`1.0`, `1.3` or `1.4` only.')

        if rows != cols or rows not in [96, 128, 160, 192, 224]:
            # Fall back to the 224x224 weight file.
            rows = 224
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not in [96, 128, 160, 192, 224].'
                          ' Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    # A caller-supplied tensor is used directly as the model input.
    img_input = Input(shape=input_shape) if input_tensor is None else input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    bn_kwargs = dict(axis=channel_axis, epsilon=1e-3, momentum=0.999)

    # Stem: zero pad -> 3x3 stride-2 conv -> BN -> ReLU6
    first_block_filters = _make_divisible(32 * alpha, 8)
    x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                      name='Conv1_pad')(img_input)
    x = YoloConv2D(first_block_filters,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='valid',
                   use_bias=False,
                   name='Conv1')(x)
    x = CustomBatchNormalization(name='bn_Conv1', **bn_kwargs)(x)
    x = ReLU(6., name='Conv1_relu')(x)

    # (filters, stride, expansion) for block_id 0..16, in order.
    block_specs = (
        (16, 1, 1),
        (24, 2, 6),
        (24, 1, 6),
        (32, 2, 6),
        (32, 1, 6),
        (32, 1, 6),
        (64, 2, 6),
        (64, 1, 6),
        (64, 1, 6),
        (64, 1, 6),
        (96, 1, 6),
        (96, 1, 6),
        (96, 1, 6),
        (160, 2, 6),
        (160, 1, 6),
        (160, 1, 6),
        (320, 1, 6),
    )
    for block_id, (block_filters, block_stride, block_expansion) in enumerate(block_specs):
        x = _inverted_res_block(x,
                                filters=block_filters,
                                alpha=alpha,
                                stride=block_stride,
                                expansion=block_expansion,
                                block_id=block_id)

    # no alpha applied to last conv as stated in the paper:
    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    last_block_filters = 1280
    if alpha > 1.0:
        last_block_filters = _make_divisible(1280 * alpha, 8)

    x = YoloConv2D(last_block_filters,
                   kernel_size=1,
                   use_bias=False,
                   name='Conv_1')(x)
    x = CustomBatchNormalization(name='Conv_1_bn', **bn_kwargs)(x)
    x = ReLU(6., name='out_relu')(x)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        x = Dense(classes,
                  activation='softmax',
                  use_bias=True,
                  name='Logits')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is None:
        inputs = img_input
    else:
        inputs = get_source_inputs(input_tensor)

    # Create model.
    model = Model(inputs, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows))

    # Load weights.
    if use_imagenet:
        model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                      str(alpha) + '_' + str(rows))
        if not include_top:
            model_name = model_name + '_no_top'
        model_name = model_name + '.h5'
        weight_path = BASE_WEIGHT_PATH + model_name
        weights_path = get_file(model_name, weight_path, cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def GhostNet(input_shape=None, include_top=True, weights='imagenet', input_tensor=None, cfgs=DEFAULT_CFGS, width=1.0, dropout_rate=0.2, pooling=None, classes=1000, **kwargs):
    """Instantiates the GhostNet architecture.

    # Arguments
        input_shape: optional shape tuple; it is normalised by
            `_obtain_input_shape` (default size 224, minimum size 32).
        include_top: whether to append the classification head
            (global pooling, 1x1 'conv_head', optional dropout, softmax
            `Dense` classifier).
        weights: one of `None` (random initialization), 'imagenet'
            (pre-trained weights are downloaded), or the path to a
            weights file to be loaded.
        input_tensor: optional tensor to use as the model input. It is
            used as-is; it is not validated against `input_shape`.
        cfgs: model structure config: an iterable of stages, each an
            iterable of `(k, exp_size, c, se_ratio, s)` tuples describing
            one GhostBottleneck. It must contain at least one block,
            because the width of the layer after the stack is derived
            from the `exp_size` of the last block.
        width: width multiplier applied to the channel counts.
        dropout_rate: dropout rate before the classifier; dropout is
            only added when it is greater than 0.
        pooling: only used when `include_top` is False. 'avg' applies
            global average pooling, 'max' applies global max pooling,
            any other value leaves the last feature map unchanged.
        classes: number of output classes when `include_top` is True.
        **kwargs: accepted but not used by this function.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: if `weights` is not None, 'imagenet' or an existing
            path, or if 'imagenet' weights are requested with
            `include_top` and `classes` != 1000.
    """
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    use_imagenet = weights == 'imagenet'
    if use_imagenet and include_top and classes != 1000:
        raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                         'as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # If input_shape is None and input_tensor is None using standard shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    # A caller-supplied tensor is used directly as the model input.
    img_input = Input(shape=input_shape) if input_tensor is None else input_tensor

    # NOTE(review): computed but not passed to any layer below.
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # building first layer
    output_channel = int(_make_divisible(16 * width, 4))
    x = YoloConv2D(filters=output_channel,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='same',
                   use_bias=False,
                   name='conv_stem')(img_input)
    x = CustomBatchNormalization(name='bn1')(x)
    x = ReLU(name='Conv2D_1_act')(x)

    # building inverted residual blocks
    for index, cfg in enumerate(cfgs):
        for sub_index, (k, exp_size, c, se_ratio, s) in enumerate(cfg):
            output_channel = int(_make_divisible(c * width, 4))
            hidden_channel = int(_make_divisible(exp_size * width, 4))
            x = GhostBottleneck(x,
                                hidden_channel,
                                output_channel,
                                k,
                                (s,s),
                                se_ratio=se_ratio,
                                name='blocks_'+str(index)+'_'+str(sub_index))

    # Width of the closing 1x1 conv comes from the last block's exp_size.
    output_channel = _make_divisible(exp_size * width, 4)
    x = ConvBnAct(x, output_channel, kernel_size=1, name='blocks_9_0')

    if include_top:
        x = GlobalAveragePooling2D(name='global_avg_pooling2D')(x)
        # Restore a 1x1 spatial map so the head can use a convolution.
        if K.image_data_format() == 'channels_first':
            head_shape = (output_channel, 1, 1)
        else:
            head_shape = (1, 1, output_channel)
        x = Reshape(head_shape)(x)

        # building last several layers
        output_channel = 1280
        x = YoloConv2D(filters=output_channel,
                       kernel_size=1,
                       strides=(1,1),
                       padding='valid',
                       use_bias=True,
                       name='conv_head')(x)
        x = ReLU(name='relu_head')(x)
        if dropout_rate > 0.:
            x = Dropout(dropout_rate, name='dropout_1')(x)
        x = Flatten()(x)
        x = Dense(units=classes,
                  activation='softmax',
                  use_bias=True,
                  name='classifier')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is None:
        inputs = img_input
    else:
        inputs = get_source_inputs(input_tensor)

    # Create model.
    model = Model(inputs, x, name='ghostnet_%0.2f' % (width))

    # Load weights.
    if use_imagenet:
        if include_top:
            file_name = 'ghostnet_weights_tf_dim_ordering_tf_kernels_224.h5'
        else:
            file_name = 'ghostnet_weights_tf_dim_ordering_tf_kernels_224_no_top.h5'
        weight_path = BASE_WEIGHT_PATH + file_name
        weights_path = get_file(file_name, weight_path, cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def _shuffle_unit(inputs, in_channels, out_channels, groups, bottleneck_ratio, strides=2, stage=1, block=1):
    """
    Creates a shuffle unit.

    Parameters
    ----------
    inputs:
        Input tensor.
    in_channels:
        number of input channels
    out_channels:
        number of output channels
    groups:
        number of groups for the group convolutions and the channel
        shuffle. It is forced to 1 for the whole unit when `stage` is 2
        and `block` is 1.
    bottleneck_ratio: float
        ratio of bottleneck channels to output channels; the bottleneck
        width is `int(out_channels * bottleneck_ratio)`.
    strides:
        strides of the depthwise convolution. When below 2 the unit output
        is added to `inputs`; otherwise it is concatenated with an
        average-pooled copy of `inputs`.
    stage: int(1)
        stage number
    block: int(1)
        block number

    Returns
    -------
    Output tensor of the unit after the final ReLU.
    """
    bn_axis = -1 if K.image_data_format() == 'channels_last' else 1
    prefix = 'stage%d/block%d' % (stage, block)

    # default: 1/4 of the output channel of a ShuffleNet Unit
    bottleneck_channels = int(out_channels * bottleneck_ratio)

    # First unit of stage 2 runs without grouping.
    if stage == 2 and block == 1:
        groups = 1

    # 1x1 group conv -> BN -> ReLU -> channel shuffle
    net = _group_conv(inputs,
                      in_channels,
                      out_channels=bottleneck_channels,
                      groups=groups,
                      name='%s/1x1_gconv_1' % prefix)
    net = CustomBatchNormalization(axis=bn_axis,
                                   name='%s/bn_gconv_1' % prefix)(net)
    net = Activation('relu', name='%s/relu_gconv_1' % prefix)(net)
    net = Lambda(channel_shuffle,
                 arguments={'groups': groups},
                 name='%s/channel_shuffle' % prefix)(net)

    # 3x3 depthwise conv -> BN
    net = DepthwiseConv2D(kernel_size=(3, 3),
                          padding="same",
                          use_bias=False,
                          strides=strides,
                          name='%s/1x1_dwconv_1' % prefix)(net)
    net = CustomBatchNormalization(axis=bn_axis,
                                   name='%s/bn_dwconv_1' % prefix)(net)

    # Second 1x1 group conv. For strides other than 1 it leaves room for
    # the `in_channels` that the concatenated shortcut contributes.
    if strides == 1:
        second_conv_channels = out_channels
    else:
        second_conv_channels = out_channels - in_channels
    net = _group_conv(net,
                      bottleneck_channels,
                      out_channels=second_conv_channels,
                      groups=groups,
                      name='%s/1x1_gconv_2' % prefix)
    net = CustomBatchNormalization(axis=bn_axis,
                                   name='%s/bn_gconv_2' % prefix)(net)

    if strides < 2:
        merged = Add(name='%s/add' % prefix)([net, inputs])
    else:
        pooled = AveragePooling2D(pool_size=3,
                                  strides=2,
                                  padding='same',
                                  name='%s/avg_pool' % prefix)(inputs)
        merged = Concatenate(bn_axis, name='%s/concat' % prefix)([net, pooled])

    return Activation('relu', name='%s/relu_out' % prefix)(merged)
def EfficientNet(width_coefficient,
                 depth_coefficient,
                 default_size,
                 dropout_rate=0.2,
                 drop_connect_rate=0.2,
                 depth_divisor=8,
                 activation_fn=swish,
                 blocks_args=DEFAULT_BLOCKS_ARGS,
                 model_name='efficientnet',
                 include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 input_shape=None,
                 pooling=None,
                 classes=1000,
                 **kwargs):
    """Instantiates the EfficientNet architecture using given scaling coefficients.

    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one returned by `K.image_data_format()`.

    # Arguments
        width_coefficient: float, scaling coefficient for network width.
        depth_coefficient: float, scaling coefficient for network depth.
        default_size: integer, default input image size.
        dropout_rate: float, dropout rate before final classifier layer
            (dropout is only added when it is > 0 and `include_top` is True).
        drop_connect_rate: float, dropout rate at skip connections. The rate
            handed to block number `b` is `drop_connect_rate * b / N`, where
            `N` is the number of blocks actually built (i.e. after depth
            scaling), so it grows linearly and stays below
            `drop_connect_rate`.
        depth_divisor: integer, a unit of network width.
        activation_fn: activation function.
        blocks_args: list of dicts, parameters to construct block modules.
            The list is deep-copied, so the caller's object is not modified.
        model_name: string, model name. For `weights='imagenet'` its last
            two characters select the entry in `WEIGHTS_HASHES`.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
            'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor
            (i.e. output of `layers.Input()`)
            to use as image input for the model. It is used as-is.
        input_shape: optional shape tuple; it is validated/completed by
            `_obtain_input_shape` with `min_size=32`.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        **kwargs: accepted for call compatibility; not used here.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`, or when
            `weights='imagenet'`, `include_top` is True and `classes`
            is not 1000.
        AssertionError: if a block definition has `repeats <= 0`.
    """
    from copy import deepcopy

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as `"imagenet"` with `include_top`'
                         ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # A caller-supplied tensor is used directly as the model input.
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    def round_filters(filters, divisor=depth_divisor):
        """Round number of filters based on depth multiplier."""
        filters *= width_coefficient
        new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
        # Make sure that round down does not go down by more than 10%.
        if new_filters < 0.9 * filters:
            new_filters += divisor
        return int(new_filters)

    def round_repeats(repeats):
        """Round number of repeats based on depth multiplier."""
        return int(math.ceil(depth_coefficient * repeats))

    # Build stem
    x = img_input
    x = ZeroPadding2D(padding=correct_pad(K, x, 3),
                      name='stem_conv_pad')(x)
    x = YoloConv2D(round_filters(32), 3,
                   strides=2,
                   padding='valid',
                   use_bias=False,
                   kernel_initializer=CONV_KERNEL_INITIALIZER,
                   name='stem_conv')(x)
    x = CustomBatchNormalization(axis=bn_axis, name='stem_bn')(x)
    x = Activation(activation_fn, name='stem_activation')(x)

    # Build blocks (work on a private copy: the loop below mutates the dicts)
    blocks_args = deepcopy(blocks_args)

    b = 0
    # Total number of blocks that will really be built. The repeats must be
    # depth-scaled here as well, otherwise `b / blocks` exceeds 1 for
    # depth_coefficient > 1 and the drop rate overshoots `drop_connect_rate`.
    blocks = float(sum(round_repeats(args['repeats']) for args in blocks_args))
    for (i, args) in enumerate(blocks_args):
        assert args['repeats'] > 0
        # Update block input and output filters based on depth multiplier.
        args['filters_in'] = round_filters(args['filters_in'])
        args['filters_out'] = round_filters(args['filters_out'])

        for j in range(round_repeats(args.pop('repeats'))):
            # The first block needs to take care of stride and filter size increase.
            if j > 0:
                args['strides'] = 1
                args['filters_in'] = args['filters_out']
            x = block(x, activation_fn, drop_connect_rate * b / blocks,
                      name='block{}{}_'.format(i + 1, chr(j + 97)), **args)
            b += 1

    # Build top
    x = YoloConv2D(round_filters(1280), 1,
                   padding='same',
                   use_bias=False,
                   kernel_initializer=CONV_KERNEL_INITIALIZER,
                   name='top_conv')(x)
    x = CustomBatchNormalization(axis=bn_axis, name='top_bn')(x)
    x = Activation(activation_fn, name='top_activation')(x)
    if include_top:
        x = GlobalAveragePooling2D(name='avg_pool')(x)
        if dropout_rate > 0:
            x = Dropout(dropout_rate, name='top_dropout')(x)
        x = Dense(classes,
                  activation='softmax',
                  kernel_initializer=DENSE_KERNEL_INITIALIZER,
                  name='probs')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D(name='max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name=model_name)

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            file_suff = '_weights_tf_dim_ordering_tf_kernels_autoaugment.h5'
            file_hash = WEIGHTS_HASHES[model_name[-2:]][0]
        else:
            file_suff = '_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'
            file_hash = WEIGHTS_HASHES[model_name[-2:]][1]
        file_name = model_name + file_suff
        weights_path = get_file(file_name,
                                BASE_WEIGHTS_PATH + file_name,
                                cache_subdir='models',
                                file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
                          depth_multiplier=1, strides=(1, 1), block_id=1):
    """Adds a depthwise convolution block.

    A depthwise convolution block consists of a depthwise conv,
    batch normalization, relu6, pointwise convolution,
    batch normalization and relu6 activation.

    # Arguments
        inputs: Input tensor of shape `(rows, cols, channels)`
            (with `channels_last` data format) or
            (channels, rows, cols) (with `channels_first` data format).
        pointwise_conv_filters: Integer, the dimensionality of the output space
            (i.e. the number of output filters in the pointwise convolution).
            The value actually used is `int(pointwise_conv_filters * alpha)`.
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        depth_multiplier: The number of depthwise convolution output channels
            for each input channel.
            The total number of depthwise convolution output
            channels will be equal to `filters_in * depth_multiplier`.
        strides: An integer or tuple/list of 2 integers,
            specifying the strides of the convolution
            along the width and height.
            Can be a single integer to specify the same value for
            all spatial dimensions. A unit stride (`1`, `(1, 1)` or
            `[1, 1]`) uses `'same'` padding; any other stride zero-pads the
            input by `((0, 1), (0, 1))` and uses `'valid'` padding.
        block_id: Integer, a unique identification designating
            the block number; it is embedded in every layer name.

    # Input shape
        4D tensor with shape:
        `(batch, channels, rows, cols)` if data_format='channels_first'
        or 4D tensor with shape:
        `(batch, rows, cols, channels)` if data_format='channels_last'.

    # Output shape
        4D tensor with shape:
        `(batch, filters, new_rows, new_cols)`
        if data_format='channels_first'
        or 4D tensor with shape:
        `(batch, new_rows, new_cols, filters)`
        if data_format='channels_last'.
        `rows` and `cols` values might have changed due to stride.

    # Returns
        Output tensor of block.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    pointwise_conv_filters = int(pointwise_conv_filters * alpha)

    # Normalize `strides` for the comparison only: a bare `1` or a list
    # `[1, 1]` must be treated as a unit stride as well, otherwise the block
    # would pad and use 'valid' convolution, shrinking the feature map.
    if isinstance(strides, (tuple, list)):
        strides_2d = tuple(strides)
    else:
        strides_2d = (strides, strides)
    unit_stride = strides_2d == (1, 1)

    if unit_stride:
        x = inputs
    else:
        x = ZeroPadding2D(((0, 1), (0, 1)),
                          name='conv_pad_%d' % block_id)(inputs)

    # Depthwise 3x3 conv -> BN -> ReLU6
    x = YoloDepthwiseConv2D((3, 3),
                            padding='same' if unit_stride else 'valid',
                            depth_multiplier=depth_multiplier,
                            strides=strides,
                            use_bias=False,
                            name='conv_dw_%d' % block_id)(x)
    x = CustomBatchNormalization(
        axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x)
    x = ReLU(6., name='conv_dw_%d_relu' % block_id)(x)

    # Pointwise 1x1 conv -> BN -> ReLU6
    x = YoloConv2D(pointwise_conv_filters, (1, 1),
                   padding='same',
                   use_bias=False,
                   strides=(1, 1),
                   name='conv_pw_%d' % block_id)(x)
    x = CustomBatchNormalization(
        axis=channel_axis, name='conv_pw_%d_bn' % block_id)(x)
    return ReLU(6., name='conv_pw_%d_relu' % block_id)(x)
def MobileNetV3(stack_fn,
                last_point_ch,
                input_shape=None,
                alpha=1.0,
                model_type='large',
                minimalistic=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                classes=1000,
                pooling=None,
                dropout_rate=0.2,
                **kwargs):
    """Instantiates the MobileNetV3 architecture.

    # Arguments
        stack_fn: a function that returns output tensor for the
            stacked residual blocks. It is called as
            `stack_fn(x, kernel, activation, se_ratio)`.
        last_point_ch: number channels at the last layer (before top).
            Scaled by `alpha` (through `_depth`) when `alpha > 1.0`.
        input_shape: optional shape tuple. It is passed through
            `_obtain_input_shape` (default size 224, minimum size 32).
            E.g. `(160, 160, 3)` would be one valid value.
        alpha: controls the width of the network. This is known as the
            depth multiplier in the MobileNetV3 paper, but the name is kept for
            consistency with MobileNetV1 in Keras.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                are used at each layer.
        model_type: string used in the model name and in the name of the
            pre-trained weight file (e.g. 'large' or 'small').
        minimalistic: when true, the stack is built with kernel size 3,
            `relu` activation and no squeeze-and-excite ratio; otherwise
            kernel size 5, `hard_swish` and an SE ratio of 0.25 are used.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`) to use as image input for the model.
            It is used as-is; no consistency check against `input_shape`
            is performed here.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        dropout_rate: fraction of the input units to drop on the last layer
            (only applied when it is > 0 and `include_top` is True).
        **kwargs: accepted for call compatibility; not used here.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`, an input
            smaller than 32x32, or an unsupported `alpha` / `classes`
            combination when weights='imagenet'.
    """
    # ---- argument validation -------------------------------------------
    if weights not in {'imagenet', None} and not os.path.exists(weights):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                         'as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # Fall back to a fully dynamic spatial shape when nothing is known.
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    channels_last = K.image_data_format() == 'channels_last'
    row_axis, col_axis = (0, 1) if channels_last else (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if rows and cols and (rows < 32 or cols < 32):
        raise ValueError('Input size must be at least 32x32; got `input_shape=' +
                         str(input_shape) + '`')

    if weights == 'imagenet':
        if ((minimalistic is False and alpha not in [0.75, 1.0])
                or (minimalistic is True and alpha != 1.0)):
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of `0.75`, `1.0` for non minimalistic'
                             ' or `1.0` for minimalistic only.')

        if rows != cols or rows != 224:
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not 224.'
                          ' Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    # ---- model input ---------------------------------------------------
    img_input = Input(shape=input_shape) if input_tensor is None else input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if minimalistic:
        kernel, activation, se_ratio = 3, relu, None
    else:
        kernel, activation, se_ratio = 5, hard_swish, 0.25

    # ---- stem ----------------------------------------------------------
    x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                      name='Conv_pad')(img_input)
    x = Conv2D(16,
               kernel_size=3,
               strides=(2, 2),
               padding='valid',
               use_bias=False,
               name='Conv')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv/BatchNorm')(x)
    x = Activation(activation)(x)

    # ---- stacked blocks ------------------------------------------------
    x = stack_fn(x, kernel, activation, se_ratio)

    last_conv_ch = _depth(K.int_shape(x)[channel_axis] * 6)

    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_point_ch = _depth(last_point_ch * alpha)

    # ---- head ----------------------------------------------------------
    x = Conv2D(last_conv_ch,
               kernel_size=1,
               padding='same',
               use_bias=False,
               name='Conv_1')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv_1/BatchNorm')(x)
    x = Activation(activation)(x)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        # Restore a 1x1 spatial map so the classifier can be built from
        # 1x1 convolutions.
        if channel_axis == 1:
            pooled_shape = (last_conv_ch, 1, 1)
        else:
            pooled_shape = (1, 1, last_conv_ch)
        x = Reshape(pooled_shape)(x)
        x = Conv2D(last_point_ch,
                   kernel_size=1,
                   padding='same',
                   name='Conv_2')(x)
        x = Activation(activation)(x)
        if dropout_rate > 0:
            x = Dropout(dropout_rate)(x)
        x = Conv2D(classes,
                   kernel_size=1,
                   padding='same',
                   name='Logits')(x)
        x = Flatten()(x)
        x = Softmax(name='Predictions/Softmax')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D(name='avg_pool')(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D(name='max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    inputs = get_source_inputs(input_tensor) if input_tensor is not None else img_input

    # Create model.
    model = Model(inputs, x, name='MobilenetV3' + model_type)

    # ---- weights -------------------------------------------------------
    if weights == 'imagenet':
        model_name = "{}{}_224_{}_float".format(
            model_type, '_minimalistic' if minimalistic else '', str(alpha))
        if include_top:
            file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
            hash_index = 0
        else:
            file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
            hash_index = 1
        file_hash = WEIGHTS_HASHES[model_name][hash_index]
        weights_path = get_file(file_name,
                                BASE_WEIGHT_PATH + file_name,
                                cache_subdir='models',
                                file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def block(inputs, activation_fn=swish, drop_rate=0., name='',
          filters_in=32, filters_out=16, kernel_size=3, strides=1,
          expand_ratio=1, se_ratio=0., id_skip=True):
    """A mobile inverted residual block.

    The block is: optional 1x1 expansion -> depthwise conv -> optional
    squeeze-and-excitation -> 1x1 projection, with an optional identity
    skip connection around it.

    # Arguments
        inputs: input tensor.
        activation_fn: activation function.
        drop_rate: float between 0 and 1, fraction of the input units to drop.
            Only used on the skip-connection path.
        name: string, block label, used as prefix of every layer name.
        filters_in: integer, the number of input filters.
        filters_out: integer, the number of output filters.
        kernel_size: integer, the dimension of the convolution window.
        strides: integer, the stride of the convolution.
        expand_ratio: integer, scaling coefficient for the input filters.
            The expansion conv is skipped when it equals 1.
        se_ratio: float between 0 and 1, fraction to squeeze the input filters.
            Squeeze-and-excitation is only built when `0 < se_ratio <= 1`.
        id_skip: boolean. The skip connection is added only when this is
            `True`, `strides == 1` and `filters_in == filters_out`.

    # Returns
        output tensor for the block.
    """
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    # Expansion phase
    filters = filters_in * expand_ratio
    if expand_ratio != 1:
        x = YoloConv2D(filters, 1,
                       padding='same',
                       use_bias=False,
                       kernel_initializer=CONV_KERNEL_INITIALIZER,
                       name=name + 'expand_conv')(inputs)
        x = CustomBatchNormalization(axis=bn_axis, name=name + 'expand_bn')(x)
        x = Activation(activation_fn, name=name + 'expand_activation')(x)
    else:
        x = inputs

    # Depthwise Convolution
    if strides == 2:
        # Explicit padding + 'valid' conv instead of 'same' for stride 2.
        x = ZeroPadding2D(padding=correct_pad(K, x, kernel_size),
                          name=name + 'dwconv_pad')(x)
        conv_pad = 'valid'
    else:
        conv_pad = 'same'
    x = YoloDepthwiseConv2D(kernel_size,
                            strides=strides,
                            padding=conv_pad,
                            use_bias=False,
                            depthwise_initializer=CONV_KERNEL_INITIALIZER,
                            name=name + 'dwconv')(x)
    x = CustomBatchNormalization(axis=bn_axis, name=name + 'bn')(x)
    x = Activation(activation_fn, name=name + 'activation')(x)

    # Squeeze and Excitation phase
    if 0 < se_ratio <= 1:
        filters_se = max(1, int(filters_in * se_ratio))
        se = GlobalAveragePooling2D(name=name + 'se_squeeze')(x)
        # Put the channels back on the axis the data format expects, so the
        # 1x1 convs below and the final multiply line up with `x` for both
        # 'channels_last' and 'channels_first'.
        if bn_axis == 1:
            se_shape = (filters, 1, 1)
        else:
            se_shape = (1, 1, filters)
        se = Reshape(se_shape, name=name + 'se_reshape')(se)
        se = YoloConv2D(filters_se, 1,
                        padding='same',
                        activation=activation_fn,
                        kernel_initializer=CONV_KERNEL_INITIALIZER,
                        name=name + 'se_reduce')(se)
        se = YoloConv2D(filters, 1,
                        padding='same',
                        activation='sigmoid',
                        kernel_initializer=CONV_KERNEL_INITIALIZER,
                        name=name + 'se_expand')(se)
        if K.backend() == 'theano':
            # For the Theano backend, we have to explicitly make
            # the excitation weights broadcastable.
            # NOTE(review): this pattern is written for the channels_last
            # layout; confirm before relying on Theano + channels_first.
            se = Lambda(
                lambda x: K.pattern_broadcast(x, [True, True, True, False]),
                output_shape=lambda input_shape: input_shape,
                name=name + 'se_broadcast')(se)
        x = multiply([x, se], name=name + 'se_excite')

    # Output phase
    x = YoloConv2D(filters_out, 1,
                   padding='same',
                   use_bias=False,
                   kernel_initializer=CONV_KERNEL_INITIALIZER,
                   name=name + 'project_conv')(x)
    x = CustomBatchNormalization(axis=bn_axis, name=name + 'project_bn')(x)

    if (id_skip is True and strides == 1 and filters_in == filters_out):
        if drop_rate > 0:
            if tf2.enabled():
                # noise_shape (None, 1, 1, 1) shares one keep/drop decision
                # across all non-batch axes.
                x = Dropout(drop_rate,
                            noise_shape=(None, 1, 1, 1),
                            name=name + 'drop')(x)
            else:
                # noise_shape is deliberately not passed on this path;
                # presumably a TF1 compatibility workaround - TODO confirm.
                x = Dropout(drop_rate,
                            name=name + 'drop')(x)
        x = add([x, inputs], name=name + 'add')

    return x