def _se_block(inputs, filters, se_ratio, prefix):
    """Re-weight `inputs` with a squeeze-and-excite gate.

    The input is globally average-pooled, reshaped to a 1x1 spatial map,
    reduced by a 1x1 convolution to `_depth(filters * se_ratio)` channels,
    passed through ReLU, expanded back to `filters` channels by a second
    1x1 convolution and squashed with `hard_sigmoid`. The resulting gate is
    multiplied with `inputs`.

    # Arguments
        inputs: tensor to re-weight.
        filters: channel count used for the reshape and for the expanding
            convolution.
        se_ratio: multiplier applied to `filters` to size the reduction.
        prefix: string prepended to the name of every named layer.

    # Returns
        Output tensor of the final `Multiply` layer.
    """
    # Keep the channel dimension where the active data format expects it.
    if K.image_data_format() == 'channels_first':
        gate_shape = (filters, 1, 1)
    else:
        gate_shape = (1, 1, filters)

    gate = GlobalAveragePooling2D(
        name=prefix + 'squeeze_excite/AvgPool')(inputs)
    gate = Reshape(gate_shape)(gate)

    squeeze_conv = YoloConv2D(_depth(filters * se_ratio),
                              kernel_size=1,
                              padding='same',
                              name=prefix + 'squeeze_excite/Conv')
    gate = squeeze_conv(gate)
    gate = ReLU(name=prefix + 'squeeze_excite/Relu')(gate)

    excite_conv = YoloConv2D(filters,
                             kernel_size=1,
                             padding='same',
                             name=prefix + 'squeeze_excite/Conv_1')
    gate = excite_conv(gate)
    gate = Activation(hard_sigmoid)(gate)

    # NOTE: no Theano-specific broadcast handling is applied to the gate.
    return Multiply(name=prefix + 'squeeze_excite/Mul')([inputs, gate])
def _group_conv(x, in_channels, out_channels, groups, kernel=1, stride=1,
                name=''):
    """Grouped convolution built from per-group slices and convolutions.

    With `groups == 1` this is a single bias-free `YoloConv2D`. Otherwise the
    last axis of `x` is cut into `groups` consecutive slices of
    `in_channels // groups` channels, each slice is convolved separately and
    the results are concatenated.

    Parameters
    ----------
    x:
        Input tensor; slicing is done on the last axis, i.e. the
        `channels_last` data format is expected.
    in_channels:
        number of input channels
    out_channels:
        number of output channels; must be divisible by `groups`
    groups:
        number of groups the channels are split into
    kernel: int(1)
        An integer or tuple/list of 2 integers, specifying the
        width and height of the 2D convolution window.
        Can be a single integer to specify the same value for
        all spatial dimensions.
    stride: int(1)
        An integer or tuple/list of 2 integers,
        specifying the strides of the convolution along the width and height.
        Can be a single integer to specify the same value for all spatial
        dimensions.
    name: str
        A string that specifies the layer name prefix

    Returns
    -------
    Output tensor of the single convolution (`groups == 1`) or of the
    `Concatenate` layer joining all group outputs.
    """
    if groups == 1:
        return YoloConv2D(filters=out_channels,
                          kernel_size=kernel,
                          padding='same',
                          use_bias=False,
                          strides=stride,
                          name=name)(x)

    # number of input channels per group
    ig = in_channels // groups
    assert out_channels % groups == 0
    # number of output channels per group (loop-invariant)
    og = int(0.5 + out_channels / groups)

    group_list = []
    for i in range(groups):
        offset = i * ig
        # Bind `offset` and `ig` as defaults so each Lambda keeps its own
        # slice bounds. A plain closure would look up `offset` lazily and
        # every layer would see the last group's value if the function is
        # invoked again (e.g. when the model is cloned or reloaded).
        group = Lambda(
            lambda z, offset=offset, ig=ig: z[:, :, :, offset:offset + ig],
            name='%s/g%d_slice' % (name, i))(x)
        group_list.append(
            YoloConv2D(og,
                       kernel_size=kernel,
                       strides=stride,
                       use_bias=False,
                       padding='same',
                       name='%s_/g%d' % (name, i))(group))
    return Concatenate(name='%s/concat' % name)(group_list)
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
    """Build one inverted residual block (expand, depthwise, project).

    # Arguments
        inputs: input tensor.
        expansion: multiplier applied to the input channel count for the
            1x1 expansion convolution.
        stride: stride of the 3x3 depthwise convolution; a stride of 2 adds
            explicit zero padding and switches the convolution to 'valid'.
        alpha: width multiplier applied to `filters`.
        filters: base number of output channels; `int(filters * alpha)` is
            passed through `_make_divisible(..., 8)`.
        block_id: block index used in layer names. A falsy value (e.g. 0)
            skips the expansion stage and uses the 'expanded_conv_' prefix.

    # Returns
        Output tensor; `inputs` is added to it when the channel count is
        unchanged and `stride == 1`.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    in_channels = K.int_shape(inputs)[channel_axis]
    pointwise_filters = _make_divisible(int(filters * alpha), 8)

    # Every batch-norm layer in this block shares the same settings.
    bn_kwargs = dict(axis=channel_axis, epsilon=1e-3, momentum=0.999)

    net = inputs
    if block_id:
        prefix = 'block_{}_'.format(block_id)
        # Expand
        net = YoloConv2D(expansion * in_channels,
                         kernel_size=1,
                         padding='same',
                         use_bias=False,
                         activation=None,
                         name=prefix + 'expand')(net)
        net = CustomBatchNormalization(name=prefix + 'expand_BN',
                                       **bn_kwargs)(net)
        net = ReLU(6., name=prefix + 'expand_relu')(net)
    else:
        prefix = 'expanded_conv_'

    # Depthwise
    if stride == 2:
        net = ZeroPadding2D(padding=correct_pad(K, net, 3),
                            name=prefix + 'pad')(net)
    depthwise_padding = 'same' if stride == 1 else 'valid'
    net = YoloDepthwiseConv2D(kernel_size=3,
                              strides=stride,
                              activation=None,
                              use_bias=False,
                              padding=depthwise_padding,
                              name=prefix + 'depthwise')(net)
    net = CustomBatchNormalization(name=prefix + 'depthwise_BN',
                                   **bn_kwargs)(net)
    net = ReLU(6., name=prefix + 'depthwise_relu')(net)

    # Project
    net = YoloConv2D(pointwise_filters,
                     kernel_size=1,
                     padding='same',
                     use_bias=False,
                     activation=None,
                     name=prefix + 'project')(net)
    net = CustomBatchNormalization(name=prefix + 'project_BN',
                                   **bn_kwargs)(net)

    # Residual connection only when input and output tensors line up.
    if in_channels == pointwise_filters and stride == 1:
        net = Add(name=prefix + 'add')([inputs, net])
    return net
def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio,
                        activation, block_id):
    """Build one inverted residual block with optional squeeze-and-excite.

    # Arguments
        x: input tensor.
        expansion: multiplier applied to the input channel count for the
            1x1 expansion convolution.
        filters: number of channels produced by the projection convolution.
        kernel_size: kernel size of the depthwise convolution.
        stride: stride of the depthwise convolution; a stride of 2 adds
            explicit zero padding and switches the convolution to 'valid'.
        se_ratio: when truthy, a `_se_block` with this ratio is inserted
            after the depthwise stage.
        activation: activation passed to `Activation` after the expansion
            and depthwise stages.
        block_id: block index used in layer names. A falsy value (e.g. 0)
            skips the expansion stage and uses the 'expanded_conv/' prefix.

    # Returns
        Output tensor; the block input is added to it when `stride == 1`
        and the input channel count equals `filters`.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    block_input = x
    infilters = K.int_shape(x)[channel_axis]

    def batch_norm(layer_name):
        # All batch-norm layers of the block share these hyper-parameters.
        return CustomBatchNormalization(axis=channel_axis,
                                        epsilon=1e-3,
                                        momentum=0.999,
                                        name=layer_name)

    prefix = 'expanded_conv/'
    if block_id:
        # Expand
        prefix = 'expanded_conv_{}/'.format(block_id)
        x = YoloConv2D(_depth(infilters * expansion),
                       kernel_size=1,
                       padding='same',
                       use_bias=False,
                       name=prefix + 'expand')(x)
        x = batch_norm(prefix + 'expand/BatchNorm')(x)
        x = Activation(activation)(x)

    # Depthwise
    if stride == 2:
        x = ZeroPadding2D(padding=correct_pad(K, x, kernel_size),
                          name=prefix + 'depthwise/pad')(x)
    depthwise_padding = 'same' if stride == 1 else 'valid'
    x = YoloDepthwiseConv2D(kernel_size,
                            strides=stride,
                            padding=depthwise_padding,
                            use_bias=False,
                            name=prefix + 'depthwise/Conv')(x)
    x = batch_norm(prefix + 'depthwise/BatchNorm')(x)
    x = Activation(activation)(x)

    # Optional squeeze-and-excite on the expanded feature map
    if se_ratio:
        x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix)

    # Project
    x = YoloConv2D(filters,
                   kernel_size=1,
                   padding='same',
                   use_bias=False,
                   name=prefix + 'project')(x)
    x = batch_norm(prefix + 'project/BatchNorm')(x)

    if stride == 1 and infilters == filters:
        return Add(name=prefix + 'Add')([block_input, x])
    return x
def _ep_block(inputs, filters, stride, expansion, block_id):
    """Build an expand / depthwise / project block ("ep_block").

    # Arguments
        inputs: input tensor; the channel count is read from its last axis.
        filters: number of channels produced by the projection convolution
            (converted with `int`).
        stride: stride of the 3x3 depthwise convolution; a stride of 2 adds
            explicit zero padding and switches the convolution to 'valid'.
        expansion: multiplier applied to the input channel count for the
            1x1 expansion convolution.
        block_id: value formatted into the 'ep_block_{}_' layer-name prefix.

    # Returns
        Output tensor; `inputs` is added to it when the channel count is
        unchanged and `stride == 1`.
    """
    in_channels = inputs.shape.as_list()[-1]
    out_channels = int(filters)
    prefix = 'ep_block_{}_'.format(block_id)

    def batch_norm(layer_name):
        # Same batch-norm settings for all three stages.
        return CustomBatchNormalization(epsilon=1e-3,
                                        momentum=0.999,
                                        name=layer_name)

    # Expand
    net = YoloConv2D(int(expansion * in_channels),
                     kernel_size=1,
                     padding='same',
                     use_bias=False,
                     activation=None,
                     name=prefix + 'expand')(inputs)
    net = batch_norm(prefix + 'expand_BN')(net)
    net = ReLU(6., name=prefix + 'expand_relu')(net)

    # Depthwise
    if stride == 2:
        net = ZeroPadding2D(padding=correct_pad(K, net, 3),
                            name=prefix + 'pad')(net)
    depthwise_padding = 'same' if stride == 1 else 'valid'
    net = YoloDepthwiseConv2D(kernel_size=3,
                              strides=stride,
                              activation=None,
                              use_bias=False,
                              padding=depthwise_padding,
                              name=prefix + 'depthwise')(net)
    net = batch_norm(prefix + 'depthwise_BN')(net)
    net = ReLU(6., name=prefix + 'depthwise_relu')(net)

    # Project
    net = YoloConv2D(out_channels,
                     kernel_size=1,
                     padding='same',
                     use_bias=False,
                     activation=None,
                     name=prefix + 'project')(net)
    net = batch_norm(prefix + 'project_BN')(net)

    if in_channels == out_channels and stride == 1:
        net = Add(name=prefix + 'add')([inputs, net])
    return net
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    """Add the initial convolution layer (with batch normalization and relu6).

    The input is zero-padded by one pixel at the bottom and right, convolved
    with 'valid' padding and no bias, batch-normalized and clipped by
    `ReLU(6.)`. Layers are named 'conv1_pad', 'conv1', 'conv1_bn' and
    'conv1_relu'.

    # Arguments
        inputs: Input tensor of shape `(rows, cols, 3)`
            (with `channels_last` data format) or
            `(3, rows, cols)` (with `channels_first` data format).
        filters: Integer, the base dimensionality of the output space
            (i.e. the number of output filters in the convolution) before
            scaling by `alpha`.
        alpha: width multiplier; the convolution uses
            `int(filters * alpha)` filters.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters.
            - If `alpha` > 1.0, proportionally increases the number
                of filters.
            - If `alpha` = 1, `filters` is used unchanged.
        kernel: An integer or tuple/list of 2 integers, specifying the
            width and height of the 2D convolution window.
        strides: An integer or tuple/list of 2 integers, specifying the
            strides of the convolution along the width and height.

    # Input shape
        4D tensor with shape `(samples, channels, rows, cols)` if
        data_format='channels_first', or `(samples, rows, cols, channels)`
        if data_format='channels_last'.

    # Output shape
        4D tensor with `int(filters * alpha)` channels; `rows` and `cols`
        may change due to padding and stride.

    # Returns
        Output tensor of block.
    """
    if K.image_data_format() == 'channels_first':
        channel_axis = 1
    else:
        channel_axis = -1
    scaled_filters = int(filters * alpha)

    padded = ZeroPadding2D(padding=((0, 1), (0, 1)),
                           name='conv1_pad')(inputs)
    features = YoloConv2D(scaled_filters,
                          kernel,
                          padding='valid',
                          use_bias=False,
                          strides=strides,
                          name='conv1')(padded)
    normalized = CustomBatchNormalization(axis=channel_axis,
                                          name='conv1_bn')(features)
    return ReLU(6., name='conv1_relu')(normalized)
def DarknetConv2D(*args, **kwargs):
    """Wrapper to set Darknet parameters for YoloConv2D.

    The default padding is 'valid' when the `strides` keyword equals the
    tuple `(2, 2)` and 'same' otherwise. An explicit `padding` keyword
    supplied by the caller takes precedence.
    """
    if kwargs.get('strides') == (2, 2):
        default_padding = 'valid'
    else:
        default_padding = 'same'
    # Caller-supplied keywords come last so they override the default.
    merged_kwargs = {'padding': default_padding, **kwargs}
    return YoloConv2D(*args, **merged_kwargs)
def SqueezeExcite(input_x, se_ratio=0.25, reduced_base_chs=None, divisor=4,
                  name=None):
    """Squeeze-and-excite gate applied to `input_x`.

    The input is globally average-pooled, reshaped to a 1x1 spatial map,
    reduced by a biased 1x1 convolution, passed through ReLU, expanded back
    to the input channel count by a second biased 1x1 convolution and turned
    into a gate with `hard_sigmoid`. The gate is multiplied with `input_x`.

    # Arguments
        input_x: input tensor.
        se_ratio: multiplier applied to the base channel count to size the
            reduction convolution.
        reduced_base_chs: base channel count for the reduction; when falsy
            the channel count of `input_x` is used.
        divisor: passed to `_make_divisible` for the reduced channel count.
        name: layer-name prefix. It is concatenated with string suffixes, so
            a string must be supplied despite the `None` default.

    # Returns
        Output tensor of the final `Multiply` layer.
    """
    # Read the channel count from the axis that matches the active data
    # format; using the last axis unconditionally would pick up the width
    # under 'channels_first' and break the reshape below.
    channels_first = K.image_data_format() == 'channels_first'
    channel_axis = 1 if channels_first else -1
    channels = int(input_x.shape[channel_axis])

    reduce_chs = _make_divisible((reduced_base_chs or channels) * se_ratio,
                                 divisor)

    x = GlobalAveragePooling2D(name=name + '_avg_pool2d')(input_x)
    if channels_first:
        x = Reshape((channels, 1, 1))(x)
    else:
        x = Reshape((1, 1, channels))(x)

    x = YoloConv2D(filters=reduce_chs,
                   kernel_size=1,
                   use_bias=True,
                   name=name + '_conv_reduce')(x)
    x = ReLU(name=name + '_act')(x)
    x = YoloConv2D(filters=channels,
                   kernel_size=1,
                   use_bias=True,
                   name=name + '_conv_expand')(x)
    x = Activation(hard_sigmoid, name=name + '_hard_sigmoid')(x)
    x = Multiply()([input_x, x])
    return x
def basic_conv2d_graph(x, out_channels, kernel_size, strides, padding,
                       activation=True, name=''):
    """Bias-free convolution followed by batch norm and an optional ReLU.

    # Arguments
        x: input tensor.
        out_channels: number of convolution filters.
        kernel_size: convolution kernel size.
        strides: convolution strides.
        padding: convolution padding mode.
        activation: when truthy, an (unnamed) `ReLU` is appended.
        name: prefix for the '_conv' and '_norm' layer names.

    # Returns
        Output tensor of the last layer applied.
    """
    conv_layer = YoloConv2D(out_channels,
                            kernel_size=kernel_size,
                            strides=strides,
                            padding=padding,
                            use_bias=False,
                            name=name + '_conv')
    out = CustomBatchNormalization(name=name + '_norm')(conv_layer(x))
    if not activation:
        return out
    return ReLU()(out)
def ConvBnAct(input_x, out_chs, kernel_size, stride=(1,1), name=None):
    """Apply a bias-free 'valid' convolution, batch norm and ReLU.

    # Arguments
        input_x: input tensor.
        out_chs: number of convolution filters.
        kernel_size: convolution kernel size.
        stride: convolution strides.
        name: prefix for the '_conv', '_bn1' and '_relu' layer names. It is
            concatenated with string suffixes, so a string must be supplied
            despite the `None` default.

    # Returns
        Output tensor of the ReLU layer.
    """
    conv = YoloConv2D(filters=out_chs,
                      kernel_size=kernel_size,
                      strides=stride,
                      padding='valid',
                      use_bias=False,
                      name=name+'_conv')
    norm = CustomBatchNormalization(name=name+'_bn1')
    relu = ReLU(name=name+'_relu')
    return relu(norm(conv(input_x)))
def primary_conv(x, output_filters, kernel_size, strides=(1,1),
                 padding='same', act=True, use_bias=False, name=None):
    """Apply a convolution, batch norm and an optional ReLU.

    # Arguments
        x: input tensor.
        output_filters: number of convolution filters.
        kernel_size: convolution kernel size.
        strides: convolution strides.
        padding: convolution padding mode.
        act: when truthy, a ReLU named `name + '_relu'` is appended.
        use_bias: whether the convolution uses a bias.
        name: prefix for the '_0' (conv), '_1' (batch norm) and '_relu'
            layer names. It is concatenated with string suffixes, so a
            string must be supplied despite the `None` default.

    # Returns
        Output tensor of the last layer applied.
    """
    features = YoloConv2D(filters=output_filters,
                          kernel_size=kernel_size,
                          strides=strides,
                          padding=padding,
                          use_bias=use_bias,
                          name=name + '_0')(x)
    features = CustomBatchNormalization(name=name+'_1')(features)
    if act:
        features = ReLU(name=name+'_relu')(features)
    return features
def Depthwise_Separable_Conv2D_BN_Leaky(filters, kernel_size=(3, 3),
                                        block_id_str=None):
    """Depthwise Separable Convolution2D.

    Composes a depthwise convolution, batch norm and LeakyReLU(0.1) with a
    bias-free 1x1 pointwise convolution, batch norm and LeakyReLU(0.1).

    # Arguments
        filters: number of filters of the pointwise convolution.
        kernel_size: kernel size of the depthwise convolution.
        block_id_str: string embedded in all layer names; when falsy,
            `str(K.get_uid())` is used instead.

    # Returns
        The callable produced by `compose` over the six layers.
    """
    block_id_str = block_id_str or str(K.get_uid())

    depthwise_stage = (
        YoloDepthwiseConv2D(kernel_size,
                            padding='same',
                            name='conv_dw_' + block_id_str),
        CustomBatchNormalization(name='conv_dw_%s_bn' % block_id_str),
        LeakyReLU(alpha=0.1, name='conv_dw_%s_leaky_relu' % block_id_str),
    )
    pointwise_stage = (
        YoloConv2D(filters,
                   (1,1),
                   padding='same',
                   use_bias=False,
                   strides=(1, 1),
                   name='conv_pw_%s' % block_id_str),
        CustomBatchNormalization(name='conv_pw_%s_bn' % block_id_str),
        LeakyReLU(alpha=0.1, name='conv_pw_%s_leaky_relu' % block_id_str),
    )
    return compose(*depthwise_stage, *pointwise_stage)
def tiny_yolo3_vgg16_body(inputs, num_anchors, num_classes):
    '''Create Tiny YOLO_v3 VGG16 model CNN body in keras.

    A VGG16 backbone (ImageNet weights, no top) is extended with three extra
    3x3 ReLU convolutions ('block6_conv1' .. 'block6_conv3'). Two prediction
    heads are built: one on the extended feature map and one on its
    upsampled version concatenated with 'block5_conv3'.

    # Arguments
        inputs: input tensor handed to `VGG16` as `input_tensor`.
        num_anchors: anchors per head; sizes the prediction convolutions.
        num_classes: number of classes; sizes the prediction convolutions.

    # Returns
        A `Model` mapping `inputs` to the list `[y1, y2]`.
    '''
    backbone = VGG16(input_tensor=inputs, weights='imagenet',
                     include_top=False)

    # Feature map sizes for a 416 x 416 x 3 input:
    #   block6_conv3 : 13 x 13 x 512
    #   block5_conv3 : 26 x 26 x 512
    #   block4_conv3 : 52 x 52 x 512
    deep = backbone.get_layer('block5_pool').output
    for layer_name in ('block6_conv1', 'block6_conv2', 'block6_conv3'):
        deep = YoloConv2D(512, (3, 3),
                          activation='relu',
                          padding='same',
                          name=layer_name)(deep)

    skip = backbone.get_layer('block5_conv3').output
    head_filters = num_anchors * (num_classes + 5)

    deep = DarknetConv2D_BN_Leaky(512, (1, 1))(deep)

    # Coarse head on the deepest feature map.
    y1 = compose(
        DarknetConv2D_BN_Leaky(1024, (3, 3)),
        DarknetConv2D(head_filters, (1, 1)))(deep)

    # Upsample and merge with the skip feature for the finer head.
    upsampled = compose(
        DarknetConv2D_BN_Leaky(256, (1, 1)),
        UpSampling2D(2))(deep)
    y2 = compose(
        Concatenate(),
        DarknetConv2D_BN_Leaky(512, (3, 3)),
        DarknetConv2D(head_filters, (1, 1)))([upsampled, skip])

    return Model(inputs, [y1, y2])
def Darknet_Depthwise_Separable_Conv2D_BN_Swish(filters, kernel_size=(3, 3),
                                                block_id_str=None, **kwargs):
    """Depthwise Separable Convolution2D.

    Composes a `DarknetDepthwiseConv2D`, batch norm and swish activation
    with a bias-free 1x1 pointwise convolution, batch norm and swish
    activation.

    # Arguments
        filters: number of filters of the pointwise convolution.
        kernel_size: kernel size of the depthwise convolution.
        block_id_str: string embedded in all layer names; when falsy,
            `str(K.get_uid())` is used instead.
        **kwargs: forwarded to `DarknetDepthwiseConv2D`. `use_bias` defaults
            to False unless given here.

    # Returns
        The callable produced by `compose` over the six layers.
    """
    block_id_str = block_id_str or str(K.get_uid())

    # Caller-supplied keywords override the bias-free default.
    depthwise_kwargs = {'use_bias': False, **kwargs}

    layers_in_order = [
        DarknetDepthwiseConv2D(kernel_size,
                               name='conv_dw_' + block_id_str,
                               **depthwise_kwargs),
        CustomBatchNormalization(name='conv_dw_%s_bn' % block_id_str),
        Activation(swish, name='conv_dw_%s_swish' % block_id_str),
        YoloConv2D(filters,
                   (1,1),
                   padding='same',
                   use_bias=False,
                   strides=(1, 1),
                   name='conv_pw_%s' % block_id_str),
        CustomBatchNormalization(name='conv_pw_%s_bn' % block_id_str),
        Activation(swish, name='conv_pw_%s_swish' % block_id_str),
    ]
    return compose(*layers_in_order)
def GhostBottleneck(input_x, mid_chs, out_chs, dw_kernel_size=3,
                    stride=(1,1), se_ratio=0., name=None):
    '''ghostnet bottleneck w/optional se

    # Arguments
        input_x: input tensor.
        mid_chs: channels produced by the first `GhostModule`.
        out_chs: channels produced by the second `GhostModule` and by the
            projected shortcut.
        dw_kernel_size: kernel size of the depthwise convolutions.
        stride: sequence whose first element decides whether the strided
            depthwise stage is inserted (`stride[0] > 1`).
        se_ratio: a `SqueezeExcite` stage is inserted when this is not None
            and greater than 0.
        name: prefix for all layer names. It is concatenated with string
            suffixes, so a string must be supplied despite the `None`
            default.

    # Returns
        Sum of the main branch and the (identity or projected) shortcut.
    '''
    use_se = se_ratio is not None and se_ratio > 0.

    # 1st ghost module
    main = GhostModule(input_x, mid_chs, act=True, name=name+'_ghost1')

    # Strided depthwise convolution, only when down-sampling
    if stride[0] > 1:
        main = YoloDepthwiseConv2D(kernel_size=dw_kernel_size,
                                   strides=stride,
                                   padding='same',
                                   use_bias=False,
                                   name=name+'_conv_dw')(main)
        main = CustomBatchNormalization(name=name+'_bn_dw')(main)

    # Squeeze-and-excitation
    if use_se:
        main = SqueezeExcite(main, se_ratio=se_ratio, name=name+'_se')

    # 2nd ghost module
    main = GhostModule(main, out_chs, act=False, name=name+'_ghost2')

    # Shortcut: identity when shapes already agree, otherwise a
    # depthwise + pointwise projection.
    identity_shortcut = (input_x.shape[-1] == out_chs and stride[0] == 1)
    if identity_shortcut:
        shortcut = input_x
    else:
        shortcut_name = name + '_shortcut'
        shortcut = YoloDepthwiseConv2D(kernel_size=dw_kernel_size,
                                       strides=stride,
                                       padding='same',
                                       use_bias=False,
                                       name=shortcut_name+'_0')(input_x)
        shortcut = CustomBatchNormalization(
            name=shortcut_name+'_1')(shortcut)
        shortcut = YoloConv2D(filters=out_chs,
                              kernel_size=1,
                              strides=(1,1),
                              padding='valid',
                              use_bias=False,
                              name=shortcut_name+'_2')(shortcut)
        shortcut = CustomBatchNormalization(
            name=shortcut_name+'_3')(shortcut)

    return Add(name=name+'_add')([main, shortcut])
def ShuffleNetV2(input_shape=None,
                 include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 scale_factor=1.0,
                 pooling=None,
                 num_shuffle_units=(3, 7, 3),
                 bottleneck_ratio=1,
                 classes=1000,
                 **kwargs):
    """Instantiates the ShuffleNetV2 architecture.

    # Arguments
        input_shape: optional shape tuple, to be specified if you would
            like to use a model with an input img resolution that is not
            (224, 224, 3). It is validated by `_obtain_input_shape` with a
            default size of 224 and a minimum size of 28.
            E.g. `(160, 160, 3)` would be one valid value.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
            'imagenet' (pre-trained weights are downloaded),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`) to use as image input for the model. When
            given it is used directly as the network input.
        scale_factor: multiplier applied to the per-stage channel counts;
            must be a multiple of 0.25.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional block.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional block, and thus
                the output of the model will be
                a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        num_shuffle_units: sequence with one entry per stage giving the
            `repeat` value passed to `block` for that stage.
        bottleneck_ratio: one of 0.5, 1, 1.5 or 2; selects the base channel
            count and is forwarded to `block`.
        classes: number of units of the final `Dense` layer, used only when
            `include_top` is True.
        **kwargs: accepted for call compatibility; not used.

    # Returns
        A Keras model instance.

    # Raises
        RuntimeError: if the Keras backend is not 'tensorflow'.
        ValueError: for an invalid `pooling` or `scale_factor`, or when
            'imagenet' weights are requested under 'channels_first'.
        KeyError: if `bottleneck_ratio` is not one of the supported keys.
    """
    if K.backend() != 'tensorflow':
        raise RuntimeError('Only tensorflow supported for now')

    name = 'ShuffleNetV2_{}_{}_{}'.format(
        scale_factor, bottleneck_ratio,
        "".join(str(units) for units in num_shuffle_units))
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=28,
                                      require_flatten=include_top,
                                      data_format=K.image_data_format())
    out_dim_stage_two = {0.5: 48, 1: 116, 1.5: 176, 2: 244}

    if pooling not in ['max', 'avg', None]:
        raise ValueError('Invalid value for pooling')
    if not (float(scale_factor) * 4).is_integer():
        raise ValueError('Invalid value for scale_factor, should be x over 4')

    # Per-stage output channels: exponents [0, 0, 1, 2, ...] double the
    # stage-two width at each later stage.
    exp = np.insert(np.arange(len(num_shuffle_units), dtype=np.float32), 0, 0)
    out_channels_in_stage = 2**exp
    out_channels_in_stage *= out_dim_stage_two[bottleneck_ratio]
    out_channels_in_stage[0] = 24  # first stage has always 24 output channels
    out_channels_in_stage *= scale_factor
    out_channels_in_stage = out_channels_in_stage.astype(int)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    # create shufflenet architecture
    x = YoloConv2D(filters=out_channels_in_stage[0],
                   kernel_size=(3, 3),
                   padding='same',
                   use_bias=False,
                   strides=(2, 2),
                   activation='relu',
                   name='conv1')(img_input)
    x = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same',
                  name='maxpool1')(x)

    # create stages containing shufflenet units beginning at stage 2
    for stage, repeat in enumerate(num_shuffle_units):
        x = block(x, out_channels_in_stage,
                  repeat=repeat,
                  bottleneck_ratio=bottleneck_ratio,
                  stage=stage + 2)

    k = 1024 if bottleneck_ratio < 2 else 2048
    x = YoloConv2D(k, kernel_size=1, padding='same', strides=1,
                   name='1x1conv5_out', activation='relu')(x)

    if include_top:
        x = GlobalAveragePooling2D(name='global_avg_pool')(x)
        x = Dense(classes, activation='softmax', use_bias=True,
                  name='Logits')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D(name='global_avg_pool')(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D(name='global_max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name=name)

    # Load weights.
    if weights == 'imagenet':
        if K.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format '
                             'are not available.')

        # The file name previously referenced `alpha` and `rows`, which are
        # not defined in this function. Use the width multiplier and the
        # input row count instead (channels_last is guaranteed here, so
        # axis 0 holds the rows; fall back to the default size when the
        # row count is unknown).
        # NOTE(review): confirm this naming against the hosted weight files.
        rows = input_shape[0] if input_shape[0] is not None else 224
        suffix = '.h5' if include_top else '_no_top.h5'
        model_name = ('shufflenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                      str(scale_factor) + '_' + str(rows) + suffix)
        weigh_path = BASE_WEIGHT_PATH + model_name
        weights_path = get_file(model_name, weigh_path,
                                cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def block(inputs, activation_fn=swish, drop_rate=0., name='',
          filters_in=32, filters_out=16, kernel_size=3, strides=1,
          expand_ratio=1, se_ratio=0., id_skip=True):
    """A mobile inverted residual block.

    # Arguments
        inputs: input tensor.
        activation_fn: activation function.
        drop_rate: float between 0 and 1, fraction of the input units to drop.
        name: string, block label.
        filters_in: integer, the number of input filters.
        filters_out: integer, the number of output filters.
        kernel_size: integer, the dimension of the convolution window.
        strides: integer, the stride of the convolution.
        expand_ratio: integer, scaling coefficient for the input filters.
        se_ratio: float between 0 and 1, fraction to squeeze the input filters.
            The squeeze-and-excite phase is skipped for values outside
            `(0, 1]`.
        id_skip: boolean; the residual connection is added only when this is
            exactly `True`, `strides == 1` and `filters_in == filters_out`.

    # Returns
        output tensor for the block.
    """
    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    # Expansion phase
    filters = filters_in * expand_ratio
    if expand_ratio != 1:
        x = YoloConv2D(filters, 1,
                       padding='same',
                       use_bias=False,
                       kernel_initializer=CONV_KERNEL_INITIALIZER,
                       name=name + 'expand_conv')(inputs)
        x = CustomBatchNormalization(axis=bn_axis, name=name + 'expand_bn')(x)
        x = Activation(activation_fn, name=name + 'expand_activation')(x)
    else:
        x = inputs

    # Depthwise Convolution
    if strides == 2:
        x = ZeroPadding2D(padding=correct_pad(K, x, kernel_size),
                          name=name + 'dwconv_pad')(x)
        conv_pad = 'valid'
    else:
        conv_pad = 'same'
    x = YoloDepthwiseConv2D(kernel_size,
                            strides=strides,
                            padding=conv_pad,
                            use_bias=False,
                            depthwise_initializer=CONV_KERNEL_INITIALIZER,
                            name=name + 'dwconv')(x)
    x = CustomBatchNormalization(axis=bn_axis, name=name + 'bn')(x)
    x = Activation(activation_fn, name=name + 'activation')(x)

    # Squeeze and Excitation phase
    if 0 < se_ratio <= 1:
        filters_se = max(1, int(filters_in * se_ratio))
        se = GlobalAveragePooling2D(name=name + 'se_squeeze')(x)
        # Place the channel dimension on the axis used by the active data
        # format so the gate broadcasts against `x` under both
        # 'channels_last' and 'channels_first'.
        if bn_axis == 1:
            se_shape = (filters, 1, 1)
        else:
            se_shape = (1, 1, filters)
        se = Reshape(se_shape, name=name + 'se_reshape')(se)
        se = YoloConv2D(filters_se, 1,
                        padding='same',
                        activation=activation_fn,
                        kernel_initializer=CONV_KERNEL_INITIALIZER,
                        name=name + 'se_reduce')(se)
        se = YoloConv2D(filters, 1,
                        padding='same',
                        activation='sigmoid',
                        kernel_initializer=CONV_KERNEL_INITIALIZER,
                        name=name + 'se_expand')(se)
        if K.backend() == 'theano':
            # For the Theano backend, we have to explicitly make
            # the excitation weights broadcastable.
            se = Lambda(
                lambda t: K.pattern_broadcast(t, [True, True, True, False]),
                output_shape=lambda input_shape: input_shape,
                name=name + 'se_broadcast')(se)
        x = multiply([x, se], name=name + 'se_excite')

    # Output phase
    x = YoloConv2D(filters_out, 1,
                   padding='same',
                   use_bias=False,
                   kernel_initializer=CONV_KERNEL_INITIALIZER,
                   name=name + 'project_conv')(x)
    x = CustomBatchNormalization(axis=bn_axis, name=name + 'project_bn')(x)

    if (id_skip is True and strides == 1 and filters_in == filters_out):
        if drop_rate > 0:
            if tf2.enabled():
                # One dropout decision per sample (drop-connect style).
                x = Dropout(drop_rate,
                            noise_shape=(None, 1, 1, 1),
                            name=name + 'drop')(x)
            else:
                # `noise_shape` is intentionally not passed on this path.
                x = Dropout(drop_rate,
                            name=name + 'drop')(x)
        x = add([x, inputs], name=name + 'add')

    return x
def EfficientNet(width_coefficient,
                 depth_coefficient,
                 default_size,
                 dropout_rate=0.2,
                 drop_connect_rate=0.2,
                 depth_divisor=8,
                 activation_fn=swish,
                 blocks_args=DEFAULT_BLOCKS_ARGS,
                 model_name='efficientnet',
                 include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 input_shape=None,
                 pooling=None,
                 classes=1000,
                 **kwargs):
    """Instantiates the EfficientNet architecture using given scaling coefficients.

    Optionally loads weights pre-trained on ImageNet.
    Note that the data format convention used by the model is
    the one returned by `K.image_data_format()`.

    # Arguments
        width_coefficient: float, scaling coefficient for network width.
        depth_coefficient: float, scaling coefficient for network depth.
        default_size: integer, default input image size.
        dropout_rate: float, dropout rate before final classifier layer.
        drop_connect_rate: float, dropout rate at skip connections.
        depth_divisor: integer, a unit of network width.
        activation_fn: activation function.
        blocks_args: list of dicts, parameters to construct block modules.
            The list is deep-copied before being modified.
        model_name: string, model name. Its last two characters select the
            entry of `WEIGHTS_HASHES` when 'imagenet' weights are loaded.
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor
            (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, validated by
            `_obtain_input_shape`.
        pooling: optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        **kwargs: accepted for call compatibility; not used.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or `classes != 1000` together with 'imagenet' weights and
            `include_top`.
    """
    from copy import deepcopy

    builtin_weights = weights in {'imagenet', None}
    if not builtin_weights and not os.path.exists(weights):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top`'
            ' as true, `classes` should be 1000')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    img_input = Input(shape=input_shape) if input_tensor is None \
        else input_tensor

    bn_axis = 3 if K.image_data_format() == 'channels_last' else 1

    def round_filters(filters, divisor=depth_divisor):
        """Round number of filters based on depth multiplier."""
        scaled = filters * width_coefficient
        rounded = int(scaled + divisor / 2) // divisor * divisor
        rounded = max(divisor, rounded)
        # Make sure that round down does not go down by more than 10%.
        if rounded < 0.9 * scaled:
            rounded += divisor
        return int(rounded)

    def round_repeats(repeats):
        """Round number of repeats based on depth multiplier."""
        return int(math.ceil(depth_coefficient * repeats))

    # Build stem
    net = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                        name='stem_conv_pad')(img_input)
    net = YoloConv2D(round_filters(32), 3,
                     strides=2,
                     padding='valid',
                     use_bias=False,
                     kernel_initializer=CONV_KERNEL_INITIALIZER,
                     name='stem_conv')(net)
    net = CustomBatchNormalization(axis=bn_axis, name='stem_bn')(net)
    net = Activation(activation_fn, name='stem_activation')(net)

    # Build blocks. Work on a private copy because the per-stage dicts are
    # modified below.
    stage_configs = deepcopy(blocks_args)
    total_repeats = float(sum(cfg['repeats'] for cfg in stage_configs))
    blocks_built = 0

    for stage_idx, cfg in enumerate(stage_configs):
        assert cfg['repeats'] > 0
        # Update block input and output filters based on depth multiplier.
        cfg['filters_in'] = round_filters(cfg['filters_in'])
        cfg['filters_out'] = round_filters(cfg['filters_out'])

        stage_repeats = round_repeats(cfg.pop('repeats'))
        for rep_idx in range(stage_repeats):
            # Only the first block of a stage takes care of stride and
            # filter size increase.
            if rep_idx > 0:
                cfg['strides'] = 1
                cfg['filters_in'] = cfg['filters_out']
            block_drop_rate = drop_connect_rate * blocks_built / total_repeats
            block_name = 'block{}{}_'.format(stage_idx + 1,
                                             chr(rep_idx + 97))
            net = block(net, activation_fn, block_drop_rate,
                        name=block_name, **cfg)
            blocks_built += 1

    # Build top
    net = YoloConv2D(round_filters(1280), 1,
                     padding='same',
                     use_bias=False,
                     kernel_initializer=CONV_KERNEL_INITIALIZER,
                     name='top_conv')(net)
    net = CustomBatchNormalization(axis=bn_axis, name='top_bn')(net)
    net = Activation(activation_fn, name='top_activation')(net)

    if include_top:
        net = GlobalAveragePooling2D(name='avg_pool')(net)
        if dropout_rate > 0:
            net = Dropout(dropout_rate, name='top_dropout')(net)
        net = Dense(classes,
                    activation='softmax',
                    kernel_initializer=DENSE_KERNEL_INITIALIZER,
                    name='probs')(net)
    elif pooling == 'avg':
        net = GlobalAveragePooling2D(name='avg_pool')(net)
    elif pooling == 'max':
        net = GlobalMaxPooling2D(name='max_pool')(net)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is None:
        inputs = img_input
    else:
        inputs = get_source_inputs(input_tensor)

    # Create model.
    model = Model(inputs, net, name=model_name)

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            file_suff = '_weights_tf_dim_ordering_tf_kernels_autoaugment.h5'
            hash_index = 0
        else:
            file_suff = '_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'
            hash_index = 1
        file_hash = WEIGHTS_HASHES[model_name[-2:]][hash_index]
        file_name = model_name + file_suff
        weights_path = get_file(file_name,
                                BASE_WEIGHTS_PATH + file_name,
                                cache_subdir='models',
                                file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def yolo3_vgg16_body(inputs, num_anchors, num_classes):
    """Create YOLO_V3 model CNN body in Keras.

    A VGG16 backbone (ImageNet weights, no top) is extended with four extra
    3x3 ReLU convolutions ('block6_conv1' .. 'block6_conv4'). Three
    prediction heads are built with `make_last_layers`; between heads the
    feature map is upsampled by 2 and concatenated with a VGG16 skip
    feature ('block5_conv3', then 'block4_conv3').

    # Arguments
        inputs: input tensor handed to `VGG16` as `input_tensor`.
        num_anchors: anchors per head; sizes the prediction outputs.
        num_classes: number of classes; sizes the prediction outputs.

    # Returns
        A `Model` mapping `inputs` to the list `[y1, y2, y3]`.
    """
    # VGG16 layer output shapes recorded for a (?, 416, 416, 3) input:
    #   block1_conv1 / block1_conv2 : 416 x 416 x 64
    #   block1_pool                 : 208 x 208 x 64
    #   block2_conv1 / block2_conv2 : 208 x 208 x 128
    #   block2_pool                 : 104 x 104 x 128
    #   block3_conv1 .. block3_conv3: 104 x 104 x 256
    #   block3_pool                 : 52 x 52 x 256
    #   block4_conv1 .. block4_conv3: 52 x 52 x 512
    #   block4_pool                 : 26 x 26 x 512
    #   block5_conv1 .. block5_conv3: 26 x 26 x 512
    #   block5_pool                 : 13 x 13 x 512
    backbone = VGG16(input_tensor=inputs, weights='imagenet',
                     include_top=False)

    # Extra 13 x 13 x 512 convolutions on top of the backbone.
    net = backbone.get_layer('block5_pool').output
    extra_conv_names = ('block6_conv1', 'block6_conv2',
                        'block6_conv3', 'block6_conv4')
    for layer_name in extra_conv_names:
        net = YoloConv2D(512, (3, 3),
                         activation='relu',
                         padding='same',
                         name=layer_name)(net)

    head_filters = num_anchors * (num_classes + 5)

    # Head 1 on the deepest (13 x 13) feature map.
    net, y1 = make_last_layers(net, 512, head_filters, predict_id='1')
    predictions = [y1]

    # Heads 2 and 3: upsample, merge with a backbone skip feature, predict.
    #   block5_conv3 : 26 x 26 x 512
    #   block4_conv3 : 52 x 52 x 512
    finer_levels = ((256, 'block5_conv3', '2'),
                    (128, 'block4_conv3', '3'))
    for width, skip_layer, head_id in finer_levels:
        net = compose(
            DarknetConv2D_BN_Leaky(width, (1, 1)),
            UpSampling2D(2))(net)
        skip_feature = backbone.get_layer(skip_layer).output
        net = Concatenate()([net, skip_feature])
        net, prediction = make_last_layers(net, width, head_filters,
                                           predict_id=head_id)
        predictions.append(prediction)

    return Model(inputs=inputs, outputs=predictions)
def MobileNetV2(input_shape=None,
                alpha=1.0,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                pooling=None,
                classes=1000,
                **kwargs):
    """Build the MobileNetV2 architecture as a Keras model.

    # Arguments
        input_shape: optional shape tuple, passed through
            `_obtain_input_shape` (default size 224, minimum size 32).
            E.g. `(160, 160, 3)` would be one valid value. It is not
            cross-checked against `input_tensor`.
        alpha: width multiplier of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, the default number of filters is used.
            The final 1x1 conv keeps 1280 filters unless `alpha` > 1.0.
        include_top: whether to append global average pooling and the
            softmax `Logits` dense layer.
        weights: one of `None` (random initialization),
            'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor used directly as the image
            input of the model.
        pooling: optional pooling mode applied when `include_top`
            is `False`.
            - `None`: the output is the 4D tensor of the last conv block.
            - `avg`: global average pooling is applied.
            - `max`: global max pooling is applied.
        classes: number of units of the `Logits` layer; must be 1000 when
            `weights='imagenet'` and `include_top` is true.
        **kwargs: accepted for signature compatibility; unused here.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: for an invalid `weights` argument, for `classes != 1000`
            with ImageNet weights and `include_top`, or for an `alpha`
            without published ImageNet weights.
    """
    if weights not in {'imagenet', None} and not os.path.exists(weights):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # Fall back to a fully-dynamic spatial shape when nothing is known.
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    channels_last = K.image_data_format() == 'channels_last'
    row_axis, col_axis = (0, 1) if channels_last else (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == 'imagenet':
        if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of `0.35`, `0.50`, `0.75`, '
                             '`1.0`, `1.3` or `1.4` only.')

        if rows != cols or rows not in [96, 128, 160, 192, 224]:
            # `rows` selects the weights file below, so pin it to 224.
            rows = 224
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not in [96, 128, 160, 192, 224].'
                          ' Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    # A caller-supplied tensor is used as-is (not re-wrapped in `Input`).
    img_input = Input(shape=input_shape) if input_tensor is None \
        else input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # Stem: explicit padding + strided 3x3 conv + BN + ReLU6.
    first_block_filters = _make_divisible(32 * alpha, 8)
    x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                      name='Conv1_pad')(img_input)
    x = YoloConv2D(first_block_filters,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='valid',
                   use_bias=False,
                   name='Conv1')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='bn_Conv1')(x)
    x = ReLU(6., name='Conv1_relu')(x)

    # (filters, stride, expansion) for inverted residual blocks 0..16;
    # the position in this table is the block_id.
    block_specs = (
        (16, 1, 1),
        (24, 2, 6), (24, 1, 6),
        (32, 2, 6), (32, 1, 6), (32, 1, 6),
        (64, 2, 6), (64, 1, 6), (64, 1, 6), (64, 1, 6),
        (96, 1, 6), (96, 1, 6), (96, 1, 6),
        (160, 2, 6), (160, 1, 6), (160, 1, 6),
        (320, 1, 6),
    )
    for block_id, (filters, stride, expansion) in enumerate(block_specs):
        x = _inverted_res_block(x, filters=filters, alpha=alpha,
                                stride=stride, expansion=expansion,
                                block_id=block_id)

    # The last conv is only widened when the width multiplier exceeds 1;
    # otherwise it stays at 1280 filters regardless of alpha.
    last_block_filters = (_make_divisible(1280 * alpha, 8)
                          if alpha > 1.0 else 1280)

    x = YoloConv2D(last_block_filters,
                   kernel_size=1,
                   use_bias=False,
                   name='Conv_1')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv_1_bn')(x)
    x = ReLU(6., name='out_relu')(x)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        x = Dense(classes, activation='softmax',
                  use_bias=True, name='Logits')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    inputs = img_input if input_tensor is None \
        else get_source_inputs(input_tensor)

    model = Model(inputs, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows))

    # Load weights: either the published ImageNet file or a user path.
    if weights == 'imagenet':
        top_suffix = '' if include_top else '_no_top'
        model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' +
                      str(alpha) + '_' + str(rows) + top_suffix + '.h5')
        weight_path = BASE_WEIGHT_PATH + model_name
        weights_path = get_file(model_name, weight_path,
                                cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
                          depth_multiplier=1, strides=(1, 1), block_id=1):
    """Adds a depthwise convolution block.

    A depthwise convolution block consists of a depthwise conv,
    batch normalization, relu6, pointwise convolution,
    batch normalization and relu6 activation.

    # Arguments
        inputs: Input tensor of the block.
        pointwise_conv_filters: Integer, the number of output filters of
            the pointwise (1x1) convolution before `alpha` is applied; the
            actual count is `int(pointwise_conv_filters * alpha)`.
        alpha: width multiplier applied to `pointwise_conv_filters`.
        depth_multiplier: forwarded to the depthwise convolution layer as
            its `depth_multiplier`.
        strides: An integer or tuple/list of 2 integers, the strides of
            the depthwise convolution. A single integer `s` is treated
            as `(s, s)`; a list is treated like the equivalent tuple.
        block_id: Integer used to build the unique layer names
            (`conv_dw_<id>`, `conv_pw_<id>`, ...).

    # Returns
        Output tensor of block.
    """
    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
    pointwise_conv_filters = int(pointwise_conv_filters * alpha)

    # Normalise `strides` so that `1`, `[1, 1]` and `(1, 1)` all select the
    # un-padded 'same' path below. A plain `strides == (1, 1)` comparison
    # only recognises the exact tuple and would otherwise pad + use 'valid'
    # for an int/list unit stride.
    try:
        strides = tuple(strides)
    except TypeError:
        # Not iterable: a single integer applies to both spatial dims.
        strides = (strides, strides)
    unit_stride = strides == (1, 1)

    if unit_stride:
        x = inputs
    else:
        # Strided case: pad bottom/right explicitly, then convolve 'valid'.
        x = ZeroPadding2D(((0, 1), (0, 1)),
                          name='conv_pad_%d' % block_id)(inputs)
    x = YoloDepthwiseConv2D((3, 3),
                            padding='same' if unit_stride else 'valid',
                            depth_multiplier=depth_multiplier,
                            strides=strides,
                            use_bias=False,
                            name='conv_dw_%d' % block_id)(x)
    x = CustomBatchNormalization(
        axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x)
    x = ReLU(6., name='conv_dw_%d_relu' % block_id)(x)

    x = YoloConv2D(pointwise_conv_filters, (1, 1),
                   padding='same',
                   use_bias=False,
                   strides=(1, 1),
                   name='conv_pw_%d' % block_id)(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 name='conv_pw_%d_bn' % block_id)(x)
    return ReLU(6., name='conv_pw_%d_relu' % block_id)(x)
def GhostNet(input_shape=None,
             include_top=True,
             weights='imagenet',
             input_tensor=None,
             cfgs=DEFAULT_CFGS,
             width=1.0,
             dropout_rate=0.2,
             pooling=None,
             classes=1000,
             **kwargs):
    """Instantiates the GhostNet architecture.

    # Arguments
        input_shape: optional shape tuple, passed through
            `_obtain_input_shape` (default size 224, minimum size 32).
            E.g. `(160, 160, 3)` would be one valid value.
        include_top: whether to include the pooled 1280-channel head and
            the softmax `classifier` layer at the top of the network.
        weights: one of `None` (random initialization),
            'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor used directly as the image
            input of the model.
        cfgs: model structure config: a sequence of stages, each a sequence
            of `(k, exp_size, c, se_ratio, s)` tuples describing one
            `GhostBottleneck` (kernel size, expansion size, output
            channels, squeeze-excite ratio, stride). Must contain at least
            one bottleneck.
        width: width multiplier applied to `c` and `exp_size`
            (results are rounded with `_make_divisible(..., 4)`).
        dropout_rate: fraction of the input units to drop before the
            classifier; no dropout layer is added when it is not > 0.
        pooling: optional pooling mode applied when `include_top`
            is `False`.
            - `None`: the output is the 4D tensor of the last conv block.
            - `avg`: global average pooling is applied.
            - `max`: global max pooling is applied.
        classes: number of units of the classifier; must be 1000 when
            `weights='imagenet'` and `include_top` is true.
        **kwargs: accepted for signature compatibility; unused here.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            `classes != 1000` with ImageNet weights and `include_top`,
            or a `cfgs` that defines no bottleneck block.
    """
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as `"imagenet"` with `include_top` '
                         'as true, `classes` should be 1000')

    # The width of the final 1x1 conv comes from the last bottleneck's
    # expansion size, so an empty config cannot be built. Fail early with a
    # clear message instead of an unbound-variable error after the loop.
    if not any(len(stage) for stage in cfgs):
        raise ValueError('`cfgs` should define at least one bottleneck block.')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # If input_shape is None and input_tensor is None using standard shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    # A caller-supplied tensor is used as-is (not re-wrapped in `Input`).
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    # building first layer
    output_channel = int(_make_divisible(16 * width, 4))
    x = YoloConv2D(filters=output_channel,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='same',
                   use_bias=False,
                   name='conv_stem')(img_input)
    # Normalise over the channel axis explicitly, as the other backbones in
    # this file do; `channel_axis` was previously computed but never used,
    # leaving this layer on its default axis.
    x = CustomBatchNormalization(axis=channel_axis, name='bn1')(x)
    x = ReLU(name='Conv2D_1_act')(x)

    # building inverted residual blocks
    last_exp_size = None
    for index, cfg in enumerate(cfgs):
        for sub_index, (k, exp_size, c, se_ratio, s) in enumerate(cfg):
            output_channel = int(_make_divisible(c * width, 4))
            hidden_channel = int(_make_divisible(exp_size * width, 4))
            x = GhostBottleneck(x, hidden_channel, output_channel, k, (s, s),
                                se_ratio=se_ratio,
                                name='blocks_' + str(index) + '_' + str(sub_index))
            last_exp_size = exp_size

    # Final 1x1 conv sized from the last bottleneck's expansion size.
    # NOTE(review): the layer name is fixed to 'blocks_9_0' regardless of
    # how many stages `cfgs` has - presumably to match the pretrained
    # weight file; confirm before changing.
    output_channel = _make_divisible(last_exp_size * width, 4)
    x = ConvBnAct(x, output_channel, kernel_size=1, name='blocks_9_0')

    if include_top:
        x = GlobalAveragePooling2D(name='global_avg_pooling2D')(x)
        # Restore a 1x1 spatial map so the head can be a 1x1 convolution.
        if K.image_data_format() == 'channels_first':
            x = Reshape((output_channel, 1, 1))(x)
        else:
            x = Reshape((1, 1, output_channel))(x)

        # building last several layers
        output_channel = 1280
        x = YoloConv2D(filters=output_channel,
                       kernel_size=1,
                       strides=(1, 1),
                       padding='valid',
                       use_bias=True,
                       name='conv_head')(x)
        x = ReLU(name='relu_head')(x)

        if dropout_rate > 0.:
            x = Dropout(dropout_rate, name='dropout_1')(x)
        x = Flatten()(x)
        x = Dense(units=classes, activation='softmax',
                  use_bias=True, name='classifier')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='ghostnet_%0.2f' % (width))

    # Load weights.
    if weights == 'imagenet':
        if include_top:
            file_name = 'ghostnet_weights_tf_dim_ordering_tf_kernels_224.h5'
        else:
            file_name = 'ghostnet_weights_tf_dim_ordering_tf_kernels_224_no_top.h5'
        weight_path = BASE_WEIGHT_PATH + file_name

        weights_path = get_file(file_name, weight_path, cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def MobileNet(input_shape=None,
              alpha=1.0,
              depth_multiplier=1,
              dropout=1e-3,
              include_top=True,
              weights='imagenet',
              input_tensor=None,
              pooling=None,
              classes=1000,
              **kwargs):
    """Build the MobileNet (v1) architecture as a Keras model.

    # Arguments
        input_shape: optional shape tuple, passed through
            `_obtain_input_shape` (minimum size 32). When it is square
            with a side in [128, 160, 192, 224] that side is used as the
            default size, otherwise 224.
            E.g. `(200, 200, 3)` would be one valid value.
        alpha: width multiplier of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, the default number of filters is used.
        depth_multiplier: depth multiplier forwarded to every depthwise
            convolution block; must be 1 with ImageNet weights.
        dropout: dropout rate applied before the prediction conv.
        include_top: whether to include the classification head
            at the top of the network.
        weights: one of `None` (random initialization),
            'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor used directly as the image
            input of the model.
        pooling: optional pooling mode applied when `include_top`
            is `False`.
            - `None`: the output is the 4D tensor of the last conv block.
            - `avg`: global average pooling is applied.
            - `max`: global max pooling is applied.
        classes: number of output classes of the head; must be 1000 when
            `weights='imagenet'` and `include_top` is true.
        **kwargs: accepted for signature compatibility; unused here.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: for an invalid `weights` argument, for `classes != 1000`
            with ImageNet weights and `include_top`, or for a
            `depth_multiplier` / `alpha` without published ImageNet weights.
    """
    if weights not in {'imagenet', None} and not os.path.exists(weights):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    channels_first = K.image_data_format() == 'channels_first'

    # Determine proper input shape and default size.
    default_size = 224
    if input_shape is not None:
        if channels_first:
            rows, cols = input_shape[1], input_shape[2]
        else:
            rows, cols = input_shape[0], input_shape[1]
        if rows == cols and rows in [128, 160, 192, 224]:
            default_size = rows

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if K.image_data_format() == 'channels_last':
        rows, cols = input_shape[0], input_shape[1]
    else:
        rows, cols = input_shape[1], input_shape[2]

    if weights == 'imagenet':
        if depth_multiplier != 1:
            raise ValueError('If imagenet weights are being loaded, '
                             'depth multiplier must be 1')

        if alpha not in [0.25, 0.50, 0.75, 1.0]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of'
                             '`0.25`, `0.50`, `0.75` or `1.0` only.')

        if rows != cols or rows not in [128, 160, 192, 224]:
            # `rows` selects the weights file below, so pin it to 224.
            rows = 224
            warnings.warn('`input_shape` is undefined or non-square, '
                          'or `rows` is not in [128, 160, 192, 224]. '
                          'Weights for input shape (224, 224) will be'
                          ' loaded as the default.')

    # A caller-supplied tensor is used as-is (not re-wrapped in `Input`).
    img_input = Input(shape=input_shape) if input_tensor is None \
        else input_tensor

    x = _conv_block(img_input, 32, alpha, strides=(2, 2))

    # (pointwise filters, strides) for depthwise blocks 1..13.
    dw_specs = (
        (64, (1, 1)),
        (128, (2, 2)), (128, (1, 1)),
        (256, (2, 2)), (256, (1, 1)),
        (512, (2, 2)),
        (512, (1, 1)), (512, (1, 1)), (512, (1, 1)),
        (512, (1, 1)), (512, (1, 1)),
        (1024, (2, 2)), (1024, (1, 1)),
    )
    for block_id, (filters, strides) in enumerate(dw_specs, start=1):
        x = _depthwise_conv_block(x, filters, alpha, depth_multiplier,
                                  strides=strides, block_id=block_id)

    if include_top:
        top_channels = int(1024 * alpha)
        if K.image_data_format() == 'channels_first':
            shape = (top_channels, 1, 1)
        else:
            shape = (1, 1, top_channels)

        # Pool, restore a 1x1 map, then classify with a 1x1 convolution.
        x = GlobalAveragePooling2D()(x)
        x = Reshape(shape, name='reshape_1')(x)
        x = Dropout(dropout, name='dropout')(x)
        x = YoloConv2D(classes, (1, 1),
                       padding='same',
                       name='conv_preds')(x)
        x = Reshape((classes, ), name='reshape_2')(x)
        x = Activation('softmax', name='act_softmax')(x)
    elif pooling == 'avg':
        x = GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    inputs = img_input if input_tensor is None \
        else get_source_inputs(input_tensor)

    model = Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows))

    # Load weights: either the published ImageNet file or a user path.
    if weights == 'imagenet':
        # File names encode alpha as e.g. '1_0'; anything not listed
        # falls back to '2_5'.
        alpha_text = {1.0: '1_0', 0.75: '7_5', 0.50: '5_0'}.get(alpha, '2_5')

        if include_top:
            model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows)
        else:
            model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows)
        weight_path = BASE_WEIGHT_PATH + model_name
        weights_path = get_file(model_name, weight_path,
                                cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def MobileNetV3(stack_fn,
                last_point_ch,
                input_shape=None,
                alpha=1.0,
                model_type='large',
                minimalistic=False,
                include_top=True,
                weights='imagenet',
                input_tensor=None,
                classes=1000,
                pooling=None,
                dropout_rate=0.2,
                **kwargs):
    """Instantiates the MobileNetV3 architecture.

    # Arguments
        stack_fn: a function called as
            `stack_fn(x, kernel, activation, se_ratio)` that returns the
            output tensor of the stacked residual blocks.
        last_point_ch: number of channels of the last pointwise layer
            (`Conv_2`, before the logits); scaled by `alpha` when
            `alpha` > 1.0.
        input_shape: optional shape tuple, passed through
            `_obtain_input_shape` (default size 224, minimum size 32).
            E.g. `(160, 160, 3)` would be one valid value. It is not
            cross-checked against `input_tensor`.
        alpha: width multiplier. Within this function it is validated
            against the available ImageNet weights, widens `last_point_ch`
            when greater than 1.0 and is embedded in the weights file name;
            `stack_fn` does not receive it, so any per-block scaling has to
            be baked into `stack_fn` by the caller.
        model_type: string used in the model name and in the ImageNet
            weights file name (e.g. 'large' or 'small').
        minimalistic: when truthy, builds the minimalistic variant:
            kernel size 3, `relu` activation and no squeeze-and-excite
            (`se_ratio=None`) are passed to `stack_fn`; otherwise kernel
            size 5, `hard_swish` and `se_ratio=0.25` are used.
        include_top: whether to include the classification head
            at the top of the network.
        weights: one of `None` (random initialization),
            'imagenet' (pre-training on ImageNet),
            or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor used directly as the image
            input of the model.
        classes: number of output classes of the head; must be 1000 when
            `weights='imagenet'` and `include_top` is true.
        pooling: optional pooling mode applied when `include_top`
            is `False`.
            - `None`: the output is the 4D tensor of the last conv layer.
            - `avg`: global average pooling is applied.
            - `max`: global max pooling is applied.
        dropout_rate: fraction of the input units to drop before the
            logits; no dropout layer is added when it is not > 0.
        **kwargs: accepted for signature compatibility; unused here.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of an invalid argument for `weights`,
            `classes != 1000` with ImageNet weights and `include_top`,
            an `alpha` without published ImageNet weights, or an input
            smaller than 32x32.
    """
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Validate alpha up front (it does not depend on the input shape).
    # Truthiness is used for `minimalistic`, matching how the flag is
    # interpreted when the network and the weights file name are built;
    # identity checks against True/False would let values such as `1` or
    # `0` bypass this validation entirely.
    if weights == 'imagenet':
        allowed_alphas = [1.0] if minimalistic else [0.75, 1.0]
        if alpha not in allowed_alphas:
            raise ValueError(
                'If imagenet weights are being loaded, '
                'alpha can be one of `0.75`, `1.0` for non minimalistic'
                ' or `1.0` for minimalistic only.')

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    # If input_shape is None and input_tensor is None using standard shape
    if input_shape is None and input_tensor is None:
        input_shape = (None, None, 3)

    if K.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    # Only enforce the minimum size when both spatial dims are known.
    if rows and cols and (rows < 32 or cols < 32):
        raise ValueError(
            'Input size must be at least 32x32; got `input_shape=' +
            str(input_shape) + '`')

    if weights == 'imagenet' and (rows != cols or rows != 224):
        # Only 224x224 ImageNet weights are referenced below.
        warnings.warn('`input_shape` is undefined or non-square, '
                      'or `rows` is not 224.'
                      ' Weights for input shape (224, 224) will be'
                      ' loaded as the default.')

    # A caller-supplied tensor is used as-is (not re-wrapped in `Input`).
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1

    if minimalistic:
        kernel = 3
        activation = relu
        se_ratio = None
    else:
        kernel = 5
        activation = hard_swish
        se_ratio = 0.25

    # Stem: explicit padding + strided 3x3 conv + BN + activation.
    x = ZeroPadding2D(padding=correct_pad(K, img_input, 3),
                      name='Conv_pad')(img_input)
    x = YoloConv2D(16,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='valid',
                   use_bias=False,
                   name='Conv')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv/BatchNorm')(x)
    x = Activation(activation)(x)

    x = stack_fn(x, kernel, activation, se_ratio)

    # The last conv expands the stack's output channels six-fold.
    last_conv_ch = _depth(K.int_shape(x)[channel_axis] * 6)

    # if the width multiplier is greater than 1 we
    # increase the number of output channels
    if alpha > 1.0:
        last_point_ch = _depth(last_point_ch * alpha)

    x = YoloConv2D(last_conv_ch,
                   kernel_size=1,
                   padding='same',
                   use_bias=False,
                   name='Conv_1')(x)
    x = CustomBatchNormalization(axis=channel_axis,
                                 epsilon=1e-3,
                                 momentum=0.999,
                                 name='Conv_1/BatchNorm')(x)
    x = Activation(activation)(x)

    if include_top:
        x = GlobalAveragePooling2D()(x)
        # Restore a 1x1 spatial map so the head can use 1x1 convolutions.
        if channel_axis == 1:
            x = Reshape((last_conv_ch, 1, 1))(x)
        else:
            x = Reshape((1, 1, last_conv_ch))(x)
        x = YoloConv2D(last_point_ch,
                       kernel_size=1,
                       padding='same',
                       name='Conv_2')(x)
        x = Activation(activation)(x)
        if dropout_rate > 0:
            x = Dropout(dropout_rate)(x)
        x = YoloConv2D(classes,
                       kernel_size=1,
                       padding='same',
                       name='Logits')(x)
        x = Flatten()(x)
        x = Softmax(name='Predictions/Softmax')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D(name='avg_pool')(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D(name='max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs, x, name='MobilenetV3' + model_type)

    # Load weights.
    if weights == 'imagenet':
        model_name = "{}{}_224_{}_float".format(
            model_type, '_minimalistic' if minimalistic else '', str(alpha))
        # WEIGHTS_HASHES[model_name] holds (with-top, no-top) file hashes.
        if include_top:
            file_name = 'weights_mobilenet_v3_' + model_name + '.h5'
            file_hash = WEIGHTS_HASHES[model_name][0]
        else:
            file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5'
            file_hash = WEIGHTS_HASHES[model_name][1]
        weights_path = get_file(file_name,
                                BASE_WEIGHT_PATH + file_name,
                                cache_subdir='models',
                                file_hash=file_hash)
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def ShuffleNet(include_top=True,
               input_tensor=None,
               scale_factor=1.0,
               pooling=None,
               input_shape=None,
               groups=1,
               weights='imagenet',
               num_shuffle_units=(3, 7, 3),
               bottleneck_ratio=0.25,
               classes=1000,
               **kwargs):
    """
    ShuffleNet implementation for Keras 2

    ShuffleNet: An Extremely Efficient Convolutional Neural Network for
    Mobile Devices
    Xiangyu Zhang, Xinyu Zhou, Mengxiao Lin, Jian Sun
    https://arxiv.org/pdf/1707.01083.pdf

    Note that only TensorFlow is supported for now, therefore it only works
    with the data format `image_data_format='channels_last'` in your Keras
    config at `~/.keras/keras.json`.

    Parameters
    ----------
    include_top: bool(True)
        whether to include the classification head (global average pooling
        followed by a softmax `Dense` layer) at the top of the network.
    input_tensor:
        optional Keras tensor (i.e. output of `layers.Input()`) to use as
        image input for the model. It is used as-is when provided.
    scale_factor:
        scales the number of output channels. `scale_factor * 4` must be an
        integer.
    pooling:
        Optional pooling mode for feature extraction
        when `include_top` is `False`.
        - `None` means that the output of the model
            will be the 4D tensor output of the
            last convolutional layer.
        - `avg` means that global average pooling
            will be applied to the output of the
            last convolutional layer, and thus
            the output of the model will be a
            2D tensor.
        - `max` means that global max pooling will
            be applied.
    input_shape:
        optional shape tuple, validated through `_obtain_input_shape`
        (default size 224, minimum size 28).
    groups: int
        number of groups per channel. Must be one of 1, 2, 3, 4 or 8.
    weights:
        `'imagenet'` to download pre-trained weights, a path to a weights
        file to load, or `None` to skip weight loading.
    num_shuffle_units: sequence of int, default (3, 7, 3)
        number of stages (sequence length) and the number of shufflenet
        units in a stage beginning with stage 2 because stage 1 is fixed
        e.g. idx 0 contains 3 + 1 (first shuffle unit in each stage differs)
        shufflenet units for stage 2, idx 1 contains 7 + 1 shufflenet units
        for stage 3 and idx 2 contains 3 + 1 shufflenet units.
    bottleneck_ratio:
        bottleneck ratio implies the ratio of bottleneck channels to output
        channels. For example, bottleneck ratio = 1 : 4 means the output
        feature map is 4 times the width of the bottleneck feature map.
    classes: int(1000)
        number of classes to predict
    **kwargs:
        accepted for call compatibility; not used by this function.

    Returns
    -------
        A Keras model instance

    Raises
    ------
    RuntimeError
        if the Keras backend is not TensorFlow.
    ValueError
        if `groups`, `pooling` or `scale_factor` is invalid, or if ImageNet
        weights are requested with the `channels_first` data format.

    References
    ----------
    - [ShuffleNet: An Extremely Efficient Convolutional Neural Network for
      Mobile Devices](http://www.arxiv.org/pdf/1707.01083.pdf)
    """
    if K.backend() != 'tensorflow':
        raise RuntimeError('Only TensorFlow backend is currently supported.')

    name = "ShuffleNet_%.2gX_g%d_br_%.2g_%s" % (
        scale_factor, groups, bottleneck_ratio,
        "".join(str(units) for units in num_shuffle_units))

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=28,
                                      require_flatten=include_top,
                                      data_format=K.image_data_format())

    # Output channels of stage 2, keyed by the number of groups.
    out_dim_stage_two = {1: 144, 2: 200, 3: 240, 4: 272, 8: 384}
    if groups not in out_dim_stage_two:
        raise ValueError("Invalid number of groups.")

    if pooling not in ['max', 'avg', None]:
        raise ValueError("Invalid value for pooling.")

    if not (float(scale_factor) * 4).is_integer():
        raise ValueError("Invalid value for scale_factor. Should be x over 4.")

    # Exponents [0, 0, 1, 2, ...]: the leading 0 is a placeholder for stage 1,
    # the remaining entries double the channel count from one stage to the
    # next, starting at stage 2.
    exp = np.insert(
        np.arange(0, len(num_shuffle_units), dtype=np.float32), 0, 0)
    out_channels_in_stage = 2**exp
    # calculate output channels for each stage
    out_channels_in_stage *= out_dim_stage_two[groups]
    # first stage has always 24 output channels
    out_channels_in_stage[0] = 24
    out_channels_in_stage *= scale_factor
    out_channels_in_stage = out_channels_in_stage.astype(int)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    # create shufflenet architecture
    x = YoloConv2D(filters=out_channels_in_stage[0],
                   kernel_size=(3, 3),
                   padding='same',
                   use_bias=False,
                   strides=(2, 2),
                   activation="relu",
                   name="conv1")(img_input)
    x = MaxPooling2D(pool_size=(3, 3),
                     strides=(2, 2),
                     padding='same',
                     name="maxpool1")(x)

    # create stages containing shufflenet units beginning at stage 2
    for stage, repeat in enumerate(num_shuffle_units, start=2):
        x = _block(x,
                   out_channels_in_stage,
                   repeat=repeat,
                   bottleneck_ratio=bottleneck_ratio,
                   groups=groups,
                   stage=stage)

    if include_top:
        x = GlobalAveragePooling2D(name='global_avg_pool')(x)
        x = Dense(units=classes,
                  activation='softmax',
                  use_bias=True,
                  name='Logits')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D(name='global_avg_pool')(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D(name='global_max_pool')(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # Create model.
    model = Model(inputs=inputs, outputs=x, name=name)

    # Load weights.
    if weights == 'imagenet':
        if K.image_data_format() == 'channels_first':
            raise ValueError('Weights for "channels_first" format '
                             'are not available.')

        # The file name used to be built from `alpha` and `rows`, which are
        # not defined in this function (NameError on the default call). The
        # width multiplier here is `scale_factor`; the row count is the first
        # entry of the channels_last input shape.
        # NOTE(review): confirm the published weight files follow this
        # naming scheme and that a 224 fallback is the desired default.
        rows = input_shape[0]
        if rows is None:
            rows = 224

        top_suffix = '' if include_top else '_no_top'
        model_name = ('shufflenet_weights_tf_dim_ordering_tf_kernels_' +
                      str(scale_factor) + '_' + str(rows) + top_suffix +
                      '.h5')
        weights_path = get_file(model_name,
                                BASE_WEIGHT_PATH + model_name,
                                cache_subdir='models')
        model.load_weights(weights_path)
    elif weights is not None:
        model.load_weights(weights)

    return model
def shuffle_unit(inputs, out_channels, bottleneck_ratio, strides=2, stage=1, block=1):
    """
    Build one shuffle unit and return its output tensor.

    The unit always runs a 1x1 conv -> 3x3 depthwise conv -> 1x1 conv branch
    (each followed by batch normalization) whose convolutions produce
    `int(out_channels * bottleneck_ratio)` filters. What it is concatenated
    with depends on `strides`:

    - `strides < 2`: `inputs` is first passed through `channel_split`; the
      second returned tensor feeds the conv branch and the first one is
      concatenated with the branch output unchanged.
    - otherwise: a second branch (stride-2 3x3 depthwise conv -> 1x1 conv)
      is built from the same `inputs` and concatenated with the first.

    The concatenated tensor is finally passed through `channel_shuffle`.

    Parameters
    ----------
    inputs:
        Input tensor.
    out_channels:
        Channel count that `bottleneck_ratio` is applied to.
    bottleneck_ratio:
        Multiplier giving the number of filters of each convolution branch.
    strides: int(2)
        Stride of the depthwise convolution in the main branch; also selects
        which of the two unit variants is built.
    stage: int(1)
        Stage index, used only to build layer names.
    block: int(1)
        Block index, used only to build layer names.

    Returns
    -------
        The output tensor of the unit.

    Raises
    ------
    ValueError
        if the Keras image data format is not `channels_last`.
    """
    if K.image_data_format() != 'channels_last':
        raise ValueError('Only channels last supported')
    bn_axis = -1

    prefix = 'stage{}/block{}'.format(stage, block)
    bottleneck_channels = int(out_channels * bottleneck_ratio)

    keeps_resolution = strides < 2
    if keeps_resolution:
        # One part bypasses the convolutions, the other one feeds them.
        passthrough, inputs = channel_split(inputs, '{}/spl'.format(prefix))

    # Main branch: 1x1 conv -> 3x3 depthwise conv -> 1x1 conv.
    main = YoloConv2D(bottleneck_channels,
                      kernel_size=(1, 1),
                      strides=1,
                      padding='same',
                      name='{}/1x1conv_1'.format(prefix))(inputs)
    main = CustomBatchNormalization(
        axis=bn_axis, name='{}/bn_1x1conv_1'.format(prefix))(main)
    main = Activation('relu', name='{}/relu_1x1conv_1'.format(prefix))(main)
    main = YoloDepthwiseConv2D(kernel_size=3,
                               strides=strides,
                               padding='same',
                               name='{}/3x3dwconv'.format(prefix))(main)
    main = CustomBatchNormalization(
        axis=bn_axis, name='{}/bn_3x3dwconv'.format(prefix))(main)
    main = YoloConv2D(bottleneck_channels,
                      kernel_size=1,
                      strides=1,
                      padding='same',
                      name='{}/1x1conv_2'.format(prefix))(main)
    main = CustomBatchNormalization(
        axis=bn_axis, name='{}/bn_1x1conv_2'.format(prefix))(main)
    main = Activation('relu', name='{}/relu_1x1conv_2'.format(prefix))(main)

    if keeps_resolution:
        merged = Concatenate(
            axis=bn_axis,
            name='{}/concat_1'.format(prefix))([main, passthrough])
    else:
        # Second branch built from the unsplit input, always with stride 2.
        shortcut = YoloDepthwiseConv2D(
            kernel_size=3,
            strides=2,
            padding='same',
            name='{}/3x3dwconv_2'.format(prefix))(inputs)
        shortcut = CustomBatchNormalization(
            axis=bn_axis, name='{}/bn_3x3dwconv_2'.format(prefix))(shortcut)
        shortcut = YoloConv2D(bottleneck_channels,
                              kernel_size=1,
                              strides=1,
                              padding='same',
                              name='{}/1x1_conv_3'.format(prefix))(shortcut)
        shortcut = CustomBatchNormalization(
            axis=bn_axis, name='{}/bn_1x1conv_3'.format(prefix))(shortcut)
        shortcut = Activation(
            'relu', name='{}/relu_1x1conv_3'.format(prefix))(shortcut)
        merged = Concatenate(
            axis=bn_axis,
            name='{}/concat_2'.format(prefix))([main, shortcut])

    return Lambda(channel_shuffle,
                  name='{}/channel_shuffle'.format(prefix))(merged)