def _separable_conv_block(ip, filters, kernel_size=(3, 3), strides=(1, 1), block_id=None): """Adds 2 blocks of [relu-separable conv-batchnorm]. Arguments: ip: Input tensor filters: Number of output filters per layer kernel_size: Kernel size of separable convolutions strides: Strided convolution for downsampling block_id: String block_id Returns: A Keras tensor """ channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 with backend.name_scope('separable_conv_block_%s' % block_id): x = layers.Activation('relu')(ip) if strides == (2, 2): x = layers.ZeroPadding2D( padding=imagenet_utils.correct_pad(x, kernel_size), name='separable_conv_1_pad_%s' % block_id)(x) conv_pad = 'valid' else: conv_pad = 'same' x = layers.SeparableConv2D( filters, kernel_size, strides=strides, name='separable_conv_1_%s' % block_id, padding=conv_pad, use_bias=False, kernel_initializer='he_normal')( x) x = layers.BatchNormalization( axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='separable_conv_1_bn_%s' % (block_id))( x) x = layers.Activation('relu')(x) x = layers.SeparableConv2D( filters, kernel_size, name='separable_conv_2_%s' % block_id, padding='same', use_bias=False, kernel_initializer='he_normal')( x) x = layers.BatchNormalization( axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='separable_conv_2_bn_%s' % (block_id))( x) return x
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): """Inverted ResNet block.""" channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 in_channels = backend.int_shape(inputs)[channel_axis] pointwise_conv_filters = int(filters * alpha) pointwise_filters = _make_divisible(pointwise_conv_filters, 8) x = inputs prefix = 'block_{}_'.format(block_id) if block_id: # Expand x = layers.Conv2D(expansion * in_channels, kernel_size=1, padding='same', use_bias=False, activation=None, name=prefix + 'expand')(x) x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name=prefix + 'expand_BN')(x) x = layers.ReLU(6., name=prefix + 'expand_relu')(x) else: prefix = 'expanded_conv_' # Depthwise if stride == 2: x = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad(x, 3), name=prefix + 'pad')(x) x = layers.DepthwiseConv2D(kernel_size=3, strides=stride, activation=None, use_bias=False, padding='same' if stride == 1 else 'valid', name=prefix + 'depthwise')(x) x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name=prefix + 'depthwise_BN')(x) x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x) # Project x = layers.Conv2D(pointwise_filters, kernel_size=1, padding='same', use_bias=False, activation=None, name=prefix + 'project')(x) x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name=prefix + 'project_BN')(x) if in_channels == pointwise_filters and stride == 1: return layers.Add(name=prefix + 'add')([inputs, x]) return x
def _inverted_res_block(x, expansion, filters, kernel_size, stride, se_ratio, activation, block_id): channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 shortcut = x prefix = 'expanded_conv/' infilters = backend.int_shape(x)[channel_axis] if block_id: # Expand prefix = 'expanded_conv_{}/'.format(block_id) x = layers.Conv2D(_depth(infilters * expansion), kernel_size=1, padding='same', use_bias=False, name=prefix + 'expand')(x) x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name=prefix + 'expand/BatchNorm')(x) x = activation(x) if stride == 2: x = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad( x, kernel_size), name=prefix + 'depthwise/pad')(x) x = layers.DepthwiseConv2D(kernel_size, strides=stride, padding='same' if stride == 1 else 'valid', use_bias=False, name=prefix + 'depthwise')(x) x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name=prefix + 'depthwise/BatchNorm')(x) x = activation(x) if se_ratio: x = _se_block(x, _depth(infilters * expansion), se_ratio, prefix) x = layers.Conv2D(filters, kernel_size=1, padding='same', use_bias=False, name=prefix + 'project')(x) x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name=prefix + 'project/BatchNorm')(x) if stride == 1 and infilters == filters: x = layers.Add(name=prefix + 'Add')([shortcut, x]) return x
def _separable_conv_block(ip, filters, kernel_size=(3, 3), strides=(1, 1), block_id=None): channel_dim = -1 with backend.name_scope('separable_conv_block_%s' % block_id): x = layers.Activation('relu')(ip) if strides == (2, 2): x = layers.ZeroPadding2D( padding=imagenet_utils.correct_pad(x, kernel_size), name='separable_conv_1_pad_%s' % block_id)(x) conv_pad = 'valid' else: conv_pad = 'same' x = layers.SeparableConv2D(filters, kernel_size, strides=strides, name='separable_conv_1_%s' % block_id, padding=conv_pad, use_bias=False, kernel_initializer='he_normal')(x) x = layers.BatchNormalization(axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='separable_conv_1_bn_%s' % (block_id))(x) x = layers.Activation('relu')(x) x = layers.SeparableConv2D(filters, kernel_size, name='separable_conv_2_%s' % block_id, padding='same', use_bias=False, kernel_initializer='he_normal')(x) x = layers.BatchNormalization(axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='separable_conv_2_bn_%s' % (block_id))(x) return x
def _reduction_a_cell(ip, p, filters, block_id=None): """Adds a Reduction cell for NASNet-A (Fig. 4 in the paper). Arguments: ip: Input tensor `x` p: Input tensor `p` filters: Number of output filters block_id: String block_id Returns: A Keras tensor """ channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 with backend.name_scope('reduction_A_block_%s' % block_id): p = _adjust_block(p, ip, filters, block_id) h = layers.Activation('relu')(ip) h = layers.Conv2D(filters, (1, 1), strides=(1, 1), padding='same', name='reduction_conv_1_%s' % block_id, use_bias=False, kernel_initializer='he_normal')(h) h = layers.BatchNormalization(axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='reduction_bn_1_%s' % block_id)(h) h3 = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad(h, 3), name='reduction_pad_1_%s' % block_id)(h) with backend.name_scope('block_1'): x1_1 = _separable_conv_block(h, filters, (5, 5), strides=(2, 2), block_id='reduction_left1_%s' % block_id) x1_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2), block_id='reduction_right1_%s' % block_id) x1 = layers.add([x1_1, x1_2], name='reduction_add_1_%s' % block_id) with backend.name_scope('block_2'): x2_1 = layers.MaxPooling2D( (3, 3), strides=(2, 2), padding='valid', name='reduction_left2_%s' % block_id)(h3) x2_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2), block_id='reduction_right2_%s' % block_id) x2 = layers.add([x2_1, x2_2], name='reduction_add_2_%s' % block_id) with backend.name_scope('block_3'): x3_1 = layers.AveragePooling2D( (3, 3), strides=(2, 2), padding='valid', name='reduction_left3_%s' % block_id)(h3) x3_2 = _separable_conv_block(p, filters, (5, 5), strides=(2, 2), block_id='reduction_right3_%s' % block_id) x3 = layers.add([x3_1, x3_2], name='reduction_add3_%s' % block_id) with backend.name_scope('block_4'): x4 = layers.AveragePooling2D( (3, 3), strides=(1, 1), padding='same', name='reduction_left4_%s' % block_id)(x1) x4 = layers.add([x2, x4]) with backend.name_scope('block_5'): x5_1 = _separable_conv_block(x1, filters, (3, 3), block_id='reduction_left4_%s' % block_id) x5_2 = layers.MaxPooling2D( (3, 3), strides=(2, 2), padding='valid', name='reduction_right5_%s' % block_id)(h3) x5 = layers.add([x5_1, x5_2], name='reduction_add4_%s' % block_id) x = layers.concatenate([x2, x3, x4, x5], axis=channel_dim, name='reduction_concat_%s' % block_id) return x, ip
def residual_inverted( inputs: Optional[tf.Tensor] = None, filters_in: Optional[int] = None, filters_out: Optional[int] = None, expand_ratio: float = 2., se_ratio: float = 0.25, sigmoid_gating: bool = False, batchnorm: bool = True, batchnorm_kw: Optional[Dict[str, Any]] = None, dropout: float = 0.0, kernel_size: Tuple[int, int] = (3, 3), order: Literal['baw', 'wba'] = 'wba', strides: Tuple[int, int] = (1, 1), activation: Callable[[tf.Tensor], tf.Tensor] = tf.nn.swish, skip_mode: Literal['add', 'concat'] = 'add', skip_ratio: float = 1.0, name: str = 'residual_inverted', **conv_kw, ) -> Union[tf.Tensor, ResidualSequential]: if batchnorm_kw is None: batchnorm_kw = {} assert expand_ratio >= 1, ( f'Inverted residual only support expand_ratio >= 1, given {expand_ratio}' ) ## prepare layers = [] batchnorm_kw = dict(axis=3, **batchnorm_kw) if filters_in is None: filters_in = inputs.shape[-1] # assume NHWC filters = max(1, int(expand_ratio * filters_in)) if filters_out is None: filters_out = filters_in use_bias = not batchnorm if np.any(np.asarray(strides) >= 2): layers.append( ZeroPadding2D(padding=correct_pad(inputs, kernel_size), name=f'{name}_pad')) pad_mode = 'valid' else: pad_mode = 'same' ## expand if order == 'baw': if batchnorm: layers.append( BatchNormalization(**batchnorm_kw, name=f'{name}_bn1')) layers.append(Activation(activation, name=f'{name}_act1')) layers.append( Conv2D(filters=filters, kernel_size=kernel_size, padding=pad_mode, strides=strides, use_bias=use_bias, name=f'{name}_conv1', **conv_kw)) if order == 'wba': if batchnorm: layers.append( BatchNormalization(**batchnorm_kw, name=f'{name}_bn1')) layers.append(Activation(activation, name=f'{name}_act1')) ## squeeze if order == 'baw': if batchnorm: layers.append( BatchNormalization(**batchnorm_kw, name=f'{name}_bn2')) layers.append(Activation(activation, name=f'{name}_act2')) layers.append( DepthwiseConv2D(kernel_size=kernel_size, name=f'{name}_dwconv1', **conv_kw)) if order == 'wba': if batchnorm: layers.append( BatchNormalization(**batchnorm_kw, name=f'{name}_bn2')) layers.append(Activation(activation, name=f'{name}_act2')) ## final if se_ratio: layers.append( SqueezeExcitation(se_ratio=se_ratio, activation=activation, name=f'{name}_se')) layers.append( Conv2D(filters=filters_out * (2 if sigmoid_gating else 1), kernel_size=(1, 1), use_bias=use_bias, name=f'{name}_proj1', **conv_kw)) if batchnorm: layers.append(BatchNormalization(**batchnorm_kw, name=f'{name}_bn3')) if sigmoid_gating: layers.append(SigmoidGating(name=f'{name}_gating')) # no residual connection if strides > 1 if filters_out == filters_in and np.all(np.asarray(strides) == 1): if dropout > 0: layers.append(dropout2D(rate=dropout, name=f'{name}_drop')) else: skip_mode = 'none' ## final layer res = ResidualSequential(skip_mode=skip_mode, skip_ratio=skip_ratio, layers=layers, name=name) if inputs is None: return res return res(inputs)
def block(inputs, activation='swish', drop_rate=0., name='', filters_in=32, filters_out=16, kernel_size=3, strides=1, expand_ratio=1, se_ratio=0., id_skip=True): """An inverted residual block. Args: inputs: input tensor. activation: activation function. drop_rate: float between 0 and 1, fraction of the input units to drop. name: string, block label. filters_in: integer, the number of input filters. filters_out: integer, the number of output filters. kernel_size: integer, the dimension of the convolution window. strides: integer, the stride of the convolution. expand_ratio: integer, scaling coefficient for the input filters. se_ratio: float between 0 and 1, fraction to squeeze the input filters. id_skip: boolean. Returns: output tensor for the block. """ bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 # Expansion phase filters = filters_in * expand_ratio if expand_ratio != 1: x = layers.Conv2D(filters, 1, padding='same', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name=name + 'expand_conv')(inputs) x = layers.BatchNormalization(axis=bn_axis, name=name + 'expand_bn')(x) x = layers.Activation(activation, name=name + 'expand_activation')(x) else: x = inputs # Depthwise Convolution if strides == 2: x = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad( x, kernel_size), name=name + 'dwconv_pad')(x) conv_pad = 'valid' else: conv_pad = 'same' x = layers.DepthwiseConv2D(kernel_size, strides=strides, padding=conv_pad, use_bias=False, depthwise_initializer=CONV_KERNEL_INITIALIZER, name=name + 'dwconv')(x) x = layers.BatchNormalization(axis=bn_axis, name=name + 'bn')(x) x = layers.Activation(activation, name=name + 'activation')(x) # Squeeze and Excitation phase if 0 < se_ratio <= 1: filters_se = max(1, int(filters_in * se_ratio)) se = layers.GlobalAveragePooling2D(name=name + 'se_squeeze')(x) if bn_axis == 1: se_shape = (filters, 1, 1) else: se_shape = (1, 1, filters) se = layers.Reshape(se_shape, name=name + 'se_reshape')(se) se = layers.Conv2D(filters_se, 1, padding='same', activation=activation, kernel_initializer=CONV_KERNEL_INITIALIZER, name=name + 'se_reduce')(se) se = layers.Conv2D(filters, 1, padding='same', activation='sigmoid', kernel_initializer=CONV_KERNEL_INITIALIZER, name=name + 'se_expand')(se) x = layers.multiply([x, se], name=name + 'se_excite') # Output phase x = layers.Conv2D(filters_out, 1, padding='same', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name=name + 'project_conv')(x) x = layers.BatchNormalization(axis=bn_axis, name=name + 'project_bn')(x) if id_skip and strides == 1 and filters_in == filters_out: if drop_rate > 0: x = layers.Dropout(drop_rate, noise_shape=(None, 1, 1, 1), name=name + 'drop')(x) x = layers.add([x, inputs], name=name + 'add') return x
def EfficientNet(width_coefficient, depth_coefficient, default_size, dropout_rate=0.2, drop_connect_rate=0.2, depth_divisor=8, activation='swish', blocks_args='default', model_name='efficientnet', include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000, classifier_activation='softmax'): """Instantiates the EfficientNet architecture using given scaling coefficients. Args: width_coefficient: float, scaling coefficient for network width. depth_coefficient: float, scaling coefficient for network depth. default_size: integer, default input image size. dropout_rate: float, dropout rate before final classifier layer. drop_connect_rate: float, dropout rate at skip connections. depth_divisor: integer, a unit of network width. activation: activation function. blocks_args: list of dicts, parameters to construct block modules. model_name: string, model name. include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False. It should have exactly 3 inputs channels. pooling: optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. Returns: A `keras.Model` instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. """ if blocks_args == 'default': blocks_args = DEFAULT_BLOCKS_ARGS if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=default_size, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 def round_filters(filters, divisor=depth_divisor): """Round number of filters based on depth multiplier.""" filters *= width_coefficient new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. if new_filters < 0.9 * filters: new_filters += divisor return int(new_filters) def round_repeats(repeats): """Round number of repeats based on depth multiplier.""" return int(math.ceil(depth_coefficient * repeats)) # Build stem x = img_input x = layers.Rescaling(1. / 255.)(x) x = layers.Normalization(axis=bn_axis)(x) x = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad(x, 3), name='stem_conv_pad')(x) x = layers.Conv2D(round_filters(32), 3, strides=2, padding='valid', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name='stem_conv')(x) x = layers.BatchNormalization(axis=bn_axis, name='stem_bn')(x) x = layers.Activation(activation, name='stem_activation')(x) # Build blocks blocks_args = copy.deepcopy(blocks_args) b = 0 blocks = float(sum(round_repeats(args['repeats']) for args in blocks_args)) for (i, args) in enumerate(blocks_args): assert args['repeats'] > 0 # Update block input and output filters based on depth multiplier. args['filters_in'] = round_filters(args['filters_in']) args['filters_out'] = round_filters(args['filters_out']) for j in range(round_repeats(args.pop('repeats'))): # The first block needs to take care of stride and filter size increase. if j > 0: args['strides'] = 1 args['filters_in'] = args['filters_out'] x = block(x, activation, drop_connect_rate * b / blocks, name='block{}{}_'.format(i + 1, chr(j + 97)), **args) b += 1 # Build top x = layers.Conv2D(round_filters(1280), 1, padding='same', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name='top_conv')(x) x = layers.BatchNormalization(axis=bn_axis, name='top_bn')(x) x = layers.Activation(activation, name='top_activation')(x) if include_top: x = layers.GlobalAveragePooling2D(name='avg_pool')(x) if dropout_rate > 0: x = layers.Dropout(dropout_rate, name='top_dropout')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, kernel_initializer=DENSE_KERNEL_INITIALIZER, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D(name='avg_pool')(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D(name='max_pool')(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name=model_name) # Load weights. if weights == 'imagenet': if include_top: file_suffix = '.h5' file_hash = WEIGHTS_HASHES[model_name[-2:]][0] else: file_suffix = '_notop.h5' file_hash = WEIGHTS_HASHES[model_name[-2:]][1] file_name = model_name + file_suffix weights_path = data_utils.get_file(file_name, BASE_WEIGHTS_PATH + file_name, cache_subdir='models', file_hash=file_hash) model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def MobileNetV2(input_shape=None, alpha=1.0, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000, classifier_activation='softmax', **kwargs): """Instantiates the MobileNetV2 architecture. Reference: - [MobileNetV2: Inverted Residuals and Linear Bottlenecks]( https://arxiv.org/abs/1801.04381) (CVPR 2018) Optionally loads weights pre-trained on ImageNet. Caution: Be sure to properly pre-process your inputs to the application. Please see `applications.mobilenet_v2.preprocess_input` for an example. Arguments: input_shape: Optional shape tuple, to be specified if you would like to use a model with an input image resolution that is not (224, 224, 3). It should have exactly 3 inputs channels (224, 224, 3). You can also omit this option if you would like to infer input_shape from an input_tensor. If you choose to include both input_tensor and input_shape then input_shape will be used if they match, if the shapes do not match then we will throw an error. E.g. `(160, 160, 3)` would be one valid value. alpha: Float between 0 and 1. controls the width of the network. This is known as the width multiplier in the MobileNetV2 paper, but the name is kept for consistency with `applications.MobileNetV1` model in Keras. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. include_top: Boolean, whether to include the fully-connected layer at the top of the network. Defaults to `True`. weights: String, one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. pooling: String, optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional block. - `avg` means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: Integer, optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. **kwargs: For backwards compatibility only. Returns: A `keras.Model` instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape or invalid alpha, rows when weights='imagenet' ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. """ global layers if 'layers' in kwargs: layers = kwargs.pop('layers') else: layers = VersionAwareLayers() if kwargs: raise ValueError('Unknown argument(s): %s' % (kwargs,)) if not (weights in {'imagenet', None} or file_io.file_exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' 'as true, `classes` should be 1000') # Determine proper input shape and default size. # If both input_shape and input_tensor are used, they should match if input_shape is not None and input_tensor is not None: try: is_input_t_tensor = backend.is_keras_tensor(input_tensor) except ValueError: try: is_input_t_tensor = backend.is_keras_tensor( layer_utils.get_source_inputs(input_tensor)) except ValueError: raise ValueError('input_tensor: ', input_tensor, 'is not type input_tensor') if is_input_t_tensor: if backend.image_data_format == 'channels_first': if backend.int_shape(input_tensor)[1] != input_shape[1]: raise ValueError('input_shape: ', input_shape, 'and input_tensor: ', input_tensor, 'do not meet the same shape requirements') else: if backend.int_shape(input_tensor)[2] != input_shape[1]: raise ValueError('input_shape: ', input_shape, 'and input_tensor: ', input_tensor, 'do not meet the same shape requirements') else: raise ValueError('input_tensor specified: ', input_tensor, 'is not a keras tensor') # If input_shape is None, infer shape from input_tensor if input_shape is None and input_tensor is not None: try: backend.is_keras_tensor(input_tensor) except ValueError: raise ValueError('input_tensor: ', input_tensor, 'is type: ', type(input_tensor), 'which is not a valid type') if input_shape is None and not backend.is_keras_tensor(input_tensor): default_size = 224 elif input_shape is None and backend.is_keras_tensor(input_tensor): if backend.image_data_format() == 'channels_first': rows = backend.int_shape(input_tensor)[2] cols = backend.int_shape(input_tensor)[3] else: rows = backend.int_shape(input_tensor)[1] cols = backend.int_shape(input_tensor)[2] if rows == cols and rows in [96, 128, 160, 192, 224]: default_size = rows else: default_size = 224 # If input_shape is None and no input_tensor elif input_shape is None: default_size = 224 # If input_shape is not None, assume default size else: if backend.image_data_format() == 'channels_first': rows = input_shape[1] cols = input_shape[2] else: rows = input_shape[0] cols = input_shape[1] if rows == cols and rows in [96, 128, 160, 192, 224]: default_size = rows else: default_size = 224 input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=default_size, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if backend.image_data_format() == 'channels_last': row_axis, col_axis = (0, 1) else: row_axis, col_axis = (1, 2) rows = input_shape[row_axis] cols = input_shape[col_axis] if weights == 'imagenet': if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]: raise ValueError('If imagenet weights are being loaded, ' 'alpha can be one of `0.35`, `0.50`, `0.75`, ' '`1.0`, `1.3` or `1.4` only.') if rows != cols or rows not in [96, 128, 160, 192, 224]: rows = 224 logging.warning('`input_shape` is undefined or non-square, ' 'or `rows` is not in [96, 128, 160, 192, 224].' ' Weights for input shape (224, 224) will be' ' loaded as the default.') if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 first_block_filters = _make_divisible(32 * alpha, 8) x = layers.ZeroPadding2D( padding=imagenet_utils.correct_pad(img_input, 3), name='Conv1_pad')(img_input) x = layers.Conv2D( first_block_filters, kernel_size=3, strides=(2, 2), padding='valid', use_bias=False, name='Conv1')( x) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, name='bn_Conv1')( x) x = layers.ReLU(6., name='Conv1_relu')(x) x = _inverted_res_block( x, filters=16, alpha=alpha, stride=1, expansion=1, block_id=0) x = _inverted_res_block( x, filters=24, alpha=alpha, stride=2, expansion=6, block_id=1) x = _inverted_res_block( x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2) x = _inverted_res_block( x, filters=32, alpha=alpha, stride=2, expansion=6, block_id=3) x = _inverted_res_block( x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=4) x = _inverted_res_block( x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5) x = _inverted_res_block( x, filters=64, alpha=alpha, stride=2, expansion=6, block_id=6) x = _inverted_res_block( x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7) x = _inverted_res_block( x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8) x = _inverted_res_block( x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9) x = _inverted_res_block( x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10) x = _inverted_res_block( x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11) x = _inverted_res_block( x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12) x = _inverted_res_block( x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13) x = _inverted_res_block( x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14) x = _inverted_res_block( x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15) x = _inverted_res_block( x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16) # no alpha applied to last Conv as stated in the paper: # if the width multiplier is greater than 1 we # increase the number of output channels if alpha > 1.0: last_block_filters = _make_divisible(1280 * alpha, 8) else: last_block_filters = 1280 x = layers.Conv2D( last_block_filters, kernel_size=1, use_bias=False, name='Conv_1')( x) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv_1_bn')( x) x = layers.ReLU(6., name='out_relu')(x) if include_top: x = layers.GlobalAveragePooling2D()(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows)) # Load weights. if weights == 'imagenet': if include_top: model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + str(alpha) + '_' + str(rows) + '.h5') weight_path = BASE_WEIGHT_PATH + model_name weights_path = data_utils.get_file( model_name, weight_path, cache_subdir='models') else: model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + str(alpha) + '_' + str(rows) + '_no_top' + '.h5') weight_path = BASE_WEIGHT_PATH + model_name weights_path = data_utils.get_file( model_name, weight_path, cache_subdir='models') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def build_model(self): # Build stem x = self.img_input x = layers.Rescaling(1. / 255.)(x) x = layers.Normalization(axis=self.bn_axis)(x) x = tf.keras.layers.ZeroPadding2D(padding=imagenet_utils.correct_pad( x, 3), name='stem_conv_pad')(x) x = tf.keras.layers.Conv2D(self.round_filters(32), 3, strides=2, padding='valid', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name='stem_conv')(x) x = build_normalization(**self.normalization, name='stem_bn')(x) x = build_activation(**self.activation, name='stem_activation')(x) block_outputs = [x] # Build blocks blocks_args = copy.deepcopy(self.blocks_args) b = 0 blocks = float(sum(args['repeats'] for args in blocks_args)) output_stages = [1, 2, 4, 6] for (i, args) in enumerate(blocks_args): assert args['repeats'] > 0 # Update block input and output filters based on depth multiplier. args['filters_in'] = self.round_filters(args['filters_in']) args['filters_out'] = self.round_filters(args['filters_out']) strides = args["strides"] for j in range(self.round_repeats(args.pop('repeats'))): # The first block needs to take care of stride and filter size increase. if j > 0: args['strides'] = 1 args['filters_in'] = args['filters_out'] x = self.block(x, self.activation, self.drop_connect_rate * b / blocks, name='block{}{}_'.format(i + 1, chr(j + 97)), **args) b += 1 if i in output_stages: block_outputs.append(x) # Build top x = tf.keras.layers.Conv2D(self.round_filters(1280), 1, padding='same', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name='top_conv')(x) x = build_normalization(**self.normalization, name='top_bn')(x) x = build_activation(**self.activation, name='top_activation')(x) if -1 in self.output_indices: x = tf.keras.layers.GlobalAveragePooling2D(name='avg_pool')(x) if self.dropout_rate > 0: x = tf.keras.layers.Dropout(self.dropout_rate, name='top_dropout')(x) x = tf.keras.layers.Dense( 1000, activation=self.classifier_activation, kernel_initializer=DENSE_KERNEL_INITIALIZER, name='predictions')(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if self.input_tensor is not None: inputs = tf.keras.utils.get_source_inputs(self.input_tensor) else: inputs = self.img_input # Create model. return tf.keras.Model(inputs=self.img_input, outputs=x, name=model_name) return [block_outputs[i - 1] for i in self.output_indices]
def EfficientNet(width_coefficient, depth_coefficient, default_size, dropout_rate=0.2, drop_connect_rate=0.2, depth_divisor=8, activation='swish', blocks_args='default', model_name='efficientnet', include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000, classifier_activation='softmax'): if blocks_args == 'default': blocks_args = DEFAULT_BLOCKS_ARGS if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 def round_filters(filters, divisor=depth_divisor): """Round number of filters based on depth multiplier.""" filters *= width_coefficient new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. if new_filters < 0.9 * filters: new_filters += divisor return int(new_filters) def round_repeats(repeats): """Round number of repeats based on depth multiplier.""" return int(math.ceil(depth_coefficient * repeats)) # Build stem x = img_input x = layers.Rescaling(1. / 255.)(x) x = layers.Normalization(axis=bn_axis)(x) x = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad(x, 3), name='stem_conv_pad')(x) x = layers.Conv2D(round_filters(32), 3, strides=2, padding='valid', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name='stem_conv')(x) x = layers.BatchNormalization(axis=bn_axis, name='stem_bn')(x) x = layers.Activation(activation, name='stem_activation')(x) # Build blocks blocks_args = copy.deepcopy(blocks_args) b = 0 blocks = float(sum(round_repeats(args['repeats']) for args in blocks_args)) for (i, args) in enumerate(blocks_args): assert args['repeats'] > 0 # Update block input and output filters based on depth multiplier. args['filters_in'] = round_filters(args['filters_in']) args['filters_out'] = round_filters(args['filters_out']) for j in range(round_repeats(args.pop('repeats'))): # The first block needs to take care of stride and filter size increase. if j > 0: args['strides'] = 1 args['filters_in'] = args['filters_out'] x = block(x, activation, drop_connect_rate * b / blocks, name='block{}{}_'.format(i + 1, chr(j + 97)), **args) b += 1 # Build top x = layers.Conv2D(round_filters(1280), 1, padding='same', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name='top_conv')(x) x = layers.BatchNormalization(axis=bn_axis, name='top_bn')(x) x = layers.Activation(activation, name='top_activation')(x) if include_top: x = layers.GlobalAveragePooling2D(name='avg_pool')(x) if dropout_rate > 0: x = layers.Dropout(dropout_rate, name='top_dropout')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, kernel_initializer=DENSE_KERNEL_INITIALIZER, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D(name='avg_pool')(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D(name='max_pool')(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name=model_name) return model
def MobileNetV2(input_shape=None, alpha=1.0): if input_shape is None: default_size = 224 else: if backend.image_data_format() == 'channels_first': rows = input_shape[1] cols = input_shape[2] else: rows = input_shape[0] cols = input_shape[1] if rows == cols and rows in [96, 128, 160, 192, 224]: default_size = rows else: default_size = 224 input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=default_size, min_size=32, data_format=backend.image_data_format(), require_flatten=True) row_axis = 0 if backend.image_data_format() == 'channels_last' else 1 rows = input_shape[row_axis] img_input = layers.Input(shape=input_shape) channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 first_block_filters = _make_divisible(32 * alpha, 8) x = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad(img_input, 3), name='Conv1_pad')(img_input) x = layers.Conv2D(first_block_filters, kernel_size=3, strides=(2, 2), padding='valid', use_bias=False, name='Conv1')(x) x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name='bn_Conv1')(x) x = layers.ReLU(6., name='Conv1_relu')(x) x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1, expansion=1, block_id=0) x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2, expansion=6, block_id=1) x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2) x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2, expansion=6, block_id=3) x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=4) x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5) x = _inverted_res_block(x, filters=64, alpha=alpha, stride=2, expansion=6, block_id=6) x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7) x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8) x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9) x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10) x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11) x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12) x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13) x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14) x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15) x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16) # no alpha applied to last conv as stated in the paper: # if the width multiplier is greater than 1 we # increase the number of output channels if alpha > 1.0: last_block_filters = _make_divisible(1280 * alpha, 8) else: last_block_filters = 1280 x = layers.Conv2D(last_block_filters, kernel_size=1, use_bias=False, name='Conv_1')(x) x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv_1_bn')(x) x = layers.ReLU(6., name='out_relu')(x) x = layers.GlobalAveragePooling2D()(x) imagenet_utils.validate_activation('softmax', None) x = layers.Dense(NUM_CLASSES, activation='softmax', name='predictions')(x) # Create model. model = training.Model(img_input, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows)) return model
def mbconvblock(inputs, activations='swish', droprate=0., name='', filters_in=32, filters_out=16, kernel_size=3, strides=1, expand_ratio=1, se_ratio=0., id_skip=True, attn=True): bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 #expansion phase filters = filters_in * expand_ratio if expand_ratio != 1: x = layers.Conv2D(filters, kernel_size=1, padding='same', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER)(inputs) x = layers.BatchNormalization(axis=bn_axis)(x) x = layers.Activation(activations)(x) #swish activation else: x = inputs #depthwise conv if strides == 2: x = layers.ZeroPadding2D( padding=imagenet_utils.correct_pad(x, kernel_size))(x) conv_pad = 'valid' else: conv_pad = 'same' x = layers.DepthwiseConv2D( kernel_size=kernel_size, strides=strides, padding=conv_pad, use_bias=False, depthwise_initializer=CONV_KERNEL_INITIALIZER)(x) x = layers.BatchNormalization(axis=bn_axis)(x) x = layers.Activation(activations)(x) #squeeze and excitation if 0 < se_ratio <= 1: filters_se = max(1, int(filters_in * se_ratio)) se = layers.GlobalAveragePooling2D()(x) se = layers.Reshape((1, 1, filters))(se) se = layers.Conv2D(filters_se, kernel_size=1, padding='same', activation=activations, kernel_initializer=CONV_KERNEL_INITIALIZER)( se) #reduce se = layers.Conv2D(filters, kernel_size=1, padding='same', activation='sigmoid', kernel_initializer=CONV_KERNEL_INITIALIZER)( se) #expand x = layers.multiply([x, se]) if attn: avgp = layers.Lambda(lambda x: K.mean(x, axis=3, keepdims=True))(x) maxp = layers.Lambda(lambda x: K.max(x, axis=3, keepdims=True))(x) conc = layers.Concatenate(axis=3)([avgp, maxp]) attn_mask = layers.Conv2D( filters=1, kernel_size=1, activation='sigmoid', padding='same', name=name + '_Attn', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER)(conc) #Output phase x = layers.Conv2D(filters_out, kernel_size=1, padding='same', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER)(x) x = layers.BatchNormalization(axis=bn_axis)(x) if id_skip and strides == 1 and filters_in == filters_out: if droprate > 0: x = layers.Dropout(droprate, noise_shape=(None, 1, 1, 1))(x) x = layers.add([x, inputs]) if attn: x = layers.multiply([x, attn_mask]) return x
def efficientnetB0(width_coeff=1, depth_coeff=1, default_img_size=224, dropoutrate=0.2, drop_connect_rate=0.2, depth_divisor=8, activations='swish', block_args='default', include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000, classifier_activation='softmax'): if block_args == 'default': block_args = DEFAULT_BLOCKS_ARGS if not (weights in {'imagenet', None} or file_io.file_exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top`' ' as true, `classes` should be 1000') # proper input shape check input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=default_img_size, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 def round_filters(filters, divisor=depth_divisor): """ round number of filters based on depth multiplier""" filters *= width_coeff new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor) #making sure round down does not go down by 10% if new_filters < 0.9 * filters: new_filters += divisor return int(new_filters) def round_repeats(repeats): """Round number of repeats based on depth multiplier.""" return int(math.ceil(depth_coeff * repeats)) #building stem x = img_input x = layers.Rescaling(1. / 255.)(x) x = layers.Normalization(axis=bn_axis)(x) x = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad(x, 3))(x) x = layers.Conv2D(round_filters(32), kernel_size=3, strides=2, padding='valid', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER)(x) x = layers.BatchNormalization(axis=bn_axis)(x) x = layers.Activation(activations)(x) #building blocks block_args = copy.deepcopy(block_args) b = 0 blocks = float(sum(round_repeats(args['repeats']) for args in block_args)) for (i, args) in enumerate(block_args): assert args['repeats'] > 0 args['filters_in'] = round_filters(args['filters_in']) args['filters_out'] = round_filters(args['filters_out']) for j in range(round_repeats(args.pop('repeats'))): # The first block needs to take care of stride and filter size increase. if j > 0: args['strides'] = 1 args['filters_in'] = args['filters_out'] x = mbconvblock(inputs=x, activations=activations, droprate=drop_connect_rate * b / blocks, name=str(i), **args) b += 1 #build top x = layers.Conv2D(round_filters(1280), kernel_size=1, padding='same', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER)(x) x = layers.BatchNormalization(axis=bn_axis)(x) x = layers.Activation(activations)(x) if include_top: x = layers.GlobalAveragePooling2D()(x) if dropoutrate > 0: x = layers.Dropout(dropoutrate)(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, kernel_initializer=DENSE_KERNEL_INITIALIZER)(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input #creating model model = training.Model(inputs, x) return model
def mbconv_block(inputs, global_params, block_args, normalization, drop_connect_rate=None, trainable=True, l2_regularizer=tf.keras.regularizers.l2(l=4e-5), name=""): expand_ratio = block_args.expand_ratio data_format = global_params.data_format _momentum = global_params.batch_norm_momentum _epsilon = global_params.batch_norm_epsilon _axis = normalization["axis"] _mean_axis = [1, 2] if _axis == -1 or _axis == 3 else [2, 3] filters = block_args.in_filters * expand_ratio expand_ratio = expand_ratio if expand_ratio != 1: x = tf.keras.layers.Conv2D( filters=filters, kernel_size=(1, 1), strides=(1, 1), padding="same", data_format=data_format, use_bias=False, name=name + "/conv2d", trainable=trainable, kernel_initializer=conv2d_kernel_initializer)(inputs) x = build_normalization(**normalization, name=name + "/batch_normalization")(x) x = tf.keras.layers.Activation("swish", name=name + "/swish")(x) else: x = inputs # Depthwise Convolution if block_args.strides == 2: x = tf.keras.layers.ZeroPadding2D(padding=imagenet_utils.correct_pad( x, block_args.kernel_size), name=name + '/dwconv_pad')(x) conv_pad = 'valid' else: conv_pad = 'same' x = tf.keras.layers.DepthwiseConv2D( kernel_size=block_args.kernel_size, strides=block_args.strides, padding=conv_pad, data_format=data_format, use_bias=False, trainable=trainable, depthwise_initializer=conv2d_kernel_initializer, name=name + "/depthwise_conv2d")(x) x = build_normalization(**normalization, name=name + "/batch_normalization" if expand_ratio == 1 else name + "/batch_normalization_1")(x) x = tf.keras.layers.Activation( "swish", name=name + "/swish" if expand_ratio == 1 else name + "/swish_1")(x) has_se = block_args.se_ratio is not None and 0 < block_args.se_ratio < 1 if has_se: squeeze_filters = max(1, int(block_args.in_filters * block_args.se_ratio)) se = tf.keras.layers.Lambda( lambda inp: tf.reduce_mean(inp, axis=_mean_axis, keepdims=True), name=name + "/se/global_pooling")(x) se = tf.keras.layers.Conv2D( filters=squeeze_filters, kernel_size=(1, 1), strides=(1, 1), padding="same", data_format=data_format, use_bias=True, kernel_initializer=conv2d_kernel_initializer, trainable=trainable, name=name + "/se/conv2d")(se) se = tf.keras.layers.Activation("swish", name=name + "/se/swish_1")(se) se = tf.keras.layers.Conv2D( filters=filters, kernel_size=(1, 1), strides=(1, 1), padding="same", data_format=data_format, use_bias=True, trainable=trainable, kernel_initializer=conv2d_kernel_initializer, name=name + "/se/conv2d_1")(se) se = tf.keras.layers.Activation("sigmoid", name=name + "/se/sigmoid")(se) x = tf.keras.layers.Multiply(name=name + "/se/multiply")([se, x]) x = tf.keras.layers.Conv2D( block_args.out_filters, kernel_size=(1, 1), strides=(1, 1), padding="same", data_format=data_format, use_bias=False, trainable=trainable, kernel_initializer=conv2d_kernel_initializer, name=name + "/conv2d" if expand_ratio == 1 else name + "/conv2d_1")(x) x = build_normalization(**normalization, name=name + "/batch_normalization_2" if expand_ratio > 1 else name + "/batch_normalization_1")(x) if block_args.id_skip: if all(s == 1 for s in block_args.strides ) and block_args.in_filters == block_args.out_filters: # x = DropConnect(drop_connect_rate, name=name + "/drop_connect")(x) # x = tf.keras.layers.Dropout(drop_connect_rate, noise_shape=(None, 1, 1, 1), name=name + '/drop')(x) x = tf.keras.layers.Add(name=name + "/add")([x, inputs]) return x
def _reduction_a_cell(ip, p, filters, block_id=None): channel_dim = -1 with backend.name_scope('reduction_A_block_%s' % block_id): p = _adjust_block(p, ip, filters, block_id) h = layers.Activation('relu')(ip) h = layers.Conv2D(filters, (1, 1), strides=(1, 1), padding='same', name='reduction_conv_1_%s' % block_id, use_bias=False, kernel_initializer='he_normal')(h) h = layers.BatchNormalization(axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='reduction_bn_1_%s' % block_id)(h) h3 = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad(h, 3), name='reduction_pad_1_%s' % block_id)(h) with backend.name_scope('block_1'): x1_1 = _separable_conv_block(h, filters, (5, 5), strides=(2, 2), block_id='reduction_left1_%s' % block_id) x1_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2), block_id='reduction_right1_%s' % block_id) x1 = layers.add([x1_1, x1_2], name='reduction_add_1_%s' % block_id) with backend.name_scope('block_2'): x2_1 = layers.MaxPooling2D( (3, 3), strides=(2, 2), padding='valid', name='reduction_left2_%s' % block_id)(h3) x2_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2), block_id='reduction_right2_%s' % block_id) x2 = layers.add([x2_1, x2_2], name='reduction_add_2_%s' % block_id) with backend.name_scope('block_3'): x3_1 = layers.AveragePooling2D( (3, 3), strides=(2, 2), padding='valid', name='reduction_left3_%s' % block_id)(h3) x3_2 = _separable_conv_block(p, filters, (5, 5), strides=(2, 2), block_id='reduction_right3_%s' % block_id) x3 = layers.add([x3_1, x3_2], name='reduction_add3_%s' % block_id) with backend.name_scope('block_4'): x4 = layers.AveragePooling2D( (3, 3), strides=(1, 1), padding='same', name='reduction_left4_%s' % block_id)(x1) x4 = layers.add([x2, x4]) with backend.name_scope('block_5'): x5_1 = _separable_conv_block(x1, filters, (3, 3), block_id='reduction_left4_%s' % block_id) x5_2 = layers.MaxPooling2D( (3, 3), strides=(2, 2), padding='valid', name='reduction_right5_%s' % block_id)(h3) x5 = layers.add([x5_1, x5_2], name='reduction_add4_%s' % block_id) x = layers.concatenate([x2, x3, x4, x5], axis=channel_dim, name='reduction_concat_%s' % block_id) return x, ip