def get_swish(**kwargs): backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) def swish(x): """Swish activation function: x * sigmoid(x). Reference: [Searching for Activation Functions](https://arxiv.org/abs/1710.05941) """ if backend.backend() == 'tensorflow': try: # The native TF implementation has a more # memory-efficient gradient implementation return backend.tf.nn.swish(x) except AttributeError: pass return x * backend.sigmoid(x) return swish
def get_dropout(**kwargs): """Wrapper over custom dropout. Fix problem of ``None`` shape for tf.keras. It is not possible to define FixedDropout class as global object, because we do not have modules for inheritance at first time. Issue: https://github.com/tensorflow/tensorflow/issues/30946 """ backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) class FixedDropout(layers.Dropout): def _get_noise_shape(self, inputs): if self.noise_shape is None: return self.noise_shape symbolic_shape = backend.shape(inputs) noise_shape = [symbolic_shape[axis] if shape is None else shape for axis, shape in enumerate(self.noise_shape)] return tuple(noise_shape) return FixedDropout
def EfficientNet(width_coefficient, depth_coefficient, default_resolution, dropout_rate=0.2, drop_connect_rate=0.2, depth_divisor=8, blocks_args=DEFAULT_BLOCKS_ARGS, model_name='efficientnet', include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000, freeze_bn=False, **kwargs): """Instantiates the EfficientNet architecture using given scaling coefficients. Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. # Arguments width_coefficient: float, scaling coefficient for network width. depth_coefficient: float, scaling coefficient for network depth. default_resolution: int, default input image size. dropout_rate: float, dropout rate before final classifier layer. drop_connect_rate: float, dropout rate at skip connections. depth_divisor: int. blocks_args: A list of BlockArgs to construct block modules. model_name: string, model name. include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False. It should have exactly 3 inputs channels. pooling: optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. # Returns A Keras model instance. # Raises ValueError: in case of invalid argument for `weights`, or invalid input shape. """ global backend, layers, models, keras_utils backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) features = [] if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=default_resolution, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if backend.backend() == 'tensorflow': from tensorflow.python.keras.backend import is_keras_tensor else: is_keras_tensor = backend.is_keras_tensor if not is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 activation = get_swish(**kwargs) # Build stem x = img_input x = layers.Conv2D(round_filters(32, width_coefficient, depth_divisor), 3, strides=(2, 2), padding='same', use_bias=False, kernel_initializer=CONV_KERNEL_INITIALIZER, name='stem_conv')(x) # x = BatchNormalization(freeze=freeze_bn, axis=bn_axis, name='stem_bn')(x) x = layers.BatchNormalization(axis=bn_axis, name='stem_bn')(x) x = layers.Activation(activation, name='stem_activation')(x) # Build blocks num_blocks_total = sum(block_args.num_repeat for block_args in blocks_args) block_num = 0 for idx, block_args in enumerate(blocks_args): assert block_args.num_repeat > 0 # Update block input and output filters based on depth multiplier. block_args = block_args._replace( input_filters=round_filters(block_args.input_filters, width_coefficient, depth_divisor), output_filters=round_filters(block_args.output_filters, width_coefficient, depth_divisor), num_repeat=round_repeats(block_args.num_repeat, depth_coefficient)) # The first block needs to take care of stride and filter size increase. drop_rate = drop_connect_rate * float(block_num) / num_blocks_total x = mb_conv_block(x, block_args, activation=activation, drop_rate=drop_rate, prefix='block{}a_'.format(idx + 1), freeze_bn=freeze_bn) block_num += 1 if block_args.num_repeat > 1: # pylint: disable=protected-access block_args = block_args._replace( input_filters=block_args.output_filters, strides=[1, 1]) # pylint: enable=protected-access for bidx in xrange(block_args.num_repeat - 1): drop_rate = drop_connect_rate * float( block_num) / num_blocks_total block_prefix = 'block{}{}_'.format( idx + 1, string.ascii_lowercase[bidx + 1]) x = mb_conv_block(x, block_args, activation=activation, drop_rate=drop_rate, prefix=block_prefix, freeze_bn=freeze_bn) block_num += 1 if idx < len(blocks_args) - 1 and blocks_args[idx + 1].strides[0] == 2: features.append(x) elif idx == len(blocks_args) - 1: features.append(x) return features