def MobileNet(input_shape=None, alpha=1.0, depth_multiplier=1, dropout=1e-3, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000, classifier_activation='softmax', **kwargs): global layers if 'layers' in kwargs: layers = kwargs.pop('layers') else: layers = VersionAwareLayers() if kwargs: raise ValueError('Unknown argument(s): %s' % (kwargs, )) if not (weights in {'imagenet', None} or file_io.file_exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top` ' 'as true, `classes` should be 1000') # Determine proper input shape and default size. if input_shape is None: default_size = 224 else: if backend.image_data_format() == 'channels_first': rows = input_shape[1] cols = input_shape[2] else: rows = input_shape[0] cols = input_shape[1] if rows == cols and rows in [128, 160, 192, 224]: default_size = rows else: default_size = 224 input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=default_size, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if backend.image_data_format() == 'channels_last': row_axis, col_axis = (0, 1) else: row_axis, col_axis = (1, 2) rows = input_shape[row_axis] cols = input_shape[col_axis] if weights == 'imagenet': if depth_multiplier != 1: raise ValueError('If imagenet weights are being loaded, ' 'depth multiplier must be 1') if alpha not in [0.25, 0.50, 0.75, 1.0]: raise ValueError('If imagenet weights are being loaded, ' 'alpha can be one of' '`0.25`, `0.50`, `0.75` or `1.0` only.') if rows != cols or rows not in [128, 160, 192, 224]: rows = 224 logging.warning('`input_shape` is undefined or non-square, ' 'or `rows` is not in [128, 160, 192, 224]. ' 'Weights for input shape (224, 224) will be' ' loaded as the default.') if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor x = _conv_block(img_input, 32, strides=(2, 2), block_id=0) x = _conv_block(x, 64, block_id=1) x = _conv_block(x, 128, strides=(2, 2), block_id=2) x = _conv_block(x, 128, block_id=3) x = _conv_block(x, 256, strides=(2, 2), block_id=4) x = _conv_block(x, 256, block_id=5) x = _conv_block(x, 512, strides=(2, 2), block_id=6) x = _conv_block(x, 512, block_id=7) x = _conv_block(x, 512, block_id=8) x = _conv_block(x, 512, block_id=9) x = _conv_block(x, 512, block_id=10) x = _conv_block(x, 512, block_id=11) x = _conv_block(x, 1024, strides=(2, 2), block_id=12) x = _conv_block(x, 1024, block_id=13) if include_top: if backend.image_data_format() == 'channels_first': shape = (int(1024 * alpha), 1, 1) else: shape = (1, 1, int(1024 * alpha)) x = layers.GlobalAveragePooling2D()(x) x = layers.Reshape(shape, name='reshape_1')(x) x = layers.Dropout(dropout, name='dropout')(x) x = layers.Conv2D(classes, (1, 1), padding='same', name='conv_preds')(x) x = layers.Reshape((classes, ), name='reshape_2')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Activation(activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows)) # Load weights. if weights == 'imagenet': if alpha == 1.0: alpha_text = '1_0' elif alpha == 0.75: alpha_text = '7_5' elif alpha == 0.50: alpha_text = '5_0' else: alpha_text = '2_5' if include_top: model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows) weight_path = BASE_WEIGHT_PATH + model_name weights_path = data_utils.get_file(model_name, weight_path, cache_subdir='models') else: model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows) weight_path = BASE_WEIGHT_PATH + model_name weights_path = data_utils.get_file(model_name, weight_path, cache_subdir='models') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def ResNet(stack_fn, preact, use_bias, model_name='resnet', include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000, classifier_activation='softmax', **kwargs): """Instantiates the ResNet, ResNetV2, and ResNeXt architecture. Reference: - [Deep Residual Learning for Image Recognition]( https://arxiv.org/abs/1512.03385) (CVPR 2015) Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. Caution: Be sure to properly pre-process your inputs to the application. Please see `applications.resnet.preprocess_input` for an example. Arguments: stack_fn: a function that returns output tensor for the stacked residual blocks. preact: whether to use pre-activation or not (True for ResNetV2, False for ResNet and ResNeXt). use_bias: whether to use biases for convolutional layers or not (True for ResNet and ResNetV2, False for ResNeXt). model_name: string, model name. include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 224)` (with `channels_first` data format). It should have exactly 3 inputs channels. pooling: optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. **kwargs: For backwards compatibility only. Returns: A `keras.Model` instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. """ global layers if 'layers' in kwargs: layers = kwargs.pop('layers') else: layers = VersionAwareLayers() if kwargs: raise ValueError('Unknown argument(s): %s' % (kwargs, )) if not (weights in {'imagenet', None} or file_io.file_exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=224, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)), name='conv1_pad')(img_input) x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x) if not preact: x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name='conv1_bn')(x) x = layers.Activation('relu', name='conv1_relu')(x) x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x) x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x) x = stack_fn(x) if preact: x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name='post_bn')(x) x = layers.Activation('relu', name='post_relu')(x) if include_top: x = layers.GlobalAveragePooling2D(name='avg_pool')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D(name='avg_pool')(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D(name='max_pool')(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name=model_name) # Load weights. if (weights == 'imagenet') and (model_name in WEIGHTS_HASHES): if include_top: file_name = model_name + '_weights_tf_dim_ordering_tf_kernels.h5' file_hash = WEIGHTS_HASHES[model_name][0] else: file_name = model_name + '_weights_tf_dim_ordering_tf_kernels_notop.h5' file_hash = WEIGHTS_HASHES[model_name][1] weights_path = data_utils.get_file(file_name, BASE_WEIGHTS_PATH + file_name, cache_subdir='models', file_hash=file_hash) model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def MobileNetV2XNNPACK(input_shape=None, alpha=1.0, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000, classifier_activation='softmax', **kwargs): """Instantiates the MobileNetV2 architecture. Reference: - [MobileNetV2: Inverted Residuals and Linear Bottlenecks]( https://arxiv.org/abs/1801.04381) (CVPR 2018) Optionally loads weights pre-trained on ImageNet. Caution: Be sure to properly pre-process your inputs to the application. Please see `applications.mobilenet_v2.preprocess_input` for an example. Arguments: input_shape: Optional shape tuple, to be specified if you would like to use a model with an input image resolution that is not (224, 224, 3). It should have exactly 3 inputs channels (224, 224, 3). You can also omit this option if you would like to infer input_shape from an input_tensor. If you choose to include both input_tensor and input_shape then input_shape will be used if they match, if the shapes do not match then we will throw an error. E.g. `(160, 160, 3)` would be one valid value. alpha: Float between 0 and 1. controls the width of the network. This is known as the width multiplier in the MobileNetV2 paper, but the name is kept for consistency with `applications.MobileNetV1` model in Keras. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. include_top: Boolean, whether to include the fully-connected layer at the top of the network. Defaults to `True`. weights: String, one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. pooling: String, optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional block. - `avg` means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: Integer, optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. **kwargs: For backwards compatibility only. Returns: A `keras.Model` instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape or invalid alpha, rows when weights='imagenet' ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. """ global layers if 'layers' in kwargs: layers = kwargs.pop('layers') else: layers = VersionAwareLayers() if kwargs: raise ValueError('Unknown argument(s): %s' % (kwargs, )) if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top` ' 'as true, `classes` should be 1000') # Determine proper input shape and default size. # If both input_shape and input_tensor are used, they should match if input_shape is not None and input_tensor is not None: try: is_input_t_tensor = backend.is_keras_tensor(input_tensor) except ValueError: try: is_input_t_tensor = backend.is_keras_tensor( layer_utils.get_source_inputs(input_tensor)) except ValueError: raise ValueError('input_tensor: ', input_tensor, 'is not type input_tensor') if is_input_t_tensor: if backend.image_data_format == 'channels_first': if backend.int_shape(input_tensor)[1] != input_shape[1]: raise ValueError( 'input_shape: ', input_shape, 'and input_tensor: ', input_tensor, 'do not meet the same shape requirements') else: if backend.int_shape(input_tensor)[2] != input_shape[1]: raise ValueError( 'input_shape: ', input_shape, 'and input_tensor: ', input_tensor, 'do not meet the same shape requirements') else: raise ValueError('input_tensor specified: ', input_tensor, 'is not a keras tensor') # If input_shape is None, infer shape from input_tensor if input_shape is None and input_tensor is not None: try: backend.is_keras_tensor(input_tensor) except ValueError: raise ValueError('input_tensor: ', input_tensor, 'is type: ', type(input_tensor), 'which is not a valid type') if input_shape is None and not backend.is_keras_tensor(input_tensor): default_size = 224 elif input_shape is None and backend.is_keras_tensor(input_tensor): if backend.image_data_format() == 'channels_first': rows = backend.int_shape(input_tensor)[2] cols = backend.int_shape(input_tensor)[3] else: rows = backend.int_shape(input_tensor)[1] cols = backend.int_shape(input_tensor)[2] if rows == cols and rows in [96, 128, 160, 192, 224]: default_size = rows else: default_size = 224 # If input_shape is None and no input_tensor elif input_shape is None: default_size = 224 # If input_shape is not None, assume default size else: if backend.image_data_format() == 'channels_first': rows = input_shape[1] cols = input_shape[2] else: rows = input_shape[0] cols = input_shape[1] if rows == cols and rows in [96, 128, 160, 192, 224]: default_size = rows else: default_size = 224 input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=default_size, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if backend.image_data_format() == 'channels_last': row_axis, col_axis = (0, 1) else: row_axis, col_axis = (1, 2) rows = input_shape[row_axis] cols = input_shape[col_axis] if weights == 'imagenet': if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]: raise ValueError('If imagenet weights are being loaded, ' 'alpha can be one of `0.35`, `0.50`, `0.75`, ' '`1.0`, `1.3` or `1.4` only.') if rows != cols or rows not in [96, 128, 160, 192, 224]: rows = 224 logging.warning('`input_shape` is undefined or non-square, ' 'or `rows` is not in [96, 128, 160, 192, 224].' ' Weights for input shape (224, 224) will be' ' loaded as the default.') if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 first_block_filters = _make_divisible(32 * alpha, 8) # x = layers.ZeroPadding2D( # padding=imagenet_utils.correct_pad(img_input, 3), # name='Conv1_pad')(img_input) x = img_input x = layers.Conv2D(first_block_filters, kernel_size=3, strides=(2, 2), padding='same', use_bias=False, name='Conv1')(x) x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name='bn_Conv1')(x) x = layers.ReLU(6., name='Conv1_relu')(x) x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1, expansion=1, block_id=0) x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2, expansion=6, block_id=1) x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2) x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2, expansion=6, block_id=3) x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=4) x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5) x = _inverted_res_block(x, filters=64, alpha=alpha, stride=2, expansion=6, block_id=6) x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7) x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8) x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9) x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10) x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11) x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12) x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13) x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14) x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15) x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16) # no alpha applied to last conv as stated in the paper: # if the width multiplier is greater than 1 we # increase the number of output channels if alpha > 1.0: last_block_filters = _make_divisible(1280 * alpha, 8) else: last_block_filters = 1280 x = layers.Conv2D(last_block_filters, kernel_size=1, use_bias=False, name='Conv_1')(x) x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv_1_bn')(x) x = layers.ReLU(6., name='out_relu')(x) if include_top: # Use XNNPACK compatible average pooling # x = layers.GlobalAveragePooling2D()(x) x = layers.AveragePooling2D(pool_size=(7, 7))(x) imagenet_utils.validate_activation(classifier_activation, weights) # x = layers.Dense(classes, activation=classifier_activation, # name='predictions')(x) # Implement the top dense layer as a convolution, so that we don't need to remove spatial dims x = layers.Conv2D(classes, kernel_size=1, name='predictions')(x) x = layers.Softmax()(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows)) # Load weights. if weights == 'imagenet': if include_top: model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + str(alpha) + '_' + str(rows) + '.h5') weight_path = BASE_WEIGHT_PATH + model_name weights_path = data_utils.get_file(model_name, weight_path, cache_subdir='models') else: model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + str(alpha) + '_' + str(rows) + '_no_top' + '.h5') weight_path = BASE_WEIGHT_PATH + model_name weights_path = data_utils.get_file(model_name, weight_path, cache_subdir='models') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def MobileNet(input_shape=None, alpha=1.0, depth_multiplier=1, dropout=1e-3, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000, classifier_activation='softmax', **kwargs): """Instantiates the MobileNet architecture. Reference: - [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications]( https://arxiv.org/abs/1704.04861) Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in the `tf.keras.backend.image_data_format()`. Note: each Keras Application expects a specific kind of input preprocessing. For MobileNet, call `tf.keras.applications.mobilenet.preprocess_input` on your inputs before passing them to the model. Arguments: input_shape: Optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or (3, 224, 224) (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. Default to `None`. `input_shape` will be ignored if the `input_tensor` is provided. alpha: Controls the width of the network. This is known as the width multiplier in the MobileNet paper. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. Default to 1.0. depth_multiplier: Depth multiplier for depthwise convolution. This is called the resolution multiplier in the MobileNet paper. Default to 1.0. dropout: Dropout rate. Default to 0.001. include_top: Boolean, whether to include the fully-connected layer at the top of the network. Default to `True`. weights: One of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. Default to `imagenet`. input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. `input_tensor` is useful for sharing inputs between multiple different networks. Default to None. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` (default) means that the output of the model will be the 4D tensor output of the last convolutional block. - `avg` means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: Optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. Defaults to 1000. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. **kwargs: For backwards compatibility only. Returns: A `keras.Model` instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. """ global layers if 'layers' in kwargs: layers = kwargs.pop('layers') else: layers = VersionAwareLayers() if kwargs: raise ValueError('Unknown argument(s): %s' % (kwargs, )) if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top` ' 'as true, `classes` should be 1000') # Determine proper input shape and default size. if input_shape is None: default_size = 224 else: if backend.image_data_format() == 'channels_first': rows = input_shape[1] cols = input_shape[2] else: rows = input_shape[0] cols = input_shape[1] if rows == cols and rows in [128, 160, 192, 224]: default_size = rows else: default_size = 224 input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=default_size, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if backend.image_data_format() == 'channels_last': row_axis, col_axis = (0, 1) else: row_axis, col_axis = (1, 2) rows = input_shape[row_axis] cols = input_shape[col_axis] if weights == 'imagenet': if depth_multiplier != 1: raise ValueError('If imagenet weights are being loaded, ' 'depth multiplier must be 1') if alpha not in [0.25, 0.50, 0.75, 1.0]: raise ValueError('If imagenet weights are being loaded, ' 'alpha can be one of' '`0.25`, `0.50`, `0.75` or `1.0` only.') if rows != cols or rows not in [128, 160, 192, 224]: rows = 224 logging.warning('`input_shape` is undefined or non-square, ' 'or `rows` is not in [128, 160, 192, 224]. ' 'Weights for input shape (224, 224) will be' ' loaded as the default.') if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor x = _conv_block(img_input, 32, alpha, strides=(2, 2)) x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, strides=(2, 2), block_id=2) x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, strides=(2, 2), block_id=4) x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, strides=(2, 2), block_id=6) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, strides=(2, 2), block_id=12) x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) if include_top: if backend.image_data_format() == 'channels_first': shape = (int(1024 * alpha), 1, 1) else: shape = (1, 1, int(1024 * alpha)) x = layers.GlobalAveragePooling2D()(x) x = layers.Reshape(shape, name='reshape_1')(x) x = layers.Dropout(dropout, name='dropout')(x) x = layers.Conv2D(classes, (1, 1), padding='same', name='conv_preds')(x) x = layers.Reshape((classes, ), name='reshape_2')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Activation(activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows)) # Load weights. if weights == 'imagenet': if alpha == 1.0: alpha_text = '1_0' elif alpha == 0.75: alpha_text = '7_5' elif alpha == 0.50: alpha_text = '5_0' else: alpha_text = '2_5' if include_top: model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows) weight_path = BASE_WEIGHT_PATH + model_name weights_path = data_utils.get_file(model_name, weight_path, cache_subdir='models') else: model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows) weight_path = BASE_WEIGHT_PATH + model_name weights_path = data_utils.get_file(model_name, weight_path, cache_subdir='models') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def MobileNet(input_shape=None, alpha=1.0, depth_multiplier=1, dropout=1e-3, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000, classifier_activation='softmax', **kwargs): global layers if 'layers' in kwargs: layers = kwargs.pop('layers') else: layers = VersionAwareLayers() if kwargs: raise ValueError('Unknown argument(s): %s' % (kwargs, )) if not (weights in {'imagenet', None} or file_io.file_exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top` ' 'as true, `classes` should be 1000') # Determine proper input shape and default size. if input_shape is None: default_size = 224 else: if backend.image_data_format() == 'channels_first': rows = input_shape[1] cols = input_shape[2] else: rows = input_shape[0] cols = input_shape[1] if rows == cols and rows in [128, 160, 192, 224]: default_size = rows else: default_size = 224 input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=default_size, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if backend.image_data_format() == 'channels_last': row_axis, col_axis = (0, 1) else: row_axis, col_axis = (1, 2) rows = input_shape[row_axis] cols = input_shape[col_axis] if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor x = _conv_block(img_input, 32, alpha, strides=(8, 8)) #x = _depthwise_conv_block( # x, 128, alpha, depth_multiplier, strides=(2,2), block_id=2) #x = _depthwise_conv_block( # x, 1024, alpha, depth_multiplier, strides=(2,2)) x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) if include_top: if backend.image_data_format() == 'channels_first': shape = (int(1024 * alpha), 1, 1) else: shape = (1, 1, int(1024 * alpha)) x = layers.GlobalAveragePooling2D()(x) x = layers.Reshape(shape, name='reshape_1')(x) x = layers.Conv2D(classes, (1, 1), padding='same', name='conv_preds')(x) x = layers.Reshape((classes, ), name='reshape_2')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Activation(activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows)) return model
def InceptionResNetV2(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000, classifier_activation='softmax', **kwargs): """Instantiates the Inception-ResNet v2 architecture. Reference: - [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261) (AAAI 2017) Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. Caution: Be sure to properly pre-process your inputs to the application. Please see `applications.inception_resnet_v2.preprocess_input` for an example. Arguments: include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is `False` (otherwise the input shape has to be `(299, 299, 3)` (with `'channels_last'` data format) or `(3, 299, 299)` (with `'channels_first'` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 75. E.g. `(150, 150, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional block. - `'avg'` means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - `'max'` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is `True`, and if no `weights` argument is specified. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. **kwargs: For backwards compatibility only. Returns: A `keras.Model` instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. """ global layers if 'layers' in kwargs: layers = kwargs.pop('layers') else: layers = VersionAwareLayers() if kwargs: raise ValueError('Unknown argument(s): %s' % (kwargs, )) if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=299, min_size=75, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor # Stem block: 35 x 35 x 192 x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid') x = conv2d_bn(x, 32, 3, padding='valid') x = conv2d_bn(x, 64, 3) x = layers.MaxPooling2D(3, strides=2)(x) x = conv2d_bn(x, 80, 1, padding='valid') x = conv2d_bn(x, 192, 3, padding='valid') x = layers.MaxPooling2D(3, strides=2)(x) # Mixed 5b (Inception-A block): 35 x 35 x 320 branch_0 = conv2d_bn(x, 96, 1) branch_1 = conv2d_bn(x, 48, 1) branch_1 = conv2d_bn(branch_1, 64, 5) branch_2 = conv2d_bn(x, 64, 1) branch_2 = conv2d_bn(branch_2, 96, 3) branch_2 = conv2d_bn(branch_2, 96, 3) branch_pool = layers.AveragePooling2D(3, strides=1, padding='same')(x) branch_pool = conv2d_bn(branch_pool, 64, 1) branches = [branch_0, branch_1, branch_2, branch_pool] channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3 x = layers.Concatenate(axis=channel_axis, name='mixed_5b')(branches) # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320 for block_idx in range(1, 11): x = inception_resnet_block(x, scale=0.17, block_type='block35', block_idx=block_idx) # Mixed 6a (Reduction-A block): 17 x 17 x 1088 branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid') branch_1 = conv2d_bn(x, 256, 1) branch_1 = conv2d_bn(branch_1, 256, 3) branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid') branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x) branches = [branch_0, branch_1, branch_pool] x = layers.Concatenate(axis=channel_axis, name='mixed_6a')(branches) # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088 for block_idx in range(1, 21): x = inception_resnet_block(x, scale=0.1, block_type='block17', block_idx=block_idx) # Mixed 7a (Reduction-B block): 8 x 8 x 2080 branch_0 = conv2d_bn(x, 256, 1) branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid') branch_1 = conv2d_bn(x, 256, 1) branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid') branch_2 = conv2d_bn(x, 256, 1) branch_2 = conv2d_bn(branch_2, 288, 3) branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='valid') branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x) branches = [branch_0, branch_1, branch_2, branch_pool] x = layers.Concatenate(axis=channel_axis, name='mixed_7a')(branches) # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080 for block_idx in range(1, 10): x = inception_resnet_block(x, scale=0.2, block_type='block8', block_idx=block_idx) x = inception_resnet_block(x, scale=1., activation=None, block_type='block8', block_idx=10) # Final convolution block: 8 x 8 x 1536 x = conv2d_bn(x, 1536, 1, name='conv_7b') if include_top: # Classification block x = layers.GlobalAveragePooling2D(name='avg_pool')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name='inception_resnet_v2') # Load weights. if weights == 'imagenet': if include_top: fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5' weights_path = data_utils.get_file( fname, BASE_WEIGHT_URL + fname, cache_subdir='models', file_hash='e693bd0210a403b3192acc6073ad2e96') else: fname = ('inception_resnet_v2_weights_' 'tf_dim_ordering_tf_kernels_notop.h5') weights_path = data_utils.get_file( fname, BASE_WEIGHT_URL + fname, cache_subdir='models', file_hash='d19885ff4a710c122648d3b5c3b684e4') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def ResNet(stack_fn, preact, use_bias, model_name='resnet', include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000, classifier_activation='softmax', **kwargs): global layers if 'layers' in kwargs: layers = kwargs.pop('layers') else: layers = VersionAwareLayers() if kwargs: raise ValueError('Unknown argument(s): %s' % (kwargs, )) if not (weights in {'imagenet', None} or file_io.file_exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=224, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 # x = layers.ZeroPadding2D( # padding=((3, 3), (3, 3)), name='conv1_pad')(img_input) x = layers.Conv2D(64, 3, strides=1, padding='SAME', use_bias=use_bias, kernel_initializer='glorot_normal', name='conv1_conv')(img_input) if not preact: x = layers.BatchNormalization(axis=bn_axis, epsilon=2.001e-5, momentum=0.9, name='conv1_bn')(x) x = layers.PReLU(shared_axes=[1, 2], alpha_initializer='glorot_normal', name='conv1_prelu')(x) # x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x) # x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x) x = stack_fn(x) if preact: x = layers.BatchNormalization(axis=bn_axis, epsilon=2.001e-5, momentum=0.9, name='post_bn')(x) x = layers.PReLU(shared_axes=[1, 2], alpha_initializer='glorot_normal', name='post_prelu')(x) if include_top: x = layers.GlobalAveragePooling2D(name='avg_pool')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D(name='avg_pool')(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D(name='max_pool')(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name=model_name) # Load weights. if (weights == 'imagenet') and (model_name in WEIGHTS_HASHES): if include_top: file_name = model_name + '_weights_tf_dim_ordering_tf_kernels.h5' file_hash = WEIGHTS_HASHES[model_name][0] else: file_name = model_name + '_weights_tf_dim_ordering_tf_kernels_notop.h5' file_hash = WEIGHTS_HASHES[model_name][1] weights_path = data_utils.get_file(file_name, BASE_WEIGHTS_PATH + file_name, cache_subdir='models', file_hash=file_hash) model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model