def get_data_shape(): # input image dimensions img_rows, img_cols = 28, 28 if backend.image_data_format() == 'channels_first': return 1, img_rows, img_cols else: return img_rows, img_cols, 1
def __init__(self, x, y, image_data_generator, batch_size=32, shuffle=False, sample_weight=None, seed=None, data_format=None, save_to_dir=None, save_prefix='', save_format='png', subset=None, dtype=None): if data_format is None: data_format = backend.image_data_format() kwargs = {} if 'dtype' in tf_inspect.getfullargspec( image.NumpyArrayIterator.__init__)[0]: if dtype is None: dtype = backend.floatx() kwargs['dtype'] = dtype super(NumpyArrayIterator, self).__init__( x, y, image_data_generator, batch_size=batch_size, shuffle=shuffle, sample_weight=sample_weight, seed=seed, data_format=data_format, save_to_dir=save_to_dir, save_prefix=save_prefix, save_format=save_format, subset=subset, **kwargs)
def save_img(path, x, data_format=None, file_format=None, scale=True, **kwargs): """Saves an image stored as a Numpy array to a path or file object. Arguments: path: Path or file object. x: Numpy array. data_format: Image data format, either "channels_first" or "channels_last". file_format: Optional file format override. If omitted, the format to use is determined from the filename extension. If a file object was used instead of a filename, this parameter should always be used. scale: Whether to rescale image values to be within `[0, 255]`. **kwargs: Additional keyword arguments passed to `PIL.Image.save()`. """ if data_format is None: data_format = backend.image_data_format() image.save_img(path, x, data_format=data_format, file_format=file_format, scale=scale, **kwargs)
def array_to_img(x, data_format=None, scale=True, dtype=None): """Converts a 3D Numpy array to a PIL Image instance. Arguments: x: Input Numpy array. data_format: Image data format. either "channels_first" or "channels_last". scale: Whether to rescale image values to be within `[0, 255]`. dtype: Dtype to use. Returns: A PIL Image instance. Raises: ImportError: if PIL is not available. ValueError: if invalid `x` or `data_format` is passed. """ if data_format is None: data_format = backend.image_data_format() kwargs = {} if 'dtype' in tf_inspect.getfullargspec(image.array_to_img)[0]: if dtype is None: dtype = backend.floatx() kwargs['dtype'] = dtype return image.array_to_img(x, data_format=data_format, scale=scale, **kwargs)
def conv_block(x, growth_rate, name): """A building block for a dense block. Arguments: x: input tensor. growth_rate: float, growth rate at dense layers. name: string, block label. Returns: output tensor for the block. """ bn_axis = 3 if K.image_data_format() == 'channels_last' else 1 x1 = BatchNormalization( axis=bn_axis, epsilon=1.001e-5, name=name + '_0_bn')( x) x1 = Activation('relu', name=name + '_0_relu')(x1) x1 = Conv2D(4 * growth_rate, 1, use_bias=False, name=name + '_1_conv')(x1) x1 = BatchNormalization( axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')( x1) x1 = Activation('relu', name=name + '_1_relu')(x1) x1 = Conv2D( growth_rate, 3, padding='same', use_bias=False, name=name + '_2_conv')( x1) x = Concatenate(axis=bn_axis, name=name + '_concat')([x, x1]) return x
def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4): ''' Apply BatchNorm, Relu, 3x3 Conv2D, optional bottleneck block and dropout Args: ip: Input keras tensor nb_filter: number of filters bottleneck: add bottleneck block dropout_rate: dropout rate weight_decay: weight decay factor Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck) ''' concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip) x = Activation('relu')(x) if bottleneck: inter_channel = nb_filter * 4 # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False, kernel_regularizer=l2(weight_decay))(x) x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) x = Activation('relu')(x) x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same', use_bias=False)(x) if dropout_rate: x = Dropout(dropout_rate)(x) return x
def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None, weight_decay=1e-4, grow_nb_filters=True, return_concat_list=False): ''' Build a dense_block where the output of each conv_block is fed to subsequent ones Args: x: keras tensor nb_layers: the number of layers of conv_block to append to the model. nb_filter: number of filters growth_rate: growth rate bottleneck: bottleneck block dropout_rate: dropout rate weight_decay: weight decay factor grow_nb_filters: flag to decide to allow number of filters to grow return_concat_list: return the list of feature maps along with the actual output Returns: keras tensor with nb_layers of conv_block appended ''' concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 x_list = [x] for i in range(nb_layers): cb = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay) x_list.append(cb) x = concatenate([x, cb], axis=concat_axis) if grow_nb_filters: nb_filter += growth_rate if return_concat_list: return x, nb_filter, x_list else: return x, nb_filter
def load_data(label_mode='fine'): """Loads CIFAR100 dataset. Arguments: label_mode: one of "fine", "coarse". Returns: Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. Raises: ValueError: in case of invalid `label_mode`. """ if label_mode not in ['fine', 'coarse']: raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.') dirname = 'cifar-100-python' origin = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' path = get_file(dirname, origin=origin, untar=True) fpath = os.path.join(path, 'train') x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels') fpath = os.path.join(path, 'test') x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels') y_train = np.reshape(y_train, (len(y_train), 1)) y_test = np.reshape(y_test, (len(y_test), 1)) if K.image_data_format() == 'channels_last': x_train = x_train.transpose(0, 2, 3, 1) x_test = x_test.transpose(0, 2, 3, 1) return (x_train, y_train), (x_test, y_test)
def prepare_simple_model(input_tensor, loss_name, target): axis = 1 if K.image_data_format() == 'channels_first' else -1 loss = None num_channels = None activation = None if loss_name == 'sparse_categorical_crossentropy': loss = lambda y_true, y_pred: K.sparse_categorical_crossentropy( # pylint: disable=g-long-lambda y_true, y_pred, axis=axis) num_channels = np.amax(target) + 1 activation = 'softmax' elif loss_name == 'categorical_crossentropy': loss = lambda y_true, y_pred: K.categorical_crossentropy( # pylint: disable=g-long-lambda y_true, y_pred, axis=axis) num_channels = target.shape[axis] activation = 'softmax' elif loss_name == 'binary_crossentropy': loss = lambda y_true, y_pred: K.binary_crossentropy(y_true, y_pred) # pylint: disable=unnecessary-lambda num_channels = target.shape[axis] activation = 'sigmoid' predictions = Conv2D(num_channels, 1, activation=activation, kernel_initializer='ones', bias_initializer='ones')(input_tensor) simple_model = keras.models.Model(inputs=input_tensor, outputs=predictions) simple_model.compile(optimizer='rmsprop', loss=loss) return simple_model
def load_data(): """Loads CIFAR10 dataset. Returns: Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. """ dirname = 'cifar-10-batches-py' origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' path = get_file(dirname, origin=origin, untar=True) num_train_samples = 50000 x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8') y_train = np.empty((num_train_samples,), dtype='uint8') for i in range(1, 6): fpath = os.path.join(path, 'data_batch_' + str(i)) (x_train[(i - 1) * 10000:i * 10000, :, :, :], y_train[(i - 1) * 10000:i * 10000]) = load_batch(fpath) fpath = os.path.join(path, 'test_batch') x_test, y_test = load_batch(fpath) y_train = np.reshape(y_train, (len(y_train), 1)) y_test = np.reshape(y_test, (len(y_test), 1)) if K.image_data_format() == 'channels_last': x_train = x_train.transpose(0, 2, 3, 1) x_test = x_test.transpose(0, 2, 3, 1) return (x_train, y_train), (x_test, y_test)
def preprocess_input(x, data_format=None, mode='caffe'): """Preprocesses a tensor or Numpy array encoding a batch of images. Arguments: x: Input Numpy or symbolic tensor, 3D or 4D. data_format: Data format of the image tensor/array. mode: One of "caffe", "tf". - caffe: will convert the images from RGB to BGR, then will zero-center each color channel with respect to the ImageNet dataset, without scaling. - tf: will scale pixels between -1 and 1, sample-wise. Returns: Preprocessed tensor or Numpy array. Raises: ValueError: In case of unknown `data_format` argument. """ if data_format is None: data_format = K.image_data_format() if data_format not in {'channels_first', 'channels_last'}: raise ValueError('Unknown data_format ' + str(data_format)) if isinstance(x, np.ndarray): return _preprocess_numpy_input(x, data_format=data_format, mode=mode) else: return _preprocess_symbolic_input(x, data_format=data_format, mode=mode)
def convert(in_path, out_path): """Convert any Keras model to the frugally-deep model format.""" assert K.backend() == "tensorflow" assert K.floatx() == "float32" assert K.image_data_format() == 'channels_last' print('loading {}'.format(in_path)) model = load_model(in_path) # Force creation of underlying functional model. # see: https://github.com/fchollet/keras/issues/8136 # Loss and optimizer type do not matter, since to don't train the model. model.compile(loss='mse', optimizer='sgd') model = convert_sequential_to_model(model) test_data = gen_test_data(model) json_output = {} json_output['architecture'] = json.loads(model.to_json()) json_output['image_data_format'] = K.image_data_format() for depth in range(1, 3, 1): json_output['conv2d_valid_offset_depth_' + str(depth)] =\ check_operation_offset(depth, offset_conv2d_eval, 'valid') json_output['conv2d_same_offset_depth_' + str(depth)] =\ check_operation_offset(depth, offset_conv2d_eval, 'same') json_output['separable_conv2d_valid_offset_depth_' + str(depth)] =\ check_operation_offset(depth, offset_sep_conv2d_eval, 'valid') json_output['separable_conv2d_same_offset_depth_' + str(depth)] =\ check_operation_offset(depth, offset_sep_conv2d_eval, 'same') json_output['max_pooling_2d_valid_offset'] =\ check_operation_offset(1, conv2d_offset_max_pool_eval, 'valid') json_output['max_pooling_2d_same_offset'] =\ check_operation_offset(1, conv2d_offset_max_pool_eval, 'same') json_output['average_pooling_2d_valid_offset'] =\ check_operation_offset(1, conv2d_offset_average_pool_eval, 'valid') json_output['average_pooling_2d_same_offset'] =\ check_operation_offset(1, conv2d_offset_average_pool_eval, 'same') json_output['input_shapes'] = get_shapes(test_data['inputs']) json_output['output_shapes'] = get_shapes(test_data['outputs']) json_output['tests'] = [test_data] json_output['trainable_params'] = get_all_weights(model) print('writing {}'.format(out_path)) write_text_file(out_path, json.dumps( json_output, allow_nan=False, indent=2, sort_keys=True))
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): """Adds an initial convolution layer (with batch normalization and relu6). Arguments: inputs: Input tensor of shape `(rows, cols, 3)` (with `channels_last` data format) or (3, rows, cols) (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value. filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution). alpha: controls the width of the network. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. kernel: An integer or tuple/list of 2 integers, specifying the width and height of the 2D convolution window. Can be a single integer to specify the same value for all spatial dimensions. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the width and height. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. Input shape: 4D tensor with shape: `(samples, channels, rows, cols)` if data_format='channels_first' or 4D tensor with shape: `(samples, rows, cols, channels)` if data_format='channels_last'. Output shape: 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if data_format='channels_first' or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if data_format='channels_last'. `rows` and `cols` values might have changed due to stride. Returns: Output tensor of block. """ channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 filters = int(filters * alpha) x = ZeroPadding2D(padding=(1, 1), name='conv1_pad')(inputs) x = Conv2D( filters, kernel, padding='valid', use_bias=False, strides=strides, name='conv1')(x) x = BatchNormalization(axis=channel_axis, name='conv1_bn')(x) return Activation(relu6, name='conv1_relu')(x)
def set_model(self, model): """Sets Keras model and creates summary ops.""" self.model = model self.sess = K.get_session() # only make histogram summary op if it hasn't already been made if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = array_ops.transpose(w_img) shape = K.int_shape(w_img) w_img = array_ops.reshape(w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = array_ops.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = array_ops.reshape(w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) if self.write_grads: for weight in layer.trainable_weights: mapped_weight_name = weight.name.replace(':', '_') grads = model.optimizer.get_gradients(model.total_loss, weight) def is_indexed_slices(grad): return type(grad).__name__ == 'IndexedSlices' grads = [grad.values if is_indexed_slices(grad) else grad for grad in grads] tf_summary.histogram('{}_grad'.format(mapped_weight_name), grads) if hasattr(layer, 'output'): tf_summary.histogram('{}_out'.format(layer.name), layer.output) self.merged = tf_summary.merge_all() if self.write_graph: self.writer = self._writer_class(self.log_dir, self.sess.graph) else: self.writer = self._writer_class(self.log_dir)
def normalize_data_format(value): if value is None: value = backend.image_data_format() data_format = value.lower() if data_format not in {'channels_first', 'channels_last'}: raise ValueError('The `data_format` argument must be one of ' '"channels_first", "channels_last". Received: ' + str(value)) return data_format
def __init__(self, rate, data_format=None, **kwargs): super(SpatialDropout3D, self).__init__(rate, **kwargs) if data_format is None: data_format = K.image_data_format() if data_format not in {'channels_last', 'channels_first'}: raise ValueError('data_format must be in ' '{"channels_last", "channels_first"}') self.data_format = data_format self.input_spec = InputSpec(ndim=5)
def identity_block(input_tensor, kernel_size, filters, stage, block): """The identity block is the block that has no conv layer at shortcut. # Arguments input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names # Returns Output tensor for the block. """ filters1, filters2, filters3 = filters if backend.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = layers.Conv2D(filters1, (1, 1), use_bias=False, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '2a')(input_tensor) x = layers.BatchNormalization(axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '2a')(x) x = layers.Activation('relu')(x) x = layers.Conv2D(filters2, kernel_size, padding='same', use_bias=False, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '2b')(x) x = layers.BatchNormalization(axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '2b')(x) x = layers.Activation('relu')(x) x = layers.Conv2D(filters3, (1, 1), use_bias=False, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '2c')(x) x = layers.BatchNormalization(axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '2c')(x) x = layers.add([x, input_tensor]) x = layers.Activation('relu')(x) return x
def __init__(self, featurewise_center=False, samplewise_center=False, featurewise_std_normalization=False, samplewise_std_normalization=False, zca_whitening=False, zca_epsilon=1e-6, rotation_range=0, width_shift_range=0., height_shift_range=0., brightness_range=None, shear_range=0., zoom_range=0., channel_shift_range=0., fill_mode='nearest', cval=0., horizontal_flip=False, vertical_flip=False, rescale=None, preprocessing_function=None, data_format=None, validation_split=0.0, dtype=None): if data_format is None: data_format = backend.image_data_format() kwargs = {} if 'dtype' in tf_inspect.getfullargspec( image.ImageDataGenerator.__init__)[0]: if dtype is None: dtype = backend.floatx() kwargs['dtype'] = dtype super(ImageDataGenerator, self).__init__( featurewise_center=featurewise_center, samplewise_center=samplewise_center, featurewise_std_normalization=featurewise_std_normalization, samplewise_std_normalization=samplewise_std_normalization, zca_whitening=zca_whitening, zca_epsilon=zca_epsilon, rotation_range=rotation_range, width_shift_range=width_shift_range, height_shift_range=height_shift_range, brightness_range=brightness_range, shear_range=shear_range, zoom_range=zoom_range, channel_shift_range=channel_shift_range, fill_mode=fill_mode, cval=cval, horizontal_flip=horizontal_flip, vertical_flip=vertical_flip, rescale=rescale, preprocessing_function=preprocessing_function, data_format=data_format, validation_split=validation_split, **kwargs)
def _separable_conv_block(ip, filters, kernel_size=(3, 3), strides=(1, 1), block_id=None): """Adds 2 blocks of [relu-separable conv-batchnorm]. Arguments: ip: Input tensor filters: Number of output filters per layer kernel_size: Kernel size of separable convolutions strides: Strided convolution for downsampling block_id: String block_id Returns: A Keras tensor """ channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 with K.name_scope('separable_conv_block_%s' % block_id): x = Activation('relu')(ip) x = SeparableConv2D( filters, kernel_size, strides=strides, name='separable_conv_1_%s' % block_id, padding='same', use_bias=False, kernel_initializer='he_normal')( x) x = BatchNormalization( axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='separable_conv_1_bn_%s' % (block_id))( x) x = Activation('relu')(x) x = SeparableConv2D( filters, kernel_size, name='separable_conv_2_%s' % block_id, padding='same', use_bias=False, kernel_initializer='he_normal')( x) x = BatchNormalization( axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='separable_conv_2_bn_%s' % (block_id))( x) return x
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): """A block that has a conv layer at shortcut. Arguments: input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names strides: Strides for the first conv layer in the block. Returns: Output tensor for the block. Note that from stage 3, the first conv layer at main path is with strides=(2, 2) And the shortcut should have strides=(2, 2) as well """ filters1, filters2, filters3 = filters if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = Conv2D( filters1, (1, 1), strides=strides, name=conv_name_base + '2a')( input_tensor) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) x = Activation('relu')(x) x = Conv2D( filters2, kernel_size, padding='same', name=conv_name_base + '2b')( x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) x = Activation('relu')(x) x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) shortcut = Conv2D( filters3, (1, 1), strides=strides, name=conv_name_base + '1')( input_tensor) shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut) x = layers.add([x, shortcut]) x = Activation('relu')(x) return x
def get_input_datasets(use_bfloat16=False): """Downloads the MNIST dataset and creates train and eval dataset objects. Args: use_bfloat16: Boolean to determine if input should be cast to bfloat16 Returns: Train dataset and eval dataset. The dataset doesn't include batch dim. """ cast_dtype = dtypes.bfloat16 if use_bfloat16 else dtypes.float32 # the data, split between train and test sets (x_train, y_train), (x_test, y_test) = mnist.load_data() train_data_shape = (x_train.shape[0],) + get_data_shape() test_data_shape = (x_test.shape[0],) + get_data_shape() if backend.image_data_format() == 'channels_first': x_train = x_train.reshape(train_data_shape) x_test = x_test.reshape(test_data_shape) else: x_train = x_train.reshape(train_data_shape) x_test = x_test.reshape(test_data_shape) x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 x_test /= 255 # convert class vectors to binary class matrices y_train = utils.to_categorical(y_train, NUM_CLASSES) y_test = utils.to_categorical(y_test, NUM_CLASSES) # train dataset train_ds = dataset_ops.Dataset.from_tensor_slices((x_train, y_train)) # TODO(rchao): Remove maybe_shard_dataset() once auto-sharding is done. train_ds = maybe_shard_dataset(train_ds) train_ds = train_ds.repeat() train_ds = train_ds.map(lambda x, y: (math_ops.cast(x, cast_dtype), y)) train_ds = train_ds.batch(64, drop_remainder=True) # eval dataset eval_ds = dataset_ops.Dataset.from_tensor_slices((x_test, y_test)) # TODO(rchao): Remove maybe_shard_dataset() once auto-sharding is done. eval_ds = maybe_shard_dataset(eval_ds) eval_ds = eval_ds.repeat() eval_ds = eval_ds.map(lambda x, y: (math_ops.cast(x, cast_dtype), y)) eval_ds = eval_ds.batch(64, drop_remainder=True) return train_ds, eval_ds
def __init__(self, pool_function, pool_size, strides, padding='valid', data_format='channels_last', name=None, **kwargs): super(Pooling1D, self).__init__(name=name, **kwargs) if data_format is None: data_format = backend.image_data_format() if strides is None: strides = pool_size self.pool_function = pool_function self.pool_size = conv_utils.normalize_tuple(pool_size, 1, 'pool_size') self.strides = conv_utils.normalize_tuple(strides, 1, 'strides') self.padding = conv_utils.normalize_padding(padding) self.data_format = conv_utils.normalize_data_format(data_format) self.input_spec = InputSpec(ndim=3)
def conv2d_bn(x, filters, num_row, num_col, padding='same', strides=(1, 1), name=None): """Utility function to apply conv + BN. Arguments: x: input tensor. filters: filters in `Conv2D`. num_row: height of the convolution kernel. num_col: width of the convolution kernel. padding: padding mode in `Conv2D`. strides: strides in `Conv2D`. name: name of the ops; will become `name + '_conv'` for the convolution and `name + '_bn'` for the batch norm layer. Returns: Output tensor after applying `Conv2D` and `BatchNormalization`. """ if name is not None: bn_name = name + '_bn' conv_name = name + '_conv' else: bn_name = None conv_name = None if K.image_data_format() == 'channels_first': bn_axis = 1 else: bn_axis = 3 x = Conv2D( filters, (num_row, num_col), strides=strides, padding=padding, use_bias=False, name=conv_name)( x) x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) x = Activation('relu', name=name)(x) return x
def __transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4): ''' Apply BatchNorm, Relu 1x1, Conv2D, optional compression, dropout and Maxpooling2D Args: ip: keras tensor nb_filter: number of filters compression: calculated as 1 - reduction. Reduces the number of feature maps in the transition block. dropout_rate: dropout rate weight_decay: weight decay factor Returns: keras tensor, after applying batch_norm, relu-conv, dropout, maxpool ''' concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip) x = Activation('relu')(x) x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False, kernel_regularizer=l2(weight_decay))(x) x = AveragePooling2D((2, 2), strides=(2, 2))(x) return x
def __init__(self, directory, image_data_generator, target_size=(256, 256), color_mode='rgb', classes=None, class_mode='categorical', batch_size=32, shuffle=True, seed=None, data_format=None, save_to_dir=None, save_prefix='', save_format='png', follow_links=False, subset=None, interpolation='nearest', dtype=None): if data_format is None: data_format = backend.image_data_format() kwargs = {} if 'dtype' in tf_inspect.getfullargspec( image.ImageDataGenerator.__init__)[0]: if dtype is None: dtype = backend.floatx() kwargs['dtype'] = dtype super(DirectoryIterator, self).__init__( directory, image_data_generator, target_size=target_size, color_mode=color_mode, classes=classes, class_mode=class_mode, batch_size=batch_size, shuffle=shuffle, seed=seed, data_format=data_format, save_to_dir=save_to_dir, save_prefix=save_prefix, save_format=save_format, follow_links=follow_links, subset=subset, interpolation=interpolation, **kwargs)
def conv2d_bn(x, filters, kernel_size, strides=1, padding='same', activation='relu', use_bias=False, name=None): """Utility function to apply conv + BN. Arguments: x: input tensor. filters: filters in `Conv2D`. kernel_size: kernel size as in `Conv2D`. strides: strides in `Conv2D`. padding: padding mode in `Conv2D`. activation: activation in `Conv2D`. use_bias: whether to use a bias in `Conv2D`. name: name of the ops; will become `name + '_ac'` for the activation and `name + '_bn'` for the batch norm layer. Returns: Output tensor after applying `Conv2D` and `BatchNormalization`. """ x = Conv2D( filters, kernel_size, strides=strides, padding=padding, use_bias=use_bias, name=name)( x) if not use_bias: bn_axis = 1 if K.image_data_format() == 'channels_first' else 3 bn_name = None if name is None else name + '_bn' x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) if activation is not None: ac_name = None if name is None else name + '_ac' x = Activation(activation, name=ac_name)(x) return x
def transition_block(x, reduction, name): """A transition block. Arguments: x: input tensor. reduction: float, compression rate at transition layers. name: string, block label. Returns: output tensor for the block. """ bn_axis = 3 if K.image_data_format() == 'channels_last' else 1 x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_bn')(x) x = Activation('relu', name=name + '_relu')(x) x = Conv2D( int(K.int_shape(x)[bn_axis] * reduction), 1, use_bias=False, name=name + '_conv')( x) x = AveragePooling2D(2, strides=2, name=name + '_pool')(x) return x
def preprocess_input(x, data_format=None): """Preprocesses a tensor encoding a batch of images. # Arguments x: input Numpy tensor, 4D. data_format: data format of the image tensor. # Returns Preprocessed tensor. """ if data_format is None: data_format = K.image_data_format() assert data_format in {'channels_last', 'channels_first'} if data_format == 'channels_first': if x.ndim == 3: # 'RGB'->'BGR' x = x[::-1, ...] # Zero-center by mean pixel x[0, :, :] -= 103.939 x[1, :, :] -= 116.779 x[2, :, :] -= 123.68 else: x = x[:, ::-1, ...] x[:, 0, :, :] -= 103.939 x[:, 1, :, :] -= 116.779 x[:, 2, :, :] -= 123.68 else: # 'RGB'->'BGR' x = x[..., ::-1] # Zero-center by mean pixel x[..., 0] -= 103.939 x[..., 1] -= 116.779 x[..., 2] -= 123.68 x *= 0.017 # scale values return x
def identity_block(input_tensor, kernel_size, filters, stage, block): """The identity block is the block that has no conv layer at shortcut. Arguments: input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names Returns: Output tensor for the block. """ filters1, filters2, filters3 = filters if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) x = Activation('relu')(x) x = Conv2D( filters2, kernel_size, padding='same', name=conv_name_base + '2b')( x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) x = Activation('relu')(x) x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) x = layers.add([x, input_tensor]) x = Activation('relu')(x) return x
def img_to_array(img, data_format=None, dtype=None): """Converts a PIL Image instance to a Numpy array. Arguments: img: PIL Image instance. data_format: Image data format, either "channels_first" or "channels_last". dtype: Dtype to use for the returned array. Returns: A 3D Numpy array. Raises: ValueError: if invalid `img` or `data_format` is passed. """ if data_format is None: data_format = backend.image_data_format() kwargs = {} if 'dtype' in tf_inspect.getfullargspec(image.img_to_array)[0]: if dtype is None: dtype = backend.floatx() kwargs['dtype'] = dtype return image.img_to_array(img, data_format=data_format, **kwargs)
def MobileNetV3(stack_fn, last_point_ch, input_shape=None, alpha=1.0, model_type='large', minimalistic=False, include_top=True, weights='imagenet', input_tensor=None, classes=1000, pooling=None, dropout_rate=0.2, classifier_activation='softmax'): if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top` ' 'as true, `classes` should be 1000') # Determine proper input shape and default size. # If both input_shape and input_tensor are used, they should match if input_shape is not None and input_tensor is not None: try: is_input_t_tensor = backend.is_keras_tensor(input_tensor) except ValueError: try: is_input_t_tensor = backend.is_keras_tensor( layer_utils.get_source_inputs(input_tensor)) except ValueError: raise ValueError('input_tensor: ', input_tensor, 'is not type input_tensor') if is_input_t_tensor: if backend.image_data_format == 'channels_first': if backend.int_shape(input_tensor)[1] != input_shape[1]: raise ValueError( 'input_shape: ', input_shape, 'and input_tensor: ', input_tensor, 'do not meet the same shape requirements') else: if backend.int_shape(input_tensor)[2] != input_shape[1]: raise ValueError( 'input_shape: ', input_shape, 'and input_tensor: ', input_tensor, 'do not meet the same shape requirements') else: raise ValueError('input_tensor specified: ', input_tensor, 'is not a keras tensor') # If input_shape is None, infer shape from input_tensor if input_shape is None and input_tensor is not None: try: backend.is_keras_tensor(input_tensor) except ValueError: raise ValueError('input_tensor: ', input_tensor, 'is type: ', type(input_tensor), 'which is not a valid type') if backend.is_keras_tensor(input_tensor): if backend.image_data_format() == 'channels_first': rows = backend.int_shape(input_tensor)[2] cols = backend.int_shape(input_tensor)[3] input_shape = (3, cols, rows) else: rows = backend.int_shape(input_tensor)[1] cols = backend.int_shape(input_tensor)[2] input_shape = (cols, rows, 3) # If input_shape is None and input_tensor is None using standart shape if input_shape is None and input_tensor is None: input_shape = (None, None, 3) if backend.image_data_format() == 'channels_last': row_axis, col_axis = (0, 1) else: row_axis, col_axis = (1, 2) rows = input_shape[row_axis] cols = input_shape[col_axis] if rows and cols and (rows < 32 or cols < 32): raise ValueError( 'Input size must be at least 32x32; got `input_shape=' + str(input_shape) + '`') if weights == 'imagenet': if (not minimalistic and alpha not in [0.75, 1.0] or minimalistic and alpha != 1.0): raise ValueError( 'If imagenet weights are being loaded, ' 'alpha can be one of `0.75`, `1.0` for non minimalistic' ' or `1.0` for minimalistic only.') if rows != cols or rows != 224: logging.warning('`input_shape` is undefined or non-square, ' 'or `rows` is not 224.' ' Weights for input shape (224, 224) will be' ' loaded as the default.') if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 if minimalistic: kernel = 3 activation = relu se_ratio = None else: kernel = 5 activation = hard_swish se_ratio = 0.25 x = img_input # x = layers.Rescaling(1. / 255.)(x) x = layers.Conv2D(16, kernel_size=3, strides=(2, 2), padding='same', use_bias=False, name='Conv')(x) x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv/BatchNorm')(x) x = activation(x) x = stack_fn(x, kernel, activation, se_ratio) last_conv_ch = _depth(backend.int_shape(x)[channel_axis] * 6) # if the width multiplier is greater than 1 we # increase the number of output channels if alpha > 1.0: last_point_ch = _depth(last_point_ch * alpha) x = layers.Conv2D(last_conv_ch, kernel_size=1, padding='same', use_bias=False, name='Conv_1')(x) x = layers.BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv_1/BatchNorm')(x) x = activation(x) x = layers.Conv2D(last_point_ch, kernel_size=1, padding='same', use_bias=True, name='Conv_2')(x) x = activation(x) if include_top: x = layers.GlobalAveragePooling2D()(x) if channel_axis == 1: x = layers.Reshape((last_point_ch, 1, 1))(x) else: x = layers.Reshape((1, 1, last_point_ch))(x) if dropout_rate > 0: x = layers.Dropout(dropout_rate)(x) x = layers.Conv2D(classes, kernel_size=1, padding='same', name='Logits')(x) x = layers.Flatten()(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Activation(activation=classifier_activation, name='Predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D(name='avg_pool')(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D(name='max_pool')(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = models.Model(inputs, x, name='MobilenetV3' + model_type) # Load weights. if weights == 'imagenet': model_name = '{}{}_224_{}_float'.format( model_type, '_minimalistic' if minimalistic else '', str(alpha)) if include_top: file_name = 'weights_mobilenet_v3_' + model_name + '.h5' file_hash = WEIGHTS_HASHES[model_name][0] else: file_name = 'weights_mobilenet_v3_' + model_name + '_no_top.h5' file_hash = WEIGHTS_HASHES[model_name][1] weights_path = data_utils.get_file(file_name, BASE_WEIGHT_PATH + file_name, cache_subdir='models', file_hash=file_hash) model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): """A block that has a conv layer at shortcut. # Arguments input_tensor: input tensor kernel_size: default 3, the kernel size of middle conv layer at main path filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names strides: Strides for the second conv layer in the block. # Returns Output tensor for the block. Note that from stage 3, the second conv layer at main path is with strides=(2, 2) And the shortcut should have strides=(2, 2) as well """ filters1, filters2, filters3 = filters if backend.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = layers.Conv2D(filters1, (1, 1), use_bias=False, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '2a')(input_tensor) x = layers.BatchNormalization(axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '2a')(x) x = layers.Activation('relu')(x) x = layers.Conv2D(filters2, kernel_size, strides=strides, padding='same', use_bias=False, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '2b')(x) x = layers.BatchNormalization(axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '2b')(x) x = layers.Activation('relu')(x) x = layers.Conv2D(filters3, (1, 1), use_bias=False, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '2c')(x) x = layers.BatchNormalization(axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '2c')(x) shortcut = layers.Conv2D( filters3, (1, 1), use_bias=False, strides=strides, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name=conv_name_base + '1')(input_tensor) shortcut = layers.BatchNormalization(axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name=bn_name_base + '1')(shortcut) x = layers.add([x, shortcut]) x = layers.Activation('relu')(x) return x
def DenseNet( batch_size, blocks, include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000, classifier_activation='softmax', ): """Instantiates the DenseNet architecture. Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. Caution: Be sure to properly pre-process your inputs to the application. Please see `applications.densenet.preprocess_input` for an example. Arguments: blocks: numbers of building blocks for the four dense layers. include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `'channels_last'` data format) or `(3, 224, 224)` (with `'channels_first'` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. pooling: optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional block. - `avg` means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. Returns: A `keras.Model` instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. """ if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as `"imagenet"` with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=224, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = layers.Input(shape=input_shape, batch_size=batch_size) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape, batch_size=batch_size) else: img_input = input_tensor raise NotImplemented("Unsupported flow") bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)))(img_input) x = layers.Conv2D(64, 7, strides=2, use_bias=False, name='conv1/conv')(x) x = layers.BatchNormalization( axis=bn_axis, epsilon=1.001e-5, name='conv1/bn')( x) x = layers.Activation('relu', name='conv1/relu')(x) x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)))(x) x = layers.MaxPooling2D(3, strides=2, name='pool1')(x) x = dense_block(x, blocks[0], name='conv2') x = transition_block(x, 0.5, name='pool2') x = dense_block(x, blocks[1], name='conv3') x = transition_block(x, 0.5, name='pool3') x = dense_block(x, blocks[2], name='conv4') x = transition_block(x, 0.5, name='pool4') x = dense_block(x, blocks[3], name='conv5') x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name='bn')(x) x = layers.Activation('relu', name='relu')(x) if include_top: x = layers.GlobalAveragePooling2D(name='avg_pool')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D(name='avg_pool')(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D(name='max_pool')(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. if blocks == [6, 12, 24, 16]: model = training.Model(inputs, x, name='densenet121') elif blocks == [6, 12, 32, 32]: model = training.Model(inputs, x, name='densenet169') elif blocks == [6, 12, 48, 32]: model = training.Model(inputs, x, name='densenet201') else: model = training.Model(inputs, x, name='densenet') # Load weights. if weights == 'imagenet': if include_top: if blocks == [6, 12, 24, 16]: weights_path = data_utils.get_file( 'densenet121_weights_tf_dim_ordering_tf_kernels.h5', DENSENET121_WEIGHT_PATH, cache_subdir='models', file_hash='9d60b8095a5708f2dcce2bca79d332c7') elif blocks == [6, 12, 32, 32]: weights_path = data_utils.get_file( 'densenet169_weights_tf_dim_ordering_tf_kernels.h5', DENSENET169_WEIGHT_PATH, cache_subdir='models', file_hash='d699b8f76981ab1b30698df4c175e90b') elif blocks == [6, 12, 48, 32]: weights_path = data_utils.get_file( 'densenet201_weights_tf_dim_ordering_tf_kernels.h5', DENSENET201_WEIGHT_PATH, cache_subdir='models', file_hash='1ceb130c1ea1b78c3bf6114dbdfd8807') else: if blocks == [6, 12, 24, 16]: weights_path = data_utils.get_file( 'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5', DENSENET121_WEIGHT_PATH_NO_TOP, cache_subdir='models', file_hash='30ee3e1110167f948a6b9946edeeb738') elif blocks == [6, 12, 32, 32]: weights_path = data_utils.get_file( 'densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5', DENSENET169_WEIGHT_PATH_NO_TOP, cache_subdir='models', file_hash='b8c4d4c20dd625c148057b9ff1c1176b') elif blocks == [6, 12, 48, 32]: weights_path = data_utils.get_file( 'densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5', DENSENET201_WEIGHT_PATH_NO_TOP, cache_subdir='models', file_hash='c13680b51ded0fb44dff2d8f86ac8bb1') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def MobileNet(input_shape=None, alpha=1.0, depth_multiplier=1, dropout=1e-3, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000, classifier_activation='softmax', **kwargs): """Instantiates the MobileNet architecture. Reference paper: - [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861) Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in the `tf.keras.backend.image_data_format()`. Caution: Be sure to properly pre-process your inputs to the application. Please see `applications.mobilenet.preprocess_input` for an example. Arguments: input_shape: Optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or (3, 224, 224) (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. Default to `None`. `input_shape` will be ignored if the `input_tensor` is provided. alpha: Controls the width of the network. This is known as the width multiplier in the MobileNet paper. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. Default to 1.0. depth_multiplier: Depth multiplier for depthwise convolution. This is called the resolution multiplier in the MobileNet paper. Default to 1.0. dropout: Dropout rate. Default to 0.001. include_top: Boolean, whether to include the fully-connected layer at the top of the network. Default to `True`. weights: One of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. Default to `imagenet`. input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. `input_tensor` is useful for sharing inputs between multiple different networks. Default to None. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` (default) means that the output of the model will be the 4D tensor output of the last convolutional block. - `avg` means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: Optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. Defaults to 1000. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. **kwargs: For backwards compatibility only. Returns: A `keras.Model` instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. """ if 'layers' in kwargs: global layers layers = kwargs.pop('layers') if kwargs: raise ValueError('Unknown argument(s): %s' % (kwargs, )) if not (weights in {'imagenet', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top` ' 'as true, `classes` should be 1000') # Determine proper input shape and default size. if input_shape is None: default_size = 224 else: if backend.image_data_format() == 'channels_first': rows = input_shape[1] cols = input_shape[2] else: rows = input_shape[0] cols = input_shape[1] if rows == cols and rows in [128, 160, 192, 224]: default_size = rows else: default_size = 224 input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=default_size, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if backend.image_data_format() == 'channels_last': row_axis, col_axis = (0, 1) else: row_axis, col_axis = (1, 2) rows = input_shape[row_axis] cols = input_shape[col_axis] if weights == 'imagenet': if depth_multiplier != 1: raise ValueError('If imagenet weights are being loaded, ' 'depth multiplier must be 1') if alpha not in [0.25, 0.50, 0.75, 1.0]: raise ValueError('If imagenet weights are being loaded, ' 'alpha can be one of' '`0.25`, `0.50`, `0.75` or `1.0` only.') if rows != cols or rows not in [128, 160, 192, 224]: rows = 224 logging.warning('`input_shape` is undefined or non-square, ' 'or `rows` is not in [128, 160, 192, 224]. ' 'Weights for input shape (224, 224) will be' ' loaded as the default.') if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor x = _conv_block(img_input, 32, alpha, strides=(2, 2)) x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, strides=(2, 2), block_id=2) x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, strides=(2, 2), block_id=4) x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, strides=(2, 2), block_id=6) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, strides=(2, 2), block_id=12) x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) if include_top: if backend.image_data_format() == 'channels_first': shape = (int(1024 * alpha), 1, 1) else: shape = (1, 1, int(1024 * alpha)) x = layers.GlobalAveragePooling2D()(x) x = layers.Reshape(shape, name='reshape_1')(x) x = layers.Dropout(dropout, name='dropout')(x) x = layers.Conv2D(classes, (1, 1), padding='same', name='conv_preds')(x) x = layers.Reshape((classes, ), name='reshape_2')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Activation(activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows)) # Load weights. if weights == 'imagenet': if alpha == 1.0: alpha_text = '1_0' elif alpha == 0.75: alpha_text = '7_5' elif alpha == 0.50: alpha_text = '5_0' else: alpha_text = '2_5' if include_top: model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows) weight_path = BASE_WEIGHT_PATH + model_name weights_path = data_utils.get_file(model_name, weight_path, cache_subdir='models') else: model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows) weight_path = BASE_WEIGHT_PATH + model_name weights_path = data_utils.get_file(model_name, weight_path, cache_subdir='models') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def _reduction_a_cell(ip, p, filters, block_id=None): """Adds a Reduction cell for NASNet-A (Fig. 4 in the paper). Args: ip: Input tensor `x` p: Input tensor `p` filters: Number of output filters block_id: String block_id Returns: A Keras tensor """ channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 with backend.name_scope('reduction_A_block_%s' % block_id): p = _adjust_block(p, ip, filters, block_id) h = layers.Activation('relu')(ip) h = layers.Conv2D(filters, (1, 1), strides=(1, 1), padding='same', name='reduction_conv_1_%s' % block_id, use_bias=False, kernel_initializer='he_normal')(h) h = layers.BatchNormalization(axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='reduction_bn_1_%s' % block_id)(h) h3 = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad(h, 3), name='reduction_pad_1_%s' % block_id)(h) with backend.name_scope('block_1'): x1_1 = _separable_conv_block(h, filters, (5, 5), strides=(2, 2), block_id='reduction_left1_%s' % block_id) x1_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2), block_id='reduction_right1_%s' % block_id) x1 = layers.add([x1_1, x1_2], name='reduction_add_1_%s' % block_id) with backend.name_scope('block_2'): x2_1 = layers.MaxPooling2D( (3, 3), strides=(2, 2), padding='valid', name='reduction_left2_%s' % block_id)(h3) x2_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2), block_id='reduction_right2_%s' % block_id) x2 = layers.add([x2_1, x2_2], name='reduction_add_2_%s' % block_id) with backend.name_scope('block_3'): x3_1 = layers.AveragePooling2D( (3, 3), strides=(2, 2), padding='valid', name='reduction_left3_%s' % block_id)(h3) x3_2 = _separable_conv_block(p, filters, (5, 5), strides=(2, 2), block_id='reduction_right3_%s' % block_id) x3 = layers.add([x3_1, x3_2], name='reduction_add3_%s' % block_id) with backend.name_scope('block_4'): x4 = layers.AveragePooling2D( (3, 3), strides=(1, 1), padding='same', name='reduction_left4_%s' % block_id)(x1) x4 = layers.add([x2, x4]) with backend.name_scope('block_5'): x5_1 = _separable_conv_block(x1, filters, (3, 3), block_id='reduction_left4_%s' % block_id) x5_2 = layers.MaxPooling2D( (3, 3), strides=(2, 2), padding='valid', name='reduction_right5_%s' % block_id)(h3) x5 = layers.add([x5_1, x5_2], name='reduction_add4_%s' % block_id) x = layers.concatenate([x2, x3, x4, x5], axis=channel_dim, name='reduction_concat_%s' % block_id) return x, ip
def _make_histogram_ops(self, model): """Defines histogram ops when histogram_freq > 0.""" # only make histogram summary op if it hasn't already been made if self.histogram_freq and self.merged is None: for layer in self.model.layers: for weight in layer.weights: mapped_weight_name = weight.name.replace(':', '_') tf_summary.histogram(mapped_weight_name, weight) if self.write_images: w_img = array_ops.squeeze(weight) shape = K.int_shape(w_img) if len(shape) == 2: # dense layer kernel case if shape[0] > shape[1]: w_img = array_ops.transpose(w_img) shape = K.int_shape(w_img) w_img = array_ops.reshape( w_img, [1, shape[0], shape[1], 1]) elif len(shape) == 3: # convnet case if K.image_data_format() == 'channels_last': # switch to channels_first to display # every kernel as a separate image w_img = array_ops.transpose(w_img, perm=[2, 0, 1]) shape = K.int_shape(w_img) w_img = array_ops.reshape( w_img, [shape[0], shape[1], shape[2], 1]) elif len(shape) == 1: # bias case w_img = array_ops.reshape(w_img, [1, shape[0], 1, 1]) else: # not possible to handle 3D convnets etc. continue shape = K.int_shape(w_img) assert len(shape) == 4 and shape[-1] in [1, 3, 4] tf_summary.image(mapped_weight_name, w_img) if self.write_grads: for weight in layer.trainable_weights: mapped_weight_name = weight.name.replace(':', '_') grads = model.optimizer.get_gradients( model.total_loss, weight) def is_indexed_slices(grad): return type(grad).__name__ == 'IndexedSlices' grads = [ grad.values if is_indexed_slices(grad) else grad for grad in grads ] tf_summary.histogram( '{}_grad'.format(mapped_weight_name), grads) if hasattr(layer, 'output'): if isinstance(layer.output, list): for i, output in enumerate(layer.output): tf_summary.histogram( '{}_out_{}'.format(layer.name, i), output) else: tf_summary.histogram('{}_out'.format(layer.name), layer.output)
def ResNet(input_shape=None, classes=10, block='bottleneck', residual_unit='v2', repetitions=None, initial_filters=64, activation='softmax', include_top=True, input_tensor=None, dropout=None, transition_dilation_rate=(1, 1), initial_strides=(2, 2), initial_kernel_size=(7, 7), initial_pooling='max', final_pooling=None, top='classification'): """Builds a custom ResNet like architecture. Defaults to ResNet50 v2. Args: input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` dim ordering) or `(3, 224, 224)` (with `channels_first` dim ordering). It should have exactly 3 inputs channels, and width and height should be no smaller than 8. E.g. `(224, 224, 3)` would be one valid value. classes: The number of outputs at final softmax layer block: The block function to use. This is either `'basic'` or `'bottleneck'`. The original paper used `basic` for layers < 50. repetitions: Number of repetitions of various block units. At each block unit, the number of filters are doubled and the input size is halved. Default of None implies the ResNet50v2 values of [3, 4, 6, 3]. transition_dilation_rate: Used for pixel-wise prediction tasks such as image segmentation. residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2' for bn relu conv. See [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027) for details. dropout: None for no dropout, otherwise rate of dropout from 0 to 1. Based on [Wide Residual Networks.(https://arxiv.org/pdf/1605.07146) paper. transition_dilation_rate: Dilation rate for transition layers. For semantic segmentation of images use a dilation rate of (2, 2). initial_strides: Stride of the very first residual unit and MaxPooling2D call, with default (2, 2), set to (1, 1) for small images like cifar. initial_kernel_size: kernel size of the very first convolution, (7, 7) for imagenet and (3, 3) for small image datasets like tiny imagenet and cifar. See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details. initial_pooling: Determine if there will be an initial pooling layer, 'max' for imagenet and None for small image datasets. See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details. final_pooling: Optional pooling mode for feature extraction at the final model layer when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. top: Defines final layers to evaluate based on a specific problem type. Options are 'classification' for ImageNet style problems, 'segmentation' for problems like the Pascal VOC dataset, and None to exclude these layers entirely. Returns: The keras `Model`. """ if activation not in ['softmax', 'sigmoid', None]: raise ValueError( 'activation must be one of "softmax", "sigmoid", or None') if activation == 'sigmoid' and classes != 1: raise ValueError( 'sigmoid activation can only be used when classes = 1') if repetitions is None: repetitions = [3, 4, 6, 3] # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=32, min_size=8, data_format=K.image_data_format(), require_flatten=include_top) _handle_dim_ordering() if len(input_shape) != 3: raise Exception( "Input shape should be a tuple (nb_channels, nb_rows, nb_cols)") if block == 'basic': block_fn = basic_block elif block == 'bottleneck': block_fn = bottleneck elif isinstance(block, six.string_types): block_fn = _string_to_function(block) else: block_fn = block if residual_unit == 'v2': residual_unit = _bn_relu_conv elif residual_unit == 'v1': residual_unit = _conv_bn_relu elif isinstance(residual_unit, six.string_types): residual_unit = _string_to_function(residual_unit) else: residual_unit = residual_unit # Permute dimension order if necessary if K.image_data_format() == 'channels_first': input_shape = (input_shape[1], input_shape[2], input_shape[0]) # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=32, min_size=8, data_format=K.image_data_format(), require_flatten=include_top) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor x = _conv_bn_relu(filters=initial_filters, kernel_size=initial_kernel_size, strides=initial_strides)(img_input) if initial_pooling == 'max': x = MaxPooling2D(pool_size=(3, 3), strides=initial_strides, padding="same")(x) block = x filters = initial_filters for i, r in enumerate(repetitions): transition_dilation_rates = [transition_dilation_rate] * r transition_strides = [(1, 1)] * r if transition_dilation_rate == (1, 1) and i != 0: transition_strides[0] = (2, 2) block = _residual_block( block_fn, filters=filters, stage=i, blocks=r, is_first_layer=(i == 0), dropout=dropout, transition_dilation_rates=transition_dilation_rates, transition_strides=transition_strides, residual_unit=residual_unit)(block) filters *= 2 # Last activation x = _bn_relu(block) # Classifier block if include_top and top is 'classification': x = GlobalAveragePooling2D()(x) x = Dense(units=classes, activation=activation, kernel_initializer="he_normal")(x) elif include_top and top is 'segmentation': x = Conv2D(classes, (1, 1), activation='linear', padding='same')(x) if K.image_data_format() == 'channels_first': channel, row, col = input_shape else: row, col, channel = input_shape x = Reshape((row * col, classes))(x) x = Activation(activation)(x) x = Reshape((row, col, classes))(x) elif final_pooling == 'avg': x = GlobalAveragePooling2D()(x) elif final_pooling == 'max': x = GlobalMaxPooling2D()(x) if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input model = Model(inputs=inputs, outputs=x, name='resnet') return model
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): """Inverted ResNet block.""" channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 in_channels = backend.int_shape(inputs)[channel_axis] pointwise_conv_filters = int(filters * alpha) pointwise_filters = _make_divisible(pointwise_conv_filters, 8) x = inputs prefix = 'block_{}_'.format(block_id) if block_id: # Expand x = layers.Conv2D( expansion * in_channels, kernel_size=1, padding='same', use_bias=False, activation=None, name=prefix + 'expand')( x) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, name=prefix + 'expand_BN')( x) x = layers.ReLU(6., name=prefix + 'expand_relu')(x) else: prefix = 'expanded_conv_' # Depthwise if stride == 2: x = layers.ZeroPadding2D( padding=imagenet_utils.correct_pad(x, 3), name=prefix + 'pad')(x) x = layers.DepthwiseConv2D( kernel_size=3, strides=stride, activation=None, use_bias=False, padding='same' if stride == 1 else 'valid', name=prefix + 'depthwise')( x) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, name=prefix + 'depthwise_BN')( x) x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x) # Project x = layers.Conv2D( pointwise_filters, kernel_size=1, padding='same', use_bias=False, activation=None, name=prefix + 'project')( x) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, name=prefix + 'project_BN')( x) if in_channels == pointwise_filters and stride == 1: return layers.Add(name=prefix + 'add')([inputs, x]) return x
def MobileNetV2(input_shape=None, alpha=1.0, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000, classifier_activation='softmax', **kwargs): """Instantiates the MobileNetV2 architecture. MobileNetV2 is very similar to the original MobileNet, except that it uses inverted residual blocks with bottlenecking features. It has a drastically lower parameter count than the original MobileNet. MobileNets support any input size greater than 32 x 32, with larger image sizes offering better performance. Reference: - [MobileNetV2: Inverted Residuals and Linear Bottlenecks]( https://arxiv.org/abs/1801.04381) (CVPR 2018) This function returns a Keras image classification model, optionally loaded with weights pre-trained on ImageNet. For image classification use cases, see [this page for detailed examples]( https://keras.io/api/applications/#usage-examples-for-image-classification-models). For transfer learning use cases, make sure to read the [guide to transfer learning & fine-tuning]( https://keras.io/guides/transfer_learning/). Note: each Keras Application expects a specific kind of input preprocessing. For MobileNetV2, call `tf.keras.applications.mobilenet_v2.preprocess_input` on your inputs before passing them to the model. `mobilenet_v2.preprocess_input` will scale input pixels between -1 and 1. Args: input_shape: Optional shape tuple, to be specified if you would like to use a model with an input image resolution that is not (224, 224, 3). It should have exactly 3 inputs channels (224, 224, 3). You can also omit this option if you would like to infer input_shape from an input_tensor. If you choose to include both input_tensor and input_shape then input_shape will be used if they match, if the shapes do not match then we will throw an error. E.g. `(160, 160, 3)` would be one valid value. alpha: Float between 0 and 1. controls the width of the network. This is known as the width multiplier in the MobileNetV2 paper, but the name is kept for consistency with `applications.MobileNetV1` model in Keras. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1.0, default number of filters from the paper are used at each layer. include_top: Boolean, whether to include the fully-connected layer at the top of the network. Defaults to `True`. weights: String, one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. pooling: String, optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional block. - `avg` means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: Integer, optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. When loading pretrained weights, `classifier_activation` can only be `None` or `"softmax"`. **kwargs: For backwards compatibility only. Returns: A `keras.Model` instance. """ global layers if 'layers' in kwargs: layers = kwargs.pop('layers') else: layers = VersionAwareLayers() if kwargs: raise ValueError('Unknown argument(s): %s' % (kwargs,)) if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' 'as true, `classes` should be 1000') # Determine proper input shape and default size. # If both input_shape and input_tensor are used, they should match if input_shape is not None and input_tensor is not None: try: is_input_t_tensor = backend.is_keras_tensor(input_tensor) except ValueError: try: is_input_t_tensor = backend.is_keras_tensor( layer_utils.get_source_inputs(input_tensor)) except ValueError: raise ValueError('input_tensor: ', input_tensor, 'is not type input_tensor') if is_input_t_tensor: if backend.image_data_format() == 'channels_first': if backend.int_shape(input_tensor)[1] != input_shape[1]: raise ValueError('input_shape: ', input_shape, 'and input_tensor: ', input_tensor, 'do not meet the same shape requirements') else: if backend.int_shape(input_tensor)[2] != input_shape[1]: raise ValueError('input_shape: ', input_shape, 'and input_tensor: ', input_tensor, 'do not meet the same shape requirements') else: raise ValueError('input_tensor specified: ', input_tensor, 'is not a keras tensor') # If input_shape is None, infer shape from input_tensor if input_shape is None and input_tensor is not None: try: backend.is_keras_tensor(input_tensor) except ValueError: raise ValueError('input_tensor: ', input_tensor, 'is type: ', type(input_tensor), 'which is not a valid type') if input_shape is None and not backend.is_keras_tensor(input_tensor): default_size = 224 elif input_shape is None and backend.is_keras_tensor(input_tensor): if backend.image_data_format() == 'channels_first': rows = backend.int_shape(input_tensor)[2] cols = backend.int_shape(input_tensor)[3] else: rows = backend.int_shape(input_tensor)[1] cols = backend.int_shape(input_tensor)[2] if rows == cols and rows in [96, 128, 160, 192, 224]: default_size = rows else: default_size = 224 # If input_shape is None and no input_tensor elif input_shape is None: default_size = 224 # If input_shape is not None, assume default size else: if backend.image_data_format() == 'channels_first': rows = input_shape[1] cols = input_shape[2] else: rows = input_shape[0] cols = input_shape[1] if rows == cols and rows in [96, 128, 160, 192, 224]: default_size = rows else: default_size = 224 input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=default_size, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if backend.image_data_format() == 'channels_last': row_axis, col_axis = (0, 1) else: row_axis, col_axis = (1, 2) rows = input_shape[row_axis] cols = input_shape[col_axis] if weights == 'imagenet': if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]: raise ValueError('If imagenet weights are being loaded, ' 'alpha can be one of `0.35`, `0.50`, `0.75`, ' '`1.0`, `1.3` or `1.4` only.') if rows != cols or rows not in [96, 128, 160, 192, 224]: rows = 224 logging.warning('`input_shape` is undefined or non-square, ' 'or `rows` is not in [96, 128, 160, 192, 224].' ' Weights for input shape (224, 224) will be' ' loaded as the default.') if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 first_block_filters = _make_divisible(32 * alpha, 8) x = layers.Conv2D( first_block_filters, kernel_size=3, strides=(2, 2), padding='same', use_bias=False, name='Conv1')(img_input) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, name='bn_Conv1')( x) x = layers.ReLU(6., name='Conv1_relu')(x) x = _inverted_res_block( x, filters=16, alpha=alpha, stride=1, expansion=1, block_id=0) x = _inverted_res_block( x, filters=24, alpha=alpha, stride=2, expansion=6, block_id=1) x = _inverted_res_block( x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2) x = _inverted_res_block( x, filters=32, alpha=alpha, stride=2, expansion=6, block_id=3) x = _inverted_res_block( x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=4) x = _inverted_res_block( x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5) x = _inverted_res_block( x, filters=64, alpha=alpha, stride=2, expansion=6, block_id=6) x = _inverted_res_block( x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7) x = _inverted_res_block( x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8) x = _inverted_res_block( x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9) x = _inverted_res_block( x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10) x = _inverted_res_block( x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11) x = _inverted_res_block( x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12) x = _inverted_res_block( x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13) x = _inverted_res_block( x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14) x = _inverted_res_block( x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15) x = _inverted_res_block( x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16) # no alpha applied to last conv as stated in the paper: # if the width multiplier is greater than 1 we # increase the number of output channels if alpha > 1.0: last_block_filters = _make_divisible(1280 * alpha, 8) else: last_block_filters = 1280 x = layers.Conv2D( last_block_filters, kernel_size=1, use_bias=False, name='Conv_1')( x) x = layers.BatchNormalization( axis=channel_axis, epsilon=1e-3, momentum=0.999, name='Conv_1_bn')( x) x = layers.ReLU(6., name='out_relu')(x) if include_top: x = layers.GlobalAveragePooling2D()(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name='mobilenetv2_%0.2f_%s' % (alpha, rows)) # Load weights. if weights == 'imagenet': if include_top: model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + str(float(alpha)) + '_' + str(rows) + '.h5') weight_path = BASE_WEIGHT_PATH + model_name weights_path = data_utils.get_file( model_name, weight_path, cache_subdir='models') else: model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + str(float(alpha)) + '_' + str(rows) + '_no_top' + '.h5') weight_path = BASE_WEIGHT_PATH + model_name weights_path = data_utils.get_file( model_name, weight_path, cache_subdir='models') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def __init__(self, num_classes=10, dtype="float32", batch_size=None): super(CustomModel, self).__init__(name="resnet50") if backend.image_data_format() == "channels_first": self._lambda = layers.Lambda( lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)), name="transpose", ) bn_axis = 1 data_format = "channels_first" else: bn_axis = 3 data_format = "channels_last" self._padding = layers.ZeroPadding2D(padding=(3, 3), data_format=data_format, name="zero_pad") self._conv2d_1 = layers.Conv2D( 64, (7, 7), strides=(2, 2), padding="valid", use_bias=False, kernel_initializer="he_normal", kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name="conv1", ) self._bn_1 = layers.BatchNormalization( axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name="bn_conv1", ) self._activation_1 = layers.Activation("relu") self._maxpooling2d = layers.MaxPooling2D((3, 3), strides=(2, 2), padding="same") self._conv_block_1 = ConvBlock(3, [64, 64, 256], stage=2, block="a", strides=(1, 1)) self._identity_block_1 = IdentityBlock(3, [64, 64, 256], stage=2, block="b") self._identity_block_2 = IdentityBlock(3, [64, 64, 256], stage=2, block="c") self._conv_block_2 = ConvBlock(3, [128, 128, 512], stage=3, block="a") self._identity_block_3 = IdentityBlock(3, [128, 128, 512], stage=3, block="b") self._identity_block_4 = IdentityBlock(3, [128, 128, 512], stage=3, block="c") self._identity_block_5 = IdentityBlock(3, [128, 128, 512], stage=3, block="d") self._conv_block_3 = ConvBlock(3, [256, 256, 1024], stage=4, block="a") self._identity_block_6 = IdentityBlock(3, [256, 256, 1024], stage=4, block="b") self._identity_block_7 = IdentityBlock(3, [256, 256, 1024], stage=4, block="c") self._identity_block_8 = IdentityBlock(3, [256, 256, 1024], stage=4, block="d") self._identity_block_9 = IdentityBlock(3, [256, 256, 1024], stage=4, block="e") self._identity_block_10 = IdentityBlock(3, [256, 256, 1024], stage=4, block="f") self._conv_block_4 = ConvBlock(3, [512, 512, 2048], stage=5, block="a") self._identity_block_11 = IdentityBlock(3, [512, 512, 2048], stage=5, block="b") self._identity_block_12 = IdentityBlock(3, [512, 512, 2048], stage=5, block="c") rm_axes = ([1, 2] if backend.image_data_format() == "channels_last" else [2, 3]) self._lamba_2 = layers.Lambda(lambda x: backend.mean(x, rm_axes), name="reduce_mean") self._dense = layers.Dense( num_classes, kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name="fc1000", ) self._activation_2 = layers.Activation("softmax")
def VGG16(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000, classifier_activation='softmax'): """Instantiates the VGG16 model. Reference paper: - [Very Deep Convolutional Networks for Large-Scale Image Recognition]( https://arxiv.org/abs/1409.1556) (ICLR 2015) By default, it loads weights pre-trained on ImageNet. Check 'weights' for other options. This model can be built both with 'channels_first' data format (channels, height, width) or 'channels_last' data format (height, width, channels). The default input size for this model is 224x224. Caution: Be sure to properly pre-process your inputs to the application. Please see `applications.vgg16.preprocess_input` for an example. Arguments: include_top: whether to include the 3 fully-connected layers at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 224)` (with `channels_first` data format). It should have exactly 3 input channels, and width and height should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional block. - `avg` means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. Returns: A `keras.Model` instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. """ if not (weights in {'imagenet', None} or file_io.file_exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=224, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor # Block 1 x = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input) x = layers.Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x) x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) # Block 2 x = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x) x = layers.Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x) x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) # Block 3 x = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x) x = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x) x = layers.Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x) x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) # Block 4 x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x) x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x) x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x) x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) # Block 5 x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x) x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x) x = layers.Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x) x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) if include_top: # Classification block x = layers.Flatten(name='flatten')(x) x = layers.Dense(4096, activation='relu', name='fc1')(x) x = layers.Dense(4096, activation='relu', name='fc2')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name='vgg16') # Load weights. if weights == 'imagenet': if include_top: weights_path = data_utils.get_file( 'vgg16_weights_tf_dim_ordering_tf_kernels.h5', WEIGHTS_PATH, cache_subdir='models', file_hash='64373286793e3c8b2b4e3219cbf3544b') else: weights_path = data_utils.get_file( 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5', WEIGHTS_PATH_NO_TOP, cache_subdir='models', file_hash='6d6bbae143d832006294945121d1f1fc') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def NASNet(input_shape=None, penultimate_filters=4032, num_blocks=6, stem_block_filters=96, skip_reduction=True, filter_multiplier=2, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000, default_size=None, classifier_activation='softmax'): """Instantiates a NASNet model. Reference: - [Learning Transferable Architectures for Scalable Image Recognition]( https://arxiv.org/abs/1707.07012) (CVPR 2018) Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. Args: input_shape: Optional shape tuple, the input shape is by default `(331, 331, 3)` for NASNetLarge and `(224, 224, 3)` for NASNetMobile. It should have exactly 3 input channels, and width and height should be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value. penultimate_filters: Number of filters in the penultimate layer. NASNet models use the notation `NASNet (N @ P)`, where: - N is the number of blocks - P is the number of penultimate filters num_blocks: Number of repeated blocks of the NASNet model. NASNet models use the notation `NASNet (N @ P)`, where: - N is the number of blocks - P is the number of penultimate filters stem_block_filters: Number of filters in the initial stem block skip_reduction: Whether to skip the reduction step at the tail end of the network. filter_multiplier: Controls the width of the network. - If `filter_multiplier` < 1.0, proportionally decreases the number of filters in each layer. - If `filter_multiplier` > 1.0, proportionally increases the number of filters in each layer. - If `filter_multiplier` = 1, default number of filters from the paper are used at each layer. include_top: Whether to include the fully-connected layer at the top of the network. weights: `None` (random initialization) or `imagenet` (ImageNet weights) input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional block. - `avg` means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: Optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. default_size: Specifies the default image size of the model classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. Returns: A `keras.Model` instance. Raises: ValueError: In case of invalid argument for `weights`, invalid input shape or invalid `penultimate_filters` value. ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. """ if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top` ' 'as true, `classes` should be 1000') if (isinstance(input_shape, tuple) and None in input_shape and weights == 'imagenet'): raise ValueError('When specifying the input shape of a NASNet' ' and loading `ImageNet` weights, ' 'the input_shape argument must be static ' '(no None entries). Got: `input_shape=' + str(input_shape) + '`.') if default_size is None: default_size = 331 # Determine proper input shape and default size. input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=default_size, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if backend.image_data_format() != 'channels_last': logging.warning( 'The NASNet family of models is only available ' 'for the input data format "channels_last" ' '(width, height, channels). ' 'However your settings specify the default ' 'data format "channels_first" (channels, width, height).' ' You should set `image_data_format="channels_last"` ' 'in your Keras config located at ~/.keras/keras.json. ' 'The model being returned right now will expect inputs ' 'to follow the "channels_last" data format.') backend.set_image_data_format('channels_last') old_data_format = 'channels_first' else: old_data_format = None if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor if penultimate_filters % (24 * (filter_multiplier**2)) != 0: raise ValueError( 'For NASNet-A models, the `penultimate_filters` must be a multiple ' 'of 24 * (`filter_multiplier` ** 2). Current value: %d' % penultimate_filters) channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 filters = penultimate_filters // 24 x = layers.Conv2D(stem_block_filters, (3, 3), strides=(2, 2), padding='valid', use_bias=False, name='stem_conv1', kernel_initializer='he_normal')(img_input) x = layers.BatchNormalization(axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='stem_bn1')(x) p = None x, p = _reduction_a_cell(x, p, filters // (filter_multiplier**2), block_id='stem_1') x, p = _reduction_a_cell(x, p, filters // filter_multiplier, block_id='stem_2') for i in range(num_blocks): x, p = _normal_a_cell(x, p, filters, block_id='%d' % (i)) x, p0 = _reduction_a_cell(x, p, filters * filter_multiplier, block_id='reduce_%d' % (num_blocks)) p = p0 if not skip_reduction else p for i in range(num_blocks): x, p = _normal_a_cell(x, p, filters * filter_multiplier, block_id='%d' % (num_blocks + i + 1)) x, p0 = _reduction_a_cell(x, p, filters * filter_multiplier**2, block_id='reduce_%d' % (2 * num_blocks)) p = p0 if not skip_reduction else p for i in range(num_blocks): x, p = _normal_a_cell(x, p, filters * filter_multiplier**2, block_id='%d' % (2 * num_blocks + i + 1)) x = layers.Activation('relu')(x) if include_top: x = layers.GlobalAveragePooling2D()(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input model = training.Model(inputs, x, name='NASNet') # Load weights. if weights == 'imagenet': if default_size == 224: # mobile version if include_top: weights_path = data_utils.get_file( 'nasnet_mobile.h5', NASNET_MOBILE_WEIGHT_PATH, cache_subdir='models', file_hash='020fb642bf7360b370c678b08e0adf61') else: weights_path = data_utils.get_file( 'nasnet_mobile_no_top.h5', NASNET_MOBILE_WEIGHT_PATH_NO_TOP, cache_subdir='models', file_hash='1ed92395b5b598bdda52abe5c0dbfd63') model.load_weights(weights_path) elif default_size == 331: # large version if include_top: weights_path = data_utils.get_file( 'nasnet_large.h5', NASNET_LARGE_WEIGHT_PATH, cache_subdir='models', file_hash='11577c9a518f0070763c2b964a382f17') else: weights_path = data_utils.get_file( 'nasnet_large_no_top.h5', NASNET_LARGE_WEIGHT_PATH_NO_TOP, cache_subdir='models', file_hash='d81d89dc07e6e56530c4e77faddd61b5') model.load_weights(weights_path) else: raise ValueError( 'ImageNet weights can only be loaded with NASNetLarge' ' or NASNetMobile') elif weights is not None: model.load_weights(weights) if old_data_format: backend.set_image_data_format(old_data_format) return model
def InceptionResNetV2(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000, classifier_activation='softmax', **kwargs): """Instantiates the Inception-ResNet v2 architecture. Reference: - [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261) (AAAI 2017) This function returns a Keras image classification model, optionally loaded with weights pre-trained on ImageNet. For image classification use cases, see [this page for detailed examples]( https://keras.io/api/applications/#usage-examples-for-image-classification-models). For transfer learning use cases, make sure to read the [guide to transfer learning & fine-tuning]( https://keras.io/guides/transfer_learning/). Note: each Keras Application expects a specific kind of input preprocessing. For InceptionResNetV2, call `tf.keras.applications.inception_resnet_v2.preprocess_input` on your inputs before passing them to the model. `inception_resnet_v2.preprocess_input` will scale input pixels between -1 and 1. Args: include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is `False` (otherwise the input shape has to be `(299, 299, 3)` (with `'channels_last'` data format) or `(3, 299, 299)` (with `'channels_first'` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 75. E.g. `(150, 150, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional block. - `'avg'` means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - `'max'` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is `True`, and if no `weights` argument is specified. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. When loading pretrained weights, `classifier_activation` can only be `None` or `"softmax"`. **kwargs: For backwards compatibility only. Returns: A `keras.Model` instance. """ global layers if 'layers' in kwargs: layers = kwargs.pop('layers') else: layers = VersionAwareLayers() if kwargs: raise ValueError('Unknown argument(s): %s' % (kwargs, )) if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=299, min_size=75, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor # Stem block: 35 x 35 x 192 x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid') x = conv2d_bn(x, 32, 3, padding='valid') x = conv2d_bn(x, 64, 3) x = layers.MaxPooling2D(3, strides=2)(x) x = conv2d_bn(x, 80, 1, padding='valid') x = conv2d_bn(x, 192, 3, padding='valid') x = layers.MaxPooling2D(3, strides=2)(x) # Mixed 5b (Inception-A block): 35 x 35 x 320 branch_0 = conv2d_bn(x, 96, 1) branch_1 = conv2d_bn(x, 48, 1) branch_1 = conv2d_bn(branch_1, 64, 5) branch_2 = conv2d_bn(x, 64, 1) branch_2 = conv2d_bn(branch_2, 96, 3) branch_2 = conv2d_bn(branch_2, 96, 3) branch_pool = layers.AveragePooling2D(3, strides=1, padding='same')(x) branch_pool = conv2d_bn(branch_pool, 64, 1) branches = [branch_0, branch_1, branch_2, branch_pool] channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3 x = layers.Concatenate(axis=channel_axis, name='mixed_5b')(branches) # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320 for block_idx in range(1, 11): x = inception_resnet_block(x, scale=0.17, block_type='block35', block_idx=block_idx) # Mixed 6a (Reduction-A block): 17 x 17 x 1088 branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid') branch_1 = conv2d_bn(x, 256, 1) branch_1 = conv2d_bn(branch_1, 256, 3) branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid') branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x) branches = [branch_0, branch_1, branch_pool] x = layers.Concatenate(axis=channel_axis, name='mixed_6a')(branches) # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088 for block_idx in range(1, 21): x = inception_resnet_block(x, scale=0.1, block_type='block17', block_idx=block_idx) # Mixed 7a (Reduction-B block): 8 x 8 x 2080 branch_0 = conv2d_bn(x, 256, 1) branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid') branch_1 = conv2d_bn(x, 256, 1) branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid') branch_2 = conv2d_bn(x, 256, 1) branch_2 = conv2d_bn(branch_2, 288, 3) branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='valid') branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x) branches = [branch_0, branch_1, branch_2, branch_pool] x = layers.Concatenate(axis=channel_axis, name='mixed_7a')(branches) # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080 for block_idx in range(1, 10): x = inception_resnet_block(x, scale=0.2, block_type='block8', block_idx=block_idx) x = inception_resnet_block(x, scale=1., activation=None, block_type='block8', block_idx=10) # Final convolution block: 8 x 8 x 1536 x = conv2d_bn(x, 1536, 1, name='conv_7b') if include_top: # Classification block x = layers.GlobalAveragePooling2D(name='avg_pool')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name='inception_resnet_v2') # Load weights. if weights == 'imagenet': if include_top: fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5' weights_path = data_utils.get_file( fname, BASE_WEIGHT_URL + fname, cache_subdir='models', file_hash='e693bd0210a403b3192acc6073ad2e96') else: fname = ('inception_resnet_v2_weights_' 'tf_dim_ordering_tf_kernels_notop.h5') weights_path = data_utils.get_file( fname, BASE_WEIGHT_URL + fname, cache_subdir='models', file_hash='d19885ff4a710c122648d3b5c3b684e4') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def resnet152(self, num_classes, dtype='float32', batch_size=None): # TODO(tfboyd): add training argument, just lik resnet56. """Instantiates the ResNet50 architecture. Args: num_classes: `int` number of classes for image classification. Returns: A Keras model instance. """ input_shape = (384, 384, 3) img_input = layers.Input(shape=input_shape, dtype=dtype, batch_size=batch_size) if backend.image_data_format() == 'channels_first': x = layers.Lambda( lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)), name='transpose')(img_input) bn_axis = 1 else: # channels_last x = img_input bn_axis = 3 x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(x) x = layers.Conv2D(64, (7, 7), strides=(2, 2), padding='valid', use_bias=False, kernel_initializer='he_normal', kernel_regularizer=regularizers.l2( self.L2_WEIGHT_DECAY), name='conv1')(x) x = layers.BatchNormalization(axis=bn_axis, momentum=self.BATCH_NORM_DECAY, epsilon=self.BATCH_NORM_EPSILON, name='bn_conv1')(x) x = layers.Activation('relu')(x) x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') x = identity_block(x, 3, [128, 128, 512], stage=3, block='e') x = identity_block(x, 3, [128, 128, 512], stage=3, block='f') x = identity_block(x, 3, [128, 128, 512], stage=3, block='g') x = identity_block(x, 3, [128, 128, 512], stage=3, block='h') x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='g') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='h') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='i') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='j') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='k') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='l') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='m') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='n') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='o') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='p') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='q') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='r') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='s') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='t') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='u') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='v') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='w') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='x') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='y') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='z') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='aa') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ab') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ac') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ad') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ae') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='af') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ag') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ah') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='ai') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='aj') x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') rm_axes = [ 1, 2 ] if backend.image_data_format() == 'channels_last' else [2, 3] x = layers.Lambda(lambda x: backend.mean(x, rm_axes), name='reduce_mean')(x) x = layers.Dense( num_classes, kernel_regularizer=regularizers.l2(self.L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(self.L2_WEIGHT_DECAY), name='fc1000')(x) # TODO(reedwm): Remove manual casts once mixed precision can be enabled with a # single line of code. x = backend.cast(x, 'float32') x = layers.Activation('softmax')(x) # Create model. return models.Model(img_input, x, name='resnet50')
def _normal_a_cell(ip, p, filters, block_id=None): """Adds a Normal cell for NASNet-A (Fig. 4 in the paper). Args: ip: Input tensor `x` p: Input tensor `p` filters: Number of output filters block_id: String block_id Returns: A Keras tensor """ channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 with backend.name_scope('normal_A_block_%s' % block_id): p = _adjust_block(p, ip, filters, block_id) h = layers.Activation('relu')(ip) h = layers.Conv2D(filters, (1, 1), strides=(1, 1), padding='same', name='normal_conv_1_%s' % block_id, use_bias=False, kernel_initializer='he_normal')(h) h = layers.BatchNormalization(axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='normal_bn_1_%s' % block_id)(h) with backend.name_scope('block_1'): x1_1 = _separable_conv_block(h, filters, kernel_size=(5, 5), block_id='normal_left1_%s' % block_id) x1_2 = _separable_conv_block(p, filters, block_id='normal_right1_%s' % block_id) x1 = layers.add([x1_1, x1_2], name='normal_add_1_%s' % block_id) with backend.name_scope('block_2'): x2_1 = _separable_conv_block(p, filters, (5, 5), block_id='normal_left2_%s' % block_id) x2_2 = _separable_conv_block(p, filters, (3, 3), block_id='normal_right2_%s' % block_id) x2 = layers.add([x2_1, x2_2], name='normal_add_2_%s' % block_id) with backend.name_scope('block_3'): x3 = layers.AveragePooling2D( (3, 3), strides=(1, 1), padding='same', name='normal_left3_%s' % (block_id))(h) x3 = layers.add([x3, p], name='normal_add_3_%s' % block_id) with backend.name_scope('block_4'): x4_1 = layers.AveragePooling2D( (3, 3), strides=(1, 1), padding='same', name='normal_left4_%s' % (block_id))(p) x4_2 = layers.AveragePooling2D( (3, 3), strides=(1, 1), padding='same', name='normal_right4_%s' % (block_id))(p) x4 = layers.add([x4_1, x4_2], name='normal_add_4_%s' % block_id) with backend.name_scope('block_5'): x5 = _separable_conv_block(h, filters, block_id='normal_left5_%s' % block_id) x5 = layers.add([x5, h], name='normal_add_5_%s' % block_id) x = layers.concatenate([p, x1, x2, x3, x4, x5], axis=channel_dim, name='normal_concat_%s' % block_id) return x, ip
def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1), block_id=1): """Adds a depthwise convolution block. A depthwise convolution block consists of a depthwise conv, batch normalization, relu6, pointwise convolution, batch normalization and relu6 activation. Arguments: inputs: Input tensor of shape `(rows, cols, channels)` (with `channels_last` data format) or (channels, rows, cols) (with `channels_first` data format). pointwise_conv_filters: Integer, the dimensionality of the output space (i.e. the number of output filters in the pointwise convolution). alpha: controls the width of the network. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. - If `alpha` > 1.0, proportionally increases the number of filters in each layer. - If `alpha` = 1, default number of filters from the paper are used at each layer. depth_multiplier: The number of depthwise convolution output channels for each input channel. The total number of depthwise convolution output channels will be equal to `filters_in * depth_multiplier`. strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution along the width and height. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1. block_id: Integer, a unique identification designating the block number. # Input shape 4D tensor with shape: `(batch, channels, rows, cols)` if data_format='channels_first' or 4D tensor with shape: `(batch, rows, cols, channels)` if data_format='channels_last'. # Output shape 4D tensor with shape: `(batch, filters, new_rows, new_cols)` if data_format='channels_first' or 4D tensor with shape: `(batch, new_rows, new_cols, filters)` if data_format='channels_last'. `rows` and `cols` values might have changed due to stride. Returns: Output tensor of block. """ channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 pointwise_conv_filters = int(pointwise_conv_filters * alpha) if strides == (1, 1): x = inputs else: x = layers.ZeroPadding2D(((0, 1), (0, 1)), name='conv_pad_%d' % block_id)(inputs) x = layers.DepthwiseConv2D( (3, 3), padding='same' if strides == (1, 1) else 'valid', depth_multiplier=depth_multiplier, strides=strides, use_bias=False, name='conv_dw_%d' % block_id)(x) x = layers.BatchNormalization(axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x) x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x) x = layers.Conv2D(pointwise_conv_filters, (1, 1), padding='same', use_bias=False, strides=(1, 1), name='conv_pw_%d' % block_id)(x) x = layers.BatchNormalization(axis=channel_axis, name='conv_pw_%d_bn' % block_id)(x) return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x)
def _adjust_block(p, ip, filters, block_id=None): """Adjusts the input `previous path` to match the shape of the `input`. Used in situations where the output number of filters needs to be changed. Args: p: Input tensor which needs to be modified ip: Input tensor whose shape needs to be matched filters: Number of output filters to be matched block_id: String block_id Returns: Adjusted Keras tensor """ channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 img_dim = 2 if backend.image_data_format() == 'channels_first' else -2 ip_shape = backend.int_shape(ip) if p is not None: p_shape = backend.int_shape(p) with backend.name_scope('adjust_block'): if p is None: p = ip elif p_shape[img_dim] != ip_shape[img_dim]: with backend.name_scope('adjust_reduction_block_%s' % block_id): p = layers.Activation('relu', name='adjust_relu_1_%s' % block_id)(p) p1 = layers.AveragePooling2D( (1, 1), strides=(2, 2), padding='valid', name='adjust_avg_pool_1_%s' % block_id)(p) p1 = layers.Conv2D(filters // 2, (1, 1), padding='same', use_bias=False, name='adjust_conv_1_%s' % block_id, kernel_initializer='he_normal')(p1) p2 = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(p) p2 = layers.Cropping2D(cropping=((1, 0), (1, 0)))(p2) p2 = layers.AveragePooling2D( (1, 1), strides=(2, 2), padding='valid', name='adjust_avg_pool_2_%s' % block_id)(p2) p2 = layers.Conv2D(filters // 2, (1, 1), padding='same', use_bias=False, name='adjust_conv_2_%s' % block_id, kernel_initializer='he_normal')(p2) p = layers.concatenate([p1, p2], axis=channel_dim) p = layers.BatchNormalization(axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='adjust_bn_%s' % block_id)(p) elif p_shape[channel_dim] != filters: with backend.name_scope('adjust_projection_block_%s' % block_id): p = layers.Activation('relu')(p) p = layers.Conv2D(filters, (1, 1), strides=(1, 1), padding='same', name='adjust_conv_projection_%s' % block_id, use_bias=False, kernel_initializer='he_normal')(p) p = layers.BatchNormalization(axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='adjust_bn_%s' % block_id)(p) return p
""" settings.py Settings file for saving shared constants """ from tensorflow.python.keras import backend as K IMAGE_DATA_FORMAT = K.image_data_format() CHANNELS_FIRST = IMAGE_DATA_FORMAT == 'channels_first' CHANNELS_LAST = IMAGE_DATA_FORMAT == 'channels_last'
else: x = x.reshape((img_nrows, img_ncols, 3)) # Remove zero-center by mean pixel x[:, :, 0] += 103.939 x[:, :, 1] += 116.779 x[:, :, 2] += 123.68 # 'BGR'->'RGB' x = x[:, :, ::-1] x = np.clip(x, 0, 255).astype('uint8') return x base_image = K.variable(preprocess_image(input_image)) style_reference_image = K.variable(preprocess_image(style_image)) if K.image_data_format() == 'channels_first': combination_image = K.placeholder((1, 3, img_nrows, img_ncols)) else: combination_image = K.placeholder((1, img_nrows, img_ncols, 3)) input_tensor = K.concatenate( [base_image, style_reference_image, combination_image], axis=0) model = vgg19.VGG19(input_tensor=input_tensor, weights='imagenet', include_top=False) print('Model loaded.') outputs_dict = dict([(layer.name, layer.output) for layer in model.layers]) print(outputs_dict)
def Xception(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000, classifier_activation='softmax'): """Instantiates the Xception architecture. Reference: - [Xception: Deep Learning with Depthwise Separable Convolutions]( https://arxiv.org/abs/1610.02357) (CVPR 2017) Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. Note that the default input image size for this model is 299x299. Caution: Be sure to properly pre-process your inputs to the application. Please see `applications.xception.preprocess_input` for an example. Arguments: include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(299, 299, 3)`. It should have exactly 3 inputs channels, and width and height should be no smaller than 71. E.g. `(150, 150, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional block. - `avg` means that global average pooling will be applied to the output of the last convolutional block, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. Returns: A `keras.Model` instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. """ if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=299, min_size=71, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 x = layers.Conv2D(32, (3, 3), strides=(2, 2), use_bias=False, name='block1_conv1')(img_input) x = layers.BatchNormalization(axis=channel_axis, name='block1_conv1_bn')(x) x = layers.Activation('relu', name='block1_conv1_act')(x) x = layers.Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x) x = layers.BatchNormalization(axis=channel_axis, name='block1_conv2_bn')(x) x = layers.Activation('relu', name='block1_conv2_act')(x) residual = layers.Conv2D(128, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) residual = layers.BatchNormalization(axis=channel_axis)(residual) x = layers.SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv1')(x) x = layers.BatchNormalization(axis=channel_axis, name='block2_sepconv1_bn')(x) x = layers.Activation('relu', name='block2_sepconv2_act')(x) x = layers.SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv2')(x) x = layers.BatchNormalization(axis=channel_axis, name='block2_sepconv2_bn')(x) x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block2_pool')(x) x = layers.add([x, residual]) residual = layers.Conv2D(256, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) residual = layers.BatchNormalization(axis=channel_axis)(residual) x = layers.Activation('relu', name='block3_sepconv1_act')(x) x = layers.SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv1')(x) x = layers.BatchNormalization(axis=channel_axis, name='block3_sepconv1_bn')(x) x = layers.Activation('relu', name='block3_sepconv2_act')(x) x = layers.SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv2')(x) x = layers.BatchNormalization(axis=channel_axis, name='block3_sepconv2_bn')(x) x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block3_pool')(x) x = layers.add([x, residual]) residual = layers.Conv2D(728, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) residual = layers.BatchNormalization(axis=channel_axis)(residual) x = layers.Activation('relu', name='block4_sepconv1_act')(x) x = layers.SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv1')(x) x = layers.BatchNormalization(axis=channel_axis, name='block4_sepconv1_bn')(x) x = layers.Activation('relu', name='block4_sepconv2_act')(x) x = layers.SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv2')(x) x = layers.BatchNormalization(axis=channel_axis, name='block4_sepconv2_bn')(x) x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block4_pool')(x) x = layers.add([x, residual]) for i in range(8): residual = x prefix = 'block' + str(i + 5) x = layers.Activation('relu', name=prefix + '_sepconv1_act')(x) x = layers.SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv1')(x) x = layers.BatchNormalization(axis=channel_axis, name=prefix + '_sepconv1_bn')(x) x = layers.Activation('relu', name=prefix + '_sepconv2_act')(x) x = layers.SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv2')(x) x = layers.BatchNormalization(axis=channel_axis, name=prefix + '_sepconv2_bn')(x) x = layers.Activation('relu', name=prefix + '_sepconv3_act')(x) x = layers.SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv3')(x) x = layers.BatchNormalization(axis=channel_axis, name=prefix + '_sepconv3_bn')(x) x = layers.add([x, residual]) residual = layers.Conv2D(1024, (1, 1), strides=(2, 2), padding='same', use_bias=False)(x) residual = layers.BatchNormalization(axis=channel_axis)(residual) x = layers.Activation('relu', name='block13_sepconv1_act')(x) x = layers.SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block13_sepconv1')(x) x = layers.BatchNormalization(axis=channel_axis, name='block13_sepconv1_bn')(x) x = layers.Activation('relu', name='block13_sepconv2_act')(x) x = layers.SeparableConv2D(1024, (3, 3), padding='same', use_bias=False, name='block13_sepconv2')(x) x = layers.BatchNormalization(axis=channel_axis, name='block13_sepconv2_bn')(x) x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block13_pool')(x) x = layers.add([x, residual]) x = layers.SeparableConv2D(1536, (3, 3), padding='same', use_bias=False, name='block14_sepconv1')(x) x = layers.BatchNormalization(axis=channel_axis, name='block14_sepconv1_bn')(x) x = layers.Activation('relu', name='block14_sepconv1_act')(x) x = layers.SeparableConv2D(2048, (3, 3), padding='same', use_bias=False, name='block14_sepconv2')(x) x = layers.BatchNormalization(axis=channel_axis, name='block14_sepconv2_bn')(x) x = layers.Activation('relu', name='block14_sepconv2_act')(x) if include_top: x = layers.GlobalAveragePooling2D(name='avg_pool')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D()(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name='xception') # Load weights. if weights == 'imagenet': if include_top: weights_path = data_utils.get_file( 'xception_weights_tf_dim_ordering_tf_kernels.h5', TF_WEIGHTS_PATH, cache_subdir='models', file_hash='0a58e3b7378bc2990ea3b43d5981f1f6') else: weights_path = data_utils.get_file( 'xception_weights_tf_dim_ordering_tf_kernels_notop.h5', TF_WEIGHTS_PATH_NO_TOP, cache_subdir='models', file_hash='b0042744bf5b25fce3cb969f33bebb97') model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def pixelwise_transform(mask, dilation_radius=None, data_format=None, separate_edge_classes=False): """Transforms a label mask for a z stack edge, interior, and background Args: mask (tensor): tensor of labels dilation_radius (int): width to enlarge the edge feature of each instance data_format (str): 'channels_first' or 'channels_last' separate_edge_classes (bool): Whether to separate the cell edge class into 2 distinct cell-cell edge and cell-background edge classes. Returns: numpy.array: one-hot encoded tensor of masks: if not separate_edge_classes: [cell_edge, cell_interior, background] otherwise: [bg_cell_edge, cell_cell_edge, cell_interior, background] """ if data_format is None: data_format = K.image_data_format() if data_format == 'channels_first': channel_axis = 1 else: channel_axis = len(mask.shape) - 1 mask = np.squeeze(mask, axis=channel_axis) # Detect the edges and interiors new_masks = np.zeros(mask.shape) edges = np.zeros(mask.shape) strel = ball(1) if mask.ndim > 3 else disk(1) for cell_label in np.unique(mask): if cell_label != 0: for i in range(mask.shape[0]): # get the cell interior img = mask[i] == cell_label img = binary_erosion(img, strel) new_masks[i] += img interiors = np.multiply(new_masks, mask) edges = (mask - interiors > 0).astype('int') interiors = (interiors > 0).astype('int') if not separate_edge_classes: if dilation_radius: dil_strel = ball(dilation_radius) if mask.ndim > 3 else disk( dilation_radius) # Thicken cell edges to be more pronounced for i in range(edges.shape[0]): edges[i] = binary_dilation(edges[i], selem=dil_strel) # Thin the augmented edges by subtracting the interior features. edges = (edges - interiors > 0).astype('int') background = (1 - edges - interiors > 0) background = background.astype('int') all_stacks = [edges, interiors, background] return np.stack(all_stacks, axis=channel_axis) # dilate the background masks and subtract from all edges for background-edges dilated_background = np.zeros(mask.shape) for i in range(mask.shape[0]): background = (mask[i] == 0).astype('int') dilated_background[i] = binary_dilation(background, strel) background_edges = (edges - dilated_background > 0).astype('int') # edges that are not background-edges are interior-edges interior_edges = (edges - background_edges > 0).astype('int') if dilation_radius: dil_strel = ball(dilation_radius) if mask.ndim > 3 else disk( dilation_radius) # Thicken cell edges to be more pronounced for i in range(edges.shape[0]): interior_edges[i] = binary_dilation(interior_edges[i], selem=dil_strel) background_edges[i] = binary_dilation(background_edges[i], selem=dil_strel) # Thin the augmented edges by subtracting the interior features. interior_edges = (interior_edges - interiors > 0).astype('int') background_edges = (background_edges - interiors > 0).astype('int') background = (1 - background_edges - interior_edges - interiors > 0) background = background.astype('int') all_stacks = [background_edges, interior_edges, interiors, background] return np.stack(all_stacks, axis=channel_axis)
def train_model_siamese_daughter(model, dataset, expt='', test_size=.2, n_epoch=100, batch_size=1, num_gpus=None, crop_dim=32, min_track_length=1, neighborhood_scale_size=10, features=None, optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True), log_dir='/data/tensorboard_logs', model_dir='/data/models', model_name=None, focal=False, gamma=0.5, lr_sched=rate_scheduler(lr=0.01, decay=0.95), rotation_range=0, flip=True, shear=0, zoom_range=0, seed=0, **kwargs): is_channels_first = K.image_data_format() == 'channels_first' if model_name is None: todays_date = datetime.datetime.now().strftime('%Y-%m-%d') data_name = os.path.splitext(os.path.basename(dataset))[0] model_name = '{}_{}_[{}]_neighs={}_epochs={}_seed={}_{}'.format( todays_date, data_name, ','.join(f[0] for f in sorted(features)), neighborhood_scale_size, n_epoch, seed, expt) model_path = os.path.join(model_dir, '{}.h5'.format(model_name)) loss_path = os.path.join(model_dir, '{}.npz'.format(model_name)) print('training on dataset:', dataset) print('saving model at:', model_path) print('saving loss at:', loss_path) train_dict, val_dict = get_data(dataset, mode='siamese_daughters', seed=seed, test_size=test_size) # the data, shuffled and split between train and test sets print('X_train shape:', train_dict['X'].shape) print('y_train shape:', train_dict['y'].shape) print('X_test shape:', val_dict['X'].shape) print('y_test shape:', val_dict['y'].shape) print('Output Shape:', model.layers[-1].output_shape) n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1] def loss_function(y_true, y_pred): if focal: return losses.weighted_focal_loss(y_true, y_pred, gamma=gamma, n_classes=n_classes, from_logits=False) return losses.weighted_categorical_crossentropy(y_true, y_pred, n_classes=n_classes, from_logits=False) if num_gpus is None: num_gpus = train_utils.count_gpus() if num_gpus >= 2: batch_size = batch_size * num_gpus model = train_utils.MultiGpuModel(model, num_gpus) print('Training on {} GPUs'.format(num_gpus)) model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy']) print('Using real-time data augmentation.') # this will do preprocessing and realtime data augmentation datagen = image_generators.SiameseDataGenerator( rotation_range=rotation_range, shear_range=shear, zoom_range=zoom_range, horizontal_flip=flip, vertical_flip=flip) datagen_val = image_generators.SiameseDataGenerator( rotation_range=0, zoom_range=0, shear_range=0, horizontal_flip=0, vertical_flip=0) total_train_pairs = tracking_utils.count_pairs(train_dict['y'], same_probability=5.0) total_test_pairs = tracking_utils.count_pairs(val_dict['y'], same_probability=5.0) # total_train_pairs = tracking_utils.count_pairs(train_dict['y'], same_probability=0.5) # total_test_pairs = tracking_utils.count_pairs(val_dict['y'], same_probability=0.5) train_data = datagen.flow( train_dict, seed=seed, crop_dim=crop_dim, batch_size=batch_size, min_track_length=min_track_length, neighborhood_scale_size=neighborhood_scale_size, features=features) val_data = datagen_val.flow( val_dict, seed=seed, crop_dim=crop_dim, batch_size=batch_size, min_track_length=min_track_length, neighborhood_scale_size=neighborhood_scale_size, features=features) print('total_train_pairs:', total_train_pairs) print('total_test_pairs:', total_test_pairs) print('batch size:', batch_size) print('validation_steps: ', total_test_pairs // batch_size) train_callbacks = get_callbacks( model_path, lr_sched=lr_sched, tensorboard_log_dir=log_dir, save_weights_only=num_gpus >= 2, monitor='val_loss', verbose=1) # fit the model on the batches generated by datagen.flow() loss_history = model.fit_generator( train_data, steps_per_epoch=total_train_pairs // batch_size, epochs=n_epoch, validation_data=val_data, validation_steps=total_test_pairs // batch_size, callbacks=train_callbacks) model.save_weights(model_path) np.savez(loss_path, loss_history=loss_history.history) return model
def train_model_sample(model, dataset, expt='', test_size=.2, n_epoch=10, batch_size=32, num_gpus=None, transform=None, window_size=None, balance_classes=True, max_class_samples=None, log_dir='/data/tensorboard_logs', model_dir='/data/models', model_name=None, focal=False, gamma=0.5, optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True), lr_sched=rate_scheduler(lr=0.01, decay=0.95), rotation_range=0, flip=False, shear=0, zoom_range=0, seed=0, **kwargs): """Train a model using sample mode. Args: model (tensorflow.keras.Model): The model to train. dataset (str): Path to a dataset to train the model with. expt (str): Experiment, substring to include in model name. test_size (float): Percent of data to leave as test data. n_epoch (int): Number of training epochs. batch_size (int): Number of batches per training step. num_gpus (int): The number of GPUs to train on. transform (str): Defines the transformation of the training data. One of 'watershed', 'fgbg', 'pixelwise'. window_size (tuple(int, int)): Size of sampling window balance_classes (bool): Whether to perform class-balancing on data max_class_samples (int): Maximum number of examples per class to sample log_dir (str): Filepath to save tensorboard logs. If None, disables the tensorboard callback. model_dir (str): Directory to save the model file. model_name (str): Name of the model (and name of output file). focal (bool): If true, uses focal loss. gamma (float): Parameter for focal loss optimizer (object): Pre-initialized optimizer object (SGD, Adam, etc.) lr_sched (function): Learning rate schedular function rotation_range (int): Maximum rotation range for image augmentation flip (bool): Enables horizontal and vertical flipping for augmentation shear (int): Maximum rotation range for image augmentation zoom_range (tuple): Minimum and maximum zoom values (0.8, 1.2) seed (int): Random seed kwargs (dict): Other parameters to pass to _transform_masks Returns: tensorflow.keras.Model: The trained model """ is_channels_first = K.image_data_format() == 'channels_first' if model_name is None: todays_date = datetime.datetime.now().strftime('%Y-%m-%d') data_name = os.path.splitext(os.path.basename(dataset))[0] model_name = '{}_{}_{}'.format(todays_date, data_name, expt) model_path = os.path.join(model_dir, '{}.h5'.format(model_name)) loss_path = os.path.join(model_dir, '{}.npz'.format(model_name)) # train_dict, test_dict = get_data(dataset, test_size=test_size, seed=seed) train_dict, test_dict = get_data_from_path(dataset, patch_crop=False) n_classes = model.layers[-1].output_shape[1 if is_channels_first else -1] # the data, shuffled and split between train and test sets print('X_train shape:', train_dict['X'].shape) print('y_train shape:', train_dict['y'].shape) print('X_test shape:', test_dict['X'].shape) print('y_test shape:', test_dict['y'].shape) print('Output Shape:', model.layers[-1].output_shape) print('Number of Classes:', n_classes) def loss_function(y_true, y_pred): if isinstance(transform, str) and transform.lower() == 'disc': return losses.discriminative_instance_loss(y_true, y_pred) if focal: return losses.weighted_focal_loss( y_true, y_pred, gamma=gamma, n_classes=n_classes) return losses.weighted_categorical_crossentropy( y_true, y_pred, n_classes=n_classes) if num_gpus is None: num_gpus = train_utils.count_gpus() if num_gpus >= 2: batch_size = batch_size * num_gpus model = train_utils.MultiGpuModel(model, num_gpus) print('Training on {} GPUs'.format(num_gpus)) model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy']) if train_dict['X'].ndim == 4: DataGenerator = image_generators.SampleDataGenerator window_size = window_size if window_size else (30, 30) elif train_dict['X'].ndim == 5: DataGenerator = image_generators.SampleMovieDataGenerator window_size = window_size if window_size else (30, 30, 3) else: raise ValueError('Expected `X` to have ndim 4 or 5. Got', train_dict['X'].ndim) # this will do preprocessing and realtime data augmentation datagen = DataGenerator( rotation_range=rotation_range, shear_range=shear, zoom_range=zoom_range, horizontal_flip=flip, vertical_flip=flip) # no validation augmentation datagen_val = DataGenerator( rotation_range=0, shear_range=0, zoom_range=0, horizontal_flip=0, vertical_flip=0) train_data = datagen.flow( train_dict, seed=seed, batch_size=batch_size, transform=transform, transform_kwargs=kwargs, window_size=window_size, balance_classes=balance_classes, max_class_samples=max_class_samples) # save_to_dir='./training_data', # save_prefix='t', # save_format='tif') val_data = datagen_val.flow( test_dict, seed=seed, batch_size=batch_size, transform=transform, transform_kwargs=kwargs, window_size=window_size, balance_classes=False, max_class_samples=max_class_samples) train_callbacks = get_callbacks( model_path, lr_sched=lr_sched, tensorboard_log_dir=log_dir, save_weights_only=num_gpus >= 2, monitor='val_loss', verbose=1) # fit the model on the batches generated by datagen.flow() loss_history = model.fit_generator( train_data, steps_per_epoch=train_data.y.shape[0] // batch_size, epochs=n_epoch, validation_data=val_data, validation_steps=val_data.y.shape[0] // batch_size, callbacks=train_callbacks) np.savez(loss_path, loss_history=loss_history.history) return model
def train_model_retinanet(model, dataset, expt='', test_size=.2, n_epoch=10, batch_size=1, num_gpus=None, include_masks=False, panoptic=False, panoptic_weight=0.1, transforms=['watershed'], transforms_kwargs={}, anchor_params=None, pyramid_levels=['P3', 'P4', 'P5', 'P6', 'P7'], min_objects=3, mask_size=(28, 28), optimizer=SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True), log_dir='/data/tensorboard_logs', model_dir='/data/models', model_name=None, sigma=3.0, alpha=0.25, gamma=2.0, score_threshold=0.01, iou_threshold=0.5, max_detections=100, weighted_average=True, lr_sched=rate_scheduler(lr=0.01, decay=0.95), rotation_range=0, flip=True, shear=0, zoom_range=0, compute_map=True, seed=0, semantic_only=False, **kwargs): """Train a RetinaNet model from the given backbone. Adapted from: https://github.com/fizyr/keras-retinanet & https://github.com/fizyr/keras-maskrcnn Args: model (tensorflow.keras.Model): The model to train. dataset (str): Path to a dataset to train the model with. expt (str): Experiment, substring to include in model name. test_size (float): Percent of data to leave as test data. n_epoch (int): Number of training epochs. batch_size (int): Number of batches per training step. num_gpus (int): The number of GPUs to train on. include_masks (bool): Whether to generate masks using MaskRCNN. panoptic (bool): Whether to include semantic segmentation heads. panoptic_weight (float): Weight applied to the semantic loss. transforms (list): List of transform names as strings. Each transform will have its own semantic segmentation head. transforms_kwargs (list): List of dicts of optional values for each transform in transforms. anchor_params (AnchorParameters): Struct containing anchor parameters. If None, default values are used. pyramid_levels (list): Pyramid levels to attach the object detection heads to. min_objects (int): If a training image has fewer than min_objects objects, the image will not be used for training. mask_size (tuple): The size of the masks. log_dir (str): Filepath to save tensorboard logs. If None, disables the tensorboard callback. model_dir (str): Directory to save the model file. model_name (str): Name of the model (and name of output file). sigma (float): The point where the loss changes from L2 to L1. alpha (float): Scale the focal weight with alpha. gamma (float): Take the power of the focal weight with gamma. iou_threshold (float): The threshold used to consider when a detection is positive or negative. score_threshold (float): The score confidence threshold to use for detections. max_detections (int): The maximum number of detections to use per image weighted_average (bool): Use a weighted average in evaluation. optimizer (object): Pre-initialized optimizer object (SGD, Adam, etc.) lr_sched (function): Learning rate schedular function rotation_range (int): Maximum rotation range for image augmentation flip (bool): Enables horizontal and vertical flipping for augmentation shear (int): Maximum rotation range for image augmentation zoom_range (tuple): Minimum and maximum zoom values (0.8, 1.2) seed (int): Random seed compute_map (bool): Whether to compute mAP at end of training. kwargs (dict): Other parameters to pass to _transform_masks Returns: tensorflow.keras.Model: The trained model """ is_channels_first = K.image_data_format() == 'channels_first' if model_name is None: todays_date = datetime.datetime.now().strftime('%Y-%m-%d') data_name = os.path.splitext(os.path.basename(dataset))[0] model_name = '{}_{}_{}'.format(todays_date, data_name, expt) model_path = os.path.join(model_dir, '{}.h5'.format(model_name)) loss_path = os.path.join(model_dir, '{}.npz'.format(model_name)) train_dict, test_dict = get_data(dataset, seed=seed, test_size=test_size) channel_axis = 1 if is_channels_first else -1 n_classes = model.layers[-1].output_shape[channel_axis] if panoptic: n_semantic_classes = [layer.output_shape[channel_axis] for layer in model.layers if 'semantic' in layer.name] else: n_semantic_classes = [] # the data, shuffled and split between train and test sets print('X_train shape:', train_dict['X'].shape) print('y_train shape:', train_dict['y'].shape) print('X_test shape:', test_dict['X'].shape) print('y_test shape:', test_dict['y'].shape) print('Output Shape:', model.layers[-1].output_shape) print('Number of Classes:', n_classes) if num_gpus is None: num_gpus = train_utils.count_gpus() if num_gpus >= 1e6: batch_size = batch_size * num_gpus model = train_utils.MultiGpuModel(model, num_gpus) print('Training on {} GPUs'.format(num_gpus)) # evaluation of model is done on `retinanet_bbox` if include_masks: prediction_model = model else: prediction_model = retinanet_bbox( model, nms=True, anchor_params=anchor_params, num_semantic_heads=len(n_semantic_classes), panoptic=panoptic, class_specific_filter=False) retinanet_losses = losses.RetinaNetLosses(sigma=sigma, alpha=alpha, gamma=gamma, iou_threshold=iou_threshold, mask_size=mask_size) def semantic_loss(n_classes): def _semantic_loss(y_pred, y_true): if n_classes > 1: return panoptic_weight * losses.weighted_categorical_crossentropy( y_pred, y_true, n_classes=n_classes) return panoptic_weight * MSE(y_pred, y_true) return _semantic_loss loss = { 'regression': retinanet_losses.regress_loss, 'classification': retinanet_losses.classification_loss } if include_masks: loss['masks'] = retinanet_losses.mask_loss if panoptic: # Give losses for all of the semantic heads for layer in model.layers: if 'semantic' in layer.name: n_classes = layer.output_shape[channel_axis] loss[layer.name] = semantic_loss(n_classes) model.compile(loss=loss, optimizer=optimizer) if num_gpus >= 2: # Each GPU must have at least one validation example if test_dict['y'].shape[0] < num_gpus: raise ValueError('Not enough validation data for {} GPUs. ' 'Received {} validation sample.'.format( test_dict['y'].shape[0], num_gpus)) # When using multiple GPUs and skip_connections, # the training data must be evenly distributed across all GPUs num_train = train_dict['y'].shape[0] nb_samples = num_train - num_train % batch_size if nb_samples: train_dict['y'] = train_dict['y'][:nb_samples] train_dict['X'] = train_dict['X'][:nb_samples] # this will do preprocessing and realtime data augmentation datagen = image_generators.RetinaNetGenerator( # fill_mode='constant', # for rotations rotation_range=rotation_range, shear_range=shear, zoom_range=zoom_range, horizontal_flip=flip, vertical_flip=flip) datagen_val = image_generators.RetinaNetGenerator( # fill_mode='constant', # for rotations rotation_range=0, shear_range=0, zoom_range=0, horizontal_flip=0, vertical_flip=0) # if 'vgg' in backbone or 'densenet' in backbone: # compute_shapes = make_shapes_callback(model) # else: # compute_shapes = guess_shapes compute_shapes = guess_shapes train_data = datagen.flow( train_dict, seed=seed, include_masks=include_masks, panoptic=panoptic, transforms=transforms, transforms_kwargs=transforms_kwargs, pyramid_levels=pyramid_levels, min_objects=min_objects, anchor_params=anchor_params, compute_shapes=compute_shapes, semantic_only=semantic_only, batch_size=batch_size) val_data = datagen_val.flow( test_dict, seed=seed, include_masks=include_masks, panoptic=panoptic, transforms=transforms, transforms_kwargs=transforms_kwargs, pyramid_levels=pyramid_levels, min_objects=min_objects, anchor_params=anchor_params, compute_shapes=compute_shapes, semantic_only=semantic_only, batch_size=batch_size) train_callbacks = get_callbacks( model_path, lr_sched=lr_sched, tensorboard_log_dir=log_dir, save_weights_only=num_gpus >= 2, monitor='val_loss', verbose=1) eval_callback = RedirectModel( Evaluate(val_data, iou_threshold=iou_threshold, score_threshold=score_threshold, max_detections=max_detections, tensorboard=train_callbacks[-1] if log_dir else None, weighted_average=weighted_average), prediction_model) train_callbacks.append(eval_callback) # fit the model on the batches generated by datagen.flow() loss_history = model.fit_generator( train_data, steps_per_epoch=train_data.y.shape[0] // batch_size, epochs=n_epoch, validation_data=val_data, validation_steps=val_data.y.shape[0] // batch_size, callbacks=train_callbacks) model.save_weights(model_path) np.savez(loss_path, loss_history=loss_history.history) if compute_map: average_precisions = evaluate( val_data, prediction_model, iou_threshold=iou_threshold, score_threshold=score_threshold, max_detections=max_detections, ) # print evaluation total_instances = [] precisions = [] for label, (average_precision, num_annotations) in average_precisions.items(): print('{:.0f} instances of class'.format(num_annotations), label, 'with average precision: {:.4f}'.format(average_precision)) total_instances.append(num_annotations) precisions.append(average_precision) if sum(total_instances) == 0: print('No test instances found.') else: print('mAP using the weighted average of precisions among classes: {:.4f}'.format( sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances))) print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances))) return model
def block3(x, filters, kernel_size=3, stride=1, groups=32, conv_shortcut=True, name=None): """A residual block. Args: x: input tensor. filters: integer, filters of the bottleneck layer. kernel_size: default 3, kernel size of the bottleneck layer. stride: default 1, stride of the first layer. groups: default 32, group size for grouped convolution. conv_shortcut: default True, use convolution shortcut if True, otherwise identity shortcut. name: string, block label. Returns: Output tensor for the residual block. """ bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 if conv_shortcut: shortcut = layers.Conv2D((64 // groups) * filters, 1, strides=stride, use_bias=False, name=name + '_0_conv')(x) shortcut = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_0_bn')(shortcut) else: shortcut = x x = layers.Conv2D(filters, 1, use_bias=False, name=name + '_1_conv')(x) x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(x) x = layers.Activation('relu', name=name + '_1_relu')(x) c = filters // groups x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x) x = layers.DepthwiseConv2D(kernel_size, strides=stride, depth_multiplier=c, use_bias=False, name=name + '_2_conv')(x) x_shape = backend.shape(x)[:-1] x = backend.reshape(x, backend.concatenate([x_shape, (groups, c, c)])) x = layers.Lambda(lambda x: sum(x[:, :, :, :, i] for i in range(c)), name=name + '_2_reduce')(x) x = backend.reshape(x, backend.concatenate([x_shape, (filters, )])) x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_2_bn')(x) x = layers.Activation('relu', name=name + '_2_relu')(x) x = layers.Conv2D((64 // groups) * filters, 1, use_bias=False, name=name + '_3_conv')(x) x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name=name + '_3_bn')(x) x = layers.Add(name=name + '_add')([shortcut, x]) x = layers.Activation('relu', name=name + '_out')(x) return x
def ResNet50(num_classes): """Instantiates the ResNet50 architecture. Args: num_classes: `int` number of classes for image classification. Returns: A Keras model instance. """ # Determine proper input shape if backend.image_data_format() == 'channels_first': input_shape = (3, 224, 224) bn_axis = 1 else: input_shape = (224, 224, 3) bn_axis = 3 img_input = layers.Input(shape=input_shape) x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input) x = layers.Conv2D(64, (7, 7), use_bias=False, strides=(2, 2), padding='valid', kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name='conv1')(x) x = layers.BatchNormalization(axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON, name='bn_conv1')(x) x = layers.Activation('relu')(x) x = layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x) x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') x = layers.GlobalAveragePooling2D(name='avg_pool')(x) x = layers.Dense(num_classes, activation='softmax', kernel_initializer=initializers.RandomNormal(stddev=0.01), kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY), bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY), name='fc1000')(x) # Create model. return models.Model(img_input, x, name='resnet50')
def ResNet(stack_fn, preact, use_bias, model_name='resnet', include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000, classifier_activation='softmax', **kwargs): """Instantiates the ResNet, ResNetV2, and ResNeXt architecture. Reference: - [Deep Residual Learning for Image Recognition]( https://arxiv.org/abs/1512.03385) (CVPR 2015) Optionally loads weights pre-trained on ImageNet. Note that the data format convention used by the model is the one specified in your Keras config at `~/.keras/keras.json`. Args: stack_fn: a function that returns output tensor for the stacked residual blocks. preact: whether to use pre-activation or not (True for ResNetV2, False for ResNet and ResNeXt). use_bias: whether to use biases for convolutional layers or not (True for ResNet and ResNetV2, False for ResNeXt). model_name: string, model name. include_top: whether to include the fully-connected layer at the top of the network. weights: one of `None` (random initialization), 'imagenet' (pre-training on ImageNet), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 224)` (with `channels_first` data format). It should have exactly 3 inputs channels. pooling: optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. **kwargs: For backwards compatibility only. Returns: A `keras.Model` instance. Raises: ValueError: in case of invalid argument for `weights`, or invalid input shape. ValueError: if `classifier_activation` is not `softmax` or `None` when using a pretrained top layer. """ global layers if 'layers' in kwargs: layers = kwargs.pop('layers') else: layers = VersionAwareLayers() if kwargs: raise ValueError('Unknown argument(s): %s' % (kwargs, )) if not (weights in {'imagenet', None} or file_io.file_exists_v2(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `imagenet` ' '(pre-training on ImageNet), ' 'or the path to the weights file to be loaded.') if weights == 'imagenet' and include_top and classes != 1000: raise ValueError( 'If using `weights` as `"imagenet"` with `include_top`' ' as true, `classes` should be 1000') # Determine proper input shape input_shape = imagenet_utils.obtain_input_shape( input_shape, default_size=224, min_size=32, data_format=backend.image_data_format(), require_flatten=include_top, weights=weights) if input_tensor is None: img_input = layers.Input(shape=input_shape) else: if not backend.is_keras_tensor(input_tensor): img_input = layers.Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)), name='conv1_pad')(img_input) x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias, name='conv1_conv')(x) if not preact: x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name='conv1_bn')(x) x = layers.Activation('relu', name='conv1_relu')(x) x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x) x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x) x = stack_fn(x) if preact: x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, name='post_bn')(x) x = layers.Activation('relu', name='post_relu')(x) if include_top: x = layers.GlobalAveragePooling2D(name='avg_pool')(x) imagenet_utils.validate_activation(classifier_activation, weights) x = layers.Dense(classes, activation=classifier_activation, name='predictions')(x) else: if pooling == 'avg': x = layers.GlobalAveragePooling2D(name='avg_pool')(x) elif pooling == 'max': x = layers.GlobalMaxPooling2D(name='max_pool')(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = layer_utils.get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = training.Model(inputs, x, name=model_name) # Load weights. if (weights == 'imagenet') and (model_name in WEIGHTS_HASHES): if include_top: file_name = model_name + '_weights_tf_dim_ordering_tf_kernels.h5' file_hash = WEIGHTS_HASHES[model_name][0] else: file_name = model_name + '_weights_tf_dim_ordering_tf_kernels_notop.h5' file_hash = WEIGHTS_HASHES[model_name][1] weights_path = data_utils.get_file(file_name, BASE_WEIGHTS_PATH + file_name, cache_subdir='models', file_hash=file_hash) model.load_weights(weights_path) elif weights is not None: model.load_weights(weights) return model
def VGG16_Places365(include_top=True, weights='places', input_tensor=None, input_shape=None, pooling=None, classes=365): """Instantiates the VGG16-places365 architecture. Optionally loads weights pre-trained on Places. Note that when using TensorFlow, for best performance you should set `image_data_format="channels_last"` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. # Arguments include_top: whether to include the 3 fully-connected layers at the top of the network. weights: one of `None` (random initialization), 'places' (pre-training on Places), or the path to the weights file to be loaded. input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. input_shape: optional shape tuple, only to be specified if `include_top` is False (otherwise the input shape has to be `(224, 224, 3)` (with `channels_last` data format) or `(3, 224, 244)` (with `channels_first` data format). It should have exactly 3 inputs channels, and width and height should be no smaller than 48. E.g. `(200, 200, 3)` would be one valid value. pooling: Optional pooling mode for feature extraction when `include_top` is `False`. - `None` means that the output of the model will be the 4D tensor output of the last convolutional layer. - `avg` means that global average pooling will be applied to the output of the last convolutional layer, and thus the output of the model will be a 2D tensor. - `max` means that global max pooling will be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True, and if no `weights` argument is specified. # Returns A Keras model instance. # Raises ValueError: in case of invalid argument for `weights`, or invalid input shape """ if not (weights in {'places', None} or os.path.exists(weights)): raise ValueError('The `weights` argument should be either ' '`None` (random initialization), `places` ' '(pre-training on Places), ' 'or the path to the weights file to be loaded.') if weights == 'places' and include_top and classes != 365: raise ValueError('If using `weights` as places with `include_top`' ' as true, `classes` should be 365') # Determine proper input shape input_shape = _obtain_input_shape(input_shape, default_size=224, min_size=48, data_format=K.image_data_format(), require_flatten=include_top) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor # Block 1 x = Conv2D(filters=64, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block1_conv1')(img_input) x = Conv2D(filters=64, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block1_conv2')(x) x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name="block1_pool", padding='valid')(x) # Block 2 x = Conv2D(filters=128, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block2_conv1')(x) x = Conv2D(filters=128, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block2_conv2')(x) x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name="block2_pool", padding='valid')(x) # Block 3 x = Conv2D(filters=256, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block3_conv1')(x) x = Conv2D(filters=256, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block3_conv2')(x) x = Conv2D(filters=256, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block3_conv3')(x) x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name="block3_pool", padding='valid')(x) # Block 4 x = Conv2D(filters=512, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block4_conv1')(x) x = Conv2D(filters=512, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block4_conv2')(x) x = Conv2D(filters=512, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block4_conv3')(x) x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name="block4_pool", padding='valid')(x) # Block 5 x = Conv2D(filters=512, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block5_conv1')(x) x = Conv2D(filters=512, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block5_conv2')(x) x = Conv2D(filters=512, kernel_size=3, strides=(1, 1), padding='same', kernel_regularizer=l2(0.0002), activation='relu', name='block5_conv3')(x) x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name="block5_pool", padding='valid')(x) if include_top: # Classification block x = Flatten(name='flatten')(x) x = Dense(4096, activation='relu', name='fc1')(x) x = Dropout(0.5, name='drop_fc1')(x) x = Dense(4096, activation='relu', name='fc2')(x) x = Dropout(0.5, name='drop_fc2')(x) x = Dense(365, activation='softmax', name="predictions")(x) else: if pooling == 'avg': x = GlobalAveragePooling2D()(x) elif pooling == 'max': x = GlobalMaxPooling2D()(x) # Ensure that the model takes into account # any potential predecessors of `input_tensor`. if input_tensor is not None: inputs = get_source_inputs(input_tensor) else: inputs = img_input # Create model. model = Model(inputs, x, name='vgg16-places365') # load weights if weights == 'places': if include_top: weights_path = get_file( 'vgg16-places365_weights_tf_dim_ordering_tf_kernels.h5', WEIGHTS_PATH, cache_subdir='models') else: weights_path = get_file( 'vgg16-places365_weights_tf_dim_ordering_tf_kernels_notop.h5', WEIGHTS_PATH_NO_TOP, cache_subdir='models') model.load_weights(weights_path) if K.backend() == 'theano': layer_utils.convert_all_kernels_in_model(model) if K.image_data_format() == 'channels_first': if include_top: maxpool = model.get_layer(name='block5_pool') shape = maxpool.output_shape[1:] dense = model.get_layer(name='fc1') layer_utils.convert_dense_weights_data_format( dense, shape, 'channels_first') if K.backend() == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' 'image data format convention ' '(`image_data_format="channels_first"`). ' 'For best performance, set ' '`image_data_format="channels_last"` in ' 'your Keras config ' 'at ~/.keras/keras.json.') elif weights is not None: model.load_weights(weights) return model
def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'): """Adds an Inception-ResNet block. This function builds 3 types of Inception-ResNet blocks mentioned in the paper, controlled by the `block_type` argument (which is the block name used in the official TF-slim implementation): - Inception-ResNet-A: `block_type='block35'` - Inception-ResNet-B: `block_type='block17'` - Inception-ResNet-C: `block_type='block8'` Args: x: input tensor. scale: scaling factor to scale the residuals (i.e., the output of passing `x` through an inception module) before adding them to the shortcut branch. Let `r` be the output from the residual branch, the output of this block will be `x + scale * r`. block_type: `'block35'`, `'block17'` or `'block8'`, determines the network structure in the residual branch. block_idx: an `int` used for generating layer names. The Inception-ResNet blocks are repeated many times in this network. We use `block_idx` to identify each of the repetitions. For example, the first Inception-ResNet-A block will have `block_type='block35', block_idx=0`, and the layer names will have a common prefix `'block35_0'`. activation: activation function to use at the end of the block (see [activations](../activations.md)). When `activation=None`, no activation is applied (i.e., "linear" activation: `a(x) = x`). Returns: Output tensor for the block. Raises: ValueError: if `block_type` is not one of `'block35'`, `'block17'` or `'block8'`. """ if block_type == 'block35': branch_0 = conv2d_bn(x, 32, 1) branch_1 = conv2d_bn(x, 32, 1) branch_1 = conv2d_bn(branch_1, 32, 3) branch_2 = conv2d_bn(x, 32, 1) branch_2 = conv2d_bn(branch_2, 48, 3) branch_2 = conv2d_bn(branch_2, 64, 3) branches = [branch_0, branch_1, branch_2] elif block_type == 'block17': branch_0 = conv2d_bn(x, 192, 1) branch_1 = conv2d_bn(x, 128, 1) branch_1 = conv2d_bn(branch_1, 160, [1, 7]) branch_1 = conv2d_bn(branch_1, 192, [7, 1]) branches = [branch_0, branch_1] elif block_type == 'block8': branch_0 = conv2d_bn(x, 192, 1) branch_1 = conv2d_bn(x, 192, 1) branch_1 = conv2d_bn(branch_1, 224, [1, 3]) branch_1 = conv2d_bn(branch_1, 256, [3, 1]) branches = [branch_0, branch_1] else: raise ValueError('Unknown Inception-ResNet block type. ' 'Expects "block35", "block17" or "block8", ' 'but got: ' + str(block_type)) block_name = block_type + '_' + str(block_idx) channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3 mixed = layers.Concatenate(axis=channel_axis, name=block_name + '_mixed')(branches) up = conv2d_bn(mixed, backend.int_shape(x)[channel_axis], 1, activation=None, use_bias=True, name=block_name + '_conv') x = layers.Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale, output_shape=backend.int_shape(x)[1:], arguments={'scale': scale}, name=block_name)([x, up]) if activation is not None: x = layers.Activation(activation, name=block_name + '_ac')(x) return x
def semantic_prediction(semantic_names, semantic_features, target_level=0, input_target=None, n_filters=64, n_dense=64, ndim=2, n_classes=3, semantic_id=0): """Creates the prediction head from a list of semantic features Args: semantic_names (list): A list of the names of the semantic feature layers semantic_features (list): A list of semantic features NOTE: The semantic_names and semantic features should be in decreasing order e.g. [Q6, Q5, Q4, ...] target_level (int): (Optional) The level we need to reach. Performs 2x upsampling until we're at the target level. input_target (tensor): Optional tensor with the input image. n_filters (int): The number of filters for the 3x3 convolution. n_dense (int): The number of filters for dense layers. ndim (int): The spatial dimensions of the input data. Default is 2, but it also works with 3. n_classes (int): The number of classes to be predicted. semantic_id (int): Defaults to 0. A number to name the final layer. Allows for multiple semantic heads. Returns: tensor: The softmax prediction for the semantic segmentation head Raises: ValueError: ndim is not 2 or 3 """ acceptable_ndims = [2, 3] if ndim not in acceptable_ndims: raise ValueError('Only 2 and 3 dimensional networks are supported') if K.image_data_format() == 'channels_first': channel_axis = 1 else: channel_axis = -1 # Add all the semantic layers semantic_sum = semantic_features[0] for semantic_feature in semantic_features[1:]: semantic_sum = Add()([semantic_sum, semantic_feature]) # Final upsampling min_level = int(re.findall(r'\d+', semantic_names[-1])[0]) n_upsample = min_level - target_level x = semantic_upsample(semantic_sum, n_upsample, target=input_target, ndim=ndim) # First tensor product x = TensorProduct(n_dense)(x) x = BatchNormalization(axis=channel_axis)(x) x = Activation('relu')(x) # Apply tensor product and softmax layer if n_classes > 1: x = TensorProduct(n_classes)(x) x = Softmax(axis=channel_axis, name='semantic_{}'.format(semantic_id))(x) else: # n_classes == 1 x = TensorProduct(n_classes)(x) x = Activation('relu', name='semantic_{}'.format(semantic_id))(x) return x