def _shortcut(input_tensor, residual, norm):
  """Computes the output of a shortcut block between an input and residual.

  More specifically, this block takes `input_tensor` and adds it to `residual`.
  If `input_tensor` does not have the same shape as `residual`, we first apply
  an appropriately-sized convolutional layer to alter its shape to that of
  `residual` and normalize via `norm` before adding it to `residual`.

  Args:
    input_tensor: The `tf.Tensor` to apply the block to.
    residual: A `tf.Tensor` added to `input_tensor` after it has been passed
      through a convolution and normalization.
    norm: A `NormLayer` specifying the type of normalization layer used.

  Returns:
    A `tf.Tensor`.
  """
  input_shape = tf.keras.backend.int_shape(input_tensor)
  residual_shape = tf.keras.backend.int_shape(residual)

  if tf.keras.backend.image_data_format() == 'channels_last':
    row_axis = 1
    col_axis = 2
    channel_axis = 3
  else:
    channel_axis = 1
    row_axis = 2
    col_axis = 3

  stride_width = int(round(input_shape[row_axis] / residual_shape[row_axis]))
  stride_height = int(round(input_shape[col_axis] / residual_shape[col_axis]))
  equal_channels = input_shape[channel_axis] == residual_shape[channel_axis]

  shortcut = input_tensor
  # Use a 1-by-1 kernel if the strides are greater than 1, or if the input
  # and residual tensors have different numbers of channels.
  if stride_width > 1 or stride_height > 1 or not equal_channels:
    shortcut = tf.keras.layers.Conv2D(
        filters=residual_shape[channel_axis],
        kernel_size=(1, 1),
        strides=(stride_width, stride_height),
        padding='valid',
        use_bias=False,
        kernel_initializer='he_normal',
        kernel_regularizer=tf.keras.regularizers.l2(L2_WEIGHT_DECAY))(
            shortcut)
    if norm is NormLayer.GROUP_NORM:
      shortcut = group_norm.GroupNormalization(axis=channel_axis)(shortcut)
    elif norm is NormLayer.BATCH_NORM:
      shortcut = tf.keras.layers.BatchNormalization(
          axis=channel_axis,
          momentum=BATCH_NORM_DECAY,
          epsilon=BATCH_NORM_EPSILON)(shortcut)
    else:
      raise ValueError('The norm argument must be of type `NormLayer`.')

  return tf.keras.layers.add([shortcut, residual])
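# A minimal usage sketch (added for illustration, not part of the original
# module): the helper above splices a shortcut connection between two feature
# maps whose spatial sizes and channel counts differ. It assumes `NormLayer`,
# `group_norm`, and the normalization constants referenced above are defined
# elsewhere in this module; the function name below is hypothetical.
def _example_shortcut_usage():
  inputs = tf.keras.layers.Input(shape=(32, 32, 16))
  # Halve the spatial resolution and change the channel count, so the shortcut
  # helper must insert a strided 1x1 convolution before the addition.
  residual = tf.keras.layers.Conv2D(
      32, kernel_size=3, strides=2, padding='same')(inputs)
  merged = _shortcut(inputs, residual, norm=NormLayer.BATCH_NORM)
  return tf.keras.Model(inputs=inputs, outputs=merged)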
def _norm_relu(input_tensor, norm):
  """Applies normalization and ReLU activation to an input tensor.

  Args:
    input_tensor: The `tf.Tensor` to apply the block to.
    norm: A `NormLayer` specifying the type of normalization layer used.

  Returns:
    A `tf.Tensor`.
  """
  if tf.keras.backend.image_data_format() == 'channels_last':
    channel_axis = 3
  else:
    channel_axis = 1

  if norm is NormLayer.GROUP_NORM:
    x = group_norm.GroupNormalization(axis=channel_axis)(input_tensor)
  elif norm is NormLayer.BATCH_NORM:
    x = tf.keras.layers.BatchNormalization(
        axis=channel_axis,
        momentum=BATCH_NORM_DECAY,
        epsilon=BATCH_NORM_EPSILON)(input_tensor)
  else:
    raise ValueError('The norm argument must be of type `NormLayer`.')
  return tf.keras.layers.Activation('relu')(x)
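# An illustrative sketch (not part of the original module) of how `_norm_relu`
# composes with a convolution into a conv -> norm -> ReLU block; the function
# name below is hypothetical and `NormLayer` is the enum referenced above.
def _example_conv_norm_relu(input_tensor, filters, norm):
  x = tf.keras.layers.Conv2D(
      filters, kernel_size=3, padding='same', use_bias=False)(input_tensor)
  return _norm_relu(x, norm=norm)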
def test_groups_have_mean_0_var_leq_1_on_2d_data(self, input_shape, groups):
  # After group normalization, component groups should have mean 0 and
  # variance at most 1. Note that while the variance is generally exactly 1,
  # certain tensors will have smaller variance, potentially even 0. This test
  # ensures that the output of GroupNorm on 2d tensors has groups with mean 0
  # and variance at most 1.
  model = tf.keras.models.Sequential()
  norm = group_norm.GroupNormalization(
      axis=1, groups=groups, input_shape=input_shape)
  batched_input_shape = (1,) + input_shape
  model.add(norm)
  model.compile(loss='mse', optimizer='sgd')

  group_size = (input_shape[0] // groups) * input_shape[1]
  expected_group_means = tf.zeros((1, groups), dtype=tf.float32)

  # We seed our random input for reproducibility, but this test should
  # succeed independently of the input to the GroupNorm layer.
  for random_seed in range(100):
    np.random.seed(random_seed)
    x = np.random.normal(loc=2.0, scale=3.0, size=batched_input_shape)
    out = model.predict(x)
    reshaped_out = tf.reshape(out, (1, groups, group_size))
    group_means = tf.math.reduce_mean(reshaped_out, axis=2)
    group_variances = tf.math.reduce_variance(reshaped_out, axis=2)
    self.assertAllClose(group_means, expected_group_means, atol=1e-2)
    self.assertAllLessEqual(group_variances, 1.0)
def test_layer_applies_normalization_correctly(self):
  input_shape = (1, 4)
  reshaped_inputs = tf.constant([[[2.0, 2.0], [3.0, 3.0]]])
  layer = group_norm.GroupNormalization(groups=2, axis=1)
  normalized_input = layer._apply_normalization(reshaped_inputs, input_shape)
  self.assertAllClose(normalized_input, np.array([[[0.0, 0.0], [0.0, 0.0]]]))
def test_reshape(self, axis, group, input_shape, expected_shape):
  group_layer = group_norm.GroupNormalization(groups=group, axis=axis)
  group_layer.build(input_shape)

  inputs = np.ones(input_shape)
  tensor_input_shape = tf.convert_to_tensor(input_shape)
  _, group_shape = group_layer._reshape_into_groups(inputs, (10, 10, 10),
                                                    tensor_input_shape)
  self.assertAllEqual(group_shape, expected_shape)
def test_model_with_groupnorm_layer_trains(self, groups):
  # Check that the `axis` argument works for convolutional inputs and that a
  # model containing a GroupNormalization layer can be trained.
  np.random.seed(0)
  model = tf.keras.models.Sequential()
  model.add(
      group_norm.GroupNormalization(
          axis=1, groups=groups, input_shape=(20, 20, 3)))
  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(1, activation='softmax'))
  model.compile(optimizer=tf.keras.optimizers.SGD(0.01), loss='mse')

  x = np.random.randint(1000, size=(10, 20, 20, 3))
  y = np.random.randint(1000, size=(10, 1))
  model.fit(x=x, y=y, epochs=1)
def test_layer_has_no_weights(self):
  # Check that the layer is built without any trainable or untrainable weights.
  group_norm_layer = group_norm.GroupNormalization(groups=1)
  group_norm_layer.build((None, 3, 4))
  self.assertEmpty(group_norm_layer.trainable_weights)
  self.assertEmpty(group_norm_layer.weights)
def inverted_res_block(input_tensor,
                       expansion_factor,
                       stride,
                       filters,
                       alpha,
                       block_number,
                       num_groups=2,
                       dropout_prob=None,
                       expansion_layer=True):
  """Creates an inverted residual block.

  Args:
    input_tensor: A 4D input tensor, with shape (samples, channels, rows, cols)
      or (samples, rows, cols, channels).
    expansion_factor: A positive integer that governs (multiplicatively) how
      many channels are added in the initial expansion layer.
    stride: A positive integer giving the stride of the depthwise convolutional
      layer.
    filters: The base number of filters in the projection layer.
    alpha: A float multiplier for the number of filters in the projection
      layer. If set to 1.0, the number of filters is given by the `filters`
      arg.
    block_number: An integer specifying which inverted residual layer this is.
      Used only for naming purposes.
    num_groups: The number of groups to use in the GroupNorm layers.
    dropout_prob: The probability of setting a weight to zero in the dropout
      layer. If None, no dropout is used.
    expansion_layer: Whether to use an initial expansion layer.

  Returns:
    A 4D tensor with the same shape as the input tensor.
  """
  if tf.keras.backend.image_data_format() == 'channels_last':
    row_axis = 1
    col_axis = 2
    channel_axis = 3
  else:
    channel_axis = 1
    row_axis = 2
    col_axis = 3

  image_shape = (input_tensor.shape[row_axis], input_tensor.shape[col_axis])
  num_input_channels = input_tensor.shape[channel_axis]
  x = input_tensor
  prefix = 'block_{}_'.format(block_number)

  if expansion_layer:
    # We apply an initial pointwise convolution layer.
    x = tf.keras.layers.Conv2D(
        expansion_factor * num_input_channels,
        kernel_size=1,
        padding='same',
        use_bias=False,
        activation=None,
        name=prefix + 'expand_conv')(x)
    x = group_norm.GroupNormalization(
        groups=num_groups, axis=channel_axis, name=prefix + 'expand_gn')(x)
    if dropout_prob:
      x = tf.keras.layers.Dropout(
          dropout_prob, name=prefix + 'expand_dropout')(x)
    x = tf.keras.layers.ReLU(6.0, name=prefix + 'expand_relu')(x)

  # We now use depthwise convolutions.
  if stride % 2 == 0:
    padding = compute_pad(image_shape, 3, enforce_odd=True)
    x = tf.keras.layers.ZeroPadding2D(padding=padding, name=prefix + 'pad')(x)

  padding_type = 'same' if stride == 1 else 'valid'
  x = tf.keras.layers.DepthwiseConv2D(
      kernel_size=3,
      strides=stride,
      activation=None,
      use_bias=False,
      padding=padding_type,
      name=prefix + 'depthwise_conv')(x)
  x = group_norm.GroupNormalization(
      groups=num_groups, axis=channel_axis, name=prefix + 'depthwise_gn')(x)
  if dropout_prob:
    x = tf.keras.layers.Dropout(
        dropout_prob, name=prefix + 'depthwise_dropout')(x)
  x = tf.keras.layers.ReLU(6.0, name=prefix + 'depthwise_relu')(x)

  # Projection phase, using pointwise convolutions.
  num_projection_filters = _make_divisible(int(filters * alpha), 8)
  x = tf.keras.layers.Conv2D(
      num_projection_filters,
      kernel_size=1,
      padding='same',
      use_bias=False,
      activation=None,
      name=prefix + 'project_conv')(x)
  x = group_norm.GroupNormalization(
      groups=num_groups, axis=channel_axis, name=prefix + 'project_gn')(x)
  if dropout_prob:
    x = tf.keras.layers.Dropout(
        dropout_prob, name=prefix + 'project_dropout')(x)

  if num_input_channels == num_projection_filters and stride == 1:
    x = tf.keras.layers.add([input_tensor, x])
  return x
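# A minimal usage sketch (added for illustration, not part of the original
# module): stacking two inverted residual blocks on an image input. In the
# second block, `stride == 1` and the input and projection channel counts
# match, so the block adds a residual connection from its input to its output.
# The function name below is hypothetical.
def _example_inverted_res_blocks():
  inputs = tf.keras.layers.Input(shape=(32, 32, 16))
  x = inverted_res_block(
      inputs, expansion_factor=6, stride=2, filters=24, alpha=1.0,
      block_number=0, num_groups=2)
  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=24, alpha=1.0, block_number=1,
      num_groups=2)
  return tf.keras.Model(inputs=inputs, outputs=x)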
def create_mobilenet_v2(input_shape: Tuple[int, int, int],
                        alpha: float = 1.0,
                        pooling: str = 'avg',
                        num_groups: int = 2,
                        dropout_prob: Optional[float] = None,
                        num_classes: int = 1000):
  """Instantiates a MobileNetV2 model with Group Normalization.

  Args:
    input_shape: A tuple of length 3 describing the number of rows, columns,
      and channels of an input. Can be in channel-first or channel-last format.
    alpha: A positive float multiplier for the number of filters in the
      projection pointwise convolutional layers. If set to `1.0`, we recover
      the default number of filters from the original paper.
    pooling: A string indicating the pooling mode applied before the final
      fully-connected layer. Can be one of 'avg' or 'max'.
    num_groups: A positive integer indicating the number of groups to use in
      the GroupNorm layers.
    dropout_prob: An optional float between `0.0` and `1.0` representing the
      probability of setting a weight to zero in the dropout layer. If `None`,
      no dropout is used.
    num_classes: A positive integer indicating the number of output classes.

  Returns:
    A `tf.keras.Model`.
  """
  _validate_input_args(input_shape, alpha, pooling, num_groups, dropout_prob,
                       num_classes)

  if tf.keras.backend.image_data_format() == 'channels_last':
    row_axis, col_axis = (0, 1)
    channel_axis = 3
  else:
    row_axis, col_axis = (1, 2)
    channel_axis = 1

  image_shape = (input_shape[row_axis], input_shape[col_axis])
  img_input = tf.keras.layers.Input(shape=input_shape)

  initial_padding = compute_pad(image_shape, 3, enforce_odd=True)
  x = tf.keras.layers.ZeroPadding2D(
      initial_padding, name='initial_pad')(img_input)
  num_filters_first_block = _make_divisible(32 * alpha, 8)
  x = tf.keras.layers.Conv2D(
      num_filters_first_block,
      kernel_size=3,
      strides=(2, 2),
      padding='valid',
      use_bias=False,
      name='initial_conv')(x)
  x = group_norm.GroupNormalization(
      groups=num_groups, axis=channel_axis, name='initial_gn')(x)
  if dropout_prob:
    x = tf.keras.layers.Dropout(dropout_prob, name='initial_dropout')(x)
  x = tf.keras.layers.ReLU(6.0, name='initial_relu')(x)

  x = inverted_res_block(
      x, expansion_factor=1, stride=1, filters=16, alpha=alpha, block_number=0,
      num_groups=num_groups, dropout_prob=dropout_prob, expansion_layer=False)

  x = inverted_res_block(
      x, expansion_factor=6, stride=2, filters=24, alpha=alpha, block_number=1,
      num_groups=num_groups, dropout_prob=dropout_prob)
  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=24, alpha=alpha, block_number=2,
      num_groups=num_groups, dropout_prob=dropout_prob)

  x = inverted_res_block(
      x, expansion_factor=6, stride=2, filters=32, alpha=alpha, block_number=3,
      num_groups=num_groups, dropout_prob=dropout_prob)
  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=32, alpha=alpha, block_number=4,
      num_groups=num_groups, dropout_prob=dropout_prob)
  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=32, alpha=alpha, block_number=5,
      num_groups=num_groups, dropout_prob=dropout_prob)

  x = inverted_res_block(
      x, expansion_factor=6, stride=2, filters=64, alpha=alpha, block_number=6,
      num_groups=num_groups, dropout_prob=dropout_prob)
  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=64, alpha=alpha, block_number=7,
      num_groups=num_groups, dropout_prob=dropout_prob)
  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=64, alpha=alpha, block_number=8,
      num_groups=num_groups, dropout_prob=dropout_prob)
  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=64, alpha=alpha, block_number=9,
      num_groups=num_groups, dropout_prob=dropout_prob)

  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=96, alpha=alpha,
      block_number=10, num_groups=num_groups, dropout_prob=dropout_prob)
  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=96, alpha=alpha,
      block_number=11, num_groups=num_groups, dropout_prob=dropout_prob)
  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=96, alpha=alpha,
      block_number=12, num_groups=num_groups, dropout_prob=dropout_prob)

  x = inverted_res_block(
      x, expansion_factor=6, stride=2, filters=160, alpha=alpha,
      block_number=13, num_groups=num_groups, dropout_prob=dropout_prob)
  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=160, alpha=alpha,
      block_number=14, num_groups=num_groups, dropout_prob=dropout_prob)
  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=160, alpha=alpha,
      block_number=15, num_groups=num_groups, dropout_prob=dropout_prob)

  x = inverted_res_block(
      x, expansion_factor=6, stride=1, filters=320, alpha=alpha,
      block_number=16, num_groups=num_groups, dropout_prob=dropout_prob)

  # Following the original paper, we do not apply `alpha` to the last
  # convolutional layer when it is less than 1.
  if alpha > 1.0:
    last_block_filters = _make_divisible(1280 * alpha, 8)
  else:
    last_block_filters = 1280

  x = tf.keras.layers.Conv2D(
      last_block_filters, kernel_size=1, use_bias=False, name='last_conv')(x)
  x = group_norm.GroupNormalization(
      groups=num_groups, axis=channel_axis, name='last_gn')(x)
  if dropout_prob:
    x = tf.keras.layers.Dropout(dropout_prob, name='last_dropout')(x)
  x = tf.keras.layers.ReLU(6.0, name='last_relu')(x)

  if pooling == 'avg':
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
  elif pooling == 'max':
    x = tf.keras.layers.GlobalMaxPooling2D()(x)
  else:
    raise ValueError('Found unexpected pooling argument {}'.format(pooling))

  x = tf.keras.layers.Dense(
      num_classes, activation='softmax', use_bias=True, name='logits')(x)
  model = tf.keras.models.Model(inputs=img_input, outputs=x)
  return model
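# A minimal usage sketch (added for illustration, not part of the original
# module): building the GroupNorm MobileNetV2 for 32x32 RGB inputs with 10
# output classes and compiling it for training. The function name and the
# optimizer/loss choices below are hypothetical.
def _example_create_mobilenet_v2():
  model = create_mobilenet_v2(
      input_shape=(32, 32, 3), alpha=1.0, pooling='avg', num_groups=2,
      num_classes=10)
  model.compile(
      optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
      loss='sparse_categorical_crossentropy',
      metrics=['accuracy'])
  return model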