def coarsemask_decoder_net(self,
                           images,
                           is_training=False,
                           batch_norm_relu=nn_ops.BatchNormRelu()):
  """Builds the coarse mask decoder as a stack of conv + batch-norm layers.

  `self._num_convs` 3x3 convolutions with 'same' padding are applied in
  sequence. Each produces `self._num_downsample_channels` filters and is
  followed by `batch_norm_relu`.

  Args:
    images: A tensor of size [batch, height_in, width_in, channels_in].
    is_training: Whether batch_norm layers are in training mode.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).

  Returns:
    A feature tensor of size [batch, output_size, output_size, num_channels].
  """
  net = images
  for layer_idx in range(self._num_convs):
    conv_name = 'coarse-class-%d' % layer_idx
    bn_name = 'coarse-class-%d-bn' % layer_idx
    net = tf.layers.conv2d(
        net,
        self._num_downsample_channels,
        kernel_size=(3, 3),
        padding='same',
        activation=None,
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        name=conv_name)
    net = batch_norm_relu(net, is_training=is_training, name=bn_name)
  return net
def __init__(self,
             min_level,
             max_level,
             num_classes,
             anchors_per_location,
             num_convs=4,
             num_filters=256,
             use_separable_conv=False,
             batch_norm_relu=nn_ops.BatchNormRelu()):
  """Records the configuration used to build the RetinaNet head.

  Nothing is built here; every argument is only stored on the instance.

  Args:
    min_level: `int` number of minimum feature level.
    max_level: `int` number of maximum feature level.
    num_classes: `int` number of classification categories.
    anchors_per_location: `int` number of anchors per pixel location.
    num_convs: `int` number of stacked convolutions before the last
      prediction layer.
    num_filters: `int` number of filters used in the head architecture.
    use_separable_conv: `bool` to indicate whether to use separable
      convolution.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  # Feature level range.
  self._min_level, self._max_level = min_level, max_level

  # Prediction layout.
  self._num_classes = num_classes
  self._anchors_per_location = anchors_per_location

  # Shape of the convolutional tower.
  self._num_convs, self._num_filters = num_convs, num_filters
  self._use_separable_conv = use_separable_conv

  self._batch_norm_relu = batch_norm_relu
def resample_feature_map(feat,
                         level,
                         target_level,
                         is_training,
                         target_feat_dims=256,
                         conv2d_op=tf.layers.conv2d,
                         batch_norm_relu=nn_ops.BatchNormRelu(),
                         name=None):
  """Resamples a feature map to the target channel count and level.

  All ops are created under the variable scope 'resample_<name>':
    * If dimension 3 of `feat` differs from `target_feat_dims`, a 1x1 conv
      followed by `batch_norm_relu` (called with relu=False) changes the
      number of channels.
    * If `level < target_level`, the map is max-pooled with a stride of
      2**(target_level - level).
    * If `level > target_level`, the map is upsampled with nearest-neighbor
      upsampling by a factor of 2**(level - target_level).

  Args:
    feat: the input feature tensor; its dimension at index 3 is read as the
      number of channels.
    level: the level of `feat`.
    target_level: the level the output should have.
    is_training: forwarded to `batch_norm_relu`.
    target_feat_dims: the number of channels the output should have.
    conv2d_op: the convolution op used for the 1x1 channel projection.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
    name: used to build the variable scope name.

  Returns:
    The resampled feature tensor.
  """
  input_dims = feat.get_shape().as_list()[3]
  scope_name = 'resample_{}'.format(name)
  with tf.variable_scope(scope_name):
    # Channel projection, only when the channel count has to change.
    if input_dims != target_feat_dims:
      feat = conv2d_op(
          feat, filters=target_feat_dims, kernel_size=(1, 1), padding='same')
      feat = batch_norm_relu(
          feat, is_training=is_training, relu=False, name='bn')

    if level < target_level:
      # Going to a higher level: reduce resolution by max pooling.
      stride = int(2**(target_level - level))
      feat = tf.layers.max_pooling2d(
          inputs=feat,
          pool_size=stride,
          strides=[stride, stride],
          padding='SAME')
    elif level > target_level:
      # Going to a lower level: increase resolution by nearest upsampling.
      scale = int(2**(level - target_level))
      feat = spatial_transform_ops.nearest_upsampling(feat, scale=scale)
  return feat
def block_group(inputs,
                filters,
                strides,
                use_projection,
                block_fn,
                block_repeats,
                batch_norm_relu=nn_ops.BatchNormRelu(),
                dropblock=nn_ops.Dropblock(),
                drop_connect_rate=None,
                data_format='channels_last',
                name=None,
                is_training=False):
  """Builds one group of blocks.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    filters: an `int` number of filters for the first two convolutions.
    strides: an `int` block stride. If greater than 1, this block will
      ultimately downsample the input.
    use_projection: a `bool` for whether the first block should use a
      projection shortcut (versus the default identity shortcut). This is
      usually `True` for the first block of a block group, which may change
      the number of filters and the resolution.
    block_fn: the `function` for the block to use within the model.
    block_repeats: an `int` number of blocks to repeat in the group.
    batch_norm_relu: an operation that is added after convolutions, including
      a batch norm layer and an optional relu activation.
    dropblock: a drop block layer that is added after convolutions. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a `float` number that specifies the drop connection
      rate of the block. Note that the default `None` means no drop
      connection is applied.
    data_format: a `str` that specifies the data format.
    name: a `str` name for the Tensor output of the block layer.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block layer.
  """
  # Keyword arguments that are identical for every block in the group.
  shared_kwargs = dict(
      batch_norm_relu=batch_norm_relu,
      dropblock=dropblock,
      drop_connect_rate=drop_connect_rate,
      data_format=data_format,
      is_training=is_training)

  # Only the first block per block_group uses projection shortcut and strides.
  net = block_fn(
      inputs, filters, strides, use_projection=use_projection, **shared_kwargs)

  # The remaining blocks keep the resolution and use the identity shortcut.
  remaining_blocks = range(1, block_repeats)
  for _ in remaining_blocks:
    net = block_fn(net, filters, 1, use_projection=False, **shared_kwargs)

  return tf.identity(net, name)
def __init__(self,
             min_level=3,
             max_level=7,
             fpn_feat_dims=256,
             use_separable_conv=False,
             batch_norm_relu=nn_ops.BatchNormRelu()):
  """Records the FPN configuration and selects its convolution op.

  Args:
    min_level: `int` minimum level in FPN output feature maps.
    max_level: `int` maximum level in FPN output feature maps.
    fpn_feat_dims: `int` number of filters in FPN layers.
    use_separable_conv: `bool`, if True use separable convolution for
      convolution in FPN layers.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  self._min_level, self._max_level = min_level, max_level
  self._fpn_feat_dims = fpn_feat_dims

  # Separable convolutions are pinned to depth_multiplier=1; otherwise the
  # plain conv2d layer is used as-is.
  self._conv2d_op = (
      functools.partial(tf.layers.separable_conv2d, depth_multiplier=1)
      if use_separable_conv else tf.layers.conv2d)

  self._batch_norm_relu = batch_norm_relu
def __init__(self,
             num_classes,
             endpoints_num_filters=0,
             aggregation='top',
             dropout_rate=0.0,
             batch_norm_relu=nn_ops.BatchNormRelu(),
             data_format='channels_last'):
  """Records the configuration used to build the classification head.

  Args:
    num_classes: the number of classes, including one background class.
    endpoints_num_filters: the number of filters of the optional embedding
      layer after the multiscale feature aggregation. If 0, no additional
      embedding layer is applied.
    aggregation: the method to aggregate the multiscale feature maps. If
      `top`, the feature map of the highest level will be directly used. If
      `all`, all levels will be used by nearest-neighbor upsampling and
      averaging to the same size as the lowest level (the number of filters
      for all levels should match).
    dropout_rate: the dropout rate of the optional dropout layer. If 0.0, no
      additional dropout layer is applied.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by an optional relu layer.
    data_format: An optional string from: `channels_last`, `channels_first`.
      Defaults to `channels_last`.
  """
  self._num_classes = num_classes

  # Multiscale aggregation and the optional embedding layer after it.
  self._aggregation = aggregation
  self._endpoints_num_filters = endpoints_num_filters

  # Optional regularization and normalization.
  self._dropout_rate = dropout_rate
  self._batch_norm_relu = batch_norm_relu

  self._data_format = data_format
def batch_norm_relu_generator(params, activation='relu'):
  """Creates a `nn_ops.BatchNormRelu` configured from `params`.

  Args:
    params: an object exposing the attributes `batch_norm_momentum`,
      `batch_norm_epsilon`, `batch_norm_trainable` and `use_sync_bn`.
    activation: forwarded unchanged as the `activation` argument of
      `nn_ops.BatchNormRelu`.

  Returns:
    The object returned by `nn_ops.BatchNormRelu(...)`.
  """
  bn_kwargs = dict(
      momentum=params.batch_norm_momentum,
      epsilon=params.batch_norm_epsilon,
      trainable=params.batch_norm_trainable,
      use_sync_bn=params.use_sync_bn,
      activation=activation)
  return nn_ops.BatchNormRelu(**bn_kwargs)
def __init__(self,
             resnet_depth,
             dropblock=nn_ops.Dropblock(),
             batch_norm_relu=nn_ops.BatchNormRelu(),
             data_format='channels_last'):
  """ResNet initialization function.

  Stores the configuration, looks up the block type and per-group layer
  counts for `resnet_depth`, and builds the network function with
  `self.resnet_v1_generator`.

  Args:
    resnet_depth: `int` depth of ResNet backbone model. Must be one of
      18, 34, 50, 101, 152 or 200.
    dropblock: a dropblock layer.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
    data_format: `str` either "channels_first" for
      `[batch, channels, height, width]` or "channels_last" for
      `[batch, height, width, channels]`.

  Raises:
    ValueError: if `resnet_depth` is not a supported depth.
  """
  self._resnet_depth = resnet_depth
  self._dropblock = dropblock
  self._batch_norm_relu = batch_norm_relu
  self._data_format = data_format

  # Depth -> block type and number of blocks in each of the four groups.
  model_params = {
      18: {'block': self.residual_block, 'layers': [2, 2, 2, 2]},
      34: {'block': self.residual_block, 'layers': [3, 4, 6, 3]},
      50: {'block': self.bottleneck_block, 'layers': [3, 4, 6, 3]},
      101: {'block': self.bottleneck_block, 'layers': [3, 4, 23, 3]},
      152: {'block': self.bottleneck_block, 'layers': [3, 8, 36, 3]},
      200: {'block': self.bottleneck_block, 'layers': [3, 24, 36, 3]},
  }

  if resnet_depth not in model_params:
    valid_resnet_depths = ', '.join(
        str(depth) for depth in sorted(model_params))
    # The rejected depth is formatted into the message itself. Passing it as
    # a second ValueError argument makes the exception print as a tuple
    # instead of a readable sentence.
    raise ValueError(
        'The resnet_depth should be in [%s]. Not a valid resnet_depth: %s' %
        (valid_resnet_depths, resnet_depth))

  params = model_params[resnet_depth]
  self._resnet_fn = self.resnet_v1_generator(params['block'],
                                             params['layers'])
def decoder_net(self,
                features,
                is_training=False,
                batch_norm_relu=nn_ops.BatchNormRelu()):
  """Fine mask decoder network architecture.

  The instance dimension is folded into the batch dimension,
  `self._num_convs` 3x3 conv + batch-norm layers are applied, a 1x1 conv
  predicts the per-class mask logits, and the instance dimension is restored.

  Args:
    features: A 5-D tensor whose static shape is unpacked as
      [batch_size, num_instances, height, width, num_channels]. The batch
      size and number of instances are multiplied together, so both need to
      be statically known.
    is_training: Whether batch_norm layers are in training mode.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).

  Returns:
    mask_logits: A tensor of shape
      [batch_size, num_instances, height, width, self._mask_num_classes].
  """
  static_shape = features.get_shape().as_list()
  batch_size, num_instances, height, width, num_channels = static_shape

  # Treat every instance as an independent batch element for the 2-D convs.
  flat_shape = [batch_size * num_instances, height, width, num_channels]
  net = tf.reshape(features, flat_shape)

  for layer_idx in range(self._num_convs):
    net = tf.layers.conv2d(
        net,
        self._num_downsample_channels,
        kernel_size=(3, 3),
        padding='same',
        activation=None,
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        name='class-%d' % layer_idx)
    net = batch_norm_relu(
        net, is_training=is_training, name='class-%d-bn' % layer_idx)

  # Predict per-class instance masks.
  # Focal loss bias initialization to have foreground 0.01 probability.
  foreground_prior = 0.01
  prior_bias = -np.log((1 - foreground_prior) / foreground_prior)
  mask_logits = tf.layers.conv2d(
      net,
      self._mask_num_classes,
      kernel_size=(1, 1),
      padding='same',
      kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.01),
      bias_initializer=tf.constant_initializer(prior_bias),
      name='class-predict')

  # Restore the separate batch and instance dimensions.
  output_shape = [
      batch_size, num_instances, height, width, self._mask_num_classes
  ]
  return tf.reshape(mask_logits, output_shape)
def block_group(inputs,
                filters,
                strides,
                block_fn_cand,
                block_repeats,
                activation=tf.nn.swish,
                batch_norm_relu=nn_ops.BatchNormRelu(),
                dropblock=nn_ops.Dropblock(),
                drop_connect_rate=None,
                data_format='channels_last',
                name=None,
                is_training=False):
  """Creates one group of blocks for SpineNet.

  The first block applies `strides` and uses a projection shortcut unless the
  input already has the block's output channel count and `strides` is 1. The
  remaining `block_repeats - 1` blocks use stride 1 and no projection.

  Args:
    inputs: a 4-D `Tensor`; its last static dimension is read as the number
      of input filters.
    filters: the `filters` argument passed to every block.
    strides: the stride of the first block.
    block_fn_cand: a `str`, either 'bottleneck' or 'residual', selecting the
      block function from `nn_blocks`.
    block_repeats: the number of blocks in the group.
    activation: the activation passed to every block.
    batch_norm_relu: the batch norm operation passed to every block.
    dropblock: the drop block layer passed to every block.
    drop_connect_rate: the drop connect rate passed to every block.
    data_format: the data format passed to every block.
    name: the name given to the output tensor.
    is_training: whether the model is in training mode.

  Returns:
    The output `Tensor` of the last block, wrapped in `tf.identity`.

  Raises:
    ValueError: if `block_fn_cand` is not a supported block name.
  """
  block_fn_candidates = {
      'bottleneck': nn_blocks.bottleneck_block,
      'residual': nn_blocks.residual_block,
  }
  if block_fn_cand not in block_fn_candidates:
    raise ValueError(
        'Block function {} not implemented.'.format(block_fn_cand))
  block_fn = block_fn_candidates[block_fn_cand]

  _, _, _, num_filters = inputs.get_shape().as_list()
  # A bottleneck block outputs 4x `filters` channels; a residual block
  # outputs `filters` channels.
  if block_fn_cand == 'bottleneck':
    block_out_filters = filters * 4
  else:
    block_out_filters = filters
  # The identity shortcut is only possible when neither the channel count nor
  # the resolution changes.
  use_projection = num_filters != block_out_filters or strides != 1

  shared_kwargs = dict(
      activation=activation,
      batch_norm_relu=batch_norm_relu,
      dropblock=dropblock,
      drop_connect_rate=drop_connect_rate,
      data_format=data_format,
      is_training=is_training)

  # Only the first block per block_group uses projection shortcut and strides.
  net = block_fn(
      inputs, filters, strides, use_projection=use_projection, **shared_kwargs)
  for _ in range(1, block_repeats):
    net = block_fn(net, filters, 1, use_projection=False, **shared_kwargs)

  return tf.identity(net, name)
def __init__(self,
             num_classes,
             num_convs=0,
             num_filters=256,
             use_separable_conv=False,
             num_fcs=2,
             fc_dims=1024,
             use_batch_norm=True,
             batch_norm_relu=nn_ops.BatchNormRelu()):
  """Records the configuration used to build the Fast R-CNN box head.

  Args:
    num_classes: an integer for the number of classes.
    num_convs: `int` number that represents the number of the intermediate
      conv layers before the FC layers.
    num_filters: `int` number that represents the number of filters of the
      intermediate conv layers.
    use_separable_conv: `bool`, indicating whether the separable conv layers
      are used.
    num_fcs: `int` number that represents the number of FC layers before the
      predictions.
    fc_dims: `int` number that represents the number of dimensions of the FC
      layers.
    use_batch_norm: `bool`, indicating whether batchnorm layers are added.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  self._num_classes = num_classes
  self._num_convs, self._num_filters = num_convs, num_filters

  # Both variants zero-initialize the bias; they differ in the base layer and
  # in the extra keyword arguments bound to it.
  if use_separable_conv:
    base_conv = tf.layers.separable_conv2d
    variant_kwargs = {'depth_multiplier': 1}
  else:
    base_conv = tf.layers.conv2d
    variant_kwargs = {
        'kernel_initializer':
            tf.keras.initializers.VarianceScaling(
                scale=2, mode='fan_out', distribution='untruncated_normal'),
    }
  self._conv2d_op = functools.partial(
      base_conv, bias_initializer=tf.zeros_initializer(), **variant_kwargs)

  self._num_fcs, self._fc_dims = num_fcs, fc_dims
  self._use_batch_norm = use_batch_norm
  self._batch_norm_relu = batch_norm_relu
def __init__(self,
             num_classes,
             mrcnn_resolution,
             batch_norm_relu=nn_ops.BatchNormRelu()):
  """Records the configuration used to build the head.

  Args:
    num_classes: an integer for the number of classes.
    mrcnn_resolution: an integer that is the resolution of masks.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  self._num_classes, self._mrcnn_resolution = num_classes, mrcnn_resolution
  self._batch_norm_relu = batch_norm_relu
def __init__(self,
             num_classes,
             mlp_head_dim,
             batch_norm_relu=nn_ops.BatchNormRelu()):
  """Records the configuration used to build the Fast R-CNN box head.

  Args:
    num_classes: an integer for the number of classes.
    mlp_head_dim: an integer that is the hidden dimension in the
      fully-connected layers.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  self._num_classes, self._mlp_head_dim = num_classes, mlp_head_dim
  self._batch_norm_relu = batch_norm_relu
def __init__(self,
             min_level=3,
             max_level=7,
             fpn_feat_dims=256,
             num_repeats=7,
             use_separable_conv=False,
             dropblock=nn_ops.Dropblock(),
             batch_norm_relu=nn_ops.BatchNormRelu()):
  """NAS-FPN initialization function.

  Only the combination `min_level == 3` and `max_level == 7` has a model
  configuration; any other combination is rejected.

  Args:
    min_level: `int` minimum level in NAS-FPN output feature maps.
    max_level: `int` maximum level in NAS-FPN output feature maps.
    fpn_feat_dims: `int` number of filters in FPN layers.
    num_repeats: number of repeats for feature pyramid network.
    use_separable_conv: `bool`, if True use separable convolution for
      convolution in NAS-FPN layers.
    dropblock: a Dropblock layer.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).

  Raises:
    ValueError: if the (min_level, max_level) pair is not (3, 7).
  """
  self._min_level = min_level
  self._max_level = max_level

  if not (min_level == 3 and max_level == 7):
    raise ValueError('The NAS-FPN with min level {} and max level {} '
                     'is not supported.'.format(min_level, max_level))

  model_config = [
      3, 1, 1, 3,
      3, 0, 1, 5,
      4, 0, 0, 6,  # Output to level 3.
      3, 0, 6, 7,  # Output to level 4.
      2, 1, 7, 8,  # Output to level 5.
      0, 1, 6, 9,  # Output to level 7.
      1, 1, 9, 10,  # Output to level 6.
  ]
  self._config = Config(model_config, self._min_level, self._max_level)

  self._num_repeats = num_repeats
  self._fpn_feat_dims = fpn_feat_dims
  self._use_separable_conv = use_separable_conv
  self._dropblock = dropblock
  self._batch_norm_relu = batch_norm_relu

  # Pre-bind the resampling helper to this network's channel count and batch
  # norm operation.
  self._resample_feature_map = functools.partial(
      resample_feature_map,
      target_feat_dims=fpn_feat_dims,
      batch_norm_relu=batch_norm_relu)
def __init__(self,
             min_level,
             max_level,
             anchors_per_location,
             num_convs=2,
             num_filters=256,
             use_separable_conv=False,
             use_batch_norm=True,
             batch_norm_relu=nn_ops.BatchNormRelu()):
  """Records the configuration used to build the Region Proposal Network head.

  Args:
    min_level: `int` number of minimum feature level.
    max_level: `int` number of maximum feature level.
    anchors_per_location: `int` number of anchors per pixel location.
    num_convs: `int` number that represents the number of the intermediate
      conv layers before the prediction.
    num_filters: `int` number that represents the number of filters of the
      intermediate conv layers.
    use_separable_conv: `bool`, indicating whether the separable conv layers
      are used.
    use_batch_norm: `bool`, indicating whether batchnorm layers are added.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  self._min_level, self._max_level = min_level, max_level
  self._anchors_per_location = anchors_per_location
  self._num_convs, self._num_filters = num_convs, num_filters

  # Both variants zero-initialize the bias; they differ in the base layer and
  # in the extra keyword arguments bound to it.
  if use_separable_conv:
    base_conv = tf.layers.separable_conv2d
    variant_kwargs = {'depth_multiplier': 1}
  else:
    base_conv = tf.layers.conv2d
    variant_kwargs = {
        'kernel_initializer': tf.random_normal_initializer(stddev=0.01),
    }
  self._conv2d_op = functools.partial(
      base_conv, bias_initializer=tf.zeros_initializer(), **variant_kwargs)

  self._use_batch_norm = use_batch_norm
  self._batch_norm_relu = batch_norm_relu
def __init__(self,
             num_classes,
             mask_target_size,
             use_batch_norm=True,
             batch_norm_relu=nn_ops.BatchNormRelu()):
  """Records the configuration used to build the head.

  Args:
    num_classes: an integer for the number of classes.
    mask_target_size: an integer that is the resolution of masks.
    use_batch_norm: `bool`, indicating whether batchnorm layers are added.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  self._num_classes, self._mask_target_size = num_classes, mask_target_size

  # Normalization settings.
  self._use_batch_norm = use_batch_norm
  self._batch_norm_relu = batch_norm_relu
def __call__(self,
             features,
             is_training,
             batch_norm_relu=nn_ops.BatchNormRelu()):
  """Generate logits for semantic segmentation.

  The feature selected by `self._level` goes through `self._num_convs` 3x3
  conv + batch-norm layers that keep its channel count, followed by a 1x1
  conv with `self._num_classes` filters. All layers are created under the
  'segmentation' variable scope with `reuse=tf.AUTO_REUSE`.

  Args:
    features: a collection indexed with `self._level` to obtain the feature
      tensor to predict from. Presumably a mapping from feature level to
      tensor; confirm against callers.
    is_training: a bool indicating whether in training mode.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).

  Returns:
    logits: semantic segmentation logits as a float Tensor of shape
      [batch_size, height, width, num_classes].
  """
  net = features[self._level]
  num_channels = net.get_shape().as_list()[-1]

  with tf.variable_scope('segmentation', reuse=tf.AUTO_REUSE):
    for layer_idx in range(self._num_convs):
      net = tf.layers.conv2d(
          net,
          num_channels,
          kernel_size=(3, 3),
          padding='same',
          activation=None,
          kernel_initializer=tf.random_normal_initializer(stddev=0.01),
          bias_initializer=tf.zeros_initializer(),
          name='class-%d' % layer_idx)
      net = batch_norm_relu(
          net, is_training=is_training, name='class-%d-bn' % layer_idx)

    logits = tf.layers.conv2d(
        net,
        self._num_classes,  # This include background class 0.
        kernel_size=(1, 1),
        padding='same',
        activation=None,
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        bias_initializer=tf.zeros_initializer())
  return logits
def __init__(self,
             min_level,
             max_level,
             anchors_per_location,
             batch_norm_relu=nn_ops.BatchNormRelu()):
  """Records the configuration used to build the Region Proposal Network head.

  Args:
    min_level: `int` number of minimum feature level.
    max_level: `int` number of maximum feature level.
    anchors_per_location: `int` number of anchors per pixel location.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  self._min_level, self._max_level = min_level, max_level
  self._anchors_per_location = anchors_per_location
  self._batch_norm_relu = batch_norm_relu
def __init__(self,
             min_level=3,
             max_level=7,
             fpn_feat_dims=256,
             batch_norm_relu=nn_ops.BatchNormRelu()):
  """Records the FPN configuration.

  Args:
    min_level: `int` minimum level in FPN output feature maps.
    max_level: `int` maximum level in FPN output feature maps.
    fpn_feat_dims: `int` number of filters in FPN layers.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  self._min_level, self._max_level = min_level, max_level
  self._fpn_feat_dims = fpn_feat_dims
  self._batch_norm_relu = batch_norm_relu
def __init__(self,
             num_classes,
             level,
             num_convs,
             use_batch_norm=True,
             batch_norm_relu=nn_ops.BatchNormRelu()):
  """Records the configuration used to build the segmentation head.

  Args:
    num_classes: `int` number of mask classification categories. The number
      of classes does not include background class.
    level: `int` feature level used for prediction.
    num_convs: `int` number of stacked convolutions before the last
      prediction layer.
    use_batch_norm: `bool`, indicating whether batchnorm layers are added.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  self._num_classes = num_classes

  # Which feature is used and how deep the conv stack is.
  self._level, self._num_convs = level, num_convs

  # Normalization settings.
  self._use_batch_norm = use_batch_norm
  self._batch_norm_relu = batch_norm_relu
def __init__(self,
             min_level=3,
             max_level=7,
             endpoints_num_filters=256,
             resample_alpha=0.5,
             use_native_resize_op=False,
             block_specs=build_block_specs(),
             block_repeats=1,
             filter_size_scale=1.0,
             activation='swish',
             batch_norm_relu=nn_ops.BatchNormRelu(),
             init_drop_connect_rate=None,
             data_format='channels_last'):
  """SpineNet initialization function.

  Args:
    min_level: an `int` representing the minimum level in SpineNet endpoints.
    max_level: an `int` representing the maximum level in SpineNet endpoints.
    endpoints_num_filters: an `int` representing the final feature dimension
      of endpoints before the shared conv layers in head.
    resample_alpha: a `float` representing the scaling factor to scale feature
      dimension before resolution resampling.
    use_native_resize_op: Whether to use native
      tf.image.nearest_neighbor_resize or the broadcast implementation to do
      upsampling.
    block_specs: a list of BlockSpec objects that specifies the SpineNet
      network topology. By default, the result of `build_block_specs()` is
      used.
    block_repeats: an `int` representing the number of repeats per block
      group.
    filter_size_scale: a `float` representing the scaling factor to uniformly
      scale feature dimension in SpineNet.
    activation: a `str` naming the activation function. Supports 'relu' and
      'swish'.
    batch_norm_relu: an operation that is added after convolutions, including
      a batch norm layer and an optional relu activation.
    init_drop_connect_rate: a `float` number that specifies the initial drop
      connection rate. Note that the default `None` means no drop connection
      is applied.
    data_format: An optional string from: "channels_last", "channels_first".
      Defaults to "channels_last".

  Raises:
    ValueError: if `activation` is neither 'relu' nor 'swish'.
  """
  self._min_level, self._max_level = min_level, max_level
  self._endpoints_num_filters = endpoints_num_filters

  # The initial blocks are fixed: two bottleneck blocks.
  self._init_block_fn = 'bottleneck'
  self._num_init_blocks = 2

  self._resample_alpha = resample_alpha
  self._use_native_resize_op = use_native_resize_op

  # Resolve the activation name to the TensorFlow function.
  if activation not in ('relu', 'swish'):
    raise ValueError('Activation {} not implemented.'.format(activation))
  self._activation = tf.nn.relu if activation == 'relu' else tf.nn.swish

  self._block_specs = block_specs
  self._block_repeats = block_repeats
  self._filter_size_scale = filter_size_scale
  self._batch_norm_relu = batch_norm_relu
  # Hard-code it to not use DropBlock.
  self._dropblock = nn_ops.Dropblock()
  self._init_drop_connect_rate = init_drop_connect_rate
  self._data_format = data_format
def batch_norm_relu_generator(params):
  """Creates a `nn_ops.BatchNormRelu` configured from `params`.

  Args:
    params: an object exposing the attributes `batch_norm_momentum`,
      `batch_norm_epsilon` and `batch_norm_trainable`.

  Returns:
    The object returned by `nn_ops.BatchNormRelu(...)`.
  """
  bn_kwargs = dict(
      momentum=params.batch_norm_momentum,
      epsilon=params.batch_norm_epsilon,
      trainable=params.batch_norm_trainable)
  return nn_ops.BatchNormRelu(**bn_kwargs)
def _batch_norm_op(**kwargs):
  """Creates a `nn_ops.BatchNormRelu` from `params` plus extra keyword args.

  `params` is not an argument of this function; it is resolved from the
  surrounding scope and must expose `batch_norm_momentum`,
  `batch_norm_epsilon` and `batch_norm_trainable`.

  Args:
    **kwargs: additional keyword arguments forwarded unchanged to
      `nn_ops.BatchNormRelu`.

  Returns:
    The object returned by `nn_ops.BatchNormRelu(...)`.
  """
  bn_momentum = params.batch_norm_momentum
  bn_epsilon = params.batch_norm_epsilon
  bn_trainable = params.batch_norm_trainable
  return nn_ops.BatchNormRelu(
      momentum=bn_momentum,
      epsilon=bn_epsilon,
      trainable=bn_trainable,
      **kwargs)
def bottleneck_block(inputs,
                     filters,
                     strides,
                     use_projection,
                     activation=tf.nn.relu,
                     batch_norm_relu=nn_ops.BatchNormRelu(),
                     dropblock=nn_ops.Dropblock(),
                     drop_connect_rate=None,
                     data_format='channels_last',
                     is_training=False):
  """The bottleneck block with BN and DropBlock after convolutions.

  The residual branch is 1x1 conv -> 3x3 conv (carrying `strides`) -> 1x1
  conv with `4 * filters` outputs. Each conv is followed by `batch_norm_relu`
  and `dropblock`; the last batch norm is called with relu=False. The result
  is added to the shortcut and passed through `activation`.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    filters: an `int` number of filters for the first two convolutions. Note
      that the third and final convolution will use 4 times as many filters.
    strides: an `int` block stride. If greater than 1, this block will
      ultimately downsample the input.
    use_projection: a `bool` for whether this block should use a projection
      shortcut (versus the default identity shortcut). This is usually `True`
      for the first block of a block group, which may change the number of
      filters and the resolution.
    activation: a callable applied to the sum of the residual branch and the
      shortcut.
    batch_norm_relu: an operation that is added after convolutions, including
      a batch norm layer and an optional relu activation.
    dropblock: a drop block layer that is added after convolutions. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a `float` number that specifies the drop connection
      rate of the block. Note that the default `None` means no drop
      connection is applied.
    data_format: a `str` that specifies the data format.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block.
  """
  logging.info('-----> Building bottleneck block.')

  shortcut = inputs
  if use_projection:
    # Project the shortcut so it matches the residual branch in channel
    # count and stride.
    out_filters = 4 * filters
    shortcut = nn_ops.conv2d_fixed_padding(
        inputs=inputs,
        filters=out_filters,
        kernel_size=1,
        strides=strides,
        data_format=data_format)
    shortcut = batch_norm_relu(shortcut, relu=False, is_training=is_training)
    shortcut = dropblock(shortcut, is_training=is_training)

  # (filters, kernel_size, strides, extra batch-norm kwargs) for each stage.
  # Only the last stage overrides the batch norm's `relu` argument.
  stage_specs = (
      (filters, 1, 1, {}),
      (filters, 3, strides, {}),
      (4 * filters, 1, 1, {'relu': False}),
  )
  net = inputs
  for stage_filters, stage_kernel, stage_strides, bn_kwargs in stage_specs:
    net = nn_ops.conv2d_fixed_padding(
        inputs=net,
        filters=stage_filters,
        kernel_size=stage_kernel,
        strides=stage_strides,
        data_format=data_format)
    net = batch_norm_relu(net, is_training=is_training, **bn_kwargs)
    net = dropblock(net, is_training=is_training)

  if drop_connect_rate:
    net = nn_ops.drop_connect(net, is_training, drop_connect_rate)

  return activation(net + shortcut)
def mbconv_block(inputs,
                 in_filters,
                 out_filters,
                 expand_ratio,
                 strides,
                 use_projection,
                 kernel_size=3,
                 se_ratio=None,
                 batch_norm_relu=nn_ops.BatchNormRelu(),
                 dropblock=nn_ops.Dropblock(),
                 drop_connect_rate=None,
                 data_format='channels_last',
                 is_training=False):
  """The MBConv block with BN and DropBlock after convolutions.

  The residual branch is a 1x1 expansion conv, a depthwise conv (carrying
  `strides`), an optional squeeze-and-excitation step and a 1x1 projection
  conv. Each conv is followed by `batch_norm_relu` and `dropblock`. The
  result is added to the shortcut.

  Args:
    inputs: a `Tensor` of size `[batch, channels, height, width]`.
    in_filters: an `int` number of filters for the input feature map.
    out_filters: an `int` number of filters for the output feature map.
    expand_ratio: an `int` number as the feature dimension expansion ratio.
    strides: an `int` block stride. If greater than 1, this block will
      ultimately downsample the input.
    use_projection: a `bool` for whether this block should use a projection
      shortcut (versus the default identity shortcut). This is usually `True`
      for the first block of a block group, which may change the number of
      filters and the resolution.
    kernel_size: kernel size for the depthwise convolution.
    se_ratio: squeeze and excitation ratio. Squeeze-and-excitation is only
      applied when this is in the range (0, 1].
    batch_norm_relu: an operation that is added after convolutions, including
      a batch norm layer and an optional relu activation.
    dropblock: a drop block layer that is added after convolutions. Note that
      the default implementation does not apply any drop block.
    drop_connect_rate: a `float` number that specifies the drop connection
      rate of the block. Note that the default `None` means no drop
      connection is applied.
    data_format: a `str` that specifies the data format.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The output `Tensor` of the block.
  """

  def _bn_then_dropblock(tensor):
    """Applies `batch_norm_relu` followed by `dropblock` to `tensor`."""
    tensor = batch_norm_relu(tensor, is_training=is_training)
    return dropblock(tensor, is_training=is_training)

  tf.logging.info('-----> Building mbconv block.')

  shortcut = inputs
  if use_projection:
    shortcut = nn_ops.conv2d_fixed_padding(
        inputs=inputs,
        filters=out_filters,
        kernel_size=1,
        strides=strides,
        data_format=data_format)
    shortcut = _bn_then_dropblock(shortcut)

  # First 1x1 conv for channel expansion.
  expanded_filters = in_filters * expand_ratio
  net = nn_ops.conv2d_fixed_padding(
      inputs=inputs,
      filters=expanded_filters,
      kernel_size=1,
      strides=1,
      data_format=data_format)
  net = _bn_then_dropblock(net)

  # Second depthwise conv.
  net = nn_ops.depthwise_conv2d_fixed_padding(
      inputs=net,
      kernel_size=kernel_size,
      strides=strides,
      data_format=data_format)
  net = _bn_then_dropblock(net)

  # Squeeze and excitation.
  if se_ratio is not None and 0 < se_ratio <= 1:
    net = nn_ops.squeeze_excitation(
        net,
        in_filters,
        se_ratio,
        expand_ratio=expand_ratio,
        data_format=data_format)

  # Third 1x1 conv for reversed bottleneck.
  net = nn_ops.conv2d_fixed_padding(
      inputs=net,
      filters=out_filters,
      kernel_size=1,
      strides=1,
      data_format=data_format)
  net = _bn_then_dropblock(net)

  if drop_connect_rate:
    net = nn_ops.drop_connect(net, is_training, drop_connect_rate)

  return tf.add(net, shortcut)
def __init__(self,
             resnet_depth,
             dropblock=nn_ops.Dropblock(),
             activation='relu',
             batch_norm_relu=nn_ops.BatchNormRelu(),
             init_drop_connect_rate=None,
             data_format='channels_last'):
  """ResNet initialization function.

  Stores the configuration, resolves the activation name to a TensorFlow
  function, looks up the block type and per-group layer counts for
  `resnet_depth`, and builds the network function with
  `self.resnet_v1_generator`.

  Args:
    resnet_depth: `int` depth of ResNet backbone model. Must be one of
      10, 14, 18, 26, 34, 50, 101, 152 or 200.
    dropblock: a dropblock layer.
    activation: a `str` naming the activation function. Supports 'relu' and
      'swish'.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
    init_drop_connect_rate: a `float` number that specifies the initial drop
      connection rate. Note that the default `None` means no drop connection
      is applied.
    data_format: `str` either "channels_first" for
      `[batch, channels, height, width]` or "channels_last" for
      `[batch, height, width, channels]`.

  Raises:
    ValueError: if `activation` is not supported, or if `resnet_depth` is not
      a supported depth.
  """
  self._resnet_depth = resnet_depth
  self._dropblock = dropblock

  if activation == 'relu':
    self._activation = tf.nn.relu
  elif activation == 'swish':
    self._activation = tf.nn.swish
  else:
    raise ValueError('Activation {} not implemented.'.format(activation))

  self._batch_norm_relu = batch_norm_relu
  self._init_drop_connect_rate = init_drop_connect_rate
  self._data_format = data_format

  # Depth -> block type and number of blocks in each of the four groups.
  residual = nn_blocks.residual_block
  bottleneck = nn_blocks.bottleneck_block
  model_params = {
      10: {'block': residual, 'layers': [1, 1, 1, 1]},
      14: {'block': bottleneck, 'layers': [1, 1, 1, 1]},
      18: {'block': residual, 'layers': [2, 2, 2, 2]},
      26: {'block': bottleneck, 'layers': [2, 2, 2, 2]},
      34: {'block': residual, 'layers': [3, 4, 6, 3]},
      50: {'block': bottleneck, 'layers': [3, 4, 6, 3]},
      101: {'block': bottleneck, 'layers': [3, 4, 23, 3]},
      152: {'block': bottleneck, 'layers': [3, 8, 36, 3]},
      200: {'block': bottleneck, 'layers': [3, 24, 36, 3]},
  }

  if resnet_depth not in model_params:
    valid_resnet_depths = ', '.join(
        str(depth) for depth in sorted(model_params))
    # The rejected depth is formatted into the message itself. Passing it as
    # a second ValueError argument makes the exception print as a tuple
    # instead of a readable sentence.
    raise ValueError(
        'The resnet_depth should be in [%s]. Not a valid resnet_depth: %s' %
        (valid_resnet_depths, resnet_depth))

  params = model_params[resnet_depth]
  self._resnet_fn = self.resnet_v1_generator(params['block'],
                                             params['layers'])
def resample_with_alpha(feat,
                        input_block_fn,
                        target_width,
                        target_num_filters,
                        target_block_fn,
                        alpha=1.0,
                        use_native_resize_op=False,
                        batch_norm_relu=nn_ops.BatchNormRelu(),
                        data_format='channels_last',
                        name=None,
                        is_training=False):
  """Match resolution and feature dimension to the target block.

  All layers are created under the variable scope
  'resample_with_alpha_<name>':
    1. A 1x1 conv + batch norm reduces the channel count to
       `int(num_filters * alpha)`, where `num_filters` is the input channel
       count, divided by 4 when `input_block_fn` is 'bottleneck'.
    2. If the input is wider than `target_width`, a stride-2 3x3 conv halves
       the resolution and a max pool covers any remaining factor. If it is
       narrower, it is upsampled with nearest-neighbor interpolation.
    3. A 1x1 conv + batch norm (called with relu=False) produces
       `target_num_filters` channels, multiplied by 4 when `target_block_fn`
       is 'bottleneck'.

  Args:
    feat: a 4-D feature `Tensor` whose static shape is unpacked as
      (_, height, width, num_filters).
    input_block_fn: a `str` naming the block type that produced `feat`.
    target_width: the width the output should have.
    target_num_filters: the `filters` value of the target block.
    target_block_fn: a `str` naming the target block type.
    alpha: the scaling factor applied to the feature dimension before
      resampling.
    use_native_resize_op: whether to upsample with
      `tf.image.resize_nearest_neighbor` instead of
      `spatial_transform_ops.nearest_upsampling`.
    batch_norm_relu: an operation that is added after convolutions, including
      a batch norm layer and an optional relu activation.
    data_format: a `str` that specifies the data format.
    name: used to build the variable scope name.
    is_training: a `bool` if True, the model is in training mode.

  Returns:
    The resampled feature `Tensor`.

  Raises:
    ValueError: if the static width or channel count of `feat` is unknown, or
      if the input width and `target_width` are not multiples of each other.
  """
  _, height, width, num_filters = feat.get_shape().as_list()
  if width is None or num_filters is None:
    raise ValueError('Shape of feat is None (shape:{}).'.format(feat.shape))

  # A bottleneck block outputs 4x its nominal filter count; undo that before
  # scaling by alpha.
  base_filters = (
      num_filters / 4 if input_block_fn == 'bottleneck' else num_filters)
  new_num_filters = int(base_filters * alpha)

  with tf.variable_scope('resample_with_alpha_{}'.format(name)):
    # First 1x1 conv to reduce feature dimension to alpha*.
    feat = nn_ops.conv2d_fixed_padding(
        inputs=feat,
        filters=new_num_filters,
        kernel_size=1,
        strides=1,
        data_format=data_format)
    feat = batch_norm_relu(feat, is_training=is_training)

    if width > target_width:
      # Down-sample.
      if width % target_width != 0:
        raise ValueError('wdith ({}) is not divisible by '
                         'target_width ({}).'.format(width, target_width))
      down_factor = width // target_width

      # Apply stride-2 conv to reduce feature map size to 1/2.
      feat = nn_ops.conv2d_fixed_padding(
          inputs=feat,
          filters=new_num_filters,
          kernel_size=3,
          strides=2,
          data_format=data_format)
      feat = batch_norm_relu(feat, is_training=is_training)

      # Apply maxpool to further reduce feature map size if necessary.
      if down_factor > 2:
        pool_stride = down_factor // 2
        feat = tf.layers.max_pooling2d(
            inputs=feat,
            pool_size=3 if down_factor == 4 else 5,
            strides=[pool_stride, pool_stride],
            padding='SAME',
            data_format=data_format)
    elif width < target_width:
      # Up-sample with NN interpolation.
      if target_width % width != 0:
        raise ValueError('target_wdith ({}) is not divisible by '
                         'width ({}).'.format(target_width, width))
      scale = target_width // width
      if use_native_resize_op:
        feat = tf.image.resize_nearest_neighbor(
            feat, [height * scale, width * scale])
      else:
        feat = spatial_transform_ops.nearest_upsampling(feat, scale=scale)

    # Match feature dimension to the target block.
    final_filters = (
        target_num_filters * 4
        if target_block_fn == 'bottleneck' else target_num_filters)
    feat = nn_ops.conv2d_fixed_padding(
        inputs=feat,
        filters=final_filters,
        kernel_size=1,
        strides=1,
        data_format=data_format)
    feat = batch_norm_relu(feat, relu=False, is_training=is_training)

  return feat