def _build_box_net_layers(self, batch_norm_relu):
  """Build re-usable layers for box prediction network."""
  # Final regression layer: 4 box deltas per anchor per location.
  # NOTE(review): stddev=1e-5 is much smaller than the 0.01 used for the
  # intermediate convs below — presumably so initial predictions stay near
  # the anchor priors; confirm intended.
  self._box_predict = tf.keras.layers.Conv2D(
      4 * self._anchors_per_location,
      kernel_size=(3, 3),
      bias_initializer=tf.zeros_initializer(),
      kernel_initializer=tf.keras.initializers.RandomNormal(stddev=1e-5),
      padding='same',
      name='box-predict')
  # One intermediate conv per index i; a separate batch-norm layer is built
  # for every (conv index, pyramid level) pair, keyed by
  # self._box_net_batch_norm_name(i, level).
  self._box_conv = []
  self._box_batch_norm_relu = {}
  for i in range(self._num_convs):
    self._box_conv.append(
        tf.keras.layers.Conv2D(
            self._num_filters,
            kernel_size=(3, 3),
            activation=None,
            bias_initializer=tf.zeros_initializer(),
            kernel_initializer=tf.keras.initializers.RandomNormal(
                stddev=0.01),
            padding='same',
            name='box-' + str(i)))
    for level in range(self._min_level, self._max_level + 1):
      name = self._box_net_batch_norm_name(i, level)
      self._box_batch_norm_relu[name] = batch_norm_relu(name=name)
def __init__(self,
             num_classes,
             num_convs=0,
             num_filters=256,
             use_separable_conv=False,
             num_fcs=2,
             fc_dims=1024,
             use_batch_norm=True,
             batch_norm_relu=nn_ops.BatchNormRelu):
  """Initialize params to build Fast R-CNN box head.

  Args:
    num_classes: an integer for the number of classes.
    num_convs: `int` number that represents the number of the intermediate
      conv layers before the FC layers.
    num_filters: `int` number that represents the number of filters of the
      intermediate conv layers.
    use_separable_conv: `bool`, indicating whether the separable conv layers
      is used.
    num_fcs: `int` number that represents the number of FC layers before the
      predictions.
    fc_dims: `int` number that represents the number of dimension of the FC
      layers.
    use_batch_norm: 'bool', indicating whether batchnorm layers are added.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  self._num_classes = num_classes
  self._num_convs = num_convs
  self._num_filters = num_filters
  self._num_fcs = num_fcs
  self._fc_dims = fc_dims
  self._use_batch_norm = use_batch_norm
  self._batch_norm_relu = batch_norm_relu
  if use_separable_conv:
    # Separable convs keep their default depthwise/pointwise kernel init;
    # only the bias initializer is pinned here.
    self._conv2d_op = functools.partial(
        tf.keras.layers.SeparableConv2D,
        depth_multiplier=1,
        bias_initializer=tf.zeros_initializer())
  else:
    self._conv2d_op = functools.partial(
        tf.keras.layers.Conv2D,
        kernel_initializer=tf.keras.initializers.VarianceScaling(
            scale=2, mode='fan_out', distribution='untruncated_normal'),
        bias_initializer=tf.zeros_initializer())
def __init__(self,
             relu=True,
             init_zero=False,
             center=True,
             scale=True,
             data_format='channels_last',
             **kwargs):
  """Builds a BatchNormalization layer, storing whether to follow with ReLU.

  Args:
    relu: whether a trailing ReLU is applied (stored on `self.relu`).
    init_zero: if True, gamma is initialized to zeros instead of ones.
    center: whether the BN layer learns a beta offset.
    scale: whether the BN layer learns a gamma scale.
    data_format: 'channels_last' or 'channels_first'; picks the norm axis.
    **kwargs: forwarded to tf.keras.layers.Layer.
  """
  super(BatchNormRelu, self).__init__(**kwargs)
  self.relu = relu
  gamma_initializer = (
      tf.zeros_initializer() if init_zero else tf.ones_initializer())
  axis = 1 if data_format == 'channels_first' else -1
  self.bn = tf.keras.layers.BatchNormalization(
      axis=axis,
      momentum=BATCH_NORM_DECAY,
      epsilon=BATCH_NORM_EPSILON,
      center=center,
      scale=scale,
      fused=False,
      gamma_initializer=gamma_initializer)
def coarsemask_decoder_net(self,
                           images,
                           is_training=None,
                           batch_norm_relu=nn_ops.BatchNormRelu):
  """Coarse mask decoder network architecture.

  Args:
    images: A tensor of size [batch, height_in, width_in, channels_in].
    is_training: Whether batch_norm layers are in training mode.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).

  Returns:
    images: A feature tensor of size [batch, output_size, output_size,
      num_channels]
  """
  # Stack of 3x3 conv + BN/ReLU blocks; padding='same' with default stride
  # preserves the spatial size.
  for i in range(self._num_convs):
    images = tf.keras.layers.Conv2D(
        self._num_downsample_channels,
        kernel_size=(3, 3),
        bias_initializer=tf.zeros_initializer(),
        kernel_initializer=tf.keras.initializers.RandomNormal(
            stddev=0.01),
        activation=None,
        padding='same',
        name='coarse-class-%d' % i)(images)
    images = batch_norm_relu(name='coarse-class-%d-bn' % i)(
        images, is_training=is_training)
  return images
def __init__(self,
             hidden_size,
             vocab_size,
             embeder,
             initializer=None,
             activation_fn=None,
             name="cls/predictions"):
  """Masked-LM output head: transform, layer-norm, then tied-embedding logits.

  Args:
    hidden_size: transformer hidden dimension.
    vocab_size: size of the output vocabulary.
    embeder: embedding object whose weights are reused for the output
      projection (weight tying).
    initializer: initializer for the transform layer.
    activation_fn: activation for the transform layer.
    name: variable-scope name for this head.
  """
  super(MaskedLMLayer, self).__init__(name=name)
  self.hidden_size = hidden_size
  self.vocab_size = vocab_size
  self.embeder = embeder
  # Extra non-linear transform before the output layer; this matrix is only
  # used during pre-training.
  self.extra_layer = utils.Dense2dLayer(
      hidden_size, hidden_size, initializer, activation_fn, "transform")
  self.norm_layer = utils.NormLayer(hidden_size, name="transform")
  # Output weights are the input embeddings; only a per-token output bias is
  # created here.
  self.output_bias = tf.compat.v1.get_variable(
      name + "/output_bias",
      shape=[vocab_size],
      initializer=tf.zeros_initializer())
def __init__(self,
             num_groups=None,
             group_size=None,
             eps=1e-5,
             beta_init=tf.zeros_initializer(),
             gamma_init=tf.ones_initializer(),
             **kwargs):
  """Initializer.

  Args:
    num_groups: int, the number of channel-groups to normalize over.
    group_size: int, size of the groups to normalize over.
    eps: float, a small additive constant to avoid /sqrt(0).
    beta_init: initializer for bias, defaults to zeros.
    gamma_init: initializer for scale, defaults to ones.
    **kwargs: other tf.keras.layers.Layer arguments.
  """
  super(GroupNormalization, self).__init__(**kwargs)
  # When the caller pins neither group count nor group size, default to the
  # conventional 32 groups.
  if num_groups is None and group_size is None:
    num_groups = 32
  self._num_groups = num_groups
  self._group_size = group_size
  self._eps = eps
  self._beta_init = beta_init
  self._gamma_init = gamma_init
def _get_multilevel_features(self, fpn_features): """Get multilevel features from FPN feature dictionary into one tensor. Args: fpn_features: a dictionary of FPN features. Returns: features: a float tensor of shape [batch_size, num_levels, max_feature_size, max_feature_size, num_downsample_channels]. """ # TODO(yeqing): Recover reuse=tf.AUTO_REUSE logic. with tf.name_scope('masknet'): mask_feats = {} # Reduce the feature dimension at each FPN level by convolution. for feat_level in range(self._min_mask_level, self._max_mask_level + 1): mask_feats[feat_level] = tf.keras.layers.Conv2D( self._num_downsample_channels, kernel_size=(1, 1), bias_initializer=tf.zeros_initializer(), kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), padding='same', name='mask-downsample')( fpn_features[feat_level]) # Concat features through padding to the max size. features = [mask_feats[self._min_mask_level]] for feat_level in range(self._min_mask_level + 1, self._max_mask_level + 1): features.append(tf.image.pad_to_bounding_box( mask_feats[feat_level], 0, 0, self._max_feature_size, self._max_feature_size)) features = tf.stack(features, axis=1) return features
def __call__(self, roi_features, is_training=None):
  """Box and class branches for the Mask-RCNN model.

  Args:
    roi_features: A ROI feature tensor of shape [batch_size, num_rois,
      height_l, width_l, num_filters].
    is_training: `boolean`, if True if model is in training mode.

  Returns:
    class_outputs: a tensor with a shape of [batch_size, num_rois,
      num_classes], representing the class predictions.
    box_outputs: a tensor with a shape of [batch_size, num_rois,
      num_classes * 4], representing the box predictions.
  """
  with backend.get_graph().as_default(), tf.name_scope('fast_rcnn_head'):
    # Reshape inputs before FC: flatten each ROI's spatial grid.
    _, num_rois, height, width, filters = roi_features.get_shape().as_list()
    roi_features = tf.reshape(roi_features,
                              [-1, num_rois, height * width * filters])
    net = tf.keras.layers.Dense(units=self._mlp_head_dim,
                                activation=None,
                                name='fc6')(roi_features)
    # fused=False: fused batch norm only supports 4D inputs and this tensor
    # is 3D (see the equivalent comment in BatchNormRelu elsewhere in file).
    net = self._batch_norm_relu(fused=False)(net, is_training=is_training)
    net = tf.keras.layers.Dense(units=self._mlp_head_dim,
                                activation=None,
                                name='fc7')(net)
    net = self._batch_norm_relu(fused=False)(net, is_training=is_training)
    class_outputs = tf.keras.layers.Dense(
        self._num_classes,
        kernel_initializer=tf.keras.initializers.RandomNormal(
            stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        name='class-predict')(net)
    # Box branch uses a smaller init stddev (0.001) than the class branch.
    box_outputs = tf.keras.layers.Dense(
        self._num_classes * 4,
        kernel_initializer=tf.keras.initializers.RandomNormal(
            stddev=0.001),
        bias_initializer=tf.zeros_initializer(),
        name='box-predict')(net)
    return class_outputs, box_outputs
def __init__(self,
             min_level,
             max_level,
             anchors_per_location,
             batch_norm_relu=nn_ops.BatchNormRelu):
  """Initialize params to build Region Proposal Network head.

  Args:
    min_level: `int` number of minimum feature level.
    max_level: `int` number of maximum feature level.
    anchors_per_location: `int` number of number of anchors per pixel
      location.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  self._min_level = min_level
  self._max_level = max_level
  self._anchors_per_location = anchors_per_location
  # 3x3 trunk conv built once here (a separate per-level batch norm is kept
  # below — suggesting the conv weights are reused across levels; see the
  # head's call site to confirm).
  self._rpn_conv = tf.keras.layers.Conv2D(
      256,
      kernel_size=(3, 3),
      strides=(1, 1),
      activation=None,
      bias_initializer=tf.zeros_initializer(),
      kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
      padding='same',
      name='rpn')
  # 1x1 objectness head: one logit per anchor per location.
  self._rpn_class_conv = tf.keras.layers.Conv2D(
      anchors_per_location,
      kernel_size=(1, 1),
      strides=(1, 1),
      bias_initializer=tf.zeros_initializer(),
      kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
      padding='valid',
      name='rpn-class')
  # 1x1 box head: 4 regression targets per anchor per location.
  self._rpn_box_conv = tf.keras.layers.Conv2D(
      4 * anchors_per_location,
      kernel_size=(1, 1),
      strides=(1, 1),
      bias_initializer=tf.zeros_initializer(),
      kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
      padding='valid',
      name='rpn-box')
  # One batch-norm layer per pyramid level, keyed by level.
  self._batch_norm_relus = {}
  for level in range(self._min_level, self._max_level + 1):
    self._batch_norm_relus[level] = batch_norm_relu(name='rpn%d-bn' % level)
def _build_class_net_layers(self, batch_norm_relu):
  """Build re-usable layers for class prediction network."""
  # Final classifier layer. Bias init of -log((1-0.01)/0.01) gives an
  # initial foreground probability of ~0.01 (focal-loss initialization, as
  # documented on the equivalent layer elsewhere in this file).
  if self._use_separable_conv:
    self._class_predict = tf.keras.layers.SeparableConv2D(
        self._num_classes * self._anchors_per_location,
        kernel_size=(3, 3),
        bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
        padding='same',
        name='class-predict')
  else:
    self._class_predict = tf.keras.layers.Conv2D(
        self._num_classes * self._anchors_per_location,
        kernel_size=(3, 3),
        bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
        kernel_initializer=tf.keras.initializers.RandomNormal(
            stddev=1e-5),
        padding='same',
        name='class-predict')
  # One intermediate conv per index i; a separate batch-norm layer per
  # (conv index, pyramid level) pair, keyed by
  # self._class_net_batch_norm_name(i, level).
  self._class_conv = []
  self._class_batch_norm_relu = {}
  for i in range(self._num_convs):
    if self._use_separable_conv:
      self._class_conv.append(
          tf.keras.layers.SeparableConv2D(
              self._num_filters,
              kernel_size=(3, 3),
              bias_initializer=tf.zeros_initializer(),
              activation=None,
              padding='same',
              name='class-' + str(i)))
    else:
      self._class_conv.append(
          tf.keras.layers.Conv2D(
              self._num_filters,
              kernel_size=(3, 3),
              bias_initializer=tf.zeros_initializer(),
              kernel_initializer=tf.keras.initializers.RandomNormal(
                  stddev=0.01),
              activation=None,
              padding='same',
              name='class-' + str(i)))
    for level in range(self._min_level, self._max_level + 1):
      name = self._class_net_batch_norm_name(i, level)
      self._class_batch_norm_relu[name] = batch_norm_relu(name=name)
def __init__(self,
             num_classes,
             num_downsample_channels,
             mask_crop_size,
             num_convs,
             coarse_mask_thr,
             gt_upsample_scale,
             batch_norm_relu=nn_ops.BatchNormRelu):
  """Initialize params to build ShapeMask coarse and fine prediction head.

  Args:
    num_classes: `int` number of mask classification categories.
    num_downsample_channels: `int` number of filters at mask head.
    mask_crop_size: feature crop size.
    num_convs: `int` number of stacked convolution before the last prediction
      layer.
    coarse_mask_thr: the threshold for suppressing noisy coarse prediction.
    gt_upsample_scale: scale for upsampling groundtruths.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer(optional).
  """
  self._mask_num_classes = num_classes
  self._num_downsample_channels = num_downsample_channels
  self._mask_crop_size = mask_crop_size
  self._num_convs = num_convs
  self._coarse_mask_thr = coarse_mask_thr
  self._gt_upsample_scale = gt_upsample_scale
  self._class_predict_conv = tf.keras.layers.Conv2D(
      self._mask_num_classes,
      kernel_size=(1, 1),
      # Focal loss bias initialization to have foreground 0.01 probability.
      bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
      kernel_initializer=tf.keras.initializers.RandomNormal(mean=0,
                                                            stddev=0.01),
      padding='same',
      name='affinity-class-predict')
  # Transposed conv that upsamples by gt_upsample_scale while halving the
  # channel count (kernel size == stride == the upsample scale).
  self._upsample_conv = tf.keras.layers.Conv2DTranspose(
      self._num_downsample_channels // 2,
      (self._gt_upsample_scale, self._gt_upsample_scale),
      (self._gt_upsample_scale, self._gt_upsample_scale))
  # Stack of 3x3 conv layers, each paired with its own batch-norm layer.
  self._fine_class_conv = []
  self._fine_class_bn = []
  for i in range(self._num_convs):
    self._fine_class_conv.append(
        tf.keras.layers.Conv2D(
            self._num_downsample_channels,
            kernel_size=(3, 3),
            bias_initializer=tf.zeros_initializer(),
            kernel_initializer=tf.keras.initializers.RandomNormal(
                stddev=0.01),
            activation=None,
            padding='same',
            name='fine-class-%d' % i))
    self._fine_class_bn.append(
        batch_norm_relu(name='fine-class-%d-bn' % i))
def __init__(self, hdim, dtype=tf.float32, name="LayerNorm"):
  """Creates the layer-normalization offset (beta) and scale (gamma).

  Args:
    hdim: size of the normalized (last) dimension.
    dtype: dtype of the created variables.
    name: variable-scope name for the parameters.
  """
  super(NormLayer, self).__init__(name=name)
  self._dtype = dtype
  param_shape = [hdim]
  with tf.compat.v1.variable_scope(name):
    # beta starts at zero (no offset), gamma at one (identity scale).
    self.beta = tf.compat.v1.get_variable(
        "beta", param_shape, dtype=dtype,
        initializer=tf.zeros_initializer())
    self.gamma = tf.compat.v1.get_variable(
        "gamma", param_shape, dtype=dtype,
        initializer=tf.ones_initializer())
def __init__(self, hidden_size, initializer=None,
             name="cls/seq_relationship"):
  """Binary next-sentence-prediction head.

  Label 0 is "next sentence" and 1 is "random sentence"; this weight matrix
  is not used after pre-training.

  Args:
    hidden_size: transformer hidden dimension.
    initializer: initializer for the classification weights.
    name: variable-scope name for the parameters.
  """
  super(NSPLayer, self).__init__(name=name)
  self.hidden_size = hidden_size
  with tf.compat.v1.variable_scope(name):
    self.output_weights = tf.compat.v1.get_variable(
        "output_weights",
        shape=[2, hidden_size],
        initializer=initializer)
    self.output_bias = tf.compat.v1.get_variable(
        "output_bias", shape=[2], initializer=tf.zeros_initializer())
  # v1 get_variable results are not auto-tracked by Keras; register them.
  self._trainable_weights.append(self.output_weights)
  self._trainable_weights.append(self.output_bias)
def __init__(self,
             num_attention_heads,
             size_per_head,
             initializer,
             activation,
             name=None,
             head_first=False,
             use_bias=True):
  """Constructor for dense layer with 3D kernel.

  Args:
    num_attention_heads: The size of output dimension.
    size_per_head: The size per attention head.
    initializer: Kernel initializer.
    activation: Actication function.
    name: The name scope of this layer.
    head_first: Whether to output head dimension before or after sequence
      dim.
    use_bias: Whether the layer uses a bias vector.
  """
  super(Dense3dLayer, self).__init__(name=name)
  self.num_attention_heads = num_attention_heads
  self.size_per_head = size_per_head
  self.initializer = initializer
  self.activation = activation
  self.head_first = head_first
  self.use_bias = use_bias
  hidden_size = num_attention_heads * size_per_head
  with tf.compat.v1.variable_scope(name):
    self.w = tf.compat.v1.get_variable(
        name="kernel",
        shape=[hidden_size, hidden_size],
        initializer=self.initializer)
    # Bias is optional; when disabled, callers see self.b as None.
    self.b = tf.compat.v1.get_variable(
        name="bias",
        shape=[hidden_size],
        initializer=tf.zeros_initializer()) if self.use_bias else None
def __init__(self,
             relu=True,
             init_zero=False,
             center=True,
             scale=True,
             data_format='channels_last',
             **kwargs):
  """Batch normalization with an optional trailing ReLU.

  Args:
    relu: whether a trailing ReLU is applied (stored on `self.relu` for use
      by the layer's call, not visible here).
    init_zero: if True, gamma is initialized to zeros instead of ones.
    center: whether the BN layer learns a beta offset.
    scale: whether the BN layer learns a gamma scale.
    data_format: 'channels_last' or 'channels_first'; picks the norm axis.
    **kwargs: forwarded to the parent layer.
  """
  super(BatchNormRelu, self).__init__(**kwargs)
  self.relu = relu
  if init_zero:
    gamma_initializer = tf.zeros_initializer()
  else:
    gamma_initializer = tf.ones_initializer()
  if data_format == 'channels_first':
    axis = 1
  else:
    axis = -1
  if FLAGS.global_bn:
    # Cross-replica batch norm: statistics synchronized across devices.
    # TODO(srbs): Set fused=True
    # Batch normalization layers with fused=True only support 4D input
    # tensors.
    self.bn = tf.keras.layers.experimental.SyncBatchNormalization(
        axis=axis,
        momentum=FLAGS.batch_norm_decay,
        epsilon=BATCH_NORM_EPSILON,
        center=center,
        scale=scale,
        gamma_initializer=gamma_initializer)
  else:
    # TODO(srbs): Set fused=True
    # Batch normalization layers with fused=True only support 4D input
    # tensors.
    self.bn = tf.keras.layers.BatchNormalization(
        axis=axis,
        momentum=FLAGS.batch_norm_decay,
        epsilon=BATCH_NORM_EPSILON,
        center=center,
        scale=scale,
        fused=False,
        gamma_initializer=gamma_initializer)
def call(self, input_tensor):
  """Applies layer normalization over the last axis of `input_tensor`.

  beta/gamma parameters are created lazily on the first call (sized from
  the input's last dimension) and registered as trainable weights.

  Args:
    input_tensor: tensor to normalize; normalized along its last axis.

  Returns:
    The normalized tensor, same shape as the input.
  """
  inputs = tf.convert_to_tensor(input_tensor)
  inputs_shape = get_shape_list(inputs)
  inputs_rank = len(inputs_shape)
  dtype = inputs.dtype.base_dtype
  norm_axis = inputs_rank - 1
  params_shape = [inputs_shape[norm_axis]]

  # Allocate parameters for the beta and gamma of the normalization.
  if self.beta is None:
    self.beta = tf.compat.v1.get_variable(
        "beta",
        shape=params_shape,
        dtype=dtype,
        initializer=tf.zeros_initializer(),
        trainable=True)
    # v1 get_variable results are not auto-tracked by Keras; register.
    self._trainable_weights.append(self.beta)
  if self.gamma is None:
    self.gamma = tf.compat.v1.get_variable(
        "gamma",
        shape=params_shape,
        dtype=dtype,
        initializer=tf.ones_initializer(),
        trainable=True)
    self._trainable_weights.append(self.gamma)
  # Compute norm along last axis
  mean, variance = tf.nn.moments(inputs, [norm_axis], keepdims=True)
  # Compute layer normalization using the batch_normalization function.
  # Note that epsilon must be increased for float16 due to the limited
  # representable range.
  variance_epsilon = 1e-12 if dtype != tf.float16 else 1e-3
  outputs = tf.nn.batch_normalization(
      inputs,
      mean,
      variance,
      offset=self.beta,
      scale=self.gamma,
      variance_epsilon=variance_epsilon)
  outputs.set_shape(inputs_shape)
  return outputs
def __init__(self,
             input_size,
             output_size,
             initializer,
             activation,
             name=None,
             use_bias=True):
  """Constructor for dense layer with 2D kernel.

  Args:
    input_size: The size of input dimension.
    output_size: The size of output dimension.
    initializer: Kernel initializer.
    activation: Actication function.
    name: The name scope of this layer.
    use_bias: Whether the layer uses a bias vector.
  """
  super(SimpleDenseLayer, self).__init__(name=name)
  self.input_size = input_size
  self.output_size = output_size
  self.initializer = initializer
  self.activation = activation
  self.use_bias = use_bias
  with tf.compat.v1.variable_scope(name):
    self.w = tf.compat.v1.get_variable(
        name="kernel",
        shape=[self.input_size, self.output_size],
        initializer=self.initializer)
    # Bias is optional; when disabled, callers see self.b as None.
    self.b = tf.compat.v1.get_variable(
        name="bias",
        shape=[self.output_size],
        initializer=tf.zeros_initializer()) if self.use_bias else None
def DenseAR(x,
            h=None,
            hidden_layers=(),
            activation=tf.nn.relu,
            log_scale_clip=None,
            log_scale_clip_pre=None,
            train=False,
            dropout_rate=0.0,
            sigmoid_scale=False,
            log_scale_factor=1.0,
            log_scale_reg=0.0,
            shift_only=False,
            **kwargs):
  """Autoregressive dense network emitting (shift, log_scale) tensors.

  Args:
    x: input tensor; the rightmost (feature) dimension must be statically
      known.
    h: optional conditioning tensor, added (via an unmasked Dense with no
      bias) after every masked layer and to the outputs.
    hidden_layers: sequence of hidden-layer widths.
    activation: activation for the hidden masked layers.
    log_scale_clip: if set, final log_scale is soft-clipped via tanh to
      [-log_scale_clip, log_scale_clip].
    log_scale_clip_pre: if set, log_scale is soft-clipped before the bias
      variable is added.
    train: whether dropout is active.
    dropout_rate: dropout applied after each hidden layer when > 0.
    sigmoid_scale: if True, pass log_scale through log_sigmoid at the end.
    log_scale_factor: multiplier applied to the raw log_scale output.
    log_scale_reg: if > 0, L2 regularization strength for the log_scale
      kernel.
    shift_only: if True, return (shift, None) and build no scale branch.
    **kwargs: forwarded to MaskedDense / Dense layers.

  Returns:
    A (shift, log_scale) pair; log_scale is None when shift_only=True.
  """
  input_depth = int(x.shape.with_rank_at_least(1)[-1])
  if input_depth is None:
    raise NotImplementedError(
        "Rightmost dimension must be known prior to graph execution.")
  input_shape = (np.int32(x.shape.as_list())
                 if x.shape.is_fully_defined() else tf.shape(x))
  for i, units in enumerate(hidden_layers):
    # First layer uses exclusive masking — presumably so each output block
    # never sees its own input block; confirm against MaskedDense semantics.
    x = MaskedDense(inputs=x,
                    units=units,
                    num_blocks=input_depth,
                    exclusive=True if i == 0 else False,
                    activation=activation,
                    **kwargs)
    if h is not None:
      x += tfkl.Dense(units, use_bias=False, **kwargs)(h)
    if dropout_rate > 0:
      x = tfkl.Dropout(dropout_rate)(x, training=train)

  if shift_only:
    shift = MaskedDense(inputs=x,
                        units=input_depth,
                        num_blocks=input_depth,
                        activation=None,
                        **kwargs)
    return shift, None
  else:
    if log_scale_factor == 1.0 and log_scale_reg == 0.0 and not log_scale_clip_pre:
      # Fast path: one doubled-width masked layer emits both shift and
      # log_scale, unstacked from a trailing size-2 axis.
      x = MaskedDense(inputs=x,
                      units=2 * input_depth,
                      num_blocks=input_depth,
                      activation=None,
                      **kwargs)
      if h is not None:
        x += tfkl.Dense(2 * input_depth, use_bias=False, **kwargs)(h)
      x = tf.reshape(x, shape=tf.concat([input_shape, [2]], axis=0))
      shift, log_scale = tf.unstack(x, num=2, axis=-1)
    else:
      # General path: separate masked layers for shift and log_scale so the
      # scale branch can carry its own factor/regularizer/clipping.
      shift = MaskedDense(inputs=x,
                          units=input_depth,
                          num_blocks=input_depth,
                          activation=None,
                          **kwargs)
      if log_scale_reg > 0.0:
        regularizer = lambda w: log_scale_reg * 2.0 * tf.nn.l2_loss(w)
      else:
        regularizer = None
      log_scale = MaskedDense(inputs=x,
                              units=input_depth,
                              num_blocks=input_depth,
                              activation=None,
                              use_bias=False,
                              kernel_regularizer=regularizer,
                              **kwargs)
      log_scale *= log_scale_factor
      if log_scale_clip_pre:
        log_scale = log_scale_clip_pre * tf.nn.tanh(
            log_scale / log_scale_clip_pre)
      # NOTE(review): TF1-style tf.get_variable here (vs tfkl elsewhere) —
      # presumably relies on an enclosing variable scope; confirm under TF2.
      log_scale += tf.get_variable("log_scale_bias", [1, input_depth],
                                   initializer=tf.zeros_initializer())
      if h is not None:
        shift += tfkl.Dense(input_depth, use_bias=False, **kwargs)(h)
        log_scale += tfkl.Dense(input_depth, use_bias=False, **kwargs)(h)

  if sigmoid_scale:
    log_scale = tf.log_sigmoid(log_scale)

  if log_scale_clip:
    log_scale = log_scale_clip * tf.nn.tanh(log_scale / log_scale_clip)

  return shift, log_scale
def __call__(self, crop_features, detection_priors, inst_classes,
             is_training=None):
  """Generate instance masks from FPN features and detection priors.

  This corresponds to the Fig. 5-6 of the ShapeMask paper at
  https://arxiv.org/pdf/1904.03239.pdf

  Args:
    crop_features: a float Tensor of shape [batch_size * num_instances,
      mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
      instance feature crop.
    detection_priors: a float Tensor of shape [batch_size * num_instances,
      mask_crop_size, mask_crop_size, 1]. This is the detection prior for
      the instance.
    inst_classes: a int Tensor of shape [batch_size, num_instances]
      of instance classes.
    is_training: a bool indicating whether in training mode.

  Returns:
    mask_outputs: instance mask prediction as a float Tensor of shape
      [batch_size * num_instances, mask_size, mask_size, num_classes].
  """
  # Embed the anchor map into some feature space for anchor conditioning.
  detection_prior_features = tf.keras.layers.Conv2D(
      self._num_downsample_channels,
      kernel_size=(1, 1),
      bias_initializer=tf.zeros_initializer(),
      kernel_initializer=tf.keras.initializers.RandomNormal(mean=0.,
                                                            stddev=0.01),
      padding='same',
      name='anchor-conv')(detection_priors)

  # Condition the feature crop on the embedded prior by element-wise add.
  prior_conditioned_features = crop_features + detection_prior_features
  coarse_output_features = self.coarsemask_decoder_net(
      prior_conditioned_features, is_training)

  coarse_mask_classes = tf.keras.layers.Conv2D(
      self._mask_num_classes,
      kernel_size=(1, 1),
      # Focal loss bias initialization to have foreground 0.01 probability.
      bias_initializer=tf.constant_initializer(-np.log((1 - 0.01) / 0.01)),
      kernel_initializer=tf.keras.initializers.RandomNormal(mean=0,
                                                            stddev=0.01),
      padding='same',
      name='class-predict')(coarse_output_features)

  if self._use_category_for_mask:
    # Select each instance's own class channel. The `inst_classes - 1`
    # offset suggests 1-based class ids — confirm against the label
    # convention. The cond guards against an empty instance batch.
    inst_classes = tf.cast(tf.reshape(inst_classes, [-1]), tf.int32)
    coarse_mask_classes_t = tf.transpose(a=coarse_mask_classes,
                                         perm=(0, 3, 1, 2))
    # pylint: disable=g-long-lambda
    coarse_mask_logits = tf.cond(
        pred=tf.size(input=inst_classes) > 0,
        true_fn=lambda: tf.gather_nd(
            coarse_mask_classes_t,
            tf.stack([
                tf.range(tf.size(input=inst_classes)), inst_classes - 1
            ],
                     axis=1)),
        false_fn=lambda: coarse_mask_classes_t[:, 0, :, :])
    # pylint: enable=g-long-lambda
    coarse_mask_logits = tf.expand_dims(coarse_mask_logits, -1)
  else:
    coarse_mask_logits = coarse_mask_classes

  coarse_class_probs = tf.nn.sigmoid(coarse_mask_logits)
  class_probs = tf.cast(coarse_class_probs,
                        prior_conditioned_features.dtype)

  return coarse_mask_classes, class_probs, prior_conditioned_features
def masked_convolution(inputs,
                       num_outputs,
                       kernel_size,
                       stride=1,
                       padding='SAME',
                       data_format=None,
                       rate=1,
                       activation_fn=tf.nn.relu,
                       normalizer_fn=None,
                       normalizer_params=None,
                       weights_initializer=xavier_initializer(),
                       weights_regularizer=None,
                       biases_initializer=tf.zeros_initializer(),
                       biases_regularizer=None,
                       reuse=None,
                       variables_collections=None,
                       outputs_collections=None,
                       trainable=True,
                       scope=None):
  """Adds an 2D convolution followed by a optional normalizer layer."""
  if data_format not in [
      None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW'
  ]:
    raise ValueError('Invalid data_format: %r' % (data_format, ))
  # Map Keras attribute names to contrib-style collection names so the
  # variables land in the 'weights'/'biases' collections.
  layer_variable_getter = _build_variable_getter({
      'bias': 'biases',
      'kernel': 'weights'
  })
  with tf1.variable_scope(scope,
                          'Conv', [inputs],
                          reuse=reuse,
                          custom_getter=layer_variable_getter) as sc:
    inputs = tf.convert_to_tensor(inputs)
    input_rank = inputs.get_shape().ndims
    # Only rank-4 (2D conv) inputs are supported by MaskedConv2D.
    if input_rank == 4:
      layer_class = core.MaskedConv2D
    else:
      raise ValueError(
          'Sparse Convolution not supported for input with rank',
          input_rank)
    if data_format is None or data_format == 'NHWC':
      df = 'channels_last'
    elif data_format == 'NCHW':
      df = 'channels_first'
    else:
      raise ValueError('Unsupported data format', data_format)
    layer = layer_class(filters=num_outputs,
                        kernel_size=kernel_size,
                        strides=stride,
                        padding=padding,
                        data_format=df,
                        dilation_rate=rate,
                        activation=None,
                        # No bias when a normalizer is used (it supplies the
                        # offset) or when biases_initializer is falsy.
                        use_bias=not normalizer_fn and biases_initializer,
                        kernel_initializer=weights_initializer,
                        bias_initializer=biases_initializer,
                        kernel_regularizer=weights_regularizer,
                        bias_regularizer=biases_regularizer,
                        activity_regularizer=None,
                        trainable=trainable,
                        name=sc.name,
                        dtype=inputs.dtype.base_dtype,
                        _scope=sc,
                        _reuse=reuse)
    outputs = layer.apply(inputs)

    # Add variables to collections.
    _add_variable_to_collections(layer.kernel, variables_collections,
                                 'weights')
    if layer.use_bias:
      _add_variable_to_collections(layer.bias, variables_collections,
                                   'biases')
    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return collect_named_outputs(outputs_collections, sc.original_name_scope,
                                 outputs)
def __call__(self, roi_features, class_indices, is_training=None):
  """Mask branch for the Mask-RCNN model.

  Args:
    roi_features: A ROI feature tensor of shape [batch_size, num_rois,
      height_l, width_l, num_filters].
    class_indices: a Tensor of shape [batch_size, num_rois], indicating which
      class the ROI is.
    is_training: `boolean`, if True if model is in training mode.

  Returns:
    mask_outputs: a tensor with a shape of [batch_size, num_masks,
      mask_height, mask_width, num_classes], representing the mask
      predictions.
    fg_gather_indices: a tensor with a shape of [batch_size, num_masks, 2],
      representing the fg mask targets.
  Raises:
    ValueError: If boxes is not a rank-3 tensor or the last dimension of
      boxes is not 4.
  """

  def _get_stddev_equivalent_to_msra_fill(kernel_size, fan_out):
    """Returns the stddev of random normal initialization as MSRAFill."""
    # Reference: https://github.com/pytorch/pytorch/blob/master/caffe2/operators/filler_op.h#L445-L463  # pylint: disable=line-too-long
    # For example, kernel size is (3, 3) and fan out is 256, stddev is 0.029.
    # stddev = (2/(3*3*256))^0.5 = 0.029
    return (2 / (kernel_size[0] * kernel_size[1] * fan_out))**0.5

  with backend.get_graph().as_default():
    with tf.name_scope('mask_head'):
      _, num_rois, height, width, filters = roi_features.get_shape(
      ).as_list()
      # Merge batch and ROI dims so the convs see a standard 4D tensor.
      net = tf.reshape(roi_features, [-1, height, width, filters])

      for i in range(4):
        kernel_size = (3, 3)
        fan_out = 256
        init_stddev = _get_stddev_equivalent_to_msra_fill(
            kernel_size, fan_out)
        net = tf.keras.layers.Conv2D(
            fan_out,
            kernel_size=kernel_size,
            strides=(1, 1),
            padding='same',
            dilation_rate=(1, 1),
            activation=None,
            kernel_initializer=tf.keras.initializers.RandomNormal(
                stddev=init_stddev),
            bias_initializer=tf.zeros_initializer(),
            name='mask-conv-l%d' % i)(net)
        net = self._batch_norm_relu()(net, is_training=is_training)

      # 2x upsample before predicting per-class masks.
      kernel_size = (2, 2)
      fan_out = 256
      init_stddev = _get_stddev_equivalent_to_msra_fill(
          kernel_size, fan_out)
      net = tf.keras.layers.Conv2DTranspose(
          fan_out,
          kernel_size=kernel_size,
          strides=(2, 2),
          padding='valid',
          activation=None,
          kernel_initializer=tf.keras.initializers.RandomNormal(
              stddev=init_stddev),
          bias_initializer=tf.zeros_initializer(),
          name='conv5-mask')(net)
      net = self._batch_norm_relu()(net, is_training=is_training)

      kernel_size = (1, 1)
      fan_out = self._num_classes
      init_stddev = _get_stddev_equivalent_to_msra_fill(
          kernel_size, fan_out)
      mask_outputs = tf.keras.layers.Conv2D(
          fan_out,
          kernel_size=kernel_size,
          strides=(1, 1),
          padding='valid',
          kernel_initializer=tf.keras.initializers.RandomNormal(
              stddev=init_stddev),
          bias_initializer=tf.zeros_initializer(),
          name='mask_fcn_logits')(net)
      mask_outputs = tf.reshape(mask_outputs, [
          -1, num_rois, self._mrcnn_resolution, self._mrcnn_resolution,
          self._num_classes
      ])

      with tf.name_scope('masks_post_processing'):
        # TODO(pengchong): Figure out the way not to use the static inferred
        # batch size.
        batch_size, num_masks = class_indices.get_shape().as_list()
        # Keep only the mask channel matching each ROI's class index.
        mask_outputs = tf.transpose(a=mask_outputs, perm=[0, 1, 4, 2, 3])
        # Constructs indices for gather.
        batch_indices = tf.tile(
            tf.expand_dims(tf.range(batch_size), axis=1), [1, num_masks])
        mask_indices = tf.tile(
            tf.expand_dims(tf.range(num_masks), axis=0), [batch_size, 1])
        gather_indices = tf.stack(
            [batch_indices, mask_indices, class_indices], axis=2)
        mask_outputs = tf.gather_nd(mask_outputs, gather_indices)

  return mask_outputs
def __call__(self, roi_features, is_training=None):
  """Box and class branches for the Mask-RCNN model.

  Args:
    roi_features: A ROI feature tensor of shape [batch_size, num_rois,
      height_l, width_l, num_filters].
    is_training: `boolean`, if True if model is in training mode.

  Returns:
    class_outputs: a tensor with a shape of [batch_size, num_rois,
      num_classes], representing the class predictions.
    box_outputs: a tensor with a shape of [batch_size, num_rois,
      num_classes * 4], representing the box predictions.
  """
  with backend.get_graph().as_default(), tf.name_scope('fast_rcnn_head'):
    # Reshape inputs before FC.
    _, num_rois, height, width, filters = roi_features.get_shape(
    ).as_list()
    net = tf.reshape(roi_features, [-1, height, width, filters])
    for i in range(self._num_convs):
      # When batch norm is enabled, the conv stays linear and the BN layer
      # supplies the ReLU; otherwise the conv applies ReLU itself.
      net = self._conv2d_op(
          self._num_filters,
          kernel_size=(3, 3),
          strides=(1, 1),
          padding='same',
          dilation_rate=(1, 1),
          activation=(None if self._use_batch_norm else tf.nn.relu),
          name='conv_{}'.format(i))(net)
      if self._use_batch_norm:
        net = self._batch_norm_relu()(net, is_training=is_training)

    # With no convs, the FC input keeps the original filter count.
    filters = self._num_filters if self._num_convs > 0 else filters
    net = tf.reshape(net, [-1, num_rois, height * width * filters])

    if self._use_batch_norm:
      # fused=False: the tensor is 3D here (fused BN needs 4D).
      net = self._batch_norm_relu(fused=False)(
          net, is_training=is_training)

    for i in range(self._num_fcs):
      # FC layers are named fc6, fc7, ... by convention.
      net = tf.keras.layers.Dense(
          units=self._fc_dims,
          activation=(None if self._use_batch_norm else tf.nn.relu),
          name='fc{}'.format(i + 6))(net)
      if self._use_batch_norm:
        net = self._batch_norm_relu(fused=False)(
            net, is_training=is_training)

    class_outputs = tf.keras.layers.Dense(
        self._num_classes,
        kernel_initializer=tf.keras.initializers.RandomNormal(
            stddev=0.01),
        bias_initializer=tf.zeros_initializer(),
        name='class-predict')(net)
    box_outputs = tf.keras.layers.Dense(
        self._num_classes * 4,
        kernel_initializer=tf.keras.initializers.RandomNormal(
            stddev=0.001),
        bias_initializer=tf.zeros_initializer(),
        name='box-predict')(net)
    return class_outputs, box_outputs
def __call__(self, roi_features, class_indices, is_training=None):
  """Mask branch for the Mask-RCNN model.

  Builds the mask conv tower, upsamples once via transposed conv, predicts
  per-class mask logits, then gathers the logits of each ROI's own class.

  Args:
    roi_features: A ROI feature tensor of shape [batch_size, num_rois,
      height_l, width_l, num_filters].
    class_indices: a Tensor of shape [batch_size, num_rois], indicating
      which class the ROI is.
    is_training: `boolean`, True if model is in training mode.

  Returns:
    mask_outputs: a tensor with a shape of
      [batch_size, num_rois, mask_target_size, mask_target_size],
      representing the mask logits for the class selected by
      `class_indices` for each ROI.
  """
  with backend.get_graph().as_default():
    with tf.name_scope('mask_head'):
      # Collapse batch and ROI dims into one so the convs see a 4-D map.
      _, num_rois, height, width, filters = roi_features.get_shape(
      ).as_list()
      net = tf.reshape(roi_features, [-1, height, width, filters])

      for i in range(self._num_convs):
        # When batch norm is on, the relu lives inside _batch_norm_relu.
        net = self._conv2d_op(
            self._num_filters,
            kernel_size=(3, 3),
            strides=(1, 1),
            padding='same',
            dilation_rate=(1, 1),
            activation=(None if self._use_batch_norm else tf.nn.relu),
            name='mask-conv-l%d' % i)(net)
        if self._use_batch_norm:
          net = self._batch_norm_relu()(net, is_training=is_training)

      # 2x spatial upsampling before the final per-class logits.
      net = tf.keras.layers.Conv2DTranspose(
          self._num_filters,
          kernel_size=(2, 2),
          strides=(2, 2),
          padding='valid',
          activation=(None if self._use_batch_norm else tf.nn.relu),
          kernel_initializer=tf.keras.initializers.VarianceScaling(
              scale=2, mode='fan_out', distribution='untruncated_normal'),
          bias_initializer=tf.zeros_initializer(),
          name='conv5-mask')(net)
      if self._use_batch_norm:
        net = self._batch_norm_relu()(net, is_training=is_training)

      # 1x1 conv producing one mask logit map per class.
      mask_outputs = self._conv2d_op(
          self._num_classes,
          kernel_size=(1, 1),
          strides=(1, 1),
          padding='valid',
          name='mask_fcn_logits')(net)
      mask_outputs = tf.reshape(mask_outputs, [
          -1, num_rois, self._mask_target_size, self._mask_target_size,
          self._num_classes
      ])

      with tf.name_scope('masks_post_processing'):
        # TODO(pengchong): Figure out the way not to use the static inferred
        # batch size.
        batch_size, num_masks = class_indices.get_shape().as_list()
        # Move classes ahead of the spatial dims: [b, rois, classes, h, w].
        mask_outputs = tf.transpose(a=mask_outputs, perm=[0, 1, 4, 2, 3])
        # Constructs indices for gather.
        batch_indices = tf.tile(
            tf.expand_dims(tf.range(batch_size), axis=1), [1, num_masks])
        mask_indices = tf.tile(
            tf.expand_dims(tf.range(num_masks), axis=0), [batch_size, 1])
        # Each index triple (batch, roi, class) picks the ROI's own class
        # map, yielding [batch_size, num_masks, h, w].
        gather_indices = tf.stack(
            [batch_indices, mask_indices, class_indices], axis=2)
        mask_outputs = tf.gather_nd(mask_outputs, gather_indices)
  return mask_outputs
def __init__(self,
             min_level,
             max_level,
             anchors_per_location,
             num_convs=2,
             num_filters=256,
             use_separable_conv=False,
             use_batch_norm=True,
             batch_norm_relu=nn_ops.BatchNormRelu):
  """Builds the reusable layers of the Region Proposal Network head.

  Args:
    min_level: `int` number of minimum feature level.
    max_level: `int` number of maximum feature level.
    anchors_per_location: `int` number of anchors per pixel location.
    num_convs: `int` number of intermediate conv layers before the
      prediction.  NOTE(review): not referenced in this constructor — only
      the single shared `rpn` conv is built; confirm against the call site.
    num_filters: `int` number of filters of the intermediate conv layers.
    use_separable_conv: `bool`, indicating whether the separable conv
      layers are used.
    use_batch_norm: `bool`, indicating whether batchnorm layers are added.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer (optional).
  """
  self._min_level = min_level
  self._max_level = max_level
  self._anchors_per_location = anchors_per_location
  self._use_batch_norm = use_batch_norm

  if use_separable_conv:
    self._conv2d_op = functools.partial(
        tf.keras.layers.SeparableConv2D,
        depth_multiplier=1,
        bias_initializer=tf.zeros_initializer())
  else:
    self._conv2d_op = functools.partial(
        tf.keras.layers.Conv2D,
        kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
        bias_initializer=tf.zeros_initializer())

  # Shared 3x3 conv; relu is deferred to batch norm when it is enabled.
  self._rpn_conv = self._conv2d_op(
      num_filters,
      kernel_size=(3, 3),
      strides=(1, 1),
      activation=(None if self._use_batch_norm else tf.nn.relu),
      padding='same',
      name='rpn')
  # 1x1 prediction convs: objectness scores and box deltas.
  self._rpn_class_conv = self._conv2d_op(
      anchors_per_location,
      kernel_size=(1, 1),
      strides=(1, 1),
      padding='valid',
      name='rpn-class')
  self._rpn_box_conv = self._conv2d_op(
      4 * anchors_per_location,
      kernel_size=(1, 1),
      strides=(1, 1),
      padding='valid',
      name='rpn-box')

  # One batch-norm+relu per pyramid level, keyed by level number.
  if self._use_batch_norm:
    self._batch_norm_relus = {
        level: batch_norm_relu(name='rpn-l%d-bn' % level)
        for level in range(self._min_level, self._max_level + 1)
    }
  else:
    self._batch_norm_relus = {}
def __init__(self,
             num_classes,
             mask_target_size,
             num_convs=4,
             num_filters=256,
             use_separable_conv=False,
             use_batch_norm=True,
             batch_norm_relu=nn_ops.BatchNormRelu):
  """Builds the reusable layers of the Mask R-CNN mask head.

  Args:
    num_classes: an integer for the number of classes.
    mask_target_size: an integer that is the resolution of masks.
    num_convs: `int` number of intermediate conv layers before the
      prediction.
    num_filters: `int` number of filters of the intermediate conv layers.
    use_separable_conv: `bool`, indicating whether the separable conv
      layers are used.
    use_batch_norm: `bool`, indicating whether batchnorm layers are added.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer (optional).
  """
  self._num_classes = num_classes
  self._mask_target_size = mask_target_size
  self._num_convs = num_convs
  self._num_filters = num_filters
  self._use_batch_norm = use_batch_norm
  self._batch_norm_relu = batch_norm_relu

  if use_separable_conv:
    self._conv2d_op = functools.partial(
        tf.keras.layers.SeparableConv2D,
        depth_multiplier=1,
        bias_initializer=tf.zeros_initializer())
  else:
    self._conv2d_op = functools.partial(
        tf.keras.layers.Conv2D,
        kernel_initializer=tf.keras.initializers.VarianceScaling(
            scale=2, mode='fan_out', distribution='untruncated_normal'),
        bias_initializer=tf.zeros_initializer())

  # Intermediate 3x3 convs; relu is folded into batch norm when enabled.
  self._conv2d_ops = [
      self._conv2d_op(
          self._num_filters,
          kernel_size=(3, 3),
          strides=(1, 1),
          padding='same',
          dilation_rate=(1, 1),
          activation=(None if self._use_batch_norm else tf.nn.relu),
          name='mask-conv-l%d' % i) for i in range(self._num_convs)
  ]
  # 2x spatial upsampling applied before the final per-class mask logits.
  self._mask_conv_transpose = tf.keras.layers.Conv2DTranspose(
      self._num_filters,
      kernel_size=(2, 2),
      strides=(2, 2),
      padding='valid',
      activation=(None if self._use_batch_norm else tf.nn.relu),
      kernel_initializer=tf.keras.initializers.VarianceScaling(
          scale=2, mode='fan_out', distribution='untruncated_normal'),
      bias_initializer=tf.zeros_initializer(),
      name='conv5-mask')
def __init__(self,
             num_classes,
             num_convs=0,
             num_filters=256,
             use_separable_conv=False,
             num_fcs=2,
             fc_dims=1024,
             use_batch_norm=True,
             batch_norm_relu=nn_ops.BatchNormRelu):
  """Builds the reusable layers of the Fast R-CNN box head.

  Args:
    num_classes: an integer for the number of classes.
    num_convs: `int` number of intermediate conv layers before the FC
      layers.
    num_filters: `int` number of filters of the intermediate conv layers.
    use_separable_conv: `bool`, indicating whether the separable conv
      layers are used.
    num_fcs: `int` number of FC layers before the predictions.
    fc_dims: `int` number of dimensions of the FC layers.
    use_batch_norm: `bool`, indicating whether batchnorm layers are added.
    batch_norm_relu: an operation that includes a batch normalization layer
      followed by a relu layer (optional).
  """
  self._num_classes = num_classes
  self._num_convs = num_convs
  self._num_filters = num_filters
  self._num_fcs = num_fcs
  self._fc_dims = fc_dims
  self._use_batch_norm = use_batch_norm
  self._batch_norm_relu = batch_norm_relu

  if use_separable_conv:
    self._conv2d_op = functools.partial(
        tf.keras.layers.SeparableConv2D,
        depth_multiplier=1,
        bias_initializer=tf.zeros_initializer())
  else:
    self._conv2d_op = functools.partial(
        tf.keras.layers.Conv2D,
        kernel_initializer=tf.keras.initializers.VarianceScaling(
            scale=2, mode='fan_out', distribution='untruncated_normal'),
        bias_initializer=tf.zeros_initializer())

  # With batch norm enabled, the relu lives inside the batch-norm op.
  hidden_activation = None if self._use_batch_norm else tf.nn.relu

  # Intermediate 3x3 convs applied before flattening to the FC stack.
  self._conv_ops = [
      self._conv2d_op(
          self._num_filters,
          kernel_size=(3, 3),
          strides=(1, 1),
          padding='same',
          dilation_rate=(1, 1),
          activation=hidden_activation,
          name='conv_{}'.format(i)) for i in range(self._num_convs)
  ]
  self._conv_bn_ops = []
  if self._use_batch_norm:
    self._conv_bn_ops = [
        self._batch_norm_relu() for _ in range(self._num_convs)
    ]

  self._fc_ops = [
      tf.keras.layers.Dense(
          units=self._fc_dims,
          activation=hidden_activation,
          name='fc{}'.format(i)) for i in range(self._num_fcs)
  ]
  self._fc_bn_ops = []
  if self._use_batch_norm:
    # fused=False: these run on rank-3 [batch, rois, dims] activations.
    self._fc_bn_ops = [
        self._batch_norm_relu(fused=False) for _ in range(self._num_fcs)
    ]

  self._class_predict = tf.keras.layers.Dense(
      self._num_classes,
      kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
      bias_initializer=tf.zeros_initializer(),
      name='class-predict')
  self._box_predict = tf.keras.layers.Dense(
      self._num_classes * 4,
      kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.001),
      bias_initializer=tf.zeros_initializer(),
      name='box-predict')