def __call__(self, multilevel_features, is_training=None):
    """Builds the FPN feature pyramid on top of backbone features.

    Args:
      multilevel_features: a `dict` keyed by `int` feature level (for
        example 2, 3, 4, 5). Each value is the backbone feature of that
        level, expected as [batch_size, height_l, width_l, num_filters].
      is_training: `bool`, forwarded to the per-level normalization ops
        when batch norm is enabled.

    Returns:
      a `dict` keyed by `int` level covering
      [min_level, min_level + 1, ..., max_level] whose values are the FPN
      features of each level.

    Raises:
      ValueError: if the lowest level present in `multilevel_features` is
        greater than the FPN minimum level.
    """
    available_levels = tuple(multilevel_features.keys())
    lowest_level = min(available_levels)
    if lowest_level > self._min_level:
        raise ValueError(
            ('The minimum backbone level {} should be '
             'less or equal to FPN minimum level {}.').format(
                 lowest_level, self._min_level))

    # Highest level that is taken directly from the backbone; anything
    # above it (up to the FPN max level) is synthesized further below.
    top_level = min(max(available_levels), self._max_level)

    with keras_utils.maybe_enter_backend_graph():
        with tf.name_scope('fpn'):
            # Lateral connections: one conv per backbone level.
            laterals = {
                lvl: self._lateral_conv2d_op[lvl](multilevel_features[lvl])
                for lvl in range(self._min_level, top_level + 1)
            }

            # Top-down path: start at the top and merge the upsampled
            # coarser map into each finer lateral feature.
            pyramid = {top_level: laterals[top_level]}
            for lvl in reversed(range(self._min_level, top_level)):
                upsampled = tf.keras.layers.UpSampling2D()(pyramid[lvl + 1])
                pyramid[lvl] = upsampled + laterals[lvl]

            # Post-hoc convolution applied to every merged level.
            for lvl in range(self._min_level, top_level + 1):
                pyramid[lvl] = self._post_hoc_conv2d_op[lvl](pyramid[lvl])

            # Coarser FPN levels introduced for RetinaNet. The first extra
            # level consumes the previous level as-is; later ones apply the
            # activation to their input first.
            first_coarse_level = top_level + 1
            for lvl in range(first_coarse_level, self._max_level + 1):
                coarse_input = pyramid[lvl - 1]
                if lvl != first_coarse_level:
                    coarse_input = self._activation_op(coarse_input)
                pyramid[lvl] = self._coarse_conv2d_op[lvl](coarse_input)

            # Optional normalization on every output level.
            if self._use_batch_norm:
                for lvl in range(self._min_level, self._max_level + 1):
                    pyramid[lvl] = self._norm_activations[lvl](
                        pyramid[lvl], is_training=is_training)

    return pyramid
def __call__(self, features, is_training=None):
    """Runs the shared RPN head on every level in [min_level, max_level].

    Args:
      features: a mapping indexed by `int` level that provides the input
        feature for each level.
      is_training: passed through to the shared RPN head.

    Returns:
      A tuple `(scores_outputs, box_outputs)` of two `dict`s. Both are keyed
      by the level converted to `str` and hold, respectively, the first and
      second value returned by the shared head for that level.
    """
    scores_by_level = {}
    boxes_by_level = {}

    with keras_utils.maybe_enter_backend_graph():
        with tf.name_scope('rpn_head'):
            for lvl in range(self._min_level, self._max_level + 1):
                level_key = str(lvl)
                head_outputs = self._shared_rpn_heads(
                    features[lvl], self._anchors_per_location, lvl,
                    is_training)
                scores_by_level[level_key], boxes_by_level[level_key] = (
                    head_outputs)

    return scores_by_level, boxes_by_level
def __call__(self, fpn_features, is_training=None):
    """Returns outputs of RetinaNet head.

    Args:
      fpn_features: a mapping indexed by `int` level that provides the
        feature for every level in [min_level, max_level].
      is_training: passed through to the class and box sub-networks.

    Returns:
      A tuple `(class_outputs, box_outputs)` of two `dict`s keyed by the
      level converted to `str`, holding the results of `self.class_net` and
      `self.box_net` for that level.
    """
    class_outputs, box_outputs = {}, {}

    with keras_utils.maybe_enter_backend_graph():
        with tf.name_scope('retinanet_head'):
            for lvl in range(self._min_level, self._max_level + 1):
                level_key = str(lvl)
                level_features = fpn_features[lvl]
                # Class net is evaluated before box net for each level.
                class_outputs[level_key] = self.class_net(
                    level_features, lvl, is_training=is_training)
                box_outputs[level_key] = self.box_net(
                    level_features, lvl, is_training=is_training)

    return class_outputs, box_outputs
def __call__(self, inputs, is_training=None):
    """Applies the ResNet backbone to a batch of images.

    Args:
      inputs: a `Tensor` with shape [batch_size, height, width, 3]
        representing a batch of images.
      is_training: `bool`, if True the model is in training mode.

    Returns:
      Whatever `self._resnet_fn` produces; this is expected to be a `dict`
      with `int` keys for the continuous feature levels [2, 3, 4, 5] whose
      values are the ResNet feature hierarchy with shape
      [batch_size, height_l, width_l, num_filters].
    """
    scope_name = 'resnet%s' % self._resnet_depth
    with keras_utils.maybe_enter_backend_graph(), tf.name_scope(scope_name):
        endpoints = self._resnet_fn(inputs, is_training)
    return endpoints
def build_model(self, weights=None, is_training=None):
    """Assembles the 'retinanet' Keras model and optionally restores weights.

    Args:
      weights: optional; when truthy it is passed to
        `keras_model.load_weights` after the model is built.
      is_training: forwarded to `self.model_outputs`.

    Returns:
      The constructed `tf.keras.models.Model`.
    """
    model_inputs = self._input_layer
    with keras_utils.maybe_enter_backend_graph():
        output_tensors = self.model_outputs(model_inputs, is_training)
        keras_model = tf.keras.models.Model(
            inputs=model_inputs, outputs=output_tensors, name='retinanet')

    # Checkpoint initialization runs first; explicit weights, if given,
    # are loaded afterwards.
    if self._checkpoint_path:
        logger.info('Init backbone')
        restore_fn = self.make_restore_checkpoint_fn()
        restore_fn(keras_model)

    if weights:
        logger.info('Loaded pretrained weights from {}'.format(weights))
        keras_model.load_weights(weights)

    return keras_model
def build_model(self, weights=None, is_training=None):
    """Assembles the 'maskrcnn' Keras model and optionally restores weights.

    Args:
      weights: optional; when truthy it is handed to
        `_restore_baseline_weights` together with the built model.
      is_training: forwarded to `self.build_input_layers` and
        `self.model_outputs`.

    Returns:
      The constructed `tf.keras.models.Model`.
    """
    # Input layers are created before entering the backend graph context.
    model_inputs = self.build_input_layers(self._params, is_training)

    with keras_utils.maybe_enter_backend_graph():
        output_tensors = self.model_outputs(model_inputs, is_training)
        keras_model = tf.keras.models.Model(
            inputs=model_inputs, outputs=output_tensors, name='maskrcnn')

    # Checkpoint initialization runs first; explicit weights, if given,
    # are restored afterwards.
    if self._checkpoint_path:
        logger.info('Init backbone')
        restore_fn = self.make_restore_checkpoint_fn()
        restore_fn(keras_model)

    if weights:
        logger.info('Loaded pretrained weights from {}'.format(weights))
        _restore_baseline_weights(keras_model, weights)

    return keras_model
def __call__(self, roi_features, is_training=None):
    """Box and class branches for the Mask-RCNN model.

    Args:
      roi_features: A ROI feature tensor of shape
        [batch_size, num_rois, height_l, width_l, num_filters]. All
        dimensions except the first are read from the static shape.
      is_training: `boolean`, True if the model is in training mode.

    Returns:
      class_outputs: the result of the class prediction op; expected shape
        [batch_size, num_rois, num_classes].
      box_outputs: the result of the box prediction op; expected shape
        [batch_size, num_rois, num_classes * 4].
    """
    with keras_utils.maybe_enter_backend_graph():
        with tf.name_scope('fast_rcnn_head'):
            # Fold the ROI axis into the batch axis so the convs see a
            # regular 4-D input.
            static_shape = roi_features.get_shape().as_list()
            _, num_rois, height, width, filters = static_shape
            net = tf.reshape(roi_features, [-1, height, width, filters])

            for conv_idx in range(self._num_convs):
                net = self._conv_ops[conv_idx](net)
                if self._use_batch_norm:
                    net = self._conv_bn_ops[conv_idx](
                        net, is_training=is_training)

            # When convs ran, the channel count is the configured filter
            # count; otherwise it is still the input's.
            if self._num_convs > 0:
                filters = self._num_filters

            # Restore the ROI axis and flatten each ROI before the FC stack.
            flat_dim = height * width * filters
            net = tf.reshape(net, [-1, num_rois, flat_dim])

            for fc_idx in range(self._num_fcs):
                net = self._fc_ops[fc_idx](net)
                if self._use_batch_norm:
                    net = self._fc_bn_ops[fc_idx](
                        net, is_training=is_training)

            class_outputs = self._class_predict(net)
            box_outputs = self._box_predict(net)

    return class_outputs, box_outputs
def __call__(self, roi_features, class_indices, is_training=None):
    """Mask branch for the Mask-RCNN model.

    Args:
      roi_features: A ROI feature tensor of shape
        [batch_size, num_rois, height_l, width_l, num_filters]. All
        dimensions except the first are read from the static shape.
      class_indices: a Tensor of shape [batch_size, num_rois], indicating
        which class the ROI is. Both dimensions are read from the static
        shape and fed to `tf.range`.
      is_training: `boolean`, True if the model is in training mode.

    Returns:
      mask_outputs: the per-ROI mask logits of the class selected by
        `class_indices`, i.e. a tensor of shape
        [batch_size, num_masks, mask_target_size, mask_target_size].
    """
    with keras_utils.maybe_enter_backend_graph(), tf.name_scope('mask_head'):
        static_shape = roi_features.get_shape().as_list()
        _, num_rois, height, width, filters = static_shape

        # Fold the ROI axis into the batch axis for the conv stack.
        net = tf.reshape(roi_features, [-1, height, width, filters])

        for conv_idx in range(self._num_convs):
            net = self._conv2d_ops[conv_idx](net)
            if self._use_batch_norm:
                net = self._norm_activation()(net, is_training=is_training)

        net = self._mask_conv_transpose(net)
        if self._use_batch_norm:
            net = self._norm_activation()(net, is_training=is_training)

        # 1x1 convolution producing one logit map per class.
        logits_layer = self._conv2d_op(
            self._num_classes,
            kernel_size=(1, 1),
            strides=(1, 1),
            padding='valid',
            name='mask_fcn_logits')
        mask_outputs = logits_layer(net)

        # Restore the ROI axis.
        target_size = self._mask_target_size
        mask_outputs = tf.reshape(
            mask_outputs,
            [-1, num_rois, target_size, target_size, self._num_classes])

        with tf.name_scope('masks_post_processing'):
            batch_size, num_masks = class_indices.get_shape().as_list()

            # Move the class axis next to the ROI axis so that one
            # (batch, mask, class) triple selects a full mask.
            mask_outputs = tf.transpose(a=mask_outputs, perm=[0, 1, 4, 2, 3])

            # Constructs indices for gather.
            batch_column = tf.expand_dims(tf.range(batch_size), axis=1)
            batch_indices = tf.tile(batch_column, [1, num_masks])
            mask_row = tf.expand_dims(tf.range(num_masks), axis=0)
            mask_indices = tf.tile(mask_row, [batch_size, 1])
            gather_indices = tf.stack(
                [batch_indices, mask_indices, class_indices], axis=2)

            mask_outputs = tf.gather_nd(mask_outputs, gather_indices)

    return mask_outputs