예제 #1
0
    def __call__(self, multilevel_features, is_training=None):
        """Builds the FPN feature pyramid from multilevel backbone features.

        Args:
            multilevel_features: a `dict` keyed by `int` feature level (e.g.
                2, 3, 4, 5) whose values are the matching backbone features
                with shape [batch_size, height_l, width_l, num_filters].
            is_training: `bool`, forwarded to the per-level norm/activation
                ops when batch norm is enabled.

        Returns:
            a `dict` keyed by `int` level covering
            [min_level, min_level + 1, ..., max_level]; each value is the FPN
            feature of shape [batch_size, height_l, width_l, fpn_feat_dims].

        Raises:
            ValueError: if the lowest input level is above the FPN minimum
                level.
        """
        levels_in = tuple(multilevel_features.keys())
        lowest_in = min(levels_in)
        if lowest_in > self._min_level:
            raise ValueError(
                'The minimum backbone level {} should be '.format(lowest_in) +
                'less or equal to FPN minimum level {}.'.format(
                    self._min_level))

        bottom = self._min_level
        # The pyramid cannot start above what the backbone provides, nor
        # above the configured FPN maximum.
        top = min(max(levels_in), self._max_level)

        with keras_utils.maybe_enter_backend_graph(), tf.name_scope('fpn'):
            # Lateral connections, built from the finest level upwards.
            laterals = {
                lvl: self._lateral_conv2d_op[lvl](multilevel_features[lvl])
                for lvl in range(bottom, top + 1)
            }

            # Top-down path: start at the coarsest backbone level and merge
            # the upsampled coarser map into each finer lateral.
            pyramid = {top: laterals[top]}
            for lvl in reversed(range(bottom, top)):
                upsampled = tf.keras.layers.UpSampling2D()(pyramid[lvl + 1])
                pyramid[lvl] = upsampled + laterals[lvl]

            # Post-hoc 3x3 convolution on every merged level.
            for lvl in range(bottom, top + 1):
                pyramid[lvl] = self._post_hoc_conv2d_op[lvl](pyramid[lvl])

            # Extra coarser levels (RetinaNet style). The first extra level
            # consumes the previous level as-is; later ones pass it through
            # the activation op first.
            first_extra = top + 1
            for lvl in range(first_extra, self._max_level + 1):
                previous = pyramid[lvl - 1]
                if lvl == first_extra:
                    coarse_input = previous
                else:
                    coarse_input = self._activation_op(previous)
                pyramid[lvl] = self._coarse_conv2d_op[lvl](coarse_input)

            # Optional norm/activation applied to every output level.
            if self._use_batch_norm:
                for lvl in range(bottom, self._max_level + 1):
                    norm_op = self._norm_activations[lvl]
                    pyramid[lvl] = norm_op(pyramid[lvl],
                                           is_training=is_training)

        return pyramid
예제 #2
0
 def __call__(self, features, is_training=None):
     """Applies the shared RPN heads to every configured feature level.

     Args:
         features: mapping indexed by `int` level; the entries for
             `self._min_level` through `self._max_level` are read.
         is_training: passed through to `self._shared_rpn_heads`.

     Returns:
         A `(scores_outputs, box_outputs)` pair of `dict`s. Note that both
         are keyed by the level converted to `str`, not by the `int` level.
     """
     scores_by_level, boxes_by_level = {}, {}
     with keras_utils.maybe_enter_backend_graph(), \
             tf.name_scope('rpn_head'):
         for lvl in range(self._min_level, self._max_level + 1):
             head_outputs = self._shared_rpn_heads(
                 features[lvl], self._anchors_per_location, lvl,
                 is_training)
             # The head yields exactly two values: scores, then boxes.
             key = str(lvl)
             scores_by_level[key], boxes_by_level[key] = head_outputs
         return scores_by_level, boxes_by_level
예제 #3
0
    def __call__(self, fpn_features, is_training=None):
        """Runs the RetinaNet class and box sub-networks on each FPN level.

        Args:
            fpn_features: mapping indexed by `int` level; the entries for
                `self._min_level` through `self._max_level` are read.
            is_training: forwarded to `self.class_net` and `self.box_net`.

        Returns:
            A `(class_outputs, box_outputs)` pair of `dict`s, each keyed by
            the level converted to `str`.
        """
        class_outputs, box_outputs = {}, {}
        with keras_utils.maybe_enter_backend_graph(), \
                tf.name_scope('retinanet_head'):
            for lvl in range(self._min_level, self._max_level + 1):
                level_features = fpn_features[lvl]
                key = str(lvl)
                # Class net first, then box net, for every level.
                class_outputs[key] = self.class_net(
                    level_features, lvl, is_training=is_training)
                box_outputs[key] = self.box_net(
                    level_features, lvl, is_training=is_training)

        return class_outputs, box_outputs
예제 #4
0
    def __call__(self, inputs, is_training=None):
        """Applies the configured ResNet function to a batch of images.

        Args:
          inputs: a `Tensor` with shape [batch_size, height, width, 3]
            representing a batch of images.
          is_training: `bool`, if True the model is in training mode.

        Returns:
          The result of `self._resnet_fn`: a `dict` with `int` keys for the
          continuous feature levels [2, 3, 4, 5], whose values are the
          corresponding ResNet feature maps with shape
          [batch_size, height_l, width_l, num_filters].
        """
        # The name scope is derived from the configured depth, e.g. a depth
        # of 50 yields the scope 'resnet50'.
        with keras_utils.maybe_enter_backend_graph(), \
                tf.name_scope('resnet%s' % self._resnet_depth):
            return self._resnet_fn(inputs, is_training)
예제 #5
0
    def build_model(self, weights=None, is_training=None):
        """Assembles the 'retinanet' Keras model and optionally restores it.

        Args:
            weights: optional value handed to `keras_model.load_weights`;
                skipped when falsy.
            is_training: forwarded to `self.model_outputs`.

        Returns:
            The constructed `tf.keras.models.Model`.
        """
        model_inputs = self._input_layer
        with keras_utils.maybe_enter_backend_graph():
            model_outputs = self.model_outputs(model_inputs, is_training)
            model = tf.keras.models.Model(
                inputs=model_inputs, outputs=model_outputs, name='retinanet')

        # Backbone initialisation only runs when a checkpoint path is set.
        if self._checkpoint_path:
            logger.info('Init backbone')
            restore_fn = self.make_restore_checkpoint_fn()
            restore_fn(model)

        if weights:
            # NOTE(review): this message is emitted before the load runs.
            logger.info('Loaded pretrained weights from {}'.format(weights))
            model.load_weights(weights)

        return model
예제 #6
0
    def build_model(self, weights=None, is_training=None):
        """Assembles the 'maskrcnn' Keras model and optionally restores it.

        Args:
            weights: optional value handed to `_restore_baseline_weights`;
                skipped when falsy.
            is_training: forwarded to `self.build_input_layers` and
                `self.model_outputs`.

        Returns:
            The constructed `tf.keras.models.Model`.
        """
        # Input layers are created before entering the backend graph context.
        model_inputs = self.build_input_layers(self._params, is_training)
        with keras_utils.maybe_enter_backend_graph():
            model_outputs = self.model_outputs(model_inputs, is_training)
            model = tf.keras.models.Model(
                inputs=model_inputs, outputs=model_outputs, name='maskrcnn')

        # Backbone initialisation only runs when a checkpoint path is set.
        if self._checkpoint_path:
            logger.info('Init backbone')
            restore_fn = self.make_restore_checkpoint_fn()
            restore_fn(model)

        if weights:
            # NOTE(review): this message is emitted before the restore runs.
            logger.info('Loaded pretrained weights from {}'.format(weights))
            _restore_baseline_weights(model, weights)

        return model
예제 #7
0
    def __call__(self, roi_features, is_training=None):
        """Box and class branches for the Mask-RCNN model.

        Args:
            roi_features: A ROI feature tensor of shape [batch_size, num_rois,
                height_l, width_l, num_filters]. The dimensions are read from
                the static shape.
            is_training: `boolean`, True if the model is in training mode;
                forwarded to the batch-norm ops when batch norm is enabled.

        Returns:
            class_outputs: a tensor with a shape of
                [batch_size, num_rois, num_classes], representing the class
                predictions.
            box_outputs: a tensor with a shape of
                [batch_size, num_rois, num_classes * 4], representing the box
                predictions.
        """
        with keras_utils.maybe_enter_backend_graph(), \
                tf.name_scope('fast_rcnn_head'):
            static_shape = roi_features.get_shape().as_list()
            _, num_rois, height, width, channels = static_shape

            # Fold the ROI axis into the batch axis so the 2D conv ops can
            # run on every ROI.
            hidden = tf.reshape(roi_features, [-1, height, width, channels])
            for idx in range(self._num_convs):
                hidden = self._conv_ops[idx](hidden)
                if self._use_batch_norm:
                    hidden = self._conv_bn_ops[idx](
                        hidden, is_training=is_training)

            # Once at least one conv has run, the channel count is the
            # configured filter count rather than the input's.
            if self._num_convs > 0:
                channels = self._num_filters

            # Restore the ROI axis and flatten each ROI before the FC stack.
            flat_dim = height * width * channels
            hidden = tf.reshape(hidden, [-1, num_rois, flat_dim])

            for idx in range(self._num_fcs):
                hidden = self._fc_ops[idx](hidden)
                if self._use_batch_norm:
                    hidden = self._fc_bn_ops[idx](
                        hidden, is_training=is_training)

            class_outputs = self._class_predict(hidden)
            box_outputs = self._box_predict(hidden)
            return class_outputs, box_outputs
예제 #8
0
    def __call__(self, roi_features, class_indices, is_training=None):
        """Mask branch for the Mask-RCNN model.

        Runs the conv stack, the transposed conv and a 1x1 logits conv on
        every ROI, then keeps, for each ROI, only the mask of the class given
        in `class_indices`.

        Args:
            roi_features: A ROI feature tensor of shape [batch_size, num_rois,
                height_l, width_l, num_filters]. The dimensions are read from
                the static shape.
            class_indices: a Tensor of shape [batch_size, num_rois], indicating
                which class the ROI is. Its static shape is unpacked into
                (batch_size, num_masks) and fed to `tf.range`.
                NOTE(review): this presumably requires a statically known
                batch size, and num_masks to equal num_rois -- confirm.
            is_training: `boolean`, True if the model is in training mode;
                forwarded to the norm/activation ops when batch norm is
                enabled.

        Returns:
            mask_outputs: a tensor with a shape of
                [batch_size, num_masks, mask_target_size, mask_target_size],
                holding for each ROI the mask predicted for its class in
                `class_indices` (the class axis is removed by the gather).
        """

        with keras_utils.maybe_enter_backend_graph():
            with tf.name_scope('mask_head'):
                _, num_rois, height, width, filters = roi_features.get_shape(
                ).as_list()
                # Fold the ROI axis into the batch axis for the 2D conv ops.
                net = tf.reshape(roi_features, [-1, height, width, filters])

                for i in range(self._num_convs):
                    net = self._conv2d_ops[i](net)
                    if self._use_batch_norm:
                        # NOTE(review): self._norm_activation() is invoked
                        # here on every iteration and every call, so whatever
                        # it builds is created anew each time -- confirm this
                        # is intended.
                        net = self._norm_activation()(net,
                                                      is_training=is_training)

                net = self._mask_conv_transpose(net)
                if self._use_batch_norm:
                    net = self._norm_activation()(net, is_training=is_training)

                # 1x1 conv producing one logit map per class; like the norm
                # ops above, it is constructed inside the call.
                mask_outputs = self._conv2d_op(self._num_classes,
                                               kernel_size=(1, 1),
                                               strides=(1, 1),
                                               padding='valid',
                                               name='mask_fcn_logits')(net)
                # Restore the ROI axis:
                # [batch, num_rois, mask_size, mask_size, num_classes].
                mask_outputs = tf.reshape(mask_outputs, [
                    -1, num_rois, self._mask_target_size,
                    self._mask_target_size, self._num_classes
                ])

                with tf.name_scope('masks_post_processing'):
                    batch_size, num_masks = class_indices.get_shape().as_list()
                    # Move the class axis ahead of the spatial axes so that a
                    # (batch, mask, class) index triple selects one full mask.
                    mask_outputs = tf.transpose(a=mask_outputs,
                                                perm=[0, 1, 4, 2, 3])
                    # Constructs indices for gather.
                    batch_indices = tf.tile(
                        tf.expand_dims(tf.range(batch_size), axis=1),
                        [1, num_masks])
                    mask_indices = tf.tile(
                        tf.expand_dims(tf.range(num_masks), axis=0),
                        [batch_size, 1])
                    # Shape [batch_size, num_masks, 3]: one
                    # (batch, mask, class) triple per ROI.
                    gather_indices = tf.stack(
                        [batch_indices, mask_indices, class_indices], axis=2)
                    mask_outputs = tf.gather_nd(mask_outputs, gather_indices)
            # Returned inside the backend-graph context, after the
            # 'mask_head' name scope has closed.
            return mask_outputs