def build_outputs(self, inputs, mode):
    """Assemble the dictionary of output tensors for one forward pass.

    The stages run in a fixed order: backbone, FPN, RPN head, RoI
    generation, (train only) RoI sampling and box-target encoding, the
    box/class head, detection generation and, when `self._include_mask` is
    set, the mask head.

    Args:
      inputs: mapping that provides 'image' and 'image_info'. In train mode
        'gt_boxes' and 'gt_classes' are read as well, plus 'gt_masks' when
        the mask branch is enabled.
      mode: compared against `mode_keys.TRAIN` to select training behaviour.

    Returns:
      A dict with 'rpn_score_outputs', 'rpn_box_outputs', 'class_outputs',
      'box_outputs', 'num_detections', 'detection_boxes',
      'detection_classes' and 'detection_scores'. Train mode adds
      'class_targets' and 'box_targets'. With the mask branch enabled,
      train mode adds 'mask_targets', 'sampled_class_targets' and
      'mask_outputs', while other modes add 'detection_masks'.
    """
    is_training = mode == mode_keys.TRAIN

    def as_float32(structure):
        # Cast every tensor of a (possibly nested) structure to float32.
        return tf.nest.map_structure(
            lambda tensor: tf.cast(tensor, tf.float32), structure)

    outputs = {}

    # Feature extraction.
    image = inputs['image']
    _, height, width, _ = image.get_shape().as_list()
    fpn_features = self._fpn_fn(
        self._backbone_fn(image, is_training), is_training)

    # Region proposal network.
    rpn_scores, rpn_boxes = self._rpn_head_fn(fpn_features, is_training)
    outputs['rpn_score_outputs'] = as_float32(rpn_scores)
    outputs['rpn_box_outputs'] = as_float32(rpn_boxes)

    architecture = self._params.architecture
    anchor_params = self._params.anchor
    input_anchor = anchor.Anchor(architecture.min_level,
                                 architecture.max_level,
                                 anchor_params.num_scales,
                                 anchor_params.aspect_ratios,
                                 anchor_params.anchor_size,
                                 (height, width))
    # NOTE(review): row 1 of 'image_info' is presumably the image size used
    # to clip proposals -- confirm against the input pipeline.
    rois, _ = self._generate_rois_fn(rpn_boxes, rpn_scores,
                                     input_anchor.multilevel_boxes,
                                     inputs['image_info'][:, 1, :],
                                     is_training)

    if is_training:
        # Proposals are treated as constants by the second stage.
        rois = tf.stop_gradient(rois)

        # Sample proposals and pair them with ground truth.
        sampled = self._sample_rois_fn(rois, inputs['gt_boxes'],
                                       inputs['gt_classes'])
        (rois, matched_gt_boxes, matched_gt_classes,
         matched_gt_indices) = sampled

        # Regression targets; rows matched to class 0 (background) are
        # replaced by zeros.
        encoded_targets = box_utils.encode_boxes(
            matched_gt_boxes, rois, weights=[10.0, 10.0, 5.0, 5.0])
        is_background = tf.expand_dims(
            tf.equal(matched_gt_classes, 0), axis=-1)
        background_mask = tf.tile(is_background, [1, 1, 4])
        box_targets = tf.where(background_mask,
                               tf.zeros_like(encoded_targets),
                               encoded_targets)
        outputs['class_targets'] = matched_gt_classes
        outputs['box_targets'] = box_targets

    # Box / class head.
    roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        fpn_features, rois, output_size=7)
    class_outputs, box_outputs = self._frcnn_head_fn(roi_features,
                                                     is_training)
    outputs['class_outputs'] = as_float32(class_outputs)
    outputs['box_outputs'] = as_float32(box_outputs)

    # Detections are generated in train mode too so that the checkpoint
    # stays loadable in predict mode; skipping this step would put the heads
    # out of order and break checkpoint loading.
    boxes, scores, classes, valid_detections = self._generate_detections_fn(
        box_outputs, class_outputs, rois, inputs['image_info'][:, 1:2, :])
    outputs['num_detections'] = valid_detections
    outputs['detection_boxes'] = boxes
    outputs['detection_classes'] = classes
    outputs['detection_scores'] = scores

    if not self._include_mask:
        return outputs

    # Mask head: trained on sampled foreground RoIs, evaluated on the final
    # detection boxes.
    if is_training:
        mask_rois, mask_classes, mask_targets = self._sample_masks_fn(
            rois, matched_gt_boxes, matched_gt_classes, matched_gt_indices,
            inputs['gt_masks'])
        mask_targets = tf.stop_gradient(mask_targets)
        mask_classes = tf.cast(mask_classes, dtype=tf.int32)
        outputs['mask_targets'] = mask_targets
        outputs['sampled_class_targets'] = mask_classes
    else:
        mask_rois = boxes
        mask_classes = tf.cast(classes, dtype=tf.int32)

    mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
        fpn_features, mask_rois, output_size=14)
    mask_outputs = self._mrcnn_head_fn(mask_roi_features, mask_classes,
                                       is_training)

    if is_training:
        outputs['mask_outputs'] = as_float32(mask_outputs)
    else:
        outputs['detection_masks'] = tf.nn.sigmoid(mask_outputs)

    return outputs
Example #2
0
    def build_outputs(self, inputs, mode):
        """Build the output tensors of the proposal, box and mask stages.

        The stages run in a fixed order: backbone, FPN, RPN head (optionally
        with a centerness output), RoI generation, then -- unless the model
        is configured as proposal-only -- RoI sampling (train only), the
        box/class head, detection generation and, when `self._include_mask`
        is set, the mask head.

        Args:
          inputs: mapping that provides 'image' and 'image_info'. Past the
            proposal stage, train mode also reads 'gt_boxes' and
            'gt_classes', plus 'gt_masks' when the mask branch is enabled.
          mode: compared against `mode_keys.TRAIN` to select training
            behaviour.

        Returns:
          A dict of output tensors. It always contains 'rpn_score_outputs',
          'rpn_box_outputs', 'num_detections', 'detection_boxes',
          'detection_classes' and 'detection_scores', plus
          'rpn_center_outputs' when centerness is enabled. Past the proposal
          stage it also contains 'class_outputs' and 'box_outputs',
          'box_score_outputs' when the box-score head is enabled, and in
          train mode 'class_targets', 'box_targets' and 'box_iou_targets'.
          With the mask branch, train mode adds 'mask_targets',
          'sampled_class_targets' and 'mask_outputs'; other modes add
          'detection_masks'.

        Raises:
          ValueError: if the class head is disabled, the box or mask stage
            is enabled, and the box-score head is disabled. No detection
            score exists in that configuration.
        """
        # Fail fast on a configuration that cannot produce detection scores.
        # With the class head disabled, detections are scored by
        # `box_scores`, which is only computed when the box-score head is
        # enabled. Without this check the method failed with an
        # UnboundLocalError part-way through graph construction. The
        # proposal-only configuration (class, box and mask all disabled)
        # returns before that point and is therefore not rejected here.
        if (not self._include_frcnn_class
                and (self._include_frcnn_box or self._include_mask)
                and not self._include_box_score):
            raise ValueError(
                '_include_box_score must be enabled when '
                '_include_frcnn_class is disabled and the box or mask stage '
                'is enabled; otherwise no detection scores are available.')

        is_training = mode == mode_keys.TRAIN
        model_outputs = {}

        def to_float32(structure):
            # Cast every tensor of a (possibly nested) structure to float32.
            return tf.nest.map_structure(
                lambda x: tf.cast(x, tf.float32), structure)

        image = inputs['image']
        _, image_height, image_width, _ = image.get_shape().as_list()
        backbone_features = self._backbone_fn(image, is_training)
        fpn_features = self._fpn_fn(backbone_features, is_training)

        # With centerness enabled the RPN head returns a third output, which
        # is also handed to RoI generation as the objectness score.
        if self._include_centerness:
            rpn_score_outputs, rpn_box_outputs, rpn_center_outputs = (
                self._rpn_head_fn(fpn_features, is_training))
            model_outputs.update({
                'rpn_center_outputs': to_float32(rpn_center_outputs),
            })
            object_scores = rpn_center_outputs
        else:
            rpn_score_outputs, rpn_box_outputs = self._rpn_head_fn(
                fpn_features, is_training)
            object_scores = None
        model_outputs.update({
            'rpn_score_outputs': to_float32(rpn_score_outputs),
            'rpn_box_outputs': to_float32(rpn_box_outputs),
        })
        input_anchor = anchor.Anchor(self._params.architecture.min_level,
                                     self._params.architecture.max_level,
                                     self._params.anchor.num_scales,
                                     self._params.anchor.aspect_ratios,
                                     self._params.anchor.anchor_size,
                                     (image_height, image_width))
        # NOTE(review): row 1 of 'image_info' is presumably the image size
        # used to clip proposals -- confirm against the input pipeline.
        rpn_rois, rpn_roi_scores = self._generate_rois_fn(
            rpn_box_outputs,
            rpn_score_outputs,
            input_anchor.multilevel_boxes,
            inputs['image_info'][:, 1, :],
            is_training,
            is_box_lrtb=self._include_centerness,
            object_scores=object_scores,
        )
        if (not self._include_frcnn_class and not self._include_frcnn_box
                and not self._include_mask):
            # Direct RPN detection: the proposals themselves become the
            # detections, so feed dummy box deltas (dy, dx, dh, dw = 0).
            box_outputs = tf.zeros_like(rpn_rois)
            box_outputs = tf.concat([box_outputs, box_outputs], -1)
            boxes, scores, classes, valid_detections = (
                self._generate_detections_fn(
                    box_outputs,
                    rpn_roi_scores,
                    rpn_rois,
                    inputs['image_info'][:, 1:2, :],
                    # if no_background, no softmax is applied.
                    is_single_fg_score=True,
                    keep_nms=True))
            model_outputs.update({
                'num_detections': valid_detections,
                'detection_boxes': boxes,
                'detection_classes': classes,
                'detection_scores': scores,
            })
            return model_outputs

        # ---- OLN-Proposal finishes here. ----

        if is_training:
            # Proposals are treated as constants by the second stage.
            rpn_rois = tf.stop_gradient(rpn_rois)
            rpn_roi_scores = tf.stop_gradient(rpn_roi_scores)

            # Sample proposals.
            (rpn_rois, rpn_roi_scores, matched_gt_boxes, matched_gt_classes,
             matched_gt_indices) = (self._sample_rois_fn(
                 rpn_rois, rpn_roi_scores, inputs['gt_boxes'],
                 inputs['gt_classes']))
            # Create bounding box training targets.
            box_targets = box_utils.encode_boxes(
                matched_gt_boxes, rpn_rois, weights=[10.0, 10.0, 5.0, 5.0])
            # If the target is background, the box target is set to all 0s.
            box_targets = tf.where(
                tf.tile(
                    tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1),
                    [1, 1, 4]), tf.zeros_like(box_targets), box_targets)
            model_outputs.update({
                'class_targets': matched_gt_classes,
                'box_targets': box_targets,
            })
            # Box-IoU targets: for each sampled RoI, the maximum overlap
            # over axis 2 of the RoI-vs-ground-truth overlap tensor.
            box_ious = box_utils.bbox_overlap(rpn_rois, inputs['gt_boxes'])
            matched_box_ious = tf.reduce_max(box_ious, 2)
            model_outputs.update({
                'box_iou_targets': matched_box_ious,
            })

        roi_features = spatial_transform_ops.multilevel_crop_and_resize(
            fpn_features, rpn_rois, output_size=7)

        # The box head returns an extra score output when the box-score head
        # is enabled.
        if not self._include_box_score:
            class_outputs, box_outputs = self._frcnn_head_fn(
                roi_features, is_training)
        else:
            class_outputs, box_outputs, score_outputs = self._frcnn_head_fn(
                roi_features, is_training)
            model_outputs.update({
                'box_score_outputs': to_float32(score_outputs),
            })
        model_outputs.update({
            'class_outputs': to_float32(class_outputs),
            'box_outputs': to_float32(box_outputs),
        })

        # Detections are generated in train mode too so that the checkpoint
        # stays loadable in predict mode. If we skip it in train mode, the
        # heads will be out-of-order and checkpoint loading will fail.
        if not self._include_frcnn_box:
            box_outputs = tf.zeros_like(box_outputs)  # dummy zeros.

        if self._include_box_score:
            score_outputs = tf.cast(tf.squeeze(score_outputs, -1),
                                    rpn_roi_scores.dtype)

            # box-score = (rpn-centerness * box-iou)^(1/2)
            # Original author's shape notes (unverified):
            # TR: rpn_roi_scores: b,1000, score_outputs: b,512
            # TS: rpn_roi_scores: b,1000, score_outputs: b,1000
            box_scores = tf.pow(rpn_roi_scores * tf.sigmoid(score_outputs),
                                1 / 2.)

        if not self._include_frcnn_class:
            # `box_scores` is guaranteed to exist here by the configuration
            # check at the top of this method.
            boxes, scores, classes, valid_detections = (
                self._generate_detections_fn(
                    box_outputs,
                    box_scores,
                    rpn_rois,
                    inputs['image_info'][:, 1:2, :],
                    is_single_fg_score=True,
                    keep_nms=True,
                ))
        else:
            boxes, scores, classes, valid_detections = (
                self._generate_detections_fn(
                    box_outputs,
                    class_outputs,
                    rpn_rois,
                    inputs['image_info'][:, 1:2, :],
                    keep_nms=True,
                ))
        model_outputs.update({
            'num_detections': valid_detections,
            'detection_boxes': boxes,
            'detection_classes': classes,
            'detection_scores': scores,
        })

        # ---- OLN-Box finishes here. ----

        if not self._include_mask:
            return model_outputs

        # Mask head: trained on sampled RoIs, evaluated on the final
        # detection boxes.
        if is_training:
            rpn_rois, classes, mask_targets = self._sample_masks_fn(
                rpn_rois, matched_gt_boxes, matched_gt_classes,
                matched_gt_indices, inputs['gt_masks'])
            mask_targets = tf.stop_gradient(mask_targets)

            classes = tf.cast(classes, dtype=tf.int32)

            model_outputs.update({
                'mask_targets': mask_targets,
                'sampled_class_targets': classes,
            })
        else:
            rpn_rois = boxes
            classes = tf.cast(classes, dtype=tf.int32)

        mask_roi_features = spatial_transform_ops.multilevel_crop_and_resize(
            fpn_features, rpn_rois, output_size=14)

        mask_outputs = self._mrcnn_head_fn(mask_roi_features, classes,
                                           is_training)

        if is_training:
            model_outputs.update({
                'mask_outputs': to_float32(mask_outputs),
            })
        else:
            model_outputs.update(
                {'detection_masks': tf.nn.sigmoid(mask_outputs)})

        return model_outputs