Example #1
    def _model_outputs():
        """Generates outputs from the model."""
        fpn_feats, rpn_fn, faster_rcnn_fn, mask_rcnn_fn = model(
            features, labels, all_anchors, mode, params)
        rpn_score_outputs, rpn_box_outputs = rpn_fn(fpn_feats)
        (class_outputs, box_outputs, class_targets, box_targets, box_rois,
         proposal_to_label_map) = faster_rcnn_fn(fpn_feats, rpn_score_outputs,
                                                 rpn_box_outputs)
        encoded_box_targets = mask_rcnn_architecture.encode_box_targets(
            box_rois, box_targets, class_targets, params['bbox_reg_weights'])

        if mode != tf.estimator.ModeKeys.TRAIN:
            # Use TEST.NMS in the reference for this value. Reference: https://github.com/ddkang/Detectron/blob/80f329530843e66d07ca39e19901d5f3e5daf009/lib/core/config.py#L227  # pylint: disable=line-too-long

            # The mask branch takes inputs from different places in training vs in
            # eval/predict. In training, the mask branch uses proposals combined with
            # labels to produce both mask outputs and targets. At test time, it uses
            # the post-processed predictions to generate masks.
            # Generate detections one image at a time.
            batch_size, _, _ = class_outputs.get_shape().as_list()
            detections = []
            softmax_class_outputs = tf.nn.softmax(class_outputs)
            for i in range(batch_size):
                detections.append(
                    anchors.generate_detections_per_image_op(
                        softmax_class_outputs[i], box_outputs[i], box_rois[i],
                        labels['source_ids'][i], labels['image_info'][i],
                        params['test_detections_per_image'],
                        params['test_rpn_post_nms_topn'], params['test_nms'],
                        params['bbox_reg_weights']))
            detections = tf.stack(detections, axis=0)
            mask_outputs = mask_rcnn_fn(fpn_feats, detections=detections)
        else:
            (mask_outputs, select_class_targets, select_box_targets,
             select_box_rois, select_proposal_to_label_map,
             mask_targets) = mask_rcnn_fn(fpn_feats, class_targets,
                                          box_targets, box_rois,
                                          proposal_to_label_map)

        model_outputs = {
            'rpn_score_outputs': rpn_score_outputs,
            'rpn_box_outputs': rpn_box_outputs,
            'class_outputs': class_outputs,
            'box_outputs': box_outputs,
            'class_targets': class_targets,
            'box_targets': encoded_box_targets,
            'box_rois': box_rois,
            'mask_outputs': mask_outputs,
        }
        if mode == tf.estimator.ModeKeys.TRAIN:
            model_outputs.update({
                'select_class_targets': select_class_targets,
                'select_box_targets': select_box_targets,
                'select_box_rois': select_box_rois,
                'select_proposal_to_label_map': select_proposal_to_label_map,
                'mask_targets': mask_targets,
            })
        else:
            model_outputs.update({'detections': detections})
        return model_outputs
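
The eval/predict branch above loops over the batch in Python because generate_detections_per_image_op handles a single image at a time; the per-image results are then re-batched with tf.stack. Below is a minimal, self-contained sketch of that pattern in TF 1.x style; the stand-in op, shapes, and values are illustrative only, not the real detection op.

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

def _toy_per_image_op(scores, boxes):
    # Stand-in for generate_detections_per_image_op: keep the single
    # highest-scoring box of the image. scores: [N, C], boxes: [N, 4].
    best = tf.argmax(tf.reduce_max(scores, axis=1), output_type=tf.int32)
    return boxes[best]

class_outputs = tf.random_uniform([2, 10, 5])  # [batch, proposals, classes]
box_outputs = tf.random_uniform([2, 10, 4])    # [batch, proposals, 4]

batch_size, _, _ = class_outputs.get_shape().as_list()
softmax_class_outputs = tf.nn.softmax(class_outputs)
detections = []
for i in range(batch_size):  # static Python loop; one op per image
    detections.append(_toy_per_image_op(softmax_class_outputs[i], box_outputs[i]))
detections = tf.stack(detections, axis=0)  # re-batched: [batch, 4]

with tf.Session() as sess:
    print(sess.run(detections).shape)  # (2, 4)
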
Example #2
    def _model_outputs():
        """Generates outputs from the model."""

        model_outputs = {}

        with tf.variable_scope('resnet%s' % params['resnet_depth']):
            resnet_fn = resnet.resnet_v1(
                params['resnet_depth'],
                num_batch_norm_group=params['num_batch_norm_group'])
            backbone_feats = resnet_fn(features['images'],
                                       params['is_training_bn'])

        fpn_feats = fpn.fpn(backbone_feats, params['min_level'],
                            params['max_level'])

        rpn_score_outputs, rpn_box_outputs = heads.rpn_head(
            fpn_feats, params['min_level'], params['max_level'],
            len(params['aspect_ratios']) * params['num_scales'])

        # Larger RPN proposal budgets are used at training time than at
        # eval/predict time.
        if mode == tf.estimator.ModeKeys.TRAIN:
            rpn_pre_nms_topn = params['rpn_pre_nms_topn']
            rpn_post_nms_topn = params['rpn_post_nms_topn']
        else:
            rpn_pre_nms_topn = params['test_rpn_pre_nms_topn']
            rpn_post_nms_topn = params['test_rpn_post_nms_topn']

        # Turn the RPN outputs into scored RoIs via top-k selection and NMS.
        _, rpn_box_rois = mask_rcnn_architecture.proposal_op(
            rpn_score_outputs, rpn_box_outputs, all_anchors,
            features['image_info'], rpn_pre_nms_topn, rpn_post_nms_topn,
            params['rpn_nms_threshold'], params['rpn_min_size'])
        rpn_box_rois = tf.to_float(rpn_box_rois)

        if mode == tf.estimator.ModeKeys.TRAIN:
            # Sample foreground/background RoIs and match them to ground truth.
            box_targets, class_targets, rpn_box_rois, proposal_to_label_map = (
                mask_rcnn_architecture.proposal_label_op(
                    rpn_box_rois,
                    labels['gt_boxes'],
                    labels['gt_classes'],
                    features['image_info'],
                    batch_size_per_im=params['batch_size_per_im'],
                    fg_fraction=params['fg_fraction'],
                    fg_thresh=params['fg_thresh'],
                    bg_thresh_hi=params['bg_thresh_hi'],
                    bg_thresh_lo=params['bg_thresh_lo']))

        # Performs multi-level RoIAlign.
        box_roi_features = ops.multilevel_crop_and_resize(fpn_feats,
                                                          rpn_box_rois,
                                                          output_size=7)

        class_outputs, box_outputs = heads.box_head(
            box_roi_features,
            num_classes=params['num_classes'],
            mlp_head_dim=params['fast_rcnn_mlp_head_dim'])

        if mode != tf.estimator.ModeKeys.TRAIN:
            batch_size, _, _ = class_outputs.get_shape().as_list()
            detections = []
            softmax_class_outputs = tf.nn.softmax(class_outputs)
            for i in range(batch_size):
                detections.append(
                    anchors.generate_detections_per_image_op(
                        softmax_class_outputs[i], box_outputs[i],
                        rpn_box_rois[i], features['source_ids'][i],
                        features['image_info'][i],
                        params['test_detections_per_image'],
                        params['test_rpn_post_nms_topn'], params['test_nms'],
                        params['bbox_reg_weights']))
            detections = tf.stack(detections, axis=0)
            model_outputs.update({
                'detections': detections,
            })
        else:
            encoded_box_targets = mask_rcnn_architecture.encode_box_targets(
                rpn_box_rois, box_targets, class_targets,
                params['bbox_reg_weights'])
            model_outputs.update({
                'rpn_score_outputs': rpn_score_outputs,
                'rpn_box_outputs': rpn_box_outputs,
                'class_outputs': class_outputs,
                'box_outputs': box_outputs,
                'class_targets': class_targets,
                'box_targets': encoded_box_targets,
                'box_rois': rpn_box_rois,
            })

        # Faster-RCNN mode.
        if not params['include_mask']:
            return model_outputs

        # Mask sampling
        if mode != tf.estimator.ModeKeys.TRAIN:
            # At test time the mask head consumes the post-NMS detections:
            # columns 1:5 hold the box coordinates and column 6 the predicted
            # class.
            selected_box_rois = detections[:, :, 1:5]
            class_indices = tf.to_int32(detections[:, :, 6])
        else:
            (selected_class_targets, selected_box_targets, selected_box_rois,
             proposal_to_label_map) = (
                 mask_rcnn_architecture.select_fg_for_masks(
                     class_targets,
                     box_targets,
                     rpn_box_rois,
                     proposal_to_label_map,
                     max_num_fg=int(params['batch_size_per_im'] *
                                    params['fg_fraction'])))
            class_indices = tf.to_int32(selected_class_targets)

        mask_roi_features = ops.multilevel_crop_and_resize(fpn_feats,
                                                           selected_box_rois,
                                                           output_size=14)
        mask_outputs = heads.mask_head(
            mask_roi_features,
            class_indices,
            num_classes=params['num_classes'],
            mrcnn_resolution=params['mrcnn_resolution'])

        model_outputs.update({
            'mask_outputs': mask_outputs,
        })

        if mode == tf.estimator.ModeKeys.TRAIN:
            mask_targets = mask_rcnn_architecture.get_mask_targets(
                selected_box_rois, proposal_to_label_map, selected_box_targets,
                labels['cropped_gt_masks'], params['mrcnn_resolution'])
            model_outputs.update({
                'mask_targets': mask_targets,
                'selected_class_targets': selected_class_targets,
            })

        return model_outputs
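
encode_box_targets turns matched ground-truth boxes into the regression deltas that box_outputs are trained against, scaled by params['bbox_reg_weights']. A hedged NumPy sketch of the standard Faster R-CNN encoding it presumably implements follows; the [ymin, xmin, ymax, xmax] box layout, the default weights, and the omission of per-class handling are assumptions, not taken from the real function.

import numpy as np

def encode_box_targets_sketch(rois, gt_boxes, weights=(10.0, 10.0, 5.0, 5.0)):
    # rois, gt_boxes: [N, 4] in assumed [ymin, xmin, ymax, xmax] order.
    wy, wx, wh, ww = weights
    roi_h = rois[:, 2] - rois[:, 0]
    roi_w = rois[:, 3] - rois[:, 1]
    roi_cy = rois[:, 0] + 0.5 * roi_h
    roi_cx = rois[:, 1] + 0.5 * roi_w
    gt_h = gt_boxes[:, 2] - gt_boxes[:, 0]
    gt_w = gt_boxes[:, 3] - gt_boxes[:, 1]
    gt_cy = gt_boxes[:, 0] + 0.5 * gt_h
    gt_cx = gt_boxes[:, 1] + 0.5 * gt_w
    # Standard Faster R-CNN deltas, scaled by the regression weights.
    dy = wy * (gt_cy - roi_cy) / roi_h
    dx = wx * (gt_cx - roi_cx) / roi_w
    dh = wh * np.log(gt_h / roi_h)
    dw = ww * np.log(gt_w / roi_w)
    return np.stack([dy, dx, dh, dw], axis=1)
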
Example #3
  def _model_outputs():
    """Generates outputs from the model."""

    fpn_feats = mask_rcnn_architecture.resnet_fpn(
        features['images'],
        params['min_level'], params['max_level'], params['resnet_depth'],
        params['is_training_bn'])

    rpn_score_outputs, rpn_box_outputs = mask_rcnn_architecture.rpn_net(
        fpn_feats,
        params['min_level'], params['max_level'],
        len(params['aspect_ratios']) * params['num_scales'])

    if mode != tf.estimator.ModeKeys.TRAIN:
      # The mask branch takes inputs from different places in training vs in
      # eval/predict. In training, the mask branch uses proposals combined with
      # labels to produce both mask outputs and targets. At test time, it uses
      # the post-processed predictions to generate masks.
      # Generate detections one image at a time.
      class_outputs, box_outputs, box_rois = (
          mask_rcnn_architecture.faster_rcnn_fn(
              fpn_feats, rpn_score_outputs, rpn_box_outputs,
              all_anchors, features['image_info'], params,
              is_training=False))

      batch_size, _, _ = class_outputs.get_shape().as_list()
      detections = []
      softmax_class_outputs = tf.nn.softmax(class_outputs)
      for i in range(batch_size):
        detections.append(
            anchors.generate_detections_per_image_op(
                softmax_class_outputs[i], box_outputs[i], box_rois[i],
                features['source_ids'][i], features['image_info'][i],
                params['test_detections_per_image'],
                params['test_rpn_post_nms_topn'], params['test_nms'],
                params['bbox_reg_weights']))
      detections = tf.stack(detections, axis=0)
      if params['include_mask']:
        mask_outputs = mask_rcnn_architecture.mask_rcnn_fn(
            fpn_feats, params, is_training=False, detections=detections)
    else:
      (class_outputs, box_outputs, box_rois, class_targets, box_targets,
       proposal_to_label_map) = mask_rcnn_architecture.faster_rcnn_fn(
           fpn_feats, rpn_score_outputs, rpn_box_outputs, all_anchors,
           features['image_info'], params, is_training=True, labels=labels)

      encoded_box_targets = mask_rcnn_architecture.encode_box_targets(
          box_rois, box_targets, class_targets, params['bbox_reg_weights'])

      if params['include_mask']:
        mask_outputs, select_class_targets, mask_targets = (
            mask_rcnn_architecture.mask_rcnn_fn(
                fpn_feats, params, is_training=True, detections=None,
                labels=labels,
                class_targets=class_targets,
                box_targets=box_targets,
                box_rois=box_rois,
                proposal_to_label_map=proposal_to_label_map))

    if mode == tf.estimator.ModeKeys.TRAIN:
      model_outputs = {
          'rpn_score_outputs': rpn_score_outputs,
          'rpn_box_outputs': rpn_box_outputs,
          'class_outputs': class_outputs,
          'box_outputs': box_outputs,
          'class_targets': class_targets,
          'box_targets': encoded_box_targets,
          'box_rois': box_rois,
      }
      if params['include_mask']:
        model_outputs.update({
            'mask_outputs': mask_outputs,
            'mask_targets': mask_targets,
            'select_class_targets': select_class_targets,
        })
    else:
      model_outputs = {
          'detections': detections,
      }
      if params['include_mask']:
        model_outputs.update({
            'mask_outputs': mask_outputs,
        })
    return model_outputs
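
All of these variants read their configuration from a flat params dict. The snippet below lists a hypothetical minimal subset of the keys this example dereferences; the values are placeholders for illustration, not the reference configuration.

params = {
    'min_level': 2,                  # lowest FPN pyramid level
    'max_level': 6,                  # highest FPN pyramid level
    'resnet_depth': 50,
    'is_training_bn': False,
    'num_scales': 1,
    'aspect_ratios': [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
    'bbox_reg_weights': (10.0, 10.0, 5.0, 5.0),
    'include_mask': True,            # False yields the Faster R-CNN-only outputs
    'test_detections_per_image': 100,
    'test_rpn_post_nms_topn': 1000,
    'test_nms': 0.5,                 # TEST.NMS threshold, per the Detectron reference
}
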
Example #4
  def _model_outputs(self, features, labels, image_size, mode, params):
    """Generates outputs from the model."""
    all_anchors = anchors.Anchors(
        params['min_level'], params['max_level'], params['num_scales'],
        params['aspect_ratios'], params['anchor_scale'], image_size)

    # conv0's space-to-depth transform shrinks the spatial dimensions by the
    # block size, so adjust the working image size to match.
    if params['conv0_space_to_depth_block_size'] != 0:
      image_size = tuple(x // params['conv0_space_to_depth_block_size']
                         for x in image_size)

    if params['transpose_input']:
      # The input pipeline transposed images to an [H, W, N, C]-style layout
      # for TPU efficiency; reshape and transpose back to NHWC here.
      images = tf.reshape(
          features['images'],
          [image_size[0], image_size[1], params['batch_size'], -1])
      images = tf.transpose(images, [2, 0, 1, 3])
    else:
      images = tf.reshape(
          features['images'],
          [params['batch_size'], image_size[0], image_size[1], -1])

    fpn_feats = fpn.resnet_fpn(images, params['min_level'],
                               params['max_level'], params['resnet_depth'],
                               params['conv0_kernel_size'],
                               params['conv0_space_to_depth_block_size'],
                               params['is_training_bn'])

    rpn_score_outputs, rpn_box_outputs = mask_rcnn_architecture.rpn_net(
        fpn_feats, params['min_level'], params['max_level'],
        len(params['aspect_ratios']) * params['num_scales'])

    if mode == tf.estimator.ModeKeys.PREDICT:
      # Use TEST.NMS in the reference for this value. Reference: https://github.com/ddkang/Detectron/blob/80f329530843e66d07ca39e19901d5f3e5daf009/lib/core/config.py#L227  # pylint: disable=line-too-long

      # The mask branch takes inputs from different places in training vs in
      # eval/predict. In training, the mask branch uses proposals combined
      # with labels to produce both mask outputs and targets. At test time,
      # it uses the post-processed predictions to generate masks.
      # Generate detections one image at a time.
      (class_outputs, box_outputs,
       box_rois) = mask_rcnn_architecture.faster_rcnn(
           fpn_feats, rpn_score_outputs, rpn_box_outputs, all_anchors,
           features['image_info'], mode, params)
      batch_size, _, _ = class_outputs.get_shape().as_list()
      detections = []
      softmax_class_outputs = tf.nn.softmax(class_outputs)
      for i in range(batch_size):
        # Pin each per-image detection op to one core of the model-parallel
        # replica.
        device = core_assignment_utils.get_core_assignment(
            i, params['num_cores_per_replica'])
        with tf.device(device):
          detections.append(
              post_processing.generate_detections_per_image_op(
                  softmax_class_outputs[i], box_outputs[i], box_rois[i],
                  features['source_ids'][i], features['image_info'][i],
                  params['test_detections_per_image'],
                  params['test_rpn_post_nms_topn'], params['test_nms'],
                  params['bbox_reg_weights']))
      device = core_assignment_utils.get_core_assignment(
          core_assignment_utils.CORE_1, params['num_cores_per_replica'])
      with tf.device(device):
        detections = tf.stack(detections, axis=0)
        mask_outputs = mask_rcnn_architecture.mask_rcnn(
            fpn_feats, mode, params, detections=detections)
        return {'detections': detections, 'mask_outputs': mask_outputs}
    else:
      (class_outputs, box_outputs, box_rois, class_targets, box_targets,
       proposal_to_label_map) = mask_rcnn_architecture.faster_rcnn(
           fpn_feats, rpn_score_outputs, rpn_box_outputs, all_anchors,
           features['image_info'], mode, params, labels)
      encoded_box_targets = mask_rcnn_architecture.encode_box_targets(
          box_rois, box_targets, class_targets, params['bbox_reg_weights'])
      (mask_outputs, select_class_targets,
       mask_targets) = mask_rcnn_architecture.mask_rcnn(
           fpn_feats, mode, params, labels, class_targets, box_targets,
           box_rois, proposal_to_label_map)
      return {
          'rpn_score_outputs': rpn_score_outputs,
          'rpn_box_outputs': rpn_box_outputs,
          'class_outputs': class_outputs,
          'box_outputs': box_outputs,
          'class_targets': class_targets,
          'box_targets': encoded_box_targets,
          'box_rois': box_rois,
          'select_class_targets': select_class_targets,
          'mask_outputs': mask_outputs,
          'mask_targets': mask_targets,
      }
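
The transpose_input branch in Example #4 assumes the input pipeline shipped the image batch in [H, W, N, C] order (a common TPU throughput optimization) and restores NHWC on the device. Below is a small NumPy round-trip check of that reshape-plus-transpose logic; the host-side layout is inferred from the reshape, and the shapes are toy values.

import numpy as np

batch, h, w, c = 2, 4, 4, 3
nhwc = np.arange(batch * h * w * c).reshape(batch, h, w, c)

# Host side: transpose NHWC -> HWNC and flatten, as the input pipeline would.
flat = nhwc.transpose(1, 2, 0, 3).reshape(-1)

# Device side, as in Example #4: reshape to [H, W, N, C], then back to NHWC.
images = flat.reshape(h, w, batch, -1).transpose(2, 0, 1, 3)

assert (images == nhwc).all()
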