Ejemplo n.º 1
0
  def _get_rpn_samples(self, match_results):
    """Subsamples RPN anchors and derives their objectness targets.

    Anchors are split into foreground (fg), background (bg) and ignored
    anchors according to `match_results`; a subset of the fg/bg anchors is then
    drawn with a `BalancedPositiveNegativeSampler`.

    Args:
      match_results: an integer tensor with shape [N] representing the
        matching results of anchors. (1) match_results[i]>=0, meaning that
        column i is matched with row match_results[i].
        (2) match_results[i]=-1, meaning that column i is not matched.
        (3) match_results[i]=-2, meaning that column i is ignored.

    Returns:
      score_targets: an int32 tensor with a shape of [N].
        (1) score_targets[i]=1, the anchor is a sampled positive.
        (2) score_targets[i]=0, the anchor is a sampled negative.
        (3) score_targets[i]=-1, the anchor is don't care (ignored or not
        picked by the sampler).
      positive_labels: an int32 tensor with a shape of [N] that holds 2 for
        sampled positive anchors and 0 everywhere else.
      negative_labels: an int32 tensor with a shape of [N] that holds 1 for
        sampled negative anchors and 0 everywhere else.
    """
    fg_bg_sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=self._rpn_fg_fraction, is_static=False))

    # Sampling candidates are all anchors that are not ignored (-2), i.e. both
    # fg and bg anchors; the fg ones are those with a real match (>= 0).
    #   positives = sampled & is_foreground
    #   negatives = sampled & !is_foreground
    #   ignore    = everything else
    is_candidate = tf.greater(match_results, -2)
    is_foreground = tf.greater(match_results, -1)
    sampled = fg_bg_sampler.subsample(
        is_candidate, self._rpn_batch_size_per_im, is_foreground)

    anchors_shape = match_results.shape

    def _mark(mask, value):
      """Returns an int32 tensor holding `value` where `mask` is set, else 0."""
      return tf.where(
          mask,
          tf.constant(value, dtype=tf.int32, shape=anchors_shape),
          tf.constant(0, dtype=tf.int32, shape=anchors_shape))

    positive_labels = _mark(tf.logical_and(sampled, is_foreground), 2)
    negative_labels = _mark(
        tf.logical_and(sampled, tf.logical_not(is_foreground)), 1)

    # Start from "ignore" (-1) everywhere; adding 2 yields 1 for positives and
    # adding 1 yields 0 for negatives.
    score_targets = (
        tf.fill(anchors_shape, -1) + positive_labels + negative_labels)
    return score_targets, positive_labels, negative_labels
Ejemplo n.º 2
0
def proposal_label_op(boxes,
                      gt_boxes,
                      gt_labels,
                      image_info,
                      batch_size_per_im=512,
                      fg_fraction=0.25,
                      fg_thresh=0.5,
                      bg_thresh_hi=0.5,
                      bg_thresh_lo=0.):
    """Assigns the proposals with ground truth labels and performs subsampling.

    Given proposal `boxes`, `gt_boxes`, and `gt_labels`, the function uses the
    following algorithm to generate the final `batch_size_per_im` RoIs.
    1. Calculates the IoU between each proposal box and each gt_boxes.
    2. Assigns each proposal box with a ground truth class and box label by
       choosing the largest overlap.
    3. Samples `batch_size_per_im` boxes from all proposal boxes, and returns
       box_targets, class_targets, and RoIs.
    The reference implementations of #1 and #2 are here: https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py  # pylint: disable=line-too-long
    The reference implementation of #3 is here: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py.  # pylint: disable=line-too-long

    Args:
      boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
        proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The
        last dimension is the pixel coordinates of scaled images in
        [ymin, xmin, ymax, xmax] form.
      gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4].
        This tensor might have paddings with a value of -1. The coordinates of
        gt_boxes are in the pixel coordinates of the original image scale.
      gt_labels: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
        tensor might have paddings with a value of -1.
      image_info: a tensor of shape [batch_size, 5] where the five columns
        encode the input image's [height, width, scale,
        original_height, original_width]. Height and width are for
        the input to the network, not the original image; scale is the scale
        factor used to scale the network input size to the original image
        size. See dataloader.DetectionInputProcessor for details. The last two
        are original height and width.
      batch_size_per_im: an integer represents RoI minibatch size per image.
      fg_fraction: a float represents the target fraction of RoI minibatch that
        is labeled foreground (i.e., class > 0).
      fg_thresh: a float represents the overlap threshold for an RoI to be
        considered foreground (if >= fg_thresh).
      bg_thresh_hi: a float represents the overlap threshold for an RoI to be
        considered background (class = 0 if overlap in [LO, HI)).
      bg_thresh_lo: a float represents the overlap threshold for an RoI to be
        considered background (class = 0 if overlap in [LO, HI)).

    Returns:
      box_targets: a tensor with a shape of [batch_size, K, 4]. The tensor
        contains the ground truth pixel coordinates of the scaled images for
        each roi. K is the number of sample RoIs (e.g., batch_size_per_im).
      class_targets: an integer tensor with a shape of [batch_size, K]. The
        tensor contains the ground truth class for each roi.
      rois: a tensor with a shape of [batch_size, K, 4], representing the
        coordinates of the selected RoI.
      proposal_to_label_map: a tensor with a shape of [batch_size, K]. This
        tensor keeps the mapping between proposal to labels.
        proposal_to_label_map[i] means the index of the ground truth instance
        for the i-th proposal.
    """
    with tf.name_scope('proposal_label'):
        batch_size = boxes.shape[0]
        # Scales ground truth boxes to the scaled image coordinates.
        image_scale = 1 / image_info[:, 2]
        scaled_gt_boxes = gt_boxes * tf.reshape(image_scale,
                                                [batch_size, 1, 1])

        # The reference implementation intentionally includes ground truth boxes in
        # the proposals. see https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py#L359.  # pylint: disable=line-too-long
        boxes = tf.concat([boxes, scaled_gt_boxes], axis=1)
        iou = box_utils.bbox_overlap(boxes, scaled_gt_boxes)

        (pre_sample_box_targets, pre_sample_class_targets, max_overlap,
         proposal_to_label_map) = _add_class_assignments(
             iou, scaled_gt_boxes, gt_labels)

        # Generates a random sample of RoIs comprising foreground and background
        # examples. reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py#L132  # pylint: disable=line-too-long
        # The foreground threshold is inclusive, as documented above and as in
        # the reference implementation. With a strict comparison and the
        # default fg_thresh == bg_thresh_hi, a proposal whose overlap equals the
        # threshold would be neither foreground nor background and would never
        # be sampled.
        positives = tf.greater_equal(max_overlap,
                                     fg_thresh * tf.ones_like(max_overlap))
        negatives = tf.logical_and(
            tf.greater_equal(max_overlap,
                             bg_thresh_lo * tf.ones_like(max_overlap)),
            tf.less(max_overlap, bg_thresh_hi * tf.ones_like(max_overlap)))
        # Background proposals get class 0 and label-map index 0.
        pre_sample_class_targets = tf.where(
            negatives, tf.zeros_like(pre_sample_class_targets),
            pre_sample_class_targets)
        proposal_to_label_map = tf.where(negatives,
                                         tf.zeros_like(proposal_to_label_map),
                                         proposal_to_label_map)

        # Handles ground truth paddings: a proposal is ignored when its minimum
        # IoU over the ground truth boxes is negative.
        # NOTE(review): presumably box_utils.bbox_overlap reports a negative
        # IoU against padded ground truth boxes -- confirm.
        ignore_mask = tf.less(tf.reduce_min(iou, axis=2),
                              tf.zeros_like(max_overlap))
        # indicator includes both positive and negative labels.
        # labels includes only positives labels.
        # positives = indicator & labels.
        # negatives = indicator & !labels.
        # ignore = !indicator.
        labels = positives
        pos_or_neg = tf.logical_or(positives, negatives)
        indicator = tf.logical_and(pos_or_neg, tf.logical_not(ignore_mask))

        sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=fg_fraction, is_static=True))
        # Batch-unroll the sub-sampling process.
        per_image_samples = [
            sampler.subsample(indicator[i], batch_size_per_im, labels[i])
            for i in range(batch_size)
        ]
        all_samples = tf.stack(per_image_samples, axis=0)
        # A workaround to get the indices from the boolean tensors: the top-k
        # entries of the 0/1 mask are the sampled proposals.
        _, samples_indices = tf.nn.top_k(tf.cast(all_samples, tf.int32),
                                         k=batch_size_per_im,
                                         sorted=True)
        # Constructs indices for gather: offsets each image's indices by its
        # position in the flattened [batch_size * num_boxes] layout.
        samples_indices = tf.reshape(
            samples_indices +
            tf.expand_dims(tf.range(batch_size) * tf.shape(boxes)[1], 1), [-1])
        rois = tf.reshape(
            tf.gather(tf.reshape(boxes, [-1, 4]), samples_indices),
            [batch_size, -1, 4])
        class_targets = tf.reshape(
            tf.gather(tf.reshape(pre_sample_class_targets, [-1, 1]),
                      samples_indices), [batch_size, -1])
        sample_box_targets = tf.reshape(
            tf.gather(tf.reshape(pre_sample_box_targets, [-1, 4]),
                      samples_indices), [batch_size, -1, 4])
        sample_proposal_to_label_map = tf.reshape(
            tf.gather(tf.reshape(proposal_to_label_map, [-1, 1]),
                      samples_indices), [batch_size, -1])
    return sample_box_targets, class_targets, rois, sample_proposal_to_label_map