예제 #1
0
    def _get_rpn_samples(self, match_results):
        """Subsamples RPN anchors and derives per-anchor score targets.

        Foreground (fg) and background (bg) anchors are drawn with a
        `BalancedPositiveNegativeSampler` configured with
        `self._rpn_fg_fraction`; at most `self._rpn_batch_size_per_im`
        anchors are requested from the sampler.

        Args:
          match_results: An integer tensor with shape [N] representing the
            matching results of anchors.
            (1) match_results[i] >= 0: column i is matched with row
                match_results[i].
            (2) match_results[i] = -1: column i is not matched.
            (3) match_results[i] = -2: column i is ignored.
            Its static shape is used to build the constant label tensors.

        Returns:
          A tuple `(score_targets, positive_labels, negative_labels)` of
          integer tensors with the same shape as `match_results`:
            score_targets: 1 for a sampled positive anchor, 0 for a sampled
              negative anchor, and -1 for every other anchor (don't care).
            positive_labels: 2 where the anchor is a sampled positive,
              0 elsewhere.
            negative_labels: 1 where the anchor is a sampled negative,
              0 elsewhere.
        """
        fg_bg_sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=self._rpn_fg_fraction, is_static=False))
        target_shape = match_results.shape

        def _int_constant(value):
            # Constant int32 tensor shaped like `match_results`.
            return tf.constant(value, dtype=tf.int32, shape=target_shape)

        # Candidates are all anchors that are not ignored (match > -2); the
        # foreground ones are those matched to some row (match > -1).
        #   positives = candidate &  foreground
        #   negatives = candidate & !foreground
        #   ignored   = !candidate
        is_candidate = tf.greater(match_results, -2)
        is_foreground = tf.greater(match_results, -1)

        is_sampled = fg_bg_sampler.subsample(
            is_candidate, self._rpn_batch_size_per_im, is_foreground)

        sampled_fg = tf.logical_and(is_sampled, is_foreground)
        positive_labels = tf.where(sampled_fg, _int_constant(2),
                                   _int_constant(0))

        sampled_bg = tf.logical_and(is_sampled,
                                    tf.logical_not(is_foreground))
        negative_labels = tf.where(sampled_bg, _int_constant(1),
                                   _int_constant(0))

        # Start from -1 everywhere: -1 + 2 = 1 for sampled positives,
        # -1 + 1 = 0 for sampled negatives, and -1 stays for the rest.
        score_targets = (
            tf.fill(target_shape, -1) + positive_labels + negative_labels)

        return score_targets, positive_labels, negative_labels
예제 #2
0
def assign_and_sample_proposals(proposed_boxes,
                                gt_boxes,
                                gt_classes,
                                num_samples_per_image=512,
                                mix_gt_boxes=True,
                                fg_fraction=0.25,
                                fg_iou_thresh=0.5,
                                bg_iou_thresh_hi=0.5,
                                bg_iou_thresh_lo=0.0):
    """Assigns the proposals with groundtruth classes and performs subsampling.

    Given `proposed_boxes`, `gt_boxes`, and `gt_classes`, the function uses the
    following algorithm to generate the final `num_samples_per_image` RoIs.
      1. Calculates the IoU between each proposal box and each gt_boxes.
      2. Assigns each proposed box with a groundtruth class and box by
         choosing the largest IoU overlap.
      3. Samples `num_samples_per_image` boxes from all proposed boxes, and
         returns box_targets, class_targets, and RoIs.

    The batch dimension must be statically known, because the sampler is
    applied to each image in a Python loop.

    Args:
      proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number
        of proposals before groundtruth assignment. The last dimension is the
        box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
        format.
      gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
        coordinates of gt_boxes are in the pixel coordinates of the scaled
        image. This tensor might have padding of values -1 indicating the
        invalid box coordinates.
      gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
        This tensor might have paddings with values of -1 indicating the
        invalid classes.
      num_samples_per_image: an integer representing the RoI minibatch size
        per image.
      mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes
        before sampling proposals.
      fg_fraction: a float representing the target fraction of the RoI
        minibatch that is labeled foreground (i.e., class > 0).
      fg_iou_thresh: a float representing the IoU overlap threshold for an RoI
        to be considered foreground (if >= fg_iou_thresh).
      bg_iou_thresh_hi: a float representing the IoU overlap threshold for an
        RoI to be considered background (class = 0 if overlap in [LO, HI)).
      bg_iou_thresh_lo: a float representing the IoU overlap threshold for an
        RoI to be considered background (class = 0 if overlap in [LO, HI)).

    Returns:
      sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
        coordinates of the sampled RoIs, where K is the number of the sampled
        RoIs, i.e. K = num_samples_per_image.
      sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
        box coordinates of the matched groundtruth boxes of the sampled RoIs.
      sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
        classes of the matched groundtruth boxes of the sampled RoIs.
        Background RoIs carry class 0.
      sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
        indices of the sampled groundtruth boxes in the original `gt_boxes`
        tensor, i.e. gt_boxes[sampled_gt_indices[:, i]] =
        sampled_gt_boxes[:, i]. Background RoIs carry index 0.
    """

    with tf.name_scope('sample_proposals'):
        if mix_gt_boxes:
            boxes = tf.concat([proposed_boxes, gt_boxes], axis=1)
        else:
            boxes = proposed_boxes

        (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou,
         _) = box_matching(boxes, gt_boxes, gt_classes)

        # Foreground is inclusive of the threshold (IoU >= fg_iou_thresh), as
        # documented. With a strict comparison and the default thresholds
        # (fg_iou_thresh == bg_iou_thresh_hi), an RoI whose IoU equals the
        # threshold would be neither foreground nor background and would be
        # dropped from the sampling candidates.
        positive_match = tf.greater_equal(matched_iou, fg_iou_thresh)
        negative_match = tf.logical_and(
            tf.greater_equal(matched_iou, bg_iou_thresh_lo),
            tf.less(matched_iou, bg_iou_thresh_hi))
        # A negative IoU marks an entry that must never be sampled
        # (presumably produced by `box_matching` for padded/invalid boxes --
        # confirm against its implementation).
        ignored_match = tf.less(matched_iou, 0.0)

        # Re-assign negatively matched boxes to the background class.
        matched_gt_classes = tf.where(negative_match,
                                      tf.zeros_like(matched_gt_classes),
                                      matched_gt_classes)
        matched_gt_indices = tf.where(negative_match,
                                      tf.zeros_like(matched_gt_indices),
                                      matched_gt_indices)

        sample_candidates = tf.logical_and(
            tf.logical_or(positive_match, negative_match),
            tf.logical_not(ignored_match))

        sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=fg_fraction, is_static=True))

        # The sampler works on one image at a time, so iterate over the
        # (static) batch dimension and stack the per-image masks.
        batch_size, _ = sample_candidates.get_shape().as_list()
        sampled_indicators = tf.stack([
            sampler.subsample(sample_candidates[i], num_samples_per_image,
                              positive_match[i])
            for i in range(batch_size)
        ])

        # Turn the boolean mask into indices: top_k over the 0/1 mask lists
        # the positions of the sampled entries (value 1) first.
        _, sampled_indices = tf.nn.top_k(tf.cast(sampled_indicators,
                                                 dtype=tf.int32),
                                         k=num_samples_per_image,
                                         sorted=True)

        # Pair every sampled index with its batch index so that gather_nd can
        # pick [batch, box] entries.
        sampled_indices_shape = tf.shape(sampled_indices)
        batch_indices = (
            tf.expand_dims(tf.range(sampled_indices_shape[0]), axis=-1) *
            tf.ones([1, sampled_indices_shape[-1]], dtype=tf.int32))
        gather_nd_indices = tf.stack([batch_indices, sampled_indices], axis=-1)

        sampled_rois = tf.gather_nd(boxes, gather_nd_indices)
        sampled_gt_boxes = tf.gather_nd(matched_gt_boxes, gather_nd_indices)
        sampled_gt_classes = tf.gather_nd(matched_gt_classes,
                                          gather_nd_indices)
        sampled_gt_indices = tf.gather_nd(matched_gt_indices,
                                          gather_nd_indices)

        return (sampled_rois, sampled_gt_boxes, sampled_gt_classes,
                sampled_gt_indices)
예제 #3
0
    def label_anchors_lrtb(self, gt_boxes, gt_labels):
        """Labels anchors with ground truth inputs, including lrtb targets.

        Args:
          gt_boxes: A float tensor with shape [N, 4] representing groundtruth
            boxes. For each row, it stores [y0, x0, y1, x1] for four corners
            of a box.
          gt_labels: An integer tensor with shape [N, 1] representing
            groundtruth classes.

        Returns:
          A tuple of four ordered dictionaries, each keyed by
          [min_level, min_level+1, ..., max_level]:
            score_targets_dict: The values are tensors with shape
              [height_l, width_l, num_anchors]. The height_l and width_l
              represent the dimension of class logits at l-th level.
            box_targets_dict: The values are tensors with shape
              [height_l, width_l, num_anchors * 4]. The height_l and width_l
              represent the dimension of bounding box regression output at
              l-th level.
            lrtb_targets_dict: Same structure as box_targets_dict, except the
              regression targets are converted from xyhw to lrtb format.
              Targets are zeroed for anchors that are not matched or whose
              smallest lrtb component is not positive.
            center_targets_dict: Same structure as score_targets_dict, except
              the scores are centerness values ranging from 0 to 1. Anchors
              not selected by the centerness sampler are set to -1.
        """
        groundtruth = box_list.BoxList(gt_boxes)
        anchors = box_list.BoxList(self._anchor.boxes)

        # cls_targets, cls_weights and box_weights (positions 0, 1 and 3 of
        # the assigner output) are not used.
        (_, _, box_targets, _, matches, matched_gt, matched_mask,
         center_matched_gt, center_matched_mask,
         matched_ious) = self._target_assigner.assign(anchors, groundtruth,
                                                      gt_labels)
        anchor_coords = anchors.data['boxes']

        # Box lrtb targets.
        lrtb_targets, _ = box_utils.encode_boxes_lrtb(
            matched_gt.data['boxes'],
            anchor_coords,
            weights=[1.0, 1.0, 1.0, 1.0])
        # Keep an lrtb target only if the anchor is matched and all of its
        # components are strictly positive.
        lrtb_is_sane = tf.logical_and(
            tf.greater(tf.reduce_min(lrtb_targets, -1), 0.),
            matched_mask)
        # Tile the per-anchor flag across the last axis so that it has the
        # same shape as lrtb_targets.
        lrtb_is_sane = tf.tile(tf.expand_dims(lrtb_is_sane, 1),
                               [1, tf.shape(lrtb_targets)[1]])
        lrtb_targets = tf.where(lrtb_is_sane, lrtb_targets,
                                tf.zeros_like(lrtb_targets))

        # RPN anchor-gtbox IoU values; non-positive IoUs are replaced by 0.
        has_overlap = tf.greater(matched_ious, 0.0)
        iou_targets = tf.where(has_overlap, matched_ious,
                               tf.zeros_like(matched_ious))

        # Centerness targets.
        _, center_targets = box_utils.encode_boxes_lrtb(
            center_matched_gt.data['boxes'],
            anchor_coords,
            weights=[1.0, 1.0, 1.0, 1.0])

        # Positive-negative centerness sampler.
        num_center_samples = self._num_center_samples_per_im
        center_sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=(1. - 1. / num_center_samples),
                is_static=False))
        # Sampling candidates: center-matched anchors plus anchors whose IoU
        # is below the "unmatched" threshold.
        low_iou = tf.less(iou_targets, self._center_unmatched_iou_threshold)
        center_candidates = tf.logical_or(center_matched_mask, low_iou)
        center_sampled = center_sampler.subsample(
            center_candidates, num_center_samples, center_matched_mask)
        # Anchors that were not sampled get a centerness target of -1.
        center_targets = tf.where(center_sampled, center_targets,
                                  (-1) * tf.ones_like(center_targets))

        # score_targets contains the subsampled positive and negative anchors.
        score_targets, _, _ = self._get_rpn_samples(matches.match_results)

        # Unpack every flat target tensor into its per-level dictionary, in
        # the order (score, box, lrtb, center).
        unpack = self._anchor.unpack_labels
        return tuple(
            unpack(flat_targets)
            for flat_targets in (score_targets, box_targets, lrtb_targets,
                                 center_targets))