def _get_rpn_samples(self, match_results):
    """Subsamples anchors and turns match results into RPN score targets.

    Foreground (fg) and background (bg) anchors are subsampled with a
    `BalancedPositiveNegativeSampler` that is asked for
    `self._rpn_batch_size_per_im` anchors using a positive fraction of
    `self._rpn_fg_fraction`.

    Args:
      match_results: an integer tensor with shape [N] representing the
        matching results of anchors.
        (1) match_results[i]>=0, meaning that column i is matched with row
            match_results[i].
        (2) match_results[i]=-1, meaning that column i is not matched.
        (3) match_results[i]=-2, meaning that column i is ignored.

    Returns:
      A tuple `(score_targets, positive_labels, negative_labels)` of int32
      tensors, each with the shape of `match_results`:
      score_targets: (1) score_targets[i]=1, the anchor is a sampled
        positive. (2) score_targets[i]=0, the anchor is a sampled negative.
        (3) score_targets[i]=-1, the anchor is don't care (ignored or not
        sampled).
      positive_labels: 2 where the anchor is a sampled positive, 0 elsewhere.
      negative_labels: 1 where the anchor is a sampled negative, 0 elsewhere.
    """
    anchor_shape = match_results.shape

    def _int32_constant(value):
        # Constant int32 tensor with one entry per anchor.
        return tf.constant(value, dtype=tf.int32, shape=anchor_shape)

    fg_bg_sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=self._rpn_fg_fraction, is_static=False))

    # Sampling candidates are every anchor that is not ignored (> -2); the
    # matched ones (> -1) among them are the positives.
    #   positives = is_candidate & is_matched
    #   negatives = is_candidate & !is_matched
    #   ignore    = !is_candidate
    is_candidate = tf.greater(match_results, -2)
    is_matched = tf.greater(match_results, -1)

    sampled = fg_bg_sampler.subsample(
        is_candidate, self._rpn_batch_size_per_im, is_matched)

    sampled_fg = tf.logical_and(sampled, is_matched)
    positive_labels = tf.where(
        sampled_fg, _int32_constant(2), _int32_constant(0))

    sampled_bg = tf.logical_and(sampled, tf.logical_not(is_matched))
    negative_labels = tf.where(
        sampled_bg, _int32_constant(1), _int32_constant(0))

    # Every anchor starts at -1 (don't care); adding 2 lifts sampled
    # positives to 1 and adding 1 lifts sampled negatives to 0.
    ignore_labels = tf.fill(anchor_shape, -1)
    score_targets = ignore_labels + positive_labels + negative_labels

    return score_targets, positive_labels, negative_labels
def proposal_label_op(boxes, gt_boxes, gt_labels, image_info,
                      batch_size_per_im=512, fg_fraction=0.25, fg_thresh=0.5,
                      bg_thresh_hi=0.5, bg_thresh_lo=0.):
    """Assigns the proposals with ground truth labels and performs subsampling.

    Given proposal `boxes`, `gt_boxes`, and `gt_labels`, the function uses the
    following algorithm to generate the final `batch_size_per_im` RoIs.
    1. Calculates the IoU between each proposal box and each gt_boxes.
    2. Assigns each proposal box with a ground truth class and box label by
       choosing the largest overlap.
    3. Samples `batch_size_per_im` boxes from all proposal boxes, and returns
       box_targets, class_targets, and RoIs.
    The reference implementations of #1 and #2 are here:
    https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py  # pylint: disable=line-too-long
    The reference implementation of #3 is here:
    https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py.  # pylint: disable=line-too-long

    Args:
      boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
        proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The
        last dimension is the pixel coordinates of scaled images in
        [ymin, xmin, ymax, xmax] form.
      gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4].
        This tensor might have paddings with a value of -1. The coordinates of
        gt_boxes are in the pixel coordinates of the original image scale.
      gt_labels: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
        tensor might have paddings with a value of -1.
      image_info: a tensor of shape [batch_size, 5] where the five columns
        encode the input image's [height, width, scale, original_height,
        original_width]. Height and width are for the input to the network,
        not the original image; scale is the scale factor used to scale the
        network input size to the original image size. See
        dataloader.DetectionInputProcessor for details. The last two are
        original height and width.
      batch_size_per_im: an integer represents RoI minibatch size per image.
      fg_fraction: a float represents the target fraction of RoI minibatch
        that is labeled foreground (i.e., class > 0).
      fg_thresh: a float represents the overlap threshold for an RoI to be
        considered foreground (if >= fg_thresh).
      bg_thresh_hi: a float represents the overlap threshold for an RoI to be
        considered background (class = 0 if overlap in [LO, HI)).
      bg_thresh_lo: a float represents the overlap threshold for an RoI to be
        considered background (class = 0 if overlap in [LO, HI)).

    Returns:
      box_targets: a tensor with a shape of [batch_size, K, 4]. The tensor
        contains the ground truth pixel coordinates of the scaled images for
        each roi. K is the number of sample RoIs (e.g., batch_size_per_im).
      class_targets: an integer tensor with a shape of [batch_size, K]. The
        tensor contains the ground truth class for each roi.
      rois: a tensor with a shape of [batch_size, K, 4], representing the
        coordinates of the selected RoI.
      proposal_to_label_map: a tensor with a shape of [batch_size, K]. This
        tensor keeps the mapping between proposal to labels.
        proposal_to_label_map[i] means the index of the ground truth instance
        for the i-th proposal. Background RoIs are mapped to 0.
    """
    with tf.name_scope('proposal_label'):
        batch_size = boxes.shape[0]

        # Scales ground truth boxes to the scaled image coordinates.
        image_scale = 1 / image_info[:, 2]
        scaled_gt_boxes = gt_boxes * tf.reshape(
            image_scale, [batch_size, 1, 1])

        # The reference implementation intentionally includes ground truth
        # boxes in the proposals, so the sampling pool below has
        # N + MAX_NUM_INSTANCES boxes per image. see
        # https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py#L359.  # pylint: disable=line-too-long
        boxes = tf.concat([boxes, scaled_gt_boxes], axis=1)
        iou = box_utils.bbox_overlap(boxes, scaled_gt_boxes)

        (pre_sample_box_targets, pre_sample_class_targets, max_overlap,
         proposal_to_label_map) = _add_class_assignments(
             iou, scaled_gt_boxes, gt_labels)

        # Generates a random sample of RoIs comprising foreground and
        # background examples. reference:
        # https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py#L132  # pylint: disable=line-too-long
        # Foreground is inclusive (overlap >= fg_thresh), as documented above
        # and as in the reference. With a strict comparison an RoI whose
        # overlap equals fg_thresh == bg_thresh_hi would be neither foreground
        # nor background and would never be sampled.
        positives = tf.greater_equal(
            max_overlap, fg_thresh * tf.ones_like(max_overlap))
        negatives = tf.logical_and(
            tf.greater_equal(
                max_overlap, bg_thresh_lo * tf.ones_like(max_overlap)),
            tf.less(max_overlap, bg_thresh_hi * tf.ones_like(max_overlap)))

        # Background RoIs get class 0 and label-map index 0.
        pre_sample_class_targets = tf.where(
            negatives, tf.zeros_like(pre_sample_class_targets),
            pre_sample_class_targets)
        proposal_to_label_map = tf.where(
            negatives, tf.zeros_like(proposal_to_label_map),
            proposal_to_label_map)

        # Handles ground truth paddings: a box with any negative IoU entry is
        # excluded from sampling.
        # NOTE(review): presumably box_utils.bbox_overlap marks padded entries
        # with a negative IoU -- confirm against box_utils.
        ignore_mask = tf.less(
            tf.reduce_min(iou, axis=2), tf.zeros_like(max_overlap))

        # indicator includes both positive and negative labels.
        # labels includes only positives labels.
        # positives = indicator & labels.
        # negatives = indicator & !labels.
        # ignore = !indicator.
        labels = positives
        pos_or_neg = tf.logical_or(positives, negatives)
        indicator = tf.logical_and(pos_or_neg, tf.logical_not(ignore_mask))

        sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=fg_fraction, is_static=True))
        # Batch-unroll the sub-sampling process: one boolean mask per image.
        all_samples = tf.stack(
            [sampler.subsample(indicator[i], batch_size_per_im, labels[i])
             for i in range(batch_size)],
            axis=0)

        # A workaround to get the indices from the boolean tensors: after the
        # cast, sampled entries are 1, so top_k returns their indices first.
        # NOTE(review): this relies on the sampler selecting exactly
        # batch_size_per_im entries per image; otherwise the tail of the
        # indices points at unsampled boxes -- confirm with the sampler.
        _, samples_indices = tf.nn.top_k(
            tf.cast(all_samples, tf.int32), k=batch_size_per_im, sorted=True)

        # Constructs indices for gather: offsets each image's indices so they
        # address the batch-flattened tensors below.
        samples_indices = tf.reshape(
            samples_indices + tf.expand_dims(
                tf.range(batch_size) * tf.shape(boxes)[1], 1),
            [-1])

        rois = tf.reshape(
            tf.gather(tf.reshape(boxes, [-1, 4]), samples_indices),
            [batch_size, -1, 4])
        class_targets = tf.reshape(
            tf.gather(
                tf.reshape(pre_sample_class_targets, [-1, 1]),
                samples_indices),
            [batch_size, -1])
        sample_box_targets = tf.reshape(
            tf.gather(
                tf.reshape(pre_sample_box_targets, [-1, 4]), samples_indices),
            [batch_size, -1, 4])
        sample_proposal_to_label_map = tf.reshape(
            tf.gather(
                tf.reshape(proposal_to_label_map, [-1, 1]), samples_indices),
            [batch_size, -1])

    return (sample_box_targets, class_targets, rois,
            sample_proposal_to_label_map)