def bbox_transform_inv(self, boxes, deltas):
    """Apply regression deltas to reference boxes and return corner boxes.

    Both inputs are indexed as ``[:, :, k]`` for ``k`` in 0..3, so they must
    be rank-3 with at least four entries on the last axis (the original notes
    give ``[1, 12960, 4]`` as an example shape for each).

    Args:
        boxes: reference boxes, last axis read as (x1, y1, x2, y2).
        deltas: regression offsets, last axis read as (dx, dy, dw, dh).

    Returns:
        Tensor of decoded boxes, the four corner coordinates concatenated
        along axis 2 in (x1, y1, x2, y2) order.
    """
    # Box extents use the inclusive "+ 1.0" convention.
    box_w = boxes[:, :, 2] - boxes[:, :, 0] + 1.0
    box_h = boxes[:, :, 3] - boxes[:, :, 1] + 1.0
    center_x = boxes[:, :, 0] + 0.5 * box_w
    center_y = boxes[:, :, 1] + 0.5 * box_h

    shift_x, shift_y, log_scale_w, log_scale_h = (
        deltas[:, :, k] for k in range(4))

    # Centers move proportionally to the box size; sizes scale exponentially.
    new_center_x = shift_x * box_w + center_x
    new_center_y = shift_y * box_h + center_y
    new_w = gcop.exp(log_scale_w) * box_w
    new_h = gcop.exp(log_scale_h) * box_h

    corners = [
        gcop.expand_dims(new_center_x - 0.5 * new_w, -1),
        gcop.expand_dims(new_center_y - 0.5 * new_h, -1),
        gcop.expand_dims(0.5 * new_w + new_center_x, -1),
        gcop.expand_dims(0.5 * new_h + new_center_y, -1),
    ]
    return gcop.concat(corners, 2)
def __forward__(self, x, training):
    """Turn RPN outputs into a fixed number of NMS-filtered proposals.

    Steps performed here:
      1. Decode ``self.anchors`` with the predicted deltas and clip the
         result with ``im_info``.
      2. Optionally keep only the top-scoring proposals before NMS.
      3. Run ``nms`` and return its boxes and keep indicators.

    The valid-area masks are only summed and recorded through
    ``self.add_output``; they are not used to filter boxes in this method.

    Args:
        x: list ``[rpn_cls_prob, rpn_bbox_pred, im_info]``. The first two
            entries are replaced in place by their detached versions.
        training: selects ``cfg.TRAIN.RPN_POST_NMS_TOP_N`` when truthy,
            otherwise ``cfg.TEST.RPN_POST_NMS_TOP_N``.

    Returns:
        ``(output_boxes, output_keeps)`` as produced by ``nms``.
    """
    with gcop.variable_scope("Proposal"):
        # No gradient flows through proposal generation; the detached
        # tensors are written back into the caller's list.
        x[0] = x[0].detach()
        x[1] = x[1].detach()

        # Channels before ``num_anchors`` are background probabilities,
        # the remaining ones are foreground probabilities.
        fg_scores = x[0][:, self.num_anchors:, :, :]
        box_deltas = x[1]
        im_info = x[2]

        box_deltas = gcop.reshape(box_deltas, [self.batch_size, -1, 4])
        fg_scores = gcop.transpose(fg_scores, perm=[0, 2, 3, 1])
        dim0, dim1, dim2, dim3 = fg_scores.shape.as_list()
        flat_scores = gcop.reshape(fg_scores, [dim0, dim1 * dim2 * dim3])

        decoded = self.bbox_transform_inv(self.anchors, box_deltas)
        clipped = self.clip_boxes(decoded, im_info)

        clipped_mask = get_valid_area_mask(clipped)
        self.add_output('clipped_valid_area_boxes',
                        gcop.reduce_sum(clipped_mask))

        # NOTE(review): the pre-NMS limit always reads cfg.TRAIN, even when
        # ``training`` is false - confirm this is intended.
        pre_nms_limit = cfg.TRAIN.RPN_PRE_NMS_TOP_N
        if pre_nms_limit > 0:
            top_n = min(pre_nms_limit, flat_scores.squeeze(0).pureShape[0])
            ranked_scores, ranking = gcop.nn.top_k(flat_scores.squeeze(0),
                                                   k=top_n)
            ranked_boxes = gcop.gather(clipped, ranking, axis=1)
        else:
            ranked_scores = flat_scores.squeeze(0)
            ranked_boxes = clipped

        if training:
            post_nms_limit = cfg.TRAIN.RPN_POST_NMS_TOP_N
        else:
            post_nms_limit = cfg.TEST.RPN_POST_NMS_TOP_N
        output_boxes, output_keeps, _ = nms(ranked_scores.unsqueeze(0),
                                            ranked_boxes,
                                            numDetections=post_nms_limit)

        output_mask = get_valid_area_mask(output_boxes)
        self.add_output('valid_area_output_boxes',
                        gcop.reduce_sum(output_mask))
        return output_boxes, output_keeps
def subsample(self, indicator, batch_size, positive_flags, negative_flags,
              boxes_keep_arr):
    """Validate the inputs and delegate to the static sub-sampler.

    Args:
        indicator: boolean tensor of shape [N] whose True entries can be
            sampled.
        batch_size: desired batch size, forwarded unchanged to
            ``self._static_subsample``.
        positive_flags: boolean tensor of shape [N]; True marks positive
            examples, False marks negative and unsampled ones.
        negative_flags: boolean tensor of shape [N]; True marks negative
            examples, False marks positive and unsampled ones.
        boxes_keep_arr: boolean array, False for zero-area boxes and True
            for boxes with non-zero area. Forwarded without validation.

    Returns:
        The result of ``self._static_subsample``; per the original contract
        this is a boolean tensor of shape [N] that is True for the sampled
        entries.

    Raises:
        ValueError: if ``indicator``, ``positive_flags`` or
            ``negative_flags`` is not 1-dimensional or not of type
            ``gcop.bool``.
        RuntimeError: if ``self._is_static`` is falsy (only the static
            sampler is supported).
    """
    # Rank checks come first, in the same order as before.
    rank_checks = (
        ('indicator', indicator),
        ('positive_flags', positive_flags),
        ('negative_flags', negative_flags),
    )
    for name, tensor in rank_checks:
        if len(tensor.shape) != 1:
            # The explicit argument tuple keeps the formatting valid even
            # when ``shape`` is itself a tuple; a bare ``% shape`` would
            # raise TypeError instead of this ValueError.
            raise ValueError(
                '%s must be 1 dimensional, got a tensor of '
                'shape %s' % (name, tensor.shape))

    dtype_checks = (
        ('positive_flags', positive_flags),
        ('negative_flags', negative_flags),
        ('indicator', indicator),
    )
    for name, tensor in dtype_checks:
        if tensor.dtype != gcop.bool:
            raise ValueError(
                '%s should be of type bool. Received: %s'
                % (name, tensor.dtype))

    with gcop.variable_scope('BalancedPositiveNegativeSampler'):
        if not self._is_static:
            raise RuntimeError('only static sampler can be use')
        return self._static_subsample(indicator, batch_size, positive_flags,
                                      negative_flags, boxes_keep_arr)
def _add_class_assignments(self, iou, gt_boxes, gt_labels):
    """Assign each RoI the ground-truth instance it overlaps most.

    Args:
        iou: rank-3 IoU tensor; the original notes give the shape as
            [batch_size, K, MAX_NUM_INSTANCES] with K the number of
            post-NMS RoIs.
        gt_boxes: ground-truth boxes; flattened to [-1, 4] for the lookup.
        gt_labels: ground-truth labels; flattened to [-1, 1] for the lookup.
            Its second dimension is used as the per-image stride.

    Returns:
        Tuple ``(max_boxes, max_classes, max_overlap, argmax_iou)``:
          * max_boxes: [batch_size, K, 4] matched ground-truth boxes, zeroed
            where the best overlap is exactly 0.
          * max_classes: [batch_size, K] matched labels, zeroed where the
            best overlap is exactly 0.
          * max_overlap: best IoU per RoI (``reduce_max`` over axis 2).
          * argmax_iou: index of the best ground-truth instance per RoI
            (``argmax`` over axis 2).
    """
    with gcop.variable_scope('add_class_assignments'):
        num_images, _, _ = iou.shape.as_list()
        argmax_iou = gcop.argmax(iou, axis=2)

        # Turn per-image instance indices into indices of the flattened
        # [batch * instances] ground-truth tensors.
        instances_per_image = gcop.constant(
            np.array(gt_labels.shape.as_list()[1]))
        best_index = argmax_iou.cast(gcop.int32)
        image_offsets = gcop.expand_dims(
            gcop.range(num_images, dtype=gcop.int32) *
            instances_per_image.cast(gcop.int32), 1)
        flat_index = gcop.reshape(best_index + image_offsets,
                                  [-1]).cast(gcop.int32)

        flat_labels = gcop.reshape(gt_labels, [-1, 1])
        max_classes = gcop.reshape(
            gcop.gather(flat_labels, flat_index, axis=0), [num_images, -1])

        # RoIs whose best overlap is exactly zero match nothing.
        max_overlap = gcop.reduce_max(iou, axis=2)
        no_overlap = gcop.math.equal(max_overlap,
                                     gcop.zeros_like(max_overlap))
        max_classes = gcop.where(no_overlap, gcop.zeros_like(max_classes),
                                 max_classes)

        flat_boxes = gcop.reshape(gt_boxes, [-1, 4])
        max_boxes = gcop.reshape(
            gcop.gather(flat_boxes, flat_index, axis=0),
            [num_images, -1, 4])
        no_overlap_per_coord = gcop.tile(
            gcop.expand_dims(no_overlap, axis=2), [1, 1, 4])
        max_boxes = gcop.where(no_overlap_per_coord,
                               gcop.zeros_like(max_boxes), max_boxes)

        return max_boxes, max_classes, max_overlap, argmax_iou
def smooth_l1_loss(bbox_pred,
                   bbox_targets,
                   bbox_inside_weights,
                   bbox_outside_weights,
                   sigma=1.0,
                   reduceDim=None,
                   debugPrefix=''):
    """Smooth-L1 loss between predictions and targets.

    With ``x`` the (optionally inside-weighted) difference::

        SmoothL1(x) = 0.5 * (sigma * x)^2     if |x| < 1 / sigma^2
                      |x| - 0.5 / sigma^2     otherwise

    Args:
        bbox_pred: predicted values.
        bbox_targets: target values.
        bbox_inside_weights: multiplied into the difference before the loss
            is applied; skipped when None.
        bbox_outside_weights: multiplied into the element-wise loss; skipped
            when None.
        sigma: controls where the quadratic and linear parts meet.
        reduceDim: axes handed to ``gcop.reduce_sum``.
        debugPrefix: name of the variable scope wrapping the reductions.

    Returns:
        The loss summed over ``reduceDim`` and then averaged over whatever
        axes remain.
    """
    sigma_sq = sigma * sigma

    residual = bbox_pred - bbox_targets
    if bbox_inside_weights is not None:
        residual = bbox_inside_weights * residual
    value_type = residual.dtype

    def _scalar(value):
        # Constant in the same dtype as the residual.
        return gcop.constant(np.asarray(value), value_type)

    # 1 where the quadratic branch applies, 0 elsewhere; detached so the
    # selector itself carries no gradient.
    in_quadratic = gcop.less(gcop.abs(residual), _scalar(1.0 / sigma_sq))
    in_quadratic = in_quadratic.cast(residual.dtype).detach()

    quadratic_part = residual * residual * _scalar(0.5 * sigma_sq)
    linear_part = gcop.abs(residual) - _scalar(0.5 / sigma_sq)

    quadratic_term = quadratic_part * in_quadratic
    # |selector - 1| flips the 0/1 selector to pick the linear branch.
    linear_term = linear_part * (gcop.abs(in_quadratic - _scalar(1.0)))
    elementwise = quadratic_term + linear_term

    if bbox_outside_weights is None:
        loss = elementwise
    else:
        loss = bbox_outside_weights * elementwise

    with gcop.variable_scope(debugPrefix):
        loss = gcop.reduce_sum(loss, reduceDim, keepdims=0)
        remaining_axes = list(range(len(loss.shape.as_list())))
        if remaining_axes:
            loss = gcop.reduce_mean(loss, remaining_axes, keepdims=0)
    return loss
def clip_boxes(self, boxes, im_info):
    """Clamp box coordinates into the image described by ``im_info``.

    Coordinates 0 and 2 of the last axis are limited to
    ``[0, im_info[1] - 1]`` and coordinates 1 and 3 to
    ``[0, im_info[0] - 1]``.
    Presumably ``im_info`` starts with (height, width) - TODO confirm.

    Args:
        boxes: rank-3 tensor whose last axis holds four coordinates.
        im_info: indexable providing the two upper bounds described above.

    Returns:
        The clipped coordinates concatenated along axis 2.
    """
    # For each coordinate, the im_info entry that bounds it.
    limit_index_per_coord = (1, 0, 1, 0)
    with gcop.variable_scope("clip_boxes"):
        clipped = []
        for coord, limit_index in enumerate(limit_index_per_coord):
            bounded = gcop.clip_by_value(
                boxes[:, :, coord],
                clip_value_min=0,
                clip_value_max=im_info[limit_index] - 1)
            clipped.append(bounded.unsqueeze(-1))
        return gcop.concat(clipped, 2)
def matmul_gather_on_zeroth_axis(self, params, indices, scope=None):
    """Gather rows of ``params`` using a one-hot matrix product.

    ``indices`` is one-hot encoded against the size of the first axis of
    ``params`` and multiplied with ``params`` flattened to two dimensions.
    Both operands are cast to float32 for the product.

    Args:
        params: tensor to gather from; must be at least rank 1.
        indices: integer tensor of row indices (cast to int32 here). The
            original contract requires values in [0, params.shape[0]).
        scope: accepted for interface compatibility; not used.

    Returns:
        A float32 tensor of shape ``indices.shape + params.shape[1:]``.
    """
    with gcop.variable_scope('MatMulGather'):
        source_dims = self.combined_static_and_dynamic_shape(params)
        index_dims = self.combined_static_and_dynamic_shape(indices)
        row_count = source_dims[0]

        flat_source = gcop.reshape(params, [row_count, -1])
        selector = gcop.one_hot(indices.cast(gcop.int32), row_count)
        gathered = gcop.matmul(selector.cast(gcop.float32),
                               flat_source.cast(gcop.float32))

        output_dims = index_dims + source_dims[1:]
        return gcop.reshape(gathered, output_dims)
def forward(self, x, im_info=None, rpn_data=None, stage_configs='0'):
    """Run the RPN head: shared conv, class and box branches, losses, proposals.

    Args:
        x: input feature map; cast to float16 or float32 depending on
            ``cfg.MODEL.RPN_CONV_FP16_ON``.
        im_info: forwarded to ``self.proposal`` as the third list entry.
        rpn_data: when ``self.training`` is truthy, a 5-element sequence
            unpacked as (labels, keep indices, bbox targets, bbox inside
            weights, bbox outside weights). Not read otherwise.
        stage_configs: argument handed to ``gcop.device`` for the loss and
            proposal part of the graph.

    Returns:
        ``(fixed_length_roi, roi_keeps, self.rpn_loss_cls,
        self.rpn_loss_box)``. Both losses are the integer 0 when
        ``self.training`` is falsy.
    """
    # NOTE(review): confirm that the "rpn_cls" / "rpn_box" scopes are siblings
    # of "rpn" (not nested inside it) and that everything from the loss to the
    # proposal call belongs under ``gcop.device`` - nesting affects parameter
    # names and device placement.
    if cfg.MODEL.RPN_CONV_FP16_ON:
        x = x.cast(gcop.float16)
    else:
        x = x.cast(gcop.float32)
    # Shared 3x3 convolution followed by ReLU.
    with gcop.variable_scope("rpn"):
        x = gcop.cF.conv2d(x,
                           self.rpn_channel,
                           ksize=3,
                           train=True,
                           strides=[1, 1],
                           padding_mode='same',
                           fp16_on=None,
                           weights_fp16_on=cfg.MODEL.RPN_CONV_FP16_ON,
                           filters_data=self.normal_init(
                               [self.rpn_channel, x.pureShape[1], 3, 3],
                               0,
                               0.01,
                               dtype=self.dtype),
                           debugContext='conv')
        x = gcop.nn.relu(x)
    # Classification branch: 1x1 conv, then a 2-way softmax per anchor position.
    with gcop.variable_scope("rpn_cls"):
        rpn_cls_score = gcop.cF.conv2d(
            x,
            self.nc_score_out,
            ksize=1,
            train=True,
            strides=[1, 1],
            padding_mode='same',
            fp16_on=None,
            filters_data=self.normal_init(
                [self.nc_score_out, x.pureShape[1], 1, 1],
                0,
                0.01,
                dtype=self.dtype))
        B, C, H, W = rpn_cls_score.shape.as_list()
        # Regroup the channels into two classes and flatten to [-1, 2] rows.
        target_shape = [B, 2, -1]
        rpn_cls_score_reshape = gcop.reshape(rpn_cls_score, target_shape)
        rpn_cls_prob_premute = gcop.transpose(rpn_cls_score_reshape,
                                              perm=[0, 2, 1])
        rpn_cls_prob_premute_reshape = gcop.reshape(
            rpn_cls_prob_premute, [-1, 2])
        rpn_cls_prob_premute_reshape = rpn_cls_prob_premute_reshape.cast(
            gcop.float32)
        # Despite its name, ``logits`` holds the softmax output.
        logits = gcop.nn.softmax(rpn_cls_prob_premute_reshape)
    # get rpn offsets to the anchor boxes
    with gcop.variable_scope("rpn_box"):
        rpn_bbox_pred = gcop.cF.conv2d(
            x,
            self.nc_score_out * 2,
            ksize=1,
            train=True,
            strides=[1, 1],
            padding_mode='same',
            fp16_on=None,
            filters_data=self.normal_init(
                [self.nc_score_out * 2, x.pureShape[1], 1, 1],
                0,
                0.01,
                dtype=self.dtype))
        # Move the channel axis last.
        rpn_bbox_pred = gcop.transpose(rpn_bbox_pred, [0, 2, 3, 1])
    # Losses and proposals are computed in float32.
    if cfg.MODEL.RPN_CONV_FP16_ON:
        rpn_bbox_pred = rpn_bbox_pred.cast(gcop.float32)
        logits = logits.cast(gcop.float32)
        rpn_cls_prob_premute_reshape = rpn_cls_prob_premute_reshape.cast(
            gcop.float32)
    with gcop.device(stage_configs):
        if self.training:
            _rpn_label, rpn_keep, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data
            rpn_keep = rpn_keep.squeeze(0)
            # The classification loss uses the pre-softmax scores of the
            # rows selected by ``rpn_keep``.
            rpn_scores = gcop.gather(
                rpn_cls_prob_premute_reshape,
                rpn_keep.cast(gcop.int32),
            )
            rpn_label = gcop.gather(
                gcop.reshape(_rpn_label, [-1]),
                rpn_keep.cast(gcop.int32),
            ).cast(gcop.int32)
            rpn_scores, rpn_bbox_pred = [
                ele.cast(gcop.float32)
                for ele in [rpn_scores, rpn_bbox_pred]
            ]
            self.rpn_loss_cls = gcop.nn.sparse_softmax_cross_entropy_with_logits(
                labels=rpn_label, logits=rpn_scores, name="rpn_loss_cls")
            self.rpn_loss_box = smooth_l1_loss(rpn_bbox_pred,
                                               rpn_bbox_targets,
                                               rpn_bbox_inside_weights,
                                               rpn_bbox_outside_weights,
                                               sigma=3,
                                               reduceDim=[0, 1, 2, 3],
                                               debugPrefix='rpn_loss_box')
        else:
            self.rpn_loss_cls, self.rpn_loss_box = 0, 0
        # Bring the probabilities back to the [B, C, H, W] layout of the
        # class-score tensor before generating proposals.
        logits_transpose = gcop.transpose(logits, perm=[1, 0])
        rpn_cls_prob = gcop.reshape(logits_transpose, [B, C, H, W])
        fixed_length_roi, roi_keeps = self.proposal(
            [rpn_cls_prob, rpn_bbox_pred, im_info], self.training)
    return fixed_length_roi, roi_keeps, self.rpn_loss_cls, self.rpn_loss_box
def rois_sampler(self,
                 boxes,
                 gt_boxes,
                 gt_labels,
                 batch_size_per_im=512,
                 fg_fraction=0.25,
                 fg_thresh=0.5,
                 bg_thresh_hi=0.5,
                 bg_thresh_lo=0.0):
    """Label proposals with ground truth and sub-sample a fixed-size RoI batch.

    Algorithm:
      1. Optionally append ``gt_boxes`` to the proposals
         (``cfg.TRAIN.ADD_GT_BOX_IN_SAMPLER``).
      2. Compute the IoU between proposals and ground-truth boxes and zero
         out pairs in which either box has no valid area.
      3. Give every proposal the class and box of its best-overlapping
         ground-truth instance.
      4. Mark foreground / background proposals by IoU thresholds and draw
         ``batch_size_per_im`` samples per image with a balanced sampler.
      5. Gather the sampled RoIs and their targets and encode box targets.

    Reference implementations (Detectron):
      detectron/datasets/json_dataset.py (steps 1-3)
      detectron/roi_data/fast_rcnn.py (step 4)

    NOTE(review): the IoU is computed from ``boxes[0]`` and ``gt_boxes[0]``
    only, i.e. from the first batch element - confirm batch size 1 is the
    only supported case.

    Args:
        boxes: proposals, rank-3 tensor [batch_size, N, 4].
        gt_boxes: ground-truth boxes [batch_size, MAX_NUM_INSTANCES, 4];
            per the original notes padding entries hold -1.
        gt_labels: ground-truth labels [batch_size, MAX_NUM_INSTANCES];
            per the original notes padding entries hold -1.
        batch_size_per_im: number of RoIs sampled per image.
        fg_fraction: target fraction of foreground RoIs, passed to the
            sampler as ``positive_fraction``.
        fg_thresh: an RoI is foreground when its best IoU >= fg_thresh.
        bg_thresh_hi: upper (exclusive) IoU bound for background RoIs.
        bg_thresh_lo: lower (inclusive) IoU bound for background RoIs.

    Returns:
        Tuple of five tensors:
          * box targets [batch_size, K, 4]: matched ground-truth box of each
            sampled RoI.
          * class targets [batch_size, K]: matched class, 0 for background.
          * rois [batch_size, K, 4]: the sampled proposal boxes.
          * proposal-to-label map [batch_size, K]: index of the matched
            ground-truth instance, -1 for background RoIs.
          * the result of ``self._compute_targets_pytorch(rois, box targets)``.
    """
    with gcop.variable_scope('ProposalTargetLayer'):
        num_images = boxes.shape.as_list()[0]

        # The reference implementation intentionally includes the ground
        # truth boxes among the proposals.
        if cfg.TRAIN.ADD_GT_BOX_IN_SAMPLER:
            boxes = gcop.concat([boxes, gt_boxes], axis=1)

        boxes_keep_arr = self.get_valid_area_flags(boxes)
        gt_keep_arr = self.get_valid_area_flags(gt_boxes)

        overlaps = bbox_overlaps_torch(boxes[0], gt_boxes[0]).unsqueeze(0)
        # A pair only counts when both of its boxes have a valid area.
        pair_is_valid = (boxes_keep_arr.cast(
            gcop.float32).unsqueeze(-1)) * (gt_keep_arr.cast(
                gcop.float32).unsqueeze(1))
        overlaps = overlaps * pair_is_valid

        assignment = self._add_class_assignments(overlaps, gt_boxes,
                                                 gt_labels)
        matched_boxes, matched_classes, best_overlap, matched_gt = assignment

        # Foreground / background split by IoU thresholds.
        is_foreground = gcop.math.greater_equal(
            best_overlap.cast(gcop.float32),
            (fg_thresh * gcop.ones_like(best_overlap)).cast(gcop.float32))
        above_low = gcop.math.greater_equal(
            best_overlap, bg_thresh_lo * gcop.ones_like(best_overlap))
        below_high = gcop.less(best_overlap,
                               bg_thresh_hi * gcop.ones_like(best_overlap))
        is_background = gcop.math.logical_and(above_low, below_high)

        # Background RoIs get class 0 and instance index -1.
        matched_classes = gcop.where(is_background,
                                     gcop.zeros_like(matched_classes),
                                     matched_classes)
        no_instance = gcop.ones_like(matched_gt).cast(gcop.int32) * -1
        matched_gt = gcop.where(is_background, no_instance,
                                matched_gt.cast(gcop.int32))

        # Handles ground truth paddings: a negative minimum IoU marks the
        # RoI as ignored.
        is_ignored = gcop.less(gcop.reduce_min(overlaps, axis=2),
                               gcop.zeros_like(best_overlap))
        is_labelled = gcop.math.logical_or(is_foreground, is_background)
        can_sample = gcop.math.logical_and(
            is_labelled, gcop.math.logical_not(is_ignored))

        sampler = (balanced_positive_negative_sampler.
                   BalancedPositiveNegativeSampler(
                       fp16_on=self.fp16_on,
                       training=True,
                       positive_fraction=fg_fraction))
        # Batch-unrolled sub-sampling, one call per image.
        per_image_samples = [
            sampler.subsample(can_sample[i], batch_size_per_im,
                              is_foreground[i], is_background[i],
                              boxes_keep_arr[i])
            for i in range(num_images)
        ]
        sampled_mask = gcop.stack(per_image_samples, axis=0)

        # A workaround to get the indices from the boolean tensors.
        _, picked = gcop.nn.top_k(sampled_mask.cast(gcop.int32),
                                  k=batch_size_per_im,
                                  sorted=True)

        # Offset per-image indices so they address the flattened tensors.
        boxes_per_image = boxes.shape.as_list()[1]
        picked = picked.cast(gcop.int32)
        image_offsets = gcop.expand_dims(
            gcop.range(num_images, dtype=gcop.int32).cast(gcop.int32) *
            boxes_per_image, 1).cast(gcop.int32)
        flat_picked = gcop.reshape(picked + image_offsets,
                                   [-1]).cast(gcop.int32)

        def _take(tensor, width, out_shape):
            # Flatten to rows of ``width``, pick the sampled rows, reshape.
            rows = gcop.reshape(tensor, [-1, width])
            return gcop.reshape(gcop.gather(rows, flat_picked), out_shape)

        rois = _take(boxes, 4, [num_images, -1, 4])
        class_targets = _take(matched_classes, 1, [num_images, -1])
        sample_box_targets = _take(matched_boxes, 4, [num_images, -1, 4])
        sample_proposal_to_label_map = _take(matched_gt, 1,
                                             [num_images, -1])

        encoded_boxes_result = self._compute_targets_pytorch(
            rois, sample_box_targets)
        return (sample_box_targets, class_targets, rois,
                sample_proposal_to_label_map, encoded_boxes_result)