def _get_targets_single(self,
                        flat_anchors,
                        valid_flags,
                        gt_bboxes,
                        gt_labels,
                        img_shape,
                        unmap_outputs=True):
    """Compute regression and classification targets for anchors in a single image.

    Args:
        flat_anchors: Multi-level anchors of the image, concatenated into a
            single tensor of shape (num_anchors, 4).
        valid_flags: Multi-level valid flags of the image, concatenated into
            a single tensor of shape (num_anchors,).
        gt_bboxes: Ground truth bboxes of the image, shape (num_gts, 4).
            Passed through `trim_zeros` first (presumably to drop zero
            padding).
        gt_labels: Ground truth labels of each box, shape (num_gts,). If not
            None these labels are assigned to positive anchors; if None
            (RPN) positive anchors are labelled 1.
        img_shape: Shape of the image (unpadded); forwarded to
            `self._anchor_inside_flags`.
        unmap_outputs: Whether to map outputs back to the original set of
            anchors. NOTE: this flag is currently never read; outputs are
            always unmapped.

    Returns:
        A 6-tuple (the first four entries are wrapped in `tf.stop_gradient`):
        target_matches: (num_anchors,) > 0 = positive anchor (1 for RPN,
            the gathered GT label otherwise), 0 = negative (background)
            anchor, -1 = ignored anchor (also the fill value for anchors
            outside the valid area).
        bboxes_targets: (num_anchors, 4)
        bbox_inside_weights: (num_anchors, 4), 1 on positive anchors.
        bbox_outside_weights: (num_anchors, 4), 1 / num_examples on every
            anchor with label >= 0.
        num_fg: with sampling (`self.num_samples > 0`) the int32 count of
            anchors labelled 1 after foreground subsampling; otherwise the
            float count of anchors with label > 0.
        num_bg: with sampling `self.num_samples - num_fg` (the background
            budget); otherwise 0.
    """
    gt_bboxes, _ = trim_zeros(gt_bboxes)

    # 1. Filter anchors to valid area
    inside_flags = self._anchor_inside_flags(flat_anchors, valid_flags,
                                             img_shape)
    # TODO: handle scenario where all flags are False
    anchors = tf.boolean_mask(flat_anchors, inside_flags)
    num_anchors = tf.shape(flat_anchors)[0]

    # 2. Find IoUs
    num_valid_anchors = tf.shape(anchors)[0]
    # Every valid anchor starts out as "ignored" (-1).
    target_matches = -tf.ones((num_valid_anchors, ), tf.int32)
    overlaps = geometry.compute_overlaps(anchors, gt_bboxes)
    # a. best GT index for each anchor
    argmax_overlaps = tf.argmax(overlaps, axis=1, output_type=tf.int32)
    max_overlaps = tf.reduce_max(overlaps, axis=1)
    # b. best anchor index for each GT (non deterministic in case of ties)
    gt_argmax_overlaps = tf.argmax(overlaps, axis=0, output_type=tf.int32)

    # 3. Assign labels
    bg_cond = tf.math.less(max_overlaps, self.neg_iou_thr)
    fg_cond = tf.math.greater_equal(max_overlaps, self.pos_iou_thr)
    target_matches = tf.where(bg_cond, tf.zeros_like(target_matches),
                              target_matches)
    gt_indices = tf.expand_dims(gt_argmax_overlaps, axis=1)
    if gt_labels is None:  # RPN will have gt labels set to None
        gt_labels = tf.ones(tf.shape(gt_indices)[0], dtype=tf.int32)
        # TODO check impact of next 2 lines
        # The best anchor of every GT becomes positive even when it is below
        # pos_iou_thr. All updates equal 1, so duplicate indices are harmless.
        target_matches = tf.tensor_scatter_nd_update(target_matches,
                                                     gt_indices, gt_labels)
        target_matches = tf.where(fg_cond, tf.ones_like(target_matches),
                                  target_matches)
    else:
        # get rid of padded labels (-1)
        gt_labels = gt_labels[:tf.shape(gt_indices)[0]]
        target_matches = tf.where(fg_cond,
                                  tf.gather(gt_labels, argmax_overlaps),
                                  target_matches)

    # 4. Sample selected if we have greater number of candidates than needed
    #    by config (only if num_samples > 0, e.g. in two stage)
    if self.num_samples > 0:
        # NOTE(review): only label == 1 is treated as foreground here, which
        # matches the RPN case; confirm this is intended when gt_labels holds
        # class ids greater than 1.
        fg_inds = tf.where(tf.equal(target_matches, 1))[:, 0]
        max_pos_samples = tf.cast(self.positive_fraction * self.num_samples,
                                  tf.int32)
        if tf.greater(tf.size(fg_inds), max_pos_samples):
            # Randomly demote surplus positives back to "ignored".
            fg_inds = tf.random.shuffle(fg_inds)
            disable_inds = fg_inds[max_pos_samples:]
            fg_inds = fg_inds[:max_pos_samples]
            disable_inds = tf.expand_dims(disable_inds, axis=1)
            disable_labels = -tf.ones(tf.shape(disable_inds)[0],
                                      dtype=tf.int32)
            target_matches = tf.tensor_scatter_nd_update(
                target_matches, disable_inds, disable_labels)
        num_fg = tf.reduce_sum(
            tf.cast(tf.equal(target_matches, 1), tf.int32))
        num_bg = self.num_samples - num_fg
        bg_inds = tf.where(tf.equal(target_matches, 0))[:, 0]
        if tf.greater(tf.size(bg_inds), num_bg):
            # Randomly demote surplus negatives back to "ignored".
            bg_inds = tf.random.shuffle(bg_inds)
            disable_inds = bg_inds[num_bg:]
            bg_inds = bg_inds[:num_bg]
            disable_inds = tf.expand_dims(disable_inds, axis=1)
            disable_labels = -tf.ones(tf.shape(disable_inds)[0],
                                      dtype=tf.int32)
            target_matches = tf.tensor_scatter_nd_update(
                target_matches, disable_inds, disable_labels)

    # 5. Calculate deltas for chosen targets based on GT (encode)
    bboxes_targets = transforms.bbox2delta(
        anchors,
        tf.gather(gt_bboxes, argmax_overlaps),
        target_means=self.target_means,
        target_stds=self.target_stds)

    # Regression weights: 1 on every positive anchor, 0 elsewhere
    bbox_inside_weights = tf.zeros((num_valid_anchors, 4), dtype=tf.float32)
    match_indices = tf.where(tf.math.greater(target_matches, 0))
    updates = tf.ones([tf.shape(match_indices)[0], 4],
                      bbox_inside_weights.dtype)
    bbox_inside_weights = tf.tensor_scatter_nd_update(
        bbox_inside_weights, match_indices, updates)

    bbox_outside_weights = tf.zeros((num_valid_anchors, 4), dtype=tf.float32)
    if self.num_samples > 0:
        num_examples = tf.reduce_sum(
            tf.cast(target_matches >= 0, bbox_outside_weights.dtype))
    else:
        num_examples = tf.reduce_sum(
            tf.cast(target_matches > 0, bbox_outside_weights.dtype))
        num_fg = num_examples
        num_bg = 0  # in RetinaNet we only care about positive anchors
    out_indices = tf.where(target_matches >= 0)
    # Without sampling num_examples counts positives only while out_indices
    # also covers background anchors, so an image with no positive anchor
    # would divide by zero and write inf weights. Clamp the divisor to 1.
    normalizer = tf.maximum(num_examples, 1.0)
    updates = tf.ones([tf.shape(out_indices)[0], 4],
                      bbox_outside_weights.dtype) / normalizer
    bbox_outside_weights = tf.tensor_scatter_nd_update(
        bbox_outside_weights, out_indices, updates)

    # for everything that is not selected fill with `fill` value
    selected_anchor_idx = tf.where(inside_flags)[:, 0]
    return (tf.stop_gradient(
        _unmap(target_matches, num_anchors, selected_anchor_idx, -1)),
            tf.stop_gradient(
                _unmap(bboxes_targets, num_anchors, selected_anchor_idx, 0)),
            tf.stop_gradient(
                _unmap(bbox_inside_weights, num_anchors, selected_anchor_idx,
                       0)),
            tf.stop_gradient(
                _unmap(bbox_outside_weights, num_anchors,
                       selected_anchor_idx, 0)), num_fg, num_bg)
def _build_single_target(self, proposals, gt_boxes, gt_class_ids, img_shape):
    '''Sample RoIs for one image and build their classification/regression targets.

    Ground-truth boxes are appended to the proposals, every resulting RoI is
    matched to its highest-IoU ground-truth box, and foreground/background
    RoIs are sampled. Foreground RoIs always come first in the outputs.

    Args
    ---
        proposals: [num_proposals, (y1, x1, y2, x2)] in regular coordinates.
        gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)]
        gt_class_ids: [num_gt_boxes]
        img_shape: np.ndarray. [2]. (img_height, img_width). Currently
            unused by this method.

    Returns
    ---
        A 6-tuple, each entry wrapped in `tf.stop_gradient`:
        final_rois: [num_rois, 4] sampled RoIs (proposals and/or gt boxes).
        final_labels: [num_rois] matched class id for foreground RoIs,
            0 for background RoIs.
        final_bbox_targets: [num_rois, num_classes * 4] deltas, non-zero only
            in the foreground rows (class slot 0 when
            `self.reg_class_agnostic`, the matched class slot otherwise).
        bbox_inside_weights: [num_rois, num_classes * 4], 1 where
            final_bbox_targets is populated, 0 elsewhere.
        bbox_outside_weights: same shape, constant 1 / self.num_rcnn_deltas.
        fg_assignments: [num_rois] index of the best-matching ground-truth
            box for every kept RoI.
    '''
    # remove padded proposals and gt boxes if any
    proposals, _ = trim_zeros(proposals)
    gt_boxes, non_zeros = trim_zeros(gt_boxes)
    gt_boxes = tf.cast(gt_boxes, proposals.dtype)
    gt_labels = tf.boolean_mask(gt_class_ids, non_zeros)

    # Ground-truth boxes take part in sampling as additional proposals.
    proposals_gt = tf.concat([proposals, gt_boxes], axis=0)
    iou = geometry.compute_overlaps(proposals_gt, gt_boxes)
    max_overlaps = tf.reduce_max(iou, axis=1)
    gt_assignment = tf.argmax(iou, axis=1)
    labels = tf.gather(gt_labels, gt_assignment)

    # get FG and BG
    fg_inds = tf.where(max_overlaps >= self.pos_iou_thr)[:, 0]
    bg_inds = tf.where(
        tf.logical_and(max_overlaps < self.pos_iou_thr,
                       max_overlaps >= self.neg_iou_thr))[:, 0]

    # filter FG/BG
    if tf.size(fg_inds) > self._max_pos_samples:
        fg_inds = tf.random.shuffle(fg_inds)[:self._max_pos_samples]
    remaining = self.num_rcnn_deltas - tf.size(fg_inds)
    num_bg = tf.size(bg_inds)
    if tf.greater_equal(num_bg, remaining):
        bg_inds = tf.random.shuffle(bg_inds)[:remaining]
    else:
        # sample with replacement from very poor overlaps if number of
        # backgrounds is not enough
        bg_inds = tf.where(max_overlaps < self.pos_iou_thr)[:, 0]
        bg_inds = tf.random.shuffle(bg_inds)[:remaining]
        num_bg = tf.size(bg_inds)
        # Stop when there is nothing to duplicate: with an empty candidate
        # set the loop could never make progress and would spin forever.
        while tf.logical_and(remaining > num_bg, num_bg > 0):
            dups = remaining - num_bg
            dup_bgs = tf.random.shuffle(bg_inds)[:dups]
            bg_inds = tf.concat([bg_inds, dup_bgs], axis=0)
            num_bg = tf.size(bg_inds)

    keep_inds = tf.concat([fg_inds, bg_inds], axis=0)
    num_fg = tf.size(fg_inds)
    num_rois = tf.size(keep_inds)
    final_rois = tf.gather(proposals_gt, keep_inds)  # rois[keep_inds]
    final_labels = tf.gather(labels, keep_inds)  # labels[keep_inds]

    # Background RoIs occupy rows [num_fg, num_rois); force their label to 0.
    zero_indices = tf.expand_dims(tf.range(num_fg, num_rois, dtype=tf.int32),
                                  axis=1)
    zero_labels = tf.zeros(tf.shape(zero_indices)[0], dtype=tf.int32)
    final_labels = tf.tensor_scatter_nd_update(final_labels, zero_indices,
                                               zero_labels)

    # inside weights - positive examples are set, rest are zeros
    bbox_inside_weights = tf.zeros((num_rois, self.num_classes, 4),
                                   dtype=tf.float32)
    final_bbox_targets = tf.zeros((num_rois, self.num_classes, 4),
                                  dtype=tf.float32)
    if num_fg > 0:
        fg_rows = tf.range(num_fg)
        if self.reg_class_agnostic:
            # a single shared regressor lives in class slot 0
            class_slots = tf.zeros(num_fg, dtype=tf.int32)
        else:
            class_slots = tf.gather(labels, fg_inds)
        # (row, class) pairs shared by the weight and target scatters
        cur_index = tf.stack([fg_rows, class_slots], axis=1)
        bbox_inside_weights = tf.tensor_scatter_nd_update(
            bbox_inside_weights, cur_index,
            tf.ones([num_fg, 4], bbox_inside_weights.dtype))
        bbox_targets = transforms.bbox2delta(
            tf.gather(final_rois, fg_rows),
            tf.gather(gt_boxes, tf.gather(gt_assignment, fg_inds)),
            target_stds=self.target_stds,
            target_means=self.target_means)
        final_bbox_targets = tf.tensor_scatter_nd_update(
            final_bbox_targets, cur_index, bbox_targets)
    bbox_inside_weights = tf.reshape(bbox_inside_weights,
                                     [-1, self.num_classes * 4])
    final_bbox_targets = tf.reshape(final_bbox_targets,
                                    [-1, self.num_classes * 4])

    bbox_outside_weights = tf.ones_like(
        bbox_inside_weights,
        dtype=bbox_inside_weights.dtype) * 1.0 / self.num_rcnn_deltas
    fg_assignments = tf.gather(gt_assignment, keep_inds)
    return (tf.stop_gradient(final_rois), tf.stop_gradient(final_labels),
            tf.stop_gradient(final_bbox_targets),
            tf.stop_gradient(bbox_inside_weights),
            tf.stop_gradient(bbox_outside_weights),
            tf.stop_gradient(fg_assignments))
def _build_single_target(self, all_anchors, valid_flags, gt_bboxes, gt_class_ids):
    """Build anchor classification and regression targets for one image.

    Args:
        all_anchors: Anchors of the image; the size of its first dimension
            must be statically known because it is read from the static
            shape.
        valid_flags: Per-anchor flags; anchors whose flag equals 1 are used.
        gt_bboxes: Ground truth boxes; passed through `trim_zeros` and cast
            to the anchors' dtype.
        gt_class_ids: Not used by this method.

    Returns:
        A 4-tuple, each entry wrapped in `tf.stop_gradient` and passed
        through `_unmap` over all anchors:
        target matches (1 = positive, 0 = negative, -1 = ignored; fill -1),
        bbox targets (fill 0), bbox inside weights (1 on positives; fill 0)
        and bbox outside weights (1 / number of anchors with label >= 0;
        fill 0).
    """
    gt_bboxes, _ = trim_zeros(gt_bboxes)
    total_anchors = all_anchors.get_shape().as_list()[0]

    # 1. Keep only the anchors flagged as valid
    selected_anchor_idx = tf.where(tf.equal(valid_flags, 1))[:, 0]
    anchors = tf.gather(all_anchors, selected_anchor_idx)
    gt_bboxes = tf.cast(gt_bboxes, anchors.dtype)
    num_valid = tf.shape(anchors)[0]

    # 2. IoU between every valid anchor and every GT box
    #    -> [anchors_size, gt_bboxes_size]
    overlaps = geometry.compute_overlaps(anchors, gt_bboxes)
    best_gt_per_anchor = tf.argmax(overlaps, axis=1, output_type=tf.int32)
    best_iou_per_anchor = tf.reduce_max(overlaps, axis=1)
    best_iou_per_gt = tf.reduce_max(overlaps, axis=0)
    # rows (anchors) that reach the best IoU of some GT box
    top_anchor_rows = tf.where(tf.equal(overlaps, best_iou_per_gt))[:, 0]

    # 3. Label anchors: start as ignored (-1), then background (0), then
    #    positives (1); later assignments win.
    target_matches = -tf.ones((num_valid,), tf.int32)
    target_matches = tf.where(best_iou_per_anchor < self.neg_iou_thr,
                              tf.zeros_like(target_matches),
                              target_matches)
    target_matches = tf.tensor_scatter_nd_update(
        target_matches,
        tf.expand_dims(top_anchor_rows, axis=1),
        tf.ones(tf.shape(top_anchor_rows)[0], dtype=tf.int32))
    target_matches = tf.where(best_iou_per_anchor >= self.pos_iou_thr,
                              tf.ones_like(target_matches),
                              target_matches)

    # 4. Subsample: surplus positives, then surplus negatives, are reset to
    #    ignored (-1)
    fg_inds = tf.where(tf.equal(target_matches, 1))[:, 0]
    max_pos_samples = tf.cast(self.positive_fraction * self.num_rpn_deltas,
                              tf.int32)
    if tf.size(fg_inds) > max_pos_samples:
        disable_inds = tf.random.shuffle(fg_inds)[max_pos_samples:]
        target_matches = tf.tensor_scatter_nd_update(
            target_matches,
            tf.expand_dims(disable_inds, axis=1),
            -tf.ones(tf.shape(disable_inds)[0], dtype=tf.int32))

    kept_fg = tf.reduce_sum(tf.cast(tf.equal(target_matches, 1), tf.int32))
    num_bg = self.num_rpn_deltas - kept_fg
    bg_inds = tf.where(tf.equal(target_matches, 0))[:, 0]
    if tf.size(bg_inds) > num_bg:
        disable_inds = tf.random.shuffle(bg_inds)[num_bg:]
        target_matches = tf.tensor_scatter_nd_update(
            target_matches,
            tf.expand_dims(disable_inds, axis=1),
            -tf.ones(tf.shape(disable_inds)[0], dtype=tf.int32))

    # 5. Encode every valid anchor against its best-matching GT box
    bboxes_targets = transforms.bbox2delta(
        anchors,
        tf.gather(gt_bboxes, best_gt_per_anchor),
        target_means=self.target_means,
        target_stds=self.target_stds)

    # Inside weights: ones on the rows of positive anchors
    positive_rows = tf.where(tf.equal(target_matches, 1))
    bbox_inside_weights = tf.tensor_scatter_nd_update(
        tf.zeros((num_valid, 4), dtype=tf.float32),
        positive_rows,
        tf.ones([tf.shape(positive_rows)[0], 4], tf.float32))

    # Outside weights: 1 / (#anchors with label >= 0) on exactly those rows
    sampled_mask = target_matches >= 0
    num_examples = tf.reduce_sum(tf.cast(sampled_mask, tf.float32))
    sampled_rows = tf.where(sampled_mask)
    bbox_outside_weights = tf.tensor_scatter_nd_update(
        tf.zeros((num_valid, 4), dtype=tf.float32),
        sampled_rows,
        tf.ones([tf.shape(sampled_rows)[0], 4], tf.float32) * 1.0 / num_examples)

    # for everything that is not selected fill with `fill` value
    unmapped = (
        _unmap(target_matches, total_anchors, selected_anchor_idx, -1),
        _unmap(bboxes_targets, total_anchors, selected_anchor_idx, 0),
        _unmap(bbox_inside_weights, total_anchors, selected_anchor_idx, 0),
        _unmap(bbox_outside_weights, total_anchors, selected_anchor_idx, 0),
    )
    return (tf.stop_gradient(unmapped[0]),
            tf.stop_gradient(unmapped[1]),
            tf.stop_gradient(unmapped[2]),
            tf.stop_gradient(unmapped[3]))