def _get_proposals_single(self, rpn_probs, rpn_deltas, mlvl_anchors, img_shape, with_probs, training=True):
    """
    Calculate proposals for one image from multi-level RPN outputs.

    For every level the top-scoring anchors are kept (at most
    ``num_pre_nms`` of them), the levels are concatenated, the deltas are
    applied to the anchors, the boxes are clipped to the image window and
    ``self.batched_nms`` picks the boxes that are returned.

    Args
    ---
        rpn_probs: sequence with one entry per level. Each entry is
            reshaped to [-1, 2]; column 1 is used as the score.
        rpn_deltas: sequence with one entry per level. Each entry is
            reshaped to [-1, 4].
        mlvl_anchors: sequence with one entry per level, indexed in step
            with rpn_probs (presumably [num_anchors, 4] - confirm
            against the anchor generator).
        img_shape: indexable; img_shape[0] and img_shape[1] are the upper
            bounds of the clip window.
        with_probs: accepted for interface compatibility only; this
            implementation never reads it, so scores are not appended.
        training: bool. Selects the ``*_train`` or ``*_test`` limits.

    Returns
    ---
        The clipped proposals gathered with the indices returned by
        ``self.batched_nms``, wrapped in ``tf.stop_gradient``.
    """
    if training:
        num_pre_nms = self.num_pre_nms_train
        proposal_count = self.num_post_nms_train
    else:
        num_pre_nms = self.num_pre_nms_test
        proposal_count = self.num_post_nms_test

    level_ids = []
    mlvl_scores = []
    mlvl_deltas = []
    mlvl_valid_anchors = []
    num_levels = len(rpn_probs)
    for idx in range(num_levels):
        # H, W, probs -> H * W, probs
        level_probs = tf.reshape(rpn_probs[idx], [-1, 2])
        level_scores = level_probs[:, 1]
        level_deltas = tf.reshape(rpn_deltas[idx], [-1, 4])
        level_anchors = mlvl_anchors[idx]

        # A level may hold fewer anchors than the configured limit.
        pre_nms_limit = tf.math.minimum(num_pre_nms, tf.shape(level_anchors)[0])
        ix = tf.nn.top_k(level_scores, pre_nms_limit, sorted=False).indices

        level_scores = tf.gather(level_scores, ix)
        mlvl_scores.append(level_scores)
        mlvl_deltas.append(tf.gather(level_deltas, ix))
        mlvl_valid_anchors.append(tf.gather(level_anchors, ix))
        # Tag every kept anchor with the index of the level it came from.
        level_ids.append(tf.fill([tf.shape(level_scores)[0]], idx))

    scores = tf.concat(mlvl_scores, axis=0)
    anchors = tf.concat(mlvl_valid_anchors, axis=0)
    deltas = tf.concat(mlvl_deltas, axis=0)

    # Get refined anchors
    proposals = transforms.delta2bbox(anchors, deltas, self.target_means, self.target_stds)

    # Clip to valid area
    window = tf.stack([0., 0., img_shape[0], img_shape[1]])
    proposals = transforms.bbox_clip(proposals, window)

    # The level ids are handed to batched_nms so that NMS is applied per
    # level, independent of the others.
    ids = tf.concat(level_ids, axis=0)
    keep = self.batched_nms(proposals, scores, ids, proposal_count, self.nms_threshold)
    proposals = tf.gather(proposals, keep)
    return tf.stop_gradient(proposals)
def call(self, inputs, training=True):
    '''
    Run every box-head stage in sequence, feeding each stage the boxes
    refined by the previous one.

    Args
    ---
        inputs: tuple. When training it is unpacked as
            (proposals_list, rcnn_feature_maps, gt_boxes, gt_class_ids,
            img_metas); otherwise as
            (proposals_list, rcnn_feature_maps, img_metas).
            proposals_list: list with one proposal tensor per image
                (presumably [num_proposals, (ymin, xmin, ymax, xmax)] -
                confirm against the RPN head).
            rcnn_feature_maps: feature maps handed to the RoI extractor
                (presumably the FPN outputs).
            gt_boxes, gt_class_ids: ground truth forwarded to
                ``build_targets``.
            img_metas: its ``shape[0]`` is used as the batch size.
        training: bool. Selects the loss path or the detection path.

    Returns
    ---
        When training: dict with the keys 'rcnn_class_loss_stage_<i>' and
        'rcnn_box_loss_stage_<i>' for every stage, each already multiplied
        by ``self.stage_loss_weights[i]``.
        Otherwise: dict with 'bboxes', 'labels' and 'scores' taken from the
        first entry of the last head's ``get_bboxes`` result.
    '''
    if training:
        (proposals_list, rcnn_feature_maps, gt_boxes,
         gt_class_ids, img_metas) = inputs
    else:
        proposals_list, rcnn_feature_maps, img_metas = inputs

    batch_size = img_metas.shape[0]
    loss_dict = {}

    # The first stage starts from the incoming proposals.
    rois_list = proposals_list
    for stage in range(self.num_stages):
        if training:
            targets = self.bbox_targets[stage].build_targets(
                rois_list, gt_boxes, gt_class_ids, img_metas)
            (rois_list, rcnn_target_matches, rcnn_target_deltas,
             inside_weights, outside_weights) = targets

        pooled_regions_list = self.bbox_roi_extractor(
            (rois_list, rcnn_feature_maps, img_metas), training=training)
        bbox_head = self.bbox_heads[stage]
        rcnn_class_logits, rcnn_probs, rcnn_deltas = bbox_head(
            pooled_regions_list, training=training)

        if training:
            class_loss = losses.rcnn_class_loss(rcnn_class_logits, rcnn_target_matches)
            loss_dict['rcnn_class_loss_stage_{}'.format(stage)] = \
                class_loss * self.stage_loss_weights[stage]
            box_loss = losses.rcnn_bbox_loss(
                rcnn_deltas, rcnn_target_deltas, inside_weights, outside_weights)
            loss_dict['rcnn_box_loss_stage_{}'.format(stage)] = \
                box_loss * self.stage_loss_weights[stage]

        # Split the stacked deltas back into one chunk per image.
        rois_per_image = [tf.shape(rois)[0] for rois in rois_list]
        refinements = tf.split(rcnn_deltas, rois_per_image)

        # Every stage but the last passes its refined boxes on; gradients
        # are cut between stages.
        if stage < (self.num_stages - 1):
            rois_list = [
                tf.stop_gradient(
                    transforms.delta2bbox(
                        rois_list[img_idx],
                        refinements[img_idx],
                        target_means=bbox_head.target_means,
                        target_stds=bbox_head.target_stds))
                for img_idx in range(batch_size)
            ]

    if training:
        return loss_dict

    detections_list = self.bbox_heads[-1].get_bboxes(
        rcnn_probs, rcnn_deltas, rois_list, img_metas)
    first_image = detections_list[0]
    return {
        'bboxes': first_image[0],
        'labels': first_image[1],
        'scores': first_image[2],
    }
def _get_bboxes_single(self, rcnn_probs, rcnn_deltas, rois, img_shape):
    '''
    Turn the box-head outputs for one image into final detections.

    For every class id from 1 to ``num_classes - 1`` the rois scoring
    above ``self.min_confidence`` are decoded, clipped to the image and
    passed through ``NonMaxSuppressionV5``. Each class result is padded to
    ``self.max_instances`` rows so the per-class results can be stacked,
    and the overall top ``max_instances`` scores are returned.

    Args
    ---
        rcnn_probs: [num_rois, num_classes]
        rcnn_deltas: reshaped here to
            [num_rois, num_classes, (dy, dx, log(dh), log(dw))]
        rois: [num_rois, (y1, x1, y2, x2)]
        img_shape: [2]. (img_height, img_width). The entries must expose
            ``.dtype`` because the clip window is built in that dtype.

    Returns
    ---
        Tuple (bboxes, class_ids, scores) gathered with the same top-k
        indices. Padding rows carry score 0.0, an all-zero box and class
        id -1, and can appear in the output when fewer than
        ``max_instances`` real detections exist. The result is not sorted
        by score (``sorted=False``).
    '''
    H = img_shape[0]
    W = img_shape[1]

    # Both are loop invariant, so build them once instead of once per class.
    rcnn_deltas = tf.reshape(rcnn_deltas, [-1, self.num_classes, 4])
    window = tf.stack([tf.constant(0., H.dtype), tf.constant(0., W.dtype), H, W])

    res_scores = tf.TensorArray(tf.float32, size=0, dynamic_size=True, infer_shape=True)
    res_bboxes = tf.TensorArray(tf.float32, size=0, dynamic_size=True, infer_shape=True)
    res_cls = tf.TensorArray(tf.int32, size=0, dynamic_size=True, infer_shape=True)

    # Class id 0 is skipped (presumably the background class - confirm).
    for cls_id in range(1, self.num_classes):
        cls_probs = rcnn_probs[:, cls_id]
        inds = tf.where(cls_probs > self.min_confidence)[:, 0]
        cls_score = tf.gather(cls_probs, inds)

        final_bboxes = transforms.delta2bbox(tf.gather(rois, inds),
                                             tf.gather(rcnn_deltas[:, cls_id, :], inds),
                                             self.target_means, self.target_stds)
        final_bboxes = transforms.bbox_clip(final_bboxes, window)

        # The NMS op needs boxes and scores in the same dtype.
        cls_score = tf.cast(cls_score, final_bboxes.dtype)
        keep, selected_cls_scores, _ = tf.raw_ops.NonMaxSuppressionV5(
            boxes=final_bboxes,
            scores=cls_score,
            max_output_size=self.max_instances,
            iou_threshold=self.nms_threshold,
            score_threshold=0.0,
            soft_nms_sigma=self.soft_nms_sigma)

        # Pad every class to max_instances rows so stack() sees equal shapes.
        pad_size = self.max_instances - tf.size(keep)
        slot = cls_id - 1
        padded_scores = tf.pad(selected_cls_scores,
                               paddings=[[0, pad_size]],
                               constant_values=0.0)
        res_scores = res_scores.write(slot, padded_scores)
        padded_bboxes = tf.pad(tf.gather(final_bboxes, keep),
                               paddings=[[0, pad_size], [0, 0]],
                               constant_values=0.0)
        res_bboxes = res_bboxes.write(slot, padded_bboxes)
        padded_cls = tf.pad(tf.ones_like(keep, dtype=tf.int32) * cls_id,
                            paddings=[[0, pad_size]],
                            constant_values=-1)
        res_cls = res_cls.write(slot, padded_cls)

    scores_after_nms = tf.reshape(res_scores.stack(), [-1])
    bboxes_after_nms = tf.reshape(res_bboxes.stack(), [-1, 4])
    cls_after_nms = tf.reshape(res_cls.stack(), [-1])

    # Keep the best max_instances entries across all classes.
    _, final_idx = tf.nn.top_k(scores_after_nms,
                               k=tf.minimum(self.max_instances, tf.size(scores_after_nms)),
                               sorted=False)
    return (tf.gather(bboxes_after_nms, final_idx),
            tf.gather(cls_after_nms, final_idx),
            tf.gather(scores_after_nms, final_idx))
def _get_proposals_single(self, scores_list, deltas_list, anchors_list, img_shape, with_probs, training=True):
    """
    Transform outputs for a single batch item into labeled boxes.

    Per level the logits go through a float32 sigmoid, the anchors with
    the highest per-anchor maximum class probability are kept (at most
    ``self.num_pre_nms``), and the levels are concatenated. The deltas
    are then applied to the anchors, the boxes are clipped to the image
    window and the result of ``self.batched_nms`` is returned.

    Args
    ---
        scores_list: sequence of per-level class logits; each entry is
            reshaped to [-1, self.num_classes] after the sigmoid.
        deltas_list: sequence of per-level box deltas; each entry is
            reshaped to [-1, 4].
        anchors_list: sequence of per-level anchors, same length as the
            two lists above (presumably [num_anchors, 4] - confirm).
        img_shape: indexable; img_shape[0] and img_shape[1] are the upper
            bounds of the clip window.
        with_probs: accepted for interface compatibility; not read here.
        training: accepted for interface compatibility; not read here.

    Returns
    ---
        Whatever ``self.batched_nms(proposals, scores, self.max_instances,
        self.nms_threshold)`` returns.

    Raises
    ---
        AssertionError: if the three input lists differ in length.
    """
    assert len(scores_list) == len(deltas_list) == len(anchors_list)

    mlvl_deltas = []
    mlvl_scores = []
    mlvl_anchors = []
    num_levels = len(scores_list)
    for idx in range(num_levels):
        # The Activation layer is created with dtype=float32 so the
        # probabilities are float32 (presumably for mixed precision).
        probs = tf.keras.layers.Activation(
            tf.nn.sigmoid, dtype=tf.float32)(scores_list[idx])
        deltas = deltas_list[idx]
        anchors = anchors_list[idx]
        probs = tf.reshape(probs, [-1, self.num_classes])
        deltas = tf.reshape(deltas, [-1, 4])

        # A level may hold fewer anchors than the configured limit.
        pre_nms_limit = tf.math.minimum(self.num_pre_nms, tf.shape(anchors)[0])
        max_probs = tf.reduce_max(probs, axis=1)
        # top k for each level (as per paper)
        ix = tf.nn.top_k(max_probs, k=pre_nms_limit).indices

        mlvl_anchors.append(tf.gather(anchors, ix))
        mlvl_deltas.append(tf.gather(deltas, ix))
        # Full per-class probability rows of the selected anchors, not
        # only their maxima.
        mlvl_scores.append(tf.gather(probs, ix))

    scores = tf.concat(mlvl_scores, axis=0)
    anchors = tf.concat(mlvl_anchors, axis=0)
    deltas = tf.concat(mlvl_deltas, axis=0)

    proposals = transforms.delta2bbox(anchors, deltas, self.target_means, self.target_stds)

    # Clip to valid area
    window = tf.stack([0., 0., img_shape[0], img_shape[1]])
    proposals = transforms.bbox_clip(proposals, window)

    return self.batched_nms(proposals, scores, self.max_instances, self.nms_threshold)
def _get_proposals_single(self, rpn_probs, rpn_deltas, anchors, valid_flags, img_shape, with_probs, training=True):
    '''
    Calculate the proposals for one image from a flat set of anchors.

    Args
    ---
        rpn_probs: [num_anchors]
        rpn_deltas: [num_anchors, (dy, dx, log(dh), log(dw))]
        anchors: [num_anchors, (y1, x1, y2, x2)] anchors defined in pixel
            coordinates.
        valid_flags: [num_anchors]. Cast to bool; anchors whose flag is
            false are dropped before anything else happens.
        img_shape: [2]. (img_height, img_width), used as the upper bounds
            of the clip window.
        with_probs: bool. When true the kept scores are appended as an
            extra trailing column.
        training: bool. Selects the ``*_train`` or ``*_test`` limits.

    Returns
    ---
        proposals: [num_proposals, (y1, x1, y2, x2)], clipped to the
            window [0, 0, img_height, img_width] and cut off from the
            gradient. With ``with_probs`` the shape is [num_proposals, 5].
    '''
    img_height = img_shape[0]
    img_width = img_shape[1]

    # Drop invalid anchors (int flags -> bool mask).
    valid_mask = tf.cast(valid_flags, tf.bool)
    rpn_probs = tf.boolean_mask(rpn_probs, valid_mask)
    rpn_deltas = tf.boolean_mask(rpn_deltas, valid_mask)
    anchors = tf.boolean_mask(anchors, valid_mask)

    # Keep only the best-scoring anchors before NMS to save work.
    num_pre_nms = self.num_pre_nms_train if training else self.num_pre_nms_test
    pre_nms_limit = tf.math.minimum(num_pre_nms, tf.shape(anchors)[0])
    top_idx = tf.nn.top_k(rpn_probs, pre_nms_limit, sorted=False).indices
    rpn_probs = tf.gather(rpn_probs, top_idx)
    rpn_deltas = tf.gather(rpn_deltas, top_idx)
    anchors = tf.gather(anchors, top_idx)

    # Apply the deltas to the anchors, then clip to the image.
    boxes = transforms.delta2bbox(anchors, rpn_deltas,
                                  self.target_means, self.target_stds)
    clip_window = tf.stack([0., 0., img_height, img_width])
    boxes = transforms.bbox_clip(boxes, clip_window)

    proposal_count = self.num_post_nms_train if training else self.num_post_nms_test

    # The NMS op needs boxes and scores in the same dtype.
    rpn_probs = tf.cast(rpn_probs, boxes.dtype)
    keep = tf.raw_ops.NonMaxSuppressionV2(
        boxes=boxes,
        scores=rpn_probs,
        max_output_size=proposal_count,
        iou_threshold=self.nms_threshold)
    proposals = tf.stop_gradient(tf.gather(boxes, keep))

    if with_probs:
        kept_probs = tf.expand_dims(tf.gather(rpn_probs, keep), axis=1)
        proposals = tf.concat([proposals, kept_probs], axis=1)

    return proposals