def _build(self, cls_prob, loc_pred, all_anchors, im_shape):
    """Turn per-anchor SSD predictions into the final detections.

    For every class the anchors are filtered by minimum probability,
    decoded into boxes, clipped to the image, filtered by area and run
    through NMS. The survivors of all classes are then concatenated and
    the `total_max_detections` most confident ones are returned.

    Args:
        cls_prob: A softmax probability for each anchor where the idx = 0
            is the background class (which we should ignore).
            Shape (total_anchors, num_classes + 1)
        loc_pred: A Tensor with the regression output for each anchor.
            Its shape should be (total_anchors, 4).
        all_anchors: A Tensor with the anchors bounding boxes of shape
            (total_anchors, 4), having (x_min, y_min, x_max, y_max) for
            each anchor.
        im_shape: A Tensor with the image shape in format (height, width).

    Returns:
        prediction_dict with the following keys:
            objects: The detections after filtering, NMS and top-k.
                Its shape is (final_num_proposals, 4), where
                final_num_proposals is unknown before-hand (it depends on
                NMS). The 4-length Tensor for each corresponds to:
                (x_min, y_min, x_max, y_max).
            labels: Class id (background excluded, so the first real
                class is 0) of each detection.
                Its shape is (final_num_proposals,)
            probs: Class probability of each detection.
                Its shape is (final_num_proposals,)
            raw_proposals: The anchors adjusted using loc_pred, before
                clipping. Only the ones of the last processed class are
                returned.
            anchors: The anchor each detection was decoded from, aligned
                row by row with `objects` (for debugging).
    """
    selected_boxes = []
    selected_probs = []
    selected_labels = []
    selected_anchors = []  # For debugging

    # Does not depend on the class, so compute it once.
    total_anchors = tf.shape(all_anchors)[0]

    for class_id in range(self._num_classes):
        # Get the confidences for this class (+ 1 is to ignore background)
        class_cls_prob = cls_prob[:, class_id + 1]

        # Filter by min_prob_threshold
        min_prob_filter = tf.greater_equal(
            class_cls_prob, self._min_prob_threshold)
        class_cls_prob = tf.boolean_mask(class_cls_prob, min_prob_filter)
        class_loc_pred = tf.boolean_mask(loc_pred, min_prob_filter)
        anchors = tf.boolean_mask(all_anchors, min_prob_filter)

        # Using the loc_pred and the anchors, we generate the proposals.
        raw_proposals = decode(anchors, class_loc_pred, self._variances)
        # Clip boxes to image.
        clipped_proposals = clip_boxes(raw_proposals, im_shape)

        # Filter proposals that have a non-valid (zero) area.
        (x_min, y_min, x_max, y_max) = tf.unstack(
            clipped_proposals, axis=1)
        proposal_filter = tf.greater(
            tf.maximum(x_max - x_min, 0.) * tf.maximum(y_max - y_min, 0.),
            0.)

        class_proposals = tf.boolean_mask(
            clipped_proposals, proposal_filter)
        class_cls_prob = tf.boolean_mask(class_cls_prob, proposal_filter)
        proposal_anchors = tf.boolean_mask(anchors, proposal_filter)

        # Log results of filtering non-valid area proposals.
        total_proposals = tf.shape(class_proposals)[0]
        total_raw_proposals = tf.shape(raw_proposals)[0]
        # Number of proposals dropped by the area filter (never negative).
        tf.summary.scalar(
            'invalid_proposals',
            total_raw_proposals - total_proposals, ['ssd'])
        # Fraction of all anchors that survived for this class. Dividing
        # by the anchor count avoids a division by zero when no proposal
        # of this class survives.
        tf.summary.scalar(
            'valid_proposals_ratio',
            tf.cast(total_proposals, tf.float32) /
            tf.cast(total_anchors, tf.float32), ['ssd'])

        # We have to use the TensorFlow's bounding box convention to use
        # the included function for NMS.
        # After gathering results we should normalize it back.
        class_proposal_tf = change_order(class_proposals)

        # Apply class NMS.
        class_selected_idx = tf.image.non_max_suppression(
            class_proposal_tf, class_cls_prob, self._class_max_detections,
            iou_threshold=self._class_nms_threshold)

        # Using NMS resulting indices, gather values from Tensors.
        class_proposal_tf = tf.gather(
            class_proposal_tf, class_selected_idx)
        class_cls_prob = tf.gather(class_cls_prob, class_selected_idx)
        # The anchors must go through the same selection, otherwise their
        # rows stop matching the boxes once everything is concatenated.
        class_anchors = tf.gather(proposal_anchors, class_selected_idx)

        # We append values to a regular list which will later be
        # transformed to a proper Tensor.
        selected_boxes.append(class_proposal_tf)
        selected_probs.append(class_cls_prob)
        # In the case of the class_id, since it is a loop on classes, we
        # already have a fixed class_id. We use `tf.tile` to create that
        # Tensor with the total number of indices returned by the NMS.
        selected_labels.append(
            tf.tile([class_id], [tf.shape(class_selected_idx)[0]]))
        selected_anchors.append(class_anchors)

    # We use concat (axis=0) to generate a Tensor where the rows are
    # stacked on top of each other
    proposals_tf = tf.concat(selected_boxes, axis=0)
    # Return to the original convention.
    proposals = change_order(proposals_tf)
    proposal_label = tf.concat(selected_labels, axis=0)
    proposal_label_prob = tf.concat(selected_probs, axis=0)
    proposal_anchors = tf.concat(selected_anchors, axis=0)

    # Get topK detections of all classes.
    k = tf.minimum(
        self._total_max_detections,
        tf.shape(proposal_label_prob)[0])
    top_k = tf.nn.top_k(proposal_label_prob, k=k)
    top_k_proposal_label_prob = top_k.values
    top_k_proposals = tf.gather(proposals, top_k.indices)
    top_k_proposal_label = tf.gather(proposal_label, top_k.indices)
    top_k_proposal_anchors = tf.gather(proposal_anchors, top_k.indices)

    return {
        'objects': top_k_proposals,
        'labels': top_k_proposal_label,
        'probs': top_k_proposal_label_prob,
        # NOTE: this is the tensor left over from the last loop iteration,
        # i.e. only the raw proposals of the last class.
        'raw_proposals': raw_proposals,
        'anchors': top_k_proposal_anchors,
    }
def _build(self, rpn_cls_prob, rpn_bbox_pred, all_anchors, im_shape):
    """Select the RPN proposals that are forwarded to the next stage.

    Anchors are (optionally) restricted to the ones inside the image,
    decoded with the predicted deltas, filtered by area, reduced to the
    `pre_nms_top_n` best scoring ones and finally run through NMS.

    Args:
        rpn_cls_prob: A Tensor with the softmax output for each anchor.
            Its shape should be (total_anchors, 2), with the probability
            of being background and the probability of being foreground
            for each anchor.
        rpn_bbox_pred: A Tensor with the regression output for each
            anchor. Its shape should be (total_anchors, 4).
        all_anchors: A Tensor with the anchors bounding boxes of shape
            (total_anchors, 4), having (x_min, y_min, x_max, y_max) for
            each anchor.
        im_shape: A Tensor with the image shape in format (height, width).

    Returns:
        prediction_dict with the following keys:
            nms_proposals: A Tensor with the final selected proposed
                bounding boxes. Its shape is (total_nms_proposals, 5):
                a leading batch index column (always zero here) followed
                by (x_min, y_min, x_max, y_max).
            nms_proposals_scores: A Tensor with the probability of being
                an object for that proposal. Its shape is
                (total_nms_proposals,)
        and, only when `self._debug` is set:
            scores: A Tensor with the scores of the proposals contained
                in `proposals` and `proposals_unclipped`.
            proposals: A Tensor with all the valid area RPN proposals,
                the proposals are clipped if `clip_after_nms` is set to
                False.
            proposals_unclipped: Same as proposals but the proposals in
                this tensor are never clipped.
            top_k_proposals: The best scoring proposals fed to NMS.
            top_k_scores: The scores of `top_k_proposals`.
            all_proposals: A Tensor with all the proposals, including the
                ones with zero or negative area.
    """
    # Scores are extracted from the second scalar of the cls probability.
    # cls_probability is a softmax of (background, foreground).
    scores = rpn_cls_prob[:, 1]
    # Force flatten the scores (it should be already be flatten).
    scores = tf.reshape(scores, [-1])

    if self._filter_outside_anchors:
        with tf.name_scope('filter_outside_anchors'):
            (x_min_anchor, y_min_anchor,
             x_max_anchor, y_max_anchor) = tf.unstack(all_anchors, axis=1)
            # Keep only the anchors that lie completely inside the image.
            anchor_filter = tf.logical_and(
                tf.logical_and(
                    tf.greater_equal(x_min_anchor, 0),
                    tf.greater_equal(y_min_anchor, 0)
                ),
                tf.logical_and(
                    tf.less(x_max_anchor, im_shape[1]),
                    tf.less(y_max_anchor, im_shape[0])
                )
            )
            anchor_filter = tf.reshape(anchor_filter, [-1])
            all_anchors = tf.boolean_mask(
                all_anchors, anchor_filter, name='filter_anchors')
            rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, anchor_filter)
            scores = tf.boolean_mask(scores, anchor_filter)

    # Decode boxes
    all_proposals = decode(all_anchors, rpn_bbox_pred)

    # Filter proposals with negative or zero area.
    (x_min, y_min, x_max, y_max) = tf.unstack(all_proposals, axis=1)
    proposal_filter = tf.greater(
        tf.maximum(x_max - x_min, 0.0) * tf.maximum(y_max - y_min, 0.0),
        0.0
    )
    proposal_filter = tf.reshape(proposal_filter, [-1])

    # Filter proposals and scores.
    total_proposals = tf.shape(scores)[0]
    scores = tf.boolean_mask(
        scores, proposal_filter,
        name='filter_invalid_scores'
    )
    proposals = tf.boolean_mask(
        all_proposals, proposal_filter,
        name='filter_invalid_proposals'
    )

    if self._debug:
        # Only needed (and only returned) in debug mode.
        proposals_unclipped = tf.identity(proposals)

    if not self._clip_after_nms:
        # Clip proposals to the image.
        proposals = clip_boxes(proposals, im_shape)

    filtered_proposals = tf.shape(scores)[0]

    tf.summary.scalar(
        'valid_proposals_ratio',
        (
            tf.cast(filtered_proposals, tf.float32) /
            tf.cast(total_proposals, tf.float32)
        ), ['rpn'])

    tf.summary.scalar(
        'invalid_proposals',
        total_proposals - filtered_proposals, ['rpn'])

    # Get top `pre_nms_top_n` indices by sorting the proposals by score.
    k = tf.minimum(self._pre_nms_top_n, tf.shape(scores)[0])
    top_k = tf.nn.top_k(scores, k=k)
    top_k_scores = top_k.values

    top_k_proposals = tf.gather(proposals, top_k.indices)
    # We reorder the proposals into TensorFlows bounding box order for
    # `tf.image.non_max_supression` compatibility.
    proposals_tf_order = change_order(top_k_proposals)

    # We cut the pre_nms filter in pure TF version and go straight into
    # NMS.
    # The scores are flattened with `reshape` instead of `squeeze`:
    # squeezing a single-element vector yields a scalar, which NMS rejects
    # because it requires 1-D scores.
    selected_indices = tf.image.non_max_suppression(
        proposals_tf_order, tf.reshape(top_k_scores, [-1]),
        self._post_nms_top_n, iou_threshold=self._nms_threshold
    )

    # Selected_indices is a smaller tensor, we need to extract the
    # proposals and scores using it.
    nms_proposals = tf.gather(
        proposals_tf_order, selected_indices,
        name='gather_nms_proposals'
    )
    nms_proposals_scores = tf.gather(
        top_k_scores, selected_indices,
        name='gather_nms_proposals_scores'
    )

    # We switch back again to the regular bbox encoding.
    nms_proposals = change_order(nms_proposals)

    if self._clip_after_nms:
        # Clip proposals to the image after NMS.
        nms_proposals = clip_boxes(nms_proposals, im_shape)

    # Adds batch number for consistency and multi image batch support.
    batch_inds = tf.zeros(
        (tf.shape(nms_proposals)[0], 1), dtype=tf.float32
    )
    nms_proposals = tf.concat([batch_inds, nms_proposals], axis=1)

    pred = {
        'nms_proposals': tf.stop_gradient(nms_proposals),
        'nms_proposals_scores': tf.stop_gradient(nms_proposals_scores),
    }

    if self._debug:
        pred.update({
            'proposals': proposals,
            'scores': scores,
            'proposals_unclipped': proposals_unclipped,
            'top_k_proposals': top_k_proposals,
            'top_k_scores': top_k_scores,
            'all_proposals': all_proposals,
        })

    return pred
def _build(self, proposals, bbox_pred, cls_prob, im_shape):
    """Turn the RCNN outputs into the final detections.

    Every proposal is assigned its most probable class; background and
    low-probability proposals are dropped, the class-specific deltas are
    applied, the boxes are clipped and filtered by area, NMS is run per
    class and the `total_max_detections` most confident results are kept.

    Args:
        proposals: Tensor with the RPN proposals bounding boxes.
            Shape (num_proposals, 4). Where num_proposals is less than
            POST_NMS_TOP_N (We don't know exactly beforehand)
        bbox_pred: Tensor with the RCNN delta predictions for each
            proposal for each class.
            Shape (num_proposals, 4 * num_classes)
        cls_prob: A softmax probability for each proposal where the
            idx = 0 is the background class (which we should ignore).
            Shape (num_proposals, num_classes + 1)
        im_shape: Image shape, handed to `clip_boxes` to clip the decoded
            boxes to the image.

    Returns:
        prediction_dict with the following keys:
            raw_objects: The decoded boxes before clipping, area
                filtering and NMS.
            objects:
                Shape (final_num_proposals, 4)
                Where final_num_proposals is unknown before-hand (it
                depends on NMS). The 4-length Tensor for each corresponds
                to: (x_min, y_min, x_max, y_max).
            proposal_label:
                Shape (final_num_proposals,)
            proposal_label_prob:
                Shape (final_num_proposals,)
            selected_boxes, selected_probs, selected_labels: Python lists
                with one Tensor per class holding the per-class NMS
                results (boxes in the order `change_order` produces).
    """
    # First we want get the most probable label for each proposal
    # We still have the background on idx 0 so we subtract 1 to the idxs.
    proposal_label = tf.argmax(cls_prob, axis=1) - 1
    # Get the probability for the selected label for each proposal.
    proposal_label_prob = tf.reduce_max(cls_prob, axis=1)

    # We are going to use only the non-background proposals.
    non_background_filter = tf.greater_equal(proposal_label, 0)
    # Filter proposals with less than threshold probability.
    min_prob_filter = tf.greater_equal(
        proposal_label_prob, self._min_prob_threshold)
    proposal_filter = tf.logical_and(
        non_background_filter, min_prob_filter)

    total_proposals = tf.shape(proposals)[0]

    equal_shapes = tf.assert_equal(
        tf.shape(proposals)[0], tf.shape(bbox_pred)[0])
    with tf.control_dependencies([equal_shapes]):
        # Filter all tensors for getting all non-background proposals.
        proposals = tf.boolean_mask(proposals, proposal_filter)
        proposal_label = tf.boolean_mask(proposal_label, proposal_filter)
        proposal_label_prob = tf.boolean_mask(
            proposal_label_prob, proposal_filter)
        bbox_pred = tf.boolean_mask(bbox_pred, proposal_filter)

    filtered_proposals = tf.shape(proposals)[0]

    tf.summary.scalar(
        'background_or_low_prob_proposals',
        total_proposals - filtered_proposals, ['rcnn'])

    # Create one hot with labels for using it to filter bbox_predictions.
    label_one_hot = tf.one_hot(proposal_label, depth=self._num_classes)
    # Flatten label_one_hot to get
    # (num_non_background_proposals * num_classes, 1) for filtering.
    label_one_hot_flatten = tf.cast(
        tf.reshape(label_one_hot, [-1]), tf.bool)
    # Flatten bbox_predictions getting
    # (num_non_background_proposals * num_classes, 4).
    bbox_pred_flatten = tf.reshape(bbox_pred, [-1, 4])

    equal_shapes = tf.assert_equal(
        tf.shape(bbox_pred_flatten)[0],
        tf.shape(label_one_hot_flatten)[0])
    with tf.control_dependencies([equal_shapes]):
        # Control same number of dimensions between bbox and mask.
        bbox_pred = tf.boolean_mask(
            bbox_pred_flatten, label_one_hot_flatten)

    # Using the bbox_pred and the proposals we generate the objects.
    raw_objects = decode(proposals, bbox_pred)
    # Clip boxes to image.
    clipped_objects = clip_boxes(raw_objects, im_shape)

    # Filter objects that have a non-valid area. The comparison has to be
    # strict: the clamped area is never negative, so `>=` would let every
    # box through, including the zero-area ones.
    (x_min, y_min, x_max, y_max) = tf.unstack(clipped_objects, axis=1)
    object_filter = tf.greater(
        tf.maximum(x_max - x_min, 0.0) * tf.maximum(y_max - y_min, 0.0),
        0.0)

    total_raw_objects = tf.shape(raw_objects)[0]
    objects = tf.boolean_mask(clipped_objects, object_filter)
    proposal_label = tf.boolean_mask(proposal_label, object_filter)
    proposal_label_prob = tf.boolean_mask(
        proposal_label_prob, object_filter)

    total_objects = tf.shape(objects)[0]

    # Number of objects dropped by the area filter (never negative).
    tf.summary.scalar(
        'invalid_proposals',
        total_raw_objects - total_objects, ['rcnn'])

    # Fraction of the input proposals that ended up as a valid object.
    tf.summary.scalar(
        'valid_proposals_ratio',
        tf.cast(total_objects, tf.float32) /
        tf.cast(total_proposals, tf.float32), ['rcnn'])

    # We have to use the TensorFlow's bounding box convention to use the
    # included function for NMS.
    # After gathering results we should normalize it back.
    objects_tf = change_order(objects)

    selected_boxes = []
    selected_probs = []
    selected_labels = []
    # For each class we want to filter those objects and apply NMS to them.
    for class_id in range(self._num_classes):
        # Filter objects Tensors with class.
        class_filter = tf.equal(proposal_label, class_id)
        class_objects_tf = tf.boolean_mask(objects_tf, class_filter)
        class_prob = tf.boolean_mask(proposal_label_prob, class_filter)

        # Apply class NMS.
        class_selected_idx = tf.image.non_max_suppression(
            class_objects_tf, class_prob, self._class_max_detections,
            iou_threshold=self._class_nms_threshold)

        # Using NMS resulting indices, gather values from Tensors.
        class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
        class_prob = tf.gather(class_prob, class_selected_idx)

        # We append values to a regular list which will later be
        # transformed to a proper Tensor.
        selected_boxes.append(class_objects_tf)
        selected_probs.append(class_prob)
        # In the case of the class_id, since it is a loop on classes, we
        # already have a fixed class_id. We use `tf.tile` to create that
        # Tensor with the total number of indices returned by the NMS.
        selected_labels.append(
            tf.tile([class_id], [tf.shape(class_selected_idx)[0]]))

    # We use concat (axis=0) to generate a Tensor where the rows are
    # stacked on top of each other
    objects_tf = tf.concat(selected_boxes, axis=0)
    # Return to the original convention.
    objects = change_order(objects_tf)
    proposal_label = tf.concat(selected_labels, axis=0)
    proposal_label_prob = tf.concat(selected_probs, axis=0)

    # Get topK detections of all classes.
    k = tf.minimum(
        self._total_max_detections,
        tf.shape(proposal_label_prob)[0])
    top_k = tf.nn.top_k(proposal_label_prob, k=k)
    top_k_proposal_label_prob = top_k.values
    top_k_objects = tf.gather(objects, top_k.indices)
    top_k_proposal_label = tf.gather(proposal_label, top_k.indices)

    return {
        'raw_objects': raw_objects,
        'objects': top_k_objects,
        'proposal_label': top_k_proposal_label,
        'proposal_label_prob': top_k_proposal_label_prob,
        'selected_boxes': selected_boxes,
        'selected_probs': selected_probs,
        'selected_labels': selected_labels,
    }
def _build(self, rpn_cls_prob, rpn_bbox_pred, all_anchors, im_shape):
    """Select region proposals from the raw RPN outputs.

    Args:
        rpn_cls_prob: Softmax output per anchor, expected shape
            (total_anchors, 2): background and foreground probability.
        rpn_bbox_pred: Regression output per anchor, expected shape
            (total_anchors, 4).
        all_anchors: Anchor boxes, shape (total_anchors, 4), given as
            (x_min, y_min, x_max, y_max).
        im_shape: Image shape as (height, width).

    Returns:
        prediction_dict with the following keys:
            proposals: The selected proposal boxes, one row of four
                coordinates per proposal.
            scores: The foreground probability of each selected proposal.
        When `self._debug` is set, the intermediate tensors
        (`sorted_top_*`, `unsorted_*`, `all_*`, `proposals_unclipped`)
        are included as well.
    """
    # Column 1 of the softmax is the foreground probability; the reshape
    # only guarantees that we work on a flat vector.
    fg_prob = rpn_cls_prob[:, 1]
    fg_prob = tf.reshape(fg_prob, [-1])

    anchors = all_anchors
    deltas = rpn_bbox_pred

    if self._filter_outside_anchors:
        with tf.name_scope('filter_outside_anchors'):
            (anchor_x1, anchor_y1,
             anchor_x2, anchor_y2) = tf.unstack(anchors, axis=1)
            # An anchor is kept when its top-left corner is not negative
            # and its bottom-right corner is strictly inside the image
            # (im_shape[1] is the width, im_shape[0] the height).
            left_ok = tf.greater_equal(anchor_x1, 0)
            top_ok = tf.greater_equal(anchor_y1, 0)
            origin_ok = tf.logical_and(left_ok, top_ok)
            right_ok = tf.less(anchor_x2, im_shape[1])
            bottom_ok = tf.less(anchor_y2, im_shape[0])
            extent_ok = tf.logical_and(right_ok, bottom_ok)
            inside = tf.logical_and(origin_ok, extent_ok)
            inside = tf.reshape(inside, [-1])
            anchors = tf.boolean_mask(
                anchors, inside, name='filter_anchors')
            deltas = tf.boolean_mask(deltas, inside)
            fg_prob = tf.boolean_mask(fg_prob, inside)

    # Apply the predicted deltas to the anchors.
    decoded = decode(anchors, deltas)

    # Mask of proposals that reach the minimum probability.
    confident = tf.greater_equal(fg_prob, self._min_prob_threshold)

    # Mask of proposals with a strictly positive area.
    box_x1, box_y1, box_x2, box_y2 = tf.unstack(decoded, axis=1)
    width = tf.maximum(box_x2 - box_x1, 0.0)
    height = tf.maximum(box_y2 - box_y1, 0.0)
    has_area = tf.greater(width * height, 0.0)

    keep = tf.logical_and(has_area, confident)

    # Apply both masks to scores and boxes.
    num_decoded = tf.shape(fg_prob)[0]
    kept_scores = tf.boolean_mask(fg_prob, keep, name='filtered_scores')
    kept_boxes = tf.boolean_mask(decoded, keep, name='filtered_proposals')

    if self._debug:
        unclipped = tf.identity(kept_boxes)

    if not self._clip_after_nms:
        # Clipping is requested before NMS.
        kept_boxes = clip_boxes(kept_boxes, im_shape)

    num_kept = tf.shape(kept_scores)[0]

    valid_ratio = (
        tf.cast(num_kept, tf.float32) / tf.cast(num_decoded, tf.float32)
    )
    tf.summary.scalar('valid_proposals_ratio', valid_ratio, ['rpn'])
    tf.summary.scalar('invalid_proposals', num_decoded - num_kept, ['rpn'])

    # Keep at most `pre_nms_top_n` proposals, best score first.
    k = tf.minimum(self._pre_nms_top_n, tf.shape(kept_scores)[0])
    ranked = tf.nn.top_k(kept_scores, k=k)
    top_boxes = tf.gather(kept_boxes, ranked.indices)
    top_scores = ranked.values

    if not self._apply_nms:
        proposals = top_boxes
        scores = top_scores
    else:
        with tf.name_scope('nms'):
            # `tf.image.non_max_suppression` expects TensorFlow's own
            # coordinate order, so convert, select and convert back.
            boxes_tf_order = change_order(top_boxes)
            flat_scores = tf.reshape(top_scores, [-1])
            nms_idx = tf.image.non_max_suppression(
                boxes_tf_order, flat_scores, self._post_nms_top_n,
                iou_threshold=self._nms_threshold
            )
            nms_boxes_tf_order = tf.gather(
                boxes_tf_order, nms_idx, name='gather_nms_proposals'
            )
            proposals = change_order(nms_boxes_tf_order)
            scores = tf.gather(
                top_scores, nms_idx, name='gather_nms_proposals_scores'
            )

    if self._clip_after_nms:
        # Clipping is requested after NMS.
        proposals = clip_boxes(proposals, im_shape)

    pred = {'proposals': proposals, 'scores': scores}

    if self._debug:
        pred['sorted_top_scores'] = top_scores
        pred['sorted_top_proposals'] = top_boxes
        pred['unsorted_proposals'] = kept_boxes
        pred['unsorted_scores'] = kept_scores
        pred['all_proposals'] = decoded
        pred['all_scores'] = fg_prob
        # Rows of proposals_unclipped correspond to unsorted_scores.
        pred['proposals_unclipped'] = unclipped

    return pred
def _build(self, rpn_cls_prob, rpn_bbox_pred, all_anchors, im_shape):
    """Build the graph that turns RPN outputs into proposals.

    Args:
        rpn_cls_prob: A Tensor with the softmax output for each anchor
            (the class probabilities predicted by the RPN). Its shape
            should be (total_anchors, 2): probability of being background
            and probability of being foreground.
        rpn_bbox_pred: A Tensor with the regression output for each anchor
            (the boxes predicted by the RPN). Its shape should be
            (total_anchors, 4).
        all_anchors: A Tensor with the anchors fed to the RPN, of shape
            (total_anchors, 4), having (x_min, y_min, x_max, y_max) for
            each anchor.
        im_shape: A Tensor with the image shape in format (height, width).

    Returns:
        prediction_dict with the following keys:
            proposals: A Tensor with the finally selected proposal boxes,
                four coordinates per row.
            scores: A Tensor with the probability of being an object for
                each of those proposals.
        In debug mode the intermediate tensors are added to the dict.
    """
    # The score of an anchor is the second entry of its class probability,
    # which is a softmax over (background, foreground).
    objectness = rpn_cls_prob[:, 1]
    # Force a flat vector. It should already be flat, so this is most
    # likely only a safety measure.
    objectness = tf.reshape(objectness, [-1])

    candidate_anchors = all_anchors
    candidate_deltas = rpn_bbox_pred

    if self._filter_outside_anchors:
        with tf.name_scope('filter_outside_anchors'):
            # Split (total_anchors, 4) along axis 1 into four vectors, one
            # per coordinate.
            (a_x_min, a_y_min, a_x_max, a_y_max) = tf.unstack(
                candidate_anchors, axis=1)
            # Bounds check. x runs horizontally and y vertically, so with
            # im_shape being (height, width), im_shape[1] bounds x and
            # im_shape[0] bounds y. Anchors whose two corners are inside
            # the image evaluate to True, all others to False.
            min_corner_inside = tf.logical_and(
                tf.greater_equal(a_x_min, 0),
                tf.greater_equal(a_y_min, 0))
            max_corner_inside = tf.logical_and(
                tf.less(a_x_max, im_shape[1]),
                tf.less(a_y_max, im_shape[0]))
            in_image = tf.reshape(
                tf.logical_and(min_corner_inside, max_corner_inside), [-1])
            # Keep the anchors inside the image together with their
            # predicted deltas and scores.
            candidate_anchors = tf.boolean_mask(
                candidate_anchors, in_image, name='filter_anchors')
            candidate_deltas = tf.boolean_mask(candidate_deltas, in_image)
            objectness = tf.boolean_mask(objectness, in_image)

    # Decode boxes: combine the reference anchors with the predicted
    # offsets to obtain the predicted box coordinates.
    raw_boxes = decode(candidate_anchors, candidate_deltas)

    # Boolean vector marking the scores that reach the threshold.
    above_threshold = tf.greater_equal(
        objectness, self._min_prob_threshold)

    # Boolean vector marking the boxes with x_max > x_min and
    # y_max > y_min, i.e. with a strictly positive area.
    (p_x_min, p_y_min, p_x_max, p_y_max) = tf.unstack(raw_boxes, axis=1)
    clamped_width = tf.maximum(p_x_max - p_x_min, 0.0)
    clamped_height = tf.maximum(p_y_max - p_y_min, 0.0)
    positive_area = tf.greater(clamped_width * clamped_height, 0.0)

    # Positions that hold valid data: positive area and enough score.
    valid = tf.logical_and(positive_area, above_threshold)

    # The two masks below produce the scores and proposals to keep.
    total_before = tf.shape(objectness)[0]
    scores_kept = tf.boolean_mask(
        objectness, valid, name='filtered_scores')
    boxes_kept = tf.boolean_mask(
        raw_boxes, valid, name='filtered_proposals')

    if self._debug:
        boxes_kept_unclipped = tf.identity(boxes_kept)

    # Clip before NMS (instead of after it) when so configured;
    # `clip_boxes` restricts boxes that reach outside the image.
    if not self._clip_after_nms:
        boxes_kept = clip_boxes(boxes_kept, im_shape)

    total_after = tf.shape(scores_kept)[0]

    kept_fraction = (
        tf.cast(total_after, tf.float32) /
        tf.cast(total_before, tf.float32))
    tf.summary.scalar('valid_proposals_ratio', kept_fraction, ['rpn'])
    tf.summary.scalar(
        'invalid_proposals', total_before - total_after, ['rpn'])

    # Take the top `pre_nms_top_n` proposals by score before NMS; k must
    # not exceed the number of available proposals.
    k = tf.minimum(self._pre_nms_top_n, tf.shape(scores_kept)[0])
    # Values and indices of the k largest entries.
    best = tf.nn.top_k(scores_kept, k=k)
    # Pick the matching proposals by index, plus their scores.
    best_boxes = tf.gather(boxes_kept, best.indices)
    best_scores = best.values

    if self._apply_nms:
        with tf.name_scope('nms'):
            # Reorder the proposals into TensorFlow's bounding box order
            # for `tf.image.non_max_suppression` compatibility.
            best_boxes_tf = change_order(best_boxes)
            # Prune the boxes that overlap (high IoU) with an already
            # selected one.
            chosen = tf.image.non_max_suppression(
                best_boxes_tf, tf.reshape(best_scores, [-1]),
                self._post_nms_top_n,
                iou_threshold=self._nms_threshold)
            # `chosen` indexes a subset; extract boxes and scores with it.
            chosen_boxes_tf = tf.gather(
                best_boxes_tf, chosen, name='gather_nms_proposals')
            # Switch back to the regular bbox encoding.
            proposals = change_order(chosen_boxes_tf)
            scores = tf.gather(
                best_scores, chosen, name='gather_nms_proposals_scores')
    else:
        proposals, scores = best_boxes, best_scores

    # Clip after NMS when so configured.
    if self._clip_after_nms:
        proposals = clip_boxes(proposals, im_shape)

    pred = {
        'proposals': proposals,
        'scores': scores,
    }

    if self._debug:
        debug_tensors = {
            'sorted_top_scores': best_scores,
            'sorted_top_proposals': best_boxes,
            'unsorted_proposals': boxes_kept,
            'unsorted_scores': scores_kept,
            'all_proposals': raw_boxes,
            'all_scores': objectness,
            # proposals_unclipped has the unsorted_scores scores
            'proposals_unclipped': boxes_kept_unclipped,
        }
        pred.update(debug_tensors)

    return pred
def build_without_filter(class_objects, cls_prob, cls_label):
    """Run per-class filtering, NMS and a global top-k on labelled boxes.

    NOTE(review): `self` is not a parameter; it is read from the
    surrounding scope, so this presumably lives nested inside a method.
    Confirm against the enclosing code.

    Args:
        class_objects: Boxes, one row of (x_min, y_min, x_max, y_max) per
            object.
        cls_prob: Probability of each object, aligned with
            `class_objects`.
        cls_label: Class id of each object, aligned with `class_objects`.

    Returns:
        Tuple `(objects, labels, probs)` with the `total_max_detections`
        most probable detections over all classes.
    """
    boxes_per_class = []
    probs_per_class = []
    labels_per_class = []

    for class_id in range(self._num_classes):
        # Row indices of the objects labelled with the current class.
        is_current_class = tf.equal(class_id, cls_label)
        class_rows = tf.reshape(tf.where(is_current_class), [-1])
        boxes = tf.gather(class_objects, class_rows)
        probs = tf.gather(cls_prob, class_rows)

        # Drop low-probability entries (fixed 0.2 cut-off, meant to remove
        # trivial padding results) and boxes without a positive area.
        likely = tf.greater_equal(probs, 0.2)
        x_min, y_min, x_max, y_max = tf.unstack(boxes, axis=1)
        width = tf.maximum(x_max - x_min, 0.0)
        height = tf.maximum(y_max - y_min, 0.0)
        non_empty = tf.greater(width * height, 0.0)
        usable = tf.logical_and(non_empty, likely)

        boxes = tf.boolean_mask(boxes, usable)
        probs = tf.boolean_mask(probs, usable)

        # NMS works on TensorFlow's bounding box convention.
        boxes_tf = change_order(boxes)
        nms_idx = tf.image.non_max_suppression(
            boxes_tf, probs, self._class_max_detections,
            iou_threshold=self._class_nms_threshold
        )

        # Keep what NMS selected and go back to our bbox convention.
        boxes_tf = tf.gather(boxes_tf, nms_idx)
        nms_probs = tf.gather(probs, nms_idx)
        nms_boxes = change_order(boxes_tf)

        boxes_per_class.append(nms_boxes)
        probs_per_class.append(nms_probs)
        # One copy of the class id for every index returned by NMS.
        num_selected = tf.shape(nms_idx)[0]
        labels_per_class.append(tf.tile([class_id], [num_selected]))

    # Stack the per-class results row-wise.
    objects = tf.concat(boxes_per_class, axis=0)
    proposal_label = tf.concat(labels_per_class, axis=0)
    proposal_label_prob = tf.concat(probs_per_class, axis=0)

    # Global top-k over all classes.
    k = tf.minimum(
        self._total_max_detections,
        tf.shape(proposal_label_prob)[0]
    )
    best = tf.nn.top_k(proposal_label_prob, k=k)
    best_objects = tf.gather(objects, best.indices)
    best_labels = tf.gather(proposal_label, best.indices)

    return (best_objects, best_labels, best.values)
def _build(self, proposals, bbox_pred, cls_prob, im_shape):
    """Convert RCNN outputs into detections, class by class.

    Args:
        proposals: Tensor with the RPN proposals bounding boxes.
            Shape (num_proposals, 4). Where num_proposals is less than
            POST_NMS_TOP_N (We don't know exactly beforehand)
        bbox_pred: Tensor with the RCNN delta predictions for each
            proposal for each class.
            Shape (num_proposals, 4 * num_classes)
        cls_prob: A softmax probability for each proposal where the
            idx = 0 is the background class (which we should ignore).
            Shape (num_proposals, num_classes + 1)
        im_shape: Image shape, passed to `clip_boxes` to clip the decoded
            boxes to the image.

    Returns:
        prediction_dict with the following keys:
            objects: Shape (final_num_proposals, 4), where
                final_num_proposals is unknown before-hand (it depends on
                NMS). Each row is (x_min, y_min, x_max, y_max).
            proposal_label: Shape (final_num_proposals,)
            proposal_label_prob: Shape (final_num_proposals,)
            selected_boxes, selected_probs, selected_labels: Python lists
                holding the per-class NMS results, one Tensor per class.
            without_filter_dict: Whatever `self.build_without_filter`
                returns for the same inputs.
    """
    with tf.variable_scope("build_without_filter"):
        alt_predictions = self.build_without_filter(
            proposals, bbox_pred, cls_prob, im_shape
        )

    boxes_by_class = []
    probs_by_class = []
    labels_by_class = []

    for class_id in range(self._num_classes):
        # Pick this class' score column (column 0 is background) and its
        # four regression values.
        scores = cls_prob[:, class_id + 1]
        first_col = 4 * class_id
        deltas = bbox_pred[:, first_col:(4 * class_id + 4)]

        # Decode the class-specific boxes and keep them inside the image.
        decoded = decode(proposals, deltas, variances=self._variances)
        boxes = clip_boxes(decoded, im_shape)

        # A detection must reach the minimum probability and have a
        # strictly positive area.
        likely = tf.greater_equal(scores, self._min_prob_threshold)
        x_min, y_min, x_max, y_max = tf.unstack(boxes, axis=1)
        width = tf.maximum(x_max - x_min, 0.0)
        height = tf.maximum(y_max - y_min, 0.0)
        non_empty = tf.greater(width * height, 0.0)
        usable = tf.logical_and(non_empty, likely)

        boxes = tf.boolean_mask(boxes, usable)
        scores = tf.boolean_mask(scores, usable)

        # NMS needs TensorFlow's bounding box convention.
        boxes_tf = change_order(boxes)
        nms_idx = tf.image.non_max_suppression(
            boxes_tf, scores, self._class_max_detections,
            iou_threshold=self._class_nms_threshold
        )

        # Keep the NMS selection and revert to our bbox convention.
        boxes_tf = tf.gather(boxes_tf, nms_idx)
        scores = tf.gather(scores, nms_idx)
        boxes = change_order(boxes_tf)

        boxes_by_class.append(boxes)
        probs_by_class.append(scores)
        # The label is constant inside the loop: repeat it once per index
        # returned by NMS.
        num_selected = tf.shape(nms_idx)[0]
        labels_by_class.append(tf.tile([class_id], [num_selected]))

    # Stack the per-class tensors on top of each other.
    objects = tf.concat(boxes_by_class, axis=0)
    proposal_label = tf.concat(labels_by_class, axis=0)
    proposal_label_prob = tf.concat(probs_by_class, axis=0)

    tf.summary.histogram(
        'proposal_cls_scores', proposal_label_prob, ['rcnn']
    )

    # Keep the best `total_max_detections` detections over all classes.
    k = tf.minimum(
        self._total_max_detections,
        tf.shape(proposal_label_prob)[0]
    )
    best = tf.nn.top_k(proposal_label_prob, k=k)
    best_probs = best.values
    best_objects = tf.gather(objects, best.indices)
    best_labels = tf.gather(proposal_label, best.indices)

    prediction = {
        'objects': best_objects,
        'proposal_label': best_labels,
        'proposal_label_prob': best_probs,
        'selected_boxes': boxes_by_class,
        'selected_probs': probs_by_class,
        'selected_labels': labels_by_class,
    }
    prediction["without_filter_dict"] = alt_predictions
    return prediction
def build_without_filter(self, proposals, bbox_pred, cls_prob, im_shape,
                         min_prob_threshold=0.7, min_area=76654.0):
    """Build per-class detections using explicit score and area cut-offs.

    For every class the proposals are decoded with that class' regression
    output, clipped to the image, filtered by class score and box area, and
    passed through NMS. The survivors of all classes are concatenated and
    the `self._total_max_detections` highest-scoring ones are returned.

    Unlike the instance-level `self._min_prob_threshold`, the cut-offs used
    here are independent arguments.

    Args:
        proposals: Tensor with the reference boxes handed to `decode`
            (presumably shape (num_proposals, 4) -- TODO confirm).
        bbox_pred: Tensor with the regression output; columns
            [4 * class_id, 4 * class_id + 4) are read for each class.
        cls_prob: Tensor with the class scores; column 0 is treated as
            background and column `class_id + 1` is read for each class.
        im_shape: Image shape, forwarded to `clip_boxes`.
        min_prob_threshold: Minimum class score (inclusive) a box needs to
            be kept. Defaults to 0.7, the value that used to be hard-coded.
        min_area: Area a clipped box must strictly exceed to be kept, in
            the squared units of the box coordinates. Defaults to 76654.0,
            the value that used to be hard-coded.

    Returns:
        dict with the following keys:
            objects: Top-k boxes over all classes.
            proposal_label: Class id of each box in `objects`.
            proposal_label_prob: Score of each box in `objects`.
            selected_boxes: List with one Tensor per class holding the
                boxes kept by NMS (before the top-k selection).
            selected_probs: List with one Tensor per class holding the
                scores of `selected_boxes`.
            selected_labels: List with one Tensor per class holding the
                class id repeated once per kept box.
    """
    selected_boxes = []
    selected_probs = []
    selected_labels = []

    # For each class, take the proposals with the class-specific
    # predictions (class scores and bbox regression) and filter accordingly
    # (min area, min probability score and NMS).
    for class_id in range(self._num_classes):
        # Apply the class-specific transformations to the proposals to
        # obtain the current class' prediction.
        class_prob = cls_prob[:, class_id + 1]  # 0 is background class.
        class_bboxes = bbox_pred[:, (4 * class_id):(4 * class_id + 4)]
        raw_class_objects = decode(
            proposals,
            class_bboxes,
            variances=self._variances,
        )

        # Clip bboxes so they don't go out of the image.
        class_objects = clip_boxes(raw_class_objects, im_shape)

        # Filter objects based on the score threshold and on them having an
        # area larger than `min_area`.
        prob_filter = tf.greater_equal(class_prob, min_prob_threshold)

        (x_min, y_min, x_max, y_max) = tf.unstack(class_objects, axis=1)
        area_filter = tf.greater(
            tf.maximum(x_max - x_min, 0.0) * tf.maximum(y_max - y_min, 0.0),
            min_area
        )

        object_filter = tf.logical_and(area_filter, prob_filter)

        class_objects = tf.boolean_mask(class_objects, object_filter)
        class_prob = tf.boolean_mask(class_prob, object_filter)

        # We have to use the TensorFlow's bounding box convention to use
        # the included function for NMS.
        class_objects_tf = change_order(class_objects)

        # Apply class NMS.
        class_selected_idx = tf.image.non_max_suppression(
            class_objects_tf, class_prob, self._class_max_detections,
            iou_threshold=self._class_nms_threshold
        )

        # Using NMS resulting indices, gather values from Tensors.
        class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
        class_prob = tf.gather(class_prob, class_selected_idx)

        # Revert to our bbox convention.
        class_objects = change_order(class_objects_tf)

        # We append values to a regular list which will later be
        # transformed to a proper Tensor.
        selected_boxes.append(class_objects)
        selected_probs.append(class_prob)
        # In the case of the class_id, since it is a loop on classes, we
        # already have a fixed class_id. We use `tf.tile` to create that
        # Tensor with the total number of indices returned by the NMS.
        selected_labels.append(
            tf.tile([class_id], [tf.shape(class_selected_idx)[0]])
        )

    # We use concat (axis=0) to generate a Tensor where the rows are
    # stacked on top of each other
    objects = tf.concat(selected_boxes, axis=0)
    proposal_label = tf.concat(selected_labels, axis=0)
    proposal_label_prob = tf.concat(selected_probs, axis=0)

    tf.summary.histogram(
        'proposal_cls_scores', proposal_label_prob, ['rcnn']
    )

    # Get top-k detections of all classes.
    k = tf.minimum(
        self._total_max_detections,
        tf.shape(proposal_label_prob)[0]
    )

    top_k = tf.nn.top_k(proposal_label_prob, k=k)
    top_k_proposal_label_prob = top_k.values
    top_k_objects = tf.gather(objects, top_k.indices)
    top_k_proposal_label = tf.gather(proposal_label, top_k.indices)

    return {
        'objects': top_k_objects,
        'proposal_label': top_k_proposal_label,
        'proposal_label_prob': top_k_proposal_label_prob,
        'selected_boxes': selected_boxes,
        'selected_probs': selected_probs,
        'selected_labels': selected_labels,
    }
def _build(self, cls_prob, loc_pred, all_anchors, im_shape):
    """Turn per-anchor predictions into the final per-class proposals.

    For every class, anchors whose class probability reaches
    `self._min_prob_threshold` are decoded into boxes, clipped to the
    image, filtered by positive area and reduced with NMS. The survivors of
    all classes are concatenated and the `self._total_max_detections`
    highest-scoring ones are returned.

    Args:
        cls_prob: A softmax probability for each anchor where the idx = 0
            is the background class (which we should ignore).
            Shape (total_anchors, num_classes + 1)
        loc_pred: A Tensor with the regression output (predicted offsets
            and scales) for each anchor.
            Its shape should be (total_anchors, 4).
        all_anchors: A Tensor with the anchors bounding boxes of shape
            (total_anchors, 4), having (x_min, y_min, x_max, y_max) for
            each anchor.
        im_shape: A Tensor with the image shape in format (height, width).

    Returns:
        prediction_dict with the following keys:
            objects: The proposals of the network after applying the
                filters (min probability, positive area, NMS, top-k).
                Its shape is (final_num_proposals, 4), where
                final_num_proposals is unknown before-hand (it depends on
                NMS). The 4-length Tensor for each corresponds to:
                (x_min, y_min, x_max, y_max).
            labels: Class id of each proposal.
                Its shape is (final_num_proposals,)
            probs: Class probability of each proposal.
                Its shape is (final_num_proposals,)
            raw_proposals: The anchors adjusted using loc_pred, before
                clipping. Only the Tensor built for the last class of the
                loop is returned.
            anchors: The anchor each returned proposal was decoded from.
                Its shape is (final_num_proposals, 4)
    """
    selected_boxes = []
    selected_probs = []
    selected_labels = []
    selected_anchors = []  # For debugging

    # For each class: keep the predictions above the minimum probability,
    # decode them into boxes, clip them to the image, drop boxes without a
    # valid area and apply NMS to obtain the proposals kept for the class.
    for class_id in range(self._num_classes):
        # Get the confidences for this class (+ 1 is to ignore background)
        class_cls_prob = cls_prob[:, class_id + 1]

        # Filter by min_prob_threshold
        min_prob_filter = tf.greater_equal(
            class_cls_prob, self._min_prob_threshold)
        class_cls_prob = tf.boolean_mask(class_cls_prob, min_prob_filter)
        class_loc_pred = tf.boolean_mask(loc_pred, min_prob_filter)
        # Keep only the anchors whose prediction passed the threshold.
        anchors = tf.boolean_mask(all_anchors, min_prob_filter)

        # Using the loc_pred and the anchors, we generate the proposals.
        raw_proposals = decode(anchors, class_loc_pred, self._variances)

        # Clip boxes to image.
        clipped_proposals = clip_boxes(raw_proposals, im_shape)

        # Filter proposals that have an non-valid area.
        (x_min, y_min, x_max, y_max) = tf.unstack(
            clipped_proposals, axis=1)
        proposal_filter = tf.greater(
            tf.maximum(x_max - x_min, 0.) * tf.maximum(y_max - y_min, 0.),
            0.)

        # Apply the area filter to boxes, offsets, probabilities and the
        # anchors they came from so that all of them stay row-aligned.
        class_proposals = tf.boolean_mask(
            clipped_proposals, proposal_filter)
        class_loc_pred = tf.boolean_mask(class_loc_pred, proposal_filter)
        class_cls_prob = tf.boolean_mask(class_cls_prob, proposal_filter)
        proposal_anchors = tf.boolean_mask(anchors, proposal_filter)

        # Log results of filtering non-valid area proposals
        total_anchors = tf.shape(all_anchors)[0]
        # Number of boxes with a valid area.
        total_proposals = tf.shape(class_proposals)[0]
        # Number of boxes after the probability threshold but before the
        # area filter (so it generally differs from total_anchors).
        total_raw_proposals = tf.shape(raw_proposals)[0]
        # NOTE(review): this logs (valid - raw), which is non-positive
        # assuming clip_boxes keeps the number of boxes; confirm whether
        # (raw - valid) was intended.
        tf.summary.scalar('invalid_proposals',
                          total_proposals - total_raw_proposals, ['ssd'])
        # NOTE(review): this logs total_anchors / valid proposals, not
        # valid / total; confirm the intended direction of the ratio.
        tf.summary.scalar(
            'valid_proposals_ratio',
            tf.cast(total_anchors, tf.float32) /
            tf.cast(total_proposals, tf.float32), ['ssd'])

        # We have to use the TensorFlow's bounding box convention to use
        # the included function for NMS.
        # After gathering results we should normalize it back.
        class_proposal_tf = change_order(class_proposals)

        # Apply class NMS. The returned indices select the boxes that are
        # kept as this class' predictions.
        class_selected_idx = tf.image.non_max_suppression(
            class_proposal_tf, class_cls_prob, self._class_max_detections,
            iou_threshold=self._class_nms_threshold)

        # Using NMS resulting indices, gather values from Tensors.
        class_proposal_tf = tf.gather(
            class_proposal_tf, class_selected_idx)
        class_cls_prob = tf.gather(class_cls_prob, class_selected_idx)
        # The anchors must be reduced with the same indices; otherwise the
        # concatenated anchors would have more rows than the boxes and the
        # top-k indices below would pick unrelated anchors.
        proposal_anchors = tf.gather(proposal_anchors, class_selected_idx)

        # We append values to a regular list which will later be
        # transformed to a proper Tensor.
        selected_boxes.append(class_proposal_tf)
        selected_probs.append(class_cls_prob)
        # In the case of the class_id, since it is a loop on classes, we
        # already have a fixed class_id. We use `tf.tile` to create that
        # Tensor with the total number of indices returned by the NMS.
        selected_labels.append(
            tf.tile([class_id], [tf.shape(class_selected_idx)[0]]))
        selected_anchors.append(proposal_anchors)

    # We use concat (axis=0) to generate a Tensor where the rows are
    # stacked on top of each other
    # (num_proposals, 4)
    proposals_tf = tf.concat(selected_boxes, axis=0)
    # Return to the original convention.
    proposals = change_order(proposals_tf)
    # (num_proposals, )
    proposal_label = tf.concat(selected_labels, axis=0)
    # (num_proposals, )
    proposal_label_prob = tf.concat(selected_probs, axis=0)
    # (num_proposals, 4)
    proposal_anchors = tf.concat(selected_anchors, axis=0)

    # Get topK detections of all classes.
    k = tf.minimum(self._total_max_detections,
                   tf.shape(proposal_label_prob)[0])

    # All concatenated Tensors share the same row order (class by class),
    # so the indices computed on the probabilities are valid for each one.
    top_k = tf.nn.top_k(proposal_label_prob, k=k)

    top_k_proposal_label_prob = top_k.values
    top_k_proposals = tf.gather(proposals, top_k.indices)
    top_k_proposal_label = tf.gather(proposal_label, top_k.indices)
    top_k_proposal_anchors = tf.gather(proposal_anchors, top_k.indices)

    return {
        'objects': top_k_proposals,
        'labels': top_k_proposal_label,
        'probs': top_k_proposal_label_prob,
        'raw_proposals': raw_proposals,
        'anchors': top_k_proposal_anchors,
    }
def _build(self, proposals, bbox_pred, cls_prob, im_shape):
    """Turn RPN proposals and RCNN predictions into final detections.

    Each proposal is assigned its most probable class; background and
    low-probability proposals are dropped, the remaining ones are decoded
    with the regression output of their class, clipped to the image,
    filtered by positive area and reduced with per-class NMS followed by a
    global top-k.

    Args:
        proposals: Tensor with the RPN proposals bounding boxes.
            Shape (num_proposals, 4). Where num_proposals is less than
            POST_NMS_TOP_N (We don't know exactly beforehand)
        bbox_pred: Tensor with the RCNN delta predictions for each proposal
            for each class. Shape (num_proposals, 4 * num_classes)
        cls_prob: A softmax probability for each proposal where the idx = 0
            is the background class (which we should ignore).
            Shape (num_proposals, num_classes + 1)
        im_shape: Image shape, forwarded to `clip_boxes`.

    Returns:
        dict with the following keys:
            raw_objects: Decoded boxes before clipping and area filtering.
            objects:
                Shape (final_num_proposals, 4)
                Where final_num_proposals is unknown before-hand (it
                depends on NMS). The 4-length Tensor for each corresponds
                to: (x_min, y_min, x_max, y_max).
            proposal_label:
                Shape (final_num_proposals,)
            proposal_label_prob:
                Shape (final_num_proposals,)
            selected_boxes: List with one Tensor per class holding the
                boxes kept by NMS, in the ordering produced by
                `change_order` (they are not converted back).
            selected_probs: List with one Tensor per class holding the
                probabilities of `selected_boxes`.
            selected_labels: List with one Tensor per class holding the
                class id repeated once per kept box.
    """
    # First we want get the most probable label for each proposal
    # We still have the background on idx 0 so we subtract 1 to the idxs.
    proposal_label = tf.argmax(cls_prob, axis=1) - 1
    # Get the probability for the selected label for each proposal.
    proposal_label_prob = tf.reduce_max(cls_prob, axis=1)

    # We are going to use only the non-background proposals.
    non_background_filter = tf.greater_equal(proposal_label, 0)
    # Filter proposals with less than threshold probability.
    min_prob_filter = tf.greater_equal(
        proposal_label_prob, self._min_prob_threshold
    )
    proposal_filter = tf.logical_and(
        non_background_filter, min_prob_filter
    )

    total_proposals = tf.shape(proposals)[0]

    equal_shapes = tf.assert_equal(
        tf.shape(proposals)[0], tf.shape(bbox_pred)[0]
    )
    with tf.control_dependencies([equal_shapes]):
        # Filter all tensors for getting all non-background proposals.
        proposals = tf.boolean_mask(
            proposals, proposal_filter)
        proposal_label = tf.boolean_mask(
            proposal_label, proposal_filter)
        proposal_label_prob = tf.boolean_mask(
            proposal_label_prob, proposal_filter)
        bbox_pred = tf.boolean_mask(
            bbox_pred, proposal_filter)

    filtered_proposals = tf.shape(proposals)[0]

    tf.summary.scalar(
        'background_or_low_prob_proposals',
        total_proposals - filtered_proposals,
        ['rcnn']
    )

    # Create one hot with labels for using it to filter bbox_predictions.
    label_one_hot = tf.one_hot(proposal_label, depth=self._num_classes)
    # Flatten label_one_hot to get
    # (num_non_background_proposals * num_classes, 1) for filtering.
    label_one_hot_flatten = tf.cast(
        tf.reshape(label_one_hot, [-1]), tf.bool
    )
    # Flatten bbox_predictions getting
    # (num_non_background_proposals * num_classes, 4).
    bbox_pred_flatten = tf.reshape(bbox_pred, [-1, 4])

    equal_shapes = tf.assert_equal(
        tf.shape(bbox_pred_flatten)[0],
        tf.shape(label_one_hot_flatten)[0]
    )
    with tf.control_dependencies([equal_shapes]):
        # Control same number of dimensions between bbox and mask.
        bbox_pred = tf.boolean_mask(
            bbox_pred_flatten, label_one_hot_flatten)

    # Using the bbox_pred and the proposals we generate the objects.
    raw_objects = decode(proposals, bbox_pred)
    # Clip boxes to image.
    clipped_objects = clip_boxes(raw_objects, im_shape)

    # Filter objects that have an non-valid area.
    # The area is clamped to be >= 0, so the comparison has to be strict:
    # `greater_equal` would accept every box and filter nothing.
    (x_min, y_min, x_max, y_max) = tf.unstack(clipped_objects, axis=1)
    object_filter = tf.greater(
        tf.maximum(x_max - x_min, 0.0) * tf.maximum(y_max - y_min, 0.0),
        0.0
    )

    total_raw_objects = tf.shape(raw_objects)[0]
    objects = tf.boolean_mask(
        clipped_objects, object_filter)
    proposal_label = tf.boolean_mask(
        proposal_label, object_filter)
    proposal_label_prob = tf.boolean_mask(
        proposal_label_prob, object_filter)

    total_objects = tf.shape(objects)[0]

    # NOTE(review): this logs (valid - raw), which is non-positive
    # assuming clip_boxes keeps the number of boxes; confirm whether
    # (raw - valid) was intended.
    tf.summary.scalar(
        'invalid_proposals',
        total_objects - total_raw_objects, ['rcnn']
    )

    # NOTE(review): this is total proposals / valid objects, not
    # valid / total; confirm the intended direction of the ratio.
    valid_proposals_ratio = (
        tf.cast(total_proposals, tf.float32) /
        tf.cast(total_objects, tf.float32)
    )

    tf.summary.scalar(
        'valid_proposals_ratio', valid_proposals_ratio, ['rcnn']
    )

    # We have to use the TensorFlow's bounding box convention to use the
    # included function for NMS.
    # After gathering results we should normalize it back.
    objects_tf = change_order(objects)

    selected_boxes = []
    selected_probs = []
    selected_labels = []
    # For each class we want to filter those objects and apply NMS to them.
    for class_id in range(self._num_classes):
        # Filter objects Tensors with class.
        class_filter = tf.equal(proposal_label, class_id)
        class_objects_tf = tf.boolean_mask(objects_tf, class_filter)
        class_prob = tf.boolean_mask(proposal_label_prob, class_filter)

        # Apply class NMS.
        class_selected_idx = tf.image.non_max_suppression(
            class_objects_tf, class_prob, self._class_max_detections,
            iou_threshold=self._class_nms_threshold
        )

        # Using NMS resulting indices, gather values from Tensors.
        class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
        class_prob = tf.gather(class_prob, class_selected_idx)

        # We append values to a regular list which will later be transform
        # to a proper Tensor.
        selected_boxes.append(class_objects_tf)
        selected_probs.append(class_prob)
        # In the case of the class_id, since it is a loop on classes, we
        # already have a fixed class_id. We use `tf.tile` to create that
        # Tensor with the total number of indices returned by the NMS.
        selected_labels.append(
            tf.tile([class_id], [tf.shape(class_selected_idx)[0]])
        )

    # We use concat (axis=0) to generate a Tensor where the rows are
    # stacked on top of each other
    objects_tf = tf.concat(selected_boxes, axis=0)
    # Return to the original convention.
    objects = change_order(objects_tf)
    proposal_label = tf.concat(selected_labels, axis=0)
    proposal_label_prob = tf.concat(selected_probs, axis=0)

    # Get topK detections of all classes.
    k = tf.minimum(
        self._total_max_detections,
        tf.shape(proposal_label_prob)[0]
    )
    top_k = tf.nn.top_k(proposal_label_prob, k=k)
    top_k_proposal_label_prob = top_k.values
    top_k_objects = tf.gather(objects, top_k.indices)
    top_k_proposal_label = tf.gather(proposal_label, top_k.indices)

    return {
        'raw_objects': raw_objects,
        'objects': top_k_objects,
        'proposal_label': top_k_proposal_label,
        'proposal_label_prob': top_k_proposal_label_prob,
        'selected_boxes': selected_boxes,
        'selected_probs': selected_probs,
        'selected_labels': selected_labels,
    }
def _build(self, proposals, bbox_pred, cls_prob, im_shape):
    """Convert RPN proposals and RCNN outputs into final detections.

    Args:
        proposals: Output of the RPN. Tensor with the proposals bounding
            boxes. Shape (num_proposals, 4). Where num_proposals is less
            than POST_NMS_TOP_N (We don't know exactly beforehand)
        bbox_pred: Output of the RCNN. Tensor with the predicted offsets
            and scales for each (RPN) proposal for each class.
            Shape (num_proposals, 4 * num_classes)
        cls_prob: Output of the RCNN. A softmax probability for each
            proposal where the idx = 0 is the background class (which we
            should ignore). Shape (num_proposals, num_classes + 1)
        im_shape: Image shape, forwarded to `clip_boxes`.

    Returns:
        dict with the following keys:
            objects: Coordinates of the boxes that are finally kept.
                Shape (final_num_proposals, 4)
                Where final_num_proposals is unknown before-hand (it
                depends on NMS). The 4-length Tensor for each corresponds
                to: (x_min, y_min, x_max, y_max).
            proposal_label: Shape (final_num_proposals,)
            proposal_label_prob: Shape (final_num_proposals,)
            selected_boxes: List with one Tensor of kept boxes per class.
            selected_probs: List with one Tensor of probabilities per
                class, aligned with `selected_boxes`.
            selected_labels: List with one Tensor of class ids per class,
                aligned with `selected_boxes`.
    """
    boxes_per_class = []
    probs_per_class = []
    labels_per_class = []

    # Every class is handled on its own: its class-specific score and box
    # regression are applied to all proposals, and the result is filtered
    # by probability, valid area and NMS.
    for class_id in range(self._num_classes):
        # Score of every proposal for this class (column 0 is background)
        # and the four regression values predicted for this class.
        scores = cls_prob[:, class_id + 1]
        first_col = 4 * class_id
        deltas = bbox_pred[:, first_col:first_col + 4]

        # Combine the RPN reference boxes with the RCNN offsets to obtain
        # the predicted box coordinates. Both inputs: (num_proposals, 4).
        decoded = decode(
            proposals,
            deltas,
            variances=self._variances,
        )

        # Clipping only trims the boxes to the image; it removes none.
        # (num_proposals, 4)
        boxes = clip_boxes(decoded, im_shape)

        # A box is kept when its score reaches the threshold...
        score_ok = tf.greater_equal(scores, self._min_prob_threshold)

        # ...and its (clamped) width times height is strictly positive.
        (x_min, y_min, x_max, y_max) = tf.unstack(boxes, axis=1)
        widths = tf.maximum(x_max - x_min, 0.0)
        heights = tf.maximum(y_max - y_min, 0.0)
        area_ok = tf.greater(widths * heights, 0.0)

        # Both conditions have to hold.
        keep = tf.logical_and(area_ok, score_ok)
        boxes = tf.boolean_mask(boxes, keep)
        scores = tf.boolean_mask(scores, keep)

        # The NMS op shipped with TensorFlow expects its own coordinate
        # ordering, so convert before calling it.
        boxes_tf = change_order(boxes)

        # Indices of the boxes surviving NMS for this class; at most
        # self._class_max_detections of them are returned.
        nms_idx = tf.image.non_max_suppression(
            boxes_tf, scores, self._class_max_detections,
            iou_threshold=self._class_nms_threshold)

        # Pick the survivors and go back to our own box convention.
        boxes_tf = tf.gather(boxes_tf, nms_idx)
        scores = tf.gather(scores, nms_idx)
        boxes = change_order(boxes_tf)

        # Collected in plain lists; turned into Tensors after the loop.
        # These are the boxes (and their scores) left for this class after
        # clipping, the score/area filter and NMS.
        boxes_per_class.append(boxes)
        probs_per_class.append(scores)

        # The class id is constant inside this iteration, so repeat it
        # once per surviving box to get labels aligned with the scores.
        num_kept = tf.shape(nms_idx)[0]
        labels_per_class.append(tf.tile([class_id], [num_kept]))

    # Stack the per-class results row-wise into single Tensors.
    objects = tf.concat(boxes_per_class, axis=0)
    proposal_label = tf.concat(labels_per_class, axis=0)
    proposal_label_prob = tf.concat(probs_per_class, axis=0)

    tf.summary.histogram(
        'proposal_cls_scores', proposal_label_prob, ['rcnn'])

    # Keep the k best detections over all classes; k cannot exceed the
    # number of detections available.
    num_candidates = tf.shape(proposal_label_prob)[0]
    k = tf.minimum(self._total_max_detections, num_candidates)
    best = tf.nn.top_k(proposal_label_prob, k=k)

    return {
        'objects': tf.gather(objects, best.indices),
        'proposal_label': tf.gather(proposal_label, best.indices),
        'proposal_label_prob': best.values,
        'selected_boxes': boxes_per_class,
        'selected_probs': probs_per_class,
        'selected_labels': labels_per_class,
    }