def _run_rpn_proposal(self, all_anchors, rpn_cls_prob, config, gt_boxes=None, rpn_bbox_pred=None): """ Define one of gt_boxes or rpn_bbox_pred. If using gt_boxes, the correct rpn_bbox_pred for those gt_boxes will be used. """ feed_dict = {} rpn_cls_prob_tf = tf.placeholder( tf.float32, shape=(all_anchors.shape[0], 2)) feed_dict[rpn_cls_prob_tf] = rpn_cls_prob im_size_tf = tf.placeholder(tf.float32, shape=(2,)) feed_dict[im_size_tf] = self.im_size all_anchors_tf = tf.placeholder(tf.float32, shape=all_anchors.shape) feed_dict[all_anchors_tf] = all_anchors if rpn_bbox_pred is None and gt_boxes is not None: # Here we encode 'all_anchors' and 'gt_boxes' to get corrects # predictions that RPNProposal can decodes. rpn_bbox_pred_tf = encode(all_anchors, gt_boxes) else: rpn_bbox_pred_tf = tf.placeholder( tf.float32, shape=rpn_bbox_pred.shape ) feed_dict[rpn_bbox_pred_tf] = rpn_bbox_pred model = RPNProposal(all_anchors.shape[0], config, debug=True) results = model( rpn_cls_prob_tf, rpn_bbox_pred_tf, all_anchors_tf, im_size_tf) with self.test_session() as sess: results = sess.run(results, feed_dict=feed_dict) return results
def _run_rpn_proposal( self, all_anchors, rpn_cls_prob, config, gt_boxes=None, rpn_bbox_pred=None ): """ Define one of gt_boxes or rpn_bbox_pred. If using gt_boxes, the correct rpn_bbox_pred for those gt_boxes will be used. """ feed_dict = {} rpn_cls_prob_tf = tf.placeholder(tf.float32, shape=(all_anchors.shape[0], 2)) feed_dict[rpn_cls_prob_tf] = rpn_cls_prob im_size_tf = tf.placeholder(tf.float32, shape=(2,)) feed_dict[im_size_tf] = self.im_size all_anchors_tf = tf.placeholder(tf.float32, shape=all_anchors.shape) feed_dict[all_anchors_tf] = all_anchors if rpn_bbox_pred is None and gt_boxes is not None: # Here we encode 'all_anchors' and 'gt_boxes' to get corrects # predictions that RPNProposal can decodes. rpn_bbox_pred_tf = encode(all_anchors, gt_boxes) else: rpn_bbox_pred_tf = tf.placeholder(tf.float32, shape=rpn_bbox_pred.shape) feed_dict[rpn_bbox_pred_tf] = rpn_bbox_pred model = RPNProposal(all_anchors.shape[0], config, debug=True) results = model(rpn_cls_prob_tf, rpn_bbox_pred_tf, all_anchors_tf, im_size_tf) with self.test_session() as sess: results = sess.run(results, feed_dict=feed_dict) return results
def _build(self, proposals, gt_boxes):
    """Assign a class label and a bbox regression target to each proposal.

    Args:
        proposals: A Tensor with the RPN bounding boxes proposals.
            The shape of the Tensor is (num_proposals, 4).
        gt_boxes: A Tensor with the ground truth boxes for the image.
            The shape of the Tensor is (num_gt, 5), having the truth label
            as the last value for each box.

    Returns:
        proposals_label: A float Tensor of shape (num_proposals,). 0 marks
            background, a positive value is the label of the matched
            ground truth box plus one, and a negative value marks a
            proposal to be ignored in the minibatch (-1, or the negated
            label for foregrounds dropped by the subsampling).
        bbox_targets: A bounding box regression target for each of the
            proposals that have a greater than zero label. For every
            other proposal we return zeros.
            The shape of the Tensor is (num_proposals, 4).
    """
    overlaps = bbox_overlap_tf(proposals, gt_boxes[:, :4])
    # overlaps now contains (num_proposals, num_gt_boxes) with the IoU of
    # proposal P and ground truth box G in overlaps[P, G]

    # We are going to label each proposal based on the IoU with
    # `gt_boxes`. Start by filling the labels with -1, marking them as
    # ignored.
    proposals_label_shape = tf.gather(tf.shape(proposals), [0])
    proposals_label = tf.fill(
        dims=proposals_label_shape,
        value=-1.
    )
    # For each proposal there are three possible outcomes for labelling:
    #   if low <= max(iou) < high (the background thresholds) then we
    #       label it as background.
    #   if max(iou) >= foreground_threshold then we label it with the
    #       class of the gt_box with the highest IoU.
    #   otherwise it keeps the -1 label and is ignored.
    #
    # max_overlaps holds, for each proposal, the highest IoU it has with
    # any gt_box.
    max_overlaps = tf.reduce_max(overlaps, axis=1)

    iou_is_high_enough_for_bg = tf.greater_equal(
        max_overlaps, self._background_threshold_low
    )
    iou_is_not_too_high_for_bg = tf.less(
        max_overlaps, self._background_threshold_high
    )
    bg_condition = tf.logical_and(
        iou_is_high_enough_for_bg, iou_is_not_too_high_for_bg
    )
    proposals_label = tf.where(
        condition=bg_condition,
        x=tf.zeros_like(proposals_label, dtype=tf.float32),
        y=proposals_label
    )

    # Get the index of the best gt_box for each proposal.
    overlaps_best_gt_idxs = tf.argmax(overlaps, axis=1)
    # Having the index of the gt bbox with the best label we need to get
    # the label for each gt box and add one to it because 0 is used for
    # background.
    best_fg_labels_for_proposals = tf.add(
        tf.gather(gt_boxes[:, 4], overlaps_best_gt_idxs),
        1.
    )
    iou_is_fg = tf.greater_equal(
        max_overlaps, self._foreground_threshold
    )
    best_proposals_idxs = tf.argmax(overlaps, axis=0)

    # Set the indices in best_proposals_idxs to True, and the rest to
    # False.
    # tf.sparse_to_dense is used because we know the set of indices which
    # we want to set to True, and we know the rest of the indices
    # should be set to False. That's exactly the use case of
    # tf.sparse_to_dense.
    is_best_box = tf.sparse_to_dense(
        sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
        sparse_values=True, default_value=False,
        output_shape=tf.cast(proposals_label_shape, tf.int64),
        validate_indices=False
    )
    # We update proposals_label with the value in
    # best_fg_labels_for_proposals only when the box is foreground.
    proposals_label = tf.where(
        condition=iou_is_fg,
        x=best_fg_labels_for_proposals,
        y=proposals_label
    )
    # Now we need to find the proposals that are the best for each of the
    # gt_boxes. We overwrite the previous proposals_label with this
    # because setting the best proposal for each gt_box has priority.
    best_proposals_gt_labels = tf.sparse_to_dense(
        sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
        sparse_values=gt_boxes[:, 4] + 1,
        default_value=0.,
        output_shape=tf.cast(proposals_label_shape, tf.int64),
        validate_indices=False,
        name="get_right_labels_for_bestboxes"
    )
    proposals_label = tf.where(
        condition=is_best_box,
        x=best_proposals_gt_labels,
        y=proposals_label,
        name="update_labels_for_bestbox_proposals"
    )

    # proposals_label now holds 0 (background) or a gt label plus one for
    # the proposals we are going to use, and -1 for the ones we should
    # ignore. But we still need to make sure we don't have a number of
    # foreground proposals higher than
    # minibatch_size * foreground_fraction.
    max_fg = int(self._foreground_fraction * self._minibatch_size)
    fg_condition = tf.logical_or(
        iou_is_fg, is_best_box
    )
    fg_inds = tf.where(
        condition=fg_condition
    )

    def disable_some_fgs():
        # We want to delete a randomly-selected subset of fg_inds of
        # size `fg_inds.shape[0] - max_fg`.
        # We shuffle along the dimension 0 and then we get the first
        # num_fg_inds - max_fg indices and we disable them.
        shuffled_inds = tf.random_shuffle(fg_inds, seed=self._seed)
        disable_place = (tf.shape(fg_inds)[0] - max_fg)
        # This function should never run if num_fg_inds <= max_fg, so we
        # add an assertion to catch the wrong behaviour if it happens.
        integrity_assertion = tf.assert_positive(
            disable_place,
            message="disable_place in disable_some_fgs is negative."
        )
        with tf.control_dependencies([integrity_assertion]):
            disable_inds = shuffled_inds[:disable_place]
        is_disabled = tf.sparse_to_dense(
            sparse_indices=disable_inds,
            sparse_values=True, default_value=False,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            # We are shuffling the indices, so they may not be ordered.
            validate_indices=False
        )
        return tf.where(
            condition=is_disabled,
            # We set it to -label for debugging purposes.
            x=tf.negative(proposals_label),
            y=proposals_label
        )

    # Disable some fgs if we have too many foregrounds.
    proposals_label = tf.cond(
        tf.greater(tf.shape(fg_inds)[0], max_fg),
        true_fn=disable_some_fgs,
        false_fn=lambda: proposals_label
    )

    total_fg_in_batch = tf.shape(
        tf.where(
            condition=tf.greater(proposals_label, 0)
        )
    )[0]

    # Now we want to do the same for backgrounds.
    # We calculate up to how many backgrounds we desire based on the
    # final number of foregrounds and the total desired batch size.
    max_bg = self._minibatch_size - total_fg_in_batch

    # We can't use bg_condition because some of the proposals that satisfy
    # the IoU conditions to be background may have been labeled as
    # foreground due to them being the best proposal for a certain gt_box.
    bg_mask = tf.equal(proposals_label, 0)
    bg_inds = tf.where(
        condition=bg_mask,
    )

    def disable_some_bgs():
        # Mutatis mutandis, all comments from disable_some_fgs apply.
        shuffled_inds = tf.random_shuffle(bg_inds, seed=self._seed)
        disable_place = (tf.shape(bg_inds)[0] - max_bg)
        integrity_assertion = tf.assert_non_negative(
            disable_place,
            message="disable_place in disable_some_bgs is negative."
        )
        with tf.control_dependencies([integrity_assertion]):
            disable_inds = shuffled_inds[:disable_place]
        is_disabled = tf.sparse_to_dense(
            sparse_indices=disable_inds,
            sparse_values=True, default_value=False,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            validate_indices=False
        )
        # Unlike foregrounds, disabled backgrounds are simply set to -1.
        return tf.where(
            condition=is_disabled,
            x=tf.fill(
                dims=proposals_label_shape,
                value=-1.
            ),
            y=proposals_label
        )

    proposals_label = tf.cond(
        tf.greater_equal(tf.shape(bg_inds)[0], max_bg),
        true_fn=disable_some_bgs,
        false_fn=lambda: proposals_label
    )

    # Next step is to calculate the proper targets for the proposals
    # labeled based on the values of the ground-truth boxes.
    # We have to use only the proposals labeled >= 1, each matching with
    # the proper gt_boxes.

    # Get the ids of the proposals that matter for bbox_target comparison.
    is_proposal_with_target = tf.greater(
        proposals_label, 0
    )
    proposals_with_target_idx = tf.where(
        condition=is_proposal_with_target
    )
    # Get the corresponding ground truth box only for the proposals with
    # target.
    gt_boxes_idxs = tf.gather(
        overlaps_best_gt_idxs,
        proposals_with_target_idx
    )
    # Get the values of the ground truth boxes.
    proposals_gt_boxes = tf.gather_nd(
        gt_boxes[:, :4], gt_boxes_idxs
    )
    # We create the same array but with the proposals.
    proposals_with_target = tf.gather_nd(
        proposals,
        proposals_with_target_idx
    )
    # We create our targets by encoding each gt box against its proposal.
    bbox_targets_nonzero = encode(
        proposals_with_target,
        proposals_gt_boxes,
    )
    # TODO: We should normalize it in order for bbox_targets to have zero
    # mean and unit variance according to the paper.

    # We unmap targets to proposal_labels (containing the length of
    # proposals); rows without a target stay at zero.
    bbox_targets = tf.scatter_nd(
        indices=proposals_with_target_idx,
        updates=bbox_targets_nonzero,
        shape=tf.cast(tf.shape(proposals), tf.int64)
    )

    return proposals_label, bbox_targets
def _build(self, proposals, gt_boxes):
    """Assign a class label and a bbox regression target to each proposal.

    Args:
        proposals: A Tensor with the RPN bounding boxes proposals.
            The shape of the Tensor is (num_proposals, 4).
        gt_boxes: A Tensor with the ground truth boxes for the image.
            The shape of the Tensor is (num_gt, 5), having the truth label
            as the last value for each box.

    Returns:
        proposals_label: A float Tensor of shape (num_proposals,). 0 marks
            background, a positive value is the label of the matched
            ground truth box plus one, and a negative value marks a
            proposal to be ignored in the minibatch (-1, or the negated
            label for foregrounds dropped by the subsampling).
        bbox_targets: A bounding box regression target for each of the
            proposals that have a greater than zero label. For every
            other proposal we return zeros.
            The shape of the Tensor is (num_proposals, 4).
    """
    overlaps = bbox_overlap_tf(proposals, gt_boxes[:, :4])
    # overlaps now contains (num_proposals, num_gt_boxes) with the IoU of
    # proposal P and ground truth box G in overlaps[P, G]

    # We are going to label each proposal based on the IoU with
    # `gt_boxes`. Start by filling the labels with -1, marking them as
    # ignored.
    proposals_label_shape = tf.gather(tf.shape(proposals), [0])
    proposals_label = tf.fill(
        dims=proposals_label_shape,
        value=-1.
    )
    # For each proposal there are three possible outcomes for labelling:
    #   if low <= max(iou) < high (the background thresholds) then we
    #       label it as background.
    #   if max(iou) >= foreground_threshold then we label it with the
    #       class of the gt_box with the highest IoU.
    #   otherwise it keeps the -1 label and is ignored.
    #
    # max_overlaps holds, for each proposal, the highest IoU it has with
    # any gt_box.
    max_overlaps = tf.reduce_max(overlaps, axis=1)

    iou_is_high_enough_for_bg = tf.greater_equal(
        max_overlaps, self._background_threshold_low
    )
    iou_is_not_too_high_for_bg = tf.less(
        max_overlaps, self._background_threshold_high
    )
    bg_condition = tf.logical_and(
        iou_is_high_enough_for_bg, iou_is_not_too_high_for_bg
    )
    proposals_label = tf.where(
        condition=bg_condition,
        x=tf.zeros_like(proposals_label, dtype=tf.float32),
        y=proposals_label
    )

    # Get the index of the best gt_box for each proposal.
    overlaps_best_gt_idxs = tf.argmax(overlaps, axis=1)
    # Having the index of the gt bbox with the best label we need to get
    # the label for each gt box and add one to it because 0 is used for
    # background.
    best_fg_labels_for_proposals = tf.add(
        tf.gather(gt_boxes[:, 4], overlaps_best_gt_idxs),
        1.
    )
    iou_is_fg = tf.greater_equal(
        max_overlaps, self._foreground_threshold
    )
    best_proposals_idxs = tf.argmax(overlaps, axis=0)

    # Set the indices in best_proposals_idxs to True, and the rest to
    # False.
    # tf.sparse_to_dense is used because we know the set of indices which
    # we want to set to True, and we know the rest of the indices
    # should be set to False. That's exactly the use case of
    # tf.sparse_to_dense.
    is_best_box = tf.sparse_to_dense(
        sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
        sparse_values=True, default_value=False,
        output_shape=tf.cast(proposals_label_shape, tf.int64),
        validate_indices=False
    )
    # We update proposals_label with the value in
    # best_fg_labels_for_proposals only when the box is foreground.
    proposals_label = tf.where(
        condition=iou_is_fg,
        x=best_fg_labels_for_proposals,
        y=proposals_label
    )
    # Now we need to find the proposals that are the best for each of the
    # gt_boxes. We overwrite the previous proposals_label with this
    # because setting the best proposal for each gt_box has priority.
    best_proposals_gt_labels = tf.sparse_to_dense(
        sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
        sparse_values=gt_boxes[:, 4] + 1,
        default_value=0.,
        output_shape=tf.cast(proposals_label_shape, tf.int64),
        validate_indices=False,
        name="get_right_labels_for_bestboxes"
    )
    proposals_label = tf.where(
        condition=is_best_box,
        x=best_proposals_gt_labels,
        y=proposals_label,
        name="update_labels_for_bestbox_proposals"
    )

    # proposals_label now holds 0 (background) or a gt label plus one for
    # the proposals we are going to use, and -1 for the ones we should
    # ignore. But we still need to make sure we don't have a number of
    # foreground proposals higher than
    # minibatch_size * foreground_fraction.
    max_fg = int(self._foreground_fraction * self._minibatch_size)
    fg_condition = tf.logical_or(
        iou_is_fg, is_best_box
    )
    fg_inds = tf.where(
        condition=fg_condition
    )

    def disable_some_fgs():
        # We want to delete a randomly-selected subset of fg_inds of
        # size `fg_inds.shape[0] - max_fg`.
        # We shuffle along the dimension 0 and then we get the first
        # num_fg_inds - max_fg indices and we disable them.
        shuffled_inds = tf.random_shuffle(fg_inds, seed=self._seed)
        disable_place = (tf.shape(fg_inds)[0] - max_fg)
        # This function should never run if num_fg_inds <= max_fg, so we
        # add an assertion to catch the wrong behaviour if it happens.
        integrity_assertion = tf.assert_positive(
            disable_place,
            message="disable_place in disable_some_fgs is negative."
        )
        with tf.control_dependencies([integrity_assertion]):
            disable_inds = shuffled_inds[:disable_place]
        is_disabled = tf.sparse_to_dense(
            sparse_indices=disable_inds,
            sparse_values=True, default_value=False,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            # We are shuffling the indices, so they may not be ordered.
            validate_indices=False
        )
        return tf.where(
            condition=is_disabled,
            # We set it to -label for debugging purposes.
            x=tf.negative(proposals_label),
            y=proposals_label
        )

    # Disable some fgs if we have too many foregrounds.
    proposals_label = tf.cond(
        tf.greater(tf.shape(fg_inds)[0], max_fg),
        true_fn=disable_some_fgs,
        false_fn=lambda: proposals_label
    )

    total_fg_in_batch = tf.shape(
        tf.where(
            condition=tf.greater(proposals_label, 0)
        )
    )[0]

    # Now we want to do the same for backgrounds.
    # We calculate up to how many backgrounds we desire based on the
    # final number of foregrounds and the total desired batch size.
    max_bg = self._minibatch_size - total_fg_in_batch

    # We can't use bg_condition because some of the proposals that satisfy
    # the IoU conditions to be background may have been labeled as
    # foreground due to them being the best proposal for a certain gt_box.
    bg_mask = tf.equal(proposals_label, 0)
    bg_inds = tf.where(
        condition=bg_mask,
    )

    def disable_some_bgs():
        # Mutatis mutandis, all comments from disable_some_fgs apply.
        shuffled_inds = tf.random_shuffle(bg_inds, seed=self._seed)
        disable_place = (tf.shape(bg_inds)[0] - max_bg)
        integrity_assertion = tf.assert_non_negative(
            disable_place,
            message="disable_place in disable_some_bgs is negative."
        )
        with tf.control_dependencies([integrity_assertion]):
            disable_inds = shuffled_inds[:disable_place]
        is_disabled = tf.sparse_to_dense(
            sparse_indices=disable_inds,
            sparse_values=True, default_value=False,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            validate_indices=False
        )
        # Unlike foregrounds, disabled backgrounds are simply set to -1.
        return tf.where(
            condition=is_disabled,
            x=tf.fill(
                dims=proposals_label_shape,
                value=-1.
            ),
            y=proposals_label
        )

    proposals_label = tf.cond(
        tf.greater_equal(tf.shape(bg_inds)[0], max_bg),
        true_fn=disable_some_bgs,
        false_fn=lambda: proposals_label
    )

    # Next step is to calculate the proper targets for the proposals
    # labeled based on the values of the ground-truth boxes.
    # We have to use only the proposals labeled >= 1, each matching with
    # the proper gt_boxes.

    # Get the ids of the proposals that matter for bbox_target comparison.
    is_proposal_with_target = tf.greater(
        proposals_label, 0
    )
    proposals_with_target_idx = tf.where(
        condition=is_proposal_with_target
    )
    # Get the corresponding ground truth box only for the proposals with
    # target.
    gt_boxes_idxs = tf.gather(
        overlaps_best_gt_idxs,
        proposals_with_target_idx
    )
    # Get the values of the ground truth boxes.
    proposals_gt_boxes = tf.gather_nd(
        gt_boxes[:, :4], gt_boxes_idxs
    )
    # We create the same array but with the proposals.
    proposals_with_target = tf.gather_nd(
        proposals,
        proposals_with_target_idx
    )
    # We create our targets by encoding each gt box against its proposal,
    # using the configured variances.
    bbox_targets_nonzero = encode(
        proposals_with_target,
        proposals_gt_boxes,
        variances=self._variances,
    )

    # We unmap targets to proposal_labels (containing the length of
    # proposals); rows without a target stay at zero.
    bbox_targets = tf.scatter_nd(
        indices=proposals_with_target_idx,
        updates=bbox_targets_nonzero,
        shape=tf.cast(tf.shape(proposals), tf.int64)
    )

    return proposals_label, bbox_targets
def _build(self, probs, all_anchors, gt_boxes):
    """Assign a class label and a bbox regression target to each anchor.

    Backgrounds are chosen by hard negative mining: among the anchors that
    are eligible to be background, the ones with the highest non-background
    score are labeled 0, up to `hard_negative_ratio` times the number of
    foreground anchors.

    Args:
        probs: A Tensor with per-anchor class scores. Column 0 is skipped
            when scoring (presumably the background class -- confirm with
            the caller); the remaining columns are reduced with a max.
        all_anchors: A Tensor with anchors for all of SSD's features.
            The shape of the Tensor is (num_anchors, 4).
        gt_boxes: A Tensor with the ground truth boxes for the image.
            The shape of the Tensor is (num_gt, 5), having the truth label
            as the last value for each box.

    Returns:
        class_targets: A float Tensor of shape (num_anchors,). 0 marks
            background, a positive value is the label of the matched
            ground truth box plus one, and -1 marks an anchor to be
            ignored in the minibatch.
        bbox_offsets_targets: A bounding box regression target for each of
            the anchors that have a greater than zero label. For every
            other anchor we return zeros.
            The shape of the Tensor is (num_anchors, 4).
    """
    all_anchors = tf.cast(all_anchors, tf.float32)
    gt_boxes = tf.cast(gt_boxes, tf.float32)

    # We are going to label each anchor based on the IoU with
    # `gt_boxes`. Start by filling the labels with -1, marking them as
    # unknown.
    anchors_label_shape = tf.gather(tf.shape(all_anchors), [0])
    anchors_label = tf.fill(dims=anchors_label_shape, value=-1.)

    overlaps = bbox_overlap_tf(all_anchors, gt_boxes[:, :4])
    max_overlaps = tf.reduce_max(overlaps, axis=1)

    # Get the index of the best gt_box for each anchor.
    best_gtbox_for_anchors_idx = tf.argmax(overlaps, axis=1)

    # Having the index of the gt bbox with the best label we need to get
    # the label for each gt box and sum 1 to it because 0 is used for
    # background.
    best_fg_labels_for_anchors = tf.add(
        tf.gather(gt_boxes[:, 4], best_gtbox_for_anchors_idx), 1.)
    iou_is_fg = tf.greater_equal(max_overlaps, self._foreground_threshold)
    # We update anchors_label with the value in
    # best_fg_labels_for_anchors only when the box is foreground.
    # TODO: Replace with a sparse_to_dense with -1 default_value
    anchors_label = tf.where(condition=iou_is_fg,
                             x=best_fg_labels_for_anchors,
                             y=anchors_label)

    best_anchor_idxs = tf.argmax(overlaps, axis=0)
    is_best_box = tf.sparse_to_dense(
        sparse_indices=best_anchor_idxs,
        sparse_values=True,
        default_value=False,
        output_shape=tf.cast(anchors_label_shape, tf.int64),
        validate_indices=False)

    # Now we need to find the anchors that are the best for each of the
    # gt_boxes. We overwrite the previous anchors_label with this
    # because setting the best anchor for each gt_box has priority.
    best_anchors_gt_labels = tf.sparse_to_dense(
        sparse_indices=best_anchor_idxs,
        sparse_values=gt_boxes[:, 4] + 1,
        default_value=-1,
        output_shape=tf.cast(anchors_label_shape, tf.int64),
        validate_indices=False,
        name="get_right_labels_for_bestboxes")
    anchors_label = tf.where(condition=is_best_box,
                             x=best_anchors_gt_labels,
                             y=anchors_label,
                             name="update_labels_for_bestbox_anchors")

    # Use the worst backgrounds (the bgs whose probability of being fg is
    # the greatest).
    cls_probs = probs[:, 1:]
    max_cls_probs = tf.reduce_max(cls_probs, axis=1)

    # Exclude boxes with IOU > `background_threshold_high` with any GT.
    iou_less_than_bg_tresh_high_filter = tf.less_equal(
        max_overlaps, self._background_threshold_high)
    bg_anchors = tf.less_equal(anchors_label, 0)
    bg_overlaps_filter = tf.logical_and(iou_less_than_bg_tresh_high_filter,
                                        bg_anchors)

    # Anchors that can't be background get a -1 sentinel score so that
    # they sort below every real candidate.
    max_cls_probs = tf.where(
        condition=bg_overlaps_filter,
        x=max_cls_probs,
        y=tf.fill(dims=anchors_label_shape, value=-1.),
    )

    # We calculate up to how many backgrounds we desire based on the
    # final number of foregrounds and the hard mining ratio.
    num_fg_mask = tf.greater(anchors_label, 0.0)
    num_fg = tf.cast(tf.count_nonzero(num_fg_mask), tf.float32)

    num_bg = tf.cast(num_fg * self._hard_negative_ratio, tf.int32)
    # Never request more backgrounds than there are valid candidates:
    # top_k fails when k exceeds the number of anchors, and a k larger
    # than the candidate count would pick sentinel entries, relabeling
    # foreground anchors as background. This relies on real scores being
    # greater than the -1 sentinel.
    num_bg_candidates = tf.cast(
        tf.count_nonzero(bg_overlaps_filter), tf.int32)
    num_bg = tf.minimum(num_bg, num_bg_candidates)
    top_k_bg = tf.nn.top_k(max_cls_probs, k=num_bg)

    set_bg = tf.sparse_to_dense(sparse_indices=top_k_bg.indices,
                                sparse_values=True,
                                default_value=False,
                                output_shape=anchors_label_shape,
                                validate_indices=False)

    anchors_label = tf.where(condition=set_bg,
                             x=tf.fill(dims=anchors_label_shape, value=0.),
                             y=anchors_label)

    # Next step is to calculate the proper bbox targets for the labeled
    # anchors based on the values of the ground-truth boxes.
    # We have to use only the anchors labeled >= 1, each matching with
    # the proper gt_boxes.

    # Get the ids of the anchors that matter for bbox_target comparison.
    is_anchor_with_target = tf.greater(anchors_label, 0)
    anchors_with_target_idx = tf.where(condition=is_anchor_with_target)
    # Get the corresponding ground truth box only for the anchors with
    # target.
    gt_boxes_idxs = tf.gather(best_gtbox_for_anchors_idx,
                              anchors_with_target_idx)
    # Get the values of the ground truth boxes.
    anchors_gt_boxes = tf.gather_nd(gt_boxes[:, :4], gt_boxes_idxs)
    # We create the same array but with the anchors.
    anchors_with_target = tf.gather_nd(all_anchors, anchors_with_target_idx)
    # We create our targets by encoding each gt box against its anchor.
    bbox_targets = encode(anchors_with_target,
                          anchors_gt_boxes,
                          variances=self._variances)

    # We unmap targets to anchor_labels (containing the length of
    # anchors); rows without a target stay at zero.
    bbox_targets = tf.scatter_nd(indices=anchors_with_target_idx,
                                 updates=bbox_targets,
                                 shape=tf.cast(tf.shape(all_anchors),
                                               tf.int64))

    return anchors_label, bbox_targets
def bbox_encode(gt_boxes):
    """Encode `gt_boxes` against the enclosing scope's `proposed_boxes`.

    The first column of `proposed_boxes` is dropped before the boxes are
    handed to `encode`.
    """
    boxes_without_first_column = proposed_boxes[:, 1:]
    return encode(boxes_without_first_column, gt_boxes)
def _build(self, proposals, gt_boxes):
    """Assign a class label and a bbox regression target to each proposal.

    Args:
        proposals: A Tensor with the RPN bounding boxes proposals.
            The shape of the Tensor is (num_proposals, 4).
        gt_boxes: A Tensor with the ground truth boxes for the image.
            The shape of the Tensor is (num_gt, 5), having the truth label
            as the last value for each box.

    Returns:
        proposals_label: A float Tensor of shape (num_proposals,). 0 marks
            background, a positive value is the label of the matched
            ground truth box plus one, and a negative value marks a
            proposal to be ignored in the minibatch (-1, or the negated
            label for foregrounds dropped by the subsampling). The
            foreground/background balance inside the minibatch has
            already been applied to this result.
        bbox_targets: A bounding box regression target for each of the
            proposals that have a greater than zero label: the encoding
            of the proposal's best ground truth box relative to the
            proposal itself. For every other proposal we return zeros.
            The shape of the Tensor is (num_proposals, 4).
    """
    # Compute the IoU matrix, (num_proposals, num_gt_boxes).
    overlaps = bbox_overlap_tf(proposals, gt_boxes[:, :4])
    # overlaps now contains (num_proposals, num_gt_boxes) with the IoU of
    # proposal P and ground truth box G in overlaps[P, G]

    # We are going to label each proposal based on the IoU with
    # `gt_boxes`. Start by filling the labels with -1, marking them as
    # ignored.
    # tf.gather picks the first entry of the shape:
    # (num_proposals, 4) -> [num_proposals]
    proposals_label_shape = tf.gather(tf.shape(proposals), [0])
    # A (num_proposals,) vector filled with -1.
    proposals_label = tf.fill(dims=proposals_label_shape, value=-1.)
    # For each proposal there are three possible outcomes for labelling:
    #   if low <= max(iou) < high (the background thresholds) then we
    #       label it as background.
    #   if max(iou) >= foreground_threshold then we label it with the
    #       class of the gt_box with the highest IoU.
    #   otherwise it keeps the -1 label and is ignored.
    #
    # max_overlaps holds, for each proposal, the highest IoU it has with
    # any gt_box: (num_proposals,) <= (num_proposals, num_gt_boxes)
    max_overlaps = tf.reduce_max(overlaps, axis=1)

    iou_is_high_enough_for_bg = tf.greater_equal(
        max_overlaps, self._background_threshold_low)
    iou_is_not_too_high_for_bg = tf.less(max_overlaps,
                                         self._background_threshold_high)
    # The set of background proposals.
    bg_condition = tf.logical_and(iou_is_high_enough_for_bg,
                                  iou_is_not_too_high_for_bg)
    # Background positions get label 0; the rest keep their value, which
    # is -1 at this point.
    proposals_label = tf.where(condition=bg_condition,
                               x=tf.zeros_like(proposals_label,
                                               dtype=tf.float32),
                               y=proposals_label)

    # Get the index of the best gt_box for each proposal.
    # (num_proposals,) <= (num_proposals, num_gt_boxes)
    overlaps_best_gt_idxs = tf.argmax(overlaps, axis=1)
    # Having the index of the gt bbox with the best label we need to get
    # the label for each gt box and add one to it because 0 is used for
    # background.
    best_fg_labels_for_proposals = tf.add(
        tf.gather(gt_boxes[:, 4], overlaps_best_gt_idxs), 1.)
    # Foreground mask.
    iou_is_fg = tf.greater_equal(max_overlaps, self._foreground_threshold)
    # For each gt box, the index of the proposal that overlaps it most.
    # (num_gt_boxes,) <= (num_proposals, num_gt_boxes)
    best_proposals_idxs = tf.argmax(overlaps, axis=0)

    # Set the indices in best_proposals_idxs to True, and the rest to
    # False.
    # tf.sparse_to_dense is used because we know the set of indices which
    # we want to set to True, and we know the rest of the indices
    # should be set to False. That's exactly the use case of
    # tf.sparse_to_dense: it builds a tensor of `output_shape` filled with
    # `default_value`, with `sparse_values` at `sparse_indices`.
    is_best_box = tf.sparse_to_dense(
        sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
        sparse_values=True,
        default_value=False,
        output_shape=tf.cast(proposals_label_shape, tf.int64),
        validate_indices=False)
    # We update proposals_label with the value in
    # best_fg_labels_for_proposals only when the box is foreground.
    proposals_label = tf.where(condition=iou_is_fg,
                               x=best_fg_labels_for_proposals,
                               y=proposals_label)
    # Now we need to find the proposals that are the best for each of the
    # gt_boxes. We overwrite the previous proposals_label with this
    # because setting the best proposal for each gt_box has priority.
    # The dense tensor below holds (gt label + 1) at the position of each
    # gt box's best proposal and zero everywhere else.
    best_proposals_gt_labels = tf.sparse_to_dense(
        sparse_indices=tf.reshape(best_proposals_idxs, [-1]),
        sparse_values=gt_boxes[:, 4] + 1,
        default_value=0.,
        output_shape=tf.cast(proposals_label_shape, tf.int64),
        validate_indices=False,
        name="get_right_labels_for_bestboxes")
    # Update the label at the position of each gt box's best proposal.
    proposals_label = tf.where(condition=is_best_box,
                               x=best_proposals_gt_labels,
                               y=proposals_label,
                               name="update_labels_for_bestbox_proposals")

    # proposals_label now holds 0 (background) or a gt label plus one for
    # the proposals we are going to use, and -1 for the ones we should
    # ignore. But we still need to make sure we don't have a number of
    # foreground proposals higher than
    # minibatch_size * foreground_fraction.
    max_fg = int(self._foreground_fraction * self._minibatch_size)
    # A foreground is either the best proposal of some gt box or a
    # proposal whose IoU reaches the foreground threshold; the logical-or
    # merges both sets.
    fg_condition = tf.logical_or(iou_is_fg, is_best_box)
    # Indices of the foreground proposals.
    fg_inds = tf.where(condition=fg_condition)

    def disable_some_fgs():
        # We want to delete a randomly-selected subset of fg_inds of
        # size `fg_inds.shape[0] - max_fg`.
        # We shuffle along the dimension 0 and then we get the first
        # num_fg_inds - max_fg indices and we disable them.
        shuffled_inds = tf.random_shuffle(fg_inds, seed=self._seed)
        disable_place = (tf.shape(fg_inds)[0] - max_fg)
        # This function should never run if num_fg_inds <= max_fg, so we
        # add an assertion to catch the wrong behaviour if it happens.
        integrity_assertion = tf.assert_positive(
            disable_place,
            message="disable_place in disable_some_fgs is negative.")
        with tf.control_dependencies([integrity_assertion]):
            disable_inds = shuffled_inds[:disable_place]
        is_disabled = tf.sparse_to_dense(
            sparse_indices=disable_inds,
            sparse_values=True,
            default_value=False,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            # We are shuffling the indices, so they may not be ordered.
            validate_indices=False)
        # A dropped foreground simply gets its label negated.
        return tf.where(
            condition=is_disabled,
            # We set it to -label for debugging purposes.
            x=tf.negative(proposals_label),
            y=proposals_label)

    # Disable some fgs if we have too many foregrounds.
    proposals_label = tf.cond(tf.greater(tf.shape(fg_inds)[0], max_fg),
                              true_fn=disable_some_fgs,
                              false_fn=lambda: proposals_label)

    # Number of foregrounds that remain in the batch.
    total_fg_in_batch = tf.shape(
        tf.where(condition=tf.greater(proposals_label, 0)))[0]

    # Now we want to do the same for backgrounds.
    # We calculate up to how many backgrounds we desire based on the
    # final number of foregrounds and the total desired batch size.
    max_bg = self._minibatch_size - total_fg_in_batch

    # We can't use bg_condition because some of the proposals that satisfy
    # the IoU conditions to be background may have been labeled as
    # foreground due to them being the best proposal for a certain gt_box.
    bg_mask = tf.equal(proposals_label, 0)
    bg_inds = tf.where(condition=bg_mask, )

    def disable_some_bgs():
        # Mutatis mutandis, all comments from disable_some_fgs apply.
        shuffled_inds = tf.random_shuffle(bg_inds, seed=self._seed)
        disable_place = (tf.shape(bg_inds)[0] - max_bg)
        integrity_assertion = tf.assert_non_negative(
            disable_place,
            message="disable_place in disable_some_bgs is negative.")
        with tf.control_dependencies([integrity_assertion]):
            disable_inds = shuffled_inds[:disable_place]
        is_disabled = tf.sparse_to_dense(
            sparse_indices=disable_inds,
            sparse_values=True,
            default_value=False,
            output_shape=tf.cast(proposals_label_shape, tf.int64),
            validate_indices=False)
        # Unlike foregrounds, disabled backgrounds are simply set to -1.
        return tf.where(condition=is_disabled,
                        x=tf.fill(dims=proposals_label_shape, value=-1.),
                        y=proposals_label)

    proposals_label = tf.cond(
        tf.greater_equal(tf.shape(bg_inds)[0], max_bg),
        true_fn=disable_some_bgs,
        false_fn=lambda: proposals_label)

    # Next step is to calculate the proper targets for the proposals
    # labeled based on the values of the ground-truth boxes.
    # We have to use only the proposals labeled >= 1 (not background and
    # not ignored), each matching with the proper gt_boxes.

    # Get the ids of the proposals that matter for bbox_target comparison.
    # Boolean mask of the foreground proposals.
    is_proposal_with_target = tf.greater(proposals_label, 0)
    # Coordinate indices of the foreground proposals.
    proposals_with_target_idx = tf.where(condition=is_proposal_with_target)
    # Get the corresponding ground truth box only for the proposals with
    # target, by indexing overlaps_best_gt_idxs (num_proposals,).
    gt_boxes_idxs = tf.gather(overlaps_best_gt_idxs,
                              proposals_with_target_idx)
    # Get the values of the ground truth boxes (gather_nd supports
    # multi-dimensional indices).
    proposals_gt_boxes = tf.gather_nd(gt_boxes[:, :4], gt_boxes_idxs)
    # We create the same array but with the proposals; indexing this way
    # keeps the (n, 4) coordinate layout.
    proposals_with_target = tf.gather_nd(proposals,
                                         proposals_with_target_idx)
    # We create our targets by encoding, for each foreground proposal, its
    # best gt box relative to the proposal, using the configured
    # variances.
    bbox_targets_nonzero = encode(
        proposals_with_target,
        proposals_gt_boxes,
        variances=self._variances,
    )

    # We unmap targets to proposal_labels (containing the length of
    # proposals): the encoded targets are written at the foreground
    # positions of a zero tensor shaped like `proposals`.
    bbox_targets = tf.scatter_nd(indices=proposals_with_target_idx,
                                 updates=bbox_targets_nonzero,
                                 shape=tf.cast(tf.shape(proposals),
                                               tf.int64))

    return proposals_label, bbox_targets
def _build(self, probs, all_anchors, gt_boxes):
    """Build classification labels and box-regression targets per anchor.

    Called when tensors are passed to an instance of this module.

    Args:
        probs: Per-anchor class scores. Column 0 is dropped when ranking
            hard negatives, i.e. it is treated as the background class, so
            the tensor is expected to have num_classes + 1 columns.
        all_anchors: A Tensor with the anchors for all of SSD's feature
            maps, in image coordinates. Shape (num_anchors, 4).
        gt_boxes: A Tensor with the ground truth boxes for the image.
            Shape (num_gt, 5), with the truth label as the last value of
            each box.

    Returns:
        class_targets: Float Tensor of shape (num_anchors,). Each value is
            the gt label + 1 for foreground anchors, 0 for anchors picked
            as background (hard negatives), or -1 when the anchor is to be
            ignored in the minibatch.
        bbox_offsets_targets: Float Tensor of shape (num_anchors, 4). For
            anchors with a label greater than zero it holds the encoded
            regression target towards their best-overlapping gt box; every
            other row is zero.
    """
    all_anchors = tf.cast(all_anchors, tf.float32)
    gt_boxes = tf.cast(gt_boxes, tf.float32)

    # We are going to label each anchor based on the IoU with `gt_boxes`.
    # Start by filling the labels with -1, marking them as unknown.
    # `anchors_label_shape` is the 1-element shape vector [num_anchors], so
    # every label tensor built from it is rank 1: (num_anchors,).
    anchors_label_shape = tf.gather(tf.shape(all_anchors), [0])
    anchors_label = tf.fill(dims=anchors_label_shape, value=-1.)

    # (num_anchors, num_gt)
    overlaps = bbox_overlap_tf(all_anchors, gt_boxes[:, :4])
    # (num_anchors,): for each anchor, its highest IoU with any gt box.
    max_overlaps = tf.reduce_max(overlaps, axis=1)

    ###################################################################
    # Anchor point of view: an anchor whose best IoU reaches the
    # foreground threshold becomes a positive sample.
    ###################################################################

    # Get the index of the best gt_box for each anchor. (num_anchors,)
    best_gtbox_for_anchors_idx = tf.argmax(overlaps, axis=1)

    # Having the index of the gt bbox with the best label we need to get
    # the label for each gt box and sum 1 to it because 0 is used for
    # background. (num_anchors,)
    best_fg_labels_for_anchors = tf.add(
        tf.gather(gt_boxes[:, 4], best_gtbox_for_anchors_idx), 1.
    )
    # (num_anchors,): True where the anchor's best IoU makes it foreground.
    iou_is_fg = tf.greater_equal(max_overlaps, self._foreground_threshold)

    # Labels so far: {-1} -> {-1, foreground labels}.
    # We update anchors_label with the value in best_fg_labels_for_anchors
    # only when the box is foreground; everything else stays at -1.
    # TODO: Replace with a sparse_to_dense with -1 default_value
    anchors_label = tf.where(
        condition=iou_is_fg,
        x=best_fg_labels_for_anchors,
        y=anchors_label,
    )

    ###################################################################
    # Ground-truth point of view: to avoid gt boxes that end up without
    # any anchor, the best anchor of every gt box is also made positive.
    ###################################################################

    # (num_gt,): for each gt box, the index of its best anchor.
    best_anchor_idxs = tf.argmax(overlaps, axis=0)
    # Boolean mask of shape (num_anchors,), True at the anchors that are
    # the best match of some gt box. Used as the condition of the
    # `tf.where` below. `tf.argmax` yields int64 indices, hence the cast of
    # the output shape.
    is_best_box = tf.sparse_to_dense(
        sparse_indices=best_anchor_idxs,
        sparse_values=True,
        default_value=False,
        output_shape=tf.cast(anchors_label_shape, tf.int64),
        validate_indices=False,
    )

    # Now we need to find the anchors that are the best for each of the
    # gt_boxes. We overwrite the previous anchors_label with this because
    # setting the best anchor for each gt_box has priority.
    # Same scatter as above, but writing the (label + 1) of the gt box at
    # the position of its best anchor.
    best_anchors_gt_labels = tf.sparse_to_dense(
        sparse_indices=best_anchor_idxs,
        sparse_values=gt_boxes[:, 4] + 1,
        default_value=-1,
        output_shape=tf.cast(anchors_label_shape, tf.int64),
        validate_indices=False,
        name="get_right_labels_for_bestboxes",
    )

    # An anchor may already carry a label from the IoU rule above; this is
    # an overwrite, and the best-anchor-per-gt rule takes precedence.
    anchors_label = tf.where(
        condition=is_best_box,
        x=best_anchors_gt_labels,
        y=anchors_label,
        name="update_labels_for_bestbox_anchors",
    )

    # Use the worst backgrounds (the bgs whose probability of being fg is
    # the greatest).
    # Drop column 0 of `probs` and keep, for every anchor, its highest
    # score among the remaining (foreground) classes.
    cls_probs = probs[:, 1:]
    max_cls_probs = tf.reduce_max(cls_probs, axis=1)

    # Exclude boxes with IOU > `background_threshold_high` with any GT.
    iou_less_than_bg_tresh_high_filter = tf.less_equal(
        max_overlaps, self._background_threshold_high
    )
    # At this point no anchor has been assigned 0 yet, so `<= 0` selects
    # the anchors still labeled -1 (not chosen as foreground).
    bg_anchors = tf.less_equal(anchors_label, 0)
    # Background candidates: not foreground AND low IoU with every gt box.
    bg_overlaps_filter = tf.logical_and(
        iou_less_than_bg_tresh_high_filter, bg_anchors
    )

    # Keep the foreground score only for background candidates; every
    # other anchor gets the sentinel -1 so it ranks below the candidates.
    max_cls_probs = tf.where(
        condition=bg_overlaps_filter,
        x=max_cls_probs,
        y=tf.fill(dims=anchors_label_shape, value=-1.),
    )

    # We calculate up to how many backgrounds we desire based on the final
    # number of foregrounds and the hard minning ratio.
    num_fg_mask = tf.greater(anchors_label, 0.0)
    num_fg = tf.cast(tf.count_nonzero(num_fg_mask), tf.float32)
    num_bg = tf.cast(num_fg * self._hard_negative_ratio, tf.int32)
    # `tf.nn.top_k` fails at run time when `k` is larger than the number of
    # entries, so never ask for more backgrounds than there are anchors.
    num_bg = tf.minimum(num_bg, tf.shape(max_cls_probs)[0])

    # Indices of the `num_bg` highest-scoring entries.
    top_k_bg = tf.nn.top_k(max_cls_probs, k=num_bg)

    # Boolean mask of the selected entries. `top_k` indices and
    # `anchors_label_shape` are both int32, so no cast is needed here.
    set_bg = tf.sparse_to_dense(
        sparse_indices=top_k_bg.indices,
        sparse_values=True,
        default_value=False,
        output_shape=anchors_label_shape,
        validate_indices=False,
    )
    # When fewer candidates exist than `num_bg`, top_k also returns entries
    # holding the -1 sentinel (foreground or ignored anchors). Restrict the
    # mask to real candidates so those anchors are never relabeled as
    # background.
    set_bg = tf.logical_and(set_bg, bg_overlaps_filter)

    # Labels: {-1, foreground labels} -> {-1, 0, foreground labels}.
    anchors_label = tf.where(
        condition=set_bg,
        x=tf.fill(dims=anchors_label_shape, value=0.),
        y=anchors_label,
    )

    # Next step is to calculate the proper bbox targets for the labeled
    # anchors based on the values of the ground-truth boxes.
    # We have to use only the anchors labeled >= 1, each matching with the
    # proper gt_boxes.

    # Get the ids of the anchors that matter for bbox_target comparison.
    is_anchor_with_target = tf.greater(anchors_label, 0)
    anchors_with_target_idx = tf.where(condition=is_anchor_with_target)

    # Get the corresponding ground truth box only for the anchors with
    # target: look up each foreground anchor's best-overlapping gt index.
    gt_boxes_idxs = tf.gather(
        best_gtbox_for_anchors_idx, anchors_with_target_idx
    )
    # Get the values of the ground truth boxes.
    anchors_gt_boxes = tf.gather_nd(gt_boxes[:, :4], gt_boxes_idxs)
    # We create the same array but with the anchors.
    anchors_with_target = tf.gather_nd(all_anchors, anchors_with_target_idx)

    # We create our targets with bbox_transform.
    bbox_targets = encode(
        anchors_with_target,
        anchors_gt_boxes,
        variances=self._variances,
    )

    # We unmap targets to anchor_labels (containing the length of anchors):
    # scatter the targets back to the rows of their anchors, leaving zeros
    # everywhere else.
    bbox_targets = tf.scatter_nd(
        indices=anchors_with_target_idx,
        updates=bbox_targets,
        shape=tf.cast(tf.shape(all_anchors), tf.int64),
    )

    return anchors_label, bbox_targets
def bbox_encode(gt_boxes):
    """Encode `gt_boxes` against `proposed_boxes` using `encode`.

    `proposed_boxes` is not a parameter; it is read from the scope that
    surrounds this helper.
    """
    encoded_boxes = encode(proposed_boxes, gt_boxes)
    return encoded_boxes