def iou_loss(bbox_pred, bbox_targets, gtbox, label, num_classes):
    """
    IoU-based loss between decoded predictions and decoded targets.

    :param bbox_pred: [-1, (cfgs.CLS_NUM +1) * 4]
    :param bbox_targets: [-1, (cfgs.CLS_NUM +1) * 4]
    :param gtbox: [-1, 4]
    :param label: [-1]
    :param num_classes: number of 4-value box slots per row
    :return: ``0.1 * iou_term + 0.0 * classification_term``
    """
    # One reference box per (row, class) slot, flattened to [-1, 4].
    reference = tf.tile(gtbox, [1, num_classes])
    flat_pred = tf.reshape(bbox_pred, [-1, 4])
    flat_targets = tf.reshape(bbox_targets, [-1, 4])
    reference = tf.reshape(reference, [-1, 4])

    decoded_pred = encode_and_decode.decode_boxes(
        flat_pred, reference, scale_factors=cfgs.ROI_SCALE_FACTORS)
    decoded_target = encode_and_decode.decode_boxes(
        flat_targets, reference, scale_factors=cfgs.ROI_SCALE_FACTORS)

    # One-hot class selector, flattened so it lines up with the box slots.
    class_mask = tf.reshape(
        tf.one_hot(tf.reshape(label, [-1, 1]), depth=num_classes, axis=1),
        [-1, ])

    overlaps = iou_calculate(decoded_pred, decoded_target)
    iou_term = tf.reduce_mean(-tf.log(overlaps * class_mask + 1e-5))

    # Thresholded IoU reused as "logits"; this term is weighted by 0.0 below.
    hits = tf.reshape(tf.cast(tf.greater(overlaps, 0.5), tf.float32),
                      [-1, num_classes])
    cls_term = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=hits,
                                                       labels=label))

    return iou_term * 0.1 + cls_term * 0.0
def rpn_proposals(self):
    """
    Decode RPN regressions, keep the best-scoring boxes, run rotate-NMS and pad.

    Reads ``self.rpn_encode_boxes``, ``self.anchors``, ``self.scale_factors``,
    ``self.rpn_scores``, ``self.top_k_nms``, ``self.rpn_nms_iou_threshold`` and
    ``self.max_proposals_num``.

    :return: ``(rpn_proposals_boxes, rpn_proposals_scores)``. When NMS keeps
        fewer than ``self.max_proposals_num`` boxes, the pair comes from
        ``boxes_utils.padd_boxes_with_zeros``.
    """
    with tf.variable_scope('rpn_proposals'):
        rpn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=self.rpn_encode_boxes,
            reference_boxes=self.anchors,
            scale_factors=self.scale_factors)

        rpn_softmax_scores = slim.softmax(self.rpn_scores)
        rpn_object_score = rpn_softmax_scores[:, 1]  # second column represent object

        if self.top_k_nms:
            # tf.nn.top_k raises when k exceeds the number of candidates,
            # so cap k at the number of scores actually available.
            top_k = tf.minimum(self.top_k_nms, tf.shape(rpn_object_score)[0])
            rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score,
                                                          k=top_k)
            rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

        # NMS
        valid_indices = tf_wrapper.nms_rotate_tf(
            boxes_list=rpn_decode_boxes,
            scores=rpn_object_score,
            iou_threshold=self.rpn_nms_iou_threshold,
            max_output_size=self.max_proposals_num,
            use_gpu=cfgs.NMS_USE_GPU)

        valid_boxes = tf.gather(rpn_decode_boxes, valid_indices)
        valid_scores = tf.gather(rpn_object_score, valid_indices)

        # Pad only when NMS returned fewer boxes than requested.
        rpn_proposals_boxes, rpn_proposals_scores = tf.cond(
            tf.less(tf.shape(valid_boxes)[0], self.max_proposals_num),
            lambda: boxes_utils.padd_boxes_with_zeros(
                valid_boxes, valid_scores, self.max_proposals_num),
            lambda: (valid_boxes, valid_scores))

        return rpn_proposals_boxes, rpn_proposals_scores
def rpn_proposals(self):
    """
    Decode RPN regressions, optionally clip, keep the top scores, run NMS and pad.

    Boxes are clipped to the image only when ``self.is_training`` is false.

    :return: ``(rpn_proposals_boxes, rpn_proposals_scores)``. When NMS keeps
        fewer than ``self.max_proposals_num`` boxes, the pair comes from
        ``boxes_utils.padd_boxes_with_zeros``.
    """
    with tf.variable_scope('rpn_proposals'):
        rpn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=self.rpn_encode_boxes,
            reference_boxes=self.anchors,
            scale_factors=self.scale_factors)

        if not self.is_training:  # when test, clip proposals to img boundaries
            img_shape = tf.shape(self.img_batch)
            rpn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                rpn_decode_boxes, img_shape)

        rpn_softmax_scores = slim.softmax(self.rpn_scores)
        rpn_object_score = rpn_softmax_scores[:, 1]  # second column represent object

        if self.top_k_nms:
            # tf.nn.top_k raises when k exceeds the number of candidates,
            # so cap k at the number of scores actually available.
            top_k = tf.minimum(self.top_k_nms, tf.shape(rpn_object_score)[0])
            rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score,
                                                          k=top_k)
            rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

        valid_indices = nms.non_maximal_suppression(
            boxes=rpn_decode_boxes,
            scores=rpn_object_score,
            max_output_size=self.max_proposals_num,
            iou_threshold=self.rpn_nms_iou_threshold)

        valid_boxes = tf.gather(rpn_decode_boxes, valid_indices)
        valid_scores = tf.gather(rpn_object_score, valid_indices)

        # Pad only when NMS returned fewer boxes than requested.
        rpn_proposals_boxes, rpn_proposals_scores = tf.cond(
            tf.less(tf.shape(valid_boxes)[0], self.max_proposals_num),
            lambda: boxes_utils.padd_boxes_with_zeros(
                valid_boxes, valid_scores, self.max_proposals_num),
            lambda: (valid_boxes, valid_scores))

        return rpn_proposals_boxes, rpn_proposals_scores
def fast_rcnn_predict(self):
    """
    Decode the per-class Fast-RCNN regressions, clip them and run
    ``self.fast_rcnn_proposals`` on the result.

    :return: (decoded boxes, scores, number of objects, detection category)
        exactly as produced by ``self.fast_rcnn_proposals``.
    """
    with tf.variable_scope('fast_rcnn_predict'):
        class_probs = slim.softmax(self.fast_rcnn_scores)  # [-1, num_classes+1]

        box_deltas = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])

        # Repeat each proposal once per class:
        # [N, 4*num_classes] -> [N*num_classes, 4]
        tiled_proposals = tf.tile(self.fast_rcnn_all_level_proposals,
                                  [1, self.num_classes])
        per_class_refs = tf.reshape(tiled_proposals, [-1, 4])

        decoded = encode_and_decode.decode_boxes(
            encode_boxes=box_deltas,
            reference_boxes=per_class_refs,
            scale_factors=self.scale_factors)
        decoded = boxes_utils.clip_boxes_to_img_boundaries(
            decoded, img_shape=self.img_shape)

        # multiclass NMS works on one row per proposal
        decoded = tf.reshape(decoded, [-1, self.num_classes * 4])
        boxes, box_scores, object_count, category = self.fast_rcnn_proposals(
            decoded, scores=class_probs)

        return boxes, box_scores, object_count, category
def fast_rcnn_predict(self):
    """
    Decode the per-class 5-value Fast-RCNN regressions against the rotate
    proposals and run ``self.fast_rcnn_proposals`` on the result.

    :return: (decoded boxes, scores, number of objects, detection category)
        exactly as produced by ``self.fast_rcnn_proposals``.
    """
    with tf.variable_scope('fast_rcnn_predict'):
        class_probs = slim.softmax(self.fast_rcnn_scores)  # [-1, num_classes+1]

        box_deltas = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 5])

        # Rotate proposals repeated once per class:
        # [N, 5*num_classes] -> [N*num_classes, 5]
        tiled_proposals = tf.tile(self.fast_rcnn_all_level_rotate_proposals,
                                  [1, self.num_classes])
        per_class_refs = tf.reshape(tiled_proposals, [-1, 5])

        decoded = encode_and_decode.decode_boxes(
            encode_boxes=box_deltas,
            reference_boxes=per_class_refs,
            scale_factors=self.scale_factors)

        # multiclass NMS works on one row per proposal
        decoded = tf.reshape(decoded, [-1, self.num_classes * 5])
        boxes, box_scores, object_count, category = self.fast_rcnn_proposals(
            decoded, scores=class_probs)

        return boxes, box_scores, object_count, category
def postprocess_cascadercnn(self, rois, bbox_pred, cls_score, stage):
    '''
    Decode, for every RoI, the regression of its highest-scoring class.

    Gradients are not stopped on ``rois`` or ``bbox_pred`` here.

    :param rois: [-1, 4]
    :param bbox_pred: reshaped here to [-1, cfgs.CLASS_NUM + 1, 4]
    :param cls_score: per-class scores; argmax is taken along axis 1
    :param stage: 1-based stage number, selects cfgs.ROI_SCALE_FACTORS[stage - 1]
    :return: decoded boxes, one per RoI
    '''
    per_class_deltas = tf.reshape(bbox_pred, [-1, cfgs.CLASS_NUM + 1, 4])

    # only keep the box of the best-scoring class
    best_class = tf.argmax(cls_score, axis=1)
    # running count minus one gives 0..N-1 in the same dtype as best_class
    row_ids = tf.cumsum(tf.ones_like(best_class)) - 1
    gather_index = tf.stack([row_ids, best_class], axis=1)  # [N, 2]

    selected_deltas = tf.reshape(
        tf.gather_nd(per_class_deltas, gather_index), [-1, 4])

    # decode boxes
    return encode_and_decode.decode_boxes(
        encoded_boxes=selected_deltas,
        reference_boxes=rois,
        scale_factors=cfgs.ROI_SCALE_FACTORS[stage - 1])
def rpn_proposals(self):
    """
    Turn raw RPN outputs into a padded set of proposals.

    Steps: decode against ``self.anchors``, take the object column of the
    softmax scores, keep at most ``self.top_k_nms`` best boxes, run
    ``tf_wrapper.nms_rotate_tf`` and pad with
    ``boxes_utils.padd_boxes_with_zeros`` when fewer than
    ``self.max_proposals_num`` boxes survive.

    :return: ``(rpn_proposals_boxes, rpn_proposals_scores)``
    """
    with tf.variable_scope('rpn_proposals'):
        rpn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=self.rpn_encode_boxes,
            reference_boxes=self.anchors,
            scale_factors=self.scale_factors)

        rpn_softmax_scores = slim.softmax(self.rpn_scores)
        rpn_object_score = rpn_softmax_scores[:, 1]  # second column represent object

        if self.top_k_nms:
            # Cap k: tf.nn.top_k raises if k is larger than the number of
            # available scores.
            num_candidates = tf.shape(rpn_object_score)[0]
            capped_k = tf.minimum(self.top_k_nms, num_candidates)
            rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score,
                                                          k=capped_k)
            rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

        # NMS
        valid_indices = tf_wrapper.nms_rotate_tf(
            boxes_list=rpn_decode_boxes,
            scores=rpn_object_score,
            iou_threshold=self.rpn_nms_iou_threshold,
            max_output_size=self.max_proposals_num,
            use_gpu=cfgs.NMS_USE_GPU)

        valid_boxes = tf.gather(rpn_decode_boxes, valid_indices)
        valid_scores = tf.gather(rpn_object_score, valid_indices)

        rpn_proposals_boxes, rpn_proposals_scores = tf.cond(
            tf.less(tf.shape(valid_boxes)[0], self.max_proposals_num),
            lambda: boxes_utils.padd_boxes_with_zeros(
                valid_boxes, valid_scores, self.max_proposals_num),
            lambda: (valid_boxes, valid_scores))

        return rpn_proposals_boxes, rpn_proposals_scores
def rpn_proposals(self):
    '''
    Decode RPN regressions, keep the top-scoring boxes and apply NMS.

    :param:self.anchors: shape:[-1, 4]->[ymin, xmin, ymax, xmax]
    :param:self.rpn_scores: shape:[-1, 2]->[backgroud, foreground]
    :param:self.rpn_encode_boxes: shape:[-1, 4]->[ycenter, xcenter, h, w]
    :return: valid_boxes, the boxes kept by NMS. No padding is applied here,
        so the row count is whatever NMS returns.
    :return: valid_scores, the object scores of ``valid_boxes``
    '''
    with tf.variable_scope('rpn_proposals'):
        rpn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=self.rpn_encode_boxes,
            reference_boxes=self.anchors,
            scale_factors=self.scale_factors)

        if not self.is_training:
            # at test time, clip proposals to the image boundaries
            image_shape = tf.shape(self.img_batch)
            rpn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                rpn_decode_boxes, image_shape)

        rpn_softmax_scores = slim.softmax(self.rpn_scores)
        rpn_object_score = rpn_softmax_scores[:, 1]  # foreground column

        if self.top_k_nms:
            # tf.nn.top_k raises when k exceeds the number of candidates,
            # so cap k at the number of scores actually available.
            top_k = tf.minimum(self.top_k_nms, tf.shape(rpn_object_score)[0])
            rpn_object_score, top_k_indices = tf.nn.top_k(rpn_object_score,
                                                          k=top_k)
            rpn_decode_boxes = tf.gather(rpn_decode_boxes, top_k_indices)

        nms_indices = boxes_utils.non_maximal_suppression(
            rpn_decode_boxes, rpn_object_score, self.rpn_nms_iou_threshold,
            self.max_proposal_num)

        valid_scores = tf.gather(rpn_object_score, nms_indices)
        valid_boxes = tf.gather(rpn_decode_boxes, nms_indices)

        return valid_boxes, valid_scores
def rpn_losses(self):
    """
    Build the RPN location and classification losses on a sampled minibatch
    of anchors, and emit image summaries of the sampled anchors.

    :return: (location_loss, classification_loss)
    """
    with tf.variable_scope('rpn_losses'):
        sample_ids, matched_gtboxes, object_mask, labels_one_hot = \
            self.make_minibatch(self.anchors)

        sampled_anchors = tf.gather(self.anchors, sample_ids)
        sampled_deltas = tf.gather(self.rpn_encode_boxes, sample_ids)
        sampled_scores = tf.gather(self.rpn_scores, sample_ids)

        # encode gtboxes
        target_deltas = encode_and_decode.encode_boxes(
            unencode_boxes=matched_gtboxes,
            reference_boxes=sampled_anchors,
            scale_factors=self.scale_factors)

        # Anchors not selected by a mask are multiplied by zero before drawing.
        num_positive = tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0]
        positive_img = draw_box_with_color(
            self.img_batch,
            sampled_anchors * tf.expand_dims(object_mask, 1),
            text=num_positive)

        negative_mask = tf.cast(
            tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
        num_negative = tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0]
        negative_img = draw_box_with_color(
            self.img_batch,
            sampled_anchors * tf.expand_dims(negative_mask, 1),
            text=num_negative)

        sampled_decoded = encode_and_decode.decode_boxes(
            encode_boxes=sampled_deltas,
            reference_boxes=sampled_anchors,
            scale_factors=self.scale_factors)

        tf.summary.image('/positive_anchors', positive_img)
        tf.summary.image('/negative_anchors', negative_img)

        # Five highest values of score column 1 (raw scores, no softmax).
        best_scores, best_ids = tf.nn.top_k(sampled_scores[:, 1], k=5)
        best_img = draw_box_with_color(
            self.img_batch,
            tf.gather(sampled_decoded, best_ids),
            text=tf.shape(best_scores)[0])
        tf.summary.image('/top_5', best_img)

        # losses
        with tf.variable_scope('rpn_location_loss'):
            location_loss = losses.l1_smooth_losses(
                predict_boxes=sampled_deltas,
                gtboxes=target_deltas,
                object_weights=object_mask)
            # add smooth l1 loss to losses collection
            slim.losses.add_loss(location_loss)

        with tf.variable_scope('rpn_classification_loss'):
            classification_loss = slim.losses.softmax_cross_entropy(
                logits=sampled_scores,
                onehot_labels=labels_one_hot)

        return location_loss, classification_loss
def fast_rcnn_prediction(self):
    '''
    Decode and clip the per-class Fast-RCNN boxes, then apply
    ``self.mutil_class_nms``.

    :param: self.fast_rcnn_cls_scores, [2000, num_cls+1], num_cls+background
    :param: self.fast_rcnn_encode_boxes, [2000, num_cls*4]
    :return: fast_rcnn_decode_boxes, [-1, 4]
    :return: fast_rcnn_category, [-1, ]
    :return: fast_rcnn_scores, [-1, ]
    :return: num_object, [-1, ]
    '''
    with tf.variable_scope('fast_rcnn_predict'):
        class_probs = slim.softmax(self.fast_rcnn_cls_scores)

        box_deltas = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])

        # repeat every RoI once per class so it lines up with box_deltas
        tiled_rois = tf.tile(self.rois_boxes, [1, self.num_cls])
        per_class_refs = tf.reshape(tiled_rois, [-1, 4])

        # apply the encoded offsets to the reference boxes
        decoded = decode_boxes(encode_boxes=box_deltas,
                               reference_boxes=per_class_refs,
                               scale_factors=self.scale_factors)

        # clip decoded boxes to the image shape
        decoded = boxes_utils.clip_boxes_to_img_boundaries(
            boxes=decoded, img_shape=self.img_shape)

        # multi-class nms expects one row per RoI
        decoded = tf.reshape(decoded, [-1, 4 * self.num_cls])
        final_boxes, final_category, final_scores, object_count = \
            self.mutil_class_nms(boxes=decoded, scores=class_probs)

        return final_boxes, final_category, final_scores, object_count
def rpn_losses(self):
    """
    Build the RPN location and classification losses on a sampled minibatch
    of anchors, emit image summaries, and return the high-scoring sampled
    boxes as RPN predictions.

    :return: (location_loss, classification_loss, rpn_predict_boxes,
        rpn_predict_scores)
    """
    with tf.variable_scope('rpn_losses'):
        sample_ids, matched_gtboxes, object_mask, labels_one_hot = \
            self.make_minibatch(self.anchors)

        sampled_anchors = tf.gather(self.anchors, sample_ids)
        sampled_deltas = tf.gather(self.rpn_encode_boxes, sample_ids)
        sampled_scores = tf.gather(self.rpn_scores, sample_ids)

        # encode gtboxes
        target_deltas = encode_and_decode.encode_boxes(
            unencode_boxes=matched_gtboxes,
            reference_boxes=sampled_anchors,
            scale_factors=self.scale_factors)

        # Anchors not selected by a mask are multiplied by zero before drawing.
        num_positive = tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0]
        positive_img = draw_box_with_color(
            self.img_batch,
            sampled_anchors * tf.expand_dims(object_mask, 1),
            text=num_positive)

        negative_mask = tf.cast(
            tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
        num_negative = tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0]
        negative_img = draw_box_with_color(
            self.img_batch,
            sampled_anchors * tf.expand_dims(negative_mask, 1),
            text=num_negative)

        sampled_decoded = encode_and_decode.decode_boxes(
            encode_boxes=sampled_deltas,
            reference_boxes=sampled_anchors,
            scale_factors=self.scale_factors)

        tf.summary.image('/positive_anchors', positive_img)
        tf.summary.image('/negative_anchors', negative_img)

        # Twenty highest object probabilities among the sampled anchors.
        sampled_probs = tf.gather(slim.softmax(self.rpn_scores), sample_ids)
        best_scores, best_ids = tf.nn.top_k(sampled_probs[:, 1], k=20)
        best_boxes = tf.gather(sampled_decoded, best_ids)
        best_img = draw_boxes_with_scores(self.img_batch,
                                          boxes=best_boxes,
                                          scores=best_scores)
        tf.summary.image('/top_20', best_img)

        # Of those, keep only the ones above the final score threshold.
        confident_ids = tf.reshape(
            tf.where(tf.greater(best_scores, cfgs.FINAL_SCORE_THRESHOLD)),
            [-1])
        rpn_predict_boxes = tf.gather(best_boxes, confident_ids)
        rpn_predict_scores = tf.gather(best_scores, confident_ids)

        # losses
        with tf.variable_scope('rpn_location_loss'):
            location_loss = losses.l1_smooth_losses(
                predict_boxes=sampled_deltas,
                gtboxes=target_deltas,
                object_weights=object_mask)
            # add smooth l1 loss to losses collection
            slim.losses.add_loss(location_loss)

        with tf.variable_scope('rpn_classification_loss'):
            classification_loss = slim.losses.softmax_cross_entropy(
                logits=sampled_scores,
                onehot_labels=labels_one_hot)

        return location_loss, classification_loss, rpn_predict_boxes, rpn_predict_scores
def postprocess_rpn_proposals(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors, is_training):
    '''
    Decode, clip, pre-select and NMS-filter the RPN outputs.

    :param rpn_bbox_pred: [-1, 4]
    :param rpn_cls_prob: [-1, 2]
    :param img_shape: forwarded to boxes_utils.clip_boxes_to_img_boundaries
    :param anchors:[-1, 4]
    :param is_training: selects the train or test top-N settings from cfgs
    :return: (final_boxes, final_probs)
    '''
    if is_training:
        top_n_before_nms = cfgs.RPN_TOP_K_NMS_TRAIN
        top_n_after_nms = cfgs.RPN_MAXIMUM_PROPOSAL_TARIN
    else:
        top_n_before_nms = cfgs.RPN_TOP_K_NMS_TEST
        top_n_after_nms = cfgs.RPN_MAXIMUM_PROPOSAL_TEST
    # the IoU threshold is the same in both modes
    iou_thresh = cfgs.RPN_NMS_IOU_THRESHOLD

    object_prob = rpn_cls_prob[:, 1]

    # 1. decode boxes
    boxes = encode_and_decode.decode_boxes(
        encode_boxes=rpn_bbox_pred,
        reference_boxes=anchors,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    # 2. clip to img boundaries
    boxes = boxes_utils.clip_boxes_to_img_boundaries(decode_boxes=boxes,
                                                     img_shape=img_shape)

    # 3. get top N to NMS
    if top_n_before_nms > 0:
        # never ask top_k for more entries than there are boxes
        top_n_before_nms = tf.minimum(top_n_before_nms,
                                      tf.shape(boxes)[0],
                                      name='avoid_unenough_boxes')
        object_prob, best_ids = tf.nn.top_k(object_prob, k=top_n_before_nms)
        boxes = tf.gather(boxes, best_ids)

    # 4. NMS
    kept = tf.image.non_max_suppression(boxes=boxes,
                                        scores=object_prob,
                                        max_output_size=top_n_after_nms,
                                        iou_threshold=iou_thresh)

    return tf.gather(boxes, kept), tf.gather(object_prob, kept)
def fast_rcnn_predict(self):
    """
    Class-agnostic prediction for both the horizontal and the rotate branch.

    The proposals are used as references directly (no per-class tiling) and
    the decoded boxes are passed to the proposal functions without being
    reshaped per class.

    :return: 9-tuple: horizontal (boxes, scores, number of objects, category)
        followed by rotate (boxes, scores, head quadrant, number of objects,
        category).
    """
    with tf.variable_scope('fast_rcnn_predict'):
        h_probs = slim.softmax(self.fast_rcnn_scores)  # [-1, num_classes+1]
        r_probs = slim.softmax(self.fast_rcnn_scores_rotate)  # [-1, num_classes+1]

        h_deltas = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])
        r_deltas = tf.reshape(self.fast_rcnn_encode_boxes_rotate, [-1, 5])

        # Class-agnostic regression: one reference box per proposal, [N, 4]
        proposal_refs = tf.reshape(self.fast_rcnn_all_level_proposals, [-1, 4])

        h_boxes = encode_and_decode.decode_boxes(
            encode_boxes=h_deltas,
            reference_boxes=proposal_refs,
            scale_factors=self.scale_factors)
        r_boxes = encode_and_decode.decode_boxes_rotate(
            encode_boxes=r_deltas,
            reference_boxes=proposal_refs,
            scale_factors=self.scale_factors)

        # only the horizontal boxes are clipped to the image
        h_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            h_boxes, img_shape=self.img_shape)

        # multiclass NMS
        h_boxes, h_scores, h_count, h_category = self.fast_rcnn_proposals(
            h_boxes, scores=h_probs)

        r_boxes, r_scores, r_head_quadrant, r_count, r_category = \
            self.fast_rcnn_proposals_rotate(
                r_boxes,
                scores=r_probs,
                head_quadrant=self.fast_rcnn_head_quadrant)

        return h_boxes, h_scores, h_count, h_category, \
            r_boxes, r_scores, r_head_quadrant, r_count, r_category
def postprocess_rpn_proposals(self, rpn_bbox_pred, rpn_cls_prob, img_shape, anchors, is_training):
    """
    RPN proposal post-processing: decode, clip, pre-select, NMS.

    :param rpn_bbox_pred: predicted box encodings
    :param rpn_cls_prob: RPN classification probabilities; column 1 is used
    :param img_shape: image shape used for clipping
    :param anchors: all reference anchors
    :param is_training: selects the train or test top-N settings from cfgs
    :return: (final_boxes, final_probs)
    """
    if is_training:
        top_n_before_nms = cfgs.RPN_TOP_K_NMS_TRAIN
        top_n_after_nms = cfgs.RPN_MAXIMUM_PROPOSAL_TARIN
    else:
        top_n_before_nms = cfgs.RPN_TOP_K_NMS_TEST
        top_n_after_nms = cfgs.RPN_MAXIMUM_PROPOSAL_TEST
    # the IoU threshold is the same in both modes
    iou_threshold = cfgs.RPN_NMS_IOU_THRESHOLD

    positive_prob = rpn_cls_prob[:, 1]  # (, 2) => (negative, positive)

    # step 1: decode boxes
    boxes = encode_and_decode.decode_boxes(
        encoded_boxes=rpn_bbox_pred,
        reference_boxes=anchors,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    # step 2: clip to image boundaries
    boxes = boxes_utils.clip_boxes_to_img_boundaries(boxes,
                                                     img_shape=img_shape)

    # step 3: get top N to NMS
    if top_n_before_nms > 0:
        # never ask top_k for more entries than there are boxes
        top_n_before_nms = tf.minimum(top_n_before_nms,
                                      tf.shape(boxes)[0],
                                      name='minimum_boxes')
        positive_prob, best_ids = tf.nn.top_k(positive_prob,
                                              k=top_n_before_nms)
        boxes = tf.gather(params=boxes, indices=best_ids)

    # step 4: NMS (Non Max Suppression)
    kept = tf.image.non_max_suppression(boxes=boxes,
                                        scores=positive_prob,
                                        max_output_size=top_n_after_nms,
                                        iou_threshold=iou_threshold)

    return tf.gather(boxes, kept), tf.gather(positive_prob, kept)
def batch_slice_rpn_proposals(rpn_encode_boxes, rpn_scores, anchors, config, rpn_proposals_num):
    """
    Build exactly-sized RPN proposals from one slice of RPN outputs.

    Steps: softmax the scores, keep at most ``config.RPN_TOP_K_NMS`` best
    anchors, decode them, run NMS, clip to the target window, and zero-pad
    up to ``rpn_proposals_num`` rows.

    :param rpn_encode_boxes: encoded box regressions, one row per anchor
    :param rpn_scores: RPN scores; column 1 of their softmax is the object score
    :param anchors: reference anchors, one row per encoded box
    :param config: object providing RPN_TOP_K_NMS, RPN_BBOX_STD_DEV,
        RPN_NMS_IOU_THRESHOLD and TARGET_SIDE
    :param rpn_proposals_num: NMS output limit and padding target
    :return: (rpn_proposals_boxes, rpn_object_score)
    """
    with tf.variable_scope('rpn_proposals'):
        rpn_softmax_scores = slim.softmax(rpn_scores)
        rpn_object_score = rpn_softmax_scores[:, 1]  # second column represent object

        if config.RPN_TOP_K_NMS:
            # tf.nn.top_k raises when k exceeds the number of candidates,
            # so cap k at the number of scores actually available.
            top_k = tf.minimum(config.RPN_TOP_K_NMS,
                               tf.shape(rpn_object_score)[0])
            top_k_indices = tf.nn.top_k(rpn_object_score, k=top_k).indices
            rpn_object_score = tf.gather(rpn_object_score, top_k_indices)
            rpn_encode_boxes = tf.gather(rpn_encode_boxes, top_k_indices)
            anchors = tf.gather(anchors, top_k_indices)

        rpn_decode_boxes = encode_and_decode.decode_boxes(
            encode_boxes=rpn_encode_boxes,
            reference_boxes=anchors,
            dev_factors=config.RPN_BBOX_STD_DEV)

        valid_indices = boxes_utils.non_maximal_suppression(
            boxes=rpn_decode_boxes,
            scores=rpn_object_score,
            max_output_size=rpn_proposals_num,
            iou_threshold=config.RPN_NMS_IOU_THRESHOLD)
        rpn_decode_boxes = tf.gather(rpn_decode_boxes, valid_indices)
        rpn_object_score = tf.gather(rpn_object_score, valid_indices)

        # clip proposals to img boundaries (replace the out-of-bounds part
        # with the image boundary)
        rpn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            rpn_decode_boxes,
            [0, 0, config.TARGET_SIDE - 1, config.TARGET_SIDE - 1])

        # Pad if needed. The boxes are padded by concatenating a zeros
        # tensor rather than with tf.pad.
        padding = tf.maximum(
            rpn_proposals_num - tf.shape(rpn_decode_boxes)[0], 0)
        zeros_padding = tf.zeros((padding, 4), dtype=tf.float32)
        rpn_proposals_boxes = tf.concat([rpn_decode_boxes, zeros_padding],
                                        axis=0)
        rpn_object_score = tf.pad(rpn_object_score, [(0, padding)])

        return rpn_proposals_boxes, rpn_object_score
def fast_rcnn_predict(self):
    """
    Per-class prediction for the horizontal and the rotate branch, with the
    rotate boxes additionally passed through ``read_reorder``.

    :return: 10-tuple: horizontal (boxes, scores, number of objects,
        category), then rotate boxes before reordering, the reordered rotate
        boxes twice, then rotate (scores, number of objects, category).
    """
    with tf.variable_scope('fast_rcnn_predict'):
        h_probs = slim.softmax(self.fast_rcnn_scores)  # [-1, num_classes+1]
        r_probs = slim.softmax(self.fast_rcnn_scores_rotate)  # [-1, num_classes+1]

        h_deltas = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 4])
        r_deltas = tf.reshape(self.fast_rcnn_encode_boxes_rotate, [-1, 5])

        # Repeat each proposal once per class:
        # [N, 4*num_classes] -> [N*num_classes, 4]
        tiled_proposals = tf.tile(self.fast_rcnn_all_level_proposals,
                                  [1, self.num_classes])
        per_class_refs = tf.reshape(tiled_proposals, [-1, 4])

        h_boxes = encode_and_decode.decode_boxes(
            encode_boxes=h_deltas,
            reference_boxes=per_class_refs,
            scale_factors=self.scale_factors)
        r_boxes = encode_and_decode.decode_boxes_rotate(
            encode_boxes=r_deltas,
            reference_boxes=per_class_refs,
            scale_factors=self.scale_factors)

        # only the horizontal boxes are clipped to the image
        h_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            h_boxes, img_shape=self.img_shape)

        # multiclass NMS works on one row per proposal
        h_boxes = tf.reshape(h_boxes, [-1, self.num_classes * 4])
        r_boxes = tf.reshape(r_boxes, [-1, self.num_classes * 5])

        h_boxes, h_scores, h_count, h_category = self.fast_rcnn_proposals(
            h_boxes, scores=h_probs)
        r_boxes, r_scores, r_count, r_category = \
            self.fast_rcnn_proposals_rotate(r_boxes, scores=r_probs)

        # The rotate boxes are returned both as-is and after read_reorder.
        r_boxes_reordered = tf.py_func(read_reorder,
                                       inp=[r_boxes],
                                       Tout=tf.float32)

        return h_boxes, h_scores, h_count, h_category, \
            r_boxes, r_boxes_reordered, r_boxes_reordered, \
            r_scores, r_count, r_category
def fast_rcnn_predict(self):
    """
    Decode the 5-value per-class regressions against the rotate proposals
    and hand them to ``self.fast_rcnn_proposals``.

    :return: (decoded boxes, scores, number of objects, detection category)
        as returned by ``self.fast_rcnn_proposals``.
    """
    with tf.variable_scope('fast_rcnn_predict'):
        softmax_scores = slim.softmax(self.fast_rcnn_scores)  # [-1, num_classes+1]

        encoded = tf.reshape(self.fast_rcnn_encode_boxes, [-1, 5])

        # [N, 5*num_classes] after tiling, then [N*num_classes, 5]
        references = tf.tile(self.fast_rcnn_all_level_rotate_proposals,
                             [1, self.num_classes])
        references = tf.reshape(references, [-1, 5])

        decoded = encode_and_decode.decode_boxes(
            encode_boxes=encoded,
            reference_boxes=references,
            scale_factors=self.scale_factors)

        # multiclass NMS: back to one row per proposal
        decoded = tf.reshape(decoded, [-1, self.num_classes * 5])
        result_boxes, result_scores, result_count, result_category = \
            self.fast_rcnn_proposals(decoded, scores=softmax_scores)

        return result_boxes, result_scores, result_count, result_category
def postprocess_cascade(self, rois, bbox_ppred, scope):
    '''
    Decode predicted box encodings against their RoIs, with gradients stopped
    on both inputs.

    :param rois: [-1, 4]
    :param bbox_ppred: [-1, 4]
    :param scope: suffix used in the name scope 'postprocess_cascade_<scope>'
    :return: decoded boxes
    '''
    with tf.name_scope('postprocess_cascade_{}'.format(scope)):
        # neither input receives gradients through the decoded boxes
        fixed_rois = tf.stop_gradient(rois)
        fixed_deltas = tf.stop_gradient(bbox_ppred)

        # decode boxes
        return encode_and_decode.decode_boxes(
            encoded_boxes=fixed_deltas,
            reference_boxes=fixed_rois,
            scale_factors=cfgs.ROI_SCALE_FACTORS)
def postprocess_fastrcnn(self, rois, bbox_ppred, scores, img_shape):
    '''
    Per-class decode, clip and NMS of the Fast-RCNN outputs.

    Gradients are stopped on all inputs. Class 0 is skipped.

    :param rois:[-1, 4]
    :param bbox_ppred: [-1, (cfgs.Class_num+1) * 4]
    :param scores: [-1, cfgs.Class_num + 1]
    :param img_shape: forwarded to boxes_utils.clip_boxes_to_img_boundaries
    :return: (final_boxes, final_scores, final_category)
    '''
    with tf.name_scope('postprocess_fastrcnn'):
        rois = tf.stop_gradient(rois)
        scores = tf.stop_gradient(scores)
        bbox_ppred = tf.reshape(bbox_ppred, [-1, cfgs.CLASS_NUM + 1, 4])
        bbox_ppred = tf.stop_gradient(bbox_ppred)

        deltas_per_class = tf.unstack(bbox_ppred, axis=1)
        scores_per_class = tf.unstack(scores, axis=1)

        kept_boxes = []
        kept_scores = []
        kept_categories = []
        for class_id in range(1, cfgs.CLASS_NUM + 1):
            class_scores = scores_per_class[class_id]

            # 1. decode boxes in each class
            class_boxes = encode_and_decode.decode_boxes(
                encoded_boxes=deltas_per_class[class_id],
                reference_boxes=rois,
                scale_factors=cfgs.ROI_SCALE_FACTORS)

            # 2. clip to img boundaries
            class_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                decode_boxes=class_boxes, img_shape=img_shape)

            # 3. NMS
            keep = tf.image.non_max_suppression(
                boxes=class_boxes,
                scores=class_scores,
                max_output_size=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD)

            nms_scores = tf.gather(class_scores, keep)
            kept_boxes.append(tf.gather(class_boxes, keep))
            kept_scores.append(nms_scores)
            # category id stored with the same dtype and shape as the scores
            kept_categories.append(tf.ones_like(nms_scores) * class_id)

        final_boxes = tf.concat(kept_boxes, axis=0)
        final_scores = tf.concat(kept_scores, axis=0)
        final_category = tf.concat(kept_categories, axis=0)

        if self.is_training:
            # In training the detections are shown in tensorboard, so only
            # those at or above the display score threshold are kept.
            shown = tf.reshape(
                tf.where(
                    tf.greater_equal(final_scores, cfgs.SHOW_SCORE_THRSHOLD)),
                [-1])
            final_boxes = tf.gather(final_boxes, shown)
            final_scores = tf.gather(final_scores, shown)
            final_category = tf.gather(final_category, shown)

        return final_boxes, final_scores, final_category
def build_whole_detection_network(self, input_img_batch, gtboxes_batch):
    """
    Assemble the whole detection graph.

    Stages, in order: base network -> per-level RPN heads -> anchors ->
    RPN proposal post-processing -> (training only) anchor and RoI sampling
    through ``tf.py_func`` -> level assignment -> Fast-RCNN head ->
    Fast-RCNN post-processing, plus the loss in training.

    :param input_img_batch: input image tensor
    :param gtboxes_batch: ground-truth boxes; only touched when
        ``self.is_training`` (reshaped to [-1, 5] and cast to float32)
    :return: when not training, whatever ``self.postprocess_fastrcnn``
        returns; when training,
        ``(final_bbox, final_scores, final_category, loss_dict)``
    """
    if self.is_training:
        # ensure shape is [M, 5]
        # presumably 4 box coordinates + 1 label per row -- TODO confirm
        gtboxes_batch = tf.reshape(gtboxes_batch, [-1, 5])
        gtboxes_batch = tf.cast(gtboxes_batch, tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. build base network
    P_list = self.build_base_network(
        input_img_batch)  # [P2, P3, P4, P5, P6]

    # 2. build rpn
    with tf.variable_scope('build_rpn',
                           regularizer=slim.l2_regularizer(
                               cfgs.WEIGHT_DECAY)):
        fpn_cls_score = []
        fpn_box_pred = []
        for level_name, p in zip(cfgs.LEVLES, P_list):
            if cfgs.SHARE_HEADS:
                # Shared heads: the first level creates the variables, every
                # later level reuses them under the same scope names.
                reuse_flag = None if level_name == cfgs.LEVLES[0] else True
                scope_list = [
                    'rpn_conv/3x3', 'rpn_cls_score', 'rpn_bbox_pred'
                ]
            else:
                # Separate heads: one set of scopes per level.
                reuse_flag = None
                scope_list = [
                    'rpn_conv/3x3_%s' % level_name,
                    'rpn_cls_score_%s' % level_name,
                    'rpn_bbox_pred_%s' % level_name
                ]
            rpn_conv3x3 = slim.conv2d(p,
                                      512, [3, 3],
                                      trainable=self.is_training,
                                      weights_initializer=cfgs.INITIALIZER,
                                      padding="SAME",
                                      activation_fn=tf.nn.relu,
                                      scope=scope_list[0],
                                      reuse=reuse_flag)
            rpn_cls_score = slim.conv2d(
                rpn_conv3x3,
                self.num_anchors_per_location * 2, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.INITIALIZER,
                activation_fn=None,
                padding="VALID",
                scope=scope_list[1],
                reuse=reuse_flag)
            rpn_box_pred = slim.conv2d(
                rpn_conv3x3,
                self.num_anchors_per_location * 4, [1, 1],
                stride=1,
                trainable=self.is_training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                padding="VALID",
                scope=scope_list[2],
                reuse=reuse_flag)
            # Flatten to one row per anchor: 4 box values / 2 class scores.
            rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])

            fpn_cls_score.append(rpn_cls_score)
            fpn_box_pred.append(rpn_box_pred)

        # Concatenate all levels along the anchor axis.
        fpn_cls_score = tf.concat(fpn_cls_score,
                                  axis=0,
                                  name='fpn_cls_score')
        fpn_box_pred = tf.concat(fpn_box_pred, axis=0, name='fpn_box_pred')
        fpn_cls_prob = slim.softmax(fpn_cls_score, scope='fpn_cls_prob')

    # 3. generate_anchors
    # Levels are visited in the same order as in the RPN loop above, so the
    # concatenated anchors follow the same level order as the predictions.
    all_anchors = []
    for i in range(len(cfgs.LEVLES)):
        level_name, p = cfgs.LEVLES[i], P_list[i]

        p_h, p_w = tf.shape(p)[1], tf.shape(p)[2]
        featuremap_height = tf.cast(p_h, tf.float32)
        featuremap_width = tf.cast(p_w, tf.float32)
        anchors = anchor_utils.make_anchors(
            base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[i],
            anchor_scales=cfgs.ANCHOR_SCALES,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            featuremap_height=featuremap_height,
            featuremap_width=featuremap_width,
            stride=cfgs.ANCHOR_STRIDE_LIST[i],
            name="make_anchors_for%s" % level_name)
        all_anchors.append(anchors)
    all_anchors = tf.concat(all_anchors, axis=0, name='all_anchors_of_FPN')

    # 4. postprocess rpn proposals. such as: decode, clip, NMS
    with tf.variable_scope('postprocess_FPN'):
        rois, roi_scores = postprocess_rpn_proposals(
            rpn_bbox_pred=fpn_box_pred,
            rpn_cls_prob=fpn_cls_prob,
            img_shape=img_shape,
            anchors=all_anchors,
            is_training=self.is_training)
        # rois shape [-1, 4]
        # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++

        if self.is_training:
            # Image summary of the proposals whose score is >= 0.5.
            score_gre_05 = tf.reshape(
                tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
            score_gre_05_rois = tf.gather(rois, score_gre_05)
            score_gre_05_score = tf.gather(roi_scores, score_gre_05)
            score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_scores(
                img_batch=input_img_batch,
                boxes=score_gre_05_rois,
                scores=score_gre_05_score)
            tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
        # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    if self.is_training:
        with tf.variable_scope('sample_anchors_minibatch'):
            # Anchor labels/targets are computed outside the graph via py_func.
            fpn_labels, fpn_bbox_targets = \
                tf.py_func(
                    anchor_target_layer,
                    [gtboxes_batch, img_shape, all_anchors],
                    [tf.float32, tf.float32])
            fpn_bbox_targets = tf.reshape(fpn_bbox_targets, [-1, 4])
            fpn_labels = tf.to_int32(fpn_labels, name="to_int32")
            fpn_labels = tf.reshape(fpn_labels, [-1])
            self.add_anchor_img_smry(input_img_batch, all_anchors,
                                     fpn_labels)

        # --------------------------------------add smry-----------------------------------------------------------

        # RPN accuracy summary; anchors labelled -1 are excluded.
        fpn_cls_category = tf.argmax(fpn_cls_prob, axis=1)
        kept_rpppn = tf.reshape(tf.where(tf.not_equal(fpn_labels, -1)),
                                [-1])
        fpn_cls_category = tf.gather(fpn_cls_category, kept_rpppn)
        acc = tf.reduce_mean(
            tf.to_float(
                tf.equal(fpn_cls_category,
                         tf.to_int64(tf.gather(fpn_labels, kept_rpppn)))))
        tf.summary.scalar('ACC/fpn_accuracy', acc)

        # RoI sampling is created under a control dependency on fpn_labels.
        with tf.control_dependencies([fpn_labels]):
            with tf.variable_scope('sample_RCNN_minibatch'):
                rois, labels, bbox_targets = \
                    tf.py_func(proposal_target_layer,
                               [rois, gtboxes_batch],
                               [tf.float32, tf.float32, tf.float32])
                rois = tf.reshape(rois, [-1, 4])
                labels = tf.to_int32(labels)
                labels = tf.reshape(labels, [-1])
                bbox_targets = tf.reshape(bbox_targets,
                                          [-1, 4 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch, rois, labels)

    if self.is_training:
        rois_list, labels, bbox_targets = self.assign_levels(
            all_rois=rois, labels=labels, bbox_targets=bbox_targets)
    else:
        rois_list = self.assign_levels(
            all_rois=rois
        )  # rois_list: [P2_rois, P3_rois, P4_rois, P5_rois]

    # -------------------------------------------------------------------------------------------------------------#
    #                                            Fast-RCNN                                                         #
    # -------------------------------------------------------------------------------------------------------------#

    # 5. build Fast-RCNN
    # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10)
    bbox_pred, cls_score = self.build_fastrcnn(P_list=P_list,
                                               rois_list=rois_list,
                                               img_shape=img_shape)
    # bbox_pred shape: [-1, 4*(cls_num+1)].
    # cls_score shape: [-1, cls_num+1]

    cls_prob = slim.softmax(cls_score, 'cls_prob')

    # ----------------------------------------------add smry-------------------------------------------------------
    if self.is_training:
        cls_category = tf.argmax(cls_prob, axis=1)
        fast_acc = tf.reduce_mean(
            tf.to_float(tf.equal(cls_category, tf.to_int64(labels))))
        tf.summary.scalar('ACC/fast_acc', fast_acc)

    # Re-join the per-level RoIs into a single tensor.
    rois = tf.concat(rois_list, axis=0, name='concat_rois')

    #  6. postprocess_fastrcnn
    if not self.is_training:
        return self.postprocess_fastrcnn(rois=rois,
                                         bbox_ppred=bbox_pred,
                                         scores=cls_prob,
                                         img_shape=img_shape)
    else:
        ''' when trian. We need build Loss '''
        # The GIoU loss needs the boxes to be decoded first.
        # NOTE(review): the RPN predictions are decoded with
        # cfgs.ROI_SCALE_FACTORS, whereas postprocess_rpn_proposals is the RPN
        # path -- confirm that ANCHOR_SCALE_FACTORS is not intended here.
        fpn_pred = encode_and_decode.decode_boxes(
            encoded_boxes=fpn_box_pred,
            reference_boxes=all_anchors,
            scale_factors=cfgs.ROI_SCALE_FACTORS)
        # NOTE(review): rpn_bbox_targets receives all_anchors, and the
        # fpn_bbox_targets computed above is not passed on -- confirm this is
        # what build_loss expects.
        loss_dict = self.build_loss(rpn_box_pred=fpn_pred,
                                    rpn_bbox_targets=all_anchors,
                                    rpn_cls_score=fpn_cls_score,
                                    rpn_labels=fpn_labels,
                                    bbox_pred=bbox_pred,
                                    bbox_targets=bbox_targets,
                                    cls_score=cls_score,
                                    labels=labels)

        final_bbox, final_scores, final_category = self.postprocess_fastrcnn(
            rois=rois,
            bbox_ppred=bbox_pred,
            scores=cls_prob,
            img_shape=img_shape)
        return final_bbox, final_scores, final_category, loss_dict
def postprocess_fastrcnn_proposals(bbox_ppred, scores, img_shape, rois, is_training):
    '''
    Decode the per-class box predictions against ``rois`` and run NMS class by class.

    :param bbox_ppred: encoded box predictions; reshaped here to
        [-1, cfgs.CLASS_NUM + 1, 4] (one 4-vector per class index)
    :param scores: per-class scores, one column per class index
    :param img_shape: forwarded to boxes_utils.clip_boxes_to_img_boundaries
    :param rois: reference boxes the predictions are decoded against
    :param is_training: selects the training or the test top-k / NMS settings
    :return: (final_boxes, final_scores), the boxes and scores kept by NMS for
        class indices 1..cfgs.CLASS_NUM, concatenated along axis 0
    '''
    # Hard-coded settings; the cfgs entries they shadow are named in the comments.
    if is_training:
        pre_nms_topN = 2000  # cfgs.RPN_TOP_K_NMS_TRAIN
        post_nms_topN = 500  # cfgs.RPN_MAXIMUM_PROPOSAL_TARIN
        nms_thresh = 0.8  # cfgs.RPN_NMS_IOU_THRESHOLD
    else:
        pre_nms_topN = 1500  # cfgs.RPN_TOP_K_NMS_TEST
        post_nms_topN = 300  # cfgs.RPN_MAXIMUM_PROPOSAL_TEST
        nms_thresh = 0.7  # cfgs.RPN_NMS_IOU_THRESHOLD

    bbox_ppred = tf.reshape(bbox_ppred, [-1, cfgs.CLASS_NUM + 1, 4])
    bbox_pred_list = tf.unstack(bbox_ppred, axis=1)
    score_list = tf.unstack(scores, axis=1)

    allclasses_boxes = []
    allclasses_scores = []
    # Class index 0 is skipped (presumably the background class).
    for i in range(1, cfgs.CLASS_NUM + 1):
        # 1. decode boxes in each class
        tmp_encoded_box = bbox_pred_list[i]
        tmp_score = score_list[i]
        tmp_decoded_boxes = encode_and_decode.decode_boxes(
            encode_boxes=tmp_encoded_box,
            reference_boxes=rois,
            scale_factors=cfgs.ROI_SCALE_FACTORS)

        # 2. clip to img boundaries
        tmp_decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            decode_boxes=tmp_decoded_boxes, img_shape=img_shape)

        # 3. keep at most pre_nms_topN candidates, ordered by score.
        # A separate name is used so the Python int limit is not overwritten
        # by a tensor on the first iteration.
        top_k = tf.minimum(pre_nms_topN,
                           tf.shape(tmp_decoded_boxes)[0],
                           name='avoid_unenough_boxes')
        top_k_scores, top_k_indices = tf.nn.top_k(tmp_score, k=top_k)
        top_k_boxes = tf.gather(tmp_decoded_boxes, top_k_indices)

        # 4. NMS
        keep = tf.image.non_max_suppression(boxes=top_k_boxes,
                                            scores=top_k_scores,
                                            max_output_size=post_nms_topN,
                                            iou_threshold=nms_thresh)

        # `keep` indexes the tensors that were handed to NMS (the top-k,
        # score-sorted subset), so the survivors must be gathered from those
        # and not from the unsorted per-class tensors.
        allclasses_boxes.append(tf.gather(top_k_boxes, keep))
        allclasses_scores.append(tf.gather(top_k_scores, keep))

    final_boxes = tf.concat(allclasses_boxes, axis=0)
    final_scores = tf.concat(allclasses_scores, axis=0)

    return final_boxes, final_scores
def rpn_proposals(self):
    """Decode the RPN outputs, filter them with NMS and pad to a fixed count.

    Steps: decode ``self.rpn_encode_boxes`` against ``self.anchors``, take the
    object score (column 1 of the softmax), optionally keep only the
    ``self.top_k_nms`` best, run either horizontal or rotated NMS depending on
    ``cfgs.USE_HORIZONTAL_NMS``, and zero-pad when fewer than
    ``self.max_proposals_num`` boxes survive.

    :return: (rpn_proposals_boxes, rpn_proposals_scores)
    """
    with tf.variable_scope('rpn_proposals'):
        decoded = encode_and_decode.decode_boxes(
            encode_boxes=self.rpn_encode_boxes,
            reference_boxes=self.anchors,
            scale_factors=self.scale_factors)

        softmax_scores = slim.softmax(self.rpn_scores)
        object_score = softmax_scores[:, 1]  # second column is the object score

        if self.top_k_nms:
            object_score, best_indices = tf.nn.top_k(object_score,
                                                     k=self.top_k_nms)
            decoded = tf.gather(decoded, best_indices)

        if cfgs.USE_HORIZONTAL_NMS:
            # Convert each box to its four corner points, then take the
            # axis-aligned rectangle enclosing them for standard NMS.
            corners = tf.py_func(coordinate_convert.forward_convert,
                                 inp=[decoded],
                                 Tout=tf.float32)
            corners = tf.reshape(corners, [tf.shape(decoded)[0], 8])
            x1, y1, x2, y2, x3, y3, x4, y4 = tf.unstack(corners, axis=1)

            xs = tf.transpose(tf.stack([x1, x2, x3, x4]))
            ys = tf.transpose(tf.stack([y1, y2, y3, y4]))

            left = tf.reduce_min(xs, axis=1)
            right = tf.reduce_max(xs, axis=1)
            top = tf.reduce_min(ys, axis=1)
            bottom = tf.reduce_max(ys, axis=1)

            enclosing = tf.transpose(tf.stack([left, top, right, bottom]))

            kept = tf.image.non_max_suppression(
                boxes=enclosing,
                scores=object_score,
                max_output_size=self.max_proposals_num,
                iou_threshold=self.rpn_nms_iou_threshold,
                name='rpn_horizontal_nms')
        else:
            kept = nms_rotate.nms_rotate(
                decode_boxes=decoded,
                scores=object_score,
                iou_threshold=self.rpn_nms_iou_threshold,
                max_output_size=self.max_proposals_num,
                use_angle_condition=self.use_angles_condition,
                angle_threshold=self.anchor_angle_threshold,
                use_gpu=cfgs.NMS_USE_GPU)

        kept_boxes = tf.gather(decoded, kept)
        kept_scores = tf.gather(object_score, kept)

        # Pad with zeros only when NMS returned fewer boxes than requested.
        needs_padding = tf.less(tf.shape(kept_boxes)[0],
                                self.max_proposals_num)
        rpn_proposals_boxes, rpn_proposals_scores = tf.cond(
            needs_padding,
            lambda: boxes_utils.padd_boxes_with_zeros(
                kept_boxes, kept_scores, self.max_proposals_num),
            lambda: (kept_boxes, kept_scores))

        return rpn_proposals_boxes, rpn_proposals_scores
def rpn_loss(self):
    '''
    Build the RPN classification and localization losses on a sampled minibatch
    and emit image summaries of the sampled anchors.

    Layouts as stated by the original notes (not verifiable from this block):
    :param: self.gtboxes_and_label: [n, 5]->[ymin, xmin, ymax, xmax, cls]
    :param: self.anchors: [m, 4]-> [ymin, xmin, ymax, xmax]
    :param:self.rpn_encode_boxes: [m, 4]->[ycenter, xcenter, h, w]
    :return: (location_loss, classify_loss)
    '''
    with tf.variable_scope('rpn_loss'):
        sample_indices, matched_gtboxes, object_mask, labels_onehot = \
            self.make_minibatch()

        anchors_mb = tf.gather(self.anchors, sample_indices)
        encoded_pred_mb = tf.gather(self.rpn_encode_boxes, sample_indices)
        scores_mb = tf.gather(self.rpn_scores, sample_indices)

        encoded_target_mb = encode_and_decode.encode_boxes(
            anchors_mb, matched_gtboxes, self.scale_factors)

        # summary: multiplying by a mask zeroes the boxes that should not be
        # drawn; the text is the number of rows where the mask equals 1.
        masked_positive_anchors = anchors_mb * tf.expand_dims(object_mask, 1)
        positive_count = tf.shape(tf.where(tf.equal(object_mask, 1)))[0]
        positive_anchor_img = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=masked_positive_anchors,
            text=positive_count)

        is_object = tf.cast(object_mask, tf.bool)
        negative_mask = tf.cast(tf.logical_not(is_object), tf.float32)
        masked_negative_anchors = anchors_mb * tf.expand_dims(negative_mask, 1)
        negative_count = tf.shape(tf.where(tf.equal(negative_mask, 1)))[0]
        negative_anchor_img = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=masked_negative_anchors,
            text=negative_count)

        decoded_mb = encode_and_decode.decode_boxes(
            encode_boxes=encoded_pred_mb,
            reference_boxes=anchors_mb,
            scale_factors=self.scale_factors)
        # clip boxes into image shape
        decoded_mb = boxes_utils.clip_boxes_to_img_boundaries(
            decoded_mb, tf.shape(self.img_batch))

        masked_positive_decoded = decoded_mb * tf.expand_dims(object_mask, 1)
        positive_decoded_count = tf.shape(
            tf.where(tf.equal(object_mask, 1)))[0]
        positive_decoded_img = draw_box_with_tensor(
            img_batch=self.img_batch,
            boxes=masked_positive_decoded,
            text=positive_decoded_count)

        tf.summary.image('images/rpn/losses/anchors_positive_minibatch',
                         positive_anchor_img)
        tf.summary.image('images/rpn/losses/anchors_negative_minibatch',
                         negative_anchor_img)
        tf.summary.image('images/rpn/losses/decode_anchor_positive',
                         positive_decoded_img)

        # losses
        with tf.variable_scope('rpn_localization_losses'):
            classify_loss = slim.losses.softmax_cross_entropy(
                logits=scores_mb, onehot_labels=labels_onehot)

            location_loss = losses.l1_smooth_losses(
                predict_boxes=encoded_pred_mb,
                gtboxes=encoded_target_mb,
                object_weights=object_mask)
            # register the location loss in the losses collection
            slim.losses.add_loss(location_loss)

        return location_loss, classify_loss
def postprocess_rpn_proposals(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors, is_training):
    '''
    Turn raw RPN outputs into proposals: decode, clip, top-k, then NMS.

    No class is assigned at this stage; the function only selects the boxes
    most likely to contain an object.

    :param rpn_bbox_pred: [-1, 4]
    :param rpn_cls_prob: [-1, 2]
    :param img_shape:
    :param anchors:[-1, 4]
    :param is_training: picks the TRAIN or TEST top-k / proposal-count settings
    :return: (final_boxes, final_probs) kept by NMS
    '''
    # The original notes give defaults of 12000 (pre-NMS) / 2000 (post-NMS) for
    # training, 6000 / 300 for testing and 0.7 for the IoU threshold; the
    # effective values come from cfgs.
    if is_training:
        top_k_limit = cfgs.RPN_TOP_K_NMS_TRAIN
        max_proposals = cfgs.RPN_MAXIMUM_PROPOSAL_TARIN
    else:
        top_k_limit = cfgs.RPN_TOP_K_NMS_TEST
        max_proposals = cfgs.RPN_MAXIMUM_PROPOSAL_TEST
    iou_thresh = cfgs.RPN_NMS_IOU_THRESHOLD

    object_prob = rpn_cls_prob[:, 1]

    # 1. decode boxes: predicted offsets + anchors -> actual box positions
    boxes = encode_and_decode.decode_boxes(
        encoded_boxes=rpn_bbox_pred,
        reference_boxes=anchors,
        scale_factors=cfgs.ANCHOR_SCALE_FACTORS)

    # 2. clip to img boundaries
    boxes = boxes_utils.clip_boxes_to_img_boundaries(decode_boxes=boxes,
                                                     img_shape=img_shape)

    # 3. get top N to NMS
    if top_k_limit > 0:
        # There may be fewer predicted boxes than the configured top-N, so the
        # smaller of the two is used.
        top_k_limit = tf.minimum(top_k_limit,
                                 tf.shape(boxes)[0],
                                 name='avoid_unenough_boxes')
        object_prob, best_indices = tf.nn.top_k(object_prob, k=top_k_limit)
        boxes = tf.gather(boxes, best_indices)

    # 4. NMS
    kept = tf.image.non_max_suppression(boxes=boxes,
                                        scores=object_prob,
                                        max_output_size=max_proposals,
                                        iou_threshold=iou_thresh)

    final_boxes = tf.gather(boxes, kept)
    final_probs = tf.gather(object_prob, kept)

    return final_boxes, final_probs
def fast_rcnn_loss(self):
    """Build the Fast-RCNN location loss and a focal-style classification loss.

    A minibatch is sampled with ``self.fast_rcnn_minibatch``; the matching
    proposals, encoded predictions and scores are gathered, image summaries of
    the positive/negative proposals are written, and the two losses are built.

    :return: (fast_rcnn_location_loss, fast_rcnn_classification_loss)
    """
    with tf.variable_scope('fast_rcnn_loss'):
        minibatch_indices, minibatch_reference_boxes_mattached_gtboxes, minibatch_object_mask, \
        minibatch_label_one_hot = self.fast_rcnn_minibatch(self.fast_rcnn_all_level_proposals)

        minibatch_reference_boxes = tf.gather(
            self.fast_rcnn_all_level_proposals, minibatch_indices)

        minibatch_encode_boxes = tf.gather(
            self.fast_rcnn_encode_boxes,
            minibatch_indices)  # [minibatch_size, num_classes*4]

        minibatch_scores = tf.gather(self.fast_rcnn_scores, minibatch_indices)

        # Multiplying by the mask zeroes the boxes that should not be drawn;
        # the text is the number of rows where the mask equals 1.0.
        positive_proposals_in_img = draw_box_with_color(
            self.img_batch,
            minibatch_reference_boxes *
            tf.expand_dims(minibatch_object_mask, 1),
            text=tf.shape(tf.where(tf.equal(minibatch_object_mask, 1.0)))[0])

        negative_mask = tf.cast(
            tf.logical_not(tf.cast(minibatch_object_mask, tf.bool)),
            tf.float32)

        negative_proposals_in_img = draw_box_with_color(
            self.img_batch,
            minibatch_reference_boxes * tf.expand_dims(negative_mask, 1),
            text=tf.shape(tf.where(tf.equal(minibatch_object_mask, 0.0)))[0])

        tf.summary.image('/positive_proposals', positive_proposals_in_img)
        tf.summary.image('/negative_proposals', negative_proposals_in_img)

        # The top-5 detection summary is only produced in the single-class
        # configuration; it reads score column 1.
        if cfgs.CLASS_NUM == 1:
            minibatch_decode_boxes = encode_and_decode.decode_boxes(
                encode_boxes=minibatch_encode_boxes,
                reference_boxes=minibatch_reference_boxes,
                scale_factors=self.scale_factors)

            minibatch_softmax_scores = tf.gather(
                slim.softmax(self.fast_rcnn_scores), minibatch_indices)
            top_k_scores, top_k_indices = tf.nn.top_k(
                minibatch_softmax_scores[:, 1], k=5)

            top_detections_in_img = draw_boxes_with_scores(
                self.img_batch,
                boxes=tf.gather(minibatch_decode_boxes, top_k_indices),
                scores=top_k_scores)
            tf.summary.image('/top_5', top_detections_in_img)

        # encode gtboxes
        minibatch_encode_gtboxes = \
            encode_and_decode.encode_boxes(
                unencode_boxes=minibatch_reference_boxes_mattached_gtboxes,
                reference_boxes=minibatch_reference_boxes,
                scale_factors=self.scale_factors
            )

        # [minibatch_size, num_classes*4]
        minibatch_encode_gtboxes = tf.tile(minibatch_encode_gtboxes,
                                           [1, self.num_classes])

        # Per-class weights: a block of four columns for each one-hot label
        # column 1..num_classes (column 0 is skipped), equal to that label column.
        class_weights_list = []
        category_list = tf.unstack(minibatch_label_one_hot, axis=1)
        for i in range(1, self.num_classes + 1):
            tmp_class_weights = tf.ones(
                shape=[tf.shape(minibatch_encode_boxes)[0], 4],
                dtype=tf.float32)
            tmp_class_weights = tmp_class_weights * tf.expand_dims(
                category_list[i], axis=1)
            class_weights_list.append(tmp_class_weights)
        class_weights = tf.concat(
            class_weights_list, axis=1)  # [minibatch_size, num_classes*4]

        # loss
        with tf.variable_scope('fast_rcnn_classification_loss'):
            logits = tf.cast(minibatch_scores, tf.float32)
            onehot_labels = tf.cast(minibatch_label_one_hot, tf.float32)
            one = tf.ones(shape=tf.shape(onehot_labels), dtype=tf.float32)
            # NOTE(review): `logits` is used here without a softmax, although
            # the same tensor is passed as `logits=` to the cross-entropy
            # below and slim.softmax is applied elsewhere to get scores.
            # Confirm whether probabilities were intended for predictions_pt.
            predictions_pt = tf.where(tf.equal(onehot_labels, 1), logits,
                                      1 - logits)
            # NOTE(review): the original comment here read "add small value to
            # avoid"; no epsilon is actually added in this block.
            # alpha_t is 0.25 where the label is 1 and 0.75 elsewhere.
            alpha_t = tf.scalar_mul(0.25, one)
            alpha_t = tf.where(tf.equal(onehot_labels, 1), alpha_t,
                               1 - alpha_t)
            # The exponent is -2 where predictions_pt < 0.5 and +2 otherwise.
            gamma = tf.scalar_mul(2, one)
            new_gamma = tf.where(tf.less(predictions_pt, 0.5), -gamma, gamma)
            ##PFL
            # NOTE(review): slim.losses.softmax_cross_entropy presumably
            # returns an already-reduced value that is broadcast against the
            # per-element factors; confirm against the TF-Slim docs.
            fast_rcnn_classification_loss = tf.multiply(
                tf.multiply(
                    alpha_t,
                    slim.losses.softmax_cross_entropy(
                        logits=logits, onehot_labels=onehot_labels)),
                tf.pow(1 - predictions_pt, new_gamma))
            ##FL
            # fast_rcnn_classification_loss = tf.multiply(tf.multiply(alpha_t, slim.losses.softmax_cross_entropy(logits=logits,
            #                                             onehot_labels=onehot_labels)), tf.pow(1-predictions_pt, 2))
            # For FL and PFL keep the following statement; for CE comment it out.
            # Only columns 0 and 1 are summed, so any further score columns
            # do not contribute to the loss.
            fast_rcnn_classification_loss = tf.reduce_sum(
                fast_rcnn_classification_loss[:, 0] +
                fast_rcnn_classification_loss[:, 1])
            ##CE
            # fast_rcnn_classification_loss = slim.losses.softmax_cross_entropy(logits=tf.clip_by_value(minibatch_scores,1e-8,tf.reduce_max(minibatch_scores)),
            #                                                                    onehot_labels=minibatch_label_one_hot)
            # fast_rcnn_classification_loss = slim.losses.softmax_cross_entropy(
            #     logits=minibatch_scores,
            #     onehot_labels=minibatch_label_one_hot)

        with tf.variable_scope('fast_rcnn_location_loss'):
            fast_rcnn_location_loss = losses.l1_smooth_losses(
                predict_boxes=minibatch_encode_boxes,
                gtboxes=minibatch_encode_gtboxes,
                object_weights=minibatch_object_mask,
                classes_weights=class_weights)
            # Register the location loss via slim's add_loss.
            slim.losses.add_loss(fast_rcnn_location_loss)

        return fast_rcnn_location_loss, fast_rcnn_classification_loss
def rpn_losses(self):
    """Build the RPN location and classification losses on a sampled minibatch.

    Besides the two losses, image summaries are written for the positive
    anchors, the negative anchors and the single highest-scoring decoded box.
    The classification loss is a plain softmax cross-entropy.

    :return: (location_loss, classification_loss)
    """
    with tf.variable_scope('rpn_losses'):
        sample_indices, matched_gtboxes, object_mask, labels_one_hot = \
            self.make_minibatch(self.anchors)

        anchors_mb = tf.gather(self.anchors, sample_indices)
        encoded_pred_mb = tf.gather(self.rpn_encode_boxes, sample_indices)
        scores_mb = tf.gather(self.rpn_scores, sample_indices)

        # encode gtboxes
        encoded_gt_mb = encode_and_decode.encode_boxes(
            unencode_boxes=matched_gtboxes,
            reference_boxes=anchors_mb,
            scale_factors=self.scale_factors)

        # Multiplying by a mask zeroes the anchors that should not be drawn.
        positive_boxes = anchors_mb * tf.expand_dims(object_mask, 1)
        positive_count = tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0]
        positive_img = draw_box_with_color(self.img_batch,
                                           positive_boxes,
                                           text=positive_count)

        is_object = tf.cast(object_mask, tf.bool)
        negative_mask = tf.cast(tf.logical_not(is_object), tf.float32)
        negative_boxes = anchors_mb * tf.expand_dims(negative_mask, 1)
        negative_count = tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0]
        negative_img = draw_box_with_color(self.img_batch,
                                           negative_boxes,
                                           text=negative_count)

        decoded_mb = encode_and_decode.decode_boxes(
            encode_boxes=encoded_pred_mb,
            reference_boxes=anchors_mb,
            scale_factors=self.scale_factors)

        tf.summary.image('/positive_anchors', positive_img)
        tf.summary.image('/negative_anchors', negative_img)

        # Highest value in score column 1 and the box that produced it.
        best_scores, best_indices = tf.nn.top_k(scores_mb[:, 1], k=1)
        best_boxes = tf.gather(decoded_mb, best_indices)
        top_img = draw_box_with_color(self.img_batch,
                                      best_boxes,
                                      text=tf.shape(best_scores)[0])
        tf.summary.image('/top_1', top_img)

        # losses
        with tf.variable_scope('rpn_location_loss'):
            location_loss = losses.l1_smooth_losses(
                predict_boxes=encoded_pred_mb,
                gtboxes=encoded_gt_mb,
                object_weights=object_mask)
            # add smooth l1 loss to losses collection
            slim.losses.add_loss(location_loss)

        with tf.variable_scope('rpn_classification_loss'):
            classification_loss = slim.losses.softmax_cross_entropy(
                logits=scores_mb, onehot_labels=labels_one_hot)

        return location_loss, classification_loss
def fast_rcnn_loss(self):
    """Build the Fast-RCNN location and softmax classification losses.

    A minibatch is drawn with ``self.fast_rcnn_minibatch``; proposals, encoded
    predictions and scores are gathered for it, image summaries are written,
    and the location loss is weighted per class through the one-hot labels.

    :return: (fast_rcnn_location_loss, fast_rcnn_classification_loss)
    """
    with tf.variable_scope('fast_rcnn_loss'):
        sample_indices, matched_gtboxes, object_mask, labels_one_hot = \
            self.fast_rcnn_minibatch(self.fast_rcnn_all_level_proposals)

        proposals_mb = tf.gather(self.fast_rcnn_all_level_proposals,
                                 sample_indices)
        # [minibatch_size, num_classes*4]
        encoded_pred_mb = tf.gather(self.fast_rcnn_encode_boxes,
                                    sample_indices)
        scores_mb = tf.gather(self.fast_rcnn_scores, sample_indices)

        # Multiplying by a mask zeroes the proposals that should not be drawn.
        positive_boxes = proposals_mb * tf.expand_dims(object_mask, 1)
        positive_count = tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0]
        positive_img = draw_box_with_color(self.img_batch,
                                           positive_boxes,
                                           text=positive_count)

        is_object = tf.cast(object_mask, tf.bool)
        negative_mask = tf.cast(tf.logical_not(is_object), tf.float32)
        negative_boxes = proposals_mb * tf.expand_dims(negative_mask, 1)
        negative_count = tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0]
        negative_img = draw_box_with_color(self.img_batch,
                                           negative_boxes,
                                           text=negative_count)

        tf.summary.image('/positive_proposals', positive_img)
        tf.summary.image('/negative_proposals', negative_img)

        # The top-5 summary is only produced in the single-class configuration.
        if cfgs.CLASS_NUM == 1:
            decoded_mb = encode_and_decode.decode_boxes(
                encode_boxes=encoded_pred_mb,
                reference_boxes=proposals_mb,
                scale_factors=self.scale_factors)

            softmax_mb = tf.gather(slim.softmax(self.fast_rcnn_scores),
                                   sample_indices)
            best_scores, best_indices = tf.nn.top_k(softmax_mb[:, 1], k=5)

            top_img = draw_boxes_with_scores(
                self.img_batch,
                boxes=tf.gather(decoded_mb, best_indices),
                scores=best_scores)
            tf.summary.image('/top_5', top_img)

        # encode gtboxes, then repeat them once per class
        encoded_gt_mb = encode_and_decode.encode_boxes(
            unencode_boxes=matched_gtboxes,
            reference_boxes=proposals_mb,
            scale_factors=self.scale_factors)
        # [minibatch_size, num_classes*4]
        encoded_gt_mb = tf.tile(encoded_gt_mb, [1, self.num_classes])

        # One block of four weight columns per one-hot column 1..num_classes
        # (column 0 is skipped), each equal to that label column.
        label_columns = tf.unstack(labels_one_hot, axis=1)
        per_class_weights = [
            tf.ones(shape=[tf.shape(encoded_pred_mb)[0], 4], dtype=tf.float32)
            * tf.expand_dims(label_columns[cls], axis=1)
            for cls in range(1, self.num_classes + 1)
        ]
        # [minibatch_size, num_classes*4]
        class_weights = tf.concat(per_class_weights, axis=1)

        # loss
        with tf.variable_scope('fast_rcnn_classification_loss'):
            fast_rcnn_classification_loss = tf.losses.softmax_cross_entropy(
                logits=scores_mb, onehot_labels=labels_one_hot)

        with tf.variable_scope('fast_rcnn_location_loss'):
            fast_rcnn_location_loss = losses.l1_smooth_losses(
                predict_boxes=encoded_pred_mb,
                gtboxes=encoded_gt_mb,
                object_weights=object_mask,
                classes_weights=class_weights)
            tf.losses.add_loss(fast_rcnn_location_loss)

        return fast_rcnn_location_loss, fast_rcnn_classification_loss
def fast_rcnn_loss(self):
    """Build the Fast-RCNN losses for the rotated-proposal head.

    Same flow as the horizontal variant, but the minibatch is sampled from
    ``self.fast_rcnn_all_level_rotate_proposals``, each class gets five weight
    columns instead of four, and the top-5 summary is always written.

    :return: (fast_rcnn_location_loss, fast_rcnn_classification_loss)
    """
    with tf.variable_scope('fast_rcnn_loss'):
        rotate_proposals = self.fast_rcnn_all_level_rotate_proposals
        picked, gtboxes_for_picked, is_object_mask, one_hot_labels = \
            self.fast_rcnn_minibatch(rotate_proposals)

        picked_proposals = tf.gather(rotate_proposals, picked)
        # [minibatch_size, num_classes*5]
        picked_encoded = tf.gather(self.fast_rcnn_encode_boxes, picked)
        picked_scores = tf.gather(self.fast_rcnn_scores, picked)

        # Multiplying by a mask zeroes the proposals that should not be drawn.
        positive_img = draw_box_with_color(
            self.img_batch,
            picked_proposals * tf.expand_dims(is_object_mask, 1),
            text=tf.shape(tf.where(tf.equal(is_object_mask, 1.0)))[0])

        not_object_mask = tf.cast(
            tf.logical_not(tf.cast(is_object_mask, tf.bool)), tf.float32)

        negative_img = draw_box_with_color(
            self.img_batch,
            picked_proposals * tf.expand_dims(not_object_mask, 1),
            text=tf.shape(tf.where(tf.equal(is_object_mask, 0.0)))[0])

        tf.summary.image('/positive_proposals', positive_img)
        tf.summary.image('/negative_proposals', negative_img)

        picked_decoded = encode_and_decode.decode_boxes(
            encode_boxes=picked_encoded,
            reference_boxes=picked_proposals,
            scale_factors=self.scale_factors)

        picked_softmax = tf.gather(slim.softmax(self.fast_rcnn_scores),
                                   picked)
        top_scores, top_indices = tf.nn.top_k(picked_softmax[:, 1], k=5)

        top_img = draw_boxes_with_scores(
            self.img_batch,
            boxes=tf.gather(picked_decoded, top_indices),
            scores=top_scores)
        tf.summary.image('/top_5', top_img)

        # encode gtboxes, then repeat them once per class
        encoded_gtboxes = encode_and_decode.encode_boxes(
            unencode_boxes=gtboxes_for_picked,
            reference_boxes=picked_proposals,
            scale_factors=self.scale_factors)
        # [minibatch_size, num_classes*5]
        encoded_gtboxes = tf.tile(encoded_gtboxes, [1, self.num_classes])

        # One block of five weight columns per one-hot column 1..num_classes
        # (column 0 is skipped), each equal to that label column.
        label_columns = tf.unstack(one_hot_labels, axis=1)
        weight_blocks = []
        for class_index in range(1, self.num_classes + 1):
            all_ones = tf.ones(shape=[tf.shape(picked_encoded)[0], 5],
                               dtype=tf.float32)
            weight_blocks.append(
                all_ones * tf.expand_dims(label_columns[class_index], axis=1))
        # [minibatch_size, num_classes*5]
        class_weights = tf.concat(weight_blocks, axis=1)

        # loss
        with tf.variable_scope('fast_rcnn_classification_loss'):
            fast_rcnn_classification_loss = slim.losses.softmax_cross_entropy(
                logits=picked_scores, onehot_labels=one_hot_labels)

        with tf.variable_scope('fast_rcnn_location_loss'):
            fast_rcnn_location_loss = losses.l1_smooth_losses(
                predict_boxes=picked_encoded,
                gtboxes=encoded_gtboxes,
                object_weights=is_object_mask,
                classes_weights=class_weights)
            slim.losses.add_loss(fast_rcnn_location_loss)

        return fast_rcnn_location_loss, fast_rcnn_classification_loss
def batch_slice_head_proposals(rpn_proposal_bbox, encode_boxes, categories, scores, image_height, image_width):
    """
    Multi-class NMS over the detection head's outputs.

    Zero-padded proposals are trimmed, boxes are decoded and clipped,
    background (category <= 0) and low-score rows are dropped, NMS is applied
    separately per class, and the best-scoring survivors are returned.

    :param rpn_proposal_bbox: (N, 4)
    :param encode_boxes: (N, 4)
    :param categories: (N, )
    :param scores: (N, )
    :param image_height: forwarded to boxes_utils.clip_boxes_to_img_boundaries
    :param image_width: forwarded to boxes_utils.clip_boxes_to_img_boundaries
    :return: detections, one row per detection laid out as
        [decoded box (presumably y1, x1, y2, x2), class_id, score] and
        zero-padded to cfgs.DETECTION_MAX_INSTANCES rows
    """
    with tf.name_scope('head_proposals'):
        # Drop the zero-padding rows from every input.
        rpn_proposal_bbox, valid_rows = boxes_utils.trim_zeros_graph(
            rpn_proposal_bbox, name="trim_proposals_detection")
        encode_boxes = tf.boolean_mask(encode_boxes, valid_rows)
        categories = tf.boolean_mask(categories, valid_rows)
        scores = tf.boolean_mask(scores, valid_rows)

        decoded_boxes = encode_and_decode.decode_boxes(
            encode_boxes=encode_boxes,
            reference_boxes=rpn_proposal_bbox,
            scale_factors=cfgs.BBOX_STD_DEV)
        decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            decoded_boxes, image_height, image_width)

        # remove the background
        keep = tf.cast(tf.where(categories > 0)[:, 0], tf.int32)

        if cfgs.DEBUG:
            # Print the (at most 100) best foreground scores and their classes.
            debug_categories = tf.gather(categories, keep)
            debug_scores = tf.gather(scores, keep)
            debug_count = tf.minimum(tf.shape(debug_scores)[0], 100)
            shown_scores, shown_index = tf.nn.top_k(debug_scores,
                                                    k=debug_count)
            shown_categories = tf.gather(debug_categories, shown_index)
            boxes_utils.print_tensors(shown_categories, "categories")
            boxes_utils.print_tensors(shown_scores, "scores")

        # Filter out low confidence boxes
        if cfgs.FINAL_SCORE_THRESHOLD:
            confident = tf.cast(
                tf.where(scores >= cfgs.FINAL_SCORE_THRESHOLD)[:, 0],
                tf.int32)
            both = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                            tf.expand_dims(confident, 0))
            keep = tf.sparse_tensor_to_dense(both)[0]

        candidate_class_ids = tf.gather(categories, keep)
        candidate_scores = tf.gather(scores, keep)
        candidate_boxes = tf.gather(decoded_boxes, keep)
        present_class_ids = tf.unique(candidate_class_ids)[0]

        def nms_keep_map(class_id):
            """Apply Non-Maximum Suppression on ROIs of the given class."""
            # Positions, within the candidates, of the ROIs of this class
            of_class = tf.where(tf.equal(candidate_class_ids, class_id))[:, 0]
            # Apply NMS
            survivors = tf.image.non_max_suppression(
                tf.gather(candidate_boxes, of_class),
                tf.gather(candidate_scores, of_class),
                max_output_size=cfgs.DETECTION_MAX_INSTANCES,
                iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD)
            # Map NMS positions -> candidate positions -> original row indices
            survivors = tf.gather(keep, tf.gather(of_class, survivors))
            # Pad with -1 so returned tensors have the same shape
            missing = cfgs.DETECTION_MAX_INSTANCES - tf.shape(survivors)[0]
            survivors = tf.pad(survivors, [(0, missing)],
                               mode='CONSTANT',
                               constant_values=-1)
            # Set shape so map_fn() can infer result shape
            survivors.set_shape([cfgs.DETECTION_MAX_INSTANCES])
            return survivors

        # 2. Map over class IDs
        nms_keep = tf.map_fn(nms_keep_map, present_class_ids, dtype=tf.int32)
        # 3. Merge results into one list, and remove -1 padding
        nms_keep = tf.reshape(nms_keep, [-1])
        nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
        # 4. Compute intersection between keep and nms_keep
        after_nms = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                             tf.expand_dims(nms_keep, 0))
        keep = tf.sparse_tensor_to_dense(after_nms)[0]

        # Keep top detections
        max_detections = cfgs.DETECTION_MAX_INSTANCES
        kept_scores = tf.gather(scores, keep)
        num_keep = tf.minimum(tf.shape(kept_scores)[0], max_detections)
        best_positions = tf.nn.top_k(kept_scores, k=num_keep, sorted=True)[1]
        keep = tf.gather(keep, best_positions)

        # Arrange output as [N, (box coords, class_id, score)]
        box_columns = tf.gather(decoded_boxes, keep)
        class_column = tf.to_float(tf.gather(categories, keep))[..., tf.newaxis]
        score_column = tf.gather(scores, keep)[..., tf.newaxis]
        detections = tf.concat([box_columns, class_column, score_column],
                               axis=1)

        # Pad with zeros if detections < DETECTION_MAX_INSTANCES
        missing_rows = cfgs.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
        detections = tf.pad(detections, [(0, missing_rows), (0, 0)],
                            "CONSTANT")
        return detections