def batch_slice_head_proposals(rpn_proposal_bbox, encode_boxes, categories, scores, image_height, image_width):
    """
    Multiclass NMS: decode the head's box predictions for one image and keep
    the best-scoring detections of every foreground class.

    :param rpn_proposal_bbox: (N, 4) proposal boxes used as decoding references
    :param encode_boxes: (N, 4) encoded box refinements, one per proposal
    :param categories: (N, ) predicted class ids; ids <= 0 are dropped as background
    :param scores: (N, ) confidence of the predicted class
    :param image_height: image height handed to clip_boxes_to_img_boundaries
    :param image_width: image width handed to clip_boxes_to_img_boundaries
    :return: detections, (cfgs.DETECTION_MAX_INSTANCES, 6) rows laid out as
        [decoded box (4 values), class_id, score], zero padded at the end
    """
    with tf.name_scope('head_proposals'):
        # Trim the rows flagged by trim_zeros_graph (presumably zero padding)
        # and keep every per-proposal tensor aligned with the surviving boxes.
        rpn_proposal_bbox, valid_rows = boxes_utils.trim_zeros_graph(
            rpn_proposal_bbox, name="trim_proposals_detection")
        encode_boxes = tf.boolean_mask(encode_boxes, valid_rows)
        categories = tf.boolean_mask(categories, valid_rows)
        scores = tf.boolean_mask(scores, valid_rows)

        decoded_boxes = encode_and_decode.decode_boxes(
            encode_boxes=encode_boxes,
            reference_boxes=rpn_proposal_bbox,
            scale_factors=cfgs.BBOX_STD_DEV)
        decoded_boxes = boxes_utils.clip_boxes_to_img_boundaries(
            decoded_boxes, image_height, image_width)

        # Indices of proposals that were not classified as background.
        foreground_ids = tf.cast(tf.where(categories > 0)[:, 0], tf.int32)

        if cfgs.DEBUG:
            # Print the (at most 100) highest-scoring foreground predictions.
            dbg_categories = tf.gather(categories, foreground_ids)
            dbg_scores = tf.gather(scores, foreground_ids)
            dbg_count = tf.minimum(tf.shape(dbg_scores)[0], 100)
            dbg_top_scores, dbg_top_ids = tf.nn.top_k(dbg_scores, k=dbg_count)
            dbg_top_categories = tf.gather(dbg_categories, dbg_top_ids)
            boxes_utils.print_tensors(dbg_top_categories, "categories")
            boxes_utils.print_tensors(dbg_top_scores, "scores")

        # Optionally drop low-confidence boxes (original note on the
        # threshold value: 0.7).
        candidate_ids = foreground_ids
        if cfgs.FINAL_SCORE_THRESHOLD:
            confident_ids = tf.cast(
                tf.where(scores >= cfgs.FINAL_SCORE_THRESHOLD)[:, 0], tf.int32)
            shared_ids = tf.sets.set_intersection(
                tf.expand_dims(foreground_ids, 0),
                tf.expand_dims(confident_ids, 0))
            candidate_ids = tf.sparse_tensor_to_dense(shared_ids)[0]

        pre_nms_class_ids = tf.gather(categories, candidate_ids)
        pre_nms_scores = tf.gather(scores, candidate_ids)
        pre_nms_rois = tf.gather(decoded_boxes, candidate_ids)
        unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

        def nms_keep_map(class_id):
            """Run NMS over the candidates of one class; return padded proposal indices."""
            # Positions (within the pre-NMS tensors) that belong to this class.
            class_positions = tf.where(
                tf.equal(pre_nms_class_ids, class_id))[:, 0]
            survivors = tf.image.non_max_suppression(
                tf.gather(pre_nms_rois, class_positions),
                tf.gather(pre_nms_scores, class_positions),
                # Original note: at most 200 boxes.
                max_output_size=cfgs.DETECTION_MAX_INSTANCES,
                # Original note: 0.3; set too high and everything is filtered out.
                iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD)
            # Translate NMS output back into indices of the trimmed proposals.
            survivors = tf.gather(
                candidate_ids, tf.gather(class_positions, survivors))
            # Pad with -1 so every class yields a tensor of the same length.
            missing = cfgs.DETECTION_MAX_INSTANCES - tf.shape(survivors)[0]
            survivors = tf.pad(survivors, [(0, missing)],
                               mode='CONSTANT', constant_values=-1)
            # A static shape lets map_fn() infer the stacked result shape.
            survivors.set_shape([cfgs.DETECTION_MAX_INSTANCES])
            return survivors

        # Apply NMS class by class.
        per_class_keep = tf.map_fn(
            nms_keep_map, unique_pre_nms_class_ids, dtype=tf.int32)

        # Flatten the per-class results and strip the -1 padding.
        per_class_keep = tf.reshape(per_class_keep, [-1])
        per_class_keep = tf.gather(
            per_class_keep, tf.where(per_class_keep > -1)[:, 0])

        # Keep only the candidates that also survived NMS.
        final_ids = tf.sets.set_intersection(
            tf.expand_dims(candidate_ids, 0),
            tf.expand_dims(per_class_keep, 0))
        final_ids = tf.sparse_tensor_to_dense(final_ids)[0]

        # Restrict to the top-scoring detections, ordered by descending score.
        max_detections = cfgs.DETECTION_MAX_INSTANCES
        final_scores = tf.gather(scores, final_ids)
        top_count = tf.minimum(tf.shape(final_scores)[0], max_detections)
        order = tf.nn.top_k(final_scores, k=top_count, sorted=True)[1]
        final_ids = tf.gather(final_ids, order)

        # Assemble rows as [box (4 values), class_id, score].
        detections = tf.concat([
            tf.gather(decoded_boxes, final_ids),
            tf.to_float(tf.gather(categories, final_ids))[..., tf.newaxis],
            tf.gather(scores, final_ids)[..., tf.newaxis]
        ], axis=1)

        # Zero-pad up to DETECTION_MAX_INSTANCES rows.
        missing_rows = cfgs.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
        detections = tf.pad(detections, [(0, missing_rows), (0, 0)], "CONSTANT")
        return detections
def compute_metric_ap(gt_boxes, gt_class_ids, pred_boxes, pred_class_ids, pred_scores, config, iou_threshold=0.5):
    """Build per-class average-precision metrics at a set IoU threshold (default 0.5).

    Predictions are matched one-to-one against ground-truth boxes of the same
    class. Every prediction then contributes a (predicted label, matched
    ground-truth label) pair, where the ground-truth label is 0 for a false
    positive or duplicate. Every unmatched ground-truth box contributes a
    (0, ground-truth label) pair.

    :param gt_boxes: ground-truth boxes; rows flagged by trim_zeros_graph are dropped
    :param gt_class_ids: class id per ground-truth box
    :param pred_boxes: predicted boxes; rows flagged by trim_zeros_graph are dropped
    :param pred_class_ids: class id per predicted box
    :param pred_scores: confidence per predicted box
    :param config: object providing NUM_CLASS and LABEL_TO_NAME
    :param iou_threshold: minimum IoU for a prediction to match a ground-truth box
    :return: dict mapping config.LABEL_TO_NAME[i], for i in 1..NUM_CLASS-1, to
        the value returned by tf.metrics.average_precision_at_k for that class
    """
    # Helpers are module-qualified, like the other functions in this file.
    gt_boxes, gt_non_zeros = boxes_utils.trim_zeros_graph(gt_boxes)
    # Class ids are cast to int64 once: pred/gt ids must share a dtype for
    # tf.equal/tf.concat, and average_precision_at_k expects int64 labels.
    gt_class_ids = tf.cast(
        tf.boolean_mask(gt_class_ids, gt_non_zeros), tf.int64)
    pred_boxes, pred_non_zeros = boxes_utils.trim_zeros_graph(pred_boxes)
    pred_class_ids = tf.cast(
        tf.boolean_mask(pred_class_ids, pred_non_zeros), tf.int64)
    pred_scores = tf.boolean_mask(pred_scores, pred_non_zeros)

    # Sort predictions by descending score so that, among several predictions
    # hitting the same ground truth, the most confident one comes first.
    sorted_index = tf.contrib.framework.argsort(
        pred_scores, axis=-1, direction='DESCENDING')
    pred_boxes = tf.gather(pred_boxes, sorted_index)
    pred_scores = tf.gather(pred_scores, sorted_index)
    pred_class_ids = tf.gather(pred_class_ids, sorted_index)

    # Rows are predictions, columns are ground-truth boxes.
    pred_gt_ious = iou.iou_calculate(pred_boxes, gt_boxes)
    iou_dtype = pred_gt_ious.dtype

    # Pairs whose IoU reaches the threshold.
    greater_iou_matrix = tf.greater_equal(pred_gt_ious, iou_threshold)
    # Pairs whose class labels agree.
    label_equal_matrix = tf.equal(
        tf.expand_dims(pred_class_ids, axis=1),
        tf.expand_dims(gt_class_ids, axis=0))
    # Pairs that satisfy both conditions; keep their IoU, zero elsewhere.
    match_matrix = tf.logical_and(greater_iou_matrix, label_equal_matrix)
    match_iou = tf.multiply(pred_gt_ious, tf.cast(match_matrix, iou_dtype))

    # Remove duplicates within a row: a prediction only keeps the ground
    # truth(s) it overlaps best.
    row_best_iou = tf.reduce_max(match_iou, axis=1, keep_dims=True)
    single_match_pred = tf.cast(
        tf.logical_and(tf.equal(match_iou, row_best_iou),
                       tf.greater_equal(match_iou, iou_threshold)),
        iou_dtype)

    # Remove duplicates within a column: a ground truth is only assigned to
    # the first (highest-scoring) prediction that claims it.
    first_one_element = tf.one_hot(
        tf.argmax(single_match_pred, axis=0),
        depth=tf.shape(single_match_pred)[0],
        axis=0,
        dtype=iou_dtype)
    single_match = tf.multiply(single_match_pred, first_one_element)
    match_iou = tf.multiply(single_match, match_iou)

    related_gt_box = tf.argmax(single_match, axis=-1)
    match_pred_gt_label = tf.gather(gt_class_ids, related_gt_box)

    # Predictions without a surviving match are false positives or duplicate
    # detections: their ground-truth label is set to 0.
    max_iou = tf.reduce_max(match_iou, axis=-1)
    iou_bigger_threshold = tf.greater_equal(max_iou, iou_threshold)
    pred_gt_label = tf.multiply(
        match_pred_gt_label,
        tf.cast(iou_bigger_threshold, match_pred_gt_label.dtype))

    # Ground-truth boxes that no prediction matched. tf.where yields (K, 1)
    # indices; take the first column so the gathered labels stay rank 1.
    miss_gt = tf.where(
        tf.equal(tf.reduce_sum(single_match, axis=0), 0))[:, 0]
    miss_gt_label = tf.gather(gt_class_ids, miss_gt)
    correspond_pred = tf.zeros_like(miss_gt_label)

    # Aligned (prediction, ground truth) label pairs: one per prediction,
    # followed by one per missed ground truth.
    eval_pred_class_ids = tf.concat([pred_class_ids, correspond_pred], axis=0)
    eval_gt_class_ids = tf.concat([pred_gt_label, miss_gt_label], axis=0)

    eval_metrics = {}
    for class_id in range(1, config.NUM_CLASS):
        # Pairs in which this class appears on either side.
        involved = tf.where(tf.logical_or(
            tf.equal(eval_pred_class_ids, class_id),
            tf.equal(eval_gt_class_ids, class_id)))[:, 0]
        class_pred = tf.gather(eval_pred_class_ids, involved)
        class_gt = tf.gather(eval_gt_class_ids, involved)
        # Labels are passed as (K, 1) and predictions as (K, NUM_CLASS).
        eval_metrics[config.LABEL_TO_NAME[class_id]] = \
            tf.metrics.average_precision_at_k(
                tf.expand_dims(class_gt, axis=1),
                tf.one_hot(class_pred, depth=config.NUM_CLASS, axis=1),
                1)
    return eval_metrics
def batch_slice_build_sample(gtboxes_and_label, rpn_proposals_boxes):
    """
    Build a fixed-size head training minibatch from the proposals of one image.

    :param gtboxes_and_label: 2-D tensor; the last column is read as the class
        label and the remaining columns are reshaped to 4 box coordinates
    :param rpn_proposals_boxes: proposal boxes; rows flagged by
        trim_zeros_graph are dropped
    :return: tuple (minibatch_reference_proboxes, minibatch_encode_gtboxes,
        object_mask, gt_class_ids), each zero padded along axis 0 up to
        cfgs.FAST_RCNN_MINIBATCH_SIZE entries
    """
    with tf.name_scope('select_pos_neg_samples'):
        gtboxes = tf.cast(
            tf.reshape(gtboxes_and_label[:, :-1], [-1, 4]), tf.float32)
        gt_class_ids = tf.cast(
            tf.reshape(gtboxes_and_label[:, -1], [-1, ]), tf.int32)

        gtboxes, gt_valid_rows = boxes_utils.trim_zeros_graph(
            gtboxes, name="trim_gt_box")  # [M, 4]
        gt_class_ids = tf.boolean_mask(gt_class_ids, gt_valid_rows)
        rpn_proposals_boxes, _ = boxes_utils.trim_zeros_graph(
            rpn_proposals_boxes, name="trim_rpn_proposal_train")

        overlaps = iou.iou_calculate(rpn_proposals_boxes, gtboxes)  # [N, M]

        # Best-overlapping ground truth for every proposal.
        best_gt = tf.cast(tf.argmax(overlaps, axis=1), tf.int32)  # [N, ]
        best_overlap = tf.reduce_max(overlaps, axis=1)

        is_positive = tf.cast(
            tf.greater_equal(best_overlap,
                             cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD),
            tf.int32)

        matched_gtboxes = tf.gather(gtboxes, best_gt)  # [N, 4]
        gt_class_ids = tf.gather(gt_class_ids, best_gt)  # [N, ]
        object_mask = tf.cast(is_positive, tf.float32)  # [N, ]
        # Background proposals get class id 0.
        gt_class_ids = gt_class_ids * is_positive

    with tf.name_scope('head_train_minibatch'):
        # Randomly pick positives, capped at MINIBATCH_SIZE * POSITIVE_RATE.
        positive_indices = tf.reshape(
            tf.where(tf.equal(object_mask, 1.)), [-1])
        num_of_positives = tf.minimum(
            tf.shape(positive_indices)[0],
            tf.cast(cfgs.FAST_RCNN_MINIBATCH_SIZE
                    * cfgs.FAST_RCNN_POSITIVE_RATE, tf.int32))
        positive_indices = tf.random_shuffle(positive_indices)
        positive_indices = tf.slice(
            positive_indices, begin=[0], size=[num_of_positives])

        # Randomly pick negatives. Their count is tied to the number of
        # positives (original note: a strict 1:3 positive:negative ratio).
        negative_indices = tf.reshape(
            tf.where(tf.equal(object_mask, 0.)), [-1])
        samples_per_positive = int(1. / cfgs.FAST_RCNN_POSITIVE_RATE)
        num_of_negatives = tf.cast(
            samples_per_positive * num_of_positives, tf.int32) \
            - num_of_positives
        num_of_negatives = tf.minimum(
            tf.shape(negative_indices)[0], num_of_negatives)
        negative_indices = tf.random_shuffle(negative_indices)
        negative_indices = tf.slice(
            negative_indices, begin=[0], size=[num_of_negatives])

        minibatch_indices = tf.concat(
            [positive_indices, negative_indices], axis=0)

        minibatch_reference_gtboxes = tf.gather(
            matched_gtboxes, minibatch_indices)
        minibatch_reference_proboxes = tf.gather(
            rpn_proposals_boxes, minibatch_indices)

        # Encode the matched ground-truth boxes relative to their proposals.
        minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
            unencode_boxes=minibatch_reference_gtboxes,
            reference_boxes=minibatch_reference_proboxes,
            scale_factors=cfgs.BBOX_STD_DEV)

        object_mask = tf.gather(object_mask, minibatch_indices)
        gt_class_ids = tf.gather(gt_class_ids, minibatch_indices)

        # Zero-pad every output up to the configured minibatch size.
        shortfall = tf.cast(
            cfgs.FAST_RCNN_MINIBATCH_SIZE
            - (num_of_positives + num_of_negatives),
            dtype=tf.int32)
        box_padding = tf.zeros((shortfall, 4))
        minibatch_reference_proboxes = tf.concat(
            [minibatch_reference_proboxes, box_padding], axis=0)
        minibatch_encode_gtboxes = tf.concat(
            [minibatch_encode_gtboxes, box_padding], axis=0)
        object_mask = tf.pad(object_mask, [(0, shortfall)])
        gt_class_ids = tf.pad(gt_class_ids, [(0, shortfall)])

    return minibatch_reference_proboxes, minibatch_encode_gtboxes, object_mask, gt_class_ids