def propsal(rpn_probs, rpn_bbox):
    """Build region proposals for every image in the batch from RPN outputs.

    Args:
        rpn_probs: rank-3 tensor; channel 1 of the last axis is used as the
            per-anchor score (presumably the foreground probability,
            [batch, num_anchors, 2] -- TODO confirm against the RPN head).
        rpn_bbox: per-anchor box deltas, multiplied by
            ``cfg.RPN_BBOX_STD_DEV`` reshaped to [1, 1, 4].

    Returns:
        The per-image outputs of ``utils.nms`` stacked along a new leading
        batch axis.
    """
    # Channel 1 of the class probabilities is the score used for ranking.
    scores = rpn_probs[:, :, 1]
    # Scale the raw deltas by the configured standard deviations.
    deltas = rpn_bbox * np.reshape(cfg.RPN_BBOX_STD_DEV, [1, 1, 4])
    anchors = cfg.norm_anchors

    # Only the highest scoring anchors (at most 6000) go through NMS.
    top_limit = tf.minimum(6000, cfg.total_anchors)
    top_ix = tf.nn.top_k(scores, top_limit, sorted=True,
                         name="top_anchors").indices

    # Boxes are clipped to the [0, 0, 1, 1] window.
    unit_window = np.array([0, 0, 1, 1], dtype=np.float32)

    def _proposals_for_image(b):
        """Gather, refine, clip and suppress the top anchors of image ``b``."""
        img_scores = tf.gather(scores[b, :], top_ix[b, :])
        img_deltas = tf.gather(deltas[b, :], top_ix[b, :])
        # The anchors are shared by all images, so they are not indexed by b.
        img_anchors = tf.gather(anchors, top_ix[b, :])
        refined = utils.apply_box_deltas_graph(img_anchors, img_deltas)
        clipped = utils.clip_boxes_graph(refined, unit_window)
        return utils.nms(clipped, img_scores, cfg)

    per_image = [_proposals_for_image(b) for b in range(cfg.batch_size)]
    return tf.stack(per_image, axis=0)
def call(self, inputs):
    """Select, refine, clip and suppress anchors to produce proposals.

    Args:
        inputs: indexable with three entries:
            inputs[0] -- class probabilities; channel 1 of the last axis is
                used as the box score (foreground confidence).
            inputs[1] -- box deltas, scaled here by ``self.rpn_bbox_std_dev``.
            inputs[2] -- anchors; the size of axis 1 caps the pre-NMS count.

    Returns:
        The result of ``utils.batch_slice`` over the per-image NMS step; each
        image contributes ``self.proposal_count`` rows (zero padded).
    """
    images_per_gpu = self.count_image_per_gpu

    def _gather(values, index):
        # Pick the rows of one image selected by the top-k indices.
        return tf.gather(values, index)

    def _refine(anchor_boxes, box_deltas):
        return utils.apply_box_deltas_graph(anchor_boxes, box_deltas)

    # Box scores: use the foreground class confidence.
    scores = inputs[0][:, :, 1]
    # Box deltas, scaled by the configured standard deviations.
    deltas = inputs[1] * np.reshape(self.rpn_bbox_std_dev, [1, 1, 4])
    anchors = inputs[2]

    # Trim to the top anchors by score so the remaining work runs on a
    # smaller subset.
    top_limit = tf.minimum(self.pre_nms_limit, tf.shape(anchors)[1])
    top_ix = tf.nn.top_k(scores, top_limit, sorted=True,
                         name="top_anchors").indices
    scores = utils.batch_slice([scores, top_ix], _gather, images_per_gpu)
    deltas = utils.batch_slice([deltas, top_ix], _gather, images_per_gpu)
    pre_nms_anchors = utils.batch_slice(
        [anchors, top_ix], _gather, images_per_gpu,
        names=["pre_nms_anchors"])

    # Apply the deltas to the anchors: [batch, N, (y1, x1, y2, x2)].
    boxes = utils.batch_slice(
        [pre_nms_anchors, deltas], _refine, images_per_gpu,
        names=["refined_anchors"])

    # Coordinates are normalized, so clipping to the image is clipping to
    # the 0..1 range.
    window = np.array([0, 0, 1, 1], dtype=np.float32)

    def _clip(image_boxes):
        return utils.clip_boxes_graph(image_boxes, window)

    boxes = utils.batch_slice(
        boxes, _clip, images_per_gpu, names=["refined_anchors_clipped"])

    # Small boxes are deliberately not filtered out: according to Xinlei
    # Chen's paper that reduces detection accuracy for small objects.

    def _suppress(image_boxes, image_scores):
        """Run NMS on one image and zero-pad to ``proposal_count`` rows."""
        selected = tf.image.non_max_suppression(
            image_boxes, image_scores, self.proposal_count,
            self.nms_threshold, name="rpn_non_max_suppression")
        kept = tf.gather(image_boxes, selected)
        # Pad if NMS returned fewer boxes than requested.
        missing = tf.maximum(self.proposal_count - tf.shape(kept)[0], 0)
        return tf.pad(kept, [(0, missing), (0, 0)])

    return utils.batch_slice([boxes, scores], _suppress, images_per_gpu)
def generate_proposal(rpn_prob, rpn_bbox, anchors, proposal_count, config):
    """Convert RPN outputs and anchors into a fixed number of proposals.

    Args:
        rpn_prob: class probabilities; channel 1 of the last axis is used as
            the box score.
        rpn_bbox: box deltas, scaled here by ``config.RPN_BBOX_STD_DEV``.
        anchors: anchor boxes; the size of axis 1 caps the pre-NMS count.
        proposal_count: number of proposals kept per image after NMS
            (shorter results are zero padded).
        config: provides RPN_NMS_THRESHOLD, RPN_BBOX_STD_DEV, PRE_NMS_LIMIT
            and IMAGES_PER_GPU.

    Returns:
        Tensor reshaped to [-1, proposal_count, 4].
    """
    iou_limit = config.RPN_NMS_THRESHOLD
    images_per_gpu = config.IMAGES_PER_GPU

    def _gather(values, index):
        # Pick the rows of one image selected by the top-k indices.
        return tf.gather(values, index)

    def _refine(anchor_boxes, box_deltas):
        return utils.apply_box_deltas_graph(anchor_boxes, box_deltas)

    # Box scores and box deltas (deltas scaled by the std-dev vector).
    scores = rpn_prob[:, :, 1]
    deltas = rpn_bbox * np.reshape(config.RPN_BBOX_STD_DEV, [1, 1, 4])

    # Trim to the top anchors by score so the remaining work runs on a
    # smaller subset.
    top_limit = tf.minimum(config.PRE_NMS_LIMIT, tf.shape(anchors)[1])
    top_ix = tf.nn.top_k(scores, top_limit, sorted=True,
                         name="top_anchors").indices
    scores = utils.batch_slice([scores, top_ix], _gather, images_per_gpu)
    deltas = utils.batch_slice([deltas, top_ix], _gather, images_per_gpu)
    pre_nms_anchors = utils.batch_slice(
        [anchors, top_ix], _gather, images_per_gpu)

    # Apply the deltas to the anchors: [batch, N, (y1, x1, y2, x2)].
    boxes = utils.batch_slice(
        [pre_nms_anchors, deltas], _refine, images_per_gpu,
        names=["refined_anchors"])

    # Coordinates are normalized, so clipping to the image is clipping to
    # the 0..1 range.
    window = np.array([0, 0, 1, 1], dtype=np.float32)

    def _clip(image_boxes):
        return utils.clip_boxes_graph(image_boxes, window)

    boxes = utils.batch_slice(
        boxes, _clip, images_per_gpu, names=["refined_anchors_clipped"])

    # Small boxes are deliberately not filtered out: according to Xinlei
    # Chen's paper that reduces detection accuracy for small objects.

    def _suppress(image_boxes, image_scores):
        """Run NMS on one image and zero-pad to ``proposal_count`` rows."""
        selected = tf.image.non_max_suppression(
            image_boxes, image_scores, proposal_count, iou_limit,
            name='rpn_non_max_suppression')
        kept = tf.gather(image_boxes, selected)
        # Pad if NMS returned fewer boxes than requested.
        missing = tf.maximum(proposal_count - tf.shape(kept)[0], 0)
        return tf.pad(kept, [(0, missing), (0, 0)])

    proposals = utils.batch_slice([boxes, scores], _suppress, images_per_gpu)
    # Give the result an explicit [batch, proposal_count, 4] layout.
    return tf.reshape(proposals, (-1, proposal_count, 4))
def refine_detections_graph(rois, probs, deltas, window, bbox_std_dev,
                            detection_min_confidence, detection_max_instance,
                            detection_nms_threshold):
    """Refine classified proposals, filter overlaps and return detections.

    Inputs:
        rois: [N, (y1, x1, y2, x2)] in normalized coordinates
        probs: [N, num_classes]. Class probabilities.
        deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific
                bounding box deltas.
        window: (y1, x1, y2, x2) in normalized coordinates. The part of the
                image that contains the image excluding the padding.
        bbox_std_dev: factor the class-specific deltas are multiplied by
                before they are applied to the ROIs.
        detection_min_confidence: minimum class score for a box to be kept.
                A falsy value (0 / None) disables this filter.
        detection_max_instance: maximum number of boxes kept per class by NMS
                and maximum number of rows in the returned tensor.
        detection_nms_threshold: IoU threshold used by per-class NMS.

    Returns detections shaped:
        [detection_max_instance, (y1, x1, y2, x2, class_id, score)] where
        coordinates are normalized. Rows beyond the number of kept detections
        are zero padding.
    """
    max_output_size = detection_max_instance
    iou_threshold = detection_nms_threshold

    # Class IDs per ROI
    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
    # Class probability of the top class of each ROI.
    # NOTE: tf.range() is built from the static first dimension of probs.
    indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1)
    class_scores = tf.gather_nd(probs, indices)
    # Class-specific bounding box deltas
    deltas_specific = tf.gather_nd(deltas, indices)
    # Apply bounding box deltas
    # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates
    refined_rois = utils.apply_box_deltas_graph(
        rois, deltas_specific * bbox_std_dev)
    # Clip boxes to image window
    refined_rois = utils.clip_boxes_graph(refined_rois, window)

    # TODO: Filter out boxes with zero area

    # Filter out background boxes
    keep = tf.where(class_ids > 0)[:, 0]
    # Filter out low confidence boxes
    if detection_min_confidence:
        conf_keep = tf.where(class_scores >= detection_min_confidence)[:, 0]
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(conf_keep, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

    # Apply per-class NMS
    # 1. Prepare variables
    pre_nms_class_ids = tf.gather(class_ids, keep)
    pre_nms_scores = tf.gather(class_scores, keep)
    pre_nms_rois = tf.gather(refined_rois, keep)
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

    def nms_keep_map(class_id):
        """Apply Non-Maximum Suppression on ROIs of the given class."""
        # Indices of ROIs of the given class
        ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
        # Apply NMS
        class_keep = tf.image.non_max_suppression(
            tf.gather(pre_nms_rois, ixs),
            tf.gather(pre_nms_scores, ixs),
            max_output_size=max_output_size,
            iou_threshold=iou_threshold)
        # Map indices back to positions in the original ROI list
        class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
        # Pad with -1 so returned tensors have the same shape
        gap = max_output_size - tf.shape(class_keep)[0]
        class_keep = tf.pad(class_keep, [(0, gap)],
                            mode='CONSTANT', constant_values=-1)
        # Set shape so map_fn() can infer result shape
        class_keep.set_shape([max_output_size])
        return class_keep

    # 2. Map over class IDs
    nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids,
                         dtype=tf.int64)
    # 3. Merge results into one list, and remove -1 padding
    nms_keep = tf.reshape(nms_keep, [-1])
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
    # 4. Compute intersection between keep and nms_keep
    keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                    tf.expand_dims(nms_keep, 0))
    keep = tf.sparse_tensor_to_dense(keep)[0]

    # Keep top detections
    roi_count = max_output_size
    class_scores_keep = tf.gather(class_scores, keep)
    num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
    top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
    keep = tf.gather(keep, top_ids)

    # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
    # Coordinates are normalized.
    # tf.cast replaces the deprecated tf.to_float, matching the other
    # refine_detections_graph variant in this file.
    detections = tf.concat([
        tf.gather(refined_rois, keep),
        tf.cast(tf.gather(class_ids, keep), tf.float32)[..., tf.newaxis],
        tf.gather(class_scores, keep)[..., tf.newaxis]
    ], axis=1)

    # Pad with zeros if detections < max_output_size
    gap = max_output_size - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections
def refine_detections_graph(rois, probs, deltas, mrcnn_mask, gts, config=None):
    """Rescore, filter and NMS classified proposals; return boxes and masks.

    Inputs:
        rois: [N, (y1, x1, y2, x2)] in normalized coordinates
        probs: [N, num_classes]. Class probabilities.
        deltas: [N, num_classes * 4]. Class-specific bounding box deltas.
        mrcnn_mask: [N, MASK_H, MASK_W, NUM_CLASSES]
        gts: [4, H, W, NUM_CLASSES]; only forwarded to build_rescore_graph.
        config: provides NUM_CLASSES, BBOX_STD_DEV, DETECTION_MIN_CONFIDENCE,
            DETECTION_MAX_INSTANCES and DETECTION_NMS_THRESHOLD. It is
            dereferenced unconditionally, so it must be supplied despite the
            None default.

    Returns:
        detections: [config.DETECTION_MAX_INSTANCES,
            (y1, x1, y2, x2, class_id, score)] where coordinates are
            normalized; zero padded.
        masks: the rows of mrcnn_mask belonging to the kept detections, zero
            padded along the first axis to config.DETECTION_MAX_INSTANCES.
    """
    max_instances = config.DETECTION_MAX_INSTANCES

    # View the deltas as [N, num_classes, (dy, dx, log(dh), log(dw))].
    deltas = tf.reshape(deltas, [tf.shape(deltas)[0], config.NUM_CLASSES, 4])

    # Best class per ROI, plus its probability and its box deltas.
    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
    roi_class_index = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1)
    class_scores = tf.gather_nd(probs, roi_class_index)
    best_deltas = tf.gather_nd(deltas, roi_class_index)

    # Apply the deltas, then clip to the normalized image extent.
    # Shape: [boxes, (y1, x1, y2, x2)]
    refined_rois = utils.apply_box_deltas_graph(
        rois, best_deltas * config.BBOX_STD_DEV)
    refined_rois = utils.clip_boxes_graph(
        refined_rois, tf.constant([0., 0., 1., 1.]))

    # Replace the classifier scores by the rescored values; everything below
    # (confidence filter, NMS, ranking, output) uses the rescored scores.
    class_scores = build_rescore_graph(
        refined_rois, class_scores, mrcnn_mask, gts, config)

    # Drop background boxes (class id 0).
    keep = tf.where(class_ids > 0)[:, 0]
    # Drop low confidence boxes when a threshold is configured.
    if config.DETECTION_MIN_CONFIDENCE:
        confident = tf.where(
            class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                        tf.expand_dims(confident, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

    # Per-class NMS, step 1: gather the surviving candidates.
    pre_nms_class_ids = tf.gather(class_ids, keep)
    pre_nms_scores = tf.gather(class_scores, keep)
    pre_nms_rois = tf.gather(refined_rois, keep)
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

    def nms_keep_map(class_id):
        """Apply Non-Maximum Suppression on ROIs of the given class."""
        # Positions (within the candidates) of the ROIs of this class.
        members = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
        survivors = tf.image.non_max_suppression(
            tf.gather(pre_nms_rois, members),
            tf.gather(pre_nms_scores, members),
            max_output_size=config.DETECTION_MAX_INSTANCES,
            iou_threshold=config.DETECTION_NMS_THRESHOLD)
        # Translate back to indices into the original ROI list.
        survivors = tf.gather(keep, tf.gather(members, survivors))
        # Pad with -1 so every class yields a tensor of the same length.
        missing = config.DETECTION_MAX_INSTANCES - tf.shape(survivors)[0]
        survivors = tf.pad(survivors, [(0, missing)],
                           mode='CONSTANT', constant_values=-1)
        # A static shape lets map_fn() infer the result shape.
        survivors.set_shape([config.DETECTION_MAX_INSTANCES])
        return survivors

    # Step 2: run NMS once per class present among the candidates.
    nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids,
                         dtype=tf.int64)
    # Step 3: flatten and strip the -1 padding.
    nms_keep = tf.reshape(nms_keep, [-1])
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
    # Step 4: intersect with the earlier filter result.
    keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                    tf.expand_dims(nms_keep, 0))
    keep = tf.sparse_tensor_to_dense(keep)[0]

    # Keep at most max_instances detections, highest score first.
    kept_scores = tf.gather(class_scores, keep)
    num_keep = tf.minimum(tf.shape(kept_scores)[0], max_instances)
    ranked = tf.nn.top_k(kept_scores, k=num_keep, sorted=True)[1]
    keep = tf.gather(keep, ranked)

    # Arrange output as [N, (y1, x1, y2, x2, class_id, score)].
    # Coordinates are normalized.
    box_part = tf.gather(refined_rois, keep)
    id_part = tf.cast(tf.gather(class_ids, keep), tf.float32)[..., tf.newaxis]
    score_part = tf.gather(class_scores, keep)[..., tf.newaxis]
    detections = tf.concat([box_part, id_part, score_part], axis=1)
    masks = tf.gather(mrcnn_mask, keep)

    # Zero-pad detections up to DETECTION_MAX_INSTANCES rows.
    detection_gap = max_instances - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, detection_gap), (0, 0)], "CONSTANT")
    # Zero-pad the masks the same way along the first axis.
    mask_gap = max_instances - tf.shape(masks)[0]
    masks = tf.pad(masks, [(0, mask_gap), (0, 0), (0, 0), (0, 0)], 'CONSTANT')
    return detections, masks