class DetectionLayer(KE.Layer):
    """
    Takes classified proposal boxes and their bounding box deltas and
    returns the final detection boxes.

    Returns:
        [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)]
        where coordinates are normalized.
    """

    def __init__(self, batch_size, **kwargs):
        super(DetectionLayer, self).__init__(**kwargs)
        self.batch_size = batch_size
        self.detection_max_instances = cfg.TEST.DETECTION_MAX_INSTANCES
        self.image_utils = ImageUtils()
        self.bbox_utils = BboxUtil()
        self.misc_utils = MiscUtils()

    def call(self, inputs):
        rois = inputs[0]
        mrcnn_class = inputs[1]
        mrcnn_bbox = inputs[2]
        image_meta = inputs[3]

        # Get windows of images in normalized coordinates. Windows are the
        # area in the image that excludes the padding.
        # Use the shape of the first image in the batch to normalize the
        # window because we know that all images get resized to the same size.
        m = self.image_utils.parse_image_meta_graph(image_meta)
        image_shape = m['image_shape'][0]
        window = self.bbox_utils.norm_boxes_graph(m['window'], image_shape[:2])

        # Run detection refinement graph on each item in the batch
        detections_batch = self.misc_utils.batch_slice(
            [rois, mrcnn_class, mrcnn_bbox, window],
            lambda x, y, w, z: refine_detections_graph(x, y, w, z),
            self.batch_size)

        # Reshape output
        # [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)]
        # in normalized coordinates
        return tf.reshape(
            detections_batch,
            [self.batch_size, self.detection_max_instances, 6])

    def compute_output_shape(self, input_shape):
        return (None, self.detection_max_instances, 6)

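
# Illustrative sketch (not used by the layer above): once the packed detections
# tensor has been evaluated to a NumPy array, it can be unpacked per image like
# this. It assumes the common Mask R-CNN convention that zero-padded rows are
# all zeros (class_id == 0); the helper name is hypothetical.
def _example_unpack_detections(detections_one_image):
    """detections_one_image: [DETECTION_MAX_INSTANCES, 6] NumPy array."""
    # Real detections never carry the background class id 0, so rows with
    # class_id == 0 are padding and can be dropped.
    keep = np.where(detections_one_image[:, 4] > 0)[0]
    boxes = detections_one_image[keep, :4]  # normalized (y1, x1, y2, x2)
    class_ids = detections_one_image[keep, 4].astype(np.int32)
    scores = detections_one_image[keep, 5]
    return boxes, class_ids, scores
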
class DetectionTargetLayer(KE.Layer):
    """
    Subsamples proposals and generates target box refinement, class_ids,
    and masks for each.

    Inputs:
        proposals: [batch, N, (y1, x1, y2, x2)] in normalized coordinates.
            Might be zero padded if there are not enough proposals.
        gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs.
        gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized
            coordinates.
        gt_masks: [batch, height, width, MAX_GT_INSTANCES] of boolean type

    Returns: Target ROIs and corresponding class IDs, bounding box shifts,
        and masks.
        rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized
            coordinates
        target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]. Integer class IDs.
        target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
        target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width]
            Masks cropped to bbox boundaries and resized to neural network
            output size.

    Note: Returned arrays might be zero padded if there are not enough
        target ROIs.
    """

    def __init__(self, batch_size, **kwargs):
        super(DetectionTargetLayer, self).__init__(**kwargs)
        self.batch_size = batch_size
        self.misc_utils = MiscUtils()
        self.rois_per_image = cfg.TRAIN.ROIS_PER_IMAGE
        self.mask_shape = cfg.TRAIN.MASK_SHAPE

    def call(self, inputs):
        """
        Invoked by Keras when the layer is applied to its input tensors.

        :param inputs: [proposals, gt_class_ids, gt_boxes, gt_masks]
        :return: [rois, target_class_ids, target_deltas, target_mask]
        """
        proposals = inputs[0]
        gt_class_ids = inputs[1]
        gt_boxes = inputs[2]
        gt_masks = inputs[3]

        # Slice the batch and run a graph for each slice
        # TODO: Rename target_bbox to target_deltas for clarity
        names = ["rois", "target_class_ids", "target_bbox", "target_mask"]
        outputs = self.misc_utils.batch_slice(
            [proposals, gt_class_ids, gt_boxes, gt_masks],
            lambda w, x, y, z: self.misc_utils.detection_targets_graph(
                w, x, y, z),
            self.batch_size, names=names)
        return outputs

    def compute_output_shape(self, input_shape):
        return [
            (None, self.rois_per_image, 4),  # rois
            (None, self.rois_per_image),  # class_ids
            (None, self.rois_per_image, 4),  # deltas
            (None, self.rois_per_image, self.mask_shape[0],
             self.mask_shape[1])  # masks
        ]

    def compute_mask(self, inputs, mask=None):
        return [None, None, None, None]

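
# Usage sketch (illustrative, not this repo's actual model builder): a
# DetectionTargetLayer is normally applied to the proposal and ground-truth
# tensors inside the training graph. The argument names below are placeholders
# for whatever tensors the surrounding model provides.
def _example_apply_detection_targets(batch_size, target_rois, input_gt_class_ids,
                                     gt_boxes, input_gt_masks):
    rois, target_class_ids, target_deltas, target_mask = DetectionTargetLayer(
        batch_size, name="proposal_targets")(
        [target_rois, input_gt_class_ids, gt_boxes, input_gt_masks])
    return rois, target_class_ids, target_deltas, target_mask
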
class ProposalLayer(KE.Layer):
    """
    Receives anchor scores and selects a subset to pass as proposals to the
    second stage. Filtering is done based on anchor scores and non-max
    suppression to remove overlaps. It also applies bounding box refinement
    deltas to anchors.

    Inputs:
        rpn_probs: [batch, num_anchors, (bg prob, fg prob)]
        rpn_bbox: [batch, num_anchors, (dy, dx, log(dh), log(dw))]
        anchors: [batch, num_anchors, (y1, x1, y2, x2)] anchors in
            normalized coordinates

    Returns:
        Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)]
    """

    def __init__(self, proposal_count, nms_threshold, batch_size, **kwargs):
        super(ProposalLayer, self).__init__(**kwargs)
        self.proposal_count = proposal_count
        self.nms_threshold = nms_threshold
        self.batch_size = batch_size
        self.misc_utils = MiscUtils()
        self.bbox_utils = BboxUtil()

    def call(self, inputs):
        """
        Invoked by Keras when the layer is applied to its input tensors.

        :param inputs: [rpn_probs, rpn_bbox, anchors]
        :return: proposals in normalized coordinates
        """
        # Box Scores. Use the foreground class confidence. [batch, num_rois]
        scores = inputs[0][:, :, 1]

        # Box deltas [batch, num_rois, 4]
        deltas = inputs[1]
        rpn_bbox_std_dev = np.array(cfg.COMMON.RPN_BBOX_STD_DEV)
        deltas = deltas * np.reshape(rpn_bbox_std_dev, [1, 1, 4])

        # Anchors
        anchors = inputs[2]

        # Improve performance by trimming to top anchors by score
        # and doing the rest on the smaller subset.
        pre_nms_limit = tf.minimum(cfg.COMMON.PRE_NMS_LIMIT,
                                   tf.shape(anchors)[1])
        ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True,
                         name="top_anchors").indices
        scores = self.misc_utils.batch_slice([scores, ix],
                                             lambda x, y: tf.gather(x, y),
                                             self.batch_size)
        deltas = self.misc_utils.batch_slice([deltas, ix],
                                             lambda x, y: tf.gather(x, y),
                                             self.batch_size)
        pre_nms_anchors = self.misc_utils.batch_slice(
            [anchors, ix], lambda a, x: tf.gather(a, x),
            self.batch_size, names=["pre_nms_anchors"])

        # Apply deltas to anchors to get refined anchors.
        # [batch, N, (y1, x1, y2, x2)]
        boxes = self.misc_utils.batch_slice(
            [pre_nms_anchors, deltas],
            lambda x, y: self.bbox_utils.apply_box_deltas_graph(x, y),
            self.batch_size, names=["refined_anchors"])

        # Clip to image boundaries. Since we're in normalized coordinates,
        # clip to 0..1 range. [batch, N, (y1, x1, y2, x2)]
        window = np.array([0, 0, 1, 1], dtype=np.float32)
        boxes = self.misc_utils.batch_slice(
            boxes,
            lambda x: self.bbox_utils.clip_boxes_graph(x, window),
            self.batch_size, names=["refined_anchors_clipped"])

        # Filter out small boxes
        # According to Xinlei Chen's paper, this reduces detection accuracy
        # for small objects, so we're skipping it.

        # Non-max suppression
        def nms(boxes, scores):
            indices = tf.image.non_max_suppression(
                boxes, scores, self.proposal_count, self.nms_threshold,
                name="rpn_non_max_suppression")
            proposals = tf.gather(boxes, indices)
            # Pad if needed
            padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0], 0)
            proposals = tf.pad(proposals, [(0, padding), (0, 0)])
            return proposals

        proposals = self.misc_utils.batch_slice([boxes, scores], nms,
                                                self.batch_size)
        return proposals

    def compute_output_shape(self, input_shape):
        return (None, self.proposal_count, 4)

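
# Illustrative NumPy sketch of the refinement that apply_box_deltas_graph
# performs on each (anchor, delta) pair, assuming the standard Mask R-CNN
# (dy, dx, log(dh), log(dw)) parameterization described in the docstring
# above. It is not used by the layer itself; the helper name is hypothetical.
def _example_apply_box_deltas(boxes, deltas):
    """boxes: [N, (y1, x1, y2, x2)], deltas: [N, (dy, dx, log(dh), log(dw))]."""
    # Convert corners to center/size form
    heights = boxes[:, 2] - boxes[:, 0]
    widths = boxes[:, 3] - boxes[:, 1]
    center_y = boxes[:, 0] + 0.5 * heights
    center_x = boxes[:, 1] + 0.5 * widths
    # Shift the center and rescale the size
    center_y = center_y + deltas[:, 0] * heights
    center_x = center_x + deltas[:, 1] * widths
    heights = heights * np.exp(deltas[:, 2])
    widths = widths * np.exp(deltas[:, 3])
    # Back to corner coordinates
    y1 = center_y - 0.5 * heights
    x1 = center_x - 0.5 * widths
    return np.stack([y1, x1, y1 + heights, x1 + widths], axis=1)
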
class AnchorUtils(object):

    def __init__(self):
        self.misc_utils = MiscUtils()
        self.bbox_utils = BboxUtil()
        # Cache anchors and reuse if image shape is the same
        self._anchor_cache = {}
        # self.anchors = None

    def get_anchors(self, image_shape):
        """
        :param image_shape: [h, w, c]
        :return: anchor pyramid for the given image size
        """
        if tuple(image_shape) not in self._anchor_cache:
            # Generate Anchors
            anchor = self.generate_pyramid_anchors(image_shape)

            # Keep a copy of the latest anchors in pixel coordinates because
            # it's used in inspect_model notebooks.
            # TODO: Remove this after the notebooks are refactored to not use it
            # self.anchors = anchor

            self._anchor_cache[tuple(image_shape)] = \
                self.bbox_utils.norm_boxes(anchor, image_shape[:2])

        return self._anchor_cache[tuple(image_shape)]

    def generate_pyramid_anchors(self, image_shape):
        """
        Generate anchors at different levels of a feature pyramid. Each scale
        is associated with a level of the pyramid, but each ratio is used in
        all levels of the pyramid.

        :param image_shape: [h, w, c]
        :return: anchors: [N, (y1, x1, y2, x2)] All generated anchors in one
            array, sorted in the same order as the given scales. So, anchors
            of scale[0] come first, then anchors of scale[1], and so on.
        """
        backbone_strides = cfg.COMMON.BACKBONE_STRIDES

        # [N, (height, width)]. Where N is the number of stages
        backbone_shape = self.misc_utils.compute_backbone_shapes(
            image_shape, backbone_strides)

        # Anchors
        # [anchor_count, (y1, x1, y2, x2)]
        anchors = []
        scales = cfg.COMMON.RPN_ANCHOR_SCALES
        scales_len = len(scales)
        for i in range(scales_len):
            anchor_box = self.generate_anchors(scales[i], backbone_shape[i],
                                               backbone_strides[i])
            anchors.append(anchor_box)

        return np.concatenate(anchors, axis=0)

    # generate anchor boxes
    def generate_anchors(self, scales, backbone_shape, backbone_strides):
        """
        :param scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
        :param backbone_shape: [height, width] spatial shape of the feature map
            over which to generate anchors.
        :param backbone_strides: Stride of the feature map relative to the image
            in pixels.
        :return: anchor boxes in corner coordinates [N, (y1, x1, y2, x2)]
        """
        # 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
        ratios = cfg.COMMON.RPN_ANCHOR_RATIOS

        # Stride of anchors on the feature map. For example, if the value is 2
        # then generate anchors for every other feature map pixel.
        anchor_stride = cfg.COMMON.RPN_ANCHOR_STRIDE

        # Get all combinations of scales and ratios
        scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
        scales = scales.flatten()
        ratios = ratios.flatten()

        # Enumerate heights and widths from scales and ratios
        heights = scales / np.sqrt(ratios)
        widths = scales * np.sqrt(ratios)

        # Enumerate shifts in feature space
        shifts_y = np.arange(0, backbone_shape[0], anchor_stride) * backbone_strides
        shifts_x = np.arange(0, backbone_shape[1], anchor_stride) * backbone_strides
        shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)

        # Enumerate combinations of shifts, widths, and heights
        box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
        box_heights, box_centers_y = np.meshgrid(heights, shifts_y)

        # Reshape to get a list of (y, x) and a list of (h, w)
        box_centers = np.stack([box_centers_y, box_centers_x],
                               axis=2).reshape([-1, 2])
        box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])

        # Convert to corner coordinates (y1, x1, y2, x2)
        boxes = np.concatenate(
            [box_centers - 0.5 * box_sizes, box_centers + 0.5 * box_sizes],
            axis=1)
        return boxes

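
# Worked example of the anchor math above with explicit illustrative numbers
# (these are NOT the values in cfg): a single 32 px scale, ratios [0.5, 1, 2],
# a 2x2 feature map with a 16 px stride and an anchor stride of 1 yield
# 2 * 2 * 3 = 12 anchor boxes. Standalone NumPy, so it can be checked without
# the config; the helper name is hypothetical.
def _example_anchor_grid():
    scales, ratios = np.meshgrid(np.array([32]), np.array([0.5, 1.0, 2.0]))
    scales, ratios = scales.flatten(), ratios.flatten()
    heights = scales / np.sqrt(ratios)   # e.g. 32 / sqrt(0.5) ~ 45.3 for ratio 0.5
    widths = scales * np.sqrt(ratios)    # e.g. 32 * sqrt(0.5) ~ 22.6 for ratio 0.5
    shifts = np.arange(0, 2, 1) * 16     # feature cells 0 and 1 -> image pixels 0, 16
    shifts_x, shifts_y = np.meshgrid(shifts, shifts)
    box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
    box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
    centers = np.stack([box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
    sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
    # (12, 4) array of (y1, x1, y2, x2), one box per feature cell / ratio pair
    return np.concatenate([centers - 0.5 * sizes, centers + 0.5 * sizes], axis=1)
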