def get_anchors(self, image_shape): """Returns anchor pyramid for the given image size.""" backbone_shapes = compute_backbone_shapes(self.config, image_shape) # Cache anchors and reuse if image shape is the same if not hasattr(self, "_anchor_cache"): self._anchor_cache = {} if not tuple(image_shape) in self._anchor_cache: # Generate Anchors a = saved_model_utils.generate_pyramid_anchors( self.config.RPN_ANCHOR_SCALES, self.config.RPN_ANCHOR_RATIOS, backbone_shapes, self.config.BACKBONE_STRIDES, self.config.RPN_ANCHOR_STRIDE) # Keep a copy of the latest anchors in pixel coordinates because # it's used in inspect_model notebooks. # TODO: Remove this after the notebook are refactored to not use it self.anchors = a # Normalize coordinates self._anchor_cache[tuple( image_shape)] = saved_model_utils.norm_boxes( a, image_shape[:2]) return self._anchor_cache[tuple(image_shape)]
def unmold_detections(self, detections, mrcnn_mask, original_image_shape, image_shape, window): """Reformats the detections of one image from the format of the neural network output to a format suitable for use in the rest of the application. detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates mrcnn_mask: [N, height, width, num_classes] original_image_shape: [H, W, C] Original image shape before resizing image_shape: [H, W, C] Shape of the image after resizing and padding window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the real image is excluding the padding. Returns: boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels class_ids: [N] Integer class IDs for each bounding box scores: [N] Float probability scores of the class_id masks: [height, width, num_instances] Instance masks """ # How many detections do we have? # Detections array is padded with zeros. Find the first class_id == 0. zero_ix = np.where(detections[:, 4] == 0)[0] N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0] # Extract boxes, class_ids, scores, and class-specific masks boxes = detections[:N, :4] class_ids = detections[:N, 4].astype(np.int32) scores = detections[:N, 5] masks = mrcnn_mask[np.arange(N), :, :, class_ids] # Translate normalized coordinates in the resized image to pixel # coordinates in the original image before resizing window = saved_model_utils.norm_boxes(window, image_shape[:2]) wy1, wx1, wy2, wx2 = window shift = np.array([wy1, wx1, wy1, wx1]) wh = wy2 - wy1 # window height ww = wx2 - wx1 # window width scale = np.array([wh, ww, wh, ww]) # Convert boxes to normalized coordinates on the window boxes = np.divide(boxes - shift, scale) # Convert boxes to pixel coordinates on the original image boxes = saved_model_utils.denorm_boxes(boxes, original_image_shape[:2]) # Filter out detections with zero area. Happens in early training when # network weights are still random exclude_ix = np.where((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0] if exclude_ix.shape[0] > 0: boxes = np.delete(boxes, exclude_ix, axis=0) class_ids = np.delete(class_ids, exclude_ix, axis=0) scores = np.delete(scores, exclude_ix, axis=0) masks = np.delete(masks, exclude_ix, axis=0) N = class_ids.shape[0] # Resize masks to original image size and set boundary threshold. full_masks = [] for i in range(N): # Convert neural network mask to full size mask full_mask = saved_model_utils.unmold_mask(masks[i], boxes[i], original_image_shape) full_masks.append(full_mask) full_masks = np.stack(full_masks, axis=-1)\ if full_masks else np.empty(original_image_shape[:2] + (0,)) return boxes, class_ids, scores, full_masks