import numpy as np
import skimage.transform
from mrcnn import utils


def unmold_predictions(self, detections, mrcnn_mask, original_image_shape,
                       image_shape, window):
    """Copied from mrcnn; the changes in the loop allow returning the
    per-instance probability maps alongside the binary masks."""
    # Detections array is padded with zeros. Find the first class_id == 0.
    zero_ix = np.where(detections[:, 4] == 0)[0]
    N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0]

    # Extract boxes, class_ids, scores, and class-specific masks
    boxes = detections[:N, :4]
    class_ids = detections[:N, 4].astype(np.int32)
    scores = detections[:N, 5]
    masks = mrcnn_mask[np.arange(N), :, :, class_ids]

    # Translate normalized coordinates in the resized image to pixel
    # coordinates in the original image before resizing
    window = utils.norm_boxes(window, image_shape[:2])
    wy1, wx1, wy2, wx2 = window
    shift = np.array([wy1, wx1, wy1, wx1])
    wh = wy2 - wy1  # window height
    ww = wx2 - wx1  # window width
    scale = np.array([wh, ww, wh, ww])
    # Convert boxes to normalized coordinates on the window
    boxes = np.divide(boxes - shift, scale)
    # Convert boxes to pixel coordinates on the original image
    boxes = utils.denorm_boxes(boxes, original_image_shape[:2])

    # Filter out detections with zero area. Happens in early training when
    # network weights are still random
    exclude_ix = np.where((boxes[:, 2] - boxes[:, 0]) *
                          (boxes[:, 3] - boxes[:, 1]) <= 0)[0]
    if exclude_ix.shape[0] > 0:
        boxes = np.delete(boxes, exclude_ix, axis=0)
        class_ids = np.delete(class_ids, exclude_ix, axis=0)
        scores = np.delete(scores, exclude_ix, axis=0)
        masks = np.delete(masks, exclude_ix, axis=0)
        N = class_ids.shape[0]

    # Resize masks to original image size and set boundary threshold.
    full_masks = []
    full_pmap = []
    for i in range(N):
        # Resize the soft mask to the box size; keep it as a probability map
        threshold = 0.5
        y1, x1, y2, x2 = boxes[i]
        full_pmap.append(
            skimage.transform.resize(masks[i], (y2 - y1, x2 - x1),
                                     order=1, mode="constant"))
        # Threshold the probability map to get a binary mask
        mask = np.where(full_pmap[i] >= threshold, 1, 0).astype(bool)
        # Put the mask in the right location. Boxes are in original-image
        # pixel coordinates, so the canvas uses original_image_shape.
        full_mask = np.zeros(original_image_shape[:2], dtype=bool)
        full_mask[y1:y2, x1:x2] = mask
        # Upstream equivalent:
        # full_mask = utils.unmold_mask(masks[i], boxes[i], original_image_shape)
        full_masks.append(full_mask)
    full_masks = np.stack(full_masks, axis=-1) \
        if full_masks else np.empty(original_image_shape[:2] + (0,))

    return boxes, class_ids, scores, full_masks, full_pmap
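# A minimal usage sketch for the probability maps returned above. The names
# model, detections, mrcnn_mask, image, molded_image, and window are
# assumptions here, standing in for the outputs of an inference call like
# the one in execute() below.
import numpy as np

boxes, class_ids, scores, full_masks, full_pmap = model.unmold_predictions(
    detections[0], mrcnn_mask[0], image.shape, molded_image[0].shape, window)

# Paste each per-instance probability map into a full-size canvas, taking
# the per-pixel maximum where instances overlap.
prob_canvas = np.zeros(image.shape[:2], dtype=np.float32)
for (y1, x1, y2, x2), pmap in zip(boxes, full_pmap):
    prob_canvas[y1:y2, x1:x2] = np.maximum(prob_canvas[y1:y2, x1:x2], pmap)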
def execute(image):
    if config is None or model is None:
        init()
    molded_image, image_meta, windows = model.mold_inputs([image])
    window = utils.norm_boxes(windows, molded_image[0].shape[:2])[0]
    anchors = model.get_anchors(molded_image[0].shape)
    anchors = np.broadcast_to(anchors,
                              (model.config.BATCH_SIZE,) + anchors.shape)
    dects, probs, deltas, masks, proposals, _, _ = model.keras_model.predict(
        [molded_image, image_meta, anchors], verbose=0)

    # Class ID and score per proposal
    class_ids = np.argmax(probs[0], axis=1)
    class_scores = probs[0, np.arange(class_ids.shape[0]), class_ids]
    refined_rois = refine_proposals(proposals, class_ids, deltas, window)
    keep = filter_rois(refined_rois, class_ids, class_scores)
    class_scores = probs[0][keep]

    # Detections come back in a different order than the indices in the
    # "keep" array, so find the matching detection for each kept ROI to put
    # class_scores into detection order.
    bounding_boxes = utils.denorm_boxes(dects[0, :len(keep), :4],
                                        molded_image[0].shape[:2])
    roi_boxes = utils.denorm_boxes(refined_rois[keep],
                                   molded_image[0].shape[:2])
    perm = []
    for i in np.arange(len(keep)):
        perm = np.append(
            perm,
            np.where(np.all(roi_boxes == bounding_boxes[i],
                            axis=1))[0][0]).astype(np.int32)
    class_scores = class_scores[perm]

    bounding_boxes, _, _, segmentation = model.unmold_detections(
        dects[0], masks[0], image.shape, molded_image[0].shape, windows[0])
    keep_fusion = filter_fusion(segmentation)
    return generate_result(bounding_boxes[keep_fusion],
                           segmentation[:, :, keep_fusion],
                           class_scores[keep_fusion])
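# A hedged alternative to the box-matching loop in execute() above: the same
# permutation can be computed with one broadcast comparison. This is a
# sketch, assuming each detection has exactly one matching row in roi_boxes.
import numpy as np


def match_detections_to_rois(bounding_boxes, roi_boxes):
    # matches[i, j] is True where detection box i equals kept ROI box j
    matches = np.all(roi_boxes[None, :, :] == bounding_boxes[:, None, :],
                     axis=2)
    # First (and by assumption only) matching ROI index per detection
    return np.argmax(matches, axis=1).astype(np.int32)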
def unmold_detections(detections, mrcnn_mask, original_image_shape,
                      image_shape, window):
    # Detections array is padded with zeros. Find the first class_id == 0.
    zero_ix = np.where(detections[:, 4] == 0)[0]
    N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0]

    # Extract boxes, class_ids, scores, and class-specific masks
    boxes = detections[:N, :4]
    class_ids = detections[:N, 4].astype(np.int32)
    scores = detections[:N, 5]
    # Pick each detection's own class mask: [N, mask_h, mask_w]
    masks = mrcnn_mask[np.arange(N), :, :, class_ids]

    # Translate normalized coordinates in the resized image to pixel
    # coordinates in the original image before resizing
    window = utils.norm_boxes(window, image_shape[:2])
    wy1, wx1, wy2, wx2 = window
    shift = np.array([wy1, wx1, wy1, wx1])
    wh = wy2 - wy1  # window height
    ww = wx2 - wx1  # window width
    scale = np.array([wh, ww, wh, ww])
    # Convert boxes to normalized coordinates on the window
    boxes = np.divide(boxes - shift, scale)
    # Convert boxes to pixel coordinates on the original image
    boxes = utils.denorm_boxes(boxes, original_image_shape[:2])

    # Filter out detections with zero area. Happens in early training when
    # network weights are still random
    exclude_ix = np.where((boxes[:, 2] - boxes[:, 0]) *
                          (boxes[:, 3] - boxes[:, 1]) <= 0)[0]
    if exclude_ix.shape[0] > 0:
        boxes = np.delete(boxes, exclude_ix, axis=0)
        class_ids = np.delete(class_ids, exclude_ix, axis=0)
        scores = np.delete(scores, exclude_ix, axis=0)
        masks = np.delete(masks, exclude_ix, axis=0)
        N = class_ids.shape[0]

    # Resize masks to original image size and set boundary threshold.
    full_masks = []
    for i in range(N):
        # Convert neural network mask to full size mask
        full_mask = utils.unmold_mask(masks[i], boxes[i],
                                      original_image_shape)
        full_masks.append(full_mask)
    full_masks = np.stack(full_masks, axis=-1) \
        if full_masks else np.empty(original_image_shape[:2] + (0,))

    return boxes, class_ids, scores, full_masks
from mrcnn import utils
from mrcnn.model import compute_backbone_shapes

# Module-level cache so anchors computed for an image shape are reused
# across calls (a dict created inside the function would be rebuilt on
# every call and never hit).
_anchor_cache = {}


def get_anchors(image_shape, config):
    """Returns the anchor pyramid for the given image size."""
    # Cache anchors and reuse if the image shape is the same
    if not tuple(image_shape) in _anchor_cache:
        backbone_shapes = compute_backbone_shapes(config, image_shape)
        # Generate anchors
        a = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                           config.RPN_ANCHOR_RATIOS,
                                           backbone_shapes,
                                           config.BACKBONE_STRIDES,
                                           config.RPN_ANCHOR_STRIDE)
        # Keep a copy of the latest anchors in pixel coordinates because
        # it's used in the inspect_model notebooks.
        # TODO: remove this after the notebooks are refactored not to use it
        anchors = a
        # Normalize coordinates
        _anchor_cache[tuple(image_shape)] = utils.norm_boxes(
            a, image_shape[:2])
    return _anchor_cache[tuple(image_shape)]
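# Hypothetical usage of get_anchors, mirroring how anchors are consumed in
# execute() and detect(); config is assumed to be an initialized mrcnn
# Config instance with BATCH_SIZE set.
import numpy as np

anchors = get_anchors((1024, 1024, 3), config)
# Duplicate across the batch dimension, as the prediction call expects
anchors = np.broadcast_to(anchors, (config.BATCH_SIZE,) + anchors.shape)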
def unmold_detections(detections, mrcnn_mask, original_image_shape,
                      image_shape, window):
    """Reformats the detections of one image from the format of the neural
    network output to a format suitable for use in the rest of the
    application.

    detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates
    mrcnn_mask: [N, height, width, num_classes]
    original_image_shape: [H, W, C] Original image shape before resizing
    image_shape: [H, W, C] Shape of the image after resizing and padding
    window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the
            real image is, excluding the padding.

    Returns:
    boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels
    class_ids: [N] Integer class IDs for each bounding box
    scores: [N] Float probability scores of the class_id
    masks: [height, width, num_instances] Instance masks
    """
    # How many detections do we have?
    # Detections array is padded with zeros. Find the first class_id == 0.
    zero_ix = np.where(detections[:, 4] == 0)[0]
    N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0]

    # Extract boxes, class_ids, scores, and class-specific masks
    boxes = detections[:N, :4]
    class_ids = detections[:N, 4].astype(np.int32)
    scores = detections[:N, 5]
    masks = mrcnn_mask[np.arange(N), :, :, class_ids]

    # Translate normalized coordinates in the resized image to pixel
    # coordinates in the original image before resizing
    window = utils.norm_boxes(window, image_shape[:2])
    wy1, wx1, wy2, wx2 = window
    shift = np.array([wy1, wx1, wy1, wx1])
    wh = wy2 - wy1  # window height
    ww = wx2 - wx1  # window width
    scale = np.array([wh, ww, wh, ww])
    # Convert boxes to normalized coordinates on the window
    boxes = np.divide(boxes - shift, scale)
    # Convert boxes to pixel coordinates on the original image
    boxes = utils.denorm_boxes(boxes, original_image_shape[:2])

    # Filter out detections with zero area. Happens in early training when
    # network weights are still random
    exclude_ix = np.where((boxes[:, 2] - boxes[:, 0]) *
                          (boxes[:, 3] - boxes[:, 1]) <= 0)[0]
    if exclude_ix.shape[0] > 0:
        boxes = np.delete(boxes, exclude_ix, axis=0)
        class_ids = np.delete(class_ids, exclude_ix, axis=0)
        scores = np.delete(scores, exclude_ix, axis=0)
        masks = np.delete(masks, exclude_ix, axis=0)
        N = class_ids.shape[0]

    # Resize masks to original image size and set boundary threshold.
    full_masks = []
    for i in range(N):
        # Convert neural network mask to full size mask
        full_mask = utils.unmold_mask(masks[i], boxes[i],
                                      original_image_shape)
        full_masks.append(full_mask)
    full_masks = np.stack(full_masks, axis=-1) \
        if full_masks else np.empty(original_image_shape[:2] + (0,))

    return boxes, class_ids, scores, full_masks
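# Worked example of the window un-shift and re-scale above, with
# illustrative numbers: a 512x1024 image letterboxed into a 1024x1024 molded
# input, occupying window (128, 0, 896, 1024) in pixel coordinates.
import numpy as np
from mrcnn import utils

window = utils.norm_boxes(np.array([128, 0, 896, 1024]), (1024, 1024))
wy1, wx1, wy2, wx2 = window
shift = np.array([wy1, wx1, wy1, wx1])
scale = np.array([wy2 - wy1, wx2 - wx1, wy2 - wy1, wx2 - wx1])
# A detection spanning the whole window maps back to the whole original image
box = window.reshape(1, 4)
print(utils.denorm_boxes((box - shift) / scale, (512, 1024)))
# -> [[   0    0  512 1024]]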
def detect(self, targets, images, verbose=0, random_detections=False,
           eps=1e-6):
    """Runs the detection pipeline.

    images: List of images, potentially of different sizes.

    Returns a list of dicts, one dict per image. The dict contains:
    rois: [N, (y1, x1, y2, x2)] detection bounding boxes
    class_ids: [N] int class IDs
    scores: [N] float probability scores for the class IDs
    masks: [H, W, N] instance binary masks
    """
    assert self.mode == "inference", "Create model in inference mode."
    assert len(images) == self.config.BATCH_SIZE, \
        "len(images) must be equal to BATCH_SIZE"

    if verbose:
        modellib.log("Processing {} images".format(len(images)))
        for image in images:
            modellib.log("image", image)
        # CHANGE: added target to logs
        modellib.log("target", np.stack(targets))

    # Mold inputs to the format expected by the neural network
    # CHANGE: removed molding of targets -> detect expects molded targets
    # TODO!
    molded_images, image_metas, windows = self.mold_inputs(images)
    # molded_targets, target_metas, target_windows = self.mold_inputs(targets)
    molded_targets = np.stack(targets)

    # Validate image sizes
    # All images in a batch MUST be of the same size
    image_shape = molded_images[0].shape
    for g in molded_images[1:]:
        assert g.shape == image_shape, \
            "After resizing, all images must have the same size. " \
            "Check IMAGE_RESIZE_MODE and image sizes."
    # CHANGE: add size assertion for targets
    target_shape = molded_targets[0].shape
    for g in molded_targets[1:]:
        assert g.shape == target_shape, \
            "After resizing, all targets must have the same size. " \
            "Check IMAGE_RESIZE_MODE and target sizes."

    # Anchors
    anchors = self.get_anchors(image_shape)
    # Duplicate across the batch dimension because Keras requires it
    # TODO: can this be optimized to avoid duplicating the anchors?
    anchors = np.broadcast_to(anchors,
                              (self.config.BATCH_SIZE,) + anchors.shape)

    if verbose:
        modellib.log("molded_images", molded_images)
        # modellib.log("image_metas", image_metas)
        # CHANGE: add targets to log
        modellib.log("molded_targets", molded_targets)
        # modellib.log("target_metas", target_metas)
        modellib.log("anchors", anchors)

    # Run object detection
    # CHANGE: use the siamese detection model
    detections, _, _, mrcnn_mask, _, _, _ = \
        self.keras_model.predict(
            [molded_images, image_metas, molded_targets, anchors], verbose=0)

    if random_detections:
        # Randomly shift the detected boxes within the image window
        window_limits = utils.norm_boxes(windows,
                                         molded_images[0].shape[:2])[0]
        y_shifts = np.random.uniform(-detections[0, :, 0] + window_limits[0],
                                     window_limits[2] - detections[0, :, 2])
        x_shifts = np.random.uniform(-detections[0, :, 1] + window_limits[1],
                                     window_limits[3] - detections[0, :, 3])
        zeros = np.zeros(detections.shape[1])
        shifts = np.stack([y_shifts, x_shifts, y_shifts, x_shifts,
                           zeros, zeros], axis=-1)[np.newaxis]
        detections = detections + shifts

        # Randomly permute the non-zero confidence scores
        non_zero_confidences = np.where(detections[0, :, -1])[0]
        random_perm = np.random.permutation(non_zero_confidences)
        permuted_confidences = np.concatenate(
            [detections[0, :, -1][:len(non_zero_confidences)][random_perm],
             np.zeros(detections.shape[1] - len(non_zero_confidences))])
        detections = np.concatenate(
            [detections[:, :, :-1],
             permuted_confidences.reshape(1, detections.shape[1], 1)],
            axis=-1)

        # Keep the sorted order of confidence scores
        detections = detections[:, np.argsort(-detections[0, :, -1]), :]

    # Process detections
    results = []
    for i, image in enumerate(images):
        final_rois, final_class_ids, final_scores, final_masks = \
            self.unmold_detections(detections[i], mrcnn_mask[i],
                                   image.shape, molded_images[i].shape,
                                   windows[i])
        results.append({
            "rois": final_rois,
            "class_ids": final_class_ids,
            "scores": final_scores,
            "masks": final_masks,
        })
    return results
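# Hypothetical call of the siamese detect() above, assuming BATCH_SIZE == 1;
# model, image, and molded_target are assumptions, with molded_target already
# resized to the network's target input size as detect() expects.
results = model.detect([molded_target], [image], verbose=1)
r = results[0]
print(r["rois"].shape, r["class_ids"], r["scores"], r["masks"].shape)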
import numpy as np
import pandas as pd
from mrcnn import utils


def read_results_to_df(training_results_file):
    """Loads a per-epoch training-results CSV into a DataFrame and converts
    the box coordinates back to pixel coordinates on the original image."""
    cols = ['epoch', 'y1', 'x1', 'y2', 'x2',
            '0', '1', '2', '3', '4', '5', '6', '7', '8',
            'p0', 'p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p8']
    res_arr = np.loadtxt(
        training_results_file, skiprows=0, delimiter=",",
        dtype={'names': tuple(cols),
               'formats': ('i4', 'f4', 'f4', 'f4', 'f4', 'i4', 'i4', 'i4',
                           'i4', 'i4', 'i4', 'i4', 'i4', 'i4', 'f4', 'f4',
                           'f4', 'f4', 'f4', 'f4', 'f4', 'f4', 'f4')})
    res_df = pd.DataFrame(res_arr, columns=cols)

    boxes = res_df[['y1', 'x1', 'y2', 'x2']].values
    image_shape = (256, 256, 4)
    original_image_shape = (256, 256, 4)
    window = [0, 0, 256, 256]
    window = utils.norm_boxes(window, image_shape[:2])
    wy1, wx1, wy2, wx2 = window
    shift = np.array([wy1, wx1, wy1, wx1])
    wh = wy2 - wy1  # window height
    ww = wx2 - wx1  # window width
    scale = np.array([wh, ww, wh, ww])
    # Convert boxes to normalized coordinates on the window
    boxes = np.divide(boxes - shift, scale)
    # Convert boxes to pixel coordinates on the original image
    boxes = utils.denorm_boxes(boxes, original_image_shape[:2])
    res_df[['y1_', 'x1_', 'y2_', 'x2_']] = boxes

    # Filter out detections with zero area. Happens in early training when
    # network weights are still random
    exclude_ix = np.where((boxes[:, 2] - boxes[:, 0]) *
                          (boxes[:, 3] - boxes[:, 1]) <= 0)[0]
    # Optionally also exclude very large boxes (currently unused)
    exclude_ix_big_area = np.where((boxes[:, 2] - boxes[:, 0]) *
                                   (boxes[:, 3] - boxes[:, 1]) > 20)[0]
    update_df = res_df.drop(exclude_ix)
    # update_df = update_df.drop(exclude_ix_big_area)

    # Box centers in pixel coordinates (index-aligned with update_df)
    height = update_df['y2_'] - update_df['y1_']
    width = update_df['x2_'] - update_df['x1_']
    update_df['bb_center_y'] = update_df['y1_'] + 0.5 * height
    update_df['bb_center_x'] = update_df['x1_'] + 0.5 * width
    return update_df
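# Hypothetical usage of read_results_to_df; "training_results.csv" is a
# placeholder path for the comma-separated per-detection log this function
# expects (epoch, box coordinates, class flags, class probabilities).
df = read_results_to_df("training_results.csv")
print(df[['epoch', 'y1_', 'x1_', 'y2_', 'x2_',
          'bb_center_y', 'bb_center_x']].head())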