def ensemble_prediction(model, config, image):
    """Test-time augmentation method using non-maximum suppression."""
    masks = []
    scores = []
    boxes = []
    results = {}

    # Original image
    result = model.detect([image], verbose=0,
                          mask_threshold=config.DETECTION_MASK_THRESHOLD)[0]
    masks.append(result['masks'])
    scores.append(result['scores'])
    boxes.append(utils.extract_bboxes(result['masks']))

    # Horizontal flip (undone on the predicted masks)
    temp_img = np.fliplr(image)
    result = model.detect([temp_img], verbose=0,
                          mask_threshold=config.DETECTION_MASK_THRESHOLD)[0]
    mask = np.fliplr(result['masks'])
    masks.append(mask)
    scores.append(result['scores'])
    boxes.append(utils.extract_bboxes(mask))

    # Vertical flip
    temp_img = np.flipud(image)
    result = model.detect([temp_img], verbose=0,
                          mask_threshold=config.DETECTION_MASK_THRESHOLD)[0]
    mask = np.flipud(result['masks'])
    masks.append(mask)
    scores.append(result['scores'])
    boxes.append(utils.extract_bboxes(mask))

    # Random 90-degree rotation, reversed with the negated k
    angle = np.random.choice([1, -1])
    temp_img = np.rot90(image, k=angle, axes=(0, 1))
    result = model.detect([temp_img], verbose=0,
                          mask_threshold=config.DETECTION_MASK_THRESHOLD)[0]
    mask = np.rot90(result['masks'], k=-angle, axes=(0, 1))
    masks.append(mask)
    scores.append(result['scores'])
    boxes.append(utils.extract_bboxes(mask))

    masks = np.concatenate(masks, axis=-1)
    scores = np.concatenate(scores, axis=-1)
    boxes = np.concatenate(boxes, axis=0)

    # Merge the augmented predictions with a hard NMS threshold
    # (config.DETECTION_NMS_THRESHOLD could be used instead of 0.1)
    keep_ind = utils.non_max_suppression(boxes, scores, 0.1)
    masks = masks[:, :, keep_ind]
    scores = scores[keep_ind]

    results['masks'] = masks
    results['scores'] = scores
    return results
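# The merge step above relies on utils.non_max_suppression from the Matterport
# Mask R-CNN utils. Below is a minimal, self-contained sketch of that greedy
# IoU suppression (assuming boxes are [N, (y1, x1, y2, x2)]), useful for
# checking what the hard 0.1 threshold does to the TTA detections:
import numpy as np

def nms_sketch(boxes, scores, iou_threshold):
    """Greedy NMS: keep the highest-scoring box, drop boxes that overlap it."""
    y1, x1, y2, x2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (y2 - y1) * (x2 - x1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the winning box with the remaining boxes
        inter_h = np.maximum(0, np.minimum(y2[i], y2[order[1:]]) - np.maximum(y1[i], y1[order[1:]]))
        inter_w = np.maximum(0, np.minimum(x2[i], x2[order[1:]]) - np.maximum(x1[i], x1[order[1:]]))
        inter = inter_h * inter_w
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= iou_threshold]
    return np.array(keep, dtype=np.int32)

# Two near-duplicate detections (as TTA typically produces) and one distinct one:
boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]], dtype=np.float32)
scores = np.array([0.9, 0.8, 0.7], dtype=np.float32)
print(nms_sketch(boxes, scores, 0.1))  # -> [0 2]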
def reverse_flips_rotations(results_flip, images, use_semantic):
    for i in range(len(images)):
        # Undo each test-time transform on the predicted masks:
        # index 1 = fliplr, 2 = flipud, 3 = fliplr+flipud, 4 = rot90(k=1), 5 = rot90(k=3)
        results_flip[1][i]['masks'] = np.fliplr(results_flip[1][i]['masks'])
        results_flip[2][i]['masks'] = np.flipud(results_flip[2][i]['masks'])
        results_flip[3][i]['masks'] = np.fliplr(np.flipud(results_flip[3][i]['masks']))
        results_flip[4][i]['masks'] = np.rot90(results_flip[4][i]['masks'], -1, (0, 1))
        results_flip[5][i]['masks'] = np.rot90(results_flip[5][i]['masks'], -3, (0, 1))
        if use_semantic:
            results_flip[1][i]['semantic_masks'] = np.fliplr(results_flip[1][i]['semantic_masks'])
            results_flip[2][i]['semantic_masks'] = np.flipud(results_flip[2][i]['semantic_masks'])
            results_flip[3][i]['semantic_masks'] = np.fliplr(np.flipud(results_flip[3][i]['semantic_masks']))
            results_flip[4][i]['semantic_masks'] = np.rot90(results_flip[4][i]['semantic_masks'], -1, (0, 1))
            results_flip[5][i]['semantic_masks'] = np.rot90(results_flip[5][i]['semantic_masks'], -3, (0, 1))

        # Recalculate bboxes
        for j in range(len(results_flip)):
            results_flip[j][i]['rois'] = utils.extract_bboxes(results_flip[j][i]['masks'])
            # Reshape masks so that they can be concatenated correctly
            results_flip[j][i]['masks'] = np.moveaxis(results_flip[j][i]['masks'], -1, 0)
            if use_semantic:
                results_flip[j][i]['semantic_masks'] = np.moveaxis(results_flip[j][i]['semantic_masks'], -1, 0)
    return results_flip
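# Sanity check for the inverse transforms applied above: each forward TTA
# transform (fliplr, flipud, fliplr+flipud, rot90 k=1, rot90 k=3) composed
# with the inverse used in reverse_flips_rotations is the identity.
import numpy as np

a = np.arange(12).reshape(3, 4)
assert np.array_equal(np.fliplr(np.fliplr(a)), a)
assert np.array_equal(np.flipud(np.flipud(a)), a)
assert np.array_equal(np.fliplr(np.flipud(np.flipud(np.fliplr(a)))), a)
assert np.array_equal(np.rot90(np.rot90(a, 1, (0, 1)), -1, (0, 1)), a)
assert np.array_equal(np.rot90(np.rot90(a, 3, (0, 1)), -3, (0, 1)), a)
print("all TTA inverses verified")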
def load_mask_GrabCut(self, image_id):
    image = self.load_image(image_id)
    image_info = self.image_info[image_id]
    assert image_info["source"] == "crowdai-mapping-challenge"

    annotations = self.image_info[image_id]["annotations"]
    # Padded cumulative mask (2-pixel border on each side)
    test_mask_cut = np.zeros(
        (image_info["height"] + 4, image_info["width"] + 4))

    for annotation in annotations:
        class_id = self.map_source_class_id(
            "crowdai-mapping-challenge.{}".format(annotation['category_id']))
        if class_id:
            rle = cocomask.frPyObjects(annotation['segmentation'],
                                       image_info["height"],
                                       image_info["width"])
            m = cocomask.decode(rle)
            test = utils.extract_bboxes(m)

            bgdModel = np.zeros((1, 65), np.float64)  # GrabCut internal state - don't change
            fgdModel = np.zeros((1, 65), np.float64)
            grab_mask_in = np.zeros(image.shape[:2], np.uint8)
            grab_mask_out = np.zeros(image.shape[:2], np.uint8)

            # GrabCut rect = (x, y, w, h)
            h = int(test[0][2]) - int(test[0][0])
            if h == 300:
                h = 299
            w = int(test[0][3]) - int(test[0][1])
            if w == 300:
                w = 299
            rect = (int(test[0][1]), int(test[0][0]), w, h)
            if np.linalg.norm(rect) == 0:  # empty box
                continue
            if h < 3 or w < 3:
                # Too-small bounding boxes crash cv.grabCut:
                # just mark the whole small window as foreground
                grab_mask_out[int(test[0][0]):int(test[0][2]):1,
                              int(test[0][1]):int(test[0][3]):1] = 1
                continue

            cv.grabCut(image, grab_mask_in, rect, bgdModel, fgdModel, 1,
                       cv.GC_INIT_WITH_RECT)
            grab_mask_out = np.where(
                (grab_mask_in == 2) | (grab_mask_in == 0), 0, 1).astype(np.float64)
            test_mask_cut[2:302, 2:302] += grab_mask_out
            # Note: `image` must not be deleted here; it is reused by the
            # next annotation's grabCut call
            del bgdModel, fgdModel, grab_mask_in, grab_mask_out

    # make sure the max value is one
    test_mask_cut = np.clip(test_mask_cut, np.min(test_mask_cut), 1)
    # return the final mask
    return test_mask_cut
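# Minimal, self-contained sketch of the cv.grabCut call used above, on a
# synthetic image (the dataset, RLE decoding, and the 300x300 sizes assumed
# by load_mask_GrabCut are not needed to see the mechanics):
import numpy as np
import cv2 as cv

img = np.zeros((300, 300, 3), np.uint8)
img[100:200, 100:200] = 200                # bright square = "foreground"

grab_mask = np.zeros(img.shape[:2], np.uint8)
bgdModel = np.zeros((1, 65), np.float64)   # internal state, don't change
fgdModel = np.zeros((1, 65), np.float64)
rect = (90, 90, 120, 120)                  # (x, y, w, h) around the square

cv.grabCut(img, grab_mask, rect, bgdModel, fgdModel, 1, cv.GC_INIT_WITH_RECT)

# Same post-processing as above: definite/probable background -> 0, rest -> 1
fg = np.where((grab_mask == 2) | (grab_mask == 0), 0, 1).astype(np.float64)
print(fg.sum())  # number of foreground pixels found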
def nocs_util_data_handling():
    local_image_id = int(image_id)

    config = nocs_eval.InferenceConfig()
    config.OBJ_MODEL_DIR = r'E:\MASTERS_STUFF\MastersProject\networks\NOCS_CVPR2019\data\obj_models'

    dataset = nocs_dataset.NOCSDataset(pj.constants.NOCS_CLASSES, 'val', config)
    dataset.load_camera_scenes(str(CAMERA_DATASET))
    dataset.prepare(pj.constants.CLASS_MAP)

    image = dataset.load_image(local_image_id)
    depth = dataset.load_depth(local_image_id)
    gt_mask, gt_coord, gt_class_ids, gt_scales, gt_domain_label = dataset.load_mask(
        local_image_id)
    gt_bbox = nocs_utils.extract_bboxes(gt_mask)
    gt_RTs, _, _, _ = nocs_utils.align(gt_class_ids, gt_mask, gt_coord, depth,
                                       pj.constants.INTRINSICS,
                                       pj.constants.NOCS_CLASSES, None, None)

    # Printing all information to determine issue
    print(f'image.shape: {image.shape}')
    print(f'depth.shape: {depth.shape} depth.dtype: {depth.dtype}')
    print(f'gt_mask.shape: {gt_mask.shape}')
    print(f'gt_coord.shape: {gt_coord.shape}')
    print(f'gt_class_ids: {gt_class_ids}')
    print(f'gt_domain_label: {gt_domain_label}')
    print(f'gt_bbox: {gt_bbox}')
    print(f'gt_scales: {gt_scales}')
    print(f'gt_RTs: {gt_RTs}')

    return gt_class_ids, gt_bbox, gt_mask, gt_coord, gt_RTs, [
        100 for i in range(len(gt_class_ids))
    ], gt_scales, None
def visualize_instance_segmentation(data_base_dir, dataset_type, image_id,
                                    save_path='', verbose=True):
    split_dataset = SketchDataset(data_base_dir)
    split_dataset.load_sketches(dataset_type)
    split_dataset.prepare()

    original_image = split_dataset.load_image(image_id - 1)
    gt_mask, gt_class_id = split_dataset.load_mask(image_id - 1)
    gt_bbox = utils.extract_bboxes(gt_mask)

    if verbose:
        log('original_image', original_image)
        log('gt_class_id', gt_class_id)
        log('gt_bbox', gt_bbox)
        log('gt_mask', gt_mask)

    visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id,
                                split_dataset.class_names, save_path=save_path)
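# Hypothetical usage (paths and ids are placeholders; note the function
# converts the 1-based image_id to the dataset's 0-based index):
visualize_instance_segmentation(data_base_dir='./data/sketch',
                                dataset_type='test',
                                image_id=1,
                                save_path='./outputs/sketch_1.png',
                                verbose=True)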
def load_image_gt(dataset, config, image_id, augment=False, use_mini_mask=False):
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    shape = image.shape
    image, window, scale, padding = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        max_dim=config.IMAGE_MAX_DIM,
        padding=config.IMAGE_PADDING)
    mask = utils.resize_mask(mask, scale, padding)
    bbox = utils.extract_bboxes(mask)

    # Mark which classes are active in the dataset this image came from
    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]]
    active_class_ids[source_class_ids] = 1

    # Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    # Image meta data
    image_meta = compose_image_meta(image_id, shape, window, active_class_ids)

    return image, image_meta, class_ids, bbox, mask
def load_image_gt(dataset, config, image_id, augment=False, use_mini_mask=False):
    """Load and return ground truth data for an image (image, mask, bounding boxes).

    augment: If true, apply random image augmentation. Currently, only
        horizontal flipping is offered.
    use_mini_mask: If False, returns full-size masks that are the same height
        and width as the original image. These can be big, for example
        1024x1024x100 (for 100 instances). Mini masks are smaller, typically
        224x224 and are generated by extracting the bounding box of the
        object and resizing it to MINI_MASK_SHAPE.

    Returns:
    image: [height, width, 3]
    shape: the original shape of the image before resizing and cropping.
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image unless use_mini_mask is True, in which case they are
        defined in MINI_MASK_SHAPE.
    """
    # Load image and mask
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    shape = image.shape
    image, window, scale, padding = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        max_dim=config.IMAGE_MAX_DIM,
        padding=config.IMAGE_PADDING)
    mask = utils.resize_mask(mask, scale, padding)

    # Random horizontal flips.
    if augment:
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    # Bounding boxes. Note that some boxes might be all zeros
    # if the corresponding mask got cropped out.
    # bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)

    # Active classes
    # Different datasets have different classes, so track the
    # classes supported in the dataset of this image.
    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]]
    active_class_ids[source_class_ids] = 1

    # Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    # Image meta data
    image_meta = compose_image_meta(image_id, shape, window, active_class_ids)

    return image, image_meta, class_ids, bbox, mask
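# All of these loaders depend on utils.extract_bboxes. A minimal sketch of
# what it computes in the Matterport codebase: tight boxes from a
# [height, width, num_instances] mask, with exclusive y2/x2 and an all-zero
# box for instances whose mask got cropped out:
import numpy as np

def extract_bboxes_sketch(mask):
    boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
    for i in range(mask.shape[-1]):
        m = mask[:, :, i]
        horizontal = np.where(np.any(m, axis=0))[0]
        vertical = np.where(np.any(m, axis=1))[0]
        if horizontal.shape[0]:
            x1, x2 = horizontal[[0, -1]]
            y1, y2 = vertical[[0, -1]]
            x2 += 1  # exclusive upper bounds
            y2 += 1
        else:
            x1, x2, y1, y2 = 0, 0, 0, 0  # empty mask
        boxes[i] = np.array([y1, x1, y2, x2])
    return boxes

mask = np.zeros((8, 8, 1), dtype=bool)
mask[2:5, 3:7, 0] = True
print(extract_bboxes_sketch(mask))  # -> [[2 3 5 7]]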
def eval_n_plot_val(model, config, dataset_val, save_plots=False):
    scores = []
    image_ids = dataset_val.image_ids
    for mm, image_id in tqdm(enumerate(image_ids)):
        # Load image and ground truth data
        image = dataset_val.load_image(image_id, color=config.IMAGE_COLOR)
        gt_mask, gt_class_id = dataset_val.load_mask(image_id)
        img_name = dataset_val.image_info[image_id]['img_name']

        # result = ensemble_prediction(model, config, image)
        result = cluster_prediction(model, config, image)
        # result = model.detect([image], verbose=0, mask_threshold=config.DETECTION_MASK_THRESHOLD)[0]

        # Clean overlaps and apply some post-processing
        result = postprocess_masks(result, image)

        # If there are no masks, retry prediction on an up-scaled canvas
        if result['masks'].sum() < 2:
            H, W = image.shape[:2]
            scaled_img = np.zeros([4 * H, 4 * W, 3], np.uint8)
            scaled_img[:H, :W] = image
            result = cluster_prediction(model, config, scaled_img)
            result['masks'] = result['masks'][:H, :W]
            result = postprocess_masks(result, image)

        pred_box, pred_class_id, pred_score, pred_mask = result['rois'], result['class_ids'], \
            result['scores'], result['masks']
        gt_box = utils.extract_bboxes(gt_mask)

        # Compute IoU scores for ground truth and predictions
        iou = utils.compute_ap_range(gt_box, gt_class_id, gt_mask,
                                     pred_box, pred_class_id, pred_score, pred_mask,
                                     iou_thresholds=None, verbose=0)
        # iou = mean_iou(gt_mask, pred_masks)

        if save_plots:
            fig = plt.figure()
            gs = gridspec.GridSpec(1, 1)
            plot_boundary(image, true_masks=gt_mask, pred_masks=pred_mask,
                          ax=fig.add_subplot(gs[0]))
            fig.savefig('../images/validation_' + str(mm) + '.png',
                        bbox_inches='tight')
            plt.close()

        scores.append(iou)
        if (mm + 1) % 10 == 0:
            print('Mean IoU for', mm + 1, 'imgs', np.mean(scores))

    print("Mean IoU: ", np.mean(scores))
def get_image(image_size, mask_pool_size):
    try:
        bg, shpes = random_image(image_size, image_size)
        ig = load_image(bg_color=bg, shapes=shpes)
        msk, cls_id = load_mask(shpes)
        box = utils.extract_bboxes(msk)
        mask = utils.minimize_mask(box, msk,
                                   mini_shape=(mask_pool_size, mask_pool_size))
        box = box * 1.0 / image_size  # normalize boxes to [0, 1]
        return ig, cls_id, box, mask
    except Exception:
        # Retry with a freshly generated random image on failure
        return get_image(image_size, mask_pool_size)
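# get_image also leans on utils.minimize_mask. A rough, self-contained sketch
# of its behaviour (crop each instance to its box, resize to the mini shape);
# the real utils version uses its own resize, so treat this as illustrative:
import numpy as np
from skimage.transform import resize

def minimize_mask_sketch(bbox, mask, mini_shape):
    """bbox: [N, (y1, x1, y2, x2)], mask: [H, W, N] -> [mini_h, mini_w, N]."""
    mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        y1, x1, y2, x2 = bbox[i][:4].astype(np.int32)
        m = mask[y1:y2, x1:x2, i]
        if m.size == 0:
            continue  # all-zero box: leave an empty mini mask
        mini_mask[:, :, i] = resize(m.astype(float), mini_shape, order=1) >= 0.5
    return mini_mask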
def pull_item(self, ind):
    img_dr = os.path.join(self.root, self.ids[ind].replace('\n', '') + '.png')
    json_dr = os.path.join(self.root, self.ids[ind].replace('\n', '') + '.json')
    dts = json.loads(open(json_dr).read())
    total = len(dts)

    ig = cv2.imread(img_dr)
    ig = cv2.cvtColor(ig, cv2.COLOR_BGR2RGB)
    shape = ig.shape[0:2]

    msk = np.zeros((self.image_size, self.image_size, total))
    ids = []
    for idx, s in enumerate(dts):
        # Rasterize each circle annotation, then resize to the target size
        tm_img = np.zeros((shape[0], shape[1], 3))
        center = [s['x'], s['y']]
        r = s['radius']
        tm_img = cv2.circle(tm_img, center=tuple(center), radius=r,
                            color=(255, 255, 255), thickness=-1)
        tm_img = cv2.resize(tm_img, dsize=(self.image_size, self.image_size))
        msk[:, :, idx] = tm_img[:, :, 0]
        ids.append(0)

    ig = cv2.resize(ig, dsize=(self.image_size, self.image_size))
    box = utils.extract_bboxes(msk)
    mask = utils.minimize_mask(box, msk,
                               mini_shape=(self.mask_pool_size, self.mask_pool_size))
    box = box * 1.0 / self.image_size
    # visual.display_instances(ig, box * self.image_size)
    return ig, ids, box, mask
def load_mask_naive(self, image_id):
    """Load a mask from bounding boxes only, in a bitmap [height, width, 1],
    i.e. there will be only one mask.

    Params:
    - image_id : reference id for a given image

    Returns:
        masks: A bool array of shape [height, width, instances] with
            one mask per instance
        class_ids: a 1D array of class IDs of the corresponding instance
            masks (in this version of the challenge it will be of shape
            [instances] and always be filled with the class-id of the
            "Building" class.)
    """
    image_info = self.image_info[image_id]
    assert image_info["source"] == "crowdai-mapping-challenge"

    annotations = self.image_info[image_id]["annotations"]
    test_mask = np.zeros((image_info["height"], image_info["width"]))
    test_mask_padded = np.zeros(
        (image_info["height"] + 4, image_info["width"] + 4))

    for annotation in annotations:
        class_id = self.map_source_class_id(
            "crowdai-mapping-challenge.{}".format(annotation['category_id']))
        if class_id:
            rle = cocomask.frPyObjects(annotation['segmentation'],
                                       image_info["height"],
                                       image_info["width"])
            m = cocomask.decode(rle)
            test = utils.extract_bboxes(m)  # Extract bounding box
            test_mask[int(test[0][0]):int(test[0][2]):1,
                      int(test[0][1]):int(test[0][3]):1] = 1

    # padded mask
    test_mask_padded[2:302, 2:302] = test_mask
    # return the final mask
    return test_mask_padded
def process_train(image_id):
    image_file = CONFIG.train_dir + '%s/images/%s.png' % (
        (image_id.decode()), (image_id.decode()))
    image = Image.open(image_file)
    image = image.convert('RGB')
    image = image.resize(CONFIG.image_size)
    image_shape = np.array(np.shape(image)[:2], dtype=np.int32)
    image = np.array(image).astype(np.float32)

    masks = np.zeros(
        (CONFIG.max_instance, CONFIG.image_size[0], CONFIG.image_size[1]),
        dtype=np.float32)
    masks_ids = os.listdir(CONFIG.train_dir + '%s/masks/' % (image_id.decode()))
    for i, idx in enumerate(masks_ids):
        mask = Image.open(CONFIG.train_dir + '%s/masks/%s' % (image_id.decode(), idx))
        mask = mask.convert('L')
        mask = mask.resize(CONFIG.image_size)
        # masks[i] = (np.array(mask) > 0).astype(np.int32)
        mask = np.array(mask)
        if mask.any():
            masks[i, :] = mask

    masks = np.transpose(masks, [1, 2, 0])
    boxes = utils.extract_bboxes(masks)
    gt_anchors, gt_boxes, gt_labels, gt_masks = utils.boxes2anchor(
        image_shape=CONFIG.image_size,
        strides=CONFIG.strides[-1],
        anchor_size=CONFIG.anchor_size,
        anchor_ratio=CONFIG.anchor_ratio,
        boxes=boxes,
        masks=masks,
        target_size=CONFIG.target_size)
    # masks = scipy.misc.imresize(masks, CONFIG.image_size).astype(np.int32)
    return image, image_shape, gt_anchors, gt_labels, gt_boxes, gt_masks
def reverse_scaling(results_scale, images, use_semantic):
    # Reverse the scales
    for i in range(len(images)):
        for j in range(len(results_scale)):
            results_scale[j][i]['masks'] = scipy.ndimage.zoom(
                results_scale[j][i]['masks'],
                (images[i].shape[0] / results_scale[j][i]['masks'].shape[0],
                 images[i].shape[1] / results_scale[j][i]['masks'].shape[1],
                 1),
                order=0)
            results_scale[j][i]['rois'] = utils.extract_bboxes(results_scale[j][i]['masks'])
            # Reshape masks so that they can be concatenated correctly
            results_scale[j][i]['masks'] = np.moveaxis(results_scale[j][i]['masks'], -1, 0)
            if use_semantic:
                results_scale[j][i]['semantic_masks'] = scipy.ndimage.zoom(
                    results_scale[j][i]['semantic_masks'],
                    (images[i].shape[0] / results_scale[j][i]['semantic_masks'].shape[0],
                     images[i].shape[1] / results_scale[j][i]['semantic_masks'].shape[1],
                     1),
                    order=0)
                # Reshape masks so that they can be concatenated correctly
                results_scale[j][i]['semantic_masks'] = np.moveaxis(
                    results_scale[j][i]['semantic_masks'], -1, 0)
    return results_scale
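# The order=0 (nearest-neighbour) zoom above is what keeps the rescaled
# masks binary. A tiny demonstration:
import numpy as np
import scipy.ndimage

mask = np.zeros((4, 4, 1), dtype=np.uint8)
mask[1:3, 1:3, 0] = 1
up = scipy.ndimage.zoom(mask, (2, 2, 1), order=0)  # e.g. back to 8x8
print(up.shape, np.unique(up))  # (8, 8, 1) [0 1]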
def select_people(model, image):
    results = model.detect([image], verbose=1)
    r = results[0]
    indexes = np.argwhere(r['class_ids'] == 1)  # COCO class id 1 = "person"
    bbox = utils.extract_bboxes(r['masks'])
    peoples = []
    boxes = []
    for i in indexes:
        if r['scores'][i.item()] >= 0.9:
            top, left, bottom, right = bbox[i.item()]
            cropped_image = image[top:bottom, left:right]
            peoples.append(cropped_image)
            boxes.append([top, bottom, left, right])
    return peoples, boxes
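# Hypothetical usage of select_people (file names are placeholders; crops
# below the 0.9 score cut-off are dropped by the function):
import skimage.io

image = skimage.io.imread('street.jpg')
people, boxes = select_people(model, image)
for k, crop in enumerate(people):
    skimage.io.imsave('person_%d.png' % k, crop)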
print('*' * 50)
image_start = time.time()
print('Image id: ', image_id)
image_path = dataset.image_info[image_id]["path"]
print(image_path)

# record results
result = {}

# loading ground truth
image = dataset.load_image(image_id)
depth = dataset.load_depth(image_id)
gt_mask, gt_coord, gt_class_ids, gt_scales, gt_domain_label = dataset.load_mask(
    image_id)
gt_bbox = utils.extract_bboxes(gt_mask)

result['image_id'] = image_id
result['image_path'] = image_path
result['gt_class_ids'] = gt_class_ids
result['gt_bboxes'] = gt_bbox
result['gt_RTs'] = None
result['gt_scales'] = gt_scales

image_path_parsing = image_path.split('/')
gt_pkl_path = os.path.join(
    gt_dir, 'results_{}_{}_{}.pkl'.format(data, image_path_parsing[-2],
                                          image_path_parsing[-1]))
print(gt_pkl_path)
image_ids = np.random.choice(dataset_train.image_ids, 4)
for image_id in image_ids:
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)

    original_shape = image.shape
    # Resize
    image, window, scale, padding, _ = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        max_dim=config.IMAGE_MAX_DIM,
        mode=config.IMAGE_RESIZE_MODE)
    mask = utils.resize_mask(mask, scale, padding)
    # Compute Bounding box
    bbox = utils.extract_bboxes(mask)

    # Display image and additional stats
    print("image_id: ", image_id, dataset_train.image_reference(image_id))
    print("Original shape: ", original_shape)
    log("image", image)
    log("mask", mask)
    log("class_ids", class_ids)
    log("bbox", bbox)
    # Display image and instances
    visualize.display_instances(image, bbox, mask, class_ids,
                                dataset_train.class_names)

# Validation dataset
dataset_val = MaritimeDataset()
# dataset_val.load_maritime(args.dataset, "validation")
def generate_mask(model, original_image, DATA_DIR, gen_type='bmp', fuse_thres=1):
    image_meta = original_image.shape
    original_image = scipy.misc.imresize(
        original_image.astype(float),
        (config.IMAGE_MAX_DIM, config.IMAGE_MAX_DIM), interp='bilinear')
    original_image = to_rgb(original_image)

    # Voting scheme: the original, mean-normalized, and histogram-equalized
    # images are each fed to the model with gray-scale offsets from -10 to 10;
    # a pixel is kept if it collects at least `fuse_thres` votes.
    # Initialize the cumulative mask as 0
    cumulative_mask = original_image[:, :, 0] * 0
    cumulative_box = original_image[:, :, 0] * 0

    # Normalize mean to 128:
    normal_image = np.uint8(original_image + 128 - np.mean(original_image))

    # original_image has 3 channels as well
    equalized_img_gray, _, _, _ = histeq(original_image[:, :, 0])
    # Processing histogram-equalized image
    equalized_img_rgb = to_rgb(equalized_img_gray)

    cumulative_mask += calculate_cumulative_mask(model, normal_image, -10, 11)
    cumulative_mask += calculate_cumulative_mask(model, equalized_img_rgb, -10, 11)
    cumulative_mask += calculate_cumulative_mask(model, original_image, -10, 11)

    THRESHOLD_CUMULATIVE_MASK = fuse_thres
    thres_compare = min(THRESHOLD_CUMULATIVE_MASK, np.max(np.max(cumulative_mask)))
    # Keep the pixels whose vote count reached the threshold
    cumulative_mask = (cumulative_mask >= thres_compare) * 1

    # Bounding box wrapping the mask
    pad = 0
    # If a mask was returned, box it; if no mask, the box is the full image
    if np.sum(np.sum(cumulative_mask)) > 0:
        bbox = utils.extract_bboxes(to_one_depth(cumulative_mask))
        cumulative_box[max(0, bbox[0, 0] - pad):min(config.IMAGE_MAX_DIM - 1, bbox[0, 2] + pad),
                       max(0, bbox[0, 1] - pad):min(config.IMAGE_MAX_DIM - 1, bbox[0, 3] + pad)] = 1
    else:
        cumulative_box[:, :] = 1
        cumulative_mask[:, :] = 1

    # Get the target size
    img_max_size = max(image_meta[1], image_meta[0])
    # Resize masks to the same size as the image
    resized_cumulative_box = scipy.misc.imresize(
        cumulative_box.astype(float), (img_max_size, img_max_size), interp='bilinear')
    resized_cumulative_box = resized_cumulative_box > 0
    resized_cumulative_mask = scipy.misc.imresize(
        cumulative_mask.astype(float), (img_max_size, img_max_size), interp='bilinear')
    resized_cumulative_mask = resized_cumulative_mask > 0

    current_size = max(config.IMAGE_MAX_DIM, img_max_size)
    # Integer division so the values can be used as slice indices
    pad_width = (current_size - image_meta[0]) // 2
    pad_height = (current_size - image_meta[1]) // 2
    if pad_width >= 0 or pad_height >= 0:
        if gen_type == 'box':
            return (resized_cumulative_box[pad_width:current_size - pad_width,
                                           pad_height:current_size - pad_height] * 255,
                    cumulative_mask * 255)
        if gen_type == 'bmp':
            return (resized_cumulative_mask[pad_width:current_size - pad_width,
                                            pad_height:current_size - pad_height] * 255,
                    cumulative_mask * 255)
def load_image_gt(dataset, image_id, augment=False, augmentation=None,
                  use_mini_mask=False):
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    origin_shape = image.shape
    image, window, scale, padding, crop = utils.resize_image(
        image,
        min_dim=hyper_parameters.FLAGS.IMAGE_MIN_DIM,
        min_scale=hyper_parameters.FLAGS.IMAGE_MIN_SCALE,
        max_dim=hyper_parameters.FLAGS.IMAGE_MAX_DIM,
        mode=hyper_parameters.FLAGS.IMAGE_RESIZE_MODE)
    mask = utils.resize_mask(mask, scale, padding, crop)

    if augment:
        logging.warning("'augment' is deprecated. Use 'augmentation' instead.")
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    if augmentation:
        import imgaug

        # Augmenters that are safe to apply to masks
        mask_augmenters = [
            "Sequential", "SomeOf", "OneOf", "Sometimes", "Fliplr", "Flipud",
            "CropAndPad", "Affine", "PiecewiseAffine"
        ]

        def hook(images, augmenter, parents, default):
            return augmenter.__class__.__name__ in mask_augmenters

        image_shape = image.shape
        mask_shape = mask.shape
        # Make augmenters deterministic so image and mask get the same transform
        det = augmentation.to_deterministic()
        image = det.augment_image(image)
        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))
        assert image.shape == image_shape, "Augmentation shouldn't change image size"
        assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
        # Change mask back to bool (np.bool is removed in recent NumPy)
        mask = mask.astype(bool)

    # Drop instances whose masks were cropped out entirely
    _idx = np.sum(mask, axis=(0, 1)) > 0
    mask = mask[:, :, _idx]
    class_ids = class_ids[_idx]

    bbox = utils.extract_bboxes(mask)

    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]]
    active_class_ids[source_class_ids] = 1

    if use_mini_mask:
        mask = utils.minimize_mask(
            bbox, mask, tuple(hyper_parameters.FLAGS.MINI_MASK_SHAPE))

    image_meta = utils.compose_image_meta(image_id, origin_shape, image.shape,
                                          window, scale, active_class_ids)
    return image, image_meta, class_ids, bbox, mask
def count_boxed_lesions(netbox, target, thresh, scores):
    """Compare bounding boxes to ground truth so that TPR/FDR is averaged
    per slice.

    Connected-component analysis between the bounding-box prediction
    `netbox` and ground truth `target` across lesion bin sizes. This is a
    "per slice" analysis.

    :param netbox: network output on range [0, 1], shape=(lesion_instances x N x M)
    :type netbox: float16, float32, float64
    :param target: ground truth labels, shape=(lesion_instances x N x M x O)
    :type target: int16
    :param thresh: score threshold used to keep predicted boxes
    :type thresh: float16, float32, float64
    :return: dict
    """
    threshed_box_idxs = [x for x, val in enumerate(scores) if val > thresh]
    netbox = netbox[threshed_box_idxs]

    mask_target = np.zeros((target.shape[1], target.shape[2]))
    for lesion in range(target.shape[0]):
        mask_target += target[lesion]
    target, _ = utils.remove_tiny_les(mask_target, nvox=2)
    gt_boxes = utils.extract_bboxes(target, dims=2, buf=2)

    # To test, set netbox = gt_boxes (should give an ROC with tpr = 1 and fdr = 0 everywhere)
    netbox0 = netbox.copy()
    netbox0 = netbox0.astype(int)

    nles = {}
    labels = {}
    labels['target'], nles['target'] = ndimage.label(mask_target)
    nles['netbox'] = netbox.shape[0]
    found_h = np.ones(nles['netbox'], np.int16)

    ntp = {'all': 0, 'small': 0, 'med': 0, 'large': 0}
    nfp = {'all': 0, 'small': 0, 'med': 0, 'large': 0}
    nfn = {'all': 0, 'small': 0, 'med': 0, 'large': 0}
    nb_les = {'all': 0, 'small': 0, 'med': 0, 'large': 0}
    nles_gt = {'all': nles['target'], 'small': 0, 'med': 0, 'large': 0}

    # Go through ground truth lesions and count true positives/false negatives.
    # For each ground truth lesion we keep a list of true positives.
    h_lesions = []
    for i in range(1, nles['target'] + 1):
        gt_lesion_size = np.sum(target[labels['target'] == i])
        nles_gt[utils.get_lesion_bin(gt_lesion_size)] += 1

        # List of detected lesions in this area
        box_matrix = np.zeros((mask_target.shape[0], mask_target.shape[1]))
        for j in range(nles['netbox']):
            # Go through each box, get coordinates, and append the netbox
            # index to the list if it intersects the ground truth lesion
            y1, x1, y2, x2 = netbox0[j]
            box_matrix[y1:y2, x1:x2] = 1
            intersect = box_matrix[labels['target'] == i].sum()
            if intersect > 0:
                h_lesions.append(j + 1)
            box_matrix[y1:y2, x1:x2] = 0

        # All the voxels in this area contribute to detecting the lesion
        netbox_matrix = np.zeros((mask_target.shape[0], mask_target.shape[1]))
        for k in range(nles['netbox']):
            y1, x1, y2, x2 = netbox0[k]
            netbox_matrix[y1:y2, x1:x2] = 1
        nb_overlap = netbox_matrix[labels['target'] == i].sum()

        if nb_overlap >= 3 or nb_overlap >= 0.5 * gt_lesion_size:
            nb_les[utils.get_lesion_bin(gt_lesion_size)] += 1
            ntp[utils.get_lesion_bin(gt_lesion_size)] += 1
            for h_lesion in h_lesions:
                if h_lesion != 0:
                    found_h[h_lesion - 1] = 0
        else:
            nfn[utils.get_lesion_bin(gt_lesion_size)] += 1

    print('Number of lesions gt: ', nles['target'])
    print('And found_h = ', found_h)

    # Now go through all the netbox lesions; a box is a false positive
    # if it hasn't been marked a true positive
    # (range runs to nles['netbox'] + 1 so the last box is not skipped)
    for i in range(1, nles['netbox'] + 1):
        y1, x1, y2, x2 = netbox0[i - 1]
        netbox_matrix = np.zeros((mask_target.shape[0], mask_target.shape[1]))
        netbox_matrix[y1:y2, x1:x2] = 1
        netbox_size = np.sum(netbox_matrix)
        if found_h[i - 1] == 1:
            nfp[utils.get_box_lesion_bin(netbox_size)] += 1
    print('NFP : ', nfp)

    nb_les['all'] = nb_les['small'] + nb_les['med'] + nb_les['large']
    ntp['all'] = ntp['small'] + ntp['med'] + ntp['large']
    nfp['all'] = nfp['small'] + nfp['med'] + nfp['large']
    nfn['all'] = nfn['small'] + nfn['med'] + nfn['large']

    tpr = {}
    fdr = {}
    for s in ntp.keys():
        # tpr (sensitivity)
        if nles_gt[s] != 0:
            tpr[s] = ntp[s] / nles_gt[s]
        elif nles_gt[s] == 0 and ntp[s] == 0:
            tpr[s] = 1
        else:
            tpr[s] = 0
        # ppv (1 - fdr)
        if ntp[s] + nfp[s] != 0:
            ppv = ntp[s] / (ntp[s] + nfp[s])
        elif ntp[s] == 0:
            ppv = 1
        else:
            ppv = 0
        fdr[s] = 1 - ppv

    return {'ntp': ntp, 'nfp': nfp, 'nfn': nfn, 'fdr': fdr, 'tpr': tpr,
            'nles': nb_les, 'nles_gt': nles_gt}
def count_segmented_lesions_as_boxes(netseg, target, thresh):
    """Compare bounding boxes to ground truth so that TPR/FDR is averaged
    per volume.

    Connected-component analysis between the bounding-box prediction
    `netseg` and ground truth `target` across lesion bin sizes. This
    returns per-slice stats. It boxes each instance segmentation and
    compares with ground truth boxes.

    :param netseg: network output on range [0, 1], shape=(lesion_instances x N x M)
    :type netseg: float16, float32, float64
    :param target: ground truth labels, shape=(lesion_instances x N x M x O)
    :type target: int16
    :param thresh: threshold to binarize prediction `netseg`
    :type thresh: float16, float32, float64
    :return: dict
    """
    # Threshold and prepare input
    netseg[netseg >= thresh] = 1
    netseg[netseg < thresh] = 0
    netseg, _ = utils.remove_tiny_les(netseg, nvox=2)
    netbox = utils.extract_bboxes(netseg, dims=2, buf=2)

    mask_target = np.zeros((target.shape[1], target.shape[2]))
    for lesion in range(target.shape[0]):
        mask_target += target[lesion]
    target, _ = utils.remove_tiny_les(mask_target, nvox=2)
    gt_boxes = utils.extract_bboxes(target, dims=2, buf=2)

    # To test, set netbox = gt_boxes (should give an ROC with tpr = 1 and fdr = 0 everywhere)
    netbox0 = netbox.copy()
    netbox0 = netbox0.astype(int)

    nles = {}
    labels = {}
    labels['target'], nles['target'] = ndimage.label(mask_target)
    nles['netbox'] = netbox.shape[0]
    found_h = np.ones(nles['netbox'], np.int16)

    ntp = {'all': 0, 'small': 0, 'med': 0, 'large': 0}
    nfp = {'all': 0, 'small': 0, 'med': 0, 'large': 0}
    nfn = {'all': 0, 'small': 0, 'med': 0, 'large': 0}
    nb_les = {'all': 0, 'small': 0, 'med': 0, 'large': 0}
    nles_gt = {'all': nles['target'], 'small': 0, 'med': 0, 'large': 0}

    # Go through ground truth lesions and count true positives/false negatives
    for i in range(1, nles['target'] + 1):
        gt_lesion_size = np.sum(target[labels['target'] == i])
        nles_gt[utils.get_lesion_bin(gt_lesion_size)] += 1

        # List of detected lesions in this area
        h_lesions = []
        box_matrix = np.zeros((mask_target.shape[0], mask_target.shape[1]))
        for j in range(nles['netbox']):
            # Go through each box, get coordinates, and append the netbox
            # index to the list if it intersects the ground truth lesion
            y1, x1, y2, x2 = netbox0[j]
            box_matrix[y1:y2, x1:x2] = 1
            intersect = box_matrix[labels['target'] == i].sum()
            if intersect > 0:
                h_lesions.append(j)
            box_matrix[y1:y2, x1:x2] = 0

        # All the voxels in this area contribute to detecting the lesion
        netbox_matrix = np.zeros((mask_target.shape[0], mask_target.shape[1]))
        for k in range(nles['netbox']):
            y1, x1, y2, x2 = netbox0[k]
            netbox_matrix[y1:y2, x1:x2] = 1
        nb_overlap = netbox_matrix[labels['target'] == i].sum()

        if nb_overlap >= 3 or nb_overlap >= 0.5 * gt_lesion_size:
            nb_les[utils.get_lesion_bin(gt_lesion_size)] += 1
            ntp[utils.get_lesion_bin(gt_lesion_size)] += 1
            for h_lesion in h_lesions:
                if h_lesion != 0:
                    found_h[h_lesion - 1] = 0
        else:
            nfn[utils.get_lesion_bin(gt_lesion_size)] += 1

    # A predicted box that never matched a ground-truth lesion is a false positive
    # (range runs to nles['netbox'] + 1 so the last box is not skipped)
    for i in range(1, nles['netbox'] + 1):
        y1, x1, y2, x2 = netbox0[i - 1]
        netbox_matrix = np.zeros((mask_target.shape[0], mask_target.shape[1]))
        netbox_matrix[y1:y2, x1:x2] = 1
        netbox_size = np.sum(netbox_matrix)
        if found_h[i - 1] == 1:
            nfp[utils.get_box_lesion_bin(netbox_size)] += 1

    nb_les['all'] = nb_les['small'] + nb_les['med'] + nb_les['large']
    ntp['all'] = ntp['small'] + ntp['med'] + ntp['large']
    nfp['all'] = nfp['small'] + nfp['med'] + nfp['large']
    nfn['all'] = nfn['small'] + nfn['med'] + nfn['large']

    tpr = {}
    fdr = {}
    for s in ntp.keys():
        # tpr (sensitivity)
        if nles_gt[s] != 0:
            tpr[s] = ntp[s] / nles_gt[s]
        elif nles_gt[s] == 0 and ntp[s] == 0:
            tpr[s] = 1
        else:
            tpr[s] = 0
        # ppv (1 - fdr)
        if ntp[s] + nfp[s] != 0:
            ppv = ntp[s] / (ntp[s] + nfp[s])
        elif ntp[s] == 0:
            ppv = 1
        else:
            ppv = 0
        fdr[s] = 1 - ppv

    return {'ntp': ntp, 'nfp': nfp, 'nfn': nfn, 'fdr': fdr, 'tpr': tpr,
            'nles': nb_les, 'nles_gt': nles_gt}
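# Worked example of the per-bin bookkeeping shared by both metric functions:
# with 3 true positives, 1 false positive, and 4 ground-truth lesions in a bin,
ntp_s, nfp_s, nles_gt_s = 3, 1, 4
tpr_s = ntp_s / nles_gt_s          # sensitivity = 0.75
ppv_s = ntp_s / (ntp_s + nfp_s)    # precision   = 0.75
fdr_s = 1 - ppv_s                  # false discovery rate = 0.25
print(tpr_s, fdr_s)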
def test(model):
    """Test the model."""
    save_path = "./results/" + args.weights[27:] + "/"
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    per_class_ious = []
    box_iou = []
    detect_time = 0
    for i in range(args.limit, 131):
        image = np.load(args.data + "image_np/liver_" + str(i) +
                        ".npy").astype(np.float32)  # [H, W, D]
        label = np.load(args.data + "label_np/liver_label_" + str(i) +
                        ".npy").astype(np.int32)  # [H, W, D, num_class]
        gt_bbox = utils.extract_bboxes(label)
        gt_bbox = utils.extend_bbox(gt_bbox, label.shape)
        nib_label = nib.load("/media/disk1/LiTS/labelTr/segmentation-" + str(i) + ".nii.gz")
        affine = nib_label.affine  # kept to save the predicted mask later
        ori_shape = nib_label.shape
        try:
            start_time = time.time()
            result = model.detect([image])[0]
            detect_time += time.time() - start_time
            print("detect_time:", time.time() - start_time)
            # result is a dict:
            # {
            #     "rois": final_rois,            # [N, (y1, x1, z1, y2, x2, z2)] in real coordinates
            #     "class_ids": final_class_ids,  # [N]
            #     "scores": final_scores,        # [N]
            #     "mask": final_mask,            # [mask_shape[0], mask_shape[1], mask_shape[2]]
            # }
            rois = result["rois"]
            class_ids = result["class_ids"]
            scores = result["scores"]
            mask = result["mask"]
            if args.stage == 'beginning':
                mask = np.zeros(mask.shape).astype(np.int32)
            rois = rois.clip(min=0)
            rois[:, 3] = rois[:, 3].clip(max=mask.shape[0] - 1)
            rois[:, 4] = rois[:, 4].clip(max=mask.shape[1] - 1)
            rois[:, 5] = rois[:, 5].clip(max=mask.shape[2] - 1)
            rois = rois.astype(np.int32)

            # compute bbox iou
            print("gt_bbox:", gt_bbox, "pred_bbox:", rois)
            box_iou.append(utils.compute_overlaps(np.array([gt_bbox]), rois)[0, 0])
            if args.stage != 'beginning':
                # Prepare the gt-masks and pred-masks to calculate the IoUs.
                # [H, W, D, num_classes - 1]
                gt_masks = np.zeros(label.shape[:3] + (model.config.NUM_CLASSES - 1,))
                pred_masks = np.zeros(image.shape + (model.config.NUM_CLASSES - 1,))
                for j in range(model.config.NUM_CLASSES - 1):
                    gt_masks[:, :, :, j][label == j + 1] = 1
                    pred_masks[:, :, :, j][mask == j + 1] = 1
                # calculate the different kinds of IoUs
                per_class_iou = utils.compute_per_class_mask_iou(gt_masks, pred_masks)
                per_class_ious.append(per_class_iou)

            # Save the results
            if args.save:
                # Draw bboxes
                if args.bbox:
                    for j in range(rois.shape[0]):
                        y1, x1, z1, y2, x2, z2 = rois[j, :]
                        mask[y1:y2, x1:x2, z1:z2] = 100
                mask = resize(mask, ori_shape, order=0, mode='constant',
                              preserve_range=True, anti_aliasing=False)
                vol = nib.Nifti1Image(mask.astype(np.uint8), affine)
                if args.stage != 'beginning':
                    nib.save(vol, save_path + str(per_class_iou.mean()) +
                             "_liver_" + str(i) + ".nii.gz")
                    print("liver_" + str(i) + " detected done. iou = " + str(per_class_iou))
                else:
                    nib.save(vol, save_path + str(box_iou[-1]) +
                             "_liver_" + str(i) + ".nii.gz")
                    print("liver_" + str(i) + " detected done. box_iou = " + str(box_iou[-1]))
        except Exception:
            print("detect error!")

    print("Test completed.")
    # Print the IoU results.
    box_iou = np.array(box_iou)
    print("box iou:", box_iou)
    print("mean:", box_iou.mean())
    if args.stage != 'beginning':
        per_class_ious = np.array(per_class_ious)
        print("per class iou mean:", np.mean(per_class_ious, axis=0))
        print("std:", np.std(per_class_ious, axis=0))
        print("Total ious mean:", per_class_ious.mean())
    print("Total detect time:", detect_time)
def load_image_gt(config, image_id, image, depth, mask, class_ids, parameters,
                  augment=False, use_mini_mask=True):
    """Load and return ground truth data for an image (image, mask, bounding boxes).

    augment: If true, apply random image augmentation. Currently, only
        horizontal flipping is offered.
    use_mini_mask: If False, returns full-size masks that are the same height
        and width as the original image. These can be big, for example
        1024x1024x100 (for 100 instances). Mini masks are smaller, typically
        224x224 and are generated by extracting the bounding box of the
        object and resizing it to MINI_MASK_SHAPE.

    Returns:
    image: [height, width, 3]
    shape: the original shape of the image before resizing and cropping.
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image unless use_mini_mask is True, in which case they are
        defined in MINI_MASK_SHAPE.
    """
    ## Load image and mask
    shape = image.shape
    image, window, scale, padding = utils.resize_image(
        image,
        min_dim=config.IMAGE_MAX_DIM,
        max_dim=config.IMAGE_MAX_DIM,
        padding=config.IMAGE_PADDING)
    mask = utils.resize_mask(mask, scale, padding)

    ## Random horizontal flips (currently disabled by the `and False`)
    if augment and False:
        if np.random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)
            depth = np.fliplr(depth)

    ## Bounding boxes. Note that some boxes might be all zeros
    ## if the corresponding mask got cropped out.
    ## bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)

    ## Resize masks to smaller size to reduce memory usage
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    active_class_ids = np.ones(config.NUM_CLASSES, dtype=np.int32)
    ## Image meta data
    image_meta = utils.compose_image_meta(image_id, shape, window, active_class_ids)

    if config.NUM_PARAMETER_CHANNELS > 0:
        if config.OCCLUSION:
            depth = utils.resize_mask(depth, scale, padding)
            mask_visible = utils.minimize_mask(bbox, depth, config.MINI_MASK_SHAPE)
            mask = np.stack([mask, mask_visible], axis=-1)
        else:
            depth = np.expand_dims(depth, -1)
            depth = utils.resize_mask(depth, scale, padding).squeeze(-1)
            depth = utils.minimize_depth(bbox, depth, config.MINI_MASK_SHAPE)
            mask = np.stack([mask, depth], axis=-1)

    return image, image_meta, class_ids, bbox, mask, parameters
def draw_boxes(image, boxes=None, refined_boxes=None, mask=None, gt_mask=None,
               captions=None, visibilities=None, title="", ax=None,
               pn_labels=False, instance_masks=False, test=False):
    """Draw bounding boxes and segmentation masks with different customizations.

    boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates.
    masks: [N, height, width]
    captions: List of N titles to display on each box
    visibilities: (optional) List of values of 0, 1, or 2. Determine how
        prominent each bounding box should be.
    title: An optional title to show over the image
    ax: (optional) Matplotlib axis to draw on.
    """
    # Number of boxes
    N = boxes.shape[0] if boxes is not None else 0

    # Matplotlib Axis
    if not ax:
        _, ax = plt.subplots(1, figsize=(12, 12))

    # Show area outside image boundaries.
    ax.axis('off')
    ax.set_title(title)
    masked_image = image * 255
    masked_image = masked_image.astype(np.uint32).copy()

    # Generate random colors
    colors = random_colors(N, bright=True)

    gt_idx_list = []
    gt_labels, gt_nles = ndimage.label(gt_mask)

    for i in range(N):
        # Box visibility
        visibility = visibilities[i] if visibilities is not None else 2
        if visibility == 0:
            color = "gray"
            style = "dotted"
            alpha = 0.5
        elif visibility == 1:
            color = colors[i]
            style = "dotted"
            alpha = 1
        elif visibility == 2:
            color = colors[i]
            style = "solid"
            alpha = 1

        # Boxes
        if boxes is not None:
            if not np.any(boxes[i]):
                # Skip this instance. Has no bbox. Likely lost in cropping.
                continue
            y1, x1, y2, x2 = boxes[i]
            # Get bounding box edge color based on IoU
            if pn_labels:
                bx_color, gt_idx = get_box_color([y1, x1, y2, x2], gt_mask,
                                                 gt_labels, gt_nles)
                gt_idx_list.extend(gt_idx)
                p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                      alpha=alpha, linestyle=style,
                                      edgecolor=bx_color, facecolor='none')
            else:
                p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                      alpha=alpha, linestyle=style,
                                      edgecolor='r', facecolor='none')
            ax.add_patch(p)

        # Captions
        if captions is not None:
            caption = captions[i]
            caption = np.around(caption, decimals=3)
            # If there are refined boxes, display captions on them
            if refined_boxes is not None:
                y1, x1, y2, x2 = refined_boxes[i]  # fixed: unpack the refined box
            x = random.randint(x1, (x1 + x2) // 2)
            ax.text(x1, y1, caption, size=8, verticalalignment='top',
                    color='w', backgroundcolor="none",
                    bbox={'facecolor': color, 'alpha': 0.4,
                          'pad': 1, 'edgecolor': 'none'})

    # Now go through the gt_idx list and get indices for lesions not detected (FNs):
    fn_indices = [gt_idx for gt_idx in range(1, gt_nles + 1)
                  if gt_idx not in gt_idx_list]
    if len(fn_indices) != 0 and boxes is not None:
        for i in fn_indices:
            gt_mask[gt_labels != i] = 0
            gt_mask[gt_labels == i] = 1
            bbox = utils.extract_bboxes(gt_mask, dims=2, buf=1)
            bbox = bbox[0, 0:4]
            y1, x1, y2, x2 = bbox
            p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                  alpha=alpha, linestyle=style,
                                  edgecolor='b', facecolor='none')
            ax.add_patch(p)
        for i in range(1, gt_nles + 1):
            gt_mask[gt_labels == i] = 1

    # ------------------------ Masks ------------------------
    # Two options: display masks as one mask with one color, or display them
    # as TPs = green, FPs = red, FNs = blue
    if mask is not None and pn_labels == False and instance_masks == False:
        colors = get_color(bright=True)
        color = colors[0]
        masked_image = apply_mask(masked_image, mask, color)
        # Mask Polygon
        # Pad to ensure proper polygons for masks that touch image edges.
        padded_mask = np.zeros((mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
        padded_mask[1:-1, 1:-1] = mask
        contours = find_contours(padded_mask, 0.5)
        for verts in contours:
            # Subtract the padding and flip (y, x) to (x, y)
            verts = np.fliplr(verts) - 1
            p = Polygon(verts, facecolor=color, edgecolor=color)
            ax.add_patch(p)
    elif mask is not None and pn_labels == True and instance_masks == False:
        # Build a mask for each lesion instance
        gt_labels, gt_nles = ndimage.label(gt_mask)
        labels, nles = ndimage.label(mask)
        masks = np.zeros([nles, gt_mask.shape[0], gt_mask.shape[1]], dtype=np.int32)
        # Check if there are no lesions
        if nles == 0:
            masks = np.zeros([1, gt_mask.shape[0], gt_mask.shape[1]], dtype=np.int32)
            masks[0] = mask
        # Look for all the voxels associated with a particular lesion
        for i in range(1, nles + 1):
            mask[labels != i] = 0
            mask[labels == i] = 1
            masks[i - 1] = mask
        for i in range(1, nles + 1):
            mask[labels == i] = 1

        gt_idx_list = []
        for i in range(masks.shape[0]):
            mask_color, gt_idx, tp_fp = get_mask_color(masks[i], gt_mask,
                                                       gt_labels, gt_nles, test)
            if test == True and tp_fp == 0:
                print("Got one")
                gt_idx_list.extend(gt_idx)
                test = False
            else:
                gt_idx_list.extend(gt_idx)
            masked_image = apply_mask(masked_image, masks[i], mask_color)
            # Mask Polygon
            # Pad to ensure proper polygons for masks that touch image edges.
            padded_mask = np.zeros((masks[i].shape[0] + 2, masks[i].shape[1] + 2),
                                   dtype=np.uint8)
            padded_mask[1:-1, 1:-1] = masks[i]
            contours = find_contours(padded_mask, 0.5)
            for verts in contours:
                # Subtract the padding and flip (y, x) to (x, y)
                verts = np.fliplr(verts) - 1
                p = Polygon(verts, facecolor=mask_color, edgecolor=mask_color)
                ax.add_patch(p)

        # Now go through the gt_idx list and get indices for lesions not detected (FNs):
        fn_indices = [gt_idx for gt_idx in range(1, gt_nles + 1)
                      if gt_idx not in gt_idx_list]
        if len(fn_indices) != 0:
            for i in fn_indices:
                gt_mask[gt_labels != i] = 0
                gt_mask[gt_labels == i] = 1
                masked_image = apply_mask(masked_image, gt_mask, mask_color)
                # Mask Polygon
                # Pad to ensure proper polygons for masks that touch image edges.
                padded_mask = np.zeros((gt_mask.shape[0] + 2, gt_mask.shape[1] + 2),
                                       dtype=np.uint8)
                padded_mask[1:-1, 1:-1] = gt_mask
                contours = find_contours(padded_mask, 0.5)
                for verts in contours:
                    # Subtract the padding and flip (y, x) to (x, y)
                    verts = np.fliplr(verts) - 1
                    p = Polygon(verts, facecolor='b', edgecolor='b')
                    ax.add_patch(p)
            for i in range(1, gt_nles + 1):
                gt_mask[gt_labels == i] = 1
    elif mask is not None and instance_masks == True:
        # Build a mask for each lesion instance
        labels, nles = ndimage.label(mask)
        masks = np.zeros([nles, mask.shape[0], mask.shape[1]], dtype=np.int32)
        # Check if there are no lesions
        if nles == 0:
            masks = np.zeros([1, mask.shape[0], mask.shape[1]], dtype=np.int32)
            masks[0] = mask
        # Look for all the voxels associated with a particular lesion
        for i in range(1, nles + 1):
            mask[labels != i] = 0
            mask[labels == i] = 1
            masks[i - 1] = mask

        mask_colors = random_colors(nles + 1)
        for i in range(masks.shape[0]):
            masked_image = apply_mask(masked_image, masks[i], mask_colors[i])
            # Mask Polygon
            # Pad to ensure proper polygons for masks that touch image edges.
            padded_mask = np.zeros((masks[i].shape[0] + 2, masks[i].shape[1] + 2),
                                   dtype=np.uint8)
            padded_mask[1:-1, 1:-1] = masks[i]
            contours = find_contours(padded_mask, 0.5)
            for verts in contours:
                # Subtract the padding and flip (y, x) to (x, y)
                verts = np.fliplr(verts) - 1
                p = Polygon(verts, facecolor=mask_colors[i], edgecolor=mask_colors[i])
                ax.add_patch(p)
        for i in range(1, nles + 1):
            mask[labels == i] = 1

    ax.imshow(masked_image.astype(np.uint32), cmap=plt.cm.gray)
    return ax
def manual_data_handling():
    # Loading all raw information
    print("Loading raw data")
    instance_dict = {}

    # Loading instance information from meta file
    with open(str(meta_path), 'r') as f:
        for line in f:
            line_info = line.split(' ')
            instance_id = int(line_info[0])
            class_id = int(line_info[1])
            instance_dict[instance_id] = class_id

    # Loading data
    color_image = cv2.imread(str(image_path), cv2.IMREAD_UNCHANGED)
    mask_image = cv2.imread(str(mask_path), cv2.IMREAD_UNCHANGED)[:, :, 2]
    coord_map = cv2.imread(str(coord_path), cv2.IMREAD_UNCHANGED)[:, :, :3]
    coord_map = coord_map[:, :, (2, 1, 0)]
    depth_image = cv2.imread(str(depth_path), cv2.IMREAD_UNCHANGED)

    # Converting depth to the correct shape and dtype
    if len(depth_image.shape) == 3:
        # encoded depth image
        new_depth = np.uint16(depth_image[:, :, 1] * 256) + np.uint16(depth_image[:, :, 2])
        new_depth = new_depth.astype(np.uint16)
        depth_image = new_depth
    elif len(depth_image.shape) == 2 and depth_image.dtype == 'uint16':
        pass  # depth already has the right layout
    else:
        assert False, '[ Error ]: Unsupported depth type'

    # Processing data
    print("Processing data")
    cdata = np.array(mask_image, dtype=np.int32)

    # The value of the mask, from 0 to 255, is the class id
    instance_ids = list(np.unique(cdata))
    instance_ids = sorted(instance_ids)

    # removing background
    assert instance_ids[-1] == 255
    del instance_ids[-1]
    cdata[cdata == 255] = -1
    assert np.unique(cdata).shape[0] < 20

    num_instance = len(instance_ids)
    h, w = cdata.shape

    # flip z axis of coord map
    coord_map = np.array(coord_map, dtype=np.float32) / 255
    coord_map[:, :, 2] = 1 - coord_map[:, :, 2]

    masks = np.zeros([h, w, num_instance], dtype=np.uint8)
    coords = np.zeros((h, w, num_instance, 3), dtype=np.float32)
    class_ids = np.zeros([num_instance], dtype=np.int_)
    scales = np.zeros([num_instance, 3], dtype=np.float32)

    # Determining the scales (of the 3d bounding boxes)
    with open(str(meta_path), 'r') as f:
        lines = f.readlines()
    scale_factor = np.zeros((len(lines), 3), dtype=np.float32)
    for i, line in enumerate(lines):
        words = line[:-1].split(' ')
        symmetry_id = words[2]
        reference_id = words[3]
        bbox_file = obj_model_dir / symmetry_id / reference_id / 'bbox.txt'
        bbox = np.loadtxt(str(bbox_file))
        value = bbox[0, :] - bbox[1, :]
        scale_factor[i, :] = value  # [a b c] for scale of NOCS [x y z]

    # Deleting background objects and non-existing objects
    instance_dict = {
        instance_id: class_id
        for instance_id, class_id in instance_dict.items()
        if (class_id != 0 and instance_id in instance_ids)
    }

    i = 0
    for instance_id in instance_ids:
        if instance_id not in instance_dict.keys():
            continue
        instance_mask = np.equal(cdata, instance_id)
        assert np.sum(instance_mask) > 0
        assert instance_dict[instance_id]

        masks[:, :, i] = instance_mask
        coords[:, :, i, :] = np.multiply(coord_map,
                                         np.expand_dims(instance_mask, axis=-1))

        # class ids are also one-indexed
        class_ids[i] = instance_dict[instance_id]
        scales[i, :] = scale_factor[instance_id - 1, :]
        i += 1

    masks = masks[:, :, :i]
    coords = coords[:, :, :i, :]
    coords = np.clip(coords, 0, 1)  # normalize
    class_ids = class_ids[:i]
    scales = scales[:i]

    bboxes = nocs_utils.extract_bboxes(masks)
    scores = [100 for j in range(len(class_ids))]

    # now obtaining the rotation and translation
    RTs, _, _, _ = nocs_utils.align(class_ids, masks, coords, depth_image,
                                    pj.constants.INTRINSICS,
                                    pj.constants.NOCS_CLASSES, ".", None)

    print(f'color_image.shape: {color_image.shape}')
    print(f'depth_image.shape: {depth_image.shape} depth_image.dtype: {depth_image.dtype}')
    print(f'mask_image.shape: {mask_image.shape} mask_image.dtype: {mask_image.dtype}')
    print(f'coord_map.shape: {coord_map.shape} coord_map.dtype: {coord_map.dtype}')
    print(f'class_ids: {class_ids}')
    print(f'domain_label: {None}')
    print(f'bboxes: {bboxes}')
    print(f'scales: {scales}')
    print(f'RTs: {RTs}')

    return class_ids, bboxes, masks, coords, RTs, scores, scales, instance_dict
def data_generator(config, shuffle=True, augmentation=None, batch_size=1):
    """A generator that returns images and corresponding target class ids,
    bounding box deltas, and masks.

    Returns a Python generator. Upon calling next() on it, the generator
    returns two lists, inputs and outputs. The contents of the lists differ
    depending on the received arguments:

    inputs list:
    - images: [batch, H, W, C]
    - image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
    - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
    - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
    - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
    - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width
      are those of the image unless use_mini_mask is True, in which case they
      are defined in MINI_MASK_SHAPE.

    outputs list: Usually empty in regular training. But if detection_targets
    is True then the outputs list contains target class_ids, bbox deltas,
    and masks.
    """
    b = 0
    ix = 0
    image_files = glob.glob("./data/train/*.jpg")

    # Anchors: [anchor_count, (y1, x1, y2, x2)]
    backbone_shapes = compute_backbone_shapes(config, config.IMAGE_SHAPE)
    anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
                                             config.RPN_ANCHOR_RATIOS,
                                             backbone_shapes,
                                             config.BACKBONE_STRIDES,
                                             config.RPN_ANCHOR_STRIDE)

    while True:
        if shuffle and ix == 0:
            np.random.shuffle(image_files)

        image_path = image_files[ix]
        json_path = image_files[ix].replace("jpg", "json")
        image = load_image(image_path)
        original_shape = image.shape
        mask, class_ids = load_mask(json_path)
        image, window, scale, padding, crop = utils.resize_image(
            image,
            min_dim=config.IMAGE_MIN_DIM,
            min_scale=config.IMAGE_MIN_SCALE,
            max_dim=config.IMAGE_MAX_DIM,
            mode=config.IMAGE_RESIZE_MODE)
        mask = utils.resize_mask(mask, scale, padding, crop)

        # Augmentation
        # This requires the imgaug lib (https://github.com/aleju/imgaug)
        if augmentation:
            import imgaug

            # Augmenters that are safe to apply to masks
            # Some, such as Affine, have settings that make them unsafe, so always
            # test your augmentation on masks
            MASK_AUGMENTERS = ["Sequential", "SomeOf", "OneOf", "Sometimes",
                               "Fliplr", "Flipud", "CropAndPad",
                               "Affine", "PiecewiseAffine"]

            def hook(images, augmenter, parents, default):
                """Determines which augmenters to apply to masks."""
                return augmenter.__class__.__name__ in MASK_AUGMENTERS

            # Store shapes before augmentation to compare
            image_shape = image.shape
            mask_shape = mask.shape
            # Make augmenters deterministic to apply similarly to images and masks
            det = augmentation.to_deterministic()
            image = det.augment_image(image)
            # Change mask to np.uint8 because imgaug doesn't support bool
            mask = det.augment_image(mask.astype(np.uint8),
                                     hooks=imgaug.HooksImages(activator=hook))
            # Verify that shapes didn't change
            assert image.shape == image_shape, "Augmentation shouldn't change image size"
            assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
            # Change mask back to bool (np.bool is removed in recent NumPy)
            mask = mask.astype(bool)

        bbox = utils.extract_bboxes(mask)

        use_mini_mask = True
        if use_mini_mask:
            mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

        # image_meta is for debug
        image_meta = compose_image_meta(0, original_shape, image.shape, window,
                                        scale, np.ones(len(class_name2idx)))

        # RPN Targets
        rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors,
                                                class_ids, bbox, config)

        if b == 0:
            batch_image_meta = np.zeros(
                (batch_size,) + image_meta.shape, dtype=image_meta.dtype)
            batch_rpn_match = np.zeros(
                [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype)
            batch_rpn_bbox = np.zeros(
                [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4],
                dtype=rpn_bbox.dtype)
            batch_images = np.zeros(
                (batch_size,) + image.shape, dtype=np.float32)
            batch_gt_class_ids = np.zeros(
                (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32)
            batch_gt_boxes = np.zeros(
                (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32)
            batch_gt_masks = np.zeros(
                (batch_size, mask.shape[0], mask.shape[1], config.MAX_GT_INSTANCES),
                dtype=mask.dtype)

        # Add to batch
        batch_image_meta[b] = image_meta
        batch_rpn_match[b] = rpn_match[:, np.newaxis]
        batch_rpn_bbox[b] = rpn_bbox
        batch_images[b] = mold_image(image.astype(np.float32), config)
        batch_gt_class_ids[b, :class_ids.shape[0]] = class_ids
        batch_gt_boxes[b, :bbox.shape[0]] = bbox
        batch_gt_masks[b, :, :, :mask.shape[-1]] = mask

        b += 1
        ix = (ix + 1) % len(image_files)

        if b >= batch_size:
            inputs = [batch_images, batch_image_meta, batch_rpn_match,
                      batch_rpn_bbox, batch_gt_class_ids, batch_gt_boxes,
                      batch_gt_masks]
            outputs = []
            yield inputs, outputs
            b = 0
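# Minimal consumption sketch (config is assumed to be a Mask R-CNN Config
# subclass; class_name2idx must be defined in the surrounding module):
gen = data_generator(config, shuffle=True, augmentation=None, batch_size=2)
inputs, outputs = next(gen)
images, image_meta, rpn_match, rpn_bbox, gt_class_ids, gt_boxes, gt_masks = inputs
print(images.shape, gt_boxes.shape, gt_masks.shape)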
count = masks.shape[2]
print("    Nuclei (including cropped):", str(count))
if count < 1:
    continue

skimage.io.imsave(os.path.join(outputDir, baseName + ".tiff"), mask)

if separate:
    masksDir = os.path.join(outputDir, baseName, "masks")
    os.makedirs(name=masksDir, exist_ok=True)
    for m in range(count):
        skimage.io.imsave(os.path.join(masksDir, str(m) + ".png"),
                          masks[:, :, m])

if scoreDir is not None:
    scoreFile = open(os.path.join(scoreDir, baseName + ".tsv"), "w")
    scoreFile.write("label\tscore\r\n")
    for s in range(count):
        scoreFile.write(str(s + 1) + "\t" + str(scores[s]) + "\r\n")
    scoreFile.close()

if showOutputs:
    visualize.display_instances(image=image,
                                boxes=utils.extract_bboxes(masks),
                                masks=masks,
                                scores=scores,
                                title=baseName,
                                class_ids=numpy.array([1 for _ in range(count)]),
                                class_names=["BG", "nucleus"])
print("{:3}. {:50}".format(i, info['name'])) #Load model model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config) model.load_weights(COCO_MODEL_PATH, by_name=True) n_num = 80000 for o in range(n_num): print('-'*20 + str(n_num - o) + '-'*20) # Load random image and mask. image_id = random.choice(dataset.image_ids) image = dataset.load_image(image_id) mask, class_ids = dataset.load_mask(image_id) #print('real mask shape:',np.shape(mask)) # Compute Bounding box bbox = utils.extract_bboxes(mask) if np.shape(bbox)[0]>3: continue is_show = False # Display image and additional stats if is_show == True: print("image_id ", image_id, dataset.image_reference(image_id)) log("image", image) log("mask", mask) log("class_ids", class_ids) log("bbox", bbox) # Display image and instances visualize.display_instances(image, bbox, mask, class_ids, dataset.class_names) results = model.detect([image], verbose=1) r = results[0] mask_pred = r['masks']
def data_generator():
    img_idx = -1
    img_ids = np.arange(len(dataset))
    centers = compute_centers(cfg.DATA.IMAGE_SIZE, cfg.DATA.IMAGE_SIZE)
    class_ids_size = cfg.DATA.MAX_GT_INSTANCES
    bboxes_size = (cfg.DATA.MAX_GT_INSTANCES, 4)
    masks_size = (cfg.DATA.MAX_GT_INSTANCES, cfg.DATA.IMAGE_SIZE, cfg.DATA.IMAGE_SIZE)

    while True:
        try:
            # Increment index to pick next image. Shuffle if at the start of an epoch.
            img_idx = (img_idx + 1) % len(img_ids)
            if shuffle and img_idx == 0:
                np.random.shuffle(img_ids)

            # Get GT bounding boxes and masks for image.
            img_id = img_ids[img_idx]
            image = dataset.load_image(img_id, 3)
            image = mold_image(image, cfg)
            _masks, _class_ids = dataset.load_mask(img_id)
            _bboxes = extract_bboxes(_masks)

            # If more instances than fit in the array, sub-sample from them.
            if _bboxes.shape[0] > cfg.DATA.MAX_GT_INSTANCES:
                ids = np.random.choice(np.arange(_bboxes.shape[0]),
                                       cfg.DATA.MAX_GT_INSTANCES, replace=False)
                _class_ids = _class_ids[ids]
                _bboxes = _bboxes[ids]
                _masks = _masks[ids, :, :]

            semantic = dataset.load_semantic(img_id, _masks, _class_ids)

            class_ids = np.zeros(class_ids_size, dtype=np.float32)
            bboxes = np.zeros(bboxes_size, dtype=np.float32)
            masks = np.zeros(masks_size, dtype=bool)  # np.bool is removed in recent NumPy
            class_ids[:_class_ids.shape[0]] = _class_ids
            bboxes[:_bboxes.shape[0]] = _bboxes
            masks[:_masks.shape[0], :, :] = _masks

            fcos_targets = compute_targets(class_ids, bboxes, centers)
            cls_targets, ctr_targets, reg_targets, valid_centers, \
                normalizer_value, center_indices = fcos_targets

            # normalize bboxes
            bboxes = (bboxes - [0, 0, 1, 1]) / [
                masks.shape[1], masks.shape[2], masks.shape[1], masks.shape[2]
            ]

            cls_targets = cls_targets.numpy()
            ctr_targets = ctr_targets.numpy()
            reg_targets = reg_targets.numpy()
            val_centers = valid_centers.numpy()
            normalizer_val = normalizer_value.numpy()
            center_indices = center_indices.numpy()

            yield image, semantic, cls_targets, ctr_targets, reg_targets, val_centers, \
                normalizer_val, tf.concat(centers, axis=0), bboxes, masks, center_indices
        except (GeneratorExit, KeyboardInterrupt):
            raise
    def Train(self):
        fixedRandomSeed = None
        trainToValidationChance = 0.2
        includeEvaluationInValidation = True
        stepMultiplier = None
        stepCount = 1000
        showInputs = False
        augmentationLevel = 0
        detNMSThresh = 0.35
        rpnNMSThresh = 0.55
        trainDir = os.path.join(os.curdir, self.__mParams["train_dir"])
        evalDir = None
        inModelPath = os.path.join(os.curdir, self.__mParams["input_model"])
        outModelPath = os.path.join(os.curdir, self.__mParams["output_model"])
        blankInput = self.__mParams["blank_mrcnn"] == "true"
        maxdim = 1024

        # Override the defaults with any parameters given in the config.
        if "eval_dir" in self.__mParams:
            evalDir = os.path.join(os.curdir, self.__mParams["eval_dir"])
        if "image_size" in self.__mParams:
            maxdim = int(self.__mParams["image_size"])
        if "train_to_val_seed" in self.__mParams:
            fixedRandomSeed = self.__mParams["train_to_val_seed"]
        if "train_to_val_ratio" in self.__mParams:
            trainToValidationChance = float(self.__mParams["train_to_val_ratio"])
        if "use_eval_in_val" in self.__mParams:
            includeEvaluationInValidation = self.__mParams["use_eval_in_val"] == "true"
        if "step_ratio" in self.__mParams:
            stepMultiplier = float(self.__mParams["step_ratio"])
        if "step_num" in self.__mParams:
            stepCount = int(self.__mParams["step_num"])
        if "show_inputs" in self.__mParams:
            showInputs = self.__mParams["show_inputs"] == "true"
        if "random_augmentation_level" in self.__mParams:
            augmentationLevel = int(self.__mParams["random_augmentation_level"])
        if "detection_nms_threshold" in self.__mParams:
            detNMSThresh = float(self.__mParams["detection_nms_threshold"])
        if "rpn_nms_threshold" in self.__mParams:
            rpnNMSThresh = float(self.__mParams["rpn_nms_threshold"])

        rnd = random.Random()
        rnd.seed(fixedRandomSeed)
        trainImagesAndMasks = {}
        validationImagesAndMasks = {}

        # Iterate through the train set, splitting it into train and validation.
        imagesDir = os.path.join(trainDir, "images")
        masksDir = os.path.join(trainDir, "masks")
        imageFileList = [f for f in os.listdir(imagesDir)
                         if os.path.isfile(os.path.join(imagesDir, f))]
        for imageFile in imageFileList:
            baseName = os.path.splitext(os.path.basename(imageFile))[0]
            imagePath = os.path.join(imagesDir, imageFile)
            maskPath = os.path.join(masksDir, baseName + ".tiff")
            if not os.path.isfile(imagePath) or not os.path.isfile(maskPath):
                continue
            if rnd.random() > trainToValidationChance:
                trainImagesAndMasks[imagePath] = maskPath
            else:
                validationImagesAndMasks[imagePath] = maskPath

        # Optionally add the evaluation (test) data to the validation set.
        if includeEvaluationInValidation and evalDir is not None:
            imagesDir = os.path.join(evalDir, "images")
            masksDir = os.path.join(evalDir, "masks")
            imageFileList = [f for f in os.listdir(imagesDir)
                             if os.path.isfile(os.path.join(imagesDir, f))]
            for imageFile in imageFileList:
                baseName = os.path.splitext(os.path.basename(imageFile))[0]
                imagePath = os.path.join(imagesDir, imageFile)
                maskPath = os.path.join(masksDir, baseName + ".tiff")
                if not os.path.isfile(imagePath) or not os.path.isfile(maskPath):
                    continue
                validationImagesAndMasks[imagePath] = maskPath

        if len(trainImagesAndMasks) < 1:
            raise ValueError("Empty train image list")

        # Make sure the validation set is non-empty by borrowing one train image.
        if len(validationImagesAndMasks) < 1:
            for key, value in trainImagesAndMasks.items():
                validationImagesAndMasks[key] = value
                break

        # Training dataset
        dataset_train = mask_rcnn_additional.NucleiDataset()
        dataset_train.initialize(pImagesAndMasks=trainImagesAndMasks,
                                 pAugmentationLevel=augmentationLevel)
        dataset_train.prepare()

        # Validation dataset
        dataset_val = mask_rcnn_additional.NucleiDataset()
        dataset_val.initialize(pImagesAndMasks=validationImagesAndMasks,
                               pAugmentationLevel=0)
        dataset_val.prepare()

        print("training images (with augmentation):", dataset_train.num_images)
        print("validation images (with augmentation):", dataset_val.num_images)

        config = mask_rcnn_additional.NucleiConfig()
        config.IMAGE_MAX_DIM = maxdim
        config.IMAGE_MIN_DIM = maxdim
        config.STEPS_PER_EPOCH = stepCount
        if stepMultiplier is not None:
            steps = int(float(dataset_train.num_images) * stepMultiplier)
            config.STEPS_PER_EPOCH = steps
        config.VALIDATION_STEPS = dataset_val.num_images
        config.DETECTION_NMS_THRESHOLD = detNMSThresh
        config.RPN_NMS_THRESHOLD = rpnNMSThresh
        # Re-run __init__ so values derived from the overridden attributes
        # (e.g. the image shape) are recomputed.
        config.__init__()

        # Show the config.
        config.display()

        # Show the setup.
        for a in dir(self):
            if not callable(getattr(self, a)):
                print("{:30} {}".format(a, getattr(self, a)))
        print("\n")

        if showInputs:
            # Load and display random samples.
            image_ids = numpy.random.choice(dataset_train.image_ids, 20)
            for imageId in image_ids:
                image = dataset_train.load_image(imageId)
                mask, class_ids = dataset_train.load_mask(imageId)
                # visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)
                visualize.display_instances(image=image,
                                            masks=mask,
                                            class_ids=class_ids,
                                            title=dataset_train.image_reference(imageId),
                                            boxes=utils.extract_bboxes(mask),
                                            class_names=dataset_train.class_names)

        # Create the model in training mode.
        mdl = model.MaskRCNN(mode="training", config=config,
                             model_dir=os.path.dirname(outModelPath))

        if blankInput:
            # Exclude the class-specific output layers so they are reinitialized.
            mdl.load_weights(inModelPath, by_name=True,
                             exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                      "mrcnn_bbox", "mrcnn_mask"])
        else:
            mdl.load_weights(inModelPath, by_name=True)

        # Train in stages; "epochs" is cumulative in Keras, so keep a running total.
        allcount = 0
        for epochgroup in self.__mParams["epoch_groups"]:
            epochs = int(epochgroup["epochs"])
            if epochs < 1:
                continue
            allcount += epochs
            mdl.train(dataset_train, dataset_val,
                      learning_rate=float(epochgroup["learning_rate"]),
                      epochs=allcount,
                      layers=epochgroup["layers"])

        mdl.keras_model.save_weights(outModelPath)
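# A hypothetical example of the parameter dictionary Train() reads from
# self.__mParams. The keys match those queried above; the values shown are
# illustrative assumptions only, not settings from the original project.
exampleParams = {
    "train_dir": "data/train",
    "eval_dir": "data/eval",
    "input_model": "models/mask_rcnn_coco.h5",
    "output_model": "models/mask_rcnn_nuclei.h5",
    "blank_mrcnn": "true",
    "image_size": "512",
    "train_to_val_ratio": "0.2",
    "step_ratio": "1.0",
    "random_augmentation_level": "1",
    "detection_nms_threshold": "0.35",
    "rpn_nms_threshold": "0.55",
    "epoch_groups": [
        {"epochs": "10", "learning_rate": "0.001", "layers": "heads"},
        {"epochs": "20", "learning_rate": "0.0001", "layers": "all"},
    ],
}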
def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
                  use_mini_mask=False):
    """Load and return ground truth data for an image (image, mask, bounding boxes).

    augment: (deprecated. Use augmentation instead). If true, apply random
        image augmentation. Currently, only horizontal flipping is offered.
    augmentation: Optional. An imgaug (https://github.com/aleju/imgaug)
        augmentation. For example, passing imgaug.augmenters.Fliplr(0.5)
        flips images right/left 50% of the time.
    use_mini_mask: If False, returns full-size masks that are the same height
        and width as the original image. These can be big, for example
        1024x1024x100 (for 100 instances). Mini masks are smaller, typically
        224x224 and are generated by extracting the bounding box of the object
        and resizing it to MINI_MASK_SHAPE.

    Returns:
    image: [height, width, 3]
    shape: the original shape of the image before resizing and cropping.
    class_ids: [instance_count] Integer class IDs
    bbox: [instance_count, (y1, x1, y2, x2)]
    mask: [height, width, instance_count]. The height and width are those
        of the image unless use_mini_mask is True, in which case they are
        defined in MINI_MASK_SHAPE.
    """
    # Load image and mask
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    original_shape = image.shape
    image, window, scale, padding, crop = utils.resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        min_scale=config.IMAGE_MIN_SCALE,
        max_dim=config.IMAGE_MAX_DIM,
        mode=config.IMAGE_RESIZE_MODE)
    mask = utils.resize_mask(mask, scale, padding, crop)

    # Random horizontal flips.
    # TODO: will be removed in a future update in favor of augmentation
    if augment:
        logging.warning("'augment' is deprecated. Use 'augmentation' instead.")
        if random.randint(0, 1):
            image = np.fliplr(image)
            mask = np.fliplr(mask)

    # Augmentation
    # This requires the imgaug lib (https://github.com/aleju/imgaug)
    if augmentation:
        import imgaug

        # Augmenters that are safe to apply to masks.
        # Some, such as Affine, have settings that make them unsafe, so always
        # test your augmentation on masks.
        MASK_AUGMENTS = ["Sequential", "SomeOf", "OneOf", "Sometimes",
                         "Fliplr", "Flipud", "CropAndPad",
                         "Affine", "PiecewiseAffine"]

        def hook(images, augmenter, parents, default):
            """Determines which augmenters to apply to masks."""
            return augmenter.__class__.__name__ in MASK_AUGMENTS

        # Store shapes before augmentation to compare
        image_shape = image.shape
        mask_shape = mask.shape
        # Make augmenters deterministic to apply similarly to images and masks
        det = augmentation.to_deterministic()
        image = det.augment_image(image)
        # Change mask to np.uint8 because imgaug doesn't support bool masks
        mask = det.augment_image(mask.astype(np.uint8),
                                 hooks=imgaug.HooksImages(activator=hook))
        # Verify that shapes didn't change
        assert image.shape == image_shape, "Augmentation shouldn't change image size"
        assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
        # Change mask back to bool (np.bool is deprecated)
        mask = mask.astype(bool)

    # Note that some boxes might be all zeros if the corresponding mask got
    # cropped out, so filter them out here.
    _idx = np.sum(mask, axis=(0, 1)) > 0
    mask = mask[:, :, _idx]
    class_ids = class_ids[_idx]

    # Bounding boxes. Note that some boxes might be all zeros
    # if the corresponding mask got cropped out.
    # bbox: [num_instances, (y1, x1, y2, x2)]
    bbox = utils.extract_bboxes(mask)

    # Active classes
    # Different datasets have different classes, so track the
    # classes supported in the dataset of this image.
    active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
    source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]]
    active_class_ids[source_class_ids] = 1

    # Resize masks to a smaller size to reduce memory usage.
    if use_mini_mask:
        mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)

    # Image meta data
    image_meta = utils.compose_image_meta(image_id, original_shape, image.shape,
                                          window, scale, active_class_ids)

    return image, image_meta, class_ids, bbox, mask
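# A minimal usage sketch for load_image_gt, assuming a prepared `dataset` and
# `config` as elsewhere in this file; the shapes follow the docstring above.
image_id = random.choice(dataset.image_ids)
image, image_meta, class_ids, bbox, mask = load_image_gt(
    dataset, config, image_id, use_mini_mask=config.USE_MINI_MASK)
print("image:", image.shape)   # [height, width, 3]
print("bbox:", bbox.shape)     # [instance_count, (y1, x1, y2, x2)]
print("mask:", mask.shape)     # MINI_MASK_SHAPE-sized if mini masks are enabled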