def test_augmentation_input_args(self):
    input_shape = (100, 100)
    output_shape = (50, 50)

    # define two augmentations with different args
    class TG1(T.Augmentation):
        def get_transform(self, image, sem_seg):
            return T.ResizeTransform(
                input_shape[0], input_shape[1], output_shape[0], output_shape[1]
            )

    class TG2(T.Augmentation):
        def get_transform(self, image):
            assert image.shape[:2] == output_shape  # check that TG1 is applied
            return T.HFlipTransform(output_shape[1])

    image = np.random.rand(*input_shape).astype("float32")
    sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8")
    inputs = T.AugInput(image, sem_seg=sem_seg)  # provide two args
    tfms = inputs.apply_augmentations([TG1(), TG2()])
    self.assertIsInstance(tfms[0], T.ResizeTransform)
    self.assertIsInstance(tfms[1], T.HFlipTransform)
    self.assertTrue(inputs.image.shape[:2] == output_shape)
    self.assertTrue(inputs.sem_seg.shape[:2] == output_shape)

    class TG3(T.Augmentation):
        def get_transform(self, image, nonexist):
            pass

    with self.assertRaises(AttributeError):
        inputs.apply_augmentations([TG3()])
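# The test above relies on Augmentation.get_transform() being called with
# whichever AugInput attributes match its argument names (hence the
# AttributeError for the nonexistent `nonexist` attribute). A minimal sketch
# of that dispatch behavior; the class name here is illustrative, not part of
# the test suite:
import numpy as np
from detectron2.data import transforms as T

class NoOpOnImage(T.Augmentation):
    # Only asks for `image`, so only the image attribute is passed in.
    def get_transform(self, image):
        return T.NoOpTransform()

aug_in = T.AugInput(np.zeros((4, 4, 3), dtype=np.float32))
tfms = aug_in.apply_augmentations([NoOpOnImage()])  # returns a TransformList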
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # Load image.
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)
    # Panoptic label is encoded in RGB image.
    pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB")

    # Reuses semantic transform for panoptic labels.
    aug_input = T.AugInput(image, sem_seg=pan_seg_gt)
    _ = self.augmentations(aug_input)
    image, pan_seg_gt = aug_input.image, aug_input.sem_seg

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    # Generates training targets for Panoptic-DeepLab.
    targets = self.panoptic_target_generator(rgb2id(pan_seg_gt), dataset_dict["segments_info"])
    dataset_dict.update(targets)

    return dataset_dict
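# rgb2id above follows the COCO panoptic convention from panopticapi, where a
# segment id is packed into an RGB label image as id = R + 256*G + 256**2*B.
# A minimal sketch of that decoding, mirroring panopticapi.utils.rgb2id and
# assuming a uint8 HxWx3 input:
import numpy as np

def rgb2id(color: np.ndarray) -> np.ndarray:
    color = color.astype(np.uint32)
    return color[..., 0] + 256 * color[..., 1] + 256 * 256 * color[..., 2]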
def __call__(self, dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    aug_input = T.AugInput(image)
    transforms = self.augmentations(aug_input)
    image = aug_input.image
    # if not self.is_train:
    #     # USER: Modify this if you want to keep them for some reason.
    #     dataset_dict.pop("annotations", None)
    #     dataset_dict.pop("sem_seg_file_name", None)
    #     return dataset_dict
    image_shape = image.shape[:2]  # h, w

    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image_shape)
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image_shape)
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def test_augmentation_list(self):
    input_shape = (100, 100)
    image = np.random.rand(*input_shape).astype("float32")
    sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8")
    inputs = T.AugInput(image, sem_seg=sem_seg)  # provide two args

    augs = T.AugmentationList([T.RandomFlip(), T.Resize(20)])
    _ = T.AugmentationList([augs, T.Resize(30)])(inputs)
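# Calling an AugmentationList on an AugInput (as the test does) returns a
# TransformList of the concrete transforms that were sampled; the same
# transforms can then be replayed deterministically on other data. A minimal
# sketch with illustrative shapes:
import numpy as np
from detectron2.data import transforms as T

img = np.random.rand(100, 100, 3).astype("float32")
aug_in = T.AugInput(img)
tfms = T.AugmentationList([T.RandomFlip(), T.Resize((30, 30))])(aug_in)
other = tfms.apply_image(np.random.rand(100, 100, 3).astype("float32"))
boxes = tfms.apply_box(np.array([[10.0, 10.0, 50.0, 60.0]]))  # XYXY format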
def apply_image_augmentations(self, image, dataset_dict, sem_seg_gt, augmentations):
    """Applies the given augmentations to the given image and its attributes (segm, instances, etc.).

    Almost no changes from D2's original code (apart from erasing non-relevant
    portions), just wrapped in a method to avoid duplicate code. It must be a
    method (not a free function), since it reads mapper attributes via `self`.
    """
    aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
    transforms = augmentations(aug_input)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg
    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict, transforms  # keep the return signature consistent

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        annos = [
            utils.transform_instance_annotations(
                obj,
                transforms,
                image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices,
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format
        )

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict, transforms
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # image1 = utils.convert_PIL_to_numpy(
    #     Image.open(dataset_dict["image_file1"]), format=self.image_format)
    # image2 = utils.convert_PIL_to_numpy(
    #     Image.open(dataset_dict["image_file2"]), format=self.image_format)
    image1 = utils.read_image(dataset_dict["image_file1"], format=self.image_format)
    image2 = utils.read_image(dataset_dict["image_file2"], format=self.image_format)
    flow_map = flow_utils.read_flow(dataset_dict["flow_map_file"])
    _check_shape(image1, image2, flow_map)

    height, width = image1.shape[:2]  # h, w
    dataset_dict["height"] = height
    dataset_dict["width"] = width

    # Apply augmentations
    aug_input = T.AugInput(image=image1)
    transforms = self.augmentations(aug_input)
    image1 = aug_input.image
    image2 = transforms.apply_image2(image2)
    flow_map = transforms.apply_flow(flow_map)
    _check_shape(image1, image2, flow_map)

    # Visualize
    # from detectron2.utils.flow_visualizer import (
    #     visualize_sample_from_array,
    #     visualize_sample_from_file,
    # )
    # visualize_sample_from_array(image1, image2, flow_map, save=True)
    # visualize_sample_from_file(
    #     dataset_dict["image_file1"],
    #     dataset_dict["image_file2"],
    #     dataset_dict["flow_map_file"],
    #     save=True,
    # )

    dataset_dict["image1"] = torch.as_tensor(np.ascontiguousarray(image1.transpose(2, 0, 1)))
    dataset_dict["image2"] = torch.as_tensor(np.ascontiguousarray(image2.transpose(2, 0, 1)))
    dataset_dict["flow_map"] = torch.as_tensor(np.ascontiguousarray(flow_map.transpose(2, 0, 1)))
    return dataset_dict
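# apply_image2 / apply_flow above are project-specific extensions; detectron2's
# stock Transform classes do not define them. The subtlety they capture is that
# geometric transforms must also remap the flow *vectors*, not just the pixels:
# a horizontal flip, for instance, negates the u (x-displacement) channel. A
# hedged sketch of that idea; flip_flow is a hypothetical helper, not part of
# the source:
import numpy as np

def flip_flow(flow_map: np.ndarray) -> np.ndarray:
    """Horizontally flip an HxWx2 flow map: mirror the pixels, then negate u."""
    flipped = flow_map[:, ::-1].copy()
    flipped[..., 0] *= -1.0
    return flipped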
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    aug_input = T.AugInput(image)
    transforms = self.augmentations(aug_input)
    image = aug_input.image
    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # Maps points from the closed interval [0, image_size - 1] on discrete
        # image coordinates to the half-open interval [x1, x2) on continuous image
        # coordinates. We use the continuous-discrete conversion from Heckbert
        # 1990 ("What is the coordinate of a pixel?"): d = floor(c) and c = d + 0.5,
        # where d is a discrete coordinate and c is a continuous coordinate.
        for ann in dataset_dict["annotations"]:
            # np.float was removed in NumPy 1.24; use np.float64 explicitly
            point_coords_wrt_image = np.array(ann["point_coords"]).astype(np.float64)
            point_coords_wrt_image = point_coords_wrt_image + 0.5
            ann["point_coords"] = point_coords_wrt_image

        annos = [
            # also need to transform point coordinates
            transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = annotations_to_instances(
            annos, image_shape, sample_points=self.sample_points
        )
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict
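# A quick worked instance of the Heckbert convention used above: the discrete
# pixel index d and the continuous coordinate c of its center are related by
# c = d + 0.5 and d = floor(c). So pixel column 3 has its center at x = 3.5,
# and any continuous x in [3.0, 4.0) maps back onto column 3:
import math

d = 3
c = d + 0.5  # 3.5, center of pixel 3 in continuous coordinates
assert math.floor(c) == d
assert math.floor(3.999) == 3 and math.floor(4.0) == 4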
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    aug_input = T.AugInput(image)
    transforms = self.augmentations(aug_input)
    image = aug_input.image
    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if "annotations" in dataset_dict:
        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format)
        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict
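# Numeric check of the triangle example in the comment above (pure numpy,
# illustrative only): rasterize the triangle x + y <= 2 on a fine grid, crop to
# x in [1, 2], and compare the mask's tight box with the naive box intersection.
import numpy as np

ys, xs = np.mgrid[0:2:1000j, 0:2:1000j]
tri = (xs + ys) <= 2.0               # triangle (0,0), (2,0), (0,2)
cropped = tri & (xs >= 1.0)          # crop box (1,0), (2,2)
tight = (xs[cropped].min(), ys[cropped].min(), xs[cropped].max(), ys[cropped].max())
# tight box is ~ (1, 0, 2, 1); the naive intersection of the original bbox
# (0, 0, 2, 2) with the crop box would give (1, 0, 2, 2) instead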
def _load_image_with_annos(self, dataset_dict):
    """
    Load the image and annotations given a dataset_dict.
    """
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    aug_input = T.AugInput(image)
    transforms = self.augmentations(aug_input)
    image = aug_input.image
    image_shape = image.shape[:2]  # h, w

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return image, None

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)
        # USER: Implement additional transformations if you have other
        # types of data
        # apply meta_infos for mosaic transformation
        annos = [
            transform_instance_annotations(
                obj, transforms, image_shape, add_meta_infos=self.add_meta_infos)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
    else:
        annos = None
    return image, annos
def __call__(self, dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    aug_input = T.AugInput(image)
    transforms = self.augmentations(aug_input)
    image = aug_input.image
    image_shape = image.shape[:2]  # h, w

    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image_shape)
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image_shape)
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # can use other ways to read image
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    # See the "Data Augmentation" tutorial for usage details
    auginput = T.AugInput(image)
    transform = T.Resize((800, 800))(auginput)
    # `image` is an ndarray, so index the dict for the file name
    print(f'resized image {dataset_dict["file_name"]}')
    image = torch.from_numpy(auginput.image.transpose(2, 0, 1))
    annos = [
        utils.transform_instance_annotations(annotation, [transform], image.shape[1:])
        for annotation in dataset_dict.pop("annotations")
    ]
    return {
        # create the format that the model expects
        "image": image,
        "instances": utils.annotations_to_instances(annos, image.shape[1:]),
    }
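# A mapper like the one above is typically plugged into detectron2's data
# loader. A minimal sketch, assuming `cfg` is an already-populated config with
# training datasets registered and `mapper` is an instance of the class above
# (both names are assumptions here):
from detectron2.data import build_detection_train_loader

train_loader = build_detection_train_loader(cfg, mapper=mapper)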
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        if "category_colors" in dataset_dict:
            sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "RGB")
            sem_seg_gt = rgb2mask(sem_seg_gt, dataset_dict["category_colors"])
        else:
            sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L")
            sem_seg_gt = sem_seg_gt.squeeze(2)
    else:
        sem_seg_gt = None

    aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
    transforms = self.augmentations(aug_input)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt, dtype=torch.long)

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    return dataset_dict
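# rgb2mask above is project-specific (not a detectron2 utility): it converts a
# color-coded RGB label image into a 2D class-index mask. A hedged sketch,
# assuming `category_colors` maps a class index to an (R, G, B) tuple:
import numpy as np

def rgb2mask(sem_seg_rgb: np.ndarray, category_colors: dict) -> np.ndarray:
    mask = np.zeros(sem_seg_rgb.shape[:2], dtype=np.uint8)
    for class_idx, rgb in category_colors.items():
        mask[np.all(sem_seg_rgb == np.array(rgb), axis=-1)] = class_idx
    return mask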
def __call__(self, dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    aug_input = T.AugInput(image, sem_seg=None)
    transforms = self.augmentations(aug_input)
    image = aug_input.image
    image_shape = image.shape[:2]

    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        annos = [
            self.transform_instance_annotations_rotated(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances_rotated(annos, image_shape)
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict
def augment(im):
    input = T.AugInput(im)
    transform = augs(input)  # type: T.Transform
    x = input.image  # new image
    return x
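# Possible usage of augment() above, assuming `augs` is an AugmentationList
# defined at module scope before the call (the pipeline chosen here is
# illustrative, not from the source):
import numpy as np
from detectron2.data import transforms as T

augs = T.AugmentationList([
    T.ResizeShortestEdge(short_edge_length=640, max_size=1333),
    T.RandomFlip(),
])
out = augment(np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8))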
scallop_metadata = MetadataCatalog.get(P.DATASET_DIR + "train")

cfg = get_cfg()
if USE_SAVED_MODEL:
    cfg.merge_from_file('config.yml')
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, P.MODEL_PATH)
else:
    cfg.merge_from_file(
        "./detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    )
    cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"

if SHOW_INPUTS:
    for d in random.sample(dataset_dicts, 10):
        img = cv2.imread(d["file_name"])
        input = transforms.AugInput(img)
        transform = augs(input)
        image_transformed = input.image

        visualizer = Visualizer(img[:, :, ::-1], metadata=scallop_metadata, scale=0.5)
        vis = visualizer.draw_dataset_dict(d)
        img = vis.get_image()[:, :, ::-1]
        cv2.imshow("Original image", img)

        visualizer = Visualizer(image_transformed[:, :, ::-1], metadata=scallop_metadata, scale=0.5)
        vis = visualizer.draw_dataset_dict(d)
        image_transformed = vis.get_image()[:, :, ::-1]
def _desc_to_example(desc: Dict):
    # Detectron2 Model Input Format:
    #   image: Tensor[C, H, W];
    #   height, width: output height and width;
    #   instances: an Instances object for training, with the following fields:
    #     "gt_boxes":
    #     "gt_classes":
    #     "gt_masks": a PolygonMasks or BitMasks object storing N masks, one for each instance.
    desc = copy.deepcopy(desc)  # it will be modified by code below
    image_path = os.path.join(images_dir, f'{desc["image_id"]}.jpg')
    # shape: [H, W, C]
    origin_image = detection_utils.read_image(image_path, format="BGR")
    oh, ow, oc = origin_height, origin_width, origin_channels = origin_image.shape

    if augmentations is not None:
        aug_input = T.AugInput(origin_image)
        transforms = augmentations(aug_input)
        auged_image = aug_input.image
    else:
        auged_image = origin_image
    ah, aw, ac = auged_height, auged_width, auged_channels = auged_image.shape

    if not is_train:
        return {
            "image_id": desc['image_id'],  # COCOEvaluator.process() needs it.
            # expected shape: [C, H, W]
            "image": torch.as_tensor(np.ascontiguousarray(auged_image.transpose(2, 0, 1))),
            "height": auged_height,
            "width": auged_width,
        }

    target = Instances(image_size=(ah, aw))

    if 'fill gt_boxes':  # always-true string literal, used only as a section label
        # shape: n_box, 4
        boxes_abs = np.array([anno['bbox'] for anno in desc['annotations']])
        if augmentations is not None:
            # clip transformed bbox to image size
            boxes_auged = transforms.apply_box(np.array(boxes_abs)).clip(min=0)
            boxes_auged = np.minimum(boxes_auged, np.array([aw, ah, aw, ah])[np.newaxis, :])
        else:
            boxes_auged = boxes_abs
        target.gt_boxes = Boxes(boxes_auged)

    if 'fill gt_classes':
        classes = [anno['category_id'] for anno in desc['annotations']]
        classes = torch.tensor(classes, dtype=torch.int64)
        target.gt_classes = classes

    if 'fill gt_masks':
        mask_paths = [
            os.path.join(masks_dir, f'{anno["mask_id"]}.png')
            for anno in desc['annotations']
        ]
        masks = np.array([
            cv2.resize(cv2.imread(p, flags=cv2.IMREAD_GRAYSCALE), dsize=(ow, oh))
            for p in mask_paths
        ])
        if augmentations is not None:
            masks_auged = np.array([transforms.apply_segmentation(x) for x in masks])
        else:
            masks_auged = masks
        masks_auged = masks_auged > MASK_THRESHOLD
        masks_auged = BitMasks(
            torch.stack([torch.from_numpy(np.ascontiguousarray(x)) for x in masks_auged])
        )
        target.gt_masks = masks_auged

    return {
        "image_id": desc['image_id'],  # COCOEvaluator.process() needs it.
        # expected shape: [C, H, W]
        "image": torch.as_tensor(np.ascontiguousarray(auged_image.transpose(2, 0, 1))),
        "height": auged_height,
        "width": auged_width,
        "instances": target,  # refer: annotations_to_instances()
    }
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
    transforms = self.augmentations(aug_input)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg
    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.proposal_topk is not None:
        utils.transform_proposals(
            dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk)

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = annotations_to_instances_with_attributes(
            annos, image_shape,
            mask_format=self.instance_mask_format,
            load_attributes=self.attribute_on,
            max_attr_per_ins=self.max_attr_per_ins)

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict
def __call__(self, dataset_dict):
    '''
    Adapted from
    https://detectron2.readthedocs.io/_modules/detectron2/data/dataset_mapper.html#DatasetMapper
    '''
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    try:
        image = cv2.imdecode(
            np.frombuffer(self.fileServer.getFile(dataset_dict["file_name"]), np.uint8),
            -1)
        if self.image_format == 'RGB':
            # flip along spectral dimension
            if image.ndim >= 3:
                image = np.flip(image, 2)
    except Exception:
        # TODO: cannot handle corrupt data input here; needs to be done earlier
        print('WARNING: Image {} is corrupt and could not be loaded.'.format(
            dataset_dict["file_name"]))
        image = None
    # ORIGINAL: image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    # convert annotations from relative to XYXY absolute format if needed
    image_shape = image.shape[:2]
    if 'annotations' in dataset_dict:
        for anno in dataset_dict['annotations']:
            if 'bbox_mode' in anno and anno['bbox_mode'] in [BoxMode.XYWH_REL, BoxMode.XYXY_REL]:
                if anno['bbox_mode'] == BoxMode.XYWH_REL:
                    anno['bbox'][0] -= anno['bbox'][2] / 2
                    anno['bbox'][1] -= anno['bbox'][3] / 2
                    anno['bbox'][2] += anno['bbox'][0]
                    anno['bbox'][3] += anno['bbox'][1]
                # image_shape is (h, w): x coordinates scale by the width, y by the height
                anno['bbox'][0] *= image_shape[1]
                anno['bbox'][1] *= image_shape[0]
                anno['bbox'][2] *= image_shape[1]
                anno['bbox'][3] *= image_shape[0]
                anno['bbox_mode'] = BoxMode.XYXY_ABS

    if "segmentationMask" in dataset_dict:
        try:
            raster = np.frombuffer(
                base64.b64decode(dataset_dict['segmentationMask']), dtype=np.uint8)
            sem_seg_gt = np.reshape(raster, image_shape)  # TODO: check format
            if self.classIndexMap is not None:
                sem_seg_gt_copy = np.copy(sem_seg_gt)
                for k, v in self.classIndexMap.items():
                    sem_seg_gt_copy[sem_seg_gt == k] = v
                sem_seg_gt = sem_seg_gt_copy
        except Exception:
            print('WARNING: Segmentation mask for image "{}" could not be loaded or decoded.'
                  .format(dataset_dict["file_name"]))
            sem_seg_gt = None
        # ORIGINAL: sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    if "gt_label" in dataset_dict:
        dataset_dict["gt_label"] = torch.LongTensor([dataset_dict["gt_label"]])

    aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
    transforms = self.augmentations(aug_input)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg
    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    if "annotations" in dataset_dict:
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format)
        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.recompute_boxes and len(instances) and hasattr(instances, 'gt_masks'):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        # TODO: do we want that? Maybe limit to width and height assignment to dict entry...
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict
def __call__(self, dataset_dict: dict):
    dataset_dict = deepcopy(dataset_dict)
    image = dutils.read_image(dataset_dict.get('file_name'), format=self.cfg.INPUT.FORMAT)
    mask = dutils.read_image(dataset_dict.pop('sem_seg_file_name'))
    assert image.shape[:2] == mask.shape[:2]

    # derive one binary mask (and box) per object id found in the label image
    obj_ids = np.unique(mask)[1:]
    masks = mask == obj_ids[:, None, None]

    annotations = []
    for i in range(len(obj_ids)):
        pos = np.where(masks[i])
        box = (
            np.min(pos[1]),
            np.min(pos[0]),
            np.max(pos[1]),
            np.max(pos[0]),
        )
        annotations.append({
            'bbox': box,
            'bbox_mode': 0,  # BoxMode.XYXY_ABS
            'category_id': 0,
            # encode the i-th instance mask (not the full label image) as RLE
            'segmentation': encode(np.array(masks[i], dtype=np.uint8, order='F')),
        })

    if not self.is_train:
        return dict(image=image, annotations=annotations)

    aug_input = T.AugInput(image, sem_seg=mask)
    transforms = aug_input.apply_augmentations(self._augmentation)
    image = torch.from_numpy(aug_input.image.transpose((2, 0, 1)).astype('float32'))
    mask = torch.from_numpy(aug_input.sem_seg.astype('float32'))

    annos = [
        dutils.transform_instance_annotations(annotation, transforms, image.shape[1:])
        for annotation in annotations
    ]
    instances = dutils.annotations_to_instances(
        annos, image.shape[1:], mask_format=self.cfg.INPUT.MASK_FORMAT)
    # instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

    dataset_dict['image'] = image
    dataset_dict['sem_seg'] = mask
    # dataset_dict['instances'] = instances[instances.gt_boxes.nonempty()]
    dataset_dict['instances'] = dutils.filter_empty_instances(instances)
    return dataset_dict
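# Quick illustration of the broadcasting trick used above to split a label
# mask into per-object binary masks (the values are illustrative):
import numpy as np

label = np.array([[0, 1, 1],
                  [2, 2, 0]])
ids = np.unique(label)[1:]              # array([1, 2]); 0 is background
per_obj = label == ids[:, None, None]   # shape (2, 2, 3): one binary mask per id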