def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below image = utils.read_image(dataset_dict["file_name"], format=self.img_format) utils.check_image_size(dataset_dict, image) if self.crop_gen is None: image, transforms = T.apply_transform_gens(self.tfm_gens, image) else: if np.random.rand() > 0.5: image, transforms = T.apply_transform_gens(self.tfm_gens, image) else: image, transforms = T.apply_transform_gens( self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image ) image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) if not self.is_train: # USER: Modify this if you want to keep them for some reason. dataset_dict.pop("annotations", None) return dataset_dict if "annotations" in dataset_dict: # USER: Modify this if you want to keep them for some reason. for anno in dataset_dict["annotations"]: # anno.pop("segmentation", None) # TODO #1 keep segmentation annotation anno.pop("keypoints", None) # USER: Implement additional transformations if you have other types of data annos = [ utils.transform_instance_annotations(obj, transforms, image_shape) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = utils.annotations_to_instances(annos, image_shape) dataset_dict["instances"] = utils.filter_empty_instances(instances) return dataset_dict
def test_transform_simple_annotation(self):
    transforms = T.TransformList([T.HFlipTransform(400)])
    anno = {
        "bbox": np.asarray([10, 10, 200, 300]),
        "bbox_mode": BoxMode.XYXY_ABS,
        "category_id": 3,
        "segmentation": [[10, 10, 100, 100, 100, 10], [150, 150, 200, 150, 200, 200]],
    }

    output = detection_utils.transform_instance_annotations(anno, transforms, (400, 400))
    self.assertTrue(np.allclose(output["bbox"], [200, 10, 390, 300]))
    self.assertEqual(len(output["segmentation"]), len(anno["segmentation"]))
    self.assertTrue(np.allclose(output["segmentation"][0], [390, 10, 300, 100, 300, 10]))

    detection_utils.annotations_to_instances([output, output], (400, 400))
def mapper(dataset_dict):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    pdb.set_trace()  # leftover breakpoint; remove when not debugging
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    image, transforms = T.apply_transform_gens([T.Resize((800, 800))], image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def custom_mapper(dataset_dict, transform_list):
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
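# Mappers like the two above are plugged into training through
# `build_detection_train_loader`. Because this variant also takes a `transform_list`
# argument, it has to be bound first; a minimal sketch (the transform list shown here
# is an arbitrary example, not a recommendation):
from functools import partial

import detectron2.data.transforms as T
from detectron2.data import build_detection_train_loader
from detectron2.engine import DefaultTrainer


class TrainerWithCustomMapper(DefaultTrainer):
    @classmethod
    def build_train_loader(cls, cfg):
        transform_list = [T.Resize((800, 800)), T.RandomFlip(prob=0.5)]
        # bind the extra argument so the loader sees a one-argument mapper
        mapper = partial(custom_mapper, transform_list=transform_list)
        return build_detection_train_loader(cfg, mapper=mapper)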
def test_crop(self):
    transforms = T.TransformList([T.CropTransform(300, 300, 10, 10)])
    keypoints = np.random.rand(17, 3) * 50 + 15
    keypoints[:, 2] = 2  # COCO visibility flag: 2 = labeled and visible
    anno = {
        "bbox": np.asarray([10, 10, 200, 300]),
        "bbox_mode": BoxMode.XYXY_ABS,
        "keypoints": keypoints,
    }

    output = detection_utils.transform_instance_annotations(
        copy.deepcopy(anno), transforms, (400, 400)
    )
    self.assertTrue((output["bbox"] == np.asarray([-290, -290, -100, 0])).all())
    # keypoints are no longer visible
    self.assertTrue((output["keypoints"][:, 2] == 0).all())
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy( dataset_dict) # it will be modified by code below image = utils.read_image(dataset_dict["file_name"], format=self.img_format) utils.check_image_size(dataset_dict, image) image, transforms = T.apply_transform_gens(self.augmentation, image) image_shape = image.shape[:2] # h, w dataset_dict["image"] = torch.as_tensor( image.transpose(2, 0, 1).astype("float32")) if not self.is_train: dataset_dict.pop("annotations", None) return dataset_dict annos = [ utils.transform_instance_annotations(obj, transforms, image.shape[:2]) for obj in dataset_dict.pop("annotations") ] dataset_dict["instances"] = utils.annotations_to_instances( annos, image.shape[:2]) # # USER: Implement additional transformations if you have other types of data # # USER: Don't call transpose_densepose if you don't need # annos = [ # self._transform_densepose( # utils.transform_instance_annotations( # obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices # ), # transforms, # ) # for obj in dataset_dict.pop("annotations") # if obj.get("iscrowd", 0) == 0 # ] # instances = utils.annotations_to_instances(annos, image_shape, mask_format="bitmask") # dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()] return dataset_dict
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy( dataset_dict) # it will be modified by code below # USER: Write your own image loading if it's not from a file image = utils.read_image(dataset_dict["file_name"], format=self.image_format) utils.check_image_size(dataset_dict, image) aug_input = T.AugInput(image) transforms = self.augmentations(aug_input) image = aug_input.image image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor( np.ascontiguousarray(image.transpose(2, 0, 1))) if "annotations" in dataset_dict: # USER: Implement additional transformations if you have other types of data annos = [ utils.transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = utils.annotations_to_instances( annos, image_shape, mask_format=self.instance_mask_format) # After transforms such as cropping are applied, the bounding box may no longer # tightly bound the object. As an example, imagine a triangle object # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to # the intersection of original bounding box and the cropping box. dataset_dict["instances"] = utils.filter_empty_instances(instances) return dataset_dict
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below image = utils.read_image(dataset_dict["file_name"], format=self.img_format) utils.check_image_size(dataset_dict, image) image, transforms = T.apply_transform_gens(self.tfm_gens, image) image_shape = image.shape[:2] # h, w dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) if not self.is_train: dataset_dict.pop("annotations", None) return dataset_dict for anno in dataset_dict["annotations"]: if not self.mask_on: anno.pop("segmentation", None) if not self.keypoint_on: anno.pop("keypoints", None) # USER: Implement additional transformations if you have other types of data # USER: Don't call transpose_densepose if you don't need annos = [ self._transform_densepose( utils.transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices ), transforms, ) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = utils.annotations_to_instances(annos, image_shape) if len(annos) and "densepose" in annos[0]: gt_densepose = [obj["densepose"] for obj in annos] instances.gt_densepose = DensePoseList(gt_densepose, instances.gt_boxes, image_shape) dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()] return dataset_dict
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of ONE video, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below if self.is_train: dataset_dict = # TODO: sample a fixed number of frames new_dataset_dict = [] for item in dataset_dict: image = utils.read_image(item["filename"], format=self.img_format) utils.check_image_size(item, image) # TODO: SSD random crop image, transforms = T.apply_transform_gens( self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image ) image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) sample = {"image": image} if not self.is_train: new_dataset_dict.append(sample) continue # USER: Implement additional transformations if you have other types of data boxes = [ utils.transform_instance_annotations(box, transforms, image_shape) for box in item["boxes"] ] instances = # sample["instances"] = instances new_dataset_dict.append(sample) return new_dataset_dict
def __call__(self, dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    aug_input = T.AugInput(image)
    transforms = self.augmentations(aug_input)
    image = aug_input.image
    image_shape = image.shape[:2]  # h, w

    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image_shape)
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image_shape)
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def customMapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    transform_list = [
        T.Resize((600, 800)),
        T.RandomFlip(prob=0.6, horizontal=True, vertical=False),
        T.RandomFlip(prob=0.6, horizontal=False, vertical=True),
    ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # can use other ways to read image
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    # See "Data Augmentation" tutorial for detailed usage
    auginput = T.AugInput(image)
    transform = T.Resize((800, 800))(auginput)
    # `image` is a numpy array here, so the file name must come from the dict
    print(f'resized image {dataset_dict["file_name"]}')
    image = torch.from_numpy(auginput.image.transpose(2, 0, 1))

    annos = [
        utils.transform_instance_annotations(annotation, [transform], image.shape[1:])
        for annotation in dataset_dict.pop("annotations")
    ]
    return {
        # create the format that the model expects
        "image": image,
        "instances": utils.annotations_to_instances(annos, image.shape[1:]),
    }
def test_transform_RLE_resize(self): transforms = T.TransformList( [T.HFlipTransform(400), T.ScaleTransform(300, 400, 400, 400, "bilinear")] ) mask = np.zeros((300, 400), order="F").astype("uint8") mask[:, :200] = 1 anno = { "bbox": np.asarray([10, 10, 200, 300]), "bbox_mode": BoxMode.XYXY_ABS, "segmentation": mask_util.encode(mask[:, :, None])[0], "category_id": 3, } output = detection_utils.transform_instance_annotations( copy.deepcopy(anno), transforms, (400, 400) ) inst = detection_utils.annotations_to_instances( [output, output], (400, 400), mask_format="bitmask" ) self.assertTrue(isinstance(inst.gt_masks, BitMasks))
def custom_mapper(input_dict):
    dataset_dict = copy.deepcopy(input_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    transform_list = [
        T.Resize((1200, 1200)),
        T.RandomFlip(prob=0.6, horizontal=True, vertical=False),
        T.RandomFlip(prob=0.6, horizontal=False, vertical=True),
        T.RandomContrast(0.7, 3.2),
        T.RandomBrightness(0.6, 1.8),
    ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy(dataset_dict) image = utils.read_image(dataset_dict['file_name'], format=self.img_format) utils.check_image_size(dataset_dict, image) if self.crop_gen is None: image, transforms = T.apply_transform_gens(self.tfm_gens, image) elif np.random.rand() > 0.5: image, transforms = T.apply_transform_gens(self.tfm_gens, image) else: image, transforms = T.apply_transform_gens( self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image) image_shape = image.shape[:2] dataset_dict['image'] = paddle.to_tensor( np.ascontiguousarray(image.transpose(2, 0, 1))) if not self.is_train: dataset_dict.pop('annotations', None) return dataset_dict if 'annotations' in dataset_dict: for anno in dataset_dict['annotations']: if not self.mask_on: anno.pop('segmentation', None) anno.pop('keypoints', None) annos = [ utils.transform_instance_annotations(obj, transforms, image_shape) for obj in dataset_dict.pop('annotations') if obj.get('iscrowd', 0) == 0 ] instances = utils.annotations_to_instances(annos, image_shape) dataset_dict['instances'] = utils.filter_empty_instances(instances) return dataset_dict
def custom_mapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    transform_list = [
        InvertColors(),
        T.RandomBrightness(0.8, 1.8),
        T.RandomContrast(0.6, 1.3),
        T.RandomSaturation(0.8, 1.4),
        T.RandomLighting(0.7),
    ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
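# `InvertColors` is not a built-in detectron2 augmentation, so the mapper above relies on
# a user-defined transform. A minimal sketch of what it could look like, assuming uint8
# images and the behaviour implied by the name (a guess, not the original class):
import detectron2.data.transforms as T


class InvertColorsTransform(T.Transform):
    # photometric only: geometry and masks are left untouched
    def apply_image(self, img):
        return 255 - img  # assumes uint8 input

    def apply_coords(self, coords):
        return coords

    def apply_segmentation(self, segmentation):
        return segmentation


class InvertColors(T.Augmentation):
    def get_transform(self, image):
        return InvertColorsTransform()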
def custom_mapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    image, transforms = T.apply_transform_gens([
        T.Resize((1920, 1080)),  # note: T.Resize takes (height, width)
        T.RandomFlip(0.1),
        T.RandomSaturation(0.9, 1.1),
        T.RandomBrightness(0.9, 1.1),
        T.RandomContrast(0.9, 1.1),
    ], image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
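# A mapper whose only change is the augmentation list can often be replaced in recent
# detectron2 versions by passing `augmentations` to the built-in DatasetMapper; a
# hedged sketch (check that your detectron2 version exposes this constructor):
import detectron2.data.transforms as T
from detectron2.data import DatasetMapper, build_detection_train_loader


def build_loader_with_default_mapper(cfg):
    mapper = DatasetMapper(
        is_train=True,
        augmentations=[
            T.Resize((1920, 1080)),
            T.RandomFlip(0.1),
            T.RandomSaturation(0.9, 1.1),
            T.RandomBrightness(0.9, 1.1),
            T.RandomContrast(0.9, 1.1),
        ],
        image_format="BGR",
    )
    return build_detection_train_loader(cfg, mapper=mapper)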
def test_transform_uncompressed_RLE(self):
    transforms = T.TransformList([T.HFlipTransform(400)])
    mask = np.zeros((300, 400)).astype("uint8")
    mask[:, :200] = 1

    anno = {
        "bbox": np.asarray([10, 10, 200, 300]),
        "bbox_mode": BoxMode.XYXY_ABS,
        "segmentation": binary_mask_to_uncompressed_rle(mask),
        "category_id": 3,
    }
    output = detection_utils.transform_instance_annotations(
        copy.deepcopy(anno), transforms, (300, 400)
    )
    mask = output["segmentation"]
    self.assertTrue((mask[:, 200:] == 1).all())
    self.assertTrue((mask[:, :200] == 0).all())

    inst = detection_utils.annotations_to_instances(
        [output, output], (400, 400), mask_format="bitmask"
    )
    self.assertTrue(isinstance(inst.gt_masks, BitMasks))
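# `binary_mask_to_uncompressed_rle` is a test helper that is not shown here. COCO's
# uncompressed RLE flattens the mask in column-major order and stores alternating run
# lengths starting with the zero-run; a sketch of such a helper under that assumption:
import numpy as np


def binary_mask_to_uncompressed_rle(mask):
    pixels = np.asarray(mask, dtype=np.uint8).flatten(order="F")
    counts = []
    prev, run = 0, 0  # COCO RLE always starts by counting zeros
    for p in pixels:
        if p == prev:
            run += 1
        else:
            counts.append(run)
            prev, run = p, 1
    counts.append(run)
    return {"size": list(mask.shape), "counts": counts}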
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ # Implement a mapper, similar to the default DatasetMapper, but with own customizations dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below image = utils.read_image(dataset_dict["file_name"], format="BGR") # Custom augs to be used while training augs = [T.RandomFlip(0.6, horizontal=False, vertical=True)] if self.is_train: tfm_gens = self.tfm_gens + augs else: tfm_gens = self.tfm_gens logging.getLogger(__name__).info("Original Augmentation: " + str(self.tfm_gens)) logging.getLogger(__name__).info("Updated Augmentation List: " + str(tfm_gens)) image, transforms = T.apply_transform_gens(tfm_gens, image) dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) annos = [ utils.transform_instance_annotations(obj, transforms, image.shape[:2]) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = utils.annotations_to_instances(annos, image.shape[:2]) dataset_dict["instances"] = utils.filter_empty_instances(instances) dataset_dict["instances"] = filter_small_instances(dataset_dict["instances"], self.min_area, self.max_area) return dataset_dict
def _original_call(self, dataset_dict): """ Modified from detectron2's original __call__ in DatasetMapper """ dataset_dict = copy.deepcopy( dataset_dict) # it will be modified by code below image = self._read_image(dataset_dict, format=self.img_format) if not self.backfill_size: utils.check_image_size(dataset_dict, image) image, dataset_dict = self._custom_transform(image, dataset_dict) inputs = AugInput(image=image) if "annotations" not in dataset_dict: transforms = AugmentationList( ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens)(inputs) image = inputs.image else: # pass additional arguments, will only be used when the Augmentation # takes `annotations` as input inputs.annotations = dataset_dict["annotations"] # Crop around an instance if there are instances in the image. if self.crop_gen: crop_tfm = utils.gen_crop_transform_with_instance( self.crop_gen.get_crop_size(image.shape[:2]), image.shape[:2], np.random.choice(dataset_dict["annotations"]), ) inputs.image = crop_tfm.apply_image(image) transforms = AugmentationList(self.tfm_gens)(inputs) image = inputs.image if self.crop_gen: transforms = crop_tfm + transforms # Cache identical transforms in dataset_dict for subclass mappers # TODO T122215878 Find more explicit way to expose transforms used dataset_dict["transforms"] = transforms image_shape = image.shape[:2] # h, w if image.ndim == 2: image = np.expand_dims(image, 2) dataset_dict["image"] = torch.as_tensor( image.transpose(2, 0, 1).astype("float32")) # Can use uint8 if it turns out to be slow some day if self.load_proposals: utils.transform_proposals( dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk, min_box_size=self.proposal_min_box_size, ) if not self.is_train: dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: for anno in dataset_dict["annotations"]: if not self.mask_on: anno.pop("segmentation", None) if not self.keypoint_on: anno.pop("keypoints", None) annos = [ utils.transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices, ) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = utils.annotations_to_instances( annos, image_shape, mask_format=self.mask_format) # Create a tight bounding box from masks, useful when image is cropped if self.crop_gen and instances.has("gt_masks"): instances.gt_boxes = instances.gt_masks.get_bounding_boxes() dataset_dict["instances"] = utils.filter_empty_instances(instances) if "sem_seg_file_name" in dataset_dict: sem_seg_gt = read_sem_seg_file_with_prefetch( dataset_dict.pop("sem_seg_file_name"), prefetched=dataset_dict.get(PREFETCHED_SEM_SEG_FILE_NAME, None), ) if len(sem_seg_gt.shape) > 2: sem_seg_gt = sem_seg_gt.squeeze(2) sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) dataset_dict["sem_seg"] = sem_seg_gt # extend standard D2 semantic segmentation to support multiple segmentation # files, each file can represent a class if "multi_sem_seg_file_names" in dataset_dict: raise NotImplementedError() if "_post_process_" in dataset_dict: proc_func = dataset_dict.pop("_post_process_") dataset_dict = proc_func(dataset_dict) return dataset_dict
def __call__(self, dataset_dict):
    assert 'annotations' in dataset_dict, 'this mapper only handles instance segmentation'
    assert 'sem_seg_file_name' not in dataset_dict, 'panoptic segmentation is not supported'
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict['file_name'], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    # brightness / contrast / saturation / cutout
    if self.cont_gen is not None:
        tfm = self.cont_gen.get_transform(image)
        image = tfm.apply_image(image)
    if self.bright_gen is not None:
        tfm = self.bright_gen.get_transform(image)
        image = tfm.apply_image(image)
    if self.sat_gen is not None:
        tfm = self.sat_gen.get_transform(image)
        image = tfm.apply_image(image)
    if self.cutout_gen is not None:
        tfm = self.cutout_gen.get_transform(image)
        image = tfm.apply_image(image)

    # affine: rotation / shear / extent / crop
    if self.rotate_gen is not None:
        rotate_tfm = self.rotate_gen.get_transform(image)
        image = rotate_tfm.apply_image(image)
    if self.shear_gen is not None:
        shear_tfm = self.shear_gen.get_transform(image)
        image = shear_tfm.apply_image(image)
    if self.extent_gen is not None:
        extent_tfm = self.extent_gen.get_transform(image)
        image = extent_tfm.apply_image(image)
    if self.crop_gen is not None:
        crop_tfm = utils.gen_crop_transform_with_instance(
            self.crop_gen.get_crop_size(image.shape[:2]),
            image.shape[:2],
            np.random.choice(dataset_dict['annotations']))
        image = crop_tfm.apply_image(image)

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    if self.crop_gen is not None:
        transforms = crop_tfm + transforms
    if self.extent_gen is not None:
        transforms = extent_tfm + transforms
    if self.shear_gen is not None:
        transforms = shear_tfm + transforms
    if self.rotate_gen is not None:
        transforms = rotate_tfm + transforms

    # at test time the annotations are not needed, so drop them and return
    if not self.is_train:
        dataset_dict.pop('annotations', None)
        dataset_dict.pop('sem_seg_file_name', None)
        return dataset_dict

    image_shape = image.shape[:2]  # h, w
    dataset_dict['image'] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image_shape,
                                             keypoint_hflip_indices=None)
        for obj in dataset_dict.pop('annotations')
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(
        annos, image_shape, mask_format=self.mask_format)

    # recompute bounding boxes from the masks
    if self.crop_gen and instances.has("gt_masks"):
        instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
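# The *_gen attributes used above (cont_gen, bright_gen, sat_gen, cutout_gen, rotate_gen,
# shear_gen, extent_gen, crop_gen) are presumably created in __init__. As an illustration
# only, they might be built roughly as below; every range is a made-up value, and the
# cutout/shear entries stand in for user-defined augmentations that detectron2 does not ship.
import detectron2.data.transforms as T


def build_gens(is_train=True):
    if not is_train:
        return dict.fromkeys(
            ["cont_gen", "bright_gen", "sat_gen", "cutout_gen",
             "rotate_gen", "shear_gen", "extent_gen", "crop_gen"], None)
    return {
        "cont_gen": T.RandomContrast(0.8, 1.2),
        "bright_gen": T.RandomBrightness(0.8, 1.2),
        "sat_gen": T.RandomSaturation(0.8, 1.2),
        "cutout_gen": None,  # would be a user-defined cutout augmentation
        "rotate_gen": T.RandomRotation([-10, 10]),
        "shear_gen": None,   # would be a user-defined shear augmentation
        "extent_gen": T.RandomExtent(scale_range=(0.8, 1.2), shift_range=(0.1, 0.1)),
        "crop_gen": T.RandomCrop("relative_range", (0.8, 0.8)),
    }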
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below # USER: Write your own image loading if it's not from a file image = utils.read_image(dataset_dict["file_name"], format=self.img_format) utils.check_image_size(dataset_dict, image) # ################################################################################################################ # print("AutoAugDet:", dataset_dict["file_name"]) # h, w, c = image.shape # if h <= 0 or w <=0: # print("Empty image") # if self.autoaugdet and "annotations" in dataset_dict: # from detectron2.structures.boxes import BoxMode # bboxes = [] # for label in dataset_dict["annotations"]: # assert label['bbox_mode'] == BoxMode.XYWH_ABS # bboxes.append(label['bbox']) # # import cv2, random # # showimg_in = image.copy() # # for box in bboxes: # # cv2.rectangle(showimg_in, (int(box[0]), int(box[1])), (int(box[0] + box[2]), int(box[1] + box[3])),(random.randint(0,255), random.randint(0,255), random.randint(0,255))) # try: # image, bboxes = autoaugdet.autoaugdet(image, bboxes, self.autoaugdet) # except Exception as e: # print("AutoAug Error:", e) # # showimg_out = image.copy() # # for box in bboxes: # # cv2.rectangle(showimg_out, (int(box[0]), int(box[1])), (int(box[0] + box[2]), int(box[1] + box[3])),(random.randint(0,255), random.randint(0,255), random.randint(0,255))) # # cv2.imshow("in", showimg_in) # # cv2.imshow("out", showimg_out) # # cv2.waitKey(0) # for i in range(len(bboxes)): # dataset_dict["annotations"][i]['bbox'] = bboxes[i] # ################################################################################################# if "annotations" not in dataset_dict: image, transforms = T.apply_transform_gens( ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image ) else: # Crop around an instance if there are instances in the image. # USER: Remove if you don't use cropping if self.crop_gen: crop_tfm = utils.gen_crop_transform_with_instance( self.crop_gen.get_crop_size(image.shape[:2]), image.shape[:2], np.random.choice(dataset_dict["annotations"]), ) image = crop_tfm.apply_image(image) image, transforms = T.apply_transform_gens(self.tfm_gens, image) if self.crop_gen: transforms = crop_tfm + transforms image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) # Can use uint8 if it turns out to be slow some day # USER: Remove if you don't use pre-computed proposals. if self.load_proposals: utils.transform_proposals( dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk ) if not self.is_train: dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: # USER: Modify this if you want to keep them for some reason. 
for anno in dataset_dict["annotations"]: if not self.mask_on: anno.pop("segmentation", None) if not self.keypoint_on: anno.pop("keypoints", None) # USER: Implement additional transformations if you have other types of data annos = [ utils.transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices ) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = utils.annotations_to_instances( annos, image_shape, mask_format=self.mask_format ) # Create a tight bounding box from masks, useful when image is cropped if self.crop_gen and instances.has("gt_masks"): instances.gt_boxes = instances.gt_masks.get_bounding_boxes() dataset_dict["instances"] = utils.filter_empty_instances(instances) # generate heatmaps of keypoints #if dataset_dict["instances"].has("gt_keypoints"): # #For segmentation-based detection, transform the instance-level segmentation mask into semantic segmasks and contour maps # turning instance-level segmentation map into semantic segmap # get the contour map for segmentation-based detection dataset_dict["contours"], dataset_dict["semseg"] = utils.annotations_to_segmaps(annos, self.num_classes, image_shape) kpts = [obj.get("keypoints", []) for obj in annos] map_shape = (image_shape[0], image_shape[1]) kp_maps, short_offsets = get_keypoint_maps(None, kpts, map_shape) dataset_dict["kp_maps"] = kp_maps.transpose(2, 0, 1) dataset_dict["short_offsets"] = short_offsets.transpose(2, 0, 1) ################################################################ # # visualize the keypoints # from detectron2.utils.visualizer import Visualizer # from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES # from os import path # image_rgb = image[..., ::-1] # V = Visualizer(image_rgb, dataset_dict) # # draw the foreground mask of each object category # binary_masks = kp_maps>0.1 # _, fn = path.split(dataset_dict["file_name"]) # fn_next, ext = path.splitext(fn) # print('Mask size: ', binary_masks.shape) # print('Image size: ', image_rgb.shape) # assert binary_masks.shape[1]==image_rgb.shape[0], (binary_masks.shape[1], image_rgb.shape[0]) # assert binary_masks.shape[2]==image_rgb.shape[1], (binary_masks.shape[2], image_rgb.shape[1]) # assert image_rgb.shape[2]==3, image_rgb.shape[2] # bm = binary_masks # for i in range(binary_masks.shape[0]): # masked_image = V.draw_binary_mask( # bm[i, :, :].squeeze(), color=None, edge_color='r', alpha=0.5, area_threshold=10 # ) # COCO_CATEGORIES[i]["color"] # # filepath = "tmp/" + fn_next + '_' + COCO_CATEGORIES[i]["name"] + '.png' # # masked_image.save(filepath) # filepath = "tmp/" + fn_next + '.png' # masked_image.save(filepath) ################################################################ ################################################ # # visualize the segmentation mask # from os import path # image_rgb = image[..., ::-1] #utils.read_image(dataset_dict["file_name"], format="RGB") # segmask = dataset_dict["semseg"].tensor.numpy() # _, fn = path.split(dataset_dict["file_name"]) # fn_next, ext = path.splitext(fn) # im = Image.fromarray(np.uint8(image_rgb)) # filepath = "tmp_segmap_sorted/" + fn_next + '_raw.png' # im.save(filepath) # im2 = Image.fromarray(np.uint8(segmask*3)) # filepath2 = "tmp_segmap_sorted/" + fn_next + '_seg.png' # im2.save(filepath2) ################################################ ############### # # visualize the segmentation map and contours # from detectron2.utils.visualizer import Visualizer # from detectron2.data.datasets.builtin_meta import 
COCO_CATEGORIES # from os import path # #V.draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8) # image_rgb = image[..., ::-1] #utils.read_image(dataset_dict["file_name"], format="RGB") # V = Visualizer(image_rgb, dataset_dict) # # draw the foreground mask of each object category # #binary_masks = dataset_dict["contours"].gt_segmasks.tensor # binary_masks = dataset_dict["contours"].gt_contours.tensor # _, fn = path.split(dataset_dict["file_name"]) # fn_next, ext = path.splitext(fn) # print('Mask size: ', binary_masks.size()) # print('Image size: ', image_rgb.shape) # assert binary_masks.size(1)==image_rgb.shape[0], (binary_masks.size(1), image_rgb.shape[0]) # assert binary_masks.size(2)==image_rgb.shape[1], (binary_masks.size(2), image_rgb.shape[1]) # assert image_rgb.shape[2]==3, image_rgb.shape[2] # bm = binary_masks.numpy() # # bm_uint8 = bm.astype("uint8") # # print(bm) # for i in range(binary_masks.size(0)): # masked_image = V.draw_binary_mask( # bm[i, :, :].squeeze(), color=None, edge_color='r', alpha=0.5, area_threshold=10 # ) # COCO_CATEGORIES[i]["color"] # # filepath = "tmp/" + fn_next + '_' + COCO_CATEGORIES[i]["name"] + '.png' # # masked_image.save(filepath) # filepath = "tmp/" + fn_next + '.png' # masked_image.save(filepath) ################################################################################################# # USER: Remove if you don't do semantic/panoptic segmentation. if "sem_seg_file_name" in dataset_dict: with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: sem_seg_gt = Image.open(f) sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) dataset_dict["sem_seg"] = sem_seg_gt return dataset_dict
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below # USER: Write your own image loading if it's not from a file image = utils.read_image(dataset_dict["file_name"], format=self.image_format) utils.check_image_size(dataset_dict, image) image = transform(image=image)["image"] ############################################################################ ''' image = utils.read_image(dataset_dict["file_name"], format=self.image_format) h, w, _ = image.shape utils.check_image_size(dataset_dict, image) bboxes = [ann["bbox"] for ann in dataset_dict['annotations']] labels = [ann['category_id'] for ann in dataset_dict['annotations']] class_labels = [CLASSES[label] for label in labels] segmentations = [ann["segmentation"] for ann in dataset_dict['annotations']] #cprint("before :" , segmentations) masks = convert_coco_poly_to_mask(segmentations, h, w) masks = [mask.numpy() for mask in masks] transformed = transform(image=image, bboxes=bboxes, class_labels=class_labels, masks=masks) image = transformed["image"] bboxes = transformed["bboxes"] class_labels = transformed["class_labels"] labels = [CLASSES.index(cl) for cl in class_labels] filtered_masks = [] for mask in transformed["masks"]: #if len(np.unique(mask)) > 1: filtered_masks.append(mask) if len(bboxes) != len(filtered_masks): print(len(bboxes), len(filtered_masks), len(labels)) #print(len(bboxes), len(masks), len(labels)) seg_masks = [binary_mask_to_polygon(mask, tolerance=2) for mask in masks] for idx in range(len(labels)): dataset_dict['annotations'][idx]["bbox"] = bboxes[idx] dataset_dict['annotations'][idx]["labels"] = labels[idx] dataset_dict['annotations'][idx]["segmentation"] = seg_masks[idx] dataset_dict['annotations'] = dataset_dict['annotations'][:len(labels)] ''' # USER: Remove if you don't do semantic/panoptic segmentation. if "sem_seg_file_name" in dataset_dict: sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2) else: sem_seg_gt = None aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt) transforms = aug_input.apply_augmentations(self.augmentations) image, sem_seg_gt = aug_input.image, aug_input.sem_seg image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) if sem_seg_gt is not None: dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long")) # USER: Remove if you don't use pre-computed proposals. # Most users would not need this feature. if self.proposal_topk is not None: utils.transform_proposals( dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk ) if not self.is_train: # USER: Modify this if you want to keep them for some reason. dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: # USER: Modify this if you want to keep them for some reason. 
for anno in dataset_dict["annotations"]: if not self.use_instance_mask: anno.pop("segmentation", None) if not self.use_keypoint: anno.pop("keypoints", None) # USER: Implement additional transformations if you have other types of data annos = [ utils.transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices ) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = utils.annotations_to_instances( annos, image_shape, mask_format=self.instance_mask_format ) # After transforms such as cropping are applied, the bounding box may no longer # tightly bound the object. As an example, imagine a triangle object # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to # the intersection of original bounding box and the cropping box. if self.recompute_boxes: instances.gt_boxes = instances.gt_masks.get_bounding_boxes() dataset_dict["instances"] = utils.filter_empty_instances(instances) return dataset_dict
def train_mapper(self,dataset_dict):#,dataset_used): # Implement a mapper, similar to the default DatasetMapper, but with your own customizations # Create a copy of the dataset dict dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below ##### Image Transformations ##### # Read in the image image = utils.read_image(dataset_dict["file_name"], format="BGR") # fileName = dataset_dict["file_name"] ## Crop to bounding box ## # Crop for all but comparison if(self.dataset_used != "comparison" and self.is_crop_to_bbox): # Get the bounding box bbox = ((dataset_dict["annotations"])[0])["bbox"] xmin,ymin,xmax,ymax = bbox w = xmax-xmin h = ymax-ymin # IsCropToBBox = True # if(IsCropToBBox): # Nudge the crop to be slightly outside of the bounding box nudgedXMin = xmin-15 nudgedYMin = ymin-15 nudgedW = w+50 nudgedH = h+50 # If the bounding boxes go outside of the image dimensions, fix this imageHeight = image.shape[0] imageWidth = image.shape[1] if(nudgedXMin < 0): nudgedXMin = 0 if(nudgedYMin < 0): nudgedYMin = 0 if(nudgedXMin+nudgedW >= imageWidth): nudgedW = imageWidth-1 if(nudgedYMin+nudgedH >= imageHeight): nudgedH = imageHeight-1 # Apply the crop cropT = T.CropTransform(nudgedXMin,nudgedYMin,nudgedW,nudgedH) image = cropT.apply_image(image) transforms = T.TransformList([cropT]) # Comparison has bbox the size of the image, so dont bother cropping else: # scaled between 0.5 and 1; shifted up to 0.5 in each dimension # randomExtant = T.RandomExtent( (0.5,1),(0.5,0.5) ) # transforms = T.TransformList([randomExtant]) transforms = T.TransformList([]) # Apply the crop to the bbox as well # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms dataset_dict["height"] = image.shape[0] dataset_dict["width"] = image.shape[1] # Add to the list of transforms # else: # nudgedH = dataset_dict["height"] # nudgedW = dataset_dict["width"] ## Scale the image size ## # thresholdDimension = 1000 # if(dataset_used == "large"): # thresholdDimension = 500 # thresholdDimension = 800 # thresholdDimension = 600 thresholdDimension = self.threshold_dimension currWidth = dataset_dict["width"] currHeight = dataset_dict["height"] # NOTE: YOLO input size must be multiple of 32 if(self.modelLink in ["VGG19_BN","YOLOV3"]): vgg_im_size = thresholdDimension # Apply the scaling transform scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=vgg_im_size,new_w=vgg_im_size,interp="nearest") image = scaleT.apply_image(image.copy()) # Apply the scaling to the bbox # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms # Add this to the list of transforms transforms = transforms + scaleT # Set the dimensions dataset_dict["height"] = image.shape[0] dataset_dict["width"] = image.shape[1] else:# Downscale only at this threshold if(currHeight > thresholdDimension or currWidth > thresholdDimension): myNewH = 0 myNewW = 0 # Scale the longest dimension to threshold, other in proportion if(currHeight > currWidth): myNewH = thresholdDimension ratio = currHeight/float(myNewH) myNewW = currWidth/float(ratio) myNewW = int(round(myNewW)) # myNewW = 800 else: # myNewH = 800 myNewW = thresholdDimension ratio = currWidth/float(myNewW) myNewH = currHeight/float(ratio) myNewH = int(round(myNewH)) # Apply the scaling transform if(self.fixed_wh): scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") else: scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") image = 
scaleT.apply_image(image.copy()) # Apply the scaling to the bbox # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms # Add this to the list of transforms transforms = transforms + scaleT # Set the dimensions dataset_dict["height"] = image.shape[0] dataset_dict["width"] = image.shape[1] ## Apply a random flip ## image, tfms = T.apply_transform_gens([T.RandomFlip()], image) transforms = transforms + tfms # Apply Other Transforms ## # Standard random image mods if(self.dataset_used != "comparison"): image, tfms = T.apply_transform_gens([T.RandomBrightness(0.4,1.6),T.RandomContrast(0.4,1.6),T.RandomSaturation(0.5,1.5),T.RandomLighting(1.2)], image) # More extreme for comparison set else: image, tfms = T.apply_transform_gens([T.RandomBrightness(0.2,1.8),T.RandomContrast(0.2,1.8),T.RandomSaturation(0.3,1.7),T.RandomLighting(1.5)], image) transforms = transforms + tfms ## Apply random affine (actually just a shear) ## # Pass in the image size PILImage = Image.fromarray(image) # Standard affine if(self.dataset_used != "comparison"): shear_range = 8 angle_range = 30 # rand_shear = (np.random.uniform(-shear_range,shear_range),np.random.uniform(-8,8)) # rand_angle = np.random.uniform(-30,30) # More extreme random affine for comparison else: shear_range = 50 angle_range = 30 # rand_shear = (np.random.uniform(-30,30),np.random.uniform(-30,30)) # rand_angle = np.random.uniform(-70,70) rand_shear = (np.random.uniform(-shear_range,shear_range),np.random.uniform(-shear_range,shear_range)) rand_angle = np.random.uniform(-angle_range,angle_range) RandAffT = RandomAffineTransform(PILImage.size,shear=rand_shear,angle=rand_angle) # Apply affine to image image = RandAffT.apply_image(image.copy()) # Append to transforms transforms = transforms + RandAffT ##### END Image Transformations ##### # Keep these in for now I suppose if(image.shape[0] == 0): raise ValueError("image shape[0] is 0!: ",print(image.shape),dataset_dict["file_name"]) if(image.shape[1] == 0): raise ValueError("image shape[1] is 0!: ",print(image.shape),dataset_dict["file_name"]) # Set the image in the dictionary dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) # Do remainder of dictionary classID = ((dataset_dict["annotations"])[0])["category_id"] dataset_dict["classID"] = classID # bboxes # if(self.dataset_used != "comparison"): annos = \ [ utils.transform_instance_annotations(obj, transforms, image.shape[:2]) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] # transformNames = [transforms.__name__ for x in transforms] # transformNames = ", ".join(transformNames) instances = utils.annotations_to_instances(annos, image.shape[:2]) dataset_dict["instances"] = utils.filter_empty_instances(instances) # # no bboxes # else: # instances = Instances( (dataset_dict["height"],dataset_dict["width"]) ) # instances.gt_classes = torch.tensor([dataset_dict["classID"]]) # dataset_dict["instances"] = instances dataset_dict["transforms"] = transforms return dataset_dict
def test_mapper(self,dataset_dict):#,dataset_used): # If we're mapping at test time if(self.is_test_time_mapping): return self.train_mapper(dataset_dict) # Implement a mapper, similar to the default DatasetMapper, but with your own customizations # Create a copy of the dataset dict dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below ##### Image Transformations ##### # Read in the image image = utils.read_image(dataset_dict["file_name"], format="BGR") # fileName = dataset_dict["file_name"] ## Crop to bounding box ## if(self.dataset_used != "comparison" and self.is_crop_to_bbox): # Get the bounding box bbox = ((dataset_dict["annotations"])[0])["bbox"] xmin,ymin,xmax,ymax = bbox w = xmax-xmin h = ymax-ymin # IsCropToBBox = True # if(IsCropToBBox): # Nudge the crop to be slightly outside of the bounding box nudgedXMin = xmin-15 nudgedYMin = ymin-15 nudgedW = w+50 nudgedH = h+50 # If the bounding boxes go outside of the image dimensions, fix this imageHeight = image.shape[0] imageWidth = image.shape[1] if(nudgedXMin < 0): nudgedXMin = 0 if(nudgedYMin < 0): nudgedYMin = 0 if(nudgedXMin+nudgedW >= imageWidth): nudgedW = imageWidth-1 if(nudgedYMin+nudgedH >= imageHeight): nudgedH = imageHeight-1 # Apply the crop cropT = T.CropTransform(nudgedXMin,nudgedYMin,nudgedW,nudgedH) image = cropT.apply_image(image) transforms = T.TransformList([cropT]) # else: # nudgedH = dataset_dict["height"] # nudgedW = dataset_dict["width"] else: transforms = T.TransformList([]) # Apply the crop to the bbox as well # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms dataset_dict["height"] = image.shape[0] dataset_dict["width"] = image.shape[1] # Add to the list of transforms # else: # nudgedH = dataset_dict["height"] # nudgedW = dataset_dict["width"] ## Scale the image size ## # thresholdDimension = 1000 # if(dataset_used == "large"): # thresholdDimension = 500 # thresholdDimension = 800 # thresholdDimension = 600 thresholdDimension = self.threshold_dimension currWidth = dataset_dict["width"] currHeight = dataset_dict["height"] # the way ive done vgg and yolo means they need the same size images if(self.modelLink in ["VGG19_BN","YOLOV3"]): vgg_im_size = thresholdDimension # Apply the scaling transform scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=vgg_im_size,new_w=vgg_im_size,interp="nearest") image = scaleT.apply_image(image.copy()) # Apply the scaling to the bbox # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms # Add this to the list of transforms transforms = transforms + scaleT # Set the dimensions dataset_dict["height"] = image.shape[0] dataset_dict["width"] = image.shape[1] # not vgg or yolo else:# Downscale only at this threshold # Downscale only at this threshold if(currHeight > thresholdDimension or currWidth > thresholdDimension): myNewH = 0 myNewW = 0 # Scale the longest dimension to 1333, the shorter to 800 if(currHeight > currWidth): myNewH = thresholdDimension ratio = currHeight/float(myNewH) myNewW = currWidth/float(ratio) myNewW = int(round(myNewW)) # myNewW = 800 else: # myNewH = 800 myNewW = thresholdDimension ratio = currWidth/float(myNewW) myNewH = currHeight/float(ratio) myNewH = int(round(myNewH)) # Apply the scaling transform # scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") # scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") if(self.fixed_wh): scaleT = 
T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") else: scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") image = scaleT.apply_image(image.copy()) # Apply the scaling to the bbox # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms # Add this to the list of transforms transforms = transforms + scaleT # Set the dimensions dataset_dict["height"] = image.shape[0] dataset_dict["width"] = image.shape[1] ## Apply a random flip ## # image, tfms = T.apply_transform_gens([T.RandomFlip()], image) # transforms = transforms + tfms # Apply Other Transforms ## # image, tfms = T.apply_transform_gens([T.RandomBrightness(0.4,1.6),T.RandomContrast(0.4,1.6),T.RandomSaturation(0.5,1.5),T.RandomLighting(1.2)], image) # transforms = transforms + tfms ## Apply random affine (actually just a shear) ## # Pass in the image size # PILImage = Image.fromarray(image) # RandAffT = RandomAffineTransform(PILImage.size) # Apply affine to image # image = RandAffT.apply_image(image.copy()) # Append to transforms # transforms = transforms + RandAffT ##### END Image Transformations ##### # Keep these in for now I suppose if(image.shape[0] == 0): raise ValueError("image shape[0] is 0!: ",print(image.shape),dataset_dict["file_name"]) if(image.shape[1] == 0): raise ValueError("image shape[1] is 0!: ",print(image.shape),dataset_dict["file_name"]) # Set the image in the dictionary dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32")) # Do remainder of dictionary classID = ((dataset_dict["annotations"])[0])["category_id"] dataset_dict["classID"] = classID annos = \ [ utils.transform_instance_annotations(obj, transforms, image.shape[:2]) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] # transformNames = [transforms.__name__ for x in transforms] # transformNames = ", ".join(transformNames) instances = utils.annotations_to_instances(annos, image.shape[:2]) dataset_dict["instances"] = utils.filter_empty_instances(instances) dataset_dict["transforms"] = transforms return dataset_dict # # Small mappers # def small_train_mapper(self,dataset_dict): # return self.train_mapper(dataset_dict,"small") # def small_test_mapper(self,dataset_dict): # if(self.is_test_time_mapping): # return self.test_mapper(dataset_dict,"small") # else: # return self.train_mapper(dataset_dict,"small") # # Large mappers # def large_train_mapper(self,dataset_dict): # return self.train_mapper(dataset_dict,"large") # def large_test_mapper(self,dataset_dict): # if(self.is_test_time_mapping): # return self.test_mapper(dataset_dict,"large") # else: # return self.train_mapper(dataset_dict,"large") # # Full mappers # def full_train_mapper(self,dataset_dict): # return self.train_mapper(dataset_dict,"full") # def full_test_mapper(self,dataset_dict): # if(self.is_test_time_mapping): # return self.test_mapper(dataset_dict,"full") # else: # return self.train_mapper(dataset_dict,"full") # # Comparison mappers # def comparison_train_mapper(self,dataset_dict): # return self.train_mapper(dataset_dict,"comparison") # def comparison_test_mapper(self,dataset_dict): # if(self.is_test_time_mapping): # return self.test_mapper(dataset_dict,"comparison") # else: # return self.train_mapper(dataset_dict,"comparison")
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy( dataset_dict) # it will be modified by code below # USER: Write your own image loading if it's not from a file image = utils.read_image(dataset_dict["file_name"], format=self.image_format) utils.check_image_size(dataset_dict, image) # USER: Remove if you don't do semantic/panoptic segmentation. if "sem_seg_file_name" in dataset_dict: sem_seg_gt = utils.read_image( dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2) else: sem_seg_gt = None aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt) transforms = aug_input.apply_augmentations(self.augmentations) image, sem_seg_gt = aug_input.image, aug_input.sem_seg image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor( np.ascontiguousarray(image.transpose(2, 0, 1))) if sem_seg_gt is not None: dataset_dict["sem_seg"] = torch.as_tensor( sem_seg_gt.astype("long")) # USER: Remove if you don't use pre-computed proposals. # Most users would not need this feature. if self.proposal_topk is not None: utils.transform_proposals(dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk) if not self.is_train: # USER: Modify this if you want to keep them for some reason. dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: # USER: Modify this if you want to keep them for some reason. for anno in dataset_dict["annotations"]: if not self.use_instance_mask: anno.pop("segmentation", None) if not self.use_keypoint: anno.pop("keypoints", None) # USER: Implement additional transformations if you have other types of data annos = [ utils.transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = utils.annotations_to_instances( annos, image_shape, mask_format=self.instance_mask_format) # @ Will Lee 精细分类类别:非标准,基本标准 or 标准 standard_ids = [obj["standard_id"] for obj in annos] standard_ids = torch.tensor(standard_ids, dtype=torch.int64) instances.gt_standards = standard_ids # After transforms such as cropping are applied, the bounding box may no longer # tightly bound the object. As an example, imagine a triangle object # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to # the intersection of original bounding box and the cropping box. if self.recompute_boxes: instances.gt_boxes = instances.gt_masks.get_bounding_boxes() dataset_dict["instances"] = utils.filter_empty_instances(instances) return dataset_dict
def __call__(self, dataset_dict): """ Args: dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. Returns: dict: a format that builtin models in detectron2 accept """ dataset_dict = copy.deepcopy( dataset_dict) # it will be modified by code below # USER: Write your own image loading if it's not from a file image = utils.read_image(dataset_dict["file_name"], format=self.img_format) utils.check_image_size(dataset_dict, image) ### my code ### ## segmentaion の annotation を一旦退避して、後で追加する seg_bk = [ dictwk["segmentation"] for dictwk in dataset_dict["annotations"] ] for i in range(len(dataset_dict["annotations"])): dataset_dict["annotations"][i].pop("segmentation") image, dataset_dict = self.aug_handler( image=image, dataset_dict_detectron=dataset_dict) for i in range(len(dataset_dict["annotations"])): dataset_dict["annotations"][i]["segmentation"] = seg_bk[i] ### my code ### if "annotations" not in dataset_dict: image, transforms = T.apply_transform_gens( ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image) else: # Crop around an instance if there are instances in the image. # USER: Remove if you don't use cropping if self.crop_gen: crop_tfm = utils.gen_crop_transform_with_instance( self.crop_gen.get_crop_size(image.shape[:2]), image.shape[:2], np.random.choice(dataset_dict["annotations"]), ) image = crop_tfm.apply_image(image) image, transforms = T.apply_transform_gens(self.tfm_gens, image) if self.crop_gen: transforms = crop_tfm + transforms image_shape = image.shape[:2] # h, w # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, # but not efficient on large generic data structures due to the use of pickle & mp.Queue. # Therefore it's important to use torch.Tensor. dataset_dict["image"] = torch.as_tensor( np.ascontiguousarray(image.transpose(2, 0, 1))) # USER: Remove if you don't use pre-computed proposals. if self.load_proposals: utils.transform_proposals(dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk) if not self.is_train: # USER: Modify this if you want to keep them for some reason. dataset_dict.pop("annotations", None) dataset_dict.pop("sem_seg_file_name", None) return dataset_dict if "annotations" in dataset_dict: # USER: Modify this if you want to keep them for some reason. for anno in dataset_dict["annotations"]: if not self.mask_on: anno.pop("segmentation", None) if not self.keypoint_on: anno.pop("keypoints", None) # USER: Implement additional transformations if you have other types of data annos = [ utils.transform_instance_annotations( obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices) for obj in dataset_dict.pop("annotations") if obj.get("iscrowd", 0) == 0 ] instances = utils.annotations_to_instances( annos, image_shape, mask_format=self.mask_format) # Create a tight bounding box from masks, useful when image is cropped if self.crop_gen and instances.has("gt_masks"): instances.gt_boxes = instances.gt_masks.get_bounding_boxes() dataset_dict["instances"] = utils.filter_empty_instances(instances) # USER: Remove if you don't do semantic/panoptic segmentation. if "sem_seg_file_name" in dataset_dict: with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: sem_seg_gt = Image.open(f) sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) dataset_dict["sem_seg"] = sem_seg_gt return dataset_dict
def __call__(self, dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    utils.check_image_size(dataset_dict, image)

    data_transformations = []
    if self.is_train:
        # Crop
        if self.crop:
            crop_gen = T.RandomCrop(self.crop_type, self.crop_size)
            data_transformations.append(crop_gen)
            print('crop')
        # Horizontal flip
        if self.flip:
            flip_gen = T.RandomFlip()
            data_transformations.append(flip_gen)
        # if self.rotation:
        #     rotation_gen = T.RandomRotation([0, 90])
        #     data_transformations.append(rotation_gen)
        if self.saturation:
            saturation_gen = T.RandomSaturation(0.5, 1.5)
            data_transformations.append(saturation_gen)
        print(str(dataset_dict["file_name"]))

    image, transforms = T.apply_transform_gens(data_transformations, image)

    # Debug dump of the augmented image for visual inspection.
    print('\n\n -------------------PRINTING IMAGE---------------------- \n\n')
    img_name = dataset_dict["file_name"][len(dataset_dict["file_name"]) - 15:len(dataset_dict["file_name"]) - 4]
    img_name = '/home/grupo01/images_augmented/' + img_name + '_augmented.png'
    print(len(dataset_dict["file_name"]))
    print(img_name)
    cv2.imwrite(img_name, image)

    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    image_shape = image.shape[:2]

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape, keypoint_hflip_indices=None)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape, mask_format=self.mask_format)
        # Create a tight bounding box from masks, useful when image is cropped.
        # Uses the same self.crop flag that gated RandomCrop above.
        if self.crop and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict
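The filename handling in the debug dump above is brittle: it slices a fixed number of characters off `file_name`, which only works for one specific basename length. A sketch of a more robust variant follows; `dump_augmented_image` is a hypothetical helper, and the output directory is simply reused from the snippet.

import os

import cv2

def dump_augmented_image(file_name, image, out_dir="/home/grupo01/images_augmented"):
    # Derive the basename without its extension instead of slicing fixed offsets.
    stem = os.path.splitext(os.path.basename(file_name))[0]
    out_path = os.path.join(out_dir, stem + "_augmented.png")
    cv2.imwrite(out_path, image)  # image is expected in BGR order (OpenCV convention)
    return out_path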
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    try:
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    except Exception as e:
        print(dataset_dict["file_name"])
        print(e)
        raise e
    try:
        utils.check_image_size(dataset_dict, image)
    except SizeMismatchError as e:
        expected_wh = (dataset_dict["width"], dataset_dict["height"])
        image_wh = (image.shape[1], image.shape[0])
        if (image_wh[1], image_wh[0]) == expected_wh:
            print("transposing image {}".format(dataset_dict["file_name"]))
            image = image.transpose(1, 0, 2)
        else:
            raise e

    if "annotations" not in dataset_dict or len(dataset_dict["annotations"]) == 0:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
        )
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32")
    )  # Can use uint8 if it turns out to be slow some day

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
        )

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        dataset_dict.pop("pano_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format
        )
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt

    if self.basis_loss_on and self.is_train:
        # load basis supervisions
        if self.ann_set == "coco":
            basis_sem_path = dataset_dict["file_name"].replace(
                'train2017', 'thing_train2017').replace('image/train', 'thing_train')
        else:
            basis_sem_path = dataset_dict["file_name"].replace(
                'coco', 'lvis').replace('train2017', 'thing_train').replace('jpg', 'npz')
        basis_sem_path = basis_sem_path.replace('jpg', 'npz')
        basis_sem_gt = np.load(basis_sem_path)["mask"]
        basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
        basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
        dataset_dict["basis_sem"] = basis_sem_gt

    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
        )
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict,
            image_shape,
            transforms,
            proposal_topk=self.proposal_topk,
            min_box_size=self.proposal_min_box_size,
        )

    # HACK Keep annotations for test
    # if not self.is_train:
    #     # USER: Modify this if you want to keep them for some reason.
    #     dataset_dict.pop("annotations", None)
    #     dataset_dict.pop("sem_seg_file_name", None)
    #     return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format
        )
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict
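None of the snippets show how a mapper actually gets attached to the training data loader. Below is a minimal sketch using the standard detectron2 API; `TrainerWithCustomMapper` is a hypothetical class name, and the default `DatasetMapper(cfg, is_train=True)` is used only as a stand-in so the sketch is self-contained. In practice you would pass one of the custom mappers above instead.

from detectron2.data import DatasetMapper, build_detection_train_loader
from detectron2.engine import DefaultTrainer

class TrainerWithCustomMapper(DefaultTrainer):
    @classmethod
    def build_train_loader(cls, cfg):
        # Swap in any single-argument mapper(dataset_dict) here;
        # the default DatasetMapper is used as a placeholder.
        return build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, is_train=True))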